[
  {
    "path": ".github/workflows/basic.yml",
    "content": "name: basic\n\non:\n  push:\n    branches: [ master ]\n  pull_request:\n    branches: [ master ]\n  schedule:\n    - cron: 0 14 * * 1\n  workflow_dispatch:\n\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    steps:\n      - uses: actions/checkout@v2\n\n      - uses: actions/cache@v2\n        id: cache\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n\n      - name: set up python 3.x\n        if: steps.cache.outputs.cache-hit != 'true'\n        uses: actions/setup-python@v2\n        with:\n          python-version: '3.x'\n          architecture: 'x64'\n\n      - name: install dependencies\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          python -m pip install --upgrade pip meson ninja\n\n      - name: build\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          ./build.sh\n  \n  debug:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make debug\n        run: |\n          clang --version\n          make clean\n          make debug\n        working-directory: ./src\n      - name: make test\n        run: timeout --signal=KILL 30m make test\n        working-directory: ./src\n  \n  release:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n      - name: 
make format\n        run: make format\n        working-directory: ./src\n      - name: make release\n        run: |\n          clang --version\n          make clean\n          make release\n        working-directory: ./src\n      - name: make test\n        run: timeout --signal=KILL 30m make test\n        working-directory: ./src\n"
  },
  {
    "path": ".github/workflows/benchmark.yml",
    "content": "name: benchmark\n\non:\n  push:\n    branches:\n      - \"!*\"\n    tags:\n      - \"v*\"\n  schedule:\n    - cron: 0 14 * * 1\n  workflow_dispatch:\n\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    steps:\n      - uses: actions/checkout@v2\n\n      - uses: actions/cache@v2\n        id: cache\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n\n      - name: set up python 3.x\n        if: steps.cache.outputs.cache-hit != 'true'\n        uses: actions/setup-python@v2\n        with:\n          python-version: '3.x'\n          architecture: 'x64'\n\n      - name: install dependencies\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          python -m pip install --upgrade pip meson ninja\n\n      - name: build\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          ./build.sh\n\n  normal:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make release\n        run: |\n          clang --version\n          make clean\n          make release\n        working-directory: ./src\n      - name: make benchmark\n        run: timeout --signal=KILL 35m make benchmark\n        working-directory: ./src\n\n  early_instrumentation:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') 
}}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make release\n        run: |\n          clang --version\n          make clean\n          make release\n        working-directory: ./src\n      - name: make benchmark\n        run: timeout --signal=KILL 35m make benchmark TEST_OPTIONS=\"-e\"\n        working-directory: ./src\n"
  },
  {
    "path": ".github/workflows/early_instrumentation.yml",
    "content": "name: early_instrumentation\n\non:\n  push:\n    branches: [ master ]\n  pull_request:\n    branches: [ master ]\n  schedule:\n    - cron: 0 14 * * 1\n  workflow_dispatch:\n\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    steps:\n      - uses: actions/checkout@v2\n\n      - uses: actions/cache@v2\n        id: cache\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n\n      - name: set up python 3.x\n        if: steps.cache.outputs.cache-hit != 'true'\n        uses: actions/setup-python@v2\n        with:\n          python-version: '3.x'\n          architecture: 'x64'\n\n      - name: install dependencies\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          python -m pip install --upgrade pip meson ninja\n\n      - name: build\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          ./build.sh\n  \n  debug:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make debug\n        run: |\n          clang --version\n          make clean\n          make debug\n        working-directory: ./src\n      - name: make test\n        run: timeout --signal=KILL 30m make test TEST_OPTIONS='-e'\n        working-directory: ./src\n  \n  release:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ 
hashFiles('build.sh') }}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make release\n        run: |\n          clang --version\n          make clean\n          make release\n        working-directory: ./src\n      - name: make test\n        run: timeout --signal=KILL 30m make test TEST_OPTIONS='-e'\n        working-directory: ./src\n"
  },
  {
    "path": ".github/workflows/force_linear.yml",
    "content": "name: force_linear\n\non:\n  push:\n    branches: [ master ]\n  pull_request:\n    branches: [ master ]\n  schedule:\n    - cron: 0 14 * * 1\n  workflow_dispatch:\n\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    steps:\n      - uses: actions/checkout@v2\n\n      - uses: actions/cache@v2\n        id: cache\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n\n      - name: set up python 3.x\n        if: steps.cache.outputs.cache-hit != 'true'\n        uses: actions/setup-python@v2\n        with:\n          python-version: '3.x'\n          architecture: 'x64'\n\n      - name: install dependencies\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          python -m pip install --upgrade pip meson ninja\n\n      - name: build\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          ./build.sh\n  \n  debug:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make debug\n        run: |\n          clang --version\n          make clean\n          make debug\n        working-directory: ./src\n      - name: make test\n        run: timeout --signal=KILL 30m make test TEST_OPTIONS='-n'\n        working-directory: ./src\n  \n  release:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ 
hashFiles('build.sh') }}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make release\n        run: |\n          clang --version\n          make clean\n          make release\n        working-directory: ./src\n      - name: make test\n        run: timeout --signal=KILL 30m make test TEST_OPTIONS='-n'\n        working-directory: ./src\n"
  },
  {
    "path": ".github/workflows/no_generic_pic.yml",
    "content": "name: no_generic_pic\n\non:\n  push:\n    branches: [ master ]\n  pull_request:\n    branches: [ master ]\n  schedule:\n    - cron: 0 14 * * 1\n  workflow_dispatch:\n\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    steps:\n      - uses: actions/checkout@v2\n\n      - uses: actions/cache@v2\n        id: cache\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n\n      - name: set up python 3.x\n        if: steps.cache.outputs.cache-hit != 'true'\n        uses: actions/setup-python@v2\n        with:\n          python-version: '3.x'\n          architecture: 'x64'\n\n      - name: install dependencies\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          python -m pip install --upgrade pip meson ninja\n\n      - name: build\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          ./build.sh\n  \n  debug:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make debug\n        run: |\n          clang --version\n          make clean\n          make debug\n        working-directory: ./src\n      - name: make test\n        run: timeout --signal=KILL 30m make test TEST_OPTIONS='-r'\n        working-directory: ./src\n  \n  release:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ 
hashFiles('build.sh') }}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make release\n        run: |\n          clang --version\n          make clean\n          make release\n        working-directory: ./src\n      - name: make test\n        run: timeout --signal=KILL 30m make test TEST_OPTIONS='-r'\n        working-directory: ./src\n"
  },
  {
    "path": ".github/workflows/pdisasm.yml",
    "content": "name: pdisasm\n\non:\n  push:\n    branches: [ master ]\n  pull_request:\n    branches: [ master ]\n  schedule:\n    - cron: 0 14 * * 1\n  workflow_dispatch:\n\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    steps:\n      - uses: actions/checkout@v2\n\n      - uses: actions/cache@v2\n        id: cache\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n\n      - name: set up python 3.x\n        if: steps.cache.outputs.cache-hit != 'true'\n        uses: actions/setup-python@v2\n        with:\n          python-version: '3.x'\n          architecture: 'x64'\n\n      - name: install dependencies\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          python -m pip install --upgrade pip meson ninja\n\n      - name: build\n        if: steps.cache.outputs.cache-hit != 'true'\n        run: |\n          ./build.sh\n  \n  release:\n    runs-on: ubuntu-22.04\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/cache@v2\n        with:\n          path: |\n            capstone/\n            keystone/\n            glib/\n            libunwind/\n          key: ${{ runner.os }}-${{ hashFiles('build.sh') }}\n      - name: make format\n        run: make format\n        working-directory: ./src\n      - name: make release\n        run: |\n          clang --version\n          make clean\n          make release\n        working-directory: ./src\n      - name: make test\n        run: timeout --signal=KILL 30m make test TEST_OPTIONS='-f'\n        working-directory: ./src\n"
  },
  {
    "path": ".gitignore",
    "content": "# Prerequisites\n*.d\n\n# Object files\n*.o\n*.ko\n*.obj\n*.elf\n\n# Linker output\n*.ilk\n*.map\n*.exp\n\n# Precompiled Headers\n*.gch\n*.pch\n\n# Libraries\n*.lib\n*.a\n*.la\n*.lo\n\n# Shared objects (inc. Windows DLLs)\n*.dll\n*.so\n*.so.*\n*.dylib\n\n# Executables\n*.exe\n*.out\n*.app\n*.i*86\n*.x86_64\n*.hex\n\n# Debug files\n*.dSYM/\n*.su\n*.idb\n*.pdb\n\n# Kernel Module Compile Results\n*.mod*\n*.cmd\n.tmp_versions/\nmodules.order\nModule.symvers\nMkfile.old\ndkms.conf\n\n# Customize files\nworkshop/\ncapstone/ \nkeystone/\nglib/\nlibunwind/\nAFL/\n*.zip\n*.swp\n*.tmp\n*.bin\n*_bin.c\n*.tp\n*_tp.c\nstoch-fuzz\n*.gdb_history\npeda*\nhandler_main.c\n.stoch-fuzz.*\nlocal_crashes\nlibrary_functions_load.c\n\n# whitelist test\nsrc/test/\ntest/*\n!test/test_daemon.sh\n!test/test_daemon_ignore_asan_sof.sh\n!test/bzip2.no.pie\n!test/test.c.bz2\n!test/libpng-1.2.56\n!test/seed.png\n!test/crash\n!test/crash.c\n!test/openssl-1.0.1f\n!test/openssl-1.0.1f.code.segments\n!test/leak-268f0e85f4bc45cbaf4d257222b830eac18977f3\n!test/runtime\n!test/check_avx512.c\n!test/check_avx512\n!test/z3\n!test/ex.smt2\n!test/.crashpoint.z3\n!test/hello\n!test/test.rar\n!test/rar\n!test/timeout\n!test/timeout.c\n!test/unintentional_crash\n!test/unintentional_crash.c\n!test/no_main\n!test/no_main.c\n!test/json-2017-02-12.normal\n!test/json.seed\n!test/readelf.pie\n!test/small_exec.elf\n!test/bzip2.pie\n!test/pngfix.pie\n!test/toucan.png\n!test/tcpdump.pie\n!test/vrrp.pcap\n!test/libjpeg.asan\n!test/seed.jpg\n"
  },
  {
    "path": "LICENSE",
    "content": "                    GNU GENERAL PUBLIC LICENSE\n                       Version 3, 29 June 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n                            Preamble\n\n  The GNU General Public License is a free, copyleft license for\nsoftware and other kinds of works.\n\n  The licenses for most software and other practical works are designed\nto take away your freedom to share and change the works.  By contrast,\nthe GNU General Public License is intended to guarantee your freedom to\nshare and change all versions of a program--to make sure it remains free\nsoftware for all its users.  We, the Free Software Foundation, use the\nGNU General Public License for most of our software; it applies also to\nany other work released this way by its authors.  You can apply it to\nyour programs, too.\n\n  When we speak of free software, we are referring to freedom, not\nprice.  Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthem if you wish), that you receive source code or can get it if you\nwant it, that you can change the software or use pieces of it in new\nfree programs, and that you know you can do these things.\n\n  To protect your rights, we need to prevent others from denying you\nthese rights or asking you to surrender the rights.  Therefore, you have\ncertain responsibilities if you distribute copies of the software, or if\nyou modify it: responsibilities to respect the freedom of others.\n\n  For example, if you distribute copies of such a program, whether\ngratis or for a fee, you must pass on to the recipients the same\nfreedoms that you received.  You must make sure that they, too, receive\nor can get the source code.  
And you must show them these terms so they\nknow their rights.\n\n  Developers that use the GNU GPL protect your rights with two steps:\n(1) assert copyright on the software, and (2) offer you this License\ngiving you legal permission to copy, distribute and/or modify it.\n\n  For the developers' and authors' protection, the GPL clearly explains\nthat there is no warranty for this free software.  For both users' and\nauthors' sake, the GPL requires that modified versions be marked as\nchanged, so that their problems will not be attributed erroneously to\nauthors of previous versions.\n\n  Some devices are designed to deny users access to install or run\nmodified versions of the software inside them, although the manufacturer\ncan do so.  This is fundamentally incompatible with the aim of\nprotecting users' freedom to change the software.  The systematic\npattern of such abuse occurs in the area of products for individuals to\nuse, which is precisely where it is most unacceptable.  Therefore, we\nhave designed this version of the GPL to prohibit the practice for those\nproducts.  If such problems arise substantially in other domains, we\nstand ready to extend this provision to those domains in future versions\nof the GPL, as needed to protect the freedom of users.\n\n  Finally, every program is threatened constantly by software patents.\nStates should not allow patents to restrict development and use of\nsoftware on general-purpose computers, but in those that do, we wish to\navoid the special danger that patents applied to a free program could\nmake it effectively proprietary.  To prevent this, the GPL assures that\npatents cannot be used to render the program non-free.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.\n\n                       TERMS AND CONDITIONS\n\n  0. 
Definitions.\n\n  \"This License\" refers to version 3 of the GNU General Public License.\n\n  \"Copyright\" also means copyright-like laws that apply to other kinds of\nworks, such as semiconductor masks.\n\n  \"The Program\" refers to any copyrightable work licensed under this\nLicense.  Each licensee is addressed as \"you\".  \"Licensees\" and\n\"recipients\" may be individuals or organizations.\n\n  To \"modify\" a work means to copy from or adapt all or part of the work\nin a fashion requiring copyright permission, other than the making of an\nexact copy.  The resulting work is called a \"modified version\" of the\nearlier work or a work \"based on\" the earlier work.\n\n  A \"covered work\" means either the unmodified Program or a work based\non the Program.\n\n  To \"propagate\" a work means to do anything with it that, without\npermission, would make you directly or secondarily liable for\ninfringement under applicable copyright law, except executing it on a\ncomputer or modifying a private copy.  Propagation includes copying,\ndistribution (with or without modification), making available to the\npublic, and in some countries other activities as well.\n\n  To \"convey\" a work means any kind of propagation that enables other\nparties to make or receive copies.  Mere interaction with a user through\na computer network, with no transfer of a copy, is not conveying.\n\n  An interactive user interface displays \"Appropriate Legal Notices\"\nto the extent that it includes a convenient and prominently visible\nfeature that (1) displays an appropriate copyright notice, and (2)\ntells the user that there is no warranty for the work (except to the\nextent that warranties are provided), that licensees may convey the\nwork under this License, and how to view a copy of this License.  If\nthe interface presents a list of user commands or options, such as a\nmenu, a prominent item in the list meets this criterion.\n\n  1. 
Source Code.\n\n  The \"source code\" for a work means the preferred form of the work\nfor making modifications to it.  \"Object code\" means any non-source\nform of a work.\n\n  A \"Standard Interface\" means an interface that either is an official\nstandard defined by a recognized standards body, or, in the case of\ninterfaces specified for a particular programming language, one that\nis widely used among developers working in that language.\n\n  The \"System Libraries\" of an executable work include anything, other\nthan the work as a whole, that (a) is included in the normal form of\npackaging a Major Component, but which is not part of that Major\nComponent, and (b) serves only to enable use of the work with that\nMajor Component, or to implement a Standard Interface for which an\nimplementation is available to the public in source code form.  A\n\"Major Component\", in this context, means a major essential component\n(kernel, window system, and so on) of the specific operating system\n(if any) on which the executable work runs, or a compiler used to\nproduce the work, or an object code interpreter used to run it.\n\n  The \"Corresponding Source\" for a work in object code form means all\nthe source code needed to generate, install, and (for an executable\nwork) run the object code and to modify the work, including scripts to\ncontrol those activities.  However, it does not include the work's\nSystem Libraries, or general-purpose tools or generally available free\nprograms which are used unmodified in performing those activities but\nwhich are not part of the work.  
For example, Corresponding Source\nincludes interface definition files associated with source files for\nthe work, and the source code for shared libraries and dynamically\nlinked subprograms that the work is specifically designed to require,\nsuch as by intimate data communication or control flow between those\nsubprograms and other parts of the work.\n\n  The Corresponding Source need not include anything that users\ncan regenerate automatically from other parts of the Corresponding\nSource.\n\n  The Corresponding Source for a work in source code form is that\nsame work.\n\n  2. Basic Permissions.\n\n  All rights granted under this License are granted for the term of\ncopyright on the Program, and are irrevocable provided the stated\nconditions are met.  This License explicitly affirms your unlimited\npermission to run the unmodified Program.  The output from running a\ncovered work is covered by this License only if the output, given its\ncontent, constitutes a covered work.  This License acknowledges your\nrights of fair use or other equivalent, as provided by copyright law.\n\n  You may make, run and propagate covered works that you do not\nconvey, without conditions so long as your license otherwise remains\nin force.  You may convey covered works to others for the sole purpose\nof having them make modifications exclusively for you, or provide you\nwith facilities for running those works, provided that you comply with\nthe terms of this License in conveying all material for which you do\nnot control copyright.  Those thus making or running the covered works\nfor you must do so exclusively on your behalf, under your direction\nand control, on terms that prohibit them from making any copies of\nyour copyrighted material outside their relationship with you.\n\n  Conveying under any other circumstances is permitted solely under\nthe conditions stated below.  Sublicensing is not allowed; section 10\nmakes it unnecessary.\n\n  3. 
Protecting Users' Legal Rights From Anti-Circumvention Law.\n\n  No covered work shall be deemed part of an effective technological\nmeasure under any applicable law fulfilling obligations under article\n11 of the WIPO copyright treaty adopted on 20 December 1996, or\nsimilar laws prohibiting or restricting circumvention of such\nmeasures.\n\n  When you convey a covered work, you waive any legal power to forbid\ncircumvention of technological measures to the extent such circumvention\nis effected by exercising rights under this License with respect to\nthe covered work, and you disclaim any intention to limit operation or\nmodification of the work as a means of enforcing, against the work's\nusers, your or third parties' legal rights to forbid circumvention of\ntechnological measures.\n\n  4. Conveying Verbatim Copies.\n\n  You may convey verbatim copies of the Program's source code as you\nreceive it, in any medium, provided that you conspicuously and\nappropriately publish on each copy an appropriate copyright notice;\nkeep intact all notices stating that this License and any\nnon-permissive terms added in accord with section 7 apply to the code;\nkeep intact all notices of the absence of any warranty; and give all\nrecipients a copy of this License along with the Program.\n\n  You may charge any price or no price for each copy that you convey,\nand you may offer support or warranty protection for a fee.\n\n  5. Conveying Modified Source Versions.\n\n  You may convey a work based on the Program, or the modifications to\nproduce it from the Program, in the form of source code under the\nterms of section 4, provided that you also meet all of these conditions:\n\n    a) The work must carry prominent notices stating that you modified\n    it, and giving a relevant date.\n\n    b) The work must carry prominent notices stating that it is\n    released under this License and any conditions added under section\n    7.  
This requirement modifies the requirement in section 4 to\n    \"keep intact all notices\".\n\n    c) You must license the entire work, as a whole, under this\n    License to anyone who comes into possession of a copy.  This\n    License will therefore apply, along with any applicable section 7\n    additional terms, to the whole of the work, and all its parts,\n    regardless of how they are packaged.  This License gives no\n    permission to license the work in any other way, but it does not\n    invalidate such permission if you have separately received it.\n\n    d) If the work has interactive user interfaces, each must display\n    Appropriate Legal Notices; however, if the Program has interactive\n    interfaces that do not display Appropriate Legal Notices, your\n    work need not make them do so.\n\n  A compilation of a covered work with other separate and independent\nworks, which are not by their nature extensions of the covered work,\nand which are not combined with it such as to form a larger program,\nin or on a volume of a storage or distribution medium, is called an\n\"aggregate\" if the compilation and its resulting copyright are not\nused to limit the access or legal rights of the compilation's users\nbeyond what the individual works permit.  Inclusion of a covered work\nin an aggregate does not cause this License to apply to the other\nparts of the aggregate.\n\n  6. 
Conveying Non-Source Forms.\n\n  You may convey a covered work in object code form under the terms\nof sections 4 and 5, provided that you also convey the\nmachine-readable Corresponding Source under the terms of this License,\nin one of these ways:\n\n    a) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by the\n    Corresponding Source fixed on a durable physical medium\n    customarily used for software interchange.\n\n    b) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by a\n    written offer, valid for at least three years and valid for as\n    long as you offer spare parts or customer support for that product\n    model, to give anyone who possesses the object code either (1) a\n    copy of the Corresponding Source for all the software in the\n    product that is covered by this License, on a durable physical\n    medium customarily used for software interchange, for a price no\n    more than your reasonable cost of physically performing this\n    conveying of source, or (2) access to copy the\n    Corresponding Source from a network server at no charge.\n\n    c) Convey individual copies of the object code with a copy of the\n    written offer to provide the Corresponding Source.  This\n    alternative is allowed only occasionally and noncommercially, and\n    only if you received the object code with such an offer, in accord\n    with subsection 6b.\n\n    d) Convey the object code by offering access from a designated\n    place (gratis or for a charge), and offer equivalent access to the\n    Corresponding Source in the same way through the same place at no\n    further charge.  You need not require recipients to copy the\n    Corresponding Source along with the object code.  
If the place to\n    copy the object code is a network server, the Corresponding Source\n    may be on a different server (operated by you or a third party)\n    that supports equivalent copying facilities, provided you maintain\n    clear directions next to the object code saying where to find the\n    Corresponding Source.  Regardless of what server hosts the\n    Corresponding Source, you remain obligated to ensure that it is\n    available for as long as needed to satisfy these requirements.\n\n    e) Convey the object code using peer-to-peer transmission, provided\n    you inform other peers where the object code and Corresponding\n    Source of the work are being offered to the general public at no\n    charge under subsection 6d.\n\n  A separable portion of the object code, whose source code is excluded\nfrom the Corresponding Source as a System Library, need not be\nincluded in conveying the object code work.\n\n  A \"User Product\" is either (1) a \"consumer product\", which means any\ntangible personal property which is normally used for personal, family,\nor household purposes, or (2) anything designed or sold for incorporation\ninto a dwelling.  In determining whether a product is a consumer product,\ndoubtful cases shall be resolved in favor of coverage.  For a particular\nproduct received by a particular user, \"normally used\" refers to a\ntypical or common use of that class of product, regardless of the status\nof the particular user or of the way in which the particular user\nactually uses, or expects or is expected to use, the product.  
A product\nis a consumer product regardless of whether the product has substantial\ncommercial, industrial or non-consumer uses, unless such uses represent\nthe only significant mode of use of the product.\n\n  \"Installation Information\" for a User Product means any methods,\nprocedures, authorization keys, or other information required to install\nand execute modified versions of a covered work in that User Product from\na modified version of its Corresponding Source.  The information must\nsuffice to ensure that the continued functioning of the modified object\ncode is in no case prevented or interfered with solely because\nmodification has been made.\n\n  If you convey an object code work under this section in, or with, or\nspecifically for use in, a User Product, and the conveying occurs as\npart of a transaction in which the right of possession and use of the\nUser Product is transferred to the recipient in perpetuity or for a\nfixed term (regardless of how the transaction is characterized), the\nCorresponding Source conveyed under this section must be accompanied\nby the Installation Information.  But this requirement does not apply\nif neither you nor any third party retains the ability to install\nmodified object code on the User Product (for example, the work has\nbeen installed in ROM).\n\n  The requirement to provide Installation Information does not include a\nrequirement to continue to provide support service, warranty, or updates\nfor a work that has been modified or installed by the recipient, or for\nthe User Product in which it has been modified or installed.  
Access to a\nnetwork may be denied when the modification itself materially and\nadversely affects the operation of the network or violates the rules and\nprotocols for communication across the network.\n\n  Corresponding Source conveyed, and Installation Information provided,\nin accord with this section must be in a format that is publicly\ndocumented (and with an implementation available to the public in\nsource code form), and must require no special password or key for\nunpacking, reading or copying.\n\n  7. Additional Terms.\n\n  \"Additional permissions\" are terms that supplement the terms of this\nLicense by making exceptions from one or more of its conditions.\nAdditional permissions that are applicable to the entire Program shall\nbe treated as though they were included in this License, to the extent\nthat they are valid under applicable law.  If additional permissions\napply only to part of the Program, that part may be used separately\nunder those permissions, but the entire Program remains governed by\nthis License without regard to the additional permissions.\n\n  When you convey a copy of a covered work, you may at your option\nremove any additional permissions from that copy, or from any part of\nit.  (Additional permissions may be written to require their own\nremoval in certain cases when you modify the work.)  
You may place\nadditional permissions on material, added by you to a covered work,\nfor which you have or can give appropriate copyright permission.\n\n  Notwithstanding any other provision of this License, for material you\nadd to a covered work, you may (if authorized by the copyright holders of\nthat material) supplement the terms of this License with terms:\n\n    a) Disclaiming warranty or limiting liability differently from the\n    terms of sections 15 and 16 of this License; or\n\n    b) Requiring preservation of specified reasonable legal notices or\n    author attributions in that material or in the Appropriate Legal\n    Notices displayed by works containing it; or\n\n    c) Prohibiting misrepresentation of the origin of that material, or\n    requiring that modified versions of such material be marked in\n    reasonable ways as different from the original version; or\n\n    d) Limiting the use for publicity purposes of names of licensors or\n    authors of the material; or\n\n    e) Declining to grant rights under trademark law for use of some\n    trade names, trademarks, or service marks; or\n\n    f) Requiring indemnification of licensors and authors of that\n    material by anyone who conveys the material (or modified versions of\n    it) with contractual assumptions of liability to the recipient, for\n    any liability that these contractual assumptions directly impose on\n    those licensors and authors.\n\n  All other non-permissive additional terms are considered \"further\nrestrictions\" within the meaning of section 10.  If the Program as you\nreceived it, or any part of it, contains a notice stating that it is\ngoverned by this License along with a term that is a further\nrestriction, you may remove that term.  
If a license document contains\na further restriction but permits relicensing or conveying under this\nLicense, you may add to a covered work material governed by the terms\nof that license document, provided that the further restriction does\nnot survive such relicensing or conveying.\n\n  If you add terms to a covered work in accord with this section, you\nmust place, in the relevant source files, a statement of the\nadditional terms that apply to those files, or a notice indicating\nwhere to find the applicable terms.\n\n  Additional terms, permissive or non-permissive, may be stated in the\nform of a separately written license, or stated as exceptions;\nthe above requirements apply either way.\n\n  8. Termination.\n\n  You may not propagate or modify a covered work except as expressly\nprovided under this License.  Any attempt otherwise to propagate or\nmodify it is void, and will automatically terminate your rights under\nthis License (including any patent licenses granted under the third\nparagraph of section 11).\n\n  However, if you cease all violation of this License, then your\nlicense from a particular copyright holder is reinstated (a)\nprovisionally, unless and until the copyright holder explicitly and\nfinally terminates your license, and (b) permanently, if the copyright\nholder fails to notify you of the violation by some reasonable means\nprior to 60 days after the cessation.\n\n  Moreover, your license from a particular copyright holder is\nreinstated permanently if the copyright holder notifies you of the\nviolation by some reasonable means, this is the first time you have\nreceived notice of violation of this License (for any work) from that\ncopyright holder, and you cure the violation prior to 30 days after\nyour receipt of the notice.\n\n  Termination of your rights under this section does not terminate the\nlicenses of parties who have received copies or rights from you under\nthis License.  
If your rights have been terminated and not permanently\nreinstated, you do not qualify to receive new licenses for the same\nmaterial under section 10.\n\n  9. Acceptance Not Required for Having Copies.\n\n  You are not required to accept this License in order to receive or\nrun a copy of the Program.  Ancillary propagation of a covered work\noccurring solely as a consequence of using peer-to-peer transmission\nto receive a copy likewise does not require acceptance.  However,\nnothing other than this License grants you permission to propagate or\nmodify any covered work.  These actions infringe copyright if you do\nnot accept this License.  Therefore, by modifying or propagating a\ncovered work, you indicate your acceptance of this License to do so.\n\n  10. Automatic Licensing of Downstream Recipients.\n\n  Each time you convey a covered work, the recipient automatically\nreceives a license from the original licensors, to run, modify and\npropagate that work, subject to this License.  You are not responsible\nfor enforcing compliance by third parties with this License.\n\n  An \"entity transaction\" is a transaction transferring control of an\norganization, or substantially all assets of one, or subdividing an\norganization, or merging organizations.  If propagation of a covered\nwork results from an entity transaction, each party to that\ntransaction who receives a copy of the work also receives whatever\nlicenses to the work the party's predecessor in interest had or could\ngive under the previous paragraph, plus a right to possession of the\nCorresponding Source of the work from the predecessor in interest, if\nthe predecessor has it or can get it with reasonable efforts.\n\n  You may not impose any further restrictions on the exercise of the\nrights granted or affirmed under this License.  
For example, you may\nnot impose a license fee, royalty, or other charge for exercise of\nrights granted under this License, and you may not initiate litigation\n(including a cross-claim or counterclaim in a lawsuit) alleging that\nany patent claim is infringed by making, using, selling, offering for\nsale, or importing the Program or any portion of it.\n\n  11. Patents.\n\n  A \"contributor\" is a copyright holder who authorizes use under this\nLicense of the Program or a work on which the Program is based.  The\nwork thus licensed is called the contributor's \"contributor version\".\n\n  A contributor's \"essential patent claims\" are all patent claims\nowned or controlled by the contributor, whether already acquired or\nhereafter acquired, that would be infringed by some manner, permitted\nby this License, of making, using, or selling its contributor version,\nbut do not include claims that would be infringed only as a\nconsequence of further modification of the contributor version.  For\npurposes of this definition, \"control\" includes the right to grant\npatent sublicenses in a manner consistent with the requirements of\nthis License.\n\n  Each contributor grants you a non-exclusive, worldwide, royalty-free\npatent license under the contributor's essential patent claims, to\nmake, use, sell, offer for sale, import and otherwise run, modify and\npropagate the contents of its contributor version.\n\n  In the following three paragraphs, a \"patent license\" is any express\nagreement or commitment, however denominated, not to enforce a patent\n(such as an express permission to practice a patent or covenant not to\nsue for patent infringement).  
To \"grant\" such a patent license to a\nparty means to make such an agreement or commitment not to enforce a\npatent against the party.\n\n  If you convey a covered work, knowingly relying on a patent license,\nand the Corresponding Source of the work is not available for anyone\nto copy, free of charge and under the terms of this License, through a\npublicly available network server or other readily accessible means,\nthen you must either (1) cause the Corresponding Source to be so\navailable, or (2) arrange to deprive yourself of the benefit of the\npatent license for this particular work, or (3) arrange, in a manner\nconsistent with the requirements of this License, to extend the patent\nlicense to downstream recipients.  \"Knowingly relying\" means you have\nactual knowledge that, but for the patent license, your conveying the\ncovered work in a country, or your recipient's use of the covered work\nin a country, would infringe one or more identifiable patents in that\ncountry that you have reason to believe are valid.\n\n  If, pursuant to or in connection with a single transaction or\narrangement, you convey, or propagate by procuring conveyance of, a\ncovered work, and grant a patent license to some of the parties\nreceiving the covered work authorizing them to use, propagate, modify\nor convey a specific copy of the covered work, then the patent license\nyou grant is automatically extended to all recipients of the covered\nwork and works based on it.\n\n  A patent license is \"discriminatory\" if it does not include within\nthe scope of its coverage, prohibits the exercise of, or is\nconditioned on the non-exercise of one or more of the rights that are\nspecifically granted under this License.  
You may not convey a covered\nwork if you are a party to an arrangement with a third party that is\nin the business of distributing software, under which you make payment\nto the third party based on the extent of your activity of conveying\nthe work, and under which the third party grants, to any of the\nparties who would receive the covered work from you, a discriminatory\npatent license (a) in connection with copies of the covered work\nconveyed by you (or copies made from those copies), or (b) primarily\nfor and in connection with specific products or compilations that\ncontain the covered work, unless you entered into that arrangement,\nor that patent license was granted, prior to 28 March 2007.\n\n  Nothing in this License shall be construed as excluding or limiting\nany implied license or other defenses to infringement that may\notherwise be available to you under applicable patent law.\n\n  12. No Surrender of Others' Freedom.\n\n  If conditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot convey a\ncovered work so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you may\nnot convey it at all.  For example, if you agree to terms that obligate you\nto collect a royalty for further conveying from those to whom you convey\nthe Program, the only way you could satisfy both those terms and this\nLicense would be to refrain entirely from conveying the Program.\n\n  13. Use with the GNU Affero General Public License.\n\n  Notwithstanding any other provision of this License, you have\npermission to link or combine any covered work with a work licensed\nunder version 3 of the GNU Affero General Public License into a single\ncombined work, and to convey the resulting work.  
The terms of this\nLicense will continue to apply to the part which is the covered work,\nbut the special requirements of the GNU Affero General Public License,\nsection 13, concerning interaction through a network will apply to the\ncombination as such.\n\n  14. Revised Versions of this License.\n\n  The Free Software Foundation may publish revised and/or new versions of\nthe GNU General Public License from time to time.  Such new versions will\nbe similar in spirit to the present version, but may differ in detail to\naddress new problems or concerns.\n\n  Each version is given a distinguishing version number.  If the\nProgram specifies that a certain numbered version of the GNU General\nPublic License \"or any later version\" applies to it, you have the\noption of following the terms and conditions either of that numbered\nversion or of any later version published by the Free Software\nFoundation.  If the Program does not specify a version number of the\nGNU General Public License, you may choose any version ever published\nby the Free Software Foundation.\n\n  If the Program specifies that a proxy can decide which future\nversions of the GNU General Public License can be used, that proxy's\npublic statement of acceptance of a version permanently authorizes you\nto choose that version for the Program.\n\n  Later license versions may give you additional or different\npermissions.  However, no additional obligations are imposed on any\nauthor or copyright holder as a result of your choosing to follow a\nlater version.\n\n  15. Disclaimer of Warranty.\n\n  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY\nAPPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT\nHOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY\nOF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,\nTHE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\nPURPOSE.  
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM\nIS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\nALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n\n  16. Limitation of Liability.\n\n  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\nWILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\nTHE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\nGENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\nUSE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\nDATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\nPARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\nEVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\nSUCH DAMAGES.\n\n  17. Interpretation of Sections 15 and 16.\n\n  If the disclaimer of warranty and limitation of liability provided\nabove cannot be given local legal effect according to their terms,\nreviewing courts shall apply local law that most closely approximates\nan absolute waiver of all civil liability in connection with the\nProgram, unless a warranty or assumption of liability accompanies a\ncopy of the Program in return for a fee.\n\n                     END OF TERMS AND CONDITIONS\n\n            How to Apply These Terms to Your New Programs\n\n  If you develop a new program, and you want it to be of the greatest\npossible use to the public, the best way to achieve this is to make it\nfree software which everyone can redistribute and change under these terms.\n\n  To do so, attach the following notices to the program.  
It is safest\nto attach them to the start of each source file to most effectively\nstate the exclusion of warranty; and each file should have at least\nthe \"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software: you can redistribute it and/or modify\n    it under the terms of the GNU General Public License as published by\n    the Free Software Foundation, either version 3 of the License, or\n    (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU General Public License for more details.\n\n    You should have received a copy of the GNU General Public License\n    along with this program.  If not, see <https://www.gnu.org/licenses/>.\n\nAlso add information on how to contact you by electronic and paper mail.\n\n  If the program does terminal interaction, make it output a short\nnotice like this when it starts in an interactive mode:\n\n    <program>  Copyright (C) <year>  <name of author>\n    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.\n    This is free software, and you are welcome to redistribute it\n    under certain conditions; type `show c' for details.\n\nThe hypothetical commands `show w' and `show c' should show the appropriate\nparts of the General Public License.  
Of course, your program's commands\nmight be different; for a GUI interface, you would use an \"about box\".\n\n  You should also get your employer (if you work as a programmer) or school,\nif any, to sign a \"copyright disclaimer\" for the program, if necessary.\nFor more information on this, and how to apply and follow the GNU GPL, see\n<https://www.gnu.org/licenses/>.\n\n  The GNU General Public License does not permit incorporating your program\ninto proprietary programs.  If your program is a subroutine library, you\nmay consider it more useful to permit linking proprietary applications with\nthe library.  If this is what you want to do, use the GNU Lesser General\nPublic License instead of this License.  But first, please read\n<https://www.gnu.org/licenses/why-not-lgpl.html>.\n"
  },
  {
    "path": "README.md",
    "content": "# StochFuzz: A New Solution for Binary-only Fuzzing <a href=\"https://openai.com/product/dall-e-2\"><img src=\"imgs/logo.png\" alt=\"Logo\" align=\"right\" width=\"72\"/></a>\n\n[![test](https://github.com/ZhangZhuoSJTU/StochFuzz/actions/workflows/basic.yml/badge.svg)](https://github.com/ZhangZhuoSJTU/StochFuzz/actions/workflows/basic.yml)\n[![benchmark](https://github.com/ZhangZhuoSJTU/StochFuzz/actions/workflows/benchmark.yml/badge.svg)](https://github.com/ZhangZhuoSJTU/StochFuzz/actions/workflows/benchmark.yml)\n\n<p>\n<a href=\"https://github.com/ZhangZhuoSJTU/StochFuzz/blob/master/docs/stochfuzz.pdf\"> <img title=\"\" src=\"imgs/paper.png\" alt=\"loading-ag-167\" align=\"right\" width=\"220\"></a>\n\nStochFuzz is a (probabilistically) sound and cost-effective fuzzing technique for stripped binaries. It is facilitated by a novel incremental and stochastic rewriting technique that is particularly suitable for binary-only fuzzing. Any AFL-based fuzzer, which takes edge coverage (defined by [AFL](https://github.com/google/AFL)) as runtime feedback, can acquire benefits from StochFuzz to directly fuzz stripped binaries.\n</p>\n  \nMore data and the results of the experiments can be found [here](https://github.com/ZhangZhuoSJTU/StochFuzz-data). Example cases of leveraging StochFuzz to improve advanced AFL-based fuzzers ([AFL++](https://github.com/AFLplusplus/AFLplusplus) and [Polyglot](https://github.com/s3team/Polyglot)) can be found in [system.md](docs/system.md#how-to-make-stochfuzz-compatible-with-other-afl-based-fuzzers).\n\n## Clarifications\n\n+ We adopt a new system design than the one from the paper. Details can be found at [system.md](docs/system.md).\n+ In the paper, when we are talking about `e9patch`, we are actually talking about the binary-only fuzzing tool built upon e9patch, namely `e9tool`. 
Please refer to its [website](https://github.com/GJDuck/e9patch/blob/master/README.md#building) for more details.\n+ StochFuzz provides sound rewriting for binaries without inlined data, and probabilistically sound rewriting for the rest.\n\n\n\n## Building StochFuzz\n\nStochFuzz is built upon [Keystone](https://www.keystone-engine.org/), [Capstone](https://www.capstone-engine.org/), [GLib](https://developer.gnome.org/glib/), and [libunwind](https://www.nongnu.org/libunwind/).\n\nMost of these dependencies require `meson >= 0.60.1`. Please use `meson --version` to check that you are using an up-to-date version.\n\n[Build.sh](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/master/build.sh) helps build all the dependencies automatically. For clean containers, make sure some standard tools like `autoreconf` and `libtool` are installed.\n \n```bash\n$ git clone https://github.com/ZhangZhuoSJTU/StochFuzz.git\n$ cd StochFuzz\n$ ./build.sh\n```\n\nStochFuzz itself can be built by GNU Make.\n\n```bash\n$ cd src\n$ make release\n```\n\nWe have tested StochFuzz on Ubuntu 18.04. If you have any issue when running StochFuzz on other systems, please kindly let us [know](https://github.com/ZhangZhuoSJTU/StochFuzz/issues/new).\n\n## How to Use\n\nStochFuzz provides multiple rewriting options, which follow AFL's style of passing arguments.\n\n```\n$ ./stoch-fuzz -h\nstoch-fuzz 1.0.0 by <zhan3299@purdue.edu>\n\n./stoch-fuzz [ options ] -- target_binary [ ... 
]\n\nMode settings:\n\n  -S            - start a background daemon and wait for a fuzzer to attach (defualt mode)\n  -R            - dry run target_binary with given arguments without an attached fuzzer\n  -P            - patch target_binary without incremental rewriting\n  -D            - probabilistic disassembly without rewriting\n  -V            - show currently observed breakpoints\n\nRewriting settings:\n\n  -g            - trace previous PC\n  -c            - count the number of basic blocks with conflicting hash values\n  -d            - disable instrumentation optimization\n  -r            - assume the return addresses are only used by RET instructions\n  -e            - install the fork server at the entrypoint instead of the main function\n  -f            - forcedly assume there is data interleaving with code\n  -i            - ignore the call-fallthrough edges to defense RET-misusing obfuscation\n\nOther stuff:\n\n  -h            - print this help\n  -x execs      - set the number of executions after which a checking run will be triggered\n                  set it as zero to disable checking runs (default: 200000)\n  -t msec       - set the timeout for each daemon-triggering execution\n                  set it as zero to ignore the timeout (default: 2000 ms)\n  -l level      - set the log level, including INFO, WARN, ERROR, and FATAL (default: INFO)\n\n```\n\n\n### Basic Usage\n\n```diff\n- It is worth first trying the advanced strategy (see below) because that is much more cost-effective.\n```\n\nTo fuzz a stripped binary, namely `example.out`, we need to `cd` to the directory of the target binary. For example, if the full path of `example.out` is `/root/example.out`, we need to first `cd /root/`. Furthermore, _it is dangerous to run two StochFuzz instances under the same directory._ These restrictions are caused by some design faults and we will try to relax them in the future. 
\n\nAssuming StochFuzz is located at `/root/StochFuzz/src/stoch-fuzz`, execute the following command to start rewriting the target binary.\n\n```bash\n$ cd /root/\n$ /root/StochFuzz/src/stoch-fuzz -- example.out # do not use ./example.out here\n```\n\nAfter the initial rewriting, we will get a phantom file named `example.out.phantom`. This phantom file can be directly fuzzed by AFL or any AFL-based fuzzer. Note that the StochFuzz process would not stop during fuzzing, so please make sure the process is alive during fuzzing.\n\nHere is a demo that shows how StochFuzz works.\n\n[![asciicast](https://asciinema.org/a/415987.svg)](https://asciinema.org/a/415987)\n\n### Advanced Usage\n\nCompared with the compiler-based instrumentation (e.g., afl-clang-fast), StochFuzz has additional runtime overhead because it needs to emulate each _CALL_ instruction to support stack unwinding.\n\nInspired by a recent [work](https://dl.acm.org/doi/abs/10.1145/3445814.3446765), we provide an advanced rewriting strategy where we do not emulate _CALL_ instructions but wrap the `_ULx86_64_step` function from [libunwind](https://github.com/libunwind/libunwind) to support stack unwinding. This strategy works for most binaries but may fail in some cases like fuzzing statically linked binaries.\n\nTo enable this strategy, simply provide a __-r__ option to StochFuzz.\n\n```bash\n$ cd /root/\n$ /root/StochFuzz/src/stoch-fuzz -r -- example.out # do not use ./example.out here\n```\n\nAdditionally, before fuzzing, we need to prepare the `AFL_PRELOAD` environment variable for AFL.\n\n```bash\n$ export STOCHFUZZ_PRELOAD=$(/root/StochFuzz/scripts/stochfuzz_env.sh)\n$ AFL_PRELOAD=$STOCHFUZZ_PRELOAD afl-fuzz -i seeds -o output -t 2000 -- example.out.phantom @@\n```\n\nThe following demo shows how to apply this advanced strategy.\n\n[![asciicast](https://asciinema.org/a/416230.svg)](https://asciinema.org/a/416230)\n\n## Troubleshootings\n\nSolutions to common issues can be found in [trouble.md](docs/trouble.md). 
If it cannot help solve your problem, please kindly open a GitHub issue.\n\nBesides, we provide some tips on using StochFuzz, which can be found at [tips.md](docs/tips.md).\n\n## Development\n\nCurrently, we have many todo items. We present them in [todo.md](docs/todo.md#todo-list). \n\nWe also present many pending decisions which we are hesitating to take, in [todo.md](docs/todo.md#challenges). __If you have any thoughts or suggestions, do not hesitate to let us know.__ It would be greatly appreciated if you could help us improve StochFuzz.\n\nStochFuzz should be considered alpha-quality software and it is likely to contain bugs. \n\nI will try my best to maintain StochFuzz in a timely manner, but sometimes it may take me more time to respond. Thanks for your understanding in advance.\n\n## Cite\n\nZhang, Zhuo, et al. \"STOCHFUZZ: Sound and Cost-effective Fuzzing of Stripped Binaries by Incremental and Stochastic Rewriting.\" 2021 IEEE Symposium on Security and Privacy (SP). IEEE, 2021.\n\n## References\n\n+ Duck, Gregory J., Xiang Gao, and Abhik Roychoudhury. \"Binary rewriting without control flow recovery.\" Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation. 2020.\n+ Meng, Xiaozhu, and Weijie Liu. \"Incremental CFG patching for binary rewriting.\" Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems. 2021.\n+ Aschermann, Cornelius, et al. \"Ijon: Exploring deep state spaces via fuzzing.\" 2020 IEEE Symposium on Security and Privacy (SP). IEEE, 2020.\n+ Google. “Google/AFL.” GitHub, github.com/google/AFL. \n"
  },
  {
    "path": "benchmark/json-2017-02-12.seed",
    "content": "10000000010E5\n"
  },
  {
    "path": "benchmark/llvm-libcxxabi-2017-01-27.seed",
    "content": "ZUlSaIJT_2_EET_E5\n"
  },
  {
    "path": "benchmark/pcre2-10.00.seed",
    "content": "# This is a specialized test for checking, when PCRE2 is compiled with the\n# EBCDIC option but in an ASCII environment, that newline and white space\n# functionality is working. It catches cases where explicit values such as 0x0a\n# have been used instead of names like CHAR_LF. Needless to say, it is not a\n# genuine EBCDIC test! In patterns, alphabetic characters that follow a\n# backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be\n# in EBCDIC, but can of course be specified as escapes.\n\n# Test default newline and variations\n\n/^A/m\n    ABC\n    12\\x15ABC\n\n/^A/m,newline=any\n    12\\x15ABC\n    12\\x0dABC\n    12\\x0d\\x15ABC\n    12\\x25ABC\n\n/^A/m,newline=anycrlf\n    12\\x15ABC\n    12\\x0dABC\n    12\\x0d\\x15ABC\n    ** Fail\n    12\\x25ABC\n\n# Test \\h\n\n/^A\\/\n    A B\n\n# Test \\H\n\n/^A\\/\n    AB\n    ** Fail\n    A B\n\n# Test \\R\n\n/^A\\/\n    A\\x15B\n    A\\x0dB\n    A\\x25B\n    A\\x0bB\n    A\\x0cB\n    ** Fail\n    A B\n\n# Test \\v\n\n/^A\\/\n    A\\x15B\n    A\\x0dB\n    A\\x25B\n    A\\x0bB\n    A\\x0cB\n    ** Fail\n    A B\n\n# Test \\V\n\n/^A\\/\n    A B\n    ** Fail\n    A\\x15B\n    A\\x0dB\n    A\\x25B\n    A\\x0bB\n    A\\x0cB\n    \n# For repeated items, use an atomic group so that the output is the same\n# for DFA matching (otherwise it may show multiple matches).\n\n# Test \\h+\n\n/^A(?>\\+)/\n    A B\n\n# Test \\H+\n\n/^A(?>\\+)/\n    AB\n    ** Fail\n    A B\n\n# Test \\R+\n\n/^A(?>\\+)/\n    A\\x15B\n    A\\x0dB\n    A\\x25B\n    A\\x0bB\n    A\\x0cB\n    ** Fail\n    A B\n\n# Test \\v+\n\n/^A(?>\\+)/\n    A\\x15B\n    A\\x0dB\n    A\\x25B\n    A\\x0bB\n    A\\x0cB\n    ** Fail\n    A B\n\n# Test \\V+\n\n/^A(?>\\+)/\n    A B\n    ** Fail\n    A\\x15B\n    A\\x0dB\n    A\\x25B\n    A\\x0bB\n    A\\x0cB\n\n# End\n"
  },
  {
    "path": "benchmark/runtime/server.key",
    "content": "-----BEGIN PRIVATE KEY-----\nMIIBVAIBADANBgkqhkiG9w0BAQEFAASCAT4wggE6AgEAAkEA1AdZNDVOA9cXm97f\nerp1bukz2kohjToJS6Ma8fOb36VV9lQGmDNsJanXFiqafOgV+kh1HXqZ3l1I0JmZ\n71b+QQIDAQABAkAHGfPn5r0lLcgRpWZQwvv56f+dmQwEoeP7z4uwfNtEo0JcRD66\n1WRCvx3LE0VbNeaEdNmSPiRXhlwIggjfrBi9AiEA9UusPBcEp/QcPGs96nQQdQzE\nfw4x0HL/eSV3qHimT6MCIQDdSAiX4Ouxoiwn/9KhDMcZXRYX/OPzj6w8u1YIH7BI\nywIgSozbJdAhHCJ2ym4VfUIVFl3xAmSAA0hQGLOocE1qzl0CIQDRicOxZmhqBiKA\nIgznOn1StEYWov+MhRFZVSBLgw5gbwIgJzOlSlu0Y22hEUsLCKyHBrCAZZHcZ020\n20pfogmQYn0=\n-----END PRIVATE KEY-----\n"
  },
  {
    "path": "benchmark/runtime/server.pem",
    "content": "-----BEGIN CERTIFICATE-----\nMIIBYTCCAQugAwIBAgIJAMPQQtUHkx+KMA0GCSqGSIb3DQEBCwUAMAwxCjAIBgNV\nBAMMAWEwHhcNMTYwOTI0MjIyMDUyWhcNNDQwMjA5MjIyMDUyWjAMMQowCAYDVQQD\nDAFhMFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBANQHWTQ1TgPXF5ve33q6dW7pM9pK\nIY06CUujGvHzm9+lVfZUBpgzbCWp1xYqmnzoFfpIdR16md5dSNCZme9W/kECAwEA\nAaNQME4wHQYDVR0OBBYEFCXtEo9rkLuKGSlm0mFE4Yk/HDJVMB8GA1UdIwQYMBaA\nFCXtEo9rkLuKGSlm0mFE4Yk/HDJVMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEL\nBQADQQCnldOnbdNJZxBO/J+979Urg8qDp8MnlN0979AmK1P5/YzPnAF4BU7QTOTE\nimS5qZ0MvziBa81nVlnnFRkIezcD\n-----END CERTIFICATE-----\n"
  },
  {
    "path": "benchmark/sqlite-2016-11-14.seed",
    "content": " SELECT 888<8888888 | daDROPme(1,1 \n,11,1 \n,1)| 388<8888888 | datetime(1,1 ,11,1 \n,1)| 388<8888888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<7777777777777888888<88,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<77774777777777888888<8888888 \n,11,1 \n,1)| 194<8888888 | dattime(1,1 \n,1)|8 | datetime(88\n,1)|8 | datetime(11,1 \n,1)| ( SELECT 8878888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| 388<8888888 | datetime(1,1 \n,1,1 \n,1)|8 | ANALYZEe(1,1 \n,1)| ( SELECT 887<$$$$$$$02e777777 || 888<(7777777777777<888888\n,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<777377777 | datetime(1,1 \n,1,1 \n,1)|8 | datetime| 388<8888888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<7777777777777888888<88,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<77774777777777888888<8888888 \n,11,1 \n,1)| 388<8888888 | datetime(1,1 \n,1)|8 | datetime(88\n,1)|8 | datetime(1,1 \n,1)| ( SELECT 8878888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| 388<8888888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( (E1 \n,1 \n,1)| 388<8888888 | d$$BETWEE,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<777777,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<$$$$$$$02e777777 ||888 | datetimf(1,1 \n,1)|8 | datetime(1,1 \n,1)| 388<888888888888777888888<88,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<77774777777777888888<8888888 \n,11,1 \n,1)| 388<8888888 | datetime(1,1 \n,1)|8 | datet(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<77888888\n,1)|8 |etime(1,1 \n,1)| 388<8888888 | datetime(1,188<8888888 | datetime(1,1| datetime(1,1 \n,1)| ( SELECT  y  in\n(1,1 \n,1)| ( SELECT 81 \n,1)|8 | date|etime(1,1 \n,1)| 388<8888888 | datetime(1,188<8888888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT  y  in\nm e_crash$$\n,1)| 38\n,1)| 388<8888888 | datetime(1,1 \n,1)|8 | datetimetetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT  y  in\nm e_crash$$\n,1)| 388<8888888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT  y  in\n(1,1 \n,1)| ( SELECT 81 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT  y  
in\nm e_crash$$\n,e(1,1 \n,1)| ( SELECT  y  in\nm e_crash$$\n,1)| 388<8888888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT  y  in\n(1,1 \n,1)| ( SELECT 81 \n,1)|8 | datet)| 388<8888888 | datetime(1,1 \n,1,0 \n,1)|8 | ANALYZEe(1,1 \n,1)| ( SELECT 887<$$$$$$$02e777777 || 888<7777777777777<888888\n,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<777377777 | datetime(1,1 \n,1,1 \n,11 \n,1)| ( SELECT87<77888888\n,1)|8 |etime(1,1  datetime(1,1 \n,1)| ( SELECT  y  in\nm e_crash$$\n,1)| 388<88888\n,1)| 388<8888888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT 887<77888888\n,1)|8 |etime(1,1 \n,1)| 388<8888888 | datetime(1,188<8888888 | datetime(1,1 \n,1)|8 | datetime(1,1 \n,1)| ( SELECT  y  in\nm e_crash$$\n,1)| 38\n,1)| 388<8888888 | datetime(1,1 \n,1)|8 | datetime(1,reload1 \n,1)| ( SELECT  y  in\nm (1)  SELECT 8 87<577777"
  },
  {
    "path": "benchmark/wpantund-2018-02-27.seed",
    "content": "0ConfiG:NCP:SocketPath \"/dev/null\"\nConfig:NCP:SocketPath \"/dev/null\"\n"
  },
  {
    "path": "build.sh",
    "content": "#!/bin/bash\n\nRED=\"\\033[31m\"\nGREEN=\"\\033[32m\"\nYELLOW=\"\\033[33m\"\nBOLD=\"\\033[1m\"\nOFF=\"\\033[0m\"\n\nCAPSTONE_VERSION=\"4.0.2\"\nKEYSTONE_VERSION=\"0.9.2\"\nGLIB_VERSION=\"2.72.0\"\nLIBUNWIND_VERSION=\"1.5\"\n\nset -e\n\n#\n# check necessary command\n#\n\ncheck_command () {\n    for cmd in $@\n    do\n        if [ ! -x \"$(command -v $cmd)\" ]; then\n            echo -e \"${RED}Error${OFF}: $cmd is not installed.\" >&2\n            exit 1\n        fi\n    done\n}\n\ncheck_command \"wget\" \"unzip\" \"make\" \"cmake\" \"meson\" \"ninja\" \"pkg-config\" \"clang\" \"python3\"\n\n#\n# check clang version (>= 6.0.0)\n#\n\nCLANG_VERSION=$(clang --version | head -n 1 | grep -o -E \"[[:digit:]]+\\.[[:digit:]]+\\.[[:digit:]]+\" | uniq | sort)\nCLANG_MAJOR_VERSION=$(echo $CLANG_VERSION | awk -F '.' '{ print $1 }')\nif [[ $CLANG_VERSION < \"6.0.0\" && ${#CLANG_MAJOR_VERSION} = \"1\" ]]; then\n    echo \"clang-6.0 or a newer version is required\"\n    exit 1\nfi\n\n\n#\n# build capstone\n#\n\nCAPSTONE_URL=\"https://github.com/aquynh/capstone/archive/$CAPSTONE_VERSION.zip\"\n\nif [ ! -d capstone ]\nthen\n    if [ ! -f capstone.zip ]\n    then\n        echo -e \"${GREEN}$0${OFF}: downloading capstone.zip...\"\n        wget -O capstone.zip $CAPSTONE_URL\n    fi\n\n    echo -e \"${GREEN}$0${OFF}: extracting capstone.zip...\"\n    unzip capstone.zip\n    mv capstone-$CAPSTONE_VERSION capstone\n\n    echo -e \"${GREEN}$0${OFF}: building capstone.zip...\"\n    cd capstone\n    CAPSTONE_DIET=no CAPSTONE_X86_REDUCE=no CAPSTONE_ARCHS=\"x86\" ./make.sh\n    cd ..\nfi\n\n\n#\n# build keystone\n#\n\nKEYSTONE_URL=\"https://github.com/keystone-engine/keystone/archive/$KEYSTONE_VERSION.zip\"\n\nif [ ! -d keystone ]\nthen\n    if [ ! 
-f keystone.zip ]\n    then\n        echo -e \"${GREEN}$0${OFF}: downloading keystone.zip...\"\n        wget -O keystone.zip $KEYSTONE_URL\n    fi\n\n    echo -e \"${GREEN}$0${OFF}: extracting keystone.zip...\"\n    unzip keystone.zip\n    mv keystone-$KEYSTONE_VERSION keystone\n\n    echo -e \"${GREEN}$0${OFF}: building keystone.zip...\"\n    cd keystone\n    if [ -d build ]\n    then\n        rm -rf build\n    fi\n    mkdir build\n    cd build\n    cmake -DBUILD_LIBS_ONLY=1 -DLLVM_BUILD_32_BITS=0 -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DLLVM_TARGETS_TO_BUILD=\"AArch64;X86\" -G \"Unix Makefiles\" ..\n    make -j8\n    cd ../..\nfi\n\n\n#\n# build glib\n#\n\nGLIB_URL=\"https://github.com/GNOME/glib/archive/$GLIB_VERSION.zip\"\n\nif [ ! -d glib ]\nthen\n    if [ ! -f glib.zip ]\n    then\n        echo -e \"${GREEN}$0${OFF}: downloading glib.zip...\"\n        wget -O glib.zip $GLIB_URL\n    fi\n\n    echo -e \"${GREEN}$0${OFF}: extracting glib.zip...\"\n    unzip glib.zip\n    mv glib-$GLIB_VERSION glib\n\n    echo -e \"${GREEN}$0${OFF}: building glib.zip...\"\n    cd glib\n    meson _build --buildtype=release --default-library=static --prefix=$(realpath .)\n    ninja -C _build\n    ninja -C _build install\n    cd ..\nfi\n\n#\n# build libunwind\n#\n\nLIBUNWIND_URL=\"https://github.com/libunwind/libunwind/archive/v$LIBUNWIND_VERSION.zip\"\n\nif [ ! -d libunwind ]\nthen\n    if [ ! -f libunwind.zip ]\n    then\n        echo -e \"${GREEN}$0${OFF}: downloading libunwind.zip...\"\n        wget -O libunwind.zip $LIBUNWIND_URL\n    fi\n\n    echo -e \"${GREEN}$0${OFF}: extracting libunwind.zip...\"\n    unzip libunwind.zip\n    mv libunwind-$LIBUNWIND_VERSION libunwind\n\n    echo -e \"${GREEN}$0${OFF}: building libunwind.zip...\"\n    cd libunwind\n    mkdir install\n    ./autogen.sh\n    ./configure --prefix=`pwd`/install --enable-cxx-exceptions\n    make install -j8\n    cd ..\nfi\n\n#\n# build src\n#\n\n# cd src\n# make release\n"
  },
  {
    "path": "clean.sh",
    "content": "#!/bin/bash\n\nif [ -d capstone ]\nthen\n    rm -rf capstone\nfi\n\nif [ -d keystone ]\nthen\n    rm -rf keystone\nfi\n\nif [ -d glib ]\nthen\n    rm -rf glib\nfi\n\nif [ -d libunwind ]\nthen\n    rm -rf libunwind\nfi\n\nrm -rf *.zip\n\ncd src && make clean\n"
  },
  {
    "path": "docs/system.md",
    "content": "# New System Design\n\nOriginally, StochFuzz was integrated into AFL, which made an easy development. However, as more and more advanced fuzzing tools (e.g., [Polyglot](https://github.com/s3team/Polyglot)) take their own implementations (e.g., developing a new variant of AFL), it becomes hard to combine StochFuzz with these tools. \n\nAs such, we decide to separate StochFuzz and AFL. With this new system design, any AFL-based fuzzer can directly fuzz the phantom binary generated by StochFuzz.\n\nHowever, it also brings some new challenges. \n\nFirst of all, it is easy for the old design to keep multiple versions of rewritten binaries at the same time (by modifying AFL to set up multiple fork servers), but not for the new one. Currently, StochFuzz is independent of AFL, which means it can only keep a single binary at a time. Hence, to detect those rewriting errors that only change execution paths but do not trigger crashes, we introduce a new technique named __checking executions__. These checking executions are triggered periodically and check the coverage consistency w/ and w/o uncertain patchings. After each checking execution, the rewritten binary will get changed. The `-x` option is for checking executions.\n\nBesides, we need to manually set the timeout for StochFuzz, which should be consistent with the one of AFL. The `-t` option is for the timeout.\n\nA good observation is that the edge coverage is at the block level, which means we do not need to trap all instructions but one instruction per block. This observation helps us avoid many rewriting errors.\n\n## How to make StochFuzz compatible with other AFL-based fuzzers\n\nOne of the most common practices of variants of [AFL](https://github.com/google/AFL) is to extend the size of the shared memory. 
For example, [AFL++](https://github.com/AFLplusplus/AFLplusplus) extends the size to [8388608](https://github.com/AFLplusplus/AFLplusplus/blob/48c878a76ddec2c133fd5708b185b2ac27740084/include/config.h#L44) bytes (`1 << 23`). To make StochFuzz compatible with such AFL variants, we need to do some slight modifications.\n\nSpecifically, we need to modify two macros defined in [afl_config.h](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/master/src/afl_config.h), [AFL_MAP_SIZE_POW2](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/9fe1500791729e267894e44faa935757e13124e6/src/afl_config.h#L37) and [AFL_MAP_ADDR](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/9fe1500791729e267894e44faa935757e13124e6/src/afl_config.h#L39). \n\n__AFL_MAP_SIZE_POW2__ is the logarithm of the size to the base 2. For example, to support AFL++, AFL_MAP_SIZE_POW2 should be set to 23: <img src=\"https://render.githubusercontent.com/render/math?math=log_2 8388608 = 23\">.\n\n__AFL_MAP_ADDR__ is the address of the shared memory in the subject binary. Based on our testing, 0x3000000 would be a safe address.\n\nIn short, taking AFL++ as an example, following modifications are sufficient.\n\n```c\n#define AFL_MAP_SIZE_POW2 23\n#define AFL_MAP_ADDR 0x3000000\n```\n\n## Case: Polyglot\n\n[Polyglot](https://github.com/s3team/Polyglot) is a state-of-the-art language fuzzer that focuses on testing compilers and language interpreters. Since many programming languages are bootstrapping, which means their language processors are written in themselves, it is difficult or time-consuming to instrument these processors (e.g., __GCC__). The developers of Polyglot originally used AFL-QEMU mode to test such processors. \n\nIn this case study, we try to combine Polyglot with StochFuzz to provide a more efficient fuzzing test for GCC. \n\nBefore starting our experiments, we need to make a slight change on StochFuzz. 
Since the developers of Polyglot extend the [size of AFL shared memory](https://github.com/s3team/Polyglot/blob/a49f67ffb95684ae2227800a85eb7963eeb2692d/AFL_replace_mutate/config.h#L323), we need to update it in StochFuzz accordingly.\n\nSpecifically, we need first to change [AFL_MAP_SIZE_POW2](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/f90db25c300e79b9dd37748da883cb9d66a8253f/src/afl_config.h#L37) to 20. To avoid the conflicts of mmap, we additionally need to change [AFL_MAP_ADDR](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/f90db25c300e79b9dd37748da883cb9d66a8253f/src/afl_config.h#L39). I set it as 0x180000 in this case. The new [afl_config.h](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/f90db25c300e79b9dd37748da883cb9d66a8253f/src/afl_config.h) would look like:\n\n```c\n...\n#define AFL_FORKSRV_FD 198\n#define AFL_SHM_ENV \"__AFL_SHM_ID\"\n#define AFL_MAP_SIZE_POW2 20 \n#define AFL_MAP_SIZE (1 << AFL_MAP_SIZE_POW2)\n#define AFL_MAP_ADDR 0x180000\n#define AFL_PREV_ID_PTR (RW_PAGE_ADDR + 0x8)\n#define AFL_MAP_SIZE_MASK ((1 << AFL_MAP_SIZE_POW2) - 1)\n...\n```\n\nWe apply the [advanced strategy](https://github.com/ZhangZhuoSJTU/StochFuzz#advanced-usage) of StochFuzz and run two GCC fuzzing instances, including `Polyglot + StochFuzz` and `Polyglot + AFL-QEMU`, for 24 hours.\n\n### Screenshot\n\n![screenshots](../imgs/polyglot_screenshot.png)\n\nIn the above screenshot, the top left and bottom left panels show the progress of `Polyglot + StochFuzz` and `Polyglot + AFL-QEMU`, respectively. The right panel shows some logging information of StochFuzz.\n\nIn short, when running for around 20 minutes, StochFuzz can achieve 70 executions per second while AFL-QEMU achieves only 4 executions per second. In the meantime, StochFuzz found 6520 paths, two times more than AFL-QEMU did (2169 paths). 
Our register liveness analysis helps StochFuzz avoid _98.4%_ saving/restoring for FLAGS register and _81.2%_ for general purpose registers.\n\n### Results\n\n<p float=\"center\">\n  <img title=\"\" src=\"../imgs/polyglot_path.png\" height=\"280\">\n  <img title=\"\" src=\"../imgs/polyglot_eps.png\" height=\"280\">\n</p>\n\nThe left figure presents how many paths each tool found over time. The blue solid line is for StochFuzz and the orange dotted line is for AFL-QEMU. We can see at the end, StochFuzz found many more paths than AFL-QEMU.\n\nThe right figure presents the distribution of EPS (executions per second) of the two tools, the blue one for StochFuzz and the orange one for AFL-QEMU. The average EPS of StochFuzz is around 60 executions per second, nearly 7 times faster than AFL-QEMU.\n"
  },
  {
    "path": "docs/tips.md",
    "content": "# Tips\n\nTo enable a more effective and efficient fuzzing, we provide several tips about better using StochFuzz. \n\n## Advanced Strategy\n\nAs mentioned in [README.md](../README.md#advanced-usage), we strongly recommend every user first tries the advanced strategy.\n\nStochFuzz tries to provide a conservative rewriting. As such, it emulates all the _CALL_ instructions to maintain an unchanged data flow. \n\nHowever, in most cases, the return addresses pushed by _CALL_ instructions are only used by _RET_ instructions and the stack unwinding. Based on this observation, we provide an advanced rewriting strategy that hooks the process of stack unwinding and hence does not need to emulate _CALL_ instructions. This strategy is quite efficient and can reduce around 80% overhead of StochFuzz.\n\nThe advanced strategy can be applied to most binaries but will cause rewriting errors on some including:\n\n+ statically-linked binaries that do online stack unwinding\n+ some CFI-protected binaries\n+ some Go-written binaries\n+ ...\n\nHow to adopt the advanced rewriting strategy can be found in [README.md](../README.md#advanced-usage).\n\n## Timeout\n\nStochFuzz needs to specify a timeout for any execution caused by the incremental rewriting. The timeout is configured by the `-t` option.\n\n```\n  -t msec       - set the timeout for each daemon-triggering execution\n                  set it as zero to ignore the timeout (default: 2000 ms)\n```\n\nAFL, or any attached AFL-based fuzzer, also needs to specify a timeout. We recommend that the two timeouts should be set consistently, but it is not mandatory. \n\nHowever, for the binaries with inlined data, the timeout set for the attached fuzzer should __BE LARGER THAN 1000MS__. Otherwise, the auto-scaling feature of AFL timeout will cause incorrect error diagnosis during the stochastic rewriting. 
\n\n## Checking Executions\n\nAs we mentioned in [system.md](system.md), we adopt a new system design to have a wide application in the fields of binary-only fuzzing. This new architecture design is enabled by the observation that we only need to instrument an instruction per basic block to collect the code coverage of AFL and is facilitated by a new technique named checking executions. \n\nTechnically speaking, checking executions are triggered periodically to check whether the collected coverage is consistent with and without uncertain patches. \n\nThe `-x` option is provided for configuring the checking executions, setting the number of executions after which a checking execution will be triggered.  \n\n```\n  -x execs      - set the number of executions after which a checking run will be triggered\n                  set it as zero to disable checking runs (default: 200000)\n```\n\nFor example, if we provide `-x 1000`, it means a checking execution will be triggered every 1000 AFL executions. \n\nThe period of checking executions affects the fuzzing effectiveness. Intuitively, the more checking executions we have, the more overhead they cause. The overhead caused by checking executions is <img src=\"https://render.githubusercontent.com/render/math?math=\\large \\frac{1}{M}\">\n, where a checking execution is triggered every _M_ normal executions.\n\nThe period of checking executions also affects the probabilistic soundness we provided. Let's additionally assume that the probability of a given rewriting error changing the execution path is _p_. Then, we will have that, the probability _P_ that a rewriting error cannot be detected after the total _N_ executions is <img src=\"https://render.githubusercontent.com/render/math?math=\\large (1 - p)^\\frac{N}{M}\">.\n\nIn a nutshell, the larger number we set, the fewer check executions we will take. 
In other words, the larger number we set, the more cost-effective but the less probabilistically sound the fuzzing is.\n\nFor example, if _M = 1000_, _p = 1e-4_, and _N = 1e8_, the overhead caused by checking executions is _0.1%_ and the probability of having an undetected rewriting error is _4.5e-5_.\n\nThe user may need to provide a suitable number. __A NUMBER LARGER THAN 200 IS RECOMMENDED__. \n\nNote that this option is useful only when inlined data is present. To eliminate the overhead caused by checking executions, we additionally plan to set up two different fuzzing instances like what [QSYM](https://github.com/sslab-gatech/qsym) does, where one is for fuzzing and the other is for checking executions.\n"
  },
  {
    "path": "docs/todo.md",
    "content": "# Development Plan\n\n## Todo List\n\nWhile we have successfully migrated StochFuzz to a new system design, we can still improve StochFuzz from multiple places.\n\n+ [x] __NEW SYSTEM DESIGN__ (daemon), which separates AFL and StochFuzz and makes advanced fuzzing possible.\n+ [x] In release version, remove unnecessary z\\_log (e.g., z\\_debug, z\\_trace, etc.).\n+ [x] Support probabilistic disassembly.\n+ [x] Mark .text section non-writable.\n+ [x] Support C++ exceptions (via pushing the original ret\\_addr onto the stack).\n+ [x] When a CP\\_RETADDR is found, support updating other CP\\_RETADDR from the same callee.\n+ [x] Use-def analysis on EFLAG register to avoid unnecessary context switching.\n+ [x] Support pre-disassembly (linear disassembly) -- IT SEEMS NOT A GOOD IDEA.\n+ [x] Support `jrcxz` and `jecxz` instructions.\n+ [x] It may be a good idea to additionally hook SIGILL caused by mis-patched instructions. In that design, exiting the program with a specific status code (in SIGSEGV handler) is a better approach, compared with raising SIGILL. It can also avoid recursive signal handling.\n+ [x] Support retaddr patch when pdisasm is enabled (check retaddr's probability) -- it seems impossible. 
Note that we cannot guarantee the control flow is returned from the callee even if the return address is visited.\n+ [x] A better frontend for passing arguments.\n+ [x] Use runtime arguments to set different modes, instead of makefile.\n+ [x] Use simple linear disassembly to check the existence of inlined data.\n+ [x] Read PLT table to get library functions' names, and support the white-list for library functions.\n+ [x] Correctly handle timeout from AFL.\n+ [x] Use shared memory for .text section, to avoid the expensive patch commands.\n+ [x] Support self-correction procedure (delta debugging).\n+ [x] Support non-return analysis on UCFG, with the help of the white-list for library functions.\n+ [x] Support the on-the-fly probability recalculation.\n+ [x] Add a new flag/option to enable early instrumentation for fork server (i.e., before the entrypoint of binary).\n+ [x] Enable periodic checking (for coverage feedback) to determine those false positives which do not lead to crashes.\n+ [x] Add tailed invalid instructions for those basic blocks terminated by bad decoding.\n+ [x] Add a license.\n+ [x] Do not use a global sys\\_config, but put the options into each object.\n+ [x] Current TP\\_EMIT is only compatible with fuzzers compiled with AFL\\_MAP\\_SIZE = (1 << 16), we need to change the underlying implementation of TP\\_EMIT to automatically fit the AFL\\_MAP\\_SIZE.\n+ [x] Fix the bugs when rewriting PIE binary and support it.\n+ [x] Place `ENDBR64` instruction before the AFL trampoline. 
The phantom program will crash otherwise.\n+ [x] Support binaries compiled with gcc ASAN (clang would inline ASAN functions).\n+ [ ] Use g\\_hash\\_table\\_iter\\_init instead of g\\_hash\\_table\\_get\\_keys.\n+ [ ] Apply AddrDict to all possible places..\n+ [ ] Apply Iter to all possible places..\n+ [ ] Support other disassembly backends, for the initial disassembly (e.g., [XDA](https://github.com/CUMLSec/XDA)).\n+ [ ] Calculate [entropy](https://github.com/NationalSecurityAgency/ghidra/issues/1035) to check the existence of inlined data (ADVANCED).\n+ [ ] Remove legacy code (e.g., the function of building bridges by Rewriter is no longer needed).\n+ [ ] Instead of patching a fixed invalid instruction (0x2f), randomly choose an invalid instruction to patch. More details can be found [here](http://ref.x86asm.net/coder64.html).\n+ [ ] Automatically scale the number of executions triggering checking runs (based on the result of previous checking run).\n+ [ ] Set the default log level as WARN (note that we need to update `make test` and `make benchmark`).\n+ [ ] Use a general method to add segments in the given ELF instead of using the simple PT\\_NOTE trick.\n+ [ ] Fix the failed Github Actions on Ubuntu 20.04 (the root cause is unknown currently).\n+ [ ] Add more stress test for rewriting PIE binary.\n+ [ ] Support binaries compiled with MSAN.\n\n\n## Challenges\n\nWe additionally have some challenges which may cause troubles or make StochFuzz not that easy to use. 
We are trying to resolve them.\n\n+ The fixed LOOKUP\\_TABLE\\_ADDR is mixed with other random addresses, which may cause bugs in PIE binaries.\n+ The glibc code contains some overlapping instructions (e.g., the [instructions with the LOCK prefix](https://code.woboq.org/userspace/glibc/sysdeps/x86/atomic-machine.h.html#_M/__arch_c_compare_and_exchange_val_8_acq)), which may cause troubles for the patcher and pdisasm.\n\nThere are some other challenges introduced by the [new system design](system.md).\n\n+ The input file may be changed by a previously crashed execution, which makes the next execution incorrect. But it seems ok in practice, because fuzzing is a highly repetitive procedure which can fix the incorrect feedback automatically and quickly.\n+ Timeout needs to be set up separately for AFL and StochFuzz, which may bother the users a little bit.\n+ The auto-scaled timeout of AFL may cause incorrect error diagnosis (the [dd\\_status](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/master/src/diagnoser.h#L91) may be invalid), so it is highly recommended to specify a timeout (>= 1000ms or >= AFL\\_HANG\\_TMOUT if set) for AFL by `-t` option, to disable the feature of auto-scaled timeout.\n\nNote that in the old design, we can fully control AFL, so that we can _create a new input file for the next execution_, _use the same timeout_, or _disable the auto-scaled timeout_ to avoid the aforementioned challenges.\n\n## Pending Development Decisions\n\nCurrently, there are many steps which we are hesitating to take. We may need to carefully evaluate them. __If you have any suggestion, please kindly let us know__. We are happy to take any possible discussion about improving StochFuzz.\n\n+ Currently, we use a lookup table to translate indirect call/jump on the fly. We are not sure whether it is necessary because simply patching a jump instruction at the target address may also work well. 
Note that a large lookup table may increase the cache missing rate and the overhead of process forking.\n+ For now, to support the [advanced strategy](https://github.com/ZhangZhuoSJTU/StochFuzz#advanced-usage), we maintain a retaddr mapping and do _O(log n)_ online binary searching to find the original retaddr when unwinding stack. It may be better to maintain a retaddr lookup table which supports _O(1)_ looking up. But also, this lookup table will extremely increase the memory usage as well as the cache missing rate and the overhead of process forking.\n+ Hook more signals to collect address information for a better error diagnosis, which, on the other hand, may cause conflicts of signal handlers set by the subject program.\n"
  },
  {
    "path": "docs/trouble.md",
    "content": "# Troubleshootings\n\nIf you are using a variant of AFL instead of the original [AFL](https://github.com/google/AFL), please refer to [system.md](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/master/docs/system.md#how-to-make-stochfuzz-compatible-with-other-afl-based-fuzzers) which tells you how to make StochFuzz compatible with those variants.\n\nBesides, this documentation mainly talks about how to handle the case where the rewritten binaries have inconsistent behaviors taking the same inputs (e.g., invalid crashes which cannot be reproduced by the original binaries). Please kindly open an issue to report any other problem, including:\n\n+ The execution speed is quite slow (e.g., slower than AFL-QEMU)\n+ The fuzzing process is stuck (i.e., the AFL panel does not have updates for a while)\n+ StochFuzz crashes during rewriting\n+ ...\n\n## How to check whether an input will cause inconsistent behaviors.\n\nAs mentioned in [README.md](../README.md#basic-usage), after the initial rewriting, StochFuzz will generate a _phantom file_. Originally, if we want to do binary-only fuzzing, we attach AFL to this phantom binary. \n\nActually, this phantom binary can also be directly executed, with the same arguments as the original binary has. \n\nHence, to check whether an input will cause inconsistent behaviors, you can execute both the original binary and the phantom binary with the given input and check the behaviors of two binaries.\n\n## Incorrect rewriting options or latent bugs in StochFuzz?\n\nStochFuzz provides different rewriting options and will automatically choose some, based on the given binary. In some cases, StochFuzz may pick the wrong choices. 
The following steps can help us identify whether the erroneous behaviors are caused by incorrect rewriting options or latent bugs in StochFuzz.\n\n+ First of all, make sure all the cached files are removed (`rm .*`) and try to rerun StochFuzz.\n+ If the erroneous behaviors still exist but you have adopted the advanced strategy, please remove all cached files (`rm .*`) and try the basic mode.\n+ If the erroneous behaviors still exist after adopting the basic mode, please remove all cached files (`rm .*`) and feed `-e -f -i` options into StochFuzz.\n\n```\n  -e            - install the fork server at the entrypoint instead of the main function\n  -f            - forcedly assume there is data interleaving with code\n  -i            - ignore the call-fallthrough edges to defense RET-misusing obfuscation\n```\n\n+ If the erroneous behaviors still exist after rewriting with the aforementioned options, please kindly open an issue to let us know; if the erroneous behaviors are gone, you can try the aforementioned options one by one to identify which one contributes to eliminating the errors, and if possible, you can also open an issue to let us know. \n\n\n## Known issues\n\n+ Like [AFL](https://github.com/google/AFL/blob/fab1ca5ed7e3552833a18fc2116d33a9241699bc/README.md#13-known-limitations--areas-for-improvement), StochFuzz cannot handle programs that install custom handlers for some important signals (SIGSEGV, SIGABRT, etc). Moreover, StochFuzz additionally occupies one more signal, _SIGUSR1_. If the subject program has a custom handler for SIGUSR1, the user may need to modify StochFuzz to use SIGUSR2 or other unused signals.\n"
  },
  {
    "path": "scripts/stochfuzz_env.sh",
    "content": "#!/bin/bash\n\nstochfuzz_dir=$(realpath $(dirname \"$(realpath $0)\")/../)\nlibstochfuzzRT_path=\"$stochfuzz_dir/src/libstochfuzzRT.so\"\nlibunwind_path=\"$stochfuzz_dir/libunwind/install/lib/libunwind.so\"\n\nif [ ! -f $libstochfuzzRT_path ]; then\n    echo \"libstochfuzzRT.so not found!\"\n    exit 1\nfi\n\nif [ ! -f $libunwind_path ]; then\n    echo \"libunwind.so not found!\"\n    exit 1\nfi\n\nexport STOCHFUZZ_PRELOAD=$libstochfuzzRT_path:$libunwind_path\necho $STOCHFUZZ_PRELOAD\n"
  },
  {
    "path": "src/.clang-format",
    "content": "BasedOnStyle: Google\nIndentWidth: 4\nAlwaysBreakTemplateDeclarations: true\nColumnLimit: 80\nUseTab: Never\nAllowShortIfStatementsOnASingleLine: false\nAllowShortBlocksOnASingleLine: false\nAllowShortLoopsOnASingleLine: false\n"
  },
  {
    "path": "src/Makefile",
    "content": "CC = clang\n\nSIGSTKSZ =  $(shell ./get_signal_stack_size.sh)\nCFLAGS = -Wall -Wno-unused-command-line-argument -Wno-void-pointer-to-int-cast -Wno-void-pointer-to-enum-cast -fPIC -pie -ffast-math -D_GNU_SOURCE -DSIGNAL_STACK_SIZE=$(SIGSTKSZ)\nLDFLAGS =\n\nSHELLCODE_CFLAGS = -Wall -fno-stack-protector -fno-jump-tables -fpie -O3 -D_GNU_SOURCE -DSIGNAL_STACK_SIZE=$(SIGSTKSZ)\n\nLIBNAME = $(shell find . -regex './lib[^\\.\\/]*\\.h' | tr -d '/' | cut -d '.' -f2)\nTOOLNAME = $(shell grep '^\\#define OURTOOL ' config.h | cut -d '\"' -f2)\nVERSION = $(shell grep '^\\#define VERSION ' config.h | cut -d '\"' -f2)\n\nifeq ($(shell ../test/check_avx512 2>/dev/null; echo $$?), 0)\n\tAVX_CFLAGS = -mavx512f -DAVX512\nelse\n\tAVX_CFLAGS =\nendif\n\nifneq ($(origin DEBUG_REWRITER), undefined)\n\tCFLAGS += -DBINARY_SEARCH_INVALID_CRASH -DBINARY_SEARCH_DEBUG_REWRITER=$(strip $(DEBUG_REWRITER))\n\tSHELLCODE_CFLAGS += -DBINARY_SEARCH_INVALID_CRASH -DBINARY_SEARCH_DEBUG_REWRITER=$(strip $(DEBUG_REWRITER))\nendif\n\n# note that the new SINGLE_SUCC_OPT is not well test, as such we add an option to disable it\nifneq ($(origin SINGLE_SUCC_OPT), undefined)\nifeq ('$(SINGLE_SUCC_OPT)', 'disable')\n\tCFLAGS += -DNSINGLE_SUCC_OPT\n\tSHELLCODE_CFLAGS += -DNSINGLE_SUCC_OPT\nendif\nendif\n\nifneq ($(origin CONSERVATIVE_PATCH), undefined)\nifeq ('$(CONSERVATIVE_PATCH)', 'enable')\n\tCFLAGS += -DCONSERVATIVE_PATCH\n\tSHELLCODE_CFLAGS += -DCONSERVATIVE_PATCH\nendif\nendif\n\n# glib\nCFLAGS += $(shell PKG_CONFIG_PATH=$(realpath ..)/glib/lib/x86_64-linux-gnu/pkgconfig/ pkg-config --cflags glib-2.0)\nLDFLAGS += -lpthread\n\n# keystone\nCFLAGS += -I $(realpath ..)/keystone/include\nLDFLAGS += -lstdc++ -lm\n\n# capstone\nCFLAGS += -I $(realpath ..)/capstone/include\nLDFLAGS +=\n\n# libunwind\nLIBUNWIND_RT_STEP_OFFSET = 0x$(shell readelf -s $(realpath ..)/libunwind/install/lib/libunwind.so  | grep _ULx86_64_step | head -n 1 | awk '{print $$2}')\nLIBUNWIND_RT_CFLAGS += -fPIC -shared -I 
$(realpath ..)/libunwind/install/include -DSTEP_OFFSET=$(LIBUNWIND_RT_STEP_OFFSET) -DSIGNAL_STACK_SIZE=$(SIGSTKSZ)\n\nOBJS=\\\n\tbinary.o \\\n\tbuffer.o \\\n\telf_.o \\\n\tutils.o \\\n\tinterval_splay.o \\\n\tmem_file.o \\\n\trestricted_ptr.o \\\n\ttp_dispatcher.o \\\n\tsys_optarg.o \\\n\tdisassembler.o \\\n\trewriter.o \\\n\tpatcher.o \\\n\tucfg_analyzer.o \\\n\tcapstone_.o \\\n\tdiagnoser.o \\\n\tlibrary_functions/library_functions.o \\\n\tcore.o\n\n.PHONY: clean format\n\nlibstochfuzzRT:\n\tgcc $(LIBUNWIND_RT_CFLAGS) -o libstochfuzzRT.so libstochfuzzRT.c\n\ndebug: CFLAGS += -g -O0 -fsanitize=address -fno-omit-frame-pointer -DDEBUG\ndebug: SHELLCODE_CFLAGS += -DDEBUG\ndebug: executable\n\nprofile: CFLAGS += -pg -O2 -DNDEBUG\nprofile: SHELLCODE_CFLAGS += -DNDEBUG\nprofile: executable\n\nrelease: CFLAGS += -O2 -DNDEBUG\nrelease: SHELLCODE_CFLAGS += -DNDEBUG\nrelease: executable\n\nexecutable: loader fork_server tps handlers library_functions_load libstochfuzzRT $(OBJS)\n\tar rcs $(LIBNAME).a $(OBJS)\n\t$(CC) $(CFLAGS) $(LDFLAGS) -shared $(OBJS) $(realpath ..)/glib/lib/x86_64-linux-gnu/libglib-2.0.a $(realpath ..)/keystone/build/llvm/lib/libkeystone.a $(realpath ..)/capstone/libcapstone.a -o $(LIBNAME).so\n\t$(CC) $(CFLAGS) $(LDFLAGS) frontend.c $(LIBNAME).a $(realpath ..)/glib/lib/x86_64-linux-gnu/libglib-2.0.a $(realpath ..)/keystone/build/llvm/lib/libkeystone.a $(realpath ..)/capstone/libcapstone.a -o $(TOOLNAME)\n\nloader:\n\t$(CC) $(SHELLCODE_CFLAGS) -c loader.c\n\t$(CC) -nostdlib -o loader.out loader.o -Wl,--entry=_entry\n\tobjcopy --dump-section .text=loader.bin loader.out\n\txxd -i loader.bin > loader_bin.c\n\nfork_server:\n\t$(CC) $(SHELLCODE_CFLAGS) $(AVX_CFLAGS) -c fork_server.c\n\t$(CC) -nostdlib -o fork_server.out fork_server.o -Wl,--entry=_entry\n\tobjcopy --dump-section .text=fork_server.bin fork_server.out\n\txxd -i fork_server.bin > fork_server_bin.c\n\ntps:\n\t$(MAKE) -C trampolines\n\nhandlers:\n\tpython3 rewriter_handlers/generate.py 
rewriter_handlers\n\nlibrary_functions_load:\n\tpython3 library_functions/generate.py lib.csv library_functions\n\nifeq ($(findstring -r,$(TEST_OPTIONS)), -r)\nSTOCHFUZZ_PRELOAD = $(shell ../scripts/stochfuzz_env.sh)\ndefine test_succ\n\tcd test && ( STOCHFUZZ_PRELOAD=$(strip ${2}):$(STOCHFUZZ_PRELOAD) ${1} )\nendef\n\ndefine test_fail\n\tcd test && ( ! STOCHFUZZ_PRELOAD=$(strip ${2}):$(STOCHFUZZ_PRELOAD) ${1} )\nendef\n\ndefine test_whatever\n\tcd test && ( STOCHFUZZ_PRELOAD=$(strip ${2}):$(STOCHFUZZ_PRELOAD) ${1} || true )\nendef\nelse\ndefine test_succ\n\tcd test && ( ${1} )\nendef\n\ndefine test_fail\n\tcd test && ( ! ${1} )\nendef\n\ndefine test_whatever\n\tcd test && ( ${1} || true )\nendef\nendif\n\ntest:\n\trm -rf test; cp -r ../test test\n\t$(call test_succ, ../$(TOOLNAME) -P $(TEST_OPTIONS) -- bzip2.no.pie)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- bzip2.no.pie --help)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- bzip2.no.pie -kfd test.c.bz2)\n\t$(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- bzip2.no.pie )\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- libpng-1.2.56 seed.png)\n\t$(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- libpng-1.2.56)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- json-2017-02-12.normal json.seed)\n\t$(call test_fail, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- crash mdzz)\n\t$(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- crash )\n\t$(call test_succ, ../$(TOOLNAME) -P $(TEST_OPTIONS) -- openssl-1.0.1f)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- openssl-1.0.1f leak-268f0e85f4bc45cbaf4d257222b830eac18977f3)\n\t$(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- openssl-1.0.1f)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- hello)\n\t$(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- hello)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- rar e -o+ -mt3 -- test.rar)\n\t$(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- 
rar)\n\t$(call test_fail, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- timeout mdzz)\n\t$(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- timeout)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- readelf.pie -a small_exec.elf)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- bzip2.pie -kfd test.c.bz2)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- pngfix.pie seed.png)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- pngfix.pie toucan.png)\nifneq ($(strip $(shell whereis libasan.so.4 | cut -d ' ' -f2- | xargs ls | grep 'libasan.so.4')),)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- libjpeg.asan seed.jpg, $(shell whereis libasan.so.4 | cut -d ' ' -f2- | xargs ls | grep 'libasan.so.4'))\nendif\nifneq ($(findstring -n,$(TEST_OPTIONS)), -n)\n\t$(call test_whatever, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- unintentional_crash mdzz)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- unintentional_crash)\n\t$(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- unintentional_crash)\nendif\nifeq ($(findstring -e,$(TEST_OPTIONS)), -e)\n\t$(call test_fail, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- no_main mdzz)\n\t$(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- no_main)\nendif\nifneq ($(findstring -f,$(TEST_OPTIONS)), -f)\n\t$(call test_whatever, timeout --signal=KILL 10m ../$(TOOLNAME) -R -t 5000 $(TEST_OPTIONS) -- z3 -smt2 ex.smt2) # this test may fail due to the memory limit of Github Actions\nendif\n\t# test daemon\n\trm -rf test; cp -r ../test test\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' bzip2.no.pie -kfd test.c.bz2)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' libpng-1.2.56 seed.png)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' json-2017-02-12.normal json.seed)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' openssl-1.0.1f leak-268f0e85f4bc45cbaf4d257222b830eac18977f3)\n\t$(call test_succ, ./test_daemon.sh 
../$(TOOLNAME) '$(TEST_OPTIONS)' rar e -o+ -mt3 -- test.rar)\n\t$(call test_fail, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' crash mdzz)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' crash)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' readelf.pie -a small_exec.elf)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' bzip2.pie -kfd test.c.bz2)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' pngfix.pie seed.png)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' pngfix.pie toucan.png)\nifneq ($(strip $(shell whereis libasan.so.4 | cut -d ' ' -f2- | xargs ls | grep 'libasan.so.4')),)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' libjpeg.asan seed.jpg, $(shell whereis libasan.so.4 | cut -d ' ' -f2- | xargs ls | grep 'libasan.so.4'))\nendif\nifeq ($(findstring -r,$(TEST_OPTIONS)), -r)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS) -e' hello)\nelse\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' hello)\nendif\nifneq ($(findstring -n,$(TEST_OPTIONS)), -n)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' unintentional_crash mdzz)\nendif\nifeq ($(findstring -e,$(TEST_OPTIONS)), -e)\n\t$(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' no_main mdzz)\nendif\n\t$(call test_fail, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' timeout mdzz)\n\t$(call test_succ, cat timeout.daemon.log)\n\t$(call test_succ, grep -F 'get status code: 0x9 (signal: 9)' timeout.daemon.log)\n\nGOOGLE_FTS=\\\n    boringssl-2016-02-12 \\\n    c-ares-CVE-2016-5180 \\\n    freetype2-2017 \\\n    guetzli-2017-3-30 \\\n    harfbuzz-1.3.2 \\\n    json-2017-02-12 \\\n    lcms-2017-03-21 \\\n    libarchive-2017-01-04 \\\n    libjpeg-turbo-07-2017 \\\n    libpng-1.2.56 \\\n    libssh-2017-1272 \\\n    libxml2-v2.9.2 \\\n    llvm-libcxxabi-2017-01-27 \\\n    openssl-1.0.1f \\\n    
openssl-1.0.2d \\\n    openssl-1.1.0c \\\n    openthread-2018-02-27 \\\n    pcre2-10.00 \\\n    proj4-2017-08-14 \\\n    re2-2014-12-09 \\\n    sqlite-2016-11-14 \\\n    vorbis-2017-12-11 \\\n    woff2-2016-05-06 \\\n    wpantund-2018-02-27\n\nprepare_google_fts:\n\trm -rf test; cp -r ../benchmark test; cp ../test/test_daemon_ignore_asan_sof.sh test\n\n$(GOOGLE_FTS): prepare_google_fts\n\t$(call test_succ, ./test_daemon_ignore_asan_sof.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' $@.normal $@.seed)\n\t$(call test_succ, ./test_daemon_ignore_asan_sof.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' $@.inline $@.seed)\n\t$(call test_succ, rm -f .pdisasm.$@.normal .pdisasm.$@.inline)\n\t$(call test_succ, grep -F \"SUMMARY: AddressSanitizer: stack-overflow\" $@.normal.daemon.log || ../$(TOOLNAME) -R $(TEST_OPTIONS) -- $@.normal $@.seed || grep -F \"we encounter a rewriting error\" $@.normal.daemon.log)\n\t$(call test_succ, grep -F \"SUMMARY: AddressSanitizer: stack-overflow\" $@.inline.daemon.log || ../$(TOOLNAME) -R $(TEST_OPTIONS) -- $@.inline $@.seed || grep -F \"we encounter a rewriting error\" $@.inline.daemon.log)\n\nbenchmark: prepare_google_fts $(GOOGLE_FTS)\n\nclean:\n\trm -rf $(OBJS) *.out *.bin *.o *.a *.so *_bin.c $(TOOLNAME) test/ library_functions/library_functions_load.c rewriter_handlers/handler_main.c\n\t$(MAKE) -C trampolines clean\n\nSOURCES:=$(OBJS:.o=.c)\nHEADERS:=$(OBJS:.o=.h)\nSOURCES += loader.c fork_server.c frontend.c asm_syscall.c asm_utils.c libstochfuzzRT.c\nSOURCES += rewriter_handlers/*.c rewriter_handlers/*.in\nSOURCES += prob_disasm/*.c\nSOURCES += prob_disasm/prob_disasm_complete/*.c\nHEADERS += address_dictionary.h loader.h fork_server.h config.h afl_config.h crs_config.h $(LIBNAME).h\n\nformat:\n\tclang-format -sort-includes -style=file -i $(SOURCES)\n\tclang-format -sort-includes -style=file -i $(HEADERS)\n\t$(MAKE) -C trampolines format\n"
  },
  {
    "path": "src/address_dictionary.h",
    "content": "/*\n * address_dictionary.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __ADDRESS_DICTIONARY_H\n#define __ADDRESS_DICTIONARY_H\n\n#include \"config.h\"\n#include \"utils.h\"\n\n// force evaluation\n#define __ADDR_DICT_NAME_2(x, y) __AddrDict_##y##_##x##_t\n#define __ADDR_DICT_NAME_1(x, y) __ADDR_DICT_NAME_2(x, y)\n#define __ADDR_DICT_NAME(x) __ADDR_DICT_NAME_1(x, __COUNTER__)\n\n/*\n * Address dictionary uses a contiguous memory to store data, and uses key as\n * index to access. 
Compared with GHashTable, it is a much more efficient\n * approach to build a hash table who uses address as key and is likely to use\n * all addresses.\n *\n * Note that we use macro to simulate template in C++.\n */\n#define AddrDict(type, name)        \\\n    struct __ADDR_DICT_NAME(name) { \\\n        type *__data;               \\\n        uint64_t *__used;           \\\n        addr_t __base;              \\\n        size_t __size;              \\\n    } name\n\n/*\n * AddrDict without checking existence.\n * It is very helpful for hash tables whose value cannot be zero.\n */\n#define AddrDictFast(type, name)    \\\n    struct __ADDR_DICT_NAME(name) { \\\n        type *__data;               \\\n        PhantomType *__used;        \\\n        addr_t __base;              \\\n        size_t __size;              \\\n    } name\n\n#define z_addr_dict_init(dict, base_addr, size)                                \\\n    do {                                                                       \\\n        (dict).__base = (base_addr);                                           \\\n        (dict).__size = (size);                                                \\\n        (dict).__data = z_alloc((dict).__size, sizeof(*((dict).__data)));      \\\n        if (_Generic(((dict).__used), PhantomType *                            \\\n                     : false, default                                          \\\n                     : true)) {                                                \\\n            (dict).__used = z_alloc((dict).__size / 64 + 1, sizeof(uint64_t)); \\\n        } else {                                                               \\\n            (dict).__used = NULL;                                              \\\n        }                                                                      \\\n    } while (0)\n\n#define z_addr_dict_check_addr(dict, addr)                         \\\n    do {                                                           
\\\n        if ((addr) < (dict).__base ||                              \\\n            (addr) >= (dict).__base + (dict).__size) {             \\\n            EXITME(\"out-of-boundry access in address dictionary\"); \\\n        }                                                          \\\n    } while (0)\n\n#define z_addr_dict_exist(dict, addr)                         \\\n    ({                                                        \\\n        bool res;                                             \\\n        z_addr_dict_check_addr(dict, addr);                   \\\n                                                              \\\n        size_t __off = (addr) - (dict).__base;                \\\n                                                              \\\n        if (_Generic(((dict).__used), PhantomType *           \\\n                     : false, default                         \\\n                     : true)) {                               \\\n            size_t __off_div = __off / 64;                    \\\n            size_t __off_mod = __off % 64;                    \\\n            uint64_t *__bits = (uint64_t *)((dict).__used);   \\\n            res = !!(__bits[__off_div] & (1UL << __off_mod)); \\\n        } else {                                              \\\n            res = !!((dict).__data[__off]);                   \\\n        }                                                     \\\n                                                              \\\n        res;                                                  \\\n    })\n\n#define z_addr_dict_set(dict, addr, val)                    \\\n    do {                                                    \\\n        z_addr_dict_check_addr(dict, addr);                 \\\n                                                            \\\n        size_t __off = (addr) - (dict).__base;              \\\n        (dict).__data[__off] = (val);                       \\\n                                           
                 \\\n        if ((dict).__used) {                                \\\n            size_t __off_div = __off / 64;                  \\\n            size_t __off_mod = __off % 64;                  \\\n            uint64_t *__bits = (uint64_t *)((dict).__used); \\\n            __bits[__off_div] |= (1UL << __off_mod);        \\\n        }                                                   \\\n    } while (0)\n\n#define z_addr_dict_get(dict, addr)                               \\\n    ({                                                            \\\n        z_addr_dict_check_addr(dict, addr);                       \\\n        if (!z_addr_dict_exist(dict, addr)) {                     \\\n            EXITME(\"uninitialized access in address dictionary\"); \\\n        }                                                         \\\n        (dict).__data[(addr) - (dict).__base];                    \\\n    })\n\n#define z_addr_dict_get_data(dict) ((dict).__data)\n#define z_addr_dict_get_base(dict) ((dict).__base)\n#define z_addr_dict_get_size(dict) ((dict).__size)\n\n#define z_addr_dict_remove(dict, addr)                      \\\n    do {                                                    \\\n        z_addr_dict_check_addr(dict, addr);                 \\\n        size_t __off = (addr) - (dict).__base;              \\\n        (dict).__data[__off] = 0;                           \\\n                                                            \\\n        if ((dict).__used) {                                \\\n            size_t __off_div = __off / 64;                  \\\n            size_t __off_mod = __off % 64;                  \\\n            uint64_t *__bits = (uint64_t *)((dict).__used); \\\n            __bits[__off_div] &= (~(1UL << __off_mod));     \\\n        }                                                   \\\n    } while (0)\n\n/*\n * z_addr_dist_destroy should support variable numbers of arguments\n */\n#define __addr_dict_destroy_opt_0(...)\n#define 
__addr_dict_destroy_opt_1(...)\n#define __addr_dict_destroy_opt_2(dict, func)              \\\n    do {                                                   \\\n        for (size_t __i = 0; __i < (dict).__size; __i++) { \\\n            addr_t __addr = (dict).__base + __i;           \\\n            if (z_addr_dict_exist(dict, __addr)) {         \\\n                (*(func))((dict).__data[__i]);             \\\n            }                                              \\\n        }                                                  \\\n    } while (0)\n\n#define __addr_dict_destroy_choose(a, b, c, f, ...) f\n\n#define __addr_dict_destroy_data(...)                                  \\\n    __addr_dict_destroy_choose(, ##__VA_ARGS__,                        \\\n                               __addr_dict_destroy_opt_2(__VA_ARGS__), \\\n                               __addr_dict_destroy_opt_1(__VA_ARGS__), \\\n                               __addr_dict_destroy_opt_0(__VA_ARGS__))\n\n#define __addr_dict_destroy_self(dict, ...) \\\n    do {                                    \\\n        z_free((dict).__data);              \\\n        z_free((dict).__used);              \\\n    } while (0)\n\n#define z_addr_dict_destroy(...)               \\\n    do {                                       \\\n        __addr_dict_destroy_data(__VA_ARGS__); \\\n        __addr_dict_destroy_self(__VA_ARGS__); \\\n    } while (0)\n\n#endif\n"
  },
  {
    "path": "src/afl_config.h",
    "content": "/*\n * afl_config.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __AFL_CONFIG_H\n#define __AFL_CONFIG_H\n\n#include \"config.h\"\n\n/*\n * XXX: Attaching SHM at a fixed address allows around 10% perf gain. See\n * https://github.com/google/AFL/blob/master/afl-as.h#L71.\n *\n * Note that it is reasonable for a binary-instrumented tool to fix the address,\n * as we know the memory layout, unlike when working from the assembly view.\n *\n */\n/*\n * TODO: when rewriting, dynamically calculate the fixed AFL_MAP_ADDR.\n */\n#define AFL_FORKSRV_FD 198\n#define AFL_SHM_ENV \"__AFL_SHM_ID\"\n#define AFL_MAP_SIZE_POW2 16\n#define AFL_MAP_SIZE (1 << AFL_MAP_SIZE_POW2)\n#define AFL_MAP_ADDR (RW_PAGE_ADDR + 0x10000)\n#define AFL_PREV_ID_PTR (RW_PAGE_ADDR + 0x8)\n#define AFL_MAP_SIZE_MASK ((1 << AFL_MAP_SIZE_POW2) - 1)\n\n// #define AFL_BB_ID(x) ((((x) >> 4) ^ ((x) << 8)) & AFL_MAP_SIZE_MASK)\n// AFL_BB_ID Algorithm used in AFL-QEMU, but it seems bad on static binary\n// rewriting\n\n#define AFL_BB_ID(x) (((x) ^ ((x) >> AFL_MAP_SIZE_POW2)) & AFL_MAP_SIZE_MASK)\n\n#define AFL_HASH_CONST 0xa5b35705\n\n#endif\n"
  },
  {
    "path": "src/asm_syscall.c",
    "content": "/*\n * asm_syscall.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include <signal.h>\n#include <sys/mman.h>\n#include <sys/resource.h>\n#include <sys/shm.h>\n#include <sys/socket.h>\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <sys/wait.h>\n#include <unistd.h>\n\n#define NO_INLINE __attribute__((__noinline__))\n#define NO_RETURN __attribute__((__noreturn__))\n\n#define Z_SYSCALL __attribute__((unused)) static inline\n\n#define ASM_STRING(name, content)       \\\n    \".global \" #name                    \\\n    \"\\n\"                                \\\n    \".type \" #name \",@function\\n\" #name \\\n    \":\\n\"                               \\\n    \".ascii \\\"\" content                 \\\n    \"\\\"\\n\"                              \\\n    \".byte 0x00\\n\"\n\n/*\n * Kernal sigaction (unlike glibc wrapper)\n */\nstruct kernel_sigaction {\n    void (*k_sa_handler)(int, siginfo_t *, void *);\n    unsigned long sa_flags;\n    void (*sa_restorer)(void);\n    unsigned long sa_mask;\n};\n\nZ_SYSCALL unsigned long sys_prctl(unsigned long option_0, unsigned long arg2_0,\n                                  unsigned long arg3_0, unsigned long arg4_0,\n                                  unsigned long arg5_0) {\n    register uintptr_t option asm(\"rdi\") = (uintptr_t)option_0;\n    
register uintptr_t arg2 asm(\"rsi\") = (uintptr_t)arg2_0;\n    register uintptr_t arg3 asm(\"rdx\") = (uintptr_t)arg3_0;\n    register uintptr_t arg4 asm(\"r10\") = (uintptr_t)arg4_0;\n    register uintptr_t arg5 asm(\"r8\") = (uintptr_t)arg5_0;\n    register uintptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $157, %%eax\\n\\t\"  // SYS_PRCTL\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(option), \"r\"(arg2), \"r\"(arg3), \"r\"(arg4), \"r\"(arg5)\n        : \"rcx\", \"r11\");\n\n    return (unsigned long)err;\n}\n\nZ_SYSCALL unsigned long sys_clone(unsigned long clone_flags_0,\n                                  unsigned long newsp_0, pid_t *parent_tidptr_0,\n                                  pid_t *child_tidptr_0, void *tls_val_0) {\n    register uintptr_t clone_flags asm(\"rdi\") = (uintptr_t)clone_flags_0;\n    register uintptr_t newsp asm(\"rsi\") = (uintptr_t)newsp_0;\n    register uintptr_t parent_tidptr asm(\"rdx\") = (uintptr_t)parent_tidptr_0;\n    register uintptr_t child_tidptr asm(\"r10\") = (uintptr_t)child_tidptr_0;\n    register uintptr_t tls_val asm(\"r8\") = (uintptr_t)tls_val_0;\n    register uintptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $56, %%eax\\n\\t\"  // SYS_CLONE\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(clone_flags), \"r\"(newsp), \"r\"(parent_tidptr), \"r\"(child_tidptr),\n          \"r\"(tls_val)\n        : \"rcx\", \"r11\");\n\n    return (unsigned long)err;\n}\n\nZ_SYSCALL unsigned long sys_mmap(unsigned long addr_0, unsigned long len_0,\n                                 unsigned long prot_0, unsigned long flags_0,\n                                 unsigned long fd_0, unsigned long off_0) {\n    register uintptr_t addr asm(\"rdi\") = (uintptr_t)addr_0;\n    register uintptr_t len asm(\"rsi\") = (uintptr_t)len_0;\n    register uintptr_t prot asm(\"rdx\") = (uintptr_t)prot_0;\n    register uintptr_t flags asm(\"r10\") = (uintptr_t)flags_0;\n    register uintptr_t fd 
asm(\"r8\") = (uintptr_t)fd_0;\n    register uintptr_t off asm(\"r9\") = (uintptr_t)off_0;\n    register uintptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $9, %%eax\\n\\t\"  // SYS_MMAP\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(addr), \"r\"(len), \"r\"(prot), \"r\"(flags), \"r\"(fd), \"r\"(off)\n        : \"rcx\", \"r11\");\n\n    return (unsigned long)err;\n}\n\nZ_SYSCALL int sys_mprotect(unsigned long start_0, size_t len_0,\n                           unsigned long prot_0) {\n    register uintptr_t start asm(\"rdi\") = (uintptr_t)start_0;\n    register uintptr_t len asm(\"rsi\") = (uintptr_t)len_0;\n    register uintptr_t prot asm(\"rdx\") = (uintptr_t)prot_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $10, %%eax\\n\\t\"  // SYS_MPROTECT\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(start), \"r\"(len), \"r\"(prot)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_open(const char *filename_0, int flags_0, int mode_0) {\n    register uintptr_t filename asm(\"rdi\") = (uintptr_t)filename_0;\n    register uintptr_t flags asm(\"rsi\") = (uintptr_t)flags_0;\n    register uintptr_t mode asm(\"rdx\") = (uintptr_t)mode_0;\n    register intptr_t fd asm(\"rax\");\n\n    asm volatile(\n        \"mov $2, %%eax\\n\\t\"  // SYS_OPEN\n        \"syscall\"\n        : \"=rax\"(fd)\n        : \"r\"(filename), \"r\"(flags), \"r\"(mode)\n        : \"rcx\", \"r11\");\n\n    return (int)fd;\n}\n\nZ_SYSCALL int sys_pipe(int *pipefd_0) {\n    register uintptr_t pipefd asm(\"rdi\") = (uintptr_t)pipefd_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $22, %%eax\\n\\t\"  // SYS_PIPE\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(pipefd)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_close(int fd_0) {\n    register uintptr_t fd asm(\"rdi\") = (uintptr_t)fd_0;\n    register intptr_t err asm(\"rax\");\n\n  
  asm volatile(\n        \"mov $3, %%eax\\n\\t\"  // SYS_CLOSE\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(fd)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_rt_sigaction(int sig_0, struct kernel_sigaction *act_0,\n                               struct kernel_sigaction *oact_0,\n                               size_t sigsetsize_0) {\n    register uintptr_t sig asm(\"rdi\") = (uintptr_t)sig_0;\n    register uintptr_t act asm(\"rsi\") = (uintptr_t)act_0;\n    register uintptr_t oact asm(\"rdx\") = (uintptr_t)oact_0;\n    register uintptr_t sigsetsize asm(\"r10\") = (uintptr_t)sigsetsize_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $13, %%eax\\n\\t\"  // SYS_RT_SIGACTION\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(sig), \"r\"(act), \"r\"(oact), \"r\"(sigsetsize)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_connect(int fd_0, struct sockaddr *addr_0, int addrlen_0) {\n    register uintptr_t fd asm(\"rdi\") = (uintptr_t)fd_0;\n    register uintptr_t addr asm(\"rsi\") = (uintptr_t)addr_0;\n    register uintptr_t addrlen asm(\"rdx\") = (uintptr_t)addrlen_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $42, %%eax\\n\\t\"  // SYS_CONNECT\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(fd), \"r\"(addr), \"r\"(addrlen)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_socket(int family_0, int type_0, int protocol_0) {\n    register uintptr_t family asm(\"rdi\") = (uintptr_t)family_0;\n    register uintptr_t type asm(\"rsi\") = (uintptr_t)type_0;\n    register uintptr_t protocol asm(\"rdx\") = (uintptr_t)protocol_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $41, %%eax\\n\\t\"  // SYS_SOCKET\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(family), \"r\"(type), \"r\"(protocol)\n        : \"rcx\", \"r11\");\n\n    return 
(int)err;\n}\n\nZ_SYSCALL int sys_write(int fd_0, const char *buf_0, size_t len_0) {\n    register uintptr_t fd asm(\"rdi\") = (uintptr_t)fd_0;\n    register uintptr_t buf asm(\"rsi\") = (uintptr_t)buf_0;\n    register uintptr_t len asm(\"rdx\") = (uintptr_t)len_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $1, %%eax\\n\\t\"  // SYS_WRITE\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(fd), \"r\"(buf), \"r\"(len)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_sigaltstack(stack_t *uss_0, stack_t *uoss_0) {\n    register uintptr_t uss asm(\"rdi\") = (uintptr_t)uss_0;\n    register uintptr_t uoss asm(\"rsi\") = (uintptr_t)uoss_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $131, %%eax\\n\\t\"  // SYS_SIGALTSTACK\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(uss), \"r\"(uoss)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_dup2(int oldfd_0, int newfd_0) {\n    register uintptr_t oldfd asm(\"rdi\") = (uintptr_t)oldfd_0;\n    register uintptr_t newfd asm(\"rsi\") = (uintptr_t)newfd_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $33, %%eax\\n\\t\"  // SYS_DUP2\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(oldfd), \"r\"(newfd)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_fstat(unsigned int fd_0, struct stat *buf_0) {\n    register uintptr_t fd asm(\"rdi\") = (uintptr_t)fd_0;\n    register uintptr_t buf asm(\"rsi\") = (uintptr_t)buf_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $5, %%eax\\n\\t\"  // SYS_FSTAT\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(fd), \"r\"(buf)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_exit(int error_code_0) {\n    register uintptr_t error_code asm(\"rdi\") = (uintptr_t)error_code_0;\n    register intptr_t err 
asm(\"rax\");\n\n    asm volatile(\n        \"mov $60, %%eax\\n\\t\"  // SYS_EXIT\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(error_code)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_kill(pid_t pid_0, int sig_0) {\n    register uintptr_t pid asm(\"rdi\") = (uintptr_t)pid_0;\n    register uintptr_t sig asm(\"rsi\") = (uintptr_t)sig_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $62, %%eax\\n\\t\"  // SYS_KILL\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(pid), \"r\"(sig)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_msync(unsigned long start_0, size_t len_0, int flags_0) {\n    register uintptr_t start asm(\"rdi\") = (uintptr_t)start_0;\n    register uintptr_t len asm(\"rsi\") = (uintptr_t)len_0;\n    register uintptr_t flags asm(\"rdx\") = (uintptr_t)flags_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $26, %%eax\\n\\t\"  // SYS_MSYNC\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(start), \"r\"(len), \"r\"(flags)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_read(int fd_0, const char *buf_0, size_t len_0) {\n    register uintptr_t fd asm(\"rdi\") = (uintptr_t)fd_0;\n    register uintptr_t buf asm(\"rsi\") = (uintptr_t)buf_0;\n    register uintptr_t len asm(\"rdx\") = (uintptr_t)len_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $0, %%eax\\n\\t\"  // SYS_READ\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(fd), \"r\"(buf), \"r\"(len)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL pid_t sys_wait4(pid_t pid_0, int *wstatus_0, int options_0,\n                          struct rusage *rusage_0) {\n    register uintptr_t pid asm(\"rdi\") = (uintptr_t)pid_0;\n    register uintptr_t wstatus asm(\"rsi\") = (uintptr_t)wstatus_0;\n    register uintptr_t options asm(\"rdx\") = 
(uintptr_t)options_0;\n    register uintptr_t rusage asm(\"r10\") = (uintptr_t)rusage_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $61, %%eax\\n\\t\"  // SYS_WAIT4\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(pid), \"r\"(wstatus), \"r\"(options), \"r\"(rusage)\n        : \"rcx\", \"r11\");\n\n    return (pid_t)err;\n}\n\nZ_SYSCALL void *sys_shmat(int shmid_0, const void *shmaddr_0, int shmflg_0) {\n    register uintptr_t shmid asm(\"rdi\") = (uintptr_t)shmid_0;\n    register uintptr_t shmaddr asm(\"rsi\") = (uintptr_t)shmaddr_0;\n    register uintptr_t shmflg asm(\"rdx\") = (uintptr_t)shmflg_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $30, %%eax\\n\\t\"  // SYS_SHMAT\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(shmid), \"r\"(shmaddr), \"r\"(shmflg)\n        : \"rcx\", \"r11\");\n\n    return (void *)err;\n}\n\nZ_SYSCALL pid_t sys_getpid() {\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $39, %%eax\\n\\t\"  // SYS_GETPID\n        \"syscall\"\n        : \"=rax\"(err)\n        :\n        : \"rcx\", \"r11\");\n\n    return (pid_t)err;\n}\n\nZ_SYSCALL pid_t sys_fork() {\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $57, %%eax\\n\\t\"  // SYS_FORK\n        \"syscall\"\n        : \"=rax\"(err)\n        :\n        : \"rcx\", \"r11\");\n\n    return (pid_t)err;\n}\n\nZ_SYSCALL pid_t sys_pause() {\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $34, %%eax\\n\\t\"  // SYS_PAUSE\n        \"syscall\"\n        : \"=rax\"(err)\n        :\n        : \"rcx\", \"r11\");\n\n    return (pid_t)err;\n}\n\nZ_SYSCALL int sys_setpgid(pid_t pid_0, pid_t pgid_0) {\n    register uintptr_t pid asm(\"rdi\") = (uintptr_t)pid_0;\n    register uintptr_t pgid asm(\"rsi\") = (uintptr_t)pgid_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $109, %%eax\\n\\t\"  // SYS_SETPGID\n 
       \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(pid), \"r\"(pgid)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n\nZ_SYSCALL int sys_munmap(unsigned long addr_0, size_t len_0) {\n    register uintptr_t addr asm(\"rdi\") = (uintptr_t)addr_0;\n    register uintptr_t len asm(\"rsi\") = (uintptr_t)len_0;\n    register intptr_t err asm(\"rax\");\n\n    asm volatile(\n        \"mov $11, %%eax\\n\\t\"  // SYS_MUNMAP\n        \"syscall\"\n        : \"=rax\"(err)\n        : \"r\"(addr), \"r\"(len)\n        : \"rcx\", \"r11\");\n\n    return (int)err;\n}\n"
  },
  {
    "path": "src/asm_utils.c",
    "content": "/*\n * asm_utils.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include <fcntl.h>\n\n#include \"asm_syscall.c\"\n\n#define Z_UTILS __attribute__((unused)) static inline\n\n#ifdef DEBUG\n#define utils_puts(s, b) __utils_puts(s, b)\n#define utils_error(s, e) __utils_error(s, e)\n#else\n#define utils_puts(s, b)\n#define utils_error(s, e)                 \\\n    do {                                  \\\n        if (e) {                          \\\n            sys_kill(/*pid=*/0, SIGKILL); \\\n            asm volatile(\"ud2\");          \\\n            __builtin_unreachable();      \\\n        }                                 \\\n    } while (0)\n#endif\n\n#define utils_likely(x) __builtin_expect(!!(x), 1)\n#define utils_unlikely(x) __builtin_expect(!!(x), 0)\n\nZ_UTILS void __utils_puts(const char *s, bool newline) {\n    const char *buf = s;\n    const char *cur = s;\n    for (; *cur != '\\0'; cur++)\n        ;\n    sys_write(STDERR_FILENO, buf, cur - buf);\n\n    if (newline) {\n        const char newline = '\\n';\n        sys_write(STDERR_FILENO, &newline, 1);\n    }\n}\n\nZ_UTILS void utils_num2hexstr(char *s, uint64_t n) {\n    uint64_t r = 0x1000000000000000;\n    while (r != 0) {\n        char c = n / r;\n        if (c < 10) {\n            *(s++) = '0' + c;\n        } else {\n       
     *(s++) = 'a' + c - 10;\n        }\n        n %= r;\n        r /= 0x10;\n    }\n}\n\nZ_UTILS unsigned long utils_hexstr2num(const char **str_ptr) {\n    const char *str = *str_ptr;\n    unsigned long x = 0;\n    while (true) {\n        char c = *str++;\n        if (c >= '0' && c <= '9') {\n            x <<= 4;\n            x |= (unsigned long)(c - '0');\n        } else if (c >= 'a' && c <= 'f') {\n            x <<= 4;\n            x |= (unsigned long)(10 + c - 'a');\n        } else {\n            *str_ptr = str;\n            return x;\n        }\n    }\n}\n\nZ_UTILS void utils_output_number(uint64_t n) {\n    char *s = (char *)(RW_PAGE_ADDR + RW_PAGE_USED_SIZE + 0x50);\n    *(s + 16) = '\\x00';\n    utils_num2hexstr(s, n);\n    utils_puts(s, true);\n}\n\nZ_UTILS void __utils_error(const char *err_str, bool need_exit) {\n    utils_puts(err_str, true);\n    if (need_exit) {\n        asm volatile(\"int3\");\n        __builtin_unreachable();\n    }\n}\n\nZ_UTILS size_t utils_strcpy(char *dst, char *src) {\n    for (size_t i = 0;; i++) {\n        dst[i] = src[i];\n        if (!src[i]) {\n            return i;\n        }\n    }\n}\n\n/*\n * Load external file.\n */\nZ_UTILS size_t utils_mmap_external_file(const char *filename, bool remmap,\n                                        unsigned long addr, int prot) {\n    // Step (0): prepare error string\n#ifdef DEBUG\n    char s_[16];\n    s_[0] = 'm';\n    s_[1] = 'm';\n    s_[2] = 'a';\n    s_[3] = 'p';\n    s_[4] = ' ';\n    s_[5] = 'f';\n    s_[6] = 'a';\n    s_[7] = 'i';\n    s_[8] = 'l';\n    s_[9] = 'e';\n    s_[10] = 'd';\n    s_[11] = '\\n';\n    s_[12] = '\\x00';\n    s_[13] = ' ';\n    s_[14] = '\\x00';\n#endif\n\n    // Step (1): open file\n    int fd = sys_open(filename, (prot & PROT_WRITE) ? 
O_RDWR : O_RDONLY, 0);\n    if (fd < 0) {\n        utils_puts(filename, false);\n        utils_puts(s_ + 13, false);\n        utils_error(s_, true);\n    }\n\n    // Step (2): get file size\n    struct stat buf = {};\n    if (sys_fstat(fd, &buf)) {\n        utils_error(s_, true);\n    }\n    size_t fd_size = buf.st_size;\n    if (fd_size != (fd_size >> PAGE_SIZE_POW2) << PAGE_SIZE_POW2) {\n        char s[0x20] = \"\";\n        utils_num2hexstr(s, fd_size);\n        utils_puts(s, false);\n        utils_error(s_, true);\n    }\n\n    // Step (3). remmap if needed\n    if (remmap) {\n        if (sys_munmap(addr, fd_size)) {\n            utils_error(s_, true);\n        }\n    }\n\n    // Step (4): mmap file\n#ifdef BINARY_SEARCH_INVALID_CRASH\n    // make gdb able to set breakpoints at mmapped pages\n    if (sys_mmap(addr, fd_size, prot, MAP_PRIVATE | MAP_FIXED, fd, 0) != addr) {\n#else\n    if (sys_mmap(addr, fd_size, prot, MAP_SHARED | MAP_FIXED, fd, 0) != addr) {\n#endif\n        utils_error(s_, true);\n    }\n    if (sys_close(fd)) {\n        utils_error(s_, true);\n    }\n\n    return fd_size;\n}\n"
  },
  {
    "path": "src/binary.c",
    "content": "/*\n * binary.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"binary.h\"\n#include \"elf_.h\"\n#include \"fork_server.h\"\n#include \"interval_splay.h\"\n#include \"loader.h\"\n#include \"utils.h\"\n\n#include \"x64_utils.c\"\n\n#include \"fork_server_bin.c\"\n#include \"loader_bin.c\"\n\n#define BINARY_MMAP_EXIST(b, addr)                                          \\\n    (g_hash_table_lookup(b->mmapped_pages, GSIZE_TO_POINTER(addr)) == NULL) \\\n        ? 
false                                                             \\\n        : true\n\n#define BINARY_MMAP_INSERT(b, addr)                               \\\n    g_hash_table_insert(b->mmapped_pages, GSIZE_TO_POINTER(addr), \\\n                        GINT_TO_POINTER(1))\n\nstatic const char null_buf[0x30] = {0};\n\n/*\n * Align trampolines_addr\n */\nZ_PRIVATE void __binary_align_trampolines_addr(Binary *b);\n\n/*\n * Setup basic information for loader\n */\nZ_PRIVATE void __binary_setup_loader(Binary *b);\n\n/*\n * Setup lookup table\n */\nZ_PRIVATE void __binary_setup_lookup_table(Binary *b);\n\n/*\n * Setup retaddr mapping\n */\nZ_PRIVATE void __binary_setup_retaddr_mapping(Binary *b);\n\n/*\n * Setup fork server\n */\nZ_PRIVATE void __binary_setup_fork_server(Binary *b);\n\n/*\n * Setup trampoline zone\n */\nZ_PRIVATE void __binary_setup_tp_zone(Binary *b);\n\n/*\n * Setter and Getter\n */\nDEFINE_GETTER(Binary, binary, ELF *, elf);\nDEFINE_GETTER(Binary, binary, const char *, original_filename);\nDEFINE_GETTER(Binary, binary, addr_t, trampolines_addr);\nDEFINE_GETTER(Binary, binary, addr_t, shadow_main);\nOVERLOAD_GETTER(Binary, binary, addr_t, shadow_code_addr) {\n    return binary->trampolines_addr;\n}\n\nOVERLOAD_SETTER(Binary, binary, addr_t, shadow_start) {\n    z_info(\"shadow _start address: %#lx\", shadow_start);\n    binary->shadow_start = shadow_start;\n\n    if (binary->prior_fork_server) {\n        // when -e option is given, we need to change the fork server to _start\n        addr_t gadget_addr = binary->fork_server_addr + fork_server_bin_len;\n        KS_ASM_JMP(gadget_addr, shadow_start);\n        z_elf_write(binary->elf, gadget_addr, ks_size, ks_encode);\n    } else {\n        addr_t gadget_addr = binary->loader_addr + loader_bin_len;\n        KS_ASM_JMP(gadget_addr, shadow_start);\n        z_elf_write(binary->elf, gadget_addr, ks_size, ks_encode);\n    }\n}\n\nOVERLOAD_SETTER(Binary, binary, addr_t, shadow_main) {\n    if 
(binary->prior_fork_server) {\n        EXITME(\"main function has not been detected\");\n    }\n\n    z_info(\"shadow main address: %#lx\", shadow_main);\n    binary->shadow_main = shadow_main;\n    addr_t gadget_addr = binary->fork_server_addr + fork_server_bin_len;\n    KS_ASM_JMP(gadget_addr, shadow_main);\n    z_elf_write(binary->elf, gadget_addr, ks_size, ks_encode);\n}\n\nOVERLOAD_SETTER(Binary, binary, ELFState, elf_state) {\n    z_elf_set_state(binary->elf, elf_state);\n}\n\nZ_PRIVATE void __binary_align_trampolines_addr(Binary *b) {\n    b->trampolines_addr = BITS_ALIGN_CELL(b->trampolines_addr, 3);\n}\n\nZ_PRIVATE void __binary_setup_loader(Binary *b) {\n    // step (0). create basic data struction\n    b->mmapped_pages =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    // current address for setting instruction\n    addr_t loader_base = z_elf_get_loader_addr(b->elf);\n    b->loader_addr = loader_base;\n    addr_t cur_addr = loader_base;\n\n    // step (1). set entrypoint to loader address\n    z_elf_get_ehdr(b->elf)->e_entry = cur_addr;\n\n    // step (2). set down loader\n    z_elf_write(b->elf, cur_addr, loader_bin_len, loader_bin);\n    cur_addr += loader_bin_len;\n\n    // step (3). jump to original entrypoint\n    addr_t loader_transfer_jmp_addr = cur_addr;\n    KS_ASM_JMP(cur_addr, z_elf_get_ori_entry(b->elf));\n    assert(ks_size == 5);\n    z_elf_write(b->elf, cur_addr, ks_size, ks_encode);\n    cur_addr += ks_size;\n\n    // step (4). 8-byte alignment for following data\n    cur_addr = BITS_ALIGN_CELL(cur_addr, 3);\n\n    // step (5). set down loader_base\n    z_elf_write(b->elf, cur_addr, sizeof(addr_t), &loader_base);\n    cur_addr += sizeof(addr_t);\n\n    // step (6). 
set down tp_addr\n    // XXX: we will first set a NULL trampoline at trampoline zone\n    addr_t trampolines_addr = z_elf_get_trampolines_addr(b->elf);\n    assert(trampolines_addr % PAGE_SIZE == 0);\n    z_elf_write(b->elf, cur_addr, sizeof(addr_t), &(trampolines_addr));\n    cur_addr += sizeof(addr_t);\n\n    // step (7). set down shared .text base address\n    addr_t shared_text_addr = z_elf_get_shared_text_addr(b->elf);\n    assert(shared_text_addr % PAGE_SIZE == 0);\n    z_elf_write(b->elf, cur_addr, sizeof(addr_t), &(shared_text_addr));\n    cur_addr += sizeof(addr_t);\n\n    // step (8). store trampolines name\n    const char *trampolines_name = z_elf_get_trampolines_name(b->elf);\n    z_elf_write(b->elf, cur_addr, z_strlen(trampolines_name) + 1,\n                trampolines_name);\n    cur_addr += z_strlen(trampolines_name) + 1;\n\n    // step (9). store lookup table name\n    const char *lookup_tabname = z_elf_get_lookup_tabname(b->elf);\n    z_elf_write(b->elf, cur_addr, z_strlen(lookup_tabname) + 1, lookup_tabname);\n    cur_addr += z_strlen(lookup_tabname) + 1;\n\n    // step (10). store pipeline filename\n    const char *pipe_filename = z_elf_get_pipe_filename(b->elf);\n    z_elf_write(b->elf, cur_addr, z_strlen(pipe_filename) + 1, pipe_filename);\n    cur_addr += z_strlen(pipe_filename) + 1;\n\n    // step (11). store shared .text name\n    const char *shared_text_name = z_elf_get_shared_text_name(b->elf);\n    z_elf_write(b->elf, cur_addr, z_strlen(shared_text_name) + 1,\n                shared_text_name);\n    cur_addr += z_strlen(shared_text_name) + 1;\n\n    // step (12). store retaddr mapping filename\n    const char *retaddr_mapping_name = z_elf_get_retaddr_mapping_name(b->elf);\n    z_elf_write(b->elf, cur_addr, z_strlen(retaddr_mapping_name) + 1,\n                retaddr_mapping_name);\n    cur_addr += z_strlen(retaddr_mapping_name) + 1;\n\n    // step (13). 
16-byte alignment for fork server (avoid error in xmm)\n    cur_addr = BITS_ALIGN_CELL(cur_addr, 4);\n\n    // step (14). prepare the address of fork server\n    b->fork_server_addr = cur_addr;\n    z_info(\"fork server address: %#lx\", b->fork_server_addr);\n    if (b->prior_fork_server) {\n        // over-write the loader_transfer_jmp_addr to the fork server\n        KS_ASM_JMP(loader_transfer_jmp_addr, b->fork_server_addr);\n        assert(ks_size == 5);\n        z_elf_write(b->elf, loader_transfer_jmp_addr, ks_size, ks_encode);\n    } else {\n        // redirect __libc_start_main into fork server address\n        addr_t load_main = z_elf_get_load_main(b->elf);\n        if (z_elf_get_is_pie(b->elf)) {\n            // size of \"lea rdi, [rip + xxx]\" is 7\n            KS_ASM(load_main, \"lea rdi, [rip %+ld];\",\n                   b->fork_server_addr - load_main - 7);\n        } else {\n            KS_ASM(load_main, \"mov rdi, %#lx;\", b->fork_server_addr);\n        }\n        assert(ks_size == 7);\n        z_elf_write(b->elf, load_main, ks_size, ks_encode);\n    }\n}\n\nZ_PRIVATE void __binary_setup_fork_server(Binary *b) {\n    // step (0). create basic data structure\n    addr_t cur_addr = b->fork_server_addr;\n\n    // step (1). set down fork server\n    uint8_t *fork_server_code = z_alloc(fork_server_bin_len, sizeof(uint8_t));\n    memcpy(fork_server_code, fork_server_bin, fork_server_bin_len);\n\n    if (z_elf_is_statically_linked(b->elf) && b->prior_fork_server) {\n        // XXX: it is important to skip the TLS initialization for\n        // statically-linked binaries when instrument_early option is on. Note\n        // that if instrument_early is not on, we do not need to wipe off such\n        // instructions because TLS will be initialized before main.\n        // XXX: there is a bug for keystone to assemble such code, so we have to\n        // encode it manually. 
See:\n        // https://github.com/keystone-engine/keystone/issues/296\n\n        /*\n         * \"mov DWORD PTR fs:0x2d0,eax;\"\n         * \"mov DWORD PTR fs:0x2d4,eax;\"\n         */\n        uint8_t tls_init_code[] = {0x64, 0x89, 0x04, 0x25, 0xd0, 0x02,\n                                   0x00, 0x00, 0x64, 0x89, 0x04, 0x25,\n                                   0xd4, 0x02, 0x00, 0x00};\n        size_t tls_init_code_len = 16;\n\n        // locate the code\n        uint8_t *hole = memmem(fork_server_code, fork_server_bin_len,\n                               tls_init_code, tls_init_code_len);\n        if (!hole) {\n            EXITME(\"TLS initialization code not found\");\n        }\n\n        // wipe the code with nop\n        memcpy(hole, z_x64_gen_nop(8), 8);\n        memcpy(hole + 8, z_x64_gen_nop(8), 8);\n    }\n\n    z_elf_write(b->elf, cur_addr, fork_server_bin_len, fork_server_code);\n    z_free(fork_server_code);\n    cur_addr += fork_server_bin_len;\n\n    // step (2). set jump gadget (default to original main/entrypoint)\n    if (b->prior_fork_server) {\n        addr_t entrypoint_addr = z_elf_get_ori_entry(b->elf);\n        KS_ASM_JMP(cur_addr, entrypoint_addr);\n        z_elf_write(b->elf, cur_addr, ks_size, ks_encode);\n        cur_addr += 5;\n    } else {\n        addr_t main_addr = z_elf_get_main(b->elf);\n        KS_ASM_JMP(cur_addr, main_addr);\n        z_elf_write(b->elf, cur_addr, ks_size, ks_encode);\n        cur_addr += 5;\n    }\n\n    // step (3). 8-byte alignment for following data\n    cur_addr = BITS_ALIGN_CELL(cur_addr, 3);\n\n    // step (4). write down whether -e option is enabled\n    uint64_t ei_enabled = (uint64_t)b->prior_fork_server;\n    z_elf_write(b->elf, cur_addr, sizeof(ei_enabled), &ei_enabled);\n    cur_addr += sizeof(ei_enabled);\n\n    // step (5). 
set random patch address\n    // TODO: random patch is disabled currently\n    b->random_patch_addr = BITS_ALIGN_CELL(cur_addr, 3);\n    b->random_patch_num = 0;\n    z_info(\"random patch address: %#lx\", b->random_patch_addr);\n}\n\nZ_PRIVATE void __binary_setup_lookup_table(Binary *b) {\n    b->lookup_table_addr = z_elf_get_lookup_table_addr(b->elf);\n}\n\nZ_PRIVATE void __binary_setup_retaddr_mapping(Binary *b) {\n    // XXX: the memory layout of retaddr mapping:\n    //      0  - 7 : number of entities\n    //      8  - 15: address of real_unw_step\n    //      16 - ??: mapping entities\n    b->retaddr_mapping_addr = z_elf_get_retaddr_mapping_addr(b->elf);\n    b->retaddr_entity_addr = b->retaddr_mapping_addr + 0x10;\n    b->retaddr_n = 0;\n\n    // XXX: we first set the number of entities as -1 to indicate this space is\n    // useless\n    int64_t n = -1;\n    z_elf_write(b->elf, b->retaddr_mapping_addr, sizeof(int64_t), &n);\n}\n\nZ_PRIVATE void __binary_setup_tp_zone(Binary *b) {\n    b->trampolines_addr = z_elf_get_trampolines_addr(b->elf);\n    b->last_tp_addr = b->trampolines_addr;\n\n    // insert a NULL Trampoline to indicate terminal\n    assert(sizeof(Trampoline) <= sizeof(null_buf));\n    z_elf_write(b->elf, b->trampolines_addr, sizeof(Trampoline),\n                (void *)null_buf);\n    b->trampolines_addr += sizeof(Trampoline);\n}\n\nZ_API Binary *z_binary_open(const char *pathname, bool prior_fork_server) {\n    // step (0). create a binary struct.\n    Binary *b = STRUCT_ALLOC(Binary);\n    b->original_filename = z_strdup(pathname);\n    b->shadow_main = INVALID_ADDR;\n    b->shadow_start = INVALID_ADDR;\n\n    b->prior_fork_server = prior_fork_server;\n\n    // step (1). setup elf\n    b->elf = z_elf_open(b->original_filename, !prior_fork_server);\n\n    // step (2). setup loader\n    __binary_setup_loader(b);\n\n    // step (3). setup lookup table\n    __binary_setup_lookup_table(b);\n\n    // step (4). 
setup fork server\n    __binary_setup_fork_server(b);\n\n    // step (5). setup trampoline zone\n    __binary_setup_tp_zone(b);\n\n    // step (6). setup retaddr mapping\n    __binary_setup_retaddr_mapping(b);\n\n    return b;\n}\n\nZ_API void z_binary_destroy(Binary *b) {\n    z_elf_destroy(b->elf);\n\n    z_free((char *)b->original_filename);\n\n    g_hash_table_destroy(b->mmapped_pages);\n\n    z_free(b);\n}\n\nZ_API void z_binary_fsync(Binary *b) {\n    // sync ELF\n    z_elf_fsync(b->elf);\n}\n\nZ_API void z_binary_save(Binary *b, const char *pathname) {\n    // save ELF\n    z_elf_save(b->elf, pathname);\n}\n\nZ_API void z_binary_create_snapshot(Binary *b, const char *pathname) {\n    z_elf_create_snapshot(b->elf, pathname);\n}\n\nZ_API void z_binary_insert_utp(Binary *b, addr_t utp_addr, const uint8_t *utp,\n                               const size_t utp_size) {\n    assert(b != NULL);\n\n    if (utp_size > PAGE_SIZE) {\n        EXITME(\"utp size is too large [%#lx]\", utp_size);\n    }\n\n    Snode *snode = z_snode_create(utp_addr, utp_size, NULL, NULL);\n    addr_t mmap_addr = 0;\n    size_t mmap_size = 0;\n    if (!z_elf_insert_utp(b->elf, snode, &mmap_addr, &mmap_size)) {\n        EXITME(\"Insert utp into an overlapped region: %#lx\", utp_addr);\n    }\n\n    z_trace(\"mmap address (%#lx) and size (%#lx)\", mmap_addr, mmap_size);\n\n    // update last tp\n    addr_t next_tp_offset = b->trampolines_addr - b->last_tp_addr;\n    z_elf_write(b->elf, b->last_tp_addr + offsetof(Trampoline, next_tp_offset),\n                sizeof(size_t), &next_tp_offset);\n    b->last_tp_addr = b->trampolines_addr;\n\n    // emit this utp\n    z_elf_write(b->elf, b->trampolines_addr + offsetof(Trampoline, mmap_addr),\n                sizeof(void *), &mmap_addr);\n    z_elf_write(b->elf, b->trampolines_addr + offsetof(Trampoline, mmap_size),\n                sizeof(size_t), &mmap_size);\n    z_elf_write(b->elf, b->trampolines_addr + offsetof(Trampoline, tp_addr),\n            
    sizeof(void *), &utp_addr);\n    z_elf_write(b->elf, b->trampolines_addr + offsetof(Trampoline, tp_size),\n                sizeof(size_t), &utp_size);\n    z_elf_write(b->elf,\n                b->trampolines_addr + offsetof(Trampoline, next_tp_offset),\n                sizeof(size_t), (char *)null_buf);\n    b->trampolines_addr += sizeof(Trampoline);\n    z_elf_write(b->elf, b->trampolines_addr, utp_size, utp);\n    b->trampolines_addr += utp_size;\n\n    __binary_align_trampolines_addr(b);\n}\n\nZ_API addr_t z_binary_insert_shadow_code(Binary *b, const uint8_t *sc,\n                                         const size_t sc_size) {\n    addr_t cur_shadow_addr = b->trampolines_addr;\n\n    z_elf_write(b->elf, b->trampolines_addr, sc_size, sc);\n    b->trampolines_addr += sc_size;\n\n    return cur_shadow_addr;\n}\n\nZ_API void z_binary_update_lookup_table(Binary *b, addr_t ori_addr,\n                                        addr_t shadow_addr) {\n    Elf64_Shdr *text = z_elf_get_shdr_text(b->elf);\n    addr_t text_addr = text->sh_addr;\n\n    if (ori_addr < text_addr)\n        EXITME(\"too small address (%#lx) compared to .text (%#lx)\", ori_addr,\n               text_addr);\n\n    size_t cell_num = ori_addr - text_addr;\n    if (cell_num > LOOKUP_TABLE_CELL_NUM)\n        EXITME(\"too big address (%#lx) compared to .text (%#lx)\", ori_addr,\n               text_addr);\n    addr_t cell_addr = b->lookup_table_addr + cell_num * LOOKUP_TABLE_CELL_SIZE;\n\n    if (shadow_addr > LOOKUP_TABLE_CELL_MASK)\n        EXITME(\"too big shadow address (%#lx)\", shadow_addr);\n\n    z_elf_write(b->elf, cell_addr, LOOKUP_TABLE_CELL_SIZE,\n                (uint8_t *)(&shadow_addr));\n}\n\nZ_API bool z_binary_check_state(Binary *b, ELFState state) {\n    return z_elf_check_state(b->elf, state);\n}\n\nZ_API void z_binary_new_retaddr_entity(Binary *b, addr_t shadow_retaddr,\n                                       addr_t ori_retaddr) {\n    // update retaddr_n first\n    b->retaddr_n 
+= 1;\n    z_elf_write(b->elf, b->retaddr_mapping_addr, sizeof(size_t),\n                &(b->retaddr_n));\n\n    uint32_t addr_buf;\n    // insert shadow_retaddr\n    addr_buf = (uint32_t)shadow_retaddr;\n    assert((addr_t)addr_buf == shadow_retaddr);\n    z_elf_write(b->elf, b->retaddr_entity_addr, sizeof(uint32_t), &addr_buf);\n    b->retaddr_entity_addr += sizeof(uint32_t);\n\n    // insert ori_retaddr\n    addr_buf = (uint32_t)ori_retaddr;\n    assert((addr_t)addr_buf == ori_retaddr);\n    z_elf_write(b->elf, b->retaddr_entity_addr, sizeof(uint32_t), &addr_buf);\n    b->retaddr_entity_addr += sizeof(uint32_t);\n}\n"
  },
  {
    "path": "src/binary.h",
    "content": "/*\n * binary.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __BINARY_H\n#define __BINARY_H\n\n#include \"buffer.h\"\n#include \"config.h\"\n#include \"elf_.h\"\n#include \"interval_splay.h\"\n\n#include <gmodule.h>\n\ntypedef addr_t bb_t;\n\n/*\n * Binary: used to story patch meta-information.\n *\n * ELF virtual memory layout can be found in elf_.h\n */\nSTRUCT(Binary, {\n    const char *original_filename;  // Path of input file\n    ELF *elf;                       // Basic ELF information\n    addr_t shadow_main;             // Address of shadow main function\n    addr_t shadow_start;            // Address of shadow _start function\n\n    // Loader\n    addr_t loader_addr;  // Address of loader\n\n    // Loader info for uTP (TramPolines for ucall/ujmp)\n    // XXX: the mmapped_pages seems useless currently (delete it maybe?)\n    GHashTable *mmapped_pages;  // Hashset of mmapped pages\n\n    // Fork server and random patcher\n    addr_t fork_server_addr;   // Address of fork server\n    addr_t random_patch_addr;  // Address of random patch table\n    addr_t random_patch_num;   // Number of random patch table\n    bool prior_fork_server;    // Whether we need to defer the fork server\n\n    // Lookup table\n    addr_t lookup_table_addr;  // Address of lookup table\n\n    // 
Retaddr mapping\n    size_t retaddr_n;             // Number of retaddr mapping entities\n    addr_t retaddr_mapping_addr;  // Address of the retaddr mapping\n    addr_t retaddr_entity_addr;   // Address of the next retaddr mapping entity\n\n    // Shadow Code and Trampolines\n    addr_t trampolines_addr;  // Next available address of trampolines\n    addr_t last_tp_addr;\n});\n\nDECLARE_GETTER(Binary, binary, ELF *, elf);\nDECLARE_GETTER(Binary, binary, const char *, original_filename);\nDECLARE_GETTER(Binary, binary, addr_t, trampolines_addr);\nDECLARE_GETTER(Binary, binary, addr_t, shadow_main);\nDECLARE_GETTER(Binary, binary, addr_t, shadow_code_addr);\nDECLARE_SETTER(Binary, binary, addr_t, shadow_main);\nDECLARE_SETTER(Binary, binary, addr_t, shadow_start);\nDECLARE_SETTER(Binary, binary, ELFState, elf_state);\n\n/*\n * Construct a binary for given file.\n */\nZ_API Binary *z_binary_open(const char *in_filename, bool prior_fork_server);\n\n/*\n * Destructor of Binary\n */\nZ_API void z_binary_destroy(Binary *b);\n\n/*\n * Save binary\n */\nZ_API void z_binary_save(Binary *b, const char *pathname);\n\n/*\n * Create a snapshot for current Binary.\n * Different from z_binary_save, this Binary's main body (except lookup table\n * and shadow) will remain unchanged even if future patches are applied.\n */\nZ_API void z_binary_create_snapshot(Binary *b, const char *pathname);\n\n/*\n * Insert a new uTP\n */\n// XXX: currently we do not use uTP in the actual rewriting, but it will be\n// extremely useful when we start to handle overlapped jmp bridges.\nZ_API void z_binary_insert_utp(Binary *b, addr_t utp_addr, const uint8_t *utp,\n                               const size_t utp_size);\n\n/*\n * Insert a new piece of shadow code, and return the address of the shadow code\n */\nZ_API addr_t z_binary_insert_shadow_code(Binary *b, const uint8_t *sc,\n                                         const size_t sc_size);\n\n/*\n * Notify binary that all shadow code has been 
inserted\n */\nZ_API void z_binary_shadow_code_notify(Binary *b, addr_t shadow_main);\n\n/*\n * Add a look up cell\n */\nZ_API void z_binary_update_lookup_table(Binary *b, addr_t ori_addr,\n                                        addr_t shadow_addr);\n\n/*\n * Sync binary with underlying files\n */\nZ_API void z_binary_fsync(Binary *b);\n\n/*\n * Wrapper for z_elf_check_state()\n */\nZ_API bool z_binary_check_state(Binary *b, ELFState state);\n\n/*\n * Add a new retaddr entity\n */\nZ_API void z_binary_new_retaddr_entity(Binary *b, addr_t shadow_retaddr,\n                                       addr_t ori_retaddr);\n#endif\n"
  },
  {
    "path": "src/buffer.c",
    "content": "/*\n * buffer.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"buffer.h\"\n#include \"utils.h\"\n\n/*\n * Extend buffer's chunk so that it can contain at lease new_chunk_size bytes\n */\nZ_PRIVATE void __buffer_extend(Buffer *buf, size_t new_chunk_size);\n\n/*\n * Create an empty buffer whose chunk can contain at lease size bytes\n */\nZ_PRIVATE Buffer *__buffer_new(size_t size);\n\nZ_PRIVATE Buffer *__buffer_new(size_t size) {\n    // Get chunk_size\n    size_t chunk_size = 8;\n    if (size >= 1) {\n        chunk_size = size;\n        chunk_size |= (chunk_size >> 1);\n        chunk_size |= (chunk_size >> 2);\n        chunk_size |= (chunk_size >> 4);\n        chunk_size |= (chunk_size >> 8);\n        chunk_size |= (chunk_size >> 16);\n        chunk_size |= (chunk_size >> 32);\n        chunk_size += 1;\n    }\n    assert(chunk_size > size);\n\n    z_trace(\"get chunk_size (%#lx) for requested size (%#lx)\", chunk_size,\n            size);\n\n    // Create a buffer\n    Buffer *buf = STRUCT_ALLOC(Buffer);\n    buf->raw_buf = (uint8_t *)z_alloc(chunk_size, sizeof(uint8_t));\n    buf->size = 0;\n    buf->chunk_size = chunk_size;\n\n    return buf;\n}\n\nZ_PRIVATE void __buffer_extend(Buffer *buf, size_t new_chunk_size) {\n    assert(buf != NULL);\n    z_trace(\"extend to %#lx bytes, 
original one is %#lx bytes\", new_chunk_size,\n            buf->chunk_size);\n    while (new_chunk_size >= buf->chunk_size) {\n        if (buf->chunk_size * 2 <= buf->chunk_size) {\n            EXITME(\"too big chunk size (%#lx)\", buf->chunk_size);\n        }\n        buf->raw_buf = z_realloc(buf->raw_buf, buf->chunk_size * 2);\n        buf->chunk_size *= 2;\n    }\n}\n\n/*\n * Setter and Getter\n */\nDEFINE_GETTER(Buffer, buffer, size_t, size);\nDEFINE_GETTER(Buffer, buffer, uint8_t *, raw_buf);\n\nZ_API Buffer *z_buffer_create(const uint8_t *ptr, size_t size) {\n    Buffer *buf = __buffer_new(size);\n    if (ptr != NULL) {\n        memcpy(buf->raw_buf, ptr, size);\n    } else {\n        if (size) {\n            EXITME(\"try to create a buffer with NULL ptr and positive size\");\n        }\n    }\n    buf->size = size;\n    return buf;\n}\n\nZ_API void z_buffer_push(Buffer *buf, uint8_t ch) {\n    assert(buf != NULL);\n    __buffer_extend(buf, buf->size + 1);\n    buf->raw_buf[buf->size] = ch;\n    buf->size += 1;\n}\n\nZ_API void z_buffer_append(Buffer *dst, Buffer *src) {\n    assert(dst != NULL && src != NULL);\n    __buffer_extend(dst, dst->size + src->size);\n    memcpy(dst->raw_buf + dst->size, src->raw_buf, src->size);\n    dst->size += src->size;\n}\n\nZ_API void z_buffer_append_raw(Buffer *buf, const uint8_t *ptr, size_t size) {\n    assert(buf != NULL);\n    if (ptr != NULL) {\n        __buffer_extend(buf, buf->size + size);\n        memcpy(buf->raw_buf + buf->size, ptr, size);\n        buf->size += size;\n    }\n}\n\nZ_API Buffer *z_buffer_read_file(const char *pathname) {\n    FILE *f = z_fopen(pathname, \"rb\");\n\n    // Get file size\n    z_fseek(f, 0L, SEEK_END);\n    size_t f_size = (size_t)z_ftell(f);\n\n    // Create a buffer\n    Buffer *buf = (Buffer *)__buffer_new(f_size);\n\n    // Read file\n    z_fseek(f, 0L, SEEK_SET);\n    size_t r_size = z_fread(buf->raw_buf, sizeof(uint8_t), f_size, f);\n    if (r_size < f_size) {\n        
EXITME(\"read %lu bytes from \\\"%s\\\", but %lu bytes expected\", r_size,\n               pathname, f_size);\n    }\n    buf->size = r_size;\n\n    z_fclose(f);\n\n    z_trace(\"successfully read %lu bytes from \\\"%s\\\"\", f_size, pathname);\n    return buf;\n}\n\nZ_API void z_buffer_write_file(Buffer *buf, const char *pathname) {\n    assert(buf != NULL);\n    FILE *f = z_fopen(pathname, \"wb\");\n\n    size_t size = z_fwrite(buf->raw_buf, sizeof(uint8_t), buf->size, f);\n    if (size != buf->size) {\n        EXITME(\n            \"fail when writing content to \\\"%s\\\", expect %ld bytes, but only \"\n            \"%ld bytes\",\n            pathname, buf->size, size);\n    }\n\n    z_fclose(f);\n}\n\nZ_API Buffer *z_buffer_dup(Buffer *src) {\n    assert(src != NULL);\n    Buffer *dst = STRUCT_ALLOC(Buffer);\n    dst->size = src->size;\n    dst->chunk_size = src->chunk_size;\n    dst->raw_buf = z_alloc(dst->chunk_size, sizeof(uint8_t));\n    memcpy(dst->raw_buf, src->raw_buf, dst->size);\n    return dst;\n}\n\nZ_API void z_buffer_destroy(Buffer *buf) {\n    assert(buf != NULL);\n    // Free Buffer.buf\n    memset(buf->raw_buf, 0, buf->size);\n    z_free((void *)buf->raw_buf);\n\n    // Free Buffer itself\n    memset(buf, 0, sizeof(Buffer));\n    z_free((void *)buf);\n}\n\nZ_API uint8_t *z_buffer_seek(Buffer *buf, size_t offset, int whence) {\n    assert(buf != NULL);\n    if (offset >= buf->size) {\n        z_warn(\"offset (%lu) is bigger than buffer size (%lu)\", offset,\n               buf->size);\n        return NULL;\n    }\n\n    size_t st_offset;\n    if (whence == SEEK_END) {\n        // The last byte should be 0 from SEEK_END\n        st_offset = buf->size - offset - 1;\n    } else if (whence == SEEK_SET) {\n        st_offset = offset;\n    } else {\n        z_warn(\"invalid whence (%d)\", whence);\n        return NULL;\n    }\n\n    return (buf->raw_buf + st_offset);\n}\n\nZ_API size_t z_buffer_tell(Buffer *buf, const uint8_t *ptr, int whence) {\n    
assert(buf != NULL);\n    if (ptr < buf->raw_buf) {\n        z_warn(\"ptr is smaller than buf->raw_buf\");\n        return SIZE_MAX;\n    }\n\n    if (ptr - buf->raw_buf >= buf->size) {\n        z_warn(\"ptr is bigger than buf->raw_buf + buf->size\");\n        return SIZE_MAX;\n    }\n\n    if (whence == SEEK_END) {\n        return (buf->size - (ptr - buf->raw_buf) - 1);\n    } else if (whence == SEEK_SET) {\n        return (ptr - buf->raw_buf);\n    } else {\n        z_warn(\"invalid whence (%d)\", whence);\n        return SIZE_MAX;\n    }\n}\n\nZ_API void z_buffer_truncate(Buffer *buf, size_t index) {\n    assert(buf != NULL);\n    if (index >= buf->size) {\n        z_trace(\"index is out of buffer (%lu >= %lu)\", index, buf->size);\n        return;\n    }\n\n    buf->size = index;\n}\n\nZ_API void z_buffer_fill(Buffer *buf, uint8_t ch, size_t size) {\n    assert(buf != NULL);\n    if (size < buf->size) {\n        z_warn(\n            \"buffer's original size (%lu) is bigger than requested size (%lu)\",\n            buf->size, size);\n        return;\n    }\n    __buffer_extend(buf, size);\n    if (ch != 0) {\n        // Little optimization\n        memset(buf->raw_buf + buf->size, ch, size - buf->size);\n    }\n    buf->size = size;\n}\n"
  },
  {
    "path": "src/buffer.h",
    "content": "/*\n * buffer.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __BUFFER_H\n#define __BUFFER_H\n\n#include \"config.h\"\n\n/*\n * Buffer structure for all heap-allocated data\n */\nSTRUCT(Buffer, {\n    uint8_t *raw_buf;\n    size_t size;\n    size_t chunk_size;\n});\n\n/*\n * Setter and Getter\n */\nDECLARE_GETTER(Buffer, buffer, size_t, size);\nDECLARE_GETTER(Buffer, buffer, uint8_t *, raw_buf);\n\n/*\n * Create a buffer from a raw pointer.\n * If ptr == NULL and size == 0, return an empty buffer\n */\nZ_API Buffer *z_buffer_create(const uint8_t *ptr, size_t size);\n\n/*\n * Push a ch into buffer\n */\nZ_API void z_buffer_push(Buffer *buf, uint8_t ch);\n\n/*\n * Append src buffer into the end of buffer dst\n */\nZ_API void z_buffer_append(Buffer *dst, Buffer *src);\n\n/*\n * Append raw pointer into the end of buffer\n */\nZ_API void z_buffer_append_raw(Buffer *buf, const uint8_t *ptr, size_t size);\n\n/*\n * Fill buffer with ch to size bytes\n */\nZ_API void z_buffer_fill(Buffer *buf, uint8_t ch, size_t size);\n\n/*\n * Create a buffer and read content from pathname\n */\nZ_API Buffer *z_buffer_read_file(const char *pathname);\n\n/*\n * Create a file and write content to pathname\n */\nZ_API void z_buffer_write_file(Buffer *buf, const char *pathname);\n\n/*\n * Duplicate a buffer\n 
*/\nZ_API Buffer *z_buffer_dup(Buffer *src);\n\n/*\n * Destructor of Buffer\n */\nZ_API void z_buffer_destroy(Buffer *buf);\n\n/*\n * Seek an offset, return a pointer to that offset.\n * Return NULL if the offset is invalid.\n */\nZ_API uint8_t *z_buffer_seek(Buffer *buf, size_t offset, int whence);\n\n/*\n * Tell a pointer, return the pointer's offset on the buffer.\n * Return SIZE_MAX if the pointer is not on the buffer.\n */\nZ_API size_t z_buffer_tell(Buffer *buf, const uint8_t *ptr, int whence);\n\n/*\n * Truncate all content after index (inclusive).\n */\nZ_API void z_buffer_truncate(Buffer *buf, size_t index);\n\n#endif\n"
  },
  {
    "path": "src/capstone_.c",
    "content": "/*\n * capstone_.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"capstone_.h\"\n#include \"utils.h\"\n\n#define __FLG_READ(F) static uint64_t __FLG_READ_##F = X86_EFLAGS_TEST_##F\n__FLG_READ(OF);\n__FLG_READ(SF);\n__FLG_READ(ZF);\n__FLG_READ(AF);\n__FLG_READ(CF);\n__FLG_READ(PF);\n#undef __FLG_READ\n\n#define __FLG_WRITE(F)                                                      \\\n    static uint64_t __FLG_WRITE_##F =                                       \\\n        X86_EFLAGS_MODIFY_##F | X86_EFLAGS_RESET_##F | X86_EFLAGS_SET_##F | \\\n        X86_EFLAGS_UNDEFINED_##F\n__FLG_WRITE(OF);\n__FLG_WRITE(SF);\n__FLG_WRITE(ZF);\n__FLG_WRITE(AF);\n__FLG_WRITE(CF);\n__FLG_WRITE(PF);\n#undef __FLA_WRITE\n\n/*\n * Mapping a eflag action into individual flag's read state\n */\nZ_PRIVATE FLGState __capstone_mapping_flg_read(uint64_t flg_state);\n\n/*\n * Mapping a eflag action into individual flag's write state\n */\nZ_PRIVATE FLGState __capstone_mapping_flg_write(uint64_t flg_state);\n\n/*\n * Mapping CAPSTONE general purpose register info into GPRState. 
Here we mapping\n * a sub-register into its complete form.\n *\n * More information please refer to\n *   https://www.tortall.net/projects/yasm/manual/html/arch-x86-registers.html.\n */\nZ_PRIVATE GPRState __capstone_mapping_pgr(x86_reg reg_id);\n\n/*\n * Filter CAPSTONE general purpose register, we only need 32-bit and 64-bit GPR\n * here\n */\nZ_PRIVATE GPRState __capstone_filter_pgr(x86_reg reg_id);\n\nZ_PRIVATE FLGState __capstone_mapping_flg_write(uint64_t flg_state) {\n#define __FLG_MAPPING_WRITE(fs, F)         \\\n    do {                                   \\\n        if (flg_state & __FLG_WRITE_##F) { \\\n            (fs) |= FLGSTATE_##F;          \\\n        }                                  \\\n    } while (0)\n\n    FLGState fs = 0;\n    __FLG_MAPPING_WRITE(fs, OF);\n    __FLG_MAPPING_WRITE(fs, SF);\n    __FLG_MAPPING_WRITE(fs, ZF);\n    __FLG_MAPPING_WRITE(fs, AF);\n    __FLG_MAPPING_WRITE(fs, CF);\n    __FLG_MAPPING_WRITE(fs, PF);\n    return fs;\n\n#undef __FLG_MAPPING_WRITE\n}\n\nZ_PRIVATE FLGState __capstone_mapping_flg_read(uint64_t flg_state) {\n#define __FLG_MAPPING_READ(fs, F)         \\\n    do {                                  \\\n        if (flg_state & __FLG_READ_##F) { \\\n            (fs) |= FLGSTATE_##F;         \\\n        }                                 \\\n    } while (0)\n\n    FLGState fs = 0;\n    __FLG_MAPPING_READ(fs, OF);\n    __FLG_MAPPING_READ(fs, SF);\n    __FLG_MAPPING_READ(fs, ZF);\n    __FLG_MAPPING_READ(fs, AF);\n    __FLG_MAPPING_READ(fs, CF);\n    __FLG_MAPPING_READ(fs, PF);\n    return fs;\n\n#undef __FLG_MAPPING_READ\n}\n\nZ_PRIVATE GPRState __capstone_mapping_pgr(x86_reg reg_id) {\n#define __GPR_MAPPING_1(T) \\\n    case X86_REG_##T##H:   \\\n    case X86_REG_##T##L:   \\\n    case X86_REG_##T##X:   \\\n    case X86_REG_E##T##X:  \\\n    case X86_REG_R##T##X:  \\\n        return GPRSTATE_R##T##X\n\n#define __GPR_MAPPING_2(T) \\\n    case X86_REG_##T:      \\\n    case X86_REG_##T##L:   \\\n    case X86_REG_E##T:   
  \\\n    case X86_REG_R##T:     \\\n        return GPRSTATE_R##T\n\n#define __GPR_MAPPING_3(T) \\\n    case X86_REG_##T##B:   \\\n    case X86_REG_##T##W:   \\\n    case X86_REG_##T##D:   \\\n    case X86_REG_##T:      \\\n        return GPRSTATE_##T\n\n    switch (reg_id) {\n        __GPR_MAPPING_1(A);\n        __GPR_MAPPING_1(B);\n        __GPR_MAPPING_1(C);\n        __GPR_MAPPING_1(D);\n\n        __GPR_MAPPING_2(DI);\n        __GPR_MAPPING_2(SI);\n        __GPR_MAPPING_2(BP);\n\n        __GPR_MAPPING_3(R8);\n        __GPR_MAPPING_3(R9);\n        __GPR_MAPPING_3(R10);\n        __GPR_MAPPING_3(R11);\n        __GPR_MAPPING_3(R12);\n        __GPR_MAPPING_3(R13);\n        __GPR_MAPPING_3(R14);\n        __GPR_MAPPING_3(R15);\n\n        default:\n            return 0;\n    }\n\n#undef __GPR_MAPPING_1\n#undef __GPR_MAPPING_2\n#undef __GPR_MAPPING_3\n}\n\nZ_PRIVATE GPRState __capstone_filter_pgr(x86_reg reg_id) {\n#define __GPR_FILTER_1(T) \\\n    case X86_REG_E##T##X: \\\n    case X86_REG_R##T##X: \\\n        return GPRSTATE_R##T##X\n\n#define __GPR_FILTER_2(T) \\\n    case X86_REG_E##T:    \\\n    case X86_REG_R##T:    \\\n        return GPRSTATE_R##T\n\n#define __GPR_FILTER_3(T) \\\n    case X86_REG_##T##D:  \\\n    case X86_REG_##T:     \\\n        return GPRSTATE_##T\n\n    switch (reg_id) {\n        __GPR_FILTER_1(A);\n        __GPR_FILTER_1(B);\n        __GPR_FILTER_1(C);\n        __GPR_FILTER_1(D);\n\n        __GPR_FILTER_2(DI);\n        __GPR_FILTER_2(SI);\n        __GPR_FILTER_2(BP);\n\n        __GPR_FILTER_3(R8);\n        __GPR_FILTER_3(R9);\n        __GPR_FILTER_3(R10);\n        __GPR_FILTER_3(R11);\n        __GPR_FILTER_3(R12);\n        __GPR_FILTER_3(R13);\n        __GPR_FILTER_3(R14);\n        __GPR_FILTER_3(R15);\n\n        default:\n            return 0;\n    }\n\n#undef __GPR_FILTER_1\n#undef __GPR_FILTER_2\n#undef __GPR_FILTER_3\n}\n\nZ_API bool z_capstone_is_call(const cs_insn *inst) {\n    return (inst->id == X86_INS_CALL) || (inst->id == 
X86_INS_LCALL);\n}\n\nZ_API bool z_capstone_is_jmp(const cs_insn *inst) {\n    return (inst->id == X86_INS_JMP) || (inst->id == X86_INS_LJMP);\n}\n\nZ_API bool z_capstone_is_xbegin(const cs_insn *inst) {\n    return inst->id == X86_INS_XBEGIN;\n}\n\nZ_API bool z_capstone_is_ret(const cs_insn *inst) {\n    return inst->id == X86_INS_RET;\n}\n\nZ_API bool z_capstone_is_loop(const cs_insn *inst) {\n    switch (inst->id) {\n        case X86_INS_LOOP:\n        case X86_INS_LOOPE:\n        case X86_INS_LOOPNE:\n            return true;\n        default:\n            return false;\n    }\n}\nZ_API bool z_capstone_is_cjmp(const cs_insn *inst) {\n    switch (inst->id) {\n        case X86_INS_JAE:\n        case X86_INS_JA:\n        case X86_INS_JBE:\n        case X86_INS_JB:\n        case X86_INS_JCXZ:\n        case X86_INS_JECXZ:\n        case X86_INS_JE:\n        case X86_INS_JGE:\n        case X86_INS_JG:\n        case X86_INS_JLE:\n        case X86_INS_JL:\n        case X86_INS_JNE:\n        case X86_INS_JNO:\n        case X86_INS_JNP:\n        case X86_INS_JNS:\n        case X86_INS_JO:\n        case X86_INS_JP:\n        case X86_INS_JRCXZ:\n        case X86_INS_JS:\n            return true;\n        default:\n            return false;\n    }\n}\n\nZ_API bool z_capstone_is_terminator(const cs_insn *inst) {\n    // TODO: better non-return analysis? 
(light-weight approach)\n    if (z_capstone_is_jmp(inst))\n        return true;\n    if (z_capstone_is_cjmp(inst))\n        return false;\n    if (z_capstone_is_call(inst))\n        return false;\n    if (z_capstone_is_loop(inst))\n        return false;\n    if (z_capstone_is_xbegin(inst))\n        return false;\n\n    // check HLT first\n    if (inst->id == X86_INS_HLT) {\n        return true;\n    }\n\n    cs_detail *detail = inst->detail;\n    for (int32_t i = 0; i < detail->groups_count; i++) {\n        switch (detail->groups[i]) {\n            case X86_GRP_JUMP:\n            case X86_GRP_CALL:\n            case X86_GRP_BRANCH_RELATIVE:\n                EXITME(\n                    \"branch-relative instruction should be catched before \"\n                    \"[%#lx:\\t%s %s]\",\n                    inst->address, inst->mnemonic, inst->op_str);\n            /*\n             * instructions in RET and IRET group will change the control flow,\n             * but most instructions (except HLT) in INT and PRIVILEGE groups\n             * seem not. 
Please refer to\n             * https://github.com/aquynh/capstone/blob/master/arch/X86/X86MappingInsn_reduce.inc\n             * for more information\n             */\n            case X86_GRP_RET:\n            case X86_GRP_IRET:\n                return true;\n            case X86_GRP_INT:\n            case X86_GRP_PRIVILEGE:\n            default:\n                continue;\n        }\n    }\n\n    return false;\n}\n\nZ_API bool z_capstone_is_rare(const cs_insn *inst) {\n    // we maintain a rare instruction list to benefit hint collection\n    switch (inst->id) {\n        case X86_INS_OUT:\n        case X86_INS_OUTSB:\n        case X86_INS_OUTSD:\n        case X86_INS_OUTSW:\n        case X86_INS_IN:\n        case X86_INS_IRETD:\n        case X86_INS_FLD:\n        case X86_INS_ENTER:\n        case X86_INS_XCHG:\n            return true;\n        default:\n            return false;\n    }\n}\n\nZ_API RegState *z_capstone_get_register_state(const cs_insn *inst) {\n    RegState *rs = STRUCT_ALLOC(RegState);\n\n    // step (1). get grp\n    cs_regs regs_read, regs_write;\n    uint8_t regs_read_count, regs_write_count;\n    if (cs_regs_access(cs, inst, regs_read, &regs_read_count, regs_write,\n                       &regs_write_count)) {\n        EXITME(\"fail on cs_regs_access\");\n    }\n    rs->gpr_read = rs->gpr_write = 0;\n    rs->gpr_read_32_64 = rs->gpr_write_32_64 = 0;\n    // step (1.1). read\n    for (int i = 0; i < regs_read_count; i++) {\n        rs->gpr_read |= __capstone_mapping_pgr(regs_read[i]);\n        rs->gpr_read_32_64 |= __capstone_filter_pgr(regs_read[i]);\n    }\n    // step (1.2). write\n    for (int i = 0; i < regs_write_count; i++) {\n        rs->gpr_write |= __capstone_mapping_pgr(regs_write[i]);\n        rs->gpr_write_32_64 |= __capstone_filter_pgr(regs_write[i]);\n    }\n\n    // step (2). 
get sse\n#define __SSE_MAPPING(T, N, reg, states) \\\n    do {                                 \\\n        if ((reg) == X86_REG_##T##N) {   \\\n            (states) |= T##STATE_##T##N; \\\n        }                                \\\n    } while (0)\n\n#define __SSE_MAPPING_FOR_ALL(T, reg, states) \\\n    do {                                      \\\n        __SSE_MAPPING(T, 0, reg, states);     \\\n        __SSE_MAPPING(T, 1, reg, states);     \\\n        __SSE_MAPPING(T, 2, reg, states);     \\\n        __SSE_MAPPING(T, 3, reg, states);     \\\n        __SSE_MAPPING(T, 4, reg, states);     \\\n        __SSE_MAPPING(T, 5, reg, states);     \\\n        __SSE_MAPPING(T, 6, reg, states);     \\\n        __SSE_MAPPING(T, 7, reg, states);     \\\n        __SSE_MAPPING(T, 8, reg, states);     \\\n        __SSE_MAPPING(T, 9, reg, states);     \\\n        __SSE_MAPPING(T, 10, reg, states);    \\\n        __SSE_MAPPING(T, 11, reg, states);    \\\n        __SSE_MAPPING(T, 12, reg, states);    \\\n        __SSE_MAPPING(T, 13, reg, states);    \\\n        __SSE_MAPPING(T, 14, reg, states);    \\\n        __SSE_MAPPING(T, 15, reg, states);    \\\n    } while (0)\n\n    for (int i = 0; i < regs_read_count; i++) {\n        __SSE_MAPPING_FOR_ALL(XMM, regs_read[i], rs->xmm_read);\n        __SSE_MAPPING_FOR_ALL(YMM, regs_read[i], rs->ymm_read);\n        __SSE_MAPPING_FOR_ALL(ZMM, regs_read[i], rs->zmm_read);\n    }\n    for (int i = 0; i < regs_write_count; i++) {\n        __SSE_MAPPING_FOR_ALL(XMM, regs_write[i], rs->xmm_write);\n        __SSE_MAPPING_FOR_ALL(YMM, regs_write[i], rs->ymm_write);\n        __SSE_MAPPING_FOR_ALL(ZMM, regs_write[i], rs->zmm_write);\n    }\n\n#undef __SSE_MAPPING\n#undef __SSE_MAPPING_FOR_ALL\n\n    // step (3). get flg\n    rs->flg_read = rs->flg_write = 0;\n    // step (3.0). 
check FPU first\n    for (int i = 0; i < inst->detail->groups_count; i++) {\n        if (inst->detail->groups[i] == X86_GRP_FPU) {\n            goto DONE;\n        }\n    }\n    // step (3.1). get flg state\n    rs->flg_read = __capstone_mapping_flg_read(inst->detail->x86.eflags);\n    rs->flg_write = __capstone_mapping_flg_write(inst->detail->x86.eflags);\n\n    /*\n     * XXX: capstone: *sbb* and *adc* instructions do not have any TEST_CF bit.\n     * Hence, we use a very conservative approach to get rs->flg_read. Please\n     * refer to https://github.com/aquynh/capstone/issues/1696 for more\n     * information.\n     *\n     * However, we do not remove previous rs->flg_read code. Maybe one day we\n     * can patch capstone to enable a more powerful optimization.\n     */\n    // TODO: prepare our own patch for capstone and keystone.\n    for (int i = 0; i < regs_read_count; i++) {\n        if (regs_read[i] == X86_REG_EFLAGS) {\n            rs->flg_read = FLGSTATE_ALL;\n            break;\n        }\n    }\n\nDONE:\n    return rs;\n}\n\n// XXX: call qword byte [xxx]\nZ_API bool z_capstone_is_const_mem_ucall(const cs_insn *inst,\n                                         addr_t *addr_ptr) {\n    // first check that it is a jump instruction\n    if (inst->id != X86_INS_CALL) {\n        return false;\n    }\n\n    // then check that it only has one operand\n    cs_detail *detail = inst->detail;\n    if (detail->x86.op_count != 1) {\n        return false;\n    }\n\n    // then check the operand is a qword memory\n    cs_x86_op *op = &(detail->x86.operands[0]);\n    if (op->type != X86_OP_MEM || op->mem.base != X86_REG_INVALID ||\n        op->mem.index != X86_REG_INVALID || op->size != 8) {\n        return false;\n    }\n\n    // update addr_ptr\n    if (addr_ptr) {\n        *addr_ptr = op->mem.disp;\n    }\n    return true;\n}\n\n// XXX: call qword byte [rip+xxx]\nZ_API bool z_capstone_is_pc_related_ucall(const cs_insn *inst,\n                                         
 addr_t *addr_ptr) {\n    // first check that it is a jump instruction\n    if (inst->id != X86_INS_CALL) {\n        return false;\n    }\n\n    // then check that it only has one operand\n    cs_detail *detail = inst->detail;\n    if (detail->x86.op_count != 1) {\n        return false;\n    }\n\n    // then check the operand is a qword memory\n    cs_x86_op *op = &(detail->x86.operands[0]);\n    if (op->type != X86_OP_MEM || op->mem.base != X86_REG_RIP ||\n        op->mem.index != X86_REG_INVALID || op->size != 8) {\n        return false;\n    }\n\n    // update addr_ptr\n    if (addr_ptr) {\n        *addr_ptr = inst->address + inst->size + op->mem.disp;\n    }\n    return true;\n}\n\n// XXX: jmp qword byte [xxx]\nZ_API bool z_capstone_is_const_mem_ujmp(const cs_insn *inst, addr_t *addr_ptr) {\n    // first check that it is a jump instruction\n    if (inst->id != X86_INS_JMP) {\n        return false;\n    }\n\n    // then check that it only has one operand\n    cs_detail *detail = inst->detail;\n    if (detail->x86.op_count != 1) {\n        return false;\n    }\n\n    // then check the operand is a qword memory\n    cs_x86_op *op = &(detail->x86.operands[0]);\n    if (op->type != X86_OP_MEM || op->mem.base != X86_REG_INVALID ||\n        op->mem.index != X86_REG_INVALID || op->size != 8) {\n        return false;\n    }\n\n    // update addr_ptr\n    if (addr_ptr) {\n        *addr_ptr = op->mem.disp;\n    }\n    return true;\n}\n\n// XXX: jmp qword byte [rip+xxx]\nZ_API bool z_capstone_is_pc_related_ujmp(const cs_insn *inst,\n                                         addr_t *addr_ptr) {\n    // first check that it is a jump instruction\n    if (inst->id != X86_INS_JMP) {\n        return false;\n    }\n\n    // then check that it only has one operand\n    cs_detail *detail = inst->detail;\n    if (detail->x86.op_count != 1) {\n        return false;\n    }\n\n    // then check the operand is a qword memory\n    cs_x86_op *op = &(detail->x86.operands[0]);\n    if 
(op->type != X86_OP_MEM || op->mem.base != X86_REG_RIP ||\n        op->mem.index != X86_REG_INVALID || op->size != 8) {\n        return false;\n    }\n\n    // update addr_ptr\n    if (addr_ptr) {\n        *addr_ptr = inst->address + inst->size + op->mem.disp;\n    }\n    return true;\n}\n\nZ_API void z_capstone_show_gpr_state(GPRState gpr_state) {\n    z_info(\n        \"rax %d | rbx %d | rcx %d | rdx %d | rdi %d | rsi %d | rbp %d | r8 %d \"\n        \"| r9 %d | r10 %d | r11 %d | r12 %d | r13 %d | r14 %d | r15 %d\",\n        (gpr_state >> 0) & 1UL, (gpr_state >> 1) & 1UL, (gpr_state >> 2) & 1UL,\n        (gpr_state >> 3) & 1UL, (gpr_state >> 4) & 1UL, (gpr_state >> 5) & 1UL,\n        (gpr_state >> 6) & 1UL, (gpr_state >> 7) & 1UL, (gpr_state >> 8) & 1UL,\n        (gpr_state >> 9) & 1UL, (gpr_state >> 10) & 1UL,\n        (gpr_state >> 11) & 1UL, (gpr_state >> 12) & 1UL,\n        (gpr_state >> 13) & 1UL, (gpr_state >> 14) & 1UL);\n}\n\nZ_API void z_capstone_show_flg_state(FLGState flg_state) {\n    z_info(\"OF %d | SF %d | ZF %d | AF %d | CF %d | PF %d\",\n           (flg_state >> 0) & 1UL, (flg_state >> 1) & 1UL,\n           (flg_state >> 2) & 1UL, (flg_state >> 3) & 1UL,\n           (flg_state >> 4) & 1UL, (flg_state >> 5) & 1UL);\n}\n"
  },
  {
    "path": "src/capstone_.h",
    "content": "/*\n * capstone_.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __CAPSTONE__H\n#define __CAPSTONE__H\n\n/*\n * My own wrapper for capstone, which aims at extracting information from known\n * cs_insn structures. CS_DISASM is not included in this file because it is used\n * for disassembly instead of check cs_insn's features.\n */\n\n#include \"config.h\"\n\n#include <capstone/capstone.h>\n\n/*\n * General Purposed Register\n */\n#define CAPSTONE_FORALL_GPR(STATEMENT) \\\n    do {                               \\\n        STATEMENT(RAX);                \\\n        STATEMENT(RBX);                \\\n        STATEMENT(RCX);                \\\n        STATEMENT(RDX);                \\\n        STATEMENT(RBP);                \\\n        STATEMENT(RDI);                \\\n        STATEMENT(RSI);                \\\n        STATEMENT(R8);                 \\\n        STATEMENT(R9);                 \\\n        STATEMENT(R10);                \\\n        STATEMENT(R11);                \\\n        STATEMENT(R12);                \\\n        STATEMENT(R13);                \\\n        STATEMENT(R14);                \\\n        STATEMENT(R15);                \\\n    } while (0)\n\ntypedef enum gpr_state_t {\n    GPRSTATE_RAX = (1UL << 0),\n    GPRSTATE_RBX = (1UL << 1),\n    GPRSTATE_RCX = (1UL << 
2),\n    GPRSTATE_RDX = (1UL << 3),\n    GPRSTATE_RDI = (1UL << 4),\n    GPRSTATE_RSI = (1UL << 5),\n    GPRSTATE_RBP = (1UL << 6),  // <-- NO RSP HERE\n    GPRSTATE_R8 = (1UL << 7),\n    GPRSTATE_R9 = (1UL << 8),\n    GPRSTATE_R10 = (1UL << 9),\n    GPRSTATE_R11 = (1UL << 10),\n    GPRSTATE_R12 = (1UL << 11),\n    GPRSTATE_R13 = (1UL << 12),\n    GPRSTATE_R14 = (1UL << 13),\n    GPRSTATE_R15 = (1UL << 14),\n\n    GPRSTATE_ALL = ((1UL << 15) - 1),\n} GPRState;\n\n/*\n * EFLAGS Register\n */\n#define CAPSTONE_FORALL_FLG(STATEMENT) \\\n    do {                               \\\n        STATEMENT(OF);                 \\\n        STATEMENT(SF);                 \\\n        STATEMENT(ZF);                 \\\n        STATEMENT(AF);                 \\\n        STATEMENT(CF);                 \\\n        STATEMENT(PF);                 \\\n    } while (0)\n\ntypedef enum flg_state_t {\n    FLGSTATE_OF = (1UL << 0),\n    FLGSTATE_SF = (1UL << 1),\n    FLGSTATE_ZF = (1UL << 2),\n    FLGSTATE_AF = (1UL << 3),\n    FLGSTATE_CF = (1UL << 4),\n    FLGSTATE_PF = (1UL << 5),\n\n    FLGSTATE_ALL = ((1UL << 6) - 1),\n} FLGState;\n\n/*\n * SSE Register\n */\n#define CAPSTONE_FORALL_SSE(T, STATEMENT) \\\n    do {                                  \\\n        STATEMENT(T, 0);                  \\\n        STATEMENT(T, 1);                  \\\n        STATEMENT(T, 2);                  \\\n        STATEMENT(T, 3);                  \\\n        STATEMENT(T, 4);                  \\\n        STATEMENT(T, 5);                  \\\n        STATEMENT(T, 6);                  \\\n        STATEMENT(T, 7);                  \\\n        STATEMENT(T, 8);                  \\\n        STATEMENT(T, 9);                  \\\n        STATEMENT(T, 10);                 \\\n        STATEMENT(T, 11);                 \\\n        STATEMENT(T, 12);                 \\\n        STATEMENT(T, 13);                 \\\n        STATEMENT(T, 14);                 \\\n        STATEMENT(T, 15);                 \\\n    } while 
(0)\n\n#define __SSE_DEFINE(T, N) T##STATE_##T##N = (1UL << N)\n#define __SSE_DEFINE_ALL(enum_name, T) \\\n    typedef enum enum_name {           \\\n        __SSE_DEFINE(T, 0),            \\\n        __SSE_DEFINE(T, 1),            \\\n        __SSE_DEFINE(T, 2),            \\\n        __SSE_DEFINE(T, 3),            \\\n        __SSE_DEFINE(T, 4),            \\\n        __SSE_DEFINE(T, 5),            \\\n        __SSE_DEFINE(T, 6),            \\\n        __SSE_DEFINE(T, 7),            \\\n        __SSE_DEFINE(T, 8),            \\\n        __SSE_DEFINE(T, 9),            \\\n        __SSE_DEFINE(T, 10),           \\\n        __SSE_DEFINE(T, 11),           \\\n        __SSE_DEFINE(T, 12),           \\\n        __SSE_DEFINE(T, 13),           \\\n        __SSE_DEFINE(T, 14),           \\\n        __SSE_DEFINE(T, 15),           \\\n                                       \\\n        T##STATE_ALL = ~(0UL),         \\\n    } T##State;\n\n__SSE_DEFINE_ALL(xmm_state_t, XMM);\n__SSE_DEFINE_ALL(ymm_state_t, YMM);\n__SSE_DEFINE_ALL(zmm_state_t, ZMM);\n\n#undef __SSE_DEFINE_ALL\n#undef __SSE_DEFINE\n\nSTRUCT(RegState, {\n    GPRState gpr_read;\n    GPRState gpr_read_32_64;\n    GPRState gpr_write;\n    GPRState gpr_write_32_64;\n    FLGState flg_read;\n    FLGState flg_write;\n    XMMState xmm_read;\n    XMMState xmm_write;\n    YMMState ymm_read;\n    YMMState ymm_write;\n    ZMMState zmm_read;\n    ZMMState zmm_write;\n});\n\nZ_API bool z_capstone_is_call(const cs_insn *inst);\n\nZ_API bool z_capstone_is_jmp(const cs_insn *inst);\n\nZ_API bool z_capstone_is_cjmp(const cs_insn *inst);\n\nZ_API bool z_capstone_is_loop(const cs_insn *inst);\n\nZ_API bool z_capstone_is_xbegin(const cs_insn *inst);\n\nZ_API bool z_capstone_is_ret(const cs_insn *inst);\n\nZ_API bool z_capstone_is_terminator(const cs_insn *inst);\n\nZ_API bool z_capstone_is_rare(const cs_insn *inst);\n\nZ_API bool z_capstone_is_pc_related_ujmp(const cs_insn *inst, addr_t *addr_ptr);\n\nZ_API bool 
z_capstone_is_const_mem_ujmp(const cs_insn *inst, addr_t *addr_ptr);\n\nZ_API bool z_capstone_is_pc_related_ucall(const cs_insn *inst,\n                                          addr_t *addr_ptr);\n\nZ_API bool z_capstone_is_const_mem_ucall(const cs_insn *inst, addr_t *addr_ptr);\n\nZ_API RegState *z_capstone_get_register_state(const cs_insn *inst);\n\nZ_API void z_capstone_show_gpr_state(GPRState gpr_state);\n\nZ_API void z_capstone_show_flg_state(FLGState flg_state);\n\n#endif\n"
  },
  {
    "path": "src/config.h",
    "content": "/*\n * config.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __CONFIG_H\n#define __CONFIG_H\n\n/*\n * Include basic headers\n */\n#include <assert.h>\n#include <stdbool.h>\n#include <stddef.h>\n#include <stdint.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <sys/stat.h>\n#include <sys/user.h>\n#include <sys/wait.h>\n#include <unistd.h>\n\n/*\n * Tool name and version\n */\n#define OURTOOL \"stoch-fuzz\"\n#define VERSION \"1.1.0\"\n\n/*\n * Magic string to indicate the file is patched\n */\n#define MAGIC_STRING \"2015.05.02 Shanghai Snow\"\n#define MAGIC_NUMBER 0x527569787565\n\n/*\n * Genral macro and types\n */\n#define STRING(x) STRING_2(x)\n#define STRING_2(x) #x\n\n// error code for EXITME\n#define MY_ERR_CODE 233\n\n#define Z_API __attribute__((visibility(\"hidden\")))\n#define Z_PRIVATE __attribute__((visibility(\"hidden\"))) static inline\n#define Z_PUBLIC __attribute__((visibility(\"default\")))\n#define Z_RESERVED __attribute__((unused))\n\n#define PAGE_SIZE_POW2 PAGE_SHIFT\n\n#define ADDR_MAX SIZE_MAX\n\n/*\n * Invalid information\n */\n#define INVALID_ADDR ADDR_MAX\n#define INVALID_FD -1\n#define INVALID_SHM_ID -1\n#define INVALID_PID 0\n\n/*\n * Re-define type\n */\ntypedef size_t addr_t;\ntypedef void PhantomType;\ntypedef long double 
double128_t;\n\n/*\n * XXX:\n *  + SHADOW_CODE_ADDR: random address based on ASLR/PIE\n *  + SIGNAL_STACK_ADDR: random address based on ASLR/PIE\n *  + RETADDR_MAPPING_ADDR: random address based on ASLR/PIE\n *  + LOOKUP_TABLE_ADDR: random address based on ASLR/PIE\n *  + RW_PAGE_ADDR: fixed address\n *  + AFL_MAP_ADDR: fixed address\n *  + CRS_MAP_ADDR: fixed address\n *\n * Note that, RW_PAGE_ADDR can only be fixed because we need it to access the\n * programb base on the runtime. On the contrary, AFL_MAP_ADDR and CRS_MAP_ADDR\n * should have not been fixed, but they indeed are mainly due to the keystone\n * bug that is related to address redirecting.\n */\n// XXX: see http://ref.x86asm.net/coder64.html for x64 encoding\n#define SHADOW_CODE_ADDR 0x1f1f8000\n\n// XXX: SIGSTKSZ is now a run-time variable, which makes compilation of loader\n// and forkserver failed.\n// Check discussion below:\n//  https://public-inbox.org/libc-alpha/87y2ew8i1w.fsf@igel.home/T/\n// Some references:\n//  https://codebrowser.dev/glibc/glibc/sysdeps/unix/sysv/linux/bits/sigstack.h.html#30\n//  https://codebrowser.dev/glibc/glibc/sysdeps/unix/sysv/linux/bits/sigstksz.h.html#28\n#ifndef SIGNAL_STACK_SIZE\n#error \"SIGNAL_STACK_SIZE should be determined before compilation\"\n#endif\n// XXX: we pick a high address to avoid overflow with other important pages\n#define SIGNAL_STACK_ADDR (0x100000000 + SIGNAL_STACK_SIZE)\n\n#define RETADDR_MAPPING_ADDR (SIGNAL_STACK_ADDR + SIGNAL_STACK_SIZE)\n\n/*\n * [RW_PAGE_ADDR] The meta information needed during loading\n */\n// XXX: members with addr_t are runtime address (especially for PIE)\ntypedef struct __loading_info_t {\n    addr_t program_base;\n\n    uint64_t afl_prev_id;\n\n    uint64_t client_pid;\n\n    uint64_t prev_pc;\n\n    char shadow_path[0x100];\n    uint64_t shadow_size;\n    addr_t shadow_base;\n\n    char lookup_tab_path[0x100];\n    uint64_t lookup_tab_size;\n    addr_t lookup_tab_base;\n\n    char pipe_path[0x100];\n\n    char 
shared_text_path[0x100];\n    uint64_t shared_text_size;\n    addr_t shared_text_base;\n\n    char retaddr_mapping_path[0x100];\n    uint64_t retaddr_mapping_size;\n    addr_t retaddr_mapping_base;\n    bool retaddr_mapping_used;\n\n    bool daemon_attached;\n\n} __LoadingInfo;\n\n#define RW_PAGE_ADDR 0x300000\n#define RW_PAGE_SIZE PAGE_SIZE\n#define RW_PAGE_USED_SIZE sizeof(__LoadingInfo)\n#define RW_PAGE_INFO_ADDR(f) (RW_PAGE_ADDR + offsetof(__LoadingInfo, f))\n#define RW_PAGE_INFO(field) (((__LoadingInfo *)RW_PAGE_ADDR)->field)\n\n/*\n * Prefix and suffix for additional files\n */\n#define TEMPFILE_NAME_PREFIX \".\" OURTOOL \".\"\n#define LOOKUP_TABNAME_PREFIX \".lookup.\"\n#define TRAMPOLINES_NAME_PREFIX \".shadow.\"\n#define SHARED_TEXT_PREFIX \".text.\"\n#define RETADDR_MAPPING_PREFIX \".ret.\"\n#define CRASHPOINT_LOG_PREFIX \".crashpoint.\"\n#define PIPE_FILENAME_PREFIX \".pipe.\"\n#define PDISASM_FILENAME_PREFIX \".pdisasm.\"\n#define METADATA_FILENAME_PREFIX \".meta.\"\n#define CODE_SEGMENT_FILE_SUFFIX \".code.segments\"\n#define BACKUP_FILE_SUFFIX \".bak\"\n#define PATCHED_FILE_SUFFIX \".patch\"\n#define PHANTOM_FILE_SUFFIX \".phantom\"\n\n/*\n * Lookup table\n */\nextern void z_lookup_table_init_cell_num(uint64_t text_size);\nextern uint64_t z_lookup_table_get_cell_num();\n\n#define LOOKUP_TABLE_INIT_CELL_NUM(x) z_lookup_table_init_cell_num(x)\n\n#define LOOKUP_TABLE_CELL_SIZE_POW2 2\n#define LOOKUP_TABLE_CELL_SIZE (1 << LOOKUP_TABLE_CELL_SIZE_POW2)\n#define LOOKUP_TABLE_CELL_MASK ((1UL << (LOOKUP_TABLE_CELL_SIZE * 8)) - 1)\n#define LOOKUP_TABLE_CELL_NUM z_lookup_table_get_cell_num()\n\n#define LOOKUP_TABLE_SIZE (LOOKUP_TABLE_CELL_SIZE * LOOKUP_TABLE_CELL_NUM)\n\n#define LOOKUP_TABLE_MAX_CELL_NUM 0x8000000\n#define LOOKUP_TABLE_MAX_SIZE \\\n    (LOOKUP_TABLE_CELL_SIZE * LOOKUP_TABLE_MAX_CELL_NUM)\n\n// in case of conflicting with asan shadow memory\n#define LOOKUP_TABLE_ADDR ((0x7fff8000) - LOOKUP_TABLE_MAX_SIZE)\n\n/*\n * Crash check\n */\n// For exit 
code usage, check https://tldp.org/LDP/abs/html/exitcodes.html for\n// more information.\n// It is lucky that we can play with the high 16 bits of status.\n#define __RS(s) ((s) & (0xffff))          // __REAL_STATUS\n#define __SF(s) (!!((s) & (0xffff0000)))  // __SELF_FIRED\n#define PACK_STATUS(s, r) ((((r) << 16) & (0xffff0000)) | (((s) & (0xffff))))\n#define IS_SUSPECT_STATUS(s) \\\n    (__SF(s) && WIFSIGNALED(__RS(s)) && (WTERMSIG(__RS(s)) == SIGKILL))\n#define IS_ABNORMAL_STATUS(s) \\\n    (__SF(s) || (!WIFEXITED(__RS(s)) && WIFSIGNALED(__RS(s))))\n// XXX: AFL uses SIGKILL to terminate a timeouted process (same as us)\n#define IS_TIMEOUT_STATUS(s) \\\n    (!__SF(s) && WIFSIGNALED(__RS(s)) && (WTERMSIG(__RS(s)) == SIGKILL))\n\n/*\n * Define struct with type info\n */\ntypedef struct meta_struct_t {\n    const char *__type;\n} MetaStruct;\n\n#define STRUCT(name, content) \\\n    typedef struct name##_t { \\\n        const char *__type;   \\\n        struct content;       \\\n    } name\n\n#define STRUCT_REALNAME(type) struct type##_t\n\n#define STRUCT_TYPE(var) ((MetaStruct *)var)->__type\n\n#define STRUCT_ALLOC(type)                    \\\n    ({                                        \\\n        type *var = z_alloc(1, sizeof(type)); \\\n        var->__type = #type;                  \\\n        var;                                  \\\n    })\n\n/*\n * Setter and Getter\n *      OTYPE: type of object (e.g., Binary)\n *      ONAME: name of object (e.g., binary)\n *      FTYPE: type of filed (e.g., Elf_Info *)\n *      FNAME: name of filed (e.g., elf)\n */\n#define DECLARE_SETTER(OTYPE, ONAME, FTYPE, FNAME) \\\n    Z_API void z_##ONAME##_##set_##FNAME(OTYPE *ONAME, FTYPE FNAME)\n\n#define DEFINE_SETTER(OTYPE, ONAME, FTYPE, FNAME)                     \\\n    Z_API void z_##ONAME##_##set_##FNAME(OTYPE *ONAME, FTYPE FNAME) { \\\n        assert(ONAME != NULL);                                        \\\n        ONAME->FNAME = FNAME;                                       
  \\\n    }\n\n#define DECLARE_GETTER(OTYPE, ONAME, FTYPE, FNAME) \\\n    Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME)\n\n#define DEFINE_GETTER(OTYPE, ONAME, FTYPE, FNAME)         \\\n    Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME) { \\\n        assert(ONAME != NULL);                            \\\n        return ONAME->FNAME;                              \\\n    }\n\n#define OVERLOAD_SETTER(OTYPE, ONAME, FTYPE, FNAME) \\\n    Z_API void z_##ONAME##_##set_##FNAME(OTYPE *ONAME, FTYPE FNAME)\n\n#define OVERLOAD_GETTER(OTYPE, ONAME, FTYPE, FNAME) \\\n    Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME)\n\n#endif\n"
  },
  {
    "path": "src/core.c",
    "content": "/*\n * core.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"core.h\"\n#include \"crs_config.h\"\n#include \"elf_.h\"\n#include \"utils.h\"\n\n#include <fcntl.h>\n#include <sys/ipc.h>\n#include <sys/mman.h>\n#include <sys/shm.h>\n#include <sys/socket.h>\n#include <sys/types.h>\n#include <sys/un.h>\n\n/*\n * System clean up\n */\nstatic Core *__core = NULL;\n\n// callback function for exit\nstatic void __core_atexit(void) {\n    if (__core) {\n        z_core_destroy(__core);\n    }\n    system(\"rm -f \" TEMPFILE_NAME_PREFIX \"*\");\n}\n\n// stop signal handling\nstatic void __core_handle_stop_sig(int _sig_id) {\n    __core_atexit();\n    kill(getpid(), SIGKILL);\n}\n\n// timeout handling\nstatic void __core_handle_timeout(int _sig_id) {\n    if (__core && __core->client_pid != INVALID_PID) {\n        z_warn(\"client timeout\");\n        kill(__core->client_pid, SIGKILL);\n    }\n}\n\n// setup all signal handlers\nstatic void __core_setup_signal_handlers(void) {\n    struct sigaction sa;\n\n    sa.sa_handler = NULL;\n    sa.sa_flags = SA_RESTART;\n    sa.sa_sigaction = NULL;\n\n    sigemptyset(&sa.sa_mask);\n\n    /* Various ways of saying \"stop\". 
*/\n\n    sa.sa_handler = __core_handle_stop_sig;\n    sigaction(SIGHUP, &sa, NULL);\n    sigaction(SIGINT, &sa, NULL);\n    sigaction(SIGTERM, &sa, NULL);\n\n    /* Exec timeout notifications. */\n\n    sa.sa_handler = __core_handle_timeout;\n    sigaction(SIGALRM, &sa, NULL);\n}\n\n// avoid duplicate setting (in case there are two instances of core)\nstatic bool __core_signal_handled = false;\n\n// setup environment needed by core\nstatic void __core_environment_setup(void) {\n    atexit(__core_atexit);\n    if (!__core_signal_handled) {\n        __core_setup_signal_handlers();\n        __core_signal_handled = true;\n    }\n}\n\n// clean cached files\nstatic void __core_clean_cache(const char *pathname) {\n#define __RM_CACHE(prefix, binary)                       \\\n    do {                                                 \\\n        const char *filename = z_strcat(prefix, binary); \\\n        if (!z_access(filename, F_OK)) {                 \\\n            if (remove(filename)) {                      \\\n                EXITME(\"failed to remove %s\", filename); \\\n            }                                            \\\n        }                                                \\\n        z_free((void *)filename);                        \\\n    } while (0)\n\n    __RM_CACHE(LOOKUP_TABNAME_PREFIX, pathname);\n    __RM_CACHE(TRAMPOLINES_NAME_PREFIX, pathname);\n    __RM_CACHE(SHARED_TEXT_PREFIX, pathname);\n    __RM_CACHE(RETADDR_MAPPING_PREFIX, pathname);\n    __RM_CACHE(CRASHPOINT_LOG_PREFIX, pathname);\n    __RM_CACHE(PIPE_FILENAME_PREFIX, pathname);\n    __RM_CACHE(PDISASM_FILENAME_PREFIX, pathname);\n    __RM_CACHE(METADATA_FILENAME_PREFIX, pathname);\n\n#undef __RM_CACHE\n}\n\n// check whether the binary and cached files are valid, and update the meta file\n// if needed.\nstatic void __core_check_binary(const char *pathname, RewritingOptArgs *opts) {\n    // step 1. 
check pathname\n    z_info(\"patch binary file: \\\"%s\\\"\", pathname);\n    if (z_strchr(pathname, '/')) {\n        // TODO: it is a ugly approach to check working directory, change it\n        // when possible\n        EXITME(\"please make sure \" OURTOOL\n               \" running under the same directory with the target bianry (no \"\n               \"slash symbol).\");\n    }\n\n    // step 2. collect metadate\n    Buffer *binary_buf = z_buffer_read_file(pathname);\n    GChecksum *checksum = g_checksum_new(G_CHECKSUM_MD5);\n    g_checksum_update(checksum, z_buffer_get_raw_buf(binary_buf),\n                      z_buffer_get_size(binary_buf));\n    const char *checksum_str = g_checksum_get_string(checksum);\n    z_info(\"MD5(%s) = %s\", pathname, checksum_str);\n\n    // step 3. check metadata if needed\n    const char *metadata_filename =\n        z_strcat(METADATA_FILENAME_PREFIX, pathname);\n    if (!z_access(metadata_filename, F_OK)) {\n        Buffer *metadata_buf = z_buffer_read_file(metadata_filename);\n        size_t metadata_size = z_buffer_get_size(metadata_buf);\n        const uint8_t *metadata = z_buffer_get_raw_buf(metadata_buf);\n\n        if (metadata_size !=\n            sizeof(RewritingOptArgs) + z_strlen(checksum_str) + 1) {\n            z_info(\"inconsistent size of cache metadata, remove cached files\");\n            __core_clean_cache(pathname);\n        } else if (memcmp(metadata, opts, sizeof(RewritingOptArgs))) {\n            z_info(\"inconsistent rewriting options, remove cached files\");\n            __core_clean_cache(pathname);\n        } else if (z_strcmp((const char *)metadata + sizeof(RewritingOptArgs),\n                            checksum_str)) {\n            z_info(\"inconsistent binaries, remove cached files\");\n            __core_clean_cache(pathname);\n        }\n\n        z_buffer_destroy(metadata_buf);\n    }\n\n    // step 4. 
update medadata file\n    {\n        Buffer *metadata_buf = z_buffer_create(NULL, 0);\n\n        z_buffer_append_raw(metadata_buf, (const uint8_t *)opts,\n                            sizeof(RewritingOptArgs));\n        z_buffer_append_raw(metadata_buf, (const uint8_t *)checksum_str,\n                            z_strlen(checksum_str));\n        z_buffer_push(metadata_buf, '\\x00');\n\n        z_buffer_write_file(metadata_buf, metadata_filename);\n\n        z_buffer_destroy(metadata_buf);\n    }\n\n    // step 5. free\n    g_checksum_free(checksum);\n    z_buffer_destroy(binary_buf);\n    z_free((void *)metadata_filename);\n}\n\n/*\n * Functions and Macros copied and pasted from AFL source code\n */\n#define __AFL_ROL64(_x, _r) \\\n    ((((uint64_t)(_x)) << (_r)) | (((uint64_t)(_x)) >> (64 - (_r))))\n\nZ_PRIVATE uint32_t __afl_hash32(const void *key, uint32_t len, uint32_t seed) {\n    const uint64_t *data = (uint64_t *)key;\n    uint64_t h1 = seed ^ len;\n\n    len >>= 3;\n\n    while (len--) {\n        uint64_t k1 = *data++;\n\n        k1 *= 0x87c37b91114253d5ULL;\n        k1 = __AFL_ROL64(k1, 31);\n        k1 *= 0x4cf5ad432745937fULL;\n\n        h1 ^= k1;\n        h1 = __AFL_ROL64(h1, 27);\n        h1 = h1 * 5 + 0x52dce729;\n    }\n\n    h1 ^= h1 >> 33;\n    h1 *= 0xff51afd7ed558ccdULL;\n    h1 ^= h1 >> 33;\n    h1 *= 0xc4ceb9fe1a85ec53ULL;\n    h1 ^= h1 >> 33;\n\n    return h1;\n}\n\n/*\n * Get the hash value of current afl bitmap\n */\nZ_PRIVATE uint32_t __core_get_bitmap_hash(Core *core);\n\n/*\n * Set clock for client timeout\n */\nZ_PRIVATE void __core_set_client_clock(Core *core, pid_t client_pid);\n\n/*\n * Cancel clock for client timeout\n */\nZ_PRIVATE void __core_cancel_client_clock(Core *core, pid_t client_pid);\n\n/*\n * Setup shared memory of CRS\n */\nZ_PRIVATE void __core_setup_shm(Core *core);\n\n/*\n * Setup shared memory of AFL\n */\nZ_PRIVATE void __core_setup_afl_shm(Core *core, int afl_shm_id);\n\n/*\n * Clean up\n */\nZ_PRIVATE void 
__core_clean_environment(Core *core);\n\n/*\n * Setup a unix domain socker for core\n */\nZ_PRIVATE void __core_setup_unix_domain_socket(Core *core);\n\n/*\n * Prepare a target binary under the current working directory\n */\nZ_PRIVATE const char *__core_prepare_binary_under_curdir(const char *pathname);\n\nZ_PRIVATE const char *__core_prepare_binary_under_curdir(const char *pathname) {\n    // check whether pathname exists\n    if (access(pathname, F_OK)) {\n        EXITME(\"file not found: %s\", pathname);\n    }\n\n    const char *last_slash = z_strrchr(pathname, '/');\n    if (!last_slash) {\n        return pathname;\n    }\n\n    // check new_pathname is valid\n    const char *new_pathname = last_slash + 1;\n    if (!new_pathname[0]) {\n        EXITME(\"please provide a file path instead of a directory one: %s\",\n               pathname);\n    }\n\n    // check whether new_pathname exists.\n    if (!access(new_pathname, F_OK)) {\n        // if so, check whether these two files are the same\n        struct stat statbuf, new_statbuf;\n        if (stat(pathname, &statbuf) || stat(new_pathname, &new_statbuf)) {\n            EXITME(\"cannot stat %s or %s\", pathname, new_pathname);\n        }\n\n        if (statbuf.st_ino == new_statbuf.st_ino) {\n            // nice, these two files are identical, and we do not need to do\n            // anything\n            return new_pathname;\n        }\n    }\n\n    // copy pathname to new_pathname\n    Buffer *tmp_buf = z_buffer_read_file(pathname);\n    z_buffer_write_file(tmp_buf, new_pathname);\n    z_buffer_destroy(tmp_buf);\n\n    // chmod\n    if (z_chmod(new_pathname, 0755)) {\n        EXITME(\"fail to chmod new binary: %s\", new_pathname);\n    }\n\n    return new_pathname;\n}\n\nZ_PRIVATE uint32_t __core_get_bitmap_hash(Core *core) {\n    if (!core->afl_trace_bits) {\n        // checking runs are not enabled\n        return 0;\n    } else {\n        return __afl_hash32(core->afl_trace_bits, AFL_MAP_SIZE, 
AFL_HASH_CONST);\n    }\n}\n\nZ_PRIVATE void __core_set_client_clock(Core *core, pid_t client_pid) {\n    core->client_pid = client_pid;\n    core->it.it_value.tv_sec = (core->opts->timeout / 1000);\n    core->it.it_value.tv_usec = (core->opts->timeout % 1000) * 1000;\n    setitimer(ITIMER_REAL, &core->it, NULL);\n}\n\nZ_PRIVATE void __core_cancel_client_clock(Core *core, pid_t client_pid) {\n    if (client_pid != core->client_pid) {\n        EXITME(\"inconsistent client_pid\");\n    }\n    core->client_pid = INVALID_PID;\n    core->it.it_value.tv_sec = 0;\n    core->it.it_value.tv_usec = 0;\n    setitimer(ITIMER_REAL, &core->it, NULL);\n}\n\nZ_PRIVATE void __core_setup_unix_domain_socket(Core *core) {\n    if (core->sock_fd != INVALID_FD) {\n        EXITME(\"multiple pipelines detected\");\n    }\n\n    // get pipe filename\n    ELF *e = z_binary_get_elf(core->binary);\n    const char *pipe_filename = z_elf_get_pipe_filename(e);\n\n    // check filename length\n    struct sockaddr_un server;\n    if (z_strlen(pipe_filename) >= sizeof(server.sun_path)) {\n        EXITME(\"pipe filename is too long: %s\", pipe_filename);\n    }\n\n    // set socket\n    core->sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);\n    if (core->sock_fd < 0) {\n        EXITME(\"opening unix domain socket error\");\n    }\n    server.sun_family = AF_UNIX;\n    strcpy(server.sun_path, pipe_filename);\n\n    // bind socket\n    if (bind(core->sock_fd, (struct sockaddr *)&server,\n             sizeof(struct sockaddr_un))) {\n        EXITME(\"binding stream socket error\");\n    }\n}\n\nZ_PRIVATE void __core_setup_shm(Core *core) {\n    // step (0). check shared memory is already setup\n    if (core->shm_id != INVALID_SHM_ID) {\n        EXITME(\"multiple CRS shared memory detected\");\n    }\n\n    // step (1). 
set shared memory id\n    core->shm_id =\n        shmget(IPC_PRIVATE, CRS_MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);\n    if (core->shm_id < 0) {\n        EXITME(\"failed: shmget()\");\n    }\n\n    // step (2). set shared memory address\n    core->shm_addr = (addr_t)shmat(core->shm_id, NULL, 0);\n    if (core->shm_addr == INVALID_ADDR) {\n        EXITME(\"failed: shmat()\");\n    }\n}\n\nZ_PRIVATE void __core_setup_afl_shm(Core *core, int afl_shm_id) {\n    // initial checking\n    if (core->opts->check_execs == 0) {\n        EXITME(\"checking runs are disabled\");\n    }\n    if (!z_disassembler_fully_support_prob_disasm(core->disassembler)) {\n        EXITME(\n            \"checking runs are disabled when pdisasm is not fully supported\");\n    }\n    if (afl_shm_id == INVALID_SHM_ID) {\n        EXITME(\"invalid afl_shm_id\");\n    }\n\n    core->afl_trace_bits = shmat(afl_shm_id, NULL, 0);\n    if (core->afl_trace_bits == (void *)-1) {\n        EXITME(\"failed: shmat() for AFL\");\n    }\n\n    z_info(\"setup the shared memory of AFL at %p\", core->afl_trace_bits);\n}\n\nZ_PRIVATE void __core_clean_environment(Core *core) {\n    if (core->shm_id != INVALID_SHM_ID) {\n        // XXX: remove lock to avoid dead lock\n        CRS_INFO_BASE(core->shm_addr, lock) = 0;\n        shmctl(core->shm_id, IPC_RMID, NULL);\n        core->shm_id = INVALID_SHM_ID;\n        core->shm_addr = INVALID_ADDR;\n    }\n\n    if (core->sock_fd != INVALID_FD) {\n        close(core->sock_fd);\n        core->sock_fd = INVALID_FD;\n    }\n\n    ELF *e = z_binary_get_elf(core->binary);\n    const char *pipe_filename = z_elf_get_pipe_filename(e);\n    if (!z_access(pipe_filename, F_OK)) {\n        remove(pipe_filename);\n    }\n}\n\nZ_PUBLIC int z_core_perform_dry_run(Core *core, int argc, const char **argv) {\n    // update original file\n    const char *filename = z_binary_get_original_filename(core->binary);\n    assert(!z_strcmp(filename, argv[0]));\n\n    // create phantom file, instead of 
removing the original file\n    const char *patched_filename = z_strcat(filename, PATCHED_FILE_SUFFIX);\n    z_binary_save(core->binary, patched_filename);\n    z_info(\"start dry run: %s\", patched_filename);\n\n    // get .text information\n    ELF *e = z_binary_get_elf(core->binary);\n    Elf64_Shdr *text = z_elf_get_shdr_text(e);\n    addr_t text_addr = text->sh_addr;\n    size_t text_size = text->sh_size;\n\n    // prepare a shaow argv_ with argv[0] replaced by patched_filename\n    const char **argv_ = z_alloc(argc + 1, sizeof(const char *));\n    assert(!argv[argc]);  // the last pointer should be NULL\n    for (int i = 1; i <= argc; i++) {\n        argv_[i] = argv[i];\n    }\n    argv_[0] = patched_filename;\n\n#ifdef NDEBUG\n    int dev_null_fd = open(\"/dev/null\", O_RDWR);\n    if (dev_null_fd < 0) {\n        EXITME(\"unable to open /dev/null\");\n    }\n#endif\n\n    while (true) {\n        // we have to build new pipe each round, to avoid multi-thread problems\n        int st_pipe[2];\n        if (pipe(st_pipe) < 0) {\n            EXITME(\"pipe() failed\");\n        }\n\n        z_core_detach(core);\n        pid_t pid = fork();\n        if (pid == 0) {\n            // isolate the process and configure standard descriptors (including\n            // process group)\n            if (setsid() < 0) {\n                EXITME(\"setsid() failed\");\n            }\n\n            // child\n            if (dup2(st_pipe[1], CRS_DATA_FD) < 0) {\n                EXITME(\"dup2() failed\");\n            }\n\n            close(st_pipe[0]);\n            close(st_pipe[1]);\n#ifdef NDEBUG\n            dup2(dev_null_fd, 0);\n            dup2(dev_null_fd, 1);\n            dup2(dev_null_fd, 2);\n            close(dev_null_fd);\n#endif\n\n            // set LD_PRELOAD if needed\n            if (core->opts->r.safe_ret && getenv(\"STOCHFUZZ_PRELOAD\")) {\n                setenv(\"LD_PRELOAD\", getenv(\"STOCHFUZZ_PRELOAD\"), 1);\n            }\n\n            // set other 
environments including ASAN (copied from AFL)\n\n            /* This should improve performance a bit, since it stops the linker\n               from doing extra work post-fork(). */\n\n            if (!getenv(\"LD_BIND_LAZY\"))\n                setenv(\"LD_BIND_NOW\", \"1\", 0);\n\n            /* Set sane defaults for ASAN if nothing else specified. */\n\n            setenv(\"ASAN_OPTIONS\",\n                   \"abort_on_error=1:\"\n                   \"detect_leaks=0:\"\n                   \"symbolize=0:\"\n                   \"allocator_may_return_null=1\",\n                   0);\n\n            /* MSAN is tricky, because it doesn't support abort_on_error=1 at\n               this point. So, we do this in a very hacky way. */\n\n            // note: #define MSAN_ERROR 86 (in AFL)\n            setenv(\"MSAN_OPTIONS\",\n                   \"exit_code=86:\"\n                   \"symbolize=0:\"\n                   \"abort_on_error=1:\"\n                   \"allocator_may_return_null=1:\"\n                   \"msan_track_origins=0\",\n                   0);\n\n            execv(argv_[0], (char **)argv_);\n            exit(0);\n        } else {\n            // parent\n            z_trace(\"start child process [%d]\", pid);\n\n            close(st_pipe[1]);\n            int signal_fd = st_pipe[0];\n\n            // set clock\n            __core_set_client_clock(core, pid);\n\n            int status = 0;\n            if (waitpid(pid, &status, 0) < 0) {\n                EXITME(\"waitpid failed\");\n            }\n\n            // cancel clock\n            __core_cancel_client_clock(core, pid);\n\n            z_core_attach(core);\n\n            addr_t crash_rip = CRS_INVALID_IP;\n            // XXX: this read may fail when the status is not suspicious.\n            if (read(signal_fd, (char *)(&crash_rip), 8) == 8) {\n                // well received, we need to update status\n                status = PACK_STATUS(status, 1);\n            } else {\n                // 
re-init crash_rip\n                crash_rip = CRS_INVALID_IP;\n            }\n            close(st_pipe[0]);\n\n            z_info(\"child process exit with %#lx\", status);\n\n            uint32_t cov = __core_get_bitmap_hash(core);\n            CRSStatus crs_status = z_diagnoser_new_crashpoint(\n                core->diagnoser, status, crash_rip, cov, false);\n\n            if (crs_status == CRS_STATUS_CRASH ||\n                crs_status == CRS_STATUS_NORMAL) {\n                z_free(argv_);\n                z_free((char *)patched_filename);\n                return status;\n            }\n\n            // TODO: try to fix this somehow (no idea how currently)\n            if (crs_status == CRS_STATUS_DEBUG) {\n                // XXX: note that alought it is high likely that the self\n                // correction procedure works fine when the crash_rip is on\n                // .text sectoin, it is still possible that ASLR can cause some\n                // problems.\n                // TODO: handle the *extremely* corner case.\n                if (IS_SUSPECT_STATUS(status) &&\n                    (crash_rip < text_addr ||\n                     crash_rip >= text_addr + text_size)) {\n                    EXITME(\n                        \"self correction procedure under dry run mode is \"\n                        \"problematic due to ASLR\");\n                }\n            }\n        }\n    }\n}\n\nZ_PUBLIC Core *z_core_create(const char *pathname, SysOptArgs *opts) {\n    if (__core) {\n        EXITME(\"there can only be one Core instance\");\n    }\n\n    pathname = __core_prepare_binary_under_curdir(pathname);\n\n    __core_environment_setup();\n\n    __core_check_binary(pathname, &opts->r);\n\n    Core *core = STRUCT_ALLOC(Core);\n\n    core->opts = opts;\n\n    core->binary = z_binary_open(pathname, core->opts->r.instrument_early);\n    if (core->opts->r.safe_ret && !core->opts->r.instrument_early) {\n        ELF *e = z_binary_get_elf(core->binary);\n      
  if (z_elf_is_statically_linked(e)) {\n            z_warn(\n                \"it is a statically-linked ELF file, make sure you DO NOT set \"\n                \"LD_PRELOAD when running the phantom file.\");\n        }\n    }\n\n    core->disassembler = z_disassembler_create(core->binary, &core->opts->r);\n    core->rewriter = z_rewriter_create(core->disassembler, &core->opts->r);\n    core->patcher = z_patcher_create(core->disassembler, &core->opts->r);\n    core->diagnoser = z_diagnoser_create(core->patcher, core->rewriter,\n                                         core->disassembler, &core->opts->r);\n\n    z_diagnoser_read_crashpoint_log(core->diagnoser);\n\n    core->client_pid = INVALID_PID;\n    core->it.it_interval.tv_sec = 0;\n    core->it.it_interval.tv_usec = 0;\n    core->it.it_value.tv_sec = 0;\n    core->it.it_value.tv_usec = 0;\n\n    core->shm_id = INVALID_SHM_ID;\n    core->shm_addr = INVALID_ADDR;\n\n    core->afl_trace_bits = NULL;\n\n    core->sock_fd = INVALID_FD;\n\n    __core = core;\n\n    return core;\n}\n\nZ_PUBLIC void z_core_activate(Core *core) {\n    z_patcher_initially_patch(core->patcher);\n\n    z_rewriter_initially_rewrite(core->rewriter);\n\n    // XXX: it seems not a good idea to do pre-disassembly (linear-disassembly)\n    // due to the heavy overhead of forking a process\n    // z_rewriter_heuristics_rewrite(core->rewriter);\n\n    z_diagnoser_apply_logged_crashpoints(core->diagnoser);\n}\n\nZ_PUBLIC void z_core_destroy(Core *core) {\n    if (!__core) {\n        EXITME(\"detected an unrestrained core object\");\n    }\n\n    __core_clean_environment(core);\n\n    z_diagnoser_write_crashpoint_log(core->diagnoser);\n\n    z_diagnoser_destroy(core->diagnoser);\n    z_patcher_destroy(core->patcher);\n    z_rewriter_destroy(core->rewriter);\n    z_disassembler_destroy(core->disassembler);\n    z_binary_destroy(core->binary);\n\n    z_free(core);\n\n    __core = NULL;\n}\n\nZ_PUBLIC void z_core_detach(Core *core) {\n    
z_binary_set_elf_state(core->binary, ELFSTATE_DISABLE | ELFSTATE_CONNECTED);\n}\n\nZ_PUBLIC void z_core_attach(Core *core) {\n    z_binary_set_elf_state(core->binary, ELFSTATE_CONNECTED);\n}\n\nZ_PUBLIC void z_core_start_daemon(Core *core, int notify_fd) {\n    const char *filename = z_binary_get_original_filename(core->binary);\n\n    // first dry run w/o any parameter to find some crashpoint during init\n    // XXX: dry run must be performed before setting up shm\n    // XXX: when -e option is given, we do not need to perform such dry runs\n    if (!core->opts->r.instrument_early) {\n        // before dry run, we first patch the main function as directly\n        // returning. As such, we can try our best to avoid the error diagnosis\n        // during dry run\n        addr_t shadow_main_addr = z_binary_get_shadow_main(core->binary);\n        uint8_t ret_byte = 0xc3;\n        uint8_t ori_byte = 0;\n        z_patcher_unsafe_patch(core->patcher, shadow_main_addr, 1, &ret_byte,\n                               &ori_byte);\n\n        const char *argv[2] = {NULL, NULL};\n        argv[0] = filename;\n        z_core_perform_dry_run(core, 1, argv);\n\n        // repair the main\n        z_patcher_unsafe_patch(core->patcher, shadow_main_addr, 1, &ori_byte,\n                               NULL);\n    }\n\n    // create phantom file, instead of removing the original file\n    const char *phantom_filename = z_strcat(filename, PHANTOM_FILE_SUFFIX);\n    z_binary_create_snapshot(core->binary, phantom_filename);\n    z_info(\n        \"phantom file is create, please execute %s to communicate with the \"\n        \"daemon\",\n        phantom_filename);\n    z_free((char *)phantom_filename);\n\n    __core_setup_shm(core);\n    __core_setup_unix_domain_socket(core);\n\n    /*\n     * Main body to handle on-the-fly patch\n     */\n    // step (0). 
listen on core->sock_fd\n    if (listen(core->sock_fd, 1)) {\n        EXITME(\"listen unix domain socket failed\");\n    }\n\n    // step (1). comm connection\n    //  step (1.0). notify if necessar\n    if (notify_fd != INVALID_FD) {\n        if (write(notify_fd, &core->sock_fd, 4) != 4) {\n            EXITME(\"fail to notify parent process\");\n        }\n        close(notify_fd);\n        notify_fd = INVALID_FD;\n    }\n    //  step (1.1). wait connection\n    int comm_fd = accept(core->sock_fd, NULL, NULL);\n    z_info(\"daemon gets connection for comm\");\n    //  step (1.2). handshake:\n    //      * send out shm_id\n    //      * recv afl_attached\n    //      * recv afl_shm_id\n    //      * send core->opts->check_execs (useless when AFL is not attached)\n    int afl_attached = 0;\n    int afl_shm_id = INVALID_SHM_ID;\n    // checking runs are enabled only if\n    //      * AFL is attached\n    //      * Prob Disassembly is fully supported\n    //      * core->opts->check_execs is not zero\n    bool check_run_enabled = false;\n    {\n        assert(sizeof(core->shm_id) == 4);\n        if (write(comm_fd, &core->shm_id, sizeof(core->shm_id)) !=\n            sizeof(core->shm_id)) {\n            EXITME(\"fail to send shm_id\");\n        }\n        if (read(comm_fd, &afl_attached, 4) != 4) {\n            EXITME(\"fail to recv afl_attached\");\n        }\n\n        // update checking run information based on whether AFL is attached\n        check_run_enabled =\n            !!(afl_attached &&\n               z_disassembler_fully_support_prob_disasm(core->disassembler) &&\n               core->opts->check_execs > 0);\n        uint32_t check_execs =\n            (check_run_enabled ? 
core->opts->check_execs : 0);\n\n        if (read(comm_fd, &afl_shm_id, sizeof(afl_shm_id)) !=\n            sizeof(afl_shm_id)) {\n            EXITME(\"fail to recv alf_shm_id\");\n        }\n        if (write(comm_fd, &check_execs, 4) != 4) {\n            EXITME(\"fail to send check_execs\");\n        }\n\n        // simple validation\n        if (afl_attached && afl_shm_id == INVALID_SHM_ID) {\n            EXITME(\"AFL is attached but the daemon does not get AFL_SHM_ID\");\n        }\n        if (!afl_attached && afl_shm_id != INVALID_SHM_ID) {\n            EXITME(\"AFL is notattached but the daemon gets AFL_SHM_ID\");\n        }\n        if (check_run_enabled && !afl_attached) {\n            EXITME(\"checking runs are only enabled when AFL is attched\");\n        }\n    }\n\n    // step (2). output basic information and setup AFL shared memory\n    if (afl_attached) {\n        z_info(\"AFL detected: %d\", afl_attached);\n        if (check_run_enabled) {\n            // XXX: we only setup the shared memory for AFL when checking runs\n            // are enabled\n            // XXX: in other words, core->afl_trace_bits indicates whether the\n            // checking runs are enabled or not\n            __core_setup_afl_shm(core, afl_shm_id);\n        }\n    } else {\n        z_info(\"no AFL attached: %d\", afl_attached);\n    }\n    z_info(\"daemon handshake successes\");\n\n    // step (3). communicate with the client\n    //      + if it is not a crash (normal exit), directly stop the daemon. note\n    //      that when AFL is attached, no any normal status can be recevied;\n    //      + if it is a real crash, the daemon sends CRS_STATUS_CRASH to notify\n    //      the client, and (a.) stop the daemon when AFL is not attached or\n    //      (b.) 
continue a new round when AFL is attached;\n    //      + if it is a patch crash, the daemon sends\n    //      CRS_STATUS_NOTHING/_REMMAP to guide the client do the on-the-fly\n    //      patch.\n    while (true) {\n        /*\n         * step (3.1). recv program status from the client\n         */\n        int status = 0;\n        if (read(comm_fd, &status, 4) != 4) {\n            EXITME(\"fail to recv status\");\n        }\n        if (WIFSIGNALED(status)) {\n            z_info(\"get status code: %#x (signal: %d)\", status,\n                   WTERMSIG(status));\n        } else if (WIFEXITED(status)) {\n            z_info(\"get status code: %#x (exit: %d)\", status,\n                   WEXITSTATUS(status));\n        } else {\n            // I have been confused by the status handling for a long time at\n            // the early time, so I comment it down here for convenience.\n            //\n            // XXX: theoretically, this branch happens only when\n            // WTERMSIG(status) == 0x7f, which covers WIFSTOPPED(status) see:\n            //\n            //  * WTERMSIG(status)    = ((status) & 0x7f)\n            //  * WIFEXITED(status)   = (WTERMSIG(status) == 0)\n            //  * WIFSIGNALED(status) =\n            //              (((signed char) (((status) & 0x7f) + 1) >> 1) > 0)\n            //  * WIFSTOPPED(status)  = (((status) & 0xff) == 0x7f)\n            //\n            // It is very interesting to see how glibc construct such status:\n            //\n            //  For WTERMSIG(status) and WIFEXITED(status):\n            //      * __W_EXITCODE(ret, sig) = ((ret) << 8 | (sig))\n            //  For WIFSTOPPED(status):\n            //      * __W_STOPCODE(sig) = ((sig) << 8 | 0x7f)\n            //\n            z_info(\"get status code: %#x (stopped? signal: %d)\", status,\n                   WSTOPSIG(status));\n        }\n\n        /*\n         * step (3.2). 
get crash rip and coverage\n         */\n        addr_t crash_rip = CRS_INFO_BASE(core->shm_addr, crash_ip);\n        CRS_INFO_BASE(core->shm_addr, crash_ip) = CRS_INVALID_IP;\n\n        uint32_t cov = __core_get_bitmap_hash(core);\n\n        /*\n         * step (3.3). check returning status and get patch commands\n         */\n        // XXX: we use int to guarantee a 4-byte integer\n        int crs_status = z_diagnoser_new_crashpoint(\n            core->diagnoser, status, crash_rip, cov, check_run_enabled);\n\n        if (crs_status == CRS_STATUS_CRASH) {\n            if (write(comm_fd, &crs_status, 4) != 4) {\n                EXITME(\"fail to notify real crash\");\n            }\n            goto NOT_PATCHED_CRASH;\n        }\n\n        if (crs_status == CRS_STATUS_NORMAL) {\n            if (check_run_enabled) {\n                // notify the fork server about the result of checking runs\n                if (write(comm_fd, &crs_status, 4) != 4) {\n                    EXITME(\"fail to notify real crash\");\n                }\n            } else if (afl_attached) {\n                EXITME(\n                    \"CRS_STATUS_NORMAL is invalid when afl is attached but \"\n                    \"checking runs are disabled\");\n            }\n            goto NOT_PATCHED_CRASH;\n        }\n\n        /*\n         * step (3.4). sync binary\n         */\n        // XXX: according to the following link, it seems the fsync is used to\n        // sync changed pages from RAM to the file. It means, those changes made\n        // by the daemon is already visible to the phantom file even without\n        // fsync. Hence, to improve the performance when the underlying files\n        // are relatively large, we disable the fsync.\n        //\n        // https://unix.stackexchange.com/questions/474946/are-sharing-a-memory-mapped-file-and-sharing-a-memory-region-implemented-based-o\n        //\n        // z_binary_fsync(core->binary);\n\n        /*\n         * step (3.5). 
send status\n         */\n        if (write(comm_fd, &crs_status, 4) != 4) {\n            EXITME(\"fail to send crs status\");\n        }\n\n        /*\n         * step (3.6). continue on patching while checking timeout\n         */\n        {\n            // step (3.6.1). set clock\n            pid_t client_pid = INVALID_PID;\n            if (read(comm_fd, &client_pid, 4) != 4) {\n                EXITME(\"fail to recv client_pid [befor execution]\");\n            }\n            __core_set_client_clock(core, client_pid);\n\n            // step (3.6.2). cancel clock\n            if (read(comm_fd, &client_pid, 4) != 4) {\n                EXITME(\"fail to recv client_pid [after execution]\");\n            }\n            __core_cancel_client_clock(core, client_pid);\n        }\n        // step (3.6.3). continue\n        continue;\n\n    NOT_PATCHED_CRASH:\n        if (!afl_attached) {\n            goto DAEMON_STOP;\n        }\n    }\n\nDAEMON_STOP:\n    __core_clean_environment(core);\n}\n"
  },
  {
    "path": "src/core.h",
    "content": "/*\n * core.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * Backend of OURTOOL\n */\n#ifndef __CORE_H\n#define __CORE_H\n\n#include \"binary.h\"\n#include \"buffer.h\"\n#include \"config.h\"\n#include \"diagnoser.h\"\n#include \"disassembler.h\"\n#include \"patcher.h\"\n#include \"rewriter.h\"\n#include \"sys_optarg.h\"\n\n#include <gmodule.h>\n\n#include <sys/time.h>\n\n/*\n * Core\n */\nSTRUCT(Core, {\n    Binary *binary;\n    Disassembler *disassembler;\n    Patcher *patcher;\n    Rewriter *rewriter;\n    Diagnoser *diagnoser;\n\n    // timeout info\n    pid_t client_pid;\n    struct itimerval it;\n\n    // shared memory information\n    int shm_id;\n    addr_t shm_addr;\n\n    // shared memory of AFL\n    uint8_t *afl_trace_bits;\n\n    // unix domain information\n    int sock_fd;\n\n    // system otpargs\n    SysOptArgs *opts;\n});\n\n/*\n * Dry run without starting any server\n */\nZ_PUBLIC int z_core_perform_dry_run(Core *core, int argc, const char **argv);\n\n/*\n * Start a daemon server to automatically patch any running program\n * (note that only one connection at a time)\n */\nZ_PUBLIC void z_core_start_daemon(Core *core, int notify_fd);\n\n/*\n * Create OURTOOL Core\n */\nZ_PUBLIC Core *z_core_create(const char *pathname, SysOptArgs *opts);\n\n/*\n * Destroy OURTOOL 
Core\n */\nZ_PUBLIC void z_core_destroy(Core *core);\n\n/*\n * Activate core analysis\n */\nZ_PUBLIC void z_core_activate(Core *core);\n\n/*\n * Detach core from its underlying executable\n */\nZ_PUBLIC void z_core_detach(Core *core);\n\n/*\n * Attach core to its underlying executable\n */\nZ_PUBLIC void z_core_attach(Core *core);\n\n#endif\n"
  },
  {
    "path": "src/crs_config.h",
    "content": "/*\n * crs_config.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * CRS (Crash Site) configuration\n */\n#ifndef __CRS_CONFIG_H\n#define __CRS_CONFIG_H\n\n#include \"afl_config.h\"\n\ntypedef enum crs_status_t {\n    CRS_STATUS_NOTHING,  // nothing to do for fork server\n    CRS_STATUS_REMMAP,   // fork server needs to remmap shadow code\n    CRS_STATUS_DEBUG,    // the program are set into delta debugging mode\n    CRS_STATUS_CRASH,    // a crash in the subject program\n    CRS_STATUS_NORMAL,   // normal exit without crash\n} CRSStatus;\n\n/*\n * [CRS_INFO] The crash site information needed by self-patching\n */\ntypedef struct __crs_info_t {\n    uint32_t lock;\n    addr_t crash_ip;\n    size_t self_fired;\n} __CRSInfo;\n\n#define CRS_MAP_SIZE_POW2 PAGE_SIZE_POW2\n#define CRS_MAP_SIZE (1 << CRS_MAP_SIZE_POW2)\n#define CRS_MAP_ADDR (AFL_MAP_ADDR + AFL_MAP_SIZE)\n\n#define CRS_USED_SIZE sizeof(__CRSInfo)\n\n#define CRS_INFO(field) (((__CRSInfo *)CRS_MAP_ADDR)->field)\n#define CRS_INFO_BASE(addr, field) (((__CRSInfo *)(addr))->field)\n#define CRS_INFO_ADDR(f) (CRS_MAP_ADDR + offsetof(__CRSInfo, f))\n\n#define CRS_COMM_FD 222\n\n// TODO: CRS_DATA_FD is only used in dry run since now. 
But dry run does need a\n// better communication approach in the future.\n#define CRS_DATA_FD 233\n\n#define CRS_INVALID_IP 0x1996083019961219\n\n#endif\n"
  },
  {
    "path": "src/diagnoser.c",
    "content": "/*\n * diagnoser.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"diagnoser.h\"\n#include \"utils.h\"\n\n/*\n * Perform delta debugging to locate rewriting errors\n */\nZ_PRIVATE CRSStatus __diagnoser_delta_debug(Diagnoser *g, int status,\n                                            addr_t addr, uint32_t cov);\n\n/*\n * Handler a single crashpoint (the real function while handles patching).\n */\nZ_PRIVATE void __diagnoser_handle_single_crashpoint(Diagnoser *g, addr_t addr,\n                                                    CPType type, bool is_real,\n                                                    bool need_log);\n\n/*\n * Validate a crashpoint, return INVALID_ADDR if it is an unintentional crash\n */\nZ_PRIVATE addr_t __diagnoser_validate_crashpoint(Diagnoser *g, addr_t addr);\n\n/*\n * Get the CPType of the given crashpoint\n */\nZ_PRIVATE CPType __diagnoser_get_crashpoint_type(Diagnoser *g, addr_t addr,\n                                                 addr_t real_addr);\n\n/*\n * Patch the intentional crash\n */\nZ_PRIVATE void __diagnoser_patch_crashpoint(Diagnoser *g, addr_t addr,\n                                            CPType cp_type);\n\n/*\n * Getter and Setter\n */\nDEFINE_GETTER(Diagnoser, diagnoser, GQueue *, crashpoints);\n\n// XXX: this function is only 
used for those new crashpoints detected during\n// execution.\nZ_PRIVATE void __diagnoser_patch_crashpoint(Diagnoser *g, addr_t addr,\n                                            CPType cp_type) {\n    if (cp_type == CP_RETADDR) {\n        // for CP_RETADDR, we want to also update other retaddrs who share the\n        // same callee with the found one\n\n        Buffer *retaddrs = z_rewriter_new_validate_retaddr(g->rewriter, addr);\n        size_t n = z_buffer_get_size(retaddrs) / sizeof(addr_t);\n        addr_t *addrs = (addr_t *)z_buffer_get_raw_buf(retaddrs);\n        z_info(\"we found %d CP_RETADDR sharing the same callee\", n);\n\n        // we first patch the addr itself as real crashpoint\n        __diagnoser_handle_single_crashpoint(g, addr, CP_RETADDR, true, true);\n\n        // we then patch other retaddrs sharing with the same callee\n        for (int i = 0; i < n; i++) {\n            if (addrs[i] == addr) {\n                continue;\n            }\n\n            // XXX: note that the following check is very necessary. 
Although\n            // CP_RETADDR cannot be an internal PP_BRIDGE (i.e., overlapping\n            // bridge), it can be a PP_BRIDGE after the patched jmp instruction.\n            if (z_patcher_check_patchpoint(g->patcher, addrs[i]) == PP_BRIDGE) {\n                continue;\n            }\n\n            __diagnoser_handle_single_crashpoint(g, addrs[i], CP_RETADDR, false,\n                                                 true);\n        }\n\n        z_buffer_destroy(retaddrs);\n    } else {\n        __diagnoser_handle_single_crashpoint(g, addr, cp_type, true, true);\n    }\n}\n\nZ_PRIVATE CPType __diagnoser_get_crashpoint_type(Diagnoser *g, addr_t addr,\n                                                 addr_t real_addr) {\n    if ((int64_t)addr < 0) {\n        z_info(\"find new address [internal]: \" COLOR(GREEN, \"%#lx\"), addr);\n        return CP_INTERNAL;\n    } else {\n        // XXX: retaddr patch may cause crash when enabling pdisasm.\n        // XXX: note that if diagnoser does not generate any CP_RETADDR, all\n        // ret-related functions of rewriter will not be invoked and no\n        // callee will be regarded as returnable. That is why this check is\n        // extremely important.\n        if (!z_disassembler_fully_support_prob_disasm(g->disassembler) &&\n            z_rewriter_check_retaddr_crashpoint(g->rewriter, real_addr) &&\n            real_addr == addr) {\n            z_info(\"find new address [retaddr]: \" COLOR(GREEN, \"%#lx\"),\n                   real_addr);\n            return CP_RETADDR;\n        } else {\n            z_info(\"find new address [external]: \" COLOR(GREEN, \"%#lx\"),\n                   real_addr);\n            return CP_EXTERNAL;\n        }\n    }\n}\n\nZ_PRIVATE addr_t __diagnoser_validate_crashpoint(Diagnoser *g, addr_t addr) {\n    assert(g != NULL);\n\n    // step (1). check INVALID_ADDR\n    if (addr == INVALID_ADDR) {\n        return INVALID_ADDR;\n    }\n\n    // step (2). 
validate addr by different type\n    if ((int64_t)addr < 0) {\n        // it is caused by a missed ujmp/ucall entry\n        addr = (~addr) + 1;\n        if (z_disassembler_is_within_disasm_range(g->disassembler, addr) &&\n            !z_disassembler_is_potential_inst_entrypoint(g->disassembler,\n                                                         addr)) {\n            return addr;\n        } else {\n            return INVALID_ADDR;\n        }\n    } else {\n        // it is cause by patch\n        if (z_patcher_check_patchpoint(g->patcher, addr) == PP_INVALID) {\n            return INVALID_ADDR;\n        } else {\n            return addr;\n        }\n    }\n}\n\n// XXX: addr must be an adjusted address if needed\nZ_PRIVATE void __diagnoser_handle_single_crashpoint(Diagnoser *g, addr_t addr,\n                                                    CPType type, bool is_real,\n                                                    bool need_log) {\n    if (type != CP_RETADDR) {\n        // The recursive disassembly treats all library function as returnable.\n        z_rewriter_rewrite(g->rewriter, addr);\n    }\n\n    if (type != CP_INTERNAL) {\n        // XXX: note that if it is a retaddr crashpoint, its corresponding\n        // shadow code should not start with an AFL trampoline.\n        addr_t shadow_addr = z_rewriter_get_shadow_addr(g->rewriter, addr);\n        assert(shadow_addr != INVALID_ADDR);\n        z_patcher_build_bridge(g->patcher, addr, shadow_addr, is_real);\n    }\n\n    if (need_log) {\n        g_queue_push_tail(g->crashpoints, GSIZE_TO_POINTER(addr));\n        g_queue_push_tail(g->crashpoints, GSIZE_TO_POINTER(type));\n        g_queue_push_tail(g->crashpoints, GINT_TO_POINTER(!!is_real));\n    }\n}\n\n// XXX: it is highly recommended to specify a timeout (>= 1000ms, or >=\n// AFL_HANG_TMOUT if set) for AFL by its -t option. Otherwise, the auto-scaled\n// timeout may cause incorrect error diagnosis (e.g., the dd_status may change\n// when timeout). 
more information can be found at\n// https://github.com/google/AFL/blob/master/afl-fuzz.c#L3244\n// XXX: note that we currently downgrade the delta debugging into a more\n// efficient dup-binary-search. This simplified algorithm works well as the\n// unintentional crash is caused by a single bad patch in most cases. The delta\n// debugging algorithm can be easily brought back if necessary.\n\n/*\n * XXX: to explain how and why the simplified algorithm works well, we first\n * need to give a definition about *key patch*.\n *\n * Key patch means if we remove this patch, the original unintentional crash\n * cannot be reproduced.\n *\n * The simplified algorithm works by first finding the last *key patch*. It\n * ignores all the patches after the last key patch. Then it checks if the\n * unintentional crash can be reproduced by only keeping the last DD_RANGE\n * uncertain patches(e.g., if the last key patch is 54-th patch and DD_RANGE ==\n * 32, then we only keep the 22-nd to 54-th patches).\n *\n * If the crash can be reproduced, it means all the rewriting errors are in the\n * DD_RANGE. Then it uses binary search to find the first key patch and regards\n * all patches between the first and the last key patch as rewriting errors.\n *\n * If the crash cannot be reproduced, it only regards the last key patch as an\n * error and re-runs the program to detect other rewriting errors.\n *\n * The algorithm works because of the following two observations.\n *\n * The first observation is that, all key patches must be rewriting errors. It\n * is because the correct patches are applied on the instructions and such\n * patches can only trigger intentional crashes (note that we can safeguard\n * non-crashing rewriting errors).\n *\n * The second observation is that, in most cases, an unintentional crash is\n * caused by a single rewriting error or a few continuous errors. It is because\n * the program is very sensitive to incorrect data flow. 
Once the data flow is\n * randomly polluted, the program is going to crash very soon.\n */\nZ_PRIVATE CRSStatus __diagnoser_delta_debug(Diagnoser *g, int status,\n                                            addr_t addr, uint32_t cov) {\n#define __UPDATE_STAGE_AND_RETURN(stage, ret) \\\n    do {                                      \\\n        g->dd_stage = (stage);                \\\n        return (ret);                         \\\n    } while (0)\n\n    if (!z_disassembler_fully_support_prob_disasm(g->disassembler)) {\n        assert(g->dd_stage == DD_NONE);\n        assert(IS_ABNORMAL_STATUS(status));\n        // XXX: this cannot be caused by checking runs\n        __UPDATE_STAGE_AND_RETURN(DD_NONE, CRS_STATUS_CRASH);\n    }\n\n    // XXX: it is very improtant to change addr to CRS_INVALID_IP, because for\n    // non-suspect status, addr is meaningless. Additionally, when it is caused\n    // by timeout, cov may vary. See Undecided Changes in the documents for more\n    // information.\n    if (IS_SUSPECT_STATUS(status)) {\n        z_info(\"suspect status (%d) at %#lx [cov: %#x]\", status, addr, cov);\n    } else if (IS_TIMEOUT_STATUS(status)) {\n        // XXX: for timeouted process, both addr and cov are useless\n        addr = CRS_INVALID_IP;\n        cov = 0;\n        z_info(\"timeout status (%d)\", status);\n    } else {\n        addr = CRS_INVALID_IP;\n        z_info(\"non-suspect status (%d) [cov: %#x]\", status, cov);\n    }\n\n    if (g->dd_stage == DD_NONE) {\n        // step (0). 
distinguish real crashes and checking runs\n        if (IS_ABNORMAL_STATUS(status)) {\n            g->dd_crs_status = CRS_STATUS_CRASH;\n            g->dd_banner =\n                COLOR(RED, \"a latent bug at %#lx with status %d [cov: %#x]\");\n        } else {\n            g->dd_crs_status = CRS_STATUS_NORMAL;\n            g->dd_banner = COLOR(\n                GREEN,\n                \"a passed checking run at %#lx with status %d [cov: %#x]\");\n        }\n\n        // step (1). check whether there is any uncertain patches\n        size_t n = z_patcher_uncertain_patches_n(g->patcher);\n        if (!n) {\n            // we do not need to wrap up the self correction procedure of\n            // patcher here, because it has not been started.\n            __UPDATE_STAGE_AND_RETURN(DD_NONE, g->dd_crs_status);\n        }\n\n        // step (2). set dd_status, dd_addr, and dd_cov\n        g->dd_status = status;\n        g->dd_addr = addr;\n        g->dd_cov = cov;\n\n        // step (3). enable delta debugging for patcher\n        g->dd_high = n;\n        z_patcher_self_correction_start(g->patcher);\n\n        // step (4). disable all uncertain patches\n        z_patcher_flip_uncertain_patches(g->patcher, false, -n);\n\n        // step (5). update dd_stage and return\n        __UPDATE_STAGE_AND_RETURN(DD_STAGE0, CRS_STATUS_DEBUG);\n    }\n\n    if (g->dd_stage == DD_STAGE0) {\n        // step (1). check whether the unintentional crash can be reproduced, if\n        // so, we can determine it is caused by a latent bug.\n        if (status == g->dd_status && addr == g->dd_addr && cov == g->dd_cov) {\n            z_info(g->dd_banner, addr, status, cov);\n            z_patcher_self_correction_end(g->patcher);\n            __UPDATE_STAGE_AND_RETURN(DD_NONE, g->dd_crs_status);\n        }\n\n        // step (2). 
it is caused by a rewriting error, let's setup the error\n        // diagnosis.\n        z_info(\"we encounter a rewriting error, let's do error diagnosis\");\n        g->dd_low = 0;\n        g->dd_e_cur = 0;\n\n        // step (3). set the mid for e_iter, and update e_iter\n        int64_t mid = (g->dd_low + g->dd_high) >> 1;\n        z_patcher_flip_uncertain_patches(g->patcher, false, mid - g->dd_e_cur);\n        g->dd_e_cur = mid;\n\n        // step (4). update stage and return\n        __UPDATE_STAGE_AND_RETURN(DD_STAGE1, CRS_STATUS_DEBUG);\n    }\n\n    if (g->dd_stage == DD_STAGE1) {\n        // step (1). update dd_low and dd_high\n        if (status == g->dd_status && addr == g->dd_addr && cov == g->dd_cov) {\n            z_info(\n                \"error diagnosis stage 1: test uncertain patches within [0, \"\n                \"%ld), reproduced: \" COLOR(GREEN, \"true\"),\n                g->dd_e_cur);\n            g->dd_high = g->dd_e_cur;\n        } else {\n            z_info(\n                \"error diagnosis stage 1: test uncertain patches within [0, \"\n                \"%ld), reproduced: \" COLOR(RED, \"false\"),\n                g->dd_e_cur);\n            g->dd_low = g->dd_e_cur;\n        }\n\n        assert(g->dd_low != g->dd_high);\n\n        // step (2). binary search\n        if (g->dd_low + 1 == g->dd_high) {\n            // step (2.1.1). the binary search is done, move e_iter to\n            // g->dd_high\n            z_patcher_flip_uncertain_patches(g->patcher, false,\n                                             g->dd_high - g->dd_e_cur);\n            g->dd_e_cur = g->dd_high;\n            assert(g->dd_e_cur > 0);\n\n            // step (2.1.2). 
check whether we need to go into DD_STAGE2\n            if (g->dd_e_cur <= DD_RANGE) {\n                // setup the binary search for s_iter\n                g->dd_low = 0;\n                g->dd_high = g->dd_e_cur;\n                g->dd_s_cur = g->dd_low;\n\n                // ready for s_iter binary search\n                int64_t mid = (g->dd_low + g->dd_high) >> 1;\n                z_patcher_flip_uncertain_patches(g->patcher, true,\n                                                 mid - g->dd_s_cur);\n                g->dd_s_cur = mid;\n                __UPDATE_STAGE_AND_RETURN(DD_STAGE3, CRS_STATUS_DEBUG);\n            } else {\n                g->dd_s_cur = 0;\n                int64_t target = g->dd_e_cur - DD_RANGE;\n                z_patcher_flip_uncertain_patches(g->patcher, true,\n                                                 target - g->dd_s_cur);\n                g->dd_s_cur = target;\n                __UPDATE_STAGE_AND_RETURN(DD_STAGE2, CRS_STATUS_DEBUG);\n            }\n        } else {\n            // step (2.2.1). set the mid for e_iter, and update e_iter\n            int64_t mid = (g->dd_low + g->dd_high) >> 1;\n            z_patcher_flip_uncertain_patches(g->patcher, false,\n                                             mid - g->dd_e_cur);\n            g->dd_e_cur = mid;\n\n            // step (2.2.2). 
update stage and return\n            __UPDATE_STAGE_AND_RETURN(DD_STAGE1, CRS_STATUS_DEBUG);\n        }\n    }\n\n    if (g->dd_stage == DD_STAGE2) {\n        if (status == g->dd_status && addr == g->dd_addr && cov == g->dd_cov) {\n            z_info(\n                \"error diagnosis stage 2: dup-binary-search works for [%ld, \"\n                \"%ld)\",\n                g->dd_s_cur, g->dd_e_cur);\n\n            // goto DD_STAGE3 for s_iter binary search\n            g->dd_low = g->dd_s_cur;\n            g->dd_high = g->dd_e_cur;\n\n            int64_t mid = (g->dd_low + g->dd_high) >> 1;\n            z_patcher_flip_uncertain_patches(g->patcher, true,\n                                             mid - g->dd_s_cur);\n            g->dd_s_cur = mid;\n            __UPDATE_STAGE_AND_RETURN(DD_STAGE3, CRS_STATUS_DEBUG);\n        } else {\n            // this branch means the distance between two rewriting errors are\n            // relatively large. So we first repair the last rewriting error.\n            z_info(\n                \"error diagnosis stage 2: the distance between two errors is \"\n                \"large, let's first repair \"\n                \"the last one: [%ld, %ld)\",\n                g->dd_e_cur - 1, g->dd_e_cur);\n            assert(g->dd_e_cur - 1 >= g->dd_s_cur);\n            z_patcher_flip_uncertain_patches(g->patcher, true,\n                                             (g->dd_e_cur - 1) - g->dd_s_cur);\n            z_patcher_self_correction_end(g->patcher);\n            // TODO: for checking runs, in this case, we can actually return a\n            // CRS_STATUS_DEBUG to force the fork server to re-run the checking\n            // run.\n            __UPDATE_STAGE_AND_RETURN(DD_NONE, CRS_STATUS_NOTHING);\n        }\n    }\n\n    if (g->dd_stage == DD_STAGE3) {\n        // step (1). 
update dd_low and dd_high\n        if (status == g->dd_status && addr == g->dd_addr && cov == g->dd_cov) {\n            z_info(\n                \"error diagnosis stage 3: test uncertain patches within [%ld, \"\n                \"%ld), reproduced: \" COLOR(GREEN, \"true\"),\n                g->dd_s_cur, g->dd_e_cur);\n            g->dd_low = g->dd_s_cur;\n        } else {\n            z_info(\n                \"error diagnosis stage 3: test uncertain patches within [%ld, \"\n                \"%ld), reproduced: \" COLOR(RED, \"false\"),\n                g->dd_s_cur, g->dd_e_cur);\n            g->dd_high = g->dd_s_cur;\n        }\n\n        assert(g->dd_low != g->dd_high);\n\n        // step (2). check whether the procedure is done\n        if (g->dd_low + 1 == g->dd_high) {\n            z_patcher_flip_uncertain_patches(g->patcher, true,\n                                             g->dd_low - g->dd_s_cur);\n            g->dd_s_cur = g->dd_low;\n            z_info(\"locate the error: [%ld, %ld)\", g->dd_s_cur, g->dd_e_cur);\n            z_patcher_self_correction_end(g->patcher);\n            __UPDATE_STAGE_AND_RETURN(DD_NONE, CRS_STATUS_NOTHING);\n        }\n\n        // step (3). 
continue binary search\n        int64_t mid = (g->dd_low + g->dd_high) >> 1;\n        z_patcher_flip_uncertain_patches(g->patcher, true, mid - g->dd_s_cur);\n        g->dd_s_cur = mid;\n        __UPDATE_STAGE_AND_RETURN(DD_STAGE3, CRS_STATUS_DEBUG);\n    }\n\n    EXITME(\"unreachable code\");\n    return g->dd_crs_status;  // used to emit warnings\n\n#undef __UPDATE_STAGE_AND_RETURN\n}\n\nZ_API Diagnoser *z_diagnoser_create(Patcher *patcher, Rewriter *rewriter,\n                                    Disassembler *disassembler,\n                                    RewritingOptArgs *opts) {\n    Diagnoser *g = STRUCT_ALLOC(Diagnoser);\n\n    g->opts = opts;\n\n    g->binary = z_disassembler_get_binary(disassembler);\n\n    g->patcher = patcher;\n    g->rewriter = rewriter;\n    g->disassembler = disassembler;\n\n    // all other DD-related fields will be initilized when enabling DD.\n    g->dd_stage = DD_NONE;\n\n    g->crashpoints = g_queue_new();\n\n    const char *binary_filename = z_binary_get_original_filename(g->binary);\n    g->cp_filename = z_strcat(CRASHPOINT_LOG_PREFIX, binary_filename);\n\n    return g;\n}\n\nZ_API void z_diagnoser_destroy(Diagnoser *g) {\n    g_queue_free(g->crashpoints);\n    z_free((void *)g->cp_filename);\n    z_free(g);\n}\n\nZ_API void z_diagnoser_read_crashpoint_log(Diagnoser *g) {\n    if (z_access(g->cp_filename, F_OK)) {\n        z_trace(\"log file for crashpoints (%s) does not exist\", g->cp_filename);\n        return;\n    }\n\n    Buffer *buffer = z_buffer_read_file(g->cp_filename);\n    CrashPoint *cp = (CrashPoint *)z_buffer_get_raw_buf(buffer);\n    size_t file_size = z_buffer_get_size(buffer);\n    for (size_t i = 0; i < file_size; i += sizeof(CrashPoint), cp++) {\n        g_queue_push_tail(g->crashpoints, GSIZE_TO_POINTER(cp->addr));\n        g_queue_push_tail(g->crashpoints, GSIZE_TO_POINTER(cp->type));\n        g_queue_push_tail(g->crashpoints, GINT_TO_POINTER(!!cp->is_real));\n    }\n\n    
z_buffer_destroy(buffer);\n}\n\nZ_API void z_diagnoser_write_crashpoint_log(Diagnoser *g) {\n#ifndef BINARY_SEARCH_INVALID_CRASH\n    // write down all crashpoints\n    FILE *f = z_fopen(g->cp_filename, \"wb\");\n    CrashPoint cp = {\n        .addr = INVALID_ADDR,\n        .type = CP_NONE,\n        .is_real = false,\n    };\n\n    GList *l = g->crashpoints->head;\n    while (l != NULL) {\n        // get address first\n        cp.addr = (addr_t)l->data;\n\n        // get status\n        l = l->next;\n        cp.type = (CPType)l->data;\n\n        // get is_real\n        l = l->next;\n        cp.is_real = !!(l->data);\n\n        if (z_fwrite(&cp, sizeof(CrashPoint), 1, f) != 1) {\n            EXITME(\"error on writing crashpoint log file\");\n        }\n\n        // go to next CrashPoint struct\n        l = l->next;\n    }\n\n    z_fclose(f);\n#endif\n}\n\nZ_API void z_diagnoser_apply_logged_crashpoints(Diagnoser *g) {\n    // replay all\n    GList *l = g->crashpoints->head;\n    while (l != NULL) {\n        // get address first\n        addr_t addr = (addr_t)l->data;\n\n        // get status\n        l = l->next;\n        CPType type = (CPType)l->data;\n\n        // get is_real\n        l = l->next;\n        bool is_real = !!(l->data);\n\n        // adjust the bridge crashpoint\n        // XXX: it does not exactly follow the original execution, but it should\n        // get the same rewriting/pathcing as the original execution does.\n        if (type != CP_INTERNAL) {\n            addr_t adjusted_addr =\n                z_patcher_adjust_bridge_address(g->patcher, addr);\n            if (adjusted_addr != addr) {\n                EXITME(\n                    \"the logged crashpoint does not generate the same patching \"\n                    \"as the original execution does\");\n            }\n        }\n\n        // update the retaddr information in rewriter\n        if (type == CP_RETADDR && is_real) {\n            Buffer *addrs = 
z_rewriter_new_validate_retaddr(g->rewriter, addr);\n            // XXX: we directly free addrs as it is useless here\n            z_buffer_destroy(addrs);\n        }\n\n        // invoke z_diagnoser_new_crashpoint\n        z_info(\"logged %s crashpoint: %#lx\", z_cptype_string(type), addr);\n        __diagnoser_handle_single_crashpoint(g, addr, type, is_real, false);\n\n        // go to next CrashPoint struct\n        l = l->next;\n    }\n\n    z_rewriter_optimization_stats(g->rewriter);\n    z_patcher_bridge_stats(g->patcher);\n}\n\nZ_API CRSStatus z_diagnoser_new_crashpoint(Diagnoser *g, int status,\n                                           addr_t addr, uint32_t cov,\n                                           bool check_run_enabled) {\n    // step (0). check whether diagnoser is under delta debugging mode\n    if (g->dd_stage != DD_NONE) {\n        // the diagnoser is under delta debugging mode\n        return __diagnoser_delta_debug(g, status, addr, cov);\n    }\n\n    // step (1). check whether the status is suspect\n    if (!IS_ABNORMAL_STATUS(status)) {\n        if (check_run_enabled) {\n            // this will only happen when checking runs are enabled\n            return __diagnoser_delta_debug(g, status, addr, cov);\n        } else {\n            return CRS_STATUS_NORMAL;\n        }\n    }\n    if (!IS_SUSPECT_STATUS(status)) {\n        // it is an unintentional crash\n        assert(g->dd_stage == DD_NONE);\n        return __diagnoser_delta_debug(g, status, addr, cov);\n    }\n    if (addr == CRS_INVALID_IP) {\n        EXITME(\"the client exits as SUSPECT but no suspected address is sent\");\n    }\n\n    // step (2). 
validate crashpoint\n    addr_t real_addr = __diagnoser_validate_crashpoint(g, addr);\n    // XXX: we have to adjust bridge patch pointer when real_addr is unchanged.\n    if (real_addr == addr) {\n        // in this case, it cannot be a CP_INTERNAL\n        real_addr = z_patcher_adjust_bridge_address(g->patcher, real_addr);\n    }\n\n    // step (3). check whether real_addr is INVALID_ADDR\n    if (real_addr == INVALID_ADDR) {\n        // it is an unintentional crash\n        z_info(COLOR(RED, \"a potential crash with suspect status! (%#lx)\"),\n               addr);\n        assert(g->dd_stage == DD_NONE);\n        return __diagnoser_delta_debug(g, status, addr, cov);\n    }\n\n    // step (4). get CPType\n    CPType cp_type = __diagnoser_get_crashpoint_type(g, addr, real_addr);\n\n    // step (5). patch the intentional crash\n    __diagnoser_patch_crashpoint(g, real_addr, cp_type);\n\n    z_rewriter_optimization_stats(g->rewriter);\n    z_patcher_bridge_stats(g->patcher);\n\n    // step (6). check remmap\n    if (z_binary_check_state(g->binary, ELFSTATE_SHADOW_EXTENDED)) {\n        z_info(\"underlying shadow file is extended\");\n\n        // do not forget to disable the shadow_extened flag\n        z_binary_set_elf_state(g->binary,\n                               ELFSTATE_SHADOW_EXTENDED | ELFSTATE_DISABLE);\n\n        return CRS_STATUS_REMMAP;\n    } else {\n        return CRS_STATUS_NOTHING;\n    }\n}\n"
  },
  {
    "path": "src/diagnoser.h",
    "content": "/*\n * diagnoser.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __DIAGNOSER_H\n#define __DIAGNOSER_H\n\n#include \"binary.h\"\n#include \"config.h\"\n#include \"crs_config.h\"\n#include \"disassembler.h\"\n#include \"patcher.h\"\n#include \"rewriter.h\"\n#include \"sys_optarg.h\"\n\n#include <gmodule.h>\n\n/*\n * CrashPoint Type\n *\n *      CP_INTERNAL:    need to disassemble address\n *      CP_EXTERNAL:    need to disassembly address and build jump bridge\n *      CP_RETADDR:     need to build jump bridge\n */\n// XXX: CP_RETADDR are only used when pdisasm is not fully supported. Note that\n// in this situation, even we misidentify a CP_RETADDR, it would not impact the\n// rewriting procedure (i.e., any wrong bridge will got fixed later / not\n// uncertain_patches in Patcher).\ntypedef enum cp_type_t {\n    CP_NONE = 0UL,\n    CP_INTERNAL,  // internal indirect call/jump\n    CP_EXTERNAL,  // external callback from library\n    CP_RETADDR,   // return address when calling library\n} CPType;\n\n#define z_cptype_string(t)              \\\n    ((type == CP_INTERNAL) ? \"INTERNAL\" \\\n                           : ((type == CP_EXTERNAL) ? 
\"EXTERNAL\" : \"RETADDR\"))\n\n/*\n * Logged CrashPoint\n */\ntypedef struct crash_point_t {\n    addr_t addr;\n    CPType type;\n    bool is_real;\n} CrashPoint;\n\n/*\n * The range of Dup-Binary-Search\n */\n#define DD_RANGE 4\n\n/*\n * Stage for delta debugging mode\n */\ntypedef enum delta_debugging_stage {\n    DD_STAGE0,  // validate whether it is a rewriting error\n    DD_STAGE1,  // binary search to locate the e_iter in Patcher\n    DD_STAGE2,  // validate whether all rewriting errors are in a DD_RANGE\n    DD_STAGE3,  // binary search to locate the s_iter in Patcher\n\n    DD_NONE = -1,  // not in the delta debugging mode\n} DDStage;\n\n/*\n * Diagnoser distinguishes the intentional crashes and the unintentional ones,\n * while it also manages the schedule of self-recovering.\n */\nSTRUCT(Diagnoser, {\n    Binary *binary;\n\n    Patcher *patcher;\n    Rewriter *rewriter;\n    Disassembler *disassembler;\n\n    DDStage dd_stage;\n    int dd_status;\n    addr_t dd_addr;\n    uint32_t dd_cov;\n    // used for distinguishing crash and checking runs\n    CRSStatus dd_crs_status;\n    const char *dd_banner;\n    // used for dup-binary-search (int64_t to avoid overflow)\n    int64_t dd_low;\n    int64_t dd_high;\n    int64_t dd_s_cur;\n    int64_t dd_e_cur;\n\n    // XXX: for efficiency, a CrashPoint struct is broken into three elements in\n    // the queue.\n    GQueue *crashpoints;\n    const char *cp_filename;\n\n    // rewriting optargs\n    RewritingOptArgs *opts;\n});\n\nDECLARE_GETTER(Diagnoser, diagnoser, GQueue *, crashpoints);\n\n/*\n * Create diagnoser\n */\nZ_API Diagnoser *z_diagnoser_create(Patcher *patcher, Rewriter *rewriter,\n                                    Disassembler *disassembler,\n                                    RewritingOptArgs *opts);\n\n/*\n * Destroy diagnoser\n */\nZ_API void z_diagnoser_destroy(Diagnoser *g);\n\n/*\n * Read recorded crashpoints from log file\n */\nZ_API void z_diagnoser_read_crashpoint_log(Diagnoser 
*g);\n\n/*\n * Log down recorded crashpoints\n */\nZ_API void z_diagnoser_write_crashpoint_log(Diagnoser *g);\n\n/*\n * Apply all logged crashpoints\n */\nZ_API void z_diagnoser_apply_logged_crashpoints(Diagnoser *g);\n\n/*\n * Find a new crashpoint, and diagnoser will validate this crashpoint and does\n * patch accordingly.\n */\nZ_API CRSStatus z_diagnoser_new_crashpoint(Diagnoser *g, int status,\n                                           addr_t addr, uint32_t cov,\n                                           bool check_run_enabled);\n\n#endif\n"
  },
  {
    "path": "src/disassembler.c",
    "content": "/*\n * disassembler.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"disassembler.h\"\n#include \"capstone_.h\"\n#include \"elf_.h\"\n#include \"interval_splay.h\"\n#include \"restricted_ptr.h\"\n#include \"utils.h\"\n\n#include <capstone/capstone.h>\n#include <elf.h>\n#include <gmodule.h>\n\n#include \"prob_disasm/prob_disasm_complete.c\"\n#include \"prob_disasm/prob_disasm_simple.c\"\n\n#define SUPERSET_DISASM_THRESHOLD 0x400000\n\n/*\n * Runtime binding for probabilistic disassembly\n */\n#define __disassembler_invoke_prob_disasm(d, func, __args...) \\\n    ({ (d->enable_pdisasm ? 
func(__args) : func##_S(__args)); })\n\n/*\n * Function Pointer: destroy a cs_insn\n */\nZ_PRIVATE void __disassembler_free_cs_insn(cs_insn *inst);\n\n/*\n * Superset disassembly\n */\nZ_PRIVATE void __disassembler_superset_disasm(Disassembler *d);\n\n/*\n * Check whether underlying binary has inlined data (potentially)\n */\nZ_PRIVATE bool __disassembler_has_inlined_data(Disassembler *d);\n\n/*\n * Analyse instruction group, return whether need to continue analysis.\n */\nZ_PRIVATE bool __disassembler_analyze_inst(cs_insn *inst, addr_t *target);\n\n/*\n * Disassembly _start / .init / .fini / main\n */\nZ_RESERVED Z_PRIVATE void __disassembler_pre_disasm(Disassembler *d);\n\n/*\n * Getter and Setter\n */\nDEFINE_GETTER(Disassembler, disassembler, Binary *, binary);\nDEFINE_GETTER(Disassembler, disassembler, UCFG_Analyzer *, ucfg_analyzer);\nDEFINE_GETTER(Disassembler, disassembler, bool, enable_pdisasm);\n\nZ_PRIVATE void __disassembler_free_cs_insn(cs_insn *inst) { cs_free(inst, 1); }\n\n/*\n * XXX: This function is out of date. 
Hence, there is no guarantee to use it.\n */\nZ_RESERVED Z_PRIVATE void __disassembler_pre_disasm(Disassembler *d) {\n    ELF *e = z_binary_get_elf(d->binary);\n\n    z_info(\"disassemble .init/.fini\");\n\n    GQueue *bbs = g_queue_new();\n\n    // _start\n    addr_t entrypoint = z_elf_get_ori_entry(e);\n    g_queue_push_tail(bbs, GSIZE_TO_POINTER(entrypoint));\n\n    // .init\n    addr_t _init = z_elf_get_init(e);\n    z_info(\".init: %#lx\", _init);\n    g_queue_push_tail(bbs, GSIZE_TO_POINTER(_init));\n\n    // .fini\n    addr_t _fini = z_elf_get_fini(e);\n    z_info(\".fini: %#lx\", _fini);\n    g_queue_push_tail(bbs, GSIZE_TO_POINTER(_fini));\n\n    Rptr *array = NULL;\n    size_t array_size = 0;\n    addr_t array_addr = INVALID_ADDR;\n\n    // .init.array\n    Elf64_Shdr *init_array = z_elf_get_shdr_init_array(e);\n    array_size = init_array->sh_size;\n    array_addr = init_array->sh_addr;\n    array = z_elf_vaddr2ptr(e, array_addr);\n    for (int i = 0; i < array_size / sizeof(addr_t); i++) {\n        addr_t fcn = *z_rptr_get_ptr(array, addr_t);\n        z_info(\".init.array[%d]: %#lx\", i, fcn);\n        g_queue_push_tail(bbs, GSIZE_TO_POINTER(fcn));\n        z_rptr_inc(array, addr_t, 1);\n    }\n    z_rptr_destroy(array);\n\n    // .fini.array\n    Elf64_Shdr *fini_array = z_elf_get_shdr_fini_array(e);\n    array_size = fini_array->sh_size;\n    array_addr = fini_array->sh_addr;\n    array = z_elf_vaddr2ptr(e, array_addr);\n    for (int i = 0; i < array_size / sizeof(addr_t); i++) {\n        addr_t fcn = *z_rptr_get_ptr(array, addr_t);\n        z_info(\".fini.array[%d]: %#lx\", i, fcn);\n        g_queue_push_tail(bbs, GSIZE_TO_POINTER(fcn));\n        z_rptr_inc(array, addr_t, 1);\n    }\n    z_rptr_destroy(array);\n\n    // disassemble without call\n    while (!g_queue_is_empty(bbs)) {\n        addr_t bb_addr = (addr_t)g_queue_pop_head(bbs);\n\n        addr_t cur_addr = bb_addr;\n        cs_insn *inst = NULL;\n\n        do {\n            if 
(g_hash_table_lookup(d->potential_insts,\n                                    GSIZE_TO_POINTER(cur_addr))) {\n                break;\n            }\n\n            inst = z_disassembler_get_superset_disasm(d, cur_addr);\n            if (inst == NULL) {\n                break;\n            }\n\n            g_hash_table_insert(d->recursive_disasm, GSIZE_TO_POINTER(cur_addr),\n                                (gpointer)inst);\n\n            if (z_capstone_is_jmp(inst) || z_capstone_is_cjmp(inst) ||\n                z_capstone_is_loop(inst) || z_capstone_is_xbegin(inst)) {\n                cs_detail *detail = inst->detail;\n                if ((detail->x86.op_count == 1) &&\n                    (detail->x86.operands[0].type == X86_OP_IMM)) {\n                    g_queue_push_tail(\n                        bbs, GSIZE_TO_POINTER(detail->x86.operands[0].imm));\n                }\n            }\n\n            cur_addr += inst->size;\n        } while (!z_capstone_is_terminator(inst));\n    }\n\n    z_info(\"disassemble .init/.fini done\");\n    z_info(\"we have %ld correct instructions disassemblied\",\n           g_hash_table_size(d->recursive_disasm));\n}\n\n// XXX: here we simply check whether linear disassembly can decode all\n// instructions (which seems good enough for most cases), but we can have\n// advanced algorithms in the future (e.g., using entropy or data hints from\n// probabilistic disassembly)\nZ_PRIVATE bool __disassembler_has_inlined_data(Disassembler *d) {\n    assert(d != NULL);\n\n    addr_t cur_addr = d->text_addr;\n    do {\n        cs_insn *cur_inst = z_disassembler_get_superset_disasm(d, cur_addr);\n        if (!cur_inst) {\n            return true;\n        }\n        cur_addr += cur_inst->size;\n    } while (cur_addr < d->text_addr + d->text_size);\n\n    return false;\n}\n\n// XXX: we do not use UCFG_Analyzer here, as the following code runs faster than\n// a searching operation in hashmap. 
Note that the following code will happen\n// during fuzzing\nZ_PRIVATE bool __disassembler_analyze_inst(cs_insn *inst, addr_t *targets) {\n    assert(inst != NULL);\n\n    cs_detail *detail = inst->detail;\n\n    if (z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst)) {\n        assert((detail->x86.op_count == 1) &&\n               (detail->x86.operands[0].type == X86_OP_IMM));\n\n        *(targets++) = inst->address + inst->size;\n        *targets = detail->x86.operands[0].imm;\n\n    } else if (z_capstone_is_jmp(inst) || z_capstone_is_call(inst) ||\n               z_capstone_is_xbegin(inst)) {\n        if ((detail->x86.op_count == 1) &&\n            (detail->x86.operands[0].type == X86_OP_IMM)) {\n            // direct call and direct/condition jump\n            *targets = detail->x86.operands[0].imm;\n        } else {\n            // indirect call/jump\n            z_trace(\"indirect call/jmp \" CS_SHOW_INST(inst));\n        }\n    }\n\n    return !z_capstone_is_terminator(inst);\n}\n\nZ_PRIVATE void __disassembler_superset_disasm(Disassembler *d) {\n    assert(d);\n\n    // step (0). get .text section range.\n    ELF *e = z_binary_get_elf(d->binary);\n    addr_t text_addr = d->text_addr;\n    size_t text_size = d->text_size;\n\n    z_info(\"start superset disassembly in [%#lx, %#lx]\", text_addr,\n           text_size + text_addr - 1);\n\n    // step (1). get code buf\n    Rptr *buf = z_elf_vaddr2ptr(e, text_addr);\n\n    // step (2). 
disassembly\n    for (addr_t cur_addr = text_addr; cur_addr < text_addr + text_size;\n         cur_addr++) {\n        CS_DISASM(buf, cur_addr, 1);\n        if (cs_count == 1) {\n            z_ucfg_analyzer_add_inst(d->ucfg_analyzer, cur_addr, cs_inst,\n                                     false);\n            g_hash_table_insert(d->superset_disasm, GSIZE_TO_POINTER(cur_addr),\n                                (gpointer)cs_inst);\n            z_addr_dict_set(d->occ_addrs, cur_addr, z_buffer_create(NULL, 0));\n\n            z_trace(\"superset disassembly \" CS_SHOW_INST(cs_inst));\n\n            cs_inst = NULL;  // avoid double free\n        }\n        z_rptr_inc(buf, uint8_t, 1);\n    }\n\n    z_info(\"superset disassembly done, found %ld instructions\",\n           g_hash_table_size(d->superset_disasm));\n\n    // step (3). remember to free code buffer\n    z_rptr_destroy(buf);\n\n    // step (4). calculate occluded address\n    for (addr_t cur_addr = text_addr; cur_addr < text_addr + text_size;\n         cur_addr++) {\n        // validation\n        cs_insn *inst = (cs_insn *)g_hash_table_lookup(\n            d->superset_disasm, GSIZE_TO_POINTER(cur_addr));\n        if (!inst) {\n            continue;\n        }\n\n        // find all possible occluded instructions\n        for (addr_t occ_addr = cur_addr + 1; occ_addr < cur_addr + inst->size;\n             occ_addr++) {\n            cs_insn *occ_inst = (cs_insn *)g_hash_table_lookup(\n                d->superset_disasm, GSIZE_TO_POINTER(occ_addr));\n            if (!occ_inst) {\n                continue;\n            }\n\n            // update both\n            z_buffer_append_raw(z_addr_dict_get(d->occ_addrs, cur_addr),\n                                (uint8_t *)&occ_addr, sizeof(occ_addr));\n            z_buffer_append_raw(z_addr_dict_get(d->occ_addrs, occ_addr),\n                                (uint8_t *)&cur_addr, sizeof(cur_addr));\n        }\n    }\n}\n\nZ_API Disassembler *z_disassembler_create(Binary *b, 
RewritingOptArgs *opts) {\n    Disassembler *d = STRUCT_ALLOC(Disassembler);\n\n    d->opts = opts;\n\n    d->binary = b;\n\n    d->superset_disasm =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                              (GDestroyNotify)(&__disassembler_free_cs_insn));\n    d->recursive_disasm =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    // recursive_disasm does not free cs_insn, freed by superset_disasm\n    d->linear_disasm =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    // linear_disasm does not free cs_insn, freed by superset_disasm\n    d->prob_disasm = NULL;\n\n    d->potential_insts =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    d->potential_blocks =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    d->ucfg_analyzer = z_ucfg_analyzer_create(d->binary, d->opts);\n\n    // we choose to superset disassemble relative-small binary\n    ELF *e = z_binary_get_elf(d->binary);\n    Elf64_Shdr *text = z_elf_get_shdr_text(e);\n    d->text_addr = text->sh_addr;\n    d->text_size = text->sh_size;\n\n    // get occluded address\n    z_addr_dict_init(d->occ_addrs, d->text_addr, d->text_size);\n\n    if (d->text_size <= SUPERSET_DISASM_THRESHOLD) {\n        z_info(\".text section (%#lx bytes) is suitable for pre-disasm\",\n               d->text_size);\n\n        // do not backup .text\n        d->text_backup = NULL;\n\n        __disassembler_superset_disasm(d);\n    } else {\n        z_info(\".text section (%#lx bytes) is not suitable for pre-disasm\",\n               d->text_size);\n\n        d->text_backup = z_alloc(d->text_size, sizeof(uint8_t));\n        Rptr *ptr = z_elf_vaddr2ptr(e, d->text_addr);\n        z_rptr_memcpy(d->text_backup, ptr, d->text_size);\n        z_rptr_destroy(ptr);\n    }\n\n    d->enable_pdisasm =\n        (!d->opts->force_linear) &&\n        (d->opts->force_pdisasm || 
__disassembler_has_inlined_data(d));\n    z_info(\"enable probabilistic disassembly: %s\",\n           d->enable_pdisasm ? \"true\" : \"false\");\n\n    __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_create, d);\n    return d;\n}\n\nZ_API void z_disassembler_destroy(Disassembler *d) {\n    __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_destroy, d);\n\n    g_hash_table_destroy(d->superset_disasm);\n    g_hash_table_destroy(d->recursive_disasm);\n    g_hash_table_destroy(d->linear_disasm);\n\n    if (d->text_backup) {\n        z_free(d->text_backup);\n    }\n\n    g_hash_table_destroy(d->potential_insts);\n    g_hash_table_destroy(d->potential_blocks);\n\n    z_addr_dict_destroy(d->occ_addrs, &z_buffer_destroy);\n\n    z_ucfg_analyzer_destroy(d->ucfg_analyzer);\n\n    z_free(d);\n}\n\nZ_API void z_disassembler_get_prob_disasm_internal(\n    Disassembler *d, addr_t addr, cs_insn **inst, uint32_t *scc_id,\n    double128_t *inst_hint, double128_t *inst_lost, double128_t *data_hint,\n    double128_t *D, double128_t *P) {\n    __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_get_internal, d,\n                                      addr, inst, scc_id, inst_hint, inst_lost,\n                                      data_hint, D, P);\n}\n\nZ_API void z_disassembler_prob_disasm(Disassembler *d) {\n    __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_start, d);\n}\n\nZ_API double128_t z_disassembler_get_prob_disasm(Disassembler *d, addr_t addr) {\n    return __disassembler_invoke_prob_disasm(\n        d, __disassembler_pdisasm_get_inst_prob, d, addr);\n}\n\nZ_API void z_diassembler_update_prob_disasm(Disassembler *d, addr_t addr,\n                                            bool is_inst) {\n    __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_update, d, addr,\n                                      is_inst);\n}\n\n// XXX: note that this function is not completed.\nZ_API GQueue *z_disassembler_linear_disasm(Disassembler *d) 
{\n    assert(d != NULL);\n\n    // step (0). get .text section range.\n    addr_t text_addr = d->text_addr;\n    size_t text_size = d->text_size;\n\n    // step (1). other structures\n    addr_t cur_addr = text_addr;\n    GQueue *bbs = g_queue_new();\n    g_queue_push_tail(bbs, GSIZE_TO_POINTER(cur_addr));  // first addr is a BB\n\n    // step (2). linear disassembler\n    GQueue *tmp_bbs = g_queue_new();\n    GQueue *tmp_insts = g_queue_new();\n    while (cur_addr < text_addr + text_size) {\n        bool valid_bb = true;\n        addr_t tmp_cur_addr = cur_addr;\n\n        // step (2.1) use inner loop to check whether current basic block is\n        // valid. Note that when the inner exits, the tmp_cur_addr is always the\n        // next no-tried instruction address\n        do {\n            cs_insn *inst = z_disassembler_get_superset_disasm(d, tmp_cur_addr);\n\n            // check instruction itself\n            if (inst == NULL) {\n                z_trace(\"invalid instruction in linear disassembly: %#lx\",\n                        tmp_cur_addr);\n                valid_bb = false;\n                break;\n            }\n\n            // check branch instructions and update basic block information\n            cs_detail *detail = inst->detail;\n            if ((z_capstone_is_call(inst) || z_capstone_is_cjmp(inst) ||\n                 z_capstone_is_xbegin(inst) || z_capstone_is_loop(inst) ||\n                 z_capstone_is_jmp(inst)) &&  // check instruction type\n                ((detail->x86.op_count == 1) &&\n                 (detail->x86.operands[0].type ==\n                  X86_OP_IMM))  // check direct transfer\n            ) {\n                addr_t tar_addr = detail->x86.operands[0].imm;\n                if (tar_addr >= text_addr && tar_addr < text_addr + text_size) {\n                    // target address inside .text\n                    // TODO: acutally, we should check for linear disassembly\n                    // result, instead of superset 
disassembly!\n                    if (z_disassembler_get_superset_disasm(d, tar_addr)) {\n                        g_queue_push_tail(tmp_bbs, GSIZE_TO_POINTER(tar_addr));\n                    } else {\n                        z_trace(\n                            \"invalid instruction in linear disassembly \"\n                            \"(target): %#lx\",\n                            tmp_cur_addr);\n                        valid_bb = false;\n                        break;\n                    }\n                }\n            }\n\n            // TODO: do not forget cjmp and loop's false branch\n\n            // update instruction\n            g_queue_push_tail(tmp_insts, GSIZE_TO_POINTER(tmp_cur_addr));\n\n            // update tmp_cur_addr\n            tmp_cur_addr += inst->size;\n\n            // if inst is terminator, break temporary try\n            if (z_capstone_is_terminator(inst)) {\n                break;\n            }\n        } while (tmp_cur_addr < text_addr + text_size);\n\n        if (valid_bb) {\n            // step (2.2): if valid, update bbs and insts, and update cur_addr.\n            //      Note that original cur_addr is another bb entrypoint.\n            g_queue_push_tail(bbs, GSIZE_TO_POINTER(cur_addr));\n            g_hash_table_insert(d->potential_blocks, GSIZE_TO_POINTER(cur_addr),\n                                GSIZE_TO_POINTER(true));\n            while (!g_queue_is_empty(tmp_bbs)) {\n                addr_t bb_addr = (addr_t)g_queue_pop_head(tmp_bbs);\n                g_queue_push_tail(bbs, GSIZE_TO_POINTER(bb_addr));\n                g_hash_table_insert(d->potential_blocks,\n                                    GSIZE_TO_POINTER(bb_addr),\n                                    GSIZE_TO_POINTER(true));\n            }\n            while (!g_queue_is_empty(tmp_insts)) {\n                addr_t inst_addr = (addr_t)g_queue_pop_head(tmp_insts);\n                cs_insn *inst =\n                    z_disassembler_get_superset_disasm(d, 
inst_addr);\n                assert(inst);\n                g_hash_table_insert(d->linear_disasm,\n                                    GSIZE_TO_POINTER(inst_addr),\n                                    (gpointer)inst);\n                g_hash_table_insert(d->potential_insts,\n                                    GSIZE_TO_POINTER(inst_addr),\n                                    (gpointer)inst);\n            }\n            cur_addr = tmp_cur_addr;\n        } else {\n            // setp (2.3): if not valid, inc cur_addr and clear tmp_bbs/_insts\n            g_queue_clear(tmp_bbs);\n            g_queue_clear(tmp_insts);\n            cur_addr += 1;\n        }\n    }\n\n    g_queue_free(tmp_bbs);\n    g_queue_free(tmp_insts);\n\n    z_info(\"we have %ld instruction linearly disassemblied\",\n           g_hash_table_size(d->linear_disasm));\n\n    z_info(\"with %ld basic block entrys\", g_queue_get_length(bbs));\n\n    return bbs;\n}\n\nZ_API GQueue *z_disassembler_recursive_disasm(Disassembler *d, addr_t addr) {\n    assert(d);\n    z_trace(\"disassemble at %#lx\", addr);\n\n    GQueue *new_bbs = g_queue_new();\n\n    // step (0). get .text section range.\n    // We do not disassembly any code outside this range.\n    addr_t text_addr = d->text_addr;\n    size_t text_size = d->text_size;\n    z_trace(\".text section: [%#lx, %#lx]\", text_addr,\n            text_addr + text_size - 1);\n    if (!((addr >= text_addr) && (addr - text_addr < text_size))) {\n        z_warn(\"%#lx is out of .text section\", addr);\n        return new_bbs;\n    }\n\n    // step (1). check addr is an new BB (XXX: this might be wrong)\n    if (!g_hash_table_lookup(d->potential_blocks, GSIZE_TO_POINTER(addr))) {\n        g_queue_push_tail(new_bbs, GSIZE_TO_POINTER(addr));\n        g_hash_table_insert(d->potential_blocks, GSIZE_TO_POINTER(addr),\n                            GSIZE_TO_POINTER(true));\n    }\n\n    // step (2). 
init queue\n    GQueue *q = g_queue_new();\n    g_queue_push_tail(q, GSIZE_TO_POINTER(addr));\n\n    // step (3). disassembly until no new target\n    while (!g_queue_is_empty(q)) {\n        // step (3.1). get starting address\n        addr_t bb_addr = (addr_t)g_queue_pop_head(q);\n        addr_t cur_addr = bb_addr;\n        cs_insn *inst = NULL;\n\n        z_trace(\"recursive disassembly: BB address [%#lx]\", bb_addr);\n\n        // step (3.2). disassembly basic block\n        while (true) {\n            // [1]. check whether this region is disassembled\n            if (g_hash_table_lookup(d->potential_insts,\n                                    GSIZE_TO_POINTER(cur_addr))) {\n                break;\n            }\n\n            // [2]. get corresponding instruction\n            cs_insn *tmp = z_disassembler_get_superset_disasm(d, cur_addr);\n\n            // [3]. check whether it is a valid instruction\n            if (tmp == NULL) {\n                z_warn(\"go into an invalid address: %#lx\", cur_addr);\n                if (inst != NULL) {\n                    z_warn(\"previous instruction \" CS_SHOW_INST(inst));\n                }\n                break;\n            }\n\n            // [4]. add into recursive_disasm and update potential instruction\n            inst = tmp;\n            z_trace(\"recursive disassembly \" CS_SHOW_INST(inst));\n            g_hash_table_insert(d->recursive_disasm, GSIZE_TO_POINTER(cur_addr),\n                                (gpointer)inst);\n            g_hash_table_insert(d->potential_insts, GSIZE_TO_POINTER(cur_addr),\n                                (gpointer)inst);\n\n            // [5]. analyze instruction group\n            addr_t target_addrs[2] = {INVALID_ADDR, INVALID_ADDR};\n            bool do_more = __disassembler_analyze_inst(inst, target_addrs);\n            z_trace(\"find target addresss: %#lx %#lx\", target_addrs[0],\n                    target_addrs[1]);\n\n            // [6]. 
update target\n            for (int i = 0; i < 2; i++) {\n                addr_t target_addr = target_addrs[i];\n                if (target_addr >= text_addr &&\n                    target_addr - text_addr < text_size) {\n                    g_queue_push_tail(q, GSIZE_TO_POINTER(target_addr));\n\n                    z_trace(\"find new target: %#lx\", target_addr);\n\n                    if (!g_hash_table_lookup(d->potential_blocks,\n                                             GSIZE_TO_POINTER(target_addr))) {\n                        g_queue_push_tail(new_bbs,\n                                          GSIZE_TO_POINTER(target_addr));\n\n                        g_hash_table_insert(d->potential_blocks,\n                                            GSIZE_TO_POINTER(target_addr),\n                                            GSIZE_TO_POINTER(true));\n                    }\n                }\n            }\n\n            // [7]. update cur_addr\n            cur_addr += inst->size;\n\n            // [8]. break if needed\n            if (!do_more) {\n                break;\n            }\n        }\n    }\n\n    // step (4). free queue\n    g_queue_free(q);\n\n    // step (5). 
output how many instruction are correctly disassembly\n    z_info(\"number of new basic blocks      : %ld\",\n           g_queue_get_length(new_bbs));\n    z_info(\"number of rewritten instructions: %ld\",\n           g_hash_table_size(d->recursive_disasm));\n\n    return new_bbs;\n}\n\n// update superset disasm\nZ_API const cs_insn *z_disassembler_update_superset_disasm(Disassembler *d,\n                                                           addr_t addr) {\n    const cs_insn *res = NULL;\n\n    addr_t text_addr = d->text_addr;\n    size_t text_size = d->text_size;\n    if (addr < text_addr || addr >= text_addr + text_size) {\n        EXITME(\"try to re-disasm an invalid address: %#lx\", addr);\n    }\n\n    if (z_disassembler_is_potential_inst_entrypoint(d, addr)) {\n        EXITME(\"try to re-disasm a validated address: %#lx\", addr);\n    }\n\n    ELF *e = z_binary_get_elf(d->binary);\n    Rptr *ptr = z_elf_vaddr2ptr(e, addr);\n    CS_DISASM(ptr, addr, 1);\n    if (cs_count == 1) {\n        // update superset disassembly\n        // XXX: the z_ucfg_analyzer_add_inst must be placed before\n        // g_hash_table_insert, as the g_hash_table_insert may free the original\n        // instruction\n        z_ucfg_analyzer_add_inst(d->ucfg_analyzer, addr, cs_inst, true);\n        g_hash_table_insert(d->superset_disasm, GSIZE_TO_POINTER(addr),\n                            (gpointer)cs_inst);\n        res = cs_inst;\n\n        // update backup\n        if (d->text_backup) {\n            size_t off = addr - text_addr;\n            memcpy(d->text_backup + off, res->bytes, res->size);\n        }\n\n        cs_inst = NULL;  // avoid double free\n    } else {\n        EXITME(\"invalid instruction at %#lx\", addr);\n    }\n\n    z_rptr_destroy(ptr);\n    assert(res != NULL);\n    return res;\n}\n\nZ_API cs_insn *z_disassembler_get_superset_disasm(Disassembler *d,\n                                                  addr_t addr) {\n    cs_insn *inst = (cs_insn 
*)g_hash_table_lookup(d->superset_disasm,\n                                                   GSIZE_TO_POINTER(addr));\n\n    // check whether we need to update superset disasm\n    if (d->text_backup && (!inst)) {\n        // step(1). check addr in .text (we only consider code in .text)\n        addr_t text_addr = d->text_addr;\n        size_t text_size = d->text_size;\n        if (addr < text_addr || addr >= text_addr + text_size) {\n            return NULL;\n        }\n\n        // step(2). disasm non-disassembled instruction\n        size_t off1 = addr - text_addr;\n        size_t off2 = text_size - off1;\n        CS_DISASM_RAW(d->text_backup + off1, off2, addr, 1);\n        if (cs_count == 1) {\n            z_ucfg_analyzer_add_inst(d->ucfg_analyzer, addr, cs_inst, false);\n            g_hash_table_insert(d->superset_disasm, GSIZE_TO_POINTER(addr),\n                                (gpointer)cs_inst);\n\n            z_trace(\"superset disassembly \" CS_SHOW_INST(cs_inst));\n\n            inst = (cs_insn *)cs_inst;\n            cs_inst = NULL;  // avoid double free\n        }\n    }\n\n    return inst;\n}\n\nZ_API cs_insn *z_disassembler_get_recursive_disasm(Disassembler *d,\n                                                   addr_t addr) {\n    return (cs_insn *)g_hash_table_lookup(d->recursive_disasm,\n                                          GSIZE_TO_POINTER(addr));\n}\n\nZ_API cs_insn *z_disassembler_get_linear_disasm(Disassembler *d, addr_t addr) {\n    return (cs_insn *)g_hash_table_lookup(d->linear_disasm,\n                                          GSIZE_TO_POINTER(addr));\n}\n\nZ_API bool z_disassembler_is_potential_block_entrypoint(Disassembler *d,\n                                                        addr_t addr) {\n    return !!g_hash_table_lookup(d->potential_blocks, GSIZE_TO_POINTER(addr));\n}\n\nZ_API bool z_disassembler_is_potential_inst_entrypoint(Disassembler *d,\n                                                       addr_t addr) {\n    
return !!g_hash_table_lookup(d->potential_insts, GSIZE_TO_POINTER(addr));\n}\n\nZ_API bool z_disassembler_is_within_disasm_range(Disassembler *d, addr_t addr) {\n    return !!(addr >= d->text_addr && addr < (d->text_addr + d->text_size));\n}\n\nZ_API Buffer *z_disassembler_get_occluded_addrs(Disassembler *d, addr_t addr) {\n    cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n    if (!inst) {\n        return NULL;\n    }\n\n    if (!z_addr_dict_exist(d->occ_addrs, addr)) {\n        // occluded address hasn't been analyzed\n        z_addr_dict_set(d->occ_addrs, addr, z_buffer_create(NULL, 0));\n\n        // note that the longest x86/64 instruction is 15-bytes\n        for (addr_t occ_addr = addr - 14; occ_addr < addr + inst->size;\n             occ_addr++) {\n            cs_insn *occ_inst = z_disassembler_get_superset_disasm(d, occ_addr);\n            if (!occ_inst) {\n                continue;\n            }\n\n            if (occ_addr < addr && occ_addr + occ_inst->size > addr) {\n                goto SUCC;\n            }\n            if (occ_addr > addr && addr + inst->size > occ_addr) {\n                goto SUCC;\n            }\n            continue;\n\n        SUCC:\n            z_buffer_append_raw(z_addr_dict_get(d->occ_addrs, addr),\n                                (uint8_t *)&occ_addr, sizeof(occ_addr));\n        }\n    }\n\n    return z_addr_dict_get(d->occ_addrs, addr);\n}\n\nZ_API bool z_disassembler_fully_support_prob_disasm(Disassembler *d) {\n    return !z_strcmp(\"ProbDisassembler\", STRUCT_TYPE(d->prob_disasm));\n}\n\n#define __DISASSEMBLER_DECLARE_SUCC_AND_PRED(etype, rtype)                    \\\n    Z_API Buffer *z_disassembler_get_##etype##_##rtype(Disassembler *d,       \\\n                                                       addr_t addr) {         \\\n        /* force superset disasm */                                           \\\n        if (d->text_backup) {                                                 \\\n            
z_disassembler_get_superset_disasm(d, addr);                      \\\n        }                                                                     \\\n                                                                              \\\n        return z_ucfg_analyzer_get_##etype##_##rtype(d->ucfg_analyzer, addr); \\\n    }\n\n__DISASSEMBLER_DECLARE_SUCC_AND_PRED(direct, predecessors);\n__DISASSEMBLER_DECLARE_SUCC_AND_PRED(direct, successors);\n__DISASSEMBLER_DECLARE_SUCC_AND_PRED(intra, predecessors);\n__DISASSEMBLER_DECLARE_SUCC_AND_PRED(intra, successors);\n__DISASSEMBLER_DECLARE_SUCC_AND_PRED(all, predecessors);\n__DISASSEMBLER_DECLARE_SUCC_AND_PRED(all, successors);\n\n#undef __DISASSEMBLER_DECLARE_SUCC_AND_PRED\n"
  },
  {
    "path": "src/disassembler.h",
    "content": "/*\n * disassembler.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __DISASSEMBLER_H\n#define __DISASSEMBLER_H\n\n#include \"address_dictionary.h\"\n#include \"binary.h\"\n#include \"buffer.h\"\n#include \"config.h\"\n#include \"interval_splay.h\"\n#include \"sys_optarg.h\"\n#include \"ucfg_analyzer.h\"\n\n#include <capstone/capstone.h>\n#include <gmodule.h>\n\nSTRUCT(Disassembler, {\n    // Binary which needs disassembly\n    Binary *binary;\n\n    // .text info\n    addr_t text_addr;\n    size_t text_size;\n    uint8_t *text_backup;\n\n    // Disassembly\n    GHashTable *superset_disasm;\n    GHashTable *recursive_disasm;\n    GHashTable *linear_disasm;\n    PhantomType *prob_disasm;\n\n    // Occluded address\n    AddrDictFast(Buffer *, occ_addrs);\n\n    // Pdisasm enable?\n    bool enable_pdisasm;\n\n    /*\n     * Potential information.\n     * These information is collected by linear and recursive disassembly. 
But\n     * due to the inlined data, non-return function, or any other incomplete\n     * analysis result, these information may be wrong.\n     */\n    // Entrypoints of *confidently* disassembled instructions\n    GHashTable *potential_insts;\n    // Entrypoints of *confidently* disassembled basic blocks\n    GHashTable *potential_blocks;\n\n    // Light-weight instruction-level analyzer;\n    UCFG_Analyzer *ucfg_analyzer;\n\n    // rewriting optargs\n    RewritingOptArgs *opts;\n});\n\n/*\n * Getter and Setter\n */\nDECLARE_GETTER(Disassembler, disassembler, Binary *, binary);\nDECLARE_GETTER(Disassembler, disassembler, UCFG_Analyzer *, ucfg_analyzer);\n\n/*\n * Create a disassembler\n */\nZ_API Disassembler *z_disassembler_create(Binary *b, RewritingOptArgs *opts);\n\n/*\n * Destroy a disassembler\n */\nZ_API void z_disassembler_destroy(Disassembler *d);\n\n/*\n * [P-Disasm API]\n * Return the probability of being an instruction entrypoint for the given\n * address.\n *\n * Return value:\n *   P = 1.0:       be very confident that addr is an instruction entrypoint\n *   0.0 < P < 1.0: based on P, greater P means higher confidence\n *   P = 0.0:       be very confident that addr is not an instruction entrypoint\n *   P = -0.0:      we have **very** strong evidence it is not an entrypoint\n */\nZ_API double128_t z_disassembler_get_prob_disasm(Disassembler *d, addr_t addr);\n\nZ_API void z_diassembler_update_prob_disasm(Disassembler *d, addr_t addr,\n                                            bool is_inst);\n\n/*\n * Probabilistic disassemble the whole binary\n */\nZ_API void z_disassembler_prob_disasm(Disassembler *d);\n\n/*\n * Get internal information of probabilistic disassemble (in most case, this API\n * is used for debugging)\n */\nZ_API void z_disassembler_get_prob_disasm_internal(\n    Disassembler *d, addr_t addr, cs_insn **inst, uint32_t *scc_id,\n    double128_t *inst_hint, double128_t *inst_lost, double128_t *data_hint,\n    double128_t *D, 
double128_t *P);\n\n/*\n * Check whether disassembler fully support prob-disasm\n */\nZ_API bool z_disassembler_fully_support_prob_disasm(Disassembler *d);\n\n/*\n * Superset disassemble one instruction at given address\n */\nZ_API const cs_insn *z_disassembler_update_superset_disasm(Disassembler *d,\n                                                           addr_t addr);\n\n/*\n * Show the occluded addresses of a given address\n */\nZ_API Buffer *z_disassembler_get_occluded_addrs(Disassembler *d, addr_t addr);\n\n/*\n * Recursive disassemble from given address\n */\n// XXX: note that currently z_disassembler_recursive_disasm can only be called\n// by z_rewriter_rewrite.\n// TODO: it is a fault of our system design. We need to fix such strong\n// coupling.\nZ_API GQueue *z_disassembler_recursive_disasm(Disassembler *d, addr_t addr);\n\n/*\n * Linear disassemble the whole binary\n */\nZ_API GQueue *z_disassembler_linear_disasm(Disassembler *d);\n\n/*\n * Get linear disasm\n */\nZ_API cs_insn *z_disassembler_get_linear_disasm(Disassembler *d, addr_t addr);\n\n/*\n * Get recursive disasm\n */\nZ_API cs_insn *z_disassembler_get_recursive_disasm(Disassembler *d,\n                                                   addr_t addr);\n\n/*\n * Get superset disasm\n */\nZ_API cs_insn *z_disassembler_get_superset_disasm(Disassembler *d, addr_t addr);\n\n/*\n * Check whether address is a potential block entrypoint\n */\nZ_API bool z_disassembler_is_potential_block_entrypoint(Disassembler *d,\n                                                        addr_t addr);\n\n/*\n * Check whether address is a potential inst entrypoint\n */\nZ_API bool z_disassembler_is_potential_inst_entrypoint(Disassembler *d,\n                                                       addr_t addr);\n\n/*\n * Check whether address is within disassemble range\n */\nZ_API bool z_disassembler_is_within_disasm_range(Disassembler *d, addr_t addr);\n\n#define __DISASSEMBLER_DEFINE_SUCC_AND_PRED(etype, rtype)    
           \\\n    Z_API Buffer *z_disassembler_get_##etype##_##rtype(Disassembler *d, \\\n                                                       addr_t addr)\n\n__DISASSEMBLER_DEFINE_SUCC_AND_PRED(direct, predecessors);\n__DISASSEMBLER_DEFINE_SUCC_AND_PRED(direct, successors);\n__DISASSEMBLER_DEFINE_SUCC_AND_PRED(intra, predecessors);\n__DISASSEMBLER_DEFINE_SUCC_AND_PRED(intra, successors);\n__DISASSEMBLER_DEFINE_SUCC_AND_PRED(all, predecessors);\n__DISASSEMBLER_DEFINE_SUCC_AND_PRED(all, successors);\n\n#undef __DISASSEMBLER_DEFINE_SUCC_AND_PRED\n\n#endif\n"
  },
  {
    "path": "src/elf_.c",
    "content": "/*\n * __elf_parse_relocation in elf_.c\n *\n * URL: https://github.com/kubo/plthook\n *\n * ------------------------------------------------------\n *\n * Copyright 2013-2019 Kubo Takehiro <kubo@jiubao.org>\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *    1. Redistributions of source code must retain the above copyright notice,\n * this list of conditions and the following disclaimer.\n *\n *    2. Redistributions in binary form must reproduce the above copyright\n * notice, this list of conditions and the following disclaimer in the\n * documentation and/or other materials provided with the distribution.\n *\n * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS OR IMPLIED\n * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO\n * EVENT SHALL <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,\n * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n * The views and conclusions contained in the software and documentation are\n * those of the authors and should not be interpreted as representing official\n * policies, either expressed or implied, of the authors.\n *\n */\n// XXX: __elf_parse_relocation is modified based on\n// https://github.com/kubo/plthook/blob/master/plthook_elf.c\n\n/*\n * other parts of elf_.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n// XXX: note that we have multiple streams under an ELF file. 
Make sure you are\n// handling the correct stream(s)\n\n#include \"elf_.h\"\n#include \"buffer.h\"\n#include \"capstone_.h\"\n#include \"crs_config.h\"\n#include \"interval_splay.h\"\n#include \"loader.h\"\n#include \"mem_file.h\"\n#include \"restricted_ptr.h\"\n#include \"utils.h\"\n\n#include <capstone/capstone.h>\n\n#include <errno.h>\n\n#define EXTEND_ZONE_NUM 1\n#define ZONE_SIZE PAGE_SIZE\n#define GUARD_SIZE 8\n\n// it seems DEBUG version has much bigger fork_server and loader\n#ifdef DEBUG\n#define LOADER_ZONE_SIZE (ZONE_SIZE * 3)\n#else\n#define LOADER_ZONE_SIZE (ZONE_SIZE * 2)\n#endif\n\n#define TRAMPOLINES_INIT_SIZE (ZONE_SIZE * 0x100)\n#define RETADDR_MAPPING_INIT_SIZE ZONE_SIZE\n\n/*\n * Define special getter and setter for ELF\n */\n// XXX: such elements all locate on the main stream\n#define ELF_DEFINE_SETTER(OTYPE, ONAME, FTYPE, FNAME)                        \\\n    Z_API void z_##ONAME##_##set_##FNAME(OTYPE *ONAME, FTYPE FNAME) {        \\\n        assert(ONAME != NULL);                                               \\\n        if (FNAME == NULL)                                                   \\\n            ONAME->FNAME##_off = SIZE_MAX;                                   \\\n        else {                                                               \\\n            ONAME->FNAME##_off =                                             \\\n                ((uint8_t *)FNAME) - z_mem_file_get_raw_buf(ONAME->stream);  \\\n            assert(ONAME->FNAME##_off < z_mem_file_get_size(ONAME->stream)); \\\n        }                                                                    \\\n    }\n\n#define ELF_DEFINE_GETTER(OTYPE, ONAME, FTYPE, FNAME)              \\\n    Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME) {          \\\n        assert(ONAME != NULL);                                     \\\n        if (ONAME->FNAME##_off == SIZE_MAX)                        \\\n            return NULL;                                           \\\n        else     
                                                  \\\n            return (FTYPE)(z_mem_file_get_raw_buf(ONAME->stream) + \\\n                           ONAME->FNAME##_off);                    \\\n    }\n\n/*\n * Private structure for vmapping\n */\nSTRUCT(FChunk, {\n    _MEM_FILE *stream;\n    size_t offset;\n    size_t size;\n    bool extendable;\n});\n\nDEFINE_GETTER(FChunk, fchunk, _MEM_FILE *, stream);\nDEFINE_GETTER(FChunk, fchunk, bool, extendable);\nDEFINE_GETTER(FChunk, fchunk, size_t, offset);\nDEFINE_GETTER(FChunk, fchunk, size_t, size);\nDEFINE_SETTER(FChunk, fchunk, size_t, size);\n\nZ_PRIVATE FChunk *z_fchunk_create(_MEM_FILE *stream, size_t offset, size_t size,\n                                  bool extendable) {\n    FChunk *fc = STRUCT_ALLOC(FChunk);\n    fc->stream = stream;\n    fc->offset = offset;\n    fc->size = size;\n    fc->extendable = extendable;\n    return fc;\n}\n\nZ_PRIVATE void z_fchunk_destroy(FChunk *fc) { z_free(fc); }\n\n/*\n * Find Elf64_Dyn by tag name\n */\nZ_PRIVATE Elf64_Dyn *__elf_find_dyn_by_tag(ELF *e, Elf64_Xword tag);\n\n/*\n * Fine Segment by virtual addr\n */\nZ_PRIVATE Snode *__elf_find_segment_by_vaddr(ELF *e, addr_t vaddr);\n\n/*\n * Open a file (ori_filename) and load data into _MEM_FILE\n */\nZ_PRIVATE _MEM_FILE *__elf_open_file(ELF *e, const char *ori_filename);\n\n/*\n * Valid the header of given ELF\n */\nZ_PRIVATE void __elf_validate_header(_MEM_FILE *stream);\n\n/*\n * Parse the program header\n */\nZ_PRIVATE void __elf_parse_phdr(ELF *e);\n\n/*\n * Parse the section header\n */\nZ_PRIVATE void __elf_parse_shdr(ELF *e);\n\n/*\n * Get relocation information\n */\nZ_PRIVATE void __elf_parse_relocation(ELF *e);\n\n/*\n * Detect and parse main function\n */\nZ_PRIVATE void __elf_parse_main(ELF *e);\n\n/*\n * Set relocation-preset for given ELF\n */\nZ_PRIVATE void __elf_set_relro(ELF *e);\n\n/*\n * Set virtual mapping for given ELF\n */\n// Note that after this function, the main stream will be splitted into 
two\n// pieces\nZ_PRIVATE void __elf_set_virtual_mapping(ELF *e, const char *filename);\n\n/*\n * Rewrite PT_NOTE\n */\nZ_PRIVATE void __elf_rewrite_pt_note(ELF *e);\n\n/*\n * Extend additional zones onto ELF\n */\nZ_PRIVATE void __elf_extend_zones(ELF *e);\n\n/*\n * Setup lookup table\n */\nZ_PRIVATE void __elf_setup_lookup_table(ELF *e, const char *filename);\n\n/*\n * Setup retaddr mapping\n */\nZ_PRIVATE void __elf_setup_retaddr_mapping(ELF *e, const char *filename);\n\n/*\n * Setup trampolines (shadow code)\n */\nZ_PRIVATE void __elf_setup_trampolines(ELF *e, const char *filename);\n\n/*\n * Setup shared .text section\n */\nZ_PRIVATE void __elf_setup_shared_text(ELF *e, const char *filename);\n\n/*\n * Setup pipeline file\n */\nZ_PRIVATE void __elf_setup_pipe(ELF *e, const char *filename);\n\n// TODO: raw pointer might lead to overflow, but we need efficiency.\n// In the future, we need a better trade-off.\n// Currently, we have checked the access will not be out of boundary in advance.\n// Make sure all your raw-pointer access is valid.\n/*\n * Get pointer from offset\n */\nZ_PRIVATE void *__elf_stream_off2ptr(_MEM_FILE *stream, size_t off);\n\n/*\n * Get offset from virtual address.\n * (the caller must know the addr is on which stream)\n */\nZ_PRIVATE size_t __elf_stream_vaddr2off(ELF *e, addr_t addr);\n\n/*\n * Setter and Getter\n */\nELF_DEFINE_SETTER(ELF, elf, Elf64_Ehdr *, ehdr);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Phdr *, phdr_note);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Phdr *, phdr_dynamic);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_shstrtab);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_text);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_init);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_fini);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_init_array);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_fini_array);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, 
shdr_plt_got);\nELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt_sec);\n\n#define __WAIT_STREAM_COUNT 5\n#define __WAIT_STREAM_INTERVAL 1\n#define __WAIT_STREAM(fname)                                                  \\\n    do {                                                                      \\\n        size_t __n = __WAIT_STREAM_COUNT;                                     \\\n        while ((__n--) && (access((fname), W_OK) == -1) &&                    \\\n               (errno == ETXTBSY)) {                                          \\\n            z_warn(\"underlying binary (%s) is busy, wait for % sec\", (fname), \\\n                   __WAIT_STREAM_INTERVAL);                                   \\\n            sleep(__WAIT_STREAM_INTERVAL);                                    \\\n        }                                                                     \\\n    } while (0)\nOVERLOAD_SETTER(ELF, elf, ELFState, state) {\n    if (state & ELFSTATE_DISABLE) {\n        // if is used to disable associated states\n        state = state ^ ELFSTATE_DISABLE;\n        if (state & ELFSTATE_CONNECTED) {\n            __WAIT_STREAM(elf->tmpnam);\n            z_mem_file_suspend(elf->stream);\n        }\n        elf->state &= (state ^ ELFSTATE_MASK);\n    } else {\n        if (state & ELFSTATE_CONNECTED) {\n            __WAIT_STREAM(elf->tmpnam);\n            z_mem_file_resume(elf->stream);\n        }\n        elf->state |= state;\n    }\n}\n#undef __WAIT_STREAM_COUNT\n#undef __WAIT_STREAM_INTERVAL\n#undef __WAIT_STREAM\n\nELF_DEFINE_GETTER(ELF, elf, Elf64_Ehdr *, ehdr);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Phdr *, phdr_note);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Phdr *, phdr_dynamic);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_shstrtab);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_text);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_init);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_fini);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, 
shdr_init_array);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_fini_array);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt_got);\nELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt_sec);\nDEFINE_GETTER(ELF, elf, addr_t, loader_addr);\nDEFINE_GETTER(ELF, elf, addr_t, trampolines_addr);\nDEFINE_GETTER(ELF, elf, addr_t, lookup_table_addr);\nDEFINE_GETTER(ELF, elf, addr_t, shared_text_addr);\nDEFINE_GETTER(ELF, elf, addr_t, retaddr_mapping_addr);\nDEFINE_GETTER(ELF, elf, bool, is_pie);\nDEFINE_GETTER(ELF, elf, addr_t, ori_entry);\nDEFINE_GETTER(ELF, elf, const char *, lookup_tabname);\nDEFINE_GETTER(ELF, elf, const char *, trampolines_name);\nDEFINE_GETTER(ELF, elf, const char *, shared_text_name);\nDEFINE_GETTER(ELF, elf, const char *, pipe_filename);\nDEFINE_GETTER(ELF, elf, const char *, retaddr_mapping_name);\n\nOVERLOAD_GETTER(ELF, elf, size_t, plt_n) { return g_hash_table_size(elf->plt); }\n\nOVERLOAD_GETTER(ELF, elf, addr_t, main) {\n    if (!elf->detect_main) {\n        EXITME(\"the main function has not been automatically detected\");\n    }\n\n    return elf->main;\n}\n\nOVERLOAD_GETTER(ELF, elf, addr_t, init) {\n    if (!elf->detect_main) {\n        EXITME(\"the main function has not been automatically detected\");\n    }\n\n    return elf->init;\n}\n\nOVERLOAD_GETTER(ELF, elf, addr_t, fini) {\n    if (!elf->detect_main) {\n        EXITME(\"the main function has not been automatically detected\");\n    }\n\n    return elf->fini;\n}\n\nOVERLOAD_GETTER(ELF, elf, addr_t, load_main) {\n    if (!elf->detect_main) {\n        EXITME(\"the main function has not been automatically detected\");\n    }\n\n    return elf->load_main;\n}\n\nOVERLOAD_GETTER(ELF, elf, addr_t, load_init) {\n    if (!elf->detect_main) {\n        EXITME(\"the main function has not been automatically detected\");\n    }\n\n    return elf->load_init;\n}\n\nOVERLOAD_GETTER(ELF, elf, addr_t, load_fini) {\n    if (!elf->detect_main) {\n  
      EXITME(\"the main function has not been automatically detected\");\n    }\n\n    return elf->load_fini;\n}\n\nZ_PRIVATE size_t __elf_stream_vaddr2off(ELF *e, addr_t addr) {\n    // Get corresponding segment\n    Snode *segment = __elf_find_segment_by_vaddr(e, addr);\n    if (segment == NULL) {\n        EXITME(\"invalid virtual address [%#lx]\", addr);\n    }\n\n    // Create Rptr\n    FChunk *fc = (FChunk *)z_snode_get_data(segment);\n    if (fc == NULL || z_strcmp(STRUCT_TYPE(fc), \"FChunk\")) {\n        EXITME(\"get address into dynamically allocated space\");\n    }\n    size_t off1 = addr - z_snode_get_lower_bound(segment);\n    size_t off2 = z_fchunk_get_offset(fc);\n    if (off1 >= z_fchunk_get_size(fc)) {\n        EXITME(\"trying to read on zero-padding region\");\n    }\n\n    return off1 + off2;\n}\n\nZ_PRIVATE _MEM_FILE *__elf_open_file(ELF *e, const char *ori_filename) {\n    Buffer *buf = z_buffer_read_file(ori_filename);\n\n    const char *buf_raw_buf = (const char *)z_buffer_get_raw_buf(buf);\n    size_t buf_size = z_buffer_get_size(buf);\n\n    // magic check for re-patch\n    if (memmem(buf_raw_buf, buf_size, MAGIC_STRING, z_strlen(MAGIC_STRING))) {\n        EXITME(\"try to re-instrument file \\\"%s\\\"\", ori_filename);\n    }\n\n    _MEM_FILE *stream = z_mem_file_fopen((const char *)e->tmpnam, \"w+\");\n    z_mem_file_fwrite((char *)buf_raw_buf, buf_size, sizeof(uint8_t), stream);\n\n    // generate backup file\n    const char *bak_filename = z_strcat(ori_filename, BACKUP_FILE_SUFFIX);\n    z_buffer_write_file(buf, bak_filename);\n    z_free((char *)bak_filename);\n    z_buffer_destroy(buf);\n    return stream;\n}\n\nZ_PRIVATE void *__elf_stream_off2ptr(_MEM_FILE *stream, size_t off) {\n    assert(stream != NULL);\n\n    if (z_mem_file_get_size(stream) <= off) {\n        EXITME(\"invalid offset(%ld) from stream(%ld): %s\", off,\n               z_mem_file_get_size(stream), z_mem_file_get_filename(stream));\n    }\n\n    return (void 
*)(z_mem_file_get_raw_buf(stream) + off);\n}\n\nZ_PRIVATE void __elf_rewrite_pt_note(ELF *e) {\n    // XXX: note that rewriter_pt_note should be applied on the main stream.\n    assert(e != NULL);\n\n    Elf64_Phdr *phdr = z_elf_get_phdr_note(e);\n    phdr->p_type = PT_LOAD;\n    phdr->p_flags = PF_X | PF_R;\n    // XXX: e->loader_addr cannot be on the shared .text stream\n    phdr->p_offset = __elf_stream_vaddr2off(e, e->loader_addr);\n    phdr->p_vaddr = (Elf64_Addr)e->loader_addr;\n    phdr->p_paddr = (Elf64_Addr)NULL;\n    phdr->p_filesz = LOADER_ZONE_SIZE;\n    phdr->p_memsz = LOADER_ZONE_SIZE;\n    phdr->p_align = PAGE_SIZE;\n}\n\nZ_PRIVATE void __elf_setup_pipe(ELF *e, const char *filename) {\n    assert(e != NULL);\n\n    assert(!z_strchr(filename, '/'));\n    e->pipe_filename = z_strcat(PIPE_FILENAME_PREFIX, filename);\n\n    return;\n}\n\nZ_PRIVATE void __elf_setup_retaddr_mapping(ELF *e, const char *filename) {\n    assert(e != NULL);\n\n    // step (0). update retaddr_mapping_addr\n    e->retaddr_mapping_addr = RETADDR_MAPPING_ADDR;\n\n    // step (1). get filename\n    assert(!z_strchr(filename, '/'));\n    e->retaddr_mapping_name = z_strcat(RETADDR_MAPPING_PREFIX, filename);\n\n    // step (2). create _MEM_FILE\n    e->retaddr_mapping_stream =\n        z_mem_file_fopen((const char *)e->retaddr_mapping_name, \"w+\");\n    z_mem_file_pwrite(e->retaddr_mapping_stream, \"\", 1,\n                      RETADDR_MAPPING_INIT_SIZE - 1);\n\n    // step (3). insert into virtual mapping\n    Snode *node = NULL;\n    FChunk *fc = z_fchunk_create(e->retaddr_mapping_stream, 0,\n                                 RETADDR_MAPPING_INIT_SIZE, true);\n    node = z_snode_create(e->retaddr_mapping_addr, RETADDR_MAPPING_INIT_SIZE,\n                          (void *)fc, (void (*)(void *))(&z_fchunk_destroy));\n    if (!z_splay_insert(e->vmapping, node)) {\n        EXITME(\"overlapped retaddr mapping\");\n    }\n\n    // step (4). 
update mmapped information\n    node = z_snode_create(e->retaddr_mapping_addr, RETADDR_MAPPING_INIT_SIZE,\n                          NULL, NULL);\n    if (!z_splay_insert(e->mmapped_pages, node)) {\n        EXITME(\"overlapped retaddr mapping\");\n    }\n}\n\nZ_PRIVATE void __elf_setup_lookup_table(ELF *e, const char *filename) {\n    assert(e != NULL);\n\n    // step (1). get address\n    e->lookup_table_addr = LOOKUP_TABLE_ADDR;\n\n    // step (2). get filename\n    assert(!z_strchr(filename, '/'));\n    e->lookup_tabname = z_strcat(LOOKUP_TABNAME_PREFIX, filename);\n\n    // step (3). create _MEM_FILE\n    e->lookup_table_stream =\n        z_mem_file_fopen((const char *)e->lookup_tabname, \"w+\");\n    z_mem_file_fix_size(e->lookup_table_stream, LOOKUP_TABLE_SIZE);\n    z_mem_file_pwrite(e->lookup_table_stream, \"\", 1, LOOKUP_TABLE_SIZE - 1);\n\n    // step (4). fill in pre-defined values\n    Elf64_Shdr *text = z_elf_get_shdr_text(e);\n    addr_t text_addr = text->sh_addr;\n    size_t text_size = text->sh_size;\n    addr_t cur_addr = text_addr;\n    int64_t cell_val = -1;\n    for (size_t i = 0; i < LOOKUP_TABLE_CELL_NUM; i++) {\n        cell_val = -1;\n        if (cur_addr < text_addr + text_size) {\n            // For a valid address, initialize the cell to the negation of the\n            // address\n            cell_val = -((int64_t)cur_addr);\n        }\n        cell_val &= LOOKUP_TABLE_CELL_MASK;\n        z_mem_file_fwrite((uint8_t *)(&cell_val), sizeof(uint8_t),\n                          LOOKUP_TABLE_CELL_SIZE, e->lookup_table_stream);\n        cur_addr += 1;\n    }\n    assert(cell_val == (-1 & LOOKUP_TABLE_CELL_MASK));\n\n    // step (5). 
insert into virtual mapping\n    Snode *node = NULL;\n    FChunk *fc =\n        z_fchunk_create(e->lookup_table_stream, 0, LOOKUP_TABLE_SIZE, false);\n    node = z_snode_create(e->lookup_table_addr, LOOKUP_TABLE_SIZE, (void *)fc,\n                          (void (*)(void *))(&z_fchunk_destroy));\n    if (!z_splay_insert(e->vmapping, node)) {\n        EXITME(\"overlapped lookup table\");\n    }\n\n    // step (6). update mmapped information\n    node = z_snode_create(e->lookup_table_addr, LOOKUP_TABLE_SIZE, NULL, NULL);\n    if (!z_splay_insert(e->mmapped_pages, node)) {\n        EXITME(\"overlapped lookup table\");\n    }\n}\n\nZ_PRIVATE void __elf_setup_shared_text(ELF *e, const char *filename) {\n    assert(e != NULL);\n\n    // step (0). get .text information\n    Elf64_Shdr *text = z_elf_get_shdr_text(e);\n    addr_t text_addr = text->sh_addr;\n    size_t text_size = text->sh_size;\n    size_t text_offset = text->sh_offset;\n\n    addr_t aligned_addr = BITS_ALIGN_FLOOR(text_addr, PAGE_SIZE_POW2);\n    size_t aligned_offset = BITS_ALIGN_FLOOR(text_offset, PAGE_SIZE_POW2);\n    size_t aligned_size = BITS_ALIGN_CELL(\n        text_size + text_offset - aligned_offset, PAGE_SIZE_POW2);\n\n    e->shared_text_addr = aligned_addr;\n\n    // step (1). get filename\n    assert(!z_strchr(filename, '/'));\n    e->shared_text_name = z_strcat(SHARED_TEXT_PREFIX, filename);\n\n    // step (2). create _MEM_FILE\n    e->shared_text_stream =\n        z_mem_file_fopen((const char *)e->shared_text_name, \"w+\");\n    z_mem_file_fix_size(e->shared_text_stream, aligned_size);\n    z_mem_file_pwrite(e->shared_text_stream, \"\", 1, aligned_size - 1);\n\n    // step (3). 
update data to _MEM_FILE\n    // XXX: note that e->stream is already page-aligned, which means the\n    // following memcpy is safe.\n    uint8_t *base = z_mem_file_get_raw_buf(e->stream);\n    uint8_t *src = base + aligned_offset;\n    uint8_t *dst = z_mem_file_get_raw_buf(e->shared_text_stream);\n    memcpy(dst, src, aligned_size);\n\n    // step (4). generate virtual mapping information\n    FChunk *fc = z_fchunk_create(e->shared_text_stream, 0, aligned_size, false);\n    Snode *node = z_snode_create(aligned_addr, aligned_size, (void *)fc,\n                                 (void (*)(void *))(&z_fchunk_destroy));\n\n    // step (5). insert into virtual mapping\n    if (!z_splay_insert(e->vmapping, node)) {\n        EXITME(\"overlapped shared .text section\");\n    }\n\n    // XXX: mmapped_pages will be updated in __elf_set_virtual_mapping\n}\n\nZ_PRIVATE void __elf_setup_trampolines(ELF *e, const char *filename) {\n    assert(e != NULL);\n\n    // step (0). update trampolines_addr\n    e->trampolines_addr = SHADOW_CODE_ADDR;\n\n    // step (1). get filename\n    assert(!z_strchr(filename, '/'));\n    e->trampolines_name = z_strcat(TRAMPOLINES_NAME_PREFIX, filename);\n\n    // step (2). create _MEM_FILE\n    e->trampolines_stream =\n        z_mem_file_fopen((const char *)e->trampolines_name, \"w+\");\n    z_mem_file_pwrite(e->trampolines_stream, \"\", 1, TRAMPOLINES_INIT_SIZE - 1);\n\n    // step (3). insert into virtual mapping\n    Snode *node = NULL;\n    FChunk *fc =\n        z_fchunk_create(e->trampolines_stream, 0, TRAMPOLINES_INIT_SIZE, true);\n    node = z_snode_create(e->trampolines_addr, TRAMPOLINES_INIT_SIZE,\n                          (void *)fc, (void (*)(void *))(&z_fchunk_destroy));\n    if (!z_splay_insert(e->vmapping, node)) {\n        EXITME(\"overlapped trampolines\");\n    }\n\n    // step (4). 
update mmapped information\n    node =\n        z_snode_create(e->trampolines_addr, TRAMPOLINES_INIT_SIZE, NULL, NULL);\n    if (!z_splay_insert(e->mmapped_pages, node)) {\n        EXITME(\"overlapped trampolines\");\n    }\n}\n\nZ_PRIVATE void __elf_extend_zones(ELF *e) {\n    assert(e != NULL);\n\n    /*\n     * A trick here to separate the new zones is to insert an eight-byte gap at\n     * the end of each zone.\n     */\n\n    Snode *node;\n    addr_t vaddr = BITS_ALIGN_CELL(e->max_addr, PAGE_SIZE_POW2);\n    size_t offset = z_mem_file_get_size(e->stream);\n    assert(offset % PAGE_SIZE == 0);\n\n    size_t *zones[EXTEND_ZONE_NUM] = {&e->loader_addr};\n    addr_t zones_addr[EXTEND_ZONE_NUM] = {vaddr};\n    size_t zones_size[EXTEND_ZONE_NUM] = {LOADER_ZONE_SIZE};\n    size_t zones_guard[EXTEND_ZONE_NUM] = {GUARD_SIZE};\n\n    // Set zones\n    for (size_t i = 0; i < EXTEND_ZONE_NUM; i++) {\n        size_t zone_size = zones_size[i];\n        size_t zone_guard = zones_guard[i];\n\n        vaddr = zones_addr[i];\n        *zones[i] = vaddr;\n\n        FChunk *fc =\n            z_fchunk_create(e->stream, offset, zone_size - zone_guard, false);\n        node = z_snode_create(vaddr, zone_size - zone_guard, (void *)fc,\n                              (void (*)(void *))(&z_fchunk_destroy));\n\n        if (!z_splay_insert(e->vmapping, node)) {\n            EXITME(\"overlapped zones\");\n        }\n        z_info(\"zone base at %#lx with offset %#lx\", vaddr, offset);\n\n        assert(vaddr % PAGE_SIZE == 0);\n        assert(zone_size % PAGE_SIZE == 0);\n        node = z_snode_create(vaddr, zone_size, NULL, NULL);\n        if (!z_splay_insert(e->mmapped_pages, node)) {\n            EXITME(\"overlapped zones\");\n        }\n\n        offset += zone_size;\n    }\n\n    // Extend file\n    z_mem_file_pwrite(e->stream, \"\", 1, offset - 1);\n}\n\nZ_PRIVATE Snode *__elf_find_segment_by_vaddr(ELF *e, addr_t vaddr) {\n    Snode *segment = z_splay_search(e->vmapping, vaddr);\n    
if (segment == NULL) {\n        return NULL;\n    }\n\n    assert(vaddr >= z_snode_get_lower_bound(segment));\n    assert(vaddr <= z_snode_get_upper_bound(segment));\n\n    return segment;\n}\n\nZ_PRIVATE Elf64_Dyn *__elf_find_dyn_by_tag(ELF *e, Elf64_Xword tag) {\n    Elf64_Phdr *dynamic_phdr = z_elf_get_phdr_dynamic(e);\n    if (z_unlikely(!dynamic_phdr)) {\n        EXITME(\"dynamic segment not found\");\n    }\n\n    // get the first dyn\n    // XXX: note that it is safe to use __elf_stream_off2ptr\n    Elf64_Dyn *dyn =\n        (Elf64_Dyn *)__elf_stream_off2ptr(e->stream, dynamic_phdr->p_offset);\n\n    while (dyn->d_tag != DT_NULL) {\n        if (dyn->d_tag == tag) {\n            return dyn;\n        }\n        dyn++;\n    }\n\n    return (tag == DT_NULL ? dyn : NULL);\n}\n\nZ_RESERVED Z_PRIVATE void __elf_set_relro(ELF *e) {\n    assert(e != NULL);\n\n    Elf64_Phdr *dynamic_phdr = z_elf_get_phdr_dynamic(e);\n\n    if (dynamic_phdr != NULL) {\n        bool is_relro = false;\n        Elf64_Dyn *dt_debug = NULL;\n        Elf64_Dyn *iter = (Elf64_Dyn *)__elf_stream_off2ptr(\n            e->stream, dynamic_phdr->p_offset);\n\n        while (iter->d_tag != DT_NULL) {\n            z_trace(\n                \"find dynamic section with d_tag: %#lx =? %#lx, and d_un \"\n                \"%p\",\n                iter->d_tag, DT_BIND_NOW, iter->d_un);\n\n            if (iter->d_tag == DT_DEBUG)\n                dt_debug = iter;\n            if (iter->d_tag == DT_BIND_NOW) {\n                is_relro = true;\n                break;\n            }\n            if (iter->d_tag == DT_FLAGS &&\n                (iter->d_un.d_val & DF_BIND_NOW) != 0) {\n                is_relro = true;\n                break;\n            }\n\n            iter++;\n        }\n\n        if (is_relro) {\n            z_info(\"binary is already RELRO\");\n        } else {\n            if (dt_debug) {\n                z_info(\n                    \"binary is not RELRO. 
Hence, we patch it into DT_DEBUG \"\n                    \"entry.\");\n                dt_debug->d_tag = DT_FLAGS;\n                dt_debug->d_un.d_val = DF_BIND_NOW;\n            } else {\n                z_warn(\n                    \"binary is not RELRO and has no DT_DEBUG entry. Hence, \"\n                    \"we failed to patch it\");\n            }\n        }\n    } else {\n        z_info(\"statically linked binary\");\n    }\n}\n\n#define __NUMBER_OF_GOTS 2\n#define __NUMBER_OF_PLTS 3\n\n// TODO: make sure PIE binaries would not cause any trouble\n// TODO: if any section is missed, directly return errors instead of EXITME\nZ_PRIVATE void __elf_parse_relocation(ELF *e) {\n    // XXX: we use z_elf_read_all to avoid inter-stream data\n\n    // step (0). init related field of ELF and return if statically-linked\n    e->got = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    e->plt = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    if (!z_elf_get_phdr_dynamic(e)) {\n        z_info(\"statically-linked binary does not have relocation information\");\n        return;\n    }\n\n    /*\n     * step (1). 
collect necessary information\n     */\n    const Elf64_Dyn *dyn = NULL;\n    const Elf64_Sym *dynsym = NULL;\n    const char *dynstr = NULL;\n    size_t dynstr_size = 0;\n    const Elf64_Rela *rela_plt = NULL;\n    size_t rela_plt_cnt = 0;\n    const Elf64_Rela *rela_dyn = NULL;\n    size_t rela_dyn_cnt = 0;\n\n    // .dynstr size\n    dyn = __elf_find_dyn_by_tag(e, DT_STRSZ);\n    if (!dyn) {\n        EXITME(\"fail to find DT_STRSZ\");\n    }\n    dynstr_size = dyn->d_un.d_val;\n\n    // .dynstr section\n    dyn = __elf_find_dyn_by_tag(e, DT_STRTAB);\n    if (!dyn) {\n        EXITME(\"fail to find DT_STRTAB\");\n    }\n    dynstr = z_alloc(dynstr_size + 1, sizeof(char));\n    if (z_elf_read_all(e, dyn->d_un.d_ptr, dynstr_size, (void *)dynstr) !=\n        dynstr_size) {\n        EXITME(\"invalid synstr_size\");\n    }\n\n    // .rela.plt section\n    dyn = __elf_find_dyn_by_tag(e, DT_JMPREL);\n    if (dyn) {\n        addr_t rela_plt_addr = dyn->d_un.d_ptr;\n\n        dyn = __elf_find_dyn_by_tag(e, DT_PLTRELSZ);\n        if (!dyn) {\n            EXITME(\"fail to find DT_PLTRELSZ when DT_JMPREL is found\");\n        }\n        rela_plt_cnt = dyn->d_un.d_val / sizeof(Elf64_Rela);\n\n        rela_plt = z_alloc(rela_plt_cnt, sizeof(Elf64_Rela));\n        if (z_elf_read_all(e, rela_plt_addr, dyn->d_un.d_val,\n                           (void *)rela_plt) != dyn->d_un.d_val) {\n            EXITME(\"invalid size of .rela.plt\");\n        }\n\n        if (!z_elf_get_shdr_plt(e)) {\n            EXITME(\"fail to find .plt section when DT_JMPREL is found\");\n        }\n    }\n\n    // .rela.dyn section\n    dyn = __elf_find_dyn_by_tag(e, DT_RELA);\n    if (dyn) {\n        addr_t rela_dyn_addr = dyn->d_un.d_ptr;\n\n        size_t total_size = 0, elem_size = 0;\n\n        dyn = __elf_find_dyn_by_tag(e, DT_RELASZ);\n        if (!dyn) {\n            EXITME(\"fail to find DT_RELASZ when DT_RELA is found\");\n        }\n        total_size = dyn->d_un.d_val;\n\n        dyn = 
__elf_find_dyn_by_tag(e, DT_RELAENT);\n        if (!dyn) {\n            EXITME(\"fail to find DT_RELAENT when DT_RELA is found\");\n        }\n        elem_size = dyn->d_un.d_val;\n\n        rela_dyn_cnt = total_size / elem_size;\n\n        rela_dyn = z_alloc(rela_dyn_cnt, elem_size);\n        if (z_elf_read_all(e, rela_dyn_addr, total_size, (void *)rela_dyn) !=\n            total_size) {\n            EXITME(\"invalid size of .rela.dyn\");\n        }\n    }\n\n    // check .rela.plt and .rela.dyn\n    if (!rela_plt && !rela_dyn) {\n        EXITME(\"fail to find neither DT_JMPREL nor DT_RELA\");\n    }\n\n    const Elf64_Rela *gots[__NUMBER_OF_GOTS] = {rela_plt, rela_dyn};\n    const size_t gots_cnt[__NUMBER_OF_GOTS] = {rela_plt_cnt, rela_dyn_cnt};\n    const int gots_type[__NUMBER_OF_GOTS] = {R_X86_64_JUMP_SLOT,\n                                             R_X86_64_GLOB_DAT};\n    const char *gots_str[__NUMBER_OF_GOTS] = {\".rela.plt\", \".rela.dyn\"};\n\n    // let first quickly go though how many symbols we need\n    size_t max_idx = 0;\n    for (size_t k = 0; k < __NUMBER_OF_GOTS; k++) {\n        const Elf64_Rela *got = gots[k];\n        const size_t cnt = gots_cnt[k];\n        const int type = gots_type[k];\n\n        for (size_t i = 0; i < cnt; i++, got++) {\n            if (ELF64_R_TYPE(got->r_info) == type) {\n                size_t idx = ELF64_R_SYM(got->r_info);\n                if (idx > max_idx) {\n                    max_idx = idx;\n                }\n            }\n        }\n    }\n    z_info(\"require %d symbols\", max_idx + 1);\n\n    // check sizeof(Elf64_Sym)\n    dyn = __elf_find_dyn_by_tag(e, DT_SYMENT);\n    if (!dyn) {\n        EXITME(\"fail to find DT_SYMTAB\");\n    }\n    if (dyn->d_un.d_val != sizeof(Elf64_Sym)) {\n        EXITME(\"inconsistent size of Elf64_Sym: %#lx v/s %#lx\", dyn->d_un.d_val,\n               sizeof(Elf64_Sym));\n    }\n\n    // .dynsym section\n    dyn = __elf_find_dyn_by_tag(e, DT_SYMTAB);\n    if (!dyn) {\n        
EXITME(\"fail to find DT_SYMTAB\");\n    }\n    dynsym = z_alloc(max_idx + 1, sizeof(Elf64_Sym));\n    if (z_elf_read_all(e, dyn->d_un.d_ptr, sizeof(Elf64_Sym) * (max_idx + 1),\n                       (void *)dynsym) != sizeof(Elf64_Sym) * (max_idx + 1)) {\n        EXITME(\"symtab does not hold enough symbols\");\n    }\n\n    /*\n     * step (2). collect GOT information\n     */\n    for (size_t k = 0; k < __NUMBER_OF_GOTS; k++) {\n        const Elf64_Rela *got = gots[k];\n        const size_t cnt = gots_cnt[k];\n        const int type = gots_type[k];\n        const char *str = gots_str[k];\n\n        for (size_t i = 0; i < cnt; i++, got++) {\n            if (ELF64_R_TYPE(got->r_info) == type) {\n                // get function name\n                size_t idx = ELF64_R_SYM(got->r_info);\n                idx = dynsym[idx].st_name;\n                if (idx >= dynstr_size) {\n                    EXITME(\"too big section header string table index: %#lx\",\n                           idx);\n                }\n                const char *func_name = dynstr + idx;\n\n                // get function address\n                const addr_t func_addr = (addr_t)(got->r_offset);\n\n                const LFuncInfo *func_info = LB_QUERY(func_name);\n                z_info(\"function GOT [%s]: %s @ %#lx | %s | %s \", str,\n                       func_name, func_addr,\n                       (func_info->cfg_info == LCFG_UNK\n                            ? COLOR(YELLOW, \"unknown\")\n                            : (func_info->cfg_info == LCFG_OBJ\n                                   ? \"object\"\n                                   : (func_info->cfg_info == LCFG_RET\n                                          ? COLOR(GREEN, \"returnable\")\n                                          : COLOR(RED, \"terminated\")))),\n                       (func_info->ra_info == LRA_UNK\n                            ? 
COLOR(YELLOW, \"unknown\")\n                            : (func_info->ra_info == LRA_OBJ\n                                   ? \"object\"\n                                   : (func_info->ra_info == LRA_USED\n                                          ? COLOR(RED, \"used\")\n                                          : COLOR(GREEN, \"unused\")))));\n\n                g_hash_table_insert(e->got, GSIZE_TO_POINTER(func_addr),\n                                    (gpointer)func_info);\n            }\n        }\n    }\n\n    /*\n     * step (3). collect PLT information\n     */\n    // we inspect .plt, .plt.got and .plt.sec by checking their instructions\n    Elf64_Shdr *plts[__NUMBER_OF_PLTS] = {z_elf_get_shdr_plt(e),\n                                          z_elf_get_shdr_plt_got(e),\n                                          z_elf_get_shdr_plt_sec(e)};\n\n    for (size_t k = 0; k < __NUMBER_OF_PLTS; k++) {\n        Elf64_Shdr *plt = plts[k];\n        if (!plt) {\n            continue;\n        }\n\n        addr_t plt_addr = plt->sh_addr;\n        size_t plt_size = plt->sh_size;\n        size_t plt_entsize = plt->sh_entsize;\n        if (!plt_addr || !plt_size) {\n            EXITME(\"invalid .plt section\");\n        }\n        if (!plt_entsize) {\n            plt_entsize = plt_size;\n        }\n\n        size_t off = 0;\n        uint8_t *ptr = z_alloc(plt_size, sizeof(uint8_t));\n        if (z_elf_read_all(e, plt_addr, plt_size, ptr) != plt_size) {\n            EXITME(\"fail to load data form PLT\");\n        }\n\n        // TODO: the first element in .plt is reserved for resolving, remove it.\n        while (off < plt_size) {\n            const LFuncInfo *func_info = LB_DEFAULT();\n\n            CS_DISASM_RAW(ptr + off, plt_size - off, plt_addr + off, 1);\n\n            if (cs_inst->id == X86_INS_ENDBR64 &&\n                off + cs_inst->size < plt_size) {\n                // XXX: handle intel CET tech. 
Note that we may need to\n                // carefully design our system about how to handle CET/IBT.\n                size_t endbr64_size = cs_inst->size;\n                CS_DISASM_RAW(ptr + off + endbr64_size,\n                              plt_size - off - endbr64_size,\n                              plt_addr + off + endbr64_size, 1);\n            }\n\n            addr_t got_addr = INVALID_ADDR;\n            if (cs_count == 1 &&\n                z_capstone_is_pc_related_ujmp(cs_inst, &got_addr)) {\n                assert(got_addr != INVALID_ADDR);\n\n                const LFuncInfo *got_info =\n                    (const LFuncInfo *)g_hash_table_lookup(\n                        e->got, GSIZE_TO_POINTER(got_addr));\n\n                if (got_info) {\n                    func_info = got_info;\n                    z_info(\"function PLT: %s @ %#lx\", func_info->name,\n                           plt_addr + off);\n                }\n            }\n\n            g_hash_table_insert(e->plt, GSIZE_TO_POINTER(plt_addr + off),\n                                (gpointer)func_info);\n            off += plt_entsize;\n        }\n\n        z_free(ptr);\n    }\n\n    /*\n     * step (4). free allocated memory\n     */\n    z_free((void *)dynstr);\n    z_free((void *)rela_plt);\n    z_free((void *)rela_dyn);\n    z_free((void *)dynsym);\n\n    /*\n     * step (5). 
change the value of DT_NULL to indicate this program is patched\n     * by StochFuzz\n     */\n    Elf64_Dyn *dyn_ = __elf_find_dyn_by_tag(e, DT_NULL);\n    if (!dyn_) {\n        EXITME(\"DT_NULL not found\");\n    }\n    dyn_->d_un.d_val = MAGIC_NUMBER;\n}\n\n#undef __NUMBER_OF_GOTS\n#undef __NUMBER_OF_PLTS\n\nZ_PRIVATE void __elf_parse_shdr(ELF *e) {\n    Elf64_Ehdr *ehdr = z_elf_get_ehdr(e);\n    size_t size = z_mem_file_ftell(e->stream);\n\n    z_elf_set_shdr_shstrtab(e, NULL);\n    z_elf_set_shdr_text(e, NULL);\n    z_elf_set_shdr_init(e, NULL);\n    z_elf_set_shdr_fini(e, NULL);\n    z_elf_set_shdr_init_array(e, NULL);\n    z_elf_set_shdr_fini_array(e, NULL);\n    z_elf_set_shdr_plt(e, NULL);\n    z_elf_set_shdr_plt_got(e, NULL);\n    z_elf_set_shdr_plt_sec(e, NULL);\n\n    Elf64_Shdr *shdrs = (Elf64_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff);\n\n    // Get string table first\n    uint16_t shstrndx = ehdr->e_shstrndx;\n    z_elf_set_shdr_shstrtab(e, shdrs + shstrndx);\n    Elf64_Shdr *shdr_shstrtab = z_elf_get_shdr_shstrtab(e);\n\n    assert(shdr_shstrtab != NULL);\n    assert(shdr_shstrtab->sh_type == SHT_STRTAB);\n\n    if (shdr_shstrtab->sh_offset >= size ||\n        shdr_shstrtab->sh_offset + shdr_shstrtab->sh_size > size) {\n        EXITME(\"string table offset is too large\");\n    }\n    const char *shstrtab =\n        __elf_stream_off2ptr(e->stream, shdr_shstrtab->sh_offset);\n    size_t shstrtab_sz = shdr_shstrtab->sh_size;\n\n#ifdef DEBUG\n    if (true) {\n        size_t name_off = shdr_shstrtab->sh_name;\n        assert(name_off < shstrtab_sz);\n        const char *shstrtab_name = shstrtab + name_off;\n        assert(!z_strcmp(shstrtab_name, \".shstrtab\"));\n    }\n#endif\n\n    // Get other section header\n    for (unsigned i = 0; i < ehdr->e_shnum; i++) {\n        Elf64_Shdr *shdr = shdrs + i;\n        size_t name_off = shdr->sh_name;\n        if (name_off >= shstrtab_sz)\n            continue;\n        const char *shdr_name = shstrtab + 
name_off;\n\n        if (!z_strcmp(shdr_name, \".text\")) {\n            if ((int64_t)shdr->sh_addr < 0 ||\n                (int64_t)(shdr->sh_addr + shdr->sh_size) < 0) {\n                EXITME(\"some addresses in .text section are negative\");\n            }\n            z_elf_set_shdr_text(e, shdr);\n            LOOKUP_TABLE_INIT_CELL_NUM(shdr->sh_size);\n        } else if (!z_strcmp(shdr_name, \".init\")) {\n            z_elf_set_shdr_init(e, shdr);\n        } else if (!z_strcmp(shdr_name, \".fini\")) {\n            z_elf_set_shdr_fini(e, shdr);\n        } else if (!z_strcmp(shdr_name, \".init_array\")) {\n            z_elf_set_shdr_init_array(e, shdr);\n        } else if (!z_strcmp(shdr_name, \".fini_array\")) {\n            z_elf_set_shdr_fini_array(e, shdr);\n        } else if (!z_strcmp(shdr_name, \".plt\")) {\n            z_elf_set_shdr_plt(e, shdr);\n        } else if (!z_strcmp(shdr_name, \".plt.got\")) {\n            z_elf_set_shdr_plt_got(e, shdr);\n        } else if (!z_strcmp(shdr_name, \".plt.sec\")) {\n            z_elf_set_shdr_plt_sec(e, shdr);\n        }\n    }\n\n    if (!z_elf_get_shdr_text(e)) {\n        // TODO: .text is not always necessary.\n        EXITME(\"cannot find .text section\");\n    }\n    z_info(\"find .text section @ %#lx\", z_elf_get_shdr_text(e)->sh_addr);\n\n    // in some cases, init_/fini_array does not exist\n    // assert(z_elf_get_shdr_init(e) != NULL);\n    // assert(z_elf_get_shdr_fini(e) != NULL);\n    // assert(z_elf_get_shdr_init_array(e) != NULL);\n    // assert(z_elf_get_shdr_fini_array(e) != NULL);\n\n    // static-linked binary may not have PLT\n    // assert(z_elf_get_shdr_plt(e) != NULL);\n    // assert(z_elf_get_shdr_plt_got(e) != NULL);\n\n    if (z_elf_get_shdr_init(e)) {\n        z_info(\"find .init section @ %#lx\", z_elf_get_shdr_init(e)->sh_addr);\n    }\n    if (z_elf_get_shdr_fini(e)) {\n        z_info(\"find .fini section @ %#lx\", z_elf_get_shdr_fini(e)->sh_addr);\n    }\n\n    if 
(z_elf_get_shdr_init_array(e)) {\n        z_info(\"find .init_array section @ %#lx\",\n               z_elf_get_shdr_init_array(e)->sh_addr);\n    }\n    if (z_elf_get_shdr_fini_array(e)) {\n        z_info(\"find .fini_array section @ %#lx\",\n               z_elf_get_shdr_fini_array(e)->sh_addr);\n    }\n\n    if (z_elf_get_shdr_plt(e)) {\n        z_info(\"find .plt section @ %#lx\", z_elf_get_shdr_plt(e)->sh_addr);\n    } else {\n        z_info(\".plt section not found\");\n    }\n\n    if (z_elf_get_shdr_plt_got(e)) {\n        z_info(\"find .plt.got section @ %#lx\",\n               z_elf_get_shdr_plt_got(e)->sh_addr);\n    } else {\n        z_info(\".plt.got section not found\");\n    }\n\n    if (z_elf_get_shdr_plt_sec(e)) {\n        z_info(\"find .plt.sec section @ %#lx\",\n               z_elf_get_shdr_plt_sec(e)->sh_addr);\n    } else {\n        z_info(\".plt.sec section not found\");\n    }\n}\n\nZ_PRIVATE void __elf_parse_phdr(ELF *e) {\n    uint8_t *base = z_mem_file_get_raw_buf(e->stream);\n    size_t size = z_mem_file_ftell(e->stream);\n\n    Elf64_Ehdr *ehdr = (Elf64_Ehdr *)base;\n\n    // Entry point\n    e->ori_entry = ehdr->e_entry;\n    z_info(\"find entrypoint: %#lx\", e->ori_entry);\n\n    // Whether the ELF is compiled as PIE\n    e->is_pie = ehdr->e_type == ET_EXEC ? 
false : true;\n    if (e->is_pie) {\n        z_info(\"try to handle PIE executable\");\n    } else {\n        z_info(\"try to handle non-PIE executable\");\n    }\n\n    z_elf_set_ehdr(e, ehdr);\n    z_elf_set_phdr_note(e, NULL);\n    z_elf_set_phdr_dynamic(e, NULL);\n\n    // Locate phdr_note and phdr_dynamic\n    Elf64_Phdr *phdrs = (Elf64_Phdr *)(base + ehdr->e_phoff);\n    for (unsigned i = 0; i < ehdr->e_phnum; i++) {\n        Elf64_Phdr *phdr = phdrs + i;\n        if (phdr->p_type == PT_DYNAMIC)\n            z_elf_set_phdr_dynamic(e, phdr);\n        if (phdr->p_type == PT_NOTE)\n            z_elf_set_phdr_note(e, phdr);\n    }\n\n    Elf64_Phdr *phdr_note = z_elf_get_phdr_note(e);\n    if (phdr_note == NULL) {\n        // TODO: currently we use a very naive but effective method to inject a\n        // new segment, by modifying the PT_NOTE. However, it does not always\n        // work. A better but more complex solution is to move the segment table\n        // to a new place which makes it easier to add segments.\n        EXITME(\"failed to parse ELF file [missing PT_NOTE segment]\");\n    }\n\n    Elf64_Phdr *phdr_dynamic = z_elf_get_phdr_dynamic(e);\n    if (phdr_dynamic != NULL &&\n        phdr_dynamic->p_offset + phdr_dynamic->p_memsz > size) {\n        EXITME(\"failed to parse ELF file [invalid dynamic section]\");\n    }\n\n    z_trace(\"successfully parse ELF header\");\n}\n\nZ_PRIVATE void __elf_validate_header(_MEM_FILE *stream) {\n    size_t size = z_mem_file_ftell(stream);\n    if (size < sizeof(Elf64_Ehdr)) {\n        EXITME(\"failed to parse ELF EHDR [file is too small]\");\n    }\n\n    Elf64_Ehdr *ehdr = (Elf64_Ehdr *)z_mem_file_get_raw_buf(stream);\n\n    if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||\n        ehdr->e_ident[EI_MAG1] != ELFMAG1 ||\n        ehdr->e_ident[EI_MAG2] != ELFMAG2 ||\n        ehdr->e_ident[EI_MAG3] != ELFMAG3) {\n        EXITME(\"failed to parse ELF EHDR [invalid magic number (%c%c%c%c)]\",\n               
ehdr->e_ident[EI_MAG0], ehdr->e_ident[EI_MAG1],\n               ehdr->e_ident[EI_MAG2], ehdr->e_ident[EI_MAG3]);\n    }\n\n    if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) {\n        EXITME(\"failed to parse ELF EHDR [file is not 64bit]\");\n    }\n\n    if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) {\n        EXITME(\"failed to parse ELF EHDR [file is not little endian]\");\n    }\n\n    if (ehdr->e_ident[EI_VERSION] != EV_CURRENT) {\n        EXITME(\"failed to parse ELF EHDR [invalid version]\");\n    }\n\n    if (ehdr->e_machine != EM_X86_64) {\n        EXITME(\"failed to parse ELF EHDR [file is not x86_64]\");\n    }\n\n    if (ehdr->e_phoff < sizeof(Elf64_Ehdr)) {\n        EXITME(\"failed to parse ELF EHDR [invalid program header offset (%u)]\",\n               ehdr->e_phoff);\n    }\n\n    if (ehdr->e_phnum > PN_XNUM) {\n        EXITME(\"failed to parse ELF EHDR [too many program headers (%d)]\",\n               ehdr->e_phnum);\n    }\n\n    if (ehdr->e_phoff + ehdr->e_phnum * sizeof(Elf64_Phdr) > size) {\n        EXITME(\"failed to parse ELF EHDR [invalid program headers]\");\n    }\n\n    if (ehdr->e_shoff < sizeof(Elf64_Ehdr)) {\n        EXITME(\"failed to parse ELF SHDR [invalid section header offset (%u)]\",\n               ehdr->e_shoff);\n    }\n\n    if (ehdr->e_shoff + ehdr->e_shnum * sizeof(Elf64_Shdr) > size) {\n        EXITME(\"failed to parse ELF EHDR [invalid section headers]\");\n    }\n\n    if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {\n        EXITME(\"failed to parse ELF EHDR [file is not executable]\");\n    }\n}\n\nZ_PRIVATE void __elf_set_virtual_mapping(ELF *e, const char *filename) {\n    // Get .text information\n    Elf64_Shdr *text = z_elf_get_shdr_text(e);\n    addr_t text_addr = text->sh_addr;\n    size_t text_size = text->sh_size;\n\n    size_t size = z_mem_file_ftell(e->stream);\n\n    e->vmapping = z_splay_create(NULL);  // Do not support merging\n    e->mmapped_pages = z_splay_create(&z_direct_merge);\n    e->max_addr = 
0;\n\n    // Get segments table\n    Elf64_Ehdr *ehdr = z_elf_get_ehdr(e);\n    Elf64_Phdr *phdrs =\n        (Elf64_Phdr *)__elf_stream_off2ptr(e->stream, ehdr->e_phoff);\n\n    FChunk *fc = NULL;\n    Snode *node = NULL;\n    for (unsigned i = 0; i < ehdr->e_phnum; i++) {\n        Elf64_Phdr *phdr = phdrs + i;\n\n        // We only consider PT_LOAD segment\n        if (phdr->p_type != PT_LOAD)\n            continue;\n\n        // Get segment information\n        //      1. vaddr\n        addr_t vaddr = (addr_t)phdr->p_vaddr;\n        //      2. offset\n        size_t offset = (size_t)phdr->p_offset;\n        //      3. filesz\n        size_t filesz = (size_t)phdr->p_filesz;\n        //      4. memsz\n        size_t memsz = (size_t)phdr->p_memsz;\n        assert(memsz >= filesz);\n        if (offset + filesz > size) {\n            EXITME(\"invalid segment [%ld, %ld]: larger than ELF size(%ld)\",\n                   offset, offset + filesz - 1, size);\n        }\n\n        // Update max virtual address\n        if (e->max_addr < vaddr + memsz) {\n            e->max_addr = vaddr + memsz;\n        }\n\n        if (text_addr >= vaddr && text_addr < vaddr + memsz) {\n            if (!(phdr->p_flags & PF_X)) {\n                EXITME(\".text section is not executable\");\n            }\n\n            // XXX: note that the shared .text section will be mapped in\n            // page-level\n\n            // step (0). make sure all .text are contained by file\n            if (text_addr + text_size > vaddr + filesz) {\n                EXITME(\"some data in .text section is not contained by file\");\n            }\n\n            // step (1). 
first check whether we need to map the head part\n            addr_t aligned_addr = BITS_ALIGN_FLOOR(text_addr, PAGE_SIZE_POW2);\n            if (vaddr < aligned_addr) {\n                assert(aligned_addr - vaddr <= filesz);\n                fc = z_fchunk_create(e->stream, offset, aligned_addr - vaddr,\n                                     false);\n                node = z_snode_create(vaddr, aligned_addr - vaddr, (void *)fc,\n                                      (void (*)(void *))(&z_fchunk_destroy));\n                if (!z_splay_insert(e->vmapping, node)) {\n                    EXITME(\"overlapped virtual addresses\");\n                }\n            }\n\n            // step (2). then check whether we need to map the tail part\n            aligned_addr =\n                BITS_ALIGN_CELL(text_addr + text_size, PAGE_SIZE_POW2);\n\n            // update max_addr if needed\n            if (e->max_addr < aligned_addr) {\n                e->max_addr = aligned_addr;\n            }\n\n            if (aligned_addr < vaddr + memsz) {\n                assert(aligned_addr > vaddr);\n\n                // check which kind of node we need to insert\n                if (aligned_addr - vaddr >= filesz) {\n                    // it means the tail part is purely alloced\n                    node = z_snode_create(\n                        aligned_addr, vaddr + memsz - aligned_addr, NULL, NULL);\n                } else {\n                    // it means the tail part contains some data bytes\n                    fc = z_fchunk_create(e->stream,\n                                         offset + aligned_addr - vaddr,\n                                         vaddr + filesz - aligned_addr, false);\n                    node = z_snode_create(\n                        aligned_addr, vaddr + memsz - aligned_addr, (void *)fc,\n                        (void (*)(void *))(&z_fchunk_destroy));\n                }\n\n                if (!z_splay_insert(e->vmapping, node)) {\n                    
EXITME(\"overlapped virtual addresses\");\n                }\n            }\n\n            // step (3). setup shared .text section\n            __elf_setup_shared_text(e, filename);\n        } else {\n            fc = z_fchunk_create(e->stream, offset, filesz, false);\n            node = z_snode_create(vaddr, memsz, (void *)fc,\n                                  (void (*)(void *))(&z_fchunk_destroy));\n            if (!z_splay_insert(e->vmapping, node)) {\n                EXITME(\"overlapeed virtual addresses\");\n            }\n        }\n\n        // For non-exec segment, we need to insert virtual uTP.\n        // XXX: I totally forget what the following code does...\n        // XXX: the segment containing .text does not go into this branch.\n        if (!(phdr->p_flags & PF_X)) {\n            addr_t gap_1_addr = BITS_ALIGN_FLOOR(vaddr, PAGE_SIZE_POW2);\n            size_t gap_1_size = vaddr - gap_1_addr;\n            if (gap_1_size > 0) {\n                node = z_snode_create(gap_1_addr, gap_1_size, NULL, NULL);\n                if (!z_splay_insert(e->vmapping, node)) {\n                    EXITME(\"overlapped virtual uTPs\");\n                }\n            }\n\n            addr_t gap_2_addr = vaddr + memsz;\n            size_t gap_2_size =\n                PAGE_SIZE - (gap_2_addr & ((1 << PAGE_SIZE_POW2) - 1));\n            if (gap_2_size > 0) {\n                node = z_snode_create(gap_2_addr, gap_2_size, NULL, NULL);\n                if (!z_splay_insert(e->vmapping, node)) {\n                    EXITME(\"overlapped virtual uTPs\");\n                }\n            }\n        }\n\n        // Update mmapped pages\n        // XXX: the .text insertion does not impact the mapped pages\n        assert(memsz != 0);\n        addr_t mmap_addr = BITS_ALIGN_FLOOR(vaddr, PAGE_SIZE_POW2);\n        size_t mmap_size = vaddr + memsz - mmap_addr;\n        mmap_size = BITS_ALIGN_CELL(mmap_size, PAGE_SIZE_POW2);\n        node = z_snode_create(mmap_addr, mmap_size, NULL, 
NULL);\n        if (!z_splay_insert(e->mmapped_pages, node)) {\n            EXITME(\"overlapped mapped addresses\");\n        }\n\n        z_trace(\"find segment [%#lx, %#lx] @ %#lx\", vaddr, vaddr + filesz - 1,\n                offset);\n    }\n\n    // XXX: note that max_addr is only used to find the max address of those\n    // segments in the original ELF, which excludes those pages mapped by us\n    if (!e->max_addr) {\n        EXITME(\"no loaded segment found\");\n    }\n    z_trace(\"max address for original ELF: %#lx\", e->max_addr - 1);\n\n    // Add constant addresses into vmapping and mmapped_pages\n    if (!e->is_pie) {\n        // For PIE binary, it is almost impossible to touch the constant\n        // address, so we ignore them\n        if (!z_splay_insert(\n                e->vmapping,\n                z_snode_create(RW_PAGE_ADDR, RW_PAGE_USED_SIZE, NULL, NULL))) {\n            EXITME(\"constant address is occupied\");\n        }\n        if (!z_splay_insert(\n                e->mmapped_pages,\n                z_snode_create(RW_PAGE_ADDR, RW_PAGE_SIZE, NULL, NULL))) {\n            EXITME(\"constant address is occupied\");\n        }\n        if (!z_splay_insert(\n                e->vmapping,\n                z_snode_create(AFL_MAP_ADDR, AFL_MAP_SIZE, NULL, NULL))) {\n            EXITME(\"constant address is occupied\");\n        }\n        if (!z_splay_insert(\n                e->mmapped_pages,\n                z_snode_create(AFL_MAP_ADDR, AFL_MAP_SIZE, NULL, NULL))) {\n            EXITME(\"constant address is occupied\");\n        }\n        if (!z_splay_insert(\n                e->vmapping,\n                z_snode_create(CRS_MAP_ADDR, CRS_MAP_SIZE, NULL, NULL))) {\n            EXITME(\"constant address is occupied\");\n        }\n        if (!z_splay_insert(\n                e->mmapped_pages,\n                z_snode_create(CRS_MAP_ADDR, CRS_MAP_SIZE, NULL, NULL))) {\n            EXITME(\"constant address is occupied\");\n        }\n    }\n\n    // We additionally 
need to add those mapped pages whose address is based on\n    // ASLR/PIE\n    {\n        if (!z_splay_insert(e->vmapping,\n                            z_snode_create(SIGNAL_STACK_ADDR, SIGNAL_STACK_SIZE,\n                                           NULL, NULL))) {\n            EXITME(\"signal stack is occupied\");\n        }\n        if (!z_splay_insert(e->mmapped_pages,\n                            z_snode_create(SIGNAL_STACK_ADDR, SIGNAL_STACK_SIZE,\n                                           NULL, NULL))) {\n            EXITME(\"signal stack is occupied\");\n        }\n    }\n}\n\nZ_PRIVATE void __elf_parse_main(ELF *e) {\n    assert(e != NULL);\n\n    if (!e->detect_main) {\n        z_info(\n            \"we skip the detection of main function because we are going to \"\n            \"instrument the fork server before the entrypoint\");\n        return;\n    }\n\n    // Try to identify the address of main function.\n\n    // XXX: like AFL, we try to instrument the binary before main(). 
But we may\n    // not always successfully locate the main() function.\n    //  * https://github.com/google/AFL/tree/master/llvm_mode\n    //  * https://github.com/talos-vulndev/afl-dyninst\n\n    Rptr *cur_ptr = z_elf_vaddr2ptr(e, e->ori_entry);\n    addr_t cur_addr = e->ori_entry;\n\n    while (true) {\n        if (z_rptr_is_null(cur_ptr)) {\n            EXITME(\"invalid entrypoint or run out of segment\");\n        }\n\n        CS_DISASM(cur_ptr, cur_addr, 1);\n\n        // If searching all instructions in _start\n        if ((cs_count == 0) || (cs_inst[0].id == X86_INS_CALL)) {\n            EXITME(\n                \"no main function found, please use -e option to install the \"\n                \"fork server at entrypoint\");\n        }\n        z_trace(\"finding main: %#lx:\\t%s %s\", cs_inst[0].address,\n                cs_inst[0].mnemonic, cs_inst[0].op_str);\n\n        // Check load_main\n        if (e->is_pie) {\n            // For PIE binary, we check: lea rdi, [rip + xxx]\n            if (cs_inst[0].id != X86_INS_LEA)\n                goto NEXT;\n            cs_x86_op *ops = cs_inst[0].detail->x86.operands;\n            if (ops[0].type != X86_OP_REG)\n                goto NEXT;\n            if ((ops[1].type != X86_OP_MEM) ||\n                (ops[1].mem.base != X86_REG_RIP) ||\n                (ops[1].mem.index != X86_REG_INVALID))\n                goto NEXT;\n            switch (ops[0].reg) {\n                case X86_REG_RCX:\n                    e->init = ops[1].mem.disp + cs_inst[0].size + cur_addr;\n                    e->load_init = cur_addr;\n                    goto NEXT;\n                case X86_REG_R8:\n                    e->fini = ops[1].mem.disp + cs_inst[0].size + cur_addr;\n                    e->load_fini = cur_addr;\n                    goto NEXT;\n                case X86_REG_RDI:\n                    e->main = ops[1].mem.disp + cs_inst[0].size + cur_addr;\n                    e->load_main = cur_addr;\n                    goto 
LOOP_DONE;\n                default:\n                    goto NEXT;\n            }\n        } else {\n            // For non-PIE binary, we check: mov rdi, xxx\n            if (cs_inst[0].id != X86_INS_MOV)\n                goto NEXT;\n            cs_x86_op *ops = cs_inst[0].detail->x86.operands;\n            if (ops[0].type != X86_OP_REG)\n                goto NEXT;\n            if (ops[1].type != X86_OP_IMM)\n                goto NEXT;\n            switch (ops[0].reg) {\n                case X86_REG_R8:\n                    e->fini = ops[1].imm;\n                    e->load_fini = cur_addr;\n                    goto NEXT;\n                case X86_REG_RCX:\n                    e->init = ops[1].imm;\n                    e->load_init = cur_addr;\n                    goto NEXT;\n                case X86_REG_RDI:\n                    e->main = ops[1].imm;\n                    e->load_main = cur_addr;\n                    goto LOOP_DONE;\n                default:\n                    goto NEXT;\n            }\n        }\n\n    NEXT:\n        cur_addr += cs_inst[0].size;\n        z_rptr_inc(cur_ptr, uint8_t, cs_inst[0].size);\n    }\nLOOP_DONE:\n\n    z_rptr_destroy(cur_ptr);\n    z_info(\"find main function: %#lx\", e->main);\n    z_info(\"find init function: %#lx\", e->init);\n    z_info(\"find fini function: %#lx\", e->fini);\n}\n\nZ_API ELF *z_elf_open(const char *ori_filename, bool detect_main) {\n    ELF *e = STRUCT_ALLOC(ELF);\n\n    e->detect_main = detect_main;\n\n    memset(e->tmpnam, 0, TMPNAME_LEN);\n    z_snprintf(e->tmpnam, TMPNAME_LEN, TMPNAME_FMT, z_rand());\n    z_trace(\"use temp file: %s\", e->tmpnam);\n\n    _MEM_FILE *stream = __elf_open_file(e, ori_filename);\n\n    // Step (0). Validate header\n    __elf_validate_header(stream);\n\n    // Step (1). Alloc ELF struct\n    e->stream = stream;\n\n    // Step (2). Parse program header\n    __elf_parse_phdr(e);\n\n    // Step (3). Parse section header\n    __elf_parse_shdr(e);\n\n    // Step (4). 
Do virtual mapping\n    __elf_set_virtual_mapping(e, ori_filename);\n\n    // Step (5). Extend loader/Trampolines zones onto file\n    __elf_extend_zones(e);\n\n    // Step (6). Setup lookup table\n    __elf_setup_lookup_table(e, ori_filename);\n\n    // Step (7). Setup trampolines (shadow code)\n    __elf_setup_trampolines(e, ori_filename);\n\n    // Step (8). Setup pipe file\n    __elf_setup_pipe(e, ori_filename);\n\n    // Step (9). Setup retaddr mapping\n    __elf_setup_retaddr_mapping(e, ori_filename);\n\n    // Step (10). Detect and parse main function\n    __elf_parse_main(e);\n\n    // Step (11). Rewrite PT_NOTE meta info\n    __elf_rewrite_pt_note(e);\n\n    // Step (12). Set RELRO for elf (REMOVE to allow gdb load library symbols)\n    // XXX: AFL already set LD_BIND_NOW to stops the linker from doing extra\n    // work post-fork()\n    // __elf_set_relro(e);\n\n    // step (13). Get relocation information\n    __elf_parse_relocation(e);\n\n    // step (14). link patched file\n    char *patched_filename = z_strcat(ori_filename, PATCHED_FILE_SUFFIX);\n    z_elf_save(e, patched_filename);\n    z_free(patched_filename);\n\n    // step (15). 
set state\n    e->state = ELFSTATE_CONNECTED;\n\n    return e;\n}\n\nZ_API Rptr *z_elf_vaddr2ptr(ELF *e, addr_t vaddr) {\n    assert(e != NULL);\n\n    // Get corresponding segment\n    Snode *segment = __elf_find_segment_by_vaddr(e, vaddr);\n    if (segment == NULL) {\n        return NULL;\n    }\n\n    // Create Rptr\n    FChunk *fc = (FChunk *)z_snode_get_data(segment);\n    if (z_strcmp(STRUCT_TYPE(fc), \"FChunk\")) {\n        z_trace(\"get address into dynamically allocated space\");\n        return NULL;\n    }\n    size_t off1 = vaddr - z_snode_get_lower_bound(segment);\n    size_t off2 = z_fchunk_get_offset(fc);\n    if (off1 >= z_fchunk_get_size(fc)) {\n        z_trace(\"trying to read on zero-padding region\");\n        return NULL;\n    }\n\n    size_t size = z_snode_get_upper_bound(segment) - vaddr + 1;\n\n    _MEM_FILE *stream = z_fchunk_get_stream(fc);\n\n    return z_rptr_create(__elf_stream_off2ptr(stream, off1 + off2), size);\n}\n\nZ_API void z_elf_destroy(ELF *e) {\n    z_splay_destroy(e->vmapping);\n    z_splay_destroy(e->mmapped_pages);\n\n    g_hash_table_destroy(e->got);\n    g_hash_table_destroy(e->plt);\n\n    z_free(e->retaddr_mapping_name);\n    z_free(e->lookup_tabname);\n    z_free(e->trampolines_name);\n    z_free(e->shared_text_name);\n    z_free(e->pipe_filename);\n\n    z_mem_file_fclose(e->retaddr_mapping_stream);\n    z_mem_file_fclose(e->lookup_table_stream);\n    z_mem_file_fclose(e->trampolines_stream);\n    z_mem_file_fclose(e->shared_text_stream);\n    z_mem_file_fclose(e->stream);\n\n    if (remove(e->tmpnam)) {\n        EXITME(\"failed on remove %s: %s\", e->tmpnam, strerror(errno));\n    }\n\n    z_free(e);\n}\n\nZ_API void z_elf_fsync(ELF *e) {\n    assert(e != NULL);\n\n    z_mem_file_fsync(e->lookup_table_stream);\n    z_mem_file_fsync(e->trampolines_stream);\n    z_mem_file_fsync(e->shared_text_stream);\n    z_mem_file_fsync(e->stream);\n}\n\nZ_API void z_elf_save(ELF *e, const char *pathname) {\n    // curently no need 
to update PT_NOTE, because trampolines are put in\n    // an individual file.\n\n    // fsync\n    z_elf_fsync(e);\n\n    // check whether pathname exists. if so, remove it.\n    if (!z_access(pathname, F_OK)) {\n        if (remove(pathname)) {\n            EXITME(\"failed on remove: %s (error: %s)\", pathname,\n                   strerror(errno));\n        }\n    }\n\n    // create a hard link to e->tmpnam\n    z_info(\"save patched file into %s\", pathname);\n    if (link(e->tmpnam, pathname)) {\n        EXITME(\"failed on link: %s\", strerror(errno));\n    }\n}\n\nZ_API void z_elf_create_snapshot(ELF *e, const char *pathname) {\n    z_elf_fsync(e);\n    z_mem_file_save_as(e->stream, pathname);\n}\n\nZ_API size_t z_elf_read_all(ELF *e, addr_t addr, size_t n, void *buf) {\n    assert(e != NULL);\n\n    size_t cur_n = n;\n\n    while (cur_n > 0) {\n        size_t k = z_elf_read(e, addr, cur_n, buf);\n\n        if (!k) {\n            return n - cur_n;\n        }\n\n        cur_n -= k;\n        buf += k;\n        addr += k;\n    }\n\n    return n;\n}\n\nZ_API size_t z_elf_read(ELF *e, addr_t addr, size_t n, void *buf) {\n    assert(e != NULL);\n\n    Rptr *rptr = z_elf_vaddr2ptr(e, addr);\n    if (z_rptr_is_null(rptr)) {\n        z_error(\"invalid address: %#lx\", addr);\n        return 0;\n    }\n\n    size_t n_ = n < z_rptr_get_size(rptr) ? 
n : z_rptr_get_size(rptr);\n\n    z_rptr_memcpy(buf, rptr, n_);\n    z_rptr_destroy(rptr);\n    return n_;\n}\n\nZ_API size_t z_elf_write(ELF *e, addr_t addr, size_t n, const void *buf) {\n    assert(e != NULL);\n\n    Snode *segment = __elf_find_segment_by_vaddr(e, addr);\n    if (!segment) {\n        EXITME(\"invalid address: %#lx\", addr);\n    }\n    FChunk *fc = (FChunk *)z_snode_get_data(segment);\n\n    if (z_fchunk_get_extendable(fc)) {\n        // write on an extendable space\n        addr_t segment_base_addr = z_snode_get_lower_bound(segment);\n        _MEM_FILE *underlying_stream = z_fchunk_get_stream(fc);\n\n        // XXX: similar to the false branch, the overhead of\n        // __elf_stream_vaddr2off is small because the target snode is already\n        // at the root of Splay\n        size_t tp_off = __elf_stream_vaddr2off(e, segment_base_addr);\n        assert(tp_off == 0);\n\n        size_t write_off = addr - segment_base_addr + tp_off;\n        if (z_mem_file_get_size(underlying_stream) < write_off) {\n            EXITME(\"write on too bigger address: %#lx\", addr);\n        }\n\n        // get old size\n        size_t old_size = z_mem_file_get_size(underlying_stream) - tp_off;\n\n        // We cannot directly use __elf_stream_vaddr2off here, as addr may not\n        // in current virtual memroy.\n        z_mem_file_pwrite(underlying_stream, buf, n, write_off);\n\n        if (write_off + n == z_mem_file_get_size(underlying_stream)) {\n            // XXX: if the underlying stream is fully written, we need to extend\n            // it. For example, if the original address range is [0x1000,\n            // 0x1100) and we wrote all the 0x100 bytes, next time we want to\n            // write on address 0x1100. 
It should be valid because the underlying\n            // stream is extendable.\n            z_mem_file_pwrite(underlying_stream, \"\", 1, write_off + n);\n            assert(write_off + n < z_mem_file_get_size(underlying_stream));\n        }\n\n        // calculate new node\n        size_t new_size = z_mem_file_get_size(underlying_stream) - tp_off;\n\n        // update if new_size is not equal to old_size\n        if (new_size != old_size) {\n            assert(new_size > old_size);\n\n            // delete previous node\n            Snode *node = z_splay_delete(e->vmapping, segment_base_addr);\n            assert(node != NULL);\n\n            addr_t vaddr = z_snode_get_lower_bound(node);\n            z_snode_set_len(node, new_size);\n            z_fchunk_set_size((FChunk *)z_snode_get_data(node), new_size);\n\n            // update virtual mapping\n            if (!z_splay_insert(e->vmapping, node)) {\n                EXITME(\"extend writing [new_size: %#lx, old_size: %#lx]\",\n                       new_size, old_size);\n            }\n\n            // update mapped pages\n            node = z_snode_create(vaddr + old_size, new_size - old_size, NULL,\n                                  NULL);\n            if (!z_splay_insert(e->mmapped_pages, node)) {\n                EXITME(\"extend writing\");\n            }\n\n            // update state\n            z_elf_set_state(e, ELFSTATE_SHADOW_EXTENDED);\n        }\n    } else {\n        // other range\n\n        // XXX: the overhead of re-searching splay is small because the target\n        // snode is already at the root, so we re-invoke z_elf_vaddr2ptr for\n        // the easy understanding of the code\n        Rptr *rptr = z_elf_vaddr2ptr(e, addr);\n        z_rptr_memcpy(rptr, buf, n);\n        z_rptr_destroy(rptr);\n    }\n\n    return n;\n}\n\nZ_API bool z_elf_check_region_free(ELF *e, Snode *region) {\n    assert(e != NULL && region != NULL);\n    return !z_splay_interval_overlap(e->vmapping, 
region);\n}\n\nZ_API bool z_elf_insert_utp(ELF *e, Snode *utp, addr_t *mmap_addr,\n                            size_t *mmap_size) {\n    assert(z_snode_get_data(utp) == NULL);\n    assert(z_snode_get_len(utp) <= PAGE_SIZE);\n\n    // insert utp first\n    if (!z_splay_insert(e->vmapping, utp))\n        return false;\n\n    // calculate mmap page\n    addr_t utp_mmap_lo =\n        BITS_ALIGN_FLOOR(z_snode_get_lower_bound(utp), PAGE_SIZE_POW2);\n    addr_t utp_mmap_up =\n        BITS_ALIGN_FLOOR(z_snode_get_upper_bound(utp), PAGE_SIZE_POW2);\n\n    // init values\n    *mmap_addr = INVALID_ADDR;\n    *mmap_size = 0;\n\n    // check\n    for (addr_t addr = utp_mmap_lo; addr <= utp_mmap_up; addr += PAGE_SIZE) {\n        Snode *node = z_snode_create(addr, PAGE_SIZE, NULL, NULL);\n        if (z_splay_insert(e->mmapped_pages, node)) {\n            *mmap_addr = (*mmap_addr < addr ? *mmap_addr : addr);\n            *mmap_size += PAGE_SIZE;\n        } else {\n            z_snode_destroy(node);\n        }\n    }\n\n    if (*mmap_addr == INVALID_ADDR)\n        *mmap_addr = 0;\n\n    return true;\n}\n\nZ_API const LFuncInfo *z_elf_get_plt_info(ELF *e, addr_t addr) {\n    return (const LFuncInfo *)g_hash_table_lookup(e->plt,\n                                                  GSIZE_TO_POINTER(addr));\n}\n\nZ_API const LFuncInfo *z_elf_get_got_info(ELF *e, addr_t addr) {\n    return (const LFuncInfo *)g_hash_table_lookup(e->got,\n                                                  GSIZE_TO_POINTER(addr));\n}\n\nZ_API bool z_elf_check_state(ELF *e, ELFState state) {\n    if (state & ELFSTATE_DISABLE) {\n        EXITME(\n            \"check state function does not support disabling any state (state: \"\n            \"%#x)\",\n            state);\n    }\n\n    return (e->state & state);\n}\n\nZ_API bool z_elf_is_statically_linked(ELF *e) {\n    // XXX: linux kernel uses .INTERP segment to determine whether a dynmaic\n    // linker is required, but here we use .DYNAMIC segment which is 
good enough\n    // (like what readelf does)\n    return !z_elf_get_phdr_dynamic(e);\n}\n"
  },
  {
    "path": "src/elf_.h",
    "content": "/*\n * elf_.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __ELF__H\n#define __ELF__H\n\n#include \"config.h\"\n#include \"interval_splay.h\"\n#include \"library_functions/library_functions.h\"\n#include \"mem_file.h\"\n#include \"restricted_ptr.h\"\n\n#include <elf.h>\n#include <gmodule.h>\n\n#define TMPNAME_FMT TEMPFILE_NAME_PREFIX \"%08x\"\n#define TMPNAME_LEN 0x20\n\ntypedef enum elf_state_t {\n    ELFSTATE_NONE = 0x0,             // none\n    ELFSTATE_CONNECTED = 0x1,        // disconnect ELF from underlying file\n    ELFSTATE_SHADOW_EXTENDED = 0x2,  // shadow file is extended\n    ELFSTATE_DISABLE = 0x100,        // flag for disable state\n    ELFSTATE_MASK = 0xffff,          // mask\n} ELFState;\n\n/*\n * ELF info for binary rewrite (Key Structure)\n */\nSTRUCT(ELF, {\n    char tmpnam[TMPNAME_LEN];\n    _MEM_FILE *stream;\n    bool detect_main;\n\n    /*\n     *         new                     original\n     *      entrypoint                   main\n     *          |     original          ^\n     *          |    entrypoint         |\n     *          |         ^             |\n     *          V         |             |\n     * |-----|.|-----------|-------------|--------------|.|--------|.|-------|\n     * | ELF |.| TP loader | fork server | random patch |.| BB Tab |.|  TPs  
|\n     * |-----|.|-----------|-------------|--------------|.|--------|.|-------|\n     *         ^            ^\n     *         |            |\n     *      PT_NOTE      new main\n     *\n     * |  ELF  |  LOADER and FORK SEVER                   | BB Tab | |  TPs  |\n     *\n     *         ^                                          ^          ^\n     *    PAGE-ALIGNED                              PAGE-ALIGNED PAGE-ALIGNED\n     */\n\n    /*\n     * ELF Header\n     */\n    size_t ehdr_off;  // EHDR (Elf header)\n\n    /*\n     * Program Header\n     */\n    size_t phdr_note_off;     // PHDR PT_NOTE to be used for loader.\n    size_t phdr_dynamic_off;  // PHDR PT_DYNAMIC else nullptr.\n\n    /*\n     * Section Header\n     */\n    size_t shdr_shstrtab_off;    // SHDR SHT_STRTAB for strings.\n    size_t shdr_text_off;        // SHDR .text section.\n    size_t shdr_init_off;        // SHDR .init section.\n    size_t shdr_fini_off;        // SHDR .fini section.\n    size_t shdr_init_array_off;  // SHDR .init_array\n    size_t shdr_fini_array_off;  // SHDR .fini_array\n    size_t shdr_plt_off;         // SHDR .plt\n    size_t shdr_plt_got_off;     // SHDR .plt.got\n    size_t shdr_plt_sec_off;     // SHDR .plt.sec\n\n    /*\n     * Dynamic information\n     */\n    addr_t fini_array;    // .fini_array\n    size_t fini_arraysz;  // size of .fini_array\n    addr_t init_array;    // .init_array\n    size_t init_arraysz;  // size of .init_array\n\n    /*\n     * Virtual Memory\n     */\n    // vmapping is the actually mappings, while mapped_pages is the thing at\n    // paging level. For example, an actual mapping [0x1010, 0x1020] has a\n    // mapped page [0x1000, 0x2000). 
We use mapped_pages to support multiple\n    // uTPs which fall into the same page (e.g., [0x1010, 0x1020] and [0x1100,\n    // 0x1110]).\n    //\n    // XXX: when the underlying binary is PIE, vmapping does not containe those\n    // segments that are at fixed adddesses (RW_PAGE_ADDR, AFL_MAP_ADDR, and\n    // CRS_MAP_ADDR; details can be found in config.h). It is not a 100% safe\n    // solution, but the likelihood of segment conflicts seems tiny, since the\n    // size of these fixed-address segments is relatively small and the\n    // addresses of them are low.\n    Splay *vmapping;           // Virtual memory\n    Splay *mmapped_pages;      // Mmapped pages\n    addr_t max_addr;           // Max virtual address (XXX: excluding endpoint)\n    addr_t loader_addr;        // Base address of loader\n    addr_t trampolines_addr;   // Base address of trampolines(TP)\n    addr_t lookup_table_addr;  // Base address of lookup table\n    addr_t shared_text_addr;   // Base address of shared .text (page-aligned)\n    addr_t\n        retaddr_mapping_addr;  // Base address of retaddr mapping (page-aligned)\n\n    /*\n     * Lookup table\n     */\n    char *lookup_tabname;            // Name of mmapped lookup table\n    _MEM_FILE *lookup_table_stream;  //_MEM_FILE of lookup table\n\n    /*\n     * Trampolines\n     */\n    char *trampolines_name;         // Name of mmapped trampolines\n    _MEM_FILE *trampolines_stream;  // _MEM_FILE of trampolines\n\n    /*\n     * Shared .text section;\n     */\n    char *shared_text_name;         // Name of shared .text section\n    _MEM_FILE *shared_text_stream;  // _MEM_FILE of shared .text section\n\n    /*\n     * Pipeline\n     */\n    char *pipe_filename;  // Name of pipe communicated with daemon\n\n    /*\n     * Return address mapping\n     */\n    char *retaddr_mapping_name;  // Name of the mapping of return addreseses\n    _MEM_FILE *retaddr_mapping_stream;  // _MEM_FILE of retaddr mapping\n\n    /*\n     * ELF state\n     */\n   
 ELFState state;\n\n    /*\n     * Relocation information\n     */\n    GHashTable *got;  // GOT information\n    GHashTable *plt;  // PLT information\n\n    /*\n     * Other basic information\n     */\n    bool is_pie;       // Whether the binary is compiled as PIE\n    addr_t ori_entry;  // Address of original Entry Point\n    addr_t main;       // Address of main\n    addr_t init;       // Address of init\n    addr_t fini;       // Address of fini\n    addr_t load_main;  // Address of the instruction loading main address\n    addr_t load_init;  // Address of the instruction loading init address\n    addr_t load_fini;  // Address of the instruction loading fini address\n});\n\n/*\n * Setter and Getter\n */\nDECLARE_SETTER(ELF, elf, Elf64_Ehdr *, ehdr);\nDECLARE_SETTER(ELF, elf, Elf64_Phdr *, phdr_note);\nDECLARE_SETTER(ELF, elf, Elf64_Phdr *, phdr_dynamic);\nDECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_shstrtab);\nDECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_text);\nDECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_init);\nDECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_fini);\nDECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_init_array);\nDECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_fini_array);\nDECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt);\nDECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt_got);\nDECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt_sec);\nDECLARE_SETTER(ELF, elf, ELFState, state);\n\nDECLARE_GETTER(ELF, elf, Elf64_Ehdr *, ehdr);\nDECLARE_GETTER(ELF, elf, Elf64_Phdr *, phdr_note);\nDECLARE_GETTER(ELF, elf, Elf64_Phdr *, phdr_dynamic);\nDECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_shstrtab);\nDECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_text);\nDECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_init);\nDECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_fini);\nDECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_init_array);\nDECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_fini_array);\nDECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt);\nDECLARE_GETTER(ELF, elf, Elf64_Shdr *, 
shdr_plt_got);\nDECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt_sec);\nDECLARE_GETTER(ELF, elf, addr_t, loader_addr);\nDECLARE_GETTER(ELF, elf, addr_t, trampolines_addr);\nDECLARE_GETTER(ELF, elf, addr_t, lookup_table_addr);\nDECLARE_GETTER(ELF, elf, addr_t, shared_text_addr);\nDECLARE_GETTER(ELF, elf, addr_t, retaddr_mapping_addr);\nDECLARE_GETTER(ELF, elf, bool, is_pie);\nDECLARE_GETTER(ELF, elf, addr_t, ori_entry);\nDECLARE_GETTER(ELF, elf, addr_t, main);\nDECLARE_GETTER(ELF, elf, addr_t, init);\nDECLARE_GETTER(ELF, elf, addr_t, fini);\nDECLARE_GETTER(ELF, elf, addr_t, load_main);\nDECLARE_GETTER(ELF, elf, addr_t, load_init);\nDECLARE_GETTER(ELF, elf, addr_t, load_fini);\nDECLARE_GETTER(ELF, elf, const char *, lookup_tabname);\nDECLARE_GETTER(ELF, elf, const char *, trampolines_name);\nDECLARE_GETTER(ELF, elf, const char *, shared_text_name);\nDECLARE_GETTER(ELF, elf, const char *, pipe_filename);\nDECLARE_GETTER(ELF, elf, const char *, retaddr_mapping_name);\nDECLARE_GETTER(ELF, elf, size_t, plt_n);\n\n/*\n * Open an ELF file.\n */\nZ_API ELF *z_elf_open(const char *ori_filename, bool detect_main);\n\n/*\n * Destructor of ELF\n */\nZ_API void z_elf_destroy(ELF *e);\n\n/*\n * Save ELF to pathname\n */\nZ_API void z_elf_save(ELF *e, const char *pathname);\n\n/*\n * Return a pointer pointed to given virtual address, NULL if the virtual\n * address is invalid.\n */\nZ_API Rptr *z_elf_vaddr2ptr(ELF *e, addr_t vaddr);\n\n/*\n * Read data from given virtual address.\n * z_elf_read only reads data from a stream, which means if the requested bytes\n * are cross-stream, z_elf_read only returns the first k bytes in the same\n * stream.\n */\nZ_API size_t z_elf_read(ELF *e, addr_t addr, size_t n, void *buf);\n\n/*\n * Forcely read data from given virtual address.\n * Different from z_elf_read, z_elf_read_all forcely read all requested bytes\n * even if they are cross-stream.\n */\nZ_API size_t z_elf_read_all(ELF *e, addr_t addr, size_t n, void *buf);\n\n/*\n * Write data 
to given virtual address.\n * z_elf_write only writes data on a stream, like z_elf_read.\n */\n// XXX: note that the z_elf_write only supports writing on data stored in file\n// but not those dynamically allocated segments.\nZ_API size_t z_elf_write(ELF *e, addr_t addr, size_t n, const void *buf);\n\n// TODO: add z_elf_write_all if necessary\n\n/*\n * Check whether the ELF is statically-linked\n */\nZ_API bool z_elf_is_statically_linked(ELF *e);\n\n/*\n * Get PLT information\n */\nZ_API const LFuncInfo *z_elf_get_plt_info(ELF *e, addr_t addr);\n\n/*\n * Get GOT information\n */\nZ_API const LFuncInfo *z_elf_get_got_info(ELF *e, addr_t addr);\n\n/*\n * Check whether region is free.\n */\nZ_API bool z_elf_check_region_free(ELF *e, Snode *region);\n\n/*\n * Insert a utp into vmapping.\n */\nZ_API bool z_elf_insert_utp(ELF *e, Snode *utp, addr_t *mmap_addr,\n                            size_t *mmap_size);\n\n/*\n * Sync all mapping file\n */\nZ_API void z_elf_fsync(ELF *e);\n\n/*\n * Create a snapshot for current ELF.\n * Different from z_elf_save, this ELF's main body (except lookup table and\n * shadow) will remain unchanged even if future patches are applied.\n */\nZ_API void z_elf_create_snapshot(ELF *e, const char *pathname);\n\n/*\n * Check ELF state\n */\nZ_API bool z_elf_check_state(ELF *e, ELFState state);\n\n#endif\n"
  },
  {
    "path": "src/fork_server.c",
    "content": "/*\n * the code inside asm(\".globl _entry\\n ...\")\n * Copyright (C) 2021 National University of Singapore\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n *\n */\n// XXX: the code inside the asm(\".globl _entry\\n ...\") is modified based on\n// https://github.com/GJDuck/e9patch/blob/master/src/e9patch/e9loader.cpp\n\n/*\n * other parts of fork_server.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * Workflow between different components:\n *\n *   +--------- pre-handshake (shm) -----------+\n *   |               +-- pre-handshake (shm) --+\n *   |               |                         |\n * +-+-+         +---+---+              +-----+-----+                +------+\n * |AFL|         |OURTOOL|              |fork server|                |client|\n * +-+-+         +---+---+              +-----+-----+                +------+\n *   |               |                         |\n *   |               |   [trigger execution]   |   [   new client  &  ]\n *   +--------------{|}----------------------->|   [handshake (socket)]\n *   |               |                         +------------------------>|\n *   |               |                         |                         |\n *   |               |                         |                         |\n *   |               |                         |     [status (wait4)]    x MIC\n *   |               |  [status (comm socket)] |<----------------------+-+\n *   |               |<------------------------+                       |\n *   |               |     [*CRPS* (shm)]      |                       |\n *   |               |<-----------------------{|}----------------------+\n *   |               |                         |\n *   |     validate  | [trigger (comm socket)] |\n *   |     crashsite ~ [ patch commands (shm)] |\n *   |     (if fake) +------------------------>|\n *   |               |                         ~ patch self and re-mmap\n *   |               |                         |   [   new client  &  ]\n *   |               |                         |   [handshake (socket)]\n *   |               |[clock ON (comm socket)] +------------------------>|\n *   |               |<------------------------+    
                     |\n *   |               |                         |                         |\n *   |               |                         |     [status (wait4)]    x MIC\n *   |               |[clock OFF (comm socket)]|<----------------------+-+\n *   |               |<------------------------+                       |\n *   |               |  [status (comm socket)] |                       |\n *   |               |<------------------------+                       |\n *   |               |     [*CRPS* (shm)]      |                       |\n *   |               |<-----------------------{|}----------------------+\n *   |               |                         |\n *   |     validate  | [trigger (comm socket)] |\n *   |     crashsite ~ [ patch commands (shm)] |\n *   |     (if real) +------------------------>|\n *   |               |                         |\n *   |               |    [status (socket)]    |\n *   |<-------------{|}------------------------+\n *   |               |                         |\n *   |               |                         |\n *   |               | [trigger new execution] |   [   new client  &  ]\n *   +--------------{|}----------------------->|   [handshake (socket)]\n *   |               |                         +------------------------>|\n *   |               |                         |                         |\n *   |               |                         |     [status (wait4)]    | exit\n *   |               |    [status (socket)]    |<------------------------+\n *   |<-------------{|}------------------------+\n *\n *\n *  *CRPS*: crash points\n *  *MIC* : maybe-intentional crash\n *\n */\n\n/*\n * Different situations:\n *\n * +------------------------+------------------+-------------------------------+\n * | Daemon mode / Run mode |   AFL attached   |           Action              |\n * +========================+==================+===============================+\n * |                        |        No        |        
Perform dry run        |\n * |        Run mode        +------------------+-------------------------------+\n * |                        |        Yes       |           Invalid             |\n * +------------------------+------------------+-------------------------------+\n * |                        |        No        | Ignore AFL-related operations |\n * |       Daemon mode      +------------------+-------------------------------+\n * |                        |        Yes       |      Follow above workflow    |\n * +------------------------+------------------+-------------------------------+\n */\n\n#include \"fork_server.h\"\n\n#include <sched.h>\n#include <signal.h>\n#include <stdbool.h>\n#include <stdint.h>\n#include <sys/mman.h>\n#include <sys/socket.h>\n#include <sys/un.h>\n#include <unistd.h>\n\n#include \"asm_utils.c\"\n\n#ifdef DEBUG\nextern const char no_daemon_str[];\nextern const char getenv_err_str[];\nextern const char afl_shmat_err_str[];\nextern const char crs_shmat_err_str[];\nextern const char hello_err_str[];\nextern const char read_err_str[];\nextern const char fork_err_str[];\nextern const char wait4_err_str[];\nextern const char mumap_err_str[];\nextern const char mprotect_err_str[];\nextern const char pipe_err_str[];\nextern const char dup2_err_str[];\nextern const char env_setting_err_str[];\nextern const char socket_err_str[];\nextern const char data_pipe_err_str[];\n// extern const char msync_err_str[];\nextern const char write_err_str[];\nextern const char pipe_filename_err_str[];\nextern const char afl_attached_str[];\nextern const char status_str[];\nextern const char setpgid_err_str[];\n#endif\n\nextern const char magic_string[];\nextern const char afl_shm_env[];\n\nasm(\".globl _entry\\n\"\n    \".type _entry,@function\\n\"\n    \"_entry:\\n\"\n\n    // (1) push all registers\n    \"\\tpushq %r15;\\n\"\n    \"\\tpushq %r14;\\n\"\n    \"\\tpushq %r13;\\n\"\n    \"\\tpushq %r12;\\n\"\n    \"\\tpushq %r11;\\n\"\n    \"\\tpushq 
%r10;\\n\"\n    \"\\tpushq %r9;\\n\"\n    \"\\tpushq %r8;\\n\"\n    \"\\tpushq %rcx;\\n\"\n    \"\\tpushq %rdx;\\n\"\n    \"\\tpushq %rsi;\\n\"\n    \"\\tpushq %rdi;\\n\"\n\n    // (2) make rsp 16-bytes alignment\n    \"\\tmovq %rsp, %rbp;\\n\"\n    \"\\torq $8, %rsp;\\n\"\n    \"\\tpushq %rbp;\\n\"\n\n    // (3) get envp into %rdi\n    \"\\tlea __etext(%rip), %rdi;\\n\"\n    \"\\taddq $4, %rdi;\\n\"\n    \"\\tshrq $3, %rdi;\\n\"\n    \"\\tincq %rdi;\\n\"\n    \"\\tshlq $3, %rdi;\\n\"      // cur_addr in __binary_setup_fork_server step (3)\n                              // binary.c\n    \"\\tmovq (%rdi), %rsi;\\n\"  // whether the fork server is at the entrypoint or\n                              // not\n    \"\\ttest %rsi, %rsi;\\n\"\n    \"\\tje _envp_done;\\n\"\n    \"\\taddq $96, %rbp;\\n\"\n    \"\\tmovq (%rbp), %rdx;\\n\"  // argc\n    \"\\taddq $2, %rdx;\\n\"\n    \"\\tshlq $3, %rdx;\\n\"\n    \"\\taddq %rbp, %rdx;\\n\"  // envp\n\n    \".globl _envp_done\\n\"\n    \"_envp_done:\\n\"\n    \"\\tmovq %rdx, %rdi;\\n\"\n\n    // (4) call fork_server_start()\n    \"\\tcallq fork_server_start;\\n\"\n\n    // (5) restore context\n    \"\\tpopq %rsp;\\n\"\n    \"\\tpopq %rdi;\\n\"\n    \"\\tpopq %rsi;\\n\"\n    \"\\tpopq %rdx;\\n\"\n    \"\\tpopq %rcx;\\n\"\n    \"\\tpopq %r8;\\n\"\n    \"\\tpopq %r9;\\n\"\n    \"\\tpopq %r10;\\n\"\n    \"\\tpopq %r11;\\n\"\n    \"\\tpopq %r12;\\n\"\n    \"\\tpopq %r13;\\n\"\n    \"\\tpopq %r14;\\n\"\n    \"\\tpopq %r15;\\n\"\n\n    // (6) jump to following code\n    \"\\tjmp __etext;\\n\"\n\n#ifdef DEBUG\n    // no_daemon_str\n    ASM_STRING(no_daemon_str, \"fork server: no daemon found, switch to dry run\")\n    // getenv_err_str\n    ASM_STRING(getenv_err_str, \"fork server: environments not found\")\n    // afl_shmat_err_str\n    ASM_STRING(afl_shmat_err_str, \"fork server: shmat error (AFL)\")\n    // crs_shmat_err_str\n    ASM_STRING(crs_shmat_err_str, \"fork server: shmat error (CRS)\")\n    // hello_err_str\n    
ASM_STRING(hello_err_str, \"fork server: handshake error\")\n    // write_err_str\n    ASM_STRING(write_err_str, \"fork server: write error\")\n    // read_err_str\n    ASM_STRING(read_err_str, \"fork server: read error\")\n    // fork_err_str\n    ASM_STRING(fork_err_str, \"fork server: fork error\")\n    // wait4_err_str\n    ASM_STRING(wait4_err_str, \"fork server: wait4 error\")\n    // mumap_err_str\n    ASM_STRING(mumap_err_str, \"fork server: mumap error\")\n    // mprotect_err_str\n    ASM_STRING(mprotect_err_str, \"fork server: mprotect error\")\n    // pipe_err_str\n    ASM_STRING(pipe_err_str, \"fork server: pipe error\")\n    // socket_err_str\n    ASM_STRING(socket_err_str, \"fork server: socket error\")\n    // data_pipe_err_str\n    ASM_STRING(data_pipe_err_str, \"fork server: data pipe connection error\")\n    // // msync_err_str\n    // ASM_STRING(msync_err_str, \"fork server: msync error\")\n    // dup2_err_str\n    ASM_STRING(dup2_err_str, \"fork server: dup2 error\")\n    // pipe_filename_err_str\n    ASM_STRING(pipe_filename_err_str, \"fork server: pipe filename too long\")\n    // env_setting_err_str\n    ASM_STRING(env_setting_err_str,\n               \"fork server: fuzzing without daemon running\")\n    // afl_attached_str\n    ASM_STRING(afl_attached_str, \"fork server: AFL detected\")\n    // status_str\n    ASM_STRING(status_str, \"fork server: client status: \")\n    // setpgid_err_str\n    ASM_STRING(setpgid_err_str, \"fork server: setpgid error\")\n#endif\n\n    // Magic String to indicate instrumented\n    ASM_STRING(magic_string, MAGIC_STRING)\n    // AFL's shm environment variable\n    ASM_STRING(afl_shm_env, AFL_SHM_ENV));\n\n/*\n * Atoi without any safe check\n */\nstatic inline int fork_server_atoi(char *s) {\n    int val = 0;\n    bool is_neg = false;\n\n    if (*s == '-') {\n        s++;\n        is_neg = true;\n    }\n\n    while (*s)\n        val = val * 10 + (*(s++) - '0');\n\n    if (is_neg) {\n        val = -val;\n    
}\n\n    return val;\n}\n\n/*\n * Get shm_id from environment.\n */\nstatic inline int fork_server_get_shm_id(char **envp) {\n    char *s;\n    while ((s = *(envp++))) {\n        // hand-written strcmp with \"__AFL_SHM_ID=\"\n        if (*(unsigned long *)s != 0x48535f4c46415f5f) {\n            continue;\n        }\n        if (*(unsigned int *)(s + 8) != 0x44495f4d) {\n            continue;\n        }\n        if (*(s + 12) != '=') {\n            continue;\n        }\n\n        return fork_server_atoi(s + 13);\n    }\n\n    utils_puts(getenv_err_str, true);\n    return INVALID_SHM_ID;\n}\n\n/*\n * Connect to the pipeline\n */\nstatic inline int fork_server_connect_pipe() {\n    // step (1). create sock_fd\n    int sock_fd = sys_socket(AF_UNIX, SOCK_STREAM, 0);\n    if (sock_fd < 0) {\n        utils_error(socket_err_str, true);\n    }\n\n    //  step (2). construct sockaddr\n    struct sockaddr_un server = {0};\n    server.sun_family = AF_UNIX;\n#ifdef DEBUG\n    size_t n_ = utils_strcpy(server.sun_path, RW_PAGE_INFO(pipe_path));\n    if (n_ >= sizeof(server.sun_path)) {\n        utils_error(pipe_filename_err_str, true);\n    }\n#else\n    utils_strcpy(server.sun_path, RW_PAGE_INFO(pipe_path));\n#endif\n\n    //  step (3). connect to daemon\n    if (sys_connect(sock_fd, (struct sockaddr *)&server,\n                    sizeof(struct sockaddr_un)) < 0) {\n        // daemon is not setup, direct return (dry_run)\n        sys_close(sock_fd);\n        return -1;\n    }\n\n    return sock_fd;\n}\n\n/*\n * Start fork server and do random patch.\n */\nNO_INLINE void fork_server_start(char **envp) {\n    /*\n     * step (1). setup comm connection\n     */\n    // step (1.1). 
connect socket for comm_fd\n    int comm_fd = fork_server_connect_pipe();\n    if (comm_fd < 0) {\n        // make sure AFL is not attached\n        if (fork_server_get_shm_id(envp) != INVALID_SHM_ID) {\n            utils_error(env_setting_err_str, true);\n        }\n        utils_puts(no_daemon_str, true);\n        RW_PAGE_INFO(daemon_attached) = false;\n        return;\n    } else {\n        RW_PAGE_INFO(daemon_attached) = true;\n    }\n\n    // step (1.2). dup2 comm_fd to CRS_COMM_FD\n    {\n        if (sys_dup2(comm_fd, CRS_COMM_FD) < 0) {\n            utils_error(dup2_err_str, true);\n        }\n        sys_close(comm_fd);\n    }\n\n    /*\n     * step (2). check whether AFL is attached\n     */\n    int afl_shm_id = fork_server_get_shm_id(envp);\n    bool afl_attached = (afl_shm_id != INVALID_SHM_ID);\n    if (afl_attached) {\n        utils_puts(afl_attached_str, true);\n    }\n\n    /*\n     * step (3). read crs_shm_id/check_execs from daemon and respond\n     * afl_attached/afl_shm_id (comm shakehand)\n     */\n    // XXX: CRS may be uncessary once we use shared memory for .text section\n    int crs_shm_id = INVALID_SHM_ID;\n    uint32_t check_execs = 0;\n    {\n        if (sys_read(CRS_COMM_FD, (char *)&crs_shm_id, 4) != 4) {\n            utils_error(hello_err_str, true);\n        }\n\n        int __tmp_data = afl_attached;\n        if (sys_write(CRS_COMM_FD, (char *)&__tmp_data, 4) != 4) {\n            utils_error(hello_err_str, true);\n        }\n\n        __tmp_data = afl_shm_id;\n        if (sys_write(CRS_COMM_FD, (char *)&__tmp_data, 4) != 4) {\n            utils_error(hello_err_str, true);\n        }\n\n        if (sys_read(CRS_COMM_FD, (char *)&check_execs, 4) != 4) {\n            utils_error(hello_err_str, true);\n        }\n    }\n\n    /*\n     * step (4). 
mmap CRS_SHARED_MEMORY\n     */\n    // ummap the fake CRS map\n    if (sys_munmap(CRS_MAP_ADDR, CRS_MAP_SIZE) != 0) {\n        utils_error(mumap_err_str, true);\n    }\n    if ((size_t)sys_shmat(crs_shm_id, (const void *)CRS_MAP_ADDR, SHM_RND) !=\n        CRS_MAP_ADDR) {\n        utils_error(crs_shmat_err_str, true);\n    }\n\n    /*\n     * step (5) [if: AFL_ATTACHED].\n     *      munmap the fake AFL_SHARED_MEMORY and mmap the real one\n     */\n    if (afl_attached) {\n        if (sys_munmap(AFL_MAP_ADDR, AFL_MAP_SIZE) != 0) {\n            utils_error(mumap_err_str, true);\n        }\n        if ((size_t)sys_shmat(afl_shm_id, (const void *)AFL_MAP_ADDR,\n                              SHM_RND) != AFL_MAP_ADDR) {\n            utils_error(afl_shmat_err_str, true);\n        }\n    }\n\n    /*\n     * step (6). [if: AFL_ATTACHED]\n     *      send 4-byte \"hello\" message to AFL\n     */\n    {\n        int __tmp_data = 0x19961219;\n        if (afl_attached) {\n            if (sys_write(AFL_FORKSRV_FD + 1, (char *)&__tmp_data, 4) != 4) {\n                utils_error(hello_err_str, true);\n            }\n        }\n    }\n\n    /*\n     * step (7). main while-loop\n     */\n    CRSLoopType crs_loop = CRS_LOOP_NONE;\n    uint32_t cur_execs = 0;\n    while (true) {\n        // step (7.1). [if: AFL_ATTACHED && !CRS_LOOP]\n        //      wait AFL's signal\n        if (afl_attached && !crs_loop) {\n            int __tmp_data;\n            if (sys_read(AFL_FORKSRV_FD, (char *)&__tmp_data, 4) != 4) {\n                utils_error(read_err_str, true);\n            }\n        }\n\n        // step (7.2). 
do fork\n        // XXX: just before the fork, we need to clean CRS\n        CRS_INFO(lock) = 0;\n        CRS_INFO(crash_ip) = CRS_INVALID_IP;\n        CRS_INFO(self_fired) = 0;\n\n        pid_t tid = 0;\n        pid_t client_pid =\n            sys_clone(CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, 0,\n                      NULL, &tid, NULL);\n        if (client_pid < 0) {\n            utils_error(fork_err_str, true);\n        }\n\n        if (client_pid == 0) {\n            /*\n             * child process\n             */\n\n            /*\n             * Update at Nov 2021:\n             *  For binaries compiled with ASAN, it seems there always are some\n             * new processes (?). Hence, it would be better to isolate the\n             * client into a new process group.\n             */\n            // set pgid, to avoid kill fork_server when sending signal\n            // or we can setsid to directly isolate the process\n            if (sys_setpgid(0, 0)) {\n                utils_error(setpgid_err_str, true);\n            }\n\n            /*\n             * XXX: To handle multi-thread/-process programs, a safe approach is\n             * to change client's process group, and every time a potential\n             * patch crash happens, the signal hander kills all processes in the\n             * client's process group. 
The following code can be used to\n             * implement this approach:\n             *\n             *   ------\n             *      // set pgid, to avoid kill fork_server when sending signal\n             *      // or we can setsid to directly isolate the process\n             *      if (sys_setpgid(0, 0)) {\n             *          utils_error(setpgid_err_str, true);\n             *      }\n             *   ------\n             *\n             * However, the disadvantage of this approach is that, every time\n             * the fork server creates a new client, the *setpgid* syscall will\n             * bring additional overhead (seems negligible tbh).\n             *\n             * Alternatively, we can use following code in the signal handler to\n             * kill client and the crashed process:\n             *\n             *   ------\n             *      sys_kill(client_pid, SIGKILL);\n             *      sys_kill(sys_getpid(), SIGKILL);\n             *   ------\n             *\n             * Instead of killing the whole process group like following\n             *\n             *   ------\n             *      sys_kill(0, SIGKILL);\n             *   ------\n             *\n             * It is helpful when facing multi-thread/-process programs.\n             * Additionally, it is also good to know that a child process can\n             * send signal to its parent process (as if they share the same user\n             * ID or effective user ID). But it may also leave some other\n             * processes zombie (e.g., the parent process creates two child\n             * processes).\n             *\n             * However, a good obversation is that, vanilla AFL can also have\n             * such problem. 
Imagine that a multi-process program has a crashed\n             * parent process, AFL will not take care of the client processes\n             * anymore and leave them zombie.\n             *\n             * Hence, we choose the latter approach to reduce overhead.\n             */\n            /*\n             * XXX: actually, I do not know why AFL does not setpgid/setsid when\n             * forking new processes. If the target program invoked kill(0,\n             * SIGXXX), the fork server would be killed too, imo.\n             */\n            RW_PAGE_INFO(client_pid) = tid;\n\n            // update pid and tid in TLS, so that when the child process sends\n            // signal to itself, it will not mis-send to its parent.\n            //\n            // check glibc source code for more information:\n            //\n            // https://code.woboq.org/userspace/glibc/sysdeps/nptl/fork.c.html#76\n            // for how glibc implements fork() as a wrapper of syscall clone;\n            //\n            // https://code.woboq.org/userspace/glibc/nptl/descr.h.html#pthread\n            // for the memory layout of struct pthread in glibc.\n            register unsigned int tid_ asm(\"eax\") = (unsigned int)tid;\n            asm(\".intel_syntax noprefix\\n\"\n                \"  mov DWORD PTR fs:0x2d0, eax;\\n\"\n                \"  mov DWORD PTR fs:0x2d4, eax;\\n\"\n                :\n                : \"r\"(tid_)\n                :);\n\n            // close uncessary file descriptors\n            sys_close(AFL_FORKSRV_FD);\n            sys_close(AFL_FORKSRV_FD + 1);\n            sys_close(CRS_COMM_FD);\n\n            RW_PAGE_INFO(afl_prev_id) = 0;\n            break;\n        }\n\n        // step (7.3). [if: AFL_ATTACHED && !CRS_LOOP]\n        //      tell AFL that the client is started\n        if (afl_attached && !crs_loop) {\n            sys_write(AFL_FORKSRV_FD + 1, (char *)&client_pid, 4);\n        }\n\n        // step (7.4). 
notify the daemon about the client_pid if crs_loop\n        if (crs_loop) {\n            sys_write(CRS_COMM_FD, (char *)&client_pid, 4);\n        }\n\n        // step (7.5). wait till the client stop\n        int client_status = 0;\n        if (sys_wait4(client_pid, &client_status, 0, NULL) < 0) {\n            utils_error(wait4_err_str, true);\n        }\n        // update client_status\n        int self_fired = CRS_INFO(self_fired);\n        client_status = PACK_STATUS(client_status, self_fired);\n#ifdef DEBUG\n        utils_puts(status_str, false);\n        utils_output_number(client_status);\n#endif\n\n        // step (7.6). notify the daemon that the crs run is done\n        if (crs_loop) {\n            sys_write(CRS_COMM_FD, (char *)&client_pid, 4);\n        }\n\n        // step (7.7). check the client's status\n        // XXX: after going into the ABNORMAL_STATUS branch, the program is\n        // either crashed by a patch (which will lead to a crs_loop) or a\n        // subject bug.\n        // XXX: a new situation is that the program is under delta debugging.\n        if (IS_ABNORMAL_STATUS(client_status) || crs_loop == CRS_LOOP_DEBUG) {\n        TALK_TO_DAEMON:;\n            // step (7.7.1). notify the daemon and wait response\n            //      + sending out the status\n            //      + receiving the status of crash site (CRS)\n            int crs_status = CRS_STATUS_CRASH;\n            {\n                sys_write(CRS_COMM_FD, (char *)&client_status, 4);\n                sys_read(CRS_COMM_FD, (char *)&crs_status, 4);\n            }\n\n            // step (7.7.2). 
if there is a crash and it is not caused by a\n            // latent bug\n            if (crs_status != CRS_STATUS_CRASH &&\n                crs_status != CRS_STATUS_NORMAL) {\n                // check remmap\n                if (crs_status == CRS_STATUS_REMMAP) {\n                    // munmap current shadow file (due to the different size)\n                    if (sys_munmap(RW_PAGE_INFO(shadow_base),\n                                   RW_PAGE_INFO(shadow_size))) {\n                        utils_error(mumap_err_str, true);\n                    }\n                    // remmap it\n                    RW_PAGE_INFO(shadow_size) = utils_mmap_external_file(\n                        RW_PAGE_INFO(shadow_path), false,\n                        RW_PAGE_INFO(shadow_base), PROT_READ | PROT_EXEC);\n\n                    if (RW_PAGE_INFO(retaddr_mapping_used)) {\n                        // munmap current retaddr mapping\n                        if (sys_munmap(RW_PAGE_INFO(retaddr_mapping_base),\n                                       RW_PAGE_INFO(retaddr_mapping_size))) {\n                            utils_error(mumap_err_str, true);\n                        }\n                        // remmap it\n                        RW_PAGE_INFO(retaddr_mapping_size) =\n                            utils_mmap_external_file(\n                                RW_PAGE_INFO(retaddr_mapping_path), false,\n                                RW_PAGE_INFO(retaddr_mapping_base), PROT_READ);\n                    }\n                }\n\n                // check delta debugging mode\n                if (crs_status == CRS_STATUS_DEBUG) {\n                    // the next loop is forced to communicate with the daemon\n                    crs_loop = CRS_LOOP_DEBUG;\n                } else {\n                    // we are going into the CRS loop which is out of AFL's\n                    // control\n                    crs_loop = CRS_LOOP_INCR;\n                }\n\n                // clear shared memory\n   
             {\n                    register uintptr_t dst asm(\"rdi\") = (uintptr_t)AFL_MAP_ADDR;\n                    register uintptr_t n asm(\"rcx\") = (uintptr_t)AFL_MAP_SIZE;\n#ifdef AVX512\n                    // (AVX512F version)\n                    asm volatile(\n                        \".intel_syntax noprefix\\n\"\n                        \"  xor rax, rax;\\n\"\n                        \"  vpbroadcastd zmm16, eax;\\n\"\n                        \"  lea rax, [rdi + rcx];\\n\"\n                        \"  sub rdi, rax;\\n\"\n                        \"loop:\\n\"\n                        \"  vmovdqa64 [rax + rdi], zmm16;\\n\"\n                        \"  add rdi, 0x40;\\n\"\n                        \"  jnz loop;\\n\"\n                        :\n                        : \"r\"(dst), \"r\"(n)\n                        : \"rax\", \"zmm16\");\n#else\n                    // (SSE version)\n                    asm volatile(\n                        \".intel_syntax noprefix\\n\"\n                        \"  xorps xmm0, xmm0;\\n\"\n                        \"  lea rax, [rdi + rcx];\\n\"\n                        \"  sub rdi, rax;\\n\"\n                        \"loop:\\n\"\n                        \"  movdqa [rax + rdi], xmm0;\\n\"\n                        \"  add rdi, 0x10;\\n\"\n                        \"  jnz loop;\\n\"\n                        :\n                        : \"r\"(dst), \"r\"(n)\n                        : \"rax\", \"xmm0\");\n#endif\n                }\n\n                // go into CRS loop\n                continue;\n            }\n\n            // If the program has reached this part, it indicates a real\n            // crash has occured. 
Here, we need to reset client_status as\n            // any suspect status, here we choose SIGKILL\n            if (IS_SUSPECT_STATUS(client_status)) {\n                // XXX: please MAKE SURE **SIGKILL** is used, otherwise it is\n                // possible to meet deadlock in the signal handler\n                client_status = SIGKILL;\n            }\n        } else if (check_execs) {\n            // handle checking runs when current execution is normal\n            if (cur_execs++ == check_execs) {\n                cur_execs = 0;\n                goto TALK_TO_DAEMON;\n            }\n        }\n\n        // step (7.8). handle any other situation which is not caused by\n        // patching\n        //      [if: AFL_ATTACHED]: notify AFL and loop\n        //      [if: !AFL_ATTACHED]: exit as normal or kill self with the same\n        //      signal\n        crs_loop = CRS_LOOP_NONE;\n        if (afl_attached) {\n            sys_write(AFL_FORKSRV_FD + 1, (char *)&client_status, 4);\n        } else {\n            // notify the daemon is exited normally\n            sys_write(CRS_COMM_FD, (char *)&client_status, 4);\n            // XXX: in case of any hooked signal\n            if (WIFEXITED(client_status)) {\n                sys_exit(WEXITSTATUS(client_status));\n            } else if (WIFSIGNALED(client_status)) {\n                // XXX: if the daemon already identified this crash, it will\n                // stop automatically\n                // XXX: we are using SIGKILL which cannot be caught by any\n                // signal handler\n                sys_kill(0, WTERMSIG(client_status));\n            } else {\n                sys_kill(0, WSTOPSIG(client_status));\n            }\n        }\n    }\n\n    return;\n}\n"
  },
  {
    "path": "src/fork_server.h",
    "content": "/*\n * fork_server.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __FORK_SERVER_H\n#define __FORK_SERVER_H\n\n#include \"afl_config.h\"\n#include \"crs_config.h\"\n#include \"loader.h\"\n\ntypedef enum crs_loop_type {\n    CRS_LOOP_NONE = 0,  // not a crs loop\n    CRS_LOOP_INCR,      // crs loop caused by incremental rewriting\n    CRS_LOOP_DEBUG,     // crs loop caused by delta debugging\n} CRSLoopType;\n\n#endif\n"
  },
  {
    "path": "src/frontend.c",
    "content": "/*\n * frontend.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * Frontend for OURTOOL\n */\n\n#include \"afl_config.h\"\n#include \"libstochfuzz.h\"\n\n#include <assert.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <sys/ipc.h>\n#include <sys/shm.h>\n#include <unistd.h>\n\n/*\n * Display usage hints.\n */\nstatic void usage(const char *argv0, int ret_status);\n\n/*\n * Parse arguments\n */\nstatic inline int parse_args(int argc, const char **argv);\n\n/*\n * Handle different modes\n */\nstatic inline void mode_disasm(int argc, const char **argv);\n\nstatic inline void mode_patch(int argc, const char **argv);\n\nstatic inline void mode_view(int argc, const char **argv);\n\nstatic inline void mode_run(int argc, const char **argv);\n\nstatic inline void mode_start(int argc, const char **argv);\n\nstatic void usage(const char *argv0, int ret_status) {\n    z_sayf(\n        \"\\n%s [ options ] -- target_binary [ ... 
] \\n\\n\"\n\n        \"Mode settings:\\n\\n\"\n\n        \"  -S            - start a background daemon and wait for a fuzzer to \"\n        \"attach (defualt mode)\\n\"\n        \"  -R            - dry run target_binary with given arguments without \"\n        \"an attached fuzzer\\n\"\n        \"  -P            - patch target_binary without incremental rewriting\\n\"\n        \"  -D            - probabilistic disassembly without rewriting\\n\"\n        \"  -V            - show currently observed breakpoints\\n\\n\"\n\n        \"Rewriting settings:\\n\\n\"\n\n        \"  -g            - trace previous PC\\n\"\n        \"  -c            - count the number of basic blocks with conflicting \"\n        \"hash values\\n\"\n        \"  -d            - disable instrumentation optimization\\n\"\n        \"  -r            - assume the return addresses are only used by RET \"\n        \"instructions\\n\"\n        \"  -e            - install the fork server at the entrypoint instead \"\n        \"of the main function\\n\"\n        \"  -f            - forcedly assume there is data interleaving with \"\n        \"code\\n\"\n        \"  -i            - ignore the call-fallthrough edges to defense \"\n        \"RET-misusing obfuscation\\n\\n\"\n\n        \"Other stuff:\\n\\n\"\n\n        \"  -h            - print this help\\n\"\n        \"  -x execs      - set the number of executions after which a checking \"\n        \"run will be triggered\\n\"\n        \"                  set it as zero to disable checking runs \"\n        \"(default: %u)\\n\"\n        \"  -t msec       - set the timeout for each daemon-triggering \"\n        \"execution\\n\"\n        \"                  set it as zero to ignore the timeout \"\n        \"(default: %lu ms)\\n\"\n#ifdef DEBUG\n        \"  -l level      - set the log level, including TRACE, DEBUG, INFO, \"\n        \"WARN, ERROR, and FATAL (default: INFO)\\n\\n\",\n#else\n        \"  -l level      - set the log level, including INFO, WARN, 
ERROR, and \"\n        \"FATAL (default: INFO)\\n\\n\",\n#endif\n\n        argv0, SYS_CHECK_EXECS, SYS_TIMEOUT);\n\n    exit(ret_status);\n}\n\nstatic int parse_args(int argc, const char **argv) {\n    z_sayf(COLOR(CYAN, OURTOOL) \" \" COLOR(\n        BRIGHT, VERSION) \" by <zhan3299@purdue.edu>\\n\");\n\n    bool timeout_given = false;\n    bool log_level_given = false;\n    bool check_execs_given = false;\n\n    int opt = 0;\n    while ((opt = getopt(argc, (char *const *)argv, \"+SRPDVgceidrfnht:l:x:\")) >\n           0) {\n        switch (opt) {\n#define __MODE_CASE(c, m)                                   \\\n    case c:                                                 \\\n        if (sys_optargs.mode != SYSMODE_NONE) {             \\\n            EXITME(\"multiple mode settings not supported\"); \\\n        }                                                   \\\n        sys_optargs.mode = SYSMODE_##m;                     \\\n        break;\n            __MODE_CASE('S', DAEMON);\n            __MODE_CASE('R', RUN);\n            __MODE_CASE('P', PATCH);\n            __MODE_CASE('D', DISASM);\n            __MODE_CASE('V', VIEW);\n#undef __MODE_CASE\n\n#define __SETTING_CASE(c, m)    \\\n    case c:                     \\\n        sys_optargs.r.m = true; \\\n        break;\n            __SETTING_CASE('g', trace_pc);\n            __SETTING_CASE('c', count_conflict);\n            __SETTING_CASE('d', disable_opt);\n            __SETTING_CASE('r', safe_ret);\n            __SETTING_CASE('e', instrument_early);\n            __SETTING_CASE('f', force_pdisasm);\n            __SETTING_CASE('i', disable_callthrough);\n            // This is a secret undocumented option! It is mainly used for\n            // Github Actions which has memory limitation. 
Forcely using linear\n            // disassembly (which means not doing pre-disassembly and patching\n            // all .text) makes smaller memory usage.\n            __SETTING_CASE('n', force_linear);\n#undef __SETTING_CASE\n\n#define __LOG_LEVEL_STRCASECMP(l, s)         \\\n    do {                                     \\\n        if (!strcasecmp(#l, s)) {            \\\n            sys_optargs.log_level = LOG_##l; \\\n            goto DONE;                       \\\n        }                                    \\\n    } while (0)\n            case 'l':\n                if (log_level_given) {\n                    EXITME(\"multiple -l options not supported\");\n                }\n                log_level_given = true;\n                __LOG_LEVEL_STRCASECMP(TRACE, optarg);\n                __LOG_LEVEL_STRCASECMP(DEBUG, optarg);\n                __LOG_LEVEL_STRCASECMP(INFO, optarg);\n                __LOG_LEVEL_STRCASECMP(WARN, optarg);\n                __LOG_LEVEL_STRCASECMP(ERROR, optarg);\n                __LOG_LEVEL_STRCASECMP(FATAL, optarg);\n                z_warn(\"invalid log level: \\\"%s\\\"\", optarg);\n            DONE:\n                break;\n#undef __LOG_LEVEL_STRCASECMP\n\n            case 't':\n                if (timeout_given) {\n                    EXITME(\"multiple -t options not supported\");\n                }\n                timeout_given = true;\n                if (z_sscanf(optarg, \"%lu\", &sys_optargs.timeout) < 1) {\n                    EXITME(\"bad syntax used for -t\");\n                }\n                break;\n\n            case 'x':\n                if (check_execs_given) {\n                    EXITME(\"multiple -x options not supported\");\n                }\n                check_execs_given = true;\n                if (z_sscanf(optarg, \"%u\", &sys_optargs.check_execs) < 1) {\n                    EXITME(\"bad syntax used for -x\");\n                }\n                if (sys_optargs.check_execs < 500) {\n                    
z_warn(\n                        \"frequent checking runs will significatly impact the \"\n                        \"fuzzing efficiency\");\n                }\n                break;\n\n            case 'h':\n                usage(argv[0], 0);\n                break;\n\n            default:\n                usage(argv[0], 1);\n        }\n    }\n\n    // Validating arguments\n\n    if (argc == optind) {\n        usage(argv[0], 1);\n    }\n\n    if (sys_optargs.mode == SYSMODE_NONE) {\n        sys_optargs.mode = SYSMODE_DAEMON;\n    }\n\n    if (sys_optargs.mode == SYSMODE_DISASM) {\n        // Under disasm mode, we forcely use probabilistic disassembly\n        sys_optargs.r.force_pdisasm = true;\n        sys_optargs.r.force_linear = false;\n    }\n\n    if (sys_optargs.r.force_pdisasm && sys_optargs.r.force_linear) {\n        EXITME(\"-f and -n cannot be set together\");\n    }\n\n    if (sys_optargs.r.instrument_early) {\n        z_warn(\n            \"-e option is experimental, it may cause invalid crashes on a \"\n            \"different system other than Ubuntu 18.04\");\n    }\n\n    return optind;\n}\n\nint main(int argc, const char **argv) {\n    assert(PAGE_SIZE == 0x1000);\n    assert(PAGE_SIZE_POW2 == 12);\n\n    int next_idx = parse_args(argc, argv);\n    argc -= next_idx;\n    argv += next_idx;\n\n    z_log_set_level(sys_optargs.log_level);\n    Z_INIT;\n\n    switch (sys_optargs.mode) {\n        case SYSMODE_DAEMON:\n            mode_start(argc, argv);\n            break;\n\n        case SYSMODE_RUN:\n            mode_run(argc, argv);\n            break;\n\n        case SYSMODE_PATCH:\n            mode_patch(argc, argv);\n            break;\n\n        case SYSMODE_DISASM:\n            mode_disasm(argc, argv);\n            break;\n\n        case SYSMODE_VIEW:\n            mode_view(argc, argv);\n            break;\n\n        default:\n            EXITME(\"unreachable\");\n    }\n\n    Z_FINI;\n\n    return 0;\n}\n\nstatic inline void mode_patch(int 
argc, const char **argv) {\n    const char *target = argv[0];\n    z_info(\"target binary: %s\", target);\n\n    Core *core = z_core_create(target, &sys_optargs);\n    z_core_activate(core);\n    z_core_destroy(core);\n}\n\nstatic inline void mode_disasm(int argc, const char **argv) {\n    const char *target = argv[0];\n    z_info(\"target binary: %s\", target);\n\n    Core *core = z_core_create(target, &sys_optargs);\n\n    z_diagnoser_apply_logged_crashpoints(core->diagnoser);\n    z_patcher_describe(core->patcher);\n\n    z_core_destroy(core);\n}\n\nstatic inline void mode_view(int argc, const char **argv) {\n    const char *target = argv[0];\n    z_info(\"target binary: %s\", target);\n\n    Core *core = z_core_create(target, &sys_optargs);\n    GQueue *cps = z_diagnoser_get_crashpoints(core->diagnoser);\n\n    GList *l = cps->head;\n\n    z_sayf(\"%-20s%-10s%-6s\\n\", \"Address\", \"CPType\", \"Real?\");\n    while (l != NULL) {\n        addr_t addr = (addr_t)l->data;\n\n        l = l->next;\n        CPType type = (CPType)l->data;\n\n        l = l->next;\n        bool is_real = !!(l->data);\n\n        z_sayf(\"%-#20lx%-10s%-6s\\n\", addr, z_cptype_string(type),\n               (is_real ? 
\"True\" : \"False\"));\n\n        l = l->next;\n    }\n\n    z_core_destroy(core);\n}\n\nstatic inline void mode_run(int argc, const char **argv) {\n    const char *target = argv[0];\n    z_info(\"target binary: %s\", target);\n\n    Core *core = z_core_create(target, &sys_optargs);\n    z_core_activate(core);\n    int status = z_core_perform_dry_run(core, argc, argv);\n    z_core_destroy(core);\n\n    if (IS_ABNORMAL_STATUS(status)) {\n        z_info(COLOR(RED, \"not a normal exit (status: %#x)\"), status);\n    }\n\n    // follow how the client is terminated\n    if (WIFEXITED(status)) {\n        exit(WEXITSTATUS(status));\n    } else if (WIFSIGNALED(status)) {\n        kill(getpid(), WTERMSIG(status));\n    } else {\n        kill(getpid(), WSTOPSIG(status));\n    }\n}\n\nstatic inline void mode_start(int argc, const char **argv) {\n#ifdef BINARY_SEARCH_INVALID_CRASH\n    EXITME(\n        \"daemon mode is not supported when doing binary search for invalid \"\n        \"crash\");\n#else\n    const char *target = argv[0];\n    z_info(\"target binary: %s\", target);\n    Core *core = z_core_create(target, &sys_optargs);\n    z_core_activate(core);\n    z_core_start_daemon(core, INVALID_FD);\n    z_core_destroy(core);\n#endif\n}\n"
  },
  {
    "path": "src/get_signal_stack_size.sh",
    "content": "#!/bin/bash\n\necho \"\n#include <signal.h>\n#include <stdio.h>\n\nint main(int argc, char **argv) {\n        int sz = SIGSTKSZ;\n        if (sz < MINSIGSTKSZ) {\n                sz = MINSIGSTKSZ;\n        }\n        printf(\\\"%#x\\n\\\", sz * 2);\n}\" > /tmp/__sigstksz.c\n\nclang /tmp/__sigstksz.c -o /tmp/__sigstksz\n/tmp/__sigstksz\n"
  },
  {
    "path": "src/interval_splay.c",
    "content": "/*\n * interval_splay.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"interval_splay.h\"\n#include \"utils.h\"\n\n/*\n * Print contents of a tree, indented by depth.\n */\nZ_PRIVATE void __splay_print_indented(Snode *root, int depth);\n\n/*\n * Rotate child in given direction to root\n */\nZ_PRIVATE void __splay_rotate(Snode **root, int direction);\n\n/*\n * Link operations for top-down splay\n *\n * This pastes a node in as !d-most node in subtree on side d\n */\nZ_PRIVATE void __splay_link(Snode ***hook, int d, Snode *node);\n\n/*\n * Splay last element on path to target to root\n *\n * NOTE! 
Remember to link parent with splay-ed subtree\n */\nZ_PRIVATE Snode *__splay(Snode **root, addr_t target);\n\n/*\n * Return a list of Snode * in order, based on root.\n */\nZ_PRIVATE Buffer *__splay_sorted_list(Snode *root);\n\n/*\n * Setter and Getter\n */\nDEFINE_SETTER(Snode, snode, addr_t, addr);\nDEFINE_SETTER(Snode, snode, size_t, len);\nDEFINE_SETTER(Snode, snode, void *, data);\n\nDEFINE_GETTER(Snode, snode, size_t, len);\nDEFINE_GETTER(Snode, snode, void *, data);\nDEFINE_GETTER(Splay, splay, size_t, node_count);\n\n/*\n * Overloaded Setter and Getter\n */\nOVERLOAD_GETTER(Snode, snode, addr_t, lower_bound) { return snode->addr; }\n\nOVERLOAD_GETTER(Snode, snode, addr_t, upper_bound) {\n    return snode->addr + snode->len - 1;\n}\n\n/*\n * How far to indent each level of the tree.\n */\n#define SPLAY_INDENTATION_LEVEL 2\n\nZ_PRIVATE void __splay_print_indented(Snode *root, int depth) {\n    int i;\n\n    if (root != SPLAY_EMPTY) {\n        __splay_print_indented(root->child[SPLAY_LEFT], depth + 1);\n\n        for (i = 0; i < SPLAY_INDENTATION_LEVEL * depth; i++) {\n            putchar(' ');\n        }\n        z_sayf(\"[%ld, %ld](%p)\\n\", root->addr, root->addr + root->len - 1,\n               root->data);\n\n        __splay_print_indented(root->child[SPLAY_RIGHT], depth + 1);\n    }\n}\n\nZ_PRIVATE void __splay_rotate(Snode **root, int direction) {\n    Snode *x;\n    Snode *y;\n    Snode *b;\n\n    /*\n     *      y           x\n     *     / \\         / \\\n     *    x   C  <=>  A   y\n     *   / \\             / \\\n     *  A   B           B   C\n     */\n\n    y = *root;\n    assert(y);\n    x = y->child[direction];\n    assert(x);\n    b = x->child[!direction];\n\n    /* do the rotation */\n    *root = x;\n    x->child[!direction] = y;\n    y->child[direction] = b;\n}\n\nZ_PRIVATE void __splay_link(Snode ***hook, int d, Snode *node) {\n    *hook[d] = node;\n    // Strictly speaking we don't need to do this, but it allows printing the\n    // partial 
trees.\n    node->child[!d] = NULL;\n    hook[d] = &node->child[!d];\n}\n\nZ_PRIVATE Snode *__splay(Snode **root, addr_t target) {\n    Snode *t;\n    Snode *child;\n    Snode *grandchild;\n    Snode *top[SPLAY_NUM_CHILDREN];   /* accumulator trees that will become\n                                              subtrees of new root */\n    Snode **hook[SPLAY_NUM_CHILDREN]; /* where to link new elements into\n                                              accumulator trees */\n    int d;\n    int dChild;      /* direction of child */\n    int dGrandchild; /* direction of grandchild */\n\n    // we don't need to keep following this pointer, we'll just fix it at the\n    // end.\n    assert(root != NULL);\n    t = *root;\n\n    // Don't do anything to an empty tree.\n    if (t == SPLAY_EMPTY) {\n        return NULL;\n    }\n\n    // Ok, tree is not empty, start chopping it up.\n    for (d = 0; d < SPLAY_NUM_CHILDREN; d++) {\n        top[d] = NULL;\n        hook[d] = &top[d];\n    }\n\n    // Keep going until we hit the addr or we would hit a null pointer in the\n    // child.\n    while (t->addr != target &&\n           (child = t->child[dChild = t->addr < target]) != NULL) {\n        // Child is not null.\n        grandchild = child->child[dGrandchild = child->addr < target];\n\n        if (grandchild == NULL || child->addr == target) {\n            /* zig case; paste root into opposite-side hook */\n            __splay_link(hook, !dChild, t);\n            t = child;\n            /* we can break because we know we will hit child == NULL next */\n            break;\n        } else if (dChild == dGrandchild) {\n            /* zig-zig case */\n            /* rotate and then hook up child */\n            /* grandChild becomes new root */\n            __splay_rotate(&t, dChild);\n            __splay_link(hook, !dChild, child);\n            t = grandchild;\n        } else {\n            /* zig-zag case */\n            /* root goes to !dChild, child goes to dChild, 
grandchild goes to\n             * root */\n            __splay_link(hook, !dChild, t);\n            __splay_link(hook, dChild, child);\n            t = grandchild;\n        }\n    }\n\n    // Now reassemble the tree.\n    // t's children go in hooks, top nodes become t's new children.\n    for (d = 0; d < SPLAY_NUM_CHILDREN; d++) {\n        *hook[d] = t->child[d];\n        t->child[d] = top[d];\n    }\n\n    // And put t back in *root.\n    return (*root = t);\n}\n\nZ_PRIVATE Buffer *__splay_sorted_list(Snode *root) {\n    Buffer *list = NULL;\n\n    assert(root != NULL);\n    if (root->child[SPLAY_LEFT] != NULL)\n        list = __splay_sorted_list(root->child[SPLAY_LEFT]);\n    else\n        list = z_buffer_create(NULL, 0);\n\n    z_buffer_append_raw(list, (const uint8_t *)&root, sizeof(Snode *));\n\n    if (root->child[SPLAY_RIGHT] != NULL) {\n        Buffer *rlist = __splay_sorted_list(root->child[SPLAY_RIGHT]);\n        z_buffer_append(list, rlist);\n        z_buffer_destroy(rlist);\n    }\n\n    return list;\n}\n\nZ_API Snode *z_snode_create(addr_t addr, size_t len, void *data,\n                            void (*data_destroy)(void *)) {\n    assert(len > 0);\n    Snode *e = STRUCT_ALLOC(Snode);\n    e->addr = addr;\n    e->len = len;\n    e->data = data;\n    e->data_destroy = data_destroy;\n    return e;\n}\n\nZ_API void z_snode_destroy(Snode *node) {\n    if (node != NULL) {\n        if (node->data_destroy)\n            (*(node->data_destroy))(node->data);\n        z_free(node);\n    } else {\n        z_trace(\"try to delete a NULL node\");\n    }\n}\n\nZ_API Splay *z_splay_create(void *(*merge_fcn)(void *, void *)) {\n    Splay *t = STRUCT_ALLOC(Splay);\n    t->root = SPLAY_EMPTY;\n    t->node_count = 0;\n    t->merge_fcn = merge_fcn;\n    return t;\n}\n\nZ_API void z_splay_destroy(Splay *splay) {\n    // We want to avoid doing this recursively, because the tree might be deep.\n    // So we will repeatedly delete the root until the tree is empty.\n    
while (splay->root) {\n        Snode *e = z_splay_delete(splay, splay->root->addr);\n        z_snode_destroy(e);\n    }\n    assert(splay->node_count == 0);\n    z_free(splay);\n}\n\nZ_API bool z_splay_interval_overlap(Splay *splay, Snode *node) {\n    Snode *t = NULL;\n\n    __splay(&(splay->root), node->addr);\n    SPLAY_ROOT(splay, t);\n\n    // If splay is empty, return false\n    if (t == SPLAY_EMPTY)\n        return false;\n\n    // If addr already exists, return true;\n    if (t->addr == node->addr)\n        return true;\n\n    if (t->addr < node->addr) {\n        Snode *e = t->child[SPLAY_RIGHT];\n        if (t->addr + t->len > node->addr)\n            return true;\n        if (e != NULL) {\n            // Try to find the smallest node in the right tree\n            t->child[SPLAY_RIGHT] = __splay(&e, 0);\n            if (node->addr + node->len > e->addr)\n                return true;\n        }\n    } else {\n        Snode *e = t->child[SPLAY_LEFT];\n        if (node->addr + node->len > t->addr)\n            return true;\n        if (e != NULL) {\n            // Try to find the biggest node in the left tree\n            t->child[SPLAY_LEFT] = __splay(&e, ADDR_MAX);\n            if (e->addr + e->len > node->addr)\n                return true;\n        }\n    }\n\n    return false;\n}\n\nZ_API Snode *z_splay_insert(Splay *splay, Snode *node) {\n    Snode *e;\n    Snode *t;\n    int d;  // Which side of e to put old root on\n\n    if (z_splay_interval_overlap(splay, node)) {\n        // Overlap\n        z_trace(\"node([%ld, %ld]) is overlapped with existed nodes\", node->addr,\n                node->addr + node->len - 1);\n        return NULL;\n    }\n\n    __splay(&(splay->root), node->addr);\n    SPLAY_ROOT(splay, t);\n\n    e = node;\n\n    if (t == NULL) {\n        e->child[SPLAY_LEFT] = e->child[SPLAY_RIGHT] = NULL;\n    } else {\n        // Split tree and put e on top.\n        // We know t is closest to e, so we don't have to move anything else.\n      
  d = t->addr > e->addr;\n        e->child[d] = t;\n        e->child[!d] = t->child[!d];\n        t->child[!d] = NULL;\n    }\n\n    // Either way we stuff e in *splay.\n    splay->root = e;\n    splay->node_count += 1;\n\n    // Check merge.\n    if (splay->merge_fcn) {\n        Snode *left = e->child[SPLAY_LEFT];\n        Snode *right = e->child[SPLAY_RIGHT];\n        if (left != NULL)\n            e->child[SPLAY_LEFT] = __splay(&left, ADDR_MAX);\n        if (right != NULL)\n            e->child[SPLAY_RIGHT] = __splay(&right, 0);\n\n        if ((left != NULL) && (left->addr + left->len == e->addr)) {\n            Snode *deleted = z_splay_delete(splay, left->addr);\n            assert(deleted == left);\n            e->addr = left->addr;\n            e->len += left->len;\n            e->data = (*(splay->merge_fcn))(left->data, e->data);\n            z_snode_destroy(deleted);\n        }\n\n        if ((right != NULL) && (e->addr + e->len == right->addr)) {\n            Snode *deleted = z_splay_delete(splay, right->addr);\n            assert(deleted == right);\n            e->len += right->len;\n            e->data = (*(splay->merge_fcn))(e->data, right->data);\n            z_snode_destroy(deleted);\n        }\n    }\n\n    return e;\n}\n\nZ_API Snode *z_splay_delete(Splay *splay, addr_t addr) {\n    Snode *left;\n    Snode *right;\n    Snode *deleted = NULL;\n\n    __splay(&(splay->root), addr);\n\n    if (splay->root && splay->root->addr == addr) {\n        // Save pointers to kids.\n        left = splay->root->child[SPLAY_LEFT];\n        right = splay->root->child[SPLAY_RIGHT];\n\n        deleted = splay->root;\n        splay->node_count -= 1;\n        assert(splay->node_count >= 0);\n\n        // If left is empty, just return right.\n        if (left == NULL) {\n            splay->root = right;\n        } else {\n            // First splay max element in left to top.\n            __splay(&left, ADDR_MAX);\n\n            // Now paste in right subtree.\n            
left->child[SPLAY_RIGHT] = right;\n\n            // Return left\n            splay->root = left;\n        }\n    } else {\n        z_trace(\"node([%ld, ?]) does not exist\", addr);\n    }\n\n    return deleted;\n}\n\nZ_API Snode *z_splay_search(Splay *splay, addr_t addr) {\n    assert(splay != NULL);\n    Snode *t;\n\n    if (splay->root == NULL)\n        return NULL;\n\n    __splay(&(splay->root), addr);\n\n    SPLAY_ROOT(splay, t);\n    if (t->addr <= addr) {\n        if (z_snode_get_upper_bound(t) >= addr)\n            return t;\n        else\n            return NULL;\n    } else {\n        if (t->child[SPLAY_LEFT]) {\n            __splay(&(t->child[SPLAY_LEFT]), ADDR_MAX);\n            if (z_snode_get_lower_bound(t->child[SPLAY_LEFT]) <= addr &&\n                z_snode_get_upper_bound(t->child[SPLAY_LEFT]) >= addr)\n                return t->child[SPLAY_LEFT];\n            else\n                return NULL;\n        } else {\n            return NULL;\n        }\n    }\n}\n\nZ_API inline Snode *z_splay_max(Splay *splay) {\n    assert(splay != NULL);\n    return __splay(&(splay->root), ADDR_MAX);\n}\n\nZ_API inline Snode *z_splay_min(Splay *splay) {\n    assert(splay != NULL);\n    return __splay(&(splay->root), 0);\n}\n\nZ_API Buffer *z_splay_sorted_list(Splay *splay) {\n    if (splay->root)\n        return __splay_sorted_list(splay->root);\n    else\n        return NULL;\n}\n\nZ_API void z_splay_print(Splay *splay) {\n    Snode *t;\n    SPLAY_ROOT(splay, t);\n    z_sayf(\"number of current nodes: %ld\\n\", z_splay_get_node_count(splay));\n    __splay_print_indented(t, 0);\n}\n\nZ_API void *z_direct_merge(void *_x, void *_y) { return NULL; }\n"
  },
  {
    "path": "src/interval_splay.h",
    "content": "/*\n * interval_splay.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __INTERVAL_SPLAY_H\n#define __INTERVAL_SPLAY_H\n\n#include \"buffer.h\"\n#include \"config.h\"\n\n/*\n * Use SPLAY tree to support interval operations\n */\n\n#define SPLAY_LEFT 0\n#define SPLAY_RIGHT 1\n#define SPLAY_NUM_CHILDREN 2\n\nSTRUCT(Snode, {\n    // We'll make this an array so that we can make some operations symmetric.\n    STRUCT_REALNAME(Snode) * child[SPLAY_NUM_CHILDREN];\n    // Key for splay\n    addr_t addr;\n    // Length of interval\n    size_t len;\n    // Data, NULL if not existing\n    void *data;\n    // Function used to free data\n    void (*data_destroy)(void *);\n});\n\nSTRUCT(Splay, {\n    Snode *root;\n    size_t node_count;\n    void *(*merge_fcn)(void *, void *);\n});\n\n#define SPLAY_EMPTY NULL\n#define SPLAY_ROOT(splay, node) \\\n    do {                        \\\n        assert(splay != NULL);  \\\n        node = splay->root;     \\\n    } while (0)\n\n/*\n * Setter and Getter\n */\nDECLARE_SETTER(Snode, snode, addr_t, addr);\nDECLARE_SETTER(Snode, snode, size_t, len);\nDECLARE_SETTER(Snode, snode, void *, data);\n\nDECLARE_GETTER(Snode, snode, addr_t, lower_bound);\nDECLARE_GETTER(Snode, snode, addr_t, upper_bound);\nDECLARE_GETTER(Snode, snode, size_t, 
len);\nDECLARE_GETTER(Snode, snode, void *, data);\nDECLARE_GETTER(Splay, splay, size_t, node_count);\n\n/*\n * Pack a Snode from scratch.\n */\nZ_API Snode *z_snode_create(addr_t addr, size_t len, void *data,\n                            void (*data_destroy)(void *));\n\n/*\n * Unpack a Snode and its data.\n */\nZ_API void z_snode_destroy(Snode *node);\n\n/*\n * Create a splay.\n *\n * merge_fcn is used to merge data, and **NULL indicates the intervals will\n * not merge**.\n *\n * Note that it is Snode's responsibility to free the allocated memory, instead\n * of merge_fcn.\n */\nZ_API Splay *z_splay_create(void *(*merge_fcn)(void *, void *));\n\n/*\n * Free all elements of splay, and replace it with SPLAY_EMPTY.\n */\nZ_API void z_splay_destroy(Splay *splay);\n\n/*\n * Insert an element into splay, and return the inserted node, NULL if\n * overlapping.\n */\nZ_API Snode *z_splay_insert(Splay *splay, Snode *node);\n\n/*\n * Delete Snode starting from addr from splay.\n * Return the deleted node, NULL if the addr does not exist.\n */\nZ_API Snode *z_splay_delete(Splay *splay, addr_t addr);\n\n/*\n * Check whether node is overlapped with some nodes inside splay.\n * Return true if overlap, false otherwise.\n */\nZ_API bool z_splay_interval_overlap(Splay *splay, Snode *node);\n\n/*\n * Search a snode containing addr, return NULL if it does not exist.\n */\nZ_API Snode *z_splay_search(Splay *splay, addr_t addr);\n\n/*\n * Return the Snode with max address.\n */\nZ_API inline Snode *z_splay_max(Splay *splay);\n\n/*\n * Return the Snode with min address.\n */\nZ_API inline Snode *z_splay_min(Splay *splay);\n\n/*\n * Return a list of Snode * in order.\n */\nZ_API Buffer *z_splay_sorted_list(Splay *splay);\n\n/*\n * Pretty-print the contents of splay\n */\nZ_API void z_splay_print(Splay *splay);\n\n/*\n * Default merging function: do nothing;\n */\nZ_API void *z_direct_merge(void *_x, void *_y);\n\n#endif\n"
  },
  {
    "path": "src/iterator.h",
    "content": "/*\n * iterator.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __ITERATOR_H\n#define __ITERATOR_H\n\n#include \"buffer.h\"\n#include \"config.h\"\n\n// force evaluation\n#define __ITERATOR_2(x, y) __Iter_##y##_##x##_t\n#define __ITERATOR_1(x, y) __ITERATOR_2(x, y)\n#define __ITERATOR(x) __ITERATOR_1(x, __COUNTER__)\n\n/*\n * Iterator, only for local usage\n */\n#define Iter(type, name)      \\\n    struct __ITERATOR(name) { \\\n        type *__ptr;          \\\n        size_t __i;           \\\n        size_t __n;           \\\n    } name\n\n#define z_iter_init(iter, ptr, n)                      \\\n    do {                                               \\\n        if (!ptr) {                                    \\\n            EXITME(\"try to init an invalid iterator\"); \\\n        }                                              \\\n        (iter).__ptr = (typeof((iter).__ptr))(ptr);    \\\n        (iter).__i = 0;                                \\\n        (iter).__n = (n);                              \\\n    } while (0)\n\n#define z_iter_init_from_buf(iter, buf)                                \\\n    do {                                                               \\\n        assert(buf);                                                   \\\n        z_iter_init((iter), 
z_buffer_get_raw_buf(buf),                 \\\n                    z_buffer_get_size(buf) / sizeof(*((iter).__ptr))); \\\n    } while (0)\n\n#define z_iter_next(iter)                      \\\n    ({                                         \\\n        typeof((iter).__ptr) __res = NULL;     \\\n                                               \\\n        if ((iter).__i < (iter).__n) {         \\\n            __res = (iter).__ptr + (iter).__i; \\\n            (iter).__i++;                      \\\n        }                                      \\\n                                               \\\n        __res;                                 \\\n    })\n\n#define z_iter_is_empty(iter) ((iter).__i >= (iter).__n)\n\n#define z_iter_get_size(iter) ((iter).__n)\n\n#define z_iter_reset(iter) \\\n    do {                   \\\n        (iter).__i = 0;    \\\n    } while (0)\n\n#define z_iter_destroy(iter) /* empty */\n\n#endif\n"
  },
  {
    "path": "src/library_functions/generate.py",
    "content": "import csv\nimport os\nimport sys\n\ntemplate = \"\"\"\nZ_PRIVATE void __libfunc_load(GHashTable *d) {\n%s\n}\n\"\"\"\n\nfilename = \"library_functions_load.c\"\n\n\ndef generate_from_csv(filename):\n    code = \"\"\n    n = 0\n\n    with open(filename, \"r\") as csv_file:\n        csv_reader = csv.reader(csv_file, delimiter=\",\")\n        for row in csv_reader:\n            if len(row) != 4:\n                print(\"invalid input: %s\" % line)\n                exit(-1)\n\n            demangled_name = row[0].strip()  # useless currently\n\n            name = row[1].strip()\n            if len(name) == 0:\n                print(\"empty library function name\")\n                exit(-1)\n\n            lcfg = row[2].strip().upper()\n            if len(lcfg) == 0:\n                lcfg = \"UNK\"\n\n            lra = row[3].strip().upper()\n            if len(lra) == 0:\n                lra = \"UNK\"\n\n            code += \"\"\"\n    LFuncInfo *lf_%d = __lfunc_info_create(\"%s\", LCFG_%s, LRA_%s);\n    g_hash_table_insert(d, (gpointer)z_strdup(\"%s\"), (gpointer)lf_%d);\n            \"\"\" % (\n                n,\n                name,\n                lcfg,\n                lra,\n                name,\n                n,\n            )\n            n += 1\n\n    return code\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) != 3:\n        print(\"generate.py: ./generate.py <library_functions.csv> <directory>\")\n        exit(-1)\n\n    dirname = sys.argv[2].strip()\n    csv_filename = os.path.join(dirname, sys.argv[1].strip())\n    out_filename = os.path.join(dirname, filename)\n\n    code = generate_from_csv(csv_filename)\n\n    f = open(out_filename, \"w\")\n    f.write(template % code)\n    f.close()\n"
  },
  {
    "path": "src/library_functions/lib.csv",
    "content": ",abort,TERM,USED\n,accept,RET,UNUSED\n,access,RET,UNUSED\n,acos,RET,UNUSED\n,__acosf_finite,,\n,alarm,RET,USED\n,archive_read_close,,\n,archive_read_data_block,,\n,archive_read_free,,\n,archive_read_new,,\n,archive_read_next_header,,\n,archive_read_open_memory,,\n,archive_read_support_format_tar,,\n,asin,RET,UNUSED\n,__asprintf_chk,,\n,__assert_fail,,\n,atan,RET,UNUSED\n,atan2,RET,UNUSED\n,backtrace,,\n,backtrace_symbols,,\n,backtrace_symbols_fd,,\n,bind,,\n,BIO_ctrl,,\n,BIO_free,,\n,BIO_new,,\n,BIO_new_mem_buf,,\n,BIO_s_mem,,\n,BN_bin2bn,,\n,BN_bn2bin,,\n,BN_bn2dec,,\n,BN_bn2hex,,\n,BN_clear_free,,\n,BN_cmp,,\n,BN_CTX_free,,\n,BN_CTX_new,,\n,BN_dec2bn,,\n,BN_dup,,\n,BN_free,,\n,BN_is_bit_set,,\n,BN_mod_exp,,\n,BN_new,,\n,BN_num_bits,,\n,BN_rand,,\n,BN_set_word,,\n,BZ2_bzDecompress,,\n,BZ2_bzDecompressEnd,,\n,BZ2_bzDecompressInit,,\n,BZ2_bzlibVersion,,\n,calloc,RET,UNUSED\n,ceil,RET,UNUSED\n,cfmakeraw,,\n,cfsetspeed,,\n,clock,RET,UNUSED\n,clock_gettime,RET,UNUSED\n,close,RET,UNUSED\n,closedir,RET,UNUSED\n,connect,,\n,cos,RET,UNUSED\n,cosh,RET,UNUSED\n,crc32,RET,UNUSED\n,CRYPTO_free,,\n,__ctype_b_loc,,\n,__ctype_get_mb_cur_max,,\n,__ctype_tolower_loc,,\n,__ctype_toupper_loc,,\n,__cxa_allocate_exception,,\n,__cxa_atexit,,\n,__cxa_bad_cast,,\n,__cxa_bad_typeid,,\n,__cxa_begin_catch,,\n,__cxa_end_catch,,\n,__cxa_free_exception,,\n,__cxa_get_exception_ptr,,\n,__cxa_guard_abort,,\n,__cxa_guard_acquire,,\n,__cxa_guard_release,,\n,__cxa_rethrow,,\n,__cxa_throw,,\n,__cxa_throw_bad_array_new_length,,\n,deflate,,\n,deflateEnd,,\n,deflateInit_,,\n,deflateInit2_,,\n,DES_ncbc_encrypt,,\n,DES_set_key_unchecked,,\n,DES_set_odd_parity,,\n,dladdr,,\n,dlclose,RET,UNUSED\n,dlerror,RET,UNUSED\n,dlopen,RET,UNUSED\n,dlsym,RET,UNUSED\n,DSA_do_sign,,\n,DSA_do_verify,,\n,DSA_free,,\n,DSA_generate_key,,\n,DSA_generate_parameters,,\n,DSA_get0_key,,\n,DSA_get0_pqg,,\n,DSA_new,,\n,DSA_set0_key,,\n,DSA_set0_pqg,,\n,DSA_SIG_free,,\n,DSA_SIG_get0,,\n,DSA_SIG_new,,\n,DSA_SIG_set0,,\n
,DSA_size,,\n,dup,RET,UNUSED\n,dup2,RET,UNUSED\n,__dynamic_cast,,\n,ECDH_compute_key,,\n,ECDSA_do_sign,,\n,ECDSA_do_verify,,\n,ECDSA_SIG_free,,\n,ECDSA_SIG_get0,,\n,ECDSA_SIG_new,,\n,ECDSA_SIG_set0,,\n,EC_GROUP_cmp,,\n,EC_GROUP_get_curve_name,,\n,EC_GROUP_get_degree,,\n,EC_KEY_dup,,\n,EC_KEY_free,,\n,EC_KEY_generate_key,,\n,EC_KEY_get0_group,,\n,EC_KEY_get0_private_key,,\n,EC_KEY_get0_public_key,,\n,EC_KEY_new_by_curve_name,,\n,EC_KEY_set_asn1_flag,,\n,EC_KEY_set_public_key,,\n,EC_POINT_clear_free,,\n,EC_POINT_cmp,,\n,EC_POINT_free,,\n,EC_POINT_new,,\n,EC_POINT_oct2point,,\n,EC_POINT_point2oct,,\n,ERR_error_string,,\n,ERR_get_error,,\n,__errno_location,RET,UNUSED\n,EVP_aes_128_cbc,,\n,EVP_aes_128_ctr,,\n,EVP_aes_128_ecb,,\n,EVP_aes_192_cbc,,\n,EVP_aes_192_ctr,,\n,EVP_aes_192_ecb,,\n,EVP_aes_256_cbc,,\n,EVP_aes_256_ctr,,\n,EVP_aes_256_ecb,,\n,EVP_bf_cbc,,\n,EVP_CIPHER_CTX_free,,\n,EVP_CIPHER_CTX_new,,\n,EVP_CIPHER_CTX_reset,,\n,EVP_CIPHER_CTX_set_padding,,\n,EVP_DecryptInit_ex,,\n,EVP_DecryptUpdate,,\n,EVP_des_ede3_cbc,,\n,EVP_DigestFinal,,\n,EVP_DigestInit,,\n,EVP_DigestInit_ex,,\n,EVP_DigestUpdate,,\n,EVP_EncryptInit_ex,,\n,EVP_EncryptUpdate,,\n,EVP_md5,,\n,EVP_MD_CTX_free,,\n,EVP_MD_CTX_new,,\n,EVP_MD_CTX_reset,,\n,EVP_ripemd160,,\n,EVP_sha1,,\n,EVP_sha256,,\n,EVP_sha384,,\n,EVP_sha512,,\n,execl,,\n,execv,,\n,exit,TERM,UNUSED\n,_exit,TERM,UNUSED\n,exp,RET,UNUSED\n,__exp_finite,,\n,fclose,RET,UNUSED\n,fcntl,RET,UNUSED\n,__fdelt_chk,,\n,fdopen,RET,UNUSED\n,feof,RET,UNUSED\n,ferror,RET,UNUSED\n,fesetround,RET,UNUSED\n,fflush,RET,UNUSED\n,fgetc,RET,UNUSED\n,fgets,RET,UNUSED\n,fileno,RET,UNUSED\n,flock,RET,UNUSED\n,floor,RET,UNUSED\n,fma,RET,UNUSED\n,fmin,RET,UNUSED\n,fmod,RET,UNUSED\n,fopen,RET,UNUSED\n,fopen64,RET,UNUSED\n,fork,,\n,forkpty,,\n,fprintf,RET,UNUSED\n,__fprintf_chk,,\n,fputc,RET,UNUSED\n,fputs,RET,UNUSED\n,fread,RET,UNUSED\n,free,RET,UNUSED\n,freeaddrinfo,RET,UNUSED\n,fseek,RET,UNUSED\n,fsync,,\n,ftell,RET,UNUSED\n,fwrite,RET,UNUSED\n,__fxstat,,\n,__fxst
at64,,\n,gai_strerror,,\n,g_bytes_get_data,,\n,g_bytes_ref,,\n,g_bytes_unref,,\n,gcry_mpi_aprint,,\n,gcry_mpi_new,,\n,gcry_mpi_powm,,\n,gcry_mpi_release,,\n,gcry_mpi_scan,,\n,getaddrinfo,RET,UNUSED\n,getcontext,,\n,getcwd,RET,UNUSED\n,__getdelim,,\n,getdtablesize,,\n,getenv,RET,UNUSED\n,gethostbyname,,\n,gethostbyname2,,\n,gethostname,,\n,getline,RET,UNUSED\n,getloadavg,,\n,getnameinfo,,\n,getopt,RET,UNUSED\n,getopt_long,,\n,getpagesize,,\n,getpeername,,\n,getpid,,\n,getpwnam,,\n,getpwuid_r,,\n,getsockname,,\n,getsockopt,,\n,gettimeofday,,\n,getuid,,\n,__gmon_start__,,\n,gmtime,,\n,gmtime_r,,\n,GOMP_critical_name_end,,\n,GOMP_critical_name_start,,\n,GOMP_parallel,,\n,grantpt,,\n,g_unichar_combining_class,,\n,g_unichar_compose,,\n,g_unichar_decompose,,\n,g_unichar_fully_decompose,,\n,g_unichar_get_mirror_char,,\n,g_unichar_get_script,,\n,g_unichar_iswide,,\n,g_unichar_type,,\n,g_unicode_script_from_iso15924,,\n,g_unicode_script_to_iso15924,,\n,gzclose,,\n,gzdirect,,\n,gzdopen,,\n,gzopen64,,\n,gzread,,\n,gzwrite,,\n,__h_errno_location,,\n,HMAC_CTX_free,,\n,HMAC_CTX_new,,\n,HMAC_CTX_reset,,\n,HMAC_Final,,\n,HMAC_Init_ex,,\n,HMAC_Update,,\n,hypot,,\n,iconv,,\n,iconv_close,,\n,iconv_open,,\n,if_indextoname,,\n,inet_addr,,\n,inet_ntop,,\n,inet_pton,,\n,inflate,,\n,inflateEnd,,\n,inflateInit_,,\n,inflateInit2_,,\n,inflateReset,,\n,inflateSetDictionary,,\n,ioctl,,\n,_IO_putc,,\n,isalnum,,\n,isatty,,\n,isdigit,RET,UNUSED\n,__isoc99_sscanf,,\n,isspace,RET,UNUSED\n,isupper,RET,UNUSED\n,iswdigit,RET,UNUSED\n,iswspace,RET,UNUSED\n,isxdigit,RET,UNUSED\n,kill,,\n,ldexp,,\n,__libc_start_main,,\n,listen,,\n,localeconv,,\n,localtime,,\n,log,RET,UNUSED\n,log10,RET,UNUSED\n,log2,RET,UNUSED\n,__log_finite,,\n,_longjmp,,\n,__longjmp_chk,,\n,lseek,RET,UNUSED\n,lseek64,RET,UNUSED\n,__lxstat,,\n,__lxstat64,,\n,lzma_alone_decoder,,\n,lzma_code,,\n,lzma_crc32,,\n,lzma_end,,\n,lzma_properties_decode,,\n,lzma_raw_decoder,,\n,lzma_stream_decoder,,\n,madvise,,\n,makecontext,,\n,malloc,RET,UNUSED\
n,mbrtowc,,\n,memchr,RET,UNUSED\n,memcmp,RET,UNUSED\n,memcpy,RET,UNUSED\n,__memcpy_chk,,\n,memmem,RET,UNUSED\n,memmove,RET,UNUSED\n,__memmove_chk,,\n,memrchr,RET,UNUSED\n,memset,RET,UNUSED\n,__memset_chk,,\n,mkdir,RET,UNUSED\n,mkstemp,,\n,mktime,,\n,mlock,,\n,mmap,RET,UNUSED\n,mprotect,,\n,munmap,RET,UNUSED\n,nanosleep,,\n,nearbyint,,\n,nl_langinfo,,\n,omp_destroy_nest_lock,,\n,omp_get_num_threads,,\n,omp_get_thread_num,,\n,omp_init_nest_lock,,\n,omp_in_parallel,,\n,omp_set_nested,,\n,omp_set_nest_lock,,\n,omp_unset_nest_lock,,\n,open,RET,UNUSED\n,open64,RET,UNUSED\n,opendir,RET,UNUSED\n,openlog,,\n,OPENSSL_init_crypto,,\n,optarg,,\n,opterr,,\n,optind,,\n,optopt,,\n,PEM_read_bio_DSAPrivateKey,,\n,PEM_read_bio_ECPrivateKey,,\n,PEM_read_bio_RSAPrivateKey,,\n,PEM_write_bio_DSAPrivateKey,,\n,PEM_write_bio_ECPrivateKey,,\n,PEM_write_bio_RSAPrivateKey,,\n,perror,RET,UNUSED\n,pipe,RET,UNUSED\n,PKCS5_PBKDF2_HMAC_SHA1,,\n,poll,,\n,__poll_chk,,\n,__popcountdi2,,\n,posix_openpt,,\n,posix_spawn_file_actions_addclose,,\n,posix_spawn_file_actions_adddup2,,\n,posix_spawn_file_actions_destroy,,\n,posix_spawn_file_actions_init,,\n,posix_spawnp,,\n,pow,RET,UNUSED\n,__pow_finite,,\n,prctl,,\n,__printf_chk,,\n,pthread_cond_destroy,,\n,pthread_cond_init,,\n,pthread_cond_signal,,\n,pthread_cond_timedwait,,\n,pthread_cond_wait,,\n,pthread_create,,\n,pthread_equal,,\n,pthread_getspecific,,\n,pthread_join,,\n,pthread_key_create,,\n,pthread_key_delete,,\n,pthread_mutexattr_destroy,,\n,pthread_mutexattr_init,,\n,pthread_mutexattr_settype,,\n,pthread_mutex_destroy,,\n,pthread_mutex_init,,\n,pthread_mutex_lock,,\n,pthread_mutex_trylock,,\n,pthread_mutex_unlock,,\n,pthread_once,,\n,pthread_rwlock_destroy,,\n,pthread_rwlock_init,,\n,pthread_rwlock_rdlock,,\n,pthread_rwlock_unlock,,\n,pthread_rwlock_wrlock,,\n,pthread_self,,\n,pthread_setspecific,,\n,ptsname,,\n,putchar,RET,UNUSED\n,putenv,,\n,puts,RET,UNUSED\n,qsort,RET,UNUSED\n,raise,,\n,rand,RET,UNUSED\n,RAND_add,,\n,RAND_bytes,,\n,RAND_pseudo_
bytes,,\n,rand_r,,\n,read,RET,UNUSED\n,__read_chk,,\n,readdir,RET,UNUSED\n,realloc,RET,UNUSED\n,recv,,\n,remainder,,\n,remove,RET,UNUSED\n,rewind,RET,UNUSED\n,round,RET,UNUSED\n,RSA_free,,\n,RSA_generate_key_ex,,\n,RSA_get0_crt_params,,\n,RSA_get0_factors,,\n,RSA_get0_key,,\n,RSA_new,,\n,RSA_set0_crt_params,,\n,RSA_set0_factors,,\n,RSA_set0_key,,\n,RSA_sign,,\n,RSA_size,,\n,RSA_verify,,\n,sched_yield,,\n,select,,\n,send,,\n,setcontext,,\n,setenv,RET,UNUSED\n,_setjmp,,\n,setlocale,,\n,setlogmask,,\n,setrlimit,RET,UNUSED\n,setsid,RET,UNUSED\n,setsockopt,RET,UNUSED\n,shutdown,RET,UNUSED\n,sigaction,RET,UNUSED\n,signal,RET,UNUSED\n,sin,RET,UNUSED\n,sincos,RET,UNUSED\n,sincosf,RET,UNUSED\n,sinh,RET,UNUSED\n,sleep,RET,UNUSED\n,snprintf,RET,UNUSED\n,__snprintf_chk,,\n,socket,RET,UNUSED\n,socketpair,RET,UNUSED\n,__sprintf_chk,RET,UNUSED\n,sqrt,RET,UNUSED\n,sqrtf,RET,UNUSED\n,sscanf,RET,UNUSED\n,__stack_chk_fail,,\n,stderr,OBJ,OBJ\n,stdin,OBJ,OBJ\n,stdout,OBJ,OBJ\n,stpcpy,RET,UNUSED\n,__stpcpy_chk,,\n,strcasecmp,RET,UNUSED\n,strcasestr,RET,UNUSED\n,strcat,RET,UNUSED\n,__strcat_chk,,\n,strchr,RET,UNUSED\n,strcmp,RET,UNUSED\n,strcpy,RET,UNUSED\n,__strcpy_chk,,\n,strcspn,RET,UNUSED\n,strdup,RET,UNUSED\n,strerror,RET,UNUSED\n,strftime,RET,UNUSED\n,strlen,RET,UNUSED\n,strncasecmp,RET,UNUSED\n,__strncat_chk,,\n,strncmp,RET,UNUSED\n,strncpy,RET,UNUSED\n,__strncpy_chk,,\n,strnlen,RET,UNUSED\n,strrchr,RET,UNUSED\n,strsignal,RET,UNUSED\n,strspn,RET,UNUSED\n,strstr,RET,UNUSED\n,strtod,RET,UNUSED\n,strtof,RET,UNUSED\n,strtok,RET,UNUSED\n,strtol,RET,UNUSED\n,strtoll,RET,UNUSED\n,strtoul,RET,UNUSED\n,strtoull,RET,UNUSED\n,sysconf,RET,UNUSED\n,__syslog_chk,,\n,system,RET,UNUSED\n,tan,RET,UNUSED\n,tcflush,RET,UNUSED\n,tcgetattr,RET,UNUSED\n,tcsendbreak,RET,UNUSED\n,tcsetattr,RET,UNUSED\n,time,RET,UNUSED\n,timegm,RET,UNUSED\n,__tls_get_addr,,\n,tmpfile,RET,UNUSED\n,trunc,RET,UNUSED\n,__udivti3,,\n,__umodti3,,\n,unlink,RET,UNUSED\n,unlockpt,RET,UNUSED\n,_Unwind_Resume,,\n,usleep,RET,UNUSED\n,
utimes,RET,UNUSED\n,__vfprintf_chk,,\n,vsnprintf,RET,UNUSED\n,__vsnprintf_chk,,\n,__vsprintf_chk,,\n,waitpid,RET,UNUSED\n,wcrtomb,RET,UNUSED\n,wcscpy,RET,UNUSED\n,wcslen,RET,UNUSED\n,wmemcmp,RET,UNUSED\n,wmemmove,RET,UNUSED\n,write,RET,UNUSED\n,xmlCleanupParser,,\n,xmlFreeTextReader,,\n,xmlReaderForIO,,\n,xmlTextReaderConstLocalName,,\n,xmlTextReaderConstValue,,\n,xmlTextReaderIsEmptyElement,,\n,xmlTextReaderMoveToFirstAttribute,,\n,xmlTextReaderMoveToNextAttribute,,\n,xmlTextReaderNodeType,,\n,xmlTextReaderRead,,\n,xmlTextReaderSetErrorHandler,,\n,__xpg_basename,,\n,__xpg_strerror_r,,\n,__xstat,,\n,__xstat64,,\noperator delete[](void*),_ZdaPv,,\noperator delete(void*),_ZdlPv,,\n\"operator delete(void*, unsigned long)\",_ZdlPvm,,\n\"operator delete(void*, std::nothrow_t const&)\",_ZdlPvRKSt9nothrow_t,,\noperator new[](unsigned long),_Znam,,\nstd::__basic_file<char>::is_open() const,_ZNKSt12__basic_fileIcE7is_openEv,,\nstd::runtime_error::what() const,_ZNKSt13runtime_error4whatEv,,\nstd::ctype<char>::_M_widen_init() const,_ZNKSt5ctypeIcE13_M_widen_initEv,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::find_last_of(char const*, unsigned long, unsigned long) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12find_last_ofEPKcmm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::find_first_of(char const*, unsigned long, unsigned long) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE13find_first_ofEPKcmm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::find_last_not_of(char, unsigned long) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE16find_last_not_ofEcm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::find_last_not_of(char const*, unsigned long, unsigned long) 
const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE16find_last_not_ofEPKcmm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::find_first_not_of(char const*, unsigned long, unsigned long) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE17find_first_not_ofEPKcmm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::find(char, unsigned long) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE4findEcm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::find(char const*, unsigned long, unsigned long) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE4findEPKcmm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::rfind(char, unsigned long) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE5rfindEcm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::rfind(char const*, unsigned long, unsigned long) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE5rfindEPKcmm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::substr(unsigned long, unsigned long) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6substrEmm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::compare(unsigned long, unsigned long, char const*) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7compareEmmPKc,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::compare(char const*) const\",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7compareEPKc,,\n\"std::__cxx11::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >::str() const\",_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv,,\n\"std::__detail::_Prime_rehash_policy::_M_need_rehash(unsigned long, unsigned long, unsigned long) 
const\",_ZNKSt8__detail20_Prime_rehash_policy14_M_need_rehashEmmm,,\nstd::exception::what() const,_ZNKSt9exception4whatEv,,\nstd::istream::get(),_ZNSi3getEv,,\n\"std::istream::read(char*, long)\",_ZNSi4readEPcl,,\nstd::istream::unget(),_ZNSi5ungetEv,,\nstd::ostream::put(char),_ZNSo3putEc,,\nstd::ostream::flush(),_ZNSo5flushEv,,\n\"std::ostream::write(char const*, long)\",_ZNSo5writeEPKcl,,\nstd::ostream& std::ostream::_M_insert<bool>(bool),_ZNSo9_M_insertIbEERSoT_,,\nstd::ostream& std::ostream::_M_insert<double>(double),_ZNSo9_M_insertIdEERSoT_,,\nstd::ostream& std::ostream::_M_insert<unsigned long>(unsigned long),_ZNSo9_M_insertImEERSoT_,,\nstd::ostream& std::ostream::_M_insert<void const*>(void const*),_ZNSo9_M_insertIPKvEERSoT_,,\nstd::ostream& std::ostream::_M_insert<long long>(long long),_ZNSo9_M_insertIxEERSoT_,,\nstd::ostream& std::ostream::_M_insert<unsigned long long>(unsigned long long),_ZNSo9_M_insertIyEERSoT_,,\nstd::ostream::operator<<(int),_ZNSolsEi,,\nstd::ostream::operator<<(short),_ZNSolsEs,,\nstd::logic_error::logic_error(std::logic_error const&),_ZNSt11logic_errorC2ERKS_,,\nstd::__basic_file<char>::~__basic_file(),_ZNSt12__basic_fileIcED1Ev,,\n\"std::domain_error::domain_error(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)\",_ZNSt12domain_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE,,\nstd::domain_error::~domain_error(),_ZNSt12domain_errorD1Ev,,\nstd::out_of_range::out_of_range(char const*),_ZNSt12out_of_rangeC1EPKc,,\nstd::out_of_range::~out_of_range(),_ZNSt12out_of_rangeD1Ev,,\n\"std::basic_filebuf<char, std::char_traits<char> >::open(char const*, std::_Ios_Openmode)\",_ZNSt13basic_filebufIcSt11char_traitsIcEE4openEPKcSt13_Ios_Openmode,,\n\"std::basic_filebuf<char, std::char_traits<char> >::close()\",_ZNSt13basic_filebufIcSt11char_traitsIcEE5closeEv,,\n\"std::basic_filebuf<char, std::char_traits<char> 
>::basic_filebuf()\",_ZNSt13basic_filebufIcSt11char_traitsIcEEC1Ev,,\n\"std::basic_filebuf<char, std::char_traits<char> >::~basic_filebuf()\",_ZNSt13basic_filebufIcSt11char_traitsIcEED1Ev,,\n\"std::basic_fstream<char, std::char_traits<char> >::basic_fstream(char const*, std::_Ios_Openmode)\",_ZNSt13basic_fstreamIcSt11char_traitsIcEEC1EPKcSt13_Ios_Openmode,,\nstd::runtime_error::runtime_error(std::runtime_error const&),_ZNSt13runtime_errorC1ERKS_,,\nstd::runtime_error::runtime_error(char const*),_ZNSt13runtime_errorC2EPKc,,\nstd::runtime_error::runtime_error(std::runtime_error const&),_ZNSt13runtime_errorC2ERKS_,,\nstd::runtime_error::~runtime_error(),_ZNSt13runtime_errorD1Ev,,\nstd::runtime_error::~runtime_error(),_ZNSt13runtime_errorD2Ev,,\n\"std::basic_ifstream<char, std::char_traits<char> >::basic_ifstream(char const*, std::_Ios_Openmode)\",_ZNSt14basic_ifstreamIcSt11char_traitsIcEEC1EPKcSt13_Ios_Openmode,,\n\"std::basic_ifstream<char, std::char_traits<char> >::~basic_ifstream()\",_ZNSt14basic_ifstreamIcSt11char_traitsIcEED1Ev,,\n\"std::basic_ofstream<char, std::char_traits<char> >::basic_ofstream(char const*, std::_Ios_Openmode)\",_ZNSt14basic_ofstreamIcSt11char_traitsIcEEC1EPKcSt13_Ios_Openmode,,\n\"std::basic_ofstream<char, std::char_traits<char> >::~basic_ofstream()\",_ZNSt14basic_ofstreamIcSt11char_traitsIcEED1Ev,,\nstd::invalid_argument::invalid_argument(char const*),_ZNSt16invalid_argumentC1EPKc,,\n\"std::invalid_argument::invalid_argument(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)\",_ZNSt16invalid_argumentC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE,,\nstd::invalid_argument::~invalid_argument(),_ZNSt16invalid_argumentD1Ev,,\nstd::locale::locale(),_ZNSt6localeC1Ev,,\nstd::locale::~locale(),_ZNSt6localeD1Ev,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_replace(unsigned long, unsigned long, char const*, unsigned 
long)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct(unsigned long, char)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructEmc,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE14_M_replace_auxEmmmc,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::swap(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE4swapERS4_,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::append(char const*)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6appendEPKc,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::assign(char const*)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6assignEPKc,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::resize(unsigned long, char)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6resizeEmc,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::reserve(unsigned long)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_erase(unsigned long, unsigned long)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE8_M_eraseEmm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_append(char const*, unsigned long)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_assign(std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> > const&)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_create(unsigned long&, unsigned long)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_mutate(unsigned long, unsigned long, char const*, unsigned long)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::push_back(char)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9push_backEc,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::operator=(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&&)\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEOS4_,,\n\"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string()\",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev,,\n\"std::__cxx11::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >::_M_sync(char*, unsigned long, unsigned long)\",_ZNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE7_M_syncEPcmm,,\n\"std::__cxx11::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >::basic_stringstream(std::_Ios_Openmode)\",_ZNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEC1ESt13_Ios_Openmode,,\n\"std::__cxx11::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_stringstream()\",_ZNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEED1Ev,,\n\"std::__cxx11::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >::basic_istringstream(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, 
std::_Ios_Openmode)\",_ZNSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEEC1ERKNS_12basic_stringIcS2_S3_EESt13_Ios_Openmode,,\n\"std::__cxx11::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_istringstream()\",_ZNSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEED1Ev,,\n\"std::__cxx11::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >::basic_ostringstream(std::_Ios_Openmode)\",_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEC1ESt13_Ios_Openmode,,\n\"std::__cxx11::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_ostringstream()\",_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev,,\nstd::bad_cast::~bad_cast(),_ZNSt8bad_castD2Ev,,\n\"std::__detail::_List_node_base::swap(std::__detail::_List_node_base&, std::__detail::_List_node_base&)\",_ZNSt8__detail15_List_node_base4swapERS0_S1_,,\nstd::__detail::_List_node_base::_M_hook(std::__detail::_List_node_base*),_ZNSt8__detail15_List_node_base7_M_hookEPS0_,,\nstd::__detail::_List_node_base::_M_unhook(),_ZNSt8__detail15_List_node_base9_M_unhookEv,,\nstd::ios_base::Init::Init(),_ZNSt8ios_base4InitC1Ev,,\nstd::ios_base::Init::~Init(),_ZNSt8ios_base4InitD1Ev,,\nstd::ios_base::ios_base(),_ZNSt8ios_baseC2Ev,,\nstd::ios_base::~ios_base(),_ZNSt8ios_baseD2Ev,,\n\"std::basic_ios<char, std::char_traits<char> >::init(std::basic_streambuf<char, std::char_traits<char> >*)\",_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E,,\n\"std::basic_ios<char, std::char_traits<char> >::clear(std::_Ios_Iostate)\",_ZNSt9basic_iosIcSt11char_traitsIcEE5clearESt12_Ios_Iostate,,\nstd::exception::~exception(),_ZNSt9exceptionD1Ev,,\noperator new(unsigned long),_Znwm,,\n\"operator new(unsigned long, std::nothrow_t const&)\",_ZnwmRKSt9nothrow_t,,\n\"std::_Hash_bytes(void const*, unsigned long, unsigned long)\",_ZSt11_Hash_bytesPKvmm,,\n\"std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, 
std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)\",_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l,,\nstd::__throw_bad_cast(),_ZSt16__throw_bad_castv,,\nstd::__throw_bad_alloc(),_ZSt17__throw_bad_allocv,,\nstd::_Rb_tree_decrement(std::_Rb_tree_node_base const*),_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base,,\nstd::_Rb_tree_decrement(std::_Rb_tree_node_base*),_ZSt18_Rb_tree_decrementPSt18_Rb_tree_node_base,,\nstd::_Rb_tree_increment(std::_Rb_tree_node_base const*),_ZSt18_Rb_tree_incrementPKSt18_Rb_tree_node_base,,\nstd::_Rb_tree_increment(std::_Rb_tree_node_base*),_ZSt18_Rb_tree_incrementPSt18_Rb_tree_node_base,,\nstd::__throw_logic_error(char const*),_ZSt19__throw_logic_errorPKc,,\nstd::__throw_length_error(char const*),_ZSt20__throw_length_errorPKc,,\nstd::__throw_out_of_range(char const*),_ZSt20__throw_out_of_rangePKc,,\n\"std::__throw_out_of_range_fmt(char const*, ...)\",_ZSt24__throw_out_of_range_fmtPKcz,,\nstd::__throw_bad_function_call(),_ZSt25__throw_bad_function_callv,,\n\"std::_Rb_tree_rebalance_for_erase(std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)\",_ZSt28_Rb_tree_rebalance_for_erasePSt18_Rb_tree_node_baseRS_,,\n\"std::_Rb_tree_insert_and_rebalance(bool, std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)\",_ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS_,,\nstd::cin,_ZSt3cin,,\nstd::cerr,_ZSt4cerr,,\nstd::cout,_ZSt4cout,,\n\"std::basic_ostream<char, std::char_traits<char> >& std::endl<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&)\",_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_,,\n\"std::basic_istream<char, std::char_traits<char> >& std::getline<char, std::char_traits<char>, std::allocator<char> >(std::basic_istream<char, std::char_traits<char> >&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, 
char)\",_ZSt7getlineIcSt11char_traitsIcESaIcEERSt13basic_istreamIT_T0_ES7_RNSt7__cxx1112basic_stringIS4_S5_T1_EES4_,,\nstd::nothrow,_ZSt7nothrow,,\nstd::terminate(),_ZSt9terminatev,,\n\"std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)\",_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc,,\n\"std::basic_istream<char, std::char_traits<char> >& std::operator>><char, std::char_traits<char> >(std::basic_istream<char, std::char_traits<char> >&, char&)\",_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_RS3_,,\ntypeinfo for int,_ZTIi,OBJ,OBJ\ntypeinfo for char const*,_ZTIPKc,OBJ,OBJ\n\"VTT for std::__cxx11::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >\",_ZTTNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ\n\"VTT for std::__cxx11::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >\",_ZTTNSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ\n\"VTT for std::__cxx11::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >\",_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ\n\"VTT for std::basic_ifstream<char, std::char_traits<char> >\",_ZTTSt14basic_ifstreamIcSt11char_traitsIcEE,OBJ,OBJ\n\"VTT for std::basic_ofstream<char, std::char_traits<char> >\",_ZTTSt14basic_ofstreamIcSt11char_traitsIcEE,OBJ,OBJ\n\"vtable for std::__cxx11::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >\",_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE,OBJ,OBJ\n\"vtable for std::__cxx11::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >\",_ZTVNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ\n\"vtable for std::__cxx11::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >\",_ZTVNSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ\n\"vtable for 
std::__cxx11::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >\",_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ\n\"vtable for std::basic_filebuf<char, std::char_traits<char> >\",_ZTVSt13basic_filebufIcSt11char_traitsIcEE,OBJ,OBJ\n\"vtable for std::basic_ifstream<char, std::char_traits<char> >\",_ZTVSt14basic_ifstreamIcSt11char_traitsIcEE,OBJ,OBJ\n\"vtable for std::basic_ofstream<char, std::char_traits<char> >\",_ZTVSt14basic_ofstreamIcSt11char_traitsIcEE,OBJ,OBJ\n\"vtable for std::basic_streambuf<char, std::char_traits<char> >\",_ZTVSt15basic_streambufIcSt11char_traitsIcEE,OBJ,OBJ\n\"vtable for std::basic_ios<char, std::char_traits<char> >\",_ZTVSt9basic_iosIcSt11char_traitsIcEE,OBJ,OBJ\n"
  },
  {
    "path": "src/library_functions/library_functions.c",
    "content": "/*\n * library_functions.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"library_functions.h\"\n#include \"../utils.h\"\n\n#include <gmodule.h>\n\n/*\n * Create a LFuncInfo\n */\nZ_PRIVATE LFuncInfo *__lfunc_info_create(const char *name, LCFGInfo cfg_info,\n                                         LRAInfo ra_info);\n\n/*\n * Destroy a LFuncInfo\n */\nZ_PRIVATE void __lfunc_info_destroy(LFuncInfo *info);\n\n/*\n * Load data into database\n */\nZ_PRIVATE void __libfunc_load(GHashTable *d);\n\nZ_PRIVATE LFuncInfo *__lfunc_info_create(const char *name, LCFGInfo cfg_info,\n                                         LRAInfo ra_info) {\n    LFuncInfo *rv = z_alloc(1, sizeof(LFuncInfo));\n    rv->name = z_strdup(name);\n    rv->cfg_info = cfg_info;\n    rv->ra_info = ra_info;\n    return rv;\n}\n\nZ_PRIVATE void __lfunc_info_destroy(LFuncInfo *info) {\n    z_free((void *)info->name);\n    z_free(info);\n}\n\n// XXX: the file must be included here.\n#include \"library_functions_load.c\"\n\nGHashTable *lf_info = NULL;\n\nZ_API void z_libfunc_init() {\n    if (lf_info) {\n        return;\n    }\n\n    lf_info =\n        g_hash_table_new_full(g_str_hash, g_str_equal, (GDestroyNotify)&z_free,\n                              (GDestroyNotify)&__lfunc_info_destroy);\n\n    
__libfunc_load(lf_info);\n}\n\nZ_API void z_libfunc_fini() {\n    if (lf_info) {\n        g_hash_table_destroy(lf_info);\n        lf_info = NULL;\n    }\n}\n\nZ_API const LFuncInfo *z_libfunc_get_info(const char *name) {\n    if (!lf_info) {\n        z_libfunc_init();\n    }\n\n    LFuncInfo *rv = (LFuncInfo *)g_hash_table_lookup(lf_info, (gpointer)name);\n    if (!rv) {\n        rv = __lfunc_info_create(name, LCFG_UNK, LRA_UNK);\n        g_hash_table_insert(lf_info, (gpointer)z_strdup(name), (gpointer)rv);\n    }\n\n    return rv;\n}\n\nconst LFuncInfo default_func_info = {\n    .name = NULL,\n    .cfg_info = LCFG_UNK,\n    .ra_info = LRA_UNK,\n};\n\nZ_API const LFuncInfo *z_libfunc_default() { return &default_func_info; }\n"
  },
  {
    "path": "src/library_functions/library_functions.h",
    "content": "/*\n * library_functions.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __LIBRARY_FUNCTIONS_H\n#define __LIBRARY_FUNCTIONS_H\n\n#include \"../config.h\"\n\n// whether the library function will return to caller\ntypedef enum lcfg_info_t {\n    LCFG_OBJ,  // this is not an imported function but an object\n    LCFG_UNK,\n    LCFG_RET,\n    LCFG_TERM,\n} LCFGInfo;\n\n// whether the retaddr pushed by `call` instructions is used\ntypedef enum lra_info_t {\n    LRA_OBJ,  // this is not an imported function but an object\n    LRA_UNK,\n    LRA_USED,\n    LRA_UNUSED,\n} LRAInfo;\n\ntypedef struct lfunc_info_t {\n    const char *name;\n    LCFGInfo cfg_info;\n    LRAInfo ra_info;\n} LFuncInfo;\n\nZ_API void z_libfunc_init();\n\nZ_API void z_libfunc_fini();\n\nZ_API const LFuncInfo *z_libfunc_get_info(const char *name);\n\nZ_API const LFuncInfo *z_libfunc_default();\n\n#endif\n"
  },
  {
    "path": "src/libstochfuzz.h",
    "content": "/*\n * libstochfuzz.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __LIBSTOCHFUZZ_H\n#define __LIBSTOCHFUZZ_H\n\n#include \"core.h\"\n#include \"utils.h\"\n\n#endif\n"
  },
  {
    "path": "src/libstochfuzzRT.c",
    "content": "/*\n * libstochfuzzRT.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n// XXX: some code is modified from\n// https://github.com/mxz297/dyninst/blob/asplos21/dyninstAPI_RT/src/RTunwind.c.\n\n#include \"config.h\"\n\n#include <libunwind.h>\n\n#include <dlfcn.h>\n#include <errno.h>\n#include <fcntl.h>\n#include <link.h>\n#include <sys/mman.h>\n#include <sys/types.h>\n\n#define IP_OFFSET_IN_CURSOR 3\n\ntypedef int (*unw_step_fn_type)(unw_cursor_t*);\n\ntypedef struct retaddr_entity_t {\n    uint32_t shadow;\n    uint32_t original;\n} Retaddr;\n\ntypedef struct retaddr_mapping_t {\n    size_t n;\n    unw_step_fn_type real_unw_step;\n    Retaddr addrs[];\n} RetaddrMapping;\n\nstatic void __runtime_mremap(const char* filename, void* addr, size_t length,\n                             int prot) {\n    // msync the data\n    if (msync(addr, length, MS_SYNC)) {\n        fprintf(stderr, \"msync failed: %s\\n\", strerror(errno));\n        exit(MY_ERR_CODE);\n    }\n\n    // munmap the underlying memory\n    if (munmap(addr, length)) {\n        fprintf(stderr, \"munmap failed: %s\\n\", strerror(errno));\n        exit(MY_ERR_CODE);\n    }\n\n    // open file\n    int fd = open(filename, (prot & PROT_WRITE) ? 
O_RDWR : O_RDONLY);\n    if (fd < 0) {\n        fprintf(stderr, \"open %s failed: %s\\n\", filename, strerror(errno));\n        exit(MY_ERR_CODE);\n    }\n\n    // mmap file\n    if (mmap(addr, length, prot, MAP_SHARED | MAP_FIXED, fd, 0) != addr) {\n        fprintf(stderr, \"mmap failed: %s\\n\", strerror(errno));\n        exit(MY_ERR_CODE);\n    }\n\n    // close fd\n    if (close(fd)) {\n        fprintf(stderr, \"close failed: %s\\n\", strerror(errno));\n        exit(MY_ERR_CODE);\n    }\n}\n\nstatic unw_word_t __runtime_retaddr_translate(RetaddrMapping* mapping,\n                                              unw_word_t ip) {\n    size_t low_id = 0;\n    size_t high_id = mapping->n - 1;\n\n    if (mapping->addrs[low_id].shadow > ip ||\n        mapping->addrs[high_id].shadow < ip) {\n        return ip;\n    }\n\n    if (mapping->addrs[low_id].shadow == ip) {\n        return mapping->addrs[low_id].original;\n    }\n    if (mapping->addrs[high_id].shadow == ip) {\n        return mapping->addrs[high_id].original;\n    }\n\n    size_t mid_id = (low_id + high_id) >> 1;\n    while (low_id + 1 != high_id) {\n        if (mapping->addrs[mid_id].shadow < ip) {\n            low_id = mid_id;\n        } else if (mapping->addrs[mid_id].shadow > ip) {\n            high_id = mid_id;\n        } else {\n            return mapping->addrs[mid_id].original;\n        }\n\n        mid_id = (low_id + high_id) >> 1;\n    }\n\n    return ip;\n}\n\nint _ULx86_64_step(unw_cursor_t* cursor) {\n    if (!RW_PAGE_INFO(retaddr_mapping_used)) {\n        fprintf(stderr, \"stochfuzz's -r option is disabled!\\n\");\n        exit(MY_ERR_CODE);\n    }\n\n    RetaddrMapping* mapping =\n        (RetaddrMapping*)RW_PAGE_INFO(retaddr_mapping_base);\n\n    if (!mapping->real_unw_step) {\n        // first check size\n        if (sizeof(addr_t) != sizeof(unw_word_t)) {\n            fprintf(stderr, \"inconsistent size of addr_t and unw_word_t\");\n            exit(MY_ERR_CODE);\n        }\n\n        // get 
basic information\n        void* retaddr_mapping_base = (void*)RW_PAGE_INFO(retaddr_mapping_base);\n        size_t retaddr_mapping_size = RW_PAGE_INFO(retaddr_mapping_size);\n        const char* retaddr_mapping_path = RW_PAGE_INFO(retaddr_mapping_path);\n\n        // update mapping prot\n        __runtime_mremap(retaddr_mapping_path, retaddr_mapping_base,\n                         retaddr_mapping_size, PROT_READ | PROT_WRITE);\n\n        // find the real address\n        struct link_map* l_current = _r_debug.r_map;\n        while (l_current) {\n            if (strstr(l_current->l_name, \"libunwind.so\")) {\n                break;\n            }\n            l_current = l_current->l_next;\n        }\n        if (!l_current) {\n            fprintf(stderr, \"Cannot find libunwind handle\\n\");\n            exit(MY_ERR_CODE);\n        }\n        mapping->real_unw_step =\n            (unw_step_fn_type)(l_current->l_addr + STEP_OFFSET);\n\n        // remapping as non-writable\n        __runtime_mremap(retaddr_mapping_path, retaddr_mapping_base,\n                         retaddr_mapping_size, PROT_READ);\n    }\n\n    int rv = (*(mapping->real_unw_step))(cursor);\n\n    unw_word_t* typed_cursor = (unw_word_t*)cursor;\n    unw_word_t base_ip = RW_PAGE_INFO(program_base);\n\n    unw_word_t ip = typed_cursor[IP_OFFSET_IN_CURSOR] - base_ip;\n    unw_word_t new_ip = __runtime_retaddr_translate(mapping, ip);\n    typed_cursor[IP_OFFSET_IN_CURSOR] = new_ip + base_ip;\n\n    return rv;\n}\n"
  },
  {
    "path": "src/loader.c",
    "content": "/*\n * the code inside asm(\".globl _entry\\n ...\")\n * Copyright (C) 2021 National University of Singapore\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n *\n */\n// XXX: the code inside the asm(\".globl _entry\\n ...\") is modified based on\n// https://github.com/GJDuck/e9patch/blob/master/src/e9patch/e9loader.cpp\n\n/*\n * other parts of loader.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * Layout of patched binary (on disk):\n *\n *         padding                              padding\n *            |                                    |\n *            V                                    V\n * | ori ELF |.| loader (see below) | fork server |.| trampolines (see below) |\n *  \\_____________________  _______________________/ \\___________  __________/\n *                        \\/                                     \\/\n *                   <INPUT_ELF>                     SHADOW_PREFIX.<INPUT_ELF>\n */\n\n/*\n * Layout of loader:\n *\n * | loader | jmp 2 ori entrypoint | loader base | TP base | TP size | names |\n *                                                    |\n *                                                    |\n * +--------------------------------------------------+\n * |\n * V\n * | trampoline 1 | shadow code | trampoline 2| ... 
| trampoline n |\n *        ^\n *        |\n *        |\n *        +-- | mmap addr | mmap size | TP addr | TP size | next TP off | data |\n *\n *\n *  For trampolines meta data:\n *\n *    +----------------+-----------+-----------+----------+----------+\n *    |      Type      | mmap addr | mmap size | TP addr  | TP size  |\n *    +----------------+-----------+-----------+----------+----------+\n *    |  uTP (w/ mmap) |  Non-NULL |  Non-NULL | Non-NULL | Non-NULL |\n *    +----------------+-----------+-----------+----------+----------+\n *    | uTP (w/o mmap) |    NULL   |    NULL   | Non-NULL | Non-NULL |\n *    +----------------+-----------+-----------+----------+----------+\n *    |      TP        |    NULL   |    NULL   |   NULL   | Non-NULL |\n *    +----------------+-----------+-----------+----------+----------+\n *    |    Terminal    |    NULL   |    NULL   |   NULL   |   NULL   |\n *    +----------------+-----------+-----------+----------+----------+\n *\n */\n\n#include \"loader.h\"\n\n#include <linux/filter.h>\n#include <linux/seccomp.h>\n#include <linux/unistd.h>\n#include <signal.h>\n#include <sys/prctl.h>\n\n#include \"asm_utils.c\"\n\n#define SA_RESTORER 0x04000000\n\n#ifdef DEBUG\nextern const char loader_logo_str[];\nextern const char suspect_signal_info_str[];\nextern const char loader_err_str[];\nextern const char prctl_err_str[];\nextern const char handler_err_str[];\n#endif\n\nextern void restorer();\n\nasm(\n    /*\n     * Entry into stage #1 (loader).  
We:\n     *  (0) save all registers\n     *  (1) call loader_output_running_path() if necessary\n     *  (2) setup stage parameters for loader_load()\n     *  (3) call loader_load() to mmap and copy data to target virtual addr\n     *  (4) restore all registers\n     *  (5) jump to original entrypoint\n     */\n    \".globl _entry\\n\"\n    \".type _entry,@function\\n\"\n    \"_entry:\\n\"\n\n    // (0) save registers (meanwhile storing variable *envp*)\n    \"\\tpushq %r15;\\n\"\n    \"\\tpushq %r14;\\n\"\n    \"\\tpushq %r13;\\n\"\n    \"\\tpushq %r12;\\n\"\n    \"\\tpushq %r11;\\n\"\n    \"\\tpushq %r10;\\n\"\n    \"\\tpushq %r9;\\n\"\n    \"\\tpushq %r8;\\n\"\n    \"\\tpushq %rcx;\\n\"\n    \"\\tpushq %rdx;\\n\"\n    \"\\tpushq %rsi;\\n\"\n    \"\\tpushq %rdi;\\n\"\n\n    // (1) call loader_output_running_path()\n    \"\\tmovq 0x68(%rsp), %rdi;\\n\"  // XXX: note that the magic number 0x68 is\n                                  // associated with how many registers we\n                                  // pushed on the stack\n    \"\\tcallq loader_output_running_path;\\n\"  // Show current path\n\n    // (2) setup stage parameters for loader_load()\n    \"\\tlea __etext(%rip), %rdi;\\n\"\n    \"\\taddq $4, %rdi;\\n\"\n    \"\\tshrq $3, %rdi;\\n\"\n    \"\\tincq %rdi;\\n\"\n    \"\\tshlq $3, %rdi;\\n\"  // cur_addr in __binary_setup_loader step (4) binary.c\n    \"\\tmovq (%rdi), %rbx;\\n\"\n    \"\\tleaq _entry(%rip), %rdx;\\n\"\n    \"\\tsubq %rbx, %rdx;\\n\"      // program base into %rdx (size_t rip_base)\n    \"\\tleaq 24(%rdi), %rcx;\\n\"  // names in %rcx (const char *name)\n    \"\\tmovq 16(%rdi), %rsi;\\n\"\n    \"\\taddq %rdx, %rsi;\\n\"  // .text base into %rsi (void *shared_text_base)\n    \"\\tmovq 8(%rdi), %rdi;\\n\"\n    \"\\taddq %rdx, %rdi;\\n\"  // TP chunk base into %rdi (Trampoline *tp)\n    \"\\tmovq %rax, %r8;\\n\"   // pathname into %r8 (const char *pathname)\n\n    // (3) mmap and copy data to target virtual addr\n    \"\\tcld;\\n\"        
        // set DF register\n    \"\\tcallq loader_load;\\n\"  // call loader_load()\n\n    // (4) restore all registers\n    \"\\tpopq %rdi;\\n\"\n    \"\\tpopq %rsi;\\n\"\n    \"\\tpopq %rdx;\\n\"\n    \"\\tpopq %rcx;\\n\"\n    \"\\tpopq %r8;\\n\"\n    \"\\tpopq %r9;\\n\"\n    \"\\tpopq %r10;\\n\"\n    \"\\tpopq %r11;\\n\"\n    \"\\tpopq %r12;\\n\"\n    \"\\tpopq %r13;\\n\"\n    \"\\tpopq %r14;\\n\"\n    \"\\tpopq %r15;\\n\"\n\n    // (5) jump to original entrypoint\n    // The springboard to original entrypoint will be placed at the end of the\n    // (.text) section.\n    \"\\tjmp __etext\\n\"\n\n    /*\n     * restore function for rt_sigaction\n     */\n    \".global restorer\\n\"\n    \".type restorer,@function\\n\"\n    \"restorer:\\n\"\n    \"\\tmov $15,%rax;\\n\"\n    \"\\tsyscall;\\n\"\n    \"\\tret;\\n\"\n\n/*\n * The following defines the read-only data used by the loader.\n * Note that we define the data as executable code to keep everything\n * in the (.text) section.\n */\n#ifdef DEBUG\n    ASM_STRING(loader_logo_str, \"\\\\033[32mpatched by \" OURTOOL\n                                \", current running path: \\\\033[0m\")\n    // suspect signal info string\n    ASM_STRING(suspect_signal_info_str, \"suspect signal occurs, with \")\n    // prctl error\n    ASM_STRING(prctl_err_str, \"prctl error\")\n    // handler error\n    ASM_STRING(handler_err_str, \"signal handler error\")\n    // loader error\n    ASM_STRING(loader_err_str, \"loader: loading error\")\n#endif\n\n);\n\nstatic void loader_memcpy(void *dst_0, void *src_0, size_t n_0) {\n    register uintptr_t dst asm(\"rdi\") = (uintptr_t)dst_0;\n    register uintptr_t src asm(\"rsi\") = (uintptr_t)src_0;\n    register uintptr_t n asm(\"rcx\") = (uintptr_t)(n_0);\n\n    asm volatile(\n        \"movq %%rcx, %%rdx\\n\\t\"\n        \"andq $7, %%rdx\\n\\t\"\n        \"shrq $3, %%rcx\\n\\t\"\n        \"rep movsq\\n\\t\"\n        \"movq %%rdx, %%rcx\\n\\t\"\n        \"rep movsb\\n\\t\"\n        :\n        
: \"r\"(dst), \"r\"(src), \"r\"(n)\n        : \"rdx\");\n}\n\n/*\n * mmap a fake AFL_SHARED_MEMORY to avoid instrumentation before main\n */\nstatic inline void loader_mmap_fake_shared_memory() {\n    unsigned long shared_mem_addr = AFL_MAP_ADDR;\n    size_t shared_mem_size = AFL_MAP_SIZE;\n\n    if (sys_mmap(shared_mem_addr, shared_mem_size, PROT_READ | PROT_WRITE,\n                 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1,\n                 0) != shared_mem_addr) {\n        utils_error(loader_err_str, true);\n    }\n\n    shared_mem_addr = CRS_MAP_ADDR;\n    shared_mem_size = CRS_MAP_SIZE;\n    if (sys_mmap(shared_mem_addr, shared_mem_size, PROT_READ | PROT_WRITE,\n                 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1,\n                 0) != shared_mem_addr) {\n        utils_error(loader_err_str, true);\n    }\n}\n\n/*\n * mmap a R/W data page at fixed address RW_PAGE_ADDR, and store rip base into\n * the first qword.\n */\nstatic inline void loader_mmap_data_page(size_t rip_base) {\n    if (sys_mmap(RW_PAGE_ADDR, RW_PAGE_SIZE, PROT_READ | PROT_WRITE,\n                 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1,\n                 0) != RW_PAGE_ADDR) {\n        utils_error(loader_err_str, true);\n    }\n\n    RW_PAGE_INFO(program_base) = (addr_t)rip_base;\n}\n\n/*\n * signal handler\n */\nstatic void loader_catch_suspect_signals(int signal, siginfo_t *siginfo,\n                                         void *context) {\n    uint64_t rip = ((ucontext_t *)context)->uc_mcontext.gregs[REG_RIP];\n\n#ifdef DEBUG\n    uint64_t client_pid = RW_PAGE_INFO(client_pid);\n\n    char s[0x40] = \"\";\n    s[0] = 'r';\n    s[1] = 'i';\n    s[2] = 'p';\n    s[3] = ':';\n    s[4] = ' ';\n    utils_num2hexstr(s + 5, rip);\n    s[21] = '(';\n    utils_num2hexstr(s + 22, client_pid);\n    s[38] = ',';\n    s[39] = ' ';\n    utils_num2hexstr(s + 40, sys_getpid());\n    s[56] = ')';\n    s[57] = '\\n';\n    s[58] = '\\x00';\n    utils_puts(suspect_signal_info_str, false);\n   
 utils_puts(s, false);\n#endif\n\n    rip -= RW_PAGE_INFO(program_base);\n\n    // XXX: For an *UNKNOWN* reason, pipe CRS_DATA_FD sometimes is broken,\n    // resulting in an incorrect patching schedule. Hence, we adopt shared\n    // memory to sent crashed PC. Note that CRS_DATA_FD is still valid in dry\n    // run, for compatibility. In the future, we will abandon this pipe.\n    if (RW_PAGE_INFO(daemon_attached)) {\n        // we need a lock to avoid race condition\n        if (!__sync_lock_test_and_set((uint32_t *)CRS_INFO_ADDR(lock), 1)) {\n            CRS_INFO(crash_ip) = (addr_t)rip;\n            CRS_INFO(self_fired) = 1UL;\n        } else {\n            // we pause this process here. Note that we are fine with pause()\n            // here.\n            sys_pause();\n        }\n    } else {\n        // we only need to send rip, since a successful communication indicates\n        // a signal fired\n        if (sys_write(CRS_DATA_FD, (char *)(&rip), 8) != 8) {\n            utils_error(handler_err_str, true);\n        }\n    }\n\n    // it would be better to kill all the process in the group\n    sys_kill(0, SIGKILL);\n}\n\n/*\n * Register signal handlers for suspect signals to send crash site information.\n */\nstatic inline void loader_set_signal_handler(addr_t rip_base) {\n    /*\n     * Before we set signal handler, we will first mmap a new stack for the\n     * handler. As such, even if the stack gets polluted, we can send the crash\n     * address to the daemon. 
More details can be found in:\n     *   https://man7.org/linux/man-pages/man2/sigaltstack.2.html\n     *   https://stackoverflow.com/questions/39297207/catching-sigsegv-when-triggered-by-corrupt-stack\n     */\n    addr_t ss_addr = rip_base + SIGNAL_STACK_ADDR;\n    if (sys_mmap(ss_addr, SIGNAL_STACK_SIZE, PROT_READ | PROT_WRITE,\n                 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0) != ss_addr) {\n        utils_error(loader_err_str, true);\n    }\n    stack_t ss = {\n        .ss_sp = (void *)ss_addr,\n        .ss_flags = 0,\n        .ss_size = SIGNAL_STACK_SIZE,\n    };\n    sys_sigaltstack(&ss, NULL);\n\n    struct kernel_sigaction sa = {};\n\n    sa.k_sa_handler = &loader_catch_suspect_signals;\n    sa.sa_flags = SA_SIGINFO | SA_RESTORER | SA_ONSTACK;\n    sa.sa_restorer = &restorer;\n\n    if (sys_rt_sigaction(SIGSEGV, &sa, NULL, _NSIG / 8)) {\n        utils_error(loader_err_str, true);\n    }\n\n    if (sys_rt_sigaction(SIGILL, &sa, NULL, _NSIG / 8)) {\n        utils_error(loader_err_str, true);\n    }\n\n    // XXX: overlapping bridges may cause SIGTRAP\n    if (sys_rt_sigaction(SIGTRAP, &sa, NULL, _NSIG / 8)) {\n        utils_error(loader_err_str, true);\n    }\n}\n\n/*\n * Install seccomp filter to avoid modify suspect signal handler\n */\nstatic inline void loader_set_seccomp() {\n    if (sys_prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {\n        utils_error(prctl_err_str, true);\n    }\n\n    // XXX: note that we cannot block sigprocmask (which may delay the following\n    // signals). 
For more information, please refer to\n    // https://lwn.net/Articles/822256/ (\"accepting or rejecting the system call\n    // cannot depend on, for example, values in structures that are passed to\n    // system calls via pointers\")\n    /*\n     * Use compiled seccomp rule (bytecode) to avoid compilation difference\n     *\n     *    int error = 1;\n     *    struct sock_filter filter[] = {\n     *        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,\n     *                 (offsetof(struct seccomp_data, nr))),\n     *        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigaction, 0, 4),\n     *        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,\n     *                 (offsetof(struct seccomp_data, args[0]))),\n     *        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SIGTRAP, 3, 0),\n     *        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SIGSEGV, 2, 0),\n     *        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SIGILL, 1, 0),\n     *        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),\n     *        BPF_STMT(BPF_RET | BPF_K,\n     *                 SECCOMP_RET_ERRNO | (error & SECCOMP_RET_DATA))};\n     *\n     */\n\n    register struct sock_filter *filter asm(\"rax\");\n    asm volatile(\n        \"  leaq _filter(%%rip), %%rax\\n\\t\"\n        \"  jmp _out\\n\\t\"\n        \"_filter:\\n\\t\"\n        \".ascii \\\"\"\n        \"\\\\040\\\\000\\\\000\\\\000\\\\000\\\\000\\\\000\\\\000\"  // 0. BPF_STMT\n        \"\\\\025\\\\000\\\\000\\\\004\\\\015\\\\000\\\\000\\\\000\"  // 1. BPF_JUMP\n        \"\\\\040\\\\000\\\\000\\\\000\\\\020\\\\000\\\\000\\\\000\"  // 2. BPF_STMT\n        \"\\\\025\\\\000\\\\003\\\\000\\\\005\\\\000\\\\000\\\\000\"  // 3. BPF_JUMP\n        \"\\\\025\\\\000\\\\002\\\\000\\\\013\\\\000\\\\000\\\\000\"  // 4. BPF_JUMP\n        \"\\\\025\\\\000\\\\001\\\\000\\\\004\\\\000\\\\000\\\\000\"  // 5. BPF_JUMP\n        \"\\\\006\\\\000\\\\000\\\\000\\\\000\\\\000\\\\377\\\\177\"  // 6. BPF_STMT\n        \"\\\\006\\\\000\\\\000\\\\000\\\\001\\\\000\\\\005\\\\000\"  // 7. 
BPF_STMT\n        \"\\\"\\n\\t\"\n        \"_out:\"\n        : \"=rax\"(filter)\n        :\n        :);\n\n    struct sock_fprog prog = {\n        .len = 8,  // (unsigned short)(sizeof(filter) / sizeof(filter[0])),\n        .filter = filter,\n    };\n\n    if (sys_prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, (unsigned long)(&prog),\n                  0, 0)) {\n        utils_error(prctl_err_str, true);\n    }\n}\n\n/*\n * Load ujmp/ucall trampolines, and set a W/R page tp store global data\n */\nNO_INLINE void loader_load(Trampoline *tp, void *shared_text_base,\n                           size_t rip_base, const char *name,\n                           const char *pathname) {\n    void *mmap_addr, *tp_addr;\n    unsigned long mmap_size, tp_size, next_tp_offset;\n\n    // in case we send SIGKILL to all the parent signal\n    sys_setpgid(0, 0);  // ignore errors\n\n    loader_set_signal_handler((addr_t)rip_base);\n    loader_set_seccomp();\n\n    loader_mmap_data_page(rip_base);\n    loader_mmap_fake_shared_memory();\n\n    // get related path\n    // (XXX: check overflow? 
but the longest path on linux is only 0x100 bytes)\n    char fullpath[0x200];\n    const char *slash_ = NULL;\n    for (int i = 0; i < 0x200; i++) {\n        char c = pathname[i];\n        fullpath[i] = c;\n        if (c == '/') {\n            slash_ = fullpath + i;\n        }\n        if (!c) {\n            break;\n        }\n    }\n\n    char *cur_ = NULL;\n    if (slash_) {\n        // get last slash symbol\n        cur_ = (char *)slash_ + 1;\n    } else {\n        cur_ = fullpath;\n    }\n\n#define __PARSE_FILENAME(dir, s)   \\\n    do {                           \\\n        int i = 0;                 \\\n        do {                       \\\n            (dir)[i++] = *((s)++); \\\n        } while (*s);              \\\n        (s)++;                     \\\n        (dir)[i] = '\\x00';         \\\n    } while (0)\n\n    // shadow file\n    __PARSE_FILENAME(cur_, name);\n    utils_strcpy(RW_PAGE_INFO(shadow_path), fullpath);\n    utils_puts(RW_PAGE_INFO(shadow_path), true);\n    RW_PAGE_INFO(shadow_size) = utils_mmap_external_file(\n        fullpath, false, (unsigned long)tp, PROT_READ | PROT_EXEC);\n    RW_PAGE_INFO(shadow_base) = (addr_t)tp;\n\n    // lookup table file\n    __PARSE_FILENAME(cur_, name);\n    utils_strcpy(RW_PAGE_INFO(lookup_tab_path), fullpath);\n    utils_puts(RW_PAGE_INFO(lookup_tab_path), true);\n    addr_t lookup_table_addr = rip_base + LOOKUP_TABLE_ADDR;\n    RW_PAGE_INFO(lookup_tab_base) = lookup_table_addr;\n    RW_PAGE_INFO(lookup_tab_size) =\n        utils_mmap_external_file(fullpath, false, lookup_table_addr, PROT_READ);\n\n    // pipe file\n    __PARSE_FILENAME(cur_, name);\n    utils_strcpy(RW_PAGE_INFO(pipe_path), fullpath);\n    utils_puts(RW_PAGE_INFO(pipe_path), true);\n\n    // shared .text file\n    __PARSE_FILENAME(cur_, name);\n    utils_strcpy(RW_PAGE_INFO(shared_text_path), fullpath);\n    utils_puts(RW_PAGE_INFO(shared_text_path), true);\n    RW_PAGE_INFO(shared_text_size) = utils_mmap_external_file(\n        fullpath, 
true, (unsigned long)shared_text_base, PROT_READ | PROT_EXEC);\n    RW_PAGE_INFO(shared_text_base) = (addr_t)shared_text_base;\n\n    // retaddr mapping file\n    __PARSE_FILENAME(cur_, name);\n    utils_strcpy(RW_PAGE_INFO(retaddr_mapping_path), fullpath);\n    utils_puts(RW_PAGE_INFO(retaddr_mapping_path), true);\n    addr_t retaddr_mapping_addr = rip_base + RETADDR_MAPPING_ADDR;\n    RW_PAGE_INFO(retaddr_mapping_base) = retaddr_mapping_addr;\n    RW_PAGE_INFO(retaddr_mapping_size) = utils_mmap_external_file(\n        fullpath, false, retaddr_mapping_addr, PROT_READ | PROT_WRITE);\n    if (*((int64_t *)retaddr_mapping_addr) == -1) {\n        // retaddr mapping is useless\n        uint64_t ori_size = RW_PAGE_INFO(retaddr_mapping_size);\n        if (sys_munmap(retaddr_mapping_addr, ori_size)) {\n            utils_error(loader_err_str, true);\n        }\n        RW_PAGE_INFO(retaddr_mapping_used) = false;\n        RW_PAGE_INFO(retaddr_mapping_size) = 0;\n    } else {\n        RW_PAGE_INFO(retaddr_mapping_used) = true;\n        // set the function pointer as NULL\n        *((void **)retaddr_mapping_addr + 1) = NULL;\n        // remap the page as read only\n        utils_mmap_external_file(fullpath, true, retaddr_mapping_addr,\n                                 PROT_READ);\n    }\n\n#undef __PARSE_FILENAME\n\n    // set the client pid as the pid of fork server (loader) itself\n    // it will be updated every time we fork a new process\n    RW_PAGE_INFO(client_pid) = sys_getpid();\n\n    // XXX: currently TP mapping is not used but reserved for advanced patching.\n    // However, note that we still to maintain it as it can be quite useful in\n    // the futuer\n    while (true) {\n        // get every TP's meta-data\n        mmap_addr = tp->mmap_addr;\n        mmap_size = tp->mmap_size;\n        tp_addr = tp->tp_addr;\n        tp_size = tp->tp_size;\n        next_tp_offset = tp->next_tp_offset;\n\n        // check whether the tp needs to mmap\n        if (mmap_addr != 
NULL && mmap_size != 0) {\n            if (sys_mmap((unsigned long)mmap_addr + rip_base, mmap_size,\n                         // XXX: PROT_READ | PROT_WRITE | PROT_EXEC ?\n                         PROT_READ | PROT_EXEC,\n                         MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1,\n                         0) != (unsigned long)mmap_addr + rip_base) {\n                utils_error(loader_err_str, true);\n            }\n        }\n\n        // check whether the tp needs to memcpy\n        if (tp_addr != NULL && tp_size != 0) {\n            loader_memcpy(tp_addr + rip_base, tp->tp, tp_size);\n        }\n\n        // check terminal\n        if (next_tp_offset == 0) {\n            break;\n        }\n\n        tp = (void *)tp + next_tp_offset;\n    }\n}\n\nNO_INLINE const char *loader_output_running_path(const char *pathname) {\n    utils_puts(loader_logo_str, false);\n    utils_puts(pathname, true);\n    return pathname;\n}\n"
  },
  {
    "path": "src/loader.h",
    "content": "/*\n * loader.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __LOADER_H\n#define __LOADER_H\n\n#include \"crs_config.h\"\n\ntypedef struct trampoline_t {\n    void *mmap_addr;\n    unsigned long mmap_size;\n    void *tp_addr;\n    unsigned long tp_size;\n    unsigned long next_tp_offset;\n    unsigned char tp[];\n} Trampoline;\n\n#endif\n"
  },
  {
    "path": "src/mem_file.c",
    "content": "/*\n * mem_file.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"mem_file.h\"\n#include \"utils.h\"\n\n#include <errno.h>\n#include <fcntl.h>\n#include <sys/mman.h>\n#include <sys/types.h>\n\n#define INIT_SIZE PAGE_SIZE\n// XXX: will a small INC_SIZE_POW2 helps reduce fork overhead?\n#define INC_SIZE_POW2 (PAGE_SIZE_POW2 + 6)\n#define INC_SIZE (1 << INC_SIZE_POW2)\n\n#define _MEM_FILE_DEFINE_GETTER(OTYPE, ONAME, FTYPE, FNAME) \\\n    Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME) {   \\\n        assert(ONAME != NULL);                              \\\n        __mem_file_check_state(ONAME);                      \\\n        return ONAME->FNAME;                                \\\n    }\n\n// Stretch the file size to size.\nZ_PRIVATE int __mem_file_stretch_to_size(_MEM_FILE *stream, size_t size);\n\n// Open stream.\nZ_PRIVATE void __mem_file_open_stream(_MEM_FILE *stream, bool is_resumed);\n\n// Check the state of _MEM_FILE, to identify whether it is suitable to operate\n// on it.\nZ_PRIVATE void __mem_file_check_state(_MEM_FILE *stream);\n\n/*\n * Setter and Getter\n */\n_MEM_FILE_DEFINE_GETTER(_MEM_FILE, mem_file, const char *, filename);\n_MEM_FILE_DEFINE_GETTER(_MEM_FILE, mem_file, uint8_t *, raw_buf);\n_MEM_FILE_DEFINE_GETTER(_MEM_FILE, mem_file, uint8_t *, 
cur_ptr);\n_MEM_FILE_DEFINE_GETTER(_MEM_FILE, mem_file, size_t, size);\n\nZ_PRIVATE void __mem_file_check_state(_MEM_FILE *stream) {\n    if (!stream) {\n        EXITME(\"try to operate on an empty _MEM_FILE\");\n    }\n    if (stream->fd == INVALID_FD) {\n        EXITME(\"try to operate on a disconnected _MEM_FILE\");\n    }\n}\n\n// XXX: all possible cases when invoking __mem_file_stretch_to_size\n//  case 1: stream->size == 0, stream->raw_buf == NULL (open a new file)\n//  case 2: stream->size >  0, stream->raw_buf == NULL (resume a file)\n//  case 3: stream->size >  0, stream->raw_buf != NULL (update a file)\nZ_PRIVATE int __mem_file_stretch_to_size(_MEM_FILE *stream, size_t size) {\n    // step (0). valid size\n    if (stream->size_fixed) {\n        if (size != stream->size) {\n            EXITME(\"try to resize a size-fixed file\");\n        }\n    } else {\n        if (size < stream->size) {\n            EXITME(\"the given _MEM_FILE is too large\");\n        }\n    }\n\n    if (!size) {\n        EXITME(\"cannot stretch to 0\");\n    }\n\n    if (!stream->size && (stream->raw_buf || stream->cur_ptr)) {\n        EXITME(\"impossible case when stream->size == 0\");\n    }\n\n    // step (1). update the size of underlying file\n    // XXX: avoid write on existing data\n    if (size > stream->size) {\n        if (lseek(stream->fd, size - 1, SEEK_SET) == -1) {\n            return -1;\n        }\n\n        if (write(stream->fd, \"\", 1) == -1) {\n            return -1;\n        }\n    }\n\n    // step (2). 
update memory mapping\n    size_t old_size = stream->size;\n    size_t new_size = size;\n\n    if (stream->raw_buf) {\n        // the raw_ptr exists\n        if (new_size != old_size) {\n            assert(stream->cur_ptr >= stream->raw_buf);\n            size_t cur_offset = stream->cur_ptr - stream->raw_buf;\n            if ((stream->raw_buf = mremap(stream->raw_buf, old_size, new_size,\n                                          MREMAP_MAYMOVE)) == MAP_FAILED) {\n                EXITME(\"failed to mremap\");\n            }\n            stream->cur_ptr = stream->raw_buf + cur_offset;\n        }\n    } else {\n        // the raw_ptr does not exist\n        if ((stream->raw_buf = mmap(NULL, new_size, PROT_READ | PROT_WRITE,\n                                    MAP_SHARED, stream->fd, 0)) == MAP_FAILED) {\n            EXITME(\"failed to mmap\");\n        }\n        stream->cur_ptr = stream->raw_buf;\n    }\n\n    stream->size = new_size;\n\n    return 0;\n}\n\nZ_PRIVATE void __mem_file_open_stream(_MEM_FILE *stream, bool is_resumed) {\n    assert(stream != NULL && stream->filename != NULL);\n\n    int flag = (is_resumed ? O_RDWR : O_RDWR | O_CREAT | O_TRUNC);\n    size_t file_size = (is_resumed ? 
stream->size : INIT_SIZE);\n\n    if ((stream->fd = open(stream->filename, flag, (mode_t)0755)) == -1) {\n        goto ERROR;\n    }\n\n    // XXX: here we can have two cases:\n    //  case 1: a new file, where stream->size = 0, stream->raw_buf = NULL\n    //  case 2: an old file, where stream->size > 0, stream->raw_buf = NULL\n    if (__mem_file_stretch_to_size(stream, file_size) == -1) {\n        goto ERROR;\n    }\n\n    return;\n\nERROR:\n    z_error(\"_MEM_FILE open stream: %d(%s)\", errno, strerror(errno));\n    z_free((void *)stream->filename);\n    z_free(stream);\n    z_exit(errno);\n    return;\n}\n\nZ_API _MEM_FILE *z_mem_file_fopen(const char *pathname, const char *mode) {\n    if (z_strcmp(mode, \"w+\")) {\n        EXITME(\"for _MEM_FILE, we only support \\\"w+\\\" mode\");\n    }\n\n    _MEM_FILE *stream = STRUCT_ALLOC(_MEM_FILE);\n\n    stream->filename = z_strdup(pathname);\n    stream->raw_buf = stream->cur_ptr = NULL;\n    stream->size_fixed = false;\n\n    __mem_file_open_stream(stream, false);\n\n    return stream;\n}\n\nZ_API void z_mem_file_fsync(_MEM_FILE *stream) {\n    __mem_file_check_state(stream);\n\n    assert(stream != NULL);\n\n    z_trace(\"fsync _MEM_FILE\");\n    if (msync(stream->raw_buf, stream->size, MS_SYNC) == -1) {\n        z_error(\"_MEM_FILE fsync: %d(%s)\", errno, strerror(errno));\n        munmap(stream->raw_buf, stream->size);\n        close(stream->fd);\n        z_free(stream);\n        z_exit(errno);\n    }\n}\n\nZ_API void z_mem_file_fclose(_MEM_FILE *stream) {\n    __mem_file_check_state(stream);\n\n    assert(stream != NULL);\n\n    z_mem_file_fsync(stream);\n\n    if (munmap(stream->raw_buf, stream->size) == -1) {\n        z_error(\"_MEM_FILE fclose: %d(%s)\", errno, strerror(errno));\n        close(stream->fd);\n        z_free(stream);\n        z_exit(errno);\n    }\n\n    if (close(stream->fd) == -1) {\n        z_error(\"_MEM_FILE fclose: %d(%s)\", errno, strerror(errno));\n        z_free(stream);\n        
z_exit(errno);\n    }\n\n    z_free((void *)stream->filename);\n    z_free(stream);\n}\n\nZ_API size_t z_mem_file_pwrite(_MEM_FILE *stream, const void *buf, size_t count,\n                               size_t offset) {\n    __mem_file_check_state(stream);\n\n    assert(stream != NULL);\n\n    if (stream->size < count + offset) {\n        // stretch file size\n        size_t new_size = BITS_ALIGN_CELL(count + offset, INC_SIZE_POW2);\n        assert(new_size >= count + offset);\n\n        if (__mem_file_stretch_to_size(stream, new_size) == -1) {\n            goto ERROR;\n        }\n    }\n\n    memcpy(stream->raw_buf + offset, buf, count);\n\n    return count;\n\nERROR:\n    z_error(\"_MEM_FILE pwrite: %d(%s)\", errno, strerror(errno));\n    close(stream->fd);\n    z_free(stream);\n    z_exit(errno);\n    return SIZE_MAX;\n}\n\nZ_API size_t z_mem_file_pread(_MEM_FILE *stream, void *buf, size_t count,\n                              size_t offset) {\n    __mem_file_check_state(stream);\n\n    assert(stream != NULL);\n\n    if (stream->size < count + offset) {\n        EXITME(\"read too much from _MEM_FILE\");\n    }\n\n    memcpy(buf, stream->raw_buf + offset, count);\n    return count;\n}\n\nZ_API size_t z_mem_file_fwrite(void *ptr, size_t size, size_t nmemb,\n                               _MEM_FILE *stream) {\n    __mem_file_check_state(stream);\n\n    size_t n = z_mem_file_pwrite(stream, ptr, nmemb * size,\n                                 stream->cur_ptr - stream->raw_buf);\n    stream->cur_ptr += n;\n    return n;\n}\n\nZ_API void z_mem_file_fix_size(_MEM_FILE *stream, size_t size) {\n    __mem_file_check_state(stream);\n\n    if (size < stream->size) {\n        EXITME(\"the size of the given _MEM_FILE is too large\");\n    }\n\n    if (size % PAGE_SIZE) {\n        EXITME(\"the given size is not page-aligned\");\n    }\n\n    if (__mem_file_stretch_to_size(stream, size) == -1) {\n        EXITME(\"failed to set size for the underlying file\");\n    }\n\n    
stream->size_fixed = true;\n}\n\nZ_API size_t z_mem_file_fread(void *ptr, size_t size, size_t nmemb,\n                              _MEM_FILE *stream) {\n    __mem_file_check_state(stream);\n\n    size_t n = z_mem_file_pread(stream, ptr, nmemb * size,\n                                stream->cur_ptr - stream->raw_buf);\n    stream->cur_ptr += n;\n    return n;\n}\n\nZ_API void z_mem_file_fseek(_MEM_FILE *stream, long offset, int whence) {\n    __mem_file_check_state(stream);\n\n    assert(stream != NULL);\n\n    if (whence != SEEK_SET) {\n        EXITME(\"for _MEM_FILE seek, we only support SEEK_SET\");\n    }\n\n    if (offset >= stream->size) {\n        EXITME(\"offset is out of boundary\");\n    }\n\n    stream->cur_ptr = stream->raw_buf + offset;\n}\n\nZ_API long z_mem_file_ftell(_MEM_FILE *stream) {\n    __mem_file_check_state(stream);\n\n    assert(stream != NULL);\n\n    return (long)(stream->cur_ptr - stream->raw_buf);\n}\n\nZ_API void z_mem_file_suspend(_MEM_FILE *stream) {\n    __mem_file_check_state(stream);\n\n    z_trace(\"suspend file %s\", stream->filename);\n    if (stream->fd == INVALID_FD && stream->raw_buf == NULL &&\n        stream->cur_ptr == NULL) {\n        // XXX: a good place to debug by changing return to EXITME\n        z_warn(\"try to suspend a disconnected file, ignore\");\n        return;\n    }\n\n    z_mem_file_fsync(stream);\n\n    if (close(stream->fd) == -1) {\n        z_error(\"_MEM_FILE suspend: %d(%s)\", errno, strerror(errno));\n        z_free(stream);\n        z_exit(errno);\n    }\n    stream->fd = INVALID_FD;\n\n    if (munmap(stream->raw_buf, stream->size) == -1) {\n        z_error(\"_MEM_FILE suspend: %d(%s)\", errno, strerror(errno));\n        close(stream->fd);\n        z_free(stream);\n        z_exit(errno);\n    }\n    stream->raw_buf = stream->cur_ptr = NULL;\n}\n\nZ_API void z_mem_file_resume(_MEM_FILE *stream) {\n    z_trace(\"resume file %s\", stream->filename);\n    if (stream->fd != INVALID_FD && stream->raw_buf 
!= NULL &&\n        stream->cur_ptr != NULL) {\n        // XXX: a good place to debug by changing return to EXITME\n        z_warn(\"try to resume a connected file, ignore\");\n        return;\n    }\n    __mem_file_open_stream(stream, true);\n}\n\nZ_API void z_mem_file_save_as(_MEM_FILE *stream, const char *pathname) {\n    __mem_file_check_state(stream);\n    assert(stream != NULL);\n\n    // check whether pathname exists. if so, remove it.\n    // Note that we have to remove pathname first. Otherwise, if pathname is\n    // linked with any important file (e.g., patched file), directly\n    // fopen(pathname, \"wb\") will rewrite the important file.\n    if (!z_access(pathname, F_OK)) {\n        if (remove(pathname)) {\n            EXITME(\"failed on remove: %s (error: %s)\", pathname,\n                   strerror(errno));\n        }\n    }\n\n    FILE *f = z_fopen(pathname, \"wb\");\n    if (!f) {\n        EXITME(\"fail to open %s\", pathname);\n    }\n\n    size_t size = z_fwrite(stream->raw_buf, sizeof(uint8_t), stream->size, f);\n    if (size != stream->size) {\n        EXITME(\n            \"fail when writing content to \\\"%s\\\", expect %ld bytes, but only \"\n            \"%ld bytes (error: %s)\",\n            pathname, stream->size, size, strerror(errno));\n    }\n\n    z_fclose(f);\n    if (z_chmod(pathname, 0755)) {\n        EXITME(\"fail when chmod snapshot\");\n    }\n}\n"
  },
  {
    "path": "src/mem_file.h",
    "content": "/*\n * mem_file.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __MEM_FILE_H\n#define __MEM_FILE_H\n\n#include \"config.h\"\n\n/*\n * Use mmap to speed up FILE operations (similar with _IO_FILE)\n */\n\nSTRUCT(_MEM_FILE, {\n    int fd;\n    const char *filename;\n    uint8_t *raw_buf;\n    uint8_t *cur_ptr;\n    size_t size;  // page-aligned\n    bool size_fixed;\n});\n\n/*\n * Setter and Getter\n */\nDECLARE_GETTER(_MEM_FILE, mem_file, const char *, filename);\nDECLARE_GETTER(_MEM_FILE, mem_file, uint8_t *, raw_buf);\nDECLARE_GETTER(_MEM_FILE, mem_file, uint8_t *, cur_ptr);\nDECLARE_GETTER(_MEM_FILE, mem_file, size_t, size);\n\n/*\n * Open a _MEM_FILE with pathname.\n * Currently, we only support \"w+\" mode.\n */\nZ_API _MEM_FILE *z_mem_file_fopen(const char *pathname, const char *mode);\n\n/*\n * Synchronize a _MEM_FILE with its memory mapping.\n */\nZ_API void z_mem_file_fsync(_MEM_FILE *stream);\n\n/*\n * Close a _MEM_FILE.\n */\nZ_API void z_mem_file_fclose(_MEM_FILE *stream);\n\n/*\n * Fix the size of a _MEM_FILE. 
This function requires the size of _MEM_FILE\n * to be no larger than size.\n */\nZ_API void z_mem_file_fix_size(_MEM_FILE *stream, size_t size);\n\n/*\n * Write to a _MEM_FILE.\n * Note that only pwrite can extend file.\n */\nZ_API size_t z_mem_file_pwrite(_MEM_FILE *stream, const void *buf, size_t count,\n                               size_t offset);\n\n/*\n * Read from a _MEM_FILE.\n */\nZ_API size_t z_mem_file_pread(_MEM_FILE *stream, void *buf, size_t count,\n                              size_t offset);\n\n/*\n * fread for _MEM_FILE.\n */\nZ_API size_t z_mem_file_fread(void *ptr, size_t size, size_t nmemb,\n                              _MEM_FILE *stream);\n\n/*\n * fwrite for _MEM_FILE.\n */\nZ_API size_t z_mem_file_fwrite(void *ptr, size_t size, size_t nmemb,\n                               _MEM_FILE *stream);\n\n/*\n * fseek for _MEM_FILE.\n * Currently, we only support SEEK_SET.\n */\nZ_API void z_mem_file_fseek(_MEM_FILE *stream, long offset, int whence);\n\n/*\n * ftell for _MEM_FILE.\n */\nZ_API long z_mem_file_ftell(_MEM_FILE *stream);\n\n/*\n * suspend a _MEM_FILE, to allow other processes to access the underlying file.\n */\nZ_API void z_mem_file_suspend(_MEM_FILE *stream);\n\n/*\n * resume a _MEM_FILE.\n */\nZ_API void z_mem_file_resume(_MEM_FILE *stream);\n\n/*\n * save _MEM_FILE as pathname\n */\nZ_API void z_mem_file_save_as(_MEM_FILE *stream, const char *pathname);\n\n#endif\n"
  },
  {
    "path": "src/patcher.c",
    "content": "/*\n * patcher.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"patcher.h\"\n#include \"capstone_.h\"\n#include \"interval_splay.h\"\n#include \"iterator.h\"\n#include \"utils.h\"\n\n#include \"x64_utils.c\"\n\n#include <math.h>\n\n#define PATCH_THRESHOLD 0.99999\n#define PATCH_THRESHOLD_FOR_RETADDR (PATCH_THRESHOLD / 2)\n#define PATCH_RET_DEPTH 20\n#define BRIDGE_PRE_DEPTH 5\n\ntypedef struct bridge_point_t {\n    addr_t bridge_addr;\n    addr_t jump_addr;\n    addr_t source_addr;\n    addr_t max_addr;  // used for revoke bridge patching\n} BridgePoint;\n\n/*\n * When the underlying disassembler does not fully support prob-disasm, we\n * directly patch all possible instructions without calculating pathcing\n * candidates.\n */\nZ_PRIVATE void __patcher_patch_all_S(Patcher *p);\n\n/*\n * When the underlying fully supports prob-disasm, we need to carefully decide\n * which the patch candidates are.\n */\nZ_PRIVATE void __patcher_patch_all_F(Patcher *p);\n\n/*\n * Flip uncertain patches (used in delta debugging mode)\n */\nZ_PRIVATE void __patcher_flip_uncertain_patch(Patcher *p, addr_t addr,\n                                              bool is_enable);\n\n/*\n * Find new certain addresses via BFS\n */\nZ_PRIVATE void __patcher_bfs_certain_addresses(Patcher *p, addr_t 
addr);\n\n/*\n * Patch a new certain address, return whether this patch is successfully\n * applied.\n */\nZ_PRIVATE bool __patcher_patch_certain_address(Patcher *p, addr_t addr,\n                                               uint8_t inst_size);\n\n/*\n * Patch a new uncertain address, return whether this patch is successfully\n * applied.\n */\nZ_PRIVATE bool __patcher_patch_uncertain_address(Patcher *p, addr_t addr);\n\n/*\n * Compare two addresses\n */\nZ_PRIVATE int32_t __patcher_compare_address(addr_t a, addr_t b, void *_data);\n\nZ_PRIVATE int32_t __patcher_compare_address(addr_t a, addr_t b, void *_data) {\n    assert(!_data);\n    if (a < b) {\n        return -1;\n    } else if (a > b) {\n        return 1;\n    } else {\n        return 0;\n    }\n}\n\nZ_PRIVATE void __patcher_flip_uncertain_patch(Patcher *p, addr_t addr,\n                                              bool is_enable) {\n    if (is_enable) {\n        z_patcher_unsafe_patch(p, addr, 1, z_x64_gen_invalid(1), NULL);\n    } else {\n        size_t off = addr - p->text_addr;\n        if (off >= p->text_size) {\n            EXITME(\"invalid address: %#lx\", addr);\n        }\n        z_patcher_unsafe_patch(p, addr, 1, p->text_backup + off, NULL);\n    }\n}\n\nZ_PRIVATE bool __patcher_patch_uncertain_address(Patcher *p, addr_t addr) {\n    // step (1). check whether this address is certain\n    if (z_addr_dict_exist(p->certain_addresses, addr)) {\n        return false;\n    }\n\n    // step (2). check whether it is already patched as uncertain patch\n    if (g_sequence_lookup(p->uncertain_patches, GSIZE_TO_POINTER(addr),\n                          (GCompareDataFunc)__patcher_compare_address, NULL)) {\n        return false;\n    }\n\n    // step (3). patch underlying binary\n    z_patcher_unsafe_patch(p, addr, 1, z_x64_gen_invalid(1), NULL);\n\n    // step (4). 
update uncertain_patches\n    g_sequence_insert_sorted(p->uncertain_patches, GSIZE_TO_POINTER(addr),\n                             (GCompareDataFunc)__patcher_compare_address, NULL);\n\n    return true;\n}\n\nZ_PRIVATE bool __patcher_patch_certain_address(Patcher *p, addr_t addr,\n                                               uint8_t inst_size) {\n    // XXX: one address cannot be set as certain twice (except for the ones\n    // which are revoked for adjusting bridges)\n    if (z_addr_dict_exist(p->certain_addresses, addr)) {\n        return false;\n    }\n    z_trace(\"certain patch: %#lx\", addr);\n\n    // step (1). set certain_addresses\n    z_addr_dict_set(p->certain_addresses, addr, inst_size);\n\n    // step (2). patch underlying binary\n    z_patcher_unsafe_patch(p, addr, 1, z_x64_gen_invalid(1), NULL);\n\n    // step (3). update certain_patches and uncertain_patches\n    z_addr_dict_set(p->certain_patches, addr, true);\n    GSequenceIter *iter =\n        g_sequence_lookup(p->uncertain_patches, GSIZE_TO_POINTER(addr),\n                          (GCompareDataFunc)__patcher_compare_address, NULL);\n    if (iter) {\n        g_sequence_remove(iter);\n    }\n\n    return true;\n}\n\nZ_PRIVATE void __patcher_bfs_certain_addresses(Patcher *p, addr_t addr) {\n    // step (0). a quick check of whether addr is already known\n    if (z_addr_dict_exist(p->certain_addresses, addr)) {\n        return;\n    }\n\n    Disassembler *d = p->disassembler;\n    addr_t text_addr = p->text_addr;\n    size_t text_size = p->text_size;\n\n    // step (1). BFS to find all certain addresses\n    GQueue *queue = g_queue_new();\n    g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n\n    while (!g_queue_is_empty(queue)) {\n        // step (3.1). pop from queue and get basic information\n        addr_t cur_addr = (addr_t)g_queue_pop_head(queue);\n\n        // step (3.2). 
update certain_addresses (true means it is an instruction\n        // boundary, otherwise false)\n        if (z_addr_dict_exist(p->certain_addresses, cur_addr)) {\n            // XXX: there are two cases of duplicate updating:\n            //  a: we push the same instruction into the queue twice\n            //  b: there is an overlapping instruction caused by *LOCK* prefix\n            // The other two assertions have the same situation.\n            assert(z_addr_dict_get(p->certain_addresses, cur_addr) ||\n                   (z_addr_dict_get(p->certain_addresses, cur_addr - 1) &&\n                    z_disassembler_get_superset_disasm(d, cur_addr - 1)\n                            ->detail->x86.prefix[0] == X86_PREFIX_LOCK));\n            continue;\n        }\n\n        cs_insn *cur_inst = z_disassembler_get_superset_disasm(d, cur_addr);\n        assert(cur_inst);\n        z_trace(\"find a certain address \" CS_SHOW_INST(cur_inst));\n\n        for (int i = 0; i < cur_inst->size; i++) {\n            if (z_addr_dict_exist(p->certain_addresses, cur_addr + i)) {\n                // XXX: avoid rewriting the instruction boundary\n                assert(i == 1 &&\n                       z_addr_dict_get(p->certain_addresses, cur_addr + i) &&\n                       cur_inst->detail->x86.prefix[0] == X86_PREFIX_LOCK);\n                break;\n            }\n            __patcher_patch_certain_address(p, cur_addr + i,\n                                            (i == 0 ? 
cur_inst->size : 0));\n\n            // update pdisasm here\n            if (i == 0) {\n                z_diassembler_update_prob_disasm(d, cur_addr + i, true);\n            } else if (i == 1 &&\n                       cur_inst->detail->x86.prefix[0] == X86_PREFIX_LOCK) {\n                // XXX: we make it conservative, as we are not sure whether\n                // cur_addr + i will be used as another instruction.\n                //\n                // do nothing\n            } else {\n                z_diassembler_update_prob_disasm(d, cur_addr + i, false);\n            }\n        }\n\n        // step (3.3). check successors\n        Iter(addr_t, succ_addrs);\n        z_iter_init_from_buf(succ_addrs,\n                             z_disassembler_get_all_successors(d, cur_addr));\n        while (!z_iter_is_empty(succ_addrs)) {\n            addr_t succ_addr = *(z_iter_next(succ_addrs));\n\n            // ignore the one which is not in .text\n            if (succ_addr < text_addr || succ_addr >= text_addr + text_size) {\n                continue;\n            }\n\n            if (z_addr_dict_exist(p->certain_addresses, succ_addr)) {\n                assert(z_addr_dict_get(p->certain_addresses, succ_addr) ||\n                       (z_addr_dict_get(p->certain_addresses, succ_addr - 1) &&\n                        z_disassembler_get_superset_disasm(d, succ_addr - 1)\n                                ->detail->x86.prefix[0] == X86_PREFIX_LOCK));\n                continue;\n            }\n\n            g_queue_push_tail(queue, GSIZE_TO_POINTER(succ_addr));\n        }\n        z_iter_destroy(succ_addrs);\n    }\n\n    // step (2). 
free queue\n    g_queue_free(queue);\n}\n\n#ifdef CONSERVATIVE_PATCH\nZ_PRIVATE void __patcher_patch_all_F(Patcher *p) {\n    Disassembler *d = p->disassembler;\n    ELF *e = z_binary_get_elf(p->binary);\n\n    addr_t text_addr = p->text_addr;\n    size_t text_size = p->text_size;\n\n    // we first patch call/cjmp/jmp (at least 5 bytes)\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        if (z_disassembler_get_prob_disasm(d, addr) < PATCH_THRESHOLD) {\n            goto NEXT_ADDR;\n        }\n\n        cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n        assert(inst);\n\n        if (!z_capstone_is_call(inst) && !z_capstone_is_ret(inst) &&\n            !z_capstone_is_cjmp(inst) && !z_capstone_is_jmp(inst)) {\n            goto NEXT_ADDR;\n        }\n\n        // check RET without number\n        if (z_capstone_is_ret(inst) && inst->detail->x86.op_count) {\n            goto NEXT_ADDR;\n        }\n\n        addr_t end_addr = addr + inst->size;\n        addr_t cur_addr = addr;\n\n        // guarantee at least 5 bytes\n        while (end_addr - cur_addr < 5) {\n            Iter(addr_t, pred_addrs);\n            z_iter_init_from_buf(\n                pred_addrs,\n                z_disassembler_get_direct_predecessors(d, cur_addr));\n\n            bool found = false;\n            addr_t pred_addr = INVALID_ADDR;\n\n            while (!z_iter_is_empty(pred_addrs)) {\n                addr_t pred_addr_ = *(z_iter_next(pred_addrs));\n\n                // check the operand is not single-byte-length\n                cs_insn *pred_inst_ =\n                    z_disassembler_get_superset_disasm(d, pred_addr_);\n                if (!pred_inst_) {\n                    continue;\n                }\n                cs_detail *pred_detail_ = pred_inst_->detail;\n                if (pred_detail_->x86.op_count >= 1) {\n                    if (pred_detail_->x86.operands[0].size == 1) {\n                        continue;\n                    
}\n                }\n\n                // check probability\n                if (z_disassembler_get_prob_disasm(d, pred_addr_) <\n                    PATCH_THRESHOLD) {\n                    continue;\n                }\n\n                // multiple valid predecessors\n                if (found) {\n                    goto NEXT_ADDR;\n                }\n\n                found = true;\n                pred_addr = pred_addr_;\n            }\n\n            if (!found) {\n                goto NEXT_ADDR;\n            }\n\n            cs_insn *pred_inst =\n                z_disassembler_get_superset_disasm(d, pred_addr);\n\n            if (z_capstone_is_call(pred_inst) || z_capstone_is_ret(pred_inst) ||\n                z_capstone_is_cjmp(pred_inst) || z_capstone_is_jmp(pred_inst) ||\n                pred_addr + pred_inst->size != cur_addr) {\n                goto NEXT_ADDR;\n            }\n\n            cur_addr = pred_addr;\n        }\n\n        // TODO: advanced patching\n        // XXX: advanced patching is not that necessary for now, as the error\n        // diagnosis can help find such erroneous patchings\n\n        // check no prior patchpoints are call/cjmp/jmp\n        // Iter(addr_t, occ_addrs);\n        // z_iter_init_from_buf(occ_addrs,\n        //                      z_disassembler_get_occluded_addrs(d, cur_addr));\n        // while (!z_iter_is_empty(occ_addrs)) {\n        //     addr_t occ_addr = *(z_iter_next(occ_addrs));\n        //     if (occ_addr >= cur_addr) {\n        //         continue;\n        //     }\n        //     cs_insn *occ_inst = z_disassembler_get_superset_disasm(d,\n        //     occ_addr); assert(occ_inst); if (z_capstone_is_call(occ_inst) ||\n        //     z_capstone_is_cjmp(occ_inst) ||\n        //         z_capstone_is_jmp(occ_inst)) {\n        //         goto NEXT_ADDR;\n        //     }\n        // }\n\n        __patcher_patch_uncertain_address(p, cur_addr);\n\n    NEXT_ADDR:\n        continue;\n    }\n\n    // we then patch 
return address for normal call and plt call\n    GQueue *bfs = g_queue_new();\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        double128_t addr_prob = z_disassembler_get_prob_disasm(d, addr);\n        if (addr_prob < PATCH_THRESHOLD) {\n            continue;\n        }\n\n        cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n        assert(inst);\n\n        if (!z_capstone_is_call(inst)) {\n            continue;\n        }\n\n        cs_detail *detail = inst->detail;\n        if ((detail->x86.op_count != 1) ||\n            (detail->x86.operands[0].type != X86_OP_IMM)) {\n            continue;\n        }\n\n        addr_t callee_addr = detail->x86.operands[0].imm;\n        if (!z_elf_get_plt_info(e, callee_addr) &&\n            (callee_addr < text_addr || callee_addr >= text_addr + text_size)) {\n            continue;\n        }\n\n        addr_t ret_addr = addr + inst->size;\n\n        if (!z_elf_get_plt_info(e, callee_addr)) {\n            g_queue_push_tail(bfs, GSIZE_TO_POINTER(ret_addr));\n            size_t bfs_n = 0;\n            bool valid = false;\n\n            while (!g_queue_is_empty(bfs)) {\n                addr_t cur_addr = (addr_t)g_queue_pop_head(bfs);\n                if (z_disassembler_get_prob_disasm(d, cur_addr) >=\n                    PATCH_THRESHOLD) {\n                    valid = true;\n                    break;\n                }\n\n                Iter(addr_t, succ_addrs);\n                z_iter_init_from_buf(\n                    succ_addrs,\n                    z_disassembler_get_direct_successors(d, cur_addr));\n\n                while (!z_iter_is_empty(succ_addrs)) {\n                    addr_t succ_addr = *(z_iter_next(succ_addrs));\n                    if ((bfs_n++) < PATCH_RET_DEPTH) {\n                        g_queue_push_tail(bfs, GSIZE_TO_POINTER(succ_addr));\n                    }\n                }\n\n                if (bfs_n >= PATCH_RET_DEPTH) {\n                    break;\n 
               }\n            }\n            g_queue_clear(bfs);\n\n            if (!valid) {\n                continue;\n            }\n        } else {\n            double128_t ret_P = z_disassembler_get_prob_disasm(d, ret_addr);\n            if (copysignl(1.0, ret_P) < 0.0) {\n                continue;\n            }\n        }\n\n        __patcher_patch_uncertain_address(p, ret_addr);\n    }\n}\n#else\nZ_PRIVATE void __patcher_patch_all_F(Patcher *p) {\n    Disassembler *d = p->disassembler;\n\n    addr_t text_addr = p->text_addr;\n    size_t text_size = p->text_size;\n\n    // step (1). we first find all potential uncertain patch points including\n    // all call/cjmp/jmp/ret instruction and the ret_addr of any call\n    // instruction.\n    if (!p->potential_uncertain_addresses) {\n        for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n            cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n            if (!inst) {\n                continue;\n            }\n\n            // TODO: patch some predecessors to reduce the number of delayed\n            // bridges\n\n            if (z_capstone_is_ret(inst) || z_capstone_is_cjmp(inst) ||\n                z_capstone_is_jmp(inst)) {\n                p->potential_uncertain_addresses = g_list_prepend(\n                    p->potential_uncertain_addresses, GSIZE_TO_POINTER(addr));\n                continue;\n            }\n\n            if (z_capstone_is_call(inst)) {\n                p->potential_uncertain_addresses = g_list_prepend(\n                    p->potential_uncertain_addresses, GSIZE_TO_POINTER(addr));\n\n                // TODO: leverage non-return analysis to improve here\n                addr_t ret_addr = addr + inst->size;\n                if (z_disassembler_get_superset_disasm(d, ret_addr)) {\n                    // XXX: we use -ret_addr to indicate it is a return address\n                    addr_t negative_addr = (addr_t)(-(int64_t)ret_addr);\n                   
 p->potential_uncertain_addresses =\n                        g_list_prepend(p->potential_uncertain_addresses,\n                                       GSIZE_TO_POINTER(negative_addr));\n                }\n            }\n        }\n    }\n\n    // step (2). apply patches\n    {\n        GList *l = p->potential_uncertain_addresses;\n        while (l != NULL) {\n            GList *next = l->next;\n\n            // step (2.1) get address and threshold_p\n            addr_t addr = INVALID_ADDR;\n            double128_t threshold_p = 1.0;\n\n            int64_t addr_r = (int64_t)l->data;\n            if (addr_r >= 0) {\n                addr = (addr_t)addr_r;\n                threshold_p = PATCH_THRESHOLD;\n            } else {\n                addr = (addr_t)(-addr_r);\n                threshold_p = PATCH_THRESHOLD_FOR_RETADDR;\n            }\n\n            // step (2.2). patch the ones which have high probabilities and\n            // which are still uncertain\n            if (z_addr_dict_exist(p->certain_addresses, addr)) {\n                // addr is certain to be code currently, which means it can be\n                // remove from the uncertain patch list\n                p->potential_uncertain_addresses =\n                    g_list_delete_link(p->potential_uncertain_addresses, l);\n            } else {\n                if (z_disassembler_get_prob_disasm(d, addr) > threshold_p) {\n                    __patcher_patch_uncertain_address(p, addr);\n                }\n            }\n\n            // step (2.3). 
goto next\n            l = next;\n        }\n    }\n}\n#endif\n\nZ_PRIVATE void __patcher_patch_all_S(Patcher *p) {\n    addr_t text_addr = p->text_addr;\n    size_t text_size = p->text_size;\n\n    Disassembler *d = p->disassembler;\n\n    addr_t cur_addr = text_addr;\n    while (cur_addr < text_addr + text_size) {\n        if (z_disassembler_get_prob_disasm(d, cur_addr) < PATCH_THRESHOLD) {\n            cur_addr += 1;\n            continue;\n        }\n\n        cs_insn *cur_inst = z_disassembler_get_superset_disasm(d, cur_addr);\n        assert(cur_inst);\n        z_trace(\"handle instruction: \" CS_SHOW_INST(cur_inst));\n\n        // TODO: handle the overlapping instruction introduced by *LOCK* prefix\n        size_t i = 0;\n        do {\n            if (z_disassembler_get_prob_disasm(d, cur_addr) < PATCH_THRESHOLD) {\n                EXITME(\"invalid address for simple pdisasm \" CS_SHOW_INST(\n                    cur_inst));\n            }\n\n            __patcher_patch_certain_address(p, cur_addr,\n                                            (i == 0 ? 
cur_inst->size : 0));\n\n            cur_addr += 1;\n            i += 1;\n        } while (i < cur_inst->size);\n    }\n}\n\nZ_API void z_patcher_describe(Patcher *p) {\n    if (p->s_iter || p->e_iter) {\n        EXITME(\"cannot make requests when delta debugging mode is enable\");\n    }\n\n    // first do patching\n    z_patcher_initially_patch(p);\n\n    Disassembler *d = p->disassembler;\n    addr_t text_addr = p->text_addr;\n    size_t text_size = p->text_size;\n\n    z_sayf(\"%-7s%-25s%-25s%-25s%-25s%-25s%-8s%-60s%-5s%s\\n\", \"status\",\n           \"inst hint\", \"inst lost\", \"data hint\", \"D\", \"P\", \"SCC\", \"inst\",\n           \"size\", \" succs\");\n\n    Buffer *patchpoints = z_buffer_create(NULL, 0);\n\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        cs_insn *inst = NULL;\n        uint32_t scc_id = 0;\n        double128_t inst_hint = NAN;\n        double128_t inst_lost = NAN;\n        double128_t data_hint = NAN;\n        double128_t D = NAN;\n        double128_t P = NAN;\n\n        z_disassembler_get_prob_disasm_internal(d, addr, &inst, &scc_id,\n                                                &inst_hint, &inst_lost,\n                                                &data_hint, &D, &P);\n\n        const char *status = \"\";\n        PPType pp_type = z_patcher_check_patchpoint(p, addr);\n        if (pp_type != PP_INVALID) {\n            if (pp_type == PP_CERTAIN) {\n                status = \"CC\";\n            } else if (pp_type == PP_UNCERTAIN) {\n                status = \"UC\";\n            } else if (pp_type == PP_BRIDGE) {\n                status = \"BC\";\n            }\n            z_buffer_append_raw(patchpoints, (uint8_t *)&addr, sizeof(addr));\n        }\n\n        if (!isnan(data_hint) && !isinf(data_hint) &&\n            data_hint > 10000000000000000000.0) {\n            z_sayf(\"%-7s%-25.12Lf%-25.2Lf%-25Le%-25.12Lf%+-25.12Lf\", status,\n                   inst_hint, inst_lost, data_hint, D, P);\n   
     } else {\n            z_sayf(\"%-7s%-25.12Lf%-25.2Lf%-25.2Lf%-25.12Lf%+-25.12Lf\", status,\n                   inst_hint, inst_lost, data_hint, D, P);\n        }\n        if (inst) {\n            z_sayf(\"%-8d\", scc_id);\n            const char *inst_str = z_alloc_printf(CS_SHOW_INST(inst));\n            z_sayf(\"%-60s%-5d\", inst_str, inst->size);\n            z_free((void *)inst_str);\n            Iter(addr_t, succ_addrs);\n            z_iter_init_from_buf(succ_addrs,\n                                 z_disassembler_get_all_successors(d, addr));\n            while (!z_iter_is_empty(succ_addrs)) {\n                z_sayf(\" {%#lx}\", *(z_iter_next(succ_addrs)));\n            }\n            z_sayf(\"\\n\");\n        } else {\n            z_sayf(\"%-8d(%#lx:\\tinvalid)\\n\", scc_id, addr);\n        }\n    }\n\n    z_buffer_write_file(patchpoints, \"patchpoints.log\");\n    z_buffer_destroy(patchpoints);\n}\n\nZ_API Patcher *z_patcher_create(Disassembler *d, RewritingOptArgs *opts) {\n    Patcher *p = STRUCT_ALLOC(Patcher);\n\n    p->opts = opts;\n\n    p->disassembler = d;\n    p->binary = z_disassembler_get_binary(d);\n\n    p->pdisasm_enable = z_disassembler_fully_support_prob_disasm(d);\n\n    p->elf = z_binary_get_elf(p->binary);\n    Elf64_Shdr *text = z_elf_get_shdr_text(p->elf);\n    p->text_addr = text->sh_addr;\n    p->text_size = text->sh_size;\n    p->text_ptr = z_elf_vaddr2ptr(p->elf, p->text_addr);\n    p->text_backup = NULL;\n\n    z_addr_dict_init(p->certain_addresses, p->text_addr, p->text_size);\n\n    z_addr_dict_init(p->certain_patches, p->text_addr, p->text_size);\n    p->uncertain_patches = g_sequence_new(NULL);\n    p->bridges = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                                       (GDestroyNotify)(&z_free));\n\n    p->potential_uncertain_addresses = NULL;\n\n    p->s_iter = NULL;\n    p->e_iter = NULL;\n\n    p->patched_bridges = 0;\n    p->delayed_bridges = 0;\n    p->resolved_bridges = 0;\n  
  p->adjusted_bridges = 0;\n\n    return p;\n}\n\nZ_API void z_patcher_destroy(Patcher *p) {\n    z_addr_dict_destroy(p->certain_addresses);\n    z_addr_dict_destroy(p->certain_patches);\n    g_sequence_free(p->uncertain_patches);\n\n    g_hash_table_destroy(p->bridges);\n\n    z_rptr_destroy(p->text_ptr);\n\n    if (p->text_backup) {\n        z_free(p->text_backup);\n    }\n\n    if (p->potential_uncertain_addresses) {\n        g_list_free(p->potential_uncertain_addresses);\n    }\n\n    z_free(p);\n}\n\nZ_API void z_patcher_initially_patch(Patcher *p) {\n    assert(p != NULL);\n    if (p->s_iter || p->e_iter) {\n        EXITME(\"cannot do initial patch in delta debugging mode\");\n    }\n\n    // backup .text\n    if (p->text_backup) {\n        EXITME(\"backed up .text before initial patching\");\n    }\n    p->text_backup = z_alloc(p->text_size, sizeof(uint8_t));\n    z_rptr_memcpy(p->text_backup, p->text_ptr, p->text_size);\n\n    // do prob-disassemble first\n    z_disassembler_prob_disasm(p->disassembler);\n\n    // fill all patch candidates as HLT (0xf4) or ILLEGAL INSTRUCTION\n    if (!p->pdisasm_enable) {\n        __patcher_patch_all_S(p);\n    } else {\n        __patcher_patch_all_F(p);\n    }\n}\n\nZ_API PPType z_patcher_check_patchpoint(Patcher *p, addr_t addr) {\n    if (p->s_iter || p->e_iter) {\n        EXITME(\"cannot make requests when delta debugging mode is enable\");\n    }\n\n#ifdef BINARY_SEARCH_DEBUG_REWRITER\n    z_warn(\n        \"when debuging rewriter, real crashes may cause unintentional \"\n        \"behaviors\");\n#endif\n\n    // step (0). check whether addr is in .text (some real crash points are in\n    // the shadow code)\n    if (addr < p->text_addr || addr >= p->text_addr + p->text_size) {\n        return PP_INVALID;\n    }\n\n    // step (1). 
check certain patches\n    // TODO: the overlapping *LOCK* instruction may cause problems\n    if (z_addr_dict_exist(p->certain_patches, addr) &&\n        z_addr_dict_get(p->certain_addresses, addr)) {\n        return PP_CERTAIN;\n    }\n\n    // step (2). check uncertain patches\n    GSequenceIter *iter =\n        g_sequence_lookup(p->uncertain_patches, GSIZE_TO_POINTER(addr),\n                          (GCompareDataFunc)__patcher_compare_address, NULL);\n    if (iter) {\n        return PP_UNCERTAIN;\n    }\n\n    // step (3). check bridge\n    if (g_hash_table_lookup(p->bridges, GSIZE_TO_POINTER(addr))) {\n        return PP_BRIDGE;\n    }\n\n    return PP_INVALID;\n}\n\n// TODO: BINARY_SEARCH_DEBUG_XXX may cause bugs for the following new code\n//\n// TODO: it is a basic jump instruction patching algorithm (w/ auto fix and\n// delayed patching) currently, we may leverage E9Patch tech in the future\n//\n// XXX: following is a typical bridge layout (before and after patching), where\n// symbol \"|\" denotes an instruction boundary.\n//\n//                      Bytes: B B B B B B B B B B B B B B B B B B B B B B B\n//  ---------------------------------------------------------------------------\n//            Before patching:\n//                             |---|-|-----|-------|---|-----|-----|-------|\n//\n//  ---------------------------------------------------------------------------\n//             After patching:\n//                             |*******|???|-------|---|-----|-----|-------|\n//\n//         overlapping inst A:     |---------------------|\n//         overlapping inst B:       |-|\n//\n//                jump values: J J J J J\n//            patching values:           P P P P P P P P P P P P P P P P P P\n//\n//              bridge points: X   X X  X  X       X   X   X\n//       left certain patches:                               C C C C C C C C\n//\n//   bridge-related addresses: A A A A A A A A A A A A A A A\n//\nZ_API void 
z_patcher_build_bridge(Patcher *p, addr_t ori_addr,\n                                  addr_t shadow_addr, bool is_real) {\n    if (p->s_iter || p->e_iter) {\n        EXITME(\"cannot build bridge in delta debugging mode\");\n    }\n\n    Disassembler *d = p->disassembler;\n\n#ifdef BINARY_SEARCH_DEBUG_REWRITER\n    // avoid infinite loop\n    ELF *e = z_binary_get_elf(p->binary);\n    if (ori_addr == shadow_addr) {\n        cs_insn *inst = z_disassembler_get_superset_disasm(d, ori_addr);\n        z_elf_write(e, ori_addr, inst->size, inst->bytes);\n        return;\n    }\n#endif\n\n    // step (0). check ori_addr range\n    if (ori_addr < p->text_addr || ori_addr >= p->text_addr + p->text_size) {\n        EXITME(\"invalid address for bridge: %#lx\", ori_addr);\n    }\n\n    // step (1). update certain_addresses\n    __patcher_bfs_certain_addresses(p, ori_addr);\n\n    // step (2). check whether there is a bridge already built on current addr\n    BridgePoint *ori_bp = (BridgePoint *)g_hash_table_lookup(\n        p->bridges, GSIZE_TO_POINTER(ori_addr));\n    if (ori_bp) {\n        // It is possible when the address is regarded as external crashpoint\n        // and then regarded as retaddr.\n        // Additionally, note that even if this is a fake crashpoint, it still\n        // cannot be a non-leading PP_BRIDGE (i.e., not the starting point of a\n        // bridge), as ori_addr should have been adjusted.\n        if (ori_bp->bridge_addr != ori_addr) {\n            EXITME(\"strange overlapped bridge detected: %#lx / %#lx\", ori_addr,\n                   ori_bp->bridge_addr);\n        }\n        return;\n    }\n    if (!ori_bp && !z_addr_dict_exist(p->certain_patches, ori_addr)) {\n        // TODO: remove the following is_real checking when confirming it is\n        // useless.\n        if (!is_real) {\n            // XXX: it is possible that a fake bridge, which is not triggered by\n            // a control flow crash, is added on code for another delayed\n         
   // bridge.\n            // XXX: a very typical case for this branch is, when pdisasm is\n            // fully enabled:\n            //  1. For an unsafe crashpoint A, we resolved this unsafety by\n            //  adding a new crashpoint B.\n            //  2. Crashpoint B was triggered, but it is still unsafe and cannot\n            //  be resolved. So we delayed it.\n            //  3. Both A and B are logged. But later, when applying the log, B\n            //  is first applied.\n\n            // XXX: Above comments may be out-of-date. By applying the new way\n            // of logging crashpoints, the aforementioned case seems to be\n            // impossible to happen.\n            EXITME(\"invalid fake bridge address: %#lx\", ori_addr);\n            return;\n        }\n        EXITME(\"invalid bridge address: %#lx\", ori_addr);\n    }\n\n    // step (3). declare some important variables for further operations\n    bool safe_patch = true;\n    bool bridge_patched = false;\n\n    addr_t bridge_sources[35];  // the longest x64 inst is 15-bytes (5 + 15 * 2)\n    addr_t bridge_max_addr = ori_addr;\n    GQueue *bridge_queue = g_queue_new();\n\n    size_t ori_size = z_addr_dict_get(p->certain_addresses, ori_addr);\n    if (!ori_size) {\n        EXITME(\"the address of a bridge should be an instruction boundary\");\n    }\n\n    // the real address of the patched jump instruction\n    addr_t jmp_addr = ori_addr;\n\n    // We will try use all the addresses in [ori_addr, ori_addr + ori_size) as\n    // the starting point of the jump instruction, so that we do not delay too\n    // many bridges.\n    // XXX: the overlapping *LOCK* instruction may cause some troubles.\n    do {\n        // initialize some local variables first\n        // XXX: the safe_patch should be initialized as true, because we haven't\n        // tested the new jmp_addr.\n        safe_patch = true;\n\n        bridge_max_addr = ori_addr;\n        memset(bridge_sources, 0, sizeof(bridge_sources));\n\n    
    // patch nop\n        if (jmp_addr != ori_addr) {\n            if (!bridge_patched) {\n                EXITME(\"the bridge much be applied in this case\");\n            }\n            z_patcher_unsafe_patch(p, ori_addr, jmp_addr - ori_addr,\n                                   z_x64_gen_nop(jmp_addr - ori_addr), NULL);\n        }\n\n        // step (4). pre-patch bridge and additionally check whether current\n        // patch is valid (for overlapping instructions).\n\n        // step (4.0). check whether the new occupied byte is certain_patches\n        if (jmp_addr == ori_addr) {\n            // all first 5 bytes (a jmp instruction) need to be certain\n            // patches\n            for (size_t i = 0; i < 5; i++) {\n                if (!z_addr_dict_exist(p->certain_patches, ori_addr + i)) {\n                    z_info(\n                        \"an unsafe bridge patching caused by no enough certain \"\n                        \"patches, try to resolve it... \"\n                        \"(failed address %#lx, based on bridge address %#lx)\",\n                        ori_addr + i, ori_addr);\n                    safe_patch = false;\n                    goto TRY_TO_PATCH_DONE;\n                }\n            }\n        } else {\n            if (!z_addr_dict_exist(p->certain_patches, jmp_addr + 4)) {\n                // XXX: it means all next jmp_addrs will be invalid\n                z_info(\n                    \"an unsafe bridge patching caused by no enough certain \"\n                    \"patches, try to resolve it... \"\n                    \"(failed address %#lx, based on bridge address %#lx)\",\n                    jmp_addr + 4, ori_addr);\n                safe_patch = false;\n                goto TRY_TO_PATCH_DONE;\n            }\n        }\n\n        // step (4.1). 
pre-patch bridge (and revoke certain patches).\n        {\n            bridge_patched = true;\n            KS_ASM_JMP(jmp_addr, shadow_addr);\n            z_patcher_unsafe_patch(p, jmp_addr, ks_size, ks_encode, NULL);\n            assert(ks_size == 5);\n\n            // revoke patchpoints of PP_CERTAIN\n            // XXX: note that the uncertain patchpoints have already been\n            // replaced by certain ones in step (1).\n            if (jmp_addr == ori_addr) {\n                for (size_t off = 0; off < 5; off++) {\n                    z_addr_dict_remove(p->certain_patches, jmp_addr + off);\n                }\n            } else {\n                // for the jmp_addr other than ori_addr, we only need to remove\n                // the last byte of the patched jmp instruction\n                z_addr_dict_remove(p->certain_patches, jmp_addr + 4);\n            }\n        }\n\n        // step (4.2). additionally check whether current patch is valid (for\n        // overlapping instructions)\n        {\n            // step (4.2.0). set up bridge starting point\n            bridge_sources[0] = ori_addr;\n\n            // XXX: as jmp_addr is inside the original bridge instruction, it\n            // cannot be a crashpoint.\n            /* bridge_sources[jmp_addr - ori_addr] = jmp_addr; */\n\n            // XXX: The first element is the target address and the second\n            // element is the source address.\n            g_queue_clear(bridge_queue);\n\n            // step (4.2.1). 
insert the sources of overlapping instruction\n            for (size_t off = 1; off < 5; off++) {\n                if (z_addr_dict_get(p->certain_addresses, jmp_addr + off)) {\n                    g_queue_push_tail(bridge_queue,\n                                      GSIZE_TO_POINTER(jmp_addr + off));\n                    g_queue_push_tail(bridge_queue,\n                                      GSIZE_TO_POINTER(jmp_addr + off));\n                }\n            }\n\n            // step (4.2.2). validate all possible overlapping instructions\n            while (!g_queue_is_empty(bridge_queue)) {\n                addr_t cur_addr = (addr_t)g_queue_pop_head(bridge_queue);\n                addr_t src_addr = (addr_t)g_queue_pop_head(bridge_queue);\n\n                size_t cur_off = cur_addr - ori_addr;\n\n                z_rptr_inc(p->text_ptr, uint8_t, cur_addr - p->text_addr);\n                CS_DISASM(p->text_ptr, cur_addr, 1);\n                z_rptr_reset(p->text_ptr);\n\n                // update bridge information\n                {\n                    if (cur_addr > bridge_max_addr) {\n                        bridge_max_addr = cur_addr;\n                    }\n                    if (!bridge_sources[cur_off] ||\n                        src_addr < bridge_sources[cur_off]) {\n                        bridge_sources[cur_off] = src_addr;\n                    }\n                }\n\n                // invalid instruction (nice!)\n                if (cs_count == 0) {\n                    continue;\n                }\n\n                // TODO: handle control flow transfer instruction (e.g., set\n                // unsafe_patch once any control flow transfer instruction is\n                // involved)\n                if (z_capstone_is_ret(cs_inst) || z_capstone_is_cjmp(cs_inst) ||\n                    z_capstone_is_jmp(cs_inst) || z_capstone_is_call(cs_inst)) {\n                    z_info(\n                        \"find an unsafe patch caused an inner jump, try next 
\"\n                        \"jmp_addr... (current bridge address %#lx and jmp addr \"\n                        \"%#lx)\",\n                        ori_addr, jmp_addr);\n                    z_info(\"current failed jmp inst: \" CS_SHOW_INST(cs_inst));\n                    safe_patch = false;\n                    break;\n                }\n\n                // check whether the successor is still in the bridge\n                addr_t next_addr = cur_addr + cs_inst->size;\n                size_t next_off = cur_off + cs_inst->size;\n                if (next_addr < jmp_addr + 5) {\n                    g_queue_push_tail(bridge_queue,\n                                      GSIZE_TO_POINTER(next_addr));\n                    g_queue_push_tail(bridge_queue, GSIZE_TO_POINTER(src_addr));\n                    continue;\n                }\n\n                // check whether the successor is a certain patch\n                if (z_addr_dict_exist(p->certain_patches, next_addr)) {\n                    // additionally handle the next instruction\n                    if (next_addr > bridge_max_addr) {\n                        bridge_max_addr = next_addr;\n                    }\n                    if (!bridge_sources[next_off] ||\n                        src_addr < bridge_sources[next_off]) {\n                        bridge_sources[next_off] = src_addr;\n                    }\n                    continue;\n                }\n\n                z_info(\n                    \"find an unsafe bridge patching without a certain ending, \"\n                    \"try next jmp_addr... 
(failed address %#lx, based on \"\n                    \"bridge address %#lx and jmp addr %#lx)\",\n                    next_addr, ori_addr, jmp_addr);\n\n                safe_patch = false;\n                break;\n            }\n\n            if (!safe_patch) {\n                // XXX: current !safe_patch means this jmp_addr is unsafe\n                goto NEXT_JMP_ADDR;\n            }\n        }\n\n        // step (4.3). check all affected addresses are in certain_patches.\n        for (addr_t cur_addr = jmp_addr + 5; cur_addr <= bridge_max_addr;\n             cur_addr++) {\n            if (!z_addr_dict_exist(p->certain_patches, cur_addr)) {\n                safe_patch = false;\n                goto NEXT_JMP_ADDR;\n            }\n        }\n\n        // step (4.4) find a safe patch\n        if (!safe_patch) {\n            EXITME(\"only safe patch can go into here\");\n        }\n        goto TRY_TO_PATCH_DONE;\n\n    NEXT_JMP_ADDR:\n        jmp_addr += 1;\n    } while (jmp_addr < ori_addr + ori_size);\n\nTRY_TO_PATCH_DONE:\n    g_queue_free(bridge_queue);\n\n    // step (5). 
if it is a safe patch, update bridge information\n    if (safe_patch) {\n        if (jmp_addr == ori_addr + ori_size) {\n            EXITME(\"invalid jmp_addr\");\n        }\n\n        z_info(\"successfully patch at address %#lx @ %#lx\", jmp_addr, ori_addr);\n\n        for (addr_t cur_addr = ori_addr; cur_addr <= bridge_max_addr;\n             cur_addr++) {\n            assert(\n                !g_hash_table_lookup(p->bridges, GSIZE_TO_POINTER(cur_addr)));\n            size_t off = cur_addr - ori_addr;\n\n            // XXX: remember to revoke certain_patches\n            if (z_addr_dict_exist(p->certain_patches, cur_addr)) {\n                z_addr_dict_remove(p->certain_patches, cur_addr);\n            }\n\n            // first check whether it is a patch-influenced detection point\n            if (bridge_sources[off]) {\n                BridgePoint *bp = z_alloc(1, sizeof(BridgePoint));\n                bp->bridge_addr = ori_addr;\n                bp->jump_addr = jmp_addr;\n                bp->source_addr = bridge_sources[off];\n                bp->max_addr = bridge_max_addr;\n\n                g_hash_table_insert(p->bridges, GSIZE_TO_POINTER(cur_addr),\n                                    (gpointer)bp);\n                continue;\n            }\n\n            // actually, all affected instruction boudnaries in jmp patching\n            // shoud be handled before\n            assert(!(cur_addr < jmp_addr + 5 &&\n                     z_addr_dict_get(p->certain_addresses, cur_addr)));\n\n            // then check it is an inst boundary before the patched jmp inst\n            if (cur_addr >= jmp_addr + 5 &&\n                z_addr_dict_get(p->certain_addresses, cur_addr)) {\n                BridgePoint *bp = z_alloc(1, sizeof(BridgePoint));\n                bp->bridge_addr = ori_addr;\n                bp->jump_addr = jmp_addr;\n                bp->source_addr = cur_addr;\n                bp->max_addr = bridge_max_addr;\n\n                
g_hash_table_insert(p->bridges, GSIZE_TO_POINTER(cur_addr),\n                                    (gpointer)bp);\n                continue;\n            }\n        }\n\n        p->patched_bridges += 1;\n        return;\n    }\n\n    // step (6). for unsafe patches, we need first revoke the patched bridge\n    if (bridge_patched) {\n        // XXX: all bytes before jmp_addr + 5, which are patched as bridge (jmp)\n        // and nop, were originally certain patches. So we can safely reset\n        // them as certain patches.\n        size_t n = jmp_addr + 5 - ori_addr;\n        z_patcher_unsafe_patch(p, ori_addr, n, z_x64_gen_invalid(n), NULL);\n\n        for (size_t i = 0; i < n; i++) {\n            z_addr_dict_set(p->certain_patches, ori_addr + i, true);\n        }\n    }\n\n    // step (7). for unsafe patches, we try to resolve it\n    // XXX: note that we can only resolve such unsafe patches when pdisasm is\n    // fully supported, because only uncertain patches, which do not exist when\n    // pdisasm is not fully supported, can help fix the unsafe patches.\n    if (p->pdisasm_enable) {\n        bool new_uncertain_patch = false;\n\n        // XXX: the first element is the target address, the second is the depth\n        GQueue *queue = g_queue_new();\n\n        // step (7.1). 
find all possible uncertain predecessor patches\n        g_queue_push_tail(queue, GSIZE_TO_POINTER(ori_addr));\n        g_queue_push_tail(queue, GSIZE_TO_POINTER(0));\n\n        while (!g_queue_is_empty(queue)) {\n            addr_t cur_addr = (addr_t)g_queue_pop_head(queue);\n            size_t depth = (size_t)g_queue_pop_head(queue);\n\n            if (depth > BRIDGE_PRE_DEPTH) {\n                continue;\n            }\n\n            // get predecessors\n            Iter(addr_t, pred_addrs);\n            z_iter_init_from_buf(\n                pred_addrs, z_disassembler_get_all_predecessors(d, cur_addr));\n\n            while (!z_iter_is_empty(pred_addrs)) {\n                // pred_addr must be in .text (it may be incomplete when\n                // pre-superset disasm is not enabled)\n                addr_t pred_addr = *(z_iter_next(pred_addrs));\n\n                // check prob\n                if (z_disassembler_get_prob_disasm(d, pred_addr) <\n                    PATCH_THRESHOLD) {\n                    continue;\n                }\n\n                // there are some cases where the following predicate is false:\n                //  case (1). pred_addr is in certain_addresses\n                //  case (2). pred_addr already be patched as uncertain patches\n                //      case (2.a). pred_addr is patched by this BFS\n                //      case (2.b). 
pred_addr is patched by others\n                if (!__patcher_patch_uncertain_address(p, pred_addr)) {\n                    continue;\n                }\n\n                // TODO: decide whether this new uncertain patch should be added\n                // into the list of potential_uncertain_addresses\n                new_uncertain_patch = true;\n                z_info(\"resolve the unsafe patch by patching %#lx\", pred_addr);\n                g_queue_push_tail(queue, GSIZE_TO_POINTER(pred_addr));\n                g_queue_push_tail(queue, GSIZE_TO_POINTER(depth + 1));\n            }\n\n            z_iter_destroy(pred_addrs);\n        }\n\n        g_queue_free(queue);\n\n        // step (7.2) return if we can resolve it by the next execution\n        if (new_uncertain_patch) {\n            p->resolved_bridges += 1;\n            return;\n        }\n    }\n\n    // step (8). if we cannot resolve it, we delay the patches\n    // XXX: avoid touch other patch points\n    z_info(\"fail to resolve the unsafe patch, let's delay it: %#lx\", ori_addr);\n    {\n        z_rptr_inc(p->text_ptr, uint8_t, ori_addr - p->text_addr);\n        addr_t cur_addr = ori_addr;\n\n        while (z_addr_dict_exist(p->certain_addresses, cur_addr) &&\n               z_addr_dict_exist(p->certain_patches, cur_addr)) {\n            assert(z_addr_dict_get(p->certain_addresses, cur_addr));\n\n            cs_insn *cur_inst = z_disassembler_get_superset_disasm(d, cur_addr);\n            assert(cur_inst);\n\n            z_rptr_memcpy(p->text_ptr, cur_inst->bytes, cur_inst->size);\n\n            for (size_t i = 0; i < cur_inst->size; i++) {\n                // XXX: in this case, cur_addr + i belongs to neighter bridges\n                // nor certain_patches, but it belongs to certain_addresses. 
It\n                // is a special case for delayed bridges.\n                z_addr_dict_remove(p->certain_patches, cur_addr + i);\n            }\n\n            // we end at terminator (e.g., ret) or call\n            if (z_capstone_is_terminator(cur_inst) ||\n                z_capstone_is_call(cur_inst)) {\n                break;\n            }\n\n            cur_addr += cur_inst->size;\n            z_rptr_inc(p->text_ptr, uint8_t, cur_inst->size);\n        }\n\n        z_rptr_reset(p->text_ptr);\n\n        p->delayed_bridges += 1;\n    }\n\n    return;\n}\n\nZ_API void z_patcher_bridge_stats(Patcher *p) {\n    z_info(\"number of patched bridges : %d\", p->patched_bridges);\n    z_info(\"number of delayed bridges : %d\", p->delayed_bridges);\n    z_info(\"number of resolved bridges: %d\", p->resolved_bridges);\n    z_info(\"number of adjusted bridges: %d\", p->adjusted_bridges);\n}\n\nZ_API addr_t z_patcher_adjust_bridge_address(Patcher *p, addr_t addr) {\n    if (p->s_iter || p->e_iter) {\n        EXITME(\"cannot adjust bridge in delta debugging mode\");\n    }\n\n    BridgePoint *bp = g_hash_table_lookup(p->bridges, GSIZE_TO_POINTER(addr));\n\n    // case (1). this is not a bridge point, and we do nothing.\n    if (!bp) {\n        return addr;\n    }\n\n    // case (2). this is the bridge starting point, and we do nothing too.\n    if (bp->bridge_addr == addr) {\n        return addr;\n    }\n\n    // it is invalid that jump_addr == addr at here (note that currently addr is\n    // not the bridge point).\n    if (bp->jump_addr == addr) {\n        EXITME(\"internal jump point cannot be a crash point\");\n    }\n\n    // case (3). this crash is caused by an overlapping instruction. 
We need to\n    // revoke this bridge patching.\n    addr_t bridge_addr = bp->bridge_addr;\n    addr_t jump_addr = bp->jump_addr;\n    addr_t source_addr = bp->source_addr;\n    addr_t max_addr = bp->max_addr;\n    z_info(\"detect a solvable bridge overlapping: %#lx / %#lx\", addr,\n           bridge_addr);\n\n    // step (1). revoke the tail part of bridge (after source_addr), if\n    // necessary\n    if (source_addr < jump_addr + 5) {\n        size_t tail_size = jump_addr + 5 - source_addr;\n        z_patcher_unsafe_patch(p, source_addr, tail_size,\n                               z_x64_gen_invalid(tail_size), NULL);\n    }\n\n    // step (2). revoke the head part of bridge (before source_addr)\n    {\n        assert(source_addr > bridge_addr);\n        size_t head_size = source_addr - bridge_addr;\n\n        // XXX: these addresses are also the special cases for delayed bridges.\n        // Again, them do not belong to certain_patches and bridges, but belong\n        // to certain_addresses.\n        z_patcher_unsafe_patch(p, bridge_addr, head_size,\n                               p->text_backup + (bridge_addr - p->text_addr),\n                               NULL);\n    }\n\n    // step (3). 
remove all associated bridge information and reset some as\n    // certain patches\n    {\n        for (addr_t cur_addr = bridge_addr; cur_addr <= max_addr; cur_addr++) {\n            if (cur_addr >= source_addr) {\n                z_addr_dict_set(p->certain_patches, cur_addr, true);\n            }\n\n            g_hash_table_remove(p->bridges, GSIZE_TO_POINTER(cur_addr));\n        }\n    }\n\n    p->adjusted_bridges += 1;\n\n    return source_addr;\n}\n\nZ_API size_t z_patcher_uncertain_patches_n(Patcher *p) {\n    if (p->s_iter || p->e_iter) {\n        EXITME(\"cannot make requests when delta debugging mode is enable\");\n    }\n\n    return g_sequence_get_length(p->uncertain_patches);\n}\n\nZ_API void z_patcher_self_correction_start(Patcher *p) {\n    if (p->s_iter || p->e_iter) {\n        EXITME(\"self correction procedure already started\");\n    }\n    if (!p->pdisasm_enable) {\n        EXITME(\"self correction procedure only works when pdisasm is enable\");\n    }\n\n    p->s_iter = g_sequence_get_begin_iter(p->uncertain_patches);\n    p->e_iter = g_sequence_get_end_iter(p->uncertain_patches);\n}\n\nZ_API void z_patcher_self_correction_end(Patcher *p) {\n    if (!p->s_iter || !p->e_iter) {\n        EXITME(\"self correction procedure did not start\");\n    }\n    if (!p->pdisasm_enable) {\n        EXITME(\"self correction procedure only works when pdisasm is enable\");\n    }\n\n    Disassembler *d = p->disassembler;\n\n    // step (1). 
repair the buggy rewriting if any\n    // XXX: note that we only need to do online re-patching when there are some\n    // rewriting errors.\n    if (p->s_iter != p->e_iter) {\n        // step (1.1) disable such uncertain patches and update pdisasm\n        GSequenceIter *iter = p->s_iter;\n        while (iter != p->e_iter) {\n            addr_t err_addr = (addr_t)g_sequence_get(iter);\n            z_info(\"repair rewriting error: %#lx\", err_addr);\n\n            __patcher_flip_uncertain_patch(p, err_addr, false);\n            z_diassembler_update_prob_disasm(d, err_addr, false);\n\n            iter = g_sequence_iter_next(iter);\n        }\n\n        // step (1.2). rerun pdisasm\n        assert(p->pdisasm_enable);\n        z_disassembler_prob_disasm(d);\n\n        // step (1.3). remove all uncertain patches and re-patch\n        // XXX: note that currently all the uncertain patches are disabled\n        GSequenceIter *s_iter = g_sequence_get_begin_iter(p->uncertain_patches);\n        GSequenceIter *e_iter = g_sequence_get_end_iter(p->uncertain_patches);\n        g_sequence_remove_range(s_iter, e_iter);\n        __patcher_patch_all_F(p);\n    } else {\n        // XXX: it means there is no rewriting error. We just need to re-enable\n        // all uncertain patches.\n        GSequenceIter *iter = g_sequence_get_begin_iter(p->uncertain_patches);\n        while (!g_sequence_iter_is_end(iter)) {\n            __patcher_flip_uncertain_patch(p, (addr_t)g_sequence_get(iter),\n                                           true);\n            iter = g_sequence_iter_next(iter);\n        }\n    }\n\n    // step (2). 
disable the s_iter and e_iter flags\n    p->s_iter = NULL;\n    p->e_iter = NULL;\n}\n\nZ_API void z_patcher_flip_uncertain_patches(Patcher *p, bool is_s_iter,\n                                            int64_t off) {\n    if (!p->s_iter || !p->e_iter) {\n        EXITME(\"self correction procedure did not start\");\n    }\n    if (!p->pdisasm_enable) {\n        EXITME(\"self correction procedure only works when pdisasm is enable\");\n    }\n    if (!off) {\n        return;\n    }\n\n    // step (1). prepare basic information\n    GSequenceIter *iter = (is_s_iter ? p->s_iter : p->e_iter);\n    GSequenceIter *(*change_iter)(GSequenceIter *) =\n        ((off > 0) ? &g_sequence_iter_next : &g_sequence_iter_prev);\n    size_t steps = ((off < 0) ? (size_t)(-off) : (size_t)off);\n\n    // is_enable | is_s_iter | off > 0\n    // ----------+-----------+----------------\n    // True      | True      | False (off < 0)\n    // True      | False     | True  (off > 0)\n    // False     | True      | True  (off > 0)\n    // False     | False     | False (off < 0)\n    bool is_enable = (!!is_s_iter) ^ (!!(off > 0));\n\n    // step (2). flip uncertain patches\n    bool do_before_change = (off > 0);\n    for (size_t i = 0; i < steps; i++) {\n        if (do_before_change) {\n            __patcher_flip_uncertain_patch(p, (addr_t)g_sequence_get(iter),\n                                           is_enable);\n        }\n\n        GSequenceIter *tmp = (*change_iter)(iter);\n        assert(tmp != iter);\n        iter = tmp;\n\n        if (!do_before_change) {\n            __patcher_flip_uncertain_patch(p, (addr_t)g_sequence_get(iter),\n                                           is_enable);\n        }\n    }\n\n    // step (3). 
update s_iter/e_iter\n    if (is_s_iter) {\n        p->s_iter = iter;\n    } else {\n        p->e_iter = iter;\n    }\n    assert(p->s_iter && p->e_iter);\n\n    // it is also possible that s_iter == e_iter\n    if (!g_sequence_iter_is_end(p->e_iter) &&\n        __patcher_compare_address((addr_t)g_sequence_get(p->s_iter),\n                                  (addr_t)g_sequence_get(p->e_iter),\n                                  NULL) > 0) {\n        EXITME(\"invalid s_iter and e_iter: %#lx - %#lx\",\n               (addr_t)g_sequence_get(p->s_iter),\n               (addr_t)g_sequence_get(p->e_iter));\n    }\n}\n\n// XXX: real patch function\nZ_API void z_patcher_unsafe_patch(Patcher *p, addr_t addr, size_t size,\n                                  const uint8_t *buf, uint8_t *obuf) {\n    if (z_likely(addr >= p->text_addr && addr < p->text_addr + p->text_size)) {\n        // XXX: hot branch\n        z_rptr_inc(p->text_ptr, uint8_t, addr - p->text_addr);\n        if (obuf) {\n            z_rptr_memcpy(obuf, p->text_ptr, size);\n        }\n        z_rptr_memcpy(p->text_ptr, buf, size);\n        z_rptr_reset(p->text_ptr);\n    } else {\n        if (obuf) {\n            z_elf_read(p->elf, addr, size, obuf);\n        }\n        z_elf_write(p->elf, addr, size, buf);\n    }\n}\n"
  },
  {
    "path": "src/patcher.h",
    "content": "/*\n * patcher.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __PATCHER_H\n#define __PATCHER_H\n\n#include \"address_dictionary.h\"\n#include \"binary.h\"\n#include \"buffer.h\"\n#include \"config.h\"\n#include \"crs_config.h\"\n#include \"disassembler.h\"\n#include \"elf_.h\"\n#include \"sys_optarg.h\"\n\n#include <gmodule.h>\n\n// XXX: note that patchpoint has priority of:\n//  PP_BRIDEG > PP_CERTAIN > PP_UNCERTAIN\ntypedef enum patchpoint_type {\n    PP_INVALID = 0UL,\n    PP_UNCERTAIN = 1UL,\n    PP_CERTAIN = 2UL,\n    PP_BRIDGE = 3UL,\n} PPType;\n\n// XXX: some fields of Patcher are essential to understand the underlying logic:\n//\n//  * certain_addresses: all the address which are *certainly* sure to be code\n//                       bytes. The values of this dictionary have two types:\n//                       the instruction size for each instruction boundary, and\n//                       zero for the others.\n//\n//  * uncertain_patches: all the patches which are *uncertainly* sure. Most of\n//                       them are patched based on the calculated probability.\n//\n//  *   certain_patches: all the patches which are *certainly* sure. The only\n//                       patched value of this type is invalid inst. 
This kind\n//                       of patches excludes the ones serve for bridge\n//                       overlapping detection. It also excludes those code\n//                       which was patched and has been revoked for delayed\n//                       bridges.\n//\n//  *           bridges: all potential patch points which can help detect bridge\n//                       overlapping.\n//\n//\n// There are some relations between aforementioned fields.\n//\n//      keys(uncertain_patches).intersection(keys(certain_addresses)) = EmptySet\n//\n//        keys(uncertain_patches).intersection(keys(certain_patches)) = EmptySet\n//                keys(uncertain_patches).intersection(keys(bridges)) = EmptySet\n//                  keys(certain_patches).intersection(keys(bridges)) = EmptySet\n//\n//                              keys(certain_patches) in keys(certain_addresses)\n//                                      keys(bridges) in keys(certain_addresses)\n//\n//      keys(certain_addresses)\n//    -  (keys(certain_patches) + keys(bridges))\n//    =  set(address which was patched and has been revoked for delayed bridges)\n//\n// Only uncertain_patches are involved in the delta debugging procedure.\nSTRUCT(Patcher, {\n    Binary *binary;\n    Disassembler *disassembler;\n\n    bool pdisasm_enable;\n\n    // ELF\n    ELF *elf;\n\n    // .text info (for efficient patching)\n    addr_t text_addr;\n    size_t text_size;\n    Rptr *text_ptr;        // pointer to the shared .text section\n    uint8_t *text_backup;  // original data before any patching\n\n    // addresses which are certainly known as code\n    //  for instruction boundary, the value is the length of instruction\n    //  for other places, the value is zero\n    AddrDict(uint8_t, certain_addresses);\n\n    // patch information\n    GSequence *uncertain_patches;\n    AddrDictFast(bool, certain_patches);\n    GHashTable *bridges;  // bridges detection points\n\n    // potential addresses for uncertain patches 
(only used when pdisasm is\n    // enable and CONSERVATIVE_PATCH is disable)\n    GList *potential_uncertain_addresses;\n\n    // delta debugging info\n    GSequenceIter *s_iter;\n    GSequenceIter *e_iter;\n\n    // statistic information\n    size_t patched_bridges;\n    size_t delayed_bridges;\n    size_t resolved_bridges;\n    size_t adjusted_bridges;\n\n    // rewriting optargs\n    RewritingOptArgs *opts;\n});\n\n/*\n * Create a patcher\n */\nZ_API Patcher *z_patcher_create(Disassembler *d, RewritingOptArgs *opts);\n\n/*\n * Destroy a patcher\n */\nZ_API void z_patcher_destroy(Patcher *p);\n\n/*\n * Patcher show details\n */\nZ_API void z_patcher_describe(Patcher *p);\n\n/*\n * Initial patching for the instructions whose probabilities are high enough\n */\nZ_API void z_patcher_initially_patch(Patcher *p);\n\n/*\n * Check whether address is a patched crash points (patch point)\n */\nZ_API PPType z_patcher_check_patchpoint(Patcher *p, addr_t addr);\n\n/*\n * Patch address as a jump bridge.\n *\n * The parameter is_real means the bridge is triggered by a crash during\n * execution, and vice versa (e.g., logged crashpoint and CP_RETADDR).\n */\nZ_API void z_patcher_build_bridge(Patcher *p, addr_t ori_addr,\n                                  addr_t shadow_addr, bool is_real);\n\n/*\n * Adjust the address of a given bridge. 
This function may also change current\n * patching.\n */\nZ_API addr_t z_patcher_adjust_bridge_address(Patcher *p, addr_t addr);\n\n/*\n * Show bridge stat\n */\nZ_API void z_patcher_bridge_stats(Patcher *p);\n\n/*\n * Show the number of uncertain patches\n */\nZ_API size_t z_patcher_uncertain_patches_n(Patcher *p);\n\n/*\n * Self correction starts\n */\nZ_API void z_patcher_self_correction_start(Patcher *p);\n\n/*\n * Self correction ends\n */\nZ_API void z_patcher_self_correction_end(Patcher *p);\n\n/*\n * Enable or disable uncertain patches by moving s_iter/e_iter\n */\nZ_API void z_patcher_flip_uncertain_patches(Patcher *p, bool is_s_iter,\n                                            int64_t off);\n\n/*\n * Basic patching function: patch at the given address and return the original\n * value if obuf is not NULL.\n *\n * Note that this function is unsafe because it allows users to do their own\n * patches *without* changing the metadata (e.g., bridges) of the patcher.\n *\n * Only use it when you are sure your patches are safe. Any crash triggered by\n * patches from this function cannot be diagnosed and repaired.\n */\nZ_API void z_patcher_unsafe_patch(Patcher *p, addr_t addr, size_t size,\n                                  const uint8_t *buf, uint8_t *obuf);\n#endif\n"
  },
  {
    "path": "src/prob_disasm/prob_disasm_complete/dag.c",
    "content": "/*\n * dag.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * Tarjan data\n */\ntypedef struct tarjan_info_t {\n    AddrDict(uint32_t, low);\n    AddrDict(uint32_t, dfn);\n    uint32_t addr_n;\n} TarjanInfo;\n\n/*\n * Tarjan algorithm to calculate SCC (return low[cur_addr])\n */\nZ_PRIVATE void __prob_disassembler_tarjan(ProbDisassembler *pd,\n                                          TarjanInfo *info, GQueue *stack,\n                                          GHashTable *in_stack,\n                                          addr_t cur_addr);\n\n/*\n * Bulid DAG using Tarjan algorithm\n */\nZ_PRIVATE void __prob_disassembler_build_dag(ProbDisassembler *pd);\n\nZ_PRIVATE void __prob_disassembler_tarjan(ProbDisassembler *pd,\n                                          TarjanInfo *info, GQueue *stack,\n                                          GHashTable *in_stack,\n                                          addr_t cur_addr) {\n    // step [0]. basic info\n    Disassembler *d = pd->base;\n\n    // step [1]. update low and dfn\n    z_addr_dict_set(info->low, cur_addr, info->addr_n);\n    z_addr_dict_set(info->dfn, cur_addr, info->addr_n);\n    info->addr_n++;\n\n    // step [2]. 
push into stack\n    g_queue_push_tail(stack, GSIZE_TO_POINTER(cur_addr));\n    g_hash_table_insert(in_stack, GSIZE_TO_POINTER(cur_addr),\n                        GSIZE_TO_POINTER(1));\n\n    // step [3]. get nexts\n    size_t n = 0;\n    addr_t *next_addrs = NULL;\n    if (!__prob_disassembler_get_propogate_successors(pd, cur_addr, &n,\n                                                      &next_addrs)) {\n        EXITME(\"invalid successors\");\n    }\n\n    // step [5]. main loop\n    for (size_t i = 0; i < n; i++) {\n        addr_t next_addr = next_addrs[i];\n\n        // step [5.1]. check whether next_addr is valid instruction\n        if (!z_disassembler_get_superset_disasm(d, next_addr)) {\n            continue;\n        }\n\n        // step [5.2]. for non-visited next_addr\n        if (!z_addr_dict_exist(info->low, next_addr)) {\n            assert(!z_addr_dict_exist(info->dfn, next_addr));\n            __prob_disassembler_tarjan(pd, info, stack, in_stack, next_addr);\n\n            uint32_t cur_low = z_addr_dict_get(info->low, cur_addr);\n            uint32_t next_low = z_addr_dict_get(info->low, next_addr);\n\n            if (next_low < cur_low) {\n                z_addr_dict_set(info->low, cur_addr, next_low);\n            }\n        } else if (g_hash_table_lookup(in_stack, GSIZE_TO_POINTER(next_addr))) {\n            uint32_t cur_low = z_addr_dict_get(info->low, cur_addr);\n            uint32_t next_dfn = z_addr_dict_get(info->dfn, next_addr);\n\n            if (next_dfn < cur_low) {\n                z_addr_dict_set(info->low, cur_addr, next_dfn);\n            }\n        }\n    }\n\n    // step [6]. 
get SCC\n    if (z_addr_dict_get(info->dfn, cur_addr) ==\n        z_addr_dict_get(info->low, cur_addr)) {\n        uint32_t scc_id = pd->scc_n++;\n        while (!g_queue_is_empty(stack)) {\n            addr_t poped_addr = (addr_t)g_queue_pop_tail(stack);\n            g_hash_table_remove(in_stack, GSIZE_TO_POINTER(poped_addr));\n\n            z_addr_dict_set(pd->addr2sccid, poped_addr, scc_id);\n\n            if (poped_addr == cur_addr) {\n                break;\n            }\n        }\n    }\n}\n\nZ_PRIVATE void __prob_disassembler_build_dag(ProbDisassembler *pd) {\n    /*\n     * step [0]. basic stuff\n     */\n    Disassembler *d = pd->base;\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    /*\n     * step [1]. initialization members\n     */\n    z_addr_dict_init(pd->addr2sccid, pd->text_addr, pd->text_size);\n    pd->scc_n = 1;  // XXX: scc_id == 0 is reserved for invalid instructions\n\n    /*\n     * step [2]. use Tarjan to calculate SCC\n     */\n    {\n        TarjanInfo *info = z_alloc(1, sizeof(TarjanInfo));\n        info->addr_n = 0;\n        z_addr_dict_init(info->low, text_addr, text_size);\n        z_addr_dict_init(info->dfn, text_addr, text_size);\n\n        GQueue *stack = g_queue_new(); /* stack */\n        GHashTable *in_stack =         /* whehter addr is in stack */\n            g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n        for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n            // check whether addr is handled\n            if (z_addr_dict_exist(pd->addr2sccid, addr)) {\n                assert(z_addr_dict_exist(info->low, addr));\n                assert(z_addr_dict_exist(info->dfn, addr));\n                continue;\n            }\n\n            // check cur_addr is valid\n            if (!z_disassembler_get_superset_disasm(d, addr)) {\n                z_addr_dict_set(pd->addr2sccid, addr, 0);\n                continue;\n            }\n\n            // 
do tarjan\n            __prob_disassembler_tarjan(pd, info, stack, in_stack, addr);\n\n            assert(g_queue_is_empty(stack));\n            assert(!g_hash_table_size(in_stack));\n        }\n\n        z_info(\"we found %d SCCs in the superset control flow graph\",\n               pd->scc_n);\n\n        // free memory\n        g_hash_table_destroy(in_stack);\n        g_queue_free(stack);\n        z_addr_dict_destroy(info->low);\n        z_addr_dict_destroy(info->dfn);\n        z_free(info);\n    }\n\n    /*\n     * step [3]. build DAG\n     */\n    z_addr_dict_init(pd->dag_succs, 0, pd->scc_n);\n    z_addr_dict_init(pd->dag_preds, 0, pd->scc_n);\n    z_addr_dict_init(pd->dag_dead, 0, pd->scc_n);\n\n    z_addr_dict_init(pd->dag_P, 0, pd->scc_n);\n\n    AddrDict(uint32_t, dag_preds_n); /* used for toposord */\n    z_addr_dict_init(dag_preds_n, 0, pd->scc_n);\n\n    {\n        // step [3.1]. init all necessary members\n        for (uint32_t scc_id = 0; scc_id < pd->scc_n; scc_id++) {\n            z_addr_dict_set(dag_preds_n, scc_id, 0);\n            z_addr_dict_set(pd->dag_succs, scc_id,\n                            g_hash_table_new_full(g_direct_hash, g_direct_equal,\n                                                  NULL, NULL));\n            z_addr_dict_set(pd->dag_preds, scc_id,\n                            g_hash_table_new_full(g_direct_hash, g_direct_equal,\n                                                  NULL, NULL));\n        }\n\n        // step [3.2]. 
construct DAG based on each address's information\n        for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n            // ignore invalid instructions\n            assert(z_addr_dict_exist(pd->addr2sccid, addr));\n            uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr);\n            if (!scc_id) {\n                continue;\n            }\n\n            // get dag_succs\n            GHashTable *dag_succs = z_addr_dict_get(pd->dag_succs, scc_id);\n\n            // get succ_addrs\n            size_t n = 0;\n            addr_t *succ_addrs = NULL;\n            if (!__prob_disassembler_get_propogate_successors(pd, addr, &n,\n                                                              &succ_addrs)) {\n                EXITME(\"invalid successors\");\n            }\n\n            for (int i = 0; i < n; i++) {\n                addr_t succ_addr = succ_addrs[i];\n\n                // check succ_addr is in .text (we cannot know the outside info)\n                // XXX: OUTSIDE LOST already handles this\n                uint32_t succ_scc_id;\n                if (succ_addr < text_addr ||\n                    succ_addr >= text_addr + text_size) {\n                    continue;\n                }\n\n                assert(z_addr_dict_exist(pd->addr2sccid, succ_addr));\n                succ_scc_id = z_addr_dict_get(pd->addr2sccid, succ_addr);\n\n                // and not equal to scc_id\n                if (succ_scc_id == scc_id) {\n                    continue;\n                }\n\n                // check whether succ_scc is in dag_succs and insert if not\n                if (!g_hash_table_lookup(dag_succs,\n                                         GSIZE_TO_POINTER(succ_scc_id))) {\n                    // update dag_succs\n                    g_hash_table_insert(dag_succs,\n                                        GSIZE_TO_POINTER(succ_scc_id),\n                                        GSIZE_TO_POINTER(1));\n\n                    // update 
dag_preds_n\n                    z_addr_dict_set(\n                        dag_preds_n, succ_scc_id,\n                        z_addr_dict_get(dag_preds_n, succ_scc_id) + 1);\n\n                    // update dag_preds\n                    g_hash_table_insert(\n                        z_addr_dict_get(pd->dag_preds, succ_scc_id),\n                        GSIZE_TO_POINTER(scc_id), GSIZE_TO_POINTER(1));\n                }\n            }\n        }\n\n#ifdef DEBUG\n        /*\n         * step [3.3]. check the correctness of DAG\n         */\n        size_t edge_n = 0;\n        for (uint32_t scc_id = 0; scc_id < pd->scc_n; scc_id++) {\n            assert(z_addr_dict_exist(dag_preds_n, scc_id));\n            assert(z_addr_dict_exist(pd->dag_succs, scc_id));\n            assert(z_addr_dict_exist(pd->dag_preds, scc_id));\n\n            GHashTable *dag_succs = z_addr_dict_get(pd->dag_succs, scc_id);\n            GHashTable *dag_preds = z_addr_dict_get(pd->dag_preds, scc_id);\n\n            assert(z_addr_dict_get(dag_preds_n, scc_id) ==\n                   g_hash_table_size(dag_preds));\n\n            GList *list_dag_succs = g_hash_table_get_keys(dag_succs);\n            for (GList *l = list_dag_succs; l != NULL; l = l->next) {\n                edge_n++;\n                uint32_t succ_scc_id = (uint32_t)l->data;\n                assert(g_hash_table_lookup(\n                    z_addr_dict_get(pd->dag_preds, succ_scc_id),\n                    GSIZE_TO_POINTER(scc_id)));\n            }\n            g_list_free(list_dag_succs);\n        }\n        assert(edge_n);\n        z_info(\"there are %d edges in contructed DAG\", edge_n);\n#endif\n    }\n\n    /*\n     * step [4]. 
topo-sort\n     */\n    pd->topo = g_queue_new();\n    {\n        GQueue *queue = g_queue_new();\n\n        // first find all nodes without preds\n        for (uint32_t scc_id = 0; scc_id < pd->scc_n; scc_id++) {\n            if (!z_addr_dict_get(dag_preds_n, scc_id)) {\n                g_queue_push_tail(queue, GSIZE_TO_POINTER(scc_id));\n            }\n        }\n\n        // get topo\n        while (!g_queue_is_empty(queue)) {\n            uint32_t scc_id = (uint32_t)g_queue_pop_head(queue);\n            g_queue_push_tail(pd->topo, GSIZE_TO_POINTER(scc_id));\n\n            GHashTable *dag_succs = z_addr_dict_get(pd->dag_succs, scc_id);\n\n            GList *list_dag_succs = g_hash_table_get_keys(dag_succs);\n            for (GList *l = list_dag_succs; l != NULL; l = l->next) {\n                uint32_t succ_scc_id = (uint32_t)l->data;\n\n                assert(z_addr_dict_exist(dag_preds_n, succ_scc_id));\n                z_addr_dict_set(dag_preds_n, succ_scc_id,\n                                z_addr_dict_get(dag_preds_n, succ_scc_id) - 1);\n\n                if (!z_addr_dict_get(dag_preds_n, succ_scc_id)) {\n                    g_queue_push_tail(queue, GSIZE_TO_POINTER(succ_scc_id));\n                }\n            }\n            g_list_free(list_dag_succs);\n        }\n        assert(g_queue_get_length(pd->topo) == pd->scc_n);\n\n        g_queue_free(queue);\n    }\n    z_addr_dict_destroy(dag_preds_n);\n}\n"
  },
  {
    "path": "src/prob_disasm/prob_disasm_complete/hints.c",
    "content": "/*\n * hints.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n// TODO: it would be more convencing to dynamically update hints\n\n/*\n * Register info\n */\ntypedef struct reg_info_t {\n    GPRState gpr;\n    XMMState xmm;\n    YMMState ymm;\n    ZMMState zmm;\n} RegInfo;\n\n/*\n * Coolect hints from registers' use-def\n */\nZ_PRIVATE void __prob_disassembler_reg_hints_dfs(\n    ProbDisassembler *pd, GHashTable *seen,\n    Buffer *(*get_next)(UCFG_Analyzer *, addr_t),\n    void (*update_info)(ProbDisassembler *, addr_t, RegInfo *), addr_t cur_addr,\n    RegInfo *info, bool is_first_addr);\n\n/*\n * Data length threshold\n */\n#define STRING_LENGTH_THRESHOLD 6\n#define VALUE_LENGTH_THRESHOLD 4\n#define CONFIDENT_LENGTH_THRESHOLD 100\n\n/*\n * Code pattern distance\n */\n#define CMP_CJMP_DISTANCE 2\n#define ARG_CALL_DISTANCE 2\n\n/*\n * Collect control-flow-related hints\n */\nZ_PRIVATE void __prob_disassembler_collect_cf_hints(ProbDisassembler *pd);\n\n/*\n * Collect pop-ret hints\n */\nZ_PRIVATE void __prob_disassembler_collect_pop_ret_hints(ProbDisassembler *pd);\n\n/*\n * Collect cmp/test-cjmp hints\n */\nZ_PRIVATE void __prob_disassembler_collect_cmp_cjmp_hints(ProbDisassembler *pd);\n\n/*\n * Collect arg-call hints\n */\nZ_PRIVATE void 
__prob_disassembler_collect_arg_call_hints(ProbDisassembler *pd);\n\n/*\n * Collect register-related hints\n */\nZ_PRIVATE void __prob_disassembler_collect_reg_hints(ProbDisassembler *pd);\n\n/*\n * Collect string hints\n */\nZ_PRIVATE void __prob_disassembler_collect_str_hints(ProbDisassembler *pd);\n\n/*\n * Collect value hints\n */\nZ_PRIVATE void __prob_disassembler_collect_value_hints(ProbDisassembler *pd);\n\nZ_PRIVATE void __prob_disassembler_reg_hints_dfs(\n    ProbDisassembler *pd, GHashTable *seen,\n    Buffer *(*get_next)(UCFG_Analyzer *, addr_t),\n    void (*update_info)(ProbDisassembler *, addr_t, RegInfo *), addr_t cur_addr,\n    RegInfo *info, bool is_first_addr) {\n    Disassembler *d = pd->base;\n\n    // step [0]. if info in zero, we do not need to go deeper\n    if (!info->gpr && !info->xmm && !info->ymm && !info->zmm) {\n        return;\n    }\n\n    // step [1]. check cur_addr is valid\n    if (!z_disassembler_get_superset_disasm(d, cur_addr)) {\n        return;\n    }\n\n    // step [2]. get all necessary information\n    Iter(addr_t, next_addrs);\n    z_iter_init_from_buf(next_addrs, (*get_next)(d->ucfg_analyzer, cur_addr));\n\n    // step [3]. collect hints and update next info\n    RegInfo backup_info = *info;\n    if (!is_first_addr) {\n        (*update_info)(pd, cur_addr, info);\n    }\n\n    // step [4]. go deep\n    while (!z_iter_is_empty(next_addrs)) {\n        addr_t next_addr = *(z_iter_next(next_addrs));\n        // check seen\n        if (g_hash_table_lookup(seen, GSIZE_TO_POINTER(next_addr))) {\n            continue;\n        }\n        g_hash_table_insert(seen, GSIZE_TO_POINTER(next_addr),\n                            GSIZE_TO_POINTER(1));\n\n        // deep search\n        __prob_disassembler_reg_hints_dfs(pd, seen, get_next, update_info,\n                                          next_addr, info, false);\n    }\n\n    // step [5]. 
restore info\n    *info = backup_info;\n}\n\nZ_PRIVATE void __prob_disassembler_collect_cf_hints(ProbDisassembler *pd) {\n    // step [0]. create call_/jmp_ targets and other basic information\n    GHashTable *call_targets =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                              (GDestroyNotify)(&z_buffer_destroy));\n    GHashTable *jmp_targets =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                              (GDestroyNotify)(&z_buffer_destroy));\n\n    Disassembler *d = pd->base;\n\n    ELF *e = z_binary_get_elf(pd->binary);\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    addr_t init_addr, fini_addr;\n    size_t init_size, fini_size;\n\n    if (z_elf_get_shdr_init(e)) {\n        init_addr = z_elf_get_shdr_init(e)->sh_addr;\n        init_size = z_elf_get_shdr_init(e)->sh_size;\n    } else {\n        // if we do not detect .init, we set it as .text\n        init_addr = text_addr;\n        init_size = text_size;\n    }\n\n    if (z_elf_get_shdr_fini(e)) {\n        fini_addr = z_elf_get_shdr_fini(e)->sh_addr;\n        fini_size = z_elf_get_shdr_fini(e)->sh_size;\n    } else {\n        // if we do not detect .fini, we set it as .text\n        fini_addr = text_addr;\n        fini_size = text_size;\n    }\n\n    size_t plt_n = z_elf_get_plt_n(e);\n\n    // step [2]. main loop to check all instruction\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        // step [2.1]. get corresponding instruction\n        cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n        if (!inst) {\n            continue;\n        }\n\n        // step [2.2]. check the instruction only has one imm operand\n        cs_detail *detail = inst->detail;\n        if ((detail->x86.op_count != 1) ||\n            (detail->x86.operands[0].type != X86_OP_IMM)) {\n            continue;\n        }\n\n        // step [2.3]. 
handle different cf transfer instruction\n        addr_t target = detail->x86.operands[0].imm;\n\n#define __COLLECT_CF_TARGET(TYPE, plt_check, targets)                       \\\n    do {                                                                    \\\n        /* pre-check invalid prefix */                                      \\\n        if (*((uint32_t *)(inst->detail->x86.prefix))) {                    \\\n            z_trace(\"find invalid prefix: \" CS_SHOW_INST(inst));            \\\n            continue;                                                       \\\n        }                                                                   \\\n                                                                            \\\n        /* additional check for invalid prefix: HUG capstone */             \\\n        KS_ASM(inst->address, \"%s %s\", inst->mnemonic, inst->op_str);       \\\n        if (ks_size != inst->size) {                                        \\\n            z_trace(\"find invalid prefix: \" CS_SHOW_INST(inst));            \\\n            continue;                                                       \\\n        }                                                                   \\\n                                                                            \\\n        /* check PLT transfer */                                            \\\n        if (z_elf_get_plt_info(e, target)) {                                \\\n            /* for PLT transfer, we have further check */                   \\\n            if (plt_check) {                                                \\\n                z_trace(\"find PLT \" #TYPE \": \" CS_SHOW_INST(inst));         \\\n                __prob_disassembler_update_inst_hint(                       \\\n                    pd, addr, HINT(PLT_##TYPE, BASE_CF(inst) * plt_n));     \\\n            }                                                               \\\n            continue;                               
                        \\\n        }                                                                   \\\n                                                                            \\\n        /* check outsider transfer */                                       \\\n        if ((target < text_addr || target >= text_addr + text_size) &&      \\\n            (target < init_addr || target >= init_addr + init_size) &&      \\\n            (target < fini_addr || target >= fini_addr + fini_size)) {      \\\n            z_trace(\"find outside \" #TYPE \": \" CS_SHOW_INST(inst));         \\\n            __prob_disassembler_update_inst_lost(                           \\\n                pd, addr, LOST(OUTSIDE_##TYPE, BASE_CF(inst) * text_size)); \\\n            continue;                                                       \\\n        }                                                                   \\\n                                                                            \\\n        /* check target is valid */                                         \\\n        if (!z_disassembler_get_superset_disasm(d, target)) {               \\\n            continue;                                                       \\\n        }                                                                   \\\n                                                                            \\\n        /* check it does not jump into its next instruction */              \\\n        if (target == inst->address + inst->size) {                         \\\n            continue;                                                       \\\n        }                                                                   \\\n                                                                            \\\n        /* maintain a relation from dst address to src address */           \\\n        Buffer *dst2src =                                                   \\\n            g_hash_table_lookup((targets), 
GSIZE_TO_POINTER(target));       \\\n        if (!dst2src) {                                                     \\\n            dst2src = z_buffer_create(NULL, 0);                             \\\n            g_hash_table_insert((targets), GSIZE_TO_POINTER(target),        \\\n                                (gpointer)(dst2src));                       \\\n        }                                                                   \\\n        z_buffer_append_raw(dst2src, (uint8_t *)&addr, sizeof(addr));       \\\n    } while (0)\n\n        if (z_capstone_is_call(inst)) {\n            __COLLECT_CF_TARGET(CALL, inst->size == 5, call_targets);\n        } else if (z_capstone_is_jmp(inst) || z_capstone_is_cjmp(inst)) {\n            __COLLECT_CF_TARGET(\n                JMP, (inst->size == 5 && z_capstone_is_jmp(inst)), jmp_targets);\n        }\n\n#undef __COLLECT_CF_TARGET\n    }\n\n    // step [3]. collect hints from converged calls\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        Buffer *callers_buf =\n            (Buffer *)g_hash_table_lookup(call_targets, GSIZE_TO_POINTER(addr));\n        if (!callers_buf) {\n            continue;\n        }\n\n        Iter(addr_t, callers);\n        z_iter_init_from_buf(callers, callers_buf);\n        assert(!z_iter_is_empty(callers));\n        if (z_iter_get_size(callers) == 1) {\n            continue;\n        }\n\n        while (!z_iter_is_empty(callers)) {\n            addr_t caller = *(z_iter_next(callers));\n\n            cs_insn *caller_inst =\n                z_disassembler_get_superset_disasm(d, caller);\n            assert(caller_inst);\n            __prob_disassembler_update_inst_hint(\n                pd, caller,\n                HINT(CONVERGED_CALL,\n                     BASE_CF(caller_inst) / (z_iter_get_size(callers) - 1)));\n        }\n    }\n    g_hash_table_destroy(call_targets);\n\n    // step [4]. 
collect hints from converged jumps and cross jumps\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        Buffer *jmp_sources_buf =\n            (Buffer *)g_hash_table_lookup(jmp_targets, GSIZE_TO_POINTER(addr));\n        if (!jmp_sources_buf) {\n            continue;\n        }\n\n        Iter(addr_t, jmp_sources);\n        z_iter_init_from_buf(jmp_sources, jmp_sources_buf);\n        assert(!z_iter_is_empty(jmp_sources));\n\n        // step [4.1]. collect hints from converged jumps\n        size_t jmp_sources_n = z_iter_get_size(jmp_sources);\n        if (jmp_sources_n > 1) {\n            while (!z_iter_is_empty(jmp_sources)) {\n                addr_t jmp_source = *(z_iter_next(jmp_sources));\n\n                cs_insn *jmp_source_inst =\n                    z_disassembler_get_superset_disasm(d, jmp_source);\n                assert(jmp_source_inst);\n                __prob_disassembler_update_inst_hint(\n                    pd, jmp_source,\n                    HINT(CONVERGED_JMP,\n                         BASE_CF(jmp_source_inst) / (jmp_sources_n - 1)));\n            }\n        }\n\n        // step [4.2]. 
collect hints from crossed jumps\n        assert(addr > 7);\n        // As the longest jump, which we will consider, is 7-bytes\n        for (size_t pred = addr - 7; pred < addr; pred++) {\n            // get predecessors\n            cs_insn *pred_inst = z_disassembler_get_superset_disasm(d, pred);\n            if (!pred_inst) {\n                goto NEXT_PRED;\n            }\n\n            // check cross\n            if (pred + pred_inst->size != addr) {\n                goto NEXT_PRED;\n            }\n\n            // check pred is jmp and cjmp\n            if (!z_capstone_is_jmp(pred_inst) &&\n                !z_capstone_is_cjmp(pred_inst)) {\n                goto NEXT_PRED;\n            }\n\n            // check pred's succs are valid\n            Iter(addr_t, pred_succs);\n            z_iter_init_from_buf(\n                pred_succs,\n                z_ucfg_analyzer_get_direct_successors(d->ucfg_analyzer, pred));\n\n            while (!z_iter_is_empty(pred_succs)) {\n                addr_t pred_succ = *(z_iter_next(pred_succs));\n                if (!z_disassembler_get_superset_disasm(d, pred_succ)) {\n                    goto NEXT_PRED;\n                }\n            }\n\n            // collect hints for pred, where we assume most crossed jump is only\n            // 1-byte\n            __prob_disassembler_update_inst_hint(\n                pd, pred, HINT(CROSSED_JMP, BASE_CF_RAW(1) / jmp_sources_n));\n\n            // collect hints for jump sources\n            z_iter_reset(jmp_sources);\n            while (!z_iter_is_empty(jmp_sources)) {\n                addr_t jmp_source = *(z_iter_next(jmp_sources));\n\n                z_trace(\"find crossed JMP: %#lx - %#lx\", pred, jmp_source);\n                cs_insn *jmp_source_inst =\n                    z_disassembler_get_superset_disasm(d, jmp_source);\n                assert(jmp_source_inst);\n                __prob_disassembler_update_inst_hint(\n                    pd, jmp_source,\n                    
HINT(CROSSED_JMP,\n                         BASE_CF(jmp_source_inst) / jmp_sources_n));\n            }\n\n        NEXT_PRED:;\n        }\n    }\n    g_hash_table_destroy(jmp_targets);\n}\n\n/*\n * Functions for updating info.\n * Note that following two functions will only be used during dfs\n */\nZ_PRIVATE void __update_info_for_usedef_reg_hint(ProbDisassembler *pd,\n                                                 addr_t addr, RegInfo *info) {\n    Disassembler *d = pd->base;\n\n    RegState *rs = z_ucfg_analyzer_get_register_state(d->ucfg_analyzer, addr);\n    assert(rs);\n\n    if (rs->gpr_write_32_64 & info->gpr) {\n        __prob_disassembler_update_inst_hint(pd, addr,\n                                             HINT(USEDEF_GPR, BASE_REG));\n        info->gpr &= (~rs->gpr_write_32_64);\n    }\n\n#define __SSE_TEMPLATE(T)                                                     \\\n    do {                                                                      \\\n        if (rs->T##_write & info->T) {                                        \\\n            __prob_disassembler_update_inst_hint(pd, addr,                    \\\n                                                 HINT(USEDEF_SSE, BASE_REG)); \\\n            info->T &= (~rs->T##_write);                                      \\\n        }                                                                     \\\n    } while (0)\n\n    __SSE_TEMPLATE(xmm);\n    __SSE_TEMPLATE(ymm);\n    __SSE_TEMPLATE(zmm);\n\n#undef __SSE_TEMPLATE\n}\n\nZ_PRIVATE void __update_info_for_killed_reg_hint(ProbDisassembler *pd,\n                                                 addr_t addr, RegInfo *info) {\n    Disassembler *d = pd->base;\n\n    RegState *rs = z_ucfg_analyzer_get_register_state(d->ucfg_analyzer, addr);\n    assert(rs);\n\n    if (rs->gpr_write_32_64 & info->gpr) {\n        __prob_disassembler_update_inst_lost(pd, addr,\n                                             LOST(KILLED_GPR, BASE_REG));\n        info->gpr &= 
(~rs->gpr_write_32_64);\n    }\n    if (rs->gpr_read_32_64 & info->gpr) {\n        info->gpr &= (~rs->gpr_read_32_64);\n    }\n\n#define __SSE_TEMPLATE(T)                                                     \\\n    do {                                                                      \\\n        if (rs->T##_write & info->T) {                                        \\\n            __prob_disassembler_update_inst_lost(pd, addr,                    \\\n                                                 LOST(KILLED_SSE, BASE_REG)); \\\n            info->T &= (~rs->T##_write);                                      \\\n        }                                                                     \\\n        if (rs->T##_read & info->T) {                                         \\\n            info->T &= (~rs->T##_read);                                       \\\n        }                                                                     \\\n    } while (0)\n\n    __SSE_TEMPLATE(xmm);\n    __SSE_TEMPLATE(ymm);\n    __SSE_TEMPLATE(zmm);\n\n#undef __SSE_TEMPLATE\n}\n\nZ_PRIVATE void __prob_disassembler_collect_reg_hints(ProbDisassembler *pd) {\n    Disassembler *d = pd->base;\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    GHashTable *seen =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    RegInfo info = {};\n\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        RegState *rs =\n            z_ucfg_analyzer_get_register_state(d->ucfg_analyzer, addr);\n        if (!rs) {\n            continue;\n        }\n\n        /*\n         * step [1]. 
get use-def hints\n         */\n        info.gpr = rs->gpr_read_32_64;\n        info.xmm = rs->xmm_read;\n        info.ymm = rs->ymm_read;\n        info.zmm = rs->zmm_read;\n\n        g_hash_table_remove_all(seen);\n        g_hash_table_insert(seen, GSIZE_TO_POINTER(addr), GSIZE_TO_POINTER(1));\n\n        __prob_disassembler_reg_hints_dfs(\n            pd, seen, &z_ucfg_analyzer_get_direct_predecessors,\n            &__update_info_for_usedef_reg_hint, addr, &info, true);\n\n        /*\n         * step [2]. get killed hints\n         */\n        info.gpr = rs->gpr_write_32_64 & (~rs->gpr_read_32_64);\n        info.xmm = rs->xmm_write & (~rs->xmm_read);\n        info.ymm = rs->ymm_write & (~rs->ymm_read);\n        info.zmm = rs->zmm_write & (~rs->zmm_read);\n\n        g_hash_table_remove_all(seen);\n        g_hash_table_insert(seen, GSIZE_TO_POINTER(addr), GSIZE_TO_POINTER(1));\n\n        __prob_disassembler_reg_hints_dfs(\n            pd, seen, &z_ucfg_analyzer_get_direct_predecessors,\n            &__update_info_for_killed_reg_hint, addr, &info, true);\n    }\n}\n\nZ_PRIVATE void __prob_disassembler_collect_pop_ret_hints(ProbDisassembler *pd) {\n    Disassembler *d = pd->base;\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n        if (!inst) {\n            continue;\n        }\n\n        if (inst->id != X86_INS_POP) {\n            continue;\n        }\n\n        size_t pop_n = 0;\n        addr_t cur_addr = addr;\n        cs_insn *cur_inst = inst;\n        bool pop_ret = false;\n\n        while (true) {\n            pop_n += 1;\n            cur_addr += cur_inst->size;\n            cur_inst = z_disassembler_get_superset_disasm(d, cur_addr);\n\n            if (!cur_inst) {\n                break;\n            }\n            if (cur_inst->id == X86_INS_RET) {\n                pop_ret = true;\n 
               break;\n            }\n            if (cur_inst->id != X86_INS_POP) {\n                break;\n            }\n        }\n\n        if (!pop_ret) {\n            continue;\n        }\n\n        z_trace(\"find %d pop at %#lx\", pop_n, addr);\n        __prob_disassembler_update_inst_hint(pd, addr,\n                                             HINT(POP_RET, BASE_REG / pop_n));\n    }\n}\n\nZ_PRIVATE void __prob_disassembler_collect_str_hints(ProbDisassembler *pd) {\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    ELF *e = z_binary_get_elf(pd->binary);\n    Rptr *text_ptr = z_elf_vaddr2ptr(e, text_addr);\n\n    // collect all string-like hints\n    addr_t prev_string = INVALID_ADDR;\n    addr_t prev_null = INVALID_ADDR;\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        uint8_t c = *(z_rptr_get_ptr(text_ptr, uint8_t));\n        if (!c) {\n            if (prev_string != INVALID_ADDR) {\n                // we ignore null during string scanning\n                prev_null = addr;\n            }\n        } else if (isprint(c)) {\n            if (prev_string == INVALID_ADDR) {\n                prev_string = addr;\n                prev_null = INVALID_ADDR;\n            }\n        } else {\n            if (prev_string != INVALID_ADDR && prev_null != INVALID_ADDR) {\n                assert(prev_null > prev_string);\n                size_t n = prev_null - prev_string;\n                if (n > STRING_LENGTH_THRESHOLD) {\n                    z_trace(\"find string starting from %#lx with %d bytes\",\n                            prev_string, n);\n                    double128_t hint;\n                    if (n < CONFIDENT_LENGTH_THRESHOLD) {\n                        hint = HINT(STRING, BASE_STRING(n));\n                    } else {\n                        hint = +INFINITY;\n                    }\n                    for (addr_t cur_addr = prev_string; cur_addr <= prev_null;\n                         
cur_addr++) {\n                        __prob_disassembler_update_data_hint(pd, cur_addr,\n                                                             hint);\n                    }\n                }\n            }\n            prev_string = INVALID_ADDR;\n            prev_null = INVALID_ADDR;\n        }\n\n        z_rptr_inc(text_ptr, uint8_t, 1);\n    }\n\n    z_rptr_destroy(text_ptr);\n}\n\nZ_PRIVATE void __prob_disassembler_collect_value_hints(ProbDisassembler *pd) {\n/*\n * Macro to collect continuous numerical number:\n *      T: type (int16_t, int32_t, int64_t)\n *      B: bit offset of size (1, 2, 3)\n *      L: length threshold\n *      C: count zero and 0xff\n */\n#define __COLLECT_VALUE_HINTS(T, B, L, C)                                    \\\n    do {                                                                     \\\n        assert(sizeof(T) == (1 << B));                                       \\\n                                                                             \\\n        addr_t text_addr = pd->text_addr;                                    \\\n        size_t text_size = pd->text_size;                                    \\\n        double128_t threshold = __pow_in_4(0x100, (B));                      \\\n        z_trace(\"threshold: %Lf\", threshold);                                \\\n                                                                             \\\n        /* alignment */                                                      \\\n        text_size = BITS_ALIGN_FLOOR(text_addr + text_size, (B));            \\\n        text_addr = BITS_ALIGN_CELL(text_addr, (B));                         \\\n        text_size -= text_addr;                                              \\\n        z_trace(\"aligned range: [%#lx, %#lx]\", text_addr,                    \\\n                text_addr + text_size - 1);                                  \\\n        assert(!(text_addr % sizeof(T)));                                    \\\n        
assert(!(text_size % sizeof(T)));                                    \\\n                                                                             \\\n        ELF *e = z_binary_get_elf(pd->binary);                               \\\n        Rptr *text_ptr = z_elf_vaddr2ptr(e, text_addr);                      \\\n                                                                             \\\n        /* collect continued likely numerical value */                       \\\n        addr_t numerical_addr = INVALID_ADDR;                                \\\n        double128_t numerical_val = 0.0;                                     \\\n        for (addr_t addr = text_addr; addr < text_addr + text_size;          \\\n             addr += sizeof(T)) {                                            \\\n            T val = *(z_rptr_get_ptr(text_ptr, T));                          \\\n            double128_t val_f = (double128_t)val;                            \\\n            size_t n = (addr - numerical_addr) >> (B);                       \\\n                                                                             \\\n            if (numerical_addr == INVALID_ADDR) {                            \\\n                /* the first value */                                        \\\n                numerical_addr = addr;                                       \\\n                numerical_val = val_f;                                       \\\n            } else if ((!(C)) && (val == 0 || val == -1)) {                  \\\n                /* we ignore 0 and 0xfff..ff. Hence, do nothing. 
*/          \\\n            } else if (fabsl(numerical_val - val_f) < threshold) {           \\\n                /* valid numerical number */                                 \\\n                numerical_val =                                              \\\n                    (numerical_val / (n + 1)) * n + (val_f / (n + 1));       \\\n            } else {                                                         \\\n                if (n > (L)) {                                               \\\n                    z_trace(                                                 \\\n                        \"find %d-byte numerical array from %#lx with %d \"    \\\n                        \"elements (mean: %.2Lf)\",                            \\\n                        sizeof(T), numerical_addr, n, numerical_val);        \\\n                    double128_t hint;                                        \\\n                    if (n < CONFIDENT_LENGTH_THRESHOLD) {                    \\\n                        hint = HINT(VALUE,                                   \\\n                                    BASE_VALUE(1 << (B), threshold * 2, n)); \\\n                    } else {                                                 \\\n                        hint = +INFINITY;                                    \\\n                    }                                                        \\\n                    for (addr_t cur_addr = numerical_addr; cur_addr < addr;  \\\n                         cur_addr++) {                                       \\\n                        __prob_disassembler_update_data_hint(pd, cur_addr,   \\\n                                                             hint);          \\\n                    }                                                        \\\n                }                                                            \\\n                                                                             \\\n                numerical_addr = 
addr;                                       \\\n                numerical_val = val_f;                                       \\\n            }                                                                \\\n                                                                             \\\n            z_rptr_inc(text_ptr, T, 1);                                      \\\n        }                                                                    \\\n                                                                             \\\n        z_rptr_destroy(text_ptr);                                            \\\n    } while (0)\n\n    __COLLECT_VALUE_HINTS(int8_t, 0, VALUE_LENGTH_THRESHOLD << 2, true);\n    __COLLECT_VALUE_HINTS(int16_t, 1, VALUE_LENGTH_THRESHOLD << 2, false);\n    __COLLECT_VALUE_HINTS(int32_t, 2, VALUE_LENGTH_THRESHOLD << 1, false);\n    __COLLECT_VALUE_HINTS(int64_t, 3, VALUE_LENGTH_THRESHOLD << 0, false);\n\n#undef __COLLECT_VALUE_HINTS\n}\n\nZ_PRIVATE void __prob_disassembler_collect_cmp_cjmp_hints(\n    ProbDisassembler *pd) {\n    Disassembler *d = pd->base;\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n\n        // check valid\n        if (!inst) {\n            continue;\n        }\n\n        // check cmp and test\n        if (inst->id != X86_INS_TEST && inst->id != X86_INS_CMP) {\n            continue;\n        }\n\n        // try to find a cjmp within CMP_CJMP_DISTANCE\n        bool found_cjmp = false;\n        addr_t cur_addr = addr;\n        cs_insn *cur_inst = inst;\n        Iter(addr_t, succ_addrs);\n\n        for (size_t i = 0; i < CMP_CJMP_DISTANCE; i++) {\n            z_iter_init_from_buf(\n                succ_addrs, z_disassembler_get_direct_successors(d, cur_addr));\n            if (z_iter_get_size(succ_addrs) != 1) {\n                break;\n            
}\n\n            addr_t succ_addr = *(z_iter_next(succ_addrs));\n            if (succ_addr != cur_addr + cur_inst->size) {\n                break;\n            }\n\n            // switch into next address\n            cur_addr = succ_addr;\n            cur_inst = z_disassembler_get_superset_disasm(d, cur_addr);\n\n            if (!cur_inst) {\n                break;\n            }\n\n            if (z_capstone_is_cjmp(cur_inst)) {\n                found_cjmp = true;\n                break;\n            }\n        }\n\n        if (found_cjmp) {\n            z_trace(\"find cmp-cjmp pattern at %#lx - %#lx\", addr, cur_addr);\n            __prob_disassembler_update_inst_hint(\n                pd, addr, HINT(CMP_CJMP, __pow_in_4(BASE_INS, 2)));\n        }\n    }\n}\n\nZ_PRIVATE void __prob_disassembler_collect_arg_call_hints(\n    ProbDisassembler *pd) {\n    Disassembler *d = pd->base;\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n\n        // check valid\n        if (!inst) {\n            continue;\n        }\n\n        // check mov\n        if (inst->id != X86_INS_MOV) {\n            continue;\n        }\n\n        // check the rdi and rsi\n        cs_detail *detail = inst->detail;\n        if (detail->x86.operands[0].type != X86_OP_REG) {\n            continue;\n        }\n        if (detail->x86.operands[0].reg != X86_REG_RDI &&\n            detail->x86.operands[0].reg != X86_REG_RSI) {\n            continue;\n        }\n\n        // try to find a call within ARG_CALL_DISTANCE\n        bool found_call = false;\n        addr_t cur_addr = addr;\n        cs_insn *cur_inst = inst;\n        Iter(addr_t, succ_addrs);\n\n        for (size_t i = 0; i < ARG_CALL_DISTANCE; i++) {\n            z_iter_init_from_buf(\n                succ_addrs, z_disassembler_get_direct_successors(d, cur_addr));\n    
        if (z_iter_get_size(succ_addrs) != 1) {\n                break;\n            }\n\n            addr_t succ_addr = *(z_iter_next(succ_addrs));\n            if (succ_addr != cur_addr + cur_inst->size) {\n                break;\n            }\n\n            // switch into next address\n            cur_addr = succ_addr;\n            cur_inst = z_disassembler_get_superset_disasm(d, cur_addr);\n\n            if (!cur_inst) {\n                break;\n            }\n\n            if (z_capstone_is_call(cur_inst)) {\n                found_call = true;\n                break;\n            }\n        }\n\n        if (found_call) {\n            z_trace(\"find arg-call pattern at %#lx - %#lx\", addr, cur_addr);\n            __prob_disassembler_update_inst_hint(\n                pd, addr, HINT(ARG_CALL, __pow_in_4(BASE_INS, 2)));\n        }\n    }\n}\n"
  },
  {
    "path": "src/prob_disasm/prob_disasm_complete/propagation.c",
    "content": "/*\n * propagation.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * Propogate instruction hints\n */\nZ_PRIVATE void __prob_disassembler_propogate_inst_hints(ProbDisassembler *pd);\n\nZ_PRIVATE void __prob_disassembler_propogate_inst_hints(ProbDisassembler *pd) {\n    // step [0]. basic information\n    Disassembler *d = pd->base;\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    // step [1]. aggregate all hints within a SCC\n    AddrDict(double128_t, dag_hints);\n    z_addr_dict_init(dag_hints, 0, pd->scc_n);\n\n    // XXX: invalid_sccs means those SCCs whose likelihook of being instructions\n    // is quite small. Hence, we stop propogation when reaching them. 
Note that\n    // it is different from those SCCs in pd->dag_dead which are 100% not\n    // instruction boundaries.\n    AddrDictFast(bool, invalid_sccs);\n    z_addr_dict_init(invalid_sccs, 0, pd->scc_n);\n\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        // check addr is valid\n        uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr);\n        if (!scc_id) {\n            continue;\n        }\n\n        // check invalid_scc\n        if (z_addr_dict_exist(pd->dag_P, scc_id) &&\n            z_addr_dict_get(pd->dag_P, scc_id) < PROPAGATE_P) {\n            if (!z_addr_dict_exist(invalid_sccs, scc_id)) {\n                z_addr_dict_set(invalid_sccs, scc_id, true);\n                z_addr_dict_set(dag_hints, scc_id, 1.0);\n            }\n\n            continue;\n        }\n\n        // we do not use hints of very rare instructions\n        // TODO: get a instruction distribution to weaken the hints instead of\n        // directly disabling it.\n        cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n        if (z_capstone_is_rare(inst)) {\n            continue;\n        }\n\n        // update aggregated hints\n        double128_t addr_hint = NAN;\n        if (__prob_disassembler_get_H(pd, addr, &addr_hint)) {\n            if (!z_addr_dict_exist(dag_hints, scc_id)) {\n                // new hints\n                z_addr_dict_set(dag_hints, scc_id, addr_hint);\n            } else {\n                z_addr_dict_set(dag_hints, scc_id,\n                                z_addr_dict_get(dag_hints, scc_id) * addr_hint);\n            }\n        }\n    }\n\n    // step [2]. 
find all predecessors of invalid scc (only for first round)\n    if (!pd->round_n) {\n        GQueue *queue = g_queue_new();\n        g_queue_push_tail(queue, GSIZE_TO_POINTER(0));\n\n        AddrDictFast(bool, seen);\n        z_addr_dict_init(seen, 0, pd->scc_n);\n        z_addr_dict_set(seen, 0, true);\n\n        while (!g_queue_is_empty(queue)) {\n            uint32_t scc_id = (uint32_t)g_queue_pop_head(queue);\n\n            // update dag_hints and invalid_sccs\n            z_addr_dict_set(pd->dag_dead, scc_id, true);\n            z_addr_dict_set(invalid_sccs, scc_id, true);\n            z_addr_dict_set(dag_hints, scc_id, 1.0);\n\n            // find predecessors\n            GHashTable *dag_preds = z_addr_dict_get(pd->dag_preds, scc_id);\n            GList *list_dag_preds = g_hash_table_get_keys(dag_preds);\n            for (GList *l = list_dag_preds; l != NULL; l = l->next) {\n                uint32_t pred_scc_id = (uint32_t)l->data;\n                if (z_addr_dict_exist(seen, pred_scc_id)) {\n                    continue;\n                }\n                z_addr_dict_set(seen, pred_scc_id, true);\n                g_queue_push_tail(queue, GSIZE_TO_POINTER(pred_scc_id));\n            }\n            g_list_free(list_dag_preds);\n        }\n\n        g_queue_free(queue);\n        z_addr_dict_destroy(seen);\n    }\n\n    // step [3]. propagate hints\n    for (GList *l = pd->topo->head; l != NULL; l = l->next) {\n        uint32_t scc_id = (uint32_t)l->data;\n\n        // check scc without any hint\n        if (!z_addr_dict_exist(dag_hints, scc_id)) {\n            continue;\n        }\n\n        // check invalid scc. 
If so, stop propagation.\n        if (z_addr_dict_exist(invalid_sccs, scc_id)) {\n            continue;\n        }\n\n        // get hints\n        double128_t scc_hint = z_addr_dict_get(dag_hints, scc_id);\n\n        // propagate hints\n        GHashTable *dag_succs = z_addr_dict_get(pd->dag_succs, scc_id);\n\n        GList *list_dag_succs = g_hash_table_get_keys(dag_succs);\n        for (GList *ll = list_dag_succs; ll != NULL; ll = ll->next) {\n            uint32_t succ_scc_id = (uint32_t)ll->data;\n\n            if (!z_addr_dict_exist(dag_hints, succ_scc_id)) {\n                z_addr_dict_set(dag_hints, succ_scc_id, scc_hint);\n            } else {\n                z_addr_dict_set(\n                    dag_hints, succ_scc_id,\n                    z_addr_dict_get(dag_hints, succ_scc_id) * scc_hint);\n            }\n        }\n        g_list_free(list_dag_succs);\n    }\n    z_addr_dict_destroy(invalid_sccs);\n\n    // step [4]. update RH for each address\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        // ignore invalid instruction\n        uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr);\n        if (!scc_id) {\n            continue;\n        }\n\n        if (!z_addr_dict_exist(dag_hints, scc_id)) {\n            continue;\n        }\n\n        double128_t scc_hint = z_addr_dict_get(dag_hints, scc_id);\n\n        __prob_disassembler_update_RH(pd, addr, scc_hint);\n    }\n    z_addr_dict_destroy(dag_hints);\n}\n"
  },
  {
    "path": "src/prob_disasm/prob_disasm_complete/solving.c",
    "content": "/*\n * solving.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * Normalize probabilities\n */\nZ_PRIVATE void __prob_disassembler_normalize_prob(ProbDisassembler *pd);\n\n/*\n * Restrain probabilities based on control flow constrains\n */\nZ_PRIVATE void __prob_disassembler_restrain_prob(ProbDisassembler *pd);\n\n/*\n * Spread hints to occluded instructions\n */\nZ_PRIVATE void __prob_disassembler_spread_hints(ProbDisassembler *pd);\n\n#define __DECLARE_RESTRAIN(T, op)                                              \\\n    Z_PRIVATE void __prob_disassembler_restrain_##T(ProbDisassembler *pd) {    \\\n        addr_t text_addr = pd->text_addr;                                      \\\n        size_t text_size = pd->text_size;                                      \\\n                                                                               \\\n        /* step [1]. 
calculate better T for each scc */                        \\\n        AddrDict(double128_t, dag_better);                                     \\\n        z_addr_dict_init(dag_better, 0, pd->scc_n);                            \\\n        for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {  \\\n            double128_t T = NAN;                                               \\\n            __prob_disassembler_get_##T(pd, addr, &T);                         \\\n            assert(!isnan(T));                                                 \\\n                                                                               \\\n            uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr);           \\\n                                                                               \\\n            if (z_addr_dict_exist(dag_better, scc_id)) {                       \\\n                double128_t T##_ = z_addr_dict_get(dag_better, scc_id);        \\\n                if (T op T##_) {                                               \\\n                    z_addr_dict_set(dag_better, scc_id, T);                    \\\n                }                                                              \\\n            } else {                                                           \\\n                z_addr_dict_set(dag_better, scc_id, T);                        \\\n            }                                                                  \\\n        }                                                                      \\\n                                                                               \\\n        /* step [2]. 
restrain T */                                             \\\n        for (GList *l = pd->topo->tail; l != NULL; l = l->prev) {              \\\n            uint32_t scc_id = (uint32_t)l->data;                               \\\n                                                                               \\\n            assert(z_addr_dict_exist(dag_better, scc_id));                     \\\n                                                                               \\\n            double128_t T = z_addr_dict_get(dag_better, scc_id);               \\\n                                                                               \\\n            GHashTable *pred_scc_ids = z_addr_dict_get(pd->dag_preds, scc_id); \\\n            GList *list_pred_scc_ids = g_hash_table_get_keys(pred_scc_ids);    \\\n            for (GList *ll = list_pred_scc_ids; ll != NULL; ll = ll->next) {   \\\n                uint32_t pred_scc_id = (uint32_t)ll->data;                     \\\n                                                                               \\\n                double128_t pred_##T =                                         \\\n                    z_addr_dict_get(dag_better, pred_scc_id);                  \\\n                                                                               \\\n                if (T op pred_##T) {                                           \\\n                    z_addr_dict_set(dag_better, pred_scc_id, T);               \\\n                }                                                              \\\n            }                                                                  \\\n            g_list_free(list_pred_scc_ids);                                    \\\n        }                                                                      \\\n                                                                               \\\n        /* step [3]. 
reassign T for each address */                            \\\n        for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {  \\\n            uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr);           \\\n                                                                               \\\n            assert(z_addr_dict_exist(dag_better, scc_id));                     \\\n                                                                               \\\n            __prob_disassembler_reset_##T(                                     \\\n                pd, addr, z_addr_dict_get(dag_better, scc_id));                \\\n        }                                                                      \\\n                                                                               \\\n        z_addr_dict_destroy(dag_better);                                       \\\n    }\n\n__DECLARE_RESTRAIN(D, >);\n__DECLARE_RESTRAIN(P, <);\n\n#undef __DECLARE_RESTRAIN\n\nZ_PRIVATE void __prob_disassembler_normalize_prob(ProbDisassembler *pd) {\n    Disassembler *d = pd->base;\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        double128_t D = NAN;\n        __prob_disassembler_get_D(pd, addr, &D);\n        assert(!isnan(D));\n\n        // check P first to make sure a 100% data is still data\n        double128_t P = NAN;\n        if (__prob_disassembler_get_P(pd, addr, &P)) {\n            if (__double128_equal(P, 0.0)) {\n                continue;\n            }\n        }\n\n        if (__double128_equal(D, 1.0)) {\n            __prob_disassembler_reset_P(pd, addr, 0.0);\n            continue;\n        }\n\n        if (__double128_equal(D, 0.0)) {\n            __prob_disassembler_reset_P(pd, addr, 1.0);\n            continue;\n        }\n\n        double128_t s = 1.0 / D;\n\n        if (isinf(s)) {\n            __prob_disassembler_reset_P(pd, addr, 1.0);\n 
           continue;\n        }\n\n        Iter(addr_t, occ_addrs);\n        z_iter_init_from_buf(occ_addrs,\n                             z_disassembler_get_occluded_addrs(d, addr));\n\n        while (!z_iter_is_empty(occ_addrs)) {\n            addr_t occ_addr = *(z_iter_next(occ_addrs));\n\n            double128_t occ_D = NAN;\n            __prob_disassembler_get_D(pd, occ_addr, &occ_D);\n            assert(!isnan(occ_D));\n\n            if (__double128_equal(occ_D, 0.0)) {\n                s = +INFINITY;\n            } else {\n                s += 1.0 / occ_D;\n            }\n        }\n        assert(!isnan(s));\n\n        double128_t final_P = (1.0 / D) / s;\n        assert(!isnan(final_P));\n        if (!isnan(P)) {\n            size_t n = pd->round_n;\n            assert(n);\n\n            final_P = (final_P / (n + 1)) * n + P / (n + 1);\n        }\n\n        __prob_disassembler_reset_P(pd, addr, final_P);\n    }\n\n    __prob_disassembler_restrain_P(pd);\n}\n\nZ_PRIVATE void __prob_disassembler_restrain_prob(ProbDisassembler *pd) {\n    __prob_disassembler_restrain_D(pd);\n}\n\nZ_PRIVATE void __prob_disassembler_spread_hints(ProbDisassembler *pd) {\n    Disassembler *d = pd->base;\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n    // step [1]. 
use RH to update D, and reset any D bigger than 1.0 as 1.0\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        double128_t RH = NAN;\n        if (__prob_disassembler_get_RH(pd, addr, &RH)) {\n            __prob_disassembler_update_D(pd, addr, RH);\n        }\n\n        cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n        if (!inst) {\n            assert(isnan(RH) || isinf(RH));  // we may update inst_lost as +inf\n            __prob_disassembler_reset_D(pd, addr, 1.0);\n        }\n\n        double128_t D = NAN;\n        if (__prob_disassembler_get_D(pd, addr, &D)) {\n            // XXX: when D is nan or inf, it means addr has a very strong data\n            // hint and a strong inst hint. As we are trying to avoid false\n            // positive, in this case, we will set it as data.\n            if (isnan(D) || isinf(D) || D > 1.0) {\n                __prob_disassembler_reset_D(pd, addr, 1.0);\n            }\n        }\n    }\n\n    // step [2]. spread D into occluded instructions\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        double128_t min_D = NAN;\n\n        // ignore the ones already with D value\n        if (__prob_disassembler_get_D(pd, addr, &min_D)) {\n            continue;\n        }\n\n        assert(z_disassembler_get_occluded_addrs(d, addr));\n\n        Iter(addr_t, occ_addrs);\n        z_iter_init_from_buf(occ_addrs,\n                             z_disassembler_get_occluded_addrs(d, addr));\n\n        while (!z_iter_is_empty(occ_addrs)) {\n            addr_t occ_addr = *(z_iter_next(occ_addrs));\n            double128_t D = NAN;\n\n            if (__prob_disassembler_get_D(pd, occ_addr, &D)) {\n                if (isnan(min_D) || D < min_D) {\n                    min_D = D;\n                }\n            }\n        }\n\n        // XXX: note here, for a given address, if all addresses occluded with\n        // it are 100% data, it should be data. 
(the threshold 1.0 can be\n        // changed in the future -- maybe)\n        // TODO: the logic here is weird.\n        if (isnan(min_D) || __double128_equal(min_D, 1.0)) {\n            __prob_disassembler_reset_D(pd, addr, 1.0);\n        } else {\n            __prob_disassembler_reset_D(pd, addr, 1.0 - min_D);\n        }\n    }\n}\n"
  },
  {
    "path": "src/prob_disasm/prob_disasm_complete.c",
    "content": "/*\n * prob_disasm_complete.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"../buffer.h\"\n#include \"../disassembler.h\"\n#include \"../iterator.h\"\n#include \"../restricted_ptr.h\"\n\n#include <ctype.h>\n#include <math.h>\n\ntypedef enum dynamic_hint_type_t {\n    DHINT_NONE = 0,\n    DHINT_CODE = 1,  // XXX: we skip 0 for easy use of GHashTable\n    DHINT_DATA,\n} DHintType;\n\ntypedef struct dynamic_hint_t {\n    addr_t addr;\n    DHintType type;\n} DHint;\n\n///////////////////////////////////\n// ProbDisassembler\n///////////////////////////////////\n\nSTRUCT(ProbDisassembler, {\n    // pointer to Disassembler, so that we can call some functions of\n    // Disassembler (it looks like inheritance but not really)\n    Disassembler *base;\n\n    AddrDict(double128_t, H);\n    AddrDict(double128_t, RH);\n    AddrDict(double128_t, P);\n    AddrDict(double128_t, D);\n\n    AddrDict(double128_t, inst_lost);\n    AddrDict(double128_t, data_hint);\n\n    // basic information\n    Binary *binary;\n    addr_t text_addr;\n    size_t text_size;\n\n    // logged dynamic hints (i.e., certain code/data information collected\n    // during previous runs)\n    const char *dhint_filename;\n    GHashTable *dynamic_hints;\n\n    // DAG information\n    // TODO: we should do this for other 
address-keyed hash table.\n    uint32_t scc_n;\n    AddrDict(uint32_t, addr2sccid);\n    AddrDictFast(GHashTable *, dag_succs);\n    AddrDictFast(GHashTable *, dag_preds);\n    AddrDictFast(bool, dag_dead);\n    GQueue *topo;\n\n    AddrDict(double128_t, dag_P);\n\n    // how many round we have played\n    size_t round_n;\n});\n\n#define __GET_PDISASM(d) ((ProbDisassembler *)((d)->prob_disasm))\n#define __SET_PDISASM(d, v)                    \\\n    do {                                       \\\n        (d)->prob_disasm = (PhantomType *)(v); \\\n    } while (0)\n\n#define INIT_ROUND_N 3\n#define PROPAGATE_P 0.1\n#define STRONG_DATA_HINT 1e52\n\n///////////////////////////////////\n// All hints and losts value\n///////////////////////////////////\n\n// base\n#define __BASE_CF (1.0 / 256.0)\n#define __BASE_REG (1.0 / 16.0)\n#define __BASE_INS (1.0 / 502.0)  // it is naively/semi-randomly picked by me\n#define __BASE_PRINTABLE_CHAR (256.0 / 95.0)\n#define __BASE_VALUE (256.0)\n\n#define BASE_CF(INST) \\\n    (__pow_in_4(__BASE_CF, (INST)->detail->x86.encoding.imm_size))\n#define BASE_CF_RAW(N) (__pow_in_4(__BASE_CF, (N)))\n#define BASE_REG (__BASE_REG)\n#define BASE_INS (__BASE_INS)\n#define BASE_STRING(N) (__pow_in_n(__BASE_PRINTABLE_CHAR, (N)))\n#define BASE_VALUE(L, R, N) \\\n    (__pow_in_n(__pow_in_n(__BASE_VALUE, (L)) / (R), (N)))\n\n// hint weights: bigger weight means higher confidence\n#define __HINT_PLT_CALL_WEIGHT (100000.0)\n#define __HINT_PLT_JMP_WEIGHT (0.5)\n#define __HINT_CONVERGED_CALL_WEIGHT (1.0)\n#define __HINT_CONVERGED_JMP_WEIGHT (1.0)\n#define __HINT_CROSSED_JMP_WEIGHT (1.0)\n#define __HINT_USEDEF_GPR_WEIGHT (1.0)\n#define __HINT_USEDEF_SSE_WEIGHT (0.5)\n#define __HINT_POP_RET_WEIGHT (1.0)\n#define __HINT_CMP_CJMP_WEIGHT (1.0)\n#define __HINT_ARG_CALL_WEIGHT (1.0)\n// data hint is different, higher means lower confidence\n#define __HINT_STRING_WEIGHT \\\n    (0.00001 * (1.0 / 256.0))  // TODO: check the string is valid instead of\n               
                // assigning a very small weight\n#define __HINT_VALUE_WEIGHT (1.0)\n\n// hint functions\n#define HINT(TYPE, BASE) ((1.0 / (__HINT_##TYPE##_WEIGHT)) * (BASE))\n\n// lost weights: bigger weight means higher confidence\n#define __LOST_OUTSIDE_CALL_WEIGHT (+INFINITY)\n#define __LOST_OUTSIDE_JMP_WEIGHT (+INFINITY)\n#define __LOST_KILLED_GPR_WEIGHT (1.0)\n#define __LOST_KILLED_SSE_WEIGHT (2.0)\n\n// lost functions\n#define LOST(TYPE, BASE) ((__LOST_##TYPE##_WEIGHT) * (1.0 / (BASE)))\n\n///////////////////////////////////\n// Useful functions\n///////////////////////////////////\n\n/*\n * Securely check whether two double128_t variables are equal\n */\nZ_PRIVATE bool __double128_equal(double128_t a, double128_t b) {\n    double128_t max_val = (fabsl(a) > fabsl(b) ? fabsl(a) : fabsl(b));\n    return (fabsl(a - b) <= max_val * LDBL_EPSILON);\n}\n\n/*\n * simple function to calculate pow\n */\nZ_PRIVATE double128_t __pow_in_4(double128_t base, size_t n) {\n    double128_t res = base;\n    switch (n) {\n        case 4:\n            res = res * res;\n        case 2:\n            res = res * res;\n            break;\n        case 3:\n            res = res * res * res;\n        case 1:\n            break;\n        case 0:\n            if (__double128_equal(base, 0.0)) {\n                res = NAN;\n            } else {\n                res = 1.0;\n            }\n            break;\n        default:\n            EXITME(\"invalid pow: %d\", n);\n    }\n    return res;\n}\n\n/*\n * fast function to calculate pow when n is integer\n */\nZ_PRIVATE double128_t __pow_in_n(double128_t base, size_t n) {\n    double128_t res = 1.0;\n    double128_t cur = base;\n    while (n > 0) {\n        if (n & 1) {\n            res *= cur;\n        }\n        cur = cur * cur;\n        n >>= 1;\n    }\n\n    return res;\n}\n\n///////////////////////////////////\n// Getter and Setter\n///////////////////////////////////\n\n#define PROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(T)               
           \\\n    Z_PRIVATE void __prob_disassembler_update_##T(                          \\\n        ProbDisassembler *pd, addr_t addr, double128_t T) {                 \\\n        if (!z_addr_dict_exist(pd->T, addr)) {                              \\\n            z_addr_dict_set(pd->T, addr, T);                                \\\n        } else {                                                            \\\n            z_addr_dict_set(pd->T, addr, z_addr_dict_get(pd->T, addr) * T); \\\n        }                                                                   \\\n    }\n\n#define PROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(T)                            \\\n    Z_PRIVATE bool __prob_disassembler_get_##T(ProbDisassembler *pd,          \\\n                                               addr_t addr, double128_t *T) { \\\n        if (!z_addr_dict_exist(pd->T, addr)) {                                \\\n            return false;                                                     \\\n        } else {                                                              \\\n            *T = z_addr_dict_get(pd->T, addr);                                \\\n            return true;                                                      \\\n        }                                                                     \\\n    }\n\n#define PROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(T)                           \\\n    Z_PRIVATE void __prob_disassembler_reset_##T(ProbDisassembler *pd,         \\\n                                                 addr_t addr, double128_t T) { \\\n        z_addr_dict_set(pd->T, addr, T);                                       \\\n    
}\n\nPROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(H);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(RH);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(D);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(inst_lost);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(data_hint);\n\nPROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(H);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(RH);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(D);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(inst_lost);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(data_hint);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(P);\n\nPROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(H);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(RH);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(D);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(inst_lost);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(data_hint);\nPROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(P);\n\n#define __prob_disassembler_update_inst_hint __prob_disassembler_update_H\n#define __prob_disassembler_get_inst_hint __prob_disassembler_get_H\n#define __prob_disassembler_reset_inst_hint __prob_disassembler_reset_H\n\n///////////////////////////////////\n// Local functions\n///////////////////////////////////\n\n/*\n * Get successors for propagating instruction hints. It is a very helpful\n * wrapper function to customize propagation rule for instruction hints.\n */\nZ_PRIVATE bool __prob_disassembler_get_propogate_successors(\n    ProbDisassembler *pd, addr_t addr, size_t *n, addr_t **succs);\n\n/*\n * Apply hints and losts into working environment (RH/D/P), and remove previous\n * data when there are no hint and lost. 
(playground = H + RH + D + P, and H is\n * for inst_hint)\n */\nZ_PRIVATE void __prob_disassembler_refresh_playground(ProbDisassembler *pd);\n\n///////////////////////////////////\n// Components\n///////////////////////////////////\n\n// XXX: note that we should import following components here, as they might use\n// above local functions.\n#include \"prob_disasm_complete/dag.c\"\n#include \"prob_disasm_complete/hints.c\"\n#include \"prob_disasm_complete/propagation.c\"\n#include \"prob_disasm_complete/solving.c\"\n\n///////////////////////////////////\n// Test Code\n///////////////////////////////////\n\n#ifdef DEBUG\n\nZ_RESERVED Z_PRIVATE bool __prob_disassembler_path_dfs(\n    ProbDisassembler *pd, Buffer *(*get_next)(UCFG_Analyzer *, addr_t),\n    GQueue *stack, GHashTable *seen, addr_t cur_addr, addr_t target) {\n    Disassembler *d = pd->base;\n\n    cs_insn *inst = z_disassembler_get_superset_disasm(d, cur_addr);\n    if (!inst) {\n        return false;\n    }\n\n    g_queue_push_tail(stack, (gpointer)(inst));\n\n    if (cur_addr == target) {\n        return true;\n    }\n\n    Iter(addr_t, next_addrs);\n    z_iter_init_from_buf(next_addrs, (*get_next)(d->ucfg_analyzer, cur_addr));\n\n    while (!z_iter_is_empty(next_addrs)) {\n        addr_t next_addr = *(z_iter_next(next_addrs));\n\n        if (g_hash_table_lookup(seen, GSIZE_TO_POINTER(next_addr))) {\n            continue;\n        }\n\n        g_hash_table_insert(seen, GSIZE_TO_POINTER(next_addr),\n                            GSIZE_TO_POINTER(1));\n\n        if (__prob_disassembler_path_dfs(pd, get_next, stack, seen, next_addr,\n                                         target)) {\n            return true;\n        }\n    }\n\n    g_queue_pop_tail(stack);\n    return false;\n}\n\nZ_RESERVED Z_PRIVATE void __prob_disassembler_search_path(ProbDisassembler *pd,\n                                                          addr_t src,\n                                                          addr_t dst) {\n    
GHashTable *seen =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    GQueue *stack = g_queue_new();\n\n    if (!__prob_disassembler_path_dfs(pd, &z_ucfg_analyzer_get_all_successors,\n                                      stack, seen, src, dst)) {\n        EXITME(\"cannot reach %#lx from %#lx\", dst, src);\n    } else {\n        while (!g_queue_is_empty(stack)) {\n            cs_insn *inst = (cs_insn *)g_queue_pop_head(stack);\n            z_info(CS_SHOW_INST(inst));\n        }\n    }\n\n    g_hash_table_destroy(seen);\n    g_queue_free(stack);\n}\n\n#endif\n\nZ_PRIVATE void __prob_disassembler_refresh_playground(ProbDisassembler *pd) {\n    addr_t text_addr = pd->text_addr;\n    size_t text_size = pd->text_size;\n\n#ifdef DEBUG\n    // remove dag_P first (it is important for the following checking at step 3)\n    for (uint32_t scc_id = 0; scc_id < pd->scc_n; scc_id++) {\n        z_addr_dict_remove(pd->dag_P, scc_id);\n        assert(!z_addr_dict_exist(pd->dag_P, scc_id));\n    }\n#endif\n\n    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {\n        // step [1]. apply inst_lost into RH\n        double128_t inst_lost = NAN;\n        if (__prob_disassembler_get_inst_lost(pd, addr, &inst_lost)) {\n            __prob_disassembler_reset_RH(pd, addr, inst_lost);\n        } else {\n            z_addr_dict_remove(pd->RH, addr);\n            assert(!z_addr_dict_exist(pd->RH, addr));\n        }\n\n        // step [2]. apply data_hint into D\n        double128_t data_hint = NAN;\n        if (__prob_disassembler_get_data_hint(pd, addr, &data_hint)) {\n            __prob_disassembler_reset_D(pd, addr, data_hint);\n        } else {\n            z_addr_dict_remove(pd->D, addr);\n            assert(!z_addr_dict_exist(pd->D, addr));\n        }\n\n        // step [3]. 
update dag P\n        double128_t P = NAN;\n        if (__prob_disassembler_get_P(pd, addr, &P)) {\n            uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr);\n#ifdef DEBUG\n            if (z_addr_dict_exist(pd->dag_P, scc_id) &&\n                z_addr_dict_get(pd->dag_P, scc_id) != P) {\n                EXITME(\"inconsistent dag P: %#lx (%Le) v/s %d (%Le)\", addr, P,\n                       scc_id, z_addr_dict_get(pd->dag_P, scc_id));\n            }\n#endif\n            z_addr_dict_set(pd->dag_P, scc_id, P);\n            // XXX: note that we do not perform z_addr_dict_remove(pd->P, addr)\n            // here. It is mainly because we want to maintain a feature that if\n            // an address was thought as 100% non-instruction before, the\n            // address should always be non-instruction.\n        }\n    }\n}\n\nZ_PRIVATE bool __prob_disassembler_get_propogate_successors(\n    ProbDisassembler *pd, addr_t addr, size_t *n, addr_t **succs) {\n    Disassembler *d = pd->base;\n\n    cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);\n    if (!inst) {\n        return false;\n    }\n\n    Buffer *succs_buf =\n        z_ucfg_analyzer_get_all_successors(d->ucfg_analyzer, addr);\n    assert(succs_buf);\n\n    // XXX:  option one: propogate hints through fall-through edges for calls\n    // ------\n    // if (z_capstone_is_call(inst)) {\n    //     addr_t next_addr = addr + inst->size;\n    //     z_buffer_append_raw(succs_buf, (uint8_t *)&next_addr,\n    //                         sizeof(next_addr));\n    // }\n    // ------\n\n    *n = z_buffer_get_size(succs_buf) / sizeof(addr_t);\n    *succs = (addr_t *)z_buffer_get_raw_buf(succs_buf);\n\n    return true;\n}\n\n///////////////////////////////////\n// ProbDisassembler Public API\n///////////////////////////////////\n\nZ_PRIVATE double128_t z_prob_disassembler_get_inst_prob(ProbDisassembler *pd,\n                                                        addr_t addr) {\n    if (addr < 
pd->text_addr || addr >= pd->text_addr + pd->text_size) {\n        return 0.0;\n    }\n\n    double128_t P = NAN;\n    __prob_disassembler_get_P(pd, addr, &P);\n    assert(!isnan(P));\n\n    if (!__double128_equal(P, 0.0)) {\n        return P;\n    }\n\n    // additionally check dag_dead and very huge data hint\n    double128_t data_hint = NAN;\n    if (__prob_disassembler_get_data_hint(pd, addr, &data_hint)) {\n        if (data_hint > STRONG_DATA_HINT) {\n            return -0.0;\n        }\n    }\n\n    uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr);\n    if (z_addr_dict_exist(pd->dag_dead, scc_id)) {\n        return -0.0;\n    }\n\n    return P;\n}\n\nZ_PRIVATE void z_prob_disassembler_get_internal(\n    ProbDisassembler *pd, addr_t addr, cs_insn **inst, uint32_t *scc_id,\n    double128_t *inst_hint, double128_t *inst_lost, double128_t *data_hint,\n    double128_t *D, double128_t *P) {\n    Disassembler *d = pd->base;\n\n    *inst = z_disassembler_get_superset_disasm(d, addr);\n    *scc_id = z_addr_dict_get(pd->addr2sccid, addr);\n\n    __prob_disassembler_get_inst_hint(pd, addr, inst_hint);\n    __prob_disassembler_get_inst_lost(pd, addr, inst_lost);\n    __prob_disassembler_get_data_hint(pd, addr, data_hint);\n    __prob_disassembler_get_D(pd, addr, D);\n\n    *P = z_prob_disassembler_get_inst_prob(pd, addr);\n}\n\nZ_PRIVATE void z_prob_disassembler_update(ProbDisassembler *pd, addr_t addr,\n                                          bool is_inst, bool need_log) {\n    if (is_inst) {\n        // we have known for sure this addr is an instruction boundary\n        __prob_disassembler_reset_inst_hint(pd, addr, 0.0);\n        z_addr_dict_remove(pd->inst_lost, addr);\n        z_addr_dict_remove(pd->data_hint, addr);\n    } else {\n        // we have known for sure this addr is not an instruction boundary\n        z_addr_dict_remove(pd->H, addr);  // inst_hint\n        __prob_disassembler_reset_inst_lost(pd, addr, +INFINITY);\n        // XXX: resetting 
data_hint should be more carefully handled as there\n        // are two cases of is_inst == false: 1) inside an instrution and 2)\n        // data\n        __prob_disassembler_reset_data_hint(pd, addr, +INFINITY);\n    }\n\n    if (need_log) {\n        // log the hint\n        DHintType type = (is_inst ? DHINT_CODE : DHINT_DATA);\n\n#ifdef DEBUG\n        DHintType old_type = (DHintType)g_hash_table_lookup(\n            pd->dynamic_hints, GSIZE_TO_POINTER(addr));\n        if (old_type && (old_type != type)) {\n            EXITME(\"inconstatn type of the dynamic hint at %#lx\", addr);\n        }\n#endif\n\n        g_hash_table_insert(pd->dynamic_hints, GSIZE_TO_POINTER(addr),\n                            GSIZE_TO_POINTER(type));\n    }\n}\n\nZ_PRIVATE void z_prob_disassembler_start(ProbDisassembler *pd) {\n    /*\n     * step [1]. collect hints if we haven't: please refer to\n     * *prob_disasm_complete/hints.c*\n     */\n    if (!pd->round_n) {\n        // calculate static hints\n        __prob_disassembler_collect_cf_hints(pd);\n        __prob_disassembler_collect_reg_hints(pd);\n        __prob_disassembler_collect_pop_ret_hints(pd);\n        __prob_disassembler_collect_cmp_cjmp_hints(pd);\n        __prob_disassembler_collect_arg_call_hints(pd);\n        __prob_disassembler_collect_str_hints(pd);\n        __prob_disassembler_collect_value_hints(pd);\n\n        // apply logged dynamic hint\n        {\n            GHashTableIter iter;\n            gpointer key, value;\n            g_hash_table_iter_init(&iter, pd->dynamic_hints);\n\n            while (g_hash_table_iter_next(&iter, &key, &value)) {\n                addr_t addr = (addr_t)key;\n                bool is_inst = ((DHintType)value == DHINT_CODE);\n                z_prob_disassembler_update(pd, addr, is_inst, false);\n            }\n        }\n\n        z_info(\"probabilistic disassembly: hints collection done\");\n    }\n\n    /*\n     * step [2]. 
play several rounds to calculate probabilities\n     */\n    do {\n        /*\n         * step [2.1]. refresh playground\n         */\n        __prob_disassembler_refresh_playground(pd);\n\n        /*\n         * step [2]. propogate hints:\n         *      refer to *prob_disasm_complete/propagation.c*\n         */\n        __prob_disassembler_propogate_inst_hints(pd);\n        // TODO: __prob_disassembler_propogate_data_hints(pd);\n        z_trace(\"probabilistic disassembly: hints propagation done\");\n\n        /*\n         * step [3]. spread hints: refer to *prob_disasm_complete/solving.c*\n         */\n        __prob_disassembler_spread_hints(pd);\n        z_trace(\"probabilistic disassembly: hints spreading done\");\n\n        /*\n         * step [4]. restrain probabilities:\n         *      refer to *prob_disasm_complete/solving.c*\n         */\n        __prob_disassembler_restrain_prob(pd);\n        z_trace(\"probabilistic disassembly: probability restraint done\");\n\n        /*\n         * step [5]. 
normalized probabilities:\n         *      refer to *prob_disasm_complete/solving.c*\n         */\n        __prob_disassembler_normalize_prob(pd);\n        z_trace(\"probabilistic disassembly: probability normalization done\");\n\n        pd->round_n += 1;\n        z_info(\"probabilistic disassembly round %d done\", pd->round_n);\n    } while (pd->round_n < INIT_ROUND_N);\n}\n\nZ_PRIVATE ProbDisassembler *z_prob_disassembler_create(Disassembler *d) {\n    ProbDisassembler *pd = STRUCT_ALLOC(ProbDisassembler);\n\n    pd->base = d;\n\n    pd->binary = d->binary;\n    pd->text_addr = d->text_addr;\n    pd->text_size = d->text_size;\n\n    pd->round_n = 0;\n\n    // read p-disasm file\n    pd->dynamic_hints =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    const char *original_filename = z_binary_get_original_filename(d->binary);\n    pd->dhint_filename = z_strcat(PDISASM_FILENAME_PREFIX, original_filename);\n    {\n        if (!z_access(pd->dhint_filename, F_OK)) {\n            z_info(\n                \"pdisasm file exists, so we will read those pre-calcualted \"\n                \"hints\");\n            Buffer *buf = z_buffer_read_file(pd->dhint_filename);\n\n            size_t n = z_buffer_get_size(buf) / sizeof(DHint);\n            DHint *hints = (DHint *)z_buffer_get_raw_buf(buf);\n\n            // XXX: note that we will apply those dynamic hints after collecting\n            // static hints.\n            for (size_t i = 0; i < n; i++) {\n                g_hash_table_insert(pd->dynamic_hints,\n                                    GSIZE_TO_POINTER(hints[i].addr),\n                                    GSIZE_TO_POINTER(hints[i].type));\n            }\n\n            z_buffer_destroy(buf);\n        }\n    }\n\n    /*\n     * H: instruction hint source for each address, which is also the\n     * update point for all *instruction hints*.\n     */\n    z_addr_dict_init(pd->H, pd->text_addr, pd->text_size);\n\n    /*\n     * RH: Propogated 
instruction hints for each address, which is the\n     * result of hint propogation, and also the update point of all\n     * *instruction losts*.\n     *\n     * Additionally, we do not propogate instruction losts.\n     */\n    z_addr_dict_init(pd->RH, pd->text_addr, pd->text_size);\n    z_addr_dict_init(pd->inst_lost, pd->text_addr, pd->text_size);\n\n    /*\n     * D: final probabilities of each address to be data, which is also the\n     * update point of all *data hints*.\n     */\n    z_addr_dict_init(pd->D, pd->text_addr, pd->text_size);\n    z_addr_dict_init(pd->data_hint, pd->text_addr, pd->text_size);\n\n    /*\n     * P: final probabilities of each address to be instruction.\n     */\n    z_addr_dict_init(pd->P, pd->text_addr, pd->text_size);\n\n    /*\n     * dag building: please refer to: *prob_disasm_complete/dag.c*\n     */\n    __prob_disassembler_build_dag(pd);\n\n    return pd;\n}\n\nZ_PRIVATE void z_prob_disassembler_destroy(ProbDisassembler *pd) {\n    // XXX: note that *base* should not be destroyed here.\n    z_addr_dict_destroy(pd->H);\n    z_addr_dict_destroy(pd->RH);\n    z_addr_dict_destroy(pd->P);\n    z_addr_dict_destroy(pd->D);\n\n    z_addr_dict_destroy(pd->inst_lost);\n    z_addr_dict_destroy(pd->data_hint);\n\n    z_addr_dict_destroy(pd->addr2sccid);\n    z_addr_dict_destroy(pd->dag_succs, &g_hash_table_destroy);\n    z_addr_dict_destroy(pd->dag_preds, &g_hash_table_destroy);\n    z_addr_dict_destroy(pd->dag_dead);\n    g_queue_free(pd->topo);\n\n    z_addr_dict_destroy(pd->dag_P);\n\n    // write down dynamic hints\n    {\n        FILE *f = z_fopen(pd->dhint_filename, \"wb\");\n        DHint hint = {\n            .addr = INVALID_ADDR,\n            .type = DHINT_NONE,\n        };\n\n        GHashTableIter iter;\n        gpointer key, value;\n        g_hash_table_iter_init(&iter, pd->dynamic_hints);\n\n        while (g_hash_table_iter_next(&iter, &key, &value)) {\n            hint.addr = (addr_t)key;\n            hint.type = 
(DHintType)value;\n            if (z_fwrite(&hint, sizeof(DHint), 1, f) != 1) {\n                EXITME(\"error on writing dynamic hint file\");\n            }\n        }\n\n        z_fclose(f);\n    }\n    z_free((char *)pd->dhint_filename);\n    g_hash_table_destroy(pd->dynamic_hints);\n\n    z_free(pd);\n}\n\n///////////////////////////////////\n// Disassembler Private API\n///////////////////////////////////\n\nZ_PRIVATE void __disassembler_pdisasm_create(Disassembler *d) {\n    __SET_PDISASM(d, z_prob_disassembler_create(d));\n}\n\nZ_PRIVATE void __disassembler_pdisasm_destroy(Disassembler *d) {\n    z_prob_disassembler_destroy(__GET_PDISASM(d));\n}\n\nZ_PRIVATE void __disassembler_pdisasm_start(Disassembler *d) {\n    z_prob_disassembler_start(__GET_PDISASM(d));\n}\n\nZ_PRIVATE double128_t __disassembler_pdisasm_get_inst_prob(Disassembler *d,\n                                                           addr_t addr) {\n    return z_prob_disassembler_get_inst_prob(__GET_PDISASM(d), addr);\n}\n\nZ_PRIVATE void __disassembler_pdisasm_get_internal(\n    Disassembler *d, addr_t addr, cs_insn **inst, uint32_t *scc_id,\n    double128_t *inst_hint, double128_t *inst_lost, double128_t *data_hint,\n    double128_t *D, double128_t *P) {\n    z_prob_disassembler_get_internal(__GET_PDISASM(d), addr, inst, scc_id,\n                                     inst_hint, inst_lost, data_hint, D, P);\n}\n\nZ_PRIVATE void __disassembler_pdisasm_update(Disassembler *d, addr_t addr,\n                                             bool is_inst) {\n    z_prob_disassembler_update(__GET_PDISASM(d), addr, is_inst, true);\n}\n\n#undef __GET_PDISASM\n#undef __SET_PDISASM\n"
  },
  {
    "path": "src/prob_disasm/prob_disasm_simple.c",
    "content": "/*\n * prob_disasm_simple.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#define __GET_PDISASM(d) ((Splay *)((d)->prob_disasm))\n#define __SET_PDISASM(d, v)                    \\\n    do {                                       \\\n        (d)->prob_disasm = (PhantomType *)(v); \\\n    } while (0)\n\ntypedef struct code_segment_t {\n    addr_t addr;\n    size_t size;\n} CodeSegment;\n\nZ_PRIVATE void __disassembler_pdisasm_create_S(Disassembler *d) {\n    const char *original_filename = z_binary_get_original_filename(d->binary);\n    const char *codeseg_filename =\n        z_strcat(original_filename, CODE_SEGMENT_FILE_SUFFIX);\n\n    __SET_PDISASM(d, z_splay_create(NULL));\n\n    // XXX: code segment file is mainly used for debugging purpose.\n    if (!z_access(codeseg_filename, F_OK)) {\n        // code segment file exits\n        z_info(\n            \"code segment file (for linear disassembly) is persent, and we will\"\n            \"use those pre-defined code segments\");\n\n        Buffer *buf = z_buffer_read_file(codeseg_filename);\n\n        // tail (virtual) code segment\n        assert(INVALID_ADDR > 0);\n        CodeSegment virtual_code_segment = {\n            .addr = INVALID_ADDR,\n            .size = 0,\n        };\n        z_buffer_append_raw(buf, (uint8_t 
*)&virtual_code_segment,\n                            sizeof(virtual_code_segment));\n\n        size_t n = z_buffer_get_size(buf) / sizeof(CodeSegment);\n        CodeSegment *codes = (CodeSegment *)z_buffer_get_raw_buf(buf);\n\n        addr_t cur_addr = codes[0].addr;\n        size_t cur_size = codes[0].size;\n        for (int i = 1; i < n; i++) {\n            CodeSegment *code = &(codes[i]);\n\n            if (code->addr <= cur_addr) {\n                EXITME(\"pre-defined code segments are not in increasing order\");\n            }\n\n            if (code->addr <= cur_addr + cur_size &&\n                code->addr != INVALID_ADDR) {\n                size_t tmp_size = code->addr + code->size - cur_addr;\n                cur_size = (cur_size >= tmp_size ? cur_size : tmp_size);\n            } else {\n                z_info(\"pre-defined code segment: [%#lx, %#lx]\", cur_addr,\n                       cur_addr + cur_size - 1);\n                Snode *node = z_snode_create(cur_addr, cur_size, NULL, NULL);\n                z_splay_insert(__GET_PDISASM(d), node);\n\n                cur_addr = code->addr;\n                cur_size = code->size;\n            }\n        }\n\n        z_buffer_destroy(buf);\n    } else {\n        z_info(\"no code segment file found, patch the whole .text section\");\n        Snode *node = z_snode_create(d->text_addr, d->text_size, NULL, NULL);\n        z_splay_insert(__GET_PDISASM(d), node);\n    }\n\n    z_free((char *)codeseg_filename);\n}\n\nZ_PRIVATE void __disassembler_pdisasm_destroy_S(Disassembler *d) {\n    z_splay_destroy(__GET_PDISASM(d));\n}\n\nZ_PRIVATE void __disassembler_pdisasm_start_S(Disassembler *d) {\n    /*\n     * leave it blank\n     */\n}\n\nZ_PRIVATE double128_t __disassembler_pdisasm_get_inst_prob_S(Disassembler *d,\n                                                             addr_t addr) {\n    if (z_splay_search(__GET_PDISASM(d), addr)) {\n        return 1.0;\n    } else {\n        return 0.0;\n    
}\n}\n\nZ_PRIVATE void __disassembler_pdisasm_get_internal_S(\n    Disassembler *d, addr_t addr, cs_insn **inst, uint32_t *scc_id,\n    double128_t *inst_hint, double128_t *inst_lost, double128_t *data_hint,\n    double128_t *D, double128_t *P) {\n    EXITME(\"Probabilisitic Disassembly is not fully supported\");\n}\n\nZ_PRIVATE void __disassembler_pdisasm_update_S(Disassembler *d, addr_t addr,\n                                               bool is_inst) {\n    /*\n     * leave it blank\n     */\n}\n\n#undef __GET_PDISASM\n#undef __SET_PDISASM\n"
  },
  {
    "path": "src/restricted_ptr.c",
    "content": "/*\n * restricted_ptr.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"restricted_ptr.h\"\n#include \"utils.h\"\n\n/*\n * Setter and Getter\n */\nDEFINE_GETTER(Rptr, rptr, size_t, size);\n\nZ_PRIVATE void __rptr_check_null(Rptr *rptr) {\n    if (z_rptr_is_null(rptr)) {\n        EXITME(\"rptr is NULL\");\n    }\n}\n\nZ_API Rptr *z_rptr_create(uint8_t *base_ptr, size_t size) {\n    Rptr *rptr = STRUCT_ALLOC(Rptr);\n    rptr->base_ptr = base_ptr;\n    rptr->raw_ptr = base_ptr;\n    rptr->size = size;\n    return rptr;\n}\n\nZ_API void z_rptr_destroy(Rptr *rptr) { z_free(rptr); }\n\nZ_API void *z_rptr_safe_raw_ptr(Rptr *rptr, size_t n) {\n    __rptr_check_null(rptr);\n    if (rptr->size < n) {\n        EXITME(\"restricted pointer's size is smaller than memcpy size\");\n    }\n    return rptr->raw_ptr;\n}\n\nZ_API void z_rptr_memcpy_to(Rptr *rptr, uint8_t *src, size_t size) {\n    __rptr_check_null(rptr);\n    if (rptr->size < size) {\n        EXITME(\"restricted pointer's size is smaller than memcpy size\");\n    }\n    memcpy(rptr->raw_ptr, src, size);\n}\n\nZ_API void z_rptr_memcpy_from(Rptr *rptr, uint8_t *dst, size_t size) {\n    __rptr_check_null(rptr);\n    if (rptr->size < size) {\n        EXITME(\"restricted pointer's size is smaller than memcpy size\");\n    }\n    
memcpy(dst, rptr->raw_ptr, size);\n}\n\nZ_API void z_rptr_truncate(Rptr *rptr, size_t n) {\n    __rptr_check_null(rptr);\n    if (n > rptr->size) {\n        EXITME(\"truncate pointer to a bigger size\");\n    }\n    rptr->size = n;\n}\n"
  },
  {
    "path": "src/restricted_ptr.h",
    "content": "/*\n * restricted_ptr.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __RESTRICTED_PTR_H\n#define __RESTRICTED_PTR_H\n\n#include \"config.h\"\n\n#define __IS_RPTR(x) _Generic((x), Rptr * : true, default : false)\n\n#define z_rptr_get_ptr(rptr, type) \\\n    ((type *)z_rptr_safe_raw_ptr(rptr, sizeof(type)))\n\n#define z_rptr_is_null(rptr) \\\n    (((rptr) == NULL) || ((rptr)->raw_ptr == NULL) || ((rptr)->size == 0))\n\n#define z_rptr_inc(rptr, type, n)                             \\\n    do {                                                      \\\n        if ((rptr)->size < (n) * sizeof(type)) {              \\\n            EXITME(\"restricted pointer's size is too small\"); \\\n        }                                                     \\\n        (rptr)->raw_ptr += (n) * sizeof(type);                \\\n        (rptr)->size -= (n) * sizeof(type);                   \\\n    } while (0)\n\n#define z_rptr_memset(s, c, n)                                \\\n    do {                                                      \\\n        if ((s)->size < n) {                                  \\\n            EXITME(\"restricted pointer's size is too small\"); \\\n        }                                                     \\\n        memset((s)->raw_ptr, c, n);                           
\\\n    } while (0)\n\n#define z_rptr_memcpy(dst, src, n)                                  \\\n    do {                                                            \\\n        if (__IS_RPTR(dst)) {                                       \\\n            z_rptr_memcpy_to((Rptr *)(dst), (uint8_t *)(src), n);   \\\n        } else {                                                    \\\n            z_rptr_memcpy_from((Rptr *)(src), (uint8_t *)(dst), n); \\\n        }                                                           \\\n    } while (0)\n\n#define z_rptr_reset(rptr)                                  \\\n    do {                                                    \\\n        (rptr)->size += (rptr)->raw_ptr - (rptr)->base_ptr; \\\n        (rptr)->raw_ptr = (rptr)->base_ptr;                 \\\n    } while (0)\n\nSTRUCT(Rptr, {\n    uint8_t *base_ptr;\n    uint8_t *raw_ptr;\n    size_t size;\n});\n\n/*\n * Setter and Getter\n */\nDECLARE_GETTER(Rptr, rptr, size_t, size);\n\n/*\n * Create a restricted pointer.\n */\nZ_API Rptr *z_rptr_create(uint8_t *base_ptr, size_t size);\n\n/*\n * Destroy a restricted pointer.\n */\nZ_API void z_rptr_destroy(Rptr *rptr);\n\n/*\n * Safely return a raw ptr\n */\nZ_API void *z_rptr_safe_raw_ptr(Rptr *rptr, size_t n);\n\n/*\n * memcpy to Rptr\n */\nZ_API void z_rptr_memcpy_to(Rptr *rptr, uint8_t *src, size_t size);\n\n/*\n * memcpy from Rptr\n */\nZ_API void z_rptr_memcpy_from(Rptr *rptr, uint8_t *dst, size_t size);\n\n/*\n * Truncate a Rptr to n\n */\nZ_API void z_rptr_truncate(Rptr *rptr, size_t n);\n\n#endif\n"
  },
  {
    "path": "src/rewriter.c",
    "content": "/*\n * rewriter.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"rewriter.h\"\n#include \"buffer.h\"\n#include \"capstone_.h\"\n#include \"config.h\"\n#include \"ucfg_analyzer.h\"\n#include \"utils.h\"\n\n#include \"x64_utils.c\"\n\n#include <capstone/capstone.h>\n\n#ifdef DEBUG\nFILE *__debug_file = NULL;\n#define __debug_printf(...) 
fprintf(__debug_file, __VA_ARGS__)\n#endif\n\n#define ASMLINE_FMT_SIZE 0x100\n\nstatic char asmline_fmt[ASMLINE_FMT_SIZE];\n\n// TODO: add BeforeBB/AfterBB/BeforeInst/AfterInst handler\n\n/*\n * Rewrite entrypoint\n */\nZ_PRIVATE void z_rewriter_rewrite_entrypoint(Rewriter *r);\n\n/*\n * Rewrite main\n */\nZ_PRIVATE void z_rewriter_rewrite_main(Rewriter *r);\n\n/*\n * Rewrite functions beyond main (_start, .init, .fini, ...)\n */\nZ_PRIVATE void z_rewriter_rewrite_beyond_main(Rewriter *r);\n\n/*\n * Function Pointer: compare two address\n */\nZ_PRIVATE int __rewriter_compare_address(addr_t x, addr_t y, void *_z);\n\n/*\n * Calculate uTP address, and store the new inst_addr into inst_addr\n */\nZ_RESERVED Z_PRIVATE addr_t __rewriter_calculate_utp_addr(Rewriter *r,\n                                                          addr_t *inst_addr,\n                                                          size_t inst_size);\n/*\n * Find a possible uTP address\n */\nZ_RESERVED Z_PRIVATE bool __rewriter_patch_utp(Rewriter *r, addr_t ori_addr);\n\n/*\n * Translate inst into shadow address\n */\nZ_PRIVATE cs_insn *__rewriter_translate_shadow_inst(Rewriter *r, cs_insn *inst,\n                                                    addr_t ori_addr);\n\n/*\n * Generate an instruction of shadow code\n */\nZ_PRIVATE void __rewriter_generate_shadow_inst(Rewriter *r, GHashTable *holes,\n                                               cs_insn *inst, addr_t ori_addr,\n                                               bool bb_entry);\n/*\n * Generate a basic block of shadow code\n */\nZ_PRIVATE void __rewriter_generate_shadow_block(\n    Rewriter *r, GHashTable *holes, GQueue *instructions, addr_t ori_addr,\n    cs_insn *(*disasm_func)(Disassembler *, addr_t));\n\n/*\n * Fill in shadow holes\n */\nZ_PRIVATE void __rewriter_fillin_shadow_hole(Rewriter *r, GHashTable *holes);\n\n/*\n * Build bridgs\n */\nZ_RESERVED Z_PRIVATE void __rewriter_build_bridges(Rewriter *r,\n                             
                      GQueue *instructions);\n\n/*\n * Emit Trampoline based on analyzed results\n */\nZ_PRIVATE void __rewriter_emit_trampoline(Rewriter *r, addr_t addr);\n\n// XXX: this include must be placed here, to use above predeclared these\n// prototypes\n#include \"rewriter_handlers/handler_main.c\"\n\n/*\n * Cound how many BB ID is conflicted\n */\nZ_PRIVATE void __rewriter_count_conflicted_ids(Rewriter *r);\n\nZ_PRIVATE void __rewriter_count_conflicted_ids(Rewriter *r) {\n    size_t conflicts = 0;\n\n    GHashTable *id_2_bb =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    GList *bbs = g_hash_table_get_keys(r->rewritten_bbs);\n\n    for (GList *l = bbs; l != NULL; l = l->next) {\n        addr_t bb_addr = (addr_t)(l->data);\n        size_t bb_id = AFL_BB_ID(bb_addr);\n\n        addr_t old_bb =\n            (addr_t)g_hash_table_lookup(id_2_bb, GSIZE_TO_POINTER(bb_id));\n        if (!old_bb) {\n            g_hash_table_insert(id_2_bb, GSIZE_TO_POINTER(bb_id),\n                                GSIZE_TO_POINTER(bb_addr));\n        } else {\n            conflicts += 1;\n            z_trace(\"conflict: %#lx v/s %#lx (%#lx)\", bb_addr, old_bb, bb_id);\n        }\n    }\n\n    g_hash_table_destroy(id_2_bb);\n    g_list_free(bbs);\n\n    z_info(\"number of conflicted block IDs : %ld\", conflicts);\n}\n\nZ_PRIVATE void z_rewriter_rewrite_beyond_main(Rewriter *r) {\n    if (r->__main_rewritten) {\n        EXITME(\n            \"z_rewriter_rewrite_beyond_main should execute before \"\n            \"z_rewriter_rewrite_main\");\n    }\n\n    ELF *e = z_binary_get_elf(r->binary);\n\n// init and fini\n#define __REWRITE_FCN_FROM_REG(type, reg)                                     \\\n    do {                                                                      \\\n        addr_t type##_addr = z_elf_get_##type(e);                             \\\n        z_rewriter_rewrite(r, type##_addr);                                   \\\n        addr_t 
shadow_##type##_addr =                                         \\\n            z_rewriter_get_shadow_addr(r, type##_addr);                       \\\n        if (shadow_##type##_addr == INVALID_ADDR) {                           \\\n            break;                                                            \\\n        }                                                                     \\\n                                                                              \\\n        addr_t load_##type = z_elf_get_load_##type(e);                        \\\n        assert(z_rewriter_get_shadow_addr(r, load_##type) == INVALID_ADDR);   \\\n        assert(z_disassembler_get_recursive_disasm(r->disassembler,           \\\n                                                   load_##type) == NULL);     \\\n        assert(z_disassembler_get_linear_disasm(r->disassembler,              \\\n                                                load_##type) == NULL);        \\\n        if (z_elf_get_is_pie(e)) {                                            \\\n            KS_ASM(load_##type, \"lea \" #reg \", [rip %+ld];\",                  \\\n                   shadow_##type##_addr - load_##type - 7);                   \\\n        } else {                                                              \\\n            KS_ASM(load_##type, \"mov \" #reg \", %#lx;\", shadow_##type##_addr); \\\n        }                                                                     \\\n        assert(ks_size == 7);                                                 \\\n        z_elf_write(e, load_##type, ks_size, ks_encode);                      \\\n        z_disassembler_update_superset_disasm(r->disassembler, load_##type);  \\\n    } while (0)\n\n    __REWRITE_FCN_FROM_REG(init, rcx);\n    __REWRITE_FCN_FROM_REG(fini, r8);\n#undef __REWRITE_FCN_FROM_REG\n\n// .init.array and .fini array\n#define __REWRITE_FCN_FROM_ARRAY(type)                               \\\n    do {                                         
                    \\\n        Rptr *array = NULL;                                          \\\n        size_t array_size = 0;                                       \\\n        addr_t array_addr = INVALID_ADDR;                            \\\n                                                                     \\\n        Elf64_Shdr *type##_array = z_elf_get_shdr_##type##_array(e); \\\n        if (!type##_array) {                                         \\\n            break;                                                   \\\n        }                                                            \\\n        array_size = type##_array->sh_size;                          \\\n        array_addr = type##_array->sh_addr;                          \\\n        array = z_elf_vaddr2ptr(e, array_addr);                      \\\n        for (int i = 0; i < array_size / sizeof(addr_t); i++) {      \\\n            addr_t fcn = *z_rptr_get_ptr(array, addr_t);             \\\n            z_info(\".\" #type \".array[%d]: %#lx\", i, fcn);            \\\n            z_rewriter_rewrite(r, fcn);                              \\\n            addr_t shadow_fcn = z_rewriter_get_shadow_addr(r, fcn);  \\\n            *z_rptr_get_ptr(array, addr_t) = shadow_fcn;             \\\n            z_rptr_inc(array, addr_t, 1);                            \\\n        }                                                            \\\n        z_rptr_destroy(array);                                       \\\n    } while (0)\n\n    __REWRITE_FCN_FROM_ARRAY(init);\n    __REWRITE_FCN_FROM_ARRAY(fini);\n#undef __REWRITE_FCN_FROM_ARRAY\n\n    // start\n    z_rewriter_rewrite_entrypoint(r);\n}\n\nZ_PRIVATE void z_rewriter_rewrite_entrypoint(Rewriter *r) {\n    ELF *e = z_binary_get_elf(r->binary);\n    addr_t start_addr = z_elf_get_ori_entry(e);\n\n    // rewrite entrypoint\n    z_rewriter_rewrite(r, start_addr);\n\n    // update shadow start\n    addr_t shadow_start_addr = z_rewriter_get_shadow_addr(r, 
start_addr);\n    assert(shadow_start_addr != INVALID_ADDR);\n    z_binary_set_shadow_start(r->binary, shadow_start_addr);\n}\n\nZ_PRIVATE void z_rewriter_rewrite_main(Rewriter *r) {\n    if (r->__main_rewritten) {\n        EXITME(\"z_rewriter_rewrite_main already executed\");\n    }\n\n    ELF *e = z_binary_get_elf(r->binary);\n    addr_t main_addr = z_elf_get_main(e);\n\n    // rewrite main\n    z_rewriter_rewrite(r, main_addr);\n\n    // update shadow main\n    addr_t shadow_main_addr = z_rewriter_get_shadow_addr(r, main_addr);\n    assert(shadow_main_addr != INVALID_ADDR);\n    z_binary_set_shadow_main(r->binary, shadow_main_addr);\n\n    // update __main_rewritten\n    r->__main_rewritten = true;\n}\n\nZ_RESERVED Z_PRIVATE bool __rewriter_patch_utp(Rewriter *r, addr_t ori_addr) {\n    // [0] get instruction\n    cs_insn *inst =\n        z_disassembler_get_recursive_disasm(r->disassembler, ori_addr);\n    assert(inst != NULL);\n\n    // [1] get upt_addr\n    addr_t new_inst_addr = ori_addr;\n    addr_t utp_addr =\n        __rewriter_calculate_utp_addr(r, &new_inst_addr, inst->size);\n\n    // [2] validate uTP\n    if (utp_addr == INVALID_ADDR) {\n        return false;\n    }\n\n    // [3] get shadow_address\n    addr_t shadow_addr = z_rewriter_get_shadow_addr(r, ori_addr);\n    assert(shadow_addr != INVALID_ADDR);\n\n    // [4] generate utp trampoline\n    KS_ASM_JMP(utp_addr, shadow_addr);\n    z_binary_insert_utp(r->binary, utp_addr, ks_encode, ks_size);\n\n    // [5] generate patched code\n    // KS_ASM_JMP(new_inst_addr, utp_addr);\n\n    // [6] do patch\n    // ELF *e = z_binary_get_elf(r->binary);\n    // z_elf_write(e, new_inst_addr, ks_size, ks_encode);\n\n    // [7] patch prefix code\n    // if (new_inst_addr != ori_addr) {\n    //     size_t padding_size = new_inst_addr - ori_addr;\n    //     z_elf_write(e, ori_addr, padding_size,\n    //                 z_x64_gen_nop(padding_size));\n    // }\n\n    // [8] update count\n    
r->patched_unsafe_bg_count++;\n    return true;\n}\n\nZ_RESERVED Z_PRIVATE addr_t __rewriter_calculate_utp_addr(Rewriter *r,\n                                                          addr_t *inst_addr,\n                                                          size_t inst_size) {\n    ELF *e = z_binary_get_elf(r->binary);\n    bool is_pie = z_elf_get_is_pie(e);\n\n    // [1] get offset buf\n    uint64_t tmp = 0;\n    addr_t ori_inst_addr = *inst_addr;\n    z_elf_read(e, ori_inst_addr, 8, (uint8_t *)(&tmp));\n\n    // [2] prepare init pointer\n    uint8_t *buffer = (uint8_t *)(&tmp) + 1;\n    size_t buffer_size = inst_size - 1;\n\n    // [3] prepare a utp snode\n    Snode *utp =\n        z_snode_create(0, __rewriter_get_hole_len(X86_INS_JMP), NULL, NULL);\n\n    // [4] Brute-force OP inst_addr\n    while ((int64_t)buffer_size >= 0) {\n        int32_t *offset = (int32_t *)buffer;\n\n        // [4.1] pre-check for non-pie (avoid cache miss)\n        if (!is_pie && buffer[3] > 0x7f)\n            goto NEXT;\n\n        // [4.2] initial offset buf (a trick to avoid conflict)\n        memset(buffer, 1, buffer_size);\n        int32_t ori_offset = *offset;\n\n        // [4.3] brute-force offset\n        int64_t utp_addr = 0;\n        do {\n            utp_addr = (int64_t)(*inst_addr + 5) + (int64_t)(*offset);\n\n            if (is_pie || utp_addr >= 0) {\n                z_snode_set_addr(utp, (addr_t)utp_addr);\n                if (z_elf_check_region_free(e, utp)) {\n                    z_snode_destroy(utp);\n                    return (addr_t)utp_addr;\n                }\n            }\n\n            if (buffer_size == 0)\n                goto NEXT;\n\n            for (int32_t i = buffer_size - 1; i >= 0; i--) {\n                if (buffer[i] != 0xff) {\n                    buffer[i] += 1;\n                    break;\n                } else {\n                    buffer[i] = 0x00;\n                }\n            }\n        } while (*offset != ori_offset);\n\n    NEXT:\n    
    // [4.4] check next OP inst_addr\n        (*inst_addr)++;\n        buffer++;\n        buffer_size--;\n    }\n\n    // [5] failed\n    z_trace(\"fail to find suitable uTP address: %#lx\", ori_inst_addr);\n    z_snode_destroy(utp);\n    return INVALID_ADDR;\n}\n\nZ_PRIVATE int __rewriter_compare_address(addr_t x, addr_t y, void *_z) {\n    if (x == y)\n        return 0;\n    else if (x > y)\n        return 1;\n    else\n        return -1;\n}\n\nZ_PRIVATE void __rewriter_emit_trampoline(Rewriter *r, addr_t addr) {\n#ifndef BINARY_SEARCH_INVALID_CRASH\n    UCFG_Analyzer *ucfg_analyzer =\n        z_disassembler_get_ucfg_analyzer(r->disassembler);\n\n    if (g_hash_table_lookup(r->instrumentation_free_bbs,\n                            GSIZE_TO_POINTER(addr)) ||\n        z_ucfg_analyzer_is_security_chk_failed(ucfg_analyzer, addr)) {\n        // instrumentation-free blocks do not need trampoline\n        return;\n    }\n\n    FLGState flg_state =\n        z_ucfg_analyzer_get_flg_need_write(ucfg_analyzer, addr);\n    GPRState gpr_state = z_ucfg_analyzer_get_gpr_can_write(ucfg_analyzer, addr);\n\n    // update total number of tramplines\n    r->afl_trampoline_count += 1;\n\n    // update gpr state\n    if (gpr_state) {\n        r->optimized_gpr_count += 1;\n    }\n\n    if (!flg_state) {\n        // no need to store eflags\n        r->optimized_flg_count += 1;\n\n        TP_EMIT(bitmap, addr, gpr_state);\n        z_binary_insert_shadow_code(r->binary, tp_code, tp_size);\n    } else {\n        // need to store eflags\n        TP_EMIT(context_save);\n        z_binary_insert_shadow_code(r->binary, tp_code, tp_size);\n\n        TP_EMIT(bitmap, addr, gpr_state & (~GPRSTATE_RAX));\n        z_binary_insert_shadow_code(r->binary, tp_code, tp_size);\n\n        TP_EMIT(context_restore);\n        z_binary_insert_shadow_code(r->binary, tp_code, tp_size);\n    }\n#endif\n}\n\nZ_PRIVATE void __rewriter_fillin_shadow_hole(Rewriter *r, GHashTable *holes) {\n    GList *shadow_addrs = 
g_hash_table_get_keys(holes);\n    ELF *e = z_binary_get_elf(r->binary);\n\n    for (GList *l = shadow_addrs; l != NULL; l = l->next) {\n        addr_t shadow_inst_addr = (addr_t)(l->data);\n        addr_t ori_tar_addr = (addr_t)g_hash_table_lookup(\n            holes, GSIZE_TO_POINTER(shadow_inst_addr));\n\n        addr_t shadow_tar_addr = (addr_t)g_hash_table_lookup(\n            r->rewritten_bbs, GSIZE_TO_POINTER(ori_tar_addr));\n        if (shadow_tar_addr == 0) {\n            // XXX: ignore invalid hole as it may be false instruction\n            z_warn(\"an invalid hole: %#lx <- %#lx\", ori_tar_addr,\n                   shadow_inst_addr);\n            continue;\n        }\n\n        // get id and hole size\n        uint32_t inst_id;\n        z_elf_read(e, shadow_inst_addr, sizeof(uint32_t),\n                   (uint8_t *)(&inst_id));\n\n#ifndef NSINGLE_SUCC_OPT\n        // check whether we need to do optimization\n        if (!r->opts->disable_opt) {\n            if ((int32_t)inst_id < 0) {\n                // it is a trampoline-free transfer\n                inst_id = (~inst_id) + 1;\n                shadow_tar_addr = (addr_t)g_hash_table_lookup(\n                    r->shadow_code, GSIZE_TO_POINTER(ori_tar_addr));\n            }\n        } else {\n            assert((int32_t)inst_id >= 0);\n        }\n#endif\n\n        size_t hole_size = __rewriter_get_hole_len(inst_id);\n\n        // generate code\n        KS_ASM(shadow_inst_addr, \"%s %#lx\", cs_insn_name(cs, inst_id),\n               shadow_tar_addr);\n        z_elf_write(e, shadow_inst_addr, ks_size, ks_encode);\n\n        // padding hole\n        assert(ks_size <= hole_size);\n        if (ks_size < hole_size) {\n            z_elf_write(e, shadow_inst_addr + ks_size, hole_size - ks_size,\n                        z_x64_gen_nop(hole_size - ks_size));\n        }\n    }\n\n    g_list_free(shadow_addrs);\n}\n\nZ_PRIVATE cs_insn *__rewriter_translate_shadow_inst(Rewriter *r, cs_insn *inst,\n                   
                                 addr_t ori_addr) {\n    cs_detail *detail = inst->detail;\n\n    for (int32_t i = 0; i < detail->x86.op_count; i++) {\n        cs_x86_op *op = &(detail->x86.operands[i]);\n        if (op->type == X86_OP_MEM &&\n            (op->mem.base == X86_REG_RIP || op->mem.base == X86_REG_EIP)) {\n            goto TRANSLATE_RIP_INS;\n        }\n    }\n\n    // PC non-related instruction, directly return\n    assert(strstr(inst->op_str, \"rip\") == NULL);\n    assert(strstr(inst->op_str, \"eip\") == NULL);\n    return inst;\n\nTRANSLATE_RIP_INS:\n    z_trace(\n        \"rip-related memory access \"\n        \"instruction \" CS_SHOW_INST(inst));\n\n    const char *pc_regname = NULL;\n    if (strstr(inst->op_str, \"eip\")) {\n        z_warn(\"translate eip-related instruction: \" CS_SHOW_INST(inst));\n        pc_regname = \"eip\";\n    } else {\n        pc_regname = \"rip\";\n    }\n\n    // step [1]. generate asmline fmt (FMTSTR ATTACK!!!)\n    int64_t op_mem_disp = 0;\n\n    // step [1.1]. generate mnemonic\n    z_snprintf(asmline_fmt, ASMLINE_FMT_SIZE, \"%s\\t\", inst->mnemonic);\n\n    // step [1.2]. 
generate operands\n    for (int32_t i = 0; i < detail->x86.op_count; i++) {\n        cs_x86_op *op = &(detail->x86.operands[i]);\n        switch (op->type) {\n            case X86_OP_REG:\n                assert(op->reg != X86_REG_RIP);\n                assert(op->reg != X86_REG_EIP);\n                z_snprintf(asmline_fmt + z_strlen(asmline_fmt),\n                           ASMLINE_FMT_SIZE - z_strlen(asmline_fmt), \"%s, \",\n                           cs_reg_name(cs, op->reg));\n                continue;\n            case X86_OP_IMM:\n                z_snprintf(asmline_fmt + z_strlen(asmline_fmt),\n                           ASMLINE_FMT_SIZE - z_strlen(asmline_fmt), \"%#lx, \",\n                           op->imm);\n                continue;\n            case X86_OP_MEM:\n                assert(op->mem.base == X86_REG_RIP ||\n                       op->mem.base == X86_REG_EIP);\n                assert(op->mem.index == X86_REG_INVALID);\n\n                /*\n                 * XXX: keystone and capstone bug! 
For more information, please\n                 * refer to\n                 * https://github.com/keystone-engine/keystone/issues/92\n                 */\n                // TODO: build our own keystone and capstone (HUG!)\n                size_t hooked_size = op->size;\n                if (inst->id == X86_INS_COMISS) {\n                    hooked_size = 4;\n                } else if (inst->id == X86_INS_COMISD) {\n                    hooked_size = 8;\n                }\n\n                switch (hooked_size) {\n                    case 1:\n                        z_strcpy(asmline_fmt + z_strlen(asmline_fmt),\n                                 \"byte ptr [%s%+ld], \");\n                        break;\n                    case 2:\n                        z_strcpy(asmline_fmt + z_strlen(asmline_fmt),\n                                 \"word ptr [%s%+ld], \");\n                        break;\n                    case 4:\n                        z_strcpy(asmline_fmt + z_strlen(asmline_fmt),\n                                 \"dword ptr [%s%+ld], \");\n                        break;\n                    case 8:\n                        z_strcpy(asmline_fmt + z_strlen(asmline_fmt),\n                                 \"qword ptr [%s%+ld], \");\n                        break;\n                    case 10:\n                        z_strcpy(asmline_fmt + z_strlen(asmline_fmt),\n                                 \"fword ptr [%s%+ld], \");\n                        break;\n                    case 16:\n                        z_strcpy(asmline_fmt + z_strlen(asmline_fmt),\n                                 \"xmmword ptr [%s%+ld], \");\n                        break;\n                    default:\n                        z_strcpy(asmline_fmt + z_strlen(asmline_fmt),\n                                 \"[%s%+ld], \");\n                        break;\n                }\n                op_mem_disp = op->mem.disp;\n                continue;\n            default:\n                
EXITME(\"invalid op type \" CS_SHOW_INST(inst));\n        }\n    }\n\n    // step [1.3]. add NULL at last comma\n    assert(asmline_fmt[z_strlen(asmline_fmt) - 2] == ',');\n    asmline_fmt[z_strlen(asmline_fmt) - 2] = '\\x00';\n    z_trace(\"generated asmline_fmt: %s\", asmline_fmt);\n\n    const addr_t ori_pc = ori_addr + inst->size;\n    const addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n\n    addr_t shadow_pc = shadow_addr + inst->size;\n\n    // step [2]. brute-force to find the suitable shadow_pc, starting from the\n    // most possible address (the longest meanful x64 instruction is 15-byte)\n    for (; shadow_pc < shadow_addr + 0x10; shadow_pc++) {\n        // step [2.1]. asm and disasm (FMTSTR ATTACK!!!)\n        KS_ASM(shadow_addr, asmline_fmt, pc_regname,\n               ori_pc - shadow_pc + op_mem_disp);\n        assert(ks_size > 0);\n        CS_DISASM_RAW(ks_encode, ks_size, shadow_addr, 1);\n        if (ks_size != cs_inst->size) {\n            EXITME(\"invalid instruction rewriting\");\n        }\n\n        // step [2.2]. 
check and re-fit next pc address\n        if (shadow_addr + cs_inst->size == shadow_pc) {\n            // nice, break\n            break;\n        } else if (shadow_addr + cs_inst->size < shadow_pc) {\n            // for short instruction,\n            // easy to padding nop\n            size_t padding_size = shadow_pc - cs_inst->size - shadow_addr;\n            z_binary_insert_shadow_code(r->binary, z_x64_gen_nop(padding_size),\n                                        padding_size);\n            break;\n        }\n\n        // we need to check bigger shadow pc\n    }\n\n    assert(z_binary_get_shadow_code_addr(r->binary) + cs_inst->size ==\n           shadow_pc);\n    return (cs_insn *)cs_inst;\n}\n\nZ_PRIVATE void __rewriter_generate_shadow_inst(Rewriter *r, GHashTable *holes,\n                                               cs_insn *inst, addr_t ori_addr,\n                                               bool bb_entry) {\n    // step [0]. get next address, we must do this before translation\n    addr_t ori_next_addr = ori_addr + inst->size;\n\n    /*\n     * XXX: for the basic block entrypoints' address mapping, there is a silght\n     * difference between r->rewritten_bbs and LOOKUP_TABLE:\n     *      r->rewritten_bbs maps the bb entrypoint address to its corresponding\n     * bitmap code's shadow address (it serves for handlers to find shadow\n     * tranfer target);\n     *      LOOKUP_TABLE maps the bb entrypoint address to its own shadow\n     * address (it serves for on-the-fly translattion of indirect call/jmp);\n     *\n     * In short, for a given bb entrypoint, r->rewritten_bbs's mapping\n     * value is always samller than LOOKUP_TABLE's.\n     *\n     */\n    /*\n     * XXX: it is very important to distinguish r->rewritten_bbs and\n     * LOOKUP_TABLE. Note that LOOKUP_TABLE is used for indirect call/jmp's\n     * dynamic mapping, and these indirect call/jmp's targets are very different\n     * to identify. 
Hence, it is possible that their targets are already\n     * rewritten but not identified as block entrypoints. In that case, the best\n     * we can do is to instrument AFL_TRAMPOLINE at the tail of these indirect\n     * call/jmp, and directly tranfer to the shadow address (w/o\n     * AFL_TRAMPOLINE). And r->rewritten_bbs is used for direct call/jmp at\n     * rewriting time. When rewriting a direct call/jmp, it is possible its\n     * target is not rewritten. Hence, we use holes and r->rewritten_bbs to lazy\n     * update the target address. As these direct call/jmp's targets can always\n     * be identified as block entrypoints, we do not need to instrument\n     * AFL_TRAMPOLINE at their tails (to reduce memory usage).\n     *\n     */\n\n    // step [1]. handle entry of basic block\n    if (bb_entry) {\n        size_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n        // step [1.1]. update rewritten_bbs\n        if (!g_hash_table_lookup(r->rewritten_bbs,\n                                 GSIZE_TO_POINTER(ori_addr))) {\n            g_hash_table_insert(r->rewritten_bbs, GSIZE_TO_POINTER(ori_addr),\n                                GSIZE_TO_POINTER(shadow_addr));\n        }\n\n        // step [1.2]. place an endbr64 at the beginning if the original inst is\n        // endbr64\n        if (inst->id == X86_INS_ENDBR64) {\n            z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size);\n        }\n\n        // step [1.3]. insert trampolines based on optimization\n        __rewriter_emit_trampoline(r, ori_addr);\n    }\n\n    // step [2]. 
update shadow code\n    if (!g_hash_table_lookup(r->shadow_code, GSIZE_TO_POINTER(ori_addr))) {\n        size_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n        // we store the first apperance of each instruction\n        g_hash_table_insert(r->shadow_code, GSIZE_TO_POINTER(ori_addr),\n                            GSIZE_TO_POINTER(shadow_addr));\n        z_binary_update_lookup_table(r->binary, ori_addr, shadow_addr);\n    }\n\n    if (r->opts->trace_pc) {\n        // trace previous pc\n        KS_ASM_CONST_MOV(RW_PAGE_INFO_ADDR(prev_pc), ori_addr);\n        z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n    }\n\n#ifdef DEBUG\n    __debug_printf(\"%#lx -> %#lx:\\n\", ori_addr,\n                   z_binary_get_shadow_code_addr(r->binary));\n    __debug_printf(\"\\told inst \" CS_SHOW_INST(inst));\n    __debug_printf(\"\\n\");\n#endif\n    // step [3]. translate rip-related instrution\n    //      XXX: note that inserting any new code between step [3] and step [4]\n    //      will cause wrong instrumentation.\n    inst = __rewriter_translate_shadow_inst(r, inst, ori_addr);\n\n#ifdef DEBUG\n    __debug_printf(\"\\tnew inst \" CS_SHOW_INST(inst));\n    __debug_printf(\"\\n\");\n#endif\n\n    // step [4]. 
check handlers\n    RHandler **handlers = (RHandler **)z_buffer_get_raw_buf(r->handlers);\n    size_t n = z_buffer_get_size(r->handlers) / sizeof(RHandler *);\n    for (size_t i = 0; i < n; i++) {\n        REvent event = z_rhandler_get_event(handlers[i]);\n        RHandlerFcn fcn = z_rhandler_get_fcn(handlers[i]);\n        if ((*event)(inst)) {\n            // XXX: note that the inst->address is incorrect here\n            (*fcn)(r, holes, inst, ori_addr, ori_next_addr);\n            return;\n        }\n    }\n\n    // for unhandled instruction, we simply rewrite it\n    z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size);\n    return;\n}\n\n/*\n * XXX: For every BB entrypoint found by Disassembler, Rewriter should not\n * inject any AFL_TRAMPOLINE until a terminator instruction is found. In other\n * words, if we consider a basic block's complete body starting from its\n * entrypoint and ending at its terminator instruction (ret, jmp, int, or any\n * other preivilege instrution), every disassembler-found basic block should\n * have its own unique copy of its complete body, and these copies do not\n * overlap with each other.\n *\n * Maintaining this property will help us on:\n *      1. Any Disassembler's false positive of indentifying basic blocks will\n *      not impact Rewriter's correctness;\n *      2. Fuzzing speed can be optimized. When a basic block is reached, it is\n *      meaningless to record any fall-through edge within this basic block.\n *      This design naturely avoids recording these fall-through edges;\n *\n * Variable *bb_entry* is the key to maintain this property.\n */\n/*\n * XXX: Above idea is very reasonable. *However*, it does not consider the cache\n * hit rate and forking overhead. In other words, if every disassembly-found\n * block has an own copy of its complete body, the memory usage will increase.\n * Therefore, the cache hit rate will quickly drop down. 
When the program is\n * large (e.g., openssl), the missing hit will siginificately influence the\n * execution speed. We have test the fuzzing speed w/ and w/o above\n * optimization, and the results are shown this optimization does hurt\n * performance.\n *\n * Hence, we decide to disable this optimization right now.\n */\n/*\n * XXX: FALL_THROUGH opt can be enabled by jumping over the trampoline. However,\n * considering we can almost elimiate all EFLAGS saving, the overhead of an AFL\n * trampoline may be smaller than the one caused by a jump instruction.\n * TODO: decide whether we need to enable FALL_THROUGH (note that in SotchFuzz\n * paper, this optimization is enabled)\n */\n/*\n * XXX: It is ok for our tool to instrument false instructions or block\n * entrypoint, as long as the false rate it limited. Note that when the false\n * rate increases, the number of memory usage (influencing cache hit rate) and\n * extra-false AFL_TRAMPOLINE will increase, reasulting a low execution speed.\n * That is why we abandon pre- linear disassembly.\n *\n * However, keep in mind that, for our tool, it is very critical to avoid\n * missing any instruction or block entrypoint.\n */\nZ_PRIVATE void __rewriter_generate_shadow_block(\n    Rewriter *r, GHashTable *holes, GQueue *instructions, addr_t bb_addr,\n    cs_insn *(*disasm_func)(Disassembler *, addr_t)) {\n    // step [1]. basic information\n    cs_insn *inst = NULL;\n    addr_t ori_addr = bb_addr;\n    bool bb_entry = true;  // whether next instrution is a BB entrypoint\n\n    // step [2]. check whether this block is handled\n    if (g_hash_table_lookup(r->rewritten_bbs, GSIZE_TO_POINTER(bb_addr))) {\n        // we already rewrite this basic block\n        return;\n    }\n\n    // step [3]. rewrite code one by one\n    do {\n        inst = (*disasm_func)(r->disassembler, ori_addr);\n\n        // step [3.1]. 
check this address is valid and update instructions\n        //      Note that it is possible inst is NULL, as no-return / inline\n        //      data may cause incorrect disasm.\n        if (!inst) {\n            // XXX: it is important to insert an invalid instruction to\n            // terminate the incorrect control flow, for effective unintentional\n            // crash detection.\n\n            // step [3.1.1]. update shadow code\n            if (!g_hash_table_lookup(r->shadow_code,\n                                     GSIZE_TO_POINTER(ori_addr))) {\n                size_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n                g_hash_table_insert(r->shadow_code, GSIZE_TO_POINTER(ori_addr),\n                                    GSIZE_TO_POINTER(shadow_addr));\n                z_binary_update_lookup_table(r->binary, ori_addr, shadow_addr);\n            }\n\n            // step [3.1.2]. insert invalid instruction\n            z_binary_insert_shadow_code(r->binary, z_x64_gen_invalid(1), 1);\n            return;\n        }\n\n        // XXX: instructions was used to build bridges by Rewriter, which is\n        // no longer supported currently.\n        if (instructions) {\n            if (!g_hash_table_lookup(r->shadow_code,\n                                     GSIZE_TO_POINTER(ori_addr))) {\n                g_queue_push_tail(instructions, GSIZE_TO_POINTER(ori_addr));\n            }\n        }\n\n#ifdef BINARY_SEARCH_DEBUG_REWRITER\n        if (ori_addr <= BINARY_SEARCH_DEBUG_REWRITER) {\n            if (bb_entry) {\n                g_hash_table_insert(r->rewritten_bbs,\n                                    GSIZE_TO_POINTER(ori_addr),\n                                    GSIZE_TO_POINTER(ori_addr));\n            }\n            g_hash_table_insert(r->shadow_code, GSIZE_TO_POINTER(ori_addr),\n                                GSIZE_TO_POINTER(ori_addr));\n            z_binary_update_lookup_table(r->binary, ori_addr, ori_addr);\n            
z_elf_write(r->binary->elf, ori_addr, inst->size, inst->bytes);\n        } else\n#endif\n        {\n            // step [3.2]. rewrite the single instruction\n            __rewriter_generate_shadow_inst(r, holes, inst, ori_addr, bb_entry);\n        }\n\n        bb_entry = !!z_disassembler_is_potential_block_entrypoint(\n            r->disassembler, ori_addr + inst->size);\n\n#ifdef FALL_THROUGH_OPT\n        if (bb_entry &&\n            !(z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst) ||\n              z_capstone_is_terminator(inst))) {\n            // XXX: insert a short jmp instruction here\n        }\n#endif\n\n        // step [3.3]. update cur_addr\n        ori_addr += inst->size;\n    } while (!z_capstone_is_terminator(inst));\n\n    return;\n}\n\nZ_RESERVED Z_PRIVATE void __rewriter_build_bridges(Rewriter *r,\n                                                   GQueue *instructions) {\n    assert(r != NULL && instructions != NULL);\n\n    ELF *e = z_binary_get_elf(r->binary);\n    bool prev_patched = false;\n    addr_t prev_addr = INVALID_ADDR;\n\n    while (!g_queue_is_empty(instructions)) {\n        addr_t cur_addr = (addr_t)g_queue_pop_tail(instructions);\n\n        assert(prev_addr > cur_addr);\n\n        cs_insn *ori_inst =\n            z_disassembler_get_recursive_disasm(r->disassembler, cur_addr);\n\n        assert(ori_inst != NULL);\n        assert(ori_inst->size + cur_addr <= prev_addr);\n\n        // get shadow_addr\n        addr_t shadow_addr = z_rewriter_get_shadow_addr(r, cur_addr);\n        assert(shadow_addr != INVALID_ADDR);\n\n        // check ori_inst->size\n        if (ori_inst->size >= __rewriter_get_hole_len(X86_INS_JMP)) {\n            // build bridge\n            KS_ASM_JMP(cur_addr, shadow_addr);\n            z_elf_write(e, cur_addr, ks_size, ks_encode);\n\n            // update statistic\n            r->patched_safe_bg_count++;\n            prev_patched = true;\n        } else {\n            // if previous instruction is patched, 
we ignore here\n            if (prev_patched) {\n                prev_patched = false;\n                goto NEXT;\n            }\n\n            // we only do crashed brideg on continued instructions\n            if (prev_addr != cur_addr + ori_inst->size) {\n                prev_patched = false;\n                goto NEXT;\n            }\n\n            cs_insn *prev_inst =\n                z_disassembler_get_recursive_disasm(r->disassembler, prev_addr);\n            assert(prev_inst != NULL);\n\n            // we only do patch within two instruction\n            if (ori_inst->size + prev_inst->size <\n                __rewriter_get_hole_len(X86_INS_JMP)) {\n                prev_patched = false;\n                goto NEXT;\n            }\n\n            // test for next instruction\n            uint8_t tmp_buf[16] = {0};\n            z_elf_read(e, cur_addr, sizeof(tmp_buf), tmp_buf);\n            KS_ASM_JMP(cur_addr, shadow_addr);\n            memcpy(tmp_buf, ks_encode, ks_size);\n\n            CS_DISASM_RAW(tmp_buf + ori_inst->size,\n                          sizeof(tmp_buf) - ori_inst->size,\n                          cur_addr + ori_inst->size, 1);\n\n            if (cs_count == 0) {\n                // invalid, nice\n                z_elf_write(e, cur_addr, ks_size, ks_encode);\n\n                // update statistic\n                r->patched_unsafe_bg_count++;\n                prev_patched = true;\n            } else {\n                prev_patched = false;\n            }\n        }\n\n    NEXT:\n        prev_addr = cur_addr;\n    }\n}\n\nZ_API Rewriter *z_rewriter_create(Disassembler *d, RewritingOptArgs *opts) {\n    Rewriter *r = STRUCT_ALLOC(Rewriter);\n\n    r->opts = opts;\n\n    r->disassembler = d;\n    r->binary = z_disassembler_get_binary(d);\n\n    // init basic information\n    r->shadow_code =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    r->rewritten_bbs =\n        g_hash_table_new_full(g_direct_hash, 
g_direct_equal, NULL, NULL);\n\n    // init instrumentation information\n    r->instrumentation_free_bbs =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    // init potential returen address info\n    r->potential_retaddrs =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    r->unpatched_retaddrs = g_hash_table_new_full(\n        g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)&z_buffer_destroy);\n\n    // init statistical data\n    r->patched_safe_bg_count = 0;\n    r->patched_unsafe_bg_count = 0;\n    r->afl_trampoline_count = 0;\n    r->optimized_flg_count = 0;\n    r->optimized_gpr_count = 0;\n    r->optimized_single_succ = 0;\n\n    // init handlers\n    r->handlers = z_buffer_create(NULL, 0);\n\n    r->__main_rewritten = false;\n\n    __rewriter_init_predefined_handler(r);\n\n#ifdef DEBUG\n    __debug_file = fopen(\"shadow.log\", \"w\");\n#endif\n\n    return r;\n}\n\n// XXX: note that its underlying disassembly (linear) is not completed.\n// XXX: useless and hence unused!\nZ_RESERVED Z_API void z_rewriter_heuristics_rewrite(Rewriter *r) {\n    assert(r != NULL);\n\n    if (!r->__main_rewritten) {\n        EXITME(\n            \"z_rewriter_heuristics_rewrite should execute after \"\n            \"z_rewriter_rewrite_main\");\n    }\n\n    // step [1]. request disassembler to recursive disassemble code\n    GQueue *new_bbs = z_disassembler_linear_disasm(r->disassembler);\n    z_trace(\"find %d new basic blocks by linear disassembly\",\n            g_queue_get_length(new_bbs));\n\n    g_queue_sort(new_bbs, (GCompareDataFunc)__rewriter_compare_address, NULL);\n\n    // step [2]. prepare cf_related hole\n    GHashTable *cf_related_holes =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    // step [3]. 
rewrite all new basci blocks\n    while (!g_queue_is_empty(new_bbs)) {\n        addr_t bb_addr = (addr_t)g_queue_pop_head(new_bbs);\n\n        // rewrite the whole basic block\n        __rewriter_generate_shadow_block(r, cf_related_holes, NULL, bb_addr,\n                                         &z_disassembler_get_linear_disasm);\n    }\n\n    // step [4]. fill in all cf_related holes\n    __rewriter_fillin_shadow_hole(r, cf_related_holes);\n\n    // step [5]. destroy structure to avoid memleak\n    g_hash_table_destroy(cf_related_holes);\n    g_queue_free(new_bbs);\n\n    if (r->opts->count_conflict) {\n        __rewriter_count_conflicted_ids(r);\n    }\n}\n\nZ_API void z_rewriter_destroy(Rewriter *r) {\n    RHandler **handlers = (RHandler **)z_buffer_get_raw_buf(r->handlers);\n    for (int32_t i = 0; i < z_buffer_get_size(r->handlers) / sizeof(RHandler *);\n         i++)\n        z_rhandler_destroy(handlers[i]);\n    z_buffer_destroy(r->handlers);\n\n    g_hash_table_destroy(r->shadow_code);\n    g_hash_table_destroy(r->rewritten_bbs);\n\n    g_hash_table_destroy(r->instrumentation_free_bbs);\n\n    g_hash_table_destroy(r->potential_retaddrs);\n    g_hash_table_destroy(r->unpatched_retaddrs);\n\n    z_free(r);\n\n#ifdef DEBUG\n    fclose(__debug_file);\n#endif\n}\n\nZ_API void z_rewriter_register_handler(Rewriter *r, REvent event,\n                                       RHandlerFcn fcn) {\n    RHandler *handler = z_rhandler_create(event, fcn);\n    z_buffer_append_raw(r->handlers, (uint8_t *)(&handler), sizeof(RHandler *));\n}\n\nZ_API void z_rewriter_rewrite(Rewriter *r, addr_t new_addr) {\n    assert(r != NULL);\n\n    z_trace(\"rewrite new target: %#lx\", new_addr);\n\n    // step [1]. request disassembler to recursive disassemble code\n    // XXX: it is important that we have to rewrite those new basic blocks each\n    // time we call z_disassembler_recursive_disasm. 
Or in other words,\n    // z_disassembler_recursive_disasm can only be called in z_rewriter_rewrite.\n    GQueue *new_bbs =\n        z_disassembler_recursive_disasm(r->disassembler, new_addr);\n    z_trace(\"find %d new basic blocks\", g_queue_get_length(new_bbs));\n\n    g_queue_sort(new_bbs, (GCompareDataFunc)__rewriter_compare_address, NULL);\n\n    // step [2]. prepare cf_related hole\n    GHashTable *cf_related_holes =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    // step [3]. rewrite all new basci blocks\n    while (!g_queue_is_empty(new_bbs)) {\n        addr_t bb_addr = (addr_t)g_queue_pop_head(new_bbs);\n\n        // rewrite the whole basic block\n        __rewriter_generate_shadow_block(r, cf_related_holes, NULL, bb_addr,\n                                         &z_disassembler_get_recursive_disasm);\n    }\n\n    // step [4]. fill in all cf_related holes\n    __rewriter_fillin_shadow_hole(r, cf_related_holes);\n\n    // step [5]. destroy structure to avoid memleak\n    g_hash_table_destroy(cf_related_holes);\n    g_queue_free(new_bbs);\n\n    if (r->opts->count_conflict) {\n        __rewriter_count_conflicted_ids(r);\n    }\n}\n\nZ_API void z_rewriter_optimization_stats(Rewriter *r) {\n    z_info(\"number of optimized FLG savings: %6d / %d\", r->optimized_flg_count,\n           r->afl_trampoline_count);\n    z_info(\"number of optimized GPR savings: %6d / %d\", r->optimized_gpr_count,\n           r->afl_trampoline_count);\n    z_info(\"number of optimized trampolines: %6d / %d\",\n           r->optimized_single_succ, r->afl_trampoline_count);\n}\n\nZ_API addr_t z_rewriter_get_shadow_addr(Rewriter *r, addr_t addr) {\n    addr_t shadow_addr =\n        (addr_t)g_hash_table_lookup(r->rewritten_bbs, GSIZE_TO_POINTER(addr));\n\n    if (!shadow_addr) {\n        shadow_addr =\n            (addr_t)g_hash_table_lookup(r->shadow_code, GSIZE_TO_POINTER(addr));\n    }\n\n    if (shadow_addr) {\n        return shadow_addr;\n    } 
else {\n        return INVALID_ADDR;\n    }\n}\n\nZ_API bool z_rewriter_check_retaddr_crashpoint(Rewriter *r, addr_t addr) {\n    // XXX: there is a special case where the correspoind callee\n    // (potential_retaddrs) exists but the related retaddrs (unpatched_retaddrs)\n    // do not. Specifically, It is possible that:\n    //  1. addresses A and B are found as the retaddrs of a callee X.\n    //  2. B is detected and X is marked as returnable. Hence, A, as a related\n    //  retaddr of X, should be patched.\n    //  3. However, A is additionally serving as a BRIDGE_POINT, which will not\n    //  be patched actually.\n    //  4. A is detected (during next execution). At this point, A is in\n    //  potential_retaddrs but X is not in unpatched_retaddrs\n    addr_t callee = (addr_t)g_hash_table_lookup(r->potential_retaddrs,\n                                                GSIZE_TO_POINTER(addr));\n    if (!callee) {\n        return false;\n    } else {\n        return !!g_hash_table_lookup(r->unpatched_retaddrs,\n                                     GSIZE_TO_POINTER(callee));\n    }\n}\n\n// XXX: every time we find a new retaddr, we will return all the unpatched\n// retaddrs which share the same callee with this given retaddr.\nZ_API Buffer *z_rewriter_new_validate_retaddr(Rewriter *r, addr_t retaddr) {\n    // step (1). find corresponding callee\n    addr_t callee = (addr_t)g_hash_table_lookup(r->potential_retaddrs,\n                                                GSIZE_TO_POINTER(retaddr));\n    if (!callee) {\n        // XXX: theoretically this branch cannot be reached, but when we have\n        // different rewriting order than last execution, the logged crashpoints\n        // may force the program to go into this branch.\n        return z_buffer_create(NULL, 0);\n    }\n\n    // step (2). 
get all retaddrs and remove the entity\n    Buffer *buf = (Buffer *)g_hash_table_lookup(r->unpatched_retaddrs,\n                                                GSIZE_TO_POINTER(callee));\n    assert(buf);\n    g_hash_table_steal(r->unpatched_retaddrs, GSIZE_TO_POINTER(callee));\n\n    return buf;\n}\n\nZ_API void z_rewriter_initially_rewrite(Rewriter *r) {\n    if (r->opts->instrument_early) {\n        z_rewriter_rewrite_entrypoint(r);\n    } else {\n        z_rewriter_rewrite_beyond_main(r);\n        z_rewriter_rewrite_main(r);\n    }\n}\n"
  },
  {
    "path": "src/rewriter.h",
    "content": "/*\n * rewriter.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __REWRITER_H\n#define __REWRITER_H\n\n#include \"binary.h\"\n#include \"buffer.h\"\n#include \"config.h\"\n#include \"disassembler.h\"\n#include \"sys_optarg.h\"\n\n#include <gmodule.h>\n\nSTRUCT(Rewriter, {\n    // Binary which nees to rewrite\n    Binary *binary;\n\n    // Disassembler\n    Disassembler *disassembler;\n\n    // Handlers\n    Buffer *handlers;\n\n    // Basic information\n    GHashTable *shadow_code;\n    GHashTable *rewritten_bbs;\n\n    /*\n     * meta-info for CP_RETADDR\n     */\n    // XXX: note that when pdisasm is fully supported, CP_RETADDR is disabled.\n    // XXX: CP_RETADDR is only used for unknown library functions, which means\n    // it is not for those internal calls or white-listed library calls.\n\n    // patched retaddr, which is potential to be crashpoint\n    GHashTable *potential_retaddrs;\n    // for a given callee, all unpatched retaddr crashpoints associated with it\n    GHashTable *unpatched_retaddrs;  // callee -> retaddrs\n\n    // instrumentaion-free blocks (following a security_check predicate)\n    GHashTable *instrumentation_free_bbs;\n\n    // Statistical data\n    size_t patched_safe_bg_count;\n    size_t patched_unsafe_bg_count;\n    size_t afl_trampoline_count;\n    
size_t optimized_flg_count;\n    size_t optimized_gpr_count;\n    size_t optimized_single_succ;\n\n    // Internal data\n    bool __main_rewritten;\n\n    // rewriting optargs\n    RewritingOptArgs *opts;\n});\n\n// which instruction needs to be handled\ntypedef bool (*REvent)(const cs_insn *);\n\n// how to rewrite the instruction\ntypedef void (*RHandlerFcn)(Rewriter *, GHashTable *, cs_insn *,\n                            addr_t ori_addr, addr_t ori_next_addr);\n\nSTRUCT(RHandler, {\n    REvent event;\n    RHandlerFcn fcn;\n});\n\nDECLARE_GETTER(RHandler, rhandler, REvent, evnet);\nDECLARE_GETTER(RHandler, rhandler, RHandlerFcn, fcn);\n\n/*\n * Create a RHandler\n */\nZ_API RHandler *z_rhandler_create(REvent event, RHandlerFcn fcn);\n\n/*\n * Destroy a RHandler\n */\nZ_API void z_rhandler_destroy(RHandler *handler);\n\n/*\n * Create a rewriter\n */\nZ_API Rewriter *z_rewriter_create(Disassembler *d, RewritingOptArgs *opts);\n\n/*\n * Destroy a rewriter\n */\nZ_API void z_rewriter_destroy(Rewriter *r);\n\n/*\n * Register a handler for rewriter\n */\nZ_API void z_rewriter_register_handler(Rewriter *r, REvent event,\n                                       RHandlerFcn fcn);\n\n/*\n * Rewrite based on known knowledge\n */\nZ_API void z_rewriter_rewrite(Rewriter *r, addr_t new_addr);\n\n/*\n * Get the shadow address of given addr\n */\nZ_API addr_t z_rewriter_get_shadow_addr(Rewriter *r, addr_t addr);\n\n/*\n * Initial rewriting for those addresses known to be code\n */\nZ_API void z_rewriter_initially_rewrite(Rewriter *r);\n\n/*\n * Heuristics rewriting after rewriting main\n */\nZ_RESERVED Z_API void z_rewriter_heuristics_rewrite(Rewriter *r);\n\n/*\n * Check whether the address is a potential return address which is already\n * rewritten\n */\nZ_API bool z_rewriter_check_retaddr_crashpoint(Rewriter *r, addr_t addr);\n\n/*\n * Find a new validate retaddr and return all retaddrs who share the same call\n * with given retaddr. 
Note that destroying returned Buffer is not this\n * function's responsibility.\n */\nZ_API Buffer *z_rewriter_new_validate_retaddr(Rewriter *r, addr_t retaddr);\n\n/*\n * Show optimization stats\n */\nZ_API void z_rewriter_optimization_stats(Rewriter *r);\n\n#endif\n"
  },
  {
    "path": "src/rewriter_handlers/generate.py",
    "content": "import sys\nimport os\nimport re\n\nevent_re = re.compile(r\"\\s*#define\\s*REVENT\\s*(?P<event>\\S*)\\s*\")\nhandler_re = re.compile(r\"\\s*#define\\s*RHANDLER\\s*(?P<handler>\\S*)\\s*\")\n\n\ndef extract_c_file(c_file):\n    meta_info = {}\n\n    f = open(c_file, \"r\")\n    data = f.read()\n    f.close()\n    meta_info[\"c_file\"] = os.path.basename(c_file)\n\n    captured_event = event_re.search(data)\n    if captured_event is None:\n        print(\"generate.py: invalid format of handler plugin [no REVENT defined]\")\n        exit(-1)\n    meta_info[\"event\"] = captured_event.group(\"event\")\n\n    captured_handler = handler_re.search(data)\n    if captured_handler is None:\n        print(\"generate.py: invalid format of handler plugin [no RHANDLER defined]\")\n        exit(-1)\n    meta_info[\"handler\"] = captured_handler.group(\"handler\")\n\n    print(\"generate.py: find %s\" % meta_info)\n    return meta_info\n\n\ndef extend_buffer(buffer, handlers):\n    register_fcns = \"\"\n    for h in handlers:\n        buffer += '#include \"%s\"\\n' % h[\"c_file\"]\n        buffer += \"#undef REVENT\\n\"\n        buffer += \"#undef RHANDLER\\n\"\n        register_fcns += \"    z_rewriter_register_handler(r, %s, %s);\\n\" % (\n            h[\"event\"],\n            h[\"handler\"],\n        )\n\n    buffer += (\n        \"\"\"\nZ_PRIVATE void __rewriter_init_predefined_handler(Rewriter *r) {\n    %s\n}\n        \"\"\"\n        % register_fcns.strip()\n    )\n\n    return buffer\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) != 2:\n        print(\"generate.py: ./generate.py <directory>\")\n        exit(-1)\n\n    dir = sys.argv[1].strip()\n    in_file = os.path.join(dir, \"handler_main.in\")\n    if not os.path.exists(in_file):\n        print(\"generate.py: %s does not exist\" % in_file)\n        exit(-1)\n\n    f = open(in_file, \"r\")\n    buffer = f.read() + \"\\n\"\n    f.close()\n\n    handlers = []\n    for _file in os.listdir(dir):\n    
    if _file.endswith(\".c\"):\n            if \"main\" in _file:\n                continue\n            handlers.append(extract_c_file(os.path.join(dir, _file)))\n\n    buffer = extend_buffer(buffer, handlers)\n\n    out_file = os.path.join(dir, \"handler_main.c\")\n    f = open(out_file, \"w\")\n    f.write(buffer)\n    f.close()\n"
  },
  {
    "path": "src/rewriter_handlers/handler_call.c",
    "content": "/*\n * handler_call.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#define REVENT z_capstone_is_call\n#define RHANDLER __rewriter_call_handler\n\n#define KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr) \\\n    do {                                                             \\\n        if ((is_pie)) {                                              \\\n            KS_ASM((shadow_addr),                                    \\\n                   \"  call next;\\n\"                                  \\\n                   \"next:\\n\"                                         \\\n                   \"  sub qword ptr [rsp], %#lx;\\n\",                 \\\n                   (shadow_addr) + 5 - (ori_next_addr));             \\\n        } else {                                                     \\\n            KS_ASM((shadow_addr), \"push %#lx\", (ori_next_addr));     \\\n        }                                                            \\\n    } while (0)\n\n/*\n * Rewriter handler for call instruction.\n */\nZ_PRIVATE void __rewriter_call_handler(Rewriter *r, GHashTable *holes,\n                                       cs_insn *inst, addr_t ori_addr,\n                                       addr_t ori_next_addr);\n\n/*\n * Check whether it is a library call\n */\nZ_PRIVATE const 
LFuncInfo *__rewriter_is_library_call(ELF *e, cs_insn *inst);\n\nZ_PRIVATE const LFuncInfo *__rewriter_is_library_call(ELF *e, cs_insn *inst) {\n    const LFuncInfo *rv = NULL;\n    addr_t got_addr = INVALID_ADDR;\n\n    cs_detail *detail = inst->detail;\n    if (detail->x86.op_count != 1) {\n        return NULL;\n    }\n\n    cs_x86_op *op = &(detail->x86.operands[0]);\n\n    if (op->type == X86_OP_IMM) {\n        // check call to PLT\n        rv = z_elf_get_plt_info(e, op->imm);\n    } else if (z_capstone_is_pc_related_ucall(inst, &got_addr) ||\n               (!z_elf_get_is_pie(e) &&\n                z_capstone_is_const_mem_ucall(inst, &got_addr))) {\n        // check call to GOT\n        rv = z_elf_get_got_info(e, got_addr);\n    }\n\n    if (!rv || rv->cfg_info == LCFG_OBJ || rv->ra_info == LRA_OBJ) {\n        return NULL;\n    } else {\n        return rv;\n    }\n}\n\nZ_PRIVATE void __rewriter_call_handler(Rewriter *r, GHashTable *holes,\n                                       cs_insn *inst, addr_t ori_addr,\n                                       addr_t ori_next_addr) {\n    if (inst->id == X86_INS_LCALL) {\n        // XXX: I am not so sure, but it seems lcall is no longer used in amd64\n        z_warn(\"false instruction detected \" CS_SHOW_INST(inst));\n        return;\n    }\n\n    cs_detail *detail = inst->detail;\n    cs_x86_op *op = &(detail->x86.operands[0]);\n\n    addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n\n    ELF *e = z_binary_get_elf(r->binary);\n    bool is_pie = z_elf_get_is_pie(e);\n\n    // first let's correct the inst->address\n    inst->address = shadow_addr;\n\n    const LFuncInfo *lf_info = __rewriter_is_library_call(e, inst);\n\n    /*\n     * first handle library calls\n     */\n    if (lf_info) {\n        assert(detail->x86.op_count == 1);\n        if (op->type == X86_OP_IMM) {\n            // call to PLT\n            z_trace(\"find plt call %s @ %#lx\", lf_info->name, ori_addr);\n\n            addr_t callee_addr 
= op->imm;\n\n            if (r->opts->safe_ret) {\n                // direct write down the instruction\n                KS_ASM_CALL(shadow_addr, callee_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                z_binary_new_retaddr_entity(r->binary, shadow_addr + ks_size,\n                                            ori_next_addr);\n            } else if (lf_info->ra_info == LRA_UNUSED) {\n                // direct write down the instruction\n                KS_ASM_CALL(shadow_addr, callee_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            } else {\n                KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                shadow_addr += ks_size;\n\n                KS_ASM_JMP(shadow_addr, callee_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n\n                // update retaddr information\n                if (lf_info->cfg_info != LCFG_TERM &&\n                    !g_hash_table_lookup(r->potential_retaddrs,\n                                         GSIZE_TO_POINTER(ori_next_addr))) {\n                    // we do not known whether this callee will return. Hence,\n                    // it is a potential CP_RETADDR. 
Additionaly, it is the\n                    // first time that we find this retaddr.\n                    g_hash_table_insert(r->potential_retaddrs,\n                                        GSIZE_TO_POINTER(ori_next_addr),\n                                        GSIZE_TO_POINTER(callee_addr));\n                    Buffer *buf = (Buffer *)g_hash_table_lookup(\n                        r->unpatched_retaddrs, GSIZE_TO_POINTER(callee_addr));\n                    if (!buf) {\n                        buf = z_buffer_create(NULL, 0);\n                        g_hash_table_insert(r->unpatched_retaddrs,\n                                            GSIZE_TO_POINTER(callee_addr),\n                                            (gpointer)buf);\n                    }\n                    z_buffer_append_raw(buf, (uint8_t *)&ori_next_addr,\n                                        sizeof(ori_next_addr));\n                }\n            }\n\n            return;\n        }\n\n        if (op->type == X86_OP_MEM) {\n            // call to GOT\n            z_trace(\"find got call %s @ %#lx\", lf_info->name, ori_addr);\n\n            if (r->opts->safe_ret) {\n                // direct write down the instruction\n                z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size);\n                z_binary_new_retaddr_entity(r->binary, shadow_addr + inst->size,\n                                            ori_next_addr);\n            } else if (lf_info->ra_info == LRA_UNUSED) {\n                // direct write down the instruction\n                z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size);\n            } else {\n                // we first push the retaddr\n                KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                shadow_addr += ks_size;\n\n                addr_t got_addr = INVALID_ADDR;\n                if (z_capstone_is_pc_related_ucall(inst, 
&got_addr)) {\n                    // jmp qword ptr [rip+xxx]\n                    if (inst->size != 6) {\n                        EXITME(\"invalid pc-related ucall \" CS_SHOW_INST(inst));\n                    }\n\n                    int32_t off = got_addr - (shadow_addr + inst->size);\n                    KS_ASM(shadow_addr, \"jmp qword ptr [rip + %+d]\", off);\n                    if (ks_size != 6) {\n                        EXITME(\"invalid pc-related ucall\");\n                    }\n\n                    z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                } else {\n                    // jmp qword ptr [xxx]\n                    KS_ASM(shadow_addr, \"jmp %s\", inst->op_str);\n                    z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                }\n\n                // XXX: note that we do not update retaddr information here to\n                // avoid some case where the GOT gets changed during execution\n            }\n\n            return;\n        }\n\n        EXITME(\"unreachable code\");\n    }\n\n    if (detail->x86.op_count == 1 && op->type == X86_OP_IMM) {\n        addr_t callee_addr = op->imm;\n        // direct call\n\n        /*\n         * step [1]. 
first check callee_addr is inside .text\n         */\n        if (!z_disassembler_get_superset_disasm(r->disassembler, callee_addr)) {\n            if (r->opts->safe_ret) {\n                // directly write\n                KS_ASM_CALL(shadow_addr, callee_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                z_binary_new_retaddr_entity(r->binary, shadow_addr + ks_size,\n                                            ori_next_addr);\n            } else {\n                KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                shadow_addr += ks_size;\n\n                KS_ASM_JMP(shadow_addr, callee_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            }\n            return;\n        }\n\n        /*\n         * step [2]. get shadow callee and prepare hole_buf\n         */\n        uint64_t hole_buf = 0;\n#ifndef NSINGLE_SUCC_OPT\n        addr_t shadow_callee_addr;\n        if (r->opts->disable_opt) {\n            shadow_callee_addr = (addr_t)g_hash_table_lookup(\n                r->rewritten_bbs, GSIZE_TO_POINTER(callee_addr));\n            hole_buf = (uint64_t)X86_INS_JMP;\n        } else {\n            shadow_callee_addr = (addr_t)g_hash_table_lookup(\n                r->shadow_code, GSIZE_TO_POINTER(callee_addr));\n            hole_buf = (uint64_t)(-(int64_t)X86_INS_JMP);\n            assert((int64_t)hole_buf < 0);\n\n            r->optimized_single_succ += 1;\n        }\n#else\n        addr_t shadow_callee_addr = (addr_t)g_hash_table_lookup(\n            r->rewritten_bbs, GSIZE_TO_POINTER(callee_addr));\n#endif\n\n        /*\n         * step [3]. 
rewrite and insrumentation\n         */\n        if (shadow_callee_addr) {\n            if (r->opts->safe_ret) {\n                KS_ASM_CALL(shadow_addr, shadow_callee_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                z_binary_new_retaddr_entity(r->binary, shadow_addr + ks_size,\n                                            ori_next_addr);\n            } else {\n                KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                shadow_addr += ks_size;\n\n                KS_ASM_JMP(shadow_addr, shadow_callee_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            }\n        } else {\n            // rewrite return address\n            if (r->opts->safe_ret) {\n                // insert hole\n                hole_buf = X86_INS_CALL;\n                z_binary_insert_shadow_code(r->binary, (uint8_t *)(&hole_buf),\n                                            __rewriter_get_hole_len(hole_buf));\n\n                z_binary_new_retaddr_entity(\n                    r->binary, shadow_addr + __rewriter_get_hole_len(hole_buf),\n                    ori_next_addr);\n            } else {\n                KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr);\n                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n                shadow_addr += ks_size;\n\n                // insert hole\n                hole_buf = X86_INS_JMP;\n                z_binary_insert_shadow_code(r->binary, (uint8_t *)(&hole_buf),\n                                            __rewriter_get_hole_len(hole_buf));\n            }\n\n            g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr),\n                                GSIZE_TO_POINTER(callee_addr));\n        }\n    } else {\n        // indirect call\n        addr_t text_addr = z_elf_get_shdr_text(e)->sh_addr;\n        size_t 
text_size = z_elf_get_shdr_text(e)->sh_size;\n\n        /*\n         * step [1]. store target value\n         */\n        if (strstr(inst->op_str, \"rip\")) {\n            // Handle PC-relative jmp: a good observation is that any\n            // rip-relative jmp/call are equal to or longer than\n            //          `push ??? PTR [rip + ???]`\n            // Note that we need to keep `next instruction` at the same address\n\n            // step [1]. get ks_size\n            KS_ASM(INVALID_ADDR, \"push %s\", inst->op_str);\n            assert(inst->size >= ks_size);\n\n            // step [2]. padding\n            if (inst->size > ks_size) {\n                size_t padding_size = inst->size - ks_size;\n                z_binary_insert_shadow_code(\n                    r->binary, z_x64_gen_nop(padding_size), padding_size);\n            }\n\n            // step [3]. rewriting\n            KS_ASM(shadow_addr + inst->size - ks_size, \"push %s\", inst->op_str);\n            assert(z_binary_get_shadow_code_addr(r->binary) + ks_size ==\n                   shadow_addr + inst->size);\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n\n            shadow_addr += inst->size;\n        } else {\n            KS_ASM(shadow_addr, \"push %s\", inst->op_str);\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            shadow_addr += ks_size;\n        }\n\n        /*\n         * step [2]. 
rewrite ucall using hand-written assembly code\n         */\n        z_debug(\"rewrite ucall \" CS_SHOW_INST(inst));\n        // XXX: it is ok to directly use LOOKUP_TABLE_ADDR since the underlying\n        // binary is not compiled with PIE.\n        // XXX: call may not care about eflags\n        if (is_pie) {\n            KS_ASM(shadow_addr,\n               \"  mov [rsp - 168], rsi;\\n\"\n               \"  mov [rsp - 128], rcx;\\n\"\n               // \"  mov [rsp - 120], rax;\\n\"\n               // \"  lahf;\\n\"\n               // \"  seto al;\\n\"\n               \"  pop rcx;\\n\"\n               \"  mov [rsp - 144], rcx;\\n\"\n               /*\n                * get program base and update rcx\n                */\n               \"  mov rsi, %#lx;\\n\"\n               \"  mov rsi, [rsi];\\n\"\n               \"  sub rcx, rsi;\\n\"\n               /*\n                * for addresses outside .text, directly go through\n                */\n               \"  cmp rcx, %#lx;\\n\" // compare upper bound of .text\n               \"  jae hug;\\n\"\n               \"  sub rcx, %#lx;\\n\" // sub .text base and compare\n               \"  jb hug;\\n\"\n               /*\n                * update bitmap and prev_id\n                */\n               \"  mov [rsp - 152], rdx;\\n\"\n               \"  mov [rsp - 160], rdi;\\n\"\n               \"  xor rdx, rdx;\\n\" // hug keystone (issue #295)\n               \"  mov rdi, qword ptr [\" STRING(AFL_PREV_ID_PTR) \" + rdx];\\n\"\n               \"  mov rdx, rcx;\\n\"\n               \"  shr rdx, \" STRING(AFL_MAP_SIZE_POW2) \";\\n\"\n               \"  xor rdx, rcx;\\n\"\n               \"  and rdx, \" STRING(AFL_MAP_SIZE_MASK) \";\\n\"\n               \"  xor rdi, rdx;\\n\"\n               \"  inc BYTE PTR [\" STRING(AFL_MAP_ADDR) \" + rdi];\\n\"\n               \"  xor rdi, rdi;\\n\" // hug keystone (issue #295)\n               \"  shr rdx, 1;\\n\"\n               \"  mov qword ptr [\" STRING(AFL_PREV_ID_PTR) \" + 
rdi], rdx;\\n\"\n               \"  mov rdi, [rsp - 160];\\n\"\n               \"  mov rdx, [rsp - 152];\\n\"\n               /*\n                * lookup target shadow address\n                */\n               \"  shl rcx, \" STRING(LOOKUP_TABLE_CELL_SIZE_POW2) \";\\n\"\n               \"  add rcx, rsi;\\n\"\n               \"  movsxd rcx, dword ptr [\" STRING(LOOKUP_TABLE_ADDR) \" + rcx];\\n\"\n               \"  add rcx, rsi;\\n\"\n               \"  mov [rsp - 144], rcx;\\n\"\n               /*\n                * go to target\n                */\n               \"hug:\\n\"\n               // \"  add al, 127;\\n\"\n               // \"  sahf;\\n\"\n               // \"  mov rax, [rsp - 120 - 8];\\n\"\n               \"  mov rcx, [rsp - 128 - 8];\\n\"\n               \"  mov rsi, [rsp - 168 - 8];\\n\",\n               RW_PAGE_INFO_ADDR(program_base), text_addr + text_size, text_addr);\n\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            shadow_addr += ks_size;\n\n        } else {\n            KS_ASM(shadow_addr,\n               \"  mov [rsp - 128], rcx;\\n\"\n               // \"  mov [rsp - 120], rax;\\n\"\n               // \"  lahf;\\n\"\n               // \"  seto al;\\n\"\n               \"  pop rcx;\\n\"\n               \"  mov [rsp - 144], rcx;\\n\"\n               /*\n                * for addresses outside .text, directly go through\n                */\n               \"  cmp rcx, %#lx;\\n\" // compare upper bound of .text\n               \"  jae hug;\\n\"\n               \"  sub rcx, %#lx;\\n\" // sub .text base and compare\n               \"  jb hug;\\n\"\n               /*\n                * update bitmap and prev_id\n                */\n               \"  mov [rsp - 152], rdx;\\n\"\n               \"  mov [rsp - 160], rdi;\\n\"\n               \"  xor rdx, rdx;\\n\" // hug keystone (issue #295)\n               \"  mov rdi, qword ptr [\" STRING(AFL_PREV_ID_PTR) \" + rdx];\\n\"\n               \"  mov rdx, 
rcx;\\n\"\n               \"  shr rdx, \" STRING(AFL_MAP_SIZE_POW2) \";\\n\"\n               \"  xor rdx, rcx;\\n\"\n               \"  and rdx, \" STRING(AFL_MAP_SIZE_MASK) \";\\n\"\n               \"  xor rdi, rdx;\\n\"\n               \"  inc BYTE PTR [\" STRING(AFL_MAP_ADDR) \" + rdi];\\n\"\n               \"  xor rdi, rdi;\\n\" // hug keystone (issue #295)\n               \"  shr rdx, 1;\\n\"\n               \"  mov qword ptr [\" STRING(AFL_PREV_ID_PTR) \" + rdi], rdx;\\n\"\n               \"  mov rdi, [rsp - 160];\\n\"\n               \"  mov rdx, [rsp - 152];\\n\"\n               /*\n                * lookup target shadow address\n                */\n               \"  shl rcx, \" STRING(LOOKUP_TABLE_CELL_SIZE_POW2) \";\\n\"\n               \"  movsxd rcx, dword ptr [\" STRING(LOOKUP_TABLE_ADDR) \" + rcx];\\n\"\n               \"  mov [rsp - 144], rcx;\\n\"\n               /*\n                * go to target\n                */\n               \"hug:\\n\"\n               // \"  add al, 127;\\n\"\n               // \"  sahf;\\n\"\n               // \"  mov rax, [rsp - 120 - 8];\\n\"\n               \"  mov rcx, [rsp - 128 - 8];\\n\",\n               text_addr + text_size, text_addr);\n\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            shadow_addr += ks_size;\n        }\n\n        // XXX: the below assembly is following the previous one\n        if (r->opts->safe_ret) {\n            KS_ASM(shadow_addr, \"call qword ptr [rsp - 144]\");\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            z_binary_new_retaddr_entity(r->binary, shadow_addr + ks_size,\n                                        ori_next_addr);\n        } else {\n            KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr);\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            shadow_addr += ks_size;\n\n            KS_ASM(shadow_addr, \"jmp qword ptr [rsp - 144 + 8];\\n\");\n            
z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n        }\n    }\n}\n"
  },
  {
    "path": "src/rewriter_handlers/handler_cjmp.c",
    "content": "/*\n * handler_cjmp.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#define REVENT z_capstone_is_cjmp\n#define RHANDLER __rewriter_cjmp_handler\n\n/*\n * Rewriter handler for cjmp instruction.\n */\nZ_PRIVATE void __rewriter_cjmp_handler(Rewriter *r, GHashTable *holes,\n                                       cs_insn *inst, addr_t ori_addr,\n                                       addr_t ori_next_addr);\n\n/*\n * Rewriter handler for JRCXZ, JECXZ and JCXZ instruction.\n */\nZ_PRIVATE void __rewriter_cjmp_handler_for_rcx(Rewriter *r, GHashTable *holes,\n                                               cs_insn *inst, addr_t ori_addr,\n                                               addr_t ori_next_addr);\n\nZ_PRIVATE void __rewriter_cjmp_handler_for_rcx(Rewriter *r, GHashTable *holes,\n                                               cs_insn *inst, addr_t ori_addr,\n                                               addr_t ori_next_addr) {\n    if (inst->id == X86_INS_JCXZ) {\n        EXITME(\"`jcxz' is not supported in 64-bit mode\");\n    }\n\n    cs_detail *detail = inst->detail;\n    cs_x86_op *op = &(detail->x86.operands[0]);\n    assert(detail->x86.op_count == 1 && op->type == X86_OP_IMM);\n\n    // get origianl branch address\n    addr_t true_branch_addr = op->imm;\n    addr_t false_branch_addr 
= ori_next_addr;\n\n    if (!z_disassembler_get_superset_disasm(r->disassembler,\n                                            true_branch_addr) ||\n        !z_disassembler_get_superset_disasm(r->disassembler,\n                                            false_branch_addr)) {\n        // j*cxz can only do short jump, if this happend, it means we are\n        // writing a false instruction\n        z_warn(\"false instruction detected \" CS_SHOW_INST(inst));\n        return;\n    }\n\n    /*\n     * We will rewrite the instruction in following format:\n     *\n     *      j*cxz hug:\n     *      jmp shadow_false_branch;\n     *  hug:\n     *      jmp shadow_true_brach;\n     *\n     */\n\n    switch (inst->id) {\n        case X86_INS_JECXZ:\n            // jecxz $+5\n            z_binary_insert_shadow_code(r->binary,\n                                        (const uint8_t *)\"\\x67\\xe3\\x05\", 3);\n            break;\n        case X86_INS_JRCXZ:\n            // jrcxz $+5\n            z_binary_insert_shadow_code(r->binary, (const uint8_t *)\"\\xe3\\x05\",\n                                        2);\n            break;\n        default:\n            EXITME(\"invalid opcode \" CS_SHOW_INST(inst));\n    }\n\n#define __GENERATE_SHADOW_JMP(tar_addr)                                  \\\n    do {                                                                 \\\n        addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);   \\\n        addr_t shadow_tar_addr = (addr_t)g_hash_table_lookup(            \\\n            r->rewritten_bbs, GSIZE_TO_POINTER(tar_addr));               \\\n        if (shadow_tar_addr) {                                           \\\n            KS_ASM(shadow_addr, \"jmp %#lx\", shadow_tar_addr);            \\\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);  \\\n            if (ks_size < 5) {                                           \\\n                z_binary_insert_shadow_code(                             \\\n     
               r->binary, z_x64_gen_nop(5 - ks_size), 5 - ks_size); \\\n            }                                                            \\\n        } else {                                                         \\\n            uint64_t hole_buf = X86_INS_JMP;                             \\\n            shadow_addr = z_binary_insert_shadow_code(                   \\\n                r->binary, (uint8_t *)(&hole_buf),                       \\\n                __rewriter_get_hole_len(hole_buf));                      \\\n            g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr),    \\\n                                GSIZE_TO_POINTER(tar_addr));             \\\n        }                                                                \\\n    } while (0)\n\n    __GENERATE_SHADOW_JMP(false_branch_addr);\n    __GENERATE_SHADOW_JMP(true_branch_addr);\n#undef __GENERATE_SHADOW_JMP\n}\n\nZ_PRIVATE bool __rewriter_cjmp_is_security_check(Rewriter *r, addr_t addr);\n\n// check whether this cjmp is directly related to security check\nZ_PRIVATE bool __rewriter_cjmp_is_security_check(Rewriter *r, addr_t addr) {\n    // XXX: this function must be sound but does not need to be complete, since\n    // we cannot skip any non-security-check cjmp but can afford the additional\n    // efforts of flipping security check cjmp.\n\n    Disassembler *d = r->disassembler;\n    UCFG_Analyzer *a = z_disassembler_get_ucfg_analyzer(d);\n\n    Buffer *succ_addrs_buf = z_disassembler_get_intra_successors(d, addr);\n    size_t succ_n = z_buffer_get_size(succ_addrs_buf) / sizeof(addr_t);\n    addr_t *succ_addrs = (addr_t *)z_buffer_get_raw_buf(succ_addrs_buf);\n\n    bool is_security_check = false;\n    for (int i = 0; i < succ_n; i++) {\n        if (z_ucfg_analyzer_is_security_chk_failed(a, succ_addrs[i])) {\n            is_security_check = true;\n            break;\n        }\n    }\n\n    if (is_security_check) {\n        z_trace(\"find a security check: %#lx\", addr);\n        
// update instrumentation_free_bbs\n        for (int i = 0; i < succ_n; i++) {\n            g_hash_table_add(r->instrumentation_free_bbs,\n                             GSIZE_TO_POINTER(succ_addrs[i]));\n        }\n    }\n\n    return is_security_check;\n}\n\nZ_PRIVATE void __rewriter_cjmp_handler(Rewriter *r, GHashTable *holes,\n                                       cs_insn *inst, addr_t ori_addr,\n                                       addr_t ori_next_addr) {\n    __rewriter_cjmp_is_security_check(r, ori_addr);\n\n    if (inst->id == X86_INS_JCXZ || inst->id == X86_INS_JECXZ ||\n        inst->id == X86_INS_JRCXZ) {\n        __rewriter_cjmp_handler_for_rcx(r, holes, inst, ori_addr,\n                                        ori_next_addr);\n        return;\n    }\n\n    cs_detail *detail = inst->detail;\n    cs_x86_op *op = &(detail->x86.operands[0]);\n\n    uint64_t hole_buf = 0;\n    addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n\n    // no conditional indirect jump\n    assert(detail->x86.op_count == 1 && op->type == X86_OP_IMM);\n    addr_t cjmp_addr = op->imm;\n\n    // first check cjmp_addr is inside .text\n    if (!z_disassembler_get_superset_disasm(r->disassembler, cjmp_addr)) {\n        // directly write\n        KS_ASM(shadow_addr, \"%s %#lx\", cs_insn_name(cs, inst->id), cjmp_addr);\n        z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n        return;\n    }\n\n    addr_t shadow_cjmp_addr = (addr_t)g_hash_table_lookup(\n        r->rewritten_bbs, GSIZE_TO_POINTER(cjmp_addr));\n\n    if (shadow_cjmp_addr) {\n        KS_ASM(shadow_addr, \"%s %#lx\", cs_insn_name(cs, inst->id),\n               shadow_cjmp_addr);\n        z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n    } else {\n        // cjmp ??? 
(HOLE)\n        hole_buf = (uint64_t)inst->id;\n        shadow_addr =\n            z_binary_insert_shadow_code(r->binary, (uint8_t *)(&hole_buf),\n                                        __rewriter_get_hole_len(hole_buf));\n        g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr),\n                            GSIZE_TO_POINTER(cjmp_addr));\n    }\n}\n"
  },
  {
    "path": "src/rewriter_handlers/handler_jmp.c",
    "content": "/*\n * handler_jmp.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#define REVENT z_capstone_is_jmp\n#define RHANDLER __rewriter_jmp_handler\n\n/*\n * Rewriter handler for jmp instruction.\n */\nZ_PRIVATE void __rewriter_jmp_handler(Rewriter *r, GHashTable *holes,\n                                      cs_insn *inst, addr_t ori_addr,\n                                      addr_t ori_next_addr);\n\nZ_PRIVATE void __rewriter_jmp_handler(Rewriter *r, GHashTable *holes,\n                                      cs_insn *inst, addr_t ori_addr,\n                                      addr_t ori_next_addr) {\n    cs_detail *detail = inst->detail;\n    cs_x86_op *op = &(detail->x86.operands[0]);\n\n    ELF *e = z_binary_get_elf(r->binary);\n    addr_t text_addr = z_elf_get_shdr_text(e)->sh_addr;\n    size_t text_size = z_elf_get_shdr_text(e)->sh_size;\n\n    if (detail->x86.op_count == 1 && op->type == X86_OP_IMM) {\n        // direct jump here\n        addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n        addr_t jmp_addr = op->imm;\n\n        // first check jmp_addr is inside .text\n        if (!z_disassembler_get_superset_disasm(r->disassembler, jmp_addr)) {\n            // directly write\n            KS_ASM_JMP(shadow_addr, jmp_addr);\n            
z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            return;\n        }\n\n#ifndef NSINGLE_SUCC_OPT\n        uint64_t hole_buf = 0;\n        addr_t shadow_jmp_addr;\n        if (r->opts->disable_opt) {\n            shadow_jmp_addr = (addr_t)g_hash_table_lookup(\n                r->rewritten_bbs, GSIZE_TO_POINTER(jmp_addr));\n            hole_buf = (uint64_t)X86_INS_JMP;\n        } else {\n            shadow_jmp_addr = (addr_t)g_hash_table_lookup(\n                r->shadow_code, GSIZE_TO_POINTER(jmp_addr));\n            hole_buf = (uint64_t)(-(int64_t)X86_INS_JMP);\n            assert((int64_t)hole_buf < 0);\n\n            r->optimized_single_succ += 1;\n        }\n#else\n        uint64_t hole_buf = X86_INS_JMP;\n        addr_t shadow_jmp_addr = (addr_t)g_hash_table_lookup(\n            r->rewritten_bbs, GSIZE_TO_POINTER(jmp_addr));\n#endif\n\n        if (shadow_jmp_addr) {\n            KS_ASM_JMP(shadow_addr, shadow_jmp_addr);\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n        } else {\n            shadow_addr =\n                z_binary_insert_shadow_code(r->binary, (uint8_t *)(&hole_buf),\n                                            __rewriter_get_hole_len(hole_buf));\n            g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr),\n                                GSIZE_TO_POINTER(jmp_addr));\n        }\n    } else {\n        // jmp may not jump out of .text (NO! 
z3 binary has such behaviour)\n        z_debug(\"rewrite ujmp \" CS_SHOW_INST(inst));\n\n        // record the original shadow_addr for inst\n        addr_t ori_shadow_addr = INVALID_ADDR;\n\n        // store rcx value\n        {\n            addr_t shadow_addr = ori_shadow_addr =\n                z_binary_get_shadow_code_addr(r->binary);\n            KS_ASM(shadow_addr, \"mov [rsp - 128], rcx;\\n\");\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n            shadow_addr += ks_size;\n        }\n\n        // translate jump instruction into mov instruction\n        // XXX: note that if we simply push the target value on the stack,\n        // the program may crash when it uses the value near the bootom of\n        // the stack (e.g., mov rbx, [rsp - 8]). Hence, we use 'mov' instead\n        // of 'push';\n        if (strstr(inst->op_str, \"rip\")) {\n            assert(ori_shadow_addr != INVALID_ADDR);\n\n            // step [1]. get new instruction\n            KS_ASM(INVALID_ADDR, \"mov rcx, %s\", inst->op_str);\n            cs_inst = NULL;  // avoid double free inst\n            CS_DISASM_RAW(ks_encode, ks_size, INVALID_ADDR, 1);\n            cs_insn *new_inst = (cs_insn *)cs_inst;\n            cs_inst = NULL;  // avoid double free new_inst\n\n            // step [2]. calculate a possible starting address for the new mov\n            // instruction, so that we can guarantee correctness:\n            // new_shadow_addr + new_inst->size == ori_shadow_addr + inst->size\n            addr_t new_shadow_addr =\n                ori_shadow_addr + inst->size - new_inst->size;\n\n            // step [3]. translate the instruction, so that:\n            cs_insn *translated_inst =\n                __rewriter_translate_shadow_inst(r, new_inst, new_shadow_addr);\n\n            // step [4]. 
rewrite\n            z_binary_insert_shadow_code(r->binary, translated_inst->bytes,\n                                        translated_inst->size);\n\n            // step [5]. free inst and new_inst\n            cs_free(inst, 1);\n            cs_free(new_inst, 1);\n        } else {\n            addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n            KS_ASM(shadow_addr, \"mov rcx, %s\", inst->op_str);\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n        }\n\n        // do the addrss translation\n        if (z_elf_get_is_pie(e)) {\n            addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n\n            KS_ASM(shadow_addr,\n                   /*\n                    * backup rsi for program base\n                    */\n                   \"  mov [rsp - 152], rsi;\\n\"\n                   /*\n                    * store rcx\n                    */\n                   \"  mov [rsp - 112], rcx;\\n\"\n                   /*\n                    * store EFLAGS\n                    */\n                   \"  mov [rsp - 120], rax;\\n\"\n                   \"  lahf;\\n\"\n                   \"  seto al;\\n\"\n                   /*\n                    * get program base into rsi\n                    */\n                   \"  mov rsi, %#lx;\\n\"\n                   \"  mov rsi, [rsi];\\n\"\n                   /*\n                    * get *static* address in rcx\n                    */\n                   \"  sub rcx, rsi;\\n\"\n                   /*\n                    * for addresses outside .text, directly go through\n                    */\n                   \"  cmp rcx, %#lx;\\n\" // compare upper bound of .text\n                   \"  jae hug;\\n\"\n                   \"  sub rcx, %#lx;\\n\" // sub .text base\n                   \"  jb hug;\\n\"\n                   /*\n                    * update bitmap and prev_id\n                    */\n                   \"  mov [rsp - 136], rdx;\\n\"\n           
        \"  mov [rsp - 144], rdi;\\n\"\n                   \"  xor rdx, rdx;\\n\" // hug keystone (issue #295)\n                   \"  mov rdi, qword ptr [\" STRING(AFL_PREV_ID_PTR) \" + rdx];\\n\"\n                   \"  mov rdx, rcx;\\n\"\n                   \"  shr rdx, \" STRING(AFL_MAP_SIZE_POW2) \";\\n\"\n                   \"  xor rdx, rcx;\\n\"\n                   \"  and rdx, \" STRING(AFL_MAP_SIZE_MASK) \";\\n\"\n                   \"  xor rdi, rdx;\\n\"\n                   \"  inc BYTE PTR [\" STRING(AFL_MAP_ADDR) \" + rdi];\\n\"\n                   \"  xor rdi, rdi;\\n\" // hug keystone (issue #295)\n                   \"  shr rdx, 1;\\n\"\n                   \"  mov qword ptr [\" STRING(AFL_PREV_ID_PTR) \" + rdi], rdx;\\n\"\n                   \"  mov rdi, [rsp - 144];\\n\"\n                   \"  mov rdx, [rsp - 136];\\n\"\n                   /*\n                    * lookup target shadow address\n                    */\n                   \"  shl rcx, \" STRING(LOOKUP_TABLE_CELL_SIZE_POW2)  \" ;\\n\"\n                   \"  add rcx, rsi;\\n\"\n                   \"  movsxd rcx, dword ptr [\" STRING(LOOKUP_TABLE_ADDR) \" + rcx];\\n\"\n                   \"  add rcx, rsi;\\n\"\n                   \"  mov [rsp - 112], rcx;\\n\"\n                   /*\n                    * go to target\n                    */\n                   \"hug:\\n\"\n                   \"  add al, 127;\\n\"\n                   \"  sahf;\\n\"\n                   \"  mov rax, [rsp - 120];\\n\"\n                   \"  mov rcx, [rsp - 128];\\n\"\n                   \"  mov rsi, [rsp - 142];\\n\"\n                   \"  jmp qword ptr [rsp - 112];\\n\",\n                   RW_PAGE_INFO_ADDR(program_base), text_addr + text_size, text_addr);\n\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n\n        } else {\n            // XXX: it is ok to directly use LOOKUP_TABLE_ADDR since the\n            // underlying binary is not compiled with PIE.\n            addr_t 
shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n            KS_ASM(shadow_addr,\n                   /*\n                    * store rcx\n                    */\n                   \"  mov [rsp - 112], rcx;\\n\"\n                   /*\n                    * store EFLAGS\n                    */\n                   \"  mov [rsp - 120], rax;\\n\"\n                   \"  lahf;\\n\"\n                   \"  seto al;\\n\"\n                   /*\n                    * for addresses outside .text, directly go through\n                    */\n                   \"  cmp rcx, %#lx;\\n\" // compare upper bound of .text\n                   \"  jae hug;\\n\"\n                   \"  sub rcx, %#lx;\\n\" // sub .text base\n                   \"  jb hug;\\n\"\n                   /*\n                    * update bitmap and prev_id\n                    */\n                   \"  mov [rsp - 136], rdx;\\n\"\n                   \"  mov [rsp - 144], rdi;\\n\"\n                   \"  xor rdx, rdx;\\n\" // hug keystone (issue #295)\n                   \"  mov rdi, qword ptr [\" STRING(AFL_PREV_ID_PTR) \" + rdx];\\n\"\n                   \"  mov rdx, rcx;\\n\"\n                   \"  shr rdx, \" STRING(AFL_MAP_SIZE_POW2) \";\\n\"\n                   \"  xor rdx, rcx;\\n\"\n                   \"  and rdx, \" STRING(AFL_MAP_SIZE_MASK) \";\\n\"\n                   \"  xor rdi, rdx;\\n\"\n                   \"  inc BYTE PTR [\" STRING(AFL_MAP_ADDR) \" + rdi];\\n\"\n                   \"  xor rdi, rdi;\\n\" // hug keystone (issue #295)\n                   \"  shr rdx, 1;\\n\"\n                   \"  mov qword ptr [\" STRING(AFL_PREV_ID_PTR) \" + rdi], rdx;\\n\"\n                   \"  mov rdi, [rsp - 144];\\n\"\n                   \"  mov rdx, [rsp - 136];\\n\"\n                   /*\n                    * lookup target shadow address\n                    */\n                   \"  shl rcx, \" STRING(LOOKUP_TABLE_CELL_SIZE_POW2)  \" ;\\n\"\n                   \"  movsxd rcx, dword ptr [\" 
STRING(LOOKUP_TABLE_ADDR) \" + rcx];\\n\"\n                   \"  mov [rsp - 112], rcx;\\n\"\n                   /*\n                    * go to target\n                    */\n                   \"hug:\\n\"\n                   \"  add al, 127;\\n\"\n                   \"  sahf;\\n\"\n                   \"  mov rax, [rsp - 120];\\n\"\n                   \"  mov rcx, [rsp - 128];\\n\"\n                   \"  jmp qword ptr [rsp - 112];\\n\",\n                   text_addr + text_size, text_addr);\n\n            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n        }\n    }\n}\n"
  },
  {
    "path": "src/rewriter_handlers/handler_loop.c",
    "content": "/*\n * handler_loop.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#define REVENT z_capstone_is_loop\n#define RHANDLER __rewriter_loop_handler\n\n/*\n * Rewriter handler for loop instruction.\n */\nZ_PRIVATE void __rewriter_loop_handler(Rewriter *r, GHashTable *holes,\n                                       cs_insn *inst, addr_t ori_addr,\n                                       addr_t ori_next_addr);\n\nZ_PRIVATE void __rewriter_loop_handler(Rewriter *r, GHashTable *holes,\n                                       cs_insn *inst, addr_t ori_addr,\n                                       addr_t ori_next_addr) {\n    cs_detail *detail = inst->detail;\n    cs_x86_op *op = &(detail->x86.operands[0]);\n\n    uint64_t hole_buf = 0;\n    addr_t loop_addr = op->imm;\n    addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n\n    // no conditional indirect jump\n    assert(detail->x86.op_count == 1 && op->type == X86_OP_IMM);\n\n    // get hand-written asm\n    KS_ASM(shadow_addr,\n           \"    mov [rsp - 128], rax;\\n\"  // store context\n           \"    lahf;\\n\"\n           \"    seto al;\\n\"\n           \"    dec rcx;\\n\"\n           \"    jz out1;\\n\"\n           \"    add al, 127;\\n\"\n           \"    sahf;\\n\"\n           \"    mov rax, [rsp - 128];\\n\"\n           
\"jmp_target:\\n\"\n           \"    jz 0x0;\\n\"\n           \"    jmp out2;\\n\"\n           \"out1:\\n\"\n           \"    add al, 127;\\n\"\n           \"    sahf;\\n\"\n           \"    mov rax, [rsp - 128];\\n\"\n           \"out2:\\n\");\n    z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n\n    ELF *e = z_binary_get_elf(r->binary);\n    if (inst->id == X86_INS_LOOP) {\n        // jmp ???\n        hole_buf = (uint64_t)X86_INS_JMP;\n        z_elf_write(e, shadow_addr + 0x16, __rewriter_get_hole_len(hole_buf),\n                    (uint8_t *)(&hole_buf));\n        g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr + 0x16),\n                            GSIZE_TO_POINTER(loop_addr));\n    } else if (inst->id == X86_INS_LOOPE) {\n        // je ???\n        hole_buf = (uint64_t)X86_INS_JE;\n        z_elf_write(e, shadow_addr + 0x16, __rewriter_get_hole_len(hole_buf),\n                    (uint8_t *)(&hole_buf));\n        g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr + 0x16),\n                            GSIZE_TO_POINTER(loop_addr));\n    } else if (inst->id == X86_INS_LOOPNE) {\n        // jne ???\n        hole_buf = (uint64_t)X86_INS_JNE;\n        z_elf_write(e, shadow_addr + 0x16, __rewriter_get_hole_len(hole_buf),\n                    (uint8_t *)(&hole_buf));\n        g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr + 0x16),\n                            GSIZE_TO_POINTER(loop_addr));\n    }\n}\n"
  },
  {
    "path": "src/rewriter_handlers/handler_main.in",
    "content": "/*\n * handler_main.in\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * rewriter_handler.c: pre-defined handler for rewriting different instruction.\n */\n\n#include \"../fork_server.h\"\n#include \"../utils.h\"\n\nDEFINE_GETTER(RHandler, rhandler, REvent, event);\nDEFINE_GETTER(RHandler, rhandler, RHandlerFcn, fcn);\n\nZ_API RHandler *z_rhandler_create(REvent event, RHandlerFcn fcn) {\n    RHandler *handler = STRUCT_ALLOC(RHandler);\n    handler->event = event;\n    handler->fcn = fcn;\n\n    return handler;\n}\n\nZ_API void z_rhandler_destroy(RHandler *handler) { z_free(handler); }\n\n/*\n * Get control-flow hole size for different instruction types.\n */\nZ_PRIVATE size_t __rewriter_get_hole_len(uint64_t id);\n\nZ_PRIVATE size_t __rewriter_get_hole_len(uint64_t id) {\n    if ((int64_t)id < 0) {\n        id = (~id) + 1;\n    }\n    size_t res = 0;\n    switch (id) {\n        case X86_INS_CALL:\n        case X86_INS_JMP:\n            res = 5;\n            break;\n        case X86_INS_JAE:\n        case X86_INS_JA:\n        case X86_INS_JBE:\n        case X86_INS_JB:\n        case X86_INS_JE:\n        case X86_INS_JGE:\n        case X86_INS_JG:\n        case X86_INS_JLE:\n        case X86_INS_JL:\n        case X86_INS_JNE:\n        case X86_INS_JNO:\n        case X86_INS_JNP:\n        
case X86_INS_JNS:\n        case X86_INS_JO:\n        case X86_INS_JP:\n        case X86_INS_JS:\n            res = 6;\n            break;\n        case X86_INS_JECXZ:\n            EXITME(\"temporarily not support for jecxz\");\n        case X86_INS_JRCXZ:\n            EXITME(\"temporarily not support for jrcxz\");\n        case X86_INS_JCXZ:\n            EXITME(\"jcxz is not supported in 64-bit mode\");\n        default:\n            EXITME(\"invalid hole\");\n    }\n\n    assert(res >= 4);\n    return res;\n}\n"
  },
  {
    "path": "src/rewriter_handlers/handler_ret.c",
    "content": "/*\n * handler_ret.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#define REVENT z_capstone_is_ret\n#define RHANDLER __rewriter_ret_handler\n\n/*\n * Rewriter handler for ret instruction.\n */\nZ_PRIVATE void __rewriter_ret_handler(Rewriter *r, GHashTable *holes,\n                                      cs_insn *inst, addr_t ori_addr,\n                                      addr_t ori_next_addr);\n\nZ_PRIVATE void __rewriter_ret_handler(Rewriter *r, GHashTable *holes,\n                                      cs_insn *inst, addr_t ori_addr,\n                                      addr_t ori_next_addr) {\n    if (r->opts->safe_ret) {\n        z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size);\n        return;\n    }\n\n    // modern CPU will do nothing more except direct returning about `repz ret`\n    addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);\n    ELF *e = z_binary_get_elf(r->binary);\n    addr_t text_addr = z_elf_get_shdr_text(e)->sh_addr;\n    size_t text_size = z_elf_get_shdr_text(e)->sh_size;\n\n    if (z_elf_get_is_pie(e)) {\n        KS_ASM(shadow_addr,\n               \"  mov [rsp - 128], rcx;\\n\"\n               \"  mov [rsp - 136], rdx;\\n\"\n               // \"  mov [rsp - 120], rax;\\n\"\n               // \"  lahf;\\n\"\n               // \"  seto 
al;\\n\"\n               /*\n                * get program base\n                */\n               \"  mov rdx, %#lx;\\n\"\n               \"  mov rdx, [rdx];\\n\"\n               /*\n                * calculate the *static* address of the retaddr (w/o PIE)\n                */\n               \"  mov rcx, [rsp];\\n\"\n               \"  sub rcx, rdx;\\n\"\n               /*\n                * check whether the retaddr is inside .text\n                */\n               \"  cmp rcx, %#lx;\\n\"\n               \"  jae hug;\\n\"\n               \"  sub rcx, %#lx;\\n\"  // sub .text base\n               \"  jb hug;\\n\"\n               /*\n                * translate the retaddr\n                */\n               \"  shl rcx, \" STRING(LOOKUP_TABLE_CELL_SIZE_POW2) \";\\n\"\n               \"  add rcx, rdx;\\n\"  // add the program base for lookup table, in advance\n               \"  movsxd rcx, dword ptr [\" STRING(LOOKUP_TABLE_ADDR) \" + rcx];\\n\"  // lookup table\n               \"  add rcx, rdx;\\n\"  // add the program base onto the translated address\n               \"  mov [rsp], rcx;\\n\"\n               \"hug:\\n\"\n               // \"  add al, 127;\\n\"\n               // \"  sahf;\\n\"\n               // \"  mov rax, [rsp - 120];\\n\"\n               \"  mov rdx, [rsp - 136];\\n\"\n               \"  mov rcx, [rsp - 128];\\n\",\n               // \"  ret;\\n\", // XXX: ret is replaced by the original inst, see below\n               RW_PAGE_INFO_ADDR(program_base), text_addr + text_size, text_addr);\n\n        z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n        shadow_addr += ks_size;\n    } else {\n        // XXX: it is ok to directly use LOOKUP_TABLE_ADDR since the underlying\n        // binary is not compiled with PIE.\n        KS_ASM(shadow_addr,\n               \"  mov [rsp - 128], rcx;\\n\"\n               // \"  mov [rsp - 120], rax;\\n\"\n               // \"  lahf;\\n\"\n               // \"  seto al;\\n\"\n               \"  mov 
rcx, [rsp];\\n\"\n               \"  cmp rcx, %#lx;\\n\"\n               \"  jae hug;\\n\"\n               \"  sub rcx, %#lx;\\n\"  // sub .text base\n               \"  jb hug;\\n\"\n               \"  shl rcx, \" STRING(LOOKUP_TABLE_CELL_SIZE_POW2) \";\\n\"\n               \"  movsxd rcx, dword ptr [\" STRING(LOOKUP_TABLE_ADDR) \" + rcx];\\n\"  // lookup table\n               \"  mov [rsp], rcx;\\n\"\n               \"hug:\\n\"\n               // \"  add al, 127;\\n\"\n               // \"  sahf;\\n\"\n               // \"  mov rax, [rsp - 120];\\n\"\n               \"  mov rcx, [rsp - 128];\\n\",\n               // \"  ret;\\n\", // XXX: ret is replaced by the original inst, see below\n               text_addr + text_size, text_addr);\n\n        z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);\n        shadow_addr += ks_size;\n    }\n\n    // XXX: we respect the original encoding of inst, to support `ret n`\n    // XXX: we keep the bnp prefix here if present. Note that we have to place\n    // endbr64 instruction at a suitable position since it is always possible\n    // for the control flow to jump from the original code (w/ an endbr64\n    // prefix)\n    z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size);\n}\n"
  },
  {
    "path": "src/sys_optarg.c",
    "content": "/*\n * sys_optarg.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"sys_optarg.h\"\n#include \"utils.h\"\n\nSysOptArgs sys_optargs = {\n    .mode = SYSMODE_NONE,\n    .r =\n        {\n            .trace_pc = false,\n            .count_conflict = false,\n            .disable_opt = false,\n            .safe_ret = false,\n            .instrument_early = false,\n            .force_pdisasm = false,\n            .disable_callthrough = false,\n            .force_linear = false,\n        },\n    .log_level = LOG_INFO,\n    .timeout = SYS_TIMEOUT,\n    .check_execs = SYS_CHECK_EXECS,\n};\n"
  },
  {
    "path": "src/sys_optarg.h",
    "content": "/*\n * sys_optarg.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __SYS_OPTARGS_H\n#define __SYS_OPTARGS_H\n\n#include \"config.h\"\n\n/*\n * Default system options\n */\n#define SYS_TIMEOUT 2000UL\n#define SYS_CHECK_EXECS 200000\n\n/*\n * System mode\n */\ntypedef enum system_mode_t {\n    SYSMODE_NONE,\n    SYSMODE_DAEMON,\n    SYSMODE_RUN,\n    SYSMODE_PATCH,\n    SYSMODE_DISASM,\n    SYSMODE_VIEW,\n} SysMode;\n\n/*\n * Rewriting options\n */\ntypedef struct rewriting_optargs_t {\n    bool trace_pc;\n    bool count_conflict;\n    bool disable_opt;\n    bool safe_ret;\n    bool instrument_early;\n    bool force_pdisasm;\n    bool disable_callthrough;\n    bool force_linear;  // secret option\n} RewritingOptArgs;\n\ntypedef struct system_optargs_t {\n    SysMode mode;\n\n    RewritingOptArgs r;\n\n    int32_t log_level;\n\n    uint64_t timeout;\n\n    uint32_t check_execs;\n} SysOptArgs;\n\nextern SysOptArgs sys_optargs;\n\n#endif\n"
  },
  {
    "path": "src/tp_dispatcher.c",
    "content": "/*\n * tp_dispatcher.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"tp_dispatcher.h\"\n#include \"afl_config.h\"\n#include \"utils.h\"\n\n#include \"trampolines/trampolines.h\"\n\n#define TPD_LOCATE_HOLE(haystack, haystacklen, needle, needlelen, err)   \\\n    ({                                                                   \\\n        void *p = memmem((void *)(haystack), (size_t)(haystacklen),      \\\n                         (void *)(needle), (size_t)(needlelen));         \\\n        if (p == NULL) {                                                 \\\n            EXITME(err);                                                 \\\n        }                                                                \\\n        assert(memmem(p + (size_t)(needlelen),                           \\\n                      (size_t)(haystacklen) - (p - (void *)(haystack)) - \\\n                          (size_t)(needlelen),                           \\\n                      (void *)(needle), (size_t)(needlelen)) == NULL);   \\\n        p;                                                               \\\n    })\n\n/*\n * Create a TPCode\n */\nZ_PRIVATE TPCode *__tp_code_create(size_t size);\n\n/*\n * Destroy a TPcode\n */\nZ_PRIVATE void __tp_code_destroy(TPCode *tpc);\n\n/*\n * Emit TPCode\n 
*/\nZ_PRIVATE const uint8_t *__tp_code_emit(TPCode *tpc, uint32_t id,\n                                        size_t *size_ptr);\n\n/*\n * Append raw code into TPCode\n */\nZ_PRIVATE void __tp_code_append_raw(TPCode *tpc, const uint8_t *buf,\n                                    size_t size);\n\n/*\n * Locate holes in TPCode\n */\nZ_PRIVATE void __tp_code_locate_holes(TPCode *tpc, uint32_t id_hole,\n                                      uint32_t shr_id_hole);\n\nZ_PRIVATE void __tp_code_destroy(TPCode *tpc) {\n    z_free(tpc->code);\n    z_free(tpc);\n}\n\nZ_PRIVATE TPCode *__tp_code_create(size_t size) {\n    TPCode *tpc = z_alloc(1, sizeof(TPCode));\n    tpc->code = z_alloc(size, sizeof(uint8_t));\n    tpc->len = 0;\n    tpc->capacity = size;\n    tpc->id_hole = tpc->shr_id_hole = NULL;\n    return tpc;\n}\n\nZ_PRIVATE void __tp_code_locate_holes(TPCode *tpc, uint32_t id_hole,\n                                      uint32_t shr_id_hole) {\n    tpc->id_hole = (uint32_t *)TPD_LOCATE_HOLE(\n        tpc->code, tpc->len, &id_hole, sizeof(id_hole), \"missing id hole\");\n    tpc->shr_id_hole =\n        (uint32_t *)TPD_LOCATE_HOLE(tpc->code, tpc->len, &shr_id_hole,\n                                    sizeof(shr_id_hole), \"missing shr id hole\");\n}\n\nZ_PRIVATE void __tp_code_append_raw(TPCode *tpc, const uint8_t *buf,\n                                    size_t size) {\n    if (tpc->len + size > tpc->capacity) {\n        EXITME(\"TPCode execceds its total capacity\");\n    }\n    memcpy(tpc->code + tpc->len, buf, size);\n    tpc->len += size;\n}\n\nZ_PRIVATE const uint8_t *__tp_code_emit(TPCode *tpc, uint32_t id,\n                                        size_t *size_ptr) {\n    assert(id < AFL_MAP_SIZE);\n\n    *(tpc->id_hole) = (id);\n    *(tpc->shr_id_hole) = ((id) >> 1);\n    *(size_ptr) = tpc->len;\n    return tpc->code;\n}\n\nZ_API void z_tp_dispatcher_destroy(TPDispatcher *tpd) {\n    __tp_code_destroy(tpd->bitmap);\n\n#define __DESTROY_TPCODE_FOR_REG(REG) 
__tp_code_destroy(tpd->bitmap_##REG)\n    CAPSTONE_FORALL_GPR(__DESTROY_TPCODE_FOR_REG);\n#undef __DESTROY_TPCODE_FOR_REG\n\n    z_free(tpd);\n}\n\nZ_API TPDispatcher *z_tp_dispatcher_create() {\n    TPDispatcher *tpd = STRUCT_ALLOC(TPDispatcher);\n\n    /*\n     * Context Save\n     */\n    tpd->context_save = context_save_bin;\n    tpd->context_save_len = context_save_bin_len;\n\n    /*\n     * Context Restore\n     */\n    tpd->context_restore = context_restore_bin;\n    tpd->context_restore_len = context_restore_bin_len;\n\n    /*\n     * Register bitmap\n     */\n#define __GENERATE_TPCODE_FOR_REG(REG)                                       \\\n    do {                                                                     \\\n        tpd->bitmap_##REG =                                                  \\\n            __tp_code_create(__BITMAP_##REG##_END - __BITMAP_##REG);         \\\n        __tp_code_append_raw(tpd->bitmap_##REG, bitmap_bin + __BITMAP_##REG, \\\n                             __BITMAP_##REG##_END - __BITMAP_##REG);         \\\n        __tp_code_locate_holes(tpd->bitmap_##REG, bitmap_id_hole,            \\\n                               bitmap_shr_id_hole);                          \\\n    } while (0)\n\n    CAPSTONE_FORALL_GPR(__GENERATE_TPCODE_FOR_REG);\n\n#undef __GENERATE_TPCODE_FOR_REG\n\n    /*\n     * Bitmap (w/ push and pop GPR): we choose RDI here\n     */\n    tpd->bitmap = __tp_code_create(tpd->bitmap_RDI->len + 0x10);\n    // 'push rdi'\n    KS_ASM(INVALID_ADDR, \"mov [rsp - 152], rdi\");\n    __tp_code_append_raw(tpd->bitmap, ks_encode, ks_size);\n    // rdi bitmap\n    __tp_code_append_raw(tpd->bitmap, tpd->bitmap_RDI->code,\n                         tpd->bitmap_RDI->len);\n    // 'pop rdi'\n    KS_ASM(INVALID_ADDR, \"mov rdi, [rsp - 152]\");\n    __tp_code_append_raw(tpd->bitmap, ks_encode, ks_size);\n    // find holes\n    __tp_code_locate_holes(tpd->bitmap, bitmap_id_hole, bitmap_shr_id_hole);\n\n    return tpd;\n}\n\nZ_API const 
uint8_t *z_tp_dispatcher_emit_context_save(TPDispatcher *tpd,\n                                                       size_t *size) {\n    *size = tpd->context_save_len;\n    return (const uint8_t *)tpd->context_save;\n}\n\nZ_API const uint8_t *z_tp_dispatcher_emit_context_restore(TPDispatcher *tpd,\n                                                          size_t *size) {\n    *size = tpd->context_restore_len;\n    return (const uint8_t *)tpd->context_restore;\n}\n\nZ_API const uint8_t *z_tp_dispatcher_emit_bitmap(TPDispatcher *tpd,\n                                                 size_t *size, addr_t addr,\n                                                 GPRState state) {\n#define __EMIT_TP_FOR_REG(REG)                                               \\\n    do {                                                                     \\\n        if (state & GPRSTATE_##REG) {                                        \\\n            return __tp_code_emit(tpd->bitmap_##REG, AFL_BB_ID(addr), size); \\\n        }                                                                    \\\n    } while (0)\n\n    CAPSTONE_FORALL_GPR(__EMIT_TP_FOR_REG);\n\n#undef __EMIT_TP_FOR_REG\n\n    return __tp_code_emit(tpd->bitmap, AFL_BB_ID(addr), size);\n}\n"
  },
  {
    "path": "src/tp_dispatcher.h",
    "content": "/*\n * tp_dispatcher.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __TP_DISPATCHER_H\n#define __TP_DISPATCHER_H\n\n#include \"capstone_.h\"\n#include \"config.h\"\n\n// XXX: we avoid using Buffer as raw pointer can be faster. Note that TP_EMIT\n// will be invocated during fuzzing.\ntypedef struct tp_code_t {\n    uint8_t *code;\n    size_t len;\n    size_t capacity;\n    uint32_t *id_hole;\n    uint32_t *shr_id_hole;\n} TPCode;\n\nSTRUCT(TPDispatcher, {\n    uint8_t *context_save;\n    size_t context_save_len;\n\n    uint8_t *context_restore;\n    size_t context_restore_len;\n\n    TPCode *bitmap;\n\n    TPCode *bitmap_RAX;\n    TPCode *bitmap_RBX;\n    TPCode *bitmap_RCX;\n    TPCode *bitmap_RDX;\n    TPCode *bitmap_RBP;\n    TPCode *bitmap_RDI;\n    TPCode *bitmap_RSI;\n    TPCode *bitmap_R8;\n    TPCode *bitmap_R9;\n    TPCode *bitmap_R10;\n    TPCode *bitmap_R11;\n    TPCode *bitmap_R12;\n    TPCode *bitmap_R13;\n    TPCode *bitmap_R14;\n    TPCode *bitmap_R15;\n});\n\n/*\n * Create a tp_dispatcher.\n */\nZ_API TPDispatcher *z_tp_dispatcher_create();\n\n/*\n * Destroy a tp_dispatcher.\n */\nZ_API void z_tp_dispatcher_destroy(TPDispatcher *tpd);\n\n/*\n * Emit a Context Saving TP\n */\nZ_API const uint8_t *z_tp_dispatcher_emit_context_save(TPDispatcher *tpd,\n                  
                                     size_t *size);\n\n/*\n * Emit a Context Restoring TP\n */\nZ_API const uint8_t *z_tp_dispatcher_emit_context_restore(TPDispatcher *tpd,\n                                                          size_t *size);\n\n/*\n * Emit a bitmap TP\n */\nZ_API const uint8_t *z_tp_dispatcher_emit_bitmap(TPDispatcher *tpd,\n                                                 size_t *size, addr_t addr,\n                                                 GPRState state);\n\n#endif\n"
  },
  {
    "path": "src/trampolines/Makefile",
    "content": "SIGSTKSZ =  $(shell ../get_signal_stack_size.sh)\n\nall: bitmap context_save context_restore\n\nbitmap:\n\t$(CC) -Wall -fno-stack-protector -fpie -Os -c bitmap.c -DSIGNAL_STACK_SIZE=$(SIGSTKSZ)\n\t$(CC) -nostdlib -o bitmap.out bitmap.o -Wl,--entry=_entry\n\tobjcopy --dump-section .text=bitmap.bin bitmap.out\n\txxd -i bitmap.bin > bitmap_bin.c\n\treadelf -s bitmap.o | grep __BITMAP_ |  awk '{print \"const size_t \" $$8 \" = 0x\" $$2 \";\"}' >> bitmap_bin.c\n\techo \"const unsigned int bitmap_id_hole = 0xDEAD;\" >> bitmap_bin.c\n\techo \"const unsigned int bitmap_shr_id_hole = 0xBEEF;\" >> bitmap_bin.c\n\ncontext_save:\n\t$(CC) -Wall -fno-stack-protector -fpie -Os -c context_save.c -DSIGNAL_STACK_SIZE=$(SIGSTKSZ)\n\t$(CC) -nostdlib -o context_save.out context_save.o -Wl,--entry=_entry\n\tobjcopy --dump-section .text=context_save.bin context_save.out\n\txxd -i context_save.bin > context_save_bin.c\n\ncontext_restore:\n\t$(CC) -Wall -fno-stack-protector -fpie -Os -c context_restore.c -DSIGNAL_STACK_SIZE=$(SIGSTKSZ)\n\t$(CC) -nostdlib -o context_restore.out context_restore.o -Wl,--entry=_entry\n\tobjcopy --dump-section .text=context_restore.bin context_restore.out\n\txxd -i context_restore.bin > context_restore_bin.c\n\nclean:\n\trm -rf *.o *_bin.c *.bin *.out\n\n\nSOURCES += bitmap.c context_save.c context_restore.c\nHDEADERS += trampolines.h\n\nformat:\n\tclang-format -sort-includes -style=file -i $(SOURCES)\n\tclang-format -sort-includes -style=file -i $(HDEADERS)\n"
  },
  {
    "path": "src/trampolines/bitmap.c",
    "content": "/*\n * bitmap.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"../afl_config.h\"\n\n#define __BITMAP_FOR_REG(REG)                                          \\\n    /****************************************************************/ \\\n    /* set symbol name */                                              \\\n    \".globl __BITMAP_\" STRING(REG) \"\\n\"                                \\\n    \".type __BITMAP_\" STRING(REG) \",@function\\n\"                       \\\n    \"__BITMAP_\" STRING(REG)\":\\n\"                                       \\\n    /* get prev_id */                                                  \\\n    \"\\tmov \" STRING(REG) \", [\" STRING(AFL_PREV_ID_PTR) \"];\\n\"          \\\n    /* inc bitmap */                                                   \\\n    \"\\txor \" STRING(REG) \", 0xDEAD;\\n\"                                 \\\n    \"\\tinc BYTE PTR [\" STRING(AFL_MAP_ADDR) \" + \" STRING(REG) \"];\\n\"   \\\n    /* update prev_id */                                               \\\n    \"\\tmov QWORD PTR [\" STRING(AFL_PREV_ID_PTR) \"], 0xBEEF;\\n\"         \\\n    /* set symbol end  */                                              \\\n    \".globl __BITMAP_\" STRING(REG) \"_END\\n\"                            \\\n    \".type __BITMAP_\" STRING(REG) 
\"_END,@function\\n\"                   \\\n    \"__BITMAP_\" STRING(REG)\"_END:\\n\"                                   \\\n    /****************************************************************/\n\nasm(\".intel_syntax noprefix\\n\"\n    \".globl _entry\\n\"\n    \".type _entry,@function\\n\"\n    \"_entry:\\n\"\n\n    __BITMAP_FOR_REG(RAX)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(RBX)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(RCX)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(RDX)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(RDI)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(RSI)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(RBP)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(R8)   // FORCE NEWLINE\n    __BITMAP_FOR_REG(R9)   // FORCE NEWLINE\n    __BITMAP_FOR_REG(R10)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(R11)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(R12)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(R13)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(R14)  // FORCE NEWLINE\n    __BITMAP_FOR_REG(R15)  // FORCE NEWLINE\n);\n\n#undef __BITMAP_FOR_REG\n"
  },
  {
    "path": "src/trampolines/context_restore.c",
    "content": "/*\n * context_restore.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * COPY FROM AFL\n *\n * - popf is *awfully* slow, which is why we're doing the lahf / sahf +\n *  overflow test trick. Unfortunately, this forces us to taint eax / rax, but\n *  this dependency on a commonly-used register still beats the alternative of\n *  using pushf / popf.\n *\n *  One possible optimization is to avoid touching flags by using a circular\n *  buffer that stores just a sequence of current locations, with the XOR stuff\n *  happening offline. Alas, this doesn't seem to have a huge impact:\n *\n *  https://groups.google.com/d/msg/afl-users/MsajVf4fRLo/2u6t88ntUBIJ\n */\n\n/*\n * IT SEEMS PUSH/POP of a general register is a little bit faster than MOV RSP\n */\n\nasm(\".intel_syntax noprefix\\n\"\n    \".globl _entry\\n\"\n    \".type _entry,@function\\n\"\n    \"_entry:\\n\"\n\n    // restore EFLAGS\n    \"\\tadd al, 127;\\n\"\n    \"\\tsahf;\\n\"\n\n    // restore rax\n    \"\\tmov rax, [rsp - 144];\\n\");\n"
  },
  {
    "path": "src/trampolines/context_save.c",
    "content": "/*\n * context_save.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n/*\n * COPY FROM AFL\n *\n * - popf is *awfully* slow, which is why we're doing the lahf / sahf +\n *  overflow test trick. Unfortunately, this forces us to taint eax / rax, but\n *  this dependency on a commonly-used register still beats the alternative of\n *  using pushf / popf.\n *\n *  One possible optimization is to avoid touching flags by using a circular\n *  buffer that stores just a sequence of current locations, with the XOR stuff\n *  happening offline. Alas, this doesn't seem to have a huge impact:\n *\n *  https://groups.google.com/d/msg/afl-users/MsajVf4fRLo/2u6t88ntUBIJ\n */\n\n/*\n * IT SEEMS PUSH/POP of a general register is a little bit faster than MOV RSP\n */\n\nasm(\".intel_syntax noprefix\\n\"\n    \".globl _entry\\n\"\n    \".type _entry,@function\\n\"\n    \"_entry:\\n\"\n\n    // store rax\n    \"\\tmov [rsp - 144], rax;\\n\"\n\n    // store EFLAGS\n    \"\\tlahf;\\n\"\n    \"\\tseto al;\\n\");\n"
  },
  {
    "path": "src/trampolines/trampolines.h",
    "content": "/*\n * trampolines.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __TRAMPOLINES_H\n#define __TRAMPOLINES_H\n\n#include \"bitmap_bin.c\"\n#include \"context_restore_bin.c\"\n#include \"context_save_bin.c\"\n\n#define BITMAP_REG X86_INS_RDI\n\n#endif\n"
  },
  {
    "path": "src/ucfg_analyzer.c",
    "content": "/*\n * ucfg_analyzer.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"ucfg_analyzer.h\"\n#include \"elf_.h\"\n#include \"iterator.h\"\n#include \"library_functions/library_functions.h\"\n#include \"utils.h\"\n\n// XXX: there are three types of UCFG edges:\n//  DIRECT_UEDGE              : call edges to the callee\n//  INTRA_UEDGE               : call-fallthrough edges\n//  DIRECT_UEDGE | INTRA_UEDGE: other control flow edges\ntypedef enum ucfg_edge_t {\n    DIRECT_UEDGE = (1 << 0),\n    INTRA_UEDGE = (1 << 1),\n} UEdge;\n\n#define __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(t, k)        \\\n    ({                                                     \\\n        Buffer *buf = (Buffer *)g_hash_table_lookup(t, k); \\\n        if (!buf) {                                        \\\n            buf = z_buffer_create(NULL, 0);                \\\n            g_hash_table_insert(t, k, (gpointer)buf);      \\\n        }                                                  \\\n        buf;                                               \\\n    })\n\n/*\n * Initial analysis for each instruction (calculate direct successors and\n * predecessors)\n */\nZ_PRIVATE void __ucfg_analyzer_init_analyze(UCFG_Analyzer *a, addr_t addr,\n                                            const cs_insn *inst);\n\n/*\n * 
Advanced analysis for each instruction (gpr & flg's use-def)\n */\nZ_PRIVATE void __ucfg_analyzer_advance_analyze(UCFG_Analyzer *a, addr_t addr,\n                                               const cs_insn *inst);\n\n/*\n * Use-def analysis for eflag reigster\n */\nZ_PRIVATE void __ucfg_analyzer_analyze_flg(UCFG_Analyzer *a, addr_t addr,\n                                           const cs_insn *inst);\n\n/*\n * Use-def analysis for general purpose register\n */\nZ_PRIVATE void __ucfg_analyzer_analyze_gpr(UCFG_Analyzer *a, addr_t addr,\n                                           const cs_insn *inst);\n\n/*\n * Returning / non-returning functions analysis: whether a given inst (at addr)\n * can reach a RET instruction via intra-procedure edges\n */\nZ_PRIVATE void __ucfg_analyzer_analyze_ret(UCFG_Analyzer *a, addr_t addr,\n                                           const cs_insn *inst);\n\n/*\n * Reachability analysis for security check failed functions: whether a given\n * inst (at addr) can reach a security-chk-failed PLT call without any condition\n * and indirect edge\n */\nZ_PRIVATE void __ucfg_analyzer_analyze_sec_chk(UCFG_Analyzer *a, addr_t addr,\n                                               const cs_insn *inst);\n\n/*\n * Add predecessor and successor relation\n */\nZ_PRIVATE void __ucfg_analyzer_new_pred_and_succ(UCFG_Analyzer *a,\n                                                 addr_t src_addr,\n                                                 addr_t dst_addr, UEdge edge);\n\n/*\n * Check whether two instructions are consistent, so that simply replacing one\n * with another one will not influence current analysis result\n */\nZ_PRIVATE bool __ucfg_analyzer_check_consistent(const cs_insn *inst_alice,\n                                                const cs_insn *inst_bob);\n\nZ_PRIVATE void __ucfg_analyzer_analyze_sec_chk(UCFG_Analyzer *a, addr_t addr,\n                                               const cs_insn *inst) {\n    // this addr cannot be 
in a->sec_chk_failed now\n    assert(!g_hash_table_lookup(a->sec_chk_failed, GSIZE_TO_POINTER(addr)));\n\n    ELF *e = z_binary_get_elf(a->binary);\n\n    GQueue *queue = g_queue_new();  // queue for back trace\n\n    // step (1). check whether current address is a sec_check_failed function.\n    // Any other call wouldbe invalid.\n    if (z_capstone_is_call(inst)) {\n        const cs_detail *detail = inst->detail;\n        if (detail->x86.op_count != 1) {\n            return;\n        }\n\n        const cs_x86_op *op = &(detail->x86.operands[0]);\n        if (op->type != X86_OP_IMM) {\n            return;\n        }\n\n        const addr_t callee_addr = op->imm;\n        const LFuncInfo *callee_info = z_elf_get_plt_info(e, callee_addr);\n        if (!callee_info) {\n            return;\n        }\n\n        // see https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling for\n        // C++ mangling rules\n        if ((z_strstr(callee_info->name, \"__asan_report\")) ||\n            (z_strstr(callee_info->name, \"__stack_chk_fail\")) ||\n            (z_strncmp(callee_info->name, \"_Z\", 2) &&\n             z_strstr(callee_info->name, \"__asan\") &&\n             z_strstr(callee_info->name, \"Report\"))) {\n            // it is a sec_chk_failed PLT call\n            z_trace(\"find a sec_chk_failed instruction: %#lx\", addr);\n            g_hash_table_add(a->sec_chk_failed, GSIZE_TO_POINTER(addr));\n            g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n        }\n    } else if (z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst) ||\n               z_capstone_is_xbegin(inst) || z_capstone_is_ret(inst)) {\n        // these instructions cannot belong to a sec_chk_failed block\n        return;\n    } else {\n        Buffer *succ_addrs = z_ucfg_analyzer_get_intra_successors(a, addr);\n        size_t succ_n = z_buffer_get_size(succ_addrs) / sizeof(addr_t);\n        if (succ_n != 1) {\n            // we only consider those instructions with only one successor\n  
          return;\n        }\n\n        addr_t succ_addr = *((addr_t *)z_buffer_get_raw_buf(succ_addrs));\n        if (!g_hash_table_lookup(a->sec_chk_failed,\n                                 GSIZE_TO_POINTER(succ_addr))) {\n            return;\n        }\n\n        // it belongs to a sec_chk_failed block\n        z_trace(\"find a sec_chk_failed instruction: %#lx\", addr);\n        g_hash_table_add(a->sec_chk_failed, GSIZE_TO_POINTER(addr));\n        g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n    }\n\n    // step (2). check all the possible predecessors\n    while (!g_queue_is_empty(queue)) {\n        addr_t cur_addr = (addr_t)g_queue_pop_head(queue);\n        assert(\n            g_hash_table_lookup(a->sec_chk_failed, GSIZE_TO_POINTER(cur_addr)));\n\n        Buffer *pred_addrs_buf =\n            z_ucfg_analyzer_get_intra_predecessors(a, cur_addr);\n        size_t pred_n = z_buffer_get_size(pred_addrs_buf) / sizeof(addr_t);\n        addr_t *pred_addrs = (addr_t *)z_buffer_get_raw_buf(pred_addrs_buf);\n        for (int i = 0; i < pred_n; i++) {\n            addr_t pred_addr = pred_addrs[i];\n\n            const cs_insn *pred_inst = (const cs_insn *)g_hash_table_lookup(\n                a->insts, GSIZE_TO_POINTER(pred_addr));\n            // pred_inst cannot be NULL\n            assert(pred_inst);\n\n            // step (2.1). check the type of pred_inst\n            if (z_capstone_is_call(pred_inst) ||\n                z_capstone_is_cjmp(pred_inst) ||\n                z_capstone_is_xbegin(pred_inst) ||\n                z_capstone_is_ret(pred_inst) || z_capstone_is_loop(pred_inst)) {\n                continue;\n            }\n\n            // step (2.2). check the number of succ\n            size_t succ_n =\n                z_buffer_get_size(\n                    z_ucfg_analyzer_get_intra_successors(a, pred_addr)) /\n                sizeof(addr_t);\n            if (succ_n != 1) {\n                continue;\n            }\n\n            // step (2.3). 
add into queue if not find before\n            if (!g_hash_table_lookup(a->sec_chk_failed,\n                                     GSIZE_TO_POINTER(pred_addr))) {\n                z_trace(\"find a sec_chk_failed instruction: %#lx\", pred_addr);\n                g_hash_table_add(a->sec_chk_failed,\n                                 GSIZE_TO_POINTER(pred_addr));\n                g_queue_push_tail(queue, GSIZE_TO_POINTER(pred_addr));\n            }\n        }\n    }\n}\n\nZ_PRIVATE void __ucfg_analyzer_analyze_ret(UCFG_Analyzer *a, addr_t addr,\n                                           const cs_insn *inst) {\n    if (a->opts->disable_callthrough) {\n        return;\n    }\n\n    // this addr cannot be in a->can_ret now\n    assert(!g_hash_table_lookup(a->can_ret, GSIZE_TO_POINTER(addr)));\n\n    // step (1). add intra-procedure edges if inst is calling a returning\n    // function\n    if (z_capstone_is_call(inst)) {\n        Buffer *succ_buf = z_ucfg_analyzer_get_intra_successors(a, addr);\n\n        if (!z_buffer_get_size(succ_buf)) {\n            // XXX: no intra-procedure successor found\n            cs_detail *detail = inst->detail;\n\n            if ((detail->x86.op_count == 1) &&\n                (detail->x86.operands[0].type == X86_OP_IMM)) {\n                addr_t callee_addr = detail->x86.operands[0].imm;\n                if (callee_addr != addr + inst->size &&\n                    g_hash_table_lookup(a->can_ret,\n                                        GSIZE_TO_POINTER(callee_addr))) {\n                    // XXX: avoid duplicated edges\n                    z_trace(\"call-fallthrough: %#lx -> %#lx\", addr,\n                            addr + inst->size);\n\n                    __ucfg_analyzer_new_pred_and_succ(\n                        a, addr, addr + inst->size, INTRA_UEDGE);\n                    if (z_unlikely(!z_buffer_get_size(succ_buf))) {\n                        EXITME(\"invalid intra-procedure successors\");\n                    }\n                
}\n            }\n        }\n    }\n\n    // step (2). check whether current address is returnable\n    GQueue *queue = g_queue_new();  // queue for back trace\n    {\n        if (z_capstone_is_ret(inst)) {\n            // it is a RET instruction\n            g_hash_table_add(a->can_ret, GSIZE_TO_POINTER(addr));\n            g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n        } else {\n            // this is all intra-procedure success\n            Iter(addr_t, intra_succs);\n            z_iter_init_from_buf(intra_succs,\n                                 z_ucfg_analyzer_get_intra_successors(a, addr));\n\n            // other instructions\n            while (!z_iter_is_empty(intra_succs)) {\n                addr_t succ_addr = *(z_iter_next(intra_succs));\n                if (g_hash_table_lookup(a->can_ret,\n                                        GSIZE_TO_POINTER(succ_addr))) {\n                    g_hash_table_add(a->can_ret, GSIZE_TO_POINTER(addr));\n                    g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n                    break;\n                }\n            }\n            z_iter_destroy(intra_succs);\n        }\n    }\n\n    // step (3). update all predecessors\n    while (!g_queue_is_empty(queue)) {\n        addr_t cur_addr = (addr_t)g_queue_pop_head(queue);\n        z_trace(\"find returanable address: %#lx\", cur_addr);\n        assert(g_hash_table_lookup(a->can_ret, GSIZE_TO_POINTER(cur_addr)));\n\n        // step (3.1). 
first update calls if cur_addr is a function entrypoint\n        Iter(addr_t, direct_preds);\n        z_iter_init_from_buf(\n            direct_preds, z_ucfg_analyzer_get_direct_predecessors(a, cur_addr));\n        while (!z_iter_is_empty(direct_preds)) {\n            addr_t pred_addr = *(z_iter_next(direct_preds));\n            const cs_insn *pred_inst = (const cs_insn *)g_hash_table_lookup(\n                a->insts, GSIZE_TO_POINTER(pred_addr));\n            // pred_inst cannot be NULL\n            assert(pred_inst);\n\n            if (!z_capstone_is_call(pred_inst)) {\n                continue;\n            }\n\n            addr_t call_addr = pred_addr;\n            addr_t fallthrough_addr = call_addr + pred_inst->size;\n\n            if (fallthrough_addr == cur_addr) {\n                // XXX: avoid duplicated edges\n                continue;\n            }\n\n            if (z_buffer_get_size(\n                    z_ucfg_analyzer_get_intra_successors(a, call_addr))) {\n                continue;\n            }\n\n            __ucfg_analyzer_new_pred_and_succ(a, call_addr, fallthrough_addr,\n                                              INTRA_UEDGE);\n            if (g_hash_table_lookup(a->can_ret,\n                                    GSIZE_TO_POINTER(fallthrough_addr))) {\n                z_trace(\"call-fallthrough: %#lx -> %#lx\", call_addr,\n                        fallthrough_addr);\n                g_hash_table_add(a->can_ret, GSIZE_TO_POINTER(call_addr));\n                g_queue_push_tail(queue, GSIZE_TO_POINTER(call_addr));\n            }\n        }\n        z_iter_destroy(direct_preds);\n\n        // step (3.2) update all intra-procedure predecessors\n        Iter(addr_t, intra_preds);\n        z_iter_init_from_buf(\n            intra_preds, z_ucfg_analyzer_get_intra_predecessors(a, cur_addr));\n        while (!z_iter_is_empty(intra_preds)) {\n            addr_t pred_addr = *(z_iter_next(intra_preds));\n            if (!g_hash_table_lookup(a->can_ret, 
GSIZE_TO_POINTER(pred_addr))) {\n                g_hash_table_add(a->can_ret, GSIZE_TO_POINTER(pred_addr));\n                g_queue_push_tail(queue, GSIZE_TO_POINTER(pred_addr));\n            }\n        }\n        z_iter_destroy(intra_preds);\n    }\n\n    // destroy queue\n    g_queue_free(queue);\n}\n\nZ_PRIVATE void __ucfg_analyzer_analyze_gpr(UCFG_Analyzer *a, addr_t addr,\n                                           const cs_insn *inst) {\n    if (a->opts->disable_opt) {\n        return;\n    }\n\n    // step (0). check whether addr is analyzed\n    if (g_hash_table_lookup(a->gpr_can_write, GSIZE_TO_POINTER(addr))) {\n        return;\n    }\n\n    // step (1). update gpr_analyzed_succs\n    {\n        // check addr's succs\n        Buffer *succs = z_ucfg_analyzer_get_direct_successors(a, addr);\n        assert(succs != NULL);\n        size_t succ_n = z_buffer_get_size(succs) / sizeof(addr_t);\n        addr_t *succs_array = (addr_t *)z_buffer_get_raw_buf(succs);\n\n        size_t analyzed_succ_n = 0;\n        for (int i = 0; i < succ_n; i++) {\n            if (g_hash_table_lookup(a->gpr_can_write,\n                                    GSIZE_TO_POINTER(succs_array[i]))) {\n                analyzed_succ_n += 1;\n            }\n        }\n        g_hash_table_insert(a->gpr_analyzed_succs, GSIZE_TO_POINTER(addr),\n                            GSIZE_TO_POINTER(analyzed_succ_n));\n\n        // update addr's direct preds\n        Buffer *preds = z_ucfg_analyzer_get_direct_predecessors(a, addr);\n        assert(preds != NULL);\n        size_t pred_n = z_buffer_get_size(preds) / sizeof(addr_t);\n        addr_t *preds_array = (addr_t *)z_buffer_get_raw_buf(preds);\n        for (int i = 0; i < pred_n; i++) {\n            addr_t pred = preds_array[i];\n            size_t pred_analyzed_succs = (size_t)g_hash_table_lookup(\n                a->gpr_analyzed_succs, GSIZE_TO_POINTER(pred));\n            g_hash_table_insert(a->gpr_analyzed_succs, GSIZE_TO_POINTER(pred),\n           
                     GSIZE_TO_POINTER(pred_analyzed_succs + 1));\n        }\n    }\n\n    // step (2). push addr into analysis queue\n    GQueue *queue = g_queue_new();\n    g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n\n    // step (3). do analysis and propogate the result\n    while (!g_queue_is_empty(queue)) {\n        // step (3.1). pop from queue and get basic information\n        addr_t cur_addr = (addr_t)g_queue_pop_head(queue);\n\n        Buffer *preds = z_ucfg_analyzer_get_direct_predecessors(a, cur_addr);\n        assert(preds != NULL);\n        size_t pred_n = z_buffer_get_size(preds) / sizeof(addr_t);\n\n        Buffer *succs = z_ucfg_analyzer_get_direct_successors(a, cur_addr);\n        assert(succs != NULL);\n        size_t succ_n = z_buffer_get_size(succs) / sizeof(addr_t);\n\n        RegState *rs = (RegState *)g_hash_table_lookup(\n            a->reg_states, GSIZE_TO_POINTER(cur_addr));\n        // XXX: a good observation is that for a given address, its known\n        // successors must be added before it. And according to the logic of\n        // z_ucfg_analyzer_add_inst, any instruction will be analyzed once it is\n        // added into analyzer. Hence, we can sure any instruction in the queue\n        // is already analyzed (except addr itself).\n        assert(rs != NULL);\n\n        // step (3.2). 
calculate succs_can_write\n        size_t analyzed_succ_n = (size_t)g_hash_table_lookup(\n            a->gpr_analyzed_succs, GSIZE_TO_POINTER(cur_addr));\n        assert(succ_n >= analyzed_succ_n);\n\n        GPRState succs_can_write = GPRSTATE_ALL + 1;\n\n        if (succ_n != 0 && succ_n == analyzed_succ_n) {\n            // assume succs_can_write all registers\n            succs_can_write |= GPRSTATE_ALL;\n\n            // all succs are analyzed\n            addr_t *succs_array = (addr_t *)z_buffer_get_raw_buf(succs);\n            for (int i = 0; i < succ_n; i++) {\n                GPRState succ_can_write = 0;\n                if (cur_addr == succs_array[i]) {\n                    // handle self-loop!\n                    succ_can_write = GPRSTATE_ALL + 1;\n                } else {\n                    succ_can_write = (GPRState)g_hash_table_lookup(\n                        a->gpr_can_write, GSIZE_TO_POINTER(succs_array[i]));\n                }\n                assert(succ_can_write);\n                succs_can_write &= succ_can_write;\n            }\n        }\n\n        // step (3.3). calcualte can_write for cur_addr.\n        // According to datalog disassembly\n        // (https://www.usenix.org/conference/usenixsecurity20/presentation/flores-montoya)\n        // section 5.1, the x64 architecture zeroes the upper part of 64 bits\n        // registers whenever the corresponding 32 bits register is written.\n        GPRState can_write = GPRSTATE_ALL + 1;\n        can_write |= rs->gpr_write_32_64 | succs_can_write;\n        can_write &= (~rs->gpr_read);\n\n        // step (3.4). 
update predecessors\n        GPRState ori_can_write = (GPRState)g_hash_table_lookup(\n            a->gpr_can_write, GSIZE_TO_POINTER(cur_addr));\n        if (ori_can_write != can_write) {\n            assert((uint64_t)can_write > (uint64_t)ori_can_write);\n            addr_t *preds_array = (addr_t *)z_buffer_get_raw_buf(preds);\n            for (int i = 0; i < pred_n; i++) {\n                g_queue_push_tail(queue, GSIZE_TO_POINTER(preds_array[i]));\n            }\n            // update can_write\n            g_hash_table_insert(a->gpr_can_write, GSIZE_TO_POINTER(cur_addr),\n                                GSIZE_TO_POINTER(can_write));\n        }\n    }\n\n    g_queue_free(queue);\n\n    return;\n}\n\nZ_PRIVATE void __ucfg_analyzer_analyze_flg(UCFG_Analyzer *a, addr_t addr,\n                                           const cs_insn *inst) {\n    if (a->opts->disable_opt) {\n        return;\n    }\n\n    // step (0). check whether addr is analyzed\n    if (g_hash_table_lookup(a->flg_need_write, GSIZE_TO_POINTER(addr))) {\n        return;\n    }\n    GQueue *queue = g_queue_new();\n\n    // step (1). check whether it is ready to analyze\n    {\n        Buffer *succs = z_ucfg_analyzer_get_direct_successors(a, addr);\n        assert(succs != NULL);\n        size_t succ_n = z_buffer_get_size(succs) / sizeof(addr_t);\n        addr_t *succs_array = (addr_t *)z_buffer_get_raw_buf(succs);\n\n        // step (1.1). 
update flg_finished succs\n        size_t finished_succ_n = 0;\n        for (int i = 0; i < succ_n; i++) {\n            if (g_hash_table_lookup(a->flg_need_write,\n                                    GSIZE_TO_POINTER(succs_array[i]))) {\n                finished_succ_n += 1;\n            }\n        }\n        g_hash_table_insert(a->flg_finished_succs, GSIZE_TO_POINTER(addr),\n                            GSIZE_TO_POINTER(finished_succ_n));\n\n        RegState *rs = (RegState *)g_hash_table_lookup(a->reg_states,\n                                                       GSIZE_TO_POINTER(addr));\n        assert(rs != NULL);\n\n        // step (1.2). check whether it is ready\n        if (rs->flg_write == FLGSTATE_ALL || rs->flg_read == FLGSTATE_ALL) {\n            // case A: writing/reading all means it is ready to analyze\n            g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n        } else if (z_capstone_is_call(inst) || z_capstone_is_ret(inst)) {\n            // case B: we are trying to do an intra-procedure analysis\n            g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n        } else if (succ_n == 0) {\n            // case C: for instruction without successors, it is ready to\n            // analyze\n            g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n        } else if (succ_n == finished_succ_n) {\n            // case D: all successors are done with analysis (it actually can be\n            // mergied into case C, but for clarity we set it as an individual\n            // case)\n            g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));\n        }\n    }\n\n    // step (2). do analysis and propagate the result\n    while (!g_queue_is_empty(queue)) {\n        // step (2.1). 
pop from queue and set a flag on result (distinguished\n        // from non-existed key)\n        addr_t cur_addr = (addr_t)g_queue_pop_head(queue);\n        const cs_insn *cur_inst = (const cs_insn *)g_hash_table_lookup(\n            a->insts, GSIZE_TO_POINTER(cur_addr));\n        assert(cur_inst);\n\n        FLGState need_write = FLGSTATE_ALL + 1;\n        assert(!g_hash_table_lookup(a->flg_need_write,\n                                    GSIZE_TO_POINTER(cur_addr)));\n\n        // step (2.2). basic infomration\n        Buffer *preds = z_ucfg_analyzer_get_direct_predecessors(a, cur_addr);\n        assert(preds != NULL);\n        size_t pred_n = z_buffer_get_size(preds) / sizeof(addr_t);\n\n        Buffer *succs = z_ucfg_analyzer_get_direct_successors(a, cur_addr);\n        assert(succs != NULL);\n        size_t succ_n = z_buffer_get_size(succs) / sizeof(addr_t);\n\n        RegState *rs = (RegState *)g_hash_table_lookup(\n            a->reg_states, GSIZE_TO_POINTER(cur_addr));\n        assert(rs != NULL);\n\n        // step (2.3). 
calculate need to write\n        if (rs->flg_write == FLGSTATE_ALL) {\n            // case A.1: write all\n            need_write |= 0;\n        } else if (rs->flg_read == FLGSTATE_ALL) {\n            // case A.2: read all\n            need_write |= FLGSTATE_ALL;\n        } else if (z_capstone_is_call(cur_inst) ||\n                   z_capstone_is_ret(cur_inst)) {\n            // case B: call & ret\n            need_write |= 0;\n        } else if (succ_n == 0) {\n            // case C: no successors\n            need_write |= FLGSTATE_ALL;\n        } else if (succ_n ==\n                   (size_t)g_hash_table_lookup(a->flg_finished_succs,\n                                               GSIZE_TO_POINTER(cur_addr))) {\n            FLGState post_need_write = 0;\n            addr_t *succs_array = (addr_t *)z_buffer_get_raw_buf(succs);\n            for (int i = 0; i < succ_n; i++) {\n                FLGState succ_need_write = (FLGState)g_hash_table_lookup(\n                    a->flg_need_write, GSIZE_TO_POINTER(succs_array[i]));\n                assert(succ_need_write);\n                post_need_write |= succ_need_write;\n            }\n            need_write |= post_need_write & (FLGSTATE_ALL ^ rs->flg_write);\n        } else {\n            EXITME(\"incomplete address in analysis: %#lx\", cur_addr);\n        }\n\n        // step (2.4). do not forget flag read by it self\n        need_write |= rs->flg_read;\n\n        // step (2.5). update need_write\n        g_hash_table_insert(a->flg_need_write, GSIZE_TO_POINTER(cur_addr),\n                            GSIZE_TO_POINTER(need_write));\n\n        // step (2.6). 
update predecessors' information\n        addr_t *preds_array = (addr_t *)z_buffer_get_raw_buf(preds);\n        for (int i = 0; i < pred_n; i++) {\n            addr_t pred = preds_array[i];\n            // it is very important to check whether pred is analyzed\n            if (g_hash_table_lookup(a->flg_need_write,\n                                    GSIZE_TO_POINTER(pred))) {\n                continue;\n            }\n            size_t pred_finish_succs = (size_t)g_hash_table_lookup(\n                a->flg_finished_succs, GSIZE_TO_POINTER(pred));\n            pred_finish_succs += 1;\n            g_hash_table_insert(a->flg_finished_succs, GSIZE_TO_POINTER(pred),\n                                GSIZE_TO_POINTER(pred_finish_succs));\n            if (pred_finish_succs ==\n                (size_t)(z_buffer_get_size(\n                             z_ucfg_analyzer_get_direct_successors(a, pred)) /\n                         sizeof(addr_t))) {\n                g_queue_push_tail(queue, GSIZE_TO_POINTER(pred));\n            }\n        }\n    }\n\n    g_queue_free(queue);\n}\n\nZ_PRIVATE void __ucfg_analyzer_advance_analyze(UCFG_Analyzer *a, addr_t addr,\n                                               const cs_insn *inst) {\n    __ucfg_analyzer_analyze_flg(a, addr, inst);\n    __ucfg_analyzer_analyze_gpr(a, addr, inst);\n    __ucfg_analyzer_analyze_ret(a, addr, inst);\n    __ucfg_analyzer_analyze_sec_chk(a, addr, inst);\n}\n\nZ_PRIVATE bool __ucfg_analyzer_check_consistent(const cs_insn *inst_alice,\n                                                const cs_insn *inst_bob) {\n    // check size\n    if (inst_alice->size != inst_bob->size) {\n        return false;\n    }\n\n    // control-flow-related instructions always change analysis result\n    {\n        const cs_insn *inst = inst_alice;\n        if (z_capstone_is_jmp(inst) || z_capstone_is_call(inst) ||\n            z_capstone_is_xbegin(inst) || z_capstone_is_cjmp(inst) ||\n            z_capstone_is_loop(inst) || 
z_capstone_is_ret(inst) ||\n            z_capstone_is_terminator(inst)) {\n            z_trace(\"CFG related instructions\");\n            return false;\n        }\n    }\n\n    // first check instruction type\n    if (inst_alice->id != inst_bob->id) {\n        z_trace(\"inconsistent instruction types\");\n        return false;\n    }\n\n    cs_detail *detail_alice = inst_alice->detail;\n    cs_detail *detail_bob = inst_bob->detail;\n\n    // then check operands\n    if (detail_alice->x86.op_count != detail_bob->x86.op_count) {\n        z_trace(\"inconsistent operand count\");\n        return false;\n    }\n\n    // check individual operand\n    for (int i = 0; i < detail_alice->x86.op_count; i++) {\n        cs_x86_op *op_alice = &(detail_alice->x86.operands[i]);\n        cs_x86_op *op_bob = &(detail_bob->x86.operands[i]);\n        if (op_alice->type != op_bob->type) {\n            z_trace(\"inconsisten operand type\");\n            return false;\n        }\n        switch (op_alice->type) {\n            case X86_OP_REG:\n                if (op_alice->reg != op_bob->reg) {\n                    z_trace(\"inconsisten operand register\");\n                    return false;\n                }\n                break;\n            case X86_OP_MEM:\n                if (op_alice->mem.segment != op_bob->mem.segment) {\n                    z_trace(\"inconsisten operand mem segment\");\n                    return false;\n                }\n                if (op_alice->mem.base != op_bob->mem.base) {\n                    z_trace(\"inconsisten operand mem base\");\n                    return false;\n                }\n                if (op_alice->mem.index != op_bob->mem.index) {\n                    z_trace(\"inconsisten operand mem index\");\n                    return false;\n                }\n                break;\n            default:\n                break;\n        }\n    }\n\n    return true;\n}\n\nZ_PRIVATE void __ucfg_analyzer_new_pred_and_succ(UCFG_Analyzer *a,\n 
                                                addr_t src_addr,\n                                                 addr_t dst_addr, UEdge edge) {\n#ifdef DEBUG\n\n#define __NEW_RELATION(relation, from_addr, to_addr)                         \\\n    do {                                                                     \\\n        Buffer *buf = NULL;                                                  \\\n        if (!(buf = g_hash_table_lookup(a->relation,                         \\\n                                        GSIZE_TO_POINTER(from_addr)))) {     \\\n            buf = z_buffer_create(NULL, 0);                                  \\\n            g_hash_table_insert(a->relation, GSIZE_TO_POINTER(from_addr),    \\\n                                (gpointer)buf);                              \\\n        }                                                                    \\\n                                                                             \\\n        addr_t *targets = (addr_t *)z_buffer_get_raw_buf(buf);               \\\n        size_t n = z_buffer_get_size(buf) / sizeof(addr_t);                  \\\n        for (size_t i = 0; i < n; i++) {                                     \\\n            if (targets[i] == (to_addr)) {                                   \\\n                EXITME(\"duplicated \" #relation \" for %#lx->%#lx\", from_addr, \\\n                       to_addr);                                             \\\n            }                                                                \\\n        }                                                                    \\\n                                                                             \\\n        z_buffer_append_raw(buf, (uint8_t *)&(to_addr), sizeof(to_addr));    \\\n    } while (0)\n\n#else\n\n#define __NEW_RELATION(relation, from_addr, to_addr)                      \\\n    do {                                                                  \\\n        Buffer *buf = NULL;  
                                             \\\n        if (!(buf = g_hash_table_lookup(a->relation,                      \\\n                                        GSIZE_TO_POINTER(from_addr)))) {  \\\n            buf = z_buffer_create(NULL, 0);                               \\\n            g_hash_table_insert(a->relation, GSIZE_TO_POINTER(from_addr), \\\n                                (gpointer)buf);                           \\\n        }                                                                 \\\n        z_buffer_append_raw(buf, (uint8_t *)&(to_addr), sizeof(to_addr)); \\\n    } while (0)\n\n#endif\n\n    if (edge & DIRECT_UEDGE) {\n        __NEW_RELATION(direct_succs, src_addr, dst_addr);\n        __NEW_RELATION(direct_preds, dst_addr, src_addr);\n    }\n\n    if (edge & INTRA_UEDGE) {\n        __NEW_RELATION(intra_succs, src_addr, dst_addr);\n        __NEW_RELATION(intra_preds, dst_addr, src_addr);\n    }\n\n    __NEW_RELATION(all_succs, src_addr, dst_addr);\n    __NEW_RELATION(all_preds, dst_addr, src_addr);\n\n#undef __NEW_RELATION\n}\n\nZ_PRIVATE void __ucfg_analyzer_init_analyze(UCFG_Analyzer *a, addr_t addr,\n                                            const cs_insn *inst) {\n    assert(inst != NULL);\n\n    cs_detail *detail = inst->detail;\n\n    if (z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst)) {\n        assert((detail->x86.op_count == 1) &&\n               (detail->x86.operands[0].type == X86_OP_IMM));\n\n        // avoid dupilicated succs/preds\n        if (true) {\n            __ucfg_analyzer_new_pred_and_succ(a, addr, addr + inst->size,\n                                              DIRECT_UEDGE | INTRA_UEDGE);\n        }\n        if (detail->x86.operands[0].imm != addr + inst->size) {\n            __ucfg_analyzer_new_pred_and_succ(a, addr,\n                                              detail->x86.operands[0].imm,\n                                              DIRECT_UEDGE | INTRA_UEDGE);\n        }\n\n    } else if 
(z_capstone_is_jmp(inst) || z_capstone_is_xbegin(inst)) {\n        if ((detail->x86.op_count == 1) &&\n            (detail->x86.operands[0].type == X86_OP_IMM)) {\n            __ucfg_analyzer_new_pred_and_succ(a, addr,\n                                              detail->x86.operands[0].imm,\n                                              DIRECT_UEDGE | INTRA_UEDGE);\n        }\n    } else if (z_capstone_is_call(inst)) {\n        ELF *e = z_binary_get_elf(a->binary);\n        if ((detail->x86.op_count == 1) &&\n            (detail->x86.operands[0].type == X86_OP_IMM)) {\n            // get callee first\n            addr_t callee_addr = detail->x86.operands[0].imm;\n\n            // add the inter-procedure edge (the call edge)\n            __ucfg_analyzer_new_pred_and_succ(a, addr, callee_addr,\n                                              DIRECT_UEDGE);\n\n            // check plt\n            const LFuncInfo *lf_info = z_elf_get_plt_info(e, callee_addr);\n            if (lf_info && lf_info->cfg_info == LCFG_RET) {\n                if (callee_addr == addr + inst->size) {\n                    EXITME(\"invalid PLT call: \" CS_SHOW_INST(inst));\n                }\n                __ucfg_analyzer_new_pred_and_succ(a, addr, addr + inst->size,\n                                                  INTRA_UEDGE);\n            }\n        } else {\n            // let check GOT call\n            addr_t got_addr = INVALID_ADDR;\n            if (z_capstone_is_pc_related_ucall(inst, &got_addr) ||\n                (!z_elf_get_is_pie(e) &&\n                 z_capstone_is_const_mem_ucall(inst, &got_addr))) {\n                const LFuncInfo *lf_info = z_elf_get_got_info(e, got_addr);\n                if (lf_info && lf_info->cfg_info == LCFG_RET) {\n                    __ucfg_analyzer_new_pred_and_succ(\n                        a, addr, addr + inst->size, INTRA_UEDGE);\n                }\n            }\n        }\n    } else if (z_capstone_is_terminator(inst)) {\n        // do nothing 
for terminator\n    } else {\n        __ucfg_analyzer_new_pred_and_succ(a, addr, addr + inst->size,\n                                          DIRECT_UEDGE | INTRA_UEDGE);\n    }\n}\n\nZ_API UCFG_Analyzer *z_ucfg_analyzer_create(Binary *binary,\n                                            RewritingOptArgs *opts) {\n    UCFG_Analyzer *a = STRUCT_ALLOC(UCFG_Analyzer);\n\n    a->binary = binary;\n\n    a->opts = opts;\n\n    a->insts = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    a->reg_states = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                                          (GDestroyNotify)(&z_free));\n\n    a->direct_preds =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                              (GDestroyNotify)(&z_buffer_destroy));\n    a->direct_succs =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                              (GDestroyNotify)(&z_buffer_destroy));\n    a->intra_preds = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                                           (GDestroyNotify)(&z_buffer_destroy));\n    a->intra_succs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                                           (GDestroyNotify)(&z_buffer_destroy));\n    a->all_preds = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                                         (GDestroyNotify)(&z_buffer_destroy));\n    a->all_succs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,\n                                         (GDestroyNotify)(&z_buffer_destroy));\n\n    a->flg_finished_succs =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    a->flg_need_write =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    a->gpr_analyzed_succs =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n    a->gpr_can_write =\n        
g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    a->can_ret =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    a->sec_chk_failed =\n        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);\n\n    return a;\n}\n\nZ_API void z_ucfg_analyzer_destroy(UCFG_Analyzer *a) {\n    g_hash_table_destroy(a->insts);\n    g_hash_table_destroy(a->reg_states);\n    g_hash_table_destroy(a->direct_preds);\n    g_hash_table_destroy(a->direct_succs);\n    g_hash_table_destroy(a->intra_preds);\n    g_hash_table_destroy(a->intra_succs);\n    g_hash_table_destroy(a->all_preds);\n    g_hash_table_destroy(a->all_succs);\n    g_hash_table_destroy(a->flg_finished_succs);\n    g_hash_table_destroy(a->flg_need_write);\n    g_hash_table_destroy(a->gpr_analyzed_succs);\n    g_hash_table_destroy(a->gpr_can_write);\n    g_hash_table_destroy(a->can_ret);\n    g_hash_table_destroy(a->sec_chk_failed);\n\n    z_free(a);\n}\n\nZ_API void z_ucfg_analyzer_add_inst(UCFG_Analyzer *a, addr_t addr,\n                                    const cs_insn *inst,\n                                    bool maybe_duplicated) {\n    assert(a != NULL);\n\n    if (maybe_duplicated) {\n        cs_insn *ori_inst =\n            (cs_insn *)g_hash_table_lookup(a->insts, GSIZE_TO_POINTER(addr));\n        if (ori_inst) {\n            if (!__ucfg_analyzer_check_consistent(ori_inst, inst)) {\n                EXITME(\"inconsistent instruction update \" CS_SHOW_INST(inst));\n            }\n            g_hash_table_insert(a->insts, GSIZE_TO_POINTER(addr),\n                                (gpointer)inst);\n            return;\n        }\n    }\n\n    // update insts\n    assert(!g_hash_table_lookup(a->insts, GSIZE_TO_POINTER(addr)));\n    g_hash_table_insert(a->insts, GSIZE_TO_POINTER(addr), (gpointer)inst);\n\n    // update register states\n    RegState *rs = z_capstone_get_register_state(inst);\n    g_hash_table_insert(a->reg_states, GSIZE_TO_POINTER(addr), 
(gpointer)rs);\n\n    /*\n     * XXX: it is important that following analysis happens in order and\n     * closely.\n     */\n    // initial analysis\n    __ucfg_analyzer_init_analyze(a, addr, inst);\n    // advanced analysis\n    __ucfg_analyzer_advance_analyze(a, addr, inst);\n}\n\nZ_API Buffer *z_ucfg_analyzer_get_direct_successors(UCFG_Analyzer *a,\n                                                    addr_t addr) {\n    assert(a != NULL);\n    return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->direct_succs,\n                                                 GSIZE_TO_POINTER(addr));\n}\n\nZ_API Buffer *z_ucfg_analyzer_get_direct_predecessors(UCFG_Analyzer *a,\n                                                      addr_t addr) {\n    assert(a != NULL);\n    return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->direct_preds,\n                                                 GSIZE_TO_POINTER(addr));\n}\n\nZ_API Buffer *z_ucfg_analyzer_get_intra_successors(UCFG_Analyzer *a,\n                                                   addr_t addr) {\n    assert(a != NULL);\n    return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->intra_succs,\n                                                 GSIZE_TO_POINTER(addr));\n}\n\nZ_API Buffer *z_ucfg_analyzer_get_intra_predecessors(UCFG_Analyzer *a,\n                                                     addr_t addr) {\n    assert(a != NULL);\n    return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->intra_preds,\n                                                 GSIZE_TO_POINTER(addr));\n}\n\nZ_API Buffer *z_ucfg_analyzer_get_all_successors(UCFG_Analyzer *a,\n                                                 addr_t addr) {\n    assert(a != NULL);\n    return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->all_succs,\n                                                 GSIZE_TO_POINTER(addr));\n}\n\nZ_API Buffer *z_ucfg_analyzer_get_all_predecessors(UCFG_Analyzer *a,\n                                                   addr_t addr) {\n    assert(a != NULL);\n    return 
__UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->all_preds,\n                                                 GSIZE_TO_POINTER(addr));\n}\n\nZ_API FLGState z_ucfg_analyzer_get_flg_need_write(UCFG_Analyzer *a,\n                                                  addr_t addr) {\n    FLGState state = (FLGState)g_hash_table_lookup(a->flg_need_write,\n                                                   GSIZE_TO_POINTER(addr));\n    if (!state) {\n        // there is not enough infomration to analyze this address\n        return FLGSTATE_ALL;\n    } else {\n        return state & FLGSTATE_ALL;\n    }\n}\n\nZ_API GPRState z_ucfg_analyzer_get_gpr_can_write(UCFG_Analyzer *a,\n                                                 addr_t addr) {\n    GPRState state =\n        (GPRState)g_hash_table_lookup(a->gpr_can_write, GSIZE_TO_POINTER(addr));\n    return state & GPRSTATE_ALL;\n}\n\nZ_API RegState *z_ucfg_analyzer_get_register_state(UCFG_Analyzer *a,\n                                                   addr_t addr) {\n    return (RegState *)g_hash_table_lookup(a->reg_states,\n                                           GSIZE_TO_POINTER(addr));\n}\n\nZ_API bool z_ucfg_analyzer_is_security_chk_failed(UCFG_Analyzer *a,\n                                                  addr_t addr) {\n    return !!(g_hash_table_lookup(a->sec_chk_failed, GSIZE_TO_POINTER(addr)));\n}\n\n#undef __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER\n"
  },
  {
    "path": "src/ucfg_analyzer.h",
    "content": "/*\n * ucfg_analyzer.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __UCFG_ANALYZER_H\n#define __UCFG_ANALYZER_H\n\n#include \"binary.h\"\n#include \"buffer.h\"\n#include \"capstone_.h\"\n#include \"config.h\"\n#include \"sys_optarg.h\"\n\n#include <capstone/capstone.h>\n#include <gmodule.h>\n\n/*\n * Light-weight instruction-level analyzer, which aims at analyzing conservative\n * use-def relation on the Universal CFG (UCFG).\n */\nSTRUCT(UCFG_Analyzer, {\n    // basic instruction information\n    GHashTable *insts;\n\n    // register state for each instruction\n    GHashTable *reg_states;\n\n    /*\n     * successors and predecessor\n     * XXX: note that it is possible to return preds/succs for an invalid\n     * address\n     *\n     * all_preds = direct_preds U intra_preds\n     * all_succs = direct_succs U intra_succs\n     */\n    // direct/explict successors and predecessors without call-fallthrough edges\n    GHashTable *direct_preds;\n    GHashTable *direct_succs;\n    // intra-procedure successsors and predecessors\n    GHashTable *intra_preds;\n    GHashTable *intra_succs;\n    // successors and predecessors with call-fallthrough edges\n    GHashTable *all_preds;\n    GHashTable *all_succs;\n\n    // eflags register analysis\n    GHashTable *flg_finished_succs;\n    
GHashTable *flg_need_write;\n\n    // general register analysis\n    GHashTable *gpr_analyzed_succs;\n    GHashTable *gpr_can_write;\n\n    // whether an inst can reach a RET inst via intra-procedure edges\n    GHashTable *can_ret;\n\n    // whether an inst can reach a security-chk-failed PLT call without any\n    // condition and indirect edges\n    GHashTable *sec_chk_failed;\n\n    // rewriting optargs\n    RewritingOptArgs *opts;\n\n    Binary *binary;\n});\n\n/*\n * Create an ucfg_analyzer\n */\nZ_API UCFG_Analyzer *z_ucfg_analyzer_create(Binary *binary,\n                                            RewritingOptArgs *opts);\n\n/*\n * Destroy an ucfg_analyzer\n */\nZ_API void z_ucfg_analyzer_destroy(UCFG_Analyzer *a);\n\n/*\n * Add a new instruction into analyzing buffer, *maybe_duplicated* means it is\n * possible that UCFG_Analyzer already analyzes this address\n */\n// XXX: note that it is ok if the predecessors of addr is unknown, which means\n// it is safe to use this function even the superset disassembly is incomplete.\nZ_API void z_ucfg_analyzer_add_inst(UCFG_Analyzer *a, addr_t addr,\n                                    const cs_insn *inst, bool maybe_duplicated);\n\n/*\n * Get succerrors without the call-fallthrough edges (return value will never be\n * NULL)\n */\nZ_API Buffer *z_ucfg_analyzer_get_direct_successors(UCFG_Analyzer *a,\n                                                    addr_t addr);\n\n/*\n * Get predecessor without the call-fallthrough edges (return value will never\n * be NULL)\n */\nZ_API Buffer *z_ucfg_analyzer_get_direct_predecessors(UCFG_Analyzer *a,\n                                                      addr_t addr);\n\n/*\n * Get intra-procedure successors\n */\nZ_API Buffer *z_ucfg_analyzer_get_intra_successors(UCFG_Analyzer *a,\n                                                   addr_t addr);\n\n/*\n * Get intra-procedure predecessors\n */\nZ_API Buffer *z_ucfg_analyzer_get_intra_predecessors(UCFG_Analyzer *a,\n               
                                      addr_t addr);\n\n/*\n * Get all successors\n */\nZ_API Buffer *z_ucfg_analyzer_get_all_successors(UCFG_Analyzer *a, addr_t addr);\n\n/*\n * Get all predecessors\n */\nZ_API Buffer *z_ucfg_analyzer_get_all_predecessors(UCFG_Analyzer *a,\n                                                   addr_t addr);\n\n/*\n * Get *need-write* information for flag registers\n */\nZ_API FLGState z_ucfg_analyzer_get_flg_need_write(UCFG_Analyzer *a,\n                                                  addr_t addr);\n\n/*\n * Get *can_write* information for general purpose registers\n */\nZ_API GPRState z_ucfg_analyzer_get_gpr_can_write(UCFG_Analyzer *a, addr_t addr);\n\n/*\n * Get register state for a given addr\n */\nZ_API RegState *z_ucfg_analyzer_get_register_state(UCFG_Analyzer *a,\n                                                   addr_t addr);\n\n/*\n * Get whether an instruction belongs to a security_chk_failed block\n */\nZ_API bool z_ucfg_analyzer_is_security_chk_failed(UCFG_Analyzer *a,\n                                                  addr_t addr);\n\n#endif\n"
  },
  {
    "path": "src/utils.c",
    "content": "/*\n * utils.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#include \"utils.h\"\n\n#include <errno.h>\n#include <stdarg.h>\n#include <time.h>\n\n/*\n * Lookup table function\n */\n#define __INVALID_LOOKUP_TABLE_CELL_NUM ((uint64_t)(-1L))\n\nstatic uint64_t __lookup_table_cell_num = __INVALID_LOOKUP_TABLE_CELL_NUM;\n\nvoid z_lookup_table_init_cell_num(uint64_t text_size) {\n    if (__lookup_table_cell_num != __INVALID_LOOKUP_TABLE_CELL_NUM) {\n        EXITME(\"duplicated initization for lookup table cell number\");\n    }\n    __lookup_table_cell_num = BITS_ALIGN_CELL(text_size, PAGE_SIZE_POW2);\n    if (__lookup_table_cell_num > LOOKUP_TABLE_MAX_CELL_NUM) {\n        EXITME(\"too big cell number: %#lx\", __lookup_table_cell_num);\n    }\n    z_info(\"cell number of lookup table: %#lx\", __lookup_table_cell_num);\n}\n\nuint64_t z_lookup_table_get_cell_num() {\n    if (__lookup_table_cell_num == __INVALID_LOOKUP_TABLE_CELL_NUM) {\n        EXITME(\"non-initizated lookup table cell number\");\n    }\n    return __lookup_table_cell_num;\n}\n\n#undef __INVALID_LOOKUP_TABLE_CELL_NUM\n\n/*\n * Log session\n */\nstatic const char *level_names[] = {\"TRACE\", \"DEBUG\", \"INFO\",\n                                    \"WARN\",  \"ERROR\", \"FATAL\"};\n\nstatic const char *level_colors[] = 
{COLOR_PURPLE, COLOR_CYAN, COLOR_GREEN,\n                                     COLOR_YELLOW, COLOR_RED,  COLOR_MAGENTA};\n\nstatic int log_level = 0;\n\nZ_API void z_log_set_level(int level) { log_level = level; }\n\nZ_API void z_log(int level, const char *file, int line, const char *fmt, ...) {\n    if (level < log_level) {\n        return;\n    }\n\n    time_t t = time(NULL);\n    struct tm *lt = localtime(&t);\n\n    va_list args;\n    char buf[16];\n    buf[strftime(buf, sizeof(buf), \"%H:%M:%S\", lt)] = '\\0';\n    fprintf(stderr, \"%s %s%-5s\" COLOR_RESET \" \", buf, level_colors[level],\n            level_names[level]);\n    va_start(args, fmt);\n    vfprintf(stderr, fmt, args);\n    va_end(args);\n    fprintf(stderr, \" \" COLOR_GRAY \":%s:%d\" COLOR_RESET \" \", file, line);\n    fprintf(stderr, \"\\n\");\n    fflush(stderr);\n}\n\n/*\n * General methods\n */\nstatic bool is_srand = false;\n\nZ_API int z_rand() {\n    if (!is_srand) {\n        srand(time(NULL));\n        is_srand = true;\n    }\n\n    return rand();\n}\n\nZ_API void z_exit(int status) { exit(status); }\n\nZ_API FILE *z_fopen(const char *pathname, const char *mode) {\n    FILE *out = fopen(pathname, mode);\n    if (out == NULL) {\n        z_error(\"fopen: %d (%s)\", errno, strerror(errno));\n        z_exit(errno);\n    }\n    return out;\n}\n\nZ_API void z_fclose(FILE *stream) {\n    if (fclose(stream) != 0) {\n        z_error(\"fclose: %d (%s)\", errno, strerror(errno));\n        z_exit(errno);\n    }\n}\n\nZ_API void z_fseek(FILE *stream, long offset, int whence) {\n    if (fseek(stream, offset, whence) != 0) {\n        z_error(\"fseek: %d (%s)\", errno, strerror(errno));\n        z_exit(errno);\n    }\n}\n\nZ_API long z_ftell(FILE *stream) {\n    long out = ftell(stream);\n    if (out == -1) {\n        z_error(\"ftell: %d (%s)\", errno, strerror(errno));\n        z_exit(errno);\n    }\n    return out;\n}\n\nZ_API size_t z_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) {\n    
return fread(ptr, size, nmemb, stream);\n}\n\nZ_API size_t z_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream) {\n    return fwrite(ptr, size, nmemb, stream);\n}\n\nZ_API int z_chmod(const char *pathname, mode_t mode) {\n    return chmod(pathname, mode);\n}\n\nZ_API int z_access(const char *path, int mode) { return access(path, mode); }\n\nZ_API void *z_alloc(size_t nmemb, size_t size) {\n    void *out = calloc(nmemb, size);\n    if (out == NULL) {\n        EXITME(\"calloc: run out of memory\");\n    }\n    return out;\n}\n\nZ_API void *z_realloc(void *ptr, size_t size) {\n    void *out = realloc(ptr, size);\n    if (out == NULL) {\n        EXITME(\"realloc: run out of memory\");\n    }\n    return out;\n}\n\nZ_API void z_free(void *ptr) { free(ptr); }\n\n/*\n * String methods\n */\nZ_API char *z_strcat(const char *s1, const char *s2) {\n    char *s = z_alloc(z_strlen(s1) + z_strlen(s2) + 0x10, sizeof(char));\n    z_strcpy(s, s1);\n    z_strcpy(s + z_strlen(s1), s2);\n\n    return s;\n}\n\nZ_API char *z_strstr(const char *haystack, const char *needle) {\n    return strstr(haystack, needle);\n}\n\nZ_API char *z_strdup(const char *s) {\n    char *o = strdup(s);\n    if (o == NULL)\n        EXITME(\"strdup: run out of memory\");\n    return o;\n}\n\nZ_API int z_strcmp(const char *s1, const char *s2) { return strcmp(s1, s2); }\n\nZ_API int z_strncmp(const char *s1, const char *s2, size_t n) {\n    return strncmp(s1, s2, n);\n}\n\nZ_API size_t z_strlen(const char *s) { return strlen(s); }\n\nZ_API void z_strcpy(char *dst, const char *src) { strcpy(dst, src); }\n\nZ_API char *z_strchr(const char *s, int c) { return strchr(s, c); }\n\nZ_API char *z_strrchr(const char *s, int c) { return strrchr(s, c); }\n\n/*\n * Keystone\n */\nks_engine *ks = NULL;\nsize_t ks_count = 0;\nsize_t ks_size = 0;\nconst unsigned char *ks_encode = NULL;\nunsigned char ks_encode_fast[0x10];\nchar ks_buf[KS_BUFMAX];\n\n/*\n * Capstone\n */\ncsh cs;\nsize_t cs_count;\nconst cs_insn 
*cs_inst;\n\n/*\n * TPDispatcher\n */\nTPDispatcher *tp;\nsize_t tp_size;\nconst uint8_t *tp_code;\n"
  },
  {
    "path": "src/utils.h",
    "content": "/*\n * utils.h\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __UTILS_H\n#define __UTILS_H\n\n#include \"afl_config.h\"\n#include \"config.h\"\n#include \"library_functions/library_functions.h\"\n#include \"tp_dispatcher.h\"\n\n#include <capstone/capstone.h>\n#include <keystone/keystone.h>\n\n/*\n * Color\n */\n#define COLOR_BLACK \"\\x1b[30m\"\n#define COLOR_RED \"\\x1b[31m\"\n#define COLOR_GREEN \"\\x1b[32m\"\n#define COLOR_YELLOW \"\\x1b[33m\"\n#define COLOR_BLUE \"\\x1b[34m\"\n#define COLOR_MAGENTA \"\\x1b[35m\"\n#define COLOR_CYAN \"\\x1b[36m\"\n#define COLOR_GRAY \"\\x1b[90m\"\n#define COLOR_PURPLE \"\\x1b[94m\"\n#define COLOR_BRIGHT \"\\x1b[1;97m\"\n#define COLOR_RESET \"\\x1b[0m\"\n#define COLOR(color, str) COLOR_##color str COLOR_RESET\n\n/*\n * Bit aligments\n */\n// floor alignment:\n//  e.g., for 12-bits alignment, 0x1000 -> 0x1000, 0x1001 -> 0x1000\n#define BITS_ALIGN_FLOOR(addr, bits) (((addr) >> (bits)) << (bits))\n// cell alignment:\n//  e.g., for 12-bits alignment, 0x1000 -> 0x1000, 0x1001 -> 0x2000\n#define BITS_ALIGN_CELL(addr, bits) (((((addr)-1) >> (bits)) + 1) << (bits))\n\n/*\n * Lookup table\n */\nvoid z_lookup_table_init_cell_num(uint64_t text_size);\nuint64_t z_lookup_table_get_cell_num();\n\n/*\n * Log session\n */\nZ_API void z_log(int level, const char 
*file, int line, const char *fmt, ...);\nZ_API void z_log_set_level(int level);\n\nenum { LOG_TRACE, LOG_DEBUG, LOG_INFO, LOG_WARN, LOG_ERROR, LOG_FATAL };\n\n#ifdef DEBUG\n#define z_trace(...) z_log(LOG_TRACE, __FILE__, __LINE__, __VA_ARGS__)\n#define z_debug(...) z_log(LOG_DEBUG, __FILE__, __LINE__, __VA_ARGS__)\n#define z_info(...) z_log(LOG_INFO, __FILE__, __LINE__, __VA_ARGS__)\n#define z_warn(...) z_log(LOG_WARN, __FILE__, __LINE__, __VA_ARGS__)\n#define z_error(...) z_log(LOG_ERROR, __FILE__, __LINE__, __VA_ARGS__)\n#define z_fatal(...) z_log(LOG_FATAL, __FILE__, __LINE__, __VA_ARGS__)\n#else\n#define z_trace(...)\n#define z_debug(...)\n#define z_info(...) z_log(LOG_INFO, __FILE__, __LINE__, __VA_ARGS__)\n#define z_warn(...) z_log(LOG_WARN, __FILE__, __LINE__, __VA_ARGS__)\n#define z_error(...) z_log(LOG_ERROR, __FILE__, __LINE__, __VA_ARGS__)\n#define z_fatal(...) z_log(LOG_FATAL, __FILE__, __LINE__, __VA_ARGS__)\n#endif\n\n// print message\n#define z_sayf(...) fprintf(stderr, __VA_ARGS__)\n\n/*\n * Unreachable\n */\n#define EXITME(...)           
\\\n    do {                      \\\n        z_error(__VA_ARGS__); \\\n        z_exit(MY_ERR_CODE);  \\\n    } while (0)\n\n/*\n * General methods (wrapper of glibc alloc/file/string function)\n */\nZ_API void z_exit(int status);\n\nZ_API FILE *z_fopen(const char *pathname, const char *mode);\nZ_API void z_fclose(FILE *stream);\nZ_API void z_fseek(FILE *stream, long offset, int whence);\nZ_API long z_ftell(FILE *stream);\nZ_API size_t z_fread(void *ptr, size_t size, size_t nmemb, FILE *stream);\nZ_API size_t z_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream);\nZ_API int z_access(const char *path, int mode);\nZ_API int z_chmod(const char *pathname, mode_t mode);\n\nZ_API void *z_alloc(size_t nmemb, size_t size);\nZ_API void *z_realloc(void *ptr, size_t size);\nZ_API void z_free(void *ptr);\n\nZ_API int z_rand();\n\nZ_API char *z_strcat(const char *s1, const char *s2);\nZ_API int z_strcmp(const char *s1, const char *s2);\nZ_API int z_strncmp(const char *s1, const char *s2, size_t n);\nZ_API char *z_strstr(const char *haystack, const char *needle);\nZ_API char *z_strdup(const char *s);\nZ_API size_t z_strlen(const char *s);\nZ_API void z_strcpy(char *dst, const char *src);\nZ_API char *z_strchr(const char *s, int c);\nZ_API char *z_strrchr(const char *s, int c);\n\n#define z_alloc_printf(_str...)                 \\\n    ({                                          \\\n        char *_tmp;                             \\\n        size_t _len = snprintf(NULL, 0, _str);  \\\n        if (_len < 0) {                         \\\n            EXITME(\"Whoa, snprintf() fails?!\"); \\\n        }                                       \\\n        _tmp = z_alloc(_len + 1, sizeof(char)); \\\n        snprintf(_tmp, _len + 1, _str);         \\\n        _tmp;                                   \\\n    })\n#define z_snprintf(...) snprintf(__VA_ARGS__)\n#define z_sscanf(...) 
sscanf(__VA_ARGS__)\n\n#define z_likely(x) __builtin_expect(!!(x), 1)\n#define z_unlikely(x) __builtin_expect(!!(x), 0)\n\n/*\n * Keystone\n */\n#define KS_BUFMAX 0x400\n\nextern ks_engine *ks;\nextern size_t ks_count;\nextern size_t ks_size;\nextern const unsigned char *ks_encode;\nextern unsigned char ks_encode_fast[0x10];\nextern char ks_buf[KS_BUFMAX];\n\n#define KS_INIT                                                       \\\n    do {                                                              \\\n        if (ks == NULL) {                                             \\\n            if (ks_open(KS_ARCH_X86, KS_MODE_64, &ks) != KS_ERR_OK) { \\\n                EXITME(\"fail on ks_open()\");                          \\\n            }                                                         \\\n        }                                                             \\\n    } while (0)\n\n#define KS_FINI                                                 \\\n    do {                                                        \\\n        if (ks_encode != NULL && ks_encode != ks_encode_fast) { \\\n            ks_free((unsigned char *)ks_encode);                \\\n        }                                                       \\\n        if (ks != NULL) {                                       \\\n            ks_close(ks);                                       \\\n        }                                                       \\\n    } while (0)\n\n// for quick assembly\n#define KS_ASM_CALL(cur_addr, tar_addr)                           \\\n    do {                                                          \\\n        ks_encode_fast[0] = '\\xe8';                               \\\n        *(int *)(ks_encode_fast + 1) = (tar_addr) - (cur_addr)-5; \\\n        if (ks_encode != NULL && ks_encode != ks_encode_fast) {   \\\n            ks_free((unsigned char *)ks_encode);                  \\\n        }                                                         \\\n        ks_size = 5;  
                                            \\\n        ks_count = 1;                                             \\\n        ks_encode = ks_encode_fast;                               \\\n    } while (0)\n\n#define KS_ASM_JMP(cur_addr, tar_addr)                            \\\n    do {                                                          \\\n        ks_encode_fast[0] = '\\xe9';                               \\\n        *(int *)(ks_encode_fast + 1) = (tar_addr) - (cur_addr)-5; \\\n        if (ks_encode != NULL && ks_encode != ks_encode_fast) {   \\\n            ks_free((unsigned char *)ks_encode);                  \\\n        }                                                         \\\n        ks_size = 5;                                              \\\n        ks_count = 1;                                             \\\n        ks_encode = ks_encode_fast;                               \\\n    } while (0)\n\n// XXX: note that  KS_ASM_CONST_MOV can only mov to an address smaller than\n// 0x7fffffff, and can only store a value smaller than 0x7fffffff\n#define KS_ASM_CONST_MOV(mem, val)                                            \\\n    do {                                                                      \\\n        if (ks_encode != NULL && ks_encode != ks_encode_fast) {               \\\n            ks_free((unsigned char *)ks_encode);                              \\\n        }                                                                     \\\n        long mem_ = (mem);                                                    \\\n        long val_ = (val)&0x7FFFFFFF;                                         \\\n        if (mem_ > 0x7FFFFFFF) {                                              \\\n            EXITME(\"KS_ASM_CONST_MOV stores to a large address: %#lx\", mem_); \\\n        }                                                                     \\\n        memcpy(ks_encode_fast,                                                \\\n               
\"\\x48\\xC7\\x04\\x25\\xDD\\xDD\\xDD\\xDD\\xFF\\xFF\\xFF\\xFF\", 12);       \\\n        memcpy(ks_encode_fast + 4, &(mem_), 4);                               \\\n        memcpy(ks_encode_fast + 8, &(val_), 4);                               \\\n        ks_size = 12;                                                         \\\n        ks_count = 1;                                                         \\\n        ks_encode = ks_encode_fast;                                           \\\n    } while (0)\n\n#define KS_ASM(addr, ...)                                                      \\\n    do {                                                                       \\\n        if (snprintf(ks_buf, KS_BUFMAX, __VA_ARGS__) >= KS_BUFMAX) {           \\\n            EXITME(\"assembly code is too long:\\n%s\", ks_buf);                  \\\n        }                                                                      \\\n        if (ks_encode != NULL && ks_encode != ks_encode_fast)                  \\\n            ks_free((unsigned char *)ks_encode);                               \\\n        if (ks_asm(ks, ks_buf, addr, (unsigned char **)(&ks_encode), &ks_size, \\\n                   &ks_count) != KS_ERR_OK) {                                  \\\n            EXITME(\"fail on ks_asm:\\n%s\", ks_buf);                             \\\n        }                                                                      \\\n    } while (0)\n\n/*\n * Capstone\n */\nextern csh cs;\nextern size_t cs_count;\nextern const cs_insn *cs_inst;\n\n#define CS_SHOW_INST(i) \\\n    \"(%#lx:\\t%s %s)\", (i)->address, (i)->mnemonic, (i)->op_str\n\n#define CS_INVALID_CSH 0\n\n#define CS_DETAIL_ON                                                \\\n    do {                                                            \\\n        if (cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON) != CS_ERR_OK) { \\\n            EXITME(\"fail on cs_option()\");                          \\\n        }                               
                            \\\n    } while (0)\n\n#define CS_DETAIL_OFF                                                \\\n    do {                                                             \\\n        if (cs_option(cs, CS_OPT_DETAIL, CS_OPT_OFF) != CS_ERR_OK) { \\\n            EXITME(\"fail on cs_option()\");                           \\\n        }                                                            \\\n    } while (0)\n\n#define CS_INIT                                                       \\\n    do {                                                              \\\n        if (cs == CS_INVALID_CSH) {                                   \\\n            if (cs_open(CS_ARCH_X86, CS_MODE_64, &cs) != CS_ERR_OK) { \\\n                EXITME(\"fail on cs_open()\");                          \\\n            }                                                         \\\n            CS_DETAIL_ON;                                             \\\n        }                                                             \\\n    } while (0)\n\n#define CS_FINI                                    \\\n    do {                                           \\\n        if (cs_inst != NULL)                       \\\n            cs_free((cs_insn *)cs_inst, cs_count); \\\n        if (cs != CS_INVALID_CSH)                  \\\n            cs_close(&cs);                         \\\n    } while (0)\n\n#define CS_DISASM_RAW(ptr, size, addr, count)                              \\\n    do {                                                                   \\\n        if (cs_inst != NULL)                                               \\\n            cs_free((cs_insn *)cs_inst, cs_count);                         \\\n        cs_count =                                                         \\\n            cs_disasm(cs, ptr, size, addr, count, (cs_insn **)(&cs_inst)); \\\n    } while (0)\n\n#define CS_DISASM(rptr, addr, count)                               \\\n    do {                             
                              \\\n        CS_DISASM_RAW((rptr)->raw_ptr, (rptr)->size, addr, count); \\\n    } while (0)\n\n/*\n * TPDispatcher\n */\nextern TPDispatcher *tp;\nextern size_t tp_size;\nextern const uint8_t *tp_code;\n\n#define TP_INIT                            \\\n    do {                                   \\\n        if (tp == NULL)                    \\\n            tp = z_tp_dispatcher_create(); \\\n    } while (0)\n\n#define TP_FINI                          \\\n    do {                                 \\\n        if (tp != NULL)                  \\\n            z_tp_dispatcher_destroy(tp); \\\n    } while (0)\n\n#define TP_EMIT(type, ...)                                                  \\\n    do {                                                                    \\\n        if (tp == NULL)                                                     \\\n            TP_INIT;                                                        \\\n        tp_code = z_tp_dispatcher_emit_##type(tp, &tp_size, ##__VA_ARGS__); \\\n    } while (0)\n\n/*\n * Library function information\n */\n#define LB_INIT z_libfunc_init()\n\n#define LB_FINI z_libfunc_fini()\n\n#define LB_QUERY(name) z_libfunc_get_info(name)\n\n#define LB_DEFAULT() z_libfunc_default()\n\n/*\n * System\n */\n#define __PRE_CHECK                                                            \\\n    do {                                                                       \\\n        if (AFL_PREV_ID_PTR != RW_PAGE_INFO_ADDR(afl_prev_id)) {               \\\n            EXITME(\"invalid AFL_PREV_ID_PTR value: %#lx v/s %#lx\",             \\\n                   AFL_PREV_ID_PTR, RW_PAGE_INFO_ADDR(afl_prev_id));           \\\n        }                                                                      \\\n        if (AFL_MAP_SIZE_POW2 > 31) {                                          \\\n            EXITME(\"the size of AFL's shared memory is too large: %#lx\",       \\\n                   AFL_MAP_SIZE);           
                                   \\\n        }                                                                      \\\n        if (RW_PAGE_SIZE < RW_PAGE_USED_SIZE + 0x100) {                        \\\n            /* XXX: 0x100 is left for utils_output_number when DEBUG */        \\\n            EXITME(\"use too much space on RW_PAGE: %#lx v/s %#lx\",             \\\n                   RW_PAGE_SIZE, RW_PAGE_USED_SIZE + 0x100);                   \\\n        }                                                                      \\\n        if (CRS_MAP_SIZE < CRS_USED_SIZE) {                                    \\\n            EXITME(\"use too much space on CRS PAGE: %#lx v/s %#lx\",            \\\n                   CRS_MAP_SIZE, CRS_USED_SIZE);                               \\\n        }                                                                      \\\n        if (LOOKUP_TABLE_CELL_SIZE_POW2 != 2) {                                \\\n            EXITME(\"the element size of lookup table must be dword\");          \\\n        }                                                                      \\\n        if (SIGNAL_STACK_SIZE < MINSIGSTKSZ) {                                 \\\n            EXITME(                                                            \\\n                \"the size of signal stack is smaller than MINSIGSTKSZ (%#lx)\", \\\n                MINSIGSTKSZ);                                                  \\\n        }                                                                      \\\n    } while (0)\n\n#define Z_INIT       \\\n    do {             \\\n        __PRE_CHECK; \\\n        KS_INIT;     \\\n        CS_INIT;     \\\n        TP_INIT;     \\\n        LB_INIT;     \\\n    } while (0)\n\n#define Z_FINI   \\\n    do {         \\\n        KS_FINI; \\\n        CS_FINI; \\\n        TP_FINI; \\\n        LB_FINI; \\\n    } while (0)\n\n#endif\n"
  },
  {
    "path": "src/x64_utils.c",
    "content": "/*\n * x64_utils.c\n * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang\n *\n * This program is free software: you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation, either version 3 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program.  If not, see <http://www.gnu.org/licenses/>.\n */\n\n#ifndef __X64_UTILS_C\n#define __X64_UTILS_C\n\n// XXX: this file is always included into .c file to benefit compiler\n// optimization\n\nZ_PRIVATE const uint8_t *z_x64_gen_nop(size_t n) {\n    static const char *nop_bufs[15] = {\n        \"\\x90\",\n        \"\\x66\\x90\",\n        \"\\x0F\\x1F\\x00\",\n        \"\\x0F\\x1F\\x40\\x00\",\n        \"\\x0F\\x1F\\x44\\x00\\x00\",\n        \"\\x66\\x0F\\x1F\\x44\\x00\\x00\",\n        \"\\x0F\\x1F\\x80\\x00\\x00\\x00\\x00\",\n        \"\\x0F\\x1F\\x84\\x00\\x00\\x00\\x00\\x00\",\n        \"\\x66\\x0F\\x1F\\x84\\x00\\x00\\x00\\x00\\x00\",\n        \"\\x0F\\x1F\\x44\\x00\\x00\\x0F\\x1F\\x44\\x00\\x00\",\n        \"\\x0F\\x1F\\x44\\x00\\x00\\x66\\x0F\\x1F\\x44\\x00\\x00\",\n        \"\\x66\\x0F\\x1F\\x44\\x00\\x00\\x66\\x0F\\x1F\\x44\\x00\\x00\",\n        \"\\x66\\x0F\\x1F\\x44\\x00\\x00\\x0F\\x1F\\x80\\x00\\x00\\x00\\x00\",\n        \"\\x0F\\x1F\\x80\\x00\\x00\\x00\\x00\\x0F\\x1F\\x80\\x00\\x00\\x00\\x00\",\n        \"\\x0F\\x1F\\x80\\x00\\x00\\x00\\x00\\x0F\\x1F\\x84\\x00\\x00\\x00\\x00\\x00\",\n    };\n\n    if (n > 15) {\n        EXITME(\"invalid size for a nop instruction: %d\", n);\n        return NULL;\n    } else {\n        return (const uint8_t *)nop_bufs[n - 1];\n    
}\n}\n\nZ_PRIVATE const uint8_t *z_x64_gen_invalid(size_t n) {\n    if (n > 15) {\n        EXITME(\"invalid size for an invalid instruction: %d\", n);\n        return NULL;\n    } else {\n        const char *buf =\n            \"\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\\x2F\";\n        return (const uint8_t *)buf;\n    }\n}\n\n#endif\n"
  },
  {
    "path": "test/check_avx512.c",
    "content": "/*\n * Check whether the current CPU supports AVX512. To compile, use the following\n * command:\n *\n *      clang -mavx512f check_avx512.c -o check_avx512\n */\n\n#include <stdint.h>\n#include <stdio.h>\n\n#define BUF_SIZE 0x10000\nunsigned char buffer[BUF_SIZE];\n\nint main(int argc, char **argv) {\n    register uintptr_t dst asm(\"rdi\") = (uintptr_t)buffer;\n    register uintptr_t n asm(\"rcx\") = (uintptr_t)BUF_SIZE;\n\n    asm volatile(\n        \".intel_syntax noprefix\\n\"\n        \"  xor rax, rax;\\n\"\n        \"  vpbroadcastd zmm16, eax;\\n\"\n        \"  lea rax, [rdi + rcx];\\n\"\n        \"  sub rdi, rax;\\n\"\n        \"loop:\\n\"\n        \"  vmovdqa64 [rax + rdi], zmm16;\\n\"\n        \"  add rdi, 0x40;\\n\"\n        \"  jnz loop;\\n\"\n        :\n        : \"r\"(dst), \"r\"(n)\n        : \"rax\", \"zmm16\", \"memory\");\n\n    return 0;\n}\n"
  },
  {
    "path": "test/crash.c",
    "content": "#include <stdio.h>\n#include <string.h>\n\nint main(int argc, const char **argv) {\n    if (argc > 1 && !strcmp(argv[1], \"mdzz\")) {\n        char *a = NULL;\n        a[1] = 'z';\n    }\n}\n"
  },
  {
    "path": "test/ex.smt2",
    "content": "; Boogie universal background predicate\n; Copyright (c) 2004-2010, Microsoft Corp.\n(set-info :category \"industrial\")\n(declare-sort |T@U| 0)\n(declare-sort |T@T| 0)\n(declare-fun real_pow (Real Real) Real)\n(declare-fun UOrdering2 (|T@U| |T@U|) Bool)\n(declare-fun UOrdering3 (|T@T| |T@U| |T@U|) Bool)\n\n(declare-fun tickleBool (Bool) Bool)\n(assert (and (tickleBool true) (tickleBool false)))\n(declare-fun TV (Int) Bool)\n(declare-fun TO (Int) Bool)\n(declare-fun between (Int Int Int) Bool)\n(declare-fun word (Int) Bool)\n(declare-fun WORD_HI () Int)\n(declare-fun NULL () Int)\n(declare-fun TVM (Int Int) Bool)\n(declare-fun Mult (Int Int) Int)\n(declare-fun TVM3 (Int Int Int) Bool)\n(declare-fun memAddr (Int) Bool)\n(declare-fun ?memLo () Int)\n(declare-fun ?memHi () Int)\n(declare-fun memAddrEx (Int) Bool)\n(declare-fun TBV ((_ BitVec 32)) Bool)\n(declare-fun $Aligned ((_ BitVec 32)) Bool)\n(declare-fun $bbvec4 ((Array Int Int) Int Int (Array Int Int) Int Int Int Int Int) Bool)\n(declare-fun B (Int) (_ BitVec 32))\n(declare-fun I ((_ BitVec 32)) Int)\n(declare-fun $bb2vec4 ((Array Int Int) Int (Array Int Int) Int Int Int Int Int) Bool)\n(declare-fun q@and (Int Int) Int)\n(declare-fun q@or (Int Int) Int)\n(declare-fun q@xor (Int Int) Int)\n(declare-fun shl (Int Int) Int)\n(declare-fun shr (Int Int) Int)\n(declare-fun neg (Int) Int)\n(declare-fun Aligned (Int) Bool)\n(declare-fun %lbl%+2849 () Bool)\n(declare-fun %lbl%@4150 () Bool)\n(declare-fun $x () (_ BitVec 32))\n(declare-fun %lbl%+4132 () Bool)\n(assert (forall ((val Int) ) (! (= (TV val) true)\n :qid |baseibpl.14:28|\n :skolemid |0|\n :pattern ( (TV val))\n)))\n(assert (forall ((wordOffset Int) ) (! (= (TO wordOffset) true)\n :qid |baseibpl.18:28|\n :skolemid |1|\n :pattern ( (TO wordOffset))\n)))\n(assert (forall ((i1 Int) (i2 Int) (x Int) ) (! 
(= (between i1 i2 x) (and\n(<= i1 x)\n(< x i2)))\n :qid |baseibpl.25:18|\n :skolemid |2|\n :pattern ( (between i1 i2 x))\n)))\n(assert (forall ((val@@0 Int) ) (! (= (word val@@0) (and\n(<= 0 val@@0)\n(< val@@0 WORD_HI)))\n :qid |baseibpl.30:15|\n :skolemid |3|\n :pattern ( (word val@@0))\n)))\n(assert (= NULL 0))\n(assert (forall ((a Int) (b Int) ) (! (= (TVM a b) true)\n :qid |baseibpl.45:29|\n :skolemid |4|\n :pattern ( (TVM a b))\n)))\n(assert (forall ((a@@0 Int) (b@@0 Int) ) (! (= (Mult a@@0 b@@0) (* a@@0 b@@0))\n :qid |baseibpl.47:15|\n :skolemid |5|\n :pattern ( (TVM a@@0 b@@0))\n)))\n(assert (forall ((a@@1 Int) (b1 Int) (b2 Int) ) (! (= (TVM3 a@@1 b1 b2) true)\n :qid |baseibpl.49:30|\n :skolemid |6|\n :pattern ( (TVM3 a@@1 b1 b2))\n)))\n(assert (forall ((i Int) ) (! (= (memAddr i) (and\n(<= ?memLo i)\n(< i ?memHi)))\n :qid |memoryib.18:18|\n :skolemid |7|\n :pattern ( (memAddr i))\n)))\n(assert (forall ((i@@0 Int) ) (! (= (memAddrEx i@@0) (and\n(<= ?memLo i@@0)\n(<= i@@0 ?memHi)))\n :qid |memoryib.19:20|\n :skolemid |8|\n :pattern ( (memAddrEx i@@0))\n)))\n(assert (forall ((b@@1 (_ BitVec 32)) ) (! (= (TBV b@@1) true)\n :qid |BitVecto.18:29|\n :skolemid |9|\n :pattern ( (TBV b@@1))\n)))\n(assert (forall ((b@@2 (_ BitVec 32)) ) (! (= ($Aligned b@@2) (= (bvand b@@2 #x00000003) #x00000000))\n :qid |BitVecto.12:19|\n :skolemid |10|\n :pattern ( ($Aligned b@@2))\n)))\n(assert (forall ((a@@2 (Array Int Int)) (off Int) (aBase Int) (bb (Array Int Int)) (i0 Int) (i1@@0 Int) (i2@@0 Int) (g1 Int) (g2 Int) ) (! (= ($bbvec4 a@@2 off aBase bb i0 i1@@0 i2@@0 g1 g2) (forall ((i@@1 Int) ) (! 
(=> (and\n(TV i@@1)\n(word (- i@@1 i0))\n(<= i1@@0 i@@1)\n(< i@@1 i2@@0)\n($Aligned (B (- i@@1 i0)))) (and\n(between g1 g2 (+ g1 (* 4 (I (bvlshr (B (- i@@1 i0)) #x00000007)))))\n(= (= (select a@@2 (+ aBase (- i@@1 i0))) off) (= #x00000000 (bvand (B (select bb (+ g1 (* 4 (I (bvlshr (B (- i@@1 i0)) #x00000007)))))) (bvshl #x00000001 (bvand (bvlshr (B (- i@@1 i0)) #x00000002) #x0000001f)))))))\n :qid |BitVecto.19:11|\n :skolemid |11|\n :pattern ( (TV i@@1))\n)))\n :qid |BitVecto.17:18|\n :skolemid |12|\n :pattern ( ($bbvec4 a@@2 off aBase bb i0 i1@@0 i2@@0 g1 g2))\n)))\n(assert (forall ((a@@3 (Array Int Int)) (aBase@@0 Int) (bb@@0 (Array Int Int)) (i0@@0 Int) (i1@@1 Int) (i2@@1 Int) (g1@@0 Int) (g2@@0 Int) ) (! (= ($bb2vec4 a@@3 aBase@@0 bb@@0 i0@@0 i1@@1 i2@@1 g1@@0 g2@@0) (forall ((i@@2 Int) ) (! (=> (and\n(TV i@@2)\n(word (- i@@2 i0@@0))\n(<= i1@@1 i@@2)\n(< i@@2 i2@@1)\n($Aligned (B (- i@@2 i0@@0)))) (and\n(between g1@@0 g2@@0 (+ g1@@0 (* 4 (I (bvlshr (B (- i@@2 i0@@0)) #x00000006)))))\n(= (B (select a@@3 (+ aBase@@0 (- i@@2 i0@@0)))) (bvand (bvlshr (B (select bb@@0 (+ g1@@0 (* 4 (I (bvlshr (B (- i@@2 i0@@0)) #x00000006)))))) (bvand (bvlshr (B (- i@@2 i0@@0)) #x00000001) #x0000001f)) #x00000003))))\n :qid |BitVecto.28:11|\n :skolemid |13|\n :pattern ( (TV i@@2))\n)))\n :qid |BitVecto.26:19|\n :skolemid |14|\n :pattern ( ($bb2vec4 a@@3 aBase@@0 bb@@0 i0@@0 i1@@1 i2@@1 g1@@0 g2@@0))\n)))\n(assert (= WORD_HI (+ (+ 2147483647 2147483647) 2)))\n(assert (= (I #x00000001) 1))\n(assert (forall ((i1@@2 Int) (i2@@2 Int) ) (! (=> (and\n(word i1@@2)\n(word i2@@2)) (= (= i1@@2 i2@@2) (= (B i1@@2) (B i2@@2))))\n :qid |BitVecto.9:15|\n :skolemid |19|\n :pattern ( (B i1@@2) (B i2@@2))\n)))\n(assert (forall ((b1@@0 (_ BitVec 32)) (b2@@0 (_ BitVec 32)) ) (! (= (= b1@@0 b2@@0) (= (I b1@@0) (I b2@@0)))\n :qid |BitVecto.10:15|\n :skolemid |20|\n :pattern ( (I b1@@0) (I b2@@0))\n)))\n(assert (forall ((b@@3 (_ BitVec 32)) ) (! 
(word (I b@@3))\n :qid |BitVecto.12:15|\n :skolemid |21|\n :pattern ( (I b@@3))\n)))\n(assert (forall ((b@@4 (_ BitVec 32)) ) (! (= (B (I b@@4)) b@@4)\n :qid |BitVecto.13:15|\n :skolemid |22|\n :pattern ( (B (I b@@4)))\n)))\n(assert (forall ((i@@3 Int) ) (! (=> (word i@@3) (= (I (B i@@3)) i@@3))\n :qid |BitVecto.14:15|\n :skolemid |23|\n :pattern ( (I (B i@@3)))\n)))\n(assert (forall ((b1@@1 (_ BitVec 32)) (b2@@1 (_ BitVec 32)) ) (! (=> (word (+ (I b1@@1) (I b2@@1))) (= (+ (I b1@@1) (I b2@@1)) (I (bvadd b1@@1 b2@@1))))\n :qid |BitVecto.16:15|\n :skolemid |24|\n :pattern ( (bvadd b1@@1 b2@@1))\n :pattern ( (TBV b1@@1) (TBV b2@@1))\n)))\n(assert (forall ((b1@@2 (_ BitVec 32)) (b2@@2 (_ BitVec 32)) ) (! (=> (word (- (I b1@@2) (I b2@@2))) (= (- (I b1@@2) (I b2@@2)) (I (bvsub b1@@2 b2@@2))))\n :qid |BitVecto.17:15|\n :skolemid |25|\n :pattern ( (bvsub b1@@2 b2@@2))\n :pattern ( (TBV b1@@2) (TBV b2@@2))\n)))\n(assert (forall ((b1@@3 (_ BitVec 32)) (b2@@3 (_ BitVec 32)) ) (! (=> (word (* (I b1@@3) (I b2@@3))) (= (* (I b1@@3) (I b2@@3)) (I (bvmul b1@@3 b2@@3))))\n :qid |BitVecto.18:15|\n :skolemid |26|\n :pattern ( (bvmul b1@@3 b2@@3))\n :pattern ( (TBV b1@@3) (TBV b2@@3))\n)))\n(assert (forall ((b1@@4 (_ BitVec 32)) (b2@@4 (_ BitVec 32)) ) (! (= (<= (I b1@@4) (I b2@@4)) (bvule b1@@4 b2@@4))\n :qid |BitVecto.19:15|\n :skolemid |27|\n :pattern ( (bvule b1@@4 b2@@4))\n :pattern ( (TBV b1@@4) (TBV b2@@4))\n)))\n(assert (forall ((i1@@3 Int) (i2@@3 Int) ) (! (= (q@and i1@@3 i2@@3) (I (bvand (B i1@@3) (B i2@@3))))\n :qid |BitVecto.20:15|\n :skolemid |28|\n :pattern ( (q@and i1@@3 i2@@3))\n)))\n(assert (forall ((i1@@4 Int) (i2@@4 Int) ) (! (= (q@or i1@@4 i2@@4) (I (bvor (B i1@@4) (B i2@@4))))\n :qid |BitVecto.21:15|\n :skolemid |29|\n :pattern ( (q@or i1@@4 i2@@4))\n)))\n(assert (forall ((i1@@5 Int) (i2@@5 Int) ) (! 
(= (q@xor i1@@5 i2@@5) (I (bvxor (B i1@@5) (B i2@@5))))\n :qid |BitVecto.22:15|\n :skolemid |30|\n :pattern ( (q@xor i1@@5 i2@@5))\n)))\n(assert (forall ((i1@@6 Int) (i2@@6 Int) ) (! (= (shl i1@@6 i2@@6) (I (bvshl (B i1@@6) (B i2@@6))))\n :qid |BitVecto.23:15|\n :skolemid |31|\n :pattern ( (shl i1@@6 i2@@6))\n)))\n(assert (forall ((i1@@7 Int) (i2@@7 Int) ) (! (= (shr i1@@7 i2@@7) (I (bvlshr (B i1@@7) (B i2@@7))))\n :qid |BitVecto.24:15|\n :skolemid |32|\n :pattern ( (shr i1@@7 i2@@7))\n)))\n(assert (forall ((i@@4 Int) ) (! (= (neg i@@4) (I (bvnot (B i@@4))))\n :qid |BitVecto.25:15|\n :skolemid |33|\n :pattern ( (neg i@@4))\n)))\n(assert (forall ((b@@5 (_ BitVec 32)) ) (! (= (Aligned (I b@@5)) (= (bvand b@@5 #x00000003) #x00000000))\n :qid |BitVecto.27:15|\n :skolemid |34|\n :pattern ( (Aligned (I b@@5)))\n)))\n(push 1)\n(set-info :boogie-vc-id _aligned)\n(assert (not\n(let ((anon0_correct (=> (! (and %lbl%+2849 true) :lblpos +2849) (and\n(! (or %lbl%@4150 ($Aligned (bvmul #x00000004 $x))) :lblneg @4150)\n(=> ($Aligned (bvmul #x00000004 $x)) true)))))\n(let ((PreconditionGeneratedEntry_correct (=> (! (and %lbl%+4132 true) :lblpos +4132) anon0_correct)))\nPreconditionGeneratedEntry_correct))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+2852 () Bool)\n(declare-fun %lbl%@4178 () Bool)\n(declare-fun %lbl%+4172 () Bool)\n(push 1)\n(set-info :boogie-vc-id _zeroAligned)\n(assert (not\n(let ((anon0_correct@@0 (=> (! (and %lbl%+2852 true) :lblpos +2852) (and\n(! (or %lbl%@4178 ($Aligned #x00000000)) :lblneg @4178)\n(=> ($Aligned #x00000000) true)))))\n(let ((PreconditionGeneratedEntry_correct@@0 (=> (! (and %lbl%+4172 true) :lblpos +4172) anon0_correct@@0)))\nPreconditionGeneratedEntry_correct@@0))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+2858 () Bool)\n(declare-fun %lbl%@4199 () Bool)\n(declare-fun $x@@0 () (_ BitVec 32))\n(declare-fun %lbl%+4183 () Bool)\n(push 1)\n(set-info :boogie-vc-id _andAligned)\n(assert (not\n(let ((anon0_correct@@1 (=> (! 
(and %lbl%+2858 true) :lblpos +2858) (and\n(! (or %lbl%@4199 (= (= (bvand $x@@0 #x00000003) #x00000000) ($Aligned $x@@0))) :lblneg @4199)\n(=> (= (= (bvand $x@@0 #x00000003) #x00000000) ($Aligned $x@@0)) true)))))\n(let ((PreconditionGeneratedEntry_correct@@1 (=> (! (and %lbl%+4183 true) :lblpos +4183) anon0_correct@@1)))\nPreconditionGeneratedEntry_correct@@1))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+2867 () Bool)\n(declare-fun %lbl%@4234 () Bool)\n(declare-fun $x@@1 () (_ BitVec 32))\n(declare-fun $y () (_ BitVec 32))\n(declare-fun %lbl%+4216 () Bool)\n(push 1)\n(set-info :boogie-vc-id _addAligned)\n(assert (not\n(let ((anon0_correct@@2 (=> (! (and %lbl%+2867 true) :lblpos +2867) (and\n(! (or %lbl%@4234 (=> ($Aligned $x@@1) (= ($Aligned $y) ($Aligned (bvadd $x@@1 $y))))) :lblneg @4234)\n(=> (=> ($Aligned $x@@1) (= ($Aligned $y) ($Aligned (bvadd $x@@1 $y)))) true)))))\n(let ((PreconditionGeneratedEntry_correct@@2 (=> (! (and %lbl%+4216 true) :lblpos +4216) anon0_correct@@2)))\nPreconditionGeneratedEntry_correct@@2))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+2876 () Bool)\n(declare-fun %lbl%@4273 () Bool)\n(declare-fun $x@@2 () (_ BitVec 32))\n(declare-fun $y@@0 () (_ BitVec 32))\n(declare-fun %lbl%+4255 () Bool)\n(push 1)\n(set-info :boogie-vc-id _subAligned)\n(assert (not\n(let ((anon0_correct@@3 (=> (! (and %lbl%+2876 true) :lblpos +2876) (and\n(! (or %lbl%@4273 (=> ($Aligned $x@@2) (= ($Aligned $y@@0) ($Aligned (bvsub $x@@2 $y@@0))))) :lblneg @4273)\n(=> (=> ($Aligned $x@@2) (= ($Aligned $y@@0) ($Aligned (bvsub $x@@2 $y@@0)))) true)))))\n(let ((PreconditionGeneratedEntry_correct@@3 (=> (! 
(and %lbl%+4255 true) :lblpos +4255) anon0_correct@@3)))\nPreconditionGeneratedEntry_correct@@3))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+2882 () Bool)\n(declare-fun %lbl%@4338 () Bool)\n(declare-fun $b () (_ BitVec 32))\n(declare-fun %lbl%@4348 () Bool)\n(declare-fun %lbl%@4358 () Bool)\n(declare-fun %lbl%@4368 () Bool)\n(declare-fun %lbl%+4294 () Bool)\n(push 1)\n(set-info :boogie-vc-id _notAligned)\n(assert (not\n(let ((anon0_correct@@4 (=> (! (and %lbl%+2882 true) :lblpos +2882) (and\n(! (or %lbl%@4338 (not ($Aligned (bvadd $b #x00000001)))) :lblneg @4338)\n(=> (not ($Aligned (bvadd $b #x00000001))) (and\n(! (or %lbl%@4348 (not ($Aligned (bvadd $b #x00000002)))) :lblneg @4348)\n(=> (not ($Aligned (bvadd $b #x00000002))) (and\n(! (or %lbl%@4358 (not ($Aligned (bvadd $b #x00000003)))) :lblneg @4358)\n(=> (not ($Aligned (bvadd $b #x00000003))) (and\n(! (or %lbl%@4368 (bvule $b #xfffffffc)) :lblneg @4368)\n(=> (bvule $b #xfffffffc) true)))))))))))\n(let ((PreconditionGeneratedEntry_correct@@4 (=> (! (and %lbl%+4294 true) :lblpos +4294) (=> ($Aligned $b) anon0_correct@@4))))\nPreconditionGeneratedEntry_correct@@4))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+2888 () Bool)\n(declare-fun %lbl%@4414 () Bool)\n(declare-fun $x@@3 () (_ BitVec 32))\n(declare-fun %lbl%@4432 () Bool)\n(declare-fun %lbl%+4375 () Bool)\n(push 1)\n(set-info :boogie-vc-id _is4kAligned)\n(assert (not\n(let ((anon0_correct@@5 (=> (! (and %lbl%+2888 true) :lblpos +2888) (and\n(! (or %lbl%@4414 (= (bvand (bvsub $x@@3 (bvand $x@@3 #x00000fff)) #x00000fff) #x00000000)) :lblneg @4414)\n(=> (= (bvand (bvsub $x@@3 (bvand $x@@3 #x00000fff)) #x00000fff) #x00000000) (and\n(! (or %lbl%@4432 (and\n(bvule #x00000000 (bvand $x@@3 #x00000fff))\n(bvule (bvand $x@@3 #x00000fff) #x00000fff))) :lblneg @4432)\n(=> (and\n(bvule #x00000000 (bvand $x@@3 #x00000fff))\n(bvule (bvand $x@@3 #x00000fff) #x00000fff)) true)))))))\n(let ((PreconditionGeneratedEntry_correct@@5 (=> (! 
(and %lbl%+4375 true) :lblpos +4375) anon0_correct@@5)))\nPreconditionGeneratedEntry_correct@@5))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+2894 () Bool)\n(declare-fun %lbl%@4498 () Bool)\n(declare-fun $x@@4 () (_ BitVec 32))\n(declare-fun %lbl%@4520 () Bool)\n(declare-fun %lbl%+4455 () Bool)\n(push 1)\n(set-info :boogie-vc-id _is2m4kAligned)\n(assert (not\n(let ((anon0_correct@@6 (=> (! (and %lbl%+2894 true) :lblpos +2894) (and\n(! (or %lbl%@4498 (= (bvand (bvsub (bvadd $x@@4 #x00200000) (bvand $x@@4 #x001fffff)) #x00000fff) #x00000000)) :lblneg @4498)\n(=> (= (bvand (bvsub (bvadd $x@@4 #x00200000) (bvand $x@@4 #x001fffff)) #x00000fff) #x00000000) (and\n(! (or %lbl%@4520 (and\n(bvule #x00000000 (bvand $x@@4 #x001fffff))\n(bvule (bvand $x@@4 #x001fffff) #x001fffff))) :lblneg @4520)\n(=> (and\n(bvule #x00000000 (bvand $x@@4 #x001fffff))\n(bvule (bvand $x@@4 #x001fffff) #x001fffff)) true)))))))\n(let ((PreconditionGeneratedEntry_correct@@6 (=> (! (and %lbl%+4455 true) :lblpos +4455) anon0_correct@@6)))\nPreconditionGeneratedEntry_correct@@6))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+2900 () Bool)\n(declare-fun %lbl%@4581 () Bool)\n(declare-fun $x@@5 () (_ BitVec 32))\n(declare-fun %lbl%@4595 () Bool)\n(declare-fun %lbl%+4543 () Bool)\n(push 1)\n(set-info :boogie-vc-id _add4kAligned)\n(assert (not\n(let ((anon0_correct@@7 (=> (! (and %lbl%+2900 true) :lblpos +2900) (and\n(! (or %lbl%@4581 (= (bvand (bvadd $x@@5 #x00001000) #x00000fff) #x00000000)) :lblneg @4581)\n(=> (= (bvand (bvadd $x@@5 #x00001000) #x00000fff) #x00000000) (and\n(! (or %lbl%@4595 ($Aligned $x@@5)) :lblneg @4595)\n(=> ($Aligned $x@@5) true)))))))\n(let ((PreconditionGeneratedEntry_correct@@7 (=> (! 
(and %lbl%+4543 true) :lblpos +4543) (=> (= (bvand $x@@5 #x00000fff) #x00000000) anon0_correct@@7))))\nPreconditionGeneratedEntry_correct@@7))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+2906 () Bool)\n(declare-fun %lbl%@4652 () Bool)\n(declare-fun %lbl%@4662 () Bool)\n(declare-fun $unitSize () (_ BitVec 32))\n(declare-fun %lbl%@4676 () Bool)\n(declare-fun %lbl%+4600 () Bool)\n(push 1)\n(set-info :boogie-vc-id _initialize)\n(assert (not\n(let ((anon0_correct@@8 (=> (! (and %lbl%+2906 true) :lblpos +2906) (and\n(! (or %lbl%@4652 (= (bvlshr #x00000000 #x00000007) #x00000000)) :lblneg @4652)\n(=> (= (bvlshr #x00000000 #x00000007) #x00000000) (and\n(! (or %lbl%@4662 (= (bvlshr (bvmul #x00000080 $unitSize) #x00000007) $unitSize)) :lblneg @4662)\n(=> (= (bvlshr (bvmul #x00000080 $unitSize) #x00000007) $unitSize) (and\n(! (or %lbl%@4676 (= (bvlshr (bvmul #x00000100 $unitSize) #x00000007) (bvadd $unitSize $unitSize))) :lblneg @4676)\n(=> (= (bvlshr (bvmul #x00000100 $unitSize) #x00000007) (bvadd $unitSize $unitSize)) true)))))))))\n(let ((PreconditionGeneratedEntry_correct@@8 (=> (! (and %lbl%+4600 true) :lblpos +4600) (=> (bvule $unitSize #x00ffffff) anon0_correct@@8))))\nPreconditionGeneratedEntry_correct@@8))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+3018 () Bool)\n(declare-fun %lbl%@5233 () Bool)\n(declare-fun $i2 () Int)\n(declare-fun $i0 () Int)\n(declare-fun %lbl%@5259 () Bool)\n(declare-fun $idx () Int)\n(declare-fun $g1 () Int)\n(declare-fun %lbl%@5285 () Bool)\n(declare-fun %lbl%@5345 () Bool)\n(declare-fun $a () (Array Int Int))\n(declare-fun $off () Int)\n(declare-fun $aBase () Int)\n(declare-fun $bb () (Array Int Int))\n(declare-fun $i1 () Int)\n(declare-fun $g2 () Int)\n(declare-fun %lbl%+4695 () Bool)\n(push 1)\n(set-info :boogie-vc-id _bb4Zero)\n(assert (not\n(let ((anon0_correct@@9 (=> (! (and %lbl%+3018 true) :lblpos +3018) (and\n(! 
(or %lbl%@5233 (= (bvmul #x00000080 (bvlshr (B (- $i2 $i0)) #x00000007)) (B (- $i2 $i0)))) :lblneg @5233)\n(=> (= (bvmul #x00000080 (bvlshr (B (- $i2 $i0)) #x00000007)) (B (- $i2 $i0))) (and\n(! (or %lbl%@5259 (= (- $idx $g1) (* 4 (I (bvlshr (B (- $i2 $i0)) #x00000007))))) :lblneg @5259)\n(=> (= (- $idx $g1) (* 4 (I (bvlshr (B (- $i2 $i0)) #x00000007)))) (and\n(! (or %lbl%@5285 (forall ((i@@5 Int) ) (! (=> (and\n(TV i@@5)\n(<= $i2 i@@5)\n(< i@@5 (+ $i2 128))) (= (bvlshr (B (- i@@5 $i0)) #x00000007) (bvlshr (B (- $i2 $i0)) #x00000007)))\n :qid |BitVecto.62:18|\n :skolemid |35|\n :pattern ( (TV i@@5))\n))) :lblneg @5285)\n(=> (forall ((i@@6 Int) ) (! (=> (and\n(TV i@@6)\n(<= $i2 i@@6)\n(< i@@6 (+ $i2 128))) (= (bvlshr (B (- i@@6 $i0)) #x00000007) (bvlshr (B (- $i2 $i0)) #x00000007)))\n :qid |BitVecto.62:18|\n :skolemid |35|\n :pattern ( (TV i@@6))\n)) (and\n(! (or %lbl%@5345 ($bbvec4 $a $off $aBase (store $bb $idx 0) $i0 $i1 (+ $i2 128) $g1 $g2)) :lblneg @5345)\n(=> ($bbvec4 $a $off $aBase (store $bb $idx 0) $i0 $i1 (+ $i2 128) $g1 $g2) true)))))))))))\n(let ((PreconditionGeneratedEntry_correct@@9 (=> (! (and %lbl%+4695 true) :lblpos +4695) (=> (and\n(forall ((i@@7 Int) ) (! (=> (and\n(TV i@@7)\n(<= $i1 i@@7)\n(< i@@7 (+ $i2 128))) (= (select $a (+ $aBase (- i@@7 $i0))) $off))\n :qid |BitVecto.80:20|\n :skolemid |15|\n :pattern ( (TV i@@7))\n))\n($bbvec4 $a $off $aBase $bb $i0 $i1 $i2 $g1 $g2)) (=> (and\n($Aligned (B $idx))\n($Aligned (B $g1))\n(= (B (- $i2 $i0)) (bvmul #x00000020 (bvsub (B $idx) (B $g1))))\n(= $i1 $i0)\n(=> (and\n(bvule (bvlshr (B (- $i2 $i0)) #x00000007) #x01ffffff)\n(= (bvmul #x00000080 (bvlshr (B (- $i2 $i0)) #x00000007)) (B (- $i2 $i0)))) (= (- $idx $g1) (* 4 (I (bvlshr (B (- $i2 $i0)) #x00000007)))))\n(forall ((i@@8 Int) ) (! 
(=> (and\n(TV i@@8)\n(<= $i2 i@@8)\n(< i@@8 (+ $i2 128))) (and\n(bvule (B (- $i2 $i0)) (B (- i@@8 $i0)))\n(bvule (B (- i@@8 $i0)) (bvadd (B (- $i2 $i0)) #x0000007f))))\n :qid |BitVecto.87:20|\n :skolemid |16|\n :pattern ( (TV i@@8))\n))\n(between $g1 $g2 $idx)\n(= (B 0) #x00000000)) anon0_correct@@9)))))\nPreconditionGeneratedEntry_correct@@9))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+3027 () Bool)\n(declare-fun %lbl%@5396 () Bool)\n(declare-fun $k () Int)\n(declare-fun $i0@@0 () Int)\n(declare-fun %lbl%+5376 () Bool)\n(push 1)\n(set-info :boogie-vc-id _bb4GetBit)\n(assert (not\n(let ((anon0_correct@@10 (=> (! (and %lbl%+3027 true) :lblpos +3027) (and\n(! (or %lbl%@5396 (bvule (bvand (bvlshr (B (- $k $i0@@0)) #x00000002) #x0000001f) #x0000001f)) :lblneg @5396)\n(=> (bvule (bvand (bvlshr (B (- $k $i0@@0)) #x00000002) #x0000001f) #x0000001f) true)))))\n(let ((PreconditionGeneratedEntry_correct@@10 (=> (! (and %lbl%+5376 true) :lblpos +5376) anon0_correct@@10)))\nPreconditionGeneratedEntry_correct@@10))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+3078 () Bool)\n(declare-fun %lbl%@5716 () Bool)\n(declare-fun $a@@0 () (Array Int Int))\n(declare-fun $aBase@@0 () Int)\n(declare-fun $k@@0 () Int)\n(declare-fun $i0@@1 () Int)\n(declare-fun $on () Int)\n(declare-fun $off@@0 () Int)\n(declare-fun $ret () (Array Int Int))\n(declare-fun $i1@@0 () Int)\n(declare-fun $i2@@0 () Int)\n(declare-fun $g1@@0 () Int)\n(declare-fun $g2@@0 () Int)\n(declare-fun %lbl%@5750 () Bool)\n(declare-fun $idx@@0 () Int)\n(declare-fun %lbl%@5758 () Bool)\n(declare-fun %lbl%+5417 () Bool)\n(declare-fun $bb@@0 () (Array Int Int))\n(declare-fun $bbb () Int)\n(push 1)\n(set-info :boogie-vc-id _bb4SetBit)\n(assert (not\n(let ((anon0_correct@@11 (=> (! (and %lbl%+3078 true) :lblpos +3078) (and\n(! 
(or %lbl%@5716 ($bbvec4 (store $a@@0 (+ $aBase@@0 (- $k@@0 $i0@@1)) $on) $off@@0 $aBase@@0 $ret $i0@@1 $i1@@0 $i2@@0 $g1@@0 $g2@@0)) :lblneg @5716)\n(=> ($bbvec4 (store $a@@0 (+ $aBase@@0 (- $k@@0 $i0@@1)) $on) $off@@0 $aBase@@0 $ret $i0@@1 $i1@@0 $i2@@0 $g1@@0 $g2@@0) (and\n(! (or %lbl%@5750 (between $g1@@0 $g2@@0 $idx@@0)) :lblneg @5750)\n(=> (between $g1@@0 $g2@@0 $idx@@0) (and\n(! (or %lbl%@5758 (bvule (bvand (bvlshr (B (- $k@@0 $i0@@1)) #x00000002) #x0000001f) #x0000001f)) :lblneg @5758)\n(=> (bvule (bvand (bvlshr (B (- $k@@0 $i0@@1)) #x00000002) #x0000001f) #x0000001f) true)))))))))\n(let ((PreconditionGeneratedEntry_correct@@11 (=> (! (and %lbl%+5417 true) :lblpos +5417) (=> ($bbvec4 $a@@0 $off@@0 $aBase@@0 $bb@@0 $i0@@1 $i1@@0 $i2@@0 $g1@@0 $g2@@0) (=> (and\n(TV $k@@0)\n(word (- $k@@0 $i0@@1))\n(<= $i1@@0 $k@@0)\n(< $k@@0 $i2@@0)\n($Aligned (B (- $k@@0 $i0@@1)))\n(not (= $on $off@@0))\n(= $idx@@0 (+ $g1@@0 (* 4 (I (bvlshr (B (- $k@@0 $i0@@1)) #x00000007)))))\n(= (B $bbb) (bvor (B (select $bb@@0 $idx@@0)) (bvshl #x00000001 (bvand (bvlshr (B (- $k@@0 $i0@@1)) #x00000002) #x0000001f))))\n(= $ret (store $bb@@0 $idx@@0 $bbb))) anon0_correct@@11)))))\nPreconditionGeneratedEntry_correct@@11))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+3187 () Bool)\n(declare-fun %lbl%@6314 () Bool)\n(declare-fun $i2@@1 () Int)\n(declare-fun $i0@@2 () Int)\n(declare-fun %lbl%@6340 () Bool)\n(declare-fun $idx@@1 () Int)\n(declare-fun $g1@@1 () Int)\n(declare-fun %lbl%@6366 () Bool)\n(declare-fun %lbl%@6426 () Bool)\n(declare-fun $a@@1 () (Array Int Int))\n(declare-fun $aBase@@1 () Int)\n(declare-fun $bb@@1 () (Array Int Int))\n(declare-fun $i1@@1 () Int)\n(declare-fun $g2@@1 () Int)\n(declare-fun %lbl%+5779 () Bool)\n(push 1)\n(set-info :boogie-vc-id _bb4Zero2)\n(assert (not\n(let ((anon0_correct@@12 (=> (! (and %lbl%+3187 true) :lblpos +3187) (and\n(! 
(or %lbl%@6314 (= (bvmul #x00000040 (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)) (B (- $i2@@1 $i0@@2)))) :lblneg @6314)\n(=> (= (bvmul #x00000040 (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)) (B (- $i2@@1 $i0@@2))) (and\n(! (or %lbl%@6340 (= (- $idx@@1 $g1@@1) (* 4 (I (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006))))) :lblneg @6340)\n(=> (= (- $idx@@1 $g1@@1) (* 4 (I (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)))) (and\n(! (or %lbl%@6366 (forall ((i@@9 Int) ) (! (=> (and\n(TV i@@9)\n(<= $i2@@1 i@@9)\n(< i@@9 (+ $i2@@1 64))) (= (bvlshr (B (- i@@9 $i0@@2)) #x00000006) (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)))\n :qid |BitVecto.77:18|\n :skolemid |36|\n :pattern ( (TV i@@9))\n))) :lblneg @6366)\n(=> (forall ((i@@10 Int) ) (! (=> (and\n(TV i@@10)\n(<= $i2@@1 i@@10)\n(< i@@10 (+ $i2@@1 64))) (= (bvlshr (B (- i@@10 $i0@@2)) #x00000006) (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)))\n :qid |BitVecto.77:18|\n :skolemid |36|\n :pattern ( (TV i@@10))\n)) (and\n(! (or %lbl%@6426 ($bb2vec4 $a@@1 $aBase@@1 (store $bb@@1 $idx@@1 0) $i0@@2 $i1@@1 (+ $i2@@1 64) $g1@@1 $g2@@1)) :lblneg @6426)\n(=> ($bb2vec4 $a@@1 $aBase@@1 (store $bb@@1 $idx@@1 0) $i0@@2 $i1@@1 (+ $i2@@1 64) $g1@@1 $g2@@1) true)))))))))))\n(let ((PreconditionGeneratedEntry_correct@@12 (=> (! (and %lbl%+5779 true) :lblpos +5779) (=> (and\n(forall ((i@@11 Int) ) (! (=> (and\n(TV i@@11)\n(<= $i1@@1 i@@11)\n(< i@@11 (+ $i2@@1 64))) (= (select $a@@1 (+ $aBase@@1 (- i@@11 $i0@@2))) 0))\n :qid |BitVecto.108:20|\n :skolemid |17|\n :pattern ( (TV i@@11))\n))\n($bb2vec4 $a@@1 $aBase@@1 $bb@@1 $i0@@2 $i1@@1 $i2@@1 $g1@@1 $g2@@1)) (=> (and\n($Aligned (B $idx@@1))\n($Aligned (B $g1@@1))\n(= (B (- $i2@@1 $i0@@2)) (bvmul #x00000010 (bvsub (B $idx@@1) (B $g1@@1))))\n(= $i1@@1 $i0@@2)\n(=> (and\n(bvule (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006) #x03ffffff)\n(= (bvmul #x00000040 (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)) (B (- $i2@@1 $i0@@2)))) (= (- $idx@@1 $g1@@1) (* 4 (I (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)))))\n(forall ((i@@12 Int) ) (! 
(=> (and\n(TV i@@12)\n(<= $i2@@1 i@@12)\n(< i@@12 (+ $i2@@1 64))) (and\n(bvule (B (- $i2@@1 $i0@@2)) (B (- i@@12 $i0@@2)))\n(bvule (B (- i@@12 $i0@@2)) (bvadd (B (- $i2@@1 $i0@@2)) #x0000003f))))\n :qid |BitVecto.115:20|\n :skolemid |18|\n :pattern ( (TV i@@12))\n))\n(between $g1@@1 $g2@@1 $idx@@1)\n(= (B 0) #x00000000)) anon0_correct@@12)))))\nPreconditionGeneratedEntry_correct@@12))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+3196 () Bool)\n(declare-fun %lbl%@6475 () Bool)\n(declare-fun $k@@1 () Int)\n(declare-fun $i0@@3 () Int)\n(declare-fun %lbl%+6455 () Bool)\n(push 1)\n(set-info :boogie-vc-id _bb4Get2Bit)\n(assert (not\n(let ((anon0_correct@@13 (=> (! (and %lbl%+3196 true) :lblpos +3196) (and\n(! (or %lbl%@6475 (bvule (bvand (bvlshr (B (- $k@@1 $i0@@3)) #x00000001) #x0000001f) #x0000001f)) :lblneg @6475)\n(=> (bvule (bvand (bvlshr (B (- $k@@1 $i0@@3)) #x00000001) #x0000001f) #x0000001f) true)))))\n(let ((PreconditionGeneratedEntry_correct@@13 (=> (! (and %lbl%+6455 true) :lblpos +6455) anon0_correct@@13)))\nPreconditionGeneratedEntry_correct@@13))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+3247 () Bool)\n(declare-fun %lbl%@6863 () Bool)\n(declare-fun $a@@2 () (Array Int Int))\n(declare-fun $aBase@@2 () Int)\n(declare-fun $k@@2 () Int)\n(declare-fun $i0@@4 () Int)\n(declare-fun $val () Int)\n(declare-fun $ret@@0 () (Array Int Int))\n(declare-fun $i1@@2 () Int)\n(declare-fun $i2@@2 () Int)\n(declare-fun $g1@@2 () Int)\n(declare-fun $g2@@2 () Int)\n(declare-fun %lbl%@6895 () Bool)\n(declare-fun $idx@@2 () Int)\n(declare-fun %lbl%@6903 () Bool)\n(declare-fun %lbl%+6496 () Bool)\n(declare-fun $bb@@2 () (Array Int Int))\n(declare-fun $bbb@@0 () Int)\n(declare-fun $_bbb () Int)\n(push 1)\n(set-info :boogie-vc-id _bb4Set2Bit)\n(assert (not\n(let ((anon0_correct@@14 (=> (! (and %lbl%+3247 true) :lblpos +3247) (and\n(! 
(or %lbl%@6863 ($bb2vec4 (store $a@@2 (+ $aBase@@2 (- $k@@2 $i0@@4)) $val) $aBase@@2 $ret@@0 $i0@@4 $i1@@2 $i2@@2 $g1@@2 $g2@@2)) :lblneg @6863)\n(=> ($bb2vec4 (store $a@@2 (+ $aBase@@2 (- $k@@2 $i0@@4)) $val) $aBase@@2 $ret@@0 $i0@@4 $i1@@2 $i2@@2 $g1@@2 $g2@@2) (and\n(! (or %lbl%@6895 (between $g1@@2 $g2@@2 $idx@@2)) :lblneg @6895)\n(=> (between $g1@@2 $g2@@2 $idx@@2) (and\n(! (or %lbl%@6903 (bvule (bvand (bvlshr (B (- $k@@2 $i0@@4)) #x00000001) #x0000001f) #x0000001f)) :lblneg @6903)\n(=> (bvule (bvand (bvlshr (B (- $k@@2 $i0@@4)) #x00000001) #x0000001f) #x0000001f) true)))))))))\n(let ((PreconditionGeneratedEntry_correct@@14 (=> (! (and %lbl%+6496 true) :lblpos +6496) (=> ($bb2vec4 $a@@2 $aBase@@2 $bb@@2 $i0@@4 $i1@@2 $i2@@2 $g1@@2 $g2@@2) (=> (and\n(TV $k@@2)\n(word (- $k@@2 $i0@@4))\n(<= $i1@@2 $k@@2)\n(< $k@@2 $i2@@2)\n($Aligned (B (- $k@@2 $i0@@4)))\n(= $idx@@2 (+ $g1@@2 (* 4 (I (bvlshr (B (- $k@@2 $i0@@4)) #x00000006)))))\n(bvule (B $val) #x00000003)\n(= (B $bbb@@0) (bvand (B (select $bb@@2 $idx@@2)) (bvnot (bvshl #x00000003 (bvand (bvlshr (B (- $k@@2 $i0@@4)) #x00000001) #x0000001f)))))\n(= (B $_bbb) (bvor (B $bbb@@0) (bvshl (B $val) (bvand (bvlshr (B (- $k@@2 $i0@@4)) #x00000001) #x0000001f))))\n(= $ret@@0 (store $bb@@2 $idx@@2 $_bbb))) anon0_correct@@14)))))\nPreconditionGeneratedEntry_correct@@14))\n))\n(check-sat)\n(pop 1)\n(declare-fun %lbl%+3250 () Bool)\n(declare-fun %lbl%@7266 () Bool)\n(declare-fun %lbl%@7276 () Bool)\n(declare-fun %lbl%@7286 () Bool)\n(declare-fun %lbl%@7296 () Bool)\n(declare-fun %lbl%@7306 () Bool)\n(declare-fun %lbl%@7316 () Bool)\n(declare-fun %lbl%@7326 () Bool)\n(declare-fun %lbl%@7336 () Bool)\n(declare-fun %lbl%@7346 () Bool)\n(declare-fun %lbl%@7356 () Bool)\n(declare-fun %lbl%@7366 () Bool)\n(declare-fun %lbl%@7376 () Bool)\n(declare-fun %lbl%@7386 () Bool)\n(declare-fun %lbl%@7396 () Bool)\n(declare-fun %lbl%@7406 () Bool)\n(declare-fun %lbl%@7416 () Bool)\n(declare-fun %lbl%@7426 () Bool)\n(declare-fun %lbl%@7436 () 
Bool)\n(declare-fun %lbl%@7446 () Bool)\n(declare-fun %lbl%@7456 () Bool)\n(declare-fun %lbl%@7466 () Bool)\n(declare-fun %lbl%@7476 () Bool)\n(declare-fun %lbl%@7486 () Bool)\n(declare-fun %lbl%@7496 () Bool)\n(declare-fun %lbl%@7506 () Bool)\n(declare-fun %lbl%@7516 () Bool)\n(declare-fun %lbl%@7526 () Bool)\n(declare-fun %lbl%@7536 () Bool)\n(declare-fun %lbl%@7546 () Bool)\n(declare-fun %lbl%@7556 () Bool)\n(declare-fun %lbl%@7566 () Bool)\n(declare-fun %lbl%+6924 () Bool)\n(push 1)\n(set-info :boogie-vc-id _const)\n(assert (not\n(let ((anon0_correct@@15 (=> (! (and %lbl%+3250 true) :lblpos +3250) (and\n(! (or %lbl%@7266 (= (bvsub #x00000001 #x00000001) #x00000000)) :lblneg @7266)\n(=> (= (bvsub #x00000001 #x00000001) #x00000000) (and\n(! (or %lbl%@7276 (= (bvadd #x00000001 #x00000001) #x00000002)) :lblneg @7276)\n(=> (= (bvadd #x00000001 #x00000001) #x00000002) (and\n(! (or %lbl%@7286 (= (bvadd #x00000002 #x00000001) #x00000003)) :lblneg @7286)\n(=> (= (bvadd #x00000002 #x00000001) #x00000003) (and\n(! (or %lbl%@7296 (= (bvadd #x00000002 #x00000002) #x00000004)) :lblneg @7296)\n(=> (= (bvadd #x00000002 #x00000002) #x00000004) (and\n(! (or %lbl%@7306 (= (bvadd #x00000004 #x00000001) #x00000005)) :lblneg @7306)\n(=> (= (bvadd #x00000004 #x00000001) #x00000005) (and\n(! (or %lbl%@7316 (= (bvadd #x00000005 #x00000001) #x00000006)) :lblneg @7316)\n(=> (= (bvadd #x00000005 #x00000001) #x00000006) (and\n(! (or %lbl%@7326 (= (bvadd #x00000005 #x00000002) #x00000007)) :lblneg @7326)\n(=> (= (bvadd #x00000005 #x00000002) #x00000007) (and\n(! (or %lbl%@7336 (= (bvmul #x00000004 #x00000004) #x00000010)) :lblneg @7336)\n(=> (= (bvmul #x00000004 #x00000004) #x00000010) (and\n(! (or %lbl%@7346 (= (bvadd #x00000010 #x00000010) #x00000020)) :lblneg @7346)\n(=> (= (bvadd #x00000010 #x00000010) #x00000020) (and\n(! (or %lbl%@7356 (= (bvsub #x00000020 #x00000001) #x0000001f)) :lblneg @7356)\n(=> (= (bvsub #x00000020 #x00000001) #x0000001f) (and\n(! 
(or %lbl%@7366 (= (bvadd #x00000020 #x00000020) #x00000040)) :lblneg @7366)\n(=> (= (bvadd #x00000020 #x00000020) #x00000040) (and\n(! (or %lbl%@7376 (= (bvsub #x00000040 #x00000001) #x0000003f)) :lblneg @7376)\n(=> (= (bvsub #x00000040 #x00000001) #x0000003f) (and\n(! (or %lbl%@7386 (= (bvmul #x00000020 #x00000004) #x00000080)) :lblneg @7386)\n(=> (= (bvmul #x00000020 #x00000004) #x00000080) (and\n(! (or %lbl%@7396 (= (bvsub #x00000080 #x00000001) #x0000007f)) :lblneg @7396)\n(=> (= (bvsub #x00000080 #x00000001) #x0000007f) (and\n(! (or %lbl%@7406 (= (bvmul #x00000010 #x00000010) #x00000100)) :lblneg @7406)\n(=> (= (bvmul #x00000010 #x00000010) #x00000100) (and\n(! (or %lbl%@7416 (= (bvadd #x00000100 #x00000100) #x00000200)) :lblneg @7416)\n(=> (= (bvadd #x00000100 #x00000100) #x00000200) (and\n(! (or %lbl%@7426 (= (bvmul #x00000040 #x00000040) #x00001000)) :lblneg @7426)\n(=> (= (bvmul #x00000040 #x00000040) #x00001000) (and\n(! (or %lbl%@7436 (= (bvsub #x00001000 #x00000001) #x00000fff)) :lblneg @7436)\n(=> (= (bvsub #x00001000 #x00000001) #x00000fff) (and\n(! (or %lbl%@7446 (= (bvmul #x00000100 #x00000100) #x00010000)) :lblneg @7446)\n(=> (= (bvmul #x00000100 #x00000100) #x00010000) (and\n(! (or %lbl%@7456 (= (bvsub #x00010000 #x00000001) #x0000ffff)) :lblneg @7456)\n(=> (= (bvsub #x00010000 #x00000001) #x0000ffff) (and\n(! (or %lbl%@7466 (= (bvmul #x00010000 #x00000020) #x00200000)) :lblneg @7466)\n(=> (= (bvmul #x00010000 #x00000020) #x00200000) (and\n(! (or %lbl%@7476 (= (bvsub #x00200000 #x00000001) #x001fffff)) :lblneg @7476)\n(=> (= (bvsub #x00200000 #x00000001) #x001fffff) (and\n(! (or %lbl%@7486 (= (bvmul #x00010000 #x00000100) #x01000000)) :lblneg @7486)\n(=> (= (bvmul #x00010000 #x00000100) #x01000000) (and\n(! (or %lbl%@7496 (= (bvsub #x01000000 #x00000001) #x00ffffff)) :lblneg @7496)\n(=> (= (bvsub #x01000000 #x00000001) #x00ffffff) (and\n(! 
(or %lbl%@7506 (= (bvmul #x00010000 #x00000200) #x02000000)) :lblneg @7506)\n(=> (= (bvmul #x00010000 #x00000200) #x02000000) (and\n(! (or %lbl%@7516 (= (bvsub #x02000000 #x00000001) #x01ffffff)) :lblneg @7516)\n(=> (= (bvsub #x02000000 #x00000001) #x01ffffff) (and\n(! (or %lbl%@7526 (= (bvadd #x02000000 #x02000000) #x04000000)) :lblneg @7526)\n(=> (= (bvadd #x02000000 #x02000000) #x04000000) (and\n(! (or %lbl%@7536 (= (bvsub #x04000000 #x00000001) #x03ffffff)) :lblneg @7536)\n(=> (= (bvsub #x04000000 #x00000001) #x03ffffff) (and\n(! (or %lbl%@7546 (= (bvmul #x00010000 #x0000ffff) #xffff0000)) :lblneg @7546)\n(=> (= (bvmul #x00010000 #x0000ffff) #xffff0000) (and\n(! (or %lbl%@7556 (= (bvadd #xffff0000 #x0000ffff) #xffffffff)) :lblneg @7556)\n(=> (= (bvadd #xffff0000 #x0000ffff) #xffffffff) (and\n(! (or %lbl%@7566 (= (bvsub #xffffffff #x00000003) #xfffffffc)) :lblneg @7566)\n(=> (= (bvsub #xffffffff #x00000003) #xfffffffc) true)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))\n(let ((PreconditionGeneratedEntry_correct@@15 (=> (! (and %lbl%+6924 true) :lblpos +6924) anon0_correct@@15)))\nPreconditionGeneratedEntry_correct@@15))\n))\n(check-sat)\n(pop 1)\n"
  },
  {
    "path": "test/json.seed",
    "content": "1000000 010E5\n"
  },
  {
    "path": "test/no_main.c",
    "content": "#define ASMSTR(S) \"\\t\" S \"\\n\"\n\nasm(\n    \".globl _entry\\n\"\n    \".type _entry,@function\\n\"\n    \"_entry:\\n\"\n\n    ASMSTR(\".intel_syntax noprefix\")\n\n    ASMSTR(\"mov rdi, [rsp];\")\n    ASMSTR(\"dec rdi;\")\n    ASMSTR(\"mov r15, rdi;\")\n    ASMSTR(\"test rdi, rdi;\")\n    ASMSTR(\"jne Y1;\")\n    ASMSTR(\"jmp B1;\")\n    ASMSTR(\".byte 0x2f\")\n    ASMSTR(\"jmp A1;\")\n    ASMSTR(\"jmp Z1;\")\n    ASMSTR(\"jmp A1;\")\n    ASMSTR(\"jmp Z1;\")\n    ASMSTR(\"jmp A1;\")\n    ASMSTR(\"jmp Z1;\")\n    ASMSTR(\"jmp A1;\")\n    ASMSTR(\"jmp Z1;\")\n    ASMSTR(\"jmp A1;\")\n    ASMSTR(\"jmp Z1;\")\n    ASMSTR(\"jmp A1;\")\n    ASMSTR(\"jmp Z1;\")\n    ASMSTR(\".global Y1\")\n    ASMSTR(\"Y1:\")\n    ASMSTR(\"xor rbx, rbx\")\n    ASMSTR(\"mov bx, word ptr [X1];\")\n    ASMSTR(\"sub rbx, 0xf1dc;\")\n    ASMSTR(\"push rbx;\")\n    ASMSTR(\"lea r8, [rip + A1];\")\n    ASMSTR(\".global A1\")\n    ASMSTR(\"A1:\")\n    ASMSTR(\"pop r9;\")\n    ASMSTR(\"add r8, r9;\")\n    ASMSTR(\"call r8;\")\n    ASMSTR(\".global Z1\")\n    ASMSTR(\"Z1:\")\n    ASMSTR(\"call A1;\")\n    ASMSTR(\".global X1\")\n    ASMSTR(\"X1:\")\n    ASMSTR(\"jmp A1;\")\n    ASMSTR(\".global B1\")\n    ASMSTR(\"B1:\")\n\n\n\n    ASMSTR(\"test r15, r15;\")\n    ASMSTR(\"jne Y2;\")\n    ASMSTR(\"jmp B2;\")\n    ASMSTR(\"jmp A2;\")\n    ASMSTR(\"jmp Z2;\")\n    ASMSTR(\"jmp A2;\")\n    ASMSTR(\"jmp Z2;\")\n    ASMSTR(\"jmp A2;\")\n    ASMSTR(\"jmp Z2;\")\n    ASMSTR(\"jmp A2;\")\n    ASMSTR(\"jmp Z2;\")\n    ASMSTR(\"jmp A2;\")\n    ASMSTR(\"jmp Z2;\")\n    ASMSTR(\"jmp A2;\")\n    ASMSTR(\"jmp Z2;\")\n    ASMSTR(\".global Y2\")\n    ASMSTR(\"Y2:\")\n    ASMSTR(\"xor rbx, rbx\")\n    ASMSTR(\"mov bx, word ptr [X2];\")\n    ASMSTR(\"sub rbx, 0xf1dc;\")\n    ASMSTR(\"push rbx;\")\n    ASMSTR(\"lea r8, [rip + A2];\")\n    ASMSTR(\".global A2\")\n    ASMSTR(\"A2:\")\n    ASMSTR(\"pop r9;\")\n    ASMSTR(\"add r8, r9;\")\n    ASMSTR(\"call r8;\")\n    ASMSTR(\".global Z2\")\n    
ASMSTR(\"Z2:\")\n    ASMSTR(\"call A2;\")\n    ASMSTR(\".global X2\")\n    ASMSTR(\"X2:\")\n    ASMSTR(\"jmp A2;\")\n    ASMSTR(\".global B2\")\n    ASMSTR(\"B2:\")\n\n\n\n\n\n\n\n\n    ASMSTR(\"test r15, r15;\")\n    ASMSTR(\"jne Y3;\")\n    ASMSTR(\"jmp B3;\")\n    ASMSTR(\"jmp A3;\")\n    ASMSTR(\"jmp Z3;\")\n    ASMSTR(\"jmp A3;\")\n    ASMSTR(\"jmp Z3;\")\n    ASMSTR(\"jmp A3;\")\n    ASMSTR(\"jmp Z3;\")\n    ASMSTR(\"jmp A3;\")\n    ASMSTR(\"jmp Z3;\")\n    ASMSTR(\"jmp A3;\")\n    ASMSTR(\"jmp Z3;\")\n    ASMSTR(\"jmp A3;\")\n    ASMSTR(\"jmp Z3;\")\n    ASMSTR(\".global Y3\")\n    ASMSTR(\"Y3:\")\n    ASMSTR(\"xor rbx, rbx\")\n    ASMSTR(\"mov bx, word ptr [X3];\")\n    ASMSTR(\"sub rbx, 0xf1dc;\")\n    ASMSTR(\"push rbx;\")\n    ASMSTR(\"lea r8, [rip + A3];\")\n    ASMSTR(\".global A3\")\n    ASMSTR(\"A3:\")\n    ASMSTR(\"pop r9;\")\n    ASMSTR(\"add r8, r9;\")\n    ASMSTR(\"call r8;\")\n    ASMSTR(\".global Z3\")\n    ASMSTR(\"Z3:\")\n    ASMSTR(\"call A3;\")\n    ASMSTR(\".global X3\")\n    ASMSTR(\"X3:\")\n    ASMSTR(\"jmp A3;\")\n    ASMSTR(\".global B3\")\n    ASMSTR(\"B3:\")\n\n\n\n\n\n    ASMSTR(\"test r15, r15;\")\n    ASMSTR(\"jne Y4;\")\n    ASMSTR(\"jmp B4;\")\n    ASMSTR(\"jmp A4;\")\n    ASMSTR(\"jmp Z4;\")\n    ASMSTR(\"jmp A4;\")\n    ASMSTR(\"jmp Z4;\")\n    ASMSTR(\"jmp A4;\")\n    ASMSTR(\"jmp Z4;\")\n    ASMSTR(\"jmp A4;\")\n    ASMSTR(\"jmp Z4;\")\n    ASMSTR(\"jmp A4;\")\n    ASMSTR(\"jmp Z4;\")\n    ASMSTR(\"jmp A4;\")\n    ASMSTR(\"jmp Z4;\")\n    ASMSTR(\".global Y4\")\n    ASMSTR(\"Y4:\")\n    ASMSTR(\"xor rbx, rbx\")\n    ASMSTR(\"mov bx, word ptr [X4];\")\n    ASMSTR(\"sub rbx, 0xf1dc;\")\n    ASMSTR(\"push rbx;\")\n    ASMSTR(\"lea r8, [rip + A4];\")\n    ASMSTR(\".global A4\")\n    ASMSTR(\"A4:\")\n    ASMSTR(\"pop r9;\")\n    ASMSTR(\"add r8, r9;\")\n    ASMSTR(\"call r8;\")\n    ASMSTR(\".global Z4\")\n    ASMSTR(\"Z4:\")\n    ASMSTR(\"call A4;\")\n    ASMSTR(\".global X4\")\n    ASMSTR(\"X4:\")\n    ASMSTR(\"jmp A4;\")\n    
ASMSTR(\".global B4\")\n    ASMSTR(\"B4:\")\n\n\n\n\n\n    ASMSTR(\"test r15, r15;\")\n    ASMSTR(\"jne Y5;\")\n    ASMSTR(\"jmp B5;\")\n    ASMSTR(\"jmp A5;\")\n    ASMSTR(\"jmp Z5;\")\n    ASMSTR(\"jmp A5;\")\n    ASMSTR(\"jmp Z5;\")\n    ASMSTR(\"jmp A5;\")\n    ASMSTR(\"jmp Z5;\")\n    ASMSTR(\"jmp A5;\")\n    ASMSTR(\"jmp Z5;\")\n    ASMSTR(\"jmp A5;\")\n    ASMSTR(\"jmp Z5;\")\n    ASMSTR(\"jmp A5;\")\n    ASMSTR(\"jmp Z5;\")\n    ASMSTR(\".global Y5\")\n    ASMSTR(\"Y5:\")\n    ASMSTR(\"xor rbx, rbx\")\n    ASMSTR(\"mov bx, word ptr [X5];\")\n    ASMSTR(\"sub rbx, 0xf1dc;\")\n    ASMSTR(\"push rbx;\")\n    ASMSTR(\"lea r8, [rip + A5];\")\n    ASMSTR(\".global A5\")\n    ASMSTR(\"A5:\")\n    ASMSTR(\"pop r9;\")\n    ASMSTR(\"add r8, r9;\")\n    ASMSTR(\"call r8;\")\n    ASMSTR(\".global Z5\")\n    ASMSTR(\"Z5:\")\n    ASMSTR(\"call A5;\")\n    ASMSTR(\".global X5\")\n    ASMSTR(\"X5:\")\n    ASMSTR(\"jmp A5;\")\n    ASMSTR(\".global B5\")\n    ASMSTR(\"B5:\")\n\n\n\n\n\n    ASMSTR(\"test r15, r15;\")\n    ASMSTR(\"jne Y6;\")\n    ASMSTR(\"jmp B6;\")\n    ASMSTR(\"jmp A6;\")\n    ASMSTR(\"jmp Z6;\")\n    ASMSTR(\"jmp A6;\")\n    ASMSTR(\"jmp Z6;\")\n    ASMSTR(\"jmp A6;\")\n    ASMSTR(\"jmp Z6;\")\n    ASMSTR(\"jmp A6;\")\n    ASMSTR(\"jmp Z6;\")\n    ASMSTR(\"jmp A6;\")\n    ASMSTR(\"jmp Z6;\")\n    ASMSTR(\"jmp A6;\")\n    ASMSTR(\"jmp Z6;\")\n    ASMSTR(\".global Y6\")\n    ASMSTR(\"Y6:\")\n    ASMSTR(\"xor rbx, rbx\")\n    ASMSTR(\"mov bx, word ptr [X6];\")\n    ASMSTR(\"sub rbx, 0xf1dc;\")\n    ASMSTR(\"push rbx;\")\n    ASMSTR(\"lea r8, [rip + A6];\")\n    ASMSTR(\".global A6\")\n    ASMSTR(\"A6:\")\n    ASMSTR(\"pop r9;\")\n    ASMSTR(\"add r8, r9;\")\n    ASMSTR(\"call r8;\")\n    ASMSTR(\".global Z6\")\n    ASMSTR(\"Z6:\")\n    ASMSTR(\"call A6;\")\n    ASMSTR(\".global X6\")\n    ASMSTR(\"X6:\")\n    ASMSTR(\"jmp A6;\")\n    ASMSTR(\".global B6\")\n    ASMSTR(\"B6:\")\n\n\n\n\n\n\n    ASMSTR(\"test r15, r15;\")\n    ASMSTR(\"jne Y7;\")\n    
ASMSTR(\"jmp B7;\")\n    ASMSTR(\"jmp A7;\")\n    ASMSTR(\"jmp Z7;\")\n    ASMSTR(\"jmp A7;\")\n    ASMSTR(\"jmp Z7;\")\n    ASMSTR(\"jmp A7;\")\n    ASMSTR(\"jmp Z7;\")\n    ASMSTR(\"jmp A7;\")\n    ASMSTR(\"jmp Z7;\")\n    ASMSTR(\"jmp A7;\")\n    ASMSTR(\"jmp Z7;\")\n    ASMSTR(\"jmp A7;\")\n    ASMSTR(\"jmp Z7;\")\n    ASMSTR(\".global Y7\")\n    ASMSTR(\"Y7:\")\n    ASMSTR(\"xor rbx, rbx\")\n    ASMSTR(\"mov bx, word ptr [X7];\")\n    ASMSTR(\"sub rbx, 0xf1dc;\")\n    ASMSTR(\"push rbx;\")\n    ASMSTR(\"lea r8, [rip + A7];\")\n    ASMSTR(\".global A7\")\n    ASMSTR(\"A7:\")\n    ASMSTR(\"pop r9;\")\n    ASMSTR(\"add r8, r9;\")\n    ASMSTR(\"call r8;\")\n    ASMSTR(\".global Z7\")\n    ASMSTR(\"Z7:\")\n    ASMSTR(\"call A7;\")\n    ASMSTR(\".global X7\")\n    ASMSTR(\"X7:\")\n    ASMSTR(\"jmp A7;\")\n    ASMSTR(\".global B7\")\n    ASMSTR(\"B7:\")\n\n\n\n\n\n    ASMSTR(\"test r15, r15;\")\n    ASMSTR(\"jne Y8;\")\n    ASMSTR(\"jmp B8;\")\n    ASMSTR(\"jmp A8;\")\n    ASMSTR(\"jmp Z8;\")\n    ASMSTR(\"jmp A8;\")\n    ASMSTR(\"jmp Z8;\")\n    ASMSTR(\"jmp A8;\")\n    ASMSTR(\"jmp Z8;\")\n    ASMSTR(\"jmp A8;\")\n    ASMSTR(\"jmp Z8;\")\n    ASMSTR(\"jmp A8;\")\n    ASMSTR(\"jmp Z8;\")\n    ASMSTR(\"jmp A8;\")\n    ASMSTR(\"jmp Z8;\")\n    ASMSTR(\".global Y8\")\n    ASMSTR(\"Y8:\")\n    ASMSTR(\"xor rbx, rbx\")\n    ASMSTR(\"mov bx, word ptr [X8];\")\n    ASMSTR(\"sub rbx, 0xf1dc;\")\n    ASMSTR(\"push rbx;\")\n    ASMSTR(\"lea r8, [rip + A8];\")\n    ASMSTR(\".global A8\")\n    ASMSTR(\"A8:\")\n    ASMSTR(\"pop r9;\")\n    ASMSTR(\"add r8, r9;\")\n    ASMSTR(\"call r8;\")\n    ASMSTR(\".global Z8\")\n    ASMSTR(\"Z8:\")\n    ASMSTR(\"call A8;\")\n    ASMSTR(\".global X8\")\n    ASMSTR(\"X8:\")\n    ASMSTR(\"jmp A8;\")\n    ASMSTR(\".global B8\")\n    ASMSTR(\"B8:\")\n\n\n\n\n\n    ASMSTR(\"test r15, r15;\")\n    ASMSTR(\"jne Y9;\")\n    ASMSTR(\"jmp B9;\")\n    ASMSTR(\"jmp A9;\")\n    ASMSTR(\"jmp Z9;\")\n    ASMSTR(\"jmp A9;\")\n    ASMSTR(\"jmp Z9;\")\n    
ASMSTR(\"jmp A9;\")\n    ASMSTR(\"jmp Z9;\")\n    ASMSTR(\"jmp A9;\")\n    ASMSTR(\"jmp Z9;\")\n    ASMSTR(\"jmp A9;\")\n    ASMSTR(\"jmp Z9;\")\n    ASMSTR(\"jmp A9;\")\n    ASMSTR(\"jmp Z9;\")\n    ASMSTR(\".global Y9\")\n    ASMSTR(\"Y9:\")\n    ASMSTR(\"xor rbx, rbx\")\n    ASMSTR(\"mov bx, word ptr [X9];\")\n    ASMSTR(\"sub rbx, 0xf1dc;\")\n    ASMSTR(\"push rbx;\")\n    ASMSTR(\"lea r8, [rip + A9];\")\n    ASMSTR(\".global A9\")\n    ASMSTR(\"A9:\")\n    ASMSTR(\"pop r9;\")\n    ASMSTR(\"add r8, r9;\")\n    ASMSTR(\"call r8;\")\n    ASMSTR(\".global Z9\")\n    ASMSTR(\"Z9:\")\n    ASMSTR(\"call A9;\")\n    ASMSTR(\".global X9\")\n    ASMSTR(\"X9:\")\n    ASMSTR(\"jmp A9;\")\n    ASMSTR(\".global B9\")\n    ASMSTR(\"B9:\")\n\n\n\n\n\n    ASMSTR(\"mov rax, 60;\")\n    ASMSTR(\"mov rdi, 0;\")\n    ASMSTR(\"syscall;\")\n\n\n    ASMSTR(\"ret;\")\n\n    ASMSTR(\".att_syntax;\")\n);\n"
  },
  {
    "path": "test/runtime/server.key",
    "content": "-----BEGIN PRIVATE KEY-----\nMIIBVAIBADANBgkqhkiG9w0BAQEFAASCAT4wggE6AgEAAkEA1AdZNDVOA9cXm97f\nerp1bukz2kohjToJS6Ma8fOb36VV9lQGmDNsJanXFiqafOgV+kh1HXqZ3l1I0JmZ\n71b+QQIDAQABAkAHGfPn5r0lLcgRpWZQwvv56f+dmQwEoeP7z4uwfNtEo0JcRD66\n1WRCvx3LE0VbNeaEdNmSPiRXhlwIggjfrBi9AiEA9UusPBcEp/QcPGs96nQQdQzE\nfw4x0HL/eSV3qHimT6MCIQDdSAiX4Ouxoiwn/9KhDMcZXRYX/OPzj6w8u1YIH7BI\nywIgSozbJdAhHCJ2ym4VfUIVFl3xAmSAA0hQGLOocE1qzl0CIQDRicOxZmhqBiKA\nIgznOn1StEYWov+MhRFZVSBLgw5gbwIgJzOlSlu0Y22hEUsLCKyHBrCAZZHcZ020\n20pfogmQYn0=\n-----END PRIVATE KEY-----\n"
  },
  {
    "path": "test/runtime/server.pem",
    "content": "-----BEGIN CERTIFICATE-----\nMIIBYTCCAQugAwIBAgIJAMPQQtUHkx+KMA0GCSqGSIb3DQEBCwUAMAwxCjAIBgNV\nBAMMAWEwHhcNMTYwOTI0MjIyMDUyWhcNNDQwMjA5MjIyMDUyWjAMMQowCAYDVQQD\nDAFhMFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBANQHWTQ1TgPXF5ve33q6dW7pM9pK\nIY06CUujGvHzm9+lVfZUBpgzbCWp1xYqmnzoFfpIdR16md5dSNCZme9W/kECAwEA\nAaNQME4wHQYDVR0OBBYEFCXtEo9rkLuKGSlm0mFE4Yk/HDJVMB8GA1UdIwQYMBaA\nFCXtEo9rkLuKGSlm0mFE4Yk/HDJVMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEL\nBQADQQCnldOnbdNJZxBO/J+979Urg8qDp8MnlN0979AmK1P5/YzPnAF4BU7QTOTE\nimS5qZ0MvziBa81nVlnnFRkIezcD\n-----END CERTIFICATE-----\n"
  },
  {
    "path": "test/test_daemon.sh",
    "content": "#!/bin/bash\n\nreadonly EXIT_FAILURE=1\n\ntool=$1\noptions=$2\ntarget=$3\nphantom=$target.phantom\necho \"phantom file: $phantom\"\n\nrm -rf $phantom\n$tool $options -- $target 2>$target.daemon.log &\ndaemon_pid=$!\n\nfor i in {1..100}\ndo\n    if [ -f $phantom ]; then\n        echo \"$target: daemon is up\"\n        if [ -v STOCHFUZZ_PRELOAD ]; then\n            LD_PRELOAD=$STOCHFUZZ_PRELOAD ./$phantom ${@:4}\n            code=$?\n        else\n            ./$phantom ${@:4}\n            code=$?\n        fi\n        kill -0 $daemon_pid\n        if [ \"$?\" -eq \"0\" ]; then\n            wait $daemon_pid\n        fi\n        exit $code\n    else\n        echo \"$target: daemon is not ready\"\n        sleep 5\n    fi\ndone\n\necho \"$target: timeout\"\nkill -9 $daemon_pid\nexit 1\n"
  },
  {
    "path": "test/test_daemon_ignore_asan_sof.sh",
    "content": "#!/bin/bash\n\nreadonly EXIT_FAILURE=1\n\ntool=$1\noptions=$2\ntarget=$3\nphantom=$target.phantom\necho \"phantom file: $phantom\"\n\nrm -rf $phantom\n$tool $options -- $target 2>$target.daemon.log &\ndaemon_pid=$!\n\nfor i in {1..100}\ndo\n    if [ -f $phantom ]; then\n        echo \"$target: daemon is up\"\n        ./$phantom ${@:4}\n        code=$?\n        kill -0 $daemon_pid\n        if [ \"$?\" -eq \"0\" ]; then\n            wait $daemon_pid\n        fi\n        exit $code\n    else\n        grep -F \"SUMMARY: AddressSanitizer: stack-overflow\" $target.daemon.log\n        if [ \"$?\" -eq \"0\" ]; then\n            echo \"ASAN stack-overflow: ignore this program\"\n            exit 0\n        fi\n        echo \"$target: daemon is not ready\"\n        sleep 5\n    fi\ndone\n\necho \"$target: timeout\"\nkill -9 $daemon_pid\nexit 1\n"
  },
  {
    "path": "test/timeout.c",
    "content": "#include <string.h>\n#include <unistd.h>\n\nstatic void my_sleep() {\n    sleep(5);\n}\n\nint main(int argc, const char **argv) {\n    if (argc == 2 && !strcmp(argv[1], \"mdzz\")) {\n        void (*p)() = my_sleep;\n        (*p)();\n    }\n}\n"
  },
  {
    "path": "test/unintentional_crash.c",
    "content": "#define ASMSTR(S) \"\\t\" S \"\\n\"\n\nint main(int argc, char **argv) {\n    asm volatile(\n        ASMSTR(\".intel_syntax noprefix\")\n\n        ASMSTR(\"dec rdi;\")\n        ASMSTR(\"mov r15, rdi;\")\n        ASMSTR(\"test rdi, rdi;\")\n        ASMSTR(\"jne Y1;\")\n        ASMSTR(\"jmp B1;\")\n        ASMSTR(\".byte 0x2f\")\n        ASMSTR(\"jmp A1;\")\n        ASMSTR(\"jmp Z1;\")\n        ASMSTR(\"jmp A1;\")\n        ASMSTR(\"jmp Z1;\")\n        ASMSTR(\"jmp A1;\")\n        ASMSTR(\"jmp Z1;\")\n        ASMSTR(\"jmp A1;\")\n        ASMSTR(\"jmp Z1;\")\n        ASMSTR(\"jmp A1;\")\n        ASMSTR(\"jmp Z1;\")\n        ASMSTR(\"jmp A1;\")\n        ASMSTR(\"jmp Z1;\")\n        ASMSTR(\".global Y1\")\n        ASMSTR(\"Y1:\")\n        ASMSTR(\"xor rbx, rbx\")\n        ASMSTR(\"mov bx, word ptr [X1];\")\n        ASMSTR(\"sub rbx, 0xf1dc;\")\n        ASMSTR(\"push rbx;\")\n        ASMSTR(\"lea r8, [rip + A1];\")\n        ASMSTR(\".global A1\")\n        ASMSTR(\"A1:\")\n        ASMSTR(\"pop r9;\")\n        ASMSTR(\"add r8, r9;\")\n        ASMSTR(\"call r8;\")\n        ASMSTR(\".global Z1\")\n        ASMSTR(\"Z1:\")\n        ASMSTR(\"call A1;\")\n        ASMSTR(\".global X1\")\n        ASMSTR(\"X1:\")\n        ASMSTR(\"jmp A1;\")\n        ASMSTR(\".global B1\")\n        ASMSTR(\"B1:\")\n\n\n\n        ASMSTR(\"test r15, r15;\")\n        ASMSTR(\"jne Y2;\")\n        ASMSTR(\"jmp B2;\")\n        ASMSTR(\"jmp A2;\")\n        ASMSTR(\"jmp Z2;\")\n        ASMSTR(\"jmp A2;\")\n        ASMSTR(\"jmp Z2;\")\n        ASMSTR(\"jmp A2;\")\n        ASMSTR(\"jmp Z2;\")\n        ASMSTR(\"jmp A2;\")\n        ASMSTR(\"jmp Z2;\")\n        ASMSTR(\"jmp A2;\")\n        ASMSTR(\"jmp Z2;\")\n        ASMSTR(\"jmp A2;\")\n        ASMSTR(\"jmp Z2;\")\n        ASMSTR(\".global Y2\")\n        ASMSTR(\"Y2:\")\n        ASMSTR(\"xor rbx, rbx\")\n        ASMSTR(\"mov bx, word ptr [X2];\")\n        ASMSTR(\"sub rbx, 0xf1dc;\")\n        ASMSTR(\"push rbx;\")\n        ASMSTR(\"lea r8, [rip 
+ A2];\")\n        ASMSTR(\".global A2\")\n        ASMSTR(\"A2:\")\n        ASMSTR(\"pop r9;\")\n        ASMSTR(\"add r8, r9;\")\n        ASMSTR(\"call r8;\")\n        ASMSTR(\".global Z2\")\n        ASMSTR(\"Z2:\")\n        ASMSTR(\"call A2;\")\n        ASMSTR(\".global X2\")\n        ASMSTR(\"X2:\")\n        ASMSTR(\"jmp A2;\")\n        ASMSTR(\".global B2\")\n        ASMSTR(\"B2:\")\n\n\n\n\n\n\n\n\n        ASMSTR(\"test r15, r15;\")\n        ASMSTR(\"jne Y3;\")\n        ASMSTR(\"jmp B3;\")\n        ASMSTR(\"jmp A3;\")\n        ASMSTR(\"jmp Z3;\")\n        ASMSTR(\"jmp A3;\")\n        ASMSTR(\"jmp Z3;\")\n        ASMSTR(\"jmp A3;\")\n        ASMSTR(\"jmp Z3;\")\n        ASMSTR(\"jmp A3;\")\n        ASMSTR(\"jmp Z3;\")\n        ASMSTR(\"jmp A3;\")\n        ASMSTR(\"jmp Z3;\")\n        ASMSTR(\"jmp A3;\")\n        ASMSTR(\"jmp Z3;\")\n        ASMSTR(\".global Y3\")\n        ASMSTR(\"Y3:\")\n        ASMSTR(\"xor rbx, rbx\")\n        ASMSTR(\"mov bx, word ptr [X3];\")\n        ASMSTR(\"sub rbx, 0xf1dc;\")\n        ASMSTR(\"push rbx;\")\n        ASMSTR(\"lea r8, [rip + A3];\")\n        ASMSTR(\".global A3\")\n        ASMSTR(\"A3:\")\n        ASMSTR(\"pop r9;\")\n        ASMSTR(\"add r8, r9;\")\n        ASMSTR(\"call r8;\")\n        ASMSTR(\".global Z3\")\n        ASMSTR(\"Z3:\")\n        ASMSTR(\"call A3;\")\n        ASMSTR(\".global X3\")\n        ASMSTR(\"X3:\")\n        ASMSTR(\"jmp A3;\")\n        ASMSTR(\".global B3\")\n        ASMSTR(\"B3:\")\n\n\n\n\n\n        ASMSTR(\"test r15, r15;\")\n        ASMSTR(\"jne Y4;\")\n        ASMSTR(\"jmp B4;\")\n        ASMSTR(\"jmp A4;\")\n        ASMSTR(\"jmp Z4;\")\n        ASMSTR(\"jmp A4;\")\n        ASMSTR(\"jmp Z4;\")\n        ASMSTR(\"jmp A4;\")\n        ASMSTR(\"jmp Z4;\")\n        ASMSTR(\"jmp A4;\")\n        ASMSTR(\"jmp Z4;\")\n        ASMSTR(\"jmp A4;\")\n        ASMSTR(\"jmp Z4;\")\n        ASMSTR(\"jmp A4;\")\n        ASMSTR(\"jmp Z4;\")\n        ASMSTR(\".global Y4\")\n        ASMSTR(\"Y4:\")\n        
ASMSTR(\"xor rbx, rbx\")\n        ASMSTR(\"mov bx, word ptr [X4];\")\n        ASMSTR(\"sub rbx, 0xf1dc;\")\n        ASMSTR(\"push rbx;\")\n        ASMSTR(\"lea r8, [rip + A4];\")\n        ASMSTR(\".global A4\")\n        ASMSTR(\"A4:\")\n        ASMSTR(\"pop r9;\")\n        ASMSTR(\"add r8, r9;\")\n        ASMSTR(\"call r8;\")\n        ASMSTR(\".global Z4\")\n        ASMSTR(\"Z4:\")\n        ASMSTR(\"call A4;\")\n        ASMSTR(\".global X4\")\n        ASMSTR(\"X4:\")\n        ASMSTR(\"jmp A4;\")\n        ASMSTR(\".global B4\")\n        ASMSTR(\"B4:\")\n\n\n\n\n\n        ASMSTR(\"test r15, r15;\")\n        ASMSTR(\"jne Y5;\")\n        ASMSTR(\"jmp B5;\")\n        ASMSTR(\"jmp A5;\")\n        ASMSTR(\"jmp Z5;\")\n        ASMSTR(\"jmp A5;\")\n        ASMSTR(\"jmp Z5;\")\n        ASMSTR(\"jmp A5;\")\n        ASMSTR(\"jmp Z5;\")\n        ASMSTR(\"jmp A5;\")\n        ASMSTR(\"jmp Z5;\")\n        ASMSTR(\"jmp A5;\")\n        ASMSTR(\"jmp Z5;\")\n        ASMSTR(\"jmp A5;\")\n        ASMSTR(\"jmp Z5;\")\n        ASMSTR(\".global Y5\")\n        ASMSTR(\"Y5:\")\n        ASMSTR(\"xor rbx, rbx\")\n        ASMSTR(\"mov bx, word ptr [X5];\")\n        ASMSTR(\"sub rbx, 0xf1dc;\")\n        ASMSTR(\"push rbx;\")\n        ASMSTR(\"lea r8, [rip + A5];\")\n        ASMSTR(\".global A5\")\n        ASMSTR(\"A5:\")\n        ASMSTR(\"pop r9;\")\n        ASMSTR(\"add r8, r9;\")\n        ASMSTR(\"call r8;\")\n        ASMSTR(\".global Z5\")\n        ASMSTR(\"Z5:\")\n        ASMSTR(\"call A5;\")\n        ASMSTR(\".global X5\")\n        ASMSTR(\"X5:\")\n        ASMSTR(\"jmp A5;\")\n        ASMSTR(\".global B5\")\n        ASMSTR(\"B5:\")\n\n\n\n\n\n        ASMSTR(\"test r15, r15;\")\n        ASMSTR(\"jne Y6;\")\n        ASMSTR(\"jmp B6;\")\n        ASMSTR(\"jmp A6;\")\n        ASMSTR(\"jmp Z6;\")\n        ASMSTR(\"jmp A6;\")\n        ASMSTR(\"jmp Z6;\")\n        ASMSTR(\"jmp A6;\")\n        ASMSTR(\"jmp Z6;\")\n        ASMSTR(\"jmp A6;\")\n        ASMSTR(\"jmp Z6;\")\n        ASMSTR(\"jmp 
A6;\")\n        ASMSTR(\"jmp Z6;\")\n        ASMSTR(\"jmp A6;\")\n        ASMSTR(\"jmp Z6;\")\n        ASMSTR(\".global Y6\")\n        ASMSTR(\"Y6:\")\n        ASMSTR(\"xor rbx, rbx\")\n        ASMSTR(\"mov bx, word ptr [X6];\")\n        ASMSTR(\"sub rbx, 0xf1dc;\")\n        ASMSTR(\"push rbx;\")\n        ASMSTR(\"lea r8, [rip + A6];\")\n        ASMSTR(\".global A6\")\n        ASMSTR(\"A6:\")\n        ASMSTR(\"pop r9;\")\n        ASMSTR(\"add r8, r9;\")\n        ASMSTR(\"call r8;\")\n        ASMSTR(\".global Z6\")\n        ASMSTR(\"Z6:\")\n        ASMSTR(\"call A6;\")\n        ASMSTR(\".global X6\")\n        ASMSTR(\"X6:\")\n        ASMSTR(\"jmp A6;\")\n        ASMSTR(\".global B6\")\n        ASMSTR(\"B6:\")\n\n\n\n\n\n\n        ASMSTR(\"test r15, r15;\")\n        ASMSTR(\"jne Y7;\")\n        ASMSTR(\"jmp B7;\")\n        ASMSTR(\"jmp A7;\")\n        ASMSTR(\"jmp Z7;\")\n        ASMSTR(\"jmp A7;\")\n        ASMSTR(\"jmp Z7;\")\n        ASMSTR(\"jmp A7;\")\n        ASMSTR(\"jmp Z7;\")\n        ASMSTR(\"jmp A7;\")\n        ASMSTR(\"jmp Z7;\")\n        ASMSTR(\"jmp A7;\")\n        ASMSTR(\"jmp Z7;\")\n        ASMSTR(\"jmp A7;\")\n        ASMSTR(\"jmp Z7;\")\n        ASMSTR(\".global Y7\")\n        ASMSTR(\"Y7:\")\n        ASMSTR(\"xor rbx, rbx\")\n        ASMSTR(\"mov bx, word ptr [X7];\")\n        ASMSTR(\"sub rbx, 0xf1dc;\")\n        ASMSTR(\"push rbx;\")\n        ASMSTR(\"lea r8, [rip + A7];\")\n        ASMSTR(\".global A7\")\n        ASMSTR(\"A7:\")\n        ASMSTR(\"pop r9;\")\n        ASMSTR(\"add r8, r9;\")\n        ASMSTR(\"call r8;\")\n        ASMSTR(\".global Z7\")\n        ASMSTR(\"Z7:\")\n        ASMSTR(\"call A7;\")\n        ASMSTR(\".global X7\")\n        ASMSTR(\"X7:\")\n        ASMSTR(\"jmp A7;\")\n        ASMSTR(\".global B7\")\n        ASMSTR(\"B7:\")\n\n\n\n\n\n        ASMSTR(\"test r15, r15;\")\n        ASMSTR(\"jne Y8;\")\n        ASMSTR(\"jmp B8;\")\n        ASMSTR(\"jmp A8;\")\n        ASMSTR(\"jmp Z8;\")\n        ASMSTR(\"jmp A8;\")\n        
ASMSTR(\"jmp Z8;\")\n        ASMSTR(\"jmp A8;\")\n        ASMSTR(\"jmp Z8;\")\n        ASMSTR(\"jmp A8;\")\n        ASMSTR(\"jmp Z8;\")\n        ASMSTR(\"jmp A8;\")\n        ASMSTR(\"jmp Z8;\")\n        ASMSTR(\"jmp A8;\")\n        ASMSTR(\"jmp Z8;\")\n        ASMSTR(\".global Y8\")\n        ASMSTR(\"Y8:\")\n        ASMSTR(\"xor rbx, rbx\")\n        ASMSTR(\"mov bx, word ptr [X8];\")\n        ASMSTR(\"sub rbx, 0xf1dc;\")\n        ASMSTR(\"push rbx;\")\n        ASMSTR(\"lea r8, [rip + A8];\")\n        ASMSTR(\".global A8\")\n        ASMSTR(\"A8:\")\n        ASMSTR(\"pop r9;\")\n        ASMSTR(\"add r8, r9;\")\n        ASMSTR(\"call r8;\")\n        ASMSTR(\".global Z8\")\n        ASMSTR(\"Z8:\")\n        ASMSTR(\"call A8;\")\n        ASMSTR(\".global X8\")\n        ASMSTR(\"X8:\")\n        ASMSTR(\"jmp A8;\")\n        ASMSTR(\".global B8\")\n        ASMSTR(\"B8:\")\n\n\n\n\n\n        ASMSTR(\"test r15, r15;\")\n        ASMSTR(\"jne Y9;\")\n        ASMSTR(\"jmp B9;\")\n        ASMSTR(\"jmp A9;\")\n        ASMSTR(\"jmp Z9;\")\n        ASMSTR(\"jmp A9;\")\n        ASMSTR(\"jmp Z9;\")\n        ASMSTR(\"jmp A9;\")\n        ASMSTR(\"jmp Z9;\")\n        ASMSTR(\"jmp A9;\")\n        ASMSTR(\"jmp Z9;\")\n        ASMSTR(\"jmp A9;\")\n        ASMSTR(\"jmp Z9;\")\n        ASMSTR(\"jmp A9;\")\n        ASMSTR(\"jmp Z9;\")\n        ASMSTR(\".global Y9\")\n        ASMSTR(\"Y9:\")\n        ASMSTR(\"xor rbx, rbx\")\n        ASMSTR(\"mov bx, word ptr [X9];\")\n        ASMSTR(\"sub rbx, 0xf1dc;\")\n        ASMSTR(\"push rbx;\")\n        ASMSTR(\"lea r8, [rip + A9];\")\n        ASMSTR(\".global A9\")\n        ASMSTR(\"A9:\")\n        ASMSTR(\"pop r9;\")\n        ASMSTR(\"add r8, r9;\")\n        ASMSTR(\"call r8;\")\n        ASMSTR(\".global Z9\")\n        ASMSTR(\"Z9:\")\n        ASMSTR(\"call A9;\")\n        ASMSTR(\".global X9\")\n        ASMSTR(\"X9:\")\n        ASMSTR(\"jmp A9;\")\n        ASMSTR(\".global B9\")\n        ASMSTR(\"B9:\")\n\n\n\n\n\n        ASMSTR(\"mov rax, 60;\")\n   
     ASMSTR(\"mov rdi, 0;\")\n        ASMSTR(\"syscall;\")\n\n\n        ASMSTR(\"ret;\")\n\n        ASMSTR(\".att_syntax;\")\n        );\n}\n"
  }
]