Repository: ZhangZhuoSJTU/StochFuzz Branch: master Commit: de10f62f3af9 Files: 196 Total size: 20.4 MB Directory structure: gitextract_uw9hr437/ ├── .github/ │ └── workflows/ │ ├── basic.yml │ ├── benchmark.yml │ ├── early_instrumentation.yml │ ├── force_linear.yml │ ├── no_generic_pic.yml │ └── pdisasm.yml ├── .gitignore ├── LICENSE ├── README.md ├── benchmark/ │ ├── boringssl-2016-02-12.inline │ ├── boringssl-2016-02-12.normal │ ├── boringssl-2016-02-12.seed │ ├── c-ares-CVE-2016-5180.inline │ ├── c-ares-CVE-2016-5180.normal │ ├── c-ares-CVE-2016-5180.seed │ ├── freetype2-2017.inline │ ├── freetype2-2017.normal │ ├── freetype2-2017.seed │ ├── guetzli-2017-3-30.inline │ ├── guetzli-2017-3-30.normal │ ├── guetzli-2017-3-30.seed │ ├── harfbuzz-1.3.2.inline │ ├── harfbuzz-1.3.2.normal │ ├── harfbuzz-1.3.2.seed │ ├── json-2017-02-12.inline │ ├── json-2017-02-12.normal │ ├── json-2017-02-12.seed │ ├── lcms-2017-03-21.inline │ ├── lcms-2017-03-21.normal │ ├── lcms-2017-03-21.seed │ ├── libarchive-2017-01-04.inline │ ├── libarchive-2017-01-04.normal │ ├── libarchive-2017-01-04.seed │ ├── libjpeg-turbo-07-2017.inline │ ├── libjpeg-turbo-07-2017.normal │ ├── libjpeg-turbo-07-2017.seed │ ├── libpng-1.2.56.inline │ ├── libpng-1.2.56.normal │ ├── libpng-1.2.56.seed │ ├── libssh-2017-1272.inline │ ├── libssh-2017-1272.normal │ ├── libssh-2017-1272.seed │ ├── libxml2-v2.9.2.inline │ ├── libxml2-v2.9.2.normal │ ├── libxml2-v2.9.2.seed │ ├── llvm-libcxxabi-2017-01-27.inline │ ├── llvm-libcxxabi-2017-01-27.normal │ ├── llvm-libcxxabi-2017-01-27.seed │ ├── openssl-1.0.1f.inline │ ├── openssl-1.0.1f.normal │ ├── openssl-1.0.1f.seed │ ├── openssl-1.0.2d.inline │ ├── openssl-1.0.2d.normal │ ├── openssl-1.0.2d.seed │ ├── openssl-1.1.0c.inline │ ├── openssl-1.1.0c.normal │ ├── openssl-1.1.0c.seed │ ├── openthread-2018-02-27.inline │ ├── openthread-2018-02-27.normal │ ├── openthread-2018-02-27.seed │ ├── pcre2-10.00.inline │ ├── pcre2-10.00.normal │ ├── pcre2-10.00.seed │ ├── 
proj4-2017-08-14.inline │ ├── proj4-2017-08-14.normal │ ├── proj4-2017-08-14.seed │ ├── re2-2014-12-09.inline │ ├── re2-2014-12-09.normal │ ├── re2-2014-12-09.seed │ ├── runtime/ │ │ ├── server.key │ │ └── server.pem │ ├── sqlite-2016-11-14.inline │ ├── sqlite-2016-11-14.normal │ ├── sqlite-2016-11-14.seed │ ├── vorbis-2017-12-11.inline │ ├── vorbis-2017-12-11.normal │ ├── vorbis-2017-12-11.seed │ ├── woff2-2016-05-06.inline │ ├── woff2-2016-05-06.normal │ ├── woff2-2016-05-06.seed │ ├── wpantund-2018-02-27.inline │ ├── wpantund-2018-02-27.normal │ └── wpantund-2018-02-27.seed ├── build.sh ├── clean.sh ├── docs/ │ ├── system.md │ ├── tips.md │ ├── todo.md │ └── trouble.md ├── scripts/ │ └── stochfuzz_env.sh ├── src/ │ ├── .clang-format │ ├── Makefile │ ├── address_dictionary.h │ ├── afl_config.h │ ├── asm_syscall.c │ ├── asm_utils.c │ ├── binary.c │ ├── binary.h │ ├── buffer.c │ ├── buffer.h │ ├── capstone_.c │ ├── capstone_.h │ ├── config.h │ ├── core.c │ ├── core.h │ ├── crs_config.h │ ├── diagnoser.c │ ├── diagnoser.h │ ├── disassembler.c │ ├── disassembler.h │ ├── elf_.c │ ├── elf_.h │ ├── fork_server.c │ ├── fork_server.h │ ├── frontend.c │ ├── get_signal_stack_size.sh │ ├── interval_splay.c │ ├── interval_splay.h │ ├── iterator.h │ ├── library_functions/ │ │ ├── generate.py │ │ ├── lib.csv │ │ ├── library_functions.c │ │ └── library_functions.h │ ├── libstochfuzz.h │ ├── libstochfuzzRT.c │ ├── loader.c │ ├── loader.h │ ├── mem_file.c │ ├── mem_file.h │ ├── patcher.c │ ├── patcher.h │ ├── prob_disasm/ │ │ ├── prob_disasm_complete/ │ │ │ ├── dag.c │ │ │ ├── hints.c │ │ │ ├── propagation.c │ │ │ └── solving.c │ │ ├── prob_disasm_complete.c │ │ └── prob_disasm_simple.c │ ├── restricted_ptr.c │ ├── restricted_ptr.h │ ├── rewriter.c │ ├── rewriter.h │ ├── rewriter_handlers/ │ │ ├── generate.py │ │ ├── handler_call.c │ │ ├── handler_cjmp.c │ │ ├── handler_jmp.c │ │ ├── handler_loop.c │ │ ├── handler_main.in │ │ └── handler_ret.c │ ├── sys_optarg.c │ ├── sys_optarg.h 
│ ├── tp_dispatcher.c │ ├── tp_dispatcher.h │ ├── trampolines/ │ │ ├── Makefile │ │ ├── bitmap.c │ │ ├── context_restore.c │ │ ├── context_save.c │ │ └── trampolines.h │ ├── ucfg_analyzer.c │ ├── ucfg_analyzer.h │ ├── utils.c │ ├── utils.h │ └── x64_utils.c └── test/ ├── .crashpoint.z3 ├── bzip2.no.pie ├── bzip2.pie ├── check_avx512 ├── check_avx512.c ├── crash ├── crash.c ├── ex.smt2 ├── hello ├── json-2017-02-12.normal ├── json.seed ├── leak-268f0e85f4bc45cbaf4d257222b830eac18977f3 ├── libjpeg.asan ├── libpng-1.2.56 ├── no_main ├── no_main.c ├── openssl-1.0.1f ├── openssl-1.0.1f.code.segments ├── pngfix.pie ├── rar ├── readelf.pie ├── runtime/ │ ├── server.key │ └── server.pem ├── small_exec.elf ├── tcpdump.pie ├── test.c.bz2 ├── test_daemon.sh ├── test_daemon_ignore_asan_sof.sh ├── timeout ├── timeout.c ├── unintentional_crash ├── unintentional_crash.c ├── vrrp.pcap └── z3 ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/basic.yml ================================================ name: basic on: push: branches: [ master ] pull_request: branches: [ master ] schedule: - cron: 0 14 * * 1 workflow_dispatch: jobs: build: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 id: cache with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: set up python 3.x if: steps.cache.outputs.cache-hit != 'true' uses: actions/setup-python@v2 with: python-version: '3.x' architecture: 'x64' - name: install dependencies if: steps.cache.outputs.cache-hit != 'true' run: | python -m pip install --upgrade pip meson ninja - name: build if: steps.cache.outputs.cache-hit != 'true' run: | ./build.sh debug: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ 
hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make debug run: | clang --version make clean make debug working-directory: ./src - name: make test run: timeout --signal=KILL 30m make test working-directory: ./src release: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make release run: | clang --version make clean make release working-directory: ./src - name: make test run: timeout --signal=KILL 30m make test working-directory: ./src ================================================ FILE: .github/workflows/benchmark.yml ================================================ name: benchmark on: push: branches: - "!*" tags: - "v*" schedule: - cron: 0 14 * * 1 workflow_dispatch: jobs: build: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 id: cache with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: set up python 3.x if: steps.cache.outputs.cache-hit != 'true' uses: actions/setup-python@v2 with: python-version: '3.x' architecture: 'x64' - name: install dependencies if: steps.cache.outputs.cache-hit != 'true' run: | python -m pip install --upgrade pip meson ninja - name: build if: steps.cache.outputs.cache-hit != 'true' run: | ./build.sh normal: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make release run: | clang --version make clean make release working-directory: ./src - name: make benchmark run: timeout --signal=KILL 35m make benchmark working-directory: ./src early_instrumentation: runs-on: 
ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make release run: | clang --version make clean make release working-directory: ./src - name: make benchmark run: timeout --signal=KILL 35m make benchmark TEST_OPTIONS="-e" working-directory: ./src ================================================ FILE: .github/workflows/early_instrumentation.yml ================================================ name: early_instrumentation on: push: branches: [ master ] pull_request: branches: [ master ] schedule: - cron: 0 14 * * 1 workflow_dispatch: jobs: build: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 id: cache with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: set up python 3.x if: steps.cache.outputs.cache-hit != 'true' uses: actions/setup-python@v2 with: python-version: '3.x' architecture: 'x64' - name: install dependencies if: steps.cache.outputs.cache-hit != 'true' run: | python -m pip install --upgrade pip meson ninja - name: build if: steps.cache.outputs.cache-hit != 'true' run: | ./build.sh debug: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make debug run: | clang --version make clean make debug working-directory: ./src - name: make test run: timeout --signal=KILL 30m make test TEST_OPTIONS='-e' working-directory: ./src release: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: 
make format working-directory: ./src - name: make release run: | clang --version make clean make release working-directory: ./src - name: make test run: timeout --signal=KILL 30m make test TEST_OPTIONS='-e' working-directory: ./src ================================================ FILE: .github/workflows/force_linear.yml ================================================ name: force_linear on: push: branches: [ master ] pull_request: branches: [ master ] schedule: - cron: 0 14 * * 1 workflow_dispatch: jobs: build: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 id: cache with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: set up python 3.x if: steps.cache.outputs.cache-hit != 'true' uses: actions/setup-python@v2 with: python-version: '3.x' architecture: 'x64' - name: install dependencies if: steps.cache.outputs.cache-hit != 'true' run: | python -m pip install --upgrade pip meson ninja - name: build if: steps.cache.outputs.cache-hit != 'true' run: | ./build.sh debug: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make debug run: | clang --version make clean make debug working-directory: ./src - name: make test run: timeout --signal=KILL 30m make test TEST_OPTIONS='-n' working-directory: ./src release: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make release run: | clang --version make clean make release working-directory: ./src - name: make test run: timeout --signal=KILL 30m make test TEST_OPTIONS='-n' working-directory: ./src 
================================================ FILE: .github/workflows/no_generic_pic.yml ================================================ name: no_generic_pic on: push: branches: [ master ] pull_request: branches: [ master ] schedule: - cron: 0 14 * * 1 workflow_dispatch: jobs: build: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 id: cache with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: set up python 3.x if: steps.cache.outputs.cache-hit != 'true' uses: actions/setup-python@v2 with: python-version: '3.x' architecture: 'x64' - name: install dependencies if: steps.cache.outputs.cache-hit != 'true' run: | python -m pip install --upgrade pip meson ninja - name: build if: steps.cache.outputs.cache-hit != 'true' run: | ./build.sh debug: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make debug run: | clang --version make clean make debug working-directory: ./src - name: make test run: timeout --signal=KILL 30m make test TEST_OPTIONS='-r' working-directory: ./src release: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make release run: | clang --version make clean make release working-directory: ./src - name: make test run: timeout --signal=KILL 30m make test TEST_OPTIONS='-r' working-directory: ./src ================================================ FILE: .github/workflows/pdisasm.yml ================================================ name: pdisasm on: push: branches: [ master ] pull_request: branches: [ master ] schedule: - cron: 0 14 * * 
1 workflow_dispatch: jobs: build: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 id: cache with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: set up python 3.x if: steps.cache.outputs.cache-hit != 'true' uses: actions/setup-python@v2 with: python-version: '3.x' architecture: 'x64' - name: install dependencies if: steps.cache.outputs.cache-hit != 'true' run: | python -m pip install --upgrade pip meson ninja - name: build if: steps.cache.outputs.cache-hit != 'true' run: | ./build.sh release: runs-on: ubuntu-22.04 needs: [build] steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | capstone/ keystone/ glib/ libunwind/ key: ${{ runner.os }}-${{ hashFiles('build.sh') }} - name: make format run: make format working-directory: ./src - name: make release run: | clang --version make clean make release working-directory: ./src - name: make test run: timeout --signal=KILL 30m make test TEST_OPTIONS='-f' working-directory: ./src ================================================ FILE: .gitignore ================================================ # Prerequisites *.d # Object files *.o *.ko *.obj *.elf # Linker output *.ilk *.map *.exp # Precompiled Headers *.gch *.pch # Libraries *.lib *.a *.la *.lo # Shared objects (inc. 
Windows DLLs) *.dll *.so *.so.* *.dylib # Executables *.exe *.out *.app *.i*86 *.x86_64 *.hex # Debug files *.dSYM/ *.su *.idb *.pdb # Kernel Module Compile Results *.mod* *.cmd .tmp_versions/ modules.order Module.symvers Mkfile.old dkms.conf # Customize files workshop/ capstone/ keystone/ glib/ libunwind/ AFL/ *.zip *.swp *.tmp *.bin *_bin.c *.tp *_tp.c stoch-fuzz *.gdb_history peda* handler_main.c .stoch-fuzz.* local_crashes library_functions_load.c # whitelist test src/test/ test/* !test/test_daemon.sh !test/test_daemon_ignore_asan_sof.sh !test/bzip2.no.pie !test/test.c.bz2 !test/libpng-1.2.56 !test/seed.png !test/crash !test/crash.c !test/openssl-1.0.1f !test/openssl-1.0.1f.code.segments !test/leak-268f0e85f4bc45cbaf4d257222b830eac18977f3 !test/runtime !test/check_avx512.c !test/check_avx512 !test/z3 !test/ex.smt2 !test/.crashpoint.z3 !test/hello !test/test.rar !test/rar !test/timeout !test/timeout.c !test/unintentional_crash !test/unintentional_crash.c !test/no_main !test/no_main.c !test/json-2017-02-12.normal !test/json.seed !test/readelf.pie !test/small_exec.elf !test/bzip2.pie !test/pngfix.pie !test/toucan.png !test/tcpdump.pie !test/vrrp.pcap !test/libjpeg.asan !test/seed.jpg ================================================ FILE: LICENSE ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. 
We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. 
The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. 
Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. 
The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. 
Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. 
You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. 
You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. 
Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. 
If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. 
When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. 
If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. 
If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. 
For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. 
You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. 
The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. 
SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . 
================================================ FILE: README.md ================================================ # StochFuzz: A New Solution for Binary-only Fuzzing Logo [![test](https://github.com/ZhangZhuoSJTU/StochFuzz/actions/workflows/basic.yml/badge.svg)](https://github.com/ZhangZhuoSJTU/StochFuzz/actions/workflows/basic.yml) [![benchmark](https://github.com/ZhangZhuoSJTU/StochFuzz/actions/workflows/benchmark.yml/badge.svg)](https://github.com/ZhangZhuoSJTU/StochFuzz/actions/workflows/benchmark.yml)

loading-ag-167 StochFuzz is a (probabilistically) sound and cost-effective fuzzing technique for stripped binaries. It is facilitated by a novel incremental and stochastic rewriting technique that is particularly suitable for binary-only fuzzing. Any AFL-based fuzzer, which takes edge coverage (defined by [AFL](https://github.com/google/AFL)) as runtime feedback, can acquire benefits from StochFuzz to directly fuzz stripped binaries.

Most of these dependencies require `meson >= 0.60.1`. Please use `meson --version` to check that you are using an up-to-date version. [Build.sh](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/master/build.sh) helps build all the dependencies automatically.
-S - start a background daemon and wait for a fuzzer to attach (default mode)
Additionally, before fuzzing, we need to prepare the `AFL_PRELOAD` environment variable for AFL. ```bash $ export STOCHFUZZ_PRELOAD=$(/root/StochFuzz/scripts/stochfuzz_env.sh)
It would be greatly appreciated if you could help us improve StochFuzz.
================================================ FILE: benchmark/json-2017-02-12.seed ================================================ 10000000010E5 ================================================ FILE: benchmark/llvm-libcxxabi-2017-01-27.seed ================================================ ZUlSaIJT_2_EET_E5 ================================================ FILE: benchmark/pcre2-10.00.seed ================================================ # This is a specialized test for checking, when PCRE2 is compiled with the # EBCDIC option but in an ASCII environment, that newline and white space # functionality is working. It catches cases where explicit values such as 0x0a # have been used instead of names like CHAR_LF. Needless to say, it is not a # genuine EBCDIC test! In patterns, alphabetic characters that follow a # backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be # in EBCDIC, but can of course be specified as escapes. # Test default newline and variations /^A/m ABC 12\x15ABC /^A/m,newline=any 12\x15ABC 12\x0dABC 12\x0d\x15ABC 12\x25ABC /^A/m,newline=anycrlf 12\x15ABC 12\x0dABC 12\x0d\x15ABC ** Fail 12\x25ABC # Test \h /^A\/ A B # Test \H /^A\/ AB ** Fail A B # Test \R /^A\/ A\x15B A\x0dB A\x25B A\x0bB A\x0cB ** Fail A B # Test \v /^A\/ A\x15B A\x0dB A\x25B A\x0bB A\x0cB ** Fail A B # Test \V /^A\/ A B ** Fail A\x15B A\x0dB A\x25B A\x0bB A\x0cB # For repeated items, use an atomic group so that the output is the same # for DFA matching (otherwise it may show multiple matches). 
# Test \h+ /^A(?>\+)/ A B # Test \H+ /^A(?>\+)/ AB ** Fail A B # Test \R+ /^A(?>\+)/ A\x15B A\x0dB A\x25B A\x0bB A\x0cB ** Fail A B # Test \v+ /^A(?>\+)/ A\x15B A\x0dB A\x25B A\x0bB A\x0cB ** Fail A B # Test \V+ /^A(?>\+)/ A B ** Fail A\x15B A\x0dB A\x25B A\x0bB A\x0cB # End ================================================ FILE: benchmark/runtime/server.key ================================================ -----BEGIN PRIVATE KEY----- MIIBVAIBADANBgkqhkiG9w0BAQEFAASCAT4wggE6AgEAAkEA1AdZNDVOA9cXm97f erp1bukz2kohjToJS6Ma8fOb36VV9lQGmDNsJanXFiqafOgV+kh1HXqZ3l1I0JmZ 71b+QQIDAQABAkAHGfPn5r0lLcgRpWZQwvv56f+dmQwEoeP7z4uwfNtEo0JcRD66 1WRCvx3LE0VbNeaEdNmSPiRXhlwIggjfrBi9AiEA9UusPBcEp/QcPGs96nQQdQzE fw4x0HL/eSV3qHimT6MCIQDdSAiX4Ouxoiwn/9KhDMcZXRYX/OPzj6w8u1YIH7BI ywIgSozbJdAhHCJ2ym4VfUIVFl3xAmSAA0hQGLOocE1qzl0CIQDRicOxZmhqBiKA IgznOn1StEYWov+MhRFZVSBLgw5gbwIgJzOlSlu0Y22hEUsLCKyHBrCAZZHcZ020 20pfogmQYn0= -----END PRIVATE KEY----- ================================================ FILE: benchmark/runtime/server.pem ================================================ -----BEGIN CERTIFICATE----- MIIBYTCCAQugAwIBAgIJAMPQQtUHkx+KMA0GCSqGSIb3DQEBCwUAMAwxCjAIBgNV BAMMAWEwHhcNMTYwOTI0MjIyMDUyWhcNNDQwMjA5MjIyMDUyWjAMMQowCAYDVQQD DAFhMFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBANQHWTQ1TgPXF5ve33q6dW7pM9pK IY06CUujGvHzm9+lVfZUBpgzbCWp1xYqmnzoFfpIdR16md5dSNCZme9W/kECAwEA AaNQME4wHQYDVR0OBBYEFCXtEo9rkLuKGSlm0mFE4Yk/HDJVMB8GA1UdIwQYMBaA FCXtEo9rkLuKGSlm0mFE4Yk/HDJVMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEL BQADQQCnldOnbdNJZxBO/J+979Urg8qDp8MnlN0979AmK1P5/YzPnAF4BU7QTOTE imS5qZ0MvziBa81nVlnnFRkIezcD -----END CERTIFICATE----- ================================================ FILE: benchmark/sqlite-2016-11-14.seed ================================================ SELECT 888<8888888 | daDROPme(1,1 ,11,1 ,1)| 388<8888888 | datetime(1,1 ,11,1 ,1)| 388<8888888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT 887<7777777777777888888<88,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT 887<77774777777777888888<8888888 ,11,1 ,1)| 
194<8888888 | dattime(1,1 ,1)|8 | datetime(88 ,1)|8 | datetime(11,1 ,1)| ( SELECT 8878888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| 388<8888888 | datetime(1,1 ,1,1 ,1)|8 | ANALYZEe(1,1 ,1)| ( SELECT 887<$$$$$$$02e777777 || 888<(7777777777777<888888 ,1)|8 | datetime(1,1 ,1)| ( SELECT 887<777377777 | datetime(1,1 ,1,1 ,1)|8 | datetime| 388<8888888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT 887<7777777777777888888<88,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT 887<77774777777777888888<8888888 ,11,1 ,1)| 388<8888888 | datetime(1,1 ,1)|8 | datetime(88 ,1)|8 | datetime(1,1 ,1)| ( SELECT 8878888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| 388<8888888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| ( (E1 ,1 ,1)| 388<8888888 | d$$BETWEE,1)|8 | datetime(1,1 ,1)| ( SELECT 887<777777,1)|8 | datetime(1,1 ,1)| ( SELECT 887<$$$$$$$02e777777 ||888 | datetimf(1,1 ,1)|8 | datetime(1,1 ,1)| 388<888888888888777888888<88,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT 887<77774777777777888888<8888888 ,11,1 ,1)| 388<8888888 | datetime(1,1 ,1)|8 | datet(1,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT 887<77888888 ,1)|8 |etime(1,1 ,1)| 388<8888888 | datetime(1,188<8888888 | datetime(1,1| datetime(1,1 ,1)| ( SELECT y in (1,1 ,1)| ( SELECT 81 ,1)|8 | date|etime(1,1 ,1)| 388<8888888 | datetime(1,188<8888888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT y in m e_crash$$ ,1)| 38 ,1)| 388<8888888 | datetime(1,1 ,1)|8 | datetimetetime(1,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT y in m e_crash$$ ,1)| 388<8888888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT y in (1,1 ,1)| ( SELECT 81 ,1)|8 | datetime(1,1 ,1)| ( SELECT y in m e_crash$$ ,e(1,1 ,1)| ( SELECT y in m e_crash$$ ,1)| 388<8888888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT y in (1,1 ,1)| ( SELECT 81 ,1)|8 | datet)| 388<8888888 | datetime(1,1 ,1,0 ,1)|8 | ANALYZEe(1,1 ,1)| ( SELECT 887<$$$$$$$02e777777 || 888<7777777777777<888888 ,1)|8 | datetime(1,1 ,1)| ( SELECT 887<777377777 | datetime(1,1 ,1,1 ,11 ,1)| ( SELECT87<77888888 ,1)|8 |etime(1,1 datetime(1,1 ,1)| 
( SELECT y in m e_crash$$ ,1)| 388<88888 ,1)| 388<8888888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT 887<77888888 ,1)|8 |etime(1,1 ,1)| 388<8888888 | datetime(1,188<8888888 | datetime(1,1 ,1)|8 | datetime(1,1 ,1)| ( SELECT y in m e_crash$$ ,1)| 38 ,1)| 388<8888888 | datetime(1,1 ,1)|8 | datetime(1,reload1 ,1)| ( SELECT y in m (1) SELECT 8 87<577777 ================================================ FILE: benchmark/wpantund-2018-02-27.seed ================================================ 0ConfiG:NCP:SocketPath "/dev/null" Config:NCP:SocketPath "/dev/null" ================================================ FILE: build.sh ================================================ #!/bin/bash RED="\033[31m" GREEN="\033[32m" YELLOW="\033[33m" BOLD="\033[1m" OFF="\033[0m" CAPSTONE_VERSION="4.0.2" KEYSTONE_VERSION="0.9.2" GLIB_VERSION="2.72.0" LIBUNWIND_VERSION="1.5" set -e # # check necessary command # check_command () { for cmd in $@ do if [ ! -x "$(command -v $cmd)" ]; then echo -e "${RED}Error${OFF}: $cmd is not installed." >&2 exit 1 fi done } check_command "wget" "unzip" "make" "cmake" "meson" "ninja" "pkg-config" "clang" "python3" # # check clang version (>= 6.0.0) # CLANG_VERSION=$(clang --version | head -n 1 | grep -o -E "[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+" | uniq | sort) CLANG_MAJOR_VERSION=$(echo $CLANG_VERSION | awk -F '.' '{ print $1 }') if [[ $CLANG_VERSION < "6.0.0" && ${#CLANG_MAJOR_VERSION} = "1" ]]; then echo "clang-6.0 or a newer version is required" exit 1 fi # # build capstone # CAPSTONE_URL="https://github.com/aquynh/capstone/archive/$CAPSTONE_VERSION.zip" if [ ! -d capstone ] then if [ ! -f capstone.zip ] then echo -e "${GREEN}$0${OFF}: downloading capstone.zip..." wget -O capstone.zip $CAPSTONE_URL fi echo -e "${GREEN}$0${OFF}: extracting capstone.zip..." unzip capstone.zip mv capstone-$CAPSTONE_VERSION capstone echo -e "${GREEN}$0${OFF}: building capstone.zip..." 
cd capstone CAPSTONE_DIET=no CAPSTONE_X86_REDUCE=no CAPSTONE_ARCHS="x86" ./make.sh cd .. fi # # build keystone # KEYSTONE_URL="https://github.com/keystone-engine/keystone/archive/$KEYSTONE_VERSION.zip" if [ ! -d keystone ] then if [ ! -f keystone.zip ] then echo -e "${GREEN}$0${OFF}: downloading keystone.zip..." wget -O keystone.zip $KEYSTONE_URL fi echo -e "${GREEN}$0${OFF}: extracting keystone.zip..." unzip keystone.zip mv keystone-$KEYSTONE_VERSION keystone echo -e "${GREEN}$0${OFF}: building keystone.zip..." cd keystone if [ -d build ] then rm -rf build fi mkdir build cd build cmake -DBUILD_LIBS_ONLY=1 -DLLVM_BUILD_32_BITS=0 -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DLLVM_TARGETS_TO_BUILD="AArch64;X86" -G "Unix Makefiles" .. make -j8 cd ../.. fi # # build glib # GLIB_URL="https://github.com/GNOME/glib/archive/$GLIB_VERSION.zip" if [ ! -d glib ] then if [ ! -f glib.zip ] then echo -e "${GREEN}$0${OFF}: downloading glib.zip..." wget -O glib.zip $GLIB_URL fi echo -e "${GREEN}$0${OFF}: extracting glib.zip..." unzip glib.zip mv glib-$GLIB_VERSION glib echo -e "${GREEN}$0${OFF}: building glib.zip..." cd glib meson _build --buildtype=release --default-library=static --prefix=$(realpath .) ninja -C _build ninja -C _build install cd .. fi # # build libunwind # LIBUNWIND_URL="https://github.com/libunwind/libunwind/archive/v$LIBUNWIND_VERSION.zip" if [ ! -d libunwind ] then if [ ! -f libunwind.zip ] then echo -e "${GREEN}$0${OFF}: downloading libunwind.zip..." wget -O libunwind.zip $LIBUNWIND_URL fi echo -e "${GREEN}$0${OFF}: extracting libunwind.zip..." unzip libunwind.zip mv libunwind-$LIBUNWIND_VERSION libunwind echo -e "${GREEN}$0${OFF}: building libunwind.zip..." cd libunwind mkdir install ./autogen.sh ./configure --prefix=`pwd`/install --enable-cxx-exceptions make install -j8 cd .. 
fi # # build src # # cd src # make release ================================================ FILE: clean.sh ================================================ #!/bin/bash if [ -d capstone ] then rm -rf capstone fi if [ -d keystone ] then rm -rf keystone fi if [ -d glib ] then rm -rf glib fi if [ -d libunwind ] then rm -rf libunwind fi rm -rf *.zip cd src && make clean ================================================ FILE: docs/system.md ================================================ # New System Design Originally, StochFuzz was integrated into AFL, which made an easy development. However, as more and more advanced fuzzing tools (e.g., [Polyglot](https://github.com/s3team/Polyglot)) take their own implementations (e.g., developing a new variant of AFL), it becomes hard to combine StochFuzz with these tools. As such, we decide to separate StochFuzz and AFL. With this new system design, any AFL-based fuzzer can directly fuzz the phantom binary generated by StochFuzz. However, it also brings some new challenges. First of all, it is easy for the old design to keep multiple versions of rewritten binaries at the same time (by modifying AFL to set up multiple fork servers), but not for the new one. Currently, StochFuzz is independent of AFL, which means it can only keep a single binary at a time. Hence, to detect those erroneous rewriting errors only changing execution paths but not triggering crashes, we introduce a new technique named __checking executions__. These checking executions are triggered periodically and check the coverage consistency w/ and w/o uncertain patchings. After each checking execution, the rewritten binary will get changed. `-x` option is for checking runnings. Besides, we need to manually set the timeout for StochFuzz, which should be consistent with the one of AFL. `-t` option is for the timeout. A good observation is that the edge coverage is at the block level, which means we do not need to trap all instructions but one instruction per block. 
This observation helps us avoid many rewriting errors. ## How to make StochFuzz compatible with other AFL-based fuzzers One of the most common practices of variants of [AFL](https://github.com/google/AFL) is to extend the size of the shared memory. For example, [AFL++](https://github.com/AFLplusplus/AFLplusplus) extends the size to [8388608](https://github.com/AFLplusplus/AFLplusplus/blob/48c878a76ddec2c133fd5708b185b2ac27740084/include/config.h#L44) bytes (`1 << 23`). To make StochFuzz compatible with such AFL variants, we need to do some slight modifications. Specifically, we need to modify two macros defined in [afl_config.h](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/master/src/afl_config.h), [AFL_MAP_SIZE_POW2](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/9fe1500791729e267894e44faa935757e13124e6/src/afl_config.h#L37) and [AFL_MAP_ADDR](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/9fe1500791729e267894e44faa935757e13124e6/src/afl_config.h#L39). __AFL_MAP_SIZE_POW2__ is the logarithm of the size to the base 2. For example, to support AFL++, AFL_MAP_SIZE_POW2 should be set to 23: . __AFL_MAP_ADDR__ is the address of the shared memory in the subject binary. Based on our testing, 0x3000000 would be a safe address. In short, taking AFL++ as an example, following modifications are sufficient. ```c #define AFL_MAP_SIZE_POW2 23 #define AFL_MAP_ADDR 0x3000000 ``` ## Case: Polyglot [Polyglot](https://github.com/s3team/Polyglot) is a state-of-the-art language fuzzer that focuses on testing compilers and language interpreters. Since many programming languages are bootstrapping, which means their language processors are written in themselves, it is difficult or time-consuming to instrument these processors (e.g., __GCC__). The developers of Polyglot originally used AFL-QEMU mode to test such processors. In this case study, we try to combine Polyglot with StochFuzz to provide a more efficient fuzzing test for GCC. 
Before starting our experiments, we need to make a slight change on StochFuzz. Since the developers of Polyglot extend the [size of AFL shared memory](https://github.com/s3team/Polyglot/blob/a49f67ffb95684ae2227800a85eb7963eeb2692d/AFL_replace_mutate/config.h#L323), we need to update it in StochFuzz accordingly. Specifically, we need first to change [AFL_MAP_SIZE_POW2](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/f90db25c300e79b9dd37748da883cb9d66a8253f/src/afl_config.h#L37) to 20. To avoid the conflicts of mmap, we additionally need to change [AFL_MAP_ADDR](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/f90db25c300e79b9dd37748da883cb9d66a8253f/src/afl_config.h#L39). I set it as 0x180000 in this case. The new [afl_config.h](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/f90db25c300e79b9dd37748da883cb9d66a8253f/src/afl_config.h) would look like: ```c ... #define AFL_FORKSRV_FD 198 #define AFL_SHM_ENV "__AFL_SHM_ID" #define AFL_MAP_SIZE_POW2 20 #define AFL_MAP_SIZE (1 << AFL_MAP_SIZE_POW2) #define AFL_MAP_ADDR 0x180000 #define AFL_PREV_ID_PTR (RW_PAGE_ADDR + 0x8) #define AFL_MAP_SIZE_MASK ((1 << AFL_MAP_SIZE_POW2) - 1) ... ``` We apply the [advanced strategy](https://github.com/ZhangZhuoSJTU/StochFuzz#advanced-usage) of StochFuzz and run two GCC fuzzing instances, including `Polyglot + StochFuzz` and `Polyglot + AFL-QEMU`, for 24 hours. ### Screenshot ![screenshots](../imgs/polyglot_screenshot.png) In the above screenshot, the top left and bottom left panels show the progresses of `Polyglot + StochFuzz` and `Polyglot + AFL-QEMU`, respectively. The right panel shows some logging information of StochFuzz. In short, when running for around 20 minutes, StochFuzz can achieve 70 executions per second while AFL-QEMU only 4 executions per seconds. In the meantime, StochFuzz found 6520 paths, two times more than AFL-QEMU did (2169 paths). 
Our register liveness analysis helps StochFuzz avoid _98.4%_ saving/restoring for FLAGS register and _81.2%_ for general purpose registers. ### Results

The left figure presents how many paths each tool found over time. The blue solid line is for StochFuzz and the orange dotted line is for AFL-QEMU. We can see at the end, StochFuzz found much more paths than AFL-QEMU. The right figure presents the distribution of EPS (executions per second) of two tools, the blue one for StochFuzz and the orange one for AFL-QEMU. The average EPS of StochFuzz is around 60 executions per second, nearly 7 times faster than AFL-QEMU. ================================================ FILE: docs/tips.md ================================================ # Tips To enable a more effective and efficient fuzzing, we provide several tips about better using StochFuzz. ## Advanced Strategy As mentioned in [README.md](../README.md#advanced-usage), we strongly recommend every user first tries the advanced strategy. StochFuzz tries to provide a conservative rewriting. As such, it emulates all the _CALL_ instructions to maintain an unchanged data flow. However, in most cases, the return addresses pushed by _CALL_ instructions are only used by _RET_ instructions and the stack unwinding. Based on this observation, we provide an advanced rewriting strategy that hooks the process of stack unwinding and hence does not need to emulate _CALL_ instructions. This strategy is quite efficient and can reduce around 80% overhead of StochFuzz. The advanced strategy can be applied to most binaries but will cause rewriting errors on some including: + statically-linked binaries that do online stack unwinding + some CFI-protected binaries + some go-written binary + ... How to adopt the advanced rewriting strategy can be found in [README.md](../README.md#advanced-usage). ## Timeout StochFuzz needs to specify a timeout for any execution caused by the increment rewriting. The timeout is configured by the `-t` option. 
``` -t msec - set the timeout for each daemon-triggering execution set it as zero to ignore the timeout (default: 2000 ms) ``` AFL, or any attached AFL-based fuzzer, needs to specify a timeout either. We recommend that the two timeouts should be set consistently, but it is not mandatory. However, for the binaries with inlined data, the timeout set for the attached fuzzer should __BE LARGER THAN 1000MS__. Otherwise, the auto-scaling feature of AFL timeout will cause incorrect error diagnosis during the stochastic rewriting. ## Checking Executions As we mentioned in [system.md](system.md), we adopt a new system design to have a wide application in the fields of binary-only fuzzing. This new architecture design is enabled by the observation that we only need to instrument an instruction per basic block to collect the code coverage of AFL and is facilitated by a new technique named checking executions. Technically speaking, checking executions are triggered periodically and to check whether the collected coverages are consistent with and without uncertain patches. The `-x` option is provided for configuring the checking executions, setting the number of executions after which a checking execution will be triggered. ``` -x execs - set the number of executions after which a checking run will be triggered set it as zero to disable checking runs (default: 200000) ``` For example, if we provide `-x 1000`, it means a checking execution will be triggered every 1000 AFL executions. The period of checking executions affects the fuzzing effectiveness. Intuitively, the more checking executions we have, the more overhead they cause. The overhead caused by checking executions is , where a checking execution is triggered every _M_ normal executions. The period of checking executions also affects the probabilistic soundness we provided. Let's additionally assume that the probability of a given rewriting error changing the execution path is _p_. 
Then, the probability _P_ that a rewriting error cannot be detected after a total of _N_ executions is _P = (1 - p)^(N / M)_. In a nutshell, the larger the number we set, the fewer checking executions we will take. In other words, the larger the number we set, the more cost-effective but the less probabilistically sound the fuzzing is. For example, if _M = 1000_, _p = 1e-4_, and _N = 1e8_, the overhead caused by checking executions is _0.1%_ and the probability of having an undetected rewriting error is _(1 - 1e-4)^(1e5) ≈ 4.5e-5_.
In that design, exiting the program with a specific status code (in SIGSEGV handler) is a better approach, compared with raising SIGILL. It can also avoid recursive signal handling. + [x] Support retaddr patch when pdisasm is enabled (check retaddr's probability) -- it seems impossible. Note that we cannot guarantee the control flow is returned from the callee even the returen address is visited. + [x] A better frontend for passing arguments. + [x] Use runtime arguments to set different modes, instead of makefile. + [x] Use simple linear disassembly to check the existence of inlined data. + [x] Read PLT table to get library functions' names, and support the white-list for library functions. + [x] Correctly handle timeout from AFL. + [x] Use shared memory for .text section, to avoid the expensive patch commands. + [x] Support self-correction procedure (delta debugging). + [x] Support non-return analysis on UCFG, with the help of the white-list for library functions. + [x] Support the on-the-fly probability recalculation. + [x] Add a new flag/option to enable early instrumentation for fork server (i.e., before the entrypoint of binary). + [x] Enable periodic checking (for coverage feedback) to determine those false postives which do not lead to crashes. + [x] Add tailed invalid instructions for those basic blocks terminated by bad decoding. + [x] Add a license. + [x] Do not use a global sys\_config, but put the options into each object. + [x] Current TP\_EMIT is only compatible with fuzzers compiled with AFL\_MAP\_SIZE = (1 << 16), we need to change the underlying implementation of TP\_EMIT to automatically fit the AFL\_MAP\_SIZE. + [x] Fix the bugs when rewriting PIE binary and support it. + [x] Place `ENDBR64` instruction before the AFL trampoline. The phantom program will crash otherwise. + [x] Support binaries compiled with gcc ASAN (clang would inline ASAN functions). + [ ] Use g\_hash\_table\_iter\_init instead of g\_hash\_table\_get\_keys. 
+ [ ] Apply AddrDict to all possible places. + [ ] Apply Iter to all possible places.
But it seems ok in practice, because fuzzing is a highly repetitive procedure
+ Hook more signals to collect address information for a better error diagnosis, which, on the other hand, may cause conflicts of signal handlers set by the subject program. ================================================ FILE: docs/trouble.md ================================================ # Troubleshootings If you are using a variant of AFL instead of the original [AFL](https://github.com/google/AFL), please refer to [system.md](https://github.com/ZhangZhuoSJTU/StochFuzz/blob/master/docs/system.md#how-to-make-stochfuzz-compatible-with-other-afl-based-fuzzers) which tells you how to make StochFuzz compatible with those variants. Besides, this documentation mainly talks about how to handle the case where the rewritten binaries have inconsistent behaviors taking the same inputs (e.g., invalid crashes which cannot be reproduced by the original binaries). Please kindly open an issue to report any other problem, including: + The execution speed is quite slow (e.g., slower than AFL-QEMU) + The fuzzing process is stuck (i.e., the AFL panel does not have updates for a while) + StochFuzz crashes during rewriting + ... ## How to check whether an input will cause inconsistent behaviors. As mentioned in [README.md](../README.md#basic-usage), after the initial rewriting, StochFuzz will generate a _phantom file_. Originally, if we want to do binary-only fuzzing, we attach AFL to this phantom binary. Actually, this phantom binary can also be directly executed, with the same arguments as the original binary has. Hence, to check whether an input will cause inconsistent behaviors, you can execute both the original binary and the phantom binary with the given input and check the behaviors of two binaries. ## Incorrect rewriting options or latent bugs in StochFuzz? StochFuzz provides different rewriting options and will automatically choose some, based on the given binary. In some cases, StochFuzz may pick the wrong choices. 
The following steps can help us identify whether the erroneous behaviors are caused by incorrect rewriting options or latent bugs in StochFuzz. + First of all, make sure all the cached files are removed (`rm .*`) and try to rerun StochFuzz. + If the erroneous behaviors still exist but you have adopted the advanced strategy, please remove all cached files (`rm .*`) and try the basic mode. + If the erroneous behaviors still exist after adopting the basic mode, please remove all cached files (`rm .*`) and feed `-e -f -i` options into StochFuzz. ``` -e - install the fork server at the entrypoint instead of the main function -f - forcedly assume there is data interleaving with code -i - ignore the call-fallthrough edges to defense RET-misusing obfuscation ``` + If the erroneous behaviors still exist after rewriting with the aforementioned options, please kindly open an issue to let us know; if the erroneous behaviors are gone, you can try aforementioned options one by one to identify which one contributes to elimilate the errors, and if possible, you can also open an issue to let us know. ## Known issues + Like [AFL](https://github.com/google/AFL/blob/fab1ca5ed7e3552833a18fc2116d33a9241699bc/README.md#13-known-limitations--areas-for-improvement), StochFuzz cannot handle programs that install custom handlers for some important signals (SIGSEGV, SIGABRT, etc). Moreover, StochFuzz additionally occupies one more signal, _SIGUSR1_. If the subject program has a custom handler for SIGUSR1, the user may need to modify StochFuzz to use SIGUSR2 or other unused signals. ================================================ FILE: scripts/stochfuzz_env.sh ================================================ #!/bin/bash stochfuzz_dir=$(realpath $(dirname "$(realpath $0)")/../) libstochfuzzRT_path="$stochfuzz_dir/src/libstochfuzzRT.so" libunwind_path="$stochfuzz_dir/libunwind/install/lib/libunwind.so" if [ ! -f $libstochfuzzRT_path ]; then echo "libstochfuzzRT.so not found!" exit 1 fi if [ ! 
-f $libunwind_path ]; then echo "libunwind.so not found!" exit 1 fi export STOCHFUZZ_PRELOAD=$libstochfuzzRT_path:$libunwind_path echo $STOCHFUZZ_PRELOAD ================================================ FILE: src/.clang-format ================================================ BasedOnStyle: Google IndentWidth: 4 AlwaysBreakTemplateDeclarations: true ColumnLimit: 80 UseTab: Never AllowShortIfStatementsOnASingleLine: false AllowShortBlocksOnASingleLine: false AllowShortLoopsOnASingleLine: false ================================================ FILE: src/Makefile ================================================ CC = clang SIGSTKSZ = $(shell ./get_signal_stack_size.sh) CFLAGS = -Wall -Wno-unused-command-line-argument -Wno-void-pointer-to-int-cast -Wno-void-pointer-to-enum-cast -fPIC -pie -ffast-math -D_GNU_SOURCE -DSIGNAL_STACK_SIZE=$(SIGSTKSZ) LDFLAGS = SHELLCODE_CFLAGS = -Wall -fno-stack-protector -fno-jump-tables -fpie -O3 -D_GNU_SOURCE -DSIGNAL_STACK_SIZE=$(SIGSTKSZ) LIBNAME = $(shell find . -regex './lib[^\.\/]*\.h' | tr -d '/' | cut -d '.' 
-f2) TOOLNAME = $(shell grep '^\#define OURTOOL ' config.h | cut -d '"' -f2) VERSION = $(shell grep '^\#define VERSION ' config.h | cut -d '"' -f2) ifeq ($(shell ../test/check_avx512 2>/dev/null; echo $$?), 0) AVX_CFLAGS = -mavx512f -DAVX512 else AVX_CFLAGS = endif ifneq ($(origin DEBUG_REWRITER), undefined) CFLAGS += -DBINARY_SEARCH_INVALID_CRASH -DBINARY_SEARCH_DEBUG_REWRITER=$(strip $(DEBUG_REWRITER)) SHELLCODE_CFLAGS += -DBINARY_SEARCH_INVALID_CRASH -DBINARY_SEARCH_DEBUG_REWRITER=$(strip $(DEBUG_REWRITER)) endif # note that the new SINGLE_SUCC_OPT is not well test, as such we add an option to disable it ifneq ($(origin SINGLE_SUCC_OPT), undefined) ifeq ('$(SINGLE_SUCC_OPT)', 'disable') CFLAGS += -DNSINGLE_SUCC_OPT SHELLCODE_CFLAGS += -DNSINGLE_SUCC_OPT endif endif ifneq ($(origin CONSERVATIVE_PATCH), undefined) ifeq ('$(CONSERVATIVE_PATCH)', 'enable') CFLAGS += -DCONSERVATIVE_PATCH SHELLCODE_CFLAGS += -DCONSERVATIVE_PATCH endif endif # glib CFLAGS += $(shell PKG_CONFIG_PATH=$(realpath ..)/glib/lib/x86_64-linux-gnu/pkgconfig/ pkg-config --cflags glib-2.0) LDFLAGS += -lpthread # keystone CFLAGS += -I $(realpath ..)/keystone/include LDFLAGS += -lstdc++ -lm # capstone CFLAGS += -I $(realpath ..)/capstone/include LDFLAGS += # libunwind LIBUNWIND_RT_STEP_OFFSET = 0x$(shell readelf -s $(realpath ..)/libunwind/install/lib/libunwind.so | grep _ULx86_64_step | head -n 1 | awk '{print $$2}') LIBUNWIND_RT_CFLAGS += -fPIC -shared -I $(realpath ..)/libunwind/install/include -DSTEP_OFFSET=$(LIBUNWIND_RT_STEP_OFFSET) -DSIGNAL_STACK_SIZE=$(SIGSTKSZ) OBJS=\ binary.o \ buffer.o \ elf_.o \ utils.o \ interval_splay.o \ mem_file.o \ restricted_ptr.o \ tp_dispatcher.o \ sys_optarg.o \ disassembler.o \ rewriter.o \ patcher.o \ ucfg_analyzer.o \ capstone_.o \ diagnoser.o \ library_functions/library_functions.o \ core.o .PHONY: clean format libstochfuzzRT: gcc $(LIBUNWIND_RT_CFLAGS) -o libstochfuzzRT.so libstochfuzzRT.c debug: CFLAGS += -g -O0 -fsanitize=address -fno-omit-frame-pointer 
-DDEBUG debug: SHELLCODE_CFLAGS += -DDEBUG debug: executable profile: CFLAGS += -pg -O2 -DNDEBUG profile: SHELLCODE_CFLAGS += -DNDEBUG profile: executable release: CFLAGS += -O2 -DNDEBUG release: SHELLCODE_CFLAGS += -DNDEBUG release: executable executable: loader fork_server tps handlers library_functions_load libstochfuzzRT $(OBJS) ar rcs $(LIBNAME).a $(OBJS) $(CC) $(CFLAGS) $(LDFLAGS) -shared $(OBJS) $(realpath ..)/glib/lib/x86_64-linux-gnu/libglib-2.0.a $(realpath ..)/keystone/build/llvm/lib/libkeystone.a $(realpath ..)/capstone/libcapstone.a -o $(LIBNAME).so $(CC) $(CFLAGS) $(LDFLAGS) frontend.c $(LIBNAME).a $(realpath ..)/glib/lib/x86_64-linux-gnu/libglib-2.0.a $(realpath ..)/keystone/build/llvm/lib/libkeystone.a $(realpath ..)/capstone/libcapstone.a -o $(TOOLNAME) loader: $(CC) $(SHELLCODE_CFLAGS) -c loader.c $(CC) -nostdlib -o loader.out loader.o -Wl,--entry=_entry objcopy --dump-section .text=loader.bin loader.out xxd -i loader.bin > loader_bin.c fork_server: $(CC) $(SHELLCODE_CFLAGS) $(AVX_CFLAGS) -c fork_server.c $(CC) -nostdlib -o fork_server.out fork_server.o -Wl,--entry=_entry objcopy --dump-section .text=fork_server.bin fork_server.out xxd -i fork_server.bin > fork_server_bin.c tps: $(MAKE) -C trampolines handlers: python3 rewriter_handlers/generate.py rewriter_handlers library_functions_load: python3 library_functions/generate.py lib.csv library_functions ifeq ($(findstring -r,$(TEST_OPTIONS)), -r) STOCHFUZZ_PRELOAD = $(shell ../scripts/stochfuzz_env.sh) define test_succ cd test && ( STOCHFUZZ_PRELOAD=$(strip ${2}):$(STOCHFUZZ_PRELOAD) ${1} ) endef define test_fail cd test && ( ! STOCHFUZZ_PRELOAD=$(strip ${2}):$(STOCHFUZZ_PRELOAD) ${1} ) endef define test_whatever cd test && ( STOCHFUZZ_PRELOAD=$(strip ${2}):$(STOCHFUZZ_PRELOAD) ${1} || true ) endef else define test_succ cd test && ( ${1} ) endef define test_fail cd test && ( ! 
${1} ) endef define test_whatever cd test && ( ${1} || true ) endef endif test: rm -rf test; cp -r ../test test $(call test_succ, ../$(TOOLNAME) -P $(TEST_OPTIONS) -- bzip2.no.pie) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- bzip2.no.pie --help) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- bzip2.no.pie -kfd test.c.bz2) $(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- bzip2.no.pie ) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- libpng-1.2.56 seed.png) $(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- libpng-1.2.56) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- json-2017-02-12.normal json.seed) $(call test_fail, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- crash mdzz) $(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- crash ) $(call test_succ, ../$(TOOLNAME) -P $(TEST_OPTIONS) -- openssl-1.0.1f) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- openssl-1.0.1f leak-268f0e85f4bc45cbaf4d257222b830eac18977f3) $(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- openssl-1.0.1f) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- hello) $(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- hello) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- rar e -o+ -mt3 -- test.rar) $(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- rar) $(call test_fail, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- timeout mdzz) $(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- timeout) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- readelf.pie -a small_exec.elf) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- bzip2.pie -kfd test.c.bz2) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- pngfix.pie seed.png) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- pngfix.pie toucan.png) ifneq ($(strip $(shell whereis libasan.so.4 | cut -d ' ' -f2- | xargs ls | grep 'libasan.so.4')),) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- libjpeg.asan seed.jpg, $(shell whereis libasan.so.4 | cut -d ' ' -f2- | xargs ls | 
grep 'libasan.so.4')) endif ifneq ($(findstring -n,$(TEST_OPTIONS)), -n) $(call test_whatever, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- unintentional_crash mdzz) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- unintentional_crash) $(call test_succ, ../$(TOOLNAME) -V $(TEST_OPTIONS) -- unintentional_crash) endif ifeq ($(findstring -e,$(TEST_OPTIONS)), -e) $(call test_fail, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- no_main mdzz) $(call test_succ, ../$(TOOLNAME) -R $(TEST_OPTIONS) -- no_main) endif ifneq ($(findstring -f,$(TEST_OPTIONS)), -f) $(call test_whatever, timeout --signal=KILL 10m ../$(TOOLNAME) -R -t 5000 $(TEST_OPTIONS) -- z3 -smt2 ex.smt2) # this test may fail due to the memory limit of Github Actions endif # test daemon rm -rf test; cp -r ../test test $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' bzip2.no.pie -kfd test.c.bz2) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' libpng-1.2.56 seed.png) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' json-2017-02-12.normal json.seed) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' openssl-1.0.1f leak-268f0e85f4bc45cbaf4d257222b830eac18977f3) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' rar e -o+ -mt3 -- test.rar) $(call test_fail, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' crash mdzz) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' crash) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' readelf.pie -a small_exec.elf) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' bzip2.pie -kfd test.c.bz2) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' pngfix.pie seed.png) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' pngfix.pie toucan.png) ifneq ($(strip $(shell whereis libasan.so.4 | cut -d ' ' -f2- | xargs ls | grep 'libasan.so.4')),) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' libjpeg.asan 
seed.jpg, $(shell whereis libasan.so.4 | cut -d ' ' -f2- | xargs ls | grep 'libasan.so.4')) endif ifeq ($(findstring -r,$(TEST_OPTIONS)), -r) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS) -e' hello) else $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' hello) endif ifneq ($(findstring -n,$(TEST_OPTIONS)), -n) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' unintentional_crash mdzz) endif ifeq ($(findstring -e,$(TEST_OPTIONS)), -e) $(call test_succ, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' no_main mdzz) endif $(call test_fail, ./test_daemon.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' timeout mdzz) $(call test_succ, cat timeout.daemon.log) $(call test_succ, grep -F 'get status code: 0x9 (signal: 9)' timeout.daemon.log) GOOGLE_FTS=\ boringssl-2016-02-12 \ c-ares-CVE-2016-5180 \ freetype2-2017 \ guetzli-2017-3-30 \ harfbuzz-1.3.2 \ json-2017-02-12 \ lcms-2017-03-21 \ libarchive-2017-01-04 \ libjpeg-turbo-07-2017 \ libpng-1.2.56 \ libssh-2017-1272 \ libxml2-v2.9.2 \ llvm-libcxxabi-2017-01-27 \ openssl-1.0.1f \ openssl-1.0.2d \ openssl-1.1.0c \ openthread-2018-02-27 \ pcre2-10.00 \ proj4-2017-08-14 \ re2-2014-12-09 \ sqlite-2016-11-14 \ vorbis-2017-12-11 \ woff2-2016-05-06 \ wpantund-2018-02-27 prepare_google_fts: rm -rf test; cp -r ../benchmark test; cp ../test/test_daemon_ignore_asan_sof.sh test $(GOOGLE_FTS): prepare_google_fts $(call test_succ, ./test_daemon_ignore_asan_sof.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' $@.normal $@.seed) $(call test_succ, ./test_daemon_ignore_asan_sof.sh ../$(TOOLNAME) '$(TEST_OPTIONS)' $@.inline $@.seed) $(call test_succ, rm -f .pdisasm.$@.normal .pdisasm.$@.inline) $(call test_succ, grep -F "SUMMARY: AddressSanitizer: stack-overflow" $@.normal.daemon.log || ../$(TOOLNAME) -R $(TEST_OPTIONS) -- $@.normal $@.seed || grep -F "we encounter a rewriting error" $@.normal.daemon.log) $(call test_succ, grep -F "SUMMARY: AddressSanitizer: stack-overflow" $@.inline.daemon.log || 
../$(TOOLNAME) -R $(TEST_OPTIONS) -- $@.inline $@.seed || grep -F "we encounter a rewriting error" $@.inline.daemon.log) benchmark: prepare_google_fts $(GOOGLE_FTS) clean: rm -rf $(OBJS) *.out *.bin *.o *.a *.so *_bin.c $(TOOLNAME) test/ library_functions/library_functions_load.c rewriter_handlers/handler_main.c $(MAKE) -C trampolines clean SOURCES:=$(OBJS:.o=.c) HEADERS:=$(OBJS:.o=.h) SOURCES += loader.c fork_server.c frontend.c asm_syscall.c asm_utils.c libstochfuzzRT.c SOURCES += rewriter_handlers/*.c rewriter_handlers/*.in SOURCES += prob_disasm/*.c SOURCES += prob_disasm/prob_disasm_complete/*.c HEADERS += address_dictionary.h loader.h fork_server.h config.h afl_config.h crs_config.h $(LIBNAME).h format: clang-format -sort-includes -style=file -i $(SOURCES) clang-format -sort-includes -style=file -i $(HEADERS) $(MAKE) -C trampolines format ================================================ FILE: src/address_dictionary.h ================================================ /* * address_dictionary.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ #ifndef __ADDRESS_DICTIONARY_H #define __ADDRESS_DICTIONARY_H #include "config.h" #include "utils.h" // force evaluation #define __ADDR_DICT_NAME_2(x, y) __AddrDict_##y##_##x##_t #define __ADDR_DICT_NAME_1(x, y) __ADDR_DICT_NAME_2(x, y) #define __ADDR_DICT_NAME(x) __ADDR_DICT_NAME_1(x, __COUNTER__) /* * Address dictionary uses a contiguous memory to store data, and uses key as * index to access. Compared with GHashTable, it is a much more efficient * approach to build a hash table who uses address as key and is likely to use * all addresses. * * Note that we use macro to simulate template in C++. */ #define AddrDict(type, name) \ struct __ADDR_DICT_NAME(name) { \ type *__data; \ uint64_t *__used; \ addr_t __base; \ size_t __size; \ } name /* * AddrDict without checking existence. * It is very helpful for hash tables whose value cannot be zero. */ #define AddrDictFast(type, name) \ struct __ADDR_DICT_NAME(name) { \ type *__data; \ PhantomType *__used; \ addr_t __base; \ size_t __size; \ } name #define z_addr_dict_init(dict, base_addr, size) \ do { \ (dict).__base = (base_addr); \ (dict).__size = (size); \ (dict).__data = z_alloc((dict).__size, sizeof(*((dict).__data))); \ if (_Generic(((dict).__used), PhantomType * \ : false, default \ : true)) { \ (dict).__used = z_alloc((dict).__size / 64 + 1, sizeof(uint64_t)); \ } else { \ (dict).__used = NULL; \ } \ } while (0) #define z_addr_dict_check_addr(dict, addr) \ do { \ if ((addr) < (dict).__base || \ (addr) >= (dict).__base + (dict).__size) { \ EXITME("out-of-boundry access in address dictionary"); \ } \ } while (0) #define z_addr_dict_exist(dict, addr) \ ({ \ bool res; \ z_addr_dict_check_addr(dict, addr); \ \ size_t __off = (addr) - (dict).__base; \ \ if (_Generic(((dict).__used), PhantomType * \ : false, default \ : true)) { \ size_t __off_div = __off / 64; \ size_t __off_mod = __off % 64; \ uint64_t *__bits = (uint64_t *)((dict).__used); \ res = !!(__bits[__off_div] & (1UL << __off_mod)); \ } else { \ res = 
!!((dict).__data[__off]); \ } \ \ res; \ }) #define z_addr_dict_set(dict, addr, val) \ do { \ z_addr_dict_check_addr(dict, addr); \ \ size_t __off = (addr) - (dict).__base; \ (dict).__data[__off] = (val); \ \ if ((dict).__used) { \ size_t __off_div = __off / 64; \ size_t __off_mod = __off % 64; \ uint64_t *__bits = (uint64_t *)((dict).__used); \ __bits[__off_div] |= (1UL << __off_mod); \ } \ } while (0) #define z_addr_dict_get(dict, addr) \ ({ \ z_addr_dict_check_addr(dict, addr); \ if (!z_addr_dict_exist(dict, addr)) { \ EXITME("uninitialized access in address dictionary"); \ } \ (dict).__data[(addr) - (dict).__base]; \ }) #define z_addr_dict_get_data(dict) ((dict).__data) #define z_addr_dict_get_base(dict) ((dict).__base) #define z_addr_dict_get_size(dict) ((dict).__size) #define z_addr_dict_remove(dict, addr) \ do { \ z_addr_dict_check_addr(dict, addr); \ size_t __off = (addr) - (dict).__base; \ (dict).__data[__off] = 0; \ \ if ((dict).__used) { \ size_t __off_div = __off / 64; \ size_t __off_mod = __off % 64; \ uint64_t *__bits = (uint64_t *)((dict).__used); \ __bits[__off_div] &= (~(1UL << __off_mod)); \ } \ } while (0) /* * z_addr_dist_destroy should support variable numbers of arguments */ #define __addr_dict_destroy_opt_0(...) #define __addr_dict_destroy_opt_1(...) #define __addr_dict_destroy_opt_2(dict, func) \ do { \ for (size_t __i = 0; __i < (dict).__size; __i++) { \ addr_t __addr = (dict).__base + __i; \ if (z_addr_dict_exist(dict, __addr)) { \ (*(func))((dict).__data[__i]); \ } \ } \ } while (0) #define __addr_dict_destroy_choose(a, b, c, f, ...) f #define __addr_dict_destroy_data(...) \ __addr_dict_destroy_choose(, ##__VA_ARGS__, \ __addr_dict_destroy_opt_2(__VA_ARGS__), \ __addr_dict_destroy_opt_1(__VA_ARGS__), \ __addr_dict_destroy_opt_0(__VA_ARGS__)) #define __addr_dict_destroy_self(dict, ...) \ do { \ z_free((dict).__data); \ z_free((dict).__used); \ } while (0) #define z_addr_dict_destroy(...) 
\ do { \ __addr_dict_destroy_data(__VA_ARGS__); \ __addr_dict_destroy_self(__VA_ARGS__); \ } while (0) #endif ================================================ FILE: src/afl_config.h ================================================ /* * afl_config.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __AFL_CONFIG_H #define __AFL_CONFIG_H #include "config.h" /* * XXX: Attaching SHM at a fixed address allows around 10% perf gain. see * https://github.com/google/AFL/blob/master/afl-as.h#L71. * * Note that it is reasonable for a binary-instrumented tool to fix the address, * as we can know the memory layout comparied with from assemble view. * */ /* * TODO: when rewritting, dynamically calculate the fixed AFL_MAP_ADDR. 
*/ #define AFL_FORKSRV_FD 198 #define AFL_SHM_ENV "__AFL_SHM_ID" #define AFL_MAP_SIZE_POW2 16 #define AFL_MAP_SIZE (1 << AFL_MAP_SIZE_POW2) #define AFL_MAP_ADDR (RW_PAGE_ADDR + 0x10000) #define AFL_PREV_ID_PTR (RW_PAGE_ADDR + 0x8) #define AFL_MAP_SIZE_MASK ((1 << AFL_MAP_SIZE_POW2) - 1) // #define AFL_BB_ID(x) ((((x) >> 4) ^ ((x) << 8)) & AFL_MAP_SIZE_MASK) // AFL_BB_ID Algorithm used in AFL-QEMU, but it seems bad on static binary // rewriting #define AFL_BB_ID(x) (((x) ^ ((x) >> AFL_MAP_SIZE_POW2)) & AFL_MAP_SIZE_MASK) #define AFL_HASH_CONST 0xa5b35705 #endif ================================================ FILE: src/asm_syscall.c ================================================ /* * asm_syscall.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ #include #include #include #include #include #include #include #include #include #define NO_INLINE __attribute__((__noinline__)) #define NO_RETURN __attribute__((__noreturn__)) #define Z_SYSCALL __attribute__((unused)) static inline #define ASM_STRING(name, content) \ ".global " #name \ "\n" \ ".type " #name ",@function\n" #name \ ":\n" \ ".ascii \"" content \ "\"\n" \ ".byte 0x00\n" /* * Kernal sigaction (unlike glibc wrapper) */ struct kernel_sigaction { void (*k_sa_handler)(int, siginfo_t *, void *); unsigned long sa_flags; void (*sa_restorer)(void); unsigned long sa_mask; }; Z_SYSCALL unsigned long sys_prctl(unsigned long option_0, unsigned long arg2_0, unsigned long arg3_0, unsigned long arg4_0, unsigned long arg5_0) { register uintptr_t option asm("rdi") = (uintptr_t)option_0; register uintptr_t arg2 asm("rsi") = (uintptr_t)arg2_0; register uintptr_t arg3 asm("rdx") = (uintptr_t)arg3_0; register uintptr_t arg4 asm("r10") = (uintptr_t)arg4_0; register uintptr_t arg5 asm("r8") = (uintptr_t)arg5_0; register uintptr_t err asm("rax"); asm volatile( "mov $157, %%eax\n\t" // SYS_PRCTL "syscall" : "=rax"(err) : "r"(option), "r"(arg2), "r"(arg3), "r"(arg4), "r"(arg5) : "rcx", "r11"); return (unsigned long)err; } Z_SYSCALL unsigned long sys_clone(unsigned long clone_flags_0, unsigned long newsp_0, pid_t *parent_tidptr_0, pid_t *child_tidptr_0, void *tls_val_0) { register uintptr_t clone_flags asm("rdi") = (uintptr_t)clone_flags_0; register uintptr_t newsp asm("rsi") = (uintptr_t)newsp_0; register uintptr_t parent_tidptr asm("rdx") = (uintptr_t)parent_tidptr_0; register uintptr_t child_tidptr asm("r10") = (uintptr_t)child_tidptr_0; register uintptr_t tls_val asm("r8") = (uintptr_t)tls_val_0; register uintptr_t err asm("rax"); asm volatile( "mov $56, %%eax\n\t" // SYS_CLONE "syscall" : "=rax"(err) : "r"(clone_flags), "r"(newsp), "r"(parent_tidptr), "r"(child_tidptr), "r"(tls_val) : "rcx", "r11"); return (unsigned long)err; } Z_SYSCALL unsigned long sys_mmap(unsigned 
long addr_0, unsigned long len_0, unsigned long prot_0, unsigned long flags_0, unsigned long fd_0, unsigned long off_0) { register uintptr_t addr asm("rdi") = (uintptr_t)addr_0; register uintptr_t len asm("rsi") = (uintptr_t)len_0; register uintptr_t prot asm("rdx") = (uintptr_t)prot_0; register uintptr_t flags asm("r10") = (uintptr_t)flags_0; register uintptr_t fd asm("r8") = (uintptr_t)fd_0; register uintptr_t off asm("r9") = (uintptr_t)off_0; register uintptr_t err asm("rax"); asm volatile( "mov $9, %%eax\n\t" // SYS_MMAP "syscall" : "=rax"(err) : "r"(addr), "r"(len), "r"(prot), "r"(flags), "r"(fd), "r"(off) : "rcx", "r11"); return (unsigned long)err; } Z_SYSCALL int sys_mprotect(unsigned long start_0, size_t len_0, unsigned long prot_0) { register uintptr_t start asm("rdi") = (uintptr_t)start_0; register uintptr_t len asm("rsi") = (uintptr_t)len_0; register uintptr_t prot asm("rdx") = (uintptr_t)prot_0; register intptr_t err asm("rax"); asm volatile( "mov $10, %%eax\n\t" // SYS_MPROTECT "syscall" : "=rax"(err) : "r"(start), "r"(len), "r"(prot) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_open(const char *filename_0, int flags_0, int mode_0) { register uintptr_t filename asm("rdi") = (uintptr_t)filename_0; register uintptr_t flags asm("rsi") = (uintptr_t)flags_0; register uintptr_t mode asm("rdx") = (uintptr_t)mode_0; register intptr_t fd asm("rax"); asm volatile( "mov $2, %%eax\n\t" // SYS_OPEN "syscall" : "=rax"(fd) : "r"(filename), "r"(flags), "r"(mode) : "rcx", "r11"); return (int)fd; } Z_SYSCALL int sys_pipe(int *pipefd_0) { register uintptr_t pipefd asm("rdi") = (uintptr_t)pipefd_0; register intptr_t err asm("rax"); asm volatile( "mov $22, %%eax\n\t" // SYS_PIPE "syscall" : "=rax"(err) : "r"(pipefd) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_close(int fd_0) { register uintptr_t fd asm("rdi") = (uintptr_t)fd_0; register intptr_t err asm("rax"); asm volatile( "mov $3, %%eax\n\t" // SYS_CLOSE "syscall" : "=rax"(err) : "r"(fd) : "rcx", 
"r11"); return (int)err; } Z_SYSCALL int sys_rt_sigaction(int sig_0, struct kernel_sigaction *act_0, struct kernel_sigaction *oact_0, size_t sigsetsize_0) { register uintptr_t sig asm("rdi") = (uintptr_t)sig_0; register uintptr_t act asm("rsi") = (uintptr_t)act_0; register uintptr_t oact asm("rdx") = (uintptr_t)oact_0; register uintptr_t sigsetsize asm("r10") = (uintptr_t)sigsetsize_0; register intptr_t err asm("rax"); asm volatile( "mov $13, %%eax\n\t" // SYS_RT_SIGACTION "syscall" : "=rax"(err) : "r"(sig), "r"(act), "r"(oact), "r"(sigsetsize) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_connect(int fd_0, struct sockaddr *addr_0, int addrlen_0) { register uintptr_t fd asm("rdi") = (uintptr_t)fd_0; register uintptr_t addr asm("rsi") = (uintptr_t)addr_0; register uintptr_t addrlen asm("rdx") = (uintptr_t)addrlen_0; register intptr_t err asm("rax"); asm volatile( "mov $42, %%eax\n\t" // SYS_CONNECT "syscall" : "=rax"(err) : "r"(fd), "r"(addr), "r"(addrlen) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_socket(int family_0, int type_0, int protocol_0) { register uintptr_t family asm("rdi") = (uintptr_t)family_0; register uintptr_t type asm("rsi") = (uintptr_t)type_0; register uintptr_t protocol asm("rdx") = (uintptr_t)protocol_0; register intptr_t err asm("rax"); asm volatile( "mov $41, %%eax\n\t" // SYS_SOCKET "syscall" : "=rax"(err) : "r"(family), "r"(type), "r"(protocol) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_write(int fd_0, const char *buf_0, size_t len_0) { register uintptr_t fd asm("rdi") = (uintptr_t)fd_0; register uintptr_t buf asm("rsi") = (uintptr_t)buf_0; register uintptr_t len asm("rdx") = (uintptr_t)len_0; register intptr_t err asm("rax"); asm volatile( "mov $1, %%eax\n\t" // SYS_WRITE "syscall" : "=rax"(err) : "r"(fd), "r"(buf), "r"(len) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_sigaltstack(stack_t *uss_0, stack_t *uoss_0) { register uintptr_t uss asm("rdi") = (uintptr_t)uss_0; register uintptr_t uoss asm("rsi") 
= (uintptr_t)uoss_0; register intptr_t err asm("rax"); asm volatile( "mov $131, %%eax\n\t" // SYS_SIGALTSTACK "syscall" : "=rax"(err) : "r"(uss), "r"(uoss) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_dup2(int oldfd_0, int newfd_0) { register uintptr_t oldfd asm("rdi") = (uintptr_t)oldfd_0; register uintptr_t newfd asm("rsi") = (uintptr_t)newfd_0; register intptr_t err asm("rax"); asm volatile( "mov $33, %%eax\n\t" // SYS_DUP2 "syscall" : "=rax"(err) : "r"(oldfd), "r"(newfd) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_fstat(unsigned int fd_0, struct stat *buf_0) { register uintptr_t fd asm("rdi") = (uintptr_t)fd_0; register uintptr_t buf asm("rsi") = (uintptr_t)buf_0; register intptr_t err asm("rax"); asm volatile( "mov $5, %%eax\n\t" // SYS_FSTAT "syscall" : "=rax"(err) : "r"(fd), "r"(buf) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_exit(int error_code_0) { register uintptr_t error_code asm("rdi") = (uintptr_t)error_code_0; register intptr_t err asm("rax"); asm volatile( "mov $60, %%eax\n\t" // SYS_EXIT "syscall" : "=rax"(err) : "r"(error_code) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_kill(pid_t pid_0, int sig_0) { register uintptr_t pid asm("rdi") = (uintptr_t)pid_0; register uintptr_t sig asm("rsi") = (uintptr_t)sig_0; register intptr_t err asm("rax"); asm volatile( "mov $62, %%eax\n\t" // SYS_KILL "syscall" : "=rax"(err) : "r"(pid), "r"(sig) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_msync(unsigned long start_0, size_t len_0, int flags_0) { register uintptr_t start asm("rdi") = (uintptr_t)start_0; register uintptr_t len asm("rsi") = (uintptr_t)len_0; register uintptr_t flags asm("rdx") = (uintptr_t)flags_0; register intptr_t err asm("rax"); asm volatile( "mov $26, %%eax\n\t" // SYS_MSYNC "syscall" : "=rax"(err) : "r"(start), "r"(len), "r"(flags) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_read(int fd_0, const char *buf_0, size_t len_0) { register uintptr_t fd asm("rdi") = (uintptr_t)fd_0; register 
uintptr_t buf asm("rsi") = (uintptr_t)buf_0; register uintptr_t len asm("rdx") = (uintptr_t)len_0; register intptr_t err asm("rax"); asm volatile( "mov $0, %%eax\n\t" // SYS_READ "syscall" : "=rax"(err) : "r"(fd), "r"(buf), "r"(len) : "rcx", "r11"); return (int)err; } Z_SYSCALL pid_t sys_wait4(pid_t pid_0, int *wstatus_0, int options_0, struct rusage *rusage_0) { register uintptr_t pid asm("rdi") = (uintptr_t)pid_0; register uintptr_t wstatus asm("rsi") = (uintptr_t)wstatus_0; register uintptr_t options asm("rdx") = (uintptr_t)options_0; register uintptr_t rusage asm("r10") = (uintptr_t)rusage_0; register intptr_t err asm("rax"); asm volatile( "mov $61, %%eax\n\t" // SYS_WAIT4 "syscall" : "=rax"(err) : "r"(pid), "r"(wstatus), "r"(options), "r"(rusage) : "rcx", "r11"); return (pid_t)err; } Z_SYSCALL void *sys_shmat(int shmid_0, const void *shmaddr_0, int shmflg_0) { register uintptr_t shmid asm("rdi") = (uintptr_t)shmid_0; register uintptr_t shmaddr asm("rsi") = (uintptr_t)shmaddr_0; register uintptr_t shmflg asm("rdx") = (uintptr_t)shmflg_0; register intptr_t err asm("rax"); asm volatile( "mov $30, %%eax\n\t" // SYS_SHMAT "syscall" : "=rax"(err) : "r"(shmid), "r"(shmaddr), "r"(shmflg) : "rcx", "r11"); return (void *)err; } Z_SYSCALL pid_t sys_getpid() { register intptr_t err asm("rax"); asm volatile( "mov $39, %%eax\n\t" // SYS_GETPID "syscall" : "=rax"(err) : : "rcx", "r11"); return (pid_t)err; } Z_SYSCALL pid_t sys_fork() { register intptr_t err asm("rax"); asm volatile( "mov $57, %%eax\n\t" // SYS_FORK "syscall" : "=rax"(err) : : "rcx", "r11"); return (pid_t)err; } Z_SYSCALL pid_t sys_pause() { register intptr_t err asm("rax"); asm volatile( "mov $34, %%eax\n\t" // SYS_PAUSE "syscall" : "=rax"(err) : : "rcx", "r11"); return (pid_t)err; } Z_SYSCALL int sys_setpgid(pid_t pid_0, pid_t pgid_0) { register uintptr_t pid asm("rdi") = (uintptr_t)pid_0; register uintptr_t pgid asm("rsi") = (uintptr_t)pgid_0; register intptr_t err asm("rax"); asm volatile( "mov $109, 
%%eax\n\t" // SYS_SETPGID "syscall" : "=rax"(err) : "r"(pid), "r"(pgid) : "rcx", "r11"); return (int)err; } Z_SYSCALL int sys_munmap(unsigned long addr_0, size_t len_0) { register uintptr_t addr asm("rdi") = (uintptr_t)addr_0; register uintptr_t len asm("rsi") = (uintptr_t)len_0; register intptr_t err asm("rax"); asm volatile( "mov $11, %%eax\n\t" // SYS_MUNMAP "syscall" : "=rax"(err) : "r"(addr), "r"(len) : "rcx", "r11"); return (int)err; } ================================================ FILE: src/asm_utils.c ================================================ /* * asm_utils.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ #include #include "asm_syscall.c" #define Z_UTILS __attribute__((unused)) static inline #ifdef DEBUG #define utils_puts(s, b) __utils_puts(s, b) #define utils_error(s, e) __utils_error(s, e) #else #define utils_puts(s, b) #define utils_error(s, e) \ do { \ if (e) { \ sys_kill(/*pid=*/0, SIGKILL); \ asm volatile("ud2"); \ __builtin_unreachable(); \ } \ } while (0) #endif #define utils_likely(x) __builtin_expect(!!(x), 1) #define utils_unlikely(x) __builtin_expect(!!(x), 0) Z_UTILS void __utils_puts(const char *s, bool newline) { const char *buf = s; const char *cur = s; for (; *cur != '\0'; cur++) ; sys_write(STDERR_FILENO, buf, cur - buf); if (newline) { const char newline = '\n'; sys_write(STDERR_FILENO, &newline, 1); } } Z_UTILS void utils_num2hexstr(char *s, uint64_t n) { uint64_t r = 0x1000000000000000; while (r != 0) { char c = n / r; if (c < 10) { *(s++) = '0' + c; } else { *(s++) = 'a' + c - 10; } n %= r; r /= 0x10; } } Z_UTILS unsigned long utils_hexstr2num(const char **str_ptr) { const char *str = *str_ptr; unsigned long x = 0; while (true) { char c = *str++; if (c >= '0' && c <= '9') { x <<= 4; x |= (unsigned long)(c - '0'); } else if (c >= 'a' && c <= 'f') { x <<= 4; x |= (unsigned long)(10 + c - 'a'); } else { *str_ptr = str; return x; } } } Z_UTILS void utils_output_number(uint64_t n) { char *s = (char *)(RW_PAGE_ADDR + RW_PAGE_USED_SIZE + 0x50); *(s + 16) = '\x00'; utils_num2hexstr(s, n); utils_puts(s, true); } Z_UTILS void __utils_error(const char *err_str, bool need_exit) { utils_puts(err_str, true); if (need_exit) { asm volatile("int3"); __builtin_unreachable(); } } Z_UTILS size_t utils_strcpy(char *dst, char *src) { for (size_t i = 0;; i++) { dst[i] = src[i]; if (!src[i]) { return i; } } } /* * Load external file. 
*/ Z_UTILS size_t utils_mmap_external_file(const char *filename, bool remmap, unsigned long addr, int prot) { // Step (0): prepare error string #ifdef DEBUG char s_[16]; s_[0] = 'm'; s_[1] = 'm'; s_[2] = 'a'; s_[3] = 'p'; s_[4] = ' '; s_[5] = 'f'; s_[6] = 'a'; s_[7] = 'i'; s_[8] = 'l'; s_[9] = 'e'; s_[10] = 'd'; s_[11] = '\n'; s_[12] = '\x00'; s_[13] = ' '; s_[14] = '\x00'; #endif // Step (1): open file int fd = sys_open(filename, (prot & PROT_WRITE) ? O_RDWR : O_RDONLY, 0); if (fd < 0) { utils_puts(filename, false); utils_puts(s_ + 13, false); utils_error(s_, true); } // Step (2): get file size struct stat buf = {}; if (sys_fstat(fd, &buf)) { utils_error(s_, true); } size_t fd_size = buf.st_size; if (fd_size != (fd_size >> PAGE_SIZE_POW2) << PAGE_SIZE_POW2) { char s[0x20] = ""; utils_num2hexstr(s, fd_size); utils_puts(s, false); utils_error(s_, true); } // Step (3). remmap if needed if (remmap) { if (sys_munmap(addr, fd_size)) { utils_error(s_, true); } } // Step (4): mmap file #ifdef BINARY_SEARCH_INVALID_CRASH // make gdb able to set breakpoints at mmapped pages if (sys_mmap(addr, fd_size, prot, MAP_PRIVATE | MAP_FIXED, fd, 0) != addr) { #else if (sys_mmap(addr, fd_size, prot, MAP_SHARED | MAP_FIXED, fd, 0) != addr) { #endif utils_error(s_, true); } if (sys_close(fd)) { utils_error(s_, true); } return fd_size; } ================================================ FILE: src/binary.c ================================================ /* * binary.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "binary.h" #include "elf_.h" #include "fork_server.h" #include "interval_splay.h" #include "loader.h" #include "utils.h" #include "x64_utils.c" #include "fork_server_bin.c" #include "loader_bin.c" #define BINARY_MMAP_EXIST(b, addr) \ (g_hash_table_lookup(b->mmapped_pages, GSIZE_TO_POINTER(addr)) == NULL) \ ? false \ : true #define BINARY_MMAP_INSERT(b, addr) \ g_hash_table_insert(b->mmapped_pages, GSIZE_TO_POINTER(addr), \ GINT_TO_POINTER(1)) static const char null_buf[0x30] = {0}; /* * Align trampolines_addr */ Z_PRIVATE void __binary_align_trampolines_addr(Binary *b); /* * Setup basic information for loader */ Z_PRIVATE void __binary_setup_loader(Binary *b); /* * Setup lookup table */ Z_PRIVATE void __binary_setup_lookup_table(Binary *b); /* * Setup retaddr mapping */ Z_PRIVATE void __binary_setup_retaddr_mapping(Binary *b); /* * Setup fork server */ Z_PRIVATE void __binary_setup_fork_server(Binary *b); /* * Setup trampoline zone */ Z_PRIVATE void __binary_setup_tp_zone(Binary *b); /* * Setter and Getter */ DEFINE_GETTER(Binary, binary, ELF *, elf); DEFINE_GETTER(Binary, binary, const char *, original_filename); DEFINE_GETTER(Binary, binary, addr_t, trampolines_addr); DEFINE_GETTER(Binary, binary, addr_t, shadow_main); OVERLOAD_GETTER(Binary, binary, addr_t, shadow_code_addr) { return binary->trampolines_addr; } OVERLOAD_SETTER(Binary, binary, addr_t, shadow_start) { z_info("shadow _start address: %#lx", shadow_start); binary->shadow_start = shadow_start; if (binary->prior_fork_server) { // when -e option is given, we need to change the fork server to _start addr_t gadget_addr = binary->fork_server_addr + fork_server_bin_len; KS_ASM_JMP(gadget_addr, shadow_start); z_elf_write(binary->elf, gadget_addr, ks_size, ks_encode); } else { addr_t gadget_addr = binary->loader_addr + loader_bin_len; 
KS_ASM_JMP(gadget_addr, shadow_start); z_elf_write(binary->elf, gadget_addr, ks_size, ks_encode); } } OVERLOAD_SETTER(Binary, binary, addr_t, shadow_main) { if (binary->prior_fork_server) { EXITME("main function has not been detected"); } z_info("shadow main address: %#lx", shadow_main); binary->shadow_main = shadow_main; addr_t gadget_addr = binary->fork_server_addr + fork_server_bin_len; KS_ASM_JMP(gadget_addr, shadow_main); z_elf_write(binary->elf, gadget_addr, ks_size, ks_encode); } OVERLOAD_SETTER(Binary, binary, ELFState, elf_state) { z_elf_set_state(binary->elf, elf_state); } Z_PRIVATE void __binary_align_trampolines_addr(Binary *b) { b->trampolines_addr = BITS_ALIGN_CELL(b->trampolines_addr, 3); } Z_PRIVATE void __binary_setup_loader(Binary *b) { // step (0). create basic data struction b->mmapped_pages = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); // current address for setting instruction addr_t loader_base = z_elf_get_loader_addr(b->elf); b->loader_addr = loader_base; addr_t cur_addr = loader_base; // step (1). set entrypoint to loader address z_elf_get_ehdr(b->elf)->e_entry = cur_addr; // step (2). set down loader z_elf_write(b->elf, cur_addr, loader_bin_len, loader_bin); cur_addr += loader_bin_len; // step (3). jump to original entrypoint addr_t loader_transfer_jmp_addr = cur_addr; KS_ASM_JMP(cur_addr, z_elf_get_ori_entry(b->elf)); assert(ks_size == 5); z_elf_write(b->elf, cur_addr, ks_size, ks_encode); cur_addr += ks_size; // step (4). 8-byte alignment for following data cur_addr = BITS_ALIGN_CELL(cur_addr, 3); // step (5). set down loader_base z_elf_write(b->elf, cur_addr, sizeof(addr_t), &loader_base); cur_addr += sizeof(addr_t); // step (6). 
set down tp_addr // XXX: we will first set a NULL trampoline at trapoline zone addr_t trampolines_addr = z_elf_get_trampolines_addr(b->elf); assert(trampolines_addr % PAGE_SIZE == 0); z_elf_write(b->elf, cur_addr, sizeof(addr_t), &(trampolines_addr)); cur_addr += sizeof(addr_t); // step (7). set down shared .text base address addr_t shared_text_addr = z_elf_get_shared_text_addr(b->elf); assert(shared_text_addr % PAGE_SIZE == 0); z_elf_write(b->elf, cur_addr, sizeof(addr_t), &(shared_text_addr)); cur_addr += sizeof(addr_t); // step (8). store trampolines name const char *trampolines_name = z_elf_get_trampolines_name(b->elf); z_elf_write(b->elf, cur_addr, z_strlen(trampolines_name) + 1, trampolines_name); cur_addr += z_strlen(trampolines_name) + 1; // step (9). store lookup table name const char *lookup_tabname = z_elf_get_lookup_tabname(b->elf); z_elf_write(b->elf, cur_addr, z_strlen(lookup_tabname) + 1, lookup_tabname); cur_addr += z_strlen(lookup_tabname) + 1; // step (10). store pipeline filename const char *pipe_filename = z_elf_get_pipe_filename(b->elf); z_elf_write(b->elf, cur_addr, z_strlen(pipe_filename) + 1, pipe_filename); cur_addr += z_strlen(pipe_filename) + 1; // step (11). store pipeline filename const char *shared_text_name = z_elf_get_shared_text_name(b->elf); z_elf_write(b->elf, cur_addr, z_strlen(shared_text_name) + 1, shared_text_name); cur_addr += z_strlen(shared_text_name) + 1; // step (12). store retaddr mapping filename const char *retaddr_mapping_name = z_elf_get_retaddr_mapping_name(b->elf); z_elf_write(b->elf, cur_addr, z_strlen(retaddr_mapping_name) + 1, retaddr_mapping_name); cur_addr += z_strlen(retaddr_mapping_name) + 1; // step (13). 16-byte alignment for fork server (avoid error in xmm) cur_addr = BITS_ALIGN_CELL(cur_addr, 4); // step (14). 
// prepare the address of fork server
b->fork_server_addr = cur_addr;
z_info("fork server address: %#lx", b->fork_server_addr);

if (b->prior_fork_server) {
    // over-write the loader_transfer_jmp_addr to the fork server
    KS_ASM_JMP(loader_transfer_jmp_addr, b->fork_server_addr);
    assert(ks_size == 5);  // a rel32 jmp encodes in exactly 5 bytes
    z_elf_write(b->elf, loader_transfer_jmp_addr, ks_size, ks_encode);
} else {
    // redirect __libc_start_main into fork server address
    addr_t load_main = z_elf_get_load_main(b->elf);
    if (z_elf_get_is_pie(b->elf)) {
        // size of "lea rdi, [rip + xxx]" is 7
        KS_ASM(load_main, "lea rdi, [rip %+ld];",
               b->fork_server_addr - load_main - 7);
    } else {
        KS_ASM(load_main, "mov rdi, %#lx;", b->fork_server_addr);
    }
    // both encodings above ("lea rdi, [rip+imm32]" and "mov rdi, imm64"
    // truncated forms emitted by keystone here) must be 7 bytes
    assert(ks_size == 7);
    z_elf_write(b->elf, load_main, ks_size, ks_encode);
}
}

/*
 * Emit the fork server code at b->fork_server_addr (reserved by
 * __binary_setup_loader above), followed by a jump gadget back to the
 * original entrypoint/main, an 8-byte "early instrumentation enabled" flag,
 * and the (currently unused) random patch table.
 */
Z_PRIVATE void __binary_setup_fork_server(Binary *b) {
    // step (0). create basic data structure
    addr_t cur_addr = b->fork_server_addr;

    // step (1). set down fork server
    uint8_t *fork_server_code = z_alloc(fork_server_bin_len, sizeof(uint8_t));
    memcpy(fork_server_code, fork_server_bin, fork_server_bin_len);

    if (z_elf_is_statically_linked(b->elf) && b->prior_fork_server) {
        // XXX: it is important to skip the TLS initialization for
        // statically-linked binaries when instrument_early option is on. Note
        // that if instrument_early is not on, we do not need to wipe off such
        // instructions because TLS will be initialized before main.
        // XXX: there is a bug for keystone to assemble such code, so we have
        // to encode it manually. See:
        //   https://github.com/keystone-engine/keystone/issues/296
        /*
         * "mov DWORD PTR fs:0x2d0,eax;"
         * "mov DWORD PTR fs:0x2d4,eax;"
         */
        uint8_t tls_init_code[] = {0x64, 0x89, 0x04, 0x25, 0xd0, 0x02, 0x00,
                                   0x00, 0x64, 0x89, 0x04, 0x25, 0xd4, 0x02,
                                   0x00, 0x00};
        size_t tls_init_code_len = 16;

        // locate the code (byte-pattern search inside the embedded blob)
        uint8_t *hole = memmem(fork_server_code, fork_server_bin_len,
                               tls_init_code, tls_init_code_len);
        if (!hole) {
            EXITME("TLS initialization code not found");
        }

        // wipe the code with nop (two 8-byte nop sleds replace the two movs)
        memcpy(hole, z_x64_gen_nop(8), 8);
        memcpy(hole + 8, z_x64_gen_nop(8), 8);
    }

    z_elf_write(b->elf, cur_addr, fork_server_bin_len, fork_server_code);
    z_free(fork_server_code);
    cur_addr += fork_server_bin_len;

    // step (2). set jump gadget (default to original main/entrypoint)
    if (b->prior_fork_server) {
        addr_t entrypoint_addr = z_elf_get_ori_entry(b->elf);
        KS_ASM_JMP(cur_addr, entrypoint_addr);
        z_elf_write(b->elf, cur_addr, ks_size, ks_encode);
        cur_addr += 5;  // rel32 jmp is 5 bytes
    } else {
        addr_t main_addr = z_elf_get_main(b->elf);
        KS_ASM_JMP(cur_addr, main_addr);
        z_elf_write(b->elf, cur_addr, ks_size, ks_encode);
        cur_addr += 5;  // rel32 jmp is 5 bytes
    }

    // step (3). 8-byte alignment for following data
    cur_addr = BITS_ALIGN_CELL(cur_addr, 3);

    // step (4). write down whether -e option is enabled
    uint64_t ei_enabled = (uint64_t)b->prior_fork_server;
    z_elf_write(b->elf, cur_addr, sizeof(ei_enabled), &ei_enabled);
    cur_addr += sizeof(ei_enabled);

    // step (5). set random patch address
    // TODO: random patch is disabled currently
    b->random_patch_addr = BITS_ALIGN_CELL(cur_addr, 3);
    b->random_patch_num = 0;
    z_info("random patch address: %#lx", b->random_patch_addr);
}

/*
 * Cache the lookup table base address from the underlying ELF.
 */
Z_PRIVATE void __binary_setup_lookup_table(Binary *b) {
    b->lookup_table_addr = z_elf_get_lookup_table_addr(b->elf);
}

/*
 * Initialize the retaddr-mapping region and mark it unused (count == -1).
 */
Z_PRIVATE void __binary_setup_retaddr_mapping(Binary *b) {
    // XXX: the memory layout of retaddr mapping:
    //    0 - 7 : number of entities
    //    8 - 15: address of real_unw_step
    //   16 - ??: mapping entities
    b->retaddr_mapping_addr = z_elf_get_retaddr_mapping_addr(b->elf);
    b->retaddr_entity_addr = b->retaddr_mapping_addr + 0x10;
    b->retaddr_n = 0;

    // XXX: we first set the number of entities as -1 to indicate this space is
    // useless
    int64_t n = -1;
    z_elf_write(b->elf, b->retaddr_mapping_addr, sizeof(int64_t), &n);
}

/*
 * Initialize the trampoline zone with a NULL (terminator) Trampoline.
 */
Z_PRIVATE void __binary_setup_tp_zone(Binary *b) {
    b->trampolines_addr = z_elf_get_trampolines_addr(b->elf);
    b->last_tp_addr = b->trampolines_addr;

    // insert a NULL Trampoline to indicate terminal
    assert(sizeof(Trampoline) <= sizeof(null_buf));
    z_elf_write(b->elf, b->trampolines_addr, sizeof(Trampoline),
                (void *)null_buf);
    b->trampolines_addr += sizeof(Trampoline);
}

Z_API Binary *z_binary_open(const char *pathname, bool prior_fork_server) {
    // step (0). create a binary struct.
    Binary *b = STRUCT_ALLOC(Binary);
    b->original_filename = z_strdup(pathname);
    b->shadow_main = INVALID_ADDR;
    b->shadow_start = INVALID_ADDR;
    b->prior_fork_server = prior_fork_server;

    // step (1). setup elf
    b->elf = z_elf_open(b->original_filename, !prior_fork_server);

    // step (2). setup loader
    __binary_setup_loader(b);

    // step (3). setup lookup table
    __binary_setup_lookup_table(b);

    // step (4). setup fork server
    __binary_setup_fork_server(b);

    // step (5). setup trampoline zone
    __binary_setup_tp_zone(b);

    // step (6). setup retaddr mapping
    __binary_setup_retaddr_mapping(b);

    return b;
}

Z_API void z_binary_destroy(Binary *b) {
    z_elf_destroy(b->elf);
    z_free((char *)b->original_filename);
    g_hash_table_destroy(b->mmapped_pages);
    z_free(b);
}

Z_API void z_binary_fsync(Binary *b) {
    // sync ELF
    z_elf_fsync(b->elf);
}

Z_API void z_binary_save(Binary *b, const char *pathname) {
    // save ELF
    z_elf_save(b->elf, pathname);
}

Z_API void z_binary_create_snapshot(Binary *b, const char *pathname) {
    z_elf_create_snapshot(b->elf, pathname);
}

/*
 * Append a uTP (trampoline for ucall/ujmp) into the trampoline zone:
 * link it from the previous trampoline, emit its header (mmap info, target
 * address/size), then its body, and re-align the bump pointer.
 */
Z_API void z_binary_insert_utp(Binary *b, addr_t utp_addr, const uint8_t *utp,
                               const size_t utp_size) {
    assert(b != NULL);

    if (utp_size > PAGE_SIZE) {
        EXITME("utp size is too large [%#lx]", utp_size);
    }

    Snode *snode = z_snode_create(utp_addr, utp_size, NULL, NULL);
    addr_t mmap_addr = 0;
    size_t mmap_size = 0;
    if (!z_elf_insert_utp(b->elf, snode, &mmap_addr, &mmap_size)) {
        EXITME("Insert utp into an overlapped region: %#lx", utp_addr);
    }
    z_trace("mmap address (%#lx) and size (%#lx)", mmap_addr, mmap_size);

    // update last tp: make the previous trampoline's next_tp_offset point at
    // the one we are about to emit
    addr_t next_tp_offset = b->trampolines_addr - b->last_tp_addr;
    z_elf_write(b->elf, b->last_tp_addr + offsetof(Trampoline, next_tp_offset),
                sizeof(size_t), &next_tp_offset);
    b->last_tp_addr = b->trampolines_addr;

    // emit this utp (header fields first, then the raw trampoline bytes)
    z_elf_write(b->elf, b->trampolines_addr + offsetof(Trampoline, mmap_addr),
                sizeof(void *), &mmap_addr);
    z_elf_write(b->elf, b->trampolines_addr + offsetof(Trampoline, mmap_size),
                sizeof(size_t), &mmap_size);
    z_elf_write(b->elf, b->trampolines_addr + offsetof(Trampoline, tp_addr),
                sizeof(void *), &utp_addr);
    z_elf_write(b->elf, b->trampolines_addr + offsetof(Trampoline, tp_size),
                sizeof(size_t), &utp_size);
    // new tail: zero next_tp_offset marks the end of the trampoline list
    z_elf_write(b->elf,
                b->trampolines_addr + offsetof(Trampoline, next_tp_offset),
                sizeof(size_t), (char *)null_buf);
    b->trampolines_addr += sizeof(Trampoline);

    z_elf_write(b->elf, b->trampolines_addr, utp_size, utp);
    b->trampolines_addr += utp_size;

    __binary_align_trampolines_addr(b);
}

Z_API addr_t
z_binary_insert_shadow_code(Binary *b, const uint8_t *sc,
                            const size_t sc_size) {
    // bump-allocate sc_size bytes at the current trampoline-zone cursor and
    // copy the shadow code there; return the address where it was placed
    addr_t cur_shadow_addr = b->trampolines_addr;
    z_elf_write(b->elf, b->trampolines_addr, sc_size, sc);
    b->trampolines_addr += sc_size;
    return cur_shadow_addr;
}

/*
 * Map an original .text address to its shadow address by writing the shadow
 * address into the lookup table cell indexed by (ori_addr - .text base).
 */
Z_API void z_binary_update_lookup_table(Binary *b, addr_t ori_addr,
                                        addr_t shadow_addr) {
    Elf64_Shdr *text = z_elf_get_shdr_text(b->elf);
    addr_t text_addr = text->sh_addr;
    if (ori_addr < text_addr)
        EXITME("too small address (%#lx) compared to .text (%#lx)", ori_addr,
               text_addr);
    size_t cell_num = ori_addr - text_addr;
    // NOTE(review): if LOOKUP_TABLE_CELL_NUM is the *count* of cells, this
    // check should arguably be `>=` (index == count is out of range) — confirm
    // against the macro's definition in the config headers.
    if (cell_num > LOOKUP_TABLE_CELL_NUM)
        EXITME("too big address (%#lx) compared to .text (%#lx)", ori_addr,
               text_addr);

    addr_t cell_addr = b->lookup_table_addr + cell_num * LOOKUP_TABLE_CELL_SIZE;

    if (shadow_addr > LOOKUP_TABLE_CELL_MASK)
        EXITME("too big shadow address (%#lx)", shadow_addr);

    // only the low LOOKUP_TABLE_CELL_SIZE bytes of shadow_addr are stored
    // (little-endian layout; the range check above guarantees no loss)
    z_elf_write(b->elf, cell_addr, LOOKUP_TABLE_CELL_SIZE,
                (uint8_t *)(&shadow_addr));
}

Z_API bool z_binary_check_state(Binary *b, ELFState state) {
    return z_elf_check_state(b->elf, state);
}

/*
 * Append a (shadow_retaddr, ori_retaddr) pair to the retaddr mapping and bump
 * the on-file entity counter. Both addresses must fit in 32 bits.
 */
Z_API void z_binary_new_retaddr_entity(Binary *b, addr_t shadow_retaddr,
                                       addr_t ori_retaddr) {
    // update retaddr_n first (overwrites the initial -1 "unused" marker)
    b->retaddr_n += 1;
    z_elf_write(b->elf, b->retaddr_mapping_addr, sizeof(size_t),
                &(b->retaddr_n));

    uint32_t addr_buf;

    // insert shadow_retaddr
    addr_buf = (uint32_t)shadow_retaddr;
    assert((addr_t)addr_buf == shadow_retaddr);
    z_elf_write(b->elf, b->retaddr_entity_addr, sizeof(uint32_t), &addr_buf);
    b->retaddr_entity_addr += sizeof(uint32_t);

    // insert ori_retaddr
    addr_buf = (uint32_t)ori_retaddr;
    assert((addr_t)addr_buf == ori_retaddr);
    z_elf_write(b->elf, b->retaddr_entity_addr, sizeof(uint32_t), &addr_buf);
    b->retaddr_entity_addr += sizeof(uint32_t);
}


================================================
FILE: src/binary.h
================================================
/*
 * binary.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the
GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

#ifndef __BINARY_H
#define __BINARY_H

#include "buffer.h"
#include "config.h"
#include "elf_.h"
#include "interval_splay.h"

// XXX(review): the angle-bracket header name after this #include was stripped
// during text extraction — restore it from the original source tree.
#include

typedef addr_t bb_t;

/*
 * Binary: used to store patch meta-information.
 *
 * ELF virtual memory layout can be found in elf_.h
 */
STRUCT(Binary, {
    const char *original_filename;  // Path of input file
    ELF *elf;                       // Basic ELF information
    addr_t shadow_main;             // Address of shadow main function
    addr_t shadow_start;            // Address of shadow _start function

    // Loader
    addr_t loader_addr;  // Address of loader

    // Loader info for uTP (TramPolines for ucall/ujmp)
    // XXX: the mmapped_pages seems useless currently (delete it maybe?)
    GHashTable *mmapped_pages;  // Hashset of mmapped pages

    // Fork server and random patcher
    addr_t fork_server_addr;   // Address of fork server
    addr_t random_patch_addr;  // Address of random patch table
    addr_t random_patch_num;   // Number of random patch table
    bool prior_fork_server;    // Whether we need to defer the fork server

    // Lookup table
    addr_t lookup_table_addr;  // Address of lookup table

    // Retaddr mapping
    size_t retaddr_n;             // Number of retaddr mapping entities
    addr_t retaddr_mapping_addr;  // Address of the retaddr mapping
    addr_t retaddr_entity_addr;   // Address of the next retaddr mapping entity

    // Shadow Code and Trampolines
    addr_t trampolines_addr;  // Next available address of trampolines
    addr_t last_tp_addr;      // Start address of the last emitted trampoline
});

DECLARE_GETTER(Binary, binary, ELF *, elf);
DECLARE_GETTER(Binary, binary, const char *, original_filename);
DECLARE_GETTER(Binary, binary, addr_t, trampolines_addr);
DECLARE_GETTER(Binary, binary, addr_t, shadow_main);
DECLARE_GETTER(Binary, binary, addr_t, shadow_code_addr);
DECLARE_SETTER(Binary, binary, addr_t, shadow_main);
DECLARE_SETTER(Binary, binary, addr_t, shadow_start);
DECLARE_SETTER(Binary, binary, ELFState, elf_state);

/*
 * Construct a binary for given file.
 */
Z_API Binary *z_binary_open(const char *in_filename, bool prior_fork_server);

/*
 * Destructor of Binary
 */
Z_API void z_binary_destroy(Binary *b);

/*
 * Save binary
 */
Z_API void z_binary_save(Binary *b, const char *pathname);

/*
 * Create a snapshot for current Binary.
 * Different from z_binary_save, this Binary's main body (except lookup table
 * and shadow) will remain unchanged even future patches are applied.
 */
Z_API void z_binary_create_snapshot(Binary *b, const char *pathname);

/*
 * Insert a new uTP
 */
// XXX: currently we do not use uTP in the actual rewriting, but it will be
// extremely useful when we start to handle overlapped jmp bridges.
Z_API void z_binary_insert_utp(Binary *b, addr_t utp_addr, const uint8_t *utp,
                               const size_t utp_size);

/*
 * Insert a new piece of shadow code, and return the address of the shadow code
 */
Z_API addr_t z_binary_insert_shadow_code(Binary *b, const uint8_t *sc,
                                         const size_t sc_size);

/*
 * Notify binary that all shadow code has been inserted
 */
Z_API void z_binary_shadow_code_notify(Binary *b, addr_t shadow_main);

/*
 * Add a look up cell
 */
Z_API void z_binary_update_lookup_table(Binary *b, addr_t ori_addr,
                                        addr_t shadow_addr);

/*
 * Sync binary with underlying files
 */
Z_API void z_binary_fsync(Binary *b);

/*
 * Wrapper for z_elf_check_state()
 */
Z_API bool z_binary_check_state(Binary *b, ELFState state);

/*
 * Add a new retaddr entity
 */
Z_API void z_binary_new_retaddr_entity(Binary *b, addr_t shadow_retaddr,
                                       addr_t ori_retaddr);

#endif


================================================
FILE: src/buffer.c
================================================
/*
 * buffer.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
*/

#include "buffer.h"
#include "utils.h"

/*
 * Extend buffer's chunk so that it can contain at least new_chunk_size bytes
 */
Z_PRIVATE void __buffer_extend(Buffer *buf, size_t new_chunk_size);

/*
 * Create an empty buffer whose chunk can contain at least size bytes
 */
Z_PRIVATE Buffer *__buffer_new(size_t size);

Z_PRIVATE Buffer *__buffer_new(size_t size) {
    // Get chunk_size: round size up to the next power of two (bit-smearing
    // trick), with a minimum of 8 bytes
    size_t chunk_size = 8;
    if (size >= 1) {
        chunk_size = size;
        chunk_size |= (chunk_size >> 1);
        chunk_size |= (chunk_size >> 2);
        chunk_size |= (chunk_size >> 4);
        chunk_size |= (chunk_size >> 8);
        chunk_size |= (chunk_size >> 16);
        chunk_size |= (chunk_size >> 32);
        chunk_size += 1;
    }
    // the rounded chunk is always strictly larger than the requested size
    assert(chunk_size > size);
    z_trace("get chunk_size (%#lx) for requested size (%#lx)", chunk_size,
            size);

    // Create a buffer (z_alloc'd chunk, logical size starts at 0)
    Buffer *buf = STRUCT_ALLOC(Buffer);
    buf->raw_buf = (uint8_t *)z_alloc(chunk_size, sizeof(uint8_t));
    buf->size = 0;
    buf->chunk_size = chunk_size;
    return buf;
}

Z_PRIVATE void __buffer_extend(Buffer *buf, size_t new_chunk_size) {
    assert(buf != NULL);
    z_trace("extend to %#lx bytes, original one is %#lx bytes", new_chunk_size,
            buf->chunk_size);

    // double the chunk until it is strictly larger than new_chunk_size
    while (new_chunk_size >= buf->chunk_size) {
        // overflow guard: doubling must grow the chunk
        if (buf->chunk_size * 2 <= buf->chunk_size) {
            EXITME("too big chunk size (%#lx)", buf->chunk_size);
        }
        buf->raw_buf = z_realloc(buf->raw_buf, buf->chunk_size * 2);
        buf->chunk_size *= 2;
    }
}

/*
 * Setter and Getter
 */
DEFINE_GETTER(Buffer, buffer, size_t, size);
DEFINE_GETTER(Buffer, buffer, uint8_t *, raw_buf);

Z_API Buffer *z_buffer_create(const uint8_t *ptr, size_t size) {
    Buffer *buf = __buffer_new(size);
    if (ptr != NULL) {
        memcpy(buf->raw_buf, ptr, size);
    } else {
        // NULL ptr is only legal for an empty buffer
        if (size) {
            EXITME("try to create a buffer with NULL ptr and positive size");
        }
    }
    buf->size = size;
    return buf;
}

Z_API void z_buffer_push(Buffer *buf, uint8_t ch) {
    assert(buf != NULL);
    __buffer_extend(buf, buf->size + 1);
    buf->raw_buf[buf->size] = ch;
    buf->size += 1;
}

Z_API void z_buffer_append(Buffer *dst, Buffer *src) {
    assert(dst != NULL && src != NULL);
__buffer_extend(dst, dst->size + src->size);
    memcpy(dst->raw_buf + dst->size, src->raw_buf, src->size);
    dst->size += src->size;
}

Z_API void z_buffer_append_raw(Buffer *buf, const uint8_t *ptr, size_t size) {
    assert(buf != NULL);
    // a NULL ptr is silently ignored (nothing to append)
    if (ptr != NULL) {
        __buffer_extend(buf, buf->size + size);
        memcpy(buf->raw_buf + buf->size, ptr, size);
        buf->size += size;
    }
}

Z_API Buffer *z_buffer_read_file(const char *pathname) {
    FILE *f = z_fopen(pathname, "rb");

    // Get file size (seek to end, then ftell)
    z_fseek(f, 0L, SEEK_END);
    size_t f_size = (size_t)z_ftell(f);

    // Create a buffer large enough for the whole file
    Buffer *buf = (Buffer *)__buffer_new(f_size);

    // Read file from the beginning; a short read is fatal
    z_fseek(f, 0L, SEEK_SET);
    size_t r_size = z_fread(buf->raw_buf, sizeof(uint8_t), f_size, f);
    if (r_size < f_size) {
        EXITME("read %lu bytes from \"%s\", but %lu bytes expected", r_size,
               pathname, f_size);
    }
    buf->size = r_size;

    z_fclose(f);
    z_trace("successfully read %lu bytes from \"%s\"", f_size, pathname);

    return buf;
}

Z_API void z_buffer_write_file(Buffer *buf, const char *pathname) {
    assert(buf != NULL);

    FILE *f = z_fopen(pathname, "wb");
    size_t size = z_fwrite(buf->raw_buf, sizeof(uint8_t), buf->size, f);
    // a short write is fatal
    if (size != buf->size) {
        EXITME(
            "fail when writing content to \"%s\", expect %ld bytes, but only "
            "%ld bytes",
            pathname, buf->size, size);
    }
    z_fclose(f);
}

Z_API Buffer *z_buffer_dup(Buffer *src) {
    assert(src != NULL);

    // deep copy: same logical size and chunk size, freshly allocated storage
    Buffer *dst = STRUCT_ALLOC(Buffer);
    dst->size = src->size;
    dst->chunk_size = src->chunk_size;
    dst->raw_buf = z_alloc(dst->chunk_size, sizeof(uint8_t));
    memcpy(dst->raw_buf, src->raw_buf, dst->size);

    return dst;
}

Z_API void z_buffer_destroy(Buffer *buf) {
    assert(buf != NULL);

    // Free Buffer.buf (content is zeroed before release)
    memset(buf->raw_buf, 0, buf->size);
    z_free((void *)buf->raw_buf);

    // Free Buffer itself
    memset(buf, 0, sizeof(Buffer));
    z_free((void *)buf);
}

Z_API uint8_t *z_buffer_seek(Buffer *buf, size_t offset, int whence) {
    assert(buf != NULL);
    if (offset >= buf->size) {
        z_warn("offset (%lu) is bigger than buffer size (%lu)", offset,
               buf->size);
        return NULL;
    }

    size_t
st_offset;
    if (whence == SEEK_END) {
        // The last byte should be 0 from SEEK_END
        st_offset = buf->size - offset - 1;
    } else if (whence == SEEK_SET) {
        st_offset = offset;
    } else {
        z_warn("invalid whence (%d)", whence);
        return NULL;
    }

    return (buf->raw_buf + st_offset);
}

Z_API size_t z_buffer_tell(Buffer *buf, const uint8_t *ptr, int whence) {
    assert(buf != NULL);

    // ptr must point inside [raw_buf, raw_buf + size)
    if (ptr < buf->raw_buf) {
        z_warn("ptr is smaller than buf->raw_buf");
        return SIZE_MAX;
    }
    if (ptr - buf->raw_buf >= buf->size) {
        z_warn("ptr is bigger than buf->raw_buf + buf->size");
        return SIZE_MAX;
    }

    if (whence == SEEK_END) {
        // offset counted backwards from the last byte (last byte -> 0)
        return (buf->size - (ptr - buf->raw_buf) - 1);
    } else if (whence == SEEK_SET) {
        return (ptr - buf->raw_buf);
    } else {
        z_warn("invalid whence (%d)", whence);
        return SIZE_MAX;
    }
}

Z_API void z_buffer_truncate(Buffer *buf, size_t index) {
    assert(buf != NULL);
    // truncation beyond the current size is a no-op
    if (index >= buf->size) {
        z_trace("index is out of buffer (%lu >= %lu)", index, buf->size);
        return;
    }
    buf->size = index;
}

Z_API void z_buffer_fill(Buffer *buf, uint8_t ch, size_t size) {
    assert(buf != NULL);
    // only growing fills are supported
    if (size < buf->size) {
        z_warn(
            "buffer's original size (%lu) is bigger than requested size (%lu)",
            buf->size, size);
        return;
    }

    __buffer_extend(buf, size);
    if (ch != 0) {
        // Little optimization
        // NOTE(review): skipping the memset for ch == 0 assumes the newly
        // exposed bytes are already zero; regions grown via z_realloc may
        // contain stale data — confirm z_realloc's zeroing semantics.
        memset(buf->raw_buf + buf->size, ch, size - buf->size);
    }
    buf->size = size;
}


================================================
FILE: src/buffer.h
================================================
/*
 * buffer.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

#ifndef __BUFFER_H
#define __BUFFER_H

#include "config.h"

/*
 * Buffer structure for all heap-allocated data
 */
STRUCT(Buffer, {
    uint8_t *raw_buf;   // backing storage
    size_t size;        // logical number of bytes in use
    size_t chunk_size;  // allocated capacity of raw_buf
});

/*
 * Setter and Getter
 */
DECLARE_GETTER(Buffer, buffer, size_t, size);
DECLARE_GETTER(Buffer, buffer, uint8_t *, raw_buf);

/*
 * Create a buffer from a raw pointer.
 * If ptr == NULL and size == 0, return an empty buffer
 */
Z_API Buffer *z_buffer_create(const uint8_t *ptr, size_t size);

/*
 * Push a ch into buffer
 */
Z_API void z_buffer_push(Buffer *buf, uint8_t ch);

/*
 * Append src buffer into the end of buffer dst
 */
Z_API void z_buffer_append(Buffer *dst, Buffer *src);

/*
 * Append raw pointer into the end of buffer
 */
Z_API void z_buffer_append_raw(Buffer *buf, const uint8_t *ptr, size_t size);

/*
 * Fill buffer with ch to size bytes
 */
Z_API void z_buffer_fill(Buffer *buf, uint8_t ch, size_t size);

/*
 * Create a buffer and read content from pathname
 */
Z_API Buffer *z_buffer_read_file(const char *pathname);

/*
 * Create a file and write content to pathname
 */
Z_API void z_buffer_write_file(Buffer *buf, const char *pathname);

/*
 * Duplicate a buffer
 */
Z_API Buffer *z_buffer_dup(Buffer *src);

/*
 * Destructor of Buffer
 */
Z_API void z_buffer_destroy(Buffer *buf);

/*
 * Seek an offset, return a pointer to that offset.
 * Return NULL if the offset is invalid.
 */
Z_API uint8_t *z_buffer_seek(Buffer *buf, size_t offset, int whence);

/*
 * Tell an pointer, return the pointer's offset on the buffer.
 * Return MAX of size_t if the pointer is not on the buffer.
 */
Z_API size_t z_buffer_tell(Buffer *buf, const uint8_t *ptr, int whence);

/*
 * Truncate all content after index (included).
*/ Z_API void z_buffer_truncate(Buffer *buf, size_t index); #endif ================================================ FILE: src/capstone_.c ================================================ /* * capstone_.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "capstone_.h" #include "utils.h" #define __FLG_READ(F) static uint64_t __FLG_READ_##F = X86_EFLAGS_TEST_##F __FLG_READ(OF); __FLG_READ(SF); __FLG_READ(ZF); __FLG_READ(AF); __FLG_READ(CF); __FLG_READ(PF); #undef __FLG_READ #define __FLG_WRITE(F) \ static uint64_t __FLG_WRITE_##F = \ X86_EFLAGS_MODIFY_##F | X86_EFLAGS_RESET_##F | X86_EFLAGS_SET_##F | \ X86_EFLAGS_UNDEFINED_##F __FLG_WRITE(OF); __FLG_WRITE(SF); __FLG_WRITE(ZF); __FLG_WRITE(AF); __FLG_WRITE(CF); __FLG_WRITE(PF); #undef __FLA_WRITE /* * Mapping a eflag action into individual flag's read state */ Z_PRIVATE FLGState __capstone_mapping_flg_read(uint64_t flg_state); /* * Mapping a eflag action into individual flag's write state */ Z_PRIVATE FLGState __capstone_mapping_flg_write(uint64_t flg_state); /* * Mapping CAPSTONE general purpose register info into GPRState. Here we mapping * a sub-register into its complete form. * * More information please refer to * https://www.tortall.net/projects/yasm/manual/html/arch-x86-registers.html. 
*/ Z_PRIVATE GPRState __capstone_mapping_pgr(x86_reg reg_id); /* * Filter CAPSTONE general purpose register, we only need 32-bit and 64-bit GPR * here */ Z_PRIVATE GPRState __capstone_filter_pgr(x86_reg reg_id); Z_PRIVATE FLGState __capstone_mapping_flg_write(uint64_t flg_state) { #define __FLG_MAPPING_WRITE(fs, F) \ do { \ if (flg_state & __FLG_WRITE_##F) { \ (fs) |= FLGSTATE_##F; \ } \ } while (0) FLGState fs = 0; __FLG_MAPPING_WRITE(fs, OF); __FLG_MAPPING_WRITE(fs, SF); __FLG_MAPPING_WRITE(fs, ZF); __FLG_MAPPING_WRITE(fs, AF); __FLG_MAPPING_WRITE(fs, CF); __FLG_MAPPING_WRITE(fs, PF); return fs; #undef __FLG_MAPPING_WRITE } Z_PRIVATE FLGState __capstone_mapping_flg_read(uint64_t flg_state) { #define __FLG_MAPPING_READ(fs, F) \ do { \ if (flg_state & __FLG_READ_##F) { \ (fs) |= FLGSTATE_##F; \ } \ } while (0) FLGState fs = 0; __FLG_MAPPING_READ(fs, OF); __FLG_MAPPING_READ(fs, SF); __FLG_MAPPING_READ(fs, ZF); __FLG_MAPPING_READ(fs, AF); __FLG_MAPPING_READ(fs, CF); __FLG_MAPPING_READ(fs, PF); return fs; #undef __FLG_MAPPING_READ } Z_PRIVATE GPRState __capstone_mapping_pgr(x86_reg reg_id) { #define __GPR_MAPPING_1(T) \ case X86_REG_##T##H: \ case X86_REG_##T##L: \ case X86_REG_##T##X: \ case X86_REG_E##T##X: \ case X86_REG_R##T##X: \ return GPRSTATE_R##T##X #define __GPR_MAPPING_2(T) \ case X86_REG_##T: \ case X86_REG_##T##L: \ case X86_REG_E##T: \ case X86_REG_R##T: \ return GPRSTATE_R##T #define __GPR_MAPPING_3(T) \ case X86_REG_##T##B: \ case X86_REG_##T##W: \ case X86_REG_##T##D: \ case X86_REG_##T: \ return GPRSTATE_##T switch (reg_id) { __GPR_MAPPING_1(A); __GPR_MAPPING_1(B); __GPR_MAPPING_1(C); __GPR_MAPPING_1(D); __GPR_MAPPING_2(DI); __GPR_MAPPING_2(SI); __GPR_MAPPING_2(BP); __GPR_MAPPING_3(R8); __GPR_MAPPING_3(R9); __GPR_MAPPING_3(R10); __GPR_MAPPING_3(R11); __GPR_MAPPING_3(R12); __GPR_MAPPING_3(R13); __GPR_MAPPING_3(R14); __GPR_MAPPING_3(R15); default: return 0; } #undef __GPR_MAPPING_1 #undef __GPR_MAPPING_2 #undef __GPR_MAPPING_3 } Z_PRIVATE GPRState 
__capstone_filter_pgr(x86_reg reg_id) {
// Each filter macro collapses the 32-bit and 64-bit forms of one register
// family into the single GPRSTATE bit for its 64-bit form (sub-registers
// narrower than 32 bits are intentionally NOT matched here).
#define __GPR_FILTER_1(T)     \
    case X86_REG_E##T##X:     \
    case X86_REG_R##T##X:     \
        return GPRSTATE_R##T##X
#define __GPR_FILTER_2(T)     \
    case X86_REG_E##T:        \
    case X86_REG_R##T:        \
        return GPRSTATE_R##T
#define __GPR_FILTER_3(T)     \
    case X86_REG_##T##D:      \
    case X86_REG_##T:         \
        return GPRSTATE_##T
    switch (reg_id) {
        __GPR_FILTER_1(A);
        __GPR_FILTER_1(B);
        __GPR_FILTER_1(C);
        __GPR_FILTER_1(D);
        __GPR_FILTER_2(DI);
        __GPR_FILTER_2(SI);
        __GPR_FILTER_2(BP);
        __GPR_FILTER_3(R8);
        __GPR_FILTER_3(R9);
        __GPR_FILTER_3(R10);
        __GPR_FILTER_3(R11);
        __GPR_FILTER_3(R12);
        __GPR_FILTER_3(R13);
        __GPR_FILTER_3(R14);
        __GPR_FILTER_3(R15);
        default:
            // not a 32/64-bit general purpose register
            return 0;
    }
#undef __GPR_FILTER_1
#undef __GPR_FILTER_2
#undef __GPR_FILTER_3
}

// direct and far calls
Z_API bool z_capstone_is_call(const cs_insn *inst) {
    return (inst->id == X86_INS_CALL) || (inst->id == X86_INS_LCALL);
}

// direct and far jumps
Z_API bool z_capstone_is_jmp(const cs_insn *inst) {
    return (inst->id == X86_INS_JMP) || (inst->id == X86_INS_LJMP);
}

Z_API bool z_capstone_is_xbegin(const cs_insn *inst) {
    return inst->id == X86_INS_XBEGIN;
}

Z_API bool z_capstone_is_ret(const cs_insn *inst) {
    return inst->id == X86_INS_RET;
}

// loop / loope / loopne
Z_API bool z_capstone_is_loop(const cs_insn *inst) {
    switch (inst->id) {
        case X86_INS_LOOP:
        case X86_INS_LOOPE:
        case X86_INS_LOOPNE:
            return true;
        default:
            return false;
    }
}

// any conditional jump (including jcxz/jecxz/jrcxz)
Z_API bool z_capstone_is_cjmp(const cs_insn *inst) {
    switch (inst->id) {
        case X86_INS_JAE:
        case X86_INS_JA:
        case X86_INS_JBE:
        case X86_INS_JB:
        case X86_INS_JCXZ:
        case X86_INS_JECXZ:
        case X86_INS_JE:
        case X86_INS_JGE:
        case X86_INS_JG:
        case X86_INS_JLE:
        case X86_INS_JL:
        case X86_INS_JNE:
        case X86_INS_JNO:
        case X86_INS_JNP:
        case X86_INS_JNS:
        case X86_INS_JO:
        case X86_INS_JP:
        case X86_INS_JRCXZ:
        case X86_INS_JS:
            return true;
        default:
            return false;
    }
}

Z_API bool z_capstone_is_terminator(const cs_insn *inst) {
    // TODO: better non-return analysis?
(light-weight approach) if (z_capstone_is_jmp(inst)) return true; if (z_capstone_is_cjmp(inst)) return false; if (z_capstone_is_call(inst)) return false; if (z_capstone_is_loop(inst)) return false; if (z_capstone_is_xbegin(inst)) return false; // check HLT first if (inst->id == X86_INS_HLT) { return true; } cs_detail *detail = inst->detail; for (int32_t i = 0; i < detail->groups_count; i++) { switch (detail->groups[i]) { case X86_GRP_JUMP: case X86_GRP_CALL: case X86_GRP_BRANCH_RELATIVE: EXITME( "branch-relative instruction should be catched before " "[%#lx:\t%s %s]", inst->address, inst->mnemonic, inst->op_str); /* * instructions in RET and IRET group will change the control flow, * but most instructions (except HLT) in INT and PRIVILEGE groups * seem not. Please refer to * https://github.com/aquynh/capstone/blob/master/arch/X86/X86MappingInsn_reduce.inc * for more information */ case X86_GRP_RET: case X86_GRP_IRET: return true; case X86_GRP_INT: case X86_GRP_PRIVILEGE: default: continue; } } return false; } Z_API bool z_capstone_is_rare(const cs_insn *inst) { // we maintain a rare instruction list to benifit hint collection switch (inst->id) { case X86_INS_OUT: case X86_INS_OUTSB: case X86_INS_OUTSD: case X86_INS_OUTSW: case X86_INS_IN: case X86_INS_IRETD: case X86_INS_FLD: case X86_INS_ENTER: case X86_INS_XCHG: return true; default: return false; } } Z_API RegState *z_capstone_get_register_state(const cs_insn *inst) { RegState *rs = STRUCT_ALLOC(RegState); // step (1). get grp cs_regs regs_read, regs_write; uint8_t regs_read_count, regs_write_count; if (cs_regs_access(cs, inst, regs_read, ®s_read_count, regs_write, ®s_write_count)) { EXITME("fail on cs_regs_access"); } rs->gpr_read = rs->gpr_write = 0; rs->gpr_read_32_64 = rs->gpr_write_32_64 = 0; // step (1.1). read for (int i = 0; i < regs_read_count; i++) { rs->gpr_read |= __capstone_mapping_pgr(regs_read[i]); rs->gpr_read_32_64 |= __capstone_filter_pgr(regs_read[i]); } // step (1.2). 
write for (int i = 0; i < regs_write_count; i++) { rs->gpr_write |= __capstone_mapping_pgr(regs_write[i]); rs->gpr_write_32_64 |= __capstone_filter_pgr(regs_write[i]); } // step (2). get sse #define __SSE_MAPPING(T, N, reg, states) \ do { \ if ((reg) == X86_REG_##T##N) { \ (states) |= T##STATE_##T##N; \ } \ } while (0) #define __SSE_MAPPING_FOR_ALL(T, reg, states) \ do { \ __SSE_MAPPING(T, 0, reg, states); \ __SSE_MAPPING(T, 1, reg, states); \ __SSE_MAPPING(T, 2, reg, states); \ __SSE_MAPPING(T, 3, reg, states); \ __SSE_MAPPING(T, 4, reg, states); \ __SSE_MAPPING(T, 5, reg, states); \ __SSE_MAPPING(T, 6, reg, states); \ __SSE_MAPPING(T, 7, reg, states); \ __SSE_MAPPING(T, 8, reg, states); \ __SSE_MAPPING(T, 9, reg, states); \ __SSE_MAPPING(T, 10, reg, states); \ __SSE_MAPPING(T, 11, reg, states); \ __SSE_MAPPING(T, 12, reg, states); \ __SSE_MAPPING(T, 13, reg, states); \ __SSE_MAPPING(T, 14, reg, states); \ __SSE_MAPPING(T, 15, reg, states); \ } while (0) for (int i = 0; i < regs_read_count; i++) { __SSE_MAPPING_FOR_ALL(XMM, regs_read[i], rs->xmm_read); __SSE_MAPPING_FOR_ALL(YMM, regs_read[i], rs->ymm_read); __SSE_MAPPING_FOR_ALL(ZMM, regs_read[i], rs->zmm_read); } for (int i = 0; i < regs_write_count; i++) { __SSE_MAPPING_FOR_ALL(XMM, regs_write[i], rs->xmm_write); __SSE_MAPPING_FOR_ALL(YMM, regs_write[i], rs->ymm_write); __SSE_MAPPING_FOR_ALL(ZMM, regs_write[i], rs->zmm_write); } #undef __SSE_MAPPING #undef __SSE_MAPPING_FOR_ALL // step (3). get flg rs->flg_read = rs->flg_write = 0; // step (3.0). check FPU first for (int i = 0; i < inst->detail->groups_count; i++) { if (inst->detail->groups[i] == X86_GRP_FPU) { goto DONE; } } // step (3.1). get flg state rs->flg_read = __capstone_mapping_flg_read(inst->detail->x86.eflags); rs->flg_write = __capstone_mapping_flg_write(inst->detail->x86.eflags); /* * XXX: capstone: *sbb* and *adc* instructions do not have any TEST_CF bit. * Hence, we use a very conservative approach to get rs->flg_read. 
Please * refer to https://github.com/aquynh/capstone/issues/1696 for more * information. * * However, we do not remove previous rs->flg_read code. Maybe one day we * can patch capstone to enable a more powerful optimization. */ // TODO: prepare our own patch for capstone and keystone. for (int i = 0; i < regs_read_count; i++) { if (regs_read[i] == X86_REG_EFLAGS) { rs->flg_read = FLGSTATE_ALL; break; } } DONE: return rs; } // XXX: call qword byte [xxx] Z_API bool z_capstone_is_const_mem_ucall(const cs_insn *inst, addr_t *addr_ptr) { // first check that it is a jump instruction if (inst->id != X86_INS_CALL) { return false; } // then check that it only has one operand cs_detail *detail = inst->detail; if (detail->x86.op_count != 1) { return false; } // then check the operand is a qword memory cs_x86_op *op = &(detail->x86.operands[0]); if (op->type != X86_OP_MEM || op->mem.base != X86_REG_INVALID || op->mem.index != X86_REG_INVALID || op->size != 8) { return false; } // update addr_ptr if (addr_ptr) { *addr_ptr = op->mem.disp; } return true; } // XXX: call qword byte [rip+xxx] Z_API bool z_capstone_is_pc_related_ucall(const cs_insn *inst, addr_t *addr_ptr) { // first check that it is a jump instruction if (inst->id != X86_INS_CALL) { return false; } // then check that it only has one operand cs_detail *detail = inst->detail; if (detail->x86.op_count != 1) { return false; } // then check the operand is a qword memory cs_x86_op *op = &(detail->x86.operands[0]); if (op->type != X86_OP_MEM || op->mem.base != X86_REG_RIP || op->mem.index != X86_REG_INVALID || op->size != 8) { return false; } // update addr_ptr if (addr_ptr) { *addr_ptr = inst->address + inst->size + op->mem.disp; } return true; } // XXX: jmp qword byte [xxx] Z_API bool z_capstone_is_const_mem_ujmp(const cs_insn *inst, addr_t *addr_ptr) { // first check that it is a jump instruction if (inst->id != X86_INS_JMP) { return false; } // then check that it only has one operand cs_detail *detail = inst->detail; 
if (detail->x86.op_count != 1) { return false; } // then check the operand is a qword memory cs_x86_op *op = &(detail->x86.operands[0]); if (op->type != X86_OP_MEM || op->mem.base != X86_REG_INVALID || op->mem.index != X86_REG_INVALID || op->size != 8) { return false; } // update addr_ptr if (addr_ptr) { *addr_ptr = op->mem.disp; } return true; } // XXX: jmp qword byte [rip+xxx] Z_API bool z_capstone_is_pc_related_ujmp(const cs_insn *inst, addr_t *addr_ptr) { // first check that it is a jump instruction if (inst->id != X86_INS_JMP) { return false; } // then check that it only has one operand cs_detail *detail = inst->detail; if (detail->x86.op_count != 1) { return false; } // then check the operand is a qword memory cs_x86_op *op = &(detail->x86.operands[0]); if (op->type != X86_OP_MEM || op->mem.base != X86_REG_RIP || op->mem.index != X86_REG_INVALID || op->size != 8) { return false; } // update addr_ptr if (addr_ptr) { *addr_ptr = inst->address + inst->size + op->mem.disp; } return true; } Z_API void z_capstone_show_gpr_state(GPRState gpr_state) { z_info( "rax %d | rbx %d | rcx %d | rdx %d | rdi %d | rsi %d | rbp %d | r8 %d " "| r9 %d | r10 %d | r11 %d | r12 %d | r13 %d | r14 %d | r15 %d", (gpr_state >> 0) & 1UL, (gpr_state >> 1) & 1UL, (gpr_state >> 2) & 1UL, (gpr_state >> 3) & 1UL, (gpr_state >> 4) & 1UL, (gpr_state >> 5) & 1UL, (gpr_state >> 6) & 1UL, (gpr_state >> 7) & 1UL, (gpr_state >> 8) & 1UL, (gpr_state >> 9) & 1UL, (gpr_state >> 10) & 1UL, (gpr_state >> 11) & 1UL, (gpr_state >> 12) & 1UL, (gpr_state >> 13) & 1UL, (gpr_state >> 14) & 1UL); } Z_API void z_capstone_show_flg_state(FLGState flg_state) { z_info("OF %d | SF %d | ZF %d | AF %d | CF %d | PF %d", (flg_state >> 0) & 1UL, (flg_state >> 1) & 1UL, (flg_state >> 2) & 1UL, (flg_state >> 3) & 1UL, (flg_state >> 4) & 1UL, (flg_state >> 5) & 1UL); } ================================================ FILE: src/capstone_.h ================================================ /* * capstone_.h * Copyright (C) 2021 
Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __CAPSTONE__H #define __CAPSTONE__H /* * My own wrapper for capstone, which aims at extracting information from known * cs_insn structures. CS_DISASM is not included in this file because it is used * for disassembly instead of check cs_insn's features. */ #include "config.h" #include /* * General Purposed Register */ #define CAPSTONE_FORALL_GPR(STATEMENT) \ do { \ STATEMENT(RAX); \ STATEMENT(RBX); \ STATEMENT(RCX); \ STATEMENT(RDX); \ STATEMENT(RBP); \ STATEMENT(RDI); \ STATEMENT(RSI); \ STATEMENT(R8); \ STATEMENT(R9); \ STATEMENT(R10); \ STATEMENT(R11); \ STATEMENT(R12); \ STATEMENT(R13); \ STATEMENT(R14); \ STATEMENT(R15); \ } while (0) typedef enum gpr_state_t { GPRSTATE_RAX = (1UL << 0), GPRSTATE_RBX = (1UL << 1), GPRSTATE_RCX = (1UL << 2), GPRSTATE_RDX = (1UL << 3), GPRSTATE_RDI = (1UL << 4), GPRSTATE_RSI = (1UL << 5), GPRSTATE_RBP = (1UL << 6), // <-- NO RSP HERE GPRSTATE_R8 = (1UL << 7), GPRSTATE_R9 = (1UL << 8), GPRSTATE_R10 = (1UL << 9), GPRSTATE_R11 = (1UL << 10), GPRSTATE_R12 = (1UL << 11), GPRSTATE_R13 = (1UL << 12), GPRSTATE_R14 = (1UL << 13), GPRSTATE_R15 = (1UL << 14), GPRSTATE_ALL = ((1UL << 15) - 1), } GPRState; /* * EFLAGS Register */ #define CAPSTONE_FORALL_FLG(STATEMENT) \ do { \ STATEMENT(OF); \ STATEMENT(SF); \ STATEMENT(ZF); \ STATEMENT(AF); \ STATEMENT(CF); \ STATEMENT(PF); \ } while (0) typedef enum flg_state_t 
{ FLGSTATE_OF = (1UL << 0), FLGSTATE_SF = (1UL << 1), FLGSTATE_ZF = (1UL << 2), FLGSTATE_AF = (1UL << 3), FLGSTATE_CF = (1UL << 4), FLGSTATE_PF = (1UL << 5), FLGSTATE_ALL = ((1UL << 6) - 1), } FLGState; /* * SSE Register */ #define CAPSTONE_FORALL_SSE(T, STATEMENT) \ do { \ STATEMENT(T, 0); \ STATEMENT(T, 1); \ STATEMENT(T, 2); \ STATEMENT(T, 3); \ STATEMENT(T, 4); \ STATEMENT(T, 5); \ STATEMENT(T, 6); \ STATEMENT(T, 7); \ STATEMENT(T, 8); \ STATEMENT(T, 9); \ STATEMENT(T, 10); \ STATEMENT(T, 11); \ STATEMENT(T, 12); \ STATEMENT(T, 13); \ STATEMENT(T, 14); \ STATEMENT(T, 15); \ } while (0) #define __SSE_DEFINE(T, N) T##STATE_##T##N = (1UL << N) #define __SSE_DEFINE_ALL(enum_name, T) \ typedef enum enum_name { \ __SSE_DEFINE(T, 0), \ __SSE_DEFINE(T, 1), \ __SSE_DEFINE(T, 2), \ __SSE_DEFINE(T, 3), \ __SSE_DEFINE(T, 4), \ __SSE_DEFINE(T, 5), \ __SSE_DEFINE(T, 6), \ __SSE_DEFINE(T, 7), \ __SSE_DEFINE(T, 8), \ __SSE_DEFINE(T, 9), \ __SSE_DEFINE(T, 10), \ __SSE_DEFINE(T, 11), \ __SSE_DEFINE(T, 12), \ __SSE_DEFINE(T, 13), \ __SSE_DEFINE(T, 14), \ __SSE_DEFINE(T, 15), \ \ T##STATE_ALL = ~(0UL), \ } T##State; __SSE_DEFINE_ALL(xmm_state_t, XMM); __SSE_DEFINE_ALL(ymm_state_t, YMM); __SSE_DEFINE_ALL(zmm_state_t, ZMM); #undef __SSE_DEFINE_ALL #undef __SSE_DEFINE STRUCT(RegState, { GPRState gpr_read; GPRState gpr_read_32_64; GPRState gpr_write; GPRState gpr_write_32_64; FLGState flg_read; FLGState flg_write; XMMState xmm_read; XMMState xmm_write; YMMState ymm_read; YMMState ymm_write; ZMMState zmm_read; ZMMState zmm_write; }); Z_API bool z_capstone_is_call(const cs_insn *inst); Z_API bool z_capstone_is_jmp(const cs_insn *inst); Z_API bool z_capstone_is_cjmp(const cs_insn *inst); Z_API bool z_capstone_is_loop(const cs_insn *inst); Z_API bool z_capstone_is_xbegin(const cs_insn *inst); Z_API bool z_capstone_is_ret(const cs_insn *inst); Z_API bool z_capstone_is_terminator(const cs_insn *inst); Z_API bool z_capstone_is_rare(const cs_insn *inst); Z_API bool 
z_capstone_is_pc_related_ujmp(const cs_insn *inst, addr_t *addr_ptr); Z_API bool z_capstone_is_const_mem_ujmp(const cs_insn *inst, addr_t *addr_ptr); Z_API bool z_capstone_is_pc_related_ucall(const cs_insn *inst, addr_t *addr_ptr); Z_API bool z_capstone_is_const_mem_ucall(const cs_insn *inst, addr_t *addr_ptr); Z_API RegState *z_capstone_get_register_state(const cs_insn *inst); Z_API void z_capstone_show_gpr_state(GPRState gpr_state); Z_API void z_capstone_show_flg_state(FLGState flg_state); #endif ================================================ FILE: src/config.h ================================================ /* * config.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ #ifndef __CONFIG_H #define __CONFIG_H /* * Include basic headers */ #include #include #include #include #include #include #include #include #include #include #include /* * Tool name and version */ #define OURTOOL "stoch-fuzz" #define VERSION "1.1.0" /* * Magic string to indicate the file is patched */ #define MAGIC_STRING "2015.05.02 Shanghai Snow" #define MAGIC_NUMBER 0x527569787565 /* * Genral macro and types */ #define STRING(x) STRING_2(x) #define STRING_2(x) #x // error code for EXITME #define MY_ERR_CODE 233 #define Z_API __attribute__((visibility("hidden"))) #define Z_PRIVATE __attribute__((visibility("hidden"))) static inline #define Z_PUBLIC __attribute__((visibility("default"))) #define Z_RESERVED __attribute__((unused)) #define PAGE_SIZE_POW2 PAGE_SHIFT #define ADDR_MAX SIZE_MAX /* * Invalid information */ #define INVALID_ADDR ADDR_MAX #define INVALID_FD -1 #define INVALID_SHM_ID -1 #define INVALID_PID 0 /* * Re-define type */ typedef size_t addr_t; typedef void PhantomType; typedef long double double128_t; /* * XXX: * + SHADOW_CODE_ADDR: random address based on ASLR/PIE * + SIGNAL_STACK_ADDR: random address based on ASLR/PIE * + RETADDR_MAPPING_ADDR: random address based on ASLR/PIE * + LOOKUP_TABLE_ADDR: random address based on ASLR/PIE * + RW_PAGE_ADDR: fixed address * + AFL_MAP_ADDR: fixed address * + CRS_MAP_ADDR: fixed address * * Note that, RW_PAGE_ADDR can only be fixed because we need it to access the * programb base on the runtime. On the contrary, AFL_MAP_ADDR and CRS_MAP_ADDR * should have not been fixed, but they indeed are mainly due to the keystone * bug that is related to address redirecting. */ // XXX: see http://ref.x86asm.net/coder64.html for x64 encoding #define SHADOW_CODE_ADDR 0x1f1f8000 // XXX: SIGSTKSZ is now a run-time variable, which makes compilation of loader // and forkserver failed. 
// Check discussion below: // https://public-inbox.org/libc-alpha/87y2ew8i1w.fsf@igel.home/T/ // Some references: // https://codebrowser.dev/glibc/glibc/sysdeps/unix/sysv/linux/bits/sigstack.h.html#30 // https://codebrowser.dev/glibc/glibc/sysdeps/unix/sysv/linux/bits/sigstksz.h.html#28 #ifndef SIGNAL_STACK_SIZE #error "SIGNAL_STACK_SIZE should be determined before compilation" #endif // XXX: we pick a high address to avoid overflow with other important pages #define SIGNAL_STACK_ADDR (0x100000000 + SIGNAL_STACK_SIZE) #define RETADDR_MAPPING_ADDR (SIGNAL_STACK_ADDR + SIGNAL_STACK_SIZE) /* * [RW_PAGE_ADDR] The meta information needed during loading */ // XXX: members with addr_t are runtime address (especially for PIE) typedef struct __loading_info_t { addr_t program_base; uint64_t afl_prev_id; uint64_t client_pid; uint64_t prev_pc; char shadow_path[0x100]; uint64_t shadow_size; addr_t shadow_base; char lookup_tab_path[0x100]; uint64_t lookup_tab_size; addr_t lookup_tab_base; char pipe_path[0x100]; char shared_text_path[0x100]; uint64_t shared_text_size; addr_t shared_text_base; char retaddr_mapping_path[0x100]; uint64_t retaddr_mapping_size; addr_t retaddr_mapping_base; bool retaddr_mapping_used; bool daemon_attached; } __LoadingInfo; #define RW_PAGE_ADDR 0x300000 #define RW_PAGE_SIZE PAGE_SIZE #define RW_PAGE_USED_SIZE sizeof(__LoadingInfo) #define RW_PAGE_INFO_ADDR(f) (RW_PAGE_ADDR + offsetof(__LoadingInfo, f)) #define RW_PAGE_INFO(field) (((__LoadingInfo *)RW_PAGE_ADDR)->field) /* * Prefix and suffix for additional files */ #define TEMPFILE_NAME_PREFIX "." OURTOOL "." #define LOOKUP_TABNAME_PREFIX ".lookup." #define TRAMPOLINES_NAME_PREFIX ".shadow." #define SHARED_TEXT_PREFIX ".text." #define RETADDR_MAPPING_PREFIX ".ret." #define CRASHPOINT_LOG_PREFIX ".crashpoint." #define PIPE_FILENAME_PREFIX ".pipe." #define PDISASM_FILENAME_PREFIX ".pdisasm." #define METADATA_FILENAME_PREFIX ".meta." 
#define CODE_SEGMENT_FILE_SUFFIX ".code.segments" #define BACKUP_FILE_SUFFIX ".bak" #define PATCHED_FILE_SUFFIX ".patch" #define PHANTOM_FILE_SUFFIX ".phantom" /* * Lookup table */ extern void z_lookup_table_init_cell_num(uint64_t text_size); extern uint64_t z_lookup_table_get_cell_num(); #define LOOKUP_TABLE_INIT_CELL_NUM(x) z_lookup_table_init_cell_num(x) #define LOOKUP_TABLE_CELL_SIZE_POW2 2 #define LOOKUP_TABLE_CELL_SIZE (1 << LOOKUP_TABLE_CELL_SIZE_POW2) #define LOOKUP_TABLE_CELL_MASK ((1UL << (LOOKUP_TABLE_CELL_SIZE * 8)) - 1) #define LOOKUP_TABLE_CELL_NUM z_lookup_table_get_cell_num() #define LOOKUP_TABLE_SIZE (LOOKUP_TABLE_CELL_SIZE * LOOKUP_TABLE_CELL_NUM) #define LOOKUP_TABLE_MAX_CELL_NUM 0x8000000 #define LOOKUP_TABLE_MAX_SIZE \ (LOOKUP_TABLE_CELL_SIZE * LOOKUP_TABLE_MAX_CELL_NUM) // in case of conflicting with asan shadow memory #define LOOKUP_TABLE_ADDR ((0x7fff8000) - LOOKUP_TABLE_MAX_SIZE) /* * Crash check */ // For exit code usage, check https://tldp.org/LDP/abs/html/exitcodes.html for // more information. // It is lucky that we can play with the high 16 bits of status. 
#define __RS(s) ((s) & (0xffff)) // __REAL_STATUS #define __SF(s) (!!((s) & (0xffff0000))) // __SELF_FIRED #define PACK_STATUS(s, r) ((((r) << 16) & (0xffff0000)) | (((s) & (0xffff)))) #define IS_SUSPECT_STATUS(s) \ (__SF(s) && WIFSIGNALED(__RS(s)) && (WTERMSIG(__RS(s)) == SIGKILL)) #define IS_ABNORMAL_STATUS(s) \ (__SF(s) || (!WIFEXITED(__RS(s)) && WIFSIGNALED(__RS(s)))) // XXX: AFL uses SIGKILL to terminate a timeouted process (same as us) #define IS_TIMEOUT_STATUS(s) \ (!__SF(s) && WIFSIGNALED(__RS(s)) && (WTERMSIG(__RS(s)) == SIGKILL)) /* * Define struct with type info */ typedef struct meta_struct_t { const char *__type; } MetaStruct; #define STRUCT(name, content) \ typedef struct name##_t { \ const char *__type; \ struct content; \ } name #define STRUCT_REALNAME(type) struct type##_t #define STRUCT_TYPE(var) ((MetaStruct *)var)->__type #define STRUCT_ALLOC(type) \ ({ \ type *var = z_alloc(1, sizeof(type)); \ var->__type = #type; \ var; \ }) /* * Setter and Getter * OTYPE: type of object (e.g., Binary) * ONAME: name of object (e.g., binary) * FTYPE: type of filed (e.g., Elf_Info *) * FNAME: name of filed (e.g., elf) */ #define DECLARE_SETTER(OTYPE, ONAME, FTYPE, FNAME) \ Z_API void z_##ONAME##_##set_##FNAME(OTYPE *ONAME, FTYPE FNAME) #define DEFINE_SETTER(OTYPE, ONAME, FTYPE, FNAME) \ Z_API void z_##ONAME##_##set_##FNAME(OTYPE *ONAME, FTYPE FNAME) { \ assert(ONAME != NULL); \ ONAME->FNAME = FNAME; \ } #define DECLARE_GETTER(OTYPE, ONAME, FTYPE, FNAME) \ Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME) #define DEFINE_GETTER(OTYPE, ONAME, FTYPE, FNAME) \ Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME) { \ assert(ONAME != NULL); \ return ONAME->FNAME; \ } #define OVERLOAD_SETTER(OTYPE, ONAME, FTYPE, FNAME) \ Z_API void z_##ONAME##_##set_##FNAME(OTYPE *ONAME, FTYPE FNAME) #define OVERLOAD_GETTER(OTYPE, ONAME, FTYPE, FNAME) \ Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME) #endif ================================================ FILE: src/core.c 
================================================ /* * core.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "core.h" #include "crs_config.h" #include "elf_.h" #include "utils.h" #include #include #include #include #include #include #include /* * System clean up */ static Core *__core = NULL; // callback function for exit static void __core_atexit(void) { if (__core) { z_core_destroy(__core); } system("rm -f " TEMPFILE_NAME_PREFIX "*"); } // stop signal handling static void __core_handle_stop_sig(int _sig_id) { __core_atexit(); kill(getpid(), SIGKILL); } // timeout handling static void __core_handle_timeout(int _sig_id) { if (__core && __core->client_pid != INVALID_PID) { z_warn("client timeout"); kill(__core->client_pid, SIGKILL); } } // setup all signal handlers static void __core_setup_signal_handlers(void) { struct sigaction sa; sa.sa_handler = NULL; sa.sa_flags = SA_RESTART; sa.sa_sigaction = NULL; sigemptyset(&sa.sa_mask); /* Various ways of saying "stop". */ sa.sa_handler = __core_handle_stop_sig; sigaction(SIGHUP, &sa, NULL); sigaction(SIGINT, &sa, NULL); sigaction(SIGTERM, &sa, NULL); /* Exec timeout notifications. 
*/ sa.sa_handler = __core_handle_timeout; sigaction(SIGALRM, &sa, NULL); } // avoid duplicate setting (in case there are two instances of core) static bool __core_signal_handled = false; // setup environment needed by core static void __core_environment_setup(void) { atexit(__core_atexit); if (!__core_signal_handled) { __core_setup_signal_handlers(); __core_signal_handled = true; } } // clean cached files static void __core_clean_cache(const char *pathname) { #define __RM_CACHE(prefix, binary) \ do { \ const char *filename = z_strcat(prefix, binary); \ if (!z_access(filename, F_OK)) { \ if (remove(filename)) { \ EXITME("failed to remove %s", filename); \ } \ } \ z_free((void *)filename); \ } while (0) __RM_CACHE(LOOKUP_TABNAME_PREFIX, pathname); __RM_CACHE(TRAMPOLINES_NAME_PREFIX, pathname); __RM_CACHE(SHARED_TEXT_PREFIX, pathname); __RM_CACHE(RETADDR_MAPPING_PREFIX, pathname); __RM_CACHE(CRASHPOINT_LOG_PREFIX, pathname); __RM_CACHE(PIPE_FILENAME_PREFIX, pathname); __RM_CACHE(PDISASM_FILENAME_PREFIX, pathname); __RM_CACHE(METADATA_FILENAME_PREFIX, pathname); #undef __RM_CACHE } // check whether the binary and cached files are valid, and update the meta file // if needed. static void __core_check_binary(const char *pathname, RewritingOptArgs *opts) { // step 1. check pathname z_info("patch binary file: \"%s\"", pathname); if (z_strchr(pathname, '/')) { // TODO: it is a ugly approach to check working directory, change it // when possible EXITME("please make sure " OURTOOL " running under the same directory with the target bianry (no " "slash symbol)."); } // step 2. collect metadate Buffer *binary_buf = z_buffer_read_file(pathname); GChecksum *checksum = g_checksum_new(G_CHECKSUM_MD5); g_checksum_update(checksum, z_buffer_get_raw_buf(binary_buf), z_buffer_get_size(binary_buf)); const char *checksum_str = g_checksum_get_string(checksum); z_info("MD5(%s) = %s", pathname, checksum_str); // step 3. 
check metadata if needed const char *metadata_filename = z_strcat(METADATA_FILENAME_PREFIX, pathname); if (!z_access(metadata_filename, F_OK)) { Buffer *metadata_buf = z_buffer_read_file(metadata_filename); size_t metadata_size = z_buffer_get_size(metadata_buf); const uint8_t *metadata = z_buffer_get_raw_buf(metadata_buf); if (metadata_size != sizeof(RewritingOptArgs) + z_strlen(checksum_str) + 1) { z_info("inconsistent size of cache metadata, remove cached files"); __core_clean_cache(pathname); } else if (memcmp(metadata, opts, sizeof(RewritingOptArgs))) { z_info("inconsistent rewriting options, remove cached files"); __core_clean_cache(pathname); } else if (z_strcmp((const char *)metadata + sizeof(RewritingOptArgs), checksum_str)) { z_info("inconsistent binaries, remove cached files"); __core_clean_cache(pathname); } z_buffer_destroy(metadata_buf); } // step 4. update medadata file { Buffer *metadata_buf = z_buffer_create(NULL, 0); z_buffer_append_raw(metadata_buf, (const uint8_t *)opts, sizeof(RewritingOptArgs)); z_buffer_append_raw(metadata_buf, (const uint8_t *)checksum_str, z_strlen(checksum_str)); z_buffer_push(metadata_buf, '\x00'); z_buffer_write_file(metadata_buf, metadata_filename); z_buffer_destroy(metadata_buf); } // step 5. 
free g_checksum_free(checksum); z_buffer_destroy(binary_buf); z_free((void *)metadata_filename); } /* * Functions and Macros copied and pasted from AFL source code */ #define __AFL_ROL64(_x, _r) \ ((((uint64_t)(_x)) << (_r)) | (((uint64_t)(_x)) >> (64 - (_r)))) Z_PRIVATE uint32_t __afl_hash32(const void *key, uint32_t len, uint32_t seed) { const uint64_t *data = (uint64_t *)key; uint64_t h1 = seed ^ len; len >>= 3; while (len--) { uint64_t k1 = *data++; k1 *= 0x87c37b91114253d5ULL; k1 = __AFL_ROL64(k1, 31); k1 *= 0x4cf5ad432745937fULL; h1 ^= k1; h1 = __AFL_ROL64(h1, 27); h1 = h1 * 5 + 0x52dce729; } h1 ^= h1 >> 33; h1 *= 0xff51afd7ed558ccdULL; h1 ^= h1 >> 33; h1 *= 0xc4ceb9fe1a85ec53ULL; h1 ^= h1 >> 33; return h1; } /* * Get the hash value of current afl bitmap */ Z_PRIVATE uint32_t __core_get_bitmap_hash(Core *core); /* * Set clock for client timeout */ Z_PRIVATE void __core_set_client_clock(Core *core, pid_t client_pid); /* * Cancel clock for client timeout */ Z_PRIVATE void __core_cancel_client_clock(Core *core, pid_t client_pid); /* * Setup shared memory of CRS */ Z_PRIVATE void __core_setup_shm(Core *core); /* * Setup shared memory of AFL */ Z_PRIVATE void __core_setup_afl_shm(Core *core, int afl_shm_id); /* * Clean up */ Z_PRIVATE void __core_clean_environment(Core *core); /* * Setup a unix domain socker for core */ Z_PRIVATE void __core_setup_unix_domain_socket(Core *core); /* * Prepare a target binary under the current working directory */ Z_PRIVATE const char *__core_prepare_binary_under_curdir(const char *pathname); Z_PRIVATE const char *__core_prepare_binary_under_curdir(const char *pathname) { // check whether pathname exists if (access(pathname, F_OK)) { EXITME("file not found: %s", pathname); } const char *last_slash = z_strrchr(pathname, '/'); if (!last_slash) { return pathname; } // check new_pathname is valid const char *new_pathname = last_slash + 1; if (!new_pathname[0]) { EXITME("please provide a file path instead of a directory one: %s", 
pathname); } // check whether new_pathname exists. if (!access(new_pathname, F_OK)) { // if so, check whether these two files are the same struct stat statbuf, new_statbuf; if (stat(pathname, &statbuf) || stat(new_pathname, &new_statbuf)) { EXITME("cannot stat %s or %s", pathname, new_pathname); } if (statbuf.st_ino == new_statbuf.st_ino) { // nice, these two files are identical, and we do not need to do // anything return new_pathname; } } // copy pathname to new_pathname Buffer *tmp_buf = z_buffer_read_file(pathname); z_buffer_write_file(tmp_buf, new_pathname); z_buffer_destroy(tmp_buf); // chmod if (z_chmod(new_pathname, 0755)) { EXITME("fail to chmod new binary: %s", new_pathname); } return new_pathname; } Z_PRIVATE uint32_t __core_get_bitmap_hash(Core *core) { if (!core->afl_trace_bits) { // checking runs are not enabled return 0; } else { return __afl_hash32(core->afl_trace_bits, AFL_MAP_SIZE, AFL_HASH_CONST); } } Z_PRIVATE void __core_set_client_clock(Core *core, pid_t client_pid) { core->client_pid = client_pid; core->it.it_value.tv_sec = (core->opts->timeout / 1000); core->it.it_value.tv_usec = (core->opts->timeout % 1000) * 1000; setitimer(ITIMER_REAL, &core->it, NULL); } Z_PRIVATE void __core_cancel_client_clock(Core *core, pid_t client_pid) { if (client_pid != core->client_pid) { EXITME("inconsistent client_pid"); } core->client_pid = INVALID_PID; core->it.it_value.tv_sec = 0; core->it.it_value.tv_usec = 0; setitimer(ITIMER_REAL, &core->it, NULL); } Z_PRIVATE void __core_setup_unix_domain_socket(Core *core) { if (core->sock_fd != INVALID_FD) { EXITME("multiple pipelines detected"); } // get pipe filename ELF *e = z_binary_get_elf(core->binary); const char *pipe_filename = z_elf_get_pipe_filename(e); // check filename length struct sockaddr_un server; if (z_strlen(pipe_filename) >= sizeof(server.sun_path)) { EXITME("pipe filename is too long: %s", pipe_filename); } // set socket core->sock_fd = socket(AF_UNIX, SOCK_STREAM, 0); if (core->sock_fd < 0) { 
EXITME("opening unix domain socket error"); } server.sun_family = AF_UNIX; strcpy(server.sun_path, pipe_filename); // bind socket if (bind(core->sock_fd, (struct sockaddr *)&server, sizeof(struct sockaddr_un))) { EXITME("binding stream socket error"); } } Z_PRIVATE void __core_setup_shm(Core *core) { // step (0). check shared memory is already setup if (core->shm_id != INVALID_SHM_ID) { EXITME("multiple CRS shared memory detected"); } // step (1). set shared memory id core->shm_id = shmget(IPC_PRIVATE, CRS_MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600); if (core->shm_id < 0) { EXITME("failed: shmget()"); } // step (2). set shared memory address core->shm_addr = (addr_t)shmat(core->shm_id, NULL, 0); if (core->shm_addr == INVALID_ADDR) { EXITME("failed: shmat()"); } } Z_PRIVATE void __core_setup_afl_shm(Core *core, int afl_shm_id) { // initial checking if (core->opts->check_execs == 0) { EXITME("checking runs are disabled"); } if (!z_disassembler_fully_support_prob_disasm(core->disassembler)) { EXITME( "checking runs are disabled when pdisasm is not fully supported"); } if (afl_shm_id == INVALID_SHM_ID) { EXITME("invalid afl_shm_id"); } core->afl_trace_bits = shmat(afl_shm_id, NULL, 0); if (core->afl_trace_bits == (void *)-1) { EXITME("failed: shmat() for AFL"); } z_info("setup the shared memory of AFL at %p", core->afl_trace_bits); } Z_PRIVATE void __core_clean_environment(Core *core) { if (core->shm_id != INVALID_SHM_ID) { // XXX: remove lock to avoid dead lock CRS_INFO_BASE(core->shm_addr, lock) = 0; shmctl(core->shm_id, IPC_RMID, NULL); core->shm_id = INVALID_SHM_ID; core->shm_addr = INVALID_ADDR; } if (core->sock_fd != INVALID_FD) { close(core->sock_fd); core->sock_fd = INVALID_FD; } ELF *e = z_binary_get_elf(core->binary); const char *pipe_filename = z_elf_get_pipe_filename(e); if (!z_access(pipe_filename, F_OK)) { remove(pipe_filename); } } Z_PUBLIC int z_core_perform_dry_run(Core *core, int argc, const char **argv) { // update original file const char *filename = 
z_binary_get_original_filename(core->binary); assert(!z_strcmp(filename, argv[0])); // create phantom file, instead of removing the original file const char *patched_filename = z_strcat(filename, PATCHED_FILE_SUFFIX); z_binary_save(core->binary, patched_filename); z_info("start dry run: %s", patched_filename); // get .text information ELF *e = z_binary_get_elf(core->binary); Elf64_Shdr *text = z_elf_get_shdr_text(e); addr_t text_addr = text->sh_addr; size_t text_size = text->sh_size; // prepare a shaow argv_ with argv[0] replaced by patched_filename const char **argv_ = z_alloc(argc + 1, sizeof(const char *)); assert(!argv[argc]); // the last pointer should be NULL for (int i = 1; i <= argc; i++) { argv_[i] = argv[i]; } argv_[0] = patched_filename; #ifdef NDEBUG int dev_null_fd = open("/dev/null", O_RDWR); if (dev_null_fd < 0) { EXITME("unable to open /dev/null"); } #endif while (true) { // we have to build new pipe each round, to avoid multi-thread problems int st_pipe[2]; if (pipe(st_pipe) < 0) { EXITME("pipe() failed"); } z_core_detach(core); pid_t pid = fork(); if (pid == 0) { // isolate the process and configure standard descriptors (including // process group) if (setsid() < 0) { EXITME("setsid() failed"); } // child if (dup2(st_pipe[1], CRS_DATA_FD) < 0) { EXITME("dup2() failed"); } close(st_pipe[0]); close(st_pipe[1]); #ifdef NDEBUG dup2(dev_null_fd, 0); dup2(dev_null_fd, 1); dup2(dev_null_fd, 2); close(dev_null_fd); #endif // set LD_PRELOAD if needed if (core->opts->r.safe_ret && getenv("STOCHFUZZ_PRELOAD")) { setenv("LD_PRELOAD", getenv("STOCHFUZZ_PRELOAD"), 1); } // set other environments including ASAN (copied from AFL) /* This should improve performance a bit, since it stops the linker from doing extra work post-fork(). */ if (!getenv("LD_BIND_LAZY")) setenv("LD_BIND_NOW", "1", 0); /* Set sane defaults for ASAN if nothing else specified. 
*/ setenv("ASAN_OPTIONS", "abort_on_error=1:" "detect_leaks=0:" "symbolize=0:" "allocator_may_return_null=1", 0); /* MSAN is tricky, because it doesn't support abort_on_error=1 at this point. So, we do this in a very hacky way. */ // note: #define MSAN_ERROR 86 (in AFL) setenv("MSAN_OPTIONS", "exit_code=86:" "symbolize=0:" "abort_on_error=1:" "allocator_may_return_null=1:" "msan_track_origins=0", 0); execv(argv_[0], (char **)argv_); exit(0); } else { // parent z_trace("start child process [%d]", pid); close(st_pipe[1]); int signal_fd = st_pipe[0]; // set clock __core_set_client_clock(core, pid); int status = 0; if (waitpid(pid, &status, 0) < 0) { EXITME("waitpid failed"); } // cancel clock __core_cancel_client_clock(core, pid); z_core_attach(core); addr_t crash_rip = CRS_INVALID_IP; // XXX: this read may fail when the status is not suspicious. if (read(signal_fd, (char *)(&crash_rip), 8) == 8) { // well received, we need to update status status = PACK_STATUS(status, 1); } else { // re-init crash_rip crash_rip = CRS_INVALID_IP; } close(st_pipe[0]); z_info("child process exit with %#lx", status); uint32_t cov = __core_get_bitmap_hash(core); CRSStatus crs_status = z_diagnoser_new_crashpoint( core->diagnoser, status, crash_rip, cov, false); if (crs_status == CRS_STATUS_CRASH || crs_status == CRS_STATUS_NORMAL) { z_free(argv_); z_free((char *)patched_filename); return status; } // TODO: try to fix this somehow (no idea how currently) if (crs_status == CRS_STATUS_DEBUG) { // XXX: note that alought it is high likely that the self // correction procedure works fine when the crash_rip is on // .text sectoin, it is still possible that ASLR can cause some // problems. // TODO: handle the *extremely* corner case. 
if (IS_SUSPECT_STATUS(status) && (crash_rip < text_addr || crash_rip >= text_addr + text_size)) { EXITME( "self correction procedure under dry run mode is " "problematic due to ASLR"); } } } } } Z_PUBLIC Core *z_core_create(const char *pathname, SysOptArgs *opts) { if (__core) { EXITME("there can only be one Core instance"); } pathname = __core_prepare_binary_under_curdir(pathname); __core_environment_setup(); __core_check_binary(pathname, &opts->r); Core *core = STRUCT_ALLOC(Core); core->opts = opts; core->binary = z_binary_open(pathname, core->opts->r.instrument_early); if (core->opts->r.safe_ret && !core->opts->r.instrument_early) { ELF *e = z_binary_get_elf(core->binary); if (z_elf_is_statically_linked(e)) { z_warn( "it is a statically-linked ELF file, make sure you DO NOT set " "LD_PRELOAD when running the phantom file."); } } core->disassembler = z_disassembler_create(core->binary, &core->opts->r); core->rewriter = z_rewriter_create(core->disassembler, &core->opts->r); core->patcher = z_patcher_create(core->disassembler, &core->opts->r); core->diagnoser = z_diagnoser_create(core->patcher, core->rewriter, core->disassembler, &core->opts->r); z_diagnoser_read_crashpoint_log(core->diagnoser); core->client_pid = INVALID_PID; core->it.it_interval.tv_sec = 0; core->it.it_interval.tv_usec = 0; core->it.it_value.tv_sec = 0; core->it.it_value.tv_usec = 0; core->shm_id = INVALID_SHM_ID; core->shm_addr = INVALID_ADDR; core->afl_trace_bits = NULL; core->sock_fd = INVALID_FD; __core = core; return core; } Z_PUBLIC void z_core_activate(Core *core) { z_patcher_initially_patch(core->patcher); z_rewriter_initially_rewrite(core->rewriter); // XXX: it seems not a good idea to do pre-disassembly (linear-disassembly) // due to the heavy overhead of forking a process // z_rewriter_heuristics_rewrite(core->rewriter); z_diagnoser_apply_logged_crashpoints(core->diagnoser); } Z_PUBLIC void z_core_destroy(Core *core) { if (!__core) { EXITME("detected an unrestrained core object"); } 
__core_clean_environment(core); z_diagnoser_write_crashpoint_log(core->diagnoser); z_diagnoser_destroy(core->diagnoser); z_patcher_destroy(core->patcher); z_rewriter_destroy(core->rewriter); z_disassembler_destroy(core->disassembler); z_binary_destroy(core->binary); z_free(core); __core = NULL; } Z_PUBLIC void z_core_detach(Core *core) { z_binary_set_elf_state(core->binary, ELFSTATE_DISABLE | ELFSTATE_CONNECTED); } Z_PUBLIC void z_core_attach(Core *core) { z_binary_set_elf_state(core->binary, ELFSTATE_CONNECTED); } Z_PUBLIC void z_core_start_daemon(Core *core, int notify_fd) { const char *filename = z_binary_get_original_filename(core->binary); // first dry run w/o any parameter to find some crashpoint during init // XXX: dry run must be performed before setting up shm // XXX: when -e option is given, we do not need to perform such dry runs if (!core->opts->r.instrument_early) { // before dry run, we first patch the main function as directly // returning. As such, we can try our best to avoid the error diagnosis // during dry run addr_t shadow_main_addr = z_binary_get_shadow_main(core->binary); uint8_t ret_byte = 0xc3; uint8_t ori_byte = 0; z_patcher_unsafe_patch(core->patcher, shadow_main_addr, 1, &ret_byte, &ori_byte); const char *argv[2] = {NULL, NULL}; argv[0] = filename; z_core_perform_dry_run(core, 1, argv); // repair the main z_patcher_unsafe_patch(core->patcher, shadow_main_addr, 1, &ori_byte, NULL); } // create phantom file, instead of removing the original file const char *phantom_filename = z_strcat(filename, PHANTOM_FILE_SUFFIX); z_binary_create_snapshot(core->binary, phantom_filename); z_info( "phantom file is create, please execute %s to communicate with the " "daemon", phantom_filename); z_free((char *)phantom_filename); __core_setup_shm(core); __core_setup_unix_domain_socket(core); /* * Main body to handle on-the-fly patch */ // step (0). 
listen on core->sock_fd if (listen(core->sock_fd, 1)) { EXITME("listen unix domain socket failed"); } // step (1). comm connection // step (1.0). notify if necessar if (notify_fd != INVALID_FD) { if (write(notify_fd, &core->sock_fd, 4) != 4) { EXITME("fail to notify parent process"); } close(notify_fd); notify_fd = INVALID_FD; } // step (1.1). wait connection int comm_fd = accept(core->sock_fd, NULL, NULL); z_info("daemon gets connection for comm"); // step (1.2). handshake: // * send out shm_id // * recv afl_attached // * recv afl_shm_id // * send core->opts->check_execs (useless when AFL is not attached) int afl_attached = 0; int afl_shm_id = INVALID_SHM_ID; // checking runs are enabled only if // * AFL is attached // * Prob Disassembly is fully supported // * core->opts->check_execs is not zero bool check_run_enabled = false; { assert(sizeof(core->shm_id) == 4); if (write(comm_fd, &core->shm_id, sizeof(core->shm_id)) != sizeof(core->shm_id)) { EXITME("fail to send shm_id"); } if (read(comm_fd, &afl_attached, 4) != 4) { EXITME("fail to recv afl_attached"); } // update checking run information based on whether AFL is attached check_run_enabled = !!(afl_attached && z_disassembler_fully_support_prob_disasm(core->disassembler) && core->opts->check_execs > 0); uint32_t check_execs = (check_run_enabled ? core->opts->check_execs : 0); if (read(comm_fd, &afl_shm_id, sizeof(afl_shm_id)) != sizeof(afl_shm_id)) { EXITME("fail to recv alf_shm_id"); } if (write(comm_fd, &check_execs, 4) != 4) { EXITME("fail to send check_execs"); } // simple validation if (afl_attached && afl_shm_id == INVALID_SHM_ID) { EXITME("AFL is attached but the daemon does not get AFL_SHM_ID"); } if (!afl_attached && afl_shm_id != INVALID_SHM_ID) { EXITME("AFL is notattached but the daemon gets AFL_SHM_ID"); } if (check_run_enabled && !afl_attached) { EXITME("checking runs are only enabled when AFL is attched"); } } // step (2). 
output basic information and setup AFL shared memory if (afl_attached) { z_info("AFL detected: %d", afl_attached); if (check_run_enabled) { // XXX: we only setup the shared memory for AFL when checking runs // are enabled // XXX: in other words, core->afl_trace_bits indicates whether the // checking runs are enabled or not __core_setup_afl_shm(core, afl_shm_id); } } else { z_info("no AFL attached: %d", afl_attached); } z_info("daemon handshake successes"); // step (3). communicate with the client // + if it is not a crash (normal exit), directly stop the daemon. note // that when AFL is attached, no any normal status can be recevied; // + if it is a real crash, the daemon sends CRS_STATUS_CRASH to notify // the client, and (a.) stop the daemon when AFL is not attached or // (b.) continue a new round when AFL is attached; // + if it is a patch crash, the daemon sends // CRS_STATUS_NOTHING/_REMMAP to guide the client do the on-the-fly // patch. while (true) { /* * step (3.1). recv program status from the client */ int status = 0; if (read(comm_fd, &status, 4) != 4) { EXITME("fail to recv status"); } if (WIFSIGNALED(status)) { z_info("get status code: %#x (signal: %d)", status, WTERMSIG(status)); } else if (WIFEXITED(status)) { z_info("get status code: %#x (exit: %d)", status, WEXITSTATUS(status)); } else { // I have been confused by the status handling for a long time at // the early time, so I comment it down here for convenience. 
// // XXX: theoretically, this branch happens only when // WTERMSIG(status) == 0x7f, which covers WIFSTOPPED(status) see: // // * WTERMSIG(status) = ((status) & 0x7f) // * WIFEXITED(status) = (WTERMSIG(status) == 0) // * WIFSIGNALED(status) = // (((signed char) (((status) & 0x7f) + 1) >> 1) > 0) // * WIFSTOPPED(status) = (((status) & 0xff) == 0x7f) // // It is very interesting to see how glibc construct such status: // // For WTERMSIG(status) and WIFEXITED(status): // * __W_EXITCODE(ret, sig) = ((ret) << 8 | (sig)) // For WIFSTOPPED(status): // * __W_STOPCODE(sig) = ((sig) << 8 | 0x7f) // z_info("get status code: %#x (stopped? signal: %d)", status, WSTOPSIG(status)); } /* * step (3.2). get crash rip and coverage */ addr_t crash_rip = CRS_INFO_BASE(core->shm_addr, crash_ip); CRS_INFO_BASE(core->shm_addr, crash_ip) = CRS_INVALID_IP; uint32_t cov = __core_get_bitmap_hash(core); /* * step (3.3). check returning status and get patch commands */ // XXX: we use int to guarantee a 4-byte integer int crs_status = z_diagnoser_new_crashpoint( core->diagnoser, status, crash_rip, cov, check_run_enabled); if (crs_status == CRS_STATUS_CRASH) { if (write(comm_fd, &crs_status, 4) != 4) { EXITME("fail to notify real crash"); } goto NOT_PATCHED_CRASH; } if (crs_status == CRS_STATUS_NORMAL) { if (check_run_enabled) { // notify the fork server about the result of checking runs if (write(comm_fd, &crs_status, 4) != 4) { EXITME("fail to notify real crash"); } } else if (afl_attached) { EXITME( "CRS_STATUS_NORMAL is invalid when afl is attached but " "checking runs are disabled"); } goto NOT_PATCHED_CRASH; } /* * step (3.4). sync binary */ // XXX: according to the following link, it seems the fsync is used to // sync changed pages from RAM to the file. It means, those changes made // by the daemon is already visible to the phantom file even without // fsync. Hence, to improve the performance when the underlying files // are relatively large, we disable the fsync. 
// // https://unix.stackexchange.com/questions/474946/are-sharing-a-memory-mapped-file-and-sharing-a-memory-region-implemented-based-o // // z_binary_fsync(core->binary); /* * step (3.5). send status */ if (write(comm_fd, &crs_status, 4) != 4) { EXITME("fail to send crs status"); } /* * step (3.6). continue on patching while checking timeout */ { // step (3.6.1). set clock pid_t client_pid = INVALID_PID; if (read(comm_fd, &client_pid, 4) != 4) { EXITME("fail to recv client_pid [befor execution]"); } __core_set_client_clock(core, client_pid); // step (3.6.2). cancel clock if (read(comm_fd, &client_pid, 4) != 4) { EXITME("fail to recv client_pid [after execution]"); } __core_cancel_client_clock(core, client_pid); } // step (3.6.3). continue continue; NOT_PATCHED_CRASH: if (!afl_attached) { goto DAEMON_STOP; } } DAEMON_STOP: __core_clean_environment(core); } ================================================ FILE: src/core.h ================================================ /* * core.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/

/*
 * Backend of OURTOOL
 */
#ifndef __CORE_H
#define __CORE_H

#include "binary.h"
#include "buffer.h"
#include "config.h"
#include "diagnoser.h"
#include "disassembler.h"
#include "patcher.h"
#include "rewriter.h"
#include "sys_optarg.h"

// NOTE(review): the names of the two system headers below were lost during
// text extraction -- confirm against the original source tree.
#include
#include

/*
 * Core: the top-level backend object which owns and ties together all the
 * major components (binary, disassembler, patcher, rewriter, and diagnoser),
 * as well as the daemon's communication/timeout state.
 */
STRUCT(Core, {
    Binary *binary;
    Disassembler *disassembler;
    Patcher *patcher;
    Rewriter *rewriter;
    Diagnoser *diagnoser;

    // timeout info
    // (client_pid/it track the clock of the currently running client)
    pid_t client_pid;
    struct itimerval it;

    // shared memory information
    // (shm_id/shm_addr refer to the CRS info page shared with clients)
    int shm_id;
    addr_t shm_addr;

    // shared memory of AFL
    // (presumably non-NULL only after the AFL bitmap is set up -- see
    // __core_setup_afl_shm; TODO confirm)
    uint8_t *afl_trace_bits;

    // unix domain information (the daemon's listening socket)
    int sock_fd;

    // system optargs
    SysOptArgs *opts;
});

/*
 * Dry run without starting any server
 */
Z_PUBLIC int z_core_perform_dry_run(Core *core, int argc, const char **argv);

/*
 * Start a daemon server to automatically patch any running program
 * (note that only one connection at a time)
 */
Z_PUBLIC void z_core_start_daemon(Core *core, int notify_fd);

/*
 * Create OURTOOL Core
 */
Z_PUBLIC Core *z_core_create(const char *pathname, SysOptArgs *opts);

/*
 * Destroy OURTOOL Core
 */
Z_PUBLIC void z_core_destroy(Core *core);

/*
 * Activate core analysis
 */
Z_PUBLIC void z_core_activate(Core *core);

/*
 * Detach core from its underlying executable
 */
Z_PUBLIC void z_core_detach(Core *core);

/*
 * Attach core to its underlying executable
 */
Z_PUBLIC void z_core_attach(Core *core);

#endif

================================================
FILE: src/crs_config.h
================================================
/*
 * crs_config.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

/*
 * CRS (Crash Site) configuration
 */
#ifndef __CRS_CONFIG_H
#define __CRS_CONFIG_H

#include "afl_config.h"

// Status codes sent by the daemon to guide the client after each run.
typedef enum crs_status_t {
    CRS_STATUS_NOTHING,  // nothing to do for fork server
    CRS_STATUS_REMMAP,   // fork server needs to re-mmap shadow code
    CRS_STATUS_DEBUG,    // the program is set into delta debugging mode
    CRS_STATUS_CRASH,    // a crash in the subject program
    CRS_STATUS_NORMAL,   // normal exit without crash
} CRSStatus;

/*
 * [CRS_INFO] The crash site information needed by self-patching
 */
typedef struct __crs_info_t {
    uint32_t lock;     // presumably a guard/lock word -- confirm with users
    addr_t crash_ip;   // instruction pointer of the latest (suspect) crash
    size_t self_fired; // NOTE(review): semantics not visible here -- confirm
} __CRSInfo;

#define CRS_MAP_SIZE_POW2 PAGE_SIZE_POW2
#define CRS_MAP_SIZE (1 << CRS_MAP_SIZE_POW2)
// the CRS info page is placed directly after the AFL coverage map
#define CRS_MAP_ADDR (AFL_MAP_ADDR + AFL_MAP_SIZE)
#define CRS_USED_SIZE sizeof(__CRSInfo)

// accessors for the CRS info block, either at the fixed CRS_MAP_ADDR or at a
// caller-provided base address
#define CRS_INFO(field) (((__CRSInfo *)CRS_MAP_ADDR)->field)
#define CRS_INFO_BASE(addr, field) (((__CRSInfo *)(addr))->field)
#define CRS_INFO_ADDR(f) (CRS_MAP_ADDR + offsetof(__CRSInfo, f))

#define CRS_COMM_FD 222
// TODO: CRS_DATA_FD is only used in dry run since now. But dry run does need a
// better communication approach in the future.
#define CRS_DATA_FD 233

#define CRS_INVALID_IP 0x1996083019961219

#endif

================================================
FILE: src/diagnoser.c
================================================
/*
 * diagnoser.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "diagnoser.h" #include "utils.h" /* * Perform delta debugging to locate rewriting errors */ Z_PRIVATE CRSStatus __diagnoser_delta_debug(Diagnoser *g, int status, addr_t addr, uint32_t cov); /* * Handler a single crashpoint (the real function while handles patching). */ Z_PRIVATE void __diagnoser_handle_single_crashpoint(Diagnoser *g, addr_t addr, CPType type, bool is_real, bool need_log); /* * Validate a crashpoint, return INVALID_ADDR if it is an unintentional crash */ Z_PRIVATE addr_t __diagnoser_validate_crashpoint(Diagnoser *g, addr_t addr); /* * Get the CPType of the given crashpoint */ Z_PRIVATE CPType __diagnoser_get_crashpoint_type(Diagnoser *g, addr_t addr, addr_t real_addr); /* * Patch the intentional crash */ Z_PRIVATE void __diagnoser_patch_crashpoint(Diagnoser *g, addr_t addr, CPType cp_type); /* * Getter and Setter */ DEFINE_GETTER(Diagnoser, diagnoser, GQueue *, crashpoints); // XXX: this function is only used for those new crashpoints detected during // execution. Z_PRIVATE void __diagnoser_patch_crashpoint(Diagnoser *g, addr_t addr, CPType cp_type) { if (cp_type == CP_RETADDR) { // for CP_RETADDR, we want to also update other retaddrs who share the // same callee with the found one Buffer *retaddrs = z_rewriter_new_validate_retaddr(g->rewriter, addr); size_t n = z_buffer_get_size(retaddrs) / sizeof(addr_t); addr_t *addrs = (addr_t *)z_buffer_get_raw_buf(retaddrs); z_info("we found %d CP_RETADDR sharing the same callee", n); // we first patch the addr itself as real crashpoint __diagnoser_handle_single_crashpoint(g, addr, CP_RETADDR, true, true); // we then patch other retaddrs sharing with the same callee for (int i = 0; i < n; i++) { if (addrs[i] == addr) { continue; } // XXX: note that the following check is very necessary. 
Although // CP_RETADDR cannot be an internal PP_BRIDGE (i.e., overlapping // bridge), it can be a PP_BRIDGE after the patched jmp instruction. if (z_patcher_check_patchpoint(g->patcher, addrs[i]) == PP_BRIDGE) { continue; } __diagnoser_handle_single_crashpoint(g, addrs[i], CP_RETADDR, false, true); } z_buffer_destroy(retaddrs); } else { __diagnoser_handle_single_crashpoint(g, addr, cp_type, true, true); } } Z_PRIVATE CPType __diagnoser_get_crashpoint_type(Diagnoser *g, addr_t addr, addr_t real_addr) { if ((int64_t)addr < 0) { z_info("find new address [internal]: " COLOR(GREEN, "%#lx"), addr); return CP_INTERNAL; } else { // XXX: retaddr patch may cause crash when enabling pdisasm. // XXX: note that if diagnoser does not generate any CP_RETADDR, all // ret-related functions of rewriter will not be invoked and no // callee will be regarded as returnable. That is why this check is // extremely important. if (!z_disassembler_fully_support_prob_disasm(g->disassembler) && z_rewriter_check_retaddr_crashpoint(g->rewriter, real_addr) && real_addr == addr) { z_info("find new address [retaddr]: " COLOR(GREEN, "%#lx"), real_addr); return CP_RETADDR; } else { z_info("find new address [external]: " COLOR(GREEN, "%#lx"), real_addr); return CP_EXTERNAL; } } } Z_PRIVATE addr_t __diagnoser_validate_crashpoint(Diagnoser *g, addr_t addr) { assert(g != NULL); // step (1). check INVALID_ADDR if (addr == INVALID_ADDR) { return INVALID_ADDR; } // step (2). 
validate addr by different type if ((int64_t)addr < 0) { // it is caused by a missed ujmp/ucall entry addr = (~addr) + 1; if (z_disassembler_is_within_disasm_range(g->disassembler, addr) && !z_disassembler_is_potential_inst_entrypoint(g->disassembler, addr)) { return addr; } else { return INVALID_ADDR; } } else { // it is cause by patch if (z_patcher_check_patchpoint(g->patcher, addr) == PP_INVALID) { return INVALID_ADDR; } else { return addr; } } } // XXX: addr must be an adjusted address if needed Z_PRIVATE void __diagnoser_handle_single_crashpoint(Diagnoser *g, addr_t addr, CPType type, bool is_real, bool need_log) { if (type != CP_RETADDR) { // The recursive disassembly treats all library function as returnable. z_rewriter_rewrite(g->rewriter, addr); } if (type != CP_INTERNAL) { // XXX: note that if it is a retaddr crashpoint, its corresponding // shadow code should not start with an AFL trampoline. addr_t shadow_addr = z_rewriter_get_shadow_addr(g->rewriter, addr); assert(shadow_addr != INVALID_ADDR); z_patcher_build_bridge(g->patcher, addr, shadow_addr, is_real); } if (need_log) { g_queue_push_tail(g->crashpoints, GSIZE_TO_POINTER(addr)); g_queue_push_tail(g->crashpoints, GSIZE_TO_POINTER(type)); g_queue_push_tail(g->crashpoints, GINT_TO_POINTER(!!is_real)); } } // XXX: it is highly recommended to specify a timeout (>= 1000ms, or >= // AFL_HANG_TMOUT if set) for AFL by its -t option. Otherwise, the auto-scaled // timeout may cause incorrect error diagnosis (e.g., the dd_status may change // when timeout). more information can be found at // https://github.com/google/AFL/blob/master/afl-fuzz.c#L3244 // XXX: note that we currently downgrade the delta debugging into a more // efficient dup-binary-search. This simplified algorithm works well as the // unintentional crash is caused by a single bad patch in most cases. The delta // debugging algorithm can be easily brought back if necessary. 
/*
 * XXX: to explain how and why the simplified algorithm works well, we first
 * need to give a definition of *key patch*.
 *
 * A key patch is a patch whose removal prevents the original unintentional
 * crash from being reproduced.
 *
 * The simplified algorithm works by first finding the last *key patch*. It
 * ignores all the patches after the last key patch. Then it checks if the
 * unintentional crash can be reproduced by only keeping the last DD_RANGE
 * uncertain patches (e.g., if the last key patch is the 54-th patch and
 * DD_RANGE == 32, then we only keep the 22-nd to 54-th patches).
 *
 * If the crash can be reproduced, it means all the rewriting errors are in the
 * DD_RANGE. It then uses binary search to find the first key patch and regards
 * all patches between the first and the last key patch as rewriting errors.
 *
 * If the crash cannot be reproduced, it only regards the last key patch as an
 * error and re-runs the program to detect other rewriting errors.
 *
 * The algorithm works because of the following two observations.
 *
 * The first observation is that all key patches must be rewriting errors. It
 * is because the correct patches are applied on the instructions and such
 * patches can only trigger intentional crashes (note that we can safeguard
 * non-crashing rewriting errors).
 *
 * The second observation is that, in most cases, an unintentional crash is
 * caused by a single rewriting error or a few continuous errors. It is because
 * the program is very sensitive to incorrect data flow. Once the data flow is
 * randomly polluted, the program is going to crash very soon.
 */
Z_PRIVATE CRSStatus __diagnoser_delta_debug(Diagnoser *g, int status,
                                            addr_t addr, uint32_t cov) {
// sets the next delta-debugging stage and leaves the function in one step
#define __UPDATE_STAGE_AND_RETURN(stage, ret) \
    do {                                      \
        g->dd_stage = (stage);                \
        return (ret);                         \
    } while (0)

    if (!z_disassembler_fully_support_prob_disasm(g->disassembler)) {
        assert(g->dd_stage == DD_NONE);
        assert(IS_ABNORMAL_STATUS(status));
        // XXX: this cannot be caused by checking runs
        __UPDATE_STAGE_AND_RETURN(DD_NONE, CRS_STATUS_CRASH);
    }

    // XXX: it is very important to change addr to CRS_INVALID_IP, because for
    // non-suspect status, addr is meaningless. Additionally, when it is caused
    // by timeout, cov may vary. See Undecided Changes in the documents for more
    // information.
    if (IS_SUSPECT_STATUS(status)) {
        z_info("suspect status (%d) at %#lx [cov: %#x]", status, addr, cov);
    } else if (IS_TIMEOUT_STATUS(status)) {
        // XXX: for timeouted process, both addr and cov are useless
        addr = CRS_INVALID_IP;
        cov = 0;
        z_info("timeout status (%d)", status);
    } else {
        addr = CRS_INVALID_IP;
        z_info("non-suspect status (%d) [cov: %#x]", status, cov);
    }

    if (g->dd_stage == DD_NONE) {
        // step (0). distinguish real crashes and checking runs
        if (IS_ABNORMAL_STATUS(status)) {
            g->dd_crs_status = CRS_STATUS_CRASH;
            g->dd_banner =
                COLOR(RED, "a latent bug at %#lx with status %d [cov: %#x]");
        } else {
            g->dd_crs_status = CRS_STATUS_NORMAL;
            g->dd_banner = COLOR(
                GREEN,
                "a passed checking run at %#lx with status %d [cov: %#x]");
        }

        // step (1). check whether there is any uncertain patches
        size_t n = z_patcher_uncertain_patches_n(g->patcher);
        if (!n) {
            // we do not need to wrap up the self correction procedure of
            // patcher here, because it has not been started.
            __UPDATE_STAGE_AND_RETURN(DD_NONE, g->dd_crs_status);
        }

        // step (2). set dd_status, dd_addr, and dd_cov
        g->dd_status = status;
        g->dd_addr = addr;
        g->dd_cov = cov;

        // step (3). enable delta debugging for patcher
        g->dd_high = n;
        z_patcher_self_correction_start(g->patcher);

        // step (4). disable all uncertain patches
        // NOTE(review): n is size_t, so -n relies on the unsigned-to-signed
        // wrap-around when converted to the flip count -- confirm the callee
        // takes a signed count.
        z_patcher_flip_uncertain_patches(g->patcher, false, -n);

        // step (5). update dd_stage and return
        __UPDATE_STAGE_AND_RETURN(DD_STAGE0, CRS_STATUS_DEBUG);
    }

    if (g->dd_stage == DD_STAGE0) {
        // step (1). check whether the unintentional crash can be reproduced, if
        // so, we can determine it is caused by a latent bug.
        if (status == g->dd_status && addr == g->dd_addr && cov == g->dd_cov) {
            z_info(g->dd_banner, addr, status, cov);
            z_patcher_self_correction_end(g->patcher);
            __UPDATE_STAGE_AND_RETURN(DD_NONE, g->dd_crs_status);
        }

        // step (2). it is caused by a rewriting error, let's setup the error
        // diagnosis.
        z_info("we encounter a rewriting error, let's do error diagnosis");
        g->dd_low = 0;
        g->dd_e_cur = 0;

        // step (3). set the mid for e_iter, and update e_iter
        int64_t mid = (g->dd_low + g->dd_high) >> 1;
        z_patcher_flip_uncertain_patches(g->patcher, false, mid - g->dd_e_cur);
        g->dd_e_cur = mid;

        // step (4). update stage and return
        __UPDATE_STAGE_AND_RETURN(DD_STAGE1, CRS_STATUS_DEBUG);
    }

    if (g->dd_stage == DD_STAGE1) {
        // step (1). update dd_low and dd_high
        if (status == g->dd_status && addr == g->dd_addr && cov == g->dd_cov) {
            z_info(
                "error diagnosis stage 1: test uncertain patches within [0, "
                "%ld), reproduced: " COLOR(GREEN, "true"),
                g->dd_e_cur);
            g->dd_high = g->dd_e_cur;
        } else {
            z_info(
                "error diagnosis stage 1: test uncertain patches within [0, "
                "%ld), reproduced: " COLOR(RED, "false"),
                g->dd_e_cur);
            g->dd_low = g->dd_e_cur;
        }
        assert(g->dd_low != g->dd_high);

        // step (2). binary search
        if (g->dd_low + 1 == g->dd_high) {
            // step (2.1.1). the binary search is done, move e_iter to
            // g->dd_high
            z_patcher_flip_uncertain_patches(g->patcher, false,
                                             g->dd_high - g->dd_e_cur);
            g->dd_e_cur = g->dd_high;
            assert(g->dd_e_cur > 0);

            // step (2.1.2). check whether we need to go into DD_STAGE2
            if (g->dd_e_cur <= DD_RANGE) {
                // setup the binary search for s_iter
                g->dd_low = 0;
                g->dd_high = g->dd_e_cur;
                g->dd_s_cur = g->dd_low;

                // ready for s_iter binary search
                int64_t mid = (g->dd_low + g->dd_high) >> 1;
                z_patcher_flip_uncertain_patches(g->patcher, true,
                                                 mid - g->dd_s_cur);
                g->dd_s_cur = mid;

                __UPDATE_STAGE_AND_RETURN(DD_STAGE3, CRS_STATUS_DEBUG);
            } else {
                g->dd_s_cur = 0;
                int64_t target = g->dd_e_cur - DD_RANGE;
                z_patcher_flip_uncertain_patches(g->patcher, true,
                                                 target - g->dd_s_cur);
                g->dd_s_cur = target;

                __UPDATE_STAGE_AND_RETURN(DD_STAGE2, CRS_STATUS_DEBUG);
            }
        } else {
            // step (2.2.1). set the mid for e_iter, and update e_iter
            int64_t mid = (g->dd_low + g->dd_high) >> 1;
            z_patcher_flip_uncertain_patches(g->patcher, false,
                                             mid - g->dd_e_cur);
            g->dd_e_cur = mid;

            // step (2.2.2). update stage and return
            __UPDATE_STAGE_AND_RETURN(DD_STAGE1, CRS_STATUS_DEBUG);
        }
    }

    if (g->dd_stage == DD_STAGE2) {
        if (status == g->dd_status && addr == g->dd_addr && cov == g->dd_cov) {
            z_info(
                "error diagnosis stage 2: dup-binary-search works for [%ld, "
                "%ld)",
                g->dd_s_cur, g->dd_e_cur);

            // goto DD_STAGE3 for s_iter binary search
            g->dd_low = g->dd_s_cur;
            g->dd_high = g->dd_e_cur;
            int64_t mid = (g->dd_low + g->dd_high) >> 1;
            z_patcher_flip_uncertain_patches(g->patcher, true,
                                             mid - g->dd_s_cur);
            g->dd_s_cur = mid;

            __UPDATE_STAGE_AND_RETURN(DD_STAGE3, CRS_STATUS_DEBUG);
        } else {
            // this branch means the distance between two rewriting errors are
            // relatively large. So we first repair the last rewriting error.
            z_info(
                "error diagnosis stage 2: the distance between two errors is "
                "large, let's first repair "
                "the last one: [%ld, %ld)",
                g->dd_e_cur - 1, g->dd_e_cur);
            assert(g->dd_e_cur - 1 >= g->dd_s_cur);
            z_patcher_flip_uncertain_patches(g->patcher, true,
                                             (g->dd_e_cur - 1) - g->dd_s_cur);
            z_patcher_self_correction_end(g->patcher);
            // TODO: for checking runs, in this case, we can actually return a
            // CRS_STATUS_DEBUG to force the fork server to re-run the checking
            // run.
            __UPDATE_STAGE_AND_RETURN(DD_NONE, CRS_STATUS_NOTHING);
        }
    }

    if (g->dd_stage == DD_STAGE3) {
        // step (1). update dd_low and dd_high
        if (status == g->dd_status && addr == g->dd_addr && cov == g->dd_cov) {
            z_info(
                "error diagnosis stage 3: test uncertain patches within [%ld, "
                "%ld), reproduced: " COLOR(GREEN, "true"),
                g->dd_s_cur, g->dd_e_cur);
            g->dd_low = g->dd_s_cur;
        } else {
            z_info(
                "error diagnosis stage 3: test uncertain patches within [%ld, "
                "%ld), reproduced: " COLOR(RED, "false"),
                g->dd_s_cur, g->dd_e_cur);
            g->dd_high = g->dd_s_cur;
        }
        assert(g->dd_low != g->dd_high);

        // step (2). check whether the procedure is done
        if (g->dd_low + 1 == g->dd_high) {
            z_patcher_flip_uncertain_patches(g->patcher, true,
                                             g->dd_low - g->dd_s_cur);
            g->dd_s_cur = g->dd_low;
            z_info("locate the error: [%ld, %ld)", g->dd_s_cur, g->dd_e_cur);
            z_patcher_self_correction_end(g->patcher);
            __UPDATE_STAGE_AND_RETURN(DD_NONE, CRS_STATUS_NOTHING);
        }

        // step (3). continue binary search
        int64_t mid = (g->dd_low + g->dd_high) >> 1;
        z_patcher_flip_uncertain_patches(g->patcher, true, mid - g->dd_s_cur);
        g->dd_s_cur = mid;
        __UPDATE_STAGE_AND_RETURN(DD_STAGE3, CRS_STATUS_DEBUG);
    }

    EXITME("unreachable code");
    return g->dd_crs_status;  // used to avoid compiler warnings

#undef __UPDATE_STAGE_AND_RETURN
}

// Create a diagnoser attached to the given components; DD-related fields stay
// uninitialized until delta debugging is enabled.
Z_API Diagnoser *z_diagnoser_create(Patcher *patcher, Rewriter *rewriter,
                                    Disassembler *disassembler,
                                    RewritingOptArgs *opts) {
    Diagnoser *g = STRUCT_ALLOC(Diagnoser);

    g->opts = opts;
    g->binary = z_disassembler_get_binary(disassembler);
    g->patcher = patcher;
    g->rewriter = rewriter;
    g->disassembler = disassembler;

    // all other DD-related fields will be initialized when enabling DD.
    g->dd_stage = DD_NONE;

    g->crashpoints = g_queue_new();
    const char *binary_filename = z_binary_get_original_filename(g->binary);
    g->cp_filename = z_strcat(CRASHPOINT_LOG_PREFIX, binary_filename);

    return g;
}

Z_API void z_diagnoser_destroy(Diagnoser *g) {
    g_queue_free(g->crashpoints);
    z_free((void *)g->cp_filename);
    z_free(g);
}

// Load previously-logged crashpoints (if the log file exists) into the queue;
// each CrashPoint record becomes three consecutive queue elements.
Z_API void z_diagnoser_read_crashpoint_log(Diagnoser *g) {
    if (z_access(g->cp_filename, F_OK)) {
        z_trace("log file for crashpoints (%s) does not exist", g->cp_filename);
        return;
    }

    Buffer *buffer = z_buffer_read_file(g->cp_filename);
    CrashPoint *cp = (CrashPoint *)z_buffer_get_raw_buf(buffer);
    size_t file_size = z_buffer_get_size(buffer);
    for (size_t i = 0; i < file_size; i += sizeof(CrashPoint), cp++) {
        g_queue_push_tail(g->crashpoints, GSIZE_TO_POINTER(cp->addr));
        g_queue_push_tail(g->crashpoints, GSIZE_TO_POINTER(cp->type));
        g_queue_push_tail(g->crashpoints, GINT_TO_POINTER(!!cp->is_real));
    }
    z_buffer_destroy(buffer);
}

// Serialize the in-memory crashpoint queue back into the log file (three queue
// elements per CrashPoint record).
Z_API void z_diagnoser_write_crashpoint_log(Diagnoser *g) {
#ifndef BINARY_SEARCH_INVALID_CRASH
    // write down all crashpoints
    FILE *f = z_fopen(g->cp_filename, "wb");
    CrashPoint cp = {
        .addr = INVALID_ADDR,
        .type = CP_NONE,
        .is_real = false,
    };
    GList *l = g->crashpoints->head;
    while (l != NULL) {
        // get address first
        cp.addr =
(addr_t)l->data;

        // get status
        l = l->next;
        cp.type = (CPType)l->data;

        // get is_real
        l = l->next;
        cp.is_real = !!(l->data);

        if (z_fwrite(&cp, sizeof(CrashPoint), 1, f) != 1) {
            EXITME("error on writing crashpoint log file");
        }

        // go to next CrashPoint struct
        l = l->next;
    }

    z_fclose(f);
#endif
}

// Replay every logged crashpoint against the current patcher/rewriter state.
Z_API void z_diagnoser_apply_logged_crashpoints(Diagnoser *g) {
    // replay all
    GList *l = g->crashpoints->head;
    while (l != NULL) {
        // get address first
        addr_t addr = (addr_t)l->data;

        // get status
        l = l->next;
        CPType type = (CPType)l->data;

        // get is_real
        l = l->next;
        bool is_real = !!(l->data);

        // adjust the bridge crashpoint
        // XXX: it does not exactly follow the original execution, but it should
        // get the same rewriting/patching as the original execution does.
        if (type != CP_INTERNAL) {
            addr_t adjusted_addr =
                z_patcher_adjust_bridge_address(g->patcher, addr);
            if (adjusted_addr != addr) {
                EXITME(
                    "the logged crashpoint does not generate the same patching "
                    "as the original execution does");
            }
        }

        // update the retaddr information in rewriter
        if (type == CP_RETADDR && is_real) {
            Buffer *addrs = z_rewriter_new_validate_retaddr(g->rewriter, addr);
            // XXX: we directly free addrs as it is useless here
            z_buffer_destroy(addrs);
        }

        // invoke z_diagnoser_new_crashpoint
        z_info("logged %s crashpoint: %#lx", z_cptype_string(type), addr);
        __diagnoser_handle_single_crashpoint(g, addr, type, is_real, false);

        // go to next CrashPoint struct
        l = l->next;
    }

    z_rewriter_optimization_stats(g->rewriter);
    z_patcher_bridge_stats(g->patcher);
}

// Entry point for every status reported by a client run: dispatches to delta
// debugging, validates/patches intentional crashes, and tells the caller what
// the fork server should do next.
Z_API CRSStatus z_diagnoser_new_crashpoint(Diagnoser *g, int status,
                                           addr_t addr, uint32_t cov,
                                           bool check_run_enabled) {
    // step (0). check whether diagnoser is under delta debugging mode
    if (g->dd_stage != DD_NONE) {
        // the diagnoser is under delta debugging mode
        return __diagnoser_delta_debug(g, status, addr, cov);
    }

    // step (1). check whether the status is suspect
    if (!IS_ABNORMAL_STATUS(status)) {
        if (check_run_enabled) {
            // this will only happen when checking runs are enabled
            return __diagnoser_delta_debug(g, status, addr, cov);
        } else {
            return CRS_STATUS_NORMAL;
        }
    }
    if (!IS_SUSPECT_STATUS(status)) {
        // it is an unintentional crash
        assert(g->dd_stage == DD_NONE);
        return __diagnoser_delta_debug(g, status, addr, cov);
    }
    if (addr == CRS_INVALID_IP) {
        EXITME("the client exits as SUSPECT but no suspected address is sent");
    }

    // step (2). validate crashpoint
    addr_t real_addr = __diagnoser_validate_crashpoint(g, addr);
    // XXX: we have to adjust bridge patch pointer when real_addr is unchanged.
    if (real_addr == addr) {
        // in this case, it cannot be a CP_INTERNAL
        real_addr = z_patcher_adjust_bridge_address(g->patcher, real_addr);
    }

    // step (3). check whether real_addr is INVALID_ADDR
    if (real_addr == INVALID_ADDR) {
        // it is an unintentional crash
        z_info(COLOR(RED, "a potential crash with suspect status! (%#lx)"),
               addr);
        assert(g->dd_stage == DD_NONE);
        return __diagnoser_delta_debug(g, status, addr, cov);
    }

    // step (4). get CPType
    CPType cp_type = __diagnoser_get_crashpoint_type(g, addr, real_addr);

    // step (5). patch the intentional crash
    __diagnoser_patch_crashpoint(g, real_addr, cp_type);
    z_rewriter_optimization_stats(g->rewriter);
    z_patcher_bridge_stats(g->patcher);

    // step (6). check remmap
    if (z_binary_check_state(g->binary, ELFSTATE_SHADOW_EXTENDED)) {
        z_info("underlying shadow file is extended");
        // do not forget to disable the shadow_extened flag
        z_binary_set_elf_state(g->binary,
                               ELFSTATE_SHADOW_EXTENDED | ELFSTATE_DISABLE);
        return CRS_STATUS_REMMAP;
    } else {
        return CRS_STATUS_NOTHING;
    }
}

================================================
FILE: src/diagnoser.h
================================================
/*
 * diagnoser.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef __DIAGNOSER_H
#define __DIAGNOSER_H

#include "binary.h"
#include "config.h"
#include "crs_config.h"
#include "disassembler.h"
#include "patcher.h"
#include "rewriter.h"
#include "sys_optarg.h"

// NOTE(review): the name of this system header was lost during text
// extraction -- confirm against the original source tree.
#include

/*
 * CrashPoint Type
 *
 *   CP_INTERNAL: need to disassemble address
 *   CP_EXTERNAL: need to disassembly address and build jump bridge
 *   CP_RETADDR: need to build jump bridge
 */
// XXX: CP_RETADDR are only used when pdisasm is not fully supported. Note that
// in this situation, even we misidentify a CP_RETADDR, it would not impact the
// rewriting procedure (i.e., any wrong bridge will get fixed later / not
// uncertain_patches in Patcher).
typedef enum cp_type_t { CP_NONE = 0UL, CP_INTERNAL, // internal indirect call/jump CP_EXTERNAL, // external callback from library CP_RETADDR, // return address when calling library } CPType; #define z_cptype_string(t) \ ((type == CP_INTERNAL) ? "INTERNAL" \ : ((type == CP_EXTERNAL) ? "EXTERNAL" : "RETADDR")) /* * Logged CrashPoint */ typedef struct crash_point_t { addr_t addr; CPType type; bool is_real; } CrashPoint; /* * The range of Dup-Binary-Search */ #define DD_RANGE 4 /* * Stage for delta debugging mode */ typedef enum delta_debugging_stage { DD_STAGE0, // validate whether it is a rewriting error DD_STAGE1, // binary search to locate the e_iter in Patcher DD_STAGE2, // validate whether all rewriting errors are in a DD_RANGE DD_STAGE3, // binary search to locate the s_iter in Pacther DD_NONE = -1, // not in the delta debugging mode } DDStage; /* * Diagnoser distinguishes the intentional crashes and the unintentional ones, * while it also manages the schedule of self-recovering. */ STRUCT(Diagnoser, { Binary *binary; Patcher *patcher; Rewriter *rewriter; Disassembler *disassembler; DDStage dd_stage; int dd_status; addr_t dd_addr; uint32_t dd_cov; // used for distinguishing crash and checking runs CRSStatus dd_crs_status; const char *dd_banner; // used for dup-binary-search (int64_t to avoid overflow) int64_t dd_low; int64_t dd_high; int64_t dd_s_cur; int64_t dd_e_cur; // XXX: for effeciency, a CrashPoint struct is broken into three elements in // the queue. 
GQueue *crashpoints; const char *cp_filename; // rewriting optargs RewritingOptArgs *opts; }); DECLARE_GETTER(Diagnoser, diagnoser, GQueue *, crashpoints); /* * Create diagnoser */ Z_API Diagnoser *z_diagnoser_create(Patcher *patcher, Rewriter *rewriter, Disassembler *disassembler, RewritingOptArgs *opts); /* * Destroy diagnoser */ Z_API void z_diagnoser_destroy(Diagnoser *g); /* * Read recorded crashpoints from log file */ Z_API void z_diagnoser_read_crashpoint_log(Diagnoser *g); /* * Log down recorded crashpoints */ Z_API void z_diagnoser_write_crashpoint_log(Diagnoser *g); /* * Apply all logged crashpoints */ Z_API void z_diagnoser_apply_logged_crashpoints(Diagnoser *g); /* * Find a new crashpoint, and diagnoser will validate this crashpoint and does * patch accordingly. */ Z_API CRSStatus z_diagnoser_new_crashpoint(Diagnoser *g, int status, addr_t addr, uint32_t cov, bool check_run_enabled); #endif ================================================ FILE: src/disassembler.c ================================================ /* * disassembler.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
 */

#include "disassembler.h"

#include "capstone_.h"
#include "elf_.h"
#include "interval_splay.h"
#include "restricted_ptr.h"
#include "utils.h"

// NOTE(review): the names of the three system headers below were lost during
// text extraction -- confirm against the original source tree.
#include
#include
#include

#include "prob_disasm/prob_disasm_complete.c"
#include "prob_disasm/prob_disasm_simple.c"

#define SUPERSET_DISASM_THRESHOLD 0x400000

/*
 * Runtime binding for probabilistic disassembly: dispatches to the complete
 * (pdisasm) or the simple (_S-suffixed) implementation depending on
 * d->enable_pdisasm.
 */
#define __disassembler_invoke_prob_disasm(d, func, __args...) \
    ({ (d->enable_pdisasm ? func(__args) : func##_S(__args)); })

/*
 * Function Pointer: destroy a cs_insn
 */
Z_PRIVATE void __disassembler_free_cs_insn(cs_insn *inst);

/*
 * Superset disassembly
 */
Z_PRIVATE void __disassembler_superset_disasm(Disassembler *d);

/*
 * Check whether underlying binary has inlined data (potentially)
 */
Z_PRIVATE bool __disassembler_has_inlined_data(Disassembler *d);

/*
 * Analyse instruction group, return whether need to continue analysis.
 */
Z_PRIVATE bool __disassembler_analyze_inst(cs_insn *inst, addr_t *target);

/*
 * Disassembly _start / .init / .fini / main
 */
Z_RESERVED Z_PRIVATE void __disassembler_pre_disasm(Disassembler *d);

/*
 * Getter and Setter
 */
DEFINE_GETTER(Disassembler, disassembler, Binary *, binary);
DEFINE_GETTER(Disassembler, disassembler, UCFG_Analyzer *, ucfg_analyzer);
DEFINE_GETTER(Disassembler, disassembler, bool, enable_pdisasm);

// Release a single capstone instruction allocated by the disassembler.
Z_PRIVATE void __disassembler_free_cs_insn(cs_insn *inst) { cs_free(inst, 1); }

/*
 * XXX: This function is out of date. Hence, there is no guarantee to use it.
*/ Z_RESERVED Z_PRIVATE void __disassembler_pre_disasm(Disassembler *d) { ELF *e = z_binary_get_elf(d->binary); z_info("disassemble .init/.fini"); GQueue *bbs = g_queue_new(); // _start addr_t entrypoint = z_elf_get_ori_entry(e); g_queue_push_tail(bbs, GSIZE_TO_POINTER(entrypoint)); // .init addr_t _init = z_elf_get_init(e); z_info(".init: %#lx", _init); g_queue_push_tail(bbs, GSIZE_TO_POINTER(_init)); // .fini addr_t _fini = z_elf_get_fini(e); z_info(".fini: %#lx", _fini); g_queue_push_tail(bbs, GSIZE_TO_POINTER(_fini)); Rptr *array = NULL; size_t array_size = 0; addr_t array_addr = INVALID_ADDR; // .init.array Elf64_Shdr *init_array = z_elf_get_shdr_init_array(e); array_size = init_array->sh_size; array_addr = init_array->sh_addr; array = z_elf_vaddr2ptr(e, array_addr); for (int i = 0; i < array_size / sizeof(addr_t); i++) { addr_t fcn = *z_rptr_get_ptr(array, addr_t); z_info(".init.array[%d]: %#lx", i, fcn); g_queue_push_tail(bbs, GSIZE_TO_POINTER(fcn)); z_rptr_inc(array, addr_t, 1); } z_rptr_destroy(array); // .fini.array Elf64_Shdr *fini_array = z_elf_get_shdr_fini_array(e); array_size = fini_array->sh_size; array_addr = fini_array->sh_addr; array = z_elf_vaddr2ptr(e, array_addr); for (int i = 0; i < array_size / sizeof(addr_t); i++) { addr_t fcn = *z_rptr_get_ptr(array, addr_t); z_info(".fini.array[%d]: %#lx", i, fcn); g_queue_push_tail(bbs, GSIZE_TO_POINTER(fcn)); z_rptr_inc(array, addr_t, 1); } z_rptr_destroy(array); // disassemble without call while (!g_queue_is_empty(bbs)) { addr_t bb_addr = (addr_t)g_queue_pop_head(bbs); addr_t cur_addr = bb_addr; cs_insn *inst = NULL; do { if (g_hash_table_lookup(d->potential_insts, GSIZE_TO_POINTER(cur_addr))) { break; } inst = z_disassembler_get_superset_disasm(d, cur_addr); if (inst == NULL) { break; } g_hash_table_insert(d->recursive_disasm, GSIZE_TO_POINTER(cur_addr), (gpointer)inst); if (z_capstone_is_jmp(inst) || z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst) || z_capstone_is_xbegin(inst)) { cs_detail 
*detail = inst->detail; if ((detail->x86.op_count == 1) && (detail->x86.operands[0].type == X86_OP_IMM)) { g_queue_push_tail( bbs, GSIZE_TO_POINTER(detail->x86.operands[0].imm)); } } cur_addr += inst->size; } while (!z_capstone_is_terminator(inst)); } z_info("disassemble .init/.fini done"); z_info("we have %ld correct instructions disassemblied", g_hash_table_size(d->recursive_disasm)); } // XXX: here we simply check whether linear disassembly can decode all // instructions (which seems good enough for most cases), but we can have // advanced algorithms in the future (e.g., using entropy or data hints from // probabilistic disassembly) Z_PRIVATE bool __disassembler_has_inlined_data(Disassembler *d) { assert(d != NULL); addr_t cur_addr = d->text_addr; do { cs_insn *cur_inst = z_disassembler_get_superset_disasm(d, cur_addr); if (!cur_inst) { return true; } cur_addr += cur_inst->size; } while (cur_addr < d->text_addr + d->text_size); return false; } // XXX: we do not use UCFG_Analyzer here, as the following code runs faster than // a searching operation in hashmap. Note that the following code will happen // during fuzzing Z_PRIVATE bool __disassembler_analyze_inst(cs_insn *inst, addr_t *targets) { assert(inst != NULL); cs_detail *detail = inst->detail; if (z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst)) { assert((detail->x86.op_count == 1) && (detail->x86.operands[0].type == X86_OP_IMM)); *(targets++) = inst->address + inst->size; *targets = detail->x86.operands[0].imm; } else if (z_capstone_is_jmp(inst) || z_capstone_is_call(inst) || z_capstone_is_xbegin(inst)) { if ((detail->x86.op_count == 1) && (detail->x86.operands[0].type == X86_OP_IMM)) { // direct call and direct/condition jump *targets = detail->x86.operands[0].imm; } else { // indirect call/jump z_trace("indirect call/jmp " CS_SHOW_INST(inst)); } } return !z_capstone_is_terminator(inst); } Z_PRIVATE void __disassembler_superset_disasm(Disassembler *d) { assert(d); // step (0). get .text section range. 
ELF *e = z_binary_get_elf(d->binary); addr_t text_addr = d->text_addr; size_t text_size = d->text_size; z_info("start superset disassembly in [%#lx, %#lx]", text_addr, text_size + text_addr - 1); // step (1). get code buf Rptr *buf = z_elf_vaddr2ptr(e, text_addr); // step (2). disassembly for (addr_t cur_addr = text_addr; cur_addr < text_addr + text_size; cur_addr++) { CS_DISASM(buf, cur_addr, 1); if (cs_count == 1) { z_ucfg_analyzer_add_inst(d->ucfg_analyzer, cur_addr, cs_inst, false); g_hash_table_insert(d->superset_disasm, GSIZE_TO_POINTER(cur_addr), (gpointer)cs_inst); z_addr_dict_set(d->occ_addrs, cur_addr, z_buffer_create(NULL, 0)); z_trace("superset disassembly " CS_SHOW_INST(cs_inst)); cs_inst = NULL; // avoid double free } z_rptr_inc(buf, uint8_t, 1); } z_info("superset disassembly done, found %ld instructions", g_hash_table_size(d->superset_disasm)); // step (3). remember to free code buffer z_rptr_destroy(buf); // step (4). calculate occluded address for (addr_t cur_addr = text_addr; cur_addr < text_addr + text_size; cur_addr++) { // validation cs_insn *inst = (cs_insn *)g_hash_table_lookup( d->superset_disasm, GSIZE_TO_POINTER(cur_addr)); if (!inst) { continue; } // find all possible occluded instructions for (addr_t occ_addr = cur_addr + 1; occ_addr < cur_addr + inst->size; occ_addr++) { cs_insn *occ_inst = (cs_insn *)g_hash_table_lookup( d->superset_disasm, GSIZE_TO_POINTER(occ_addr)); if (!occ_inst) { continue; } // update both z_buffer_append_raw(z_addr_dict_get(d->occ_addrs, cur_addr), (uint8_t *)&occ_addr, sizeof(occ_addr)); z_buffer_append_raw(z_addr_dict_get(d->occ_addrs, occ_addr), (uint8_t *)&cur_addr, sizeof(cur_addr)); } } } Z_API Disassembler *z_disassembler_create(Binary *b, RewritingOptArgs *opts) { Disassembler *d = STRUCT_ALLOC(Disassembler); d->opts = opts; d->binary = b; d->superset_disasm = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)(&__disassembler_free_cs_insn)); d->recursive_disasm = 
g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); // recursive_disasm does not free cs_insn, freed by superset_disasm d->linear_disasm = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); // linear_disasm does not free cs_insn, freed by superset_disasm d->prob_disasm = NULL; d->potential_insts = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); d->potential_blocks = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); d->ucfg_analyzer = z_ucfg_analyzer_create(d->binary, d->opts); // we choose to superset disassemble relative-small binary ELF *e = z_binary_get_elf(d->binary); Elf64_Shdr *text = z_elf_get_shdr_text(e); d->text_addr = text->sh_addr; d->text_size = text->sh_size; // get occluded address z_addr_dict_init(d->occ_addrs, d->text_addr, d->text_size); if (d->text_size <= SUPERSET_DISASM_THRESHOLD) { z_info(".text section (%#lx bytes) is suitable for pre-disasm", d->text_size); // do not backup .text d->text_backup = NULL; __disassembler_superset_disasm(d); } else { z_info(".text section (%#lx bytes) is not suitable for pre-disasm", d->text_size); d->text_backup = z_alloc(d->text_size, sizeof(uint8_t)); Rptr *ptr = z_elf_vaddr2ptr(e, d->text_addr); z_rptr_memcpy(d->text_backup, ptr, d->text_size); z_rptr_destroy(ptr); } d->enable_pdisasm = (!d->opts->force_linear) && (d->opts->force_pdisasm || __disassembler_has_inlined_data(d)); z_info("enable probabilistic disassembly: %s", d->enable_pdisasm ? 
"true" : "false"); __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_create, d); return d; } Z_API void z_disassembler_destroy(Disassembler *d) { __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_destroy, d); g_hash_table_destroy(d->superset_disasm); g_hash_table_destroy(d->recursive_disasm); g_hash_table_destroy(d->linear_disasm); if (d->text_backup) { z_free(d->text_backup); } g_hash_table_destroy(d->potential_insts); g_hash_table_destroy(d->potential_blocks); z_addr_dict_destroy(d->occ_addrs, &z_buffer_destroy); z_ucfg_analyzer_destroy(d->ucfg_analyzer); z_free(d); } Z_API void z_disassembler_get_prob_disasm_internal( Disassembler *d, addr_t addr, cs_insn **inst, uint32_t *scc_id, double128_t *inst_hint, double128_t *inst_lost, double128_t *data_hint, double128_t *D, double128_t *P) { __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_get_internal, d, addr, inst, scc_id, inst_hint, inst_lost, data_hint, D, P); } Z_API void z_disassembler_prob_disasm(Disassembler *d) { __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_start, d); } Z_API double128_t z_disassembler_get_prob_disasm(Disassembler *d, addr_t addr) { return __disassembler_invoke_prob_disasm( d, __disassembler_pdisasm_get_inst_prob, d, addr); } Z_API void z_diassembler_update_prob_disasm(Disassembler *d, addr_t addr, bool is_inst) { __disassembler_invoke_prob_disasm(d, __disassembler_pdisasm_update, d, addr, is_inst); } // XXX: note that this function is not completed. Z_API GQueue *z_disassembler_linear_disasm(Disassembler *d) { assert(d != NULL); // step (0). get .text section range. addr_t text_addr = d->text_addr; size_t text_size = d->text_size; // step (1). other structures addr_t cur_addr = text_addr; GQueue *bbs = g_queue_new(); g_queue_push_tail(bbs, GSIZE_TO_POINTER(cur_addr)); // first addr is a BB // step (2). 
linear disassembler GQueue *tmp_bbs = g_queue_new(); GQueue *tmp_insts = g_queue_new(); while (cur_addr < text_addr + text_size) { bool valid_bb = true; addr_t tmp_cur_addr = cur_addr; // step (2.1) use inner loop to check whether current basic block is // valid. Note that when the inner exits, the tmp_cur_addr is always the // next no-tried instruction address do { cs_insn *inst = z_disassembler_get_superset_disasm(d, tmp_cur_addr); // check instruction itself if (inst == NULL) { z_trace("invalid instruction in linear disassembly: %#lx", tmp_cur_addr); valid_bb = false; break; } // check branch instructions and update basic block information cs_detail *detail = inst->detail; if ((z_capstone_is_call(inst) || z_capstone_is_cjmp(inst) || z_capstone_is_xbegin(inst) || z_capstone_is_loop(inst) || z_capstone_is_jmp(inst)) && // check instruction type ((detail->x86.op_count == 1) && (detail->x86.operands[0].type == X86_OP_IMM)) // check direct transfer ) { addr_t tar_addr = detail->x86.operands[0].imm; if (tar_addr >= text_addr && tar_addr < text_addr + text_size) { // target address inside .text // TODO: acutally, we should check for linear disassembly // result, instead of superset disassembly! if (z_disassembler_get_superset_disasm(d, tar_addr)) { g_queue_push_tail(tmp_bbs, GSIZE_TO_POINTER(tar_addr)); } else { z_trace( "invalid instruction in linear disassembly " "(target): %#lx", tmp_cur_addr); valid_bb = false; break; } } } // TODO: do not forget cjmp and loop's false branch // update instruction g_queue_push_tail(tmp_insts, GSIZE_TO_POINTER(tmp_cur_addr)); // update tmp_cur_addr tmp_cur_addr += inst->size; // if inst is terminator, break temporary try if (z_capstone_is_terminator(inst)) { break; } } while (tmp_cur_addr < text_addr + text_size); if (valid_bb) { // step (2.2): if valid, update bbs and insts, and update cur_addr. // Note that original cur_addr is another bb entrypoint. 
g_queue_push_tail(bbs, GSIZE_TO_POINTER(cur_addr)); g_hash_table_insert(d->potential_blocks, GSIZE_TO_POINTER(cur_addr), GSIZE_TO_POINTER(true)); while (!g_queue_is_empty(tmp_bbs)) { addr_t bb_addr = (addr_t)g_queue_pop_head(tmp_bbs); g_queue_push_tail(bbs, GSIZE_TO_POINTER(bb_addr)); g_hash_table_insert(d->potential_blocks, GSIZE_TO_POINTER(bb_addr), GSIZE_TO_POINTER(true)); } while (!g_queue_is_empty(tmp_insts)) { addr_t inst_addr = (addr_t)g_queue_pop_head(tmp_insts); cs_insn *inst = z_disassembler_get_superset_disasm(d, inst_addr); assert(inst); g_hash_table_insert(d->linear_disasm, GSIZE_TO_POINTER(inst_addr), (gpointer)inst); g_hash_table_insert(d->potential_insts, GSIZE_TO_POINTER(inst_addr), (gpointer)inst); } cur_addr = tmp_cur_addr; } else { // setp (2.3): if not valid, inc cur_addr and clear tmp_bbs/_insts g_queue_clear(tmp_bbs); g_queue_clear(tmp_insts); cur_addr += 1; } } g_queue_free(tmp_bbs); g_queue_free(tmp_insts); z_info("we have %ld instruction linearly disassemblied", g_hash_table_size(d->linear_disasm)); z_info("with %ld basic block entrys", g_queue_get_length(bbs)); return bbs; } Z_API GQueue *z_disassembler_recursive_disasm(Disassembler *d, addr_t addr) { assert(d); z_trace("disassemble at %#lx", addr); GQueue *new_bbs = g_queue_new(); // step (0). get .text section range. // We do not disassembly any code outside this range. addr_t text_addr = d->text_addr; size_t text_size = d->text_size; z_trace(".text section: [%#lx, %#lx]", text_addr, text_addr + text_size - 1); if (!((addr >= text_addr) && (addr - text_addr < text_size))) { z_warn("%#lx is out of .text section", addr); return new_bbs; } // step (1). check addr is an new BB (XXX: this might be wrong) if (!g_hash_table_lookup(d->potential_blocks, GSIZE_TO_POINTER(addr))) { g_queue_push_tail(new_bbs, GSIZE_TO_POINTER(addr)); g_hash_table_insert(d->potential_blocks, GSIZE_TO_POINTER(addr), GSIZE_TO_POINTER(true)); } // step (2). 
init queue GQueue *q = g_queue_new(); g_queue_push_tail(q, GSIZE_TO_POINTER(addr)); // step (3). disassembly until no new target while (!g_queue_is_empty(q)) { // step (3.1). get starting address addr_t bb_addr = (addr_t)g_queue_pop_head(q); addr_t cur_addr = bb_addr; cs_insn *inst = NULL; z_trace("recursive disassembly: BB address [%#lx]", bb_addr); // step (3.2). disassembly basic block while (true) { // [1]. check whether this region is disassembled if (g_hash_table_lookup(d->potential_insts, GSIZE_TO_POINTER(cur_addr))) { break; } // [2]. get corresponding instruction cs_insn *tmp = z_disassembler_get_superset_disasm(d, cur_addr); // [3]. check whether it is a valid instruction if (tmp == NULL) { z_warn("go into an invalid address: %#lx", cur_addr); if (inst != NULL) { z_warn("previous instruction " CS_SHOW_INST(inst)); } break; } // [4]. add into recursive_disasm and update potential instruction inst = tmp; z_trace("recursive disassembly " CS_SHOW_INST(inst)); g_hash_table_insert(d->recursive_disasm, GSIZE_TO_POINTER(cur_addr), (gpointer)inst); g_hash_table_insert(d->potential_insts, GSIZE_TO_POINTER(cur_addr), (gpointer)inst); // [5]. analyze instruction group addr_t target_addrs[2] = {INVALID_ADDR, INVALID_ADDR}; bool do_more = __disassembler_analyze_inst(inst, target_addrs); z_trace("find target addresss: %#lx %#lx", target_addrs[0], target_addrs[1]); // [6]. update target for (int i = 0; i < 2; i++) { addr_t target_addr = target_addrs[i]; if (target_addr >= text_addr && target_addr - text_addr < text_size) { g_queue_push_tail(q, GSIZE_TO_POINTER(target_addr)); z_trace("find new target: %#lx", target_addr); if (!g_hash_table_lookup(d->potential_blocks, GSIZE_TO_POINTER(target_addr))) { g_queue_push_tail(new_bbs, GSIZE_TO_POINTER(target_addr)); g_hash_table_insert(d->potential_blocks, GSIZE_TO_POINTER(target_addr), GSIZE_TO_POINTER(true)); } } } // [7]. update cur_addr cur_addr += inst->size; // [8]. break if needed if (!do_more) { break; } } } // step (4). 
free queue g_queue_free(q); // step (5). output how many instruction are correctly disassembly z_info("number of new basic blocks : %ld", g_queue_get_length(new_bbs)); z_info("number of rewritten instructions: %ld", g_hash_table_size(d->recursive_disasm)); return new_bbs; } // update superset disasm Z_API const cs_insn *z_disassembler_update_superset_disasm(Disassembler *d, addr_t addr) { const cs_insn *res = NULL; addr_t text_addr = d->text_addr; size_t text_size = d->text_size; if (addr < text_addr || addr >= text_addr + text_size) { EXITME("try to re-disasm an invalid address: %#lx", addr); } if (z_disassembler_is_potential_inst_entrypoint(d, addr)) { EXITME("try to re-disasm a validated address: %#lx", addr); } ELF *e = z_binary_get_elf(d->binary); Rptr *ptr = z_elf_vaddr2ptr(e, addr); CS_DISASM(ptr, addr, 1); if (cs_count == 1) { // update superset disassembly // XXX: the z_ucfg_analyzer_add_inst must be placed before // g_hash_table_insert, as the g_hash_table_insert may free the original // instruction z_ucfg_analyzer_add_inst(d->ucfg_analyzer, addr, cs_inst, true); g_hash_table_insert(d->superset_disasm, GSIZE_TO_POINTER(addr), (gpointer)cs_inst); res = cs_inst; // update backup if (d->text_backup) { size_t off = addr - text_addr; memcpy(d->text_backup + off, res->bytes, res->size); } cs_inst = NULL; // avoid double free } else { EXITME("invalid instruction at %#lx", addr); } z_rptr_destroy(ptr); assert(res != NULL); return res; } Z_API cs_insn *z_disassembler_get_superset_disasm(Disassembler *d, addr_t addr) { cs_insn *inst = (cs_insn *)g_hash_table_lookup(d->superset_disasm, GSIZE_TO_POINTER(addr)); // check whether we need to update superset disasm if (d->text_backup && (!inst)) { // step(1). check addr in .text (we only consider code in .text) addr_t text_addr = d->text_addr; size_t text_size = d->text_size; if (addr < text_addr || addr >= text_addr + text_size) { return NULL; } // step(2). 
disasm non-disassembled instruction size_t off1 = addr - text_addr; size_t off2 = text_size - off1; CS_DISASM_RAW(d->text_backup + off1, off2, addr, 1); if (cs_count == 1) { z_ucfg_analyzer_add_inst(d->ucfg_analyzer, addr, cs_inst, false); g_hash_table_insert(d->superset_disasm, GSIZE_TO_POINTER(addr), (gpointer)cs_inst); z_trace("superset disassembly " CS_SHOW_INST(cs_inst)); inst = (cs_insn *)cs_inst; cs_inst = NULL; // avoid double free } } return inst; } Z_API cs_insn *z_disassembler_get_recursive_disasm(Disassembler *d, addr_t addr) { return (cs_insn *)g_hash_table_lookup(d->recursive_disasm, GSIZE_TO_POINTER(addr)); } Z_API cs_insn *z_disassembler_get_linear_disasm(Disassembler *d, addr_t addr) { return (cs_insn *)g_hash_table_lookup(d->linear_disasm, GSIZE_TO_POINTER(addr)); } Z_API bool z_disassembler_is_potential_block_entrypoint(Disassembler *d, addr_t addr) { return !!g_hash_table_lookup(d->potential_blocks, GSIZE_TO_POINTER(addr)); } Z_API bool z_disassembler_is_potential_inst_entrypoint(Disassembler *d, addr_t addr) { return !!g_hash_table_lookup(d->potential_insts, GSIZE_TO_POINTER(addr)); } Z_API bool z_disassembler_is_within_disasm_range(Disassembler *d, addr_t addr) { return !!(addr >= d->text_addr && addr < (d->text_addr + d->text_size)); } Z_API Buffer *z_disassembler_get_occluded_addrs(Disassembler *d, addr_t addr) { cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); if (!inst) { return NULL; } if (!z_addr_dict_exist(d->occ_addrs, addr)) { // occluded address hasn't been analyzed z_addr_dict_set(d->occ_addrs, addr, z_buffer_create(NULL, 0)); // note that the longest x86/64 instruction is 15-bytes for (addr_t occ_addr = addr - 14; occ_addr < addr + inst->size; occ_addr++) { cs_insn *occ_inst = z_disassembler_get_superset_disasm(d, occ_addr); if (!occ_inst) { continue; } if (occ_addr < addr && occ_addr + occ_inst->size > addr) { goto SUCC; } if (occ_addr > addr && addr + inst->size > occ_addr) { goto SUCC; } continue; SUCC: 
z_buffer_append_raw(z_addr_dict_get(d->occ_addrs, addr), (uint8_t *)&occ_addr, sizeof(occ_addr)); } } return z_addr_dict_get(d->occ_addrs, addr); } Z_API bool z_disassembler_fully_support_prob_disasm(Disassembler *d) { return !z_strcmp("ProbDisassembler", STRUCT_TYPE(d->prob_disasm)); } #define __DISASSEMBLER_DECLARE_SUCC_AND_PRED(etype, rtype) \ Z_API Buffer *z_disassembler_get_##etype##_##rtype(Disassembler *d, \ addr_t addr) { \ /* force superset disasm */ \ if (d->text_backup) { \ z_disassembler_get_superset_disasm(d, addr); \ } \ \ return z_ucfg_analyzer_get_##etype##_##rtype(d->ucfg_analyzer, addr); \ } __DISASSEMBLER_DECLARE_SUCC_AND_PRED(direct, predecessors); __DISASSEMBLER_DECLARE_SUCC_AND_PRED(direct, successors); __DISASSEMBLER_DECLARE_SUCC_AND_PRED(intra, predecessors); __DISASSEMBLER_DECLARE_SUCC_AND_PRED(intra, successors); __DISASSEMBLER_DECLARE_SUCC_AND_PRED(all, predecessors); __DISASSEMBLER_DECLARE_SUCC_AND_PRED(all, successors); #undef __DISASSEMBLER_DECLARE_SUCC_AND_PRED ================================================ FILE: src/disassembler.h ================================================ /* * disassembler.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/

#ifndef __DISASSEMBLER_H
#define __DISASSEMBLER_H

#include "address_dictionary.h"
#include "binary.h"
#include "buffer.h"
#include "config.h"
#include "interval_splay.h"
#include "sys_optarg.h"
#include "ucfg_analyzer.h"

// NOTE(review): the following two system includes lost their header names
// during text extraction -- restore them from the upstream repository.
#include
#include

STRUCT(Disassembler, {
    // Binary which needs disassembly
    Binary *binary;

    // .text info (virtual address / size; backup is NULL when the section is
    // small enough to be eagerly superset-disassembled)
    addr_t text_addr;
    size_t text_size;
    uint8_t *text_backup;

    // Disassembly results keyed by address (cs_insn values are owned by
    // superset_disasm; the other tables hold borrowed pointers)
    GHashTable *superset_disasm;
    GHashTable *recursive_disasm;
    GHashTable *linear_disasm;
    PhantomType *prob_disasm;

    // Occluded address: for each address, the addresses whose decoded
    // instructions overlap it
    AddrDictFast(Buffer *, occ_addrs);

    // Pdisasm enable?
    bool enable_pdisasm;

    /*
     * Potential information.
     * This information is collected by linear and recursive disassembly. But
     * due to inlined data, non-return functions, or any other incomplete
     * analysis result, this information may be wrong.
     */
    // Entrypoints of *confidently* disassembled instructions
    GHashTable *potential_insts;
    // Entrypoints of *confidently* disassembled basic blocks
    GHashTable *potential_blocks;

    // Light-weight instruction-level analyzer
    UCFG_Analyzer *ucfg_analyzer;

    // rewriting optargs
    RewritingOptArgs *opts;
});

/*
 * Getter and Setter
 */
DECLARE_GETTER(Disassembler, disassembler, Binary *, binary);
DECLARE_GETTER(Disassembler, disassembler, UCFG_Analyzer *, ucfg_analyzer);

/*
 * Create a disassembler
 */
Z_API Disassembler *z_disassembler_create(Binary *b, RewritingOptArgs *opts);

/*
 * Destroy a disassembler
 */
Z_API void z_disassembler_destroy(Disassembler *d);

/*
 * [P-Disasm API]
 * Return the probability of being an instruction entrypoint for the given
 * address.
 *
 * Return value:
 *      P = 1.0: be very confident that addr is an instruction entrypoint
 *      0.0 < P < 1.0: based on P, greater P means higher confidence
 *      P = 0.0: be very confident that addr is not an instruction entrypoint
 *      P = -0.0: we have **very** strong evidence it is not an entrypoint
 */
Z_API double128_t z_disassembler_get_prob_disasm(Disassembler *d, addr_t addr);

// Feed ground truth (addr is / is not an instruction) back into pdisasm.
// NOTE(review): "diassembler" typo is part of the public symbol name -- do
// not rename without updating all callers.
Z_API void z_diassembler_update_prob_disasm(Disassembler *d, addr_t addr,
                                            bool is_inst);

/*
 * Probabilistic disassemble the whole binary
 */
Z_API void z_disassembler_prob_disasm(Disassembler *d);

/*
 * Get internal information of probabilistic disassemble (in most cases, this
 * API is used for debugging)
 */
Z_API void z_disassembler_get_prob_disasm_internal(
    Disassembler *d, addr_t addr, cs_insn **inst, uint32_t *scc_id,
    double128_t *inst_hint, double128_t *inst_lost, double128_t *data_hint,
    double128_t *D, double128_t *P);

/*
 * Check whether disassembler fully supports prob-disasm
 */
Z_API bool z_disassembler_fully_support_prob_disasm(Disassembler *d);

/*
 * Superset disassemble one instruction at given address
 */
Z_API const cs_insn *z_disassembler_update_superset_disasm(Disassembler *d,
                                                           addr_t addr);

/*
 * Show the occluded addresses of a given address
 */
Z_API Buffer *z_disassembler_get_occluded_addrs(Disassembler *d, addr_t addr);

/*
 * Recursive disassemble from given address
 */
// XXX: note that currently z_disassembler_recursive_disasm can only be called
// by z_rewriter_rewrite.
// TODO: it is a fault of our system design. We need to fix such strong
// coupling.
Z_API GQueue *z_disassembler_recursive_disasm(Disassembler *d, addr_t addr); /* * Linear disassemble the whole binary */ Z_API GQueue *z_disassembler_linear_disasm(Disassembler *d); /* * Get linear disasm */ Z_API cs_insn *z_disassembler_get_linear_disasm(Disassembler *d, addr_t addr); /* * Get recursive disasm */ Z_API cs_insn *z_disassembler_get_recursive_disasm(Disassembler *d, addr_t addr); /* * Get superset disasm */ Z_API cs_insn *z_disassembler_get_superset_disasm(Disassembler *d, addr_t addr); /* * Check whether address is a potential potential entrypoint */ Z_API bool z_disassembler_is_potential_block_entrypoint(Disassembler *d, addr_t addr); /* * Check whether address is a potential inst entrypoint */ Z_API bool z_disassembler_is_potential_inst_entrypoint(Disassembler *d, addr_t addr); /* * Check whether address is within disassemble range */ Z_API bool z_disassembler_is_within_disasm_range(Disassembler *d, addr_t addr); #define __DISASSEMBLER_DEFINE_SUCC_AND_PRED(etype, rtype) \ Z_API Buffer *z_disassembler_get_##etype##_##rtype(Disassembler *d, \ addr_t addr) __DISASSEMBLER_DEFINE_SUCC_AND_PRED(direct, predecessors); __DISASSEMBLER_DEFINE_SUCC_AND_PRED(direct, successors); __DISASSEMBLER_DEFINE_SUCC_AND_PRED(intra, predecessors); __DISASSEMBLER_DEFINE_SUCC_AND_PRED(intra, successors); __DISASSEMBLER_DEFINE_SUCC_AND_PRED(all, predecessors); __DISASSEMBLER_DEFINE_SUCC_AND_PRED(all, successors); #undef __DISASSEMBLER_DEFINE_SUCC_AND_PRED #endif ================================================ FILE: src/elf_.c ================================================ /* * __elf_parse_relocation in elf_.c * * URL: https://github.com/kubo/plthook * * ------------------------------------------------------ * * Copyright 2013-2019 Kubo Takehiro * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation are * those of the authors and should not be interpreted as representing official * policies, either expressed or implied, of the authors. * */ // XXX: __elf_parse_relocation is modified based on // https://github.com/kubo/plthook/blob/master/plthook_elf.c /* * other parts of elf_.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

// XXX: note that we have multiple streams under an ELF file. Make sure you are
// handling the correct stream(s)

#include "elf_.h"

#include "buffer.h"
#include "capstone_.h"
#include "crs_config.h"
#include "interval_splay.h"
#include "loader.h"
#include "mem_file.h"
#include "restricted_ptr.h"
#include "utils.h"

// NOTE(review): the following two system includes lost their header names
// during text extraction -- restore them from the upstream repository.
#include
#include

// zone layout constants (a "zone" is one page appended to the rewritten ELF)
#define EXTEND_ZONE_NUM 1
#define ZONE_SIZE PAGE_SIZE
#define GUARD_SIZE 8

// it seems DEBUG version has much bigger fork_server and loader
#ifdef DEBUG
#define LOADER_ZONE_SIZE (ZONE_SIZE * 3)
#else
#define LOADER_ZONE_SIZE (ZONE_SIZE * 2)
#endif

#define TRAMPOLINES_INIT_SIZE (ZONE_SIZE * 0x100)
#define RETADDR_MAPPING_INIT_SIZE ZONE_SIZE

/*
 * Define special getter and setter for ELF.
 * Pointers into the mem-file are stored as offsets (FNAME##_off) so they stay
 * valid when the underlying buffer is reallocated; SIZE_MAX encodes NULL.
 */
// XXX: such elements all locate on the main stream
#define ELF_DEFINE_SETTER(OTYPE, ONAME, FTYPE, FNAME)                         \
    Z_API void z_##ONAME##_##set_##FNAME(OTYPE *ONAME, FTYPE FNAME) {         \
        assert(ONAME != NULL);                                                \
        if (FNAME == NULL)                                                    \
            ONAME->FNAME##_off = SIZE_MAX;                                    \
        else {                                                                \
            ONAME->FNAME##_off =                                              \
                ((uint8_t *)FNAME) - z_mem_file_get_raw_buf(ONAME->stream);   \
            assert(ONAME->FNAME##_off < z_mem_file_get_size(ONAME->stream));  \
        }                                                                     \
    }

#define ELF_DEFINE_GETTER(OTYPE, ONAME, FTYPE, FNAME)                         \
    Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME) {                     \
        assert(ONAME != NULL);                                                \
        if (ONAME->FNAME##_off == SIZE_MAX)                                   \
            return NULL;                                                      \
        else                                                                  \
            return (FTYPE)(z_mem_file_get_raw_buf(ONAME->stream) +            \
                           ONAME->FNAME##_off);                               \
    }

/*
 * Private structure for vmapping: one contiguous chunk of a backing stream.
 */
STRUCT(FChunk, {
    _MEM_FILE *stream;
    size_t offset;
    size_t size;
    bool extendable;
});

DEFINE_GETTER(FChunk, fchunk, _MEM_FILE *, stream);
DEFINE_GETTER(FChunk, fchunk, bool, extendable);
DEFINE_GETTER(FChunk, fchunk, size_t, offset);
DEFINE_GETTER(FChunk, fchunk, size_t, size);
DEFINE_SETTER(FChunk, fchunk, size_t, size);

// Allocate and initialize an FChunk (freed via z_fchunk_destroy).
Z_PRIVATE FChunk *z_fchunk_create(_MEM_FILE *stream, size_t offset, size_t size,
                                  bool extendable) {
    FChunk *fc = STRUCT_ALLOC(FChunk);

    fc->stream = stream;
    fc->offset = offset;
    fc->size = size;
    fc->extendable = extendable;

    return fc;
}

// Free an FChunk (does not own and does not free its stream).
Z_PRIVATE void z_fchunk_destroy(FChunk *fc) { z_free(fc); }

/*
 * Find Elf64_Dyn by tag name
 */
Z_PRIVATE Elf64_Dyn *__elf_find_dyn_by_tag(ELF *e, Elf64_Xword tag);

/*
 * Find Segment by virtual addr
 */
Z_PRIVATE Snode *__elf_find_segment_by_vaddr(ELF *e, addr_t vaddr);

/*
 * Open a file (ori_filename) and load data into _MEM_FILE
 */
Z_PRIVATE _MEM_FILE *__elf_open_file(ELF *e, const char *ori_filename);

/*
 * Validate the header of given ELF
 */
Z_PRIVATE void __elf_validate_header(_MEM_FILE *stream);

/*
 * Parse the program header
 */
Z_PRIVATE void __elf_parse_phdr(ELF *e);

/*
 * Parse the section header
 */
Z_PRIVATE void __elf_parse_shdr(ELF *e);

/*
 * Get relocation information
 */
Z_PRIVATE void __elf_parse_relocation(ELF *e);

/*
 * Detect and parse main function
 */
Z_PRIVATE void __elf_parse_main(ELF *e);

/*
 * Set relocation-preset for given ELF
 */
Z_PRIVATE void __elf_set_relro(ELF *e);

/*
 * Set virtual mapping for given ELF
 */
// Note that after this function, the main stream will be splitted into two
// pieces
Z_PRIVATE void __elf_set_virtual_mapping(ELF *e, const char *filename);

/*
 * Rewrite PT_NOTE
 */
Z_PRIVATE void __elf_rewrite_pt_note(ELF *e);

/*
 * Extend additional zones onto ELF
 */
Z_PRIVATE void __elf_extend_zones(ELF *e);

/*
 * Setup lookup table
 */
Z_PRIVATE void __elf_setup_lookup_table(ELF *e, const char *filename);

/*
 * Setup retaddr mapping
 */
Z_PRIVATE void __elf_setup_retaddr_mapping(ELF *e, const char *filename);

/*
 * Setup trampolines (shadow code)
 */
Z_PRIVATE void __elf_setup_trampolines(ELF *e, const char *filename);

/*
 * Setup shared .text section
 */
Z_PRIVATE void __elf_setup_shared_text(ELF *e, const char *filename);

/*
 * Setup pipeline file
 */
Z_PRIVATE void __elf_setup_pipe(ELF *e, const char *filename);

// TODO: raw pointer might lead to overflow, but we need efficiency.
// In the furture, we need a better trade-off. // Currently, we have checked the access will not be out of boundary in advance. // Make sure all your raw-pointer access is valid. /* * Get pointer from offset */ Z_PRIVATE void *__elf_stream_off2ptr(_MEM_FILE *stream, size_t off); /* * Get offset from virtual address. * (the caller must know the addr is on which stream) */ Z_PRIVATE size_t __elf_stream_vaddr2off(ELF *e, addr_t addr); /* * Setter and Getter */ ELF_DEFINE_SETTER(ELF, elf, Elf64_Ehdr *, ehdr); ELF_DEFINE_SETTER(ELF, elf, Elf64_Phdr *, phdr_note); ELF_DEFINE_SETTER(ELF, elf, Elf64_Phdr *, phdr_dynamic); ELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_shstrtab); ELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_text); ELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_init); ELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_fini); ELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_init_array); ELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_fini_array); ELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt); ELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt_got); ELF_DEFINE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt_sec); #define __WAIT_STREAM_COUNT 5 #define __WAIT_STREAM_INTERVAL 1 #define __WAIT_STREAM(fname) \ do { \ size_t __n = __WAIT_STREAM_COUNT; \ while ((__n--) && (access((fname), W_OK) == -1) && \ (errno == ETXTBSY)) { \ z_warn("underlying binary (%s) is busy, wait for % sec", (fname), \ __WAIT_STREAM_INTERVAL); \ sleep(__WAIT_STREAM_INTERVAL); \ } \ } while (0) OVERLOAD_SETTER(ELF, elf, ELFState, state) { if (state & ELFSTATE_DISABLE) { // if is used to disable associated states state = state ^ ELFSTATE_DISABLE; if (state & ELFSTATE_CONNECTED) { __WAIT_STREAM(elf->tmpnam); z_mem_file_suspend(elf->stream); } elf->state &= (state ^ ELFSTATE_MASK); } else { if (state & ELFSTATE_CONNECTED) { __WAIT_STREAM(elf->tmpnam); z_mem_file_resume(elf->stream); } elf->state |= state; } } #undef __WAIT_STREAM_COUNT #undef __WAIT_STREAM_INTERVAL #undef __WAIT_STREAM 
ELF_DEFINE_GETTER(ELF, elf, Elf64_Ehdr *, ehdr); ELF_DEFINE_GETTER(ELF, elf, Elf64_Phdr *, phdr_note); ELF_DEFINE_GETTER(ELF, elf, Elf64_Phdr *, phdr_dynamic); ELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_shstrtab); ELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_text); ELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_init); ELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_fini); ELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_init_array); ELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_fini_array); ELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt); ELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt_got); ELF_DEFINE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt_sec); DEFINE_GETTER(ELF, elf, addr_t, loader_addr); DEFINE_GETTER(ELF, elf, addr_t, trampolines_addr); DEFINE_GETTER(ELF, elf, addr_t, lookup_table_addr); DEFINE_GETTER(ELF, elf, addr_t, shared_text_addr); DEFINE_GETTER(ELF, elf, addr_t, retaddr_mapping_addr); DEFINE_GETTER(ELF, elf, bool, is_pie); DEFINE_GETTER(ELF, elf, addr_t, ori_entry); DEFINE_GETTER(ELF, elf, const char *, lookup_tabname); DEFINE_GETTER(ELF, elf, const char *, trampolines_name); DEFINE_GETTER(ELF, elf, const char *, shared_text_name); DEFINE_GETTER(ELF, elf, const char *, pipe_filename); DEFINE_GETTER(ELF, elf, const char *, retaddr_mapping_name); OVERLOAD_GETTER(ELF, elf, size_t, plt_n) { return g_hash_table_size(elf->plt); } OVERLOAD_GETTER(ELF, elf, addr_t, main) { if (!elf->detect_main) { EXITME("the main function has not been automatically detected"); } return elf->main; } OVERLOAD_GETTER(ELF, elf, addr_t, init) { if (!elf->detect_main) { EXITME("the main function has not been automatically detected"); } return elf->init; } OVERLOAD_GETTER(ELF, elf, addr_t, fini) { if (!elf->detect_main) { EXITME("the main function has not been automatically detected"); } return elf->fini; } OVERLOAD_GETTER(ELF, elf, addr_t, load_main) { if (!elf->detect_main) { EXITME("the main function has not been automatically detected"); } return elf->load_main; } 
OVERLOAD_GETTER(ELF, elf, addr_t, load_init) { if (!elf->detect_main) { EXITME("the main function has not been automatically detected"); } return elf->load_init; } OVERLOAD_GETTER(ELF, elf, addr_t, load_fini) { if (!elf->detect_main) { EXITME("the main function has not been automatically detected"); } return elf->load_fini; } Z_PRIVATE size_t __elf_stream_vaddr2off(ELF *e, addr_t addr) { // Get corresponding segment Snode *segment = __elf_find_segment_by_vaddr(e, addr); if (segment == NULL) { EXITME("invalid virtual address [%#lx]", addr); } // Create Rptr FChunk *fc = (FChunk *)z_snode_get_data(segment); if (fc == NULL || z_strcmp(STRUCT_TYPE(fc), "FChunk")) { EXITME("get address into dynamically allocated space"); } size_t off1 = addr - z_snode_get_lower_bound(segment); size_t off2 = z_fchunk_get_offset(fc); if (off1 >= z_fchunk_get_size(fc)) { EXITME("trying to read on zero-padding region"); } return off1 + off2; } Z_PRIVATE _MEM_FILE *__elf_open_file(ELF *e, const char *ori_filename) { Buffer *buf = z_buffer_read_file(ori_filename); const char *buf_raw_buf = (const char *)z_buffer_get_raw_buf(buf); size_t buf_size = z_buffer_get_size(buf); // magic check for re-patch if (memmem(buf_raw_buf, buf_size, MAGIC_STRING, z_strlen(MAGIC_STRING))) { EXITME("try to re-instrument file \"%s\"", ori_filename); } _MEM_FILE *stream = z_mem_file_fopen((const char *)e->tmpnam, "w+"); z_mem_file_fwrite((char *)buf_raw_buf, buf_size, sizeof(uint8_t), stream); // generate backup file const char *bak_filename = z_strcat(ori_filename, BACKUP_FILE_SUFFIX); z_buffer_write_file(buf, bak_filename); z_free((char *)bak_filename); z_buffer_destroy(buf); return stream; } Z_PRIVATE void *__elf_stream_off2ptr(_MEM_FILE *stream, size_t off) { assert(stream != NULL); if (z_mem_file_get_size(stream) <= off) { EXITME("invalid offset(%ld) from stream(%ld): %s", off, z_mem_file_get_size(stream), z_mem_file_get_filename(stream)); } return (void *)(z_mem_file_get_raw_buf(stream) + off); } Z_PRIVATE 
void __elf_rewrite_pt_note(ELF *e) { // XXX: note that rewriter_pt_note should be applied on the main stream. assert(e != NULL); Elf64_Phdr *phdr = z_elf_get_phdr_note(e); phdr->p_type = PT_LOAD; phdr->p_flags = PF_X | PF_R; // XXX: e->loader_addr cannot be on the shared .text stream phdr->p_offset = __elf_stream_vaddr2off(e, e->loader_addr); phdr->p_vaddr = (Elf64_Addr)e->loader_addr; phdr->p_paddr = (Elf64_Addr)NULL; phdr->p_filesz = LOADER_ZONE_SIZE; phdr->p_memsz = LOADER_ZONE_SIZE; phdr->p_align = PAGE_SIZE; } Z_PRIVATE void __elf_setup_pipe(ELF *e, const char *filename) { assert(e != NULL); assert(!z_strchr(filename, '/')); e->pipe_filename = z_strcat(PIPE_FILENAME_PREFIX, filename); return; } Z_PRIVATE void __elf_setup_retaddr_mapping(ELF *e, const char *filename) { assert(e != NULL); // step (0). update retaddr_mapping_addr e->retaddr_mapping_addr = RETADDR_MAPPING_ADDR; // step (1). get filename assert(!z_strchr(filename, '/')); e->retaddr_mapping_name = z_strcat(RETADDR_MAPPING_PREFIX, filename); // step (2). create _MEM_FILE e->retaddr_mapping_stream = z_mem_file_fopen((const char *)e->retaddr_mapping_name, "w+"); z_mem_file_pwrite(e->retaddr_mapping_stream, "", 1, RETADDR_MAPPING_INIT_SIZE - 1); // step (3). insert into virtual mapping Snode *node = NULL; FChunk *fc = z_fchunk_create(e->retaddr_mapping_stream, 0, RETADDR_MAPPING_INIT_SIZE, true); node = z_snode_create(e->retaddr_mapping_addr, RETADDR_MAPPING_INIT_SIZE, (void *)fc, (void (*)(void *))(&z_fchunk_destroy)); if (!z_splay_insert(e->vmapping, node)) { EXITME("overlapped retaddr mapping"); } // step (4). update mmapped informaiton node = z_snode_create(e->retaddr_mapping_addr, RETADDR_MAPPING_INIT_SIZE, NULL, NULL); if (!z_splay_insert(e->mmapped_pages, node)) { EXITME("overlapped retaddr mapping"); } } Z_PRIVATE void __elf_setup_lookup_table(ELF *e, const char *filename) { assert(e != NULL); // step (1). get address e->lookup_table_addr = LOOKUP_TABLE_ADDR; // step (2). 
get filename assert(!z_strchr(filename, '/')); e->lookup_tabname = z_strcat(LOOKUP_TABNAME_PREFIX, filename); // step (3). create _MEM_FILE e->lookup_table_stream = z_mem_file_fopen((const char *)e->lookup_tabname, "w+"); z_mem_file_fix_size(e->lookup_table_stream, LOOKUP_TABLE_SIZE); z_mem_file_pwrite(e->lookup_table_stream, "", 1, LOOKUP_TABLE_SIZE - 1); // step (4). fill in pre-defined values Elf64_Shdr *text = z_elf_get_shdr_text(e); addr_t text_addr = text->sh_addr; size_t text_size = text->sh_size; addr_t cur_addr = text_addr; int64_t cell_val = -1; for (size_t i = 0; i < LOOKUP_TABLE_CELL_NUM; i++) { cell_val = -1; if (cur_addr < text_addr + text_size) { // For valid address, we initial it as its original value's opposite // value cell_val = -((int64_t)cur_addr); } cell_val &= LOOKUP_TABLE_CELL_MASK; z_mem_file_fwrite((uint8_t *)(&cell_val), sizeof(uint8_t), LOOKUP_TABLE_CELL_SIZE, e->lookup_table_stream); cur_addr += 1; } assert(cell_val == (-1 & LOOKUP_TABLE_CELL_MASK)); // step (5). insert into virtual mapping Snode *node = NULL; FChunk *fc = z_fchunk_create(e->lookup_table_stream, 0, LOOKUP_TABLE_SIZE, false); node = z_snode_create(e->lookup_table_addr, LOOKUP_TABLE_SIZE, (void *)fc, (void (*)(void *))(&z_fchunk_destroy)); if (!z_splay_insert(e->vmapping, node)) { EXITME("overlapped lookup table"); } // step (6). update mmapped informaiton node = z_snode_create(e->lookup_table_addr, LOOKUP_TABLE_SIZE, NULL, NULL); if (!z_splay_insert(e->mmapped_pages, node)) { EXITME("overlapped lookup table"); } } Z_PRIVATE void __elf_setup_shared_text(ELF *e, const char *filename) { assert(e != NULL); // step (0). 
get .text information Elf64_Shdr *text = z_elf_get_shdr_text(e); addr_t text_addr = text->sh_addr; size_t text_size = text->sh_size; size_t text_offset = text->sh_offset; addr_t aligned_addr = BITS_ALIGN_FLOOR(text_addr, PAGE_SIZE_POW2); size_t aligned_offset = BITS_ALIGN_FLOOR(text_offset, PAGE_SIZE_POW2); size_t aligned_size = BITS_ALIGN_CELL( text_size + text_offset - aligned_offset, PAGE_SIZE_POW2); e->shared_text_addr = aligned_addr; // step (1). get filename assert(!z_strchr(filename, '/')); e->shared_text_name = z_strcat(SHARED_TEXT_PREFIX, filename); // step (2). create _MEM_FILE e->shared_text_stream = z_mem_file_fopen((const char *)e->shared_text_name, "w+"); z_mem_file_fix_size(e->shared_text_stream, aligned_size); z_mem_file_pwrite(e->shared_text_stream, "", 1, aligned_size - 1); // step (3). update data to _MEM_FILE // XXX: note that e->stream is alreay page-aligned, which means the // following memcpy is safe. uint8_t *base = z_mem_file_get_raw_buf(e->stream); uint8_t *src = base + aligned_offset; uint8_t *dst = z_mem_file_get_raw_buf(e->shared_text_stream); memcpy(dst, src, aligned_size); // step (4). generate virtual mapping information FChunk *fc = z_fchunk_create(e->shared_text_stream, 0, aligned_size, false); Snode *node = z_snode_create(aligned_addr, aligned_size, (void *)fc, (void (*)(void *))(&z_fchunk_destroy)); // step (5). insert into virtual mapping if (!z_splay_insert(e->vmapping, node)) { EXITME("overlapped shared .text section"); } // XXX: mapped_pages will be updated in __elf_set_virtual_mapping } Z_PRIVATE void __elf_setup_trampolines(ELF *e, const char *filename) { assert(e != NULL); // step (0). update trampolines_addr e->trampolines_addr = SHADOW_CODE_ADDR; // step (1). get filename assert(!z_strchr(filename, '/')); e->trampolines_name = z_strcat(TRAMPOLINES_NAME_PREFIX, filename); // step (2). 
create _MEM_FILE e->trampolines_stream = z_mem_file_fopen((const char *)e->trampolines_name, "w+"); z_mem_file_pwrite(e->trampolines_stream, "", 1, TRAMPOLINES_INIT_SIZE - 1); // step (3). insert into virtual mapping Snode *node = NULL; FChunk *fc = z_fchunk_create(e->trampolines_stream, 0, TRAMPOLINES_INIT_SIZE, true); node = z_snode_create(e->trampolines_addr, TRAMPOLINES_INIT_SIZE, (void *)fc, (void (*)(void *))(&z_fchunk_destroy)); if (!z_splay_insert(e->vmapping, node)) { EXITME("overlapped trampolines"); } // step (4). update mmapped informaiton node = z_snode_create(e->trampolines_addr, TRAMPOLINES_INIT_SIZE, NULL, NULL); if (!z_splay_insert(e->mmapped_pages, node)) { EXITME("overlapped trampolines"); } } Z_PRIVATE void __elf_extend_zones(ELF *e) { assert(e != NULL); /* * A trick here to splite amongs new zones is to insert an eight-byte gap at * the end of each zone. */ Snode *node; addr_t vaddr = BITS_ALIGN_CELL(e->max_addr, PAGE_SIZE_POW2); size_t offset = z_mem_file_get_size(e->stream); assert(offset % PAGE_SIZE == 0); size_t *zones[EXTEND_ZONE_NUM] = {&e->loader_addr}; addr_t zones_addr[EXTEND_ZONE_NUM] = {vaddr}; size_t zones_size[EXTEND_ZONE_NUM] = {LOADER_ZONE_SIZE}; size_t zones_guard[EXTEND_ZONE_NUM] = {GUARD_SIZE}; // Set zones for (size_t i = 0; i < EXTEND_ZONE_NUM; i++) { size_t zone_size = zones_size[i]; size_t zone_guard = zones_guard[i]; vaddr = zones_addr[i]; *zones[i] = vaddr; FChunk *fc = z_fchunk_create(e->stream, offset, zone_size - zone_guard, false); node = z_snode_create(vaddr, zone_size - zone_guard, (void *)fc, (void (*)(void *))(&z_fchunk_destroy)); if (!z_splay_insert(e->vmapping, node)) { EXITME("overlapped zones"); } z_info("zone base at %#lx with offset %#lx", vaddr, offset); assert(vaddr % PAGE_SIZE == 0); assert(zone_size % PAGE_SIZE == 0); node = z_snode_create(vaddr, zone_size, NULL, NULL); if (!z_splay_insert(e->mmapped_pages, node)) { EXITME("overlapped zones"); } offset += zone_size; } // Extend file 
    //   (cont.) grow the main stream to cover the appended zones
    z_mem_file_pwrite(e->stream, "", 1, offset - 1);
}

/*
 * Look up the vmapping node covering vaddr; NULL when unmapped.
 */
Z_PRIVATE Snode *__elf_find_segment_by_vaddr(ELF *e, addr_t vaddr) {
    Snode *segment = z_splay_search(e->vmapping, vaddr);
    if (segment == NULL) {
        return NULL;
    }
    assert(vaddr >= z_snode_get_lower_bound(segment));
    assert(vaddr <= z_snode_get_upper_bound(segment));
    return segment;
}

/*
 * Walk the dynamic segment for the first entry whose d_tag equals tag.
 * Returns NULL when not found; as a special case, DT_NULL returns the
 * terminating entry itself.
 */
Z_PRIVATE Elf64_Dyn *__elf_find_dyn_by_tag(ELF *e, Elf64_Xword tag) {
    Elf64_Phdr *dynamic_phdr = z_elf_get_phdr_dynamic(e);
    if (z_unlikely(!dynamic_phdr)) {
        EXITME("dynamic segment not found");
    }

    // get the first dyn
    // XXX: note that it is safe to use __elf_stream_off2ptr
    Elf64_Dyn *dyn =
        (Elf64_Dyn *)__elf_stream_off2ptr(e->stream, dynamic_phdr->p_offset);

    while (dyn->d_tag != DT_NULL) {
        if (dyn->d_tag == tag) {
            return dyn;
        }
        dyn++;
    }

    return (tag == DT_NULL ? dyn : NULL);
}

/*
 * Force eager binding for dynamically linked binaries: when the binary is not
 * already BIND_NOW (checked via DT_BIND_NOW or DT_FLAGS & DF_BIND_NOW),
 * repurpose its DT_DEBUG entry as DT_FLAGS | DF_BIND_NOW. No-op (with a log)
 * for statically linked binaries.
 */
Z_RESERVED Z_PRIVATE void __elf_set_relro(ELF *e) {
    assert(e != NULL);

    Elf64_Phdr *dynamic_phdr = z_elf_get_phdr_dynamic(e);
    if (dynamic_phdr != NULL) {
        bool is_relro = false;
        Elf64_Dyn *dt_debug = NULL;

        Elf64_Dyn *iter = (Elf64_Dyn *)__elf_stream_off2ptr(
            e->stream, dynamic_phdr->p_offset);

        while (iter->d_tag != DT_NULL) {
            z_trace(
                "find dynamic section with d_tag: %#lx =? %#lx, and d_un "
                "%p",
                iter->d_tag, DT_BIND_NOW, iter->d_un);
            if (iter->d_tag == DT_DEBUG)
                dt_debug = iter;
            if (iter->d_tag == DT_BIND_NOW) {
                is_relro = true;
                break;
            }
            if (iter->d_tag == DT_FLAGS &&
                (iter->d_un.d_val & DF_BIND_NOW) != 0) {
                is_relro = true;
                break;
            }
            iter++;
        }

        if (is_relro) {
            z_info("binary is already RELRO");
        } else {
            if (dt_debug) {
                z_info(
                    "binary is not RELRO. Hence, we patch it into DT_DEBUG "
                    "entry.");
                dt_debug->d_tag = DT_FLAGS;
                dt_debug->d_un.d_val = DF_BIND_NOW;
            } else {
                z_warn(
                    "binary is not RELRO and has no DT_DEBUG entry. Hence, "
                    "we failed to patch it");
            }
        }
    } else {
        z_info("statically linked binary");
    }
}

#define __NUMBER_OF_GOTS 2
#define __NUMBER_OF_PLTS 3

// TODO: make sure PIE binaries would not cause any trouble
// TODO: if any section is missed, directly return errors instead of EXITME
/*
 * Collect GOT and PLT information into e->got / e->plt (keyed by address),
 * then stamp the binary as instrumented by overwriting the DT_NULL entry's
 * value with MAGIC_NUMBER (step (5), later in this function).
 */
Z_PRIVATE void __elf_parse_relocation(ELF *e) {
    // XXX: we use z_elf_read_all to avoid inter-stream data

    // step (0). init related field of ELF and return if statically-linked
    e->got = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
    e->plt = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
    if (!z_elf_get_phdr_dynamic(e)) {
        z_info("statically-linked binary does not have relocation information");
        return;
    }

    /*
     * step (1). collect necessary information
     */
    const Elf64_Dyn *dyn = NULL;
    const Elf64_Sym *dynsym = NULL;
    const char *dynstr = NULL;
    size_t dynstr_size = 0;
    const Elf64_Rela *rela_plt = NULL;
    size_t rela_plt_cnt = 0;
    const Elf64_Rela *rela_dyn = NULL;
    size_t rela_dyn_cnt = 0;

    // .dynstr size
    dyn = __elf_find_dyn_by_tag(e, DT_STRSZ);
    if (!dyn) {
        EXITME("fail to find DT_STRSZ");
    }
    dynstr_size = dyn->d_un.d_val;

    // .dynstr section
    dyn = __elf_find_dyn_by_tag(e, DT_STRTAB);
    if (!dyn) {
        EXITME("fail to find DT_STRTAB");
    }
    dynstr = z_alloc(dynstr_size + 1, sizeof(char));
    if (z_elf_read_all(e, dyn->d_un.d_ptr, dynstr_size, (void *)dynstr) !=
        dynstr_size) {
        EXITME("invalid synstr_size");
    }

    // .rela.plt section
    dyn = __elf_find_dyn_by_tag(e, DT_JMPREL);
    if (dyn) {
        addr_t rela_plt_addr = dyn->d_un.d_ptr;
        dyn = __elf_find_dyn_by_tag(e, DT_PLTRELSZ);
        if (!dyn) {
            EXITME("fail to find DT_PLTRELSZ when DT_JMPREL is found");
        }
        rela_plt_cnt = dyn->d_un.d_val / sizeof(Elf64_Rela);
        rela_plt = z_alloc(rela_plt_cnt, sizeof(Elf64_Rela));
        if (z_elf_read_all(e, rela_plt_addr, dyn->d_un.d_val,
                           (void *)rela_plt) != dyn->d_un.d_val) {
            EXITME("invalid size of .rela.plt");
        }
        if (!z_elf_get_shdr_plt(e)) {
            EXITME("fail to find .plt section when DT_JMPREL is found");
        }
    }

    // .rela.dyn
    //   (cont.) .rela.dyn section
    dyn = __elf_find_dyn_by_tag(e, DT_RELA);
    if (dyn) {
        addr_t rela_dyn_addr = dyn->d_un.d_ptr;
        size_t total_size = 0, elem_size = 0;
        dyn = __elf_find_dyn_by_tag(e, DT_RELASZ);
        if (!dyn) {
            EXITME("fail to find DT_RELASZ when DT_RELA is found");
        }
        total_size = dyn->d_un.d_val;
        dyn = __elf_find_dyn_by_tag(e, DT_RELAENT);
        if (!dyn) {
            EXITME("fail to find DT_RELAENT when DT_RELA is found");
        }
        elem_size = dyn->d_un.d_val;
        rela_dyn_cnt = total_size / elem_size;
        rela_dyn = z_alloc(rela_dyn_cnt, elem_size);
        if (z_elf_read_all(e, rela_dyn_addr, total_size, (void *)rela_dyn) !=
            total_size) {
            EXITME("invalid size of .rela.dyn");
        }
    }

    // check .rela.plt and .rela.dyn
    if (!rela_plt && !rela_dyn) {
        EXITME("fail to find neither DT_JMPREL nor DT_RELA");
    }
    const Elf64_Rela *gots[__NUMBER_OF_GOTS] = {rela_plt, rela_dyn};
    const size_t gots_cnt[__NUMBER_OF_GOTS] = {rela_plt_cnt, rela_dyn_cnt};
    const int gots_type[__NUMBER_OF_GOTS] = {R_X86_64_JUMP_SLOT,
                                             R_X86_64_GLOB_DAT};
    const char *gots_str[__NUMBER_OF_GOTS] = {".rela.plt", ".rela.dyn"};

    // let first quickly go through how many symbols we need
    size_t max_idx = 0;
    for (size_t k = 0; k < __NUMBER_OF_GOTS; k++) {
        const Elf64_Rela *got = gots[k];
        const size_t cnt = gots_cnt[k];
        const int type = gots_type[k];
        for (size_t i = 0; i < cnt; i++, got++) {
            if (ELF64_R_TYPE(got->r_info) == type) {
                size_t idx = ELF64_R_SYM(got->r_info);
                if (idx > max_idx) {
                    max_idx = idx;
                }
            }
        }
    }
    // NOTE(review): max_idx is size_t, so "%d" mismatches "max_idx + 1"
    // ("%ld"/"%zu" would match); typically harmless on x86-64 varargs but
    // worth fixing.
    z_info("require %d symbols", max_idx + 1);

    // check sizeof(Elf64_Sym)
    dyn = __elf_find_dyn_by_tag(e, DT_SYMENT);
    if (!dyn) {
        EXITME("fail to find DT_SYMTAB");
    }
    if (dyn->d_un.d_val != sizeof(Elf64_Sym)) {
        EXITME("inconsistent size of Elf64_Sym: %#lx v/s %#lx", dyn->d_un.d_val,
               sizeof(Elf64_Sym));
    }

    // .dynsym section
    dyn = __elf_find_dyn_by_tag(e, DT_SYMTAB);
    if (!dyn) {
        EXITME("fail to find DT_SYMTAB");
    }
    dynsym = z_alloc(max_idx + 1, sizeof(Elf64_Sym));
    if (z_elf_read_all(e, dyn->d_un.d_ptr, sizeof(Elf64_Sym) * (max_idx + 1),
                       (void *)dynsym) != sizeof(Elf64_Sym) * (max_idx + 1)) {
        EXITME("symtab does not hold enough symbols");
    }

    /*
     * step (2). collect GOT information
     */
    for (size_t k = 0; k < __NUMBER_OF_GOTS; k++) {
        const Elf64_Rela *got = gots[k];
        const size_t cnt = gots_cnt[k];
        const int type = gots_type[k];
        const char *str = gots_str[k];
        for (size_t i = 0; i < cnt; i++, got++) {
            if (ELF64_R_TYPE(got->r_info) == type) {
                // get function name
                size_t idx = ELF64_R_SYM(got->r_info);
                idx = dynsym[idx].st_name;
                if (idx >= dynstr_size) {
                    EXITME("too big section header string table index: %#lx",
                           idx);
                }
                const char *func_name = dynstr + idx;

                // get function address
                const addr_t func_addr = (addr_t)(got->r_offset);

                // query the library-function knowledge base and log the
                // CFG/return-address info for this GOT entry
                const LFuncInfo *func_info = LB_QUERY(func_name);
                z_info("function GOT [%s]: %s @ %#lx | %s | %s ", str,
                       func_name, func_addr,
                       (func_info->cfg_info == LCFG_UNK
                            ? COLOR(YELLOW, "unknown")
                            : (func_info->cfg_info == LCFG_OBJ
                                   ? "object"
                                   : (func_info->cfg_info == LCFG_RET
                                          ? COLOR(GREEN, "returnable")
                                          : COLOR(RED, "terminated")))),
                       (func_info->ra_info == LRA_UNK
                            ? COLOR(YELLOW, "unknown")
                            : (func_info->ra_info == LRA_OBJ
                                   ? "object"
                                   : (func_info->ra_info == LRA_USED
                                          ? COLOR(RED, "used")
                                          : COLOR(GREEN, "unused")))));
                g_hash_table_insert(e->got, GSIZE_TO_POINTER(func_addr),
                                    (gpointer)func_info);
            }
        }
    }

    /*
     * step (3).
collect PLT information */ // we check .plt and .plt.got sections by check the instruction Elf64_Shdr *plts[__NUMBER_OF_PLTS] = {z_elf_get_shdr_plt(e), z_elf_get_shdr_plt_got(e), z_elf_get_shdr_plt_sec(e)}; for (size_t k = 0; k < __NUMBER_OF_PLTS; k++) { Elf64_Shdr *plt = plts[k]; if (!plt) { continue; } addr_t plt_addr = plt->sh_addr; size_t plt_size = plt->sh_size; size_t plt_entsize = plt->sh_entsize; if (!plt_addr || !plt_size) { EXITME("invalid .plt section"); } if (!plt_entsize) { plt_entsize = plt_size; } size_t off = 0; uint8_t *ptr = z_alloc(plt_size, sizeof(uint8_t)); if (z_elf_read_all(e, plt_addr, plt_size, ptr) != plt_size) { EXITME("fail to load data form PLT"); } // TODO: the first element in .plt is reserved for resloving, remove it. while (off < plt_size) { const LFuncInfo *func_info = LB_DEFAULT(); CS_DISASM_RAW(ptr + off, plt_size - off, plt_addr + off, 1); if (cs_inst->id == X86_INS_ENDBR64 && off + cs_inst->size < plt_size) { // XXX: handle intel CET tech. Note that we may need to // carefully design our system about how to handle CET/IBT. size_t endbr64_size = cs_inst->size; CS_DISASM_RAW(ptr + off + endbr64_size, plt_size - off - endbr64_size, plt_addr + off + endbr64_size, 1); } addr_t got_addr = INVALID_ADDR; if (cs_count == 1 && z_capstone_is_pc_related_ujmp(cs_inst, &got_addr)) { assert(got_addr != INVALID_ADDR); const LFuncInfo *got_info = (const LFuncInfo *)g_hash_table_lookup( e->got, GSIZE_TO_POINTER(got_addr)); if (got_info) { func_info = got_info; z_info("function PLT: %s @ %#lx", func_info->name, plt_addr + off); } } g_hash_table_insert(e->plt, GSIZE_TO_POINTER(plt_addr + off), (gpointer)func_info); off += plt_entsize; } z_free(ptr); } /* * step (4). free allocated memory */ z_free((void *)dynstr); z_free((void *)rela_plt); z_free((void *)rela_dyn); z_free((void *)dynsym); /* * step (5). 
change the value of DT_NULL to indicate this program is patched * by StochFuzz */ Elf64_Dyn *dyn_ = __elf_find_dyn_by_tag(e, DT_NULL); if (!dyn_) { EXITME("DT_NULL not found"); } dyn_->d_un.d_val = MAGIC_NUMBER; } #undef __NUMBER_OF_GOTS #undef __NUMBER_OF_PLTS Z_PRIVATE void __elf_parse_shdr(ELF *e) { Elf64_Ehdr *ehdr = z_elf_get_ehdr(e); size_t size = z_mem_file_ftell(e->stream); z_elf_set_shdr_shstrtab(e, NULL); z_elf_set_shdr_text(e, NULL); z_elf_set_shdr_init(e, NULL); z_elf_set_shdr_fini(e, NULL); z_elf_set_shdr_init_array(e, NULL); z_elf_set_shdr_fini_array(e, NULL); z_elf_set_shdr_plt(e, NULL); z_elf_set_shdr_plt_got(e, NULL); z_elf_set_shdr_plt_sec(e, NULL); Elf64_Shdr *shdrs = (Elf64_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff); // Get string table first uint16_t shstrndx = ehdr->e_shstrndx; z_elf_set_shdr_shstrtab(e, shdrs + shstrndx); Elf64_Shdr *shdr_shstrtab = z_elf_get_shdr_shstrtab(e); assert(shdr_shstrtab != NULL); assert(shdr_shstrtab->sh_type == SHT_STRTAB); if (shdr_shstrtab->sh_offset >= size || shdr_shstrtab->sh_offset + shdr_shstrtab->sh_size > size) { EXITME("string table offset is too large"); } const char *shstrtab = __elf_stream_off2ptr(e->stream, shdr_shstrtab->sh_offset); size_t shstrtab_sz = shdr_shstrtab->sh_size; #ifdef DEBUG if (true) { size_t name_off = shdr_shstrtab->sh_name; assert(name_off < shstrtab_sz); const char *shstrtab_name = shstrtab + name_off; assert(!z_strcmp(shstrtab_name, ".shstrtab")); } #endif // Get other section header for (unsigned i = 0; i < ehdr->e_shnum; i++) { Elf64_Shdr *shdr = shdrs + i; size_t name_off = shdr->sh_name; if (name_off >= shstrtab_sz) continue; const char *shdr_name = shstrtab + name_off; if (!z_strcmp(shdr_name, ".text")) { if ((int64_t)shdr->sh_addr < 0 || (int64_t)(shdr->sh_addr + shdr->sh_size) < 0) { EXITME("some addresses in .text section are negative"); } z_elf_set_shdr_text(e, shdr); LOOKUP_TABLE_INIT_CELL_NUM(shdr->sh_size); } else if (!z_strcmp(shdr_name, ".init")) { 
            z_elf_set_shdr_init(e, shdr);
        } else if (!z_strcmp(shdr_name, ".fini")) {
            z_elf_set_shdr_fini(e, shdr);
        } else if (!z_strcmp(shdr_name, ".init_array")) {
            z_elf_set_shdr_init_array(e, shdr);
        } else if (!z_strcmp(shdr_name, ".fini_array")) {
            z_elf_set_shdr_fini_array(e, shdr);
        } else if (!z_strcmp(shdr_name, ".plt")) {
            z_elf_set_shdr_plt(e, shdr);
        } else if (!z_strcmp(shdr_name, ".plt.got")) {
            z_elf_set_shdr_plt_got(e, shdr);
        } else if (!z_strcmp(shdr_name, ".plt.sec")) {
            z_elf_set_shdr_plt_sec(e, shdr);
        }
    }

    if (!z_elf_get_shdr_text(e)) {
        // TODO: .text is not always necessary.
        EXITME("cannot find .text section");
    }
    z_info("find .text section @ %#lx", z_elf_get_shdr_text(e)->sh_addr);

    // in some cases, init_/fini_array does not exist
    // assert(z_elf_get_shdr_init(e) != NULL);
    // assert(z_elf_get_shdr_fini(e) != NULL);
    // assert(z_elf_get_shdr_init_array(e) != NULL);
    // assert(z_elf_get_shdr_fini_array(e) != NULL);

    // static-linked binary may not have PLT
    // assert(z_elf_get_shdr_plt(e) != NULL);
    // assert(z_elf_get_shdr_plt_got(e) != NULL);

    if (z_elf_get_shdr_init(e)) {
        z_info("find .init section @ %#lx", z_elf_get_shdr_init(e)->sh_addr);
    }
    if (z_elf_get_shdr_fini(e)) {
        z_info("find .fini section @ %#lx", z_elf_get_shdr_fini(e)->sh_addr);
    }
    if (z_elf_get_shdr_init_array(e)) {
        z_info("find .init_array section @ %#lx",
               z_elf_get_shdr_init_array(e)->sh_addr);
    }
    if (z_elf_get_shdr_fini_array(e)) {
        z_info("find .fini_array section @ %#lx",
               z_elf_get_shdr_fini_array(e)->sh_addr);
    }
    if (z_elf_get_shdr_plt(e)) {
        z_info("find .plt section @ %#lx", z_elf_get_shdr_plt(e)->sh_addr);
    } else {
        z_info(".plt section not found");
    }
    if (z_elf_get_shdr_plt_got(e)) {
        z_info("find .plt.got section @ %#lx",
               z_elf_get_shdr_plt_got(e)->sh_addr);
    } else {
        z_info(".plt.got section not found");
    }
    if (z_elf_get_shdr_plt_sec(e)) {
        z_info("find .plt.sec section @ %#lx",
               z_elf_get_shdr_plt_sec(e)->sh_addr);
    } else {
        z_info(".plt.sec section not found");
    }
}

/*
 * Parse the ELF and program headers on the main stream: record the original
 * entrypoint, whether the binary is PIE, and the PT_NOTE / PT_DYNAMIC program
 * headers. PT_NOTE is mandatory, since it is later rewritten into the PT_LOAD
 * segment of the loader zone (see __elf_rewrite_pt_note).
 */
Z_PRIVATE void __elf_parse_phdr(ELF *e) {
    uint8_t *base = z_mem_file_get_raw_buf(e->stream);
    size_t size = z_mem_file_ftell(e->stream);

    Elf64_Ehdr *ehdr = (Elf64_Ehdr *)base;

    // Entry point
    e->ori_entry = ehdr->e_entry;
    z_info("find entrypoint: %#lx", e->ori_entry);

    // Whether the ELF is compiled as PIE
    e->is_pie = ehdr->e_type == ET_EXEC ? false : true;
    if (e->is_pie) {
        z_info("try to handle PIE executable");
    } else {
        z_info("try to handle non-PIE executable");
    }

    z_elf_set_ehdr(e, ehdr);
    z_elf_set_phdr_note(e, NULL);
    z_elf_set_phdr_dynamic(e, NULL);

    // Locate phdr_note and phdr_dynamic
    Elf64_Phdr *phdrs = (Elf64_Phdr *)(base + ehdr->e_phoff);
    for (unsigned i = 0; i < ehdr->e_phnum; i++) {
        Elf64_Phdr *phdr = phdrs + i;
        if (phdr->p_type == PT_DYNAMIC)
            z_elf_set_phdr_dynamic(e, phdr);
        if (phdr->p_type == PT_NOTE)
            z_elf_set_phdr_note(e, phdr);
    }

    Elf64_Phdr *phdr_note = z_elf_get_phdr_note(e);
    if (phdr_note == NULL) {
        // TODO: currently we use a very naive but effective method to inject a
        // new segment, by modifying the PT_NOTE. However, it does not always
        // work. A better but more complex solution is to move the segment
        // table to a new place which makes it easier to add segments.
        EXITME("failed to parse ELF file [missing PT_NOTE segment]");
    }

    Elf64_Phdr *phdr_dynamic = z_elf_get_phdr_dynamic(e);
    if (phdr_dynamic != NULL &&
        phdr_dynamic->p_offset + phdr_dynamic->p_memsz > size) {
        EXITME("failed to parse ELF file [invalid dynamic section]");
    }

    z_trace("successfully parse ELF header");
}

/*
 * Sanity-check the ELF header: 64-bit, little-endian, x86_64, current
 * version, in-bounds program/section header tables, and type ET_EXEC or
 * ET_DYN. Each violation aborts with a descriptive message. (Closing check
 * continues on the next chunk.)
 */
Z_PRIVATE void __elf_validate_header(_MEM_FILE *stream) {
    size_t size = z_mem_file_ftell(stream);
    if (size < sizeof(Elf64_Ehdr)) {
        EXITME("failed to parse ELF EHDR [file is too small]");
    }

    Elf64_Ehdr *ehdr = (Elf64_Ehdr *)z_mem_file_get_raw_buf(stream);
    if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
        ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
        ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
        ehdr->e_ident[EI_MAG3] != ELFMAG3) {
        EXITME("failed to parse ELF EHDR [invalid magic number (%c%c%c%c)]",
               ehdr->e_ident[EI_MAG0], ehdr->e_ident[EI_MAG1],
               ehdr->e_ident[EI_MAG2], ehdr->e_ident[EI_MAG3]);
    }
    if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
        EXITME("failed to parse ELF EHDR [file is not 64bit]");
    }
    if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) {
        EXITME("failed to parse ELF EHDR [file is not little endian]");
    }
    if (ehdr->e_ident[EI_VERSION] != EV_CURRENT) {
        EXITME("failed to parse ELF EHDR [invalid version]");
    }
    if (ehdr->e_machine != EM_X86_64) {
        EXITME("failed to parse ELF EHDR [file is not x86_64]");
    }
    // NOTE(review): e_phoff/e_shoff are 64-bit (Elf64_Off) but printed with
    // "%u" below; "%lu" would be the matching conversion.
    if (ehdr->e_phoff < sizeof(Elf64_Ehdr)) {
        EXITME("failed to parse ELF EHDR [invalid program header offset (%u)]",
               ehdr->e_phoff);
    }
    if (ehdr->e_phnum > PN_XNUM) {
        EXITME("failed to parse ELF EHDR [too many program headers (%d)]",
               ehdr->e_phnum);
    }
    if (ehdr->e_phoff + ehdr->e_phnum * sizeof(Elf64_Phdr) > size) {
        EXITME("failed to parse ELF EHDR [invalid program headers]");
    }
    if (ehdr->e_shoff < sizeof(Elf64_Ehdr)) {
        EXITME("failed to parse ELF SHDR [invalid section header offset (%u)]",
               ehdr->e_shoff);
    }
    if (ehdr->e_shoff + ehdr->e_shnum * sizeof(Elf64_Shdr) > size) {
        EXITME("failed to parse ELF EHDR [invalid section headers]");
    }
    if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
        EXITME("failed to parse ELF EHDR [file is not executable]");
    }
}

/*
 * Build the virtual-address -> file-chunk mapping (e->vmapping) and the set
 * of load-time mapped pages (e->mmapped_pages) from the PT_LOAD program
 * headers. The page-aligned region around .text is carved out and redirected
 * to the shared .text stream (__elf_setup_shared_text); fixed-address regions
 * (RW page, AFL/CRS maps for non-PIE, and the signal stack) are reserved too.
 */
Z_PRIVATE void __elf_set_virtual_mapping(ELF *e, const char *filename) {
    // Get .text information
    Elf64_Shdr *text = z_elf_get_shdr_text(e);
    addr_t text_addr = text->sh_addr;
    size_t text_size = text->sh_size;

    size_t size = z_mem_file_ftell(e->stream);

    e->vmapping = z_splay_create(NULL);  // Do not support merging
    e->mmapped_pages = z_splay_create(&z_direct_merge);
    e->max_addr = 0;

    // Get segments table
    Elf64_Ehdr *ehdr = z_elf_get_ehdr(e);
    Elf64_Phdr *phdrs =
        (Elf64_Phdr *)__elf_stream_off2ptr(e->stream, ehdr->e_phoff);

    FChunk *fc = NULL;
    Snode *node = NULL;

    for (unsigned i = 0; i < ehdr->e_phnum; i++) {
        Elf64_Phdr *phdr = phdrs + i;

        // We only consider PT_LOAD segment
        if (phdr->p_type != PT_LOAD)
            continue;

        // Get segment information
        // 1. vaddr
        addr_t vaddr = (addr_t)phdr->p_vaddr;
        // 2. offset
        size_t offset = (size_t)phdr->p_offset;
        // 3. filesz
        size_t filesz = (size_t)phdr->p_filesz;
        // 4. memsz
        size_t memsz = (size_t)phdr->p_memsz;
        assert(memsz >= filesz);

        if (offset + filesz > size) {
            EXITME("invalid segment [%ld, %ld]: larger than ELF size(%ld)",
                   offset, offset + filesz - 1, size);
        }

        // Update max virtual address
        if (e->max_addr < vaddr + memsz) {
            e->max_addr = vaddr + memsz;
        }

        if (text_addr >= vaddr && text_addr < vaddr + memsz) {
            // this is the segment containing .text
            if (!(phdr->p_flags & PF_X)) {
                EXITME(".text section is not executable");
            }

            // XXX: note that the shared .text section will be mapped in
            // page-level

            // step (0). make sure all .text are contained by file
            if (text_addr + text_size > vaddr + filesz) {
                EXITME("some data in .text section is not contained by file");
            }

            // step (1). first check whether we need to map the head part
            addr_t aligned_addr = BITS_ALIGN_FLOOR(text_addr, PAGE_SIZE_POW2);
            if (vaddr < aligned_addr) {
                assert(aligned_addr - vaddr <= filesz);
                fc = z_fchunk_create(e->stream, offset, aligned_addr - vaddr,
                                     false);
                node = z_snode_create(vaddr, aligned_addr - vaddr, (void *)fc,
                                      (void (*)(void *))(&z_fchunk_destroy));
                if (!z_splay_insert(e->vmapping, node)) {
                    EXITME("overlapped virtual addresses");
                }
            }

            // step (2). then check whether we need to map the tail part
            aligned_addr =
                BITS_ALIGN_CELL(text_addr + text_size, PAGE_SIZE_POW2);
            // update max_addr if needed
            if (e->max_addr < aligned_addr) {
                e->max_addr = aligned_addr;
            }
            if (aligned_addr < vaddr + memsz) {
                assert(aligned_addr > vaddr);
                // check which kind of node we need to insert
                if (aligned_addr - vaddr >= filesz) {
                    // it means the tail part is purely alloced
                    node = z_snode_create(
                        aligned_addr, vaddr + memsz - aligned_addr, NULL, NULL);
                } else {
                    // it means the tail part contains some data bytes
                    fc = z_fchunk_create(e->stream,
                                         offset + aligned_addr - vaddr,
                                         vaddr + filesz - aligned_addr, false);
                    node = z_snode_create(
                        aligned_addr, vaddr + memsz - aligned_addr, (void *)fc,
                        (void (*)(void *))(&z_fchunk_destroy));
                }
                if (!z_splay_insert(e->vmapping, node)) {
                    EXITME("overlapped virtual addresses");
                }
            }

            // step (3). setup shared .text section
            __elf_setup_shared_text(e, filename);
        } else {
            // ordinary segment: map its file-backed part as one chunk
            fc = z_fchunk_create(e->stream, offset, filesz, false);
            node = z_snode_create(vaddr, memsz, (void *)fc,
                                  (void (*)(void *))(&z_fchunk_destroy));
            if (!z_splay_insert(e->vmapping, node)) {
                // NOTE(review): "overlapeed" (sic) kept byte-identical below.
                EXITME("overlapeed virtual addresses");
            }
        }

        // For non-exec segment, we need to insert virtual uTP.
        // XXX: I totally forget what the following code does...
        // XXX: the segment containing .text does not go into this branch.
        if (!(phdr->p_flags & PF_X)) {
            addr_t gap_1_addr = BITS_ALIGN_FLOOR(vaddr, PAGE_SIZE_POW2);
            size_t gap_1_size = vaddr - gap_1_addr;
            if (gap_1_size > 0) {
                node = z_snode_create(gap_1_addr, gap_1_size, NULL, NULL);
                if (!z_splay_insert(e->vmapping, node)) {
                    EXITME("overlapped virtual uTPs");
                }
            }

            addr_t gap_2_addr = vaddr + memsz;
            size_t gap_2_size =
                PAGE_SIZE - (gap_2_addr & ((1 << PAGE_SIZE_POW2) - 1));
            if (gap_2_size > 0) {
                node = z_snode_create(gap_2_addr, gap_2_size, NULL, NULL);
                if (!z_splay_insert(e->vmapping, node)) {
                    EXITME("overlapped virtual uTPs");
                }
            }
        }

        // Update mmapped pages
        // XXX: the .text insertion does not impact the mapped pages
        assert(memsz != 0);
        addr_t mmap_addr = BITS_ALIGN_FLOOR(vaddr, PAGE_SIZE_POW2);
        size_t mmap_size = vaddr + memsz - mmap_addr;
        mmap_size = BITS_ALIGN_CELL(mmap_size, PAGE_SIZE_POW2);
        node = z_snode_create(mmap_addr, mmap_size, NULL, NULL);
        if (!z_splay_insert(e->mmapped_pages, node)) {
            EXITME("overlapped mapped addresses");
        }

        z_trace("find segment [%#lx, %#lx] @ %#lx", vaddr, vaddr + filesz - 1,
                offset);
    }

    // XXX: note that max_addr is only used to find the max address of those
    // segments in the original ELF, which excludes those pages mapped by us
    if (!e->max_addr) {
        EXITME("no loaded segment found");
    }
    z_trace("max address for original ELF: %#lx", e->max_addr - 1);

    // Add constant address into vmapping
    if (!e->is_pie) {
        // For PIE binary, it is almost impossible to touch the constant
        // address, so we ignore them
        if (!z_splay_insert(
                e->vmapping,
                z_snode_create(RW_PAGE_ADDR, RW_PAGE_USED_SIZE, NULL, NULL))) {
            EXITME("constant address is occupied");
        }
        if (!z_splay_insert(
                e->mmapped_pages,
                z_snode_create(RW_PAGE_ADDR, RW_PAGE_SIZE, NULL, NULL))) {
            EXITME("constant address is occupied");
        }

        if (!z_splay_insert(
                e->vmapping,
                z_snode_create(AFL_MAP_ADDR, AFL_MAP_SIZE, NULL, NULL))) {
            EXITME("constant address is occupied");
        }
        if (!z_splay_insert(
                e->mmapped_pages,
                z_snode_create(AFL_MAP_ADDR, AFL_MAP_SIZE, NULL, NULL))) {
            EXITME("constant address is occupied");
        }

        if (!z_splay_insert(
                e->vmapping,
                z_snode_create(CRS_MAP_ADDR, CRS_MAP_SIZE, NULL, NULL))) {
            EXITME("constant address is occupied");
        }
        if (!z_splay_insert(
                e->mmapped_pages,
                z_snode_create(CRS_MAP_ADDR, CRS_MAP_SIZE, NULL, NULL))) {
            EXITME("constant address is occupied");
        }
    }

    // We additionally need to add those mapped pages whose address is based
    // on ASLR/PIE
    {
        if (!z_splay_insert(e->vmapping,
                            z_snode_create(SIGNAL_STACK_ADDR,
                                           SIGNAL_STACK_SIZE, NULL, NULL))) {
            EXITME("signal stack is occupied");
        }
        if (!z_splay_insert(e->mmapped_pages,
                            z_snode_create(SIGNAL_STACK_ADDR,
                                           SIGNAL_STACK_SIZE, NULL, NULL))) {
            EXITME("signal stack is occupied");
        }
    }
}

/*
 * Detect the address of main() (and related init/fini information) when
 * requested via e->detect_main; skipped with a log message when the fork
 * server is instrumented before the entrypoint instead.
 * NOTE(review): the function body continues past this excerpt.
 */
Z_PRIVATE void __elf_parse_main(ELF *e) {
    assert(e != NULL);

    if (!e->detect_main) {
        z_info(
            "we skip the detection of main function because we are going to "
            "instrument the fork server before the entrypoint");
        return;
    }

    // Try to identify the address of main function.
    // XXX: like AFL, we try to instrument the binary before main(). But we may
    // not always successfully locate the main() function.
// * https://github.com/google/AFL/tree/master/llvm_mode // * https://github.com/talos-vulndev/afl-dyninst Rptr *cur_ptr = z_elf_vaddr2ptr(e, e->ori_entry); addr_t cur_addr = e->ori_entry; while (true) { if (z_rptr_is_null(cur_ptr)) { EXITME("invalid entrypoint or run out of segment"); } CS_DISASM(cur_ptr, cur_addr, 1); // If searching all instructions in _start if ((cs_count == 0) || (cs_inst[0].id == X86_INS_CALL)) { EXITME( "no main function found, please use -e option to install the " "fork server at entrypoint"); } z_trace("finding main: %#lx:\t%s %s", cs_inst[0].address, cs_inst[0].mnemonic, cs_inst[0].op_str); // Check load_main if (e->is_pie) { // For PIE binary, we check: lea rdi, [rip + xxx] if (cs_inst[0].id != X86_INS_LEA) goto NEXT; cs_x86_op *ops = cs_inst[0].detail->x86.operands; if (ops[0].type != X86_OP_REG) goto NEXT; if ((ops[1].type != X86_OP_MEM) || (ops[1].mem.base != X86_REG_RIP) || (ops[1].mem.index != X86_REG_INVALID)) goto NEXT; switch (ops[0].reg) { case X86_REG_RCX: e->init = ops[1].mem.disp + cs_inst[0].size + cur_addr; e->load_init = cur_addr; goto NEXT; case X86_REG_R8: e->fini = ops[1].mem.disp + cs_inst[0].size + cur_addr; e->load_fini = cur_addr; goto NEXT; case X86_REG_RDI: e->main = ops[1].mem.disp + cs_inst[0].size + cur_addr; e->load_main = cur_addr; goto LOOP_DONE; default: goto NEXT; } } else { // For non-PIE binary, we check: mov rdi, xxx if (cs_inst[0].id != X86_INS_MOV) goto NEXT; cs_x86_op *ops = cs_inst[0].detail->x86.operands; if (ops[0].type != X86_OP_REG) goto NEXT; if (ops[1].type != X86_OP_IMM) goto NEXT; switch (ops[0].reg) { case X86_REG_R8: e->fini = ops[1].imm; e->load_fini = cur_addr; goto NEXT; case X86_REG_RCX: e->init = ops[1].imm; e->load_init = cur_addr; goto NEXT; case X86_REG_RDI: e->main = ops[1].imm; e->load_main = cur_addr; goto LOOP_DONE; default: goto NEXT; } } NEXT: cur_addr += cs_inst[0].size; z_rptr_inc(cur_ptr, uint8_t, cs_inst[0].size); } LOOP_DONE: z_rptr_destroy(cur_ptr); z_info("find main 
function: %#lx", e->main); z_info("find init function: %#lx", e->init); z_info("find fini function: %#lx", e->fini); } Z_API ELF *z_elf_open(const char *ori_filename, bool detect_main) { ELF *e = STRUCT_ALLOC(ELF); e->detect_main = detect_main; memset(e->tmpnam, 0, TMPNAME_LEN); z_snprintf(e->tmpnam, TMPNAME_LEN, TMPNAME_FMT, z_rand()); z_trace("use temp file: %s", e->tmpnam); _MEM_FILE *stream = __elf_open_file(e, ori_filename); // Step (0). Validate header __elf_validate_header(stream); // Step (1). Alloc ELF struct e->stream = stream; // Step (2). Parse program header __elf_parse_phdr(e); // Step (3). Parse section header __elf_parse_shdr(e); // Step (4). Do virtual mapping __elf_set_virtual_mapping(e, ori_filename); // Step (5). Extend loader/Trampolines zones onto file __elf_extend_zones(e); // Step (6). Setup lookup table __elf_setup_lookup_table(e, ori_filename); // Step (7). Setup trampolines (shadow code) __elf_setup_trampolines(e, ori_filename); // Step (8). Setup pipe file __elf_setup_pipe(e, ori_filename); // Step (9). Setup retaddr mapping __elf_setup_retaddr_mapping(e, ori_filename); // Step (10). Detect and parse main function __elf_parse_main(e); // Step (11). Rewrite PT_NOTE meta info __elf_rewrite_pt_note(e); // Step (12). Set RELRO for elf (REMOVE to allow gdb load library symbols) // XXX: AFL already set LD_BIND_NOW to stops the linker from doing extra // work post-fork() // __elf_set_relro(e); // step (13). Get relocation information __elf_parse_relocation(e); // step (14). link patched file char *patched_filename = z_strcat(ori_filename, PATCHED_FILE_SUFFIX); z_elf_save(e, patched_filename); z_free(patched_filename); // step (15). 
set state e->state = ELFSTATE_CONNECTED; return e; } Z_API Rptr *z_elf_vaddr2ptr(ELF *e, addr_t vaddr) { assert(e != NULL); // Get corresponding segment Snode *segment = __elf_find_segment_by_vaddr(e, vaddr); if (segment == NULL) { return NULL; } // Create Rptr FChunk *fc = (FChunk *)z_snode_get_data(segment); if (z_strcmp(STRUCT_TYPE(fc), "FChunk")) { z_trace("get address into dynamically allocated space"); return NULL; } size_t off1 = vaddr - z_snode_get_lower_bound(segment); size_t off2 = z_fchunk_get_offset(fc); if (off1 >= z_fchunk_get_size(fc)) { z_trace("trying to read on zero-padding region"); return NULL; } size_t size = z_snode_get_upper_bound(segment) - vaddr + 1; _MEM_FILE *stream = z_fchunk_get_stream(fc); return z_rptr_create(__elf_stream_off2ptr(stream, off1 + off2), size); } Z_API void z_elf_destroy(ELF *e) { z_splay_destroy(e->vmapping); z_splay_destroy(e->mmapped_pages); g_hash_table_destroy(e->got); g_hash_table_destroy(e->plt); z_free(e->retaddr_mapping_name); z_free(e->lookup_tabname); z_free(e->trampolines_name); z_free(e->shared_text_name); z_free(e->pipe_filename); z_mem_file_fclose(e->retaddr_mapping_stream); z_mem_file_fclose(e->lookup_table_stream); z_mem_file_fclose(e->trampolines_stream); z_mem_file_fclose(e->shared_text_stream); z_mem_file_fclose(e->stream); if (remove(e->tmpnam)) { EXITME("failed on remove %s: %s", e->tmpnam, strerror(errno)); } z_free(e); } Z_API void z_elf_fsync(ELF *e) { assert(e != NULL); z_mem_file_fsync(e->lookup_table_stream); z_mem_file_fsync(e->trampolines_stream); z_mem_file_fsync(e->shared_text_stream); z_mem_file_fsync(e->stream); } Z_API void z_elf_save(ELF *e, const char *pathname) { // curently no need to update PT_NOTE, because trampolines are putting in // an individual file. // fsync z_elf_fsync(e); // check whether pathname exists. if so, remove it. 
if (!z_access(pathname, F_OK)) { if (remove(pathname)) { EXITME("failed on remove: %s (error: %s)", pathname, strerror(errno)); } } // create a symbolic link to e->tmpnam z_info("save patched file into %s", pathname); if (link(e->tmpnam, pathname)) { EXITME("failed on link: %s", strerror(errno)); } } Z_API void z_elf_create_snapshot(ELF *e, const char *pathname) { z_elf_fsync(e); z_mem_file_save_as(e->stream, pathname); } Z_API size_t z_elf_read_all(ELF *e, addr_t addr, size_t n, void *buf) { assert(e != NULL); size_t cur_n = n; while (cur_n > 0) { size_t k = z_elf_read(e, addr, cur_n, buf); if (!k) { return n - cur_n; } cur_n -= k; buf += k; addr += k; } return n; } Z_API size_t z_elf_read(ELF *e, addr_t addr, size_t n, void *buf) { assert(e != NULL); Rptr *rptr = z_elf_vaddr2ptr(e, addr); if (z_rptr_is_null(rptr)) { z_error("invalid address: %#lx", addr); return 0; } size_t n_ = n < z_rptr_get_size(rptr) ? n : z_rptr_get_size(rptr); z_rptr_memcpy(buf, rptr, n_); z_rptr_destroy(rptr); return n_; } Z_API size_t z_elf_write(ELF *e, addr_t addr, size_t n, const void *buf) { assert(e != NULL); Snode *segment = __elf_find_segment_by_vaddr(e, addr); if (!segment) { EXITME("invalid address: %#lx", addr); } FChunk *fc = (FChunk *)z_snode_get_data(segment); if (z_fchunk_get_extendable(fc)) { // write on an extendable space addr_t segment_base_addr = z_snode_get_lower_bound(segment); _MEM_FILE *underlying_stream = z_fchunk_get_stream(fc); // XXX: similar to the false branch, the overhead of // __elf_stream_vaddr2off is small because the target snode is already // at the root of Splay size_t tp_off = __elf_stream_vaddr2off(e, segment_base_addr); assert(tp_off == 0); size_t write_off = addr - segment_base_addr + tp_off; if (z_mem_file_get_size(underlying_stream) < write_off) { EXITME("write on too bigger address: %#lx", addr); } // get old size size_t old_size = z_mem_file_get_size(underlying_stream) - tp_off; // We cannot directly use __elf_stream_vaddr2off here, as addr may 
not // in current virtual memroy. z_mem_file_pwrite(underlying_stream, buf, n, write_off); if (write_off + n == z_mem_file_get_size(underlying_stream)) { // XXX: if the underlying stream is fully written, we need to extend // it. For example, if the original address range is [0x1000, // 0x1100) and we wrote all the 0x100 bytes, next time we want to // write on address 0x1100. It sould be valid because the underlying // stream is extendable. z_mem_file_pwrite(underlying_stream, "", 1, write_off + n); assert(write_off + n < z_mem_file_get_size(underlying_stream)); } // calculate new node size_t new_size = z_mem_file_get_size(underlying_stream) - tp_off; // update if new_size is not equal to old_size if (new_size != old_size) { assert(new_size > old_size); // delete previous node Snode *node = z_splay_delete(e->vmapping, segment_base_addr); assert(node != NULL); addr_t vaddr = z_snode_get_lower_bound(node); z_snode_set_len(node, new_size); z_fchunk_set_size((FChunk *)z_snode_get_data(node), new_size); // update virtual mapping if (!z_splay_insert(e->vmapping, node)) { EXITME("extend writing [new_size: %#lx, old_size: %#lx]", new_size, old_size); } // update mapped pages node = z_snode_create(vaddr + old_size, new_size - old_size, NULL, NULL); if (!z_splay_insert(e->mmapped_pages, node)) { EXITME("extend writing"); } // update state z_elf_set_state(e, ELFSTATE_SHADOW_EXTENDED); } } else { // other range // XXX: the overhead of re-searching splay is small because the target // snode is already at the root, so we re-invoke z_elf_vaddr2pter for // the easy understanding of the code Rptr *rptr = z_elf_vaddr2ptr(e, addr); z_rptr_memcpy(rptr, buf, n); z_rptr_destroy(rptr); } return n; } Z_API bool z_elf_check_region_free(ELF *e, Snode *region) { assert(e != NULL && region != NULL); return !z_splay_interval_overlap(e->vmapping, region); } Z_API bool z_elf_insert_utp(ELF *e, Snode *utp, addr_t *mmap_addr, size_t *mmap_size) { assert(z_snode_get_data(utp) == NULL); 
assert(z_snode_get_len(utp) <= PAGE_SIZE); // insert utp first if (!z_splay_insert(e->vmapping, utp)) return false; // calculate mmap page addr_t utp_mmap_lo = BITS_ALIGN_FLOOR(z_snode_get_lower_bound(utp), PAGE_SIZE_POW2); addr_t utp_mmap_up = BITS_ALIGN_FLOOR(z_snode_get_upper_bound(utp), PAGE_SIZE_POW2); // init values *mmap_addr = INVALID_ADDR; *mmap_size = 0; // check for (addr_t addr = utp_mmap_lo; addr <= utp_mmap_up; addr += PAGE_SIZE) { Snode *node = z_snode_create(addr, PAGE_SIZE, NULL, NULL); if (z_splay_insert(e->mmapped_pages, node)) { *mmap_addr = (*mmap_addr < addr ? *mmap_addr : addr); *mmap_size += PAGE_SIZE; } else { z_snode_destroy(node); } } if (*mmap_addr == INVALID_ADDR) *mmap_addr = 0; return true; } Z_API const LFuncInfo *z_elf_get_plt_info(ELF *e, addr_t addr) { return (const LFuncInfo *)g_hash_table_lookup(e->plt, GSIZE_TO_POINTER(addr)); } Z_API const LFuncInfo *z_elf_get_got_info(ELF *e, addr_t addr) { return (const LFuncInfo *)g_hash_table_lookup(e->got, GSIZE_TO_POINTER(addr)); } Z_API bool z_elf_check_state(ELF *e, ELFState state) { if (state & ELFSTATE_DISABLE) { EXITME( "check state function does not support disabling any state (state: " "%#x)", state); } return (e->state & state); } Z_API bool z_elf_is_statically_linked(ELF *e) { // XXX: linux kernel uses .INTERP segment to determine whether a dynmaic // linker is required, but here we use .DYNAMIC segment which is good enough // (like what readelf does) return !z_elf_get_phdr_dynamic(e); } ================================================ FILE: src/elf_.h ================================================ /* * elf_.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. 
*
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

#ifndef __ELF__H
#define __ELF__H

#include "config.h"
#include "interval_splay.h"
#include "library_functions/library_functions.h"
#include "mem_file.h"
#include "restricted_ptr.h"

// NOTE(review): the two include targets below were lost in extraction
// (angle-bracket contents stripped) — restore from upstream.
#include
#include

#define TMPNAME_FMT TEMPFILE_NAME_PREFIX "%08x"
#define TMPNAME_LEN 0x20

typedef enum elf_state_t {
    ELFSTATE_NONE = 0x0,             // none
    ELFSTATE_CONNECTED = 0x1,        // ELF is connected to the underlying file
                                     // (NOTE(review): original comment said
                                     // "disconnect" — likely a typo, since
                                     // z_elf_open sets this state on success)
    ELFSTATE_SHADOW_EXTENDED = 0x2,  // shadow file is extended
    ELFSTATE_DISABLE = 0x100,        // flag for disable state
    ELFSTATE_MASK = 0xffff,          // mask
} ELFState;

/*
 * ELF info for binary rewrite (Key Structure)
 */
STRUCT(ELF, {
    char tmpnam[TMPNAME_LEN];
    _MEM_FILE *stream;

    bool detect_main;

    /*
     * Layout of the rewritten binary:
     *
     *    new                                        original
     * entrypoint                                      main
     *    |                          original            ^
     *    |                         entrypoint           |
     *    |                              ^               |
     *    V                              |               |
     * |-----|.|-----------|-------------|--------------|.|--------|.|-------|
     * | ELF |.| TP loader | fork server | random patch |.| BB Tab |.|  TPs  |
     * |-----|.|-----------|-------------|--------------|.|--------|.|-------|
     *    ^                                                             ^
     *    |                                                             |
     * PT_NOTE                                                       new main
     *
     * | ELF | LOADER and FORK SERVER | BB Tab | | TPs |
     *
     *   ^      ^                       ^
     * PAGE-ALIGNED                PAGE-ALIGNED
     */

    /*
     * ELF Header
     */
    size_t ehdr_off;  // EHDR (Elf header)

    /*
     * Program Header
     */
    size_t phdr_note_off;     // PHDR PT_NOTE to be used for loader.
    size_t phdr_dynamic_off;  // PHDR PT_DYNAMIC else nullptr.

    /*
     * Section Header
     */
    size_t shdr_shstrtab_off;    // SHDR SHT_STRTAB for strings.
    size_t shdr_text_off;        // SHDR .text section.
    size_t shdr_init_off;        // SHDR .init section.
    size_t shdr_fini_off;        // SHDR .fini section.
    size_t shdr_init_array_off;  // SHDR .init_array
    size_t shdr_fini_array_off;  // SHDR .fini_array
    size_t shdr_plt_off;         // SHDR .plt
    size_t shdr_plt_got_off;     // SHDR .plt.got
    size_t shdr_plt_sec_off;     // SHDR .plt.sec

    /*
     * Dynamic information
     */
    addr_t fini_array;    // .fini_array
    size_t fini_arraysz;  // size of .fini_array
    addr_t init_array;    // .init_array
    size_t init_arraysz;  // size of .init_array

    /*
     * Virtual Memory
     */
    // vmapping is the actual mappings, while mmapped_pages is the thing at
    // paging level. For example, an actual mapping [0x1010, 0x1020] has a
    // mapped page [0x1000, 0x2000). We use mmapped_pages to support multiple
    // uTPs which fall into the same page (e.g., [0x1010, 0x1020] and [0x1100,
    // 0x1110]).
    //
    // XXX: when the underlying binary is PIE, vmapping does not contain those
    // segments that are at fixed addresses (RW_PAGE_ADDR, AFL_MAP_ADDR, and
    // CRS_MAP_ADDR; details can be found in config.h). It is not a 100% safe
    // solution, but the likelihood of segment conflicts seems tiny, since the
    // size of these fixed-address segments is relatively small and the
    // addresses of them are low.
    Splay *vmapping;       // Virtual memory
    Splay *mmapped_pages;  // Mmapped pages
    addr_t max_addr;       // Max virtual address (XXX: excluding endpoint)

    addr_t loader_addr;        // Base address of loader
    addr_t trampolines_addr;   // Base address of trampolines (TP)
    addr_t lookup_table_addr;  // Base address of lookup table
    addr_t shared_text_addr;   // Base address of shared .text (page-aligned)
    addr_t
        retaddr_mapping_addr;  // Base address of retaddr mapping (page-aligned)

    /*
     * Lookup table
     */
    char *lookup_tabname;            // Name of mmapped lookup table
    _MEM_FILE *lookup_table_stream;  // _MEM_FILE of lookup table

    /*
     * Trampolines
     */
    char *trampolines_name;         // Name of mmapped trampolines
    _MEM_FILE *trampolines_stream;  // _MEM_FILE of trampolines

    /*
     * Shared .text section;
     */
    char *shared_text_name;         // Name of shared .text section
    _MEM_FILE *shared_text_stream;  // _MEM_FILE of shared .text section

    /*
     * Pipeline
     */
    char *pipe_filename;  // Name of pipe communicated with daemon

    /*
     * Return address mapping
     */
    char *retaddr_mapping_name;  // Name of the mapping of return addresses
    _MEM_FILE *retaddr_mapping_stream;  // _MEM_FILE of retaddr mapping

    /*
     * ELF state
     */
    ELFState state;

    /*
     * Relocation information
     */
    GHashTable *got;  // GOT information
    GHashTable *plt;  // PLT information

    /*
     * Other basic information
     */
    bool is_pie;       // Whether the binary is compiled as PIE
    addr_t ori_entry;  // Address of original Entry Point
    addr_t main;       // Address of main
    addr_t init;       // Address of init
    addr_t fini;       // Address of fini
    addr_t load_main;  // Address of the instruction loading main address
    addr_t load_init;  // Address of the instruction loading init address
    addr_t load_fini;  // Address of the instruction loading fini address
});

/*
 * Setter and Getter
 */
DECLARE_SETTER(ELF, elf, Elf64_Ehdr *, ehdr);
DECLARE_SETTER(ELF, elf, Elf64_Phdr *, phdr_note);
DECLARE_SETTER(ELF, elf, Elf64_Phdr *, phdr_dynamic);
DECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_shstrtab);
DECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_text);
DECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_init);
DECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_fini);
DECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_init_array);
DECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_fini_array);
DECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt);
DECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt_got);
DECLARE_SETTER(ELF, elf, Elf64_Shdr *, shdr_plt_sec);
DECLARE_SETTER(ELF, elf, ELFState, state);

DECLARE_GETTER(ELF, elf, Elf64_Ehdr *, ehdr);
DECLARE_GETTER(ELF, elf, Elf64_Phdr *, phdr_note);
DECLARE_GETTER(ELF, elf, Elf64_Phdr *, phdr_dynamic);
DECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_shstrtab);
DECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_text);
DECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_init);
DECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_fini);
DECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_init_array);
DECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_fini_array);
DECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt);
DECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt_got);
DECLARE_GETTER(ELF, elf, Elf64_Shdr *, shdr_plt_sec);
DECLARE_GETTER(ELF, elf, addr_t, loader_addr);
DECLARE_GETTER(ELF, elf, addr_t, trampolines_addr);
DECLARE_GETTER(ELF, elf, addr_t, lookup_table_addr);
DECLARE_GETTER(ELF, elf, addr_t, shared_text_addr);
DECLARE_GETTER(ELF, elf, addr_t, retaddr_mapping_addr);
DECLARE_GETTER(ELF, elf, bool, is_pie);
DECLARE_GETTER(ELF, elf, addr_t, ori_entry);
DECLARE_GETTER(ELF, elf, addr_t, main);
DECLARE_GETTER(ELF, elf, addr_t, init);
DECLARE_GETTER(ELF, elf, addr_t, fini);
DECLARE_GETTER(ELF, elf, addr_t, load_main);
DECLARE_GETTER(ELF, elf, addr_t, load_init);
DECLARE_GETTER(ELF, elf, addr_t, load_fini);
DECLARE_GETTER(ELF, elf, const char *, lookup_tabname);
DECLARE_GETTER(ELF, elf, const char *, trampolines_name);
DECLARE_GETTER(ELF, elf, const char *, shared_text_name);
DECLARE_GETTER(ELF, elf, const char *, pipe_filename);
DECLARE_GETTER(ELF, elf, const char *, retaddr_mapping_name);
DECLARE_GETTER(ELF, elf, size_t, plt_n);

/*
 * Open an ELF file.
 */
Z_API ELF *z_elf_open(const char *ori_filename, bool detect_main);

/*
 * Destructor of ELF
 */
Z_API void z_elf_destroy(ELF *e);

/*
 * Save ELF to pathname
 */
Z_API void z_elf_save(ELF *e, const char *pathname);

/*
 * Return a pointer pointed to given virtual address, NULL if the virtual
 * address is invalid.
 */
Z_API Rptr *z_elf_vaddr2ptr(ELF *e, addr_t vaddr);

/*
 * Read data from given virtual address.
 * z_elf_read only reads data from a stream, which means if the requested bytes
 * are cross-stream, z_elf_read only returns the first k bytes in the same
 * stream.
 */
Z_API size_t z_elf_read(ELF *e, addr_t addr, size_t n, void *buf);

/*
 * Forcibly read data from given virtual address.
 * Different from z_elf_read, z_elf_read_all forcibly reads all requested bytes
 * even if they are cross-stream.
 */
Z_API size_t z_elf_read_all(ELF *e, addr_t addr, size_t n, void *buf);

/*
 * Write data to given virtual address.
 * z_elf_write only writes data on a stream, like z_elf_read.
 */
// XXX: note that the z_elf_write only supports writing on data stored in file
// but not those dynamically alloced segments.
Z_API size_t z_elf_write(ELF *e, addr_t addr, size_t n, const void *buf);

// TODO: add z_elf_write_all if necessary

/*
 * Check whether the ELF is statically-linked
 */
Z_API bool z_elf_is_statically_linked(ELF *e);

/*
 * Get PLT information
 */
Z_API const LFuncInfo *z_elf_get_plt_info(ELF *e, addr_t addr);

/*
 * Get GOT information
 */
Z_API const LFuncInfo *z_elf_get_got_info(ELF *e, addr_t addr);

/*
 * Check whether region is free.
 */
Z_API bool z_elf_check_region_free(ELF *e, Snode *region);

/*
 * Insert a utp into vmapping.
 */
Z_API bool z_elf_insert_utp(ELF *e, Snode *utp, addr_t *mmap_addr,
                            size_t *mmap_size);

/*
 * Sync all mapping file
 */
Z_API void z_elf_fsync(ELF *e);

/*
 * Create a snapshot for current ELF.
 * Different from z_elf_save, this ELF's main body (except lookup table and
 * shadow) will remain unchanged even future patches are applied.
 */
Z_API void z_elf_create_snapshot(ELF *e, const char *pathname);

/*
 * Check ELF state
 */
Z_API bool z_elf_check_state(ELF *e, ELFState state);

#endif


================================================
FILE: src/fork_server.c
================================================
/*
 * the code inside asm(".globl _entry\n ...")
 * Copyright (C) 2021 National University of Singapore
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
// XXX: the code inside the asm(".globl _entry\n ...") is modified based on
// https://github.com/GJDuck/e9patch/blob/master/src/e9patch/e9loader.cpp

/*
 * other parts of fork_server.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /* * Workflow between different components: * * +--------- pre-handshake (shm) -----------+ * | +-- pre-handshake (shm) --+ * | | | * +-+-+ +---+---+ +-----+-----+ +------+ * |AFL| |OURTOOL| |fork server| |client| * +-+-+ +---+---+ +-----+-----+ +------+ * | | | * | | [trigger execution] | [ new client & ] * +--------------{|}----------------------->| [handshake (socket)] * | | +------------------------>| * | | | | * | | | | * | | | [status (wait4)] x MIC * | | [status (comm socket)] |<----------------------+-+ * | |<------------------------+ | * | | [*CRPS* (shm)] | | * | |<-----------------------{|}----------------------+ * | | | * | validate | [trigger (comm socket)] | * | crashsite ~ [ patch commands (shm)] | * | (if fake) +------------------------>| * | | ~ patch self and re-mmap * | | | [ new client & ] * | | | [handshake (socket)] * | |[clock ON (comm socket)] +------------------------>| * | |<------------------------+ | * | | | | * | | | [status (wait4)] x MIC * | |[clock OFF (comm socket)]|<----------------------+-+ * | |<------------------------+ | * | | [status (comm socket)] | | * | |<------------------------+ | * | | [*CRPS* (shm)] | | * | |<-----------------------{|}----------------------+ * | | | * | validate | [trigger (comm socket)] | * | crashsite ~ [ patch commands (shm)] | * | (if real) +------------------------>| * | | | * | | [status (socket)] | * |<-------------{|}------------------------+ * | | | * | | | * | | [trigger new execution] | [ new client & ] * +--------------{|}----------------------->| [handshake (socket)] * | | +------------------------>| * | | | | * | | | [status (wait4)] | 
exit
 *    |              |    [status (socket)]    |<------------------------+
 *    |<-------------{|}------------------------+
 *
 *
 * *CRPS*: crash points
 * *MIC* : maybe-intentional crash
 *
 */

/*
 * Different situations:
 *
 * +------------------------+------------------+-------------------------------+
 * | Daemon mode / Run mode |   AFL attached   |            Action             |
 * +========================+==================+===============================+
 * |                        |        No        | Perform dry run               |
 * |        Run mode        +------------------+-------------------------------+
 * |                        |        Yes       | Invalid                       |
 * +------------------------+------------------+-------------------------------+
 * |                        |        No        | Ignore AFL-related operations |
 * |       Daemon mode      +------------------+-------------------------------+
 * |                        |        Yes       | Follow above workflow         |
 * +------------------------+------------------+-------------------------------+
 */

#include "fork_server.h"

// NOTE(review): the seven bare include targets below were lost in extraction
// (angle-bracket contents stripped) — restore from upstream.
#include
#include
#include
#include
#include
#include
#include

#include "asm_utils.c"

#ifdef DEBUG
extern const char no_daemon_str[];
extern const char getenv_err_str[];
extern const char afl_shmat_err_str[];
extern const char crs_shmat_err_str[];
extern const char hello_err_str[];
extern const char read_err_str[];
extern const char fork_err_str[];
extern const char wait4_err_str[];
extern const char mumap_err_str[];
extern const char mprotect_err_str[];
extern const char pipe_err_str[];
extern const char dup2_err_str[];
extern const char env_setting_err_str[];
extern const char socket_err_str[];
extern const char data_pipe_err_str[];
// extern const char msync_err_str[];
extern const char write_err_str[];
extern const char pipe_filename_err_str[];
extern const char afl_attached_str[];
extern const char status_str[];
extern const char setpgid_err_str[];
#endif

extern const char magic_string[];

extern const char afl_shm_env[];

// _entry: the code injected at the (new) entrypoint. It saves the caller's
// register context, aligns the stack, recovers envp, calls
// fork_server_start(), restores the context, and falls through to __etext.
asm(".globl _entry\n"
    ".type _entry,@function\n"
    "_entry:\n"
    // (1) push all registers
    "\tpushq %r15;\n"
    "\tpushq %r14;\n"
    "\tpushq %r13;\n"
    "\tpushq %r12;\n"
    "\tpushq %r11;\n"
    "\tpushq %r10;\n"
    "\tpushq %r9;\n"
    "\tpushq %r8;\n"
    "\tpushq %rcx;\n"
    "\tpushq %rdx;\n"
    "\tpushq %rsi;\n"
    "\tpushq %rdi;\n"
    // (2) make rsp 16-bytes alignment
    "\tmovq %rsp, %rbp;\n"
    "\torq $8, %rsp;\n"
    "\tpushq %rbp;\n"
    // (3) get envp into %rdi
    "\tlea __etext(%rip), %rdi;\n"
    "\taddq $4, %rdi;\n"
    "\tshrq $3, %rdi;\n"
    "\tincq %rdi;\n"
    "\tshlq $3, %rdi;\n"  // cur_addr in __binary_setup_fork_server step (3)
                          // binary.c
    "\tmovq (%rdi), %rsi;\n"  // whether the fork server is at the entrypoint or
                              // not
    "\ttest %rsi, %rsi;\n"
    "\tje _envp_done;\n"
    "\taddq $96, %rbp;\n"
    "\tmovq (%rbp), %rdx;\n"  // argc
    "\taddq $2, %rdx;\n"
    "\tshlq $3, %rdx;\n"
    "\taddq %rbp, %rdx;\n"  // envp
    ".globl _envp_done\n"
    "_envp_done:\n"
    "\tmovq %rdx, %rdi;\n"
    // (4) call fork_server_start()
    "\tcallq fork_server_start;\n"
    // (5) restore context
    "\tpopq %rsp;\n"
    "\tpopq %rdi;\n"
    "\tpopq %rsi;\n"
    "\tpopq %rdx;\n"
    "\tpopq %rcx;\n"
    "\tpopq %r8;\n"
    "\tpopq %r9;\n"
    "\tpopq %r10;\n"
    "\tpopq %r11;\n"
    "\tpopq %r12;\n"
    "\tpopq %r13;\n"
    "\tpopq %r14;\n"
    "\tpopq %r15;\n"
    // (6) jump to following code
    "\tjmp __etext;\n"
#ifdef DEBUG
    // no_daemon_str
    ASM_STRING(no_daemon_str, "fork server: no daemon found, switch to dry run")
    // getenv_err_str
    ASM_STRING(getenv_err_str, "fork server: environments not found")
    // afl_shmat_err_str
    ASM_STRING(afl_shmat_err_str, "fork server: shmat error (AFL)")
    // crs_shmat_err_str
    ASM_STRING(crs_shmat_err_str, "fork server: shmat error (CRS)")
    // hello_err_str
    ASM_STRING(hello_err_str, "fork server: handshake error")
    // write_err_str
    ASM_STRING(write_err_str, "fork server: write error")
    // read_err_str
    ASM_STRING(read_err_str, "fork server: read error")
    // fork_err_str
    ASM_STRING(fork_err_str, "fork server: fork error")
    // wait4_err_str
    ASM_STRING(wait4_err_str, "fork server: wait4 error")
    // mumap_err_str
    ASM_STRING(mumap_err_str, "fork server: mumap error")
    // mprotect_err_str
    ASM_STRING(mprotect_err_str, "fork server: mprotect error")
    // pipe_err_str
    ASM_STRING(pipe_err_str, "fork server: pipe error")
    // socket_err_str
    ASM_STRING(socket_err_str, "fork server: socket error")
    // data_pipe_err_str
    ASM_STRING(data_pipe_err_str, "fork server: data pipe connection error")
    // // msync_err_str
    // ASM_STRING(msync_err_str, "fork server: msync error")
    // dup2_err_str
    ASM_STRING(dup2_err_str, "fork server: dup2 error")
    // pipe_filename_err_str
    ASM_STRING(pipe_filename_err_str, "fork server: pipe filename too long")
    // env_setting_err_str
    ASM_STRING(env_setting_err_str, "fork server: fuzzing without daemon running")
    // afl_attached_str
    ASM_STRING(afl_attached_str, "fork server: AFL detected")
    // status_str
    ASM_STRING(status_str, "fork server: client status: ")
    // setpgid_err_str
    ASM_STRING(setpgid_err_str, "fork server: setpgid error")
#endif
    // Magic String to indicate instrumented
    ASM_STRING(magic_string, MAGIC_STRING)
    // AFL's shm environment variable
    ASM_STRING(afl_shm_env, AFL_SHM_ENV));

/*
 * Atoi without any safe check
 */
static inline int fork_server_atoi(char *s) {
    int val = 0;
    bool is_neg = false;
    if (*s == '-') {
        s++;
        is_neg = true;
    }
    while (*s)
        val = val * 10 + (*(s++) - '0');
    if (is_neg) {
        val = -val;
    }
    return val;
}

/*
 * Get shm_id from environment.
 */
static inline int fork_server_get_shm_id(char **envp) {
    char *s;
    while ((s = *(envp++))) {
        // hand-written strcmp with "__AFL_SHM_ID="
        // 0x48535f4c46415f5f is "__AFL_SH" read little-endian
        if (*(unsigned long *)s != 0x48535f4c46415f5f) {
            continue;
        }
        // 0x44495f4d is "M_ID" read little-endian
        if (*(unsigned int *)(s + 8) != 0x44495f4d) {
            continue;
        }
        if (*(s + 12) != '=') {
            continue;
        }
        // parse the decimal value after "__AFL_SHM_ID="
        return fork_server_atoi(s + 13);
    }
    utils_puts(getenv_err_str, true);
    return INVALID_SHM_ID;
}

/*
 * Connect to the pipeline
 */
static inline int fork_server_connect_pipe() {
    // step (1). create sock_fd
    int sock_fd = sys_socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock_fd < 0) {
        utils_error(socket_err_str, true);
    }

    // step (2).
construct sockaddr struct sockaddr_un server = {0}; server.sun_family = AF_UNIX; #ifdef DEBUG size_t n_ = utils_strcpy(server.sun_path, RW_PAGE_INFO(pipe_path)); if (n_ >= sizeof(server.sun_path)) { utils_error(pipe_filename_err_str, true); } #else utils_strcpy(server.sun_path, RW_PAGE_INFO(pipe_path)); #endif // step (3). connect to daemon if (sys_connect(sock_fd, (struct sockaddr *)&server, sizeof(struct sockaddr_un)) < 0) { // daemon is not setup, direct return (dry_run) sys_close(sock_fd); return -1; } return sock_fd; } /* * Start fork server and do random patch. */ NO_INLINE void fork_server_start(char **envp) { /* * step (1). setup comm connection */ // step (1.1). connect socket for comm_fd int comm_fd = fork_server_connect_pipe(); if (comm_fd < 0) { // make sure AFL is not attached if (fork_server_get_shm_id(envp) != INVALID_SHM_ID) { utils_error(env_setting_err_str, true); } utils_puts(no_daemon_str, true); RW_PAGE_INFO(daemon_attached) = false; return; } else { RW_PAGE_INFO(daemon_attached) = true; } // step (1.2). dup2 comm_fd to CRS_COMM_FD { if (sys_dup2(comm_fd, CRS_COMM_FD) < 0) { utils_error(dup2_err_str, true); } sys_close(comm_fd); } /* * step (2). check whether AFL is attached */ int afl_shm_id = fork_server_get_shm_id(envp); bool afl_attached = (afl_shm_id != INVALID_SHM_ID); if (afl_attached) { utils_puts(afl_attached_str, true); } /* * step (3). 
read crs_shm_id/check_execs from daemon and respond * afl_attached/afl_shm_id (comm shakehand) */ // XXX: CRS may be uncessary once we use shared memory for .text section int crs_shm_id = INVALID_SHM_ID; uint32_t check_execs = 0; { if (sys_read(CRS_COMM_FD, (char *)&crs_shm_id, 4) != 4) { utils_error(hello_err_str, true); } int __tmp_data = afl_attached; if (sys_write(CRS_COMM_FD, (char *)&__tmp_data, 4) != 4) { utils_error(hello_err_str, true); } __tmp_data = afl_shm_id; if (sys_write(CRS_COMM_FD, (char *)&__tmp_data, 4) != 4) { utils_error(hello_err_str, true); } if (sys_read(CRS_COMM_FD, (char *)&check_execs, 4) != 4) { utils_error(hello_err_str, true); } } /* * step (4). mmap CRS_SHARED_MEMORY */ // ummap the fake CRS map if (sys_munmap(CRS_MAP_ADDR, CRS_MAP_SIZE) != 0) { utils_error(mumap_err_str, true); } if ((size_t)sys_shmat(crs_shm_id, (const void *)CRS_MAP_ADDR, SHM_RND) != CRS_MAP_ADDR) { utils_error(crs_shmat_err_str, true); } /* * step (5) [if: AFL_ATTACHED]. * munmap the fake AFL_SHARED_MEMORY and mmap the real one */ if (afl_attached) { if (sys_munmap(AFL_MAP_ADDR, AFL_MAP_SIZE) != 0) { utils_error(mumap_err_str, true); } if ((size_t)sys_shmat(afl_shm_id, (const void *)AFL_MAP_ADDR, SHM_RND) != AFL_MAP_ADDR) { utils_error(afl_shmat_err_str, true); } } /* * step (6). [if: AFL_ATTACHED] * send 4-byte "hello" message to AFL */ { int __tmp_data = 0x19961219; if (afl_attached) { if (sys_write(AFL_FORKSRV_FD + 1, (char *)&__tmp_data, 4) != 4) { utils_error(hello_err_str, true); } } } /* * step (7). main while-loop */ CRSLoopType crs_loop = CRS_LOOP_NONE; uint32_t cur_execs = 0; while (true) { // step (7.1). [if: AFL_ATTACHED && !CRS_LOOP] // wait AFL's signal if (afl_attached && !crs_loop) { int __tmp_data; if (sys_read(AFL_FORKSRV_FD, (char *)&__tmp_data, 4) != 4) { utils_error(read_err_str, true); } } // step (7.2). 
do fork // XXX: just before the fork, we need to clean CRS CRS_INFO(lock) = 0; CRS_INFO(crash_ip) = CRS_INVALID_IP; CRS_INFO(self_fired) = 0; pid_t tid = 0; pid_t client_pid = sys_clone(CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, 0, NULL, &tid, NULL); if (client_pid < 0) { utils_error(fork_err_str, true); } if (client_pid == 0) { /* * child process */ /* * Update at Nov 2021: * For binaries compiled with ASAN, it seems there always are some * new processes (?). Hence, it would be better to isolate the * client into a new process group. */ // set pgid, to avoid kill fork_server when sending signal // or we can setsid to directly isolate the process if (sys_setpgid(0, 0)) { utils_error(setpgid_err_str, true); } /* * XXX: To handle multi-thread/-process programs, a safe approach is * to change client's process group, and every time a potential * patch crash happens, the signal hander kills all processes in the * client's process group. The following code can be used to * implement this approach: * * ------ * // set pgid, to avoid kill fork_server when sending signal * // or we can setsid to directly isolate the process * if (sys_setpgid(0, 0)) { * utils_error(setpgid_err_str, true); * } * ------ * * However, the disadvantage of this approach is that, every time * the fork server creates a new client, the *setpgid* syscall will * bring additional overhead (seems negligible tbh). * * Alternatively, we can use following code in the signal handler to * kill client and the crashed process: * * ------ * sys_kill(client_pid, SIGKILL); * sys_kill(sys_getpid(), SIGKILL); * ------ * * Instead of killing the whole process group like following * * ------ * sys_kill(0, SIGKILL); * ------ * * It is helpful when facing multi-thread/-process programs. * Additionally, it is also good to know that a child process can * send signal to its parent process (as if they share the same user * ID or effective user ID). 
But it may also leave some other * processes zombie (e.g., the parent process creates two child * processes). * * However, a good obversation is that, vanilla AFL can also have * such problem. Imagine that a multi-process program has a crashed * parent process, AFL will not take care of the client processes * anymore and leave them zombie. * * Hence, we choose the latter approach to reduce overhead. */ /* * XXX: actually, I do not know why AFL does not setpgid/setsid when * forking new processes. If the target program invoked kill(0, * SIGXXX), the fork server would be killed too, imo. */ RW_PAGE_INFO(client_pid) = tid; // update pid and tid in TLS, so that when the child process sends // signal to itself, it will not mis-send to its parent. // // check glibc source code for more information: // // https://code.woboq.org/userspace/glibc/sysdeps/nptl/fork.c.html#76 // for how glibc implements fork() as a wrapper of syscall clone; // // https://code.woboq.org/userspace/glibc/nptl/descr.h.html#pthread // for the memory layout of struct pthread in glibc. register unsigned int tid_ asm("eax") = (unsigned int)tid; asm(".intel_syntax noprefix\n" " mov DWORD PTR fs:0x2d0, eax;\n" " mov DWORD PTR fs:0x2d4, eax;\n" : : "r"(tid_) :); // close uncessary file descriptors sys_close(AFL_FORKSRV_FD); sys_close(AFL_FORKSRV_FD + 1); sys_close(CRS_COMM_FD); RW_PAGE_INFO(afl_prev_id) = 0; break; } // step (7.3). [if: AFL_ATTACHED && !CRS_LOOP] // tell AFL that the client is started if (afl_attached && !crs_loop) { sys_write(AFL_FORKSRV_FD + 1, (char *)&client_pid, 4); } // step (7.4). notify the daemon about the client_pid if crs_loop if (crs_loop) { sys_write(CRS_COMM_FD, (char *)&client_pid, 4); } // step (7.5). 
wait till the client stop int client_status = 0; if (sys_wait4(client_pid, &client_status, 0, NULL) < 0) { utils_error(wait4_err_str, true); } // update client_status int self_fired = CRS_INFO(self_fired); client_status = PACK_STATUS(client_status, self_fired); #ifdef DEBUG utils_puts(status_str, false); utils_output_number(client_status); #endif // step (7.6). notify the daemon that the crs run is done if (crs_loop) { sys_write(CRS_COMM_FD, (char *)&client_pid, 4); } // step (7.7). check the client's status // XXX: after going into the ABNORMAL_STATUS branch, the program is // either crashed by a patch (which will lead to a crs_loop) or a // subject bug. // XXX: a new situation is that the program is under delta debugging. if (IS_ABNORMAL_STATUS(client_status) || crs_loop == CRS_LOOP_DEBUG) { TALK_TO_DAEMON:; // step (7.7.1). notify the daemon and wait response // + sending out the status // + receiving the status of crash site (CRS) int crs_status = CRS_STATUS_CRASH; { sys_write(CRS_COMM_FD, (char *)&client_status, 4); sys_read(CRS_COMM_FD, (char *)&crs_status, 4); } // step (7.7.2). 
if there is a crash and it is not caused by a // latent bug if (crs_status != CRS_STATUS_CRASH && crs_status != CRS_STATUS_NORMAL) { // check remmap if (crs_status == CRS_STATUS_REMMAP) { // munmap current shadow file (due to the different size) if (sys_munmap(RW_PAGE_INFO(shadow_base), RW_PAGE_INFO(shadow_size))) { utils_error(mumap_err_str, true); } // remmap it RW_PAGE_INFO(shadow_size) = utils_mmap_external_file( RW_PAGE_INFO(shadow_path), false, RW_PAGE_INFO(shadow_base), PROT_READ | PROT_EXEC); if (RW_PAGE_INFO(retaddr_mapping_used)) { // munmap current retaddr mapping if (sys_munmap(RW_PAGE_INFO(retaddr_mapping_base), RW_PAGE_INFO(retaddr_mapping_size))) { utils_error(mumap_err_str, true); } // remmap it RW_PAGE_INFO(retaddr_mapping_size) = utils_mmap_external_file( RW_PAGE_INFO(retaddr_mapping_path), false, RW_PAGE_INFO(retaddr_mapping_base), PROT_READ); } } // check delta debugging mode if (crs_status == CRS_STATUS_DEBUG) { // the next loop is forced to communicate with the daemon crs_loop = CRS_LOOP_DEBUG; } else { // we are going into the CRS loop which is out of AFL's // control crs_loop = CRS_LOOP_INCR; } // clear shared memory { register uintptr_t dst asm("rdi") = (uintptr_t)AFL_MAP_ADDR; register uintptr_t n asm("rcx") = (uintptr_t)AFL_MAP_SIZE; #ifdef AVX512 // (AVX512F version) asm volatile( ".intel_syntax noprefix\n" " xor rax, rax;\n" " vpbroadcastd zmm16, eax;\n" " lea rax, [rdi + rcx];\n" " sub rdi, rax;\n" "loop:\n" " vmovdqa64 [rax + rdi], zmm16;\n" " add rdi, 0x40;\n" " jnz loop;\n" : : "r"(dst), "r"(n) : "rax", "zmm16"); #else // (SSE version) asm volatile( ".intel_syntax noprefix\n" " xorps xmm0, xmm0;\n" " lea rax, [rdi + rcx];\n" " sub rdi, rax;\n" "loop:\n" " movdqa [rax + rdi], xmm0;\n" " add rdi, 0x10;\n" " jnz loop;\n" : : "r"(dst), "r"(n) : "rax", "xmm0"); #endif } // go into CRS loop continue; } // If the program has reached this part, it indicates a real // crash has occured. 
Here, we need to reset client_status as // any suspect status, here we choose SIGKILL if (IS_SUSPECT_STATUS(client_status)) { // XXX: please MAKE SURE **SIGKILL** is used, otherwise it is // possible to meet dead lock in the signal handler client_status = SIGKILL; } } else if (check_execs) { // handle checking runs when current execution is normal if (cur_execs++ == check_execs) { cur_execs = 0; goto TALK_TO_DAEMON; } } // step (7.8). handle any other situation which is not caused by // patching // [if: AFL_ATTCHED]: notify AFL and loop // [if: !AFL_ATTACHED]: exit as normal or kill self with the same // signal crs_loop = CRS_LOOP_NONE; if (afl_attached) { sys_write(AFL_FORKSRV_FD + 1, (char *)&client_status, 4); } else { // notify the daemon is exited normally sys_write(CRS_COMM_FD, (char *)&client_status, 4); // XXX: in case of any hooked signal if (WIFEXITED(client_status)) { sys_exit(WEXITSTATUS(client_status)); } else if (WIFSIGNALED(client_status)) { // XXX: if the daemon already identified this crash, it will // stop automatically // XXX: we are using SIGKILL which cannot be caught by any // signal handler sys_kill(0, WTERMSIG(client_status)); } else { sys_kill(0, WSTOPSIG(client_status)); } } } return; } ================================================ FILE: src/fork_server.h ================================================ /* * fork_server.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __FORK_SERVER_H #define __FORK_SERVER_H #include "afl_config.h" #include "crs_config.h" #include "loader.h" typedef enum crs_loop_type { CRS_LOOP_NONE = 0, // not a crs loop CRS_LOOP_INCR, // crs loop caused by incremental rewriting CRS_LOOP_DEBUG, // crs loop caused by delta debugging } CRSLoopType; #endif ================================================ FILE: src/frontend.c ================================================ /* * frontend.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /* * Frontend for OURTOOL */ #include "afl_config.h" #include "libstochfuzz.h" #include #include #include #include #include #include #include /* * Display usage hints. */ static void usage(const char *argv0, int ret_status); /* * Parse arguments */ static inline int parse_args(int argc, const char **argv); /* * Handle different modes */ static inline void mode_disasm(int argc, const char **argv); static inline void mode_patch(int argc, const char **argv); static inline void mode_view(int argc, const char **argv); static inline void mode_run(int argc, const char **argv); static inline void mode_start(int argc, const char **argv); static void usage(const char *argv0, int ret_status) { z_sayf( "\n%s [ options ] -- target_binary [ ... 
] \n\n" "Mode settings:\n\n" " -S - start a background daemon and wait for a fuzzer to " "attach (defualt mode)\n" " -R - dry run target_binary with given arguments without " "an attached fuzzer\n" " -P - patch target_binary without incremental rewriting\n" " -D - probabilistic disassembly without rewriting\n" " -V - show currently observed breakpoints\n\n" "Rewriting settings:\n\n" " -g - trace previous PC\n" " -c - count the number of basic blocks with conflicting " "hash values\n" " -d - disable instrumentation optimization\n" " -r - assume the return addresses are only used by RET " "instructions\n" " -e - install the fork server at the entrypoint instead " "of the main function\n" " -f - forcedly assume there is data interleaving with " "code\n" " -i - ignore the call-fallthrough edges to defense " "RET-misusing obfuscation\n\n" "Other stuff:\n\n" " -h - print this help\n" " -x execs - set the number of executions after which a checking " "run will be triggered\n" " set it as zero to disable checking runs " "(default: %u)\n" " -t msec - set the timeout for each daemon-triggering " "execution\n" " set it as zero to ignore the timeout " "(default: %lu ms)\n" #ifdef DEBUG " -l level - set the log level, including TRACE, DEBUG, INFO, " "WARN, ERROR, and FATAL (default: INFO)\n\n", #else " -l level - set the log level, including INFO, WARN, ERROR, and " "FATAL (default: INFO)\n\n", #endif argv0, SYS_CHECK_EXECS, SYS_TIMEOUT); exit(ret_status); } static int parse_args(int argc, const char **argv) { z_sayf(COLOR(CYAN, OURTOOL) " " COLOR( BRIGHT, VERSION) " by \n"); bool timeout_given = false; bool log_level_given = false; bool check_execs_given = false; int opt = 0; while ((opt = getopt(argc, (char *const *)argv, "+SRPDVgceidrfnht:l:x:")) > 0) { switch (opt) { #define __MODE_CASE(c, m) \ case c: \ if (sys_optargs.mode != SYSMODE_NONE) { \ EXITME("multiple mode settings not supported"); \ } \ sys_optargs.mode = SYSMODE_##m; \ break; __MODE_CASE('S', DAEMON); 
        __MODE_CASE('R', RUN);
        __MODE_CASE('P', PATCH);
        __MODE_CASE('D', DISASM);
        __MODE_CASE('V', VIEW);
#undef __MODE_CASE

// Each rewriting option simply sets one boolean flag in sys_optargs.r.
#define __SETTING_CASE(c, m) \
    case c:                  \
        sys_optargs.r.m = true; \
        break;

        __SETTING_CASE('g', trace_pc);
        __SETTING_CASE('c', count_conflict);
        __SETTING_CASE('d', disable_opt);
        __SETTING_CASE('r', safe_ret);
        __SETTING_CASE('e', instrument_early);
        __SETTING_CASE('f', force_pdisasm);
        __SETTING_CASE('i', disable_callthrough);
        // This is a secret undocumented option! It is mainly used for
        // Github Actions which has memory limitation. Forcibly using linear
        // disassembly (which means not doing pre-disassembly and patching
        // all .text) makes smaller memory usage.
        __SETTING_CASE('n', force_linear);
#undef __SETTING_CASE

// Case-insensitive match of a known level name; on match, set the level and
// jump out of the candidate list.
#define __LOG_LEVEL_STRCASECMP(l, s)        \
    do {                                    \
        if (!strcasecmp(#l, s)) {           \
            sys_optargs.log_level = LOG_##l; \
            goto DONE;                      \
        }                                   \
    } while (0)

        case 'l':
            if (log_level_given) {
                EXITME("multiple -l options not supported");
            }
            log_level_given = true;
            // try each known level name in turn; first match wins
            __LOG_LEVEL_STRCASECMP(TRACE, optarg);
            __LOG_LEVEL_STRCASECMP(DEBUG, optarg);
            __LOG_LEVEL_STRCASECMP(INFO, optarg);
            __LOG_LEVEL_STRCASECMP(WARN, optarg);
            __LOG_LEVEL_STRCASECMP(ERROR, optarg);
            __LOG_LEVEL_STRCASECMP(FATAL, optarg);
            // unknown name: warn but keep the current (default) level
            z_warn("invalid log level: \"%s\"", optarg);
        DONE:
            break;
#undef __LOG_LEVEL_STRCASECMP

        case 't':
            if (timeout_given) {
                EXITME("multiple -t options not supported");
            }
            timeout_given = true;
            if (z_sscanf(optarg, "%lu", &sys_optargs.timeout) < 1) {
                EXITME("bad syntax used for -t");
            }
            break;

        case 'x':
            if (check_execs_given) {
                EXITME("multiple -x options not supported");
            }
            check_execs_given = true;
            if (z_sscanf(optarg, "%u", &sys_optargs.check_execs) < 1) {
                EXITME("bad syntax used for -x");
            }
            if (sys_optargs.check_execs < 500) {
                z_warn(
                    "frequent checking runs will significatly impact the "
                    "fuzzing efficiency");
            }
            break;

        case 'h':
            usage(argv[0], 0);
            break;

        default:
            usage(argv[0], 1);
        }
    }

    // Validating arguments
    if (argc == optind) {
        // no target binary given after the options
        usage(argv[0], 1);
    }
    if (sys_optargs.mode ==
SYSMODE_NONE) {
        // no mode option given: default to daemon mode (-S)
        sys_optargs.mode = SYSMODE_DAEMON;
    }
    if (sys_optargs.mode == SYSMODE_DISASM) {
        // Under disasm mode, we forcibly use probabilistic disassembly
        sys_optargs.r.force_pdisasm = true;
        sys_optargs.r.force_linear = false;
    }
    if (sys_optargs.r.force_pdisasm && sys_optargs.r.force_linear) {
        EXITME("-f and -n cannot be set together");
    }
    if (sys_optargs.r.instrument_early) {
        z_warn(
            "-e option is experimental, it may cause invalid crashes on a "
            "different system other than Ubuntu 18.04");
    }

    // index of the first non-option argument (the target binary)
    return optind;
}

/*
 * Entry point: parse command-line options, initialize the framework, and
 * dispatch to the handler of the selected mode.
 */
int main(int argc, const char **argv) {
    // the whole system assumes 4 KiB pages
    assert(PAGE_SIZE == 0x1000);
    assert(PAGE_SIZE_POW2 == 12);

    // shift argc/argv past the consumed options, so that argv[0] becomes the
    // target binary for the mode handlers
    int next_idx = parse_args(argc, argv);
    argc -= next_idx;
    argv += next_idx;

    z_log_set_level(sys_optargs.log_level);

    Z_INIT;
    switch (sys_optargs.mode) {
        case SYSMODE_DAEMON:
            mode_start(argc, argv);
            break;
        case SYSMODE_RUN:
            mode_run(argc, argv);
            break;
        case SYSMODE_PATCH:
            mode_patch(argc, argv);
            break;
        case SYSMODE_DISASM:
            mode_disasm(argc, argv);
            break;
        case SYSMODE_VIEW:
            mode_view(argc, argv);
            break;
        default:
            EXITME("unreachable");
    }
    Z_FINI;

    return 0;
}

/*
 * -P mode: patch the target binary (no incremental rewriting).
 */
static inline void mode_patch(int argc, const char **argv) {
    const char *target = argv[0];
    z_info("target binary: %s", target);

    Core *core = z_core_create(target, &sys_optargs);
    z_core_activate(core);
    z_core_destroy(core);
}

/*
 * -D mode: probabilistic disassembly only (no rewriting); replay logged
 * crashpoints and describe the patcher's view.
 */
static inline void mode_disasm(int argc, const char **argv) {
    const char *target = argv[0];
    z_info("target binary: %s", target);

    Core *core = z_core_create(target, &sys_optargs);
    z_diagnoser_apply_logged_crashpoints(core->diagnoser);
    z_patcher_describe(core->patcher);
    z_core_destroy(core);
}

/*
 * -V mode: print the currently observed crashpoints (address, type, and
 * whether it was judged a real crash).
 */
static inline void mode_view(int argc, const char **argv) {
    const char *target = argv[0];
    z_info("target binary: %s", target);

    Core *core = z_core_create(target, &sys_optargs);

    GQueue *cps = z_diagnoser_get_crashpoints(core->diagnoser);
    GList *l = cps->head;

    z_sayf("%-20s%-10s%-6s\n", "Address", "CPType", "Real?");
    // each crashpoint occupies three consecutive list cells:
    // (address, type, is_real) -- NOTE(review): inferred from the stride-3
    // walk below; confirm against z_diagnoser_get_crashpoints
    while (l != NULL) {
        addr_t addr = (addr_t)l->data;
        l = l->next;
        CPType type =
(CPType)l->data; l = l->next; bool is_real = !!(l->data); z_sayf("%-#20lx%-10s%-6s\n", addr, z_cptype_string(type), (is_real ? "True" : "False")); l = l->next; } z_core_destroy(core); } static inline void mode_run(int argc, const char **argv) { const char *target = argv[0]; z_info("target binary: %s", target); Core *core = z_core_create(target, &sys_optargs); z_core_activate(core); int status = z_core_perform_dry_run(core, argc, argv); z_core_destroy(core); if (IS_ABNORMAL_STATUS(status)) { z_info(COLOR(RED, "not a normal exit (status: %#x)"), status); } // follow how the client is terminated if (WIFEXITED(status)) { exit(WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { kill(getpid(), WTERMSIG(status)); } else { kill(getpid(), WSTOPSIG(status)); } } static inline void mode_start(int argc, const char **argv) { #ifdef BINARY_SEARCH_INVALID_CRASH EXITME( "daemon mode is not supported when doing binary search for invalid " "crash"); #else const char *target = argv[0]; z_info("target binary: %s", target); Core *core = z_core_create(target, &sys_optargs); z_core_activate(core); z_core_start_daemon(core, INVALID_FD); z_core_destroy(core); #endif } ================================================ FILE: src/get_signal_stack_size.sh ================================================ #!/bin/bash echo " #include #include int main(int argc, char **argv) { int sz = SIGSTKSZ; if (sz < MINSIGSTKSZ) { sz = MINSIGSTKSZ; } printf(\"%#x\n\", sz * 2); }" > /tmp/__sigstksz.c clang /tmp/__sigstksz.c -o /tmp/__sigstksz /tmp/__sigstksz ================================================ FILE: src/interval_splay.c ================================================ /* * interval_splay.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

#include "interval_splay.h"
#include "utils.h"

/*
 * Print contents of a tree, indented by depth.
 */
Z_PRIVATE void __splay_print_indented(Snode *root, int depth);

/*
 * Rotate child in given direction to root
 */
Z_PRIVATE void __splay_rotate(Snode **root, int direction);

/*
 * Link operations for top-down splay
 *
 * This pastes a node in as !d-most node in subtree on side d
 */
Z_PRIVATE void __splay_link(Snode ***hook, int d, Snode *node);

/*
 * Splay last element on path to target to root
 *
 * NOTE! Remember to link parent with splay-ed subtree
 */
Z_PRIVATE Snode *__splay(Snode **root, addr_t target);

/*
 * Return a list of Snode * in order, based on root.
 */
Z_PRIVATE Buffer *__splay_sorted_list(Snode *root);

/*
 * Setter and Getter
 */
DEFINE_SETTER(Snode, snode, addr_t, addr);
DEFINE_SETTER(Snode, snode, size_t, len);
DEFINE_SETTER(Snode, snode, void *, data);
DEFINE_GETTER(Snode, snode, size_t, len);
DEFINE_GETTER(Snode, snode, void *, data);
DEFINE_GETTER(Splay, splay, size_t, node_count);

/*
 * Overloaded Setter and Getter
 */
// lower_bound is simply the interval's start address
OVERLOAD_GETTER(Snode, snode, addr_t, lower_bound) { return snode->addr; }

// upper_bound is the last address covered by the interval (inclusive)
OVERLOAD_GETTER(Snode, snode, addr_t, upper_bound) {
    return snode->addr + snode->len - 1;
}

/*
 * How far to indent each level of the tree.
 */
#define SPLAY_INDENTATION_LEVEL 2

// In-order walk: print the left subtree, then this node's inclusive interval
// [addr, addr + len - 1] and its data pointer, then the right subtree,
// indenting SPLAY_INDENTATION_LEVEL spaces per level of depth.
Z_PRIVATE void __splay_print_indented(Snode *root, int depth) {
    int i;

    if (root != SPLAY_EMPTY) {
        __splay_print_indented(root->child[SPLAY_LEFT], depth + 1);
        for (i = 0; i < SPLAY_INDENTATION_LEVEL * depth; i++) {
            putchar(' ');
        }
        z_sayf("[%ld, %ld](%p)\n", root->addr, root->addr + root->len - 1,
               root->data);
        __splay_print_indented(root->child[SPLAY_RIGHT], depth + 1);
    }
}

Z_PRIVATE void __splay_rotate(Snode **root, int direction) {
    Snode *x;
    Snode *y;
    Snode *b;

    /*
     *      y           x
     *     / \         / \
     *    x   C  <=>  A   y
     *   / \             / \
     *  A   B           B   C
     */
    y = *root;
    assert(y);
    x = y->child[direction];
    assert(x);
    b = x->child[!direction];

    /* do the rotation */
    *root = x;
    x->child[!direction] = y;
    y->child[direction] = b;
}

Z_PRIVATE void __splay_link(Snode ***hook, int d, Snode *node) {
    *hook[d] = node;
    // Strictly speaking we don't need to do this, but it allows printing the
    // partial trees.
    node->child[!d] = NULL;
    hook[d] = &node->child[!d];
}

Z_PRIVATE Snode *__splay(Snode **root, addr_t target) {
    Snode *t;
    Snode *child;
    Snode *grandchild;
    Snode *top[SPLAY_NUM_CHILDREN];  /* accumulator trees that will become
                                        subtrees of new root */
    Snode **hook[SPLAY_NUM_CHILDREN]; /* where to link new elements into
                                         accumulator trees */
    int d;
    int dChild;      /* direction of child */
    int dGrandchild; /* direction of grandchild */

    // we don't need to keep following this pointer, we'll just fix it at the
    // end.
    assert(root != NULL);
    t = *root;

    // Don't do anything to an empty tree.
    if (t == SPLAY_EMPTY) {
        return NULL;
    }

    // Ok, tree is not empty, start chopping it up.
    for (d = 0; d < SPLAY_NUM_CHILDREN; d++) {
        top[d] = NULL;
        hook[d] = &top[d];
    }

    // Keep going until we hit the addr or we would hit a null pointer in the
    // child.
    while (t->addr != target &&
           (child = t->child[dChild = t->addr < target]) != NULL) {
        // Child is not null.
        grandchild = child->child[dGrandchild = child->addr < target];
        if (grandchild == NULL || child->addr == target) {
            /* zig case; paste root into opposite-side hook */
            __splay_link(hook, !dChild, t);
            t = child;
            /* we can break because we know we will hit child == NULL next */
            break;
        } else if (dChild == dGrandchild) {
            /* zig-zig case */
            /* rotate and then hook up child */
            /* grandChild becomes new root */
            __splay_rotate(&t, dChild);
            __splay_link(hook, !dChild, child);
            t = grandchild;
        } else {
            /* zig-zag case */
            /* root goes to !dChild, child goes to dChild, grandchild goes to
             * root */
            __splay_link(hook, !dChild, t);
            __splay_link(hook, dChild, child);
            t = grandchild;
        }
    }

    // Now reassemble the tree.
    // t's children go in hooks, top nodes become t's new children.
    for (d = 0; d < SPLAY_NUM_CHILDREN; d++) {
        *hook[d] = t->child[d];
        t->child[d] = top[d];
    }

    // And put t back in *root.
    return (*root = t);
}

// In-order (address-sorted) flattening of the subtree rooted at *root*; the
// returned Buffer holds raw Snode * pointers, appended left subtree first,
// then the node itself, then the right subtree.
Z_PRIVATE Buffer *__splay_sorted_list(Snode *root) {
    Buffer *list = NULL;

    assert(root != NULL);

    if (root->child[SPLAY_LEFT] != NULL)
        list = __splay_sorted_list(root->child[SPLAY_LEFT]);
    else
        list = z_buffer_create(NULL, 0);

    z_buffer_append_raw(list, (const uint8_t *)&root, sizeof(Snode *));

    if (root->child[SPLAY_RIGHT] != NULL) {
        Buffer *rlist = __splay_sorted_list(root->child[SPLAY_RIGHT]);
        z_buffer_append(list, rlist);
        z_buffer_destroy(rlist);
    }

    return list;
}

Z_API Snode *z_snode_create(addr_t addr, size_t len, void *data,
                            void (*data_destroy)(void *)) {
    // empty intervals are not representable
    assert(len > 0);

    Snode *e = STRUCT_ALLOC(Snode);
    e->addr = addr;
    e->len = len;
    e->data = data;
    e->data_destroy = data_destroy;
    return e;
}

Z_API void z_snode_destroy(Snode *node) {
    if (node != NULL) {
        // free the payload first (if a destructor was registered), then the
        // node itself
        if (node->data_destroy)
            (*(node->data_destroy))(node->data);
        z_free(node);
    } else {
        z_trace("try to delete a NULL node");
    }
}

Z_API Splay *z_splay_create(void *(*merge_fcn)(void *, void *)) {
    Splay *t = STRUCT_ALLOC(Splay);
    t->root = SPLAY_EMPTY;
    t->node_count = 0;
    // NULL merge_fcn means adjacent intervals are never merged on insert
    t->merge_fcn = merge_fcn;
    return t;
}

Z_API void
z_splay_destroy(Splay *splay) {
    // We want to avoid doing this recursively, because the tree might be deep.
    // So we will repeatedly delete the root until the tree is empty.
    while (splay->root) {
        Snode *e = z_splay_delete(splay, splay->root->addr);
        z_snode_destroy(e);
    }
    assert(splay->node_count == 0);
    z_free(splay);
}

Z_API bool z_splay_interval_overlap(Splay *splay, Snode *node) {
    Snode *t = NULL;

    // bring the node closest to node->addr to the root
    __splay(&(splay->root), node->addr);
    SPLAY_ROOT(splay, t);

    // If splay is empty, return false
    if (t == SPLAY_EMPTY)
        return false;

    // If addr already exists, return true;
    if (t->addr == node->addr)
        return true;

    if (t->addr < node->addr) {
        Snode *e = t->child[SPLAY_RIGHT];
        // the root starts before node: overlap if the root extends into it
        if (t->addr + t->len > node->addr)
            return true;
        if (e != NULL) {
            // Try to find the smallest node in the right tree
            t->child[SPLAY_RIGHT] = __splay(&e, 0);
            // overlap if node extends into its successor
            if (node->addr + node->len > e->addr)
                return true;
        }
    } else {
        Snode *e = t->child[SPLAY_LEFT];
        // the root starts after node: overlap if node extends into the root
        if (node->addr + node->len > t->addr)
            return true;
        if (e != NULL) {
            // Try to find the biggest node in the left tree
            t->child[SPLAY_LEFT] = __splay(&e, ADDR_MAX);
            // overlap if node's predecessor extends into node
            if (e->addr + e->len > node->addr)
                return true;
        }
    }

    return false;
}

Z_API Snode *z_splay_insert(Splay *splay, Snode *node) {
    Snode *e;
    Snode *t;
    int d; // Which side of e to put old root on

    if (z_splay_interval_overlap(splay, node)) {
        // Overlap
        z_trace("node([%ld, %ld]) is overlapped with existed nodes",
                node->addr, node->addr + node->len - 1);
        return NULL;
    }

    __splay(&(splay->root), node->addr);
    SPLAY_ROOT(splay, t);

    e = node;
    if (t == NULL) {
        e->child[SPLAY_LEFT] = e->child[SPLAY_RIGHT] = NULL;
    } else {
        // Split tree and put e on top.
        // We know t is closest to e, so we don't have to move anything else.
        d = t->addr > e->addr;
        e->child[d] = t;
        e->child[!d] = t->child[!d];
        t->child[!d] = NULL;
    }

    // Either way we stuff e in *splay.
    splay->root = e;
    splay->node_count += 1;

    // Check merge.
    if (splay->merge_fcn) {
        Snode *left = e->child[SPLAY_LEFT];
        Snode *right = e->child[SPLAY_RIGHT];

        // splay the in-order predecessor / successor of e to the top of its
        // subtree, so that any interval adjacent to e is directly reachable
        if (left != NULL)
            e->child[SPLAY_LEFT] = __splay(&left, ADDR_MAX);
        if (right != NULL)
            e->child[SPLAY_RIGHT] = __splay(&right, 0);

        // left neighbor ends exactly where e starts: absorb it into e
        if ((left != NULL) && (left->addr + left->len == e->addr)) {
            Snode *deleted = z_splay_delete(splay, left->addr);
            assert(deleted == left);
            e->addr = left->addr;
            e->len += left->len;
            e->data = (*(splay->merge_fcn))(left->data, e->data);
            z_snode_destroy(deleted);
        }

        // e ends exactly where the right neighbor starts: absorb it into e
        if ((right != NULL) && (e->addr + e->len == right->addr)) {
            Snode *deleted = z_splay_delete(splay, right->addr);
            assert(deleted == right);
            e->len += right->len;
            e->data = (*(splay->merge_fcn))(e->data, right->data);
            z_snode_destroy(deleted);
        }
    }

    return e;
}

Z_API Snode *z_splay_delete(Splay *splay, addr_t addr) {
    Snode *left;
    Snode *right;
    Snode *deleted = NULL;

    __splay(&(splay->root), addr);
    if (splay->root && splay->root->addr == addr) {
        // Save pointers to kids.
        left = splay->root->child[SPLAY_LEFT];
        right = splay->root->child[SPLAY_RIGHT];
        deleted = splay->root;

        splay->node_count -= 1;
        // NOTE(review): node_count is a size_t, so this assert can never
        // fire; it only documents the intent that the counter not underflow
        assert(splay->node_count >= 0);

        // If left is empty, just return right.
        if (left == NULL) {
            splay->root = right;
        } else {
            // First splay max element in left to top.
            __splay(&left, ADDR_MAX);
            // Now paste in right subtree.
            left->child[SPLAY_RIGHT] = right;
            // Return left
            splay->root = left;
        }
    } else {
        z_trace("node([%ld, ?]) does not exist", addr);
    }

    return deleted;
}

Z_API Snode *z_splay_search(Splay *splay, addr_t addr) {
    assert(splay != NULL);
    Snode *t;

    if (splay->root == NULL)
        return NULL;

    __splay(&(splay->root), addr);
    SPLAY_ROOT(splay, t);

    if (t->addr <= addr) {
        // the splayed root starts at or before addr: it is the only candidate
        // containing addr
        if (z_snode_get_upper_bound(t) >= addr)
            return t;
        else
            return NULL;
    } else {
        // the root starts after addr: the containing interval (if any) is the
        // maximum of the left subtree
        if (t->child[SPLAY_LEFT]) {
            __splay(&(t->child[SPLAY_LEFT]), ADDR_MAX);
            if (z_snode_get_lower_bound(t->child[SPLAY_LEFT]) <= addr &&
                z_snode_get_upper_bound(t->child[SPLAY_LEFT]) >= addr)
                return t->child[SPLAY_LEFT];
            else
                return NULL;
        } else {
            return NULL;
        }
    }
}

Z_API inline Snode *z_splay_max(Splay *splay) {
    assert(splay != NULL);
    return __splay(&(splay->root), ADDR_MAX);
}

Z_API inline Snode *z_splay_min(Splay *splay) {
    assert(splay != NULL);
    return __splay(&(splay->root), 0);
}

Z_API Buffer *z_splay_sorted_list(Splay *splay) {
    if (splay->root)
        return __splay_sorted_list(splay->root);
    else
        return NULL;
}

Z_API void z_splay_print(Splay *splay) {
    Snode *t;
    SPLAY_ROOT(splay, t);
    z_sayf("number of current nodes: %ld\n", z_splay_get_node_count(splay));
    __splay_print_indented(t, 0);
}

Z_API void *z_direct_merge(void *_x, void *_y) { return NULL; }


================================================
FILE: src/interval_splay.h
================================================
/*
 * interval_splay.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

#ifndef __INTERVAL_SPLAY_H
#define __INTERVAL_SPLAY_H

#include "buffer.h"
#include "config.h"

/*
 * Use a SPLAY tree to support interval operations.
 * Keys are start addresses; each node owns a half-open-free, inclusive
 * interval [addr, addr + len - 1].
 */
#define SPLAY_LEFT 0
#define SPLAY_RIGHT 1
#define SPLAY_NUM_CHILDREN 2

STRUCT(Snode, {
    // We'll make this an array so that we can make some operations symmetric.
    STRUCT_REALNAME(Snode) * child[SPLAY_NUM_CHILDREN];
    // Key for splay
    addr_t addr;
    // Length of interval
    size_t len;
    // Data, NULL if not existing
    void *data;
    // Function used to free data
    void (*data_destroy)(void *);
});

STRUCT(Splay, {
    Snode *root;
    size_t node_count;
    void *(*merge_fcn)(void *, void *);
});

#define SPLAY_EMPTY NULL

// Fetch the root of a non-NULL splay into `node`.
#define SPLAY_ROOT(splay, node) \
    do {                        \
        assert(splay != NULL);  \
        node = splay->root;     \
    } while (0)

/*
 * Setter and Getter
 */
DECLARE_SETTER(Snode, snode, addr_t, addr);
DECLARE_SETTER(Snode, snode, size_t, len);
DECLARE_SETTER(Snode, snode, void *, data);

DECLARE_GETTER(Snode, snode, addr_t, lower_bound);
DECLARE_GETTER(Snode, snode, addr_t, upper_bound);
DECLARE_GETTER(Snode, snode, size_t, len);
DECLARE_GETTER(Snode, snode, void *, data);

DECLARE_GETTER(Splay, splay, size_t, node_count);

/*
 * Pack a Snode from scratch.
 */
Z_API Snode *z_snode_create(addr_t addr, size_t len, void *data,
                            void (*data_destroy)(void *));

/*
 * Unpack a Snode and its data.
 */
Z_API void z_snode_destroy(Snode *node);

/*
 * Create a splay.
 *
 * merge_fcn is used to merge data, and **NULL indicates the intervals will
 * not merge**.
 *
 * Note that it is Snode's responsibility to free the allocated memory,
 * instead of merge_fcn.
 */
Z_API Splay *z_splay_create(void *(*merge_fcn)(void *, void *));

/*
 * Free all elements of splay, and replace it with SPLAY_EMPTY.
 */
Z_API void z_splay_destroy(Splay *splay);

/*
 * Insert an element into splay, and return the inserted node, NULL if
 * overlapping.
 */
Z_API Snode *z_splay_insert(Splay *splay, Snode *node);

/*
 * Delete Snode starting from addr from splay.
 * Return the deleted node, NULL if the addr does not exist.
 */
Z_API Snode *z_splay_delete(Splay *splay, addr_t addr);

/*
 * Check whether node is overlapped with some nodes inside splay.
 * Return true if overlap, false otherwise.
 */
Z_API bool z_splay_interval_overlap(Splay *splay, Snode *node);

/*
 * Search a snode containing addr, return NULL if not exist.
 */
Z_API Snode *z_splay_search(Splay *splay, addr_t addr);

/*
 * Return the Snode with max address.
 */
Z_API inline Snode *z_splay_max(Splay *splay);

/*
 * Return the Snode with min address.
 */
Z_API inline Snode *z_splay_min(Splay *splay);

/*
 * Return a list of Snode * in order.
 */
Z_API Buffer *z_splay_sorted_list(Splay *splay);

/*
 * Pretty-print the contents of splay.
 */
Z_API void z_splay_print(Splay *splay);

/*
 * Default merging function: do nothing.
 */
Z_API void *z_direct_merge(void *_x, void *_y);

#endif

================================================
FILE: src/iterator.h
================================================
/*
 * iterator.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
*/ #ifndef __ITERATOR_H #define __ITERATOR_H #include "buffer.h" #include "config.h" // force evaluation #define __ITERATOR_2(x, y) __Iter_##y##_##x##_t #define __ITERATOR_1(x, y) __ITERATOR_2(x, y) #define __ITERATOR(x) __ITERATOR_1(x, __COUNTER__) /* * Iterator, only for local usage */ #define Iter(type, name) \ struct __ITERATOR(name) { \ type *__ptr; \ size_t __i; \ size_t __n; \ } name #define z_iter_init(iter, ptr, n) \ do { \ if (!ptr) { \ EXITME("try to init an invalid iterator"); \ } \ (iter).__ptr = (typeof((iter).__ptr))(ptr); \ (iter).__i = 0; \ (iter).__n = (n); \ } while (0) #define z_iter_init_from_buf(iter, buf) \ do { \ assert(buf); \ z_iter_init((iter), z_buffer_get_raw_buf(buf), \ z_buffer_get_size(buf) / sizeof(*((iter).__ptr))); \ } while (0) #define z_iter_next(iter) \ ({ \ typeof((iter).__ptr) __res = NULL; \ \ if ((iter).__i < (iter).__n) { \ __res = (iter).__ptr + (iter).__i; \ (iter).__i++; \ } \ \ __res; \ }) #define z_iter_is_empty(iter) ((iter).__i >= (iter).__n) #define z_iter_get_size(iter) ((iter).__n) #define z_iter_reset(iter) \ do { \ (iter).__i = 0; \ } while (0) #define z_iter_destroy(iter) /* empty */ #endif ================================================ FILE: src/library_functions/generate.py ================================================ import csv import os import sys template = """ Z_PRIVATE void __libfunc_load(GHashTable *d) { %s } """ filename = "library_functions_load.c" def generate_from_csv(filename): code = "" n = 0 with open(filename, "r") as csv_file: csv_reader = csv.reader(csv_file, delimiter=",") for row in csv_reader: if len(row) != 4: print("invalid input: %s" % line) exit(-1) demangled_name = row[0].strip() # useless currently name = row[1].strip() if len(name) == 0: print("empty library function name") exit(-1) lcfg = row[2].strip().upper() if len(lcfg) == 0: lcfg = "UNK" lra = row[3].strip().upper() if len(lra) == 0: lra = "UNK" code += """ LFuncInfo *lf_%d = __lfunc_info_create("%s", LCFG_%s, LRA_%s); 
    g_hash_table_insert(d, (gpointer)z_strdup("%s"), (gpointer)lf_%d);
""" % (
                n,
                name,
                lcfg,
                lra,
                name,
                n,
            )
            n += 1
    return code


if __name__ == "__main__":
    # argv[1] = CSV file name, argv[2] = directory containing it; the
    # generated C file is written into the same directory.
    # NOTE(review): the usage string below looks garbled by extraction
    # (the argument placeholders were stripped) -- confirm against the
    # Makefile before changing it.
    if len(sys.argv) != 3:
        print("generate.py: ./generate.py ")
        exit(-1)
    dirname = sys.argv[2].strip()
    csv_filename = os.path.join(dirname, sys.argv[1].strip())
    out_filename = os.path.join(dirname, filename)
    # Render the loader body and write the final C source.
    code = generate_from_csv(csv_filename)
    f = open(out_filename, "w")
    f.write(template % code)
    f.close()

================================================
FILE: src/library_functions/lib.csv
================================================
,abort,TERM,USED
,accept,RET,UNUSED
,access,RET,UNUSED
,acos,RET,UNUSED
,__acosf_finite,,
,alarm,RET,USED
,archive_read_close,,
,archive_read_data_block,,
,archive_read_free,,
,archive_read_new,,
,archive_read_next_header,,
,archive_read_open_memory,,
,archive_read_support_format_tar,,
,asin,RET,UNUSED
,__asprintf_chk,,
,__assert_fail,,
,atan,RET,UNUSED
,atan2,RET,UNUSED
,backtrace,,
,backtrace_symbols,,
,backtrace_symbols_fd,,
,bind,,
,BIO_ctrl,,
,BIO_free,,
,BIO_new,,
,BIO_new_mem_buf,,
,BIO_s_mem,,
,BN_bin2bn,,
,BN_bn2bin,,
,BN_bn2dec,,
,BN_bn2hex,,
,BN_clear_free,,
,BN_cmp,,
,BN_CTX_free,,
,BN_CTX_new,,
,BN_dec2bn,,
,BN_dup,,
,BN_free,,
,BN_is_bit_set,,
,BN_mod_exp,,
,BN_new,,
,BN_num_bits,,
,BN_rand,,
,BN_set_word,,
,BZ2_bzDecompress,,
,BZ2_bzDecompressEnd,,
,BZ2_bzDecompressInit,,
,BZ2_bzlibVersion,,
,calloc,RET,UNUSED
,ceil,RET,UNUSED
,cfmakeraw,,
,cfsetspeed,,
,clock,RET,UNUSED
,clock_gettime,RET,UNUSED
,close,RET,UNUSED
,closedir,RET,UNUSED
,connect,,
,cos,RET,UNUSED
,cosh,RET,UNUSED
,crc32,RET,UNUSED
,CRYPTO_free,,
,__ctype_b_loc,,
,__ctype_get_mb_cur_max,,
,__ctype_tolower_loc,,
,__ctype_toupper_loc,,
,__cxa_allocate_exception,,
,__cxa_atexit,,
,__cxa_bad_cast,,
,__cxa_bad_typeid,,
,__cxa_begin_catch,,
,__cxa_end_catch,,
,__cxa_free_exception,,
,__cxa_get_exception_ptr,,
,__cxa_guard_abort,,
,__cxa_guard_acquire,,
,__cxa_guard_release,,
,__cxa_rethrow,,
,__cxa_throw,,
,__cxa_throw_bad_array_new_length,, ,deflate,, ,deflateEnd,, ,deflateInit_,, ,deflateInit2_,, ,DES_ncbc_encrypt,, ,DES_set_key_unchecked,, ,DES_set_odd_parity,, ,dladdr,, ,dlclose,RET,UNUSED ,dlerror,RET,UNUSED ,dlopen,RET,UNUSED ,dlsym,RET,UNUSED ,DSA_do_sign,, ,DSA_do_verify,, ,DSA_free,, ,DSA_generate_key,, ,DSA_generate_parameters,, ,DSA_get0_key,, ,DSA_get0_pqg,, ,DSA_new,, ,DSA_set0_key,, ,DSA_set0_pqg,, ,DSA_SIG_free,, ,DSA_SIG_get0,, ,DSA_SIG_new,, ,DSA_SIG_set0,, ,DSA_size,, ,dup,RET,UNUSED ,dup2,RET,UNUSED ,__dynamic_cast,, ,ECDH_compute_key,, ,ECDSA_do_sign,, ,ECDSA_do_verify,, ,ECDSA_SIG_free,, ,ECDSA_SIG_get0,, ,ECDSA_SIG_new,, ,ECDSA_SIG_set0,, ,EC_GROUP_cmp,, ,EC_GROUP_get_curve_name,, ,EC_GROUP_get_degree,, ,EC_KEY_dup,, ,EC_KEY_free,, ,EC_KEY_generate_key,, ,EC_KEY_get0_group,, ,EC_KEY_get0_private_key,, ,EC_KEY_get0_public_key,, ,EC_KEY_new_by_curve_name,, ,EC_KEY_set_asn1_flag,, ,EC_KEY_set_public_key,, ,EC_POINT_clear_free,, ,EC_POINT_cmp,, ,EC_POINT_free,, ,EC_POINT_new,, ,EC_POINT_oct2point,, ,EC_POINT_point2oct,, ,ERR_error_string,, ,ERR_get_error,, ,__errno_location,RET,UNUSED ,EVP_aes_128_cbc,, ,EVP_aes_128_ctr,, ,EVP_aes_128_ecb,, ,EVP_aes_192_cbc,, ,EVP_aes_192_ctr,, ,EVP_aes_192_ecb,, ,EVP_aes_256_cbc,, ,EVP_aes_256_ctr,, ,EVP_aes_256_ecb,, ,EVP_bf_cbc,, ,EVP_CIPHER_CTX_free,, ,EVP_CIPHER_CTX_new,, ,EVP_CIPHER_CTX_reset,, ,EVP_CIPHER_CTX_set_padding,, ,EVP_DecryptInit_ex,, ,EVP_DecryptUpdate,, ,EVP_des_ede3_cbc,, ,EVP_DigestFinal,, ,EVP_DigestInit,, ,EVP_DigestInit_ex,, ,EVP_DigestUpdate,, ,EVP_EncryptInit_ex,, ,EVP_EncryptUpdate,, ,EVP_md5,, ,EVP_MD_CTX_free,, ,EVP_MD_CTX_new,, ,EVP_MD_CTX_reset,, ,EVP_ripemd160,, ,EVP_sha1,, ,EVP_sha256,, ,EVP_sha384,, ,EVP_sha512,, ,execl,, ,execv,, ,exit,TERM,UNUSED ,_exit,TERM,UNUSED ,exp,RET,UNUSED ,__exp_finite,, ,fclose,RET,UNUSED ,fcntl,RET,UNUSED ,__fdelt_chk,, ,fdopen,RET,UNUSED ,feof,RET,UNUSED ,ferror,RET,UNUSED ,fesetround,RET,UNUSED ,fflush,RET,UNUSED ,fgetc,RET,UNUSED ,fgets,RET,UNUSED 
,fileno,RET,UNUSED ,flock,RET,UNUSED ,floor,RET,UNUSED ,fma,RET,UNUSED ,fmin,RET,UNUSED ,fmod,RET,UNUSED ,fopen,RET,UNUSED ,fopen64,RET,UNUSED ,fork,, ,forkpty,, ,fprintf,RET,UNUSED ,__fprintf_chk,, ,fputc,RET,UNUSED ,fputs,RET,UNUSED ,fread,RET,UNUSED ,free,RET,UNUSED ,freeaddrinfo,RET,UNUSED ,fseek,RET,UNUSED ,fsync,, ,ftell,RET,UNUSED ,fwrite,RET,UNUSED ,__fxstat,, ,__fxstat64,, ,gai_strerror,, ,g_bytes_get_data,, ,g_bytes_ref,, ,g_bytes_unref,, ,gcry_mpi_aprint,, ,gcry_mpi_new,, ,gcry_mpi_powm,, ,gcry_mpi_release,, ,gcry_mpi_scan,, ,getaddrinfo,RET,UNUSED ,getcontext,, ,getcwd,RET,UNUSED ,__getdelim,, ,getdtablesize,, ,getenv,RET,UNUSED ,gethostbyname,, ,gethostbyname2,, ,gethostname,, ,getline,RET,UNUSED ,getloadavg,, ,getnameinfo,, ,getopt,RET,UNUSED ,getopt_long,, ,getpagesize,, ,getpeername,, ,getpid,, ,getpwnam,, ,getpwuid_r,, ,getsockname,, ,getsockopt,, ,gettimeofday,, ,getuid,, ,__gmon_start__,, ,gmtime,, ,gmtime_r,, ,GOMP_critical_name_end,, ,GOMP_critical_name_start,, ,GOMP_parallel,, ,grantpt,, ,g_unichar_combining_class,, ,g_unichar_compose,, ,g_unichar_decompose,, ,g_unichar_fully_decompose,, ,g_unichar_get_mirror_char,, ,g_unichar_get_script,, ,g_unichar_iswide,, ,g_unichar_type,, ,g_unicode_script_from_iso15924,, ,g_unicode_script_to_iso15924,, ,gzclose,, ,gzdirect,, ,gzdopen,, ,gzopen64,, ,gzread,, ,gzwrite,, ,__h_errno_location,, ,HMAC_CTX_free,, ,HMAC_CTX_new,, ,HMAC_CTX_reset,, ,HMAC_Final,, ,HMAC_Init_ex,, ,HMAC_Update,, ,hypot,, ,iconv,, ,iconv_close,, ,iconv_open,, ,if_indextoname,, ,inet_addr,, ,inet_ntop,, ,inet_pton,, ,inflate,, ,inflateEnd,, ,inflateInit_,, ,inflateInit2_,, ,inflateReset,, ,inflateSetDictionary,, ,ioctl,, ,_IO_putc,, ,isalnum,, ,isatty,, ,isdigit,RET,UNUSED ,__isoc99_sscanf,, ,isspace,RET,UNUSED ,isupper,RET,UNUSED ,iswdigit,RET,UNUSED ,iswspace,RET,UNUSED ,isxdigit,RET,UNUSED ,kill,, ,ldexp,, ,__libc_start_main,, ,listen,, ,localeconv,, ,localtime,, ,log,RET,UNUSED ,log10,RET,UNUSED ,log2,RET,UNUSED ,__log_finite,, 
,_longjmp,, ,__longjmp_chk,, ,lseek,RET,UNUSED ,lseek64,RET,UNUSED ,__lxstat,, ,__lxstat64,, ,lzma_alone_decoder,, ,lzma_code,, ,lzma_crc32,, ,lzma_end,, ,lzma_properties_decode,, ,lzma_raw_decoder,, ,lzma_stream_decoder,, ,madvise,, ,makecontext,, ,malloc,RET,UNUSED ,mbrtowc,, ,memchr,RET,UNUSED ,memcmp,RET,UNUSED ,memcpy,RET,UNUSED ,__memcpy_chk,, ,memmem,RET,UNUSED ,memmove,RET,UNUSED ,__memmove_chk,, ,memrchr,RET,UNUSED ,memset,RET,UNUSED ,__memset_chk,, ,mkdir,RET,UNUSED ,mkstemp,, ,mktime,, ,mlock,, ,mmap,RET,UNUSED ,mprotect,, ,munmap,RET,UNUSED ,nanosleep,, ,nearbyint,, ,nl_langinfo,, ,omp_destroy_nest_lock,, ,omp_get_num_threads,, ,omp_get_thread_num,, ,omp_init_nest_lock,, ,omp_in_parallel,, ,omp_set_nested,, ,omp_set_nest_lock,, ,omp_unset_nest_lock,, ,open,RET,UNUSED ,open64,RET,UNUSED ,opendir,RET,UNUSED ,openlog,, ,OPENSSL_init_crypto,, ,optarg,, ,opterr,, ,optind,, ,optopt,, ,PEM_read_bio_DSAPrivateKey,, ,PEM_read_bio_ECPrivateKey,, ,PEM_read_bio_RSAPrivateKey,, ,PEM_write_bio_DSAPrivateKey,, ,PEM_write_bio_ECPrivateKey,, ,PEM_write_bio_RSAPrivateKey,, ,perror,RET,UNUSED ,pipe,RET,UNUSED ,PKCS5_PBKDF2_HMAC_SHA1,, ,poll,, ,__poll_chk,, ,__popcountdi2,, ,posix_openpt,, ,posix_spawn_file_actions_addclose,, ,posix_spawn_file_actions_adddup2,, ,posix_spawn_file_actions_destroy,, ,posix_spawn_file_actions_init,, ,posix_spawnp,, ,pow,RET,UNUSED ,__pow_finite,, ,prctl,, ,__printf_chk,, ,pthread_cond_destroy,, ,pthread_cond_init,, ,pthread_cond_signal,, ,pthread_cond_timedwait,, ,pthread_cond_wait,, ,pthread_create,, ,pthread_equal,, ,pthread_getspecific,, ,pthread_join,, ,pthread_key_create,, ,pthread_key_delete,, ,pthread_mutexattr_destroy,, ,pthread_mutexattr_init,, ,pthread_mutexattr_settype,, ,pthread_mutex_destroy,, ,pthread_mutex_init,, ,pthread_mutex_lock,, ,pthread_mutex_trylock,, ,pthread_mutex_unlock,, ,pthread_once,, ,pthread_rwlock_destroy,, ,pthread_rwlock_init,, ,pthread_rwlock_rdlock,, ,pthread_rwlock_unlock,, ,pthread_rwlock_wrlock,, 
,pthread_self,, ,pthread_setspecific,, ,ptsname,, ,putchar,RET,UNUSED ,putenv,, ,puts,RET,UNUSED ,qsort,RET,UNUSED ,raise,, ,rand,RET,UNUSED ,RAND_add,, ,RAND_bytes,, ,RAND_pseudo_bytes,, ,rand_r,, ,read,RET,UNUSED ,__read_chk,, ,readdir,RET,UNUSED ,realloc,RET,UNUSED ,recv,, ,remainder,, ,remove,RET,UNUSED ,rewind,RET,UNUSED ,round,RET,UNUSED ,RSA_free,, ,RSA_generate_key_ex,, ,RSA_get0_crt_params,, ,RSA_get0_factors,, ,RSA_get0_key,, ,RSA_new,, ,RSA_set0_crt_params,, ,RSA_set0_factors,, ,RSA_set0_key,, ,RSA_sign,, ,RSA_size,, ,RSA_verify,, ,sched_yield,, ,select,, ,send,, ,setcontext,, ,setenv,RET,UNUSED ,_setjmp,, ,setlocale,, ,setlogmask,, ,setrlimit,RET,UNUSED ,setsid,RET,UNUSED ,setsockopt,RET,UNUSED ,shutdown,RET,UNUSED ,sigaction,RET,UNUSED ,signal,RET,UNUSED ,sin,RET,UNUSED ,sincos,RET,UNUSED ,sincosf,RET,UNUSED ,sinh,RET,UNUSED ,sleep,RET,UNUSED ,snprintf,RET,UNUSED ,__snprintf_chk,, ,socket,RET,UNUSED ,socketpair,RET,UNUSED ,__sprintf_chk,RET,UNUSED ,sqrt,RET,UNUSED ,sqrtf,RET,UNUSED ,sscanf,RET,UNUSED ,__stack_chk_fail,, ,stderr,OBJ,OBJ ,stdin,OBJ,OBJ ,stdout,OBJ,OBJ ,stpcpy,RET,UNUSED ,__stpcpy_chk,, ,strcasecmp,RET,UNUSED ,strcasestr,RET,UNUSED ,strcat,RET,UNUSED ,__strcat_chk,, ,strchr,RET,UNUSED ,strcmp,RET,UNUSED ,strcpy,RET,UNUSED ,__strcpy_chk,, ,strcspn,RET,UNUSED ,strdup,RET,UNUSED ,strerror,RET,UNUSED ,strftime,RET,UNUSED ,strlen,RET,UNUSED ,strncasecmp,RET,UNUSED ,__strncat_chk,, ,strncmp,RET,UNUSED ,strncpy,RET,UNUSED ,__strncpy_chk,, ,strnlen,RET,UNUSED ,strrchr,RET,UNUSED ,strsignal,RET,UNUSED ,strspn,RET,UNUSED ,strstr,RET,UNUSED ,strtod,RET,UNUSED ,strtof,RET,UNUSED ,strtok,RET,UNUSED ,strtol,RET,UNUSED ,strtoll,RET,UNUSED ,strtoul,RET,UNUSED ,strtoull,RET,UNUSED ,sysconf,RET,UNUSED ,__syslog_chk,, ,system,RET,UNUSED ,tan,RET,UNUSED ,tcflush,RET,UNUSED ,tcgetattr,RET,UNUSED ,tcsendbreak,RET,UNUSED ,tcsetattr,RET,UNUSED ,time,RET,UNUSED ,timegm,RET,UNUSED ,__tls_get_addr,, ,tmpfile,RET,UNUSED ,trunc,RET,UNUSED ,__udivti3,, ,__umodti3,, 
,unlink,RET,UNUSED ,unlockpt,RET,UNUSED ,_Unwind_Resume,, ,usleep,RET,UNUSED ,utimes,RET,UNUSED ,__vfprintf_chk,, ,vsnprintf,RET,UNUSED ,__vsnprintf_chk,, ,__vsprintf_chk,, ,waitpid,RET,UNUSED ,wcrtomb,RET,UNUSED ,wcscpy,RET,UNUSED ,wcslen,RET,UNUSED ,wmemcmp,RET,UNUSED ,wmemmove,RET,UNUSED ,write,RET,UNUSED ,xmlCleanupParser,, ,xmlFreeTextReader,, ,xmlReaderForIO,, ,xmlTextReaderConstLocalName,, ,xmlTextReaderConstValue,, ,xmlTextReaderIsEmptyElement,, ,xmlTextReaderMoveToFirstAttribute,, ,xmlTextReaderMoveToNextAttribute,, ,xmlTextReaderNodeType,, ,xmlTextReaderRead,, ,xmlTextReaderSetErrorHandler,, ,__xpg_basename,, ,__xpg_strerror_r,, ,__xstat,, ,__xstat64,, operator delete[](void*),_ZdaPv,, operator delete(void*),_ZdlPv,, "operator delete(void*, unsigned long)",_ZdlPvm,, "operator delete(void*, std::nothrow_t const&)",_ZdlPvRKSt9nothrow_t,, operator new[](unsigned long),_Znam,, std::__basic_file::is_open() const,_ZNKSt12__basic_fileIcE7is_openEv,, std::runtime_error::what() const,_ZNKSt13runtime_error4whatEv,, std::ctype::_M_widen_init() const,_ZNKSt5ctypeIcE13_M_widen_initEv,, "std::__cxx11::basic_string, std::allocator >::find_last_of(char const*, unsigned long, unsigned long) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12find_last_ofEPKcmm,, "std::__cxx11::basic_string, std::allocator >::find_first_of(char const*, unsigned long, unsigned long) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE13find_first_ofEPKcmm,, "std::__cxx11::basic_string, std::allocator >::find_last_not_of(char, unsigned long) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE16find_last_not_ofEcm,, "std::__cxx11::basic_string, std::allocator >::find_last_not_of(char const*, unsigned long, unsigned long) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE16find_last_not_ofEPKcmm,, "std::__cxx11::basic_string, std::allocator >::find_first_not_of(char const*, unsigned long, unsigned long) 
const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE17find_first_not_ofEPKcmm,, "std::__cxx11::basic_string, std::allocator >::find(char, unsigned long) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE4findEcm,, "std::__cxx11::basic_string, std::allocator >::find(char const*, unsigned long, unsigned long) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE4findEPKcmm,, "std::__cxx11::basic_string, std::allocator >::rfind(char, unsigned long) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE5rfindEcm,, "std::__cxx11::basic_string, std::allocator >::rfind(char const*, unsigned long, unsigned long) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE5rfindEPKcmm,, "std::__cxx11::basic_string, std::allocator >::substr(unsigned long, unsigned long) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6substrEmm,, "std::__cxx11::basic_string, std::allocator >::compare(unsigned long, unsigned long, char const*) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7compareEmmPKc,, "std::__cxx11::basic_string, std::allocator >::compare(char const*) const",_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7compareEPKc,, "std::__cxx11::basic_stringbuf, std::allocator >::str() const",_ZNKSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE3strEv,, "std::__detail::_Prime_rehash_policy::_M_need_rehash(unsigned long, unsigned long, unsigned long) const",_ZNKSt8__detail20_Prime_rehash_policy14_M_need_rehashEmmm,, std::exception::what() const,_ZNKSt9exception4whatEv,, std::istream::get(),_ZNSi3getEv,, "std::istream::read(char*, long)",_ZNSi4readEPcl,, std::istream::unget(),_ZNSi5ungetEv,, std::ostream::put(char),_ZNSo3putEc,, std::ostream::flush(),_ZNSo5flushEv,, "std::ostream::write(char const*, long)",_ZNSo5writeEPKcl,, std::ostream& std::ostream::_M_insert(bool),_ZNSo9_M_insertIbEERSoT_,, std::ostream& std::ostream::_M_insert(double),_ZNSo9_M_insertIdEERSoT_,, std::ostream& std::ostream::_M_insert(unsigned 
long),_ZNSo9_M_insertImEERSoT_,, std::ostream& std::ostream::_M_insert(void const*),_ZNSo9_M_insertIPKvEERSoT_,, std::ostream& std::ostream::_M_insert(long long),_ZNSo9_M_insertIxEERSoT_,, std::ostream& std::ostream::_M_insert(unsigned long long),_ZNSo9_M_insertIyEERSoT_,, std::ostream::operator<<(int),_ZNSolsEi,, std::ostream::operator<<(short),_ZNSolsEs,, std::logic_error::logic_error(std::logic_error const&),_ZNSt11logic_errorC2ERKS_,, std::__basic_file::~__basic_file(),_ZNSt12__basic_fileIcED1Ev,, "std::domain_error::domain_error(std::__cxx11::basic_string, std::allocator > const&)",_ZNSt12domain_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE,, std::domain_error::~domain_error(),_ZNSt12domain_errorD1Ev,, std::out_of_range::out_of_range(char const*),_ZNSt12out_of_rangeC1EPKc,, std::out_of_range::~out_of_range(),_ZNSt12out_of_rangeD1Ev,, "std::basic_filebuf >::open(char const*, std::_Ios_Openmode)",_ZNSt13basic_filebufIcSt11char_traitsIcEE4openEPKcSt13_Ios_Openmode,, "std::basic_filebuf >::close()",_ZNSt13basic_filebufIcSt11char_traitsIcEE5closeEv,, "std::basic_filebuf >::basic_filebuf()",_ZNSt13basic_filebufIcSt11char_traitsIcEEC1Ev,, "std::basic_filebuf >::~basic_filebuf()",_ZNSt13basic_filebufIcSt11char_traitsIcEED1Ev,, "std::basic_fstream >::basic_fstream(char const*, std::_Ios_Openmode)",_ZNSt13basic_fstreamIcSt11char_traitsIcEEC1EPKcSt13_Ios_Openmode,, std::runtime_error::runtime_error(std::runtime_error const&),_ZNSt13runtime_errorC1ERKS_,, std::runtime_error::runtime_error(char const*),_ZNSt13runtime_errorC2EPKc,, std::runtime_error::runtime_error(std::runtime_error const&),_ZNSt13runtime_errorC2ERKS_,, std::runtime_error::~runtime_error(),_ZNSt13runtime_errorD1Ev,, std::runtime_error::~runtime_error(),_ZNSt13runtime_errorD2Ev,, "std::basic_ifstream >::basic_ifstream(char const*, std::_Ios_Openmode)",_ZNSt14basic_ifstreamIcSt11char_traitsIcEEC1EPKcSt13_Ios_Openmode,, "std::basic_ifstream 
>::~basic_ifstream()",_ZNSt14basic_ifstreamIcSt11char_traitsIcEED1Ev,, "std::basic_ofstream >::basic_ofstream(char const*, std::_Ios_Openmode)",_ZNSt14basic_ofstreamIcSt11char_traitsIcEEC1EPKcSt13_Ios_Openmode,, "std::basic_ofstream >::~basic_ofstream()",_ZNSt14basic_ofstreamIcSt11char_traitsIcEED1Ev,, std::invalid_argument::invalid_argument(char const*),_ZNSt16invalid_argumentC1EPKc,, "std::invalid_argument::invalid_argument(std::__cxx11::basic_string, std::allocator > const&)",_ZNSt16invalid_argumentC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE,, std::invalid_argument::~invalid_argument(),_ZNSt16invalid_argumentD1Ev,, std::locale::locale(),_ZNSt6localeC1Ev,, std::locale::~locale(),_ZNSt6localeD1Ev,, "std::__cxx11::basic_string, std::allocator >::_M_replace(unsigned long, unsigned long, char const*, unsigned long)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm,, "std::__cxx11::basic_string, std::allocator >::_M_construct(unsigned long, char)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructEmc,, "std::__cxx11::basic_string, std::allocator >::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE14_M_replace_auxEmmmc,, "std::__cxx11::basic_string, std::allocator >::swap(std::__cxx11::basic_string, std::allocator >&)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE4swapERS4_,, "std::__cxx11::basic_string, std::allocator >::append(char const*)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6appendEPKc,, "std::__cxx11::basic_string, std::allocator >::assign(char const*)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6assignEPKc,, "std::__cxx11::basic_string, std::allocator >::resize(unsigned long, char)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6resizeEmc,, "std::__cxx11::basic_string, std::allocator >::reserve(unsigned long)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEm,, 
"std::__cxx11::basic_string, std::allocator >::_M_erase(unsigned long, unsigned long)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE8_M_eraseEmm,, "std::__cxx11::basic_string, std::allocator >::_M_append(char const*, unsigned long)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm,, "std::__cxx11::basic_string, std::allocator >::_M_assign(std::__cxx11::basic_string, std::allocator > const&)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_,, "std::__cxx11::basic_string, std::allocator >::_M_create(unsigned long&, unsigned long)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm,, "std::__cxx11::basic_string, std::allocator >::_M_mutate(unsigned long, unsigned long, char const*, unsigned long)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm,, "std::__cxx11::basic_string, std::allocator >::push_back(char)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9push_backEc,, "std::__cxx11::basic_string, std::allocator >::operator=(std::__cxx11::basic_string, std::allocator >&&)",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEOS4_,, "std::__cxx11::basic_string, std::allocator >::~basic_string()",_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev,, "std::__cxx11::basic_stringbuf, std::allocator >::_M_sync(char*, unsigned long, unsigned long)",_ZNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEE7_M_syncEPcmm,, "std::__cxx11::basic_stringstream, std::allocator >::basic_stringstream(std::_Ios_Openmode)",_ZNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEC1ESt13_Ios_Openmode,, "std::__cxx11::basic_stringstream, std::allocator >::~basic_stringstream()",_ZNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEED1Ev,, "std::__cxx11::basic_istringstream, std::allocator >::basic_istringstream(std::__cxx11::basic_string, std::allocator > const&, 
std::_Ios_Openmode)",_ZNSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEEC1ERKNS_12basic_stringIcS2_S3_EESt13_Ios_Openmode,, "std::__cxx11::basic_istringstream, std::allocator >::~basic_istringstream()",_ZNSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEED1Ev,, "std::__cxx11::basic_ostringstream, std::allocator >::basic_ostringstream(std::_Ios_Openmode)",_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEC1ESt13_Ios_Openmode,, "std::__cxx11::basic_ostringstream, std::allocator >::~basic_ostringstream()",_ZNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEED1Ev,, std::bad_cast::~bad_cast(),_ZNSt8bad_castD2Ev,, "std::__detail::_List_node_base::swap(std::__detail::_List_node_base&, std::__detail::_List_node_base&)",_ZNSt8__detail15_List_node_base4swapERS0_S1_,, std::__detail::_List_node_base::_M_hook(std::__detail::_List_node_base*),_ZNSt8__detail15_List_node_base7_M_hookEPS0_,, std::__detail::_List_node_base::_M_unhook(),_ZNSt8__detail15_List_node_base9_M_unhookEv,, std::ios_base::Init::Init(),_ZNSt8ios_base4InitC1Ev,, std::ios_base::Init::~Init(),_ZNSt8ios_base4InitD1Ev,, std::ios_base::ios_base(),_ZNSt8ios_baseC2Ev,, std::ios_base::~ios_base(),_ZNSt8ios_baseD2Ev,, "std::basic_ios >::init(std::basic_streambuf >*)",_ZNSt9basic_iosIcSt11char_traitsIcEE4initEPSt15basic_streambufIcS1_E,, "std::basic_ios >::clear(std::_Ios_Iostate)",_ZNSt9basic_iosIcSt11char_traitsIcEE5clearESt12_Ios_Iostate,, std::exception::~exception(),_ZNSt9exceptionD1Ev,, operator new(unsigned long),_Znwm,, "operator new(unsigned long, std::nothrow_t const&)",_ZnwmRKSt9nothrow_t,, "std::_Hash_bytes(void const*, unsigned long, unsigned long)",_ZSt11_Hash_bytesPKvmm,, "std::basic_ostream >& std::__ostream_insert >(std::basic_ostream >&, char const*, long)",_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l,, std::__throw_bad_cast(),_ZSt16__throw_bad_castv,, std::__throw_bad_alloc(),_ZSt17__throw_bad_allocv,, 
std::_Rb_tree_decrement(std::_Rb_tree_node_base const*),_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base,, std::_Rb_tree_decrement(std::_Rb_tree_node_base*),_ZSt18_Rb_tree_decrementPSt18_Rb_tree_node_base,, std::_Rb_tree_increment(std::_Rb_tree_node_base const*),_ZSt18_Rb_tree_incrementPKSt18_Rb_tree_node_base,, std::_Rb_tree_increment(std::_Rb_tree_node_base*),_ZSt18_Rb_tree_incrementPSt18_Rb_tree_node_base,, std::__throw_logic_error(char const*),_ZSt19__throw_logic_errorPKc,, std::__throw_length_error(char const*),_ZSt20__throw_length_errorPKc,, std::__throw_out_of_range(char const*),_ZSt20__throw_out_of_rangePKc,, "std::__throw_out_of_range_fmt(char const*, ...)",_ZSt24__throw_out_of_range_fmtPKcz,, std::__throw_bad_function_call(),_ZSt25__throw_bad_function_callv,, "std::_Rb_tree_rebalance_for_erase(std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)",_ZSt28_Rb_tree_rebalance_for_erasePSt18_Rb_tree_node_baseRS_,, "std::_Rb_tree_insert_and_rebalance(bool, std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)",_ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS_,, std::cin,_ZSt3cin,, std::cerr,_ZSt4cerr,, std::cout,_ZSt4cout,, "std::basic_ostream >& std::endl >(std::basic_ostream >&)",_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_,, "std::basic_istream >& std::getline, std::allocator >(std::basic_istream >&, std::__cxx11::basic_string, std::allocator >&, char)",_ZSt7getlineIcSt11char_traitsIcESaIcEERSt13basic_istreamIT_T0_ES7_RNSt7__cxx1112basic_stringIS4_S5_T1_EES4_,, std::nothrow,_ZSt7nothrow,, std::terminate(),_ZSt9terminatev,, "std::basic_ostream >& std::operator<< >(std::basic_ostream >&, char const*)",_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc,, "std::basic_istream >& std::operator>> >(std::basic_istream >&, char&)",_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_RS3_,, typeinfo for int,_ZTIi,OBJ,OBJ typeinfo for char const*,_ZTIPKc,OBJ,OBJ "VTT for std::__cxx11::basic_stringstream, 
std::allocator >",_ZTTNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ "VTT for std::__cxx11::basic_istringstream, std::allocator >",_ZTTNSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ "VTT for std::__cxx11::basic_ostringstream, std::allocator >",_ZTTNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ "VTT for std::basic_ifstream >",_ZTTSt14basic_ifstreamIcSt11char_traitsIcEE,OBJ,OBJ "VTT for std::basic_ofstream >",_ZTTSt14basic_ofstreamIcSt11char_traitsIcEE,OBJ,OBJ "vtable for std::__cxx11::basic_stringbuf, std::allocator >",_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE,OBJ,OBJ "vtable for std::__cxx11::basic_stringstream, std::allocator >",_ZTVNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ "vtable for std::__cxx11::basic_istringstream, std::allocator >",_ZTVNSt7__cxx1119basic_istringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ "vtable for std::__cxx11::basic_ostringstream, std::allocator >",_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE,OBJ,OBJ "vtable for std::basic_filebuf >",_ZTVSt13basic_filebufIcSt11char_traitsIcEE,OBJ,OBJ "vtable for std::basic_ifstream >",_ZTVSt14basic_ifstreamIcSt11char_traitsIcEE,OBJ,OBJ "vtable for std::basic_ofstream >",_ZTVSt14basic_ofstreamIcSt11char_traitsIcEE,OBJ,OBJ "vtable for std::basic_streambuf >",_ZTVSt15basic_streambufIcSt11char_traitsIcEE,OBJ,OBJ "vtable for std::basic_ios >",_ZTVSt9basic_iosIcSt11char_traitsIcEE,OBJ,OBJ ================================================ FILE: src/library_functions/library_functions.c ================================================ /* * library_functions.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include "library_functions.h"
#include "../utils.h"
// NOTE(review): the system header name below was stripped by extraction
#include

/*
 * Create a LFuncInfo record (owns a private copy of *name*).
 */
Z_PRIVATE LFuncInfo *__lfunc_info_create(const char *name, LCFGInfo cfg_info,
                                         LRAInfo ra_info);

/*
 * Destroy a LFuncInfo (frees the owned name and the struct itself).
 */
Z_PRIVATE void __lfunc_info_destroy(LFuncInfo *info);

/*
 * Load data into database (the generated table of known library functions).
 */
Z_PRIVATE void __libfunc_load(GHashTable *d);

Z_PRIVATE LFuncInfo *__lfunc_info_create(const char *name, LCFGInfo cfg_info,
                                         LRAInfo ra_info) {
    LFuncInfo *rv = z_alloc(1, sizeof(LFuncInfo));
    // the record owns its own copy of the name
    rv->name = z_strdup(name);
    rv->cfg_info = cfg_info;
    rv->ra_info = ra_info;
    return rv;
}

Z_PRIVATE void __lfunc_info_destroy(LFuncInfo *info) {
    z_free((void *)info->name);
    z_free(info);
}

// XXX: the file must be included here.
// (it defines __libfunc_load() against the declarations above)
#include "library_functions_load.c"

// global name -> LFuncInfo* database, lazily created (see z_libfunc_init)
GHashTable *lf_info = NULL;

Z_API void z_libfunc_init() {
    // idempotent: calling init twice is a no-op
    if (lf_info) {
        return;
    }

    // keys are strdup'ed names (freed via z_free),
    // values are LFuncInfo (freed via __lfunc_info_destroy)
    lf_info = g_hash_table_new_full(g_str_hash, g_str_equal,
                                    (GDestroyNotify)&z_free,
                                    (GDestroyNotify)&__lfunc_info_destroy);

    __libfunc_load(lf_info);
}

Z_API void z_libfunc_fini() {
    if (lf_info) {
        g_hash_table_destroy(lf_info);
        lf_info = NULL;
    }
}

Z_API const LFuncInfo *z_libfunc_get_info(const char *name) {
    // lazy initialization for callers that skipped z_libfunc_init()
    if (!lf_info) {
        z_libfunc_init();
    }

    LFuncInfo *rv = (LFuncInfo *)g_hash_table_lookup(lf_info, (gpointer)name);

    if (!rv) {
        // unknown function: cache an all-UNK entry so later lookups hit
        rv = __lfunc_info_create(name, LCFG_UNK, LRA_UNK);
        g_hash_table_insert(lf_info, (gpointer)z_strdup(name), (gpointer)rv);
    }

    return rv;
}

// shared fallback record returned by z_libfunc_default()
const LFuncInfo default_func_info = {
    .name = NULL,
    .cfg_info = LCFG_UNK,
    .ra_info = LRA_UNK,
};

Z_API const LFuncInfo *z_libfunc_default() { return &default_func_info; }

================================================
FILE: src/library_functions/library_functions.h
================================================
/*
 * library_functions.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef __LIBRARY_FUNCTIONS_H
#define __LIBRARY_FUNCTIONS_H

#include "../config.h"

// whether the library function will return to caller
typedef enum lcfg_info_t {
    LCFG_OBJ,   // this is not an imported function but an object
    LCFG_UNK,   // control-flow behavior unknown
    LCFG_RET,   // returns to its caller
    LCFG_TERM,  // terminal: does not return to the caller
} LCFGInfo;

// whether the retaddr pushed by `call` instructions is used
typedef enum lra_info_t {
    LRA_OBJ,     // this is not an imported function but an object
    LRA_UNK,     // retaddr usage unknown
    LRA_USED,    // the pushed return address is used by the callee
    LRA_UNUSED,  // the pushed return address is not used
} LRAInfo;

// per-function record: owned name plus the two classifications above
typedef struct lfunc_info_t {
    const char *name;
    LCFGInfo cfg_info;
    LRAInfo ra_info;
} LFuncInfo;

// Initialize the library-function database (idempotent).
Z_API void z_libfunc_init();

// Release the library-function database.
Z_API void z_libfunc_fini();

// Look up (or lazily create) the info record for the given function name.
Z_API const LFuncInfo *z_libfunc_get_info(const char *name);

// Return the shared default (all-UNK) info record.
Z_API const LFuncInfo *z_libfunc_default();

#endif

================================================
FILE: src/libstochfuzz.h
================================================
/*
 * libstochfuzz.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/ #ifndef __LIBSTOCHFUZZ_H #define __LIBSTOCHFUZZ_H #include "core.h" #include "utils.h" #endif ================================================ FILE: src/libstochfuzzRT.c ================================================ /* * libstochfuzzRT.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ // XXX: some code is modified from // https://github.com/mxz297/dyninst/blob/asplos21/dyninstAPI_RT/src/RTunwind.c. #include "config.h" #include #include #include #include #include #include #include #define IP_OFFSET_IN_CURSOR 3 typedef int (*unw_step_fn_type)(unw_cursor_t*); typedef struct retaddr_entity_t { uint32_t shadow; uint32_t original; } Retaddr; typedef struct retaddr_mapping_t { size_t n; unw_step_fn_type real_unw_step; Retaddr addrs[]; } RetaddrMapping; static void __runtime_mremap(const char* filename, void* addr, size_t length, int prot) { // msync the data if (msync(addr, length, MS_SYNC)) { fprintf(stderr, "msync failed: %s\n", strerror(errno)); exit(MY_ERR_CODE); } // munmap the underlying memory if (munmap(addr, length)) { fprintf(stderr, "munmap failed: %s\n", strerror(errno)); exit(MY_ERR_CODE); } // open file int fd = open(filename, (prot & PROT_WRITE) ? 
O_RDWR : O_RDONLY); if (fd < 0) { fprintf(stderr, "open %s failed: %s\n", filename, strerror(errno)); exit(MY_ERR_CODE); } // mmap file if (mmap(addr, length, prot, MAP_SHARED | MAP_FIXED, fd, 0) != addr) { fprintf(stderr, "mmap failed: %s\n", strerror(errno)); exit(MY_ERR_CODE); } // close fd if (close(fd)) { fprintf(stderr, "close failed: %s\n", strerror(errno)); exit(MY_ERR_CODE); } } static unw_word_t __runtime_retaddr_translate(RetaddrMapping* mapping, unw_word_t ip) { size_t low_id = 0; size_t high_id = mapping->n - 1; if (mapping->addrs[low_id].shadow > ip || mapping->addrs[high_id].shadow < ip) { return ip; } if (mapping->addrs[low_id].shadow == ip) { return mapping->addrs[low_id].original; } if (mapping->addrs[high_id].shadow == ip) { return mapping->addrs[high_id].original; } size_t mid_id = (low_id + high_id) >> 1; while (low_id + 1 != high_id) { if (mapping->addrs[mid_id].shadow < ip) { low_id = mid_id; } else if (mapping->addrs[mid_id].shadow > ip) { high_id = mid_id; } else { return mapping->addrs[mid_id].original; } mid_id = (low_id + high_id) >> 1; } return ip; } int _ULx86_64_step(unw_cursor_t* cursor) { if (!RW_PAGE_INFO(retaddr_mapping_used)) { fprintf(stderr, "stochfuzz's -r option is disabled!\n"); exit(MY_ERR_CODE); } RetaddrMapping* mapping = (RetaddrMapping*)RW_PAGE_INFO(retaddr_mapping_base); if (!mapping->real_unw_step) { // first check size if (sizeof(addr_t) != sizeof(unw_word_t)) { fprintf(stderr, "inconsistent size of addr_t and unw_word_t"); exit(MY_ERR_CODE); } // get basic information void* retaddr_mapping_base = (void*)RW_PAGE_INFO(retaddr_mapping_base); size_t retaddr_mapping_size = RW_PAGE_INFO(retaddr_mapping_size); const char* retaddr_mapping_path = RW_PAGE_INFO(retaddr_mapping_path); // update mapping prot __runtime_mremap(retaddr_mapping_path, retaddr_mapping_base, retaddr_mapping_size, PROT_READ | PROT_WRITE); // find the real address struct link_map* l_current = _r_debug.r_map; while (l_current) { if 
(strstr(l_current->l_name, "libunwind.so")) { break; } l_current = l_current->l_next; } if (!l_current) { fprintf(stderr, "Cannot find libunwind handle\n"); exit(MY_ERR_CODE); } mapping->real_unw_step = (unw_step_fn_type)(l_current->l_addr + STEP_OFFSET); // remapping as non-writable __runtime_mremap(retaddr_mapping_path, retaddr_mapping_base, retaddr_mapping_size, PROT_READ); } int rv = (*(mapping->real_unw_step))(cursor); unw_word_t* typed_cursor = (unw_word_t*)cursor; unw_word_t base_ip = RW_PAGE_INFO(program_base); unw_word_t ip = typed_cursor[IP_OFFSET_IN_CURSOR] - base_ip; unw_word_t new_ip = __runtime_retaddr_translate(mapping, ip); typed_cursor[IP_OFFSET_IN_CURSOR] = new_ip + base_ip; return rv; } ================================================ FILE: src/loader.c ================================================ /* * the code inside asm(".globl _entry\n ...") * Copyright (C) 2021 National University of Singapore * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * */ // XXX: the code inside the asm(".globl _entry\n ...") is modified based on // https://github.com/GJDuck/e9patch/blob/master/src/e9patch/e9loader.cpp /* * other parts of loader.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /* * Layout of patched binary (on disk): * * padding padding * | | * V V * | ori ELF |.| loader (see below) | fork server |.| trampolines (see below) | * \_____________________ _______________________/ \___________ __________/ * \/ \/ * SHADOW_PREFIX. */ /* * Layout of loader: * * | loader | jmp 2 ori entrypoint | loader base | TP base | TP size | names | * | * | * +--------------------------------------------------+ * | * V * | trampoline 1 | shadow code | trampoline 2| ... 
| trampoline n |
 *                                   ^
 *                                   |
 *                                   |
 *  +-- | mmap addr | mmap size | TP addr | TP size | next TP off | data |
 *
 *
 * For trampolines meta data:
 *
 *      +----------------+-----------+-----------+----------+----------+
 *      | Type           | mmap addr | mmap size | TP addr  | TP size  |
 *      +----------------+-----------+-----------+----------+----------+
 *      | uTP (w/ mmap)  | Non-NULL  | Non-NULL  | Non-NULL | Non-NULL |
 *      +----------------+-----------+-----------+----------+----------+
 *      | uTP (w/o mmap) | NULL      | NULL      | Non-NULL | Non-NULL |
 *      +----------------+-----------+-----------+----------+----------+
 *      | TP             | NULL      | NULL      | NULL     | Non-NULL |
 *      +----------------+-----------+-----------+----------+----------+
 *      | Terminal       | NULL      | NULL      | NULL     | NULL     |
 *      +----------------+-----------+-----------+----------+----------+
 *
 */

#include "loader.h"

// NOTE(review): system header names below were stripped by extraction
#include
#include
#include
#include
#include

#include "asm_utils.c"

#define SA_RESTORER 0x04000000

#ifdef DEBUG
extern const char loader_logo_str[];
extern const char suspect_signal_info_str[];
extern const char loader_err_str[];
extern const char prctl_err_str[];
extern const char handler_err_str[];
#endif

extern void restorer();

asm(
    /*
     * Entry into stage #1 (loader). We:
     *  (0) save all registers
     *  (1) call loader_output_running_path() if necessary
     *  (2) setup stage parameters for loader_load()
     *  (3) call loader_load() to mmap and copy data to target virtual addr
     *  (4) restore all registers
     *  (5) jump to original entrypoint
     */
    ".globl _entry\n"
    ".type _entry,@function\n"
    "_entry:\n"

    // (0) save registers (meanwhile storing variable *envp*)
    "\tpushq %r15;\n"
    "\tpushq %r14;\n"
    "\tpushq %r13;\n"
    "\tpushq %r12;\n"
    "\tpushq %r11;\n"
    "\tpushq %r10;\n"
    "\tpushq %r9;\n"
    "\tpushq %r8;\n"
    "\tpushq %rcx;\n"
    "\tpushq %rdx;\n"
    "\tpushq %rsi;\n"
    "\tpushq %rdi;\n"

    // (1) call loader_output_running_path()
    "\tmovq 0x68(%rsp), %rdi;\n"  // XXX: note that the magic number 0x68 is
                                  // associated with how many registers we
                                  // pushed on the stack
    "\tcallq loader_output_running_path;\n"  // Show current path

    // (2) setup stage parameters for loader_load()
    // round the address following __etext up to the next 8-byte boundary,
    // where the loader's metadata (see layout comment above) was placed
    "\tlea __etext(%rip), %rdi;\n"
    "\taddq $4, %rdi;\n"
    "\tshrq $3, %rdi;\n"
    "\tincq %rdi;\n"
    "\tshlq $3, %rdi;\n"  // cur_addr in __binary_setup_loader step (4) binary.c
    "\tmovq (%rdi), %rbx;\n"
    "\tleaq _entry(%rip), %rdx;\n"
    "\tsubq %rbx, %rdx;\n"  // program base into %rdx (size_t rip_base)
    "\tleaq 24(%rdi), %rcx;\n"  // names in %rcx (const char *name)
    "\tmovq 16(%rdi), %rsi;\n"
    "\taddq %rdx, %rsi;\n"  // .text base into %rsi (void *shared_text_base)
    "\tmovq 8(%rdi), %rdi;\n"
    "\taddq %rdx, %rdi;\n"  // TP chunk base into %rdi (Trampoline *tp)
    "\tmovq %rax, %r8;\n"   // pathname into %r8 (const char *pathname)

    // (3) mmap and copy data to target virtual addr
    "\tcld;\n"                // set DF register (loader_memcpy relies on this)
    "\tcallq loader_load;\n"  // call loader_load()

    // (4) restore all registers
    "\tpopq %rdi;\n"
    "\tpopq %rsi;\n"
    "\tpopq %rdx;\n"
    "\tpopq %rcx;\n"
    "\tpopq %r8;\n"
    "\tpopq %r9;\n"
    "\tpopq %r10;\n"
    "\tpopq %r11;\n"
    "\tpopq %r12;\n"
    "\tpopq %r13;\n"
    "\tpopq %r14;\n"
    "\tpopq %r15;\n"

    // (5) jump to original entrypoint
    // The springboard to original entrypoint will be placed at the end of the
    // (.text) section.
    "\tjmp __etext\n"

    /*
     * restore function for rt_sigaction
     */
    ".global restorer\n"
    ".type restorer,@function\n"
    "restorer:\n"
    "\tmov $15,%rax;\n"  // 15 == __NR_rt_sigreturn on x86-64
    "\tsyscall;\n"
    "\tret;\n"

    /*
     * The following defines the read-only data used by the loader.
     * Note that we define the data as executable code to keep everything
     * in the (.text) section.
     */
#ifdef DEBUG
    ASM_STRING(loader_logo_str,
               "\\033[32mpatched by " OURTOOL
               ", current running path: \\033[0m")

    // suspect signal info string
    ASM_STRING(suspect_signal_info_str, "suspect signal occurs, with ")

    // prctl error
    ASM_STRING(prctl_err_str, "prctl error")

    // handler error
    ASM_STRING(handler_err_str, "signal handler error")

    // loader error
    ASM_STRING(loader_err_str, "loader: loading error")
#endif
);

/*
 * memcpy without libc: forward copy via rep movsq + rep movsb.
 * Assumes DF is clear (the "cld" issued in _entry before loader_load).
 */
static void loader_memcpy(void *dst_0, void *src_0, size_t n_0) {
    register uintptr_t dst asm("rdi") = (uintptr_t)dst_0;
    register uintptr_t src asm("rsi") = (uintptr_t)src_0;
    register uintptr_t n asm("rcx") = (uintptr_t)(n_0);
    asm volatile(
        "movq %%rcx, %%rdx\n\t"  // rdx = n % 8 (tail bytes)
        "andq $7, %%rdx\n\t"
        "shrq $3, %%rcx\n\t"     // rcx = n / 8 (qwords)
        "rep movsq\n\t"
        "movq %%rdx, %%rcx\n\t"
        "rep movsb\n\t"
        :
        : "r"(dst), "r"(src), "r"(n)
        : "rdx");
}

/*
 * mmap a fake AFL_SHARED_MEMORY to avoid instrumentation before main
 */
static inline void loader_mmap_fake_shared_memory() {
    unsigned long shared_mem_addr = AFL_MAP_ADDR;
    size_t shared_mem_size = AFL_MAP_SIZE;
    if (sys_mmap(shared_mem_addr, shared_mem_size, PROT_READ | PROT_WRITE,
                 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1,
                 0) != shared_mem_addr) {
        utils_error(loader_err_str, true);
    }

    // same trick for the CRS communication map
    shared_mem_addr = CRS_MAP_ADDR;
    shared_mem_size = CRS_MAP_SIZE;
    if (sys_mmap(shared_mem_addr, shared_mem_size, PROT_READ | PROT_WRITE,
                 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1,
                 0) != shared_mem_addr) {
        utils_error(loader_err_str, true);
    }
}

/*
 * mmap a R/W data page at fixed address RW_PAGE_ADDR, and store rip base into
 * the first qword.
 */
static inline void loader_mmap_data_page(size_t rip_base) {
    if (sys_mmap(RW_PAGE_ADDR, RW_PAGE_SIZE, PROT_READ | PROT_WRITE,
                 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1,
                 0) != RW_PAGE_ADDR) {
        utils_error(loader_err_str, true);
    }
    // record the program (rip) base as the page's first field
    RW_PAGE_INFO(program_base) = (addr_t)rip_base;
}

/*
 * signal handler: report the crashing rip (relative to program base) to the
 * daemon (via shared memory) or the dry-run pipe, then kill the process group.
 */
static void loader_catch_suspect_signals(int signal, siginfo_t *siginfo,
                                         void *context) {
    uint64_t rip = ((ucontext_t *)context)->uc_mcontext.gregs[REG_RIP];

#ifdef DEBUG
    // build "rip: <hex>(<client_pid>, <pid>)\n" by hand (no libc here)
    uint64_t client_pid = RW_PAGE_INFO(client_pid);
    char s[0x40] = "";
    s[0] = 'r';
    s[1] = 'i';
    s[2] = 'p';
    s[3] = ':';
    s[4] = ' ';
    utils_num2hexstr(s + 5, rip);
    s[21] = '(';
    utils_num2hexstr(s + 22, client_pid);
    s[38] = ',';
    s[39] = ' ';
    utils_num2hexstr(s + 40, sys_getpid());
    s[56] = ')';
    s[57] = '\n';
    s[58] = '\x00';
    utils_puts(suspect_signal_info_str, false);
    utils_puts(s, false);
#endif

    // convert to an offset relative to the program base
    rip -= RW_PAGE_INFO(program_base);

    // XXX: For an *UNKNOWN* reason, pipe CRS_DATA_FD sometimes is broken,
    // resulting in an incorrect patching schedule. Hence, we adopt shared
    // memory to send the crashed PC. Note that CRS_DATA_FD is still valid in
    // dry run, for compatibility. In the future, we will abandon this pipe.
    if (RW_PAGE_INFO(daemon_attached)) {
        // we need a lock to avoid race condition
        if (!__sync_lock_test_and_set((uint32_t *)CRS_INFO_ADDR(lock), 1)) {
            CRS_INFO(crash_ip) = (addr_t)rip;
            CRS_INFO(self_fired) = 1UL;
        } else {
            // another thread/process already reported; we pause this process
            // here. Note that we are fine with pause() here.
            sys_pause();
        }
    } else {
        // we only need to send rip, since a successful communication indicates
        // a signal fired
        if (sys_write(CRS_DATA_FD, (char *)(&rip), 8) != 8) {
            utils_error(handler_err_str, true);
        }
    }

    // it would be better to kill all the processes in the group
    sys_kill(0, SIGKILL);
}

/*
 * Register signal handlers for suspect signals to send crash site information.
 */
static inline void loader_set_signal_handler(addr_t rip_base) {
    /*
     * Before we set signal handler, we will first mmap a new stack for the
     * handler. As such, even if the stack gets polluted, we can send the crash
     * address to the daemon. More details can be found in:
     *  https://man7.org/linux/man-pages/man2/sigaltstack.2.html
     *  https://stackoverflow.com/questions/39297207/catching-sigsegv-when-triggered-by-corrupt-stack
     */
    addr_t ss_addr = rip_base + SIGNAL_STACK_ADDR;
    if (sys_mmap(ss_addr, SIGNAL_STACK_SIZE, PROT_READ | PROT_WRITE,
                 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0) != ss_addr) {
        utils_error(loader_err_str, true);
    }

    stack_t ss = {
        .ss_sp = (void *)ss_addr,
        .ss_flags = 0,
        .ss_size = SIGNAL_STACK_SIZE,
    };
    sys_sigaltstack(&ss, NULL);

    struct kernel_sigaction sa = {};
    sa.k_sa_handler = &loader_catch_suspect_signals;
    // SA_ONSTACK: run the handler on the alternate stack mapped above
    sa.sa_flags = SA_SIGINFO | SA_RESTORER | SA_ONSTACK;
    sa.sa_restorer = &restorer;

    if (sys_rt_sigaction(SIGSEGV, &sa, NULL, _NSIG / 8)) {
        utils_error(loader_err_str, true);
    }
    if (sys_rt_sigaction(SIGILL, &sa, NULL, _NSIG / 8)) {
        utils_error(loader_err_str, true);
    }
    // XXX: overlapping bridges may cause SIGTRAP
    if (sys_rt_sigaction(SIGTRAP, &sa, NULL, _NSIG / 8)) {
        utils_error(loader_err_str, true);
    }
}

/*
 * Install seccomp filter to avoid modifying the suspect signal handlers
 */
static inline void loader_set_seccomp() {
    if (sys_prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
        utils_error(prctl_err_str, true);
    }

    // XXX: note that we cannot block sigprocmask (which may delay the
    // following signals). For more information, please refer to
    // https://lwn.net/Articles/822256/ ("accepting or rejecting the system
    // call cannot depend on, for example, values in structures that are passed
    // to system calls via pointers")

    /*
     * Use compiled seccomp rule (bytecode) to avoid compilation difference
     *
     *      int error = 1;
     *      struct sock_filter filter[] = {
     *          BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
     *                   (offsetof(struct seccomp_data, nr))),
     *          BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigaction, 0, 4),
     *          BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
     *                   (offsetof(struct seccomp_data, args[0]))),
     *          BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SIGTRAP, 3, 0),
     *          BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SIGSEGV, 2, 0),
     *          BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SIGILL, 1, 0),
     *          BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
     *          BPF_STMT(BPF_RET | BPF_K,
     *                   SECCOMP_RET_ERRNO | (error & SECCOMP_RET_DATA))};
     */
    // the pre-compiled bytecode lives in .text; take its address via rip
    register struct sock_filter *filter asm("rax");
    asm volatile(
        "   leaq _filter(%%rip), %%rax\n\t"
        "   jmp _out\n\t"
        "_filter:\n\t"
        ".ascii \""
        "\\040\\000\\000\\000\\000\\000\\000\\000"  // 0. BPF_STMT
        "\\025\\000\\000\\004\\015\\000\\000\\000"  // 1. BPF_JUMP
        "\\040\\000\\000\\000\\020\\000\\000\\000"  // 2. BPF_STMT
        "\\025\\000\\003\\000\\005\\000\\000\\000"  // 3. BPF_JUMP
        "\\025\\000\\002\\000\\013\\000\\000\\000"  // 4. BPF_JUMP
        "\\025\\000\\001\\000\\004\\000\\000\\000"  // 5. BPF_JUMP
        "\\006\\000\\000\\000\\000\\000\\377\\177"  // 6. BPF_STMT
        "\\006\\000\\000\\000\\001\\000\\005\\000"  // 7. BPF_STMT
        "\"\n\t"
        "_out:"
        : "=rax"(filter)
        :
        :);

    struct sock_fprog prog = {
        .len = 8,  // (unsigned short)(sizeof(filter) / sizeof(filter[0])),
        .filter = filter,
    };

    if (sys_prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, (unsigned long)(&prog),
                  0, 0)) {
        utils_error(prctl_err_str, true);
    }
}

/*
 * Load ujmp/ucall trampolines, and set a W/R page to store global data
 */
NO_INLINE void loader_load(Trampoline *tp, void *shared_text_base,
                           size_t rip_base, const char *name,
                           const char *pathname) {
    void *mmap_addr, *tp_addr;
    unsigned long mmap_size, tp_size, next_tp_offset;

    // in case we send SIGKILL to all the parent signal
    sys_setpgid(0, 0);  // ignore errors

    loader_set_signal_handler((addr_t)rip_base);
    loader_set_seccomp();
    loader_mmap_data_page(rip_base);
    loader_mmap_fake_shared_memory();

    // get related path: copy pathname, remembering the last '/'
    // (XXX: check overflow? but the longest path on linux is only 0x100 bytes)
    char fullpath[0x200];
    const char *slash_ = NULL;
    for (int i = 0; i < 0x200; i++) {
        char c = pathname[i];
        fullpath[i] = c;
        if (c == '/') {
            slash_ = fullpath + i;
        }
        if (!c) {
            break;
        }
    }

    char *cur_ = NULL;
    if (slash_) {
        // get last slash symbol: filenames replace the basename component
        cur_ = (char *)slash_ + 1;
    } else {
        cur_ = fullpath;
    }

// copy the next NUL-terminated filename out of the packed *name* list,
// advancing *s* past the terminator
#define __PARSE_FILENAME(dir, s) \
    do {                         \
        int i = 0;               \
        do {                     \
            (dir)[i++] = *((s)++); \
        } while (*s);            \
        (s)++;                   \
        (dir)[i] = '\x00';       \
    } while (0)

    // shadow file
    __PARSE_FILENAME(cur_, name);
    utils_strcpy(RW_PAGE_INFO(shadow_path), fullpath);
    utils_puts(RW_PAGE_INFO(shadow_path), true);
    RW_PAGE_INFO(shadow_size) = utils_mmap_external_file(
        fullpath, false, (unsigned long)tp, PROT_READ | PROT_EXEC);
    RW_PAGE_INFO(shadow_base) = (addr_t)tp;

    // lookup table file
    __PARSE_FILENAME(cur_, name);
    utils_strcpy(RW_PAGE_INFO(lookup_tab_path), fullpath);
    utils_puts(RW_PAGE_INFO(lookup_tab_path), true);
    addr_t lookup_table_addr = rip_base + LOOKUP_TABLE_ADDR;
    RW_PAGE_INFO(lookup_tab_base) = lookup_table_addr;
    RW_PAGE_INFO(lookup_tab_size) =
        utils_mmap_external_file(fullpath, false, lookup_table_addr, PROT_READ);

    // pipe file (path recorded only; no mapping)
    __PARSE_FILENAME(cur_, name);
    utils_strcpy(RW_PAGE_INFO(pipe_path), fullpath);
    utils_puts(RW_PAGE_INFO(pipe_path), true);

    // shared .text file
    __PARSE_FILENAME(cur_, name);
    utils_strcpy(RW_PAGE_INFO(shared_text_path), fullpath);
    utils_puts(RW_PAGE_INFO(shared_text_path), true);
    RW_PAGE_INFO(shared_text_size) = utils_mmap_external_file(
        fullpath, true, (unsigned long)shared_text_base, PROT_READ | PROT_EXEC);
    RW_PAGE_INFO(shared_text_base) = (addr_t)shared_text_base;

    // retaddr mapping file
    __PARSE_FILENAME(cur_, name);
    utils_strcpy(RW_PAGE_INFO(retaddr_mapping_path), fullpath);
    utils_puts(RW_PAGE_INFO(retaddr_mapping_path), true);
    addr_t retaddr_mapping_addr = rip_base + RETADDR_MAPPING_ADDR;
    RW_PAGE_INFO(retaddr_mapping_base) = retaddr_mapping_addr;
    RW_PAGE_INFO(retaddr_mapping_size) = utils_mmap_external_file(
        fullpath, false, retaddr_mapping_addr, PROT_READ | PROT_WRITE);
    // a leading -1 qword marks the mapping as unused (-r option disabled)
    if (*((int64_t *)retaddr_mapping_addr) == -1) {
        // retaddr mapping is useless
        uint64_t ori_size = RW_PAGE_INFO(retaddr_mapping_size);
        if (sys_munmap(retaddr_mapping_addr, ori_size)) {
            utils_error(loader_err_str, true);
        }
        RW_PAGE_INFO(retaddr_mapping_used) = false;
        RW_PAGE_INFO(retaddr_mapping_size) = 0;
    } else {
        RW_PAGE_INFO(retaddr_mapping_used) = true;
        // set the function pointer (second qword) as NULL
        *((void **)retaddr_mapping_addr + 1) = NULL;
        // remap the page as read only
        utils_mmap_external_file(fullpath, true, retaddr_mapping_addr,
                                 PROT_READ);
    }

#undef __PARSE_FILENAME

    // set the client pid as the pid of fork server (loader) itself
    // it will be updated every time we fork a new process
    RW_PAGE_INFO(client_pid) = sys_getpid();

    // XXX: currently TP mapping is not used but reserved for advanced
    // patching. However, note that we still need to maintain it as it can be
    // quite useful in the future
    while (true) {
        // get every TP's meta-data
        mmap_addr = tp->mmap_addr;
        mmap_size = tp->mmap_size;
        tp_addr = tp->tp_addr;
        tp_size = tp->tp_size;
        next_tp_offset = tp->next_tp_offset;

        // check whether the tp needs to mmap
        if (mmap_addr != NULL && mmap_size != 0) {
            if (sys_mmap((unsigned long)mmap_addr + rip_base, mmap_size,
                         // XXX: PROT_READ | PROT_WRITE | PROT_EXEC ?
                         PROT_READ | PROT_EXEC,
                         MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1,
                         0) != (unsigned long)mmap_addr + rip_base) {
                utils_error(loader_err_str, true);
            }
        }

        // check whether the tp needs to memcpy
        if (tp_addr != NULL && tp_size != 0) {
            loader_memcpy(tp_addr + rip_base, tp->tp, tp_size);
        }

        // check terminal (see the trampoline meta-data table above)
        if (next_tp_offset == 0) {
            break;
        }
        tp = (void *)tp + next_tp_offset;
    }
}

/*
 * Print the loader banner followed by the running path; returns pathname
 * unchanged (so _entry can keep it in %rax).
 */
NO_INLINE const char *loader_output_running_path(const char *pathname) {
    utils_puts(loader_logo_str, false);
    utils_puts(pathname, true);
    return pathname;
}

================================================
FILE: src/loader.h
================================================
/*
 * loader.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef __LOADER_H
#define __LOADER_H

#include "crs_config.h"

// on-disk trampoline record; see the layout/meta-data table in loader.c
// (all-NULL fields mark the terminal record)
typedef struct trampoline_t {
    void *mmap_addr;               // address to mmap (NULL if no mmap needed)
    unsigned long mmap_size;       // size of the mmap region (0 if none)
    void *tp_addr;                 // copy destination for tp[] (NULL if none)
    unsigned long tp_size;         // number of trampoline bytes in tp[]
    unsigned long next_tp_offset;  // byte offset to the next record (0 = end)
    unsigned char tp[];            // trampoline bytes (flexible array member)
} Trampoline;

#endif

================================================
FILE: src/mem_file.c
================================================
/*
 * mem_file.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include "mem_file.h"
#include "utils.h"

// NOTE(review): system header names below were stripped by extraction
#include
#include
#include
#include

// initial size of a freshly created file
#define INIT_SIZE PAGE_SIZE

// growth granularity when stretching a file
// XXX: will a small INC_SIZE_POW2 helps reduce fork overhead?
#define INC_SIZE_POW2 (PAGE_SIZE_POW2 + 6)
#define INC_SIZE (1 << INC_SIZE_POW2)

// generates the z_mem_file_get_<field>() accessor for one field
#define _MEM_FILE_DEFINE_GETTER(OTYPE, ONAME, FTYPE, FNAME)      \
    Z_API FTYPE z_##ONAME##_##get_##FNAME(OTYPE *ONAME) {        \
        assert(ONAME != NULL);                                   \
        __mem_file_check_state(ONAME);                           \
        return ONAME->FNAME;                                     \
    }

// Stretch the file size to size.
Z_PRIVATE int __mem_file_stretch_to_size(_MEM_FILE *stream, size_t size);

// Open stream.
Z_PRIVATE void __mem_file_open_stream(_MEM_FILE *stream, bool is_resumed);

// Check the state of _MEM_FILE, to identify whether it is suitable to operate
// on it.
Z_PRIVATE void __mem_file_check_state(_MEM_FILE *stream);

/*
 * Setter and Getter
 */
_MEM_FILE_DEFINE_GETTER(_MEM_FILE, mem_file, const char *, filename);
_MEM_FILE_DEFINE_GETTER(_MEM_FILE, mem_file, uint8_t *, raw_buf);
_MEM_FILE_DEFINE_GETTER(_MEM_FILE, mem_file, uint8_t *, cur_ptr);
_MEM_FILE_DEFINE_GETTER(_MEM_FILE, mem_file, size_t, size);

Z_PRIVATE void __mem_file_check_state(_MEM_FILE *stream) {
    if (!stream) {
        EXITME("try to operate on an empty _MEM_FILE");
    }
    // INVALID_FD means the stream is suspended (see z_mem_file_suspend)
    if (stream->fd == INVALID_FD) {
        EXITME("try to operate on a disconnected _MEM_FILE");
    }
}

// XXX: all possible cases when invoking __mem_file_stretch_to_size
//  case 1: stream->size == 0, stream->raw_buf == NULL (open a new file)
//  case 2: stream->size > 0,  stream->raw_buf == NULL (resume a file)
//  case 3: stream->size > 0,  stream->raw_buf != NULL (update a file)
Z_PRIVATE int __mem_file_stretch_to_size(_MEM_FILE *stream, size_t size) {
    // step (0). validate size
    if (stream->size_fixed) {
        if (size != stream->size) {
            EXITME("try to resize a size-fixed file");
        }
    } else {
        // shrinking is never allowed
        if (size < stream->size) {
            EXITME("the given _MEM_FILE is too large");
        }
    }
    if (!size) {
        EXITME("cannot stretch to 0");
    }
    if (!stream->size && (stream->raw_buf || stream->cur_ptr)) {
        EXITME("impossible case when stream->size == 0");
    }

    // step (1). update the size of underlying file: seek to the last byte and
    // write a single NUL to extend the file
    // XXX: avoid write on existing data
    if (size > stream->size) {
        if (lseek(stream->fd, size - 1, SEEK_SET) == -1) {
            return -1;
        }
        if (write(stream->fd, "", 1) == -1) {
            return -1;
        }
    }

    // step (2). update memory mapping
    size_t old_size = stream->size;
    size_t new_size = size;
    if (stream->raw_buf) {
        // the raw_ptr exists: grow the existing mapping, preserving the
        // cursor's offset (mremap may move the base address)
        if (new_size != old_size) {
            assert(stream->cur_ptr >= stream->raw_buf);
            size_t cur_offset = stream->cur_ptr - stream->raw_buf;
            if ((stream->raw_buf = mremap(stream->raw_buf, old_size, new_size,
                                          MREMAP_MAYMOVE)) == MAP_FAILED) {
                EXITME("failed to mremap");
            }
            stream->cur_ptr = stream->raw_buf + cur_offset;
        }
    } else {
        // the raw_ptr does not exist: create a fresh shared mapping
        if ((stream->raw_buf = mmap(NULL, new_size, PROT_READ | PROT_WRITE,
                                    MAP_SHARED, stream->fd, 0)) == MAP_FAILED) {
            EXITME("failed to mmap");
        }
        stream->cur_ptr = stream->raw_buf;
    }
    stream->size = new_size;

    return 0;
}

Z_PRIVATE void __mem_file_open_stream(_MEM_FILE *stream, bool is_resumed) {
    assert(stream != NULL && stream->filename != NULL);

    // resumed streams keep their recorded size; new ones start at INIT_SIZE
    int flag = (is_resumed ? O_RDWR : O_RDWR | O_CREAT | O_TRUNC);
    size_t file_size = (is_resumed ? stream->size : INIT_SIZE);

    if ((stream->fd = open(stream->filename, flag, (mode_t)0755)) == -1) {
        goto ERROR;
    }

    // XXX: here we can have two cases:
    //  case 1: a new file, where stream->size = 0, stream->raw_buf = NULL
    //  case 2: an old file, where stream->size > 0, stream->raw_buf = NULL
    if (__mem_file_stretch_to_size(stream, file_size) == -1) {
        goto ERROR;
    }

    return;

ERROR:
    z_error("_MEM_FILE open stream: %d(%s)", errno, strerror(errno));
    z_free((void *)stream->filename);
    z_free(stream);
    z_exit(errno);
    return;
}

Z_API _MEM_FILE *z_mem_file_fopen(const char *pathname, const char *mode) {
    if (z_strcmp(mode, "w+")) {
        EXITME("for _MEM_FILE, we only support \"w+\" mode");
    }

    _MEM_FILE *stream = STRUCT_ALLOC(_MEM_FILE);
    stream->filename = z_strdup(pathname);
    stream->raw_buf = stream->cur_ptr = NULL;
    stream->size_fixed = false;

    __mem_file_open_stream(stream, false);

    return stream;
}

Z_API void z_mem_file_fsync(_MEM_FILE *stream) {
    __mem_file_check_state(stream);
    assert(stream != NULL);
    z_trace("fsync _MEM_FILE");

    // flush the mapping to disk; on failure release resources before exiting
    if (msync(stream->raw_buf, stream->size, MS_SYNC) == -1) {
        z_error("_MEM_FILE fsync: %d(%s)", errno, strerror(errno));
        munmap(stream->raw_buf, stream->size);
        close(stream->fd);
        z_free(stream);
        z_exit(errno);
    }
}

Z_API void z_mem_file_fclose(_MEM_FILE *stream) {
    __mem_file_check_state(stream);
    assert(stream != NULL);

    // sync first, then unmap, then close; each failure path releases what is
    // still held before exiting
    z_mem_file_fsync(stream);

    if (munmap(stream->raw_buf, stream->size) == -1) {
        z_error("_MEM_FILE fclose: %d(%s)", errno, strerror(errno));
        close(stream->fd);
        z_free(stream);
        z_exit(errno);
    }

    if (close(stream->fd) == -1) {
        z_error("_MEM_FILE fclose: %d(%s)", errno, strerror(errno));
        z_free(stream);
        z_exit(errno);
    }

    z_free((void *)stream->filename);
    z_free(stream);
}

Z_API size_t z_mem_file_pwrite(_MEM_FILE *stream, const void *buf,
                               size_t count, size_t offset) {
    __mem_file_check_state(stream);
    assert(stream != NULL);

    if (stream->size < count + offset) {
        // stretch file size to the next INC_SIZE boundary
        size_t new_size = BITS_ALIGN_CELL(count + offset, INC_SIZE_POW2);
        assert(new_size >= count + offset);
        if (__mem_file_stretch_to_size(stream, new_size) == -1) {
            goto ERROR;
        }
    }

    memcpy(stream->raw_buf + offset, buf, count);
    return count;

ERROR:
    z_error("_MEM_FILE pwrite: %d(%s)", errno, strerror(errno));
    close(stream->fd);
    z_free(stream);
    z_exit(errno);
    return SIZE_MAX;
}

Z_API size_t z_mem_file_pread(_MEM_FILE *stream, void *buf, size_t count,
                              size_t offset) {
    __mem_file_check_state(stream);
    assert(stream != NULL);

    if (stream->size < count + offset) {
        EXITME("read too much from _MEM_FILE");
    }

    memcpy(buf, stream->raw_buf + offset, count);
    return count;
}

Z_API size_t z_mem_file_fwrite(void *ptr, size_t size, size_t nmemb,
                               _MEM_FILE *stream) {
    __mem_file_check_state(stream);
    // pwrite at the cursor, then advance it
    size_t n = z_mem_file_pwrite(stream, ptr, nmemb * size,
                                 stream->cur_ptr - stream->raw_buf);
    stream->cur_ptr += n;
    return n;
}

Z_API void z_mem_file_fix_size(_MEM_FILE *stream, size_t size) {
    __mem_file_check_state(stream);
    if (size < stream->size) {
        EXITME("the size of the given _MEM_FILE is too large");
    }
    if (size % PAGE_SIZE) {
        EXITME("the given size is not page-aligned");
    }
    if (__mem_file_stretch_to_size(stream, size) == -1) {
        EXITME("failed to set size for the underlying file");
    }
    // from now on, any resize attempt is an error
    stream->size_fixed = true;
}

Z_API size_t z_mem_file_fread(void *ptr, size_t size, size_t nmemb,
                              _MEM_FILE *stream) {
    __mem_file_check_state(stream);
    // pread at the cursor, then advance it
    size_t n = z_mem_file_pread(stream, ptr, nmemb * size,
                                stream->cur_ptr - stream->raw_buf);
    stream->cur_ptr += n;
    return n;
}

Z_API void z_mem_file_fseek(_MEM_FILE *stream, long offset, int whence) {
    __mem_file_check_state(stream);
    assert(stream != NULL);
    if (whence != SEEK_SET) {
        EXITME("for _MEM_FILE seek, we only support SEEK_SET");
    }
    if (offset >= stream->size) {
        EXITME("offset is out of boundary");
    }
    stream->cur_ptr = stream->raw_buf + offset;
}

Z_API long z_mem_file_ftell(_MEM_FILE *stream) {
    __mem_file_check_state(stream);
    assert(stream != NULL);
    return (long)(stream->cur_ptr - stream->raw_buf);
}

Z_API void z_mem_file_suspend(_MEM_FILE *stream) {
    __mem_file_check_state(stream);
    z_trace("suspend file %s", stream->filename);

    if (stream->fd == INVALID_FD && stream->raw_buf == NULL &&
        stream->cur_ptr == NULL) {
        // XXX: a good place to debug by changing return to EXITME
        z_warn("try to suspend a disconnected file, ignore");
        return;
    }

    z_mem_file_fsync(stream);

    // close the fd and drop the mapping, but keep filename/size so the
    // stream can be re-opened later by z_mem_file_resume()
    if (close(stream->fd) == -1) {
        z_error("_MEM_FILE suspend: %d(%s)", errno, strerror(errno));
        z_free(stream);
        z_exit(errno);
    }
    stream->fd = INVALID_FD;

    if (munmap(stream->raw_buf, stream->size) == -1) {
        z_error("_MEM_FILE suspend: %d(%s)", errno, strerror(errno));
        close(stream->fd);
        z_free(stream);
        z_exit(errno);
    }
    stream->raw_buf = stream->cur_ptr = NULL;
}

Z_API void z_mem_file_resume(_MEM_FILE *stream) {
    z_trace("resume file %s", stream->filename);

    if (stream->fd != INVALID_FD && stream->raw_buf != NULL &&
        stream->cur_ptr != NULL) {
        // XXX: a good place to debug by changing return to EXITME
        z_warn("try to resume a connected file, ignore");
        return;
    }

    __mem_file_open_stream(stream, true);
}

Z_API void z_mem_file_save_as(_MEM_FILE *stream, const char *pathname) {
    __mem_file_check_state(stream);
    assert(stream != NULL);

    // check whether pathname exists. if so, remove it.
    // Note that we have to remove pathname first. Otherwise, if pathname is
    // linked with any important file (e.g., patched file), directly
    // fopen(pathname, "wb") will rewrite the important file.
    if (!z_access(pathname, F_OK)) {
        if (remove(pathname)) {
            EXITME("failed on remove: %s (error: %s)", pathname,
                   strerror(errno));
        }
    }

    FILE *f = z_fopen(pathname, "wb");
    if (!f) {
        EXITME("fail to open %s", pathname);
    }

    size_t size = z_fwrite(stream->raw_buf, sizeof(uint8_t), stream->size, f);
    if (size != stream->size) {
        EXITME(
            "fail when writing content to \"%s\", expect %ld bytes, but only "
            "%ld bytes (error: %s)",
            pathname, stream->size, size, strerror(errno));
    }

    z_fclose(f);

    if (z_chmod(pathname, 0755)) {
        EXITME("fail when chmod snapshot");
    }
}

================================================
FILE: src/mem_file.h
================================================
/*
 * mem_file.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/ #ifndef __MEM_FILE_H #define __MEM_FILE_H #include "config.h" /* * Use mmap to speed up FILE operations (similar with _IO_FILE) */ STRUCT(_MEM_FILE, { int fd; const char *filename; uint8_t *raw_buf; uint8_t *cur_ptr; size_t size; // page-aligned bool size_fixed; }); /* * Setter and Getter */ DECLARE_GETTER(_MEM_FILE, mem_file, const char *, filename); DECLARE_GETTER(_MEM_FILE, mem_file, uint8_t *, raw_buf); DECLARE_GETTER(_MEM_FILE, mem_file, uint8_t *, cur_ptr); DECLARE_GETTER(_MEM_FILE, mem_file, size_t, size); /* * Open a _MEM_FILE with pathname. * Currently, we only support "w+" mode. */ Z_API _MEM_FILE *z_mem_file_fopen(const char *pathname, const char *mode); /* * Synchronize a _MEM_FILE with its memory mapping. */ Z_API void z_mem_file_fsync(_MEM_FILE *stream); /* * Close a _MEM_FILE. */ Z_API void z_mem_file_fclose(_MEM_FILE *stream); /* * Fix the size of a _MEM_FILE. This function requires the size of _MEM_FILE * cannot be larger than size. */ Z_API void z_mem_file_fix_size(_MEM_FILE *stream, size_t size); /* * Write to a _MEM_FILE. * Note that only pwrite can extend file. */ Z_API size_t z_mem_file_pwrite(_MEM_FILE *stream, const void *buf, size_t count, size_t offset); /* * Read from a _MEM_FILE. */ Z_API size_t z_mem_file_pread(_MEM_FILE *stream, void *buf, size_t count, size_t offset); /* * fread for _MEM_FILE. */ Z_API size_t z_mem_file_fread(void *ptr, size_t size, size_t nmemb, _MEM_FILE *stream); /* * fwrite for _MEM_FILE. */ Z_API size_t z_mem_file_fwrite(void *ptr, size_t size, size_t nmemb, _MEM_FILE *stream); /* * fseek for _MEM_FILE. * Currently, we only support SEEK_SET. */ Z_API void z_mem_file_fseek(_MEM_FILE *stream, long offset, int whence); /* * ftell for _MEM_FILE. */ Z_API long z_mem_file_ftell(_MEM_FILE *stream); /* * suspend a _MEM_FILE, to allow other processes access the underlaying file. */ Z_API void z_mem_file_suspend(_MEM_FILE *stream); /* * resume a _MEM_FILE. 
*/ Z_API void z_mem_file_resume(_MEM_FILE *stream); /* * save _MEM_FILE as pathname */ Z_API void z_mem_file_save_as(_MEM_FILE *stream, const char *pathname); #endif ================================================ FILE: src/patcher.c ================================================ /* * patcher.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "patcher.h" #include "capstone_.h" #include "interval_splay.h" #include "iterator.h" #include "utils.h" #include "x64_utils.c" #include #define PATCH_THRESHOLD 0.99999 #define PATCH_THRESHOLD_FOR_RETADDR (PATCH_THRESHOLD / 2) #define PATCH_RET_DEPTH 20 #define BRIDGE_PRE_DEPTH 5 typedef struct bridge_point_t { addr_t bridge_addr; addr_t jump_addr; addr_t source_addr; addr_t max_addr; // used for revoke bridge patching } BridgePoint; /* * When the underlying disassembler does not fully support prob-disasm, we * directly patch all possible instructions without calculating pathcing * candidates. */ Z_PRIVATE void __patcher_patch_all_S(Patcher *p); /* * When the underlying fully supports prob-disasm, we need to carefully decide * which the patch candidates are. 
Z_PRIVATE void __patcher_patch_all_F(Patcher *p);

/*
 * Flip uncertain patches (used in delta debugging mode)
 */
Z_PRIVATE void __patcher_flip_uncertain_patch(Patcher *p, addr_t addr,
                                              bool is_enable);

/*
 * Find new certain addresses via BFS
 */
Z_PRIVATE void __patcher_bfs_certain_addresses(Patcher *p, addr_t addr);

/*
 * Patch a new certain address, return whether this patch is successfully
 * applied.
 */
Z_PRIVATE bool __patcher_patch_certain_address(Patcher *p, addr_t addr,
                                               uint8_t inst_size);

/*
 * Patch a new uncertain address, return whether this patch is successfully
 * applied.
 */
Z_PRIVATE bool __patcher_patch_uncertain_address(Patcher *p, addr_t addr);

/*
 * Compare two addresses (three-way: negative / zero / positive)
 */
Z_PRIVATE int32_t __patcher_compare_address(addr_t a, addr_t b, void *_data);

/*
 * Comparator used to keep p->uncertain_patches sorted. _data is unused and
 * must be NULL.
 */
Z_PRIVATE int32_t __patcher_compare_address(addr_t a, addr_t b, void *_data) {
    assert(!_data);
    if (a < b) {
        return -1;
    } else if (a > b) {
        return 1;
    } else {
        return 0;
    }
}

/*
 * Enable (write an invalid byte) or disable (restore the original byte from
 * the .text backup) the one-byte uncertain patch at addr. Only used in delta
 * debugging mode.
 */
Z_PRIVATE void __patcher_flip_uncertain_patch(Patcher *p, addr_t addr,
                                              bool is_enable) {
    if (is_enable) {
        z_patcher_unsafe_patch(p, addr, 1, z_x64_gen_invalid(1), NULL);
    } else {
        // restore the pre-patch byte from the .text backup taken at init time
        size_t off = addr - p->text_addr;
        if (off >= p->text_size) {
            EXITME("invalid address: %#lx", addr);
        }
        z_patcher_unsafe_patch(p, addr, 1, p->text_backup + off, NULL);
    }
}

/*
 * Patch one uncertain (probability-based) byte at addr. Returns false when
 * addr is already known-certain, or when it is already in the uncertain patch
 * list; returns true once the byte is patched and recorded.
 */
Z_PRIVATE bool __patcher_patch_uncertain_address(Patcher *p, addr_t addr) {
    // step (1). check whether this address is certain
    if (z_addr_dict_exist(p->certain_addresses, addr)) {
        return false;
    }

    // step (2). check whether it is already patched as uncertain patch
    if (g_sequence_lookup(p->uncertain_patches, GSIZE_TO_POINTER(addr),
                          (GCompareDataFunc)__patcher_compare_address, NULL)) {
        return false;
    }

    // step (3). patch underlying binary
    z_patcher_unsafe_patch(p, addr, 1, z_x64_gen_invalid(1), NULL);

    // step (4). update uncertain_patches
    g_sequence_insert_sorted(p->uncertain_patches, GSIZE_TO_POINTER(addr),
                             (GCompareDataFunc)__patcher_compare_address,
                             NULL);

    return true;
}

/*
 * Mark addr as certain code and patch it. inst_size is the length of the
 * instruction when addr is an instruction boundary, or 0 for an interior
 * byte (see the BFS caller). Returns false when addr was already certain.
 */
Z_PRIVATE bool __patcher_patch_certain_address(Patcher *p, addr_t addr,
                                               uint8_t inst_size) {
    // XXX: one address cannot be set as certain twice (except for the ones
    // which are revoked for adjusting bridges)
    if (z_addr_dict_exist(p->certain_addresses, addr)) {
        return false;
    }

    z_trace("certain patch: %#lx", addr);

    // step (1). set certain_addresses
    z_addr_dict_set(p->certain_addresses, addr, inst_size);

    // step (2). patch underlying binary
    z_patcher_unsafe_patch(p, addr, 1, z_x64_gen_invalid(1), NULL);

    // step (3). update certain_patches and uncertain_patches
    z_addr_dict_set(p->certain_patches, addr, true);
    GSequenceIter *iter = g_sequence_lookup(
        p->uncertain_patches, GSIZE_TO_POINTER(addr),
        (GCompareDataFunc)__patcher_compare_address, NULL);
    if (iter) {
        // the address was uncertain before: promote it to a certain patch
        g_sequence_remove(iter);
    }

    return true;
}

/*
 * Starting from the known-good address addr, walk the successor graph of the
 * superset disassembly (BFS) and mark every reachable .text byte as certain,
 * patching it along the way.
 */
Z_PRIVATE void __patcher_bfs_certain_addresses(Patcher *p, addr_t addr) {
    // step (0). a quick check of whether addr is already known
    if (z_addr_dict_exist(p->certain_addresses, addr)) {
        return;
    }

    Disassembler *d = p->disassembler;
    addr_t text_addr = p->text_addr;
    size_t text_size = p->text_size;

    // step (1). BFS to find all certain addresses
    GQueue *queue = g_queue_new();
    g_queue_push_tail(queue, GSIZE_TO_POINTER(addr));

    while (!g_queue_is_empty(queue)) {
        // step (3.1). pop from queue and get basic information
        addr_t cur_addr = (addr_t)g_queue_pop_head(queue);

        // step (3.2). update certain_addresses (true means it is an
        // instruction boundary, otherwise false)
        if (z_addr_dict_exist(p->certain_addresses, cur_addr)) {
            // XXX: there are two cases of duplicate updating:
            //   a: we push the same instruction into the queue twice
            //   b: there is an overlapping instruction caused by *LOCK* prefix
            // The other two assertions have the same situation.
assert(z_addr_dict_get(p->certain_addresses, cur_addr) || (z_addr_dict_get(p->certain_addresses, cur_addr - 1) && z_disassembler_get_superset_disasm(d, cur_addr - 1) ->detail->x86.prefix[0] == X86_PREFIX_LOCK)); continue; } cs_insn *cur_inst = z_disassembler_get_superset_disasm(d, cur_addr); assert(cur_inst); z_trace("find a certain address " CS_SHOW_INST(cur_inst)); for (int i = 0; i < cur_inst->size; i++) { if (z_addr_dict_exist(p->certain_addresses, cur_addr + i)) { // XXX: avoid rewriting the instruction boundary assert(i == 1 && z_addr_dict_get(p->certain_addresses, cur_addr + i) && cur_inst->detail->x86.prefix[0] == X86_PREFIX_LOCK); break; } __patcher_patch_certain_address(p, cur_addr + i, (i == 0 ? cur_inst->size : 0)); // update pdisasm here if (i == 0) { z_diassembler_update_prob_disasm(d, cur_addr + i, true); } else if (i == 1 && cur_inst->detail->x86.prefix[0] == X86_PREFIX_LOCK) { // XXX: we make it conservative, as we are not sure whether // cur_addr + i will be used as another instruction. // // do nothing } else { z_diassembler_update_prob_disasm(d, cur_addr + i, false); } } // step (3.3). check successors Iter(addr_t, succ_addrs); z_iter_init_from_buf(succ_addrs, z_disassembler_get_all_successors(d, cur_addr)); while (!z_iter_is_empty(succ_addrs)) { addr_t succ_addr = *(z_iter_next(succ_addrs)); // ignore the one which is not in .text if (succ_addr < text_addr || succ_addr >= text_addr + text_size) { continue; } if (z_addr_dict_exist(p->certain_addresses, succ_addr)) { assert(z_addr_dict_get(p->certain_addresses, succ_addr) || (z_addr_dict_get(p->certain_addresses, succ_addr - 1) && z_disassembler_get_superset_disasm(d, succ_addr - 1) ->detail->x86.prefix[0] == X86_PREFIX_LOCK)); continue; } g_queue_push_tail(queue, GSIZE_TO_POINTER(succ_addr)); } z_iter_destroy(succ_addrs); } // step (2). 
free queue g_queue_free(queue); } #ifdef CONSERVATIVE_PATCH Z_PRIVATE void __patcher_patch_all_F(Patcher *p) { Disassembler *d = p->disassembler; ELF *e = z_binary_get_elf(p->binary); addr_t text_addr = p->text_addr; size_t text_size = p->text_size; // we first patch call/cjmp/jmp (at least 5 bytes) for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { if (z_disassembler_get_prob_disasm(d, addr) < PATCH_THRESHOLD) { goto NEXT_ADDR; } cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); assert(inst); if (!z_capstone_is_call(inst) && !z_capstone_is_ret(inst) && !z_capstone_is_cjmp(inst) && !z_capstone_is_jmp(inst)) { goto NEXT_ADDR; } // check RET without number if (z_capstone_is_ret(inst) && inst->detail->x86.op_count) { goto NEXT_ADDR; } addr_t end_addr = addr + inst->size; addr_t cur_addr = addr; // guarantee at least 5 bytes while (end_addr - cur_addr < 5) { Iter(addr_t, pred_addrs); z_iter_init_from_buf( pred_addrs, z_disassembler_get_direct_predecessors(d, cur_addr)); bool found = false; addr_t pred_addr = INVALID_ADDR; while (!z_iter_is_empty(pred_addrs)) { addr_t pred_addr_ = *(z_iter_next(pred_addrs)); // check the operand is not single-byte-length cs_insn *pred_inst_ = z_disassembler_get_superset_disasm(d, pred_addr_); if (!pred_inst_) { continue; } cs_detail *pred_detail_ = pred_inst_->detail; if (pred_detail_->x86.op_count >= 1) { if (pred_detail_->x86.operands[0].size == 1) { continue; } } // check probability if (z_disassembler_get_prob_disasm(d, pred_addr_) < PATCH_THRESHOLD) { continue; } // multiple valid predecessors if (found) { goto NEXT_ADDR; } found = true; pred_addr = pred_addr_; } if (!found) { goto NEXT_ADDR; } cs_insn *pred_inst = z_disassembler_get_superset_disasm(d, pred_addr); if (z_capstone_is_call(pred_inst) || z_capstone_is_ret(pred_inst) || z_capstone_is_cjmp(pred_inst) || z_capstone_is_jmp(pred_inst) || pred_addr + pred_inst->size != cur_addr) { goto NEXT_ADDR; } cur_addr = pred_addr; } // TODO: advanced 
patching // XXX: advanced patching is not that necessary for now, as the error // diagnosis can help find such erroneous patchings // check no prior patchpoints are call/cjmp/jmp // Iter(addr_t, occ_addrs); // z_iter_init_from_buf(occ_addrs, // z_disassembler_get_occluded_addrs(d, cur_addr)); // while (!z_iter_is_empty(occ_addrs)) { // addr_t occ_addr = *(z_iter_next(occ_addrs)); // if (occ_addr >= cur_addr) { // continue; // } // cs_insn *occ_inst = z_disassembler_get_superset_disasm(d, // occ_addr); assert(occ_inst); if (z_capstone_is_call(occ_inst) || // z_capstone_is_cjmp(occ_inst) || // z_capstone_is_jmp(occ_inst)) { // goto NEXT_ADDR; // } // } __patcher_patch_uncertain_address(p, cur_addr); NEXT_ADDR: continue; } // we then patch returan address for normal call and plt call GQueue *bfs = g_queue_new(); for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { double128_t addr_prob = z_disassembler_get_prob_disasm(d, addr); if (addr_prob < PATCH_THRESHOLD) { continue; } cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); assert(inst); if (!z_capstone_is_call(inst)) { continue; } cs_detail *detail = inst->detail; if ((detail->x86.op_count != 1) || (detail->x86.operands[0].type != X86_OP_IMM)) { continue; } addr_t callee_addr = detail->x86.operands[0].imm; if (!z_elf_get_plt_info(e, callee_addr) && (callee_addr < text_addr || callee_addr >= text_addr + text_size)) { continue; } addr_t ret_addr = addr + inst->size; if (!z_elf_get_plt_info(e, callee_addr)) { g_queue_push_tail(bfs, GSIZE_TO_POINTER(ret_addr)); size_t bfs_n = 0; bool valid = false; while (!g_queue_is_empty(bfs)) { addr_t cur_addr = (addr_t)g_queue_pop_head(bfs); if (z_disassembler_get_prob_disasm(d, cur_addr) >= PATCH_THRESHOLD) { valid = true; break; } Iter(addr_t, succ_addrs); z_iter_init_from_buf( succ_addrs, z_disassembler_get_direct_successors(d, cur_addr)); while (!z_iter_is_empty(succ_addrs)) { addr_t succ_addr = *(z_iter_next(succ_addrs)); if ((bfs_n++) < 
PATCH_RET_DEPTH) { g_queue_push_tail(bfs, GSIZE_TO_POINTER(succ_addr)); } } if (bfs_n >= PATCH_RET_DEPTH) { break; } } g_queue_clear(bfs); if (!valid) { continue; } } else { double128_t ret_P = z_disassembler_get_prob_disasm(d, ret_addr); if (copysignl(1.0, ret_P) < 0.0) { continue; } } __patcher_patch_uncertain_address(p, ret_addr); } } #else Z_PRIVATE void __patcher_patch_all_F(Patcher *p) { Disassembler *d = p->disassembler; addr_t text_addr = p->text_addr; size_t text_size = p->text_size; // step (1). we first find all potential uncertain patch points including // all call/cjmp/jmp/ret instruction and the ret_addr of any call // instruction. if (!p->potential_uncertain_addresses) { for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); if (!inst) { continue; } // TODO: patch some predecessors to reduce the number of delayed // bridges if (z_capstone_is_ret(inst) || z_capstone_is_cjmp(inst) || z_capstone_is_jmp(inst)) { p->potential_uncertain_addresses = g_list_prepend( p->potential_uncertain_addresses, GSIZE_TO_POINTER(addr)); continue; } if (z_capstone_is_call(inst)) { p->potential_uncertain_addresses = g_list_prepend( p->potential_uncertain_addresses, GSIZE_TO_POINTER(addr)); // TODO: leverage non-return analysis to improve here addr_t ret_addr = addr + inst->size; if (z_disassembler_get_superset_disasm(d, ret_addr)) { // XXX: we use -ret_addr to indicate it is a return address addr_t negative_addr = (addr_t)(-(int64_t)ret_addr); p->potential_uncertain_addresses = g_list_prepend(p->potential_uncertain_addresses, GSIZE_TO_POINTER(negative_addr)); } } } } // step (2). 
apply patches { GList *l = p->potential_uncertain_addresses; while (l != NULL) { GList *next = l->next; // step (2.1) get address and threshold_p addr_t addr = INVALID_ADDR; double128_t threshold_p = 1.0; int64_t addr_r = (int64_t)l->data; if (addr_r >= 0) { addr = (addr_t)addr_r; threshold_p = PATCH_THRESHOLD; } else { addr = (addr_t)(-addr_r); threshold_p = PATCH_THRESHOLD_FOR_RETADDR; } // step (2.2). patch the ones which have high probabilities and // which are still uncertain if (z_addr_dict_exist(p->certain_addresses, addr)) { // addr is certain to be code currently, which means it can be // remove from the uncertain patch list p->potential_uncertain_addresses = g_list_delete_link(p->potential_uncertain_addresses, l); } else { if (z_disassembler_get_prob_disasm(d, addr) > threshold_p) { __patcher_patch_uncertain_address(p, addr); } } // step (2.3). goto next l = next; } } } #endif Z_PRIVATE void __patcher_patch_all_S(Patcher *p) { addr_t text_addr = p->text_addr; size_t text_size = p->text_size; Disassembler *d = p->disassembler; addr_t cur_addr = text_addr; while (cur_addr < text_addr + text_size) { if (z_disassembler_get_prob_disasm(d, cur_addr) < PATCH_THRESHOLD) { cur_addr += 1; continue; } cs_insn *cur_inst = z_disassembler_get_superset_disasm(d, cur_addr); assert(cur_inst); z_trace("handle instruction: " CS_SHOW_INST(cur_inst)); // TODO: handle the overlapping instruction introduced by *LOCK* prefix size_t i = 0; do { if (z_disassembler_get_prob_disasm(d, cur_addr) < PATCH_THRESHOLD) { EXITME("invalid address for simple pdisasm " CS_SHOW_INST( cur_inst)); } __patcher_patch_certain_address(p, cur_addr, (i == 0 ? 
cur_inst->size : 0)); cur_addr += 1; i += 1; } while (i < cur_inst->size); } } Z_API void z_patcher_describe(Patcher *p) { if (p->s_iter || p->e_iter) { EXITME("cannot make requests when delta debugging mode is enable"); } // first do patching z_patcher_initially_patch(p); Disassembler *d = p->disassembler; addr_t text_addr = p->text_addr; size_t text_size = p->text_size; z_sayf("%-7s%-25s%-25s%-25s%-25s%-25s%-8s%-60s%-5s%s\n", "status", "inst hint", "inst lost", "data hint", "D", "P", "SCC", "inst", "size", " succs"); Buffer *patchpoints = z_buffer_create(NULL, 0); for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { cs_insn *inst = NULL; uint32_t scc_id = 0; double128_t inst_hint = NAN; double128_t inst_lost = NAN; double128_t data_hint = NAN; double128_t D = NAN; double128_t P = NAN; z_disassembler_get_prob_disasm_internal(d, addr, &inst, &scc_id, &inst_hint, &inst_lost, &data_hint, &D, &P); const char *status = ""; PPType pp_type = z_patcher_check_patchpoint(p, addr); if (pp_type != PP_INVALID) { if (pp_type == PP_CERTAIN) { status = "CC"; } else if (pp_type == PP_UNCERTAIN) { status = "UC"; } else if (pp_type == PP_BRIDGE) { status = "BC"; } z_buffer_append_raw(patchpoints, (uint8_t *)&addr, sizeof(addr)); } if (!isnan(data_hint) && !isinf(data_hint) && data_hint > 10000000000000000000.0) { z_sayf("%-7s%-25.12Lf%-25.2Lf%-25Le%-25.12Lf%+-25.12Lf", status, inst_hint, inst_lost, data_hint, D, P); } else { z_sayf("%-7s%-25.12Lf%-25.2Lf%-25.2Lf%-25.12Lf%+-25.12Lf", status, inst_hint, inst_lost, data_hint, D, P); } if (inst) { z_sayf("%-8d", scc_id); const char *inst_str = z_alloc_printf(CS_SHOW_INST(inst)); z_sayf("%-60s%-5d", inst_str, inst->size); z_free((void *)inst_str); Iter(addr_t, succ_addrs); z_iter_init_from_buf(succ_addrs, z_disassembler_get_all_successors(d, addr)); while (!z_iter_is_empty(succ_addrs)) { z_sayf(" {%#lx}", *(z_iter_next(succ_addrs))); } z_sayf("\n"); } else { z_sayf("%-8d(%#lx:\tinvalid)\n", scc_id, addr); } } 
z_buffer_write_file(patchpoints, "patchpoints.log");
    z_buffer_destroy(patchpoints);
}

/*
 * Create a Patcher bound to the given disassembler. All bookkeeping
 * structures (certain/uncertain patches, bridges) start empty; the .text
 * backup is taken later, in z_patcher_initially_patch.
 */
Z_API Patcher *z_patcher_create(Disassembler *d, RewritingOptArgs *opts) {
    Patcher *p = STRUCT_ALLOC(Patcher);

    p->opts = opts;

    p->disassembler = d;
    p->binary = z_disassembler_get_binary(d);
    // whether the underlying disassembler fully supports prob-disasm
    p->pdisasm_enable = z_disassembler_fully_support_prob_disasm(d);

    p->elf = z_binary_get_elf(p->binary);

    // cache the .text boundaries and a writable pointer into the section
    Elf64_Shdr *text = z_elf_get_shdr_text(p->elf);
    p->text_addr = text->sh_addr;
    p->text_size = text->sh_size;
    p->text_ptr = z_elf_vaddr2ptr(p->elf, p->text_addr);
    p->text_backup = NULL;

    z_addr_dict_init(p->certain_addresses, p->text_addr, p->text_size);
    z_addr_dict_init(p->certain_patches, p->text_addr, p->text_size);
    p->uncertain_patches = g_sequence_new(NULL);
    // the bridge table owns its BridgePoint values (freed via z_free)
    p->bridges = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,
                                       (GDestroyNotify)(&z_free));

    p->potential_uncertain_addresses = NULL;

    // delta debugging iterators (disabled unless delta debugging is on)
    p->s_iter = NULL;
    p->e_iter = NULL;

    // bridge statistics
    p->patched_bridges = 0;
    p->delayed_bridges = 0;
    p->resolved_bridges = 0;
    p->adjusted_bridges = 0;

    return p;
}

/*
 * Destroy a Patcher and release every structure it owns.
 */
Z_API void z_patcher_destroy(Patcher *p) {
    z_addr_dict_destroy(p->certain_addresses);
    z_addr_dict_destroy(p->certain_patches);
    g_sequence_free(p->uncertain_patches);
    g_hash_table_destroy(p->bridges);
    z_rptr_destroy(p->text_ptr);

    if (p->text_backup) {
        z_free(p->text_backup);
    }

    if (p->potential_uncertain_addresses) {
        g_list_free(p->potential_uncertain_addresses);
    }

    z_free(p);
}

/*
 * Take the initial snapshot of .text, run prob-disasm, and apply the first
 * round of patches (the simple or full flavor, depending on whether the
 * disassembler fully supports prob-disasm). Must not be called in delta
 * debugging mode, and only once per Patcher.
 */
Z_API void z_patcher_initially_patch(Patcher *p) {
    assert(p != NULL);

    if (p->s_iter || p->e_iter) {
        EXITME("cannot do initial patch in delta debugging mode");
    }

    // backup .text
    if (p->text_backup) {
        EXITME("backed up .text before initial patching");
    }
    p->text_backup = z_alloc(p->text_size, sizeof(uint8_t));
    z_rptr_memcpy(p->text_backup, p->text_ptr, p->text_size);

    // do prob-disassemble first
    z_disassembler_prob_disasm(p->disassembler);

    // fill all patch candidates as HLT (0xf4) or ILLEGAL INSTRUCTION
    if (!p->pdisasm_enable) {
        __patcher_patch_all_S(p);
    } else {
__patcher_patch_all_F(p); } } Z_API PPType z_patcher_check_patchpoint(Patcher *p, addr_t addr) { if (p->s_iter || p->e_iter) { EXITME("cannot make requests when delta debugging mode is enable"); } #ifdef BINARY_SEARCH_DEBUG_REWRITER z_warn( "when debuging rewriter, real crashes may cause unintentional " "behaviors"); #endif // step (0). check whether addr is in .text (some real crash points are in // the shadow code) if (addr < p->text_addr || addr >= p->text_addr + p->text_size) { return PP_INVALID; } // step (1). check certain patches // TODO: the overlapping *LOCK* instruction may cause problems if (z_addr_dict_exist(p->certain_patches, addr) && z_addr_dict_get(p->certain_addresses, addr)) { return PP_CERTAIN; } // step (2). check uncertain patches GSequenceIter *iter = g_sequence_lookup(p->uncertain_patches, GSIZE_TO_POINTER(addr), (GCompareDataFunc)__patcher_compare_address, NULL); if (iter) { return PP_UNCERTAIN; } // step (3). check bridge if (g_hash_table_lookup(p->bridges, GSIZE_TO_POINTER(addr))) { return PP_BRIDGE; } return PP_INVALID; } // TODO: BINARY_SEARCH_DEBUG_XXX may cause bugs for the following new code // // TODO: it is a basic jump instruction patching algorithm (w/ auto fix and // delayed patching) currently, we may leverage E9Patch tech in the future // // XXX: following is a typical bridge layout (before and after patching), where // symbol "|" denotes an instruction boundary. 
// // Bytes: B B B B B B B B B B B B B B B B B B B B B B B // --------------------------------------------------------------------------- // Before patching: // |---|-|-----|-------|---|-----|-----|-------| // // --------------------------------------------------------------------------- // After patching: // |*******|???|-------|---|-----|-----|-------| // // overlapping inst A: |---------------------| // overlapping inst B: |-| // // jump values: J J J J J // patching values: P P P P P P P P P P P P P P P P P P // // bridge points: X X X X X X X X // left certain patches: C C C C C C C C // // bridge-related addresses: A A A A A A A A A A A A A A A // Z_API void z_patcher_build_bridge(Patcher *p, addr_t ori_addr, addr_t shadow_addr, bool is_real) { if (p->s_iter || p->e_iter) { EXITME("cannot build bridge in delta debugging mode"); } Disassembler *d = p->disassembler; #ifdef BINARY_SEARCH_DEBUG_REWRITER // avoid infinite loop ELF *e = z_binary_get_elf(p->binary); if (ori_addr == shadow_addr) { cs_insn *inst = z_disassembler_get_superset_disasm(d, ori_addr); z_elf_write(e, ori_addr, inst->size, inst->bytes); return; } #endif // step (0). check ori_addr range if (ori_addr < p->text_addr || ori_addr >= p->text_addr + p->text_size) { EXITME("invalid address for bridge: %#lx", ori_addr); } // step (1). update certain_addresses __patcher_bfs_certain_addresses(p, ori_addr); // step (2). check whether there is a bridge already built on current addr BridgePoint *ori_bp = (BridgePoint *)g_hash_table_lookup( p->bridges, GSIZE_TO_POINTER(ori_addr)); if (ori_bp) { // It is possible when the address is regarded as external crashpoint // and then regarded as retaddr. // Additionally, note that even if this is a fake crashpoint, it still // cannot be a non-leading PP_BRIDGE (i.e., not the starting point of a // bridge), as ori_addr should have been adjusted. 
if (ori_bp->bridge_addr != ori_addr) { EXITME("strange overlapped bridge detected: %#lx / %#lx", ori_addr, ori_bp->bridge_addr); } return; } if (!ori_bp && !z_addr_dict_exist(p->certain_patches, ori_addr)) { // TODO: remove the following is_real checking when confirming it is // useless. if (!is_real) { // XXX: it is possible that a fake bridge, which is not triggered by // a control flow crash, is added on code for another delayed // bridge. // XXX: a very typical case for this branch is, when pdisasm is // fully enabled: // 1. For an unsafe crashpoint A, we resolved this unsafety by // adding a new crashpoint B. // 2. Crashpoint B was triggered, but it is still unsafe and cannot // be resolved. So we delayed it. // 3. Both A and B are logged. But later, when applying the log, B // is first applied. // XXX: Above comments may be out-of-date. By applying the new way // of logging crashpoints, the aforementioned case seems to be // impossible to happend. EXITME("invalid fake bridge address: %#lx", ori_addr); return; } EXITME("invalid bridge address: %#lx", ori_addr); } // step (3). declare some important variables for futher operations bool safe_patch = true; bool bridge_patched = false; addr_t bridge_sources[35]; // the longest x64 inst is 15-bytes (5 + 15 * 2) addr_t bridge_max_addr = ori_addr; GQueue *bridge_queue = g_queue_new(); size_t ori_size = z_addr_dict_get(p->certain_addresses, ori_addr); if (!ori_size) { EXITME("the address of a bridge should be an instruction boundary"); } // the real address of the patched jump instruction addr_t jmp_addr = ori_addr; // We will try use all the addresses in [ori_addr, ori_addr + ori_size) as // the starting point of the jump instruction, so that we do not delay too // many bridges. // XXX: the overlapping *LOCK* instruction may cause some troubles. do { // initize some local variables first // XXX: the safe_patch should be initized as true, because we haven't // tested the new jmp_addr. 
safe_patch = true; bridge_max_addr = ori_addr; memset(bridge_sources, 0, sizeof(bridge_sources)); // patch nop if (jmp_addr != ori_addr) { if (!bridge_patched) { EXITME("the bridge much be applied in this case"); } z_patcher_unsafe_patch(p, ori_addr, jmp_addr - ori_addr, z_x64_gen_nop(jmp_addr - ori_addr), NULL); } // step (4). pre-patch bridge and additionally check whether current // patch is valid (for overlapping instructions). // step (4.0). check whether the new occupied byte is certain_patches if (jmp_addr == ori_addr) { // all first 5 bytes (a jmp instruction) need to be certain // patches for (size_t i = 0; i < 5; i++) { if (!z_addr_dict_exist(p->certain_patches, ori_addr + i)) { z_info( "an unsafe bridge patching caused by no enough certain " "patches, try to resolve it... " "(failed address %#lx, based on bridge address %#lx)", ori_addr + i, ori_addr); safe_patch = false; goto TRY_TO_PATCH_DONE; } } } else { if (!z_addr_dict_exist(p->certain_patches, jmp_addr + 4)) { // XXX: it means all next jmp_addrs will be invalid z_info( "an unsafe bridge patching caused by no enough certain " "patches, try to resolve it... " "(failed address %#lx, based on bridge address %#lx)", jmp_addr + 4, ori_addr); safe_patch = false; goto TRY_TO_PATCH_DONE; } } // step (4.1). pre-patch bridge (and revoke certain patches). { bridge_patched = true; KS_ASM_JMP(jmp_addr, shadow_addr); z_patcher_unsafe_patch(p, jmp_addr, ks_size, ks_encode, NULL); assert(ks_size == 5); // revoke patchpoints of PP_CERTAIN // XXX: note that the uncertain patchpoints have already be replaced // by certain ones in step (1). if (jmp_addr == ori_addr) { for (size_t off = 0; off < 5; off++) { z_addr_dict_remove(p->certain_patches, jmp_addr + off); } } else { // for the jmp_addr other than ori_addr, we only need to remove // the last byte of the patched jmp instruction z_addr_dict_remove(p->certain_patches, jmp_addr + 4); } } // step (4.2). 
additionally check whether current patch is valid (for // overlapping instructions) { // step (4.2.0). set up bridge starting point bridge_sources[0] = ori_addr; // XXX: as jmp_addr is inside the original bridge instruction, it // cannot be a crashpoint. /* bridge_sources[jmp_addr - ori_addr] = jmp_addr; */ // XXX: The first element is the target address and the second // element is the source address. g_queue_clear(bridge_queue); // step (4.2.1). insert the sources of overlapping instruction for (size_t off = 1; off < 5; off++) { if (z_addr_dict_get(p->certain_addresses, jmp_addr + off)) { g_queue_push_tail(bridge_queue, GSIZE_TO_POINTER(jmp_addr + off)); g_queue_push_tail(bridge_queue, GSIZE_TO_POINTER(jmp_addr + off)); } } // step (4.2.2). validate all possible overlapping instructions while (!g_queue_is_empty(bridge_queue)) { addr_t cur_addr = (addr_t)g_queue_pop_head(bridge_queue); addr_t src_addr = (addr_t)g_queue_pop_head(bridge_queue); size_t cur_off = cur_addr - ori_addr; z_rptr_inc(p->text_ptr, uint8_t, cur_addr - p->text_addr); CS_DISASM(p->text_ptr, cur_addr, 1); z_rptr_reset(p->text_ptr); // update bridge information { if (cur_addr > bridge_max_addr) { bridge_max_addr = cur_addr; } if (!bridge_sources[cur_off] || src_addr < bridge_sources[cur_off]) { bridge_sources[cur_off] = src_addr; } } // invalid instruction (nice!) if (cs_count == 0) { continue; } // TODO: handle control flow transfer instruction (e.g., set // unsafe_patch once any control flow transfer instruction is // involved) if (z_capstone_is_ret(cs_inst) || z_capstone_is_cjmp(cs_inst) || z_capstone_is_jmp(cs_inst) || z_capstone_is_call(cs_inst)) { z_info( "find an unsafe patch caused an inner jump, try next " "jmp_addr... 
(current bridge address %#lx and jmp addr " "%#lx)", ori_addr, jmp_addr); z_info("current failed jmp inst: " CS_SHOW_INST(cs_inst)); safe_patch = false; break; } // check whether the successor is still in the bridge addr_t next_addr = cur_addr + cs_inst->size; size_t next_off = cur_off + cs_inst->size; if (next_addr < jmp_addr + 5) { g_queue_push_tail(bridge_queue, GSIZE_TO_POINTER(next_addr)); g_queue_push_tail(bridge_queue, GSIZE_TO_POINTER(src_addr)); continue; } // check whether the successor is a certain patch if (z_addr_dict_exist(p->certain_patches, next_addr)) { // additionally handle the next instruction if (next_addr > bridge_max_addr) { bridge_max_addr = next_addr; } if (!bridge_sources[next_off] || src_addr < bridge_sources[next_off]) { bridge_sources[next_off] = src_addr; } continue; } z_info( "find an unsafe bridge patching without a certain ending, " "try next jmp_addr... (failed address %#lx, based on " "bridge address %#lx and jmp addr %#lx)", next_addr, ori_addr, jmp_addr); safe_patch = false; break; } if (!safe_patch) { // XXX: current !safe_patch means this jmp_addr is unsafe goto NEXT_JMP_ADDR; } } // step (4.3). check all affected addresses are in certain_patches. for (addr_t cur_addr = jmp_addr + 5; cur_addr <= bridge_max_addr; cur_addr++) { if (!z_addr_dict_exist(p->certain_patches, cur_addr)) { safe_patch = false; goto NEXT_JMP_ADDR; } } // step (4.4) find a safe patch if (!safe_patch) { EXITME("only safe patch can go into here"); } goto TRY_TO_PATCH_DONE; NEXT_JMP_ADDR: jmp_addr += 1; } while (jmp_addr < ori_addr + ori_size); TRY_TO_PATCH_DONE: g_queue_free(bridge_queue); // step (5). 
if it is a safe patch, update bridge information if (safe_patch) { if (jmp_addr == ori_addr + ori_size) { EXITME("invalid jmp_addr"); } z_info("successfully patch at address %#lx @ %#lx", jmp_addr, ori_addr); for (addr_t cur_addr = ori_addr; cur_addr <= bridge_max_addr; cur_addr++) { assert( !g_hash_table_lookup(p->bridges, GSIZE_TO_POINTER(cur_addr))); size_t off = cur_addr - ori_addr; // XXX: remember to revoke certain_patches if (z_addr_dict_exist(p->certain_patches, cur_addr)) { z_addr_dict_remove(p->certain_patches, cur_addr); } // first check whether it is a patch-influenced detection point if (bridge_sources[off]) { BridgePoint *bp = z_alloc(1, sizeof(BridgePoint)); bp->bridge_addr = ori_addr; bp->jump_addr = jmp_addr; bp->source_addr = bridge_sources[off]; bp->max_addr = bridge_max_addr; g_hash_table_insert(p->bridges, GSIZE_TO_POINTER(cur_addr), (gpointer)bp); continue; } // actually, all affected instruction boudnaries in jmp patching // shoud be handled before assert(!(cur_addr < jmp_addr + 5 && z_addr_dict_get(p->certain_addresses, cur_addr))); // then check it is an inst boundary before the patched jmp inst if (cur_addr >= jmp_addr + 5 && z_addr_dict_get(p->certain_addresses, cur_addr)) { BridgePoint *bp = z_alloc(1, sizeof(BridgePoint)); bp->bridge_addr = ori_addr; bp->jump_addr = jmp_addr; bp->source_addr = cur_addr; bp->max_addr = bridge_max_addr; g_hash_table_insert(p->bridges, GSIZE_TO_POINTER(cur_addr), (gpointer)bp); continue; } } p->patched_bridges += 1; return; } // step (6). for unsafe patches, we need first revoke the patched bridge if (bridge_patched) { // XXX: all bytes before jmp_addr + 5, which are patched as bridge (jmp) // and nop, werer origianlly certain patches. So we can safely reset // them as certain patches. size_t n = jmp_addr + 5 - ori_addr; z_patcher_unsafe_patch(p, ori_addr, n, z_x64_gen_invalid(n), NULL); for (size_t i = 0; i < n; i++) { z_addr_dict_set(p->certain_patches, ori_addr + i, true); } } // step (7). 
for unsafe patches, we try to resolve it // XXX: note that we can only resolve such unsafe patches when pdisasm is // fully supported, because only uncertain patches, which do not exist when // pidasm is not fully supported, can help fix the unsafe patches. if (p->pdisasm_enable) { bool new_uncertain_patch = false; // XXX: the first element is the target address, the second is the depth GQueue *queue = g_queue_new(); // step (7.1). find all possible uncertain precedessor patches g_queue_push_tail(queue, GSIZE_TO_POINTER(ori_addr)); g_queue_push_tail(queue, GSIZE_TO_POINTER(0)); while (!g_queue_is_empty(queue)) { addr_t cur_addr = (addr_t)g_queue_pop_head(queue); size_t depth = (size_t)g_queue_pop_head(queue); if (depth > BRIDGE_PRE_DEPTH) { continue; } // get predecessors Iter(addr_t, pred_addrs); z_iter_init_from_buf( pred_addrs, z_disassembler_get_all_predecessors(d, cur_addr)); while (!z_iter_is_empty(pred_addrs)) { // pred_addr must in .text (it may be incomplete when // pre-superset diaasm is not enable) addr_t pred_addr = *(z_iter_next(pred_addrs)); // check prob if (z_disassembler_get_prob_disasm(d, pred_addr) < PATCH_THRESHOLD) { continue; } // there are some cases where the following predicate is false: // case (1). pred_addr is in certain_addresses // case (2). pred_addr already be patched as uncertain patches // case (2.a). pred_addr is patched by this BFS // case (2.b). 
pred_addr is patched by others if (!__patcher_patch_uncertain_address(p, pred_addr)) { continue; } // TODO: decide whether this new uncertain patch should be added // into the list of potential_uncertain_addresses new_uncertain_patch = true; z_info("resolve the unsafe patch by patching %#lx", pred_addr); g_queue_push_tail(queue, GSIZE_TO_POINTER(pred_addr)); g_queue_push_tail(queue, GSIZE_TO_POINTER(depth + 1)); } z_iter_destroy(pred_addrs); } g_queue_free(queue); // step (7.2) return if we can resolve it by the next execution if (new_uncertain_patch) { p->resolved_bridges += 1; return; } } // step (8). if we cannot resolve it, we delay the patches // XXX: avoid touch other patch points z_info("fail to resolve the unsafe patch, let's delay it: %#lx", ori_addr); { z_rptr_inc(p->text_ptr, uint8_t, ori_addr - p->text_addr); addr_t cur_addr = ori_addr; while (z_addr_dict_exist(p->certain_addresses, cur_addr) && z_addr_dict_exist(p->certain_patches, cur_addr)) { assert(z_addr_dict_get(p->certain_addresses, cur_addr)); cs_insn *cur_inst = z_disassembler_get_superset_disasm(d, cur_addr); assert(cur_inst); z_rptr_memcpy(p->text_ptr, cur_inst->bytes, cur_inst->size); for (size_t i = 0; i < cur_inst->size; i++) { // XXX: in this case, cur_addr + i belongs to neighter bridges // nor certain_patches, but it belongs to certain_addresses. It // is a special case for delayed bridges. 
z_addr_dict_remove(p->certain_patches, cur_addr + i); } // we end at terminator (e.g., ret) or call if (z_capstone_is_terminator(cur_inst) || z_capstone_is_call(cur_inst)) { break; } cur_addr += cur_inst->size; z_rptr_inc(p->text_ptr, uint8_t, cur_inst->size); } z_rptr_reset(p->text_ptr); p->delayed_bridges += 1; } return; } Z_API void z_patcher_bridge_stats(Patcher *p) { z_info("number of patched bridges : %d", p->patched_bridges); z_info("number of delayed bridges : %d", p->delayed_bridges); z_info("number of resolved bridges: %d", p->resolved_bridges); z_info("number of adjusted bridges: %d", p->adjusted_bridges); } Z_API addr_t z_patcher_adjust_bridge_address(Patcher *p, addr_t addr) { if (p->s_iter || p->e_iter) { EXITME("cannot adjust bridge in delta debugging mode"); } BridgePoint *bp = g_hash_table_lookup(p->bridges, GSIZE_TO_POINTER(addr)); // case (1). this is not a bridge point, and we do nothing. if (!bp) { return addr; } // case (2). this is the bridge starting point, and we do nothing too. if (bp->bridge_addr == addr) { return addr; } // it is invalid that jump_addr == addr at here (note that currently addr is // not the bridge point). if (bp->jump_addr == addr) { EXITME("internal jump point cannot be a crash point"); } // case (3). this crash is caused by an overlapping instruction. We need to // revoke this bridge patching. addr_t bridge_addr = bp->bridge_addr; addr_t jump_addr = bp->jump_addr; addr_t source_addr = bp->source_addr; addr_t max_addr = bp->max_addr; z_info("detect a solvable bridge overlapping: %#lx / %#lx", addr, bridge_addr); // step (1). revoke the tail part of bridge (after source_addr), if // necessary if (source_addr < jump_addr + 5) { size_t tail_size = jump_addr + 5 - source_addr; z_patcher_unsafe_patch(p, source_addr, tail_size, z_x64_gen_invalid(tail_size), NULL); } // step (2). 
revoke the head part of bridge (before source_addr) { assert(source_addr > bridge_addr); size_t head_size = source_addr - bridge_addr; // XXX: these addresses are also the special cases for delayed bridges. // Again, them do not belong to certain_patches and bridges, but belong // to certain_addresses. z_patcher_unsafe_patch(p, bridge_addr, head_size, p->text_backup + (bridge_addr - p->text_addr), NULL); } // step (3). remove all associated bridge information and reset some as // certain patches { for (addr_t cur_addr = bridge_addr; cur_addr <= max_addr; cur_addr++) { if (cur_addr >= source_addr) { z_addr_dict_set(p->certain_patches, cur_addr, true); } g_hash_table_remove(p->bridges, GSIZE_TO_POINTER(cur_addr)); } } p->adjusted_bridges += 1; return source_addr; } Z_API size_t z_patcher_uncertain_patches_n(Patcher *p) { if (p->s_iter || p->e_iter) { EXITME("cannot make requests when delta debugging mode is enable"); } return g_sequence_get_length(p->uncertain_patches); } Z_API void z_patcher_self_correction_start(Patcher *p) { if (p->s_iter || p->e_iter) { EXITME("self correction procedure already started"); } if (!p->pdisasm_enable) { EXITME("self correction procedure only works when pdisasm is enable"); } p->s_iter = g_sequence_get_begin_iter(p->uncertain_patches); p->e_iter = g_sequence_get_end_iter(p->uncertain_patches); } Z_API void z_patcher_self_correction_end(Patcher *p) { if (!p->s_iter || !p->e_iter) { EXITME("self correction procedure did not start"); } if (!p->pdisasm_enable) { EXITME("self correction procedure only works when pdisasm is enable"); } Disassembler *d = p->disassembler; // step (1). repair the buggy rewriting if any // XXX: note that we only need to do online re-patching when there are some // rewritting errors. 
    if (p->s_iter != p->e_iter) {
        // step (1.1) disable such uncertain patches and update pdisasm
        GSequenceIter *iter = p->s_iter;
        while (iter != p->e_iter) {
            addr_t err_addr = (addr_t)g_sequence_get(iter);
            z_info("repair rewriting error: %#lx", err_addr);
            __patcher_flip_uncertain_patch(p, err_addr, false);
            z_diassembler_update_prob_disasm(d, err_addr, false);
            iter = g_sequence_iter_next(iter);
        }

        // step (1.2). rerun pdisasm
        assert(p->pdisasm_enable);
        z_disassembler_prob_disasm(d);

        // step (1.3). remove all uncertain patches and re-patch
        // XXX: note that currently all the uncertain patches are disabled
        GSequenceIter *s_iter =
            g_sequence_get_begin_iter(p->uncertain_patches);
        GSequenceIter *e_iter = g_sequence_get_end_iter(p->uncertain_patches);
        g_sequence_remove_range(s_iter, e_iter);

        __patcher_patch_all_F(p);
    } else {
        // XXX: it means there is no rewriting error. We just need to
        // re-enable all uncertain patches.
        GSequenceIter *iter = g_sequence_get_begin_iter(p->uncertain_patches);
        while (!g_sequence_iter_is_end(iter)) {
            __patcher_flip_uncertain_patch(p, (addr_t)g_sequence_get(iter),
                                           true);
            iter = g_sequence_iter_next(iter);
        }
    }

    // step (2). disable the s_iter and e_iter flags
    p->s_iter = NULL;
    p->e_iter = NULL;
}

/*
 * Move s_iter (is_s_iter == true) or e_iter by |off| positions, flipping the
 * enable/disable state of every uncertain patch the iterator walks over.
 * The direction of the move and which side is moved jointly determine
 * whether the walked patches get enabled or disabled (see the truth table
 * below).
 */
Z_API void z_patcher_flip_uncertain_patches(Patcher *p, bool is_s_iter,
                                            int64_t off) {
    if (!p->s_iter || !p->e_iter) {
        EXITME("self correction procedure did not start");
    }
    if (!p->pdisasm_enable) {
        EXITME("self correction procedure only works when pdisasm is enable");
    }

    // a zero offset is a no-op
    if (!off) {
        return;
    }

    // step (1). prepare basic information
    GSequenceIter *iter = (is_s_iter ? p->s_iter : p->e_iter);
    GSequenceIter *(*change_iter)(GSequenceIter *) =
        ((off > 0) ? &g_sequence_iter_next : &g_sequence_iter_prev);
    size_t steps = ((off < 0) ? (size_t)(-off) : (size_t)off);

    // is_enable | is_s_iter | off > 0
    // ----------+-----------+----------------
    // True      | True      | False (off < 0)
    // True      | False     | True (off > 0)
    // False     | True      | True (off > 0)
    // False     | False     | False (off < 0)
    bool is_enable = (!!is_s_iter) ^ (!!(off > 0));

    // step (2). flip uncertain patches
    // XXX: when moving forward we flip the element under the iterator before
    // advancing; when moving backward we first step back and then flip, so
    // that in both cases exactly the elements crossed by the move are
    // flipped.
    bool do_before_change = (off > 0);
    for (size_t i = 0; i < steps; i++) {
        if (do_before_change) {
            __patcher_flip_uncertain_patch(p, (addr_t)g_sequence_get(iter),
                                           is_enable);
        }

        GSequenceIter *tmp = (*change_iter)(iter);
        assert(tmp != iter);
        iter = tmp;

        if (!do_before_change) {
            __patcher_flip_uncertain_patch(p, (addr_t)g_sequence_get(iter),
                                           is_enable);
        }
    }

    // step (3). update s_iter/e_iter
    if (is_s_iter) {
        p->s_iter = iter;
    } else {
        p->e_iter = iter;
    }

    assert(p->s_iter && p->e_iter);
    // it is also possible that s_iter == e_iter, but s_iter must never point
    // to a larger address than e_iter
    if (!g_sequence_iter_is_end(p->e_iter) &&
        __patcher_compare_address((addr_t)g_sequence_get(p->s_iter),
                                  (addr_t)g_sequence_get(p->e_iter),
                                  NULL) > 0) {
        EXITME("invalid s_iter and e_iter: %#lx - %#lx",
               (addr_t)g_sequence_get(p->s_iter),
               (addr_t)g_sequence_get(p->e_iter));
    }
}

// XXX: real patch function
/*
 * Write `size` bytes from `buf` at virtual address `addr`; when obuf is
 * non-NULL, the original bytes are copied out first. Patches inside the
 * shared .text mapping go through the fast text_ptr path, anything else
 * falls back to ELF-level read/write.
 *
 * Note: this function intentionally bypasses all of the patcher's metadata
 * bookkeeping (see the warning in patcher.h).
 */
Z_API void z_patcher_unsafe_patch(Patcher *p, addr_t addr, size_t size,
                                  const uint8_t *buf, uint8_t *obuf) {
    if (z_likely(addr >= p->text_addr && addr < p->text_addr + p->text_size)) {
        // XXX: hot branch
        z_rptr_inc(p->text_ptr, uint8_t, addr - p->text_addr);
        if (obuf) {
            z_rptr_memcpy(obuf, p->text_ptr, size);
        }
        z_rptr_memcpy(p->text_ptr, buf, size);
        z_rptr_reset(p->text_ptr);
    } else {
        if (obuf) {
            z_elf_read(p->elf, addr, size, obuf);
        }
        z_elf_write(p->elf, addr, size, buf);
    }
}

================================================ FILE: src/patcher.h ================================================
/*
 * patcher.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation,
   either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef __PATCHER_H
#define __PATCHER_H

#include "address_dictionary.h"
#include "binary.h"
#include "buffer.h"
#include "config.h"
#include "crs_config.h"
#include "disassembler.h"
#include "elf_.h"
#include "sys_optarg.h"

#include
// NOTE(review): the system header name on the line above was lost during
// extraction (the angle-bracketed name was stripped); presumably a glib
// header providing GSequence/GHashTable/GList -- confirm against the
// original file before compiling.

// XXX: note that patchpoint has priority of:
//      PP_BRIDGE > PP_CERTAIN > PP_UNCERTAIN
typedef enum patchpoint_type {
    PP_INVALID = 0UL,
    PP_UNCERTAIN = 1UL,
    PP_CERTAIN = 2UL,
    PP_BRIDGE = 3UL,
} PPType;

// XXX: some fields of Patcher are essential to understand the underlying
// logic:
//
//  * certain_addresses: all the addresses which are *certainly* sure to be
//                       code bytes. The values of this dictionary have two
//                       types: the instruction size for each instruction
//                       boundary, and zero for the others.
//
//  * uncertain_patches: all the patches which are *uncertainly* sure. Most
//                       of them are patched based on the calculated
//                       probability.
//
//  * certain_patches:   all the patches which are *certainly* sure. The only
//                       patched value of this type is invalid inst. This
//                       kind of patches excludes the ones serving for bridge
//                       overlapping detection. It also excludes the code
//                       which was patched and has been revoked for delayed
//                       bridges.
//
//  * bridges:           all potential patch points which can help detect
//                       bridge overlapping.
//
//
// There are some relations between the aforementioned fields.
//
//      keys(uncertain_patches).intersection(keys(certain_addresses)) = EmptySet
//
//      keys(uncertain_patches).intersection(keys(certain_patches)) = EmptySet
//      keys(uncertain_patches).intersection(keys(bridges)) = EmptySet
//      keys(certain_patches).intersection(keys(bridges)) = EmptySet
//
//      keys(certain_patches) in keys(certain_addresses)
//      keys(bridges) in keys(certain_addresses)
//
//      keys(certain_addresses)
//          - (keys(certain_patches) + keys(bridges))
//      = set(address which was patched and has been revoked for delayed
//        bridges)
//
// Only uncertain_patches are involved in the delta debugging procedure.
STRUCT(Patcher, {
    Binary *binary;
    Disassembler *disassembler;
    bool pdisasm_enable;

    // ELF
    ELF *elf;

    // .text info (for efficient patching)
    addr_t text_addr;
    size_t text_size;
    Rptr *text_ptr;        // pointer to the shared .text section
    uint8_t *text_backup;  // original data before any patching

    // addresses which are certainly known as code
    //  for instruction boundary, the value is the length of instruction
    //  for other places, the value is zero
    AddrDict(uint8_t, certain_addresses);

    // patch information
    GSequence *uncertain_patches;
    AddrDictFast(bool, certain_patches);
    GHashTable *bridges;  // bridges detection points

    // potential addresses for uncertain patches (only used when pdisasm is
    // enable and CONSERVATIVE_PATCH is disable)
    GList *potential_uncertain_addresses;

    // delta debugging info
    GSequenceIter *s_iter;
    GSequenceIter *e_iter;

    // statistic information
    size_t patched_bridges;
    size_t delayed_bridges;
    size_t resolved_bridges;
    size_t adjusted_bridges;

    // rewriting optargs
    RewritingOptArgs *opts;
});

/*
 * Create a patcher
 */
Z_API Patcher *z_patcher_create(Disassembler *d, RewritingOptArgs *opts);

/*
 * Destroy a patcher
 */
Z_API void z_patcher_destroy(Patcher *p);

/*
 * Patcher show details
 */
Z_API void z_patcher_describe(Patcher *p);

/*
 * Initial patching for the instructions whose probabilities are high enough
 */
Z_API void z_patcher_initially_patch(Patcher *p);

/*
 * Check whether address is a patched crash points (patch point)
 */
Z_API PPType z_patcher_check_patchpoint(Patcher *p, addr_t addr);

/*
 * Patch address as a jump bridge.
 *
 * The parameter is_real means the bridge is triggered by a crash during
 * execution, and vice versa (e.g., logged crashpoint and CP_RETADDR).
 */
Z_API void z_patcher_build_bridge(Patcher *p, addr_t ori_addr,
                                  addr_t shadow_addr, bool is_real);

/*
 * Adjust the address of a given bridge. This function may also change
 * current patching.
 */
Z_API addr_t z_patcher_adjust_bridge_address(Patcher *p, addr_t addr);

/*
 * Show bridge stat
 */
Z_API void z_patcher_bridge_stats(Patcher *p);

/*
 * Show the number of uncertain patches
 */
Z_API size_t z_patcher_uncertain_patches_n(Patcher *p);

/*
 * Self correction starts
 */
Z_API void z_patcher_self_correction_start(Patcher *p);

/*
 * Self correction ends
 */
Z_API void z_patcher_self_correction_end(Patcher *p);

/*
 * Enable or disable uncertain patches by moving s_iter/e_iter
 */
Z_API void z_patcher_flip_uncertain_patches(Patcher *p, bool is_s_iter,
                                            int64_t off);

/*
 * Basic patching function: patch at the given address and return the
 * original value if obuf is not NULL.
 *
 * Note that this function is unsafe because it allows users to do their own
 * patches *without* changing the metadata (e.g., bridges) of the patcher.
 *
 * Only use it when you are sure your patches are safe. Any crash triggered
 * by patches from this function cannot be diagnosed and repaired.
 */
Z_API void z_patcher_unsafe_patch(Patcher *p, addr_t addr, size_t size,
                                  const uint8_t *buf, uint8_t *obuf);

#endif

================================================
FILE: src/prob_disasm/prob_disasm_complete/dag.c
================================================
/*
 * dag.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/*
 * Tarjan data (per-run bookkeeping for the SCC computation)
 */
typedef struct tarjan_info_t {
    AddrDict(uint32_t, low);  // low-link value per address
    AddrDict(uint32_t, dfn);  // DFS discovery index per address
    uint32_t addr_n;          // next DFS index to assign
} TarjanInfo;

/*
 * Tarjan algorithm to calculate SCC (return low[cur_addr])
 */
Z_PRIVATE void __prob_disassembler_tarjan(ProbDisassembler *pd,
                                          TarjanInfo *info, GQueue *stack,
                                          GHashTable *in_stack,
                                          addr_t cur_addr);

/*
 * Build DAG using Tarjan algorithm
 */
Z_PRIVATE void __prob_disassembler_build_dag(ProbDisassembler *pd);

Z_PRIVATE void __prob_disassembler_tarjan(ProbDisassembler *pd,
                                          TarjanInfo *info, GQueue *stack,
                                          GHashTable *in_stack,
                                          addr_t cur_addr) {
    // step [0]. basic info
    Disassembler *d = pd->base;

    // step [1]. update low and dfn
    // a freshly discovered node starts with low == dfn == its DFS index
    z_addr_dict_set(info->low, cur_addr, info->addr_n);
    z_addr_dict_set(info->dfn, cur_addr, info->addr_n);
    info->addr_n++;

    // step [2]. push into stack
    g_queue_push_tail(stack, GSIZE_TO_POINTER(cur_addr));
    g_hash_table_insert(in_stack, GSIZE_TO_POINTER(cur_addr),
                        GSIZE_TO_POINTER(1));

    // step [3]. get nexts
    size_t n = 0;
    addr_t *next_addrs = NULL;
    if (!__prob_disassembler_get_propogate_successors(pd, cur_addr, &n,
                                                      &next_addrs)) {
        EXITME("invalid successors");
    }

    // step [5]. main loop
    for (size_t i = 0; i < n; i++) {
        addr_t next_addr = next_addrs[i];

        // step [5.1]. check whether next_addr is valid instruction
        if (!z_disassembler_get_superset_disasm(d, next_addr)) {
            continue;
        }

        // step [5.2]. for non-visited next_addr: recurse, then fold its
        // low-link value into ours
        if (!z_addr_dict_exist(info->low, next_addr)) {
            assert(!z_addr_dict_exist(info->dfn, next_addr));
            __prob_disassembler_tarjan(pd, info, stack, in_stack, next_addr);
            uint32_t cur_low = z_addr_dict_get(info->low, cur_addr);
            uint32_t next_low = z_addr_dict_get(info->low, next_addr);
            if (next_low < cur_low) {
                z_addr_dict_set(info->low, cur_addr, next_low);
            }
        } else if (g_hash_table_lookup(in_stack,
                                       GSIZE_TO_POINTER(next_addr))) {
            // visited and still on the stack: back edge, fold in its dfn
            uint32_t cur_low = z_addr_dict_get(info->low, cur_addr);
            uint32_t next_dfn = z_addr_dict_get(info->dfn, next_addr);
            if (next_dfn < cur_low) {
                z_addr_dict_set(info->low, cur_addr, next_dfn);
            }
        }
    }

    // step [6]. get SCC
    // cur_addr is an SCC root iff dfn == low; pop the stack down to it and
    // assign all popped members the same fresh SCC id
    if (z_addr_dict_get(info->dfn, cur_addr) ==
        z_addr_dict_get(info->low, cur_addr)) {
        uint32_t scc_id = pd->scc_n++;
        while (!g_queue_is_empty(stack)) {
            addr_t poped_addr = (addr_t)g_queue_pop_tail(stack);
            g_hash_table_remove(in_stack, GSIZE_TO_POINTER(poped_addr));
            z_addr_dict_set(pd->addr2sccid, poped_addr, scc_id);
            if (poped_addr == cur_addr) {
                break;
            }
        }
    }
}

Z_PRIVATE void __prob_disassembler_build_dag(ProbDisassembler *pd) {
    /*
     * step [0]. basic stuff
     */
    Disassembler *d = pd->base;
    addr_t text_addr = pd->text_addr;
    size_t text_size = pd->text_size;

    /*
     * step [1]. initialization members
     */
    z_addr_dict_init(pd->addr2sccid, pd->text_addr, pd->text_size);
    pd->scc_n = 1;  // XXX: scc_id == 0 is reserved for invalid instructions

    /*
     * step [2].
use Tarjan to calculate SCC */ { TarjanInfo *info = z_alloc(1, sizeof(TarjanInfo)); info->addr_n = 0; z_addr_dict_init(info->low, text_addr, text_size); z_addr_dict_init(info->dfn, text_addr, text_size); GQueue *stack = g_queue_new(); /* stack */ GHashTable *in_stack = /* whehter addr is in stack */ g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { // check whether addr is handled if (z_addr_dict_exist(pd->addr2sccid, addr)) { assert(z_addr_dict_exist(info->low, addr)); assert(z_addr_dict_exist(info->dfn, addr)); continue; } // check cur_addr is valid if (!z_disassembler_get_superset_disasm(d, addr)) { z_addr_dict_set(pd->addr2sccid, addr, 0); continue; } // do tarjan __prob_disassembler_tarjan(pd, info, stack, in_stack, addr); assert(g_queue_is_empty(stack)); assert(!g_hash_table_size(in_stack)); } z_info("we found %d SCCs in the superset control flow graph", pd->scc_n); // free memory g_hash_table_destroy(in_stack); g_queue_free(stack); z_addr_dict_destroy(info->low); z_addr_dict_destroy(info->dfn); z_free(info); } /* * step [3]. build DAG */ z_addr_dict_init(pd->dag_succs, 0, pd->scc_n); z_addr_dict_init(pd->dag_preds, 0, pd->scc_n); z_addr_dict_init(pd->dag_dead, 0, pd->scc_n); z_addr_dict_init(pd->dag_P, 0, pd->scc_n); AddrDict(uint32_t, dag_preds_n); /* used for toposord */ z_addr_dict_init(dag_preds_n, 0, pd->scc_n); { // step [3.1]. init all necessary members for (uint32_t scc_id = 0; scc_id < pd->scc_n; scc_id++) { z_addr_dict_set(dag_preds_n, scc_id, 0); z_addr_dict_set(pd->dag_succs, scc_id, g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL)); z_addr_dict_set(pd->dag_preds, scc_id, g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL)); } // step [3.2]. 
construct DAG based on each address's information for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { // ignore invalid instructions assert(z_addr_dict_exist(pd->addr2sccid, addr)); uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr); if (!scc_id) { continue; } // get dag_succs GHashTable *dag_succs = z_addr_dict_get(pd->dag_succs, scc_id); // get succ_addrs size_t n = 0; addr_t *succ_addrs = NULL; if (!__prob_disassembler_get_propogate_successors(pd, addr, &n, &succ_addrs)) { EXITME("invalid successors"); } for (int i = 0; i < n; i++) { addr_t succ_addr = succ_addrs[i]; // check succ_addr is in .text (we cannot know the outside info) // XXX: OUTSIDE LOST already handles this uint32_t succ_scc_id; if (succ_addr < text_addr || succ_addr >= text_addr + text_size) { continue; } assert(z_addr_dict_exist(pd->addr2sccid, succ_addr)); succ_scc_id = z_addr_dict_get(pd->addr2sccid, succ_addr); // and not equal to scc_id if (succ_scc_id == scc_id) { continue; } // check whether succ_scc is in dag_succs and insert if not if (!g_hash_table_lookup(dag_succs, GSIZE_TO_POINTER(succ_scc_id))) { // update dag_succs g_hash_table_insert(dag_succs, GSIZE_TO_POINTER(succ_scc_id), GSIZE_TO_POINTER(1)); // update dag_preds_n z_addr_dict_set( dag_preds_n, succ_scc_id, z_addr_dict_get(dag_preds_n, succ_scc_id) + 1); // update dag_preds g_hash_table_insert( z_addr_dict_get(pd->dag_preds, succ_scc_id), GSIZE_TO_POINTER(scc_id), GSIZE_TO_POINTER(1)); } } } #ifdef DEBUG /* * step [3.3]. 
check the correctness of DAG */ size_t edge_n = 0; for (uint32_t scc_id = 0; scc_id < pd->scc_n; scc_id++) { assert(z_addr_dict_exist(dag_preds_n, scc_id)); assert(z_addr_dict_exist(pd->dag_succs, scc_id)); assert(z_addr_dict_exist(pd->dag_preds, scc_id)); GHashTable *dag_succs = z_addr_dict_get(pd->dag_succs, scc_id); GHashTable *dag_preds = z_addr_dict_get(pd->dag_preds, scc_id); assert(z_addr_dict_get(dag_preds_n, scc_id) == g_hash_table_size(dag_preds)); GList *list_dag_succs = g_hash_table_get_keys(dag_succs); for (GList *l = list_dag_succs; l != NULL; l = l->next) { edge_n++; uint32_t succ_scc_id = (uint32_t)l->data; assert(g_hash_table_lookup( z_addr_dict_get(pd->dag_preds, succ_scc_id), GSIZE_TO_POINTER(scc_id))); } g_list_free(list_dag_succs); } assert(edge_n); z_info("there are %d edges in contructed DAG", edge_n); #endif } /* * step [4]. topo-sort */ pd->topo = g_queue_new(); { GQueue *queue = g_queue_new(); // first find all nodes without preds for (uint32_t scc_id = 0; scc_id < pd->scc_n; scc_id++) { if (!z_addr_dict_get(dag_preds_n, scc_id)) { g_queue_push_tail(queue, GSIZE_TO_POINTER(scc_id)); } } // get topo while (!g_queue_is_empty(queue)) { uint32_t scc_id = (uint32_t)g_queue_pop_head(queue); g_queue_push_tail(pd->topo, GSIZE_TO_POINTER(scc_id)); GHashTable *dag_succs = z_addr_dict_get(pd->dag_succs, scc_id); GList *list_dag_succs = g_hash_table_get_keys(dag_succs); for (GList *l = list_dag_succs; l != NULL; l = l->next) { uint32_t succ_scc_id = (uint32_t)l->data; assert(z_addr_dict_exist(dag_preds_n, succ_scc_id)); z_addr_dict_set(dag_preds_n, succ_scc_id, z_addr_dict_get(dag_preds_n, succ_scc_id) - 1); if (!z_addr_dict_get(dag_preds_n, succ_scc_id)) { g_queue_push_tail(queue, GSIZE_TO_POINTER(succ_scc_id)); } } g_list_free(list_dag_succs); } assert(g_queue_get_length(pd->topo) == pd->scc_n); g_queue_free(queue); } z_addr_dict_destroy(dag_preds_n); } ================================================ FILE: 
src/prob_disasm/prob_disasm_complete/hints.c ================================================ /* * hints.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ // TODO: it would be more convencing to dynamically update hints /* * Register info */ typedef struct reg_info_t { GPRState gpr; XMMState xmm; YMMState ymm; ZMMState zmm; } RegInfo; /* * Coolect hints from registers' use-def */ Z_PRIVATE void __prob_disassembler_reg_hints_dfs( ProbDisassembler *pd, GHashTable *seen, Buffer *(*get_next)(UCFG_Analyzer *, addr_t), void (*update_info)(ProbDisassembler *, addr_t, RegInfo *), addr_t cur_addr, RegInfo *info, bool is_first_addr); /* * Data length threshold */ #define STRING_LENGTH_THRESHOLD 6 #define VALUE_LENGTH_THRESHOLD 4 #define CONFIDENT_LENGTH_THRESHOLD 100 /* * Code pattern distance */ #define CMP_CJMP_DISTANCE 2 #define ARG_CALL_DISTANCE 2 /* * Collect control-flow-related hints */ Z_PRIVATE void __prob_disassembler_collect_cf_hints(ProbDisassembler *pd); /* * Collect pop-ret hints */ Z_PRIVATE void __prob_disassembler_collect_pop_ret_hints(ProbDisassembler *pd); /* * Collect cmp/test-cjmp hints */ Z_PRIVATE void __prob_disassembler_collect_cmp_cjmp_hints(ProbDisassembler *pd); /* * Collect arg-call hints */ Z_PRIVATE void __prob_disassembler_collect_arg_call_hints(ProbDisassembler *pd); /* * Collect register-related hints */ Z_PRIVATE void 
__prob_disassembler_collect_reg_hints(ProbDisassembler *pd);

/*
 * Collect string hints
 */
Z_PRIVATE void __prob_disassembler_collect_str_hints(ProbDisassembler *pd);

/*
 * Collect value hints
 */
Z_PRIVATE void __prob_disassembler_collect_value_hints(ProbDisassembler *pd);

/*
 * DFS over the superset uCFG starting from cur_addr, following the edges
 * returned by get_next and applying update_info at every visited address
 * (except the starting one). The register sets in *info are narrowed as the
 * walk progresses; the DFS stops along a path once all sets are empty. *info
 * is restored before returning so sibling branches observe the same state.
 * seen guards against revisiting addresses across the whole search.
 */
Z_PRIVATE void __prob_disassembler_reg_hints_dfs(
    ProbDisassembler *pd, GHashTable *seen,
    Buffer *(*get_next)(UCFG_Analyzer *, addr_t),
    void (*update_info)(ProbDisassembler *, addr_t, RegInfo *),
    addr_t cur_addr, RegInfo *info, bool is_first_addr) {
    Disassembler *d = pd->base;

    // step [0]. if info in zero, we do not need to go deeper
    if (!info->gpr && !info->xmm && !info->ymm && !info->zmm) {
        return;
    }

    // step [1]. check cur_addr is valid
    if (!z_disassembler_get_superset_disasm(d, cur_addr)) {
        return;
    }

    // step [2]. get all necessary information
    Iter(addr_t, next_addrs);
    z_iter_init_from_buf(next_addrs, (*get_next)(d->ucfg_analyzer, cur_addr));

    // step [3]. collect hints and update next info
    RegInfo backup_info = *info;
    if (!is_first_addr) {
        (*update_info)(pd, cur_addr, info);
    }

    // step [4]. go deep
    while (!z_iter_is_empty(next_addrs)) {
        addr_t next_addr = *(z_iter_next(next_addrs));

        // check seen
        if (g_hash_table_lookup(seen, GSIZE_TO_POINTER(next_addr))) {
            continue;
        }
        g_hash_table_insert(seen, GSIZE_TO_POINTER(next_addr),
                            GSIZE_TO_POINTER(1));

        // deep search
        __prob_disassembler_reg_hints_dfs(pd, seen, get_next, update_info,
                                          next_addr, info, false);
    }

    // step [5]. restore info (so that siblings in the caller's loop start
    // from the same register sets)
    *info = backup_info;
}

/*
 * Collect control-flow-based hints and losts: PLT calls/jumps, transfers
 * whose target falls outside .text/.init/.fini, converged calls/jumps
 * (multiple sources reaching the same target), and crossed jumps (a jump
 * target that splits another jump/cjmp instruction).
 */
Z_PRIVATE void __prob_disassembler_collect_cf_hints(ProbDisassembler *pd) {
    // step [0]. create call_/jmp_ targets and other basic information
    GHashTable *call_targets =
        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,
                              (GDestroyNotify)(&z_buffer_destroy));
    GHashTable *jmp_targets =
        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,
                              (GDestroyNotify)(&z_buffer_destroy));

    Disassembler *d = pd->base;
    ELF *e = z_binary_get_elf(pd->binary);

    addr_t text_addr = pd->text_addr;
    size_t text_size = pd->text_size;

    addr_t init_addr, fini_addr;
    size_t init_size, fini_size;
    if (z_elf_get_shdr_init(e)) {
        init_addr = z_elf_get_shdr_init(e)->sh_addr;
        init_size = z_elf_get_shdr_init(e)->sh_size;
    } else {
        // if we do not detect .init, we set it as .text
        init_addr = text_addr;
        init_size = text_size;
    }
    if (z_elf_get_shdr_fini(e)) {
        fini_addr = z_elf_get_shdr_fini(e)->sh_addr;
        fini_size = z_elf_get_shdr_fini(e)->sh_size;
    } else {
        // if we do not detect .fini, we set it as .text
        fini_addr = text_addr;
        fini_size = text_size;
    }

    size_t plt_n = z_elf_get_plt_n(e);

    // step [2]. main loop to check all instruction
    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {
        // step [2.1]. get corresponding instruction
        cs_insn *inst = z_disassembler_get_superset_disasm(d, addr);
        if (!inst) {
            continue;
        }

        // step [2.2]. check the instruction only has one imm operand
        cs_detail *detail = inst->detail;
        if ((detail->x86.op_count != 1) ||
            (detail->x86.operands[0].type != X86_OP_IMM)) {
            continue;
        }

        // step [2.3]. handle different cf transfer instruction
        addr_t target = detail->x86.operands[0].imm;

// NOTE: the `continue` statements below bind to the do/while(0) wrapper, not
// the enclosing for loop; since nothing follows the macro invocation inside
// the loop body, the effect (skip to the next address) is the same.
#define __COLLECT_CF_TARGET(TYPE, plt_check, targets)                        \
    do {                                                                     \
        /* pre-check invalid prefix */                                       \
        if (*((uint32_t *)(inst->detail->x86.prefix))) {                     \
            z_trace("find invalid prefix: " CS_SHOW_INST(inst));             \
            continue;                                                        \
        }                                                                    \
                                                                             \
        /* additional check for invalid prefix: HUG capstone */              \
        KS_ASM(inst->address, "%s %s", inst->mnemonic, inst->op_str);        \
        if (ks_size != inst->size) {                                         \
            z_trace("find invalid prefix: " CS_SHOW_INST(inst));             \
            continue;                                                        \
        }                                                                    \
                                                                             \
        /* check PLT transfer */                                             \
        if (z_elf_get_plt_info(e, target)) {                                 \
            /* for PLT transfer, we have further check */                    \
            if (plt_check) {                                                 \
                z_trace("find PLT " #TYPE ": " CS_SHOW_INST(inst));          \
                __prob_disassembler_update_inst_hint(                        \
                    pd, addr, HINT(PLT_##TYPE, BASE_CF(inst) * plt_n));      \
            }                                                                \
            continue;                                                        \
        }                                                                    \
                                                                             \
        /* check outsider transfer */                                        \
        if ((target < text_addr || target >= text_addr + text_size) &&      \
            (target < init_addr || target >= init_addr + init_size) &&      \
            (target < fini_addr || target >= fini_addr + fini_size)) {      \
            z_trace("find outside " #TYPE ": " CS_SHOW_INST(inst));          \
            __prob_disassembler_update_inst_lost(                            \
                pd, addr, LOST(OUTSIDE_##TYPE, BASE_CF(inst) * text_size));  \
            continue;                                                        \
        }                                                                    \
                                                                             \
        /* check target is valid */                                          \
        if (!z_disassembler_get_superset_disasm(d, target)) {                \
            continue;                                                        \
        }                                                                    \
                                                                             \
        /* check it does not jump into its next instruction */               \
        if (target == inst->address + inst->size) {                          \
            continue;                                                        \
        }                                                                    \
                                                                             \
        /* maintain a relation from dst address to src address */            \
        Buffer *dst2src =                                                    \
            g_hash_table_lookup((targets), GSIZE_TO_POINTER(target));        \
        if (!dst2src) {                                                      \
            dst2src = z_buffer_create(NULL, 0);                              \
            g_hash_table_insert((targets), GSIZE_TO_POINTER(target),         \
                                (gpointer)(dst2src));                        \
        }                                                                    \
        z_buffer_append_raw(dst2src, (uint8_t *)&addr, sizeof(addr));        \
    } while (0)

        if (z_capstone_is_call(inst)) {
            __COLLECT_CF_TARGET(CALL, inst->size == 5, call_targets);
        } else if (z_capstone_is_jmp(inst) || z_capstone_is_cjmp(inst)) {
            __COLLECT_CF_TARGET(
                JMP, (inst->size == 5 && z_capstone_is_jmp(inst)),
                jmp_targets);
        }
#undef __COLLECT_CF_TARGET
    }

    // step [3]. collect hints from converged calls
    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {
        Buffer *callers_buf =
            (Buffer *)g_hash_table_lookup(call_targets, GSIZE_TO_POINTER(addr));
        if (!callers_buf) {
            continue;
        }

        Iter(addr_t, callers);
        z_iter_init_from_buf(callers, callers_buf);
        assert(!z_iter_is_empty(callers));

        // a single caller is not "converged"
        if (z_iter_get_size(callers) == 1) {
            continue;
        }

        while (!z_iter_is_empty(callers)) {
            addr_t caller = *(z_iter_next(callers));
            cs_insn *caller_inst =
                z_disassembler_get_superset_disasm(d, caller);
            assert(caller_inst);
            __prob_disassembler_update_inst_hint(
                pd, caller,
                HINT(CONVERGED_CALL,
                     BASE_CF(caller_inst) / (z_iter_get_size(callers) - 1)));
        }
    }
    g_hash_table_destroy(call_targets);

    // step [4]. collect hints from converged jumps and cross jumps
    for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) {
        Buffer *jmp_sources_buf =
            (Buffer *)g_hash_table_lookup(jmp_targets, GSIZE_TO_POINTER(addr));
        if (!jmp_sources_buf) {
            continue;
        }

        Iter(addr_t, jmp_sources);
        z_iter_init_from_buf(jmp_sources, jmp_sources_buf);
        assert(!z_iter_is_empty(jmp_sources));

        // step [4.1]. collect hints from converged jumps
        size_t jmp_sources_n = z_iter_get_size(jmp_sources);
        if (jmp_sources_n > 1) {
            while (!z_iter_is_empty(jmp_sources)) {
                addr_t jmp_source = *(z_iter_next(jmp_sources));
                cs_insn *jmp_source_inst =
                    z_disassembler_get_superset_disasm(d, jmp_source);
                assert(jmp_source_inst);
                __prob_disassembler_update_inst_hint(
                    pd, jmp_source,
                    HINT(CONVERGED_JMP,
                         BASE_CF(jmp_source_inst) / (jmp_sources_n - 1)));
            }
        }

        // step [4.2]. collect hints from crossed jumps
        assert(addr > 7);
        // As the longest jump, which we will consider, is 7-bytes
        for (size_t pred = addr - 7; pred < addr; pred++) {
            // get predecessors
            cs_insn *pred_inst = z_disassembler_get_superset_disasm(d, pred);
            if (!pred_inst) {
                goto NEXT_PRED;
            }

            // check cross (pred must end exactly at addr)
            if (pred + pred_inst->size != addr) {
                goto NEXT_PRED;
            }

            // check pred is jmp and cjmp
            if (!z_capstone_is_jmp(pred_inst) &&
                !z_capstone_is_cjmp(pred_inst)) {
                goto NEXT_PRED;
            }

            // check pred's succs are valid
            Iter(addr_t, pred_succs);
            z_iter_init_from_buf(
                pred_succs,
                z_ucfg_analyzer_get_direct_successors(d->ucfg_analyzer, pred));
            while (!z_iter_is_empty(pred_succs)) {
                addr_t pred_succ = *(z_iter_next(pred_succs));
                if (!z_disassembler_get_superset_disasm(d, pred_succ)) {
                    goto NEXT_PRED;
                }
            }

            // collect hints for pred, where we assume most crossed jump is
            // only 1-byte
            __prob_disassembler_update_inst_hint(
                pd, pred, HINT(CROSSED_JMP, BASE_CF_RAW(1) / jmp_sources_n));

            // collect hints for jump sources
            z_iter_reset(jmp_sources);
            while (!z_iter_is_empty(jmp_sources)) {
                addr_t jmp_source = *(z_iter_next(jmp_sources));
                z_trace("find crossed JMP: %#lx - %#lx", pred, jmp_source);
                cs_insn *jmp_source_inst =
                    z_disassembler_get_superset_disasm(d, jmp_source);
                assert(jmp_source_inst);
                __prob_disassembler_update_inst_hint(
                    pd, jmp_source,
                    HINT(CROSSED_JMP,
                         BASE_CF(jmp_source_inst) / jmp_sources_n));
            }

        NEXT_PRED:;
        }
    }
    g_hash_table_destroy(jmp_targets);
}

/*
 * Functions for updating info.
* Note that following two functions will only be used during dfs */ Z_PRIVATE void __update_info_for_usedef_reg_hint(ProbDisassembler *pd, addr_t addr, RegInfo *info) { Disassembler *d = pd->base; RegState *rs = z_ucfg_analyzer_get_register_state(d->ucfg_analyzer, addr); assert(rs); if (rs->gpr_write_32_64 & info->gpr) { __prob_disassembler_update_inst_hint(pd, addr, HINT(USEDEF_GPR, BASE_REG)); info->gpr &= (~rs->gpr_write_32_64); } #define __SSE_TEMPLATE(T) \ do { \ if (rs->T##_write & info->T) { \ __prob_disassembler_update_inst_hint(pd, addr, \ HINT(USEDEF_SSE, BASE_REG)); \ info->T &= (~rs->T##_write); \ } \ } while (0) __SSE_TEMPLATE(xmm); __SSE_TEMPLATE(ymm); __SSE_TEMPLATE(zmm); #undef __SSE_TEMPLATE } Z_PRIVATE void __update_info_for_killed_reg_hint(ProbDisassembler *pd, addr_t addr, RegInfo *info) { Disassembler *d = pd->base; RegState *rs = z_ucfg_analyzer_get_register_state(d->ucfg_analyzer, addr); assert(rs); if (rs->gpr_write_32_64 & info->gpr) { __prob_disassembler_update_inst_lost(pd, addr, LOST(KILLED_GPR, BASE_REG)); info->gpr &= (~rs->gpr_write_32_64); } if (rs->gpr_read_32_64 & info->gpr) { info->gpr &= (~rs->gpr_read_32_64); } #define __SSE_TEMPLATE(T) \ do { \ if (rs->T##_write & info->T) { \ __prob_disassembler_update_inst_lost(pd, addr, \ LOST(KILLED_SSE, BASE_REG)); \ info->T &= (~rs->T##_write); \ } \ if (rs->T##_read & info->T) { \ info->T &= (~rs->T##_read); \ } \ } while (0) __SSE_TEMPLATE(xmm); __SSE_TEMPLATE(ymm); __SSE_TEMPLATE(zmm); #undef __SSE_TEMPLATE } Z_PRIVATE void __prob_disassembler_collect_reg_hints(ProbDisassembler *pd) { Disassembler *d = pd->base; addr_t text_addr = pd->text_addr; size_t text_size = pd->text_size; GHashTable *seen = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); RegInfo info = {}; for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { RegState *rs = z_ucfg_analyzer_get_register_state(d->ucfg_analyzer, addr); if (!rs) { continue; } /* * step [1]. 
get use-def hints */ info.gpr = rs->gpr_read_32_64; info.xmm = rs->xmm_read; info.ymm = rs->ymm_read; info.zmm = rs->zmm_read; g_hash_table_remove_all(seen); g_hash_table_insert(seen, GSIZE_TO_POINTER(addr), GSIZE_TO_POINTER(1)); __prob_disassembler_reg_hints_dfs( pd, seen, &z_ucfg_analyzer_get_direct_predecessors, &__update_info_for_usedef_reg_hint, addr, &info, true); /* * step [2]. get killed hints */ info.gpr = rs->gpr_write_32_64 & (~rs->gpr_read_32_64); info.xmm = rs->xmm_write & (~rs->xmm_read); info.ymm = rs->ymm_write & (~rs->ymm_read); info.zmm = rs->zmm_write & (~rs->zmm_read); g_hash_table_remove_all(seen); g_hash_table_insert(seen, GSIZE_TO_POINTER(addr), GSIZE_TO_POINTER(1)); __prob_disassembler_reg_hints_dfs( pd, seen, &z_ucfg_analyzer_get_direct_predecessors, &__update_info_for_killed_reg_hint, addr, &info, true); } } Z_PRIVATE void __prob_disassembler_collect_pop_ret_hints(ProbDisassembler *pd) { Disassembler *d = pd->base; addr_t text_addr = pd->text_addr; size_t text_size = pd->text_size; for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); if (!inst) { continue; } if (inst->id != X86_INS_POP) { continue; } size_t pop_n = 0; addr_t cur_addr = addr; cs_insn *cur_inst = inst; bool pop_ret = false; while (true) { pop_n += 1; cur_addr += cur_inst->size; cur_inst = z_disassembler_get_superset_disasm(d, cur_addr); if (!cur_inst) { break; } if (cur_inst->id == X86_INS_RET) { pop_ret = true; break; } if (cur_inst->id != X86_INS_POP) { break; } } if (!pop_ret) { continue; } z_trace("find %d pop at %#lx", pop_n, addr); __prob_disassembler_update_inst_hint(pd, addr, HINT(POP_RET, BASE_REG / pop_n)); } } Z_PRIVATE void __prob_disassembler_collect_str_hints(ProbDisassembler *pd) { addr_t text_addr = pd->text_addr; size_t text_size = pd->text_size; ELF *e = z_binary_get_elf(pd->binary); Rptr *text_ptr = z_elf_vaddr2ptr(e, text_addr); // collect all string-like hints addr_t 
prev_string = INVALID_ADDR; addr_t prev_null = INVALID_ADDR; for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { uint8_t c = *(z_rptr_get_ptr(text_ptr, uint8_t)); if (!c) { if (prev_string != INVALID_ADDR) { // we ignore null during string scanning prev_null = addr; } } else if (isprint(c)) { if (prev_string == INVALID_ADDR) { prev_string = addr; prev_null = INVALID_ADDR; } } else { if (prev_string != INVALID_ADDR && prev_null != INVALID_ADDR) { assert(prev_null > prev_string); size_t n = prev_null - prev_string; if (n > STRING_LENGTH_THRESHOLD) { z_trace("find string starting from %#lx with %d bytes", prev_string, n); double128_t hint; if (n < CONFIDENT_LENGTH_THRESHOLD) { hint = HINT(STRING, BASE_STRING(n)); } else { hint = +INFINITY; } for (addr_t cur_addr = prev_string; cur_addr <= prev_null; cur_addr++) { __prob_disassembler_update_data_hint(pd, cur_addr, hint); } } } prev_string = INVALID_ADDR; prev_null = INVALID_ADDR; } z_rptr_inc(text_ptr, uint8_t, 1); } z_rptr_destroy(text_ptr); } Z_PRIVATE void __prob_disassembler_collect_value_hints(ProbDisassembler *pd) { /* * Macro to collect continuous numerical number: * T: type (int16_t, int32_t, int64_t) * B: bit offset of size (1, 2, 3) * L: length threshold * C: count zero and 0xff */ #define __COLLECT_VALUE_HINTS(T, B, L, C) \ do { \ assert(sizeof(T) == (1 << B)); \ \ addr_t text_addr = pd->text_addr; \ size_t text_size = pd->text_size; \ double128_t threshold = __pow_in_4(0x100, (B)); \ z_trace("threshold: %Lf", threshold); \ \ /* alignment */ \ text_size = BITS_ALIGN_FLOOR(text_addr + text_size, (B)); \ text_addr = BITS_ALIGN_CELL(text_addr, (B)); \ text_size -= text_addr; \ z_trace("aligned range: [%#lx, %#lx]", text_addr, \ text_addr + text_size - 1); \ assert(!(text_addr % sizeof(T))); \ assert(!(text_size % sizeof(T))); \ \ ELF *e = z_binary_get_elf(pd->binary); \ Rptr *text_ptr = z_elf_vaddr2ptr(e, text_addr); \ \ /* collect continued likely numerical value */ \ addr_t numerical_addr = 
INVALID_ADDR; \ double128_t numerical_val = 0.0; \ for (addr_t addr = text_addr; addr < text_addr + text_size; \ addr += sizeof(T)) { \ T val = *(z_rptr_get_ptr(text_ptr, T)); \ double128_t val_f = (double128_t)val; \ size_t n = (addr - numerical_addr) >> (B); \ \ if (numerical_addr == INVALID_ADDR) { \ /* the first value */ \ numerical_addr = addr; \ numerical_val = val_f; \ } else if ((!(C)) && (val == 0 || val == -1)) { \ /* we ignore 0 and 0xfff..ff. Hence, do nothing. */ \ } else if (fabsl(numerical_val - val_f) < threshold) { \ /* valid numerical number */ \ numerical_val = \ (numerical_val / (n + 1)) * n + (val_f / (n + 1)); \ } else { \ if (n > (L)) { \ z_trace( \ "find %d-byte numerical array from %#lx with %d " \ "elements (mean: %.2Lf)", \ sizeof(T), numerical_addr, n, numerical_val); \ double128_t hint; \ if (n < CONFIDENT_LENGTH_THRESHOLD) { \ hint = HINT(VALUE, \ BASE_VALUE(1 << (B), threshold * 2, n)); \ } else { \ hint = +INFINITY; \ } \ for (addr_t cur_addr = numerical_addr; cur_addr < addr; \ cur_addr++) { \ __prob_disassembler_update_data_hint(pd, cur_addr, \ hint); \ } \ } \ \ numerical_addr = addr; \ numerical_val = val_f; \ } \ \ z_rptr_inc(text_ptr, T, 1); \ } \ \ z_rptr_destroy(text_ptr); \ } while (0) __COLLECT_VALUE_HINTS(int8_t, 0, VALUE_LENGTH_THRESHOLD << 2, true); __COLLECT_VALUE_HINTS(int16_t, 1, VALUE_LENGTH_THRESHOLD << 2, false); __COLLECT_VALUE_HINTS(int32_t, 2, VALUE_LENGTH_THRESHOLD << 1, false); __COLLECT_VALUE_HINTS(int64_t, 3, VALUE_LENGTH_THRESHOLD << 0, false); #undef __COLLECT_VALUE_HINTS } Z_PRIVATE void __prob_disassembler_collect_cmp_cjmp_hints( ProbDisassembler *pd) { Disassembler *d = pd->base; addr_t text_addr = pd->text_addr; size_t text_size = pd->text_size; for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); // check valid if (!inst) { continue; } // check cmp and test if (inst->id != X86_INS_TEST && inst->id != X86_INS_CMP) { continue; 
} // try to find a cjmp within CMP_CJMP_DISTANCE bool found_cjmp = false; addr_t cur_addr = addr; cs_insn *cur_inst = inst; Iter(addr_t, succ_addrs); for (size_t i = 0; i < CMP_CJMP_DISTANCE; i++) { z_iter_init_from_buf( succ_addrs, z_disassembler_get_direct_successors(d, cur_addr)); if (z_iter_get_size(succ_addrs) != 1) { break; } addr_t succ_addr = *(z_iter_next(succ_addrs)); if (succ_addr != cur_addr + cur_inst->size) { break; } // switch into next address cur_addr = succ_addr; cur_inst = z_disassembler_get_superset_disasm(d, cur_addr); if (!cur_inst) { break; } if (z_capstone_is_cjmp(cur_inst)) { found_cjmp = true; break; } } if (found_cjmp) { z_trace("find cmp-cjmp pattern at %#lx - %#lx", addr, cur_addr); __prob_disassembler_update_inst_hint( pd, addr, HINT(CMP_CJMP, __pow_in_4(BASE_INS, 2))); } } } Z_PRIVATE void __prob_disassembler_collect_arg_call_hints( ProbDisassembler *pd) { Disassembler *d = pd->base; addr_t text_addr = pd->text_addr; size_t text_size = pd->text_size; for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); // check valid if (!inst) { continue; } // check mov if (inst->id != X86_INS_MOV) { continue; } // check the rdi and rsi cs_detail *detail = inst->detail; if (detail->x86.operands[0].type != X86_OP_REG) { continue; } if (detail->x86.operands[0].reg != X86_REG_RDI && detail->x86.operands[0].reg != X86_REG_RSI) { continue; } // try to find a call within ARG_CALL_DISTANCE bool found_call = false; addr_t cur_addr = addr; cs_insn *cur_inst = inst; Iter(addr_t, succ_addrs); for (size_t i = 0; i < ARG_CALL_DISTANCE; i++) { z_iter_init_from_buf( succ_addrs, z_disassembler_get_direct_successors(d, cur_addr)); if (z_iter_get_size(succ_addrs) != 1) { break; } addr_t succ_addr = *(z_iter_next(succ_addrs)); if (succ_addr != cur_addr + cur_inst->size) { break; } // switch into next address cur_addr = succ_addr; cur_inst = z_disassembler_get_superset_disasm(d, cur_addr); if 
(!cur_inst) { break; } if (z_capstone_is_call(cur_inst)) { found_call = true; break; } } if (found_call) { z_trace("find arg-call pattern at %#lx - %#lx", addr, cur_addr); __prob_disassembler_update_inst_hint( pd, addr, HINT(ARG_CALL, __pow_in_4(BASE_INS, 2))); } } } ================================================ FILE: src/prob_disasm/prob_disasm_complete/propagation.c ================================================ /* * propagation.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /* * Propogate instruction hints */ Z_PRIVATE void __prob_disassembler_propogate_inst_hints(ProbDisassembler *pd); Z_PRIVATE void __prob_disassembler_propogate_inst_hints(ProbDisassembler *pd) { // step [0]. basic information Disassembler *d = pd->base; addr_t text_addr = pd->text_addr; size_t text_size = pd->text_size; // step [1]. aggregate all hints within a SCC AddrDict(double128_t, dag_hints); z_addr_dict_init(dag_hints, 0, pd->scc_n); // XXX: invalid_sccs means those SCCs whose likelihook of being instructions // is quite small. Hence, we stop propogation when reaching them. Note that // it is different from those SCCs in pd->dag_dead which are 100% not // instruction boundaries. 
AddrDictFast(bool, invalid_sccs); z_addr_dict_init(invalid_sccs, 0, pd->scc_n); for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { // check addr is valid uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr); if (!scc_id) { continue; } // check invalid_scc if (z_addr_dict_exist(pd->dag_P, scc_id) && z_addr_dict_get(pd->dag_P, scc_id) < PROPAGATE_P) { if (!z_addr_dict_exist(invalid_sccs, scc_id)) { z_addr_dict_set(invalid_sccs, scc_id, true); z_addr_dict_set(dag_hints, scc_id, 1.0); } continue; } // we do not use hints of very rare instructions // TODO: get a instruction distribution to weaken the hints instead of // directly disabling it. cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); if (z_capstone_is_rare(inst)) { continue; } // update aggragated hints double128_t addr_hint = NAN; if (__prob_disassembler_get_H(pd, addr, &addr_hint)) { if (!z_addr_dict_exist(dag_hints, scc_id)) { // new hints z_addr_dict_set(dag_hints, scc_id, addr_hint); } else { z_addr_dict_set(dag_hints, scc_id, z_addr_dict_get(dag_hints, scc_id) * addr_hint); } } } // step [2]. 
find all predecessors of invalid scc (only for first round) if (!pd->round_n) { GQueue *queue = g_queue_new(); g_queue_push_tail(queue, GSIZE_TO_POINTER(0)); AddrDictFast(bool, seen); z_addr_dict_init(seen, 0, pd->scc_n); z_addr_dict_set(seen, 0, true); while (!g_queue_is_empty(queue)) { uint32_t scc_id = (uint32_t)g_queue_pop_head(queue); // update dag_hints and invalid_sccs z_addr_dict_set(pd->dag_dead, scc_id, true); z_addr_dict_set(invalid_sccs, scc_id, true); z_addr_dict_set(dag_hints, scc_id, 1.0); // find predecessors GHashTable *dag_preds = z_addr_dict_get(pd->dag_preds, scc_id); GList *list_dag_preds = g_hash_table_get_keys(dag_preds); for (GList *l = list_dag_preds; l != NULL; l = l->next) { uint32_t pred_scc_id = (uint32_t)l->data; if (z_addr_dict_exist(seen, pred_scc_id)) { continue; } z_addr_dict_set(seen, pred_scc_id, true); g_queue_push_tail(queue, GSIZE_TO_POINTER(pred_scc_id)); } g_list_free(list_dag_preds); } g_queue_free(queue); z_addr_dict_destroy(seen); } // step [3]. propogate hints for (GList *l = pd->topo->head; l != NULL; l = l->next) { uint32_t scc_id = (uint32_t)l->data; // check scc without any hint if (!z_addr_dict_exist(dag_hints, scc_id)) { continue; } // check invalid scc. If so, stop propagation. if (z_addr_dict_exist(invalid_sccs, scc_id)) { continue; } // get hints double128_t scc_hint = z_addr_dict_get(dag_hints, scc_id); // propogate hints GHashTable *dag_succs = z_addr_dict_get(pd->dag_succs, scc_id); GList *list_dag_succs = g_hash_table_get_keys(dag_succs); for (GList *ll = list_dag_succs; ll != NULL; ll = ll->next) { uint32_t succ_scc_id = (uint32_t)ll->data; if (!z_addr_dict_exist(dag_hints, succ_scc_id)) { z_addr_dict_set(dag_hints, succ_scc_id, scc_hint); } else { z_addr_dict_set( dag_hints, succ_scc_id, z_addr_dict_get(dag_hints, succ_scc_id) * scc_hint); } } g_list_free(list_dag_succs); } z_addr_dict_destroy(invalid_sccs); // step [4]. 
update RH for each address for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { // ignore invalid instruction uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr); if (!scc_id) { continue; } if (!z_addr_dict_exist(dag_hints, scc_id)) { continue; } double128_t scc_hint = z_addr_dict_get(dag_hints, scc_id); __prob_disassembler_update_RH(pd, addr, scc_hint); } z_addr_dict_destroy(dag_hints); } ================================================ FILE: src/prob_disasm/prob_disasm_complete/solving.c ================================================ /* * solving.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /* * Normalize probabilities */ Z_PRIVATE void __prob_disassembler_normalize_prob(ProbDisassembler *pd); /* * Restrain probabilities based on control flow constrains */ Z_PRIVATE void __prob_disassembler_restrain_prob(ProbDisassembler *pd); /* * Spread hints to occluded instructions */ Z_PRIVATE void __prob_disassembler_spread_hints(ProbDisassembler *pd); #define __DECLARE_RESTRAIN(T, op) \ Z_PRIVATE void __prob_disassembler_restrain_##T(ProbDisassembler *pd) { \ addr_t text_addr = pd->text_addr; \ size_t text_size = pd->text_size; \ \ /* step [1]. 
calculate better T for each scc */ \ AddrDict(double128_t, dag_better); \ z_addr_dict_init(dag_better, 0, pd->scc_n); \ for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { \ double128_t T = NAN; \ __prob_disassembler_get_##T(pd, addr, &T); \ assert(!isnan(T)); \ \ uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr); \ \ if (z_addr_dict_exist(dag_better, scc_id)) { \ double128_t T##_ = z_addr_dict_get(dag_better, scc_id); \ if (T op T##_) { \ z_addr_dict_set(dag_better, scc_id, T); \ } \ } else { \ z_addr_dict_set(dag_better, scc_id, T); \ } \ } \ \ /* step [2]. restrain T */ \ for (GList *l = pd->topo->tail; l != NULL; l = l->prev) { \ uint32_t scc_id = (uint32_t)l->data; \ \ assert(z_addr_dict_exist(dag_better, scc_id)); \ \ double128_t T = z_addr_dict_get(dag_better, scc_id); \ \ GHashTable *pred_scc_ids = z_addr_dict_get(pd->dag_preds, scc_id); \ GList *list_pred_scc_ids = g_hash_table_get_keys(pred_scc_ids); \ for (GList *ll = list_pred_scc_ids; ll != NULL; ll = ll->next) { \ uint32_t pred_scc_id = (uint32_t)ll->data; \ \ double128_t pred_##T = \ z_addr_dict_get(dag_better, pred_scc_id); \ \ if (T op pred_##T) { \ z_addr_dict_set(dag_better, pred_scc_id, T); \ } \ } \ g_list_free(list_pred_scc_ids); \ } \ \ /* step [3]. 
reassign T for each address */ \ for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { \ uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr); \ \ assert(z_addr_dict_exist(dag_better, scc_id)); \ \ __prob_disassembler_reset_##T( \ pd, addr, z_addr_dict_get(dag_better, scc_id)); \ } \ \ z_addr_dict_destroy(dag_better); \ } __DECLARE_RESTRAIN(D, >); __DECLARE_RESTRAIN(P, <); #undef __DECLARE_RESTRAIN Z_PRIVATE void __prob_disassembler_normalize_prob(ProbDisassembler *pd) { Disassembler *d = pd->base; addr_t text_addr = pd->text_addr; size_t text_size = pd->text_size; for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { double128_t D = NAN; __prob_disassembler_get_D(pd, addr, &D); assert(!isnan(D)); // check P first to make sure a 100% data is still data double128_t P = NAN; if (__prob_disassembler_get_P(pd, addr, &P)) { if (__double128_equal(P, 0.0)) { continue; } } if (__double128_equal(D, 1.0)) { __prob_disassembler_reset_P(pd, addr, 0.0); continue; } if (__double128_equal(D, 0.0)) { __prob_disassembler_reset_P(pd, addr, 1.0); continue; } double128_t s = 1.0 / D; if (isinf(s)) { __prob_disassembler_reset_P(pd, addr, 1.0); continue; } Iter(addr_t, occ_addrs); z_iter_init_from_buf(occ_addrs, z_disassembler_get_occluded_addrs(d, addr)); while (!z_iter_is_empty(occ_addrs)) { addr_t occ_addr = *(z_iter_next(occ_addrs)); double128_t occ_D = NAN; __prob_disassembler_get_D(pd, occ_addr, &occ_D); assert(!isnan(occ_D)); if (__double128_equal(occ_D, 0.0)) { s = +INFINITY; } else { s += 1.0 / occ_D; } } assert(!isnan(s)); double128_t final_P = (1.0 / D) / s; assert(!isnan(final_P)); if (!isnan(P)) { size_t n = pd->round_n; assert(n); final_P = (final_P / (n + 1)) * n + P / (n + 1); } __prob_disassembler_reset_P(pd, addr, final_P); } __prob_disassembler_restrain_P(pd); } Z_PRIVATE void __prob_disassembler_restrain_prob(ProbDisassembler *pd) { __prob_disassembler_restrain_D(pd); } Z_PRIVATE void 
__prob_disassembler_spread_hints(ProbDisassembler *pd) { Disassembler *d = pd->base; addr_t text_addr = pd->text_addr; size_t text_size = pd->text_size; // step [1]. use RH to update D, and reset any D bigger than 1.0 as 1.0 for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { double128_t RH = NAN; if (__prob_disassembler_get_RH(pd, addr, &RH)) { __prob_disassembler_update_D(pd, addr, RH); } cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); if (!inst) { assert(isnan(RH) || isinf(RH)); // we may update inst_lost as +inf __prob_disassembler_reset_D(pd, addr, 1.0); } double128_t D = NAN; if (__prob_disassembler_get_D(pd, addr, &D)) { // XXX: when D is nan or inf, it means addr has a very strong data // hint and a strong inst hint. As we are trying to avoid false // postive, in this case, we will set it as data. if (isnan(D) || isinf(D) || D > 1.0) { __prob_disassembler_reset_D(pd, addr, 1.0); } } } // step [2]. spread D into occluded instructions for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { double128_t min_D = NAN; // ignore the ones already with D value if (__prob_disassembler_get_D(pd, addr, &min_D)) { continue; } assert(z_disassembler_get_occluded_addrs(d, addr)); Iter(addr_t, occ_addrs); z_iter_init_from_buf(occ_addrs, z_disassembler_get_occluded_addrs(d, addr)); while (!z_iter_is_empty(occ_addrs)) { addr_t occ_addr = *(z_iter_next(occ_addrs)); double128_t D = NAN; if (__prob_disassembler_get_D(pd, occ_addr, &D)) { if (isnan(min_D) || D < min_D) { min_D = D; } } } // XXX: note here, for a given address, if all addresses occluded with // it are 100% data, it should be data. (the threshold 1.0 can be // changed in the future -- maybe) // TODO: the logic here is weird. 
if (isnan(min_D) || __double128_equal(min_D, 1.0)) { __prob_disassembler_reset_D(pd, addr, 1.0); } else { __prob_disassembler_reset_D(pd, addr, 1.0 - min_D); } } } ================================================ FILE: src/prob_disasm/prob_disasm_complete.c ================================================ /* * prob_disasm_complete.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "../buffer.h" #include "../disassembler.h" #include "../iterator.h" #include "../restricted_ptr.h" #include #include typedef enum dynamic_hint_type_t { DHINT_NONE = 0, DHINT_CODE = 1, // XXX: we skip 0 for easy use of GHashTable DHINT_DATA, } DHintType; typedef struct dynamic_hint_t { addr_t addr; DHintType type; } DHint; /////////////////////////////////// // ProbDisassembler /////////////////////////////////// STRUCT(ProbDisassembler, { // pointer to Disassembler, so that we can call some functions of // Disassembler (it looks like inheritance but not really) Disassembler *base; AddrDict(double128_t, H); AddrDict(double128_t, RH); AddrDict(double128_t, P); AddrDict(double128_t, D); AddrDict(double128_t, inst_lost); AddrDict(double128_t, data_hint); // basic information Binary *binary; addr_t text_addr; size_t text_size; // logged dynamic hints (i.e., certain code/data information collected // during previous runs) const char *dhint_filename; GHashTable *dynamic_hints; // DAG 
information // TODO: we should do this for other address-keyed hash table. uint32_t scc_n; AddrDict(uint32_t, addr2sccid); AddrDictFast(GHashTable *, dag_succs); AddrDictFast(GHashTable *, dag_preds); AddrDictFast(bool, dag_dead); GQueue *topo; AddrDict(double128_t, dag_P); // how many round we have played size_t round_n; }); #define __GET_PDISASM(d) ((ProbDisassembler *)((d)->prob_disasm)) #define __SET_PDISASM(d, v) \ do { \ (d)->prob_disasm = (PhantomType *)(v); \ } while (0) #define INIT_ROUND_N 3 #define PROPAGATE_P 0.1 #define STRONG_DATA_HINT 1e52 /////////////////////////////////// // All hints and losts value /////////////////////////////////// // base #define __BASE_CF (1.0 / 256.0) #define __BASE_REG (1.0 / 16.0) #define __BASE_INS (1.0 / 502.0) // it is naively/semi-randomly picked by me #define __BASE_PRINTABLE_CHAR (256.0 / 95.0) #define __BASE_VALUE (256.0) #define BASE_CF(INST) \ (__pow_in_4(__BASE_CF, (INST)->detail->x86.encoding.imm_size)) #define BASE_CF_RAW(N) (__pow_in_4(__BASE_CF, (N))) #define BASE_REG (__BASE_REG) #define BASE_INS (__BASE_INS) #define BASE_STRING(N) (__pow_in_n(__BASE_PRINTABLE_CHAR, (N))) #define BASE_VALUE(L, R, N) \ (__pow_in_n(__pow_in_n(__BASE_VALUE, (L)) / (R), (N))) // hint weights: bigger weight means higher confidence #define __HINT_PLT_CALL_WEIGHT (100000.0) #define __HINT_PLT_JMP_WEIGHT (0.5) #define __HINT_CONVERGED_CALL_WEIGHT (1.0) #define __HINT_CONVERGED_JMP_WEIGHT (1.0) #define __HINT_CROSSED_JMP_WEIGHT (1.0) #define __HINT_USEDEF_GPR_WEIGHT (1.0) #define __HINT_USEDEF_SSE_WEIGHT (0.5) #define __HINT_POP_RET_WEIGHT (1.0) #define __HINT_CMP_CJMP_WEIGHT (1.0) #define __HINT_ARG_CALL_WEIGHT (1.0) // data hint is different, higher means lower confidence #define __HINT_STRING_WEIGHT \ (0.00001 * (1.0 / 256.0)) // TODO: check the string is valid instead of // assigning a very small weight #define __HINT_VALUE_WEIGHT (1.0) // hint functions #define HINT(TYPE, BASE) ((1.0 / (__HINT_##TYPE##_WEIGHT)) * (BASE)) // 
lost weights: bigger weight means higher confidence #define __LOST_OUTSIDE_CALL_WEIGHT (+INFINITY) #define __LOST_OUTSIDE_JMP_WEIGHT (+INFINITY) #define __LOST_KILLED_GPR_WEIGHT (1.0) #define __LOST_KILLED_SSE_WEIGHT (2.0) // lost functions #define LOST(TYPE, BASE) ((__LOST_##TYPE##_WEIGHT) * (1.0 / (BASE))) /////////////////////////////////// // Useful functions /////////////////////////////////// /* * Securely check whether two double128_t variables are equal */ Z_PRIVATE bool __double128_equal(double128_t a, double128_t b) { double128_t max_val = (fabsl(a) > fabsl(b) ? fabsl(a) : fabsl(b)); return (fabsl(a - b) <= max_val * LDBL_EPSILON); } /* * simple function to calculate pow */ Z_PRIVATE double128_t __pow_in_4(double128_t base, size_t n) { double128_t res = base; switch (n) { case 4: res = res * res; case 2: res = res * res; break; case 3: res = res * res * res; case 1: break; case 0: if (__double128_equal(base, 0.0)) { res = NAN; } else { res = 1.0; } break; default: EXITME("invalid pow: %d", n); } return res; } /* * fast function to calculate pow when n is integer */ Z_PRIVATE double128_t __pow_in_n(double128_t base, size_t n) { double128_t res = 1.0; double128_t cur = base; while (n > 0) { if (n & 1) { res *= cur; } cur = cur * cur; n >>= 1; } return res; } /////////////////////////////////// // Getter and Setter /////////////////////////////////// #define PROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(T) \ Z_PRIVATE void __prob_disassembler_update_##T( \ ProbDisassembler *pd, addr_t addr, double128_t T) { \ if (!z_addr_dict_exist(pd->T, addr)) { \ z_addr_dict_set(pd->T, addr, T); \ } else { \ z_addr_dict_set(pd->T, addr, z_addr_dict_get(pd->T, addr) * T); \ } \ } #define PROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(T) \ Z_PRIVATE bool __prob_disassembler_get_##T(ProbDisassembler *pd, \ addr_t addr, double128_t *T) { \ if (!z_addr_dict_exist(pd->T, addr)) { \ return false; \ } else { \ *T = z_addr_dict_get(pd->T, addr); \ return true; \ } \ } #define 
PROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(T) \ Z_PRIVATE void __prob_disassembler_reset_##T(ProbDisassembler *pd, \ addr_t addr, double128_t T) { \ z_addr_dict_set(pd->T, addr, T); \ } PROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(H); PROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(RH); PROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(D); PROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(inst_lost); PROB_DISASSEMBLER_DEFINE_PRIVATE_SETTER(data_hint); PROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(H); PROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(RH); PROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(D); PROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(inst_lost); PROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(data_hint); PROB_DISASSEMBLER_DEFINE_PRIVATE_RESETTER(P); PROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(H); PROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(RH); PROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(D); PROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(inst_lost); PROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(data_hint); PROB_DISASSEMBLER_DEFINE_PRIVATE_GETTER(P); #define __prob_disassembler_update_inst_hint __prob_disassembler_update_H #define __prob_disassembler_get_inst_hint __prob_disassembler_get_H #define __prob_disassembler_reset_inst_hint __prob_disassembler_reset_H /////////////////////////////////// // Local functions /////////////////////////////////// /* * Get successors for propogating instruction hints. It is a very helpful * wrapper function to customize propogation rule for instruction hints. */ Z_PRIVATE bool __prob_disassembler_get_propogate_successors( ProbDisassembler *pd, addr_t addr, size_t *n, addr_t **succs); /* * Apply hints and losts into working environment (RH/D/P), and remove previous * data when there are no hint and lost. 
(playground = H + RH + D + P, and H is * for inst_hint) */ Z_PRIVATE void __prob_disassembler_refresh_playground(ProbDisassembler *pd); /////////////////////////////////// // Components /////////////////////////////////// // XXX: note that we should import following components here, as they might use // above local functions. #include "prob_disasm_complete/dag.c" #include "prob_disasm_complete/hints.c" #include "prob_disasm_complete/propagation.c" #include "prob_disasm_complete/solving.c" /////////////////////////////////// // Test Code /////////////////////////////////// #ifdef DEBUG Z_RESERVED Z_PRIVATE bool __prob_disassembler_path_dfs( ProbDisassembler *pd, Buffer *(*get_next)(UCFG_Analyzer *, addr_t), GQueue *stack, GHashTable *seen, addr_t cur_addr, addr_t target) { Disassembler *d = pd->base; cs_insn *inst = z_disassembler_get_superset_disasm(d, cur_addr); if (!inst) { return false; } g_queue_push_tail(stack, (gpointer)(inst)); if (cur_addr == target) { return true; } Iter(addr_t, next_addrs); z_iter_init_from_buf(next_addrs, (*get_next)(d->ucfg_analyzer, cur_addr)); while (!z_iter_is_empty(next_addrs)) { addr_t next_addr = *(z_iter_next(next_addrs)); if (g_hash_table_lookup(seen, GSIZE_TO_POINTER(next_addr))) { continue; } g_hash_table_insert(seen, GSIZE_TO_POINTER(next_addr), GSIZE_TO_POINTER(1)); if (__prob_disassembler_path_dfs(pd, get_next, stack, seen, next_addr, target)) { return true; } } g_queue_pop_tail(stack); return false; } Z_RESERVED Z_PRIVATE void __prob_disassembler_search_path(ProbDisassembler *pd, addr_t src, addr_t dst) { GHashTable *seen = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); GQueue *stack = g_queue_new(); if (!__prob_disassembler_path_dfs(pd, &z_ucfg_analyzer_get_all_successors, stack, seen, src, dst)) { EXITME("cannot reach %#lx from %#lx", dst, src); } else { while (!g_queue_is_empty(stack)) { cs_insn *inst = (cs_insn *)g_queue_pop_head(stack); z_info(CS_SHOW_INST(inst)); } } g_hash_table_destroy(seen); 
g_queue_free(stack); } #endif Z_PRIVATE void __prob_disassembler_refresh_playground(ProbDisassembler *pd) { addr_t text_addr = pd->text_addr; size_t text_size = pd->text_size; #ifdef DEBUG // remove dag_P first (it is improtant for the following checking at step 3) for (uint32_t scc_id = 0; scc_id < pd->scc_n; scc_id++) { z_addr_dict_remove(pd->dag_P, scc_id); assert(!z_addr_dict_exist(pd->dag_P, scc_id)); } #endif for (addr_t addr = text_addr; addr < text_addr + text_size; addr++) { // step [1]. apply inst_lost into RH double128_t inst_lost = NAN; if (__prob_disassembler_get_inst_lost(pd, addr, &inst_lost)) { __prob_disassembler_reset_RH(pd, addr, inst_lost); } else { z_addr_dict_remove(pd->RH, addr); assert(!z_addr_dict_exist(pd->RH, addr)); } // step [2]. apply data_hint into D double128_t data_hint = NAN; if (__prob_disassembler_get_data_hint(pd, addr, &data_hint)) { __prob_disassembler_reset_D(pd, addr, data_hint); } else { z_addr_dict_remove(pd->D, addr); assert(!z_addr_dict_exist(pd->D, addr)); } // step [3]. update dag P double128_t P = NAN; if (__prob_disassembler_get_P(pd, addr, &P)) { uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr); #ifdef DEBUG if (z_addr_dict_exist(pd->dag_P, scc_id) && z_addr_dict_get(pd->dag_P, scc_id) != P) { EXITME("inconsistent dag P: %#lx (%Le) v/s %d (%Le)", addr, P, scc_id, z_addr_dict_get(pd->dag_P, scc_id)); } #endif z_addr_dict_set(pd->dag_P, scc_id, P); // XXX: note that we do not perform z_addr_dict_remove(pd->P, addr) // here. It is mainly beacuse we want to maintain a feature that if // an address was thought as 100% non-instruction before, the // address should always be non-instruction. 
} } } Z_PRIVATE bool __prob_disassembler_get_propogate_successors( ProbDisassembler *pd, addr_t addr, size_t *n, addr_t **succs) { Disassembler *d = pd->base; cs_insn *inst = z_disassembler_get_superset_disasm(d, addr); if (!inst) { return false; } Buffer *succs_buf = z_ucfg_analyzer_get_all_successors(d->ucfg_analyzer, addr); assert(succs_buf); // XXX: option one: propogate hints through fall-through edges for calls // ------ // if (z_capstone_is_call(inst)) { // addr_t next_addr = addr + inst->size; // z_buffer_append_raw(succs_buf, (uint8_t *)&next_addr, // sizeof(next_addr)); // } // ------ *n = z_buffer_get_size(succs_buf) / sizeof(addr_t); *succs = (addr_t *)z_buffer_get_raw_buf(succs_buf); return true; } /////////////////////////////////// // ProbDisassembler Pubilc API /////////////////////////////////// Z_PRIVATE double128_t z_prob_disassembler_get_inst_prob(ProbDisassembler *pd, addr_t addr) { if (addr < pd->text_addr || addr >= pd->text_addr + pd->text_size) { return 0.0; } double128_t P = NAN; __prob_disassembler_get_P(pd, addr, &P); assert(!isnan(P)); if (!__double128_equal(P, 0.0)) { return P; } // additionally check dag_dead and very huge data hint double128_t data_hint = NAN; if (__prob_disassembler_get_data_hint(pd, addr, &data_hint)) { if (data_hint > STRONG_DATA_HINT) { return -0.0; } } uint32_t scc_id = z_addr_dict_get(pd->addr2sccid, addr); if (z_addr_dict_exist(pd->dag_dead, scc_id)) { return -0.0; } return P; } Z_PRIVATE void z_prob_disassembler_get_internal( ProbDisassembler *pd, addr_t addr, cs_insn **inst, uint32_t *scc_id, double128_t *inst_hint, double128_t *inst_lost, double128_t *data_hint, double128_t *D, double128_t *P) { Disassembler *d = pd->base; *inst = z_disassembler_get_superset_disasm(d, addr); *scc_id = z_addr_dict_get(pd->addr2sccid, addr); __prob_disassembler_get_inst_hint(pd, addr, inst_hint); __prob_disassembler_get_inst_lost(pd, addr, inst_lost); __prob_disassembler_get_data_hint(pd, addr, data_hint); 
__prob_disassembler_get_D(pd, addr, D); *P = z_prob_disassembler_get_inst_prob(pd, addr); } Z_PRIVATE void z_prob_disassembler_update(ProbDisassembler *pd, addr_t addr, bool is_inst, bool need_log) { if (is_inst) { // we have known for sure this addr is an instruction boundary __prob_disassembler_reset_inst_hint(pd, addr, 0.0); z_addr_dict_remove(pd->inst_lost, addr); z_addr_dict_remove(pd->data_hint, addr); } else { // we have known for sure this addr is not an instruction boundary z_addr_dict_remove(pd->H, addr); // inst_hint __prob_disassembler_reset_inst_lost(pd, addr, +INFINITY); // XXX: resetting data_hint should be more carefully handled as there // are two cases of is_inst == false: 1) inside an instrution and 2) // data __prob_disassembler_reset_data_hint(pd, addr, +INFINITY); } if (need_log) { // log the hint DHintType type = (is_inst ? DHINT_CODE : DHINT_DATA); #ifdef DEBUG DHintType old_type = (DHintType)g_hash_table_lookup( pd->dynamic_hints, GSIZE_TO_POINTER(addr)); if (old_type && (old_type != type)) { EXITME("inconstatn type of the dynamic hint at %#lx", addr); } #endif g_hash_table_insert(pd->dynamic_hints, GSIZE_TO_POINTER(addr), GSIZE_TO_POINTER(type)); } } Z_PRIVATE void z_prob_disassembler_start(ProbDisassembler *pd) { /* * step [1]. 
collect hints if we haven't: please refer to * *prob_disasm_complete/hints.c* */ if (!pd->round_n) { // calculate static hints __prob_disassembler_collect_cf_hints(pd); __prob_disassembler_collect_reg_hints(pd); __prob_disassembler_collect_pop_ret_hints(pd); __prob_disassembler_collect_cmp_cjmp_hints(pd); __prob_disassembler_collect_arg_call_hints(pd); __prob_disassembler_collect_str_hints(pd); __prob_disassembler_collect_value_hints(pd); // apply logged dynamic hint { GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, pd->dynamic_hints); while (g_hash_table_iter_next(&iter, &key, &value)) { addr_t addr = (addr_t)key; bool is_inst = ((DHintType)value == DHINT_CODE); z_prob_disassembler_update(pd, addr, is_inst, false); } } z_info("probabilistic disassembly: hints collection done"); } /* * step [2]. play several rounds to calculate probabilities */ do { /* * step [2.1]. refresh playground */ __prob_disassembler_refresh_playground(pd); /* * step [2]. propogate hints: * refer to *prob_disasm_complete/propagation.c* */ __prob_disassembler_propogate_inst_hints(pd); // TODO: __prob_disassembler_propogate_data_hints(pd); z_trace("probabilistic disassembly: hints propagation done"); /* * step [3]. spread hints: refer to *prob_disasm_complete/solving.c* */ __prob_disassembler_spread_hints(pd); z_trace("probabilistic disassembly: hints spreading done"); /* * step [4]. restrain probabilities: * refer to *prob_disasm_complete/solving.c* */ __prob_disassembler_restrain_prob(pd); z_trace("probabilistic disassembly: probability restraint done"); /* * step [5]. 
normalized probabilities: * refer to *prob_disasm_complete/solving.c* */ __prob_disassembler_normalize_prob(pd); z_trace("probabilistic disassembly: probability normalization done"); pd->round_n += 1; z_info("probabilistic disassembly round %d done", pd->round_n); } while (pd->round_n < INIT_ROUND_N); } Z_PRIVATE ProbDisassembler *z_prob_disassembler_create(Disassembler *d) { ProbDisassembler *pd = STRUCT_ALLOC(ProbDisassembler); pd->base = d; pd->binary = d->binary; pd->text_addr = d->text_addr; pd->text_size = d->text_size; pd->round_n = 0; // read p-disasm file pd->dynamic_hints = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); const char *original_filename = z_binary_get_original_filename(d->binary); pd->dhint_filename = z_strcat(PDISASM_FILENAME_PREFIX, original_filename); { if (!z_access(pd->dhint_filename, F_OK)) { z_info( "pdisasm file exists, so we will read those pre-calcualted " "hints"); Buffer *buf = z_buffer_read_file(pd->dhint_filename); size_t n = z_buffer_get_size(buf) / sizeof(DHint); DHint *hints = (DHint *)z_buffer_get_raw_buf(buf); // XXX: note that we will apply those dynamic hints after collecting // static hints. for (size_t i = 0; i < n; i++) { g_hash_table_insert(pd->dynamic_hints, GSIZE_TO_POINTER(hints[i].addr), GSIZE_TO_POINTER(hints[i].type)); } z_buffer_destroy(buf); } } /* * H: instruction hint source for each address, which is also the * update point for all *instruction hints*. */ z_addr_dict_init(pd->H, pd->text_addr, pd->text_size); /* * RH: Propogated instruction hints for each address, which is the * result of hint propogation, and also the update point of all * *instruction losts*. * * Additionally, we do not propogate instruction losts. */ z_addr_dict_init(pd->RH, pd->text_addr, pd->text_size); z_addr_dict_init(pd->inst_lost, pd->text_addr, pd->text_size); /* * D: final probabilities of eash address to be data, which is also the * update point of all *data hints*. 
*/ z_addr_dict_init(pd->D, pd->text_addr, pd->text_size); z_addr_dict_init(pd->data_hint, pd->text_addr, pd->text_size); /* * P: final probabilities of each address to be instructoin. */ z_addr_dict_init(pd->P, pd->text_addr, pd->text_size); /* * dag building: please refer to: *prob_disasm_complete/dag.c* */ __prob_disassembler_build_dag(pd); return pd; } Z_PRIVATE void z_prob_disassembler_destroy(ProbDisassembler *pd) { // XXX: note that *base* should not be destroyed here. z_addr_dict_destroy(pd->H); z_addr_dict_destroy(pd->RH); z_addr_dict_destroy(pd->P); z_addr_dict_destroy(pd->D); z_addr_dict_destroy(pd->inst_lost); z_addr_dict_destroy(pd->data_hint); z_addr_dict_destroy(pd->addr2sccid); z_addr_dict_destroy(pd->dag_succs, &g_hash_table_destroy); z_addr_dict_destroy(pd->dag_preds, &g_hash_table_destroy); z_addr_dict_destroy(pd->dag_dead); g_queue_free(pd->topo); z_addr_dict_destroy(pd->dag_P); // write down dynamic hints { FILE *f = z_fopen(pd->dhint_filename, "wb"); DHint hint = { .addr = INVALID_ADDR, .type = DHINT_NONE, }; GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, pd->dynamic_hints); while (g_hash_table_iter_next(&iter, &key, &value)) { hint.addr = (addr_t)key; hint.type = (DHintType)value; if (z_fwrite(&hint, sizeof(DHint), 1, f) != 1) { EXITME("error on writing dynamic hint file"); } } z_fclose(f); } z_free((char *)pd->dhint_filename); g_hash_table_destroy(pd->dynamic_hints); z_free(pd); } /////////////////////////////////// // Disassembler Private API /////////////////////////////////// Z_PRIVATE void __disassembler_pdisasm_create(Disassembler *d) { __SET_PDISASM(d, z_prob_disassembler_create(d)); } Z_PRIVATE void __disassembler_pdisasm_destroy(Disassembler *d) { z_prob_disassembler_destroy(__GET_PDISASM(d)); } Z_PRIVATE void __disassembler_pdisasm_start(Disassembler *d) { z_prob_disassembler_start(__GET_PDISASM(d)); } Z_PRIVATE double128_t __disassembler_pdisasm_get_inst_prob(Disassembler *d, addr_t addr) { return 
z_prob_disassembler_get_inst_prob(__GET_PDISASM(d), addr); } Z_PRIVATE void __disassembler_pdisasm_get_internal( Disassembler *d, addr_t addr, cs_insn **inst, uint32_t *scc_id, double128_t *inst_hint, double128_t *inst_lost, double128_t *data_hint, double128_t *D, double128_t *P) { z_prob_disassembler_get_internal(__GET_PDISASM(d), addr, inst, scc_id, inst_hint, inst_lost, data_hint, D, P); } Z_PRIVATE void __disassembler_pdisasm_update(Disassembler *d, addr_t addr, bool is_inst) { z_prob_disassembler_update(__GET_PDISASM(d), addr, is_inst, true); } #undef __GET_PDISASM #undef __SET_PDISASM ================================================ FILE: src/prob_disasm/prob_disasm_simple.c ================================================ /* * prob_disasm_simple.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #define __GET_PDISASM(d) ((Splay *)((d)->prob_disasm)) #define __SET_PDISASM(d, v) \ do { \ (d)->prob_disasm = (PhantomType *)(v); \ } while (0) typedef struct code_segment_t { addr_t addr; size_t size; } CodeSegment; Z_PRIVATE void __disassembler_pdisasm_create_S(Disassembler *d) { const char *original_filename = z_binary_get_original_filename(d->binary); const char *codeseg_filename = z_strcat(original_filename, CODE_SEGMENT_FILE_SUFFIX); __SET_PDISASM(d, z_splay_create(NULL)); // XXX: code segment file is mainly used for debugging purpose. 
if (!z_access(codeseg_filename, F_OK)) { // code segment file exits z_info( "code segment file (for linear disassembly) is persent, and we will" "use those pre-defined code segments"); Buffer *buf = z_buffer_read_file(codeseg_filename); // tail (virtual) code segment assert(INVALID_ADDR > 0); CodeSegment virtual_code_segment = { .addr = INVALID_ADDR, .size = 0, }; z_buffer_append_raw(buf, (uint8_t *)&virtual_code_segment, sizeof(virtual_code_segment)); size_t n = z_buffer_get_size(buf) / sizeof(CodeSegment); CodeSegment *codes = (CodeSegment *)z_buffer_get_raw_buf(buf); addr_t cur_addr = codes[0].addr; size_t cur_size = codes[0].size; for (int i = 1; i < n; i++) { CodeSegment *code = &(codes[i]); if (code->addr <= cur_addr) { EXITME("pre-defined code segments are not in increasing order"); } if (code->addr <= cur_addr + cur_size && code->addr != INVALID_ADDR) { size_t tmp_size = code->addr + code->size - cur_addr; cur_size = (cur_size >= tmp_size ? cur_size : tmp_size); } else { z_info("pre-defined code segment: [%#lx, %#lx]", cur_addr, cur_addr + cur_size - 1); Snode *node = z_snode_create(cur_addr, cur_size, NULL, NULL); z_splay_insert(__GET_PDISASM(d), node); cur_addr = code->addr; cur_size = code->size; } } z_buffer_destroy(buf); } else { z_info("no code segment file found, patch the whole .text section"); Snode *node = z_snode_create(d->text_addr, d->text_size, NULL, NULL); z_splay_insert(__GET_PDISASM(d), node); } z_free((char *)codeseg_filename); } Z_PRIVATE void __disassembler_pdisasm_destroy_S(Disassembler *d) { z_splay_destroy(__GET_PDISASM(d)); } Z_PRIVATE void __disassembler_pdisasm_start_S(Disassembler *d) { /* * leave it blank */ } Z_PRIVATE double128_t __disassembler_pdisasm_get_inst_prob_S(Disassembler *d, addr_t addr) { if (z_splay_search(__GET_PDISASM(d), addr)) { return 1.0; } else { return 0.0; } } Z_PRIVATE void __disassembler_pdisasm_get_internal_S( Disassembler *d, addr_t addr, cs_insn **inst, uint32_t *scc_id, double128_t *inst_hint, 
double128_t *inst_lost, double128_t *data_hint, double128_t *D, double128_t *P) { EXITME("Probabilisitic Disassembly is not fully supported"); } Z_PRIVATE void __disassembler_pdisasm_update_S(Disassembler *d, addr_t addr, bool is_inst) { /* * leave it blank */ } #undef __GET_PDISASM #undef __SET_PDISASM ================================================ FILE: src/restricted_ptr.c ================================================ /* * restricted_ptr.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ #include "restricted_ptr.h" #include "utils.h" /* * Setter and Getter */ DEFINE_GETTER(Rptr, rptr, size_t, size); Z_PRIVATE void __rptr_check_null(Rptr *rptr) { if (z_rptr_is_null(rptr)) { EXITME("rptr is NULL"); } } Z_API Rptr *z_rptr_create(uint8_t *base_ptr, size_t size) { Rptr *rptr = STRUCT_ALLOC(Rptr); rptr->base_ptr = base_ptr; rptr->raw_ptr = base_ptr; rptr->size = size; return rptr; } Z_API void z_rptr_destroy(Rptr *rptr) { z_free(rptr); } Z_API void *z_rptr_safe_raw_ptr(Rptr *rptr, size_t n) { __rptr_check_null(rptr); if (rptr->size < n) { EXITME("restricted pointer's size is smaller than memcpy size"); } return rptr->raw_ptr; } Z_API void z_rptr_memcpy_to(Rptr *rptr, uint8_t *src, size_t size) { __rptr_check_null(rptr); if (rptr->size < size) { EXITME("restricted pointer's size is smaller than memcpy size"); } memcpy(rptr->raw_ptr, src, size); } Z_API void z_rptr_memcpy_from(Rptr *rptr, uint8_t *dst, size_t size) { __rptr_check_null(rptr); if (rptr->size < size) { EXITME("restricted pointer's size is smaller than memcpy size"); } memcpy(dst, rptr->raw_ptr, size); } Z_API void z_rptr_truncate(Rptr *rptr, size_t n) { __rptr_check_null(rptr); if (n > rptr->size) { EXITME("truncate pointer to a bigger size"); } rptr->size = n; } ================================================ FILE: src/restricted_ptr.h ================================================ /* * restricted_ptr.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __RESTRICTED_PTR_H #define __RESTRICTED_PTR_H #include "config.h" #define __IS_RPTR(x) _Generic((x), Rptr * : true, default : false) #define z_rptr_get_ptr(rptr, type) \ ((type *)z_rptr_safe_raw_ptr(rptr, sizeof(type))) #define z_rptr_is_null(rptr) \ (((rptr) == NULL) || ((rptr)->raw_ptr == NULL) || ((rptr)->size == 0)) #define z_rptr_inc(rptr, type, n) \ do { \ if ((rptr)->size < (n) * sizeof(type)) { \ EXITME("restricted pointer's size is too small"); \ } \ (rptr)->raw_ptr += (n) * sizeof(type); \ (rptr)->size -= (n) * sizeof(type); \ } while (0) #define z_rptr_memset(s, c, n) \ do { \ if ((s)->size < n) { \ EXITME("restricted pointer's size is too small"); \ } \ memset((s)->raw_ptr, c, n); \ } while (0) #define z_rptr_memcpy(dst, src, n) \ do { \ if (__IS_RPTR(dst)) { \ z_rptr_memcpy_to((Rptr *)(dst), (uint8_t *)(src), n); \ } else { \ z_rptr_memcpy_from((Rptr *)(src), (uint8_t *)(dst), n); \ } \ } while (0) #define z_rptr_reset(rptr) \ do { \ (rptr)->size += (rptr)->raw_ptr - (rptr)->base_ptr; \ (rptr)->raw_ptr = (rptr)->base_ptr; \ } while (0) STRUCT(Rptr, { uint8_t *base_ptr; uint8_t *raw_ptr; size_t size; }); /* * Setter and Getter */ DECLARE_GETTER(Rptr, rptr, size_t, size); /* * Create a restricted pointer. */ Z_API Rptr *z_rptr_create(uint8_t *base_ptr, size_t size); /* * Destroy a restricted pointer. 
*/ Z_API void z_rptr_destroy(Rptr *rptr); /* * Safely return a raw ptr */ Z_API void *z_rptr_safe_raw_ptr(Rptr *rptr, size_t n); /* * memcpy to Rptr */ Z_API void z_rptr_memcpy_to(Rptr *rptr, uint8_t *src, size_t size); /* * memcpy from Rptr */ Z_API void z_rptr_memcpy_from(Rptr *rptr, uint8_t *dst, size_t size); /* * Truncate a Pptr to n */ Z_API void z_rptr_truncate(Rptr *rptr, size_t n); #endif ================================================ FILE: src/rewriter.c ================================================ /* * rewriter.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "rewriter.h" #include "buffer.h" #include "capstone_.h" #include "config.h" #include "ucfg_analyzer.h" #include "utils.h" #include "x64_utils.c" #include #ifdef DEBUG FILE *__debug_file = NULL; #define __debug_printf(...) fprintf(__debug_file, __VA_ARGS__) #endif #define ASMLINE_FMT_SIZE 0x100 static char asmline_fmt[ASMLINE_FMT_SIZE]; // TODO: add BeforeBB/AfterBB/BeforeInst/AfterInst handler /* * Rewrite entrypoint */ Z_PRIVATE void z_rewriter_rewrite_entrypoint(Rewriter *r); /* * Rewrite main */ Z_PRIVATE void z_rewriter_rewrite_main(Rewriter *r); /* * Rewrite functions beyond main (_start, .init, .fini, ...) 
*/ Z_PRIVATE void z_rewriter_rewrite_beyond_main(Rewriter *r); /* * Function Pointer: compare two address */ Z_PRIVATE int __rewriter_compare_address(addr_t x, addr_t y, void *_z); /* * Calculate uTP address, and store the new inst_addr into inst_addr */ Z_RESERVED Z_PRIVATE addr_t __rewriter_calculate_utp_addr(Rewriter *r, addr_t *inst_addr, size_t inst_size); /* * Find a possible uTP address */ Z_RESERVED Z_PRIVATE bool __rewriter_patch_utp(Rewriter *r, addr_t ori_addr); /* * Translate inst into shadow address */ Z_PRIVATE cs_insn *__rewriter_translate_shadow_inst(Rewriter *r, cs_insn *inst, addr_t ori_addr); /* * Generate an instruction of shadow code */ Z_PRIVATE void __rewriter_generate_shadow_inst(Rewriter *r, GHashTable *holes, cs_insn *inst, addr_t ori_addr, bool bb_entry); /* * Generate a basic block of shadow code */ Z_PRIVATE void __rewriter_generate_shadow_block( Rewriter *r, GHashTable *holes, GQueue *instructions, addr_t ori_addr, cs_insn *(*disasm_func)(Disassembler *, addr_t)); /* * Fill in shadow holes */ Z_PRIVATE void __rewriter_fillin_shadow_hole(Rewriter *r, GHashTable *holes); /* * Build bridgs */ Z_RESERVED Z_PRIVATE void __rewriter_build_bridges(Rewriter *r, GQueue *instructions); /* * Emit Trampoline based on analyzed results */ Z_PRIVATE void __rewriter_emit_trampoline(Rewriter *r, addr_t addr); // XXX: this include must be placed here, to use above predeclared these // prototypes #include "rewriter_handlers/handler_main.c" /* * Cound how many BB ID is conflicted */ Z_PRIVATE void __rewriter_count_conflicted_ids(Rewriter *r); Z_PRIVATE void __rewriter_count_conflicted_ids(Rewriter *r) { size_t conflicts = 0; GHashTable *id_2_bb = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); GList *bbs = g_hash_table_get_keys(r->rewritten_bbs); for (GList *l = bbs; l != NULL; l = l->next) { addr_t bb_addr = (addr_t)(l->data); size_t bb_id = AFL_BB_ID(bb_addr); addr_t old_bb = (addr_t)g_hash_table_lookup(id_2_bb, 
GSIZE_TO_POINTER(bb_id)); if (!old_bb) { g_hash_table_insert(id_2_bb, GSIZE_TO_POINTER(bb_id), GSIZE_TO_POINTER(bb_addr)); } else { conflicts += 1; z_trace("conflict: %#lx v/s %#lx (%#lx)", bb_addr, old_bb, bb_id); } } g_hash_table_destroy(id_2_bb); g_list_free(bbs); z_info("number of conflicted block IDs : %ld", conflicts); } Z_PRIVATE void z_rewriter_rewrite_beyond_main(Rewriter *r) { if (r->__main_rewritten) { EXITME( "z_rewriter_rewrite_beyond_main should execute before " "z_rewriter_rewrite_main"); } ELF *e = z_binary_get_elf(r->binary); // init and fini #define __REWRITE_FCN_FROM_REG(type, reg) \ do { \ addr_t type##_addr = z_elf_get_##type(e); \ z_rewriter_rewrite(r, type##_addr); \ addr_t shadow_##type##_addr = \ z_rewriter_get_shadow_addr(r, type##_addr); \ if (shadow_##type##_addr == INVALID_ADDR) { \ break; \ } \ \ addr_t load_##type = z_elf_get_load_##type(e); \ assert(z_rewriter_get_shadow_addr(r, load_##type) == INVALID_ADDR); \ assert(z_disassembler_get_recursive_disasm(r->disassembler, \ load_##type) == NULL); \ assert(z_disassembler_get_linear_disasm(r->disassembler, \ load_##type) == NULL); \ if (z_elf_get_is_pie(e)) { \ KS_ASM(load_##type, "lea " #reg ", [rip %+ld];", \ shadow_##type##_addr - load_##type - 7); \ } else { \ KS_ASM(load_##type, "mov " #reg ", %#lx;", shadow_##type##_addr); \ } \ assert(ks_size == 7); \ z_elf_write(e, load_##type, ks_size, ks_encode); \ z_disassembler_update_superset_disasm(r->disassembler, load_##type); \ } while (0) __REWRITE_FCN_FROM_REG(init, rcx); __REWRITE_FCN_FROM_REG(fini, r8); #undef __REWRITE_FCN_FROM_REG // .init.array and .fini array #define __REWRITE_FCN_FROM_ARRAY(type) \ do { \ Rptr *array = NULL; \ size_t array_size = 0; \ addr_t array_addr = INVALID_ADDR; \ \ Elf64_Shdr *type##_array = z_elf_get_shdr_##type##_array(e); \ if (!type##_array) { \ break; \ } \ array_size = type##_array->sh_size; \ array_addr = type##_array->sh_addr; \ array = z_elf_vaddr2ptr(e, array_addr); \ for (int i = 0; i < 
array_size / sizeof(addr_t); i++) { \ addr_t fcn = *z_rptr_get_ptr(array, addr_t); \ z_info("." #type ".array[%d]: %#lx", i, fcn); \ z_rewriter_rewrite(r, fcn); \ addr_t shadow_fcn = z_rewriter_get_shadow_addr(r, fcn); \ *z_rptr_get_ptr(array, addr_t) = shadow_fcn; \ z_rptr_inc(array, addr_t, 1); \ } \ z_rptr_destroy(array); \ } while (0) __REWRITE_FCN_FROM_ARRAY(init); __REWRITE_FCN_FROM_ARRAY(fini); #undef __REWRITE_FCN_FROM_ARRAY // start z_rewriter_rewrite_entrypoint(r); } Z_PRIVATE void z_rewriter_rewrite_entrypoint(Rewriter *r) { ELF *e = z_binary_get_elf(r->binary); addr_t start_addr = z_elf_get_ori_entry(e); // rewrite entrypoint z_rewriter_rewrite(r, start_addr); // update shadow start addr_t shadow_start_addr = z_rewriter_get_shadow_addr(r, start_addr); assert(shadow_start_addr != INVALID_ADDR); z_binary_set_shadow_start(r->binary, shadow_start_addr); } Z_PRIVATE void z_rewriter_rewrite_main(Rewriter *r) { if (r->__main_rewritten) { EXITME("z_rewriter_rewrite_main already executed"); } ELF *e = z_binary_get_elf(r->binary); addr_t main_addr = z_elf_get_main(e); // rewrite main z_rewriter_rewrite(r, main_addr); // update shadow main addr_t shadow_main_addr = z_rewriter_get_shadow_addr(r, main_addr); assert(shadow_main_addr != INVALID_ADDR); z_binary_set_shadow_main(r->binary, shadow_main_addr); // update __main_rewritten r->__main_rewritten = true; } Z_RESERVED Z_PRIVATE bool __rewriter_patch_utp(Rewriter *r, addr_t ori_addr) { // [0] get instruction cs_insn *inst = z_disassembler_get_recursive_disasm(r->disassembler, ori_addr); assert(inst != NULL); // [1] get upt_addr addr_t new_inst_addr = ori_addr; addr_t utp_addr = __rewriter_calculate_utp_addr(r, &new_inst_addr, inst->size); // [2] validate uTP if (utp_addr == INVALID_ADDR) { return false; } // [3] get shadow_address addr_t shadow_addr = z_rewriter_get_shadow_addr(r, ori_addr); assert(shadow_addr != INVALID_ADDR); // [4] generate utp trampoline KS_ASM_JMP(utp_addr, shadow_addr); 
z_binary_insert_utp(r->binary, utp_addr, ks_encode, ks_size); // [5] generate patched code // KS_ASM_JMP(new_inst_addr, utp_addr); // [6] do patch // ELF *e = z_binary_get_elf(r->binary); // z_elf_write(e, new_inst_addr, ks_size, ks_encode); // [7] patch prefix code // if (new_inst_addr != ori_addr) { // size_t padding_size = new_inst_addr - ori_addr; // z_elf_write(e, ori_addr, padding_size, // z_x64_gen_nop(padding_size)); // } // [8] update count r->patched_unsafe_bg_count++; return true; } Z_RESERVED Z_PRIVATE addr_t __rewriter_calculate_utp_addr(Rewriter *r, addr_t *inst_addr, size_t inst_size) { ELF *e = z_binary_get_elf(r->binary); bool is_pie = z_elf_get_is_pie(e); // [1] get offset buf uint64_t tmp = 0; addr_t ori_inst_addr = *inst_addr; z_elf_read(e, ori_inst_addr, 8, (uint8_t *)(&tmp)); // [2] prepare init pointer uint8_t *buffer = (uint8_t *)(&tmp) + 1; size_t buffer_size = inst_size - 1; // [3] prepare a utp snode Snode *utp = z_snode_create(0, __rewriter_get_hole_len(X86_INS_JMP), NULL, NULL); // [4] Brute-force OP inst_addr while ((int64_t)buffer_size >= 0) { int32_t *offset = (int32_t *)buffer; // [4.1] pre-check for non-pie (avoid cache miss) if (!is_pie && buffer[3] > 0x7f) goto NEXT; // [4.2] initial offset buf (a trick to avoid conflict) memset(buffer, 1, buffer_size); int32_t ori_offset = *offset; // [4.3] brute-force offset int64_t utp_addr = 0; do { utp_addr = (int64_t)(*inst_addr + 5) + (int64_t)(*offset); if (is_pie || utp_addr >= 0) { z_snode_set_addr(utp, (addr_t)utp_addr); if (z_elf_check_region_free(e, utp)) { z_snode_destroy(utp); return (addr_t)utp_addr; } } if (buffer_size == 0) goto NEXT; for (int32_t i = buffer_size - 1; i >= 0; i--) { if (buffer[i] != 0xff) { buffer[i] += 1; break; } else { buffer[i] = 0x00; } } } while (*offset != ori_offset); NEXT: // [4.4] check next OP inst_addr (*inst_addr)++; buffer++; buffer_size--; } // [5] failed z_trace("fail to find suitable uTP address: %#lx", ori_inst_addr); z_snode_destroy(utp); 
    return INVALID_ADDR;
}

/*
 * Address comparator used with g_queue_sort (see z_rewriter_rewrite /
 * z_rewriter_heuristics_rewrite): ascending order. The trailing void*
 * matches the GCompareDataFunc signature and is unused.
 */
Z_PRIVATE int __rewriter_compare_address(addr_t x, addr_t y, void *_z) {
    if (x == y)
        return 0;
    else if (x > y)
        return 1;
    else
        return -1;
}

/*
 * Emit an AFL trampoline for the basic block whose *original* entrypoint is
 * addr, appending it to the shadow code region.
 *
 * Optimizations driven by the UCFG analyzer:
 *   - blocks recorded in r->instrumentation_free_bbs, or blocks following a
 *     failed security check, get no trampoline at all;
 *   - if EFLAGS are proven dead at addr (flg_state == 0), the
 *     context-save/restore pair around the bitmap update is elided;
 *   - gpr_state tells which general-purpose registers the trampoline may
 *     clobber without saving (RAX is excluded when a context save is used,
 *     presumably because the save/restore code itself uses RAX — TODO
 *     confirm against TP_EMIT(context_save)).
 *
 * Statistics (afl_trampoline_count, optimized_flg_count,
 * optimized_gpr_count) are updated for z_rewriter_optimization_stats.
 */
Z_PRIVATE void __rewriter_emit_trampoline(Rewriter *r, addr_t addr) {
#ifndef BINARY_SEARCH_INVALID_CRASH
    UCFG_Analyzer *ucfg_analyzer =
        z_disassembler_get_ucfg_analyzer(r->disassembler);

    if (g_hash_table_lookup(r->instrumentation_free_bbs,
                            GSIZE_TO_POINTER(addr)) ||
        z_ucfg_analyzer_is_security_chk_failed(ucfg_analyzer, addr)) {
        // instrumentation-free blocks do not need trampoline
        return;
    }

    FLGState flg_state = z_ucfg_analyzer_get_flg_need_write(ucfg_analyzer, addr);
    GPRState gpr_state = z_ucfg_analyzer_get_gpr_can_write(ucfg_analyzer, addr);

    // update total number of trampolines
    r->afl_trampoline_count += 1;

    // update gpr state
    if (gpr_state) {
        r->optimized_gpr_count += 1;
    }

    if (!flg_state) {
        // no need to store eflags: emit the bare bitmap update
        r->optimized_flg_count += 1;
        TP_EMIT(bitmap, addr, gpr_state);
        z_binary_insert_shadow_code(r->binary, tp_code, tp_size);
    } else {
        // need to store eflags: wrap the bitmap update in save/restore
        TP_EMIT(context_save);
        z_binary_insert_shadow_code(r->binary, tp_code, tp_size);
        TP_EMIT(bitmap, addr, gpr_state & (~GPRSTATE_RAX));
        z_binary_insert_shadow_code(r->binary, tp_code, tp_size);
        TP_EMIT(context_restore);
        z_binary_insert_shadow_code(r->binary, tp_code, tp_size);
    }
#endif
}

/*
 * Back-patch the "holes" left by direct control-transfer rewriting.
 *
 * holes maps (shadow address of an emitted transfer instruction) ->
 * (original target address). Once the target's shadow location is known
 * (via r->rewritten_bbs, or r->shadow_code for trampoline-free transfers,
 * see below), the placeholder at shadow_inst_addr is overwritten with a
 * concrete transfer to the shadow target.
 */
Z_PRIVATE void __rewriter_fillin_shadow_hole(Rewriter *r, GHashTable *holes) {
    GList *shadow_addrs = g_hash_table_get_keys(holes);
    ELF *e = z_binary_get_elf(r->binary);

    for (GList *l = shadow_addrs; l != NULL; l = l->next) {
        addr_t shadow_inst_addr = (addr_t)(l->data);
        addr_t ori_tar_addr = (addr_t)g_hash_table_lookup(
            holes, GSIZE_TO_POINTER(shadow_inst_addr));
        addr_t shadow_tar_addr = (addr_t)g_hash_table_lookup(
            r->rewritten_bbs, GSIZE_TO_POINTER(ori_tar_addr));
        if (shadow_tar_addr == 0) {
            // XXX: ignore invalid hole as it may be false instruction
            z_warn("an invalid hole: %#lx <- %#lx", ori_tar_addr,
                   shadow_inst_addr);
            continue;
        }

        // get id and hole size
        // the hole starts with the capstone instruction id of the transfer
        // that must be emitted here (e.g. X86_INS_JMP); a *negative* id is
        // the flag for a trampoline-free transfer (see the call handler,
        // which stores -(int64_t)X86_INS_JMP when the optimization is on)
        uint32_t inst_id;
        z_elf_read(e, shadow_inst_addr, sizeof(uint32_t),
                   (uint8_t *)(&inst_id));

#ifndef NSINGLE_SUCC_OPT
        // check whether we need to do optimization
        if (!r->opts->disable_opt) {
            if ((int32_t)inst_id < 0) {
                // it is a trampoline-free transfer: recover the real id
                // (two's-complement negation) and target the instruction's
                // own shadow copy instead of its bitmap trampoline
                inst_id = (~inst_id) + 1;
                shadow_tar_addr = (addr_t)g_hash_table_lookup(
                    r->shadow_code, GSIZE_TO_POINTER(ori_tar_addr));
            }
        } else {
            assert((int32_t)inst_id >= 0);
        }
#endif

        size_t hole_size = __rewriter_get_hole_len(inst_id);

        // generate code: "<mnemonic> <shadow target>"
        KS_ASM(shadow_inst_addr, "%s %#lx", cs_insn_name(cs, inst_id),
               shadow_tar_addr);
        z_elf_write(e, shadow_inst_addr, ks_size, ks_encode);

        // padding hole: the encoded transfer may be shorter than the
        // reserved hole, so fill the remainder with NOPs
        assert(ks_size <= hole_size);
        if (ks_size < hole_size) {
            z_elf_write(e, shadow_inst_addr + ks_size, hole_size - ks_size,
                        z_x64_gen_nop(hole_size - ks_size));
        }
    }

    g_list_free(shadow_addrs);
}

/*
 * Translate a PC-relative (rip/eip-based) memory-access instruction so that
 * it still references the *original* data when executed from the shadow
 * code region. Non-PC-relative instructions are returned unchanged.
 *
 * The rewritten displacement depends on the shadow instruction's own length,
 * which in turn depends on the displacement — hence the brute-force loop
 * over candidate next-PC values below (step [2]).
 *
 * Returns the instruction to emit (either the original inst, or the
 * re-disassembled translated copy in the CS_DISASM_RAW scratch buffer).
 */
Z_PRIVATE cs_insn *__rewriter_translate_shadow_inst(Rewriter *r, cs_insn *inst,
                                                    addr_t ori_addr) {
    cs_detail *detail = inst->detail;

    for (int32_t i = 0; i < detail->x86.op_count; i++) {
        cs_x86_op *op = &(detail->x86.operands[i]);
        if (op->type == X86_OP_MEM &&
            (op->mem.base == X86_REG_RIP || op->mem.base == X86_REG_EIP)) {
            goto TRANSLATE_RIP_INS;
        }
    }

    // PC non-related instruction, directly return
    assert(strstr(inst->op_str, "rip") == NULL);
    assert(strstr(inst->op_str, "eip") == NULL);
    return inst;

TRANSLATE_RIP_INS:
    z_trace(
        "rip-related memory access "
        "instruction " CS_SHOW_INST(inst));

    const char *pc_regname = NULL;
    if (strstr(inst->op_str, "eip")) {
        z_warn("translate eip-related instruction: " CS_SHOW_INST(inst));
        pc_regname = "eip";
    } else {
        pc_regname = "rip";
    }

    // step [1]. generate asmline fmt (FMTSTR ATTACK!!!)
    // (the format string is later fed to KS_ASM with pc_regname and the
    // recomputed displacement as arguments)
    int64_t op_mem_disp = 0;

    // step [1.1]. generate mnemonic
    z_snprintf(asmline_fmt, ASMLINE_FMT_SIZE, "%s\t", inst->mnemonic);

    // step [1.2].
generate operands for (int32_t i = 0; i < detail->x86.op_count; i++) { cs_x86_op *op = &(detail->x86.operands[i]); switch (op->type) { case X86_OP_REG: assert(op->reg != X86_REG_RIP); assert(op->reg != X86_REG_EIP); z_snprintf(asmline_fmt + z_strlen(asmline_fmt), ASMLINE_FMT_SIZE - z_strlen(asmline_fmt), "%s, ", cs_reg_name(cs, op->reg)); continue; case X86_OP_IMM: z_snprintf(asmline_fmt + z_strlen(asmline_fmt), ASMLINE_FMT_SIZE - z_strlen(asmline_fmt), "%#lx, ", op->imm); continue; case X86_OP_MEM: assert(op->mem.base == X86_REG_RIP || op->mem.base == X86_REG_EIP); assert(op->mem.index == X86_REG_INVALID); /* * XXX: keystone and capstone bug! For more information, please * refer to * https://github.com/keystone-engine/keystone/issues/92 */ // TODO: build our own keystone and capstone (HUG!) size_t hooked_size = op->size; if (inst->id == X86_INS_COMISS) { hooked_size = 4; } else if (inst->id == X86_INS_COMISD) { hooked_size = 8; } switch (hooked_size) { case 1: z_strcpy(asmline_fmt + z_strlen(asmline_fmt), "byte ptr [%s%+ld], "); break; case 2: z_strcpy(asmline_fmt + z_strlen(asmline_fmt), "word ptr [%s%+ld], "); break; case 4: z_strcpy(asmline_fmt + z_strlen(asmline_fmt), "dword ptr [%s%+ld], "); break; case 8: z_strcpy(asmline_fmt + z_strlen(asmline_fmt), "qword ptr [%s%+ld], "); break; case 10: z_strcpy(asmline_fmt + z_strlen(asmline_fmt), "fword ptr [%s%+ld], "); break; case 16: z_strcpy(asmline_fmt + z_strlen(asmline_fmt), "xmmword ptr [%s%+ld], "); break; default: z_strcpy(asmline_fmt + z_strlen(asmline_fmt), "[%s%+ld], "); break; } op_mem_disp = op->mem.disp; continue; default: EXITME("invalid op type " CS_SHOW_INST(inst)); } } // step [1.3]. 
add NULL at last comma assert(asmline_fmt[z_strlen(asmline_fmt) - 2] == ','); asmline_fmt[z_strlen(asmline_fmt) - 2] = '\x00'; z_trace("generated asmline_fmt: %s", asmline_fmt); const addr_t ori_pc = ori_addr + inst->size; const addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary); addr_t shadow_pc = shadow_addr + inst->size; // step [2]. brute-force to find the suitable shadow_pc, starting from the // most possible address (the longest meanful x64 instruction is 15-byte) for (; shadow_pc < shadow_addr + 0x10; shadow_pc++) { // step [2.1]. asm and disasm (FMTSTR ATTACK!!!) KS_ASM(shadow_addr, asmline_fmt, pc_regname, ori_pc - shadow_pc + op_mem_disp); assert(ks_size > 0); CS_DISASM_RAW(ks_encode, ks_size, shadow_addr, 1); if (ks_size != cs_inst->size) { EXITME("invalid instruction rewriting"); } // step [2.2]. check and re-fit next pc address if (shadow_addr + cs_inst->size == shadow_pc) { // nice, break break; } else if (shadow_addr + cs_inst->size < shadow_pc) { // for short instruction, // easy to padding nop size_t padding_size = shadow_pc - cs_inst->size - shadow_addr; z_binary_insert_shadow_code(r->binary, z_x64_gen_nop(padding_size), padding_size); break; } // we need to check bigger shadow pc } assert(z_binary_get_shadow_code_addr(r->binary) + cs_inst->size == shadow_pc); return (cs_insn *)cs_inst; } Z_PRIVATE void __rewriter_generate_shadow_inst(Rewriter *r, GHashTable *holes, cs_insn *inst, addr_t ori_addr, bool bb_entry) { // step [0]. 
get next address, we must do this before translation addr_t ori_next_addr = ori_addr + inst->size; /* * XXX: for the basic block entrypoints' address mapping, there is a silght * difference between r->rewritten_bbs and LOOKUP_TABLE: * r->rewritten_bbs maps the bb entrypoint address to its corresponding * bitmap code's shadow address (it serves for handlers to find shadow * tranfer target); * LOOKUP_TABLE maps the bb entrypoint address to its own shadow * address (it serves for on-the-fly translattion of indirect call/jmp); * * In short, for a given bb entrypoint, r->rewritten_bbs's mapping * value is always samller than LOOKUP_TABLE's. * */ /* * XXX: it is very important to distinguish r->rewritten_bbs and * LOOKUP_TABLE. Note that LOOKUP_TABLE is used for indirect call/jmp's * dynamic mapping, and these indirect call/jmp's targets are very different * to identify. Hence, it is possible that their targets are already * rewritten but not identified as block entrypoints. In that case, the best * we can do is to instrument AFL_TRAMPOLINE at the tail of these indirect * call/jmp, and directly tranfer to the shadow address (w/o * AFL_TRAMPOLINE). And r->rewritten_bbs is used for direct call/jmp at * rewriting time. When rewriting a direct call/jmp, it is possible its * target is not rewritten. Hence, we use holes and r->rewritten_bbs to lazy * update the target address. As these direct call/jmp's targets can always * be identified as block entrypoints, we do not need to instrument * AFL_TRAMPOLINE at their tails (to reduce memory usage). * */ // step [1]. handle entry of basic block if (bb_entry) { size_t shadow_addr = z_binary_get_shadow_code_addr(r->binary); // step [1.1]. update rewritten_bbs if (!g_hash_table_lookup(r->rewritten_bbs, GSIZE_TO_POINTER(ori_addr))) { g_hash_table_insert(r->rewritten_bbs, GSIZE_TO_POINTER(ori_addr), GSIZE_TO_POINTER(shadow_addr)); } // step [1.2]. 
place an endbr64 at the beginning if the original inst is // endbr64 if (inst->id == X86_INS_ENDBR64) { z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size); } // step [1.3]. insert trampolines based on optimization __rewriter_emit_trampoline(r, ori_addr); } // step [2]. update shadow code if (!g_hash_table_lookup(r->shadow_code, GSIZE_TO_POINTER(ori_addr))) { size_t shadow_addr = z_binary_get_shadow_code_addr(r->binary); // we store the first apperance of each instruction g_hash_table_insert(r->shadow_code, GSIZE_TO_POINTER(ori_addr), GSIZE_TO_POINTER(shadow_addr)); z_binary_update_lookup_table(r->binary, ori_addr, shadow_addr); } if (r->opts->trace_pc) { // trace previous pc KS_ASM_CONST_MOV(RW_PAGE_INFO_ADDR(prev_pc), ori_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } #ifdef DEBUG __debug_printf("%#lx -> %#lx:\n", ori_addr, z_binary_get_shadow_code_addr(r->binary)); __debug_printf("\told inst " CS_SHOW_INST(inst)); __debug_printf("\n"); #endif // step [3]. translate rip-related instrution // XXX: note that inserting any new code between step [3] and step [4] // will cause wrong instrumentation. inst = __rewriter_translate_shadow_inst(r, inst, ori_addr); #ifdef DEBUG __debug_printf("\tnew inst " CS_SHOW_INST(inst)); __debug_printf("\n"); #endif // step [4]. 
check handlers RHandler **handlers = (RHandler **)z_buffer_get_raw_buf(r->handlers); size_t n = z_buffer_get_size(r->handlers) / sizeof(RHandler *); for (size_t i = 0; i < n; i++) { REvent event = z_rhandler_get_event(handlers[i]); RHandlerFcn fcn = z_rhandler_get_fcn(handlers[i]); if ((*event)(inst)) { // XXX: note that the inst->address is incorrect here (*fcn)(r, holes, inst, ori_addr, ori_next_addr); return; } } // for unhandled instruction, we simply rewrite it z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size); return; } /* * XXX: For every BB entrypoint found by Disassembler, Rewriter should not * inject any AFL_TRAMPOLINE until a terminator instruction is found. In other * words, if we consider a basic block's complete body starting from its * entrypoint and ending at its terminator instruction (ret, jmp, int, or any * other preivilege instrution), every disassembler-found basic block should * have its own unique copy of its complete body, and these copies do not * overlap with each other. * * Maintaining this property will help us on: * 1. Any Disassembler's false positive of indentifying basic blocks will * not impact Rewriter's correctness; * 2. Fuzzing speed can be optimized. When a basic block is reached, it is * meaningless to record any fall-through edge within this basic block. * This design naturely avoids recording these fall-through edges; * * Variable *bb_entry* is the key to maintain this property. */ /* * XXX: Above idea is very reasonable. *However*, it does not consider the cache * hit rate and forking overhead. In other words, if every disassembly-found * block has an own copy of its complete body, the memory usage will increase. * Therefore, the cache hit rate will quickly drop down. When the program is * large (e.g., openssl), the missing hit will siginificately influence the * execution speed. We have test the fuzzing speed w/ and w/o above * optimization, and the results are shown this optimization does hurt * performance. 
* * Hence, we decide to disable this optimization right now. */ /* * XXX: FALL_THROUGH opt can be enabled by jumping over the trampoline. However, * considering we can almost elimiate all EFLAGS saving, the overhead of an AFL * trampoline may be smaller than the one caused by a jump instruction. * TODO: decide whether we need to enable FALL_THROUGH (note that in SotchFuzz * paper, this optimization is enabled) */ /* * XXX: It is ok for our tool to instrument false instructions or block * entrypoint, as long as the false rate it limited. Note that when the false * rate increases, the number of memory usage (influencing cache hit rate) and * extra-false AFL_TRAMPOLINE will increase, reasulting a low execution speed. * That is why we abandon pre- linear disassembly. * * However, keep in mind that, for our tool, it is very critical to avoid * missing any instruction or block entrypoint. */ Z_PRIVATE void __rewriter_generate_shadow_block( Rewriter *r, GHashTable *holes, GQueue *instructions, addr_t bb_addr, cs_insn *(*disasm_func)(Disassembler *, addr_t)) { // step [1]. basic information cs_insn *inst = NULL; addr_t ori_addr = bb_addr; bool bb_entry = true; // whether next instrution is a BB entrypoint // step [2]. check whether this block is handled if (g_hash_table_lookup(r->rewritten_bbs, GSIZE_TO_POINTER(bb_addr))) { // we already rewrite this basic block return; } // step [3]. rewrite code one by one do { inst = (*disasm_func)(r->disassembler, ori_addr); // step [3.1]. check this address is valid and update instructions // Note that it is possible inst is NULL, as no-return / inline // data may cause incorrect disasm. if (!inst) { // XXX: it is important to insert an invalid instruction to // terminate the incorrect control flow, for effective unintentional // crash detection. // step [3.1.1]. 
update shadow code if (!g_hash_table_lookup(r->shadow_code, GSIZE_TO_POINTER(ori_addr))) { size_t shadow_addr = z_binary_get_shadow_code_addr(r->binary); g_hash_table_insert(r->shadow_code, GSIZE_TO_POINTER(ori_addr), GSIZE_TO_POINTER(shadow_addr)); z_binary_update_lookup_table(r->binary, ori_addr, shadow_addr); } // step [3.1.2]. insert invalid instruction z_binary_insert_shadow_code(r->binary, z_x64_gen_invalid(1), 1); return; } // XXX: instructions was used to build bridges by Rewriter, which is // no longer supported currently. if (instructions) { if (!g_hash_table_lookup(r->shadow_code, GSIZE_TO_POINTER(ori_addr))) { g_queue_push_tail(instructions, GSIZE_TO_POINTER(ori_addr)); } } #ifdef BINARY_SEARCH_DEBUG_REWRITER if (ori_addr <= BINARY_SEARCH_DEBUG_REWRITER) { if (bb_entry) { g_hash_table_insert(r->rewritten_bbs, GSIZE_TO_POINTER(ori_addr), GSIZE_TO_POINTER(ori_addr)); } g_hash_table_insert(r->shadow_code, GSIZE_TO_POINTER(ori_addr), GSIZE_TO_POINTER(ori_addr)); z_binary_update_lookup_table(r->binary, ori_addr, ori_addr); z_elf_write(r->binary->elf, ori_addr, inst->size, inst->bytes); } else #endif { // step [3.2]. rewrite the single instruction __rewriter_generate_shadow_inst(r, holes, inst, ori_addr, bb_entry); } bb_entry = !!z_disassembler_is_potential_block_entrypoint( r->disassembler, ori_addr + inst->size); #ifdef FALL_THROUGH_OPT if (bb_entry && !(z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst) || z_capstone_is_terminator(inst))) { // XXX: insert a short jmp instruction here } #endif // step [3.3]. 
update cur_addr ori_addr += inst->size; } while (!z_capstone_is_terminator(inst)); return; } Z_RESERVED Z_PRIVATE void __rewriter_build_bridges(Rewriter *r, GQueue *instructions) { assert(r != NULL && instructions != NULL); ELF *e = z_binary_get_elf(r->binary); bool prev_patched = false; addr_t prev_addr = INVALID_ADDR; while (!g_queue_is_empty(instructions)) { addr_t cur_addr = (addr_t)g_queue_pop_tail(instructions); assert(prev_addr > cur_addr); cs_insn *ori_inst = z_disassembler_get_recursive_disasm(r->disassembler, cur_addr); assert(ori_inst != NULL); assert(ori_inst->size + cur_addr <= prev_addr); // get shadow_addr addr_t shadow_addr = z_rewriter_get_shadow_addr(r, cur_addr); assert(shadow_addr != INVALID_ADDR); // check ori_inst->size if (ori_inst->size >= __rewriter_get_hole_len(X86_INS_JMP)) { // build bridge KS_ASM_JMP(cur_addr, shadow_addr); z_elf_write(e, cur_addr, ks_size, ks_encode); // update statistic r->patched_safe_bg_count++; prev_patched = true; } else { // if previous instruction is patched, we ignore here if (prev_patched) { prev_patched = false; goto NEXT; } // we only do crashed brideg on continued instructions if (prev_addr != cur_addr + ori_inst->size) { prev_patched = false; goto NEXT; } cs_insn *prev_inst = z_disassembler_get_recursive_disasm(r->disassembler, prev_addr); assert(prev_inst != NULL); // we only do patch within two instruction if (ori_inst->size + prev_inst->size < __rewriter_get_hole_len(X86_INS_JMP)) { prev_patched = false; goto NEXT; } // test for next instruction uint8_t tmp_buf[16] = {0}; z_elf_read(e, cur_addr, sizeof(tmp_buf), tmp_buf); KS_ASM_JMP(cur_addr, shadow_addr); memcpy(tmp_buf, ks_encode, ks_size); CS_DISASM_RAW(tmp_buf + ori_inst->size, sizeof(tmp_buf) - ori_inst->size, cur_addr + ori_inst->size, 1); if (cs_count == 0) { // invalid, nice z_elf_write(e, cur_addr, ks_size, ks_encode); // update statistic r->patched_unsafe_bg_count++; prev_patched = true; } else { prev_patched = false; } } NEXT: prev_addr = 
            cur_addr;
    }
}

/*
 * Create a Rewriter working on the given Disassembler's binary.
 *
 * Allocates the address-mapping tables (shadow_code, rewritten_bbs), the
 * CP_RETADDR bookkeeping tables (potential_retaddrs, unpatched_retaddrs —
 * the latter owns its Buffer values and destroys them on removal), the
 * instrumentation-free block set, zeroed statistics counters, and the
 * handler list, then registers the predefined rewriting handlers.
 */
Z_API Rewriter *z_rewriter_create(Disassembler *d, RewritingOptArgs *opts) {
    Rewriter *r = STRUCT_ALLOC(Rewriter);

    r->opts = opts;

    r->disassembler = d;
    r->binary = z_disassembler_get_binary(d);

    // init basic information
    r->shadow_code =
        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
    r->rewritten_bbs =
        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);

    // init instrumentation information
    r->instrumentation_free_bbs =
        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);

    // init potential return address info
    r->potential_retaddrs =
        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);
    r->unpatched_retaddrs = g_hash_table_new_full(
        g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)&z_buffer_destroy);

    // init statistical data
    r->patched_safe_bg_count = 0;
    r->patched_unsafe_bg_count = 0;
    r->afl_trampoline_count = 0;
    r->optimized_flg_count = 0;
    r->optimized_gpr_count = 0;
    r->optimized_single_succ = 0;

    // init handlers
    r->handlers = z_buffer_create(NULL, 0);
    r->__main_rewritten = false;
    __rewriter_init_predefined_handler(r);

#ifdef DEBUG
    __debug_file = fopen("shadow.log", "w");
#endif

    return r;
}

// XXX: note that its underlying disassembly (linear) is not completed.
// XXX: useless and hence unused!
Z_RESERVED Z_API void z_rewriter_heuristics_rewrite(Rewriter *r) {
    assert(r != NULL);

    if (!r->__main_rewritten) {
        EXITME(
            "z_rewriter_heuristics_rewrite should execute after "
            "z_rewriter_rewrite_main");
    }

    // step [1]. request disassembler to linearly disassemble code
    GQueue *new_bbs = z_disassembler_linear_disasm(r->disassembler);
    z_trace("find %d new basic blocks by linear disassembly",
            g_queue_get_length(new_bbs));
    g_queue_sort(new_bbs, (GCompareDataFunc)__rewriter_compare_address, NULL);

    // step [2]. prepare cf_related hole
    GHashTable *cf_related_holes =
        g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL);

    // step [3].
rewrite all new basci blocks while (!g_queue_is_empty(new_bbs)) { addr_t bb_addr = (addr_t)g_queue_pop_head(new_bbs); // rewrite the whole basic block __rewriter_generate_shadow_block(r, cf_related_holes, NULL, bb_addr, &z_disassembler_get_linear_disasm); } // step [4]. fill in all cf_related holes __rewriter_fillin_shadow_hole(r, cf_related_holes); // step [5]. destroy structure to avoid memleak g_hash_table_destroy(cf_related_holes); g_queue_free(new_bbs); if (r->opts->count_conflict) { __rewriter_count_conflicted_ids(r); } } Z_API void z_rewriter_destroy(Rewriter *r) { RHandler **handlers = (RHandler **)z_buffer_get_raw_buf(r->handlers); for (int32_t i = 0; i < z_buffer_get_size(r->handlers) / sizeof(RHandler *); i++) z_rhandler_destroy(handlers[i]); z_buffer_destroy(r->handlers); g_hash_table_destroy(r->shadow_code); g_hash_table_destroy(r->rewritten_bbs); g_hash_table_destroy(r->instrumentation_free_bbs); g_hash_table_destroy(r->potential_retaddrs); g_hash_table_destroy(r->unpatched_retaddrs); z_free(r); #ifdef DEBUG fclose(__debug_file); #endif } Z_API void z_rewriter_register_handler(Rewriter *r, REvent event, RHandlerFcn fcn) { RHandler *handler = z_rhandler_create(event, fcn); z_buffer_append_raw(r->handlers, (uint8_t *)(&handler), sizeof(RHandler *)); } Z_API void z_rewriter_rewrite(Rewriter *r, addr_t new_addr) { assert(r != NULL); z_trace("rewrite new target: %#lx", new_addr); // step [1]. request disassembler to recursive disassemble code // XXX: it is important that we have to rewrite those new basic blocks each // time we call z_disassembler_recursive_disasm. Or in other words, // z_disassembler_recursive_disasm can only be called in z_rewriter_rewrite. GQueue *new_bbs = z_disassembler_recursive_disasm(r->disassembler, new_addr); z_trace("find %d new basic blocks", g_queue_get_length(new_bbs)); g_queue_sort(new_bbs, (GCompareDataFunc)__rewriter_compare_address, NULL); // step [2]. 
prepare cf_related hole GHashTable *cf_related_holes = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); // step [3]. rewrite all new basci blocks while (!g_queue_is_empty(new_bbs)) { addr_t bb_addr = (addr_t)g_queue_pop_head(new_bbs); // rewrite the whole basic block __rewriter_generate_shadow_block(r, cf_related_holes, NULL, bb_addr, &z_disassembler_get_recursive_disasm); } // step [4]. fill in all cf_related holes __rewriter_fillin_shadow_hole(r, cf_related_holes); // step [5]. destroy structure to avoid memleak g_hash_table_destroy(cf_related_holes); g_queue_free(new_bbs); if (r->opts->count_conflict) { __rewriter_count_conflicted_ids(r); } } Z_API void z_rewriter_optimization_stats(Rewriter *r) { z_info("number of optimized FLG savings: %6d / %d", r->optimized_flg_count, r->afl_trampoline_count); z_info("number of optimized GPR savings: %6d / %d", r->optimized_gpr_count, r->afl_trampoline_count); z_info("number of optimized trampolines: %6d / %d", r->optimized_single_succ, r->afl_trampoline_count); } Z_API addr_t z_rewriter_get_shadow_addr(Rewriter *r, addr_t addr) { addr_t shadow_addr = (addr_t)g_hash_table_lookup(r->rewritten_bbs, GSIZE_TO_POINTER(addr)); if (!shadow_addr) { shadow_addr = (addr_t)g_hash_table_lookup(r->shadow_code, GSIZE_TO_POINTER(addr)); } if (shadow_addr) { return shadow_addr; } else { return INVALID_ADDR; } } Z_API bool z_rewriter_check_retaddr_crashpoint(Rewriter *r, addr_t addr) { // XXX: there is a special case where the correspoind callee // (potential_retaddrs) exists but the related retaddrs (unpatched_retaddrs) // do not. Specifically, It is possible that: // 1. addresses A and B are found as the retaddrs of a callee X. // 2. B is detected and X is marked as returnable. Hence, A, as a related // retaddr of X, should be patched. // 3. However, A is additionally serving as a BRIDGE_POINT, which will not // be patched actually. // 4. A is detected (during next execution). 
At this point, A is in // potential_retaddrs but X is not in unpatched_retaddrs addr_t callee = (addr_t)g_hash_table_lookup(r->potential_retaddrs, GSIZE_TO_POINTER(addr)); if (!callee) { return false; } else { return !!g_hash_table_lookup(r->unpatched_retaddrs, GSIZE_TO_POINTER(callee)); } } // XXX: every time we find a new retaddr, we will return all the unpatched // retaddrs which share the same callee with this given retaddr. Z_API Buffer *z_rewriter_new_validate_retaddr(Rewriter *r, addr_t retaddr) { // step (1). find corresponding callee addr_t callee = (addr_t)g_hash_table_lookup(r->potential_retaddrs, GSIZE_TO_POINTER(retaddr)); if (!callee) { // XXX: theoretically this branch cannot be reached, but when we have // different rewriting order than last execution, the logged crashpoints // may force the program to go into this branch. return z_buffer_create(NULL, 0); } // step (2). get all retaddrs and remove the entity Buffer *buf = (Buffer *)g_hash_table_lookup(r->unpatched_retaddrs, GSIZE_TO_POINTER(callee)); assert(buf); g_hash_table_steal(r->unpatched_retaddrs, GSIZE_TO_POINTER(callee)); return buf; } Z_API void z_rewriter_initially_rewrite(Rewriter *r) { if (r->opts->instrument_early) { z_rewriter_rewrite_entrypoint(r); } else { z_rewriter_rewrite_beyond_main(r); z_rewriter_rewrite_main(r); } } ================================================ FILE: src/rewriter.h ================================================ /* * rewriter.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __REWRITER_H #define __REWRITER_H #include "binary.h" #include "buffer.h" #include "config.h" #include "disassembler.h" #include "sys_optarg.h" #include STRUCT(Rewriter, { // Binary which nees to rewrite Binary *binary; // Disassembler Disassembler *disassembler; // Handlers Buffer *handlers; // Basic information GHashTable *shadow_code; GHashTable *rewritten_bbs; /* * meta-info for CP_RETADDR */ // XXX: note that when pdisasm is fully supported, CP_RETADDR is disabled. // XXX: CP_RETADDR is only used for unknown library functions, which means // it is not for those internal calls or white-listed library calls. // patched retaddr, which is potential to be crashpoint GHashTable *potential_retaddrs; // for a given callee, all unpatched retaddr crashpoints associated with it GHashTable *unpatched_retaddrs; // callee -> retaddrs // instrumentaion-free blocks (following a security_check predicate) GHashTable *instrumentation_free_bbs; // Statistical data size_t patched_safe_bg_count; size_t patched_unsafe_bg_count; size_t afl_trampoline_count; size_t optimized_flg_count; size_t optimized_gpr_count; size_t optimized_single_succ; // Internal data bool __main_rewritten; // rewriting optargs RewritingOptArgs *opts; }); // which instruction needs to be handled typedef bool (*REvent)(const cs_insn *); // how to rewrite the instruction typedef void (*RHandlerFcn)(Rewriter *, GHashTable *, cs_insn *, addr_t ori_addr, addr_t ori_next_addr); STRUCT(RHandler, { REvent event; RHandlerFcn fcn; }); DECLARE_GETTER(RHandler, rhandler, REvent, evnet); DECLARE_GETTER(RHandler, rhandler, RHandlerFcn, fcn); /* * Create a REvent */ Z_API RHandler *z_rhandler_create(REvent event, RHandlerFcn fcn); /* * Destroy a REvent */ Z_API void z_rhandler_destroy(RHandler *handler); /* * Create a rewriter */ Z_API Rewriter 
*z_rewriter_create(Disassembler *d, RewritingOptArgs *opts); /* * Destroy a rewrite */ Z_API void z_rewriter_destroy(Rewriter *r); /* * Register a handler for rewriter */ Z_API void z_rewriter_register_handler(Rewriter *r, REvent event, RHandlerFcn fcn); /* * Rewrite based on known knowledge */ Z_API void z_rewriter_rewrite(Rewriter *r, addr_t new_addr); /* * Get the shadow address of given addr */ Z_API addr_t z_rewriter_get_shadow_addr(Rewriter *r, addr_t addr); /* * Initial rewriting for those addresses known to be code */ Z_API void z_rewriter_initially_rewrite(Rewriter *r); /* * Heuristics rewriting after rewriting main */ Z_RESERVED Z_API void z_rewriter_heuristics_rewrite(Rewriter *r); /* * Check whether the address is a potential return address which is already * rewritten */ Z_API bool z_rewriter_check_retaddr_crashpoint(Rewriter *r, addr_t addr); /* * Find a new validate retaddr and return all retaddrs who share the same call * with given retaddr. Note that destorying returned Buffer is not this * function's responsibility. 
*/ Z_API Buffer *z_rewriter_new_validate_retaddr(Rewriter *r, addr_t retaddr); /* * Show optimization stats */ Z_API void z_rewriter_optimization_stats(Rewriter *r); #endif ================================================ FILE: src/rewriter_handlers/generate.py ================================================ import sys import os import re event_re = re.compile(r"\s*#define\s*REVENT\s*(?P\S*)\s*") handler_re = re.compile(r"\s*#define\s*RHANDLER\s*(?P\S*)\s*") def extract_c_file(c_file): meta_info = {} f = open(c_file, "r") data = f.read() f.close() meta_info["c_file"] = os.path.basename(c_file) captured_event = event_re.search(data) if captured_event is None: print("generate.py: invalid format of handler plugin [no REVENT defined]") exit(-1) meta_info["event"] = captured_event.group("event") captured_handler = handler_re.search(data) if captured_handler is None: print("generate.py: invalid format of handler plugin [no RHANDLER defined]") exit(-1) meta_info["handler"] = captured_handler.group("handler") print("generate.py: find %s" % meta_info) return meta_info def extend_buffer(buffer, handlers): register_fcns = "" for h in handlers: buffer += '#include "%s"\n' % h["c_file"] buffer += "#undef REVENT\n" buffer += "#undef RHANDLER\n" register_fcns += " z_rewriter_register_handler(r, %s, %s);\n" % ( h["event"], h["handler"], ) buffer += ( """ Z_PRIVATE void __rewriter_init_predefined_handler(Rewriter *r) { %s } """ % register_fcns.strip() ) return buffer if __name__ == "__main__": if len(sys.argv) != 2: print("generate.py: ./generate.py ") exit(-1) dir = sys.argv[1].strip() in_file = os.path.join(dir, "handler_main.in") if not os.path.exists(in_file): print("generate.py: %s does not exist" % in_file) exit(-1) f = open(in_file, "r") buffer = f.read() + "\n" f.close() handlers = [] for _file in os.listdir(dir): if _file.endswith(".c"): if "main" in _file: continue handlers.append(extract_c_file(os.path.join(dir, _file))) buffer = extend_buffer(buffer, handlers) out_file 
= os.path.join(dir, "handler_main.c") f = open(out_file, "w") f.write(buffer) f.close() ================================================ FILE: src/rewriter_handlers/handler_call.c ================================================ /* * handler_call.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #define REVENT z_capstone_is_call #define RHANDLER __rewriter_call_handler #define KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr) \ do { \ if ((is_pie)) { \ KS_ASM((shadow_addr), \ " call next;\n" \ "next:\n" \ " sub qword ptr [rsp], %#lx;\n", \ (shadow_addr) + 5 - (ori_next_addr)); \ } else { \ KS_ASM((shadow_addr), "push %#lx", (ori_next_addr)); \ } \ } while (0) /* * Rewriter handler for call instruction. 
 */
Z_PRIVATE void __rewriter_call_handler(Rewriter *r, GHashTable *holes,
                                       cs_insn *inst, addr_t ori_addr,
                                       addr_t ori_next_addr);

/*
 * Check whether it is a library call.
 *
 * Returns the library-function info when inst is a single-operand call into
 * the PLT (immediate target) or through the GOT (pc-relative memory target,
 * or absolute memory target for non-PIE binaries); NULL otherwise. Calls
 * whose target info is marked as a data object (LCFG_OBJ / LRA_OBJ) are
 * also rejected.
 */
Z_PRIVATE const LFuncInfo *__rewriter_is_library_call(ELF *e, cs_insn *inst);

Z_PRIVATE const LFuncInfo *__rewriter_is_library_call(ELF *e, cs_insn *inst) {
    const LFuncInfo *rv = NULL;
    addr_t got_addr = INVALID_ADDR;

    cs_detail *detail = inst->detail;
    if (detail->x86.op_count != 1) {
        return NULL;
    }
    cs_x86_op *op = &(detail->x86.operands[0]);

    if (op->type == X86_OP_IMM) {
        // check call to PLT
        rv = z_elf_get_plt_info(e, op->imm);
    } else if (z_capstone_is_pc_related_ucall(inst, &got_addr) ||
               (!z_elf_get_is_pie(e) &&
                z_capstone_is_const_mem_ucall(inst, &got_addr))) {
        // check call to GOT
        rv = z_elf_get_got_info(e, got_addr);
    }

    if (!rv || rv->cfg_info == LCFG_OBJ || rv->ra_info == LRA_OBJ) {
        return NULL;
    } else {
        return rv;
    }
}

/*
 * Rewrite a call instruction into the shadow code region.
 *
 * Library calls (PLT/GOT) are handled first: depending on the rewriting
 * options, either the call is emitted as-is (with a retaddr entity for
 * safe_ret), or the original return address is preserved explicitly and
 * the call is turned into a jmp, with CP_RETADDR bookkeeping.
 */
Z_PRIVATE void __rewriter_call_handler(Rewriter *r, GHashTable *holes,
                                       cs_insn *inst, addr_t ori_addr,
                                       addr_t ori_next_addr) {
    if (inst->id == X86_INS_LCALL) {
        // XXX: I am not so sure, but it seems lcall is no longer used in amd64
        z_warn("false instruction detected " CS_SHOW_INST(inst));
        return;
    }

    cs_detail *detail = inst->detail;
    cs_x86_op *op = &(detail->x86.operands[0]);
    addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);
    ELF *e = z_binary_get_elf(r->binary);
    bool is_pie = z_elf_get_is_pie(e);

    // first let's correct the inst->address
    inst->address = shadow_addr;

    const LFuncInfo *lf_info = __rewriter_is_library_call(e, inst);

    /*
     * first handle library calls
     */
    if (lf_info) {
        assert(detail->x86.op_count == 1);

        if (op->type == X86_OP_IMM) {
            // call to PLT
            z_trace("find plt call %s @ %#lx", lf_info->name, ori_addr);
            addr_t callee_addr = op->imm;

            if (r->opts->safe_ret) {
                // direct write down the instruction
                KS_ASM_CALL(shadow_addr, callee_addr);
                z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);
                z_binary_new_retaddr_entity(r->binary, shadow_addr + ks_size,
ori_next_addr); } else if (lf_info->ra_info == LRA_UNUSED) { // direct write down the instruction KS_ASM_CALL(shadow_addr, callee_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } else { KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += ks_size; KS_ASM_JMP(shadow_addr, callee_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); // update retaddr information if (lf_info->cfg_info != LCFG_TERM && !g_hash_table_lookup(r->potential_retaddrs, GSIZE_TO_POINTER(ori_next_addr))) { // we do not known whether this callee will return. Hence, // it is a potential CP_RETADDR. Additionaly, it is the // first time that we find this retaddr. g_hash_table_insert(r->potential_retaddrs, GSIZE_TO_POINTER(ori_next_addr), GSIZE_TO_POINTER(callee_addr)); Buffer *buf = (Buffer *)g_hash_table_lookup( r->unpatched_retaddrs, GSIZE_TO_POINTER(callee_addr)); if (!buf) { buf = z_buffer_create(NULL, 0); g_hash_table_insert(r->unpatched_retaddrs, GSIZE_TO_POINTER(callee_addr), (gpointer)buf); } z_buffer_append_raw(buf, (uint8_t *)&ori_next_addr, sizeof(ori_next_addr)); } } return; } if (op->type == X86_OP_MEM) { // call to GOT z_trace("find got call %s @ %#lx", lf_info->name, ori_addr); if (r->opts->safe_ret) { // direct write down the instruction z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size); z_binary_new_retaddr_entity(r->binary, shadow_addr + inst->size, ori_next_addr); } else if (lf_info->ra_info == LRA_UNUSED) { // direct write down the instruction z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size); } else { // we first push the retaddr KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += ks_size; addr_t got_addr = INVALID_ADDR; if (z_capstone_is_pc_related_ucall(inst, &got_addr)) { // jmp qword ptr [rip+xxx] if (inst->size != 6) { EXITME("invalid pc-related 
ucall " CS_SHOW_INST(inst)); } int32_t off = got_addr - (shadow_addr + inst->size); KS_ASM(shadow_addr, "jmp qword ptr [rip + %+d]", off); if (ks_size != 6) { EXITME("invalid pc-related ucall"); } z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } else { // jmp qword ptr [xxx] KS_ASM(shadow_addr, "jmp %s", inst->op_str); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } // XXX: note that we do not update retaddr information here to // avoid some case where the GOT gets changed during execution } return; } EXITME("unreachable code"); } if (detail->x86.op_count == 1 && op->type == X86_OP_IMM) { addr_t callee_addr = op->imm; // direct call /* * step [1]. first check callee_addr is inside .text */ if (!z_disassembler_get_superset_disasm(r->disassembler, callee_addr)) { if (r->opts->safe_ret) { // directly write KS_ASM_CALL(shadow_addr, callee_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); z_binary_new_retaddr_entity(r->binary, shadow_addr + ks_size, ori_next_addr); } else { KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += ks_size; KS_ASM_JMP(shadow_addr, callee_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } return; } /* * step [2]. get shadow callee and prepare hole_buf */ uint64_t hole_buf = 0; #ifndef NSINGLE_SUCC_OPT addr_t shadow_callee_addr; if (r->opts->disable_opt) { shadow_callee_addr = (addr_t)g_hash_table_lookup( r->rewritten_bbs, GSIZE_TO_POINTER(callee_addr)); hole_buf = (uint64_t)X86_INS_JMP; } else { shadow_callee_addr = (addr_t)g_hash_table_lookup( r->shadow_code, GSIZE_TO_POINTER(callee_addr)); hole_buf = (uint64_t)(-(int64_t)X86_INS_JMP); assert((int64_t)hole_buf < 0); r->optimized_single_succ += 1; } #else addr_t shadow_callee_addr = (addr_t)g_hash_table_lookup( r->rewritten_bbs, GSIZE_TO_POINTER(callee_addr)); #endif /* * step [3]. 
rewrite and insrumentation */ if (shadow_callee_addr) { if (r->opts->safe_ret) { KS_ASM_CALL(shadow_addr, shadow_callee_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); z_binary_new_retaddr_entity(r->binary, shadow_addr + ks_size, ori_next_addr); } else { KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += ks_size; KS_ASM_JMP(shadow_addr, shadow_callee_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } } else { // rewrite return address if (r->opts->safe_ret) { // insert hole hole_buf = X86_INS_CALL; z_binary_insert_shadow_code(r->binary, (uint8_t *)(&hole_buf), __rewriter_get_hole_len(hole_buf)); z_binary_new_retaddr_entity( r->binary, shadow_addr + __rewriter_get_hole_len(hole_buf), ori_next_addr); } else { KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += ks_size; // insert hole hole_buf = X86_INS_JMP; z_binary_insert_shadow_code(r->binary, (uint8_t *)(&hole_buf), __rewriter_get_hole_len(hole_buf)); } g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr), GSIZE_TO_POINTER(callee_addr)); } } else { // indirect call addr_t text_addr = z_elf_get_shdr_text(e)->sh_addr; size_t text_size = z_elf_get_shdr_text(e)->sh_size; /* * step [1]. store target value */ if (strstr(inst->op_str, "rip")) { // Handle PC-relative jmp: a good observation is that any // rip-relative jmp/call are equal to or longer than // `push ??? PTR [rip + ???]` // Note that we need to keep `next instruction` at the same address // step [1]. get ks_size KS_ASM(INVALID_ADDR, "push %s", inst->op_str); assert(inst->size >= ks_size); // step [2]. padding if (inst->size > ks_size) { size_t padding_size = inst->size - ks_size; z_binary_insert_shadow_code( r->binary, z_x64_gen_nop(padding_size), padding_size); } // step [3]. 
rewriting KS_ASM(shadow_addr + inst->size - ks_size, "push %s", inst->op_str); assert(z_binary_get_shadow_code_addr(r->binary) + ks_size == shadow_addr + inst->size); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += inst->size; } else { KS_ASM(shadow_addr, "push %s", inst->op_str); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += ks_size; } /* * step [2]. rewrite ucall using hand-written assembly code */ z_debug("rewrite ucall " CS_SHOW_INST(inst)); // XXX: it is ok to directly use LOOKUP_TABLE_ADDR since the underlying // binary is not compiled with PIE. // XXX: call may not care about eflags if (is_pie) { KS_ASM(shadow_addr, " mov [rsp - 168], rsi;\n" " mov [rsp - 128], rcx;\n" // " mov [rsp - 120], rax;\n" // " lahf;\n" // " seto al;\n" " pop rcx;\n" " mov [rsp - 144], rcx;\n" /* * get program base and update rcx */ " mov rsi, %#lx;\n" " mov rsi, [rsi];\n" " sub rcx, rsi;\n" /* * for addresses outside .text, directly go through */ " cmp rcx, %#lx;\n" // compare upper bound of .text " jae hug;\n" " sub rcx, %#lx;\n" // sub .text base and compare " jb hug;\n" /* * update bitmap and prev_id */ " mov [rsp - 152], rdx;\n" " mov [rsp - 160], rdi;\n" " xor rdx, rdx;\n" // hug keystone (issue #295) " mov rdi, qword ptr [" STRING(AFL_PREV_ID_PTR) " + rdx];\n" " mov rdx, rcx;\n" " shr rdx, " STRING(AFL_MAP_SIZE_POW2) ";\n" " xor rdx, rcx;\n" " and rdx, " STRING(AFL_MAP_SIZE_MASK) ";\n" " xor rdi, rdx;\n" " inc BYTE PTR [" STRING(AFL_MAP_ADDR) " + rdi];\n" " xor rdi, rdi;\n" // hug keystone (issue #295) " shr rdx, 1;\n" " mov qword ptr [" STRING(AFL_PREV_ID_PTR) " + rdi], rdx;\n" " mov rdi, [rsp - 160];\n" " mov rdx, [rsp - 152];\n" /* * lookup target shadow address */ " shl rcx, " STRING(LOOKUP_TABLE_CELL_SIZE_POW2) ";\n" " add rcx, rsi;\n" " movsxd rcx, dword ptr [" STRING(LOOKUP_TABLE_ADDR) " + rcx];\n" " add rcx, rsi;\n" " mov [rsp - 144], rcx;\n" /* * go to target */ "hug:\n" // " add al, 127;\n" // " sahf;\n" // " 
mov rax, [rsp - 120 - 8];\n" " mov rcx, [rsp - 128 - 8];\n" " mov rsi, [rsp - 168 - 8];\n", RW_PAGE_INFO_ADDR(program_base), text_addr + text_size, text_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += ks_size; } else { KS_ASM(shadow_addr, " mov [rsp - 128], rcx;\n" // " mov [rsp - 120], rax;\n" // " lahf;\n" // " seto al;\n" " pop rcx;\n" " mov [rsp - 144], rcx;\n" /* * for addresses outside .text, directly go through */ " cmp rcx, %#lx;\n" // compare upper bound of .text " jae hug;\n" " sub rcx, %#lx;\n" // sub .text base and compare " jb hug;\n" /* * update bitmap and prev_id */ " mov [rsp - 152], rdx;\n" " mov [rsp - 160], rdi;\n" " xor rdx, rdx;\n" // hug keystone (issue #295) " mov rdi, qword ptr [" STRING(AFL_PREV_ID_PTR) " + rdx];\n" " mov rdx, rcx;\n" " shr rdx, " STRING(AFL_MAP_SIZE_POW2) ";\n" " xor rdx, rcx;\n" " and rdx, " STRING(AFL_MAP_SIZE_MASK) ";\n" " xor rdi, rdx;\n" " inc BYTE PTR [" STRING(AFL_MAP_ADDR) " + rdi];\n" " xor rdi, rdi;\n" // hug keystone (issue #295) " shr rdx, 1;\n" " mov qword ptr [" STRING(AFL_PREV_ID_PTR) " + rdi], rdx;\n" " mov rdi, [rsp - 160];\n" " mov rdx, [rsp - 152];\n" /* * lookup target shadow address */ " shl rcx, " STRING(LOOKUP_TABLE_CELL_SIZE_POW2) ";\n" " movsxd rcx, dword ptr [" STRING(LOOKUP_TABLE_ADDR) " + rcx];\n" " mov [rsp - 144], rcx;\n" /* * go to target */ "hug:\n" // " add al, 127;\n" // " sahf;\n" // " mov rax, [rsp - 120 - 8];\n" " mov rcx, [rsp - 128 - 8];\n", text_addr + text_size, text_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += ks_size; } // XXX: the below assembly is following the previous one if (r->opts->safe_ret) { KS_ASM(shadow_addr, "call qword ptr [rsp - 144]"); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); z_binary_new_retaddr_entity(r->binary, shadow_addr + ks_size, ori_next_addr); } else { KS_ASM_PRESERVER_RETADDR(is_pie, shadow_addr, ori_next_addr); z_binary_insert_shadow_code(r->binary, ks_encode, 
ks_size); shadow_addr += ks_size; KS_ASM(shadow_addr, "jmp qword ptr [rsp - 144 + 8];\n"); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } } } ================================================ FILE: src/rewriter_handlers/handler_cjmp.c ================================================ /* * handler_cjmp.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #define REVENT z_capstone_is_cjmp #define RHANDLER __rewriter_cjmp_handler /* * Rewriter handler for cjmp instruction. */ Z_PRIVATE void __rewriter_cjmp_handler(Rewriter *r, GHashTable *holes, cs_insn *inst, addr_t ori_addr, addr_t ori_next_addr); /* * Rewriter handler for JRCXZ, JECXZ and JCXZ instruction. 
 */
Z_PRIVATE void __rewriter_cjmp_handler_for_rcx(Rewriter *r, GHashTable *holes,
                                               cs_insn *inst, addr_t ori_addr,
                                               addr_t ori_next_addr);

/*
 * JECXZ/JRCXZ only support a short (rel8) displacement, so they cannot be
 * redirected into the shadow region directly. Instead a trampoline is
 * emitted:
 *     j*cxz <+5>                  ; skip over the false-branch jmp
 *     jmp   shadow_false_branch   ; 5 bytes (or a 5-byte hole)
 *     jmp   shadow_true_branch    ; hole if the target is not rewritten yet
 */
Z_PRIVATE void __rewriter_cjmp_handler_for_rcx(Rewriter *r, GHashTable *holes,
                                               cs_insn *inst, addr_t ori_addr,
                                               addr_t ori_next_addr) {
    if (inst->id == X86_INS_JCXZ) {
        EXITME("`jcxz' is not supported in 64-bit mode");
    }

    cs_detail *detail = inst->detail;
    cs_x86_op *op = &(detail->x86.operands[0]);
    assert(detail->x86.op_count == 1 && op->type == X86_OP_IMM);

    // get original branch address
    addr_t true_branch_addr = op->imm;
    addr_t false_branch_addr = ori_next_addr;

    if (!z_disassembler_get_superset_disasm(r->disassembler,
                                            true_branch_addr) ||
        !z_disassembler_get_superset_disasm(r->disassembler,
                                            false_branch_addr)) {
        // j*cxz can only do short jump, if this happened, it means we are
        // writing a false instruction
        z_warn("false instruction detected " CS_SHOW_INST(inst));
        return;
    }

    /*
     * We will rewrite the instruction in following format:
     *
     *      j*cxz hug:
     *      jmp shadow_false_branch;
     *  hug:
     *      jmp shadow_true_brach;
     *
     */
    switch (inst->id) {
        case X86_INS_JECXZ:
            // jecxz $+5 (0x67 address-size prefix + e3 05)
            z_binary_insert_shadow_code(r->binary,
                                        (const uint8_t *)"\x67\xe3\x05", 3);
            break;
        case X86_INS_JRCXZ:
            // jrcxz $+5
            z_binary_insert_shadow_code(r->binary,
                                        (const uint8_t *)"\xe3\x05", 2);
            break;
        default:
            EXITME("invalid opcode " CS_SHOW_INST(inst));
    }

// Emit a 5-byte jmp to the shadow copy of tar_addr; when the target is not
// rewritten yet, leave a jmp hole and record it in `holes` instead.
#define __GENERATE_SHADOW_JMP(tar_addr)                                     \
    do {                                                                    \
        addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);      \
        addr_t shadow_tar_addr = (addr_t)g_hash_table_lookup(               \
            r->rewritten_bbs, GSIZE_TO_POINTER(tar_addr));                  \
        if (shadow_tar_addr) {                                              \
            KS_ASM(shadow_addr, "jmp %#lx", shadow_tar_addr);               \
            z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);     \
            if (ks_size < 5) {                                              \
                z_binary_insert_shadow_code(                                \
                    r->binary, z_x64_gen_nop(5 - ks_size), 5 - ks_size);    \
            }                                                               \
        } else {                                                            \
            uint64_t hole_buf = X86_INS_JMP;                                \
            shadow_addr = z_binary_insert_shadow_code(                      \
                r->binary, (uint8_t *)(&hole_buf),                          \
                __rewriter_get_hole_len(hole_buf));                         \
            g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr),       \
                                GSIZE_TO_POINTER(tar_addr));                \
        }                                                                   \
    } while (0)

    __GENERATE_SHADOW_JMP(false_branch_addr);
    __GENERATE_SHADOW_JMP(true_branch_addr);

#undef __GENERATE_SHADOW_JMP
}

Z_PRIVATE bool __rewriter_cjmp_is_security_check(Rewriter *r, addr_t addr);

// check whether this cjmp is directly related to security check
Z_PRIVATE bool __rewriter_cjmp_is_security_check(Rewriter *r, addr_t addr) {
    // XXX: this function must be sound but does not need to be complete, since
    // we cannot skip any non-security-check cjmp but can afford the additional
    // efforts of flipping security check cjmp.
    Disassembler *d = r->disassembler;
    UCFG_Analyzer *a = z_disassembler_get_ucfg_analyzer(d);

    Buffer *succ_addrs_buf = z_disassembler_get_intra_successors(d, addr);
    size_t succ_n = z_buffer_get_size(succ_addrs_buf) / sizeof(addr_t);
    addr_t *succ_addrs = (addr_t *)z_buffer_get_raw_buf(succ_addrs_buf);

    // a cjmp counts as a security check when any intra-procedural successor
    // is a known failed-security-check block
    bool is_security_check = false;
    for (int i = 0; i < succ_n; i++) {
        if (z_ucfg_analyzer_is_security_chk_failed(a, succ_addrs[i])) {
            is_security_check = true;
            break;
        }
    }

    if (is_security_check) {
        z_trace("find a security check: %#lx", addr);
        // update instrumentation_free_bbs
        for (int i = 0; i < succ_n; i++) {
            g_hash_table_add(r->instrumentation_free_bbs,
                             GSIZE_TO_POINTER(succ_addrs[i]));
        }
    }

    return is_security_check;
}

/*
 * Rewrite a conditional jump: J*CXZ forms are delegated to the trampoline
 * handler above; other cjmps either jump to the already-rewritten shadow
 * block, are emitted verbatim when the target is outside .text, or leave a
 * typed hole (the capstone instruction id) for later patching.
 */
Z_PRIVATE void __rewriter_cjmp_handler(Rewriter *r, GHashTable *holes,
                                       cs_insn *inst, addr_t ori_addr,
                                       addr_t ori_next_addr) {
    __rewriter_cjmp_is_security_check(r, ori_addr);

    if (inst->id == X86_INS_JCXZ || inst->id == X86_INS_JECXZ ||
        inst->id == X86_INS_JRCXZ) {
        __rewriter_cjmp_handler_for_rcx(r, holes, inst, ori_addr,
                                        ori_next_addr);
        return;
    }

    cs_detail *detail = inst->detail;
    cs_x86_op *op = &(detail->x86.operands[0]);
    uint64_t hole_buf = 0;
    addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);

    // no conditional indirect jump
    assert(detail->x86.op_count == 1 && op->type == X86_OP_IMM);

    addr_t cjmp_addr = op->imm;

    // first check cjmp_addr is inside .text
    if (!z_disassembler_get_superset_disasm(r->disassembler, cjmp_addr)) {
        // directly write
        KS_ASM(shadow_addr, "%s %#lx", cs_insn_name(cs, inst->id), cjmp_addr);
        z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);
        return;
    }

    addr_t shadow_cjmp_addr = (addr_t)g_hash_table_lookup(
        r->rewritten_bbs, GSIZE_TO_POINTER(cjmp_addr));

    if (shadow_cjmp_addr) {
        KS_ASM(shadow_addr, "%s %#lx", cs_insn_name(cs, inst->id),
               shadow_cjmp_addr);
        z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);
    } else {
        // cjmp ??? (HOLE)
        hole_buf = (uint64_t)inst->id;
        shadow_addr = z_binary_insert_shadow_code(
            r->binary, (uint8_t *)(&hole_buf),
            __rewriter_get_hole_len(hole_buf));
        g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr),
                            GSIZE_TO_POINTER(cjmp_addr));
    }
}

================================================
FILE: src/rewriter_handlers/handler_jmp.c
================================================

/*
 * handler_jmp.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

#define REVENT z_capstone_is_jmp
#define RHANDLER __rewriter_jmp_handler

/*
 * Rewriter handler for jmp instruction.
*/ Z_PRIVATE void __rewriter_jmp_handler(Rewriter *r, GHashTable *holes, cs_insn *inst, addr_t ori_addr, addr_t ori_next_addr); Z_PRIVATE void __rewriter_jmp_handler(Rewriter *r, GHashTable *holes, cs_insn *inst, addr_t ori_addr, addr_t ori_next_addr) { cs_detail *detail = inst->detail; cs_x86_op *op = &(detail->x86.operands[0]); ELF *e = z_binary_get_elf(r->binary); addr_t text_addr = z_elf_get_shdr_text(e)->sh_addr; size_t text_size = z_elf_get_shdr_text(e)->sh_size; if (detail->x86.op_count == 1 && op->type == X86_OP_IMM) { // direct jump here addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary); addr_t jmp_addr = op->imm; // first check jmp_addr is inside .text if (!z_disassembler_get_superset_disasm(r->disassembler, jmp_addr)) { // directly write KS_ASM_JMP(shadow_addr, jmp_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); return; } #ifndef NSINGLE_SUCC_OPT uint64_t hole_buf = 0; addr_t shadow_jmp_addr; if (r->opts->disable_opt) { shadow_jmp_addr = (addr_t)g_hash_table_lookup( r->rewritten_bbs, GSIZE_TO_POINTER(jmp_addr)); hole_buf = (uint64_t)X86_INS_JMP; } else { shadow_jmp_addr = (addr_t)g_hash_table_lookup( r->shadow_code, GSIZE_TO_POINTER(jmp_addr)); hole_buf = (uint64_t)(-(int64_t)X86_INS_JMP); assert((int64_t)hole_buf < 0); r->optimized_single_succ += 1; } #else uint64_t hole_buf = X86_INS_JMP; addr_t shadow_jmp_addr = (addr_t)g_hash_table_lookup( r->rewritten_bbs, GSIZE_TO_POINTER(jmp_addr)); #endif if (shadow_jmp_addr) { KS_ASM_JMP(shadow_addr, shadow_jmp_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } else { shadow_addr = z_binary_insert_shadow_code(r->binary, (uint8_t *)(&hole_buf), __rewriter_get_hole_len(hole_buf)); g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr), GSIZE_TO_POINTER(jmp_addr)); } } else { // jmp may not jump out of .text (NO! 
z3 binary has such behaviour) z_debug("rewrite ujmp " CS_SHOW_INST(inst)); // record the original shadow_addr for inst addr_t ori_shadow_addr = INVALID_ADDR; // store rcx value { addr_t shadow_addr = ori_shadow_addr = z_binary_get_shadow_code_addr(r->binary); KS_ASM(shadow_addr, "mov [rsp - 128], rcx;\n"); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); shadow_addr += ks_size; } // translate jump instruction into mov instruction // XXX: note that if we simply push the target value on the stack, // the program may crash when it uses the value near the bootom of // the stack (e.g., mov rbx, [rsp - 8]). Hence, we use 'mov' instead // of 'push'; if (strstr(inst->op_str, "rip")) { assert(ori_shadow_addr != INVALID_ADDR); // step [1]. get new instruction KS_ASM(INVALID_ADDR, "mov rcx, %s", inst->op_str); cs_inst = NULL; // avoid double free inst CS_DISASM_RAW(ks_encode, ks_size, INVALID_ADDR, 1); cs_insn *new_inst = (cs_insn *)cs_inst; cs_inst = NULL; // avoid double free new_inst // step [2]. calculate a possible starting address for the new mov // instruction, so that we can guarantee correctness: // new_shadow_addr + new_inst->size == ori_shadow_addr + inst->size addr_t new_shadow_addr = ori_shadow_addr + inst->size - new_inst->size; // step [3]. translate the instruction, so that: cs_insn *translated_inst = __rewriter_translate_shadow_inst(r, new_inst, new_shadow_addr); // step [4]. rewrite z_binary_insert_shadow_code(r->binary, translated_inst->bytes, translated_inst->size); // step [5]. 
free inst and new_inst cs_free(inst, 1); cs_free(new_inst, 1); } else { addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary); KS_ASM(shadow_addr, "mov rcx, %s", inst->op_str); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } // do the addrss translation if (z_elf_get_is_pie(e)) { addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary); KS_ASM(shadow_addr, /* * backup rsi for program base */ " mov [rsp - 152], rsi;\n" /* * store rcx */ " mov [rsp - 112], rcx;\n" /* * store EFLAGS */ " mov [rsp - 120], rax;\n" " lahf;\n" " seto al;\n" /* * get program base into rsi */ " mov rsi, %#lx;\n" " mov rsi, [rsi];\n" /* * get *static* address in rcx */ " sub rcx, rsi;\n" /* * for addresses outside .text, directly go through */ " cmp rcx, %#lx;\n" // compare upper bound of .text " jae hug;\n" " sub rcx, %#lx;\n" // sub .text base " jb hug;\n" /* * update bitmap and prev_id */ " mov [rsp - 136], rdx;\n" " mov [rsp - 144], rdi;\n" " xor rdx, rdx;\n" // hug keystone (issue #295) " mov rdi, qword ptr [" STRING(AFL_PREV_ID_PTR) " + rdx];\n" " mov rdx, rcx;\n" " shr rdx, " STRING(AFL_MAP_SIZE_POW2) ";\n" " xor rdx, rcx;\n" " and rdx, " STRING(AFL_MAP_SIZE_MASK) ";\n" " xor rdi, rdx;\n" " inc BYTE PTR [" STRING(AFL_MAP_ADDR) " + rdi];\n" " xor rdi, rdi;\n" // hug keystone (issue #295) " shr rdx, 1;\n" " mov qword ptr [" STRING(AFL_PREV_ID_PTR) " + rdi], rdx;\n" " mov rdi, [rsp - 144];\n" " mov rdx, [rsp - 136];\n" /* * lookup target shadow address */ " shl rcx, " STRING(LOOKUP_TABLE_CELL_SIZE_POW2) " ;\n" " add rcx, rsi;\n" " movsxd rcx, dword ptr [" STRING(LOOKUP_TABLE_ADDR) " + rcx];\n" " add rcx, rsi;\n" " mov [rsp - 112], rcx;\n" /* * go to target */ "hug:\n" " add al, 127;\n" " sahf;\n" " mov rax, [rsp - 120];\n" " mov rcx, [rsp - 128];\n" " mov rsi, [rsp - 142];\n" " jmp qword ptr [rsp - 112];\n", RW_PAGE_INFO_ADDR(program_base), text_addr + text_size, text_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } else { // XXX: it is ok 
to directly use LOOKUP_TABLE_ADDR since the // underlying binary is not compiled with PIE. addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary); KS_ASM(shadow_addr, /* * store rcx */ " mov [rsp - 112], rcx;\n" /* * store EFLAGS */ " mov [rsp - 120], rax;\n" " lahf;\n" " seto al;\n" /* * for addresses outside .text, directly go through */ " cmp rcx, %#lx;\n" // compare upper bound of .text " jae hug;\n" " sub rcx, %#lx;\n" // sub .text base " jb hug;\n" /* * update bitmap and prev_id */ " mov [rsp - 136], rdx;\n" " mov [rsp - 144], rdi;\n" " xor rdx, rdx;\n" // hug keystone (issue #295) " mov rdi, qword ptr [" STRING(AFL_PREV_ID_PTR) " + rdx];\n" " mov rdx, rcx;\n" " shr rdx, " STRING(AFL_MAP_SIZE_POW2) ";\n" " xor rdx, rcx;\n" " and rdx, " STRING(AFL_MAP_SIZE_MASK) ";\n" " xor rdi, rdx;\n" " inc BYTE PTR [" STRING(AFL_MAP_ADDR) " + rdi];\n" " xor rdi, rdi;\n" // hug keystone (issue #295) " shr rdx, 1;\n" " mov qword ptr [" STRING(AFL_PREV_ID_PTR) " + rdi], rdx;\n" " mov rdi, [rsp - 144];\n" " mov rdx, [rsp - 136];\n" /* * lookup target shadow address */ " shl rcx, " STRING(LOOKUP_TABLE_CELL_SIZE_POW2) " ;\n" " movsxd rcx, dword ptr [" STRING(LOOKUP_TABLE_ADDR) " + rcx];\n" " mov [rsp - 112], rcx;\n" /* * go to target */ "hug:\n" " add al, 127;\n" " sahf;\n" " mov rax, [rsp - 120];\n" " mov rcx, [rsp - 128];\n" " jmp qword ptr [rsp - 112];\n", text_addr + text_size, text_addr); z_binary_insert_shadow_code(r->binary, ks_encode, ks_size); } } } ================================================ FILE: src/rewriter_handlers/handler_loop.c ================================================ /* * handler_loop.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. 
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

#define REVENT z_capstone_is_loop
#define RHANDLER __rewriter_loop_handler

/*
 * Rewriter handler for loop instruction.
 */
Z_PRIVATE void __rewriter_loop_handler(Rewriter *r, GHashTable *holes,
                                       cs_insn *inst, addr_t ori_addr,
                                       addr_t ori_next_addr);

/*
 * LOOP/LOOPE/LOOPNE only support rel8 targets, so they are expanded into an
 * equivalent stub: flags are saved (lahf/seto), rcx is decremented manually,
 * and the branch itself is re-expressed as jmp/je/jne. The branch at
 * `jmp_target` is emitted as a placeholder (`jz 0x0`) and then overwritten
 * in-place with a control-flow hole at the fixed byte offset 0x16 into the
 * stub, to be patched to the shadow target later.
 * NOTE(review): the 0x16 offset is tied to the exact encoding of the stub
 * below - any change to the asm must keep that offset in sync.
 */
Z_PRIVATE void __rewriter_loop_handler(Rewriter *r, GHashTable *holes,
                                       cs_insn *inst, addr_t ori_addr,
                                       addr_t ori_next_addr) {
    cs_detail *detail = inst->detail;
    cs_x86_op *op = &(detail->x86.operands[0]);
    uint64_t hole_buf = 0;
    addr_t loop_addr = op->imm;
    addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);

    // no conditional indirect jump
    assert(detail->x86.op_count == 1 && op->type == X86_OP_IMM);

    // get hand-written asm
    KS_ASM(shadow_addr,
           " mov [rsp - 128], rax;\n"  // store context
           " lahf;\n"
           " seto al;\n"
           " dec rcx;\n"
           " jz out1;\n"
           " add al, 127;\n"
           " sahf;\n"
           " mov rax, [rsp - 128];\n"
           "jmp_target:\n"
           " jz 0x0;\n"
           " jmp out2;\n"
           "out1:\n"
           " add al, 127;\n"
           " sahf;\n"
           " mov rax, [rsp - 128];\n"
           "out2:\n");
    z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);

    ELF *e = z_binary_get_elf(r->binary);

    if (inst->id == X86_INS_LOOP) {
        // jmp ???
        hole_buf = (uint64_t)X86_INS_JMP;
        z_elf_write(e, shadow_addr + 0x16, __rewriter_get_hole_len(hole_buf),
                    (uint8_t *)(&hole_buf));
        g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr + 0x16),
                            GSIZE_TO_POINTER(loop_addr));
    } else if (inst->id == X86_INS_LOOPE) {
        // je ???
        hole_buf = (uint64_t)X86_INS_JE;
        z_elf_write(e, shadow_addr + 0x16, __rewriter_get_hole_len(hole_buf),
                    (uint8_t *)(&hole_buf));
        g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr + 0x16),
                            GSIZE_TO_POINTER(loop_addr));
    } else if (inst->id == X86_INS_LOOPNE) {
        // jne ???
        hole_buf = (uint64_t)X86_INS_JNE;
        z_elf_write(e, shadow_addr + 0x16, __rewriter_get_hole_len(hole_buf),
                    (uint8_t *)(&hole_buf));
        g_hash_table_insert(holes, GSIZE_TO_POINTER(shadow_addr + 0x16),
                            GSIZE_TO_POINTER(loop_addr));
    }
}

================================================
FILE: src/rewriter_handlers/handler_main.in
================================================

/*
 * handler_main.in
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

/*
 * rewriter_handler.c: pre-defined handler for rewriting different instruction.
 */

#include "../fork_server.h"
#include "../utils.h"

DEFINE_GETTER(RHandler, rhandler, REvent, event);
DEFINE_GETTER(RHandler, rhandler, RHandlerFcn, fcn);

// Allocate an RHandler binding an instruction predicate (event) to its
// rewriting callback (fcn). Ownership passes to the caller.
Z_API RHandler *z_rhandler_create(REvent event, RHandlerFcn fcn) {
    RHandler *handler = STRUCT_ALLOC(RHandler);

    handler->event = event;
    handler->fcn = fcn;

    return handler;
}

// Release an RHandler created by z_rhandler_create.
Z_API void z_rhandler_destroy(RHandler *handler) { z_free(handler); }

/*
 * Get control-flow hole size for different instruction types.
*/ Z_PRIVATE size_t __rewriter_get_hole_len(uint64_t id); Z_PRIVATE size_t __rewriter_get_hole_len(uint64_t id) { if ((int64_t)id < 0) { id = (~id) + 1; } size_t res = 0; switch (id) { case X86_INS_CALL: case X86_INS_JMP: res = 5; break; case X86_INS_JAE: case X86_INS_JA: case X86_INS_JBE: case X86_INS_JB: case X86_INS_JE: case X86_INS_JGE: case X86_INS_JG: case X86_INS_JLE: case X86_INS_JL: case X86_INS_JNE: case X86_INS_JNO: case X86_INS_JNP: case X86_INS_JNS: case X86_INS_JO: case X86_INS_JP: case X86_INS_JS: res = 6; break; case X86_INS_JECXZ: EXITME("temporarily not support for jecxz"); case X86_INS_JRCXZ: EXITME("temporarily not support for jrcxz"); case X86_INS_JCXZ: EXITME("jcxz is not supported in 64-bit mode"); default: EXITME("invalid hole"); } assert(res >= 4); return res; } ================================================ FILE: src/rewriter_handlers/handler_ret.c ================================================ /* * handler_ret.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #define REVENT z_capstone_is_ret #define RHANDLER __rewriter_ret_handler /* * Rewriter handler for ret instruction. 
 */
Z_PRIVATE void __rewriter_ret_handler(Rewriter *r, GHashTable *holes,
                                      cs_insn *inst, addr_t ori_addr,
                                      addr_t ori_next_addr);

/*
 * Rewrite a ret instruction. In safe-ret mode the instruction is emitted
 * verbatim (return addresses are handled elsewhere). Otherwise a stub is
 * prepended that rewrites the return address in place: it reads [rsp],
 * rebases it to a static .text offset (PIE only), and if it falls inside
 * .text, translates it through the lookup table back into a shadow address
 * before the original ret executes.
 */
Z_PRIVATE void __rewriter_ret_handler(Rewriter *r, GHashTable *holes,
                                      cs_insn *inst, addr_t ori_addr,
                                      addr_t ori_next_addr) {
    if (r->opts->safe_ret) {
        z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size);
        return;
    }

    // modern CPU will do nothing more except direct returning about `repz ret`
    addr_t shadow_addr = z_binary_get_shadow_code_addr(r->binary);

    ELF *e = z_binary_get_elf(r->binary);
    addr_t text_addr = z_elf_get_shdr_text(e)->sh_addr;
    size_t text_size = z_elf_get_shdr_text(e)->sh_size;

    if (z_elf_get_is_pie(e)) {
        KS_ASM(shadow_addr,
               " mov [rsp - 128], rcx;\n"
               " mov [rsp - 136], rdx;\n"
               // " mov [rsp - 120], rax;\n"
               // " lahf;\n"
               // " seto al;\n"
               /*
                * get program base
                */
               " mov rdx, %#lx;\n"
               " mov rdx, [rdx];\n"
               /*
                * calculate the *static* address of the retaddr (w/o PIE)
                */
               " mov rcx, [rsp];\n"
               " sub rcx, rdx;\n"
               /*
                * check whether the retaddr is inside .text
                */
               " cmp rcx, %#lx;\n"
               " jae hug;\n"
               " sub rcx, %#lx;\n"  // sub .text base
               " jb hug;\n"
               /*
                * translate the retaddr
                */
               " shl rcx, " STRING(LOOKUP_TABLE_CELL_SIZE_POW2) ";\n"
               " add rcx, rdx;\n"  // add the program base for lookup table, in advance
               " movsxd rcx, dword ptr [" STRING(LOOKUP_TABLE_ADDR) " + rcx];\n"  // lookup table
               " add rcx, rdx;\n"  // add the program base onto the translated address
               " mov [rsp], rcx;\n"
               "hug:\n"
               // " add al, 127;\n"
               // " sahf;\n"
               // " mov rax, [rsp - 120];\n"
               " mov rdx, [rsp - 136];\n"
               " mov rcx, [rsp - 128];\n",
               // " ret;\n", // XXX: ret is replaced by the original inst, see below
               RW_PAGE_INFO_ADDR(program_base), text_addr + text_size,
               text_addr);
        z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);
        shadow_addr += ks_size;
    } else {
        // XXX: it is ok to directly use LOOKUP_TABLE_ADDR since the underlying
        // binary is not compiled with PIE.
        KS_ASM(shadow_addr,
               " mov [rsp - 128], rcx;\n"
               // " mov [rsp - 120], rax;\n"
               // " lahf;\n"
               // " seto al;\n"
               " mov rcx, [rsp];\n"
               " cmp rcx, %#lx;\n"
               " jae hug;\n"
               " sub rcx, %#lx;\n"  // sub .text base
               " jb hug;\n"
               " shl rcx, " STRING(LOOKUP_TABLE_CELL_SIZE_POW2) ";\n"
               " movsxd rcx, dword ptr [" STRING(LOOKUP_TABLE_ADDR) " + rcx];\n"  // lookup table
               " mov [rsp], rcx;\n"
               "hug:\n"
               // " add al, 127;\n"
               // " sahf;\n"
               // " mov rax, [rsp - 120];\n"
               " mov rcx, [rsp - 128];\n",
               // " ret;\n", // XXX: ret is replaced by the original inst, see below
               text_addr + text_size, text_addr);
        z_binary_insert_shadow_code(r->binary, ks_encode, ks_size);
        shadow_addr += ks_size;
    }

    // XXX: we respect the original encoding of inst, to support `ret n`
    // XXX: we keep the bnp prefix here if present. Note that we have to place
    // endbr64 instruction at a suitable position since it is always possible
    // for the control flow to jump from the original code (w/ an endbr64
    // prefix)
    z_binary_insert_shadow_code(r->binary, inst->bytes, inst->size);
}

================================================
FILE: src/sys_optarg.c
================================================

/*
 * sys_optarg.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
*/

#include "sys_optarg.h"
#include "utils.h"

/*
 * Global system option storage (declared extern in sys_optarg.h).
 *
 * Defaults: no mode selected, every rewriting feature flag disabled,
 * LOG_INFO verbosity, and the timeout / check_execs constants from
 * sys_optarg.h. The frontend presumably overwrites these from the command
 * line before use -- see the option parser for the authoritative behavior.
 */
SysOptArgs sys_optargs = {
    .mode = SYSMODE_NONE,
    .r =
        {
            .trace_pc = false,
            .count_conflict = false,
            .disable_opt = false,
            .safe_ret = false,
            .instrument_early = false,
            .force_pdisasm = false,
            .disable_callthrough = false,
            .force_linear = false,
        },
    .log_level = LOG_INFO,
    .timeout = SYS_TIMEOUT,
    .check_execs = SYS_CHECK_EXECS,
};

================================================
FILE: src/sys_optarg.h
================================================
/*
 * sys_optarg.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
*/

#ifndef __SYS_OPTARGS_H
#define __SYS_OPTARGS_H

#include "config.h"

/*
 * Default system options
 */
#define SYS_TIMEOUT 2000UL
#define SYS_CHECK_EXECS 200000

/*
 * System mode
 */
typedef enum system_mode_t {
    SYSMODE_NONE,
    SYSMODE_DAEMON,
    SYSMODE_RUN,
    SYSMODE_PATCH,
    SYSMODE_DISASM,
    SYSMODE_VIEW,
} SysMode;

/*
 * Rewriting options
 *
 * All flags default to false (see sys_optarg.c).
 */
typedef struct rewriting_optargs_t {
    bool trace_pc;
    bool count_conflict;
    bool disable_opt;          // when set, GPR/EFLAGS use-def analyses are skipped
    bool safe_ret;             // when set, RETs are emitted verbatim (no retaddr translation)
    bool instrument_early;
    bool force_pdisasm;
    bool disable_callthrough;  // when set, the returning-function analysis is skipped
    bool force_linear;  // secret option
} RewritingOptArgs;

/*
 * Top-level system options: selected mode, rewriting flags, logging
 * verbosity, and fuzzing timeout / execution-check budget.
 */
typedef struct system_optargs_t {
    SysMode mode;
    RewritingOptArgs r;
    int32_t log_level;
    uint64_t timeout;
    uint32_t check_execs;
} SysOptArgs;

// single global instance, defined in sys_optarg.c
extern SysOptArgs sys_optargs;

#endif

================================================
FILE: src/tp_dispatcher.c
================================================
/*
 * tp_dispatcher.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
*/ #include "tp_dispatcher.h" #include "afl_config.h" #include "utils.h" #include "trampolines/trampolines.h" #define TPD_LOCATE_HOLE(haystack, haystacklen, needle, needlelen, err) \ ({ \ void *p = memmem((void *)(haystack), (size_t)(haystacklen), \ (void *)(needle), (size_t)(needlelen)); \ if (p == NULL) { \ EXITME(err); \ } \ assert(memmem(p + (size_t)(needlelen), \ (size_t)(haystacklen) - (p - (void *)(haystack)) - \ (size_t)(needlelen), \ (void *)(needle), (size_t)(needlelen)) == NULL); \ p; \ }) /* * Create a TPCode */ Z_PRIVATE TPCode *__tp_code_create(size_t size); /* * Destroy a TPcode */ Z_PRIVATE void __tp_code_destroy(TPCode *tpc); /* * Emit TPCode */ Z_PRIVATE const uint8_t *__tp_code_emit(TPCode *tpc, uint32_t id, size_t *size_ptr); /* * Append raw code into TPCode */ Z_PRIVATE void __tp_code_append_raw(TPCode *tpc, const uint8_t *buf, size_t size); /* * Locate holes in TPCode */ Z_PRIVATE void __tp_code_locate_holes(TPCode *tpc, uint32_t id_hole, uint32_t shr_id_hole); Z_PRIVATE void __tp_code_destroy(TPCode *tpc) { z_free(tpc->code); z_free(tpc); } Z_PRIVATE TPCode *__tp_code_create(size_t size) { TPCode *tpc = z_alloc(1, sizeof(TPCode)); tpc->code = z_alloc(size, sizeof(uint8_t)); tpc->len = 0; tpc->capacity = size; tpc->id_hole = tpc->shr_id_hole = NULL; return tpc; } Z_PRIVATE void __tp_code_locate_holes(TPCode *tpc, uint32_t id_hole, uint32_t shr_id_hole) { tpc->id_hole = (uint32_t *)TPD_LOCATE_HOLE( tpc->code, tpc->len, &id_hole, sizeof(id_hole), "missing id hole"); tpc->shr_id_hole = (uint32_t *)TPD_LOCATE_HOLE(tpc->code, tpc->len, &shr_id_hole, sizeof(shr_id_hole), "missing shr id hole"); } Z_PRIVATE void __tp_code_append_raw(TPCode *tpc, const uint8_t *buf, size_t size) { if (tpc->len + size > tpc->capacity) { EXITME("TPCode execceds its total capacity"); } memcpy(tpc->code + tpc->len, buf, size); tpc->len += size; } Z_PRIVATE const uint8_t *__tp_code_emit(TPCode *tpc, uint32_t id, size_t *size_ptr) { assert(id < AFL_MAP_SIZE); 
*(tpc->id_hole) = (id); *(tpc->shr_id_hole) = ((id) >> 1); *(size_ptr) = tpc->len; return tpc->code; } Z_API void z_tp_dispatcher_destroy(TPDispatcher *tpd) { __tp_code_destroy(tpd->bitmap); #define __DESTROY_TPCODE_FOR_REG(REG) __tp_code_destroy(tpd->bitmap_##REG) CAPSTONE_FORALL_GPR(__DESTROY_TPCODE_FOR_REG); #undef __DESTROY_TPCODE_FOR_REG z_free(tpd); } Z_API TPDispatcher *z_tp_dispatcher_create() { TPDispatcher *tpd = STRUCT_ALLOC(TPDispatcher); /* * Context Save */ tpd->context_save = context_save_bin; tpd->context_save_len = context_save_bin_len; /* * Context Restore */ tpd->context_restore = context_restore_bin; tpd->context_restore_len = context_restore_bin_len; /* * Register bitmap */ #define __GENERATE_TPCODE_FOR_REG(REG) \ do { \ tpd->bitmap_##REG = \ __tp_code_create(__BITMAP_##REG##_END - __BITMAP_##REG); \ __tp_code_append_raw(tpd->bitmap_##REG, bitmap_bin + __BITMAP_##REG, \ __BITMAP_##REG##_END - __BITMAP_##REG); \ __tp_code_locate_holes(tpd->bitmap_##REG, bitmap_id_hole, \ bitmap_shr_id_hole); \ } while (0) CAPSTONE_FORALL_GPR(__GENERATE_TPCODE_FOR_REG); #undef __GENERATE_TPCODE_FOR_REG /* * Bitmap (w/ push and pop GPR): we choose RDI here */ tpd->bitmap = __tp_code_create(tpd->bitmap_RDI->len + 0x10); // 'push rdi' KS_ASM(INVALID_ADDR, "mov [rsp - 152], rdi"); __tp_code_append_raw(tpd->bitmap, ks_encode, ks_size); // rdi bitmap __tp_code_append_raw(tpd->bitmap, tpd->bitmap_RDI->code, tpd->bitmap_RDI->len); // 'pop rdi' KS_ASM(INVALID_ADDR, "mov rdi, [rsp - 152]"); __tp_code_append_raw(tpd->bitmap, ks_encode, ks_size); // find holes __tp_code_locate_holes(tpd->bitmap, bitmap_id_hole, bitmap_shr_id_hole); return tpd; } Z_API const uint8_t *z_tp_dispatcher_emit_context_save(TPDispatcher *tpd, size_t *size) { *size = tpd->context_save_len; return (const uint8_t *)tpd->context_save; } Z_API const uint8_t *z_tp_dispatcher_emit_context_restore(TPDispatcher *tpd, size_t *size) { *size = tpd->context_restore_len; return (const uint8_t 
*)tpd->context_restore; } Z_API const uint8_t *z_tp_dispatcher_emit_bitmap(TPDispatcher *tpd, size_t *size, addr_t addr, GPRState state) { #define __EMIT_TP_FOR_REG(REG) \ do { \ if (state & GPRSTATE_##REG) { \ return __tp_code_emit(tpd->bitmap_##REG, AFL_BB_ID(addr), size); \ } \ } while (0) CAPSTONE_FORALL_GPR(__EMIT_TP_FOR_REG); #undef __EMIT_TP_FOR_REG return __tp_code_emit(tpd->bitmap, AFL_BB_ID(addr), size); } ================================================ FILE: src/tp_dispatcher.h ================================================ /* * tp_dispatcher.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __TP_DISPATCHER_H #define __TP_DISPATCHER_H #include "capstone_.h" #include "config.h" // XXX: we avoid using Buffer as raw pointer can be faster. Note that TP_EMIT // will be invocated during fuzzing. 
typedef struct tp_code_t { uint8_t *code; size_t len; size_t capacity; uint32_t *id_hole; uint32_t *shr_id_hole; } TPCode; STRUCT(TPDispatcher, { uint8_t *context_save; size_t context_save_len; uint8_t *context_restore; size_t context_restore_len; TPCode *bitmap; TPCode *bitmap_RAX; TPCode *bitmap_RBX; TPCode *bitmap_RCX; TPCode *bitmap_RDX; TPCode *bitmap_RBP; TPCode *bitmap_RDI; TPCode *bitmap_RSI; TPCode *bitmap_R8; TPCode *bitmap_R9; TPCode *bitmap_R10; TPCode *bitmap_R11; TPCode *bitmap_R12; TPCode *bitmap_R13; TPCode *bitmap_R14; TPCode *bitmap_R15; }); /* * Create a tp_dispatcher. */ Z_API TPDispatcher *z_tp_dispatcher_create(); /* * Destroy a tp_dispatcher. */ Z_API void z_tp_dispatcher_destroy(TPDispatcher *tpd); /* * Emit a Context Saving TP */ Z_API const uint8_t *z_tp_dispatcher_emit_context_save(TPDispatcher *tpd, size_t *size); /* * Emit a Context Restoring TP */ Z_API const uint8_t *z_tp_dispatcher_emit_context_restore(TPDispatcher *tpd, size_t *size); /* * Emit a bitmap TP */ Z_API const uint8_t *z_tp_dispatcher_emit_bitmap(TPDispatcher *tpd, size_t *size, addr_t addr, GPRState state); #endif ================================================ FILE: src/trampolines/Makefile ================================================ SIGSTKSZ = $(shell ../get_signal_stack_size.sh) all: bitmap context_save context_restore bitmap: $(CC) -Wall -fno-stack-protector -fpie -Os -c bitmap.c -DSIGNAL_STACK_SIZE=$(SIGSTKSZ) $(CC) -nostdlib -o bitmap.out bitmap.o -Wl,--entry=_entry objcopy --dump-section .text=bitmap.bin bitmap.out xxd -i bitmap.bin > bitmap_bin.c readelf -s bitmap.o | grep __BITMAP_ | awk '{print "const size_t " $$8 " = 0x" $$2 ";"}' >> bitmap_bin.c echo "const unsigned int bitmap_id_hole = 0xDEAD;" >> bitmap_bin.c echo "const unsigned int bitmap_shr_id_hole = 0xBEEF;" >> bitmap_bin.c context_save: $(CC) -Wall -fno-stack-protector -fpie -Os -c context_save.c -DSIGNAL_STACK_SIZE=$(SIGSTKSZ) $(CC) -nostdlib -o context_save.out context_save.o 
-Wl,--entry=_entry objcopy --dump-section .text=context_save.bin context_save.out xxd -i context_save.bin > context_save_bin.c context_restore: $(CC) -Wall -fno-stack-protector -fpie -Os -c context_restore.c -DSIGNAL_STACK_SIZE=$(SIGSTKSZ) $(CC) -nostdlib -o context_restore.out context_restore.o -Wl,--entry=_entry objcopy --dump-section .text=context_restore.bin context_restore.out xxd -i context_restore.bin > context_restore_bin.c clean: rm -rf *.o *_bin.c *.bin *.out SOURCES += bitmap.c context_save.c context_restore.c HDEADERS += trampolines.h format: clang-format -sort-includes -style=file -i $(SOURCES) clang-format -sort-includes -style=file -i $(HDEADERS) ================================================ FILE: src/trampolines/bitmap.c ================================================ /* * bitmap.c * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/

#include "../afl_config.h"

/*
 * Per-register AFL coverage trampoline, one copy per GPR so the rewriter can
 * pick a register that is dead at the instrumentation point.
 *
 * The immediates 0xDEAD and 0xBEEF are placeholder "holes": tp_dispatcher.c
 * locates them (bitmap_id_hole / bitmap_shr_id_hole, see the Makefile) and
 * patches in the basic block id and id >> 1 at emit time, implementing AFL's
 * `map[cur ^ prev]++; prev = cur >> 1` coverage update.
 *
 * The __BITMAP_<REG> / __BITMAP_<REG>_END symbols delimit each copy so the
 * dispatcher can carve it out of the dumped .text.
 *
 * NOTE: only block comments are used inside the macro body -- a `//` comment
 * would swallow the line-continuation backslash.
 */
#define __BITMAP_FOR_REG(REG)                                            \
    /****************************************************************/   \
    /* set symbol name */                                                \
    ".globl __BITMAP_" STRING(REG) "\n"                                  \
    ".type __BITMAP_" STRING(REG) ",@function\n"                         \
    "__BITMAP_" STRING(REG)":\n"                                         \
    /* get prev_id */                                                    \
    "\tmov " STRING(REG) ", [" STRING(AFL_PREV_ID_PTR) "];\n"            \
    /* inc bitmap */                                                     \
    "\txor " STRING(REG) ", 0xDEAD;\n"                                   \
    "\tinc BYTE PTR [" STRING(AFL_MAP_ADDR) " + " STRING(REG) "];\n"     \
    /* update prev_id */                                                 \
    "\tmov QWORD PTR [" STRING(AFL_PREV_ID_PTR) "], 0xBEEF;\n"           \
    /* set symbol end */                                                 \
    ".globl __BITMAP_" STRING(REG) "_END\n"                              \
    ".type __BITMAP_" STRING(REG) "_END,@function\n"                     \
    "__BITMAP_" STRING(REG)"_END:\n"                                     \
    /****************************************************************/

// one trampoline instance per general-purpose register (RSP excluded)
asm(".intel_syntax noprefix\n"

    ".globl _entry\n"
    ".type _entry,@function\n"
    "_entry:\n"

    __BITMAP_FOR_REG(RAX)  // FORCE NEWLINE
    __BITMAP_FOR_REG(RBX)  // FORCE NEWLINE
    __BITMAP_FOR_REG(RCX)  // FORCE NEWLINE
    __BITMAP_FOR_REG(RDX)  // FORCE NEWLINE
    __BITMAP_FOR_REG(RDI)  // FORCE NEWLINE
    __BITMAP_FOR_REG(RSI)  // FORCE NEWLINE
    __BITMAP_FOR_REG(RBP)  // FORCE NEWLINE
    __BITMAP_FOR_REG(R8)   // FORCE NEWLINE
    __BITMAP_FOR_REG(R9)   // FORCE NEWLINE
    __BITMAP_FOR_REG(R10)  // FORCE NEWLINE
    __BITMAP_FOR_REG(R11)  // FORCE NEWLINE
    __BITMAP_FOR_REG(R12)  // FORCE NEWLINE
    __BITMAP_FOR_REG(R13)  // FORCE NEWLINE
    __BITMAP_FOR_REG(R14)  // FORCE NEWLINE
    __BITMAP_FOR_REG(R15)  // FORCE NEWLINE
);

#undef __BITMAP_FOR_REG

================================================
FILE: src/trampolines/context_restore.c
================================================
/*
 * context_restore.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

/*
 * COPY FROM AFL
 *
 *   - popf is *awfully* slow, which is why we're doing the lahf / sahf +
 *     overflow test trick. Unfortunately, this forces us to taint eax / rax,
 *     but this dependency on a commonly-used register still beats the
 *     alternative of using pushf / popf.
 *
 *     One possible optimization is to avoid touching flags by using a circular
 *     buffer that stores just a sequence of current locations, with the XOR
 *     stuff happening offline. Alas, this doesn't seem to have a huge impact:
 *
 *     https://groups.google.com/d/msg/afl-users/MsajVf4fRLo/2u6t88ntUBIJ
 */

/*
 * IT SEEMS PUSH/POP generate register is a little bit faster than MOV RSP
 */

// counterpart of context_save.c: undoes the lahf/seto flag save, then
// reloads rax from the [rsp - 144] slot used by context_save
asm(".intel_syntax noprefix\n"

    ".globl _entry\n"
    ".type _entry,@function\n"
    "_entry:\n"

    // restore EFLAGS: al holds OF (from seto); add al, 127 overflows exactly
    // when al == 1, recreating OF, and sahf restores the rest from ah
    "\tadd al, 127;\n"
    "\tsahf;\n"

    // restore rax (only rax is saved here; rdi, when spilled, is handled by
    // the bitmap trampoline itself -- see tp_dispatcher.c, [rsp - 152])
    "\tmov rax, [rsp - 144];\n");

================================================
FILE: src/trampolines/context_save.c
================================================
/*
 * context_save.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
*/

/*
 * COPY FROM AFL
 *
 *   - popf is *awfully* slow, which is why we're doing the lahf / sahf +
 *     overflow test trick. Unfortunately, this forces us to taint eax / rax,
 *     but this dependency on a commonly-used register still beats the
 *     alternative of using pushf / popf.
 *
 *     One possible optimization is to avoid touching flags by using a circular
 *     buffer that stores just a sequence of current locations, with the XOR
 *     stuff happening offline. Alas, this doesn't seem to have a huge impact:
 *
 *     https://groups.google.com/d/msg/afl-users/MsajVf4fRLo/2u6t88ntUBIJ
 */

/*
 * IT SEEMS PUSH/POP generate register is a little bit faster than MOV RSP
 */

// spills rax below rsp and captures EFLAGS into ah/al (lahf + seto),
// to be undone by context_restore.c
asm(".intel_syntax noprefix\n"

    ".globl _entry\n"
    ".type _entry,@function\n"
    "_entry:\n"

    // store rax (rdi, when needed, is spilled by the bitmap trampoline
    // itself at [rsp - 152] -- see tp_dispatcher.c)
    "\tmov [rsp - 144], rax;\n"

    // store EFLAGS: ah <- SF/ZF/AF/PF/CF (lahf), al <- OF (seto)
    "\tlahf;\n"
    "\tseto al;\n");

================================================
FILE: src/trampolines/trampolines.h
================================================
/*
 * trampolines.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
*/

#ifndef __TRAMPOLINES_H
#define __TRAMPOLINES_H

// generated by src/trampolines/Makefile (xxd -i dumps of each .text section)
#include "bitmap_bin.c"
#include "context_restore_bin.c"
#include "context_save_bin.c"

#define BITMAP_REG X86_INS_RDI

#endif

================================================
FILE: src/ucfg_analyzer.c
================================================
/*
 * ucfg_analyzer.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

#include "ucfg_analyzer.h"
#include "elf_.h"
#include "iterator.h"
#include "library_functions/library_functions.h"
#include "utils.h"

// XXX: there are three types of UCFG edges:
//      DIRECT_UEDGE              : call edges to the callee
//      INTRA_UEDGE               : call-fallthrough edges
//      DIRECT_UEDGE | INTRA_UEDGE: other control flow edges
typedef enum ucfg_edge_t {
    DIRECT_UEDGE = (1 << 0),
    INTRA_UEDGE = (1 << 1),
} UEdge;

/*
 * Look up the Buffer stored under key k in table t, lazily creating and
 * inserting an empty Buffer on first access.
 */
#define __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(t, k)         \
    ({                                                      \
        Buffer *buf = (Buffer *)g_hash_table_lookup(t, k);  \
        if (!buf) {                                         \
            buf = z_buffer_create(NULL, 0);                 \
            g_hash_table_insert(t, k, (gpointer)buf);       \
        }                                                   \
        buf;                                                \
    })

/*
 * Initial analysis for each instruction (calculate direct successors and
 * predecessors)
 */
Z_PRIVATE void __ucfg_analyzer_init_analyze(UCFG_Analyzer *a, addr_t addr,
                                            const cs_insn *inst);

/*
 * Advanced analysis for each instruction (gpr & flg's use-def)
 */
Z_PRIVATE void __ucfg_analyzer_advance_analyze(UCFG_Analyzer *a, addr_t addr,
                                               const cs_insn *inst);

/*
 * Use-def analysis for eflag register
*/ Z_PRIVATE void __ucfg_analyzer_analyze_flg(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst); /* * Use-def analysis for general purpose register */ Z_PRIVATE void __ucfg_analyzer_analyze_gpr(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst); /* * Returning / non-returning functions analysis: whether a given inst (at addr) * can reach a RET instruction via intra-procedure edges */ Z_PRIVATE void __ucfg_analyzer_analyze_ret(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst); /* * Reachability analysis for security check failed functions: whether a given * inst (at addr) can reach a security-chk-failed PLT call without any condition * and indirect edge */ Z_PRIVATE void __ucfg_analyzer_analyze_sec_chk(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst); /* * Add predecessor and successor relation */ Z_PRIVATE void __ucfg_analyzer_new_pred_and_succ(UCFG_Analyzer *a, addr_t src_addr, addr_t dst_addr, UEdge edge); /* * Check whether two instructions are consistent, so that simply replacing one * with another one will not influence current analysis result */ Z_PRIVATE bool __ucfg_analyzer_check_consistent(const cs_insn *inst_alice, const cs_insn *inst_bob); Z_PRIVATE void __ucfg_analyzer_analyze_sec_chk(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst) { // this addr cannot be in a->sec_chk_failed now assert(!g_hash_table_lookup(a->sec_chk_failed, GSIZE_TO_POINTER(addr))); ELF *e = z_binary_get_elf(a->binary); GQueue *queue = g_queue_new(); // queue for back trace // step (1). check whether current address is a sec_check_failed function. // Any other call wouldbe invalid. 
if (z_capstone_is_call(inst)) { const cs_detail *detail = inst->detail; if (detail->x86.op_count != 1) { return; } const cs_x86_op *op = &(detail->x86.operands[0]); if (op->type != X86_OP_IMM) { return; } const addr_t callee_addr = op->imm; const LFuncInfo *callee_info = z_elf_get_plt_info(e, callee_addr); if (!callee_info) { return; } // see https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling for // C++ mangling rules if ((z_strstr(callee_info->name, "__asan_report")) || (z_strstr(callee_info->name, "__stack_chk_fail")) || (z_strncmp(callee_info->name, "_Z", 2) && z_strstr(callee_info->name, "__asan") && z_strstr(callee_info->name, "Report"))) { // it is a sec_chk_failed PLT call z_trace("find a sec_chk_failed instruction: %#lx", addr); g_hash_table_add(a->sec_chk_failed, GSIZE_TO_POINTER(addr)); g_queue_push_tail(queue, GSIZE_TO_POINTER(addr)); } } else if (z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst) || z_capstone_is_xbegin(inst) || z_capstone_is_ret(inst)) { // these instructions cannot belong to a sec_chk_failed block return; } else { Buffer *succ_addrs = z_ucfg_analyzer_get_intra_successors(a, addr); size_t succ_n = z_buffer_get_size(succ_addrs) / sizeof(addr_t); if (succ_n != 1) { // we only consider those instructions with only one successor return; } addr_t succ_addr = *((addr_t *)z_buffer_get_raw_buf(succ_addrs)); if (!g_hash_table_lookup(a->sec_chk_failed, GSIZE_TO_POINTER(succ_addr))) { return; } // it belongs to a sec_chk_failed block z_trace("find a sec_chk_failed instruction: %#lx", addr); g_hash_table_add(a->sec_chk_failed, GSIZE_TO_POINTER(addr)); g_queue_push_tail(queue, GSIZE_TO_POINTER(addr)); } // step (2). 
check all the possible predecessors while (!g_queue_is_empty(queue)) { addr_t cur_addr = (addr_t)g_queue_pop_head(queue); assert( g_hash_table_lookup(a->sec_chk_failed, GSIZE_TO_POINTER(cur_addr))); Buffer *pred_addrs_buf = z_ucfg_analyzer_get_intra_predecessors(a, cur_addr); size_t pred_n = z_buffer_get_size(pred_addrs_buf) / sizeof(addr_t); addr_t *pred_addrs = (addr_t *)z_buffer_get_raw_buf(pred_addrs_buf); for (int i = 0; i < pred_n; i++) { addr_t pred_addr = pred_addrs[i]; const cs_insn *pred_inst = (const cs_insn *)g_hash_table_lookup( a->insts, GSIZE_TO_POINTER(pred_addr)); // pred_inst cannot be NULL assert(pred_inst); // step (2.1). check the type of pred_inst if (z_capstone_is_call(pred_inst) || z_capstone_is_cjmp(pred_inst) || z_capstone_is_xbegin(pred_inst) || z_capstone_is_ret(pred_inst) || z_capstone_is_loop(pred_inst)) { continue; } // step (2.2). check the number of succ size_t succ_n = z_buffer_get_size( z_ucfg_analyzer_get_intra_successors(a, pred_addr)) / sizeof(addr_t); if (succ_n != 1) { continue; } // step (2.3). add into queue if not find before if (!g_hash_table_lookup(a->sec_chk_failed, GSIZE_TO_POINTER(pred_addr))) { z_trace("find a sec_chk_failed instruction: %#lx", pred_addr); g_hash_table_add(a->sec_chk_failed, GSIZE_TO_POINTER(pred_addr)); g_queue_push_tail(queue, GSIZE_TO_POINTER(pred_addr)); } } } } Z_PRIVATE void __ucfg_analyzer_analyze_ret(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst) { if (a->opts->disable_callthrough) { return; } // this addr cannot be in a->can_ret now assert(!g_hash_table_lookup(a->can_ret, GSIZE_TO_POINTER(addr))); // step (1). 
add intra-procedure edges if inst is calling a returning // function if (z_capstone_is_call(inst)) { Buffer *succ_buf = z_ucfg_analyzer_get_intra_successors(a, addr); if (!z_buffer_get_size(succ_buf)) { // XXX: no intra-procedure successor found cs_detail *detail = inst->detail; if ((detail->x86.op_count == 1) && (detail->x86.operands[0].type == X86_OP_IMM)) { addr_t callee_addr = detail->x86.operands[0].imm; if (callee_addr != addr + inst->size && g_hash_table_lookup(a->can_ret, GSIZE_TO_POINTER(callee_addr))) { // XXX: avoid duplicated edges z_trace("call-fallthrough: %#lx -> %#lx", addr, addr + inst->size); __ucfg_analyzer_new_pred_and_succ( a, addr, addr + inst->size, INTRA_UEDGE); if (z_unlikely(!z_buffer_get_size(succ_buf))) { EXITME("invalid intra-procedure successors"); } } } } } // step (2). check whether current address is returnable GQueue *queue = g_queue_new(); // queue for back trace { if (z_capstone_is_ret(inst)) { // it is a RET instruction g_hash_table_add(a->can_ret, GSIZE_TO_POINTER(addr)); g_queue_push_tail(queue, GSIZE_TO_POINTER(addr)); } else { // this is all intra-procedure success Iter(addr_t, intra_succs); z_iter_init_from_buf(intra_succs, z_ucfg_analyzer_get_intra_successors(a, addr)); // other instructions while (!z_iter_is_empty(intra_succs)) { addr_t succ_addr = *(z_iter_next(intra_succs)); if (g_hash_table_lookup(a->can_ret, GSIZE_TO_POINTER(succ_addr))) { g_hash_table_add(a->can_ret, GSIZE_TO_POINTER(addr)); g_queue_push_tail(queue, GSIZE_TO_POINTER(addr)); break; } } z_iter_destroy(intra_succs); } } // step (3). update all predecessors while (!g_queue_is_empty(queue)) { addr_t cur_addr = (addr_t)g_queue_pop_head(queue); z_trace("find returanable address: %#lx", cur_addr); assert(g_hash_table_lookup(a->can_ret, GSIZE_TO_POINTER(cur_addr))); // step (3.1). 
first update calls if cur_addr is a function entrypoint Iter(addr_t, direct_preds); z_iter_init_from_buf( direct_preds, z_ucfg_analyzer_get_direct_predecessors(a, cur_addr)); while (!z_iter_is_empty(direct_preds)) { addr_t pred_addr = *(z_iter_next(direct_preds)); const cs_insn *pred_inst = (const cs_insn *)g_hash_table_lookup( a->insts, GSIZE_TO_POINTER(pred_addr)); // pred_inst cannot be NULL assert(pred_inst); if (!z_capstone_is_call(pred_inst)) { continue; } addr_t call_addr = pred_addr; addr_t fallthrough_addr = call_addr + pred_inst->size; if (fallthrough_addr == cur_addr) { // XXX: avoid duplicated edges continue; } if (z_buffer_get_size( z_ucfg_analyzer_get_intra_successors(a, call_addr))) { continue; } __ucfg_analyzer_new_pred_and_succ(a, call_addr, fallthrough_addr, INTRA_UEDGE); if (g_hash_table_lookup(a->can_ret, GSIZE_TO_POINTER(fallthrough_addr))) { z_trace("call-fallthrough: %#lx -> %#lx", call_addr, fallthrough_addr); g_hash_table_add(a->can_ret, GSIZE_TO_POINTER(call_addr)); g_queue_push_tail(queue, GSIZE_TO_POINTER(call_addr)); } } z_iter_destroy(direct_preds); // step (3.2) update all intra-procedure predecessors Iter(addr_t, intra_preds); z_iter_init_from_buf( intra_preds, z_ucfg_analyzer_get_intra_predecessors(a, cur_addr)); while (!z_iter_is_empty(intra_preds)) { addr_t pred_addr = *(z_iter_next(intra_preds)); if (!g_hash_table_lookup(a->can_ret, GSIZE_TO_POINTER(pred_addr))) { g_hash_table_add(a->can_ret, GSIZE_TO_POINTER(pred_addr)); g_queue_push_tail(queue, GSIZE_TO_POINTER(pred_addr)); } } z_iter_destroy(intra_preds); } // destroy queue g_queue_free(queue); } Z_PRIVATE void __ucfg_analyzer_analyze_gpr(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst) { if (a->opts->disable_opt) { return; } // step (0). check whether addr is analyzed if (g_hash_table_lookup(a->gpr_can_write, GSIZE_TO_POINTER(addr))) { return; } // step (1). 
update gpr_analyzed_succs { // check addr's succs Buffer *succs = z_ucfg_analyzer_get_direct_successors(a, addr); assert(succs != NULL); size_t succ_n = z_buffer_get_size(succs) / sizeof(addr_t); addr_t *succs_array = (addr_t *)z_buffer_get_raw_buf(succs); size_t analyzed_succ_n = 0; for (int i = 0; i < succ_n; i++) { if (g_hash_table_lookup(a->gpr_can_write, GSIZE_TO_POINTER(succs_array[i]))) { analyzed_succ_n += 1; } } g_hash_table_insert(a->gpr_analyzed_succs, GSIZE_TO_POINTER(addr), GSIZE_TO_POINTER(analyzed_succ_n)); // update addr's direct preds Buffer *preds = z_ucfg_analyzer_get_direct_predecessors(a, addr); assert(preds != NULL); size_t pred_n = z_buffer_get_size(preds) / sizeof(addr_t); addr_t *preds_array = (addr_t *)z_buffer_get_raw_buf(preds); for (int i = 0; i < pred_n; i++) { addr_t pred = preds_array[i]; size_t pred_analyzed_succs = (size_t)g_hash_table_lookup( a->gpr_analyzed_succs, GSIZE_TO_POINTER(pred)); g_hash_table_insert(a->gpr_analyzed_succs, GSIZE_TO_POINTER(pred), GSIZE_TO_POINTER(pred_analyzed_succs + 1)); } } // step (2). push addr into analysis queue GQueue *queue = g_queue_new(); g_queue_push_tail(queue, GSIZE_TO_POINTER(addr)); // step (3). do analysis and propogate the result while (!g_queue_is_empty(queue)) { // step (3.1). pop from queue and get basic information addr_t cur_addr = (addr_t)g_queue_pop_head(queue); Buffer *preds = z_ucfg_analyzer_get_direct_predecessors(a, cur_addr); assert(preds != NULL); size_t pred_n = z_buffer_get_size(preds) / sizeof(addr_t); Buffer *succs = z_ucfg_analyzer_get_direct_successors(a, cur_addr); assert(succs != NULL); size_t succ_n = z_buffer_get_size(succs) / sizeof(addr_t); RegState *rs = (RegState *)g_hash_table_lookup( a->reg_states, GSIZE_TO_POINTER(cur_addr)); // XXX: a good observation is that for a given address, its known // successors must be added before it. And according to the logic of // z_ucfg_analyzer_add_inst, any instruction will be analyzed once it is // added into analyzer. 
Hence, we can sure any instruction in the queue // is already analyzed (except addr itself). assert(rs != NULL); // step (3.2). calculate succs_can_write size_t analyzed_succ_n = (size_t)g_hash_table_lookup( a->gpr_analyzed_succs, GSIZE_TO_POINTER(cur_addr)); assert(succ_n >= analyzed_succ_n); GPRState succs_can_write = GPRSTATE_ALL + 1; if (succ_n != 0 && succ_n == analyzed_succ_n) { // assume succs_can_write all registers succs_can_write |= GPRSTATE_ALL; // all succs are analyzed addr_t *succs_array = (addr_t *)z_buffer_get_raw_buf(succs); for (int i = 0; i < succ_n; i++) { GPRState succ_can_write = 0; if (cur_addr == succs_array[i]) { // handle self-loop! succ_can_write = GPRSTATE_ALL + 1; } else { succ_can_write = (GPRState)g_hash_table_lookup( a->gpr_can_write, GSIZE_TO_POINTER(succs_array[i])); } assert(succ_can_write); succs_can_write &= succ_can_write; } } // step (3.3). calcualte can_write for cur_addr. // According to datalog disassembly // (https://www.usenix.org/conference/usenixsecurity20/presentation/flores-montoya) // section 5.1, the x64 architecture zeroes the upper part of 64 bits // registers whenever the corresponding 32 bits register is written. GPRState can_write = GPRSTATE_ALL + 1; can_write |= rs->gpr_write_32_64 | succs_can_write; can_write &= (~rs->gpr_read); // step (3.4). update predecessors GPRState ori_can_write = (GPRState)g_hash_table_lookup( a->gpr_can_write, GSIZE_TO_POINTER(cur_addr)); if (ori_can_write != can_write) { assert((uint64_t)can_write > (uint64_t)ori_can_write); addr_t *preds_array = (addr_t *)z_buffer_get_raw_buf(preds); for (int i = 0; i < pred_n; i++) { g_queue_push_tail(queue, GSIZE_TO_POINTER(preds_array[i])); } // update can_write g_hash_table_insert(a->gpr_can_write, GSIZE_TO_POINTER(cur_addr), GSIZE_TO_POINTER(can_write)); } } g_queue_free(queue); return; } Z_PRIVATE void __ucfg_analyzer_analyze_flg(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst) { if (a->opts->disable_opt) { return; } // step (0). 
check whether addr is analyzed if (g_hash_table_lookup(a->flg_need_write, GSIZE_TO_POINTER(addr))) { return; } GQueue *queue = g_queue_new(); // step (1). check whether it is ready to analyze { Buffer *succs = z_ucfg_analyzer_get_direct_successors(a, addr); assert(succs != NULL); size_t succ_n = z_buffer_get_size(succs) / sizeof(addr_t); addr_t *succs_array = (addr_t *)z_buffer_get_raw_buf(succs); // step (1.1). update flg_finished succs size_t finished_succ_n = 0; for (int i = 0; i < succ_n; i++) { if (g_hash_table_lookup(a->flg_need_write, GSIZE_TO_POINTER(succs_array[i]))) { finished_succ_n += 1; } } g_hash_table_insert(a->flg_finished_succs, GSIZE_TO_POINTER(addr), GSIZE_TO_POINTER(finished_succ_n)); RegState *rs = (RegState *)g_hash_table_lookup(a->reg_states, GSIZE_TO_POINTER(addr)); assert(rs != NULL); // step (1.2). check whether it is ready if (rs->flg_write == FLGSTATE_ALL || rs->flg_read == FLGSTATE_ALL) { // case A: writing/reading all means it is ready to analyze g_queue_push_tail(queue, GSIZE_TO_POINTER(addr)); } else if (z_capstone_is_call(inst) || z_capstone_is_ret(inst)) { // case B: we are trying to do an intra-procedure analysis g_queue_push_tail(queue, GSIZE_TO_POINTER(addr)); } else if (succ_n == 0) { // case C: for instruction without successors, it is ready to // analyze g_queue_push_tail(queue, GSIZE_TO_POINTER(addr)); } else if (succ_n == finished_succ_n) { // case D: all successors are done with analysis (it actually can be // mergied into case C, but for clarity we set it as an individual // case) g_queue_push_tail(queue, GSIZE_TO_POINTER(addr)); } } // step (2). do analysis and propagate the result while (!g_queue_is_empty(queue)) { // step (2.1). 
pop from queue and set a flag on result (distinguished // from non-existed key) addr_t cur_addr = (addr_t)g_queue_pop_head(queue); const cs_insn *cur_inst = (const cs_insn *)g_hash_table_lookup( a->insts, GSIZE_TO_POINTER(cur_addr)); assert(cur_inst); FLGState need_write = FLGSTATE_ALL + 1; assert(!g_hash_table_lookup(a->flg_need_write, GSIZE_TO_POINTER(cur_addr))); // step (2.2). basic infomration Buffer *preds = z_ucfg_analyzer_get_direct_predecessors(a, cur_addr); assert(preds != NULL); size_t pred_n = z_buffer_get_size(preds) / sizeof(addr_t); Buffer *succs = z_ucfg_analyzer_get_direct_successors(a, cur_addr); assert(succs != NULL); size_t succ_n = z_buffer_get_size(succs) / sizeof(addr_t); RegState *rs = (RegState *)g_hash_table_lookup( a->reg_states, GSIZE_TO_POINTER(cur_addr)); assert(rs != NULL); // step (2.3). calculate need to write if (rs->flg_write == FLGSTATE_ALL) { // case A.1: write all need_write |= 0; } else if (rs->flg_read == FLGSTATE_ALL) { // case A.2: read all need_write |= FLGSTATE_ALL; } else if (z_capstone_is_call(cur_inst) || z_capstone_is_ret(cur_inst)) { // case B: call & ret need_write |= 0; } else if (succ_n == 0) { // case C: no successors need_write |= FLGSTATE_ALL; } else if (succ_n == (size_t)g_hash_table_lookup(a->flg_finished_succs, GSIZE_TO_POINTER(cur_addr))) { FLGState post_need_write = 0; addr_t *succs_array = (addr_t *)z_buffer_get_raw_buf(succs); for (int i = 0; i < succ_n; i++) { FLGState succ_need_write = (FLGState)g_hash_table_lookup( a->flg_need_write, GSIZE_TO_POINTER(succs_array[i])); assert(succ_need_write); post_need_write |= succ_need_write; } need_write |= post_need_write & (FLGSTATE_ALL ^ rs->flg_write); } else { EXITME("incomplete address in analysis: %#lx", cur_addr); } // step (2.4). do not forget flag read by it self need_write |= rs->flg_read; // step (2.5). update need_write g_hash_table_insert(a->flg_need_write, GSIZE_TO_POINTER(cur_addr), GSIZE_TO_POINTER(need_write)); // step (2.6). 
update predecessors' information addr_t *preds_array = (addr_t *)z_buffer_get_raw_buf(preds); for (int i = 0; i < pred_n; i++) { addr_t pred = preds_array[i]; // it is very important to check whether pred is analyzed if (g_hash_table_lookup(a->flg_need_write, GSIZE_TO_POINTER(pred))) { continue; } size_t pred_finish_succs = (size_t)g_hash_table_lookup( a->flg_finished_succs, GSIZE_TO_POINTER(pred)); pred_finish_succs += 1; g_hash_table_insert(a->flg_finished_succs, GSIZE_TO_POINTER(pred), GSIZE_TO_POINTER(pred_finish_succs)); if (pred_finish_succs == (size_t)(z_buffer_get_size( z_ucfg_analyzer_get_direct_successors(a, pred)) / sizeof(addr_t))) { g_queue_push_tail(queue, GSIZE_TO_POINTER(pred)); } } } g_queue_free(queue); } Z_PRIVATE void __ucfg_analyzer_advance_analyze(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst) { __ucfg_analyzer_analyze_flg(a, addr, inst); __ucfg_analyzer_analyze_gpr(a, addr, inst); __ucfg_analyzer_analyze_ret(a, addr, inst); __ucfg_analyzer_analyze_sec_chk(a, addr, inst); } Z_PRIVATE bool __ucfg_analyzer_check_consistent(const cs_insn *inst_alice, const cs_insn *inst_bob) { // check size if (inst_alice->size != inst_bob->size) { return false; } // control-flow-related instructions always change analysis result { const cs_insn *inst = inst_alice; if (z_capstone_is_jmp(inst) || z_capstone_is_call(inst) || z_capstone_is_xbegin(inst) || z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst) || z_capstone_is_ret(inst) || z_capstone_is_terminator(inst)) { z_trace("CFG related instructions"); return false; } } // first check instruction type if (inst_alice->id != inst_bob->id) { z_trace("inconsistent instruction types"); return false; } cs_detail *detail_alice = inst_alice->detail; cs_detail *detail_bob = inst_bob->detail; // then check operands if (detail_alice->x86.op_count != detail_bob->x86.op_count) { z_trace("inconsistent operand count"); return false; } // check individual operand for (int i = 0; i < detail_alice->x86.op_count; i++) { 
cs_x86_op *op_alice = &(detail_alice->x86.operands[i]); cs_x86_op *op_bob = &(detail_bob->x86.operands[i]); if (op_alice->type != op_bob->type) { z_trace("inconsisten operand type"); return false; } switch (op_alice->type) { case X86_OP_REG: if (op_alice->reg != op_bob->reg) { z_trace("inconsisten operand register"); return false; } break; case X86_OP_MEM: if (op_alice->mem.segment != op_bob->mem.segment) { z_trace("inconsisten operand mem segment"); return false; } if (op_alice->mem.base != op_bob->mem.base) { z_trace("inconsisten operand mem base"); return false; } if (op_alice->mem.index != op_bob->mem.index) { z_trace("inconsisten operand mem index"); return false; } break; default: break; } } return true; } Z_PRIVATE void __ucfg_analyzer_new_pred_and_succ(UCFG_Analyzer *a, addr_t src_addr, addr_t dst_addr, UEdge edge) { #ifdef DEBUG #define __NEW_RELATION(relation, from_addr, to_addr) \ do { \ Buffer *buf = NULL; \ if (!(buf = g_hash_table_lookup(a->relation, \ GSIZE_TO_POINTER(from_addr)))) { \ buf = z_buffer_create(NULL, 0); \ g_hash_table_insert(a->relation, GSIZE_TO_POINTER(from_addr), \ (gpointer)buf); \ } \ \ addr_t *targets = (addr_t *)z_buffer_get_raw_buf(buf); \ size_t n = z_buffer_get_size(buf) / sizeof(addr_t); \ for (size_t i = 0; i < n; i++) { \ if (targets[i] == (to_addr)) { \ EXITME("duplicated " #relation " for %#lx->%#lx", from_addr, \ to_addr); \ } \ } \ \ z_buffer_append_raw(buf, (uint8_t *)&(to_addr), sizeof(to_addr)); \ } while (0) #else #define __NEW_RELATION(relation, from_addr, to_addr) \ do { \ Buffer *buf = NULL; \ if (!(buf = g_hash_table_lookup(a->relation, \ GSIZE_TO_POINTER(from_addr)))) { \ buf = z_buffer_create(NULL, 0); \ g_hash_table_insert(a->relation, GSIZE_TO_POINTER(from_addr), \ (gpointer)buf); \ } \ z_buffer_append_raw(buf, (uint8_t *)&(to_addr), sizeof(to_addr)); \ } while (0) #endif if (edge & DIRECT_UEDGE) { __NEW_RELATION(direct_succs, src_addr, dst_addr); __NEW_RELATION(direct_preds, dst_addr, src_addr); } if (edge 
& INTRA_UEDGE) { __NEW_RELATION(intra_succs, src_addr, dst_addr); __NEW_RELATION(intra_preds, dst_addr, src_addr); } __NEW_RELATION(all_succs, src_addr, dst_addr); __NEW_RELATION(all_preds, dst_addr, src_addr); #undef __NEW_RELATION } Z_PRIVATE void __ucfg_analyzer_init_analyze(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst) { assert(inst != NULL); cs_detail *detail = inst->detail; if (z_capstone_is_cjmp(inst) || z_capstone_is_loop(inst)) { assert((detail->x86.op_count == 1) && (detail->x86.operands[0].type == X86_OP_IMM)); // avoid dupilicated succs/preds if (true) { __ucfg_analyzer_new_pred_and_succ(a, addr, addr + inst->size, DIRECT_UEDGE | INTRA_UEDGE); } if (detail->x86.operands[0].imm != addr + inst->size) { __ucfg_analyzer_new_pred_and_succ(a, addr, detail->x86.operands[0].imm, DIRECT_UEDGE | INTRA_UEDGE); } } else if (z_capstone_is_jmp(inst) || z_capstone_is_xbegin(inst)) { if ((detail->x86.op_count == 1) && (detail->x86.operands[0].type == X86_OP_IMM)) { __ucfg_analyzer_new_pred_and_succ(a, addr, detail->x86.operands[0].imm, DIRECT_UEDGE | INTRA_UEDGE); } } else if (z_capstone_is_call(inst)) { ELF *e = z_binary_get_elf(a->binary); if ((detail->x86.op_count == 1) && (detail->x86.operands[0].type == X86_OP_IMM)) { // get callee first addr_t callee_addr = detail->x86.operands[0].imm; // add the inter-procedure edge (the call edge) __ucfg_analyzer_new_pred_and_succ(a, addr, callee_addr, DIRECT_UEDGE); // check plt const LFuncInfo *lf_info = z_elf_get_plt_info(e, callee_addr); if (lf_info && lf_info->cfg_info == LCFG_RET) { if (callee_addr == addr + inst->size) { EXITME("invalid PLT call: " CS_SHOW_INST(inst)); } __ucfg_analyzer_new_pred_and_succ(a, addr, addr + inst->size, INTRA_UEDGE); } } else { // let check GOT call addr_t got_addr = INVALID_ADDR; if (z_capstone_is_pc_related_ucall(inst, &got_addr) || (!z_elf_get_is_pie(e) && z_capstone_is_const_mem_ucall(inst, &got_addr))) { const LFuncInfo *lf_info = z_elf_get_got_info(e, got_addr); if (lf_info && 
lf_info->cfg_info == LCFG_RET) { __ucfg_analyzer_new_pred_and_succ( a, addr, addr + inst->size, INTRA_UEDGE); } } } } else if (z_capstone_is_terminator(inst)) { // do nothing for terminator } else { __ucfg_analyzer_new_pred_and_succ(a, addr, addr + inst->size, DIRECT_UEDGE | INTRA_UEDGE); } } Z_API UCFG_Analyzer *z_ucfg_analyzer_create(Binary *binary, RewritingOptArgs *opts) { UCFG_Analyzer *a = STRUCT_ALLOC(UCFG_Analyzer); a->binary = binary; a->opts = opts; a->insts = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); a->reg_states = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)(&z_free)); a->direct_preds = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)(&z_buffer_destroy)); a->direct_succs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)(&z_buffer_destroy)); a->intra_preds = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)(&z_buffer_destroy)); a->intra_succs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)(&z_buffer_destroy)); a->all_preds = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)(&z_buffer_destroy)); a->all_succs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)(&z_buffer_destroy)); a->flg_finished_succs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); a->flg_need_write = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); a->gpr_analyzed_succs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); a->gpr_can_write = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); a->can_ret = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); a->sec_chk_failed = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); return a; } Z_API void z_ucfg_analyzer_destroy(UCFG_Analyzer *a) { g_hash_table_destroy(a->insts); g_hash_table_destroy(a->reg_states); 
g_hash_table_destroy(a->direct_preds); g_hash_table_destroy(a->direct_succs); g_hash_table_destroy(a->intra_preds); g_hash_table_destroy(a->intra_succs); g_hash_table_destroy(a->all_preds); g_hash_table_destroy(a->all_succs); g_hash_table_destroy(a->flg_finished_succs); g_hash_table_destroy(a->flg_need_write); g_hash_table_destroy(a->gpr_analyzed_succs); g_hash_table_destroy(a->gpr_can_write); g_hash_table_destroy(a->can_ret); g_hash_table_destroy(a->sec_chk_failed); z_free(a); } Z_API void z_ucfg_analyzer_add_inst(UCFG_Analyzer *a, addr_t addr, const cs_insn *inst, bool maybe_duplicated) { assert(a != NULL); if (maybe_duplicated) { cs_insn *ori_inst = (cs_insn *)g_hash_table_lookup(a->insts, GSIZE_TO_POINTER(addr)); if (ori_inst) { if (!__ucfg_analyzer_check_consistent(ori_inst, inst)) { EXITME("inconsistent instruction update " CS_SHOW_INST(inst)); } g_hash_table_insert(a->insts, GSIZE_TO_POINTER(addr), (gpointer)inst); return; } } // update insts assert(!g_hash_table_lookup(a->insts, GSIZE_TO_POINTER(addr))); g_hash_table_insert(a->insts, GSIZE_TO_POINTER(addr), (gpointer)inst); // update register states RegState *rs = z_capstone_get_register_state(inst); g_hash_table_insert(a->reg_states, GSIZE_TO_POINTER(addr), (gpointer)rs); /* * XXX: it is important that following analysis happens in order and * closely. 
*/ // initial analysis __ucfg_analyzer_init_analyze(a, addr, inst); // advanced analysis __ucfg_analyzer_advance_analyze(a, addr, inst); } Z_API Buffer *z_ucfg_analyzer_get_direct_successors(UCFG_Analyzer *a, addr_t addr) { assert(a != NULL); return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->direct_succs, GSIZE_TO_POINTER(addr)); } Z_API Buffer *z_ucfg_analyzer_get_direct_predecessors(UCFG_Analyzer *a, addr_t addr) { assert(a != NULL); return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->direct_preds, GSIZE_TO_POINTER(addr)); } Z_API Buffer *z_ucfg_analyzer_get_intra_successors(UCFG_Analyzer *a, addr_t addr) { assert(a != NULL); return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->intra_succs, GSIZE_TO_POINTER(addr)); } Z_API Buffer *z_ucfg_analyzer_get_intra_predecessors(UCFG_Analyzer *a, addr_t addr) { assert(a != NULL); return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->intra_preds, GSIZE_TO_POINTER(addr)); } Z_API Buffer *z_ucfg_analyzer_get_all_successors(UCFG_Analyzer *a, addr_t addr) { assert(a != NULL); return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->all_succs, GSIZE_TO_POINTER(addr)); } Z_API Buffer *z_ucfg_analyzer_get_all_predecessors(UCFG_Analyzer *a, addr_t addr) { assert(a != NULL); return __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER(a->all_preds, GSIZE_TO_POINTER(addr)); } Z_API FLGState z_ucfg_analyzer_get_flg_need_write(UCFG_Analyzer *a, addr_t addr) { FLGState state = (FLGState)g_hash_table_lookup(a->flg_need_write, GSIZE_TO_POINTER(addr)); if (!state) { // there is not enough infomration to analyze this address return FLGSTATE_ALL; } else { return state & FLGSTATE_ALL; } } Z_API GPRState z_ucfg_analyzer_get_gpr_can_write(UCFG_Analyzer *a, addr_t addr) { GPRState state = (GPRState)g_hash_table_lookup(a->gpr_can_write, GSIZE_TO_POINTER(addr)); return state & GPRSTATE_ALL; } Z_API RegState *z_ucfg_analyzer_get_register_state(UCFG_Analyzer *a, addr_t addr) { return (RegState *)g_hash_table_lookup(a->reg_states, GSIZE_TO_POINTER(addr)); } Z_API bool 
z_ucfg_analyzer_is_security_chk_failed(UCFG_Analyzer *a, addr_t addr) { return !!(g_hash_table_lookup(a->sec_chk_failed, GSIZE_TO_POINTER(addr))); } #undef __UCFG_ANALYZER_GHASHTABLE_GET_BUFFER ================================================ FILE: src/ucfg_analyzer.h ================================================ /* * ucfg_analyzer.h * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __UCFG_ANALYZER_H #define __UCFG_ANALYZER_H #include "binary.h" #include "buffer.h" #include "capstone_.h" #include "config.h" #include "sys_optarg.h" #include #include /* * Light-weight instruction-level analyzer, which aims at analyzing conservative * use-def relation on the Universal CFG (UCFG). 
 */

STRUCT(UCFG_Analyzer, {
    // basic instruction information: addr_t -> cs_insn *
    GHashTable *insts;

    // register use/def state for each instruction: addr_t -> RegState *
    GHashTable *reg_states;

    /*
     * successors and predecessors
     * XXX: note that it is possible to return preds/succs for an invalid
     * address
     *
     *      all_preds = direct_preds U intra_preds
     *      all_succs = direct_succs U intra_succs
     */
    // direct/explicit successors and predecessors without call-fallthrough
    // edges
    GHashTable *direct_preds;
    GHashTable *direct_succs;
    // intra-procedure successors and predecessors
    GHashTable *intra_preds;
    GHashTable *intra_succs;
    // successors and predecessors with call-fallthrough edges
    GHashTable *all_preds;
    GHashTable *all_succs;

    // eflags register analysis (see __ucfg_analyzer_analyze_flg):
    //   flg_finished_succs: addr_t -> number of already-analyzed successors
    //   flg_need_write: addr_t -> FLGState of flags that must be written
    GHashTable *flg_finished_succs;
    GHashTable *flg_need_write;

    // general purpose register analysis (see __ucfg_analyzer_analyze_gpr):
    //   gpr_analyzed_succs: addr_t -> number of already-analyzed successors
    //   gpr_can_write: addr_t -> GPRState of registers safe to clobber
    GHashTable *gpr_analyzed_succs;
    GHashTable *gpr_can_write;

    // whether an inst can reach a RET inst via intra-procedure edges
    GHashTable *can_ret;

    // whether an inst can reach a security-chk-failed PLT call without any
    // condition and indirect edges
    GHashTable *sec_chk_failed;

    // rewriting optargs
    RewritingOptArgs *opts;

    Binary *binary;
});

/*
 * Create an ucfg_analyzer
 */
Z_API UCFG_Analyzer *z_ucfg_analyzer_create(Binary *binary,
                                            RewritingOptArgs *opts);

/*
 * Destroy an ucfg_analyzer
 */
Z_API void z_ucfg_analyzer_destroy(UCFG_Analyzer *a);

/*
 * Add a new instruction into analyzing buffer, *maybe_duplicated* means it is
 * possible that UCFG_Analyzer already analyzes this address
 */
// XXX: note that it is ok if the predecessors of addr is unknown, which means
// it is safe to use this function even the superset disassembly is incomplete.
Z_API void z_ucfg_analyzer_add_inst(UCFG_Analyzer *a, addr_t addr,
                                    const cs_insn *inst, bool maybe_duplicated);

/*
 * Get successors without the call-fallthrough edges (return value will never
 * be NULL)
 */
Z_API Buffer *z_ucfg_analyzer_get_direct_successors(UCFG_Analyzer *a,
                                                    addr_t addr);

/*
 * Get predecessors without the call-fallthrough edges (return value will never
 * be NULL)
 */
Z_API Buffer *z_ucfg_analyzer_get_direct_predecessors(UCFG_Analyzer *a,
                                                      addr_t addr);

/*
 * Get intra-procedure successors
 */
Z_API Buffer *z_ucfg_analyzer_get_intra_successors(UCFG_Analyzer *a,
                                                   addr_t addr);

/*
 * Get intra-procedure predecessors
 */
Z_API Buffer *z_ucfg_analyzer_get_intra_predecessors(UCFG_Analyzer *a,
                                                     addr_t addr);

/*
 * Get all successors
 */
Z_API Buffer *z_ucfg_analyzer_get_all_successors(UCFG_Analyzer *a, addr_t addr);

/*
 * Get all predecessors
 */
Z_API Buffer *z_ucfg_analyzer_get_all_predecessors(UCFG_Analyzer *a,
                                                   addr_t addr);

/*
 * Get *need-write* information for flag registers
 */
Z_API FLGState z_ucfg_analyzer_get_flg_need_write(UCFG_Analyzer *a,
                                                  addr_t addr);

/*
 * Get *can_write* information for general purpose registers
 */
Z_API GPRState z_ucfg_analyzer_get_gpr_can_write(UCFG_Analyzer *a, addr_t addr);

/*
 * Get register state for a given addr
 */
Z_API RegState *z_ucfg_analyzer_get_register_state(UCFG_Analyzer *a,
                                                   addr_t addr);

/*
 * Get whether an instruction belongs to a security_chk_failed block
 */
Z_API bool z_ucfg_analyzer_is_security_chk_failed(UCFG_Analyzer *a,
                                                  addr_t addr);

#endif


================================================
FILE: src/utils.c
================================================
/*
 * utils.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

#include "utils.h"

// NOTE(review): the targets of the three system includes below were lost
// during text extraction (the original reads "#include <...>"). Based on the
// code in this file (errno/strerror, time/localtime/strftime), likely
// candidates are <errno.h>, <string.h>, and <time.h> -- confirm against the
// upstream repository.
#include
#include
#include

/*
 * Lookup table function
 */
#define __INVALID_LOOKUP_TABLE_CELL_NUM ((uint64_t)(-1L))

// number of cells in the lookup table; set exactly once by
// z_lookup_table_init_cell_num()
static uint64_t __lookup_table_cell_num = __INVALID_LOOKUP_TABLE_CELL_NUM;

// Initialize the lookup table cell number from the size of the .text section
// (aligned upward at PAGE_SIZE_POW2 granularity). Fatal on double
// initialization or if the resulting cell number exceeds
// LOOKUP_TABLE_MAX_CELL_NUM.
void z_lookup_table_init_cell_num(uint64_t text_size) {
    if (__lookup_table_cell_num != __INVALID_LOOKUP_TABLE_CELL_NUM) {
        EXITME("duplicated initization for lookup table cell number");
    }
    __lookup_table_cell_num = BITS_ALIGN_CELL(text_size, PAGE_SIZE_POW2);
    if (__lookup_table_cell_num > LOOKUP_TABLE_MAX_CELL_NUM) {
        EXITME("too big cell number: %#lx", __lookup_table_cell_num);
    }
    z_info("cell number of lookup table: %#lx", __lookup_table_cell_num);
}

// Return the lookup table cell number; fatal if it was never initialized.
uint64_t z_lookup_table_get_cell_num() {
    if (__lookup_table_cell_num == __INVALID_LOOKUP_TABLE_CELL_NUM) {
        EXITME("non-initizated lookup table cell number");
    }
    return __lookup_table_cell_num;
}

#undef __INVALID_LOOKUP_TABLE_CELL_NUM

/*
 * Log session
 */
// names and ANSI colors for each level, indexed by the LOG_* enum in utils.h
static const char *level_names[] = {"TRACE", "DEBUG", "INFO",
                                    "WARN",  "ERROR", "FATAL"};
static const char *level_colors[] = {COLOR_PURPLE, COLOR_CYAN, COLOR_GREEN,
                                     COLOR_YELLOW, COLOR_RED,  COLOR_MAGENTA};

// messages with a level strictly below this threshold are suppressed
static int log_level = 0;

Z_API void z_log_set_level(int level) { log_level = level; }

// Core logging routine: writes "HH:MM:SS LEVEL <message> :file:line" to
// stderr and flushes. Invoked via the z_trace/z_debug/.../z_fatal macros.
Z_API void z_log(int level, const char *file, int line, const char *fmt, ...) {
    if (level < log_level) {
        return;
    }

    time_t t = time(NULL);
    struct tm *lt = localtime(&t);

    va_list args;
    char buf[16];
    // strftime returns the number of bytes written (0 on overflow), so this
    // always leaves buf NUL-terminated
    buf[strftime(buf, sizeof(buf), "%H:%M:%S", lt)] = '\0';
    fprintf(stderr, "%s %s%-5s" COLOR_RESET " ", buf, level_colors[level],
            level_names[level]);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fprintf(stderr, " " COLOR_GRAY ":%s:%d" COLOR_RESET " ", file, line);
    fprintf(stderr, "\n");
    fflush(stderr);
}

/*
 * General methods
 */
// lazily seed rand() on first use of z_rand()
static bool is_srand = false;

Z_API int z_rand() {
    if (!is_srand) {
        srand(time(NULL));
        is_srand = true;
    }
    return rand();
}

Z_API void z_exit(int status) { exit(status); }

// The file wrappers below (z_fopen .. z_ftell) terminate the process with
// errno as the exit status on failure, so callers never see error returns.
Z_API FILE *z_fopen(const char *pathname, const char *mode) {
    FILE *out = fopen(pathname, mode);
    if (out == NULL) {
        z_error("fopen: %d (%s)", errno, strerror(errno));
        z_exit(errno);
    }
    return out;
}

Z_API void z_fclose(FILE *stream) {
    if (fclose(stream) != 0) {
        z_error("fclose: %d (%s)", errno, strerror(errno));
        z_exit(errno);
    }
}

Z_API void z_fseek(FILE *stream, long offset, int whence) {
    if (fseek(stream, offset, whence) != 0) {
        z_error("fseek: %d (%s)", errno, strerror(errno));
        z_exit(errno);
    }
}

Z_API long z_ftell(FILE *stream) {
    long out = ftell(stream);
    if (out == -1) {
        z_error("ftell: %d (%s)", errno, strerror(errno));
        z_exit(errno);
    }
    return out;
}

// unlike the wrappers above, read/write results are passed straight through
// and must be checked by the caller
Z_API size_t z_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) {
    return fread(ptr, size, nmemb, stream);
}

Z_API size_t z_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream) {
    return fwrite(ptr, size, nmemb, stream);
}

Z_API int z_chmod(const char *pathname, mode_t mode) {
    return chmod(pathname, mode);
}

Z_API int z_access(const char *path, int mode) { return access(path, mode); }

// calloc() wrapper: zero-initialized memory, fatal on OOM
Z_API void *z_alloc(size_t nmemb, size_t size) {
    void *out = calloc(nmemb, size);
    if (out == NULL) {
        EXITME("calloc: run out of memory");
    }
    return out;
}

// realloc() wrapper: fatal on OOM
Z_API void *z_realloc(void *ptr, size_t size) {
    void *out = realloc(ptr, size);
    if (out == NULL) {
        EXITME("realloc: run out of memory");
    }
    return out;
}

Z_API void z_free(void *ptr) { free(ptr); }

/*
 * String methods
 */
// Concatenate s1 and s2 into a freshly z_alloc'd buffer (caller frees via
// z_free). The 0x10 extra bytes are slack beyond the required NUL terminator.
Z_API char *z_strcat(const char *s1, const char *s2) {
    char *s = z_alloc(z_strlen(s1) + z_strlen(s2) + 0x10, sizeof(char));
    z_strcpy(s, s1);
    z_strcpy(s + z_strlen(s1), s2);
    return s;
}

Z_API char *z_strstr(const char *haystack, const char *needle) {
    return strstr(haystack, needle);
}

// strdup() wrapper: fatal on OOM; caller frees the copy
Z_API char *z_strdup(const char *s) {
    char *o = strdup(s);
    if (o == NULL)
        EXITME("strdup: run out of memory");
    return o;
}

Z_API int z_strcmp(const char *s1, const char *s2) { return strcmp(s1, s2); }

Z_API int z_strncmp(const char *s1, const char *s2, size_t n) {
    return strncmp(s1, s2, n);
}

Z_API size_t z_strlen(const char *s) { return strlen(s); }

Z_API void z_strcpy(char *dst, const char *src) { strcpy(dst, src); }

Z_API char *z_strchr(const char *s, int c) { return strchr(s, c); }

Z_API char *z_strrchr(const char *s, int c) { return strrchr(s, c); }

/*
 * Keystone
 */
// shared keystone handle and scratch buffers backing the KS_* macros declared
// in utils.h
ks_engine *ks = NULL;
size_t ks_count = 0;
size_t ks_size = 0;
const unsigned char *ks_encode = NULL;
unsigned char ks_encode_fast[0x10];
char ks_buf[KS_BUFMAX];

/*
 * Capstone
 */
// shared capstone handle and last-disassembly state backing the CS_* macros
csh cs;
size_t cs_count;
const cs_insn *cs_inst;

/*
 * TPDispatcher
 */
// shared trampoline-dispatcher state backing the TP_* macros
TPDispatcher *tp;
size_t tp_size;
const uint8_t *tp_code;


================================================
FILE: src/utils.h
================================================
/*
 * utils.h
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __UTILS_H #define __UTILS_H #include "afl_config.h" #include "config.h" #include "library_functions/library_functions.h" #include "tp_dispatcher.h" #include #include /* * Color */ #define COLOR_BLACK "\x1b[30m" #define COLOR_RED "\x1b[31m" #define COLOR_GREEN "\x1b[32m" #define COLOR_YELLOW "\x1b[33m" #define COLOR_BLUE "\x1b[34m" #define COLOR_MAGENTA "\x1b[35m" #define COLOR_CYAN "\x1b[36m" #define COLOR_GRAY "\x1b[90m" #define COLOR_PURPLE "\x1b[94m" #define COLOR_BRIGHT "\x1b[1;97m" #define COLOR_RESET "\x1b[0m" #define COLOR(color, str) COLOR_##color str COLOR_RESET /* * Bit aligments */ // floor alignment: // e.g., for 12-bits alignment, 0x1000 -> 0x1000, 0x1001 -> 0x1000 #define BITS_ALIGN_FLOOR(addr, bits) (((addr) >> (bits)) << (bits)) // cell alignment: // e.g., for 12-bits alignment, 0x1000 -> 0x1000, 0x1001 -> 0x2000 #define BITS_ALIGN_CELL(addr, bits) (((((addr)-1) >> (bits)) + 1) << (bits)) /* * Lookup table */ void z_lookup_table_init_cell_num(uint64_t text_size); uint64_t z_lookup_table_get_cell_num(); /* * Log session */ Z_API void z_log(int level, const char *file, int line, const char *fmt, ...); Z_API void z_log_set_level(int level); enum { LOG_TRACE, LOG_DEBUG, LOG_INFO, LOG_WARN, LOG_ERROR, LOG_FATAL }; #ifdef DEBUG #define z_trace(...) z_log(LOG_TRACE, __FILE__, __LINE__, __VA_ARGS__) #define z_debug(...) z_log(LOG_DEBUG, __FILE__, __LINE__, __VA_ARGS__) #define z_info(...) z_log(LOG_INFO, __FILE__, __LINE__, __VA_ARGS__) #define z_warn(...) z_log(LOG_WARN, __FILE__, __LINE__, __VA_ARGS__) #define z_error(...) z_log(LOG_ERROR, __FILE__, __LINE__, __VA_ARGS__) #define z_fatal(...) z_log(LOG_FATAL, __FILE__, __LINE__, __VA_ARGS__) #else #define z_trace(...) #define z_debug(...) #define z_info(...) z_log(LOG_INFO, __FILE__, __LINE__, __VA_ARGS__) #define z_warn(...) 
z_log(LOG_WARN, __FILE__, __LINE__, __VA_ARGS__) #define z_error(...) z_log(LOG_ERROR, __FILE__, __LINE__, __VA_ARGS__) #define z_fatal(...) z_log(LOG_FATAL, __FILE__, __LINE__, __VA_ARGS__) #endif // print message #define z_sayf(...) fprintf(stderr, __VA_ARGS__) /* * Unreachable */ #define EXITME(...) \ do { \ z_error(__VA_ARGS__); \ z_exit(MY_ERR_CODE); \ } while (0) /* * General methods (wrapper of glibc alloc/file/string function) */ Z_API void z_exit(int status); Z_API FILE *z_fopen(const char *pathname, const char *mode); Z_API void z_fclose(FILE *stream); Z_API void z_fseek(FILE *stream, long offset, int whence); Z_API long z_ftell(FILE *stream); Z_API size_t z_fread(void *ptr, size_t size, size_t nmemb, FILE *stream); Z_API size_t z_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream); Z_API int z_access(const char *path, int mode); Z_API int z_chmod(const char *pathname, mode_t mode); Z_API void *z_alloc(size_t nmemb, size_t size); Z_API void *z_realloc(void *ptr, size_t size); Z_API void z_free(void *ptr); Z_API int z_rand(); Z_API char *z_strcat(const char *s1, const char *s2); Z_API int z_strcmp(const char *s1, const char *s2); Z_API int z_strncmp(const char *s1, const char *s2, size_t n); Z_API char *z_strstr(const char *haystack, const char *needle); Z_API char *z_strdup(const char *s); Z_API size_t z_strlen(const char *s); Z_API void z_strcpy(char *dst, const char *src); Z_API char *z_strchr(const char *s, int c); Z_API char *z_strrchr(const char *s, int c); #define z_alloc_printf(_str...) \ ({ \ char *_tmp; \ size_t _len = snprintf(NULL, 0, _str); \ if (_len < 0) { \ EXITME("Whoa, snprintf() fails?!"); \ } \ _tmp = z_alloc(_len + 1, sizeof(char)); \ snprintf(_tmp, _len + 1, _str); \ _tmp; \ }) #define z_snprintf(...) snprintf(__VA_ARGS__) #define z_sscanf(...) 
sscanf(__VA_ARGS__) #define z_likely(x) __builtin_expect(!!(x), 1) #define z_unlikely(x) __builtin_expect(!!(x), 0) /* * Keystone */ #define KS_BUFMAX 0x400 extern ks_engine *ks; extern size_t ks_count; extern size_t ks_size; extern const unsigned char *ks_encode; extern unsigned char ks_encode_fast[0x10]; extern char ks_buf[KS_BUFMAX]; #define KS_INIT \ do { \ if (ks == NULL) { \ if (ks_open(KS_ARCH_X86, KS_MODE_64, &ks) != KS_ERR_OK) { \ EXITME("fail on ks_open()"); \ } \ } \ } while (0) #define KS_FINI \ do { \ if (ks_encode != NULL && ks_encode != ks_encode_fast) { \ ks_free((unsigned char *)ks_encode); \ } \ if (ks != NULL) { \ ks_close(ks); \ } \ } while (0) // for quick assembly #define KS_ASM_CALL(cur_addr, tar_addr) \ do { \ ks_encode_fast[0] = '\xe8'; \ *(int *)(ks_encode_fast + 1) = (tar_addr) - (cur_addr)-5; \ if (ks_encode != NULL && ks_encode != ks_encode_fast) { \ ks_free((unsigned char *)ks_encode); \ } \ ks_size = 5; \ ks_count = 1; \ ks_encode = ks_encode_fast; \ } while (0) #define KS_ASM_JMP(cur_addr, tar_addr) \ do { \ ks_encode_fast[0] = '\xe9'; \ *(int *)(ks_encode_fast + 1) = (tar_addr) - (cur_addr)-5; \ if (ks_encode != NULL && ks_encode != ks_encode_fast) { \ ks_free((unsigned char *)ks_encode); \ } \ ks_size = 5; \ ks_count = 1; \ ks_encode = ks_encode_fast; \ } while (0) // XXX: note that KS_ASM_CONST_MOV can only mov to an address smaller than // 0x7fffffff, and can only store a value smaller than 0x7fffffff #define KS_ASM_CONST_MOV(mem, val) \ do { \ if (ks_encode != NULL && ks_encode != ks_encode_fast) { \ ks_free((unsigned char *)ks_encode); \ } \ long mem_ = (mem); \ long val_ = (val)&0x7FFFFFFF; \ if (mem_ > 0x7FFFFFFF) { \ EXITME("KS_ASM_CONST_MOV stores to a large address: %#lx", mem_); \ } \ memcpy(ks_encode_fast, \ "\x48\xC7\x04\x25\xDD\xDD\xDD\xDD\xFF\xFF\xFF\xFF", 12); \ memcpy(ks_encode_fast + 4, &(mem_), 4); \ memcpy(ks_encode_fast + 8, &(val_), 4); \ ks_size = 12; \ ks_count = 1; \ ks_encode = ks_encode_fast; \ } while (0) 
// Assemble printf-style instruction text at `addr` with Keystone. The
// formatted text goes through ks_buf, and the resulting encoding is left in
// ks_encode / ks_size / ks_count; any previous heap-allocated encoding is
// released first.
#define KS_ASM(addr, ...)                                                      \
    do {                                                                       \
        if (snprintf(ks_buf, KS_BUFMAX, __VA_ARGS__) >= KS_BUFMAX) {           \
            EXITME("assembly code is too long:\n%s", ks_buf);                  \
        }                                                                      \
        if (ks_encode != NULL && ks_encode != ks_encode_fast)                  \
            ks_free((unsigned char *)ks_encode);                               \
        if (ks_asm(ks, ks_buf, addr, (unsigned char **)(&ks_encode), &ks_size, \
                   &ks_count) != KS_ERR_OK) {                                  \
            EXITME("fail on ks_asm:\n%s", ks_buf);                             \
        }                                                                      \
    } while (0)

/*
 * Capstone
 */
// shared Capstone handle and the storage holding the most recent
// disassembly result (freed/overwritten by the next CS_DISASM*)
extern csh cs;
extern size_t cs_count;
extern const cs_insn *cs_inst;

// format-string helper for logging a single decoded instruction
#define CS_SHOW_INST(i) \
    "(%#lx:\t%s %s)", (i)->address, (i)->mnemonic, (i)->op_str

#define CS_INVALID_CSH 0

// toggle detailed decoding (operand information) on the shared handle
#define CS_DETAIL_ON                                                \
    do {                                                            \
        if (cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON) != CS_ERR_OK) { \
            EXITME("fail on cs_option()");                          \
        }                                                           \
    } while (0)

#define CS_DETAIL_OFF                                                \
    do {                                                             \
        if (cs_option(cs, CS_OPT_DETAIL, CS_OPT_OFF) != CS_ERR_OK) { \
            EXITME("fail on cs_option()");                           \
        }                                                            \
    } while (0)

// lazily open the shared x86-64 Capstone handle, with detail mode enabled
#define CS_INIT                                                       \
    do {                                                              \
        if (cs == CS_INVALID_CSH) {                                   \
            if (cs_open(CS_ARCH_X86, CS_MODE_64, &cs) != CS_ERR_OK) { \
                EXITME("fail on cs_open()");                          \
            }                                                         \
            CS_DETAIL_ON;                                             \
        }                                                             \
    } while (0)

// release the last disassembly result and close the shared handle
#define CS_FINI                                    \
    do {                                           \
        if (cs_inst != NULL)                       \
            cs_free((cs_insn *)cs_inst, cs_count); \
        if (cs != CS_INVALID_CSH)                  \
            cs_close(&cs);                         \
    } while (0)

// disassemble up to `count` instructions from a raw buffer, freeing the
// previous result first; cs_count receives the number of decoded instructions
#define CS_DISASM_RAW(ptr, size, addr, count)                              \
    do {                                                                   \
        if (cs_inst != NULL)                                               \
            cs_free((cs_insn *)cs_inst, cs_count);                         \
        cs_count =                                                         \
            cs_disasm(cs, ptr, size, addr, count, (cs_insn **)(&cs_inst)); \
    } while (0)

// convenience wrapper for an rptr-style object exposing raw_ptr/size
#define CS_DISASM(rptr, addr, count)                               \
    do {                                                           \
        CS_DISASM_RAW((rptr)->raw_ptr, (rptr)->size, addr, count); \
    } while (0)

/*
 * TPDispatcher
 */
extern TPDispatcher *tp;
extern size_t tp_size;
extern const uint8_t *tp_code;

// lazily create the shared trampoline dispatcher
#define TP_INIT                            \
    do {                                   \
        if (tp == NULL)                    \
            tp = z_tp_dispatcher_create(); \
    } while (0)

#define TP_FINI                              \
    do {                                     \
        if (tp != NULL)                      \
            z_tp_dispatcher_destroy(tp);     \
    } while (0)

// emit a trampoline of the given type; the generated code and its size are
// left in tp_code / tp_size
#define TP_EMIT(type, ...)                                                  \
    do {                                                                    \
        if (tp == NULL)                                                     \
            TP_INIT;                                                        \
        tp_code = z_tp_dispatcher_emit_##type(tp, &tp_size, ##__VA_ARGS__); \
    } while (0)

/*
 * Library function information
 */
#define LB_INIT z_libfunc_init()
#define LB_FINI z_libfunc_fini()
#define LB_QUERY(name) z_libfunc_get_info(name)
#define LB_DEFAULT() z_libfunc_default()

/*
 * System
 */
// startup sanity checks on the layout constants shared with the injected
// runtime (AFL shared memory, RW page, CRS page, lookup table, signal stack)
#define __PRE_CHECK                                                            \
    do {                                                                       \
        if (AFL_PREV_ID_PTR != RW_PAGE_INFO_ADDR(afl_prev_id)) {               \
            EXITME("invalid AFL_PREV_ID_PTR value: %#lx v/s %#lx",             \
                   AFL_PREV_ID_PTR, RW_PAGE_INFO_ADDR(afl_prev_id));           \
        }                                                                      \
        if (AFL_MAP_SIZE_POW2 > 31) {                                          \
            EXITME("the size of AFL's shared memory is too large: %#lx",       \
                   AFL_MAP_SIZE);                                              \
        }                                                                      \
        if (RW_PAGE_SIZE < RW_PAGE_USED_SIZE + 0x100) {                        \
            /* XXX: 0x100 is left for utils_output_number when DEBUG */        \
            EXITME("use too much space on RW_PAGE: %#lx v/s %#lx",             \
                   RW_PAGE_SIZE, RW_PAGE_USED_SIZE + 0x100);                   \
        }                                                                      \
        if (CRS_MAP_SIZE < CRS_USED_SIZE) {                                    \
            EXITME("use too much space on CRS PAGE: %#lx v/s %#lx",            \
                   CRS_MAP_SIZE, CRS_USED_SIZE);                               \
        }                                                                      \
        if (LOOKUP_TABLE_CELL_SIZE_POW2 != 2) {                                \
            EXITME("the element size of lookup table must be dword");          \
        }                                                                      \
        if (SIGNAL_STACK_SIZE < MINSIGSTKSZ) {                                 \
            EXITME(                                                            \
                "the size of signal stack is smaller than MINSIGSTKSZ (%#lx)", \
                MINSIGSTKSZ);                                                  \
        }                                                                      \
    } while (0)

// global init: sanity checks first, then Keystone, Capstone, the trampoline
// dispatcher, and the library-function database
#define Z_INIT           \
    do {                 \
        __PRE_CHECK;     \
        KS_INIT;         \
        CS_INIT;         \
        TP_INIT;         \
        LB_INIT;         \
    } while (0)

#define Z_FINI   \
    do {         \
        KS_FINI; \
        CS_FINI; \
        TP_FINI; \
        LB_FINI; \
    } while (0)

#endif


================================================
FILE: src/x64_utils.c
================================================

/*
 * x64_utils.c
 * Copyright (C) 2021 Zhuo Zhang, Xiangyu Zhang
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef __X64_UTILS_C #define __X64_UTILS_C // XXX: this file is always included into .c file to benifit compiler // optimization Z_PRIVATE const uint8_t *z_x64_gen_nop(size_t n) { static const char *nop_bufs[15] = { "\x90", "\x66\x90", "\x0F\x1F\x00", "\x0F\x1F\x40\x00", "\x0F\x1F\x44\x00\x00", "\x66\x0F\x1F\x44\x00\x00", "\x0F\x1F\x80\x00\x00\x00\x00", "\x0F\x1F\x84\x00\x00\x00\x00\x00", "\x66\x0F\x1F\x84\x00\x00\x00\x00\x00", "\x0F\x1F\x44\x00\x00\x0F\x1F\x44\x00\x00", "\x0F\x1F\x44\x00\x00\x66\x0F\x1F\x44\x00\x00", "\x66\x0F\x1F\x44\x00\x00\x66\x0F\x1F\x44\x00\x00", "\x66\x0F\x1F\x44\x00\x00\x0F\x1F\x80\x00\x00\x00\x00", "\x0F\x1F\x80\x00\x00\x00\x00\x0F\x1F\x80\x00\x00\x00\x00", "\x0F\x1F\x80\x00\x00\x00\x00\x0F\x1F\x84\x00\x00\x00\x00\x00", }; if (n > 15) { EXITME("invalid size for a nop instruction: %d", n); return NULL; } else { return (const uint8_t *)nop_bufs[n - 1]; } } Z_PRIVATE const uint8_t *z_x64_gen_invalid(size_t n) { if (n > 15) { EXITME("invalid size for an invalid instruction: %d", n); return NULL; } else { const char *buf = "\x2F\x2F\x2F\x2F\x2F\x2F\x2F\x2F\x2F\x2F\x2F\x2F\x2F\x2F\x2F\x2F"; return (const uint8_t *)buf; } } #endif ================================================ FILE: test/check_avx512.c ================================================ /* * Check whether current CPU support AVX512. 
To compile, use following command: * * clang -mavx512f check_avx512.c -o check_avx512 */ #include #include #define BUF_SIZE 0x10000 unsigned char buffer[BUF_SIZE]; int main(int argc, char **argv) { register uintptr_t dst asm("rdi") = (uintptr_t)buffer; register uintptr_t n asm("rcx") = (uintptr_t)BUF_SIZE; asm volatile( ".intel_syntax noprefix\n" " xor rax, rax;\n" " vpbroadcastd zmm16, eax;\n" " lea rax, [rdi + rcx];\n" " sub rdi, rax;\n" "loop:\n" " vmovdqa64 [rax + rdi], zmm16;\n" " add rdi, 0x40;\n" " jnz loop;\n" : : "r"(dst), "r"(n) : "rax", "zmm16", "memory"); return 0; } ================================================ FILE: test/crash.c ================================================ #include #include int main(int argc, const char **argv) { if (argc > 1 && !strcmp(argv[1], "mdzz")) { char *a = NULL; a[1] = 'z'; } } ================================================ FILE: test/ex.smt2 ================================================ ; Boogie universal background predicate ; Copyright (c) 2004-2010, Microsoft Corp. 
(set-info :category "industrial") (declare-sort |T@U| 0) (declare-sort |T@T| 0) (declare-fun real_pow (Real Real) Real) (declare-fun UOrdering2 (|T@U| |T@U|) Bool) (declare-fun UOrdering3 (|T@T| |T@U| |T@U|) Bool) (declare-fun tickleBool (Bool) Bool) (assert (and (tickleBool true) (tickleBool false))) (declare-fun TV (Int) Bool) (declare-fun TO (Int) Bool) (declare-fun between (Int Int Int) Bool) (declare-fun word (Int) Bool) (declare-fun WORD_HI () Int) (declare-fun NULL () Int) (declare-fun TVM (Int Int) Bool) (declare-fun Mult (Int Int) Int) (declare-fun TVM3 (Int Int Int) Bool) (declare-fun memAddr (Int) Bool) (declare-fun ?memLo () Int) (declare-fun ?memHi () Int) (declare-fun memAddrEx (Int) Bool) (declare-fun TBV ((_ BitVec 32)) Bool) (declare-fun $Aligned ((_ BitVec 32)) Bool) (declare-fun $bbvec4 ((Array Int Int) Int Int (Array Int Int) Int Int Int Int Int) Bool) (declare-fun B (Int) (_ BitVec 32)) (declare-fun I ((_ BitVec 32)) Int) (declare-fun $bb2vec4 ((Array Int Int) Int (Array Int Int) Int Int Int Int Int) Bool) (declare-fun q@and (Int Int) Int) (declare-fun q@or (Int Int) Int) (declare-fun q@xor (Int Int) Int) (declare-fun shl (Int Int) Int) (declare-fun shr (Int Int) Int) (declare-fun neg (Int) Int) (declare-fun Aligned (Int) Bool) (declare-fun %lbl%+2849 () Bool) (declare-fun %lbl%@4150 () Bool) (declare-fun $x () (_ BitVec 32)) (declare-fun %lbl%+4132 () Bool) (assert (forall ((val Int) ) (! (= (TV val) true) :qid |baseibpl.14:28| :skolemid |0| :pattern ( (TV val)) ))) (assert (forall ((wordOffset Int) ) (! (= (TO wordOffset) true) :qid |baseibpl.18:28| :skolemid |1| :pattern ( (TO wordOffset)) ))) (assert (forall ((i1 Int) (i2 Int) (x Int) ) (! (= (between i1 i2 x) (and (<= i1 x) (< x i2))) :qid |baseibpl.25:18| :skolemid |2| :pattern ( (between i1 i2 x)) ))) (assert (forall ((val@@0 Int) ) (! 
(= (word val@@0) (and (<= 0 val@@0) (< val@@0 WORD_HI))) :qid |baseibpl.30:15| :skolemid |3| :pattern ( (word val@@0)) ))) (assert (= NULL 0)) (assert (forall ((a Int) (b Int) ) (! (= (TVM a b) true) :qid |baseibpl.45:29| :skolemid |4| :pattern ( (TVM a b)) ))) (assert (forall ((a@@0 Int) (b@@0 Int) ) (! (= (Mult a@@0 b@@0) (* a@@0 b@@0)) :qid |baseibpl.47:15| :skolemid |5| :pattern ( (TVM a@@0 b@@0)) ))) (assert (forall ((a@@1 Int) (b1 Int) (b2 Int) ) (! (= (TVM3 a@@1 b1 b2) true) :qid |baseibpl.49:30| :skolemid |6| :pattern ( (TVM3 a@@1 b1 b2)) ))) (assert (forall ((i Int) ) (! (= (memAddr i) (and (<= ?memLo i) (< i ?memHi))) :qid |memoryib.18:18| :skolemid |7| :pattern ( (memAddr i)) ))) (assert (forall ((i@@0 Int) ) (! (= (memAddrEx i@@0) (and (<= ?memLo i@@0) (<= i@@0 ?memHi))) :qid |memoryib.19:20| :skolemid |8| :pattern ( (memAddrEx i@@0)) ))) (assert (forall ((b@@1 (_ BitVec 32)) ) (! (= (TBV b@@1) true) :qid |BitVecto.18:29| :skolemid |9| :pattern ( (TBV b@@1)) ))) (assert (forall ((b@@2 (_ BitVec 32)) ) (! (= ($Aligned b@@2) (= (bvand b@@2 #x00000003) #x00000000)) :qid |BitVecto.12:19| :skolemid |10| :pattern ( ($Aligned b@@2)) ))) (assert (forall ((a@@2 (Array Int Int)) (off Int) (aBase Int) (bb (Array Int Int)) (i0 Int) (i1@@0 Int) (i2@@0 Int) (g1 Int) (g2 Int) ) (! (= ($bbvec4 a@@2 off aBase bb i0 i1@@0 i2@@0 g1 g2) (forall ((i@@1 Int) ) (! 
(=> (and (TV i@@1) (word (- i@@1 i0)) (<= i1@@0 i@@1) (< i@@1 i2@@0) ($Aligned (B (- i@@1 i0)))) (and (between g1 g2 (+ g1 (* 4 (I (bvlshr (B (- i@@1 i0)) #x00000007))))) (= (= (select a@@2 (+ aBase (- i@@1 i0))) off) (= #x00000000 (bvand (B (select bb (+ g1 (* 4 (I (bvlshr (B (- i@@1 i0)) #x00000007)))))) (bvshl #x00000001 (bvand (bvlshr (B (- i@@1 i0)) #x00000002) #x0000001f))))))) :qid |BitVecto.19:11| :skolemid |11| :pattern ( (TV i@@1)) ))) :qid |BitVecto.17:18| :skolemid |12| :pattern ( ($bbvec4 a@@2 off aBase bb i0 i1@@0 i2@@0 g1 g2)) ))) (assert (forall ((a@@3 (Array Int Int)) (aBase@@0 Int) (bb@@0 (Array Int Int)) (i0@@0 Int) (i1@@1 Int) (i2@@1 Int) (g1@@0 Int) (g2@@0 Int) ) (! (= ($bb2vec4 a@@3 aBase@@0 bb@@0 i0@@0 i1@@1 i2@@1 g1@@0 g2@@0) (forall ((i@@2 Int) ) (! (=> (and (TV i@@2) (word (- i@@2 i0@@0)) (<= i1@@1 i@@2) (< i@@2 i2@@1) ($Aligned (B (- i@@2 i0@@0)))) (and (between g1@@0 g2@@0 (+ g1@@0 (* 4 (I (bvlshr (B (- i@@2 i0@@0)) #x00000006))))) (= (B (select a@@3 (+ aBase@@0 (- i@@2 i0@@0)))) (bvand (bvlshr (B (select bb@@0 (+ g1@@0 (* 4 (I (bvlshr (B (- i@@2 i0@@0)) #x00000006)))))) (bvand (bvlshr (B (- i@@2 i0@@0)) #x00000001) #x0000001f)) #x00000003)))) :qid |BitVecto.28:11| :skolemid |13| :pattern ( (TV i@@2)) ))) :qid |BitVecto.26:19| :skolemid |14| :pattern ( ($bb2vec4 a@@3 aBase@@0 bb@@0 i0@@0 i1@@1 i2@@1 g1@@0 g2@@0)) ))) (assert (= WORD_HI (+ (+ 2147483647 2147483647) 2))) (assert (= (I #x00000001) 1)) (assert (forall ((i1@@2 Int) (i2@@2 Int) ) (! (=> (and (word i1@@2) (word i2@@2)) (= (= i1@@2 i2@@2) (= (B i1@@2) (B i2@@2)))) :qid |BitVecto.9:15| :skolemid |19| :pattern ( (B i1@@2) (B i2@@2)) ))) (assert (forall ((b1@@0 (_ BitVec 32)) (b2@@0 (_ BitVec 32)) ) (! (= (= b1@@0 b2@@0) (= (I b1@@0) (I b2@@0))) :qid |BitVecto.10:15| :skolemid |20| :pattern ( (I b1@@0) (I b2@@0)) ))) (assert (forall ((b@@3 (_ BitVec 32)) ) (! (word (I b@@3)) :qid |BitVecto.12:15| :skolemid |21| :pattern ( (I b@@3)) ))) (assert (forall ((b@@4 (_ BitVec 32)) ) (! 
(= (B (I b@@4)) b@@4) :qid |BitVecto.13:15| :skolemid |22| :pattern ( (B (I b@@4))) ))) (assert (forall ((i@@3 Int) ) (! (=> (word i@@3) (= (I (B i@@3)) i@@3)) :qid |BitVecto.14:15| :skolemid |23| :pattern ( (I (B i@@3))) ))) (assert (forall ((b1@@1 (_ BitVec 32)) (b2@@1 (_ BitVec 32)) ) (! (=> (word (+ (I b1@@1) (I b2@@1))) (= (+ (I b1@@1) (I b2@@1)) (I (bvadd b1@@1 b2@@1)))) :qid |BitVecto.16:15| :skolemid |24| :pattern ( (bvadd b1@@1 b2@@1)) :pattern ( (TBV b1@@1) (TBV b2@@1)) ))) (assert (forall ((b1@@2 (_ BitVec 32)) (b2@@2 (_ BitVec 32)) ) (! (=> (word (- (I b1@@2) (I b2@@2))) (= (- (I b1@@2) (I b2@@2)) (I (bvsub b1@@2 b2@@2)))) :qid |BitVecto.17:15| :skolemid |25| :pattern ( (bvsub b1@@2 b2@@2)) :pattern ( (TBV b1@@2) (TBV b2@@2)) ))) (assert (forall ((b1@@3 (_ BitVec 32)) (b2@@3 (_ BitVec 32)) ) (! (=> (word (* (I b1@@3) (I b2@@3))) (= (* (I b1@@3) (I b2@@3)) (I (bvmul b1@@3 b2@@3)))) :qid |BitVecto.18:15| :skolemid |26| :pattern ( (bvmul b1@@3 b2@@3)) :pattern ( (TBV b1@@3) (TBV b2@@3)) ))) (assert (forall ((b1@@4 (_ BitVec 32)) (b2@@4 (_ BitVec 32)) ) (! (= (<= (I b1@@4) (I b2@@4)) (bvule b1@@4 b2@@4)) :qid |BitVecto.19:15| :skolemid |27| :pattern ( (bvule b1@@4 b2@@4)) :pattern ( (TBV b1@@4) (TBV b2@@4)) ))) (assert (forall ((i1@@3 Int) (i2@@3 Int) ) (! (= (q@and i1@@3 i2@@3) (I (bvand (B i1@@3) (B i2@@3)))) :qid |BitVecto.20:15| :skolemid |28| :pattern ( (q@and i1@@3 i2@@3)) ))) (assert (forall ((i1@@4 Int) (i2@@4 Int) ) (! (= (q@or i1@@4 i2@@4) (I (bvor (B i1@@4) (B i2@@4)))) :qid |BitVecto.21:15| :skolemid |29| :pattern ( (q@or i1@@4 i2@@4)) ))) (assert (forall ((i1@@5 Int) (i2@@5 Int) ) (! (= (q@xor i1@@5 i2@@5) (I (bvxor (B i1@@5) (B i2@@5)))) :qid |BitVecto.22:15| :skolemid |30| :pattern ( (q@xor i1@@5 i2@@5)) ))) (assert (forall ((i1@@6 Int) (i2@@6 Int) ) (! (= (shl i1@@6 i2@@6) (I (bvshl (B i1@@6) (B i2@@6)))) :qid |BitVecto.23:15| :skolemid |31| :pattern ( (shl i1@@6 i2@@6)) ))) (assert (forall ((i1@@7 Int) (i2@@7 Int) ) (! 
(= (shr i1@@7 i2@@7) (I (bvlshr (B i1@@7) (B i2@@7)))) :qid |BitVecto.24:15| :skolemid |32| :pattern ( (shr i1@@7 i2@@7)) ))) (assert (forall ((i@@4 Int) ) (! (= (neg i@@4) (I (bvnot (B i@@4)))) :qid |BitVecto.25:15| :skolemid |33| :pattern ( (neg i@@4)) ))) (assert (forall ((b@@5 (_ BitVec 32)) ) (! (= (Aligned (I b@@5)) (= (bvand b@@5 #x00000003) #x00000000)) :qid |BitVecto.27:15| :skolemid |34| :pattern ( (Aligned (I b@@5))) ))) (push 1) (set-info :boogie-vc-id _aligned) (assert (not (let ((anon0_correct (=> (! (and %lbl%+2849 true) :lblpos +2849) (and (! (or %lbl%@4150 ($Aligned (bvmul #x00000004 $x))) :lblneg @4150) (=> ($Aligned (bvmul #x00000004 $x)) true))))) (let ((PreconditionGeneratedEntry_correct (=> (! (and %lbl%+4132 true) :lblpos +4132) anon0_correct))) PreconditionGeneratedEntry_correct)) )) (check-sat) (pop 1) (declare-fun %lbl%+2852 () Bool) (declare-fun %lbl%@4178 () Bool) (declare-fun %lbl%+4172 () Bool) (push 1) (set-info :boogie-vc-id _zeroAligned) (assert (not (let ((anon0_correct@@0 (=> (! (and %lbl%+2852 true) :lblpos +2852) (and (! (or %lbl%@4178 ($Aligned #x00000000)) :lblneg @4178) (=> ($Aligned #x00000000) true))))) (let ((PreconditionGeneratedEntry_correct@@0 (=> (! (and %lbl%+4172 true) :lblpos +4172) anon0_correct@@0))) PreconditionGeneratedEntry_correct@@0)) )) (check-sat) (pop 1) (declare-fun %lbl%+2858 () Bool) (declare-fun %lbl%@4199 () Bool) (declare-fun $x@@0 () (_ BitVec 32)) (declare-fun %lbl%+4183 () Bool) (push 1) (set-info :boogie-vc-id _andAligned) (assert (not (let ((anon0_correct@@1 (=> (! (and %lbl%+2858 true) :lblpos +2858) (and (! (or %lbl%@4199 (= (= (bvand $x@@0 #x00000003) #x00000000) ($Aligned $x@@0))) :lblneg @4199) (=> (= (= (bvand $x@@0 #x00000003) #x00000000) ($Aligned $x@@0)) true))))) (let ((PreconditionGeneratedEntry_correct@@1 (=> (! 
(and %lbl%+4183 true) :lblpos +4183) anon0_correct@@1))) PreconditionGeneratedEntry_correct@@1)) )) (check-sat) (pop 1) (declare-fun %lbl%+2867 () Bool) (declare-fun %lbl%@4234 () Bool) (declare-fun $x@@1 () (_ BitVec 32)) (declare-fun $y () (_ BitVec 32)) (declare-fun %lbl%+4216 () Bool) (push 1) (set-info :boogie-vc-id _addAligned) (assert (not (let ((anon0_correct@@2 (=> (! (and %lbl%+2867 true) :lblpos +2867) (and (! (or %lbl%@4234 (=> ($Aligned $x@@1) (= ($Aligned $y) ($Aligned (bvadd $x@@1 $y))))) :lblneg @4234) (=> (=> ($Aligned $x@@1) (= ($Aligned $y) ($Aligned (bvadd $x@@1 $y)))) true))))) (let ((PreconditionGeneratedEntry_correct@@2 (=> (! (and %lbl%+4216 true) :lblpos +4216) anon0_correct@@2))) PreconditionGeneratedEntry_correct@@2)) )) (check-sat) (pop 1) (declare-fun %lbl%+2876 () Bool) (declare-fun %lbl%@4273 () Bool) (declare-fun $x@@2 () (_ BitVec 32)) (declare-fun $y@@0 () (_ BitVec 32)) (declare-fun %lbl%+4255 () Bool) (push 1) (set-info :boogie-vc-id _subAligned) (assert (not (let ((anon0_correct@@3 (=> (! (and %lbl%+2876 true) :lblpos +2876) (and (! (or %lbl%@4273 (=> ($Aligned $x@@2) (= ($Aligned $y@@0) ($Aligned (bvsub $x@@2 $y@@0))))) :lblneg @4273) (=> (=> ($Aligned $x@@2) (= ($Aligned $y@@0) ($Aligned (bvsub $x@@2 $y@@0)))) true))))) (let ((PreconditionGeneratedEntry_correct@@3 (=> (! (and %lbl%+4255 true) :lblpos +4255) anon0_correct@@3))) PreconditionGeneratedEntry_correct@@3)) )) (check-sat) (pop 1) (declare-fun %lbl%+2882 () Bool) (declare-fun %lbl%@4338 () Bool) (declare-fun $b () (_ BitVec 32)) (declare-fun %lbl%@4348 () Bool) (declare-fun %lbl%@4358 () Bool) (declare-fun %lbl%@4368 () Bool) (declare-fun %lbl%+4294 () Bool) (push 1) (set-info :boogie-vc-id _notAligned) (assert (not (let ((anon0_correct@@4 (=> (! (and %lbl%+2882 true) :lblpos +2882) (and (! (or %lbl%@4338 (not ($Aligned (bvadd $b #x00000001)))) :lblneg @4338) (=> (not ($Aligned (bvadd $b #x00000001))) (and (! 
(or %lbl%@4348 (not ($Aligned (bvadd $b #x00000002)))) :lblneg @4348) (=> (not ($Aligned (bvadd $b #x00000002))) (and (! (or %lbl%@4358 (not ($Aligned (bvadd $b #x00000003)))) :lblneg @4358) (=> (not ($Aligned (bvadd $b #x00000003))) (and (! (or %lbl%@4368 (bvule $b #xfffffffc)) :lblneg @4368) (=> (bvule $b #xfffffffc) true))))))))))) (let ((PreconditionGeneratedEntry_correct@@4 (=> (! (and %lbl%+4294 true) :lblpos +4294) (=> ($Aligned $b) anon0_correct@@4)))) PreconditionGeneratedEntry_correct@@4)) )) (check-sat) (pop 1) (declare-fun %lbl%+2888 () Bool) (declare-fun %lbl%@4414 () Bool) (declare-fun $x@@3 () (_ BitVec 32)) (declare-fun %lbl%@4432 () Bool) (declare-fun %lbl%+4375 () Bool) (push 1) (set-info :boogie-vc-id _is4kAligned) (assert (not (let ((anon0_correct@@5 (=> (! (and %lbl%+2888 true) :lblpos +2888) (and (! (or %lbl%@4414 (= (bvand (bvsub $x@@3 (bvand $x@@3 #x00000fff)) #x00000fff) #x00000000)) :lblneg @4414) (=> (= (bvand (bvsub $x@@3 (bvand $x@@3 #x00000fff)) #x00000fff) #x00000000) (and (! (or %lbl%@4432 (and (bvule #x00000000 (bvand $x@@3 #x00000fff)) (bvule (bvand $x@@3 #x00000fff) #x00000fff))) :lblneg @4432) (=> (and (bvule #x00000000 (bvand $x@@3 #x00000fff)) (bvule (bvand $x@@3 #x00000fff) #x00000fff)) true))))))) (let ((PreconditionGeneratedEntry_correct@@5 (=> (! (and %lbl%+4375 true) :lblpos +4375) anon0_correct@@5))) PreconditionGeneratedEntry_correct@@5)) )) (check-sat) (pop 1) (declare-fun %lbl%+2894 () Bool) (declare-fun %lbl%@4498 () Bool) (declare-fun $x@@4 () (_ BitVec 32)) (declare-fun %lbl%@4520 () Bool) (declare-fun %lbl%+4455 () Bool) (push 1) (set-info :boogie-vc-id _is2m4kAligned) (assert (not (let ((anon0_correct@@6 (=> (! (and %lbl%+2894 true) :lblpos +2894) (and (! (or %lbl%@4498 (= (bvand (bvsub (bvadd $x@@4 #x00200000) (bvand $x@@4 #x001fffff)) #x00000fff) #x00000000)) :lblneg @4498) (=> (= (bvand (bvsub (bvadd $x@@4 #x00200000) (bvand $x@@4 #x001fffff)) #x00000fff) #x00000000) (and (! 
(or %lbl%@4520 (and (bvule #x00000000 (bvand $x@@4 #x001fffff)) (bvule (bvand $x@@4 #x001fffff) #x001fffff))) :lblneg @4520) (=> (and (bvule #x00000000 (bvand $x@@4 #x001fffff)) (bvule (bvand $x@@4 #x001fffff) #x001fffff)) true))))))) (let ((PreconditionGeneratedEntry_correct@@6 (=> (! (and %lbl%+4455 true) :lblpos +4455) anon0_correct@@6))) PreconditionGeneratedEntry_correct@@6)) )) (check-sat) (pop 1) (declare-fun %lbl%+2900 () Bool) (declare-fun %lbl%@4581 () Bool) (declare-fun $x@@5 () (_ BitVec 32)) (declare-fun %lbl%@4595 () Bool) (declare-fun %lbl%+4543 () Bool) (push 1) (set-info :boogie-vc-id _add4kAligned) (assert (not (let ((anon0_correct@@7 (=> (! (and %lbl%+2900 true) :lblpos +2900) (and (! (or %lbl%@4581 (= (bvand (bvadd $x@@5 #x00001000) #x00000fff) #x00000000)) :lblneg @4581) (=> (= (bvand (bvadd $x@@5 #x00001000) #x00000fff) #x00000000) (and (! (or %lbl%@4595 ($Aligned $x@@5)) :lblneg @4595) (=> ($Aligned $x@@5) true))))))) (let ((PreconditionGeneratedEntry_correct@@7 (=> (! (and %lbl%+4543 true) :lblpos +4543) (=> (= (bvand $x@@5 #x00000fff) #x00000000) anon0_correct@@7)))) PreconditionGeneratedEntry_correct@@7)) )) (check-sat) (pop 1) (declare-fun %lbl%+2906 () Bool) (declare-fun %lbl%@4652 () Bool) (declare-fun %lbl%@4662 () Bool) (declare-fun $unitSize () (_ BitVec 32)) (declare-fun %lbl%@4676 () Bool) (declare-fun %lbl%+4600 () Bool) (push 1) (set-info :boogie-vc-id _initialize) (assert (not (let ((anon0_correct@@8 (=> (! (and %lbl%+2906 true) :lblpos +2906) (and (! (or %lbl%@4652 (= (bvlshr #x00000000 #x00000007) #x00000000)) :lblneg @4652) (=> (= (bvlshr #x00000000 #x00000007) #x00000000) (and (! (or %lbl%@4662 (= (bvlshr (bvmul #x00000080 $unitSize) #x00000007) $unitSize)) :lblneg @4662) (=> (= (bvlshr (bvmul #x00000080 $unitSize) #x00000007) $unitSize) (and (! 
(or %lbl%@4676 (= (bvlshr (bvmul #x00000100 $unitSize) #x00000007) (bvadd $unitSize $unitSize))) :lblneg @4676) (=> (= (bvlshr (bvmul #x00000100 $unitSize) #x00000007) (bvadd $unitSize $unitSize)) true))))))))) (let ((PreconditionGeneratedEntry_correct@@8 (=> (! (and %lbl%+4600 true) :lblpos +4600) (=> (bvule $unitSize #x00ffffff) anon0_correct@@8)))) PreconditionGeneratedEntry_correct@@8)) )) (check-sat) (pop 1) (declare-fun %lbl%+3018 () Bool) (declare-fun %lbl%@5233 () Bool) (declare-fun $i2 () Int) (declare-fun $i0 () Int) (declare-fun %lbl%@5259 () Bool) (declare-fun $idx () Int) (declare-fun $g1 () Int) (declare-fun %lbl%@5285 () Bool) (declare-fun %lbl%@5345 () Bool) (declare-fun $a () (Array Int Int)) (declare-fun $off () Int) (declare-fun $aBase () Int) (declare-fun $bb () (Array Int Int)) (declare-fun $i1 () Int) (declare-fun $g2 () Int) (declare-fun %lbl%+4695 () Bool) (push 1) (set-info :boogie-vc-id _bb4Zero) (assert (not (let ((anon0_correct@@9 (=> (! (and %lbl%+3018 true) :lblpos +3018) (and (! (or %lbl%@5233 (= (bvmul #x00000080 (bvlshr (B (- $i2 $i0)) #x00000007)) (B (- $i2 $i0)))) :lblneg @5233) (=> (= (bvmul #x00000080 (bvlshr (B (- $i2 $i0)) #x00000007)) (B (- $i2 $i0))) (and (! (or %lbl%@5259 (= (- $idx $g1) (* 4 (I (bvlshr (B (- $i2 $i0)) #x00000007))))) :lblneg @5259) (=> (= (- $idx $g1) (* 4 (I (bvlshr (B (- $i2 $i0)) #x00000007)))) (and (! (or %lbl%@5285 (forall ((i@@5 Int) ) (! (=> (and (TV i@@5) (<= $i2 i@@5) (< i@@5 (+ $i2 128))) (= (bvlshr (B (- i@@5 $i0)) #x00000007) (bvlshr (B (- $i2 $i0)) #x00000007))) :qid |BitVecto.62:18| :skolemid |35| :pattern ( (TV i@@5)) ))) :lblneg @5285) (=> (forall ((i@@6 Int) ) (! (=> (and (TV i@@6) (<= $i2 i@@6) (< i@@6 (+ $i2 128))) (= (bvlshr (B (- i@@6 $i0)) #x00000007) (bvlshr (B (- $i2 $i0)) #x00000007))) :qid |BitVecto.62:18| :skolemid |35| :pattern ( (TV i@@6)) )) (and (! 
(or %lbl%@5345 ($bbvec4 $a $off $aBase (store $bb $idx 0) $i0 $i1 (+ $i2 128) $g1 $g2)) :lblneg @5345) (=> ($bbvec4 $a $off $aBase (store $bb $idx 0) $i0 $i1 (+ $i2 128) $g1 $g2) true))))))))))) (let ((PreconditionGeneratedEntry_correct@@9 (=> (! (and %lbl%+4695 true) :lblpos +4695) (=> (and (forall ((i@@7 Int) ) (! (=> (and (TV i@@7) (<= $i1 i@@7) (< i@@7 (+ $i2 128))) (= (select $a (+ $aBase (- i@@7 $i0))) $off)) :qid |BitVecto.80:20| :skolemid |15| :pattern ( (TV i@@7)) )) ($bbvec4 $a $off $aBase $bb $i0 $i1 $i2 $g1 $g2)) (=> (and ($Aligned (B $idx)) ($Aligned (B $g1)) (= (B (- $i2 $i0)) (bvmul #x00000020 (bvsub (B $idx) (B $g1)))) (= $i1 $i0) (=> (and (bvule (bvlshr (B (- $i2 $i0)) #x00000007) #x01ffffff) (= (bvmul #x00000080 (bvlshr (B (- $i2 $i0)) #x00000007)) (B (- $i2 $i0)))) (= (- $idx $g1) (* 4 (I (bvlshr (B (- $i2 $i0)) #x00000007))))) (forall ((i@@8 Int) ) (! (=> (and (TV i@@8) (<= $i2 i@@8) (< i@@8 (+ $i2 128))) (and (bvule (B (- $i2 $i0)) (B (- i@@8 $i0))) (bvule (B (- i@@8 $i0)) (bvadd (B (- $i2 $i0)) #x0000007f)))) :qid |BitVecto.87:20| :skolemid |16| :pattern ( (TV i@@8)) )) (between $g1 $g2 $idx) (= (B 0) #x00000000)) anon0_correct@@9))))) PreconditionGeneratedEntry_correct@@9)) )) (check-sat) (pop 1) (declare-fun %lbl%+3027 () Bool) (declare-fun %lbl%@5396 () Bool) (declare-fun $k () Int) (declare-fun $i0@@0 () Int) (declare-fun %lbl%+5376 () Bool) (push 1) (set-info :boogie-vc-id _bb4GetBit) (assert (not (let ((anon0_correct@@10 (=> (! (and %lbl%+3027 true) :lblpos +3027) (and (! (or %lbl%@5396 (bvule (bvand (bvlshr (B (- $k $i0@@0)) #x00000002) #x0000001f) #x0000001f)) :lblneg @5396) (=> (bvule (bvand (bvlshr (B (- $k $i0@@0)) #x00000002) #x0000001f) #x0000001f) true))))) (let ((PreconditionGeneratedEntry_correct@@10 (=> (! 
(and %lbl%+5376 true) :lblpos +5376) anon0_correct@@10))) PreconditionGeneratedEntry_correct@@10)) )) (check-sat) (pop 1) (declare-fun %lbl%+3078 () Bool) (declare-fun %lbl%@5716 () Bool) (declare-fun $a@@0 () (Array Int Int)) (declare-fun $aBase@@0 () Int) (declare-fun $k@@0 () Int) (declare-fun $i0@@1 () Int) (declare-fun $on () Int) (declare-fun $off@@0 () Int) (declare-fun $ret () (Array Int Int)) (declare-fun $i1@@0 () Int) (declare-fun $i2@@0 () Int) (declare-fun $g1@@0 () Int) (declare-fun $g2@@0 () Int) (declare-fun %lbl%@5750 () Bool) (declare-fun $idx@@0 () Int) (declare-fun %lbl%@5758 () Bool) (declare-fun %lbl%+5417 () Bool) (declare-fun $bb@@0 () (Array Int Int)) (declare-fun $bbb () Int) (push 1) (set-info :boogie-vc-id _bb4SetBit) (assert (not (let ((anon0_correct@@11 (=> (! (and %lbl%+3078 true) :lblpos +3078) (and (! (or %lbl%@5716 ($bbvec4 (store $a@@0 (+ $aBase@@0 (- $k@@0 $i0@@1)) $on) $off@@0 $aBase@@0 $ret $i0@@1 $i1@@0 $i2@@0 $g1@@0 $g2@@0)) :lblneg @5716) (=> ($bbvec4 (store $a@@0 (+ $aBase@@0 (- $k@@0 $i0@@1)) $on) $off@@0 $aBase@@0 $ret $i0@@1 $i1@@0 $i2@@0 $g1@@0 $g2@@0) (and (! (or %lbl%@5750 (between $g1@@0 $g2@@0 $idx@@0)) :lblneg @5750) (=> (between $g1@@0 $g2@@0 $idx@@0) (and (! (or %lbl%@5758 (bvule (bvand (bvlshr (B (- $k@@0 $i0@@1)) #x00000002) #x0000001f) #x0000001f)) :lblneg @5758) (=> (bvule (bvand (bvlshr (B (- $k@@0 $i0@@1)) #x00000002) #x0000001f) #x0000001f) true))))))))) (let ((PreconditionGeneratedEntry_correct@@11 (=> (! 
(and %lbl%+5417 true) :lblpos +5417) (=> ($bbvec4 $a@@0 $off@@0 $aBase@@0 $bb@@0 $i0@@1 $i1@@0 $i2@@0 $g1@@0 $g2@@0) (=> (and (TV $k@@0) (word (- $k@@0 $i0@@1)) (<= $i1@@0 $k@@0) (< $k@@0 $i2@@0) ($Aligned (B (- $k@@0 $i0@@1))) (not (= $on $off@@0)) (= $idx@@0 (+ $g1@@0 (* 4 (I (bvlshr (B (- $k@@0 $i0@@1)) #x00000007))))) (= (B $bbb) (bvor (B (select $bb@@0 $idx@@0)) (bvshl #x00000001 (bvand (bvlshr (B (- $k@@0 $i0@@1)) #x00000002) #x0000001f)))) (= $ret (store $bb@@0 $idx@@0 $bbb))) anon0_correct@@11))))) PreconditionGeneratedEntry_correct@@11)) )) (check-sat) (pop 1) (declare-fun %lbl%+3187 () Bool) (declare-fun %lbl%@6314 () Bool) (declare-fun $i2@@1 () Int) (declare-fun $i0@@2 () Int) (declare-fun %lbl%@6340 () Bool) (declare-fun $idx@@1 () Int) (declare-fun $g1@@1 () Int) (declare-fun %lbl%@6366 () Bool) (declare-fun %lbl%@6426 () Bool) (declare-fun $a@@1 () (Array Int Int)) (declare-fun $aBase@@1 () Int) (declare-fun $bb@@1 () (Array Int Int)) (declare-fun $i1@@1 () Int) (declare-fun $g2@@1 () Int) (declare-fun %lbl%+5779 () Bool) (push 1) (set-info :boogie-vc-id _bb4Zero2) (assert (not (let ((anon0_correct@@12 (=> (! (and %lbl%+3187 true) :lblpos +3187) (and (! (or %lbl%@6314 (= (bvmul #x00000040 (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)) (B (- $i2@@1 $i0@@2)))) :lblneg @6314) (=> (= (bvmul #x00000040 (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)) (B (- $i2@@1 $i0@@2))) (and (! (or %lbl%@6340 (= (- $idx@@1 $g1@@1) (* 4 (I (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006))))) :lblneg @6340) (=> (= (- $idx@@1 $g1@@1) (* 4 (I (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)))) (and (! (or %lbl%@6366 (forall ((i@@9 Int) ) (! (=> (and (TV i@@9) (<= $i2@@1 i@@9) (< i@@9 (+ $i2@@1 64))) (= (bvlshr (B (- i@@9 $i0@@2)) #x00000006) (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006))) :qid |BitVecto.77:18| :skolemid |36| :pattern ( (TV i@@9)) ))) :lblneg @6366) (=> (forall ((i@@10 Int) ) (! 
(=> (and (TV i@@10) (<= $i2@@1 i@@10) (< i@@10 (+ $i2@@1 64))) (= (bvlshr (B (- i@@10 $i0@@2)) #x00000006) (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006))) :qid |BitVecto.77:18| :skolemid |36| :pattern ( (TV i@@10)) )) (and (! (or %lbl%@6426 ($bb2vec4 $a@@1 $aBase@@1 (store $bb@@1 $idx@@1 0) $i0@@2 $i1@@1 (+ $i2@@1 64) $g1@@1 $g2@@1)) :lblneg @6426) (=> ($bb2vec4 $a@@1 $aBase@@1 (store $bb@@1 $idx@@1 0) $i0@@2 $i1@@1 (+ $i2@@1 64) $g1@@1 $g2@@1) true))))))))))) (let ((PreconditionGeneratedEntry_correct@@12 (=> (! (and %lbl%+5779 true) :lblpos +5779) (=> (and (forall ((i@@11 Int) ) (! (=> (and (TV i@@11) (<= $i1@@1 i@@11) (< i@@11 (+ $i2@@1 64))) (= (select $a@@1 (+ $aBase@@1 (- i@@11 $i0@@2))) 0)) :qid |BitVecto.108:20| :skolemid |17| :pattern ( (TV i@@11)) )) ($bb2vec4 $a@@1 $aBase@@1 $bb@@1 $i0@@2 $i1@@1 $i2@@1 $g1@@1 $g2@@1)) (=> (and ($Aligned (B $idx@@1)) ($Aligned (B $g1@@1)) (= (B (- $i2@@1 $i0@@2)) (bvmul #x00000010 (bvsub (B $idx@@1) (B $g1@@1)))) (= $i1@@1 $i0@@2) (=> (and (bvule (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006) #x03ffffff) (= (bvmul #x00000040 (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006)) (B (- $i2@@1 $i0@@2)))) (= (- $idx@@1 $g1@@1) (* 4 (I (bvlshr (B (- $i2@@1 $i0@@2)) #x00000006))))) (forall ((i@@12 Int) ) (! (=> (and (TV i@@12) (<= $i2@@1 i@@12) (< i@@12 (+ $i2@@1 64))) (and (bvule (B (- $i2@@1 $i0@@2)) (B (- i@@12 $i0@@2))) (bvule (B (- i@@12 $i0@@2)) (bvadd (B (- $i2@@1 $i0@@2)) #x0000003f)))) :qid |BitVecto.115:20| :skolemid |18| :pattern ( (TV i@@12)) )) (between $g1@@1 $g2@@1 $idx@@1) (= (B 0) #x00000000)) anon0_correct@@12))))) PreconditionGeneratedEntry_correct@@12)) )) (check-sat) (pop 1) (declare-fun %lbl%+3196 () Bool) (declare-fun %lbl%@6475 () Bool) (declare-fun $k@@1 () Int) (declare-fun $i0@@3 () Int) (declare-fun %lbl%+6455 () Bool) (push 1) (set-info :boogie-vc-id _bb4Get2Bit) (assert (not (let ((anon0_correct@@13 (=> (! (and %lbl%+3196 true) :lblpos +3196) (and (! 
(or %lbl%@6475 (bvule (bvand (bvlshr (B (- $k@@1 $i0@@3)) #x00000001) #x0000001f) #x0000001f)) :lblneg @6475) (=> (bvule (bvand (bvlshr (B (- $k@@1 $i0@@3)) #x00000001) #x0000001f) #x0000001f) true))))) (let ((PreconditionGeneratedEntry_correct@@13 (=> (! (and %lbl%+6455 true) :lblpos +6455) anon0_correct@@13))) PreconditionGeneratedEntry_correct@@13)) )) (check-sat) (pop 1) (declare-fun %lbl%+3247 () Bool) (declare-fun %lbl%@6863 () Bool) (declare-fun $a@@2 () (Array Int Int)) (declare-fun $aBase@@2 () Int) (declare-fun $k@@2 () Int) (declare-fun $i0@@4 () Int) (declare-fun $val () Int) (declare-fun $ret@@0 () (Array Int Int)) (declare-fun $i1@@2 () Int) (declare-fun $i2@@2 () Int) (declare-fun $g1@@2 () Int) (declare-fun $g2@@2 () Int) (declare-fun %lbl%@6895 () Bool) (declare-fun $idx@@2 () Int) (declare-fun %lbl%@6903 () Bool) (declare-fun %lbl%+6496 () Bool) (declare-fun $bb@@2 () (Array Int Int)) (declare-fun $bbb@@0 () Int) (declare-fun $_bbb () Int) (push 1) (set-info :boogie-vc-id _bb4Set2Bit) (assert (not (let ((anon0_correct@@14 (=> (! (and %lbl%+3247 true) :lblpos +3247) (and (! (or %lbl%@6863 ($bb2vec4 (store $a@@2 (+ $aBase@@2 (- $k@@2 $i0@@4)) $val) $aBase@@2 $ret@@0 $i0@@4 $i1@@2 $i2@@2 $g1@@2 $g2@@2)) :lblneg @6863) (=> ($bb2vec4 (store $a@@2 (+ $aBase@@2 (- $k@@2 $i0@@4)) $val) $aBase@@2 $ret@@0 $i0@@4 $i1@@2 $i2@@2 $g1@@2 $g2@@2) (and (! (or %lbl%@6895 (between $g1@@2 $g2@@2 $idx@@2)) :lblneg @6895) (=> (between $g1@@2 $g2@@2 $idx@@2) (and (! (or %lbl%@6903 (bvule (bvand (bvlshr (B (- $k@@2 $i0@@4)) #x00000001) #x0000001f) #x0000001f)) :lblneg @6903) (=> (bvule (bvand (bvlshr (B (- $k@@2 $i0@@4)) #x00000001) #x0000001f) #x0000001f) true))))))))) (let ((PreconditionGeneratedEntry_correct@@14 (=> (! 
(and %lbl%+6496 true) :lblpos +6496) (=> ($bb2vec4 $a@@2 $aBase@@2 $bb@@2 $i0@@4 $i1@@2 $i2@@2 $g1@@2 $g2@@2) (=> (and (TV $k@@2) (word (- $k@@2 $i0@@4)) (<= $i1@@2 $k@@2) (< $k@@2 $i2@@2) ($Aligned (B (- $k@@2 $i0@@4))) (= $idx@@2 (+ $g1@@2 (* 4 (I (bvlshr (B (- $k@@2 $i0@@4)) #x00000006))))) (bvule (B $val) #x00000003) (= (B $bbb@@0) (bvand (B (select $bb@@2 $idx@@2)) (bvnot (bvshl #x00000003 (bvand (bvlshr (B (- $k@@2 $i0@@4)) #x00000001) #x0000001f))))) (= (B $_bbb) (bvor (B $bbb@@0) (bvshl (B $val) (bvand (bvlshr (B (- $k@@2 $i0@@4)) #x00000001) #x0000001f)))) (= $ret@@0 (store $bb@@2 $idx@@2 $_bbb))) anon0_correct@@14))))) PreconditionGeneratedEntry_correct@@14)) )) (check-sat) (pop 1) (declare-fun %lbl%+3250 () Bool) (declare-fun %lbl%@7266 () Bool) (declare-fun %lbl%@7276 () Bool) (declare-fun %lbl%@7286 () Bool) (declare-fun %lbl%@7296 () Bool) (declare-fun %lbl%@7306 () Bool) (declare-fun %lbl%@7316 () Bool) (declare-fun %lbl%@7326 () Bool) (declare-fun %lbl%@7336 () Bool) (declare-fun %lbl%@7346 () Bool) (declare-fun %lbl%@7356 () Bool) (declare-fun %lbl%@7366 () Bool) (declare-fun %lbl%@7376 () Bool) (declare-fun %lbl%@7386 () Bool) (declare-fun %lbl%@7396 () Bool) (declare-fun %lbl%@7406 () Bool) (declare-fun %lbl%@7416 () Bool) (declare-fun %lbl%@7426 () Bool) (declare-fun %lbl%@7436 () Bool) (declare-fun %lbl%@7446 () Bool) (declare-fun %lbl%@7456 () Bool) (declare-fun %lbl%@7466 () Bool) (declare-fun %lbl%@7476 () Bool) (declare-fun %lbl%@7486 () Bool) (declare-fun %lbl%@7496 () Bool) (declare-fun %lbl%@7506 () Bool) (declare-fun %lbl%@7516 () Bool) (declare-fun %lbl%@7526 () Bool) (declare-fun %lbl%@7536 () Bool) (declare-fun %lbl%@7546 () Bool) (declare-fun %lbl%@7556 () Bool) (declare-fun %lbl%@7566 () Bool) (declare-fun %lbl%+6924 () Bool) (push 1) (set-info :boogie-vc-id _const) (assert (not (let ((anon0_correct@@15 (=> (! (and %lbl%+3250 true) :lblpos +3250) (and (! 
(or %lbl%@7266 (= (bvsub #x00000001 #x00000001) #x00000000)) :lblneg @7266) (=> (= (bvsub #x00000001 #x00000001) #x00000000) (and (! (or %lbl%@7276 (= (bvadd #x00000001 #x00000001) #x00000002)) :lblneg @7276) (=> (= (bvadd #x00000001 #x00000001) #x00000002) (and (! (or %lbl%@7286 (= (bvadd #x00000002 #x00000001) #x00000003)) :lblneg @7286) (=> (= (bvadd #x00000002 #x00000001) #x00000003) (and (! (or %lbl%@7296 (= (bvadd #x00000002 #x00000002) #x00000004)) :lblneg @7296) (=> (= (bvadd #x00000002 #x00000002) #x00000004) (and (! (or %lbl%@7306 (= (bvadd #x00000004 #x00000001) #x00000005)) :lblneg @7306) (=> (= (bvadd #x00000004 #x00000001) #x00000005) (and (! (or %lbl%@7316 (= (bvadd #x00000005 #x00000001) #x00000006)) :lblneg @7316) (=> (= (bvadd #x00000005 #x00000001) #x00000006) (and (! (or %lbl%@7326 (= (bvadd #x00000005 #x00000002) #x00000007)) :lblneg @7326) (=> (= (bvadd #x00000005 #x00000002) #x00000007) (and (! (or %lbl%@7336 (= (bvmul #x00000004 #x00000004) #x00000010)) :lblneg @7336) (=> (= (bvmul #x00000004 #x00000004) #x00000010) (and (! (or %lbl%@7346 (= (bvadd #x00000010 #x00000010) #x00000020)) :lblneg @7346) (=> (= (bvadd #x00000010 #x00000010) #x00000020) (and (! (or %lbl%@7356 (= (bvsub #x00000020 #x00000001) #x0000001f)) :lblneg @7356) (=> (= (bvsub #x00000020 #x00000001) #x0000001f) (and (! (or %lbl%@7366 (= (bvadd #x00000020 #x00000020) #x00000040)) :lblneg @7366) (=> (= (bvadd #x00000020 #x00000020) #x00000040) (and (! (or %lbl%@7376 (= (bvsub #x00000040 #x00000001) #x0000003f)) :lblneg @7376) (=> (= (bvsub #x00000040 #x00000001) #x0000003f) (and (! (or %lbl%@7386 (= (bvmul #x00000020 #x00000004) #x00000080)) :lblneg @7386) (=> (= (bvmul #x00000020 #x00000004) #x00000080) (and (! (or %lbl%@7396 (= (bvsub #x00000080 #x00000001) #x0000007f)) :lblneg @7396) (=> (= (bvsub #x00000080 #x00000001) #x0000007f) (and (! (or %lbl%@7406 (= (bvmul #x00000010 #x00000010) #x00000100)) :lblneg @7406) (=> (= (bvmul #x00000010 #x00000010) #x00000100) (and (! 
(or %lbl%@7416 (= (bvadd #x00000100 #x00000100) #x00000200)) :lblneg @7416) (=> (= (bvadd #x00000100 #x00000100) #x00000200) (and (! (or %lbl%@7426 (= (bvmul #x00000040 #x00000040) #x00001000)) :lblneg @7426) (=> (= (bvmul #x00000040 #x00000040) #x00001000) (and (! (or %lbl%@7436 (= (bvsub #x00001000 #x00000001) #x00000fff)) :lblneg @7436) (=> (= (bvsub #x00001000 #x00000001) #x00000fff) (and (! (or %lbl%@7446 (= (bvmul #x00000100 #x00000100) #x00010000)) :lblneg @7446) (=> (= (bvmul #x00000100 #x00000100) #x00010000) (and (! (or %lbl%@7456 (= (bvsub #x00010000 #x00000001) #x0000ffff)) :lblneg @7456) (=> (= (bvsub #x00010000 #x00000001) #x0000ffff) (and (! (or %lbl%@7466 (= (bvmul #x00010000 #x00000020) #x00200000)) :lblneg @7466) (=> (= (bvmul #x00010000 #x00000020) #x00200000) (and (! (or %lbl%@7476 (= (bvsub #x00200000 #x00000001) #x001fffff)) :lblneg @7476) (=> (= (bvsub #x00200000 #x00000001) #x001fffff) (and (! (or %lbl%@7486 (= (bvmul #x00010000 #x00000100) #x01000000)) :lblneg @7486) (=> (= (bvmul #x00010000 #x00000100) #x01000000) (and (! (or %lbl%@7496 (= (bvsub #x01000000 #x00000001) #x00ffffff)) :lblneg @7496) (=> (= (bvsub #x01000000 #x00000001) #x00ffffff) (and (! (or %lbl%@7506 (= (bvmul #x00010000 #x00000200) #x02000000)) :lblneg @7506) (=> (= (bvmul #x00010000 #x00000200) #x02000000) (and (! (or %lbl%@7516 (= (bvsub #x02000000 #x00000001) #x01ffffff)) :lblneg @7516) (=> (= (bvsub #x02000000 #x00000001) #x01ffffff) (and (! (or %lbl%@7526 (= (bvadd #x02000000 #x02000000) #x04000000)) :lblneg @7526) (=> (= (bvadd #x02000000 #x02000000) #x04000000) (and (! (or %lbl%@7536 (= (bvsub #x04000000 #x00000001) #x03ffffff)) :lblneg @7536) (=> (= (bvsub #x04000000 #x00000001) #x03ffffff) (and (! (or %lbl%@7546 (= (bvmul #x00010000 #x0000ffff) #xffff0000)) :lblneg @7546) (=> (= (bvmul #x00010000 #x0000ffff) #xffff0000) (and (! (or %lbl%@7556 (= (bvadd #xffff0000 #x0000ffff) #xffffffff)) :lblneg @7556) (=> (= (bvadd #xffff0000 #x0000ffff) #xffffffff) (and (! 
(or %lbl%@7566 (= (bvsub #xffffffff #x00000003) #xfffffffc)) :lblneg @7566) (=> (= (bvsub #xffffffff #x00000003) #xfffffffc) true))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) (let ((PreconditionGeneratedEntry_correct@@15 (=> (! (and %lbl%+6924 true) :lblpos +6924) anon0_correct@@15))) PreconditionGeneratedEntry_correct@@15)) )) (check-sat) (pop 1) ================================================ FILE: test/json.seed ================================================ 1000000 010E5 ================================================ FILE: test/no_main.c ================================================ #define ASMSTR(S) "\t" S "\n" asm( ".globl _entry\n" ".type _entry,@function\n" "_entry:\n" ASMSTR(".intel_syntax noprefix") ASMSTR("mov rdi, [rsp];") ASMSTR("dec rdi;") ASMSTR("mov r15, rdi;") ASMSTR("test rdi, rdi;") ASMSTR("jne Y1;") ASMSTR("jmp B1;") ASMSTR(".byte 0x2f") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR(".global Y1") ASMSTR("Y1:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X1];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A1];") ASMSTR(".global A1") ASMSTR("A1:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z1") ASMSTR("Z1:") ASMSTR("call A1;") ASMSTR(".global X1") ASMSTR("X1:") ASMSTR("jmp A1;") ASMSTR(".global B1") ASMSTR("B1:") ASMSTR("test r15, r15;") ASMSTR("jne Y2;") ASMSTR("jmp B2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR(".global Y2") ASMSTR("Y2:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X2];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A2];") ASMSTR(".global A2") ASMSTR("A2:") 
ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z2") ASMSTR("Z2:") ASMSTR("call A2;") ASMSTR(".global X2") ASMSTR("X2:") ASMSTR("jmp A2;") ASMSTR(".global B2") ASMSTR("B2:") ASMSTR("test r15, r15;") ASMSTR("jne Y3;") ASMSTR("jmp B3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR(".global Y3") ASMSTR("Y3:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X3];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A3];") ASMSTR(".global A3") ASMSTR("A3:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z3") ASMSTR("Z3:") ASMSTR("call A3;") ASMSTR(".global X3") ASMSTR("X3:") ASMSTR("jmp A3;") ASMSTR(".global B3") ASMSTR("B3:") ASMSTR("test r15, r15;") ASMSTR("jne Y4;") ASMSTR("jmp B4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR(".global Y4") ASMSTR("Y4:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X4];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A4];") ASMSTR(".global A4") ASMSTR("A4:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z4") ASMSTR("Z4:") ASMSTR("call A4;") ASMSTR(".global X4") ASMSTR("X4:") ASMSTR("jmp A4;") ASMSTR(".global B4") ASMSTR("B4:") ASMSTR("test r15, r15;") ASMSTR("jne Y5;") ASMSTR("jmp B5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR(".global Y5") ASMSTR("Y5:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X5];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip 
+ A5];") ASMSTR(".global A5") ASMSTR("A5:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z5") ASMSTR("Z5:") ASMSTR("call A5;") ASMSTR(".global X5") ASMSTR("X5:") ASMSTR("jmp A5;") ASMSTR(".global B5") ASMSTR("B5:") ASMSTR("test r15, r15;") ASMSTR("jne Y6;") ASMSTR("jmp B6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR(".global Y6") ASMSTR("Y6:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X6];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A6];") ASMSTR(".global A6") ASMSTR("A6:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z6") ASMSTR("Z6:") ASMSTR("call A6;") ASMSTR(".global X6") ASMSTR("X6:") ASMSTR("jmp A6;") ASMSTR(".global B6") ASMSTR("B6:") ASMSTR("test r15, r15;") ASMSTR("jne Y7;") ASMSTR("jmp B7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR(".global Y7") ASMSTR("Y7:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X7];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A7];") ASMSTR(".global A7") ASMSTR("A7:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z7") ASMSTR("Z7:") ASMSTR("call A7;") ASMSTR(".global X7") ASMSTR("X7:") ASMSTR("jmp A7;") ASMSTR(".global B7") ASMSTR("B7:") ASMSTR("test r15, r15;") ASMSTR("jne Y8;") ASMSTR("jmp B8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR(".global Y8") ASMSTR("Y8:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X8];") ASMSTR("sub rbx, 
0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A8];") ASMSTR(".global A8") ASMSTR("A8:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z8") ASMSTR("Z8:") ASMSTR("call A8;") ASMSTR(".global X8") ASMSTR("X8:") ASMSTR("jmp A8;") ASMSTR(".global B8") ASMSTR("B8:") ASMSTR("test r15, r15;") ASMSTR("jne Y9;") ASMSTR("jmp B9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR(".global Y9") ASMSTR("Y9:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X9];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A9];") ASMSTR(".global A9") ASMSTR("A9:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z9") ASMSTR("Z9:") ASMSTR("call A9;") ASMSTR(".global X9") ASMSTR("X9:") ASMSTR("jmp A9;") ASMSTR(".global B9") ASMSTR("B9:") ASMSTR("mov rax, 60;") ASMSTR("mov rdi, 0;") ASMSTR("syscall;") ASMSTR("ret;") ASMSTR(".att_syntax;") ); ================================================ FILE: test/runtime/server.key ================================================ -----BEGIN PRIVATE KEY----- MIIBVAIBADANBgkqhkiG9w0BAQEFAASCAT4wggE6AgEAAkEA1AdZNDVOA9cXm97f erp1bukz2kohjToJS6Ma8fOb36VV9lQGmDNsJanXFiqafOgV+kh1HXqZ3l1I0JmZ 71b+QQIDAQABAkAHGfPn5r0lLcgRpWZQwvv56f+dmQwEoeP7z4uwfNtEo0JcRD66 1WRCvx3LE0VbNeaEdNmSPiRXhlwIggjfrBi9AiEA9UusPBcEp/QcPGs96nQQdQzE fw4x0HL/eSV3qHimT6MCIQDdSAiX4Ouxoiwn/9KhDMcZXRYX/OPzj6w8u1YIH7BI ywIgSozbJdAhHCJ2ym4VfUIVFl3xAmSAA0hQGLOocE1qzl0CIQDRicOxZmhqBiKA IgznOn1StEYWov+MhRFZVSBLgw5gbwIgJzOlSlu0Y22hEUsLCKyHBrCAZZHcZ020 20pfogmQYn0= -----END PRIVATE KEY----- ================================================ FILE: test/runtime/server.pem ================================================ -----BEGIN CERTIFICATE----- MIIBYTCCAQugAwIBAgIJAMPQQtUHkx+KMA0GCSqGSIb3DQEBCwUAMAwxCjAIBgNV 
BAMMAWEwHhcNMTYwOTI0MjIyMDUyWhcNNDQwMjA5MjIyMDUyWjAMMQowCAYDVQQD DAFhMFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBANQHWTQ1TgPXF5ve33q6dW7pM9pK IY06CUujGvHzm9+lVfZUBpgzbCWp1xYqmnzoFfpIdR16md5dSNCZme9W/kECAwEA AaNQME4wHQYDVR0OBBYEFCXtEo9rkLuKGSlm0mFE4Yk/HDJVMB8GA1UdIwQYMBaA FCXtEo9rkLuKGSlm0mFE4Yk/HDJVMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEL BQADQQCnldOnbdNJZxBO/J+979Urg8qDp8MnlN0979AmK1P5/YzPnAF4BU7QTOTE imS5qZ0MvziBa81nVlnnFRkIezcD -----END CERTIFICATE----- ================================================ FILE: test/test_daemon.sh ================================================ #!/bin/bash readonly EXIT_FAILURE=1 tool=$1 options=$2 target=$3 phantom=$target.phantom echo "phantom file: $phantom" rm -rf $phantom $tool $options -- $target 2>$target.daemon.log & daemon_pid=$! for i in {1..100} do if [ -f $phantom ]; then echo "$target: daemon is up" if [ -v STOCHFUZZ_PRELOAD ]; then LD_PRELOAD=$STOCHFUZZ_PRELOAD ./$phantom ${@:4} code=$? else ./$phantom ${@:4} code=$? fi kill -0 $daemon_pid if [ "$?" -eq "0" ]; then wait $daemon_pid fi exit $code else echo "$target: daemon is not ready" sleep 5 fi done echo "$target: timeout" kill -9 $daemon_pid exit 1 ================================================ FILE: test/test_daemon_ignore_asan_sof.sh ================================================ #!/bin/bash readonly EXIT_FAILURE=1 tool=$1 options=$2 target=$3 phantom=$target.phantom echo "phantom file: $phantom" rm -rf $phantom $tool $options -- $target 2>$target.daemon.log & daemon_pid=$! for i in {1..100} do if [ -f $phantom ]; then echo "$target: daemon is up" ./$phantom ${@:4} code=$? kill -0 $daemon_pid if [ "$?" -eq "0" ]; then wait $daemon_pid fi exit $code else grep -F "SUMMARY: AddressSanitizer: stack-overflow" $target.daemon.log if [ "$?" 
-eq "0" ]; then echo "ASAN stack-overflow: ignore this program" exit 0 fi echo "$target: daemon is not ready" sleep 5 fi done echo "$target: timeout" kill -9 $daemon_pid exit 1 ================================================ FILE: test/timeout.c ================================================ #include #include static void my_sleep() { sleep(5); } int main(int argc, const char **argv) { if (argc == 2 && !strcmp(argv[1], "mdzz")) { void (*p)() = my_sleep; (*p)(); } } ================================================ FILE: test/unintentional_crash.c ================================================ #define ASMSTR(S) "\t" S "\n" int main(int argc, char **argv) { asm volatile( ASMSTR(".intel_syntax noprefix") ASMSTR("dec rdi;") ASMSTR("mov r15, rdi;") ASMSTR("test rdi, rdi;") ASMSTR("jne Y1;") ASMSTR("jmp B1;") ASMSTR(".byte 0x2f") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR("jmp A1;") ASMSTR("jmp Z1;") ASMSTR(".global Y1") ASMSTR("Y1:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X1];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A1];") ASMSTR(".global A1") ASMSTR("A1:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z1") ASMSTR("Z1:") ASMSTR("call A1;") ASMSTR(".global X1") ASMSTR("X1:") ASMSTR("jmp A1;") ASMSTR(".global B1") ASMSTR("B1:") ASMSTR("test r15, r15;") ASMSTR("jne Y2;") ASMSTR("jmp B2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR("jmp A2;") ASMSTR("jmp Z2;") ASMSTR(".global Y2") ASMSTR("Y2:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X2];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A2];") ASMSTR(".global A2") ASMSTR("A2:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") 
ASMSTR(".global Z2") ASMSTR("Z2:") ASMSTR("call A2;") ASMSTR(".global X2") ASMSTR("X2:") ASMSTR("jmp A2;") ASMSTR(".global B2") ASMSTR("B2:") ASMSTR("test r15, r15;") ASMSTR("jne Y3;") ASMSTR("jmp B3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR("jmp A3;") ASMSTR("jmp Z3;") ASMSTR(".global Y3") ASMSTR("Y3:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X3];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A3];") ASMSTR(".global A3") ASMSTR("A3:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z3") ASMSTR("Z3:") ASMSTR("call A3;") ASMSTR(".global X3") ASMSTR("X3:") ASMSTR("jmp A3;") ASMSTR(".global B3") ASMSTR("B3:") ASMSTR("test r15, r15;") ASMSTR("jne Y4;") ASMSTR("jmp B4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR("jmp A4;") ASMSTR("jmp Z4;") ASMSTR(".global Y4") ASMSTR("Y4:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X4];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A4];") ASMSTR(".global A4") ASMSTR("A4:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z4") ASMSTR("Z4:") ASMSTR("call A4;") ASMSTR(".global X4") ASMSTR("X4:") ASMSTR("jmp A4;") ASMSTR(".global B4") ASMSTR("B4:") ASMSTR("test r15, r15;") ASMSTR("jne Y5;") ASMSTR("jmp B5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR("jmp A5;") ASMSTR("jmp Z5;") ASMSTR(".global Y5") ASMSTR("Y5:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X5];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A5];") ASMSTR(".global A5") ASMSTR("A5:") ASMSTR("pop 
r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z5") ASMSTR("Z5:") ASMSTR("call A5;") ASMSTR(".global X5") ASMSTR("X5:") ASMSTR("jmp A5;") ASMSTR(".global B5") ASMSTR("B5:") ASMSTR("test r15, r15;") ASMSTR("jne Y6;") ASMSTR("jmp B6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR("jmp A6;") ASMSTR("jmp Z6;") ASMSTR(".global Y6") ASMSTR("Y6:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X6];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A6];") ASMSTR(".global A6") ASMSTR("A6:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z6") ASMSTR("Z6:") ASMSTR("call A6;") ASMSTR(".global X6") ASMSTR("X6:") ASMSTR("jmp A6;") ASMSTR(".global B6") ASMSTR("B6:") ASMSTR("test r15, r15;") ASMSTR("jne Y7;") ASMSTR("jmp B7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR("jmp A7;") ASMSTR("jmp Z7;") ASMSTR(".global Y7") ASMSTR("Y7:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X7];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A7];") ASMSTR(".global A7") ASMSTR("A7:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z7") ASMSTR("Z7:") ASMSTR("call A7;") ASMSTR(".global X7") ASMSTR("X7:") ASMSTR("jmp A7;") ASMSTR(".global B7") ASMSTR("B7:") ASMSTR("test r15, r15;") ASMSTR("jne Y8;") ASMSTR("jmp B8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR("jmp A8;") ASMSTR("jmp Z8;") ASMSTR(".global Y8") ASMSTR("Y8:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X8];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A8];") 
ASMSTR(".global A8") ASMSTR("A8:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z8") ASMSTR("Z8:") ASMSTR("call A8;") ASMSTR(".global X8") ASMSTR("X8:") ASMSTR("jmp A8;") ASMSTR(".global B8") ASMSTR("B8:") ASMSTR("test r15, r15;") ASMSTR("jne Y9;") ASMSTR("jmp B9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR("jmp A9;") ASMSTR("jmp Z9;") ASMSTR(".global Y9") ASMSTR("Y9:") ASMSTR("xor rbx, rbx") ASMSTR("mov bx, word ptr [X9];") ASMSTR("sub rbx, 0xf1dc;") ASMSTR("push rbx;") ASMSTR("lea r8, [rip + A9];") ASMSTR(".global A9") ASMSTR("A9:") ASMSTR("pop r9;") ASMSTR("add r8, r9;") ASMSTR("call r8;") ASMSTR(".global Z9") ASMSTR("Z9:") ASMSTR("call A9;") ASMSTR(".global X9") ASMSTR("X9:") ASMSTR("jmp A9;") ASMSTR(".global B9") ASMSTR("B9:") ASMSTR("mov rax, 60;") ASMSTR("mov rdi, 0;") ASMSTR("syscall;") ASMSTR("ret;") ASMSTR(".att_syntax;") ); } ================================================ FILE: test/z3 ================================================ [File too large to display: 19.6 MB]