Repository: angr/pyvex Branch: master Commit: 36455aaaf276 Files: 80 Total size: 387.1 KB Directory structure: gitextract_782bwflj/ ├── .git-blame-ignore-revs ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug-report.yml │ │ ├── config.yml │ │ ├── feature-request.yml │ │ └── question.yml │ └── workflows/ │ ├── ci.yml │ ├── cifuzz.yml │ └── nightly-ci.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CMakeLists.txt ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs/ │ ├── Makefile │ ├── api.rst │ ├── conf.py │ ├── index.rst │ ├── make.bat │ └── quickstart.rst ├── fuzzing/ │ ├── build.sh │ ├── enhanced_fdp.py │ └── irsb_fuzzer.py ├── make_ffi.py ├── pyproject.toml ├── pyvex/ │ ├── __init__.py │ ├── _register_info.py │ ├── arches.py │ ├── block.py │ ├── const.py │ ├── const_val.py │ ├── data_ref.py │ ├── enums.py │ ├── errors.py │ ├── expr.py │ ├── lifting/ │ │ ├── __init__.py │ │ ├── gym/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── aarch64_spotter.py │ │ │ ├── arm_spotter.py │ │ │ └── x86_spotter.py │ │ ├── libvex.py │ │ ├── lift_function.py │ │ ├── lifter.py │ │ ├── post_processor.py │ │ ├── util/ │ │ │ ├── __init__.py │ │ │ ├── instr_helper.py │ │ │ ├── lifter_helper.py │ │ │ ├── syntax_wrapper.py │ │ │ └── vex_helper.py │ │ └── zerodivision.py │ ├── native.py │ ├── py.typed │ ├── stmt.py │ ├── types.py │ └── utils.py ├── pyvex_c/ │ ├── LICENSE │ ├── README │ ├── analysis.c │ ├── e4c_lite.h │ ├── logging.c │ ├── logging.h │ ├── postprocess.c │ ├── pyvex.c │ ├── pyvex.def │ ├── pyvex.h │ └── pyvex_internal.h └── tests/ ├── test_arm_postprocess.py ├── test_gym.py ├── test_irsb_property_caching.py ├── test_lift.py ├── test_mips32_postprocess.py ├── test_pyvex.py ├── test_s390x_exrl.py ├── test_s390x_lochi.py ├── test_s390x_vl.py ├── test_spotter.py └── test_ud2.py ================================================ FILE CONTENTS ================================================ ================================================ 
FILE: .git-blame-ignore-revs ================================================ # Black + pre-commit 23503e79193a3cff5d6f1c92f22349fd2227d936 # Black cd758543f17a2253b5a0630327eac0ad6780217a # Trailing whitespace, pyupgrade, prefer builtin constructors dfd137fc8d3073ff065347401f528c1eaf62c383 # ruff ================================================ FILE: .github/ISSUE_TEMPLATE/bug-report.yml ================================================ name: Report a bug description: Report a bug in pyvex labels: [bug,needs-triage] body: - type: markdown attributes: value: | Thank you for taking the time to submit this bug report! Before submitting this bug report, please check the following, which may resolve your issue: * Have you checked that you are running the latest versions of angr and its components? angr is rapidly-evolving! * Have you [searched existing issues](https://github.com/angr/pyvex/issues?q=is%3Aopen+is%3Aissue+label%3Abug) to see if this bug has been reported before? * Have you checked the [documentation](https://docs.angr.io/)? * Have you checked the [FAQ](https://docs.angr.io/introductory-errata/faq)? **Important:** If this bug is a security vulnerability, please submit it privately. See our [security policy](https://github.com/angr/angr/blob/master/SECURITY.md) for more details. Please note: The angr suite is maintained by a small team. While we cannot guarantee any timeliness for fixes and enhancements, we will do our best. For more real-time help with angr, from us and the community, join our [Slack](https://angr.io/invite/). - type: textarea attributes: label: Description description: Brief description of the bug, with any relevant log messages. validations: required: true - type: textarea attributes: label: Steps to reproduce the bug description: | If appropriate, include both a **script to reproduce the bug**, and if possible **attach the binary used**. 
**Tip:** You can attach files to the issue by first clicking on the textarea to select it, then dragging & dropping the file onto the textarea. - type: textarea attributes: label: Environment description: Many common issues are caused by problems with the local Python environment. Before submitting, double-check that your versions of all modules in the angr suite (angr, cle, pyvex, ...) are up to date and include the output of `python -m angr.misc.bug_report` here. - type: textarea attributes: label: Additional context description: Any additional context about the problem. ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: Join our Slack community url: https://angr.io/invite/ about: For questions and help with angr, you are invited to join the angr Slack community ================================================ FILE: .github/ISSUE_TEMPLATE/feature-request.yml ================================================ name: Request a feature description: Request a new feature for pyvex labels: [enhancement,needs-triage] body: - type: markdown attributes: value: | Thank you for taking the time to submit this feature request! Before submitting this feature request, please check the following: * Have you checked that you are running the latest versions of angr and its components? angr is rapidly-evolving! * Have you checked the [documentation](https://docs.angr.io/) to see if this feature exists already? * Have you [searched existing issues](https://github.com/angr/pyvex/issues?q=is%3Aissue+label%3Aenhancement+) to see if this feature has been requested before? Please note: The angr suite is maintained by a small team. While we cannot guarantee any timeliness for fixes and enhancements, we will do our best. For more real-time help with angr, from us and the community, join our [Slack](https://angr.io/invite/). 
- type: textarea attributes: label: Description description: | Brief description of the desired feature. If the feature is intended to solve some problem, please clearly describe the problem, including any relevant binaries, etc. **Tip:** You can attach files to the issue by first clicking on the textarea to select it, then dragging & dropping the file onto the textarea. validations: required: true - type: textarea attributes: label: Alternatives description: Possible alternative solutions or features that you have considered. - type: textarea attributes: label: Additional context description: Any other context or screenshots about the feature request. ================================================ FILE: .github/ISSUE_TEMPLATE/question.yml ================================================ name: Ask a question description: Ask a question about pyvex labels: [question,needs-triage] body: - type: markdown attributes: value: | If you have a question about pyvex, that is not a bug report or a feature request, you can ask it here. For more real-time help with pyvex, from us and the community, join our [Slack](https://angr.io/invite/). Before submitting this question, please check the following, which may answer your question: * Have you checked the [documentation](https://docs.angr.io/)? * Have you checked the [FAQ](https://docs.angr.io/introductory-errata/faq)? * Have you checked our library of [examples](https://github.com/angr/angr-doc/tree/master/examples)? * Have you [searched existing issues](https://github.com/angr/pyvex/issues?q=is%3Aissue+label%3Aquestion) to see if this question has been answered before? * Have you checked that you are running the latest versions of angr and its components. angr is rapidly-evolving! Please note: The angr suite is maintained by a small team. While we cannot guarantee any timeliness for fixes and enhancements, we will do our best. 
- type: textarea attributes: label: Question description: validations: required: true ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: branches: - master pull_request: workflow_dispatch: jobs: ecosystem: uses: angr/ci-settings/.github/workflows/angr-ci.yml@master test: name: Test strategy: matrix: os: [windows-2022, macos-15-intel, macos-15, ubuntu-24.04] fail-fast: false runs-on: ${{ matrix.os }} steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 with: submodules: 'recursive' - name: Activate msvc uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1 if: startsWith(runner.os, 'windows') - name: Setup uv uses: astral-sh/setup-uv@445689ea25e0de0a23313031f5fe577c74ae45a1 # v6 - name: Sync dependencies run: uv sync -p 3.10 - name: Run tests run: uv run pytest tests ================================================ FILE: .github/workflows/cifuzz.yml ================================================ name: OSS-Fuzz on: # push: # branches: # - master # pull_request: workflow_dispatch: permissions: {} jobs: Fuzzing: runs-on: ubuntu-latest permissions: security-events: write steps: - name: Build Fuzzers id: build uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master with: oss-fuzz-project-name: 'pyvex' language: python - name: Run Fuzzers uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master with: oss-fuzz-project-name: 'pyvex' language: python fuzz-seconds: 600 output-sarif: true - name: Upload Crash uses: actions/upload-artifact@v3 if: failure() && steps.build.outcome == 'success' with: name: artifacts path: ./out/artifacts - name: Upload Sarif if: always() && steps.build.outcome == 'success' uses: github/codeql-action/upload-sarif@v2 with: # Path to SARIF file relative to the root of the repository sarif_file: cifuzz-sarif/results.sarif checkout_path: cifuzz-sarif 
================================================ FILE: .github/workflows/nightly-ci.yml ================================================ name: Nightly CI on: schedule: - cron: "0 0 * * *" workflow_dispatch: jobs: ci: uses: angr/ci-settings/.github/workflows/angr-ci.yml@master with: nightly: true secrets: inherit ================================================ FILE: .gitignore ================================================ build dist MANIFEST pyvex_python vex_ffi.py libpyvex.so *.egg-info *.eggs *.pyc *.swp *.obj *.lib *.dll *.exp *.o *.a *.dylib pyvex/lib pyvex/include vex-master vex-master.tar.gz docs/_build scikit_build uv.lock ================================================ FILE: .gitmodules ================================================ [submodule "vex"] path = vex url = https://github.com/angr/vex.git branch = master ================================================ FILE: .pre-commit-config.yaml ================================================ repos: # # Fail fast # - repo: https://github.com/abravalheri/validate-pyproject rev: v0.25 hooks: - id: validate-pyproject fail_fast: true - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: # General - id: check-merge-conflict fail_fast: true - id: check-case-conflict fail_fast: true - id: destroyed-symlinks fail_fast: true - id: check-symlinks fail_fast: true - id: check-added-large-files fail_fast: true # Syntax - id: check-toml fail_fast: true - id: check-json fail_fast: true - id: check-yaml fail_fast: true - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: - id: check-ast fail_fast: true # # Modifiers # - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/dannysepler/rm_unneeded_f_str rev: v0.2.0 hooks: - id: rm-unneeded-f-str - repo: https://github.com/asottile/pyupgrade rev: v3.21.2 hooks: - id: pyupgrade args: [--py310-plus] - repo: 
https://github.com/astral-sh/ruff-pre-commit rev: v0.15.10 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] # Last modifier: Coding Standard - repo: https://github.com/psf/black-pre-commit-mirror rev: 26.3.1 hooks: - id: black # # Static Checks # - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.10.0 hooks: # Python - id: python-use-type-annotations - id: python-no-log-warn # Documentation - id: rst-backticks - id: rst-directive-colons - id: rst-inline-touching-normal - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: - id: debug-statements - id: check-builtin-literals - id: check-docstring-first ================================================ FILE: .readthedocs.yml ================================================ # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/conf.py submodules: include: all build: os: ubuntu-22.04 tools: python: "3.10" jobs: pre_install: - pip install -U pip - pip install git+https://github.com/angr/archinfo.git python: install: - method: pip path: . 
extra_requirements: - docs ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.15) set(CMAKE_POSITION_INDEPENDENT_CODE ON) project(pyvex LANGUAGES C) # Set the output directory for built libraries set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/pyvex/lib) # Set the C standard to C99 set(CMAKE_C_STANDARD 99) # Include directories include_directories( ${CMAKE_SOURCE_DIR}/pyvex/include ${CMAKE_SOURCE_DIR}/pyvex_c ${CMAKE_SOURCE_DIR}/vex/pub ) # Source files for the pyvex C library set(PYVEX_SRC pyvex_c/pyvex.c pyvex_c/analysis.c pyvex_c/logging.c pyvex_c/postprocess.c ) # Source files for the VEX C library set(VEX_SRC vex/priv/ir_defs.c vex/priv/ir_match.c vex/priv/ir_opt.c vex/priv/ir_inject.c vex/priv/main_globals.c vex/priv/main_util.c vex/priv/s390_disasm.c vex/priv/host_x86_defs.c vex/priv/host_amd64_defs.c vex/priv/host_arm_defs.c vex/priv/host_arm64_defs.c vex/priv/host_ppc_defs.c vex/priv/host_riscv64_defs.c vex/priv/host_s390_defs.c vex/priv/host_mips_defs.c vex/priv/host_x86_isel.c vex/priv/host_amd64_isel.c vex/priv/host_arm_isel.c vex/priv/host_arm64_isel.c vex/priv/host_ppc_isel.c vex/priv/host_riscv64_isel.c vex/priv/host_s390_isel.c vex/priv/host_mips_isel.c vex/priv/host_generic_maddf.c vex/priv/host_generic_regs.c vex/priv/host_generic_simd64.c vex/priv/host_generic_simd128.c vex/priv/host_generic_simd256.c vex/priv/host_generic_reg_alloc2.c vex/priv/host_generic_reg_alloc3.c vex/priv/guest_generic_x87.c vex/priv/guest_generic_bb_to_IR.c vex/priv/guest_x86_helpers.c vex/priv/guest_amd64_helpers.c vex/priv/guest_arm_helpers.c vex/priv/guest_arm64_helpers.c vex/priv/guest_ppc_helpers.c vex/priv/guest_riscv64_helpers.c vex/priv/guest_s390_helpers.c vex/priv/guest_mips_helpers.c vex/priv/guest_x86_toIR.c vex/priv/guest_amd64_toIR.c vex/priv/guest_arm_toIR.c vex/priv/guest_arm64_toIR.c vex/priv/guest_ppc_toIR.c vex/priv/guest_riscv64_toIR.c 
vex/priv/guest_s390_toIR.c vex/priv/guest_mips_toIR.c vex/priv/multiarch_main_main.c ) # Build the VEX static library add_library(vex STATIC ${VEX_SRC}) target_compile_definitions(vex PRIVATE PYVEX) target_include_directories(vex PUBLIC ${CMAKE_SOURCE_DIR}/vex/pub) # Build the shared library add_library(pyvex SHARED ${PYVEX_SRC}) set_target_properties(pyvex PROPERTIES OUTPUT_NAME "pyvex") # Handle .def file for Windows builds if (WIN32) set_target_properties(pyvex PROPERTIES LINK_FLAGS "/DEF:${CMAKE_SOURCE_DIR}/pyvex_c/pyvex.def") endif() target_include_directories(pyvex PRIVATE pyvex_c) target_link_libraries(pyvex PRIVATE vex) # Install the built library to the Python package # It is installed twice to handle both editable and non-editable installs install(TARGETS pyvex DESTINATION ${CMAKE_SOURCE_DIR}/pyvex/lib) install(TARGETS pyvex DESTINATION pyvex/lib) # --- BEGIN: Generate pub/libvex_guest_offsets.h --- add_executable(genoffsets vex/auxprogs/genoffsets.c) set_target_properties(genoffsets PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/vex/auxprogs) add_custom_command( OUTPUT ${CMAKE_SOURCE_DIR}/vex/pub/libvex_guest_offsets.h COMMAND $ > ${CMAKE_SOURCE_DIR}/vex/pub/libvex_guest_offsets.h DEPENDS genoffsets WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} COMMENT "Generating pub/libvex_guest_offsets.h" ) add_custom_target(generate_offsets_header DEPENDS ${CMAKE_SOURCE_DIR}/vex/pub/libvex_guest_offsets.h ) install( FILES ${CMAKE_SOURCE_DIR}/vex/pub/libvex_guest_offsets.h DESTINATION pyvex/include ) add_dependencies(vex generate_offsets_header) # --- END: Generate pub/libvex_guest_offsets.h --- # --- BEGIN: Generate pyvex/vex_ffi.py --- add_custom_command( OUTPUT ${CMAKE_SOURCE_DIR}/pyvex/vex_ffi.py COMMAND ${CMAKE_COMMAND} -E env ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/make_ffi.py ${CMAKE_SOURCE_DIR}/vex/pub DEPENDS ${CMAKE_SOURCE_DIR}/vex/pub/libvex_guest_offsets.h WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} COMMENT "Generating pyvex/vex_ffi.py using make_ffi.py" ) 
add_custom_target(generate_vex_ffi_py DEPENDS ${CMAKE_SOURCE_DIR}/pyvex/vex_ffi.py ) install( FILES ${CMAKE_SOURCE_DIR}/pyvex/vex_ffi.py DESTINATION pyvex ) add_dependencies(pyvex generate_vex_ffi_py) # --- END: Generate pyvex/vex_ffi.py --- # --- BEGIN: Copy headers to pyvex/include --- add_custom_command( OUTPUT ${CMAKE_SOURCE_DIR}/pyvex/include/pub COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/vex/pub ${CMAKE_SOURCE_DIR}/pyvex/include/ DEPENDS ${CMAKE_SOURCE_DIR}/vex/pub COMMENT "Copying vex/pub to pyvex/include/" ) add_custom_command( OUTPUT ${CMAKE_SOURCE_DIR}/pyvex/include/pyvex.h COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/pyvex_c/pyvex.h ${CMAKE_SOURCE_DIR}/pyvex/include/pyvex.h DEPENDS ${CMAKE_SOURCE_DIR}/pyvex_c/pyvex.h COMMENT "Copying pyvex_c/pyvex.h to pyvex/include/" ) add_custom_target(copy_headers ALL DEPENDS ${CMAKE_SOURCE_DIR}/pyvex/include/pub ${CMAKE_SOURCE_DIR}/pyvex/include/pyvex.h ) add_dependencies(pyvex copy_headers) add_dependencies(copy_headers generate_offsets_header) # --- END: Copy headers to pyvex/include --- ================================================ FILE: LICENSE ================================================ Copyright (c) 2015, The Regents of the University of California All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: MANIFEST.in ================================================ include LICENSE include README.md include make_ffi.py recursive-include pyvex_c *.c *.h *.def Makefile Makefile-msvc LICENSE recursive-include vex * ================================================ FILE: README.md ================================================ # PyVEX [![Latest Release](https://img.shields.io/pypi/v/pyvex.svg)](https://pypi.python.org/pypi/pyvex/) [![Python Version](https://img.shields.io/pypi/pyversions/pyvex)](https://pypi.python.org/pypi/pyvex/) [![PyPI Statistics](https://img.shields.io/pypi/dm/pyvex.svg)](https://pypistats.org/packages/pyvex) [![License](https://img.shields.io/github/license/angr/pyvex.svg)](https://github.com/angr/pyvex/blob/master/LICENSE) PyVEX is Python bindings for the VEX IR. 
## Project Links Project repository: https://github.com/angr/pyvex Documentation: https://api.angr.io/projects/pyvex/en/latest/ ## Installing PyVEX PyVEX can be pip-installed: ```bash pip install pyvex ``` ## Using PyVEX ```python import pyvex import archinfo # translate an AMD64 basic block (of nops) at 0x400400 into VEX irsb = pyvex.lift(b"\x90\x90\x90\x90\x90", 0x400400, archinfo.ArchAMD64()) # pretty-print the basic block irsb.pp() # this is the IR Expression of the jump target of the unconditional exit at the end of the basic block print(irsb.next) # this is the type of the unconditional exit (i.e., a call, ret, syscall, etc) print(irsb.jumpkind) # you can also pretty-print it irsb.next.pp() # iterate through each statement and print all the statements for stmt in irsb.statements: stmt.pp() # pretty-print the IR expression representing the data, and the *type* of that IR expression written by every store statement import pyvex for stmt in irsb.statements: if isinstance(stmt, pyvex.IRStmt.Store): print("Data:", end="") stmt.data.pp() print("") print("Type:", end="") print(stmt.data.result_type) print("") # pretty-print the condition and jump target of every conditional exit from the basic block for stmt in irsb.statements: if isinstance(stmt, pyvex.IRStmt.Exit): print("Condition:", end="") stmt.guard.pp() print("") print("Target:", end="") stmt.dst.pp() print("") # these are the types of every temp in the IRSB print(irsb.tyenv.types) # here is one way to get the type of temp 0 print(irsb.tyenv.types[0]) ``` Keep in mind that this is a *syntactic* representation of a basic block. That is, it'll tell you what the block means, but you don't have any context to say, for example, what *actual* data is written by a store instruction. ## VEX Intermediate Representation To deal with widely diverse architectures, it is useful to carry out analyses on an intermediate representation. 
An IR abstracts away several architecture differences when dealing with different architectures, allowing a single analysis to be run on all of them: - **Register names.** The quantity and names of registers differ between architectures, but modern CPU designs hold to a common theme: each CPU contains several general purpose registers, a register to hold the stack pointer, a set of registers to store condition flags, and so forth. The IR provides a consistent, abstracted interface to registers on different platforms. Specifically, VEX models the registers as a separate memory space, with integer offsets (i.e., AMD64's `rax` is stored starting at address 16 in this memory space). - **Memory access.** Different architectures access memory in different ways. For example, ARM can access memory in both little-endian and big-endian modes. The IR must abstract away these differences. - **Memory segmentation.** Some architectures, such as x86, support memory segmentation through the use of special segment registers. The IR understands such memory access mechanisms. - **Instruction side-effects.** Most instructions have side-effects. For example, most operations in Thumb mode on ARM update the condition flags, and stack push/pop instructions update the stack pointer. Tracking these side-effects in an *ad hoc* manner in the analysis would be crazy, so the IR makes these effects explicit. There are lots of choices for an IR. We use VEX, since the uplifting of binary code into VEX is quite well supported. VEX is an architecture-agnostic, side-effects-free representation of a number of target machine languages. It abstracts machine code into a representation designed to make program analysis easier. This representation has five main classes of objects: - **Expressions.** IR Expressions represent a calculated or constant value. This includes memory loads, register reads, and results of arithmetic operations. 
- **Operations.** IR Operations describe a *modification* of IR Expressions. This includes integer arithmetic, floating-point arithmetic, bit operations, and so forth. An IR Operation applied to IR Expressions yields an IR Expression as a result. - **Temporary variables.** VEX uses temporary variables as internal registers: IR Expressions are stored in temporary variables between use. The content of a temporary variable can be retrieved using an IR Expression. These temporaries are numbered, starting at `t0`. These temporaries are strongly typed (i.e., "64-bit integer" or "32-bit float"). - **Statements.** IR Statements model changes in the state of the target machine, such as the effect of memory stores and register writes. IR Statements use IR Expressions for values they may need. For example, a memory store *IR Statement* uses an *IR Expression* for the target address of the write, and another *IR Expression* for the content. - **Blocks.** An IR Block is a collection of IR Statements, representing an extended basic block (termed "IR Super Block" or "IRSB") in the target architecture. A block can have several exits. For conditional exits from the middle of a basic block, a special *Exit* IR Statement is used. An IR Expression is used to represent the target of the unconditional exit at the end of the block. VEX IR is actually quite well documented in the `libvex_ir.h` file (https://github.com/angr/vex/blob/dev/pub/libvex_ir.h) in the VEX repository. For the lazy, we'll detail some parts of VEX that you'll likely interact with fairly frequently. To begin with, here are some IR Expressions: | IR Expression | Evaluated Value | VEX Output Example | | ------------- | --------------- | ------- | | Constant | A constant value. | 0x4:I32 | | Read Temp | The value stored in a VEX temporary variable. | RdTmp(t10) | | Get Register | The value stored in a register. 
| GET:I32(16) | | Load Memory | The value stored at a memory address, with the address specified by another IR Expression. | LDle:I32 / LDbe:I64 | | Operation | A result of a specified IR Operation, applied to specified IR Expression arguments. | Add32 | | If-Then-Else | If a given IR Expression evaluates to 0, return one IR Expression. Otherwise, return another. | ITE | | Helper Function | VEX uses C helper functions for certain operations, such as computing the conditional flags registers of certain architectures. These functions return IR Expressions. | function\_name() | These expressions are then, in turn, used in IR Statements. Here are some common ones: | IR Statement | Meaning | VEX Output Example | | ------------ | ------- | ------------------ | Write Temp | Set a VEX temporary variable to the value of the given IR Expression. | WrTmp(t1) = (IR Expression) | Put Register | Update a register with the value of the given IR Expression. | PUT(16) = (IR Expression) | Store Memory | Update a location in memory, given as an IR Expression, with a value, also given as an IR Expression. | STle(0x1000) = (IR Expression) | Exit | A conditional exit from a basic block, with the jump target specified by an IR Expression. The condition is specified by an IR Expression. | if (condition) goto (Boring) 0x4000A00:I32 | An example of an IR translation, on ARM, is produced below. In the example, the subtraction operation is translated into a single IR block comprising 5 IR Statements, each of which contains at least one IR Expression (although, in real life, an IR block would typically consist of more than one instruction). Register names are translated into numerical indices given to the *GET* Expression and *PUT* Statement. 
The astute reader will observe that the actual subtraction is modeled by the first 4 IR Statements of the block, and the incrementing of the program counter to point to the next instruction (which, in this case, is located at `0x59FC8`) is modeled by the last statement. The following ARM instruction: subs R2, R2, #8 Becomes this VEX IR: t0 = GET:I32(16) t1 = 0x8:I32 t3 = Sub32(t0,t1) PUT(16) = t3 PUT(68) = 0x59FC8:I32 Cool stuff! ## Citing PyVEX If you use PyVEX in an academic work, please cite the paper for which it was developed: ```bibtex @article{shoshitaishvili2015firmalice, title={Firmalice - Automatic Detection of Authentication Bypass Vulnerabilities in Binary Firmware}, author={Shoshitaishvili, Yan and Wang, Ruoyu and Hauser, Christophe and Kruegel, Christopher and Vigna, Giovanni}, booktitle={NDSS}, year={2015} } ``` ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/api.rst ================================================ :mod:`pyvex` --- Binary Translator ================================== .. automodule:: pyvex .. automodule:: pyvex.native Translation Interface --------------------- .. automodule:: pyvex.block IR Components ------------- .. automodule:: pyvex.stmt .. automodule:: pyvex.expr .. automodule:: pyvex.const .. 
automodule:: pyvex.enums Lifting System -------------- .. automodule:: pyvex.data_ref .. automodule:: pyvex.lifting .. automodule:: pyvex.lifting.lift_function .. automodule:: pyvex.lifting.libvex .. automodule:: pyvex.lifting.lifter .. automodule:: pyvex.lifting.post_processor .. automodule:: pyvex.lifting.util.irsb_postprocess .. automodule:: pyvex.lifting.util .. automodule:: pyvex.lifting.util.syntax_wrapper .. automodule:: pyvex.lifting.util.vex_helper .. automodule:: pyvex.lifting.util.lifter_helper .. automodule:: pyvex.lifting.util.instr_helper Builtin IR Processors --------------------- .. automodule:: pyvex.lifting.zerodivision Errors ------ .. automodule:: pyvex.errors Utilities --------- .. automodule:: pyvex.utils ================================================ FILE: docs/conf.py ================================================ # Configuration file for the Sphinx documentation builder. # # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html import datetime # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "pyvex" project_copyright = f"{datetime.datetime.now().year}, The angr Project contributors" author = "The angr Project" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.coverage", "sphinx.ext.napoleon", "sphinx.ext.todo", "sphinx.ext.viewcode", "sphinx_autodoc_typehints", "myst_parser", ] templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for autodoc ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#configuration autoclass_content = 
"class" autodoc_default_options = { "members": True, "member-order": "bysource", "show-inheritance": True, "special-members": "__init__", "undoc-members": True, } autodoc_inherit_docstrings = True autodoc_typehints = "both" # -- Options for coverage ---------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/extensions/coverage.html coverage_write_headline = False # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "furo" html_static_path = ["_static"] ================================================ FILE: docs/index.rst ================================================ Welcome to pyVEX's documentation! ================================= .. toctree:: :maxdepth: 2 :caption: Contents: Quickstart API Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ================================================ FILE: docs/quickstart.rst ================================================ .. 
include:: ../README.md :parser: myst_parser.sphinx_ ================================================ FILE: fuzzing/build.sh ================================================ #!/bin/bash -eu # Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ################################################################################ # Since pyvex requires a specific developer build of archinfo, install it from source cd "$SRC"/archinfo python3 -m pip install . cd "$SRC"/pyvex python3 -m pip install .[testing] # Generate a simple binary for the corpus echo -ne "start:\n\txor %edi, %edi\nmov \$60, %eax\nsyscall" > /tmp/corpus.s clang -Os -s /tmp/corpus.s -nostdlib -nostartfiles -m32 -o corpus zip -r "$OUT"/irsb_fuzzer_seed_corpus.zip corpus # Build fuzzers in $OUT # --collect-submodules=bitstring ensures all bitstring submodules are bundled by PyInstaller for fuzzer in $(find $SRC -name '*_fuzzer.py'); do compile_python_fuzzer "$fuzzer" \ --add-binary="pyvex/lib/libpyvex.so:pyvex/lib" \ --collect-submodules=bitstring done ================================================ FILE: fuzzing/enhanced_fdp.py ================================================ # Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
class EnhancedFuzzedDataProvider(FuzzedDataProvider):
    """
    Convenience wrapper around atheris' FuzzedDataProvider that adds
    "consume a random amount" and "consume everything left" helpers,
    plus picking a value out of an Enum class.
    """

    def _consume_random_count(self) -> int:
        """
        :return: A count of bytes that is strictly in range 0<=n<=remaining_bytes
        """
        return self.ConsumeIntInRange(0, self.remaining_bytes())

    def ConsumeRandomBytes(self) -> bytes:
        """
        Consume a 'random' count of the remaining bytes

        :return: 0<=n<=remaining_bytes bytes
        """
        count = self._consume_random_count()
        return self.ConsumeBytes(count)

    def ConsumeRemainingBytes(self) -> bytes:
        """
        :return: The remaining buffer
        """
        return self.ConsumeBytes(self.remaining_bytes())

    def ConsumeRandomString(self) -> str:
        """
        Consume a 'random' length string, excluding surrogates

        :return: The string
        """
        length = self._consume_random_count()
        return self.ConsumeUnicodeNoSurrogates(length)

    def ConsumeRemainingString(self) -> str:
        """
        :return: The remaining buffer, as a string without surrogates
        """
        return self.ConsumeUnicodeNoSurrogates(self.remaining_bytes())

    def PickValueInEnum(self, enum):
        """Pick one member's value from an Enum class."""
        values = [member.value for member in enum]
        return self.PickValueInList(values)
@contextmanager
def nostdout():
    """
    Temporarily silence stdout and stderr by swapping in StringIO buffers.

    Fix: the original had no try/finally, so any exception raised inside the
    ``with`` body (pyvex lifting errors are *expected* here and are caught by
    the caller's surrounding try/except) escaped before the streams were
    restored, permanently leaving sys.stdout/sys.stderr pointing at the
    throwaway StringIO objects. The finally block guarantees restoration.
    """
    saved_stdout = sys.stdout
    saved_stderr = sys.stderr
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    try:
        yield
    finally:
        # Always restore, even when the body raises.
        sys.stdout = saved_stdout
        sys.stderr = saved_stderr
def find_good_scan(questionable):
    """
    Greedily find the largest subset of C declaration lines that cffi can cdef().

    Repeatedly tries to cdef() the known-good lines plus a prefix of the
    questionable lines; on failure, parses the cffi error message to locate the
    offending line, shrinks the candidate prefix to exclude it, and retries.

    :param questionable: list of preprocessed C header lines to vet
    :return: list of lines accepted by cffi (the new known-good set)
    """
    known_good = []
    # end_line is the length of the questionable prefix currently being tried.
    end_line = len(questionable)
    while len(questionable):
        ffi = cffi.FFI()
        log.debug("scan - trying %d good and %d questionable", len(known_good), len(questionable))
        candidate = known_good + questionable[:end_line]
        # Index (into candidate) of the line blamed for a parse failure; -1 = none.
        failed_line = -1
        try:
            ffi.cdef("\n".join(candidate))
            # Whole candidate parsed: promote it and move on to the remainder.
            known_good = candidate
            questionable = questionable[end_line:]
            end_line = len(questionable)
        except AssertionError:
            # cffi internal assertion: blame the first questionable line and drop it.
            questionable = questionable[1:]
            end_line = len(questionable)
        except cffi.CDefError as e:
            # NOTE(review): '"" in str(e)' is always True for any string, so this
            # first branch is taken for every CDefError and the elif branches
            # below are dead code — presumably a specific substring was meant
            # here; confirm against cffi's error formats before changing.
            if "" in str(e):
                # Expect a "...:<lineno>:..." location on the message's last line.
                failed_line = int(str(e).split("\n")[-1].split(":")[1]) - 1
            elif str(e).count(":") >= 2:
                failed_line = int(str(e).split("\n")[1].split(":")[1])
                # First message line quotes the offending construct; scan backwards
                # from the reported line for the candidate line containing it.
                failed_line_description = str(e).split("\n")[0]
                idx1 = failed_line_description.index('"')
                idx2 = failed_line_description.rindex('"')
                failed_reason = failed_line_description[idx1 + 1 : idx2]
                for i in range(failed_line, -1, -1):
                    if failed_reason in candidate[i]:
                        failed_line = i
            elif "unrecognized construct" in str(e):
                failed_line = int(str(e).split()[1][:-1]) - 1
            elif "end of input" in str(e):
                # Truncated declaration at the end: shrink the prefix by one line.
                end_line -= 1
            else:
                raise Exception("Unknown error")
        except cffi.FFIError as e:
            # FFIError carries the location on the first line of the message.
            if str(e).count(":") >= 2:
                failed_line = int(str(e).split("\n")[0].split(":")[1]) - 1
            else:
                raise Exception("Unknown error")
        if failed_line != -1:
            # Convert candidate index to a questionable-prefix length: retry with
            # everything before the blamed line.
            end_line = failed_line - len(known_good)
            if end_line == 0:
                # The very first questionable line is the culprit: discard it.
                questionable = questionable[1:]
                end_line = len(questionable)
    return known_good
def get_guest_offsets(vex_path):
    """
    Parse libvex_guest_offsets.h into a register-offset map.

    Each ``#define OFFSET_<arch>_<reg> <value>`` line becomes an entry
    ``(arch, reg.lower()) -> int(value)`` (value parsed with base 0, so
    hex constants work too).

    :param vex_path: directory containing libvex_guest_offsets.h
    :return: dict mapping (arch, register) tuples to integer offsets
    """
    header_path = os.path.join(vex_path, "libvex_guest_offsets.h")
    offsets = {}
    with open(header_path) as header:
        for raw_line in header:
            if not raw_line.startswith("#define"):
                continue
            _, name, value = raw_line.split()
            assert name.startswith("OFFSET_")
            _, arch, register = name.split("_", 2)
            offsets[(arch, register.lower())] = int(value, 0)
    return offsets
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] requires-python = ">=3.10" dependencies = [ "bitstring", "cffi>=1.0.3;implementation_name == 'cpython'", ] dynamic = ["version"] [project.readme] file = "README.md" content-type = "text/markdown" [project.urls] Homepage = "https://api.angr.io/projects/pyvex/en/latest/" Repository = "https://github.com/angr/pyvex" [project.optional-dependencies] docs = [ "furo", "myst-parser", "sphinx", "sphinx-autodoc-typehints", ] fuzzing = [ "atheris>=2.3.0", ] testing = [ "pytest", "pytest-xdist", ] [dependency-groups] dev = [ "pytest>=8.4.1", ] [tool.scikit-build] build-dir = "scikit_build" [tool.scikit-build.sdist] include = [ "pyvex/py.typed", "pyvex/include/*", ] exclude = [ "tests*" ] [tool.scikit-build.metadata.version] provider = "scikit_build_core.metadata.regex" input = "pyvex/__init__.py" [tool.black] line-length = 120 target-version = ['py310'] force-exclude = ''' /( vex )/ ''' [tool.ruff] line-length = 120 [tool.ruff.lint] select = [ "E", "F", "I", "TID", ] ================================================ FILE: pyvex/__init__.py ================================================ """ PyVEX provides an interface that translates binary code into the VEX intermediate representation (IR). For an introduction to VEX, take a look here: https://docs.angr.io/advanced-topics/ir """ __version__ = "9.2.212.dev0" from . 
import const, expr, stmt from .arches import ( ARCH_AMD64, ARCH_ARM64_BE, ARCH_ARM64_LE, ARCH_ARM_BE, ARCH_ARM_BE_LE, ARCH_ARM_LE, ARCH_MIPS32_BE, ARCH_MIPS32_LE, ARCH_MIPS64_BE, ARCH_MIPS64_LE, ARCH_PPC32, ARCH_PPC64_BE, ARCH_PPC64_LE, ARCH_RISCV64_LE, ARCH_S390X, ARCH_X86, ) from .block import IRSB, IRTypeEnv from .const import get_type_size, get_type_spec_size, tag_to_const_class from .enums import ( IRCallee, IRRegArray, VEXObject, default_vex_archinfo, get_enum_from_int, get_int_from_enum, irop_enums_to_ints, vex_endness_from_string, ) from .errors import PyVEXError from .expr import get_op_retty from .lifting import lift, lifters from .native import ffi, pvc # aliases.... IRStmt = stmt IRExpr = expr IRConst = const __all__ = [ "const", "expr", "stmt", "IRSB", "IRTypeEnv", "get_type_size", "get_type_spec_size", "irop_enums_to_ints", "tag_to_const_class", "IRCallee", "IRRegArray", "VEXObject", "default_vex_archinfo", "get_enum_from_int", "get_int_from_enum", "vex_endness_from_string", "PyVEXError", "get_op_retty", "lift", "lifters", "ffi", "pvc", "IRStmt", "IRExpr", "IRConst", "ARCH_X86", "ARCH_AMD64", "ARCH_ARM_BE", "ARCH_ARM_BE_LE", "ARCH_ARM_LE", "ARCH_ARM64_LE", "ARCH_ARM64_BE", "ARCH_PPC32", "ARCH_PPC64_BE", "ARCH_PPC64_LE", "ARCH_S390X", "ARCH_MIPS32_BE", "ARCH_MIPS32_LE", "ARCH_MIPS64_BE", "ARCH_MIPS64_LE", "ARCH_RISCV64_LE", ] ================================================ FILE: pyvex/_register_info.py ================================================ REGISTER_OFFSETS = { ("x86", "eax"): 8, ("x86", "ax"): 8, ("x86", "al"): 8, ("x86", "ah"): 9, ("x86", "ecx"): 12, ("x86", "cx"): 12, ("x86", "cl"): 12, ("x86", "ch"): 13, ("x86", "edx"): 16, ("x86", "dx"): 16, ("x86", "dl"): 16, ("x86", "dh"): 17, ("x86", "ebx"): 20, ("x86", "bx"): 20, ("x86", "bl"): 20, ("x86", "bh"): 21, ("x86", "esp"): 24, ("x86", "sp"): 24, ("x86", "ebp"): 28, ("x86", "bp"): 28, ("x86", "esi"): 32, ("x86", "si"): 32, ("x86", "sil"): 32, ("x86", "sih"): 33, ("x86", "edi"): 36, ("x86", 
"di"): 36, ("x86", "dil"): 36, ("x86", "dih"): 37, ("x86", "cc_op"): 40, ("x86", "cc_dep1"): 44, ("x86", "cc_dep2"): 48, ("x86", "cc_ndep"): 52, ("x86", "d"): 56, ("x86", "dflag"): 56, ("x86", "id"): 60, ("x86", "idflag"): 60, ("x86", "ac"): 64, ("x86", "acflag"): 64, ("x86", "eip"): 68, ("x86", "ip"): 68, ("x86", "pc"): 68, ("x86", "fpreg"): 72, ("x86", "fpu_regs"): 72, ("x86", "mm0"): 72, ("x86", "mm1"): 80, ("x86", "mm2"): 88, ("x86", "mm3"): 96, ("x86", "mm4"): 104, ("x86", "mm5"): 112, ("x86", "mm6"): 120, ("x86", "mm7"): 128, ("x86", "fptag"): 136, ("x86", "fpu_tags"): 136, ("x86", "fpround"): 144, ("x86", "fc3210"): 148, ("x86", "ftop"): 152, ("x86", "sseround"): 156, ("x86", "xmm0"): 160, ("x86", "xmm1"): 176, ("x86", "xmm2"): 192, ("x86", "xmm3"): 208, ("x86", "xmm4"): 224, ("x86", "xmm5"): 240, ("x86", "xmm6"): 256, ("x86", "xmm7"): 272, ("x86", "cs"): 288, ("x86", "ds"): 290, ("x86", "es"): 292, ("x86", "fs"): 294, ("x86", "gs"): 296, ("x86", "ss"): 298, ("x86", "ldt"): 304, ("x86", "gdt"): 312, ("x86", "emnote"): 320, ("x86", "cmstart"): 324, ("x86", "cmlen"): 328, ("x86", "nraddr"): 332, ("x86", "sc_class"): 336, ("x86", "ip_at_syscall"): 340, ("amd64", "rax"): 16, ("amd64", "eax"): 16, ("amd64", "ax"): 16, ("amd64", "al"): 16, ("amd64", "ah"): 17, ("amd64", "rcx"): 24, ("amd64", "ecx"): 24, ("amd64", "cx"): 24, ("amd64", "cl"): 24, ("amd64", "ch"): 25, ("amd64", "rdx"): 32, ("amd64", "edx"): 32, ("amd64", "dx"): 32, ("amd64", "dl"): 32, ("amd64", "dh"): 33, ("amd64", "rbx"): 40, ("amd64", "ebx"): 40, ("amd64", "bx"): 40, ("amd64", "bl"): 40, ("amd64", "bh"): 41, ("amd64", "rsp"): 48, ("amd64", "sp"): 48, ("amd64", "esp"): 48, ("amd64", "rbp"): 56, ("amd64", "bp"): 56, ("amd64", "ebp"): 56, ("amd64", "_bp"): 56, ("amd64", "bpl"): 56, ("amd64", "bph"): 57, ("amd64", "rsi"): 64, ("amd64", "esi"): 64, ("amd64", "si"): 64, ("amd64", "sil"): 64, ("amd64", "sih"): 65, ("amd64", "rdi"): 72, ("amd64", "edi"): 72, ("amd64", "di"): 72, ("amd64", "dil"): 72, 
("amd64", "dih"): 73, ("amd64", "r8"): 80, ("amd64", "r8d"): 80, ("amd64", "r8w"): 80, ("amd64", "r8b"): 80, ("amd64", "r9"): 88, ("amd64", "r9d"): 88, ("amd64", "r9w"): 88, ("amd64", "r9b"): 88, ("amd64", "r10"): 96, ("amd64", "r10d"): 96, ("amd64", "r10w"): 96, ("amd64", "r10b"): 96, ("amd64", "r11"): 104, ("amd64", "r11d"): 104, ("amd64", "r11w"): 104, ("amd64", "r11b"): 104, ("amd64", "r12"): 112, ("amd64", "r12d"): 112, ("amd64", "r12w"): 112, ("amd64", "r12b"): 112, ("amd64", "r13"): 120, ("amd64", "r13d"): 120, ("amd64", "r13w"): 120, ("amd64", "r13b"): 120, ("amd64", "r14"): 128, ("amd64", "r14d"): 128, ("amd64", "r14w"): 128, ("amd64", "r14b"): 128, ("amd64", "r15"): 136, ("amd64", "r15d"): 136, ("amd64", "r15w"): 136, ("amd64", "r15b"): 136, ("amd64", "cc_op"): 144, ("amd64", "cc_dep1"): 152, ("amd64", "cc_dep2"): 160, ("amd64", "cc_ndep"): 168, ("amd64", "d"): 176, ("amd64", "dflag"): 176, ("amd64", "rip"): 184, ("amd64", "ip"): 184, ("amd64", "pc"): 184, ("amd64", "ac"): 192, ("amd64", "acflag"): 192, ("amd64", "id"): 200, ("amd64", "idflag"): 200, ("amd64", "fs"): 208, ("amd64", "fs_const"): 208, ("amd64", "sseround"): 216, ("amd64", "cr0"): 768, ("amd64", "cr2"): 784, ("amd64", "cr3"): 792, ("amd64", "cr4"): 800, ("amd64", "cr8"): 832, ("amd64", "ymm0"): 224, ("amd64", "xmm0"): 224, ("amd64", "xmm0lq"): 224, ("amd64", "xmm0hq"): 232, ("amd64", "ymm0hx"): 240, ("amd64", "ymm1"): 256, ("amd64", "xmm1"): 256, ("amd64", "xmm1lq"): 256, ("amd64", "xmm1hq"): 264, ("amd64", "ymm1hx"): 272, ("amd64", "ymm2"): 288, ("amd64", "xmm2"): 288, ("amd64", "xmm2lq"): 288, ("amd64", "xmm2hq"): 296, ("amd64", "ymm2hx"): 304, ("amd64", "ymm3"): 320, ("amd64", "xmm3"): 320, ("amd64", "xmm3lq"): 320, ("amd64", "xmm3hq"): 328, ("amd64", "ymm3hx"): 336, ("amd64", "ymm4"): 352, ("amd64", "xmm4"): 352, ("amd64", "xmm4lq"): 352, ("amd64", "xmm4hq"): 360, ("amd64", "ymm4hx"): 368, ("amd64", "ymm5"): 384, ("amd64", "xmm5"): 384, ("amd64", "xmm5lq"): 384, ("amd64", "xmm5hq"): 392, 
("amd64", "ymm5hx"): 400, ("amd64", "ymm6"): 416, ("amd64", "xmm6"): 416, ("amd64", "xmm6lq"): 416, ("amd64", "xmm6hq"): 424, ("amd64", "ymm6hx"): 432, ("amd64", "ymm7"): 448, ("amd64", "xmm7"): 448, ("amd64", "xmm7lq"): 448, ("amd64", "xmm7hq"): 456, ("amd64", "ymm7hx"): 464, ("amd64", "ymm8"): 480, ("amd64", "xmm8"): 480, ("amd64", "xmm8lq"): 480, ("amd64", "xmm8hq"): 488, ("amd64", "ymm8hx"): 496, ("amd64", "ymm9"): 512, ("amd64", "xmm9"): 512, ("amd64", "xmm9lq"): 512, ("amd64", "xmm9hq"): 520, ("amd64", "ymm9hx"): 528, ("amd64", "ymm10"): 544, ("amd64", "xmm10"): 544, ("amd64", "xmm10lq"): 544, ("amd64", "xmm10hq"): 552, ("amd64", "ymm10hx"): 560, ("amd64", "ymm11"): 576, ("amd64", "xmm11"): 576, ("amd64", "xmm11lq"): 576, ("amd64", "xmm11hq"): 584, ("amd64", "ymm11hx"): 592, ("amd64", "ymm12"): 608, ("amd64", "xmm12"): 608, ("amd64", "xmm12lq"): 608, ("amd64", "xmm12hq"): 616, ("amd64", "ymm12hx"): 624, ("amd64", "ymm13"): 640, ("amd64", "xmm13"): 640, ("amd64", "xmm13lq"): 640, ("amd64", "xmm13hq"): 648, ("amd64", "ymm13hx"): 656, ("amd64", "ymm14"): 672, ("amd64", "xmm14"): 672, ("amd64", "xmm14lq"): 672, ("amd64", "xmm14hq"): 680, ("amd64", "ymm14hx"): 688, ("amd64", "ymm15"): 704, ("amd64", "xmm15"): 704, ("amd64", "xmm15lq"): 704, ("amd64", "xmm15hq"): 712, ("amd64", "ymm15hx"): 720, ("amd64", "ftop"): 896, ("amd64", "fpreg"): 904, ("amd64", "fpu_regs"): 904, ("amd64", "mm0"): 904, ("amd64", "mm1"): 912, ("amd64", "mm2"): 920, ("amd64", "mm3"): 928, ("amd64", "mm4"): 936, ("amd64", "mm5"): 944, ("amd64", "mm6"): 952, ("amd64", "mm7"): 960, ("amd64", "fptag"): 968, ("amd64", "fpu_tags"): 968, ("amd64", "fpround"): 976, ("amd64", "fc3210"): 984, ("amd64", "emnote"): 992, ("amd64", "cmstart"): 1000, ("amd64", "cmlen"): 1008, ("amd64", "nraddr"): 1016, ("amd64", "gs"): 1032, ("amd64", "gs_const"): 1032, ("amd64", "ip_at_syscall"): 1040, ("amd64", "cs_seg"): 1048, ("amd64", "ds_seg"): 1050, ("amd64", "es_seg"): 1052, ("amd64", "fs_seg"): 1054, ("amd64", 
"gs_seg"): 1056, ("amd64", "ss_seg"): 1058, ("arm", "r0"): 8, ("arm", "a1"): 8, ("arm", "r1"): 12, ("arm", "a2"): 12, ("arm", "r2"): 16, ("arm", "a3"): 16, ("arm", "r3"): 20, ("arm", "a4"): 20, ("arm", "r4"): 24, ("arm", "v1"): 24, ("arm", "r5"): 28, ("arm", "v2"): 28, ("arm", "r6"): 32, ("arm", "v3"): 32, ("arm", "r7"): 36, ("arm", "v4"): 36, ("arm", "r8"): 40, ("arm", "v5"): 40, ("arm", "r9"): 44, ("arm", "v6"): 44, ("arm", "sb"): 44, ("arm", "r10"): 48, ("arm", "v7"): 48, ("arm", "sl"): 48, ("arm", "r11"): 52, ("arm", "v8"): 52, ("arm", "fp"): 52, ("arm", "bp"): 52, ("arm", "r12"): 56, ("arm", "sp"): 60, ("arm", "r13"): 60, ("arm", "lr"): 64, ("arm", "r14"): 64, ("arm", "pc"): 68, ("arm", "r15"): 68, ("arm", "ip"): 68, ("arm", "cc_op"): 72, ("arm", "cc_dep1"): 76, ("arm", "cc_dep2"): 80, ("arm", "cc_ndep"): 84, ("arm", "qflag32"): 88, ("arm", "geflag0"): 92, ("arm", "geflag1"): 96, ("arm", "geflag2"): 100, ("arm", "geflag3"): 104, ("arm", "emnote"): 108, ("arm", "cmstart"): 112, ("arm", "cmlen"): 116, ("arm", "nraddr"): 120, ("arm", "ip_at_syscall"): 124, ("arm", "d0"): 128, ("arm", "s0"): 128, ("arm", "s1"): 132, ("arm", "d1"): 136, ("arm", "s2"): 136, ("arm", "s3"): 140, ("arm", "d2"): 144, ("arm", "s4"): 144, ("arm", "s5"): 148, ("arm", "d3"): 152, ("arm", "s6"): 152, ("arm", "s7"): 156, ("arm", "d4"): 160, ("arm", "s8"): 160, ("arm", "s9"): 164, ("arm", "d5"): 168, ("arm", "s10"): 168, ("arm", "s11"): 172, ("arm", "d6"): 176, ("arm", "s12"): 176, ("arm", "s13"): 180, ("arm", "d7"): 184, ("arm", "s14"): 184, ("arm", "s15"): 188, ("arm", "d8"): 192, ("arm", "s16"): 192, ("arm", "s17"): 196, ("arm", "d9"): 200, ("arm", "s18"): 200, ("arm", "s19"): 204, ("arm", "d10"): 208, ("arm", "s20"): 208, ("arm", "s21"): 212, ("arm", "d11"): 216, ("arm", "s22"): 216, ("arm", "s23"): 220, ("arm", "d12"): 224, ("arm", "s24"): 224, ("arm", "s25"): 228, ("arm", "d13"): 232, ("arm", "s26"): 232, ("arm", "s27"): 236, ("arm", "d14"): 240, ("arm", "s28"): 240, ("arm", "s29"): 244, 
("arm", "d15"): 248, ("arm", "s30"): 248, ("arm", "s31"): 252, ("arm", "d16"): 256, ("arm", "d17"): 264, ("arm", "d18"): 272, ("arm", "d19"): 280, ("arm", "d20"): 288, ("arm", "d21"): 296, ("arm", "d22"): 304, ("arm", "d23"): 312, ("arm", "d24"): 320, ("arm", "d25"): 328, ("arm", "d26"): 336, ("arm", "d27"): 344, ("arm", "d28"): 352, ("arm", "d29"): 360, ("arm", "d30"): 368, ("arm", "d31"): 376, ("arm", "fpscr"): 384, ("arm", "tpidruro"): 388, ("arm", "itstate"): 392, ("arm64", "x0"): 16, ("arm64", "r0"): 16, ("arm64", "w0"): 16, ("arm64", "x1"): 24, ("arm64", "r1"): 24, ("arm64", "w1"): 24, ("arm64", "x2"): 32, ("arm64", "r2"): 32, ("arm64", "w2"): 32, ("arm64", "x3"): 40, ("arm64", "r3"): 40, ("arm64", "w3"): 40, ("arm64", "x4"): 48, ("arm64", "r4"): 48, ("arm64", "w4"): 48, ("arm64", "x5"): 56, ("arm64", "r5"): 56, ("arm64", "w5"): 56, ("arm64", "x6"): 64, ("arm64", "r6"): 64, ("arm64", "w6"): 64, ("arm64", "x7"): 72, ("arm64", "r7"): 72, ("arm64", "w7"): 72, ("arm64", "x8"): 80, ("arm64", "r8"): 80, ("arm64", "w8"): 80, ("arm64", "x9"): 88, ("arm64", "r9"): 88, ("arm64", "w9"): 88, ("arm64", "x10"): 96, ("arm64", "r10"): 96, ("arm64", "w10"): 96, ("arm64", "x11"): 104, ("arm64", "r11"): 104, ("arm64", "w11"): 104, ("arm64", "x12"): 112, ("arm64", "r12"): 112, ("arm64", "w12"): 112, ("arm64", "x13"): 120, ("arm64", "r13"): 120, ("arm64", "w13"): 120, ("arm64", "x14"): 128, ("arm64", "r14"): 128, ("arm64", "w14"): 128, ("arm64", "x15"): 136, ("arm64", "r15"): 136, ("arm64", "w15"): 136, ("arm64", "x16"): 144, ("arm64", "r16"): 144, ("arm64", "ip0"): 144, ("arm64", "w16"): 144, ("arm64", "x17"): 152, ("arm64", "r17"): 152, ("arm64", "ip1"): 152, ("arm64", "w17"): 152, ("arm64", "x18"): 160, ("arm64", "r18"): 160, ("arm64", "w18"): 160, ("arm64", "x19"): 168, ("arm64", "r19"): 168, ("arm64", "w19"): 168, ("arm64", "x20"): 176, ("arm64", "r20"): 176, ("arm64", "w20"): 176, ("arm64", "x21"): 184, ("arm64", "r21"): 184, ("arm64", "w21"): 184, ("arm64", "x22"): 192, 
("arm64", "r22"): 192, ("arm64", "w22"): 192, ("arm64", "x23"): 200, ("arm64", "r23"): 200, ("arm64", "w23"): 200, ("arm64", "x24"): 208, ("arm64", "r24"): 208, ("arm64", "w24"): 208, ("arm64", "x25"): 216, ("arm64", "r25"): 216, ("arm64", "w25"): 216, ("arm64", "x26"): 224, ("arm64", "r26"): 224, ("arm64", "w26"): 224, ("arm64", "x27"): 232, ("arm64", "r27"): 232, ("arm64", "w27"): 232, ("arm64", "x28"): 240, ("arm64", "r28"): 240, ("arm64", "w28"): 240, ("arm64", "x29"): 248, ("arm64", "r29"): 248, ("arm64", "fp"): 248, ("arm64", "bp"): 248, ("arm64", "w29"): 248, ("arm64", "x30"): 256, ("arm64", "r30"): 256, ("arm64", "lr"): 256, ("arm64", "w30"): 256, ("arm64", "xsp"): 264, ("arm64", "sp"): 264, ("arm64", "wsp"): 264, ("arm64", "pc"): 272, ("arm64", "ip"): 272, ("arm64", "cc_op"): 280, ("arm64", "cc_dep1"): 288, ("arm64", "cc_dep2"): 296, ("arm64", "cc_ndep"): 304, ("arm64", "tpidr_el0"): 312, ("arm64", "q0"): 320, ("arm64", "v0"): 320, ("arm64", "d0"): 320, ("arm64", "s0"): 320, ("arm64", "h0"): 320, ("arm64", "b0"): 320, ("arm64", "q1"): 336, ("arm64", "v1"): 336, ("arm64", "d1"): 336, ("arm64", "s1"): 336, ("arm64", "h1"): 336, ("arm64", "b1"): 336, ("arm64", "q2"): 352, ("arm64", "v2"): 352, ("arm64", "d2"): 352, ("arm64", "s2"): 352, ("arm64", "h2"): 352, ("arm64", "b2"): 352, ("arm64", "q3"): 368, ("arm64", "v3"): 368, ("arm64", "d3"): 368, ("arm64", "s3"): 368, ("arm64", "h3"): 368, ("arm64", "b3"): 368, ("arm64", "q4"): 384, ("arm64", "v4"): 384, ("arm64", "d4"): 384, ("arm64", "s4"): 384, ("arm64", "h4"): 384, ("arm64", "b4"): 384, ("arm64", "q5"): 400, ("arm64", "v5"): 400, ("arm64", "d5"): 400, ("arm64", "s5"): 400, ("arm64", "h5"): 400, ("arm64", "b5"): 400, ("arm64", "q6"): 416, ("arm64", "v6"): 416, ("arm64", "d6"): 416, ("arm64", "s6"): 416, ("arm64", "h6"): 416, ("arm64", "b6"): 416, ("arm64", "q7"): 432, ("arm64", "v7"): 432, ("arm64", "d7"): 432, ("arm64", "s7"): 432, ("arm64", "h7"): 432, ("arm64", "b7"): 432, ("arm64", "q8"): 448, ("arm64", 
"v8"): 448, ("arm64", "d8"): 448, ("arm64", "s8"): 448, ("arm64", "h8"): 448, ("arm64", "b8"): 448, ("arm64", "q9"): 464, ("arm64", "v9"): 464, ("arm64", "d9"): 464, ("arm64", "s9"): 464, ("arm64", "h9"): 464, ("arm64", "b9"): 464, ("arm64", "q10"): 480, ("arm64", "v10"): 480, ("arm64", "d10"): 480, ("arm64", "s10"): 480, ("arm64", "h10"): 480, ("arm64", "b10"): 480, ("arm64", "q11"): 496, ("arm64", "v11"): 496, ("arm64", "d11"): 496, ("arm64", "s11"): 496, ("arm64", "h11"): 496, ("arm64", "b11"): 496, ("arm64", "q12"): 512, ("arm64", "v12"): 512, ("arm64", "d12"): 512, ("arm64", "s12"): 512, ("arm64", "h12"): 512, ("arm64", "b12"): 512, ("arm64", "q13"): 528, ("arm64", "v13"): 528, ("arm64", "d13"): 528, ("arm64", "s13"): 528, ("arm64", "h13"): 528, ("arm64", "b13"): 528, ("arm64", "q14"): 544, ("arm64", "v14"): 544, ("arm64", "d14"): 544, ("arm64", "s14"): 544, ("arm64", "h14"): 544, ("arm64", "b14"): 544, ("arm64", "q15"): 560, ("arm64", "v15"): 560, ("arm64", "d15"): 560, ("arm64", "s15"): 560, ("arm64", "h15"): 560, ("arm64", "b15"): 560, ("arm64", "q16"): 576, ("arm64", "v16"): 576, ("arm64", "d16"): 576, ("arm64", "s16"): 576, ("arm64", "h16"): 576, ("arm64", "b16"): 576, ("arm64", "q17"): 592, ("arm64", "v17"): 592, ("arm64", "d17"): 592, ("arm64", "s17"): 592, ("arm64", "h17"): 592, ("arm64", "b17"): 592, ("arm64", "q18"): 608, ("arm64", "v18"): 608, ("arm64", "d18"): 608, ("arm64", "s18"): 608, ("arm64", "h18"): 608, ("arm64", "b18"): 608, ("arm64", "q19"): 624, ("arm64", "v19"): 624, ("arm64", "d19"): 624, ("arm64", "s19"): 624, ("arm64", "h19"): 624, ("arm64", "b19"): 624, ("arm64", "q20"): 640, ("arm64", "v20"): 640, ("arm64", "d20"): 640, ("arm64", "s20"): 640, ("arm64", "h20"): 640, ("arm64", "b20"): 640, ("arm64", "q21"): 656, ("arm64", "v21"): 656, ("arm64", "d21"): 656, ("arm64", "s21"): 656, ("arm64", "h21"): 656, ("arm64", "b21"): 656, ("arm64", "q22"): 672, ("arm64", "v22"): 672, ("arm64", "d22"): 672, ("arm64", "s22"): 672, ("arm64", "h22"): 
672, ("arm64", "b22"): 672, ("arm64", "q23"): 688, ("arm64", "v23"): 688, ("arm64", "d23"): 688, ("arm64", "s23"): 688, ("arm64", "h23"): 688, ("arm64", "b23"): 688, ("arm64", "q24"): 704, ("arm64", "v24"): 704, ("arm64", "d24"): 704, ("arm64", "s24"): 704, ("arm64", "h24"): 704, ("arm64", "b24"): 704, ("arm64", "q25"): 720, ("arm64", "v25"): 720, ("arm64", "d25"): 720, ("arm64", "s25"): 720, ("arm64", "h25"): 720, ("arm64", "b25"): 720, ("arm64", "q26"): 736, ("arm64", "v26"): 736, ("arm64", "d26"): 736, ("arm64", "s26"): 736, ("arm64", "h26"): 736, ("arm64", "b26"): 736, ("arm64", "q27"): 752, ("arm64", "v27"): 752, ("arm64", "d27"): 752, ("arm64", "s27"): 752, ("arm64", "h27"): 752, ("arm64", "b27"): 752, ("arm64", "q28"): 768, ("arm64", "v28"): 768, ("arm64", "d28"): 768, ("arm64", "s28"): 768, ("arm64", "h28"): 768, ("arm64", "b28"): 768, ("arm64", "q29"): 784, ("arm64", "v29"): 784, ("arm64", "d29"): 784, ("arm64", "s29"): 784, ("arm64", "h29"): 784, ("arm64", "b29"): 784, ("arm64", "q30"): 800, ("arm64", "v30"): 800, ("arm64", "d30"): 800, ("arm64", "s30"): 800, ("arm64", "h30"): 800, ("arm64", "b30"): 800, ("arm64", "q31"): 816, ("arm64", "v31"): 816, ("arm64", "d31"): 816, ("arm64", "s31"): 816, ("arm64", "h31"): 816, ("arm64", "b31"): 816, ("arm64", "qcflag"): 832, ("arm64", "emnote"): 848, ("arm64", "cmstart"): 856, ("arm64", "cmlen"): 864, ("arm64", "nraddr"): 872, ("arm64", "ip_at_syscall"): 880, ("arm64", "fpcr"): 888, ("ppc32", "gpr0"): 16, ("ppc32", "r0"): 16, ("ppc32", "gpr1"): 20, ("ppc32", "r1"): 20, ("ppc32", "sp"): 20, ("ppc32", "gpr2"): 24, ("ppc32", "r2"): 24, ("ppc32", "gpr3"): 28, ("ppc32", "r3"): 28, ("ppc32", "gpr4"): 32, ("ppc32", "r4"): 32, ("ppc32", "gpr5"): 36, ("ppc32", "r5"): 36, ("ppc32", "gpr6"): 40, ("ppc32", "r6"): 40, ("ppc32", "gpr7"): 44, ("ppc32", "r7"): 44, ("ppc32", "gpr8"): 48, ("ppc32", "r8"): 48, ("ppc32", "gpr9"): 52, ("ppc32", "r9"): 52, ("ppc32", "gpr10"): 56, ("ppc32", "r10"): 56, ("ppc32", "gpr11"): 60, ("ppc32", 
"r11"): 60, ("ppc32", "gpr12"): 64, ("ppc32", "r12"): 64, ("ppc32", "gpr13"): 68, ("ppc32", "r13"): 68, ("ppc32", "gpr14"): 72, ("ppc32", "r14"): 72, ("ppc32", "gpr15"): 76, ("ppc32", "r15"): 76, ("ppc32", "gpr16"): 80, ("ppc32", "r16"): 80, ("ppc32", "gpr17"): 84, ("ppc32", "r17"): 84, ("ppc32", "gpr18"): 88, ("ppc32", "r18"): 88, ("ppc32", "gpr19"): 92, ("ppc32", "r19"): 92, ("ppc32", "gpr20"): 96, ("ppc32", "r20"): 96, ("ppc32", "gpr21"): 100, ("ppc32", "r21"): 100, ("ppc32", "gpr22"): 104, ("ppc32", "r22"): 104, ("ppc32", "gpr23"): 108, ("ppc32", "r23"): 108, ("ppc32", "gpr24"): 112, ("ppc32", "r24"): 112, ("ppc32", "gpr25"): 116, ("ppc32", "r25"): 116, ("ppc32", "gpr26"): 120, ("ppc32", "r26"): 120, ("ppc32", "gpr27"): 124, ("ppc32", "r27"): 124, ("ppc32", "gpr28"): 128, ("ppc32", "r28"): 128, ("ppc32", "gpr29"): 132, ("ppc32", "r29"): 132, ("ppc32", "gpr30"): 136, ("ppc32", "r30"): 136, ("ppc32", "gpr31"): 140, ("ppc32", "r31"): 140, ("ppc32", "bp"): 140, ("ppc32", "vsr0"): 144, ("ppc32", "v0"): 144, ("ppc32", "fpr0"): 144, ("ppc32", "vsr1"): 160, ("ppc32", "v1"): 160, ("ppc32", "fpr1"): 160, ("ppc32", "vsr2"): 176, ("ppc32", "v2"): 176, ("ppc32", "fpr2"): 176, ("ppc32", "vsr3"): 192, ("ppc32", "v3"): 192, ("ppc32", "fpr3"): 192, ("ppc32", "vsr4"): 208, ("ppc32", "v4"): 208, ("ppc32", "fpr4"): 208, ("ppc32", "vsr5"): 224, ("ppc32", "v5"): 224, ("ppc32", "fpr5"): 224, ("ppc32", "vsr6"): 240, ("ppc32", "v6"): 240, ("ppc32", "fpr6"): 240, ("ppc32", "vsr7"): 256, ("ppc32", "v7"): 256, ("ppc32", "fpr7"): 256, ("ppc32", "vsr8"): 272, ("ppc32", "v8"): 272, ("ppc32", "fpr8"): 272, ("ppc32", "vsr9"): 288, ("ppc32", "v9"): 288, ("ppc32", "fpr9"): 288, ("ppc32", "vsr10"): 304, ("ppc32", "v10"): 304, ("ppc32", "fpr10"): 304, ("ppc32", "vsr11"): 320, ("ppc32", "v11"): 320, ("ppc32", "fpr11"): 320, ("ppc32", "vsr12"): 336, ("ppc32", "v12"): 336, ("ppc32", "fpr12"): 336, ("ppc32", "vsr13"): 352, ("ppc32", "v13"): 352, ("ppc32", "fpr13"): 352, ("ppc32", "vsr14"): 368, 
("ppc32", "v14"): 368, ("ppc32", "fpr14"): 368, ("ppc32", "vsr15"): 384, ("ppc32", "v15"): 384, ("ppc32", "fpr15"): 384, ("ppc32", "vsr16"): 400, ("ppc32", "v16"): 400, ("ppc32", "fpr16"): 400, ("ppc32", "vsr17"): 416, ("ppc32", "v17"): 416, ("ppc32", "fpr17"): 416, ("ppc32", "vsr18"): 432, ("ppc32", "v18"): 432, ("ppc32", "fpr18"): 432, ("ppc32", "vsr19"): 448, ("ppc32", "v19"): 448, ("ppc32", "fpr19"): 448, ("ppc32", "vsr20"): 464, ("ppc32", "v20"): 464, ("ppc32", "fpr20"): 464, ("ppc32", "vsr21"): 480, ("ppc32", "v21"): 480, ("ppc32", "fpr21"): 480, ("ppc32", "vsr22"): 496, ("ppc32", "v22"): 496, ("ppc32", "fpr22"): 496, ("ppc32", "vsr23"): 512, ("ppc32", "v23"): 512, ("ppc32", "fpr23"): 512, ("ppc32", "vsr24"): 528, ("ppc32", "v24"): 528, ("ppc32", "fpr24"): 528, ("ppc32", "vsr25"): 544, ("ppc32", "v25"): 544, ("ppc32", "fpr25"): 544, ("ppc32", "vsr26"): 560, ("ppc32", "v26"): 560, ("ppc32", "fpr26"): 560, ("ppc32", "vsr27"): 576, ("ppc32", "v27"): 576, ("ppc32", "fpr27"): 576, ("ppc32", "vsr28"): 592, ("ppc32", "v28"): 592, ("ppc32", "fpr28"): 592, ("ppc32", "vsr29"): 608, ("ppc32", "v29"): 608, ("ppc32", "fpr29"): 608, ("ppc32", "vsr30"): 624, ("ppc32", "v30"): 624, ("ppc32", "fpr30"): 624, ("ppc32", "vsr31"): 640, ("ppc32", "v31"): 640, ("ppc32", "fpr31"): 640, ("ppc32", "vsr32"): 656, ("ppc32", "v32"): 656, ("ppc32", "vsr33"): 672, ("ppc32", "v33"): 672, ("ppc32", "vsr34"): 688, ("ppc32", "v34"): 688, ("ppc32", "vsr35"): 704, ("ppc32", "v35"): 704, ("ppc32", "vsr36"): 720, ("ppc32", "v36"): 720, ("ppc32", "vsr37"): 736, ("ppc32", "v37"): 736, ("ppc32", "vsr38"): 752, ("ppc32", "v38"): 752, ("ppc32", "vsr39"): 768, ("ppc32", "v39"): 768, ("ppc32", "vsr40"): 784, ("ppc32", "v40"): 784, ("ppc32", "vsr41"): 800, ("ppc32", "v41"): 800, ("ppc32", "vsr42"): 816, ("ppc32", "v42"): 816, ("ppc32", "vsr43"): 832, ("ppc32", "v43"): 832, ("ppc32", "vsr44"): 848, ("ppc32", "v44"): 848, ("ppc32", "vsr45"): 864, ("ppc32", "v45"): 864, ("ppc32", "vsr46"): 880, ("ppc32", 
"v46"): 880, ("ppc32", "vsr47"): 896, ("ppc32", "v47"): 896, ("ppc32", "vsr48"): 912, ("ppc32", "v48"): 912, ("ppc32", "vsr49"): 928, ("ppc32", "v49"): 928, ("ppc32", "vsr50"): 944, ("ppc32", "v50"): 944, ("ppc32", "vsr51"): 960, ("ppc32", "v51"): 960, ("ppc32", "vsr52"): 976, ("ppc32", "v52"): 976, ("ppc32", "vsr53"): 992, ("ppc32", "v53"): 992, ("ppc32", "vsr54"): 1008, ("ppc32", "v54"): 1008, ("ppc32", "vsr55"): 1024, ("ppc32", "v55"): 1024, ("ppc32", "vsr56"): 1040, ("ppc32", "v56"): 1040, ("ppc32", "vsr57"): 1056, ("ppc32", "v57"): 1056, ("ppc32", "vsr58"): 1072, ("ppc32", "v58"): 1072, ("ppc32", "vsr59"): 1088, ("ppc32", "v59"): 1088, ("ppc32", "vsr60"): 1104, ("ppc32", "v60"): 1104, ("ppc32", "vsr61"): 1120, ("ppc32", "v61"): 1120, ("ppc32", "vsr62"): 1136, ("ppc32", "v62"): 1136, ("ppc32", "vsr63"): 1152, ("ppc32", "v63"): 1152, ("ppc32", "cia"): 1168, ("ppc32", "ip"): 1168, ("ppc32", "pc"): 1168, ("ppc32", "lr"): 1172, ("ppc32", "ctr"): 1176, ("ppc32", "xer_so"): 1180, ("ppc32", "xer_ov"): 1181, ("ppc32", "xer_ca"): 1182, ("ppc32", "xer_bc"): 1183, ("ppc32", "cr0_321"): 1184, ("ppc32", "cr0_0"): 1185, ("ppc32", "cr0"): 1185, ("ppc32", "cr1_321"): 1186, ("ppc32", "cr1_0"): 1187, ("ppc32", "cr1"): 1187, ("ppc32", "cr2_321"): 1188, ("ppc32", "cr2_0"): 1189, ("ppc32", "cr2"): 1189, ("ppc32", "cr3_321"): 1190, ("ppc32", "cr3_0"): 1191, ("ppc32", "cr3"): 1191, ("ppc32", "cr4_321"): 1192, ("ppc32", "cr4_0"): 1193, ("ppc32", "cr4"): 1193, ("ppc32", "cr5_321"): 1194, ("ppc32", "cr5_0"): 1195, ("ppc32", "cr5"): 1195, ("ppc32", "cr6_321"): 1196, ("ppc32", "cr6_0"): 1197, ("ppc32", "cr6"): 1197, ("ppc32", "cr7_321"): 1198, ("ppc32", "cr7_0"): 1199, ("ppc32", "cr7"): 1199, ("ppc32", "fpround"): 1200, ("ppc32", "dfpround"): 1201, ("ppc32", "c_fpcc"): 1202, ("ppc32", "vrsave"): 1204, ("ppc32", "vscr"): 1208, ("ppc32", "emnote"): 1212, ("ppc32", "cmstart"): 1216, ("ppc32", "cmlen"): 1220, ("ppc32", "nraddr"): 1224, ("ppc32", "nraddr_gpr2"): 1228, ("ppc32", "redir_sp"): 
1232, ("ppc32", "redir_stack"): 1236, ("ppc32", "ip_at_syscall"): 1364, ("ppc32", "sprg3_ro"): 1368, ("ppc32", "tfhar"): 1376, ("ppc32", "texasr"): 1384, ("ppc32", "tfiar"): 1392, ("ppc32", "ppr"): 1400, ("ppc32", "texasru"): 1408, ("ppc32", "pspb"): 1412, ("ppc64", "gpr0"): 16, ("ppc64", "r0"): 16, ("ppc64", "gpr1"): 24, ("ppc64", "r1"): 24, ("ppc64", "sp"): 24, ("ppc64", "gpr2"): 32, ("ppc64", "r2"): 32, ("ppc64", "rtoc"): 32, ("ppc64", "gpr3"): 40, ("ppc64", "r3"): 40, ("ppc64", "gpr4"): 48, ("ppc64", "r4"): 48, ("ppc64", "gpr5"): 56, ("ppc64", "r5"): 56, ("ppc64", "gpr6"): 64, ("ppc64", "r6"): 64, ("ppc64", "gpr7"): 72, ("ppc64", "r7"): 72, ("ppc64", "gpr8"): 80, ("ppc64", "r8"): 80, ("ppc64", "gpr9"): 88, ("ppc64", "r9"): 88, ("ppc64", "gpr10"): 96, ("ppc64", "r10"): 96, ("ppc64", "gpr11"): 104, ("ppc64", "r11"): 104, ("ppc64", "gpr12"): 112, ("ppc64", "r12"): 112, ("ppc64", "gpr13"): 120, ("ppc64", "r13"): 120, ("ppc64", "gpr14"): 128, ("ppc64", "r14"): 128, ("ppc64", "gpr15"): 136, ("ppc64", "r15"): 136, ("ppc64", "gpr16"): 144, ("ppc64", "r16"): 144, ("ppc64", "gpr17"): 152, ("ppc64", "r17"): 152, ("ppc64", "gpr18"): 160, ("ppc64", "r18"): 160, ("ppc64", "gpr19"): 168, ("ppc64", "r19"): 168, ("ppc64", "gpr20"): 176, ("ppc64", "r20"): 176, ("ppc64", "gpr21"): 184, ("ppc64", "r21"): 184, ("ppc64", "gpr22"): 192, ("ppc64", "r22"): 192, ("ppc64", "gpr23"): 200, ("ppc64", "r23"): 200, ("ppc64", "gpr24"): 208, ("ppc64", "r24"): 208, ("ppc64", "gpr25"): 216, ("ppc64", "r25"): 216, ("ppc64", "gpr26"): 224, ("ppc64", "r26"): 224, ("ppc64", "gpr27"): 232, ("ppc64", "r27"): 232, ("ppc64", "gpr28"): 240, ("ppc64", "r28"): 240, ("ppc64", "gpr29"): 248, ("ppc64", "r29"): 248, ("ppc64", "gpr30"): 256, ("ppc64", "r30"): 256, ("ppc64", "gpr31"): 264, ("ppc64", "r31"): 264, ("ppc64", "bp"): 264, ("ppc64", "vsr0"): 272, ("ppc64", "v0"): 272, ("ppc64", "fpr0"): 272, ("ppc64", "vsr1"): 288, ("ppc64", "v1"): 288, ("ppc64", "fpr1"): 288, ("ppc64", "vsr2"): 304, ("ppc64", "v2"): 
304, ("ppc64", "fpr2"): 304, ("ppc64", "vsr3"): 320, ("ppc64", "v3"): 320, ("ppc64", "fpr3"): 320, ("ppc64", "vsr4"): 336, ("ppc64", "v4"): 336, ("ppc64", "fpr4"): 336, ("ppc64", "vsr5"): 352, ("ppc64", "v5"): 352, ("ppc64", "fpr5"): 352, ("ppc64", "vsr6"): 368, ("ppc64", "v6"): 368, ("ppc64", "fpr6"): 368, ("ppc64", "vsr7"): 384, ("ppc64", "v7"): 384, ("ppc64", "fpr7"): 384, ("ppc64", "vsr8"): 400, ("ppc64", "v8"): 400, ("ppc64", "fpr8"): 400, ("ppc64", "vsr9"): 416, ("ppc64", "v9"): 416, ("ppc64", "fpr9"): 416, ("ppc64", "vsr10"): 432, ("ppc64", "v10"): 432, ("ppc64", "fpr10"): 432, ("ppc64", "vsr11"): 448, ("ppc64", "v11"): 448, ("ppc64", "fpr11"): 448, ("ppc64", "vsr12"): 464, ("ppc64", "v12"): 464, ("ppc64", "fpr12"): 464, ("ppc64", "vsr13"): 480, ("ppc64", "v13"): 480, ("ppc64", "fpr13"): 480, ("ppc64", "vsr14"): 496, ("ppc64", "v14"): 496, ("ppc64", "fpr14"): 496, ("ppc64", "vsr15"): 512, ("ppc64", "v15"): 512, ("ppc64", "fpr15"): 512, ("ppc64", "vsr16"): 528, ("ppc64", "v16"): 528, ("ppc64", "fpr16"): 528, ("ppc64", "vsr17"): 544, ("ppc64", "v17"): 544, ("ppc64", "fpr17"): 544, ("ppc64", "vsr18"): 560, ("ppc64", "v18"): 560, ("ppc64", "fpr18"): 560, ("ppc64", "vsr19"): 576, ("ppc64", "v19"): 576, ("ppc64", "fpr19"): 576, ("ppc64", "vsr20"): 592, ("ppc64", "v20"): 592, ("ppc64", "fpr20"): 592, ("ppc64", "vsr21"): 608, ("ppc64", "v21"): 608, ("ppc64", "fpr21"): 608, ("ppc64", "vsr22"): 624, ("ppc64", "v22"): 624, ("ppc64", "fpr22"): 624, ("ppc64", "vsr23"): 640, ("ppc64", "v23"): 640, ("ppc64", "fpr23"): 640, ("ppc64", "vsr24"): 656, ("ppc64", "v24"): 656, ("ppc64", "fpr24"): 656, ("ppc64", "vsr25"): 672, ("ppc64", "v25"): 672, ("ppc64", "fpr25"): 672, ("ppc64", "vsr26"): 688, ("ppc64", "v26"): 688, ("ppc64", "fpr26"): 688, ("ppc64", "vsr27"): 704, ("ppc64", "v27"): 704, ("ppc64", "fpr27"): 704, ("ppc64", "vsr28"): 720, ("ppc64", "v28"): 720, ("ppc64", "fpr28"): 720, ("ppc64", "vsr29"): 736, ("ppc64", "v29"): 736, ("ppc64", "fpr29"): 736, ("ppc64", "vsr30"): 
752, ("ppc64", "v30"): 752, ("ppc64", "fpr30"): 752, ("ppc64", "vsr31"): 768, ("ppc64", "v31"): 768, ("ppc64", "fpr31"): 768, ("ppc64", "vsr32"): 784, ("ppc64", "v32"): 784, ("ppc64", "vsr33"): 800, ("ppc64", "v33"): 800, ("ppc64", "vsr34"): 816, ("ppc64", "v34"): 816, ("ppc64", "vsr35"): 832, ("ppc64", "v35"): 832, ("ppc64", "vsr36"): 848, ("ppc64", "v36"): 848, ("ppc64", "vsr37"): 864, ("ppc64", "v37"): 864, ("ppc64", "vsr38"): 880, ("ppc64", "v38"): 880, ("ppc64", "vsr39"): 896, ("ppc64", "v39"): 896, ("ppc64", "vsr40"): 912, ("ppc64", "v40"): 912, ("ppc64", "vsr41"): 928, ("ppc64", "v41"): 928, ("ppc64", "vsr42"): 944, ("ppc64", "v42"): 944, ("ppc64", "vsr43"): 960, ("ppc64", "v43"): 960, ("ppc64", "vsr44"): 976, ("ppc64", "v44"): 976, ("ppc64", "vsr45"): 992, ("ppc64", "v45"): 992, ("ppc64", "vsr46"): 1008, ("ppc64", "v46"): 1008, ("ppc64", "vsr47"): 1024, ("ppc64", "v47"): 1024, ("ppc64", "vsr48"): 1040, ("ppc64", "v48"): 1040, ("ppc64", "vsr49"): 1056, ("ppc64", "v49"): 1056, ("ppc64", "vsr50"): 1072, ("ppc64", "v50"): 1072, ("ppc64", "vsr51"): 1088, ("ppc64", "v51"): 1088, ("ppc64", "vsr52"): 1104, ("ppc64", "v52"): 1104, ("ppc64", "vsr53"): 1120, ("ppc64", "v53"): 1120, ("ppc64", "vsr54"): 1136, ("ppc64", "v54"): 1136, ("ppc64", "vsr55"): 1152, ("ppc64", "v55"): 1152, ("ppc64", "vsr56"): 1168, ("ppc64", "v56"): 1168, ("ppc64", "vsr57"): 1184, ("ppc64", "v57"): 1184, ("ppc64", "vsr58"): 1200, ("ppc64", "v58"): 1200, ("ppc64", "vsr59"): 1216, ("ppc64", "v59"): 1216, ("ppc64", "vsr60"): 1232, ("ppc64", "v60"): 1232, ("ppc64", "vsr61"): 1248, ("ppc64", "v61"): 1248, ("ppc64", "vsr62"): 1264, ("ppc64", "v62"): 1264, ("ppc64", "vsr63"): 1280, ("ppc64", "v63"): 1280, ("ppc64", "cia"): 1296, ("ppc64", "ip"): 1296, ("ppc64", "pc"): 1296, ("ppc64", "lr"): 1304, ("ppc64", "ctr"): 1312, ("ppc64", "xer_so"): 1320, ("ppc64", "xer_ov"): 1321, ("ppc64", "xer_ca"): 1322, ("ppc64", "xer_bc"): 1323, ("ppc64", "cr0_321"): 1324, ("ppc64", "cr0_0"): 1325, ("ppc64", "cr0"): 
1325, ("ppc64", "cr1_321"): 1326, ("ppc64", "cr1_0"): 1327, ("ppc64", "cr1"): 1327, ("ppc64", "cr2_321"): 1328, ("ppc64", "cr2_0"): 1329, ("ppc64", "cr2"): 1329, ("ppc64", "cr3_321"): 1330, ("ppc64", "cr3_0"): 1331, ("ppc64", "cr3"): 1331, ("ppc64", "cr4_321"): 1332, ("ppc64", "cr4_0"): 1333, ("ppc64", "cr4"): 1333, ("ppc64", "cr5_321"): 1334, ("ppc64", "cr5_0"): 1335, ("ppc64", "cr5"): 1335, ("ppc64", "cr6_321"): 1336, ("ppc64", "cr6_0"): 1337, ("ppc64", "cr6"): 1337, ("ppc64", "cr7_321"): 1338, ("ppc64", "cr7_0"): 1339, ("ppc64", "cr7"): 1339, ("ppc64", "fpround"): 1340, ("ppc64", "dfpround"): 1341, ("ppc64", "c_fpcc"): 1342, ("ppc64", "vrsave"): 1344, ("ppc64", "vscr"): 1348, ("ppc64", "emnote"): 1352, ("ppc64", "cmstart"): 1360, ("ppc64", "cmlen"): 1368, ("ppc64", "nraddr"): 1376, ("ppc64", "nraddr_gpr2"): 1384, ("ppc64", "redir_sp"): 1392, ("ppc64", "redir_stack"): 1400, ("ppc64", "ip_at_syscall"): 1656, ("ppc64", "sprg3_ro"): 1664, ("ppc64", "tfhar"): 1672, ("ppc64", "texasr"): 1680, ("ppc64", "tfiar"): 1688, ("ppc64", "ppr"): 1696, ("ppc64", "texasru"): 1704, ("ppc64", "pspb"): 1708, ("s390x", "ia"): 720, ("s390x", "ip"): 720, ("s390x", "pc"): 720, ("s390x", "r0"): 576, ("s390x", "r1"): 584, ("s390x", "r1_32"): 588, ("s390x", "r2"): 592, ("s390x", "r2_32"): 596, ("s390x", "r3"): 600, ("s390x", "r3_32"): 604, ("s390x", "r4"): 608, ("s390x", "r4_32"): 612, ("s390x", "r5"): 616, ("s390x", "r5_32"): 620, ("s390x", "r6"): 624, ("s390x", "r6_32"): 628, ("s390x", "r7"): 632, ("s390x", "r7_32"): 636, ("s390x", "r8"): 640, ("s390x", "r8_32"): 644, ("s390x", "r9"): 648, ("s390x", "r9_32"): 652, ("s390x", "r10"): 656, ("s390x", "r10_32"): 660, ("s390x", "r11"): 664, ("s390x", "bp"): 664, ("s390x", "r11_32"): 668, ("s390x", "r12"): 672, ("s390x", "r12_32"): 676, ("s390x", "r13"): 680, ("s390x", "r13_32"): 684, ("s390x", "r14"): 688, ("s390x", "lr"): 688, ("s390x", "r15"): 696, ("s390x", "sp"): 696, ("s390x", "v0"): 64, ("s390x", "f0"): 64, ("s390x", "v1"): 80, ("s390x", 
"f1"): 80, ("s390x", "v2"): 96, ("s390x", "f2"): 96, ("s390x", "v3"): 112, ("s390x", "f3"): 112, ("s390x", "v4"): 128, ("s390x", "f4"): 128, ("s390x", "v5"): 144, ("s390x", "f5"): 144, ("s390x", "v6"): 160, ("s390x", "f6"): 160, ("s390x", "v7"): 176, ("s390x", "f7"): 176, ("s390x", "v8"): 192, ("s390x", "f8"): 192, ("s390x", "v9"): 208, ("s390x", "f9"): 208, ("s390x", "v10"): 224, ("s390x", "f10"): 224, ("s390x", "v11"): 240, ("s390x", "f11"): 240, ("s390x", "v12"): 256, ("s390x", "f12"): 256, ("s390x", "v13"): 272, ("s390x", "f13"): 272, ("s390x", "v14"): 288, ("s390x", "f14"): 288, ("s390x", "v15"): 304, ("s390x", "f15"): 304, ("s390x", "v16"): 320, ("s390x", "v17"): 336, ("s390x", "v18"): 352, ("s390x", "v19"): 368, ("s390x", "v20"): 384, ("s390x", "v21"): 400, ("s390x", "v22"): 416, ("s390x", "v23"): 432, ("s390x", "v24"): 448, ("s390x", "v25"): 464, ("s390x", "v26"): 480, ("s390x", "v27"): 496, ("s390x", "v28"): 512, ("s390x", "v29"): 528, ("s390x", "v30"): 544, ("s390x", "v31"): 560, ("s390x", "a0"): 0, ("s390x", "a1"): 4, ("s390x", "a2"): 8, ("s390x", "a3"): 12, ("s390x", "a4"): 16, ("s390x", "a5"): 20, ("s390x", "a6"): 24, ("s390x", "a7"): 28, ("s390x", "a8"): 32, ("s390x", "a9"): 36, ("s390x", "a10"): 40, ("s390x", "a11"): 44, ("s390x", "a12"): 48, ("s390x", "a13"): 52, ("s390x", "a14"): 56, ("s390x", "a15"): 60, ("s390x", "nraddr"): 768, ("s390x", "cmstart"): 776, ("s390x", "cmlen"): 784, ("s390x", "ip_at_syscall"): 792, ("s390x", "emnote"): 800, ("mips32", "zero"): 8, ("mips32", "r0"): 8, ("mips32", "at"): 12, ("mips32", "r1"): 12, ("mips32", "v0"): 16, ("mips32", "r2"): 16, ("mips32", "v1"): 20, ("mips32", "r3"): 20, ("mips32", "a0"): 24, ("mips32", "r4"): 24, ("mips32", "a1"): 28, ("mips32", "r5"): 28, ("mips32", "a2"): 32, ("mips32", "r6"): 32, ("mips32", "a3"): 36, ("mips32", "r7"): 36, ("mips32", "t0"): 40, ("mips32", "r8"): 40, ("mips32", "t1"): 44, ("mips32", "r9"): 44, ("mips32", "t2"): 48, ("mips32", "r10"): 48, ("mips32", "t3"): 52, ("mips32", 
"r11"): 52, ("mips32", "t4"): 56, ("mips32", "r12"): 56, ("mips32", "t5"): 60, ("mips32", "r13"): 60, ("mips32", "t6"): 64, ("mips32", "r14"): 64, ("mips32", "t7"): 68, ("mips32", "r15"): 68, ("mips32", "s0"): 72, ("mips32", "r16"): 72, ("mips32", "s1"): 76, ("mips32", "r17"): 76, ("mips32", "s2"): 80, ("mips32", "r18"): 80, ("mips32", "s3"): 84, ("mips32", "r19"): 84, ("mips32", "s4"): 88, ("mips32", "r20"): 88, ("mips32", "s5"): 92, ("mips32", "r21"): 92, ("mips32", "s6"): 96, ("mips32", "r22"): 96, ("mips32", "s7"): 100, ("mips32", "r23"): 100, ("mips32", "t8"): 104, ("mips32", "r24"): 104, ("mips32", "t9"): 108, ("mips32", "r25"): 108, ("mips32", "k0"): 112, ("mips32", "r26"): 112, ("mips32", "k1"): 116, ("mips32", "r27"): 116, ("mips32", "gp"): 120, ("mips32", "r28"): 120, ("mips32", "sp"): 124, ("mips32", "r29"): 124, ("mips32", "s8"): 128, ("mips32", "r30"): 128, ("mips32", "fp"): 128, ("mips32", "bp"): 128, ("mips32", "ra"): 132, ("mips32", "r31"): 132, ("mips32", "lr"): 132, ("mips32", "pc"): 136, ("mips32", "ip"): 136, ("mips32", "hi"): 140, ("mips32", "lo"): 144, ("mips32", "f0"): 152, ("mips32", "f0_lo"): 152, ("mips32", "f1"): 160, ("mips32", "f1_lo"): 160, ("mips32", "f2"): 168, ("mips32", "f2_lo"): 168, ("mips32", "f3"): 176, ("mips32", "f3_lo"): 176, ("mips32", "f4"): 184, ("mips32", "f4_lo"): 184, ("mips32", "f5"): 192, ("mips32", "f5_lo"): 192, ("mips32", "f6"): 200, ("mips32", "f6_lo"): 200, ("mips32", "f7"): 208, ("mips32", "f7_lo"): 208, ("mips32", "f8"): 216, ("mips32", "f8_lo"): 216, ("mips32", "f9"): 224, ("mips32", "f9_lo"): 224, ("mips32", "f10"): 232, ("mips32", "f10_lo"): 232, ("mips32", "f11"): 240, ("mips32", "f11_lo"): 240, ("mips32", "f12"): 248, ("mips32", "f12_lo"): 248, ("mips32", "f13"): 256, ("mips32", "f13_lo"): 256, ("mips32", "f14"): 264, ("mips32", "f14_lo"): 264, ("mips32", "f15"): 272, ("mips32", "f15_lo"): 272, ("mips32", "f16"): 280, ("mips32", "f16_lo"): 280, ("mips32", "f17"): 288, ("mips32", "f17_lo"): 288, ("mips32", 
"f18"): 296, ("mips32", "f18_lo"): 296, ("mips32", "f19"): 304, ("mips32", "f19_lo"): 304, ("mips32", "f20"): 312, ("mips32", "f20_lo"): 312, ("mips32", "f21"): 320, ("mips32", "f21_lo"): 320, ("mips32", "f22"): 328, ("mips32", "f22_lo"): 328, ("mips32", "f23"): 336, ("mips32", "f23_lo"): 336, ("mips32", "f24"): 344, ("mips32", "f24_lo"): 344, ("mips32", "f25"): 352, ("mips32", "f25_lo"): 352, ("mips32", "f26"): 360, ("mips32", "f26_lo"): 360, ("mips32", "f27"): 368, ("mips32", "f27_lo"): 368, ("mips32", "f28"): 376, ("mips32", "f28_lo"): 376, ("mips32", "f29"): 384, ("mips32", "f29_lo"): 384, ("mips32", "f30"): 392, ("mips32", "f30_lo"): 392, ("mips32", "f31"): 400, ("mips32", "f31_lo"): 400, ("mips32", "fir"): 408, ("mips32", "fccr"): 412, ("mips32", "fexr"): 416, ("mips32", "fenr"): 420, ("mips32", "fcsr"): 424, ("mips32", "ulr"): 428, ("mips32", "emnote"): 432, ("mips32", "cmstart"): 436, ("mips32", "cmlen"): 440, ("mips32", "nraddr"): 444, ("mips32", "cond"): 448, ("mips32", "dspcontrol"): 452, ("mips32", "ac0"): 456, ("mips32", "ac1"): 464, ("mips32", "ac2"): 472, ("mips32", "ac3"): 480, ("mips32", "cp0_status"): 488, ("mips32", "ip_at_syscall"): 492, ("mips64", "zero"): 16, ("mips64", "r0"): 16, ("mips64", "at"): 24, ("mips64", "r1"): 24, ("mips64", "v0"): 32, ("mips64", "r2"): 32, ("mips64", "v1"): 40, ("mips64", "r3"): 40, ("mips64", "a0"): 48, ("mips64", "r4"): 48, ("mips64", "a1"): 56, ("mips64", "r5"): 56, ("mips64", "a2"): 64, ("mips64", "r6"): 64, ("mips64", "a3"): 72, ("mips64", "r7"): 72, ("mips64", "t0"): 80, ("mips64", "r8"): 80, ("mips64", "a4"): 80, ("mips64", "t1"): 88, ("mips64", "r9"): 88, ("mips64", "a5"): 88, ("mips64", "t2"): 96, ("mips64", "r10"): 96, ("mips64", "a6"): 96, ("mips64", "t3"): 104, ("mips64", "r11"): 104, ("mips64", "a7"): 104, ("mips64", "t4"): 112, ("mips64", "r12"): 112, ("mips64", "t5"): 120, ("mips64", "r13"): 120, ("mips64", "t6"): 128, ("mips64", "r14"): 128, ("mips64", "t7"): 136, ("mips64", "r15"): 136, ("mips64", 
"s0"): 144, ("mips64", "r16"): 144, ("mips64", "s1"): 152, ("mips64", "r17"): 152, ("mips64", "s2"): 160, ("mips64", "r18"): 160, ("mips64", "s3"): 168, ("mips64", "r19"): 168, ("mips64", "s4"): 176, ("mips64", "r20"): 176, ("mips64", "s5"): 184, ("mips64", "r21"): 184, ("mips64", "s6"): 192, ("mips64", "r22"): 192, ("mips64", "s7"): 200, ("mips64", "r23"): 200, ("mips64", "t8"): 208, ("mips64", "r24"): 208, ("mips64", "t9"): 216, ("mips64", "r25"): 216, ("mips64", "k0"): 224, ("mips64", "r26"): 224, ("mips64", "k1"): 232, ("mips64", "r27"): 232, ("mips64", "gp"): 240, ("mips64", "r28"): 240, ("mips64", "sp"): 248, ("mips64", "r29"): 248, ("mips64", "s8"): 256, ("mips64", "r30"): 256, ("mips64", "fp"): 256, ("mips64", "bp"): 256, ("mips64", "ra"): 264, ("mips64", "r31"): 264, ("mips64", "lr"): 264, ("mips64", "pc"): 272, ("mips64", "ip"): 272, ("mips64", "hi"): 280, ("mips64", "lo"): 288, ("mips64", "f0"): 296, ("mips64", "f0_lo"): 296, ("mips64", "f1"): 304, ("mips64", "f1_lo"): 304, ("mips64", "f2"): 312, ("mips64", "f2_lo"): 312, ("mips64", "f3"): 320, ("mips64", "f3_lo"): 320, ("mips64", "f4"): 328, ("mips64", "f4_lo"): 328, ("mips64", "f5"): 336, ("mips64", "f5_lo"): 336, ("mips64", "f6"): 344, ("mips64", "f6_lo"): 344, ("mips64", "f7"): 352, ("mips64", "f7_lo"): 352, ("mips64", "f8"): 360, ("mips64", "f8_lo"): 360, ("mips64", "f9"): 368, ("mips64", "f9_lo"): 368, ("mips64", "f10"): 376, ("mips64", "f10_lo"): 376, ("mips64", "f11"): 384, ("mips64", "f11_lo"): 384, ("mips64", "f12"): 392, ("mips64", "f12_lo"): 392, ("mips64", "f13"): 400, ("mips64", "f13_lo"): 400, ("mips64", "f14"): 408, ("mips64", "f14_lo"): 408, ("mips64", "f15"): 416, ("mips64", "f15_lo"): 416, ("mips64", "f16"): 424, ("mips64", "f16_lo"): 424, ("mips64", "f17"): 432, ("mips64", "f17_lo"): 432, ("mips64", "f18"): 440, ("mips64", "f18_lo"): 440, ("mips64", "f19"): 448, ("mips64", "f19_lo"): 448, ("mips64", "f20"): 456, ("mips64", "f20_lo"): 456, ("mips64", "f21"): 464, ("mips64", "f21_lo"): 
464, ("mips64", "f22"): 472, ("mips64", "f22_lo"): 472, ("mips64", "f23"): 480, ("mips64", "f23_lo"): 480, ("mips64", "f24"): 488, ("mips64", "f24_lo"): 488, ("mips64", "f25"): 496, ("mips64", "f25_lo"): 496, ("mips64", "f26"): 504, ("mips64", "f26_lo"): 504, ("mips64", "f27"): 512, ("mips64", "f27_lo"): 512, ("mips64", "f28"): 520, ("mips64", "f28_lo"): 520, ("mips64", "f29"): 528, ("mips64", "f29_lo"): 528, ("mips64", "f30"): 536, ("mips64", "f30_lo"): 536, ("mips64", "f31"): 544, ("mips64", "f31_lo"): 544, ("mips64", "fir"): 552, ("mips64", "fccr"): 556, ("mips64", "fexr"): 560, ("mips64", "fenr"): 564, ("mips64", "fcsr"): 568, ("mips64", "cp0_status"): 572, ("mips64", "ulr"): 576, ("mips64", "emnote"): 584, ("mips64", "cond"): 588, ("mips64", "cmstart"): 592, ("mips64", "cmlen"): 600, ("mips64", "nraddr"): 608, ("mips64", "ip_at_syscall"): 616, ("riscv64", "x0"): 16, ("riscv64", "zero"): 16, ("riscv64", "x1"): 24, ("riscv64", "ra"): 24, ("riscv64", "lr"): 24, ("riscv64", "x2"): 32, ("riscv64", "sp"): 32, ("riscv64", "x3"): 40, ("riscv64", "gp"): 40, ("riscv64", "x4"): 48, ("riscv64", "tp"): 48, ("riscv64", "x5"): 56, ("riscv64", "t0"): 56, ("riscv64", "x6"): 64, ("riscv64", "t1"): 64, ("riscv64", "x7"): 72, ("riscv64", "t2"): 72, ("riscv64", "x9"): 88, ("riscv64", "s1"): 88, ("riscv64", "x10"): 96, ("riscv64", "a0"): 96, ("riscv64", "x11"): 104, ("riscv64", "a1"): 104, ("riscv64", "x12"): 112, ("riscv64", "a2"): 112, ("riscv64", "x13"): 120, ("riscv64", "a3"): 120, ("riscv64", "x14"): 128, ("riscv64", "a4"): 128, ("riscv64", "x15"): 136, ("riscv64", "a5"): 136, ("riscv64", "x16"): 144, ("riscv64", "a6"): 144, ("riscv64", "x17"): 152, ("riscv64", "a7"): 152, ("riscv64", "x18"): 160, ("riscv64", "s2"): 160, ("riscv64", "x19"): 168, ("riscv64", "s3"): 168, ("riscv64", "x20"): 176, ("riscv64", "s4"): 176, ("riscv64", "x21"): 184, ("riscv64", "s5"): 184, ("riscv64", "x22"): 192, ("riscv64", "s6"): 192, ("riscv64", "x23"): 200, ("riscv64", "s7"): 200, ("riscv64", 
"x24"): 208, ("riscv64", "s8"): 208, ("riscv64", "x25"): 216, ("riscv64", "s9"): 216, ("riscv64", "x26"): 224, ("riscv64", "s10"): 224, ("riscv64", "x27"): 232, ("riscv64", "s11"): 232, ("riscv64", "x28"): 240, ("riscv64", "t3"): 240, ("riscv64", "x29"): 248, ("riscv64", "t4"): 248, ("riscv64", "x30"): 256, ("riscv64", "t5"): 256, ("riscv64", "x31"): 264, ("riscv64", "t6"): 264, ("riscv64", "pc"): 272, ("riscv64", "ip"): 272, ("riscv64", "f0"): 280, ("riscv64", "ft0"): 280, ("riscv64", "f1"): 288, ("riscv64", "ft1"): 288, ("riscv64", "f2"): 296, ("riscv64", "ft2"): 296, ("riscv64", "f3"): 304, ("riscv64", "ft3"): 304, ("riscv64", "f4"): 312, ("riscv64", "ft4"): 312, ("riscv64", "f5"): 320, ("riscv64", "ft5"): 320, ("riscv64", "f6"): 328, ("riscv64", "ft6"): 328, ("riscv64", "f7"): 336, ("riscv64", "ft7"): 336, ("riscv64", "f9"): 352, ("riscv64", "fs1"): 352, ("riscv64", "f10"): 360, ("riscv64", "fa0"): 360, ("riscv64", "f11"): 368, ("riscv64", "fa1"): 368, ("riscv64", "f12"): 376, ("riscv64", "fa2"): 376, ("riscv64", "f13"): 384, ("riscv64", "fa3"): 384, ("riscv64", "f14"): 392, ("riscv64", "fa4"): 392, ("riscv64", "f15"): 400, ("riscv64", "fa5"): 400, ("riscv64", "f16"): 408, ("riscv64", "fa6"): 408, ("riscv64", "f17"): 416, ("riscv64", "fa7"): 416, ("riscv64", "f18"): 424, ("riscv64", "fs2"): 424, ("riscv64", "f19"): 432, ("riscv64", "fs3"): 432, ("riscv64", "f20"): 440, ("riscv64", "fs4"): 440, ("riscv64", "f21"): 448, ("riscv64", "fs5"): 448, ("riscv64", "f22"): 456, ("riscv64", "fs6"): 456, ("riscv64", "f23"): 464, ("riscv64", "fs7"): 464, ("riscv64", "f24"): 472, ("riscv64", "fs8"): 472, ("riscv64", "f25"): 480, ("riscv64", "fs9"): 480, ("riscv64", "f26"): 488, ("riscv64", "fs10"): 488, ("riscv64", "f27"): 496, ("riscv64", "fs11"): 496, ("riscv64", "f28"): 504, ("riscv64", "ft8"): 504, ("riscv64", "f29"): 512, ("riscv64", "ft9"): 512, ("riscv64", "f30"): 520, ("riscv64", "ft10"): 520, ("riscv64", "f31"): 528, ("riscv64", "ft11"): 528, } 
from ._register_info import REGISTER_OFFSETS
from .enums import default_vex_archinfo, vex_endness_from_string
from .types import Register
from .vex_ffi import guest_offsets


class PyvexArch:
    """
    An architecture definition for use with pyvex - usable version.

    Holds the libVEX arch enum name, the guest-state offset of the program
    counter, and the VexArchInfo dict passed to the lifter.
    """

    def __init__(self, name: str, bits: int, memory_endness: str, instruction_endness: str = "Iend_BE"):
        """
        :param name:                 Canonical architecture name, e.g. "AMD64" (must be a key of the map below).
        :param bits:                 Pointer width in bits (32 or 64).
        :param memory_endness:       "Iend_LE" or "Iend_BE" for data accesses.
        :param instruction_endness:  Endianness instructions are fetched with; defaults to "Iend_BE".
        """
        self.name = name
        self.bits = bits
        self.memory_endness = memory_endness
        self.instruction_endness = instruction_endness
        self.byte_width = 8
        self.register_list: list[Register] = []
        self.registers: dict[str, tuple[int, int]] = {}
        # Map the friendly name to the VexArch enum identifier used by libVEX.
        self.vex_arch = {
            "X86": "VexArchX86",
            "AMD64": "VexArchAMD64",
            "ARM": "VexArchARM",
            "ARM64": "VexArchARM64",
            "PPC32": "VexArchPPC32",
            "PPC64": "VexArchPPC64",
            "S390X": "VexArchS390X",
            "MIPS32": "VexArchMIPS32",
            "MIPS64": "VexArchMIPS64",
            "RISCV64": "VexArchRISCV64",
        }[name]
        # Guest-state offset of the program counter, looked up via the
        # per-architecture name of the PC register in the guest_offsets table.
        self.ip_offset = guest_offsets[
            (
                self.vex_name_small,
                {
                    "X86": "eip",
                    "AMD64": "rip",
                    "ARM": "r15t",
                    "ARM64": "pc",
                    "PPC32": "cia",
                    "PPC64": "cia",
                    "S390X": "ia",
                    "MIPS32": "pc",
                    "MIPS64": "pc",
                    "RISCV64": "pc",
                }[name],
            )
        ]
        self.vex_archinfo = default_vex_archinfo()
        # default_vex_archinfo() is little-endian; only override for BE guests.
        if memory_endness == "Iend_BE":
            self.vex_archinfo["endness"] = vex_endness_from_string("VexEndnessBE")

    def __repr__(self):
        # BUGFIX: previously returned an empty f-string (f""), making every
        # arch print as nothing. Restore the conventional angle-bracket repr.
        return f"<PyvexArch {self.name}>"

    @property
    def vex_name_small(self) -> str:
        """Lowercase short name as used in the offset tables, e.g. "VexArchAMD64" -> "amd64"."""
        return self.vex_arch[7:].lower()

    def translate_register_name(self, offset, size=None):  # pylint: disable=unused-argument
        """
        Return the name of the register living at guest-state ``offset``, or
        ``str(offset)`` if no register is known there. Linear scan over both
        offset tables; `size` is accepted for interface compatibility but unused.
        """
        for (arch, reg), offset2 in guest_offsets.items():
            if arch == self.vex_name_small and offset2 == offset:
                return reg
        for (arch, reg), offset2 in REGISTER_OFFSETS.items():
            if arch == self.vex_name_small and offset2 == offset:
                return reg
        return str(offset)

    def get_register_offset(self, name: str) -> int:
        """
        Return the guest-state offset of register ``name``.

        :raises KeyError: if the register is unknown for this architecture.
        """
        arch_reg_tuple = (self.vex_name_small, name)
        if arch_reg_tuple in guest_offsets:
            return guest_offsets[arch_reg_tuple]
        elif arch_reg_tuple in REGISTER_OFFSETS:
            return REGISTER_OFFSETS[arch_reg_tuple]
        else:
            raise KeyError(f"Unknown register {name} for architecture {self.name}")


ARCH_X86 = PyvexArch("X86", 32, "Iend_LE")
ARCH_AMD64 = PyvexArch("AMD64", 64, "Iend_LE")
ARCH_ARM_LE = PyvexArch("ARM", 32, "Iend_LE", instruction_endness="Iend_LE")
ARCH_ARM_BE_LE = PyvexArch("ARM", 32, "Iend_BE", instruction_endness="Iend_LE")
# BUGFIX: was constructed with "Iend_LE", which made the "big-endian ARM"
# variant lift with little-endian memory (VexArchInfo endness never set to BE)
# while still fetching instructions big-endian. The fully-BE (BE32-style)
# variant must use big-endian memory.
ARCH_ARM_BE = PyvexArch("ARM", 32, "Iend_BE")
ARCH_ARM64_LE = PyvexArch("ARM64", 64, "Iend_LE", instruction_endness="Iend_LE")
ARCH_ARM64_BE = PyvexArch("ARM64", 64, "Iend_BE")
ARCH_PPC32 = PyvexArch("PPC32", 32, "Iend_BE")
ARCH_PPC64_BE = PyvexArch("PPC64", 64, "Iend_BE")
ARCH_PPC64_LE = PyvexArch("PPC64", 64, "Iend_LE")
ARCH_S390X = PyvexArch("S390X", 64, "Iend_BE")
ARCH_MIPS32_BE = PyvexArch("MIPS32", 32, "Iend_BE")
ARCH_MIPS32_LE = PyvexArch("MIPS32", 32, "Iend_LE")
ARCH_MIPS64_BE = PyvexArch("MIPS64", 64, "Iend_BE")
ARCH_MIPS64_LE = PyvexArch("MIPS64", 64, "Iend_LE")
ARCH_RISCV64_LE = PyvexArch("RISCV64", 64, "Iend_LE", instruction_endness="Iend_LE")
Must duck-type as :class:`archinfo.arch.Arch`
    :ivar statements:       The statements in this block
    :vartype statements:    list of :class:`IRStmt`
    :ivar next:             The expression for the default exit target of this block
    :vartype next:          :class:`IRExpr`
    :ivar int offsIP:       The offset of the instruction pointer in the VEX guest state
    :ivar int stmts_used:   The number of statements in this IRSB
    :ivar str jumpkind:     The type of this block's default jump (call, boring, syscall, etc) as a VEX enum string
    :ivar bool direct_next: Whether this block ends with a direct (not indirect) jump or branch
    :ivar int size:         The size of this block in bytes
    :ivar int addr:         The address of this basic block, i.e. the address in the first IMark
    """

    __slots__ = [
        "addr",
        "arch",
        "statements",
        "next",
        "_tyenv",
        "jumpkind",
        "is_noop_block",
        "_direct_next",
        "_size",
        "_instructions",
        "_exit_statements",
        "default_exit_target",
        "_instruction_addresses",
        "data_refs",
        "const_vals",
    ]

    # The following constants shall match the defs in pyvex.h
    MAX_EXITS = 400
    MAX_DATA_REFS = 2000
    MAX_CONST_VALS = 1000

    def __init__(
        self,
        data,
        mem_addr,
        arch: Arch,
        max_inst=None,
        max_bytes=None,
        bytes_offset=0,
        traceflags=0,
        opt_level=1,
        num_inst=None,
        num_bytes=None,
        strict_block_end=False,
        skip_stmts=False,
        collect_data_refs=False,
        cross_insn_opt=True,
    ):
        """
        :param data:             The bytes to lift. Can be either a string of bytes or a cffi buffer object.
                                 You may also pass None to initialize an empty IRSB.
        :type data:              str or bytes or cffi.FFI.CData or None
        :param int mem_addr:     The address to lift the data at.
        :param arch:             The architecture to lift the data as.
        :param max_inst:         The maximum number of instructions to lift. (See note below)
        :param max_bytes:        The maximum number of bytes to use.
        :param num_inst:         Replaces max_inst if max_inst is None. If set to None as well, no instruction
                                 limit is used.
        :param num_bytes:        Replaces max_bytes if max_bytes is None. If set to None as well, no byte limit
                                 is used.
        :param bytes_offset:     The offset into `data` to start lifting at. Note that for ARM THUMB mode, both
                                 `mem_addr` and `bytes_offset` must be odd (typically `bytes_offset` is set to 1).
        :param traceflags:       The libVEX traceflags, controlling VEX debug prints.
        :param opt_level:        The level of optimization to apply to the IR, -1 through 2. -1 is the strictest
                                 unoptimized level, 0 is unoptimized but will perform some lookahead/lookbehind
                                 optimizations, 1 performs constant propogation, and 2 performs loop unrolling,
                                 which honestly doesn't make much sense in the context of pyvex. The default is 1.
        :param strict_block_end: Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z.

        .. note:: Explicitly specifying the number of instructions to lift (`max_inst`) may not always work
                  exactly as expected. For example, on MIPS, it is meaningless to lift a branch or jump
                  instruction without its delay slot. VEX attempts to Do The Right Thing by possibly decoding
                  fewer instructions than requested. Specifically, this means that lifting a branch or jump
                  on MIPS as a single instruction (`max_inst=1`) will result in an empty IRSB, and subsequent
                  attempts to run this block will raise `SimIRSBError('Empty IRSB passed to SimIRSB.')`.

        .. note:: If no instruction and byte limit is used, pyvex will continue lifting the block until the block
                  ends properly or until it runs out of data to lift.
        """
        # num_inst / num_bytes are legacy aliases for max_inst / max_bytes.
        if max_inst is None:
            max_inst = num_inst
        if max_bytes is None:
            max_bytes = num_bytes
        VEXObject.__init__(self)
        self.addr = mem_addr
        self.arch: Arch = arch

        # Start out as an empty block; fields are populated by _from_py() below
        # (when data is given) or by the caller via _set_attributes().
        self.statements: list[IRStmt] = []
        self.next: IRExpr = Const(U1(0))
        self._tyenv: Optional["IRTypeEnv"] = None
        self.jumpkind: str = "UNSET"
        # Lazily-computed / cached properties (None or () means "not computed yet").
        self._direct_next: bool | None = None
        self._size: int | None = None
        self._instructions: int | None = None
        self._exit_statements: tuple[tuple[int, int, IRStmt], ...] | None = None
        self.is_noop_block: bool = False
        self.default_exit_target = None
        self.data_refs = ()
        self.const_vals = ()
        self._instruction_addresses: tuple[int, ...] = ()

        if data is not None:
            # This is the slower path (because we need to call _from_py() to copy the content in the returned IRSB to
            # the current IRSB instance. You should always call `lift()` directly. This method is kept for compatibility
            # concerns.
            from pyvex.lifting import lift

            irsb = lift(
                data,
                mem_addr,
                arch,
                max_bytes=max_bytes,
                max_inst=max_inst,
                bytes_offset=bytes_offset,
                opt_level=opt_level,
                traceflags=traceflags,
                strict_block_end=strict_block_end,
                skip_stmts=skip_stmts,
                collect_data_refs=collect_data_refs,
                cross_insn_opt=cross_insn_opt,
            )
            self._from_py(irsb)

    @staticmethod
    def empty_block(arch, addr, statements=None, nxt=None, tyenv=None, jumpkind=None, direct_next=None, size=None):
        # Construct an IRSB without lifting (data=None), then fill in whatever
        # attributes the caller supplied.
        block = IRSB(None, addr, arch)
        block._set_attributes(statements, nxt, tyenv, jumpkind, direct_next, size=size)
        return block

    @property
    def tyenv(self) -> "IRTypeEnv":
        # Lazily create an empty type environment the first time it is needed.
        if self._tyenv is None:
            self._tyenv = IRTypeEnv(self.arch)
        return self._tyenv

    @tyenv.setter
    def tyenv(self, v):
        self._tyenv = v

    @property
    def has_statements(self) -> bool:
        # True iff statements were lifted (not skipped) and the list is non-empty.
        return self.statements is not None and bool(self.statements)

    @property
    def exit_statements(self) -> tuple[tuple[int, int, IRStmt], ...]:
        """All conditional-exit (Exit) statements as (instruction address, statement index, statement) tuples."""
        if self._exit_statements is not None:
            return self._exit_statements

        # Delayed process
        if not self.has_statements:
            return ()

        exit_statements = []
        ins_addr = None
        # IMark statements delimit instructions; each Exit is attributed to the
        # address of the most recent IMark.
        for idx, stmt_ in enumerate(self.statements):
            if type(stmt_) is IMark:
                ins_addr = stmt_.addr + stmt_.delta
            elif type(stmt_) is Exit:
                assert ins_addr is not None
                exit_statements.append((ins_addr, idx, stmt_))

        self._exit_statements = tuple(exit_statements)
        return self._exit_statements

    def copy(self) -> "IRSB":
        # Deep copy: statements, expressions and tyenv are all duplicated.
        return copy.deepcopy(self)

    def extend(self, extendwith) -> None:
        """
        Appends an irsb to the current irsb. The irsb that is appended is invalidated. The appended irsb's
        jumpkind and default exit are used.

        :param extendwith: The IRSB to append to this IRSB
        :vartype extendwith: :class:`IRSB`
        """
        # Appending to an empty block is just a copy of the other block.
        if self.stmts_used == 0:
            self._from_py(extendwith)
            return

        conversion_dict = {}
        # Sentinel tmp numbers used by VEX for "no tmp" (IRTemp_INVALID).
        invalid_vals = (0xFFFFFFFF, -1)

        new_size = self.size + extendwith.size
        new_instructions = self.instructions + extendwith.instructions
        new_direct_next = extendwith.direct_next

        def convert_tmp(tmp):
            """
            Converts a tmp from the appended-block into one in the appended-to-block. Creates a new tmp if it
            does not already exist. Prevents collisions in tmp numbers between the two blocks.

            :param tmp: The tmp number to convert
            """
            if tmp not in conversion_dict:
                tmp_type = extendwith.tyenv.lookup(tmp)
                conversion_dict[tmp] = self.tyenv.add(tmp_type)
            return conversion_dict[tmp]

        def convert_expr(expr_):
            """
            Converts a VEX expression to use tmps in the appended-block instead of the appended-to-block. Used to
            prevent collisions in tmp numbers between the two blocks.

            :param tmp: The VEX expression to convert
            :vartype expr: :class:`IRExpr`
            """
            if type(expr_) is RdTmp:
                return RdTmp.get_instance(convert_tmp(expr_.tmp))
            return expr_

        for stmt_ in extendwith.statements:
            stmttype = type(stmt_)
            # First remap any tmp that the statement *writes*.
            if stmttype is WrTmp:
                stmt_.tmp = convert_tmp(stmt_.tmp)
            elif stmttype is LoadG:
                stmt_.dst = convert_tmp(stmt_.dst)
            elif stmttype is LLSC:
                stmt_.result = convert_tmp(stmt_.result)
            elif stmttype is Dirty:
                if stmt_.tmp not in invalid_vals:
                    stmt_.tmp = convert_tmp(stmt_.tmp)
                for e in stmt_.args:
                    convert_expr(e)
            elif stmttype is CAS:
                if stmt_.oldLo not in invalid_vals:
                    stmt_.oldLo = convert_tmp(stmt_.oldLo)
                if stmt_.oldHi not in invalid_vals:
                    stmt_.oldHi = convert_tmp(stmt_.oldHi)
            # Convert all expressions
            to_replace = {}
            for expr_ in stmt_.expressions:
                replacement = convert_expr(expr_)
                if replacement is not expr_:
                    to_replace[expr_] = replacement
            stmt_.replace_expression(to_replace)
            # Add the converted statement to self.statements
            self.statements.append(stmt_)
        # The appended block's default exit becomes ours.
        extendwith.next = convert_expr(extendwith.next)
        self.next = extendwith.next
        self.jumpkind = extendwith.jumpkind
        self._size = new_size
        self._instructions = new_instructions
        self._direct_next = new_direct_next

        # TODO: Change exit_statements, data_references, etc.

    def invalidate_direct_next(self) -> None:
        # Drop the cached direct_next flag so it is recomputed on next access.
        self._direct_next = None

    def pp(self) -> None:
        """
        Pretty-print the IRSB to stdout.
        """
        print(self._pp_str())

    def __repr__(self):
        return f"IRSB <0x{self.size:x} bytes, {self.instructions} ins., {str(self.arch)}> at 0x{self.addr:x}"

    def __str__(self):
        return self._pp_str()

    def __eq__(self, other):
        # Equality covers address, architecture name, statements, default exit
        # and jumpkind; cached/derived fields are intentionally excluded.
        return (
            isinstance(other, IRSB)
            and self.addr == other.addr
            and self.arch.name == other.arch.name
            and self.statements == other.statements
            and self.next == other.next
            and self.jumpkind == other.jumpkind
        )

    def __hash__(self):
        # Must hash on the same fields __eq__ compares.
        return hash((IRSB, self.addr, self.arch.name, tuple(self.statements), self.next, self.jumpkind))

    def typecheck(self) -> bool:
        # Validate the block's structural invariants; truncated in this view.
        try:
            # existence assertions
            assert self.next is not None, "Missing next expression"
            assert self.jumpkind is not None, "Missing jumpkind"

            # Type assertions
            assert isinstance(self.next, expr.IRExpr), "Next expression is not an expression"
            # NOTE(review): `type(self.jumpkind is str)` asserts on a type object,
            # which is always truthy — looks like it should be
            # `type(self.jumpkind) is str`; confirm against upstream.
            assert type(self.jumpkind is str), "Jumpkind is not a string"
            assert self.jumpkind.startswith("Ijk_"), "Jumpkind is not a jumpkind enum"
            assert self.tyenv.typecheck(), "Type environment contains invalid types"

            # statement assertions
            last_imark = None
            for i, st in enumerate(self.statements):
                assert isinstance(st, stmt.IRStmt), "Statement %d is not an IRStmt" % i
                try:
                    assert st.typecheck(self.tyenv), "Statement %d failed to typecheck" % i
                except Exception:  # pylint: disable=bare-except
                    assert False, "Statement %d errored in typechecking" % i

                if type(st) is stmt.NoOp:
                    continue
                elif type(st) is stmt.IMark:
                    if last_imark is not None:
                        # pylint: disable=unsubscriptable-object
                        assert last_imark[0] + last_imark[1] == st.addr, "IMarks sizes overlap or have gaps"
                    last_imark = (st.addr, st.len)
                else:
                    assert last_imark is not
None, "Operation statement appears before IMark" assert last_imark is not None, "No IMarks present in block" except AssertionError as e: log.debug(e.args[0]) return False return True # # alternate constructors # @staticmethod def from_c(c_irsb, mem_addr, arch) -> "IRSB": irsb = IRSB(None, mem_addr, arch) irsb._from_c(c_irsb) return irsb @staticmethod def from_py(tyenv, stmts, next_expr, jumpkind, mem_addr, arch) -> "IRSB": irsb = IRSB(None, mem_addr, arch) irsb.tyenv = tyenv irsb.statements = stmts irsb.next = next_expr irsb.jumpkind = jumpkind irsb._direct_next = irsb._is_defaultexit_direct_jump() return irsb # # simple properties useful for analysis # @property def stmts_used(self) -> int: if self.statements is None: return 0 return len(self.statements) @property def offsIP(self) -> int: return self.arch.ip_offset @property def direct_next(self): if self._direct_next is None: self._direct_next = self._is_defaultexit_direct_jump() return self._direct_next @property def expressions(self): """ Return an iterator of all expressions contained in the IRSB. """ for s in self.statements: yield from s.expressions yield self.next @property def instructions(self): """ The number of instructions in this block """ if self._instructions is None: if self.statements is None: self._instructions = 0 else: self._instructions = len([s for s in self.statements if type(s) is stmt.IMark]) return self._instructions @property def instruction_addresses(self) -> tuple[int, ...]: """ Addresses of instructions in this block. 
""" if self._instruction_addresses is None: if self.statements is None: self._instruction_addresses = () else: self._instruction_addresses = tuple( (s.addr + s.delta) for s in self.statements if type(s) is stmt.IMark ) return self._instruction_addresses @property def size(self): """ The size of this block, in bytes """ if self._size is None: self._size = sum(s.len for s in self.statements if type(s) is stmt.IMark) return self._size @property def operations(self): """ A list of all operations done by the IRSB, as libVEX enum names """ ops = [] for e in self.expressions: if hasattr(e, "op"): ops.append(e.op) return ops @property def all_constants(self): """ Returns all constants in the block (including incrementing of the program counter) as :class:`pyvex.const.IRConst`. """ return sum((e.constants for e in self.expressions), []) @property def constants(self): """ The constants (excluding updates of the program counter) in the IRSB as :class:`pyvex.const.IRConst`. """ return sum((s.constants for s in self.statements if not (type(s) is stmt.Put and s.offset == self.offsIP)), []) @property def constant_jump_targets(self): """ A set of the static jump targets of the basic block. """ exits = set() if self.exit_statements: for _, _, stmt_ in self.exit_statements: exits.add(stmt_.dst.value) default_target = self.default_exit_target if default_target is not None: exits.add(default_target) return exits @property def constant_jump_targets_and_jumpkinds(self): """ A dict of the static jump targets of the basic block to their jumpkind. """ exits = {} if self.exit_statements: for _, _, stmt_ in self.exit_statements: exits[stmt_.dst.value] = stmt_.jumpkind default_target = self.default_exit_target if default_target is not None: exits[default_target] = self.jumpkind return exits # # private methods # def _pp_str(self) -> str: """ Return the pretty-printed IRSB. 
""" sa = [] sa.append("IRSB {") if self.statements is not None: sa.append(" %s" % self.tyenv) sa.append("") if self.statements is not None: for i, s in enumerate(self.statements): if isinstance(s, stmt.Put): stmt_str = s.pp_str( reg_name=self.arch.translate_register_name(s.offset, s.data.result_size(self.tyenv) // 8) ) elif isinstance(s, stmt.WrTmp) and isinstance(s.data, expr.Get): stmt_str = s.pp_str( reg_name=self.arch.translate_register_name(s.data.offset, s.data.result_size(self.tyenv) // 8) ) elif isinstance(s, stmt.Exit): stmt_str = s.pp_str(reg_name=self.arch.translate_register_name(s.offsIP, self.arch.bits // 8)) else: stmt_str = s.pp_str() sa.append(" %02d | %s" % (i, stmt_str)) else: sa.append(" Statements are omitted.") sa.append(f" NEXT: PUT({self.arch.translate_register_name(self.offsIP)}) = {self.next}; {self.jumpkind}") sa.append("}") return "\n".join(sa) def _is_defaultexit_direct_jump(self): """ Checks if the default of this IRSB a direct jump or not. """ if not (self.jumpkind == "Ijk_InvalICache" or self.jumpkind == "Ijk_Boring" or self.jumpkind == "Ijk_Call"): return False target = self.default_exit_target return target is not None # # internal "constructors" to fill this block out with data from various sources # def _from_c(self, lift_r, skip_stmts=False): c_irsb = lift_r.irsb if not skip_stmts: self.statements = [stmt.IRStmt._from_c(c_irsb.stmts[i]) for i in range(c_irsb.stmts_used)] self.tyenv = IRTypeEnv._from_c(self.arch, c_irsb.tyenv) else: self.statements = None self.tyenv = None self.next = expr.IRExpr._from_c(c_irsb.next) self.jumpkind = get_enum_from_int(c_irsb.jumpkind) self._size = lift_r.size self.is_noop_block = lift_r.is_noop_block == 1 self._instructions = lift_r.insts self._instruction_addresses = tuple(itertools.islice(lift_r.inst_addrs, lift_r.insts)) # Conditional exits exit_statements = [] if skip_stmts: if lift_r.exit_count > self.MAX_EXITS: # There are more exits than the default size of the exits array. 
We will need all statements raise SkipStatementsError("exit_count exceeded MAX_EXITS (%d)" % self.MAX_EXITS) for i in range(lift_r.exit_count): ex = lift_r.exits[i] exit_stmt = stmt.IRStmt._from_c(ex.stmt) exit_statements.append((ex.ins_addr, ex.stmt_idx, exit_stmt)) self._exit_statements = tuple(exit_statements) else: self._exit_statements = None # It will be generated when self.exit_statements is called # The default exit if lift_r.is_default_exit_constant == 1: self.default_exit_target = lift_r.default_exit else: self.default_exit_target = None # Data references self.data_refs = None if lift_r.data_ref_count > 0: if lift_r.data_ref_count > self.MAX_DATA_REFS: raise SkipStatementsError(f"data_ref_count exceeded MAX_DATA_REFS ({self.MAX_DATA_REFS})") self.data_refs = [DataRef.from_c(lift_r.data_refs[i]) for i in range(lift_r.data_ref_count)] # Const values self.const_vals = None if lift_r.const_val_count > 0: if lift_r.const_val_count > self.MAX_CONST_VALS: raise SkipStatementsError(f"const_val_count exceeded MAX_CONST_VALS ({self.MAX_CONST_VALS})") self.const_vals = [ConstVal.from_c(lift_r.const_vals[i]) for i in range(lift_r.const_val_count)] def _set_attributes( self, statements=None, nxt=None, tyenv=None, jumpkind=None, direct_next=None, size=None, instructions=None, instruction_addresses=None, exit_statements=None, default_exit_target=None, ): self.statements = statements if statements is not None else [] self.next = nxt if tyenv is not None: self.tyenv = tyenv self.jumpkind = jumpkind self._direct_next = direct_next self._size = size self._instructions = instructions self._instruction_addresses = instruction_addresses self._exit_statements = exit_statements self.default_exit_target = default_exit_target def _from_py(self, irsb): self._set_attributes( irsb.statements, irsb.next, irsb.tyenv, irsb.jumpkind, irsb.direct_next, irsb.size, instructions=irsb._instructions, instruction_addresses=irsb._instruction_addresses, exit_statements=irsb.exit_statements, 
default_exit_target=irsb.default_exit_target, ) class IRTypeEnv(VEXObject): """ An IR type environment. :ivar types: A list of the types of all the temporaries in this block as VEX enum strings. `types[3]` is the type of t3. :vartype types: list of str """ __slots__ = ["types", "wordty"] def __init__(self, arch, types=None): VEXObject.__init__(self) self.types = [] if types is None else types self.wordty = "Ity_I%d" % arch.bits def __str__(self): return " ".join(("t%d:%s" % (i, t)) for i, t in enumerate(self.types)) def lookup(self, tmp: int) -> str: """ Return the type of temporary variable `tmp` as an enum string """ if tmp < 0 or tmp > self.types_used: log.debug("Invalid temporary number %d", tmp) raise IndexError(tmp) return self.types[tmp] def sizeof(self, tmp): return get_type_size(self.lookup(tmp)) def add(self, ty): """ Add a new tmp of type `ty` to the environment. Returns the number of the new tmp. """ self.types.append(ty) return self.types_used - 1 @property def types_used(self): return len(self.types) @staticmethod def _from_c(arch, c_tyenv): return IRTypeEnv(arch, [get_enum_from_int(c_tyenv.types[t]) for t in range(c_tyenv.types_used)]) @staticmethod def _to_c(tyenv): c_tyenv = pvc.emptyIRTypeEnv() for ty in tyenv.types: pvc.newIRTemp(c_tyenv, get_int_from_enum(ty)) return c_tyenv def typecheck(self): for ty in self.types: try: get_type_size(ty) except ValueError: return False return True ================================================ FILE: pyvex/const.py ================================================ # pylint:disable=missing-class-docstring,raise-missing-from,not-callable import re from abc import ABC from .enums import VEXObject, get_enum_from_int from .errors import PyVEXError from .native import ffi, pvc # IRConst hierarchy class IRConst(VEXObject, ABC): __slots__ = ["_value"] type: str size: int tag: str c_constructor = None _value: int def pp(self): print(str(self)) @property def value(self) -> int: return self._value @staticmethod def 
_from_c(c_const): if c_const[0] == ffi.NULL: return None tag = get_enum_from_int(c_const.tag) try: return tag_to_const_class(tag)._from_c(c_const) except KeyError: raise PyVEXError("Unknown/unsupported IRConstTag %s\n" % tag) _translate = _from_c @classmethod def _to_c(cls, const): # libvex throws an exception when constructing a U1 with a value other than 0 or 1 if const.tag == "Ico_U1" and const.value not in (0, 1): raise PyVEXError("Invalid U1 value: %d" % const.value) try: return cls.c_constructor(const.value) except KeyError: raise PyVEXError("Unknown/unsupported IRConstTag %s]n" % const.tag) def __eq__(self, other): if not isinstance(other, type(self)): return False return self._value == other._value def __hash__(self): return hash((type(self), self._value)) class U1(IRConst): __slots__: list[str] = [] type = "Ity_I1" size = 1 tag = "Ico_U1" op_format = "1" c_constructor = pvc.IRConst_U1 def __init__(self, value): self._value = value def __str__(self): return "%d" % self.value @staticmethod def _from_c(c_const): return U1(c_const.Ico.U1) class U8(IRConst): __slots__: list[str] = [] type = "Ity_I8" size = 8 tag = "Ico_U8" op_format = "8" c_constructor = pvc.IRConst_U8 def __init__(self, value): self._value = value def __str__(self): return "0x%02x" % self.value @staticmethod def _from_c(c_const): return _U8_POOL[c_const.Ico.U8] _U8_POOL = [U8(i) for i in range(256)] class U16(IRConst): __slots__: list[str] = [] type = "Ity_I16" size = 16 tag = "Ico_U16" op_format = "16" c_constructor = pvc.IRConst_U16 def __init__(self, value): self._value = value def __str__(self): return "0x%04x" % self.value @staticmethod def _from_c(c_const): val = c_const.Ico.U16 if val < 1024: return _U16_POOL[val] if val >= 0xFC00: return _U16_POOL[val - 0xFC00 + 1024] return U16(val) _U16_POOL = [U16(i) for i in range(1024)] + [U16(i) for i in range(0xFC00, 0xFFFF + 1)] class U32(IRConst): __slots__: list[str] = [] type = "Ity_I32" size = 32 tag = "Ico_U32" op_format = "32" 
c_constructor = pvc.IRConst_U32 def __init__(self, value: int): self._value = value def __str__(self): return "0x%08x" % self.value @staticmethod def _from_c(c_const): val = c_const.Ico.U32 if val < 1024: return _U32_POOL[val] if val >= 0xFFFFFC00: return _U32_POOL[val - 0xFFFFFC00 + 1024] return U32(val) _U32_POOL = [U32(i) for i in range(1024)] + [U32(i) for i in range(0xFFFFFC00, 0xFFFFFFFF + 1)] class U64(IRConst): __slots__: list[str] = [] type = "Ity_I64" size = 64 tag = "Ico_U64" op_format = "64" c_constructor = pvc.IRConst_U64 def __init__(self, value): self._value = value def __str__(self): return "0x%016x" % self.value @staticmethod def _from_c(c_const): val = c_const.Ico.U64 if val < 1024: return _U64_POOL[val] if val >= 0xFFFFFFFFFFFFFC00: return _U64_POOL[val - 0xFFFFFFFFFFFFFC00 + 1024] return U64(val) _U64_POOL = [U64(i) for i in range(1024)] + [U64(i) for i in range(0xFFFFFFFFFFFFFC00, 0xFFFFFFFFFFFFFFFF + 1)] # Integer Type Imagination class_cache = {1: U1, 8: U8, 16: U16, 32: U32, 64: U64} def vex_int_class(size): try: return class_cache[size] except KeyError: class VexInt(IRConst): type = "Ity_I%d" % size tag = "Ico_U%d" % size op_format = str(size) def __init__(self, value): IRConst.__init__(self) self._value = value def __str__(self): return f"(0x{self.value:x} :: {self.type})" VexInt.__name__ = "U%d" % size class_cache[size] = VexInt return VexInt class F32(IRConst): __slots__: list[str] = [] type = "Ity_F32" tag = "Ico_F32" op_format = "F32" c_constructor = pvc.IRConst_F32 size = 32 def __init__(self, value): self._value = value def __str__(self): return "%f" % self.value @staticmethod def _from_c(c_const): return F32(c_const.Ico.F32) class F32i(IRConst): __slots__: list[str] = [] type = "Ity_F32" tag = "Ico_F32i" op_format = "F32" c_constructor = pvc.IRConst_F32i size = 32 def __init__(self, value): self._value = value def __str__(self): return "%f" % self.value @staticmethod def _from_c(c_const): return F32i(c_const.Ico.F32) class 
F64(IRConst): __slots__: list[str] = [] type = "Ity_F64" tag = "Ico_F64" op_format = "F64" c_constructor = pvc.IRConst_F64 size = 64 def __init__(self, value): self._value = value def __str__(self): return "%f" % self.value @staticmethod def _from_c(c_const): return F64(c_const.Ico.F64) class F64i(IRConst): __slots__: list[str] = [] type = "Ity_F64" tag = "Ico_F64i" op_format = "F64" c_constructor = pvc.IRConst_F64i size = 64 def __init__(self, value): self._value = value def __str__(self): return "%f" % self.value @staticmethod def _from_c(c_const): return F64i(c_const.Ico.F64) class V128(IRConst): __slots__: list[str] = [] type = "Ity_V128" tag = "Ico_V128" op_format = "V128" c_constructor = pvc.IRConst_V128 size = 128 def __init__(self, value): self._value = value def __str__(self): return "%x" % self.value # vex doesn't store a full 128 bit constant, instead it stores 1 bit per 8 bits of data # and duplicates each bit 8 times @staticmethod def _from_c(c_const): base_const = c_const.Ico.V128 real_const = 0 for i in range(16): if (base_const >> i) & 1 == 1: real_const |= 0xFF << (8 * i) return V128(real_const) class V256(IRConst): __slots__: list[str] = [] type = "Ity_V256" tag = "Ico_V256" op_format = "V256" c_constructor = pvc.IRConst_V256 size = 256 def __init__(self, value): self._value = value def __str__(self): return "%x" % self.value # see above @staticmethod def _from_c(c_const): base_const = c_const.Ico.V256 real_const = 0 for i in range(32): if (base_const >> i) & 1 == 1: real_const |= 0xFF << (8 * i) return V256(real_const) predefined_types = [U1, U8, U16, U32, U64, F32, F32i, F64, F64i, V128, V256] predefined_types_map = {c.type: c for c in predefined_types} predefined_classes_map = {c.tag: c for c in predefined_types} # precompiled regexes int_ty_re = re.compile(r"Ity_I\d+") int_tag_re = re.compile(r"Ico_U\d+") tag_size_re = re.compile(r"Ico_[UFV](?P\d+)i?") def is_int_ty(ty): m = int_ty_re.match(ty) return m is not None def is_int_tag(tag): m = 
int_tag_re.match(tag) return m is not None def get_tag_size(tag): m = tag_size_re.match(tag) if m is None: raise ValueError("Tag %s does not have size" % tag) return int(m.group("size")) type_str_re = re.compile(r"Ity_[IFDV](?P\d+)") type_tag_str_re = re.compile(r"[IFDV]?(?P\d+)[SU]?") def get_type_size(ty): """ Returns the size, in BITS, of a VEX type specifier e.g., Ity_I16 -> 16 :param ty: :return: """ m = type_str_re.match(ty) if m is None: raise ValueError("Type %s does not have size" % ty) return int(m.group("size")) def get_type_spec_size(ty): """ Get the width of a "type specifier" like I16U or F16 or just 16 (Yes, this really just takes the int out. If we must special-case, do it here. :param tyspec: :return: """ m = type_tag_str_re.match(ty) if m is None: raise ValueError("Type specifier %s does not have size" % ty) return int(m.group("size")) def ty_to_const_class(ty): try: return predefined_types_map[ty] except KeyError: if is_int_ty(ty): size = get_type_size(ty) return vex_int_class(size) else: raise ValueError("Type %s does not exist" % ty) def tag_to_const_class(tag): try: return predefined_classes_map[tag] except KeyError: if is_int_tag(tag): size = get_tag_size(tag) return vex_int_class(size) else: raise ValueError("Tag %s does not exist" % tag) ================================================ FILE: pyvex/const_val.py ================================================ class ConstVal: """ A constant value object. Indicates a constant value assignment to a VEX tmp variable. :ivar tmp: The tmp variable being assigned to. :ivar value: The value of the tmp variable. 
:ivar stmt_idx: The IRSB statement index containing the data access """ __slots__ = ( "tmp", "value", "stmt_idx", ) def __init__(self, tmp: int, value: int, stmt_idx: int): self.tmp = tmp self.value = value self.stmt_idx = stmt_idx def __repr__(self): return f"" @classmethod def from_c(cls, r): return cls(r.tmp, r.value, r.stmt_idx) ================================================ FILE: pyvex/data_ref.py ================================================ def data_ref_type_str(dref_enum): """ Translate an ``enum DataRefTypes`` value into a string representation. """ if dref_enum == 0x9000: return "unknown" elif dref_enum == 0x9001: return "integer" elif dref_enum == 0x9002: return "fp" elif dref_enum == 0x9003: return "integer(store)" else: return "INVALID" class DataRef: """ A data reference object. Indicates a data access in an IRSB. :ivar data_addr: The address of the data being accessed :ivar data_size: The size of the data being accessed, in bytes :ivar data_type: The type of the data, a DataRefTypes enum. 
:ivar stmt_idx: The IRSB statement index containing the data access :ivar ins_addr: The address of the instruction performing the data access """ __slots__ = ("data_addr", "data_size", "data_type", "stmt_idx", "ins_addr") def __init__(self, data_addr, data_size, data_type, stmt_idx, ins_addr): self.data_addr = data_addr self.data_size = data_size self.data_type = data_type self.stmt_idx = stmt_idx self.ins_addr = ins_addr @property def data_type_str(self): """ The data ref type as a string, "unknown" "integer" "fp" or "INVALID" """ return data_ref_type_str(self.data_type) def __repr__(self): return "" % ( self.data_addr, data_ref_type_str(self.data_type), self.data_size, self.ins_addr, self.stmt_idx, ) @classmethod def from_c(cls, r): return cls(r.data_addr, r.size, r.data_type, r.stmt_idx, r.ins_addr) ================================================ FILE: pyvex/enums.py ================================================ from typing import Any from .native import ffi, pvc from .utils import stable_hash class VEXObject: """ The base class for Vex types. """ __slots__: list[str] = [] def __eq__(self, other): if not isinstance(other, type(self)): return False # compare values in slots for slot in self.__slots__: if getattr(self, slot) != getattr(other, slot): return False return True def __hash__(self): values = [getattr(self, slot) for slot in self.__slots__] for i, lst_val in enumerate(values): if isinstance(lst_val, list): values[i] = tuple(lst_val) return stable_hash(tuple([type(self)] + values)) class IRCallee(VEXObject): """ Describes a helper function to call. """ __slots__ = ["regparms", "name", "mcx_mask"] def __init__(self, regparms, name, mcx_mask): VEXObject.__init__(self) self.regparms = regparms self.name = name self.mcx_mask = mcx_mask def __str__(self): return str(self.name) @staticmethod def _from_c(c_callee): return IRCallee( c_callee.regparms, ffi.string(c_callee.name).decode(), # NO. 
#int(ffi.cast("unsigned long long", c_callee.addr)), c_callee.mcx_mask, ) @staticmethod def _to_c(callee): # pylint: disable=unused-argument raise TypeError( "This doesn't work! Please invent a way to get the correct address for the named function from pyvex_c." ) # c_callee = pvc.mkIRCallee(callee.regparms, # callee.name.encode(), # ffi.cast("void *", callee.addr)) # c_callee.mcx_mask = callee.mcx_mask # return c_callee class IRRegArray(VEXObject): """ A section of the guest state that we want te be able to index at run time, so as to be able to describe indexed or rotating register files on the guest. :ivar int base: The offset into the state that this array starts :ivar str elemTy: The types of the elements in this array, as VEX enum strings :ivar int nElems: The number of elements in this array """ __slots__ = ["base", "elemTy", "nElems"] def __init__(self, base, elemTy, nElems): VEXObject.__init__(self) self.base = base self.elemTy = elemTy self.nElems = nElems def __str__(self): return "%s:%sx%d" % (self.base, self.elemTy[4:], self.nElems) @staticmethod def _from_c(c_arr): return IRRegArray(c_arr.base, ints_to_enums[c_arr.elemTy], c_arr.nElems) @staticmethod def _to_c(arr): return pvc.mkIRRegArray(arr.base, get_int_from_enum(arr.elemTy), arr.nElems) ints_to_enums: dict[int, str] = {} enums_to_ints: dict[str, int] = {} irop_enums_to_ints: dict[str, int] = {} will_be_overwritten = ["Ircr_GT", "Ircr_LT"] def get_enum_from_int(i): return ints_to_enums[i] def get_int_from_enum(e): return enums_to_ints[e] _add_enum_counter = 0 def _add_enum(s, i=None): # TODO get rid of this global _add_enum_counter # pylint: disable=global-statement if i is None: while _add_enum_counter in ints_to_enums: _add_enum_counter += 1 i = _add_enum_counter _add_enum_counter += 1 # Update for the next iteration if i in ints_to_enums: if ints_to_enums[i] not in will_be_overwritten: raise ValueError("Enum with intkey %d already present" % i) enums_to_ints[s] = i ints_to_enums[i] = s if 
s.startswith("Iop_"): irop_enums_to_ints[s] = i for attr in dir(pvc): if attr[0] in "ABCDEFGHIJKLMNOPQRSTUVWXYZ" and hasattr(pvc, attr) and isinstance(getattr(pvc, attr), int): _add_enum(attr, getattr(pvc, attr)) def vex_endness_from_string(endness_str): return getattr(pvc, endness_str) def default_vex_archinfo() -> dict[str, Any]: return { "hwcaps": 0, "endness": vex_endness_from_string("VexEndnessLE"), "ppc_icache_line_szB": 0, "ppc_dcbz_szB": 0, "ppc_dcbzl_szB": 0, "arm64_dMinLine_lg2_szB": 0, "arm64_iMinLine_lg2_szB": 0, "hwcache_info": { "num_levels": 0, "num_caches": 0, "caches": None, "icaches_maintain_coherence": True, }, "x86_cr0": 0xFFFFFFFF, } ================================================ FILE: pyvex/errors.py ================================================ class PyVEXError(Exception): pass class SkipStatementsError(PyVEXError): pass # # Exceptions and notifications that post-processors can raise # class LiftingException(Exception): pass class NeedStatementsNotification(LiftingException): """ A post-processor may raise a NeedStatementsNotification if it needs to work with statements, but the current IRSB is generated without any statement available (skip_stmts=True). The lifter will re-lift the current block with skip_stmts=False upon catching a NeedStatementsNotification, and re-run the post-processors. It's worth noting that if a post-processor always raises this notification for every basic block without statements, it will essentially disable the skipping statement optimization, and it is bad for performance (especially for CFGFast, which heavily relies on this optimization). Post-processor authors are encouraged to at least filter the IRSBs based on available properties (jumpkind, next, etc.). If a post-processor must work with statements for the majority of IRSBs, the author should implement it in PyVEX in C for the sake of a better performance. 
""" pass ================================================ FILE: pyvex/expr.py ================================================ from __future__ import annotations import logging import re from typing import TYPE_CHECKING from .const import U8, U16, U32, U64, IRConst, get_type_size from .enums import IRCallee, IRRegArray, VEXObject, get_enum_from_int, get_int_from_enum from .errors import PyVEXError from .native import ffi, pvc if TYPE_CHECKING: from .block import IRTypeEnv log = logging.getLogger("pyvex.expr") class IRExpr(VEXObject): """ IR expressions in VEX represent operations without side effects. """ __slots__ = [] tag: str | None = None tag_int = 0 # set automatically at bottom of file def pp(self): print(str(self)) def __str__(self): return self._pp_str() def _pp_str(self) -> str: raise NotImplementedError @property def child_expressions(self) -> list[IRExpr]: """ A list of all of the expressions that this expression ends up evaluating. """ expressions = [] for k in self.__slots__: v = getattr(self, k) if isinstance(v, IRExpr): expressions.append(v) expressions.extend(v.child_expressions) return expressions @property def constants(self): """ A list of all of the constants that this expression ends up using. """ constants = [] for k in self.__slots__: v = getattr(self, k) if isinstance(v, IRExpr): constants.extend(v.constants) elif isinstance(v, IRConst): constants.append(v) return constants def result_size(self, tyenv: IRTypeEnv): return get_type_size(self.result_type(tyenv)) def result_type(self, tyenv: IRTypeEnv): raise NotImplementedError() def replace_expression(self, replacements): """ Replace child expressions in-place. 
:param Dict[IRExpr, IRExpr] replacements: A mapping from expression-to-find to expression-to-replace-with :return: None """ for k in self.__slots__: v = getattr(self, k) if isinstance(v, IRExpr) and v in replacements: setattr(self, k, replacements.get(v)) elif isinstance(v, list): # Replace the instance in the list for i, expr_ in enumerate(v): if isinstance(expr_, IRExpr) and expr_ in replacements: v[i] = replacements.get(expr_) elif type(v) is tuple: # Rebuild the tuple _lst = [] replaced = False for i, expr_ in enumerate(v): if isinstance(expr_, IRExpr) and expr_ in replacements: _lst.append(replacements.get(expr_)) replaced = True else: _lst.append(expr_) if replaced: setattr(self, k, tuple(_lst)) elif isinstance(v, IRExpr): v.replace_expression(replacements) @staticmethod def _from_c(c_expr) -> IRExpr | None: if c_expr == ffi.NULL or c_expr[0] == ffi.NULL: return None try: return enum_to_expr_class(c_expr.tag)._from_c(c_expr) except KeyError: raise PyVEXError("Unknown/unsupported IRExprTag %s\n" % get_enum_from_int(c_expr.tag)) _translate = _from_c @staticmethod def _to_c(expr): try: return tag_to_expr_class(expr.tag)._to_c(expr) except KeyError: raise PyVEXError("Unknown/unsupported IRExprTag %s\n" % expr.tag) def typecheck(self, tyenv): return self.result_type(tyenv) class Binder(IRExpr): """ Used only in pattern matching within Vex. Should not be seen outside of Vex. 
""" __slots__ = ["binder"] tag = "Iex_Binder" def __init__(self, binder): self.binder = binder def _pp_str(self): return "Binder" @staticmethod def _from_c(c_expr): return Binder(c_expr.iex.Binder.binder) @staticmethod def _to_c(expr): return pvc.IRExpr_Binder(expr.binder) def result_type(self, tyenv): return "Ity_INVALID" class VECRET(IRExpr): tag = "Iex_VECRET" __slots__ = [] def _pp_str(self): return "VECRET" @staticmethod def _from_c(c_expr): return VECRET() @staticmethod def _to_c(expr): return pvc.IRExpr_VECRET() def result_type(self, tyenv): return "Ity_INVALID" class GSPTR(IRExpr): __slots__ = [] tag = "Iex_GSPTR" def _pp_str(self): return "GSPTR" @staticmethod def _from_c(c_expr): return GSPTR() @staticmethod def _to_c(expr): return pvc.IRExpr_GSPTR() def result_type(self, tyenv): return "Ity_INVALID" class GetI(IRExpr): """ Read a guest register at a non-fixed offset in the guest state. """ __slots__ = ["descr", "ix", "bias"] tag = "Iex_GetI" def __init__(self, descr, ix, bias): self.descr = descr self.ix = ix self.bias = bias @property def description(self): return self.descr @property def index(self): return self.ix def _pp_str(self): return f"GetI({self.descr})[{self.ix},{self.bias}]" @staticmethod def _from_c(c_expr): descr = IRRegArray._from_c(c_expr.Iex.GetI.descr) ix = IRExpr._from_c(c_expr.Iex.GetI.ix) bias = c_expr.Iex.GetI.bias return GetI(descr, ix, bias) @staticmethod def _to_c(expr): return pvc.IRExpr_GetI(IRRegArray._to_c(expr.descr), IRExpr._to_c(expr.ix), expr.bias) def result_type(self, tyenv): return self.descr.elemTy class RdTmp(IRExpr): """ Read the value held by a temporary. 
""" __slots__ = ["_tmp"] tag = "Iex_RdTmp" def __init__(self, tmp): self._tmp = tmp def _pp_str(self): return "t%d" % self.tmp @property def tmp(self): return self._tmp @staticmethod def _from_c(c_expr): tmp = c_expr.Iex.RdTmp.tmp return RdTmp.get_instance(tmp) @staticmethod def _to_c(expr): return pvc.IRExpr_RdTmp(expr.tmp) @staticmethod def get_instance(tmp): if tmp < 1024: # for small tmp reads, they are cached and are only created once globally return _RDTMP_POOL[tmp] return RdTmp(tmp) def replace_expression(self, replacements): # RdTmp is one of the terminal IRExprs, which cannot be replaced. pass def result_type(self, tyenv): return tyenv.lookup(self.tmp) def __hash__(self): return 133700 + self._tmp _RDTMP_POOL = list(RdTmp(i) for i in range(0, 1024)) class Get(IRExpr): """ Read a guest register, at a fixed offset in the guest state. """ __slots__ = ["offset", "ty_int"] tag = "Iex_Get" def __init__(self, offset: int, ty: str, ty_int: int | None = None): self.offset = offset if ty_int is None: self.ty_int = get_int_from_enum(ty) else: self.ty_int = ty_int @property def ty(self): return get_enum_from_int(self.ty_int) @property def type(self): return get_enum_from_int(self.ty_int) def _pp_str(self): return f"GET:{self.ty[4:]}(offset={self.offset})" def pp_str_with_name(self, reg_name: str): """pp_str_with_name is used to print the expression with the name of the register instead of the offset""" return f"GET:{self.ty[4:]}({reg_name})" @staticmethod def _from_c(c_expr): return Get(c_expr.Iex.Get.offset, get_enum_from_int(c_expr.Iex.Get.ty)) @staticmethod def _to_c(expr): return pvc.IRExpr_Get(expr.offset, expr.ty_int) def result_type(self, tyenv): return self.ty def __hash__(self): return (self.offset << 8) | self.ty_int class Qop(IRExpr): """ A quaternary operation (4 arguments). 
""" __slots__ = ["op", "args"] tag = "Iex_Qop" def __init__(self, op, args): self.op = op self.args = args def _pp_str(self): return "{}({})".format(self.op[4:], ",".join(str(a) for a in self.args)) @property def child_expressions(self): expressions = sum((a.child_expressions for a in self.args), []) expressions.extend(self.args) return expressions @staticmethod def _from_c(c_expr): return Qop( get_enum_from_int(c_expr.Iex.Qop.details.op), [ IRExpr._from_c(arg) for arg in [ c_expr.Iex.Qop.details.arg1, c_expr.Iex.Qop.details.arg2, c_expr.Iex.Qop.details.arg3, c_expr.Iex.Qop.details.arg4, ] ], ) @staticmethod def _to_c(expr): return pvc.IRExpr_Qop(get_int_from_enum(expr.op), *[IRExpr._to_c(arg) for arg in expr.args]) def result_type(self, tyenv): return get_op_retty(self.op) def typecheck(self, tyenv): # TODO change all this to use PyvexTypeErrorException resty, (arg1ty, arg2ty, arg3ty, arg4ty) = op_arg_types(self.op) arg1ty_real = self.args[0].typecheck(tyenv) arg2ty_real = self.args[1].typecheck(tyenv) arg3ty_real = self.args[2].typecheck(tyenv) arg4ty_real = self.args[3].typecheck(tyenv) if arg1ty_real is None or arg2ty_real is None or arg3ty_real is None or arg4ty_real is None: return None if arg1ty_real != arg1ty: log.debug("First arg of %s must be %s", self.op, arg1ty) return None if arg2ty_real != arg2ty: log.debug("Second arg of %s must be %s", self.op, arg2ty) return None if arg3ty_real != arg3ty: log.debug("Third arg of %s must be %s", self.op, arg3ty) return None if arg4ty_real != arg4ty: log.debug("Fourth arg of %s must be %s", self.op, arg4ty) return None return resty class Triop(IRExpr): """ A ternary operation (3 arguments) """ __slots__ = ["op", "args"] tag = "Iex_Triop" def __init__(self, op, args): self.op = op self.args = args def _pp_str(self): return "{}({})".format(self.op[4:], ",".join(str(a) for a in self.args)) @property def child_expressions(self): expressions = sum((a.child_expressions for a in self.args), []) expressions.extend(self.args) 
return expressions @staticmethod def _from_c(c_expr): return Triop( get_enum_from_int(c_expr.Iex.Triop.details.op), [ IRExpr._from_c(arg) for arg in [c_expr.Iex.Triop.details.arg1, c_expr.Iex.Triop.details.arg2, c_expr.Iex.Triop.details.arg3] ], ) @staticmethod def _to_c(expr): return pvc.IRExpr_Triop(get_int_from_enum(expr.op), *[IRExpr._to_c(arg) for arg in expr.args]) def result_type(self, tyenv): return get_op_retty(self.op) def typecheck(self, tyenv): resty, (arg1ty, arg2ty, arg3ty) = op_arg_types(self.op) arg1ty_real = self.args[0].typecheck(tyenv) arg2ty_real = self.args[1].typecheck(tyenv) arg3ty_real = self.args[2].typecheck(tyenv) if arg1ty_real is None or arg2ty_real is None or arg3ty_real is None: return None if arg1ty_real != arg1ty: log.debug("First arg of %s must be %s", self.op, arg1ty) return None if arg2ty_real != arg2ty: log.debug("Second arg of %s must be %s", self.op, arg2ty) return None if arg3ty_real != arg3ty: log.debug("Third arg of %s must be %s", self.op, arg3ty) return None return resty class Binop(IRExpr): """ A binary operation (2 arguments). 
""" __slots__ = ["_op", "op_int", "args"] tag = "Iex_Binop" def __init__(self, op, args, op_int=None): self.op_int = op_int self.args = args self._op = op if op is not None else None def _pp_str(self): return "{}({})".format(self.op[4:], ",".join(str(a) for a in self.args)) @property def op(self): if self._op is None: self._op = get_enum_from_int(self.op_int) return self._op @property def child_expressions(self): expressions = sum((a.child_expressions for a in self.args), []) expressions.extend(self.args) return expressions @staticmethod def _from_c(c_expr): return Binop( None, [IRExpr._from_c(arg) for arg in [c_expr.Iex.Binop.arg1, c_expr.Iex.Binop.arg2]], op_int=c_expr.Iex.Binop.op, ) @staticmethod def _to_c(expr): return pvc.IRExpr_Binop(get_int_from_enum(expr.op), *[IRExpr._to_c(arg) for arg in expr.args]) def result_type(self, tyenv): return get_op_retty(self.op) def typecheck(self, tyenv): arg1ty_real = self.args[0].typecheck(tyenv) arg2ty_real = self.args[1].typecheck(tyenv) resty, (arg1ty, arg2ty) = op_arg_types(self.op) if arg1ty_real is None or arg2ty_real is None: return None if arg1ty_real != arg1ty: log.debug("First arg of %s must be %s", self.op, arg1ty) return None if arg2ty_real != arg2ty: log.debug("Second arg of %s must be %s", self.op, arg2ty) return None return resty class Unop(IRExpr): """ A unary operation (1 argument). 
""" __slots__ = ["op", "args"] tag = "Iex_Unop" def __init__(self, op: str, args: list[IRExpr]): self.op = op self.args = args def _pp_str(self): return "{}({})".format(self.op[4:], ",".join(str(a) for a in self.args)) @property def child_expressions(self): expressions = sum((a.child_expressions for a in self.args), []) expressions.extend(self.args) return expressions @staticmethod def _from_c(c_expr): return Unop(get_enum_from_int(c_expr.Iex.Unop.op), [IRExpr._from_c(c_expr.Iex.Unop.arg)]) @staticmethod def _to_c(expr): return pvc.IRExpr_Unop(get_int_from_enum(expr.op), IRExpr._to_c(expr.args[0])) def result_type(self, tyenv): return get_op_retty(self.op) def typecheck(self, tyenv): resty, (arg1ty,) = op_arg_types(self.op) arg1ty_real = self.args[0].typecheck(tyenv) if arg1ty_real is None: return None if arg1ty_real != arg1ty: log.debug("First arg of %s must be %s", self.op, arg1ty) return None return resty class Load(IRExpr): """ A load from memory. """ __slots__ = ["end", "ty", "addr"] tag = "Iex_Load" def __init__(self, end, ty, addr): self.end = end self.ty = ty self.addr = addr @property def endness(self): return self.end @property def type(self): return self.ty def _pp_str(self): return f"LD{self.end[-2:].lower()}:{self.ty[4:]}({self.addr})" @staticmethod def _from_c(c_expr): return Load( get_enum_from_int(c_expr.Iex.Load.end), get_enum_from_int(c_expr.Iex.Load.ty), IRExpr._from_c(c_expr.Iex.Load.addr), ) @staticmethod def _to_c(expr): return pvc.IRExpr_Load(get_int_from_enum(expr.end), get_int_from_enum(expr.ty), IRExpr._to_c(expr.addr)) def result_type(self, tyenv): return self.ty def typecheck(self, tyenv): addrty = self.addr.typecheck(tyenv) if addrty is None: return None if addrty != tyenv.wordty: log.debug("Address must be word-sized") return None return self.ty class Const(IRExpr): """ A constant expression. 
""" __slots__ = ["_con"] tag = "Iex_Const" def __init__(self, con: IRConst): self._con = con def _pp_str(self): return str(self.con) @property def con(self) -> IRConst: return self._con @staticmethod def _from_c(c_expr): con = IRConst._from_c(c_expr.Iex.Const.con) return Const.get_instance(con) @staticmethod def _to_c(expr): return pvc.IRExpr_Const(IRConst._to_c(expr.con)) @staticmethod def get_instance(con): if con.value < 1024 and con.__class__ in _CONST_POOL: return _CONST_POOL[con.__class__][con.value] return Const(con) def result_type(self, tyenv): return self.con.type _CONST_POOL = { U8: [Const(U8(i)) for i in range(0, 1024)], U16: [Const(U16(i)) for i in range(0, 1024)], U32: [Const(U32(i)) for i in range(0, 1024)], U64: [Const(U64(i)) for i in range(0, 1024)], } class ITE(IRExpr): """ An if-then-else expression. """ __slots__ = ["cond", "iffalse", "iftrue"] tag = "Iex_ITE" def __init__(self, cond, iffalse, iftrue): self.cond = cond self.iffalse = iffalse self.iftrue = iftrue def _pp_str(self): return f"ITE({self.cond},{self.iftrue},{self.iffalse})" @staticmethod def _from_c(c_expr): return ITE( IRExpr._from_c(c_expr.Iex.ITE.cond), IRExpr._from_c(c_expr.Iex.ITE.iffalse), IRExpr._from_c(c_expr.Iex.ITE.iftrue), ) @staticmethod def _to_c(expr): return pvc.IRExpr_ITE(IRExpr._to_c(expr.cond), IRExpr._to_c(expr.iftrue), IRExpr._to_c(expr.iffalse)) def result_type(self, tyenv): return self.iftrue.result_type(tyenv) def typecheck(self, tyenv): condty = self.cond.typecheck(tyenv) falsety = self.iffalse.typecheck(tyenv) truety = self.iftrue.typecheck(tyenv) if condty is None or falsety is None or truety is None: return None if condty != "Ity_I1": log.debug("guard must be Ity_I1") return None if falsety != truety: log.debug("false condition must be same type as true condition") return None return falsety class CCall(IRExpr): """ A call to a pure (no side-effects) helper C function. 
""" __slots__ = ["retty", "cee", "args"] tag = "Iex_CCall" def __init__(self, retty, cee, args): self.retty = retty self.cee = cee self.args = tuple(args) @property def ret_type(self): return self.retty @property def callee(self): return self.cee def _pp_str(self): return "{}({}):{}".format(self.cee, ",".join(str(a) for a in self.args), self.retty) @property def child_expressions(self): expressions = sum((a.child_expressions for a in self.args), []) expressions.extend(self.args) return expressions @staticmethod def _from_c(c_expr): i = 0 args = [] while True: arg = c_expr.Iex.CCall.args[i] if arg == ffi.NULL: break args.append(IRExpr._from_c(arg)) i += 1 return CCall(get_enum_from_int(c_expr.Iex.CCall.retty), IRCallee._from_c(c_expr.Iex.CCall.cee), tuple(args)) @staticmethod def _to_c(expr): args = [IRExpr._to_c(arg) for arg in expr.args] mkIRExprVec = getattr(pvc, "mkIRExprVec_%d" % len(args)) return pvc.IRExpr_CCall(IRCallee._to_c(expr.cee), get_int_from_enum(expr.retty), mkIRExprVec(*args)) def result_type(self, tyenv): return self.retty def get_op_retty(op): return op_arg_types(op)[0] op_signatures: dict[str, tuple[str, tuple[str, ...]]] = {} def _request_op_type_from_cache(op): return op_signatures[op] def _request_op_type_from_libvex(op): Ity_INVALID = 0x1100 # as defined in enum IRType in VEX res_ty = ffi.new("IRType *") arg_tys = [ffi.new("IRType *") for _ in range(4)] # initialize all IRTypes to Ity_INVALID for arg in arg_tys: arg[0] = Ity_INVALID pvc.typeOfPrimop(get_int_from_enum(op), res_ty, *arg_tys) arg_ty_vals = [a[0] for a in arg_tys] try: numargs = arg_ty_vals.index(Ity_INVALID) except ValueError: numargs = 4 args_tys_list = [get_enum_from_int(arg_ty_vals[i]) for i in range(numargs)] op_ty_sig = (get_enum_from_int(res_ty[0]), tuple(args_tys_list)) op_signatures[op] = op_ty_sig return op_ty_sig class PyvexOpMatchException(Exception): pass class PyvexTypeErrorException(Exception): pass def int_type_for_size(size): return "Ity_I%d" % size # 
precompiled regexes unop_signature_re = re.compile(r"Iop_(Not|Ctz|Clz)(?P\d+)$") binop_signature_re = re.compile(r"Iop_(Add|Sub|Mul|Xor|Or|And|Div[SU]|Mod)(?P\d+)$") shift_signature_re = re.compile(r"Iop_(Shl|Shr|Sar)(?P\d+)$") cmp_signature_re_1 = re.compile(r"Iop_Cmp(EQ|NE)(?P\d+)$") cmp_signature_re_2 = re.compile(r"Iop_Cmp(GT|GE|LT|LE)(?P\d+)[SU]$") mull_signature_re = re.compile(r"Iop_Mull[SU](?P\d+)$") half_signature_re = re.compile(r"Iop_DivMod[SU](?P\d+)to(?P\d+)$") cast_signature_re = re.compile(r"Iop_(?P\d+)(U|S|HI|HL)?to(?P\d+)") def unop_signature(op): m = unop_signature_re.match(op) if m is None: raise PyvexOpMatchException() size = int(m.group("size")) size_type = int_type_for_size(size) return size_type, (size_type,) def binop_signature(op): m = binop_signature_re.match(op) if m is None: raise PyvexOpMatchException() size = int(m.group("size")) size_type = int_type_for_size(size) return (size_type, (size_type, size_type)) def shift_signature(op): m = shift_signature_re.match(op) if m is None: raise PyvexOpMatchException() size = int(m.group("size")) if size > 255: raise PyvexTypeErrorException("Cannot apply shift operation to %d size int because shift index is 8-bit" % size) size_type = int_type_for_size(size) return (size_type, (size_type, int_type_for_size(8))) def cmp_signature(op): m = cmp_signature_re_1.match(op) m2 = cmp_signature_re_2.match(op) if (m is None) == (m2 is None): raise PyvexOpMatchException() mfound = m if m is not None else m2 assert mfound is not None size = int(mfound.group("size")) size_type = int_type_for_size(size) return (int_type_for_size(1), (size_type, size_type)) def mull_signature(op): m = mull_signature_re.match(op) if m is None: raise PyvexOpMatchException() size = int(m.group("size")) size_type = int_type_for_size(size) doubled_size_type = int_type_for_size(2 * size) return (doubled_size_type, (size_type, size_type)) def half_signature(op): m = half_signature_re.match(op) if m is None: raise PyvexOpMatchException() 
fullsize = int(m.group("fullsize")) halfsize = int(m.group("halfsize")) if halfsize * 2 != fullsize: raise PyvexTypeErrorException("Invalid Instruction %s: Type 1 must be twice the size of type 2" % op) fullsize_type = int_type_for_size(fullsize) halfsize_type = int_type_for_size(halfsize) return (fullsize_type, (fullsize_type, halfsize_type)) def cast_signature(op): m = cast_signature_re.match(op) if m is None: raise PyvexOpMatchException() src_type = int_type_for_size(int(m.group("srcsize"))) dst_type = int_type_for_size(int(m.group("dstsize"))) return (dst_type, (src_type,)) polymorphic_op_processors = [ unop_signature, binop_signature, shift_signature, cmp_signature, mull_signature, half_signature, cast_signature, ] def _request_polymorphic_op_type(op): for polymorphic_signature in polymorphic_op_processors: try: op_ty_sig = polymorphic_signature(op) break except PyvexOpMatchException: continue else: raise PyvexOpMatchException("Op %s not recognized" % op) return op_ty_sig _request_funcs = [_request_op_type_from_cache, _request_op_type_from_libvex, _request_polymorphic_op_type] def op_arg_types(op): for _request_func in _request_funcs: try: return _request_func(op) except KeyError: continue raise ValueError("Cannot find type of op %s" % op) _globals = globals().copy() # # Mapping from tag strings/enums to IRExpr classes # tag_to_expr_mapping = {} enum_to_expr_mapping = {} tag_count = 0 cls = None for cls in _globals.values(): if type(cls) is type and issubclass(cls, IRExpr) and cls is not IRExpr: tag_to_expr_mapping[cls.tag] = cls enum_to_expr_mapping[get_int_from_enum(cls.tag)] = cls cls.tag_int = tag_count tag_count += 1 del cls def tag_to_expr_class(tag): """ Convert a tag string to the corresponding IRExpr class type. :param str tag: The tag string. :return: A class. :rtype: type """ try: return tag_to_expr_mapping[tag] except KeyError: raise KeyError("Cannot find expression class for type %s." 
% tag) def enum_to_expr_class(tag_enum): """ Convert a tag enum to the corresponding IRExpr class. :param int tag_enum: The tag enum. :return: A class. :rtype: type """ try: return enum_to_expr_mapping[tag_enum] except KeyError: raise KeyError("Cannot find expression class for type %s." % get_enum_from_int(tag_enum)) ================================================ FILE: pyvex/lifting/__init__.py ================================================ from .gym import AARCH64Spotter, AMD64Spotter, ARMSpotter, X86Spotter from .libvex import LIBVEX_SUPPORTED_ARCHES, LibVEXLifter from .lift_function import lift, lifters, register from .lifter import Lifter from .post_processor import Postprocessor from .zerodivision import ZeroDivisionPostProcessor for arch in LIBVEX_SUPPORTED_ARCHES: register(LibVEXLifter, arch) register(AARCH64Spotter, "AARCH64") register(ARMSpotter, "ARM") register(ARMSpotter, "ARMEL") register(ARMSpotter, "ARMHF") register(ARMSpotter, "ARMCortexM") register(AMD64Spotter, "AMD64") register(X86Spotter, "X86") __all__ = ["Lifter", "Postprocessor", "lift", "register", "lifters", "ZeroDivisionPostProcessor"] ================================================ FILE: pyvex/lifting/gym/README.md ================================================ # The Gym This is where we're putting non-libvex lifters that we feel should be included with the pyvex distribution. These will probably be mostly "spotters", which correct for gaps in libvex's instruction support. 
================================================ FILE: pyvex/lifting/gym/__init__.py ================================================ from .aarch64_spotter import AARCH64Spotter from .arm_spotter import ARMSpotter from .x86_spotter import AMD64Spotter, X86Spotter __all__ = ("ARMSpotter", "AARCH64Spotter", "X86Spotter", "AMD64Spotter") ================================================ FILE: pyvex/lifting/gym/aarch64_spotter.py ================================================ import logging from pyvex.lifting.util.instr_helper import Instruction from pyvex.lifting.util.lifter_helper import GymratLifter log = logging.getLogger(__name__) class Aarch64Instruction(Instruction): # pylint: disable=abstract-method # NOTE: WARNING: There is no MRS, MSR, SYSL in VEX's ARM implementation # You must use straight nasty hacks instead. pass class Instruction_SYSL(Aarch64Instruction): name = "SYSL" bin_format = "1101010100101qqqnnnnmmmmppprrrrr" def compute_result(self): # pylint: disable=arguments-differ log.debug("Ignoring SYSL instruction at %#x.", self.addr) class Instruction_MSR(Aarch64Instruction): name = "MSR" bin_format = "11010101000ioqqqnnnnmmmmppprrrrr" def compute_result(self): # pylint: disable=arguments-differ log.debug("Ignoring MSR instruction at %#x.", self.addr) class Instruction_MRS(Aarch64Instruction): name = "MRS" bin_format = "110101010011opppnnnnmmmmppprrrrr" def compute_result(self): # pylint: disable=arguments-differ log.debug("Ignoring MRS instruction at %#x.", self.addr) class AARCH64Spotter(GymratLifter): instrs = [Instruction_MRS, Instruction_MSR, Instruction_SYSL] ================================================ FILE: pyvex/lifting/gym/arm_spotter.py ================================================ import logging import bitstring from pyvex.lifting.util import JumpKind, Type from pyvex.lifting.util.instr_helper import Instruction, ParseError from pyvex.lifting.util.lifter_helper import GymratLifter from pyvex.types import Arch log = 
logging.getLogger(__name__) class ARMInstruction(Instruction): # pylint: disable=abstract-method # NOTE: WARNING: There is no CPSR in VEX's ARM implementation # You must use straight nasty hacks instead. # NOTE 2: Something is goofy w/r/t archinfo and VEX; cc_op3 is used in ccalls, but there's # no cc_op3 in archinfo, angr itself uses cc_depn instead. We do the same. def match_instruction(self, data, bitstrm): """ ARM Instructions are pretty dense, so let's do what we can to weed them out """ if "c" not in data or data["c"] == "1111": raise ParseError("Invalid ARM Instruction") def get_N(self): cc_op = self.get("cc_op", Type.int_32) cc_dep1 = self.get("cc_dep1", Type.int_32) cc_dep2 = self.get("cc_dep2", Type.int_32) cc_depn = self.get("cc_ndep", Type.int_32) return self.ccall(Type.int_32, "armg_calculate_flag_n", [cc_op, cc_dep1, cc_dep2, cc_depn]) def get_C(self): cc_op = self.get("cc_op", Type.int_32) cc_dep1 = self.get("cc_dep1", Type.int_32) cc_dep2 = self.get("cc_dep2", Type.int_32) cc_depn = self.get("cc_ndep", Type.int_32) return self.ccall(Type.int_32, "armg_calculate_flag_c", [cc_op, cc_dep1, cc_dep2, cc_depn]) def get_V(self): cc_op = self.get("cc_op", Type.int_32) cc_dep1 = self.get("cc_dep1", Type.int_32) cc_dep2 = self.get("cc_dep2", Type.int_32) cc_depn = self.get("cc_ndep", Type.int_32) return self.ccall(Type.int_32, "armg_calculate_flag_v", [cc_op, cc_dep1, cc_dep2, cc_depn]) def get_Z(self): cc_op = self.get("cc_op", Type.int_32) cc_dep1 = self.get("cc_dep1", Type.int_32) cc_dep2 = self.get("cc_dep2", Type.int_32) cc_depn = self.get("cc_ndep", Type.int_32) return self.ccall(Type.int_32, "armg_calculate_flag_z", [cc_op.rdt, cc_dep1.rdt, cc_dep2.rdt, cc_depn.rdt]) def evaluate_condition(self): # condition codes should be in 'c' cond = self.data["c"] if cond == "0000": # equal, z set return self.get_Z() == 1 elif cond == "0001": # not equal, Z clear return self.get_Z() == 0 elif cond == "0010": # Carry, C set return self.get_C() == 1 elif cond == 
"0011": # Carry Clear, C clear return self.get_C() == 0 elif cond == "0100": # MI / neagative / N set return self.get_N() == 1 elif cond == "0101": # PL / plus / positive / N clear return self.get_N() == 0 elif cond == "0110": # VS / V set / Overflow return self.get_V() == 1 elif cond == "0111": # VC / V Clear / no overflow return self.get_V() == 0 elif cond == "1000": # Hi / unsigned higher / C set, Z clear return (self.get_C() == 1) & (self.get_Z() == 0) elif cond == "1001": # LS / C clear, Z set return (self.get_C() == 0) & (self.get_Z() == 1) elif cond == "1011": # LT / Less than / N != V return self.get_N() != self.get_V() elif cond == "1100": # GT / greater than / Z clear and (n == v) return (self.get_Z() == 1) & (self.get_N() != self.get_V()) elif cond == "1101": # LE / less than or equal to / Z set OR (N != V) return (self.get_Z() == 1) | (self.get_N() != self.get_V()) else: # No condition return None def _load_le_instr(self, bitstream: bitstring.ConstBitStream, numbits: int) -> str: # THUMB mode instructions swap endianness every two bytes! 
if (self.addr & 1) == 1 and numbits > 16: chunk = "" oldpos = bitstream.pos try: for _ in range(0, numbits, 16): chunk += bitstring.Bits(uint=bitstream.peek("uintle:%d" % 16), length=16).bin bitstream.pos += 16 finally: bitstream.pos = oldpos return chunk return super()._load_le_instr(bitstream, numbits) class Instruction_MRC(ARMInstruction): name = "MRC" bin_format = "cccc1110CCC1nnnnddddppppOOOOOOOO" # 11101110000100010001111100010000 # c = cond # C = Coprocessor operation mode # d = CPd # O = Offset # p = CP# def compute_result(self): # pylint: disable=arguments-differ # TODO at least look at the conditionals # TODO Clobber the dst reg of MCR # TODO maybe treat coproc regs as simple storage (even though they are very much not) log.debug("Ignoring MRC instruction at %#x.", self.addr) class Instruction_MCR(ARMInstruction): name = "MCR" bin_format = "cccc1110CCC0nnnnddddppppOOOOOOOO" # 11101110000000010000111100010000 # c = cond # C = Coprocessor operation mode # d = CPd # O = Offset # p = CP# def compute_result(self): # pylint: disable=arguments-differ # TODO at least look at the conditionals # TODO Clobber the dst reg of MCR # TODO maybe treat coproc regs as simple storage (even though they are very much not) log.debug("Ignoring MCR instruction at %#x.", self.addr) class Instruction_MSR(ARMInstruction): name = "MSR" bin_format = "cccc00i10d10xxxj1111ssssssssssss" # 11100011001000011111000010010001 # 11100001011011111111000000000001 def compute_result(self): # pylint: disable=arguments-differ log.debug( "Ignoring MSR instruction at %#x. VEX cannot support this instruction. " "See pyvex/lifting/gym/arm_spotter.py", self.addr, ) class Instruction_MRS(ARMInstruction): name = "MRS" bin_format = "cccc00010s001111dddd000000000000" def compute_result(self): # pylint: disable=arguments-differ log.debug( "Ignoring MRS instruction at %#x. VEX cannot support this instruction. 
" "See pyvex/lifting/gym/arm_spotter.py", self.addr, ) class Instruction_STM(ARMInstruction): name = "STM" bin_format = "cccc100pu1w0bbbbrrrrrrrrrrrrrrrr" def match_instruction(self, data, bitstrm): # If we don't push anything, that's not real if int(data["r"]) == 0: raise ParseError("Invalid STM instruction") return True def compute_result(self): # pylint: disable=arguments-differ log.debug( "Ignoring STMxx ^ instruction at %#x. This mode is not implemented by VEX! " "See pyvex/lifting/gym/arm_spotter.py", self.addr, ) class Instruction_LDM(ARMInstruction): name = "LDM" bin_format = "cccc100PU1W1bbbbrrrrrrrrrrrrrrrr" def match_instruction(self, data, bitstrm): # If we don't push anything, that's not real if int(data["r"]) == 0: raise ParseError("Invalid LDM instruction") return True def compute_result(self): # pylint: disable=arguments-differ # test if PC will be set. If so, the jumpkind of this block should be Ijk_Ret log.debug("Spotting an LDM instruction at %#x. This is not fully tested. Prepare for errors.", self.addr) src_n = f"r{int(self.data['b'], 2)}" src = self.get(src_n, Type.int_32) for reg_num, bit in enumerate(self.data["r"]): reg_num = 15 - reg_num if bit == "1": if self.data["P"] == "1": if self.data["U"] == "0": src += 4 else: src -= 4 val = self.load(src, Type.int_32) self.put(val, f"r{reg_num}") if self.data["P"] == "0": if self.data["U"] == "0": src += 4 else: src -= 4 # If we touch PC, we're doing a RET! 
if reg_num == 15 and bit == "1": cond = self.evaluate_condition() if cond is not None: self.jump(cond, val, JumpKind.Ret) else: self.jump(None, val, JumpKind.Ret) # Write-back if self.data["W"] == "1": self.put(src, src_n) class Instruction_STC(ARMInstruction): name = "STC" bin_format = "cccc110PUNW0nnnnddddppppOOOOOOOO" def compute_result(self): # pylint: disable=arguments-differ # TODO At least look at the conditionals log.debug("Ignoring STC instruction at %#x.", self.addr) class Instruction_STC_THUMB(ARMInstruction): name = "STC" bin_format = "111c110PUNW0nnnnddddppppOOOOOOOO" def compute_result(self): # pylint: disable=arguments-differ # TODO At least look at the conditionals log.debug("Ignoring STC instruction at %#x.", self.addr) class Instruction_LDC(ARMInstruction): name = "LDC" bin_format = "cccc110PUNW1nnnnddddppppOOOOOOOO" def compute_result(self): # pylint: disable=arguments-differ # TODO At least look at the conditionals # TODO Clobber the dest reg of LDC # TODO Maybe clobber the dst reg of CDP, if we're really adventurous log.debug("Ignoring LDC instruction at %#x.", self.addr) class Instruction_LDC_THUMB(ARMInstruction): name = "LDC" bin_format = "111c110PUNW1nnnnddddppppOOOOOOOO" def compute_result(self): # pylint: disable=arguments-differ # TODO At least look at the conditionals # TODO Clobber the dest reg of LDC # TODO Maybe clobber the dst reg of CDP, if we're really adventurous log.debug("Ignoring LDC instruction at %#x.", self.addr) class Instruction_CDP(Instruction): name = "CDP" bin_format = "cccc1110oooonnnnddddppppPPP0mmmm" # c = cond # d = CPd # O = Offset # p = CP# def compute_result(self): # pylint: disable=arguments-differ # TODO At least look at the conditionals # TODO Maybe clobber the dst reg of CDP, if we're really adventurous log.debug("Ignoring CDP instruction at %#x.", self.addr) ## ## Thumb! 
(ugh) ## class ThumbInstruction(Instruction): # pylint: disable=abstract-method def mark_instruction_start(self): self.irsb_c.imark(self.addr - 1, self.bytewidth, 1) class Instruction_tCPSID(ThumbInstruction): name = "CPSID" bin_format = "101101x0011x0010" def compute_result(self): # pylint: disable=arguments-differ # TODO haha lol yeah right log.debug("[thumb] Ignoring CPS instruction at %#x.", self.addr) class Instruction_tMSR(ThumbInstruction): name = "tMSR" bin_format = "10x0mmmmxxxxxxxx11110011100Rrrrr" def compute_result(self): # pylint: disable=arguments-differ dest_spec_reg = int(self.data["x"], 2) src_reg = f"r{int(self.data['r'], 2)}" # If 0, do not write the SPSR if self.data["R"] == "0": if dest_spec_reg == 8: # msp src = self.get(src_reg, Type.int_32) self.put(src, "sp") elif dest_spec_reg == 16: # primask src = self.get(src_reg, Type.int_32) self.put(src, "primask") else: log.debug( "[thumb] FIXME: tMSR at %#x is writing into an unsupported special register %#x. " "Ignoring the instruction.", self.addr, dest_spec_reg, ) else: log.debug("[thumb] tMSR at %#x is writing SPSR. Ignoring the instruction. FixMe.", self.addr) log.debug( "[thumb] Spotting an tMSR instruction at %#x. This is not fully tested. Prepare for errors.", self.addr ) class Instruction_tMRS(ThumbInstruction): name = "tMRS" bin_format = "10x0mmmmxxxxxxxx11110011111Rrrrr" def compute_result(self): # pylint: disable=arguments-differ spec_reg = int(self.data["x"], 2) dest_reg = f"r{int(self.data['m'], 2)}" # Reading from CPSR if self.data["R"] == "0": # See special registers constants here: # https://github.com/aquynh/capstone/blob/45bec1a691e455b864f7e4d394711a467e5493dc/arch/ARM/ARMInstPrinter.c#L1654 if spec_reg == 8: # We move the SP and call it a day. src = self.get("sp", Type.int_32) self.put(src, dest_reg) elif spec_reg == 16: src = self.get("primask", Type.int_32) self.put(src, dest_reg) else: log.debug( "[thumb] FIXME: tMRS at %#x is using the unsupported special register %#x. 
" "Ignoring the instruction.", self.addr, spec_reg, ) else: log.debug("[thumb] tMRS at %#x is reading from SPSR. Ignoring the instruction. FixMe.", self.addr) log.debug("[thumb] Ignoring tMRS instruction at %#x.", self.addr) log.debug( "[thumb] Spotting an tMRS instruction at %#x. This is not fully tested. Prepare for errors.", self.addr ) class Instruction_tDMB(ThumbInstruction): name = "DMB" bin_format = "100011110101xxxx1111001110111111" def compute_result(self): # pylint: disable=arguments-differ # TODO haha lol yeah right log.debug("[thumb] Ignoring DMB instruction at %#x.", self.addr) class Instruction_WFI(ThumbInstruction): name = "WFI" bin_format = "10111111001a0000" # 1011111100110000 def compute_result(self): # pylint: disable=arguments-differ log.debug("[thumb] Ignoring WFI instruction at %#x.", self.addr) class ARMSpotter(GymratLifter): arm_instrs = [ Instruction_MRC, Instruction_MCR, Instruction_MSR, Instruction_MRS, Instruction_STM, Instruction_LDM, Instruction_STC, Instruction_LDC, Instruction_CDP, ] thumb_instrs = [ Instruction_tCPSID, Instruction_tMSR, Instruction_tMRS, Instruction_WFI, Instruction_tDMB, Instruction_STC_THUMB, Instruction_LDC_THUMB, ] def __init__(self, arch: Arch, addr: int): super().__init__(arch, addr) self.thumb: bool = False def _lift(self): if self.irsb.addr & 1: # Thumb! 
self.instrs = self.thumb_instrs self.thumb = True else: self.instrs = self.arm_instrs self.thumb = False super()._lift() ================================================ FILE: pyvex/lifting/gym/x86_spotter.py ================================================ import logging from pyvex.lifting.util import GymratLifter, Instruction, JumpKind, Type log = logging.getLogger(__name__) # pylint: disable=missing-class-docstring class Instruction_SWAPGS(Instruction): name = "SWAPGS" bin_format = "000011110000000111111000" # 0f 01 f8 def compute_result(self, *args): pass # TODO check for priv mode class Instruction_SYSRET(Instruction): name = "SYSRET" bin_format = "010010000000111100000111" # 48 04 07 def compute_result(self, *args): result = self.dirty(Type.int_64, "%sg_dirtyhelper_SYSRET" % self.arch.name.lower(), ()) self.jump(None, result, JumpKind.Ret) class Instruction_IRETQ(Instruction): name = "IRETQ" bin_format = "0100100011001111" # 48 cf def compute_result(self, *args): result = self.dirty(Type.int_64, "%sg_dirtyhelper_IRETQ" % self.arch.name.lower(), ()) self.jump(None, result, JumpKind.Ret) class Instruction_RDMSR(Instruction): name = "RDMSR" bin_format = "0000111100110010" # 0f 32 def compute_result(self, *args): ecx = self.get("ecx", Type.int_32) result = self.dirty(Type.int_64, "%sg_dirtyhelper_RDMSR" % self.arch.name.lower(), (ecx,)) edx = result.narrow_high(Type.int_32) eax = result.narrow_low(Type.int_32) if self.arch.bits == 32: self.put(eax, "eax") self.put(edx, "edx") else: self.put(eax.widen_unsigned(Type.int_64), "rax") self.put(edx.widen_unsigned(Type.int_64), "rdx") class Instruction_XGETBV(Instruction): name = "XGETBV" bin_format = "000011110000000111010000" # 0f 01 d0 def compute_result(self, *args): ecx = self.get("ecx", Type.int_32) result = self.dirty(Type.int_64, "%sg_dirtyhelper_XGETBV" % self.arch.name.lower(), (ecx,)) edx = result.narrow_high(Type.int_32) eax = result.narrow_low(Type.int_32) if self.arch.bits == 32: self.put(eax, "eax") 
self.put(edx, "edx") else: self.put(eax.widen_unsigned(Type.int_64), "rax") self.put(edx.widen_unsigned(Type.int_64), "rdx") class Instruction_AAM(Instruction): name = "AAM" bin_format = "11010100iiiiiiii" # From https://www.felixcloutier.com/x86/aam def compute_result(self): # pylint: disable=arguments-differ base = self.constant(int(self.data["i"], 2), Type.int_8) temp_al = self.get("al", Type.int_8) temp_ah = temp_al // base temp_al = temp_al % base self.put(temp_ah, "ah") self.put(temp_al, "al") log.debug( "The generalized AAM instruction is not supported by VEX, and is handled specially by pyvex." " It has no flag handling at present. See pyvex/lifting/gym/x86_spotter.py for details" ) # TODO: Flags class Instruction_AAD(Instruction): name = "AAD" bin_format = "11010101iiiiiiii" # From https://www.felixcloutier.com/x86/aad def compute_result(self): # pylint: disable=arguments-differ base = self.constant(int(self.data["i"], 2), Type.int_8) temp_al = self.get("al", Type.int_8) temp_ah = self.get("ah", Type.int_8) temp_al = (temp_al + (temp_ah * base)) & 0xFF temp_ah = self.constant(0, Type.int_8) self.put(temp_ah, "ah") self.put(temp_al, "al") log.debug( "The generalized AAD instruction is not supported by VEX, and is handled specially by pyvex." " It has no flag handling at present. 
See pyvex/lifting/gym/x86_spotter.py for details" ) # TODO: Flags class AMD64Spotter(GymratLifter): instrs = [ Instruction_RDMSR, Instruction_XGETBV, Instruction_AAD, Instruction_AAM, Instruction_SWAPGS, Instruction_IRETQ, Instruction_SYSRET, ] class X86Spotter(GymratLifter): instrs = [ Instruction_RDMSR, Instruction_XGETBV, Instruction_AAD, Instruction_AAM, ] ================================================ FILE: pyvex/lifting/libvex.py ================================================ import logging import threading from typing import TYPE_CHECKING from pyvex.errors import LiftingException from pyvex.native import ffi, pvc from pyvex.types import CLiftSource, LibvexArch from .lift_function import Lifter log = logging.getLogger("pyvex.lifting.libvex") _libvex_lock = threading.Lock() LIBVEX_SUPPORTED_ARCHES = { "X86", "AMD64", "MIPS32", "MIPS64", "ARM", "ARMEL", "ARMHF", "ARMCortexM", "AARCH64", "PPC32", "PPC64", "S390X", "RISCV64", } VEX_MAX_INSTRUCTIONS = 99 VEX_MAX_BYTES = 5000 class VexRegisterUpdates: VexRegUpd_INVALID = 0x700 VexRegUpdSpAtMemAccess = 0x701 VexRegUpdUnwindregsAtMemAccess = 0x702 VexRegUpdAllregsAtMemAccess = 0x703 VexRegUpdAllregsAtEachInsn = 0x704 VexRegUpdLdAllregsAtEachInsn = 0x705 class LibVEXLifter(Lifter): __slots__ = () REQUIRE_DATA_C = True @staticmethod def get_vex_log(): return bytes(ffi.buffer(pvc.msg_buffer, pvc.msg_current_size)).decode() if pvc.msg_buffer != ffi.NULL else None def _lift(self): if TYPE_CHECKING: assert isinstance(self.irsb.arch, LibvexArch) assert isinstance(self.data, CLiftSource) try: _libvex_lock.acquire() pvc.log_level = log.getEffectiveLevel() vex_arch = getattr(pvc, self.irsb.arch.vex_arch, None) assert vex_arch is not None if self.bytes_offset is None: self.bytes_offset = 0 if self.max_bytes is None or self.max_bytes > VEX_MAX_BYTES: max_bytes = VEX_MAX_BYTES else: max_bytes = self.max_bytes if self.max_inst is None or self.max_inst > VEX_MAX_INSTRUCTIONS: max_inst = VEX_MAX_INSTRUCTIONS else: max_inst = 
self.max_inst strict_block_end = self.strict_block_end if strict_block_end is None: strict_block_end = True if self.cross_insn_opt: px_control = VexRegisterUpdates.VexRegUpdUnwindregsAtMemAccess else: px_control = VexRegisterUpdates.VexRegUpdLdAllregsAtEachInsn self.irsb.arch.vex_archinfo["hwcache_info"]["caches"] = ffi.NULL lift_r = pvc.vex_lift( vex_arch, self.irsb.arch.vex_archinfo, self.data + self.bytes_offset, self.irsb.addr, max_inst, max_bytes, self.opt_level, self.traceflags, self.allow_arch_optimizations, strict_block_end, 1 if self.collect_data_refs else 0, 1 if self.load_from_ro_regions else 0, 1 if self.const_prop else 0, px_control, self.bytes_offset, ) log_str = self.get_vex_log() if lift_r == ffi.NULL: raise LiftingException("libvex: unknown error" if log_str is None else log_str) else: if log_str is not None: log.debug(log_str) self.irsb._from_c(lift_r, skip_stmts=self.skip_stmts) if self.irsb.size == 0: log.debug("raising lifting exception") raise LiftingException("libvex: could not decode any instructions @ 0x%x" % self.addr) finally: _libvex_lock.release() self.irsb.arch.vex_archinfo["hwcache_info"]["caches"] = None ================================================ FILE: pyvex/lifting/lift_function.py ================================================ import logging from collections import defaultdict from typing import DefaultDict from pyvex import const from pyvex.block import IRSB from pyvex.const import vex_int_class from pyvex.errors import LiftingException, NeedStatementsNotification, PyVEXError, SkipStatementsError from pyvex.expr import Const from pyvex.native import ffi from pyvex.types import LiftSource, PyLiftSource from .lifter import Lifter from .post_processor import Postprocessor log = logging.getLogger(__name__) lifters: DefaultDict[str, list[type[Lifter]]] = defaultdict(list) postprocessors: DefaultDict[str, list[type[Postprocessor]]] = defaultdict(list) def lift( data: LiftSource, addr, arch, max_bytes=None, max_inst=None, 
bytes_offset=0, opt_level=1, traceflags=0, strict_block_end=True, inner=False, skip_stmts=False, collect_data_refs=False, cross_insn_opt=True, load_from_ro_regions=False, const_prop=False, ): """ Recursively lifts blocks using the registered lifters and postprocessors. Tries each lifter in the order in which they are registered on the data to lift. If a lifter raises a LiftingException on the data, it is skipped. If it succeeds and returns a block with a jumpkind of Ijk_NoDecode, all of the lifters are tried on the rest of the data and if they work, their output is appended to the first block. :param arch: The arch to lift the data as. :param addr: The starting address of the block. Effects the IMarks. :param data: The bytes to lift as either a python string of bytes or a cffi buffer object. :param max_bytes: The maximum number of bytes to lift. If set to None, no byte limit is used. :param max_inst: The maximum number of instructions to lift. If set to None, no instruction limit is used. :param bytes_offset: The offset into `data` to start lifting at. :param opt_level: The level of optimization to apply to the IR, -1 through 2. -1 is the strictest unoptimized level, 0 is unoptimized but will perform some lookahead/lookbehind optimizations, 1 performs constant propogation, and 2 performs loop unrolling, which honestly doesn't make much sense in the context of pyvex. The default is 1. :param traceflags: The libVEX traceflags, controlling VEX debug prints. .. note:: Explicitly specifying the number of instructions to lift (`max_inst`) may not always work exactly as expected. For example, on MIPS, it is meaningless to lift a branch or jump instruction without its delay slot. VEX attempts to Do The Right Thing by possibly decoding fewer instructions than requested. 
Specifically, this means that lifting a branch or jump on MIPS as a single instruction (`max_inst=1`) will result in an empty IRSB, and subsequent attempts to run this block will raise `SimIRSBError('Empty IRSB passed to SimIRSB.')`. .. note:: If no instruction and byte limit is used, pyvex will continue lifting the block until the block ends properly or until it runs out of data to lift. """ if max_bytes is not None and max_bytes <= 0: raise PyVEXError("Cannot lift block with no data (max_bytes <= 0)") if not data: raise PyVEXError("Cannot lift block with no data (data is empty)") if isinstance(data, str): raise TypeError("Cannot pass unicode string as data to lifter") py_data: PyLiftSource | None if isinstance(data, (bytes, bytearray, memoryview)): py_data = data c_data = None else: if max_bytes is None: raise PyVEXError("Cannot lift block with ffi pointer and no size (max_bytes is None)") c_data = data py_data = None allow_arch_optimizations = True # In order to attempt to preserve the property that # VEX lifts the same bytes to the same IR at all times when optimizations are disabled # we hack off all of VEX's non-IROpt optimizations when opt_level == -1. # This is intended to enable comparisons of the lifted IR between code that happens to be # found in different contexts. 
if opt_level < 0: allow_arch_optimizations = False opt_level = 0 for lifter in lifters[arch.name]: try: u_data: LiftSource = data if lifter.REQUIRE_DATA_C: if c_data is None: assert py_data is not None if isinstance(py_data, (bytearray, memoryview)): u_data = ffi.from_buffer(ffi.BVoidP, py_data) else: u_data = ffi.from_buffer(ffi.BVoidP, py_data + b"\0" * 8) max_bytes = min(len(py_data), max_bytes) if max_bytes is not None else len(py_data) else: u_data = c_data skip = 0 elif lifter.REQUIRE_DATA_PY: if bytes_offset and arch.name.startswith("ARM") and (addr & 1) == 1: skip = bytes_offset - 1 else: skip = bytes_offset if py_data is None: assert c_data is not None if max_bytes is None: log.debug("Cannot create py_data from c_data when no max length is given") continue u_data = ffi.buffer(c_data + skip, max_bytes)[:] else: if max_bytes is None: u_data = py_data[skip:] else: u_data = py_data[skip : skip + max_bytes] else: raise RuntimeError( "Incorrect lifter configuration. What type of data does %s expect?" 
% lifter.__class__ ) try: final_irsb = lifter(arch, addr).lift( u_data, bytes_offset - skip, max_bytes, max_inst, opt_level, traceflags, allow_arch_optimizations, strict_block_end, skip_stmts, collect_data_refs=collect_data_refs, cross_insn_opt=cross_insn_opt, load_from_ro_regions=load_from_ro_regions, const_prop=const_prop, ) except SkipStatementsError: assert skip_stmts is True final_irsb = lifter(arch, addr).lift( u_data, bytes_offset - skip, max_bytes, max_inst, opt_level, traceflags, allow_arch_optimizations, strict_block_end, skip_stmts=False, collect_data_refs=collect_data_refs, cross_insn_opt=cross_insn_opt, load_from_ro_regions=load_from_ro_regions, const_prop=const_prop, ) break except LiftingException as ex: log.debug("Lifting Exception: %s", str(ex)) continue else: final_irsb = IRSB.empty_block( arch, addr, size=0, nxt=Const(const.vex_int_class(arch.bits)(addr)), jumpkind="Ijk_NoDecode", ) final_irsb.invalidate_direct_next() return final_irsb if final_irsb.size > 0 and final_irsb.jumpkind == "Ijk_NoDecode": # We have decoded a few bytes before we hit an undecodeable instruction. # Determine if this is an intentional NoDecode, like the ud2 instruction on AMD64 nodecode_addr_expr = final_irsb.next if type(nodecode_addr_expr) is Const: nodecode_addr = nodecode_addr_expr.con.value next_irsb_start_addr = addr + final_irsb.size if nodecode_addr != next_irsb_start_addr: # The last instruction of the IRSB has a non-zero length. This is an intentional NoDecode. # The very last instruction has been decoded final_irsb.jumpkind = "Ijk_NoDecode" final_irsb.next = final_irsb.next final_irsb.invalidate_direct_next() return final_irsb # Decode more bytes if skip_stmts: # When gymrat will be invoked, we will merge future basic blocks to the current basic block. In this case, # statements are usually required. # TODO: In the future, we may further optimize it to handle cases where getting statements in gymrat is not # TODO: required. 
return lift( data, addr, arch, max_bytes=max_bytes, max_inst=max_inst, bytes_offset=bytes_offset, opt_level=opt_level, traceflags=traceflags, strict_block_end=strict_block_end, skip_stmts=False, collect_data_refs=collect_data_refs, load_from_ro_regions=load_from_ro_regions, const_prop=const_prop, ) next_addr = addr + final_irsb.size if max_bytes is not None: max_bytes -= final_irsb.size if isinstance(data, (bytes, bytearray, memoryview)): data_left = data[final_irsb.size :] else: data_left = data + final_irsb.size if max_inst is not None: max_inst -= final_irsb.instructions if (max_bytes is None or max_bytes > 0) and (max_inst is None or max_inst > 0) and data_left: more_irsb = lift( data_left, next_addr, arch, max_bytes=max_bytes, max_inst=max_inst, bytes_offset=bytes_offset, opt_level=opt_level, traceflags=traceflags, strict_block_end=strict_block_end, inner=True, skip_stmts=False, collect_data_refs=collect_data_refs, load_from_ro_regions=load_from_ro_regions, const_prop=const_prop, ) if more_irsb.size: # Successfully decoded more bytes final_irsb.extend(more_irsb) elif max_bytes == 0: # We have no more bytes left. Mark the jumpkind of the IRSB as Ijk_Boring if final_irsb.size > 0 and final_irsb.jumpkind == "Ijk_NoDecode": final_irsb.jumpkind = "Ijk_Boring" final_irsb.next = Const(vex_int_class(arch.bits)(final_irsb.addr + final_irsb.size)) if not inner: for postprocessor in postprocessors[arch.name]: try: postprocessor(final_irsb).postprocess() except NeedStatementsNotification as e: # The post-processor cannot work without statements. Re-lift the current block with skip_stmts=False if not skip_stmts: # sanity check # Why does the post-processor raise NeedStatementsNotification when skip_stmts is False? raise TypeError( "Bad post-processor %s: " "NeedStatementsNotification is raised when statements are available." 
% postprocessor.__class__ ) from e # Re-lift the current IRSB return lift( data, addr, arch, max_bytes=max_bytes, max_inst=max_inst, bytes_offset=bytes_offset, opt_level=opt_level, traceflags=traceflags, strict_block_end=strict_block_end, inner=inner, skip_stmts=False, collect_data_refs=collect_data_refs, load_from_ro_regions=load_from_ro_regions, const_prop=const_prop, ) except LiftingException: continue return final_irsb def register(lifter, arch_name): """ Registers a Lifter or Postprocessor to be used by pyvex. Lifters are are given priority based on the order in which they are registered. Postprocessors will be run in registration order. :param lifter: The Lifter or Postprocessor to register :vartype lifter: :class:`Lifter` or :class:`Postprocessor` """ if issubclass(lifter, Lifter): log.debug("Registering lifter %s for architecture %s.", lifter.__name__, arch_name) lifters[arch_name].append(lifter) if issubclass(lifter, Postprocessor): log.debug("Registering postprocessor %s for architecture %s.", lifter.__name__, arch_name) postprocessors[arch_name].append(lifter) ================================================ FILE: pyvex/lifting/lifter.py ================================================ from pyvex.block import IRSB from pyvex.types import Arch, LiftSource # pylint:disable=attribute-defined-outside-init class Lifter: __slots__ = ( "data", "bytes_offset", "opt_level", "traceflags", "allow_arch_optimizations", "strict_block_end", "collect_data_refs", "max_inst", "max_bytes", "skip_stmts", "irsb", "arch", "addr", "cross_insn_opt", "load_from_ro_regions", "const_prop", "disasm", "dump_irsb", ) """ A lifter is a class of methods for processing a block. :ivar data: The bytes to lift as either a python string of bytes or a cffi buffer object. :ivar bytes_offset: The offset into `data` to start lifting at. :ivar max_bytes: The maximum number of bytes to lift. If set to None, no byte limit is used. :ivar max_inst: The maximum number of instructions to lift. 
If set to None, no instruction limit is used. :ivar opt_level: The level of optimization to apply to the IR, 0-2. Most likely will be ignored in any lifter other then LibVEX. :ivar traceflags: The libVEX traceflags, controlling VEX debug prints. Most likely will be ignored in any lifter other than LibVEX. :ivar allow_arch_optimizations: Should the LibVEX lifter be allowed to perform lift-time preprocessing optimizations (e.g., lookback ITSTATE optimization on THUMB) Most likely will be ignored in any lifter other than LibVEX. :ivar strict_block_end: Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z. :ivar skip_stmts: Should LibVEX ignore statements. """ REQUIRE_DATA_C = False REQUIRE_DATA_PY = False def __init__(self, arch: Arch, addr: int): self.arch: Arch = arch self.addr: int = addr def lift( self, data: LiftSource, bytes_offset: int | None = None, max_bytes: int | None = None, max_inst: int | None = None, opt_level: int | float = 1, traceflags: int | None = None, allow_arch_optimizations: bool | None = None, strict_block_end: bool | None = None, skip_stmts: bool = False, collect_data_refs: bool = False, cross_insn_opt: bool = True, load_from_ro_regions: bool = False, const_prop: bool = False, disasm: bool = False, dump_irsb: bool = False, ): """ Wrapper around the `_lift` method on Lifters. Should not be overridden in child classes. :param data: The bytes to lift as either a python string of bytes or a cffi buffer object. :param bytes_offset: The offset into `data` to start lifting at. :param max_bytes: The maximum number of bytes to lift. If set to None, no byte limit is used. :param max_inst: The maximum number of instructions to lift. If set to None, no instruction limit is used. :param opt_level: The level of optimization to apply to the IR, 0-2. Most likely will be ignored in any lifter other then LibVEX. :param traceflags: The libVEX traceflags, controlling VEX debug prints. 
Most likely will be ignored in any lifter other than LibVEX. :param allow_arch_optimizations: Should the LibVEX lifter be allowed to perform lift-time preprocessing optimizations (e.g., lookback ITSTATE optimization on THUMB) Most likely will be ignored in any lifter other than LibVEX. :param strict_block_end: Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z. :param skip_stmts: Should the lifter skip transferring IRStmts from C to Python. :param collect_data_refs: Should the LibVEX lifter collect data references in C. :param cross_insn_opt: If cross-instruction-boundary optimizations are allowed or not. :param disasm: Should the GymratLifter generate disassembly during lifting. :param dump_irsb: Should the GymratLifter log the lifted IRSB. """ irsb: IRSB = IRSB.empty_block(self.arch, self.addr) self.data = data self.bytes_offset = bytes_offset self.opt_level = opt_level self.traceflags = traceflags self.allow_arch_optimizations = allow_arch_optimizations self.strict_block_end = strict_block_end self.collect_data_refs = collect_data_refs self.max_inst = max_inst self.max_bytes = max_bytes self.skip_stmts = skip_stmts self.irsb = irsb self.cross_insn_opt = cross_insn_opt self.load_from_ro_regions = load_from_ro_regions self.const_prop = const_prop self.disasm = disasm self.dump_irsb = dump_irsb self._lift() return self.irsb def _lift(self): """ Lifts the data using the information passed into _lift. Should be overridden in child classes. Should set the lifted IRSB to self.irsb. If a lifter raises a LiftingException on the data, this signals that the lifter cannot lift this data and arch and the lifter is skipped. If a lifter can lift any amount of data, it should lift it and return the lifted block with a jumpkind of Ijk_NoDecode, signalling to pyvex that other lifters should be used on the undecodable data. 
""" raise NotImplementedError() ================================================ FILE: pyvex/lifting/post_processor.py ================================================ # # The post-processor base class # class Postprocessor: def __init__(self, irsb): self.irsb = irsb def postprocess(self): """ Modify the irsb All of the postprocessors will be used in the order that they are registered """ pass ================================================ FILE: pyvex/lifting/util/__init__.py ================================================ from .instr_helper import Instruction from .lifter_helper import GymratLifter, ParseError from .syntax_wrapper import VexValue from .vex_helper import JumpKind, Type __all__ = [ "Type", "JumpKind", "VexValue", "ParseError", "Instruction", "GymratLifter", "ParseError", ] ================================================ FILE: pyvex/lifting/util/instr_helper.py ================================================ import abc import string import bitstring from pyvex.expr import IRExpr, RdTmp from .lifter_helper import ParseError from .syntax_wrapper import VexValue from .vex_helper import IRSBCustomizer, JumpKind, vex_int_class class Instruction(metaclass=abc.ABCMeta): """ Base class for an Instruction. You should make a subclass of this for each instruction you want to lift. These classes will contain the "semantics" of the instruction, that is, what it _does_, in terms of the VEX IR. You may want to subclass this for your architecture, and add arch-specific handling for parsing, argument resolution, etc., and have instructions subclass that instead. The core parsing functionality is done via ``bin_format``. Each instruction should be a subclass of ``Instruction`` and will be parsed by comparing bits in the provided bitstream to symbols in the ``bin_format`` member of the class. 
    "Bin formats" are strings of symbols, like those you'd find in an ISA document, such as "0010rrrrddddffmm"
    0 or 1 specify hard-coded bits that must match for an instruction to match.
    Any letters specify arguments, grouped by letter, which will be parsed and provided as bitstrings
    in the ``data`` member of the class as a dictionary.
    So, in our example, the bits ``0010110101101001``, applied to format string ``0010rrrrddddffmm``
    will result in the following in ``self.data``:

        {'r': '1101',
         'd': '0110',
         'f': '10',
         'm': '01'}

    Implement compute_result to provide the "meat" of what your instruction does.
    You can also implement it in your arch-specific subclass of ``Instruction``,
    to handle things common to all instructions, and provide instruction implementations elsewhere.

    We provide the ``VexValue`` syntax wrapper to make expressing instruction semantics easy.
    You first convert the bitstring arguments into ``VexValue``s using the provided convenience methods
    (``self.get/put/load/store/etc.``)
    This loads the register from the actual registers into a temporary value we can work with.
    You can then write it back to a register when you're done.
    For example, if you have the register in ``r``, as above, you can make a ``VexValue`` like this:

        r = int(self.data['r'], 2) # we get bits corresponding to `r` bits and convert it to an int
        r_vv = self.get(r, Type.int_32)

    If you then had an instruction to increment ``r``, you could simply:

        return r_vv += 1

    You could then write it back to the register like this:

        self.put(r_vv, r)

    Note that most architectures have special flags that get set differently for each instruction,
    make sure to implement those as well (override ``set_flags()`` )

    Override ``parse()`` to extend parsing.
    For example, in MSP430, this allows us to grab extra words from the bitstream
    when extra immediate words are present.

    All architectures are different enough that there's no magic recipe for how to write a lifter.
    See the examples provided by gymrat for ideas of how to use this to build your own lifters quickly and easily.
    """

    # Parsed bin_format arguments, keyed by format letter.
    data: dict[str, str]
    # The customizer VEX statements are emitted into; set by lift().
    irsb_c: IRSBCustomizer

    def __init__(self, bitstrm, arch, addr):
        """
        Create an instance of the instruction

        :param irsb_c: The IRSBCustomizer to put VEX instructions into
        :param bitstrm: The bitstream to decode instructions from
        :param addr: The address of the instruction to be lifted, used only for jumps and branches
        """
        self.addr = addr
        self.arch = arch
        self.bitwidth = len(self.bin_format)
        # parse() raises ParseError if the bits do not match this instruction.
        self.data = self.parse(bitstrm)

    @property
    @abc.abstractmethod
    def bin_format(self) -> str:
        """
        Read the documentation of the class to understand what a bin format string is

        :return: str bin format string
        """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """
        Name of the instruction

        Can be useful to name the instruction when there's an error related to it
        """

    def __call__(self, irsb_c, past_instructions, future_instructions):
        self.lift(irsb_c, past_instructions, future_instructions)

    def mark_instruction_start(self):
        # Emit the IMark statement that delimits this instruction in the IRSB.
        self.irsb_c.imark(self.addr, self.bytewidth, 0)

    def fetch_operands(self):  # pylint: disable=no-self-use
        """
        Get the operands out of memory or registers
        Return a tuple of operands for the instruction
        """
        return ()

    def lift(self, irsb_c: IRSBCustomizer, past_instructions, future_instructions):  # pylint: disable=unused-argument
        """
        This is the main body of the "lifting" for the instruction.
        This can/should be overridden to provide the general flow of how instructions in your arch work.
        For example, in MSP430, this is:

        - Figure out what your operands are by parsing the addressing, and load them into temporary registers
        - Do the actual operation, and commit the result, if needed.
        - Compute the flags
        """
        self.irsb_c = irsb_c
        # Always call this first!
        self.mark_instruction_start()
        # Then do the actual stuff.
        inputs = self.fetch_operands()
        retval = self.compute_result(*inputs)  # pylint: disable=assignment-from-none
        if retval is not None:
            self.commit_result(retval)
        # Flags are computed from the operands plus the (possibly None) result.
        vals = list(inputs) + [retval]
        self.compute_flags(*vals)

    def commit_result(self, res):
        """
        This where the result of the operation is written to a destination.
        This happens only if compute_result does not return None, and happens before compute_flags is called.
        Override this to specify how to write out the result.
        The results of fetch_operands can be used to resolve various addressing modes for the write outward.
        A common pattern is to return a function from fetch_operands which will be called here
        to perform the write.

        :param args: A tuple of the results of fetch_operands and compute_result
        """

    def compute_result(self, *args):  # pylint: disable=unused-argument,no-self-use
        """
        This is where the actual operation performed by your instruction, excluding the calculation of flags,
        should be performed.  Return the VexValue of the "result" of the instruction,
        which may be used to calculate the flags later.
        For example, for a simple add, with arguments src and dst, you can simply write:

            return src + dst:

        :param args:
        :return: A VexValue containing the "result" of the operation.
        """
        return None

    def compute_flags(self, *args):
        """
        Most CPU architectures have "flags" that should be computed for many instructions.
        Override this to specify how that happens.  One common pattern is to define this method to call specific
        methods to update each flag, which can then be overridden in the actual classes for each instruction.
        """

    def match_instruction(self, data, bitstrm):  # pylint: disable=unused-argument,no-self-use
        """
        Override this to extend the parsing functionality.
        This is great for if your arch has instruction "formats" that have an opcode that has to match.

        :param data:
        :param bitstrm:
        :return: data
        """
        return data

    def parse(self, bitstrm):
        if self.arch.instruction_endness == "Iend_LE":
            # This arch stores its instructions in memory endian-flipped compared to the ISA.
            # To enable natural lifter-writing, we let the user write them like in the manual,
            # and correct for endness here.
            instr_bits = self._load_le_instr(bitstrm, self.bitwidth)
        else:
            instr_bits = bitstrm.peek("bin:%d" % self.bitwidth)

        data = {c: "" for c in self.bin_format if c in string.ascii_letters}
        for c, b in zip(self.bin_format, instr_bits):
            if c in "01":
                if b != c:
                    raise ParseError("Mismatch between format bit %c and instruction bit %c" % (c, b))
            elif c in string.ascii_letters:
                data[c] += b
            else:
                raise ValueError("Invalid bin_format character %c" % c)

        # Hook here for extra matching functionality
        if hasattr(self, "match_instruction"):
            # Should raise if it's not right
            self.match_instruction(data, bitstrm)

        # Use up the bits once we're sure it's right
        self.rawbits = bitstrm.read("hex:%d" % self.bitwidth)

        # Hook here for extra parsing functionality (e.g., trailers)
        if hasattr(self, "_extra_parsing"):
            data = self._extra_parsing(data, bitstrm)  # pylint: disable=no-member
        return data

    @property
    def bytewidth(self):
        # Width of this instruction in bytes; bin_format must be byte-aligned.
        if self.bitwidth % self.arch.byte_width != 0:
            raise ValueError("Instruction is not a multiple of bytes wide!")
        return self.bitwidth // self.arch.byte_width

    def disassemble(self):
        """
        Return the disassembly of this instruction, as a string.
        Override this in subclasses.

        :return: The address (self.addr), the instruction's name, and a list of its operands, as strings
        """
        return self.addr, "UNK", [self.rawbits]

    # These methods should be called in subclasses to do register and memory operations

    def load(self, addr, ty):
        """
        Load a value from memory into a VEX temporary register.

        :param addr: The VexValue containing the addr to load from.
        :param ty: The Type of the resulting data
        :return: a VexValue
        """
        rdt = self.irsb_c.load(addr.rdt, ty)
        return VexValue(self.irsb_c, rdt)

    def constant(self, val, ty):
        """
        Creates a constant as a VexValue

        :param val: The value, as an integer
        :param ty: The type of the resulting VexValue
        :return: a VexValue
        """
        if isinstance(val, VexValue) and not isinstance(val, IRExpr):
            # NOTE(review): the message says IRExpr should be rejected too, but since VexValue is
            # not an IRExpr subclass this condition only ever rejects VexValue — confirm intent.
            raise Exception("Constant cannot be made from VexValue or IRExpr")
        rdt = self.irsb_c.mkconst(val, ty)
        return VexValue(self.irsb_c, rdt)

    @staticmethod
    def _lookup_register(arch, reg):
        # TODO: This is a hack to make it work with archinfo where we use
        # register indicies instead of names
        if isinstance(reg, int):
            if hasattr(arch, "register_index"):
                reg = arch.register_index[reg]
            else:
                reg = arch.register_list[reg].name
        return arch.get_register_offset(reg)

    def get(self, reg, ty):
        """
        Load a value from a machine register into a VEX temporary register.
        All values must be loaded out of registers before they can be used with operations, etc
        and stored back into them when the instruction is over.  See Put().

        :param reg: Register number as an integer, or register string name
        :param ty: The Type to use.
        :return: A VexValue of the gotten value.
        """
        offset = self._lookup_register(self.irsb_c.irsb.arch, reg)
        if offset == self.irsb_c.irsb.arch.ip_offset:
            # Reads of the instruction pointer return this instruction's address directly.
            return self.constant(self.addr, ty)
        rdt = self.irsb_c.rdreg(offset, ty)
        return VexValue(self.irsb_c, rdt)

    def put(self, val, reg):
        """
        Puts a value from a VEX temporary register into a machine register.
        This is how the results of operations done to registers get committed to the machine's state.

        :param val: The VexValue to store (Want to store a constant?  See Constant() first)
        :param reg: The integer register number to store into, or register name
        :return: None
        """
        offset = self._lookup_register(self.irsb_c.irsb.arch, reg)
        self.irsb_c.put(val.rdt, offset)

    def put_conditional(self, cond, valiftrue, valiffalse, reg):
        """
        Like put, except it checks a condition to decide what to put in the destination register.

        :param cond: The VexValue representing the logical expression for the condition
            (if your expression only has constants, don't use this method!)
        :param valiftrue: the VexValue to put in reg if cond evals as true
        :param validfalse: the VexValue to put in reg if cond evals as false
        :param reg: The integer register number to store into, or register name
        :return: None
        """
        val = self.irsb_c.ite(cond.rdt, valiftrue.rdt, valiffalse.rdt)
        offset = self._lookup_register(self.irsb_c.irsb.arch, reg)
        self.irsb_c.put(val, offset)

    def store(self, val, addr):
        """
        Store a VexValue in memory at the specified location.

        :param val: The VexValue of the value to store
        :param addr: The VexValue of the address to store into
        :return: None
        """
        self.irsb_c.store(addr.rdt, val.rdt)

    def jump(self, condition, to_addr, jumpkind=JumpKind.Boring, ip_offset=None):
        """
        Jump to a specified destination, under the specified condition.
        Used for branches, jumps, calls, returns, etc.

        :param condition: The VexValue representing the expression for the guard,
            or None for an unconditional jump
        :param to_addr: The address to jump to.
        :param jumpkind: The JumpKind to use.  See the VEX docs for what these are; you only need them
            for things aren't normal jumps (e.g., calls, interrupts, program exits, etc etc)
        :return: None
        """
        to_addr_ty = None
        if isinstance(to_addr, VexValue):
            # Unpack a VV
            to_addr_rdt = to_addr.rdt
            to_addr_ty = to_addr.ty
        elif isinstance(to_addr, int):
            # Direct jump to an int, make an RdT and Ty
            to_addr_ty = vex_int_class(self.irsb_c.irsb.arch.bits).type
            to_addr = self.constant(to_addr, to_addr_ty)  # TODO archinfo may be changing
            to_addr_rdt = to_addr.rdt
        elif isinstance(to_addr, RdTmp):
            # An RdT; just get the Ty of the arch's pointer type
            to_addr_ty = vex_int_class(self.irsb_c.irsb.arch.bits).type
            to_addr_rdt = to_addr
        else:
            raise TypeError("Jump destination has unknown type: " + repr(type(to_addr)))
        if not condition:
            # This is the default exit.
            self.irsb_c.irsb.jumpkind = jumpkind
            self.irsb_c.irsb.next = to_addr_rdt
        else:
            # add another exit
            # EDG says: We should make sure folks set ArchXYZ.ip_offset like they're supposed to
            if ip_offset is None:
                ip_offset = self.arch.ip_offset
            assert ip_offset is not None
            # The added exit is guarded on the *negated* condition and falls through to the
            # next instruction; the taken branch becomes the default exit.
            negated_condition_rdt = self.ite(condition, self.constant(0, condition.ty), self.constant(1, condition.ty))
            direct_exit_target = self.constant(self.addr + (self.bitwidth // 8), to_addr_ty)
            self.irsb_c.add_exit(negated_condition_rdt, direct_exit_target.rdt, jumpkind, ip_offset)
            self.irsb_c.irsb.jumpkind = jumpkind
            self.irsb_c.irsb.next = to_addr_rdt

    def ite(self, cond, t, f):
        # If-then-else expression over VexValues; returns the raw RdTmp result.
        return self.irsb_c.ite(cond.rdt, t.rdt, f.rdt)

    def ccall(self, ret_type, func_name, args):
        """
        Creates a CCall operation.
        A CCall is a procedure that calculates a value at *runtime*, not at lift-time.
        You can use these for flags, unresolvable jump targets, etc.
        We caution you to avoid using them when at all possible though.

        :param ret_type: The return type of the CCall
        :param func_obj: The name of the helper function to call.  If you're using angr,
            this should be added (or monkeypatched) into ``angr.engines.vex.claripy.ccall``.
        :param args: List of arguments to the function
        :return: A VexValue of the result.
        """
        # Check the args to make sure they're the right type
        list_args = list(args)
        new_args = []
        for arg in list_args:
            if isinstance(arg, VexValue):
                arg = arg.rdt
            new_args.append(arg)
        args = tuple(new_args)
        cc = self.irsb_c.op_ccall(ret_type, func_name, args)
        return VexValue(self.irsb_c, cc)

    def dirty(self, ret_type, func_name, args) -> VexValue:
        """
        Creates a dirty call operation.
        These are like ccalls (clean calls) but their implementations are theoretically allowed
        to read or write to or from any part of the state, making them a nightmare for static analysis
        to reason about.  Avoid their use at all costs.

        :param ret_type: The return type of the dirty call, or None if the dirty call doesn't return anything.
        :param func_name: The name of the helper function to call.  If you're using angr,
            this should be added (or monkeypatched) into ``angr.engines.vex.heavy.dirty``.
        :param args: List of arguments to the function
        :return: A VexValue of the result.
        """
        # Check the args to make sure they're the right type
        list_args = list(args)
        new_args = []
        for arg in list_args:
            if isinstance(arg, VexValue):
                arg = arg.rdt
            new_args.append(arg)
        args = tuple(new_args)
        rdt = self.irsb_c.dirty(ret_type, func_name, args)
        return VexValue(self.irsb_c, rdt)

    def _load_le_instr(self, bitstream: bitstring.ConstBitStream, numbits: int) -> str:
        # Peek (do not consume) numbits as a little-endian integer and return its bit string,
        # effectively endian-flipping the instruction so it matches the ISA manual's layout.
        return bitstring.Bits(uint=bitstream.peek("uintle:%d" % numbits), length=numbits).bin


================================================ FILE: pyvex/lifting/util/lifter_helper.py ================================================
import logging
from typing import TYPE_CHECKING

import bitstring

from pyvex.const import vex_int_class
from pyvex.errors import LiftingException
from pyvex.lifting.lifter import Lifter

from .vex_helper import IRSBCustomizer, JumpKind

if TYPE_CHECKING:
    from .instr_helper import Instruction

log = logging.getLogger(__name__)


def is_empty(bitstrm):
    # True if the bitstream has no bits left to read.
    try:
        bitstrm.peek(1)
        return False
    except bitstring.ReadError:
        return True


class ParseError(Exception):
    pass


class GymratLifter(Lifter):
    """
    This is a base class for lifters that use Gymrat.
    For most architectures, all you need to do is subclass this, and set the property "instructions"
    to be a list of classes that define each instruction.
    By default, a lifter will decode instructions by attempting to instantiate every class until one works.
    This will use an IRSBCustomizer, which will, if it succeeds, add the appropriate VEX instructions
    to a pyvex IRSB.
    pyvex, when lifting a block of code for this architecture, will call the method "lift",
    which will produce the IRSB of the lifted code.
""" __slots__ = ( "bitstrm", "errors", "thedata", "disassembly", ) REQUIRE_DATA_PY = True instrs: list[type["Instruction"]] def __init__(self, arch, addr): super().__init__(arch, addr) self.bitstrm = None self.errors = None self.thedata = None self.disassembly = None def create_bitstrm(self): self.bitstrm = bitstring.ConstBitStream(bytes=self.thedata) def _decode_next_instruction(self, addr): # Try every instruction until one works for possible_instr in self.instrs: try: log.debug("Trying %s", possible_instr.name) return possible_instr(self.bitstrm, self.irsb.arch, addr) # a ParserError signals that this instruction did not match # we need to try other instructions, so we ignore this error except ParseError: pass # l.exception(repr(possible_instr)) # if we are out of input, ignore. # there may be other, shorter instructions that still match, # so we continue with the loop except (bitstring.ReadError, bitstring.InterpretError): pass # If no instruction matches, log an error errorstr = "Unknown instruction at bit position %d" % self.bitstrm.bitpos log.debug(errorstr) log.debug("Address: %#08x" % addr) def decode(self): try: self.create_bitstrm() count = 0 disas = [] addr = self.irsb.addr log.debug("Starting block at address: " + hex(addr)) bytepos = self.bitstrm.bytepos while not is_empty(self.bitstrm): instr = self._decode_next_instruction(addr) if not instr: break disas.append(instr) log.debug("Matched " + instr.name) addr += self.bitstrm.bytepos - bytepos bytepos = self.bitstrm.bytepos count += 1 return disas except Exception as e: self.errors = str(e) log.exception(f"Error decoding block at offset {bytepos:#x} (address {addr:#x}):") raise def _lift(self): self.thedata = ( self.data[: self.max_bytes] if isinstance(self.data, (bytes, bytearray, memoryview)) else self.data[: self.max_bytes].encode() ) log.debug(repr(self.thedata)) instructions = self.decode() if self.disasm: self.disassembly = [instr.disassemble() for instr in instructions] self.irsb.jumpkind = 
JumpKind.Invalid irsb_c = IRSBCustomizer(self.irsb) log.debug("Decoding complete.") for i, instr in enumerate(instructions[: self.max_inst]): log.debug("Lifting instruction %s", instr.name) instr(irsb_c, instructions[:i], instructions[i + 1 :]) if irsb_c.irsb.jumpkind != JumpKind.Invalid: break if (i + 1) == self.max_inst: # if we are on our last iteration instr.jump(None, irsb_c.irsb.addr + irsb_c.irsb.size) break else: if len(irsb_c.irsb.statements) == 0: raise LiftingException("Could not decode any instructions") irsb_c.irsb.jumpkind = JumpKind.NoDecode dst = irsb_c.irsb.addr + irsb_c.irsb.size dst_ty = vex_int_class(irsb_c.irsb.arch.bits).type irsb_c.irsb.next = irsb_c.mkconst(dst, dst_ty) log.debug(str(self.irsb)) if self.dump_irsb: self.irsb.pp() return self.irsb def pp_disas(self): disasstr = "" insts = self.disassemble() for addr, name, args in insts: args_str = ",".join(str(a) for a in args) disasstr += f"{addr:#08x}:\t{name} {args_str}\n" print(disasstr) def error(self): return self.errors def disassemble(self): if self.disassembly is None: self.lift(self.data, disasm=True) return self.disassembly ================================================ FILE: pyvex/lifting/util/syntax_wrapper.py ================================================ import functools from typing import Union from pyvex.const import get_type_size from pyvex.expr import Const, IRExpr, RdTmp from .vex_helper import IRSBCustomizer, Type def checkparams(rhstype=None): def decorator(fn): @functools.wraps(fn) def inner_decorator(self, *args, **kwargs): irsb_cs = {a.irsb_c for a in list(args) + [self] if isinstance(a, VexValue)} # pylint: disable=no-member assert len(irsb_cs) == 1, "All VexValues must belong to the same irsb_c" args = list(args) for idx, arg in enumerate(args): if isinstance(arg, int): thetype = rhstype if rhstype else self.ty args[idx] = VexValue.Constant(self.irsb_c, arg, thetype) elif not isinstance(arg, VexValue): raise Exception("Cannot convert param %s" % str(arg)) args = 
def vvifyresults(f):
    """
    Decorator that wraps an IR-producing method of :class:`VexValue` so that its
    raw result (an ``RdTmp`` or ``Const`` expression) is re-wrapped as a
    ``VexValue`` tied to the same ``irsb_c``.
    """

    @functools.wraps(f)
    def decor(self, *args, **kwargs):
        returned = f(self, *args, **kwargs)
        # The wrapped function must hand back a raw VEX expression, never an
        # already-wrapped VexValue.  (Idiomatic single isinstance with a tuple.)
        assert isinstance(returned, (RdTmp, Const))
        return VexValue(self.irsb_c, returned)

    return decor
@vvifyresults def set_bits(self, idxsandvals): return self.irsb_c.set_bits(self.rdt, [(i.cast_to(Type.int_8).rdt, b.rdt) for i, b in idxsandvals]) @checkparams() @vvifyresults def ite(self, iftrue, iffalse): onebitcond = self.cast_to(Type.int_1) return self.irsb_c.ite(onebitcond.rdt, iftrue.rdt, iffalse.rdt) @checkparams() @vvifyresults def sar(self, right): """ `v.sar(r)` should do arithmetic shift right of `v` by `r` :param right:VexValue value to shift by :return: VexValue - result of a shift """ return self.irsb_c.op_sar(self.rdt, right.rdt) @checkparams() @vvifyresults def __add__(self, right): return self.irsb_c.op_add(self.rdt, right.rdt) @checkparams() def __radd__(self, left): return self + left @checkparams() @vvifyresults def __sub__(self, right): return self.irsb_c.op_sub(self.rdt, right.rdt) @checkparams() def __rsub__(self, left): return left - self @checkparams() @vvifyresults def __div__(self, right): if self._is_signed: return self.irsb_c.op_sdiv(self.rdt, right.rdt) else: return self.irsb_c.op_udiv(self.rdt, right.rdt) @checkparams() def __rdiv__(self, left): return left // self @checkparams() def __floordiv__(self, right): # Note: nonprimitive return self.__div__(right) @checkparams() def __rfloordiv__(self, left): return left // self @checkparams() def __truediv__(self, right): # Note: nonprimitive return self / right @checkparams() def __rtruediv__(self, left): return left.__truediv__(self) @checkparams() @vvifyresults def __and__(self, right): return self.irsb_c.op_and(self.rdt, right.rdt) @checkparams() def __rand__(self, left): return left & self @checkparams() @vvifyresults def __eq__(self, right): return self.irsb_c.op_cmp_eq(self.rdt, right.rdt) @checkparams() @vvifyresults def __ne__(self, other): return self.irsb_c.op_cmp_ne(self.rdt, other.rdt) @checkparams() @vvifyresults def __invert__(self): return self.irsb_c.op_not(self.rdt) @checkparams() @vvifyresults def __le__(self, right): if self._is_signed: return 
self.irsb_c.op_cmp_sle(self.rdt, right.rdt) else: return self.irsb_c.op_cmp_ule(self.rdt, right.rdt) @checkparams() @vvifyresults def __gt__(self, other): if self._is_signed: return self.irsb_c.op_cmp_sgt(self.rdt, other.rdt) else: return self.irsb_c.op_cmp_ugt(self.rdt, other.rdt) @checkparams() @vvifyresults def __ge__(self, right): if self._is_signed: return self.irsb_c.op_cmp_sge(self.rdt, right.rdt) else: return self.irsb_c.op_cmp_uge(self.rdt, right.rdt) @checkparams(rhstype=Type.int_8) @vvifyresults def __lshift__(self, right): # TODO put better type inference in irsb_c so we can have rlshift """ logical shift left """ return self.irsb_c.op_shl(self.rdt, right.rdt) @checkparams() @vvifyresults def __lt__(self, right): if self._is_signed: return self.irsb_c.op_cmp_slt(self.rdt, right.rdt) else: return self.irsb_c.op_cmp_ult(self.rdt, right.rdt) @checkparams() @vvifyresults def __mod__(self, right): # Note: nonprimitive return self.irsb_c.op_mod(self.rdt, right.rdt) @checkparams() def __rmod__(self, left): return left % self @checkparams() @vvifyresults def __mul__(self, right): if self._is_signed: return self.irsb_c.op_smul(self.rdt, right.rdt) else: return self.irsb_c.op_umul(self.rdt, right.rdt) @checkparams() def __rmul__(self, left): return left * self @checkparams() @vvifyresults def __neg__(self): # Note: nonprimitive if not self._is_signed: raise Exception("Number is unsigned, cannot change sign!") else: return self.rdt * -1 @checkparams() @vvifyresults def __or__(self, right): return self.irsb_c.op_or(self.rdt, right.rdt) def __ror__(self, left): return self | left @checkparams() @vvifyresults def __pos__(self): return self @checkparams(rhstype=Type.int_8) @vvifyresults def __rshift__(self, right): """ logical shift right """ return self.irsb_c.op_shr(self.rdt, right.rdt) @checkparams() def __rlshift__(self, left): return left << self @checkparams() def __rrshift__(self, left): return left >> self @checkparams() @vvifyresults def __xor__(self, right): 
class TypeMeta(type):
    """
    Metaclass that synthesizes integer type names on attribute access:
    ``Type.int_32`` -> ``vex_int_class(32).type`` (i.e. ``"Ity_I32"``).
    """

    # Matches attribute names like "int_8" / "int_32"; the named group captures
    # the bit width.  BUG FIX: the group was previously written without its
    # name -- r"int_(?P\d+)$" -- which is an invalid pattern (the ``<size>``
    # was lost); __getattr__ below reads match.group("size").
    typemeta_re = re.compile(r"int_(?P<size>\d+)$")

    def __getattr__(self, name):
        match = self.typemeta_re.match(name)
        if match:
            width = int(match.group("size"))
            return vex_int_class(width).type
        # BUG FIX: the old fallback called type.__getattr__(name) unbound,
        # which raised an AttributeError about `type` itself rather than about
        # the missing attribute.  Raise the conventional AttributeError instead.
        raise AttributeError(name)
arg_types)) op = fmt_string.format(arg_t=converted_arg_types) return op return gen def mkbinop(fstring): return lambda self, expr_a, expr_b: self.op_binary(make_format_op_generator(fstring))(expr_a, expr_b) def mkunop(fstring): return lambda self, expr_a: self.op_unary(make_format_op_generator(fstring))(expr_a) def mkcmpop(fstring_fragment, signedness=""): def cmpop(self, expr_a, expr_b): ty = self.get_type(expr_a) fstring = f"Iop_Cmp{fstring_fragment}{{arg_t[0]}}{signedness}" retval = mkbinop(fstring)(self, expr_a, expr_b) return self.cast_to(retval, ty) return cmpop class IRSBCustomizer: op_add = mkbinop("Iop_Add{arg_t[0]}") op_sub = mkbinop("Iop_Sub{arg_t[0]}") op_umul = mkbinop("Iop_Mul{arg_t[0]}") op_smul = mkbinop("Iop_MullS{arg_t[0]}") op_sdiv = mkbinop("Iop_DivS{arg_t[0]}") op_udiv = mkbinop("Iop_DivU{arg_t[0]}") # Custom operation that does not exist in libVEX op_mod = mkbinop("Iop_Mod{arg_t[0]}") op_or = mkbinop("Iop_Or{arg_t[0]}") op_and = mkbinop("Iop_And{arg_t[0]}") op_xor = mkbinop("Iop_Xor{arg_t[0]}") op_shr = mkbinop("Iop_Shr{arg_t[0]}") # Shift Right (logical) op_shl = mkbinop("Iop_Shl{arg_t[0]}") # Shift Left (logical) op_sar = mkbinop("Iop_Sar{arg_t[0]}") # Shift Arithmetic Right operation op_not = mkunop("Iop_Not{arg_t[0]}") op_cmp_eq = mkcmpop("EQ") op_cmp_ne = mkcmpop("NE") op_cmp_slt = mkcmpop("LT", "S") op_cmp_sle = mkcmpop("LE", "S") op_cmp_ult = mkcmpop("LT", "U") op_cmp_ule = mkcmpop("LE", "U") op_cmp_sge = mkcmpop("GE", "S") op_cmp_uge = mkcmpop("GE", "U") op_cmp_sgt = mkcmpop("GT", "S") op_cmp_ugt = mkcmpop("GT", "U") def __init__(self, irsb): self.arch = irsb.arch self.irsb = irsb def get_type(self, rdt): return rdt.result_type(self.irsb.tyenv) # Statements (no return value) def _append_stmt(self, stmt): self.irsb.statements += [stmt] def imark(self, int_addr, int_length, int_delta=0): self._append_stmt(IMark(int_addr, int_length, int_delta)) def get_reg(self, regname): # TODO move this into the lifter return 
    def ite(self, condrdt, iftruerdt, iffalserdt):
        # Emit an if-then-else expression and bind it to a fresh tmp.
        # NOTE(review): pyvex's expr.ITE constructor appears to take its
        # operands as (cond, iffalse, iftrue), so the swap of the last two
        # arguments below is presumably deliberate, not a bug -- verify
        # against pyvex.expr.ITE before "fixing" the order.
        return self._settmp(ITE(copy.copy(condrdt), copy.copy(iffalserdt), copy.copy(iftruerdt)))
are all RdTmps for arg in args: assert isinstance(arg, RdTmp) or isinstance(arg, Const) arg_types = [self.get_type(arg) for arg in args] # two operations should never share the same argument instances, copy them here to ensure that args = [copy.copy(a) for a in args] op = Operation(op_generator(arg_types), args) msg = "operation needs to be well typed: " + str(op) assert op.typecheck(self.irsb.tyenv), msg + "\ntypes: " + str(self.irsb.tyenv) return self._settmp(op) return instance def op_binary(self, op_format_str): return self.op_generic(Binop, op_format_str) def op_unary(self, op_format_str): return self.op_generic(Unop, op_format_str) def cast_to(self, rdt, tydest, signed=False, high=False): goalwidth = get_type_size(tydest) rdtwidth = self.get_rdt_width(rdt) if rdtwidth > goalwidth: return self.op_narrow_int(rdt, tydest, high_half=high) elif rdtwidth < goalwidth: return self.op_widen_int(rdt, tydest, signed=signed) else: return rdt def op_to_one_bit(self, rdt): rdtty = self.get_type(rdt) if rdtty not in [Type.int_64, Type.int_32]: rdt = self.op_widen_int_unsigned(rdt, Type.int_32) onebit = self.op_narrow_int(rdt, Type.int_1) return onebit def op_narrow_int(self, rdt, tydest, high_half=False): op_name = "{op}{high}to{dest}".format( op="Iop_{arg_t[0]}", high="HI" if high_half else "", dest=get_op_format_from_const_ty(tydest) ) return self.op_unary(make_format_op_generator(op_name))(rdt) def op_widen_int(self, rdt, tydest, signed=False): op_name = "{op}{sign}to{dest}".format( op="Iop_{arg_t[0]}", sign="S" if signed else "U", dest=get_op_format_from_const_ty(tydest) ) return self.op_unary(make_format_op_generator(op_name))(rdt) def op_widen_int_signed(self, rdt, tydest): return self.op_widen_int(rdt, tydest, signed=True) def op_widen_int_unsigned(self, rdt, tydest): return self.op_widen_int(rdt, tydest, signed=False) def get_msb(self, tmp, ty): width = get_type_size(ty) return self.get_bit(tmp, width - 1) def get_bit(self, rdt, idx): shifted = self.op_shr(rdt, idx) 
bit = self.op_extract_lsb(shifted) return bit def op_extract_lsb(self, rdt): bitmask = self.mkconst(1, self.get_type(rdt)) return self.op_and(bitmask, rdt) def set_bit(self, rdt, idx, bval): currbit = self.get_bit(rdt, idx) areequalextrabits = self.op_xor(bval, currbit) one = self.mkconst(1, self.get_type(areequalextrabits)) areequal = self.op_and(areequalextrabits, one) shifted = self.op_shl(areequal, idx) return self.op_xor(rdt, shifted) def set_bits(self, rdt, idxsandvals): ty = self.get_type(rdt) if all([isinstance(idx, Const) for idx, _ in idxsandvals]): relevantbits = self.mkconst(sum([1 << idx.con.value for idx, _ in idxsandvals]), ty) else: relevantbits = self.mkconst(0, ty) for idx, _ in idxsandvals: shifted = self.op_shl(self.mkconst(1, ty), idx) relevantbits = self.op_or(relevantbits, shifted) setto = self.mkconst(0, ty) for idx, bval in idxsandvals: bvalbit = self.op_extract_lsb(bval) shifted = self.op_shl(bvalbit, idx) setto = self.op_or(setto, shifted) shouldflip = self.op_and(self.op_xor(setto, rdt), relevantbits) return self.op_xor(rdt, shouldflip) def get_rdt_width(self, rdt): return rdt.result_size(self.irsb.tyenv) ================================================ FILE: pyvex/lifting/zerodivision.py ================================================ import copy from pyvex import const, expr, stmt from .post_processor import Postprocessor class ZeroDivisionPostProcessor(Postprocessor): """ A postprocessor for adding zero-division checks to VEX. 
    def postprocess(self):
        """
        Scan the IRSB for Div/Mod binops and insert, *before* each one, a
        CmpEQ-against-zero of the divisor plus a conditional Exit raising
        Ijk_SigFPE_IntDiv at the instruction's own address.
        """
        if self.irsb.statements is None:
            # This is an optimized IRSB. We cannot really post-process it.
            return
        # Collect (index, statement) pairs to insert; we must not mutate
        # self.irsb.statements while enumerating it.
        insertions = []
        last_ip = 0
        for i, s in enumerate(self.irsb.statements):
            if s.tag == "Ist_IMark":
                # Track the current guest instruction address so the SigFPE
                # exit points back at the faulting instruction.
                last_ip = s.addr
            if s.tag == "Ist_WrTmp" and s.data.tag == "Iex_Binop" and ("Div" in s.data.op or "Mod" in s.data.op):
                # args[1] is the divisor; compare it against a zero constant
                # of the same width.
                arg_size = s.data.args[1].result_size(self.irsb.tyenv)
                cmp_args = [copy.copy(s.data.args[1]), expr.Const(const.vex_int_class(arg_size)(0))]
                cmp_tmp = self.irsb.tyenv.add("Ity_I1")
                insertions.append((i, stmt.WrTmp(cmp_tmp, expr.Binop("Iop_CmpEQ%d" % arg_size, cmp_args))))
                insertions.append(
                    (
                        i,
                        stmt.Exit(
                            expr.RdTmp.get_instance(cmp_tmp),
                            const.vex_int_class(self.irsb.arch.bits)(last_ip),
                            "Ijk_SigFPE_IntDiv",
                            self.irsb.offsIP,
                        ),
                    )
                )
        # Insert back-to-front so earlier indices stay valid; reversed() also
        # keeps the WrTmp/Exit pair sharing an index in their original order.
        for i, s in reversed(insertions):
            self.irsb.statements.insert(i, s)
def _find_c_lib():
    """
    Locate and dlopen the bundled native pyvex/VEX shared library.

    :return: the cffi library handle, with VEX already initialized.
    :raises ImportError: if libvex's vex_init() reports failure.
    """
    # Load the c library for calling into VEX
    if sys.platform in ("win32", "cygwin"):
        library_file = "pyvex.dll"
    elif sys.platform == "darwin":
        library_file = "libpyvex.dylib"
    else:
        # Assume ELF platforms (Linux, BSDs, ...) use the .so suffix.
        library_file = "libpyvex.so"
    pyvex_path = str(importlib.resources.files("pyvex") / "lib" / library_file)
    # parse _ffi_str and use cache if possible
    _parse_ffi_str()
    # RTLD_GLOBAL used for sim_unicorn.so
    lib = ffi.dlopen(pyvex_path)
    if not lib.vex_init():
        raise ImportError("libvex failed to initialize")
    # this looks up all the definitions (wtf)
    # NOTE(review): presumably dir() forces cffi to resolve/bind every symbol
    # eagerly -- confirm before removing this line.
    dir(lib)
    return lib
import expr from .const import IRConst from .enums import IRCallee, IRRegArray, VEXObject, get_enum_from_int, get_int_from_enum from .errors import PyVEXError from .expr import Const, Get, IRExpr from .native import ffi, pvc if TYPE_CHECKING: from .block import IRTypeEnv log = logging.getLogger("pyvex.stmt") class IRStmt(VEXObject): """ IR statements in VEX represents operations with side-effects. """ tag: str tag_int = 0 # set automatically at bottom of file __slots__ = [] def pp(self): print(str(self)) @property def child_expressions(self) -> Iterator[IRExpr]: for k in self.__slots__: v = getattr(self, k) if isinstance(v, IRExpr): # return itself yield v # return all the child expressions yield from v.child_expressions # ??? @property def expressions(self): return self.child_expressions @property def constants(self): return sum((e.constants for e in self.expressions), []) @staticmethod def _from_c(c_stmt): if c_stmt[0] == ffi.NULL: return None try: stmt_class = enum_to_stmt_class(c_stmt.tag) except KeyError: raise PyVEXError("Unknown/unsupported IRStmtTag %s.\n" % get_enum_from_int(c_stmt.tag)) return stmt_class._from_c(c_stmt) def typecheck(self, tyenv: IRTypeEnv) -> bool: # pylint: disable=unused-argument,no-self-use return True def replace_expression(self, replacements): """ Replace child expressions in-place. 
class IMark(IRStmt):
    """
    An instruction mark. It marks the start of the statements that represent a single
    machine instruction (the end of those statements is marked by the next IMark or the
    end of the IRSB).  Contains the address and length of the instruction.
    """

    __slots__ = ["addr", "len", "delta"]

    tag = "Ist_IMark"

    def __init__(self, addr: int, length: int, delta: int):
        # addr: guest address of the instruction
        self.addr = addr
        # len: instruction length in bytes
        self.len = length
        # delta: extra PC adjustment carried through from VEX; assumed to be
        # the guest-PC encoding offset (e.g. Thumb) -- TODO confirm against
        # libVEX's IRStmt.Ist.IMark docs.
        self.delta = delta

    def pp_str(self, reg_name=None, arch=None, tyenv=None):
        return "------ IMark(0x%x, %d, %d) ------" % (self.addr, self.len, self.delta)

    @staticmethod
    def _from_c(c_stmt):
        # Build directly from the cffi view of the C-side IRStmt.
        return IMark(c_stmt.Ist.IMark.addr, c_stmt.Ist.IMark.len, c_stmt.Ist.IMark.delta)
""" __slots__ = ["base", "len", "nia"] tag = "Ist_AbiHint" def __init__(self, base, length, nia): self.base = base self.len = length self.nia = nia def pp_str(self, reg_name=None, arch=None, tyenv=None): return "====== AbiHint(0x%s, %d, %s) ======" % (self.base, self.len, self.nia) @staticmethod def _from_c(c_stmt): return AbiHint( IRExpr._from_c(c_stmt.Ist.AbiHint.base), c_stmt.Ist.AbiHint.len, IRExpr._from_c(c_stmt.Ist.AbiHint.nia) ) class Put(IRStmt): """ Write to a guest register, at a fixed offset in the guest state. """ __slots__ = ["data", "offset"] tag = "Ist_Put" def __init__(self, data: IRExpr, offset: int): self.data = data self.offset = offset ## TODO: Check if result_size and arch are available before looking of arch register name def pp_str(self, reg_name=None, arch=None, tyenv=None): if arch is not None and tyenv is not None: reg_name = arch.translate_register_name(self.offset, self.data.result_size(tyenv) // 8) if reg_name is not None: return f"PUT({reg_name}) = {self.data}" else: return f"PUT(offset={self.offset}) = {self.data}" @staticmethod def _from_c(c_stmt): return Put(IRExpr._from_c(c_stmt.Ist.Put.data), c_stmt.Ist.Put.offset) def typecheck(self, tyenv): return self.data.typecheck(tyenv) class PutI(IRStmt): """ Write to a guest register, at a non-fixed offset in the guest state. 
""" __slots__ = ["descr", "ix", "data", "bias"] tag = "Ist_PutI" def __init__(self, descr, ix, data, bias): self.descr = descr self.ix = ix self.data = data self.bias = bias def pp_str(self, reg_name=None, arch=None, tyenv=None): return "PutI(%s)[%s,%d] = %s" % (self.descr, self.ix, self.bias, self.data) @staticmethod def _from_c(c_stmt): return PutI( IRRegArray._from_c(c_stmt.Ist.PutI.details.descr), IRExpr._from_c(c_stmt.Ist.PutI.details.ix), IRExpr._from_c(c_stmt.Ist.PutI.details.data), c_stmt.Ist.PutI.details.bias, ) def typecheck(self, tyenv): dataty = self.data.typecheck(tyenv) if dataty is None: return False if dataty != self.descr.elemTy: log.debug("Expression doesn't match RegArray type") return False return True class WrTmp(IRStmt): """ Assign a value to a temporary. Note that SSA rules require each tmp is only assigned to once. IR sanity checking will reject any block containing a temporary which is not assigned to exactly once. """ __slots__ = ["data", "tmp"] tag = "Ist_WrTmp" def __init__(self, tmp, data: IRExpr): self.tmp = tmp self.data = data def pp_str(self, reg_name=None, arch=None, tyenv=None): # Support for named register in string representation of expr.Get if arch is not None and tyenv is not None and isinstance(self.data, Get): reg_name = arch.translate_register_name(self.data.offset, self.data.result_size(tyenv) // 8) if reg_name is not None and isinstance(self.data, expr.Get): return "t%d = %s" % (self.tmp, self.data.pp_str_with_name(reg_name)) else: return "t%d = %s" % (self.tmp, self.data) @staticmethod def _from_c(c_stmt): return WrTmp(c_stmt.Ist.WrTmp.tmp, IRExpr._from_c(c_stmt.Ist.WrTmp.data)) def typecheck(self, tyenv): dataty = self.data.typecheck(tyenv) if dataty is None: return False if dataty != tyenv.lookup(self.tmp): log.debug("Expression doesn't match tmp type") return False return True class Store(IRStmt): """ Write a value to memory.. 
""" __slots__ = ["addr", "data", "end"] tag = "Ist_Store" def __init__(self, addr: IRExpr, data: IRExpr, end: str): self.addr = addr self.data = data self.end = end @property def endness(self): return self.end def pp_str(self, reg_name=None, arch=None, tyenv=None): return f"ST{self.endness[-2:].lower()}({self.addr}) = {self.data}" @staticmethod def _from_c(c_stmt): return Store( IRExpr._from_c(c_stmt.Ist.Store.addr), IRExpr._from_c(c_stmt.Ist.Store.data), get_enum_from_int(c_stmt.Ist.Store.end), ) def typecheck(self, tyenv): dataty = self.data.typecheck(tyenv) if dataty is None: return False addrty = self.addr.typecheck(tyenv) if addrty is None: return False if addrty != tyenv.wordty: log.debug("addr must be full word for arch") return False if self.end not in ("Iend_LE", "Iend_BE"): log.debug("invalid endness enum") return False return True class CAS(IRStmt): """ an atomic compare-and-swap operation. """ __slots__ = ["addr", "dataLo", "dataHi", "expdLo", "expdHi", "oldLo", "oldHi", "end"] tag = "Ist_CAS" def __init__(self, addr, dataLo, dataHi, expdLo, expdHi, oldLo, oldHi, end): self.addr = addr self.dataLo = dataLo self.dataHi = dataHi self.expdLo = expdLo self.expdHi = expdHi self.oldLo = oldLo self.oldHi = oldHi self.end = end @property def endness(self): return self.end def pp_str(self, reg_name=None, arch=None, tyenv=None): return "t({},{}) = CAS{}({} :: ({},{})->({},{}))".format( self.oldLo, self.oldHi, self.end[-2:].lower(), self.addr, self.expdLo, self.expdHi, self.dataLo, self.dataHi ) @staticmethod def _from_c(c_stmt): return CAS( IRExpr._from_c(c_stmt.Ist.CAS.details.addr), IRExpr._from_c(c_stmt.Ist.CAS.details.dataLo), IRExpr._from_c(c_stmt.Ist.CAS.details.dataHi), IRExpr._from_c(c_stmt.Ist.CAS.details.expdLo), IRExpr._from_c(c_stmt.Ist.CAS.details.expdHi), c_stmt.Ist.CAS.details.oldLo, c_stmt.Ist.CAS.details.oldHi, get_enum_from_int(c_stmt.Ist.CAS.details.end), ) def typecheck(self, tyenv): addrty = self.addr.typecheck(tyenv) if addrty is None: 
            return False
        if addrty != tyenv.wordty:
            log.debug("addr must be full word for arch")
            return False
        if self.end not in ("Iend_LE", "Iend_BE"):
            log.debug("invalid endness enum")
            return False

        # 0xFFFFFFFF in oldHi is presumably libVEX's IRTemp_INVALID sentinel,
        # marking the single-element form of CAS -- TODO confirm against ir_defs.h
        if self.oldHi == 0xFFFFFFFF:
            # single-element case: the high-half operands must be absent, and
            # oldLo/expdLo/dataLo must all share one type
            if self.expdHi is not None or self.dataHi is not None:
                log.debug("expdHi and dataHi must be None")
                return False
            expdLoTy = self.expdLo.typecheck(tyenv)
            dataLoTy = self.dataLo.typecheck(tyenv)
            if expdLoTy is None or dataLoTy is None:
                return False
            if tyenv.lookup(self.oldLo) != expdLoTy or expdLoTy != dataLoTy:
                log.debug("oldLo, expdL, dataLo must all have the same type")
                return False
        else:
            # double-element case: all six operands must typecheck and share
            # one type across both halves
            expdLoTy = self.expdLo.typecheck(tyenv)
            dataLoTy = self.dataLo.typecheck(tyenv)
            expdHiTy = self.expdHi.typecheck(tyenv)
            dataHiTy = self.dataHi.typecheck(tyenv)
            if expdLoTy is None or dataLoTy is None or expdHiTy is None or dataHiTy is None:
                return False
            if (
                tyenv.lookup(self.oldLo) != expdLoTy
                or expdLoTy != dataLoTy
                or tyenv.lookup(self.oldHi) != expdHiTy
                or expdHiTy != dataHiTy
                or expdLoTy != expdHiTy
            ):
                log.debug("oldLo, expdLo, dataLo, oldHi, expdHi, dataHi must all have the same type")
                return False

        return True


class LLSC(IRStmt):
    """
    Either Load-Linked or Store-Conditional, depending on STOREDATA.

    If STOREDATA is NULL then this is a Load-Linked, else it is a
    Store-Conditional.
""" __slots__ = ["addr", "storedata", "result", "end"] tag = "Ist_LLSC" def __init__(self, addr: IRExpr, storedata: IRExpr, result: int, end: str): self.addr = addr self.storedata = storedata self.result = result self.end = end @property def endness(self): return self.end def pp_str(self, reg_name=None, arch=None, tyenv=None): if self.storedata is None: return "t%d = LD%s-Linked(%s)" % (self.result, self.end[-2:].lower(), self.addr) else: return "t%d = ( ST%s-Cond(%s) = %s )" % (self.result, self.end[-2:].lower(), self.addr, self.storedata) @staticmethod def _from_c(c_stmt): return LLSC( IRExpr._from_c(c_stmt.Ist.LLSC.addr), IRExpr._from_c(c_stmt.Ist.LLSC.storedata), c_stmt.Ist.LLSC.result, get_enum_from_int(c_stmt.Ist.LLSC.end), ) def typecheck(self, tyenv): addrty = self.addr.typecheck(tyenv) if addrty is None: return False if addrty != tyenv.wordty: log.debug("addr must be full word for arch") return False if self.end not in ("Iend_LE", "Iend_BE"): log.debug("invalid endness enum") return False if self.storedata is not None: # load-linked storety = self.storedata.typecheck(tyenv) if storety is None: return False if tyenv.lookup(self.result) != "Ity_I1": log.debug("result tmp must be Ity_I1") return False return True class MBE(IRStmt): __slots__ = ["event"] tag = "Ist_MBE" def __init__(self, event): self.event = event def pp_str(self, reg_name=None, arch=None, tyenv=None): return "MBusEvent-" + self.event @staticmethod def _from_c(c_stmt): return MBE(get_enum_from_int(c_stmt.Ist.MBE.event)) class Dirty(IRStmt): __slots__ = ["cee", "guard", "args", "tmp", "mFx", "mAddr", "mSize", "nFxState"] tag = "Ist_Dirty" def __init__(self, cee, guard, args, tmp, mFx, mAddr, mSize, nFxState): self.cee = cee self.guard = guard self.args = tuple(args) self.tmp = tmp self.mFx = mFx self.mAddr = mAddr self.mSize = mSize self.nFxState = nFxState def pp_str(self, reg_name=None, arch=None, tyenv=None): return "t{} = DIRTY {} {} ::: {}({})".format( self.tmp, self.guard, 
"TODO(effects)", self.cee, ",".join(str(a) for a in self.args) ) @property def child_expressions(self): expressions = sum((a.child_expressions for a in self.args), []) expressions.extend(self.args) expressions.append(self.guard) expressions.extend(self.guard.child_expressions) return expressions @staticmethod def _from_c(c_stmt): args = [] for i in range(20): a = c_stmt.Ist.Dirty.details.args[i] if a == ffi.NULL: break args.append(IRExpr._from_c(a)) return Dirty( IRCallee._from_c(c_stmt.Ist.Dirty.details.cee), IRExpr._from_c(c_stmt.Ist.Dirty.details.guard), tuple(args), c_stmt.Ist.Dirty.details.tmp, get_enum_from_int(c_stmt.Ist.Dirty.details.mFx), IRExpr._from_c(c_stmt.Ist.Dirty.details.mAddr), c_stmt.Ist.Dirty.details.mSize, c_stmt.Ist.Dirty.details.nFxState, ) class Exit(IRStmt): """ A conditional exit from the middle of an IRSB. """ __slots__ = ["guard", "dst", "offsIP", "jk"] tag = "Ist_Exit" def __init__(self, guard: IRExpr, dst: IRConst, jk: str, offsIP: int): self.guard = guard self.dst = dst self.offsIP = offsIP self.jk = jk @property def jumpkind(self): return self.jk def pp_str(self, reg_name=None, arch=None, tyenv=None): if arch is not None and tyenv is not None: reg_name = arch.translate_register_name(self.offsIP, arch.bits // 8) if reg_name is None: return "if (%s) { PUT(offset=%d) = %#x; %s }" % (self.guard, self.offsIP, self.dst.value, self.jumpkind) else: return f"if ({self.guard}) {{ PUT({reg_name}) = {self.dst.value:#x}; {self.jumpkind} }}" @property def child_expressions(self): return [self.guard] + self.guard.child_expressions + [Const(self.dst)] @staticmethod def _from_c(c_stmt): return Exit( IRExpr._from_c(c_stmt.Ist.Exit.guard), IRConst._from_c(c_stmt.Ist.Exit.dst), get_enum_from_int(c_stmt.Ist.Exit.jk), c_stmt.Ist.Exit.offsIP, ) def typecheck(self, tyenv): if not self.jk.startswith("Ijk_"): log.debug("Jumpkind is not a jumpkind enum") return False guardty = self.guard.typecheck(tyenv) if guardty is None: return False if guardty != "Ity_I1": 
log.debug("guard must be Ity_I1") return False return True class LoadG(IRStmt): """ A guarded load. """ __slots__ = ["addr", "alt", "guard", "dst", "cvt", "end", "cvt_types"] tag = "Ist_LoadG" def __init__(self, end: str, cvt: str, dst: int, addr: IRExpr, alt: IRExpr, guard: IRExpr): self.addr = addr self.alt = alt self.guard = guard self.dst = dst self.cvt = cvt self.end = end type_in = ffi.new("IRType *") # TODO separate this from the pyvex C implementation type_out = ffi.new("IRType *") pvc.typeOfIRLoadGOp(get_int_from_enum(self.cvt), type_out, type_in) type_in = ffi.cast("int *", type_in)[0] type_out = ffi.cast("int *", type_out)[0] self.cvt_types = (get_enum_from_int(type_in), get_enum_from_int(type_out)) @property def endness(self): return self.end def pp_str(self, reg_name=None, arch=None, tyenv=None): return "t%d = if (%s) %s(LD%s(%s)) else %s" % ( self.dst, self.guard, self.cvt, self.end[-2:].lower(), self.addr, self.alt, ) @staticmethod def _from_c(c_stmt): return LoadG( get_enum_from_int(c_stmt.Ist.LoadG.details.end), get_enum_from_int(c_stmt.Ist.LoadG.details.cvt), c_stmt.Ist.LoadG.details.dst, IRExpr._from_c(c_stmt.Ist.LoadG.details.addr), IRExpr._from_c(c_stmt.Ist.LoadG.details.alt), IRExpr._from_c(c_stmt.Ist.LoadG.details.guard), ) def typecheck(self, tyenv): addrty = self.addr.typecheck(tyenv) if addrty is None: return False if addrty != tyenv.wordty: log.debug("addr must be full word for arch") return False if self.end not in ("Iend_LE", "Iend_BE"): log.debug("invalid endness enum") return False dstty = tyenv.lookup(self.dst) guardty = self.guard.typecheck(tyenv) altty = self.alt.typecheck(tyenv) if guardty is None or altty is None: return False if dstty != "Ity_I32" or altty != "Ity_I32": log.debug("dst and alt must be Ity_I32") return False if guardty != "Ity_I1": log.debug("guard must be Ity_I1") return False if not self.cvt.startswith("ILGop_"): log.debug("Invalid cvt enum") return False return True class StoreG(IRStmt): """ A guarded store. 
""" __slots__ = ["addr", "data", "guard", "end"] tag = "Ist_StoreG" def __init__(self, end, addr, data, guard): self.addr = addr self.data = data self.guard = guard self.end = end @property def endness(self): return self.end def pp_str(self, reg_name=None, arch=None, tyenv=None): return f"if ({self.guard}) ST{self.end[-2:].lower()}({self.addr}) = {self.data}" @staticmethod def _from_c(c_stmt): return StoreG( get_enum_from_int(c_stmt.Ist.StoreG.details.end), IRExpr._from_c(c_stmt.Ist.StoreG.details.addr), IRExpr._from_c(c_stmt.Ist.StoreG.details.data), IRExpr._from_c(c_stmt.Ist.StoreG.details.guard), ) def typecheck(self, tyenv): addrty = self.addr.typecheck(tyenv) if addrty is None: return False if addrty != tyenv.wordty: log.debug("addr must be full word for arch") return False if self.end not in ("Iend_LE", "Iend_BE"): log.debug("invalid endness enum") return False guardty = self.guard.typecheck(tyenv) dataty = self.data.typecheck(tyenv) if guardty is None or dataty is None: return False if guardty != "Ity_I1": log.debug("guard must be Ity_I1") return False return True _globals = globals().copy() # # Mapping from tag strings/enums to IRStmt classes # tag_to_stmt_mapping = {} enum_to_stmt_mapping = {} tag_count = 0 cls = None for cls in _globals.values(): if type(cls) is type and issubclass(cls, IRStmt) and cls is not IRStmt: tag_to_stmt_mapping[cls.tag] = cls enum_to_stmt_mapping[get_int_from_enum(cls.tag)] = cls cls.tag_int = tag_count tag_count += 1 del cls def tag_to_stmt_class(tag): try: return tag_to_stmt_mapping[tag] except KeyError: raise KeyError("No statement class for tag %s." % tag) def enum_to_stmt_class(tag_enum): try: return enum_to_stmt_mapping[tag_enum] except KeyError: raise KeyError("No statement class for tag %s." 
% get_enum_from_int(tag_enum)) ================================================ FILE: pyvex/types.py ================================================ from typing import TYPE_CHECKING, Any, Protocol, Union, runtime_checkable from cffi.api import FFI class Register(Protocol): """ A register. Pyvex should probably not have this dependency. """ name: str class Arch(Protocol): """ An architecture description. """ name: str ip_offset: int bits: int instruction_endness: str memory_endness: str byte_width: int register_list: list[Register] registers: dict[str, tuple[int, int]] def translate_register_name(self, offset: int, size: int | None = None) -> str | None: ... def get_register_offset(self, name: str) -> int: ... @runtime_checkable class LibvexArch(Protocol): """ The description for an architecture that is usable with libvex """ vex_arch: str vex_archinfo: dict[str, Any] PyLiftSource = Union[bytes, bytearray, memoryview] if TYPE_CHECKING: CLiftSource = FFI.CData else: CLiftSource = None LiftSource = Union[PyLiftSource, CLiftSource] ================================================ FILE: pyvex/utils.py ================================================ import struct from collections.abc import Callable from typing import Any try: import _md5 as md5lib except ImportError: import hashlib as md5lib md5_unpacker = struct.Struct("4I") def stable_hash(t: tuple) -> int: cnt = _dump_tuple(t) hd = md5lib.md5(cnt).digest() return md5_unpacker.unpack(hd)[0] # 32 bits def _dump_tuple(t: tuple) -> bytes: cnt = b"" for item in t: if item is not None: type_ = type(item) if type_ in _DUMP_BY_TYPE: cnt += _DUMP_BY_TYPE[type_](item) else: cnt += struct.pack(" bytes: return t.encode("ascii") def _dump_int(t: int) -> bytes: prefix = b"" if t >= 0 else b"-" t = abs(t) if t <= 0xFFFF: return prefix + struct.pack(" 0: cnt += _dump_int(t & 0xFFFF_FFFF_FFFF_FFFF) t >>= 64 return prefix + cnt def _dump_type(t: type) -> bytes: return t.__name__.encode("ascii") _DUMP_BY_TYPE: dict[type, 
Callable[[Any], bytes]] = { tuple: _dump_tuple, str: _dump_str, int: _dump_int, type: _dump_type, } ================================================ FILE: pyvex_c/LICENSE ================================================ GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. 
And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. 
The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. 
(Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:

    Gnomovision version 69, Copyright (C) year name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.

You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary.  Here is a sample; alter the names:

  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
  `Gnomovision' (which makes passes at compilers) written by James Hacker.

  , 1 April 1989
  Ty Coon, President of Vice

This General Public License does not permit incorporating your program into
proprietary programs.  If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

================================================
FILE: pyvex_c/README
================================================
To generate the list of exports for windows:

grep -E -o -h -r "pvc\.[a-zA-Z0-9_]+" | cut -c 5- | sort -u

Then remove Ity_I8 and add vex_lift (called from __init__ where we can't use
the name pvc to reference it) and sizeofIRType (called from... the unicorn
compatibility layer I think?)
================================================ FILE: pyvex_c/analysis.c ================================================ #include #include #include #include #include #include #include #include "pyvex.h" const int _endian = 0xfe; #define BE_HOST (*((unsigned char*)&_endian) == 0) #define LE_HOST (*((unsigned char*)&_endian) != 0) void remove_noops( IRSB* irsb ) { Int noops = 0, i; Int pos = 0; for (i = 0; i < irsb->stmts_used; ++i) { if (irsb->stmts[i]->tag != Ist_NoOp) { if (i != pos) { irsb->stmts[pos] = irsb->stmts[i]; } pos++; } else { noops++; } } irsb->stmts_used -= noops; } void get_exits_and_inst_addrs( IRSB *irsb, VEXLiftResult *lift_r) { Int i, exit_ctr = 0, inst_count = 0; Addr ins_addr = -1; UInt size = 0; for (i = 0; i < irsb->stmts_used; ++i) { IRStmt* stmt = irsb->stmts[i]; if (stmt->tag == Ist_Exit) { assert(ins_addr != -1); if (exit_ctr < MAX_EXITS) { lift_r->exits[exit_ctr].ins_addr = ins_addr; lift_r->exits[exit_ctr].stmt_idx = i; lift_r->exits[exit_ctr].stmt = stmt; } exit_ctr += 1; } else if (stmt->tag == Ist_IMark) { ins_addr = stmt->Ist.IMark.addr + stmt->Ist.IMark.delta; size += stmt->Ist.IMark.len; if (inst_count < sizeof(lift_r->inst_addrs) / sizeof(Addr)) { lift_r->inst_addrs[inst_count] = ins_addr; } // inst_count is incremented anyway. If lift_r->insts > 200, the overflowed // instruction addresses will not be written into inst_addrs. 
inst_count++; } } lift_r->exit_count = exit_ctr; lift_r->size = size; lift_r->insts = inst_count; } void get_default_exit_target( IRSB *irsb, VEXLiftResult *lift_r ) { IRTemp tmp; Int reg = -1; IRType reg_type = Ity_INVALID; Int i; lift_r->is_default_exit_constant = 0; if (irsb->jumpkind != Ijk_InvalICache && irsb->jumpkind != Ijk_Boring && irsb->jumpkind != Ijk_Call) { return; } if (irsb->next->tag == Iex_Const) { IRConst *con = irsb->next->Iex.Const.con; switch (con->tag) { case Ico_U16: lift_r->is_default_exit_constant = 1; lift_r->default_exit = con->Ico.U16; break; case Ico_U32: lift_r->is_default_exit_constant = 1; lift_r->default_exit = con->Ico.U32; break; case Ico_U64: lift_r->is_default_exit_constant = 1; lift_r->default_exit = con->Ico.U64; break; default: // A weird address... we don't support it. break; } return; } if (irsb->next->tag != Iex_RdTmp) { // Unexpected irsb->next type return; } // Scan statements backwards to find the assigning statement tmp = irsb->next->Iex.RdTmp.tmp; for (i = irsb->stmts_used - 1; i >= 0; --i) { IRExpr *data = NULL; IRStmt *stmt = irsb->stmts[i]; if (stmt->tag == Ist_WrTmp && stmt->Ist.WrTmp.tmp == tmp) { data = stmt->Ist.WrTmp.data; } else if (stmt->tag == Ist_Put && stmt->Ist.Put.offset == reg) { IRType put_type = typeOfIRExpr(irsb->tyenv, stmt->Ist.Put.data); if (put_type != reg_type) { // The size does not match. Give up. return; } data = stmt->Ist.Put.data; } else if (stmt->tag == Ist_LoadG) { // We do not handle LoadG. Give up. return; } else { continue; } if (data->tag == Iex_Const) { lift_r->is_default_exit_constant = 1; IRConst *con = data->Iex.Const.con; switch (con->tag) { case Ico_U16: lift_r->is_default_exit_constant = 1; lift_r->default_exit = con->Ico.U16; break; case Ico_U32: lift_r->is_default_exit_constant = 1; lift_r->default_exit = con->Ico.U32; break; case Ico_U64: lift_r->is_default_exit_constant = 1; lift_r->default_exit = con->Ico.U64; break; default: // A weird address... we don't support it. 
break; } return; } else if (data->tag == Iex_RdTmp) { // Reading another temp variable tmp = data->Iex.RdTmp.tmp; reg = -1; } else if (data->tag == Iex_Get) { // Reading from a register tmp = IRTemp_INVALID; reg = data->Iex.Get.offset; reg_type = typeOfIRExpr(irsb->tyenv, data); } else { // Something we don't currently support return; } } // We cannot resolve it to a constant value. return; } Addr get_value_from_const_expr( IRConst* con) { switch (con->tag) { case Ico_U8: return con->Ico.U8; case Ico_U16: return con->Ico.U16; case Ico_U32: return con->Ico.U32; case Ico_U64: return con->Ico.U64; default: // A weird address... return 0; } } // // Collect data references // /* General map. Shamelessly stolen from ir_opt.c in libVEX */ typedef struct { Bool* inuse; HWord* key; HWord* val; Int size; Int used; } HashHW; static HashHW* newHHW() { HashHW* h = malloc(sizeof(HashHW)); h->size = 8; h->used = 0; h->inuse = (Bool*)malloc(h->size * sizeof(Bool)); h->key = (HWord*)malloc(h->size * sizeof(HWord)); h->val = (HWord*)malloc(h->size * sizeof(HWord)); return h; } static void freeHHW(HashHW* h) { free(h->inuse); free(h->key); free(h->val); free(h); } /* Look up key in the map. */ static Bool lookupHHW(HashHW* h, /*OUT*/HWord* val, HWord key) { Int i; for (i = 0; i < h->used; i++) { if (h->inuse[i] && h->key[i] == key) { if (val) *val = h->val[i]; return True; } } return False; } /* Add key->val to the map. Replaces any existing binding for key. */ static void addToHHW(HashHW* h, HWord key, HWord val) { Int i, j; /* Find and replace existing binding, if any. */ for (i = 0; i < h->used; i++) { if (h->inuse[i] && h->key[i] == key) { h->val[i] = val; return; } } /* Ensure a space is available. */ if (h->used == h->size) { /* Copy into arrays twice the size. 
*/ Bool* inuse2 = malloc(2 * h->size * sizeof(Bool)); HWord* key2 = malloc(2 * h->size * sizeof(HWord)); HWord* val2 = malloc(2 * h->size * sizeof(HWord)); for (i = j = 0; i < h->size; i++) { if (!h->inuse[i]) continue; inuse2[j] = True; key2[j] = h->key[i]; val2[j] = h->val[i]; j++; } h->used = j; h->size *= 2; free(h->inuse); h->inuse = inuse2; free(h->key); h->key = key2; free(h->val); h->val = val2; } /* Finally, add it. */ h->inuse[h->used] = True; h->key[h->used] = key; h->val[h->used] = val; h->used++; } /* Remove key from the map. */ static void removeFromHHW(HashHW* h, HWord key) { Int i, j; /* Find and replace existing binding, if any. */ for (i = 0; i < h->used; i++) { if (h->inuse[i] && h->key[i] == key) { h->inuse[i] = False; return; } } } /* Create keys, of the form ((minoffset << 16) | maxoffset). */ static UInt mk_key_GetPut ( Int offset, IRType ty ) { /* offset should fit in 16 bits. */ UInt minoff = offset; UInt maxoff = minoff + sizeofIRType(ty) - 1; return (minoff << 16) | maxoff; } void record_data_reference( VEXLiftResult *lift_r, Addr data_addr, Int size, DataRefTypes data_type, Int stmt_idx, Addr inst_addr) { if (lift_r->data_ref_count < MAX_DATA_REFS) { Int idx = lift_r->data_ref_count; lift_r->data_refs[idx].size = size; lift_r->data_refs[idx].data_addr = data_addr; lift_r->data_refs[idx].data_type = data_type; lift_r->data_refs[idx].stmt_idx = stmt_idx; lift_r->data_refs[idx].ins_addr = inst_addr; lift_r->data_ref_count++; } } Addr get_const_and_record( VEXLiftResult *lift_r, IRExpr *const_expr, Int size, DataRefTypes data_type, Int stmt_idx, Addr inst_addr, Addr next_inst_addr, Bool record) { if (const_expr->tag != Iex_Const) { // Why are you calling me? 
assert (const_expr->tag == Iex_Const); return -1; } Addr addr = get_value_from_const_expr(const_expr->Iex.Const.con); if (addr != next_inst_addr) { if (record) { record_data_reference(lift_r, addr, size, data_type, stmt_idx, inst_addr); } return addr; } return -1; } void record_tmp_value( VEXLiftResult *lift_r, Int tmp, ULong value, Int stmt_idx ) { if (lift_r->const_val_count < MAX_CONST_VALS) { Int idx = lift_r->const_val_count; lift_r->const_vals[idx].tmp = tmp; lift_r->const_vals[idx].value = value; lift_r->const_vals[idx].stmt_idx = stmt_idx; lift_r->const_val_count++; } } typedef struct { int used; ULong value; } TmpValue; typedef struct { Bool in_use; ULong start; ULong size; unsigned char* content; } Region; int next_unused_region_id = 0; #define MAX_REGION_COUNT 1024 Region regions[MAX_REGION_COUNT] = {0}; static int find_region(ULong start) { if (next_unused_region_id > 0 && regions[next_unused_region_id - 1].start < start) { if (next_unused_region_id >= MAX_REGION_COUNT) { return -1; } return next_unused_region_id - 1; } int lo = 0, hi = next_unused_region_id, mid; while (lo != hi) { mid = (lo + hi) / 2; Region* region = ®ions[mid]; if (region->start >= start) { hi = mid; } else { lo = mid + 1; } } return lo; } Bool register_readonly_region(ULong start, ULong size, unsigned char* content) { // Where do we insert the region? 
if (next_unused_region_id >= MAX_REGION_COUNT) { // Regions are full return False; } int pos = find_region(start); if (pos < 0) { // Regions are full return False; } if (!regions[pos].in_use) { // it's likely to be the end - store here regions[pos].in_use = True; regions[pos].start = start; regions[pos].size = size; regions[pos].content = content; next_unused_region_id++; return True; } if (regions[pos].start == start) { // overwrite the current region with new data regions[pos].in_use = True; regions[pos].start = start; regions[pos].size = size; regions[pos].content = content; return True; } // Move everything forward by one slot memmove(®ions[pos + 1], ®ions[pos], sizeof(Region) * (next_unused_region_id - pos)); // Insert the new region regions[pos].in_use = True; regions[pos].start = start; regions[pos].size = size; regions[pos].content = content; next_unused_region_id++; return True; } void deregister_all_readonly_regions() { next_unused_region_id = 0; regions[next_unused_region_id].in_use = 0; } Bool load_value(ULong addr, int size, int endness, void *value) { int pos = find_region(addr); if (pos < 0 || pos >= next_unused_region_id) { // Does not exist return False; } unsigned char* ptr = NULL; if (regions[pos].in_use && regions[pos].start <= addr && regions[pos].start <= addr + size && regions[pos].start + regions[pos].size >= addr + size) { ptr = regions[pos].content + (addr - regions[pos].start); } else if (pos > 0 && regions[pos - 1].in_use && regions[pos - 1].start <= addr && regions[pos - 1].start <= addr + size && regions[pos - 1].start + regions[pos - 1].size >= addr + size) { ptr = regions[pos - 1].content + (addr - regions[pos - 1].start); } else { return False; } // Do the load! 
if ((endness == Iend_LE && LE_HOST) || (endness == Iend_BE && BE_HOST)) { switch (size) { case 1: *(UChar*)value = *(UChar*)ptr; break; case 2: *(UShort*)value = *(UShort*)ptr; break; case 4: *(UInt*)value = *(UInt*)ptr; break; case 8: *(ULong*)value = *(ULong*)ptr; break; default: { UChar* begin = (UChar*)value; for (int n = 0; n < size; ++n) { *(begin + n) = *(ptr + n); } } break; } } else { // we need to swap data... UChar* begin = (UChar*)value; for (int n = 0; n < size; ++n) { *(begin + size - n - 1) = *(ptr + n); } } return True; } #undef MAX_REGION_COUNT typedef struct _InitialReg { ULong offset; UInt size; ULong value; } InitialReg; UInt initial_reg_count = 0; InitialReg initial_regs[1024]; Bool register_initial_register_value(UInt offset, UInt size, ULong value) { if (initial_reg_count >= 1024) { return False; } switch (size) { case 1: case 2: case 4: case 8: case 16: break; default: return False; } UInt i = initial_reg_count; initial_regs[i].offset = offset; initial_regs[i].size = size; initial_regs[i].value = value; initial_reg_count++; return True; } Bool reset_initial_register_values() { initial_reg_count = 0; return True; } void execute_irsb( IRSB *irsb, VEXLiftResult *lift_r, VexArch guest, Bool load_from_ro_regions, Bool collect_data_refs, Bool const_prop ) { Int i; Addr inst_addr = -1, next_inst_addr = -1; HashHW* env = newHHW(); TmpValue *tmps = NULL; TmpValue tmp_backingstore[1024]; // Record the last legitimate constant value. We do not record RdTmp or BinOp results // if they are the same as the last constant. 
UInt last_const_value = 0; if (irsb->tyenv->types_used > 1024) { tmps = malloc(irsb->tyenv->types_used * sizeof(TmpValue)); } else { tmps = tmp_backingstore; // Use the local backing store to save a malloc } memset(tmps, 0, irsb->tyenv->types_used * sizeof(TmpValue)); // Set initial register values for (i = 0; i < initial_reg_count; ++i) { IRType ty; switch (initial_regs[i].size) { case 1: ty = Ity_I8; break; case 2: ty = Ity_I16; break; case 4: ty = Ity_I32; break; case 8: ty = Ity_I64; break; case 16: ty = Ity_I128; break; default: continue; } UInt key = mk_key_GetPut(initial_regs[i].offset, ty); addToHHW(env, key, initial_regs[i].value); } for (i = 0; i < irsb->stmts_used; ++i) { IRStmt *stmt = irsb->stmts[i]; switch (stmt->tag) { case Ist_IMark: inst_addr = stmt->Ist.IMark.addr + stmt->Ist.IMark.delta; next_inst_addr = inst_addr + stmt->Ist.IMark.len; break; case Ist_WrTmp: assert(inst_addr != -1 && next_inst_addr != -1); { IRExpr *data = stmt->Ist.WrTmp.data; switch (data->tag) { case Iex_Load: // load // e.g. t7 = LDle:I64(0x0000000000600ff8) if (data->Iex.Load.addr->tag == Iex_Const) { Int size; size = sizeofIRType(typeOfIRTemp(irsb->tyenv, stmt->Ist.WrTmp.tmp)); Addr v = get_const_and_record(lift_r, data->Iex.Load.addr, size, Dt_Integer, i, inst_addr, next_inst_addr, collect_data_refs); if (v != -1 && v != next_inst_addr) { last_const_value = v; } // Load the value if it might be a constant pointer... 
if (load_from_ro_regions) { UInt value = 0; if (load_value(data->Iex.Load.addr->Iex.Const.con->Ico.U32, size, data->Iex.Load.end, &value)) { tmps[stmt->Ist.WrTmp.tmp].used = 1; tmps[stmt->Ist.WrTmp.tmp].value = value; if (const_prop) { record_tmp_value(lift_r, stmt->Ist.WrTmp.tmp, value, i); } } } } else if (data->Iex.Load.addr->tag == Iex_RdTmp) { IRTemp rdtmp = data->Iex.Load.addr->Iex.RdTmp.tmp; if (tmps[rdtmp].used == 1) { // The source tmp exists Int size; size = sizeofIRType(typeOfIRTemp(irsb->tyenv, stmt->Ist.WrTmp.tmp)); if (tmps[rdtmp].value != last_const_value) { if (collect_data_refs) { record_data_reference(lift_r, tmps[rdtmp].value, size, Dt_Integer, i, inst_addr); } } if (load_from_ro_regions) if (guest == VexArchARM && size == 4 || guest == VexArchMIPS32 && size == 4 || guest == VexArchMIPS64 && size == 8) { ULong value = 0; if (load_value(tmps[rdtmp].value, size, data->Iex.Load.end, &value)) { tmps[stmt->Ist.WrTmp.tmp].used = 1; tmps[stmt->Ist.WrTmp.tmp].value = value; if (const_prop) { record_tmp_value(lift_r, stmt->Ist.WrTmp.tmp, value, i); } } } } } break; case Iex_Binop: if (data->Iex.Binop.op == Iop_Add32 || data->Iex.Binop.op == Iop_Add64) { IRExpr *arg1 = data->Iex.Binop.arg1, *arg2 = data->Iex.Binop.arg2; if (arg1->tag == Iex_Const && arg2->tag == Iex_Const) { // ip-related addressing Addr addr = get_value_from_const_expr(arg1->Iex.Const.con) + get_value_from_const_expr(arg2->Iex.Const.con); if (data->Iex.Binop.op == Iop_Add32) { addr &= 0xffffffff; } if (addr != next_inst_addr) { if (addr != last_const_value) { if (collect_data_refs) { record_data_reference(lift_r, addr, 0, Dt_Unknown, i, inst_addr); } } } if (const_prop) { record_tmp_value(lift_r, stmt->Ist.WrTmp.tmp, addr, i); } } else { // Do the calculation if (arg1->tag == Iex_RdTmp && tmps[arg1->Iex.RdTmp.tmp].used && arg2->tag == Iex_Const) { ULong arg1_value = tmps[arg1->Iex.RdTmp.tmp].value; ULong arg2_value = get_value_from_const_expr(arg2->Iex.Const.con); ULong value = arg1_value 
+ arg2_value; if (data->Iex.Binop.op == Iop_Add32) { value &= 0xffffffff; } if (value != last_const_value) { if (collect_data_refs) { record_data_reference(lift_r, value, 0, Dt_Unknown, i, inst_addr); } } tmps[stmt->Ist.WrTmp.tmp].used = 1; tmps[stmt->Ist.WrTmp.tmp].value = value; if (const_prop) { record_tmp_value(lift_r, stmt->Ist.WrTmp.tmp, value, i); } } if (arg1->tag == Iex_Const && arg2->tag == Iex_RdTmp && tmps[arg2->Iex.RdTmp.tmp].used) { ULong arg1_value = get_value_from_const_expr(arg1->Iex.Const.con); ULong arg2_value = tmps[arg2->Iex.RdTmp.tmp].value; ULong value = arg1_value + arg2_value; if (data->Iex.Binop.op == Iop_Add32) { value &= 0xffffffff; } if (value != last_const_value) { if (collect_data_refs) { record_data_reference(lift_r, value, 0, Dt_Unknown, i, inst_addr); } } tmps[stmt->Ist.WrTmp.tmp].used = 1; tmps[stmt->Ist.WrTmp.tmp].value = value; if (const_prop) { record_tmp_value(lift_r, stmt->Ist.WrTmp.tmp, value, i); } } if (arg2->tag == Iex_Const) { ULong arg2_value = get_value_from_const_expr(arg2->Iex.Const.con); if (arg2_value != last_const_value) { if (collect_data_refs) { record_data_reference(lift_r, arg2_value, 0, Dt_Unknown, i, inst_addr); } } } if (arg1->tag == Iex_RdTmp && tmps[arg1->Iex.RdTmp.tmp].used && arg2->tag == Iex_RdTmp && tmps[arg2->Iex.RdTmp.tmp].used) { ULong arg1_value = tmps[arg1->Iex.RdTmp.tmp].value; ULong arg2_value = tmps[arg2->Iex.RdTmp.tmp].value; ULong value = arg1_value + arg2_value; if (data->Iex.Binop.op == Iop_Add32) { value &= 0xffffffff; } tmps[stmt->Ist.WrTmp.tmp].used = 1; tmps[stmt->Ist.WrTmp.tmp].value = value; if (const_prop) { record_tmp_value(lift_r, stmt->Ist.WrTmp.tmp, value, i); } } } } else { // Normal binary operations if (data->Iex.Binop.arg1->tag == Iex_Const) { Addr v = get_const_and_record(lift_r, data->Iex.Binop.arg1, 0, Dt_Unknown, i, inst_addr, next_inst_addr, collect_data_refs); if (v != -1 && v != next_inst_addr) { last_const_value = v; } } if (data->Iex.Binop.arg2->tag == Iex_Const) { 
Addr v = get_const_and_record(lift_r, data->Iex.Binop.arg2, 0, Dt_Unknown, i, inst_addr, next_inst_addr, collect_data_refs); if (v != -1 && v != next_inst_addr) { last_const_value = v; } } } break; case Iex_Const: { Addr v = get_const_and_record(lift_r, data, 0, Dt_Unknown, i, inst_addr, next_inst_addr, collect_data_refs); if (v != -1 && v != next_inst_addr) { last_const_value = v; } Addr value = get_value_from_const_expr(data->Iex.Const.con); tmps[stmt->Ist.WrTmp.tmp].used = 1; tmps[stmt->Ist.WrTmp.tmp].value = value; if (const_prop) { record_tmp_value(lift_r, stmt->Ist.WrTmp.tmp, value, i); } } break; case Iex_ITE: { if (data->Iex.ITE.iftrue->tag == Iex_Const) { get_const_and_record(lift_r, data->Iex.ITE.iftrue, 0, Dt_Unknown, i, inst_addr, next_inst_addr, collect_data_refs); } if (data->Iex.ITE.iffalse->tag == Iex_Const) { get_const_and_record(lift_r, data->Iex.ITE.iffalse, 0, Dt_Unknown, i, inst_addr, next_inst_addr, collect_data_refs); } } break; case Iex_Get: { UInt key = mk_key_GetPut(data->Iex.Get.offset, data->Iex.Get.ty); HWord val; if (lookupHHW(env, &val, key) == True) { tmps[stmt->Ist.WrTmp.tmp].used = 1; tmps[stmt->Ist.WrTmp.tmp].value = val; if (const_prop) { record_tmp_value(lift_r, stmt->Ist.WrTmp.tmp, val, i); } } } default: // Unsupported for now break; } // end switch (data->tag) } break; case Ist_Put: // put // e.g. 
PUT(rdi) = 0x0000000000400714 assert(inst_addr != -1 && next_inst_addr != -1); { // Ignore itstate on ARM if (guest == VexArchARM && stmt->Ist.Put.offset == offsetof(VexGuestARMState, guest_ITSTATE)) { break; } IRExpr *data = stmt->Ist.Put.data; if (data->tag == Iex_Const) { Addr v = get_const_and_record(lift_r, data, 0, Dt_Unknown, i, inst_addr, next_inst_addr, collect_data_refs); if (v != -1 && v != next_inst_addr) { last_const_value = v; } UInt key = mk_key_GetPut(stmt->Ist.Put.offset, typeOfIRExpr(irsb->tyenv, data)); addToHHW(env, key, get_value_from_const_expr(data->Iex.Const.con)); } else if (data->tag == Iex_RdTmp) { if (tmps[data->Iex.RdTmp.tmp].used == 1) { // tmp is available IRType data_type = typeOfIRExpr(irsb->tyenv, data); UInt key = mk_key_GetPut(stmt->Ist.Put.offset, data_type); ULong value = tmps[data->Iex.RdTmp.tmp].value; addToHHW(env, key, value); if (value != last_const_value) { if (collect_data_refs) { record_data_reference(lift_r, value, 0, Dt_Integer, i, inst_addr); } } } else { // the tmp does not exist; we ignore updates to GP on MIPS32 // this is to handle cases where gp is loaded from a stack variable if (guest == VexArchMIPS32 && stmt->Ist.Put.offset == offsetof(VexGuestMIPS32State, guest_r28)) { break; } IRType data_type = typeOfIRExpr(irsb->tyenv, data); UInt key = mk_key_GetPut(stmt->Ist.Put.offset, data_type); removeFromHHW(env, key); } } } break; case Ist_Store: // Store assert(inst_addr != -1 && next_inst_addr != -1); { IRExpr *store_dst = stmt->Ist.Store.addr; IRExpr *store_data = stmt->Ist.Store.data; if (store_dst->tag == Iex_Const) { // Writing to a memory destination. We can get its size by analyzing the size of store_data IRType data_type = typeOfIRExpr(irsb->tyenv, stmt->Ist.Put.data); Int data_size = 0; if (data_type != Ity_INVALID) { data_size = sizeofIRType(data_type); } get_const_and_record(lift_r, store_dst, data_size, data_size == 0? 
Dt_Unknown : Dt_StoreInteger, i, inst_addr, next_inst_addr, collect_data_refs); } if (store_data->tag == Iex_Const) { get_const_and_record(lift_r, store_data, 0, Dt_Unknown, i, inst_addr, next_inst_addr, collect_data_refs); } } break; case Ist_Dirty: // Dirty assert(inst_addr != -1 && next_inst_addr != -1); if (stmt->Ist.Dirty.details->mAddr != NULL && stmt->Ist.Dirty.details->mAddr->tag == Iex_Const) { IRExpr *m_addr = stmt->Ist.Dirty.details->mAddr; get_const_and_record(lift_r, m_addr, stmt->Ist.Dirty.details->mSize, Dt_FP, i, inst_addr, next_inst_addr, collect_data_refs); } break; case Ist_LoadG: // LoadG // e.g., t7 = if (t70) ILGop_Ident32(LDle(0x00032f50)) else t69 if (stmt->Ist.LoadG.details->addr != NULL && stmt->Ist.LoadG.details->addr->tag == Iex_Const) { IRExpr *addr = stmt->Ist.LoadG.details->addr; IRType data_type = typeOfIRExpr(irsb->tyenv, addr); Int data_size = 0; if (data_type != Ity_INVALID) { data_size = sizeofIRType(data_type); } get_const_and_record(lift_r, addr, data_size, Dt_Unknown, i, inst_addr, next_inst_addr, collect_data_refs); } break; default: break; } // end switch (stmt->tag) } freeHHW(env); if (tmps != tmp_backingstore) { free(tmps); } } /* Determine if the VEX block is an no-op */ void get_is_noop_block( IRSB *irsb, VEXLiftResult *lift_r ) { // the block is a noop block if it only has IMark statements **and** it jumps to its immediate successor. VEX will // generate such blocks when opt_level==1 and cross_insn_opt is True. // the block is a noop block if it only has IMark statements and IP-setting statements that set the IP to the next // location. VEX will generate such blocks when opt_level==1 and cross_insn_opt is False. 
Addr fallthrough_addr = 0xffffffffffffffff; Bool has_other_inst = False; for (int i = 0; i < irsb->stmts_used; ++i) { IRStmt *stmt = irsb->stmts[i]; if (stmt->tag == Ist_IMark) { // update fallthrough_addr; it will be correct upon the last instruction fallthrough_addr = stmt->Ist.IMark.addr + stmt->Ist.IMark.delta + stmt->Ist.IMark.len; } else if (stmt->tag == Ist_NoOp) { // NoOp is a no-op } else if (stmt->tag == Ist_Put) { if (stmt->Ist.Put.data->tag == Iex_Const) { if (irsb->offsIP != stmt->Ist.Put.offset) { // found a register write that is not the same as the pc offset; this is not a noop block lift_r->is_noop_block = False; return; } } else { // found a non-constant register write; this is not a noop block lift_r->is_noop_block = False; return; } } else { has_other_inst = True; break; } } if (has_other_inst) { lift_r->is_noop_block = False; return; } if (fallthrough_addr == 0xffffffffffffffff) { // for some reason we cannot find the fallthrough addr; just give up lift_r->is_noop_block = False; return; } if (irsb->jumpkind == Ijk_Boring && irsb->next->tag == Iex_Const) { if (irsb->next->Iex.Const.con->tag == Ico_U32 && fallthrough_addr < 0xffffffff && fallthrough_addr == irsb->next->Iex.Const.con->Ico.U32 || irsb->next->Iex.Const.con->tag == Ico_U64 && fallthrough_addr == irsb->next->Iex.Const.con->Ico.U64) { lift_r->is_noop_block = True; return; } } lift_r->is_noop_block = False; } ================================================ FILE: pyvex_c/e4c_lite.h ================================================ /* * exceptions4c lightweight version 1.0 * * Copyright (c) 2014 Guillermo Calvo * Licensed under the GNU Lesser General Public License */ #ifndef EXCEPTIONS4C_LITE #define EXCEPTIONS4C_LITE #include #include /* Maximum number of nested `try` blocks */ #ifndef E4C_MAX_FRAMES # define E4C_MAX_FRAMES 16 #endif /* Maximum length (in bytes) of an exception message */ #ifndef E4C_MESSAGE_SIZE # define E4C_MESSAGE_SIZE 128 #endif /* Exception handling keywords: 
try/catch/finally/throw */ #ifndef E4C_NOKEYWORDS # define try E4C_TRY # define catch(type) E4C_CATCH(type) # define finally E4C_FINALLY # define throw(type, message) E4C_THROW(type, message) #endif /* Represents an exception type */ struct e4c_exception_type{ const char * name; const char * default_message; const struct e4c_exception_type * supertype; }; /* Declarations and definitions of exception types */ #define E4C_DECLARE_EXCEPTION(name) extern const struct e4c_exception_type name #define E4C_DEFINE_EXCEPTION(name, default_message, supertype) const struct e4c_exception_type name = { #name, default_message, &supertype } /* Predefined exception types */ E4C_DECLARE_EXCEPTION(RuntimeException); E4C_DECLARE_EXCEPTION(NullPointerException); /* Represents an instance of an exception type */ struct e4c_exception{ char message[E4C_MESSAGE_SIZE]; const char * file; int line; const struct e4c_exception_type * type; }; /* Retrieve current thrown exception */ #define E4C_EXCEPTION e4c.err /* Returns whether current exception is of a given type */ #define E4C_IS_INSTANCE_OF(t) ( e4c.err.type == &t || e4c_extends(e4c.err.type, &t) ) /* Implementation details */ #define E4C_TRY if(e4c_try(E4C_INFO) && setjmp(e4c.jump[e4c.frames - 1]) >= 0) while(e4c_hook(0)) if(e4c.frame[e4c.frames].stage == e4c_trying) #define E4C_CATCH(type) else if(e4c.frame[e4c.frames].stage == e4c_catching && E4C_IS_INSTANCE_OF(type) && e4c_hook(1)) #define E4C_FINALLY else if(e4c.frame[e4c.frames].stage == e4c_finalizing) #define E4C_THROW(type, message) e4c_throw(&type, E4C_INFO, message) #ifndef NDEBUG # define E4C_INFO __FILE__, __LINE__ #else # define E4C_INFO NULL, 0 #endif enum e4c_stage{e4c_beginning, e4c_trying, e4c_catching, e4c_finalizing, e4c_done}; extern struct e4c_context{jmp_buf jump[E4C_MAX_FRAMES]; struct e4c_exception err; struct{unsigned char stage; unsigned char uncaught;} frame[E4C_MAX_FRAMES + 1]; int frames;} e4c; extern int e4c_try(const char * file, int line); extern int 
e4c_hook(int is_catch); extern int e4c_extends(const struct e4c_exception_type * child, const struct e4c_exception_type * parent); extern void e4c_throw(const struct e4c_exception_type * exception_type, const char * file, int line, const char * message); # endif ================================================ FILE: pyvex_c/logging.c ================================================ // This code is GPLed by Yan Shoshitaishvili #include #include #include #include "logging.h" int log_level = 50; void pyvex_debug(const char *fmt, ...) { if (log_level > 10) return; fprintf(stderr, "[[pyvex_c]]\tDEBUG:\t"); va_list args; va_start(args,fmt); vfprintf(stderr, fmt, args); va_end(args); fflush(stdout); } void pyvex_info(const char *fmt, ...) { if (log_level > 20) return; fprintf(stderr, "[[pyvex_c]]\tINFO:\t"); va_list args; va_start(args, fmt); vfprintf(stderr, fmt, args); va_end(args); fflush(stdout); } void pyvex_error(const char *fmt, ...) { if (log_level > 40) return; fprintf(stderr, "[[pyvex_c]]\tERROR:\t"); va_list args; va_start(args,fmt); vfprintf(stderr, fmt,args); va_end(args); fflush(stderr); } ================================================ FILE: pyvex_c/logging.h ================================================ // This code is GPLed by Yan Shoshitaishvili #ifndef __COMMON_H #define __COMMON_H extern int log_level; void pyvex_debug(const char *, ...); void pyvex_info(const char *, ...); void pyvex_error(const char *, ...); #endif ================================================ FILE: pyvex_c/postprocess.c ================================================ #include #include #include #include "pyvex_internal.h" // // Jumpkind fixes for ARM // // If PC is moved to LR, then this should be an Ijk_Call // // Example: // MOV LR, PC // MOV PC, R8 // // Note that the value of PC is directly used in IRStatements, i.e // instead of having: // t0 = GET:I32(pc) // PUT(lr) = t0 // we have: // PUT(lr) = 0x10400 // The only case (that I've seen so far) where a temporary variable 
// is assigned to LR is: // t2 = ITE(cond, t0, t1) // PUT(lr) = t2 // void arm_post_processor_determine_calls( Addr irsb_addr, // Address of this IRSB Int irsb_size, // Size of this IRSB Int irsb_insts, // Number of instructions IRSB *irsb) { // Offset to the link register #define ARM_OFFB_LR offsetof(VexGuestARMState,guest_R14) // The maximum number of tmps #define MAX_TMP 1000 // The maximum offset of registers #define MAX_REG_OFFSET 1000 // Dummy value #define DUMMY 0xffeffeff if (irsb->jumpkind != Ijk_Boring) { return; } // Emulated CPU context Addr tmps[MAX_TMP + 1]; Addr regs[MAX_REG_OFFSET + 1]; // Initialize context Int i; for (i = 0; i <= MAX_TMP; ++i) { tmps[i] = DUMMY; } for (i = 0; i <= MAX_REG_OFFSET; ++i) { regs[i] = DUMMY; } Int lr_store_pc = 0; Int inst_ctr = 0; Int has_exit = 0; IRStmt *other_exit = NULL; Addr next_irsb_addr = (irsb_addr & (~1)) + irsb_size; // Clear the least significant bit Int is_thumb_mode = irsb_addr & 1; // if we pop {..,lr,...}; b xxx, I bet this isn't a boring jump! for (i = 0; i < irsb->stmts_used; ++i) { IRStmt *stmt = irsb->stmts[i]; if (stmt->tag == Ist_Exit){ // HACK: FIXME: BLCC and friends set the default exit to Ijk_Boring // Yet, the call is there, and it's just fine. // We assume if the block has an exit AND lr stores PC, we're probably // doing one of those fancy BL-ish things. // Should work for BCC and friends though has_exit = 1; other_exit = stmt; } } for (i = 0; i < irsb->stmts_used; ++i) { IRStmt *stmt = irsb->stmts[i]; if (stmt->tag == Ist_Put) { // LR is modified just before the last instruction of the block... if (stmt->Ist.Put.offset == ARM_OFFB_LR /*&& inst_ctr == irsb_insts - 1*/) { // ... 
by a constant, so test whether it is the address of the next IRSB if (stmt->Ist.Put.data->tag == Iex_Const) { IRConst *con = stmt->Ist.Put.data->Iex.Const.con; if (get_value_from_const_expr(con) == next_irsb_addr) { lr_store_pc = 1; } else { lr_store_pc = 0; } } else if (stmt->Ist.Put.data->tag == Iex_RdTmp) { Int tmp = stmt->Ist.Put.data->Iex.RdTmp.tmp; if (tmp <= MAX_TMP && next_irsb_addr == tmps[tmp]) { lr_store_pc = 1; } else { lr_store_pc = 0; } } break; } else { Int reg_offset = stmt->Ist.Put.offset; if (reg_offset <= MAX_REG_OFFSET) { IRExpr *data = stmt->Ist.Put.data; if (data->tag == Iex_Const) { regs[reg_offset] = get_value_from_const_expr(stmt->Ist.Put.data->Iex.Const.con); } else if (data->tag == Iex_RdTmp) { Int tmp = data->Iex.RdTmp.tmp; if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) { regs[reg_offset] = tmps[tmp]; } } else if (data->tag == Iex_Get) { Int src_reg = data->Iex.Get.offset; if (src_reg <= MAX_REG_OFFSET && regs[src_reg] != DUMMY) { regs[reg_offset] = regs[src_reg]; } } } } } else if (stmt->tag == Ist_WrTmp && stmt->Ist.WrTmp.tmp <= MAX_TMP) { // The PC value may propagate through the block, and since // LR is modified at the end of the block, the PC value have // to be incremented in order to match the address of the // next IRSB. 
So the only propagation ways that can lead to // a function call are: // // - Iop_Add* operations (even "sub r0, #-4" is compiled // as "add r0, #4") // - Iop_And*, Iop_Or*, Iop_Xor*, Iop_Sh*, Iop_Not* (there // may be some tricky and twisted ways to increment PC) // Int tmp_dst = stmt->Ist.WrTmp.tmp; if (stmt->Ist.WrTmp.data->tag == Iex_Binop) { IRExpr* data = stmt->Ist.WrTmp.data; Addr op0 = DUMMY, op1 = DUMMY; // Extract op0 if (data->Iex.Binop.arg1->tag == Iex_Const) { op0 = get_value_from_const_expr(data->Iex.Binop.arg1->Iex.Const.con); } else if (data->Iex.Binop.arg1->tag == Iex_RdTmp) { Int tmp = data->Iex.Binop.arg1->Iex.RdTmp.tmp; if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) { op0 = tmps[tmp]; } } // Extract op1 if (data->Iex.Binop.arg2->tag == Iex_Const) { op1 = get_value_from_const_expr(data->Iex.Binop.arg2->Iex.Const.con); } else if (data->Iex.Binop.arg2->tag == Iex_RdTmp) { Int tmp = data->Iex.Binop.arg2->Iex.RdTmp.tmp; if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) { op1 = tmps[tmp]; } } if (op0 != DUMMY && op1 != DUMMY) { // Both operands are loaded. Perfom calculation. 
// NOTE(review): the code down to the closing `}` after the #undef lines is the
// interior/tail of a function that begins before this chunk (from the call site
// in vex_lift it appears to be arm_post_processor_determine_calls — confirm
// against the full file). It tracks known values of temps/registers to decide
// whether an ARM block that stores the return address in LR is really a call.
// Fold a supported binary operation over two already-resolved operand values.
switch (data->Iex.Binop.op) {
case Iop_Add8:
case Iop_Add16:
case Iop_Add32:
case Iop_Add64:
    tmps[tmp_dst] = op0 + op1;
    break;
case Iop_Sub8:
case Iop_Sub16:
case Iop_Sub32:
case Iop_Sub64:
    tmps[tmp_dst] = op0 - op1;
    break;
case Iop_And8:
case Iop_And16:
case Iop_And32:
case Iop_And64:
    tmps[tmp_dst] = op0 & op1;
    break;
case Iop_Or8:
case Iop_Or16:
case Iop_Or32:
case Iop_Or64:
    tmps[tmp_dst] = op0 | op1;
    break;
case Iop_Xor8:
case Iop_Xor16:
case Iop_Xor32:
case Iop_Xor64:
    tmps[tmp_dst] = op0 ^ op1;
    break;
case Iop_Shl8:
case Iop_Shl16:
case Iop_Shl32:
case Iop_Shl64:
    tmps[tmp_dst] = op0 << op1;
    break;
// Logical and arithmetic right shifts share one case; the distinction is
// ignored for this value tracking.
case Iop_Shr8:
case Iop_Shr16:
case Iop_Shr32:
case Iop_Shr64:
case Iop_Sar8:
case Iop_Sar16:
case Iop_Sar32:
case Iop_Sar64:
    tmps[tmp_dst] = op0 >> op1;
    break;
default:
    // Unsupported operation
    break;
}
}
} else if (stmt->Ist.WrTmp.data->tag == Iex_Get) {
// Register read: forward a tracked register value into the temp, if known.
Int reg_offset = stmt->Ist.WrTmp.data->Iex.Get.offset;
if (reg_offset <= MAX_REG_OFFSET && regs[reg_offset] != DUMMY) {
    tmps[tmp_dst] = regs[reg_offset];
}
} else if (stmt->Ist.WrTmp.data->tag == Iex_ITE) {
// Parse iftrue and iffalse
// NOTE(review): both arms are evaluated unconditionally, so when both are
// known the iftrue value (applied second) wins; the guard is not consulted.
IRExpr *data = stmt->Ist.WrTmp.data;
if (data->Iex.ITE.iffalse->tag == Iex_Const) {
    tmps[tmp_dst] = get_value_from_const_expr(data->Iex.ITE.iffalse->Iex.Const.con);
} else if (data->Iex.ITE.iffalse->tag == Iex_RdTmp) {
    Int tmp = data->Iex.ITE.iffalse->Iex.RdTmp.tmp;
    if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
        tmps[tmp_dst] = tmps[tmp];
    }
}
if (data->Iex.ITE.iftrue->tag == Iex_Const) {
    tmps[tmp_dst] = get_value_from_const_expr(data->Iex.ITE.iftrue->Iex.Const.con);
} else if (data->Iex.ITE.iftrue->tag == Iex_RdTmp) {
    Int tmp = data->Iex.ITE.iftrue->Iex.RdTmp.tmp;
    if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
        tmps[tmp_dst] = tmps[tmp];
    }
}
} else if (stmt->Ist.WrTmp.data->tag == Iex_RdTmp) {
// Temp-to-temp copy: propagate a known value.
IRExpr *data = stmt->Ist.WrTmp.data;
Int tmp = data->Iex.RdTmp.tmp;
if (tmp <= MAX_TMP && tmps[tmp] != DUMMY) {
    tmps[tmp_dst] = tmps[tmp];
}
} else if (stmt->Ist.WrTmp.data->tag == Iex_Const) {
// Constant assignment: the temp's value is known exactly.
IRConst *con = stmt->Ist.WrTmp.data->Iex.Const.con;
tmps[tmp_dst] = get_value_from_const_expr(con);
}
} else if (stmt->tag == Ist_IMark) {
inst_ctr++;
}
}
// If LR was observed to receive the PC of the following block, this block is
// behaving like a call: patch either the side exit or the default exit.
if (lr_store_pc) {
    if (has_exit && // It has a non-default exit
            other_exit->Ist.Exit.jk == Ijk_Boring && // The non-default exit is a Boring jump
            get_value_from_const_expr(other_exit->Ist.Exit.dst) != next_irsb_addr + is_thumb_mode // The non-default exit is not skipping
                                                                                                 // the last instruction
    ) {
        // Fix the non-default exit
        other_exit->Ist.Exit.jk = Ijk_Call;
    } else if (!has_exit || other_exit->Ist.Exit.jk != Ijk_Call) {
        // Fix the default exit
        irsb->jumpkind = Ijk_Call;
    }
}
// Undefine all defined values
#undef ARM_OFFB_LR
#undef MAX_TMP
#undef MAX_REG_OFFSET
#undef DUMMY
}

//
// Unconditional branch fixes for MIPS32
//
// Handle unconditional branches
// `beq $zero, $zero, xxxx`
// It is translated to
//
// 15 | ------ IMark(0x401684, 4, 0) ------
// 16 | t0 = CmpEQ32(0x00000000, 0x00000000)
// 17 | PUT(128) = 0x00401688
// 18 | ------ IMark(0x401688, 4, 0) ------
// 19 | if (t0) goto {Ijk_Boring} 0x401684
// 20 | PUT(128) = 0x0040168c
// 21 | t4 = GET:I32(128)
// NEXT: PUT(128) = t4; Ijk_Boring
//
// When the guard of the side exit is a CmpEQ32 of two equal constants (i.e.
// the branch is always taken), the side exit is removed and its destination
// becomes the block's default exit.
void mips32_post_processor_fix_unconditional_exit(
        IRSB *irsb) {

#define INVALID 0xffff  // sentinel: "not found yet"

    Int i;
    Int tmp_exit = INVALID, exit_stmt_idx = INVALID;
    IRConst *dst = NULL;

    // Scan backwards: first locate the guarded Ijk_Boring exit, then the
    // WrTmp statement that defines its guard temp.
    for (i = irsb->stmts_used - 1; i >= 0; --i) {
        IRStmt *stmt = irsb->stmts[i];
        if (tmp_exit == INVALID) {
            // Looking for the Exit statement
            if (stmt->tag == Ist_Exit &&
                    stmt->Ist.Exit.jk == Ijk_Boring &&
                    stmt->Ist.Exit.guard->tag == Iex_RdTmp) {
                tmp_exit = stmt->Ist.Exit.guard->Iex.RdTmp.tmp;
                dst = stmt->Ist.Exit.dst;
                exit_stmt_idx = i;
            }
        } else if (stmt->tag == Ist_WrTmp &&
                stmt->Ist.WrTmp.tmp == tmp_exit) {
            // Looking for the WrTmp statement
            IRExpr *data = stmt->Ist.WrTmp.data;
            if (data->tag == Iex_Binop &&
                    data->Iex.Binop.op == Iop_CmpEQ32 &&
                    data->Iex.Binop.arg1->tag == Iex_Const &&
                    data->Iex.Binop.arg2->tag == Iex_Const &&
get_value_from_const_expr(data->Iex.Binop.arg1->Iex.Const.con) ==
                    get_value_from_const_expr(data->Iex.Binop.arg2->Iex.Const.con)) {
                // We found it: the guard compares two equal constants, so the
                // side exit is unconditionally taken.
                // Update the statements: shift everything after the exit down
                // by one slot, dropping the exit statement itself.
                Int j;
                for (j = exit_stmt_idx; j < irsb->stmts_used - 1; ++j) {
                    irsb->stmts[j] = irsb->stmts[j + 1];
                }
                irsb->stmts_used -= 1;
                // Update the default of the IRSB
                irsb->next = IRExpr_Const(dst);
            }
            break;
        }
    }

#undef INVALID
}

// Insert stmt into irsb at statement index i: append it (letting
// addStmtToIRSB grow the statement array), then rotate it into place by
// shifting stmts[i..used-2] up one slot.
void irsb_insert(IRSB *irsb, IRStmt* stmt, Int i) {
    addStmtToIRSB(irsb, stmt);
    IRStmt *in_air = irsb->stmts[irsb->stmts_used - 1];
    for (Int j = irsb->stmts_used - 1; j > i; j--) {
        irsb->stmts[j] = irsb->stmts[j-1];
    }
    irsb->stmts[i] = in_air;
}

// For every integer division in the block, insert a "divisor == 0" check and
// a side exit with Ijk_SigFPE_IntDiv targeting the address of the instruction
// (lastIp) that contains the division.
void zero_division_side_exits(IRSB *irsb) {
    Int i;
    Addr lastIp = -1;  // address of the most recent IMark
    // Width of guest addresses, derived from the type of irsb->next, so the
    // exit destination constant matches the guest word size.
    IRType addrTy = typeOfIRExpr(irsb->tyenv, irsb->next);
    IRConstTag addrConst = addrTy == Ity_I32 ? Ico_U32 : addrTy == Ity_I16 ? Ico_U16 : Ico_U64;
    IRType argty;
    IRTemp cmptmp;
    for (i = 0; i < irsb->stmts_used; i++) {
        IRStmt *stmt = irsb->stmts[i];
        switch (stmt->tag) {
            case Ist_IMark:
                lastIp = stmt->Ist.IMark.addr;
                continue;
            case Ist_WrTmp:
                if (stmt->Ist.WrTmp.data->tag != Iex_Binop) {
                    continue;
                }
                // Only integer divisions are instrumented; anything else
                // falls through to `default: continue`.
                switch (stmt->Ist.WrTmp.data->Iex.Binop.op) {
                    case Iop_DivU32:
                    case Iop_DivS32:
                    case Iop_DivU32E:
                    case Iop_DivS32E:
                    case Iop_DivModU64to32:
                    case Iop_DivModS64to32:
                        argty = Ity_I32;
                        break;
                    case Iop_DivU64:
                    case Iop_DivS64:
                    case Iop_DivU64E:
                    case Iop_DivS64E:
                    case Iop_DivModU128to64:
                    case Iop_DivModS128to64:
                    case Iop_DivModS64to64:
                        argty = Ity_I64;
                        break;
                    // TODO YIKES
                    //case Iop_DivF32:
                    //    argty = Ity_F32;
                    //case Iop_DivF64:
                    //case Iop_DivF64r32:
                    //    argty = Ity_F64;
                    //case Iop_DivF128:
                    //    argty = Ity_F128;
                    //case Iop_DivD64:
                    //    argty = Ity_D64;
                    //case Iop_DivD128:
                    //    argty = Ity_D128;
                    //case Iop_Div32Fx4:
                    //case Iop_Div32F0x4:
                    //case Iop_Div64Fx2:
                    //case Iop_Div64F0x2:
                    //case Iop_Div64Fx4:
                    //case Iop_Div32Fx8:
                    default:
                        continue;
                }
                // t = (divisor == 0), inserted just before the division.
                cmptmp = newIRTemp(irsb->tyenv, Ity_I1);
                irsb_insert(irsb, IRStmt_WrTmp(cmptmp, IRExpr_Binop(argty == Ity_I32 ?
Iop_CmpEQ32 : Iop_CmpEQ64,
                    stmt->Ist.WrTmp.data->Iex.Binop.arg2,
                    IRExpr_Const(argty == Ity_I32 ? IRConst_U32(0) : IRConst_U64(0)))), i);
                i++;  // skip over the compare we just inserted
                // Exit target: the instruction containing the division.
                IRConst *failAddr = IRConst_U64(lastIp);
                // ohhhhh boy this is a hack: retag the U64 constant to the
                // guest's actual address width instead of rebuilding it.
                failAddr->tag = addrConst;
                irsb_insert(irsb, IRStmt_Exit(IRExpr_RdTmp(cmptmp), Ijk_SigFPE_IntDiv, failAddr, irsb->offsIP), i);
                i++;  // skip over the exit we just inserted
                break;
            default:
                continue;
        }
    }
}
================================================ FILE: pyvex_c/pyvex.c ================================================
/*
This is shamelessly ripped from Vine, because those guys have very very strange
language preferences. Vine is Copyright (C) 2006-2009, BitBlaze Team.

You can redistribute and modify it under the terms of the GNU GPL, version 2
or later, but it is made available WITHOUT ANY WARRANTY. See the top-level
README file for more details.

For more information about Vine and other BitBlaze software, see our web site
at: http://bitblaze.cs.berkeley.edu/
*/

//======================================================================
//
// This file provides the interface to VEX that allows block by block
// translation from binary to VEX IR.
// //====================================================================== #include #include #include #include #include #include #include "pyvex.h" #include "pyvex_internal.h" #include "logging.h" //====================================================================== // // Globals // //====================================================================== // Some info required for translation VexArchInfo vai_host; VexGuestExtents vge; VexTranslateArgs vta; VexTranslateResult vtr; VexAbiInfo vbi; VexControl vc; // Log message buffer, from vex itself char *msg_buffer = NULL; size_t msg_capacity = 0, msg_current_size = 0; jmp_buf jumpout; //====================================================================== // // Functions needed for the VEX translation // //====================================================================== #ifdef _MSC_VER __declspec(noreturn) #else __attribute__((noreturn)) #endif static void failure_exit(void) { longjmp(jumpout, 1); } static void log_bytes(const HChar* bytes, SizeT nbytes) { if (msg_buffer == NULL) { msg_buffer = malloc(nbytes); msg_capacity = nbytes; } if (nbytes + msg_current_size > msg_capacity) { do { msg_capacity *= 2; } while (nbytes + msg_current_size > msg_capacity); msg_buffer = realloc(msg_buffer, msg_capacity); } memcpy(&msg_buffer[msg_current_size], bytes, nbytes); msg_current_size += nbytes; } void clear_log() { if (msg_buffer != NULL) { free(msg_buffer); msg_buffer = NULL; msg_capacity = 0; msg_current_size = 0; } } static Bool chase_into_ok(void *closureV, Addr addr64) { return False; } static UInt needs_self_check(void *callback_opaque, VexRegisterUpdates* pxControl, const VexGuestExtents *guest_extents) { return 0; } static void *dispatch(void) { return NULL; } //---------------------------------------------------------------------- // Initializes VEX // It must be called before using VEX for translation to Valgrind IR //---------------------------------------------------------------------- int vex_init() 
{
    // Idempotent: a second call is a no-op that reports success.
    static int initialized = 0;
    pyvex_debug("Initializing VEX.\n");

    if (initialized) {
        pyvex_debug("VEX already initialized.\n");
        return 1;
    }
    initialized = 1;

    // Initialize VEX
    LibVEX_default_VexControl(&vc);
    LibVEX_default_VexArchInfo(&vai_host);
    LibVEX_default_VexAbiInfo(&vbi);

    vc.iropt_verbosity = 0;
    vc.iropt_level = 0; // No optimization by default
    //vc.iropt_precise_memory_exns = False;
    vc.iropt_unroll_thresh = 0;
    vc.guest_max_insns = 1; // By default, we vex 1 instruction at a time
    vc.guest_chase_thresh = 0;
    vc.arm64_allow_reordered_writeback = 0;
    vc.x86_optimize_callpop_idiom = 0;
    vc.strict_block_end = 0;
    vc.special_instruction_support = 0;

    pyvex_debug("Calling LibVEX_Init()....\n");
    // If VEX fails fatally during init, failure_exit longjmps back here and
    // we report failure instead of crashing.
    if (setjmp(jumpout) == 0) {
        // the 0 is the debug level
        LibVEX_Init(&failure_exit, &log_bytes, 0, &vc);
        pyvex_debug("LibVEX_Init() done....\n");
    } else {
        pyvex_debug("LibVEX_Init() failed catastrophically...\n");
        return 0;
    }

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    vai_host.endness = VexEndnessLE;
#else
    vai_host.endness = VexEndnessBE;
#endif

    // various settings to make stuff work
    // ... former is set to 'unspecified', but gets set in vex_inst for archs which care
    // ... the latter two are for dealing with gs and fs in VEX
    vbi.guest_stack_redzone_size = 0;
    vbi.guest_amd64_assume_fs_is_const = True;
    vbi.guest_amd64_assume_gs_is_const = True;

    //------------------------------------
    // options for instruction translation

    //
    // Architecture info
    //
    vta.arch_guest = VexArch_INVALID; // to be assigned later
#if __amd64__ || _WIN64
    vta.arch_host = VexArchAMD64;
#elif __i386__ || _WIN32
    vta.arch_host = VexArchX86;
#elif __arm__
    vta.arch_host = VexArchARM;
    vai_host.hwcaps = 7;
#elif __aarch64__
    vta.arch_host = VexArchARM64;
#elif __s390x__
    vta.arch_host = VexArchS390X;
    vai_host.hwcaps = VEX_HWCAPS_S390X_LDISP;
#elif defined(__powerpc__) && defined(__NetBSD__)
# if defined(__LONG_WIDTH__) && (__LONG_WIDTH__ == 32)
    vta.arch_host = VexArchPPC32;
# endif
#elif defined(__powerpc__)
    vta.arch_host = VexArchPPC64;
#elif defined(__riscv)
# if defined(__riscv_xlen) && (__riscv_xlen == 64)
    vta.arch_host = VexArchRISCV64;
# endif
#else
#error "Unsupported host arch"
#endif

    vta.archinfo_host = vai_host;

    //
    // The actual stuff to vex
    //
    vta.guest_bytes = NULL; // Set in vex_insts
    vta.guest_bytes_addr = 0; // Set in vex_insts

    //
    // callbacks
    //
    vta.callback_opaque = NULL; // Used by chase_into_ok, but never actually called
    vta.chase_into_ok = chase_into_ok; // Always returns false
    vta.preamble_function = NULL;
    vta.instrument1 = NULL;
    vta.instrument2 = NULL;
    vta.finaltidy = NULL;
    vta.needs_self_check = needs_self_check;

    vta.disp_cp_chain_me_to_slowEP = (void *)dispatch; // Not used
    vta.disp_cp_chain_me_to_fastEP = (void *)dispatch; // Not used
    vta.disp_cp_xindir = (void *)dispatch; // Not used
    vta.disp_cp_xassisted = (void *)dispatch; // Not used

    vta.guest_extents = &vge;
    vta.host_bytes = NULL; // Buffer for storing the output binary
    vta.host_bytes_size = 0;
    vta.host_bytes_used = NULL;
    // doesn't exist?
    vta.do_self_check = False;
    vta.traceflags = 0; // Debug verbosity
    //vta.traceflags = -1; // Debug verbosity

    return 1;
}

// Prepare the VexArchInfo struct: per-guest-arch hwcaps and cache-line info.
static void vex_prepare_vai(VexArch arch, VexArchInfo *vai) {
    switch (arch) {
        case VexArchX86:
            vai->hwcaps = VEX_HWCAPS_X86_MMXEXT |
                VEX_HWCAPS_X86_SSE1 |
                VEX_HWCAPS_X86_SSE2 |
                VEX_HWCAPS_X86_SSE3 |
                VEX_HWCAPS_X86_LZCNT;
            break;
        case VexArchAMD64:
            vai->hwcaps = VEX_HWCAPS_AMD64_SSE3 |
                VEX_HWCAPS_AMD64_CX16 |
                VEX_HWCAPS_AMD64_LZCNT |
                VEX_HWCAPS_AMD64_AVX |
                VEX_HWCAPS_AMD64_RDTSCP |
                VEX_HWCAPS_AMD64_BMI |
                VEX_HWCAPS_AMD64_AVX2;
            break;
        case VexArchARM:
            vai->hwcaps = VEX_ARM_ARCHLEVEL(8) |
                VEX_HWCAPS_ARM_NEON |
                VEX_HWCAPS_ARM_VFP3;
            break;
        case VexArchARM64:
            vai->hwcaps = 0;
            vai->arm64_dMinLine_lg2_szB = 6;
            vai->arm64_iMinLine_lg2_szB = 6;
            break;
        case VexArchPPC32:
            vai->hwcaps = VEX_HWCAPS_PPC32_F |
                VEX_HWCAPS_PPC32_V |
                VEX_HWCAPS_PPC32_FX |
                VEX_HWCAPS_PPC32_GX |
                VEX_HWCAPS_PPC32_VX |
                VEX_HWCAPS_PPC32_DFP |
                VEX_HWCAPS_PPC32_ISA2_07;
            vai->ppc_icache_line_szB = 32; // unsure if correct
            break;
        case VexArchPPC64:
            vai->hwcaps = VEX_HWCAPS_PPC64_V |
                VEX_HWCAPS_PPC64_FX |
                VEX_HWCAPS_PPC64_GX |
                VEX_HWCAPS_PPC64_VX |
                VEX_HWCAPS_PPC64_DFP |
                VEX_HWCAPS_PPC64_ISA2_07;
            vai->ppc_icache_line_szB = 64; // unsure if correct
            break;
        case VexArchS390X:
            vai->hwcaps = 0;
            break;
        case VexArchMIPS32:
        case VexArchMIPS64:
            vai->hwcaps = VEX_PRID_COMP_CAVIUM;
            break;
        case VexArchRISCV64:
            vai->hwcaps = 0;
            break;
        default:
            pyvex_error("Invalid arch in vex_prepare_vai.\n");
            break;
    }
}

// Prepare the VexAbiInfo
static void vex_prepare_vbi(VexArch arch, VexAbiInfo *vbi) {
    // only setting the guest_stack_redzone_size for now
    // this attribute is only specified by the X86, AMD64 and PPC64 ABIs
    switch (arch) {
        case VexArchX86:
            vbi->guest_stack_redzone_size = 0;
            break;
        case VexArchAMD64:
            vbi->guest_stack_redzone_size = 128;
            break;
        case VexArchPPC64:
            vbi->guest_stack_redzone_size = 288;
            break;
        default:
            break;
    }
}

// Single static result slot returned by vex_lift; overwritten on every call
// (the lifter is not reentrant).
VEXLiftResult _lift_r;
//----------------------------------------------------------------------
// Main entry point. Do a lift.
//
// Translates up to max_insns instructions / max_bytes bytes starting at
// insn_start (guest address insn_addr) into an IRSB, runs the post-processing
// passes, and returns a pointer to the static _lift_r result. Returns NULL if
// lifting fails or VEX bails out through failure_exit/longjmp.
//----------------------------------------------------------------------
VEXLiftResult *vex_lift(
        VexArch guest,
        VexArchInfo archinfo,
        unsigned char *insn_start,
        unsigned long long insn_addr,
        unsigned int max_insns,
        unsigned int max_bytes,
        int opt_level,
        int traceflags,
        int allow_arch_optimizations,
        int strict_block_end,
        int collect_data_refs,
        int load_from_ro_regions,
        int const_prop,
        VexRegisterUpdates px_control,
        unsigned int lookback) {
    VexRegisterUpdates pxControl = px_control;

    vex_prepare_vai(guest, &archinfo);
    vex_prepare_vbi(guest, &vbi);

    pyvex_debug("Guest arch: %d\n", guest);
    pyvex_debug("Guest arch hwcaps: %08x\n", archinfo.hwcaps);

    vta.archinfo_guest = archinfo;
    vta.arch_guest = guest;
    vta.abiinfo_both = vbi; // Set the vbi value

    vta.guest_bytes = (UChar *)(insn_start); // Ptr to actual bytes of start of instruction
    vta.guest_bytes_addr = (Addr64)(insn_addr);
    vta.traceflags = traceflags;

    vc.guest_max_bytes = max_bytes;
    vc.guest_max_insns = max_insns;
    vc.iropt_level = opt_level;
    vc.lookback_amount = lookback;

    // Gate all of these on one flag, they depend on the arch
    vc.arm_allow_optimizing_lookback = allow_arch_optimizations;
    vc.arm64_allow_reordered_writeback = allow_arch_optimizations;
    vc.x86_optimize_callpop_idiom = allow_arch_optimizations;

    vc.strict_block_end = strict_block_end;

    // Drop any log output from a previous lift.
    clear_log();

    // Do the actual translation; VEX fatal errors longjmp to the else branch.
    if (setjmp(jumpout) == 0) {
        LibVEX_Update_Control(&vc);
        _lift_r.is_noop_block = False;
        _lift_r.data_ref_count = 0;
        _lift_r.const_val_count = 0;
        _lift_r.irsb = LibVEX_Lift(&vta, &vtr, &pxControl);
        if (!_lift_r.irsb) {
            // Lifting failed
            return NULL;
        }
        remove_noops(_lift_r.irsb);
        if (guest == VexArchMIPS32) {
            // This post processor may potentially remove statements.
            // Call it before we get exit statements and such.
            mips32_post_processor_fix_unconditional_exit(_lift_r.irsb);
        }
        get_exits_and_inst_addrs(_lift_r.irsb, &_lift_r);
        get_default_exit_target(_lift_r.irsb, &_lift_r);
        if (guest == VexArchARM && _lift_r.insts > 0) {
            arm_post_processor_determine_calls(_lift_r.inst_addrs[0], _lift_r.size, _lift_r.insts, _lift_r.irsb);
        }
        zero_division_side_exits(_lift_r.irsb);
        get_is_noop_block(_lift_r.irsb, &_lift_r);
        if (collect_data_refs || const_prop) {
            execute_irsb(_lift_r.irsb, &_lift_r, guest, (Bool)load_from_ro_regions, (Bool)collect_data_refs, (Bool)const_prop);
        }
        return &_lift_r;
    } else {
        return NULL;
    }
}
================================================ FILE: pyvex_c/pyvex.def ================================================
; Symbols exported from pyvex.dll on Windows.
LIBRARY pyvex.dll
EXPORTS
IRConst_F32
IRConst_F32i
IRConst_F64
IRConst_F64i
IRConst_U1
IRConst_U16
IRConst_U32
IRConst_U64
IRConst_U8
IRConst_V128
IRConst_V256
IRExpr_Binder
IRExpr_Binop
IRExpr_CCall
IRExpr_Const
IRExpr_GSPTR
IRExpr_Get
IRExpr_GetI
IRExpr_ITE
IRExpr_Load
IRExpr_Qop
IRExpr_RdTmp
IRExpr_Triop
IRExpr_Unop
IRExpr_VECRET
emptyIRSB
emptyIRTypeEnv
log_level
mkIRCallee
mkIRExprVec_0
mkIRExprVec_1
mkIRExprVec_2
mkIRExprVec_3
mkIRExprVec_4
mkIRExprVec_5
mkIRExprVec_6
mkIRExprVec_7
mkIRExprVec_8
mkIRRegArray
msg_buffer
msg_current_size
newIRTemp
typeOfIRExpr
typeOfIRLoadGOp
typeOfPrimop
clear_log
vex_lift
vex_init
register_readonly_region
deregister_all_readonly_regions
register_initial_register_value
reset_initial_register_values
sizeofIRType
================================================ FILE: pyvex_c/pyvex.h ================================================
// This code is GPLed by Yan Shoshitaishvili

#ifndef __VEXIR_H
#define __VEXIR_H

// NOTE(review): the angle-bracketed header name here (presumably libvex.h)
// was stripped by the extraction that produced this dump.
#include

// Some info required for translation
extern int log_level;
extern VexTranslateArgs vta;
extern char *msg_buffer;
extern size_t msg_current_size;
void clear_log(void);

//
// Initializes VEX. This function must be called before vex_lift
// can be used.
//
int vex_init(void);

// One conditional-exit statement of a lifted block.
typedef struct _ExitInfo {
    Int stmt_idx;
    Addr ins_addr;
    IRStmt *stmt;
} ExitInfo;

// Classification of a recovered data reference.
typedef enum {
    Dt_Unknown = 0x9000,
    Dt_Integer,
    Dt_FP,
    Dt_StoreInteger
} DataRefTypes;

// A memory reference discovered during IRSB emulation.
typedef struct _DataRef {
    Addr data_addr;
    Int size;
    DataRefTypes data_type;
    Int stmt_idx;
    Addr ins_addr;
} DataRef;

// A constant value propagated into a temp.
typedef struct _ConstVal {
    Int tmp;
    Int stmt_idx;
    ULong value; // 64-bit max
} ConstVal;

#define MAX_EXITS 400
#define MAX_DATA_REFS 2000
#define MAX_CONST_VALS 1000

// Aggregate result of a single vex_lift call.
typedef struct _VEXLiftResult {
    IRSB* irsb;
    Int size;
    Bool is_noop_block;
    // Conditional exits
    Int exit_count;
    ExitInfo exits[MAX_EXITS];
    // The default exit
    Int is_default_exit_constant;
    Addr default_exit;
    // Instruction addresses
    Int insts;
    Addr inst_addrs[200];
    // Data references
    Int data_ref_count;
    DataRef data_refs[MAX_DATA_REFS];
    // Constant propagation
    Int const_val_count;
    ConstVal const_vals[MAX_CONST_VALS];
} VEXLiftResult;

VEXLiftResult *vex_lift(
    VexArch guest,
    VexArchInfo archinfo,
    unsigned char *insn_start,
    unsigned long long insn_addr,
    unsigned int max_insns,
    unsigned int max_bytes,
    int opt_level,
    int traceflags,
    int allow_arch_optimizations,
    int strict_block_end,
    int collect_data_refs,
    int load_from_ro_regions,
    int const_prop,
    VexRegisterUpdates px_control,
    unsigned int lookback_amount);

Bool register_readonly_region(ULong start, ULong size, unsigned char* content);
void deregister_all_readonly_regions();
Bool register_initial_register_value(UInt offset, UInt size, ULong value);
Bool reset_initial_register_values();

#endif
================================================ FILE: pyvex_c/pyvex_internal.h ================================================
// Internal declarations shared between pyvex.c, analysis.c and postprocess.c.
#include "pyvex.h"

void arm_post_processor_determine_calls(Addr irsb_addr, Int irsb_size, Int irsb_insts, IRSB *irsb);
void mips32_post_processor_fix_unconditional_exit(IRSB *irsb);
void remove_noops(IRSB* irsb);
void zero_division_side_exits(IRSB* irsb);
void get_exits_and_inst_addrs(IRSB *irsb, VEXLiftResult *lift_r);
void get_default_exit_target(IRSB *irsb, VEXLiftResult *lift_r);
void get_is_noop_block(IRSB *irsb, VEXLiftResult *lift_r);
void execute_irsb(IRSB *irsb, VEXLiftResult *lift_r, VexArch guest, Bool load_from_ro_regions, Bool collect_data_refs, Bool const_prop);
Addr get_value_from_const_expr(IRConst* con);
================================================ FILE: tests/test_arm_postprocess.py ================================================
import pyvex

##########################
### ARM Postprocessing ###
##########################


def test_arm_postprocess_call():
    """Check that ARM/Thumb blocks which stash a return address in LR are
    recognized as calls (Ijk_Call), at opt levels 0-2."""
    for i in range(3):
        # Thumb
        # push {r7}
        # add r7, sp, #0
        # mov.w r1, #6
        # mov r0, pc
        # add.w lr, r0, r1
        # b.w 10408
        irsb = pyvex.IRSB(
            data=(b"\x80\xb4" b"\x00\xaf" b"\x4f\xf0\x06\x01" b"\x78\x46" b"\x00\xeb\x01\x0e" b"\xff\xf7\xec\xbf"),
            mem_addr=0x1041F,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=6,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # mov lr, pc
        # b.w 10408
        irsb = pyvex.IRSB(
            data=(b"\xfe\x46" b"\xe9\xe7"),
            mem_addr=0x10431,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=2,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # add r2, pc, #0
        # add.w lr, r2, #4
        # ldr.w pc, [pc, #52]
        irsb = pyvex.IRSB(
            data=(b"\x00\xa2" b"\x02\xf1\x06\x0e" b"\xdf\xf8\x34\xf0"),
            mem_addr=0x10435,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=3,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # ldr r0, [pc, #48]
        # mov r1, pc
        # add.w r2, r1, #4
        # add.w r3, r2, #4
        # add.w r4, r3, #4
        # add.w lr, r4, #4
        # mov pc, r0
        irsb = pyvex.IRSB(
            data=(
                b"\x0c\x48"
                b"\x79\x46"
                b"\x01\xf1\x04\x02"
                b"\x02\xf1\x04\x03"
                b"\x03\xf1\x04\x04"
                b"\x04\xf1\x04\x0e"
                b"\x87\x46"
            ),
            mem_addr=0x1043F,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=7,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # eor.w r0, r0, r0
        # mov lr, pc
        # b.n 10460
        irsb = pyvex.IRSB(
            data=(b"\x80\xea\x00\x00" b"\x86\x46" b"\x01\xe0"),
            mem_addr=0x10455,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=3,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Boring"

        # Thumb compiled with optimizations (gcc -O2)
        # mov.w r1, #6
        # mov r0, pc
        # add.w lr, r0, r1
        # b.w 104bc
        irsb = pyvex.IRSB(
            data=(b"\x4f\xf0\x06\x01" b"\x78\x46" b"\x00\xeb\x01\x0e" b"\x00\xf0\xc5\xb8"),
            mem_addr=0x10325,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=4,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # ldr r0, [pc, #56]
        # mov r1, pc
        # add.w r2, r1, #4
        # add.w r3, r2, #4
        # add.w r4, r3, #4
        # add.w lr, r4, #4
        # mov pc, r0
        irsb = pyvex.IRSB(
            data=(
                b"\x0e\x48"
                b"\x79\x46"
                b"\x01\xf1\x04\x02"
                b"\x02\xf1\x04\x03"
                b"\x03\xf1\x04\x04"
                b"\x04\xf1\x04\x0e"
                b"\x87\x46"
            ),
            mem_addr=0x10333,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=7,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # add r2, pc, #0
        # add.w lr, r2, #6
        # ldr.w pc, [pc, #28]
        irsb = pyvex.IRSB(
            data=(b"\x00\xa2" b"\x02\xf1\x06\x0e" b"\xdf\xf8\x1c\xf0"),
            mem_addr=0x10349,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=3,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # mov lr, pc
        # b.w 104bc
        irsb = pyvex.IRSB(
            data=(b"\xfe\x46" b"\xb2\xe0"),
            mem_addr=0x10353,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=2,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # eor.w r0, r0, r0
        # mov lr, pc
        # b.n 10362
        irsb = pyvex.IRSB(
            data=(b"\x80\xea\x00\x00" b"\x86\x46" b"\x01\xe0"),
            mem_addr=0x10357,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=3,
            bytes_offset=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Boring"

        # ARM compiled with optimizations (gcc -O2)
        # mov r1, #4
        # mov r0, pc
        # add lr, r0, r1
        # ldr pc, [pc, #56]
        irsb = pyvex.IRSB(
            data=(b"\x04\x10\xa0\xe3" b"\x0f\x00\xa0\xe1" b"\x01\xe0\x80\xe0" b"\x38\xf0\x9f\xe5"),
            mem_addr=0x10298,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=4,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # add r1, pc, #0
        # add r2, r1, #4
        # add r3, r2, #4
        # add r4, r3, #4
        # add lr, r4, #4
        # b 10414
        irsb = pyvex.IRSB(
            data=(
                b"\x00\x10\x8f\xe2"
                b"\x04\x20\x81\xe2"
                b"\x04\x30\x82\xe2"
                b"\x04\x40\x83\xe2"
                b"\x04\xe0\x84\xe2"
                b"\x54\x00\x00\xea"
            ),
            mem_addr=0x102A8,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=6,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # mov lr, pc
        # b 10414
        irsb = pyvex.IRSB(
            data=(b"\x0f\xe0\xa0\xe1" b"\x52\x00\x00\xea"),
            mem_addr=0x102C0,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=2,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # eor r0, r0, r0
        # mov lr, r0
        # b 102d8
        irsb = pyvex.IRSB(
            data=(b"\x00\x00\x20\xe0" b"\x00\xe0\xa0\xe1" b"\x00\x00\x00\xea"),
            mem_addr=0x102C8,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=3,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Boring"

        # ARM
        # push {fp}
        # add fp, sp, #0
        # mov r1, #4
        # mov r0, pc
        # add lr, r0, r1
        # ldr pc, [pc, #68]
        irsb = pyvex.IRSB(
            data=(
                b"\x04\xb0\x2d\xe5"
                b"\x00\xb0\x8d\xe2"
                b"\x04\x10\xa0\xe3"
                b"\x0f\x00\xa0\xe1"
                b"\x01\xe0\x80\xe0"
                b"\x44\xf0\x9f\xe5"
            ),
            mem_addr=0x103E8,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=6,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # add r1, pc, #0
        # add r2, r1, #4
        # add r3, r2, #4
        # add r4, r3, #4
        # add lr, r4, #4
        # b 103c4
        irsb = pyvex.IRSB(
            data=(
                b"\x00\x10\x8f\xe2"
                b"\x04\x20\x81\xe2"
                b"\x04\x30\x82\xe2"
                b"\x04\x40\x83\xe2"
                b"\x04\xe0\x84\xe2"
                b"\x54\xff\xff\xea"
            ),
            mem_addr=0x10400,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=6,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # mov lr, pc
        # b 103c4
        irsb = pyvex.IRSB(
            data=(b"\x0f\xe0\xa0\xe1" b"\xe8\xff\xff\xea"),
            mem_addr=0x10418,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=2,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # eor r0, r0, r0
        # mov lr, r0
        # b 10430
        irsb = pyvex.IRSB(
            data=(b"\x00\x00\x20\xe0" b"\x00\xe0\xa0\xe1" b"\x00\x00\x00\xea"),
            mem_addr=0x10420,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=3,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Boring"

        # From a "real thing" compiled with armc
        # ARM:
        #
        irsb = pyvex.IRSB(
            data=(
                b"H\x10\x9b\xe5"
                b"\x0b\x00\xa0\xe1"
                b"\x04 \x91\xe5"
                b"\x04\xe0\x8f\xe2"
                b"\x01\x10\x82\xe0"
                b"\x01\xf0\xa0\xe1"
            ),
            mem_addr=0x264B4C,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=6,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Call"

        # 400000 str lr, [sp,#-0x4]!
        # 400004 mov r1, #0xa
        # 400008 cmp r0, r1
        # 40000c blne #FunctionB
        irsb = pyvex.IRSB(
            data=bytes.fromhex("04e02de50a10a0e3010050e10100001b"),
            mem_addr=0x400000,
            arch=pyvex.ARCH_ARM_LE,
            num_inst=4,
            opt_level=i,
        )
        assert len(irsb.exit_statements) == 1
        assert irsb.exit_statements[0][2].jumpkind == "Ijk_Call"
        assert irsb.jumpkind == "Ijk_Boring"


def test_arm_postprocess_ret():
    """Check that ldm-style pops into PC are recognized as returns."""
    for i in range(3):
        # e91ba8f0
        # ldmdb R11, {R4,R11,SP,PC}
        irsb = pyvex.IRSB(
            data=b"\xe9\x1b\xa8\xf0",
            mem_addr=0xED4028,
            arch=pyvex.ARCH_ARM_BE_LE,
            num_inst=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Ret"

        # e91badf0
        # ldmdb R11, {R4-R8,R10,R11,SP,PC}
        # NOTE(review): the fixture bytes below are identical to the previous
        # case (e91ba8f0), not the e91badf0 encoding the comment describes —
        # confirm whether the data or the comment is wrong.
        irsb = pyvex.IRSB(
            data=b"\xe9\x1b\xa8\xf0",
            mem_addr=0x4D4028,
            arch=pyvex.ARCH_ARM_BE_LE,
            num_inst=1,
            opt_level=i,
        )
        assert irsb.jumpkind == "Ijk_Ret"

        # 00a89de8
        # ldmfd SP, {R11,SP,PC}
        # Fixed by Fish in the VEX fork, commit 43c78f608490f9a5c71c7fca87c04759c1b93741
        irsb = pyvex.IRSB(
            data=b"\x00\xa8\x9d\xe8",
            mem_addr=0xC800B57C,
            arch=pyvex.ARCH_ARM_BE,
            num_inst=1,
            opt_level=1,
        )
        assert irsb.jumpkind == "Ijk_Ret"


if __name__ == "__main__":
    test_arm_postprocess_call()
    test_arm_postprocess_ret()
================================================ FILE: tests/test_gym.py ================================================
# pylint: disable=missing-class-docstring
import unittest

import pyvex


class Tests(unittest.TestCase):
    # Instructions VEX itself cannot lift; handled by the gymrat spotters.
    def test_x86_aam(self):
        irsb = pyvex.lift(b"\xd4\x0b", 0, pyvex.ARCH_X86)
        self.assertEqual(irsb.jumpkind, "Ijk_Boring")
        self.assertEqual(irsb.size, 2)

    def test_x86_aad(self):
        irsb = pyvex.lift(b"\xd5\x0b", 0, pyvex.ARCH_X86)
        self.assertEqual(irsb.jumpkind, "Ijk_Boring")
        self.assertEqual(irsb.size, 2)

    def test_x86_xgetbv(self):
        irsb = pyvex.lift(b"\x0f\x01\xd0", 0, pyvex.ARCH_X86)
        self.assertEqual(irsb.jumpkind, "Ijk_Boring")
        self.assertEqual(irsb.size, 3)

    def test_x86_rdmsr(self):
        irsb = pyvex.lift(b"\x0f\x32", 0, pyvex.ARCH_X86)
        self.assertEqual(irsb.jumpkind, "Ijk_Boring")
        self.assertEqual(irsb.size, 2)


if __name__ == "__main__":
    unittest.main()
================================================ FILE: tests/test_irsb_property_caching.py ================================================
# pylint: disable=missing-class-docstring,no-self-use
import unittest

import pyvex


class TestCacheInvalidationOnExtend(unittest.TestCase):
    def test_cache_invalidation_on_extend(self):
        b = pyvex.block.IRSB(b"\x50", 0, pyvex.ARCH_X86)
        assert b.size == 1
        assert b.instructions == 1
        toappend = pyvex.block.IRSB(b"\x51", 0, pyvex.ARCH_X86)
        toappend.jumpkind = "Ijk_Invalid"
        toappend._direct_next = None  # Invalidate the cache because I manually changed the jumpkind
        assert not toappend.direct_next
        b.extend(toappend)
        # Cached properties of `b` must reflect the appended block.
        assert b.size == 2
        assert b.instructions == 2
        assert not b.direct_next


if __name__ == "__main__":
    unittest.main()
================================================ FILE: tests/test_lift.py ================================================
import unittest

import pyvex
from pyvex import IRSB, ffi, lift
from pyvex.errors import PyVEXError
from pyvex.lifting.util import GymratLifter, Instruction, JumpKind

# pylint: disable=R0201
# pylint: disable=C0115


class TestLift(unittest.TestCase):
    def test_partial_lift(self):
        """This tests that gymrat correctly handles the case where an
        instruction is longer than the remaining input.
        """

        class NOP(Instruction):
            name = "nop"
            bin_format = "0000111100001111"

            def compute_result(self, *args):
                pass

        class NOPLifter(GymratLifter):
            instrs = [NOP]

        lifter = NOPLifter(pyvex.ARCH_AMD64, 0)
        # this should not throw an exception
        block = lifter.lift("\x0f\x0fa")
        assert block.size == 2
        assert block.instructions == 1
        assert block.jumpkind == JumpKind.NoDecode

    def test_skipstmts_toomanyexits(self):
        # https://github.com/angr/pyvex/issues/153
        old_exit_limit = IRSB.MAX_EXITS
        IRSB.MAX_EXITS = 32

        bytes_ = bytes.fromhex(
            "0DF1B00B2EAB94E8030008938BE803000DF1C0089AE8030083E"
            "80300019B0DF1F00A339AE669E26193E8030085E8030098E803"
            "0083E80300069B95E8030088E80300A26993E803004A9200236"
            "3622362A361E362A36238AC029A069484E8030012AC09982993"
            "28932B9303C885E8030092E8030084E803009AE8030082E8030"
            "02A460A9D26993E910B9941910D9942910C992A93409548AD43"
            "9194E803008AE8030027983F9927913F909BE803000DF5887B2"
            "69335938BE803000DF58C7B089903C98BE8030098E8030084E8"
            "030095E8030088E803004B993391329394E8030034933793369"
            "3069C059B4C93049B4E9350ABCDF834C1CDF83CE185E8030094"
            "E803004B9683E8030015A94498C4F7E2EA "
        )
        arch = pyvex.ARCH_ARM_LE

        # Lifting the first four bytes will not cause any problem. Statements should be skipped as expected
        b = IRSB(bytes_[:34], 0xC6951, arch, opt_level=1, bytes_offset=5, skip_stmts=True)
        assert len(b.exit_statements) > 0
        assert not b.has_statements

        # Lifting the entire block will cause the number of exit statements go
        # beyond the limit (currently 32). PyVEX will
        # automatically relift this block without skipping the statements
        b = IRSB(bytes_, 0xC6951, arch, opt_level=1, bytes_offset=5, skip_stmts=True)
        assert b.statements is not None
        assert len(b.exit_statements) > 32

        # Restore the setting
        IRSB.MAX_EXITS = old_exit_limit

    def test_max_bytes(self):
        data = bytes.fromhex("909090909090c3")
        arch = pyvex.ARCH_X86
        assert lift(data, 0x1000, arch, max_bytes=None).size == len(data)
        assert lift(data, 0x1000, arch, max_bytes=len(data) - 1).size == len(data) - 1
        assert lift(data, 0x1000, arch, max_bytes=len(data) + 1).size == len(data)

        # A raw cffi buffer has no intrinsic length, so max_bytes is mandatory.
        data2 = ffi.from_buffer(data)
        self.assertRaises(PyVEXError, lift, data2, 0x1000, arch)
        assert lift(data2, 0x1000, arch, max_bytes=len(data)).size == len(data)
        assert lift(data2, 0x1000, arch, max_bytes=len(data) - 1).size == len(data) - 1


if __name__ == "__main__":
    unittest.main()
================================================ FILE: tests/test_mips32_postprocess.py ================================================
import pyvex


def test_mips32_unconditional_jumps():
    """`beq $zero, $zero` is always taken; the post-processor must turn the
    side exit into the block's constant default exit."""
    # 0040000c: 10000002 ; beq $zero, $zero, LABEL_ELSE_IF
    # 00400010: 00000000 ; sll $zero, $zero, 0
    # 00400014: 08100012 ; j LABEL_DONE
    # 00400018: ; LABEL_ELSE_IF:
    irsb = pyvex.IRSB(
        data=(b"\x10\x00\x00\x02" b"\x00\x00\x00\x00"),
        mem_addr=0x40000C,
        arch=pyvex.ARCH_MIPS32_BE,
        num_inst=2,
        opt_level=0,
    )
    assert type(irsb.next) is pyvex.expr.Const
    assert irsb.next.con.value == 0x400018


if __name__ == "__main__":
    test_mips32_unconditional_jumps()
================================================ FILE: tests/test_pyvex.py ================================================
import copy
import gc
import logging
import os
import random
import sys
import unittest

import pyvex
from pyvex.lifting import LibVEXLifter

if sys.platform == "linux":
    import resource

# pylint: disable=R0201


class TestPyvex(unittest.TestCase):
    @unittest.skipUnless(
        sys.platform == "linux", "Cannot import the resource package on windows, values different on macos."
) def test_memory(self): arches = [pyvex.ARCH_X86, pyvex.ARCH_PPC32, pyvex.ARCH_AMD64, pyvex.ARCH_ARM_BE] # we're not including ArchMIPS32 cause it segfaults sometimes # disable logging, as that may fill up log buffers somewhere logging.disable(logging.ERROR) for _ in range(10000): try: s = os.urandom(32) a = random.choice(arches) p = pyvex.IRSB(data=s, mem_addr=0, arch=a) except pyvex.PyVEXError: pass kb_start = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss for _ in range(20000): try: s = os.urandom(32) a = random.choice(arches) p = pyvex.IRSB(data=s, mem_addr=0, arch=a) except pyvex.PyVEXError: pass del p gc.collect() logging.disable(logging.NOTSET) kb_end = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss pyvex.pvc.clear_log() pyvex.pvc.LibVEX_ShowAllocStats() print(LibVEXLifter.get_vex_log()) # allow a 5mb leeway assert kb_end - kb_start < 5000 ################ ### IRCallee ### ################ def test_ircallee(self): callee = pyvex.IRCallee(3, "test_name", 0xFFFFFF) assert callee.name == "test_name" assert callee.regparms == 3 assert callee.mcx_mask == 0xFFFFFF ############ ### IRSB ### ############ def test_irsb_empty(self): self.assertRaises(Exception, pyvex.IRSB) self.assertRaises(Exception, pyvex.IRSB, data="", arch=pyvex.ARCH_AMD64, mem_addr=0) def test_irsb_arm(self): irsb = pyvex.IRSB(data=b"\x33\xff\x2f\xe1", mem_addr=0, arch=pyvex.ARCH_ARM_BE) assert len([i for i in irsb.statements if isinstance(i, pyvex.IRStmt.IMark)]) == 1 def test_irsb_popret(self): irsb = pyvex.IRSB(data=b"\x5d\xc3", mem_addr=0, arch=pyvex.ARCH_AMD64) stmts = irsb.statements irsb.pp() assert len(stmts) > 0 assert irsb.jumpkind == "Ijk_Ret" assert irsb.offsIP == 184 cursize = len(irsb.tyenv.types) assert cursize > 0 print(irsb.statements[10].data) print(irsb.statements[10].data.tmp) print(irsb.tyenv.types[irsb.statements[10].data.tmp]) assert irsb.tyenv.lookup(irsb.statements[10].data.tmp) == "Ity_I64" def test_two_irsb(self): irsb1 = pyvex.IRSB(data=b"\x5d\xc3", 
mem_addr=0, arch=pyvex.ARCH_AMD64) irsb2 = pyvex.IRSB(data=b"\x5d\x5d\x5d\x5d", mem_addr=0, arch=pyvex.ARCH_AMD64) stmts1 = irsb1.statements stmts2 = irsb2.statements assert len(stmts1) != len(stmts2) def test_irsb_deepCopy(self): irsb = pyvex.IRSB(data=b"\x5d\xc3", mem_addr=0, arch=pyvex.ARCH_AMD64) stmts = irsb.statements irsb2 = copy.deepcopy(irsb) stmts2 = irsb2.statements assert len(stmts) == len(stmts2) def test_irsb_addStmt(self): irsb = pyvex.IRSB(data=b"\x5d\xc3", mem_addr=0, arch=pyvex.ARCH_AMD64) stmts = irsb.statements irsb2 = copy.deepcopy(irsb) irsb2.statements = [] assert len(irsb2.statements) == 0 for n, i in enumerate(stmts): assert len(irsb2.statements) == n irsb2.statements.append(copy.deepcopy(i)) irsb2.pp() def test_irsb_tyenv(self): irsb = pyvex.IRSB(data=b"\x5d\xc3", mem_addr=0, arch=pyvex.ARCH_AMD64) print(irsb.tyenv) print("Orig") print(irsb.tyenv) print("Empty") irsb2 = pyvex.IRSB.empty_block(arch=pyvex.ARCH_AMD64, addr=0) print(irsb2.tyenv) print("Unwrapped") irsb2.tyenv = copy.deepcopy(irsb.tyenv) print(irsb2.tyenv) ################## ### Statements ### ################## def test_irstmt_pp(self): irsb = pyvex.IRSB(data=b"\x5d\xc3", mem_addr=0, arch=pyvex.ARCH_AMD64) stmts = irsb.statements for i in stmts: print("STMT: ", end=" ") print(i) def test_irstmt_flat(self): print("TODO") def test_irstmt_imark(self): m = pyvex.IRStmt.IMark(1, 2, 3) assert m.tag == "Ist_IMark" assert m.addr == 1 assert m.len == 2 assert m.delta == 3 m.addr = 5 assert m.addr == 5 m.len = 5 assert m.len == 5 m.delta = 5 assert m.delta == 5 self.assertRaises(Exception, pyvex.IRStmt.IMark, ()) def test_irstmt_abihint(self): self.assertRaises(Exception, pyvex.IRStmt.AbiHint, ()) a = pyvex.IRExpr.RdTmp.get_instance(123) b = pyvex.IRExpr.RdTmp.get_instance(456) m = pyvex.IRStmt.AbiHint(a, 10, b) assert m.base.tmp == 123 assert m.len == 10 assert m.nia.tmp == 456 def test_irstmt_put(self): self.assertRaises(Exception, pyvex.IRStmt.Put, ()) a = 
pyvex.IRExpr.RdTmp.get_instance(123) m = pyvex.IRStmt.Put(a, 10) print("Put stmt:", end=" ") print(m) print("") assert m.data.tmp == 123 assert m.offset == 10 def test_irexpr_puti(self): r = pyvex.IRRegArray(10, "Ity_I64", 20) i = pyvex.IRExpr.RdTmp.get_instance(5) d = pyvex.IRExpr.RdTmp.get_instance(30) m = pyvex.IRStmt.PutI(r, i, d, 2) assert m.descr.base == 10 assert m.ix.tmp == 5 assert m.bias == 2 assert m.data.tmp == d.tmp self.assertRaises(Exception, pyvex.IRStmt.PutI, ()) def test_irstmt_wrtmp(self): self.assertRaises(Exception, pyvex.IRStmt.WrTmp, ()) a = pyvex.IRExpr.RdTmp.get_instance(123) m = pyvex.IRStmt.WrTmp(10, a) assert m.tag == "Ist_WrTmp" assert m.tmp == 10 assert m.data.tmp == 123 def test_irstmt_store(self): self.assertRaises(Exception, pyvex.IRStmt.Store, ()) a = pyvex.IRExpr.RdTmp.get_instance(123) d = pyvex.IRExpr.RdTmp.get_instance(456) m = pyvex.IRStmt.Store(a, d, "Iend_LE") assert m.tag == "Ist_Store" assert m.endness == "Iend_LE" assert m.addr.tmp == a.tmp assert m.data.tmp == d.tmp def test_irstmt_cas(self): self.assertRaises(Exception, pyvex.IRStmt.CAS, ()) a = pyvex.IRExpr.RdTmp.get_instance(10) eh = pyvex.IRExpr.RdTmp.get_instance(11) el = pyvex.IRExpr.RdTmp.get_instance(12) dh = pyvex.IRExpr.RdTmp.get_instance(21) dl = pyvex.IRExpr.RdTmp.get_instance(22) args = { "oldHi": 1, "oldLo": 2, "end": "Iend_LE", "addr": a, "expdHi": eh, "expdLo": el, "dataHi": dh, "dataLo": dl, } m = pyvex.IRStmt.CAS(**args) assert m.tag == "Ist_CAS" assert m.endness == "Iend_LE" assert m.oldHi == 1 assert m.oldLo == 2 assert m.addr.tmp == a.tmp assert m.expdHi.tmp == eh.tmp assert m.expdLo.tmp == el.tmp assert m.dataHi.tmp == dh.tmp assert m.dataLo.tmp == dl.tmp def test_irstmt_loadg(self): self.assertRaises(Exception, pyvex.IRStmt.LoadG, ()) a = pyvex.IRExpr.RdTmp.get_instance(10) alt = pyvex.IRExpr.RdTmp.get_instance(11) guard = pyvex.IRExpr.RdTmp.get_instance(12) args = { "dst": 1, "end": "Iend_LE", "addr": a, "alt": alt, "guard": guard, "cvt": 
"ILGop_Ident32", } m = pyvex.IRStmt.LoadG(**args) assert m.tag == "Ist_LoadG" assert m.end == "Iend_LE" assert m.cvt == "ILGop_Ident32" assert m.dst == 1 assert m.addr.tmp == a.tmp assert m.alt.tmp == alt.tmp assert m.guard.tmp == guard.tmp assert m.cvt_types == ("Ity_I32", "Ity_I32") def test_irstmt_storeg(self): self.assertRaises(Exception, pyvex.IRStmt.LoadG, ()) a = pyvex.IRExpr.RdTmp.get_instance(10) data = pyvex.IRExpr.RdTmp.get_instance(11) guard = pyvex.IRExpr.RdTmp.get_instance(12) args = {"end": "Iend_LE", "addr": a, "data": data, "guard": guard} m = pyvex.IRStmt.StoreG(**args) assert m.tag == "Ist_StoreG" assert m.end == "Iend_LE" assert m.addr.tmp == a.tmp assert m.data.tmp == data.tmp assert m.guard.tmp == guard.tmp def test_irstmt_llsc(self): self.assertRaises(Exception, pyvex.IRStmt.LLSC) a = pyvex.IRExpr.RdTmp.get_instance(123) d = pyvex.IRExpr.RdTmp.get_instance(456) m = pyvex.IRStmt.LLSC(a, d, 1, "Iend_LE") assert m.tag == "Ist_LLSC" assert m.endness == "Iend_LE" assert m.result == 1 assert m.addr.tmp == a.tmp assert m.storedata.tmp == d.tmp def test_irstmt_mbe(self): m = pyvex.IRStmt.MBE("Imbe_CancelReservation") assert m.event == "Imbe_CancelReservation" m.event = "Imbe_Fence" assert m.event == "Imbe_Fence" def test_irstmt_dirty(self): args = [pyvex.IRExpr.RdTmp.get_instance(i) for i in range(10)] m = pyvex.IRStmt.Dirty("test_dirty", pyvex.IRConst.U8(1), args, 15, "Ifx_None", 0, 1, 0) assert m.cee == "test_dirty" assert isinstance(m.guard, pyvex.IRConst.U8) assert m.tmp == 15 assert m.mFx == "Ifx_None" assert m.nFxState == 0 for n, a in enumerate(m.args): assert a.tmp == args[n].tmp def test_irstmt_exit(self): self.assertRaises(Exception, pyvex.IRStmt.Exit) g = pyvex.IRExpr.RdTmp.get_instance(123) d = pyvex.IRConst.U32(456) m = pyvex.IRStmt.Exit(g, d, "Ijk_Ret", 10) assert m.tag == "Ist_Exit" assert m.jumpkind == "Ijk_Ret" assert m.offsIP == 10 assert m.guard.tmp == g.tmp assert m.dst.value == d.value ################## ### IRRegArray ### 
    ##################

    def test_irregarray(self):
        """IRRegArray round-trips base offset, element type and element count."""
        m = pyvex.IRRegArray(10, "Ity_I64", 20)
        assert m.nElems == 20
        assert m.elemTy == "Ity_I64"
        assert m.base == 10

    ################
    ### IRConst.s ###
    ################

    def helper_const_subtype(self, subtype, tag, value):
        """Shared checks for one IRConst subtype: tag, value equality/inequality, type."""
        print("Testing %s" % tag)
        self.assertRaises(Exception, subtype)

        c = subtype(value)
        assert c.tag == tag
        assert c.value == value

        d = subtype(value - 1)
        e = subtype(value)
        assert c.value == e.value
        assert e.value == c.value
        self.assertNotEqual(c.value, d.value)
        self.assertNotEqual(d.value, c.value)
        self.assertNotEqual(c.value, "test")

        # TODO: actually check value
        assert c.type == d.type

    def test_irconst(self):
        """Run the subtype checks across every IRConst width/flavor."""
        self.helper_const_subtype(pyvex.IRConst.U1, "Ico_U1", 1)
        self.helper_const_subtype(pyvex.IRConst.U8, "Ico_U8", 233)
        self.helper_const_subtype(pyvex.IRConst.U16, "Ico_U16", 39852)
        self.helper_const_subtype(pyvex.IRConst.U32, "Ico_U32", 3442312356)
        self.helper_const_subtype(pyvex.IRConst.U64, "Ico_U64", 823452334523623455)
        self.helper_const_subtype(pyvex.IRConst.F32, "Ico_F32", 13453.234375)
        self.helper_const_subtype(pyvex.IRConst.F32i, "Ico_F32i", 3442312356)
        self.helper_const_subtype(pyvex.IRConst.F64, "Ico_F64", 13453.234525)
        self.helper_const_subtype(pyvex.IRConst.F64i, "Ico_F64i", 823457234523623455)
        self.helper_const_subtype(pyvex.IRConst.V128, "Ico_V128", 39852)
        self.helper_const_subtype(pyvex.IRConst.V256, "Ico_V256", 3442312356)

    ###################
    ### Expressions ###
    ###################

    def test_irexpr_binder(self):
        # binder doesn't work statically, but hopefully we should
        # never see it, anyways
        return
        # m = pyvex.IRExpr.Binder(1534252)
        # assert m.binder == 1534252

    def test_irexpr_geti(self):
        """GetI keeps its register-array descriptor, index and bias."""
        r = pyvex.IRRegArray(10, "Ity_I64", 20)
        i = pyvex.IRExpr.RdTmp.get_instance(5)
        m = pyvex.IRExpr.GetI(r, i, 2)
        assert m.description.base == 10
        assert m.index.tmp == 5
        assert m.bias == 2

        self.assertRaises(Exception, pyvex.IRExpr.GetI)

    def test_irexpr_rdtmp(self):
        """RdTmp interning returns an expression carrying the requested temp number."""
        m = pyvex.IRExpr.RdTmp.get_instance(123)
        assert m.tag == "Iex_RdTmp"
        assert m.tmp == 123

        irsb = pyvex.IRSB(b"\x90\x5d\xc3", mem_addr=0x0, arch=pyvex.ARCH_AMD64)
        print("TMP:", irsb.next.tmp)

    def test_irexpr_get(self):
        """Get keeps the requested guest-state type."""
        m = pyvex.IRExpr.Get(0, "Ity_I64")
        assert m.type == "Ity_I64"

        self.assertRaises(Exception, pyvex.IRExpr.Get)

    def test_irexpr_qop(self):
        """Quaternary op keeps operator and all four arguments in order."""
        a = pyvex.IRExpr.Get(0, "Ity_I64")
        b = pyvex.IRExpr.Get(184, "Ity_I64")
        c = pyvex.IRExpr.RdTmp.get_instance(1)
        d = pyvex.IRExpr.RdTmp.get_instance(2)
        op = "Iop_QAdd32S"

        m = pyvex.IRExpr.Qop(op, [a, b, c, d])

        assert m.op == op
        assert m.args[1].type == b.type
        assert len(m.args) == 4
        assert m.args[2].tmp == c.tmp

    def test_irexpr_triop(self):
        """Ternary op keeps operator and all three arguments in order."""
        a = pyvex.IRExpr.Get(0, "Ity_I64")
        b = pyvex.IRExpr.Get(184, "Ity_I64")
        c = pyvex.IRExpr.RdTmp.get_instance(1)
        op = "Iop_MAddF64"

        m = pyvex.IRExpr.Triop(op, [a, b, c])

        assert m.op == op
        assert m.args[1].type == b.type
        assert len(m.args) == 3
        assert m.args[2].tmp == c.tmp

    def test_irexpr_binop(self):
        """Binary op keeps operator and both arguments."""
        a = pyvex.IRExpr.Get(0, "Ity_I64")
        c = pyvex.IRExpr.RdTmp.get_instance(1)
        op = "Iop_Add64"

        m = pyvex.IRExpr.Binop(op, [a, c])

        assert m.op == op
        assert m.args[1].tmp == c.tmp
        assert len(m.args) == 2
        assert m.args[1].tmp == c.tmp

    def test_irexpr_unop(self):
        """Unary op keeps operator and its single argument."""
        a = pyvex.IRExpr.Get(0, "Ity_I64")
        op = "Iop_Add64"

        m = pyvex.IRExpr.Unop(op, [a])

        assert m.op == op
        assert len(m.args) == 1
        assert m.args[0].offset == a.offset

    def test_irexpr_load(self):
        """Load keeps endianness and result type."""
        a = pyvex.IRExpr.Get(0, "Ity_I64")
        e = "Iend_LE"
        t = "Ity_I64"

        m = pyvex.IRExpr.Load(e, t, a)

        assert m.endness == e
        assert m.type == t

    def test_irexpr_const(self):
        """Const wraps an IRConst and exposes its value."""
        u1 = pyvex.IRConst.U1(1)
        f64 = pyvex.IRConst.F64(1.123)

        ue = pyvex.IRExpr.Const(u1)
        _ = pyvex.IRExpr.Const(f64)

        assert ue.con.value == u1.value
        assert ue.con.value != f64.value

    def test_irexpr_ite(self):
        """ITE keeps its iftrue branch expression."""
        a = pyvex.IRExpr.Get(0, "Ity_I64")
        iffalse = pyvex.IRExpr.RdTmp.get_instance(1)
        iftrue = pyvex.IRExpr.Const(pyvex.IRConst.U8(200))

        m = pyvex.IRExpr.ITE(a, iffalse, iftrue)
        assert m.iftrue.con.value == iftrue.con.value

    def test_irexpr_ccall(self):
        """CCall keeps return type, callee and argument list."""
        callee = pyvex.IRCallee(3, "test_name", 0xFFFFFF)
        args = [pyvex.IRExpr.RdTmp.get_instance(i) for i in range(10)]

        m = pyvex.IRExpr.CCall("Ity_I64", callee, args)

        assert len(m.args) == len(args)
        assert m.ret_type == "Ity_I64"

        for n, a in enumerate(m.args):
            assert a.tmp == args[n].tmp

        # NOTE(review): argument order here is swapped relative to the call
        # above (callee first, type second) — only the empty-args length is
        # checked, so it passes; confirm intended order before touching.
        m = pyvex.IRExpr.CCall(callee, "Ity_I64", ())
        assert len(m.args) == 0


if __name__ == "__main__":
    unittest.main()

================================================ FILE: tests/test_s390x_exrl.py ================================================

import pyvex


def test_s390x_exrl():
    """EXRL must be lifted via the s390x EX dirty helper with the right target bytes."""
    arch = pyvex.ARCH_S390X
    irsb = pyvex.lift(
        b"\xc6\x10\x00\x00\x00\x04"  # exrl %r1,0x400408
        b"\x07\xfe"  # br %r14
        b"\xd7\x00\x20\x00\x30\x00"  # xc 0(0,%r2),0(%r3)
        b"\x7d\xa7",  # padding
        0x400400,
        arch,
    )
    irsb_str = str(irsb)

    # check last_execute_target, only top 6 bytes are relevant
    assert "0xd700200030000000" in irsb_str
    assert "s390x_dirtyhelper_EX" in irsb_str
    assert "{ PUT(ia) = 0x400400; Ijk_Boring }" in irsb_str
    assert "------ IMark(0x400406, 2, 0) ------" in irsb_str
    assert irsb.jumpkind == "Ijk_Ret"


if __name__ == "__main__":
    test_s390x_exrl()

================================================ FILE: tests/test_s390x_lochi.py ================================================

import pyvex


def test_s390x_lochi():
    """LOCHI must evaluate the condition helper and conditionally put the immediate."""
    arch = pyvex.ARCH_S390X
    irsb = pyvex.lift(b"\xec\x18\xab\xcd\x00\x42", 0x400400, arch)  # lochi %r1,0xabcd,8
    irsb_str = str(irsb)
    assert "s390_calculate_cond(0x0000000000000008" in irsb_str
    assert "PUT(r1_32) = 0xffffabcd" in irsb_str
    # NOTE(review): `in` performs a substring test against the string
    # "Ijk_Boring"; `==` was likely intended — confirm before changing.
    assert irsb.jumpkind in "Ijk_Boring"


if __name__ == "__main__":
    test_s390x_lochi()

================================================ FILE: tests/test_s390x_vl.py ================================================

#!/usr/bin/env python3
import pyvex


def test_s390x_vl():
    """VL must lift to a big-endian V128 load from r9 + displacement into v4."""
    arch = pyvex.ARCH_S390X
    irsb = pyvex.lift(b"\xe7\x40\x90\xa8\x00\x06", 0x11C6C9E, arch)  # vl %v4, 0xa8(%r9)
    irsb_str = str(irsb)
    assert "GET:I64(r9)" in irsb_str
    assert "Add64(0x00000000000000a8" in irsb_str
    assert "LDbe:V128" in irsb_str
    assert "PUT(v4) =" in irsb_str
    assert irsb.jumpkind == "Ijk_Boring"


if __name__ == "__main__":
    test_s390x_vl()

================================================ FILE: tests/test_spotter.py ================================================

import os

import pyvex
import pyvex.lifting
from pyvex.lifting import register
from pyvex.lifting.util import GymratLifter, Instruction, Type

test_location = str(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../binaries/tests"))


class Instruction_IMAGINARY(Instruction):
    """Fake instruction matching the ud2 encoding (0x0f0b); emits Add27 of two constants."""

    bin_format = bin(0x0F0B)[2:].zfill(16)
    name = "IMAGINARY"

    def compute_result(self):
        a = self.constant(10, Type.int_27)
        b = self.constant(20, Type.int_27)
        a + b


class ImaginarySpotter(GymratLifter):
    instrs = [Instruction_IMAGINARY]


register(ImaginarySpotter, "X86")

basic_goal = """
IRSB {
   t0:Ity_I27

   00 | ------ IMark(0x1, 2, 0) ------
   01 | t0 = Add27((0xa :: Ity_I27),(0x14 :: Ity_I27))
   NEXT: PUT(eip) = 0x00000003; Ijk_Boring
}
"""


def test_basic():
    """A lone spotter-handled instruction pretty-prints exactly as basic_goal."""
    b = pyvex.block.IRSB(b"\x0f\x0b", 1, pyvex.ARCH_X86)
    assert str(b).strip() == basic_goal.strip()


def test_embedded():
    """The spotter's Add27 must appear after the IMark for the embedded 0x0f0b."""
    b = pyvex.block.IRSB(b"\x50" * 3 + b"\x0f\x0b" + b"\x50" * 6, 1, pyvex.ARCH_X86)
    for i, stmt in enumerate(b.statements):
        if type(stmt) is pyvex.stmt.IMark and stmt.addr == 0x4 and stmt.len == 2 and stmt.delta == 0:
            imaginary_trans_stmt = b.statements[i + 1]
            assert type(imaginary_trans_stmt) is pyvex.stmt.WrTmp
            addexpr = imaginary_trans_stmt.data
            assert type(addexpr) is pyvex.expr.Binop
            assert addexpr.op == "Iop_Add27"
            arg1, arg2 = addexpr.args
            assert type(arg1) is pyvex.expr.Const
            assert arg1.con.value == 10
            assert type(arg2) is pyvex.expr.Const
            assert arg2.con.value == 20
            return
    assert False, "Could not find matching IMark"


class Instruction_MSR(Instruction):
    """Spotter stub for the Thumb-2 MSR.W encoding."""

    bin_format = bin(0x8808F380)[2:].zfill(32)
    name = "MSR.W"

    def compute_result(self):
        a = self.constant(10, Type.int_27)
        b = self.constant(20, Type.int_27)
        a + b


class Instruction_CPSIEI(Instruction):
    """Spotter stub for the CPSIE I encoding."""

    bin_format = bin(0xB662)[2:].zfill(16)
    name = "CPSIE I"

    def compute_result(self):
        a = self.constant(10, Type.int_27)
        b = self.constant(20, Type.int_27)
        a + b


class Instruction_CPSIEF(Instruction):
    """Spotter stub for the CPSIE F encoding."""

    bin_format = bin(0xB661)[2:].zfill(16)
    name = "CPSIE F"

    def compute_result(self):
        a = self.constant(10, Type.int_27)
        b = self.constant(20, Type.int_27)
        a + b


class CortexSpotter(GymratLifter):
    instrs = [Instruction_MSR, Instruction_CPSIEI, Instruction_CPSIEF]


register(CortexSpotter, "ARMEL")


def test_tmrs():
    """MRS-style read: statement 1 reads sp/r13, statement 2 writes it back."""
    arch = pyvex.ARCH_ARM_LE
    ins = b"\xef\xf3\x08\x82"
    b = pyvex.block.IRSB(ins, 1, arch)
    assert b.jumpkind == "Ijk_Boring"
    assert isinstance(b.statements[1].data, pyvex.expr.Get)
    assert arch.translate_register_name(b.statements[1].data.offset) in ["sp", "r13"]
    assert isinstance(b.statements[2], pyvex.stmt.Put)


def test_tmsr():
    """MSR-style write at opt_level=3: statement 1 reads r2, statement 2 is a Put."""
    arch = pyvex.ARCH_ARM_LE
    inss = b"\x82\xf3\x08\x88"
    b = pyvex.block.IRSB(inss, 1, arch, opt_level=3)
    assert b.jumpkind == "Ijk_Boring"
    assert isinstance(b.statements[1].data, pyvex.expr.Get)
    assert arch.translate_register_name(b.statements[1].data.offset) == "r2"
    assert isinstance(b.statements[2], pyvex.stmt.Put)


if __name__ == "__main__":
    test_basic()
    test_embedded()
    test_tmrs()
    test_tmsr()

================================================ FILE: tests/test_ud2.py ================================================

import pyvex


def test_ud2():
    """A block ending in ud2 must end with Ijk_NoDecode at the ud2, not skip it."""
    # On x86 and amd64, ud2 is a valid 2-byte instruction that means "undefined instruction". Upon decoding a basic
    # block that ends with ud2, we should treat it as an explicit NoDecode, instead of skipping the instruction and
    # resume lifting.
    b = pyvex.block.IRSB(b"\x90\x90\x0f\x0b\x90\x90", 0x20, pyvex.ARCH_AMD64)
    assert b.jumpkind == "Ijk_NoDecode"
    assert b.next.con.value == 0x22
    assert b.size == 4


if __name__ == "__main__":
    test_ud2()