Repository: jserv/shecc Branch: master Commit: 0d07d85f1cc4 Files: 47 Total size: 2.4 MB Directory structure: gitextract_2uml3qup/ ├── .ci/ │ ├── check-format.sh │ └── check-newline.sh ├── .clang-format ├── .github/ │ └── workflows/ │ └── main.yml ├── .gitignore ├── AUTHORS ├── COMPLIANCE.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── docs/ │ └── dynamic-linking.md ├── lib/ │ ├── c.c │ └── c.h ├── mk/ │ ├── arm.mk │ ├── common.mk │ └── riscv.mk ├── src/ │ ├── arch-lower.c │ ├── arm-codegen.c │ ├── arm.c │ ├── defs.h │ ├── elf.c │ ├── globals.c │ ├── lexer.c │ ├── main.c │ ├── opt-sccp.c │ ├── parser.c │ ├── peephole.c │ ├── preprocessor.c │ ├── reg-alloc.c │ ├── riscv-codegen.c │ ├── riscv.c │ └── ssa.c ├── tests/ │ ├── arm-abi.sh │ ├── check-snapshots.sh │ ├── driver.sh │ ├── fib.c │ ├── hello.c │ ├── snapshots/ │ │ ├── fib-arm-dynamic.json │ │ ├── fib-arm-static.json │ │ ├── fib-riscv-static.json │ │ ├── hello-arm-dynamic.json │ │ ├── hello-arm-static.json │ │ └── hello-riscv-static.json │ └── update-snapshots.sh └── tools/ ├── inliner.c └── norm-lf.c ================================================ FILE CONTENTS ================================================ ================================================ FILE: .ci/check-format.sh ================================================ #!/usr/bin/env bash SOURCES=$(find $(git rev-parse --show-toplevel) | egrep "\.(c|cxx|cpp|h|hpp)\$") set -x for file in ${SOURCES}; do clang-format-18 ${file} > expected-format diff -u -p --label="${file}" --label="expected coding style" ${file} expected-format done exit $(clang-format-18 --output-replacements-xml ${SOURCES} | egrep -c "") ================================================ FILE: .ci/check-newline.sh ================================================ #!/usr/bin/env bash ret=0 show=0 # Reference: https://medium.com/@alexey.inkin/how-to-force-newline-at-end-of-files-and-why-you-should-do-it-fdf76d1d090e while IFS= read -rd '' f; do if file --mime-encoding 
"$f" | grep -qv binary; then tail -c1 < "$f" | read -r _ || show=1 if [ $show -eq 1 ]; then echo "Warning: No newline at end of file $f" ret=1 show=0 fi fi done < <(git ls-files -z src tools tests) exit $ret ================================================ FILE: .clang-format ================================================ BasedOnStyle: Chromium Language: Cpp MaxEmptyLinesToKeep: 3 IndentCaseLabels: false AllowShortIfStatementsOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortLoopsOnASingleLine: false DerivePointerAlignment: false PointerAlignment: Right SpaceAfterCStyleCast: true TabWidth: 4 UseTab: Never IndentWidth: 4 BreakBeforeBraces: Linux AccessModifierOffset: -4 ForEachMacros: - foreach - list_for_each - list_for_each_safe - list_for_each_entry - list_for_each_entry_safe ================================================ FILE: .github/workflows/main.yml ================================================ name: Github Actions on: [push, pull_request] jobs: host-x86: runs-on: ubuntu-24.04 strategy: matrix: compiler: [gcc, clang] architecture: [arm, riscv] link_mode: [static] include: - compiler: gcc architecture: arm link_mode: dynamic - compiler: clang architecture: arm link_mode: dynamic steps: - name: Checkout code uses: actions/checkout@v4 - name: Download dependencies run: | sudo apt-get update -q -y sudo apt-get install -q -y graphviz jq sudo apt-get install -q -y qemu-user sudo apt-get install -q -y build-essential sudo apt-get install -q -y gcc-arm-linux-gnueabihf - name: Determine static or dynamic linking mode id: determine-mode run: | if [ "${{ matrix.link_mode }}" = "dynamic" ]; then echo "Use dynamic linking mode" echo "DYNLINK=1" >> "$GITHUB_OUTPUT" else echo "Use static linking mode" echo "DYNLINK=0" >> "$GITHUB_OUTPUT" fi - name: Build artifacts env: CC: ${{ matrix.compiler }} run: | make ARCH=${{ matrix.architecture }} DYNLINK=${{ steps.determine-mode.outputs.DYNLINK }} - name: IR regression tests run: | make check-snapshot 
DYNLINK=${{ steps.determine-mode.outputs.DYNLINK }} || exit 1 - name: Sanitizer-enabled stage 0 tests env: CC: ${{ matrix.compiler }} run: | make check-sanitizer DYNLINK=${{ steps.determine-mode.outputs.DYNLINK }} || exit 1 - name: Unit tests run: | make check DYNLINK=${{ steps.determine-mode.outputs.DYNLINK }} || exit 1 host-arm: runs-on: ubuntu-24.04-arm strategy: matrix: link_mode: [static, dynamic] steps: - name: Checkout code uses: actions/checkout@v4 - name: Download dependencies run: | sudo dpkg --add-architecture armhf sudo apt-get update -q -y sudo apt-get install -q -y graphviz jq sudo apt-get install -q -y build-essential libc6:armhf sudo wget https://github.com/fastfetch-cli/fastfetch/releases/download/2.58.0/fastfetch-linux-aarch64.deb sudo dpkg -i fastfetch-linux-aarch64.deb - name: Determine static or dynamic linking mode id: determine-mode run: | if [ "${{ matrix.link_mode }}" = "dynamic" ]; then echo "Use dynamic linking mode" echo "DYNLINK=1" >> "$GITHUB_OUTPUT" else echo "Use static linking mode" echo "DYNLINK=0" >> "$GITHUB_OUTPUT" fi - name: Build artifacts run: | make ARCH=arm DYNLINK=${{ steps.determine-mode.outputs.DYNLINK }} - name: Sanitizer-enabled stage 0 tests run: | make check-sanitizer DYNLINK=${{ steps.determine-mode.outputs.DYNLINK }} || exit 1 - name: Unit tests run: | make check DYNLINK=${{ steps.determine-mode.outputs.DYNLINK }} || exit 1 preprocessor-host: runs-on: ubuntu-24.04 strategy: matrix: compiler: [gcc, clang] architecture: [arm, riscv] steps: - name: Checkout code uses: actions/checkout@v4 - name: Download dependencies run: | sudo apt-get update -q -y sudo apt-get install -q -y graphviz jq sudo apt-get install -q -y qemu-user sudo apt-get install -q -y build-essential - name: Configurate config run: | make distclean config ARCH=${{ matrix.architecture }} - name: Preprocess stage 1 source code env: CC: ${{ matrix.compiler }} run: | make out/shecc ./out/shecc -E src/main.c > ./out/out.c - name: Build stage 1 artifact run: 
| ./out/shecc --no-libc -o out/shecc-stage1.elf ./out/out.c coding-style: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 - name: Coding convention run: | sudo apt-get install -q -y clang-format-18 .ci/check-newline.sh .ci/check-format.sh shell: bash ================================================ FILE: .gitignore ================================================ out a.out *.o *.elf *.log *.lst *.dot config src/codegen.c .session.mk # vscode C/C++ plugin generated files .vscode ================================================ FILE: AUTHORS ================================================ shecc is written by: Jim Huang Yu-Cheng Cheng Lecopzer Chen Vacantron Chen Matt Jan Alex Lai Chin Yik Ming Hank Wang Kyle Lin Yu En Siao Meng-Zong Tsai Kuan-Wei Chiu Yu-Hui Wu ================================================ FILE: COMPLIANCE.md ================================================ # C99 Compliance Status shecc implements a subset of C99 suitable for self-hosting and systems programming, prioritizing simplicity, educational value, and minimal dependencies over full standard compliance. This document tracks compliance gaps and non-standard behaviors. 
## Implemented Features ### Core Language - Basic types: `int`, `short`, `char`, `void`, `_Bool` - Structures and unions with nested definitions - Enumerations with automatic value assignment - Function definitions and declarations - Arrays (single and multi-dimensional) - Pointers and pointer arithmetic (fully C99-compliant) - Type definitions (`typedef`) ### Control Flow - `if`/`else` statements - `goto` and label statements - `while`, `do-while`, `for` loops - `switch`/`case`/`default` statements - `break`, `continue`, `return` statements ### Operators - Arithmetic: `+`, `-`, `*`, `/`, `%` - Bitwise: `&`, `|`, `^`, `~`, `<<`, `>>` - Logical: `&&`, `||`, `!` - Relational: `<`, `>`, `<=`, `>=`, `==`, `!=` - Assignment: `=`, `+=`, `-=`, `*=`, `/=`, `%=`, `<<=`, `>>=`, `&=`, `|=`, `^=` - Increment/decrement: `++`, `--` (prefix and postfix) - Conditional: `? :` - Member access: `.`, `->` - Address/dereference: `&`, `*` ### Preprocessor (Partial) - `#define` for object-like and function-like macros - `#ifdef`, `#ifndef`, `#if`, `#elif`, `#else`, `#endif` - `#undef` for macro removal - `#pragma once`, other `#pragma` options will be ignored - `defined()` operator - `__VA_ARGS__` for variadic macros - `__FILE__`, `__LINE__` built-in macros ## Missing Features ### Storage Classes & Qualifiers | Feature | Status | Impact | |---------|--------|--------| | `static` | Not implemented | No internal linkage or persistent local variables | | `extern` | Not implemented | No external linkage declarations | | `register` | Not implemented | No register hint optimization | | `auto` | Not implemented | Default storage class (implicit) | | `const` | Parsed but ignored | No read-only enforcement | | `volatile` | Not implemented | No volatile semantics | | `restrict` | Not implemented | No pointer aliasing optimization | | `inline` | Not implemented | No function inlining | ### Type System | Feature | Status | Notes | |---------|--------|-------| | `long` | Missing | Only 4-byte 
integers | | `long long` | Missing | No 64-bit integers | | `unsigned` | Missing | All integers are signed | | `signed` | Missing | Implicit for integers | | `float` | Missing | No floating-point support | | `double` | Missing | No floating-point support | | `long double` | Missing | No floating-point support | | Bit-fields | Missing | Cannot pack struct members | ### Literals & Constants | Feature | Status | Current Behavior | |---------|--------|-----------------| | Integer suffixes (`u`, `l`, `ll`) | Not parsed | All literals are `int` | | Wide characters (`L'c'`) | Not supported | Single-byte only | | Wide strings (`L"..."`) | Not supported | Single-byte only | | Multi-character constants | Not supported | Single character only | | Universal characters (`\u`, `\U`) | Not supported | ASCII only | | Hex escapes (`\x...`) | Limited | Max 2 hex digits | ### Preprocessor Gaps | Feature | Status | Description | |---------|--------|-------------| | `#include` | Partial | Local file inclusion is supported, but including system headers is not | | Token pasting (`##`) | Missing | Cannot concatenate tokens | | Stringizing (`#`) | Missing | Cannot convert to string | | `__DATE__` | Missing | No compile date | | `__TIME__` | Missing | No compile time | | `__STDC__` | Missing | No standard compliance indicator | ### Advanced Features | Feature | Status | Description | |---------|--------|-------------| | Designated initializers | Missing | No `.field = value` syntax | | Compound literals | Partial | Limited support | | Flexible array members | Missing | No `[]` at struct end | | Variable-length arrays | Missing | No runtime-sized arrays | | `_Complex` | Missing | No complex numbers | | `_Imaginary` | Missing | No imaginary numbers | | `_Static_assert` | Missing | No compile-time assertions | | `_Alignof` | Missing | No alignment queries | | `_Alignas` | Missing | No alignment specification | | `_Generic` | Missing | No generic selection | ## Non-Standard Behaviors 
### GNU Extensions - Binary literals: `0b101010` - Escape sequence: `\e` for ESC character - `void*` arithmetic (treated as `char*`) - `sizeof(void)` returns 0 (should be error) - Computed goto ### Implementation-Specific - Array compound literals in scalar context use first element - String literals are modifiable (stored in `.data`, not `.rodata`) - No strict aliasing rules - Left-to-right evaluation order (not always guaranteed in C99) ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to `shecc` :+1::tada: First off, thanks for taking the time to contribute! :tada::+1: The following is a set of guidelines for contributing to [shecc](https://github.com/sysprog21/shecc) hosted on GitHub. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. ## Issues This project uses GitHub Issues to track ongoing development, discuss project plans, and keep track of bugs. Be sure to search for existing issues before you create another one. Initially, it is advisable to create an issue on GitHub for bug reports, feature requests, or substantial pull requests, as this offers a platform for discussion with both the community and project maintainers. Engaging in a conversation through a GitHub issue before making a contribution is crucial to ensure the acceptance of your work. We aim to prevent situations where significant effort is expended on a pull request that might not align with the project's design principles. For example, it might turn out that the feature you propose is more suited as an independent module that complements this project, in which case we would recommend that direction. For minor corrections, such as typo fixes, small refactoring, or updates to documentation/comments, filing an issue is not typically necessary. 
What constitutes a "minor" fix involves discretion; however, examples include: - Correcting spelling mistakes - Minor code refactoring - Updating or editing documentation and comments Nevertheless, there may be instances where, upon reviewing your pull requests, we might request an issue to be filed to facilitate discussion on broader design considerations. Visit our [Issues page on GitHub](https://github.com/sysprog21/shecc/issues) to search and submit. ## Language Compliance Before contributing new language features or modifications to the compiler frontend, please review [COMPLIANCE.md](COMPLIANCE.md) to understand shecc's current C99 compliance status, supported features, and known limitations. This document helps ensure that contributions align with the project's subset implementation philosophy. ## Coding Convention Contributions from developers across corporations, academia, and individuals are welcome. However, participation requires adherence to fundamental ground rules: * Code must strictly adhere to the established C coding style (refer to the guidelines below). While there is some flexibility in basic style, it is crucial to stick to the current coding standards. Complex algorithmic constructs without proper comments will not be accepted. * External pull requests should include thorough documentation in the pull request comments for consideration. * When composing documentation, code comments, and other materials in English, please adhere to the American English (`en_US`) dialect. This variant should be considered the standard for all documentation efforts. For instance, opt for "initialize" over "initialise" and "color" rather than "colour". Software requirement: [clang-format](https://clang.llvm.org/docs/ClangFormat.html) version 18 or later. This repository consistently contains an up-to-date `.clang-format` file with rules that match the explained ones. For maintaining a uniform coding style, execute the command `clang-format -i *.{c,h}`. 
## Coding Style for Modern C This coding style is a variant of the [K&R style](https://en.wikipedia.org/wiki/Indentation_style#K&R). Adhere to established practices while being open to innovation. Maintain consistency, adopt the latest C standards, and embrace modern compilers along with their advanced static analysis capabilities and sanitizers. ### Indentation In this coding style guide, the use of 4 spaces for indentation instead of tabs is strongly enforced to ensure consistency. Always apply a single space before and after comparison and assignment operators to maintain readable code. Additionally, it is crucial to include a single space after every comma. e.g., ```c for (int i = 0; i < 10; i++) { printf("%d\n", i); /* some operations */ } ``` The tab character (ASCII 0x9) should never appear within any source code file. When indentation is needed in the source code, align using spaces instead. The width of the tab character varies by text editor and programmer preference, making consistent visual layout a continual challenge during code reviews and maintenance. ### Line length All lines should typically remain within 80 characters, with longer lines wrapped as needed. This practice is supported by several valid rationales: * It encourages developers to write concise code. * Smaller portions of information are easier for humans to process. * It assists users of vi/vim (and potentially other editors) who use vertical splits. * It is especially helpful for those who may want to print code on paper. ### Comments Multi-line comments should have the opening and closing characters on separate lines, with the content lines prefixed by a space and an asterisk (`*`) for alignment, e.g., ```c /* * This is a multi-line comment. */ /* One line comment. */ ``` Use multi-line comments for more elaborate descriptions or before significant logical blocks of code. 
Single-line comments should be written in C89 style: ```c return (uintptr_t) val; /* return a bitfield */ ``` Leave two spaces between the statement and the inline comment. Avoid commenting out code directly. Instead, use `#if 0` ... `#endif` when it is intentional. All assumptions should be clearly explained in comments. Use the following markers to highlight issues and make them searchable: * `WARNING`: Alerts a maintainer to the risk of changing this code. e.g., a delay loop counter's terminal value was determined empirically and may need adjustment when the code is ported or the optimization level is tweaked. * `NOTE`: Provides descriptive comments about the "why" of a chunk of code, as opposed to the "how" usually included in comments. e.g., a chunk of driver code may deviate from the datasheet due to a known erratum in the chip, or an assumption made by the original programmer is explained. * `TODO`: Indicates an area of the code that is still under construction and explains what remains to be done. When appropriate, include an all-caps programmer name or set of initials before the word `TODO`. Keep the documentation as close to the code as possible. ### Spacing and brackets Ensure that the keywords `if`, `while`, `for`, `switch`, and `return` are always followed by a single space when there is additional code on the same line. Follow these spacing guidelines: * Place one space after the keyword in a conditional or loop. * Do not use spaces around the parentheses in conditionals or loops. * Insert one space before the opening curly bracket. 
For example: ```c do { /* some operations */ } while (condition); ``` Functions (their declarations or calls), `sizeof` operator or similar macros shall not have a space after their name/keyword or around the brackets, e.g., ```c unsigned total_len = offsetof(obj_t, items[n]); unsigned obj_len = sizeof(obj_t); ``` Use brackets to avoid ambiguity and with operators such as `sizeof`, but otherwise avoid redundant or excessive brackets. Assignment operators (`=`, `+=`, `-=`, `*=`, `/=`, `%=`, `<<=`, `>>=`, `&=`, `|=`, and `^=`) should always have a space before and after them. For example: ```c count += 1; ``` Binary operators (`+`, `-`, `*`, `/`, `%`, `<`, `<=`, `>`, `>=`, `==`, `!=`, `<<`, `>>`, `&`, `|`, `^`, `&&`, and `||`) should also be surrounded by spaces. For example: ```c current_conf = prev_conf | (1 << START_BIT); ``` Unary operators (`++`, `--`, `!`, and `~`) should be written without spaces between the operator and the operand. For example: ```c bonus++; if (!play) return STATE_QUITE; ``` The ternary operator (`?` and `:`) should have spaces on both sides. For example: ```c uint32_t max(uint32_t a, uint32_t b) { return (a > b) ? a : b; } ``` Structure pointer (`->`) and member (`.`) operators should not have surrounding spaces. Similarly, array subscript operators (`[` and `]`) and function call parentheses should be written without spaces around them. ### Parentheses Avoid relying on C’s operator precedence rules, as they may not be immediately clear to those maintaining the code. To ensure clarity, always use parentheses to enforce the correct execution order within a sequence of operations, or break long statements into multiple lines if necessary. When using logical AND (`&&`) and logical OR (`||`) operators, each operand should be enclosed in parentheses, unless it is a single identifier or constant. 
For example: ```c if ((count > 100) && (detected == false)) { character = C_ASSASSIN; } ``` ### Variable names and declarations Ensure that functions, variables, and comments are consistently named using English words. Global variables should have descriptive names, while local variables can have shorter names. It is important to strike a balance between being descriptive and concise. Each variable's name should clearly reflect its purpose. Use [snake_case](https://en.wikipedia.org/wiki/Snake_case) for naming conventions, and avoid using "camelCase." Additionally, do not use Hungarian notation or any other unnecessary prefixes or suffixes. When declaring pointers, follow these spacing conventions: ```c const char *name; /* Pointer to const data; '*' with the name and a space before it */ conf_t * const cfg; /* Const pointer; spaces around 'const' */ const uint8_t * const charmap; /* Const pointer and const data */ const void * restrict key; /* Pointer to const data that does not alias */ ``` Local variables of the same type should be declared on the same line. For example: ```c void func(void) { char a, b; /* OK */ char a; char b; /* Incorrect: a variable with char type already exists. */ } ``` Always include a trailing comma in the last element of a structure initialization, including its nested elements, to help `clang-format` correctly format the structure. However, this comma can be omitted in very simple and short structures. ```c typedef struct { int width, height; } screen_t; screen_t s = { .width = 640, .height = 480, /* comma here */ }; ``` ### Type definitions Declarations shall be on the same line, e.g., ```c typedef void (*dir_iter_t)(void *, const char *, struct dirent *); ``` _Typedef_ structures rather than pointers. Note that structures can be kept opaque if they are not dereferenced outside the translation unit where they are defined. Pointers can be _typedefed_ only if there is a very compelling reason. New types may be suffixed with `_t`. 
Structure name, when used within the translation unit, may be omitted, e.g.: ```c typedef struct { unsigned if_index; unsigned addr_len; addr_t next_hop; } route_info_t; ``` ### Initialization Do not initialize static and global variables to `0`; the compiler will do this. When a variable is declared inside a function, it is not automatically initialized. ```c static uint8_t a; /* Global variable 'a' is set to 0 by the compiler */ void foo() { /* 'b' is uninitialized and set to whatever happens to be in memory */ uint8_t b; ... } ``` Embrace C99 structure initialization where reasonable, e.g., ```c static const crypto_ops_t openssl_ops = { .create = openssl_crypto_create, .destroy = openssl_crypto_destroy, .encrypt = openssl_crypto_encrypt, .decrypt = openssl_crypto_decrypt, .hmac = openssl_crypto_hmac, }; ``` Embrace C99 array initialization, especially for the state machines, e.g., ```c static const uint8_t tcp_fsm[TCP_NSTATES][2][TCPFC_COUNT] = { [TCPS_CLOSED] = { [FLOW_FORW] = { /* Handshake (1): initial SYN. */ [TCPFC_SYN] = TCPS_SYN_SENT, }, }, ... } ``` Any pointer variable that does not have an initial address should be explicitly initialized to `NULL`. This practice helps prevent undefined behavior caused by dereferencing uninitialized pointers. In accordance with modern C standards (such as C99 and later), it is preferable to define local variables as needed, rather than declaring them all at the beginning of a function. Declaring variables close to their first use enhances code readability and helps maintain a clear, logical flow within the function. Additionally, static analysis tools can be employed to scan the entire source code before each build, providing warnings about variables that are used before being properly initialized. This helps catch potential bugs early and ensures code quality and safety. ### Control structures Try to make the control flow easy to follow. 
Avoid long convoluted logic expressions; try to split them where possible (into inline functions, separate if-statements, etc). The control structure keyword and the expression in the brackets should be separated by a single space. The opening curly bracket shall be in the same line, also separated by a single space. Example: ```c for (;;) { obj = get_first(); while ((obj = get_next(obj))) { ... } if (done) break; } ``` Do not add inner spaces around the brackets. There should be one space after the semicolon when `for` has expressions: ```c for (unsigned i = 0; i < __arraycount(items); i++) { ... } ``` #### Avoid unnecessary nesting levels It is generally preferred to place the shorter clause (measured in lines of code) first in `if` and `else if` statements. Long clauses can distract the reader from the core decision-making logic, making the code harder to follow. By placing the shorter clause first, the decision path becomes clearer and easier to understand, which can help reduce bugs. Avoid nesting `if`-`else` statements deeper than two levels. Instead, consider using function calls or `switch` statements to simplify the logic and enhance readability. Deeply nested `if`-`else` statements often indicate a complex and fragile state machine implementation, which can be refactored into a safer and more maintainable structure. For example, avoid this: ```c int inspect(obj_t *obj) { if (cond) { ... /* long code block */ ... return 0; } return -1; } ``` Instead, consider this approach: ```c int inspect(obj_t *obj) { if (!cond) return -1; ... return 0; } ``` However, be careful not to make the logic more convoluted in an attempt to simplify nesting. ### `if` statements Curly brackets and spacing follow the K&R style: ```c if (a == b) { .. } else if (a < b) { ... } else { ... } ``` Simple and succinct one-line if-statements may omit curly brackets: ```c if (!valid) return -1; ``` However, do prefer curly brackets with multi-line or more complex statements. 
If one branch uses curly brackets, then all other branches shall use the curly brackets too. Wrap long conditions onto continuation lines indented by an extra 4 spaces relative to the `if` statement: ```c if (some_long_expression && another_expression) { ... } ``` #### Avoid redundant `else` Avoid: ```c if (flag & F_FEATURE_X) { ... return 0; } else { return -1; } ``` Consider: ```c if (flag & F_FEATURE_X) { ... return 0; } return -1; ``` ### `switch` statements Switch statements should have the `case` blocks at the same indentation level, e.g.: ```c switch (expr) { case A: ... break; case B: /* fallthrough */ case C: ... break; } ``` If the case block does not break, then it is strongly recommended to add a comment containing "fallthrough" to indicate it. Modern compilers can also be configured to require such a comment (see gcc `-Wimplicit-fallthrough`). ### Function definitions The opening and closing curly brackets shall each be on a separate line (K&R style). ```c ssize_t hex_write(FILE *stream, const void *buf, size_t len) { ... } ``` Do not use old-style (K&R) C function definitions. Introduced in C99, `restrict` is a pointer qualifier that informs the compiler no other pointer will access the same object during its lifetime, enabling optimizations such as vectorization. Violating this assumption leads to undefined behavior. Use `restrict` judiciously. For function parameters, place one space after each comma, except at the end of a line. ### Function-like Macros When using function-like macros (parameterized macros), adhere to the following guidelines: - Enclose the entire macro body in parentheses. - Surround each parameter usage with parentheses. - Limit the use of each parameter to no more than once within the macro to avoid unintended side effects. - Never include control flow statements (e.g., `return`) within a macro. - If the macro involves multiple statements, encapsulate them within a `do`-`while (0)` construct. 
For example: ```c #define SET_POINT(p, x, y) \ do { \ (p)->px = (x); \ (p)->py = (y); \ } while (0) ``` While the extensive use of parentheses, as shown above, helps minimize some risks, it cannot prevent issues like unintended double increments from calls such as `MAX(i++, j++)`. Other risks associated with macros include comparing signed and unsigned data or testing floating-point values. Additionally, macros are not visible at runtime, making them impossible to step into with a debugger. Therefore, use them with caution. In general, macro names are typically written in all capitals, except in cases where readability is improved by using lowercase. For example: ``` #define countof(a) (size)(sizeof(a) / sizeof(*(a))) #define lengthof(s) (countof(s) - 1) ``` Although all capitals are generally preferred for constants, lowercase can be used for function-like macros to improve readability. These function-like macros do not share the same namespace concerns as other macros. For example, consider the implementation of a simple memory allocator. An arena can be represented by a memory buffer and an offset that begins at zero. To allocate an object, record the pointer at the current offset, advance the offset by the size of the object, and return the pointer. Additional considerations, such as alignment and checking for available space, are also required. ```c #define new(a, n, t) alloc(a, n, sizeof(t), _Alignof(t)) typedef struct { char *begin, *end; } arena_t; void *alloc(arena_t *a, ptrdiff_t count, ptrdiff_t size, ptrdiff_t align) { ptrdiff_t pad = -(uintptr_t)a->begin & (align - 1); assert(count < (a->end - a->begin - pad) / size); void *result = a->begin + pad; a->begin += pad + (count * size); return memset(result, 0, count * size); } ``` Using the `new` macro helps prevent several common errors in C programs. If types are mixed up, the compiler generates errors or warnings. 
Moreover, naming a macro `new()` does not conflict with variables or fields named `new`, because a function-like macro is expanded only when its name is immediately followed by an opening parenthesis. ### Use `const` and `static` effectively The `static` keyword should be used for any variables that do not need to be accessible outside the module where they are declared. This is particularly important for global variables defined in C files. Declaring variables and functions as `static` at the module level protects them from external access, reducing coupling between modules and improving encapsulation. For functions that do not need to be accessible outside the module, use the `static` keyword. This is especially important for private functions, where `static` should always be applied. For example: ```c static bool verify_range(uint16_t x, uint16_t y); ``` The `const` keyword is essential for several key purposes: - Declaring variables that should not change after initialization. - Defining fields within a `struct` that must remain immutable, such as those in memory-mapped I/O peripheral registers. - Serving as a strongly typed alternative to `#define` for numerical constants. For example, instead of using: ```c #define MAX_SKILL_LEVEL (100U) ``` Use: ```c const uint8_t max_skill_level = 100; ``` Maximizing the use of `const` provides the advantage of compiler-enforced protection against unintended modifications to data that should be read-only, thereby enhancing code reliability and safety. Additionally, when one of your function arguments is a pointer to data that will not be modified within the function, you should use the `const` keyword. This is particularly useful when comparing a character array with predefined strings without altering the array’s contents. For example: ```c static bool is_valid_cmd(const char *cmd); ``` ### Object abstraction Objects are often "simulated" by C programmers with a `struct` and its "public API". 
To enforce the information hiding principle, it is a good idea to define the structure in the source file (translation unit) and provide only the _declaration_ in the header. For example, `obj.c`: ```c #include "obj.h" struct obj { int value; }; obj_t *obj_create(void) { return calloc(1, sizeof(obj_t)); } void obj_destroy(obj_t *obj) { free(obj); } ``` With an example `obj.h`: ```c #ifndef _OBJ_H_ #define _OBJ_H_ typedef struct obj obj_t; obj_t *obj_create(void); void obj_destroy(obj_t *); #endif ``` Such structuring will prevent direct access of the `obj_t` members outside the `obj.c` source file. The implementation (of such "class" or "module") may be large and abstracted within separate source files. In such a case, consider separating structures and "methods" into separate headers (think of different visibility), for example `obj_impl.h` (private) and `obj.h` (public). Consider `crypto_impl.h`: ```c #ifndef _CRYPTO_IMPL_H_ #define _CRYPTO_IMPL_H_ #if !defined(__CRYPTO_PRIVATE) #error "only to be used by the crypto modules" #endif #include "crypto.h" struct crypto { crypto_cipher_t cipher; void *key; size_t key_len; ... }; ... #endif ``` And `crypto.h` (public API): ```c #ifndef _CRYPTO_H_ #define _CRYPTO_H_ typedef struct crypto crypto_t; crypto_t *crypto_create(crypto_cipher_t); void crypto_destroy(crypto_t *); ... #endif ``` ### Use reasonable types Use `unsigned` for general iterators; use `size_t` for general sizes; use `ssize_t` to return a size which may include an error. Of course, consider possible overflows. Avoid using fixed-width types like `uint8_t`, `uint16_t`, or other smaller integer types for general iterators or similar cases unless there is a specific need for size-constrained operations, such as in fixed-width data processing or resource-limited environments. C has rather peculiar _type promotion rules_ and unnecessary use of sub-word types might contribute to a bug once in a while. Boolean variables should be declared using the `bool` type. 
Non-Boolean values should be converted to Boolean by using relational operators (e.g., `<` or `!=`) rather than by casting. For example: ```c #include <stdbool.h> ... bool inside = (value < expected_range); ``` ### Embrace portability #### Byte-order Do not assume x86 or little-endian architecture. Use endian conversion functions when operating on on-disk and on-the-wire structures, or in other cases where it is appropriate. #### Types Do not assume a particular 32-bit or 64-bit architecture; for example, do not assume the size of `long` or `unsigned long`. Instead, use `int64_t` or `uint64_t` for 8-byte integers. Fixed-width types, such as `uint32_t`, are particularly useful when memory size is critical, as in embedded systems, communication protocols requiring specific data sizes, or when interacting with hardware registers that require precise bit-width operations. In these scenarios, fixed-width types ensure consistent behavior across different platforms and compilers. Do not assume `char` is signed; for example, on Arm architectures, it is unsigned by default. Avoid defining bit-fields within signed integer types. Additionally, do not use bitwise operators (such as `&`, `|`, `~`, `^`, `<<`, and `>>`) on signed integer data. Refrain from combining signed and unsigned integers in comparisons or expressions, as this can lead to unpredictable results. When using `#define` to declare decimal constants, append a `U` to ensure they are treated as unsigned. For example: ```c #define SOME_CONSTANT (6U) uint16_t unsigned_a = 6; int16_t signed_b = -9; if (unsigned_a + signed_b < 4) { /* This block might appear logically correct, as -9 + 6 is -3 */ ... } /* but compilers with 16-bit int may legally interpret it as (0xFFFF - 9) + 6. */ ``` It is important to note that certain aspects of manipulating binary data within signed integer containers are implementation-defined behaviors according to ISO C standards. 
Additionally, mixing signed and unsigned integers can lead to data-dependent results, as demonstrated in the example above. Use C99 macros for constant prefixes or formatting of the fixed-width types. Use: ```c #define SOME_CONSTANT (UINT64_C(1) << 48) printf("val %" PRIu64 "\n", SOME_CONSTANT); ``` Do not use: ```c #define SOME_CONSTANT (1ULL << 48) printf("val %lld\n", SOME_CONSTANT); ``` #### Avoid unaligned access Avoid assuming that unaligned access is safe. It is not secure on architectures like Arm, POWER, and others. Additionally, even on x86, unaligned access can be slower. #### Structures and Unions Care should be taken to prevent the compiler from inserting padding bytes within `struct` or `union` types, as this can affect memory layout and portability. To control padding and alignment, consider using structure packing techniques specific to your compiler. Additionally, take precautions to ensure that the compiler does not alter the intended order of bits within bit-fields. This is particularly important when working with hardware registers or communication protocols where bit order is crucial. According to the C standard, the layout of structures, including padding and bit-field ordering, is implementation-defined, meaning it can vary between different compilers and platforms. Therefore, it is essential to verify that the structure's layout meets your expectations, especially when writing portable code. 
For example: ```c typedef struct { uint16_t count; /* offset 0 */ uint16_t max_count; /* offset 2 */ uint16_t unused0; /* offset 4 */ uint16_t enable : 2; /* offset 6 bits 15-14 */ uint16_t interrupt : 1; /* offset 6 bit 13 */ uint16_t unused1 : 7; /* offset 6 bits 12-6 */ uint16_t complete : 1; /* offset 6 bit 5 */ uint16_t unused2 : 4; /* offset 6 bits 4-1 */ uint16_t periodic : 1; /* offset 6 bit 0 */ } mytimer_t; _Static_assert(sizeof(mytimer_t) == 8, "mytimer_t struct size incorrect (expected 8 bytes)"); ``` To enhance portability, use standard-defined types (e.g., `uint16_t`, `uint32_t`) and avoid relying on compiler-specific behavior. Where precise control over memory layout is required, such as in embedded systems or when interfacing with hardware, always verify the structure size and layout using static assertions. #### Avoid extreme portability Unless programming for micro-controllers or exotic CPU architectures, focus on the common denominator of the modern CPU architectures, avoiding the very maximum portability which can make the code unnecessarily cumbersome. Some examples: - It is fair to assume `sizeof(int) == 4` since it is the case on all modern mainstream architectures. PDP-11 era is long gone. - Using `1U` instead of `UINT32_C(1)` or `(uint32_t) 1` is also fine. - It is fair to assume that `NULL` is matching `(uintptr_t) 0` and it is fair to `memset()` structures with zero. Non-zero `NULL` is for retro computing. ## Git Commit Style Effective version control is critical to modern software development. Git's powerful features—such as granular commits, branching, and a versatile staging area—offer unparalleled flexibility. However, this flexibility can sometimes lead to disorganized commit histories and merge conflicts if not managed with clear, consistent practices. 
By committing often, writing clear messages, and adhering to a common workflow, developers can not only reduce the potential for errors but also simplify collaboration and future maintenance. We encourage every team to tailor these best practices to their specific needs while striving for a shared standard that promotes efficiency and code quality. Below are the detailed guidelines that build on these principles. * Group Related Changes Together: Each commit should encapsulate a single, coherent change. e.g., if you are addressing two separate bugs, create two distinct commits. This approach produces focused, small commits that simplify understanding, enable quick rollbacks, and foster efficient peer reviews. By taking advantage of Git’s staging area and selective file staging, you can craft granular commits that make collaboration smoother and more transparent. * Commit Frequently: Making commits often ensures that your changes remain concise and logically grouped. Frequent commits not only help maintain a clean history but also allow you to share your progress with your teammates regularly. This regular sharing keeps everyone in sync, minimizes merge conflicts, and promotes a collaborative environment where integration happens seamlessly. * Avoid Committing Work in Progress: Only commit code when a logical component is in a stable, ready-to-integrate state. Break your feature's development into manageable segments that reach a functional milestone quickly, so you can commit regularly without compromising quality. If you feel the urge to commit merely to clear your working directory for actions like switching branches or pulling changes, use Git's stash feature instead. This practice helps maintain a stable repository and ensures that your team reviews well-tested, coherent code. * Test Your Code Before Committing: Before committing, ensure that your code has been thoroughly tested. 
Rather than assuming your changes are ready, run comprehensive tests to confirm they work as intended without unintended side effects. Testing is especially critical when sharing your code with others, as it maintains the overall stability of the project and builds trust among collaborators. * Utilize Branches for Parallel Development: Branches are a powerful tool that enables developers to isolate different lines of work—whether you are developing new features, fixing bugs, or exploring innovative ideas. By using branches extensively, you can work on your tasks independently and merge only after careful review and testing. This not only keeps the main branch stable but also encourages collaborative code reviews and a more organized integration process. Clear and descriptive commit messages are crucial for maintaining a transparent history of changes and for facilitating effective debugging and tracking. Please adhere to the guidelines outlined in [How to Write a Git Commit Message](https://cbea.ms/git-commit/). 1. Separate the subject from the body with a blank line. 2. Limit the subject line to 50 characters. 3. Capitalize the subject line. 4. Do not end the subject line with a period. 5. Use the imperative mood in the subject line. 6. Wrap the body at 72 characters. 7. Use the body to explain what and why, not how. An example (derived from Chris' blog post) looks like the following: ```text Summarize changes in around 50 characters or less More detailed explanatory text, if necessary. Wrap it to about 72 characters or so. In some contexts, the first line is treated as the subject of the commit and the rest of the text as the body. The blank line separating the summary from the body is critical (unless you omit the body entirely); various tools like `log`, `shortlog` and `rebase` can get confused if you run the two together. Explain the problem that this commit is solving. Focus on why you are making this change as opposed to how (the code explains that). 
Are there side effects or other unintuitive consequences of this change? Here's the place to explain them. Further paragraphs come after blank lines. - Bullet points are okay, too - Typically a hyphen or asterisk is used for the bullet, preceded by a single space, with blank lines in between, but conventions vary here If you use an issue tracker, put references to them at the bottom, like this: Close #123 ``` Another illustration of effective practice. ```text commit f1775422bb5a1aa6e79a685dfa7cb54a852b567b Author: Jim Huang Date: Mon Feb 24 13:08:32 2025 +0800 Introduce CPU architecture filtering in scheduler In environments with mixed CPU architectures, it is crucial to ensure that an instance runs only on a host with a compatible CPU type—preventing, for example, a RISC-V instance from being scheduled on an Arm host. This new scheduler filter enforces that requirement by comparing an instance's architecture against the host's allowed architectures. For the libvirt driver, the host's guest capabilities are queried, and the permitted architectures are recorded in the permitted_instances_types list within the host's cpu_info dictionary. The filter systematically excludes hosts that do not support the instance's CPU architecture. Additionally, RISC-V has been added to the set of acceptable architectures for scheduling. Note that the CPU architecture filter is disabled by default. ``` The above is a clear, unformatted description provided in plain text. In addition, this project expects contributors to follow these additional rules: * If there is important, useful, or essential conversation or information, include a reference or copy it. * Do not write single-word commits. Provide a descriptive subject. * Avoid using abusive words. * Avoid using backticks in commit subjects. Backticks can be easily confused with single quotes on some terminals, reducing readability. Plain text or single quotes provide sufficient clarity and emphasis. 
* Avoid using parentheses in commit subjects. Excessive use of parentheses "()" can clutter the subject line, making it harder to quickly grasp the essential message. Some conventions are automatically enforced by the [githooks](https://git-scm.com/docs/githooks). ## References - [Linux kernel coding style](https://www.kernel.org/doc/html/latest/process/coding-style.html) - 1999, Brian W. Kernighan and Rob Pike, The Practice of Programming, Addison–Wesley. - 1993, Bill Shannon, [C Style and Coding Standards for SunOS](https://devnull-cz.github.io/unix-linux-prog-in-c/cstyle.ms.pdf) ================================================ FILE: LICENSE ================================================ Copyright (c) 2020-2021, 2023-2026 National Cheng Kung University, Taiwan. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: Makefile ================================================ CFLAGS := -O -g \ -std=c99 -pedantic CFLAGS_TO_CHECK := \ -fwrapv \ -Wall -Wextra \ -Wno-unused-but-set-variable \ -Wno-unused-parameter \ -Wno-unused-function \ -Wshadow \ -Wno-variadic-macros \ -Wno-uninitialized \ -Wno-strict-prototypes \ -Wno-declaration-after-statement \ -Wno-format \ -Wno-format-pedantic \ -Wno-overflow SUPPORTED_CFLAGS := # Check if a specific compiler flag is supported, attempting a dummy compilation # with flags. If successful, it returns the flag string; otherwise, it returns # an empty string. # Usage: $(call check_flag, -some-flag) check_flag = $(shell $(CC) $(1) -S -o /dev/null -xc /dev/null 2>/dev/null; \ if test $$? -eq 0; then echo "$(1)"; fi) # Iterate through the list of all potential flags, effectively filtering out all # unsupported flags. $(foreach flag, $(CFLAGS_TO_CHECK), $(eval CFLAGS += $(call check_flag, $(flag)))) BUILD_SESSION := .session.mk -include $(BUILD_SESSION) STAGE0 := shecc STAGE1 := shecc-stage1.elf STAGE2 := shecc-stage2.elf USE_QEMU ?= 1 OUT ?= out ARCHS = arm riscv ARCH ?= $(firstword $(ARCHS)) SRCDIR := $(shell find src -type d) LIBDIR := $(shell find lib -type d) BUILTIN_LIBC_SOURCE ?= c.c BUILTIN_LIBC_HEADER := c.h STAGE0_FLAGS ?= --dump-ir STAGE1_FLAGS ?= DYNLINK ?= 0 ifeq ($(DYNLINK),1) ifeq ($(ARCH),riscv) # TODO: implement dynamic linking for RISC-V. 
$(error "Dynamic linking mode is not implemented for RISC-V") endif STAGE0_FLAGS += --dynlink STAGE1_FLAGS += --dynlink endif SRCS := $(wildcard $(patsubst %,%/main.c, $(SRCDIR))) OBJS := $(SRCS:%.c=$(OUT)/%.o) deps := $(OBJS:%.o=%.o.d) TESTS := $(wildcard tests/*.c) TESTBINS := $(TESTS:%.c=$(OUT)/%.elf) SNAPSHOTS = $(foreach SNAPSHOT_ARCH,$(ARCHS), $(patsubst tests/%.c, tests/snapshots/%-$(SNAPSHOT_ARCH)-static.json, $(TESTS))) SNAPSHOTS += $(patsubst tests/%.c, tests/snapshots/%-arm-dynamic.json, $(TESTS)) all: config bootstrap sanitizer: CFLAGS += -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer -O0 sanitizer: LDFLAGS += -fsanitize=address -fsanitize=undefined sanitizer: config $(OUT)/$(STAGE0)-sanitizer $(VECHO) " Built stage 0 compiler with sanitizers\n" ifeq (,$(filter $(ARCH),$(ARCHS))) $(error Support ARM and RISC-V only. Select the target with "ARCH=arm" or "ARCH=riscv") endif include mk/$(ARCH).mk include mk/common.mk config: $(Q)ln -s $(PWD)/$(SRCDIR)/$(ARCH)-codegen.c $(SRCDIR)/codegen.c $(Q)$(PRINTF) $(ARCH_DEFS) > $@ $(VECHO) "Target machine code switch to %s\n" $(ARCH) $(Q)$(MAKE) $(BUILD_SESSION) --silent $(Q)$(CONFIG_CHECK_CMD) $(OUT)/tests/%.elf: tests/%.c $(OUT)/$(STAGE0) $(VECHO) " SHECC\t$@\n" $(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $< > $(basename $@).log ; \ chmod +x $@ ; $(PRINTF) "Running $@ ...\n" $(Q)$(TARGET_EXEC) $@ && $(call pass) check: check-stage0 check-stage2 check-abi-stage0 check-abi-stage2 check-stage0: $(OUT)/$(STAGE0) $(TESTBINS) tests/driver.sh $(VECHO) " TEST STAGE 0\n" tests/driver.sh 0 $(DYNLINK) check-stage2: $(OUT)/$(STAGE2) $(TESTBINS) tests/driver.sh $(VECHO) " TEST STAGE 2\n" tests/driver.sh 2 $(DYNLINK) check-sanitizer: $(OUT)/$(STAGE0)-sanitizer tests/driver.sh $(VECHO) " TEST STAGE 0 (with sanitizers)\n" $(Q)cp $(OUT)/$(STAGE0)-sanitizer $(OUT)/shecc tests/driver.sh 0 $(DYNLINK) $(Q)rm $(OUT)/shecc check-snapshots: $(OUT)/$(STAGE0) $(SNAPSHOTS) tests/check-snapshots.sh $(Q)$(foreach SNAPSHOT_ARCH, 
$(ARCHS), $(MAKE) distclean config check-snapshot ARCH=$(SNAPSHOT_ARCH) DYNLINK=0 --silent;) $(Q)$(MAKE) distclean config check-snapshot ARCH=arm DYNLINK=1 --silent $(VECHO) "Switching backend back to %s (DYNLINK=0)\n" arm $(Q)$(MAKE) distclean config ARCH=arm DYNLINK=0 --silent check-snapshot: $(OUT)/$(STAGE0) tests/check-snapshots.sh $(VECHO) "Checking snapshot for %s (DYNLINK=%s)\n" $(ARCH) $(DYNLINK) tests/check-snapshots.sh $(ARCH) $(DYNLINK) $(VECHO) " OK\n" # TODO: Add an ABI conformance test suite for the RISC-V architecture check-abi-stage0: $(OUT)/$(STAGE0) $(Q)if [ "$(ARCH)" = "arm" ]; then \ tests/$(ARCH)-abi.sh 0 $(DYNLINK); \ else \ echo "Skip ABI compliance validation"; \ fi check-abi-stage2: $(OUT)/$(STAGE2) $(Q)if [ "$(ARCH)" = "arm" ]; then \ tests/$(ARCH)-abi.sh 2 $(DYNLINK); \ else \ echo "Skip ABI compliance validation"; \ fi update-snapshots: tests/update-snapshots.sh $(Q)$(foreach SNAPSHOT_ARCH, $(ARCHS), $(MAKE) distclean config update-snapshot ARCH=$(SNAPSHOT_ARCH) DYNLINK=0 --silent;) $(Q)$(MAKE) distclean config update-snapshot ARCH=arm DYNLINK=1 --silent $(VECHO) "Switching backend back to %s (DYNLINK=0)\n" arm $(Q)$(MAKE) distclean config ARCH=arm DYNLINK=0 --silent update-snapshot: $(OUT)/$(STAGE0) tests/update-snapshots.sh $(VECHO) "Updating snapshot for %s (DYNLINK=%s)\n" $(ARCH) $(DYNLINK) tests/update-snapshots.sh $(ARCH) $(DYNLINK) $(VECHO) " OK\n" $(OUT)/%.o: %.c $(VECHO) " CC\t$@\n" $(Q)$(CC) -o $@ $(CFLAGS) -c -MMD -MF $@.d $< SHELL_HACK := $(shell mkdir -p $(OUT) $(OUT)/$(SRCDIR) $(OUT)/tests) $(OUT)/norm-lf: tools/norm-lf.c $(VECHO) " CC+LD\t$@\n" $(Q)$(CC) $(CFLAGS) -o $@ $^ $(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC_SOURCE) $(LIBDIR)/$(BUILTIN_LIBC_HEADER) $(VECHO) " GEN\t$@\n" $(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC_SOURCE) $(OUT)/c.normalized.c $(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC_HEADER) $(OUT)/c.normalized.h $(Q)$(OUT)/inliner $(OUT)/c.normalized.c $(OUT)/c.normalized.h $@ 
$(Q)$(RM) $(OUT)/c.normalized.c $(OUT)/c.normalized.h $(OUT)/inliner: tools/inliner.c $(VECHO) " CC+LD\t$@\n" $(Q)$(CC) $(CFLAGS) -o $@ $^ $(OUT)/$(STAGE0): $(OUT)/libc.inc $(OBJS) $(VECHO) " LD\t$@\n" $(Q)$(CC) $(OBJS) $(LDFLAGS) -o $@ $(OUT)/$(STAGE0)-sanitizer: $(OUT)/libc.inc $(OBJS) $(VECHO) " LD\t$@ (with sanitizers)\n" $(Q)$(CC) $(OBJS) $(LDFLAGS) -o $@ $(OUT)/$(STAGE1): $(OUT)/$(STAGE0) $(Q)$(STAGE1_CHECK_CMD) $(VECHO) " SHECC\t$@\n" $(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log $(Q)chmod a+x $@ $(OUT)/$(STAGE2): $(OUT)/$(STAGE1) $(VECHO) " SHECC\t$@\n" $(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c bootstrap: $(OUT)/$(STAGE2) $(Q)chmod 775 $(OUT)/$(STAGE2) $(Q)if ! diff -q $(OUT)/$(STAGE1) $(OUT)/$(STAGE2); then \ echo "Unable to bootstrap. Aborting"; false; \ fi $(BUILD_SESSION): $(PRINTF) "ARCH=$(ARCH)" > $@ .PHONY: clean clean: -$(RM) $(OUT)/$(STAGE0) $(OUT)/$(STAGE1) $(OUT)/$(STAGE2) -$(RM) $(OBJS) $(deps) -$(RM) $(TESTBINS) $(OUT)/tests/*.log $(OUT)/tests/*.lst -$(RM) $(OUT)/shecc*.log -$(RM) $(OUT)/libc.inc distclean: clean -$(RM) $(OUT)/inliner $(OUT)/norm-lf $(OUT)/target $(SRCDIR)/codegen.c config $(BUILD_SESSION) -$(RM) DOM.dot CFG.dot -include $(deps) ================================================ FILE: README.md ================================================ # shecc : self-hosting and educational C optimizing compiler

logo image

## Introduction `shecc` is built from scratch, targeting both 32-bit Arm and RISC-V architectures, as a self-compiling compiler for a subset of the C language. Despite its simplistic nature, it is capable of performing basic optimization strategies as a standalone optimizing compiler. ### Features * Generate executable Linux ELF binaries for ARMv7-A and RV32IM. * Provide a minimal C standard library for basic I/O on GNU/Linux. * The cross-compiler is written in ANSI C, making it compatible with most platforms. * Include a self-contained C front-end with an integrated machine code generator; no external assembler or linker needed. * Utilize a two-pass compilation process: the first pass checks syntax and breaks down complex statements into basic operations, while the second pass translates these operations into Arm/RISC-V machine code. * Develop a register allocation system that is compatible with RISC-style architectures. * Implement an architecture-independent, [static single assignment](https://en.wikipedia.org/wiki/Static_single-assignment_form) (SSA)-based middle-end for enhanced optimizations. 
## Compatibility `shecc` is capable of compiling C source files written in the following syntax: * data types: `char`, `int`, `struct`, `enum`, `typedef`, and pointer types * condition statements: `if`, `else`, `while`, `for`, `do-while`, `switch`, `case`, `default`, `break`, `continue`, `return`, and general expressions * operators: all arithmetic, logical, bitwise, and assignment operators including compound assignments (`+=`, `-=`, `*=`, `/=`, `%=`, `&=`, `|=`, `^=`, `<<=`, `>>=`) * arrays: global/local arrays with initializers, multi-dimensional arrays * functions: function declarations, definitions, and calls with fixed arguments * variadic functions: basic support via direct pointer arithmetic (no `<stdarg.h>`) * typedef: type aliasing including typedef pointers (`typedef int *ptr_t;`) * pointers: full pointer arithmetic, multi-level pointer dereference (`***ptr`) * global/local variable initializations for all supported data types - e.g. `int i = [expr];`, `int arr[] = {1, 2, 3};` * preprocessor directives: `#define`, `#ifdef`, `#ifndef`, `#elif`, `#else`, `#endif`, `#undef`, `#error`, and `#include` * function-like macros with parameters and `__VA_ARGS__` support The backend targets armv7hf with Linux ABI, verified on Raspberry Pi 3, and also supports RISC-V 32-bit architecture, verified with QEMU. ## Bootstrapping The steps to validate `shecc` bootstrapping: 1. `stage0`: `shecc` source code is initially compiled using an ordinary compiler which generates a native executable. The generated compiler can be used as a cross-compiler. 2. `stage1`: The built binary reads its own source code as input and generates an ARMv7-A/RV32IM binary. 3. `stage2`: The generated ARMv7-A/RV32IM binary is invoked (via QEMU or running on Arm and RISC-V devices) with its own source code as input and generates another ARMv7-A/RV32IM binary. 4. `bootstrap`: Build the `stage1` and `stage2` compilers, and verify that they are byte-wise identical. 
If so, `shecc` can compile its own source code and produce new versions of that same program. ## Prerequisites Code generator in `shecc` does not rely on external utilities. You only need ordinary C compilers such as `gcc` and `clang`. However, `shecc` would bootstrap itself, and Arm/RISC-V ISA emulation is required. Install QEMU for Arm/RISC-V user emulation on GNU/Linux: ```shell $ sudo apt-get install qemu-user ``` The build system is able to verify whether the running machine can perform native execution without QEMU. The host machine may install the prebuilt [fastfetch](https://github.com/fastfetch-cli/fastfetch/), which allows the build system to determine whether native execution can be enabled. It is still possible to build `shecc` on macOS or Microsoft Windows. However, the second stage bootstrapping would fail due to `qemu-arm` absence. To execute the snapshot test, install the packages below: ```shell $ sudo apt-get install graphviz jq ``` Additionally, because `shecc` supports the dynamic linking mode for the Arm architecture, it needs to install the ARM GNU toolchain to obtain the ELF interpreter and other dependencies: ```shell $ sudo apt-get install gcc-arm-linux-gnueabihf ``` Another approach is to manually download and install the toolchain from [ARM Developer website](https://developer.arm.com/downloads/-/arm-gnu-toolchain-downloads). Select "x86_64 Linux hosted cross toolchains" - "AArch32 GNU/Linux target with hard float (arm-none-linux-gnueabihf)" to download the toolchain. 
## Build and Verify Configure the backend you want; `shecc` supports the ARMv7-A and RV32IM backends: ```shell $ make config ARCH=arm # Target machine code switch to Arm $ make config ARCH=riscv # Target machine code switch to RISC-V ``` Run `make` and you should see this: ```shell $ make CC+LD out/inliner GEN out/libc.inc CC out/src/main.o LD out/shecc SHECC out/shecc-stage1.elf SHECC out/shecc-stage2.elf ``` Run `make DYNLINK=1` to use the dynamic linking mode and generate the dynamically linked compiler: ```shell # If using the dynamic linking mode, you should add 'DYNLINK=1' for each 'make' command. $ make DYNLINK=1 CC+LD out/inliner GEN out/libc.inc CC out/src/main.o LD out/shecc SHECC out/shecc-stage1.elf SHECC out/shecc-stage2.elf $ file out/shecc-stage2.elf out/shecc-stage2.elf: ELF 32-bit LSB executable, ARM, EABI5 version 1 (SYSV), dynamically linked, interpreter /lib/ld-linux-armhf.so.3, not stripped ``` For development builds with memory safety checks: ```shell $ make sanitizer $ make check-sanitizer ``` File `out/shecc` is the first stage compiler. Its usage: ```shell $ shecc [-o output] [+m] [--no-libc] [--dump-ir] [--dynlink] <infile.c> ``` Compiler options: - `-o` : Specify output file name (default: `out.elf`) - `+m` : Use hardware multiplication/division instructions (default: disabled) - `--no-libc` : Exclude embedded C library (default: embedded) - `--dump-ir` : Dump intermediate representation (IR) - `--dynlink` : Use dynamic linking (default: disabled) Example 1: static linking mode ```shell $ out/shecc -o fib tests/fib.c $ chmod +x fib $ qemu-arm fib ``` Example 2: dynamic linking mode Notice that `/usr/arm-linux-gnueabihf` is the ELF interpreter prefix. Since the path may be different if you manually install the ARM GNU toolchain instead of using `apt-get`, you should set the prefix to the actual path. 
```shell $ out/shecc --dynlink -o fib tests/fib.c $ chmod +x fib $ qemu-arm -L /usr/arm-linux-gnueabihf fib ``` ### IR Regression Tests To ensure the consistency of frontend (lexer, parser) behavior when working on it, the snapshot test is introduced. The snapshot test dumps IRs from the executable and compares the structural identity with the provided snapshots. Verify the emitted IRs by specifying `check-snapshots` target when invoking `make`: ```shell $ make check-snapshots ``` If the compiler frontend is updated, the emitted IRs might be changed. Thus, you can update snapshots by specifying `update-snapshots` target when invoking `make`: ```shell $ make update-snapshots ``` Notice that the above 2 targets will update all backend snapshots at once, to update/check current backend's snapshot, use `update-snapshot` / `check-snapshot` instead. ### Unit Tests `shecc` comes with a comprehensive test suite (200+ test cases). To run the tests: ```shell # Add 'DYNLINK=1' if using the dynamic linking mode. $ make check # Run all tests (stage 0 and stage 2) $ make check-stage0 # Test stage 0 compiler only $ make check-stage2 # Test stage 2 compiler only $ make check-sanitizer # Test with AddressSanitizer and UBSan ``` The test suite covers: * Basic data types and operators * Control flow statements * Arrays and pointers (including multi-level dereference) * Structs, enums, and typedefs * Variadic functions * Preprocessor directives and macros * Self-hosting validation Reference output: ``` TEST STAGE 0 ... int main(int argc, int argv) { exit(sizeof(char)); } => 1 int main(int argc, int argv) { int a; a = 0; switch (3) { case 0: return 2; case 3: a = 10; break; case 1: return 0; } exit(a); } => 10 int main(int argc, int argv) { int a; a = 0; switch (3) { case 0: return 2; default: a = 10; break; } exit(a); } => 10 OK TEST STAGE 2 ... 
int main(int argc, int argv) { exit(sizeof(char*)); } exit code => 4 output => int main(int argc, int argv) { exit(sizeof(int*)); } exit code => 4 output => OK ``` To clean up the generated compiler files, execute the command `make clean`. For resetting architecture configurations, use the command `make distclean`. ## Intermediate Representation Once the option `--dump-ir` is passed to `shecc`, the intermediate representation (IR) will be generated. Take the file `tests/fib.c` for example. It consists of a recursive Fibonacci sequence function. ```c int fib(int n) { if (n == 0) return 0; else if (n == 1) return 1; return fib(n - 1) + fib(n - 2); } ``` Execute the following to generate IR: ```shell $ out/shecc --dump-ir -o fib tests/fib.c ``` Line-by-line explanation between C source and IR (variable and label numbering may differ): ```c C Source IR Explanation -------------------+--------------------------------------+-------------------------------------------------------------------------------------- int fib(int n) def int @fib(int %n) { { if (n == 0) const %.t871, 0 Load constant 0 into a temporary variable ".t871" %.t872 = eq %n, %.t871 Test if "n" is equal to ".t871", store result in ".t872" br %.t872, .label.1430, .label.1431 If ".t872" is non-zero, branch to label ".label.1430", otherwise to ".label.1431" .label.1430: return 0; const %.t873, 0 Load constant 0 into a temporary variable ".t873" ret %.t873 Return ".t873" .label.1431: else if (n == 1) const %.t874, 1 Load constant 1 into a temporary variable ".t874" %.t875 = eq %n, %.t874 Test if "n" is equal to ".t874", store result in ".t875" br %.t875, .label.1434, .label.1435 If ".t875" is true, branch to ".label.1434", otherwise to ".label.1435" .label.1434: return 1; const %.t876, 1 Load constant 1 into a temporary variable ".t876" ret %.t876 Return ".t876" .label.1435: return fib(n - 1) const %.t877, 1 Load constant 1 into ".t877" %.t878 = sub %n, %.t877 Subtract ".t877" from "n", store in ".t878" push 
%.t878 Prepare argument ".t878" for function call call @fib, 1 Call function "@fib" with 1 argument + retval %.t879 Store the return value in ".t879" fib(n - 2); const %.t880, 2 Load constant 2 into ".t880" %.t881 = sub %n, %.t880 Subtract ".t880" from "n", store in ".t881" push %.t881 Prepare argument ".t881" for function call call @fib, 1 Call function "@fib" with 1 argument retval %.t882 Store the return value in ".t882" %.t883 = add %.t879, %.t882 Add ".t879" and ".t882", store in ".t883" ret %.t883 Return ".t883" } } ``` ## C99 Compliance shecc implements a subset of C99 suitable for self-hosting and systems programming. For detailed information about supported features, missing functionality, and non-standard behaviors, see [COMPLIANCE.md](COMPLIANCE.md). ## Known Issues 2. Full `<stdarg.h>` support is not available. Variadic functions work via direct pointer arithmetic. See the `printf` implementation in `lib/c.c` for the supported approach. 3. The C front-end operates directly on token streams without building a full AST. 4. Complex pointer arithmetic expressions like `*(p + offset)` have limited support. ## License `shecc` is freely redistributable under the BSD 2 clause license. Use of this source code is governed by a BSD-style license that can be found in the `LICENSE` file. 
================================================ FILE: docs/dynamic-linking.md ================================================ # Dynamic Linking ## Build dynamically linked shecc and programs Build the dynamically linked version of shecc, but notice that shecc currently doesn't support dynamic linking for the RISC-V architecture: ```shell $ make ARCH=arm DYNLINK=1 ``` Next, you can use shecc to build dynamically linked programs by adding the `--dynlink` flag: ```shell # Use the stage 0 compiler $ out/shecc --dynlink -o <output> <input.c> # Use the stage 1 or stage 2 compiler $ qemu-arm -L <ld-prefix> out/shecc-stage2.elf --dynlink -o <output> <input.c> # Execute the compiled program $ qemu-arm -L <ld-prefix> <output> ``` When executing a dynamically linked program, you should set the ELF interpreter prefix so that `ld.so` can be invoked. Generally, it should be `/usr/arm-linux-gnueabihf` if you have installed the ARM GNU toolchain by `apt`. Otherwise, you should find and specify the correct path if you manually installed the toolchain. ## Stack frame layout ### Arm32 In both static and dynamic linking modes, the stack frame layout for each function can be illustrated as follows: ``` High Address +------------------+ | incoming args | +------------------+ <- sp + total_size | saved lr | +------------------+ | saved r11 | +------------------+ | saved r10 | +------------------+ | saved r9 | +------------------+ | saved r8 | +------------------+ | saved r7 | +------------------+ | saved r6 | +------------------+ | saved r5 | +------------------+ | saved r4 | +------------------+ | (padding) | +------------------+ | local variables | +------------------+ <- sp + (MAX_PARAMS - MAX_ARGS_IN_REG) * 4 | outgoing args | +------------------+ <- sp (MUST be aligned to 8 bytes) Low Address ``` * `total_size`: includes the size of the following elements: * `outgoing args`: a fixed size - `(MAX_PARAMS - MAX_ARGS_IN_REG) * 4` bytes * `local variables` * `saved r4-r11 and lr`: a fixed size - 36 bytes * Note that the space for `incoming args` belongs 
to the caller's stack frame, while the remaining space belongs to the callee's stack frame. ### RISC-V (Currently not supported) ## Calling Convention ### Arm32 Regardless of which mode is used, the caller performs the following operations to comply with the Arm Architecture Procedure Call Standard (AAPCS) when calling a function. * The first four arguments are put into registers `r0` - `r3` * Any additional arguments are passed on the stack. Arguments are pushed onto the stack starting from the last argument, so the fifth argument resides at a lower address and the last argument at a higher address. * Align the stack pointer to 8 bytes, as external functions may access 8-byte objects that require such alignment. Then, the callee will perform these operations: - Preserve the contents of registers `r4` - `r11` on the stack upon function entry. - The callee also pushes the content of `lr` onto the stack to preserve the return address; however, this operation is not required by the AAPCS. - Restore these registers from the stack upon returning. ### RISC-V In the RISC-V architecture, registers `a0` - `a7` are used as argument registers; that is, the first eight arguments are passed into these registers. Since the current implementation of shecc supports up to 8 arguments, no argument needs to be passed onto the stack. 
## Runtime execution flow of a dynamically linked program ``` | +---------------------------+ | | program | | +-------------+ +----------------+ | | | | shell | | Dynamic linker | | +--------+ +----------+ | userspace | | | | +------+->| entry | | main | | | | $ ./program | | (ld.so) | | | point | | function | | program | +-----+-------+ +----------------+ | +-+------+ +-----+----+ | | | ^ | | ^ | | | | | +----+---------+----+-------+ | | | | | | | | | | | | ----------+-------+---------------------------------------------+--------------------+---------+----+---------------------- | | | | | | | v | v | v | +-------+ (It may be another | +-------------+-----+ +------+ glibc | | execl | | | __libc_start_main +--->| exit | | +---+---+ equivalent call) | +-------------------+ +---+--+ | | | | ----------+-------+---------------------------------------------+---------------------------------------------+------------ system | | | | | v | v call | +------+ (It may be another | +-------+ | | exec | | | _exit | interface | +---+--+ equivalent syscall) | +---+---+ | | | | ----------+-------+---------------------------------------------+---------------------------------------------+------------ | | | | | v | v | +--------------+ +---------------+ +--------+-------------+ +---------------+ | | Validate the | | Create a new | | Startup the kernel's | | Delete the | kernel | | +--->| +--->| | | | | | executable | | process image | | program loader | | process image | | +--------------+ +---------------+ +----------------------+ +---------------+ ``` 1. A running process (e.g.: a shell) executes the specified program (`program`), which is dynamically linked. 2. Kernel validates the executable and creates a process image if the validation passes. 3. Dynamic linker (`ld.so`) is invoked by the kernel's program loader. * For the Arm architecture, the dynamic linker is `/lib/ld-linux-armhf.so.3`. 4. Linker loads shared libraries such as `libc.so`. 5. 
Linker resolves symbols and fills the global offset table (GOT). 6. Control transfers to the program, which starts at the entry point. 7. Program executes `__libc_start_main` at the beginning. 8. `__libc_start_main` calls the *main wrapper*, which pushes registers r4-r11 and lr onto the stack, sets up a global stack for all global variables (excluding read-only variables), and initializes them. 9. Execute the *main wrapper*, and then invoke the main function. 10. After the `main` function returns, the *main wrapper* restores the necessary registers and passes control back to `__libc_start_main`, which implicitly calls `exit(3)` to terminate the program. * Or, the `main` function can also call `exit(3)` or `_exit(2)` to directly terminate itself. ## Dynamic sections When using dynamic linking, the following sections are generated for compiled programs: 1. `.interp` - Path to dynamic linker 2. `.dynsym` - Dynamic symbol table 3. `.dynstr` - Dynamic string table 4. `.rel.plt` - PLT relocations 5. `.plt` - Procedure Linkage Table 6. `.got` - Global Offset Table 7. `.dynamic` - Dynamic linking information ### Initialization of all GOT entries * `GOT[0]` is set to the starting address of the `.dynamic` section. * `GOT[1]` and `GOT[2]` are initialized to zero and reserved for the `link_map` and the resolver (`_dl_runtime_resolve`). * The dynamic linker modifies them to point to the actual addresses at runtime. * `GOT[3]` - `GOT[N]` are initially set to the address of `PLT[0]` at compile time, causing the first call to an external function to invoke the resolver at runtime. ### Explanation for PLT stubs (Arm32) Under the Arm architecture, the resolver assumes that the following three conditions are met: * `[sp]` contains the return address from the original function call. * `ip` stores the address of the callee's GOT entry. * `lr` stores the address of `GOT[2]`. 
Therefore, the first entry (`PLT[0]`) contains the following instructions to satisfy the first and third requirements, and then to invoke the resolver. ``` push {lr} @ (str lr, [sp, #-4]!) movw sl, #:lower16:(&GOT[2]) movt sl, #:upper16:(&GOT[2]) mov lr, sl ldr pc, [lr] ``` 1. Push register `lr` onto the stack. 2. Set register `sl` to the address of `GOT[2]`. 3. Move the value of `sl` to `lr`. 4. Load the value located at `[lr]` into the program counter (`pc`). The remaining PLT entries correspond to all external functions, and each entry includes the following instructions to fulfill the second requirement: ``` movw ip, #:lower16:(&GOT[x]) movt ip, #:upper16:(&GOT[x]) ldr pc, [ip] ``` 1. Set register `ip` to the address of `GOT[x]`. 2. Load the value stored in `GOT[x]` into register `pc`. That is, set `pc` to the address of the callee. ## PLT execution path and performance overhead Since calling an external function needs a PLT stub for indirect invocation, the execution path of the first call is as follows: 1. Call the corresponding PLT stub of the external function. 2. The PLT stub reads the GOT entry. 3. Since the GOT entry is initially set to point to the first PLT entry, the call jumps to `PLT[0]`, which in turn calls the resolver. 4. The resolver handles the symbol and updates the GOT entry. 5. Jump to the actual function to continue execution. For subsequent calls, the execution path only performs steps 1, 2 and 5. Regardless of whether it is the first call or a subsequent call, calling an external function requires executing additional instructions. It is evident that the overhead amounts to 3-8 instructions compared to a direct call. For a bootstrapping compiler, this overhead is acceptable. ## Binding Each external function must perform relocation via the resolver; in other words, each "symbol" needs to **bind** to its actual address. 
There are two types of binding: ### Lazy binding The dynamic linker defers function call resolution until the function is called at runtime. ### Immediate binding The dynamic linker resolves all symbols when the program is started, or when the shared library is loaded via `dlopen`. ## Limitations For the current implementation of dynamic linking, note the following: * GOT is located in a writable segment (`.data` segment). * The `PT_GNU_RELRO` program header has not yet been implemented. * `DT_BIND_NOW` (force immediate binding) is not set. This implies that: * GOT entries can be modified at runtime, which may create a potential ROP (Return-Oriented Programming) attack vector. * Function pointers (GOT entries) might be hijacked due to the absence of full RELRO protection. ## Reference * man page: `ld(1)` * man page: `ld.so(8)` * glibc - [`_dl_runtime_resolve`](https://elixir.bootlin.com/glibc/glibc-2.41.9000/source/sysdeps/arm/dl-trampoline.S#L30) implementation (for Arm32) * Application Binary Interface for the Arm Architecture - [`abi-aa`](https://github.com/ARM-software/abi-aa) * `aaelf32` * `aapcs32` ================================================ FILE: lib/c.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. 
*/ /* minimal libc implementation */ #include "c.h" /* Size of the scratch buffer used when converting an int to text. */ #define INT_BUF_LEN 16 /* Character-class helpers. The argument is expanded several times and is not parenthesized, so pass only a plain variable or constant. */ #define __is_alpha(c) ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) #define __is_digit(c) ((c >= '0' && c <= '9')) #define __is_hex(c) \ (__is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) /* The predicates below test c against fixed ASCII ranges and return non-zero on a match. */ int isdigit(int c) { return __is_digit(c); } int isalpha(int c) { return __is_alpha(c); } int isalnum(int c) { return __is_alpha(c) || __is_digit(c); } int isxdigit(int c) { return __is_hex(c); } /* True only for space and horizontal tab. */ int isblank(int c) { return c == ' ' || c == '\t'; } /* Return the index of the first NUL character in str. */ int strlen(char *str) { /* Unrolled scan: four consecutive characters are tested per iteration. */ int i = 0; for (;; i += 4) { if (!str[i]) return i; if (!str[i + 1]) return i + 1; if (!str[i + 2]) return i + 2; if (!str[i + 3]) return i + 3; } } /* Compare two NUL-terminated strings. Returns -1 or 1 at the first differing position while both strings still have characters; once either string ends, returns the difference of the characters at that position (0 when both end together). */ int strcmp(char *s1, char *s2) { int i = 0; while (s1[i] && s2[i]) { if (s1[i] < s2[i]) return -1; if (s1[i] > s2[i]) return 1; i++; } return s1[i] - s2[i]; } /* Compare at most len characters. Returns -1, 1, or 0; comparison stops early when both strings end at the same position. */ int strncmp(char *s1, char *s2, int len) { int i = 0; while (i < len) { if (s1[i] < s2[i]) return -1; if (s1[i] > s2[i]) return 1; if (!s1[i]) return 0; i++; } return 0; } /* Copy src, including its terminator, into dest and return dest. */ char *strcpy(char *dest, char *src) { int i = 0; while (src[i]) { dest[i] = src[i]; i++; } dest[i] = 0; return dest; } /* Write exactly len characters to dest: characters of src up to and including its terminator, then zero fill. When src holds len or more characters, dest is left without a terminator. */ char *strncpy(char *dest, char *src, int len) { int i = 0; /* becomes 1 once the terminator of src has been copied */ int beyond = 0; while (i < len) { if (beyond == 0) { dest[i] = src[i]; if (src[i] == 0) beyond = 1; } else { dest[i] = 0; } i++; } return dest; } /* Copy count bytes from src to dest in increasing address order and return dest. */ char *memcpy(char *dest, char *src, int count) { int i = 0; /* Continues as long as there are at least 4 bytes remaining to copy. */ for (; i + 4 <= count; i += 4) { dest[i] = src[i]; dest[i + 1] = src[i + 1]; dest[i + 2] = src[i + 2]; dest[i + 3] = src[i + 3]; } /* Ensure all @count bytes are copied, even if @count is not a multiple of * 4, or if @count was less than 4 initially. 
*/ for (; i < count; i++) dest[i] = src[i]; return dest; } /* Compare the first n bytes of s1 and s2 as char values. Returns -1 or 1 at the first difference, 0 when all n bytes match. */ int memcmp(void *s1, void *s2, int n) { char *p1 = (char *) s1, *p2 = (char *) s2; for (int i = 0; i < n; i++) { if (p1[i] < p2[i]) return -1; if (p1[i] > p2[i]) return 1; } return 0; } /* Store the value of c, converted to char, into the first n bytes of s and return s. Four bytes are written per iteration, then the remaining tail. */ void *memset(void *s, int c, int n) { int i = 0; char *ptr = (char *) s; char byte_val = (char) c; for (; i + 4 <= n; i += 4) { ptr[i] = byte_val; ptr[i + 1] = byte_val; ptr[i + 2] = byte_val; ptr[i + 3] = byte_val; } for (; i < n; i++) ptr[i] = byte_val; return s; } /* set 10 digits (32bit) without div * * This function converts a given integer value to its string representation * in base-10 without using division operations. The method involves calculating * the approximate quotient and remainder using bitwise operations, which are * then used to derive each digit of the result. * * The logic is based on an efficient method of dividing by constants, as * detailed in the reference link: * http://web.archive.org/web/20180517023231/http://www.hackersdelight.org/divcMore.pdf. * This approach avoids expensive division instructions by using a series of * bitwise shifts and additions to calculate the quotient and remainder. * * Digits are produced from the least significant one, filling pb backwards * from pb[INT_BUF_LEN - 1]. Each digit is added to the byte already stored in * pb, so the caller has to pre-fill pb with '0' (as __format() does). */ void __str_base10(char *pb, int val) { int neg = 0; int q, r, t; int i = INT_BUF_LEN - 1; /* This value is handled separately, before the negation below, by copying its 11-character text into the last 11 slots of pb. */ if (val == -2147483648) { strncpy(pb + INT_BUF_LEN - 11, "-2147483648", 11); return; } if (val < 0) { neg = 1; val = -val; } while (val) { /* q approximates val / 10 using shifts and additions only */ q = (val >> 1) + (val >> 2); q += (q >> 4); q += (q >> 8); q += (q >> 16); q >>= 3; /* r = val - q * 10 */ r = val - (((q << 2) + q) << 1); /* correction step: t is non-zero once r has reached 10; the quotient is bumped by t and r reduced by t * 10 */ t = ((r + 6) >> 4); q += t; r -= (((t << 2) + t) << 1); pb[i] += r; val = q; i--; } if (neg) pb[i] = '-'; } /* Write val as octal digits into pb, filling backwards from pb[INT_BUF_LEN - 1]. */ void __str_base8(char *pb, int val) { int c = INT_BUF_LEN - 1, v; /* Because every 3 binary digits can be converted to 1 octal digit, here * performs the conversion 10 times, derived from 32 divided by 3. * * Finally, the remaining 2 bits are processed after the loop. 
*/ int times = (sizeof(int) << 3) / 3; for (int i = 0; i < times; i++) { v = val & 0x7; pb[c] = '0' + v; val >>= 3; c--; } /* leftover high bits that do not form a full 3-bit group */ v = val & 0x3; pb[c] = '0' + v; } /* Write val as lowercase hexadecimal digits into pb, filling backwards from pb[INT_BUF_LEN - 1]. One digit is produced per 4 bits, sizeof(int) << 1 digits in total. */ void __str_base16(char *pb, int val) { int c = INT_BUF_LEN - 1; int times = sizeof(int) << 1; for (int i = 0; i < times; i++) { int v = val & 0xf; if (v < 10) pb[c] = '0' + v; else if (v < 16) pb[c] = 'a' + v - 10; else { /* defensive only: v was masked to the range 0..15 above */ abort(); break; } val >>= 4; c--; } } /* The specification of snprintf() is defined in C99 7.19.6.5, and its behavior * and return value should comply with the following description: * - If n is zero, nothing is written. * - Writes at most n bytes, including the null character. * - On success, the return value should be the length of the entire converted * string even if n is insufficient to store it. * * Thus, a structure fmtbuf_t is defined for formatted output conversion for * the functions in the printf() family. * @buf: the current position of the buffer. * @n : the remaining space of the buffer. * @len: the number of characters that would have been written (excluding the * null terminator) had n been sufficiently large. * * Once a write operation is performed, buf and n will be respectively * incremented and decremented by the actual written size if n is sufficient, * and len must be incremented to store the length of the entire converted * string. */ typedef struct { char *buf; int n; int len; } fmtbuf_t; /* Append one character: len is always incremented, while the low 8 bits of val are stored only if space remains. */ void __fmtbuf_write_char(fmtbuf_t *fmtbuf, int val) { fmtbuf->len += 1; /* Write the given character when n is greater than 1. * This means preserving one position for the null character. */ if (fmtbuf->n <= 1) return; char ch = (char) (val & 0xFF); fmtbuf->buf[0] = ch; fmtbuf->buf += 1; fmtbuf->n -= 1; } /* Append the first l characters of str: len always grows by l, while the stored part is clipped to the remaining space. */ void __fmtbuf_write_str(fmtbuf_t *fmtbuf, char *str, int l) { fmtbuf->len += l; /* Write the given string when n is greater than 1. * This means preserving one position for the null character. 
*/ if (fmtbuf->n <= 1) return; /* If the remaining space is less than the length of the string, write only * n - 1 bytes. */ int sz = fmtbuf->n - 1; l = l <= sz ? l : sz; strncpy(fmtbuf->buf, str, l); fmtbuf->buf += l; fmtbuf->n -= l; } /* Convert val to text in the given base and append it to fmtbuf. * @width : minimum field width; shorter output is padded on the left. * @zeropad : pad with '0' instead of ' ' when non-zero. * @base : 8, 10 or 16; any other value aborts. * @alternate_form: when non-zero, prefix a non-zero octal value with "0" and * a non-zero hexadecimal value with "0x". */ void __format(fmtbuf_t *fmtbuf, int val, int width, int zeropad, int base, int alternate_form) { char pb[INT_BUF_LEN], ch; int pbi; /* Fill with the character '0' so that unused leading positions read as zero digits (the base-10 converter also adds its digits onto these bytes). */ for (pbi = 0; pbi < INT_BUF_LEN; pbi++) pb[pbi] = '0'; pbi = 0; switch (base) { case 8: __str_base8(pb, val); break; case 10: __str_base10(pb, val); break; case 16: __str_base16(pb, val); break; default: abort(); break; } /* Skip leading zeros but always keep the last position, so the value 0 still prints one digit. */ while (pb[pbi] == '0' && pbi < INT_BUF_LEN - 1) pbi++; /* With zero padding the sign or prefix is emitted right away so the padding ends up between it and the digits; otherwise it is stored in pb in front of the digits. */ switch (base) { case 8: if (alternate_form) { if (width && zeropad && pb[pbi] != '0') { __fmtbuf_write_char(fmtbuf, '0'); width -= 1; } else if (pb[pbi] != '0') pb[--pbi] = '0'; } break; case 10: if (width && zeropad && pb[pbi] == '-') { __fmtbuf_write_char(fmtbuf, '-'); pbi++; width--; } break; case 16: if (alternate_form) { if (width && zeropad && pb[pbi] != '0') { __fmtbuf_write_char(fmtbuf, '0'); __fmtbuf_write_char(fmtbuf, 'x'); width -= 2; } else if (pb[pbi] != '0') { pb[--pbi] = 'x'; pb[--pbi] = '0'; } } break; } /* What is left of width is the number of padding characters. */ width -= (INT_BUF_LEN - pbi); if (width < 0) width = 0; ch = zeropad ? 
'0' : ' '; while (width) { __fmtbuf_write_char(fmtbuf, ch); width--; } __fmtbuf_write_str(fmtbuf, pb + pbi, INT_BUF_LEN - pbi); } void __format_to_buf(fmtbuf_t *fmtbuf, char *format, int *var_args) { int si = 0, pi = 0; while (format[si]) { if (format[si] != '%') { __fmtbuf_write_char(fmtbuf, format[si]); si++; } else { int w = 0, zp = 0, pp = 0, v = var_args[pi], l; si++; if (format[si] == '#') { pp = 1; si++; } if (format[si] == '0') { zp = 1; si++; } if (format[si] >= '1' && format[si] <= '9') { w = format[si] - '0'; si++; while (format[si] >= '0' && format[si] <= '9') { w *= 10; w += format[si] - '0'; si++; } } switch (format[si]) { case 's': /* append param pi as string */ l = strlen((char *) v); __fmtbuf_write_str(fmtbuf, (char *) v, l); break; case 'c': /* append param pi as char */ __fmtbuf_write_char(fmtbuf, (char) v); break; case 'o': /* append param as octal */ __format(fmtbuf, v, w, zp, 8, pp); break; case 'd': /* append param as decimal */ __format(fmtbuf, v, w, zp, 10, 0); break; case 'x': /* append param as hex */ __format(fmtbuf, v, w, zp, 16, pp); break; case '%': /* append literal '%' character */ __fmtbuf_write_char(fmtbuf, '%'); si++; continue; } pi++; si++; } } /* If n is still greater than 0, set the null character. */ if (fmtbuf->n) fmtbuf->buf[0] = 0; } int printf(char *str, ...) { char buffer[200]; fmtbuf_t fmtbuf; fmtbuf.buf = buffer; fmtbuf.n = INT_MAX; fmtbuf.len = 0; __format_to_buf(&fmtbuf, str, &str + 4); return __syscall(__syscall_write, 1, buffer, fmtbuf.len); } int sprintf(char *buffer, char *str, ...) { fmtbuf_t fmtbuf; fmtbuf.buf = buffer; fmtbuf.n = INT_MAX; fmtbuf.len = 0; __format_to_buf(&fmtbuf, str, &str + 4); return fmtbuf.len; } int snprintf(char *buffer, int n, char *str, ...) 
{ fmtbuf_t fmtbuf; fmtbuf.buf = buffer; fmtbuf.n = n; fmtbuf.len = 0; __format_to_buf(&fmtbuf, str, &str + 4); return fmtbuf.len; } int __free_all(void); /* Release the allocator's mappings via __free_all(), then issue the exit system call with exit_code. */ void exit(int exit_code) { __free_all(); __syscall(__syscall_exit, exit_code); } /* Print a diagnostic and exit with status -1. */ void abort(void) { printf("Abnormal program termination\n"); exit(-1); } /* Open filename and return the raw system call result as the stream. Only the modes "wb" and "rb" are recognized; any other mode yields NULL. * NOTE(review): 65 looks like O_WRONLY | O_CREAT, 0x1fd like permission bits 0775 and -100 like AT_FDCWD - confirm against the Linux headers. * NOTE(review): the system call result is returned unchecked; whether a failed open can be told apart from a valid stream depends on what __syscall returns - verify. */ FILE *fopen(char *filename, char *mode) { if (!strcmp(mode, "wb")) { #if defined(__arm__) return __syscall(__syscall_open, filename, 65, 0x1fd); #elif defined(__riscv) /* FIXME: the mode argument does not work currently on RISC-V */ return __syscall(__syscall_openat, -100, filename, 65, 0x1fd); #endif } if (!strcmp(mode, "rb")) { #if defined(__arm__) return __syscall(__syscall_open, filename, 0, 0); #elif defined(__riscv) return __syscall(__syscall_openat, -100, filename, 0, 0); #endif } return NULL; } /* Issue the close system call on stream. The result is ignored and 0 is always returned. */ int fclose(FILE *stream) { __syscall(__syscall_close, stream); return 0; } /* Read one byte from the file descriptor into a zero-initialized int. The * return value is that int, or -1 when the read system call returns fewer * than one byte (end of file or error). 
*/ int fgetc(FILE *stream) { int buf = 0, r = __syscall(__syscall_read, stream, &buf, 1); if (r < 1) return -1; return buf; } /* Read up to n - 1 characters into str, stopping after a newline (which is kept) or at end of input, and NUL-terminate the result. Returns str, or NULL when no character could be read at all. */ char *fgets(char *str, int n, FILE *stream) { int i; for (i = 0; i < n - 1; i++) { int c = fgetc(stream); if (c == -1) { if (i == 0) /* EOF on first char */ return NULL; /* EOF in the middle */ str[i] = 0; return str; } /* Use explicit cast for clarity */ str[i] = (char) c; if (c == '\n') { str[i + 1] = 0; return str; } } str[i] = 0; return str; } /* Write one byte taken from the address of c. Returns c, or -1 when the write system call reports a negative result. */ int fputc(int c, FILE *stream) { if (__syscall(__syscall_write, stream, &c, 1) < 0) return -1; return c; } /* Reposition stream. Returns 1 when the system call result is exactly -1 and 0 otherwise. * NOTE(review): the five-argument RISC-V call looks like the _llseek layout (fd, offset high, offset low, result pointer, whence) with a NULL result pointer - confirm. */ int fseek(FILE *stream, int offset, int whence) { int result; #if defined(__arm__) result = __syscall(__syscall_lseek, stream, offset, whence); #elif defined(__riscv) /* No need to offset */ result = __syscall(__syscall_lseek, stream, 0, offset, NULL, whence); #else #error "Unsupported fseek support for current platform" #endif return result == -1; } /* Report the current position by seeking 0 bytes from SEEK_CUR. On Arm the system call result is returned directly; on RISC-V the position is read back through the result pointer. */ int ftell(FILE *stream) { #if defined(__arm__) return __syscall(__syscall_lseek, stream, 0, SEEK_CUR); #elif defined(__riscv) int result; __syscall(__syscall_lseek, stream, 0, 0, &result, SEEK_CUR); return result; #else #error "Unsupported ftell support for current platform" #endif } /* Bit 0 of chunk_t.size is the "freed" flag; the remaining bits hold the chunk size in bytes. */ #define CHUNK_SIZE_FREED_MASK 1 #define CHUNK_SIZE_SZ_MASK 0xFFFFFFFE #define CHUNK_GET_SIZE(size) (size & CHUNK_SIZE_SZ_MASK) #define IS_CHUNK_GET_FREED(size) (size & CHUNK_SIZE_FREED_MASK) /* Header placed in front of every allocation; the pointer handed to the caller is the address just past this header. */ typedef struct chunk { struct chunk *next, *prev; int size; } chunk_t; void chunk_set_freed(chunk_t *chunk) { chunk->size |= CHUNK_SIZE_FREED_MASK; } void chunk_clear_freed(chunk_t *chunk) { chunk->size &= CHUNK_SIZE_SZ_MASK; } /* Round size up to a multiple of PAGESIZE. */ int __align_up(int size) { return ALIGN_UP(size, PAGESIZE); } /* __alloc_head and __alloc_tail delimit the doubly linked list of chunks in use; __freelist_head is the first chunk of the list of freed chunks. */ chunk_t *__alloc_head; chunk_t *__alloc_tail; chunk_t *__freelist_head; /* Allocate at least size bytes. A freed chunk is reused when one fits; otherwise a fresh mapping is requested with the mmap2 system call. Returns NULL when size is not positive. */ void *malloc(int size) { if (size <= 0) return NULL; int flags = 34; /* MAP_PRIVATE (0x02) | MAP_ANONYMOUS (0x20) */ int prot = 3; /* PROT_READ (0x01) | PROT_WRITE (0x02) */ /* Align size to MIN_ALIGNMENT */ size = 
ALIGN_UP(size, MIN_ALIGNMENT); if (!__alloc_head) { chunk_t *tmp = __syscall(__syscall_mmap2, NULL, __align_up(sizeof(chunk_t)), prot, flags, -1, 0); __alloc_head = tmp; __alloc_tail = tmp; __alloc_head->next = NULL; __alloc_head->prev = NULL; __alloc_head->size = 0; } if (!__freelist_head) { chunk_t *tmp = __syscall(__syscall_mmap2, NULL, __align_up(sizeof(chunk_t)), prot, flags, -1, 0); __freelist_head = tmp; __freelist_head->next = NULL; __freelist_head->prev = NULL; __freelist_head->size = -1; } /* Search for the best fit chunk in the free list */ chunk_t *best_fit_chunk = NULL; chunk_t *allocated; int best_size = 0; if (!__freelist_head->next) { allocated = NULL; } else { for (chunk_t *fh = __freelist_head; fh->next; fh = fh->next) { int fh_size = CHUNK_GET_SIZE(fh->size); if (fh_size >= size && (!best_fit_chunk || fh_size < best_size)) { best_fit_chunk = fh; best_size = fh_size; } } if (best_fit_chunk) { /* Remove from freelist */ if (best_fit_chunk->prev) { best_fit_chunk->prev->next = best_fit_chunk->next; } else { __freelist_head = best_fit_chunk->next; } if (best_fit_chunk->next) { best_fit_chunk->next->prev = best_fit_chunk->prev; } } allocated = best_fit_chunk; } if (!allocated) { allocated = __syscall(__syscall_mmap2, NULL, __align_up(sizeof(chunk_t) + size), prot, flags, -1, 0); allocated->size = __align_up(sizeof(chunk_t) + size); } /* Add to allocation list */ __alloc_tail->next = allocated; allocated->prev = __alloc_tail; __alloc_tail = allocated; __alloc_tail->next = NULL; __alloc_tail->size = allocated->size; chunk_clear_freed(__alloc_tail); void *ptr = (void *) (__alloc_tail + 1); return ptr; } void *calloc(int n, int size) { /* Check for overflow before multiplication */ if (!n || !size) return NULL; if (n > INT_MAX / size) return NULL; /* Overflow protection */ int total = n * size; char *p = malloc(total); if (!p) return NULL; return memset(p, 0, total); } void __rfree(void *ptr, int size) { if (!ptr) return; __syscall(__syscall_munmap, ptr, 
size); } /* Unmap the chunks on the free list and the chunks still in use. Called from exit(). Always returns 0. */ int __free_all(void) { if (!__freelist_head && !__alloc_head) return 0; chunk_t *cur = __freelist_head; chunk_t *rel; int size; /* release freelist; the loop stops at the last node, which is not unmapped here */ while (cur && cur->next) { rel = cur; cur = cur->next; rel->next = NULL; rel->prev = NULL; size = CHUNK_GET_SIZE(rel->size); __rfree(rel, size); } if (__alloc_head && __alloc_head->next) { cur = __alloc_head->next; /* release chunks that have not been freed; the walk starts after __alloc_head itself */ while (cur) { rel = cur; cur = cur->next; rel->next = NULL; rel->prev = NULL; size = CHUNK_GET_SIZE(rel->size); __rfree(rel, size); } } return 0; } /* Return the allocation at ptr to the free list. A NULL ptr is ignored; a chunk whose freed flag is already set is reported as a double free and the program aborts. */ void free(void *ptr) { if (!ptr) return; char *__ptr = (char *) ptr; /* the chunk header sits immediately before the payload */ chunk_t *cur = (chunk_t *) (__ptr - sizeof(chunk_t)); if (IS_CHUNK_GET_FREED(cur->size)) { printf("free(): double free detected\n"); abort(); } /* unlink cur from the list of chunks in use */ chunk_t *prev = NULL; if (cur->prev) { prev = cur->prev; prev->next = cur->next; } else { __alloc_head = cur->next; } if (cur->next) { chunk_t *next = cur->next; next->prev = cur->prev; } else if (prev) { /* cur was the last chunk in use, so the tail moves back */ prev->next = NULL; __alloc_tail = prev; } /* Insert head in __freelist_head */ cur->next = __freelist_head; cur->prev = NULL; chunk_set_freed(cur); if (__freelist_head) __freelist_head->prev = cur; __freelist_head = cur; } ================================================ FILE: lib/c.h ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. 
*/ #pragma once /* Declarations of C standard library functions */ #define NULL 0 #define bool _Bool #define true 1 #define false 0 /* Limits of the 32-bit int type */ #define INT_MAX 0x7fffffff #define INT_MIN 0x80000000 /* Values for the whence argument of fseek() */ #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 /* System call numbers, passed as the first argument of __syscall(). One set is selected per target architecture. */ #if defined(__arm__) #define __SIZEOF_POINTER__ 4 #define __syscall_exit 1 #define __syscall_read 3 #define __syscall_write 4 #define __syscall_close 6 #define __syscall_open 5 #define __syscall_lseek 19 #define __syscall_mmap2 192 #define __syscall_munmap 91 #elif defined(__riscv) #define __SIZEOF_POINTER__ 4 #define __syscall_exit 93 #define __syscall_read 63 #define __syscall_write 64 #define __syscall_close 57 #define __syscall_open 1024 #define __syscall_openat 56 #define __syscall_lseek 62 #define __syscall_mmap2 222 #define __syscall_munmap 215 #else /* Only Arm32 and RV32 are supported */ #error "Unsupported architecture" #endif /* Non-portable: Assume page size is 4KiB */ #define PAGESIZE 4096 /* Minimum alignment for all memory allocations. 
*/ #define MIN_ALIGNMENT 8 /* Round val up to the next multiple of align. The mask arithmetic is only correct when align is a power of two. */ #define ALIGN_UP(val, align) (((val) + (align) - 1) & ~((align) - 1)) /* va_list support for variadic functions */ typedef int *va_list; /* Character predicate functions */ int isdigit(int c); int isalpha(int c); int isalnum(int c); int isxdigit(int c); int isblank(int c); /* File I/O. FILE is a plain int: lib/c.c returns the raw result of the open system call as the stream and passes it back to the read/write/close system calls. */ typedef int FILE; FILE *fopen(char *filename, char *mode); int fclose(FILE *stream); int fgetc(FILE *stream); char *fgets(char *str, int n, FILE *stream); int fputc(int c, FILE *stream); int fseek(FILE *stream, int offset, int whence); int ftell(FILE *stream); /* string-related functions */ int strlen(char *str); int strcmp(char *s1, char *s2); int strncmp(char *s1, char *s2, int len); char *strcpy(char *dest, char *src); char *strncpy(char *dest, char *src, int len); char *memcpy(char *dest, char *src, int count); int memcmp(void *s1, void *s2, int n); void *memset(void *s, int c, int n); /* formatted output string */ int printf(char *str, ...); int sprintf(char *buffer, char *str, ...); int snprintf(char *buffer, int n, char *str, ...); /* Terminating program */ void exit(int exit_code); void abort(void); /* Dynamic memory allocation/deallocation functions */ void *malloc(int size); void *calloc(int n, int size); void free(void *ptr); ================================================ FILE: mk/arm.mk ================================================ # Allow the following machines to use native execution # # - Beaglebone Black (Cortex-A8) # - Raspberry Pi 3 (Cortex-A53) # - Raspberry Pi 4 (Cortex-A72) # - Raspberry Pi 5 (Cortex-A76) ALLOW_MACHINES = BeagleBone-Black Raspberry-Pi-3 Raspberry-Pi-4 Raspberry-Pi-5 ARCH_RUNNER = qemu-arm ARCH_DEFS = \ "/* target: ARM */\n$\ \#pragma once\n$\ \#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\ \#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\ \#define ELF_FLAGS 0x5000400\n$\ \#define DYN_LINKER \"/lib/ld-linux-armhf.so.3\"\n$\ \#define LIBC_SO \"libc.so.6\"\n$\ \#define 
PLT_FIXUP_SIZE 20\n$\ \#define PLT_ENT_SIZE 12\n$\ \#define R_ARCH_JUMP_SLOT 0x16\n$\ \#define MAX_ARGS_IN_REG 4\n$\ " # If the running machine has the "fastfetch" tool installed, the build # system will verify whether native execution can be performed. ifneq ($(shell which fastfetch),) # 1. Replace whitespaces with hyphens after retrieving the host # machine name via the "fastfetch" tool. # # 2. If at least one machine name in the allowlist is found in # the host machine name, it can perform native execution. # # Therefore, set USE_QEMU to 0. HOST_MACHINE = $(shell fastfetch --logo none --structure Host | sed 's/ /-/g') USE_QEMU = $(if $(strip $(foreach MACHINE, $(ALLOW_MACHINES), $(findstring $(MACHINE),$(HOST_MACHINE)))),0,1) # Special case: GitHub workflows on Arm64 runners # # When an Arm-hosted runner executes "fastfetch --logo none --structure Host", # it produces the following output: # # Host: Virtual Machine (Hyper-V UEFI Release v4.1) # # Arm-hosted runners are also capable of performing native execution. However, # directly adding "Virtual-Machine" to the allowlist would be ambiguous. # Therefore, the build system instead checks the CPU name using the # "fastfetch --logo none --structure CPU" command. # # If the detected CPU is "Neoverse-N2", the build system treats the running # machine as an Arm-hosted runner and enables native execution. ifeq ($(USE_QEMU),1) HOST_CPU = $(shell fastfetch --logo none --structure CPU | sed 's/ /-/g') USE_QEMU = $(if $(strip $(findstring Neoverse-N2,$(HOST_CPU))),0,1) endif endif # Find the sysroot of the ARM GNU toolchain if using dynamic linking. # # Since developers may install the toolchain manually instead of # using a package manager such as apt, we cannot assume that the # path of ld-linux is always "/usr/arm-linux-gnueabihf". # # Therefore, the following process first locates the correct # sysroot of the toolchain, and then generates the ELF interpreter # prefix for later use. 
ifeq ($(USE_QEMU),1) ifeq ($(DYNLINK),1) CROSS_COMPILE = arm-none-linux-gnueabihf- ARM_CC = $(CROSS_COMPILE)gcc ARM_CC := $(shell which $(ARM_CC)) ifndef ARM_CC CROSS_COMPILE = arm-linux-gnueabihf- ARM_CC = $(CROSS_COMPILE)gcc ARM_CC := $(shell which $(ARM_CC)) ifndef ARM_CC $(error "Unable to find ARM GNU toolchain.") endif endif LD_LINUX_PATH := $(shell cd $(shell $(ARM_CC) --print-sysroot) 2>/dev/null && pwd) ifeq ("$(LD_LINUX_PATH)","/") LD_LINUX_PATH := $(shell dirname "$(shell which $(ARM_CC))")/.. LD_LINUX_PATH := $(shell cd $(LD_LINUX_PATH) 2>/dev/null && pwd) LD_LINUX_PATH := $(LD_LINUX_PATH)/$(shell echo $(CROSS_COMPILE) | sed s'/.$$//')/libc LD_LINUX_PATH := $(shell cd $(LD_LINUX_PATH) 2>/dev/null && pwd) ifndef LD_LINUX_PATH LD_LINUX_PATH = /usr/$(shell echo $(CROSS_COMPILE) | sed s'/.$$//') LD_LINUX_PATH := $(shell cd $(LD_LINUX_PATH) 2>/dev/null && pwd) endif endif ifndef LD_LINUX_PATH $(error "Dynamic linking mode requires ld-linux.so") endif RUNNER_LD_PREFIX = -L $(LD_LINUX_PATH) endif endif ================================================ FILE: mk/common.mk ================================================ UNAME_S := $(shell uname -s) ifeq ($(UNAME_S),Darwin) PRINTF = printf else PRINTF = env printf endif # Control the build verbosity ifeq ("$(VERBOSE)","1") Q := VECHO = @true REDIR = else Q := @ VECHO = @$(PRINTF) REDIR = >/dev/null endif # Test suite PASS_COLOR = \e[32;01m NO_COLOR = \e[0m pass = $(PRINTF) "$(PASS_COLOR)$1 Passed$(NO_COLOR)\n" # Check the prerequisites PREREQ_LIST := dot jq TARGET_EXEC ?= ifeq ($(USE_QEMU),1) # Add qemu to the list if the host and target architectures differ PREREQ_LIST += $(ARCH_RUNNER) ifeq ($(filter $(ARCH_RUNNER),$(notdir $(shell which $(ARCH_RUNNER)))),) STAGE1_WARN_MSG := "Warning: failed to build the stage 1 and $\ stage 2 compilers due to missing $(ARCH_RUNNER)\n" STAGE1_CHECK_CMD := $(VECHO) $(STAGE1_WARN_MSG) && exit 1 endif # Generate the path to the architecture-specific qemu TARGET_EXEC = $(shell 
which $(ARCH_RUNNER)) ifeq ($(DYNLINK),1) TARGET_EXEC += $(RUNNER_LD_PREFIX) endif endif export TARGET_EXEC PREREQ_EXEC := $(shell which $(PREREQ_LIST)) PREREQ_MISSING := $(filter-out $(notdir $(PREREQ_EXEC)),$(PREREQ_LIST)) ifdef PREREQ_MISSING CONFIG_WARN_MSG := "Warning: missing packages: $(PREREQ_MISSING)\n$\ Warning: Please check package installation\n" CONFIG_CHECK_CMD := $(VECHO) $(CONFIG_WARN_MSG) endif ================================================ FILE: mk/riscv.mk ================================================ # Enforce the use of qemu by setting the ALLOW_MACHINES variable to empty ALLOW_MACHINES = ARCH_RUNNER = qemu-riscv32 ARCH_DEFS = \ "/* target: RISCV */\n$\ \#pragma once\n$\ \#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\ \#define ELF_MACHINE 0xf3\n$\ \#define ELF_FLAGS 0\n$\ \#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\ \#define LIBC_SO \"libc.so.6\"\n$\ \#define PLT_FIXUP_SIZE 20\n$\ \#define PLT_ENT_SIZE 12\n$\ \#define R_ARCH_JUMP_SLOT 0x5\n$\ \#define MAX_ARGS_IN_REG 8\n$\ " # TODO: Set this variable for RISC-V architecture RUNNER_LD_PREFIX= ================================================ FILE: src/arch-lower.c ================================================ /* * shecc - Architecture-specific IR lowering stage * * Introduces a minimal arch-lowering boundary that applies target-specific * tweaks to phase-2 IR (ph2_ir) before final code generation. This keeps * backends simpler by moving decisions that depend on CFG shape or target * quirks out of emit-time where possible. */ #include "../config" #include "defs.h" /* ARM-specific lowering: * - Mark detached conditional branches so codegen can decide between * short/long forms without re-deriving CFG shape. 
*/ void arm_lower(void) { for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) { for (ph2_ir_t *insn = bb->ph2_ir_list.head; insn; insn = insn->next) { /* Mark branches that don't fall through to next block */ if (insn->op == OP_branch) { /* In SSA, we index 'else_bb' first, and then 'then_bb' */ insn->is_branch_detached = (insn->else_bb != bb->rpo_next); } } } } } /* RISC-V-specific lowering: * - Mark detached conditional branches * - Future: prepare for RISC-V specific patterns */ void riscv_lower(void) { for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) { for (ph2_ir_t *insn = bb->ph2_ir_list.head; insn; insn = insn->next) { /* Mark branches that don't fall through to next block */ if (insn->op == OP_branch) insn->is_branch_detached = (insn->else_bb != bb->rpo_next); } } } } /* Entry point: dispatch to the active architecture. */ void arch_lower(void) { #if ELF_MACHINE == 0x28 /* ARM */ arm_lower(); #elif ELF_MACHINE == 0xf3 /* RISC-V */ riscv_lower(); #else /* Unknown architecture: keep behavior as-is. */ (void) 0; #endif } ================================================ FILE: src/arm-codegen.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. */ /* Translate IR to target machine code */ #include "arm.c" #include "defs.h" #include "globals.c" void update_elf_offset(ph2_ir_t *ph2_ir) { func_t *func; switch (ph2_ir->op) { case OP_load_constant: /* ARMv7 uses 12 bits to encode immediate value, but the higher 4 bits * are for rotation. 
See A5.2.4 "Modified immediate constants in ARM * instructions" in ARMv7-A manual. */
        /* Byte counts here have to equal what emit_ph2_ir() emits for
         * OP_load_constant: movw + movt + rsb for a negative value,
         * movw + movt for a value above 255, one mov otherwise.
         */
        if (ph2_ir->src0 < 0)
            elf_offset += 12;
        else if (ph2_ir->src0 > 255)
            elf_offset += 8;
        else
            elf_offset += 4;
        return;
    case OP_address_of:
    case OP_global_address_of:
        /* ARMv7 uses 12 bits to encode immediate value, but the higher 4 bits
         * are for rotation. See A5.2.4 "Modified immediate constants in ARM
         * instructions" in ARMv7-A manual.
         *
         * Offsets above 255 cost three instructions (movw + movt + add);
         * negative offsets are rejected.
         */
        if (ph2_ir->src0 > 255)
            elf_offset += 12;
        else if (ph2_ir->src0 >= 0)
            elf_offset += 4;
        else
            abort();
        return;
    case OP_assign:
        /* A self-assignment (dest == src0) is counted as zero bytes.
         * NOTE(review): emit_ph2_ir() must emit nothing for that case too,
         * otherwise every later offset is off by 4 - confirm.
         */
        if (ph2_ir->dest != ph2_ir->src0)
            elf_offset += 4;
        return;
    case OP_load:
    case OP_global_load:
        /* ARMv7 straight uses 12 bits to encode the offset of load instruction
         * (no rotation).
         *
         * Beyond 4095 the emitter builds the address first
         * (movw + movt + add + ldr = 16 bytes).
         */
        if (ph2_ir->src0 > 4095)
            elf_offset += 16;
        else if (ph2_ir->src0 >= 0)
            elf_offset += 4;
        else
            abort();
        return;
    case OP_store:
    case OP_global_store:
        /* ARMv7 straight uses 12 bits to encode the offset of store instruction
         * (no rotation).
         *
         * Beyond 4095 the emitter builds the address first
         * (movw + movt + add + str = 16 bytes).
         */
        if (ph2_ir->src1 > 4095)
            elf_offset += 16;
        else if (ph2_ir->src1 >= 0)
            elf_offset += 4;
        else
            abort();
        return;
    /* Every opcode below is emitted as exactly one instruction */
    case OP_read:
    case OP_write:
    case OP_jump:
    case OP_load_func:
    case OP_indirect:
    case OP_add:
    case OP_sub:
    case OP_mul:
    case OP_lshift:
    case OP_rshift:
    case OP_bit_and:
    case OP_bit_or:
    case OP_bit_xor:
    case OP_negate:
    case OP_bit_not:
        elf_offset += 4;
        return;
    case OP_call:
        func = find_func(ph2_ir->func_name);
        if (func->bbs)
            elf_offset += 4;
        else if (dynlink) {
            /* When calling external functions in dynamic linking mode,
             * the following instructions are required:
             * - movw + movt: set r8 to 'elf_data_start'
             * - ldr: load a word from the address 'elf_data_start' into r12.
             *        (restore the global stack pointer.)
             *
             * Therefore, the total offset is 16 bytes (4 instructions).
             */
            elf_offset += 16;
        } else {
            printf("The '%s' function is not implemented\n",
                   ph2_ir->func_name);
            abort();
        }
        return;
    case OP_div:
    case OP_mod:
        if (hard_mul_div) {
            /* Hardware divide: one instruction for div, three for mod */
            if (ph2_ir->op == OP_div)
                elf_offset += 4;
            else
                elf_offset += 12;
            return;
        }
        /* div/mod emulation's offset (29 instructions in emit_ph2_ir()) */
        elf_offset += 116;
        return;
    case OP_load_data_address:
    case OP_load_rodata_address:
        /* movw + movt */
        elf_offset += 8;
        return;
    /* Three-instruction sequences */
    case OP_address_of_func:
    case OP_eq:
    case OP_neq:
    case OP_gt:
    case OP_lt:
    case OP_geq:
    case OP_leq:
    case OP_log_not:
        elf_offset += 12;
        return;
    case OP_branch:
        /* teq plus one branch, or teq plus two branches when detached */
        if (ph2_ir->is_branch_detached)
            elf_offset += 12;
        else
            elf_offset += 8;
        return;
    case OP_return:
        /* Six-instruction epilogue */
        elf_offset += 24;
        return;
    case OP_trunc:
        /* Truncation to 2 bytes uses a shift pair; other sizes use one
         * instruction.
         */
        if (ph2_ir->src1 == 2)
            elf_offset += 8;
        else
            elf_offset += 4;
        return;
    case OP_sign_ext:
        elf_offset += 4;
        return;
    case OP_cast:
        elf_offset += 4;
        return;
    default:
        fatal("Unknown opcode");
    }
}

/* Assign a code offset to every basic block and run update_elf_offset() over
 * every instruction, so that branch and call targets are known before
 * code_generate() emits anything. The fixed starting offsets below are the
 * sizes of the start-up sequences that code_generate() emits.
 */
void cfg_flatten(void)
{
    func_t *func;

    if (dynlink)
        elf_offset =
            100; /* offset of __libc_start_main + main_wrapper in codegen */
    else {
        func = find_func("__syscall");
        func->bbs->elf_offset = 32; /* offset of start + branch in codegen */
        elf_offset =
            92; /* offset of start + branch + syscall in codegen */
    }

    GLOBAL_FUNC->bbs->elf_offset = elf_offset;

    /* Account for the global initialization code */
    for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
         ph2_ir = ph2_ir->next) {
        update_elf_offset(ph2_ir);
    }

    /* prepare 'argc' and 'argv', then proceed to 'main' function */
    if (dynlink)
        elf_offset += 28; /* 7 insns: call main, unwind and return */
    else
        elf_offset += 32; /* 6 insns for main call + 2 for exit */

    for (func = FUNC_LIST.head; func; func = func->next) {
        /* Skip function declarations without bodies */
        if (!func->bbs)
            continue;

        /* reserve stack */
        ph2_ir_t *flatten_ir = add_ph2_ir(OP_define);
        flatten_ir->src0 = func->stack_size;
        /* NOTE(review): strncpy() does not terminate the copy when the source
         * fills MAX_VAR_LEN bytes - confirm func_name cannot overflow.
         */
        strncpy(flatten_ir->func_name, func->return_def.var_name, MAX_VAR_LEN);

        /* The actual offset of the top of the local stack is the sum of: * - 36 bytes (pushing registers r4-r11 and lr onto the stack) * - 4 bytes * (to ensure 8-byte alignment after pushing the 9
registers) * - ALIGN_UP(func->stack_size, 8) * * Note that func->stack_size does not include the 36 + 4 bytes, * so an additional 40 bytes should be added to * ALIGN_UP(func->stack_size, 8). */
        int stack_top_ofs = ALIGN_UP(func->stack_size, MIN_ALIGNMENT) + 40;

        for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) {
            bb->elf_offset = elf_offset;

            if (bb == func->bbs) {
                /* retrieve the global stack pointer and save ra, sp.
                 * 28 bytes = the seven instructions emit_ph2_ir() emits for
                 * OP_define.
                 */
                elf_offset += 28;
            }

            for (ph2_ir_t *insn = bb->ph2_ir_list.head; insn;
                 insn = insn->next) {
                /* For the instructions whose ofs_based_on_stack_top is set,
                 * recalculate the operand's offset by adding stack_top_ofs.
                 */
                if (insn->ofs_based_on_stack_top) {
                    switch (insn->op) {
                    case OP_load:
                    case OP_address_of:
                        insn->src0 = insn->src0 + stack_top_ofs;
                        break;
                    case OP_store:
                        insn->src1 = insn->src1 + stack_top_ofs;
                        break;
                    default:
                        /* Ignore other opcodes with the
                         * ofs_based_on_stack_top flag set, since only the
                         * three opcodes above need to access a variable's
                         * address.
                         */
                        break;
                    }
                }

                /* presumably appends 'insn' to PH2_IR_FLATTEN, which
                 * code_generate() walks later - confirm in the helper.
                 */
                flatten_ir = add_existed_ph2_ir(insn);

                if (insn->op == OP_return) {
                    /* restore sp: the epilogue needs the frame size */
                    flatten_ir->src1 = bb->belong_to->stack_size;
                }

                /* Branch detachment is determined in the arch-lowering stage */
                update_elf_offset(flatten_ir);
            }
        }
    }
}

/* Write one instruction word into the code buffer 'elf_code'. */
void emit(int code)
{
    elf_write_int(elf_code, code);
}

/* Emit the ARM instruction sequence for a single phase-2 IR instruction.
 *
 * The number of bytes emitted for each opcode has to equal what
 * update_elf_offset() accounted for, because branch and call displacements
 * are computed from the offsets recorded during cfg_flatten().
 */
void emit_ph2_ir(ph2_ir_t *ph2_ir)
{
    func_t *func;
    const int rd = ph2_ir->dest;
    const int rn = ph2_ir->src0;
    int rm = ph2_ir->src1; /* Left non-const; no case below assigns to it */
    int ofs;
    bool is_external_call = false;

    /* Prepare this variable to reuse code for:
     * 1. division and modulo operations
     * 2. load and store operations
     * 3. address-of operations
     */
    arm_reg interm;

    switch (ph2_ir->op) {
    case OP_define:
        /* We should handle the function entry point carefully due to the * following constraints: * - according to AAPCS, the callee must preserve r4-r11 for the caller, * and the stack must always be 8-byte aligned.
* - lr must be pushed because it may be modified when the function * calls another function. * - since external functions may call internal functions, r12 may not * hold the global stack pointer. * * Therefore, we perform the following operations: * 1. use a __stmdb instruction to push r4-r11 and lr onto the stack * first. * 2. retrieve the global stack pointer from the 4-byte global object * located at 'elf_data_start' to ensure correct access to the global * stack. * 3. set ofs to align(ph2_ir->src0, 8) + 4, and prepare a local * stack for the callee by subtracting ofs from sp. */ emit(__stmdb(__AL, 1, __sp, 0x4FF0)); emit(__movw(__AL, __r8, elf_data_start)); emit(__movt(__AL, __r8, elf_data_start)); emit(__lw(__AL, __r12, __r8, 0)); ofs = ALIGN_UP(ph2_ir->src0, MIN_ALIGNMENT) + 4; emit(__movw(__AL, __r8, ofs)); emit(__movt(__AL, __r8, ofs)); emit(__sub_r(__AL, __sp, __sp, __r8)); return; case OP_load_constant: if (ph2_ir->src0 < 0) { emit(__movw(__AL, __r8, -ph2_ir->src0)); emit(__movt(__AL, __r8, -ph2_ir->src0)); emit(__rsb_i(__AL, rd, 0, __r8)); } else if (ph2_ir->src0 > 255) { emit(__movw(__AL, rd, ph2_ir->src0)); emit(__movt(__AL, rd, ph2_ir->src0)); } else emit(__mov_i(__AL, rd, ph2_ir->src0)); return; case OP_address_of: case OP_global_address_of: interm = ph2_ir->op == OP_address_of ? __sp : __r12; if (ph2_ir->src0 > 255) { emit(__movw(__AL, __r8, ph2_ir->src0)); emit(__movt(__AL, __r8, ph2_ir->src0)); emit(__add_r(__AL, rd, interm, __r8)); } else emit(__add_i(__AL, rd, interm, ph2_ir->src0)); return; case OP_assign: emit(__mov_r(__AL, rd, rn)); return; case OP_load: case OP_global_load: interm = ph2_ir->op == OP_load ? __sp : __r12; if (ph2_ir->src0 > 4095) { emit(__movw(__AL, __r8, ph2_ir->src0)); emit(__movt(__AL, __r8, ph2_ir->src0)); emit(__add_r(__AL, __r8, interm, __r8)); emit(__lw(__AL, rd, __r8, 0)); } else emit(__lw(__AL, rd, interm, ph2_ir->src0)); return; case OP_store: case OP_global_store: interm = ph2_ir->op == OP_store ? 
__sp : __r12; if (ph2_ir->src1 > 4095) { emit(__movw(__AL, __r8, ph2_ir->src1)); emit(__movt(__AL, __r8, ph2_ir->src1)); emit(__add_r(__AL, __r8, interm, __r8)); emit(__sw(__AL, rn, __r8, 0)); } else emit(__sw(__AL, rn, interm, ph2_ir->src1)); return; case OP_read: if (ph2_ir->src1 == 1) emit(__lb(__AL, rd, rn, 0)); else if (ph2_ir->src1 == 2) emit(__lh(__AL, rd, rn, 0)); else if (ph2_ir->src1 == 4) emit(__lw(__AL, rd, rn, 0)); else abort(); return; case OP_write: if (ph2_ir->dest == 1) emit(__sb(__AL, rm, rn, 0)); else if (ph2_ir->dest == 2) emit(__sh(__AL, rm, rn, 0)); else if (ph2_ir->dest == 4) emit(__sw(__AL, rm, rn, 0)); else abort(); return; case OP_branch: emit(__teq(rn)); if (ph2_ir->is_branch_detached) { emit(__b(__NE, 8)); emit(__b(__AL, ph2_ir->else_bb->elf_offset - elf_code->size)); } else emit(__b(__NE, ph2_ir->then_bb->elf_offset - elf_code->size)); return; case OP_jump: emit(__b(__AL, ph2_ir->next_bb->elf_offset - elf_code->size)); return; case OP_call: func = find_func(ph2_ir->func_name); if (func->bbs) ofs = func->bbs->elf_offset - elf_code->size; else if (dynlink) { ofs = (dynamic_sections.elf_plt_start + func->plt_offset) - (elf_code_start + elf_code->size); is_external_call = true; } else { printf("The '%s' function is not implemented\n", ph2_ir->func_name); abort(); } /* When calling external functions in dynamic linking mode, * the following instructions are required: * - movw + movt: set r8 to 'elf_data_start' * - ldr: load a word from the address 'elf_data_start' to r12. * (restore the global stack pointer.) * * Since shecc uses r12 to store a global stack pointer and external * functions can freely modify r12, causing internal functions to * access global variables incorrectly, additional instructions are * needed to restore r12 from the global object after the external * function returns. * * Otherwise, only a 'bl' instruction is generated to call internal * functions because shecc guarantees they do not modify r12. 
*/
        emit(__bl(__AL, ofs));
        if (is_external_call) {
            /* Reload r12 from the word stored at 'elf_data_start' */
            emit(__movw(__AL, __r8, elf_data_start));
            emit(__movt(__AL, __r8, elf_data_start));
            emit(__lw(__AL, __r12, __r8, 0));
        }
        return;
    case OP_load_data_address:
        emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
        emit(__movt(__AL, rd, ph2_ir->src0 + elf_data_start));
        return;
    case OP_load_rodata_address:
        emit(__movw(__AL, rd, ph2_ir->src0 + elf_rodata_start));
        emit(__movt(__AL, rd, ph2_ir->src0 + elf_rodata_start));
        return;
    case OP_address_of_func:
        func = find_func(ph2_ir->func_name);
        if (func->bbs)
            ofs = elf_code_start + func->bbs->elf_offset;
        else if (dynlink)
            ofs = dynamic_sections.elf_plt_start + func->plt_offset;
        else {
            printf("The '%s' function is not implemented\n",
                   ph2_ir->func_name);
            abort();
        }
        /* Materialize the absolute address in r8 and store it through rn */
        emit(__movw(__AL, __r8, ofs));
        emit(__movt(__AL, __r8, ofs));
        emit(__sw(__AL, __r8, rn, 0));
        return;
    case OP_load_func:
        /* Stage the callee address in r8 for the following OP_indirect */
        emit(__mov_r(__AL, __r8, rn));
        return;
    case OP_indirect:
        emit(__blx(__AL, __r8));
        return;
    case OP_return:
        /* src0 == -1 emits a no-op move so the epilogue keeps a fixed size */
        if (ph2_ir->src0 == -1)
            emit(__mov_r(__AL, __r0, __r0));
        else
            emit(__mov_r(__AL, __r0, rn));

        /* When calling a function, the following operations are performed:
         * 1. push r4-r11 and lr onto the stack.
         * 2. retrieve the global stack pointer from the 4-byte global object.
         * 3. decrement the stack by ALIGN_UP(stack_size, 8) + 4.
         *
         * Except for step 2, the reverse operations are performed upon
         * returning, to restore the stack and the contents of r4-r11 and
         * lr.
         */
        ofs = ALIGN_UP(ph2_ir->src1, MIN_ALIGNMENT) + 4;
        emit(__movw(__AL, __r8, ofs));
        emit(__movt(__AL, __r8, ofs));
        emit(__add_r(__AL, __sp, __sp, __r8));
        emit(__ldm(__AL, 1, __sp, 0x4FF0));
        emit(__bx(__AL, __lr));
        return;
    case OP_add:
        emit(__add_r(__AL, rd, rn, rm));
        return;
    case OP_sub:
        emit(__sub_r(__AL, rd, rn, rm));
        return;
    case OP_mul:
        emit(__mul(__AL, rd, rn, rm));
        return;
    case OP_div:
    case OP_mod:
        if (hard_mul_div) {
            if (ph2_ir->op == OP_div)
                emit(__div(__AL, rd, rm, rn));
            else {
                /* Divide into r8, multiply r8 by rm, subtract from rn.
                 * NOTE(review): assumes __div() yields rn / rm - its body
                 * is not fully visible here, confirm.
                 */
                emit(__div(__AL, __r8, rm, rn));
                emit(__mul(__AL, __r8, rm, __r8));
                emit(__sub_r(__AL, rd, rn, __r8));
            }
            return;
        }
        interm = __r8;
        /* div/mod emulation: 29 instructions in total, matching the 116
         * bytes reserved by update_elf_offset().
         */
        /* Preserve the values of the dividend and divisor */
        emit(__stmdb(__AL, 1, __sp, (1 << rn) | (1 << rm)));
        /* Obtain absolute values of the dividend and divisor */
        emit(__srl_amt(__AL, 0, arith_rs, __r8, rn, 31));
        emit(__add_r(__AL, rn, rn, __r8));
        emit(__eor_r(__AL, rn, rn, __r8));
        emit(__srl_amt(__AL, 0, arith_rs, __r9, rm, 31));
        emit(__add_r(__AL, rm, rm, __r9));
        emit(__eor_r(__AL, rm, rm, __r9));
        /* r10 keeps the sign information used at the very end */
        if (ph2_ir->op == OP_div)
            emit(__eor_r(__AL, __r10, __r8, __r9));
        else {
            /* If the requested operation is modulo, the result will be stored
             * in __r9. The sign of the divisor is irrelevant for determining
             * the result's sign.
             */
            interm = __r9;
            emit(__mov_r(__AL, __r10, __r8));
        }
        /* Unsigned integer division */
        emit(__zero(__r8));
        emit(__mov_i(__AL, __r9, 1));
        emit(__cmp_i(__AL, rm, 0));
        emit(__b(__EQ, 52));
        emit(__cmp_i(__AL, rn, 0));
        emit(__b(__EQ, 44));
        emit(__cmp_r(__AL, rm, rn));
        emit(__sll_amt(__CC, 0, logic_ls, rm, rm, 1));
        emit(__sll_amt(__CC, 0, logic_ls, __r9, __r9, 1));
        emit(__b(__CC, -12));
        emit(__cmp_r(__AL, rn, rm));
        emit(__sub_r(__CS, rn, rn, rm));
        emit(__add_r(__CS, __r8, __r8, __r9));
        emit(__srl_amt(__AL, 1, logic_rs, __r9, __r9, 1));
        emit(__srl_amt(__CC, 0, logic_rs, rm, rm, 1));
        emit(__b(__CC, -20));
        /* After completing the emulation, the quotient and remainder will be * stored in __r8 and __r9, respectively.
* * The original values of the dividend and divisor will be restored in * rn and rm. * * Finally, the result (quotient or remainder) will be stored in rd. */
        emit(__mov_r(__AL, __r9, rn));
        emit(__ldm(__AL, 1, __sp, (1 << rn) | (1 << rm)));
        emit(__mov_r(__AL, rd, interm));
        /* Handle the correct sign for the quotient or remainder */
        emit(__cmp_i(__AL, __r10, 0));
        emit(__rsb_i(__NE, rd, 0, rd));
        return;
    case OP_lshift:
        emit(__sll(__AL, rd, rn, rm));
        return;
    case OP_rshift:
        /* Right shifts are arithmetic (sign-preserving) */
        emit(__sra(__AL, rd, rn, rm));
        return;
    case OP_eq:
    case OP_neq:
    case OP_gt:
    case OP_lt:
    case OP_geq:
    case OP_leq:
        /* rd = 0, then conditionally rd = 1 when the comparison holds */
        emit(__cmp_r(__AL, rn, rm));
        emit(__zero(rd));
        emit(__mov_i(arm_get_cond(ph2_ir->op), rd, 1));
        return;
    case OP_negate:
        emit(__rsb_i(__AL, rd, 0, rn));
        return;
    case OP_bit_not:
        emit(__mvn_r(__AL, rd, rn));
        return;
    case OP_bit_and:
        emit(__and_r(__AL, rd, rn, rm));
        return;
    case OP_bit_or:
        emit(__or_r(__AL, rd, rn, rm));
        return;
    case OP_bit_xor:
        emit(__eor_r(__AL, rd, rn, rm));
        return;
    case OP_log_not:
        emit(__cmp_i(__AL, rn, 0));
        emit(__mov_i(__NE, rd, 0));
        emit(__mov_i(__EQ, rd, 1));
        return;
    case OP_trunc:
        /* rm holds the target size in bytes */
        if (rm == 1) {
            emit(__and_i(__AL, rd, rn, 0xFF));
        } else if (rm == 2) {
            /* Shift left by 16, then logical shift right by 16. The second
             * call passes logic_rs, so despite the helper's name it encodes
             * a right shift.
             */
            emit(__sll_amt(__AL, 0, logic_ls, rd, rn, 16));
            emit(__sll_amt(__AL, 0, logic_rs, rd, rd, 16));
        } else if (rm == 4) {
            emit(__mov_r(__AL, rd, rn));
        } else {
            fatal("Unsupported truncation operation with invalid target size");
        }
        return;
    case OP_sign_ext: {
        /* Decode source size from upper 16 bits */
        int source_size = (rm >> 16) & 0xFFFF;
        if (source_size == 2) {
            emit(__sxth(__AL, rd, rn, 0));
        } else {
            /* For other cases, use byte extension (original behavior) */
            emit(__sxtb(__AL, rd, rn, 0));
        }
    }
        return;
    case OP_cast:
        /* Generic cast operation - for now, just move the value */
        emit(__mov_r(__AL, rd, rn));
        return;
    default:
        fatal("Unknown opcode");
    }
}

/* Forward declaration; the definition follows code_generate(). */
void plt_generate(void);

/* Emit the whole code section in this order: the PLT and the
 * __libc_start_main start-up (dynamic linking only), the global stack
 * set-up, the __syscall routine (static linking only), the global
 * initialization code, the call to main, and finally every instruction
 * collected in PH2_IR_FLATTEN.
 *
 * The sizes of the fixed sequences emitted here are the constants that
 * cfg_flatten() starts from (100 bytes for dynamic linking, 32 and 92 bytes
 * for static linking).
 */
void code_generate(void)
{
    int ofs;

    if (dynlink) {
        plt_generate();
        /* Call __libc_start_main() */
        emit(__mov_i(__AL, __r11, 0));
        emit(__mov_i(__AL, __lr, 0));
        emit(__pop_word(__AL, __r1));
        emit(__mov_r(__AL, __r2, __sp));
        emit(__push_reg(__AL, __r2));
        emit(__push_reg(__AL, __r0));
        emit(__mov_i(__AL, __r12, 0));
        emit(__push_reg(__AL, __r12));

        /* Seven more instructions (28 bytes) are emitted before the first
         * instruction of 'main_wrapper'.
         */
        int main_wrapper_offset = elf_code->size + 28;
        emit(__movw(__AL, __r0, elf_code_start + main_wrapper_offset));
        emit(__movt(__AL, __r0, elf_code_start + main_wrapper_offset));
        emit(__mov_i(__AL, __r3, 0));
        /* Branch to the first PLT entry after the PLT_FIXUP_SIZE-byte
         * PLT[0]. NOTE(review): presumably the slot of __libc_start_main -
         * confirm against the PLT layout in elf.c.
         */
        emit(__bl(__AL, (dynamic_sections.elf_plt_start + PLT_FIXUP_SIZE) -
                            (elf_code_start + elf_code->size)));
        /* Call '_exit' (syscall) to terminate the program if __libc_start_main
         * returns.
         */
        emit(__mov_i(__AL, __r0, 127));
        emit(__mov_i(__AL, __r7, 1));
        emit(__svc());

        /* If the compiled program is dynamic linking, the starting
         * point of 'main_wrapper' is located here.
         *
         * Push the contents of r4-r11 and lr onto stack.
         * Preserve 'argc' and 'argv' for the 'main' function.
         */
        emit(__stmdb(__AL, 1, __sp, 0x4FF0));
        emit(__mov_r(__AL, __r9, __r0));
        emit(__mov_r(__AL, __r10, __r1));
    }
    /* For both static and dynamic linking, we need to set up the stack
     * and call the main function.
     *
     * To ensure that the stack remains 8-byte aligned after adjustment,
     * 'ofs' is set to align(GLOBAL_FUNC->stack_size, 8) to allocate space
     * for the global stack.
     *
     * In dynamic linking mode, since the preceding __stmdb instruction
     * pushes 9 registers onto stack, 'ofs' must be increased by 4 to
     * prevent the stack from becoming misaligned.
     */
    ofs = ALIGN_UP(GLOBAL_FUNC->stack_size, MIN_ALIGNMENT);
    if (dynlink)
        ofs += 4;
    emit(__movw(__AL, __r8, ofs));
    emit(__movt(__AL, __r8, ofs));
    emit(__sub_r(__AL, __sp, __sp, __r8));
    emit(__mov_r(__AL, __r12, __sp));
    /* The first object in the .data section is used to store the global * stack pointer. Therefore, store r12 at the address 'elf_data_start' * after the global stack has been prepared.
*/
    emit(__movw(__AL, __r8, elf_data_start));
    emit(__movt(__AL, __r8, elf_data_start));
    emit(__sw(__AL, __r12, __r8, 0));

    if (!dynlink) {
        /* Jump directly to the main preparation and then execute the
         * main function.
         *
         * In static linking mode, when the main function completes its
         * execution, it will invoke the '_exit' syscall to terminate
         * the program.
         *
         * That is, the execution flow is:
         *
         *               +------------------+
         *               | movw r8          |
         *   'start'     | ...              |
         *               | b                | (1) jump to global init --+
         *               +------------------+                           |
         *               | push {r4 ... r7} |                           |
         *  '__syscall'  | ...              |                           |
         *               | bx lr            |                           |
         *               +------------------+                           |
         *               | ...              | (2) global init <---------+
         *               | (global init)    |
         *               | ...              |
         *  global init  | movw r8          |
         *       +       | movt r8          |
         *  call main()  | ...              |
         *               | bl               | (3) call main()
         *               | mov r7 #1        |
         *               | svc 0x00000000   | (4) call '_exit' after main()
         *               +------------------+     returns
         */
        emit(__b(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));

        /* __syscall - only for static linking
         *
         * If the number of arguments is greater than 4, the additional
         * arguments need to be retrieved from the stack. However, this
         * process must modify the contents of registers r4-r7.
         *
         * Therefore, __syscall needs to preserve the contents of these
         * registers before invoking a syscall, and restore them after
         * the syscall has completed.
         *
         * The routine is 15 instructions (60 bytes) and starts at code
         * offset 32, which is the value cfg_flatten() records for it.
         */
        emit(__stmdb(__AL, 1, __sp, 0x00F0));
        /* The four saved registers occupy 16 bytes, so the stacked
         * arguments start at sp + 16.
         */
        emit(__lw(__AL, __r4, __sp, 16));
        emit(__lw(__AL, __r5, __sp, 20));
        emit(__lw(__AL, __r6, __sp, 24));
        emit(__lw(__AL, __r7, __sp, 28));
        /* The first argument goes to r7 (overwriting the value just
         * loaded); the remaining arguments shift down by one register.
         */
        emit(__mov_r(__AL, __r7, __r0));
        emit(__mov_r(__AL, __r0, __r1));
        emit(__mov_r(__AL, __r1, __r2));
        emit(__mov_r(__AL, __r2, __r3));
        emit(__mov_r(__AL, __r3, __r4));
        emit(__mov_r(__AL, __r4, __r5));
        emit(__mov_r(__AL, __r5, __r6));
        emit(__svc());
        emit(__ldm(__AL, 1, __sp, 0x00F0));
        emit(__bx(__AL, __lr));
    }

    /* Global initialization code */
    ph2_ir_t *ph2_ir;
    for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
         ph2_ir = ph2_ir->next)
        emit_ph2_ir(ph2_ir);

    /* prepare 'argc' and 'argv', then proceed to 'main' function */
    if (MAIN_BB) {
        if (dynlink) {
            /* argc/argv were parked in r9/r10 by 'main_wrapper' */
            emit(__mov_r(__AL, __r0, __r9));
            emit(__mov_r(__AL, __r1, __r10));
            /* Call the main function.
             *
             * After the main function returns, the following
             * instructions restore the registers r4-r11 and
             * return control to __libc_start_main via the
             * preserved lr.
             */
            emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
            emit(__movw(__AL, __r8, ofs));
            emit(__movt(__AL, __r8, ofs));
            emit(__add_r(__AL, __sp, __sp, __r8));
            /* 0x8FF0 loads pc instead of lr, which performs the return */
            emit(__ldm(__AL, 1, __sp, 0x8FF0));
        } else {
            /* r8 = r12 + ofs; r0 is loaded from [r8] and r1 = r8 + 4 */
            emit(__movw(__AL, __r8, ofs));
            emit(__movt(__AL, __r8, ofs));
            emit(__add_r(__AL, __r8, __r12, __r8));
            emit(__lw(__AL, __r0, __r8, 0));
            emit(__add_i(__AL, __r1, __r8, 4));

            /* Call main function, and call '_exit' syscall to * terminate the program.
*/
            emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
            /* exit with main's return value - r0 already has the
             * return value */
            emit(__mov_i(__AL, __r7, 1));
            emit(__svc());
        }
    }

    /* Finally emit every instruction collected during cfg_flatten() */
    for (int i = 0; i < ph2_ir_idx; i++) {
        ph2_ir = PH2_IR_FLATTEN[i];
        emit_ph2_ir(ph2_ir);
    }
}

/* Fill 'dynamic_sections.elf_plt' with PLT[0] (five instructions) followed
 * by one three-instruction entry per remaining slot. These sizes correspond
 * to PLT_FIXUP_SIZE (20) and PLT_ENT_SIZE (12) in the ARM configuration.
 */
void plt_generate(void)
{
    /* - PLT code generation explanation -
     *
     * As described in ARM's Platform Standard, PLT code should make register
     * ip address the corresponding GOT entry on SVr4-like (Linux-like)
     * platforms.
     *
     * Therefore, PLT[1] ~ PLT[N] use r12 (ip) to load the address of the
     * GOT entry and jump to the function entry via the GOT value.
     *
     * PLT[0] is used to call the resolver, which requires:
     * - [sp] contains the return address from the original function call.
     * - ip contains the address of the GOT entry.
     * - lr points to the address of GOT[2].
     *
     * The second requirement is already handled by PLT[1] - PLT[N], so
     * PLT[0] must take care of the other two. The first one can be achieved
     * by a 'push' instruction; for the third, we use r10 to store the address
     * of GOT[2] and then move the value to lr.
     *
     * - Reason for using r10 in PLT[0] -
     *
     * The register allocation assumes 8 available registers, so the ARM code
     * generator primarily uses r0-r7 for code generation. These registers
     * cannot be modified arbitrarily; otherwise, the program may fail if any
     * of them are changed by PLT[0].
     *
     * However, r8-r11 can be freely used as temporary registers during code
     * generation, so PLT[0] arbitrarily chooses r10 to perform the required
     * operation.
     */
    int addr_of_got = dynamic_sections.elf_got_start + PTR_SIZE * 2;
    /* Number of bytes left for PLT[1..N] once PLT[0] is accounted for */
    int end = dynamic_sections.plt_size - PLT_FIXUP_SIZE;
    elf_write_int(dynamic_sections.elf_plt, __push_reg(__AL, __lr));
    elf_write_int(dynamic_sections.elf_plt, __movw(__AL, __r10, addr_of_got));
    elf_write_int(dynamic_sections.elf_plt, __movt(__AL, __r10, addr_of_got));
    elf_write_int(dynamic_sections.elf_plt, __mov_r(__AL, __lr, __r10));
    elf_write_int(dynamic_sections.elf_plt, __lw(__AL, __pc, __lr, 0));
    for (int i = 0; i * PLT_ENT_SIZE < end; i++) {
        /* Entry i jumps through GOT[i + 3] */
        addr_of_got = dynamic_sections.elf_got_start + PTR_SIZE * (i + 3);
        elf_write_int(dynamic_sections.elf_plt,
                      __movw(__AL, __r12, addr_of_got));
        elf_write_int(dynamic_sections.elf_plt,
                      __movt(__AL, __r12, addr_of_got));
        elf_write_int(dynamic_sections.elf_plt, __lw(__AL, __pc, __r12, 0));
    }
}

================================================
FILE: src/arm.c
================================================
/*
 * shecc - Self-Hosting and Educational C Compiler.
 *
 * shecc is freely redistributable under the BSD 2 clause license. See the
 * file "LICENSE" for information on usage and redistribution of this file.
 */

/* ARMv7-A instruction encoding */

/* Identifier naming conventions * - prefix arm_ : Arm instruction encoding. * - prefix __ : mnemonic symbols for Arm instruction, condition code, * registers, etc. * * An example of usage in src/codegen.c: (unconditional jump) * * +---------------- write specified instruction into ELF * | * emit(__b(__AL, ofs)); * | | | * | | +--- to PC-relative expression * | +------- always * +------------ branch * * Machine-level "b" instructions have restricted ranges from the address of * the current instruction.
*/

#include "defs.h"

/* Opcode field values fed to the encoders in this file.
 *
 * arm_ldm and arm_teq share the value 9. Within this file arm_ldm is only
 * handed to arm_encode() by __ldm() and arm_teq is only handed to __mov() by
 * __teq(), so the two are never combined in one instruction word.
 */
typedef enum {
    arm_and = 0,
    arm_eor = 1,
    arm_sub = 2,
    arm_rsb = 3,
    arm_add = 4,
    arm_ldm = 9,
    arm_teq = 9,
    arm_cmp = 10,
    arm_orr = 12,
    arm_mov = 13,
    arm_mvn = 15,
    arm_stmdb = 16
} arm_op_t;

/* Condition code
 * Reference:
 * https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/condition-codes-1-condition-flags-and-codes
 *
 * The value of each enumerator is placed at bit 28 of the instruction word
 * by arm_encode().
 */
typedef enum {
    __EQ = 0,  /* Equal */
    __NE = 1,  /* Not equal */
    __CS = 2,  /* Unsigned higher or same */
    __CC = 3,  /* Unsigned lower */
    __LS = 9,  /* Unsigned lower or same */
    __GE = 10, /* Signed greater than or equal */
    __LT = 11, /* Signed less than */
    __GT = 12, /* Signed greater than */
    __LE = 13, /* Signed less than or equal */
    __AL = 14  /* Always executed */
} arm_cond_t;

/* Registers */
typedef enum {
    __r0 = 0,
    __r1 = 1,
    __r2 = 2,
    __r3 = 3,
    __r4 = 4,
    __r5 = 5,
    __r6 = 6,
    __r7 = 7,
    __r8 = 8,
    __r9 = 9,
    __r10 = 10,
    __r11 = 11,
    __r12 = 12,
    __sp = 13, /* stack pointer, r13 */
    __lr = 14, /* link register, r14 */
    __pc = 15  /* program counter, r15 */
} arm_reg;

/* Shift kinds. The shift encoders below place this value at bit 5 of
 * operand 2.
 */
typedef enum {
    logic_ls = 0, /* Logical left shift */
    logic_rs = 1, /* Logical right shift */
    arith_rs = 2, /* Arithmetic right shift */
    rotat_rs = 3  /* Rotate right shift */
} shift_type;

/* Map a comparison IR opcode to the matching ARM condition code.
 *
 * Any opcode other than the six comparisons is reported through fatal().
 * The trailing return only gives every path a value.
 */
arm_cond_t arm_get_cond(opcode_t op)
{
    switch (op) {
    case OP_eq:
        return __EQ;
    case OP_neq:
        return __NE;
    case OP_lt:
        return __LT;
    case OP_geq:
        return __GE;
    case OP_gt:
        return __GT;
    case OP_leq:
        return __LE;
    default:
        fatal("Unsupported condition IR opcode");
    }
    return __AL;
}

/* Copy the bit range [i_start, i_end] of @imm into the bit range
 * [d_start, d_end] of the result; every other result bit is zero.
 *
 * Both ranges are inclusive and must have the same width, otherwise
 * fatal() is called.
 */
int arm_extract_bits(int imm, int i_start, int i_end, int d_start, int d_end)
{
    if (((d_end - d_start) != (i_end - i_start)) || (i_start > i_end) ||
        (d_start > d_end))
        fatal("Invalid bit copy");

    int v = imm >> i_start;
    /* (2 << w) - 1 has the low w + 1 bits set, matching the inclusive width */
    v &= ((2 << (i_end - i_start)) - 1);
    v <<= d_start;
    return v;
}

/* Assemble an instruction word: @cond at bit 28, @opcode at bit 20, @rn at
 * bit 16 and @rd at bit 12, with @op2 added on top.
 *
 * The fields are summed rather than OR-ed, so callers are responsible for
 * keeping them from overlapping.
 */
int arm_encode(arm_cond_t cond, int opcode, int rn, int rd, int op2)
{
    return (cond << 28) + (opcode << 20) + (rn << 16) + (rd << 12) + op2;
}

/* Encode an unconditional word whose opcode field is 240 (0xF0); going by
 * the name this is the supervisor call instruction.
 */
int __svc(void)
{
    return arm_encode(__AL, 240, 0, 0, 0);
}

/* Common encoder behind most of the arithmetic/logic helpers below.
 *
 * The opcode field handed to arm_encode() is s + (opcode << 1) + (io << 5).
 * Callers pass io = 1 together with an immediate @op2 and io = 0 together
 * with a register number; @s is 1 only in the compare/test helpers.
 *
 * An @op2 of at most 255 is emitted unchanged. A larger @op2 is divided by
 * four for as long as its two low bits are zero, and a counter that starts
 * at 16 is decremented once per step; the counter is then shifted left by 8
 * and added to the low 8 bits of the reduced value. If the reduced value
 * still exceeds 255, fatal() is called.
 *
 * NOTE(review): a negative @op2 skips the range check entirely and is
 * silently masked to its low 8 bits.
 */
int __mov(arm_cond_t cond, int io, int opcode, int s, int rn, int rd, int op2)
{
    int shift = 0;
    if (op2 > 255) {
        shift = 16; /* full rotation */
        while ((op2 & 3) == 0) {
            /* we can shift by two bits */
            op2 >>= 2;
            shift -= 1;
        }
        if (op2 > 255)
            /* value spans more than 8 bits */
            fatal("Unable to represent value");
    }
    return arm_encode(cond, s + (opcode << 1) + (io << 5), rn, rd,
                      (shift << 8) + (op2 & 255));
}

/* rd = rs AND rm (register operands). */
int __and_r(arm_cond_t cond, arm_reg rd, arm_reg rs, arm_reg rm)
{
    return __mov(cond, 0, arm_and, 0, rs, rd, rm);
}

/* rd = rs OR rm (register operands). */
int __or_r(arm_cond_t cond, arm_reg rd, arm_reg rs, arm_reg rm)
{
    return __mov(cond, 0, arm_orr, 0, rs, rd, rm);
}

/* rd = rs XOR rm (register operands). */
int __eor_r(arm_cond_t cond, arm_reg rd, arm_reg rs, arm_reg rm)
{
    return __mov(cond, 0, arm_eor, 0, rs, rd, rm);
}

/* Encode arm_mvn with register operand @rm into @rd; the rn field is 0. */
int __mvn_r(arm_cond_t cond, arm_reg rd, arm_reg rm)
{
    return __mov(cond, 0, arm_mvn, 0, 0, rd, rm);
}

/* Encode opcode field 48 (0x30) carrying the low 16 bits of @imm: bits 0-11
 * stay in place and bits 12-15 are moved to bits 16-19 of the word.
 */
int __movw(arm_cond_t cond, arm_reg rd, int imm)
{
    return arm_encode(cond, 48, 0, rd, 0) +
           arm_extract_bits(imm, 0, 11, 0, 11) +
           arm_extract_bits(imm, 12, 15, 16, 19);
}

/* Encode opcode field 52 (0x34) carrying the high 16 bits of @imm, laid out
 * in the same way as __movw().
 */
int __movt(arm_cond_t cond, arm_reg rd, int imm)
{
    imm >>= 16;
    return arm_encode(cond, 52, 0, rd, 0) +
           arm_extract_bits(imm, 0, 11, 0, 11) +
           arm_extract_bits(imm, 12, 15, 16, 19);
}

/* rd = imm, subject to the immediate limits described at __mov(). */
int __mov_i(arm_cond_t cond, arm_reg rd, int imm)
{
    return __mov(cond, 1, arm_mov, 0, 0, rd, imm);
}

/* rd = rs (register move). */
int __mov_r(arm_cond_t cond, arm_reg rd, arm_reg rs)
{
    return __mov(cond, 0, arm_mov, 0, 0, rd, rs);
}

/* Shift @rm by the amount held in register @rs and store it into @rd.
 * Operand 2 sets bit 4 and shift type 1 (logic_rs).
 */
int __srl(arm_cond_t cond, arm_reg rd, arm_reg rm, arm_reg rs)
{
    return arm_encode(cond, 0 + (arm_mov << 1) + (0 << 5), 0, rd,
                      rm + (1 << 4) + (1 << 5) + (rs << 8));
}

/* Shift @rm by the constant @amt using shift kind @shift and store it into
 * @rd; @s is added to the opcode field. @amt is placed at bit 7 without any
 * range check.
 */
int __srl_amt(arm_cond_t cond,
              int s,
              shift_type shift,
              arm_reg rd,
              arm_reg rm,
              int amt)
{
    return arm_encode(cond, s + (arm_mov << 1) + (0 << 5), 0, rd,
                      rm + (0 << 4) + (shift << 5) + (amt << 7));
}

/* Shift @rm by the amount held in register @rs and store it into @rd.
 * Operand 2 sets bit 4 and shift type 0 (logic_ls).
 */
int __sll(arm_cond_t cond, arm_reg rd, arm_reg rm, arm_reg rs)
{
    return arm_encode(cond, 0 + (arm_mov << 1) + (0 << 5), 0, rd,
                      rm + (1 << 4) + (0 << 5) + (rs << 8));
}

/* Same encoding as __srl_amt(): the shift kind comes from @shift, not from
 * the function name.
 */
int __sll_amt(arm_cond_t cond,
              int s,
              shift_type shift,
              arm_reg rd,
              arm_reg rm,
              int amt)
{
    return arm_encode(cond, s + (arm_mov << 1) + (0 << 5), 0, rd,
                      rm + (0 << 4) + (shift << 5) + (amt << 7));
}

int __sra(arm_cond_t cond, arm_reg rd, arm_reg rm, arm_reg rs)
{
    /* Arithmetic right shift with register
     * Bit 4 = 1 (register-specified shift)
     * Bits 5-6 = arith_rs (2) for arithmetic right shift
     */
    return arm_encode(cond, 0 + (arm_mov << 1) + (0 << 5), 0, rd,
                      rm + (1 << 4) + (arith_rs << 5) + (rs << 8));
}

/* rd = rs + imm. A negative @imm is emitted as a subtraction of -imm so the
 * encoded immediate is never negative.
 */
int __add_i(arm_cond_t cond, arm_reg rd, arm_reg rs, int imm)
{
    if (imm >= 0)
        return __mov(cond, 1, arm_add, 0, rs, rd, imm);
    return __mov(cond, 1, arm_sub, 0, rs, rd, -imm);
}

/* rd = rs + ro (register operands). */
int __add_r(arm_cond_t cond, arm_reg rd, arm_reg rs, arm_reg ro)
{
    return __mov(cond, 0, arm_add, 0, rs, rd, ro);
}

/* rd = rs - ro (register operands). */
int __sub_r(arm_cond_t cond, arm_reg rd, arm_reg rs, arm_reg ro)
{
    return __mov(cond, 0, arm_sub, 0, rs, rd, ro);
}

/* rd = rs AND imm. */
int __and_i(arm_cond_t cond, arm_reg rd, arm_reg rs, int imm)
{
    return __mov(cond, 1, arm_and, 0, rs, rd, imm);
}

/* rd = 0, unconditionally. */
int __zero(int rd)
{
    return __mov_i(__AL, rd, 0);
}

/* ARM halfword transfer (immediate offset) using special encoding
 * For halfword: bits[11:8] = imm4H, bits[7:4] = encoding, bits[3:0] = imm4L
 *   imm4H: upper 4 bits of offset
 *   imm4L: lower 4 bits of offset
 *   encoding: 0b1011 for unsigned halfword, 0b1111 for signed halfword
 *
 * @l is added to the opcode field (1 in __lh(), 0 in __sh()). A negative
 * @ofs is negated and 8 is subtracted from the opcode field. Offsets whose
 * magnitude exceeds 255 are rejected through fatal().
 */
int arm_halfword_transfer(arm_cond_t cond,
                          int l,
                          arm_reg rn,
                          arm_reg rd,
                          int ofs,
                          int signed_op)
{
    int opcode = 16 + 8 + 4 + l;
    if (ofs < 0) {
        opcode -= 8;
        ofs = -ofs;
    }

    if (ofs > 255)
        fatal("Halfword offset too large");

    /* Halfword encoding: split offset into 4-bit high and low parts */
    int imm4H = ((ofs >> 4) & 0xF) << 8;
    int imm4L = ofs & 0xF;

    /* Encode lower 8 bits: 1011xxxx for unsigned, 1111xxxx for signed */
    int encoded_ofs = imm4H | 0xB0 | imm4L | (signed_op << 6);

    return arm_encode(cond, opcode, rn, rd, encoded_ofs);
}

/* Word/byte transfer between @rd and the address @rn + @ofs.
 *
 * @l is added to the opcode field (1 in the load helpers, 0 in the store
 * helpers) and @size == 1 adds 4 to it. A negative @ofs is negated and 8 is
 * subtracted from the opcode field.
 *
 * NOTE(review): the offset is masked to 12 bits without a range check, so a
 * magnitude above 4095 is silently truncated.
 */
int arm_transfer(arm_cond_t cond,
                 int l,
                 int size,
                 arm_reg rn,
                 arm_reg rd,
                 int ofs)
{
    int opcode = 64 + 16 + 8 + l;
    if (size == 1)
        opcode += 4;
    if (ofs < 0) {
        opcode -= 8;
        ofs = -ofs;
    }
    return arm_encode(cond, opcode, rn, rd, ofs & 4095);
}

/* Load a word from @rn + @ofs into @rd. */
int __lw(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
{
    return arm_transfer(cond, 1, 4, rn, rd, ofs);
}

/* Load a byte from @rn + @ofs into @rd. */
int __lb(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
{
    return arm_transfer(cond, 1, 1, rn, rd, ofs);
}

/* Store the word in @rd to @rn + @ofs. */
int __sw(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
{
    return arm_transfer(cond, 0, 4, rn, rd, ofs);
}

/* Store the low byte of @rd to @rn + @ofs. */
int __sb(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
{
    return arm_transfer(cond, 0, 1, rn, rd, ofs);
}

/* ARM signed halfword load (LDRSH) */
int __lh(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
{
    return arm_halfword_transfer(cond, 1, rn, rd, ofs, 1);
}

/* ARM halfword store (STRH) */
int __sh(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
{
    return arm_halfword_transfer(cond, 0, rn, rd, ofs, 0);
}

/* Multi-register store based on @rn. @reg_list is added as operand 2 and
 * (w << 1) is added to the opcode field.
 */
int __stmdb(arm_cond_t cond, int w, arm_reg rn, int reg_list)
{
    return arm_encode(cond, arm_stmdb + (0x2 << 6) + (w << 1), rn, 0, reg_list);
}

/* Multi-register load based on @rn; the counterpart of __stmdb(). */
int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
{
    return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
}

/* Encode opcode field 0x52 with base register 13 (sp), @rt as rd and an
 * offset of 4; going by the name this pushes @rt onto the stack.
 */
int __push_reg(arm_cond_t cond, arm_reg rt)
{
    return arm_encode(cond, (0x5 << 4) | 0x2, 0xd, rt, 0x4);
}

/* Encode opcode field 0x49 with base register 13 (sp), @rt as rd and an
 * offset of 4; going by the name this pops one word into @rt.
 */
int __pop_word(arm_cond_t cond, arm_reg rt)
{
    return arm_encode(cond, (0x4 << 4) | 0x9, 0xd, rt, 0x4);
}

/* Branch by @ofs bytes. The encoded field is (ofs - 8) >> 2 truncated to 24
 * bits; presumably the 8 compensates for the PC reading two instructions
 * ahead -- confirm against the ARM reference.
 */
int __b(arm_cond_t cond, int ofs)
{
    int o = (ofs - 8) >> 2;
    return arm_encode(cond, 160, 0, 0, 0) + (o & 16777215);
}

/* Same offset encoding as __b(), with opcode field 176 (0xB0) instead of
 * 160 (0xA0).
 */
int __bl(arm_cond_t cond, int ofs)
{
    int o = (ofs - 8) >> 2;
    return arm_encode(cond, 176, 0, 0, 0) + (o & 16777215);
}

int __bx(arm_cond_t cond, arm_reg rm)
{
    /* BX: Branch and Exchange */
    return (cond << 28) | 0x012FFF10 | rm;
}

/* Builds cond | 0x012FFF30 | rd: opcode field 18, rn = rd-field = 15 and
 * 3888 (0xF30) added to the register number.
 */
int __blx(arm_cond_t cond, arm_reg rd)
{
    return arm_encode(cond, 18, 15, 15, rd + 3888);
}

/* Multiply: @rd goes into the rn slot of arm_encode(), @r1 at bit 8 and @r2
 * at bit 0, with 144 (0x90) marking the instruction.
 */
int __mul(arm_cond_t cond, arm_reg rd, arm_reg r1, arm_reg r2)
{
    return arm_encode(cond, 0, rd, 0, (r1 << 8) + 144 + r2);
}

/* Divide: opcode field 113 (0x71), @rd in the rn slot, 15 in the rd slot,
 * @r1 at bit 8 and @r2 at bit 0.
 */
int __div(arm_cond_t cond, arm_reg rd, arm_reg r1, arm_reg r2)
{
    return arm_encode(cond, 113, rd, 15, (r1 << 8) + 16 + r2);
}

/* Reverse subtract with an immediate: encodes arm_rsb with @rn as the
 * register operand and @imm as the immediate.
 */
int __rsb_i(arm_cond_t cond, arm_reg rd, int imm, arm_reg rn)
{
    return __mov(cond, 1, arm_rsb, 0, rn, rd, imm);
}

/* Compare @r1 with @r2; s = 1 and the rd field is 0. */
int __cmp_r(arm_cond_t cond, arm_reg r1, arm_reg r2)
{
    return __mov(cond, 0, arm_cmp, 1, r1, 0, r2);
}

/* Compare @rn with the immediate @imm; s = 1 and the rd field is 0. */
int __cmp_i(arm_cond_t cond, arm_reg rn, int imm)
{
    return __mov(cond, 1, arm_cmp, 1, rn, 0, imm);
}

/* Test @rd against the immediate 0, unconditionally. */
int __teq(arm_reg rd)
{
    return __mov(__AL, 1, arm_teq, 1, rd, 0, 0);
}

/* Sign-extend a byte of @rm into @rd. @rotation must be 0, 8, 16 or 24 and
 * is stored divided by 8 at bit 10 of operand 2.
 */
int __sxtb(arm_cond_t cond, arm_reg rd, arm_reg rm, int rotation)
{
    if (rotation != 0 && rotation != 8 && rotation != 16 && rotation != 24)
        fatal("SXTB rotation must be 0, 8, 16, or 24");

    return arm_encode(cond, 106, 0xF, rd,
                      rm | ((rotation >> 3) << 10) | (0x7 << 4));
}

/* Sign-extend a halfword of @rm into @rd. @rotation must be 0, 8, 16 or 24
 * and is stored divided by 8 at bit 10 of operand 2.
 */
int __sxth(arm_cond_t cond, arm_reg rd, arm_reg rm, int rotation)
{
    if (rotation != 0 && rotation != 8 && rotation != 16 && rotation != 24)
        fatal("SXTH rotation must be 0, 8, 16, or 24");

    return arm_encode(cond, 107, 0xF, rd,
                      rm | ((rotation >> 3) << 10) | (0x7 << 4));
}

================================================
FILE: src/defs.h
================================================
/*
 * shecc - Self-Hosting and Educational C Compiler.
 *
 * shecc is freely redistributable under the BSD 2 clause license. See the
 * file "LICENSE" for information on usage and redistribution of this file.
*/ #pragma once #include /* definitions */ /* Common macro functions */ #define is_newline(c) (c == '\r' || c == '\n') /* Limitations */ #define MAX_TOKEN_LEN 256 #define MAX_ID_LEN 64 #define MAX_LINE_LEN 256 #define MAX_VAR_LEN 128 #define MAX_TYPE_LEN 32 #define MAX_PARAMS 8 #define MAX_LOCALS 1600 #define MAX_FIELDS 64 #define MAX_TYPES 256 #define MAX_LABELS 256 #define MAX_IR_INSTR 80000 #define MAX_BB_PRED 128 #define MAX_BB_DOM_SUCC 64 #define MAX_BB_RDOM_SUCC 256 #define MAX_GLOBAL_IR 256 #define MAX_CODE 262144 #define MAX_DATA 262144 #define MAX_SYMTAB 65536 #define MAX_STRTAB 65536 #define MAX_HEADER 1024 #define MAX_PROGRAM_HEADER 1024 #define MAX_SECTION 1024 #define MAX_SECTION_HEADER 1024 #define MAX_SHSTR 1024 #define MAX_INTERP 1024 #define MAX_DYNAMIC 1024 #define MAX_DYNSYM 1024 #define MAX_DYNSTR 1024 #define MAX_RELPLT 1024 #define MAX_PLT 1024 #define MAX_GOTPLT 1024 #define MAX_CONSTANTS 1024 #define MAX_CASES 128 #define MAX_NESTING 128 #define MAX_OPERAND_STACK_SIZE 32 #define MAX_ANALYSIS_STACK_SIZE 800 /* Default capacities for common data structures */ /* Arena sizes optimized based on typical usage patterns */ #define DEFAULT_ARENA_SIZE 262144 /* 256 KiB - standard default */ #define SMALL_ARENA_SIZE 65536 /* 64 KiB - for small allocations */ #define LARGE_ARENA_SIZE 524288 /* 512 KiB - for instruction arena */ #define DEFAULT_FUNCS_SIZE 64 #define DEFAULT_SRC_FILE_COUNT 8 /* Arena compaction bitmask flags for selective memory reclamation */ #define COMPACT_ARENA_BLOCK 0x01 /* BLOCK_ARENA - variables/blocks */ #define COMPACT_ARENA_INSN 0x02 /* INSN_ARENA - instructions */ #define COMPACT_ARENA_BB 0x04 /* BB_ARENA - basic blocks */ #define COMPACT_ARENA_HASHMAP 0x08 /* HASHMAP_ARENA - hash nodes */ #define COMPACT_ARENA_GENERAL 0x10 /* GENERAL_ARENA - misc allocations */ #define COMPACT_ARENA_ALL 0x1F /* All arenas */ /* Common arena compaction combinations for different compilation phases */ #define COMPACT_PHASE_PARSING 
(COMPACT_ARENA_BLOCK | COMPACT_ARENA_GENERAL) #define COMPACT_PHASE_SSA (COMPACT_ARENA_INSN | COMPACT_ARENA_BB) #define COMPACT_PHASE_BACKEND (COMPACT_ARENA_BB | COMPACT_ARENA_GENERAL) #define ELF_START 0x10000 #define PTR_SIZE 4 /* Number of the available registers. Either 7 or 8 is accepted now. */ #define REG_CNT 8 /* This macro will be automatically defined at shecc run-time. */ #ifdef __SHECC__ /* use do-while as a substitution for nop */ #define UNUSED(x) \ do { \ ; \ } while (0) #define HOST_PTR_SIZE 4 #else /* suppress GCC/Clang warnings */ #define UNUSED(x) (void) (x) /* configure host data model when using 'memcpy'. */ #define HOST_PTR_SIZE __SIZEOF_POINTER__ #endif #ifndef MIN_ALIGNMENT #define MIN_ALIGNMENT 8 #endif #ifndef ALIGN_UP #define ALIGN_UP(val, align) (((val) + (align) - 1) & ~((align) - 1)) #endif /* Common data structures */ typedef struct arena_block { char *memory; int capacity; int offset; struct arena_block *next; } arena_block_t; typedef struct { arena_block_t *head; int total_bytes; /* Track total allocation for profiling */ int block_size; /* Default block size for new blocks */ } arena_t; /* string-based hash map definitions */ typedef struct hashmap_node { char *key; void *val; bool occupied; } hashmap_node_t; typedef struct { int size; int cap; hashmap_node_t *table; } hashmap_t; /* lexer tokens */ typedef enum { T_start, /* FIXME: Unused, intended for lexer state machine init */ T_eof, /* end-of-file (EOF) */ T_numeric, T_identifier, T_comma, /* , */ T_string, /* null-terminated string */ T_char, T_open_bracket, /* ( */ T_close_bracket, /* ) */ T_open_curly, /* { */ T_close_curly, /* } */ T_open_square, /* [ */ T_close_square, /* ] */ T_asterisk, /* '*' */ T_divide, /* / */ T_mod, /* % */ T_bit_or, /* | */ T_bit_xor, /* ^ */ T_bit_not, /* ~ */ T_log_and, /* && */ T_log_or, /* || */ T_log_not, /* ! */ T_lt, /* < */ T_gt, /* > */ T_le, /* <= */ T_ge, /* >= */ T_lshift, /* << */ T_rshift, /* >> */ T_dot, /* . 
*/ T_arrow, /* -> */ T_plus, /* + */ T_minus, /* - */ T_minuseq, /* -= */ T_pluseq, /* += */ T_asteriskeq, /* *= */ T_divideeq, /* /= */ T_modeq, /* %= */ T_lshifteq, /* <<= */ T_rshifteq, /* >>= */ T_xoreq, /* ^= */ T_oreq, /* |= */ T_andeq, /* &= */ T_eq, /* == */ T_noteq, /* != */ T_assign, /* = */ T_increment, /* ++ */ T_decrement, /* -- */ T_question, /* ? */ T_colon, /* : */ T_semicolon, /* ; */ T_ampersand, /* & */ T_return, T_if, T_else, T_while, T_for, T_do, T_typedef, T_enum, T_struct, T_union, T_sizeof, T_elipsis, /* ... */ T_switch, T_case, T_break, T_default, T_continue, T_goto, T_const, /* const qualifier */ /* C pre-processor directives */ T_cppd_include, T_cppd_define, T_cppd_undef, T_cppd_error, T_cppd_if, T_cppd_elif, T_cppd_else, T_cppd_endif, T_cppd_ifdef, T_cppd_ifndef, T_cppd_pragma, /* C pre-processor specific, these kinds * will be removed after pre-processing is done. */ T_newline, T_backslash, T_whitespace, T_tab } token_kind_t; /* Source location tracking for better error reporting */ typedef struct { int pos; /* raw source file position */ int len; /* length of token */ int line; int column; char *filename; } source_location_t; typedef struct token { token_kind_t kind; char *literal; source_location_t location; struct token *next; } token_t; typedef struct token_stream { token_t *head; token_t *tail; } token_stream_t; /* String pool for identifier deduplication */ typedef struct { hashmap_t *strings; /* Map string -> interned string */ } string_pool_t; /* String literal pool for deduplicating string constants */ typedef struct { hashmap_t *literals; /* Map string literal -> ELF data offset */ } string_literal_pool_t; /* builtin types */ typedef enum { TYPE_void = 0, TYPE_int, TYPE_char, TYPE_short, TYPE_struct, TYPE_union, TYPE_typedef } base_type_t; /* IR opcode */ typedef enum { /* intermediate use in front-end. 
No code generation */ OP_generic, OP_phi, OP_unwound_phi, /* work like address_of + store */ /* calling convention */ OP_define, /* function entry point */ OP_push, /* prepare arguments */ OP_call, /* function call */ OP_indirect, /* indirect call with function pointer */ OP_return, /* explicit return */ OP_allocat, /* allocate space on stack */ OP_assign, OP_load_constant, /* load constant */ OP_load_data_address, /* lookup address of a constant in data section */ OP_load_rodata_address, /* lookup address of a constant in rodata section */ /* control flow */ OP_branch, /* conditional jump */ OP_jump, /* unconditional jump */ OP_func_ret, /* returned value */ OP_label, /* for goto label */ /* function pointer */ OP_address_of_func, /* resolve function entry */ OP_load_func, /* prepare indirective call */ OP_global_load_func, /* memory address operations */ OP_address_of, /* lookup variable's address */ OP_global_address_of, OP_load, /* load a word from stack */ OP_global_load, OP_store, /* store a word to stack */ OP_global_store, OP_read, /* read from memory address */ OP_write, /* write to memory address */ /* arithmetic operators */ OP_add, OP_sub, OP_mul, OP_div, /* signed division */ OP_mod, /* modulo */ OP_ternary, /* ? 
: */ OP_lshift, OP_rshift, OP_log_and, OP_log_or, OP_log_not, OP_eq, /* equal */ OP_neq, /* not equal */ OP_lt, /* less than */ OP_leq, /* less than or equal */ OP_gt, /* greater than */ OP_geq, /* greater than or equal */ OP_bit_or, OP_bit_and, OP_bit_xor, OP_bit_not, OP_negate, /* data type conversion */ OP_trunc, OP_sign_ext, OP_cast, /* entry point of the state machine */ OP_start } opcode_t; /* variable definition */ typedef struct { int counter; int stack[64]; int stack_idx; } rename_t; typedef struct ref_block ref_block_t; struct ref_block_list { ref_block_t *head, *tail; }; typedef struct ref_block_list ref_block_list_t; typedef struct insn insn_t; typedef struct use_chain_node { insn_t *insn; struct use_chain_node *next, *prev; } use_chain_t; typedef struct var var_t; typedef struct type type_t; typedef struct var_list { int capacity; int size; var_t **elements; } var_list_t; struct var { type_t *type; char var_name[MAX_VAR_LEN]; int ptr_level; bool is_func; bool is_global; bool is_const_qualified; /* true if variable has const qualifier */ bool address_taken; /* true if variable address was taken (&var) */ int array_size; int array_dim1, array_dim2; /* first/second dimension size for 2D arrays */ int offset; /* offset from stack or frame, index 0 is reserved */ int init_val; /* for global initialization */ int liveness; /* live range */ int in_loop; struct var *base; int subscript; struct var *subscripts[128]; int subscripts_idx; rename_t rename; ref_block_list_t ref_block_list; /* blocks which kill variable */ use_chain_t *users_head, *users_tail; struct insn *last_assign; int consumed; bool is_ternary_ret; bool is_logical_ret; bool is_const; /* whether a constant representaion or not */ int vreg_id; /* Virtual register ID */ int phys_reg; /* Physical register assignment (-1 if unassigned) */ int vreg_flags; /* VReg flags */ int first_use; /* First instruction index where variable is used */ int last_use; /* Last instruction index where variable is used 
*/ int loop_depth; /* Nesting depth if variable is in a loop */ int use_count; /* Number of times variable is used */ bool space_is_allocated; /* whether space is allocated for this variable */ /* This flag is used to indicate to the compiler that the offset of * the variable is based on the top of the local stack. */ bool ofs_based_on_stack_top; /* True when this variable was synthesized to hold a compound literal * (e.g., array or struct literal temporaries). */ bool is_compound_literal; }; typedef struct func func_t; /* block definition */ struct block { var_list_t locals; struct block *parent; func_t *func; struct block *next; }; typedef struct block block_t; typedef struct basic_block basic_block_t; /* Definition of a growable buffer for a mutable null-terminated string * @size: Current number of elements in the array * @capacity: Number of elements that can be stored without resizing * @elements: Pointer to the array of characters */ typedef struct { int size; int capacity; char *elements; } strbuf_t; /* phase-2 IR definition */ struct ph2_ir { opcode_t op; int src0; int src1; int dest; char func_name[MAX_VAR_LEN]; basic_block_t *next_bb; basic_block_t *then_bb; basic_block_t *else_bb; struct ph2_ir *next; bool is_branch_detached; /* When an instruction uses a variable that its offset is based on * the top of the stack, this instruction's flag is also set to * indicate the compiler to recalculate the offset after the function's * stack size has been determined. * * Currently, only OP_load, OP_store and OP_address_of need this flag * to recompute the offset. 
*/ bool ofs_based_on_stack_top; }; typedef struct ph2_ir ph2_ir_t; /* type definition */ struct type { char type_name[MAX_TYPE_LEN]; base_type_t base_type; struct type *base_struct; int size; var_t fields[MAX_FIELDS]; int num_fields; int ptr_level; /* pointer level for typedef pointer types */ }; /* lvalue details */ typedef struct { int size; int ptr_level; bool is_func; bool is_reference; type_t *type; } lvalue_t; /* constants for enums */ typedef struct { char alias[MAX_VAR_LEN]; int value; } constant_t; struct phi_operand { var_t *var; basic_block_t *from; struct phi_operand *next; }; typedef struct phi_operand phi_operand_t; struct insn { struct insn *next, *prev; int idx; opcode_t opcode; var_t *rd; var_t *rs1; var_t *rs2; int sz; bool useful; /* Used in DCE process. Set true if instruction is useful. */ basic_block_t *belong_to; phi_operand_t *phi_ops; char str[64]; }; typedef struct { insn_t *head, *tail; } insn_list_t; typedef struct { ph2_ir_t *head, *tail; } ph2_ir_list_t; typedef enum { NEXT, ELSE, THEN } bb_connection_type_t; typedef struct { basic_block_t *bb; bb_connection_type_t type; } bb_connection_t; struct symbol { var_t *var; int index; struct symbol *next; }; typedef struct symbol symbol_t; typedef struct { symbol_t *head, *tail; } symbol_list_t; struct basic_block { insn_list_t insn_list; ph2_ir_list_t ph2_ir_list; bb_connection_t prev[MAX_BB_PRED]; /* Used in instruction dumping when ir_dump is enabled. 
*/ char bb_label_name[MAX_VAR_LEN]; struct basic_block *next; /* normal BB */ struct basic_block *then_; /* conditional BB */ struct basic_block *else_; struct basic_block *idom; struct basic_block *r_idom; struct basic_block *rpo_next; struct basic_block *rpo_r_next; var_list_t live_gen; var_list_t live_kill; var_list_t live_in; var_list_t live_out; int rpo; int rpo_r; struct basic_block *DF[64]; struct basic_block *RDF[64]; int df_idx; int rdf_idx; int visited; bool useful; /* indicate whether this BB contains useful instructions */ struct basic_block *dom_next[64]; struct basic_block *dom_prev; struct basic_block *rdom_next[256]; struct basic_block *rdom_prev; func_t *belong_to; block_t *scope; symbol_list_t symbol_list; /* variable declaration */ int elf_offset; }; struct ref_block { basic_block_t *bb; struct ref_block *next; }; /* Syntactic representation of func, combines syntactic details (e.g., return * type, parameters) with SSA-related information (e.g., basic blocks, control * flow) to support parsing, analysis, optimization, and code generation. 
*/

/* A named label bound to a basic block. 'used' is a flag kept alongside it;
 * presumably it records whether the label has been referenced -- confirm
 * against the parser.
 */
typedef struct {
    char label_name[MAX_ID_LEN];
    basic_block_t *bb;
    bool used;
} label_t;

/* Function descriptor; functions are chained through 'next'. */
struct func {
    /* Syntactic info */
    var_t return_def;
    var_t param_defs[MAX_PARAMS];
    int num_params;
    int va_args;
    int stack_size;

    /* SSA info */
    basic_block_t *bbs;
    basic_block_t *exit;
    symbol_list_t global_sym_list;
    int bb_cnt;
    int visited;

    /* Information used for dynamic linking */
    bool is_used;
    int plt_offset, got_offset;

    struct func *next;
};

/* Singly linked list of functions with direct access to both ends. */
typedef struct {
    func_t *head, *tail;
} func_list_t;

/* Arguments for a basic-block traversal: the function, the block to visit,
 * and two callbacks named for pre-order and post-order visits.
 */
typedef struct {
    func_t *func;
    basic_block_t *bb;
    void (*preorder_cb)(func_t *, basic_block_t *);
    void (*postorder_cb)(func_t *, basic_block_t *);
} bb_traversal_args_t;

/* State of one register slot: the variable associated with it and a
 * 'polluted' marker (presumably "holds a value not yet written back" --
 * confirm against the register allocator).
 */
typedef struct {
    var_t *var;
    int polluted;
} regfile_t;

/* ELF header
 *
 * The 16-bit fields are declared as two-byte char arrays rather than as a
 * 16-bit integer type.
 */
typedef struct {
    char e_ident[16];
    char e_type[2];
    char e_machine[2];
    int e_version;
    int e_entry;
    int e_phoff;
    int e_shoff;
    int e_flags;
    char e_ehsize[2];
    char e_phentsize[2];
    char e_phnum[2];
    char e_shentsize[2];
    char e_shnum[2];
    char e_shstrndx[2];
} elf32_hdr_t;

/* ELF program header */
typedef struct {
    int p_type;
    int p_offset;
    int p_vaddr;
    int p_paddr;
    int p_filesz;
    int p_memsz;
    int p_flags;
    int p_align;
} elf32_phdr_t;

/* ELF section header */
typedef struct {
    int sh_name;
    int sh_type;
    int sh_flags;
    int sh_addr;
    int sh_offset;
    int sh_size;
    int sh_link;
    int sh_info;
    int sh_addralign;
    int sh_entsize;
} elf32_shdr_t;

/* Structures for dynamic linked program */
/* ELF buffers for dynamic sections
 *
 * One buffer per dynamic-linking section, followed by the start addresses
 * and sizes recorded for some of them.
 */
typedef struct {
    strbuf_t *elf_interp;
    strbuf_t *elf_dynamic;
    strbuf_t *elf_dynsym;
    strbuf_t *elf_dynstr;
    strbuf_t *elf_relplt;
    strbuf_t *elf_plt;
    strbuf_t *elf_got;
    int elf_interp_start;
    int elf_relplt_start;
    int elf_plt_start;
    int elf_got_start;
    int relplt_size;
    int plt_size;
    int got_size;
} dynamic_sections_t;

/* For .dynsym section.
*/ typedef struct { int st_name; int st_value; int st_size; char st_info; char st_other; char st_shndx[2]; } elf32_sym_t; /* For .rel.plt section */ typedef struct { int r_offset; int r_info; } elf32_rel_t; /* For .dynamic section */ typedef struct { int d_tag; int d_un; } elf32_dyn_t; #define ELF32_ST_INFO(b, t) (((b) << 4) + ((t) & 0xf)) ================================================ FILE: src/elf.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. */ /* ELF file manipulation */ #include "../config" #include "defs.h" #include "globals.c" #ifndef PAGESIZE #define PAGESIZE 4096 #endif int elf_symbol_index = 0; void elf_write_str(strbuf_t *elf_array, const char *vals) { /* Note that strbuf_puts() does not push the null character. * * If necessary, use elf_write_byte() to append the null character * after calling elf_write_str(). 
*/ if (!elf_array || !vals) return; strbuf_puts(elf_array, vals); } void elf_write_byte(strbuf_t *elf_array, int val) { if (!elf_array) return; strbuf_putc(elf_array, val); } char e_extract_byte(int v, int b) { return (char) ((v >> (b << 3)) & 0xFF); } void elf_write_int(strbuf_t *elf_array, int val) { if (!elf_array) return; for (int i = 0; i < 4; i++) strbuf_putc(elf_array, e_extract_byte(val, i)); } void elf_write_blk(strbuf_t *elf_array, void *blk, int sz) { if (!elf_array || !blk || sz <= 0) return; char *ptr = blk; for (int i = 0; i < sz; i++) strbuf_putc(elf_array, ptr[i]); } void elf_generate_header(void) { /* Check for null pointers to prevent crashes */ if (!elf_code || !elf_data || !elf_symtab || !elf_strtab || !elf_header) { fatal("ELF buffers not initialized"); return; } elf32_hdr_t hdr; int phnum, shnum, shstrndx, shoff; if (dynlink) { /* In dynamic linking mode: * - number of program headers = 4 * - number of section headers = 15 * - section header index of .shstrtab = 14 */ phnum = 4; shnum = 15; shstrndx = 14; shoff = elf_header_len + elf_code->size + elf_data->size + elf_rodata->size + elf_symtab->size + elf_strtab->size + elf_shstrtab->size + dynamic_sections.elf_interp->size + dynamic_sections.elf_relplt->size + dynamic_sections.elf_plt->size + dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size + dynamic_sections.elf_dynsym->size + dynamic_sections.elf_dynamic->size; } else { /* In static linking mode: * - number of program headers = 2 * - number of section headers = 8 * - section header index of .shstrtab = 7 */ phnum = 2; shnum = 8; shstrndx = 7; shoff = elf_header_len + elf_code->size + elf_data->size + elf_rodata->size + elf_symtab->size + elf_strtab->size + elf_shstrtab->size; } /* The following table explains the meaning of each field in the * ELF32 file header. * * Notice that the following values are hexadecimal. 
* * | File | | * & | Header bytes | Explanation | * ---+----------------+-------------------------------------------------+ * 00 | 7F 45 4C 46 | e_ident[0] - e_ident[3]: ELF magic number. | * | 01 | e_ident[4]: 1 -> 32-bit, 2 -> 64-bit. | * | 01 | e_ident[5]: 1 -> little-endian. 2 -> big-endian.| * | 01 | e_ident[6]: 1 -> ELF header version; must be 1. | * | 00 | e_ident[7]: Target OS ABI; be 1 for Linux. | * | 00 | e_ident[8]: ABI version; should be 1 for Linux. | * | 00 00 00 | e_ident[9] - e_ident[16]: Padding; Unused; | * | 00 00 00 00 | should be 0. | * ---+----------------+-------------------------------------------------+ * | 02 00 | e_type: Object file type; 2 -> executable | * | 28 00 | e_machine: Instruction Set Architecture. | * | | 0x28 -> ARMv7 | * | | 0xF3 -> RISC-V | * | 01 00 00 00 | e_version: ELF identification version; | * | | must be 1. | * | 54 00 01 00 | e_entry: Memory address of entry point. | * | | (where process starts). | * | 34 00 00 00 | e_phoff: File offset of program headers. | * | | 0x34 -> 32-bit, 0x40 -> 64-bit. | * | d7 8a 03 00 | e_shoff: File offset of section headers. | * ---+----------------+-------------------------------------------------+ * | 00 02 00 50 | e_flags: 0x50000200 -> ARM Version5 EABI, | * | | soft-float ABI | * | | 0x00000000 -> RISC-V | * | 34 00 | e_ehsize: Size of this header. | * | | 0x34 -> 32-bit, 0x40 -> 64-bit. | * | 20 00 | e_phentsize: Size of each program header. | * | | 0x20 -> 32-bit, 0x38 -> 64-bit. | * | 01 00 | e_phnum: Number of program headers. | * | 28 00 | e_shentsize: Size of each section header. | * | | 0x28 -> 32-bit, 0x40 -> 64-bit. | * | 06 00 | e_shnum: Number of section headers. | * | 05 00 | e_shstrndx: Index of section header containing | * | | section names. 
| * ---+----------------+-------------------------------------------------+ * 34 | | | */ /* ELF file header */ hdr.e_ident[0] = (char) 0x7F; /* ELF magic number */ hdr.e_ident[1] = 'E'; hdr.e_ident[2] = 'L'; hdr.e_ident[3] = 'F'; hdr.e_ident[4] = 1; /* 32-bit */ hdr.e_ident[5] = 1; /* little-endian */ hdr.e_ident[6] = 1; /* ELF header version */ hdr.e_ident[7] = 0; /* Target OS ABI */ hdr.e_ident[8] = 0; /* ABI version */ hdr.e_ident[9] = 0; /* Padding */ hdr.e_ident[10] = 0; hdr.e_ident[11] = 0; hdr.e_ident[12] = 0; hdr.e_ident[13] = 0; hdr.e_ident[14] = 0; hdr.e_ident[15] = 0; hdr.e_type[0] = 2; /* Object file type */ hdr.e_type[1] = 0; hdr.e_machine[0] = ELF_MACHINE; /* Instruction Set Architecture */ hdr.e_machine[1] = 0; hdr.e_version = 1; /* ELF version */ hdr.e_entry = elf_code_start; /* entry point */ hdr.e_phoff = sizeof(elf32_hdr_t); /* program header offset */ hdr.e_shoff = shoff; /* section header offset */ hdr.e_flags = ELF_FLAGS; /* flags */ hdr.e_ehsize[0] = sizeof(elf32_hdr_t); /* header size */ hdr.e_ehsize[1] = 0; hdr.e_phentsize[0] = sizeof(elf32_phdr_t); /* program header size */ hdr.e_phentsize[1] = 0; hdr.e_phnum[0] = phnum; /* number of program headers */ hdr.e_phnum[1] = 0; hdr.e_shentsize[0] = sizeof(elf32_shdr_t); /* section header size */ hdr.e_shentsize[1] = 0; hdr.e_shnum[0] = shnum; /* number of section headers */ hdr.e_shnum[1] = 0; hdr.e_shstrndx[0] = shstrndx; /* section index with names */ hdr.e_shstrndx[1] = 0; elf_write_blk(elf_header, &hdr, sizeof(elf32_hdr_t)); } void elf_generate_program_headers(void) { if (!elf_program_header || !elf_code || !elf_data || !elf_rodata || (dynlink && (!dynamic_sections.elf_interp || !dynamic_sections.elf_relplt || !dynamic_sections.elf_plt || !dynamic_sections.elf_got || !dynamic_sections.elf_dynstr || !dynamic_sections.elf_dynsym || !dynamic_sections.elf_dynamic))) { fatal("ELF section buffers not initialized"); return; } elf32_phdr_t phdr; /* Explain the meaning of each field in the ELF32 
program header. * * | Program | | * & | Header bytes | Explanation | * ---+----------------+-------------------------------------------------+ * 34 | 01 00 00 00 | p_type: Segment type; 1 -> loadable. | * | 54 00 00 00 | p_offset: Offset of segment in the file. | * | 54 00 01 00 | p_vaddr: Virtual address of loaded segment. | * | 54 00 01 00 | p_paddr: Only used on systems where physical | * | | address is relevant. | * | 48 8a 03 00 | p_filesz: Size of the segment in the file image.| * | 48 8a 03 00 | p_memsz: Size of the segment in memory. | * | | This value should be greater than or | * | | equal to p_filesz. | * | 07 00 00 00 | p_flags: Segment-wise permissions; | * | | 0x1 -> execute, 0x2 -> write, | * | | 0x4 -> read | * | 04 00 00 00 | p_align: Align segment to the specified value. | * ---+----------------+-------------------------------------------------+ * 54 | | | */ /* program header - read-only segment */ phdr.p_type = 1; /* PT_LOAD */ phdr.p_offset = 0; /* offset of segment */ phdr.p_vaddr = ELF_START; /* virtual address */ phdr.p_paddr = ELF_START; /* physical address */ phdr.p_filesz = elf_header_len + elf_code->size + elf_rodata->size; /* size in file */ phdr.p_memsz = elf_header_len + elf_code->size + elf_rodata->size; /* size in memory */ phdr.p_flags = 5; /* flags */ phdr.p_align = PAGESIZE; /* alignment */ if (dynlink) { phdr.p_filesz += dynamic_sections.elf_relplt->size + dynamic_sections.elf_plt->size; phdr.p_memsz += dynamic_sections.elf_relplt->size + dynamic_sections.elf_plt->size; } elf_write_blk(elf_program_header, &phdr, sizeof(elf32_phdr_t)); /* program header - readable and writable segment */ phdr.p_type = 1; /* PT_LOAD */ phdr.p_offset = elf_header_len + elf_code->size + elf_rodata->size; /* offset of segment */ phdr.p_vaddr = elf_data_start; /* virtual address */ phdr.p_paddr = elf_data_start; /* physical address */ phdr.p_filesz = elf_data->size; /* size in file */ phdr.p_memsz = elf_data->size + elf_bss_size; /* size in memory */ 
phdr.p_flags = 6; /* flags */ phdr.p_align = PAGESIZE; /* alignment */ if (dynlink) { phdr.p_offset += dynamic_sections.elf_relplt->size + dynamic_sections.elf_plt->size; phdr.p_vaddr = dynamic_sections.elf_interp_start; phdr.p_paddr = dynamic_sections.elf_interp_start; phdr.p_filesz += dynamic_sections.elf_interp->size + dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size + dynamic_sections.elf_dynsym->size + dynamic_sections.elf_dynamic->size; phdr.p_memsz += dynamic_sections.elf_interp->size + dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size + dynamic_sections.elf_dynsym->size + dynamic_sections.elf_dynamic->size; } elf_write_blk(elf_program_header, &phdr, sizeof(elf32_phdr_t)); if (dynlink) { /* program header - program interpreter (.interp section) */ phdr.p_type = 3; /* PT_INTERP */ phdr.p_offset = elf_header_len + elf_code->size + elf_rodata->size + dynamic_sections.elf_relplt->size + dynamic_sections.elf_plt->size; /* offset of segment */ phdr.p_vaddr = dynamic_sections.elf_interp_start; /* virtual address */ phdr.p_paddr = dynamic_sections.elf_interp_start; /* physical address */ phdr.p_filesz = strlen(DYN_LINKER) + 1; /* size in file */ phdr.p_memsz = strlen(DYN_LINKER) + 1; /* size in memory */ phdr.p_flags = 4; /* flags */ phdr.p_align = 1; /* alignment */ elf_write_blk(elf_program_header, &phdr, sizeof(elf32_phdr_t)); /* program header - .dynamic section */ phdr.p_type = 2; /* PT_DYNAMIC */ phdr.p_offset = elf_header_len + elf_code->size + elf_rodata->size + dynamic_sections.elf_relplt->size + dynamic_sections.elf_plt->size + dynamic_sections.elf_interp->size + dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size + dynamic_sections.elf_dynsym->size; /* offset of segment */ phdr.p_vaddr = dynamic_sections.elf_got_start + dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size + dynamic_sections.elf_dynsym->size; /* virtual address */ phdr.p_paddr = dynamic_sections.elf_got_start + 
dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size + dynamic_sections.elf_dynsym->size; /* physical address */ phdr.p_filesz = dynamic_sections.elf_dynamic->size; /* size in file */ phdr.p_memsz = dynamic_sections.elf_dynamic->size; /* size in memory */ phdr.p_flags = 6; /* flags */ phdr.p_align = 4; /* alignment */ elf_write_blk(elf_program_header, &phdr, sizeof(elf32_phdr_t)); } } void elf_generate_section_headers(void) { /* Check for null pointers to prevent crashes */ if (!elf_section_header || !elf_code || !elf_data || !elf_rodata || !elf_symtab || !elf_strtab || !elf_shstrtab || (dynlink && (!dynamic_sections.elf_interp || !dynamic_sections.elf_relplt || !dynamic_sections.elf_plt || !dynamic_sections.elf_got || !dynamic_sections.elf_dynstr || !dynamic_sections.elf_dynsym || !dynamic_sections.elf_dynamic))) { fatal("ELF section buffers not initialized"); return; } /* section header table */ elf32_shdr_t shdr; int ofs = elf_header_len, sh_name = 0; /* * The following table uses the text section header as an example * to explain the ELF32 section header. * * | Section | | * & | Header bytes | Explanation | * ---+----------------+-------------------------------------------------+ * | 0b 00 00 00 | sh_name: Name of the section. Giving the | * | | location of a null-terminated string. | * | 01 00 00 00 | sh_type: Type of the section's contents | * | | and semantics. | * | | 1 -> holds the program-defined | * | | information | * | 07 00 00 00 | sh_flags: Miscellaneous attributes. | * | | 0x1 -> writable, 0x2 -> allocatable | * | | 0x4 -> executable. | * | 54 00 01 00 | sh_addr: Starting address of the section | * | | in the memory image of a process. | * | 54 00 00 00 | sh_offset: Offset of the section in the file. | * | 0b 30 03 00 | sh_size: Size of the section. | * | 00 00 00 00 | sh_link: Section header table index link. | * | 00 00 00 00 | sh_info: Extra information. | * | 04 00 00 00 | sh_addralign: Address alignment constraints. 
| * | 00 00 00 00 | sh_entsize: Size of each entry. | * ---+----------------+-------------------------------------------------+ * | | | */ /* NULL section */ shdr.sh_name = sh_name; shdr.sh_type = 0; shdr.sh_flags = 0; shdr.sh_addr = 0; shdr.sh_offset = 0; shdr.sh_size = 0; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 0; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); sh_name += 1; /* .text */ shdr.sh_name = sh_name; shdr.sh_type = 1; shdr.sh_flags = 7; shdr.sh_addr = elf_code_start; shdr.sh_offset = ofs; shdr.sh_size = elf_code->size; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 4; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += elf_code->size; sh_name += strlen(".text") + 1; /* .rodata */ shdr.sh_name = sh_name; /* Offset in shstrtab for ".rodata" */ shdr.sh_type = 1; /* SHT_PROGBITS */ shdr.sh_flags = 2; /* SHF_ALLOC only (read-only) */ shdr.sh_addr = elf_rodata_start; shdr.sh_offset = ofs; shdr.sh_size = elf_rodata->size; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 4; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += elf_rodata->size; sh_name += strlen(".rodata") + 1; if (dynlink) { /* .rel.plt */ shdr.sh_name = sh_name; shdr.sh_type = 9; /* SHT_REL */ shdr.sh_flags = 0x42; /* 0x40 | SHF_ALLOC */ shdr.sh_addr = dynamic_sections.elf_relplt_start; shdr.sh_offset = ofs; shdr.sh_size = dynamic_sections.elf_relplt->size; shdr.sh_link = 8; /* The section header index of .dynsym. */ shdr.sh_info = 6; /* The section header index of .got. 
*/ shdr.sh_addralign = 4; shdr.sh_entsize = sizeof(elf32_rel_t); elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += dynamic_sections.elf_relplt->size; sh_name += strlen(".rel.plt") + 1; /* .plt */ shdr.sh_name = sh_name; shdr.sh_type = 1; shdr.sh_flags = 0x6; shdr.sh_addr = dynamic_sections.elf_plt_start; shdr.sh_offset = ofs; shdr.sh_size = dynamic_sections.elf_plt->size; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 4; shdr.sh_entsize = 4; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += dynamic_sections.elf_plt->size; sh_name += strlen(".plt") + 1; /* .interp */ shdr.sh_name = sh_name; shdr.sh_type = 1; shdr.sh_flags = 0x2; shdr.sh_addr = dynamic_sections.elf_interp_start; shdr.sh_offset = ofs; shdr.sh_size = strlen(DYN_LINKER) + 1; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 1; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += dynamic_sections.elf_interp->size; sh_name += strlen(".interp") + 1; /* .got */ shdr.sh_name = sh_name; shdr.sh_type = 1; shdr.sh_flags = 0x3; shdr.sh_addr = dynamic_sections.elf_got_start; shdr.sh_offset = ofs; shdr.sh_size = dynamic_sections.elf_got->size; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 4; shdr.sh_entsize = PTR_SIZE; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += dynamic_sections.elf_got->size; sh_name += strlen(".got") + 1; /* .dynstr */ shdr.sh_name = sh_name; shdr.sh_type = 3; shdr.sh_flags = 0x2; shdr.sh_addr = dynamic_sections.elf_got_start + dynamic_sections.elf_got->size; shdr.sh_offset = ofs; shdr.sh_size = dynamic_sections.elf_dynstr->size; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 1; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += dynamic_sections.elf_dynstr->size; sh_name += strlen(".dynstr") + 1; /* .dynsym */ shdr.sh_name = sh_name; shdr.sh_type = 11; shdr.sh_flags = 0x2; shdr.sh_addr = 
dynamic_sections.elf_got_start + dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size; shdr.sh_offset = ofs; shdr.sh_size = dynamic_sections.elf_dynsym->size; shdr.sh_link = 7; /* The section header index of .dynstr. */ shdr.sh_info = 1; /* The index of the first non-local symbol. */ shdr.sh_addralign = 4; shdr.sh_entsize = sizeof(elf32_sym_t); elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += dynamic_sections.elf_dynsym->size; sh_name += strlen(".dynsym") + 1; /* .dynamic */ shdr.sh_name = sh_name; shdr.sh_type = 6; shdr.sh_flags = 0x3; shdr.sh_addr = dynamic_sections.elf_got_start + dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size + dynamic_sections.elf_dynsym->size; shdr.sh_offset = ofs; shdr.sh_size = dynamic_sections.elf_dynamic->size; shdr.sh_link = 7; /* The section header index of .dynstr. */ shdr.sh_info = 0; shdr.sh_addralign = 4; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += dynamic_sections.elf_dynamic->size; sh_name += strlen(".dynamic") + 1; } /* .data */ shdr.sh_name = sh_name; shdr.sh_type = 1; shdr.sh_flags = 3; shdr.sh_addr = elf_data_start; shdr.sh_offset = ofs; shdr.sh_size = elf_data->size; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 4; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += elf_data->size; sh_name += strlen(".data") + 1; /* .bss */ shdr.sh_name = sh_name; /* Offset in shstrtab for ".bss" */ shdr.sh_type = 8; /* SHT_NOBITS */ shdr.sh_flags = 3; /* SHF_ALLOC | SHF_WRITE */ shdr.sh_addr = elf_bss_start; shdr.sh_offset = ofs; /* File offset (not actually used for NOBITS) */ shdr.sh_size = elf_bss_size; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 4; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); sh_name += strlen(".bss") + 1; /* Note: .bss is not written to file (SHT_NOBITS) */ /* .symtab */ shdr.sh_name = sh_name; shdr.sh_type = 2; 
shdr.sh_flags = 0; shdr.sh_addr = 0; shdr.sh_offset = ofs; shdr.sh_size = elf_symtab->size; shdr.sh_link = dynlink ? 13 : 6; /* Link to .strtab */ shdr.sh_info = elf_symbol_index; shdr.sh_addralign = 4; shdr.sh_entsize = 16; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += elf_symtab->size; sh_name += strlen(".symtab") + 1; /* .strtab */ shdr.sh_name = sh_name; shdr.sh_type = 3; shdr.sh_flags = 0; shdr.sh_addr = 0; shdr.sh_offset = ofs; shdr.sh_size = elf_strtab->size; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 1; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); ofs += elf_strtab->size; sh_name += strlen(".strtab") + 1; /* .shstr */ shdr.sh_name = sh_name; shdr.sh_type = 3; shdr.sh_flags = 0; shdr.sh_addr = 0; shdr.sh_offset = ofs; shdr.sh_size = elf_shstrtab->size; shdr.sh_link = 0; shdr.sh_info = 0; shdr.sh_addralign = 1; shdr.sh_entsize = 0; elf_write_blk(elf_section_header, &shdr, sizeof(elf32_shdr_t)); sh_name += strlen(".shstrtab") + 1; } void elf_align(strbuf_t *elf_array) { /* Check for null pointers to prevent crashes */ if (!elf_array) { fatal("ELF buffers not initialized for alignment"); return; } while (elf_array->size & 3) elf_write_byte(elf_array, 0); } void elf_generate_sections(void) { if (!elf_shstrtab || (dynlink && (!dynamic_sections.elf_interp || !dynamic_sections.elf_relplt || !dynamic_sections.elf_plt || !dynamic_sections.elf_got || !dynamic_sections.elf_dynstr || !dynamic_sections.elf_dynsym || !dynamic_sections.elf_dynamic))) { fatal("ELF section buffers not initialized"); return; } if (dynlink) { /* In dynamic linking mode, elf_generate_sections() also generates * .interp, .dynsym, .dynstr, .relplt, .got and dynamic sections. * * .plt section is generated at the code generation phase. * * TODO: * Define a new structure named 'elf32_rela_t' and use it to generate * relocation entries for RISC-V architecture. 
*/ elf32_sym_t sym; elf32_dyn_t dyn; elf32_rel_t rel; int dymsym_idx = 1, func_plt_ofs, func_got_ofs, st_name = 0; memset(&sym, 0, sizeof(elf32_sym_t)); memset(&dyn, 0, sizeof(elf32_dyn_t)); memset(&rel, 0, sizeof(elf32_rel_t)); /* .interp section */ elf_write_str(dynamic_sections.elf_interp, DYN_LINKER); elf_write_byte(dynamic_sections.elf_interp, 0); elf_align(dynamic_sections.elf_interp); /* Add first symbol table entry (STN_UNDEF) to .dynsym section. */ elf_write_blk(dynamic_sections.elf_dynsym, &sym, sizeof(elf32_sym_t)); /* Add first NULL byte to .dynstr section. */ elf_write_byte(dynamic_sections.elf_dynstr, 0); st_name += 1; /* Add "libc.so.6" to .dynstr section. */ elf_write_str(dynamic_sections.elf_dynstr, LIBC_SO); elf_write_byte(dynamic_sections.elf_dynstr, 0); st_name += strlen(LIBC_SO) + 1; /* Perform the following steps for each external function. * - Add a new PLT relocation entry to .relplt section. * - Add a new dynamic symbol entry to .dynsym section. * - Append the external function name to .dynstr section. * - Set plt_offset and got_offset for the external function. * * Since __libc_start_main is not added to the function list, * it must be handled additionally first. */ rel.r_offset = dynamic_sections.elf_got_start + PTR_SIZE * 3; rel.r_info = (dymsym_idx << 8) | R_ARCH_JUMP_SLOT; elf_write_blk(dynamic_sections.elf_relplt, &rel, sizeof(elf32_rel_t)); sym.st_name = st_name; sym.st_info = ELF32_ST_INFO(1, 2); /* STB_GLOBAL = 1, STT_FUNC = 2 */ elf_write_blk(dynamic_sections.elf_dynsym, &sym, sizeof(elf32_sym_t)); dymsym_idx += 1; elf_write_str(dynamic_sections.elf_dynstr, "__libc_start_main"); elf_write_byte(dynamic_sections.elf_dynstr, 0); st_name += strlen("__libc_start_main") + 1; /* Because PLT[1] and GOT[3] are reserved for __libc_start_main, * its plt_offset and got_offset must be PLT_FIXUP_SIZE and * PTR_SIZE * 3, respectively. Therefore, no offset assignment is * required for this function. 
*/ func_plt_ofs = PLT_FIXUP_SIZE + PLT_ENT_SIZE; func_got_ofs = PTR_SIZE << 2; for (func_t *func = FUNC_LIST.head; func; func = func->next) { if (func->is_used && !func->bbs) { rel.r_offset += PTR_SIZE; rel.r_info = (dymsym_idx << 8) | R_ARCH_JUMP_SLOT; elf_write_blk(dynamic_sections.elf_relplt, &rel, sizeof(elf32_rel_t)); sym.st_name = st_name; sym.st_info = ELF32_ST_INFO(1, 2); /* STB_GLOBAL = 1, STT_FUNC = 2 */ elf_write_blk(dynamic_sections.elf_dynsym, &sym, sizeof(elf32_sym_t)); dymsym_idx += 1; elf_write_str(dynamic_sections.elf_dynstr, func->return_def.var_name); elf_write_byte(dynamic_sections.elf_dynstr, 0); st_name += strlen(func->return_def.var_name) + 1; func->plt_offset = func_plt_ofs; func->got_offset = func_got_ofs; func_plt_ofs += PLT_ENT_SIZE; func_got_ofs += PTR_SIZE; } } /* Ensure proper alignment for .dynstr section. */ elf_align(dynamic_sections.elf_dynstr); /* .got section * * - GOT[0] holds the virtual address of .dynamic section. * - GOT[1] and GOT[2] are reserved for link_map and resolver, * and are initialized to 0. * - The remaining entries are initialized to &PLT[0]. */ elf_write_int(dynamic_sections.elf_got, dynamic_sections.elf_got_start + dynamic_sections.got_size + dynamic_sections.elf_dynstr->size + dynamic_sections.elf_dynsym->size); elf_write_int(dynamic_sections.elf_got, 0); elf_write_int(dynamic_sections.elf_got, 0); for (int i = PTR_SIZE * 3; i < dynamic_sections.got_size; i += PTR_SIZE) elf_write_int(dynamic_sections.elf_got, dynamic_sections.elf_plt_start); /* .dynamic section */ dyn.d_tag = 0x5; /* STRTAB */ dyn.d_un = dynamic_sections.elf_got_start + dynamic_sections.got_size; /* The virtual address of .dynstr. 
*/ elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0xa; /* STRSZ */ dyn.d_un = dynamic_sections.elf_dynstr->size; elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x6; /* SYMTAB */ dyn.d_un = dynamic_sections.elf_got_start + dynamic_sections.got_size + dynamic_sections.elf_dynstr ->size; /* The virtual address of .dynsym. */ elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0xb; /* SYMENT */ dyn.d_un = sizeof(elf32_sym_t); /* Size of an entry. */ elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x11; /* REL */ dyn.d_un = dynamic_sections .elf_relplt_start; /* The virtual address of .rel.plt. */ elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x12; /* RELSZ */ dyn.d_un = dynamic_sections.relplt_size; elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x13; /* RELENT */ dyn.d_un = sizeof(elf32_rel_t); elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x3; /* PLTGOT */ dyn.d_un = dynamic_sections.elf_got_start; /* The virtual address of .got.*/ elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x2; /* PLTRELSZ */ dyn.d_un = dynamic_sections.relplt_size; elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x14; /* PLTREL */ dyn.d_un = 0x11; elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x17; /* JMPREL */ dyn.d_un = dynamic_sections .elf_relplt_start; /* The virtual address of .rel.plt. */ elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x1; /* NEEDED */ dyn.d_un = 0x1; /* The index of "libc.so.6" in .dynstr. 
*/ elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); dyn.d_tag = 0x0; /* NULL */ dyn.d_un = 0x0; elf_write_blk(dynamic_sections.elf_dynamic, &dyn, sizeof(elf32_dyn_t)); } /* shstr section; len = 53 * If using dynamic linking, len = 105. */ elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".text"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".rodata"); elf_write_byte(elf_shstrtab, 0); if (dynlink) { elf_write_str(elf_shstrtab, ".rel.plt"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".plt"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".interp"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".got"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".dynstr"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".dynsym"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".dynamic"); elf_write_byte(elf_shstrtab, 0); } elf_write_str(elf_shstrtab, ".data"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".bss"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".symtab"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".strtab"); elf_write_byte(elf_shstrtab, 0); elf_write_str(elf_shstrtab, ".shstrtab"); elf_write_byte(elf_shstrtab, 0); } void elf_add_symbol(const char *symbol, int pc) { /* Check for null pointers to prevent crashes */ if (!symbol || !elf_symtab || !elf_strtab) { fatal("Invalid parameters for elf_add_symbol"); return; } elf_write_int(elf_symtab, elf_strtab->size); elf_write_int(elf_symtab, pc); elf_write_int(elf_symtab, 0); elf_write_int(elf_symtab, pc == 0 ? 
0 : 1 << 16); elf_write_str(elf_strtab, symbol); elf_write_byte(elf_strtab, 0); elf_symbol_index++; } void elf_preprocess(void) { elf_header_len = sizeof(elf32_hdr_t) + (sizeof(elf32_phdr_t) << 1); if (dynlink) elf_header_len += (sizeof(elf32_phdr_t) << 1); elf_align(elf_data); elf_align(elf_rodata); elf_code_start = ELF_START + elf_header_len; elf_rodata_start = elf_code_start + elf_offset; if (dynlink) { /* Precalculate the sizes of .rel.plt, .plt and .got sections. * * Suppose the compiled program has n external functions: * - .rel.plt contains n entries. * - .plt has n entries plus one fixup entry. * - .got includes n + 3 entries * - GOT[0] holds the virtual address of .dynamic section. * - GOT[1] and GOT[2] are reserved for link_map and resolver * (both set to 0). * - The remaining entries correspond to all external functions. * * Next, consider the case of __libc_start_main before initializing * the sizes: * - .rel.plt has the one entry for __libc_start_main. * - .plt includes one fixup entry plus one entry for __libc_start_main. * - .got has 3 + 1 entries. * - 3 entries for GOT[0] - GOT[2]. * - 1 entry (GOT[3]) reserved for __libc_start_main. * * Therefore, the following code initialize the section sizes based on * the layout described above, and then traverse the function list in a * for loop to increment the sizes for each newly found external * function. */ dynamic_sections.relplt_size = sizeof(elf32_rel_t); dynamic_sections.plt_size = PLT_FIXUP_SIZE + PLT_ENT_SIZE; dynamic_sections.got_size = PTR_SIZE * 3 + PTR_SIZE; for (func_t *func = FUNC_LIST.head; func; func = func->next) { if (func->is_used && !func->bbs) { dynamic_sections.relplt_size += sizeof(elf32_rel_t); dynamic_sections.plt_size += PLT_ENT_SIZE; dynamic_sections.got_size += PTR_SIZE; } } /* Set the starting addresses of the three sections. 
*/ int elf_interp_size = strlen(DYN_LINKER) + 1; elf_interp_size = ALIGN_UP(elf_interp_size, 4); dynamic_sections.elf_relplt_start = elf_rodata_start + elf_rodata->size; dynamic_sections.elf_plt_start = dynamic_sections.elf_relplt_start + dynamic_sections.relplt_size; /* Since the first section of the second load segment is .interp * when using dynamic linking mode, adding PAGESIZE to elf_interp_start * is to ensure that two load segments don't share a common page. */ dynamic_sections.elf_interp_start = dynamic_sections.elf_plt_start + dynamic_sections.plt_size + PAGESIZE; dynamic_sections.elf_got_start = dynamic_sections.elf_interp_start + elf_interp_size; } elf_generate_sections(); if (dynlink) { elf_data_start = dynamic_sections.elf_got_start + dynamic_sections.elf_got->size + dynamic_sections.elf_dynstr->size + dynamic_sections.elf_dynsym->size + dynamic_sections.elf_dynamic->size; } else { /* To prevent two load segments from sharing a common page, add * PAGESIZE to elf_data_start, since the first section of the second * load segment is .data in static linking mode. 
*/ elf_data_start = elf_rodata_start + elf_rodata->size + PAGESIZE; } elf_bss_start = elf_data_start + elf_data->size; elf_align(elf_symtab); elf_align(elf_strtab); } void elf_postprocess(void) { elf_generate_header(); elf_generate_program_headers(); elf_generate_section_headers(); } void elf_generate(const char *outfile) { if (!outfile) outfile = "a.out"; FILE *fp = fopen(outfile, "wb"); if (!fp) { fatal("Unable to open output file for writing"); return; } for (int i = 0; i < elf_header->size; i++) fputc(elf_header->elements[i], fp); for (int i = 0; i < elf_program_header->size; i++) fputc(elf_program_header->elements[i], fp); /* Read-only sections */ for (int i = 0; i < elf_code->size; i++) fputc(elf_code->elements[i], fp); for (int i = 0; i < elf_rodata->size; i++) fputc(elf_rodata->elements[i], fp); if (dynlink) { /* Read-only sections */ for (int i = 0; i < dynamic_sections.elf_relplt->size; i++) fputc(dynamic_sections.elf_relplt->elements[i], fp); for (int i = 0; i < dynamic_sections.elf_plt->size; i++) fputc(dynamic_sections.elf_plt->elements[i], fp); /* Readable and writable sections */ for (int i = 0; i < dynamic_sections.elf_interp->size; i++) fputc(dynamic_sections.elf_interp->elements[i], fp); for (int i = 0; i < dynamic_sections.elf_got->size; i++) fputc(dynamic_sections.elf_got->elements[i], fp); for (int i = 0; i < dynamic_sections.elf_dynstr->size; i++) fputc(dynamic_sections.elf_dynstr->elements[i], fp); for (int i = 0; i < dynamic_sections.elf_dynsym->size; i++) fputc(dynamic_sections.elf_dynsym->elements[i], fp); for (int i = 0; i < dynamic_sections.elf_dynamic->size; i++) fputc(dynamic_sections.elf_dynamic->elements[i], fp); } /* Readable and writable sections */ for (int i = 0; i < elf_data->size; i++) fputc(elf_data->elements[i], fp); /* Note: .bss is not written to file (SHT_NOBITS) */ /* Other sections and section headers */ for (int i = 0; i < elf_symtab->size; i++) fputc(elf_symtab->elements[i], fp); for (int i = 0; i < elf_strtab->size; 
i++) fputc(elf_strtab->elements[i], fp); for (int i = 0; i < elf_shstrtab->size; i++) fputc(elf_shstrtab->elements[i], fp); for (int i = 0; i < elf_section_header->size; i++) fputc(elf_section_header->elements[i], fp); fclose(fp); } ================================================ FILE: src/globals.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. */ #pragma once #include #include #include #include #include #include "defs.h" /* Forward declaration for string interning */ char *intern_string(char *str); /* Lexer */ token_t *cur_token; /* TOKEN_CACHE maps filename to the corresponding computed token stream */ hashmap_t *TOKEN_CACHE; strbuf_t *LIBC_SRC; /* Global objects */ hashmap_t *SRC_FILE_MAP; hashmap_t *FUNC_MAP; hashmap_t *CONSTANTS_MAP; /* Types */ type_t *TYPES; int types_idx = 0; type_t *TY_void; type_t *TY_char; type_t *TY_bool; type_t *TY_int; type_t *TY_short; /* Arenas */ arena_t *INSN_ARENA; /* HASHMAP_ARENA is responsible for hashmap_node_t allocation */ arena_t *HASHMAP_ARENA; /* BLOCK_ARENA is responsible for block_t / var_t allocation */ arena_t *BLOCK_ARENA; /* BB_ARENA is responsible for basic_block_t / ph2_ir_t allocation */ arena_t *BB_ARENA; /* TOKEN_ARENA is responsible for token_t (including literal) / * source_location_t allocation */ arena_t *TOKEN_ARENA; /* GENERAL_ARENA is responsible for functions, symbols, constants, aliases, * macros, and traversal args */ arena_t *GENERAL_ARENA; int bb_label_idx = 0; ph2_ir_t **PH2_IR_FLATTEN; int ph2_ir_idx = 0; func_list_t FUNC_LIST; func_t *GLOBAL_FUNC; block_t *GLOBAL_BLOCK; basic_block_t *MAIN_BB; int elf_offset = 0; regfile_t REGS[REG_CNT]; hashmap_t *INCLUSION_MAP; /* ELF sections */ strbuf_t *elf_code; strbuf_t *elf_data; strbuf_t *elf_rodata; strbuf_t *elf_header; strbuf_t 
*elf_program_header; strbuf_t *elf_symtab; strbuf_t *elf_strtab; strbuf_t *elf_section_header; strbuf_t *elf_shstrtab; int elf_header_len; int elf_code_start; int elf_data_start; int elf_rodata_start; int elf_bss_start; int elf_bss_size; dynamic_sections_t dynamic_sections; /* Command line compilation flags */ bool dynlink = false; bool libc = true; bool expand_only = false; bool dump_ir = false; bool hard_mul_div = false; /* Create a new arena block with given capacity. * @capacity: The capacity of the arena block. Must be positive. * * Return: The pointer of created arena block. NULL if failed to allocate. */ arena_block_t *arena_block_create(int capacity) { arena_block_t *block = malloc(sizeof(arena_block_t)); if (!block) { printf("Failed to allocate memory for arena block structure\n"); abort(); } block->memory = malloc(capacity * sizeof(char)); if (!block->memory) { printf("Failed to allocate memory for arena block buffer\n"); free(block); abort(); } block->capacity = capacity; block->offset = 0; block->next = NULL; return block; } /* Free a single arena block and its memory buffer. * @block: Pointer to the arena_block_t to free. Must not be NULL. */ void arena_block_free(arena_block_t *block) { free(block->memory); free(block); } /* Initialize the given arena with initial capacity. * @initial_capacity: The initial capacity of the arena. Must be positive. * * Return: The pointer of initialized arena. */ arena_t *arena_init(int initial_capacity) { arena_t *arena = malloc(sizeof(arena_t)); if (!arena) { printf("Failed to allocate memory for arena structure\n"); abort(); } arena->head = arena_block_create(initial_capacity); arena->total_bytes = initial_capacity; /* Use the initial capacity as the default block size for future growth. */ arena->block_size = initial_capacity; return arena; } /* Allocate memory from the given arena with given size. * The arena may create a new arena block if no space is available. * @arena: The arena to allocate memory from. 
Must not be NULL. * @size: The size of memory to allocate. Must be positive. * * Return: The pointer of allocated memory. NULL if new arena block is failed to * allocate. */ void *arena_alloc(arena_t *arena, int size) { if (size <= 0) { printf("arena_alloc: size must be positive\n"); abort(); } /* Align to sizeof(void*) bytes for host compatibility */ const int alignment = sizeof(void *); size = (size + alignment - 1) & ~(alignment - 1); if (!arena->head || arena->head->offset + size > arena->head->capacity) { /* Need a new block: choose capacity = max(DEFAULT_ARENA_SIZE, * arena->block_size, size) */ const int base = (arena->block_size > DEFAULT_ARENA_SIZE ? arena->block_size : DEFAULT_ARENA_SIZE); const int new_capacity = (size > base ? size : base); arena_block_t *new_block = arena_block_create(new_capacity); new_block->next = arena->head; arena->head = new_block; arena->total_bytes += new_capacity; } void *ptr = arena->head->memory + arena->head->offset; arena->head->offset += size; return ptr; } /* arena_alloc() plus explicit zero‑initialization. * @arena: The arena to allocate memory from. Must not be NULL. * @n: Number of elements. * @size: Size of each element in bytes. * * Internally calls arena_alloc(n * size) and then fills the entire region with * zero bytes. * * Return: Pointer to zero-initialized memory. */ void *arena_calloc(arena_t *arena, int n, int size) { if (n * size == 0) { printf("arena_calloc: cannot allocate 0 bytes\n"); abort(); } int total = n * size; void *ptr = arena_alloc(arena, total); /* Use memset for better performance */ memset(ptr, 0, total); return ptr; } /* Reallocate a previously allocated region within the arena to a different * size. * * Behaviors: * 1. If oldptr == NULL and oldsz == 0, act like malloc. * 2. If newsz <= oldsz, return oldptr immediately. * 3. Grow in place if oldptr is the last allocation in the current block. * 4. Otherwise, allocate a new region and copy old data. * * @arena: Pointer to the arena. 
Must not be NULL. * @oldptr: Pointer to the previously allocated memory in the arena. * @oldsz: Original size (in bytes) of that allocation. * @newsz: New desired size (in bytes). * * Return: Pointer to the reallocated (resized) memory region. */ void *arena_realloc(arena_t *arena, char *oldptr, int oldsz, int newsz) { /* act like malloc */ if (oldptr == NULL) { if (oldsz != 0) { printf("arena_realloc: oldptr == NULL requires oldsz == 0\n"); abort(); } return arena_alloc(arena, newsz); } if (oldsz == 0) { printf("arena_realloc: oldptr != NULL requires oldsz > 0\n"); abort(); } /* return oldptr immediately */ if (newsz <= oldsz) { return oldptr; } /* From here on, oldptr != NULL and newsz > oldsz and oldsz != 0 */ int delta = newsz - oldsz; arena_block_t *blk = arena->head; char *block_end = blk->memory + blk->offset; /* grow in place if oldptr is the last allocation in the current block */ if (oldptr + oldsz == block_end && blk->offset + delta <= blk->capacity) { blk->offset += delta; return oldptr; } /* allocate a new region and copy old data */ void *newptr = arena_alloc(arena, newsz); memcpy(newptr, oldptr, oldsz); return newptr; } /* Duplicate a NULL-terminated string into the arena. * * @arena: a Pointer to the arena. Must not be NULL. * @str: NULL-terminated input string to duplicate. Must not be NULL. * * Return: Pointer to the duplicated string stored in the arena. */ char *arena_strdup(arena_t *arena, char *str) { const int n = strlen(str); char *dup = arena_alloc(arena, n + 1); memcpy(dup, str, n); dup[n] = '\0'; return dup; } /* Duplicate a block of memory into the arena. * Allocates size bytes within the arena and copies data from the input pointer. * * @arena: a Pointer to the arena. Must not be NULL. * @data: data Pointer to the source memory. Must not be NULL. * @size: size Number of bytes to copy. Must be non-negative. * * Return: The pointer to the duplicated memory stored in the arena. 
*/ void *arena_memdup(arena_t *arena, void *data, int size) { return memcpy(arena_alloc(arena, size), data, size); } /* Typed allocators for consistent memory management */ func_t *arena_alloc_func(void) { return arena_calloc(GENERAL_ARENA, 1, sizeof(func_t)); } symbol_t *arena_alloc_symbol(void) { return arena_calloc(GENERAL_ARENA, 1, sizeof(symbol_t)); } constant_t *arena_alloc_constant(void) { /* constant_t is simple, can avoid zeroing */ constant_t *c = arena_alloc(GENERAL_ARENA, sizeof(constant_t)); c->alias[0] = '\0'; c->value = 0; return c; } bb_traversal_args_t *arena_alloc_traversal_args(void) { /* Keep using calloc for safety */ return arena_calloc(GENERAL_ARENA, 1, sizeof(bb_traversal_args_t)); } void arena_free(arena_t *arena) { arena_block_t *block = arena->head; arena_block_t *next; while (block) { next = block->next; arena_block_free(block); block = next; } free(arena); } /* Hash a string with FNV-1a hash function * and converts into usable hashmap index. The range of returned * hashmap index is ranged from "(0 ~ 2,147,483,647) mod size" due to * lack of unsigned integer implementation. * @size: The size of map. Must not be negative or 0. * @key: The key string. May be NULL. * * Return: The usable hashmap index. */ int hashmap_hash_index(int size, char *key) { if (!key) return 0; int hash = 0x811c9dc5; for (; *key; key++) { hash ^= *key; hash *= 0x01000193; } const int mask = hash >> 31; return ((hash ^ mask) - mask) & (size - 1); } int round_up_pow2(int v) { v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return v; } /* Create a hashmap on heap. Notice that provided size will always be rounded * up to nearest power of 2. * @size: The initial bucket size of hashmap. Must not be 0 or * negative. * * Return: The pointer of created hashmap. 
*/
hashmap_t *hashmap_create(int cap)
{
    hashmap_t *map = malloc(sizeof(hashmap_t));
    if (!map) {
        printf("Failed to allocate hashmap_t with capacity %d\n", cap);
        return NULL;
    }

    const int buckets = round_up_pow2(cap);
    hashmap_node_t *table = calloc(buckets, sizeof(hashmap_node_t));
    if (!table) {
        printf("Failed to allocate table in hashmap_t\n");
        free(map);
        return NULL;
    }

    map->table = table;
    map->cap = buckets;
    map->size = 0;
    return map;
}

/* Double the bucket count of a hashmap and re-insert every occupied entry.
 * @map: The hashmap to grow. A NULL map is ignored.
 *
 * Keys are moved by pointer, not copied. If the larger table cannot be
 * allocated, a message is printed and the map keeps its previous table and
 * capacity.
 */
void hashmap_rehash(hashmap_t *map)
{
    if (!map)
        return;

    const int prev_cap = map->cap;
    const int grown_cap = prev_cap << 1;
    hashmap_node_t *prev_table = map->table;
    hashmap_node_t *grown = calloc(grown_cap, sizeof(hashmap_node_t));
    if (!grown) {
        printf("Failed to allocate new table in hashmap_t\n");
        return;
    }

    map->table = grown;
    map->cap = grown_cap;
    map->size = 0;

    for (int i = 0; i < prev_cap; i++) {
        hashmap_node_t *src = &prev_table[i];
        if (!src->occupied)
            continue;

        /* Linear probing from the entry's home slot in the new table. */
        const int home = hashmap_hash_index(grown_cap, src->key);
        int slot = home;
        while (grown[slot].occupied) {
            slot = (slot + 1) & (grown_cap - 1);
            if (slot == home) {
                printf("Error: New table is full during rehash\n");
                abort();
            }
        }

        grown[slot].key = src->key;
        grown[slot].val = src->val;
        grown[slot].occupied = true;
        map->size++;
    }

    free(prev_table);
}

/* Put a key-value pair into given hashmap.
 * If key already contains a value, the stored value pointer is replaced
 * with the new one; the previous value is not freed by the hashmap.
 * @map: The hashmap to be put into. A NULL map is ignored.
 * @key: The key string. Should not be NULL.
 * @val: The value pointer. May be NULL. It is stored as-is.
*/ void hashmap_put(hashmap_t *map, char *key, void *val) { if (!map) return; /* Check if size of map exceeds load factor 50% (or 1/2 of capacity) */ if ((map->cap >> 1) <= map->size) hashmap_rehash(map); int index = hashmap_hash_index(map->cap, key); int start = index; while (map->table[index].occupied) { if (!strcmp(map->table[index].key, key)) { map->table[index].val = val; return; } index = (index + 1) & (map->cap - 1); if (index == start) { printf("Error: Hashmap is full\n"); abort(); } } map->table[index].key = arena_strdup(HASHMAP_ARENA, key); map->table[index].val = val; map->table[index].occupied = true; map->size++; } /* Get key-value pair node from hashmap from given key. * @map: The hashmap to be looked up. Must no be NULL. * @key: The key string. May be NULL. * * Return: The look up result, if the key-value pair entry exists, then returns * address of itself, NULL otherwise. */ hashmap_node_t *hashmap_get_node(hashmap_t *map, char *key) { if (!map) return NULL; int index = hashmap_hash_index(map->cap, key); int start = index; while (map->table[index].occupied) { if (!strcmp(map->table[index].key, key)) return &map->table[index]; index = (index + 1) & (map->cap - 1); if (index == start) return NULL; } return NULL; } /* Get value from hashmap from given key. * @map: The hashmap to be looked up. Must no be NULL. * @key: The key string. May be NULL. * * Return: The look up result, if the key-value pair entry exists, then returns * its value's address, NULL otherwise. */ void *hashmap_get(hashmap_t *map, char *key) { hashmap_node_t *node = hashmap_get_node(map, key); return node ? node->val : NULL; } /* Check if the key-value pair entry exists from given key. * @map: The hashmap to be looked up. Must no be NULL. * @key: The key string. May be NULL. * * Return: The look up result, if the key-value pair entry exists, then returns * true, false otherwise. 
*/ bool hashmap_contains(hashmap_t *map, char *key) { return hashmap_get_node(map, key); } /* Free the hashmap, this also frees key-value pair entry's value. * @map: The hashmap to be looked up. Must no be NULL. */ void hashmap_free(hashmap_t *map) { if (!map) return; free(map->table); free(map); } /* Find the type by the given name. * @type_name: The name to be searched. * @flag: * 0 - Search in all type names. * 1 - Search in all names, excluding the tags of structure. * 2 - Only search in tags. * * Return: The pointer to the type, or NULL if not found. */ type_t *find_type(char *type_name, int flag) { for (int i = 0; i < types_idx; i++) { if (TYPES[i].base_type == TYPE_struct || TYPES[i].base_type == TYPE_union) { if (flag == 1) continue; if (!strcmp(TYPES[i].type_name, type_name)) return &TYPES[i]; } else { if (flag == 2) continue; if (!strcmp(TYPES[i].type_name, type_name)) { /* If it is a forwardly declared alias of a structure, return * the base structure type. */ if (TYPES[i].base_type == TYPE_typedef && TYPES[i].size == 0) return TYPES[i].base_struct; return &TYPES[i]; } } } return NULL; } ph2_ir_t *add_existed_ph2_ir(ph2_ir_t *ph2_ir) { PH2_IR_FLATTEN[ph2_ir_idx++] = ph2_ir; return ph2_ir; } ph2_ir_t *add_ph2_ir(opcode_t op) { ph2_ir_t *ph2_ir = arena_alloc(BB_ARENA, sizeof(ph2_ir_t)); ph2_ir->op = op; /* Initialize all fields explicitly */ ph2_ir->next = NULL; ph2_ir->is_branch_detached = 0; ph2_ir->src0 = 0; ph2_ir->src1 = 0; ph2_ir->dest = 0; ph2_ir->func_name[0] = '\0'; ph2_ir->next_bb = NULL; ph2_ir->then_bb = NULL; ph2_ir->else_bb = NULL; ph2_ir->ofs_based_on_stack_top = false; return add_existed_ph2_ir(ph2_ir); } void set_var_liveout(var_t *var, int end) { if (var->liveness >= end) return; var->liveness = end; } block_t *add_block(block_t *parent, func_t *func) { block_t *blk = arena_alloc(BLOCK_ARENA, sizeof(block_t)); /* Initialize all fields explicitly */ blk->locals.size = 0; blk->locals.capacity = 16; blk->locals.elements = 
arena_alloc(BLOCK_ARENA, blk->locals.capacity * sizeof(var_t *)); blk->parent = parent; blk->func = func; blk->next = NULL; return blk; } /* String pool global */ string_pool_t *string_pool; string_literal_pool_t *string_literal_pool; /* Safe string interning that works with self-hosting */ char *intern_string(char *str) { char *existing; char *interned; int len; /* Safety: return original if NULL */ if (!str) return NULL; /* Safety: can't intern before initialization */ if (!GENERAL_ARENA || !string_pool) return str; /* Check if already interned */ existing = hashmap_get(string_pool->strings, str); if (existing) return existing; /* Allocate and store new string */ len = strlen(str) + 1; interned = arena_alloc(GENERAL_ARENA, len); strcpy(interned, str); hashmap_put(string_pool->strings, interned, interned); return interned; } int hex_digit_value(char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; if (c >= 'A' && c <= 'F') return c - 'A' + 10; return -1; } int unescape_string(const char *input, char *output, int output_size) { if (!input || !output || output_size == 0) return -1; int i = 0, j = 0; while (input[i] != '\0' && j < output_size - 1) { if (input[i] != '\\') { /* Regular characters */ output[j++] = input[i++]; continue; } i++; switch (input[i]) { case 'a': output[j++] = '\a'; i++; break; case 'b': output[j++] = '\b'; i++; break; case 'f': output[j++] = '\f'; i++; break; case 'e': output[j++] = 27; i++; break; case 'n': output[j++] = '\n'; i++; break; case 'r': output[j++] = '\r'; i++; break; case 't': output[j++] = '\t'; i++; break; case 'v': output[j++] = '\v'; i++; break; case '\\': output[j++] = '\\'; i++; break; case '\'': output[j++] = '\''; i++; break; case '"': output[j++] = '"'; i++; break; case '?': output[j++] = '\?'; i++; break; case 'x': { /* Hexadecimal escape sequence: \xhh */ i++; /* Skips 'x' */ if (!isxdigit(input[i])) return -1; int value = 0; int count = 0; while (isxdigit(input[i]) && count 
< 2) { value = (value << 4) + hex_digit_value(input[i]); i++; count++; } output[j++] = (char) value; break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { /* Octal escape sequence: \ooo (up to 3 digits) */ int value = 0; int digit_count = 0; while (input[i] >= '0' && input[i] <= '7' && digit_count < 3) { value = value * 8 + (input[i] - '0'); i++; digit_count++; } output[j++] = (char) value; break; } default: /* Unknown escape sequence - treat as literal character */ output[j++] = input[i++]; break; } } output[j] = '\0'; /* Check if we ran out of output space */ if (input[i] != '\0') return -1; return j; } int parse_numeric_constant(char *buffer) { int i = 0; int value = 0; while (buffer[i]) { if (i == 1 && (buffer[i] | 32) == 'x') { /* hexadecimal */ value = 0; i = 2; while (buffer[i]) { char c = buffer[i++]; value <<= 4; if (isdigit(c)) value += c - '0'; c |= 32; /* convert to lower case */ if (c >= 'a' && c <= 'f') value += (c - 'a') + 10; } return value; } if (i == 1 && (buffer[i] | 32) == 'b') { /* binary */ value = 0; i = 2; while (buffer[i]) { char c = buffer[i++]; value <<= 1; value += (c == '1'); } return value; } if (buffer[0] == '0') /* octal */ value = value * 8 + buffer[i++] - '0'; else value = value * 10 + buffer[i++] - '0'; } return value; } type_t *add_type(void) { if (types_idx >= MAX_TYPES) { printf("Error: Maximum number of types (%d) exceeded\n", MAX_TYPES); abort(); } return &TYPES[types_idx++]; } type_t *add_named_type(char *name) { type_t *type = add_type(); /* Use interned string for type name */ strcpy(type->type_name, intern_string(name)); return type; } void add_constant(char alias[], int value) { constant_t *constant = arena_alloc_constant(); if (!constant) { printf("Failed to allocate constant_t\n"); return; } /* Use interned string for constant name */ strcpy(constant->alias, intern_string(alias)); constant->value = value; hashmap_put(CONSTANTS_MAP, alias, constant); } constant_t *find_constant(char 
alias[]) { return hashmap_get(CONSTANTS_MAP, alias); } var_t *find_member(char token[], type_t *type) { /* If it is a forwardly declared alias of a structure, switch to the base * structure type. */ if (type->size == 0) type = type->base_struct; for (int i = 0; i < type->num_fields; i++) { if (!strcmp(type->fields[i].var_name, token)) return &type->fields[i]; } return NULL; } var_t *find_local_var(char *token, block_t *block) { func_t *func = block->func; for (; block; block = block->parent) { var_list_t *var_list = &block->locals; for (int i = 0; i < var_list->size; i++) { if (!strcmp(var_list->elements[i]->var_name, token)) return var_list->elements[i]; } } if (func) { for (int i = 0; i < func->num_params; i++) { if (!strcmp(func->param_defs[i].var_name, token)) return &func->param_defs[i]; } } return NULL; } var_t *find_global_var(char *token) { var_list_t *var_list = &GLOBAL_BLOCK->locals; for (int i = 0; i < var_list->size; i++) { if (!strcmp(var_list->elements[i]->var_name, token)) return var_list->elements[i]; } return NULL; } var_t *find_var(char *token, block_t *parent) { var_t *var = find_local_var(token, parent); if (!var) var = find_global_var(token); return var; } int size_var(var_t *var) { int size; if (var->ptr_level > 0 || var->is_func) { size = 4; } else { type_t *type = var->type; if (type->size == 0) size = type->base_struct->size; else size = type->size; } if (var->array_size > 0) size = size * var->array_size; return size; } /* Create a new function and adds it to the function lookup table and function * list if it does not already exist, or returns the existing instance if the * function already exists. * * Synthesized functions (e.g., compiler-generated functions like '__syscall') * are excluded from SSA analysis. * * @func_name: The name of the function. May be NULL. * @synthesize: Indicates whether the function is synthesized by the compiler. * Synthesized functions will not be analyzed by the SSA unit. 
* * Return: A pointer to the function. */ func_t *add_func(char *func_name, bool synthesize) { func_t *func = hashmap_get(FUNC_MAP, func_name); if (func) return func; func = arena_alloc_func(); hashmap_put(FUNC_MAP, func_name, func); /* Use interned string for function name */ strcpy(func->return_def.var_name, intern_string(func_name)); /* Prepare space for function arguments. * * For Arm architecture, the first four arguments (arg1 ~ arg4) are * passed to r0 ~ r3, and any additional arguments (arg5+) are passed * to the stack. * * +-------------+ * | local vars | * +-------------+ * | ... | * +-------------+ <-- sp + 16 * | arg 8 | * +-------------+ <-- sp + 12 * | arg 7 | * +-------------+ <-- sp + 8 * | arg 6 | * +-------------+ <-- sp + 4 * | arg 5 | * +-------------+ <-- sp * * If the target architecture is RISC-V, arg1 ~ arg8 are passed to * registers and arg9+ are passed to the stack. * * We allocate (MAX_PARAMS - MAX_ARGS_IN_REG) * 4 bytes for all functions * so that each of them can use the space to pass extra arguments. */ func->stack_size = (MAX_PARAMS - MAX_ARGS_IN_REG) * 4; if (synthesize) return func; if (!FUNC_LIST.head) { FUNC_LIST.head = func; FUNC_LIST.tail = func; } else { FUNC_LIST.tail->next = func; FUNC_LIST.tail = func; } return func; } /* Find the function in function map. * @func_name: The name of the function. May be NULL. * * Return: A pointer to the function if exists, NULL otherwise. */ func_t *find_func(char *func_name) { return hashmap_get(FUNC_MAP, func_name); } /* Create a basic block and set the scope of variables to 'parent' block */ basic_block_t *bb_create(block_t *parent) { /* Use arena_calloc for basic_block_t as it has many arrays that need * zeroing (live_gen, live_kill, live_in, live_out, DF, RDF, dom_next, etc.) * This is simpler and safer than manually initializing everything. 
*/ basic_block_t *bb = arena_calloc(BB_ARENA, 1, sizeof(basic_block_t)); /* Initialize non-zero fields */ bb->scope = parent; bb->belong_to = parent->func; /* Initialize prev array with NEXT type */ for (int i = 0; i < MAX_BB_PRED; i++) bb->prev[i].type = NEXT; if (dump_ir) snprintf(bb->bb_label_name, MAX_VAR_LEN, ".label.%d", bb_label_idx++); return bb; } /* The pred-succ pair must have only one connection */ void bb_connect(basic_block_t *pred, basic_block_t *succ, bb_connection_type_t type) { if (!pred) abort(); if (!succ) abort(); int i = 0; while (succ->prev[i].bb) i++; if (i > MAX_BB_PRED - 1) { printf("Error: too many predecessors\n"); abort(); } succ->prev[i].bb = pred; succ->prev[i].type = type; switch (type) { case NEXT: pred->next = succ; break; case THEN: pred->then_ = succ; break; case ELSE: pred->else_ = succ; break; default: abort(); } } /* The pred-succ pair must have only one connection */ void bb_disconnect(basic_block_t *pred, basic_block_t *succ) { for (int i = 0; i < MAX_BB_PRED; i++) { if (succ->prev[i].bb == pred) { switch (succ->prev[i].type) { case NEXT: pred->next = NULL; break; case THEN: pred->then_ = NULL; break; case ELSE: pred->else_ = NULL; break; default: abort(); } succ->prev[i].bb = NULL; break; } } } /* The symbol is an argument of function or the variable in declaration */ void add_symbol(basic_block_t *bb, var_t *var) { if (!bb) return; symbol_t *sym; for (sym = bb->symbol_list.head; sym; sym = sym->next) { if (sym->var == var) return; } sym = arena_alloc_symbol(); sym->var = var; if (!bb->symbol_list.head) { sym->index = 0; bb->symbol_list.head = sym; bb->symbol_list.tail = sym; } else { sym->index = bb->symbol_list.tail->index + 1; bb->symbol_list.tail->next = sym; bb->symbol_list.tail = sym; } } void add_insn(block_t *block, basic_block_t *bb, opcode_t op, var_t *rd, var_t *rs1, var_t *rs2, int sz, char *str) { if (!bb) return; bb->scope = block; insn_t *n = arena_alloc(INSN_ARENA, sizeof(insn_t)); n->next = NULL; n->prev = 
NULL; n->opcode = op; n->rd = rd; n->rs1 = rs1; n->rs2 = rs2; n->sz = sz; n->useful = false; n->belong_to = bb; n->phi_ops = NULL; n->idx = 0; if (str) strcpy(n->str, intern_string(str)); else n->str[0] = '\0'; /* Mark variables as address-taken to prevent incorrect constant * optimization */ if ((op == OP_address_of || op == OP_global_address_of) && rs1) { rs1->address_taken = true; rs1->is_const = false; /* disable constant optimization */ } if (!bb->insn_list.head) bb->insn_list.head = n; else bb->insn_list.tail->next = n; n->prev = bb->insn_list.tail; bb->insn_list.tail = n; } strbuf_t *strbuf_create(int init_capacity) { strbuf_t *array = malloc(sizeof(strbuf_t)); if (!array) return NULL; array->size = 0; array->capacity = init_capacity; array->elements = malloc(array->capacity * sizeof(char)); if (!array->elements) { free(array); return NULL; } return array; } bool strbuf_extend(strbuf_t *src, int len) { int new_size = src->size + len; if (new_size < src->capacity) return true; if (new_size > (src->capacity << 1)) src->capacity = new_size; else src->capacity <<= 1; char *new_arr = malloc(src->capacity * sizeof(char)); if (!new_arr) return false; memcpy(new_arr, src->elements, src->size * sizeof(char)); free(src->elements); src->elements = new_arr; return true; } bool strbuf_putc(strbuf_t *src, char value) { if (!strbuf_extend(src, 1)) return false; src->elements[src->size] = value; src->size++; return true; } bool strbuf_puts(strbuf_t *src, const char *value) { int len = strlen(value); if (!strbuf_extend(src, len)) return false; strncpy(src->elements + src->size, value, len); src->size += len; return true; } void strbuf_free(strbuf_t *src) { if (!src) return; free(src->elements); free(src); } /* This routine is required because the global variable initializations are * not supported now. 
*/
void global_init(void)
{
    /* Start with an empty function list and a cleared register file */
    FUNC_LIST.head = NULL;
    FUNC_LIST.tail = NULL;
    memset(REGS, 0, sizeof(regfile_t) * REG_CNT);

    /* Initialize arenas first so we can use them for allocation */
    BLOCK_ARENA = arena_init(DEFAULT_ARENA_SIZE); /* Variables/blocks */
    INSN_ARENA = arena_init(LARGE_ARENA_SIZE); /* Instructions - high usage */
    BB_ARENA = arena_init(SMALL_ARENA_SIZE); /* Basic blocks - low usage */
    HASHMAP_ARENA = arena_init(DEFAULT_ARENA_SIZE); /* Hash nodes */
    TOKEN_ARENA = arena_init(LARGE_ARENA_SIZE);
    GENERAL_ARENA =
        arena_init(DEFAULT_ARENA_SIZE); /* For TYPES and PH2_IR_FLATTEN */

    /* Use arena allocation for better memory management */
    TYPES = arena_alloc(GENERAL_ARENA, MAX_TYPES * sizeof(type_t));
    PH2_IR_FLATTEN =
        arena_alloc(GENERAL_ARENA, MAX_IR_INSTR * sizeof(ph2_ir_t *));

    /* Initialize string pool for identifier deduplication */
    string_pool = arena_alloc(GENERAL_ARENA, sizeof(string_pool_t));
    string_pool->strings = hashmap_create(512);

    /* Initialize string literal pool for deduplicating string constants */
    string_literal_pool =
        arena_alloc(GENERAL_ARENA, sizeof(string_literal_pool_t));
    string_literal_pool->literals = hashmap_create(256);

    /* Lookup tables; each is released again in global_release() */
    TOKEN_CACHE = hashmap_create(DEFAULT_SRC_FILE_COUNT);
    SRC_FILE_MAP = hashmap_create(DEFAULT_SRC_FILE_COUNT);
    FUNC_MAP = hashmap_create(DEFAULT_FUNCS_SIZE);
    CONSTANTS_MAP = hashmap_create(MAX_CONSTANTS);

    /* String buffers for the libc source and the ELF output sections */
    LIBC_SRC = strbuf_create(4096);
    elf_code = strbuf_create(MAX_CODE);
    elf_data = strbuf_create(MAX_DATA);
    elf_rodata = strbuf_create(MAX_DATA);
    elf_header = strbuf_create(MAX_HEADER);
    elf_program_header = strbuf_create(MAX_PROGRAM_HEADER);
    elf_symtab = strbuf_create(MAX_SYMTAB);
    elf_strtab = strbuf_create(MAX_STRTAB);
    elf_bss_size = 0;
    elf_shstrtab = strbuf_create(MAX_SHSTR);
    elf_section_header = strbuf_create(MAX_SECTION_HEADER);
    /* Buffers of the dynamic_sections group */
    dynamic_sections.elf_interp = strbuf_create(MAX_INTERP);
    dynamic_sections.elf_dynamic = strbuf_create(MAX_DYNAMIC);
    dynamic_sections.elf_dynsym = strbuf_create(MAX_DYNSYM);
dynamic_sections.elf_dynstr = strbuf_create(MAX_DYNSTR); dynamic_sections.elf_relplt = strbuf_create(MAX_RELPLT); dynamic_sections.elf_plt = strbuf_create(MAX_PLT); dynamic_sections.elf_got = strbuf_create(MAX_GOTPLT); } /* Forward declaration for lexer cleanup */ void lexer_cleanup(void); /* Free empty trailing blocks from an arena safely. * This only frees blocks that come after the last used block, * ensuring no pointers are invalidated. * * @arena: The arena to compact. * Return: Bytes freed. */ int arena_free_trailing_blocks(arena_t *arena) { if (!arena || !arena->head) return 0; /* Find the last block with actual allocations */ arena_block_t *last_used = NULL; arena_block_t *block; for (block = arena->head; block; block = block->next) { if (block->offset > 0) last_used = block; } /* If no blocks are used, keep just the head */ if (!last_used) last_used = arena->head; /* Free all blocks after last_used */ int freed = 0; if (last_used->next) { block = last_used->next; last_used->next = NULL; while (block) { arena_block_t *next = block->next; freed += block->capacity; arena->total_bytes -= block->capacity; arena_block_free(block); block = next; } } return freed; } /* Compact all arenas to reduce memory usage after compilation phases. * This safely frees only trailing empty blocks without invalidating pointers. * * Return: Total bytes freed across all arenas. */ int compact_all_arenas(void) { int total_saved = 0; /* Free trailing blocks from each arena */ total_saved += arena_free_trailing_blocks(BLOCK_ARENA); total_saved += arena_free_trailing_blocks(INSN_ARENA); total_saved += arena_free_trailing_blocks(BB_ARENA); total_saved += arena_free_trailing_blocks(HASHMAP_ARENA); total_saved += arena_free_trailing_blocks(GENERAL_ARENA); return total_saved; } /* Compact specific arenas based on compilation phase. * Different phases have different memory usage patterns. * * @phase_mask: Bitmask using COMPACT_ARENA_* defines * to indicate which arenas to compact. 
* * Return: Total bytes freed. */ int compact_arenas_selective(int phase_mask) { int total_saved = 0; if (phase_mask & COMPACT_ARENA_BLOCK) total_saved += arena_free_trailing_blocks(BLOCK_ARENA); if (phase_mask & COMPACT_ARENA_INSN) total_saved += arena_free_trailing_blocks(INSN_ARENA); if (phase_mask & COMPACT_ARENA_BB) total_saved += arena_free_trailing_blocks(BB_ARENA); if (phase_mask & COMPACT_ARENA_HASHMAP) total_saved += arena_free_trailing_blocks(HASHMAP_ARENA); if (phase_mask & COMPACT_ARENA_GENERAL) total_saved += arena_free_trailing_blocks(GENERAL_ARENA); return total_saved; } void global_release(void) { /* Cleanup lexer hashmaps */ lexer_cleanup(); /* Free string interning hashmaps */ if (string_pool && string_pool->strings) hashmap_free(string_pool->strings); if (string_literal_pool && string_literal_pool->literals) hashmap_free(string_literal_pool->literals); arena_free(BLOCK_ARENA); arena_free(INSN_ARENA); arena_free(BB_ARENA); arena_free(HASHMAP_ARENA); arena_free(TOKEN_ARENA); arena_free(GENERAL_ARENA); /* free TYPES and PH2_IR_FLATTEN */ hashmap_free(TOKEN_CACHE); hashmap_free(SRC_FILE_MAP); hashmap_free(FUNC_MAP); hashmap_free(INCLUSION_MAP); hashmap_free(CONSTANTS_MAP); strbuf_free(LIBC_SRC); strbuf_free(elf_code); strbuf_free(elf_data); strbuf_free(elf_rodata); strbuf_free(elf_header); strbuf_free(elf_program_header); strbuf_free(elf_symtab); strbuf_free(elf_strtab); strbuf_free(elf_shstrtab); strbuf_free(elf_section_header); strbuf_free(dynamic_sections.elf_interp); strbuf_free(dynamic_sections.elf_dynamic); strbuf_free(dynamic_sections.elf_dynsym); strbuf_free(dynamic_sections.elf_dynstr); strbuf_free(dynamic_sections.elf_relplt); strbuf_free(dynamic_sections.elf_plt); strbuf_free(dynamic_sections.elf_got); } /* Reports an error without specifying a position */ void fatal(char *msg) { printf("[Error]: %s\n", msg); abort(); } /* Reports error and prints occurred position context, * if the given location is NULL or source file is missing, * then 
fallbacks to fatal(char *). */ void error_at(char *msg, source_location_t *loc) { int offset, start_idx, i = 0, len, pos; char diagnostic[MAX_LINE_LEN]; if (!loc) fatal(msg); len = loc->len; pos = loc->pos; strbuf_t *src = hashmap_get(SRC_FILE_MAP, loc->filename); if (!src) fatal(msg); if (len < 1) len = 1; printf("%s:%d:%d: [Error]: %s\n", loc->filename, loc->line, loc->column, msg); printf("%6d | ", loc->line); /* Finds line's start position */ for (offset = pos; offset >= 0 && src->elements[offset] != '\n'; offset--) ; start_idx = offset + 1; /* Copies whole line to diagnostic buffer */ for (offset = start_idx; offset < src->capacity && src->elements[offset] != '\n' && src->elements[offset] != '\0'; offset++) { diagnostic[i++] = src->elements[offset]; } diagnostic[i] = '\0'; printf("%s\n", diagnostic); printf("%6c | ", ' '); i = 0; for (offset = start_idx; offset < pos; offset++) diagnostic[i++] = ' '; diagnostic[i++] = '^'; for (; len > 1; len--) diagnostic[i++] = '~'; strcpy(diagnostic + i, " Error occurs here"); printf("%s\n", diagnostic); abort(); } void print_indent(int indent) { for (int i = 0; i < indent; i++) printf("\t"); } void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start) { if (!bb) return; var_t *rd, *rs1, *rs2; if (bb != func->bbs && bb->insn_list.head) { if (!at_func_start[0]) printf("%s:\n", bb->bb_label_name); else at_func_start[0] = false; } for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) { rd = insn->rd; rs1 = insn->rs1; rs2 = insn->rs2; switch (insn->opcode) { case OP_unwound_phi: /* Ignored */ continue; case OP_allocat: print_indent(1); printf("allocat %s", rd->type->type_name); for (int i = 0; i < rd->ptr_level; i++) printf("*"); printf(" %%%s", rd->var_name); if (rd->array_size > 0) printf("[%d]", rd->array_size); break; case OP_load_constant: print_indent(1); printf("const %%%s, %d", rd->var_name, rd->init_val); break; case OP_load_data_address: print_indent(1); /* offset from .data section */ 
printf("%%%s = .data (%d)", rd->var_name, rd->init_val); break; case OP_load_rodata_address: print_indent(1); /* offset from .rodata section */ printf("%%%s = .rodata (%d)", rd->var_name, rd->init_val); break; case OP_address_of: print_indent(1); printf("%%%s = &(%%%s)", rd->var_name, rs1->var_name); break; case OP_assign: print_indent(1); printf("%%%s = %%%s", rd->var_name, rs1->var_name); break; case OP_branch: print_indent(1); printf("br %%%s, %s, %s", rs1->var_name, bb->then_->bb_label_name, bb->else_->bb_label_name); break; case OP_jump: print_indent(1); printf("jmp %s", bb->next->bb_label_name); break; case OP_label: print_indent(0); printf("%s:", insn->str); break; case OP_push: print_indent(1); printf("push %%%s", rs1->var_name); break; case OP_call: print_indent(1); printf("call @%s", insn->str); break; case OP_func_ret: print_indent(1); printf("retval %%%s", rd->var_name); break; case OP_return: print_indent(1); if (rs1) printf("ret %%%s", rs1->var_name); else printf("ret"); break; case OP_read: print_indent(1); printf("%%%s = (%%%s), %d", rd->var_name, rs1->var_name, insn->sz); break; case OP_write: print_indent(1); if (rs1->is_func) printf("(%%%s) = @%s", rs1->var_name, rs2->var_name); else printf("(%%%s) = %%%s, %d", rs1->var_name, rs2->var_name, insn->sz); break; case OP_indirect: print_indent(1); printf("indirect call @(%%%s)", rs1->var_name); break; case OP_negate: print_indent(1); printf("neg %%%s, %%%s", rd->var_name, rs1->var_name); break; case OP_add: print_indent(1); printf("%%%s = add %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_sub: print_indent(1); printf("%%%s = sub %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_mul: print_indent(1); printf("%%%s = mul %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_div: print_indent(1); printf("%%%s = div %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_mod: print_indent(1); printf("%%%s = mod %%%s, 
%%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_eq: print_indent(1); printf("%%%s = eq %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_neq: print_indent(1); printf("%%%s = neq %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_gt: print_indent(1); printf("%%%s = gt %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_lt: print_indent(1); printf("%%%s = lt %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_geq: print_indent(1); printf("%%%s = geq %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_leq: print_indent(1); printf("%%%s = leq %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_bit_and: print_indent(1); printf("%%%s = and %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_bit_or: print_indent(1); printf("%%%s = or %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_bit_not: print_indent(1); printf("%%%s = not %%%s", rd->var_name, rs1->var_name); break; case OP_bit_xor: print_indent(1); printf("%%%s = xor %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_log_and: print_indent(1); printf("%%%s = and %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_log_or: print_indent(1); printf("%%%s = or %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_log_not: print_indent(1); printf("%%%s = not %%%s", rd->var_name, rs1->var_name); break; case OP_rshift: print_indent(1); printf("%%%s = rshift %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_lshift: print_indent(1); printf("%%%s = lshift %%%s, %%%s", rd->var_name, rs1->var_name, rs2->var_name); break; case OP_trunc: print_indent(1); printf("%%%s = trunc %%%s, %d", rd->var_name, rs1->var_name, insn->sz); break; case OP_sign_ext: print_indent(1); printf("%%%s = sign_ext %%%s, %d", rd->var_name, rs1->var_name, insn->sz); break; case OP_cast: 
print_indent(1); printf("%%%s = cast %%%s", rd->var_name, rs1->var_name); break; default: printf("", insn->opcode); break; } printf("\n"); } } void dump_bb_insn_by_dom(func_t *func, basic_block_t *bb, bool *at_func_start) { dump_bb_insn(func, bb, at_func_start); for (int i = 0; i < MAX_BB_DOM_SUCC; i++) { if (!bb || !bb->dom_next[i]) break; dump_bb_insn_by_dom(func, bb->dom_next[i], at_func_start); } } void dump_insn(void) { printf("====\n"); for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; bool at_func_start = true; printf("def %s", func->return_def.type->type_name); for (int i = 0; i < func->return_def.ptr_level; i++) printf("*"); printf(" @%s(", func->return_def.var_name); for (int i = 0; i < func->num_params; i++) { if (i != 0) printf(", "); printf("%s", func->param_defs[i].type->type_name); for (int k = 0; k < func->param_defs[i].ptr_level; k++) printf("*"); printf(" %%%s", func->param_defs[i].var_name); } printf(") {\n"); dump_bb_insn_by_dom(func, func->bbs, &at_func_start); /* Handle implicit return */ for (int i = 0; i < MAX_BB_PRED; i++) { if (!func->exit) break; basic_block_t *bb = func->exit->prev[i].bb; if (!bb) continue; if (func->return_def.type != TY_void) continue; if (bb->insn_list.tail) if (bb->insn_list.tail->opcode == OP_return) continue; print_indent(1); printf("ret\n"); } printf("}\n"); } printf("====\n"); } ================================================ FILE: src/lexer.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. 
*/ #include #include #include "defs.h" #include "globals.c" /* Hash table constants */ #define NUM_DIRECTIVES 11 #define NUM_KEYWORDS 18 /* Token mapping structure for elegant initialization */ typedef struct { char *name; token_kind_t token; } token_mapping_t; /* Preprocessor directive hash table using existing shecc hashmap */ hashmap_t *DIRECTIVE_MAP = NULL; /* C keywords hash table */ hashmap_t *KEYWORD_MAP = NULL; /* Token arrays for cleanup */ token_kind_t *directive_tokens_storage = NULL; token_kind_t *keyword_tokens_storage = NULL; void lex_init_directives() { if (DIRECTIVE_MAP) return; DIRECTIVE_MAP = hashmap_create(16); /* Small capacity for directives */ /* Initialization using struct compound literals for elegance */ directive_tokens_storage = arena_alloc(GENERAL_ARENA, NUM_DIRECTIVES * sizeof(token_kind_t)); /* Use array compound literal for directive mappings */ token_mapping_t directives[] = { {"#define", T_cppd_define}, {"#elif", T_cppd_elif}, {"#else", T_cppd_else}, {"#endif", T_cppd_endif}, {"#error", T_cppd_error}, {"#if", T_cppd_if}, {"#ifdef", T_cppd_ifdef}, {"#ifndef", T_cppd_ifndef}, {"#include", T_cppd_include}, {"#pragma", T_cppd_pragma}, {"#undef", T_cppd_undef}, }; /* hashmap insertion */ for (int i = 0; i < NUM_DIRECTIVES; i++) { directive_tokens_storage[i] = directives[i].token; hashmap_put(DIRECTIVE_MAP, directives[i].name, &directive_tokens_storage[i]); } } void lex_init_keywords() { if (KEYWORD_MAP) return; KEYWORD_MAP = hashmap_create(32); /* Capacity for keywords */ /* Initialization using struct compound literals for elegance */ keyword_tokens_storage = arena_alloc(GENERAL_ARENA, NUM_KEYWORDS * sizeof(token_kind_t)); /* Use array compound literal for keyword mappings */ token_mapping_t keywords[] = { {"if", T_if}, {"while", T_while}, {"for", T_for}, {"do", T_do}, {"else", T_else}, {"return", T_return}, {"typedef", T_typedef}, {"enum", T_enum}, {"struct", T_struct}, {"sizeof", T_sizeof}, {"switch", T_switch}, {"case", T_case}, 
{"break", T_break}, {"default", T_default}, {"continue", T_continue}, {"goto", T_goto}, {"union", T_union}, {"const", T_const}, }; /* hashmap insertion */ for (int i = 0; i < NUM_KEYWORDS; i++) { keyword_tokens_storage[i] = keywords[i].token; hashmap_put(KEYWORD_MAP, keywords[i].name, &keyword_tokens_storage[i]); } } /* Hash table lookup for preprocessor directives */ token_kind_t lookup_directive(char *token) { if (!DIRECTIVE_MAP) lex_init_directives(); token_kind_t *result = hashmap_get(DIRECTIVE_MAP, token); if (result) return *result; return T_identifier; } /* Hash table lookup for C keywords */ token_kind_t lookup_keyword(char *token) { if (!KEYWORD_MAP) lex_init_keywords(); token_kind_t *result = hashmap_get(KEYWORD_MAP, token); if (result) return *result; return T_identifier; } /* Cleanup function for lexer hashmaps */ void lexer_cleanup() { if (DIRECTIVE_MAP) { hashmap_free(DIRECTIVE_MAP); DIRECTIVE_MAP = NULL; } if (KEYWORD_MAP) { hashmap_free(KEYWORD_MAP); KEYWORD_MAP = NULL; } /* Token storage arrays are allocated from GENERAL_ARENA and will be * automatically freed when the arena is freed in global_release(). * No need to explicitly free them here. 
*/ directive_tokens_storage = NULL; keyword_tokens_storage = NULL; } char peek_char(strbuf_t *buf, int offset) { if (buf->size + offset >= buf->capacity) return '\0'; return buf->elements[buf->size + offset]; } char read_char(strbuf_t *buf) { if (buf->size + 1 >= buf->capacity) return buf->elements[buf->capacity - 1]; buf->size++; return buf->elements[buf->size]; } strbuf_t *read_file(char *filename) { char buffer[MAX_LINE_LEN]; FILE *f = fopen(filename, "rb"); strbuf_t *src; if (!f) { printf("filename: %s\n", filename); fatal("source file cannot be found."); } fseek(f, 0, SEEK_END); int len = ftell(f); src = strbuf_create(len + 1); fseek(f, 0, SEEK_SET); while (fgets(buffer, MAX_LINE_LEN, f)) strbuf_puts(src, buffer); fclose(f); src->elements[len] = '\0'; return src; } strbuf_t *get_file_buf(char *filename) { strbuf_t *buf; if (!hashmap_contains(SRC_FILE_MAP, filename)) { buf = read_file(filename); hashmap_put(SRC_FILE_MAP, filename, buf); } else { buf = hashmap_get(SRC_FILE_MAP, filename); } return buf; } token_t *new_token(token_kind_t kind, source_location_t *loc, int len) { token_t *token = arena_calloc(TOKEN_ARENA, 1, sizeof(token_t)); token->kind = kind; memcpy(&token->location, loc, sizeof(source_location_t)); token->location.len = len; return token; } token_t *lex_token(strbuf_t *buf, source_location_t *loc) { token_t *token; char token_buffer[MAX_TOKEN_LEN], ch = peek_char(buf, 0); loc->pos = buf->size; if (ch == '#') { if (loc->column != 1) error_at("Directive must be on the start of line", loc); int sz = 0; do { if (sz >= MAX_TOKEN_LEN - 1) { loc->len = sz; error_at("Token too long", loc); } token_buffer[sz++] = ch; ch = read_char(buf); } while (isalnum(ch) || ch == '_'); token_buffer[sz] = '\0'; token_kind_t directive_kind = lookup_directive(token_buffer); if (directive_kind == T_identifier) { loc->len = sz; error_at("Unsupported directive", loc); } token = new_token(directive_kind, loc, sz); loc->column += sz; return token; } if (ch == '\\') { 
read_char(buf); token = new_token(T_backslash, loc, 1); loc->column++; return token; } if (ch == '\n') { read_char(buf); token = new_token(T_newline, loc, 1); loc->line++; loc->column = 1; return token; } if (ch == '/') { ch = read_char(buf); if (ch == '*') { /* C-style comment */ int pos = buf->size; do { /* advance one char */ pos++; loc->column++; ch = buf->elements[pos]; if (ch == '*') { /* look ahead */ pos++; loc->column++; ch = buf->elements[pos]; if (ch == '/') { /* consume closing '/', then commit and skip trailing * whitespaces */ pos++; loc->column += 2; buf->size = pos; return lex_token(buf, loc); } } if (ch == '\n') { loc->line++; loc->column = 1; } } while (ch); error_at("Unenclosed C-style comment", loc); return NULL; } if (ch == '/') { /* C++-style comment */ int pos = buf->size; do { pos++; ch = buf->elements[pos]; } while (ch && !is_newline(ch)); loc->column += pos - buf->size + 1; buf->size = pos; return lex_token(buf, loc); } if (ch == '=') { ch = read_char(buf); token = new_token(T_divideeq, loc, 2); loc->column += 2; return token; } token = new_token(T_divide, loc, 1); loc->column++; return token; } if (ch == ' ') { /* Compacts sequence of whitespace together */ int sz = 1; while (read_char(buf) == ' ') sz++; token = new_token(T_whitespace, loc, sz); loc->column += sz; return token; } if (ch == '\t') { read_char(buf); token = new_token(T_tab, loc, 1); loc->column++; return token; } if (ch == '\0') { read_char(buf); token = new_token(T_eof, loc, 1); loc->column++; return token; } if (isdigit(ch)) { int sz = 0; token_buffer[sz++] = ch; ch = read_char(buf); if (token_buffer[0] == '0' && ((ch | 32) == 'x')) { /* Hexadecimal: starts with 0x or 0X */ if (sz >= MAX_TOKEN_LEN - 1) { loc->len = sz; error_at("Token too long", loc); } token_buffer[sz++] = ch; ch = read_char(buf); if (!isxdigit(ch)) { loc->len = 3; error_at("Invalid hex literal: expected hex digit after 0x", loc); } do { if (sz >= MAX_TOKEN_LEN - 1) { loc->len = sz; error_at("Token too 
long", loc); } token_buffer[sz++] = ch; ch = read_char(buf); } while (isxdigit(ch)); } else if (token_buffer[0] == '0' && ((ch | 32) == 'b')) { /* Binary literal: 0b or 0B */ if (sz >= MAX_TOKEN_LEN - 1) { loc->len = sz; error_at("Token too long", loc); } token_buffer[sz++] = ch; ch = read_char(buf); if (ch != '0' && ch != '1') { loc->len = 3; error_at("Binary literal expects 0 or 1 after 0b", loc); } do { if (sz >= MAX_TOKEN_LEN - 1) { loc->len = sz; error_at("Token too long", loc); } token_buffer[sz++] = ch; ch = read_char(buf); } while (ch == '0' || ch == '1'); } else if (token_buffer[0] == '0') { /* Octal: starts with 0 but not followed by 'x' or 'b' */ while (isdigit(ch)) { if (ch >= '8') { loc->pos += sz; loc->column += sz; error_at("Invalid octal digit, must be in range 0-7", loc); } if (sz >= MAX_TOKEN_LEN - 1) { loc->len = sz; error_at("Token too long", loc); } token_buffer[sz++] = ch; ch = read_char(buf); } } else { /* Decimal */ while (isdigit(ch)) { if (sz >= MAX_TOKEN_LEN - 1) { loc->len = sz; error_at("Token too long", loc); } token_buffer[sz++] = ch; ch = read_char(buf); } } token_buffer[sz] = '\0'; token = new_token(T_numeric, loc, sz); token->literal = intern_string(token_buffer); loc->column += sz; return token; } if (ch == '(') { ch = read_char(buf); token = new_token(T_open_bracket, loc, 1); loc->column++; return token; } if (ch == ')') { ch = read_char(buf); token = new_token(T_close_bracket, loc, 1); loc->column++; return token; } if (ch == '{') { ch = read_char(buf); token = new_token(T_open_curly, loc, 1); loc->column++; return token; } if (ch == '}') { ch = read_char(buf); token = new_token(T_close_curly, loc, 1); loc->column++; return token; } if (ch == '[') { ch = read_char(buf); token = new_token(T_open_square, loc, 1); loc->column++; return token; } if (ch == ']') { ch = read_char(buf); token = new_token(T_close_square, loc, 1); loc->column++; return token; } if (ch == ',') { ch = read_char(buf); token = new_token(T_comma, loc, 1); 
loc->column++; return token; } if (ch == '^') { ch = read_char(buf); if (ch == '=') { ch = read_char(buf); token = new_token(T_xoreq, loc, 2); loc->column += 2; return token; } token = new_token(T_bit_xor, loc, 1); loc->column++; return token; } if (ch == '~') { ch = read_char(buf); token = new_token(T_bit_not, loc, 1); loc->column++; return token; } if (ch == '"') { int sz = 0; bool special = false; ch = read_char(buf); while (ch != '"' || special) { if ((sz > 0) && (token_buffer[sz - 1] == '\\')) { token_buffer[sz++] = ch; } else { if (sz >= MAX_TOKEN_LEN - 1) { loc->len = sz + 1; error_at("String literal too long", loc); } token_buffer[sz++] = ch; } if (ch == '\\') special = true; else special = false; ch = read_char(buf); } token_buffer[sz] = '\0'; read_char(buf); token = new_token(T_string, loc, sz + 2); token->literal = intern_string(token_buffer); loc->column += sz + 2; return token; } if (ch == '\'') { int sz = 0; bool escaped = false; ch = read_char(buf); if (ch == '\\') { token_buffer[sz++] = ch; ch = read_char(buf); do { token_buffer[sz++] = ch; ch = read_char(buf); escaped = true; } while (ch && ch != '\''); } else { token_buffer[sz++] = ch; } token_buffer[sz] = '\0'; if (!escaped) ch = read_char(buf); if (ch != '\'') { loc->len = 2; error_at("Unenclosed character literal", loc); } read_char(buf); token = new_token(T_char, loc, sz + 2); token->literal = intern_string(token_buffer); loc->column += sz + 2; return token; } if (ch == '*') { ch = read_char(buf); if (ch == '=') { read_char(buf); token = new_token(T_asteriskeq, loc, 2); loc->column += 2; return token; } token = new_token(T_asterisk, loc, 1); loc->column++; return token; } if (ch == '&') { ch = read_char(buf); if (ch == '&') { read_char(buf); token = new_token(T_log_and, loc, 2); loc->column += 2; return token; } if (ch == '=') { read_char(buf); token = new_token(T_andeq, loc, 2); loc->column += 2; return token; } token = new_token(T_ampersand, loc, 1); loc->column++; return token; } if (ch == 
'|') { ch = read_char(buf); if (ch == '|') { read_char(buf); token = new_token(T_log_or, loc, 2); loc->column += 2; return token; } if (ch == '=') { read_char(buf); token = new_token(T_oreq, loc, 2); loc->column += 2; return token; } token = new_token(T_bit_or, loc, 1); loc->column++; return token; } if (ch == '<') { ch = read_char(buf); if (ch == '=') { read_char(buf); token = new_token(T_le, loc, 2); loc->column += 2; return token; } if (ch == '<') { ch = read_char(buf); if (ch == '=') { read_char(buf); token = new_token(T_lshifteq, loc, 3); loc->column += 3; return token; } token = new_token(T_lshift, loc, 2); loc->column += 2; return token; } token = new_token(T_lt, loc, 1); loc->column++; return token; } if (ch == '%') { ch = read_char(buf); if (ch == '=') { read_char(buf); token = new_token(T_modeq, loc, 2); loc->column += 2; return token; } token = new_token(T_mod, loc, 1); loc->column++; return token; } if (ch == '>') { ch = read_char(buf); if (ch == '=') { read_char(buf); token = new_token(T_ge, loc, 2); loc->column += 2; return token; } if (ch == '>') { ch = read_char(buf); if (ch == '=') { read_char(buf); token = new_token(T_rshifteq, loc, 3); loc->column += 3; return token; } token = new_token(T_rshift, loc, 2); loc->column += 2; return token; } token = new_token(T_gt, loc, 1); loc->column++; return token; } if (ch == '!') { ch = read_char(buf); if (ch == '=') { read_char(buf); token = new_token(T_noteq, loc, 2); loc->column += 2; return token; } token = new_token(T_log_not, loc, 1); loc->column++; return token; } if (ch == '.') { ch = read_char(buf); if (ch == '.' 
&& peek_char(buf, 1) == '.') { buf->size += 2; token = new_token(T_elipsis, loc, 3); loc->column += 3; return token; } token = new_token(T_dot, loc, 1); loc->column++; return token; } if (ch == '-') { ch = read_char(buf); if (ch == '>') { read_char(buf); token = new_token(T_arrow, loc, 2); loc->column += 2; return token; } if (ch == '-') { read_char(buf); token = new_token(T_decrement, loc, 2); loc->column += 2; return token; } if (ch == '=') { read_char(buf); token = new_token(T_minuseq, loc, 2); loc->column += 2; return token; } token = new_token(T_minus, loc, 1); loc->column++; return token; } if (ch == '+') { ch = read_char(buf); if (ch == '+') { read_char(buf); token = new_token(T_increment, loc, 2); loc->column += 2; return token; } if (ch == '=') { read_char(buf); token = new_token(T_pluseq, loc, 2); loc->column += 2; return token; } token = new_token(T_plus, loc, 1); loc->column++; return token; } if (ch == ';') { read_char(buf); token = new_token(T_semicolon, loc, 1); loc->column++; return token; } if (ch == '?') { read_char(buf); token = new_token(T_question, loc, 1); loc->column++; return token; } if (ch == ':') { read_char(buf); token = new_token(T_colon, loc, 1); loc->column++; return token; } if (ch == '=') { ch = read_char(buf); if (ch == '=') { read_char(buf); token = new_token(T_eq, loc, 2); loc->column += 2; return token; } token = new_token(T_assign, loc, 1); loc->column++; return token; } if (isalnum(ch) || ch == '_') { int sz = 0; do { if (sz >= MAX_TOKEN_LEN - 1) { loc->len = sz; error_at("Token too long", loc); } token_buffer[sz++] = ch; ch = read_char(buf); } while (isalnum(ch) || ch == '_'); token_buffer[sz] = 0; /* Fast path for common keywords - avoid hashmap lookup */ token_kind_t kind = T_identifier; /* Check most common keywords inline based on token length and first * character. 
*/ switch (sz) { case 2: /* 2-letter keywords: if, do */ if (token_buffer[0] == 'i' && token_buffer[1] == 'f') kind = T_if; else if (token_buffer[0] == 'd' && token_buffer[1] == 'o') kind = T_do; break; case 3: /* 3-letter keywords: for */ if (token_buffer[0] == 'f' && token_buffer[1] == 'o' && token_buffer[2] == 'r') kind = T_for; break; case 4: /* 4-letter keywords: else, enum, case */ if (token_buffer[0] == 'e') { if (!memcmp(token_buffer, "else", 4)) kind = T_else; else if (!memcmp(token_buffer, "enum", 4)) kind = T_enum; } else if (!memcmp(token_buffer, "case", 4)) kind = T_case; else if (!memcmp(token_buffer, "goto", 4)) kind = T_goto; break; case 5: /* 5-letter keywords: while, break, union, const */ if (token_buffer[0] == 'w' && !memcmp(token_buffer, "while", 5)) kind = T_while; else if (token_buffer[0] == 'b' && !memcmp(token_buffer, "break", 5)) kind = T_break; else if (token_buffer[0] == 'u' && !memcmp(token_buffer, "union", 5)) kind = T_union; else if (token_buffer[0] == 'c' && !memcmp(token_buffer, "const", 5)) kind = T_const; break; case 6: /* 6-letter keywords: return, struct, switch, sizeof */ if (token_buffer[0] == 'r' && !memcmp(token_buffer, "return", 6)) kind = T_return; else if (token_buffer[0] == 's') { if (!memcmp(token_buffer, "struct", 6)) kind = T_struct; else if (!memcmp(token_buffer, "switch", 6)) kind = T_switch; else if (!memcmp(token_buffer, "sizeof", 6)) kind = T_sizeof; } break; case 7: /* 7-letter keywords: typedef, default */ if (!memcmp(token_buffer, "typedef", 7)) kind = T_typedef; else if (!memcmp(token_buffer, "default", 7)) kind = T_default; break; case 8: /* 8-letter keywords: continue */ if (!memcmp(token_buffer, "continue", 8)) kind = T_continue; break; default: /* Keywords longer than 8 chars or identifiers - use hashmap */ break; } /* Fall back to hashmap for uncommon keywords */ if (kind == T_identifier) kind = lookup_keyword(token_buffer); token = new_token(kind, loc, sz); token->literal = intern_string(token_buffer); 
loc->column += sz; return token; } error_at("Unexpected token", loc); return NULL; } token_stream_t *gen_file_token_stream(char *filename) { /* FIXME: We should normalize filename first to make cache works as expected */ token_t head; token_t *cur = &head; token_stream_t *tks; /* initialie source location with the following configuration: * pos is at 0, * len is 1 for reporting convenience, * and the column and line number are set to 1. */ source_location_t loc = {0, 1, 1, 1, filename}; strbuf_t *buf; tks = hashmap_get(TOKEN_CACHE, filename); /* Already cached, just return the computed token stream */ if (tks) return tks; buf = get_file_buf(filename); /* Borrows strbuf_t#size to use as source index */ buf->size = 0; while (buf->size < buf->capacity) { cur->next = lex_token(buf, &loc); cur = cur->next; if (cur->kind == T_eof) break; } if (!head.next) { head.next = arena_calloc(TOKEN_ARENA, 1, sizeof(token_t)); head.next->kind = T_eof; memcpy(&head.next->location, &loc, sizeof(source_location_t)); cur = head.next; } if (cur->kind != T_eof) error_at("Internal error, expected eof at the end of file", &cur->location); tks = malloc(sizeof(token_stream_t)); tks->head = head.next; tks->tail = cur; hashmap_put(TOKEN_CACHE, filename, tks); return tks; } token_stream_t *gen_libc_token_stream() { token_t head; token_t *cur = &head, *tk; token_stream_t *tks; char *filename = dynlink ? "lib/c.h" : "lib/c.c"; strbuf_t *buf = LIBC_SRC; source_location_t loc = {0, 1, 1, 1, filename}; tks = hashmap_get(TOKEN_CACHE, filename); if (tks) return tks; if (!hashmap_contains(SRC_FILE_MAP, filename)) hashmap_put(SRC_FILE_MAP, filename, LIBC_SRC); /* Borrows strbuf_t#size to use as source index */ buf->size = 0; while (buf->size < buf->capacity) { tk = lex_token(buf, &loc); /* Early break to discard eof token, so later * we can concat libc token stream with actual * input file's token stream. 
*/ if (tk->kind == T_eof) break; cur->next = tk; cur = cur->next; } if (!head.next) fatal("Unable to include libc"); if (tk->kind != T_eof) error_at("Internal error, expected eof at the end of file", &cur->location); tks = malloc(sizeof(token_stream_t)); tks->head = head.next; tks->tail = cur; hashmap_put(TOKEN_CACHE, filename, tks); return tks; } void skip_unused_token(void) { while (cur_token && cur_token->next) { if (cur_token->next->kind == T_whitespace || cur_token->next->kind == T_newline || cur_token->next->kind == T_tab) cur_token = cur_token->next; else break; } } /* Fetches current token's location. */ source_location_t *cur_token_loc() { return &cur_token->location; } /* Finds next token's location, whitespace, tab, and newline tokens are skipped, * if current token is eof, then returns eof token's location instead. */ source_location_t *next_token_loc() { skip_unused_token(); if (cur_token->kind == T_eof) return &cur_token->location; return &cur_token->next->location; } /* Lex next token with aliasing enabled */ token_kind_t lex_next(void) { skip_unused_token(); /* if reached eof, we always return eof token to avoid any advancement */ if (cur_token->kind == T_eof) return T_eof; cur_token = cur_token->next; return cur_token->kind; } /* Accepts next token if token types are matched. */ bool lex_accept(token_kind_t kind) { skip_unused_token(); if (cur_token->next && cur_token->next->kind == kind) { lex_next(); return true; } return false; } /* Peeks next token and copy token's literal to value if token types are * matched. */ bool lex_peek(token_kind_t kind, char *value) { skip_unused_token(); if (cur_token->next && cur_token->next->kind == kind) { if (!value) return true; strcpy(value, cur_token->next->literal); return true; } return false; } /* Strictly match next token with given token type and copy token's literal to * value. 
*/ void lex_ident(token_kind_t token, char *value) { skip_unused_token(); if (cur_token->next && cur_token->next->kind == token) { lex_next(); if (value) strcpy(value, cur_token->literal); return; } token_t *tk = cur_token->next ? cur_token->next : cur_token; error_at("Unexpected token", &tk->location); } /* Strictly match next token with given token type. */ void lex_expect(token_kind_t token) { skip_unused_token(); if (cur_token->next && cur_token->next->kind == token) { lex_next(); return; } token_t *tk = cur_token->next ? cur_token->next : cur_token; error_at("Unexpected token", &tk->location); } ================================================ FILE: src/main.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. */ #include #include #include #include /* Define target machine */ #include "../config" /* The inclusion must follow the fixed order, otherwise it fails to build. */ #include "defs.h" /* Initialize global objects */ #include "globals.c" /* ELF manipulation */ #include "elf.c" /* C language lexical analyzer */ #include "lexer.c" /* C language pre-processor */ #include "preprocessor.c" /* C language syntactic analyzer */ #include "parser.c" /* architecture-independent middle-end */ #include "ssa.c" /* Register allocator */ #include "reg-alloc.c" /* Peephole optimization */ #include "peephole.c" /* Arch-specific IR lowering boundary */ #include "arch-lower.c" /* Machine code generation. 
support ARMv7-A and RV32I */ #include "codegen.c" /* inlined libc */ #include "../out/libc.inc" int main(int argc, char *argv[]) { char *out = NULL; char *in = NULL; token_stream_t *libc_token_stream, *token_stream; token_t *tk; for (int i = 1; i < argc; i++) { if (!strcmp(argv[i], "--dump-ir")) dump_ir = true; else if (!strcmp(argv[i], "+m")) hard_mul_div = true; else if (!strcmp(argv[i], "--no-libc")) libc = false; else if (!strcmp(argv[i], "--dynlink")) dynlink = true; else if (!strcmp(argv[i], "-E")) expand_only = true; else if (!strcmp(argv[i], "-o")) { if (i + 1 < argc) { out = argv[i + 1]; i++; } else /* unsupported options */ abort(); } else if (argv[i][0] == '-') { fatal("Unidentified option"); } else in = argv[i]; } if (!in) { printf("Missing source file!\n"); printf( "Usage: shecc [-o output] [+m] [--dump-ir] [--no-libc] [--dynlink] " "[-E]" "\n"); exit(-1); } /* initialize global objects */ global_init(); /* include libc */ if (libc) { libc_decl(); if (!dynlink) libc_impl(); libc_token_stream = gen_libc_token_stream(); } token_stream = gen_file_token_stream(in); /* concat libc's and input file's token stream */ if (libc) { libc_token_stream->tail->next = token_stream->head; token_stream = libc_token_stream; } tk = preprocess(token_stream->head); if (expand_only) { emit_preprocessed_token(tk); exit(0); } /* load and parse source code into IR */ parse(tk); /* Compact arenas after parsing to free temporary parse structures */ compact_all_arenas(); ssa_build(); /* dump first phase IR */ if (dump_ir) dump_insn(); /* SSA-based optimization */ optimize(); /* Compact arenas after SSA optimization to free temporary SSA structures */ compact_all_arenas(); /* SSA-based liveness analyses */ liveness_analysis(); /* Compact after liveness analysis - mainly traversal args in GENERAL_ARENA */ compact_arenas_selective(COMPACT_ARENA_GENERAL); /* allocate register from IR */ reg_alloc(); /* Compact after register allocation - mainly INSN and BB arenas */ 
compact_arenas_selective(COMPACT_ARENA_INSN | COMPACT_ARENA_BB); peephole(); /* Apply arch-specific IR tweaks before final codegen */ arch_lower(); /* flatten CFG to linear instruction */ cfg_flatten(); /* Compact after CFG flattening - BB and GENERAL no longer needed */ compact_arenas_selective(COMPACT_ARENA_BB | COMPACT_ARENA_GENERAL); /* dump second phase IR */ if (dump_ir) dump_ph2_ir(); /* * ELF preprocess: * 1. generate all sections except for .text section. * 2. calculate the starting addresses of certain sections. */ elf_preprocess(); /* generate code from IR */ code_generate(); /* ELF postprocess: generate all ELF headers */ elf_postprocess(); /* output code in ELF */ elf_generate(out); /* release allocated objects */ global_release(); exit(0); } ================================================ FILE: src/opt-sccp.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. 
*/

/* SCCP (Sparse Conditional Constant Propagation) Optimization Pass
 *
 * This optimization pass performs:
 * - Constant propagation through assignments
 * - Constant folding for arithmetic and comparison operations
 * - Branch folding when conditions are compile-time constants
 * - Dead code elimination through unreachable branch removal
 */

/* Single sweep of constant propagation/folding over every basic block of
 * func (walked through rpo_next), followed per block by folding a trailing
 * conditional branch whose condition is constant.
 *
 * Folded instructions are rewritten in place into OP_load_constant and their
 * destination is flagged is_const. Returns true when anything was rewritten.
 */
bool simple_sccp(func_t *func)
{
    if (!func || !func->bbs)
        return false;

    bool changed = false;

    for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) {
        for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) {
            /* Nothing to fold into without a destination */
            if (!insn->rd)
                continue;

            switch (insn->opcode) {
            case OP_assign:
                /* Copy of a constant into a not-yet-constant destination */
                if (!insn->rs1 || !insn->rs1->is_const || insn->rd->is_const)
                    break;

                insn->rd->is_const = true;
                insn->rd->init_val = insn->rs1->init_val;
                insn->opcode = OP_load_constant;
                insn->rs1 = NULL;
                changed = true;
                break;

            case OP_trunc: {
                /* Truncation of a constant: mask it at compile time */
                if (!insn->rs1 || !insn->rs1->is_const ||
                    insn->rd->is_global || insn->sz <= 0)
                    break;

                int folded = insn->rs1->init_val;

                switch (insn->sz) {
                case 1: /* keep the low 8 bits */
                    folded = folded & 0xFF;
                    break;
                case 2: /* keep the low 16 bits */
                    folded = folded & 0xFFFF;
                    break;
                case 4: /* already 32-bit wide */
                    break;
                default:
                    /* Unknown width: leave the instruction untouched.
                     * Nothing follows the outer switch in this loop body,
                     * so moving on to the next instruction is all that is
                     * left to do.
                     */
                    continue;
                }

                insn->opcode = OP_load_constant;
                insn->rd->is_const = true;
                insn->rd->init_val = folded;
                insn->rs1 = NULL;
                insn->sz = 0;
                changed = true;
                break;
            }

            case OP_sign_ext: {
                /* Sign extension of a constant.
                 * NOTE(review): sz is compared against a plain source width
                 * here, while promote_unchecked() emits OP_sign_ext with
                 * the source width packed into the upper 16 bits of sz -
                 * confirm whether this case can actually match.
                 */
                if (!insn->rs1 || !insn->rs1->is_const ||
                    insn->rd->is_global || insn->sz <= 0)
                    break;

                int operand = insn->rs1->init_val;
                int folded = operand;

                switch (insn->sz) {
                case 1: /* from 8 bits */
                    folded = (operand & 0x80) ? (operand | 0xFFFFFF00)
                                              : (operand & 0xFF);
                    break;
                case 2: /* from 16 bits */
                    folded = (operand & 0x8000) ? (operand | 0xFFFF0000)
                                                : (operand & 0xFFFF);
                    break;
                case 4: /* already 32-bit wide */
                    break;
                default:
                    /* Unknown width: leave the instruction untouched */
                    continue;
                }

                insn->opcode = OP_load_constant;
                insn->rd->is_const = true;
                insn->rd->init_val = folded;
                insn->rs1 = NULL;
                insn->sz = 0;
                changed = true;
                break;
            }

            case OP_add:
            case OP_sub:
            case OP_mul:
            case OP_eq:
            case OP_neq:
            case OP_lt:
            case OP_leq:
            case OP_gt:
            case OP_geq: {
                /* Arithmetic/comparison on two constants */
                if (!insn->rs1 || !insn->rs1->is_const || !insn->rs2 ||
                    !insn->rs2->is_const || insn->rd->is_global)
                    break;

                const int lhs = insn->rs1->init_val;
                const int rhs = insn->rs2->init_val;
                int folded = 0;

                switch (insn->opcode) {
                case OP_add:
                    folded = lhs + rhs;
                    break;
                case OP_sub:
                    folded = lhs - rhs;
                    break;
                case OP_mul:
                    folded = lhs * rhs;
                    break;
                case OP_eq:
                    folded = (lhs == rhs);
                    break;
                case OP_neq:
                    folded = (lhs != rhs);
                    break;
                case OP_lt:
                    folded = (lhs < rhs);
                    break;
                case OP_leq:
                    folded = (lhs <= rhs);
                    break;
                case OP_gt:
                    folded = (lhs > rhs);
                    break;
                case OP_geq:
                    folded = (lhs >= rhs);
                    break;
                default:
                    continue;
                }

                insn->opcode = OP_load_constant;
                insn->rd->is_const = true;
                insn->rd->init_val = folded;
                insn->rs1 = NULL;
                insn->rs2 = NULL;
                changed = true;
                break;
            }

            default:
                /* Every other opcode is left alone */
                break;
            }
        }

        /* Fold a trailing branch on a constant into an unconditional jump */
        insn_t *term = bb->insn_list.tail;
        if (!term || term->opcode != OP_branch)
            continue;
        if (!term->rs1 || !term->rs1->is_const)
            continue;

        term->opcode = OP_jump;
        /* A zero condition selects the else successor; either way only the
         * then_ link survives.
         */
        if (term->rs1->init_val == 0)
            bb->then_ = bb->else_;
        bb->else_ = NULL;
        term->rs1 = NULL;
        changed = true;
    }

    return changed;
}

/* Peephole for a constant load immediately followed by a truncation of that
 * same value: both instructions end up as OP_load_constant of the truncated
 * value into the truncation's destination. Returns true when a pair was
 * rewritten.
 */
bool optimize_constant_casts(func_t *func)
{
    if (!func || !func->bbs)
        return false;

    bool changed = false;

    for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) {
        for (insn_t *insn = bb->insn_list.head; insn && insn->next;
             insn = insn->next) {
            insn_t *cast = insn->next;

            /* Wanted pattern:
             *   const %.tX, VALUE
             *   %.tY = trunc %.tX, SIZE
             */
            if (insn->opcode != OP_load_constant || cast->opcode != OP_trunc)
                continue;
            if (!insn->rd || !cast->rs1 || insn->rd != cast->rs1)
                continue;
            if (cast->sz <= 0 || cast->rd->is_global)
                continue;

            int folded = insn->rd->init_val;

            switch (cast->sz) {
            case 1: /* keep the low 8 bits */
                folded = folded & 0xFF;
                break;
            case 2: /* keep the low 16 bits */
                folded = folded & 0xFFFF;
                break;
            case 4: /* already 32-bit wide */
                break;
            default:
                /* Unknown width: skip this pair */
                continue;
            }

            /* The constant load now targets the truncation's destination */
            insn->rd = cast->rd;
            insn->rd->is_const = true;
            insn->rd->init_val = folded;

            /* The truncation itself degrades to an equivalent constant
             * load rather than being unlinked.
             */
            cast->opcode = OP_load_constant;
            cast->rd->is_const = true;
            cast->rd->init_val = folded;
            cast->rs1 = NULL;
            cast->sz = 0;

            changed = true;
        }
    }

    return changed;
}

================================================
FILE: src/parser.c
================================================
/*
 * shecc - Self-Hosting and Educational C Compiler.
 *
 * shecc is freely redistributable under the BSD 2 clause license. See the
 * file "LICENSE" for information on usage and redistribution of this file.
*/ #include #include #include #include "../config" #include "defs.h" #include "globals.c" /* C language syntactic analyzer */ int global_var_idx = 0; /* Side effect instructions cache */ insn_t side_effect[10]; int se_idx = 0; /* Control flow utilities */ basic_block_t *break_bb[MAX_NESTING]; int break_exit_idx = 0; basic_block_t *continue_bb[MAX_NESTING]; int continue_pos_idx = 0; /* Label utilities */ label_t labels[MAX_LABELS]; int label_idx = 0; basic_block_t *backpatch_bb[MAX_LABELS]; int backpatch_bb_idx = 0; /* stack of the operands of 3AC */ var_t *operand_stack[MAX_OPERAND_STACK_SIZE]; int operand_stack_idx = 0; /* Forward declarations */ source_location_t *cur_token_loc(); source_location_t *next_token_loc(); basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb); void perform_side_effect(block_t *parent, basic_block_t *bb); void read_inner_var_decl(var_t *vd, bool anon, bool is_param); void read_partial_var_decl(var_t *vd, var_t *template); void parse_array_init(var_t *var, block_t *parent, basic_block_t **bb, bool emit_code); label_t *find_label(char *name) { for (int i = 0; i < label_idx; i++) { if (!strcmp(name, labels[i].label_name)) return &labels[i]; } return NULL; } void add_label(char *name, basic_block_t *bb) { if (label_idx > MAX_LABELS - 1) error_at("Too many labels in function", cur_token_loc()); label_t *l = &labels[label_idx++]; strncpy(l->label_name, name, MAX_ID_LEN); l->bb = bb; } char *gen_name_to(char *buf) { sprintf(buf, ".t%d", global_var_idx++); return buf; } var_t *require_var(block_t *blk) { var_list_t *var_list = &blk->locals; if (var_list->size >= var_list->capacity) { var_list->capacity <<= 1; var_t **new_locals = arena_alloc(BLOCK_ARENA, var_list->capacity * sizeof(var_t *)); memcpy(new_locals, var_list->elements, var_list->size * sizeof(var_t *)); var_list->elements = new_locals; } var_t *var = arena_calloc(BLOCK_ARENA, 1, sizeof(var_t)); var_list->elements[var_list->size++] = var; var->consumed = -1; 
var->phys_reg = -1; var->first_use = -1; var->last_use = -1; var->loop_depth = 0; var->use_count = 0; var->base = var; var->type = TY_int; var->space_is_allocated = false; var->ofs_based_on_stack_top = false; return var; } var_t *require_typed_var(block_t *blk, type_t *type) { if (!type) error_at("Type must not be NULL", cur_token_loc()); var_t *var = require_var(blk); var->type = type; return var; } var_t *require_typed_ptr_var(block_t *blk, type_t *type, int ptr) { var_t *var = require_typed_var(blk, type); var->ptr_level = ptr; return var; } var_t *require_ref_var(block_t *blk, type_t *type, int ptr) { if (!type) error_at("Cannot reference variable from NULL type", cur_token_loc()); var_t *var = require_typed_var(blk, type); var->ptr_level = ptr + 1; return var; } var_t *require_deref_var(block_t *blk, type_t *type, int ptr) { if (!type) error_at("Cannot dereference variable from NULL type", cur_token_loc()); /* Allowing integer dereferencing */ if (!ptr && type->base_type != TYPE_struct && type->base_type != TYPE_typedef) return require_var(blk); if (!ptr) error_at("Cannot dereference from non-pointer typed variable", cur_token_loc()); var_t *var = require_typed_var(blk, type); var->ptr_level = ptr - 1; return var; } void opstack_push(var_t *var) { operand_stack[operand_stack_idx++] = var; } var_t *opstack_pop(void) { return operand_stack[--operand_stack_idx]; } void read_expr(block_t *parent, basic_block_t **bb); int write_symbol(const char *data) { /* Write string literals to .rodata section */ const int start_len = elf_rodata->size; elf_write_str(elf_rodata, data); elf_write_byte(elf_rodata, 0); return start_len; } int get_size(var_t *var) { if (var->ptr_level || var->is_func) return PTR_SIZE; return var->type->size; } int get_operator_prio(opcode_t op) { /* https://www.cs.uic.edu/~i109/Notes/COperatorPrecedenceTable.pdf */ switch (op) { case OP_ternary: return 3; case OP_log_or: return 4; case OP_log_and: return 5; case OP_bit_or: return 6; case OP_bit_xor: 
return 7; case OP_bit_and: return 8; case OP_eq: case OP_neq: return 9; case OP_lt: case OP_leq: case OP_gt: case OP_geq: return 10; case OP_lshift: case OP_rshift: return 11; case OP_add: case OP_sub: return 12; case OP_mul: case OP_div: case OP_mod: return 13; default: return 0; } } int get_unary_operator_prio(opcode_t op) { switch (op) { case OP_add: case OP_sub: case OP_bit_not: case OP_log_not: return 14; default: return 0; } } opcode_t get_operator(void) { opcode_t op = OP_generic; if (lex_accept(T_plus)) op = OP_add; else if (lex_accept(T_minus)) op = OP_sub; else if (lex_accept(T_asterisk)) op = OP_mul; else if (lex_accept(T_divide)) op = OP_div; else if (lex_accept(T_mod)) op = OP_mod; else if (lex_accept(T_lshift)) op = OP_lshift; else if (lex_accept(T_rshift)) op = OP_rshift; else if (lex_accept(T_log_and)) op = OP_log_and; else if (lex_accept(T_log_or)) op = OP_log_or; else if (lex_accept(T_eq)) op = OP_eq; else if (lex_accept(T_noteq)) op = OP_neq; else if (lex_accept(T_lt)) op = OP_lt; else if (lex_accept(T_le)) op = OP_leq; else if (lex_accept(T_gt)) op = OP_gt; else if (lex_accept(T_ge)) op = OP_geq; else if (lex_accept(T_ampersand)) op = OP_bit_and; else if (lex_accept(T_bit_or)) op = OP_bit_or; else if (lex_accept(T_bit_xor)) op = OP_bit_xor; else if (lex_peek(T_question, NULL)) op = OP_ternary; return op; } var_t *promote_unchecked(block_t *block, basic_block_t **bb, var_t *var, type_t *target_type, int target_ptr) { var_t *rd = require_typed_ptr_var(block, target_type, target_ptr); gen_name_to(rd->var_name); /* Encode both source and target sizes in src1: * Lower 16 bits: target size * Upper 16 bits: source size * This allows codegen to distinguish between different promotion types * without changing IR semantics. 
*/ int encoded_size = ((var->type->size) << 16); if (target_ptr) encoded_size |= PTR_SIZE; else encoded_size |= target_type->size; add_insn(block, *bb, OP_sign_ext, rd, var, NULL, encoded_size, NULL); return rd; } var_t *promote(block_t *block, basic_block_t **bb, var_t *var, type_t *target_type, int target_ptr) { /* Effectively checking whether var has size of int */ if (var->type->size == target_type->size || var->ptr_level || var->array_size) return var; if (var->type->size > TY_int->size && !var->ptr_level) { printf("Warning: Suspicious type promotion %s\n", var->type->type_name); return var; } return promote_unchecked(block, bb, var, target_type, target_ptr); } var_t *truncate_unchecked(block_t *block, basic_block_t **bb, var_t *var, type_t *target_type, int target_ptr) { var_t *rd = require_typed_ptr_var(block, target_type, target_ptr); gen_name_to(rd->var_name); add_insn(block, *bb, OP_trunc, rd, var, NULL, target_ptr ? PTR_SIZE : target_type->size, NULL); return rd; } var_t *resize_var(block_t *block, basic_block_t **bb, var_t *from, var_t *to) { bool is_from_ptr = from->ptr_level || from->array_size, is_to_ptr = to->ptr_level || to->array_size || (to->type && to->type->ptr_level > 0); if (is_from_ptr && is_to_ptr) return from; int from_size = get_size(from), to_size = get_size(to); if (from_size > to_size) { /* Truncation */ return truncate_unchecked(block, bb, from, to->type, to->ptr_level); } if (from_size < to_size) { /* Sign extend */ return promote_unchecked(block, bb, from, to->type, to->ptr_level); } return from; } void read_parameter_list_decl(func_t *func, bool anon); /* Forward declaration for ternary handling used by initializers */ void read_ternary_operation(block_t *parent, basic_block_t **bb); /* Parse array initializer to determine size for implicit arrays and * optionally emit initialization code. 
*/ var_t *compute_element_address(block_t *parent, basic_block_t **bb, var_t *base_addr, int index, int elem_size) { if (index == 0) return base_addr; var_t *offset = require_var(parent); gen_name_to(offset->var_name); offset->init_val = index * elem_size; add_insn(parent, *bb, OP_load_constant, offset, NULL, NULL, 0, NULL); var_t *addr = require_var(parent); gen_name_to(addr->var_name); add_insn(parent, *bb, OP_add, addr, base_addr, offset, 0, NULL); return addr; } var_t *compute_field_address(block_t *parent, basic_block_t **bb, var_t *struct_addr, var_t *field) { if (field->offset == 0) return struct_addr; var_t *offset = require_var(parent); gen_name_to(offset->var_name); offset->init_val = field->offset; add_insn(parent, *bb, OP_load_constant, offset, NULL, NULL, 0, NULL); var_t *addr = require_var(parent); gen_name_to(addr->var_name); add_insn(parent, *bb, OP_add, addr, struct_addr, offset, 0, NULL); return addr; } var_t *parse_global_constant_value(block_t *parent, basic_block_t **bb) { var_t *val = NULL; if (lex_peek(T_numeric, NULL) || lex_peek(T_minus, NULL)) { bool is_neg = false; if (lex_accept(T_minus)) is_neg = true; char numtok[MAX_ID_LEN]; lex_ident(T_numeric, numtok); int num_val = parse_numeric_constant(numtok); if (is_neg) num_val = -num_val; val = require_var(parent); gen_name_to(val->var_name); val->init_val = num_val; add_insn(parent, *bb, OP_load_constant, val, NULL, NULL, 0, NULL); } else if (lex_peek(T_char, NULL)) { char chtok[MAX_TOKEN_LEN], unescaped[MAX_TOKEN_LEN]; lex_ident(T_char, chtok); unescape_string(chtok, unescaped, MAX_TOKEN_LEN); val = require_typed_var(parent, TY_char); gen_name_to(val->var_name); val->init_val = unescaped[0]; add_insn(parent, *bb, OP_load_constant, val, NULL, NULL, 0, NULL); } else if (lex_peek(T_string, NULL)) { lex_accept(T_string); /* TODO: String fields in structs not yet supported - requires proper * handling of string literals as initializers */ } else { error_at("Global array initialization requires 
constant values", next_token_loc()); } return val; } void consume_global_constant_syntax(void) { if (lex_peek(T_numeric, NULL)) { lex_accept(T_numeric); } else if (lex_peek(T_minus, NULL)) { lex_accept(T_minus); lex_accept(T_numeric); } else if (lex_peek(T_string, NULL)) { lex_accept(T_string); } else if (lex_peek(T_char, NULL)) { lex_accept(T_char); } else { error_at("Global array initialization requires constant values", next_token_loc()); } } void parse_struct_field_init(block_t *parent, basic_block_t **bb, type_t *struct_type, var_t *target_addr, bool emit_code) { int field_idx = 0; if (!lex_peek(T_close_curly, NULL)) { for (;;) { var_t *field_val_raw = NULL; if (parent == GLOBAL_BLOCK) { if (emit_code) { field_val_raw = parse_global_constant_value(parent, bb); } else { consume_global_constant_syntax(); } } else { read_expr(parent, bb); read_ternary_operation(parent, bb); field_val_raw = opstack_pop(); } if (field_val_raw && field_idx < struct_type->num_fields) { var_t *field = &struct_type->fields[field_idx]; var_t target = {0}; target.type = field->type; target.ptr_level = field->ptr_level; var_t *field_val = resize_var(parent, bb, field_val_raw, &target); var_t *field_addr = compute_field_address(parent, bb, target_addr, field); int field_size = size_var(field); add_insn(parent, *bb, OP_write, NULL, field_addr, field_val, field_size, NULL); } field_idx++; if (!lex_accept(T_comma)) break; if (lex_peek(T_close_curly, NULL)) break; } } } void parse_array_literal_expr(block_t *parent, basic_block_t **bb) { var_t *array_var = require_var(parent); gen_name_to(array_var->var_name); array_var->is_compound_literal = true; int element_count = 0; var_t *first_element = NULL; if (!lex_peek(T_close_curly, NULL)) { read_expr(parent, bb); read_ternary_operation(parent, bb); first_element = opstack_pop(); element_count = 1; while (lex_accept(T_comma)) { if (lex_peek(T_close_curly, NULL)) break; read_expr(parent, bb); read_ternary_operation(parent, bb); opstack_pop(); 
element_count++; } } lex_expect(T_close_curly); array_var->array_size = element_count; if (first_element) { array_var->type = first_element->type; array_var->init_val = first_element->init_val; } else { array_var->type = TY_int; array_var->init_val = 0; } opstack_push(array_var); add_insn(parent, *bb, OP_load_constant, array_var, NULL, NULL, 0, NULL); } basic_block_t *handle_return_statement(block_t *parent, basic_block_t *bb) { if (lex_accept(T_semicolon)) { add_insn(parent, bb, OP_return, NULL, NULL, NULL, 0, NULL); bb_connect(bb, parent->func->exit, NEXT); return NULL; } read_expr(parent, &bb); read_ternary_operation(parent, &bb); perform_side_effect(parent, bb); lex_expect(T_semicolon); var_t *rs1 = opstack_pop(); /* Handle array compound literals in return context. * Convert array compound literals to their first element value. */ if (rs1 && rs1->array_size > 0 && rs1->var_name[0] == '.') { var_t *val = require_var(parent); val->type = rs1->type; val->init_val = rs1->init_val; gen_name_to(val->var_name); add_insn(parent, bb, OP_load_constant, val, NULL, NULL, 0, NULL); rs1 = val; } add_insn(parent, bb, OP_return, NULL, rs1, NULL, 0, NULL); bb_connect(bb, parent->func->exit, NEXT); return NULL; } basic_block_t *handle_if_statement(block_t *parent, basic_block_t *bb) { basic_block_t *n = bb_create(parent); bb_connect(bb, n, NEXT); bb = n; lex_expect(T_open_bracket); read_expr(parent, &bb); lex_expect(T_close_bracket); var_t *vd = opstack_pop(); add_insn(parent, bb, OP_branch, NULL, vd, NULL, 0, NULL); basic_block_t *then_ = bb_create(parent); basic_block_t *else_ = bb_create(parent); bb_connect(bb, then_, THEN); bb_connect(bb, else_, ELSE); basic_block_t *then_body = read_body_statement(parent, then_); basic_block_t *then_next_ = NULL; if (then_body) { then_next_ = bb_create(parent); bb_connect(then_body, then_next_, NEXT); } if (lex_accept(T_else)) { basic_block_t *else_body = read_body_statement(parent, else_); basic_block_t *else_next_ = NULL; if (else_body) 
{ else_next_ = bb_create(parent); bb_connect(else_body, else_next_, NEXT); } if (then_next_ && else_next_) { basic_block_t *next_ = bb_create(parent); bb_connect(then_next_, next_, NEXT); bb_connect(else_next_, next_, NEXT); return next_; } return then_next_ ? then_next_ : else_next_; } else { if (then_next_) { bb_connect(else_, then_next_, NEXT); return then_next_; } return else_; } } basic_block_t *handle_while_statement(block_t *parent, basic_block_t *bb) { basic_block_t *n = bb_create(parent); bb_connect(bb, n, NEXT); bb = n; continue_bb[continue_pos_idx++] = bb; basic_block_t *cond = bb; lex_expect(T_open_bracket); read_expr(parent, &bb); lex_expect(T_close_bracket); var_t *vd = opstack_pop(); add_insn(parent, bb, OP_branch, NULL, vd, NULL, 0, NULL); basic_block_t *then_ = bb_create(parent); basic_block_t *else_ = bb_create(parent); bb_connect(bb, then_, THEN); bb_connect(bb, else_, ELSE); break_bb[break_exit_idx++] = else_; basic_block_t *body_ = read_body_statement(parent, then_); continue_pos_idx--; break_exit_idx--; if (body_) bb_connect(body_, cond, NEXT); return else_; } basic_block_t *handle_goto_statement(block_t *parent, basic_block_t *bb) { /* Since a goto splits the current program into two basic blocks and makes * the subsequent basic block unreachable, this causes problems for later * CFG operations. Therefore, we create a fake if that always executes to * wrap the goto, and connect the unreachable basic block to the else * branch. Finally, return this else block. 
* * after: * a = b + c; * goto label; * c *= d; * * before: * a = b + c; * if (1) * goto label; * c *= d; */ char token[MAX_ID_LEN]; if (!lex_peek(T_identifier, token)) error_at("Expected identifier after 'goto'", next_token_loc()); lex_expect(T_identifier); lex_expect(T_semicolon); basic_block_t *fake_if = bb_create(parent); bb_connect(bb, fake_if, NEXT); var_t *val = require_var(parent); gen_name_to(val->var_name); val->init_val = 1; add_insn(parent, fake_if, OP_load_constant, val, NULL, NULL, 0, NULL); add_insn(parent, fake_if, OP_branch, NULL, val, NULL, 0, NULL); basic_block_t *then_ = bb_create(parent); basic_block_t *else_ = bb_create(parent); bb_connect(fake_if, then_, THEN); bb_connect(fake_if, else_, ELSE); add_insn(parent, then_, OP_jump, NULL, NULL, NULL, 0, token); label_t *label = find_label(token); if (label) { label->used = true; bb_connect(then_, label->bb, NEXT); return else_; } if (backpatch_bb_idx > MAX_LABELS - 1) error_at("Too many forward-referenced labels", cur_token_loc()); backpatch_bb[backpatch_bb_idx++] = then_; return else_; } basic_block_t *handle_struct_variable_decl(block_t *parent, basic_block_t *bb, char *token) { int find_type_flag = lex_accept(T_struct) ? 
2 : 1;
    /* 'union' selects the same lookup flag (2) as 'struct' */
    if (find_type_flag == 1 && lex_accept(T_union)) {
        find_type_flag = 2;
    }
    type_t *type = find_type(token, find_type_flag);
    /* Unknown type name: nothing is declared and the block is unchanged */
    if (!type)
        return bb;

    /* First declarator: allocate it and register it in the block */
    var_t *var = require_typed_var(parent, type);
    read_partial_var_decl(var, NULL);
    add_insn(parent, bb, OP_allocat, var, NULL, NULL, 0, NULL);
    add_symbol(bb, var);
    if (lex_accept(T_assign)) {
        if (lex_peek(T_open_curly, NULL) &&
            (var->array_size > 0 || var->ptr_level > 0)) {
            /* "= { ... }" on an array or pointer declarator */
            parse_array_init(var, parent, &bb, true);
        } else if (lex_peek(T_open_curly, NULL) &&
                   (var->type->base_type == TYPE_struct ||
                    var->type->base_type == TYPE_typedef)) {
            /* "= { ... }" on a struct; a typedef is unwrapped to its
             * underlying struct when it has one.
             */
            type_t *struct_type = var->type;
            if (struct_type->base_type == TYPE_typedef &&
                struct_type->base_struct)
                struct_type = struct_type->base_struct;
            /* Field stores go through the address of the variable */
            var_t *struct_addr = require_var(parent);
            gen_name_to(struct_addr->var_name);
            add_insn(parent, bb, OP_address_of, struct_addr, var, NULL, 0,
                     NULL);
            lex_expect(T_open_curly);
            parse_struct_field_init(parent, &bb, struct_type, struct_addr,
                                    true);
            lex_expect(T_close_curly);
        } else {
            /* Plain expression initializer, resized to the variable */
            read_expr(parent, &bb);
            read_ternary_operation(parent, &bb);
            var_t *rs1 = resize_var(parent, &bb, opstack_pop(), var);
            add_insn(parent, bb, OP_assign, var, rs1, NULL, 0, NULL);
        }
    }
    /* Further declarators after ',' share the type and get the same
     * three-way initializer handling as the first one.
     */
    while (lex_accept(T_comma)) {
        var_t *nv = require_typed_var(parent, type);
        read_inner_var_decl(nv, false, false);
        add_insn(parent, bb, OP_allocat, nv, NULL, NULL, 0, NULL);
        add_symbol(bb, nv);
        if (lex_accept(T_assign)) {
            if (lex_peek(T_open_curly, NULL) &&
                (nv->array_size > 0 || nv->ptr_level > 0)) {
                parse_array_init(nv, parent, &bb, true);
            } else if (lex_peek(T_open_curly, NULL) &&
                       (nv->type->base_type == TYPE_struct ||
                        nv->type->base_type == TYPE_typedef)) {
                type_t *struct_type = nv->type;
                if (struct_type->base_type == TYPE_typedef &&
                    struct_type->base_struct)
                    struct_type = struct_type->base_struct;
                var_t *struct_addr = require_var(parent);
                gen_name_to(struct_addr->var_name);
                add_insn(parent, bb, OP_address_of, struct_addr, nv, NULL, 0,
                         NULL);
                lex_expect(T_open_curly);
parse_struct_field_init(parent, &bb, struct_type, struct_addr, 1);
                lex_expect(T_close_curly);
            } else {
                /* Plain expression initializer, resized to the variable */
                read_expr(parent, &bb);
                read_ternary_operation(parent, &bb);
                var_t *rs1 = resize_var(parent, &bb, opstack_pop(), nv);
                add_insn(parent, bb, OP_assign, nv, rs1, NULL, 0, NULL);
            }
        }
    }
    lex_expect(T_semicolon);
    return bb;
}

/* Parse a brace-enclosed array initializer for 'var'.
 *
 * When var->array_size is 0 on entry the array is "implicit": its size is
 * taken from the number of initializers. With 'emit_code' set, element stores
 * are generated as well; otherwise the initializer is only parsed.
 */
void parse_array_init(var_t *var,
                      block_t *parent,
                      basic_block_t **bb,
                      bool emit_code)
{
    int elem_size = var->type->size;
    int count = 0;
    var_t *base_addr = NULL;
    /* Values of an implicit array are kept here and written once the final
     * size is known. Only the first 256 are recorded.
     * NOTE(review): later initializers of an implicit array are counted but
     * never stored - confirm this limit is intended.
     */
    var_t *stored_vals[256];
    bool is_implicit = (var->array_size == 0);
    if (emit_code)
        base_addr = var;
    lex_expect(T_open_curly);
    if (!lex_peek(T_close_curly, NULL)) {
        for (;;) {
            var_t *val = NULL;
            if (lex_peek(T_open_curly, NULL) &&
                (var->type->base_type == TYPE_struct ||
                 var->type->base_type == TYPE_typedef)) {
                /* Nested "{ ... }": one struct element. A typedef is
                 * unwrapped to its underlying struct when it has one.
                 */
                type_t *struct_type = var->type;
                if (struct_type->base_type == TYPE_typedef &&
                    struct_type->base_struct)
                    struct_type = struct_type->base_struct;
                if (emit_code) {
                    var_t *elem_addr = compute_element_address(
                        parent, bb, base_addr, count, elem_size);
                    lex_expect(T_open_curly);
                    parse_struct_field_init(parent, bb, struct_type, elem_addr,
                                            emit_code);
                    lex_expect(T_close_curly);
                    val = NULL;
                } else {
                    /* Parse-only pass: step over the element's values */
                    lex_expect(T_open_curly);
                    while (!lex_peek(T_close_curly, NULL)) {
                        if (parent == GLOBAL_BLOCK) {
                            consume_global_constant_syntax();
                        } else {
                            read_expr(parent, bb);
                            read_ternary_operation(parent, bb);
                            opstack_pop();
                        }
                        if (!lex_accept(T_comma))
                            break;
                        if (lex_peek(T_close_curly, NULL))
                            break;
                    }
                    lex_expect(T_close_curly);
                    val = NULL;
                }
            } else {
                /* Scalar element. In the global block the constant is only
                 * stepped over, so no value is available for a store here.
                 */
                if (parent == GLOBAL_BLOCK) {
                    consume_global_constant_syntax();
                    val = NULL;
                } else {
                    read_expr(parent, bb);
                    read_ternary_operation(parent, bb);
                    val = opstack_pop();
                }
            }
            if (is_implicit && emit_code && count < 256)
                stored_vals[count] = val;
            /* Explicitly sized array: store right away; initializers past
             * the declared size are not stored.
             */
            if (val && emit_code && !is_implicit && count < var->array_size) {
                var_t target = {0};
                target.type = var->type;
                target.ptr_level = 0;
                var_t *v = resize_var(parent, bb, val, &target);
                var_t *elem_addr = compute_element_address(
parent, bb, base_addr, count, elem_size);
                /* Only elements of up to 4 bytes can be written here */
                if (elem_size <= 4) {
                    add_insn(parent, *bb, OP_write, NULL, elem_addr, v,
                             elem_size, NULL);
                } else {
                    fatal("Unsupported: struct assignment > 4 bytes in array");
                }
            }
            count++;
            /* A trailing comma before '}' is accepted */
            if (!lex_accept(T_comma))
                break;
            if (lex_peek(T_close_curly, NULL))
                break;
        }
    }
    if (parent != GLOBAL_BLOCK && emit_code && !is_implicit) {
        /* e.g.:
         *
         * 1.
         *      int main()
         *      {
         *          int a[5] = {};
         *          return a[0] + a[1] + a[2] + a[3] + a[4];
         *      }
         *
         * 2.
         *      int main()
         *      {
         *          int a[5] = {5, 10};
         *          return a[0] + a[1] + a[2] + a[3] + a[4];
         *      }
         *
         * The initializer should set the value of the first elements, and
         * initialize other elements without explicit assignments to 0.
         *
         * Therefore, the first and second cases return 0 and 15, respectively.
         */
        for (; count < var->array_size; count++) {
            var_t *val = require_var(parent);
            gen_name_to(val->var_name);
            val->init_val = 0;
            add_insn(parent, *bb, OP_load_constant, val, NULL, NULL, 0, NULL);
            var_t target = {0};
            target.type = var->type;
            target.ptr_level = 0;
            var_t *v = resize_var(parent, bb, val, &target);
            var_t *elem_addr = compute_element_address(parent, bb, base_addr,
                                                       count, elem_size);
            if (elem_size <= 4) {
                add_insn(parent, *bb, OP_write, NULL, elem_addr, v, elem_size,
                         NULL);
            } else {
                fatal("Unsupported: struct assignment > 4 bytes in array");
            }
        }
    }
    lex_expect(T_close_curly);
    if (is_implicit) {
        /* The declarator becomes a real array whose size is the number of
         * initializers that were parsed.
         */
        if (var->ptr_level > 0)
            var->ptr_level = 0;
        var->array_size = count;
        if (emit_code && count > 0) {
            base_addr = var;
            /* Write the recorded values; entries without a value (NULL) are
             * skipped.
             */
            for (int i = 0; i < count && i < 256; i++) {
                if (!stored_vals[i])
                    continue;
                var_t target = {0};
                target.type = var->type;
                target.ptr_level = 0;
                var_t *v = resize_var(parent, bb, stored_vals[i], &target);
                var_t *elem_addr = compute_element_address(
                    parent, bb, base_addr, i, elem_size);
                add_insn(parent, *bb, OP_write, NULL, elem_addr, v, elem_size,
                         NULL);
            }
        }
    }
}

/* Parse the element list of an array compound literal into 'var', writing
 * each element as it is read. var->array_size ends up as the element count
 * and var->init_val as the init_val of the first element.
 */
void parse_array_compound_literal(var_t *var,
                                  block_t *parent,
                                  basic_block_t **bb)
{
    int elem_size = var->type->size;
    int count = 0;
    var->array_size = 0;
var->init_val = 0; if (!lex_peek(T_close_curly, NULL)) { for (;;) { read_expr(parent, bb); read_ternary_operation(parent, bb); var_t *value = opstack_pop(); if (count == 0) var->init_val = value->init_val; var_t target = {0}; target.type = var->type; target.ptr_level = 0; var_t *store_val = resize_var(parent, bb, value, &target); var_t *elem_addr = compute_element_address(parent, bb, var, count, elem_size); add_insn(parent, *bb, OP_write, NULL, elem_addr, store_val, elem_size, NULL); count++; if (!lex_accept(T_comma)) break; if (lex_peek(T_close_curly, NULL)) break; } } lex_expect(T_close_curly); var->array_size = count; } /* Identify compiler-emitted temporaries that hold array compound literals. * They keep array metadata without pointer indirection and are marked via * is_compound_literal when synthesized. */ bool is_array_literal_placeholder(var_t *var) { return var && var->array_size > 0 && !var->ptr_level && var->is_compound_literal; } bool is_pointer_like_value(var_t *var) { return var && (var->ptr_level || var->array_size || (var->type && var->type->ptr_level > 0)); } /* Lower a compiler-emitted array literal placeholder (marked via * is_compound_literal) into a scalar temporary when later IR expects a plain * value instead of addressable storage. This keeps SSA joins uniform when only * one branch originates from an array literal. */ var_t *scalarize_array_literal(block_t *parent, basic_block_t **bb, var_t *array_var, type_t *hint_type) { if (!is_array_literal_placeholder(array_var)) return array_var; /* Array literal placeholders carry the literal's natural type; default to * int when the parser left the type unset. */ type_t *literal_type = array_var->type ? array_var->type : TY_int; int literal_size = literal_type->size; if (literal_size <= 0) literal_size = TY_int->size; /* A caller-provided hint (e.g., assignment target) dictates the result * type when available so we reuse wider/narrower scalar destinations. */ type_t *result_type = hint_type ? 
hint_type : literal_type; if (!result_type) result_type = TY_int; /* Create a new scalar temporary, giving it a unique name and copying over * the literal data so downstream code can treat it like a normal value. */ var_t *scalar = require_typed_var(parent, result_type); scalar->ptr_level = 0; gen_name_to(scalar->var_name); scalar->init_val = array_var->init_val; /* Materialize the literal data into the scalar temporary via an OP_read. */ add_insn(parent, *bb, OP_read, scalar, array_var, NULL, literal_size, NULL); return scalar; } /* Centralized guard for lowering array literal placeholders when a scalar * value is expected, keeping the scattered special cases consistent. */ var_t *scalarize_array_literal_if_needed(block_t *parent, basic_block_t **bb, var_t *value, type_t *hint_type, bool needs_scalar) { if (!needs_scalar) return value; return scalarize_array_literal(parent, bb, value, hint_type); } void read_inner_var_decl(var_t *vd, bool anon, bool is_param) { /* Preserve typedef pointer level - don't reset if already inherited */ vd->init_val = 0; if (is_param) { /* However, if the parsed variable is a function parameter, * reset its pointer level to zero. */ vd->ptr_level = 0; } while (lex_accept(T_asterisk)) { vd->ptr_level++; /* Check for const after asterisk (e.g., int * const ptr). * For now, we just consume const qualifiers after pointer. * Full support would require tracking const-ness of the pointer * itself vs the pointed-to data separately. */ while (lex_peek(T_const, NULL)) lex_accept(T_const); } /* is it function pointer declaration? 
*/ if (lex_accept(T_open_bracket)) { func_t func; char temp_name[MAX_VAR_LEN]; lex_expect(T_asterisk); lex_ident(T_identifier, temp_name); strcpy(vd->var_name, intern_string(temp_name)); lex_expect(T_close_bracket); read_parameter_list_decl(&func, true); vd->is_func = true; } else { if (!anon) { char temp_name[MAX_VAR_LEN]; lex_ident(T_identifier, temp_name); strcpy(vd->var_name, intern_string(temp_name)); if (!lex_peek(T_open_bracket, NULL) && !is_param) { if (vd->is_global) { opstack_push(vd); } } } if (lex_accept(T_open_square)) { char buffer[10]; /* array with size */ if (lex_peek(T_numeric, buffer)) { vd->array_size = parse_numeric_constant(buffer); vd->array_dim1 = vd->array_size; /* Store first dimension */ lex_expect(T_numeric); } else { /* array without size: * regarded as a pointer although could be nested */ vd->ptr_level++; } lex_expect(T_close_square); /* Handle multi-dimensional arrays: int matrix[3][4] becomes array * of 3*4=12 elements */ if (lex_accept(T_open_square)) { if (lex_peek(T_numeric, buffer)) { int next_dim = parse_numeric_constant(buffer); lex_expect(T_numeric); vd->array_dim2 = next_dim; /* Store second dimension */ if (vd->array_size > 0) { vd->array_size *= next_dim; /* multiply dimensions together */ } else { vd->array_size = next_dim; } } else { vd->ptr_level++; } lex_expect(T_close_square); /* For now, only support 2D arrays */ while (lex_accept(T_open_square)) { if (lex_peek(T_numeric, buffer)) { int next_dim = parse_numeric_constant(buffer); lex_expect(T_numeric); if (vd->array_size > 0) { vd->array_size *= next_dim; } else { vd->array_size = next_dim; } } else { vd->ptr_level++; } lex_expect(T_close_square); } } } else { vd->array_size = 0; vd->array_dim1 = 0; vd->array_dim2 = 0; } vd->is_func = false; } } /* starting next_token, need to check the type */ void read_full_var_decl(var_t *vd, bool anon, bool is_param) { char type_name[MAX_TYPE_LEN]; int find_type_flag = lex_accept(T_struct) ? 
2 : 1; if (find_type_flag == 1 && lex_accept(T_union)) { find_type_flag = 2; } lex_ident(T_identifier, type_name); type_t *type = find_type(type_name, find_type_flag); if (!type) { printf("Could not find type %s%s\n", find_type_flag == 2 ? "struct/union " : "", type_name); abort(); } vd->type = type; read_inner_var_decl(vd, anon, is_param); } /* starting next_token, need to check the type */ void read_partial_var_decl(var_t *vd, var_t *template) { read_inner_var_decl(vd, false, false); } void read_parameter_list_decl(func_t *func, bool anon) { int vn = 0; lex_expect(T_open_bracket); char token[MAX_TYPE_LEN]; if (lex_peek(T_identifier, token) && !strncmp(token, "void", 4)) { lex_next(); if (lex_accept(T_close_bracket)) return; func->param_defs[vn].type = TY_void; read_inner_var_decl(&func->param_defs[vn], anon, true); if (!func->param_defs[vn].ptr_level && !func->param_defs[vn].is_func && !func->param_defs[vn].array_size) error_at("'void' must be the only parameter and unnamed", cur_token_loc()); vn++; lex_accept(T_comma); } while (lex_peek(T_identifier, NULL) || lex_peek(T_const, NULL)) { /* Check for const qualifier */ bool is_const = false; if (lex_accept(T_const)) is_const = true; read_full_var_decl(&func->param_defs[vn], anon, true); func->param_defs[vn].is_const_qualified = is_const; vn++; lex_accept(T_comma); } func->num_params = vn; /* Up to 'MAX_PARAMS' parameters are accepted for the variadic function. 
*/ if (lex_accept(T_elipsis)) func->va_args = 1; lex_expect(T_close_bracket); } void read_literal_param(block_t *parent, basic_block_t *bb) { char literal[MAX_TOKEN_LEN], unescaped[MAX_TOKEN_LEN], combined[MAX_LINE_LEN]; int combined_len = 0; /* Read first string literal */ lex_ident(T_string, literal); unescape_string(literal, combined, MAX_LINE_LEN); combined_len = strlen(combined); /* Check for adjacent string literals and concatenate them */ while (lex_peek(T_string, NULL)) { lex_ident(T_string, literal); unescape_string(literal, unescaped, MAX_LINE_LEN - combined_len); int unescaped_len = strlen(unescaped); if (combined_len + unescaped_len >= MAX_LINE_LEN - 1) error_at("Concatenated string literal too long", cur_token_loc()); strcpy(combined + combined_len, unescaped); combined_len += unescaped_len; } const int index = write_symbol(combined); var_t *vd = require_typed_ptr_var(parent, TY_char, true); gen_name_to(vd->var_name); vd->init_val = index; opstack_push(vd); /* String literals are now in .rodata section */ add_insn(parent, bb, OP_load_rodata_address, vd, NULL, NULL, 0, NULL); } void read_numeric_param(block_t *parent, basic_block_t *bb, bool is_neg) { char token[MAX_ID_LEN]; int value = 0; int i = 0; char c; lex_ident(T_numeric, token); if (token[0] == '-') { is_neg = !is_neg; i++; } if (token[0] == '0') { if ((token[1] | 32) == 'x') { /* hexdecimal */ i = 2; do { c = token[i++]; if (isdigit(c)) c -= '0'; else { c |= 32; /* convert to lower case */ if (c >= 'a' && c <= 'f') c = (c - 'a') + 10; else error_at("Invalid numeric constant", cur_token_loc()); } value = (value * 16) + c; } while (isxdigit(token[i])); } else if ((token[1] | 32) == 'b') { /* binary */ i = 2; do { c = token[i++]; if (c != '0' && c != '1') error_at("Invalid binary constant", cur_token_loc()); c -= '0'; value = (value * 2) + c; } while (token[i] == '0' || token[i] == '1'); } else { /* octal */ do { c = token[i++]; if (c > '7') error_at("Invalid numeric constant", cur_token_loc()); c 
-= '0'; value = (value * 8) + c; } while (isdigit(token[i])); } } else { do { c = token[i++] - '0'; value = (value * 10) + c; } while (isdigit(token[i])); } if (is_neg) value = -value; var_t *vd = require_var(parent); gen_name_to(vd->var_name); vd->init_val = value; opstack_push(vd); add_insn(parent, bb, OP_load_constant, vd, NULL, NULL, 0, NULL); } void read_char_param(block_t *parent, basic_block_t *bb) { char literal[MAX_TOKEN_LEN], unescaped[MAX_TOKEN_LEN]; lex_ident(T_char, literal); unescape_string(literal, unescaped, MAX_TOKEN_LEN); var_t *vd = require_typed_var(parent, TY_char); gen_name_to(vd->var_name); vd->init_val = unescaped[0]; opstack_push(vd); add_insn(parent, bb, OP_load_constant, vd, NULL, NULL, 0, NULL); } void read_logical(opcode_t op, block_t *parent, basic_block_t **bb); void read_func_parameters(func_t *func, block_t *parent, basic_block_t **bb) { int param_num = 0; var_t *params[MAX_PARAMS], *param; lex_expect(T_open_bracket); while (!lex_accept(T_close_bracket)) { read_expr(parent, bb); read_ternary_operation(parent, bb); param = opstack_pop(); if (func && param_num < func->num_params) { var_t *target = &func->param_defs[param_num]; if (!target->ptr_level && !target->array_size) param = scalarize_array_literal(parent, bb, param, target->type); } /* Handle parameter type conversion for direct calls. * Indirect calls currently don't provide function instance. */ if (func && param_num >= func->num_params && func->va_args) { /* Default promotions apply to scalar varargs, but pointer-like * values (including array literals) must flow through unchanged so * "%p" and friends see an address rather than a scalarized value. */ if (!is_pointer_like_value(param)) param = promote(parent, bb, param, TY_int, 0); } else if (func) { param = resize_var(parent, bb, param, &func->param_defs[param_num]); } params[param_num++] = param; lex_accept(T_comma); } for (int i = 0; i < param_num; i++) { /* The operand should keep alive before calling function. 
Pass the * number of remained parameters to allocator to extend their liveness. */ add_insn(parent, *bb, OP_push, NULL, params[i], NULL, param_num - i, NULL); } } void read_func_call(func_t *func, block_t *parent, basic_block_t **bb) { /* direct function call */ read_func_parameters(func, parent, bb); add_insn(parent, *bb, OP_call, NULL, NULL, NULL, 0, func->return_def.var_name); } void read_indirect_call(block_t *parent, basic_block_t **bb) { /* Note: Indirect calls use generic parameter handling */ read_func_parameters(NULL, parent, bb); add_insn(parent, *bb, OP_indirect, NULL, opstack_pop(), NULL, 0, NULL); } void read_lvalue(lvalue_t *lvalue, var_t *var, block_t *parent, basic_block_t **bb, bool eval, opcode_t op); /* Maintain a stack of expression values and operators, depending on next * operators' priority. Either apply it or operator on stack first. */ void handle_address_of_operator(block_t *parent, basic_block_t **bb) { char token[MAX_VAR_LEN]; lvalue_t lvalue; var_t *vd, *rs1; lex_peek(T_identifier, token); var_t *var = find_var(token, parent); read_lvalue(&lvalue, var, parent, bb, false, OP_generic); if (!lvalue.is_reference) { rs1 = opstack_pop(); vd = require_ref_var(parent, lvalue.type, lvalue.ptr_level); gen_name_to(vd->var_name); opstack_push(vd); add_insn(parent, *bb, OP_address_of, vd, rs1, NULL, 0, NULL); } } void handle_single_dereference(block_t *parent, basic_block_t **bb) { var_t *vd, *rs1; int sz; if (lex_peek(T_open_bracket, NULL)) { /* Handle general expression dereference: *(expr) */ lex_expect(T_open_bracket); read_expr(parent, bb); lex_expect(T_close_bracket); rs1 = opstack_pop(); /* For pointer dereference, we need to determine the target type and * size. Since we do not have full type tracking in expressions, use * defaults */ type_t *deref_type = rs1->type ? rs1->type : TY_int; int deref_ptr = rs1->ptr_level > 0 ? 
rs1->ptr_level - 1 : 0; vd = require_deref_var(parent, deref_type, deref_ptr); if (deref_ptr > 0) sz = PTR_SIZE; else sz = deref_type->size; gen_name_to(vd->var_name); opstack_push(vd); add_insn(parent, *bb, OP_read, vd, rs1, NULL, sz, NULL); } else { /* Handle simple identifier dereference: *var */ char token[MAX_VAR_LEN]; lvalue_t lvalue; lex_peek(T_identifier, token); var_t *var = find_var(token, parent); read_lvalue(&lvalue, var, parent, bb, true, OP_generic); rs1 = opstack_pop(); vd = require_deref_var(parent, var->type, var->ptr_level); if (lvalue.ptr_level > 1) sz = PTR_SIZE; else { /* For typedef pointers, get the size of the pointed-to type */ if (lvalue.type && lvalue.type->ptr_level > 0) { /* This is a typedef pointer */ switch (lvalue.type->base_type) { case TYPE_char: sz = TY_char->size; break; case TYPE_short: sz = TY_short->size; break; case TYPE_int: sz = TY_int->size; break; case TYPE_void: sz = 1; break; default: sz = lvalue.type->size; break; } } else { sz = lvalue.type->size; } } gen_name_to(vd->var_name); opstack_push(vd); add_insn(parent, *bb, OP_read, vd, rs1, NULL, sz, NULL); } } void handle_multiple_dereference(block_t *parent, basic_block_t **bb) { var_t *vd, *rs1; int sz; /* Handle consecutive asterisks for multiple dereference: **pp, ***ppp, * ***(expr) */ int deref_count = 1; /* We already consumed one asterisk */ while (lex_accept(T_asterisk)) deref_count++; /* Check if we have a parenthesized expression or simple identifier */ if (lex_peek(T_open_bracket, NULL)) { /* Handle ***(expr) case */ lex_expect(T_open_bracket); read_expr(parent, bb); lex_expect(T_close_bracket); /* Apply dereferences one by one */ for (int i = 0; i < deref_count; i++) { rs1 = opstack_pop(); /* For expression dereference, use default type info */ type_t *deref_type = rs1->type ? rs1->type : TY_int; int deref_ptr = rs1->ptr_level > 0 ? 
rs1->ptr_level - 1 : 0; vd = require_deref_var(parent, deref_type, deref_ptr); if (deref_ptr > 0) sz = PTR_SIZE; else sz = deref_type->size; gen_name_to(vd->var_name); opstack_push(vd); add_insn(parent, *bb, OP_read, vd, rs1, NULL, sz, NULL); } } else { /* Handle **pp, ***ppp case with simple identifier */ char token[MAX_VAR_LEN]; lvalue_t lvalue; lex_peek(T_identifier, token); var_t *var = find_var(token, parent); read_lvalue(&lvalue, var, parent, bb, true, OP_generic); /* Apply dereferences one by one */ for (int i = 0; i < deref_count; i++) { rs1 = opstack_pop(); vd = require_deref_var( parent, var->type, lvalue.ptr_level > i ? lvalue.ptr_level - i - 1 : 0); if (lvalue.ptr_level > i + 1) sz = PTR_SIZE; else { /* For typedef pointers, get the size of the pointed-to type */ if (lvalue.type && lvalue.type->ptr_level > 0 && i == deref_count - 1) { /* This is a typedef pointer on the final dereference */ switch (lvalue.type->base_type) { case TYPE_char: sz = TY_char->size; break; case TYPE_short: sz = TY_short->size; break; case TYPE_int: sz = TY_int->size; break; case TYPE_void: sz = 1; break; default: sz = lvalue.type->size; break; } } else { sz = lvalue.type->size; } } gen_name_to(vd->var_name); opstack_push(vd); add_insn(parent, *bb, OP_read, vd, rs1, NULL, sz, NULL); } } } void handle_sizeof_operator(block_t *parent, basic_block_t **bb) { char token[MAX_TYPE_LEN]; int ptr_cnt = 0; token_t *sizeof_tk = cur_token; type_t *type = NULL; var_t *vd; lex_expect(T_open_bracket); /* Check if this is sizeof(type) or sizeof(expression) */ int find_type_flag = lex_accept(T_struct) ? 
2 : 1;
    /* 'union' selects the same lookup flag (2) as 'struct' */
    if (find_type_flag == 1 && lex_accept(T_union))
        find_type_flag = 2;

    if (lex_peek(T_identifier, token)) {
        /* Try to parse as a type first */
        type = find_type(token, find_type_flag);
        if (type) {
            /* sizeof(type): trailing '*'s turn it into a pointer type */
            lex_expect(T_identifier);
            while (lex_accept(T_asterisk))
                ptr_cnt++;
        }
    }

    if (!type) {
        /* sizeof(expression) - parse the expression and get its type.
         * Only the type and pointer level of the result are used below.
         */
        read_expr(parent, bb);
        read_ternary_operation(parent, bb);
        var_t *expr_var = opstack_pop();
        type = expr_var->type;
        ptr_cnt = expr_var->ptr_level;
    }

    if (!type)
        error_at("Unable to determine type in sizeof", &sizeof_tk->location);

    /* The result is a constant: PTR_SIZE for any pointer level, otherwise
     * the size recorded in the type.
     */
    vd = require_var(parent);
    vd->init_val = ptr_cnt ? PTR_SIZE : type->size;
    gen_name_to(vd->var_name);
    opstack_push(vd);
    lex_expect(T_close_bracket);
    add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, NULL);
}

void read_expr_operand(block_t *parent, basic_block_t **bb)
{
    var_t *vd, *rs1;
    bool is_neg = false;

    if (lex_accept(T_minus)) {
        is_neg = true;
        /* After a unary minus only a number, a name or '(' may follow */
        if (!lex_peek(T_numeric, NULL) && !lex_peek(T_identifier, NULL) &&
            !lex_peek(T_open_bracket, NULL)) {
            error_at("Unexpected token after unary minus", next_token_loc());
        }
    }

    if (lex_peek(T_string, NULL))
        read_literal_param(parent, *bb);
    else if (lex_peek(T_char, NULL))
        read_char_param(parent, *bb);
    else if (lex_peek(T_numeric, NULL))
        read_numeric_param(parent, *bb, is_neg);
    else if (lex_accept(T_log_not)) {
        read_expr_operand(parent, bb);

        rs1 = opstack_pop();

        /* Constant folding for logical NOT */
        if (rs1 && rs1->is_const && !rs1->ptr_level && !rs1->is_global) {
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            vd->is_const = true;
            vd->init_val = !rs1->init_val;
            opstack_push(vd);
            add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, NULL);
        } else {
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            opstack_push(vd);
            add_insn(parent, *bb, OP_log_not, vd, rs1, NULL, 0, NULL);
        }
    } else if (lex_accept(T_bit_not)) {
        read_expr_operand(parent, bb);

        rs1 = opstack_pop();

        /* Constant folding for bitwise NOT */
        if (rs1 &&
rs1->is_const && !rs1->ptr_level && !rs1->is_global) { vd = require_var(parent); gen_name_to(vd->var_name); vd->is_const = true; vd->init_val = ~rs1->init_val; opstack_push(vd); add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, NULL); } else { vd = require_var(parent); gen_name_to(vd->var_name); opstack_push(vd); add_insn(parent, *bb, OP_bit_not, vd, rs1, NULL, 0, NULL); } } else if (lex_accept(T_ampersand)) { handle_address_of_operator(parent, bb); } else if (lex_accept(T_asterisk)) { /* dereference */ if (lex_peek(T_asterisk, NULL)) { handle_multiple_dereference(parent, bb); } else { handle_single_dereference(parent, bb); } } else if (lex_accept(T_open_bracket)) { /* Check if this is a cast, compound literal, or parenthesized * expression */ char lookahead_token[MAX_TYPE_LEN]; bool is_compound_literal = false; bool is_cast = false; type_t *cast_or_literal_type = NULL; int cast_ptr_level = 0; /* Look ahead to see if we have a typename followed by ) */ if (lex_peek(T_identifier, lookahead_token)) { /* Check if it's a basic type or typedef */ type_t *type = find_type(lookahead_token, true); if (type) { /* Save current position to backtrack if needed */ token_t *saved_token = cur_token; /* Try to parse as typename */ lex_expect(T_identifier); /* Check for pointer types: int*, char*, etc. 
*/ int ptr_level = 0; while (lex_accept(T_asterisk)) { ptr_level++; } /* Check for array brackets: [size] or [] */ bool is_array = false; if (lex_accept(T_open_square)) { is_array = true; /* Skip array size if present */ if (lex_peek(T_numeric, NULL)) { char size_buffer[10]; lex_ident(T_numeric, size_buffer); } lex_expect(T_close_square); } /* Check what follows the closing ) */ if (lex_accept(T_close_bracket)) { if (lex_peek(T_open_curly, NULL)) { /* (type){...} - compound literal */ is_compound_literal = true; cast_or_literal_type = type; cast_ptr_level = ptr_level; /* Store is_array flag in cast_ptr_level if it's an * array */ if (is_array) { /* Special marker for array compound literal */ cast_ptr_level = -1; } } else { /* (type)expr - cast expression */ is_cast = true; cast_or_literal_type = type; cast_ptr_level = ptr_level; } } else { /* Not a cast or compound literal - backtrack */ cur_token = saved_token; } } } if (is_cast) { /* Process cast: (type)expr */ /* Parse the expression to be cast */ read_expr_operand(parent, bb); /* Get the expression result */ var_t *expr_var = opstack_pop(); /* Create variable for cast result */ var_t *cast_var = require_typed_ptr_var( parent, cast_or_literal_type, cast_ptr_level); gen_name_to(cast_var->var_name); /* Generate cast IR */ add_insn(parent, *bb, OP_cast, cast_var, expr_var, NULL, cast_or_literal_type->size, NULL); /* Push the cast result */ opstack_push(cast_var); } else if (is_compound_literal) { /* Process compound literal */ lex_expect(T_open_curly); /* Create variable for compound literal result */ var_t *compound_var = require_typed_var(parent, cast_or_literal_type); gen_name_to(compound_var->var_name); compound_var->is_compound_literal = true; /* Check if this is an array compound literal (int[]){...} */ bool is_array_literal = (cast_ptr_level == -1); if (is_array_literal) cast_ptr_level = 0; /* Reset for normal processing */ bool consumed_close_brace = false; /* Check if this is a pointer compound literal */ 
if (is_array_literal) { compound_var->array_size = 0; add_insn(parent, *bb, OP_allocat, compound_var, NULL, NULL, 0, NULL); parse_array_compound_literal(compound_var, parent, bb); if (compound_var->array_size == 0) { compound_var->init_val = 0; add_insn(parent, *bb, OP_load_constant, compound_var, NULL, NULL, 0, NULL); } opstack_push(compound_var); consumed_close_brace = true; } else if (cast_ptr_level > 0) { /* Pointer compound literal: (int*){&x} */ compound_var->ptr_level = cast_ptr_level; /* Parse the pointer value (should be an address) */ if (!lex_peek(T_close_curly, NULL)) { read_expr(parent, bb); read_ternary_operation(parent, bb); var_t *ptr_val = opstack_pop(); /* For pointer compound literals, store the address */ compound_var->init_val = ptr_val->init_val; /* Consume additional values if present (for pointer arrays) */ while (lex_accept(T_comma)) { if (lex_peek(T_close_curly, NULL)) break; read_expr(parent, bb); read_ternary_operation(parent, bb); opstack_pop(); } } else { /* Empty pointer compound literal: (int*){} */ compound_var->init_val = 0; /* NULL pointer */ } /* Generate code for pointer compound literal */ opstack_push(compound_var); add_insn(parent, *bb, OP_load_constant, compound_var, NULL, NULL, 0, NULL); } else if (cast_or_literal_type->base_type == TYPE_struct || cast_or_literal_type->base_type == TYPE_typedef) { /* Struct compound literal support (including typedef structs) */ /* For typedef structs, the actual struct info is in the type */ /* Initialize struct compound literal */ compound_var->init_val = 0; compound_var->ptr_level = 0; /* Parse first field value */ if (!lex_peek(T_close_curly, NULL)) { read_expr(parent, bb); read_ternary_operation(parent, bb); var_t *first_field = opstack_pop(); compound_var->init_val = first_field->init_val; /* Consume additional fields if present */ while (lex_accept(T_comma)) { if (lex_peek(T_close_curly, NULL)) { break; } read_expr(parent, bb); read_ternary_operation(parent, bb); opstack_pop(); /* 
Consume additional field values */ } } /* Generate code for struct compound literal */ opstack_push(compound_var); add_insn(parent, *bb, OP_load_constant, compound_var, NULL, NULL, 0, NULL); } else if (cast_or_literal_type->base_type == TYPE_int || cast_or_literal_type->base_type == TYPE_short || cast_or_literal_type->base_type == TYPE_char) { /* Handle empty compound literals */ if (lex_peek(T_close_curly, NULL)) { /* Empty compound literal: (int){} */ compound_var->init_val = 0; compound_var->array_size = 0; opstack_push(compound_var); add_insn(parent, *bb, OP_load_constant, compound_var, NULL, NULL, 0, NULL); } else if (lex_peek(T_numeric, NULL) || lex_peek(T_identifier, NULL) || lex_peek(T_char, NULL)) { /* Parse first element */ read_expr(parent, bb); read_ternary_operation(parent, bb); /* Check if there are more elements (comma-separated) or if * it's an explicit array */ if (lex_peek(T_comma, NULL) || is_array_literal) { /* Array compound literal: (int[]){1, 2, 3} */ var_t *first_element = opstack_pop(); /* Store elements temporarily */ var_t *elements[256]; elements[0] = first_element; int element_count = 1; /* Parse remaining elements */ while (lex_accept(T_comma)) { if (lex_peek(T_close_curly, NULL)) break; /* Trailing comma */ read_expr(parent, bb); read_ternary_operation(parent, bb); if (element_count < 256) { elements[element_count] = opstack_pop(); } else { opstack_pop(); /* Discard if too many */ } element_count++; } /* Set array metadata */ compound_var->array_size = element_count; compound_var->init_val = first_element->init_val; /* Allocate space for the array on stack */ add_insn(parent, *bb, OP_allocat, compound_var, NULL, NULL, 0, NULL); /* Initialize each element */ for (int i = 0; i < element_count && i < 256; i++) { if (!elements[i]) continue; /* Store element at offset i * sizeof(element) */ var_t *elem_offset = require_var(parent); elem_offset->init_val = i * cast_or_literal_type->size; gen_name_to(elem_offset->var_name); add_insn(parent, 
*bb, OP_load_constant, elem_offset, NULL, NULL, 0, NULL); /* Calculate address of element */ var_t *elem_addr = require_var(parent); elem_addr->ptr_level = 1; gen_name_to(elem_addr->var_name); add_insn(parent, *bb, OP_add, elem_addr, compound_var, elem_offset, 0, NULL); /* Store the element value */ add_insn(parent, *bb, OP_write, NULL, elem_addr, elements[i], cast_or_literal_type->size, NULL); } /* Store first element value for array-to-scalar */ compound_var->init_val = first_element->init_val; /* Create result that provides first element access. * This enables array compound literals in scalar * contexts: int x = (int[]){1,2,3}; // x gets 1 int y * = 5 + (int[]){10}; // adds 5 + 10 */ var_t *result_var = require_var(parent); gen_name_to(result_var->var_name); result_var->type = compound_var->type; result_var->ptr_level = 0; result_var->array_size = 0; /* Read first element from the array */ add_insn(parent, *bb, OP_read, result_var, compound_var, NULL, compound_var->type->size, NULL); opstack_push(result_var); } else { /* Single value: (int){42} - scalar compound literal */ compound_var = opstack_pop(); opstack_push(compound_var); } } } if (!consumed_close_brace) lex_expect(T_close_curly); } else { /* Regular parenthesized expression */ read_expr(parent, bb); read_ternary_operation(parent, bb); lex_expect(T_close_bracket); } } else if (lex_accept(T_sizeof)) { handle_sizeof_operator(parent, bb); } else { /* function call, constant or variable - read token and determine */ opcode_t prefix_op = OP_generic; char token[MAX_ID_LEN]; if (lex_accept(T_increment)) prefix_op = OP_add; else if (lex_accept(T_decrement)) prefix_op = OP_sub; lex_peek(T_identifier, token); /* is a constant or variable? 
*/ constant_t *con = find_constant(token); var_t *var = find_var(token, parent); func_t *func = find_func(token); if (con) { vd = require_var(parent); vd->init_val = con->value; gen_name_to(vd->var_name); opstack_push(vd); lex_expect(T_identifier); add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, NULL); } else if (var) { /* evalue lvalue expression */ lvalue_t lvalue; read_lvalue(&lvalue, var, parent, bb, true, prefix_op); /* is it an indirect call with function pointer? */ if (lex_peek(T_open_bracket, NULL)) { read_indirect_call(parent, bb); vd = require_var(parent); gen_name_to(vd->var_name); opstack_push(vd); add_insn(parent, *bb, OP_func_ret, vd, NULL, NULL, 0, NULL); } } else if (func) { lex_expect(T_identifier); if (lex_peek(T_open_bracket, NULL)) { read_func_call(func, parent, bb); vd = require_typed_ptr_var(parent, func->return_def.type, func->return_def.ptr_level); gen_name_to(vd->var_name); opstack_push(vd); add_insn(parent, *bb, OP_func_ret, vd, NULL, NULL, 0, NULL); } else { /* indirective function pointer assignment */ vd = require_var(parent); vd->is_func = true; strcpy(vd->var_name, intern_string(token)); opstack_push(vd); } } else if (lex_accept(T_open_curly)) { parse_array_literal_expr(parent, bb); } else { /* unknown expression */ error_at("Unrecognized expression token", next_token_loc()); } if (is_neg) { rs1 = opstack_pop(); /* Constant folding for negation */ if (rs1 && rs1->is_const && !rs1->ptr_level && !rs1->is_global) { vd = require_var(parent); gen_name_to(vd->var_name); vd->is_const = true; vd->init_val = -rs1->init_val; opstack_push(vd); add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, NULL); } else { vd = require_var(parent); gen_name_to(vd->var_name); opstack_push(vd); add_insn(parent, *bb, OP_negate, vd, rs1, NULL, 0, NULL); } } } } void finalize_logical(opcode_t op, block_t *parent, basic_block_t **bb, basic_block_t *shared_bb); bool is_logical(opcode_t op) { return op == OP_log_and || op == OP_log_or; } /* Helper 
function to calculate element size for pointer operations */ int get_pointer_element_size(var_t *ptr_var) { int element_size = PTR_SIZE; /* Default to pointer size */ if (!ptr_var || !ptr_var->type) return element_size; /* Direct pointer with type info */ if (ptr_var->ptr_level && ptr_var->type) return ptr_var->type->size; /* Typedef pointer or array-derived pointer */ if (ptr_var->type && ptr_var->type->ptr_level > 0) { switch (ptr_var->type->base_type) { case TYPE_char: return TY_char->size; case TYPE_short: return TY_short->size; case TYPE_int: return TY_int->size; case TYPE_void: return 1; default: return ptr_var->type->size ? ptr_var->type->size : PTR_SIZE; } } /* Array-derived pointer without ptr_level set */ if (ptr_var->type) { switch (ptr_var->type->base_type) { case TYPE_char: return TY_char->size; case TYPE_short: return TY_short->size; case TYPE_int: return TY_int->size; case TYPE_void: return 1; default: return ptr_var->type->size ? ptr_var->type->size : PTR_SIZE; } } return element_size; } /* Helper function to handle pointer difference calculation */ void handle_pointer_difference(block_t *parent, basic_block_t **bb, var_t *rs1, var_t *rs2) { /* First perform the subtraction to get byte difference */ var_t *vd = require_var(parent); gen_name_to(vd->var_name); add_insn(parent, *bb, OP_sub, vd, rs1, rs2, 0, NULL); /* Determine element size for division */ int element_size = get_pointer_element_size(rs1); /* Divide by element size to get element count */ if (element_size > 1) { var_t *size_const = require_var(parent); gen_name_to(size_const->var_name); size_const->init_val = element_size; add_insn(parent, *bb, OP_load_constant, size_const, NULL, NULL, 0, NULL); var_t *result = require_var(parent); gen_name_to(result->var_name); add_insn(parent, *bb, OP_div, result, vd, size_const, 0, NULL); /* Push the result */ opstack_push(result); } else { /* Element size is 1 (e.g., char), no division needed */ opstack_push(vd); } } /* Helper function to handle 
pointer arithmetic (add/sub with scaling) */
/* Emits the IR for 'rs1 op rs2' where at least one operand is pointer-like
 * and pushes the result on the operand stack.
 *
 * - op == OP_sub with two pointer-like operands: emits the subtraction and,
 *   for element sizes above one byte, a division by the element size.
 * - Otherwise the integer operand is multiplied by the element size of the
 *   pointer operand (when that size is above one byte) before 'op' is
 *   emitted. For OP_add the operands are swapped if the pointer is on the
 *   right-hand side. The result inherits the pointer's type and ptr_level.
 */
void handle_pointer_arithmetic(block_t *parent,
                               basic_block_t **bb,
                               opcode_t op,
                               var_t *rs1,
                               var_t *rs2)
{
    var_t *ptr_var = NULL;
    var_t *int_var = NULL;
    int element_size = 0;

    /* Pointer arithmetic: differences (char*, int*, struct*, etc.),
     * addition/increment with scaling, and array indexing.
     */

    /* Check if both operands are pointers (pointer difference) */
    if (op == OP_sub) {
        /* If both are variables (not temporaries), look them up */
        var_t *orig_rs1 = rs1, *orig_rs2 = rs2;

        /* If they have names, they might be variable references - look them
         * up
         */
        if (rs1->var_name[0] && !rs1->init_val) {
            var_t *found = find_var(rs1->var_name, parent);
            if (found)
                orig_rs1 = found;
        }
        if (rs2->var_name[0] && !rs2->init_val) {
            var_t *found = find_var(rs2->var_name, parent);
            if (found)
                orig_rs2 = found;
        }

        /* Check if both have ptr_level or typedef pointer type */
        bool rs1_is_ptr = is_pointer_like_value(orig_rs1);
        bool rs2_is_ptr = is_pointer_like_value(orig_rs2);

        /* If variable lookup failed, check the passed variables directly */
        if (!rs1_is_ptr)
            rs1_is_ptr = is_pointer_like_value(rs1);
        if (!rs2_is_ptr)
            rs2_is_ptr = is_pointer_like_value(rs2);

        if (rs1_is_ptr && rs2_is_ptr) {
            /* Both are pointers - this is pointer difference */
            /* Determine element size */
            element_size = PTR_SIZE; /* Default */

            /* Get element size from the first pointer */
            if (orig_rs1->type) {
                /* Check if this is a typedef pointer or regular pointer */
                if (orig_rs1->type->ptr_level > 0) {
                    /* Typedef pointer - element size from base type */
                    switch (orig_rs1->type->base_type) {
                    case TYPE_char:
                        element_size = 1;
                        break;
                    case TYPE_short:
                        element_size = 2;
                        break;
                    case TYPE_int:
                        element_size = 4;
                        break;
                    default:
                        /* For struct/union typedef pointers, use the actual
                         * type size
                         */
                        if (orig_rs1->type->size > 0)
                            element_size = orig_rs1->type->size;
                        break;
                    }
                } else if (orig_rs1->ptr_level > 0) {
                    /* Regular pointer (e.g., int *p) - type gives the base
                     * type
                     */
                    switch (orig_rs1->type->base_type) {
                    case TYPE_char:
                        element_size = 1;
                        break;
                    case TYPE_short:
                        element_size = 2;
                        break;
                    case TYPE_int:
                        element_size = 4;
                        break;
                    case TYPE_void:
                        element_size = 1; /* void* arithmetic uses byte size */
                        break;
                    default:
                        /* For struct pointers, use the struct size */
                        element_size = orig_rs1->type->size;
                        break;
                    }
                }
            }

            /* Perform subtraction first */
            var_t *diff = require_var(parent);
            gen_name_to(diff->var_name);
            add_insn(parent, *bb, OP_sub, diff, rs1, rs2, 0, NULL);

            /* Then divide by element size if needed */
            if (element_size > 1) {
                var_t *size_const = require_var(parent);
                gen_name_to(size_const->var_name);
                size_const->init_val = element_size;
                add_insn(parent, *bb, OP_load_constant, size_const, NULL, NULL,
                         0, NULL);

                var_t *result = require_var(parent);
                gen_name_to(result->var_name);
                add_insn(parent, *bb, OP_div, result, diff, size_const, 0,
                         NULL);
                opstack_push(result);
            } else {
                opstack_push(diff);
            }
            return;
        }
    }

    /* Determine which operand is the pointer for regular pointer arithmetic */
    if (is_pointer_like_value(rs1)) {
        ptr_var = rs1;
        int_var = rs2;
        element_size = get_pointer_element_size(rs1);
    } else if (is_pointer_like_value(rs2)) {
        /* Only for addition (p + n == n + p) */
        if (op == OP_add) {
            ptr_var = rs2;
            int_var = rs1;
            element_size = get_pointer_element_size(rs2);
            /* Swap operands so pointer is rs1 */
            rs1 = ptr_var;
            rs2 = int_var;
        }
    }

    /* If we need to scale the integer operand */
    if (ptr_var && element_size > 1) {
        /* Create multiplication by element size */
        var_t *size_const = require_var(parent);
        gen_name_to(size_const->var_name);
        size_const->init_val = element_size;
        add_insn(parent, *bb, OP_load_constant, size_const, NULL, NULL, 0,
                 NULL);

        var_t *scaled = require_var(parent);
        gen_name_to(scaled->var_name);
        add_insn(parent, *bb, OP_mul, scaled, int_var, size_const, 0, NULL);

        /* Use scaled value as rs2 */
        rs2 = scaled;
    }

    /* Perform the operation */
    var_t *vd = require_var(parent);
    /* Preserve pointer type metadata on results of pointer arithmetic */
    if (ptr_var) {
        vd->type = ptr_var->type;
        vd->ptr_level = ptr_var->ptr_level;
    }
    gen_name_to(vd->var_name);
    opstack_push(vd);
    add_insn(parent, *bb, op, vd, rs1, rs2, 0, NULL);
}

/* Helper function to check if pointer arithmetic is needed: true only for
 * OP_add/OP_sub when at least one operand is pointer-like.
 */
bool is_pointer_operation(opcode_t op, var_t *rs1, var_t *rs2)
{
    if (op != OP_add && op != OP_sub)
        return false;

    return is_pointer_like_value(rs1) || is_pointer_like_value(rs2);
}

/* Helper function to check if a variable is a pointer based on its declaration.
 * Unnamed variables are never reported as pointers.
 */
bool is_pointer_var(var_t *v, block_t *parent)
{
    if (!v || !v->var_name[0])
        return false;

    /* Check if it has explicit ptr_level or type with ptr_level */
    if (v->ptr_level > 0 || (v->type && v->type->ptr_level > 0))
        return true;

    /* For variables that lost their type info during loading,
     * try to find the original declaration
     */
    var_t *orig = find_var(v->var_name, parent);
    if (orig &&
        (orig->ptr_level > 0 || (orig->type && orig->type->ptr_level > 0)))
        return true;

    return false;
}

/* Parse a binary expression (everything up to, but not including, a ternary
 * '?') and emit its IR.
 *
 * Arithmetic, bitwise and comparison operators are kept on a local operator
 * stack and reduced by priority; their operands live on the global operand
 * stack. Logical '&&' / '||' are not pushed on the operator stack: they are
 * lowered to branches through read_logical() / finalize_logical().
 *
 * NOTE(review): oper_stack holds 10 entries and pushes are not bounds-checked;
 * this relies on the priority-based reduction keeping the stack shallow.
 */
void read_expr(block_t *parent, basic_block_t **bb)
{
    var_t *vd, *rs1, *rs2;
    opcode_t oper_stack[10];
    int oper_stack_idx = 0;

    /* These variables used for parsing logical-and/or operation.
     *
     * For the logical-and operation, the false condition code path for testing
     * each operand uses the same code snippet (basic block).
     *
     * Likewise, when testing each operand for the logical-or operation, all of
     * them share a unified code path for the true condition.
     */
    bool has_prev_log_op = false;
    opcode_t prev_log_op = 0, pprev_log_op = 0;
    basic_block_t *log_and_shared_bb = bb_create(parent),
                  *log_or_shared_bb = bb_create(parent);

    read_expr_operand(parent, bb);

    opcode_t op = get_operator();
    /* A lone operand: nothing to combine */
    if (op == OP_generic || op == OP_ternary)
        return;
    if (is_logical(op)) {
        bb_connect(*bb, op == OP_log_and ? log_and_shared_bb : log_or_shared_bb,
                   op == OP_log_and ? ELSE : THEN);
        read_logical(op, parent, bb);
        has_prev_log_op = true;
        prev_log_op = op;
    } else
        oper_stack[oper_stack_idx++] = op;
    read_expr_operand(parent, bb);
    op = get_operator();

    while (op != OP_generic && op != OP_ternary) {
        if (oper_stack_idx > 0) {
            /* Reduce every stacked operator whose priority is not lower than
             * the incoming one.
             */
            int same = 0;
            do {
                opcode_t top_op = oper_stack[oper_stack_idx - 1];
                if (get_operator_prio(top_op) >= get_operator_prio(op)) {
                    rs2 = opstack_pop();
                    rs1 = opstack_pop();

                    /* Handle pointer arithmetic for addition and subtraction */
                    if (is_pointer_operation(top_op, rs1, rs2)) {
                        /* handle_pointer_arithmetic handles both pointer
                         * differences and regular pointer arithmetic internally
                         */
                        handle_pointer_arithmetic(parent, bb, top_op, rs1, rs2);
                        oper_stack_idx--;
                        continue;
                    }

                    vd = require_var(parent);
                    gen_name_to(vd->var_name);
                    opstack_push(vd);
                    add_insn(parent, *bb, top_op, vd, rs1, rs2, 0, NULL);

                    oper_stack_idx--;
                } else
                    same = 1;
            } while (oper_stack_idx > 0 && same == 0);
        }
        if (is_logical(op)) {
            if (prev_log_op == 0 || prev_log_op == op) {
                bb_connect(
                    *bb,
                    op == OP_log_and ? log_and_shared_bb : log_or_shared_bb,
                    op == OP_log_and ? ELSE : THEN);
                read_logical(op, parent, bb);
                prev_log_op = op;
                has_prev_log_op = true;
            } else if (prev_log_op == OP_log_and) {
                /* For example: a && b || c
                 * previous opcode: prev_log_op == OP_log_and
                 * current opcode:  op == OP_log_or
                 * current operand: b
                 *
                 * Finalize the logical-and operation and test the operand for
                 * the following logical-or operation.
                 */
                finalize_logical(prev_log_op, parent, bb, log_and_shared_bb);
                log_and_shared_bb = bb_create(parent);
                bb_connect(*bb, log_or_shared_bb, THEN);
                read_logical(op, parent, bb);

                /* Here are two cases to illustrate the following assignments
                 * after finalizing the logical-and operation and testing the
                 * operand for the following logical-or operation.
                 *
                 * 1. a && b || c
                 *    pprev opcode:    pprev_log_op == 0 (no opcode)
                 *    previous opcode: prev_log_op == OP_log_and
                 *    current opcode:  op == OP_log_or
                 *    current operand: b
                 *
                 *    The current opcode should become the previous opcode,
                 *    and the pprev opcode remains 0.
                 *
                 * 2. a || b && c || d
                 *    pprev opcode:    pprev_log_op == OP_log_or
                 *    previous opcode: prev_log_op == OP_log_and
                 *    current opcode:  op == OP_log_or
                 *    current operand: b
                 *
                 *    The previous opcode should inherit the pprev opcode, which
                 *    is equivalent to inheriting the current opcode because
                 *    both of pprev opcode and current opcode are logical-or
                 *    operator.
                 *
                 *    Thus, pprev opcode is considered used and is cleared to 0.
                 *
                 * Eventually, the current opcode becomes the previous opcode
                 * and pprev opcode is set to 0.
                 */
                prev_log_op = op;
                pprev_log_op = 0;
            } else {
                /* For example: a || b && c
                 * previous opcode: prev_log_op == OP_log_or
                 * current opcode:  op == OP_log_and
                 * current operand: b
                 *
                 * Using the logical-and operation to test the current operand
                 * instead of using the logical-or operation.
                 *
                 * Then, the previous opcode becomes pprev opcode and the
                 * current opcode becomes the previous opcode.
                 */
                bb_connect(*bb, log_and_shared_bb, ELSE);
                read_logical(op, parent, bb);
                pprev_log_op = prev_log_op;
                prev_log_op = op;
            }
        } else {
            while (has_prev_log_op &&
                   (get_operator_prio(op) < get_operator_prio(prev_log_op))) {
                /* When encountering an operator with lower priority, conclude
                 * the current logical-and/or and create a new basic block for
                 * next logical-and/or operator.
                 */
                finalize_logical(prev_log_op, parent, bb,
                                 prev_log_op == OP_log_and ? log_and_shared_bb
                                                           : log_or_shared_bb);
                if (prev_log_op == OP_log_and)
                    log_and_shared_bb = bb_create(parent);
                else
                    log_or_shared_bb = bb_create(parent);

                /* After finalizing the previous logical-and/or operation, the
                 * prev_log_op should inherit pprev_log_op and continue to check
                 * whether to finalize a logical-and/or operation.
                 */
                prev_log_op = pprev_log_op;
                has_prev_log_op = prev_log_op != 0;
                pprev_log_op = 0;
            }
        }
        read_expr_operand(parent, bb);
        if (!is_logical(op))
            oper_stack[oper_stack_idx++] = op;
        op = get_operator();
    }

    /* Reduce whatever is left on the operator stack */
    while (oper_stack_idx > 0) {
        opcode_t top_op = oper_stack[--oper_stack_idx];
        rs2 = opstack_pop();
        rs1 = opstack_pop();

        bool rs1_is_placeholder = is_array_literal_placeholder(rs1);
        bool rs2_is_placeholder = is_array_literal_placeholder(rs2);
        bool rs1_is_ptr_like = is_pointer_like_value(rs1);
        bool rs2_is_ptr_like = is_pointer_like_value(rs2);
        bool pointer_context = (rs1_is_ptr_like && !rs1_is_placeholder) ||
                               (rs2_is_ptr_like && !rs2_is_placeholder);

        /* Pointer arithmetic handling */
        if (pointer_context && is_pointer_operation(top_op, rs1, rs2)) {
            handle_pointer_arithmetic(parent, bb, top_op, rs1, rs2);
            continue; /* skip normal processing */
        }

        /* Array-literal placeholders used as scalars are replaced through
         * scalarize_array_literal(), passing the other operand's type (if
         * any) as a hint.
         */
        if (rs1_is_placeholder && rs2_is_placeholder) {
            rs1 = scalarize_array_literal(parent, bb, rs1, NULL);
            rs2 = scalarize_array_literal(parent, bb, rs2, NULL);
        } else {
            if (rs1_is_placeholder && !rs2_is_ptr_like)
                rs1 = scalarize_array_literal(
                    parent, bb, rs1, rs2 && rs2->type ? rs2->type : NULL);

            if (rs2_is_placeholder && !rs1_is_ptr_like)
                rs2 = scalarize_array_literal(
                    parent, bb, rs2, rs1 && rs1->type ? rs1->type : NULL);
        }

        /* Constant folding for binary operations.
         * NOTE(review): the guard tests init_val rather than is_const, so an
         * operand whose value is 0 is never folded and the zero-divisor
         * checks below cannot trigger. Shift counts and INT_MIN / -1 are not
         * range-checked before folding - confirm whether that is intended.
         */
        if (rs1 && rs2 && rs1->init_val && !rs1->ptr_level && !rs1->is_global &&
            rs2->init_val && !rs2->ptr_level && !rs2->is_global) {
            /* Both operands are compile-time constants */
            int result = 0;
            bool folded = true;

            switch (top_op) {
            case OP_add:
                result = rs1->init_val + rs2->init_val;
                break;
            case OP_sub:
                result = rs1->init_val - rs2->init_val;
                break;
            case OP_mul:
                result = rs1->init_val * rs2->init_val;
                break;
            case OP_div:
                if (rs2->init_val != 0)
                    result = rs1->init_val / rs2->init_val;
                else
                    folded = false; /* Division by zero */
                break;
            case OP_mod:
                if (rs2->init_val != 0)
                    result = rs1->init_val % rs2->init_val;
                else
                    folded = false; /* Modulo by zero */
                break;
            case OP_bit_and:
                result = rs1->init_val & rs2->init_val;
                break;
            case OP_bit_or:
                result = rs1->init_val | rs2->init_val;
                break;
            case OP_bit_xor:
                result = rs1->init_val ^ rs2->init_val;
                break;
            case OP_lshift:
                result = rs1->init_val << rs2->init_val;
                break;
            case OP_rshift:
                result = rs1->init_val >> rs2->init_val;
                break;
            case OP_eq:
                result = rs1->init_val == rs2->init_val;
                break;
            case OP_neq:
                result = rs1->init_val != rs2->init_val;
                break;
            case OP_lt:
                result = rs1->init_val < rs2->init_val;
                break;
            case OP_leq:
                result = rs1->init_val <= rs2->init_val;
                break;
            case OP_gt:
                result = rs1->init_val > rs2->init_val;
                break;
            case OP_geq:
                result = rs1->init_val >= rs2->init_val;
                break;
            default:
                folded = false;
                break;
            }

            if (folded) {
                /* Create constant result */
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                vd->init_val = result;
                opstack_push(vd);
                add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0,
                         NULL);
            } else {
                /* Normal operation - folding failed or not supported */
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                opstack_push(vd);
                add_insn(parent, *bb, top_op, vd, rs1, rs2, 0, NULL);
            }
        } else {
            /* Normal operation */
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            opstack_push(vd);
            add_insn(parent, *bb, top_op, vd, rs1, rs2, 0, NULL);
        }
    }

    /* Close any logical-and/or chain that is still open */
    while (has_prev_log_op) {
        finalize_logical(
            prev_log_op, parent, bb,
            prev_log_op == OP_log_and ? log_and_shared_bb : log_or_shared_bb);

        prev_log_op = pprev_log_op;
        has_prev_log_op = prev_log_op != 0;
        pprev_log_op = 0;
    }
}

/* Return the address that an expression points to, or evaluate its value.
 *   x =;
 *   x[<expr>] =;
 *   x[expr].field =;
 *   x[expr]->field =;
 *
 * lvalue:    filled with the type, size, pointer level and reference state of
 *            the designated object.
 * var:       the variable named by the identifier that is about to be
 *            consumed.
 * eval:      when false, stop after the address computation; when true, also
 *            handle trailing '+' on pointers/arrays, prefix/postfix
 *            increment and decrement, and loading the referenced value.
 * prefix_op: OP_add / OP_sub for a pending prefix '++' / '--', OP_generic
 *            otherwise.
 */
void read_lvalue(lvalue_t *lvalue,
                 var_t *var,
                 block_t *parent,
                 basic_block_t **bb,
                 bool eval,
                 opcode_t prefix_op)
{
    var_t *vd, *rs1, *rs2;
    bool is_address_got = false;
    bool is_member = false;

    /* already peeked and have the variable */
    lex_expect(T_identifier);

    lvalue->type = var->type;
    lvalue->size = get_size(var);
    lvalue->ptr_level = var->ptr_level;
    lvalue->is_func = var->is_func;
    lvalue->is_reference = false;

    opstack_push(var);

    if (lex_peek(T_open_square, NULL) || lex_peek(T_arrow, NULL) ||
        lex_peek(T_dot, NULL))
        lvalue->is_reference = true;

    while (lex_peek(T_open_square, NULL) || lex_peek(T_arrow, NULL) ||
           lex_peek(T_dot, NULL)) {
        if (lex_accept(T_open_square)) {
            /* if subscripted member's is not yet resolved, dereference to
             * resolve base address.
             * e.g., dereference of "->" in "data->raw[0]" would be performed
             * here.
             */
            if (lvalue->is_reference && lvalue->ptr_level && is_member) {
                rs1 = opstack_pop();
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                opstack_push(vd);
                /* NOTE(review): 4 is presumably the pointer size (PTR_SIZE) */
                add_insn(parent, *bb, OP_read, vd, rs1, NULL, 4, NULL);
            }

            /* var must be either a pointer or an array of some type */
            /* For typedef pointers, check the type's ptr_level */
            bool is_typedef_pointer = (var->type && var->type->ptr_level > 0);
            if (var->ptr_level == 0 && var->array_size == 0 &&
                !is_typedef_pointer)
                error_at("Cannot apply square operator to non-pointer",
                         cur_token_loc());

            /* if nested pointer, still pointer */
            /* Also handle typedef pointers which have ptr_level == 0 */
            if ((var->ptr_level <= 1 || is_typedef_pointer) &&
                var->array_size == 0) {
                /* For typedef pointers, get the size of the base type that the
                 * pointer points to
                 */
                if (lvalue->type->ptr_level > 0) {
                    /* This is a typedef pointer, get base type size */
                    switch (lvalue->type->base_type) {
                    case TYPE_char:
                        lvalue->size = TY_char->size;
                        break;
                    case TYPE_short:
                        lvalue->size = TY_short->size;
                        break;
                    case TYPE_int:
                        lvalue->size = TY_int->size;
                        break;
                    case TYPE_void:
                        /* void pointers treated as byte pointers */
                        lvalue->size = 1;
                        break;
                    default:
                        lvalue->size = lvalue->type->size;
                        break;
                    }
                } else {
                    lvalue->size = lvalue->type->size;
                }
            }

            read_expr(parent, bb);

            /* multiply by element size */
            /* For 2D arrays, check if this is the first or second dimension */
            int multiplier = lvalue->size;

            /* If this is the first index of a 2D array, multiply by dim2 *
             * element_size
             */
            if (!is_address_got && var->array_dim2 > 0)
                multiplier = var->array_dim2 * lvalue->size;

            if (multiplier != 1) {
                vd = require_var(parent);
                vd->init_val = multiplier;
                gen_name_to(vd->var_name);
                opstack_push(vd);
                add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0,
                         NULL);

                rs2 = opstack_pop();
                rs1 = opstack_pop();
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                opstack_push(vd);
                add_insn(parent, *bb, OP_mul, vd, rs1, rs2, 0, NULL);
            }

            /* base address + scaled index */
            rs2 = opstack_pop();
            rs1 = opstack_pop();
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            opstack_push(vd);
            add_insn(parent, *bb, OP_add, vd, rs1, rs2, 0, NULL);

            lex_expect(T_close_square);
            is_address_got = true;
            is_member = true;
            lvalue->is_reference = true;
        } else {
            char token[MAX_ID_LEN];

            if (lex_accept(T_arrow)) {
                /* resolve where the pointer points at from the calculated
                 * address in a structure.
                 */
                if (is_member) {
                    rs1 = opstack_pop();
                    vd = require_var(parent);
                    gen_name_to(vd->var_name);
                    opstack_push(vd);
                    /* NOTE(review): 4 is presumably the pointer size
                     * (PTR_SIZE)
                     */
                    add_insn(parent, *bb, OP_read, vd, rs1, NULL, 4, NULL);
                }
            } else {
                lex_expect(T_dot);

                if (!is_address_got) {
                    rs1 = opstack_pop();
                    vd = require_var(parent);
                    gen_name_to(vd->var_name);
                    opstack_push(vd);
                    add_insn(parent, *bb, OP_address_of, vd, rs1, NULL, 0,
                             NULL);

                    is_address_got = true;
                }
            }

            lex_ident(T_identifier, token);

            /* change type currently pointed to.
             * NOTE(review): the result of find_member() is dereferenced
             * without a NULL check - confirm it reports unknown members
             * itself.
             */
            var = find_member(token, lvalue->type);
            lvalue->type = var->type;
            lvalue->ptr_level = var->ptr_level;
            lvalue->is_func = var->is_func;
            lvalue->size = get_size(var);

            /* if it is an array, get the address of first element instead of
             * its value.
             */
            if (var->array_size > 0)
                lvalue->is_reference = false;

            /* move pointer to offset of structure */
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            vd->init_val = var->offset;
            opstack_push(vd);
            add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, NULL);

            rs2 = opstack_pop();
            rs1 = opstack_pop();
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            opstack_push(vd);
            add_insn(parent, *bb, OP_add, vd, rs1, rs2, 0, NULL);

            is_address_got = true;
            is_member = true;
        }
    }

    /* The caller only wants the address (top of the operand stack) */
    if (!eval)
        return;

    /* Only handle pointer arithmetic if we have a pointer/array that hasn't
     * been dereferenced. After array indexing like arr[0], we have a value, not
     * a pointer.
     */
    if (lex_peek(T_plus, NULL) && (var->ptr_level || var->array_size) &&
        !lvalue->is_reference) {
        while (lex_peek(T_plus, NULL) && (var->ptr_level || var->array_size)) {
            lex_expect(T_plus);
            if (lvalue->is_reference) {
                rs1 = opstack_pop();
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                opstack_push(vd);
                add_insn(parent, *bb, OP_read, vd, rs1, NULL, lvalue->size,
                         NULL);
            }

            read_expr_operand(parent, bb);

            /* Scale the addend by the size of the declared type */
            lvalue->size = lvalue->type->size;

            if (lvalue->size > 1) {
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                vd->init_val = lvalue->size;
                opstack_push(vd);
                add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0,
                         NULL);

                rs2 = opstack_pop();
                rs1 = opstack_pop();
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                opstack_push(vd);
                add_insn(parent, *bb, OP_mul, vd, rs1, rs2, 0, NULL);
            }

            rs2 = opstack_pop();
            rs1 = opstack_pop();
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            opstack_push(vd);
            add_insn(parent, *bb, OP_add, vd, rs1, rs2, 0, NULL);
        }
    } else {
        /* Holds the loaded value; only assigned when is_reference is set */
        var_t *t;

        /* If operand is a reference, read the value and push to stack for the
         * incoming addition/subtraction. Otherwise, use the top element of
         * stack as the one of operands and the destination.
         */
        if (lvalue->is_reference) {
            rs1 = operand_stack[operand_stack_idx - 1];
            t = require_var(parent);
            gen_name_to(t->var_name);
            opstack_push(t);
            add_insn(parent, *bb, OP_read, t, rs1, NULL, lvalue->size, NULL);
        }
        if (prefix_op != OP_generic) {
            /* Prefix ++/--: update the object now and leave the new value */
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            /* For pointer arithmetic, increment by the size of pointed-to type
             */
            if (lvalue->ptr_level)
                vd->init_val = lvalue->type->size;
            else
                vd->init_val = 1;
            opstack_push(vd);
            add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0, NULL);

            rs2 = opstack_pop();
            if (lvalue->is_reference)
                rs1 = opstack_pop();
            else
                rs1 = operand_stack[operand_stack_idx - 1];
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            add_insn(parent, *bb, prefix_op, vd, rs1, rs2, 0, NULL);

            if (lvalue->is_reference) {
                rs1 = vd;
                vd = opstack_pop();
                /* The column of arguments of the new insn of 'OP_write' is
                 * different from 'ph1_ir'
                 */
                add_insn(parent, *bb, OP_write, NULL, vd, rs1, lvalue->size,
                         NULL);
                /* Push the new value onto the operand stack */
                opstack_push(rs1);
            } else {
                rs1 = vd;
                vd = operand_stack[operand_stack_idx - 1];
                add_insn(parent, *bb, OP_assign, vd, rs1, NULL, 0, NULL);
            }
        } else if (lex_peek(T_increment, NULL) || lex_peek(T_decrement, NULL)) {
            /* Postfix ++/--: the update is recorded in side_effect[] instead
             * of being emitted here, and the current value stays on the
             * operand stack.
             */
            side_effect[se_idx].opcode = OP_load_constant;
            vd = require_var(parent);
            gen_name_to(vd->var_name);

            /* Calculate increment size based on pointer type */
            int increment_size = 1;
            if (lvalue->ptr_level && !lvalue->is_reference) {
                increment_size = lvalue->type->size;
            } else if (!lvalue->is_reference && lvalue->type &&
                       lvalue->type->ptr_level > 0) {
                /* This is a typedef pointer */
                switch (lvalue->type->base_type) {
                case TYPE_char:
                    increment_size = TY_char->size;
                    break;
                case TYPE_short:
                    increment_size = TY_short->size;
                    break;
                case TYPE_int:
                    increment_size = TY_int->size;
                    break;
                case TYPE_void:
                    increment_size = 1;
                    break;
                default:
                    increment_size = lvalue->type->size;
                    break;
                }
            }
            vd->init_val = increment_size;

            side_effect[se_idx].rd = vd;
            side_effect[se_idx].rs1 = NULL;
            side_effect[se_idx].rs2 = NULL;
            se_idx++;

            side_effect[se_idx].opcode = lex_accept(T_increment) ? OP_add
                                                                 : OP_sub;
            side_effect[se_idx].rs2 = vd;
            if (lvalue->is_reference)
                side_effect[se_idx].rs1 = opstack_pop();
            else
                side_effect[se_idx].rs1 = operand_stack[operand_stack_idx - 1];
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            side_effect[se_idx].rd = vd;
            se_idx++;

            if (lvalue->is_reference) {
                side_effect[se_idx].opcode = OP_write;
                side_effect[se_idx].rs2 = vd;
                side_effect[se_idx].rs1 = opstack_pop();
                side_effect[se_idx].sz = lvalue->size;
                side_effect[se_idx].rd = NULL;
                /* Keep the previously read value as the expression result */
                opstack_push(t);
                se_idx++;
            } else {
                side_effect[se_idx].opcode = OP_assign;
                side_effect[se_idx].rs1 = vd;
                side_effect[se_idx].rd = operand_stack[operand_stack_idx - 1];
                side_effect[se_idx].rs2 = NULL;
                se_idx++;
            }
        } else {
            if (lvalue->is_reference) {
                /* pop the address and keep the read value */
                t = opstack_pop();
                opstack_pop();
                opstack_push(t);
            }
        }
    }
}

/* Emit the test of the operand that precedes a logical-and/or operator.
 *
 * Pops the operand, emits OP_branch on it, creates the basic block in which
 * the next operand is evaluated, connects it on the THEN edge for '&&' or the
 * ELSE edge for '||', and makes it the current block.
 */
void read_logical(opcode_t op, block_t *parent, basic_block_t **bb)
{
    var_t *vd;

    if (op != OP_log_and && op != OP_log_or)
        error_at("encounter an invalid logical opcode in read_logical()",
                 cur_token_loc());

    /* Test the operand before the logical-and/or operator */
    vd = opstack_pop();
    add_insn(parent, *bb, OP_branch, NULL, vd, NULL, 0, NULL);

    /* Create a proper branch label for the operand of the logical-and/or
     * operation.
     */
    basic_block_t *new_bb = bb_create(parent);
    bb_connect(*bb, new_bb, op == OP_log_and ? THEN : ELSE);

    bb[0] = new_bb;
}

void finalize_logical(opcode_t op,
                      block_t *parent,
                      basic_block_t **bb,
                      basic_block_t *shared_bb)
{
    basic_block_t *then, *then_next, *else_if, *else_bb;
    basic_block_t *end = bb_create(parent);
    var_t *vd, *log_op_res;

    if (op == OP_log_and) {
        /* For example: a && b
         *
         * If handling the expression, the basic blocks will
         * connect to each other as the following illustration:
         *
         *  bb1                 bb2                 bb3
         *  +-----------+       +-----------+       +---------+
         *  | teq a, #0 | True  | teq b, #0 | True  | ldr 1   |
         *  | bne bb2   | ----> | bne bb3   | ----> | b bb5   |
         *  | b bb4     |       | b bb4     |       +---------+
         *  +-----------+       +-----------+            |
         *        |                   |                  |
         *        | False             | False            |
         *        |                   |                  |
         *        |             +---------+         +--------+
         *        ------------> | ldr 0   | ------> |        |
         *                      | b bb5   |         |        |
         *                      +---------+         +--------+
         *                      bb4                 bb5
         *
         * In this case, finalize_logical() should add some
         * instructions to bb2 ~ bb5 and properly connect them
         * to each other.
         *
         * Notice that
         * - bb1 has been handled by read_logical().
         * - bb2 is equivalent to '*bb'.
         * - bb3 needs to be created.
         * - bb4 is 'shared_bb'.
         * - bb5 needs to be created.
         *
         * Thus, here uses 'then', 'then_next', 'else_bb' and
         * 'end' to respectively point to bb2 ~ bb5. Subsequently,
         * perform the mentioned operations for finalizing.
* */ then = *bb; then_next = bb_create(parent); else_bb = shared_bb; bb_connect(then, then_next, THEN); bb_connect(then, else_bb, ELSE); bb_connect(then_next, end, NEXT); } else if (op == OP_log_or) { /* For example: a || b * * Similar to handling logical-and operations, it should * add some instructions to the basic blocks and connect * them to each other for logical-or operations as in * the figure: * * bb1 bb2 bb3 * +-----------+ +-----------+ +---------+ * | teq a, #0 | False | teq b, #0 | False | ldr 0 | * | bne bb4 | ----> | bne bb4 | ----> | b bb5 | * | b bb2 | | b bb3 | +---------+ * +-----------+ +-----------+ | * | | | * | True | True | * | | | * | +---------+ +--------+ * -------------> | ldr 1 | ------> | | * | b bb5 | | | * +---------+ +--------+ * bb4 bb5 * * Similarly, here uses 'else_if', 'else_bb', 'then' and * 'end' to respectively point to bb2 ~ bb5, and then * finishes the finalization. * */ then = shared_bb; else_if = *bb; else_bb = bb_create(parent); bb_connect(else_if, then, THEN); bb_connect(else_if, else_bb, ELSE); bb_connect(then, end, NEXT); } else error_at("encounter an invalid logical opcode in finalize_logical()", cur_token_loc()); bb_connect(else_bb, end, NEXT); /* Create the branch instruction for final logical-and/or operand */ vd = opstack_pop(); add_insn(parent, op == OP_log_and ? then : else_if, OP_branch, NULL, vd, NULL, 0, NULL); /* * If handling logical-and operation, here creates a true branch for the * logical-and operation and assigns a true value. * * Otherwise, create a false branch and assign a false value for logical-or * operation. * */ vd = require_var(parent); gen_name_to(vd->var_name); vd->init_val = op == OP_log_and; add_insn(parent, op == OP_log_and ? then_next : else_bb, OP_load_constant, vd, NULL, NULL, 0, NULL); log_op_res = require_var(parent); gen_name_to(log_op_res->var_name); add_insn(parent, op == OP_log_and ? 
then_next : else_bb, OP_assign, log_op_res, vd, NULL, 0, NULL); /* After assigning a value, go to the final basic block, this is done by BB * fallthrough. */ /* Create the shared branch and assign the other value for the other * condition of a logical-and/or operation. * * If handing a logical-and operation, assign a false value. else, assign * a true value for a logical-or operation. */ vd = require_var(parent); gen_name_to(vd->var_name); vd->init_val = op != OP_log_and; add_insn(parent, op == OP_log_and ? else_bb : then, OP_load_constant, vd, NULL, NULL, 0, NULL); add_insn(parent, op == OP_log_and ? else_bb : then, OP_assign, log_op_res, vd, NULL, 0, NULL); log_op_res->is_logical_ret = true; opstack_push(log_op_res); bb[0] = end; } void read_ternary_operation(block_t *parent, basic_block_t **bb) { var_t *vd; if (!lex_accept(T_question)) return; /* ternary-operator */ vd = opstack_pop(); add_insn(parent, *bb, OP_branch, NULL, vd, NULL, 0, NULL); basic_block_t *then_ = bb_create(parent); basic_block_t *else_ = bb_create(parent); basic_block_t *end_ternary = bb_create(parent); bb_connect(then_, end_ternary, NEXT); bb_connect(else_, end_ternary, NEXT); /* true branch */ read_expr(parent, &then_); bb_connect(*bb, then_, THEN); if (!lex_accept(T_colon)) { /* ternary operator in standard C needs three operands */ /* Note: Dangling basic block cleanup handled by arena allocator */ abort(); } var_t *true_val = opstack_pop(); /* false branch */ read_expr(parent, &else_); bb_connect(*bb, else_, ELSE); var_t *false_val = opstack_pop(); bool true_array = is_array_literal_placeholder(true_val); bool false_array = is_array_literal_placeholder(false_val); bool true_ptr_like = is_pointer_like_value(true_val); bool false_ptr_like = is_pointer_like_value(false_val); /* The ternary result must look like whichever side is pointer-like. 
If the
     * "true" expression is still a raw array literal but the "false" side is a
     * plain scalar, materialize the literal now so both branches produce
     * comparable scalar SSA values.
     */
    true_val = scalarize_array_literal_if_needed(
        parent, &then_, true_val, false_val ? false_val->type : NULL,
        true_array && !false_ptr_like);

    /* Apply the same conversion symmetrically when only the false branch is a
     * literal array. This prevents OP_assign from trying to move array storage
     * into a scalar destination later in code generation.
     */
    false_val = scalarize_array_literal_if_needed(
        parent, &else_, false_val, true_val ? true_val->type : NULL,
        false_array && !true_ptr_like);

    /* Both arms assign to the same fresh variable, each in its own block. */
    vd = require_var(parent);
    gen_name_to(vd->var_name);
    add_insn(parent, then_, OP_assign, vd, true_val, NULL, 0, NULL);
    add_insn(parent, else_, OP_assign, vd, false_val, NULL, 0, NULL);

    /* If an arm is still an array-literal placeholder after the conversions
     * above, copy its array metadata onto the result (the true arm wins when
     * both qualify).
     */
    var_t *array_ref = NULL;
    if (is_array_literal_placeholder(true_val))
        array_ref = true_val;
    else if (is_array_literal_placeholder(false_val))
        array_ref = false_val;

    if (array_ref) {
        vd->array_size = array_ref->array_size;
        vd->init_val = array_ref->init_val;
        vd->type = array_ref->type;
    }

    vd->is_ternary_ret = true;
    opstack_push(vd);
    bb[0] = end_ternary;
}

/* Parse a statement that starts with a variable: plain assignment, compound
 * assignment, ++/--, or a call through a function pointer.
 *
 * token     - identifier naming the target; looked up as a local of 'parent'
 *             first, then as a global.
 * parent    - block that owns the emitted temporaries.
 * prefix_op - OP_generic, or the operation of a prefix ++/-- the caller has
 *             already consumed (used when no operator follows the lvalue).
 * bb        - current basic block; passed on to the expression readers, which
 *             may update it.
 *
 * Returns false without consuming anything when 'token' is not a known
 * variable, true once the statement has been parsed and its IR emitted.
 */
bool read_body_assignment(char *token,
                          block_t *parent,
                          opcode_t prefix_op,
                          basic_block_t **bb)
{
    var_t *var = find_local_var(token, parent), *vd, *rs1, *rs2, *t;
    if (!var)
        var = find_global_var(token);
    if (var) {
        /* one == 1: the right operand is the implicit step constant (++/-- or
         * prefix_op) rather than a parsed expression.
         */
        int one = 0;
        opcode_t op = OP_generic;
        lvalue_t lvalue;
        int size = 0;

        /* has memory address that we want to set */
        read_lvalue(&lvalue, var, parent, bb, false, OP_generic);
        size = lvalue.size;

        /* Map the token that follows the lvalue to the arithmetic operation
         * of the statement; 'op' stays OP_generic for a plain '='.
         */
        if (lex_accept(T_increment)) {
            op = OP_add;
            one = 1;
        } else if (lex_accept(T_decrement)) {
            op = OP_sub;
            one = 1;
        } else if (lex_accept(T_pluseq)) {
            op = OP_add;
        } else if (lex_accept(T_minuseq)) {
            op = OP_sub;
        } else if (lex_accept(T_asteriskeq)) {
            op = OP_mul;
        } else if (lex_accept(T_divideeq)) {
            op = OP_div;
        } else if (lex_accept(T_modeq)) {
            op = OP_mod;
        } else if (lex_accept(T_lshifteq)) {
            op = OP_lshift;
        } else if (lex_accept(T_rshifteq)) {
            op = OP_rshift;
        } else if (lex_accept(T_xoreq)) {
            op = OP_bit_xor;
        } else if (lex_accept(T_oreq)) {
            op = OP_bit_or;
        } else if (lex_accept(T_andeq)) {
            op = OP_bit_and;
        } else if (lex_peek(T_open_bracket, NULL)) {
            /* dereference lvalue into function address */
            rs1 = opstack_pop();
            vd = require_var(parent);
            gen_name_to(vd->var_name);
            opstack_push(vd);
            add_insn(parent, *bb, OP_read, vd, rs1, NULL, PTR_SIZE, NULL);

            read_indirect_call(parent, bb);
            return true;
        } else if (prefix_op == OP_generic) {
            lex_expect(T_assign);
        } else {
            /* Prefix ++/-- consumed by the caller: step by one element. */
            op = prefix_op;
            one = 1;
        }

        if (op != OP_generic) {
            int increment_size = 1;

            /* if we have a pointer, shift it by element size */
            /* But not if we are operating on a dereferenced value (array
             * indexing)
             */
            /* NOTE(review): lvalue.type->size is used for any non-zero
             * ptr_level; confirm this is the intended stride for pointers to
             * pointers.
             */
            if (lvalue.ptr_level && !lvalue.is_reference)
                increment_size = lvalue.type->size;
            /* Also check for typedef pointers which have is_ptr == 0 */
            else if (!lvalue.is_reference && lvalue.type &&
                     lvalue.type->ptr_level > 0) {
                /* This is a typedef pointer, get the base type size */
                switch (lvalue.type->base_type) {
                case TYPE_char:
                    increment_size = TY_char->size;
                    break;
                case TYPE_short:
                    increment_size = TY_short->size;
                    break;
                case TYPE_int:
                    increment_size = TY_int->size;
                    break;
                case TYPE_void:
                    /* void pointers treated as byte pointers */
                    increment_size = 1;
                    break;
                default:
                    /* For struct pointers and other types */
                    increment_size = lvalue.type->size;
                    break;
                }
            }

            /* If operand is a reference, read the value and push to stack for
             * the incoming addition/subtraction. Otherwise, use the top element
             * of stack as the one of operands and the destination.
             */
            if (one == 1) {
                /* 't' is the destination: the address popped from the stack
                 * for a reference, otherwise the variable on top of the stack.
                 */
                if (lvalue.is_reference) {
                    t = opstack_pop();
                    vd = require_var(parent);
                    gen_name_to(vd->var_name);
                    opstack_push(vd);
                    add_insn(parent, *bb, OP_read, vd, t, NULL, lvalue.size,
                             NULL);
                } else
                    t = operand_stack[operand_stack_idx - 1];

                /* Right operand: the step constant. */
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                vd->init_val = increment_size;
                add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0,
                         NULL);

                /* Left operand: the current value, popped from the stack. */
                rs2 = vd;
                rs1 = opstack_pop();
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                add_insn(parent, *bb, op, vd, rs1, rs2, 0, NULL);

                /* Store the result through the address, or assign it to the
                 * variable after resizing it to the destination.
                 */
                if (lvalue.is_reference) {
                    add_insn(parent, *bb, OP_write, NULL, t, vd, size, NULL);
                } else {
                    vd = resize_var(parent, bb, vd, t);
                    add_insn(parent, *bb, OP_assign, t, vd, NULL, 0, NULL);
                }
            } else {
                /* Compound assignment with an explicit right-hand expression.
                 * 't' is chosen exactly as in the branch above.
                 */
                if (lvalue.is_reference) {
                    t = opstack_pop();
                    vd = require_var(parent);
                    gen_name_to(vd->var_name);
                    opstack_push(vd);
                    add_insn(parent, *bb, OP_read, vd, t, NULL, lvalue.size,
                             NULL);
                } else
                    t = operand_stack[operand_stack_idx - 1];

                read_expr(parent, bb);

                var_t *rhs_val = opstack_pop();
                rhs_val = scalarize_array_literal_if_needed(
                    parent, bb, rhs_val, lvalue.type,
                    !lvalue.ptr_level && !lvalue.is_reference);
                opstack_push(rhs_val);

                /* The right-hand value is always multiplied by
                 * 'increment_size' (1 unless the target is a pointer) before
                 * 'op' is applied.
                 */
                vd = require_var(parent);
                vd->init_val = increment_size;
                gen_name_to(vd->var_name);
                opstack_push(vd);
                add_insn(parent, *bb, OP_load_constant, vd, NULL, NULL, 0,
                         NULL);

                rs2 = opstack_pop();
                rs1 = opstack_pop();
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                opstack_push(vd);
                add_insn(parent, *bb, OP_mul, vd, rs1, rs2, 0, NULL);

                /* rs2 = scaled right-hand value, rs1 = current value. */
                rs2 = opstack_pop();
                rs1 = opstack_pop();
                vd = require_var(parent);
                gen_name_to(vd->var_name);
                add_insn(parent, *bb, op, vd, rs1, rs2, 0, NULL);

                if (lvalue.is_reference) {
                    add_insn(parent, *bb, OP_write, NULL, t, vd, lvalue.size,
                             NULL);
                } else {
                    vd = resize_var(parent, bb, vd, t);
                    add_insn(parent, *bb, OP_assign, t, vd, NULL, 0, NULL);
                }
            }
        } else {
            /* Plain '=': parse the right-hand side, including an optional
             * ternary, then store it according to the kind of lvalue.
             */
            read_expr(parent, bb);
            read_ternary_operation(parent, bb);

            if (lvalue.is_func) {
                rs2 = opstack_pop();
                rs1 = opstack_pop();
                add_insn(parent, *bb,
OP_write, NULL, rs1, rs2, PTR_SIZE, NULL); } else if (lvalue.is_reference) { rs2 = opstack_pop(); rs1 = opstack_pop(); add_insn(parent, *bb, OP_write, NULL, rs1, rs2, size, NULL); } else { rs1 = opstack_pop(); vd = opstack_pop(); rs1 = resize_var(parent, bb, rs1, vd); add_insn(parent, *bb, OP_assign, vd, rs1, NULL, 0, NULL); } } return true; } return false; } int read_primary_constant(void) { /* return signed constant */ int isneg = 0, res; char buffer[MAX_TOKEN_LEN]; if (lex_accept(T_minus)) isneg = 1; if (lex_accept(T_open_bracket)) { res = read_primary_constant(); lex_expect(T_close_bracket); } else if (lex_peek(T_numeric, buffer)) { res = parse_numeric_constant(buffer); lex_expect(T_numeric); } else if (lex_peek(T_char, buffer)) { char unescaped[MAX_TOKEN_LEN]; unescape_string(buffer, unescaped, MAX_TOKEN_LEN); res = unescaped[0]; lex_expect(T_char); } else error_at("Invalid value after assignment", next_token_loc()); if (isneg) return (-1) * res; return res; } int eval_expression_imm(opcode_t op, int op1, int op2) { /* return immediate result */ int tmp = op2; int res = 0; switch (op) { case OP_add: res = op1 + op2; break; case OP_sub: res = op1 - op2; break; case OP_mul: res = op1 * op2; break; case OP_div: res = op1 / op2; break; case OP_mod: /* Use bitwise AND for modulo optimization when divisor is power of 2 */ tmp &= (tmp - 1); if ((op2 != 0) && (tmp == 0)) { res = op1; res &= (op2 - 1); } else res = op1 % op2; break; case OP_lshift: res = op1 << op2; break; case OP_rshift: res = op1 >> op2; break; case OP_log_and: res = op1 && op2; break; case OP_log_or: res = op1 || op2; break; case OP_eq: res = op1 == op2; break; case OP_neq: res = op1 != op2; break; case OP_lt: res = op1 < op2; break; case OP_gt: res = op1 > op2; break; case OP_leq: res = op1 <= op2; break; case OP_geq: res = op1 >= op2; break; default: error_at("The requested operation is not supported.", cur_token_loc()); } return res; } bool read_global_assignment(char *token); void 
eval_ternary_imm(int cond, char *token) { if (cond == 0) { while (!lex_peek(T_colon, NULL)) { lex_next(); } lex_accept(T_colon); read_global_assignment(token); } else { read_global_assignment(token); lex_expect(T_colon); while (!lex_peek(T_semicolon, NULL)) { lex_next(); } } } bool read_global_assignment(char *token) { var_t *vd, *rs1, *var; block_t *parent = GLOBAL_BLOCK; basic_block_t *bb = GLOBAL_FUNC->bbs; /* global initialization must be constant */ var = find_global_var(token); if (var) { if (lex_peek(T_string, NULL)) { /* String literal global initialization: * String literals are now stored in .rodata section. * TODO: Implement compile-time address resolution for global * pointer initialization with rodata addresses * (e.g., char *p = "str";) */ read_literal_param(parent, bb); rs1 = opstack_pop(); vd = var; add_insn(parent, bb, OP_assign, vd, rs1, NULL, 0, NULL); return true; } opcode_t op_stack[10]; opcode_t op, next_op; int val_stack[10]; int op_stack_index = 0, val_stack_index = 0; int operand1, operand2; operand1 = read_primary_constant(); op = get_operator(); /* only one value after assignment */ if (op == OP_generic) { vd = require_var(parent); gen_name_to(vd->var_name); vd->init_val = operand1; add_insn(parent, bb, OP_load_constant, vd, NULL, NULL, 0, NULL); rs1 = vd; vd = opstack_pop(); add_insn(parent, bb, OP_assign, vd, rs1, NULL, 0, NULL); return true; } if (op == OP_ternary) { lex_expect(T_question); eval_ternary_imm(operand1, token); return true; } operand2 = read_primary_constant(); next_op = get_operator(); if (next_op == OP_generic) { /* only two operands, apply and return */ vd = require_var(parent); gen_name_to(vd->var_name); vd->init_val = eval_expression_imm(op, operand1, operand2); add_insn(parent, bb, OP_load_constant, vd, NULL, NULL, 0, NULL); rs1 = vd; vd = opstack_pop(); add_insn(parent, bb, OP_assign, vd, rs1, NULL, 0, NULL); return true; } if (op == OP_ternary) { lex_expect(T_question); int cond = eval_expression_imm(op, operand1, 
operand2); eval_ternary_imm(cond, token); return true; } /* using stack if operands more than two */ op_stack[op_stack_index++] = op; op = next_op; val_stack[val_stack_index++] = operand1; val_stack[val_stack_index++] = operand2; while (op != OP_generic && op != OP_ternary) { if (op_stack_index > 0) { /* we have a continuation, use stack */ int same_op = 0; do { opcode_t stack_op = op_stack[op_stack_index - 1]; if (get_operator_prio(stack_op) >= get_operator_prio(op)) { operand1 = val_stack[val_stack_index - 2]; operand2 = val_stack[val_stack_index - 1]; val_stack_index -= 2; /* apply stack operator and push result back */ val_stack[val_stack_index++] = eval_expression_imm(stack_op, operand1, operand2); /* pop op stack */ op_stack_index--; } else { same_op = 1; } /* continue util next operation is higher prio */ } while (op_stack_index > 0 && same_op == 0); } /* push next operand on stack */ val_stack[val_stack_index++] = read_primary_constant(); /* push operator on stack */ op_stack[op_stack_index++] = op; op = get_operator(); } /* unwind stack and apply operations */ while (op_stack_index > 0) { opcode_t stack_op = op_stack[op_stack_index - 1]; /* pop stack and apply operators */ operand1 = val_stack[val_stack_index - 2]; operand2 = val_stack[val_stack_index - 1]; val_stack_index -= 2; /* apply stack operator and push value back on stack */ val_stack[val_stack_index++] = eval_expression_imm(stack_op, operand1, operand2); if (op_stack_index == 1) { if (op == OP_ternary) { lex_expect(T_question); eval_ternary_imm(val_stack[0], token); } else { vd = require_var(parent); gen_name_to(vd->var_name); vd->init_val = val_stack[0]; add_insn(parent, bb, OP_load_constant, vd, NULL, NULL, 0, NULL); rs1 = vd; vd = opstack_pop(); add_insn(parent, bb, OP_assign, vd, rs1, NULL, 0, NULL); } return true; } /* pop op stack */ op_stack_index--; } if (op == OP_ternary) { lex_expect(T_question); eval_ternary_imm(val_stack[0], token); } else { vd = require_var(parent); 
gen_name_to(vd->var_name); vd->init_val = val_stack[0]; add_insn(parent, GLOBAL_FUNC->bbs, OP_load_constant, vd, NULL, NULL, 0, NULL); rs1 = vd; vd = opstack_pop(); add_insn(parent, GLOBAL_FUNC->bbs, OP_assign, vd, rs1, NULL, 0, NULL); } return true; } return false; } void perform_side_effect(block_t *parent, basic_block_t *bb) { for (int i = 0; i < se_idx; i++) { insn_t *insn = &side_effect[i]; add_insn(parent, bb, insn->opcode, insn->rd, insn->rs1, insn->rs2, insn->sz, insn->str); } se_idx = 0; } basic_block_t *read_code_block(func_t *func, block_t *parent, basic_block_t *bb); basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb) { char token[MAX_ID_LEN]; func_t *func; type_t *type; var_t *vd, *rs1, *rs2, *var; opcode_t prefix_op = OP_generic; bool is_const = false; if (!bb) printf("Warning: unreachable code detected\n"); /* statement can be: * function call, variable declaration, assignment operation, * keyword, block */ if (lex_peek(T_open_curly, NULL)) return read_code_block(parent->func, parent, bb); if (lex_accept(T_return)) { return handle_return_statement(parent, bb); } if (lex_accept(T_if)) { return handle_if_statement(parent, bb); } if (lex_accept(T_while)) { return handle_while_statement(parent, bb); } if (lex_accept(T_switch)) { bool is_default = false; basic_block_t *n = bb_create(parent); bb_connect(bb, n, NEXT); bb = n; lex_expect(T_open_bracket); read_expr(parent, &bb); lex_expect(T_close_bracket); /* create exit jump for breaks */ basic_block_t *switch_end = bb_create(parent); break_bb[break_exit_idx++] = switch_end; basic_block_t *true_body_ = bb_create(parent); lex_expect(T_open_curly); while (lex_peek(T_default, NULL) || lex_peek(T_case, NULL)) { if (lex_accept(T_default)) is_default = true; else { int case_val; lex_accept(T_case); if (lex_peek(T_numeric, token)) { case_val = parse_numeric_constant(token); lex_expect(T_numeric); } else if (lex_peek(T_char, token)) { char unescaped[MAX_TOKEN_LEN]; unescape_string(token, 
unescaped, MAX_TOKEN_LEN); case_val = unescaped[0]; lex_expect(T_char); } else if (lex_peek(T_identifier, token)) { constant_t *cd = find_constant(token); case_val = cd->value; lex_expect(T_identifier); } else { fatal("Not a valid case value"); } vd = require_var(parent); gen_name_to(vd->var_name); vd->init_val = case_val; opstack_push(vd); add_insn(parent, bb, OP_load_constant, vd, NULL, NULL, 0, NULL); vd = require_var(parent); gen_name_to(vd->var_name); rs1 = opstack_pop(); rs2 = operand_stack[operand_stack_idx - 1]; add_insn(parent, bb, OP_eq, vd, rs1, rs2, 0, NULL); add_insn(parent, bb, OP_branch, NULL, vd, NULL, 0, NULL); } lex_expect(T_colon); if (is_default) /* there's no condition if it is a default label */ bb_connect(bb, true_body_, NEXT); else bb_connect(bb, true_body_, THEN); int control = 0; while (!lex_peek(T_case, NULL) && !lex_peek(T_close_curly, NULL) && !lex_peek(T_default, NULL)) { true_body_ = read_body_statement(parent, true_body_); control = 1; } if (control && true_body_) { /* Create a new body block for next case, and connect the last * body block which lacks 'break' to it to make that one ignore * the upcoming cases. */ n = bb_create(parent); bb_connect(true_body_, n, NEXT); true_body_ = n; } if (!lex_peek(T_close_curly, NULL)) { if (is_default) error_at("Label default should be the last one", next_token_loc()); /* create a new conditional block for next case */ n = bb_create(parent); bb_connect(bb, n, ELSE); bb = n; /* create a new body block for next case if the last body block * exits 'switch'. 
*/ if (!true_body_) true_body_ = bb_create(parent); } else if (!is_default) { /* handle missing default label */ bb_connect(bb, switch_end, ELSE); } } /* remove the expression in switch() */ opstack_pop(); lex_expect(T_close_curly); if (true_body_) /* if the last label has no explicit break, connect it to the end */ bb_connect(true_body_, switch_end, NEXT); break_exit_idx--; int dangling = 1; for (int i = 0; i < MAX_BB_PRED; i++) if (switch_end->prev[i].bb) dangling = 0; if (dangling) return NULL; return switch_end; } if (lex_accept(T_break)) { bb_connect(bb, break_bb[break_exit_idx - 1], NEXT); lex_expect(T_semicolon); return NULL; } if (lex_accept(T_continue)) { bb_connect(bb, continue_bb[continue_pos_idx - 1], NEXT); lex_expect(T_semicolon); return NULL; } if (lex_accept(T_for)) { lex_expect(T_open_bracket); /* synthesize for loop block */ block_t *blk = add_block(parent, parent->func); /* setup - execute once */ basic_block_t *setup = bb_create(blk); bb_connect(bb, setup, NEXT); if (!lex_accept(T_semicolon)) { if (!lex_peek(T_identifier, token)) error_at("Unexpected token when parsing for loop", next_token_loc()); int find_type_flag = lex_accept(T_struct) ? 
2 : 1; if (find_type_flag == 1 && lex_accept(T_union)) { find_type_flag = 2; } type = find_type(token, find_type_flag); if (type) { var = require_typed_var(blk, type); read_full_var_decl(var, false, false); add_insn(blk, setup, OP_allocat, var, NULL, NULL, 0, NULL); add_symbol(setup, var); if (lex_accept(T_assign)) { read_expr(blk, &setup); read_ternary_operation(blk, &setup); rs1 = resize_var(parent, &bb, opstack_pop(), var); add_insn(blk, setup, OP_assign, var, rs1, NULL, 0, NULL); } while (lex_accept(T_comma)) { var_t *nv; /* add sequence point at T_comma */ perform_side_effect(blk, setup); /* multiple (partial) declarations */ nv = require_typed_var(blk, type); read_partial_var_decl(nv, var); /* partial */ add_insn(blk, setup, OP_allocat, nv, NULL, NULL, 0, NULL); add_symbol(setup, nv); if (lex_accept(T_assign)) { read_expr(blk, &setup); rs1 = resize_var(parent, &bb, opstack_pop(), nv); add_insn(blk, setup, OP_assign, nv, rs1, NULL, 0, NULL); } } } else { read_body_assignment(token, blk, OP_generic, &setup); } lex_expect(T_semicolon); } basic_block_t *cond_ = bb_create(blk); basic_block_t *for_end = bb_create(parent); basic_block_t *cond_start = cond_; break_bb[break_exit_idx++] = for_end; bb_connect(setup, cond_, NEXT); /* condition - check before the loop */ if (!lex_accept(T_semicolon)) { read_expr(blk, &cond_); lex_expect(T_semicolon); } else { /* always true */ vd = require_var(blk); vd->init_val = 1; gen_name_to(vd->var_name); opstack_push(vd); add_insn(blk, cond_, OP_load_constant, vd, NULL, NULL, 0, NULL); } bb_connect(cond_, for_end, ELSE); vd = opstack_pop(); add_insn(blk, cond_, OP_branch, NULL, vd, NULL, 0, NULL); basic_block_t *inc_ = bb_create(blk); continue_bb[continue_pos_idx++] = inc_; /* increment after each loop */ if (!lex_accept(T_close_bracket)) { if (lex_accept(T_increment)) prefix_op = OP_add; else if (lex_accept(T_decrement)) prefix_op = OP_sub; lex_peek(T_identifier, token); read_body_assignment(token, blk, prefix_op, &inc_); 
lex_expect(T_close_bracket); } /* loop body */ basic_block_t *body_ = bb_create(blk); bb_connect(cond_, body_, THEN); body_ = read_body_statement(blk, body_); if (body_) { bb_connect(body_, inc_, NEXT); bb_connect(inc_, cond_start, NEXT); } else if (inc_->insn_list.head) { bb_connect(inc_, cond_start, NEXT); } else { /* Empty increment block - cleanup handled by arena allocator */ } /* jump to increment */ continue_pos_idx--; break_exit_idx--; return for_end; } if (lex_accept(T_do)) { basic_block_t *n = bb_create(parent); bb_connect(bb, n, NEXT); bb = n; basic_block_t *cond_ = bb_create(parent); basic_block_t *do_while_end = bb_create(parent); continue_bb[continue_pos_idx++] = cond_; break_bb[break_exit_idx++] = do_while_end; basic_block_t *do_body = read_body_statement(parent, bb); if (do_body) bb_connect(do_body, cond_, NEXT); lex_expect(T_while); lex_expect(T_open_bracket); read_expr(parent, &cond_); lex_expect(T_close_bracket); vd = opstack_pop(); add_insn(parent, cond_, OP_branch, NULL, vd, NULL, 0, NULL); lex_expect(T_semicolon); for (int i = 0; i < MAX_BB_PRED; i++) { if (cond_->prev[i].bb) { bb_connect(cond_, bb, THEN); bb_connect(cond_, do_while_end, ELSE); break; } /* if breaking out of loop, skip condition block */ } continue_pos_idx--; break_exit_idx--; return do_while_end; } if (lex_accept(T_goto)) return handle_goto_statement(parent, bb); /* empty statement */ if (lex_accept(T_semicolon)) return bb; /* struct/union variable declaration */ if (lex_peek(T_struct, NULL) || lex_peek(T_union, NULL)) { int find_type_flag = lex_accept(T_struct) ? 
2 : 1; if (find_type_flag == 1 && lex_accept(T_union)) { find_type_flag = 2; } lex_ident(T_identifier, token); type = find_type(token, find_type_flag); if (type) { var = require_typed_var(parent, type); var->is_const_qualified = is_const; read_partial_var_decl(var, NULL); add_insn(parent, bb, OP_allocat, var, NULL, NULL, 0, NULL); add_symbol(bb, var); if (lex_accept(T_assign)) { if (lex_peek(T_open_curly, NULL) && (var->array_size > 0 || var->ptr_level > 0)) { parse_array_init(var, parent, &bb, 1); /* Always emit code */ } else if (lex_peek(T_open_curly, NULL) && (var->type->base_type == TYPE_struct || var->type->base_type == TYPE_typedef)) { /* C90-compliant struct compound literal support */ type_t *struct_type = var->type; /* Handle typedef by getting actual struct type */ if (struct_type->base_type == TYPE_typedef && struct_type->base_struct) struct_type = struct_type->base_struct; lex_expect(T_open_curly); int field_idx = 0; if (!lex_peek(T_close_curly, NULL)) { for (;;) { /* Parse field value expression */ read_expr(parent, &bb); read_ternary_operation(parent, &bb); var_t *val = opstack_pop(); /* Initialize field if within bounds */ if (field_idx < struct_type->num_fields) { var_t *field = &struct_type->fields[field_idx]; /* Create target variable for field */ var_t target = {0}; target.type = field->type; target.ptr_level = field->ptr_level; var_t *field_val = resize_var(parent, &bb, val, &target); /* Compute field address: &struct + field_offset */ var_t *struct_addr = require_var(parent); gen_name_to(struct_addr->var_name); add_insn(parent, bb, OP_address_of, struct_addr, var, NULL, 0, NULL); var_t *field_addr = struct_addr; if (field->offset > 0) { var_t *offset = require_var(parent); gen_name_to(offset->var_name); offset->init_val = field->offset; add_insn(parent, bb, OP_load_constant, offset, NULL, NULL, 0, NULL); var_t *addr = require_var(parent); gen_name_to(addr->var_name); add_insn(parent, bb, OP_add, addr, struct_addr, offset, 0, NULL); field_addr 
= addr; } /* Write field value */ int field_size = size_var(field); add_insn(parent, bb, OP_write, NULL, field_addr, field_val, field_size, NULL); } field_idx++; if (!lex_accept(T_comma)) break; if (lex_peek(T_close_curly, NULL)) break; } } lex_expect(T_close_curly); } else { read_expr(parent, &bb); read_ternary_operation(parent, &bb); var_t *rhs = opstack_pop(); rhs = scalarize_array_literal_if_needed( parent, &bb, rhs, var->type, !var->ptr_level && var->array_size == 0); rs1 = resize_var(parent, &bb, rhs, var); add_insn(parent, bb, OP_assign, var, rs1, NULL, 0, NULL); } } while (lex_accept(T_comma)) { var_t *nv; /* add sequence point at T_comma */ perform_side_effect(parent, bb); /* multiple (partial) declarations */ nv = require_typed_var(parent, type); read_inner_var_decl(nv, false, false); add_insn(parent, bb, OP_allocat, nv, NULL, NULL, 0, NULL); add_symbol(bb, nv); if (lex_accept(T_assign)) { if (lex_peek(T_open_curly, NULL) && (nv->array_size > 0 || nv->ptr_level > 0)) { parse_array_init(nv, parent, &bb, true); } else if (lex_peek(T_open_curly, NULL) && (nv->type->base_type == TYPE_struct || nv->type->base_type == TYPE_typedef)) { /* C90-compliant struct compound literal support */ type_t *struct_type = nv->type; /* Handle typedef by getting actual struct type */ if (struct_type->base_type == TYPE_typedef && struct_type->base_struct) struct_type = struct_type->base_struct; lex_expect(T_open_curly); int field_idx = 0; if (!lex_peek(T_close_curly, NULL)) { for (;;) { /* Parse field value expression */ read_expr(parent, &bb); read_ternary_operation(parent, &bb); var_t *val = opstack_pop(); /* Initialize field if within bounds */ if (field_idx < struct_type->num_fields) { var_t *field = &struct_type->fields[field_idx]; /* Create target variable for field */ var_t target = {0}; target.type = field->type; target.ptr_level = field->ptr_level; var_t *field_val = resize_var(parent, &bb, val, &target); /* Compute field address: &struct + * field_offset */ var_t 
*struct_addr = require_var(parent); gen_name_to(struct_addr->var_name); add_insn(parent, bb, OP_address_of, struct_addr, nv, NULL, 0, NULL); var_t *field_addr = struct_addr; if (field->offset > 0) { var_t *offset = require_var(parent); gen_name_to(offset->var_name); offset->init_val = field->offset; add_insn(parent, bb, OP_load_constant, offset, NULL, NULL, 0, NULL); var_t *addr = require_var(parent); gen_name_to(addr->var_name); add_insn(parent, bb, OP_add, addr, struct_addr, offset, 0, NULL); field_addr = addr; } /* Write field value */ int field_size = size_var(field); add_insn(parent, bb, OP_write, NULL, field_addr, field_val, field_size, NULL); } field_idx++; if (!lex_accept(T_comma)) break; if (lex_peek(T_close_curly, NULL)) break; } } lex_expect(T_close_curly); } else { read_expr(parent, &bb); read_ternary_operation(parent, &bb); var_t *rhs = opstack_pop(); rhs = scalarize_array_literal_if_needed( parent, &bb, rhs, nv->type, !nv->ptr_level && nv->array_size == 0); rs1 = resize_var(parent, &bb, rhs, nv); add_insn(parent, bb, OP_assign, nv, rs1, NULL, 0, NULL); } } } lex_expect(T_semicolon); return bb; } error_at("Unknown struct/union type", next_token_loc()); } /* Handle const qualifier for local variable declarations */ if (lex_accept(T_const)) { is_const = true; /* After const, we expect a type */ if (!lex_peek(T_identifier, token)) error_at("Expected type after const", next_token_loc()); } /* statement with prefix */ if (!is_const && lex_accept(T_increment)) prefix_op = OP_add; else if (!is_const && lex_accept(T_decrement)) prefix_op = OP_sub; /* must be an identifier or asterisk (for pointer dereference) */ bool has_asterisk = lex_peek(T_asterisk, NULL); if (!is_const && !lex_peek(T_identifier, token) && !has_asterisk) error_at("Unexpected token", next_token_loc()); /* is it a variable declaration? */ /* Special handling when statement starts with asterisk */ if (has_asterisk) { /* For "*identifier", check if identifier is a type. 
* If not, it's a dereference, not a declaration. */ token_t *saved_token = cur_token; /* Skip the asterisk to peek at the identifier */ lex_accept(T_asterisk); char next_ident[MAX_TOKEN_LEN]; bool could_be_type = false; if (lex_peek(T_identifier, next_ident)) { /* Check if it's a type name */ type = find_type(next_ident, 0); if (type) could_be_type = true; } /* Restore position */ cur_token = saved_token; /* If it's not a type, skip the declaration block */ if (!could_be_type) type = NULL; } else { /* Normal type checking without asterisk */ int find_type_flag = lex_accept(T_struct) ? 2 : 1; if (find_type_flag == 1 && lex_accept(T_union)) find_type_flag = 2; type = find_type(token, find_type_flag); } if (type) { var = require_typed_var(parent, type); var->is_const_qualified = is_const; read_full_var_decl(var, false, false); add_insn(parent, bb, OP_allocat, var, NULL, NULL, 0, NULL); add_symbol(bb, var); if (lex_accept(T_assign)) { if (lex_peek(T_open_curly, NULL) && (var->array_size > 0 || var->ptr_level > 0)) { /* Emit code for locals in functions */ parse_array_init(var, parent, &bb, 1); } else if (lex_peek(T_open_curly, NULL) && (var->type->base_type == TYPE_struct || var->type->base_type == TYPE_typedef)) { /* C90-compliant struct compound literal support */ type_t *struct_type = var->type; /* Handle typedef by getting actual struct type */ if (struct_type->base_type == TYPE_typedef && struct_type->base_struct) struct_type = struct_type->base_struct; lex_expect(T_open_curly); int field_idx = 0; if (!lex_peek(T_close_curly, NULL)) { for (;;) { /* Parse field value expression */ read_expr(parent, &bb); read_ternary_operation(parent, &bb); var_t *val = opstack_pop(); /* Initialize field if within bounds */ if (field_idx < struct_type->num_fields) { var_t *field = &struct_type->fields[field_idx]; /* Create target variable for field */ var_t target = {0}; target.type = field->type; target.ptr_level = field->ptr_level; var_t *field_val = resize_var(parent, &bb, val, 
&target); /* Compute field address: &struct + field_offset */ var_t *struct_addr = require_var(parent); gen_name_to(struct_addr->var_name); add_insn(parent, bb, OP_address_of, struct_addr, var, NULL, 0, NULL); var_t *field_addr = struct_addr; if (field->offset > 0) { var_t *offset = require_var(parent); gen_name_to(offset->var_name); offset->init_val = field->offset; add_insn(parent, bb, OP_load_constant, offset, NULL, NULL, 0, NULL); var_t *addr = require_var(parent); gen_name_to(addr->var_name); add_insn(parent, bb, OP_add, addr, struct_addr, offset, 0, NULL); field_addr = addr; } /* Write field value */ int field_size = size_var(field); add_insn(parent, bb, OP_write, NULL, field_addr, field_val, field_size, NULL); } field_idx++; if (!lex_accept(T_comma)) break; if (lex_peek(T_close_curly, NULL)) break; } } lex_expect(T_close_curly); } else { read_expr(parent, &bb); read_ternary_operation(parent, &bb); var_t *expr_result = opstack_pop(); /* Handle array compound literal to scalar assignment */ if (expr_result && expr_result->array_size > 0 && !var->ptr_level && var->array_size == 0 && var->type && (var->type->base_type == TYPE_int || var->type->base_type == TYPE_short) && expr_result->var_name[0] == '.') { /* Extract first element from compound literal array */ var_t *first_elem = require_var(parent); first_elem->type = var->type; gen_name_to(first_elem->var_name); /* Read first element from array at offset 0 * expr_result is the array itself, so we can read * directly from it */ add_insn(parent, bb, OP_read, first_elem, expr_result, NULL, var->type->size, NULL); expr_result = first_elem; } rs1 = resize_var(parent, &bb, expr_result, var); add_insn(parent, bb, OP_assign, var, rs1, NULL, 0, NULL); } } while (lex_accept(T_comma)) { var_t *nv; /* add sequence point at T_comma */ perform_side_effect(parent, bb); /* multiple (partial) declarations */ nv = require_typed_var(parent, type); read_partial_var_decl(nv, var); /* partial */ add_insn(parent, bb, OP_allocat, nv, 
NULL, NULL, 0, NULL); add_symbol(bb, nv); if (lex_accept(T_assign)) { if (lex_peek(T_open_curly, NULL) && (nv->array_size > 0 || nv->ptr_level > 0)) { /* Emit code for locals */ parse_array_init(nv, parent, &bb, 1); } else if (lex_peek(T_open_curly, NULL) && (nv->type->base_type == TYPE_struct || nv->type->base_type == TYPE_typedef)) { /* C90-compliant struct compound literal support */ type_t *struct_type = nv->type; /* Handle typedef by getting actual struct type */ if (struct_type->base_type == TYPE_typedef && struct_type->base_struct) struct_type = struct_type->base_struct; lex_expect(T_open_curly); int field_idx = 0; if (!lex_peek(T_close_curly, NULL)) { for (;;) { /* Parse field value expression */ read_expr(parent, &bb); read_ternary_operation(parent, &bb); var_t *val = opstack_pop(); /* Initialize field if within bounds */ if (field_idx < struct_type->num_fields) { var_t *field = &struct_type->fields[field_idx]; /* Create target variable for field */ var_t target = {0}; target.type = field->type; target.ptr_level = field->ptr_level; var_t *field_val = resize_var(parent, &bb, val, &target); /* Compute field address: &struct + field_offset */ var_t *struct_addr = require_var(parent); gen_name_to(struct_addr->var_name); add_insn(parent, bb, OP_address_of, struct_addr, nv, NULL, 0, NULL); var_t *field_addr = struct_addr; if (field->offset > 0) { var_t *offset = require_var(parent); gen_name_to(offset->var_name); offset->init_val = field->offset; add_insn(parent, bb, OP_load_constant, offset, NULL, NULL, 0, NULL); var_t *addr = require_var(parent); gen_name_to(addr->var_name); add_insn(parent, bb, OP_add, addr, struct_addr, offset, 0, NULL); field_addr = addr; } /* Write field value */ int field_size = size_var(field); add_insn(parent, bb, OP_write, NULL, field_addr, field_val, field_size, NULL); } field_idx++; if (!lex_accept(T_comma)) break; if (lex_peek(T_close_curly, NULL)) break; } } lex_expect(T_close_curly); } else { read_expr(parent, &bb); rs1 = 
resize_var(parent, &bb, opstack_pop(), nv); add_insn(parent, bb, OP_assign, nv, rs1, NULL, 0, NULL); } } } lex_expect(T_semicolon); return bb; } /* is a function call? Skip function call check when has_asterisk is true */ if (!has_asterisk) { func = find_func(token); if (func) { lex_expect(T_identifier); read_func_call(func, parent, &bb); perform_side_effect(parent, bb); lex_expect(T_semicolon); return bb; } } /* handle pointer dereference expressions like *ptr = value */ if (lex_peek(T_asterisk, NULL)) { read_expr(parent, &bb); read_ternary_operation(parent, &bb); /* Check if it's an assignment */ if (lex_accept(T_assign)) { var_t *lvalue = opstack_pop(); read_expr(parent, &bb); read_ternary_operation(parent, &bb); var_t *rvalue = opstack_pop(); /* Generate OP_write for pointer dereference assignment */ add_insn(parent, bb, OP_write, NULL, lvalue, rvalue, get_size(rvalue), NULL); } else { /* Expression statement without assignment */ perform_side_effect(parent, bb); } lex_expect(T_semicolon); return bb; } /* is an assignment? 
*/ if (read_body_assignment(token, parent, prefix_op, &bb)) { perform_side_effect(parent, bb); lex_expect(T_semicolon); return bb; } if (lex_peek(T_identifier, token)) { lex_accept(T_identifier); token_t *id_tk = cur_token; if (lex_accept(T_colon)) { label_t *l = find_label(token); if (l) error_at("label redefinition", &id_tk->location); basic_block_t *n = bb_create(parent); bb_connect(bb, n, NEXT); add_label(token, n); add_insn(parent, n, OP_label, NULL, NULL, NULL, 0, token); return n; } } error_at("Unrecognized statement token", next_token_loc()); return NULL; } basic_block_t *read_code_block(func_t *func, block_t *parent, basic_block_t *bb) { block_t *blk = add_block(parent, func); bb->scope = blk; lex_expect(T_open_curly); while (!lex_accept(T_close_curly)) { bb = read_body_statement(blk, bb); perform_side_effect(blk, bb); } return bb; } void var_add_killed_bb(var_t *var, basic_block_t *bb); void read_func_body(func_t *func) { block_t *blk = add_block(NULL, func); func->bbs = bb_create(blk); func->exit = bb_create(blk); for (int i = 0; i < func->num_params; i++) { /* arguments */ add_symbol(func->bbs, &func->param_defs[i]); func->param_defs[i].base = &func->param_defs[i]; var_add_killed_bb(&func->param_defs[i], func->bbs); } basic_block_t *body = read_code_block(func, NULL, func->bbs); if (body) bb_connect(body, func->exit, NEXT); for (int i = 0; i < backpatch_bb_idx; i++) { basic_block_t *bb = backpatch_bb[i]; insn_t *g = bb->insn_list.tail; label_t *label = find_label(g->str); if (!label) error_at("goto label undefined", cur_token_loc()); label->used = true; bb_connect(bb, label->bb, NEXT); } for (int i = 0; i < label_idx; i++) { label_t *label = &labels[i]; if (label->used) continue; printf("Warning: unused label %s\n", label->label_name); } backpatch_bb_idx = 0; label_idx = 0; } void print_ptr_level(int level) { while (level > 0) { printf("*"); level--; } } void print_func_decl(func_t *func, const char *prefix, bool newline) { if (prefix) printf("%s", 
prefix); if (func->return_def.is_const_qualified) printf("const "); printf("%s ", func->return_def.type->type_name); print_ptr_level(func->return_def.ptr_level - func->return_def.type->ptr_level); printf("%s(", func->return_def.var_name); for (int i = 0; i < func->num_params; i++) { var_t *var = &func->param_defs[i]; if (var->is_const_qualified) printf("const "); printf("%s ", var->type->type_name); print_ptr_level(var->ptr_level - var->type->ptr_level); printf("%s", var->var_name); if (i != func->num_params - 1) printf(", "); } if (func->va_args) printf(", ..."); printf(")"); if (newline) printf("\n"); } /* if first token is type */ void read_global_decl(block_t *block, bool is_const) { var_t *var = require_var(block); var->is_global = true; var->is_const_qualified = is_const; /* new function, or variables under parent */ read_full_var_decl(var, false, false); if (lex_peek(T_open_bracket, NULL)) { /* function */ func_t *func = find_func(var->var_name); func_t func_tmp; bool check_decl = false; if (func) { memcpy(&func_tmp, func, sizeof(func_t)); check_decl = true; } else func = add_func(var->var_name, false); memcpy(&func->return_def, var, sizeof(var_t)); block->locals.size--; read_parameter_list_decl(func, 0); if (check_decl) { /* Validate whether the previous declaration and the current * one differ. 
*/ if ((func->return_def.type != func_tmp.return_def.type) || (func->return_def.ptr_level != func_tmp.return_def.ptr_level) || (func->return_def.is_const_qualified != func_tmp.return_def.is_const_qualified)) { printf("Error: conflicting types for the function %s.\n", func->return_def.var_name); print_func_decl(&func_tmp, "before: ", true); print_func_decl(func, "after: ", true); abort(); } if (func->num_params != func_tmp.num_params) { printf( "Error: confilcting number of arguments for the function " "%s.\n", func->return_def.var_name); print_func_decl(&func_tmp, "before: ", true); print_func_decl(func, "after: ", true); abort(); } for (int i = 0; i < func->num_params; i++) { var_t *func_var = &func->param_defs[i]; var_t *func_tmp_var = &func_tmp.param_defs[i]; if ((func_var->type != func_tmp_var->type) || (func_var->ptr_level != func_tmp_var->ptr_level) || (func_var->is_const_qualified != func_tmp_var->is_const_qualified)) { printf("Error: confilcting types for the function %s.\n", func->return_def.var_name); print_func_decl(&func_tmp, "before: ", true); print_func_decl(func, "after: ", true); abort(); } } if (func->va_args != func_tmp.va_args) { printf("Error: conflicting types for the function %s.\n", func->return_def.var_name); print_func_decl(&func_tmp, "before: ", true); print_func_decl(func, "after: ", true); abort(); } } if (lex_peek(T_open_curly, NULL)) { read_func_body(func); return; } if (lex_accept(T_semicolon)) /* forward definition */ return; error_at("Syntax error in global declaration", next_token_loc()); } else add_insn(block, GLOBAL_FUNC->bbs, OP_allocat, var, NULL, NULL, 0, NULL); /* is a variable */ if (lex_accept(T_assign)) { /* If '{' follows and this is an array (explicit or implicit-size via * pointer syntax), reuse the array initializer to emit per-element * stores for globals as well. 
*/ if (lex_peek(T_open_curly, NULL) && (var->array_size > 0 || var->ptr_level > 0)) { parse_array_init(var, block, &GLOBAL_FUNC->bbs, true); lex_expect(T_semicolon); return; } /* Otherwise fall back to scalar/constant global assignment */ read_global_assignment(var->var_name); lex_expect(T_semicolon); return; } else if (lex_accept(T_comma)) { /* TODO: Implement global variable continuation syntax for multiple * declarations in single statement (e.g., int a = 1, b = 2;) */ error_at("Global continuation not supported", cur_token_loc()); } else if (lex_accept(T_semicolon)) { opstack_pop(); return; } error_at("Syntax error in global declaration", next_token_loc()); } void consume_global_compound_literal(void) { lex_expect(T_open_curly); if (!lex_peek(T_close_curly, NULL)) { for (;;) { /* Just consume constant values for now */ if (lex_peek(T_numeric, NULL)) { lex_accept(T_numeric); } else if (lex_peek(T_minus, NULL)) { lex_accept(T_minus); lex_accept(T_numeric); } else if (lex_peek(T_string, NULL)) { lex_accept(T_string); } else if (lex_peek(T_char, NULL)) { lex_accept(T_char); } else { error_at( "Global struct initialization requires constant values", next_token_loc()); } if (!lex_accept(T_comma)) break; if (lex_peek(T_close_curly, NULL)) break; } } lex_expect(T_close_curly); } void initialize_struct_field(var_t *nv, var_t *v, int offset) { nv->type = v->type; nv->var_name[0] = '\0'; nv->ptr_level = 0; nv->is_func = false; nv->is_global = false; nv->is_const_qualified = false; nv->array_size = 0; nv->offset = offset; nv->init_val = 0; nv->liveness = 0; nv->in_loop = 0; nv->base = NULL; nv->subscript = 0; nv->subscripts_idx = 0; nv->is_compound_literal = false; } void read_global_statement(void) { char token[MAX_ID_LEN]; block_t *block = GLOBAL_BLOCK; /* global block */ bool is_const = false; /* Handle const qualifier */ if (lex_accept(T_const)) is_const = true; if (lex_accept(T_struct)) { int i = 0, size = 0; lex_ident(T_identifier, token); token_t *id_tk = cur_token; 
/* variable declaration using existing struct tag? */ if (!lex_peek(T_open_curly, NULL)) { type_t *decl_type = find_type(token, 2); if (!decl_type) error_at("Unknown struct type", &id_tk->location); /* one or more declarators */ var_t *var = require_typed_var(block, decl_type); var->is_global = true; /* Global struct variable */ var->is_const_qualified = is_const; read_partial_var_decl(var, NULL); add_insn(block, GLOBAL_FUNC->bbs, OP_allocat, var, NULL, NULL, 0, NULL); if (lex_accept(T_assign)) { if (lex_peek(T_open_curly, NULL) && (var->array_size > 0 || var->ptr_level > 0)) { parse_array_init(var, block, &GLOBAL_FUNC->bbs, true); } else if (lex_peek(T_open_curly, NULL) && var->array_size == 0 && var->ptr_level == 0 && (decl_type->base_type == TYPE_struct || decl_type->base_type == TYPE_typedef)) { /* Global struct compound literal support * Currently we just consume the syntax - actual * initialization would require runtime code which globals * don't support */ consume_global_compound_literal(); } else { read_global_assignment(var->var_name); } } while (lex_accept(T_comma)) { var_t *nv = require_typed_var(block, decl_type); read_inner_var_decl(nv, false, false); add_insn(block, GLOBAL_FUNC->bbs, OP_allocat, nv, NULL, NULL, 0, NULL); if (lex_accept(T_assign)) { if (lex_peek(T_open_curly, NULL) && (nv->array_size > 0 || nv->ptr_level > 0)) { parse_array_init(nv, block, &GLOBAL_FUNC->bbs, true); } else if (lex_peek(T_open_curly, NULL) && nv->array_size == 0 && nv->ptr_level == 0 && (decl_type->base_type == TYPE_struct || decl_type->base_type == TYPE_typedef)) { /* Global struct compound literal support for * continuation Currently we just consume the syntax */ consume_global_compound_literal(); } else { read_global_assignment(nv->var_name); } } } lex_expect(T_semicolon); return; } /* struct definition */ /* has forward declaration? 
*/ type_t *type = find_type(token, 2); if (!type) type = add_type(); strcpy(type->type_name, intern_string(token)); type->base_type = TYPE_struct; lex_expect(T_open_curly); do { var_t *v = &type->fields[i++]; read_full_var_decl(v, false, true); v->offset = size; size += size_var(v); /* Handle multiple variable declarations with same base type */ while (lex_accept(T_comma)) { if (i >= MAX_FIELDS) error_at("Too many struct fields", cur_token_loc()); var_t *nv = &type->fields[i++]; initialize_struct_field(nv, v, 0); read_inner_var_decl(nv, false, true); nv->offset = size; size += size_var(nv); } lex_expect(T_semicolon); } while (!lex_accept(T_close_curly)); type->size = size; type->num_fields = i; lex_expect(T_semicolon); } else if (lex_accept(T_union)) { int i = 0, max_size = 0; lex_ident(T_identifier, token); /* has forward declaration? */ type_t *type = find_type(token, 2); if (!type) type = add_type(); strcpy(type->type_name, intern_string(token)); type->base_type = TYPE_union; lex_expect(T_open_curly); do { var_t *v = &type->fields[i++]; read_full_var_decl(v, false, true); v->offset = 0; /* All union fields start at offset 0 */ int field_size = size_var(v); if (field_size > max_size) max_size = field_size; /* Handle multiple variable declarations with same base type */ while (lex_accept(T_comma)) { if (i >= MAX_FIELDS) error_at("Too many union fields", cur_token_loc()); var_t *nv = &type->fields[i++]; /* All union fields start at offset 0 */ initialize_struct_field(nv, v, 0); read_inner_var_decl(nv, false, true); field_size = size_var(nv); if (field_size > max_size) max_size = field_size; } lex_expect(T_semicolon); } while (!lex_accept(T_close_curly)); type->size = max_size; type->num_fields = i; lex_expect(T_semicolon); } else if (lex_accept(T_typedef)) { if (lex_accept(T_enum)) { int val = 0; type_t *type = add_type(); type->base_type = TYPE_int; type->size = 4; lex_expect(T_open_curly); do { lex_ident(T_identifier, token); if (lex_accept(T_assign)) { char 
value[MAX_ID_LEN]; lex_ident(T_numeric, value); val = parse_numeric_constant(value); } add_constant(token, val++); } while (lex_accept(T_comma)); lex_expect(T_close_curly); lex_ident(T_identifier, token); strcpy(type->type_name, intern_string(token)); lex_expect(T_semicolon); } else if (lex_accept(T_struct)) { int i = 0, size = 0; bool has_struct_def = false; type_t *tag = NULL, *type = add_type(); /* is struct definition? */ if (lex_peek(T_identifier, token)) { lex_expect(T_identifier); /* is existent? */ tag = find_type(token, 2); if (!tag) { tag = add_type(); tag->base_type = TYPE_struct; strcpy(tag->type_name, intern_string(token)); } } /* typedef with struct definition */ if (lex_accept(T_open_curly)) { has_struct_def = true; do { var_t *v = &type->fields[i++]; read_full_var_decl(v, false, true); v->offset = size; size += size_var(v); /* Handle multiple variable declarations with same base type */ while (lex_accept(T_comma)) { if (i >= MAX_FIELDS) error_at("Too many struct fields", cur_token_loc()); var_t *nv = &type->fields[i++]; initialize_struct_field(nv, v, 0); read_inner_var_decl(nv, false, true); nv->offset = size; size += size_var(nv); } lex_expect(T_semicolon); } while (!lex_accept(T_close_curly)); } lex_ident(T_identifier, type->type_name); type->size = size; type->num_fields = i; type->base_type = TYPE_typedef; if (tag && has_struct_def == 1) { strcpy(token, tag->type_name); memcpy(tag, type, sizeof(type_t)); tag->base_type = TYPE_struct; strcpy(tag->type_name, intern_string(token)); } else { /* If it is a forward declaration, build a connection between * structure tag and alias. In 'find_type', it will retrieve * infomation from base structure for alias. */ type->base_struct = tag; } lex_expect(T_semicolon); } else if (lex_accept(T_union)) { int i = 0, max_size = 0; bool has_union_def = false; type_t *tag = NULL, *type = add_type(); /* is union definition? */ if (lex_peek(T_identifier, token)) { lex_expect(T_identifier); /* is existent? 
*/ tag = find_type(token, 2); if (!tag) { tag = add_type(); tag->base_type = TYPE_union; strcpy(tag->type_name, intern_string(token)); } } /* typedef with union definition */ if (lex_accept(T_open_curly)) { has_union_def = true; do { var_t *v = &type->fields[i++]; read_full_var_decl(v, false, true); v->offset = 0; /* All union fields start at offset 0 */ int field_size = size_var(v); if (field_size > max_size) max_size = field_size; /* Handle multiple variable declarations with same base type */ while (lex_accept(T_comma)) { if (i >= MAX_FIELDS) error_at("Too many union fields", cur_token_loc()); var_t *nv = &type->fields[i++]; /* All union fields start at offset 0 */ initialize_struct_field(nv, v, 0); read_inner_var_decl(nv, false, true); field_size = size_var(nv); if (field_size > max_size) max_size = field_size; } lex_expect(T_semicolon); } while (!lex_accept(T_close_curly)); } lex_ident(T_identifier, type->type_name); type->size = max_size; type->num_fields = i; type->base_type = TYPE_typedef; if (tag && has_union_def == 1) { strcpy(token, tag->type_name); memcpy(tag, type, sizeof(type_t)); tag->base_type = TYPE_union; strcpy(tag->type_name, intern_string(token)); } else { /* If it is a forward declaration, build a connection between * union tag and alias. In 'find_type', it will retrieve * information from base union for alias. 
*/ type->base_struct = tag; } lex_expect(T_semicolon); } else { char base_type[MAX_TYPE_LEN]; type_t *base; type_t *type = add_type(); lex_ident(T_identifier, base_type); base = find_type(base_type, true); if (!base) error_at("Unable to find base type", cur_token_loc()); type->base_type = base->base_type; type->size = base->size; type->num_fields = 0; type->ptr_level = 0; /* Handle pointer types in typedef: typedef char *string; */ while (lex_accept(T_asterisk)) { type->ptr_level++; type->size = PTR_SIZE; } lex_ident(T_identifier, type->type_name); lex_expect(T_semicolon); } } else if (lex_peek(T_identifier, NULL)) { read_global_decl(block, is_const); } else error_at("Syntax error in global statement", next_token_loc()); } void parse_internal(void) { /* set starting point of global stack manually */ GLOBAL_FUNC = add_func("", true); GLOBAL_FUNC->stack_size = 4; GLOBAL_FUNC->bbs = arena_calloc(BB_ARENA, 1, sizeof(basic_block_t)); GLOBAL_FUNC->bbs->belong_to = GLOBAL_FUNC; /* Prevent nullptr deref in RA */ /* built-in types */ TY_void = add_named_type("void"); TY_void->base_type = TYPE_void; TY_void->size = 0; TY_char = add_named_type("char"); TY_char->base_type = TYPE_char; TY_char->size = 1; TY_int = add_named_type("int"); TY_int->base_type = TYPE_int; TY_int->size = 4; TY_short = add_named_type("short"); TY_short->base_type = TYPE_short; TY_short->size = 2; /* builtin type _Bool was introduced in C99 specification, it is more * well-known as macro type bool, which is defined in (in * shecc, it is defined in 'lib/c.c'). */ TY_bool = add_named_type("_Bool"); TY_bool->base_type = TYPE_char; TY_bool->size = 1; GLOBAL_BLOCK = add_block(NULL, NULL); /* global block */ elf_add_symbol("", 0); /* undef symbol */ if (dynlink) { /* In dynamic mode, __syscall won't be implemented. 
* * Simply declare a 'syscall' function as follows if the program * needs to use 'syscall': * * int syscall(int number, ...); * * shecc will treat it as an external function, and the compiled * program will eventually use the implementation provided by * the external C library. * * If shecc supports the 'long' data type in the future, it would be * better to declare syscall using its original prototype: * * long syscall(long number, ...); * */ } else { /* Linux syscall */ func_t *func = add_func("__syscall", true); func->return_def.type = TY_int; func->num_params = 0; func->va_args = 1; func->bbs = NULL; /* Otherwise, allocate a basic block to implement in static mode. */ func->bbs = arena_calloc(BB_ARENA, 1, sizeof(basic_block_t)); } /* Add a global object to the .data section. * * This object is used to save the global stack pointer. */ elf_write_int(elf_data, 0); /* lexer initialization */ do { read_global_statement(); } while (!lex_accept(T_eof)); } void parse(token_t *tk) { token_t head; head.kind = T_start; head.next = tk; cur_token = &head; parse_internal(); } ================================================ FILE: src/peephole.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. */ #include #include "defs.h" #include "globals.c" /* Determines if an instruction can be fused with a following OP_assign. * Fusible instructions are those whose results can be directly written * to the final destination register, eliminating intermediate moves. 
*/ bool is_fusible_insn(ph2_ir_t *ph2_ir) { switch (ph2_ir->op) { case OP_add: /* Arithmetic operations */ case OP_sub: case OP_mul: case OP_div: case OP_mod: case OP_lshift: /* Shift operations */ case OP_rshift: case OP_bit_and: /* Bitwise operations */ case OP_bit_or: case OP_bit_xor: case OP_log_and: /* Logical operations */ case OP_log_or: case OP_log_not: case OP_negate: /* Unary operations */ case OP_load: /* Memory operations */ case OP_global_load: case OP_load_data_address: case OP_load_rodata_address: return true; default: return false; } } /* Main peephole optimization function that applies pattern matching * and transformation rules to consecutive IR instructions. * Returns true if any optimization was applied, false otherwise. */ bool insn_fusion(ph2_ir_t *ph2_ir) { ph2_ir_t *next = ph2_ir->next; if (!next) return false; /* ALU instruction fusion. * Eliminates redundant move operations following arithmetic/logical * operations. This is the most fundamental optimization that removes * temporary register usage. */ if (next->op == OP_assign) { if (is_fusible_insn(ph2_ir) && ph2_ir->dest == next->src0) { /* Pattern: {ALU rn, rs1, rs2; mv rd, rn} → {ALU rd, rs1, rs2} * Example: {add t1, a, b; mv result, t1} → {add result, a, b} */ ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } } /* Arithmetic identity with zero constant */ if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0) { if (next->op == OP_add && (ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) { /* Pattern: {li 0; add x, 0} → {mov x} (additive identity: x+0 = x) * Handles both operand positions due to addition commutativity * Example: {li t1, 0; add result, var, t1} → {mov result, var} */ int non_zero_src = (ph2_ir->dest == next->src0) ? 
next->src1 : next->src0; ph2_ir->op = OP_assign; ph2_ir->src0 = non_zero_src; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } if (next->op == OP_sub) { if (ph2_ir->dest == next->src1) { /* Pattern: {li 0; sub x, 0} → {mov x} (x - 0 = x) * Example: {li t1, 0; sub result, var, t1} → {mov result, var} */ ph2_ir->op = OP_assign; ph2_ir->src0 = next->src0; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } if (ph2_ir->dest == next->src0) { /* Pattern: {li 0; sub 0, x} → {neg x} (0 - x = -x) * Example: {li t1, 0; sub result, t1, var} → {neg result, var} */ ph2_ir->op = OP_negate; ph2_ir->src0 = next->src1; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } } if (next->op == OP_mul && (ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) { /* Pattern: {li 0; mul x, 0} → {li 0} (absorbing element: x * 0 = 0) * Example: {li t1, 0; mul result, var, t1} → {li result, 0} * Eliminates multiplication entirely */ ph2_ir->op = OP_load_constant; ph2_ir->src0 = 0; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } } /* Multiplicative identity with one constant */ if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 1) { if (next->op == OP_mul && (ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) { /* Pattern: {li 1; mul x, 1} → {mov x} (multiplicative identity: * x * 1 = x) * Example: {li t1, 1; mul result, var, t1} → {mov result, var} * Handles both operand positions due to multiplication * commutativity */ ph2_ir->op = OP_assign; ph2_ir->src0 = ph2_ir->dest == next->src0 ? 
next->src1 : next->src0; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } } /* Bitwise identity operations */ if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == -1 && next->op == OP_bit_and && ph2_ir->dest == next->src1) { /* Pattern: {li -1; and x, -1} → {mov x} (x & 0xFFFFFFFF = x) * Example: {li t1, -1; and result, var, t1} → {mov result, var} * Eliminates bitwise AND with all-ones mask */ ph2_ir->op = OP_assign; ph2_ir->src0 = next->src0; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 && (next->op == OP_lshift || next->op == OP_rshift) && ph2_ir->dest == next->src1) { /* Pattern: {li 0; shl/shr x, 0} → {mov x} (x << 0 = x >> 0 = x) * Example: {li t1, 0; shl result, var, t1} → {mov result, var} * Eliminates no-op shift operations */ ph2_ir->op = OP_assign; ph2_ir->src0 = next->src0; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 && next->op == OP_bit_or && ph2_ir->dest == next->src1) { /* Pattern: {li 0; or x, 0} → {mov x} (x | 0 = x) * Example: {li t1, 0; or result, var, t1} → {mov result, var} * Eliminates bitwise OR with zero (identity element) */ ph2_ir->op = OP_assign; ph2_ir->src0 = next->src0; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } /* Power-of-2 multiplication to shift conversion. 
* Shift operations are significantly faster than multiplication */ if (ph2_ir->op == OP_load_constant && ph2_ir->src0 > 0 && next->op == OP_mul && ph2_ir->dest == next->src1) { int power = ph2_ir->src0; /* Detect power-of-2 using bit manipulation: (n & (n-1)) == 0 for powers * of 2 */ if (power && (power & (power - 1)) == 0) { /* Calculate log2(power) to determine shift amount */ int shift_amount = 0; int tmp = power; while (tmp > 1) { tmp >>= 1; shift_amount++; } /* Pattern: {li 2^n; mul x, 2^n} → {li n; shl x, n} * Example: {li t1, 4; mul result, var, t1} → * {li t1, 2; shl result, var, t1} */ ph2_ir->op = OP_load_constant; ph2_ir->src0 = shift_amount; next->op = OP_lshift; return true; } } /* XOR identity operation */ if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 && next->op == OP_bit_xor && ph2_ir->dest == next->src1) { /* Pattern: {li 0; xor x, 0} → {mov x} (x ^ 0 = x) * Example: {li t1, 0; xor result, var, t1} → {mov result, var} * Completes bitwise identity optimization coverage */ ph2_ir->op = OP_assign; ph2_ir->src0 = next->src0; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } /* Extended multiplicative identity (operand position variant) * Handles the case where constant 1 is in src0 position of multiplication */ if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 1 && next->op == OP_mul && ph2_ir->dest == next->src0) { /* Pattern: {li 1; mul 1, x} → {mov x} (1 * x = x) * Example: {li t1, 1; mul result, t1, var} → {mov result, var} * Covers multiplication commutativity edge case */ ph2_ir->op = OP_assign; ph2_ir->src0 = next->src1; ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } return false; } /* Redundant move elimination * Eliminates unnecessary move operations that are overwritten or redundant */ bool redundant_move_elim(ph2_ir_t *ph2_ir) { ph2_ir_t *next = ph2_ir->next; if (!next) return false; /* Pattern 1: Consecutive assignments to same destination * {mov rd, rs1; mov rd, rs2} → {mov rd, rs2} * 
The first move is completely overwritten by the second */ if (ph2_ir->op == OP_assign && next->op == OP_assign && ph2_ir->dest == next->dest) { /* Replace first move with second, skip second */ ph2_ir->src0 = next->src0; ph2_ir->next = next->next; return true; } /* Pattern 2: Redundant load immediately overwritten * {load rd, offset; mov rd, rs} → {mov rd, rs} * Loading a value that's immediately replaced is wasteful */ if ((ph2_ir->op == OP_load || ph2_ir->op == OP_global_load) && next->op == OP_assign && ph2_ir->dest == next->dest) { /* Replace load with move */ ph2_ir->op = OP_assign; ph2_ir->src0 = next->src0; ph2_ir->src1 = 0; /* Clear unused field */ ph2_ir->next = next->next; return true; } /* Pattern 3: Load constant immediately overwritten * {li rd, imm; mov rd, rs} → {mov rd, rs} * Loading a constant that's immediately replaced */ if (ph2_ir->op == OP_load_constant && next->op == OP_assign && ph2_ir->dest == next->dest) { /* Replace constant load with move */ ph2_ir->op = OP_assign; ph2_ir->src0 = next->src0; ph2_ir->next = next->next; return true; } /* Pattern 4: Consecutive loads to same register * {load rd, offset1; load rd, offset2} → {load rd, offset2} * First load is pointless if immediately overwritten */ if ((ph2_ir->op == OP_load || ph2_ir->op == OP_global_load) && (next->op == OP_load || next->op == OP_global_load) && ph2_ir->dest == next->dest) { /* Keep only the second load */ ph2_ir->op = next->op; ph2_ir->src0 = next->src0; ph2_ir->src1 = next->src1; ph2_ir->next = next->next; return true; } /* Pattern 5: Consecutive constant loads (already handled in main loop * but included here for completeness) * {li rd, imm1; li rd, imm2} → {li rd, imm2} */ if (ph2_ir->op == OP_load_constant && next->op == OP_load_constant && ph2_ir->dest == next->dest) { /* Keep only the second constant */ ph2_ir->src0 = next->src0; ph2_ir->next = next->next; return true; } /* Pattern 6: Move followed by load * {mov rd, rs; load rd, offset} → {load rd, offset} * The 
move is pointless if immediately overwritten by load */ if (ph2_ir->op == OP_assign && (next->op == OP_load || next->op == OP_global_load) && ph2_ir->dest == next->dest) { /* Replace move+load with just the load */ ph2_ir->op = next->op; ph2_ir->src0 = next->src0; ph2_ir->src1 = next->src1; ph2_ir->next = next->next; return true; } /* Pattern 7: Move followed by constant load * {mov rd, rs; li rd, imm} → {li rd, imm} * The move is pointless if immediately overwritten by constant */ if (ph2_ir->op == OP_assign && next->op == OP_load_constant && ph2_ir->dest == next->dest) { /* Replace move+li with just the li */ ph2_ir->op = OP_load_constant; ph2_ir->src0 = next->src0; ph2_ir->src1 = 0; /* Clear unused field */ ph2_ir->next = next->next; return true; } return false; } /* Load/store elimination for consecutive memory operations. * Removes redundant loads and dead stores that access the same memory location. * Conservative implementation to maintain bootstrap stability. */ bool eliminate_load_store_pairs(ph2_ir_t *ph2_ir) { ph2_ir_t *next = ph2_ir->next; if (!next) return false; /* Only handle local loads/stores for now (not globals) to be safe */ /* Pattern 1: Consecutive stores to same local location * {store [addr], val1; store [addr], val2} → {store [addr], val2} * First store is dead if immediately overwritten */ if (ph2_ir->op == OP_store && next->op == OP_store) { /* Check if storing to same memory location */ if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 && ph2_ir->src0 >= 0 && ph2_ir->src1 >= 0) { /* Remove first store - it's dead */ ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } } /* Pattern 2: Redundant consecutive loads from same local location * {load rd1, [addr]; load rd2, [addr]} → {load rd1, [addr]; mov rd2, rd1} * Second load can reuse the first load's result * Only apply if addresses are simple (not complex expressions) */ if (ph2_ir->op == OP_load && next->op == OP_load) { /* Check if loading from same memory 
location */ if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 && ph2_ir->src0 >= 0 && ph2_ir->src1 >= 0) { /* Replace second load with move */ next->op = OP_assign; next->src0 = ph2_ir->dest; /* Result of first load */ next->src1 = 0; return true; } } /* Pattern 3: Store followed by load from same location (store-to-load * forwarding) {store [addr], val; load rd, [addr]} → {store [addr], val; * mov rd, val} The load can use the stored value directly */ if (ph2_ir->op == OP_store && next->op == OP_load) { /* Check if accessing same memory location */ if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 && ph2_ir->src0 >= 0 && ph2_ir->dest >= 0) { /* Replace load with move of stored value */ next->op = OP_assign; next->src0 = ph2_ir->dest; /* Value that was stored */ next->src1 = 0; return true; } } /* Pattern 4: Load followed by redundant store of same value * {load rd, [addr]; store [addr], rd} → {load rd, [addr]} * The store is redundant if storing back the just-loaded value */ if (ph2_ir->op == OP_load && next->op == OP_store) { /* Check if storing the value we just loaded from same location */ if (ph2_ir->dest == next->dest && ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1 && ph2_ir->src0 >= 0) { /* Remove redundant store */ ph2_ir->next = next->next; return true; } } /* Pattern 5: Global store/load optimizations (carefully enabled) */ if (ph2_ir->op == OP_global_store && next->op == OP_global_store) { /* Consecutive global stores to same location */ if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1) { /* Remove first store - it's dead */ ph2_ir->dest = next->dest; ph2_ir->next = next->next; return true; } } if (ph2_ir->op == OP_global_load && next->op == OP_global_load) { /* Consecutive global loads from same location */ if (ph2_ir->src0 == next->src0 && ph2_ir->src1 == next->src1) { /* Replace second load with move */ next->op = OP_assign; next->src0 = ph2_ir->dest; next->src1 = 0; return true; } } return false; } /* 
Algebraic simplification: Apply mathematical identities to simplify * expressions * * This function handles patterns that SSA cannot see: * - Self-operations on registers (x-x, x^x, x|x, x&x) * - These patterns emerge after register allocation when different * variables are assigned to the same register * * SSA handles: Constant folding with known values (5+3 → 8) * Peephole handles: Register-based patterns (r1-r1 → 0) * * Returns true if optimization was applied */ bool algebraic_simplification(ph2_ir_t *ph2_ir) { if (!ph2_ir) return false; /* NOTE: SSA's const_folding handles constant operations with known values. * We focus on register-based patterns that appear after register * allocation. */ /* Pattern 1: Self-subtraction → 0 * x - x = 0 (for register operands) */ if (ph2_ir->op == OP_sub && ph2_ir->src0 == ph2_ir->src1) { ph2_ir->op = OP_load_constant; ph2_ir->src0 = 0; /* result is 0 */ ph2_ir->src1 = 0; /* clear unused field */ return true; } /* Pattern 2: Self-XOR → 0 * x ^ x = 0 (for register operands) */ if (ph2_ir->op == OP_bit_xor && ph2_ir->src0 == ph2_ir->src1) { ph2_ir->op = OP_load_constant; ph2_ir->src0 = 0; /* result is 0 */ ph2_ir->src1 = 0; /* clear unused field */ return true; } /* Pattern 3: Self-OR → x * x | x = x (identity operation for register operands) */ if (ph2_ir->op == OP_bit_or && ph2_ir->src0 == ph2_ir->src1) { ph2_ir->op = OP_assign; /* src0 already contains x, just need to move it */ ph2_ir->src1 = 0; /* clear unused field */ return true; } /* Pattern 4: Self-AND → x * x & x = x (identity operation for register operands) */ if (ph2_ir->op == OP_bit_and && ph2_ir->src0 == ph2_ir->src1) { ph2_ir->op = OP_assign; /* src0 already contains x, just need to move it */ ph2_ir->src1 = 0; /* clear unused field */ return true; } /* NOTE: Arithmetic identity patterns (x+0, x*1, x*0, x-0) are already * handled by SSA's const_folding() function and insn_fusion(). * We focus on register-level patterns that SSA cannot see. 
*/ return false; } /* Division/modulo strength reduction: Optimize division and modulo by * power-of-2 * * This pattern is unique to peephole optimizer. * SSA cannot perform this optimization because it works on virtual registers * before actual constant values are loaded. * * Returns true if optimization was applied */ bool strength_reduction(ph2_ir_t *ph2_ir) { if (!ph2_ir || !ph2_ir->next) return false; ph2_ir_t *next = ph2_ir->next; /* Check for constant load followed by division or modulo */ if (ph2_ir->op != OP_load_constant) return false; int value = ph2_ir->src0; /* Check if value is a power of 2 */ if (value <= 0 || (value & (value - 1)) != 0) return false; /* Calculate shift amount for power of 2 */ int shift = 0; int tmp = value; while (tmp > 1) { shift++; tmp >>= 1; } /* Pattern 1: Division by power of 2 → right shift * x / 2^n = x >> n (for unsigned) */ if (next->op == OP_div && next->src1 == ph2_ir->dest) { /* Convert division to right shift */ ph2_ir->src0 = shift; /* Load shift amount instead */ next->op = OP_rshift; return true; } /* Pattern 2: Modulo by power of 2 → bitwise AND * x % 2^n = x & (2^n - 1) */ if (next->op == OP_mod && next->src1 == ph2_ir->dest) { /* Convert modulo to bitwise AND */ ph2_ir->src0 = value - 1; /* Load mask (2^n - 1) */ next->op = OP_bit_and; return true; } /* Pattern 3: Multiplication by power of 2 → left shift * x * 2^n = x << n */ if (next->op == OP_mul) { if (next->src0 == ph2_ir->dest) { /* 2^n * x = x << n */ ph2_ir->src0 = shift; /* Load shift amount */ next->op = OP_lshift; next->src0 = next->src1; /* Move x to src0 */ next->src1 = ph2_ir->dest; /* Shift amount in src1 */ return true; } else if (next->src1 == ph2_ir->dest) { /* x * 2^n = x << n */ ph2_ir->src0 = shift; /* Load shift amount */ next->op = OP_lshift; return true; } } return false; } /* Comparison optimization: Simplify comparison patterns * Focus on register-based patterns that SSA's SCCP misses * Returns true if optimization was applied */ bool 
/* Comparison optimization: fold comparisons of a register with itself.
 *
 *   x == x, x <= x, x >= x → 1
 *   x != x, x <  x, x >  x → 0
 *
 * The instruction is rewritten in place as OP_load_constant.
 * Returns true if a rewrite was applied.
 */
bool comparison_optimization(ph2_ir_t *ph2_ir)
{
    if (!ph2_ir || ph2_ir->src0 != ph2_ir->src1)
        return false;

    int result;
    switch (ph2_ir->op) {
    case OP_eq:
    case OP_leq:
    case OP_geq:
        result = 1; /* always true */
        break;
    case OP_neq:
    case OP_lt:
    case OP_gt:
        result = 0; /* always false */
        break;
    default:
        return false;
    }

    ph2_ir->op = OP_load_constant;
    ph2_ir->src0 = result;
    ph2_ir->src1 = 0;
    return true;
}

/* Bitwise operation optimization on a two-instruction window.
 *
 * Pattern 1: OP_negate applied twice → plain assignment.
 * Patterns on {li rc, C; op rd, x, rc}:
 *   identity : x & -1, x | 0, x ^ 0, x << 0, x >> 0  → {mov rd, x}
 *   absorbing: x & 0 (→ 0), x | -1 (→ -1)            → {li rd, C}
 *
 * The result is folded into ph2_ir and the second instruction is unlinked,
 * the same shape insn_fusion() uses for its {li 0; xor} pattern. Rewriting
 * the second instruction and then unlinking that same instruction would
 * drop the write to rd altogether.
 *
 * Folding discards the constant load into rc, so this assumes rc is a
 * temporary that is not read after the pair.
 *
 * Returns true if a rewrite was applied.
 */
bool bitwise_optimization(ph2_ir_t *ph2_ir)
{
    if (!ph2_ir || !ph2_ir->next)
        return false;

    ph2_ir_t *next = ph2_ir->next;

    /* Pattern 1: double negate → identity
     * src0 of the first instruction already names x; only the destination
     * has to be redirected.
     */
    if (ph2_ir->op == OP_negate && next->op == OP_negate &&
        next->src0 == ph2_ir->dest) {
        ph2_ir->op = OP_assign;
        ph2_ir->dest = next->dest;
        ph2_ir->next = next->next;
        return true;
    }

    if (ph2_ir->op != OP_load_constant)
        return false;

    int value = ph2_ir->src0;
    int const_reg = ph2_ir->dest;
    bool lhs_is_const = (next->src0 == const_reg);
    bool rhs_is_const = (next->src1 == const_reg);

    /* Absorbing element in either operand position:
     * x & 0 = 0, x | -1 = -1. The result equals the constant already held
     * by ph2_ir, so retarget the constant load at the final destination.
     */
    if ((lhs_is_const || rhs_is_const) &&
        ((value == 0 && next->op == OP_bit_and) ||
         (value == -1 && next->op == OP_bit_or))) {
        ph2_ir->dest = next->dest;
        ph2_ir->next = next->next;
        return true;
    }

    /* Identity element as the right-hand operand */
    bool is_identity = false;
    switch (next->op) {
    case OP_bit_and:
        is_identity = (value == -1);
        break;
    case OP_bit_or:
    case OP_bit_xor:
    case OP_lshift:
    case OP_rshift:
        is_identity = (value == 0);
        break;
    default:
        break;
    }

    /* If x is the constant register itself, the mov below would read a
     * register whose load has just been removed, so leave the pair alone.
     */
    if (!is_identity || !rhs_is_const || lhs_is_const)
        return false;

    ph2_ir->op = OP_assign;
    ph2_ir->src0 = next->src0;
    ph2_ir->src1 = 0;
    ph2_ir->dest = next->dest;
    ph2_ir->next = next->next;
    return true;
}

/* Triple pattern optimization: handle 3-instruction sequences.
 *
 * In this function an OP_store is read as {src0: value, src1/dest: location}
 * and an OP_load as {dest: result, src0/src1: location}.
 *
 * Returns true if a rewrite was applied.
 */
bool triple_pattern_optimization(ph2_ir_t *ph2_ir)
{
    if (!ph2_ir || !ph2_ir->next || !ph2_ir->next->next)
        return false;

    ph2_ir_t *second = ph2_ir->next;
    ph2_ir_t *third = second->next;

    /* Pattern 1: Store-load-store to one location
     * {store val1, addr; load r, addr; store val2, addr}
     * The middle load is unlinked when the third store does not store r.
     * NOTE(review): nothing here checks that r is unused after the third
     * instruction; if r can be live afterwards this removes a needed load.
     */
    if (ph2_ir->op == OP_store && second->op == OP_load &&
        third->op == OP_store &&
        ph2_ir->src1 == second->src0 && /* same address */
        ph2_ir->dest == second->src1 && /* same offset */
        second->src0 == third->src1 &&  /* same address */
        second->src1 == third->dest) {  /* same offset */
        if (third->src0 != second->dest) {
            ph2_ir->next = third;
            return true;
        }
    }

    /* Pattern 2: Three consecutive stores to one location
     * {store v1, addr; store v2, addr; store v3, addr} → {store v3, addr}
     */
    if (ph2_ir->op == OP_store && second->op == OP_store &&
        third->op == OP_store && ph2_ir->src1 == second->src1 &&
        ph2_ir->dest == second->dest && second->src1 == third->src1 &&
        second->dest == third->dest) {
        ph2_ir->src0 = third->src0; /* Use last value */
        ph2_ir->next = third->next; /* Skip the two later stores */
        return true;
    }

    /* FIXME: Additional optimization patterns to implement:
     *
     * Pattern 3: Load-op-store with same location
     *   {load r1, [addr]; op r2, r1, ...; store r2, [addr]}
     *   In-place operation; requires architecture-specific codegen support.
     *
     * Pattern 4: Redundant comparison after boolean operation
     *   {cmp a, b; load 1; load 0} → simplified when used in a branch.
     *
     * Pattern 5: Consecutive loads that can be combined
     *   {load r1, [base+off1]; load r2, [base+off2]; op r3, r1, r2}
     *   Needs alignment checking and architecture support.
     *
     * Pattern 6: Load-Load-Select
     *   {load r1, c1; load r2, c2; select/cmov on condition}
     *   Requires control flow analysis.
     *
     * Pattern 7: Add-Add-Add chain simplification
     *   {add r1, r0, c1; add r2, r1, c2; add r3, r2, c3}
     *   Requires tracking constant values through the chain.
     *
     * Pattern 8: Global load followed by immediate use
     *   {global_load r1; op r2, r1, ...; store r2}
     *   Needs careful synchronization analysis.
     */

    return false;
}
*/ void peephole(void) { for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; /* Local peephole optimizations on post-register-allocation IR */ for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) { for (ph2_ir_t *ir = bb->ph2_ir_list.head; ir; ir = ir->next) { ph2_ir_t *next = ir->next; if (!next) continue; /* Self-assignment elimination * Keep this as a safety net: SSA handles most cases, but * register allocation might create new self-assignments */ if (next->op == OP_assign && next->dest == next->src0) { ir->next = next->next; continue; } /* Try triple pattern optimization first (3-instruction * sequences) */ if (triple_pattern_optimization(ir)) continue; /* Try instruction fusion (2-instruction sequences) */ if (insn_fusion(ir)) continue; /* Apply comparison optimization */ if (comparison_optimization(ir)) continue; /* Apply strength reduction for power-of-2 operations */ if (strength_reduction(ir)) continue; /* Apply algebraic simplification */ if (algebraic_simplification(ir)) continue; /* Apply bitwise operation optimizations */ if (bitwise_optimization(ir)) continue; /* Apply redundant move elimination */ if (redundant_move_elim(ir)) continue; /* Apply load/store elimination */ if (eliminate_load_store_pairs(ir)) continue; } } } } ================================================ FILE: src/preprocessor.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. 
*/ #include "../config" #include "defs.h" #include "globals.c" source_location_t synth_built_in_loc; hashmap_t *PRAGMA_ONCE; hashmap_t *MACROS; token_t *pp_lex_skip_space(token_t *tk) { while (tk->next && (tk->next->kind == T_whitespace || tk->next->kind == T_tab)) tk = tk->next; return tk; } token_t *pp_lex_next_token(token_t *tk, bool skip_space) { if (skip_space) tk = pp_lex_skip_space(tk); return tk->next; } bool pp_lex_peek_token(token_t *tk, token_kind_t kind, bool skip_space) { if (skip_space) tk = pp_lex_skip_space(tk); return tk->next && tk->next->kind == kind; } token_t *pp_lex_expect_token(token_t *tk, token_kind_t kind, bool skip_space) { if (skip_space) tk = pp_lex_skip_space(tk); if (tk->next) { if (tk->next->kind == kind) return pp_lex_next_token(tk, false); error_at("Unexpected token kind", &tk->next->location); } error_at("Expect token after this token", &tk->location); return tk; } token_t *lex_ident_token(token_t *tk, token_kind_t kind, char *dest, bool skip_space) { tk = pp_lex_expect_token(tk, kind, skip_space); strcpy(dest, tk->literal); return tk; } /* Copies and isolate the given copied token */ token_t *copy_token(token_t *tk) { token_t *new_tk = arena_calloc(TOKEN_ARENA, 1, sizeof(token_t)); memcpy(new_tk, tk, sizeof(token_t)); new_tk->next = NULL; return new_tk; } typedef struct macro { char *name; int param_num; token_t *param_names[MAX_PARAMS]; token_t *replacement; bool is_variadic; token_t *variadic_tk; bool is_disabled; /* build-in function-like macro handler */ token_t *(*handler)(token_t *); } macro_t; bool is_macro_defined(char *name) { macro_t *macro = hashmap_get(MACROS, name); return macro && !macro->is_disabled; } /* file_macro_handler is responsible for expanding built-in macro "__FILE__" * inplace with a string token with file's relative path's name literally */ token_t *file_macro_handler(token_t *tk) { token_t *new_tk = copy_token(tk); new_tk->kind = T_string; new_tk->literal = tk->location.filename; 
memcpy(&new_tk->location, &tk->location, sizeof(source_location_t)); return new_tk; } /* line_macro_handler is responsible for expanding built-in macro "__LINE__" * inplace with a string token with line number literally */ token_t *line_macro_handler(token_t *tk) { char line[MAX_TOKEN_LEN]; snprintf(line, MAX_TOKEN_LEN, "%d", tk->location.line); token_t *new_tk = copy_token(tk); new_tk->kind = T_numeric; new_tk->literal = intern_string(line); memcpy(&new_tk->location, &tk->location, sizeof(source_location_t)); return new_tk; } /* hide_set_t is used to track which macros have been expanded in the previous * expanding context, if so, it'll get added into hide set of context to prevent * endless recursion macro expansion. */ typedef struct hide_set { char *name; struct hide_set *next; } hide_set_t; hide_set_t *new_hide_set(char *name) { hide_set_t *hs = arena_alloc(TOKEN_ARENA, sizeof(hide_set_t)); hs->name = name; hs->next = NULL; return hs; } hide_set_t *hide_set_union(hide_set_t *hs1, hide_set_t *hs2) { hide_set_t head; hide_set_t *cur = &head; for (; hs1; hs1 = hs1->next) { cur->next = new_hide_set(hs1->name); cur = cur->next; } cur->next = hs2; return head.next; } bool hide_set_contains(hide_set_t *hs, char *name) { for (; hs; hs = hs->next) if (!strcmp(hs->name, name)) return true; return false; } void hide_set_free(hide_set_t *hs) { for (hide_set_t *tmp; hs;) { tmp = hs; hs = hs->next; free(tmp); } } typedef enum { CK_if_then, CK_elif_then, CK_else_then } cond_kind_t; /* cond_incl_t is used as a stack-like context to track conditional macro * directives' expansion, and gives information to the expansion context * to process the token stream with correct behavior. 
*/ typedef struct cond_incl { struct cond_incl *prev; cond_kind_t ctx; token_t *tk; bool included; } cond_incl_t; cond_incl_t *push_cond(cond_incl_t *ci, token_t *tk, bool included) { cond_incl_t *cond = arena_alloc(TOKEN_ARENA, sizeof(cond_incl_t)); cond->prev = ci; cond->ctx = CK_if_then; cond->tk = tk; cond->included = included; return cond; } /* preprocess_ctx_t is used to track various inforamtion when expanding token * stream, the context state may vary due to the current expanding object, * but in general case, it will tries to inherit parent context state if * possible. * * Due to the standard that token stream are always ends with EOF token, * the default behavior is not to trim EOF token, but if the result requires * EOF token to be present, set trim_eof to true would suffice. */ typedef struct preprocess_ctx { hide_set_t *hide_set; hashmap_t *macro_args; token_t *expanded_from; token_t *end_of_token; /* end of token stream of current context */ bool trim_eof; } preprocess_ctx_t; token_t *pp_preprocess_internal(token_t *tk, preprocess_ctx_t *ctx); char *token_to_string(token_t *tk, char *dest); int pp_get_operator_prio(opcode_t op) { /* https://www.cs.uic.edu/~i109/Notes/COperatorPrecedenceTable.pdf */ switch (op) { case OP_ternary: return 3; case OP_log_or: return 4; case OP_log_and: return 5; case OP_bit_or: return 6; case OP_bit_xor: return 7; case OP_bit_and: return 8; case OP_eq: case OP_neq: return 9; case OP_lt: case OP_leq: case OP_gt: case OP_geq: return 10; case OP_add: case OP_sub: return 12; case OP_mul: case OP_div: case OP_mod: return 13; default: return 0; } } int pp_get_unary_operator_prio(opcode_t op) { switch (op) { case OP_add: case OP_sub: case OP_bit_not: case OP_log_not: return 14; default: return 0; } } token_t *pp_get_operator(token_t *tk, opcode_t *op) { tk = pp_lex_skip_space(tk); if (!tk->next) error_at("Unexpected error when trying to evaulate constant operator", &tk->location); switch (tk->next->kind) { case T_plus: op[0] = 
OP_add; break; case T_minus: op[0] = OP_sub; break; case T_asterisk: op[0] = OP_mul; break; case T_divide: op[0] = OP_div; break; case T_mod: op[0] = OP_mod; break; case T_lshift: op[0] = OP_lshift; break; case T_rshift: op[0] = OP_rshift; break; case T_log_and: op[0] = OP_log_and; break; case T_log_or: op[0] = OP_log_or; break; case T_eq: op[0] = OP_eq; break; case T_noteq: op[0] = OP_neq; break; case T_lt: op[0] = OP_lt; break; case T_le: op[0] = OP_leq; break; case T_gt: op[0] = OP_gt; break; case T_ge: op[0] = OP_geq; break; case T_ampersand: op[0] = OP_bit_and; break; case T_bit_or: op[0] = OP_bit_or; break; case T_bit_xor: op[0] = OP_bit_xor; break; case T_question: op[0] = OP_ternary; break; default: /* Maybe it's an operand, we immediately return here. */ op[0] = OP_generic; return tk; } tk = pp_lex_next_token(tk, true); return tk; } token_t *pp_read_constant_expr_operand(token_t *tk, int *val) { if (pp_lex_peek_token(tk, T_numeric, true)) { tk = pp_lex_next_token(tk, true); val[0] = parse_numeric_constant(tk->literal); return tk; } if (pp_lex_peek_token(tk, T_open_bracket, true)) { tk = pp_lex_next_token(tk, true); tk = pp_read_constant_expr_operand(tk, val); tk = pp_lex_expect_token(tk, T_close_bracket, true); return tk; } if (pp_lex_peek_token(tk, T_identifier, true)) { tk = pp_lex_next_token(tk, true); if (!strcmp("defined", tk->literal)) { tk = pp_lex_expect_token(tk, T_open_bracket, true); tk = pp_lex_expect_token(tk, T_identifier, true); val[0] = is_macro_defined(tk->literal); tk = pp_lex_expect_token(tk, T_close_bracket, true); } else { /* Any identifier will fallback and evaluate as 0 */ macro_t *macro = hashmap_get(MACROS, tk->literal); /* Disallow function-like macro to be expanded */ if (macro && !(macro->param_num > 0 || macro->is_variadic)) { token_t *expanded_tk, *tmp; preprocess_ctx_t ctx; ctx.expanded_from = tk; ctx.hide_set = NULL; ctx.macro_args = NULL; ctx.trim_eof = false; expanded_tk = pp_preprocess_internal(macro->replacement, &ctx); 
tmp = tk->next; tk->next = expanded_tk; ctx.end_of_token->next = tmp; return pp_read_constant_expr_operand(tk, val); } val[0] = 0; } return tk; } /* Unable to identify next token, so we advance to next non-whitespace token * and report its location with error message. */ tk = pp_lex_next_token(tk, true); error_at("Unexpected token while evaluating constant", &tk->location); return tk; } token_t *pp_read_constant_infix_expr(int precedence, token_t *tk, int *val) { int lhs, rhs; /* Evaluate unary expression first */ opcode_t op; tk = pp_get_operator(tk, &op); int current_precedence = pp_get_unary_operator_prio(op); if (current_precedence != 0 && current_precedence >= precedence) { tk = pp_read_constant_infix_expr(current_precedence, tk, &lhs); switch (op) { case OP_add: break; case OP_sub: lhs = -lhs; break; case OP_bit_not: lhs = ~lhs; break; case OP_log_not: lhs = !lhs; break; default: { source_location_t *loc = tk->next ? &tk->next->location : &tk->location; error_at("Unexpected unary token while evaluating constant", loc); } } } else { tk = pp_read_constant_expr_operand(tk, &lhs); } while (true) { tk = pp_get_operator(tk, &op); current_precedence = pp_get_operator_prio(op); if (current_precedence == 0 || current_precedence <= precedence) break; tk = pp_read_constant_infix_expr(current_precedence, tk, &rhs); switch (op) { case OP_add: lhs += rhs; break; case OP_sub: lhs -= rhs; break; case OP_mul: lhs *= rhs; break; case OP_div: lhs /= rhs; break; case OP_bit_and: lhs &= rhs; break; case OP_bit_or: lhs |= rhs; break; case OP_bit_xor: lhs ^= rhs; break; case OP_lshift: lhs <<= rhs; break; case OP_rshift: lhs >>= rhs; break; case OP_gt: lhs = lhs > rhs; break; case OP_geq: lhs = lhs >= rhs; break; case OP_lt: lhs = lhs < rhs; break; case OP_leq: lhs = lhs <= rhs; break; case OP_eq: lhs = lhs == rhs; break; case OP_neq: lhs = lhs != rhs; break; case OP_log_and: lhs = lhs && rhs; break; case OP_log_or: lhs = lhs || rhs; break; default: error_at("Unexpected infix token 
while evaluating constant", &tk->location); } tk = pp_get_operator(tk, &op); } val[0] = lhs; return tk; } token_t *pp_read_constant_expr(token_t *tk, int *val) { tk = pp_read_constant_infix_expr(0, tk, val); /* advance to fully consume constant expression */ tk = pp_lex_next_token(tk, true); return tk; } token_t *pp_skip_inner_cond_incl(token_t *tk) { token_kind_t kind; while (tk->kind != T_eof) { kind = tk->kind; if (kind == T_cppd_if || kind == T_cppd_ifdef || kind == T_cppd_ifndef) { if (!tk->next || !tk->next->next) error_at("Unexpected error when skipping conditional inclusion", &tk->location); tk = pp_skip_inner_cond_incl(tk->next->next); continue; } if (kind == T_cppd_endif) { if (!tk->next || !tk->next->next) error_at("Unexpected error when skipping conditional inclusion", &tk->location); return tk->next->next; } tk = tk->next; } return tk; } token_t *pp_skip_cond_incl(token_t *tk) { token_kind_t kind; while (tk->kind != T_eof) { kind = tk->kind; if (kind == T_cppd_if || kind == T_cppd_ifdef || kind == T_cppd_ifndef) { tk = pp_skip_inner_cond_incl(tk); continue; } if (kind == T_cppd_elif || kind == T_cppd_else || kind == T_cppd_endif) break; tk = tk->next; } return tk; } token_t *pp_preprocess_internal(token_t *tk, preprocess_ctx_t *ctx) { token_t head; token_t *cur = &head; cond_incl_t *ci = NULL; while (tk) { macro_t *macro = NULL; switch (tk->kind) { case T_identifier: { token_t *macro_tk = tk; preprocess_ctx_t expansion_ctx; /* Initialize expansion context: inherit parent context and enable * EOF trimming for macro body expansion */ expansion_ctx.expanded_from = ctx->expanded_from ? 
ctx->expanded_from : tk; expansion_ctx.macro_args = ctx->macro_args; expansion_ctx.trim_eof = true; token_t *macro_arg_replcaement = NULL; /* Check if this identifier is a macro parameter (argument) * If we're currently expanding a macro body, parameters should be * replaced with their supplied arguments */ if (ctx->macro_args) macro_arg_replcaement = hashmap_get(ctx->macro_args, tk->literal); if (macro_arg_replcaement) { /* Recursively expand the argument to handle nested macros * TODO: We should consider ## (token concatenation) here */ expansion_ctx.hide_set = ctx->hide_set; expansion_ctx.macro_args = NULL; /* Don't take account of macro arguments, this might run into infinite loop */ macro_arg_replcaement = pp_preprocess_internal( macro_arg_replcaement, &expansion_ctx); cur->next = macro_arg_replcaement; cur = expansion_ctx.end_of_token; tk = pp_lex_next_token(tk, false); continue; } /* Prevent infinite recursion by checking hide set */ if (hide_set_contains(ctx->hide_set, tk->literal)) break; macro = hashmap_get(MACROS, tk->literal); /* Skips expansion if either macro doesn't exist or macro is diabled */ if (!macro || macro->is_disabled) break; /* Handle built-in function-like macros (__FILE__, __LINE__) * These have special handlers that generate tokens directly */ if (macro->handler) { cur->next = macro->handler(expansion_ctx.expanded_from); cur = cur->next; tk = pp_lex_next_token(tk, false); continue; } /* Check if this is a function-like macro invocation */ if (pp_lex_peek_token(tk, T_open_bracket, true)) { token_t arg_head; token_t *arg_cur = &arg_head; int arg_idx = 0; int bracket_depth = 0; /* Add macro name to hide set to prevent re-expansion of itself * during its own body expansion */ expansion_ctx.hide_set = hide_set_union(ctx->hide_set, new_hide_set(tk->literal)); /* Create parameter mapping table for this macro invocation */ expansion_ctx.macro_args = hashmap_create(8); tk = pp_lex_next_token(tk, true); /* Parse macro arguments until closing 
parenthesis * * Handles nested parentheses and comma-separated argument list * by tracking the nested depth */ while (true) { if (pp_lex_peek_token(tk, T_open_bracket, false)) bracket_depth++; else if (pp_lex_peek_token(tk, T_close_bracket, false)) bracket_depth--; /* Expand identifiers within macro arguments * * This handles cases like: MACRO(OTHER_MACRO) where * OTHER_MACRO is itself expanded before being used as arg */ if (expansion_ctx.macro_args && pp_lex_peek_token(tk, T_identifier, false)) { token_t *arg_tk = hashmap_get(ctx->macro_args, tk->next->literal); if (arg_tk) { preprocess_ctx_t arg_expansion_ctx; arg_expansion_ctx.expanded_from = tk->next; arg_expansion_ctx.hide_set = expansion_ctx.hide_set; arg_expansion_ctx.macro_args = NULL; arg_tk = pp_preprocess_internal(arg_tk, &arg_expansion_ctx); tk = pp_lex_next_token(tk, false); arg_cur->next = arg_tk; arg_cur = arg_expansion_ctx.end_of_token; continue; } } /* Accumulate argument tokens until delimiter * * Collect all tokens between commas or parentheses as part * of the current argument */ if (bracket_depth >= 0 && !pp_lex_peek_token(tk, T_comma, false) && !pp_lex_peek_token(tk, T_close_bracket, false)) { tk = pp_lex_next_token(tk, false); arg_cur->next = copy_token(tk); arg_cur = arg_cur->next; continue; } token_t *param_tk; /* Bind argument to corresponding parameter */ if (arg_idx < macro->param_num) { param_tk = macro->param_names[arg_idx++]; hashmap_put(expansion_ctx.macro_args, param_tk->literal, arg_head.next); } else { /* Handle variadic macro overflow * * If macro takes __VA_ARGS__, excess arguments go there */ if (macro->is_variadic) { param_tk = macro->variadic_tk; if (hashmap_contains(expansion_ctx.macro_args, param_tk->literal)) { /* Append to existing variadic args with comma * separator to preserve argument boundaries */ token_t *prev = hashmap_get(expansion_ctx.macro_args, param_tk->literal); while (prev->next) prev = prev->next; /* Borrows parameter's token location */ prev->next = 
new_token(T_comma, ¶m_tk->location, 1); prev->next->next = arg_head.next; prev = arg_cur; } else { hashmap_put(expansion_ctx.macro_args, param_tk->literal, arg_head.next); } } else { error_at( "Too many arguments supplied to macro " "invocation", ¯o_tk->location); } } /* Reset for next argument collection */ arg_cur = &arg_head; if (pp_lex_peek_token(tk, T_comma, false)) { tk = pp_lex_next_token(tk, false); continue; } if (pp_lex_peek_token(tk, T_close_bracket, false)) { tk = pp_lex_next_token(tk, false); break; } } if (arg_idx < macro->param_num) error_at("Too few arguments supplied to macro invocation", ¯o_tk->location); /* Expand macro body with collected arguments * Replace parameter references with supplied argument tokens */ cur->next = pp_preprocess_internal(macro->replacement, &expansion_ctx); cur = expansion_ctx.end_of_token; hashmap_free(expansion_ctx.macro_args); } else { /* Handle object-like macro expansion (no parameters) * Simply expand the replacement with current hide set plus * this macro name added to prevent re-expansion */ expansion_ctx.hide_set = hide_set_union(ctx->hide_set, new_hide_set(tk->literal)); cur->next = pp_preprocess_internal(macro->replacement, &expansion_ctx); cur = expansion_ctx.end_of_token; } tk = pp_lex_next_token(tk, false); continue; } case T_cppd_include: { char inclusion_path[MAX_LINE_LEN]; token_stream_t *file_tks = NULL; preprocess_ctx_t inclusion_ctx; inclusion_ctx.hide_set = ctx->hide_set; inclusion_ctx.expanded_from = NULL; inclusion_ctx.macro_args = NULL; inclusion_ctx.trim_eof = true; if (pp_lex_peek_token(tk, T_string, true)) { tk = pp_lex_next_token(tk, true); strcpy(inclusion_path, tk->literal); /* normalize path */ char path[MAX_LINE_LEN]; const char *file = tk->location.filename; int c = strlen(file) - 1; while (c > 0 && file[c] != '/') c--; if (c) { if (c >= MAX_LINE_LEN - 1) c = MAX_LINE_LEN - 2; memcpy(path, file, c); path[c] = '\0'; } else { path[0] = '.'; path[1] = '\0'; c = 1; } snprintf(path + c, 
MAX_LINE_LEN - c, "/%s", inclusion_path); strncpy(inclusion_path, path, MAX_LINE_LEN - 1); inclusion_path[MAX_LINE_LEN - 1] = '\0'; } else { int sz = 0; char token_buffer[MAX_TOKEN_LEN], *literal; tk = pp_lex_expect_token(tk, T_lt, true); while (!pp_lex_peek_token(tk, T_gt, false)) { tk = pp_lex_next_token(tk, false); literal = token_to_string(tk, token_buffer); strcpy(inclusion_path + sz, literal); sz += strlen(literal); } tk = pp_lex_next_token(tk, false); /* FIXME: We ignore #include <...> at this moment, since * all libc functions are included done by inlining. */ tk = pp_lex_expect_token(tk, T_newline, true); tk = pp_lex_next_token(tk, false); continue; } tk = pp_lex_expect_token(tk, T_newline, true); tk = pp_lex_next_token(tk, false); if (hashmap_contains(PRAGMA_ONCE, inclusion_path)) continue; file_tks = gen_file_token_stream(intern_string(inclusion_path)); cur->next = pp_preprocess_internal(file_tks->head, &inclusion_ctx); cur = inclusion_ctx.end_of_token; continue; } case T_cppd_define: { token_t *r_head = NULL, *r_tail = NULL, *r_cur; tk = pp_lex_expect_token(tk, T_identifier, true); macro = hashmap_get(MACROS, tk->literal); if (!macro) { macro = arena_calloc(TOKEN_ARENA, 1, sizeof(macro_t)); macro->name = tk->literal; } else { /* Ensures that #undef effect is overwritten */ macro->is_disabled = false; } if (pp_lex_peek_token(tk, T_open_bracket, false)) { /* function-like macro */ tk = pp_lex_next_token(tk, false); while (pp_lex_peek_token(tk, T_identifier, true)) { tk = pp_lex_next_token(tk, true); macro->param_names[macro->param_num++] = copy_token(tk); if (pp_lex_peek_token(tk, T_comma, true)) { tk = pp_lex_next_token(tk, true); } } if (pp_lex_peek_token(tk, T_elipsis, true)) { tk = pp_lex_next_token(tk, true); macro->is_variadic = true; macro->variadic_tk = copy_token(tk); macro->variadic_tk->literal = intern_string("__VA_ARGS__"); } tk = pp_lex_expect_token(tk, T_close_bracket, true); } tk = pp_lex_skip_space(tk); while (!pp_lex_peek_token(tk, 
T_newline, false)) { if (pp_lex_peek_token(tk, T_backslash, false)) { tk = pp_lex_expect_token(tk, T_backslash, false); if (!pp_lex_peek_token(tk, T_newline, false)) error_at("Backslash and newline must not be separated", &tk->location); else tk = pp_lex_expect_token(tk, T_newline, false); tk = pp_lex_next_token(tk, false); continue; } tk = pp_lex_next_token(tk, false); r_cur = copy_token(tk); r_cur->next = NULL; if (!r_head) { r_head = r_cur; r_tail = r_head; } else { r_tail->next = r_cur; r_tail = r_cur; } } tk = pp_lex_expect_token(tk, T_newline, false); tk = pp_lex_next_token(tk, false); macro->replacement = r_head; hashmap_put(MACROS, macro->name, macro); continue; } case T_cppd_undef: { tk = pp_lex_expect_token(tk, T_identifier, true); macro = hashmap_get(MACROS, tk->literal); if (macro) { macro->is_disabled = true; } tk = pp_lex_expect_token(tk, T_newline, true); continue; } case T_cppd_if: { token_t *cond_tk = tk; int defined; tk = pp_read_constant_expr(tk, &defined); ci = push_cond(ci, cond_tk, defined); if (!defined) tk = pp_skip_cond_incl(tk); continue; } case T_cppd_ifdef: { token_t *kw_tk = tk; tk = pp_lex_expect_token(tk, T_identifier, true); bool defined = is_macro_defined(tk->literal); ci = push_cond(ci, kw_tk, defined); if (!defined) tk = pp_skip_cond_incl(tk); else tk = pp_lex_expect_token(tk, T_newline, true); continue; } case T_cppd_ifndef: { token_t *kw_tk = tk; tk = pp_lex_expect_token(tk, T_identifier, true); bool defined = is_macro_defined(tk->literal); ci = push_cond(ci, kw_tk, !defined); if (defined) tk = pp_skip_cond_incl(tk); else tk = pp_lex_expect_token(tk, T_newline, true); continue; } case T_cppd_elif: { if (!ci || ci->ctx == CK_else_then) error_at("Stray #elif", &tk->location); int included; ci->ctx = CK_elif_then; tk = pp_read_constant_expr(tk, &included); if (!ci->included && included) ci->included = true; else tk = pp_skip_cond_incl(tk); continue; } case T_cppd_else: { if (!ci || ci->ctx == CK_else_then) error_at("Stray #else", 
&tk->location); ci->ctx = CK_else_then; tk = pp_lex_expect_token(tk, T_newline, true); if (ci->included) tk = pp_skip_cond_incl(tk); continue; } case T_cppd_endif: { if (!ci) error_at("Stray #endif", &tk->location); ci = ci->prev; tk = pp_lex_expect_token(tk, T_newline, true); continue; } case T_cppd_pragma: { if (pp_lex_peek_token(tk, T_identifier, true)) { tk = pp_lex_next_token(tk, true); if (!strcmp("once", tk->literal)) hashmap_put(PRAGMA_ONCE, tk->location.filename, NULL); } while (!pp_lex_peek_token(tk, T_newline, true)) tk = pp_lex_next_token(tk, true); tk = pp_lex_expect_token(tk, T_newline, true); continue; } case T_cppd_error: { if (pp_lex_peek_token(tk, T_string, true)) { tk = pp_lex_next_token(tk, true); error_at(tk->literal, &tk->location); } else { error_at( "Internal error, #error does not support non-string error " "message", &tk->location); } break; } case T_backslash: { /* This branch is designed to be failed since backslash should be * consumed by #define, and upon later expansion, it should not be * included previously while created by #define. 
*/ error_at("Backslash is not allowed here", &cur->location); break; } case T_eof: { if (ctx->trim_eof) { tk = pp_lex_next_token(tk, false); continue; } break; } default: break; } cur->next = copy_token(tk); cur = cur->next; tk = pp_lex_next_token(tk, false); } if (ci) error_at("Unterminated conditional directive", &ci->tk->location); ctx->end_of_token = cur; return head.next; } token_t *preprocess(token_t *tk) { preprocess_ctx_t ctx; ctx.hide_set = NULL; ctx.expanded_from = NULL; ctx.macro_args = NULL; ctx.trim_eof = false; /* Initialize built-in macros */ PRAGMA_ONCE = hashmap_create(16); MACROS = hashmap_create(16); synth_built_in_loc.pos = 0; synth_built_in_loc.len = 1; synth_built_in_loc.column = 1; synth_built_in_loc.line = 1; synth_built_in_loc.filename = ""; macro_t *macro = calloc(1, sizeof(macro_t)); macro->name = "__FILE__"; macro->handler = file_macro_handler; hashmap_put(MACROS, "__FILE__", macro); macro = calloc(1, sizeof(macro_t)); macro->name = "__LINE__"; macro->handler = line_macro_handler; hashmap_put(MACROS, "__LINE__", macro); /* architecture defines */ macro = calloc(1, sizeof(macro_t)); macro->name = ARCH_PREDEFINED; macro->replacement = new_token(T_numeric, &synth_built_in_loc, 1); macro->replacement->literal = "1"; hashmap_put(MACROS, ARCH_PREDEFINED, macro); /* shecc run-time defines */ macro = calloc(1, sizeof(macro_t)); macro->name = "__SHECC__"; macro->replacement = new_token(T_numeric, &synth_built_in_loc, 1); macro->replacement->literal = "1"; hashmap_put(MACROS, "__SHECC__", macro); tk = pp_preprocess_internal(tk, &ctx); hashmap_free(MACROS); hashmap_free(PRAGMA_ONCE); return tk; } char *token_to_string(token_t *tk, char *dest) { switch (tk->kind) { case T_eof: if (tk->next) error_at( "Internal error, token_to_string does not expect eof token in " "the middle of token stream", &tk->location); return NULL; case T_numeric: return tk->literal; case T_identifier: return tk->literal; case T_string: snprintf(dest, MAX_TOKEN_LEN, "\"%s\"", 
tk->literal); return dest; case T_char: snprintf(dest, MAX_TOKEN_LEN, "'%s'", tk->literal); return dest; case T_comma: return ","; case T_open_bracket: return "("; case T_close_bracket: return ")"; case T_open_curly: return "{"; case T_close_curly: return "}"; case T_open_square: return "["; case T_close_square: return "]"; case T_asterisk: return "*"; case T_divide: return "/"; case T_mod: return "%"; case T_bit_or: return "|"; case T_bit_xor: return "^"; case T_bit_not: return "~"; case T_log_and: return "&&"; case T_log_or: return "||"; case T_log_not: return "!"; case T_lt: return "<"; case T_gt: return ">"; case T_le: return "<="; case T_ge: return ">="; case T_lshift: return "<<"; case T_rshift: return ">>"; case T_dot: return "."; case T_arrow: return "->"; case T_plus: return "+"; case T_minus: return "-"; case T_minuseq: return "-="; case T_pluseq: return "+="; case T_asteriskeq: return "*="; case T_divideeq: return "/="; case T_modeq: return "%="; case T_lshifteq: return "<<="; case T_rshifteq: return ">>="; case T_xoreq: return "^="; case T_oreq: return "|="; case T_andeq: return "&="; case T_eq: return "=="; case T_noteq: return "!="; case T_assign: return "="; case T_increment: return "++"; case T_decrement: return "--"; case T_question: return "?"; case T_colon: return ":"; case T_semicolon: return ";"; case T_ampersand: return "&"; case T_return: return "return"; case T_if: return "if"; case T_else: return "else"; case T_while: return "while"; case T_for: return "for"; case T_do: return "do"; case T_typedef: return "typedef"; case T_enum: return "enum"; case T_struct: return "struct"; case T_union: return "union"; case T_sizeof: return "sizeof"; case T_elipsis: return "..."; case T_switch: return "switch"; case T_case: return "case"; case T_break: return "break"; case T_default: return "default"; case T_continue: return "continue"; case T_goto: return "goto"; case T_const: return "const"; case T_newline: return "\n"; case T_backslash: error_at( 
"Internal error, backslash should be ommited after " "preprocessing", &tk->location); break; case T_whitespace: { int i = 0; for (; i < tk->location.len; i++) dest[i] = ' '; dest[i] = '\0'; return dest; } case T_tab: return "\t"; case T_start: /* FIXME: Unused token kind */ break; case T_cppd_include: case T_cppd_define: case T_cppd_undef: case T_cppd_error: case T_cppd_if: case T_cppd_elif: case T_cppd_else: case T_cppd_endif: case T_cppd_ifdef: case T_cppd_ifndef: case T_cppd_pragma: error_at( "Internal error, preprocessor directives should be ommited " "after preprocessing", &tk->location); break; default: error_at("Unknown token kind", &tk->location); printf("UNKNOWN_TOKEN"); break; } return NULL; } void emit_preprocessed_token(token_t *tk) { char token_buffer[MAX_TOKEN_LEN], *literal; while (tk) { literal = token_to_string(tk, token_buffer); if (literal) printf("%s", literal); tk = tk->next; } } ================================================ FILE: src/reg-alloc.c ================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. */ /* Allocate registers from IR. The linear-scan algorithm now expects a minimum * of 7 available registers (typical for RISC-style architectures). * * TODO: Implement "-O level" optimization control. Currently the allocator * always performs dead variable elimination without writing back to stack. */ #include "defs.h" #include "globals.c" void vreg_map_to_phys(var_t *var, int phys_reg) { if (var) var->phys_reg = phys_reg; } int vreg_get_phys(var_t *var) { if (var) return var->phys_reg; return -1; } void vreg_clear_phys(var_t *var) { if (var) var->phys_reg = -1; } /* Aligns size to nearest multiple of 4, this meets ARMv7's alignment * requirement. * * This function should be called whenever handling with user-defined type's * size. 
*/ int align_size(int i) { return i <= 4 ? 4 : (i + 3) & ~3; } bool check_live_out(basic_block_t *bb, var_t *var) { for (int i = 0; i < bb->live_out.size; i++) { if (bb->live_out.elements[i] == var) return true; } return false; } void track_var_use(var_t *var, int insn_idx) { if (!var) return; var->use_count++; if (var->first_use < 0) var->first_use = insn_idx; var->last_use = insn_idx; } void refresh(basic_block_t *bb, insn_t *insn) { for (int i = 0; i < REG_CNT; i++) { if (!REGS[i].var) continue; if (check_live_out(bb, REGS[i].var)) continue; if (REGS[i].var->consumed < insn->idx) { vreg_clear_phys(REGS[i].var); REGS[i].var = NULL; REGS[i].polluted = 0; } } } ph2_ir_t *bb_add_ph2_ir(basic_block_t *bb, opcode_t op) { ph2_ir_t *n = arena_alloc(BB_ARENA, sizeof(ph2_ir_t)); n->op = op; /* Initialize all fields explicitly */ n->next = NULL; /* well-formed singly linked list */ n->is_branch_detached = 0; /* arch-lowering will set for branches */ n->src0 = 0; n->src1 = 0; n->dest = 0; n->func_name[0] = '\0'; n->next_bb = NULL; n->then_bb = NULL; n->else_bb = NULL; n->ofs_based_on_stack_top = false; if (!bb->ph2_ir_list.head) bb->ph2_ir_list.head = n; else bb->ph2_ir_list.tail->next = n; bb->ph2_ir_list.tail = n; return n; } /* Calculate the cost of spilling a variable from a register. * Higher cost means the variable is more valuable to keep in a register. * The cost is computed based on multiple factors that affect performance. */ int calculate_spill_cost(var_t *var, basic_block_t *bb, int current_idx) { int cost = 0; /* Variables that are live-out of the basic block must be spilled anyway, * so give them a high cost to prefer spilling them over others */ if (check_live_out(bb, var)) cost += 1000; /* Variables that will be used soon should have higher cost. 
* The closer the next use, the higher the penalty for spilling */ if (var->consumed > current_idx) { int distance = var->consumed - current_idx; if (distance < 10) cost += 100 - distance * 10; /* Max 100 points for immediate use */ } /* Frequently used variables should stay in registers. * Each use adds 5 points to the cost */ if (var->use_count > 0) cost += var->use_count * 5; /* Variables inside loops are accessed repeatedly, so they should have much * higher priority to stay in registers (200 points per level) */ if (var->loop_depth > 0) cost += var->loop_depth * 200; /* Constants can be easily reloaded, so prefer spilling them by reducing * their cost */ if (var->is_const) cost -= 50; /* Variables with long live ranges may benefit from spilling to free up * registers for other variables */ if (var->first_use >= 0 && var->last_use >= 0) { int range_length = var->last_use - var->first_use; if (range_length > 100) cost += 20; /* Small penalty for very long live ranges */ } return cost; } int find_best_spill(basic_block_t *bb, int current_idx, int avoid_reg1, int avoid_reg2) { int best_reg = -1; int min_cost = 99999; for (int i = 0; i < REG_CNT; i++) { if (i == avoid_reg1 || i == avoid_reg2) continue; if (!REGS[i].var) continue; int cost = calculate_spill_cost(REGS[i].var, bb, current_idx); if (cost < min_cost) { min_cost = cost; best_reg = i; } } return best_reg; } /* Priority of spilling: * - live_out variable * - farthest local variable */ void spill_var(basic_block_t *bb, var_t *var, int idx) { if (!REGS[idx].polluted) { REGS[idx].var = NULL; vreg_clear_phys(var); return; } if (!var->space_is_allocated) { var->offset = bb->belong_to->stack_size; var->space_is_allocated = true; bb->belong_to->stack_size += 4; } ph2_ir_t *ir = var->is_global ? 
bb_add_ph2_ir(bb, OP_global_store)
                                  : bb_add_ph2_ir(bb, OP_store);
    /* Store register 'idx' to the variable's slot, then release the register */
    ir->src0 = idx;
    ir->src1 = var->offset;
    ir->ofs_based_on_stack_top = var->ofs_based_on_stack_top;
    REGS[idx].var = NULL;
    REGS[idx].polluted = 0;
    vreg_clear_phys(var);
}

/* Return the index of register for given variable. Otherwise, return -1. */
int find_in_regs(var_t *var)
{
    for (int i = 0; i < REG_CNT; i++) {
        if (REGS[i].var == var)
            return i;
    }
    return -1;
}

/* Emit the IR that brings @var into register @idx and bind the register to
 * the variable.
 *
 * Constants are materialized with OP_load_constant from init_val; everything
 * else is loaded from var->offset with a global or a local load. The register
 * is marked clean (polluted = 0) because it now matches the variable's home.
 */
void load_var(basic_block_t *bb, var_t *var, int idx)
{
    ph2_ir_t *ir;

    /* Load constants directly, others from memory */
    if (var->is_const) {
        ir = bb_add_ph2_ir(bb, OP_load_constant);
        ir->src0 = var->init_val;
    } else {
        ir = var->is_global ? bb_add_ph2_ir(bb, OP_global_load)
                            : bb_add_ph2_ir(bb, OP_load);
        ir->src0 = var->offset;
        ir->ofs_based_on_stack_top = var->ofs_based_on_stack_top;
    }

    ir->dest = idx;
    REGS[idx].var = var;
    REGS[idx].polluted = 0;
    vreg_map_to_phys(var, idx);
}

/* Return a register that holds the value of @var for use as a source
 * operand, loading it (and spilling another register) if necessary.
 *
 * @bb:        block that receives any load/store IR that has to be emitted
 * @var:       variable to read
 * @operand_0: register already chosen for another operand of the same
 *             instruction (-1 for none); it is never picked as spill victim
 */
int prepare_operand(basic_block_t *bb, var_t *var, int operand_0)
{
    /* Check VReg mapping first for O(1) lookup */
    int phys_reg = vreg_get_phys(var);
    if (phys_reg >= 0 && phys_reg < REG_CNT && REGS[phys_reg].var == var)
        return phys_reg;

    /* Force reload for address-taken variables (may be modified via pointer)
     *
     * NOTE(review): the mapping check above returns before address_taken is
     * looked at, so the reload only happens when the cached mapping misses -
     * confirm this is intended.
     */
    int i = find_in_regs(var);
    if (i > -1 && !var->address_taken) {
        vreg_map_to_phys(var, i);
        return i;
    }

    /* Take the first free register, if any */
    for (i = 0; i < REG_CNT; i++) {
        if (!REGS[i].var) {
            load_var(bb, var, i);
            vreg_map_to_phys(var, i);
            return i;
        }
    }

    /* No free register: evict the cheapest one, using the index of the last
     * instruction of the block (0 if the block is empty) as "now".
     */
    int spilled = find_best_spill(
        bb, bb->insn_list.tail ? bb->insn_list.tail->idx : 0, operand_0, -1);

    /* Fallback: first occupied register other than operand_0 */
    if (spilled < 0) {
        for (i = 0; i < REG_CNT; i++) {
            if (i != operand_0 && REGS[i].var) {
                spilled = i;
                break;
            }
        }
    }

    if (REGS[spilled].var)
        vreg_clear_phys(REGS[spilled].var);

    spill_var(bb, REGS[spilled].var, spilled);
    load_var(bb, var, spilled);
    vreg_map_to_phys(var, spilled);

    return spilled;
}

/* Return a register that will receive a new value of @var.
 *
 * Works like prepare_operand() but never loads the old value: the chosen
 * register is bound to @var and marked polluted so that a later spill writes
 * it back.
 *
 * @operand_0, @operand_1: registers holding the source operands of the same
 *                         instruction (-1 for none); never chosen as victim
 */
int prepare_dest(basic_block_t *bb, var_t *var, int operand_0, int operand_1)
{
    /* Fast path: the cached mapping is still valid */
    int phys_reg = vreg_get_phys(var);
    if (phys_reg >= 0 && phys_reg < REG_CNT && REGS[phys_reg].var == var) {
        REGS[phys_reg].polluted = 1;
        return phys_reg;
    }

    int i = find_in_regs(var);
    if (i > -1) {
        REGS[i].polluted = 1;
        vreg_map_to_phys(var, i);
        return i;
    }

    /* Take the first free register, if any */
    for (i = 0; i < REG_CNT; i++) {
        if (!REGS[i].var) {
            REGS[i].var = var;
            REGS[i].polluted = 1;
            vreg_map_to_phys(var, i);
            return i;
        }
    }

    int spilled =
        find_best_spill(bb, bb->insn_list.tail ? bb->insn_list.tail->idx : 0,
                        operand_0, operand_1);

    /* Fallback: first occupied register that is not a source operand */
    if (spilled < 0) {
        for (i = 0; i < REG_CNT; i++) {
            if (i != operand_0 && i != operand_1 && REGS[i].var) {
                spilled = i;
                break;
            }
        }
    }

    if (REGS[spilled].var)
        vreg_clear_phys(REGS[spilled].var);

    spill_var(bb, REGS[spilled].var, spilled);
    REGS[spilled].var = var;
    REGS[spilled].polluted = 1;
    vreg_map_to_phys(var, spilled);

    return spilled;
}

/* Spill the registers whose values are still needed after @insn.
 *
 * For OP_write every non-global variable held in a register is spilled. For
 * any other instruction a register is spilled when its variable is live-out
 * of @bb or is consumed after @insn.
 *
 * NOTE(review): @insn is NULL-checked only for the OP_write test; the second
 * loop dereferences it unconditionally.
 */
void spill_alive(basic_block_t *bb, insn_t *insn)
{
    /* Spill all locals on pointer writes (conservative aliasing handling) */
    if (insn && insn->opcode == OP_write) {
        for (int i = 0; i < REG_CNT; i++) {
            if (REGS[i].var && !REGS[i].var->is_global)
                spill_var(bb, REGS[i].var, i);
        }
        return;
    }

    /* Standard spilling for non-pointer operations */
    for (int i = 0; i < REG_CNT; i++) {
        if (!REGS[i].var)
            continue;
        if (check_live_out(bb, REGS[i].var)) {
            spill_var(bb, REGS[i].var, i);
            continue;
        }
        if (REGS[i].var->consumed > insn->idx) {
            spill_var(bb, REGS[i].var, i);
            continue;
        }
    }
}

/* Empty the register file at the end of @bb: registers whose variable is not
 * live-out are just released.
 */
void spill_live_out(basic_block_t *bb)
{
    for (int i = 0; i < REG_CNT; i++) {
        if (!REGS[i].var)
            continue;
        if (!check_live_out(bb, REGS[i].var)) {
vreg_clear_phys(REGS[i].var); REGS[i].var = NULL; REGS[i].polluted = 0; continue; } if (!var_check_killed(REGS[i].var, bb)) { vreg_clear_phys(REGS[i].var); REGS[i].var = NULL; REGS[i].polluted = 0; continue; } spill_var(bb, REGS[i].var, i); } } /* The operand of 'OP_push' should not been killed until function called. */ void extend_liveness(basic_block_t *bb, insn_t *insn, var_t *var, int offset) { if (check_live_out(bb, var)) return; if (insn->idx + offset > var->consumed) var->consumed = insn->idx + offset; } /* Return whether extra arguments are pushed onto stack. */ bool abi_lower_call_args(basic_block_t *bb, insn_t *insn) { int num_of_args = 0; int stack_args = 0; while (insn && insn->opcode == OP_push) { num_of_args += 1; insn = insn->next; } if (num_of_args <= MAX_ARGS_IN_REG) return false; insn = insn->prev; stack_args = num_of_args - MAX_ARGS_IN_REG; while (stack_args) { load_var(bb, insn->rs1, MAX_ARGS_IN_REG - 1); ph2_ir_t *ir = bb_add_ph2_ir(bb, OP_store); ir->src0 = MAX_ARGS_IN_REG - 1; ir->src1 = (stack_args - 1) * 4; stack_args -= 1; insn = insn->prev; } REGS[MAX_ARGS_IN_REG - 1].var = NULL; return true; } void reg_alloc(void) { /* TODO: Add proper .bss and .data section support for uninitialized / * initialized globals */ for (insn_t *global_insn = GLOBAL_FUNC->bbs->insn_list.head; global_insn; global_insn = global_insn->next) { ph2_ir_t *ir; int dest, src0; switch (global_insn->opcode) { case OP_allocat: if (global_insn->rd->array_size) { /* Original scheme: pointer slot + backing region. Cache the * base offset of the backing region into init_val so later * global initializers can address elements without loading * the pointer. 
*/ global_insn->rd->offset = GLOBAL_FUNC->stack_size; global_insn->rd->space_is_allocated = true; GLOBAL_FUNC->stack_size += PTR_SIZE; src0 = GLOBAL_FUNC->stack_size; /* base of backing region */ /* Stash base offset for this array variable */ global_insn->rd->init_val = src0; if (global_insn->rd->ptr_level) GLOBAL_FUNC->stack_size += align_size(PTR_SIZE * global_insn->rd->array_size); else { GLOBAL_FUNC->stack_size += align_size(global_insn->rd->array_size * global_insn->rd->type->size); } dest = prepare_dest(GLOBAL_FUNC->bbs, global_insn->rd, -1, -1); ir = bb_add_ph2_ir(GLOBAL_FUNC->bbs, OP_global_address_of); ir->src0 = src0; ir->dest = dest; spill_var(GLOBAL_FUNC->bbs, global_insn->rd, dest); } else { global_insn->rd->offset = GLOBAL_FUNC->stack_size; global_insn->rd->space_is_allocated = true; if (global_insn->rd->ptr_level) GLOBAL_FUNC->stack_size += PTR_SIZE; else if (global_insn->rd->type != TY_int && global_insn->rd->type != TY_short && global_insn->rd->type != TY_char && global_insn->rd->type != TY_bool) { GLOBAL_FUNC->stack_size += align_size(global_insn->rd->type->size); } else /* 'char' is aligned to one byte for the convenience */ GLOBAL_FUNC->stack_size += 4; } break; case OP_load_constant: case OP_load_data_address: case OP_load_rodata_address: dest = prepare_dest(GLOBAL_FUNC->bbs, global_insn->rd, -1, -1); ir = bb_add_ph2_ir(GLOBAL_FUNC->bbs, global_insn->opcode); ir->src0 = global_insn->rd->init_val; ir->dest = dest; break; case OP_assign: src0 = prepare_operand(GLOBAL_FUNC->bbs, global_insn->rs1, -1); dest = prepare_dest(GLOBAL_FUNC->bbs, global_insn->rd, src0, -1); ir = bb_add_ph2_ir(GLOBAL_FUNC->bbs, OP_assign); ir->src0 = src0; ir->dest = dest; spill_var(GLOBAL_FUNC->bbs, global_insn->rd, dest); /* release the unused constant number in register manually */ REGS[src0].polluted = 0; vreg_clear_phys(REGS[src0].var); REGS[src0].var = NULL; break; case OP_add: { /* Special-case address computation for globals: if rs1 is a global * base and rs2 is a 
constant, propagate absolute offset to rd so * OP_write can fold into OP_global_store. */ if (global_insn->rs1 && global_insn->rs1->is_global && global_insn->rs2) { int base_off = global_insn->rs1->offset; /* For global arrays, use backing-region base cached in init_val */ if (global_insn->rs1->array_size > 0) base_off = global_insn->rs1->init_val; global_insn->rd->offset = base_off + global_insn->rs2->init_val; global_insn->rd->space_is_allocated = true; global_insn->rd->is_global = true; break; } /* Fallback: generate an add */ int src1; src0 = prepare_operand(GLOBAL_FUNC->bbs, global_insn->rs1, -1); src1 = prepare_operand(GLOBAL_FUNC->bbs, global_insn->rs2, src0); dest = prepare_dest(GLOBAL_FUNC->bbs, global_insn->rd, src0, src1); ir = bb_add_ph2_ir(GLOBAL_FUNC->bbs, OP_add); ir->src0 = src0; ir->src1 = src1; ir->dest = dest; break; } case OP_write: { /* Fold (addr, val) where addr carries GP-relative offset */ if (global_insn->rs1 && (global_insn->rs1->is_global)) { int vreg = prepare_operand(GLOBAL_FUNC->bbs, global_insn->rs2, -1); ir = bb_add_ph2_ir(GLOBAL_FUNC->bbs, OP_global_store); ir->src0 = vreg; /* For array variables used as base, store to the backing * region's base offset (cached in init_val). 
*/ int base_off = global_insn->rs1->offset; if (global_insn->rs1->array_size > 0) base_off = global_insn->rs1->init_val; ir->src1 = base_off; break; } /* Fallback generic write */ int src1; src0 = prepare_operand(GLOBAL_FUNC->bbs, global_insn->rs1, -1); src1 = prepare_operand(GLOBAL_FUNC->bbs, global_insn->rs2, src0); ir = bb_add_ph2_ir(GLOBAL_FUNC->bbs, OP_write); ir->src0 = src0; ir->src1 = src1; ir->dest = global_insn->sz; break; } default: printf("Unsupported global operation: %d\n", global_insn->opcode); abort(); } } for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; func->visited++; if (!strcmp(func->return_def.var_name, "main")) MAIN_BB = func->bbs; for (int i = 0; i < REG_CNT; i++) REGS[i].var = NULL; /* set arguments available */ int args_in_reg = func->num_params < MAX_ARGS_IN_REG ? func->num_params : MAX_ARGS_IN_REG; for (int i = 0; i < args_in_reg; i++) { REGS[i].var = func->param_defs[i].subscripts[0]; REGS[i].polluted = 1; } /* variadic function implementation */ if (func->va_args) { /* When encountering a variadic function, allocate space for all * arguments on the local stack to ensure their addresses are * contiguous. */ for (int i = 0; i < MAX_PARAMS; i++) { ph2_ir_t *ir; int src0 = i; if (i >= MAX_ARGS_IN_REG) { /* Callee should access caller's stack to obtain the * extra arguments. */ ir = bb_add_ph2_ir(func->bbs, OP_load); ir->dest = MAX_ARGS_IN_REG; ir->src0 = (i - MAX_ARGS_IN_REG) * 4; ir->ofs_based_on_stack_top = true; src0 = MAX_ARGS_IN_REG; } if (i < args_in_reg) { func->param_defs[i].subscripts[0]->offset = func->stack_size; func->param_defs[i].subscripts[0]->space_is_allocated = true; } ir = bb_add_ph2_ir(func->bbs, OP_store); ir->src0 = src0; ir->src1 = func->stack_size; func->stack_size += 4; } } else { /* If the number of function arguments is fixed, the extra arguments * are directly placed in the caller's stack space instead of the * callee's. 
* * +----------> +---------------+ * | | local vars | * | +---------------+ * | | extra arg 4 | * | +---------------+ <-- sp + stack_size + 12 * caller's space | extra arg 3 | * | +---------------+ <-- sp + stack_size + 8 * | | extra arg 2 | * | +---------------+ <-- sp + stack_size + 4 * | | extra arg 1 | * +----------> +---------------+ <-- sp + stack_size * | | local vars | * | +---------------+ <-- sp + 16 * callee's space | Next callee's | * | | additional | * | | arguments | * +----------> +---------------+ <-- sp * * Note that: * - For the Arm architecture, extra arg1 ~ argX correspond to * arg5 ~ arg(X + 4). * - For the RISC-V architecture, extra arg1 ~ argX correspond to * arg9 ~ arg(X + 8). * * If any instruction use one of these additional arguments, it * inherits 'offset' and 'ofs_based_on_stack_top'. When calling * cfg_flatten(), the operand's offset will be recalculated by * adding the function's stack size. */ for (int i = MAX_ARGS_IN_REG; i < func->num_params; i++) { func->param_defs[i].subscripts[0]->offset = (i - MAX_ARGS_IN_REG) * 4; func->param_defs[i].subscripts[0]->space_is_allocated = true; func->param_defs[i].subscripts[0]->ofs_based_on_stack_top = true; } } for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) { bool is_pushing_args = false, handle_abi = false, args_on_stack = false; int args = 0; bb->visited++; for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) { func_t *callee_func; ph2_ir_t *ir; int dest, src0, src1; int sz, clear_reg; refresh(bb, insn); switch (insn->opcode) { case OP_unwound_phi: track_var_use(insn->rs1, insn->idx); src0 = prepare_operand(bb, insn->rs1, -1); if (!insn->rd->space_is_allocated) { insn->rd->offset = bb->belong_to->stack_size; insn->rd->space_is_allocated = true; bb->belong_to->stack_size += 4; } ir = bb_add_ph2_ir(bb, OP_store); ir->src0 = src0; ir->src1 = insn->rd->offset; ir->ofs_based_on_stack_top = insn->rd->ofs_based_on_stack_top; break; case OP_allocat: if ((insn->rd->type == 
TY_void || insn->rd->type == TY_int || insn->rd->type == TY_short || insn->rd->type == TY_char || insn->rd->type == TY_bool) && insn->rd->array_size == 0) break; insn->rd->offset = func->stack_size; insn->rd->space_is_allocated = true; func->stack_size += PTR_SIZE; src0 = func->stack_size; if (insn->rd->ptr_level) sz = PTR_SIZE; else { sz = insn->rd->type->size; } if (insn->rd->array_size) func->stack_size += align_size(insn->rd->array_size * sz); else func->stack_size += align_size(sz); dest = prepare_dest(bb, insn->rd, -1, -1); ir = bb_add_ph2_ir(bb, OP_address_of); ir->src0 = src0; ir->dest = dest; ir->ofs_based_on_stack_top = insn->rd->ofs_based_on_stack_top; /* For arrays, store the base address just like global * arrays do */ if (insn->rd->array_size) spill_var(bb, insn->rd, dest); break; case OP_load_constant: case OP_load_data_address: case OP_load_rodata_address: dest = prepare_dest(bb, insn->rd, -1, -1); ir = bb_add_ph2_ir(bb, insn->opcode); ir->src0 = insn->rd->init_val; ir->dest = dest; /* store global variable immediately after assignment */ if (insn->rd->is_global) { ir = bb_add_ph2_ir(bb, OP_global_store); ir->src0 = dest; ir->src1 = insn->rd->offset; REGS[dest].polluted = 0; } break; case OP_address_of: case OP_global_address_of: /* Mark variable as address-taken, disable constant * optimization */ insn->rs1->address_taken = true; insn->rs1->is_const = false; /* make sure variable is on stack */ if (!insn->rs1->space_is_allocated) { insn->rs1->offset = bb->belong_to->stack_size; insn->rs1->space_is_allocated = true; bb->belong_to->stack_size += 4; for (int i = 0; i < REG_CNT; i++) if (REGS[i].var == insn->rs1) { ir = bb_add_ph2_ir(bb, OP_store); ir->src0 = i; ir->src1 = insn->rs1->offset; ir->ofs_based_on_stack_top = insn->rs1->ofs_based_on_stack_top; /* Clear stale register tracking */ REGS[i].var = NULL; } } dest = prepare_dest(bb, insn->rd, -1, -1); if (insn->rs1->is_global || insn->opcode == OP_global_address_of) ir = bb_add_ph2_ir(bb, 
OP_global_address_of); else ir = bb_add_ph2_ir(bb, OP_address_of); ir->src0 = insn->rs1->offset; ir->dest = dest; ir->ofs_based_on_stack_top = insn->rs1->ofs_based_on_stack_top; break; case OP_assign: if (insn->rd->consumed == -1) break; track_var_use(insn->rs1, insn->idx); src0 = find_in_regs(insn->rs1); /* If operand is loaded from stack, clear the original slot * after moving. */ if (src0 > -1) clear_reg = 0; else { clear_reg = 1; src0 = prepare_operand(bb, insn->rs1, -1); } dest = prepare_dest(bb, insn->rd, src0, -1); ir = bb_add_ph2_ir(bb, OP_assign); ir->src0 = src0; ir->dest = dest; /* store global variable immediately after assignment */ if (insn->rd->is_global) { ir = bb_add_ph2_ir(bb, OP_global_store); ir->src0 = dest; ir->src1 = insn->rd->offset; REGS[dest].polluted = 0; } if (clear_reg) { vreg_clear_phys(REGS[src0].var); REGS[src0].var = NULL; } break; case OP_read: src0 = prepare_operand(bb, insn->rs1, -1); dest = prepare_dest(bb, insn->rd, src0, -1); ir = bb_add_ph2_ir(bb, OP_read); ir->src0 = src0; ir->src1 = insn->sz; ir->dest = dest; break; case OP_write: if (insn->rs2->is_func) { src0 = prepare_operand(bb, insn->rs1, -1); ir = bb_add_ph2_ir(bb, OP_address_of_func); ir->src0 = src0; strcpy(ir->func_name, insn->rs2->var_name); if (dynlink) { func_t *target_fn = find_func(ir->func_name); if (target_fn) target_fn->is_used = true; } } else { /* FIXME: Register content becomes stale after store * operation. Current workaround causes redundant * spilling - need better register invalidation * strategy. */ spill_alive(bb, insn); src0 = prepare_operand(bb, insn->rs1, -1); src1 = prepare_operand(bb, insn->rs2, src0); ir = bb_add_ph2_ir(bb, OP_write); ir->src0 = src0; ir->src1 = src1; ir->dest = insn->sz; } break; case OP_branch: src0 = prepare_operand(bb, insn->rs1, -1); /* REGS[src0].var had been set to NULL, but the actual * content is still holded in the register. 
*/ spill_live_out(bb); ir = bb_add_ph2_ir(bb, OP_branch); ir->src0 = src0; ir->then_bb = bb->then_; ir->else_bb = bb->else_; break; case OP_push: extend_liveness(bb, insn, insn->rs1, insn->sz); if (!is_pushing_args) { spill_alive(bb, insn); is_pushing_args = true; } if (!handle_abi) { args_on_stack = abi_lower_call_args(bb, insn); handle_abi = true; } if (args_on_stack && args >= MAX_ARGS_IN_REG) break; src0 = prepare_operand(bb, insn->rs1, -1); ir = bb_add_ph2_ir(bb, OP_assign); ir->src0 = src0; ir->dest = args++; REGS[ir->dest].var = insn->rs1; REGS[ir->dest].polluted = 0; break; case OP_call: callee_func = find_func(insn->str); if (!callee_func->num_params) spill_alive(bb, insn); if (dynlink) callee_func->is_used = true; ir = bb_add_ph2_ir(bb, OP_call); strcpy(ir->func_name, insn->str); is_pushing_args = false; args = 0; handle_abi = false; for (int i = 0; i < REG_CNT; i++) REGS[i].var = NULL; break; case OP_indirect: if (!args) spill_alive(bb, insn); src0 = prepare_operand(bb, insn->rs1, -1); ir = bb_add_ph2_ir(bb, OP_load_func); ir->src0 = src0; bb_add_ph2_ir(bb, OP_indirect); is_pushing_args = false; args = 0; handle_abi = false; break; case OP_func_ret: dest = prepare_dest(bb, insn->rd, -1, -1); ir = bb_add_ph2_ir(bb, OP_assign); ir->src0 = 0; ir->dest = dest; break; case OP_return: if (insn->rs1) src0 = prepare_operand(bb, insn->rs1, -1); else src0 = -1; ir = bb_add_ph2_ir(bb, OP_return); ir->src0 = src0; break; case OP_add: case OP_sub: case OP_mul: case OP_div: case OP_mod: case OP_lshift: case OP_rshift: case OP_eq: case OP_neq: case OP_gt: case OP_geq: case OP_lt: case OP_leq: case OP_bit_and: case OP_bit_or: case OP_bit_xor: track_var_use(insn->rs1, insn->idx); track_var_use(insn->rs2, insn->idx); src0 = prepare_operand(bb, insn->rs1, -1); src1 = prepare_operand(bb, insn->rs2, src0); dest = prepare_dest(bb, insn->rd, src0, src1); ir = bb_add_ph2_ir(bb, insn->opcode); ir->src0 = src0; ir->src1 = src1; ir->dest = dest; break; case OP_negate: case 
OP_bit_not: case OP_log_not: src0 = prepare_operand(bb, insn->rs1, -1); dest = prepare_dest(bb, insn->rd, src0, -1); ir = bb_add_ph2_ir(bb, insn->opcode); ir->src0 = src0; ir->dest = dest; break; case OP_trunc: case OP_sign_ext: case OP_cast: src0 = prepare_operand(bb, insn->rs1, -1); dest = prepare_dest(bb, insn->rd, src0, -1); ir = bb_add_ph2_ir(bb, insn->opcode); ir->src1 = insn->sz; ir->src0 = src0; ir->dest = dest; break; default: printf("Unknown opcode\n"); abort(); } } if (bb->next) spill_live_out(bb); if (bb == func->exit) continue; /* append jump instruction for the normal block only */ if (!bb->next) continue; if (bb->next == func->exit) continue; /* jump to the beginning of loop or over the else block */ if (bb->next->visited == func->visited || bb->next->rpo != bb->rpo + 1) { ph2_ir_t *ir = bb_add_ph2_ir(bb, OP_jump); ir->next_bb = bb->next; } } /* handle implicit return */ for (int i = 0; i < MAX_BB_PRED; i++) { basic_block_t *bb = func->exit->prev[i].bb; if (!bb) continue; if (func->return_def.type != TY_void) continue; if (bb->insn_list.tail) if (bb->insn_list.tail->opcode == OP_return) continue; ph2_ir_t *ir = bb_add_ph2_ir(bb, OP_return); ir->src0 = -1; } } }

/* Print the flattened phase-2 IR to stdout in a human-readable form.
 *
 * Walks PH2_IR_FLATTEN[0 .. ph2_ir_idx) and prints one line per instruction.
 * OP_allocat entries are skipped entirely (no line is printed for them);
 * opcodes without a dedicated case fall into 'default' and yield an empty
 * line.
 *
 * Register fields are printed with "%c" after adding 48 (ASCII '0') to the
 * field value, so only indices 0..9 render as a decimal digit.
 */
void dump_ph2_ir(void)
{
    for (int i = 0; i < ph2_ir_idx; i++) {
        ph2_ir_t *ph2_ir = PH2_IR_FLATTEN[i];

        /* Character codes used to print dest/src0/src1 as registers */
        const int rd = ph2_ir->dest + 48;
        const int rs1 = ph2_ir->src0 + 48;
        const int rs2 = ph2_ir->src1 + 48;

        switch (ph2_ir->op) {
        case OP_define:
            printf("%s:", ph2_ir->func_name);
            break;
        case OP_allocat:
            /* 'continue' also skips the trailing newline below */
            continue;
        case OP_assign:
            printf("\t%%x%c = %%x%c", rd, rs1);
            break;
        case OP_load_constant:
            /* src0 holds the constant itself, not a register index */
            printf("\tli %%x%c, $%d", rd, ph2_ir->src0);
            break;
        case OP_load_data_address:
            printf("\t%%x%c = .data(%d)", rd, ph2_ir->src0);
            break;
        case OP_load_rodata_address:
            printf("\t%%x%c = .rodata(%d)", rd, ph2_ir->src0);
            break;
        case OP_address_of:
            printf("\t%%x%c = %%sp + %d", rd, ph2_ir->src0);
            break;
        case OP_global_address_of:
            printf("\t%%x%c = %%gp + %d", rd, ph2_ir->src0);
            break;
        case OP_branch:
            printf("\tbr %%x%c", rs1);
            break;
        case OP_jump:
            printf("\tj %s", ph2_ir->func_name);
            break;
        case OP_call:
            printf("\tcall @%s", ph2_ir->func_name);
            break;
        case OP_return:
            /* src0 == -1 marks a return without a value */
            if (ph2_ir->src0 == -1)
                printf("\tret");
            else
                printf("\tret %%x%c", rs1);
            break;
        case OP_load:
            printf("\tload %%x%c, %d(sp)", rd, ph2_ir->src0);
            break;
        case OP_store:
            printf("\tstore %%x%c, %d(sp)", rs1, ph2_ir->src1);
            break;
        case OP_global_load:
            printf("\tload %%x%c, %d(gp)", rd, ph2_ir->src0);
            break;
        case OP_global_store:
            printf("\tstore %%x%c, %d(gp)", rs1, ph2_ir->src1);
            break;
        case OP_read:
            printf("\t%%x%c = (%%x%c)", rd, rs1);
            break;
        case OP_write:
            printf("\t(%%x%c) = %%x%c", rs1, rs2);
            break;
        case OP_address_of_func:
            printf("\t(%%x%c) = @%s", rs1, ph2_ir->func_name);
            break;
        case OP_load_func:
            printf("\tload %%t0, %d(sp)", ph2_ir->src0);
            break;
        case OP_global_load_func:
            printf("\tload %%t0, %d(gp)", ph2_ir->src0);
            break;
        case OP_indirect:
            printf("\tindirect call @(%%t0)");
            break;
        case OP_negate:
            printf("\tneg %%x%c, %%x%c", rd, rs1);
            break;
        case OP_add:
            printf("\t%%x%c = add %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_sub:
            printf("\t%%x%c = sub %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_mul:
            printf("\t%%x%c = mul %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_div:
            printf("\t%%x%c = div %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_mod:
            printf("\t%%x%c = mod %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_eq:
            printf("\t%%x%c = eq %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_neq:
            printf("\t%%x%c = neq %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_gt:
            printf("\t%%x%c = gt %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_lt:
            printf("\t%%x%c = lt %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_geq:
            printf("\t%%x%c = geq %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_leq:
            printf("\t%%x%c = leq %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_bit_and:
            printf("\t%%x%c = and %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_bit_or:
            printf("\t%%x%c = or %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_bit_not:
            printf("\t%%x%c = not %%x%c", rd, rs1);
            break;
        case OP_bit_xor:
            printf("\t%%x%c = xor %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_log_not:
            printf("\t%%x%c = not %%x%c", rd, rs1);
            break;
        case OP_rshift:
            printf("\t%%x%c = rshift %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_lshift:
            printf("\t%%x%c = lshift %%x%c, %%x%c", rd, rs1, rs2);
            break;
        case OP_trunc:
            printf("\t%%x%c = trunc %%x%c, %d", rd, rs1, ph2_ir->src1);
            break;
        case OP_sign_ext:
            printf("\t%%x%c = sign_ext %%x%c, %d", rd, rs1, ph2_ir->src1);
            break;
        case OP_cast:
            printf("\t%%x%c = cast %%x%c", rd, rs1);
            break;
        default:
            break;
        }
        printf("\n");
    }
}

================================================ FILE: src/riscv-codegen.c ================================================
/*
 * shecc - Self-Hosting and Educational C Compiler.
 *
 * shecc is freely redistributable under the BSD 2 clause license. See the
 * file "LICENSE" for information on usage and redistribution of this file.
 */

/* Translate IR to target machine code */

#include "defs.h"
#include "globals.c"
#include "riscv.c"

/* Advance the global 'elf_offset' by the number of bytes that the given
 * phase-2 IR instruction occupies once encoded.
 *
 * Every RISC-V instruction emitted by this backend is 4 bytes, so each
 * increment below is 4 times the number of instructions emit_ph2_ir()
 * produces for the same opcode and operands. The two functions must be kept
 * in sync, otherwise the block offsets computed by cfg_flatten() no longer
 * match the generated code.
 *
 * Offsets and constants outside the signed 12-bit range [-2048, 2047] need
 * a longer sequence, hence the range tests.
 *
 * Calls fatal() for an opcode that has no case here.
 */
void update_elf_offset(ph2_ir_t *ph2_ir)
{
    switch (ph2_ir->op) {
    case OP_load_constant:
        if (ph2_ir->src0 < -2048 || ph2_ir->src0 > 2047)
            elf_offset += 8;
        else
            elf_offset += 4;
        return;
    case OP_address_of:
    case OP_global_address_of:
        if (ph2_ir->src0 < -2048 || ph2_ir->src0 > 2047)
            elf_offset += 12;
        else
            elf_offset += 4;
        return;
    case OP_assign:
        elf_offset += 4;
        return;
    case OP_load:
    case OP_global_load:
        /* the offset of a load lives in src0 */
        if (ph2_ir->src0 < -2048 || ph2_ir->src0 > 2047)
            elf_offset += 16;
        else
            elf_offset += 4;
        return;
    case OP_store:
    case OP_global_store:
        /* the offset of a store lives in src1 */
        if (ph2_ir->src1 < -2048 || ph2_ir->src1 > 2047)
            elf_offset += 16;
        else
            elf_offset += 4;
        return;
    case OP_read:
    case OP_write:
    case OP_jump:
    case OP_call:
    case OP_load_func:
    case OP_indirect:
    case OP_add:
    case OP_sub:
    case OP_lshift:
    case OP_rshift:
    case OP_gt:
    case OP_lt:
    case OP_bit_and:
    case OP_bit_or:
    case OP_bit_xor:
    case OP_negate:
    case OP_bit_not:
        /* single-instruction operations */
        elf_offset += 4;
        return;
    case OP_mul:
        /* 1 instruction with hardware multiply, 13 for the software loop */
        if (hard_mul_div)
            elf_offset += 4;
        else
            elf_offset += 52;
        return;
    case OP_div:
    case OP_mod:
        /* 1 instruction with hardware divide, 27 for the emulation */
        if (hard_mul_div)
            elf_offset += 4;
        else
            elf_offset += 108;
        return;
    case OP_load_data_address:
    case OP_load_rodata_address:
    case OP_neq:
    case OP_geq:
    case OP_leq:
    case OP_log_not:
        elf_offset += 8;
        return;
    case OP_address_of_func:
    case OP_eq:
        elf_offset += 12;
        return;
    case OP_branch:
        elf_offset += 20;
        return;
    case OP_return:
        elf_offset += 24;
        return;
    case OP_trunc:
        /* truncation to 2 bytes uses a shift pair, other sizes one insn */
        if (ph2_ir->src1 == 2)
            elf_offset += 8;
        else
            elf_offset += 4;
        return;
    case OP_sign_ext: {
        /* Decode source size from upper 16 bits */
        int source_size = (ph2_ir->src1 >> 16) & 0xFFFF;
        if (source_size == 2)
            elf_offset += 8; /* short extension: 2 instructions */
        else
            elf_offset += 12; /* byte extension: 3 instructions */
        return;
    }
    case OP_cast:
        elf_offset += 4;
        return;
    default:
        fatal("Unknown opcode");
    }
}

/* Lay out the whole program: assign an ELF code offset to every basic block
 * and append each block's phase-2 IR to the flattened instruction list.
 *
 * The fixed offsets used here (24 for __syscall, 60 for the global
 * initializer, 24 more bytes before the first function) mirror the
 * hand-written sequences emitted by code_generate().
 */
void cfg_flatten(void)
{
    func_t *func = find_func("__syscall");
    /* Prologue ~ 6 instructions (24 bytes). Place __syscall right after. */
    func->bbs->elf_offset = 24;

    /* Reserve space for prologue (24) + syscall trampoline (36) = 60 bytes. */
    elf_offset = 60;
    GLOBAL_FUNC->bbs->elf_offset = elf_offset;

    for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
         ph2_ir = ph2_ir->next) {
        update_elf_offset(ph2_ir);
    }

    /* prepare 'argc' and 'argv', then proceed to 'main' function */
    elf_offset += 24;

    for (func = FUNC_LIST.head; func; func = func->next) {
        /* Skip function declarations without bodies */
        if (!func->bbs)
            continue;

        /* reserve stack */
        ph2_ir_t *flatten_ir = add_ph2_ir(OP_define);
        flatten_ir->src0 = func->stack_size;
        strncpy(flatten_ir->func_name, func->return_def.var_name, MAX_VAR_LEN);

        for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) {
            bb->elf_offset = elf_offset;

            if (bb == func->bbs) {
                /* save ra, sp */
                elf_offset += 16;
            }

            for (ph2_ir_t *insn = bb->ph2_ir_list.head; insn;
                 insn = insn->next) {
                /* TODO: recalculate the offset for instructions with the
                 * 'ofs_based_on_stack_top' flag set.
*/
                flatten_ir = add_existed_ph2_ir(insn);

                if (insn->op == OP_return) {
                    /* restore sp */
                    flatten_ir->src1 = bb->belong_to->stack_size;
                }

                update_elf_offset(flatten_ir);
            }
        }
    }
}

/* Append one encoded instruction word to the code section (elf_code). */
void emit(int code)
{
    elf_write_int(elf_code, code);
}

/* Encode a single phase-2 IR instruction as RISC-V machine code.
 *
 * The dest/src0/src1 fields are turned into register numbers by adding 10,
 * i.e. index 0 corresponds to a0. t0..t5 serve as scratch registers inside
 * the multi-instruction sequences.
 *
 * The number of instructions emitted for each opcode has to agree with the
 * byte count update_elf_offset() adds for it, and the literal branch/jump
 * displacements inside the mul/div sequences depend on the exact order of
 * the emit() calls. Change neither without updating the other.
 *
 * Calls abort() for an unsupported OP_read/OP_write width and fatal() for
 * an unknown opcode or an unsupported truncation size.
 */
void emit_ph2_ir(ph2_ir_t *ph2_ir)
{
    func_t *func;
    int rd = ph2_ir->dest + 10;
    int rs1 = ph2_ir->src0 + 10;
    int rs2 = ph2_ir->src1 + 10;
    int ofs;

    /* Prepare the variables to reuse the same code for
     * the instruction sequence of
     * 1. division and modulo.
     * 2. load and store operations.
     * 3. address-of operations.
     */
    rv_reg interm, divisor_mask = __t1;

    switch (ph2_ir->op) {
    case OP_define:
        /* Function entry: store ra just below the incoming sp, then lower
         * sp by (src0 + 4) bytes.
         */
        emit(__sw(__ra, __sp, -4));
        emit(__lui(__t0, rv_hi(ph2_ir->src0 + 4)));
        emit(__addi(__t0, __t0, rv_lo(ph2_ir->src0 + 4)));
        emit(__sub(__sp, __sp, __t0));
        return;
    case OP_load_constant:
        /* Constants outside the 12-bit immediate range need lui + addi */
        if (ph2_ir->src0 < -2048 || ph2_ir->src0 > 2047) {
            emit(__lui(rd, rv_hi(ph2_ir->src0)));
            emit(__addi(rd, rd, rv_lo(ph2_ir->src0)));
        } else
            emit(__addi(rd, __zero, ph2_ir->src0));
        return;
    case OP_address_of:
    case OP_global_address_of:
        /* Local addresses are relative to sp, global ones to gp */
        interm = ph2_ir->op == OP_address_of ? __sp : __gp;
        if (ph2_ir->src0 < -2048 || ph2_ir->src0 > 2047) {
            emit(__lui(__t0, rv_hi(ph2_ir->src0)));
            emit(__addi(__t0, __t0, rv_lo(ph2_ir->src0)));
            emit(__add(rd, interm, __t0));
        } else
            emit(__addi(rd, interm, ph2_ir->src0));
        return;
    case OP_assign:
        /* register move */
        emit(__addi(rd, rs1, 0));
        return;
    case OP_load:
    case OP_global_load:
        interm = ph2_ir->op == OP_load ? __sp : __gp;
        if (ph2_ir->src0 < -2048 || ph2_ir->src0 > 2047) {
            /* materialize base + offset in t0, then load through it */
            emit(__lui(__t0, rv_hi(ph2_ir->src0)));
            emit(__addi(__t0, __t0, rv_lo(ph2_ir->src0)));
            emit(__add(__t0, interm, __t0));
            emit(__lw(rd, __t0, 0));
        } else
            emit(__lw(rd, interm, ph2_ir->src0));
        return;
    case OP_store:
    case OP_global_store:
        interm = ph2_ir->op == OP_store ? __sp : __gp;
        if (ph2_ir->src1 < -2048 || ph2_ir->src1 > 2047) {
            /* materialize base + offset in t0, then store through it */
            emit(__lui(__t0, rv_hi(ph2_ir->src1)));
            emit(__addi(__t0, __t0, rv_lo(ph2_ir->src1)));
            emit(__add(__t0, interm, __t0));
            emit(__sw(rs1, __t0, 0));
        } else
            emit(__sw(rs1, interm, ph2_ir->src1));
        return;
    case OP_read:
        /* src1 is the access width in bytes */
        if (ph2_ir->src1 == 1)
            emit(__lb(rd, rs1, 0));
        else if (ph2_ir->src1 == 2)
            emit(__lh(rd, rs1, 0));
        else if (ph2_ir->src1 == 4)
            emit(__lw(rd, rs1, 0));
        else
            abort();
        return;
    case OP_write:
        /* dest is the access width in bytes; rs1 is the address, rs2 the
         * value to store
         */
        if (ph2_ir->dest == 1)
            emit(__sb(rs2, rs1, 0));
        else if (ph2_ir->dest == 2)
            emit(__sh(rs2, rs1, 0));
        else if (ph2_ir->dest == 4)
            emit(__sw(rs2, rs1, 0));
        else
            abort();
        return;
    case OP_branch:
        /* t0 = absolute address of the 'then' block. When rs1 is zero the
         * beq skips the jalr (8 bytes ahead) and lands on the jal to the
         * 'else' block; otherwise jalr jumps to the 'then' block.
         */
        ofs = elf_code_start + ph2_ir->then_bb->elf_offset;
        emit(__lui(__t0, rv_hi(ofs)));
        emit(__addi(__t0, __t0, rv_lo(ofs)));
        emit(__beq(rs1, __zero, 8));
        emit(__jalr(__zero, __t0, 0));
        emit(__jal(__zero, ph2_ir->else_bb->elf_offset - elf_code->size));
        return;
    case OP_jump:
        /* displacement is relative to the current end of the code section */
        emit(__jal(__zero, ph2_ir->next_bb->elf_offset - elf_code->size));
        return;
    case OP_call:
        func = find_func(ph2_ir->func_name);
        emit(__jal(__ra, func->bbs->elf_offset - elf_code->size));
        return;
    case OP_load_data_address:
        emit(__lui(rd, rv_hi(elf_data_start + ph2_ir->src0)));
        emit(__addi(rd, rd, rv_lo(elf_data_start + ph2_ir->src0)));
        return;
    case OP_load_rodata_address:
        emit(__lui(rd, rv_hi(elf_rodata_start + ph2_ir->src0)));
        emit(__addi(rd, rd, rv_lo(elf_rodata_start + ph2_ir->src0)));
        return;
    case OP_address_of_func:
        /* store the absolute address of the function at the location
         * addressed by rs1
         */
        func = find_func(ph2_ir->func_name);
        ofs = elf_code_start + func->bbs->elf_offset;
        emit(__lui(__t0, rv_hi(ofs)));
        emit(__addi(__t0, __t0, rv_lo(ofs)));
        emit(__sw(__t0, rs1, 0));
        return;
    case OP_load_func:
        /* keep the call target in t0 for the following OP_indirect */
        emit(__addi(__t0, rs1, 0));
        return;
    case OP_indirect:
        emit(__jalr(__ra, __t0, 0));
        return;
    case OP_return:
        /* The first instruction is always emitted (a no-op when there is no
         * return value) so that the sequence length stays constant.
         */
        if (ph2_ir->src0 == -1)
            emit(__addi(__zero, __zero, 0));
        else
            emit(__addi(__a0, rs1, 0));
        /* undo the stack adjustment of OP_define, reload ra and return */
        emit(__lui(__t0, rv_hi(ph2_ir->src1 + 4)));
        emit(__addi(__t0, __t0, rv_lo(ph2_ir->src1 + 4)));
        emit(__add(__sp, __sp, __t0));
        emit(__lw(__ra, __sp, -4));
        emit(__jalr(__zero, __ra, 0));
        return;
    case OP_add:
        emit(__add(rd, rs1, rs2));
        return;
    case OP_sub:
        emit(__sub(rd, rs1, rs2));
        return;
    case OP_mul:
        if (hard_mul_div)
            emit(__mul(rd, rs1, rs2));
        else {
            /* Software shift-and-add multiply: t0 accumulates the product,
             * t3 is the shifted multiplicand, t4 the remaining multiplier.
             * The literal offsets are relative to each branch instruction.
             */
            emit(__addi(__t0, __zero, 0));
            emit(__addi(__t1, __zero, 0));
            emit(__addi(__t3, rs1, 0));
            emit(__addi(__t4, rs2, 0));
            emit(__beq(__t3, __zero, 32));
            emit(__beq(__t4, __zero, 28));
            emit(__andi(__t1, __t4, 1));
            emit(__beq(__t1, __zero, 8));
            emit(__add(__t0, __t0, __t3));
            emit(__slli(__t3, __t3, 1));
            emit(__srli(__t4, __t4, 1));
            emit(__jal(__zero, -28));
            emit(__addi(rd, __t0, 0));
        }
        return;
    case OP_div:
    case OP_mod:
        if (hard_mul_div) {
            if (ph2_ir->op == OP_div)
                emit(__div(rd, rs1, rs2));
            else
                emit(__mod(rd, rs1, rs2));
            return;
        }
        /* For division the result (quotient) is taken from t0 */
        interm = __t0;
        /* div/mod emulation */
        if (ph2_ir->op == OP_mod) {
            /* If the requested operation is modulo, the result will be stored
             * in __t2. The sign of the divisor is irrelevant for determining
             * the result's sign.
             */
            interm = __t2;
            divisor_mask = __zero;
        }
        /* Obtain absolute values of the dividend and divisor */
        emit(__addi(__t2, rs1, 0));
        emit(__addi(__t3, rs2, 0));
        emit(__srai(__t0, __t2, 31));
        emit(__add(__t2, __t2, __t0));
        emit(__xor(__t2, __t2, __t0));
        emit(__srai(__t1, __t3, 31));
        emit(__add(__t3, __t3, __t1));
        emit(__xor(__t3, __t3, __t1));
        /* t5 is non-zero when the final result has to be negated */
        emit(__xor(__t5, __t0, divisor_mask));
        /* Unsigned integer division */
        emit(__addi(__t0, __zero, 0));
        emit(__addi(__t1, __zero, 1));
        emit(__beq(__t3, __zero, 52));
        emit(__beq(__t2, __zero, 48));
        emit(__beq(__t2, __t3, 20));
        emit(__bltu(__t2, __t3, 16));
        emit(__slli(__t3, __t3, 1));
        emit(__slli(__t1, __t1, 1));
        emit(__jal(__zero, -16));
        emit(__bltu(__t2, __t3, 12));
        emit(__sub(__t2, __t2, __t3));
        emit(__add(__t0, __t0, __t1));
        emit(__srli(__t1, __t1, 1));
        emit(__srli(__t3, __t3, 1));
        emit(__bne(__t1, __zero, -20));
        emit(__addi(rd, interm, 0));
        /* Handle the correct sign for the quotient or remainder */
        emit(__beq(__t5, __zero, 8));
        emit(__sub(rd, __zero, rd));
        return;
    case OP_lshift:
        emit(__sll(rd, rs1, rs2));
        return;
    case OP_rshift:
        /* arithmetic (sign-propagating) right shift */
        emit(__sra(rd, rs1, rs2));
        return;
    case OP_eq:
        /* rd = ((rs1 - rs2) != 0) ^ 1 */
        emit(__sub(rd, rs1, rs2));
        emit(__sltu(rd, __zero, rd));
        emit(__xori(rd, rd, 1));
        return;
    case OP_neq:
        emit(__sub(rd, rs1, rs2));
        emit(__sltu(rd, __zero, rd));
        return;
    case OP_gt:
        emit(__slt(rd, rs2, rs1));
        return;
    case OP_geq:
        /* rs1 >= rs2 is the negation of rs1 < rs2 */
        emit(__slt(rd, rs1, rs2));
        emit(__xori(rd, rd, 1));
        return;
    case OP_lt:
        emit(__slt(rd, rs1, rs2));
        return;
    case OP_leq:
        /* rs1 <= rs2 is the negation of rs2 < rs1 */
        emit(__slt(rd, rs2, rs1));
        emit(__xori(rd, rd, 1));
        return;
    case OP_negate:
        emit(__sub(rd, __zero, rs1));
        return;
    case OP_bit_not:
        emit(__xori(rd, rs1, -1));
        return;
    case OP_bit_and:
        emit(__and(rd, rs1, rs2));
        return;
    case OP_bit_or:
        emit(__or(rd, rs1, rs2));
        return;
    case OP_bit_xor:
        emit(__xor(rd, rs1, rs2));
        return;
    case OP_log_not:
        /* rd = (rs1 != 0) ^ 1 */
        emit(__sltu(rd, __zero, rs1));
        emit(__xori(rd, rd, 1));
        return;
    case OP_trunc:
        /* src1 is the target size in bytes */
        if (ph2_ir->src1 == 1) {
            emit(__andi(rd, rs1, 0xFF));
        } else if (ph2_ir->src1 == 2) {
            /* For short truncation,
             * use shift operations since 0xFFFF is too large
             */
            emit(__slli(rd, rs1, 16)); /* Shift left 16 bits */
            emit(__srli(rd, rd, 16));  /* Shift right 16 bits logical */
        } else if (ph2_ir->src1 == 4) {
            /* No truncation needed for 32-bit values */
            emit(__add(rd, rs1, __zero));
        } else {
            fatal("Unsupported truncation operation with invalid target size");
        }
        return;
    case OP_sign_ext: {
        /* Decode size information:
         * Lower 16 bits: target size
         * Upper 16 bits: source size
         */
        int target_size = ph2_ir->src1 & 0xFFFF;
        int source_size = (ph2_ir->src1 >> 16) & 0xFFFF;

        /* Calculate shift amount based on target and source sizes */
        int shift_amount = (target_size - source_size) * 8;

        if (source_size == 2) {
            /* Sign extend from short to word (16-bit shift)
             * For 16-bit sign extension, use only shift operations
             * since 0xFFFF is too large for RISC-V immediate field
             */
            emit(__slli(rd, rs1, shift_amount));
            emit(__srai(rd, rd, shift_amount));
        } else {
            /* Fallback for other sizes */
            emit(__andi(rd, rs1, 0xFF));
            emit(__slli(rd, rd, shift_amount));
            emit(__srai(rd, rd, shift_amount));
        }
        return;
    }
    case OP_cast:
        /* Generic cast operation - for now, just move the value */
        emit(__addi(rd, rs1, 0));
        return;
    default:
        fatal("Unknown opcode");
    }
}

/* Emit the complete code section.
 *
 * Layout, in order: the start-up prologue (6 instructions), the __syscall
 * trampoline (9 instructions), the code of GLOBAL_FUNC, the call into main
 * followed by the exit system call (only when MAIN_BB is set), and finally
 * every instruction of the flattened IR list. The byte sizes of the
 * hand-written parts are the constants cfg_flatten() uses when assigning
 * offsets.
 */
void code_generate(void)
{
    /* start: save original sp in s0; allocate global stack; run init */
    emit(__addi(__s0, __sp, 0));
    emit(__lui(__t0, rv_hi(GLOBAL_FUNC->stack_size)));
    emit(__addi(__t0, __t0, rv_lo(GLOBAL_FUNC->stack_size)));
    emit(__sub(__sp, __sp, __t0));
    emit(__addi(__gp, __sp, 0)); /* Set up global pointer */
    emit(__jal(__ra, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));

    /* syscall trampoline for __syscall - must be at offset 24 */
    /* shift the arguments down by one register: a0 becomes the syscall
     * number in a7, a1..a6 become a0..a5
     */
    emit(__addi(__a7, __a0, 0));
    emit(__addi(__a0, __a1, 0));
    emit(__addi(__a1, __a2, 0));
    emit(__addi(__a2, __a3, 0));
    emit(__addi(__a3, __a4, 0));
    emit(__addi(__a4, __a5, 0));
    emit(__addi(__a5, __a6, 0));
    emit(__ecall());
    emit(__jalr(__zero, __ra, 0));

    ph2_ir_t *ph2_ir;
    for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
         ph2_ir = ph2_ir->next)
        emit_ph2_ir(ph2_ir);

    /* prepare 'argc' and 'argv', then proceed to 'main' function */
    /* use original sp saved in s0 to get argc/argv */
    if (MAIN_BB) {
        emit(__addi(__t0, __s0, 0));
        emit(__lw(__a0, __t0, 0));
        emit(__addi(__a1, __t0, 4));
        emit(__jal(__ra, MAIN_BB->elf_offset - elf_code->size));

        /* exit with main's return value in a0 */
        emit(__addi(__a7, __zero, 93));
        emit(__ecall());
    }

    for (int i = 0; i < ph2_ir_idx; i++) {
        ph2_ir = PH2_IR_FLATTEN[i];
        emit_ph2_ir(ph2_ir);
    }
}

================================================ FILE: src/riscv.c ================================================
/*
 * shecc - Self-Hosting and Educational C Compiler.
 *
 * shecc is freely redistributable under the BSD 2 clause license. See the
 * file "LICENSE" for information on usage and redistribution of this file.
*/ /* RISC-V instruction encoding */ /* opcodes */ typedef enum { /* R type */ rv_add = 51 /* 0b110011 + (0 << 12) */, rv_sub = 1073741875 /* 0b110011 + (0 << 12) + (0x20 << 25) */, rv_xor = 16435 /* 0b110011 + (4 << 12) */, rv_or = 24627 /* 0b110011 + (6 << 12) */, rv_and = 28723 /* 0b110011 + (7 << 12) */, rv_sll = 4147 /* 0b110011 + (1 << 12) */, rv_srl = 20531 /* 0b110011 + (5 << 12) */, rv_sra = 1073762355 /* 0b110011 + (5 << 12) + (0x20 << 25) */, rv_slt = 8243 /* 0b110011 + (2 << 12) */, rv_sltu = 12339 /* 0b110011 + (3 << 12) */, /* I type */ rv_addi = 19 /* 0b0010011 */, rv_xori = 16403 /* 0b0010011 + (4 << 12) */, rv_ori = 24595 /* 0b0010011 + (6 << 12) */, rv_andi = 28691 /* 0b0010011 + (7 << 12) */, rv_slli = 4115 /* 0b0010011 + (1 << 12) */, rv_srli = 20499 /* 0b0010011 + (5 << 12) */, rv_srai = 1073762323 /* 0b0010011 + (5 << 12) + (0x20 << 25) */, rv_slti = 8211 /* 0b0010011 + (2 << 12) */, rv_sltiu = 12307 /* 0b0010011 + (3 << 12) */, rv_sext_b = 1614811155 /* 0b0010011 + (1 << 12) + (0x604 << 20) (imm included)*/, /* load/store */ rv_lb = 3 /* 0b11 */, rv_lh = 4099 /* 0b11 + (1 << 12) */, rv_lw = 8195 /* 0b11 + (2 << 12) */, rv_lbu = 16387 /* 0b11 + (4 << 12) */, rv_lhu = 20483 /* 0b11 + (5 << 12) */, rv_sb = 35 /* 0b0100011 */, rv_sh = 4131 /* 0b0100011 + (1 << 12) */, rv_sw = 8227 /* 0b0100011 + (2 << 12) */, /* branch */ rv_beq = 99 /* 0b1100011 */, rv_bne = 4195 /* 0b1100011 + (1 << 12) */, rv_blt = 16483 /* 0b1100011 + (4 << 12) */, rv_bge = 20579 /* 0b1100011 + (5 << 12) */, rv_bltu = 24675 /* 0b1100011 + (6 << 12) */, rv_bgeu = 28771 /* 0b1100011 + (7 << 12) */, /* jumps */ rv_jal = 111 /* 0b1101111 */, rv_jalr = 103 /* 0b1100111 */, /* misc */ rv_lui = 55 /* 0b0110111 */, rv_auipc = 23 /* 0b0010111 */, rv_ecall = 115 /* 0b1110011 */, rv_ebreak = 1048691 /* 0b1110011 + (1 << 20) */, /* m */ rv_mul = 33554483 /* 0b0110011 + (1 << 25) */, rv_div = 33570867 /* 0b0110011 + (1 << 25) + (4 << 12) */, rv_mod = 33579059 /* 0b0110011 + (1 << 25) + (6 
<< 12) */ } rv_op; /* registers */ typedef enum { __zero = 0, __ra = 1, __sp = 2, __gp = 3, __tp = 4, __t0 = 5, __t1 = 6, __t2 = 7, __s0 = 8, __s1 = 9, __a0 = 10, __a1 = 11, __a2 = 12, __a3 = 13, __a4 = 14, __a5 = 15, __a6 = 16, __a7 = 17, __s2 = 18, __s3 = 19, __s4 = 20, __s5 = 21, __s6 = 22, __s7 = 23, __s8 = 24, __s9 = 25, __s10 = 26, __s11 = 27, __t3 = 28, __t4 = 29, __t5 = 30, __t6 = 31 } rv_reg; int rv_extract_bits(int imm, int i_start, int i_end, int d_start, int d_end) { int v; if (d_end - d_start != i_end - i_start || i_start > i_end || d_start > d_end) fatal("Invalid bit copy"); v = imm >> i_start; v &= ((2 << (i_end - i_start)) - 1); v <<= d_start; return v; } int rv_hi(int val) { return val + ((val & (1 << 11)) << 1); } int rv_lo(int val) { return (val & 0xFFF) - ((val & (1 << 11)) << 1); } int rv_encode_R(rv_op op, rv_reg rd, rv_reg rs1, rv_reg rs2) { return op + (rd << 7) + (rs1 << 15) + (rs2 << 20); } int rv_encode_I(rv_op op, rv_reg rd, rv_reg rs1, int imm) { if (imm > 2047 || imm < -2048) fatal("Offset too large"); if (imm < 0) { imm += 4096; imm &= (1 << 13) - 1; } return op + (rd << 7) + (rs1 << 15) + (imm << 20); } int rv_encode_S(rv_op op, rv_reg rs1, rv_reg rs2, int imm) { if (imm > 2047 || imm < -2048) fatal("Offset too large"); if (imm < 0) { imm += 4096; imm &= (1 << 13) - 1; } return op + (rs1 << 15) + (rs2 << 20) + rv_extract_bits(imm, 0, 4, 7, 11) + rv_extract_bits(imm, 5, 11, 25, 31); } int rv_encode_B(rv_op op, rv_reg rs1, rv_reg rs2, int imm) { bool sign = false; /* 13 signed bits, with bit zero ignored */ if (imm > 4095 || imm < -4096) fatal("Offset too large"); if (imm < 0) sign = true; return op + (rs1 << 15) + (rs2 << 20) + rv_extract_bits(imm, 11, 11, 7, 7) + rv_extract_bits(imm, 1, 4, 8, 11) + rv_extract_bits(imm, 5, 10, 25, 30) + (sign << 31); } int rv_encode_J(rv_op op, rv_reg rd, int imm) { bool sign = false; if (imm < 0) { sign = true; imm = -imm; imm = (1 << 21) - imm; } return op + (rd << 7) + rv_extract_bits(imm, 1, 10, 
21, 30) + rv_extract_bits(imm, 11, 11, 20, 20) + rv_extract_bits(imm, 12, 19, 12, 19) + (sign << 31); } int rv_encode_U(rv_op op, rv_reg rd, int imm) { return op + (rd << 7) + rv_extract_bits(imm, 12, 31, 12, 31); } int __add(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_add, rd, rs1, rs2); } int __sub(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_sub, rd, rs1, rs2); } int __xor(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_xor, rd, rs1, rs2); } int __or(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_or, rd, rs1, rs2); } int __and(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_and, rd, rs1, rs2); } int __sll(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_sll, rd, rs1, rs2); } int __srl(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_srl, rd, rs1, rs2); } int __sra(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_sra, rd, rs1, rs2); } int __slt(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_slt, rd, rs1, rs2); } int __sltu(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_sltu, rd, rs1, rs2); } int __addi(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_addi, rd, rs1, imm); } int __xori(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_xori, rd, rs1, imm); } int __ori(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_ori, rd, rs1, imm); } int __andi(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_andi, rd, rs1, imm); } int __slli(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_slli, rd, rs1, imm); } int __srli(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_srli, rd, rs1, imm); } int __srai(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_srai, rd, rs1, imm); } int __slti(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_slti, rd, rs1, imm); } int __sltiu(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_sltiu, rd, rs1, imm); } int __lb(rv_reg rd, rv_reg rs1, int imm) { return 
rv_encode_I(rv_lb, rd, rs1, imm); } int __lh(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_lh, rd, rs1, imm); } int __lw(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_lw, rd, rs1, imm); } int __lbu(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_lbu, rd, rs1, imm); } int __lhu(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_lhu, rd, rs1, imm); } int __sb(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_S(rv_sb, rs1, rd, imm); } int __sh(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_S(rv_sh, rs1, rd, imm); } int __sw(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_S(rv_sw, rs1, rd, imm); } int __beq(rv_reg rs1, rv_reg rs2, int imm) { return rv_encode_B(rv_beq, rs1, rs2, imm); } int __bne(rv_reg rs1, rv_reg rs2, int imm) { return rv_encode_B(rv_bne, rs1, rs2, imm); } int __blt(rv_reg rs1, rv_reg rs2, int imm) { return rv_encode_B(rv_blt, rs1, rs2, imm); } int __bge(rv_reg rs1, rv_reg rs2, int imm) { return rv_encode_B(rv_bge, rs1, rs2, imm); } int __bltu(rv_reg rs1, rv_reg rs2, int imm) { return rv_encode_B(rv_bltu, rs1, rs2, imm); } int __bgeu(rv_reg rs1, rv_reg rs2, int imm) { return rv_encode_B(rv_bgeu, rs1, rs2, imm); } int __jal(rv_reg rd, int imm) { return rv_encode_J(rv_jal, rd, imm); } int __jalr(rv_reg rd, rv_reg rs1, int imm) { return rv_encode_I(rv_jalr, rd, rs1, imm); } int __lui(rv_reg rd, int imm) { return rv_encode_U(rv_lui, rd, imm); } int __auipc(rv_reg rd, int imm) { return rv_encode_U(rv_auipc, rd, imm); } int __ecall(void) { return rv_encode_I(rv_ecall, __zero, __zero, 0); } int __mul(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_mul, rd, rs1, rs2); } int __div(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_div, rd, rs1, rs2); } int __mod(rv_reg rd, rv_reg rs1, rv_reg rs2) { return rv_encode_R(rv_mod, rd, rs1, rs2); } int __sext_b(rv_reg rd, rv_reg rs) { return rv_encode_I(rv_sext_b, rd, rs, 0); } ================================================ FILE: src/ssa.c 
================================================ /* * shecc - Self-Hosting and Educational C Compiler. * * shecc is freely redistributable under the BSD 2 clause license. See the * file "LICENSE" for information on usage and redistribution of this file. */ #include #include #include "defs.h" #include "globals.c" /* SCCP (Sparse Conditional Constant Propagation) optimization */ #include "opt-sccp.c" /* Configuration constants - replace magic numbers */ #define PHI_WORKLIST_SIZE 128 #define DCE_WORKLIST_SIZE 2048 /* Dead store elimination window size */ #define OVERWRITE_WINDOW 3 void var_list_ensure_capacity(var_list_t *list, int min_capacity) { if (list->capacity >= min_capacity) return; int new_capacity = list->capacity ? list->capacity : HOST_PTR_SIZE; while (new_capacity < min_capacity) new_capacity <<= 1; var_t **new_elements = arena_alloc(BB_ARENA, new_capacity * HOST_PTR_SIZE); if (list->elements) memcpy(new_elements, list->elements, list->size * HOST_PTR_SIZE); list->elements = new_elements; list->capacity = new_capacity; } void var_list_add_var(var_list_t *list, var_t *var) { for (int i = 0; i < list->size; i++) { if (list->elements[i] == var) return; } var_list_ensure_capacity(list, list->size + 1); list->elements[list->size++] = var; } void var_list_assign_array(var_list_t *list, var_t **data, int count) { var_list_ensure_capacity(list, count); memcpy(list->elements, data, count * HOST_PTR_SIZE); list->size = count; } /* cfront does not accept structure as an argument, pass pointer */ void bb_forward_traversal(bb_traversal_args_t *args) { args->bb->visited++; if (args->preorder_cb) args->preorder_cb(args->func, args->bb); /* 'args' is a reference, do not modify it */ bb_traversal_args_t next_args; memcpy(&next_args, args, sizeof(bb_traversal_args_t)); if (args->bb->next) { if (args->bb->next->visited < args->func->visited) { next_args.bb = args->bb->next; bb_forward_traversal(&next_args); } } if (args->bb->then_) { if (args->bb->then_->visited < 
args->func->visited) { next_args.bb = args->bb->then_; bb_forward_traversal(&next_args); } } if (args->bb->else_) { if (args->bb->else_->visited < args->func->visited) { next_args.bb = args->bb->else_; bb_forward_traversal(&next_args); } } if (args->postorder_cb) args->postorder_cb(args->func, args->bb); } /* cfront does not accept structure as an argument, pass pointer */ void bb_backward_traversal(bb_traversal_args_t *args) { args->bb->visited++; if (args->preorder_cb) args->preorder_cb(args->func, args->bb); for (int i = 0; i < MAX_BB_PRED; i++) { if (!args->bb->prev[i].bb) continue; if (args->bb->prev[i].bb->visited < args->func->visited) { /* 'args' is a reference, do not modify it */ bb_traversal_args_t next_args; memcpy(&next_args, args, sizeof(bb_traversal_args_t)); next_args.bb = args->bb->prev[i].bb; bb_backward_traversal(&next_args); } } if (args->postorder_cb) args->postorder_cb(args->func, args->bb); } void bb_index_rpo(func_t *func, basic_block_t *bb) { bb->rpo = func->bb_cnt++; } void bb_reverse_index(func_t *func, basic_block_t *bb) { bb->rpo = func->bb_cnt - bb->rpo; } void bb_build_rpo(func_t *func, basic_block_t *bb) { if (func->bbs == bb) return; basic_block_t *prev = func->bbs; basic_block_t *curr = prev->rpo_next; for (; curr; curr = curr->rpo_next) { if (curr->rpo < bb->rpo) { prev = curr; continue; } bb->rpo_next = curr; prev->rpo_next = bb; prev = curr; return; } prev->rpo_next = bb; } void build_rpo(void) { bb_traversal_args_t *args = arena_alloc_traversal_args(); for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; args->func = func; args->bb = func->bbs; func->visited++; args->postorder_cb = bb_index_rpo; bb_forward_traversal(args); func->visited++; args->postorder_cb = bb_reverse_index; bb_forward_traversal(args); func->visited++; args->postorder_cb = bb_build_rpo; bb_forward_traversal(args); } } basic_block_t *intersect(basic_block_t *i, basic_block_t 
*j) { while (i != j) { while (i->rpo > j->rpo) i = i->idom; while (j->rpo > i->rpo) j = j->idom; } return i; } /* Find the immediate dominator of each basic block to build the dominator tree. * * Once the dominator tree is built, we can perform the more advanced * optimiaztion according to the liveness analysis and the reachability * analysis, e.g. common subexpression elimination, loop optimiaztion or dead * code elimination . * * Reference: * Cooper, Keith D.; Harvey, Timothy J.; Kennedy, Ken (2001). * "A Simple, Fast Dominance Algorithm" */ void build_idom(void) { for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; bool changed; func->bbs->idom = func->bbs; do { changed = false; for (basic_block_t *bb = func->bbs->rpo_next; bb; bb = bb->rpo_next) { /* pick one predecessor */ basic_block_t *pred; for (int i = 0; i < MAX_BB_PRED; i++) { if (!bb->prev[i].bb) continue; if (!bb->prev[i].bb->idom) continue; pred = bb->prev[i].bb; break; } for (int i = 0; i < MAX_BB_PRED; i++) { if (!bb->prev[i].bb) continue; if (bb->prev[i].bb == pred) continue; if (bb->prev[i].bb->idom) pred = intersect(bb->prev[i].bb, pred); } if (bb->idom != pred) { bb->idom = pred; changed = true; } } } while (changed); } } bool dom_connect(basic_block_t *pred, basic_block_t *succ) { if (succ->dom_prev) return false; int i; for (i = 0; i < MAX_BB_DOM_SUCC; i++) { if (pred->dom_next[i] == succ) return false; if (!pred->dom_next[i]) break; } if (i > MAX_BB_DOM_SUCC - 1) fatal("Too many predecessors in dominator tree"); pred->dom_next[i++] = succ; succ->dom_prev = pred; return true; } void bb_build_dom(func_t *func, basic_block_t *bb) { basic_block_t *curr = bb; while (curr != func->bbs) { if (!dom_connect(curr->idom, curr)) break; curr = curr->idom; } } void build_dom(void) { bb_traversal_args_t *args = arena_alloc_traversal_args(); for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function 
declarations without bodies */ if (!func->bbs) continue; args->func = func; args->bb = func->bbs; func->visited++; args->preorder_cb = bb_build_dom; bb_forward_traversal(args); } } void bb_build_df(func_t *func, basic_block_t *bb) { UNUSED(func); int cnt = 0; for (int i = 0; i < MAX_BB_PRED; i++) { if (bb->prev[i].bb) cnt++; } if (cnt <= 0) return; for (int i = 0; i < MAX_BB_PRED; i++) { if (bb->prev[i].bb) { for (basic_block_t *curr = bb->prev[i].bb; curr != bb->idom; curr = curr->idom) curr->DF[curr->df_idx++] = bb; } } } void build_df(void) { bb_traversal_args_t *args = arena_alloc_traversal_args(); for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; args->func = func; args->bb = func->bbs; func->visited++; args->postorder_cb = bb_build_df; bb_forward_traversal(args); } } basic_block_t *reverse_intersect(basic_block_t *i, basic_block_t *j) { while (i != j) { while (i->rpo_r > j->rpo_r) i = i->r_idom; while (j->rpo_r > i->rpo_r) j = j->r_idom; } return i; } void build_r_idom(void) { for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; bool changed; func->exit->r_idom = func->exit; do { changed = false; for (basic_block_t *bb = func->exit->rpo_r_next; bb; bb = bb->rpo_r_next) { /* pick one predecessor */ basic_block_t *pred; if (bb->next && bb->next->r_idom) { pred = bb->next; } else if (bb->else_ && bb->else_->r_idom) { pred = bb->else_; } else if (bb->then_ && bb->then_->r_idom) { pred = bb->then_; } if (bb->next && bb->next != pred && bb->next->r_idom) pred = reverse_intersect(bb->next, pred); if (bb->else_ && bb->else_ != pred && bb->else_->r_idom) pred = reverse_intersect(bb->else_, pred); if (bb->then_ && bb->then_ != pred && bb->then_->r_idom) pred = reverse_intersect(bb->then_, pred); if (bb->r_idom != pred) { bb->r_idom = pred; changed = true; } } } while (changed); } } bool 
rdom_connect(basic_block_t *pred, basic_block_t *succ) { if (succ->rdom_prev) return false; int i; for (i = 0; i < MAX_BB_RDOM_SUCC; i++) { if (pred->rdom_next[i] == succ) return false; if (!pred->rdom_next[i]) break; } if (i > MAX_BB_RDOM_SUCC - 1) fatal("Too many predecessors in reverse dominator tree"); pred->rdom_next[i++] = succ; succ->rdom_prev = pred; return true; } void bb_build_rdom(func_t *func, basic_block_t *bb) { if (!func->bbs) return; for (basic_block_t *curr = bb; curr != func->exit; curr = curr->r_idom) { if (!rdom_connect(curr->r_idom, curr)) break; } } void build_rdom(void) { bb_traversal_args_t *args = arena_alloc_traversal_args(); for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; args->func = func; args->bb = func->exit; func->visited++; args->preorder_cb = bb_build_rdom; bb_backward_traversal(args); } } void bb_build_rdf(func_t *func, basic_block_t *bb) { UNUSED(func); int cnt = 0; if (bb->next) cnt++; if (bb->then_) cnt++; if (bb->else_) cnt++; if (cnt <= 0) return; if (bb->next) { for (basic_block_t *curr = bb->next; curr != bb->r_idom; curr = curr->r_idom) curr->RDF[curr->rdf_idx++] = bb; } if (bb->else_) { for (basic_block_t *curr = bb->else_; curr != bb->r_idom; curr = curr->r_idom) curr->RDF[curr->rdf_idx++] = bb; } if (bb->then_) { for (basic_block_t *curr = bb->then_; curr != bb->r_idom; curr = curr->r_idom) curr->RDF[curr->rdf_idx++] = bb; } } void build_rdf(void) { bb_traversal_args_t *args = arena_alloc_traversal_args(); for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; args->func = func; args->bb = func->exit; func->visited++; args->postorder_cb = bb_build_rdf; bb_backward_traversal(args); } } void use_chain_add_tail(insn_t *i, var_t *var) { use_chain_t *u = arena_calloc(INSN_ARENA, 1, sizeof(use_chain_t)); u->insn = i; if (!var->users_head) var->users_head = 
u; else var->users_tail->next = u; u->prev = var->users_tail; var->users_tail = u; } void use_chain_delete(use_chain_t *u, var_t *var) { if (u->prev) u->prev->next = u->next; else { var->users_head = u->next; u->next->prev = NULL; } if (u->next) u->next->prev = u->prev; else { var->users_tail = u->prev; u->prev->next = NULL; } } void use_chain_build(void) { for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) { for (insn_t *i = bb->insn_list.head; i; i = i->next) { if (i->rs1) use_chain_add_tail(i, i->rs1); if (i->rs2) use_chain_add_tail(i, i->rs2); } } } } bool var_check_killed(var_t *var, basic_block_t *bb) { for (int i = 0; i < bb->live_kill.size; i++) { if (bb->live_kill.elements[i] == var) return true; } return false; } void bb_add_killed_var(basic_block_t *bb, var_t *var) { var_list_add_var(&bb->live_kill, var); } void var_add_killed_bb(var_t *var, basic_block_t *bb) { bool found = false; ref_block_t *ref; for (ref = var->ref_block_list.head; ref; ref = ref->next) { if (ref->bb == bb) { found = true; break; } } if (found) return; ref = arena_calloc(GENERAL_ARENA, 1, sizeof(ref_block_t)); ref->bb = bb; if (!var->ref_block_list.head) var->ref_block_list.head = ref; else var->ref_block_list.tail->next = ref; var->ref_block_list.tail = ref; } void fn_add_global(func_t *func, var_t *var) { bool found = false; symbol_t *sym; for (sym = func->global_sym_list.head; sym; sym = sym->next) { if (sym->var == var) { found = true; break; } } if (found) return; sym = arena_alloc_symbol(); sym->var = var; if (!func->global_sym_list.head) { sym->index = 0; func->global_sym_list.head = sym; func->global_sym_list.tail = sym; } else { sym->index = func->global_sym_list.tail->index + 1; func->global_sym_list.tail->next = sym; func->global_sym_list.tail = sym; } } void bb_solve_globals(func_t *func, basic_block_t *bb) { UNUSED(func); for 
(insn_t *insn = bb->insn_list.head; insn; insn = insn->next) { if (insn->rs1) if (!var_check_killed(insn->rs1, bb)) fn_add_global(bb->belong_to, insn->rs1); if (insn->rs2) if (!var_check_killed(insn->rs2, bb)) fn_add_global(bb->belong_to, insn->rs2); if (insn->rd) { bb_add_killed_var(bb, insn->rd); var_add_killed_bb(insn->rd, bb); } } } void solve_globals(void) { bb_traversal_args_t *args = arena_alloc_traversal_args(); for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; args->func = func; args->bb = func->bbs; func->visited++; args->postorder_cb = bb_solve_globals; bb_forward_traversal(args); } } bool var_check_in_scope(var_t *var, block_t *block) { func_t *func = block->func; while (block) { for (int i = 0; i < block->locals.capacity; i++) { if (var == block->locals.elements[i]) return true; } block = block->parent; } for (int i = 0; i < func->num_params; i++) { if (&func->param_defs[i] == var) return true; } return false; } bool insert_phi_insn(basic_block_t *bb, var_t *var) { bool found = false; for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) { if ((insn->opcode == OP_phi) && (insn->rd == var)) { found = true; break; } } if (found) return false; insn_t *head = bb->insn_list.head; insn_t *n = arena_calloc(INSN_ARENA, 1, sizeof(insn_t)); n->opcode = OP_phi; n->rd = var; n->rs1 = var; n->rs2 = var; if (!head) { bb->insn_list.head = n; bb->insn_list.tail = n; } else { head->prev = n; n->next = head; bb->insn_list.head = n; } return true; } void solve_phi_insertion(void) { for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; for (symbol_t *sym = func->global_sym_list.head; sym; sym = sym->next) { var_t *var = sym->var; basic_block_t *work_list[PHI_WORKLIST_SIZE]; int work_list_idx = 0; for (ref_block_t *ref = var->ref_block_list.head; ref; ref = ref->next) { if (work_list_idx >= 
PHI_WORKLIST_SIZE - 1) fatal("PHI worklist overflow"); work_list[work_list_idx++] = ref->bb; } for (int i = 0; i < work_list_idx; i++) { basic_block_t *bb = work_list[i]; for (int j = 0; j < bb->df_idx; j++) { basic_block_t *df = bb->DF[j]; if (!var_check_in_scope(var, df->scope)) continue; bool is_decl = false; for (symbol_t *s = df->symbol_list.head; s; s = s->next) { if (s->var == var) { is_decl = true; break; } } if (is_decl) continue; if (df == func->exit) continue; if (var->is_global) continue; if (insert_phi_insn(df, var)) { bool found = false; /* Restrict phi insertion of ternary operation, and * logical-and/or operation. * * The ternary and logical-and/or operations don't * create new scope, so prevent temporary variable from * propagating through the dominance tree. */ if (var->is_ternary_ret || var->is_logical_ret) continue; for (int l = 0; l < work_list_idx; l++) { if (work_list[l] == df) { found = true; break; } } if (!found) { if (work_list_idx >= PHI_WORKLIST_SIZE - 1) fatal("PHI worklist overflow"); work_list[work_list_idx++] = df; } } } } } } } var_t *require_var(block_t *blk); void new_name(block_t *block, var_t **var) { var_t *v = *var; if (!v->base) v->base = v; if (v->is_global) return; int i = v->base->rename.counter++; v->base->rename.stack[v->base->rename.stack_idx++] = i; var_t *vd = require_var(block); memcpy(vd, *var, sizeof(var_t)); vd->base = *var; vd->subscript = i; v->subscripts[v->subscripts_idx++] = vd; var[0] = vd; } var_t *get_stack_top_subscript_var(var_t *var) { if (var->base->rename.stack_idx < 1) return var; /* fallback: use base when no prior definition */ int sub = var->base->rename.stack[var->base->rename.stack_idx - 1]; for (int i = 0; i < var->base->subscripts_idx; i++) { if (var->base->subscripts[i]->subscript == sub) return var->base->subscripts[i]; } fatal("Failed to find subscript variable on rename stack"); return NULL; /* unreachable, but silences compiler warning */ } void rename_var(var_t **var) { var_t *v = *var; 
if (!v->base) v->base = v; if (v->is_global) return; var[0] = get_stack_top_subscript_var(*var); } void pop_name(var_t *var) { if (var->is_global) return; var->base->rename.stack_idx--; } void append_phi_operand(insn_t *insn, var_t *var, basic_block_t *bb_from) { phi_operand_t *op = arena_calloc(GENERAL_ARENA, 1, sizeof(phi_operand_t)); op->from = bb_from; op->var = get_stack_top_subscript_var(var); phi_operand_t *tail = insn->phi_ops; if (tail) { while (tail->next) tail = tail->next; tail->next = op; } else insn->phi_ops = op; } void bb_solve_phi_params(basic_block_t *bb) { for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) { if (insn->opcode == OP_phi) new_name(bb->scope, &insn->rd); else { if (insn->rs1) rename_var(&insn->rs1); if (insn->rs2) if (!insn->rs2->is_func) rename_var(&insn->rs2); if (insn->rd) new_name(bb->scope, &insn->rd); } } if (bb->next) { for (insn_t *insn = bb->next->insn_list.head; insn; insn = insn->next) { if (insn->opcode == OP_phi) append_phi_operand(insn, insn->rd, bb); } } if (bb->then_) { for (insn_t *insn = bb->then_->insn_list.head; insn; insn = insn->next) { if (insn->opcode == OP_phi) append_phi_operand(insn, insn->rd, bb); } } if (bb->else_) { for (insn_t *insn = bb->else_->insn_list.head; insn; insn = insn->next) { if (insn->opcode == OP_phi) append_phi_operand(insn, insn->rd, bb); } } for (int i = 0; i < MAX_BB_DOM_SUCC; i++) { if (!bb->dom_next[i]) break; bb_solve_phi_params(bb->dom_next[i]); } for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) { if (insn->opcode == OP_phi) pop_name(insn->rd); else if (insn->rd) pop_name(insn->rd); } } void solve_phi_params(void) { for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; for (int i = 0; i < func->num_params; i++) { /* FIXME: Direct argument renaming in SSA construction phase may * interfere with later optimization passes */ var_t *var = require_var(func->bbs->scope); var_t 
*base = &func->param_defs[i]; memcpy(var, base, sizeof(var_t)); var->base = base; var->subscript = 0; base->rename.stack[base->rename.stack_idx++] = base->rename.counter++; base->subscripts[base->subscripts_idx++] = var; } bb_solve_phi_params(func->bbs); } } void append_unwound_phi_insn(basic_block_t *bb, var_t *dest, var_t *rs) { insn_t *n = arena_calloc(INSN_ARENA, 1, sizeof(insn_t)); n->opcode = OP_unwound_phi; n->rd = dest; n->rs1 = rs; n->belong_to = bb; insn_t *tail = bb->insn_list.tail; if (!tail) { bb->insn_list.head = n; bb->insn_list.tail = n; } else { /* insert it before branch instruction */ if (tail->opcode == OP_branch) { if (tail->prev) { tail->prev->next = n; n->prev = tail->prev; } else bb->insn_list.head = n; n->next = tail; tail->prev = n; } else { tail->next = n; bb->insn_list.tail = n; } } } void bb_unwind_phi(func_t *func, basic_block_t *bb) { UNUSED(func); insn_t *insn; for (insn = bb->insn_list.head; insn; insn = insn->next) { if (insn->opcode != OP_phi) break; for (phi_operand_t *operand = insn->phi_ops; operand; operand = operand->next) append_unwound_phi_insn(operand->from, insn->rd, operand->var); } bb->insn_list.head = insn; if (!insn) bb->insn_list.tail = NULL; else insn->prev = NULL; } void unwind_phi(void) { bb_traversal_args_t *args = arena_alloc_traversal_args(); for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; args->func = func; args->bb = func->bbs; func->visited++; args->preorder_cb = bb_unwind_phi; bb_forward_traversal(args); } } bool is_dominate(basic_block_t *pred, basic_block_t *succ) { int i; bool found = false; for (i = 0; i < MAX_BB_DOM_SUCC; i++) { if (!pred->dom_next[i]) break; if (pred->dom_next[i] == succ) { found = true; break; } found |= is_dominate(pred->dom_next[i], succ); } return found; } /* * For any variable, the basic block that defines it must dominate all the * basic blocks where it is used; otherwise, it is an invalid 
cross-block * initialization. */ void bb_check_var_cross_init(func_t *func, basic_block_t *bb) { UNUSED(func); for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) { if (insn->opcode != OP_allocat) continue; var_t *var = insn->rd; ref_block_t *ref; for (ref = var->ref_block_list.head; ref; ref = ref->next) { if (ref->bb == bb) continue; if (!is_dominate(bb, ref->bb)) printf("Warning: Variable '%s' cross-initialized\n", var->var_name); } } } /** * A variable's initialization lives in a basic block that does not dominate * all of its uses, so control flow can reach a use without first passing * through its initialization (i.e., a possibly-uninitialized use). * * For Example: * // Jumps directly to 'label', skipping the declaration below * goto label; * if (1) { * // This line is never executed when 'goto' is taken * int x; * label: * // Uses 'x' after its declaration was bypassed * x = 5; * } */ void check_var_cross_init() { bb_traversal_args_t *args = arena_alloc_traversal_args(); for (func_t *func = FUNC_LIST.head; func; func = func->next) { /* Skip function declarations without bodies */ if (!func->bbs) continue; args->func = func; args->bb = func->bbs; func->visited++; args->postorder_cb = bb_check_var_cross_init; bb_forward_traversal(args); } } #ifdef __SHECC__ #else void bb_dump_connection(FILE *fd, basic_block_t *curr, basic_block_t *next, bb_connection_type_t type) { char *str; switch (type) { case NEXT: str = "%s_%p:s->%s_%p:n\n"; break; case THEN: str = "%s_%p:sw->%s_%p:n\n"; break; case ELSE: str = "%s_%p:se->%s_%p:n\n"; break; default: fatal("Unknown basic block connection type"); } char *pred; void *pred_id; if (curr->insn_list.tail) { pred = "insn"; pred_id = curr->insn_list.tail; } else { pred = "pseudo"; pred_id = curr; } char *succ; void *succ_id; if (next->insn_list.tail) { succ = "insn"; succ_id = next->insn_list.head; } else { succ = "pseudo"; succ_id = next; } fprintf(fd, str, pred, pred_id, succ, succ_id); } /* escape character for 
the tag in dot file */ char *get_insn_op(insn_t *insn) { switch (insn->opcode) { case OP_add: return "+"; case OP_sub: return "-"; case OP_mul: return "*"; case OP_div: return "/"; case OP_mod: return "%%"; case OP_lshift: return "<<"; case OP_rshift: return ">>"; case OP_eq: return "=="; case OP_neq: return "!="; case OP_gt: return ">"; case OP_lt: return "<"; case OP_geq: return ">="; case OP_leq: return "<="; case OP_bit_and: return "&"; case OP_bit_or: return "|"; case OP_bit_xor: return "^"; case OP_log_and: return "&&"; case OP_log_or: return "||"; default: fatal("Unknown opcode in operator string conversion"); return ""; /* unreachable, but silences compiler warning */ } } void bb_dump(FILE *fd, func_t *func, basic_block_t *bb) { bb->visited++; bool next_ = false, then_ = false, else_ = false; if (bb->next) next_ = true; if (bb->then_) then_ = true; if (bb->else_) else_ = true; if (then_ && !else_) printf("Warning: missing false branch\n"); if (!then_ && else_) printf("Warning: missing true branch\n"); if (next_ && (then_ || else_)) printf("Warning: normal BB with condition\n"); fprintf(fd, "subgraph cluster_%p {\n", bb); fprintf(fd, "label=\"BasicBlock %p (%s)\"\n", bb, bb->bb_label_name); insn_t *insn = bb->insn_list.head; if (!insn) fprintf(fd, "pseudo_%p [label=\"pseudo\"]\n", bb); if (!insn && (then_ || else_)) printf("Warning: pseudo node should only have NEXT\n"); for (; insn; insn = insn->next) { if (insn->opcode == OP_phi) { fprintf(fd, "insn_%p [label=", insn); fprintf(fd, "<%s%d := PHI(%s%d", insn->rd->var_name, insn->rd->subscript, insn->phi_ops->var->var_name, insn->phi_ops->var->subscript); for (phi_operand_t *op = insn->phi_ops->next; op; op = op->next) { fprintf(fd, ", %s%d", op->var->var_name, op->var->subscript); } fprintf(fd, ")>]\n"); } else { char str[256]; switch (insn->opcode) { case OP_allocat: sprintf(str, "<%s%d := ALLOC>", insn->rd->var_name, insn->rd->subscript); break; case OP_load_constant: sprintf(str, "<%s%d := CONST %d>", 
insn->rd->var_name, insn->rd->subscript, insn->rd->init_val); break; case OP_load_data_address: sprintf(str, "<%s%d := [.data] + %d>", insn->rd->var_name, insn->rd->subscript, insn->rd->init_val); break; case OP_load_rodata_address: sprintf(str, "<%s%d := [.rodata] + %d>", insn->rd->var_name, insn->rd->subscript, insn->rd->init_val); break; case OP_address_of: sprintf(str, "<%s%d := &%s%d>", insn->rd->var_name, insn->rd->subscript, insn->rs1->var_name, insn->rs1->subscript); break; case OP_assign: sprintf(str, "<%s%d := %s%d>", insn->rd->var_name, insn->rd->subscript, insn->rs1->var_name, insn->rs1->subscript); break; case OP_read: sprintf(str, "<%s%d := (%s%d)>", insn->rd->var_name, insn->rd->subscript, insn->rs1->var_name, insn->rs1->subscript); break; case OP_write: if (insn->rs2->is_func) sprintf(str, "<(%s%d) := %s>", insn->rs1->var_name, insn->rs1->subscript, insn->rs2->var_name); else sprintf(str, "<(%s%d) := %s%d>", insn->rs1->var_name, insn->rs1->subscript, insn->rs2->var_name, insn->rs2->subscript); break; case OP_branch: sprintf(str, "%d>", insn->rs1->var_name, insn->rs1->subscript); break; case OP_jump: sprintf(str, ""); break; case OP_label: sprintf(str, "