Repository: HigherOrderCO/HVM2 Branch: main Commit: 654276018084 Files: 84 Total size: 422.5 KB Directory structure: gitextract_cj26vbd4/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── config.yml │ │ └── feature_request.md │ └── workflows/ │ ├── bench.yml │ ├── checks.yml │ └── delete-cancelled.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── examples/ │ ├── demo_io/ │ │ ├── main.bend │ │ └── main.hvm │ ├── sort_bitonic/ │ │ ├── main.bend │ │ └── main.hvm │ ├── sort_radix/ │ │ ├── main.bend │ │ └── main.hvm │ ├── stress/ │ │ ├── README.md │ │ ├── main.bend │ │ ├── main.hvm │ │ ├── main.js │ │ └── main.py │ ├── sum_rec/ │ │ ├── main.bend │ │ ├── main.hvm │ │ ├── main.js │ │ └── sum.js │ ├── sum_tree/ │ │ ├── main.bend │ │ └── main.hvm │ └── tuples/ │ ├── tuples.bend │ └── tuples.hvm ├── paper/ │ ├── HVM2.typst │ ├── README.md │ └── inet.typ ├── src/ │ ├── ast.rs │ ├── cmp.rs │ ├── hvm.c │ ├── hvm.cu │ ├── hvm.cuh │ ├── hvm.h │ ├── hvm.rs │ ├── lib.rs │ ├── main.rs │ ├── run.c │ └── run.cu └── tests/ ├── programs/ │ ├── empty.hvm │ ├── hello-world.hvm │ ├── io/ │ │ ├── basic.bend │ │ ├── basic.hvm │ │ ├── invalid-name.bend │ │ ├── invalid-name.hvm │ │ ├── open1.bend │ │ ├── open1.hvm │ │ ├── open2.bend │ │ ├── open2.hvm │ │ ├── open3.bend │ │ └── open3.hvm │ ├── list.hvm │ ├── numeric-casts.hvm │ ├── numerics/ │ │ ├── f24.hvm │ │ ├── i24.hvm │ │ └── u24.hvm │ └── safety-check.hvm ├── run.rs └── snapshots/ ├── run__file@empty.hvm.snap ├── run__file@hello-world.hvm.snap ├── run__file@list.hvm.snap ├── run__file@numeric-casts.hvm.snap ├── run__file@numerics__f24.hvm.snap ├── run__file@numerics__i24.hvm.snap ├── run__file@numerics__u24.hvm.snap ├── run__file@safety-check.hvm.snap ├── run__file@sort_bitonic__main.hvm.snap ├── run__file@sort_radix__main.hvm.snap ├── run__file@stress__main.hvm.snap ├── run__file@sum_rec__main.hvm.snap ├── run__file@sum_tree__main.hvm.snap ├── run__file@tuples__tuples.hvm.snap ├── 
run__io_file@demo_io__main.hvm.snap ├── run__io_file@io__basic.hvm.snap ├── run__io_file@io__invalid-name.hvm.snap ├── run__io_file@io__open1.hvm.snap ├── run__io_file@io__open2.hvm.snap ├── run__io_file@io__open3.hvm.snap └── run__io_file@io__read_and_print.hvm.snap ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: Bug report description: Create a report to help us improve. body: - type: markdown attributes: value: | ### Bug Report Note: your issue may already have been reported; please check the existing [issues](https://github.com/HigherOrderCO/HVM/issues). If you find a similar issue, respond with a reaction or any additional information that you feel may be helpful. ### For Windows Users There is currently no native way to install HVM on Windows. As a temporary workaround, please use [WSL2](https://learn.microsoft.com/en-us/windows/wsl/install). - type: textarea attributes: label: Reproducing the behavior description: A clear and concise description of what the bug is. value: | Example: Running command... With code.... Error... Expected behavior.... validations: required: true - type: textarea attributes: label: System Settings description: Your System's settings value: | Example: - OS: [e.g. Linux (Ubuntu 22.04)] - CPU: [e.g. Intel i9-14900KF] - GPU: [e.g. RTX 4090] - CUDA version: [e.g. release 12.4, V12.4.131] validations: required: true - type: textarea attributes: label: Additional context description: Add any other context about the problem here (Optional). 
================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: Bend Related Issues url: https://github.com/HigherOrderCO/Bend/issues/new/choose about: For Bend-related issues, please report them on the Bend repository. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest a feature that you think should be added. title: '' labels: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. 
================================================ FILE: .github/workflows/bench.yml ================================================ name: Bench on: pull_request: concurrency: group: bench-${{ github.ref }} cancel-in-progress: true jobs: bench: runs-on: [self-hosted, cuda] timeout-minutes: 10 steps: - uses: actions/checkout@v3 - name: compare perf run: | git fetch origin main git clone https://github.com/HigherOrderCO/hvm-bench cd hvm-bench NO_COLOR=1 cargo run bench --repo-dir ../ -r main --timeout 20 > ../table shell: bash -l {0} - name: write comment run: | echo 'Perf run for [`'`git rev-parse --short ${{ github.sha }}`'`](https://github.com/higherorderco/HVM/commit/${{ github.sha }}):' >> comment echo '```' >> comment cat table >> comment echo '```' >> comment - name: post comment run: gh pr comment ${{ github.event.number }} -F comment env: GH_TOKEN: ${{ secrets.PAT }} - name: hide old comment env: GH_TOKEN: ${{ secrets.PAT }} run: | COMMENT_ID=$( gh api graphql -F pr=${{ github.event.number }} -f query=' query($pr: Int!) { organization(login: "higherorderco") { repository(name: "HVM") { pullRequest(number: $pr) { comments(last: 100) { nodes { id author { login } } } } } } } ' \ | jq -r ' [ .data.organization.repository.pullRequest.comments.nodes | .[] | select(.author.login == "HigherOrderBot") | .id ] | .[-2] ' ) if [ $COMMENT_ID != null ] then gh api graphql -F id=$COMMENT_ID -f query=' mutation($id: ID!) 
{ minimizeComment(input: { subjectId: $id, classifier: OUTDATED, }) { minimizedComment { ...on Comment { id } } } } ' fi - name: delete on cancel if: ${{ cancelled() }} run: gh workflow run delete-cancelled.yml -f run_id=${{ github.run_id }} env: GH_TOKEN: ${{ secrets.PAT }} ================================================ FILE: .github/workflows/checks.yml ================================================ name: Checks on: pull_request: merge_group: push: branches: - main jobs: check: runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@v3 - uses: actions/cache@v2 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-check-${{ hashFiles('**/Cargo.lock') }} - run: RUSTFLAGS="-D warnings" cargo check --all-targets test: runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@v3 - uses: actions/cache@v2 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-test-${{ hashFiles('**/Cargo.lock') }} - run: cargo test --release test-cuda: needs: test # don't bother the cuda machine if other tests are failing runs-on: [self-hosted, cuda] timeout-minutes: 10 steps: - uses: actions/checkout@v3 - run: cargo test --release shell: bash -l {0} ================================================ FILE: .github/workflows/delete-cancelled.yml ================================================ name: Delete Cancelled Benchmarks on: workflow_dispatch: inputs: run_id: type: string description: "" jobs: delete: runs-on: ubuntu-latest steps: - run: gh api "repos/higherorderco/hvm-core/actions/runs/${{ inputs.run_id }}" -X DELETE env: GH_TOKEN: ${{ secrets.PAT }} ================================================ FILE: .gitignore ================================================ .fill.tmp hvm-cuda-experiments/ src/hvm src/old_cmp.rs src/tmp/ target/ tmp/ .hvm/ examples/**/main examples/**/*.c examples/**/*.cu .out.hvm # nix-direnv /.direnv/ /.envrc ================================================ FILE: Cargo.toml 
================================================ [package] name = "hvm" description = "A massively parallel, optimal functional runtime in Rust." license = "Apache-2.0" version = "2.0.22" edition = "2021" rust-version = "1.74" build = "build.rs" repository = "https://github.com/HigherOrderCO/HVM" [lib] name = "hvm" path = "src/lib.rs" [dependencies] TSPL = "0.0.13" clap = "4.5.2" highlight_error = "0.1.1" num_cpus = "1.0" [build-dependencies] cc = "1.0" num_cpus = "1.0" [features] default = [] # C and CUDA features are determined during build c = [] cuda = [] [dev-dependencies] insta = { version = "1.39.0", features = ["glob"] } ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS ================================================ FILE: README.md ================================================ Higher-order Virtual Machine 2 (HVM2) ===================================== **Higher-order Virtual Machine 2 (HVM2)** is a massively parallel [Interaction Combinator](https://www.semanticscholar.org/paper/Interaction-Combinators-Lafont/6cfe09aa6e5da6ce98077b7a048cb1badd78cc76) evaluator. By compiling programs from high-level languages (such as Python and Haskell) to HVM, one can run these languages directly on massively parallel hardware, like GPUs, with near-ideal speedup. 
HVM2 is the successor to [HVM1](https://github.com/HigherOrderCO/HVM1), a 2022 prototype of this concept. Compared to its predecessor, HVM2 is simpler, faster and, most importantly, more correct. [HOC](https://HigherOrderCO.com/) provides long-term support for all features listed on its [PAPER](./paper/HVM2.pdf). This repository provides a low-level IR language for specifying the HVM2 nets and a compiler from that language to C and CUDA. It is not meant for direct human usage. If you're looking for a high-level language to interface with HVM2, check [Bend](https://github.com/HigherOrderCO/Bend) instead. Usage ----- > DISCLAIMER: Windows is currently not supported; please use [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) for now as a workaround. First install the dependencies: * If you want to use the C runtime, install a C11-compatible compiler like GCC or Clang. * If you want to use the CUDA runtime, install CUDA and nvcc (the CUDA compiler). - _HVM requires CUDA 12.x and currently only works on Nvidia GPUs._ Install HVM2: ```sh cargo install hvm ``` There are multiple ways to run an HVM program: ```sh hvm run <file.hvm> # interpret via Rust hvm run-c <file.hvm> # interpret via C hvm run-cu <file.hvm> # interpret via CUDA hvm gen-c <file.hvm> # compile to standalone C hvm gen-cu <file.hvm> # compile to standalone CUDA ``` All modes produce the same output. The compiled modes require you to compile the generated file (with `gcc file.c -o file`, for example), but are faster to run. The CUDA versions have much higher peak performance but are less stable. As a rule of thumb, `gen-c` should be used in production. Language -------- HVM is a low-level compile target for high-level languages. It provides a raw syntax for wiring interaction nets. For example: ```javascript @main = a & @sum ~ (28 (0 a)) @sum = (?(((a a) @sum__C0) b) b) @sum__C0 = ({c a} ({$([*2] $([+1] d)) $([*2] $([+0] b))} f)) &! @sum ~ (a (b $([+] $(e f)))) &! @sum ~ (c (d e)) ``` The file above implements a recursive sum. 
If that looks unreadable to you - don't worry, it isn't meant to. [Bend](https://github.com/HigherOrderCO/Bend) is the human-readable language and should be used both by end users and by languages aiming to target the HVM. If you're looking to learn more about the core syntax and tech, though, please check the [PAPER](./paper/HVM2.pdf). ================================================ FILE: build.rs ================================================ fn main() { let cores = num_cpus::get(); let tpcl2 = (cores as f64).log2().floor() as u32; println!("cargo:rerun-if-changed=src/run.c"); println!("cargo:rerun-if-changed=src/hvm.c"); println!("cargo:rerun-if-changed=src/run.cu"); println!("cargo:rerun-if-changed=src/hvm.cu"); println!("cargo:rustc-link-arg=-rdynamic"); match cc::Build::new() .file("src/run.c") .opt_level(3) .warnings(false) .define("TPC_L2", &*tpcl2.to_string()) .define("IO", None) .try_compile("hvm-c") { Ok(_) => println!("cargo:rustc-cfg=feature=\"c\""), Err(e) => { println!("cargo:warning=\x1b[1m\x1b[31mWARNING: Failed to compile/run.c:\x1b[0m {}", e); println!("cargo:warning=Ignoring/run.c and proceeding with build. 
\x1b[1mThe C runtime will not be available.\x1b[0m"); } } // Builds hvm.cu if std::process::Command::new("nvcc").arg("--version").stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().is_ok() { if let Ok(cuda_path) = std::env::var("CUDA_HOME") { println!("cargo:rustc-link-search=native={}/lib64", cuda_path); } else { println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64"); } cc::Build::new() .cuda(true) .file("src/run.cu") .define("IO", None) .flag("-diag-suppress=177") // variable was declared but never referenced .flag("-diag-suppress=550") // variable was set but never used .flag("-diag-suppress=20039") // a __host__ function redeclared with __device__, hence treated as a __host__ __device__ function .compile("hvm-cu"); println!("cargo:rustc-cfg=feature=\"cuda\""); } else { println!("cargo:warning=\x1b[1m\x1b[31mWARNING: CUDA compiler not found.\x1b[0m \x1b[1mHVM will not be able to run on GPU.\x1b[0m"); } } ================================================ FILE: examples/demo_io/main.bend ================================================ test-io = 1 def unwrap(res): match res: case Result/Ok: return res.val case Result/Err: return res.val def open(): return call("OPEN", ("./LICENSE", "r")) def read(f): return call("READ", (f, 47)) def print(bytes): with IO: * <- call("WRITE", (1, bytes)) * <- call("WRITE", (1, "\n")) return wrap(*) def close(f): return call("CLOSE", f) def main(): with IO: f <- open() f = unwrap(f) bytes <- read(f) bytes = unwrap(bytes) * <- print(bytes) res <- close(f) return wrap(res) ================================================ FILE: examples/demo_io/main.hvm ================================================ @IO/Call = (a (b (c (d ((@IO/Call/tag (a (b (c (d e))))) e))))) @IO/Call/tag = 1 @IO/Done = (a (b ((@IO/Done/tag (a (b c))) c))) @IO/Done/tag = 0 @IO/MAGIC = (13683217 16719857) @IO/bind = ((@IO/bind__C2 a) a) @IO/bind__C0 = (* (b (a c))) & @undefer ~ (a (b c)) @IO/bind__C1 = (* (* (a (b ((c d) (e 
g)))))) & @IO/Call ~ (@IO/MAGIC (a (b ((c f) g)))) & @IO/bind ~ (d (e f)) @IO/bind__C2 = (?((@IO/bind__C0 @IO/bind__C1) a) a) @IO/wrap = a & @IO/Done ~ (@IO/MAGIC a) @String/Cons = (a (b ((@String/Cons/tag (a (b c))) c))) @String/Cons/tag = 1 @String/Nil = ((@String/Nil/tag a) a) @String/Nil/tag = 0 @call = (a (b c)) & @IO/Call ~ (@IO/MAGIC (a (b (@call__C0 c)))) @call__C0 = a & @IO/Done ~ (@IO/MAGIC a) @close = f & @call ~ (e f) & @String/Cons ~ (67 (d e)) & @String/Cons ~ (76 (c d)) & @String/Cons ~ (79 (b c)) & @String/Cons ~ (83 (a b)) & @String/Cons ~ (69 (@String/Nil a)) @main = w & @IO/bind ~ (@open ((((s (a u)) (@IO/wrap v)) v) w)) & @IO/bind ~ (c ((((n (o (d q))) (r (s t))) t) u)) & @unwrap ~ (a {b r}) & @read ~ (b c) & @IO/bind ~ (f ((((g (k (* m))) (n (o p))) p) q)) & @print ~ (e f) & @unwrap ~ (d e) & @IO/bind ~ (h ((((i i) (k l)) l) m)) & @close ~ (g h) @open = o & @call ~ (d ((m n) o)) & @String/Cons ~ (79 (c d)) & @String/Cons ~ (80 (b c)) & @String/Cons ~ (69 (a b)) & @String/Cons ~ (78 (@String/Nil a)) & @String/Cons ~ (46 (l m)) & @String/Cons ~ (47 (k l)) & @String/Cons ~ (76 (j k)) & @String/Cons ~ (73 (i j)) & @String/Cons ~ (67 (h i)) & @String/Cons ~ (69 (g h)) & @String/Cons ~ (78 (f g)) & @String/Cons ~ (83 (e f)) & @String/Cons ~ (69 (@String/Nil e)) & @String/Cons ~ (114 (@String/Nil n)) @print = (f h) & @IO/bind ~ (g (@print__C3 h)) & @call ~ (e ((1 f) g)) & @String/Cons ~ (87 (d e)) & @String/Cons ~ (82 (c d)) & @String/Cons ~ (73 (b c)) & @String/Cons ~ (84 (a b)) & @String/Cons ~ (69 (@String/Nil a)) @print__C0 = ((* a) (* a)) @print__C1 = g & @call ~ (e ((1 f) g)) & @String/Cons ~ (87 (d e)) & @String/Cons ~ (82 (c d)) & @String/Cons ~ (73 (b c)) & @String/Cons ~ (84 (a b)) & @String/Cons ~ (69 (@String/Nil a)) & @String/Cons ~ (10 (@String/Nil f)) @print__C2 = (a (* c)) & @IO/bind ~ (@print__C1 (((@print__C0 (a b)) b) c)) @print__C3 = ((@print__C2 (@IO/wrap a)) a) @read = (e f) & @call ~ (d ((e 47) f)) & @String/Cons ~ (82 (c d)) & 
@String/Cons ~ (69 (b c)) & @String/Cons ~ (65 (a b)) & @String/Cons ~ (68 (@String/Nil a)) @test-io = 1 @undefer = (((a a) b) b) @unwrap = ((@unwrap__C0 a) a) @unwrap__C0 = (?(((a a) (* (b b))) c) c) ================================================ FILE: examples/sort_bitonic/main.bend ================================================ def gen(d, x): switch d: case 0: return x case _: return (gen(d-1, x * 2 + 1), gen(d-1, x * 2)) def sum(d, t): switch d: case 0: return t case _: (t.a, t.b) = t return sum(d-1, t.a) + sum(d-1, t.b) def swap(s, a, b): switch s: case 0: return (a,b) case _: return (b,a) def warp(d, s, a, b): switch d: case 0: return swap(s ^ (a > b), a, b) case _: (a.a,a.b) = a (b.a,b.b) = b (A.a,A.b) = warp(d-1, s, a.a, b.a) (B.a,B.b) = warp(d-1, s, a.b, b.b) return ((A.a,B.a),(A.b,B.b)) def flow(d, s, t): switch d: case 0: return t case _: (t.a, t.b) = t return down(d, s, warp(d-1, s, t.a, t.b)) def down(d,s,t): switch d: case 0: return t case _: (t.a, t.b) = t return (flow(d-1, s, t.a), flow(d-1, s, t.b)) def sort(d, s, t): switch d: case 0: return t case _: (t.a, t.b) = t return flow(d, s, (sort(d-1, 0, t.a), sort(d-1, 1, t.b))) def main: return sum(12, sort(12, 0, gen(12, 0))) ================================================ FILE: examples/sort_bitonic/main.hvm ================================================ @down = (?(((a (* a)) @down__C0) (b (c d))) (c (b d))) @down__C0 = ({a e} ((c g) ({b f} (d h)))) &! @flow ~ (a (b (c d))) &! @flow ~ (e (f (g h))) @flow = (?(((a (* a)) @flow__C0) (b (c d))) (c (b d))) @flow__C0 = ({$([+1] a) c} ((e f) ({b d} h))) & @down ~ (a (b (g h))) & @warp ~ (c (d (e (f g)))) @gen = (?(((a a) @gen__C0) b) b) @gen__C0 = ({a d} ({$([*2] $([+1] b)) $([*2] e)} (c f))) &! @gen ~ (a (b c)) &! 
@gen ~ (d (e f)) @main = a & @sum ~ (12 (@main__C1 a)) @main__C0 = a & @gen ~ (12 (0 a)) @main__C1 = a & @sort ~ (12 (0 (@main__C0 a))) @sort = (?(((a (* a)) @sort__C0) (b (c d))) (c (b d))) @sort__C0 = ({$([+1] a) {c f}} ((d g) (b i))) & @flow ~ (a (b ((e h) i))) &! @sort ~ (c (0 (d e))) &! @sort ~ (f (1 (g h))) @sum = (?(((a a) @sum__C0) b) b) @sum__C0 = ({a c} ((b d) f)) &! @sum ~ (a (b $([+] $(e f)))) &! @sum ~ (c (d e)) @swap = (?((@swap__C0 @swap__C1) (a (b c))) (b (a c))) @swap__C0 = (b (a (a b))) @swap__C1 = (* (a (b (a b)))) @warp = (?((@warp__C0 @warp__C1) (a (b (c d)))) (c (b (a d)))) @warp__C0 = ({a e} ({$([>] $(a b)) d} ($([^] $(b c)) f))) & @swap ~ (c (d (e f))) @warp__C1 = ({a f} ((d i) ((c h) ({b g} ((e j) (k l)))))) &! @warp ~ (f (g (h (i (j l))))) &! @warp ~ (a (b (c (d (e k))))) ================================================ FILE: examples/sort_radix/main.bend ================================================ # data Arr = Empty | (Single x) | (Concat x0 x1) Empty = λempty λsingle λconcat empty Single = λx λempty λsingle λconcat (single x) Concat = λx0 λx1 λempty λsingle λconcat (concat x0 x1) # data Map = Free | Busy | (Node x0 x1) Free = λfree λbusy λnode free Busy = λfree λbusy λnode busy Node = λx0 λx1 λfree λbusy λnode (node x0 x1) # gen : u32 -> Arr gen = λn switch n { 0: λx (Single x) _: λx let x0 = (* x 2) let x1 = (+ x0 1) (Concat (gen n-1 x1) (gen n-1 x0)) } # sum : Arr -> u32 sum = λa let a_empty = 0 let a_single = λx x let a_concat = λx0 λx1 (+ (sum x0) (sum x1)) (a a_empty a_single a_concat) # sort : Arr -> Arr sort = λt (to_arr (to_map t) 0) # to_arr : Map -> u32 -> Arr to_arr = λa let a_free = λk Empty let a_busy = λk (Single k) let a_node = λx0 λx1 λk let x0 = (to_arr x0 (+ (* k 2) 0)) let x1 = (to_arr x1 (+ (* k 2) 1)) (Concat x0 x1) (a a_free a_busy a_node) # to_map : Arr -> Map to_map = λa let a_empty = Free let a_single = λx (radix 24 x 1 Busy) let a_concat = λx0 λx1 (merge (to_map x0) (to_map x1)) (a a_empty a_single 
a_concat) # merge : Map -> Map -> Map merge = λa let a_free = λb let b_free = Free let b_busy = Busy let b_node = λb0 λb1 (Node b0 b1) (b b_free b_busy b_node) let a_busy = λb let b_free = Busy let b_busy = Busy let b_node = λb0 λb1 0 (b b_free b_busy b_node) let a_node = λa0 λa1 λb let b_free = λa0 λa1 (Node a0 a1) let b_busy = λa0 λa1 0 let b_node = λb0 λb1 λa0 λa1 (Node (merge a0 b0) (merge a1 b1)) (b b_free b_busy b_node a0 a1) (a a_free a_busy a_node) # radix : u32 -> Map radix = λi λn λk λr switch i { 0: r _: (radix i-1 n (* k 2) (swap (& n k) r Free)) } # swap : u32 -> Map -> Map -> Map swap = λn switch n { 0: λx0 λx1 (Node x0 x1) _: λx0 λx1 (Node x1 x0) } # main : u32 main = (sum (sort (gen 16 0))) ================================================ FILE: examples/sort_radix/main.hvm ================================================ @Busy = (* (a (* a))) @Concat = (a (b (* (* ((a (b c)) c))))) @Empty = (a (* (* a))) @Free = (a (* (* a))) @Node = (a (b (* (* ((a (b c)) c))))) @Single = (a (* ((a b) (* b)))) @gen = (?((@gen__C0 @gen__C1) a) a) @gen__C0 = a & @Single ~ a @gen__C1 = ({a d} ($([*2] {e $([+1] b)}) g)) & @Concat ~ (c (f g)) &! @gen ~ (a (b c)) &! @gen ~ (d (e f)) @main = a & @sum ~ (@main__C1 a) @main__C0 = a & @gen ~ (16 (0 a)) @main__C1 = a & @sort ~ (@main__C0 a) @merge = ((@merge__C5 (@merge__C4 (@merge__C3 a))) a) @merge__C0 = (b (e (a (d g)))) & @Node ~ (c (f g)) &! @merge ~ (a (b c)) &! @merge ~ (d (e f)) @merge__C1 = a & @Node ~ a @merge__C2 = a & @Node ~ a @merge__C3 = (a (b ((@merge__C1 ((* (* 0)) (@merge__C0 (a (b c))))) c))) @merge__C4 = ((@Busy (@Busy ((* (* 0)) a))) a) @merge__C5 = ((@Free (@Busy (@merge__C2 a))) a) @radix = (?(((* (* (a a))) @radix__C0) b) b) @radix__C0 = (a ({b $([&] $(d e))} ({$([*2] c) d} (f h)))) & @radix ~ (a (b (c (g h)))) & @swap ~ (e (f (@Free g))) @sort = (a c) & @to_arr ~ (b (0 c)) & @to_map ~ (a b) @sum = ((0 ((a a) (@sum__C0 b))) b) @sum__C0 = (a (b d)) &! @sum ~ (a $([+] $(c d))) &! 
@sum ~ (b c) @swap = (?((@swap__C0 @swap__C1) a) a) @swap__C0 = a & @Node ~ a @swap__C1 = (* (b (a c))) & @Node ~ (a (b c)) @to_arr = (((* @Empty) (@to_arr__C1 (@to_arr__C0 a))) a) @to_arr__C0 = (a (d ({$([*2] $([+1] e)) $([*2] $([+0] b))} g))) & @Concat ~ (c (f g)) &! @to_arr ~ (a (b c)) &! @to_arr ~ (d (e f)) @to_arr__C1 = a & @Single ~ a @to_map = ((@Free (@to_map__C1 (@to_map__C0 a))) a) @to_map__C0 = (a (c e)) & @merge ~ (b (d e)) &! @to_map ~ (a b) &! @to_map ~ (c d) @to_map__C1 = (a b) & @radix ~ (24 (a (1 (@Busy b)))) ================================================ FILE: examples/stress/README.md ================================================ # stress This is the basic stress-test used to test an implementation's maximum IPS. It recursively creates a tree with a given depth, and then performs a recursive computation with a given length: ``` def sum(n): if n == 0: return 0 else: return n + sum(n - 1) def fun(n): if n == 0: return sum(LENGTH) else: return fun(n - 1) + fun(n - 1) fun(DEPTH) ``` This lets us test both the parallel and sequential performance of a runtime. For example, by testing a tree of depth 14 and length 2^20, we have enough parallelism to use all the 32k threads of an RTX 4090, and enough sequential work (1M calls) to keep each thread busy for a long time. ================================================ FILE: examples/stress/main.bend ================================================ def loop(n): switch n: case 0: return 0 case _: return loop(n-1) def fun(n): switch n: case 0: return loop(0x10000) case _: return fun(n-1) + fun(n-1) def main: return fun(8) ================================================ FILE: examples/stress/main.hvm ================================================ @fun = (?((@fun__C0 @fun__C1) a) a) @fun__C0 = a & @loop ~ (65536 a) @fun__C1 = ({a b} d) &! @fun ~ (a $([+] $(c d))) &! 
@fun ~ (b c) @loop = (?((0 @loop__C0) a) a) @loop__C0 = a & @loop ~ a @main = a & @fun ~ (8 a) ================================================ FILE: examples/stress/main.js ================================================ function sum(n) { if (n === 0) { return 0; } else { return n + sum(n - 1); } } function fun(n) { if (n === 0) { return sum(4096); } else { return fun(n - 1) + fun(n - 1); } } console.log(fun(18)); ================================================ FILE: examples/stress/main.py ================================================ def sum(n): if n == 0: return 0 else: return n + sum(n - 1) def fun(n): if n == 0: return sum(16) else: return fun(n - 1) + fun(n - 1) print(fun(8)) # Demo Micro-Benchmark / Stress-Test # # Complexity: 120,264,589,303 Interactions # # CPython: 640s on Apple M3 Max (1 thread) * # HVM-CPU: 268s on Apple M3 Max (1 thread) # Node.js: 128s on Apple M3 Max (1 thread) * # HVM-CPU: 14s on Apple M3 Max (12 threads) # HVM-GPU: 2s on NVIDIA RTX 4090 (32k threads) # # * estimated due to stack overflow ================================================ FILE: examples/sum_rec/main.bend ================================================ #flavor core sum = λn λx switch n { 0: x _: let fst = (sum n-1 (+ (* x 2) 0)) let snd = (sum n-1 (+ (* x 2) 1)) (+ fst snd) } main = (sum 20 0) ================================================ FILE: examples/sum_rec/main.hvm ================================================ @main = a & @sum ~ (20 (0 a)) @sum = (?(((a a) @sum__C0) b) b) @sum__C0 = ({c a} ({$([*2] $([+1] d)) $([*2] $([+0] b))} f)) &! @sum ~ (a (b $([+] $(e f)))) &! 
@sum ~ (c (d e)) ================================================ FILE: examples/sum_rec/main.js ================================================ function sum(a, b) { if (a === b) { return a; } else { let mid = Math.floor((a + b) / 2); let fst = sum(a, mid + 0); let snd = sum(mid + 1, b); return fst + snd; } } console.log(sum(0, 10000000)); ================================================ FILE: examples/sum_rec/sum.js ================================================ var sum = 0; for (var i = 0; i < 2**30; ++i) { sum += i; } console.log(sum); ================================================ FILE: examples/sum_tree/main.bend ================================================ gen = λd switch d { 0: λx x _: λx ((gen d-1 (+ (* x 2) 1)), (gen d-1 (* x 2))) } sum = λd λt switch d { 0: 1 _: let (t.a,t.b) = t (+ (sum d-1 t.a) (sum d-1 t.b)) } main = (sum 20 (gen 20 0)) ================================================ FILE: examples/sum_tree/main.hvm ================================================ @gen = (?(((a a) @gen__C0) b) b) @gen__C0 = ({a d} ({$([*2] $([+1] b)) $([*2] e)} (c f))) &! @gen ~ (a (b c)) &! @gen ~ (d (e f)) @main = a & @sum ~ (20 (@main__C0 a)) @main__C0 = a & @gen ~ (20 (0 a)) @sum = (?(((* 1) @sum__C0) a) a) @sum__C0 = ({a c} ((b d) f)) &! @sum ~ (a (b $([+] $(e f)))) &! 
@sum ~ (c (d e)) ================================================ FILE: examples/tuples/tuples.bend ================================================ type Tup8: New { a, b, c, d, e, f, g, h } rot = λx match x { Tup8/New: (Tup8/New x.b x.c x.d x.e x.f x.g x.h x.a) } app = λn switch n { 0: λf λx x _: λf λx (app n-1 f (f x)) } main = (app 1234 rot (Tup8/New 1 2 3 4 5 6 7 8)) ================================================ FILE: examples/tuples/tuples.hvm ================================================ @Tup8/New = (a (b (c (d (e (f (g (h ((0 (a (b (c (d (e (f (g (h i))))))))) i))))))))) @app = (?(((* (a a)) @app__C0) b) b) @app__C0 = (a ({b (c d)} (c e))) & @app ~ (a (b (d e))) @main = b & @app ~ (1234 (@rot (a b))) & @Tup8/New ~ (1 (2 (3 (4 (5 (6 (7 (8 a)))))))) @rot = ((@rot__C1 a) a) @rot__C0 = (h (a (b (c (d (e (f (g i)))))))) & @Tup8/New ~ (a (b (c (d (e (f (g (h i)))))))) @rot__C1 = (?((@rot__C0 *) a) a) ================================================ FILE: paper/HVM2.typst ================================================ #import "@preview/unequivocal-ams:0.1.0": ams-article, theorem, proof #import "@preview/cetz:0.2.2": canvas, draw #import "inet.typ" #show link: underline #set cite(form: "normal", style: "iso-690-author-date") #show: ams-article.with( title: [HVM2: A Parallel Evaluator for Interaction Combinators], authors: ((name: "Victor Taelin", company: "Higher Order Company", email: "taelin@HigherOrderCO.com"),), abstract:[ We present HVM2, an efficient, massively parallel evaluator for extended interaction combinators. When compiling non-sequential programs from a high-level programming language to C and CUDA, we achieved a near-ideal parallel speedup as a function of cores available (within a single device), scaling from 400 million interactions per second (MIPS) (Apple M3 Max; single thread), to 5,200 MIPS (Apple M3 Max; 16 threads), to 74,000 MIPS (NVIDIA RTX 4090; 32,768 threads). 
In this paper we describe HVM2's architecture, present snippets of the reference implementation in Rust, share early benchmarks and experimental results, and discuss current limitations and future plans. ], bibliography: bibliography("refs.bib", style: "annual-reviews"), ) *This paper is a work in progress. See the #link("https://github.com/HigherOrderCO/HVM")[HVM repo] for the latest version.* = Introduction Interaction Nets (IN's) @Lafont_1990 and Interaction Combinators (IC's) @Lafont_1997 were introduced by Lafont as a minimal and concurrent model of computation. Lafont proved that IC's were not only Turing Complete, but that they also preserve the complexity class and degree of parallelism. Moreover, Lafont argued that while Turing Machines are a universal model of sequential computation, IC's are a universal model of #emph[distributed] computation. The locality and strong confluence of Lafont's ICs make it suitable for massive parallel computation. This heavily implied that IC's are an optimal model of computation, in a very fundamental sense. Yet, it remained to be seen if this system could be implemented efficiently in practice. In this paper, we answer this question positively. By storing Interaction Combinator nodes in a memory-efficient format, we're able to implement its core operations (annihilation, commutation, and erasure) as lightweight C procedures and CUDA kernels. Furthermore, by representing wires as atomic variables, we're able to perform interactions atomically, in a lock-free fashion and with minimal synchronization. We also extend our system with global definitions (for fast function applications) and native numbers (for fast numeric operations). The result, HVM2, is an efficient, massively parallel evaluator for ICs that achieves near-ideal speedup, up to at least 16,384 concurrent cores, peaking at 74 billion interactions per second on an NVIDIA RTX 4090. 
This level of performance makes it compelling to propose HVM2 as a general framework for parallel computing. By translating constructs such as functions, algebraic data types, pattern matching, and recursion to HVM2, we see it as a potential compilation target for modern programming languages such as Python and Haskell. As a demonstration of this possibility, we also introduce #link("https://github.com/HigherOrderCO/bend")[Bend], a high-level programming language that compiles to HVM2. We explain how some of these translations work, and set up a general framework to translate arbitrary languages, procedural or functional, to HVM2. #pagebreak() = Similar Works *Work In Progress* #pagebreak() = Syntax HVM2's syntax consists of an Interaction Calculus system which textually represents an Interaction Combinator system @Calculus_1999. This textual system is capable of representing any arbitrary Interaction Net, and it is therefore possible to represent "vicious circles" @Lafont_1997. We only consider HVM2 programs which do not contain any vicious circles. HVM2's syntax has seven different types of _agents_ (in Lafont's terms) or _Nodes_. Additionally, HVM2 also has _Variables_ to represent wires which connect ports across Nodes. _Trees_ are either Variables or Nodes. They are represented syntactically as: #align(center)[ ``` <Node> ::= | "*" -- (ERA)ser | "@" <alphanumeric> -- (REF)erence | <Numeric> -- (NUM)eric | "(" <Tree> <Tree> ")" -- (CON)structor | "{" <Tree> <Tree> "}" -- (DUP)licator | "$(" <Tree> <Tree> ")" -- (OPE)rator | "?(" <Tree> <Tree> ")" -- (SWI)tch <Tree> ::= | <alphanumeric> -- (VAR)iable | <Node> <alphanumeric> ::= [a-zA-Z0-9_.-/]+ ``` \ _(For details on the `<Numeric>` syntax, see Numbers (@numbers))_ ] \ \ Notice that Nodes form a tree-like structure, and throughout this document we will make systematic confusion between Nodes and Trees. For example, in Interactions (@interactions), it is critical to know when a Variable is permissible or not when referring to arbitrary Nodes.
In Memory Layout (@memory), however, we purposefully blur the line between Nodes and Variables as the memory layout is greatly simplified by doing so. The first three node types (`ERA`, `REF`, `NUM`) are nullary, and the last 4 types (`CON`, `DUP`, `OPE`, `SWI`) are binary. As implied above, `VAR` can be seen as an additional node type. As in Lafont's Interaction Nets, every node has an extra distinguished edge, called the main or principal port @Lafont_1997. Thus, nullary nodes have one port (one main and zero auxiliary), while binary nodes have three ports (one main and two auxiliary). With the syntax above, the main port of the root node is "free", as it is not wired to another port. We can connect two main ports and form a reducible expression (redex), using the following syntax: #align(center)[ ``` <Redex> ::= <Tree> "~" <Tree> ``` ] A Net consists of a root tree and a (possibly empty) list of `&`-separated redexes: #align(center)[ ``` <Net> ::= <Tree> ("&" <Redex>)* ``` ] #pagebreak() HVM2 Nets represent what are known as _configurations_ in the literature. A Net like #align(center)[ ``` t1 & v1 ~ w1 & .. & vn ~ wn ``` ] graphically represents a net like, #pad(1em)[ #figure( image("configuration.png", width: 50%), caption: [ A configuration #footnote[This is a modified image of a configuration with multiple free main ports @Salikhmetov_2016.] ], ) ] where $omega$ is a wiring. Thus, HVM2 Nets contain only a single free main port. Wirings are possible between trees through pairs of `VAR` nodes with the same names. Note, however, that a variable can only occur twice. This aligns with Interaction Nets in that a wire can only connect two ports. Lastly, a Book consists of a list of top-level definitions, or, "named" Nets: #align(center)[ ``` <Book> ::= ("@" <alphanumeric> "=" <Net>)* ``` ] Each `.hvm2` file contains a book, which is executed by HVM2. The entry point for HVM2 programs is the `@main` definition.
== An Example The following definition: #align(center)[ ``` @succ = ({(a b) (b R)} (a R)) ``` ] Represents the HVM2 encoding of the $lambda$-calculus term `λs λz (s (s z))`, and can be drawn as the following Interaction Combinator net: /* : /_\ ...: :... : : /#\ /_\ ..: :.. a R /_\ /_\ : : a b...b R..:.: :..........: */ #figure(caption: [An example $lambda$-calculus term.], canvas({ import draw: * // inet.con(name: "a", pos: (0, 2), rot: -90deg) // inet.con(name: "b", pos: (2, 0), rot: -180deg) // inet.con(name: "c", pos: (0, -2), rot: 90deg) inet.con(name: "a", pos: (0, 0), rot: 90deg) inet.dup(name: "b", pos: (1.5, -1), rot: 90deg) inet.con(name: "c", pos: (1.5, 1), rot: 90deg) inet.con(name: "d", pos: (3, -2), rot: 90deg) inet.con(name: "e", pos: (3, 0), rot: 90deg) inet.link("a.0", "a.0") inet.link("a.1", "b.0") inet.link("a.2", "c.0") inet.link("b.1", "d.0") inet.link("b.2", "e.0") inet.link("d.2", "e.1") content((3.5, -1), [`b`]) // "c.2" -> "e.2" inet.port("R", (3.45, 1 + 1/6), -90deg) inet.port("R'", (3.45, 1 + 1/6), 90deg) inet.link("e.2", "R") inet.link("c.2", "R'") content((3, 1.5), [`R`]) // "c.1" -> "d.1" inet.port("A1", (4.1, -1), -180deg) inet.port("A2", (4.1, 0), 0deg) inet.port("A3", (3.45, 1 - 1/6), 90deg) inet.port("A3'", (3.45, 1 - 1/6), -90deg) inet.link("d.1", "A1") inet.link("A1", "A2") inet.link("A2", "A3'") inet.link("c.1", "A3") content((4.4, -0.5), [`a`]) })) Notice how `CON`/`DUP` nodes in HVM2 correspond directly to constructor and duplicator nodes in Lafont's Interaction Combinators @Lafont_1997. Aux-to-main wires are implicit through the tree-like structure of the syntax, while aux-to-aux wires are explicit through variable nodes, which are always paired. Additionally, main ports being implicit is critical to storing nodes efficiently in memory. Nodes can be represented as just two ports (the HVM2 memory model for wires) rather than three. In HVM2, since every port is 32 bits, this allows us to store a single node in a 64-bit word. 
This compact representation lets us use built-in atomic operations in various parts of the code, which was key to making parallel C and CUDA versions efficient. For details on the precise memory representation, see @architecture. == Interpretation of the Syntax Semantically, `CON`, `DUP`, and `ERA` nodes correspond accordingly to Lafont's #emph[constructor], #emph[duplicator], and #emph[eraser] symbols @Lafont_1997, and behave like Mazza's Symmetric Interaction Combinators @Mazza_2007. The `VAR` node represents a wiring in the graph, connecting two ports of a Net. They are linear and paired in the sense that (except for the free main port) every port is connected to exactly one other port, and therefore each variable occurs exactly twice. `REF` nodes are an extension to Lafont's IC's, and they represent an immutable net that is expanded in a single interaction. While not essential for the expressivity of the system, `REF` nodes are essential for performance, as they enable fast global functions, a degree of laziness in a strict setup (critical to making GPU implementations viable), and allow us to represent tail recursion in constant space. `NUM`, `OPE` and `SWI` nodes are also not essential expressivity-wise, but are too important for performance reasons. Modern processors are equipped with native machine integer operations. Emulating these operations with IC constructs analogous to Church or Scott Numerals would be very inefficient. Thus, these numeric nodes are necessary for HVM2 to be efficient in practice. #pagebreak() = Interactions The AST above specifies HVM2's data format. As a virtual machine, it also provides a mechanism to compute with that data. In traditional VMs, these are called *instructions*. In term rewriting systems, there are usually *reductions*. In HVM2, the mechanism for computation is called *interactions*. There are ten of them. 
All interactions are listed below using Gentzen-style rules (redexes in the line above reduce to ones in the line below). #v(1em) $ & "Arbitrary Nodes (including" #raw("VAR") ")" #h(2em) & #raw("A"), #raw("B"), #raw("C"), #raw("D") &:= #raw("<Tree>") \ & "Binary Nodes" #footnote[In the interaction rules `()` and `{}` refer to arbitrary binary nodes, not just `CON` and `DUP`.] & #raw("()"), #raw("{}") &:= #raw("CON") | #raw("DUP") | #raw("OPE") | #raw("SWI") \ & "Nullary Nodes" & circle.filled.small, circle.stroked.small &:= #raw("ERA") | #raw("REF") | #raw("NUM") \ & "Numeric Nodes" & #raw("N"), #raw("M") &:= #raw("NUM") "(Numbers or Operations)"\ & "Numeric Value Nodes (Not Operators)" & #raw("#n"), #raw("#m") &:= #raw("NUM") "where" #raw("n"), #raw("m") in bb(Q) \ & "Erasure Nodes" & #raw("*") &:= #raw("ERA") \ & "Variables" & #raw("x"), #raw("y"), #raw("z"), #raw("w") &:= #raw("VAR") \ $ #v(1em) #v(1em) #show math.equation: set text(15pt) #grid( align: center, columns: (1fr, 1fr), gutter: 1pt, [ #smallcaps[(#strong[link])] #math.frac( [ `B` contains `x` \ `x ~ A` ], [`B[x` $arrow.l$ `A]`] ) ], [ #smallcaps[(#strong[call])] #math.frac( [ `A` is not a `VAR` node #v(1em) \ `@foo ~ A` ], `expand(@foo) ~ A`) ], ) #v(1em) #grid( align: center, columns: (1fr, 1fr), gutter: 1pt, [ #smallcaps[(#strong[void])] #math.frac([$circle.filled.small$ `~` $circle.stroked.small$], ``) ], [ #smallcaps[(#strong[eras]e)] #math.frac([$circle.filled.small$ `~ (A B)`], [ $circle.filled.small$ `~ A` \ $circle.filled.small$ `~ B` ]) ], ) #v(1em) #grid( align: center, columns: (1fr, 1fr), gutter: 1pt, [ #smallcaps[(#strong[comm]ute)] #math.frac(`(A B) ~ {C D}`, ``` {x y} ~ A {z w} ~ B (x z) ~ C (y w) ~ D ``` ) ], [ #smallcaps[(#strong[anni]hilate)] #math.frac( `(A B) ~ (C D)`, ``` A ~ C B ~ D ``` ) ], ) #v(1em) #grid( align: center, columns: (1fr, 1fr), gutter: 1pt, [ #smallcaps[(#strong[oper]ate 1)] #math.frac(`N ~ $(M A)`, `op(N, M) ~ A`) ], [ #smallcaps[(#strong[swit]ch 1)] #math.frac(`#0 ~
?(A B)`, `A ~ (B *)`) ], ) #v(1em) #grid( align: center, columns: (1fr, 1fr), gutter: 1pt, [ #smallcaps[(#strong[oper]ate 2)] #math.frac( [ `A` is not a `NUM` node #v(1em) \ `N ~ $(A B)` ], `A ~ $(N B)` ) ], [ #smallcaps[(#strong[swit]ch 2)] #math.frac(`#n+1 ~ ?(A B)`, `A ~ (* (#n B))`) ], ) #v(2em) #show math.equation: set text(10pt) Note that rules are #emph[symmetric]: if a rule applies to a redex `A ~ B` then it also applies to `B ~ A`. Explanations and implementation details of each of the rules follow. == Link "Links" two ports where at least one is a `VAR` Node. A *global substitution* is performed replacing the single other occurrence of `x` with `A`. Recall that there is _exactly_ one other occurrence of `x` in the net. In the graph-rewriting system of Interaction Nets, linking two ports isn't technically an interaction, as wires are not named. However, for the term-rewriting calculus, wires are named and must be substituted for. When applying this rule, the redex `x ~ A` is removed, and the occurrence of `x` in node `B` is replaced with `A`. This is the only rule where nodes "far apart" can affect each other. See the Substitution Map & Atomic Linker section for details on the `link` function. == Call Expands a `REF`, replacing it with its definition. The definition is essentially copied from the static Book to the global memory, allocating its nodes and creating fresh variables. This operation is key to enabling fast function application, since, without it, one would need to use duplicator nodes for the same purpose, which brings considerable overhead. It also introduces some laziness in a strict evaluator, allowing for global recursive functions, and constant-space tail-calls. == Void Erases two nullary nodes connected to each other. The result is nothing: both nodes are consumed, fully cleared from memory. The `VOID` rule completes a garbage collection process. == Erase Erases a binary node `(A B)` connected to a nullary node, propagating the nullary node towards both nodes `A` and `B`.
The rule performs a granular, parallel garbage collection of nets that go out of scope. When the nullary node is a `NUM` or a `REF`, the #smallcaps[erase] rule actually behaves as a copy operation, cloning the `NUM` or `REF`, and connecting to both ports. #emph[However], when a copy operation is applied to a `REF` which contains `DUP` nodes, it instead is computed as a normal #smallcaps[call] operation. This allows us to perform fast copies of "function pointers", while still preserving Interaction Combinator semantics. == Commute Commutes two binary nodes of different types, essentially cloning them. The #smallcaps[commute] rule can be used to clone data and to perform loops and recursion, although these are preferably done via #smallcaps[call]s: Cloning large networks is faster through the #smallcaps[call] interaction, as it can be done in a single pass, as opposed to the incremental #smallcaps[commute] interactions that would have to propagate throughout the network. == Annihilate Annihilates two binary nodes of the same type connected to each other, replacing them with two redexes. The #smallcaps[annihilate] rule is the most essential computation rule, and is used to implement beta-reduction and pattern-matching. == Operate Performs a numeric operation between two `NUM` nodes `N` and `M` connected by an `OPE` node. Note that `N` and `M` should not both be numeric values for this interaction to perform a sensible numeric operation. Dispatching to different native numeric operations depends on the `N` and `M` nodes themselves. See Numbers (@numbers) for details. Note that when counting the number of interactions, #smallcaps[operate 2] is not counted, as this would cause the number of interactions to be non-deterministic. == Switch Performs a switch on a `NUM` node `#n` connected to a `SWI` node, treating it like a `Nat ::= Zero | (Succ pred)`.
Here, `A` is expected to be a #highlight[tuple (first reference of the term "tuple", it's unclear what the encoding is)] with both cases: `zero` and `succ`, and `B` is the return port. If `n` is 0, we return the `zero` case, and erase the `succ` case. Otherwise, we return the `succ` case applied to `n-1`, and erase the `zero` case. #pagebreak() == Interaction Table Since there are eight node types, there is a total of 64 possible pairwise node interactions. The interaction rule for an active pair is _uniquely_ determined by the types of the two nodes in the pair. The table below shows which interaction rule is triggered for each possible pair of nodes that form a redex. Since the interaction rules are symmetric, this table is symmetric across the main diagonal. #highlight[`CON`-`SWI` being #smallcaps[comm] is problematic; should be removed or the reduction for #smallcaps[swit] should change]. #v(1em) #align(center)[ ``` | A\B | VAR | REF | ERA | NUM | CON | DUP | OPR | SWI | |-----|------|------|------|------|------|------|------|------| | VAR | LINK | LINK | LINK | LINK | LINK | LINK | LINK | LINK | | REF | LINK | VOID | VOID | VOID | CALL | ERAS | CALL | CALL | | ERA | LINK | VOID | VOID | VOID | ERAS | ERAS | ERAS | ERAS | | NUM | LINK | VOID | VOID | VOID | ERAS | ERAS | OPER | SWIT | | CON | LINK | CALL | ERAS | ERAS | ANNI | COMM | COMM | COMM | | DUP | LINK | ERAS | ERAS | ERAS | COMM | ANNI | COMM | COMM | | OPR | LINK | CALL | ERAS | OPER | COMM | COMM | ANNI | COMM | | SWI | LINK | CALL | ERAS | SWIT | COMM | COMM | COMM | ANNI | ``` ] \ Because for each active pair exactly one rule applies, HVM2 retains the same _strong confluence_ that Lafont's Interaction Combinators do @Lafont_1997. This implies that not only can HVM2 programs be reduced completely in parallel, but also that the number of reductions is invariant to the order in which interaction rules are applied.
This ensures that HVM2 can reduce redexes in any order without any risk of complexity blowups. #pagebreak() = Substitution Map & Atomic Linker While HVM2 retains the strong confluence property of Lafont's IC's, _locality_ is more difficult to obtain. This is due to HVM2's variables. Variables link two different parts of the program, and thus can cause interference when two threads attempt to reduce two redexes in parallel. For example, consider a subset of a Net: #v(1em) #align(center)[ ``` & (a b) ~ (d c) & (c d) ~ (f e) ``` ] #v(1em) Two threads attempting to reduce this Net can be represented as follows: #v(1em) #align(center)[ ``` Thread_0 Thread_1 --a--|\____/|--c--|\____/|--e-- --b--|/ \|--d--|/ \|--f-- ``` ] #v(1em) Notice that in the reduction of these two redexes, both `Thread_0` and `Thread_1` will need to access variables `c` and `d`. This requires synchronization. In HVM2, there is a global collection of redexes that is mutated in parallel by a variety of threads. See Architecture (@architecture) for details. Since every variable occurs exactly twice, the #smallcaps[link] interaction with a variable `x` will also occur twice, but _possibly at very different times_. The first time is when this variable is first encountered, and somehow a substitution must be "deferred" until the #smallcaps[link] interaction rule is applied to the second occurrence of `x`. This is accomplished by a global, atomic substitution map, which tracks these deferred substitutions. When a variable is linked to a node, or to another variable, it is inserted into the substitution map. When that same variable is linked again, it will already have an entry in the substitution map, and then the proper redex will be constructed. The substitution map can be represented efficiently with a flat buffer, where the index is the variable name, and the value is the node that has been substituted. This can be done atomically, via a simple lock-free linker. 
In pseudocode, this roughly looks like: #pagebreak() #align(center)[ ```python # Attempts to link A and B. def link(subst: Dict[str, Node], A: Node, B: Node): while True: # If A is not a VAR: swap A and B, and continue. if type(A) != VAR: swap(A, B) # If A is still not a VAR: both are non-vars. Create a new redex and stop. if type(A) != VAR: push_redex(A, B) break # Here, A is a VAR. Create a `A: B` entry in the map. got: Port = subst.set_atomic(A, B) # If there was no `A` entry, stop. if got is None: break # Otherwise, delete `A` and link `got` to `B`. del subst[A] A = got ``` ] #v(1em) To see how this algorithm works, let's consider, again, the scenario above: #v(1em) #align(center)[ ``` Thread_0 Thread_1 --a--|\____/|--c--|\____/|--e-- --b--|/ \|--d--|/ \|--f-- ``` ] #v(1em) Assume we start with a substitution `a` $arrow.l$ `#42`, and let both threads reduce a redex in parallel. Each thread applies an `ANNI` rule, whose effect is to link both ports; thus, the resulting wire connected to `#42` (with the wires `a`, `d`, and `e` labelled for clarity) should be #v(1em) #align(center)[ ``` #42 ---a---. .---e--- '---d---' ``` ] #v(1em) That is, `e` must be directly linked to `#42`. Let's now evaluate the algorithm in an arbitrary order, step-by-step. Recall that the initial Net is: #v(1em) #align(center)[ ``` & (a b) ~ (d c) & (c d) ~ (f e) ``` ] #v(1em) And for simplicity we're observing only ports `a`, `d`, and `e`. `Thread_0` will attempt to perform `link(a, d)` and `Thread_1` will attempt `link(d, e)`. There are many possible orders of execution: #pagebreak() == Possible Execution Order 1 #pad(left: 2em)[ ``` - a: #42 ======= Thread_2: link(d, e) - a: #42 - d: e ======= Thread_1: link(a, d) - a: d - d: e ======= Thread_1: got `a: #42`, thus, delete `a` and link(d, #42) - d: #42 ======= Thread_1: got `d: e`, thus, delete `d` and link(e, #42) - e: #42 ``` ] The resulting substitution map is linking `e` to `#42`, as required.
== Possible Execution Order 2 #pad(left: 2em)[ ``` - a: #42 ======= Thread_1: link(d, a) - a: #42 - d: a ======= Thread_2: link(d, e) - a: #42 - d: e ======= Thread_2: got `d: a`, thus, delete `d` and link(a, e) - a: e ======= Thread_2: got `a: #42`, thus, delete `a` and link(e, #42) - e: #42 ``` ] The resulting substitution map is linking, again, `e` to `#42`, as required. == Possible Execution Order 3 #pad(left: 2em)[ ``` - a: #42 ======= Thread_1: link(d, a) - a: #42 - d: a ======= Thread_2: link(e, d) - a: #42 - d: a - e: d ``` ] In this case, the result isn't directly linking `e` to `#42`. But it does link `e` to `d`, which links to `a`, which links to `#42`. Thus, `e` is, indirectly, linked to `#42`. While it does temporarily use more memory in this case, it is, semantically, the same result. Additionally, the indirect links will be cleared as soon as `e` is linked to something else. It is easy enough to see that this holds for all possible evaluation orders. #pagebreak() = Numbers <numbers> HVM2 has built-in support for 32-bit numerics and operations represented by the `NUM` node type. `NUM` nodes in HVM2 have a 5-bit tag and a 24-bit payload. Depending on the tag, numbers can represent unsigned integers (U24), signed integers (I24), IEEE 754 binary32 floats (F24), or (possibly partially applied) operators. These choices mean any numeric node can be represented in 29 bits, which can be unboxed in a 32-bit port with a 3-bit tag for the node type. == Syntax Numeric nodes can either be a number or a (possibly partially applied) operator. Syntactically, #align(center)[ ``` <Numeric> ::= | <Number> | <Operator> <Number> ::= | <Nat> | <Int> | <Float> <Operator> ::= | "[" <Operation> "]" -- (unapplied) | "[" <Operation> <Number> "]" -- (partially applied) <Operation> ::= | "+" -- (ADD) | "-" -- (SUB) | "*" -- (MUL) | "/" -- (DIV) | "%" -- (REM) | "=" -- (EQ) | "!"
-- (NEQ) | "<" -- (LT) | ">" -- (GT) | "&" -- (AND) | "|" -- (OR) | "^" -- (XOR) | ">>" -- (SHR) | "<<" -- (SHL) | ":-" -- (FP_SUB) | ":/" -- (FP_DIV) | ":%" -- (FP_REM) | ":>>" -- (FP_SHR) | ":<<" -- (FP_SHL) ``` ] where `<Int>` is disambiguated from `<Nat>` by requiring a sign prefix `+`/`-`, and flipped versions of non-commutative operators (`FP_*`) are provided for convenience. #pagebreak() == Numeric Node Memory Layout In order to understand how numeric operations are derived from the numeric nodes, we must look at the memory layout of the different numeric values and operations. As stated above, numeric nodes fit into 29 bits, so they can be inlined into 32-bit ports. Numeric nodes have the following layout: #align(center)[ ``` VVVVVVVVVVVVVVVVVVVVVVVVTTTTT ``` ] Where the 5-bit tag `T` has 19 possible values: one of the three number types (`U24`, `I24`, `F24`), one of the 15 operations, or a special value `SYM`. The 24-bit value `V` has a few possible interpretations: - If $#raw("T") in {#raw("U24"), #raw("I24"), #raw("F24")}$, then `V` is interpreted as a number with the type `T`. For example: `123`, `-123`, `1.0`. - If $#raw("T") in #raw("<Operation>")$, then `V` is an untyped number. This is a partially applied operation. The type of the second argument (when applied) will dictate the interpretation of `V`. For example: `[/123]`, `[*-123]`, `[%1.0]`. - If $#raw("T") = #raw("SYM")$, then $#raw("V") in #raw("<Operation>")$. This is an unapplied operator, like `[+]`. == U24 - Unsigned 24-bit Integer U24 numbers represent unsigned integers from 0 to 16,777,215 ($2^24 - 1$). The 24-bit payload directly encodes the integer value. For example: #align(center)[ ``` 0000 0000 0000 0000 0000 0001 = 1 0000 0000 0000 0000 0000 0010 = 2 1111 1111 1111 1111 1111 1111 = 16,777,215 ``` ] == I24 - Signed 24-bit Integer I24 numbers represent signed integers from -8,388,608 to 8,388,607. The 24-bit payload uses two's complement encoding.
For example: #align(center)[ ``` 0000 0000 0000 0000 0000 0000 = 0 0000 0000 0000 0000 0000 0001 = 1 0111 1111 1111 1111 1111 1111 = 8,388,607 1000 0000 0000 0000 0000 0000 = -8,388,608 1111 1111 1111 1111 1111 1111 = -1 ``` ] == F24 - 24-bit IEEE 754 binary32 Float F24 numbers represent a subset of IEEE 754 binary32 floating point numbers; it supports approximately the same range, but with less precision. The 24-bit payload is laid out as follows: #align(center)[ ``` SEEE EEEE EMMM MMMM MMMM MMMM ``` ] Where: - S is the sign bit (1 = negative, 0 = positive) - E is the 8-bit exponent, with a bias of 127 (range of −126 to +127) - M is the 15-bit significand (mantissa) precision The value is calculated as: - If E = 0 and M = 0, the value is signed zero - If E = 0 and M $eq.not$ 0, the value is a subnormal number: \ $(-1)^S times 2^(-126) times (0.M "in base 2")$ - If 0 < E < 255, the value is a normal number: \ $(-1)^S times 2^(E-127) times (1.M "in base 2")$ - If E = 255 and M = 0, the value is signed infinity - If E = 255 and M $eq.not$ 0, the value is NaN (Not-a-Number) F24 supports a range of approximately $plus.minus 3.4 times 10^38$. The smallest positive normal number is $2^(-126) approx 1.2 times 10^(-38)$, while the smallest subnormal numbers go down to $2^(-141) approx 3.6 times 10^(-43)$. == Numeric Operations When two `NUM` nodes are connected by an `OPE` node, as shown in Interaction Rules (@interactions), a numeric operation is performed using the `op` function. The operation to be performed depends on the tags of each numeric node. Some operations `op(N, M)` are invalid, and simply return `0`: - If both numeric tags are types. - If both numeric tags are operations. - If both numeric tags are `SYM`. Otherwise: - If one of the tags is `SYM`, the output has the tag represented by the `SYM` numeric node and the payload of the other operand. 
For example, #align(center)[ ``` OP([+], 10) = [+10] OP(-1, [*]) = [*0xffffff] ``` ] - If one of the tags is an operation, and the other is a type, a native operation is performed, according to the following table: #align(center)[ ``` | | U24 | I24 | F24 | |---|-----|-----|-------| |ADD| + | + | + | |SUB| - | - | - | |MUL| * | * | * | |DIV| / | / | / | |REM| % | % | % | |EQ | == | == | == | |NEQ| != | != | != | |LT | < | < | < | |GT | > | > | > | |AND| & | & | atan2 | |OR | | | | | log | |XOR| ^ | ^ | pow | |SHR| >> | | | |SHL| << | | | ``` ] Where empty cells are intentionally left unspecified. The resulting number type is the same as the input type, except for comparison operators (`EQ`, `NEQ`, `LT`, `GT`) which always return `U24` `0` or `1`. #pagebreak() The number tagged with the operation is the left operand of the native operation, and the number tagged with the type is the right operand. For example, #align(center)[ ``` op([/1.2], 3.0) = op(3.0, [/1.2]) = 1.2 / 3.0 ``` ] Note that this means that the number type used in an operation is always determined by the right operand; if the left operand is of a different type, its bits will be reinterpreted. Finally, flipped operations (such as `FP_SUB`) interpret their operands in the opposite order (e.g. `SUB` represents `a - b` whereas `FP_SUB` represents `b - a`). This allows representing e.g. both `1 - x` and `x - 1` with partially-applied operations (`[-1]` and `[:-1]` respectively). #align(center)[ ``` OP([-2], +1) = +1 OP([:-2], 1) = -1 ``` ] Note that `op` is a symmetric function (since the order of the operands is determined by their tags). That is, `op(N, M) = op(M, N)`. This makes the "swap" interaction in #smallcaps[operate 2] valid. #pagebreak() = The 32-bit Architecture The initial version of HVM2, as implemented in this paper, is based on a 32-bit architecture. 
In this section, we'll use snippets of the Rust reference implementation of the interpreter, available at the #link("https://github.com/HigherOrderCO/hvm2")[HVM2 repo], to document this architecture. == Memory Layout Ports are 32-bit values that represent a wire connected to a main port. The low 3-bits are reserved to identify the type of the node (`VAR`, `REF`, `ERA`, etc) whose main port the wire is connected to. This is a port's _tag_. The upper 29-bits hold a port's _value_. The interpretation of the value is dependent on the tag. The value is either an address (for binary `CON`, `DUP`, `OPR`, and `SWI` nodes), a virtual function address (for `REF` nodes), an unboxed 29-bit number (for `NUM` nodes), a variable name (for `VAR` nodes), or `0` (for `ERA` nodes). Binary nodes are represented in memory as a pair of two ports. Notice that _ports store the kind of node they are connecting to_, nodes don't store their own type. #align(center)[ ```rust pub type Tag = u8; // 3 bits (rounded up to u8) pub type Val = u32; // 29 bits (rounded up to u32) pub struct Port(pub u32); // Tag + Val (32 bits) pub struct Pair(pub u64); // Port + Port (64 bits) ``` ] The Global Net structure includes three shared memory buffers, `node`: a node buffer where nodes are allocated, `vars`: a buffer representing the current substitution map (with the 29-bit key being the variable name, and the value being the current substitution), and `rbag`: a collection of active redexes. `APair` and `APort` are atomic variants of `Pair` and `Port`, respectively, #align(center)[ ```rust pub struct GNet<'a> { pub node: &'a mut [APair], pub vars: &'a mut [APort], pub rbag: &'a mut [APair], } ``` ] Since this 32-bit architecture has 29-bit values, that means we can address a total of $2^29$ nodes and variables, making the `node` buffer at most `4 GB` long, and the `vars` buffer at most `2 GB` long. 
A 64-bit architecture would increase this limit to match the overwhelming majority of use cases, and will be incorporated in a future revision of the HVM2 runtime. Since top-level definitions are just static nets, they are stored in a similar structure, with the key difference that they include an explicit `root` port, which is used to connect the expanded definition to its target port in the graph. We also include a `safe` flag, which indicates whether this definition has duplicator nodes or not. This affects the `DUP-REF` interaction, which will just copy the `REF` port, rather than expanding the definition, when it is `safe`. #align(center)[ ```rust pub struct Def { pub safe: bool, // has no dups pub root: Port, // root port pub rbag: Vec<Pair>, // def redex bag pub node: Vec<Pair>, // def node buffer } ``` ] Finally, the global Book is just a map of names to `Def`s: #align(center)[ ```rust pub struct Book { pub defs: Vec<Def>, } ``` ] Notice that, like variables, names are simply indexes into the global `Book`. This concludes HVM2's memory layout. For more details, check the reference Rust implementation in the #link("https://github.com/HigherOrderCO/hvm2")[HVM2 repo].
== Example Interaction Net Layout Consider, again, the following net: #align(center)[ ``` (a b) & (b a) ~ (x (y *)) & {y x} ~ @foo ``` ] In HVM2's memory, it would be represented as: #align(center)[ ``` RBAG | FST-TREE | SND-TREE ---- | -------- | -------- 0800 | CON 0002 | CON 0003 // '& (b a) ~ (x (y *))' 1800 | DUP 0005 | REF 0000 // '& {y x} ~ @foo' ---- | -------- | -------- NODE | PORT-1 | PORT-2 ---- | -------- | -------- 0001 | VAR 0000 | VAR 0001 // '(a b)' node (root) 0002 | VAR 0001 | VAR 0000 // '(b a)' node 0003 | VAR 0002 | CON 0004 // '(x (y *))' node 0004 | VAR 0003 | ERA 0000 // '(y *)' node 0005 | VAR 0003 | VAR 0002 // '{y x}' node ---- | -------- | -------- VARS | VALUE | ---- | -------- | FFFF | CON 0001 | // points to root node ``` ] Note that the `VARS` buffer has only one entry, because there are no substitutions, but we always use the last variable to represent the root port, serving as an entry point to the graph. == Example Interaction Interactions can be implemented in five steps: 1. Allocate the needed resources. 2. Load nodes from global memory to registers. 3. Initialize fresh variables on the substitution map. 4. Store fresh nodes on the node buffer. 5. Atomically link outgoing wires. For example, the #smallcaps[commute] interaction is implemented in Rust as: #align(center)[ ```rust pub fn interact_comm(&mut self, net: &GNet, a: Port, b: Port) -> bool { // Allocates needed resources. if !self.get_resources(net, 4, 4, 4) { return false; } // Loads nodes from global memory. let a_ = net.node_take(a.get_val() as usize); let a1 = a_.get_fst(); let a2 = a_.get_snd(); let b_ = net.node_take(b.get_val() as usize); let b1 = b_.get_fst(); let b2 = b_.get_snd(); // Stores new vars. net.vars_create(self.v0, NONE); net.vars_create(self.v1, NONE); net.vars_create(self.v2, NONE); net.vars_create(self.v3, NONE); // Stores new nodes.
net.node_create(self.n0, pair(port(VAR, self.v0), port(VAR, self.v1))); net.node_create(self.n1, pair(port(VAR, self.v2), port(VAR, self.v3))); net.node_create(self.n2, pair(port(VAR, self.v0), port(VAR, self.v2))); net.node_create(self.n3, pair(port(VAR, self.v1), port(VAR, self.v3))); // Links. self.link_pair(net, pair(port(b.get_tag(), self.n0), a1)); self.link_pair(net, pair(port(b.get_tag(), self.n1), a2)); self.link_pair(net, pair(port(a.get_tag(), self.n2), b1)); self.link_pair(net, pair(port(a.get_tag(), self.n3), b2)); return true; } ``` ] #v(1em) Note that, other than the linking, all operations here are local. Taking nodes from global memory is safe, because the thread that holds a redex implicitly owns both trees it contains, and storing vars and nodes is safe, because these spaces have been allocated by the thread. A fast concurrent allocator for small values is assumed. In HVM2, we just use a simple linear bump allocator, which is fast and fragmentation-free in a context where all allocations are at most 64-bit values (the size of a single node). = Massively Parallel Evaluation Provided the architecture we just constructed, evaluating an HVM2 program in parallel is surprisingly easy: *just compute global redexes concurrently, until there is no more work to do*. HVM2's local interactions expose the original program's full degree of parallelism, ensuring that all work that *can* be done in parallel *will* be done in parallel. In other words, it maximizes the theoretical speedup, per Amdahl's law. The atomic linking procedure ensures that points of synchronization that emerge from the original program are solved safely and efficiently, with no room for race conditions. Finally, the strong confluence property ensures that the total work done is independent of the order in which redexes are computed, giving us freedom to evaluate in parallel without generating extra work.
== Redex Sharing An additional question is: how do we actually distribute that workload across all cores of a modern processor? The act of sharing a redex is, itself, a point of synchronization. If this is done without enough caution, it can result in contention, slowing down execution. HVM2 solves this with two different approaches: On _CPUs_, a simple task-stealing queue is used, where each thread pushes and pops from its own local redex bag, while a starving neighbor thread actively attempts to steal a redex from it. Since a redex is just a 64-bit value, stealing can be done with a single atomic_exchange operation, making it very lightweight. To reduce contention, and to force threads to steal "old redexes", which are more likely to produce long independent workloads, this stealing is done from the other end of the bag. In our experience, this works extremely well in practice, achieving full CPU occupancy in all cases tested, with minimal overhead, and low impact on non-parallelizable programs. On _GPUs_, this matter is more complex in many ways. First, there are two scales on which we want sharing to occur: 1. Within a running block, where stealing between local threads can be accomplished by fast shared-memory operations and warp-sync primitives. 2. Across global blocks, where sharing requires either a global synchronization (i.e., calling the kernel again) or direct communication via global memory. Unfortunately, the cost of global synchronization (i.e., across blocks) is very high, so, having a globally shared redex bag, as in the C version, and accessing it within the context of a kernel, would greatly impact performance.
To improve this, we, initially, attempted to implement a fast block-wise scheduler, which simply lets local threads pass redexes to starving ones with warp syncs: ```c __device__ void share_redexes(TM* tm) { __shared__ Pair pool[TPB]; Pair send, recv; u32* ini = &tm->rbag.lo_ini; u32* end = &tm->rbag.lo_end; Pair* bag = tm->rbag.lo_buf; for (u32 off = 1; off < 32; off *= 2) { send = (*end - *ini) > 1 ? bag[*ini%RLEN] : 0; recv = __shfl_xor_sync(__activemask(), send, off); if (!send && recv) bag[((*end)++)%RLEN] = recv; if ( send && !recv) ++(*ini); } for (u32 off = 32; off < TPB; off *= 2) { u32 a = TID(); u32 b = a ^ off; send = (*end - *ini) > 1 ? bag[*ini%RLEN] : 0; pool[a] = send; __syncthreads(); recv = pool[b]; if (!send && recv) bag[((*end)++)%RLEN] = recv; if ( send && !recv) ++(*ini); } } ``` Such a procedure is efficient enough to be called between every few interactions, allowing redexes to quickly fill the whole block. With that, all we had to do is let the kernel perform a constant number of local interactions (usually in the range of $2^9$ to $2^13$), and, once it completes, i.e., across kernel invocations, the global redex bag was transposed (rows become columns), letting the entire GPU fill naturally by just the block-wise sharing function above, and nothing else. This approach worked very well in practice, and let us achieve a peak of 74,000 MIPS in a shader-like functional program (i.e., a "tree-map" operation). Unfortunately, it didn't work so well in cases where the implied communication was more involved.
For example, consider the following implementation of a purely functional Bitonic Sort: ```javascript data Tree = (Leaf val) | (Node fst snd) // Swaps distant values in parallel; corresponds to a Red Box (warp s (Leaf a) (Leaf b)) = (U60.swap (^ (> a b) s) (Leaf a) (Leaf b)) (warp s (Node a b) (Node c d)) = (join (warp s a c) (warp s b d)) // Rebuilds the warped tree in the original order (join (Node a b) (Node c d)) = (Node (Node a c) (Node b d)) // Recursively warps each sub-tree; corresponds to a Blue/Green Box (flow s (Leaf a)) = (Leaf a) (flow s (Node a b)) = (down s (warp s a b)) // Propagates Flow downwards (down s (Leaf a)) = (Leaf a) (down s (Node a b)) = (Node (flow s a) (flow s b)) // Bitonic Sort (sort s (Leaf a)) = (Leaf a) (sort s (Node a b)) = (flow s (Node (sort 0 a) (sort 1 b))) ``` Since this is an $O(n*log(n))$ algorithm, its recursive structure unfolds in such a manner that is much less regular than a tree-map. As such, the naive task sharing approach had a consequence that greatly impacted performance on GPUs: threads would give and receive "misaligned" redexes, causing warp-local threads to compute different calls at any given point. For example, a given warp thread might be processing `(flow 5 (Node _ _))`, while another might be processing `(down 0 (Leaf _))` instead. This divergence has the consequence of producing sequentialism in the GPU architecture, where warp-local threads are in lockstep. To improve this, a different task-sharing mechanism has been implemented, which requires a minimal annotation: redexes corresponding to branching recursive calls are flagged with a `!` on the global Book. With this annotation, the GPU evaluator will then only share redexes from functions that recurse in a parallelizable fashion. This is extremely effective, as it allows threads to always get "equivalent" redexes in a regular recursive algorithm. 
For example, if a given thread is processing `(flow 5 (Node _ _))`, it is very likely that another warp local thread is too. This minimizes warp divergence, and has a profound impact on performance across many cases. On the `bitonic_sort` example, this new policy alone resulted in a jump from 1,300 MIPS to 12,000 MIPS (9x). == Optimization: Shared Memory Interactions GPUs also have another particularity that, if exploited properly, can result in significant speedups: shared memory. The NVIDIA RTX 4090, for example, includes an L1 cache memory space of about 128KB, and GPU languages like CUDA usually allow a programmer to manually read and write from that cache, in a shared memory buffer that is accessible by all threads of a block. Reading and writing from shared memory can be up to 2 orders of magnitude faster than doing so from global memory, so, using that space properly is essential to fully harness a GPU's computing power. On HVM32, we use that space to store a local node buffer, and a local subst map, with 8192 nodes and variables. This occupies exactly 96KB, just enough to fit most modern processors. When a thread allocates a fresh node or variable, that allocation occurs in the shared memory, rather than the global memory. With a configuration of 128 threads per block, each thread has a "scratchpad" of 64 nodes and vars to work locally, with no global memory access. This is often enough to compute long-running tail-loops, which is what makes HVM2 so efficient on shader-like programs. There is one problem, though: what happens when an interaction links a locally allocated node to a global variable? This would cause a pointer to a local node to "leak" to another block, which would then be unable to retrieve its information, causing a runtime error. To handle this situation, we extend the LINK interaction with a "LEAK" sub-interaction, which is specific to GPUs only.
That interaction essentially allocates a "global view" of the local node, filled with two placeholder variables, such that one copy is local, and the other copy is global (remember: variables are always paired). That way, we can continue the local reduction without interruptions. If another block does get this "leaked" node, it will be filled with two variables, which, in this case, act as "future" values which will be resolved when the local thread links it. ``` ^A ~ (b1 b2) ------------- LEAK ^X ~ b1 ^Y ~ b2 ^A ~ ^(^X ^Y) ``` The LEAK interaction allows us to safely work locally for as long as desired, which has a great impact on performance. On the stress test benchmark, the throughput jumps from about 13,000 MIPS to 54,000 MIPS by this change alone. = Garbage Collection Since HVM2 is based on Interaction Combinators, which are fully linear, there is no global "garbage collection" pass required. By IC evaluation semantics alone, data is granularly allocated as needed, and freed as soon as it becomes unreachable. This is specifically accomplished by the ERAS and VOID interactions, which consume sub-nets that go out of scope in parallel, clearing them from memory. When HVM2 completes evaluation (i.e., after all redexes have been processed), the memory should be left with just the final result of the program, and no remaining garbage or byproduct. No further work is needed. = IO TODO: explain how we do IO = Benchmarks TODO: include some benchmarks = Translations TODO: include example translations from high-level languages to HVM2 = Limitations The HVM2 architecture, as currently presented, is capable of evaluating modern, high-level programs in massively parallel hardware with near-ideal speedup, which is a remarkable feat. That said, it has severe impactful limitations that must be understood. Below is a list of many of these limitations, and how they can be addressed in the future.
== Only one Duplicator Node Since HVM2 is affine, duplicator nodes are often used to copy non-linear variables. For example, when translating the $lambda$-term `λx.x+x`, which is not linear (because `x` occurs twice), one might use a `DUP` node to clone `x`. Due to Interaction Net semantics, though, `DUP` nodes don't always match the behavior of cloning a variable on traditional languages. This, if not handled properly, can lead to *unsound reductions*. For example, the $lambda$-term: ``` C4 = (λf.λx.(f (f x)) λf.λx.(f (f x))) ``` Which computes the Church-encoded exponentiation `2^2`, can not be soundly reduced by HVM2. To handle this, a source language must use either a type system or similar mechanism to verify that the following invariant holds: #v(-1em) #quote(block: true)[ _A higher-order lambda that clones its variable can not be cloned._ ] While restrictive, it is important to stress that this limitation only applies to cloning higher-order functions. A language that targets the HVM can still clone data types with no restrictions, and it is still able to perform loops, recursion and pattern-matches with no limitations. In other words, HVM is Turing Complete, and can evaluate procedural languages with no restrictions, and functional languages with some restrictions. That said, this can be amended by: 1. Adding more duplicator nodes. This would allow "nested copies" of higher-order functions. With a proper type system (such as EAL inference), this can greatly reduce the set of practical programs that are affected by this restriction. 2. Adding bookkeeping nodes. These nodes, originally proposed by Lamping (1990), allow interaction systems to evaluate the full λ-calculus with no restrictions. Adding bookkeeping to HVM should be easy. Sadly, this has the consequence of bringing a constant-time overhead, decreasing performance by about 10x. Because of that, it wasn't included in this model. 
Ideally, a combination of both approaches should be used: a type-checker that flags safe programs, which can be evaluated safely on HVM2, and a fallback bookkeeper, which ensures sound reductions of programs that are not. Implementing such a system is outside the scope of this work, and should be done as a future extension. [cite: the optimizing optimal evaluation paper] == Ultra-Eager Evaluation Only In our first implementation, HVM1, we used a lazy evaluation model. This not only ensured that no unnecessary work was done, but also allowed one to compute with infinite structures, like lists. Since the implementation presented here reduces *all* available redexes eagerly, that means neither of these holds. For example, if you allocate a big structure, but only read one branch, HVM2 will allocate the entire structure, while HVM1 wouldn't. And if you do have an infinite structure, HVM2 will never halt (because the redex bag will never be empty). This applies even to code that doesn't look like it is an infinite structure. For example, consider the JavaScript function below: ``` foo = x => x == 0 ? 0 : 1 + foo(x-1); ``` In JavaScript, this is a perfectly valid function. In HVM2, if called as-is, this would hang, because `foo(x-1)` would unroll infinitely, as we do not "detect" that it is in a branch. To make recursive functions computable, the usual approach is to split it into multiple definitions, as in: ``` foo = x => x == 0 ? foo_Z : foo_S(x - 1); foo_Z = 0; foo_S = x => 1 + foo(x); ``` Since REFs unfold lazily, the program above will properly erase the `foo_S` branch when it reaches the base case, avoiding the infinite recursion. Extending HVM2 with a full lazy mode would require us to store uplinks, allowing threads to navigate through the graph and only reduce redexes that are reachable from the root port. While not technically hard to do, doing so would make the task scheduler way more complex to implement efficiently, especially in the GPU version.
We reserve this for a future extension. == Single Core Inefficiency While HVM2 achieves near-linear speedup, allowing it to make programs run arbitrarily faster by just using more cores (as long as there is sufficient degree of parallelism), its compiler is still extremely immature, and not nearly as fast as state-of-the-art alternatives like GCC or GHC. In single-thread CPU evaluation, HVM2 is, at baseline, still about 5x slower than GHC, and this number can grow to 100x on programs that involve loops and mutable arrays, since HVM2 doesn't feature these yet. For example, a single-core C program that adds numbers from 0 to a few billions will easily outperform an HVM2 one that uses thousands of threads, given the C version is doing no allocation, while HVM2 is allocating a tree-like recursive stack. That said, not every program can be implemented as an allocation-free, register-mutating loop. For real programs that allocate tons of short memory objects, HVM2 is expected to perform extremely well. Moreover, and unlike some might think, HVM2 is not incompatible with loops or mutable types, because it isn't a functional runtime, but one based on interaction combinators, which are fully linear. Extending HVM2 with arrays is as easy as creating nodes for it, and implementing the interactions, and can be done in a timely fashion as a fork of this repository. Similarly, loops can be implemented by optimizing tail-calls. We plan to add such optimization soon. Finally, there are many other low-hanging fruits that could improve HVM2's performance considerably. For example, currently, we do not have native constructors, which means that algebraic datatypes have to be λ-encoded, which brings a 2x-5x memory overhead. Adding proper constructors and eliminating this overhead would likely bring a proportional speedup.
Similarly, adding more numeric types like vectors would allow using more of the available GPU instructions, and adding read-only types like immutable strings and textures with 1-interaction reads would allow one to implement many algorithms that, currently, wouldn't be practical, especially for graphics rendering. == 32-bit Architecture Limitations Since this architecture is 32-bit, and since 3 bits are reserved for a tag, that leaves us with a 29-bit addressable space. That amounts to a total of about 500 million nodes, or about 4 GB. Modern GPUs come with as much as 256 GB integrated memory, so, HVM2 isn't able to fully use the available space, due to addressing constraints. Moreover, its 29-bit unboxed numbers only allow for 24-bit machine ints and floats, which may not be enough for many applications. All these problems should be solved by extending ports to 64-bit and nodes to 128-bits, but this requires some additional considerations, since modern GPUs don't come with 128-bit atomic operations. We'll do this in a future extension. == More *Work In Progress* = Conclusion By starting from a solid, inherently concurrent model of computation, Interaction Combinators, carefully designing an efficient memory format, implementing lock-free concurrent interactions via lightweight atomic primitives, and granularly distributing workload across all cores, we were able to design a parallel compiler and evaluator for high-level programming languages that achieves near-linear speedup as a function of core count (within a single device). While the resulting system still has many limitations, we proposed sensible plans to address them in the future. This work creates a solid foundation for parallel programming languages that are able to harness the massively parallel capabilities of modern hardware, without demanding explicit low-level management of threads, locks, mutexes and other complex synchronization primitives by the programmer.
================================================ FILE: paper/README.md ================================================ # HVM - Paper(s) - HVM2 (Work in progress) - HVM2's theoretical foundations, implementation, early benchmarks, current limitations, and future work. - Extended Abstract - Accepted to [FProPer 2024][1]. - A much shorter version of the main paper. [1]: https://icfp24.sigplan.org/home/fproper-2024 ================================================ FILE: paper/inet.typ ================================================ #import "@preview/cetz:0.2.2": draw, canvas #let port = (name, pos, dir) => { import draw: * group(name: name, { translate(pos) rotate(dir) // scale(1.5) anchor("p", (0, 0)) anchor("c", (0, 0.5)) }) } #let agent = (..agent) => (..args) => { import draw: * let style = agent.named().at("style", default: ()) let name = args.named().at("name") let pos = args.named().at("pos") let rot = args.named().at("rot", default: 0deg) group(name: name, { translate(pos) rotate(rot) translate((0, -calc.sqrt(3)/4)) stroke(2pt) line((-.5, 0), (.5, 0), (0, calc.sqrt(3)/2), close: true, ..style, stroke: 0.5pt) port("0", (0, calc.sqrt(3)/2), 0deg) port("1", (-1/2+1/3, 0), 180deg) port("2", (+1/2-1/3, 0), 180deg) }) } #let link = (a, b) => { import draw: * stroke(2pt) bezier(a + ".p", b + ".p", a + ".c", b + ".c", stroke: 0.5pt) } #let con = agent() #let dup = agent(style: (fill: black)) ================================================ FILE: src/ast.rs ================================================ use TSPL::{new_parser, Parser, ParseError}; use highlight_error::highlight_error; use crate::hvm; use std::fmt::{Debug, Display}; use std::collections::{BTreeMap, BTreeSet}; // Types // ----- #[derive(Clone, Hash, PartialEq, Eq, Debug)] pub struct Numb(pub u32); #[derive(Clone, Hash, PartialEq, Eq, Debug)] pub enum Tree { Var { nam: String }, Ref { nam: String }, Era, Num { val: Numb }, Con { fst: Box, snd: Box }, Dup { fst: Box, snd: Box }, Opr { fst: Box, snd: Box 
}, Swi { fst: Box, snd: Box }, } pub type Redex = (bool, Tree, Tree); #[derive(Clone, Hash, PartialEq, Eq, Debug)] pub struct Net { pub root: Tree, pub rbag: Vec, } pub struct Book { pub defs: BTreeMap, } // Parser // ------ pub type ParseResult = std::result::Result; new_parser!(CoreParser); impl<'i> CoreParser<'i> { pub fn parse_numb_sym(&mut self) -> ParseResult { self.consume("[")?; // numeric casts if let Some(cast) = match () { _ if self.try_consume("u24") => Some(hvm::TY_U24), _ if self.try_consume("i24") => Some(hvm::TY_I24), _ if self.try_consume("f24") => Some(hvm::TY_F24), _ => None } { // Casts can't be partially applied, so nothing should follow. self.consume("]")?; return Ok(Numb(hvm::Numb::new_sym(cast).0)); } // Parses the symbol let op = hvm::Numb::new_sym(match () { // numeric operations _ if self.try_consume("+") => hvm::OP_ADD, _ if self.try_consume("-") => hvm::OP_SUB, _ if self.try_consume(":-") => hvm::FP_SUB, _ if self.try_consume("*") => hvm::OP_MUL, _ if self.try_consume("/") => hvm::OP_DIV, _ if self.try_consume(":/") => hvm::FP_DIV, _ if self.try_consume("%") => hvm::OP_REM, _ if self.try_consume(":%") => hvm::FP_REM, _ if self.try_consume("=") => hvm::OP_EQ, _ if self.try_consume("!") => hvm::OP_NEQ, _ if self.try_consume("<<") => hvm::OP_SHL, _ if self.try_consume(":<<") => hvm::FP_SHL, _ if self.try_consume(">>") => hvm::OP_SHR, _ if self.try_consume(":>>") => hvm::FP_SHR, _ if self.try_consume("<") => hvm::OP_LT, _ if self.try_consume(">") => hvm::OP_GT, _ if self.try_consume("&") => hvm::OP_AND, _ if self.try_consume("|") => hvm::OP_OR, _ if self.try_consume("^") => hvm::OP_XOR, _ => self.expected("operator symbol")?, }); self.skip_trivia(); // Syntax for partial operations, like `[*2]` let num = if self.peek_one() != Some(']') { hvm::Numb::partial(op, hvm::Numb(self.parse_numb_lit()?.0)) } else { op }; // Closes symbol bracket self.consume("]")?; // Returns the symbol return Ok(Numb(num.0)); } pub fn parse_numb_lit(&mut self) -> 
ParseResult { let ini = self.index; let num = self.take_while(|x| x.is_alphanumeric() || x == '+' || x == '-' || x == '.'); let mut num_parser = CoreParser::new(num); let end = self.index; Ok(Numb(if num.contains('.') || num.contains("inf") || num.contains("NaN") { let val: f32 = num.parse() .map_err(|err| { let msg = format!("invalid number literal: {}\n{}", err, highlight_error(ini, end, self.input)); self.expected_and::("number literal", &msg).unwrap_err() })?; hvm::Numb::new_f24(val) } else if num.starts_with('+') || num.starts_with('-') { *num_parser.index() += 1; let val = num_parser.parse_u64()? as i32; hvm::Numb::new_i24(if num.starts_with('-') { -val } else { val }) } else { let val = num_parser.parse_u64()? as u32; hvm::Numb::new_u24(val) }.0)) } pub fn parse_numb(&mut self) -> ParseResult { self.skip_trivia(); // Parses symbols (SYM) if let Some('[') = self.peek_one() { return self.parse_numb_sym(); // Parses numbers (U24,I24,F24) } else { return self.parse_numb_lit(); } } pub fn parse_tree(&mut self) -> ParseResult { self.skip_trivia(); //println!("aaa ||{}", &self.input[self.index..]); match self.peek_one() { Some('(') => { self.advance_one(); let fst = Box::new(self.parse_tree()?); self.skip_trivia(); let snd = Box::new(self.parse_tree()?); self.consume(")")?; Ok(Tree::Con { fst, snd }) } Some('{') => { self.advance_one(); let fst = Box::new(self.parse_tree()?); self.skip_trivia(); let snd = Box::new(self.parse_tree()?); self.consume("}")?; Ok(Tree::Dup { fst, snd }) } Some('$') => { self.advance_one(); self.consume("(")?; let fst = Box::new(self.parse_tree()?); self.skip_trivia(); let snd = Box::new(self.parse_tree()?); self.consume(")")?; Ok(Tree::Opr { fst, snd }) } Some('?') => { self.advance_one(); self.consume("(")?; let fst = Box::new(self.parse_tree()?); self.skip_trivia(); let snd = Box::new(self.parse_tree()?); self.consume(")")?; Ok(Tree::Swi { fst, snd }) } Some('@') => { self.advance_one(); let nam = self.parse_name()?; Ok(Tree::Ref { nam 
}) } Some('*') => { self.advance_one(); Ok(Tree::Era) } _ => { if let Some(c) = self.peek_one() { if "0123456789+-[".contains(c) { return Ok(Tree::Num { val: self.parse_numb()? }); } } let nam = self.parse_name()?; Ok(Tree::Var { nam }) } } } pub fn parse_net(&mut self) -> ParseResult { let root = self.parse_tree()?; let mut rbag = Vec::new(); self.skip_trivia(); while self.peek_one() == Some('&') { self.consume("&")?; let par = if let Some('!') = self.peek_one() { self.consume("!")?; true } else { false }; let fst = self.parse_tree()?; self.consume("~")?; let snd = self.parse_tree()?; rbag.push((par,fst,snd)); self.skip_trivia(); } Ok(Net { root, rbag }) } pub fn parse_book(&mut self) -> ParseResult { let mut defs = BTreeMap::new(); while !self.is_eof() { self.consume("@")?; let name = self.parse_name()?; self.consume("=")?; let net = self.parse_net()?; defs.insert(name, net); } Ok(Book { defs }) } fn try_consume(&mut self, str: &str) -> bool { let matches = self.peek_many(str.len()) == Some(str); if matches { self.advance_many(str.len()); } matches } } // Stringifier // ----------- impl Numb { pub fn show(&self) -> String { let numb = hvm::Numb(self.0); match numb.get_typ() { hvm::TY_SYM => match numb.get_sym() as hvm::Tag { // casts hvm::TY_U24 => "[u24]".to_string(), hvm::TY_I24 => "[i24]".to_string(), hvm::TY_F24 => "[f24]".to_string(), // operations hvm::OP_ADD => "[+]".to_string(), hvm::OP_SUB => "[-]".to_string(), hvm::FP_SUB => "[:-]".to_string(), hvm::OP_MUL => "[*]".to_string(), hvm::OP_DIV => "[/]".to_string(), hvm::FP_DIV => "[:/]".to_string(), hvm::OP_REM => "[%]".to_string(), hvm::FP_REM => "[:%]".to_string(), hvm::OP_EQ => "[=]".to_string(), hvm::OP_NEQ => "[!]".to_string(), hvm::OP_LT => "[<]".to_string(), hvm::OP_GT => "[>]".to_string(), hvm::OP_AND => "[&]".to_string(), hvm::OP_OR => "[|]".to_string(), hvm::OP_XOR => "[^]".to_string(), hvm::OP_SHL => "[<<]".to_string(), hvm::FP_SHL => "[:<<]".to_string(), hvm::OP_SHR => "[>>]".to_string(), 
hvm::FP_SHR => "[:>>]".to_string(), _ => "[?]".to_string(), } hvm::TY_U24 => { let val = numb.get_u24(); format!("{}", val) } hvm::TY_I24 => { let val = numb.get_i24(); format!("{:+}", val) } hvm::TY_F24 => { let val = numb.get_f24(); if val.is_infinite() { if val.is_sign_positive() { format!("+inf") } else { format!("-inf") } } else if val.is_nan() { format!("+NaN") } else { format!("{:?}", val) } } _ => { let typ = numb.get_typ(); let val = numb.get_u24(); format!("[{}0x{:07X}]", match typ { hvm::OP_ADD => "+", hvm::OP_SUB => "-", hvm::FP_SUB => ":-", hvm::OP_MUL => "*", hvm::OP_DIV => "/", hvm::FP_DIV => ":/", hvm::OP_REM => "%", hvm::FP_REM => ":%", hvm::OP_EQ => "=", hvm::OP_NEQ => "!", hvm::OP_LT => "<", hvm::OP_GT => ">", hvm::OP_AND => "&", hvm::OP_OR => "|", hvm::OP_XOR => "^", hvm::OP_SHL => "<<", hvm::FP_SHL => ":<<", hvm::OP_SHR => ">>", hvm::FP_SHR => ":>>", _ => "?", }, val) } } } } impl Tree { pub fn show(&self) -> String { match self { Tree::Var { nam } => nam.to_string(), Tree::Ref { nam } => format!("@{}", nam), Tree::Era => "*".to_string(), Tree::Num { val } => format!("{}", val.show()), Tree::Con { fst, snd } => format!("({} {})", fst.show(), snd.show()), Tree::Dup { fst, snd } => format!("{{{} {}}}", fst.show(), snd.show()), Tree::Opr { fst, snd } => format!("$({} {})", fst.show(), snd.show()), Tree::Swi { fst, snd } => format!("?({} {})", fst.show(), snd.show()), } } } impl Net { pub fn show(&self) -> String { let mut s = self.root.show(); for (par, fst, snd) in &self.rbag { s.push_str(" &"); s.push_str(if *par { "!" 
} else { " " }); s.push_str(&fst.show()); s.push_str(" ~ "); s.push_str(&snd.show()); } s } } impl Book { pub fn show(&self) -> String { let mut s = String::new(); for (name, net) in &self.defs { s.push_str("@"); s.push_str(name); s.push_str(" = "); s.push_str(&net.show()); s.push('\n'); } s } } // Readback // -------- impl Tree { pub fn readback(net: &hvm::GNet, port: hvm::Port, fids: &BTreeMap) -> Option { //println!("reading {}", port.show()); match port.get_tag() { hvm::VAR => { let got = net.enter(port); if got != port { return Tree::readback(net, got, fids); } else { return Some(Tree::Var { nam: format!("v{:x}", port.get_val()) }); } } hvm::REF => { return Some(Tree::Ref { nam: fids.get(&port.get_val())?.clone() }); } hvm::ERA => { return Some(Tree::Era); } hvm::NUM => { return Some(Tree::Num { val: Numb(port.get_val()) }); } hvm::CON => { let pair = net.node_load(port.get_val() as usize); let fst = Tree::readback(net, pair.get_fst(), fids)?; let snd = Tree::readback(net, pair.get_snd(), fids)?; return Some(Tree::Con { fst: Box::new(fst), snd: Box::new(snd) }); } hvm::DUP => { let pair = net.node_load(port.get_val() as usize); let fst = Tree::readback(net, pair.get_fst(), fids)?; let snd = Tree::readback(net, pair.get_snd(), fids)?; return Some(Tree::Dup { fst: Box::new(fst), snd: Box::new(snd) }); } hvm::OPR => { let pair = net.node_load(port.get_val() as usize); let fst = Tree::readback(net, pair.get_fst(), fids)?; let snd = Tree::readback(net, pair.get_snd(), fids)?; return Some(Tree::Opr { fst: Box::new(fst), snd: Box::new(snd) }); } hvm::SWI => { let pair = net.node_load(port.get_val() as usize); let fst = Tree::readback(net, pair.get_fst(), fids)?; let snd = Tree::readback(net, pair.get_snd(), fids)?; return Some(Tree::Swi { fst: Box::new(fst), snd: Box::new(snd) }); } _ => { unreachable!() } } } } impl Net { pub fn readback(net: &hvm::GNet, book: &hvm::Book) -> Option { let mut fids = BTreeMap::new(); for (fid, def) in book.defs.iter().enumerate() { 
fids.insert(fid as hvm::Val, def.name.clone()); } let root = net.enter(hvm::ROOT); let root = Tree::readback(net, root, &fids)?; let rbag = Vec::new(); return Some(Net { root, rbag }); } } // Def Builder // ----------- impl Tree { pub fn build(&self, def: &mut hvm::Def, fids: &BTreeMap, vars: &mut BTreeMap) -> hvm::Port { match self { Tree::Var { nam } => { if !vars.contains_key(nam) { vars.insert(nam.clone(), vars.len() as hvm::Val); def.vars += 1; } return hvm::Port::new(hvm::VAR, *vars.get(nam).unwrap()); } Tree::Ref { nam } => { if let Some(fid) = fids.get(nam) { return hvm::Port::new(hvm::REF, *fid); } else { panic!("Unbound definition: {}", nam); } } Tree::Era => { return hvm::Port::new(hvm::ERA, 0); } Tree::Num { val } => { return hvm::Port::new(hvm::NUM, val.0); } Tree::Con { fst, snd } => { let index = def.node.len(); def.node.push(hvm::Pair(0)); let p1 = fst.build(def, fids, vars); let p2 = snd.build(def, fids, vars); def.node[index] = hvm::Pair::new(p1, p2); return hvm::Port::new(hvm::CON, index as hvm::Val); } Tree::Dup { fst, snd } => { def.safe = false; let index = def.node.len(); def.node.push(hvm::Pair(0)); let p1 = fst.build(def, fids, vars); let p2 = snd.build(def, fids, vars); def.node[index] = hvm::Pair::new(p1, p2); return hvm::Port::new(hvm::DUP, index as hvm::Val); }, Tree::Opr { fst, snd } => { let index = def.node.len(); def.node.push(hvm::Pair(0)); let p1 = fst.build(def, fids, vars); let p2 = snd.build(def, fids, vars); def.node[index] = hvm::Pair::new(p1, p2); return hvm::Port::new(hvm::OPR, index as hvm::Val); }, Tree::Swi { fst, snd } => { let index = def.node.len(); def.node.push(hvm::Pair(0)); let p1 = fst.build(def, fids, vars); let p2 = snd.build(def, fids, vars); def.node[index] = hvm::Pair::new(p1, p2); return hvm::Port::new(hvm::SWI, index as hvm::Val); }, } } pub fn direct_dependencies<'name>(&'name self) -> BTreeSet<&'name str> { let mut stack: Vec<&Tree> = vec![self]; let mut acc: BTreeSet<&'name str> = BTreeSet::new(); while 
let Some(curr) = stack.pop() { match curr { Tree::Ref { nam } => { acc.insert(nam); }, Tree::Con { fst, snd } => { stack.push(fst); stack.push(snd); }, Tree::Dup { fst, snd } => { stack.push(fst); stack.push(snd); }, Tree::Opr { fst, snd } => { stack.push(fst); stack.push(snd); }, Tree::Swi { fst, snd } => { stack.push(fst); stack.push(snd); }, Tree::Num { val } => {}, Tree::Var { nam } => {}, Tree::Era => {}, }; } acc } } impl Net { pub fn build(&self, def: &mut hvm::Def, fids: &BTreeMap, vars: &mut BTreeMap) { let index = def.node.len(); def.root = self.root.build(def, fids, vars); for (par, fst, snd) in &self.rbag { let index = def.rbag.len(); def.rbag.push(hvm::Pair(0)); let p1 = fst.build(def, fids, vars); let p2 = snd.build(def, fids, vars); let rx = hvm::Pair::new(p1, p2); let rx = if *par { rx.set_par_flag() } else { rx }; def.rbag[index] = rx; } } } impl Book { pub fn parse(code: &str) -> ParseResult { CoreParser::new(code).parse_book() } pub fn build(&self) -> hvm::Book { let mut name_to_fid = BTreeMap::new(); let mut fid_to_name = BTreeMap::new(); fid_to_name.insert(0, "main".to_string()); name_to_fid.insert("main".to_string(), 0); for (_i, (name, _)) in self.defs.iter().enumerate() { if name != "main" { fid_to_name.insert(name_to_fid.len() as hvm::Val, name.clone()); name_to_fid.insert(name.clone(), name_to_fid.len() as hvm::Val); } } let mut book = hvm::Book { defs: Vec::new() }; for (fid, name) in &fid_to_name { let ast_def = self.defs.get(name).expect("missing `@main` definition"); let mut def = hvm::Def { name: name.clone(), safe: true, root: hvm::Port(0), rbag: vec![], node: vec![], vars: 0, }; ast_def.build(&mut def, &name_to_fid, &mut BTreeMap::new()); book.defs.push(def); } self.propagate_safety(&mut book, &name_to_fid); return book; } /// Propagate unsafe definitions to those that reference them. /// /// When calling this function, it is expected that definitions that are directly /// unsafe are already marked as such in the `compiled_book`. 
///
/// This does not completely solve the cloning safety in HVM. It only stops invalid
/// **global** definitions from being cloned, but local unsafe code can still be
/// cloned and can generate seemingly unexpected results, such as placing eraser
/// nodes in weird places. See HVM issue [#362](https://github.com/HigherOrderCO/HVM/issues/362)
/// for an example.
///
/// `lookup` maps each definition name to its index in `compiled_book.defs`.
/// Panics if a name in `self.defs` (or one of its dependents) is missing from
/// `lookup`.
fn propagate_safety(&self, compiled_book: &mut hvm::Book, lookup: &BTreeMap<String, hvm::Val>) {
  let dependents = self.direct_dependents();

  // Seed the worklist with the dependents of every definition that is
  // already marked unsafe.
  let mut pending: Vec<&str> = Vec::new();
  for name in self.defs.keys() {
    if !compiled_book.defs[lookup[name] as usize].safe {
      pending.extend(dependents[name.as_str()].iter().copied());
    }
  }

  // Flood-fill: a definition that is still safe when popped becomes unsafe and
  // enqueues its own dependents; one that is already unsafe was visited before.
  while let Some(curr) = pending.pop() {
    let def = &mut compiled_book.defs[lookup[curr] as usize];
    if def.safe {
      def.safe = false;
      pending.extend(dependents[curr].iter().copied());
    }
  }
}

/// Calculates the dependents of each definition, that is, if definition `A`
/// requires `B`, `B: A` is in the return map. This is used to propagate unsafe
/// definitions to others that depend on them.
///
/// This solution has linear complexity on the number of definitions in the
/// book and the number of direct references in each definition, but it also
/// traverses each definition's trees entirely once.
/// /// Complexity: O(d*t + r) /// - `d` is the number of definitions in the book /// - `r` is the number of direct references in each definition /// - `t` is the number of nodes in each tree fn direct_dependents<'name>(&'name self) -> BTreeMap<&'name str, BTreeSet<&'name str>> { let mut result = BTreeMap::new(); for (name, _) in self.defs.iter() { result.insert(name.as_str(), BTreeSet::new()); } let mut process = |tree: &'name Tree, name: &'name str| { for dependency in tree.direct_dependencies() { result .get_mut(dependency) .expect("global definition depends on undeclared reference") .insert(name); } }; for (name, net) in self.defs.iter() { process(&net.root, name); for (_, r1, r2) in net.rbag.iter() { process(r1, name); process(r2, name); } } result } } ================================================ FILE: src/cmp.rs ================================================ use crate::ast; use crate::hvm; use std::collections::HashMap; #[derive(Clone, Copy, PartialEq, Eq)] pub enum Target { CUDA, C } // Compiles a whole Book. pub fn compile_book(trg: Target, book: &hvm::Book) -> String { let mut code = String::new(); // Compiles functions for fid in 0..book.defs.len() { compile_def(trg, &mut code, book, 0, fid as hvm::Val); code.push_str(&format!("\n")); } // Compiles interact_call if trg == Target::CUDA { code.push_str(&format!("__device__ ")); } code.push_str(&format!("bool interact_call(Net *net, TM *tm, Port a, Port b) {{\n")); code.push_str(&format!(" u32 fid = get_val(a) & 0xFFFFFFF;\n")); code.push_str(&format!(" switch (fid) {{\n")); for (fid, def) in book.defs.iter().enumerate() { code.push_str(&format!(" case {}: return interact_call_{}(net, tm, a, b);\n", fid, &def.name.replace("/","_").replace(".","_").replace("-","_"))); } code.push_str(&format!(" default: return false;\n")); code.push_str(&format!(" }}\n")); code.push_str(&format!("}}")); return code; } // Compiles a single Def. 
pub fn compile_def(trg: Target, code: &mut String, book: &hvm::Book, tab: usize, fid: hvm::Val) { let def = &book.defs[fid as usize]; let fun = &def.name.replace("/","_").replace(".","_").replace("-","_"); // Initializes context let neo = &mut 0; // Generates function if trg == Target::CUDA { code.push_str(&format!("__device__ ")); } code.push_str(&format!("{}bool interact_call_{}(Net *net, TM *tm, Port a, Port b) {{\n", indent(tab), fun)); // Fast DUP-REF if def.safe { code.push_str(&format!("{}if (get_tag(b) == DUP) {{\n", indent(tab+1))); code.push_str(&format!("{}return interact_eras(net, tm, a, b);\n", indent(tab+2))); code.push_str(&format!("{}}}\n", indent(tab+1))); } code.push_str(&format!("{}u32 vl = 0;\n", indent(tab+1))); code.push_str(&format!("{}u32 nl = 0;\n", indent(tab+1))); // Allocs resources (using fast allocator) for i in 0 .. def.vars { code.push_str(&format!("{}Val v{:x} = vars_alloc_1(net, tm, &vl);\n", indent(tab+1), i)); } for i in 0 .. def.node.len() { code.push_str(&format!("{}Val n{:x} = node_alloc_1(net, tm, &nl);\n", indent(tab+1), i)); } code.push_str(&format!("{}if (0", indent(tab+1))); for i in 0 .. def.vars { code.push_str(&format!(" || !v{:x}", i)); } for i in 0 .. def.node.len() { code.push_str(&format!(" || !n{:x}", i)); } code.push_str(&format!(") {{\n")); code.push_str(&format!("{}return false;\n", indent(tab+2))); code.push_str(&format!("{}}}\n", indent(tab+1))); for i in 0 .. def.vars { code.push_str(&format!("{}vars_create(net, v{:x}, NONE);\n", indent(tab+1), i)); } // Allocs resources (using slow allocator) //code.push_str(&format!("{}// Allocates needed resources.\n", indent(tab+1))); //code.push_str(&format!("{}if (!get_resources(net, tm, {}, {}, {})) {{\n", indent(tab+1), def.rbag.len()+1, def.node.len(), def.vars)); //code.push_str(&format!("{}return false;\n", indent(tab+2))); //code.push_str(&format!("{}}}\n", indent(tab+1))); //for i in 0 .. 
def.node.len() { //code.push_str(&format!("{}Val n{:x} = tm->nloc[0x{:x}];\n", indent(tab+1), i, i)); //} //for i in 0 .. def.vars { //code.push_str(&format!("{}Val v{:x} = tm->vloc[0x{:x}];\n", indent(tab+1), i, i)); //} //for i in 0 .. def.vars { //code.push_str(&format!("{}vars_create(net, v{:x}, NONE);\n", indent(tab+1), i)); //} // Compiles root compile_link_fast(trg, code, book, neo, tab+1, def, def.root, "b"); // Compiles rbag for redex in &def.rbag { let fun = compile_node(trg, code, book, neo, tab+1, def, redex.get_fst()); let arg = compile_node(trg, code, book, neo, tab+1, def, redex.get_snd()); code.push_str(&format!("{}link(net, tm, {}, {});\n", indent(tab+1), &fun, &arg)); } // Return code.push_str(&format!("{}return true;\n", indent(tab+1))); code.push_str(&format!("{}}}\n", indent(tab))); } // Compiles a link, performing some pre-defined static reductions. pub fn compile_link_fast(trg: Target, code: &mut String, book: &hvm::Book, neo: &mut usize, tab: usize, def: &hvm::Def, a: hvm::Port, b: &str) { // ( a2) <~ (#X R) // --------------------------------- fast SWITCH // if X == 0: // a111 <~ R // a112 <~ ERAS // else: // a111 <~ ERAS // a112 <~ (#(X-1) R) if trg != Target::CUDA && a.get_tag() == hvm::CON { let a_ = &def.node[a.get_val() as usize]; let a1 = a_.get_fst(); let a2 = a_.get_snd(); if a1.get_tag() == hvm::SWI { let a1_ = &def.node[a1.get_val() as usize]; let a11 = a1_.get_fst(); let a12 = a1_.get_snd(); if a11.get_tag() == hvm::CON && a2.get_tag() == hvm::VAR && a12.0 == a2.0 { let a11_ = &def.node[a11.get_val() as usize]; let a111 = a11_.get_fst(); let a112 = a11_.get_snd(); let op = fresh(neo); let bv = fresh(neo); let x1 = fresh(neo); let x2 = fresh(neo); let nu = fresh(neo); code.push_str(&format!("{}bool {} = 0;\n", indent(tab), &op)); code.push_str(&format!("{}Pair {} = 0;\n", indent(tab), &bv)); code.push_str(&format!("{}Port {} = NONE;\n", indent(tab), &nu)); code.push_str(&format!("{}Port {} = NONE;\n", indent(tab), &x1)); 
code.push_str(&format!("{}Port {} = NONE;\n", indent(tab), &x2)); code.push_str(&format!("{}//fast switch\n", indent(tab))); code.push_str(&format!("{}if (get_tag({}) == CON) {{\n", indent(tab), b)); code.push_str(&format!("{}{} = node_load(net, get_val({}));\n", indent(tab+1), &bv, b)); // recycled code.push_str(&format!("{}{} = enter(net,get_fst({}));\n", indent(tab+1), &nu, &bv)); code.push_str(&format!("{}if (get_tag({}) == NUM) {{\n", indent(tab+1), &nu)); code.push_str(&format!("{}tm->itrs += 3;\n", indent(tab+2))); code.push_str(&format!("{}vars_take(net, v{});\n", indent(tab+2), a2.get_val())); code.push_str(&format!("{}{} = 1;\n", indent(tab+2), &op)); code.push_str(&format!("{}if (get_u24(get_val({})) == 0) {{\n", indent(tab+2), &nu)); code.push_str(&format!("{}node_take(net, get_val({}));\n", indent(tab+3), b)); code.push_str(&format!("{}{} = get_snd({});\n", indent(tab+3), &x1, &bv)); code.push_str(&format!("{}{} = new_port(ERA,0);\n", indent(tab+3), &x2)); code.push_str(&format!("{}}} else {{\n", indent(tab+2))); code.push_str(&format!("{}node_store(net, get_val({}), new_pair(new_port(NUM,new_u24(get_u24(get_val({}))-1)), get_snd({})));\n", indent(tab+3), b, &nu, &bv)); code.push_str(&format!("{}{} = new_port(ERA,0);\n", indent(tab+3), &x1)); code.push_str(&format!("{}{} = {};\n", indent(tab+3), &x2, b)); code.push_str(&format!("{}}}\n", indent(tab+2))); code.push_str(&format!("{}}} else {{\n", indent(tab+1))); code.push_str(&format!("{}node_store(net, get_val({}), new_pair({},get_snd({})));\n", indent(tab+2), b, &nu, &bv)); // update "entered" var code.push_str(&format!("{}}}\n", indent(tab+1))); code.push_str(&format!("{}}}\n", indent(tab+0))); compile_link_fast(trg, code, book, neo, tab, def, a111, &x1); compile_link_fast(trg, code, book, neo, tab, def, a112, &x2); code.push_str(&format!("{}if (!{}) {{\n", indent(tab), &op)); code.push_str(&format!("{}node_create(net, n{:x}, new_pair(new_port(SWI,n{}),new_port(VAR,v{})));\n", indent(tab+1), 
a.get_val(), a1.get_val(), a2.get_val())); code.push_str(&format!("{}node_create(net, n{:x}, new_pair(new_port(CON,n{}),new_port(VAR,v{})));\n", indent(tab+1), a1.get_val(), a11.get_val(), a12.get_val())); code.push_str(&format!("{}node_create(net, n{:x}, new_pair({},{}));\n", indent(tab+1), a11.get_val(), &x1, &x2)); link_or_store(trg, code, book, neo, tab+1, def, &format!("new_port(CON, n{:x})", a.get_val()), b); code.push_str(&format!("{}}}\n", indent(tab))); return; } } } // FIXME: REVIEW // <+ #B r> <~ #A // --------------- fast OPER // r <~ #(op(A,B)) if trg != Target::CUDA && a.get_tag() == hvm::OPR { let a_ = &def.node[a.get_val() as usize]; let a1 = a_.get_fst(); let a2 = a_.get_snd(); let op = fresh(neo); let x1 = compile_node(trg, code, book, neo, tab, def, a1); let x2 = fresh(neo); code.push_str(&format!("{}bool {} = 0;\n", indent(tab), &op)); code.push_str(&format!("{}Port {} = NONE;\n", indent(tab), &x2)); code.push_str(&format!("{}// fast oper\n", indent(tab))); code.push_str(&format!("{}if (get_tag({}) == NUM && get_tag({}) == NUM) {{\n", indent(tab), b, &x1)); code.push_str(&format!("{}tm->itrs += 1;\n", indent(tab+1))); code.push_str(&format!("{}{} = 1;\n", indent(tab+1), &op)); code.push_str(&format!("{}{} = new_port(NUM, operate(get_val({}), get_val({})));\n", indent(tab+1), &x2, b, &x1)); code.push_str(&format!("{}}}\n", indent(tab))); compile_link_fast(trg, code, book, neo, tab, def, a2, &x2); code.push_str(&format!("{}if (!{}) {{\n", indent(tab), &op)); code.push_str(&format!("{}node_create(net, n{:x}, new_pair({},{}));\n", indent(tab+1), a.get_val(), &x1, &x2)); link_or_store(trg, code, book, neo, tab+1, def, &format!("new_port(OPR, n{:x})", a.get_val()), b); code.push_str(&format!("{}}}\n", indent(tab))); return; } // FIXME: REVIEW // {a1 a2} <~ #v // ------------- Fast COPY // a1 <~ #v // a2 <~ #v if trg != Target::CUDA && a.get_tag() == hvm::DUP { let a_ = &def.node[a.get_val() as usize]; let p1 = a_.get_fst(); let p2 = a_.get_snd(); let 
op = fresh(neo); let x1 = fresh(neo); let x2 = fresh(neo); code.push_str(&format!("{}bool {} = 0;\n", indent(tab), &op)); code.push_str(&format!("{}Port {} = NONE;\n", indent(tab), &x1)); code.push_str(&format!("{}Port {} = NONE;\n", indent(tab), &x2)); code.push_str(&format!("{}// fast copy\n", indent(tab))); code.push_str(&format!("{}if (get_tag({}) == NUM) {{\n", indent(tab), b)); code.push_str(&format!("{}tm->itrs += 1;\n", indent(tab+1))); code.push_str(&format!("{}{} = 1;\n", indent(tab+1), &op)); code.push_str(&format!("{}{} = {};\n", indent(tab+1), &x1, b)); code.push_str(&format!("{}{} = {};\n", indent(tab+1), &x2, b)); code.push_str(&format!("{}}}\n", indent(tab))); compile_link_fast(trg, code, book, neo, tab, def, p2, &x2); compile_link_fast(trg, code, book, neo, tab, def, p1, &x1); code.push_str(&format!("{}if (!{}) {{\n", indent(tab), &op)); code.push_str(&format!("{}node_create(net, n{:x}, new_pair({},{}));\n", indent(tab+1), a.get_val(), x1, x2)); link_or_store(trg, code, book, neo, tab+1, def, &format!("new_port(DUP,n{:x})", a.get_val()), b); code.push_str(&format!("{}}}\n", indent(tab))); return; } // (a1 a2) <~ (x1 x2) // ------------------ Fast ANNI // a1 <~ x1 // a2 <~ x2 if trg != Target::CUDA && a.get_tag() == hvm::CON { let a_ = &def.node[a.get_val() as usize]; let a1 = a_.get_fst(); let a2 = a_.get_snd(); let op = fresh(neo); let bv = fresh(neo); let x1 = fresh(neo); let x2 = fresh(neo); code.push_str(&format!("{}bool {} = 0;\n", indent(tab), &op)); code.push_str(&format!("{}Pair {} = 0;\n", indent(tab), &bv)); code.push_str(&format!("{}Port {} = NONE;\n", indent(tab), &x1)); code.push_str(&format!("{}Port {} = NONE;\n", indent(tab), &x2)); code.push_str(&format!("{}// fast anni\n", indent(tab))); code.push_str(&format!("{}if (get_tag({}) == CON && node_load(net, get_val({})) != 0) {{\n", indent(tab), b, b)); //code.push_str(&format!("{}atomic_fetch_add(&FAST, 1);\n", indent(tab+1))); code.push_str(&format!("{}tm->itrs += 1;\n", 
indent(tab+1))); code.push_str(&format!("{}{} = 1;\n", indent(tab+1), &op)); code.push_str(&format!("{}{} = node_take(net, get_val({}));\n", indent(tab+1), &bv, b)); code.push_str(&format!("{}{} = get_fst({});\n", indent(tab+1), x1, &bv)); code.push_str(&format!("{}{} = get_snd({});\n", indent(tab+1), x2, &bv)); code.push_str(&format!("{}}}\n", indent(tab))); //code.push_str(&format!("{}else {{ atomic_fetch_add(&SLOW, 1); }}\n", indent(tab))); compile_link_fast(trg, code, book, neo, tab, def, a2, &x2); compile_link_fast(trg, code, book, neo, tab, def, a1, &x1); code.push_str(&format!("{}if (!{}) {{\n", indent(tab), &op)); code.push_str(&format!("{}node_create(net, n{:x}, new_pair({},{}));\n", indent(tab+1), a.get_val(), x1, x2)); link_or_store(trg, code, book, neo, tab+1, def, &format!("new_port(CON,n{:x})", a.get_val()), b); code.push_str(&format!("{}}}\n", indent(tab))); return; } // FIXME: since get_tag(NONE) == REF, comparing if something's tag is REF always has the issue of // returning true when that thing is NONE. this caused a bug in the optimization below. in // general, this is a potential source of bugs across the entire implementation, so we always // need to check that case. an alternative, of course, would be to make get_tag handle this, but // I'm concerned about the performance issues. 
so, instead, we should make sure that, across the // entire codebase, we never use get_tag expecting a REF on something that might be NONE // ATOM <~ * // --------- Fast VOID // nothing if trg != Target::CUDA && (a.get_tag() == hvm::NUM || a.get_tag() == hvm::ERA) { code.push_str(&format!("{}// fast void\n", indent(tab))); code.push_str(&format!("{}if (get_tag({}) == ERA || get_tag({}) == NUM) {{\n", indent(tab), b, b)); code.push_str(&format!("{}tm->itrs += 1;\n", indent(tab+1))); code.push_str(&format!("{}}} else {{\n", indent(tab))); compile_link_slow(trg, code, book, neo, tab+1, def, a, b); code.push_str(&format!("{}}}\n", indent(tab))); return; } compile_link_slow(trg, code, book, neo, tab, def, a, b); } // Compiles a link, without pre-defined reductions. pub fn compile_link_slow(trg: Target, code: &mut String, book: &hvm::Book, neo: &mut usize, tab: usize, def: &hvm::Def, a: hvm::Port, b: &str) { let a_node = compile_node(trg, code, book, neo, tab, def, a); link_or_store(trg, code, book, neo, tab, def, &a_node, b); } // TODO: comment pub fn link_or_store(trg: Target, code: &mut String, book: &hvm::Book, neo: &mut usize, tab: usize, def: &hvm::Def, a: &str, b: &str) { code.push_str(&format!("{}if ({} != NONE) {{\n", indent(tab), b)); code.push_str(&format!("{}link(net, tm, {}, {});\n", indent(tab+1), a, b)); code.push_str(&format!("{}}} else {{\n", indent(tab))); code.push_str(&format!("{}{} = {};\n", indent(tab+1), b, a)); code.push_str(&format!("{}}}\n", indent(tab))); } // Compiles just a node. 
// Emits the code that materialises port `a` of `def` and returns the text of
// the C expression denoting it.
//
// - Node ports: both children are compiled first (recursively), then a
//   `node_create` for slot `n<hex index>` is appended to `code`, and a
//   `new_port(<TAG>,n<hex index>)` expression is returned.
// - Variable ports: nothing is emitted; `new_port(VAR,v<hex index>)` is returned.
// - Any other port: nothing is emitted; the port is returned as a literal
//   `new_port(<TAG>,0x........)` with its value in 8-digit hexadecimal.
pub fn compile_node(trg: Target, code: &mut String, book: &hvm::Book, neo: &mut usize, tab: usize, def: &hvm::Def, a: hvm::Port) -> String {
  if a.is_nod() {
    let nd = &def.node[a.get_val() as usize];
    // Children are emitted before the parent's `node_create`.
    let p1 = compile_node(trg, code, book, neo, tab, def, nd.get_fst());
    let p2 = compile_node(trg, code, book, neo, tab, def, nd.get_snd());
    code.push_str(&format!("{}node_create(net, n{:x}, new_pair({},{}));\n", indent(tab), a.get_val(), p1, p2));
    return format!("new_port({},n{:x})", compile_tag(trg, a.get_tag()), a.get_val());
  } else if a.is_var() {
    return format!("new_port(VAR,v{:x})", a.get_val());
  } else {
    return format!("new_port({},0x{:08x})", compile_tag(trg, a.get_tag()), a.get_val());
  }
}

// Compiles an atomic port.
//fn compile_atom(trg: Target, port: hvm::Port) -> String {
  //return format!("new_port({},0x{:08x})/*atom*/", compile_tag(trg, port.get_tag()), port.get_val());
//}

// Compiles a tag: returns the name of the C constant for `tag`.
// `trg` is not used. Panics (unreachable!) on a tag outside the eight listed.
pub fn compile_tag(trg: Target, tag: hvm::Tag) -> &'static str {
  match tag {
    hvm::VAR => "VAR",
    hvm::REF => "REF",
    hvm::ERA => "ERA",
    hvm::NUM => "NUM",
    hvm::OPR => "OPR",
    hvm::SWI => "SWI",
    hvm::CON => "CON",
    hvm::DUP => "DUP",
    _ => unreachable!(),
  }
}

// Creates indentation: the indentation unit repeated `tab` times.
pub fn indent(tab: usize) -> String {
  return " ".repeat(tab);
}

// Generates a fresh name: increments `count` and returns `k<count>`, so the
// first name produced from a zeroed counter is `k1`.
fn fresh(count: &mut usize) -> String {
  *count += 1;
  format!("k{}", count)
}

================================================
FILE: src/hvm.c
================================================
// NOTE(review): the header names of the nine #include directives below are
// missing in this copy of the file. The code in this file uses fixed-width
// integer types, bool, C11 atomics, fprintf, isnan/fmodf, clock_gettime and
// sched_yield, so the corresponding headers are presumably among them --
// TODO confirm against the upstream source.
#include
#include
#include
#include
#include
#include
#include
#include
#include

// `debug(...)` prints to stderr when DEBUG is defined and expands to nothing otherwise.
#ifdef DEBUG
#define debug(...) fprintf(stderr, __VA_ARGS__)
#else
#define debug(...)
#endif

#define INTERPRETED
#define WITHOUT_MAIN

// Types
// --------

// Short aliases for the fixed-width scalar types used throughout this file.
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int32_t i32;
typedef float f32;
typedef double f64;

// Atomic counterparts of the unsigned types.
typedef _Atomic(u8) a8;
typedef _Atomic(u16) a16;
typedef _Atomic(u32) a32;
typedef _Atomic(u64) a64;

// Configuration
// -------------

// Threads per CPU (TPC_L2 is the log2 of the thread count; defaults to 0, i.e. 1 thread)
#ifndef TPC_L2
#define TPC_L2 0
#endif
#define TPC (1ul << TPC_L2)

// Types
// -----

// Local Types
typedef u8 Tag; // Tag ::= 3-bit (rounded up to u8)
typedef u32 Val; // Val ::= 29-bit (rounded up to u32)
typedef u32 Port; // Port ::= Tag + Val (fits a u32)
typedef u64 Pair; // Pair ::= Port + Port (fits a u64)

typedef a32 APort; // atomic Port
typedef a64 APair; // atomic Pair

// Rules
typedef u8 Rule; // Rule ::= 3-bit (rounded up to 8)

// Numbs
typedef u32 Numb; // Numb ::= 29-bit (rounded up to u32)

// Tags
#define VAR 0x0 // variable
#define REF 0x1 // reference
#define ERA 0x2 // eraser
#define NUM 0x3 // number
#define CON 0x4 // constructor
#define DUP 0x5 // duplicator
#define OPR 0x6 // operator
#define SWI 0x7 // switch

// Interaction Rule Values
#define LINK 0x0
#define CALL 0x1
#define VOID 0x2
#define ERAS 0x3
#define ANNI 0x4
#define COMM 0x5
#define OPER 0x6
#define SWIT 0x7

// Numbers
// Bounds of the 24-bit integer ranges, as floats (used to clamp float -> int casts).
static const f32 U24_MAX = (f32) (1 << 24) - 1;
static const f32 U24_MIN = 0.0;
static const f32 I24_MAX = (f32) (1 << 23) - 1;
static const f32 I24_MIN = (f32) (i32) ((-1u) << 23);

// Numb type tags (stored in the low 5 bits of a Numb, see get_typ).
// 0x00..0x03 are the value types; 0x04 and above are operations.
#define TY_SYM 0x00
#define TY_U24 0x01
#define TY_I24 0x02
#define TY_F24 0x03
#define OP_ADD 0x04
#define OP_SUB 0x05
#define FP_SUB 0x06
#define OP_MUL 0x07
#define OP_DIV 0x08
#define FP_DIV 0x09
#define OP_REM 0x0A
#define FP_REM 0x0B
#define OP_EQ 0x0C
#define OP_NEQ 0x0D
#define OP_LT 0x0E
#define OP_GT 0x0F
#define OP_AND 0x10
#define OP_OR 0x11
#define OP_XOR 0x12
#define OP_SHL 0x13
#define FP_SHL 0x14
#define OP_SHR 0x15
#define FP_SHR 0x16

// Constants
#define FREE 0x00000000
#define ROOT 0xFFFFFFF8
#define NONE 0xFFFFFFFF

// Cache Padding
#define CACHE_PAD 64

// Global Net
#define HLEN (1ul << 16) // max 64k high-priority redexes
#define RLEN (1ul << 24) // max 16m low-priority redexes
#define G_NODE_LEN (1ul << 29) // max 536m nodes
#define G_VARS_LEN (1ul << 29) // max 536m vars
#define G_RBAG_LEN (TPC * RLEN)

typedef struct Net {
  APair node_buf[G_NODE_LEN]; // global node buffer
  APort vars_buf[G_VARS_LEN]; // global vars buffer
  APair rbag_buf[G_RBAG_LEN]; // global rbag buffer
  a64 itrs; // interaction count
  a32 idle; // idle thread counter
} Net;

// Capacity of the per-definition redex and node buffers.
#define DEF_RBAG_LEN 0xFFF
#define DEF_NODE_LEN 0xFFF

// Top-Level Definition
typedef struct Def {
  char name[256];
  bool safe;
  u32 rbag_len;
  u32 node_len;
  u32 vars_len;
  Port root;
  Pair node_buf[DEF_NODE_LEN];
  Pair rbag_buf[DEF_RBAG_LEN];
} Def;

// Forward declaration: FFn refers to Book before Book is defined.
typedef struct Book Book;

// A Foreign Function
typedef struct {
  char name[256];
  Port (*func)(Net*, Book*, Port);
} FFn;

// Book of Definitions
typedef struct Book {
  u32 defs_len;
  Def defs_buf[0x4000];
  u32 ffns_len;
  FFn ffns_buf[0x4000];
} Book;

// Local Thread Memory
typedef struct TM {
  u32 tid; // thread id
  u32 itrs; // interaction count
  u32 nput; // next node allocation attempt index
  u32 vput; // next vars allocation attempt index
  u32 hput; // next hbag push index
  u32 rput; // next rbag push index
  u32 sidx; // steal index
  u32 nloc[0xFFF]; // global node allocation indices
  u32 vloc[0xFFF]; // global vars allocation indices
  Pair hbag_buf[HLEN]; // high-priority redexes
} TM;

// Debugger
// --------

typedef struct {
  char x[13];
} Show;

void put_u16(char* B, u16 val);
Show show_port(Port port);
Show show_rule(Rule rule);
void print_net(Net* net);
void pretty_print_numb(Numb word);
void pretty_print_port(Net* net, Book* book, Port port);

// Port: Constructor and Getters
// -----------------------------

// Packs a port: the tag goes in the low 3 bits, the value in the bits above.
static inline Port new_port(Tag tag, Val val) {
  return (val << 3) | tag;
}

// Low 3 bits of the port.
static inline Tag get_tag(Port port) {
  return port & 7;
}

// Everything above the 3 tag bits.
static inline Val get_val(Port port) {
  return port >> 3;
}

// Pair: Constructor and Getters
// -----------------------------

// Packs a pair: `fst` goes in the low 32 bits, `snd` in the high 32 bits.
static inline const Pair new_pair(Port fst, Port snd) {
  return ((u64)snd << 32) | fst;
}

static inline Port get_fst(Pair pair) {
  return pair & 0xFFFFFFFF;
}

static inline Port get_snd(Pair pair) {
  return pair >> 32;
}

// Sets the "par" flag of a pair: bit 28 of the first port's value.
// Only pairs whose first port is a REF carry the flag; any other pair is
// returned unchanged.
Pair set_par_flag(Pair pair) {
  Port p1 = get_fst(pair);
  Port p2 = get_snd(pair);
  if (get_tag(p1) == REF) {
    return new_pair(new_port(get_tag(p1), get_val(p1) | 0x10000000), p2);
  } else {
    return pair;
  }
}

// Clears the "par" flag: keeps only the low 28 bits of a REF first port's
// value. Any other pair is returned unchanged.
Pair clr_par_flag(Pair pair) {
  Port p1 = get_fst(pair);
  Port p2 = get_snd(pair);
  if (get_tag(p1) == REF) {
    return new_pair(new_port(get_tag(p1), get_val(p1) & 0xFFFFFFF), p2);
  } else {
    return pair;
  }
}

// True if the first port is a REF whose value has the "par" flag (bit 28) set.
bool get_par_flag(Pair pair) {
  Port p1 = get_fst(pair);
  if (get_tag(p1) == REF) {
    return (get_val(p1) >> 28) == 1;
  } else {
    return false;
  }
}

// Utils
// -----

// Swaps two ports.
static inline void swap(Port *a, Port *b) {
  Port x = *a; *a = *b; *b = x;
}

// Smaller of two u32 values.
static inline u32 min(u32 a, u32 b) {
  return (a < b) ? a : b;
}

// Restricts `x` to the closed range [min, max].
static inline f32 clamp(f32 x, f32 min, f32 max) {
  const f32 t = x < min ? min : x;
  return (t > max) ? max : t;
}

// A simple spin-wait barrier using atomic operations
a64 a_reached = 0; // number of threads that reached the current barrier
a64 a_barrier = 0; // number of barriers passed during this program
// Returns once TPC threads have called it: the last caller to arrive resets
// the arrival counter and bumps the barrier generation; every other caller
// yields until it observes the generation change.
void sync_threads() {
  u64 barrier_old = atomic_load_explicit(&a_barrier, memory_order_relaxed);
  if (atomic_fetch_add_explicit(&a_reached, 1, memory_order_relaxed) == (TPC - 1)) {
    // Last thread to reach the barrier resets the counter and advances the barrier
    atomic_store_explicit(&a_reached, 0, memory_order_relaxed);
    atomic_store_explicit(&a_barrier, barrier_old + 1, memory_order_release);
  } else {
    // NOTE(review): `tries` is never read or updated; it looks like a leftover.
    u32 tries = 0;
    while (atomic_load_explicit(&a_barrier, memory_order_acquire) == barrier_old) {
      sched_yield();
    }
  }
}

// Global sum function
// Each thread contributes `x`; after the first barrier every thread reads the
// same total. The second barrier ensures all threads have read it before the
// accumulator is reset to 0 for the next call.
static a32 GLOBAL_SUM = 0;
u32 global_sum(u32 x) {
  atomic_fetch_add_explicit(&GLOBAL_SUM, x, memory_order_relaxed);
  sync_threads();
  u32 sum = atomic_load_explicit(&GLOBAL_SUM, memory_order_relaxed);
  sync_threads();
  atomic_store_explicit(&GLOBAL_SUM, 0, memory_order_relaxed);
  return sum;
}

// TODO: write a time64() function that returns the time as fast as possible as a u64
// Current CLOCK_MONOTONIC time, in nanoseconds.
static inline u64 time64() {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return (u64)ts.tv_sec * 1000000000ULL + (u64)ts.tv_nsec;
}

// Ports / Pairs / Rules
// ---------------------

// True if this port has a pointer to a node (tag is CON, DUP, OPR or SWI).
static inline bool is_nod(Port a) {
  return get_tag(a) >= CON;
}

// True if this port is a variable.
static inline bool is_var(Port a) {
  return get_tag(a) == VAR;
}

// Given two tags, gets their interaction rule.
static inline Rule get_rule(Port a, Port b) { const u8 table[8][8] = { //VAR REF ERA NUM CON DUP OPR SWI {LINK,LINK,LINK,LINK,LINK,LINK,LINK,LINK}, // VAR {LINK,VOID,VOID,VOID,CALL,CALL,CALL,CALL}, // REF {LINK,VOID,VOID,VOID,ERAS,ERAS,ERAS,ERAS}, // ERA {LINK,VOID,VOID,VOID,ERAS,ERAS,OPER,SWIT}, // NUM {LINK,CALL,ERAS,ERAS,ANNI,COMM,COMM,COMM}, // CON {LINK,CALL,ERAS,ERAS,COMM,ANNI,COMM,COMM}, // DUP {LINK,CALL,ERAS,OPER,COMM,COMM,ANNI,COMM}, // OPR {LINK,CALL,ERAS,SWIT,COMM,COMM,COMM,ANNI}, // SWI }; return table[get_tag(a)][get_tag(b)]; } // Same as above, but receiving a pair. static inline Rule get_pair_rule(Pair AB) { return get_rule(get_fst(AB), get_snd(AB)); } // Should we swap ports A and B before reducing this rule? static inline bool should_swap(Port A, Port B) { return get_tag(B) < get_tag(A); } // Gets a rule's priority static inline bool is_high_priority(Rule rule) { // TODO: this needs to be more readable return (bool)((0b00011101 >> rule) & 1); } // Adjusts a newly allocated port. static inline Port adjust_port(Net* net, TM* tm, Port port) { Tag tag = get_tag(port); Val val = get_val(port); if (is_nod(port)) return new_port(tag, tm->nloc[val]); if (is_var(port)) return new_port(tag, tm->vloc[val]); return new_port(tag, val); } // Adjusts a newly allocated pair. 
static inline Pair adjust_pair(Net* net, TM* tm, Pair pair) { Port p1 = adjust_port(net, tm, get_fst(pair)); Port p2 = adjust_port(net, tm, get_snd(pair)); return new_pair(p1, p2); } // Numbs // ----- // Constructor and getters for SYM (operation selector) static inline Numb new_sym(u32 val) { return (val << 5) | TY_SYM; } static inline u32 get_sym(Numb word) { return (word >> 5); } // Constructor and getters for U24 (unsigned 24-bit integer) static inline Numb new_u24(u32 val) { return (val << 5) | TY_U24; } static inline u32 get_u24(Numb word) { return word >> 5; } // Constructor and getters for I24 (signed 24-bit integer) static inline Numb new_i24(i32 val) { return ((u32)val << 5) | TY_I24; } static inline i32 get_i24(Numb word) { return ((i32)word) << 3 >> 8; } // Constructor and getters for F24 (24-bit float) static inline Numb new_f24(float val) { u32 bits = *(u32*)&val; u32 shifted_bits = bits >> 8; u32 lost_bits = bits & 0xFF; // round ties to even shifted_bits += (!isnan(val)) & ((lost_bits - ((lost_bits >> 7) & !shifted_bits)) >> 7); // ensure NaNs don't become infinities shifted_bits |= isnan(val); return (shifted_bits << 5) | TY_F24; } static inline float get_f24(Numb word) { u32 bits = (word << 3) & 0xFFFFFF00; return *(float*)&bits; } // Flip flag static inline Tag get_typ(Numb word) { return word & 0x1F; } static inline bool is_num(Numb word) { return get_typ(word) >= TY_U24 && get_typ(word) <= TY_F24; } static inline bool is_cast(Numb word) { return get_typ(word) == TY_SYM && get_sym(word) >= TY_U24 && get_sym(word) <= TY_F24; } // Partial application static inline Numb partial(Numb a, Numb b) { return (b & ~0x1F) | get_sym(a); } // Cast a number to another type. 
// The semantics are meant to spiritually resemble rust's numeric casts: // - i24 <-> u24: is just reinterpretation of bits // - f24 -> i24, // f24 -> u24: casts to the "closest" integer representing this float, // saturating if out of range and 0 if NaN // - i24 -> f24, // u24 -> f24: casts to the "closest" float representing this integer. static inline Numb cast(Numb a, Numb b) { if (get_sym(a) == TY_U24 && get_typ(b) == TY_U24) return b; if (get_sym(a) == TY_U24 && get_typ(b) == TY_I24) { // reinterpret bits i32 val = get_i24(b); return new_u24(*(u32*) &val); } if (get_sym(a) == TY_U24 && get_typ(b) == TY_F24) { f32 val = get_f24(b); if (isnan(val)) { return new_u24(0); } return new_u24((u32) clamp(val, U24_MIN, U24_MAX)); } if (get_sym(a) == TY_I24 && get_typ(b) == TY_U24) { // reinterpret bits u32 val = get_u24(b); return new_i24(*(i32*) &val); } if (get_sym(a) == TY_I24 && get_typ(b) == TY_I24) return b; if (get_sym(a) == TY_I24 && get_typ(b) == TY_F24) { f32 val = get_f24(b); if (isnan(val)) { return new_i24(0); } return new_i24((i32) clamp(val, I24_MIN, I24_MAX)); } if (get_sym(a) == TY_F24 && get_typ(b) == TY_U24) return new_f24((f32) get_u24(b)); if (get_sym(a) == TY_F24 && get_typ(b) == TY_I24) return new_f24((f32) get_i24(b)); if (get_sym(a) == TY_F24 && get_typ(b) == TY_F24) return b; return new_u24(0); } // Operate function static inline Numb operate(Numb a, Numb b) { Tag at = get_typ(a); Tag bt = get_typ(b); if (at == TY_SYM && bt == TY_SYM) { return new_u24(0); } if (is_cast(a) && is_num(b)) { return cast(a, b); } if (is_cast(b) && is_num(a)) { return cast(b, a); } if (at == TY_SYM && bt != TY_SYM) { return partial(a, b); } if (at != TY_SYM && bt == TY_SYM) { return partial(b, a); } if (at >= OP_ADD && bt >= OP_ADD) { return new_u24(0); } if (at < OP_ADD && bt < OP_ADD) { return new_u24(0); } Tag op, ty; Numb swp; if (at >= OP_ADD) { op = at; ty = bt; } else { op = bt; ty = at; swp = a; a = b; b = swp; } switch (ty) { case TY_U24: { u32 av = 
get_u24(a); u32 bv = get_u24(b); switch (op) { case OP_ADD: return new_u24(av + bv); case OP_SUB: return new_u24(av - bv); case FP_SUB: return new_u24(bv - av); case OP_MUL: return new_u24(av * bv); case OP_DIV: return new_u24(av / bv); case FP_DIV: return new_u24(bv / av); case OP_REM: return new_u24(av % bv); case FP_REM: return new_u24(bv % av); case OP_EQ: return new_u24(av == bv); case OP_NEQ: return new_u24(av != bv); case OP_LT: return new_u24(av < bv); case OP_GT: return new_u24(av > bv); case OP_AND: return new_u24(av & bv); case OP_OR: return new_u24(av | bv); case OP_XOR: return new_u24(av ^ bv); case OP_SHL: return new_u24(av << (bv & 31)); case FP_SHL: return new_u24(bv << (av & 31)); case OP_SHR: return new_u24(av >> (bv & 31)); case FP_SHR: return new_u24(bv >> (av & 31)); default: return new_u24(0); } } case TY_I24: { i32 av = get_i24(a); i32 bv = get_i24(b); switch (op) { case OP_ADD: return new_i24(av + bv); case OP_SUB: return new_i24(av - bv); case FP_SUB: return new_i24(bv - av); case OP_MUL: return new_i24(av * bv); case OP_DIV: return new_i24(av / bv); case FP_DIV: return new_i24(bv / av); case OP_REM: return new_i24(av % bv); case FP_REM: return new_i24(bv % av); case OP_EQ: return new_u24(av == bv); case OP_NEQ: return new_u24(av != bv); case OP_LT: return new_u24(av < bv); case OP_GT: return new_u24(av > bv); case OP_AND: return new_i24(av & bv); case OP_OR: return new_i24(av | bv); case OP_XOR: return new_i24(av ^ bv); default: return new_i24(0); } } case TY_F24: { float av = get_f24(a); float bv = get_f24(b); switch (op) { case OP_ADD: return new_f24(av + bv); case OP_SUB: return new_f24(av - bv); case FP_SUB: return new_f24(bv - av); case OP_MUL: return new_f24(av * bv); case OP_DIV: return new_f24(av / bv); case FP_DIV: return new_f24(bv / av); case OP_REM: return new_f24(fmodf(av, bv)); case FP_REM: return new_f24(fmodf(bv, av)); case OP_EQ: return new_u24(av == bv); case OP_NEQ: return new_u24(av != bv); case OP_LT: return new_u24(av 
< bv); case OP_GT: return new_u24(av > bv); case OP_AND: return new_f24(atan2f(av, bv)); case OP_OR: return new_f24(logf(bv) / logf(av)); case OP_XOR: return new_f24(powf(av, bv)); case OP_SHL: return new_f24(sin(av + bv)); case OP_SHR: return new_f24(tan(av + bv)); default: return new_f24(0); } } default: return new_u24(0); } } // RBag // ---- // FIXME: what about some bound checks? static inline void push_redex(Net* net, TM* tm, Pair redex) { #ifdef DEBUG bool free_local = tm->hput < HLEN; bool free_global = tm->rput < RLEN; if (!free_global || !free_local) { debug("push_redex: limited resources, maybe corrupting memory\n"); } #endif if (is_high_priority(get_pair_rule(redex))) { tm->hbag_buf[tm->hput++] = redex; } else { atomic_store_explicit(&net->rbag_buf[tm->tid*(G_RBAG_LEN/TPC) + (tm->rput++)], redex, memory_order_relaxed); } } static inline Pair pop_redex(Net* net, TM* tm) { if (tm->hput > 0) { return tm->hbag_buf[--tm->hput]; } else if (tm->rput > 0) { return atomic_exchange_explicit(&net->rbag_buf[tm->tid*(G_RBAG_LEN/TPC) + (--tm->rput)], 0, memory_order_relaxed); } else { return 0; } } static inline u32 rbag_len(Net* net, TM* tm) { return tm->rput + tm->hput; } // TM // -- static TM* tm[TPC]; TM* tm_new(u32 tid) { TM* tm = malloc(sizeof(TM)); tm->tid = tid; tm->itrs = 0; tm->nput = 1; tm->vput = 1; tm->rput = 0; tm->hput = 0; tm->sidx = 0; return tm; } void alloc_static_tms() { for (u32 t = 0; t < TPC; ++t) { tm[t] = tm_new(t); } } void free_static_tms() { for (u32 t = 0; t < TPC; ++t) { free(tm[t]); } } // Net // ---- // Stores a new node on global. static inline void node_create(Net* net, u32 loc, Pair val) { atomic_store_explicit(&net->node_buf[loc], val, memory_order_relaxed); } // Stores a var on global. static inline void vars_create(Net* net, u32 var, Port val) { atomic_store_explicit(&net->vars_buf[var], val, memory_order_relaxed); } // Reads a node from global. 
static inline Pair node_load(Net* net, u32 loc) { return atomic_load_explicit(&net->node_buf[loc], memory_order_relaxed); } // Reads a var from global. static inline Port vars_load(Net* net, u32 var) { return atomic_load_explicit(&net->vars_buf[var], memory_order_relaxed); } // Stores a node on global. static inline void node_store(Net* net, u32 loc, Pair val) { atomic_store_explicit(&net->node_buf[loc], val, memory_order_relaxed); } // Exchanges a node on global by a value. Returns old. static inline Pair node_exchange(Net* net, u32 loc, Pair val) { return atomic_exchange_explicit(&net->node_buf[loc], val, memory_order_relaxed); } // Exchanges a var on global by a value. Returns old. static inline Port vars_exchange(Net* net, u32 var, Port val) { return atomic_exchange_explicit(&net->vars_buf[var], val, memory_order_relaxed); } // Takes a node. static inline Pair node_take(Net* net, u32 loc) { return node_exchange(net, loc, 0); } // Takes a var. static inline Port vars_take(Net* net, u32 var) { return vars_exchange(net, var, 0); } // Net // --- // Initializes a net. 
static inline Net* net_new() { Net* net = calloc(1, sizeof(Net)); atomic_store(&net->itrs, 0); atomic_store(&net->idle, 0); return net; } // Allocator // --------- u32 node_alloc_1(Net* net, TM* tm, u32* lps) { while (true) { u32 lc = tm->tid*(G_NODE_LEN/TPC) + (tm->nput%(G_NODE_LEN/TPC)); Pair elem = net->node_buf[lc]; tm->nput += 1; if (lc > 0 && elem == 0) { return lc; } // FIXME: check this decently if (++(*lps) >= G_NODE_LEN/TPC) printf("OOM\n"); } } u32 vars_alloc_1(Net* net, TM* tm, u32* lps) { while (true) { u32 lc = tm->tid*(G_NODE_LEN/TPC) + (tm->vput%(G_NODE_LEN/TPC)); Port elem = net->vars_buf[lc]; tm->vput += 1; if (lc > 0 && elem == 0) { return lc; } // FIXME: check this decently if (++(*lps) >= G_NODE_LEN/TPC) printf("OOM\n"); } } u32 node_alloc(Net* net, TM* tm, u32 num) { u32 got = 0; u32 lps = 0; while (got < num) { u32 lc = tm->tid*(G_NODE_LEN/TPC) + (tm->nput%(G_NODE_LEN/TPC)); Pair elem = net->node_buf[lc]; tm->nput += 1; if (lc > 0 && elem == 0) { tm->nloc[got++] = lc; } // FIXME: check this decently if (++lps >= G_NODE_LEN/TPC) printf("OOM\n"); } return got; } u32 vars_alloc(Net* net, TM* tm, u32 num) { u32 got = 0; u32 lps = 0; while (got < num) { u32 lc = tm->tid*(G_NODE_LEN/TPC) + (tm->vput%(G_NODE_LEN/TPC)); Port elem = net->vars_buf[lc]; tm->vput += 1; if (lc > 0 && elem == 0) { tm->vloc[got++] = lc; } // FIXME: check this decently if (++lps >= G_NODE_LEN/TPC) printf("OOM\n"); } return got; } // Gets the necessary resources for an interaction. Returns success. static inline bool get_resources(Net* net, TM* tm, u32 need_rbag, u32 need_node, u32 need_vars) { u32 got_rbag = min(RLEN - tm->rput, HLEN - tm->hput); u32 got_node = node_alloc(net, tm, need_node); u32 got_vars = vars_alloc(net, tm, need_vars); return got_rbag >= need_rbag && got_node >= need_node && got_vars >= need_vars; } // Linking // ------- // Peeks a variable's final target without modifying it. 
static inline Port peek(Net* net, Port var) { while (get_tag(var) == VAR) { Port val = vars_load(net, get_val(var)); if (val == NONE) break; if (val == 0) break; var = val; } return var; } // Finds a variable's value. static inline Port enter(Net* net, Port var) { // While `B` is VAR: extend it (as an optimization) while (get_tag(var) == VAR) { // Takes the current `var` substitution as `val` Port val = vars_exchange(net, get_val(var), NONE); // If there was no `val`, stop, as there is no extension if (val == NONE || val == 0) { break; } // Otherwise, delete `B` (we own both) and continue vars_take(net, get_val(var)); var = val; } return var; } // Atomically Links `A ~ B`. static inline void link(Net* net, TM* tm, Port A, Port B) { // Attempts to directionally point `A ~> B` while (true) { // If `A` is NODE: swap `A` and `B`, and continue if (get_tag(A) != VAR && get_tag(B) == VAR) { Port X = A; A = B; B = X; } // If `A` is NODE: create the `A ~ B` redex if (get_tag(A) != VAR) { push_redex(net, tm, new_pair(A, B)); // TODO: move global ports to local break; } // Extends B (as an optimization) B = enter(net, B); // Since `A` is VAR: point `A ~> B`. // Stores `A -> B`, taking the current `A` subst as `A'` Port A_ = vars_exchange(net, get_val(A), B); // If there was no `A'`, stop, as we lost B's ownership if (A_ == NONE) { break; } //if (A_ == 0) { ? } // FIXME: must handle on the move-to-global algo // Otherwise, delete `A` (we own both) and link `A' ~ B` vars_take(net, get_val(A)); A = A_; } } // Links `A ~ B` (as a pair). static inline void link_pair(Net* net, TM* tm, Pair AB) { link(net, tm, get_fst(AB), get_snd(AB)); } // Interactions // ------------ // The Link Interaction. static inline bool interact_link(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 1, 0, 0)) { debug("interact_link: get_resources failed\n"); return false; } // Links. 
link_pair(net, tm, new_pair(a, b)); return true; } // Declared here for use in call interactions. static inline bool interact_eras(Net* net, TM* tm, Port a, Port b); // The Call Interaction. #ifdef COMPILED ///COMPILED_INTERACT_CALL/// #else static inline bool interact_call(Net* net, TM* tm, Port a, Port b, Book* book) { // Loads Definition. u32 fid = get_val(a) & 0xFFFFFFF; Def* def = &book->defs_buf[fid]; // Copy Optimization. if (def->safe && get_tag(b) == DUP) { return interact_eras(net, tm, a, b); } // Allocates needed nodes and vars. if (!get_resources(net, tm, def->rbag_len + 1, def->node_len, def->vars_len)) { debug("interact_call: get_resources failed\n"); return false; } // Stores new vars. for (u32 i = 0; i < def->vars_len; ++i) { vars_create(net, tm->vloc[i], NONE); } // Stores new nodes. for (u32 i = 0; i < def->node_len; ++i) { node_create(net, tm->nloc[i], adjust_pair(net, tm, def->node_buf[i])); } // Links. for (u32 i = 0; i < def->rbag_len; ++i) { link_pair(net, tm, adjust_pair(net, tm, def->rbag_buf[i])); } link_pair(net, tm, new_pair(adjust_port(net, tm, def->root), b)); return true; } #endif // The Void Interaction. static inline bool interact_void(Net* net, TM* tm, Port a, Port b) { return true; } // The Eras Interaction. static inline bool interact_eras(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 2, 0, 0)) { debug("interact_eras: get_resources failed\n"); return false; } // Checks availability if (node_load(net, get_val(b)) == 0) { return false; } // Loads ports. Pair B = node_exchange(net, get_val(b), 0); Port B1 = get_fst(B); Port B2 = get_snd(B); // Links. link_pair(net, tm, new_pair(a, B1)); link_pair(net, tm, new_pair(a, B2)); return true; } // The Anni Interaction. static inline bool interact_anni(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. 
if (!get_resources(net, tm, 2, 0, 0)) { debug("interact_anni: get_resources failed\n"); return false; } // Checks availability if (node_load(net, get_val(a)) == 0 || node_load(net, get_val(b)) == 0) { return false; } // Loads ports. Pair A = node_take(net, get_val(a)); Port A1 = get_fst(A); Port A2 = get_snd(A); Pair B = node_take(net, get_val(b)); Port B1 = get_fst(B); Port B2 = get_snd(B); // Links. link_pair(net, tm, new_pair(A1, B1)); link_pair(net, tm, new_pair(A2, B2)); return true; } // The Comm Interaction. static inline bool interact_comm(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 4, 4, 4)) { debug("interact_comm: get_resources failed\n"); return false; } // Checks availability if (node_load(net, get_val(a)) == 0 || node_load(net, get_val(b)) == 0) { return false; } // Loads ports. Pair A = node_take(net, get_val(a)); Port A1 = get_fst(A); Port A2 = get_snd(A); Pair B = node_take(net, get_val(b)); Port B1 = get_fst(B); Port B2 = get_snd(B); // Stores new vars. vars_create(net, tm->vloc[0], NONE); vars_create(net, tm->vloc[1], NONE); vars_create(net, tm->vloc[2], NONE); vars_create(net, tm->vloc[3], NONE); // Stores new nodes. node_create(net, tm->nloc[0], new_pair(new_port(VAR, tm->vloc[0]), new_port(VAR, tm->vloc[1]))); node_create(net, tm->nloc[1], new_pair(new_port(VAR, tm->vloc[2]), new_port(VAR, tm->vloc[3]))); node_create(net, tm->nloc[2], new_pair(new_port(VAR, tm->vloc[0]), new_port(VAR, tm->vloc[2]))); node_create(net, tm->nloc[3], new_pair(new_port(VAR, tm->vloc[1]), new_port(VAR, tm->vloc[3]))); // Links. link_pair(net, tm, new_pair(new_port(get_tag(b), tm->nloc[0]), A1)); link_pair(net, tm, new_pair(new_port(get_tag(b), tm->nloc[1]), A2)); link_pair(net, tm, new_pair(new_port(get_tag(a), tm->nloc[2]), B1)); link_pair(net, tm, new_pair(new_port(get_tag(a), tm->nloc[3]), B2)); return true; } // The Oper Interaction. 
static inline bool interact_oper(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 1, 1, 0)) { debug("interact_oper: get_resources failed\n"); return false; } // Checks availability if (node_load(net, get_val(b)) == 0) { return false; } // Loads ports. Val av = get_val(a); Pair B = node_take(net, get_val(b)); Port B1 = get_fst(B); Port B2 = enter(net, get_snd(B)); // Performs operation. if (get_tag(B1) == NUM) { Val bv = get_val(B1); Numb cv = operate(av, bv); link_pair(net, tm, new_pair(new_port(NUM, cv), B2)); } else { node_create(net, tm->nloc[0], new_pair(a, B2)); link_pair(net, tm, new_pair(B1, new_port(OPR, tm->nloc[0]))); } return true; } // The Swit Interaction. static inline bool interact_swit(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 1, 2, 0)) { debug("interact_swit: get_resources failed\n"); return false; } // Checks availability if (node_load(net, get_val(b)) == 0) { return false; } // Loads ports. u32 av = get_u24(get_val(a)); Pair B = node_take(net, get_val(b)); Port B1 = get_fst(B); Port B2 = get_snd(B); // Stores new nodes. if (av == 0) { node_create(net, tm->nloc[0], new_pair(B2, new_port(ERA,0))); link_pair(net, tm, new_pair(new_port(CON, tm->nloc[0]), B1)); } else { node_create(net, tm->nloc[0], new_pair(new_port(ERA,0), new_port(CON, tm->nloc[1]))); node_create(net, tm->nloc[1], new_pair(new_port(NUM, new_u24(av-1)), B2)); link_pair(net, tm, new_pair(new_port(CON, tm->nloc[0]), B1)); } return true; } // Pops a local redex and performs a single interaction. static inline bool interact(Net* net, TM* tm, Book* book) { // Pops a redex. Pair redex = pop_redex(net, tm); // If there is no redex, stop. if (redex != 0) { // Gets redex ports A and B. Port a = get_fst(redex); Port b = get_snd(redex); // Gets the rule type. Rule rule = get_rule(a, b); // Used for root redex. 
if (get_tag(a) == REF && b == ROOT) { rule = CALL; // Swaps ports if necessary. } else if (should_swap(a,b)) { swap(&a, &b); } // Dispatches interaction rule. bool success; switch (rule) { case LINK: success = interact_link(net, tm, a, b); break; #ifdef COMPILED case CALL: success = interact_call(net, tm, a, b); break; #else case CALL: success = interact_call(net, tm, a, b, book); break; #endif case VOID: success = interact_void(net, tm, a, b); break; case ERAS: success = interact_eras(net, tm, a, b); break; case ANNI: success = interact_anni(net, tm, a, b); break; case COMM: success = interact_comm(net, tm, a, b); break; case OPER: success = interact_oper(net, tm, a, b); break; case SWIT: success = interact_swit(net, tm, a, b); break; } // If error, pushes redex back. if (!success) { push_redex(net, tm, redex); return false; // Else, increments the interaction count. } else if (rule != LINK) { tm->itrs += 1; } } return true; } // Evaluator // --------- void evaluator(Net* net, TM* tm, Book* book) { // Initializes the global idle counter atomic_store_explicit(&net->idle, TPC - 1, memory_order_relaxed); sync_threads(); // Performs some interactions u32 tick = 0; bool busy = tm->tid == 0; while (true) { tick += 1; // If we have redexes... if (rbag_len(net, tm) > 0) { // Update global idle counter if (!busy) atomic_fetch_sub_explicit(&net->idle, 1, memory_order_relaxed); busy = true; // Perform an interaction #ifdef DEBUG if (!interact(net, tm, book)) debug("interaction failed\n"); #else interact(net, tm, book); #endif // If we have no redexes... 
} else { // Update global idle counter if (busy) atomic_fetch_add_explicit(&net->idle, 1, memory_order_relaxed); busy = false; //// Peeks a redex from target u32 sid = (tm->tid - 1) % TPC; u32 idx = sid*(G_RBAG_LEN/TPC) + (tm->sidx++); // Stealing Everything: this will steal all redexes Pair got = atomic_exchange_explicit(&net->rbag_buf[idx], 0, memory_order_relaxed); if (got != 0) { push_redex(net, tm, got); continue; } else { tm->sidx = 0; } // Chill... sched_yield(); // Halt if all threads are idle if (tick % 256 == 0) { if (atomic_load_explicit(&net->idle, memory_order_relaxed) == TPC) { break; } } } } sync_threads(); atomic_fetch_add(&net->itrs, tm->itrs); tm->itrs = 0; } // Normalizer // ---------- // Thread data typedef struct { Net* net; TM* tm; Book* book; } ThreadArg; void* thread_func(void* arg) { ThreadArg* data = (ThreadArg*)arg; evaluator(data->net, data->tm, data->book); return NULL; } // Sets the initial redex. void boot_redex(Net* net, Pair redex) { net->vars_buf[get_val(ROOT)] = NONE; net->rbag_buf[0] = redex; } // Evaluates all redexes. // TODO: cache threads to avoid spawning overhead void normalize(Net* net, Book* book) { // Inits thread_arg objects ThreadArg thread_arg[TPC]; for (u32 t = 0; t < TPC; ++t) { thread_arg[t].net = net; thread_arg[t].tm = tm[t]; thread_arg[t].book = book; } // Spawns the evaluation threads pthread_t threads[TPC]; for (u32 t = 0; t < TPC; ++t) { pthread_create(&threads[t], NULL, thread_func, &thread_arg[t]); } // Wait for the threads to finish for (u32 t = 0; t < TPC; ++t) { pthread_join(threads[t], NULL); } } // Util: expands a REF Port. Port expand(Net* net, Book* book, Port port) { Port old = vars_load(net, get_val(ROOT)); Port got = peek(net, port); while (get_tag(got) == REF) { boot_redex(net, new_pair(got, ROOT)); normalize(net, book); got = peek(net, vars_load(net, get_val(ROOT))); } vars_create(net, get_val(ROOT), old); return got; } // Reads back an image. 
// Encoding: (,) | #RRGGBB void read_img(Net* net, Port port, u32 width, u32 height, u32* buffer) { //pretty_print_port(net, port); //printf("\n"); typedef struct { Port port; u32 lv; u32 x0; u32 x1; u32 y0; u32 y1; } Rect; Rect stk[24]; u32 pos = 0; stk[pos++] = (Rect){port, 0, 0, width, 0, height}; while (pos > 0) { Rect rect = stk[--pos]; Port port = enter(net, rect.port); u32 lv = rect.lv; u32 x0 = rect.x0; u32 x1 = rect.x1; u32 y0 = rect.y0; u32 y1 = rect.y1; if (get_tag(port) == CON) { Pair nd = node_load(net, get_val(port)); Port p1 = get_fst(nd); Port p2 = get_snd(nd); u32 xm = (x0 + x1) / 2; u32 ym = (y0 + y1) / 2; if (lv % 2 == 0) { stk[pos++] = (Rect){p2, lv+1, xm, x1, y0, y1}; stk[pos++] = (Rect){p1, lv+1, x0, xm, y0, y1}; } else { stk[pos++] = (Rect){p2, lv+1, x0, x1, ym, y1}; stk[pos++] = (Rect){p1, lv+1, x0, x1, y0, ym}; } continue; } if (get_tag(port) == NUM) { u32 color = get_u24(get_val(port)); printf("COL=%08x x0=%04u x1=%04u y0=%04u y1=%04u | %s\n", color, x0, x1, y0, y1, show_port(port).x); for (u32 y = y0; y < y1; y++) { for (u32 x = x0; x < x1; x++) { buffer[y*width + x] = 0xFF000000 | color; } } continue; } break; } } //#ifdef IO_DRAWIMAGE //// Global variables for the window and renderer //static SDL_Window *window = NULL; //static SDL_Renderer *renderer = NULL; //static SDL_Texture *texture = NULL; //// Function to close the SDL window and clean up resources //void close_sdl(void) { //if (texture != NULL) { //SDL_DestroyTexture(texture); //texture = NULL; //} //if (renderer != NULL) { //SDL_DestroyRenderer(renderer); //renderer = NULL; //} //if (window != NULL) { //SDL_DestroyWindow(window); //window = NULL; //} //SDL_Quit(); //} //// Function to render an image to the SDL window //void render(uint32_t width, uint32_t height, uint32_t *buffer) { //// Initialize SDL if it hasn't been initialized //if (SDL_WasInit(SDL_INIT_VIDEO) == 0) { //if (SDL_Init(SDL_INIT_VIDEO) < 0) { //fprintf(stderr, "SDL could not initialize! 
SDL Error: %s\n", SDL_GetError()); //return; //} //} //// Create window and renderer if they don't exist //if (window == NULL) { //window = SDL_CreateWindow("SDL Window", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, width, height, SDL_WINDOW_SHOWN); //if (window == NULL) { //fprintf(stderr, "Window could not be created! SDL Error: %s\n", SDL_GetError()); //return; //} //renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC); //if (renderer == NULL) { //SDL_DestroyWindow(window); //window = NULL; //fprintf(stderr, "Renderer could not be created! SDL Error: %s\n", SDL_GetError()); //return; //} //} //// Create or recreate the texture if necessary //if (texture == NULL) { //texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, width, height); //if (texture == NULL) { //fprintf(stderr, "Texture could not be created! SDL Error: %s\n", SDL_GetError()); //return; //} //} //// Update the texture with the new buffer //if (SDL_UpdateTexture(texture, NULL, buffer, width * sizeof(uint32_t)) < 0) { //fprintf(stderr, "Texture could not be updated! 
SDL Error: %s\n", SDL_GetError()); //return; //} //// Clear the renderer //SDL_RenderClear(renderer); //// Copy the texture to the renderer //SDL_RenderCopy(renderer, texture, NULL, NULL); //// Update the screen //SDL_RenderPresent(renderer); //// Process events to prevent the OS from thinking the application is unresponsive //SDL_Event e; //while (SDL_PollEvent(&e)) { //if (e.type == SDL_QUIT) { //close_sdl(); //exit(0); //} //} //} //// IO: DrawImage //Port io_put_image(Net* net, Book* book, u32 argc, Port* argv) { //u32 width = 256; //u32 height = 256; //// Create a buffer //uint32_t *buffer = (uint32_t *)malloc(width * height * sizeof(uint32_t)); //if (buffer == NULL) { //fprintf(stderr, "Failed to allocate memory for buffer\n"); //return 1; //} //// Initialize buffer to a dark blue background //for (int i = 0; i < width * height; ++i) { //buffer[i] = 0xFF000030; // Dark blue background //} //// Converts a HVM2 tuple-encoded quadtree to a color buffer //read_img(net, argv[0], width, height, buffer); //// Render the buffer to the screen //render(width, height, buffer); //// Wait some time //SDL_Delay(2000); //// Free the buffer //free(buffer); //return new_port(ERA, 0); //} //#else //// IO: DrawImage //Port io_put_image(Net* net, Book* book, u32 argc, Port* argv) { //printf("DRAWIMAGE: disabled.\n"); //printf("Image rendering is a WIP. For now, to enable it, you must:\n"); //printf("1. Generate a C file, with `hvm gen-c your_file.hvm`.\n"); //printf("2. Manually un-comment the '#define IO_DRAWIMAGE' line on it.\n"); //printf("3. 
Have SDL installed and compile it with '-lSDL2'.\n"); //return new_port(ERA, 0); //} //#endif // Book Loader // ----------- bool book_load(Book* book, u32* buf) { // Reads defs_len book->defs_len = *buf++; // Parses each def for (u32 i = 0; i < book->defs_len; ++i) { // Reads fid u32 fid = *buf++; // Gets def Def* def = &book->defs_buf[fid]; // Reads name memcpy(def->name, buf, 256); buf += 64; // Reads safe flag def->safe = *buf++; // Reads lengths def->rbag_len = *buf++; def->node_len = *buf++; def->vars_len = *buf++; if (def->rbag_len > DEF_RBAG_LEN) { fprintf(stderr, "def '%s' has too many redexes: %u\n", def->name, def->rbag_len); return false; } if (def->node_len > DEF_NODE_LEN) { fprintf(stderr, "def '%s' has too many nodes: %u\n", def->name, def->node_len); return false; } // Reads root def->root = *buf++; // Reads rbag_buf memcpy(def->rbag_buf, buf, 8*def->rbag_len); buf += def->rbag_len * 2; // Reads node_buf memcpy(def->node_buf, buf, 8*def->node_len); buf += def->node_len * 2; } return true; } // Debug Printing // -------------- void put_u32(char* B, u32 val) { for (int i = 0; i < 8; i++, val >>= 4) { B[8-i-1] = "0123456789ABCDEF"[val & 0xF]; } } Show show_port(Port port) { // NOTE: this is done like that because sprintf seems not to be working Show s; switch (get_tag(port)) { case VAR: memcpy(s.x, "VAR:", 4); put_u32(s.x+4, get_val(port)); break; case REF: memcpy(s.x, "REF:", 4); put_u32(s.x+4, get_val(port)); break; case ERA: memcpy(s.x, "ERA:________", 12); break; case NUM: memcpy(s.x, "NUM:", 4); put_u32(s.x+4, get_val(port)); break; case CON: memcpy(s.x, "CON:", 4); put_u32(s.x+4, get_val(port)); break; case DUP: memcpy(s.x, "DUP:", 4); put_u32(s.x+4, get_val(port)); break; case OPR: memcpy(s.x, "OPR:", 4); put_u32(s.x+4, get_val(port)); break; case SWI: memcpy(s.x, "SWI:", 4); put_u32(s.x+4, get_val(port)); break; } s.x[12] = '\0'; return s; } Show show_rule(Rule rule) { Show s; switch (rule) { case LINK: memcpy(s.x, "LINK", 4); break; case VOID: 
memcpy(s.x, "VOID", 4); break; case ERAS: memcpy(s.x, "ERAS", 4); break; case ANNI: memcpy(s.x, "ANNI", 4); break; case COMM: memcpy(s.x, "COMM", 4); break; case OPER: memcpy(s.x, "OPER", 4); break; case SWIT: memcpy(s.x, "SWIT", 4); break; case CALL: memcpy(s.x, "CALL", 4); break; default : memcpy(s.x, "????", 4); break; } s.x[4] = '\0'; return s; } //void print_rbag(RBag* rbag) { //printf("RBAG | FST-TREE | SND-TREE \n"); //printf("---- | ------------ | ------------\n"); //for (u32 i = rbag->lo_ini; i < rbag->lo_end; ++i) { //Pair redex = rbag->lo_buf[i%RLEN]; //printf("%04X | %s | %s\n", i, show_port(get_fst(redex)).x, show_port(get_snd(redex)).x); //} //for (u32 i = 0; i > rbag->hi_end; ++i) { //Pair redex = rbag->hi_buf[i]; //printf("%04X | %s | %s\n", i, show_port(get_fst(redex)).x, show_port(get_snd(redex)).x); //} //printf("==== | ============ | ============\n"); //} void print_net(Net* net) { printf("NODE | PORT-1 | PORT-2 \n"); printf("---- | ------------ | ------------\n"); for (u32 i = 0; i < G_NODE_LEN; ++i) { Pair node = node_load(net, i); if (node != 0) { printf("%04X | %s | %s\n", i, show_port(get_fst(node)).x, show_port(get_snd(node)).x); } } printf("==== | ============ |\n"); printf("VARS | VALUE |\n"); printf("---- | ------------ |\n"); for (u32 i = 0; i < G_VARS_LEN; ++i) { Port var = vars_load(net,i); if (var != 0) { printf("%04X | %s |\n", i, show_port(vars_load(net,i)).x); } } printf("==== | ============ |\n"); } void pretty_print_numb(Numb word) { switch (get_typ(word)) { case TY_SYM: { switch (get_sym(word)) { // types case TY_U24: printf("[u24]"); break; case TY_I24: printf("[i24]"); break; case TY_F24: printf("[f24]"); break; // operations case OP_ADD: printf("[+]"); break; case OP_SUB: printf("[-]"); break; case FP_SUB: printf("[:-]"); break; case OP_MUL: printf("[*]"); break; case OP_DIV: printf("[/]"); break; case FP_DIV: printf("[:/]"); break; case OP_REM: printf("[%%]"); break; case FP_REM: printf("[:%%]"); break; case OP_EQ: 
printf("[=]"); break; case OP_NEQ: printf("[!]"); break; case OP_LT: printf("[<]"); break; case OP_GT: printf("[>]"); break; case OP_AND: printf("[&]"); break; case OP_OR: printf("[|]"); break; case OP_XOR: printf("[^]"); break; case OP_SHL: printf("[<<]"); break; case FP_SHL: printf("[:<<]"); break; case OP_SHR: printf("[>>]"); break; case FP_SHR: printf("[:>>]"); break; default: printf("[?]"); break; } break; } case TY_U24: { printf("%u", get_u24(word)); break; } case TY_I24: { printf("%+d", get_i24(word)); break; } case TY_F24: { if (isinf(get_f24(word))) { if (signbit(get_f24(word))) { printf("-inf"); } else { printf("+inf"); } } else if (isnan(get_f24(word))) { printf("+NaN"); } else { printf("%.7e", get_f24(word)); } break; } default: { switch (get_typ(word)) { case OP_ADD: printf("[+0x%07X]", get_u24(word)); break; case OP_SUB: printf("[-0x%07X]", get_u24(word)); break; case FP_SUB: printf("[:-0x%07X]", get_u24(word)); break; case OP_MUL: printf("[*0x%07X]", get_u24(word)); break; case OP_DIV: printf("[/0x%07X]", get_u24(word)); break; case FP_DIV: printf("[:/0x%07X]", get_u24(word)); break; case OP_REM: printf("[%%0x%07X]", get_u24(word)); break; case FP_REM: printf("[:%%0x%07X]", get_u24(word)); break; case OP_EQ: printf("[=0x%07X]", get_u24(word)); break; case OP_NEQ: printf("[!0x%07X]", get_u24(word)); break; case OP_LT: printf("[<0x%07X]", get_u24(word)); break; case OP_GT: printf("[>0x%07X]", get_u24(word)); break; case OP_AND: printf("[&0x%07X]", get_u24(word)); break; case OP_OR: printf("[|0x%07X]", get_u24(word)); break; case OP_XOR: printf("[^0x%07X]", get_u24(word)); break; case OP_SHL: printf("[<<0x%07X]", get_u24(word)); break; case FP_SHL: printf("[:<<0x%07X]", get_u24(word)); break; case OP_SHR: printf("[>>0x%07X]", get_u24(word)); break; case FP_SHR: printf("[:>>0x%07X]", get_u24(word)); break; default: printf("[?0x%07X]", get_u24(word)); break; } break; } } } void pretty_print_port(Net* net, Book* book, Port port) { Port stack[4096]; 
stack[0] = port; u32 len = 1; u32 num = 0; while (len > 0) { Port cur = stack[--len]; switch (get_tag(cur)) { case CON: { Pair node = node_load(net,get_val(cur)); Port p2 = get_snd(node); Port p1 = get_fst(node); printf("("); stack[len++] = new_port(ERA, (u32)(')')); stack[len++] = p2; stack[len++] = new_port(ERA, (u32)(' ')); stack[len++] = p1; break; } case ERA: { if (get_val(cur) != 0) { printf("%c", (char)get_val(cur)); } else { printf("*"); } break; } case VAR: { Port got = vars_load(net, get_val(cur)); if (got != NONE) { stack[len++] = got; } else { printf("x%x", get_val(cur)); } break; } case NUM: { pretty_print_numb(get_val(cur)); break; } case DUP: { Pair node = node_load(net,get_val(cur)); Port p2 = get_snd(node); Port p1 = get_fst(node); printf("{"); stack[len++] = new_port(ERA, (u32)('}')); stack[len++] = p2; stack[len++] = new_port(ERA, (u32)(' ')); stack[len++] = p1; break; } case OPR: { Pair node = node_load(net,get_val(cur)); Port p2 = get_snd(node); Port p1 = get_fst(node); printf("$("); stack[len++] = new_port(ERA, (u32)(')')); stack[len++] = p2; stack[len++] = new_port(ERA, (u32)(' ')); stack[len++] = p1; break; } case SWI: { Pair node = node_load(net,get_val(cur)); Port p2 = get_snd(node); Port p1 = get_fst(node); printf("?("); stack[len++] = new_port(ERA, (u32)(')')); stack[len++] = p2; stack[len++] = new_port(ERA, (u32)(' ')); stack[len++] = p1; break; } case REF: { u32 fid = get_val(cur) & 0xFFFFFFF; Def* def = &book->defs_buf[fid]; printf("@%s", def->name); break; } } } } //void pretty_print_rbag(Net* net, RBag* rbag) { //for (u32 i = rbag->lo_ini; i < rbag->lo_end; ++i) { //Pair redex = rbag->lo_buf[i]; //if (redex != 0) { //pretty_print_port(net, get_fst(redex)); //printf(" ~ "); //pretty_print_port(net, get_snd(redex)); //printf("\n"); //} //} //for (u32 i = 0; i > rbag->hi_end; ++i) { //Pair redex = rbag->hi_buf[i]; //if (redex != 0) { //pretty_print_port(net, get_fst(redex)); //printf(" ~ "); //pretty_print_port(net, get_snd(redex)); 
//printf("\n"); //} //} //} // Demos // ----- // stress_test 2^10 x 65536 //static const u8 BOOK_BUF[] = {6, 0, 0, 0, 0, 0, 0, 0, 109, 97, 105, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 11, 10, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 102, 117, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 25, 0, 0, 0, 2, 0, 0, 0, 102, 117, 110, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 4, 0, 0, 0, 11, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 102, 117, 110, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, 128, 20, 0, 0, 0, 9, 0, 0, 128, 44, 0, 0, 0, 13, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 3, 4, 0, 0, 38, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 4, 0, 0, 0, 108, 111, 111, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 41, 0, 0, 0, 5, 0, 0, 0, 108, 111, 111, 112, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0}; // stress_test 2^18 x 65536 //static const u8 BOOK_BUF[] = {6, 0, 0, 0, 0, 0, 0, 0, 109, 97, 105, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 11, 18, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 102, 117, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 25, 0, 0, 0, 2, 0, 0, 0, 102, 117, 110, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 4, 0, 0, 0, 11, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 102, 117, 110, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, 128, 20, 0, 0, 0, 9, 0, 0, 128, 44, 0, 0, 0, 13, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 3, 4, 0, 0, 38, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 4, 0, 0, 0, 108, 111, 111, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 41, 0, 0, 0, 5, 0, 0, 0, 108, 111, 111, 112, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0}; // bitonic_sort 2^20 //static const u8 BOOK_BUF[] = {19, 0, 0, 0, 0, 0, 0, 0, 109, 97, 105, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 4, 0, 0, 0, 11, 18, 0, 0, 12, 0, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 100, 111, 119, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 60, 0, 0, 0, 20, 0, 0, 0, 44, 0, 0, 0, 28, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 52, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 68, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 2, 0, 0, 0, 100, 111, 119, 110, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 13, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 25, 0, 0, 128, 60, 0, 0, 0, 25, 0, 0, 128, 84, 0, 0, 0, 13, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 36, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 45, 0, 0, 0, 52, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 48, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 32, 0, 0, 0, 76, 0, 0, 0, 16, 0, 0, 0, 48, 0, 0, 0, 8, 0, 0, 0, 92, 0, 0, 0, 40, 0, 0, 0, 100, 0, 0, 0, 24, 0, 0, 0, 56, 0, 0, 0, 3, 0, 0, 0, 102, 108, 111, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 60, 0, 0, 0, 20, 0, 0, 0, 44, 0, 0, 0, 28, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 52, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 68, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 4, 0, 0, 0, 102, 108, 111, 119, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 14, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, 0, 60, 0, 0, 0, 129, 0, 0, 0, 84, 0, 0, 0, 13, 0, 0, 0, 28, 0, 0, 0, 22, 0, 0, 0, 8, 0, 0, 0, 35, 1, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 44, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 53, 0, 0, 0, 48, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 32, 0, 0, 0, 76, 0, 0, 0, 56, 0, 0, 0, 48, 0, 0, 0, 8, 0, 0, 0, 92, 0, 0, 0, 40, 0, 0, 0, 100, 0, 0, 0, 16, 0, 0, 0, 108, 0, 0, 0, 24, 0, 0, 0, 56, 0, 0, 0, 5, 0, 0, 0, 103, 101, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 8, 0, 0, 0, 20, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 103, 101, 110, 95, 95, 67, 48, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 12, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 41, 0, 0, 128, 68, 0, 0, 0, 41, 0, 0, 128, 84, 0, 0, 0, 13, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 29, 0, 0, 0, 60, 0, 0, 0, 38, 0, 0, 0, 54, 0, 0, 0, 59, 2, 0, 0, 46, 0, 0, 0, 35, 1, 0, 0, 16, 0, 0, 0, 59, 2, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 8, 0, 0, 0, 92, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 7, 0, 0, 0, 109, 97, 105, 110, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1, 
0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 4, 0, 0, 0, 11, 18, 0, 0, 12, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 109, 97, 105, 110, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 4, 0, 0, 0, 11, 18, 0, 0, 12, 0, 0, 0, 11, 0, 0, 0, 20, 0, 0, 0, 57, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 115, 111, 114, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 60, 0, 0, 0, 20, 0, 0, 0, 44, 0, 0, 0, 28, 0, 0, 0, 81, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 
0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 52, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 68, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 10, 0, 0, 0, 115, 111, 114, 116, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 17, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 25, 0, 0, 0, 60, 0, 0, 0, 73, 0, 0, 128, 92, 0, 0, 0, 73, 0, 0, 128, 116, 0, 0, 0, 13, 0, 0, 0, 36, 0, 0, 0, 22, 0, 0, 0, 29, 0, 0, 0, 35, 1, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, 0, 44, 0, 0, 0, 52, 0, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 40, 0, 0, 0, 76, 0, 0, 0, 84, 0, 0, 0, 48, 0, 0, 0, 56, 0, 0, 0, 64, 0, 0, 0, 8, 0, 0, 0, 100, 0, 0, 0, 11, 0, 0, 0, 108, 0, 0, 0, 24, 0, 0, 0, 56, 0, 0, 0, 16, 0, 0, 0, 124, 0, 0, 0, 11, 1, 0, 0, 132, 0, 0, 0, 32, 0, 0, 0, 64, 0, 0, 0, 11, 0, 0, 0, 115, 117, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 8, 0, 0, 0, 20, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 115, 117, 109, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 10, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 89, 0, 0, 128, 36, 0, 0, 0, 89, 0, 0, 128, 68, 0, 0, 0, 13, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 16, 0, 0, 0, 54, 0, 0, 0, 3, 4, 0, 0, 62, 0, 0, 0, 40, 0, 0, 0, 32, 0, 0, 0, 8, 0, 0, 0, 76, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 13, 0, 0, 0, 115, 119, 97, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 44, 0, 0, 0, 20, 0, 0, 0, 28, 0, 0, 0, 113, 0, 0, 0, 121, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 52, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 14, 0, 0, 0, 115, 119, 97, 112, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 8, 0, 0, 0, 20, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 115, 119, 97, 112, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, 0, 119, 97, 114, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 52, 0, 0, 0, 20, 0, 0, 0, 28, 0, 0, 0, 137, 0, 0, 0, 145, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 8, 0, 0, 0, 44, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 60, 0, 0, 0, 8, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 17, 0, 0, 0, 119, 97, 114, 112, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 105, 0, 0, 0, 76, 0, 0, 0, 13, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 29, 0, 0, 0, 52, 0, 0, 0, 38, 0, 0, 0, 24, 0, 0, 0, 3, 15, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 62, 0, 0, 0, 40, 0, 0, 0, 3, 18, 0, 0, 70, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 32, 0, 0, 0, 84, 0, 0, 0, 24, 0, 0, 0, 92, 0, 0, 0, 8, 0, 0, 0, 40, 0, 0, 0, 18, 0, 0, 0, 119, 97, 114, 112, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 21, 0, 0, 0, 12, 0, 0, 0, 4, 0, 0, 0, 129, 0, 0, 128, 92, 0, 0, 0, 129, 0, 0, 128, 132, 0, 0, 0, 13, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 36, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 44, 0, 0, 0, 52, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 61, 0, 0, 0, 68, 0, 0, 0, 48, 0, 0, 0, 56, 0, 0, 0, 76, 0, 0, 0, 84, 0, 0, 0, 64, 0, 0, 0, 72, 0, 0, 0, 80, 0, 0, 0, 88, 0, 0, 0, 8, 0, 
0, 0, 100, 0, 0, 0, 56, 0, 0, 0, 108, 0, 0, 0, 40, 0, 0, 0, 116, 0, 0, 0, 24, 0, 0, 0, 124, 0, 0, 0, 72, 0, 0, 0, 88, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 48, 0, 0, 0, 148, 0, 0, 0, 32, 0, 0, 0, 156, 0, 0, 0, 16, 0, 0, 0, 164, 0, 0, 0, 64, 0, 0, 0, 80, 0, 0, 0}; //COMPILED_BOOK_BUF// #ifdef IO void do_run_io(Net* net, Book* book, Port port); #endif // Main // ---- void hvm_c(u32* book_buffer) { // Creates static TMs alloc_static_tms(); // Loads the Book Book* book = NULL; if (book_buffer) { book = (Book*)malloc(sizeof(Book)); if (!book_load(book, book_buffer)) { fprintf(stderr, "failed to load book\n"); return; } } // GMem Net *net = net_new(); // Starts the timer u64 start = time64(); // Creates an initial redex that calls main boot_redex(net, new_pair(new_port(REF, 0), ROOT)); #ifdef IO do_run_io(net, book, ROOT); #else normalize(net, book); #endif // Prints the result printf("Result: "); pretty_print_port(net, book, enter(net, ROOT)); printf("\n"); // Stops the timer double duration = (time64() - start) / 1000000000.0; // seconds // Prints interactions and time u64 itrs = atomic_load(&net->itrs); printf("- ITRS: %" PRIu64 "\n", itrs); printf("- TIME: %.2fs\n", duration); printf("- MIPS: %.2f\n", (double)itrs / duration / 1000000.0); // Frees everything free_static_tms(); free(net); free(book); } #ifdef WITH_MAIN int main() { hvm_c((u32*)BOOK_BUF); return 0; } #endif ================================================ FILE: src/hvm.cu ================================================ #define INTERPRETED #define WITHOUT_MAIN #ifdef DEBUG #define debug(...) printf(__VA_ARGS__) #else #define debug(...) 
#endif #include #include #include // Integers // -------- typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef int32_t i32; typedef float f32; typedef double f64; typedef unsigned long long int u64; // Configuration // ------------- // Clocks per Second const u64 S = 2520000000; // Threads per Block const u32 TPB_L2 = 7; const u32 TPB = 1 << TPB_L2; // Blocks per GPU const u32 BPG_L2 = 7; const u32 BPG = 1 << BPG_L2; // Threads per GPU const u32 TPG = TPB * BPG; //#define ALLOC_MODE SHARED //#define ALLOC_MODE GLOBAL // Types // ----- // Local Types typedef u8 Tag; // Tag ::= 3-bit (rounded up to u8) typedef u32 Val; // Val ::= 29-bit (rounded up to u32) typedef u32 Port; // Port ::= Tag + Val (fits a u32) typedef u64 Pair; // Pair ::= Port + Port (fits a u64) // Rules typedef u8 Rule; // Rule ::= 3-bit (rounded up to 8) // Numbs typedef u32 Numb; // Numb ::= 29-bit (rounded up to u32) // Tags const Tag VAR = 0x0; // variable const Tag REF = 0x1; // reference const Tag ERA = 0x2; // eraser const Tag NUM = 0x3; // number const Tag CON = 0x4; // constructor const Tag DUP = 0x5; // duplicator const Tag OPR = 0x6; // operator const Tag SWI = 0x7; // switch // Interaction Rule Values const Rule LINK = 0x0; const Rule CALL = 0x1; const Rule VOID = 0x2; const Rule ERAS = 0x3; const Rule ANNI = 0x4; const Rule COMM = 0x5; const Rule OPER = 0x6; const Rule SWIT = 0x7; // Constants const Port FREE = 0x00000000; const Port ROOT = 0xFFFFFFF8; const Port NONE = 0xFFFFFFFF; // Numbers const Tag TY_SYM = 0x00; const Tag TY_U24 = 0x01; const Tag TY_I24 = 0x02; const Tag TY_F24 = 0x03; const Tag OP_ADD = 0x04; const Tag OP_SUB = 0x05; const Tag FP_SUB = 0x06; const Tag OP_MUL = 0x07; const Tag OP_DIV = 0x08; const Tag FP_DIV = 0x09; const Tag OP_REM = 0x0A; const Tag FP_REM = 0x0B; const Tag OP_EQ = 0x0C; const Tag OP_NEQ = 0x0D; const Tag OP_LT = 0x0E; const Tag OP_GT = 0x0F; const Tag OP_AND = 0x10; const Tag OP_OR = 0x11; const Tag OP_XOR = 0x12; const Tag 
OP_SHL = 0x13; const Tag FP_SHL = 0x14; const Tag OP_SHR = 0x15; const Tag FP_SHR = 0x16; // Evaluation Modes const u8 SEED = 0; const u8 GROW = 1; const u8 WORK = 2; // Thread Redex Bag Length const u32 RLEN = 256; // Thread Redex Bag // It uses the same space to store two stacks: // - HI: a high-priotity stack, for shrinking reductions // - LO: a low-priority stack, for growing reductions struct RBag { u32 hi_end; Pair hi_buf[RLEN]; u32 lo_end; Pair lo_buf[RLEN]; }; // Local Net const u32 L_NODE_LEN = 0x2000; const u32 L_VARS_LEN = 0x2000; struct LNet { Pair node_buf[L_NODE_LEN]; Port vars_buf[L_VARS_LEN]; }; // Global Net const u32 G_NODE_LEN = 1 << 29; // max 536m nodes const u32 G_VARS_LEN = 1 << 29; // max 536m vars const u32 G_RBAG_LEN = TPB * BPG * RLEN * 3; // max 4m redexes struct GNet { u32 rbag_use_A; // total rbag redex count (buffer A) u32 rbag_use_B; // total rbag redex count (buffer B) Pair rbag_buf_A[G_RBAG_LEN]; // global redex bag (buffer A) Pair rbag_buf_B[G_RBAG_LEN]; // global redex bag (buffer B) Pair node_buf[G_NODE_LEN]; // global node buffer Port vars_buf[G_VARS_LEN]; // global vars buffer u32 node_put[TPB*BPG]; u32 vars_put[TPB*BPG]; u32 rbag_pos[TPB*BPG]; u8 mode; // evaluation mode (curr) u64 itrs; // interaction count u64 iadd; // interaction count adder u64 leak; // leak count u32 turn; // turn count u8 down; // are we recursing down? u8 rdec; // decrease rpos by 1? 
}; // View Net: includes both GNet and LNet struct Net { i32 l_node_dif; // delta node space i32 l_vars_dif; // delta vars space Pair *l_node_buf; // local node buffer values Port *l_vars_buf; // local vars buffer values u32 *g_rbag_use_A; // global rbag count (active buffer) u32 *g_rbag_use_B; // global rbag count (inactive buffer) Pair *g_rbag_buf_A; // global rbag values (active buffer) Pair *g_rbag_buf_B; // global rbag values (inactive buffer) Pair *g_node_buf; // global node buffer values Port *g_vars_buf; // global vars buffer values u32 *g_node_put; // next global node allocation index u32 *g_vars_put; // next global vars allocation index }; // Thread Memory struct TM { u32 page; // page index u32 nput; // node alloc index u32 vput; // vars alloc index u32 mode; // evaluation mode u32 itrs; // interactions u32 leak; // leaks u32 nloc[L_NODE_LEN/TPB]; // node allocs u32 vloc[L_NODE_LEN/TPB]; // vars allocs RBag rbag; // tmem redex bag }; // Top-Level Definition struct Def { char name[256]; bool safe; u32 rbag_len; u32 node_len; u32 vars_len; Port root; Pair rbag_buf[L_NODE_LEN/TPB]; Pair node_buf[L_NODE_LEN/TPB]; }; typedef struct Book Book; // A Foreign Function typedef struct { char name[256]; Port (*func)(GNet*, Port); } FFn; // Book of Definitions struct Book { u32 defs_len; Def defs_buf[0x4000]; u32 ffns_len; FFn ffns_buf[0x4000]; }; // Static Book __device__ Book BOOK; // Debugger // -------- struct Show { char x[13]; }; __device__ __host__ void put_u16(char* B, u16 val); __device__ __host__ Show show_port(Port port); __device__ Show show_rule(Rule rule); __device__ void print_rbag(Net* net, TM* tm); __device__ __host__ void print_net(Net* net, u32, u32); __device__ void pretty_print_numb(Numb word); __device__ void pretty_print_port(Net* net, Port port); __device__ void pretty_print_rbag(Net* net, RBag* rbag); __global__ void print_heatmap(GNet* gnet, u32 turn); // Utils // ----- __device__ __host__ f32 clamp(f32 x, f32 min, f32 max) { const f32 t = x 
< min ? min : x; return (t > max) ? max : t; } // TODO: write a time64() function that returns the time as fast as possible as a u64 static inline u64 time64() { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (u64)ts.tv_sec * 1000000000ULL + (u64)ts.tv_nsec; } __device__ inline u32 TID() { return threadIdx.x; } __device__ inline u32 BID() { return blockIdx.x; } __device__ inline u32 GID() { return TID() + BID() * blockDim.x; } __device__ __host__ inline u32 div(u32 a, u32 b) { return (a + b - 1) / b; } __device__ u32 push_index(u32 msk, u32 idx) { return msk | (1U << (31 - idx)); } __device__ u32 pop_index(u32* msk) { u32 idx = __clz(*msk); *msk &= ~(1U << (31 - idx)); return idx; } // Port: Constructor and Getters // ----------------------------- __device__ __host__ inline Port new_port(Tag tag, Val val) { return (val << 3) | tag; } __device__ __host__ inline Tag get_tag(Port port) { return port & 7; } __device__ __host__ inline Val get_val(Port port) { return port >> 3; } // Pair: Constructor and Getters // ----------------------------- __device__ __host__ inline Pair new_pair(Port fst, Port snd) { return ((u64)snd << 32) | fst; } __device__ __host__ inline Port get_fst(Pair pair) { return pair & 0xFFFFFFFF; } __device__ __host__ inline Port get_snd(Pair pair) { return pair >> 32; } __device__ __host__ Pair set_par_flag(Pair pair) { Port p1 = get_fst(pair); Port p2 = get_snd(pair); if (get_tag(p1) == REF) { return new_pair(new_port(get_tag(p1), get_val(p1) | 0x10000000), p2); } else { return pair; } } __device__ __host__ Pair clr_par_flag(Pair pair) { Port p1 = get_fst(pair); Port p2 = get_snd(pair); if (get_tag(p1) == REF) { return new_pair(new_port(get_tag(p1), get_val(p1) & 0xFFFFFFF), p2); } else { return pair; } } __device__ __host__ bool get_par_flag(Pair pair) { Port p1 = get_fst(pair); if (get_tag(p1) == REF) { return (get_val(p1) >> 28) == 1; } else { return false; } } // Utils // ----- // Swaps two ports. 
__device__ __host__ inline void swap(Port *a, Port *b) {
  Port x = *a;
  *a = *b;
  *b = x;
}

// Transposes an index over a matrix.
// `idx` is split into (row, col) using `width`; the column is then re-split
// by `height`, and the result is flattened again using `width` columns.
__device__ u32 transpose(u32 idx, u32 width, u32 height) {
  u32 old_row = idx / width;
  u32 old_col = idx % width;
  u32 new_row = old_col % height;
  u32 new_col = old_col / height + old_row * (width / height);
  return new_row * width + new_col;
}

// Returns true if all 'x' are true, block-wise.
// Uses __syncthreads(), so every thread of the block must call it.
// NOTE(review): `res` is reused by the next call to this function; a thread
// that re-enters early could reset it before slower threads have read the
// previous result. Confirm callers always synchronize between calls.
__device__ __noinline__ bool block_all(bool x) {
  __shared__ bool res;
  if (TID() == 0) res = true;
  __syncthreads();
  if (!x) res = false;
  __syncthreads();
  return res;
}

// Returns true if any 'x' is true, block-wise.
// Same structure (and same NOTE) as block_all.
__device__ __noinline__ bool block_any(bool x) {
  __shared__ bool res;
  if (TID() == 0) res = false;
  __syncthreads();
  if (x) res = true;
  __syncthreads();
  return res;
}

// Returns the sum of a value, block-wise.
// `A` must be a type accepted by atomicAdd and assignable from 0.
template <typename A>
__device__ __noinline__ A block_sum(A x) {
  __shared__ A res;
  if (TID() == 0) res = 0;
  __syncthreads();
  atomicAdd(&res, x);
  __syncthreads();
  return res;
}

// Returns the sum of a boolean, block-wise
// (i.e. how many threads of the block passed `true`).
__device__ __noinline__ u32 block_count(bool x) {
  __shared__ u32 res;
  if (TID() == 0) res = 0;
  __syncthreads();
  atomicAdd(&res, x);
  __syncthreads();
  return res;
}

// Prints a 4-bit value for each thread in a block.
// Values above 0xF are printed as 'f'. Thread 0 prints all TPB digits.
__device__ void block_print(u32 x) {
  __shared__ u8 value[TPB];
  // Saturate to 4 bits *before* narrowing to u8; narrowing first would wrap
  // values >= 256 (e.g. 0x100 would print as '0' instead of 'f').
  value[TID()] = (u8)min(x, (u32)0xF);
  __syncthreads();
  if (TID() == 0) {
    for (u32 i = 0; i < TPB; ++i) {
      printf("%x", value[i]);
    }
  }
  __syncthreads();
}

// Ports / Pairs / Rules
// ---------------------

// True if this port has a pointer to a node.
// (Tags CON, DUP, OPR and SWI are the ones >= CON.)
__device__ __host__ inline bool is_nod(Port a) {
  return get_tag(a) >= CON;
}

// True if this port is a variable.
__device__ __host__ inline bool is_var(Port a) {
  return get_tag(a) == VAR;
}

// True if this port is a local node/var (that can leak).
// "Local" means its index is below L_NODE_LEN.
__device__ __host__ inline bool is_local(Port a) {
  return (is_nod(a) || is_var(a)) && get_val(a) < L_NODE_LEN;
}

// True if this port is a global node/var (that can be leaked into).
__device__ __host__ inline bool is_global(Port a) {
  // Only node and variable ports carry an index that can be local or global.
  if (!(is_nod(a) || is_var(a))) {
    return false;
  }
  // Indices at or above L_NODE_LEN lie outside the local range.
  return get_val(a) >= L_NODE_LEN;
}

// Looks up the interaction rule for a pair of ports from their tags.
// The two 3-bit tags form a 6-bit index; each of the three u64 masks below
// supplies one bit of the resulting 3-bit rule at that index.
__device__ __host__ inline Rule get_rule(Port A, Port B) {
  const u64 bit0_mask = 0b0111111010110110110111101110111010110000111100001111000100000010;
  const u64 bit1_mask = 0b0000110000001100000011100000110011111110111111100010111000000000;
  const u64 bit2_mask = 0b1111100011111000111100001111000011000000000000000000000000000000;
  const u64 index = ((u64)get_tag(A) << 3) | (u64)get_tag(B);
  const u64 bit0 = (bit0_mask >> index) & 1;
  const u64 bit1 = (bit1_mask >> index) & 1;
  const u64 bit2 = (bit2_mask >> index) & 1;
  return (Rule)(bit0 | (bit1 << 1) | (bit2 << 2));
}

// Pair flavour of get_rule: looks up the rule for both halves of a redex.
__device__ __host__ inline Rule get_pair_rule(Pair AB) {
  const Port lhs = get_fst(AB);
  const Port rhs = get_snd(AB);
  return get_rule(lhs, rhs);
}

// Tells whether ports A and B must be swapped before reducing: true exactly
// when A's tag is strictly greater than B's.
__device__ __host__ inline bool should_swap(Port A, Port B) {
  return get_tag(A) > get_tag(B);
}

// Tells whether a rule is high priority.
// The bitmask has bits 0, 2, 3 and 4 set, i.e. LINK, VOID, ERAS and ANNI.
__device__ __host__ inline bool is_high_priority(Rule rule) {
  return ((0b00011101 >> rule) & 1) != 0;
}

// Relocates a port: node ports are remapped through tm->nloc and variable
// ports through tm->vloc, using the port's value as the table index. Any
// other port is rebuilt unchanged.
__device__ inline Port adjust_port(Net* net, TM* tm, Port port) {
  const Tag kind = get_tag(port);
  const Val slot = get_val(port);
  Val target = slot;
  if (is_nod(port)) {
    target = tm->nloc[slot];
  } else if (is_var(port)) {
    target = tm->vloc[slot];
  }
  return new_port(kind, target);
}

// Adjusts a newly allocated pair.
__device__ inline Pair adjust_pair(Net* net, TM* tm, Pair pair) {
  Port p1 = adjust_port(net, tm, get_fst(pair));
  Port p2 = adjust_port(net, tm, get_snd(pair));
  return new_pair(p1, p2);
}

// Words
// -----
// A Numb stores a 5-bit type/operator tag in its low bits (see get_typ) and
// the payload in the bits above it.

// Constructor and getters for SYM (operation selector)
__device__ __host__ inline Numb new_sym(u32 val) {
  return (val << 5) | TY_SYM;
}

__device__ __host__ inline u32 get_sym(Numb word) {
  return (word >> 5);
}

// Constructor and getters for U24 (unsigned 24-bit integer)
__device__ __host__ inline Numb new_u24(u32 val) {
  return (val << 5) | TY_U24;
}

__device__ __host__ inline u32 get_u24(Numb word) {
  return word >> 5;
}

// Constructor and getters for I24 (signed 24-bit integer)
__device__ __host__ inline Numb new_i24(i32 val) {
  return ((u32)val << 5) | TY_I24;
}

__device__ __host__ inline i32 get_i24(Numb word) {
  // Move the payload's top bit (bit 28) into the sign position, then shift
  // back arithmetically so the 24-bit value is sign-extended.
  return ((i32)word) << 3 >> 8;
}

// Constructor and getters for F24 (24-bit float)
// An F24 keeps the upper 24 bits of the f32 bit pattern; the low 8 mantissa
// bits are rounded away.
__device__ __host__ inline Numb new_f24(f32 val) {
  u32 bits = *(u32*)&val;
  u32 shifted_bits = bits >> 8;
  u32 lost_bits = bits & 0xFF;
  // round ties to even:
  //   lost_bits <  0x80 -> keep
  //   lost_bits >  0x80 -> round up
  //   lost_bits == 0x80 -> round up only if the kept value is odd
  // `(lost_bits >> 7) & ~shifted_bits` is 1 exactly when we are at or above
  // the halfway point AND the kept value is even; subtracting it turns an
  // exact tie on an even value into 0x7F, which then does not round up.
  // This must be the bitwise `~`: a logical `!` would only detect
  // `shifted_bits == 0` and make every other tie round up.
  shifted_bits += (!isnan(val)) & ((lost_bits - ((lost_bits >> 7) & ~shifted_bits)) >> 7);
  // ensure NaNs don't become infinities
  shifted_bits |= isnan(val);
  return (shifted_bits << 5) | TY_F24;
}

__device__ __host__ inline f32 get_f24(Numb word) {
  // Shift the payload (bits 5..28) up into bits 8..31 of an f32 pattern and
  // zero the 8 low mantissa bits that an F24 does not store.
  u32 bits = (word << 3) & 0xFFFFFF00;
  return *(f32*)&bits;
}

// Type tag of a word: its low 5 bits.
__device__ __host__ inline Tag get_typ(Numb word) {
  return word & 0x1F;
}

// True if the word is a plain number (U24, I24 or F24).
__device__ __host__ inline bool is_num(Numb word) {
  return get_typ(word) >= TY_U24 && get_typ(word) <= TY_F24;
}

// True if the word is a SYM whose payload is one of the numeric type tags
// (U24, I24 or F24), i.e. a cast selector.
__device__ __host__ inline bool is_cast(Numb word) {
  return get_typ(word) == TY_SYM && get_sym(word) >= TY_U24 && get_sym(word) <= TY_F24;
}

// Cast a number to another type.
// The semantics are meant to spiritually resemble rust's numeric casts:
// - i24 <-> u24: is just reinterpretation of bits
// - f24 -> i24,
//   f24 -> u24: casts to the "closest" integer representing this float,
//               saturating if out of range and 0 if NaN
// - i24 -> f24,
//   u24 -> f24: casts to the "closest" float representing this integer.
// `a` is the cast selector (a SYM whose payload is the target type) and `b`
// is the number being converted. Unhandled combinations yield U24 zero.
__device__ __host__ inline Numb cast(Numb a, Numb b) {
  if (get_sym(a) == TY_U24 && get_typ(b) == TY_U24) return b;
  if (get_sym(a) == TY_U24 && get_typ(b) == TY_I24) {
    // reinterpret bits
    i32 val = get_i24(b);
    return new_u24(*(u32*) &val);
  }
  if (get_sym(a) == TY_U24 && get_typ(b) == TY_F24) {
    f32 val = get_f24(b);
    if (isnan(val)) {
      return new_u24(0);
    }
    return new_u24((u32) clamp(val, 0.0, 16777215));
  }

  if (get_sym(a) == TY_I24 && get_typ(b) == TY_U24) {
    // reinterpret bits
    u32 val = get_u24(b);
    return new_i24(*(i32*) &val);
  }
  if (get_sym(a) == TY_I24 && get_typ(b) == TY_I24) return b;
  if (get_sym(a) == TY_I24 && get_typ(b) == TY_F24) {
    f32 val = get_f24(b);
    if (isnan(val)) {
      return new_i24(0);
    }
    return new_i24((i32) clamp(val, -8388608.0, 8388607.0));
  }

  if (get_sym(a) == TY_F24 && get_typ(b) == TY_U24) return new_f24((f32) get_u24(b));
  if (get_sym(a) == TY_F24 && get_typ(b) == TY_I24) return new_f24((f32) get_i24(b));
  if (get_sym(a) == TY_F24 && get_typ(b) == TY_F24) return b;

  return new_u24(0);
}

// Partial application: keeps `b`'s payload and replaces its 5-bit tag with
// the operator selected by the SYM `a`.
__device__ __host__ inline Numb partial(Numb a, Numb b) {
  return (b & ~0x1F) | get_sym(a);
}

// Operate function: combines two words.
// - SYM with SYM, two operator-tagged words, or two plain numbers -> U24 zero
// - cast selector with a number -> cast()
// - other SYM with a non-SYM   -> partial()
// - operator-tagged word with a plain number -> the operation is applied,
//   with the operator-tagged word as the left operand.
// NOTE(review): integer DIV/REM below do not guard against a zero divisor.
__device__ __host__ inline Numb operate(Numb a, Numb b) {
  Tag at = get_typ(a);
  Tag bt = get_typ(b);
  if (at == TY_SYM && bt == TY_SYM) {
    return new_u24(0);
  }
  if (is_cast(a) && is_num(b)) {
    return cast(a, b);
  }
  if (is_cast(b) && is_num(a)) {
    return cast(b, a);
  }
  if (at == TY_SYM && bt != TY_SYM) {
    return partial(a, b);
  }
  if (at != TY_SYM && bt == TY_SYM) {
    return partial(b, a);
  }
  if (at >= OP_ADD && bt >= OP_ADD) {
    return new_u24(0);
  }
  if (at < OP_ADD && bt < OP_ADD) {
    return new_u24(0);
  }
  // Exactly one side carries an operator tag; make that side `a`.
  Tag op, ty;
  Numb swp;
  if (at >= OP_ADD) {
    op = at; ty = bt;
  } else {
    op = bt; ty = at; swp = a; a = b; b = swp;
  }
  switch (ty) {
    case TY_U24: {
      u32 av = get_u24(a);
      u32 bv = get_u24(b);
      switch (op) {
        case OP_ADD: return new_u24(av + bv);
        case OP_SUB: return new_u24(av - bv);
        case FP_SUB: return new_u24(bv - av);
        case OP_MUL: return new_u24(av * bv);
        case OP_DIV: return new_u24(av / bv);
        case FP_DIV: return new_u24(bv / av);
        case OP_REM: return new_u24(av % bv);
        case FP_REM: return new_u24(bv % av);
        case OP_EQ:  return new_u24(av == bv);
        case OP_NEQ: return new_u24(av != bv);
        case OP_LT:  return new_u24(av < bv);
        case OP_GT:  return new_u24(av > bv);
        case OP_AND: return new_u24(av & bv);
        case OP_OR:  return new_u24(av | bv);
        case OP_XOR: return new_u24(av ^ bv);
        case OP_SHL: return new_u24(av << (bv & 31));
        case FP_SHL: return new_u24(bv << (av & 31));
        case OP_SHR: return new_u24(av >> (bv & 31));
        case FP_SHR: return new_u24(bv >> (av & 31));
        default:     return new_u24(0);
      }
    }
    case TY_I24: {
      i32 av = get_i24(a);
      i32 bv = get_i24(b);
      switch (op) {
        case OP_ADD: return new_i24(av + bv);
        case OP_SUB: return new_i24(av - bv);
        case FP_SUB: return new_i24(bv - av);
        case OP_MUL: return new_i24(av * bv);
        case OP_DIV: return new_i24(av / bv);
        case FP_DIV: return new_i24(bv / av);
        case OP_REM: return new_i24(av % bv);
        case FP_REM: return new_i24(bv % av);
        case OP_EQ:  return new_u24(av == bv);
        case OP_NEQ: return new_u24(av != bv);
        case OP_LT:  return new_u24(av < bv);
        case OP_GT:  return new_u24(av > bv);
        case OP_AND: return new_i24(av & bv);
        case OP_OR:  return new_i24(av | bv);
        case OP_XOR: return new_i24(av ^ bv);
        default:     return new_i24(0);
      }
    }
    case TY_F24: {
      float av = get_f24(a);
      float bv = get_f24(b);
      switch (op) {
        case OP_ADD: return new_f24(av + bv);
        case OP_SUB: return new_f24(av - bv);
        case FP_SUB: return new_f24(bv - av);
        case OP_MUL: return new_f24(av * bv);
        case OP_DIV: return new_f24(av / bv);
        case FP_DIV: return new_f24(bv / av);
        case OP_REM: return new_f24(fmodf(av, bv));
        case FP_REM: return new_f24(fmodf(bv, av));
        case OP_EQ:  return new_u24(av == bv);
        case OP_NEQ: return new_u24(av != bv);
        case OP_LT:  return new_u24(av < bv);
        case OP_GT:  return new_u24(av > bv);
        // For floats the bitwise/shift operator slots are reused for
        // transcendental functions.
        case OP_AND: return new_f24(atan2f(av, bv));
        case OP_OR:  return new_f24(logf(bv) / logf(av));
        case OP_XOR: return new_f24(powf(av, bv));
        case OP_SHL: return new_f24(sin(av + bv));
        case OP_SHR: return new_f24(tan(av + bv));
        default:     return new_f24(0);
      }
    }
    default: return new_u24(0);
  }
}

// RBag
// ----

// Creates an empty redex bag (both stacks empty; buffers left untouched).
__device__ RBag rbag_new() {
  RBag rbag;
  rbag.hi_end = 0;
  rbag.lo_end = 0;
  return rbag;
}

// Total number of redexes held in both stacks.
__device__ u32 rbag_len(RBag* rbag) {
  return rbag->hi_end + rbag->lo_end;
}

// Non-zero if the high-priority stack is not empty.
__device__ u32 rbag_has_highs(RBag* rbag) {
  return rbag->hi_end > 0;
}

// Pushes a redex onto the high- or low-priority stack, chosen by its rule.
// No bounds check is performed outside DEBUG builds; callers are expected to
// have reserved space beforehand.
__device__ void push_redex(TM* tm, Pair redex) {
  #ifdef DEBUG
  bool free_hi = tm->rbag.hi_end < RLEN;
  bool free_lo = tm->rbag.lo_end < RLEN;
  if (!free_hi || !free_lo) {
    debug("push_redex: limited resources, maybe corrupting memory\n");
  }
  #endif
  Rule rule = get_pair_rule(redex);
  if (is_high_priority(rule)) {
    tm->rbag.hi_buf[tm->rbag.hi_end++] = redex;
  } else {
    tm->rbag.lo_buf[tm->rbag.lo_end++] = redex;
  }
}

// Pops a redex, preferring the high-priority stack. Returns 0 if both stacks
// are empty.
__device__ Pair pop_redex(TM* tm) {
  if (tm->rbag.hi_end > 0) {
    return tm->rbag.hi_buf[(--tm->rbag.hi_end) % RLEN];
  } else if (tm->rbag.lo_end > 0) {
    return tm->rbag.lo_buf[(--tm->rbag.lo_end) % RLEN];
  } else {
    return 0;
  }
}

// TM
// --

// Creates a fresh thread memory: empty bag, SEED mode, zeroed counters.
// The allocation cursors start at 1. `page`, `nloc` and `vloc` are not
// initialized here.
__device__ TM tmem_new() {
  TM tm;
  tm.rbag = rbag_new();
  tm.nput = 1;
  tm.vput = 1;
  tm.mode = SEED;
  tm.itrs = 0;
  tm.leak = 0;
  return tm;
}

// Net
// ----

// Builds this thread's view over the global net and (optionally) a local net.
// - gnet: the global net.
// - smem: memory holding an LNet, or NULL when there is no local net; in
//   that case the local buffer pointers are set to NULL (previously they
//   were copied from their own uninitialized values).
// - turn: selects which of the two global redex bags is the "A" (active)
//   one: buffer A on even turns, buffer B on odd turns.
__device__ Net vnet_new(GNet* gnet, void* smem, u32 turn) {
  Net net;
  LNet* lnet = (LNet*)smem;
  net.l_node_dif   = 0;
  net.l_vars_dif   = 0;
  net.l_node_buf   = lnet == NULL ? NULL : lnet->node_buf;
  net.l_vars_buf   = lnet == NULL ? NULL : lnet->vars_buf;
  net.g_rbag_use_A = turn % 2 == 0 ? &gnet->rbag_use_A : &gnet->rbag_use_B;
  net.g_rbag_use_B = turn % 2 == 0 ? &gnet->rbag_use_B : &gnet->rbag_use_A;
  net.g_rbag_buf_A = turn % 2 == 0 ? gnet->rbag_buf_A : gnet->rbag_buf_B;
  net.g_rbag_buf_B = turn % 2 == 0 ? gnet->rbag_buf_B : gnet->rbag_buf_A;
  net.g_node_buf   = gnet->node_buf;
  net.g_vars_buf   = gnet->vars_buf;
  net.g_node_put   = &gnet->node_put[GID()];
  net.g_vars_put   = &gnet->vars_put[GID()];
  return net;
}

// Stores a new node. Indices below L_NODE_LEN go to the local buffer (and
// bump the local node delta); all others go to the global buffer.
__device__ inline void node_create(Net* net, u32 loc, Pair val) {
  Pair old;
  if (loc < L_NODE_LEN) {
    net->l_node_dif += 1;
    old = atomicExch(&net->l_node_buf[loc], val);
  } else {
    old = atomicExch(&net->g_node_buf[loc], val);
  }
  #ifdef DEBUG
  if (old != 0) printf("[%04x] ERR NODE_CREATE | %04x\n", GID(), loc);
  #endif
}

// Stores a var. Indices below L_VARS_LEN go to the local buffer (and bump
// the local vars delta); all others go to the global buffer.
__device__ inline void vars_create(Net* net, u32 var, Port val) {
  Port old;
  if (var < L_VARS_LEN) {
    net->l_vars_dif += 1;
    old = atomicExch(&net->l_vars_buf[var], val);
  } else {
    old = atomicExch(&net->g_vars_buf[var], val);
  }
  #ifdef DEBUG
  if (old != 0) printf("[%04x] ERR VARS_CREATE | %04x\n", GID(), var);
  #endif
}

// Reads a node (non-atomically) from the local or global buffer.
__device__ __host__ inline Pair node_load(Net* net, u32 loc) {
  Pair got;
  if (loc < L_NODE_LEN) {
    got = net->l_node_buf[loc];
  } else {
    got = net->g_node_buf[loc];
  }
  return got;
}

// Reads a var (non-atomically) from the local or global buffer.
__device__ __host__ inline Port vars_load(Net* net, u32 var) {
  Port got;
  if (var < L_VARS_LEN) {
    got = net->l_vars_buf[var];
  } else {
    got = net->g_vars_buf[var];
  }
  return got;
}

// Atomically exchanges a node (local or global) by a value. Returns old.
__device__ inline Pair node_exchange(Net* net, u32 loc, Pair val) {
  Pair got = 0;
  if (loc < L_NODE_LEN) {
    got = atomicExch(&net->l_node_buf[loc], val);
  } else {
    got = atomicExch(&net->g_node_buf[loc], val);
  }
  #ifdef DEBUG
  if (got == 0) printf("[%04x] ERR NODE_EXCHANGE | %04x\n", GID(), loc);
  #endif
  return got;
}

// Exchanges a var on global by a value. Returns old.
__device__ inline Port vars_exchange(Net* net, u32 var, Port val) {
  // Indices below L_VARS_LEN address the local buffer, the rest the global one.
  Port got = 0;
  if (var < L_VARS_LEN) {
    got = atomicExch(&net->l_vars_buf[var], val);
  } else {
    got = atomicExch(&net->g_vars_buf[var], val);
  }
  #ifdef DEBUG
  if (got == 0) printf("[%04x] ERR VARS_EXCHANGE | %04x\n", GID(), var);
  #endif
  return got;
}

// Takes a node: atomically clears its slot and returns the previous content.
// Taking a local node also decrements the local node delta.
__device__ inline Pair node_take(Net* net, u32 loc) {
  Pair got = 0;
  if (loc < L_NODE_LEN) {
    net->l_node_dif -= 1;
    got = atomicExch(&net->l_node_buf[loc], 0);
  } else {
    got = atomicExch(&net->g_node_buf[loc], 0);
  }
  #ifdef DEBUG
  if (got == 0) printf("[%04x] ERR NODE_TAKE | %04x\n", GID(), loc);
  #endif
  return got;
}

// Takes a var: atomically clears its slot and returns the previous content.
// Taking a local var also decrements the local vars delta.
__device__ inline Port vars_take(Net* net, u32 var) {
  Port got = 0;
  if (var < L_VARS_LEN) {
    net->l_vars_dif -= 1;
    got = atomicExch(&net->l_vars_buf[var], 0);
  } else {
    got = atomicExch(&net->g_vars_buf[var], 0);
  }
  #ifdef DEBUG
  if (got == 0) printf("[%04x] ERR VARS_TAKE | %04x\n", GID(), var);
  #endif
  return got;
}

// Allocator
// ---------
// In the templates below `A` is the element type of the scanned buffer; a
// slot is considered free when it compares equal to 0.

// Finds one free global slot.
// Each thread scans only its own slice of G_NODE_LEN/TPG slots, starting at
// cursor `*g_put` and wrapping around. Slots below L_NODE_LEN are skipped,
// since those indices denote local slots.
// NOTE: if the slice is full this loops forever, printing "OOM" repeatedly.
template <typename A>
__device__ u32 g_alloc_1(Net* net, u32* g_put, A* g_buf) {
  u32 lps = 0;
  while (true) {
    u32 lc = GID()*(G_NODE_LEN/TPG) + (*g_put%(G_NODE_LEN/TPG));
    A elem = g_buf[lc];
    *g_put += 1;
    if (lc >= L_NODE_LEN && elem == 0) {
      return lc;
    }
    if (++lps >= G_NODE_LEN/TPG) printf("OOM\n"); // FIXME: remove
    //assert(++lps < G_NODE_LEN/TPG); // FIXME: enable?
  }
}

// Finds `num` free global slots and writes their indices to `ret[0..num)`.
// Same scan as g_alloc_1 (including the endless loop when the slice is
// full). Returns the number of slots found, which is `num` on return.
template <typename A>
__device__ u32 g_alloc(Net* net, u32* ret, u32* g_put, A* g_buf, u32 num) {
  u32 got = 0;
  u32 lps = 0;
  while (got < num) {
    u32 lc = GID()*(G_NODE_LEN/TPG) + (*g_put%(G_NODE_LEN/TPG));
    A elem = g_buf[lc];
    *g_put += 1;
    if (lc >= L_NODE_LEN && elem == 0) {
      ret[got++] = lc;
    }
    if (++lps >= G_NODE_LEN/TPG) printf("OOM\n"); // FIXME: remove
    //assert(++lps < G_NODE_LEN/TPG); // FIXME: enable?
  }
  return got;
}

// Finds up to `num` free local slots and writes their indices to `ret`.
// Each thread probes slots `k*TPB + TID()`, so threads of a block never
// probe the same slot. Slot 0 is never handed out. Gives up after fewer than
// L_NODE_LEN/TPB probes; the return value may therefore be less than `num`.
template <typename A>
__device__ u32 l_alloc(Net* net, u32* ret, u32* l_put, A* l_buf, u32 num) {
  u32 got = 0;
  u32 lps = 0;
  while (got < num) {
    u32 lc = ((*l_put)++ * TPB) % L_NODE_LEN + TID();
    A elem = l_buf[lc];
    if (++lps >= L_NODE_LEN/TPB) {
      break;
    }
    if (lc > 0 && elem == 0) {
      ret[got++] = lc;
    }
  }
  return got;
}

// Finds one free local slot and returns its index, or 0 when none was found
// within the probe budget. `*lps` is the caller-owned probe counter, shared
// across successive calls. `ret` is unused.
template <typename A>
__device__ u32 l_alloc_1(Net* net, u32* ret, u32* l_put, A* l_buf, u32* lps) {
  u32 got = 0;
  while (true) {
    u32 lc = ((*l_put)++ * TPB) % L_NODE_LEN + TID();
    A elem = l_buf[lc];
    if (++(*lps) >= L_NODE_LEN/TPB) {
      break;
    }
    if (lc > 0 && elem == 0) {
      return lc;
    }
  }
  return got;
}

// Thin wrappers binding the generic allocators to the node / vars buffers.
// The multi-slot variants write their results to tm->nloc / tm->vloc.

__device__ u32 g_node_alloc_1(Net* net) {
  return g_alloc_1(net, net->g_node_put, net->g_node_buf);
}

__device__ u32 g_vars_alloc_1(Net* net) {
  return g_alloc_1(net, net->g_vars_put, net->g_vars_buf);
}

__device__ u32 g_node_alloc(Net* net, TM* tm, u32 num) {
  return g_alloc(net, tm->nloc, net->g_node_put, net->g_node_buf, num);
}

__device__ u32 g_vars_alloc(Net* net, TM* tm, u32 num) {
  return g_alloc(net, tm->vloc, net->g_vars_put, net->g_vars_buf, num);
}

__device__ u32 l_node_alloc(Net* net, TM* tm, u32 num) {
  return l_alloc(net, tm->nloc, &tm->nput, net->l_node_buf, num);
}

__device__ u32 l_vars_alloc(Net* net, TM* tm, u32 num) {
  return l_alloc(net, tm->vloc, &tm->vput, net->l_vars_buf, num);
}

__device__ u32 l_node_alloc_1(Net* net, TM* tm, u32* lps) {
  return l_alloc_1(net, tm->nloc, &tm->nput, net->l_node_buf, lps);
}

__device__ u32 l_vars_alloc_1(Net* net, TM* tm, u32* lps) {
  return l_alloc_1(net, tm->vloc, &tm->vput, net->l_vars_buf, lps);
}

// Allocates one node: globally unless the thread is in WORK mode.
__device__ u32 node_alloc_1(Net* net, TM* tm, u32* lps) {
  if (tm->mode != WORK) {
    return g_node_alloc_1(net);
  } else {
    return l_node_alloc_1(net, tm, lps);
  }
}

// Allocates one var: globally unless the thread is in WORK mode.
__device__ u32 vars_alloc_1(Net* net, TM* tm, u32* lps) {
  if (tm->mode != WORK) {
    return g_vars_alloc_1(net);
  } else {
    return l_vars_alloc_1(net, tm, lps);
  }
}

// Linking
// -------

// Finds a variable's value.
__device__ inline Port peek(Net* net, Port var) {
  // Read-only walk: follows substitutions while the current port is a VAR,
  // stopping at a slot that holds NONE or 0. Nothing is modified.
  while (get_tag(var) == VAR) {
    Port val = vars_load(net, get_val(var));
    if (val == NONE) break;
    if (val == 0) break;
    var = val;
  }
  return var;
}

// Follows a chain of variable substitutions starting at `var`, consuming it.
// Unlike peek(), every visited slot is atomically swapped with NONE; when the
// slot held a substitution, the slot is then cleared with vars_take() and the
// walk continues from that substitution.
// Returns the first port that is not a VAR, or the VAR whose slot held NONE
// or 0.
__device__ inline Port enter(Net* net, Port var) {
  // While `var` is VAR: extend it (as an optimization)
  while (get_tag(var) == VAR) {
    // Takes the current `var` substitution as `val`
    Port val = vars_exchange(net, get_val(var), NONE);
    // If there was no `val`, stop, as there is no extension
    if (val == NONE) {
      break;
    }
    // Sanity check: if the slot is unfilled, stop
    if (val == 0) {
      break;
    }
    // Otherwise, delete `var` (we own both) and continue
    vars_take(net, get_val(var));
    var = val;
  }
  return var;
}

// Atomically Links `A ~ B`.
// Loops until the link is either stored in a variable slot or turned into a
// redex pushed on `tm`'s bag.
__device__ void link(Net* net, TM* tm, Port A, Port B) {
  #ifdef DEBUG
  // Initial ports, kept only for the diagnostic below.
  Port INI_A = A;
  Port INI_B = B;
  #endif

  // Attempts to directionally point `A ~> B`
  while (true) {
    // If `A` is NODE: swap `A` and `B`, and continue
    if (get_tag(A) != VAR && get_tag(B) == VAR) {
      Port X = A; A = B; B = X;
    }

    // If `A` is NODE: create the `A ~ B` redex
    if (get_tag(A) != VAR) {
      //printf("[%04x] new redex A %s ~ %s\n", GID(), show_port(A).x, show_port(B).x);
      push_redex(tm, new_pair(A, B)); // TODO: move global ports to local
      break;
    }

    // While `B` is VAR: extend it (as an optimization)
    B = enter(net, B);

    // Since `A` is VAR: point `A ~> B`.

    // If B would leak...
    if (is_global(A) && is_local(B)) {
      // If B is a var, just swap it
      if (is_var(B)) {
        Port X = A; A = B; B = X;
        continue;
      }
      // If B is a nod, create a leak interaction
      if (is_nod(B)) {
        //if (!TID()) printf("[%04x] NODE LEAK %s ~ %s\n", GID(), show_port(A).x, show_port(B).x);
        push_redex(tm, new_pair(A, B));
        break;
      }
    }

    // Sanity check: if global A is unfilled, delay this link
    if (is_global(A) && vars_load(net, get_val(A)) == 0) {
      push_redex(tm, new_pair(A, B));
      break;
    }

    // Stores `A -> B`, taking the current `A` subst as `A'`
    Port A_ = vars_exchange(net, get_val(A), B);

    // If there was no `A'`, stop, as we lost B's ownership
    if (A_ == NONE) {
      break;
    }

    #ifdef DEBUG
    if (A_ == 0) printf("[%04x] ERR LINK %s ~ %s | %s ~ %s\n", GID(), show_port(INI_A).x, show_port(INI_B).x, show_port(A).x, show_port(B).x);
    #endif

    // Otherwise, delete `A` (we own both) and link `A' ~ B`
    vars_take(net, get_val(A));
    A = A_;
  }
}

// Links `A ~ B` (as a pair).
__device__ void link_pair(Net* net, TM* tm, Pair AB) {
  link(net, tm, get_fst(AB), get_snd(AB));
}

// Resources
// ---------

// Gets the necessary resources for an interaction.
// Allocates `need_node` nodes into tm->nloc and `need_vars` vars into
// tm->vloc (globally unless the thread is in WORK mode), and checks that both
// redex stacks have at least `need_rbag` free entries.
// Returns true only if all three requirements are met.
__device__ bool get_resources(Net* net, TM* tm, u32 need_rbag, u32 need_node, u32 need_vars) {
  // Free space is the smaller of the two stacks' remaining capacity.
  u32 got_rbag = min(RLEN - tm->rbag.lo_end, RLEN - tm->rbag.hi_end);
  u32 got_node;
  u32 got_vars;
  if (tm->mode != WORK) {
    debug("allocating need_rbag=%u need_node=%u need_vars=%u\n", need_rbag, need_node, need_vars);
    got_node = g_node_alloc(net, tm, need_node);
    got_vars = g_vars_alloc(net, tm, need_vars);
  } else {
    got_node = l_node_alloc(net, tm, need_node);
    got_vars = l_vars_alloc(net, tm, need_vars);
  }
  return got_rbag >= need_rbag && got_node >= need_node && got_vars >= need_vars;
}

// Interactions
// ------------

// The Link Interaction.
__device__ bool interact_link(Net* net, TM* tm, Port a, Port b) { // If A is a global var and B is a local node, leak it: // ^A ~ (b1 b2) // ------------- LEAK-NODE // ^X ~ b1 // ^Y ~ b2 // ^A ~ ^(^X ^Y) if (is_global(a) && is_nod(b) && is_local(b)) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 3, 0, 0)) { return false; } tm->leak += 1; // Loads ports. Pair l_b = node_take(net, get_val(b)); Port l_b1 = enter(net, get_fst(l_b)); Port l_b2 = enter(net, get_snd(l_b)); // Leaks port 1. Port g_b1; if (is_local(l_b1)) { g_b1 = new_port(VAR, g_vars_alloc_1(net)); vars_create(net, get_val(g_b1), NONE); link_pair(net, tm, new_pair(g_b1, l_b1)); } else { g_b1 = l_b1; } // Leaks port 2. Port g_b2; if (is_local(l_b2)) { g_b2 = new_port(VAR, g_vars_alloc_1(net)); vars_create(net, get_val(g_b2), NONE); link_pair(net, tm, new_pair(g_b2, l_b2)); } else { g_b2 = l_b2; } // Leaks node. Port g_b = new_port(get_tag(b), g_node_alloc_1(net)); node_create(net, get_val(g_b), new_pair(g_b1, g_b2)); link_pair(net, tm, new_pair(a, g_b)); return true; // Otherwise, just perform a normal link. } else { // Allocates needed nodes and vars. if (!get_resources(net, tm, 1, 0, 0)) { return false; } link_pair(net, tm, new_pair(a, b)); } return true; } // Declared here for use in call interactions. static inline bool interact_eras(Net* net, TM* tm, Port a, Port b); // The Call Interaction. #ifdef COMPILED ///COMPILED_INTERACT_CALL/// #else __device__ bool interact_eras(Net* net, TM* tm, Port a, Port b); __device__ bool interact_call(Net* net, TM* tm, Port a, Port b) { // Loads Definition. u32 fid = get_val(a) & 0xFFFFFFF; Def* def = &BOOK.defs_buf[fid]; // Copy Optimization. if (def->safe && get_tag(b) == DUP) { return interact_eras(net, tm, a, b); } // Allocates needed nodes and vars. if (!get_resources(net, tm, def->rbag_len + 1, def->node_len, def->vars_len)) { return false; } // Stores new vars. 
for (u32 i = 0; i < def->vars_len; ++i) { vars_create(net, tm->vloc[i], NONE); } // Stores new nodes. for (u32 i = 0; i < def->node_len; ++i) { node_create(net, tm->nloc[i], adjust_pair(net, tm, def->node_buf[i])); } // Links. for (u32 i = 0; i < def->rbag_len; ++i) { link_pair(net, tm, adjust_pair(net, tm, def->rbag_buf[i])); } link_pair(net, tm, new_pair(adjust_port(net, tm, def->root), b)); return true; } #endif // The Void Interaction. __device__ bool interact_void(Net* net, TM* tm, Port a, Port b) { return true; } // The Eras Interaction. __device__ bool interact_eras(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 2, 0, 0)) { return false; } // Loads ports. Pair B = node_take(net, get_val(b)); Port B1 = get_fst(B); Port B2 = get_snd(B); // Links. link_pair(net, tm, new_pair(a, B1)); link_pair(net, tm, new_pair(a, B2)); return true; } // The Anni Interaction. __device__ bool interact_anni(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 2, 0, 0)) { return false; } // Loads ports. Pair A = node_take(net, get_val(a)); Port A1 = get_fst(A); Port A2 = get_snd(A); Pair B = node_take(net, get_val(b)); Port B1 = get_fst(B); Port B2 = get_snd(B); // Links. link_pair(net, tm, new_pair(A1, B1)); link_pair(net, tm, new_pair(A2, B2)); return true; } // The Comm Interaction. __device__ bool interact_comm(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 4, 4, 4)) { return false; } // Loads ports. Pair A = node_take(net, get_val(a)); Port A1 = get_fst(A); Port A2 = get_snd(A); Pair B = node_take(net, get_val(b)); Port B1 = get_fst(B); Port B2 = get_snd(B); // Stores new vars. vars_create(net, tm->vloc[0], NONE); vars_create(net, tm->vloc[1], NONE); vars_create(net, tm->vloc[2], NONE); vars_create(net, tm->vloc[3], NONE); // Stores new nodes. 
node_create(net, tm->nloc[0], new_pair(new_port(VAR, tm->vloc[0]), new_port(VAR, tm->vloc[1]))); node_create(net, tm->nloc[1], new_pair(new_port(VAR, tm->vloc[2]), new_port(VAR, tm->vloc[3]))); node_create(net, tm->nloc[2], new_pair(new_port(VAR, tm->vloc[0]), new_port(VAR, tm->vloc[2]))); node_create(net, tm->nloc[3], new_pair(new_port(VAR, tm->vloc[1]), new_port(VAR, tm->vloc[3]))); // Links. link_pair(net, tm, new_pair(new_port(get_tag(b), tm->nloc[0]), A1)); link_pair(net, tm, new_pair(new_port(get_tag(b), tm->nloc[1]), A2)); link_pair(net, tm, new_pair(new_port(get_tag(a), tm->nloc[2]), B1)); link_pair(net, tm, new_pair(new_port(get_tag(a), tm->nloc[3]), B2)); return true; } // The Oper Interaction. __device__ bool interact_oper(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 1, 1, 0)) { return false; } // Loads ports. Val av = get_val(a); Pair B = node_take(net, get_val(b)); Port B1 = get_fst(B); Port B2 = enter(net, get_snd(B)); // Performs operation. if (get_tag(B1) == NUM) { Val bv = get_val(B1); Numb cv = operate(av, bv); link_pair(net, tm, new_pair(new_port(NUM, cv), B2)); } else { node_create(net, tm->nloc[0], new_pair(a, B2)); link_pair(net, tm, new_pair(B1, new_port(OPR, tm->nloc[0]))); } return true; } // The Swit Interaction. __device__ bool interact_swit(Net* net, TM* tm, Port a, Port b) { // Allocates needed nodes and vars. if (!get_resources(net, tm, 1, 2, 0)) { return false; } // Loads ports. u32 av = get_u24(get_val(a)); Pair B = node_take(net, get_val(b)); Port B1 = get_fst(B); Port B2 = get_snd(B); // Stores new nodes. 
if (av == 0) { node_create(net, tm->nloc[0], new_pair(B2, new_port(ERA,0))); link_pair(net, tm, new_pair(new_port(CON, tm->nloc[0]), B1)); } else { node_create(net, tm->nloc[0], new_pair(new_port(ERA,0), new_port(CON, tm->nloc[1]))); node_create(net, tm->nloc[1], new_pair(new_port(NUM, new_u24(av-1)), B2)); link_pair(net, tm, new_pair(new_port(CON, tm->nloc[0]), B1)); } return true; } // Pops a local redex and performs a single interaction. __device__ bool interact(Net* net, TM* tm, Pair redex, u32 turn) { // Gets redex ports A and B. Port a = get_fst(redex); Port b = get_snd(redex); // Gets the rule type. Rule rule = get_rule(a, b); // If there is no redex, stop. if (redex != 0) { //if (GID() == 0 && turn == 0x201) { //Pair kn = get_tag(b) == CON ? node_load(net, get_val(b)) : 0; //printf("%04x:[%04x] REDUCE %s ~ %s | par? %d | (%s %s)\n", //turn, GID(), //show_port(get_fst(redex)).x, //show_port(get_snd(redex)).x, //get_par_flag(redex), //show_port(get_fst(kn)).x, //show_port(get_snd(kn)).x); //} // Used for root redex. if (get_tag(a) == REF && b == ROOT) { rule = CALL; // Swaps ports if necessary. } else if (should_swap(a,b)) { swap(&a, &b); } // Dispatches interaction rule. bool success; switch (rule) { case LINK: success = interact_link(net, tm, a, b); break; case CALL: success = interact_call(net, tm, a, b); break; case VOID: success = interact_void(net, tm, a, b); break; case ERAS: success = interact_eras(net, tm, a, b); break; case ANNI: success = interact_anni(net, tm, a, b); break; case COMM: success = interact_comm(net, tm, a, b); break; case OPER: success = interact_oper(net, tm, a, b); break; case SWIT: success = interact_swit(net, tm, a, b); break; } // If error, pushes redex back. if (!success) { push_redex(tm, redex); return false; // Else, increments the interaction count. 
} else if (rule != LINK) { tm->itrs += 1; } } return true; } // RBag Save/Load // -------------- // Moves redexes from shared memory to global bag __device__ void save_redexes(Net* net, TM *tm, u32 turn) { u32 idx = 0; u32 bag = tm->mode == SEED ? transpose(GID(), TPB, BPG) : GID(); // Leaks low-priority redexes for (u32 i = 0; i < tm->rbag.lo_end; ++i) { Pair R = tm->rbag.lo_buf[i % RLEN]; Port x = get_fst(R); Port y = get_snd(R); Port X = new_port(VAR, g_vars_alloc_1(net)); Port Y = new_port(VAR, g_vars_alloc_1(net)); vars_create(net, get_val(X), NONE); vars_create(net, get_val(Y), NONE); link_pair(net, tm, new_pair(X, x)); link_pair(net, tm, new_pair(Y, y)); net->g_rbag_buf_B[bag * RLEN + (idx++)] = new_pair(X, Y); } __syncthreads(); tm->rbag.lo_end = 0; // Executes all high-priority redexes while (rbag_has_highs(&tm->rbag)) { Pair redex = pop_redex(tm); if (!interact(net, tm, redex, turn)) { printf("ERROR: failed to clear high-priority redexes"); } } __syncthreads(); #ifdef DEBUG if (rbag_len(&tm->rbag) > 0) printf("[%04x] ERR SAVE_REDEXES lo=%d hi=%d tot=%d\n", GID(), tm->rbag.lo_end, tm->rbag.hi_end, rbag_len(&tm->rbag)); #endif // Updates global redex counter atomicAdd(net->g_rbag_use_B, idx); } // Loads redexes from global bag to shared memory // FIXME: check if we have enuogh space for all loads __device__ void load_redexes(Net* net, TM *tm, u32 turn) { u32 gid = BID() * TPB + TID(); u32 bag = tm->mode == SEED ? transpose(GID(), TPB, BPG) : GID(); for (u32 i = 0; i < RLEN; ++i) { Pair redex = atomicExch(&net->g_rbag_buf_A[bag * RLEN + i], 0); if (redex != 0) { Port a = enter(net, get_fst(redex)); Port b = enter(net, get_snd(redex)); #ifdef DEBUG if (is_local(a) || is_local(b)) printf("[%04x] ERR LOAD_REDEXES\n", turn); #endif push_redex(tm, new_pair(a, b)); } else { break; } } __syncthreads(); } // Kernels // ------- // Sets the initial redex. __global__ void boot_redex(GNet* gnet, Pair redex) { // Creates root variable. 
gnet->vars_buf[get_val(ROOT)] = NONE; // Creates root redex. if (gnet->turn % 2 == 0) { gnet->rbag_buf_A[0] = redex; } else { gnet->rbag_buf_B[0] = redex; } } // Creates a node. __global__ void make_node(GNet* gnet, Tag tag, Port fst, Port snd, Port* ret) { if (GID() == 0) { Net net = vnet_new(gnet, NULL, gnet->turn); u32 loc = g_node_alloc_1(&net); node_create(&net, loc, new_pair(fst, snd)); *ret = new_port(tag, loc); } } __global__ void inbetween(GNet* gnet) { // Clears rbag use counter if (gnet->turn % 2 == 0) { gnet->rbag_use_A = 0; } else { gnet->rbag_use_B = 0; } // Increments gnet turn gnet->turn += 1; // Increments interaction counter gnet->itrs += gnet->iadd; // Resets the rdec variable gnet->rdec = 0; // Moves to next mode if (!gnet->down) { gnet->mode = min(gnet->mode + 1, WORK); } // If no work was done... if (gnet->iadd == 0) { // If on seed mode, go up to GROW mode if (gnet->mode == SEED) { gnet->mode = GROW; gnet->down = 0; // Otherwise, go down to SEED mode } else { gnet->mode = SEED; gnet->down = 1; gnet->rdec = 1; // peel one rpos } //printf(">> CHANGE MODE TO %d | %d <<\n", gnet->mode, gnet->down); } // Reset interaction adder gnet->iadd = 0; } // EVAL __global__ void evaluator(GNet* gnet) { extern __shared__ char shared_mem[]; // 96 KB __shared__ Pair spawn[TPB]; // thread initialized // Thread Memory TM tm = tmem_new(); // Net (Local-Global View) Net net = vnet_new(gnet, shared_mem, gnet->turn); // Clears shared memory for (u32 i = 0; i < L_NODE_LEN / TPB; ++i) { net.l_node_buf[i * TPB + TID()] = 0; net.l_vars_buf[i * TPB + TID()] = 0; } __syncthreads(); // Sets mode tm.mode = gnet->mode; // Loads Redexes load_redexes(&net, &tm, gnet->turn); // Clears spawn buffer spawn[TID()] = rbag_len(&tm.rbag) > 0 ? 0xFFFFFFFFFFFFFFFF : 0; __syncthreads(); // Variables u64 INIT = clock64(); // initial time u32 HASR = block_count(rbag_len(&tm.rbag) > 0); u32 tick = 0; u32 bag = tm.mode == SEED ? 
transpose(GID(), TPB, BPG) : GID(); u32 rpos = gnet->rbag_pos[bag] > 0 ? gnet->rbag_pos[bag] - gnet->rdec : gnet->rbag_pos[bag]; u8 down = gnet->down; //if (BID() == 0 && gnet->turn == 0x69) { //printf("[%04x] ini rpos is %d | bag=%d\n", GID(), rpos, bag); //} // Aborts if empty if (HASR == 0) { return; } //if (BID() == 0 && rbag_len(&tm.rbag) > 0) { //Pair redex = pop_redex(&tm); //Pair kn = get_tag(get_snd(redex)) == CON ? node_load(&net, get_val(get_snd(redex))) : 0; //printf("[%04x] HAS REDEX %s ~ %s | par? %d | (%s %s)\n", //GID(), //show_port(get_fst(redex)).x, //show_port(get_snd(redex)).x, //get_par_flag(redex), //show_port(get_fst(kn)).x, //show_port(get_snd(kn)).x); //push_redex(&tm, redex); //} //// Display debug rbag //if (GID() == 0) { //print_rbag(&net, &tm); //printf("| rbag_pos = %d | mode = %d | down = %d | turn = %04x\n", gnet->rbag_pos[bag], gnet->mode, down, gnet->turn); //} //__syncthreads(); // GROW MODE // --------- if (tm.mode == SEED || tm.mode == GROW) { u32 tlim = tm.mode == SEED ? min(TPB_L2,BPG_L2) : max(TPB_L2,BPG_L2); u32 span = 1 << (32 - __clz(TID())); Pair redex; for (u32 tick = 0; tick < tlim; ++tick) { u32 span = 1 << tick; u32 targ = TID() ^ span; // Attempts to spawn a thread if (TID() < span && spawn[targ] == 0) { //if (BID() == 0) { //if (!TID()) printf("----------------------------------------------------\n"); //if (!TID()) printf("TIC %04x | span=%d | rlen=%d | ", tick, span, rbag_len(&tm.rbag)); //block_print(rbag_len(&tm.rbag)); //if (!TID()) printf("\n"); //__syncthreads(); //} // Performs some interactions until a parallel redex is found for (u32 i = 0; i < 64; ++i) { if (tm.rbag.lo_end < rpos) break; redex = pop_redex(&tm); if (redex == 0) { break; } // If we found a stealable redex, pass it to stealing, // and un-mark the redex above it, so we keep it for us. 
if (get_par_flag(redex)) { Pair above = pop_redex(&tm); if (above != 0) { push_redex(&tm, clr_par_flag(above)); } break; } interact(&net, &tm, redex, gnet->turn); redex = 0; while (tm.rbag.hi_end > 0) { if (!interact(&net, &tm, pop_redex(&tm), gnet->turn)) break; } } // Spawn a thread if (redex != 0 && get_par_flag(redex)) { //if (BID() == 0) { //Pair kn = get_tag(get_snd(redex)) == CON ? node_load(&net, get_val(get_snd(redex))) : 0; //printf("[%04x] GIVE %s ~ %s | par? %d | (%s %s) | rbag.lo_end=%d\n", GID(), show_port(get_fst(redex)).x, show_port(get_snd(redex)).x, get_par_flag(redex), show_port(peek(&net, &tm, get_fst(kn))).x, show_port(peek(&net, &tm, get_snd(kn))).x, tm.rbag.lo_end); //} spawn[targ] = clr_par_flag(redex); if (!down) { rpos = tm.rbag.lo_end - 1; } } } __syncthreads(); // If we've been spawned, push initial redex if (TID() >= span && TID() < span*2 && spawn[TID()] != 0 && spawn[TID()] != 0xFFFFFFFFFFFFFFFF) { //if (rbag_len(&tm.rbag) > 0) { //printf("[%04x] ERROR: SPAWNED BUT HAVE REDEX\n", GID()); //} push_redex(&tm, atomicExch(&spawn[TID()], 0xFFFFFFFFFFFFFFFF)); rpos = 0; //if (BID() == 0) printf("[%04x] TAKE %016llx\n", GID(), spawn[TID()]); } __syncthreads(); //if (BID() == 0) { //if (!TID()) printf("TAC %04x | span=%d | rlen=%d | ", tick, span, rbag_len(&tm.rbag)); //block_print(rbag_len(&tm.rbag)); //if (!TID()) printf("\n"); //__syncthreads(); //} //__syncthreads(); //printf("[%04x] span is %d\n", TID(), span); //__syncthreads(); } //if (BID() == 0 && gnet->turn == 0x69) { //printf("[%04x] end rpos is %d | bag=%d\n", GID(), rpos, bag); //} gnet->rbag_pos[bag] = rpos; } // WORK MODE // --------- if (tm.mode == WORK) { u32 chkt = 0; u32 chka = 1; u32 bag = tm.mode == SEED ? 
transpose(GID(), TPB, BPG) : GID(); u32 rpos = gnet->rbag_pos[bag]; for (tick = 0; tick < 1 << 9; ++tick) { if (tm.rbag.lo_end > rpos || rbag_has_highs(&tm.rbag)) { if (interact(&net, &tm, pop_redex(&tm), gnet->turn)) { while (rbag_has_highs(&tm.rbag)) { if (!interact(&net, &tm, pop_redex(&tm), gnet->turn)) break; } } } __syncthreads(); } } __syncthreads(); //u32 ITRS = block_sum(tm.itrs); //u32 LOOP = block_sum((u32)tick); //u32 RLEN = block_sum(rbag_len(&tm.rbag)); //u32 FAIL = 0; // block_sum((u32)fail); //f64 TIME = (f64)(clock64() - INIT) / (f64)S; //f64 MIPS = (f64)ITRS / TIME / (f64)1000000.0; ////if (BID() >= 0 && TID() == 0) { //if (TID() == 0) { //printf("%04x:[%02x]: MODE=%d DOWN=%d ITRS=%d LOOP=%d RLEN=%d FAIL=%d TIME=%f MIPS=%.0f | %d\n", //gnet->turn, BID(), tm.mode, down, ITRS, LOOP, RLEN, FAIL, TIME, MIPS, 42); //} //__syncthreads(); // Display debug rbag //if (BID() == 0) { //for (u32 i = 0; i < TPB; ++i) { //if (TID() == i && rbag_len(&tm.rbag) > 0) print_rbag(&net, &tm); //__syncthreads(); //} //__syncthreads(); //} // Moves rbag to global save_redexes(&net, &tm, gnet->turn); // Stores rewrites atomicAdd(&gnet->iadd, tm.itrs); atomicAdd(&gnet->leak, tm.leak); } // GNet Host Functions // ------------------- // Initializes the GNet __global__ void initialize(GNet* gnet) { gnet->node_put[GID()] = 0; gnet->vars_put[GID()] = 0; gnet->rbag_pos[GID()] = 0; for (u32 i = 0; i < RLEN; ++i) { gnet->rbag_buf_A[G_RBAG_LEN / TPG * GID() + i] = 0; } for (u32 i = 0; i < RLEN; ++i) { gnet->rbag_buf_B[G_RBAG_LEN / TPG * GID() + i] = 0; } } GNet* gnet_create() { GNet *gnet; cudaMalloc((void**)&gnet, sizeof(GNet)); initialize<<>>(gnet); //cudaMemset(gnet, 0, sizeof(GNet)); return gnet; } u32 gnet_get_rlen(GNet* gnet, u32 turn) { u32 rbag_use; if (turn % 2 == 0) { cudaMemcpy(&rbag_use, &gnet->rbag_use_B, sizeof(u32), cudaMemcpyDeviceToHost); } else { cudaMemcpy(&rbag_use, &gnet->rbag_use_A, sizeof(u32), cudaMemcpyDeviceToHost); } return rbag_use; } u64 
gnet_get_itrs(GNet* gnet) { u64 itrs; cudaMemcpy(&itrs, &gnet->itrs, sizeof(u64), cudaMemcpyDeviceToHost); return itrs; } u64 gnet_get_leak(GNet* gnet) { u64 leak; cudaMemcpy(&leak, &gnet->leak, sizeof(u64), cudaMemcpyDeviceToHost); return leak; } void gnet_boot_redex(GNet* gnet, Pair redex) { boot_redex<<>>(gnet, redex); } void gnet_normalize(GNet* gnet) { // Invokes the Evaluator Kernel repeatedly u32 turn; u64 itrs = 0; u32 rlen = 0; // NORM for (turn = 0; turn < 0xFFFFFFFF; ++turn) { //printf("\e[1;1H\e[2J"); //printf("==================================================== "); //printf("TURN: %04x | RLEN: %04x | ITRS: %012llu\n", turn, rlen, itrs); //cudaDeviceSynchronize(); evaluator<<>>(gnet); inbetween<<<1, 1>>>(gnet); //cudaDeviceSynchronize(); //count_memory<<>>(gnet); //cudaDeviceSynchronize(); //print_heatmap<<<1,1>>>(gnet, turn+1); //cudaDeviceSynchronize(); itrs = gnet_get_itrs(gnet); rlen = gnet_get_rlen(gnet, turn); if (rlen == 0) { //printf("Completed after %d kernel launches!\n", turn); break; } } } // Reads a device node to host Pair gnet_node_load(GNet* gnet, u32 loc) { Pair pair; cudaMemcpy(&pair, &gnet->node_buf[loc], sizeof(Pair), cudaMemcpyDeviceToHost); return pair; } // Reads a device var to host Port gnet_vars_load(GNet* gnet, u32 loc) { Pair port; cudaMemcpy(&port, &gnet->vars_buf[loc], sizeof(Port), cudaMemcpyDeviceToHost); return port; } // Writes a host var to device void gnet_vars_create(GNet* gnet, u32 var, Port val) { cudaMemcpy(&gnet->vars_buf[var], &val, sizeof(Port), cudaMemcpyHostToDevice); } // Like the enter() function, but from host and read-only Port gnet_peek(GNet* gnet, Port port) { while (get_tag(port) == VAR) { Port val = gnet_vars_load(gnet, get_val(port)); if (val == NONE) break; port = val; } return port; } // Expands a REF Port. 
Port gnet_expand(GNet* gnet, Port port) { Port old = gnet_vars_load(gnet, get_val(ROOT)); Port got = gnet_peek(gnet, port); //printf("expand %s\n", show_port(got).x); while (get_tag(got) == REF) { gnet_boot_redex(gnet, new_pair(got, ROOT)); gnet_normalize(gnet); got = gnet_peek(gnet, gnet_vars_load(gnet, get_val(ROOT))); } gnet_vars_create(gnet, get_val(ROOT), old); return got; } // Allocs and creates a node, returning its port. Port gnet_make_node(GNet* gnet, Tag tag, Port fst, Port snd) { Port ret; Port* d_ret; cudaMalloc(&d_ret, sizeof(Port)); make_node<<<1,1>>>(gnet, tag, fst, snd, d_ret); cudaMemcpy(&ret, d_ret, sizeof(Port), cudaMemcpyDeviceToHost); cudaFree(d_ret); return ret; } // Book Loader // ----------- bool book_load(Book* book, u32* buf) { // Reads defs_len book->defs_len = *buf++; // Parses each def for (u32 i = 0; i < book->defs_len; ++i) { // Reads fid u32 fid = *buf++; // Gets def Def* def = &book->defs_buf[fid]; // Reads name memcpy(def->name, buf, 256); buf += 64; // Reads safe flag def->safe = *buf++; // Reads lengths def->rbag_len = *buf++; def->node_len = *buf++; def->vars_len = *buf++; if (def->rbag_len > L_NODE_LEN/TPB) { fprintf(stderr, "def '%s' has too many redexes: %u\n", def->name, def->rbag_len); return false; } if (def->node_len > L_NODE_LEN/TPB) { fprintf(stderr, "def '%s' has too many nodes: %u\n", def->name, def->node_len); return false; } // Reads root def->root = *buf++; // Reads rbag_buf memcpy(def->rbag_buf, buf, 8*def->rbag_len); buf += def->rbag_len * 2; // Reads node_buf memcpy(def->node_buf, buf, 8*def->node_len); buf += def->node_len * 2; } return true; } // Debug Printing // -------------- __device__ __host__ void put_u32(char* B, u32 val) { for (int i = 0; i < 8; i++, val >>= 4) { B[8-i-1] = "0123456789ABCDEF"[val & 0xF]; } } __device__ __host__ Show show_port(Port port) { // NOTE: this is done like that because sprintf seems not to be working Show s; switch (get_tag(port)) { case VAR: memcpy(s.x, "VAR:", 4); 
put_u32(s.x+4, get_val(port)); break; case REF: memcpy(s.x, "REF:", 4); put_u32(s.x+4, get_val(port)); break; case ERA: memcpy(s.x, "ERA:________", 12); break; case NUM: memcpy(s.x, "NUM:", 4); put_u32(s.x+4, get_val(port)); break; case CON: memcpy(s.x, "CON:", 4); put_u32(s.x+4, get_val(port)); break; case DUP: memcpy(s.x, "DUP:", 4); put_u32(s.x+4, get_val(port)); break; case OPR: memcpy(s.x, "OPR:", 4); put_u32(s.x+4, get_val(port)); break; case SWI: memcpy(s.x, "SWI:", 4); put_u32(s.x+4, get_val(port)); break; } s.x[12] = '\0'; return s; } __device__ Show show_rule(Rule rule) { Show s; switch (rule) { case LINK: memcpy(s.x, "LINK", 4); break; case VOID: memcpy(s.x, "VOID", 4); break; case ERAS: memcpy(s.x, "ERAS", 4); break; case ANNI: memcpy(s.x, "ANNI", 4); break; case COMM: memcpy(s.x, "COMM", 4); break; case OPER: memcpy(s.x, "OPER", 4); break; case SWIT: memcpy(s.x, "SWIT", 4); break; case CALL: memcpy(s.x, "CALL", 4); break; default : memcpy(s.x, "????", 4); break; } s.x[4] = '\0'; return s; } __device__ void print_rbag(Net* net, TM* tm) { printf("RBAG | FST-TREE | SND-TREE \n"); printf("---- | ------------ | ------------\n"); for (u32 i = 0; i < tm->rbag.hi_end; ++i) { Pair redex = tm->rbag.hi_buf[i]; Pair node1 = get_tag(get_snd(redex)) == CON ? node_load(net, get_val(get_fst(redex))) : 0; Pair node2 = get_tag(get_snd(redex)) == CON ? node_load(net, get_val(get_snd(redex))) : 0; printf("%04X | %s | %s | hi | (%s %s) ~ (%s %s)\n", i, show_port(get_fst(redex)).x, show_port(get_snd(redex)).x, show_port(peek(net, get_fst(node1))).x, show_port(peek(net, get_snd(node1))).x, show_port(peek(net, get_fst(node2))).x, show_port(peek(net, get_snd(node2))).x); } for (u32 i = 0; i < tm->rbag.lo_end; ++i) { Pair redex = tm->rbag.lo_buf[i%RLEN]; Pair node1 = get_tag(get_snd(redex)) == CON ? node_load(net, get_val(get_fst(redex))) : 0; Pair node2 = get_tag(get_snd(redex)) == CON ? 
node_load(net, get_val(get_snd(redex))) : 0; printf("%04X | %s | %s | hi | (%s %s) ~ (%s %s)\n", i, show_port(get_fst(redex)).x, show_port(get_snd(redex)).x, show_port(peek(net, get_fst(node1))).x, show_port(peek(net, get_snd(node1))).x, show_port(peek(net, get_fst(node2))).x, show_port(peek(net, get_snd(node2))).x); } printf("==== | ============ | ============\n"); } __device__ __host__ void print_net(Net* net, u32 ini, u32 end) { printf("NODE | PORT-1 | PORT-2 \n"); printf("---- | ------------ | ------------\n"); for (u32 i = ini; i < end; ++i) { Pair node = node_load(net, i); if (node != 0) { printf("%04X | %s | %s\n", i, show_port(get_fst(node)).x, show_port(get_snd(node)).x); } } printf("==== | ============ |\n"); printf("VARS | VALUE |\n"); printf("---- | ------------ |\n"); for (u32 i = ini; i < end; ++i) { Port var = vars_load(net,i); if (var != 0) { printf("%04X | %s |\n", i, show_port(vars_load(net,i)).x); } } printf("==== | ============ |\n"); } __device__ void pretty_print_numb(Numb word) { switch (get_typ(word)) { case TY_SYM: { switch (get_sym(word)) { // types case TY_U24: printf("[u24]"); break; case TY_I24: printf("[i24]"); break; case TY_F24: printf("[f24]"); break; // operations case OP_ADD: printf("[+]"); break; case OP_SUB: printf("[-]"); break; case FP_SUB: printf("[:-]"); break; case OP_MUL: printf("[*]"); break; case OP_DIV: printf("[/]"); break; case FP_DIV: printf("[:/]"); break; case OP_REM: printf("[%%]"); break; case FP_REM: printf("[:%%]"); break; case OP_EQ: printf("[=]"); break; case OP_NEQ: printf("[!]"); break; case OP_LT: printf("[<]"); break; case OP_GT: printf("[>]"); break; case OP_AND: printf("[&]"); break; case OP_OR: printf("[|]"); break; case OP_XOR: printf("[^]"); break; case OP_SHL: printf("[<<]"); break; case FP_SHL: printf("[:<<]"); break; case OP_SHR: printf("[>>]"); break; case FP_SHR: printf("[:>>]"); break; default: printf("[?]"); break; } break; } case TY_U24: { printf("%u", get_u24(word)); break; } case TY_I24: { 
printf("%+d", get_i24(word)); break; } case TY_F24: { if (isinf(get_f24(word))) { if (signbit(get_f24(word))) { printf("-inf"); } else { printf("+inf"); } } else if (isnan(get_f24(word))) { printf("+NaN"); } else { printf("%.7e", get_f24(word)); } break; } default: { switch (get_typ(word)) { case OP_ADD: printf("[+0x%07X]", get_u24(word)); break; case OP_SUB: printf("[-0x%07X]", get_u24(word)); break; case FP_SUB: printf("[:-0x%07X]", get_u24(word)); break; case OP_MUL: printf("[*0x%07X]", get_u24(word)); break; case OP_DIV: printf("[/0x%07X]", get_u24(word)); break; case FP_DIV: printf("[:/0x%07X]", get_u24(word)); break; case OP_REM: printf("[%%0x%07X]", get_u24(word)); break; case FP_REM: printf("[:%%0x%07X]", get_u24(word)); break; case OP_EQ: printf("[=0x%07X]", get_u24(word)); break; case OP_NEQ: printf("[!0x%07X]", get_u24(word)); break; case OP_LT: printf("[<0x%07X]", get_u24(word)); break; case OP_GT: printf("[>0x%07X]", get_u24(word)); break; case OP_AND: printf("[&0x%07X]", get_u24(word)); break; case OP_OR: printf("[|0x%07X]", get_u24(word)); break; case OP_XOR: printf("[^0x%07X]", get_u24(word)); break; case OP_SHL: printf("[<<0x%07X]", get_u24(word)); break; case FP_SHL: printf("[:<<0x%07X]", get_u24(word)); break; case OP_SHR: printf("[>>0x%07X]", get_u24(word)); break; case FP_SHR: printf("[:>>0x%07X]", get_u24(word)); break; default: printf("[?0x%07X]", get_u24(word)); break; } break; } } } __device__ void pretty_print_port(Net* net, Port port) { Port stack[4096]; stack[0] = port; u32 len = 1; while (len > 0) { if (len > 256) { printf("ERROR: result too deep to print. 
This will be fixed soon(TM)"); --len; continue; } Port cur = stack[--len]; switch (get_tag(cur)) { case CON: { Pair node = node_load(net,get_val(cur)); Port p2 = get_snd(node); Port p1 = get_fst(node); printf("("); stack[len++] = new_port(ERA, (u32)(')')); stack[len++] = p2; stack[len++] = new_port(ERA, (u32)(' ')); stack[len++] = p1; break; } case ERA: { if (get_val(cur) != 0) { printf("%c", (char)get_val(cur)); } else { printf("*"); } break; } case VAR: { Port got = vars_load(net, get_val(cur)); if (got != NONE) { stack[len++] = got; } else { printf("x%x", get_val(cur)); } break; } case NUM: { pretty_print_numb(get_val(cur)); break; } case DUP: { Pair node = node_load(net,get_val(cur)); Port p2 = get_snd(node); Port p1 = get_fst(node); printf("{"); stack[len++] = new_port(ERA, (u32)('}')); stack[len++] = p2; stack[len++] = new_port(ERA, (u32)(' ')); stack[len++] = p1; break; } case OPR: { Pair node = node_load(net,get_val(cur)); Port p2 = get_snd(node); Port p1 = get_fst(node); printf("$("); stack[len++] = new_port(ERA, (u32)(')')); stack[len++] = p2; stack[len++] = new_port(ERA, (u32)(' ')); stack[len++] = p1; break; } case SWI: { Pair node = node_load(net,get_val(cur)); Port p2 = get_snd(node); Port p1 = get_fst(node); printf("?("); stack[len++] = new_port(ERA, (u32)(')')); stack[len++] = p2; stack[len++] = new_port(ERA, (u32)(' ')); stack[len++] = p1; break; } case REF: { u32 fid = get_val(cur) & 0xFFFFFFF; Def* def = &BOOK.defs_buf[fid]; printf("@%s", def->name); break; } } } } __device__ void pretty_print_rbag(Net* net, RBag* rbag) { for (u32 i = 0; i < rbag->lo_end; ++i) { Pair redex = rbag->lo_buf[i%RLEN]; if (redex != 0) { pretty_print_port(net, get_fst(redex)); printf(" ~ "); pretty_print_port(net, get_snd(redex)); printf("\n"); } } for (u32 i = 0; i < rbag->hi_end; ++i) { Pair redex = rbag->hi_buf[i]; if (redex != 0) { pretty_print_port(net, get_fst(redex)); printf(" ~ "); pretty_print_port(net, get_snd(redex)); printf("\n"); } } } __device__ u32 
NODE_COUNT; __device__ u32 VARS_COUNT; __global__ void count_memory(GNet* gnet) { u32 node_count = 0; u32 vars_count = 0; for (u32 i = GID(); i < G_NODE_LEN; i += TPG) { if (gnet->node_buf[i] != 0) ++node_count; if (gnet->vars_buf[i] != 0) ++vars_count; } __shared__ u32 block_node_count; __shared__ u32 block_vars_count; if (TID() == 0) block_node_count = 0; if (TID() == 0) block_vars_count = 0; __syncthreads(); atomicAdd(&block_node_count, node_count); atomicAdd(&block_vars_count, vars_count); __syncthreads(); if (TID() == 0) atomicAdd(&NODE_COUNT, block_node_count); if (TID() == 0) atomicAdd(&VARS_COUNT, block_vars_count); } __global__ void print_heatmap(GNet* gnet, u32 turn) { if (GID() > 0) return; const char* heatChars[] = { //" ", ".", ":", ":", //"∴", "⁘", "⁙", "░", //"░", "░", "▒", "▒", //"▒", "▓", "▓", "▓" " ", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", }; for (u32 bid = 0; bid < BPG; bid++) { printf("|"); for (u32 tid = 0; tid < TPB; tid++) { u32 gid = bid * TPB + tid; u32 len = 0; for (u32 i = 0; i < RLEN; i++) { if ( turn % 2 == 0 && gnet->rbag_buf_A[gid * RLEN + i] != 0 || turn % 2 == 1 && gnet->rbag_buf_B[gid * RLEN + i] != 0) { len++; } } u32 pos = gnet->rbag_pos[gid]; u32 heat = min(len, 0xF); printf("%s", heatChars[heat]); } printf("|\n"); } } __global__ void print_result(GNet* gnet) { Net net = vnet_new(gnet, NULL, gnet->turn); if (threadIdx.x == 0 && blockIdx.x == 0) { printf("Result: "); pretty_print_port(&net, enter(&net, ROOT)); printf("\n"); } } // Demos // ----- // stress_test 2^10 x 65536 //static const u8 BOOK_BUF[] = {6, 0, 0, 0, 0, 0, 0, 0, 109, 97, 105, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 11, 10, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 102, 117, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 25, 0, 0, 0, 2, 0, 0, 0, 102, 117, 110, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 4, 0, 0, 0, 11, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 102, 117, 110, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, 128, 20, 0, 0, 0, 9, 0, 0, 128, 44, 0, 0, 0, 13, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 3, 4, 0, 0, 38, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 4, 0, 0, 0, 108, 111, 111, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 41, 0, 0, 0, 5, 0, 0, 0, 108, 111, 111, 112, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0}; // stress_test 2^18 x 65536 //static const u8 BOOK_BUF[] = {6, 0, 0, 0, 0, 0, 0, 0, 109, 97, 105, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 11, 18, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 102, 117, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 25, 0, 0, 0, 2, 0, 0, 0, 102, 117, 110, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 4, 0, 0, 0, 11, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 
102, 117, 110, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, 128, 20, 0, 0, 0, 9, 0, 0, 128, 44, 0, 0, 0, 13, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 3, 4, 0, 0, 38, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 4, 0, 0, 0, 108, 111, 111, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 41, 0, 0, 0, 5, 0, 0, 0, 108, 111, 111, 112, 
95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0}; // bitonic_sort 2^20 //static const u8 BOOK_BUF[] = {19, 0, 0, 0, 0, 0, 0, 0, 109, 97, 105, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 4, 0, 0, 0, 11, 18, 0, 0, 12, 0, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 100, 111, 119, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 60, 0, 0, 0, 20, 0, 0, 0, 44, 0, 0, 0, 28, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 52, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 68, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 2, 0, 0, 0, 100, 111, 119, 110, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 13, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 25, 0, 0, 128, 60, 0, 0, 0, 25, 0, 0, 128, 84, 0, 0, 0, 13, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 36, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 45, 0, 0, 0, 52, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 48, 0, 0, 0, 56, 0, 0, 
0, 0, 0, 0, 0, 68, 0, 0, 0, 32, 0, 0, 0, 76, 0, 0, 0, 16, 0, 0, 0, 48, 0, 0, 0, 8, 0, 0, 0, 92, 0, 0, 0, 40, 0, 0, 0, 100, 0, 0, 0, 24, 0, 0, 0, 56, 0, 0, 0, 3, 0, 0, 0, 102, 108, 111, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 60, 0, 0, 0, 20, 0, 0, 0, 44, 0, 0, 0, 28, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 52, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 68, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 4, 0, 0, 0, 102, 108, 111, 119, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 14, 0, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, 0, 60, 0, 0, 0, 129, 0, 0, 0, 84, 0, 0, 0, 13, 0, 0, 0, 28, 0, 0, 0, 22, 0, 0, 0, 8, 0, 0, 0, 35, 1, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 44, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 53, 0, 0, 0, 48, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 32, 0, 0, 0, 76, 0, 0, 0, 56, 0, 0, 0, 48, 0, 0, 0, 8, 0, 0, 0, 92, 0, 0, 0, 40, 0, 0, 0, 100, 0, 0, 0, 16, 0, 0, 0, 108, 0, 0, 0, 24, 0, 0, 0, 56, 0, 0, 0, 5, 0, 0, 0, 103, 101, 110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 8, 0, 0, 0, 20, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 103, 101, 110, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 12, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 41, 0, 0, 128, 68, 0, 0, 0, 41, 0, 0, 128, 84, 0, 0, 0, 13, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 29, 0, 0, 0, 60, 0, 0, 0, 38, 0, 0, 0, 54, 0, 0, 0, 59, 2, 0, 0, 46, 0, 0, 0, 35, 1, 0, 0, 16, 0, 0, 0, 59, 2, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 8, 0, 0, 0, 92, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 7, 0, 0, 0, 109, 97, 105, 110, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 4, 0, 0, 0, 11, 18, 0, 0, 12, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 109, 97, 105, 110, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 4, 0, 0, 0, 11, 18, 0, 0, 12, 0, 0, 0, 11, 0, 0, 0, 20, 0, 0, 0, 57, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 115, 111, 114, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 60, 0, 0, 0, 20, 0, 0, 0, 44, 0, 0, 0, 28, 0, 0, 0, 81, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 52, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 68, 0, 0, 0, 8, 0, 0, 0, 24, 0, 0, 0, 10, 0, 0, 0, 115, 111, 114, 116, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 17, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 25, 0, 0, 0, 60, 0, 0, 0, 73, 0, 0, 128, 92, 0, 0, 0, 73, 0, 0, 128, 116, 0, 0, 0, 13, 0, 0, 0, 36, 0, 0, 0, 22, 0, 0, 0, 29, 0, 0, 0, 35, 1, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, 0, 44, 0, 0, 0, 52, 0, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 40, 0, 0, 0, 76, 0, 0, 0, 84, 0, 0, 0, 48, 0, 0, 0, 56, 0, 0, 0, 64, 0, 0, 0, 8, 0, 0, 0, 100, 0, 0, 0, 11, 0, 0, 0, 108, 0, 0, 0, 24, 0, 0, 0, 56, 0, 0, 0, 16, 0, 0, 0, 124, 0, 0, 0, 11, 1, 0, 0, 132, 0, 0, 0, 32, 0, 0, 0, 64, 0, 0, 0, 11, 0, 0, 0, 115, 117, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 8, 0, 0, 0, 20, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 115, 117, 109, 95, 95, 67, 48, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 10, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 89, 0, 0, 128, 36, 0, 0, 0, 89, 0, 0, 128, 68, 0, 0, 0, 13, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 16, 0, 0, 0, 54, 0, 0, 0, 3, 4, 0, 0, 62, 0, 0, 0, 40, 0, 0, 0, 32, 0, 0, 0, 8, 0, 0, 0, 76, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 13, 0, 0, 0, 115, 119, 97, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 44, 0, 0, 0, 20, 0, 0, 0, 
28, 0, 0, 0, 113, 0, 0, 0, 121, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 8, 0, 0, 0, 16, 0, 0, 0, 8, 0, 0, 0, 52, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 14, 0, 0, 0, 115, 119, 97, 112, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 8, 0, 0, 0, 20, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 115, 119, 97, 112, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 0, 0, 
0, 0, 8, 0, 0, 0, 16, 0, 0, 0, 119, 97, 114, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 15, 0, 0, 0, 52, 0, 0, 0, 20, 0, 0, 0, 28, 0, 0, 0, 137, 0, 0, 0, 145, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 8, 0, 0, 0, 44, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 16, 0, 0, 0, 60, 0, 0, 0, 8, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 17, 0, 0, 0, 119, 97, 114, 112, 95, 95, 67, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 6, 0, 0, 0, 4, 0, 0, 0, 105, 0, 0, 0, 76, 0, 0, 0, 13, 0, 0, 0, 20, 0, 
0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 29, 0, 0, 0, 52, 0, 0, 0, 38, 0, 0, 0, 24, 0, 0, 0, 3, 15, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 62, 0, 0, 0, 40, 0, 0, 0, 3, 18, 0, 0, 70, 0, 0, 0, 16, 0, 0, 0, 32, 0, 0, 0, 32, 0, 0, 0, 84, 0, 0, 0, 24, 0, 0, 0, 92, 0, 0, 0, 8, 0, 0, 0, 40, 0, 0, 0, 18, 0, 0, 0, 119, 97, 114, 112, 95, 95, 67, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 21, 0, 0, 0, 12, 0, 0, 0, 4, 0, 0, 0, 129, 0, 0, 128, 92, 0, 0, 0, 129, 0, 0, 128, 132, 0, 0, 0, 13, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 28, 0, 0, 0, 36, 0, 0, 0, 16, 0, 0, 0, 24, 0, 0, 0, 44, 0, 0, 0, 52, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 61, 0, 0, 0, 68, 0, 0, 0, 48, 0, 0, 0, 56, 0, 0, 0, 76, 0, 0, 0, 84, 0, 0, 0, 64, 0, 0, 0, 72, 0, 0, 0, 80, 0, 0, 0, 88, 0, 0, 0, 8, 0, 0, 0, 100, 0, 0, 0, 56, 0, 0, 0, 108, 0, 0, 0, 40, 0, 0, 0, 116, 0, 0, 0, 24, 0, 0, 0, 124, 0, 0, 0, 72, 0, 0, 0, 88, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 48, 0, 0, 0, 148, 0, 0, 0, 32, 0, 0, 0, 156, 0, 0, 0, 16, 0, 0, 0, 164, 0, 0, 0, 64, 0, 0, 0, 80, 0, 0, 0}; //COMPILED_BOOK_BUF// // Main // ---- #ifdef IO void do_run_io(GNet* gnet, Book* book, Port port); #endif extern "C" void hvm_cu(u32* book_buffer) { // Loads the Book Book* book = (Book*)malloc(sizeof(Book)); if (book_buffer) { if 
(!book_load(book, (u32*)book_buffer)) { fprintf(stderr, "failed to load book\n"); return; } cudaMemcpyToSymbol(BOOK, book, sizeof(Book)); } // Configures Shared Memory Size cudaFuncSetAttribute(evaluator, cudaFuncAttributeMaxDynamicSharedMemorySize, sizeof(LNet)); // Creates a new GNet GNet* gnet = gnet_create(); // Start the timer clock_t start = clock(); // Boots root redex, to expand @main gnet_boot_redex(gnet, new_pair(new_port(REF, 0), ROOT)); #ifdef IO do_run_io(gnet, book, ROOT); #else gnet_normalize(gnet); #endif cudaDeviceSynchronize(); // Stops the timer clock_t end = clock(); double duration = ((double)(end - start)) / CLOCKS_PER_SEC; // Prints the result print_result<<<1,1>>>(gnet); // Reports errors cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf(stderr, "Failed to launch kernels. Error code: %s.\n", cudaGetErrorString(err)); if (err == cudaErrorInvalidConfiguration) { fprintf(stderr, "Note: for now, HVM-CUDA requires a GPU with at least 128 KB of L1 cache per SM.\n"); } exit(EXIT_FAILURE); } // Prints entire memdump //{ //// Allocate host memory for the net //GNet *h_gnet = (GNet*)malloc(sizeof(GNet)); //// Copy the net from device to host //cudaMemcpy(h_gnet, gnet, sizeof(GNet), cudaMemcpyDeviceToHost); //// Create a Net view of the host GNet //Net net; //net.g_node_buf = h_gnet->node_buf; //net.g_vars_buf = h_gnet->vars_buf; //// Print the net //print_net(&net, L_NODE_LEN, G_NODE_LEN); //// Free host memory //free(h_gnet); //} // Gets interaction count //cudaMemcpy(&itrs, &gnet->itrs, sizeof(u64), cudaMemcpyDeviceToHost); // Prints interactions, time and MIPS printf("- ITRS: %llu\n", gnet_get_itrs(gnet)); printf("- LEAK: %llu\n", gnet_get_leak(gnet)); printf("- TIME: %.2fs\n", duration); printf("- MIPS: %.2f\n", (double)gnet_get_itrs(gnet) / duration / 1000000.0); } #ifdef WITH_MAIN int main() { hvm_cu((u32*)BOOK_BUF); return 0; } #endif ================================================ FILE: src/hvm.cuh 
================================================
#ifndef hvm_cuh_INCLUDED
#define hvm_cuh_INCLUDED

#include <math.h>
#include <stdint.h>

// Types
// -----

typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef unsigned long long int u64;
typedef int32_t i32;
typedef float f32;
typedef double f64;

// Local Types
typedef u8 Tag; // Tag ::= 3-bit (rounded up to u8)
typedef u32 Val; // Val ::= 29-bit (rounded up to u32)
typedef u32 Port; // Port ::= Tag + Val (fits a u32)
typedef u64 Pair; // Pair ::= Port + Port (fits a u64)

// Numbs
typedef u32 Numb; // Numb ::= 29-bit (rounded up to u32)

// Tags
const Tag VAR = 0x0; // variable
const Tag REF = 0x1; // reference
const Tag ERA = 0x2; // eraser
const Tag NUM = 0x3; // number
const Tag CON = 0x4; // constructor
const Tag DUP = 0x5; // duplicator
const Tag OPR = 0x6; // operator
const Tag SWI = 0x7; // switch

// Numbers
static const f32 U24_MAX = (f32) (1 << 24) - 1;
static const f32 U24_MIN = 0.0;
static const f32 I24_MAX = (f32) (1 << 23) - 1;
static const f32 I24_MIN = (f32) (i32) ((-1u) << 23);

// Numb type tags (TY_*) and operation selectors (OP_* / FP_*).
// They occupy the low 5 bits of a Numb (see get_typ).
const Tag TY_SYM = 0x00;
const Tag TY_U24 = 0x01;
const Tag TY_I24 = 0x02;
const Tag TY_F24 = 0x03;
const Tag OP_ADD = 0x04;
const Tag OP_SUB = 0x05;
const Tag FP_SUB = 0x06;
const Tag OP_MUL = 0x07;
const Tag OP_DIV = 0x08;
const Tag FP_DIV = 0x09;
const Tag OP_REM = 0x0A;
const Tag FP_REM = 0x0B;
const Tag OP_EQ = 0x0C;
const Tag OP_NEQ = 0x0D;
const Tag OP_LT = 0x0E;
const Tag OP_GT = 0x0F;
const Tag OP_AND = 0x10;
const Tag OP_OR = 0x11;
const Tag OP_XOR = 0x12;
const Tag OP_SHL = 0x13;
const Tag FP_SHL = 0x14;
const Tag OP_SHR = 0x15;
const Tag FP_SHR = 0x16;

typedef struct GNet GNet;

// Debugger
// --------

// Port: Constructor and Getters
// -----------------------------

// Packs `tag` into the low 3 bits and `val` into the bits above them.
static inline Port new_port(Tag tag, Val val) {
  return (val << 3) | tag;
}

// Extracts the 3-bit tag of a port.
static inline Tag get_tag(Port port) {
  return port & 7;
}

// Extracts the value stored above the tag bits.
static inline Val get_val(Port port) {
  return port >> 3;
}

// Pair: Constructor and Getters
// -----------------------------

// Packs two ports into a u64: `fst` in the low 32 bits, `snd` in the high 32.
static inline const Pair new_pair(Port fst, Port snd) {
  return ((u64)snd << 32) | fst;
}

// Returns the port stored in the low 32 bits.
static inline Port get_fst(Pair pair) {
  return pair & 0xFFFFFFFF;
}

// Returns the port stored in the high 32 bits.
static inline Port get_snd(Pair pair) {
  return pair >> 32;
}

// Utils
// -----

// Swaps two ports.
static inline void swap(Port *a, Port *b) {
  Port x = *a; *a = *b; *b = x;
}

// Returns the smaller of two u32 values.
static inline u32 min(u32 a, u32 b) {
  return (a < b) ? a : b;
}

// Clamps `x` to the closed interval [min, max].
static inline f32 clamp(f32 x, f32 min, f32 max) {
  const f32 t = x < min ? min : x;
  return (t > max) ? max : t;
}

// Numbs
// -----

// Constructor and getters for SYM (operation selector)
static inline Numb new_sym(u32 val) {
  return (val << 5) | TY_SYM;
}

static inline u32 get_sym(Numb word) {
  return (word >> 5);
}

// Constructor and getters for U24 (unsigned 24-bit integer)
static inline Numb new_u24(u32 val) {
  return (val << 5) | TY_U24;
}

static inline u32 get_u24(Numb word) {
  return word >> 5;
}

// Constructor and getters for I24 (signed 24-bit integer)
static inline Numb new_i24(i32 val) {
  return ((u32)val << 5) | TY_I24;
}

static inline i32 get_i24(Numb word) {
  // Shift while still unsigned (left-shifting a signed value into the sign
  // bit is undefined), then sign-extend with an arithmetic right shift.
  return (i32)(word << 3) >> 8;
}

// Constructor and getters for F24 (24-bit float)
//
// Keeps the top 24 bits of the IEEE-754 single and rounds the 8 dropped bits
// to nearest, ties to even. NaN inputs stay NaN.
static inline Numb new_f24(float val) {
  u32 bits = *(u32*)&val;
  u32 shifted_bits = bits >> 8;
  u32 lost_bits = bits & 0xFF;
  // isnan() is only guaranteed to be "nonzero" for NaN; normalize to 0/1
  // because the flag is combined bitwise below.
  u32 nan = isnan(val) ? 1u : 0u;
  // round ties to even: `(lost_bits >> 7) & ~shifted_bits` is 1 only when the
  // dropped bits are >= half AND the kept mantissa is even, so an exact tie
  // (lost_bits == 0x80) on an even mantissa is not rounded up. The bitwise
  // NOT is required here; a logical `!` would only test for zero.
  shifted_bits += (nan ^ 1u) & ((lost_bits - ((lost_bits >> 7) & ~shifted_bits)) >> 7);
  // ensure NaNs don't become infinities
  shifted_bits |= nan;
  return (shifted_bits << 5) | TY_F24;
}

static inline float get_f24(Numb word) {
  // Moves the 24-bit payload back to the top of a 32-bit float pattern.
  u32 bits = (word << 3) & 0xFFFFFF00;
  return *(float*)&bits;
}

// Returns the 5-bit type tag of a Numb.
static inline Tag get_typ(Numb word) {
  return word & 0x1F;
}

// True when the Numb holds a value (U24, I24 or F24).
static inline bool is_num(Numb word) {
  return get_typ(word) >= TY_U24 && get_typ(word) <= TY_F24;
}

// True when the Numb is a SYM whose payload names a numeric type.
static inline bool is_cast(Numb word) {
  return get_typ(word) == TY_SYM && get_sym(word) >= TY_U24 && get_sym(word) <= TY_F24;
}

// Partial application
// Keeps the payload of `b` and replaces its type tag with the symbol in `a`.
static inline Numb partial(Numb a, Numb b) {
  return (b & ~0x1F) | get_sym(a);
}

// Readback
// ---------

// Readback: Tuples
typedef struct Tup {
  u32 elem_len;
  Port elem_buf[8];
} Tup;

// Reads a tuple of `size` elements from `port`.
// Tuples are con nodes nested to the right auxiliary port,
// For example, `(CON a (CON b (CON c)))` is a 3-tuple (a, b, c).
extern Tup gnet_readback_tup(GNet* gnet, Port port, u32 size);

typedef struct Str {
  u32 len;
  char *buf;
} Str;

// Reads a constructor-encoded string (of length at most 255 characters),
// into a null-terminated `Str`.
extern Str gnet_readback_str(GNet* gnet, Port port);

typedef struct Bytes {
  u32 len;
  char *buf;
} Bytes;

// Reads a constructor-encoded string (of length at most 256 characters),
// into a `Bytes`. The returned `Bytes` is not null terminated.
extern Bytes gnet_readback_bytes(GNet* net, Port port);

// Creates a constructor-encoded string of arbitrary length from the
// provided `bytes`. This string can be consumed on the HVM-side. This
// will return an `ERA` if nodes cannot be allocated.
extern Port gnet_inject_bytes(GNet* net, Bytes *bytes); #endif // hvm_cuh_INCLUDED ================================================ FILE: src/hvm.h ================================================ #ifndef hvm_h_INCLUDED #define hvm_h_INCLUDED #include #include #include // Types // ----- typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef int32_t i32; typedef uint64_t u64; typedef float f32; typedef double f64; // Local Types typedef u8 Tag; // Tag ::= 3-bit (rounded up to u8) typedef u32 Val; // Val ::= 29-bit (rounded up to u32) typedef u32 Port; // Port ::= Tag + Val (fits a u32) typedef u64 Pair; // Pair ::= Port + Port (fits a u64) // Numbs typedef u32 Numb; // Numb ::= 29-bit (rounded up to u32) // Tags #define VAR 0x0 // variable #define REF 0x1 // reference #define ERA 0x2 // eraser #define NUM 0x3 // number #define CON 0x4 // constructor #define DUP 0x5 // duplicator #define OPR 0x6 // operator #define SWI 0x7 // switch // Numbers static const f32 U24_MAX = (f32) (1 << 24) - 1; static const f32 U24_MIN = 0.0; static const f32 I24_MAX = (f32) (1 << 23) - 1; static const f32 I24_MIN = (f32) (i32) ((-1u) << 23); #define TY_SYM 0x00 #define TY_U24 0x01 #define TY_I24 0x02 #define TY_F24 0x03 #define OP_ADD 0x04 #define OP_SUB 0x05 #define FP_SUB 0x06 #define OP_MUL 0x07 #define OP_DIV 0x08 #define FP_DIV 0x09 #define OP_REM 0x0A #define FP_REM 0x0B #define OP_EQ 0x0C #define OP_NEQ 0x0D #define OP_LT 0x0E #define OP_GT 0x0F #define OP_AND 0x10 #define OP_OR 0x11 #define OP_XOR 0x12 #define OP_SHL 0x13 #define FP_SHL 0x14 #define OP_SHR 0x15 #define FP_SHR 0x16 typedef struct Net Net; typedef struct Book Book; // Debugger // -------- typedef struct { char x[13]; } Show; void put_u16(char* B, u16 val); Show show_port(Port port); void print_net(Net* net); void pretty_print_numb(Numb word); void pretty_print_port(Net* net, Book* book, Port port); // Port: Constructor and Getters // ----------------------------- static inline Port new_port(Tag 
tag, Val val) { return (val << 3) | tag; } static inline Tag get_tag(Port port) { return port & 7; } static inline Val get_val(Port port) { return port >> 3; } // Pair: Constructor and Getters // ----------------------------- static inline const Pair new_pair(Port fst, Port snd) { return ((u64)snd << 32) | fst; } static inline Port get_fst(Pair pair) { return pair & 0xFFFFFFFF; } static inline Port get_snd(Pair pair) { return pair >> 32; } // Utils // ----- // Swaps two ports. static inline void swap(Port *a, Port *b) { Port x = *a; *a = *b; *b = x; } static inline u32 min(u32 a, u32 b) { return (a < b) ? a : b; } static inline f32 clamp(f32 x, f32 min, f32 max) { const f32 t = x < min ? min : x; return (t > max) ? max : t; } // Numbs // ----- // Constructor and getters for SYM (operation selector) static inline Numb new_sym(u32 val) { return (val << 5) | TY_SYM; } static inline u32 get_sym(Numb word) { return (word >> 5); } // Constructor and getters for U24 (unsigned 24-bit integer) static inline Numb new_u24(u32 val) { return (val << 5) | TY_U24; } static inline u32 get_u24(Numb word) { return word >> 5; } // Constructor and getters for I24 (signed 24-bit integer) static inline Numb new_i24(i32 val) { return ((u32)val << 5) | TY_I24; } static inline i32 get_i24(Numb word) { return ((i32)word) << 3 >> 8; } // Constructor and getters for F24 (24-bit float) static inline Numb new_f24(float val) { u32 bits = *(u32*)&val; u32 shifted_bits = bits >> 8; u32 lost_bits = bits & 0xFF; // round ties to even shifted_bits += (!isnan(val)) & ((lost_bits - ((lost_bits >> 7) & !shifted_bits)) >> 7); // ensure NaNs don't become infinities shifted_bits |= isnan(val); return (shifted_bits << 5) | TY_F24; } static inline float get_f24(Numb word) { u32 bits = (word << 3) & 0xFFFFFF00; return *(float*)&bits; } static inline Tag get_typ(Numb word) { return word & 0x1F; } static inline bool is_num(Numb word) { return get_typ(word) >= TY_U24 && get_typ(word) <= TY_F24; } static inline 
bool is_cast(Numb word) { return get_typ(word) == TY_SYM && get_sym(word) >= TY_U24 && get_sym(word) <= TY_F24; } // Partial application static inline Numb partial(Numb a, Numb b) { return (b & ~0x1F) | get_sym(a); } // Readback // --------- // Readback: Tuples typedef struct Tup { u32 elem_len; Port elem_buf[8]; } Tup; // Reads a tuple of `size` elements from `port`. // Tuples are con nodes nested to the right auxilliary port, // For example, `(CON a (CON b (CON c)))` is a 3-tuple (a, b, c). extern Tup readback_tup(Net* net, Book* book, Port port, u32 size); typedef struct Str { u32 len; char *buf; } Str; // Reads a constructor-encoded string (of length at most 255 characters), // into a null-terminated `Str`. extern Str readback_str(Net* net, Book* book, Port port); typedef struct Bytes { u32 len; char *buf; } Bytes; // Reads a constructor-encoded string (of length at most 256 characters), // into a `Bytes`. The returned `Bytes` is not null terminated. extern Bytes readback_bytes(Net* net, Book* book, Port port); // Creates a construtor-encoded string of arbitrary length from the // provided `bytes`. This string can be consumed on the HVM-side. This // will return an `ERA` if nodes cannot be allocated. 
extern Port inject_bytes(Net* net, Bytes *bytes); #endif // hvm_h_INCLUDED ================================================ FILE: src/hvm.rs ================================================ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::alloc::{alloc, dealloc, Layout}; use std::mem; // Runtime // ======= // Types pub type Tag = u8; // Tag ::= 3-bit (rounded up to u8) pub type Lab = u32; // Lab ::= 29-bit (rounded up to u32) pub type Val = u32; // Val ::= 29-bit (rounded up to u32) pub type Rule = u8; // Rule ::= 8-bit (fits a u8) // Port #[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Hash)] pub struct Port(pub Val); // Pair pub struct Pair(pub u64); // Atomics pub type AVal = AtomicU32; pub struct APort(pub AVal); pub struct APair(pub AtomicU64); // Number pub struct Numb(pub Val); const U24_MAX : u32 = (1 << 24) - 1; const U24_MIN : u32 = 0; const I24_MAX : i32 = (1 << 23) - 1; const I24_MIN : i32 = (-1) << 23; // Tags pub const VAR : Tag = 0x0; // variable pub const REF : Tag = 0x1; // reference pub const ERA : Tag = 0x2; // eraser pub const NUM : Tag = 0x3; // number pub const CON : Tag = 0x4; // constructor pub const DUP : Tag = 0x5; // duplicator pub const OPR : Tag = 0x6; // operator pub const SWI : Tag = 0x7; // switch // Rules pub const LINK : Rule = 0x0; pub const CALL : Rule = 0x1; pub const VOID : Rule = 0x2; pub const ERAS : Rule = 0x3; pub const ANNI : Rule = 0x4; pub const COMM : Rule = 0x5; pub const OPER : Rule = 0x6; pub const SWIT : Rule = 0x7; // Numbs pub const TY_SYM : Tag = 0x00; pub const TY_U24 : Tag = 0x01; pub const TY_I24 : Tag = 0x02; pub const TY_F24 : Tag = 0x03; pub const OP_ADD : Tag = 0x04; pub const OP_SUB : Tag = 0x05; pub const FP_SUB : Tag = 0x06; pub const OP_MUL : Tag = 0x07; pub const OP_DIV : Tag = 0x08; pub const FP_DIV : Tag = 0x09; pub const OP_REM : Tag = 0x0A; pub const FP_REM : Tag = 0x0B; pub const OP_EQ : Tag = 0x0C; pub const OP_NEQ : Tag = 0x0D; pub const OP_LT : Tag = 0x0E; pub const 
OP_GT : Tag = 0x0F; pub const OP_AND : Tag = 0x10; pub const OP_OR : Tag = 0x11; pub const OP_XOR : Tag = 0x12; pub const OP_SHL : Tag = 0x13; pub const FP_SHL : Tag = 0x14; pub const OP_SHR : Tag = 0x15; pub const FP_SHR : Tag = 0x16; // Constants pub const FREE : Port = Port(0x0); pub const ROOT : Port = Port(0xFFFFFFF8); pub const NONE : Port = Port(0xFFFFFFFF); // RBag pub struct RBag { pub lo: Vec, pub hi: Vec, } // Global Net pub struct GNet<'a> { pub nlen: usize, // length of the node buffer pub vlen: usize, // length of the vars buffer pub node: &'a mut [APair], // node buffer pub vars: &'a mut [APort], // vars buffer pub itrs: AtomicU64, // interaction count } // Thread Memory pub struct TMem { pub tid: u32, // thread id pub tids: u32, // thread count pub tick: u32, // tick counter pub itrs: u32, // interaction count pub nput: usize, // next node allocation index pub vput: usize, // next vars allocation index pub nloc: Vec, // allocated node locations pub vloc: Vec, // allocated vars locations pub rbag: RBag, // local redex bag } // Top-Level Definition pub struct Def { pub name: String, // def name pub safe: bool, // has no dups pub root: Port, // root port pub rbag: Vec, // def redex bag pub node: Vec, // def node buffer pub vars: usize, // def vars count } // Book of Definitions pub struct Book { pub defs: Vec, } impl Port { pub fn new(tag: Tag, val: Val) -> Self { Port((val << 3) | tag as Val) } pub fn get_tag(&self) -> Tag { (self.0 & 7) as Tag } pub fn get_val(&self) -> Val { self.0 >> 3 } pub fn is_nod(&self) -> bool { self.get_tag() >= CON } pub fn is_var(&self) -> bool { self.get_tag() == VAR } pub fn get_rule(a: Port, b: Port) -> Rule { const TABLE: [[Rule; 8]; 8] = [ //VAR REF ERA NUM CON DUP OPR SWI [LINK,LINK,LINK,LINK,LINK,LINK,LINK,LINK], // VAR [LINK,VOID,VOID,VOID,CALL,CALL,CALL,CALL], // REF [LINK,VOID,VOID,VOID,ERAS,ERAS,ERAS,ERAS], // ERA [LINK,VOID,VOID,VOID,ERAS,ERAS,OPER,SWIT], // NUM [LINK,CALL,ERAS,ERAS,ANNI,COMM,COMM,COMM], // CON 
[LINK,CALL,ERAS,ERAS,COMM,ANNI,COMM,COMM], // DUP [LINK,CALL,ERAS,OPER,COMM,COMM,ANNI,COMM], // OPR [LINK,CALL,ERAS,SWIT,COMM,COMM,COMM,ANNI], // SWI ]; return TABLE[a.get_tag() as usize][b.get_tag() as usize]; } pub fn should_swap(a: Port, b: Port) -> bool { b.get_tag() < a.get_tag() } pub fn is_high_priority(rule: Rule) -> bool { (0b00011101 >> rule) & 1 != 0 } pub fn adjust_port(&self, tm: &TMem) -> Port { let tag = self.get_tag(); let val = self.get_val(); if self.is_nod() { Port::new(tag, tm.nloc[val as usize] as u32) } else if self.is_var() { Port::new(tag, tm.vloc[val as usize] as u32) } else { Port::new(tag, val) } } } impl Pair { pub fn new(fst: Port, snd: Port) -> Self { Pair(((snd.0 as u64) << 32) | fst.0 as u64) } pub fn get_fst(&self) -> Port { Port((self.0 & 0xFFFFFFFF) as u32) } pub fn get_snd(&self) -> Port { Port((self.0 >> 32) as u32) } pub fn adjust_pair(&self, tm: &TMem) -> Pair { let p1 = self.get_fst().adjust_port(tm); let p2 = self.get_snd().adjust_port(tm); Pair::new(p1, p2) } pub fn set_par_flag(&self) -> Self { let p1 : Port = self.get_fst(); let p2 : Port = self.get_snd(); if p1.get_tag() == REF { return Pair::new(Port::new(p1.get_tag(), p1.get_val() | 0x10000000), p2); } else { return Pair::new(p1, p2); } } pub fn get_par_flag(&self) -> bool { let p1 : Port = self.get_fst(); if p1.get_tag() == REF { return p1.get_val() >> 28 == 1; } else { return false; } } } impl Numb { // SYM: a symbolic operator pub fn new_sym(val: Tag) -> Self { Numb((val as Val) << 5 | (TY_SYM as Val)) } pub fn get_sym(&self) -> Tag { (self.0 >> 5) as Tag } // U24: unsigned 24-bit integer pub fn new_u24(val: u32) -> Self { Numb((val << 5) as Val | (TY_U24 as Val)) } pub fn get_u24(&self) -> u32 { (self.0 >> 5) as u32 } // I24: signed 24-bit integer pub fn new_i24(val: i32) -> Self { Numb(((val as u32) << 5) as Val | (TY_I24 as Val)) } pub fn get_i24(&self) -> i32 { (self.0 as i32) << 3 >> 8 } // F24: 24-bit float pub fn new_f24(val: f32) -> Self { let bits = 
val.to_bits(); let mut shifted_bits = bits >> 8; let lost_bits = bits & 0xFF; // round ties to even shifted_bits += u32::from(!val.is_nan()) & ((lost_bits - ((lost_bits >> 7) & !shifted_bits)) >> 7); // ensure NaNs don't become infinities shifted_bits |= u32::from(val.is_nan()); Numb((shifted_bits << 5) as Val | (TY_F24 as Val)) } pub fn get_f24(&self) -> f32 { f32::from_bits((self.0 << 3) & 0xFFFFFF00) } // Gets the numeric type. pub fn get_typ(&self) -> Tag { (self.0 & 0x1F) as Tag } pub fn is_num(&self) -> bool { self.get_typ() >= TY_U24 && self.get_typ() <= TY_F24 } pub fn is_cast(&self) -> bool { self.get_typ() == TY_SYM && self.get_sym() >= TY_U24 && self.get_sym() <= TY_F24 } // Partial application. pub fn partial(a: Self, b: Self) -> Self { Numb((b.0 & !0x1F) | a.get_sym() as u32) } // Cast a number to another type. // The semantics are meant to spiritually resemble rust's numeric casts: // - i24 <-> u24: is just reinterpretation of bits // - f24 -> i24, // f24 -> u24: casts to the "closest" integer representing this float, // saturating if out of range and 0 if NaN // - i24 -> f24, // u24 -> f24: casts to the "closest" float representing this integer. 
pub fn cast(a: Self, b: Self) -> Self {
    // `a` is the cast symbol (its payload names the target type),
    // `b` is the value being converted.
    match (a.get_sym(), b.get_typ()) {
      (TY_U24, TY_U24) => b,
      (TY_U24, TY_I24) => Self::new_u24(b.get_i24() as u32),
      (TY_U24, TY_F24) => Self::new_u24((b.get_f24() as u32).clamp(U24_MIN, U24_MAX)),

      (TY_I24, TY_U24) => Self::new_i24(b.get_u24() as i32),
      (TY_I24, TY_I24) => b,
      (TY_I24, TY_F24) => Self::new_i24((b.get_f24() as i32).clamp(I24_MIN, I24_MAX)),

      (TY_F24, TY_U24) => Self::new_f24(b.get_u24() as f32),
      (TY_F24, TY_I24) => Self::new_f24(b.get_i24() as f32),
      (TY_F24, TY_F24) => b,
      // invalid cast
      (_, _) => Self::new_u24(0),
    }
  }

  // Combines two Numbs. Depending on their selectors this either performs a
  // cast, partially applies an operator symbol to a value, or evaluates a
  // partially applied operator against a value. Combinations that make no
  // sense (two symbols, two operators, two plain values) yield U24 zero.
  pub fn operate(a: Self, b: Self) -> Self {
    //println!("operate {} {}", crate::ast::Numb(a.0).show(), crate::ast::Numb(b.0).show());
    let at = a.get_typ();
    let bt = b.get_typ();
    if at == TY_SYM && bt == TY_SYM {
      return Numb::new_u24(0);
    }
    if a.is_cast() && b.is_num() {
      return Numb::cast(a, b);
    }
    if b.is_cast() && a.is_num() {
      return Numb::cast(b, a);
    }
    if at == TY_SYM && bt != TY_SYM {
      return Numb::partial(a, b);
    }
    if at != TY_SYM && bt == TY_SYM {
      return Numb::partial(b, a);
    }
    if at >= OP_ADD && bt >= OP_ADD {
      return Numb::new_u24(0);
    }
    if at < OP_ADD && bt < OP_ADD {
      return Numb::new_u24(0);
    }
    // Exactly one side is a partially applied operator; put it in `a`.
    let (op, a, ty, b) = if at >= OP_ADD { (at, a, bt, b) } else { (bt, b, at, a) };
    match ty {
      TY_U24 => {
        let av = a.get_u24();
        let bv = b.get_u24();
        match op {
          OP_ADD => Numb::new_u24(av.wrapping_add(bv)),
          OP_SUB => Numb::new_u24(av.wrapping_sub(bv)),
          FP_SUB => Numb::new_u24(bv.wrapping_sub(av)),
          OP_MUL => Numb::new_u24(av.wrapping_mul(bv)),
          // NOTE(review): `wrapping_div` / `wrapping_rem` panic when the
          // divisor is zero; confirm whether that is the intended result.
          OP_DIV => Numb::new_u24(av.wrapping_div(bv)),
          FP_DIV => Numb::new_u24(bv.wrapping_div(av)),
          OP_REM => Numb::new_u24(av.wrapping_rem(bv)),
          FP_REM => Numb::new_u24(bv.wrapping_rem(av)),
          OP_EQ  => Numb::new_u24((av == bv) as u32),
          OP_NEQ => Numb::new_u24((av != bv) as u32),
          OP_LT  => Numb::new_u24((av <  bv) as u32),
          OP_GT  => Numb::new_u24((av >  bv) as u32),
          OP_AND => Numb::new_u24(av & bv),
          OP_OR  => Numb::new_u24(av | bv),
          OP_XOR => Numb::new_u24(av ^ bv),
          OP_SHL => Numb::new_u24(av << (bv & 31)),
          OP_SHR => Numb::new_u24(av >> (bv & 31)),
          FP_SHL => Numb::new_u24(bv << (av & 31)),
          FP_SHR => Numb::new_u24(bv >> (av & 31)),
          _      => unreachable!(),
        }
      }
      TY_I24 => {
        let av = a.get_i24();
        let bv = b.get_i24();
        match op {
          OP_ADD => Numb::new_i24(av.wrapping_add(bv)),
          OP_SUB => Numb::new_i24(av.wrapping_sub(bv)),
          FP_SUB => Numb::new_i24(bv.wrapping_sub(av)),
          OP_MUL => Numb::new_i24(av.wrapping_mul(bv)),
          // NOTE(review): same zero-divisor panic as the U24 case.
          OP_DIV => Numb::new_i24(av.wrapping_div(bv)),
          FP_DIV => Numb::new_i24(bv.wrapping_div(av)),
          OP_REM => Numb::new_i24(av.wrapping_rem(bv)),
          FP_REM => Numb::new_i24(bv.wrapping_rem(av)),
          OP_EQ  => Numb::new_u24((av == bv) as u32),
          OP_NEQ => Numb::new_u24((av != bv) as u32),
          OP_LT  => Numb::new_u24((av <  bv) as u32),
          OP_GT  => Numb::new_u24((av >  bv) as u32),
          OP_AND => Numb::new_i24(av & bv),
          OP_OR  => Numb::new_i24(av | bv),
          OP_XOR => Numb::new_i24(av ^ bv),
          // NOTE(review): the shift selectors are not handled for I24 and
          // end up here; confirm they can never be produced for this type.
          _      => unreachable!(),
        }
      }
      TY_F24 => {
        let av = a.get_f24();
        let bv = b.get_f24();
        match op {
          OP_ADD => Numb::new_f24(av + bv),
          OP_SUB => Numb::new_f24(av - bv),
          FP_SUB => Numb::new_f24(bv - av),
          OP_MUL => Numb::new_f24(av * bv),
          OP_DIV => Numb::new_f24(av / bv),
          FP_DIV => Numb::new_f24(bv / av),
          OP_REM => Numb::new_f24(av % bv),
          FP_REM => Numb::new_f24(bv % av),
          OP_EQ  => Numb::new_u24((av == bv) as u32),
          OP_NEQ => Numb::new_u24((av != bv) as u32),
          OP_LT  => Numb::new_u24((av <  bv) as u32),
          OP_GT  => Numb::new_u24((av >  bv) as u32),
          // For floats the bitwise selectors are reused for other functions.
          OP_AND => Numb::new_f24(av.atan2(bv)),
          OP_OR  => Numb::new_f24(bv.log(av)),
          OP_XOR => Numb::new_f24(av.powf(bv)),
          OP_SHL => Numb::new_f24((av + bv).sin()),
          OP_SHR => Numb::new_f24((av + bv).tan()),
          // NOTE(review): FP_SHL / FP_SHR are not handled for F24.
          _      => unreachable!(),
        }
      }
      _ => Numb::new_u24(0),
    }
  }
}

impl RBag {
  // Creates an empty redex bag.
  pub fn new() -> Self {
    RBag {
      lo: Vec::new(),
      hi: Vec::new(),
    }
  }

  // Pushes a redex onto the stack that matches its rule's priority.
  pub fn push_redex(&mut self, redex: Pair) {
    let rule = Port::get_rule(redex.get_fst(), redex.get_snd());
    if Port::is_high_priority(rule) {
      self.hi.push(redex);
    } else {
      self.lo.push(redex);
    }
  }

  // Pops the next redex, preferring high-priority ones.
  pub fn pop_redex(&mut self) -> Option<Pair> {
    if !self.hi.is_empty() {
      self.hi.pop()
    } else {
      self.lo.pop()
    }
  }

  // Total number of pending redexes.
  pub fn len(&self) -> usize {
    self.lo.len() + self.hi.len()
  }

  // True when at least one high-priority redex is pending.
  pub fn has_highs(&self) -> bool {
    !self.hi.is_empty()
  }
}

impl<'a> GNet<'a> {
  // Allocates the node and vars buffers with `nlen` / `vlen` entries.
  pub fn new(nlen: usize, vlen: usize) -> Self {
    let nlay = Layout::array::<APair>(nlen).unwrap();
    let vlay = Layout::array::<APort>(vlen).unwrap();
    // The buffers must start zeroed: a slot is considered free when it
    // holds 0 (see `is_node_free` / `is_vars_free`), and plain `alloc`
    // gives no guarantee about the initial contents.
    let nptr = unsafe { std::alloc::alloc_zeroed(nlay) as *mut APair };
    let vptr = unsafe { std::alloc::alloc_zeroed(vlay) as *mut APort };
    // Building a slice from a null pointer would be undefined behaviour.
    if nptr.is_null() {
      std::alloc::handle_alloc_error(nlay);
    }
    if vptr.is_null() {
      std::alloc::handle_alloc_error(vlay);
    }
    let node = unsafe { std::slice::from_raw_parts_mut(nptr, nlen) };
    let vars = unsafe { std::slice::from_raw_parts_mut(vptr, vlen) };
    GNet { nlen, vlen, node, vars, itrs: AtomicU64::new(0) }
  }

  pub fn node_create(&self, loc: usize, val: Pair) {
    self.node[loc].0.store(val.0, Ordering::Relaxed);
  }

  pub fn vars_create(&self, var: usize, val: Port) {
    self.vars[var].0.store(val.0, Ordering::Relaxed);
  }

  pub fn node_load(&self, loc: usize) -> Pair {
    Pair(self.node[loc].0.load(Ordering::Relaxed))
  }

  pub fn vars_load(&self, var: usize) -> Port {
    Port(self.vars[var].0.load(Ordering::Relaxed) as u32)
  }

  pub fn node_store(&self, loc: usize, val: Pair) {
    self.node[loc].0.store(val.0, Ordering::Relaxed);
  }

  pub fn vars_store(&self, var: usize, val: Port) {
    self.vars[var].0.store(val.0, Ordering::Relaxed);
  }

  // Atomically replaces the node at `loc`, returning the previous value.
  pub fn node_exchange(&self, loc: usize, val: Pair) -> Pair {
    Pair(self.node[loc].0.swap(val.0, Ordering::Relaxed))
  }

  // Atomically replaces the var at `var`, returning the previous value.
  pub fn vars_exchange(&self, var: usize, val: Port) -> Port {
    Port(self.vars[var].0.swap(val.0, Ordering::Relaxed) as u32)
  }

  // Takes the node at `loc`, leaving 0 (free) behind.
  pub fn node_take(&self, loc: usize) -> Pair {
    self.node_exchange(loc, Pair(0))
  }

  // Takes the var at `var`, leaving 0 (free) behind.
  pub fn vars_take(&self, var: usize) -> Port {
    self.vars_exchange(var, Port(0))
  }

  pub fn is_node_free(&self, loc: usize) -> bool {
    self.node_load(loc).0 == 0
  }

  pub fn is_vars_free(&self, var: usize) -> bool {
    self.vars_load(var).0 == 0
  }

  // Follows a chain of already-substituted variables starting at `var` and
  // returns the port at the end of the chain.
  pub fn enter(&self, mut var: Port) -> Port {
    // While `B` is VAR: extend it (as an optimization)
    while var.get_tag() == VAR {
      // Takes the current `B` substitution as `B'`
      let val = self.vars_exchange(var.get_val() as usize, NONE);
      // If there was no `B'`, stop, as there is no extension
      if val == NONE || val == Port(0) {
        break;
      }
      // Otherwise, delete `B` (we own both) and continue as `A ~> B'`
      self.vars_take(var.get_val() as usize);
      var = val;
    }
    return var;
  }
}

impl<'a> Drop for GNet<'a> {
  // Releases both buffers with the same layouts they were allocated with.
  fn drop(&mut self) {
    let nlay = Layout::array::<APair>(self.nlen).unwrap();
    let vlay = Layout::array::<APort>(self.vlen).unwrap();
    unsafe {
      dealloc(self.node.as_mut_ptr() as *mut u8, nlay);
      dealloc(self.vars.as_mut_ptr() as *mut u8, vlay);
    }
  }
}

impl TMem {
  // Creates the memory of thread `tid` out of `tids` threads.
  pub fn new(tid: u32, tids: u32) -> Self {
    TMem {
      tid,
      tids,
      tick: 0,
      itrs: 0,
      nput: 0,
      vput: 0,
      nloc: vec![0; 0xFFF], // FIXME: move to a constant
      vloc: vec![0; 0xFFF],
      rbag: RBag::new(),
    }
  }

  // Tries to reserve `num` node slots, recording their locations in `nloc`.
  // Returns how many slots were actually found.
  pub fn node_alloc(&mut self, net: &GNet, num: usize) -> usize {
    let mut got = 0;
    for _ in 0..net.nlen {
      self.nput += 1; // index 0 reserved
      if self.nput < net.nlen-1 || net.is_node_free(self.nput % net.nlen) {
        self.nloc[got] = self.nput % net.nlen;
        got += 1;
        //println!("ALLOC NODE {} {}", got, self.nput);
      }
      if got >= num {
        break;
      }
    }
    return got
  }

  // Tries to reserve `num` var slots, recording their locations in `vloc`.
  // Returns how many slots were actually found.
  pub fn vars_alloc(&mut self, net: &GNet, num: usize) -> usize {
    let mut got = 0;
    for _ in 0..net.vlen {
      self.vput += 1; // index 0 reserved for FREE
      if self.vput < net.vlen-1 || net.is_vars_free(self.vput % net.vlen) {
        // The recorded location must wrap with the vars length, the same
        // modulus used for the free check above (it used to wrap with the
        // node length, which is wrong whenever the two buffers differ).
        self.vloc[got] = self.vput % net.vlen;
        //println!("ALLOC VARS {} {}", got, self.vput);
        got += 1;
      }
      if got >= num {
        break;
      }
    }
    got
  }

  // Reserves the nodes and vars an interaction needs. Returns false when
  // either buffer could not supply enough slots.
  pub fn get_resources(&mut self, net: &GNet, _need_rbag: usize, need_node: usize, need_vars: usize) -> bool {
    let got_node = self.node_alloc(net, need_node);
    let got_vars = self.vars_alloc(net, need_vars);
    got_node >= need_node && got_vars >= need_vars
  }

  // Atomically Links `A ~ B`.
pub fn link(&mut self, net: &GNet, a: Port, b: Port) { //println!("link {} ~ {}", a.show(), b.show()); let mut a = a; let mut b = b; // Attempts to directionally point `A ~> B` loop { // If `A` is NODE: swap `A` and `B`, and continue if a.get_tag() != VAR && b.get_tag() == VAR { let x = a; a = b; b = x; } // If `A` is NODE: create the `A ~ B` redex if a.get_tag() != VAR { self.rbag.push_redex(Pair::new(a, b)); break; } // While `B` is VAR: extend it (as an optimization) b = net.enter(b); // Since `A` is VAR: point `A ~> B`. if true { // Stores `A -> B`, taking the current `A` subst as `A'` let a_ = net.vars_exchange(a.get_val() as usize, b); // If there was no `A'`, stop, as we lost B's ownership if a_ == NONE { break; } // Otherwise, delete `A` (we own both) and link `A' ~ B` net.vars_take(a.get_val() as usize); a = a_; } } } // Links `A ~ B` (as a pair). pub fn link_pair(&mut self, net: &GNet, ab: Pair) { self.link(net, ab.get_fst(), ab.get_snd()); //println!("link_pair {:016X}", ab.0); } // The Link Interaction. pub fn interact_link(&mut self, net: &GNet, a: Port, b: Port) -> bool { // Allocates needed nodes and vars. if !self.get_resources(net, 1, 0, 0) { return false; } // Links. self.link_pair(net, Pair::new(a, b)); true } // The Call Interaction. pub fn interact_call(&mut self, net: &GNet, a: Port, b: Port, book: &Book) -> bool { let fid = (a.get_val() as usize) & 0xFFFFFFF; let def = &book.defs[fid]; // Copy Optimization. if b.get_tag() == DUP { if def.safe { return self.interact_eras(net, a, b); } else { // TODO: // Currently, we'll not allow copying of REFs with DUPs. While this is perfectly valid on // IC semantics (i.e., if the user know what they're doing), this can lead to unsound // reductions when compiling λ-terms to HVM. So, for now, we'll just disable this feature, // and consider it undefined behavior. We should add a `--unsafe` flag that allows it. 
println!("ERROR: attempt to clone a non-affine global reference.\n"); std::process::exit(0); } } // Allocates needed nodes and vars. if !self.get_resources(net, def.rbag.len() + 1, def.node.len(), def.vars as usize) { return false; } // Stores new vars. for i in 0..def.vars { net.vars_create(self.vloc[i], NONE); //println!("vars_create vars_loc[{:04X}] {:04X}", i, self.vloc[i]); } // Stores new nodes. for i in 0..def.node.len() { net.node_create(self.nloc[i], def.node[i].adjust_pair(self)); //println!("node_create node_loc[{:04X}] {:016X}", i-1, def.node[i].0); } // Links. for pair in &def.rbag { self.link_pair(net, pair.adjust_pair(self)); } self.link_pair(net, Pair::new(def.root.adjust_port(self), b)); true } // The Void Interaction. pub fn interact_void(&mut self, _net: &GNet, _a: Port, _b: Port) -> bool { true } // The Eras Interaction. pub fn interact_eras(&mut self, net: &GNet, a: Port, b: Port) -> bool { // Allocates needed nodes and vars. if !self.get_resources(net, 2, 0, 0) { return false; } // Checks availability if net.node_load(b.get_val() as usize).0 == 0 { return false; } // Loads ports. let b_ = net.node_exchange(b.get_val() as usize, Pair(0)); let b1 = b_.get_fst(); let b2 = b_.get_snd(); // Links. self.link_pair(net, Pair::new(a, b1)); self.link_pair(net, Pair::new(a, b2)); true } // The Anni Interaction. pub fn interact_anni(&mut self, net: &GNet, a: Port, b: Port) -> bool { // Allocates needed nodes and vars. if !self.get_resources(net, 2, 0, 0) { return false; } // Checks availability if net.node_load(a.get_val() as usize).0 == 0 || net.node_load(b.get_val() as usize).0 == 0 { return false; } // Loads ports. let a_ = net.node_take(a.get_val() as usize); let a1 = a_.get_fst(); let a2 = a_.get_snd(); let b_ = net.node_take(b.get_val() as usize); let b1 = b_.get_fst(); let b2 = b_.get_snd(); // Links. self.link_pair(net, Pair::new(a1, b1)); self.link_pair(net, Pair::new(a2, b2)); return true; } // The Comm Interaction. 
pub fn interact_comm(&mut self, net: &GNet, a: Port, b: Port) -> bool {
    // Allocates needed nodes and vars.
    if !self.get_resources(net, 4, 4, 4) {
      return false;
    }

    // Checks availability
    if net.node_load(a.get_val() as usize).0 == 0 || net.node_load(b.get_val() as usize).0 == 0 {
      return false;
    }

    // Loads ports.
    let a_ = net.node_take(a.get_val() as usize);
    let a1 = a_.get_fst();
    let a2 = a_.get_snd();
    let b_ = net.node_take(b.get_val() as usize);
    let b1 = b_.get_fst();
    let b2 = b_.get_snd();

    // Stores new vars.
    net.vars_create(self.vloc[0], NONE);
    net.vars_create(self.vloc[1], NONE);
    net.vars_create(self.vloc[2], NONE);
    net.vars_create(self.vloc[3], NONE);

    // Stores new nodes.
    net.node_create(self.nloc[0], Pair::new(Port::new(VAR, self.vloc[0] as u32), Port::new(VAR, self.vloc[1] as u32)));
    net.node_create(self.nloc[1], Pair::new(Port::new(VAR, self.vloc[2] as u32), Port::new(VAR, self.vloc[3] as u32)));
    net.node_create(self.nloc[2], Pair::new(Port::new(VAR, self.vloc[0] as u32), Port::new(VAR, self.vloc[2] as u32)));
    net.node_create(self.nloc[3], Pair::new(Port::new(VAR, self.vloc[1] as u32), Port::new(VAR, self.vloc[3] as u32)));

    // Links.
    self.link_pair(net, Pair::new(Port::new(b.get_tag(), self.nloc[0] as u32), a1));
    self.link_pair(net, Pair::new(Port::new(b.get_tag(), self.nloc[1] as u32), a2));
    self.link_pair(net, Pair::new(Port::new(a.get_tag(), self.nloc[2] as u32), b1));
    self.link_pair(net, Pair::new(Port::new(a.get_tag(), self.nloc[3] as u32), b2));

    true
  }

  // The Oper Interaction.
  pub fn interact_oper(&mut self, net: &GNet, a: Port, b: Port) -> bool {
    // Allocates needed nodes and vars.
    if !self.get_resources(net, 1, 1, 0) {
      return false;
    }

    // Checks availability
    if net.node_load(b.get_val() as usize).0 == 0 {
      return false;
    }

    assert_eq!(a.get_tag(), NUM);

    // Loads ports.
    let av = a.get_val();
    let b_ = net.node_take(b.get_val() as usize);
    let b1 = b_.get_fst();
    let b2 = net.enter(b_.get_snd());

    // Performs operation.
    if b1.get_tag() == NUM {
      // Both operands are numbers: compute and send the result onwards.
      let bv = b1.get_val();
      let cv = Numb::operate(Numb(av), Numb(bv));
      self.link_pair(net, Pair::new(Port::new(NUM, cv.0), b2));
    } else {
      // The other operand is not a number yet: park `a` in a new OPR node
      // and link that node against the pending operand.
      net.node_create(self.nloc[0], Pair::new(Port::new(a.get_tag(), av), b2));
      self.link_pair(net, Pair::new(b1, Port::new(OPR, self.nloc[0] as u32)));
    }

    true
  }

  // The Swit Interaction.
  pub fn interact_swit(&mut self, net: &GNet, a: Port, b: Port) -> bool {
    // Allocates needed nodes and vars.
    if !self.get_resources(net, 1, 2, 0) {
      return false;
    }

    // Checks availability
    if net.node_load(b.get_val() as usize).0 == 0 {
      return false;
    }

    // Loads ports.
    let av = Numb(a.get_val()).get_u24();
    let b_ = net.node_take(b.get_val() as usize);
    let b1 = b_.get_fst();
    let b2 = b_.get_snd();

    // Stores new nodes.
    if av == 0 {
      // Zero case: select the first branch, erase the second.
      net.node_create(self.nloc[0], Pair::new(b2, Port::new(ERA,0)));
      self.link_pair(net, Pair::new(Port::new(CON, self.nloc[0] as u32), b1));
    } else {
      // Successor case: erase the first branch and pass `av - 1` to the second.
      net.node_create(self.nloc[0], Pair::new(Port::new(ERA,0), Port::new(CON, self.nloc[1] as u32)));
      net.node_create(self.nloc[1], Pair::new(Port::new(NUM, Numb::new_u24(av-1).0), b2));
      self.link_pair(net, Pair::new(Port::new(CON, self.nloc[0] as u32), b1));
    }

    true
  }

  // Pops a local redex and performs a single interaction.
  // Returns false when the interaction could not be performed (the redex
  // is pushed back), true otherwise, including when the bag was empty.
  pub fn interact(&mut self, net: &GNet, book: &Book) -> bool {
    // Pops a redex.
    let redex = match self.rbag.pop_redex() {
      Some(redex) => redex,
      None => return true, // If there is no redex, stop
    };

    // Gets redex ports A and B.
    let mut a = redex.get_fst();
    let mut b = redex.get_snd();

    // Gets the rule type.
    let mut rule = Port::get_rule(a, b);

    // Used for root redex.
    if a.get_tag() == REF && b == ROOT {
      rule = CALL;
    // Swaps ports if necessary.
    } else if Port::should_swap(a,b) {
      let x = a; a = b; b = x;
    }

    //println!("[{:04x}] REDUCE {} ~ {} | {}", self.tid, a.show(), b.show(), rule);

    let success = match rule {
      LINK => self.interact_link(net, a, b),
      CALL => self.interact_call(net, a, b, book),
      VOID => self.interact_void(net, a, b),
      ERAS => self.interact_eras(net, a, b),
      ANNI => self.interact_anni(net, a, b),
      COMM => self.interact_comm(net, a, b),
      OPER => self.interact_oper(net, a, b),
      SWIT => self.interact_swit(net, a, b),
      _    => panic!("Invalid rule"),
    };

    // If error, pushes redex back.
    if !success {
      self.rbag.push_redex(redex);
      false
    // Else, increments the interaction count.
    } else if rule != LINK {
      self.itrs += 1;
      true
    } else {
      true
    }
  }

  // Runs interactions until the local redex bag is empty, then adds the
  // local interaction count to the net's global counter.
  pub fn evaluator(&mut self, net: &GNet, book: &Book) {
    // Increments the tick
    self.tick += 1;

    // DEBUG:
    //let mut max_rlen = 0;
    //let mut max_nlen = 0;
    //let mut max_vlen = 0;

    // Performs some interactions
    while self.rbag.len() > 0 {
      self.interact(net, book);

      // DEBUG:
      //println!("{}{}", self.rbag.show(), net.show());
      //println!("");
      //let rlen = self.rbag.lo.len() + self.rbag.hi.len();
      //let mut nlen = 0;
      //for i in 0 .. 256 {
        //if net.node_load(i).0 != 0 {
          //nlen += 1;
        //}
      //}
      //let mut vlen = 0;
      //for i in 0..256 {
        //if net.vars_load(i).0 != 0 {
          //vlen += 1;
        //}
      //}
      //max_rlen = max_rlen.max(rlen);
      //max_nlen = max_nlen.max(nlen);
      //max_vlen = max_vlen.max(vlen);
    }

    // DEBUG:
    //println!("MAX_RLEN: {}", max_rlen);
    //println!("MAX_NLEN: {}", max_nlen);
    //println!("MAX_VLEN: {}", max_vlen);

    net.itrs.fetch_add(self.itrs as u64, Ordering::Relaxed);
    self.itrs = 0;
  }
}

// Serialization
// -------------

impl Book {
  // Appends the book to `buf` in native byte order: the def count, then for
  // each def its id, a zero-padded 256-byte name, the safe flag, the rbag /
  // node / vars counts, the root port, and finally the rbag and node pairs.
  pub fn to_buffer(&self, buf: &mut Vec<u8>) {
    // Writes the number of defs
    buf.extend_from_slice(&(self.defs.len() as u32).to_ne_bytes());

    // For each def
    for (fid, def) in self.defs.iter().enumerate() {
      // Writes the def id
      buf.extend_from_slice(&(fid as u32).to_ne_bytes());

      // Writes the name
      // TODO: store as varlen to save space
      let name_bytes = def.name.as_bytes();
      if name_bytes.len() < 256 {
        buf.extend_from_slice(&name_bytes[..name_bytes.len()]);
        // Pads the name field with zeros up to 256 bytes.
        buf.resize(buf.len() + (256 - name_bytes.len()), 0);
      } else {
        panic!("Name too long: {}", def.name);
      }

      // Writes the safe flag
      buf.extend_from_slice(&(def.safe as u32).to_ne_bytes());

      // Writes the rbag length
      buf.extend_from_slice(&(def.rbag.len() as u32).to_ne_bytes());

      // Writes the node length
      buf.extend_from_slice(&(def.node.len() as u32).to_ne_bytes());

      // Writes the vars length
      buf.extend_from_slice(&(def.vars as u32).to_ne_bytes());

      // Writes the root
      buf.extend_from_slice(&def.root.0.to_ne_bytes());

      // Writes the rbag buffer
      for pair in &def.rbag {
        buf.extend_from_slice(&pair.0.to_ne_bytes());
      }

      // Writes the node buffer
      for pair in &def.node {
        buf.extend_from_slice(&pair.0.to_ne_bytes());
      }
    }
  }
}

// Debug
// -----

impl Port {
  // Renders the port as `TAG:VALUE`, with the value in 8 hex digits.
  pub fn show(&self) -> String {
    match self.get_tag() {
      VAR => format!("VAR:{:08X}", self.get_val()),
      REF => format!("REF:{:08X}", self.get_val()),
      ERA => format!("ERA:{:08X}", self.get_val()),
      NUM => format!("NUM:{:08X}", self.get_val()),
      CON => format!("CON:{:08X}", self.get_val()),
      DUP => format!("DUP:{:08X}", self.get_val()),
      OPR => format!("OPR:{:08X}", self.get_val()),
      SWI => format!("SWI:{:08X}", self.get_val()),
      _   => panic!("Invalid tag"),
    }
  }
}

impl Pair {
  // Renders the pair as `FST ~ SND`.
  pub fn show(&self) -> String {
    format!("{} ~ {}", self.get_fst().show(), self.get_snd().show())
  }
}

impl RBag {
  // Renders the bag as a table: high-priority redexes first, then the
  // low-priority ones, numbered continuously.
  pub fn show(&self) -> String {
    let mut s = String::new();
    s.push_str("RBAG | FST-TREE | SND-TREE \n");
    s.push_str("---- | ------------ | ------------\n");
    for (i, pair) in self.hi.iter().enumerate() {
      s.push_str(&format!("{:04X} | {} | {}\n", i, pair.get_fst().show(), pair.get_snd().show()));
    }
    s.push_str("~~~~ | ~~~~~~~~~~~~ | ~~~~~~~~~~~~\n");
    for (i, pair) in self.lo.iter().enumerate() {
      s.push_str(&format!("{:04X} | {} | {}\n", i + self.hi.len(), pair.get_fst().show(), pair.get_snd().show()));
    }
    s.push_str("==== | ============ | ============\n");
    return s;
  }
}

impl<'a> GNet<'a> {
  // Renders every non-empty node and var slot, followed by the root var.
  pub fn show(&self) -> String {
    let mut s = String::new();
    s.push_str("NODE | FST-PORT | SND-PORT \n");
    s.push_str("---- | ------------ | ------------\n");
    //for i in 0..256 {
    for i in 0..self.nlen-1 {
      let node = self.node_load(i);
      if node.0 != 0 {
        s.push_str(&format!("{:04X} | {} | {}\n", i, node.get_fst().show(), node.get_snd().show()));
      }
    }
    s.push_str("==== | ============ | ============\n");
    s.push_str("VARS | VALUE |\n");
    s.push_str("---- | ------------ |\n");
    //for i in 0..256 {
    for i in 0..self.vlen-1 {
      let var = self.vars_load(i);
      if var.0 != 0 {
        s.push_str(&format!("{:04X} | {} |\n", i, var.show()));
      }
    }
    // The root lives in the var slot named by the ROOT port (0x1FFFFFFF).
    let root = self.vars_load(ROOT.get_val() as usize);
    s.push_str(&format!("ROOT | {} |\n", root.show()));
    s.push_str("==== | ============ |\n");
    return s;
  }
}

impl Book {
  // Renders every definition: a header line, its nodes and its redexes.
  pub fn show(&self) -> String {
    let mut s = String::new();
    for def in &self.defs {
      s.push_str(&format!("==== | ============ | ============ {} (vars={},safe={})\n", def.name, def.vars, def.safe));
      s.push_str("NODE | FST-PORT | SND-PORT \n");
      s.push_str("---- | ------------ | ------------\n");
      for (i, node) in def.node.iter().enumerate() {
        s.push_str(&format!("{:04X} | {} | {}\n", i, node.get_fst().show(), node.get_snd().show()));
      }
      s.push_str("==== | ============ | ============\n");
      s.push_str("RBAG | FST-TREE | SND-TREE \n");
      s.push_str("---- | ------------ | ------------\n");
      for (i, node) in def.rbag.iter().enumerate() {
        s.push_str(&format!("{:04X} | {} | {}\n", i, node.get_fst().show(), node.get_snd().show()));
      }
      s.push_str("==== | ============ | ============\n");
    }
    return s;
  }
}

impl Book {
  // Creates a demo program that is equivalent to:
  //   lop  = λn switch n { 0: 0; _: (lop n-1) }
  //   fun  = λn switch n { 0: (lop LOOPS); _: (+ (fun n-1) (fun n-1)) }
  //   main = (fun DEPTH)
  // Or, in core syntax:
  //   @fun  = (?<(@fun0 @fun1) a> a)
  //   @fun0 = a & @lop ~ (#65536 a)
  //   @fun1 = ({a b} c) & @fun ~ (a <+ d c>) & @fun ~ (b d)
  //   @lop  = (?<(#0 @lop0) a> a)
  //   @lop0 = (a b) & @lop ~ (a b)
  //   @main = a & @fun ~ (#10 a)
  //pub fn new_demo(depth: u32, loops: u32) -> Self {
    //let fun = Def {
      //name: "fun".to_string(),
      //safe: true,
      //rbag: vec![],
      //node: vec![Pair::new(Port(0x0C),Port(0x00)), Pair::new(Port(0x1F),Port(0x00)), Pair::new(Port(0x09),Port(0x11)), Pair::new(Port(0x14),Port(0x00))],
      //vars: 1,
    //};
    //let fun0 = Def {
      //name: "fun0".to_string(),
      //safe: true,
      //rbag: vec![Pair::new(Port(0x19),Port(0x0C))],
      //node: vec![Pair::new(Port(0x00),Port(0x00)), Pair::new(Port::new(NUM,loops),Port(0x00))],
      //vars: 1,
    //};
    //let fun1 = Def {
      //name: "fun1".to_string(),
      //safe: false,
      //rbag: vec![Pair::new(Port(0x01),Port(0x1C)), Pair::new(Port(0x01),Port(0x2C))],
      //node: vec![Pair::new(Port(0x0C),Port(0x00)), Pair::new(Port(0x15),Port(0x10)), Pair::new(Port(0x00),Port(0x08)), Pair::new(Port(0x00),Port(0x26)), Pair::new(Port(0x18),Port(0x10)), Pair::new(Port(0x08),Port(0x18))],
      //vars: 4,
    //};
    //let lop = Def {
      //name: "lop".to_string(),
      //safe: true,
      //rbag: vec![],
      //node: vec![Pair::new(Port(0x0C),Port(0x00)), Pair::new(Port(0x1F),Port(0x00)), Pair::new(Port(0x03),Port(0x21)), Pair::new(Port(0x14),Port(0x00))],
      //vars: 1,
    //};
    //let lop0 = Def {
      //name: "lop0".to_string(),
      //safe: true,
      //rbag: vec![Pair::new(Port(0x19),Port(0x14))],
      //node: vec![Pair::new(Port(0x0C),Port(0x00)), Pair::new(Port(0x00),Port(0x08)), Pair::new(Port(0x00),Port(0x08))],
      //vars: 2,
    //};
    //let main = Def {
      //name: "main".to_string(),
      //safe: true,
      //rbag: vec![Pair::new(Port(0x01),Port(0x0C))],
      //node: vec![Pair::new(Port(0x00),Port(0x00)), Pair::new(Port::new(NUM,depth),Port(0x00))],
      //vars: 1,
    //};
    //return Book {
      //defs: vec![fun, fun0, fun1, lop, lop0, main],
    //};
  //}
}

#[test]
fn test_f24() {
  // Test that numbers in range round-trip correctly:
  let min_positive = f32::from_bits(0b0_00000000_000000000000001_00000000);
  let max_subnormal = f32::from_bits(0b0_00000000_111111111111111_00000000);
  let min_normal = f32::from_bits(0b0_00000001_000000000000000_00000000);
  for x in [
    0.0, -0.0, 1.0, -1.0, 1.1000061, -1.1000061, f32::NAN, f32::NEG_INFINITY, f32::INFINITY,
    min_positive, -min_positive, min_positive * 123.0, -min_positive * 123.0,
    max_subnormal, min_normal,
  ] {
    let y = Numb::new_f24(x).get_f24();
    assert!(x.is_nan() && y.is_nan() || x == y);
  }

  for (i, o) in [
    // Test rounding ties to even:
    (f32::from_bits(0b00_00000000), f32::from_bits(0b00_00000000)),
    (f32::from_bits(0b00_01111111), f32::from_bits(0b00_00000000)),
    (f32::from_bits(0b00_10000000), f32::from_bits(0b00_00000000)),
    (f32::from_bits(0b00_10000001), f32::from_bits(0b01_00000000)),
    (f32::from_bits(0b00_11111111), f32::from_bits(0b01_00000000)),
    (f32::from_bits(0b01_00000000), f32::from_bits(0b01_00000000)),
    (f32::from_bits(0b01_01111111), f32::from_bits(0b01_00000000)),
    (f32::from_bits(0b01_10000000), f32::from_bits(0b10_00000000)),
    (f32::from_bits(0b01_10000001), f32::from_bits(0b10_00000000)),
    (f32::from_bits(0b01_11111111), f32::from_bits(0b10_00000000)),
  ] {
    assert_eq!(Numb::new_f24(i).get_f24(), o);
  }

  // Test that NaNs are not turned into infinities
  assert!(Numb::new_f24(f32::from_bits(0b0_11111111_000000000000000_00000001)).get_f24().is_nan());
  assert!(Numb::new_f24(f32::from_bits(0b1_11111111_000000000000000_00000001)).get_f24().is_nan());
  assert!(Numb::new_f24(f32::from_bits(0b0_11111111_111111111111111_11111111)).get_f24().is_nan());
}

================================================
FILE: src/lib.rs
================================================
#![allow(dead_code)]
#![allow(unused_imports)]
#![allow(unused_variables)]

pub mod ast;
pub mod cmp;
pub mod hvm;

================================================
FILE: src/main.rs
================================================
#![allow(dead_code)]
#![allow(unused_imports)]
#![allow(unused_variables)]

use clap::{Arg, ArgAction, Command};
use ::hvm::{ast, cmp, hvm};
use std::fs;
use std::io::Write;
use std::path::PathBuf;
use std::process::Command as SysCommand;

#[cfg(feature = "c")]
extern "C" {
  fn hvm_c(book_buffer: *const u32);
}

#[cfg(feature = "cuda")]
extern "C" {
  fn hvm_cu(book_buffer: *const u32);
}

fn main() {
  let matches = Command::new("hvm")
    .about("HVM2: Higher-order Virtual Machine 2 (32-bit Version)")
    .version(env!("CARGO_PKG_VERSION"))
    .subcommand_required(true)
    .arg_required_else_help(true)
    .subcommand(
      Command::new("run")
        .about("Interprets a file (using Rust)")
        .arg(Arg::new("file").required(true)))
    .subcommand(
      Command::new("run-c")
        .about("Interprets a file (using C)")
        .arg(Arg::new("file").required(true))
        .arg(Arg::new("io")
          .long("io")
          .action(ArgAction::SetTrue)
          .help("Run with IO enabled"))
    )
    .subcommand(
      Command::new("run-cu")
        .about("Interprets a file (using CUDA)")
        .arg(Arg::new("file").required(true))
        .arg(Arg::new("io")
          .long("io")
          .action(ArgAction::SetTrue)
          .help("Run with IO enabled")))
    .subcommand(
      Command::new("gen-c")
        .about("Compiles a file with IO (to standalone C)")
        .arg(Arg::new("file").required(true))
        .arg(Arg::new("io")
          .long("io")
          .action(ArgAction::SetTrue)
          .help("Generate with IO enabled")))
    .subcommand(
Command::new("gen-cu") .about("Compiles a file (to standalone CUDA)") .arg(Arg::new("file").required(true)) .arg(Arg::new("io") .long("io") .action(ArgAction::SetTrue) .help("Generate with IO enabled"))) .get_matches(); match matches.subcommand() { Some(("run", sub_matches)) => { let file = sub_matches.get_one::("file").expect("required"); let code = fs::read_to_string(file).expect("Unable to read file"); let book = ast::Book::parse(&code).unwrap_or_else(|er| panic!("{}",er)).build(); run(&book); } Some(("run-c", sub_matches)) => { let file = sub_matches.get_one::("file").expect("required"); let code = fs::read_to_string(file).expect("Unable to read file"); let book = ast::Book::parse(&code).unwrap_or_else(|er| panic!("{}",er)).build(); let mut data : Vec = Vec::new(); book.to_buffer(&mut data); #[cfg(feature = "c")] unsafe { hvm_c(data.as_mut_ptr() as *mut u32); } #[cfg(not(feature = "c"))] println!("C runtime not available!\n"); } Some(("run-cu", sub_matches)) => { let file = sub_matches.get_one::("file").expect("required"); let code = fs::read_to_string(file).expect("Unable to read file"); let book = ast::Book::parse(&code).unwrap_or_else(|er| panic!("{}",er)).build(); let mut data : Vec = Vec::new(); book.to_buffer(&mut data); #[cfg(feature = "cuda")] unsafe { hvm_cu(data.as_mut_ptr() as *mut u32); } #[cfg(not(feature = "cuda"))] println!("CUDA runtime not available!\n If you've installed CUDA and nvcc after HVM, please reinstall HVM."); } Some(("gen-c", sub_matches)) => { // Reads book from file let file = sub_matches.get_one::("file").expect("required"); let code = fs::read_to_string(file).expect("Unable to read file"); let book = ast::Book::parse(&code).unwrap_or_else(|er| panic!("{}",er)).build(); // Gets optimal core count let cores = num_cpus::get(); let tpcl2 = (cores as f64).log2().floor() as u32; // Generates the interpreted book let mut book_buf : Vec = Vec::new(); book.to_buffer(&mut book_buf); let bookb = format!("{:?}", 
book_buf).replace("[","{").replace("]","}"); let bookb = format!("static const u8 BOOK_BUF[] = {};", bookb); // Generates the C file let hvm_c = include_str!("hvm.c"); let hvm_c = format!("#define IO\n\n{hvm_c}"); let hvm_c = hvm_c.replace("///COMPILED_INTERACT_CALL///", &cmp::compile_book(cmp::Target::C, &book)); let hvm_c = hvm_c.replace("#define INTERPRETED", "#define COMPILED"); let hvm_c = hvm_c.replace("//COMPILED_BOOK_BUF//", &bookb); let hvm_c = hvm_c.replace("#define WITHOUT_MAIN", "#define WITH_MAIN"); let hvm_c = hvm_c.replace("#define TPC_L2 0", &format!("#define TPC_L2 {} // {} cores", tpcl2, cores)); let hvm_c = format!("{hvm_c}\n\n{}", include_str!("run.c")); let hvm_c = hvm_c.replace(r#"#include "hvm.c""#, ""); println!("{}", hvm_c); } Some(("gen-cu", sub_matches)) => { // Reads book from file let file = sub_matches.get_one::("file").expect("required"); let code = fs::read_to_string(file).expect("Unable to read file"); let book = ast::Book::parse(&code).unwrap_or_else(|er| panic!("{}",er)).build(); // Generates the interpreted book let mut book_buf : Vec = Vec::new(); book.to_buffer(&mut book_buf); let bookb = format!("{:?}", book_buf).replace("[","{").replace("]","}"); let bookb = format!("static const u8 BOOK_BUF[] = {};", bookb); //FIXME: currently, CUDA is faster on interpreted mode, so the compiler uses it. 
// Compile with compiled functions: //let hvm_c = include_str!("hvm.cu"); //let hvm_c = hvm_c.replace("///COMPILED_INTERACT_CALL///", &cmp::compile_book(cmp::Target::CUDA, &book)); //let hvm_c = hvm_c.replace("#define INTERPRETED", "#define COMPILED"); // Generates the Cuda file let hvm_cu = include_str!("hvm.cu"); let hvm_cu = format!("#define IO\n\n{hvm_cu}"); let hvm_cu = hvm_cu.replace("//COMPILED_BOOK_BUF//", &bookb); let hvm_cu = hvm_cu.replace("#define WITHOUT_MAIN", "#define WITH_MAIN"); let hvm_cu = format!("{hvm_cu}\n\n{}", include_str!("run.cu")); let hvm_cu = hvm_cu.replace(r#"#include "hvm.cu""#, ""); println!("{}", hvm_cu); } _ => unreachable!(), } } pub fn run(book: &hvm::Book) { // Initializes the global net let net = hvm::GNet::new(1 << 29, 1 << 29); // Initializes threads let mut tm = hvm::TMem::new(0, 1); // Creates an initial redex that calls main let main_id = book.defs.iter().position(|def| def.name == "main").unwrap(); tm.rbag.push_redex(hvm::Pair::new(hvm::Port::new(hvm::REF, main_id as u32), hvm::ROOT)); net.vars_create(hvm::ROOT.get_val() as usize, hvm::NONE); // Starts the timer let start = std::time::Instant::now(); // Evaluates tm.evaluator(&net, &book); // Stops the timer let duration = start.elapsed(); //println!("{}", net.show()); // Prints the result if let Some(tree) = ast::Net::readback(&net, book) { println!("Result: {}", tree.show()); } else { println!("Readback failed. 
Printing GNet memdump...\n"); println!("{}", net.show()); } // Prints interactions and time let itrs = net.itrs.load(std::sync::atomic::Ordering::Relaxed); println!("- ITRS: {}", itrs); println!("- TIME: {:.2}s", duration.as_secs_f64()); println!("- MIPS: {:.2}", itrs as f64 / duration.as_secs_f64() / 1_000_000.0); } ================================================ FILE: src/run.c ================================================ #include #include #include #include "hvm.c" // Readback: λ-Encoded Ctr typedef struct Ctr { u32 tag; u32 args_len; Port args_buf[16]; } Ctr; // Readback: Tuples typedef struct Tup { u32 elem_len; Port elem_buf[8]; } Tup; // Readback: λ-Encoded Str (UTF-32), null-terminated // FIXME: this is actually ASCII :| typedef struct Str { u32 len; char *buf; } Str; // Readback: λ-Encoded list of bytes typedef struct Bytes { u32 len; char *buf; } Bytes; // IO Magic Number #define IO_MAGIC_0 0xD0CA11 #define IO_MAGIC_1 0xFF1FF1 // IO Tags #define IO_DONE 0 #define IO_CALL 1 // Result Tags = Result #define RESULT_OK 0 #define RESULT_ERR 1 // IOError = { // Type, -- a type error // Name, -- invalid io func name // Inner {val: T}, -- an error while calling an io func // } #define IO_ERR_TYPE 0 #define IO_ERR_NAME 1 #define IO_ERR_INNER 2 typedef struct IOError { u32 tag; Port val; } IOError; // List Tags #define LIST_NIL 0 #define LIST_CONS 1 // Readback // -------- // Reads back a λ-Encoded constructor from device to host. // Encoding: λt ((((t TAG) arg0) arg1) ...) 
Ctr readback_ctr(Net* net, Book* book, Port port) { Ctr ctr; ctr.tag = -1; ctr.args_len = 0; // Loads root lambda Port lam_port = expand(net, book, port); if (get_tag(lam_port) != CON) return ctr; Pair lam_node = node_load(net, get_val(lam_port)); // Loads first application Port app_port = expand(net, book, get_fst(lam_node)); if (get_tag(app_port) != CON) return ctr; Pair app_node = node_load(net, get_val(app_port)); // Loads first argument (as the tag) Port arg_port = expand(net, book, get_fst(app_node)); if (get_tag(arg_port) != NUM) return ctr; ctr.tag = get_u24(get_val(arg_port)); // Loads remaining arguments while (true) { app_port = expand(net, book, get_snd(app_node)); if (get_tag(app_port) != CON) break; app_node = node_load(net, get_val(app_port)); arg_port = expand(net, book, get_fst(app_node)); ctr.args_buf[ctr.args_len++] = arg_port; } return ctr; } // Reads back a tuple of at most `size` elements. Tuples are // (right-nested con nodes) (CON 1 (CON 2 (CON 3 (...)))) // The provided `port` should be `expanded` before calling. extern Tup readback_tup(Net* net, Book* book, Port port, u32 size) { Tup tup; tup.elem_len = 0; // Loads remaining arguments while (get_tag(port) == CON && (tup.elem_len + 1 < size)) { Pair node = node_load(net, get_val(port)); tup.elem_buf[tup.elem_len++] = expand(net, book, get_fst(node)); port = expand(net, book, get_snd(node)); } tup.elem_buf[tup.elem_len++] = port; return tup; } // Converts a Port into a list of bytes. 
// Encoding: // - λt (t NIL) // - λt (((t CONS) head) tail) Bytes readback_bytes(Net* net, Book* book, Port port) { Bytes bytes; u32 capacity = 256; bytes.buf = (char*) malloc(sizeof(char) * capacity); bytes.len = 0; // Readback loop while (true) { // Normalizes the net normalize(net, book); // Reads the λ-Encoded Ctr Ctr ctr = readback_ctr(net, book, peek(net, port)); // Reads string layer switch (ctr.tag) { case LIST_NIL: { break; } case LIST_CONS: { if (ctr.args_len != 2) break; if (get_tag(ctr.args_buf[0]) != NUM) break; if (bytes.len == capacity - 1) { capacity *= 2; bytes.buf = realloc(bytes.buf, capacity); } bytes.buf[bytes.len++] = get_u24(get_val(ctr.args_buf[0])); boot_redex(net, new_pair(ctr.args_buf[1], ROOT)); port = ROOT; continue; } } break; } return bytes; } // Converts a Port into a UTF-32 (truncated to 24 bits) null-terminated string. // Since unicode scalars can fit in 21 bits, HVM's u24 // integers can contain any unicode scalar value. // Encoding: // - λt (t NIL) // - λt (((t CONS) head) tail) Str readback_str(Net* net, Book* book, Port port) { // readback_bytes is guaranteed to return a buffer with a capacity of at least one more // than the number of bytes read, so we can null-terminate it. Bytes bytes = readback_bytes(net, book, port); Str str; str.len = bytes.len; str.buf = bytes.buf; str.buf[str.len] = 0; return str; } /// Returns a λ-Encoded Ctr for a NIL: λt (t NIL) /// A previous call to `get_resources(tm, 0, 2, 1)` is required. Port inject_nil(Net* net) { u32 v1 = tm[0]->vloc[0]; u32 n1 = tm[0]->nloc[0]; u32 n2 = tm[0]->nloc[1]; vars_create(net, v1, NONE); Port var = new_port(VAR, v1); node_create(net, n1, new_pair(new_port(NUM, new_u24(LIST_NIL)), var)); node_create(net, n2, new_pair(new_port(CON, n1), var)); return new_port(CON, n2); } /// Returns a λ-Encoded Ctr for a CONS: λt (((t CONS) head) tail) /// A previous call to `get_resources(tm, 0, 4, 1)` is required. 
Port inject_cons(Net* net, Port head, Port tail) { u32 v1 = tm[0]->vloc[0]; u32 n1 = tm[0]->nloc[0]; u32 n2 = tm[0]->nloc[1]; u32 n3 = tm[0]->nloc[2]; u32 n4 = tm[0]->nloc[3]; vars_create(net, v1, NONE); Port var = new_port(VAR, v1); node_create(net, n1, new_pair(tail, var)); node_create(net, n2, new_pair(head, new_port(CON, n1))); node_create(net, n3, new_pair(new_port(NUM, new_u24(LIST_CONS)), new_port(CON, n2))); node_create(net, n4, new_pair(new_port(CON, n3), var)); return new_port(CON, n4); } // Converts a list of bytes to a Port. // Encoding: // - λt (t NIL) // - λt (((t CONS) head) tail) Port inject_bytes(Net* net, Bytes *bytes) { // Allocate all resources up front: // - NIL needs 2 nodes & 1 var // - CONS needs 4 nodes & 1 var u32 len = bytes->len; if (!get_resources(net, tm[0], 0, 2, 1)) { fprintf(stderr, "inject_bytes: failed to get resources\n"); return new_port(ERA, 0); } Port port = inject_nil(net); // TODO: batch-allocate these (within the limits of TM) for (u32 i = 0; i < len; i++) { if (!get_resources(net, tm[0], 0, 4, 1)) { fprintf(stderr, "inject_bytes: failed to get resources\n"); return new_port(ERA, 0); } Port byte = new_port(NUM, new_u24(bytes->buf[len - i - 1])); port = inject_cons(net, byte, port); } return port; } /// Returns a λ-Encoded Ctr for a RESULT_OK: λt ((t RESULT_OK) val) Port inject_ok(Net* net, Port val) { if (!get_resources(net, tm[0], 0, 3, 1)) { fprintf(stderr, "inject_ok: failed to get resources\n"); return new_port(ERA, 0); } u32 v1 = tm[0]->vloc[0]; u32 n1 = tm[0]->nloc[0]; u32 n2 = tm[0]->nloc[1]; u32 n3 = tm[0]->nloc[2]; vars_create(net, v1, NONE); Port var = new_port(VAR, v1); node_create(net, n1, new_pair(val, var)); node_create(net, n2, new_pair(new_port(NUM, new_u24(RESULT_OK)), new_port(CON, n1))); node_create(net, n3, new_pair(new_port(CON, n2), var)); return new_port(CON, n3); } /// Returns a λ-Encoded Ctr for a RESULT_ERR: λt ((t RESULT_ERR) err) Port inject_err(Net* net, Port err) { if (!get_resources(net, 
tm[0], 0, 3, 1)) { fprintf(stderr, "inject_err: failed to get resources\n"); return new_port(ERA, 0); } u32 v1 = tm[0]->vloc[0]; u32 n1 = tm[0]->nloc[0]; u32 n2 = tm[0]->nloc[1]; u32 n3 = tm[0]->nloc[2]; vars_create(net, v1, NONE); Port var = new_port(VAR, v1); node_create(net, n1, new_pair(err, var)); node_create(net, n2, new_pair(new_port(NUM, new_u24(RESULT_ERR)), new_port(CON, n1))); node_create(net, n3, new_pair(new_port(CON, n2), var)); return new_port(CON, n3); } /// Returns a λ-Encoded Ctr for a Result/Err(IOError(..)) Port inject_io_err(Net* net, IOError err) { if (err.tag <= IO_ERR_NAME) { if (!get_resources(net, tm[0], 0, 2, 1)) { fprintf(stderr, "inject_io_err: failed to get resources\n"); return new_port(ERA, 0); } u32 v1 = tm[0]->vloc[0]; u32 n1 = tm[0]->nloc[0]; u32 n2 = tm[0]->nloc[1]; vars_create(net, v1, NONE); Port var = new_port(VAR, v1); node_create(net, n1, new_pair(new_port(NUM, new_u24(err.tag)), var)); node_create(net, n2, new_pair(new_port(CON, n1), var)); return inject_err(net, new_port(CON, n2)); } if (!get_resources(net, tm[0], 0, 3, 1)) { fprintf(stderr, "inject_io_err: failed to get resources\n"); return new_port(ERA, 0); } u32 v1 = tm[0]->vloc[0]; u32 n1 = tm[0]->nloc[0]; u32 n2 = tm[0]->nloc[1]; u32 n3 = tm[0]->nloc[2]; vars_create(net, v1, NONE); Port var = new_port(VAR, v1); node_create(net, n1, new_pair(err.val, var)); node_create(net, n2, new_pair(new_port(NUM, new_u24(IO_ERR_INNER)), new_port(CON, n1))); node_create(net, n3, new_pair(new_port(CON, n2), var)); return inject_err(net, new_port(CON, n3)); } /// Returns a λ-Encoded Ctr for a Result/Err(IOError/Type) Port inject_io_err_type(Net* net) { IOError io_error = { .tag = IO_ERR_TYPE, }; return inject_io_err(net, io_error); } /// Returns a λ-Encoded Ctr for a Result/Err(IOError/Name) Port inject_io_err_name(Net* net) { IOError io_error = { .tag = IO_ERR_NAME, }; return inject_io_err(net, io_error); } /// Returns a λ-Encoded Ctr for a Result/Err(IOError/Inner(val)) Port 
inject_io_err_inner(Net* net, Port val) { IOError io_error = { .tag = IO_ERR_INNER, .val = val, }; return inject_io_err(net, io_error); } /// Returns a λ-Encoded Ctr for an Result> /// `err` must be `NUL`-terminated. Port inject_io_err_str(Net* net, char* err) { Bytes err_bytes; err_bytes.buf = err; err_bytes.len = strlen(err_bytes.buf); Port err_port = inject_bytes(net, &err_bytes); return inject_io_err_inner(net, err_port); } // Primitive IO Fns // ----------------- // Open file pointers. Indices into this array // are used as "file descriptors". // Indices 0 1 and 2 are reserved. // - 0 -> stdin // - 1 -> stdout // - 2 -> stderr static FILE* FILE_POINTERS[256]; // Open dylibs handles. Indices into this array // are used as opaque loadedd object "handles". static void* DYLIBS[256]; // Converts a NUM port (file descriptor) to file pointer. FILE* readback_file(Port port) { if (get_tag(port) != NUM) { fprintf(stderr, "non-num where file descriptor was expected: %i\n", get_tag(port)); return NULL; } u32 idx = get_u24(get_val(port)); if (idx == 0) return stdin; if (idx == 1) return stdout; if (idx == 2) return stderr; FILE* fp = FILE_POINTERS[idx]; if (fp == NULL) { return NULL; } return fp; } // Converts a NUM port (dylib handle) to an opaque dylib object. void* readback_dylib(Port port) { if (get_tag(port) != NUM) { fprintf(stderr, "non-num where dylib handle was expected: %i\n", get_tag(port)); return NULL; } u32 idx = get_u24(get_val(port)); void* dl = DYLIBS[idx]; if (dl == NULL) { fprintf(stderr, "invalid dylib handle\n"); return NULL; } return dl; } // Reads from a file a specified number of bytes. // `argm` is a tuple of (file_descriptor, num_bytes). 
// Returns: Result> Port io_read(Net* net, Book* book, Port argm) { Tup tup = readback_tup(net, book, argm, 2); if (tup.elem_len != 2) { return inject_io_err_type(net); } FILE* fp = readback_file(tup.elem_buf[0]); u32 num_bytes = get_u24(get_val(tup.elem_buf[1])); if (fp == NULL) { return inject_io_err_inner(net, new_port(NUM, new_i24(EBADF))); } /// Read a string. Bytes bytes; bytes.buf = (char*) malloc(sizeof(char) * num_bytes); bytes.len = fread(bytes.buf, sizeof(char), num_bytes, fp); if ((bytes.len != num_bytes) && ferror(fp)) { free(bytes.buf); return inject_io_err_inner(net, new_port(NUM, new_i24(ferror(fp)))); } // Convert it to a port. Port ret = inject_bytes(net, &bytes); free(bytes.buf); return inject_ok(net, ret); } // Opens a file with the provided mode. // `argm` is a tuple (CON node) of the // file name and mode as strings. // Returns: Result> Port io_open(Net* net, Book* book, Port argm) { Tup tup = readback_tup(net, book, argm, 2); if (tup.elem_len != 2) { return inject_io_err_type(net); } Str name = readback_str(net, book, tup.elem_buf[0]); Str mode = readback_str(net, book, tup.elem_buf[1]); for (u32 fd = 3; fd < sizeof(FILE_POINTERS); fd++) { if (FILE_POINTERS[fd] == NULL) { FILE_POINTERS[fd] = fopen(name.buf, mode.buf); free(name.buf); free(mode.buf); if (FILE_POINTERS[fd] == NULL) { return inject_io_err_inner(net, new_port(NUM, new_i24(errno))); } return inject_ok(net, new_port(NUM, new_u24(fd))); } } free(name.buf); free(mode.buf); // too many open files return inject_io_err_inner(net, new_port(NUM, new_i24(EMFILE))); } // Closes a file, reclaiming the file descriptor. 
// Returns: Result<*, IOError> Port io_close(Net* net, Book* book, Port argm) { FILE* fp = readback_file(argm); if (fp == NULL) { return inject_io_err_inner(net, new_port(NUM, new_i24(EBADF))); } if (fclose(fp) != 0) { return inject_io_err_inner(net, new_port(NUM, new_i24(ferror(fp)))); } FILE_POINTERS[get_u24(get_val(argm))] = NULL; return inject_ok(net, new_port(ERA, 0)); } // Writes a list of bytes to a file. // `argm` is a tuple (CON node) of the // file descriptor and list of bytes to write. // Returns: Result<*, IOError> Port io_write(Net* net, Book* book, Port argm) { Tup tup = readback_tup(net, book, argm, 2); if (tup.elem_len != 2) { return inject_io_err_type(net); } FILE* fp = readback_file(tup.elem_buf[0]); Bytes bytes = readback_bytes(net, book, tup.elem_buf[1]); if (fp == NULL) { free(bytes.buf); return inject_io_err_inner(net, new_port(NUM, new_i24(EBADF))); } if (fwrite(bytes.buf, sizeof(char), bytes.len, fp) != bytes.len) { free(bytes.buf); return inject_io_err_inner(net, new_port(NUM, new_i24(ferror(fp)))); } free(bytes.buf); return inject_ok(net, new_port(ERA, 0)); } // Flushes an output stream. // Returns: Result<*, IOError> Port io_flush(Net* net, Book* book, Port argm) { FILE* fp = readback_file(argm); if (fp == NULL) { return inject_io_err_inner(net, new_port(NUM, new_i24(EBADF))); } if (fflush(fp) != 0) { return inject_io_err_inner(net, new_port(NUM, new_i24(ferror(fp)))); } return inject_ok(net, new_port(ERA, 0)); } // Seeks to a position in a file. 
// `argm` is a 3-tuple (CON fd (CON offset whence)), where // - fd is a file descriptor // - offset is a signed byte offset // - whence is what that offset is relative to: // - 0 (SEEK_SET): beginning of file // - 1 (SEEK_CUR): current position of the file pointer // - 2 (SEEK_END): end of the file // Returns: Result<*, IOError> Port io_seek(Net* net, Book* book, Port argm) { Tup tup = readback_tup(net, book, argm, 3); if (tup.elem_len != 3) { return inject_io_err_type(net); } FILE* fp = readback_file(tup.elem_buf[0]); i32 offset = get_i24(get_val(tup.elem_buf[1])); u32 whence = get_i24(get_val(tup.elem_buf[2])); if (fp == NULL) { return inject_io_err_inner(net, new_port(NUM, new_i24(EBADF))); } int cwhence; switch (whence) { case 0: cwhence = SEEK_SET; break; case 1: cwhence = SEEK_CUR; break; case 2: cwhence = SEEK_END; break; default: return inject_io_err_type(net); } if (fseek(fp, offset, cwhence) != 0) { return inject_io_err_inner(net, new_port(NUM, new_i24(ferror(fp)))); } return inject_ok(net, new_port(ERA, 0)); } // Returns the current time as a tuple of the high // and low 24 bits of a 48-bit nanosecond timestamp. // Returns: Result<(u24, u24), IOError<*>> Port io_get_time(Net* net, Book* book, Port argm) { // Get the current time in nanoseconds u64 time_ns = time64(); // Encode the time as a 64-bit unsigned integer u32 time_hi = (u32)(time_ns >> 24) & 0xFFFFFFF; u32 time_lo = (u32)(time_ns & 0xFFFFFFF); // Allocate a node to store the time u32 lps = 0; u32 loc = node_alloc_1(net, tm[0], &lps); node_create(net, loc, new_pair(new_port(NUM, new_u24(time_hi)), new_port(NUM, new_u24(time_lo)))); return inject_ok(net, new_port(CON, loc)); } // Sleeps. // `argm` is a tuple (CON node) of the high and low // 24 bits for a 48-bit duration in nanoseconds. 
// Returns: Result<*, IOError<*>> Port io_sleep(Net* net, Book* book, Port argm) { Tup tup = readback_tup(net, book, argm, 2); if (tup.elem_len != 2) { return inject_io_err_type(net); } // Get the sleep duration node Pair dur_node = node_load(net, get_val(argm)); // Get the high and low 24-bit parts of the duration u32 dur_hi = get_u24(get_val(tup.elem_buf[0])); u32 dur_lo = get_u24(get_val(tup.elem_buf[1])); // Combine into a 48-bit duration in nanoseconds u64 dur_ns = (((u64)dur_hi) << 24) | dur_lo; // Sleep for the specified duration struct timespec ts; ts.tv_sec = dur_ns / 1000000000; ts.tv_nsec = dur_ns % 1000000000; nanosleep(&ts, NULL); return inject_ok(net, new_port(ERA, 0)); } // Opens a dylib at the provided path. // `argm` is a tuple of `filename` and `lazy`. // `filename` is a λ-encoded string. // `lazy` is a `bool` indicating if functions should be lazily loaded. // Returns: Result> Port io_dl_open(Net* net, Book* book, Port argm) { Tup tup = readback_tup(net, book, argm, 2); Str str = readback_str(net, book, tup.elem_buf[0]); u32 lazy = get_u24(get_val(tup.elem_buf[1])); int flags = lazy ? RTLD_LAZY : RTLD_NOW; for (u32 dl = 0; dl < sizeof(DYLIBS); dl++) { if (DYLIBS[dl] == NULL) { DYLIBS[dl] = dlopen(str.buf, flags); free(str.buf); if (DYLIBS[dl] == NULL) { return inject_io_err_str(net, dlerror()); } return inject_ok(net, new_port(NUM, new_u24(dl))); } } return inject_io_err_str(net, "too many open dylibs"); } // Calls a function from a loaded dylib. // `argm` is a 3-tuple of `dylib_handle`, `symbol`, `args`. // `dylib_handle` is the numeric node returned from a `DL_OPEN` call. // `symbol` is a λ-encoded string of the symbol name. // `args` is the argument to be provided to the dylib symbol. // // This function returns a Result with an Ok variant containing an // arbitrary type. 
// // Returns Result> Port io_dl_call(Net* net, Book* book, Port argm) { Tup tup = readback_tup(net, book, argm, 3); if (tup.elem_len != 3) { return inject_io_err_type(net); } void* dl = readback_dylib(tup.elem_buf[0]); Str symbol = readback_str(net, book, tup.elem_buf[1]); dlerror(); Port (*func)(Net*, Book*, Port) = dlsym(dl, symbol.buf); char* error = dlerror(); if (error != NULL) { return inject_io_err_str(net, error); } return inject_ok(net, func(net, book, tup.elem_buf[2])); } // Closes a loaded dylib, reclaiming the handle. // // Returns: Result<*, IOError> Port io_dl_close(Net* net, Book* book, Port argm) { void* dl = readback_dylib(argm); if (dl == NULL) { return inject_io_err_type(net); } int err = dlclose(dl) != 0; if (err != 0) { return inject_io_err_str(net, dlerror()); } DYLIBS[get_u24(get_val(argm))] = NULL; return inject_ok(net, new_port(ERA, 0)); } // Book Loader // ----------- void book_init(Book* book) { book->ffns_buf[book->ffns_len++] = (FFn){"READ", io_read}; book->ffns_buf[book->ffns_len++] = (FFn){"OPEN", io_open}; book->ffns_buf[book->ffns_len++] = (FFn){"CLOSE", io_close}; book->ffns_buf[book->ffns_len++] = (FFn){"FLUSH", io_flush}; book->ffns_buf[book->ffns_len++] = (FFn){"WRITE", io_write}; book->ffns_buf[book->ffns_len++] = (FFn){"SEEK", io_seek}; book->ffns_buf[book->ffns_len++] = (FFn){"GET_TIME", io_get_time}; book->ffns_buf[book->ffns_len++] = (FFn){"SLEEP", io_sleep}; book->ffns_buf[book->ffns_len++] = (FFn){"DL_OPEN", io_dl_open}; book->ffns_buf[book->ffns_len++] = (FFn){"DL_CALL", io_dl_call}; book->ffns_buf[book->ffns_len++] = (FFn){"DL_CLOSE", io_dl_open}; } // Monadic IO Evaluator // --------------------- // Runs an IO computation. 
void do_run_io(Net* net, Book* book, Port port) { book_init(book); setlinebuf(stdout); setlinebuf(stderr); // IO loop while (true) { // Normalizes the net normalize(net, book); // Reads the λ-Encoded Ctr Ctr ctr = readback_ctr(net, book, peek(net, port)); // Checks if IO Magic Number is a CON if (ctr.args_len < 1 || get_tag(ctr.args_buf[0]) != CON) { break; } // Checks the IO Magic Number Pair io_magic = node_load(net, get_val(ctr.args_buf[0])); //printf("%08x %08x\n", get_u24(get_val(get_fst(io_magic))), get_u24(get_val(get_snd(io_magic)))); if (get_val(get_fst(io_magic)) != new_u24(IO_MAGIC_0) || get_val(get_snd(io_magic)) != new_u24(IO_MAGIC_1)) { break; } switch (ctr.tag) { case IO_CALL: { if (ctr.args_len != 4) { fprintf(stderr, "invalid IO_CALL: args_len = %u\n", ctr.args_len); break; } Str func = readback_str(net, book, ctr.args_buf[1]); FFn* ffn = NULL; // FIXME: optimize this linear search for (u32 fid = 0; fid < book->ffns_len; ++fid) { if (strcmp(func.buf, book->ffns_buf[fid].name) == 0) { ffn = &book->ffns_buf[fid]; break; } } free(func.buf); Port argm = ctr.args_buf[2]; Port cont = ctr.args_buf[3]; Port ret; if (ffn == NULL) { ret = inject_io_err_name(net); } else { ret = ffn->func(net, book, argm); }; u32 lps = 0; u32 loc = node_alloc_1(net, tm[0], &lps); node_create(net, loc, new_pair(ret, ROOT)); boot_redex(net, new_pair(new_port(CON, loc), cont)); port = ROOT; continue; } case IO_DONE: { break; } } break; } } ================================================ FILE: src/run.cu ================================================ #include #include #include #include "hvm.cu" // Readback: λ-Encoded Ctr struct Ctr { u32 tag; u32 args_len; Port args_buf[16]; }; // Readback: Tuples struct Tup { u32 elem_len; Port elem_buf[8]; }; // Readback: λ-Encoded Str (UTF-32) // FIXME: this is actually ASCII :| struct Str { u32 len; char* buf; }; // Readback: λ-Encoded list of bytes typedef struct Bytes { u32 len; char *buf; } Bytes; // IO Magic Number #define IO_MAGIC_0 
0xD0CA11 #define IO_MAGIC_1 0xFF1FF1 // IO Tags #define IO_DONE 0 #define IO_CALL 1 // Result Tags #define RESULT_OK 0 #define RESULT_ERR 1 // IOError = { // Type, -- a type error // Name, -- invalid io func name // Inner {val: T}, -- an error while calling an io func // } #define IO_ERR_TYPE 0 #define IO_ERR_NAME 1 #define IO_ERR_INNER 2 typedef struct IOError { u32 tag; Port val; } IOError; // List Type #define LIST_NIL 0 #define LIST_CONS 1 // Readback // -------- // Reads back a λ-Encoded constructor from device to host. // Encoding: λt ((((t TAG) arg0) arg1) ...) Ctr gnet_readback_ctr(GNet* gnet, Port port) { Ctr ctr; ctr.tag = -1; ctr.args_len = 0; // Loads root lambda Port lam_port = gnet_expand(gnet, port); if (get_tag(lam_port) != CON) return ctr; Pair lam_node = gnet_node_load(gnet, get_val(lam_port)); // Loads first application Port app_port = gnet_expand(gnet, get_fst(lam_node)); if (get_tag(app_port) != CON) return ctr; Pair app_node = gnet_node_load(gnet, get_val(app_port)); // Loads first argument (as the tag) Port arg_port = gnet_expand(gnet, get_fst(app_node)); if (get_tag(arg_port) != NUM) return ctr; ctr.tag = get_u24(get_val(arg_port)); // Loads remaining arguments while (true) { app_port = gnet_expand(gnet, get_snd(app_node)); if (get_tag(app_port) != CON) break; app_node = gnet_node_load(gnet, get_val(app_port)); arg_port = gnet_expand(gnet, get_fst(app_node)); ctr.args_buf[ctr.args_len++] = arg_port; } return ctr; } // Reads back a tuple of at most `size` elements. Tuples are // (right-nested con nodes) (CON 1 (CON 2 (CON 3 (...)))) // The provided `port` should be `expanded` before calling. 
// `size` is clamped to the capacity of `Tup.elem_buf`. The last slot written
// always holds whatever port remained, so `elem_len` is at least 1.
extern "C" Tup gnet_readback_tup(GNet* gnet, Port port, u32 size) {
  Tup tup;
  tup.elem_len = 0;

  // Never write more elements than the fixed buffer can hold.
  const u32 elem_cap = sizeof(tup.elem_buf) / sizeof(tup.elem_buf[0]);
  if (size > elem_cap) {
    size = elem_cap;
  }

  // Loads remaining arguments
  while (get_tag(port) == CON && (tup.elem_len + 1 < size)) {
    Pair node = gnet_node_load(gnet, get_val(port));
    tup.elem_buf[tup.elem_len++] = gnet_expand(gnet, get_fst(node));

    port = gnet_expand(gnet, get_snd(node));
  }

  tup.elem_buf[tup.elem_len++] = port;

  return tup;
}

// Converts a Port into a list of bytes.
// Encoding:
// - λt (t NIL)
// - λt (((t CONS) head) tail)
// Returns a malloc'ed buffer owned by the caller, with room for at least one
// byte past `len`. Reading stops at NIL or at the first layer that is not a
// well-formed CONS with a NUM head.
extern "C" Bytes gnet_readback_bytes(GNet* gnet, Port port) {
  // Result
  Bytes bytes;
  u32 capacity = 256;
  bytes.buf = (char*) malloc(sizeof(char) * capacity);
  bytes.len = 0;

  // Readback loop
  while (true) {
    // Normalizes the net
    gnet_normalize(gnet);

    // Reads the λ-Encoded Ctr
    Ctr ctr = gnet_readback_ctr(gnet, gnet_peek(gnet, port));

    // Reads string layer
    switch (ctr.tag) {
      case LIST_NIL: {
        break;
      }
      case LIST_CONS: {
        if (ctr.args_len != 2) break;
        if (get_tag(ctr.args_buf[0]) != NUM) break;

        // Grow while one slot is still free, so a terminator always fits.
        if (bytes.len == capacity - 1) {
          capacity *= 2;
          bytes.buf = (char*) realloc(bytes.buf, capacity);
        }

        bytes.buf[bytes.len++] = get_u24(get_val(ctr.args_buf[0]));
        // Link the tail to ROOT and keep reading from there.
        gnet_boot_redex(gnet, new_pair(ctr.args_buf[1], ROOT));
        port = ROOT;
        continue;
      }
    }
    break;
  }

  return bytes;
}

// Reads back a UTF-32 (truncated to 24 bits) string.
// Since unicode scalars can fit in 21 bits, HVM's u24
// integers can contain any unicode scalar value.
// Encoding:
// - λt (t NIL)
// - λt (((t CONS) head) tail)
// The returned `buf` is malloc'ed, NUL-terminated, and owned by the caller.
extern "C" Str gnet_readback_str(GNet* gnet, Port port) {
  // gnet_readback_bytes is guaranteed to return a buffer with a capacity of at least one more
  // than the number of bytes read, so we can null-terminate it.
  Bytes bytes = gnet_readback_bytes(gnet, port);

  Str str;
  str.len = bytes.len;
  str.buf = bytes.buf;
  str.buf[str.len] = 0;

  return str;
}

/// Returns a λ-Encoded Ctr for a NIL: λt (t NIL)
/// Should only be called within `inject_bytes`, as a previous call
/// to `get_resources` is expected.
__device__ Port inject_nil(Net* net, TM* tm) {
  u32 v1 = tm->vloc[0];

  u32 n1 = tm->nloc[0];
  u32 n2 = tm->nloc[1];

  vars_create(net, v1, NONE);
  Port var = new_port(VAR, v1);

  // n1 = (NIL var), n2 = (n1 var)
  node_create(net, n1, new_pair(new_port(NUM, new_u24(LIST_NIL)), var));
  node_create(net, n2, new_pair(new_port(CON, n1), var));

  return new_port(CON, n2);
}

/// Returns a λ-Encoded Ctr for a CONS: λt (((t CONS) head) tail)
/// Should only be called within `inject_bytes`, as a previous call
/// to `get_resources` is expected.
/// Every call reads the same slots (`tm->vloc[0]`, `tm->nloc[0..3]`), so the
/// caller has to request fresh resources before each call.
__device__ Port inject_cons(Net* net, TM* tm, Port head, Port tail) {
  u32 v1 = tm->vloc[0];

  u32 n1 = tm->nloc[0];
  u32 n2 = tm->nloc[1];
  u32 n3 = tm->nloc[2];
  u32 n4 = tm->nloc[3];

  vars_create(net, v1, NONE);
  Port var = new_port(VAR, v1);

  // Built inside-out: n1 = (tail var), n2 = (head n1), n3 = (CONS n2),
  // n4 = (n3 var).
  node_create(net, n1, new_pair(tail, var));
  node_create(net, n2, new_pair(head, new_port(CON, n1)));
  node_create(net, n3, new_pair(new_port(NUM, new_u24(LIST_CONS)), new_port(CON, n2)));
  node_create(net, n4, new_pair(new_port(CON, n3), var));

  return new_port(CON, n4);
}

// Converts a list of bytes to a Port.
// Encoding:
// - λt (t NIL)
// - λt (((t CONS) head) tail)
// Returns an ERA port if resources run out part-way through.
__device__ Port inject_bytes(Net* net, TM* tm, Bytes *bytes) {
  // Resources are requested one cell at a time:
  // - NIL needs  2 nodes & 1 var
  // - CONS needs 4 nodes & 1 var
  u32 len = bytes->len;

  if (!get_resources(net, tm, 0, 2, 1)) {
    return new_port(ERA, 0);
  }
  Port port = inject_nil(net, tm);

  // The list is built back-to-front so that each CONS wraps the tail so far.
  for (u32 i = 0; i < len; i++) {
    if (!get_resources(net, tm, 0, 4, 1)) {
      return new_port(ERA, 0);
    }

    // Go through `unsigned char`: plain `char` may be signed, in which case
    // bytes >= 0x80 would be sign-extended before being packed as a u24.
    Port byte = new_port(NUM, new_u24((unsigned char) bytes->buf[len - i - 1]));
    port = inject_cons(net, tm, byte, port);
  }

  return port;
}

// Kernel wrapper around `inject_bytes`: only the thread with GID() == 0 does
// any work, and it stores the resulting port in `*ret`.
// `bytes` is passed by value; its `buf` is dereferenced inside the kernel.
__global__ void make_bytes_port(GNet* gnet, Bytes bytes, Port* ret) {
  if (GID() == 0) {
    TM tm = tmem_new();
    Net net = vnet_new(gnet, NULL, gnet->turn);
    *ret = inject_bytes(&net, &tm, &bytes);
  }
}

// Converts a list of bytes to a Port.
// Encoding:
// - λt (t NIL)
// - λt (((t CONS) head) tail)
// Host-side entry point: copies the bytes to the device, builds the list
// with a <<<1,1>>> kernel launch and copies the resulting Port back.
extern "C" Port gnet_inject_bytes(GNet* gnet, Bytes *bytes) {
  Port* d_ret;
  cudaMalloc(&d_ret, sizeof(Port));

  Bytes bytes_cu;
  bytes_cu.len = bytes->len;

  cudaMalloc(&bytes_cu.buf, sizeof(char) * bytes_cu.len);
  cudaMemcpy(bytes_cu.buf, bytes->buf, sizeof(char) * bytes_cu.len, cudaMemcpyHostToDevice);

  make_bytes_port<<<1,1>>>(gnet, bytes_cu, d_ret);

  Port ret;
  cudaMemcpy(&ret, d_ret, sizeof(Port), cudaMemcpyDeviceToHost);
  cudaFree(d_ret);
  cudaFree(bytes_cu.buf);

  return ret;
}

/// Returns a λ-Encoded Ctr for a RESULT_OK: λt ((t RESULT_OK) val)
/// Returns an ERA port if the 3 nodes & 1 var cannot be obtained.
__device__ Port inject_ok(Net* net, TM* tm, Port val) {
  if (!get_resources(net, tm, 0, 3, 1)) {
    printf("inject_ok: failed to get resources\n");
    return new_port(ERA, 0);
  }

  u32 v1 = tm->vloc[0];

  u32 n1 = tm->nloc[0];
  u32 n2 = tm->nloc[1];
  u32 n3 = tm->nloc[2];

  vars_create(net, v1, NONE);
  Port var = new_port(VAR, v1);

  // n1 = (val var), n2 = (RESULT_OK n1), n3 = (n2 var)
  node_create(net, n1, new_pair(val, var));
  node_create(net, n2, new_pair(new_port(NUM, new_u24(RESULT_OK)), new_port(CON, n1)));
  node_create(net, n3, new_pair(new_port(CON, n2), var));

  return new_port(CON, n3);
}

// Kernel wrapper around `inject_ok`; only the thread with GID() == 0 runs.
__global__ void make_ok_port(GNet* gnet, Port val, Port* ret) {
  if (GID() == 0) {
    TM tm = tmem_new();
    Net net = vnet_new(gnet, NULL, gnet->turn);
    *ret = inject_ok(&net, &tm, val);
  }
}

// Host-side entry point for `inject_ok`.
extern "C" Port gnet_inject_ok(GNet* gnet, Port val) {
  Port* d_ret;
  cudaMalloc(&d_ret, sizeof(Port));

  make_ok_port<<<1,1>>>(gnet, val, d_ret);

  Port ret;
  cudaMemcpy(&ret, d_ret, sizeof(Port), cudaMemcpyDeviceToHost);
  cudaFree(d_ret);

  return ret;
}

/// Returns a λ-Encoded Ctr for a RESULT_ERR: λt ((t RESULT_ERR) err)
/// Returns an ERA port if the 3 nodes & 1 var cannot be obtained.
__device__ Port inject_err(Net* net, TM* tm, Port err) {
  if (!get_resources(net, tm, 0, 3, 1)) {
    printf("inject_err: failed to get resources\n");
    return new_port(ERA, 0);
  }

  u32 v1 = tm->vloc[0];

  u32 n1 = tm->nloc[0];
  u32 n2 = tm->nloc[1];
  u32 n3 = tm->nloc[2];

  vars_create(net, v1, NONE);
  Port var = new_port(VAR, v1);

  // n1 = (err var), n2 = (RESULT_ERR n1), n3 = (n2 var)
  node_create(net, n1, new_pair(err, var));
  node_create(net, n2, new_pair(new_port(NUM, new_u24(RESULT_ERR)), new_port(CON, n1)));
  node_create(net, n3, new_pair(new_port(CON, n2), var));

  return new_port(CON, n3);
}

// Kernel wrapper around `inject_err`; only the thread with GID() == 0 runs.
__global__ void make_err_port(GNet* gnet, Port val, Port* ret) {
  if (GID() == 0) {
    TM tm = tmem_new();
    Net net = vnet_new(gnet, NULL, gnet->turn);
    *ret = inject_err(&net, &tm, val);
  }
}

// Host-side entry point for `inject_err`.
extern "C" Port gnet_inject_err(GNet* gnet, Port val) {
  Port* d_ret;
  cudaMalloc(&d_ret, sizeof(Port));

  make_err_port<<<1,1>>>(gnet, val, d_ret);

  Port ret;
  cudaMemcpy(&ret, d_ret, sizeof(Port), cudaMemcpyDeviceToHost);
  cudaFree(d_ret);

  return ret;
}

/// Returns a λ-Encoded Ctr for a Result/Err(IOError(..))
/// Tags up to and including IO_ERR_NAME carry no payload and are encoded as
/// λt (t tag); any other tag is encoded as λt ((t IO_ERR_INNER) err.val).
/// The encoded error is then wrapped with `inject_err`.
__device__ Port inject_io_err(Net* net, TM* tm, IOError err) {
  if (err.tag <= IO_ERR_NAME) {
    if (!get_resources(net, tm, 0, 2, 1)) {
      return new_port(ERA, 0);
    }

    u32 v1 = tm->vloc[0];

    u32 n1 = tm->nloc[0];
    u32 n2 = tm->nloc[1];

    vars_create(net, v1, NONE);
    Port var = new_port(VAR, v1);

    node_create(net, n1, new_pair(new_port(NUM, new_u24(err.tag)), var));
    node_create(net, n2, new_pair(new_port(CON, n1), var));

    return inject_err(net, tm, new_port(CON, n2));
  }

  if (!get_resources(net, tm, 0, 3, 1)) {
    return new_port(ERA, 0);
  }

  u32 v1 = tm->vloc[0];

  u32 n1 = tm->nloc[0];
  u32 n2 = tm->nloc[1];
  u32 n3 = tm->nloc[2];

  vars_create(net, v1, NONE);
  Port var = new_port(VAR, v1);

  node_create(net, n1, new_pair(err.val, var));
  node_create(net, n2, new_pair(new_port(NUM, new_u24(IO_ERR_INNER)), new_port(CON, n1)));
  node_create(net, n3, new_pair(new_port(CON, n2), var));

  return inject_err(net, tm, new_port(CON, n3));
}

// Kernel wrapper around `inject_io_err`; only the thread with GID() == 0 runs.
__global__ void make_io_err_port(GNet* gnet, IOError err, Port* ret) {
  if (GID() == 0) {
    TM tm = tmem_new();
    Net net = vnet_new(gnet, NULL, gnet->turn);
    *ret = inject_io_err(&net, &tm, err);
  }
}

// Host-side entry point for `inject_io_err`.
extern "C" Port gnet_inject_io_err(GNet* gnet, IOError err) {
  Port* d_ret;
  cudaMalloc(&d_ret, sizeof(Port));

  make_io_err_port<<<1,1>>>(gnet, err, d_ret);

  Port ret;
  cudaMemcpy(&ret, d_ret, sizeof(Port), cudaMemcpyDeviceToHost);
  cudaFree(d_ret);

  return ret;
}

/// Returns a λ-Encoded Ctr for a Result/Err(IOError/Type)
extern "C" Port gnet_inject_io_err_type(GNet* gnet) {
  IOError io_error = {
    .tag = IO_ERR_TYPE,
  };

  return gnet_inject_io_err(gnet, io_error);
}

/// Returns a λ-Encoded Ctr for a Result/Err(IOError/Name)
extern "C" Port gnet_inject_io_err_name(GNet* gnet) {
  IOError io_error = {
    .tag = IO_ERR_NAME,
  };

  return gnet_inject_io_err(gnet, io_error);
}

/// Returns a λ-Encoded Ctr for a Result/Err(IOError/Inner(val))
extern "C" Port gnet_inject_io_err_inner(GNet* gnet, Port val) {
  IOError io_error = {
    .tag = IO_ERR_INNER,
    .val = val,
  };

  return gnet_inject_io_err(gnet, io_error);
}

/// Returns a λ-Encoded Ctr for a Result/Err(IOError/Inner(msg)), where `msg`
/// is `err` injected as a λ-encoded list of bytes.
/// `err` must be `NUL`-terminated. A NULL `err` (callers forward `dlerror()`,
/// which may return NULL) is reported as "unknown error" instead of being
/// handed to `strlen`.
extern "C" Port gnet_inject_io_err_str(GNet* gnet, char* err) {
  char fallback[] = "unknown error";
  if (err == NULL) {
    err = fallback;
  }

  // Reuse the host-side byte injection instead of repeating its CUDA
  // allocate/copy/launch sequence here.
  Bytes bytes;
  bytes.buf = err;
  bytes.len = strlen(err);
  Port bytes_port = gnet_inject_bytes(gnet, &bytes);

  return gnet_inject_io_err_inner(gnet, bytes_port);
}

// Primitive IO Fns
// -----------------

// Open file pointers. Indices into this array
// are used as "file descriptors".
// Indices 0 1 and 2 are reserved.
// - 0 -> stdin
// - 1 -> stdout
// - 2 -> stderr
static FILE* FILE_POINTERS[256];

// Open dylib handles. Indices into this array
// are used as opaque loaded object "handles".
static void* DYLIBS[256];

// Converts a NUM port (file descriptor) to file pointer.
// Returns NULL (after logging to stderr) when `port` is not a NUM, when the
// index is outside the descriptor table, or when the slot is empty.
// Indices 0, 1 and 2 always map to stdin, stdout and stderr.
FILE* readback_file(Port port) {
  if (get_tag(port) != NUM) {
    fprintf(stderr, "non-num where file descriptor was expected: %s\n", show_port(port).x);
    return NULL;
  }

  u32 idx = get_u24(get_val(port));

  if (idx == 0) return stdin;
  if (idx == 1) return stdout;
  if (idx == 2) return stderr;

  // A u24 can be far larger than the table; never index past its end.
  const u32 slots = sizeof(FILE_POINTERS) / sizeof(FILE_POINTERS[0]);
  FILE* fp = (idx < slots) ? FILE_POINTERS[idx] : NULL;
  if (fp == NULL) {
    fprintf(stderr, "invalid file descriptor\n");
    return NULL;
  }

  return fp;
}

// Converts a NUM port (dylib handle) to an opaque dylib object.
// Returns NULL (after logging to stderr) when `port` is not a NUM, when the
// index is outside the handle table, or when the slot is empty.
void* readback_dylib(Port port) {
  if (get_tag(port) != NUM) {
    fprintf(stderr, "non-num where dylib handle was expected: %i\n", get_tag(port));
    return NULL;
  }

  u32 idx = get_u24(get_val(port));

  // Same bounds rule as for file descriptors.
  const u32 slots = sizeof(DYLIBS) / sizeof(DYLIBS[0]);
  void* dl = (idx < slots) ? DYLIBS[idx] : NULL;
  if (dl == NULL) {
    fprintf(stderr, "invalid dylib handle\n");
    return NULL;
  }

  return dl;
}

// Reads from a file a specified number of bytes.
// `argm` is a tuple of (file_descriptor, num_bytes).
// Returns: Result<Bytes, IOError<i24>>
// - IOError/Type when `argm` is not a 2-tuple or `num_bytes` is not a NUM
// - IOError/Inner(EBADF) when the descriptor is invalid
// - IOError/Inner(errno) when the read itself fails
// A short read without a stream error (e.g. end of file) is an Ok result
// holding the bytes that were read.
Port io_read(GNet* gnet, Port argm) {
  Tup tup = gnet_readback_tup(gnet, argm, 2);
  if (tup.elem_len != 2) {
    fprintf(stderr, "io_read: expected 2-tuple\n");
    return gnet_inject_io_err_type(gnet);
  }

  FILE* fp = readback_file(tup.elem_buf[0]);
  if (fp == NULL) {
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(EBADF)));
  }

  // Decoding a non-NUM port as a count would yield an arbitrary size.
  if (get_tag(tup.elem_buf[1]) != NUM) {
    fprintf(stderr, "io_read: expected numeric byte count\n");
    return gnet_inject_io_err_type(gnet);
  }
  u32 num_bytes = get_u24(get_val(tup.elem_buf[1]));

  /// Read a string.
  Bytes bytes;
  bytes.buf = (char*) malloc(sizeof(char) * num_bytes);
  if (bytes.buf == NULL && num_bytes > 0) {
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(ENOMEM)));
  }

  errno = 0;
  bytes.len = fread(bytes.buf, sizeof(char), num_bytes, fp);

  if ((bytes.len != num_bytes) && ferror(fp)) {
    // ferror() is only a flag; the error code lives in errno. Save it
    // before any other library call can overwrite it.
    int err = errno;
    fprintf(stderr, "io_read: failed to read\n");
    free(bytes.buf);
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(err)));
  }

  // Convert it to a port.
  Port ret = gnet_inject_bytes(gnet, &bytes);
  free(bytes.buf);

  return gnet_inject_ok(gnet, ret);
}

// Opens a file with the provided mode.
// `argm` is a tuple (CON node) of the
// file name and mode as strings.
// Returns: Result<File, IOError<i24>>
// where File is the u24 index of the slot claimed in FILE_POINTERS.
// - IOError/Type when `argm` is not a 2-tuple
// - IOError/Inner(errno) when fopen fails
// - IOError/Inner(EMFILE) when every slot is already in use
Port io_open(GNet* gnet, Port argm) {
  Tup tup = gnet_readback_tup(gnet, argm, 2);
  if (tup.elem_len != 2) {
    return gnet_inject_io_err_type(gnet);
  }

  Str name = gnet_readback_str(gnet, tup.elem_buf[0]);
  Str mode = gnet_readback_str(gnet, tup.elem_buf[1]);

  // sizeof(FILE_POINTERS) is a byte count; divide to get the slot count so
  // the scan stays inside the array. Slots 0-2 are the standard streams.
  const u32 slots = sizeof(FILE_POINTERS) / sizeof(FILE_POINTERS[0]);
  u32 fd = 3;
  while (fd < slots && FILE_POINTERS[fd] != NULL) {
    fd++;
  }

  FILE* fp = NULL;
  int err = EMFILE; // reported when no slot is free
  if (fd < slots) {
    fp = fopen(name.buf, mode.buf);
    err = errno; // only used when fp == NULL; saved before free() can touch it
  }

  free(name.buf);
  free(mode.buf);

  if (fp == NULL) {
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(err)));
  }

  FILE_POINTERS[fd] = fp;
  return gnet_inject_ok(gnet, new_port(NUM, new_u24(fd)));
}

// Closes a file, reclaiming the file descriptor.
// Returns: Result<*, IOError<i24>>
// - IOError/Inner(EBADF) when the descriptor is invalid
// - IOError/Inner(errno) when fclose reports a failure
Port io_close(GNet* gnet, Port argm) {
  FILE* fp = readback_file(argm);
  if (fp == NULL) {
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(EBADF)));
  }

  // After fclose the stream must not be touched again, whether or not the
  // call succeeded, so the error code comes from errno (not ferror) and the
  // slot is released in both cases.
  int failed = fclose(fp) != 0;
  int err = errno;

  const u32 slots = sizeof(FILE_POINTERS) / sizeof(FILE_POINTERS[0]);
  u32 idx = get_u24(get_val(argm));
  if (idx < slots) {
    FILE_POINTERS[idx] = NULL;
  }

  if (failed) {
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(err)));
  }

  return gnet_inject_ok(gnet, new_port(ERA, 0));
}

// Writes a list of bytes to a file.
// `argm` is a tuple (CON node) of the
// file descriptor and list of bytes to write.
// Returns: Result<*, IOError<i24>>
// - IOError/Type when `argm` is not a 2-tuple
// - IOError/Inner(EBADF) when the descriptor is invalid
// - IOError/Inner(errno) when fewer bytes than requested were written
Port io_write(GNet* gnet, Port argm) {
  Tup tup = gnet_readback_tup(gnet, argm, 2);
  if (tup.elem_len != 2) {
    return gnet_inject_io_err_type(gnet);
  }

  FILE* fp = readback_file(tup.elem_buf[0]);
  Bytes bytes = gnet_readback_bytes(gnet, tup.elem_buf[1]);

  if (fp == NULL) {
    free(bytes.buf);
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(EBADF)));
  }

  errno = 0;
  size_t written = fwrite(bytes.buf, sizeof(char), bytes.len, fp);
  int err = errno; // ferror() is only a flag; keep the real code
  free(bytes.buf);

  if (written != bytes.len) {
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(err)));
  }

  return gnet_inject_ok(gnet, new_port(ERA, 0));
}

// Flushes an output stream.
// Returns: Result<*, IOError<i24>>
// - IOError/Inner(EBADF) when the descriptor is invalid
// - IOError/Inner(errno) when fflush fails
Port io_flush(GNet* gnet, Port argm) {
  FILE* fp = readback_file(argm);
  if (fp == NULL) {
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(EBADF)));
  }

  if (fflush(fp) != 0) {
    // ferror() only says that an error happened; errno says which one.
    int err = errno;
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(err)));
  }

  return gnet_inject_ok(gnet, new_port(ERA, 0));
}

// Seeks to a position in a file.
// `argm` is a 3-tuple (CON fd (CON offset whence)), where
// - fd is a file descriptor
// - offset is a signed byte offset
// - whence is what that offset is relative to:
//    - 0 (SEEK_SET): beginning of file
//    - 1 (SEEK_CUR): current position of the file pointer
//    - 2 (SEEK_END): end of the file
// Returns: Result<*, IOError<i24>>
// - IOError/Type when `argm` is not a 3-tuple or `whence` is not 0, 1 or 2
// - IOError/Inner(EBADF) when the descriptor is invalid
// - IOError/Inner(errno) when fseek fails
Port io_seek(GNet* gnet, Port argm) {
  Tup tup = gnet_readback_tup(gnet, argm, 3);
  if (tup.elem_len != 3) {
    fprintf(stderr, "io_seek: expected 3-tuple\n");
    return gnet_inject_io_err_type(gnet);
  }

  FILE* fp = readback_file(tup.elem_buf[0]);
  i32 offset = get_i24(get_val(tup.elem_buf[1]));
  u32 whence = get_i24(get_val(tup.elem_buf[2]));

  if (fp == NULL) {
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(EBADF)));
  }

  // Map the numeric argument onto the C library constants explicitly
  // rather than assuming they share the values 0/1/2.
  int cwhence;
  switch (whence) {
    case 0: cwhence = SEEK_SET; break;
    case 1: cwhence = SEEK_CUR; break;
    case 2: cwhence = SEEK_END; break;
    default:
      return gnet_inject_io_err_type(gnet);
  }

  if (fseek(fp, offset, cwhence) != 0) {
    // A failed fseek need not set the stream's error flag, so ferror()
    // could report 0 here; errno carries the actual reason.
    int err = errno;
    return gnet_inject_io_err_inner(gnet, new_port(NUM, new_i24(err)));
  }

  return gnet_inject_ok(gnet, new_port(ERA, 0));
}

// Returns the current time as a tuple of the high
// and low 24 bits of a 48-bit nanosecond timestamp.
// Returns: Result<(u24, u24), IOError<*>>
// NOTE(review): the value returned below is the bare (hi, lo) CON node; it is
// not wrapped with gnet_inject_ok the way the other IO functions wrap their
// results. Left as is because callers may rely on it; confirm which is
// intended.
Port io_get_time(GNet* gnet, Port argm) {
  // Get the current time in nanoseconds
  u64 time_ns = time64();
  // Split the low 48 bits into two 24-bit halves (0xFFFFFF is 24 bits wide)
  u32 time_hi = (u32)(time_ns >> 24) & 0xFFFFFF;
  u32 time_lo = (u32)(time_ns & 0xFFFFFF);
  // Return the encoded time
  return gnet_make_node(gnet, CON, new_port(NUM, new_u24(time_hi)), new_port(NUM, new_u24(time_lo)));
}

// Sleeps.
// `argm` is a tuple (CON node) of the high and low
// 24 bits for a 48-bit duration in nanoseconds.
// Returns: Result<*, IOError<*>>
// - IOError/Type when `argm` is not a 2-tuple
Port io_sleep(GNet* gnet, Port argm) {
  Tup tup = gnet_readback_tup(gnet, argm, 2);
  if (tup.elem_len != 2) {
    return gnet_inject_io_err_type(gnet);
  }

  // Get the high and low 24-bit parts of the duration
  u32 dur_hi = get_u24(get_val(tup.elem_buf[0]));
  u32 dur_lo = get_u24(get_val(tup.elem_buf[1]));
  // Combine into a 48-bit duration in nanoseconds
  u64 dur_ns = (((u64)dur_hi) << 24) | dur_lo;
  // Sleep for the specified duration
  struct timespec ts;
  ts.tv_sec = dur_ns / 1000000000;
  ts.tv_nsec = dur_ns % 1000000000;
  nanosleep(&ts, NULL);

  return gnet_inject_ok(gnet, new_port(ERA, 0));
}

// Opens a dylib at the provided path.
// `argm` is a tuple of `filename` and `lazy`.
// `filename` is a λ-encoded string.
// `lazy` is a `bool` indicating if functions should be lazily loaded.
// Returns: Result<Dylib, IOError<String>>
// where Dylib is the u24 index of the slot claimed in DYLIBS.
// - IOError/Type when `argm` is not a 2-tuple
// - IOError/Inner(message) when dlopen fails or every slot is in use
Port io_dl_open(GNet* gnet, Port argm) {
  Tup tup = gnet_readback_tup(gnet, argm, 2);
  if (tup.elem_len != 2) {
    fprintf(stderr, "io_dl_open: expected 2-tuple\n");
    return gnet_inject_io_err_type(gnet);
  }

  Str str = gnet_readback_str(gnet, tup.elem_buf[0]);
  u32 lazy = get_u24(get_val(tup.elem_buf[1]));

  int flags = lazy ? RTLD_LAZY : RTLD_NOW;

  // sizeof(DYLIBS) is a byte count; divide to get the number of slots so
  // the scan stays inside the array.
  const u32 slots = sizeof(DYLIBS) / sizeof(DYLIBS[0]);
  u32 dl = 0;
  while (dl < slots && DYLIBS[dl] != NULL) {
    dl++;
  }

  if (dl == slots) {
    free(str.buf);
    return gnet_inject_io_err_str(gnet, (char*) "too many open dylibs");
  }

  DYLIBS[dl] = dlopen(str.buf, flags);
  free(str.buf);

  if (DYLIBS[dl] == NULL) {
    return gnet_inject_io_err_str(gnet, dlerror());
  }

  return gnet_inject_ok(gnet, new_port(NUM, new_u24(dl)));
}

// Calls a function from a loaded dylib.
// `argm` is a 3-tuple of `dylib_handle`, `symbol`, `args`.
// `dylib_handle` is the numeric node returned from a `DL_OPEN` call.
// `symbol` is a λ-encoded string of the symbol name.
// `args` is the argument to be provided to the dylib symbol.
//
// This function returns a Result with an Ok variant containing an
// arbitrary type.
//
// Returns Result<T, IOError<String>>
// - IOError/Type when `argm` is not a 3-tuple or the handle is invalid
// - IOError/Inner(message) when the symbol cannot be resolved
Port io_dl_call(GNet* gnet, Port argm) {
  Tup tup = gnet_readback_tup(gnet, argm, 3);
  if (tup.elem_len != 3) {
    fprintf(stderr, "io_dl_call: expected 3-tuple\n");
    return gnet_inject_io_err_type(gnet);
  }

  // Reject bad handles up front instead of passing NULL on to dlsym.
  void* dl = readback_dylib(tup.elem_buf[0]);
  if (dl == NULL) {
    fprintf(stderr, "io_dl_call: invalid handle\n");
    return gnet_inject_io_err_type(gnet);
  }

  Str symbol = gnet_readback_str(gnet, tup.elem_buf[1]);

  // Clear any stale error so the dlerror() call after dlsym reflects only
  // this lookup.
  dlerror();
  Port (*func)(GNet*, Port) = (Port (*)(GNet*, Port)) dlsym(dl, symbol.buf);
  char* error = dlerror();
  free(symbol.buf);

  if (error != NULL) {
    return gnet_inject_io_err_str(gnet, error);
  }
  if (func == NULL) {
    // The lookup succeeded but produced a NULL address; calling it would crash.
    return gnet_inject_io_err_str(gnet, (char*) "symbol resolved to NULL");
  }

  return gnet_inject_ok(gnet, func(gnet, tup.elem_buf[2]));
}

// Closes a loaded dylib, reclaiming the handle.
//
// Returns: Result<*, IOError<String>>
// - IOError/Type when the handle is invalid
// - IOError/Inner(message) when dlclose fails
// `book` is not used; the parameter is kept so the signature is unchanged.
Port io_dl_close(GNet* gnet, Book* book, Port argm) {
  void* dl = readback_dylib(argm);
  if (dl == NULL) {
    fprintf(stderr, "io_dl_close: invalid handle\n");
    return gnet_inject_io_err_type(gnet);
  }

  int err = dlclose(dl) != 0;
  if (err != 0) {
    return gnet_inject_io_err_str(gnet, dlerror());
  }

  // Release the slot; guard the index so this write can never land outside
  // the table.
  const u32 slots = sizeof(DYLIBS) / sizeof(DYLIBS[0]);
  u32 idx = get_u24(get_val(argm));
  if (idx < slots) {
    DYLIBS[idx] = NULL;
  }

  return gnet_inject_ok(gnet, new_port(ERA, 0));
}

// Adapter giving `io_dl_close` the two-argument shape used by every other
// function stored in the FFn table.
static Port io_dl_close_ffn(GNet* gnet, Port argm) {
  return io_dl_close(gnet, NULL, argm);
}

// Appends the built-in IO functions to the book's foreign-function table and
// copies the book into the BOOK device symbol.
void book_init(Book* book) {
  book->ffns_buf[book->ffns_len++] = (FFn){"READ", io_read};
  book->ffns_buf[book->ffns_len++] = (FFn){"OPEN", io_open};
  book->ffns_buf[book->ffns_len++] = (FFn){"CLOSE", io_close};
  book->ffns_buf[book->ffns_len++] = (FFn){"FLUSH", io_flush};
  book->ffns_buf[book->ffns_len++] = (FFn){"WRITE", io_write};
  book->ffns_buf[book->ffns_len++] = (FFn){"SEEK", io_seek};
  book->ffns_buf[book->ffns_len++] = (FFn){"GET_TIME", io_get_time};
  book->ffns_buf[book->ffns_len++] = (FFn){"SLEEP", io_sleep};
  book->ffns_buf[book->ffns_len++] = (FFn){"DL_OPEN", io_dl_open};
  book->ffns_buf[book->ffns_len++] = (FFn){"DL_CALL", io_dl_call};
  // DL_CLOSE used to be registered with io_dl_open, so closing a library
  // tried to open one instead.
  book->ffns_buf[book->ffns_len++] = (FFn){"DL_CLOSE", io_dl_close_ffn};

  cudaMemcpyToSymbol(BOOK, book, sizeof(Book));
}

// Monadic IO Evaluator
// ---------------------

// Runs an IO computation.
void do_run_io(GNet* gnet, Book* book, Port port) {
  // Register the built-in IO functions and make both output streams
  // line-buffered before entering the loop.
  book_init(book);

  setlinebuf(stdout);
  setlinebuf(stderr);

  // Each iteration normalizes the net, decodes the constructor at `port` and,
  // if it is a valid IO_CALL, runs the named function and feeds its result to
  // the continuation. Anything else ends the loop.
  for (;;) {
    gnet_normalize(gnet);

    Ctr ctr = gnet_readback_ctr(gnet, gnet_peek(gnet, port));

    // The first field must be a CON node holding the two IO magic numbers.
    if (ctr.args_len < 1) break;
    if (get_tag(ctr.args_buf[0]) != CON) break;

    Pair magic = gnet_node_load(gnet, get_val(ctr.args_buf[0]));
    bool magic_ok = get_val(get_fst(magic)) == new_u24(IO_MAGIC_0)
                 && get_val(get_snd(magic)) == new_u24(IO_MAGIC_1);
    if (!magic_ok) break;

    // IO_DONE and any unrecognised tag stop the evaluator.
    if (ctr.tag != IO_CALL) break;

    if (ctr.args_len != 4) {
      fprintf(stderr, "invalid IO_CALL: args_len = %u\n", ctr.args_len);
      break;
    }

    // Look the function up by name; the first matching entry wins.
    // FIXME: optimize this linear search
    Str func = gnet_readback_str(gnet, ctr.args_buf[1]);
    FFn* ffn = NULL;
    for (u32 fid = 0; ffn == NULL && fid < book->ffns_len; ++fid) {
      FFn* candidate = &book->ffns_buf[fid];
      if (strcmp(func.buf, candidate->name) == 0) {
        ffn = candidate;
      }
    }
    free(func.buf);

    Port argm = ctr.args_buf[2];
    Port cont = ctr.args_buf[3];

    // Unknown names yield an IOError/Name result instead of a call.
    Port ret = (ffn == NULL) ? gnet_inject_io_err_name(gnet)
                             : ffn->func(gnet, argm);

    // Pair the result with ROOT, connect that node to the continuation, and
    // resume from ROOT.
    Port p = gnet_make_node(gnet, CON, ret, ROOT);
    gnet_boot_redex(gnet, new_pair(p, cont));
    port = ROOT;
  }
}

================================================
FILE: tests/programs/empty.hvm
================================================
================================================
FILE: tests/programs/hello-world.hvm
================================================
@String/Cons = (a (b ((@String/Cons/tag (a (b c))) c)))
@String/Cons/tag = 1
@String/Nil = ((@String/Nil/tag a) a)
@String/Nil/tag = 0
@main = l & @String/Cons ~ (104 (k l)) & @String/Cons ~ (101 (j k)) & @String/Cons ~ (108 (i j)) & @String/Cons ~ (108 (h i)) & @String/Cons ~ (111 (g h)) & @String/Cons ~ (44 (f g)) & @String/Cons ~
(32 (e f)) & @String/Cons ~ (119 (d e)) & @String/Cons ~ (111 (c d)) & @String/Cons ~ (114 (b c)) & @String/Cons ~ (108 (a b)) & @String/Cons ~ (100 (@String/Nil a)) ================================================ FILE: tests/programs/io/basic.bend ================================================ test-io = 1 def unwrap(res): match res: case Result/Ok: return res.val case Result/Err: return res.val def open(): return call("OPEN", ("./LICENSE", "r")) def read(f): return call("READ", (f, 47)) def print(bytes): with IO: * <- call("WRITE", (1, bytes)) * <- call("WRITE", (1, "\n")) return wrap(*) def close(f): return call("CLOSE", f) def main(): with IO: f <- open() f = unwrap(f) bytes <- read(f) bytes = unwrap(bytes) * <- print(bytes) res <- close(f) return wrap(res) ================================================ FILE: tests/programs/io/basic.hvm ================================================ @IO/Call = (a (b (c (d ((@IO/Call/tag (a (b (c (d e))))) e))))) @IO/Call/tag = 1 @IO/Done = (a (b ((@IO/Done/tag (a (b c))) c))) @IO/Done/tag = 0 @IO/MAGIC = (13683217 16719857) @IO/bind = ((@IO/bind__C2 a) a) @IO/bind__C0 = (* (b (a c))) & @undefer ~ (a (b c)) @IO/bind__C1 = (* (* (a (b ((c d) (e g)))))) & @IO/Call ~ (@IO/MAGIC (a (b ((c f) g)))) & @IO/bind ~ (d (e f)) @IO/bind__C2 = (?((@IO/bind__C0 @IO/bind__C1) a) a) @IO/wrap = a & @IO/Done ~ (@IO/MAGIC a) @String/Cons = (a (b ((@String/Cons/tag (a (b c))) c))) @String/Cons/tag = 1 @String/Nil = ((@String/Nil/tag a) a) @String/Nil/tag = 0 @call = (a (b c)) & @IO/Call ~ (@IO/MAGIC (a (b (@call__C0 c)))) @call__C0 = a & @IO/Done ~ (@IO/MAGIC a) @close = f & @call ~ (e f) & @String/Cons ~ (67 (d e)) & @String/Cons ~ (76 (c d)) & @String/Cons ~ (79 (b c)) & @String/Cons ~ (83 (a b)) & @String/Cons ~ (69 (@String/Nil a)) @main = w & @IO/bind ~ (@open ((((s (a u)) (@IO/wrap v)) v) w)) & @IO/bind ~ (c ((((n (o (d q))) (r (s t))) t) u)) & @unwrap ~ (a {b r}) & @read ~ (b c) & @IO/bind ~ (f ((((g (k (* m))) (n (o p))) p) q)) & 
@print ~ (e f) & @unwrap ~ (d e) & @IO/bind ~ (h ((((i i) (k l)) l) m)) & @close ~ (g h) @open = o & @call ~ (d ((m n) o)) & @String/Cons ~ (79 (c d)) & @String/Cons ~ (80 (b c)) & @String/Cons ~ (69 (a b)) & @String/Cons ~ (78 (@String/Nil a)) & @String/Cons ~ (46 (l m)) & @String/Cons ~ (47 (k l)) & @String/Cons ~ (76 (j k)) & @String/Cons ~ (73 (i j)) & @String/Cons ~ (67 (h i)) & @String/Cons ~ (69 (g h)) & @String/Cons ~ (78 (f g)) & @String/Cons ~ (83 (e f)) & @String/Cons ~ (69 (@String/Nil e)) & @String/Cons ~ (114 (@String/Nil n)) @print = (f h) & @IO/bind ~ (g (@print__C3 h)) & @call ~ (e ((1 f) g)) & @String/Cons ~ (87 (d e)) & @String/Cons ~ (82 (c d)) & @String/Cons ~ (73 (b c)) & @String/Cons ~ (84 (a b)) & @String/Cons ~ (69 (@String/Nil a)) @print__C0 = ((* a) (* a)) @print__C1 = g & @call ~ (e ((1 f) g)) & @String/Cons ~ (87 (d e)) & @String/Cons ~ (82 (c d)) & @String/Cons ~ (73 (b c)) & @String/Cons ~ (84 (a b)) & @String/Cons ~ (69 (@String/Nil a)) & @String/Cons ~ (10 (@String/Nil f)) @print__C2 = (a (* c)) & @IO/bind ~ (@print__C1 (((@print__C0 (a b)) b) c)) @print__C3 = ((@print__C2 (@IO/wrap a)) a) @read = (e f) & @call ~ (d ((e 47) f)) & @String/Cons ~ (82 (c d)) & @String/Cons ~ (69 (b c)) & @String/Cons ~ (65 (a b)) & @String/Cons ~ (68 (@String/Nil a)) @test-io = 1 @undefer = (((a a) b) b) @unwrap = ((@unwrap__C0 a) a) @unwrap__C0 = (?(((a a) (* (b b))) c) c) ================================================ FILE: tests/programs/io/invalid-name.bend ================================================ test-io = 1 def main(): with IO: f <- call("INVALID-NAME", ("./README.md", "r")) return wrap(f) ================================================ FILE: tests/programs/io/invalid-name.hvm ================================================ @IO/Call = (a (b (c (d ((@IO/Call/tag (a (b (c (d e))))) e))))) @IO/Call/tag = 1 @IO/Done = (a (b ((@IO/Done/tag (a (b c))) c))) @IO/Done/tag = 0 @IO/MAGIC = (13683217 16719857) @IO/bind = ((@IO/bind__C2 a) a) 
@IO/bind__C0 = (* (b (a c))) & @undefer ~ (a (b c)) @IO/bind__C1 = (* (* (a (b ((c d) (e g)))))) & @IO/Call ~ (@IO/MAGIC (a (b ((c f) g)))) & @IO/bind ~ (d (e f)) @IO/bind__C2 = (?((@IO/bind__C0 @IO/bind__C1) a) a) @IO/wrap = a & @IO/Done ~ (@IO/MAGIC a) @String/Cons = (a (b ((@String/Cons/tag (a (b c))) c))) @String/Cons/tag = 1 @String/Nil = ((@String/Nil/tag a) a) @String/Nil/tag = 0 @call = (a (b c)) & @IO/Call ~ (@IO/MAGIC (a (b (@call__C0 c)))) @call__C0 = a & @IO/Done ~ (@IO/MAGIC a) @main = cb & @IO/bind ~ (y ((((z z) (@IO/wrap bb)) bb) cb)) & @call ~ (l ((w x) y)) & @String/Cons ~ (73 (k l)) & @String/Cons ~ (78 (j k)) & @String/Cons ~ (86 (i j)) & @String/Cons ~ (65 (h i)) & @String/Cons ~ (76 (g h)) & @String/Cons ~ (73 (f g)) & @String/Cons ~ (68 (e f)) & @String/Cons ~ (45 (d e)) & @String/Cons ~ (78 (c d)) & @String/Cons ~ (65 (b c)) & @String/Cons ~ (77 (a b)) & @String/Cons ~ (69 (@String/Nil a)) & @String/Cons ~ (46 (v w)) & @String/Cons ~ (47 (u v)) & @String/Cons ~ (82 (t u)) & @String/Cons ~ (69 (s t)) & @String/Cons ~ (65 (r s)) & @String/Cons ~ (68 (q r)) & @String/Cons ~ (77 (p q)) & @String/Cons ~ (69 (o p)) & @String/Cons ~ (46 (n o)) & @String/Cons ~ (109 (m n)) & @String/Cons ~ (100 (@String/Nil m)) & @String/Cons ~ (114 (@String/Nil x)) @test-io = 1 @undefer = (((a a) b) b) ================================================ FILE: tests/programs/io/open1.bend ================================================ test-io = 1 def main(): with IO: f <- call("OPEN", ("./LICENSE", "r")) return wrap(f) ================================================ FILE: tests/programs/io/open1.hvm ================================================ @IO/Call = (a (b (c (d ((@IO/Call/tag (a (b (c (d e))))) e))))) @IO/Call/tag = 1 @IO/Done = (a (b ((@IO/Done/tag (a (b c))) c))) @IO/Done/tag = 0 @IO/MAGIC = (13683217 16719857) @IO/bind = ((@IO/bind__C2 a) a) @IO/bind__C0 = (* (b (a c))) & @undefer ~ (a (b c)) @IO/bind__C1 = (* (* (a (b ((c d) (e g)))))) & @IO/Call ~ 
(@IO/MAGIC (a (b ((c f) g)))) & @IO/bind ~ (d (e f)) @IO/bind__C2 = (?((@IO/bind__C0 @IO/bind__C1) a) a) @IO/wrap = a & @IO/Done ~ (@IO/MAGIC a) @String/Cons = (a (b ((@String/Cons/tag (a (b c))) c))) @String/Cons/tag = 1 @String/Nil = ((@String/Nil/tag a) a) @String/Nil/tag = 0 @call = (a (b c)) & @IO/Call ~ (@IO/MAGIC (a (b (@call__C0 c)))) @call__C0 = a & @IO/Done ~ (@IO/MAGIC a) @main = s & @IO/bind ~ (o ((((p p) (@IO/wrap r)) r) s)) & @call ~ (d ((m n) o)) & @String/Cons ~ (79 (c d)) & @String/Cons ~ (80 (b c)) & @String/Cons ~ (69 (a b)) & @String/Cons ~ (78 (@String/Nil a)) & @String/Cons ~ (46 (l m)) & @String/Cons ~ (47 (k l)) & @String/Cons ~ (76 (j k)) & @String/Cons ~ (73 (i j)) & @String/Cons ~ (67 (h i)) & @String/Cons ~ (69 (g h)) & @String/Cons ~ (78 (f g)) & @String/Cons ~ (83 (e f)) & @String/Cons ~ (69 (@String/Nil e)) & @String/Cons ~ (114 (@String/Nil n)) @test-io = 1 @undefer = (((a a) b) b) ================================================ FILE: tests/programs/io/open2.bend ================================================ test-io = 1 def main(): with IO: f <- call("OPEN", ("fake-file", "r")) return wrap(f) ================================================ FILE: tests/programs/io/open2.hvm ================================================ @IO/Call = (a (b (c (d ((@IO/Call/tag (a (b (c (d e))))) e))))) @IO/Call/tag = 1 @IO/Done = (a (b ((@IO/Done/tag (a (b c))) c))) @IO/Done/tag = 0 @IO/MAGIC = (13683217 16719857) @IO/bind = ((@IO/bind__C2 a) a) @IO/bind__C0 = (* (b (a c))) & @undefer ~ (a (b c)) @IO/bind__C1 = (* (* (a (b ((c d) (e g)))))) & @IO/Call ~ (@IO/MAGIC (a (b ((c f) g)))) & @IO/bind ~ (d (e f)) @IO/bind__C2 = (?((@IO/bind__C0 @IO/bind__C1) a) a) @IO/wrap = a & @IO/Done ~ (@IO/MAGIC a) @String/Cons = (a (b ((@String/Cons/tag (a (b c))) c))) @String/Cons/tag = 1 @String/Nil = ((@String/Nil/tag a) a) @String/Nil/tag = 0 @call = (a (b c)) & @IO/Call ~ (@IO/MAGIC (a (b (@call__C0 c)))) @call__C0 = a & @IO/Done ~ (@IO/MAGIC a) @main = s & 
@IO/bind ~ (o ((((p p) (@IO/wrap r)) r) s)) & @call ~ (d ((m n) o)) & @String/Cons ~ (79 (c d)) & @String/Cons ~ (80 (b c)) & @String/Cons ~ (69 (a b)) & @String/Cons ~ (78 (@String/Nil a)) & @String/Cons ~ (102 (l m)) & @String/Cons ~ (97 (k l)) & @String/Cons ~ (107 (j k)) & @String/Cons ~ (101 (i j)) & @String/Cons ~ (45 (h i)) & @String/Cons ~ (102 (g h)) & @String/Cons ~ (105 (f g)) & @String/Cons ~ (108 (e f)) & @String/Cons ~ (101 (@String/Nil e)) & @String/Cons ~ (114 (@String/Nil n)) @test-io = 1 @undefer = (((a a) b) b) ================================================ FILE: tests/programs/io/open3.bend ================================================ test-io = 1 def main(): with IO: # calling open with an unexpected type of arg f <- call("OPEN", 123) return wrap(f) ================================================ FILE: tests/programs/io/open3.hvm ================================================ @IO/Call = (a (b (c (d ((@IO/Call/tag (a (b (c (d e))))) e))))) @IO/Call/tag = 1 @IO/Done = (a (b ((@IO/Done/tag (a (b c))) c))) @IO/Done/tag = 0 @IO/MAGIC = (13683217 16719857) @IO/bind = ((@IO/bind__C2 a) a) @IO/bind__C0 = (* (b (a c))) & @undefer ~ (a (b c)) @IO/bind__C1 = (* (* (a (b ((c d) (e g)))))) & @IO/Call ~ (@IO/MAGIC (a (b ((c f) g)))) & @IO/bind ~ (d (e f)) @IO/bind__C2 = (?((@IO/bind__C0 @IO/bind__C1) a) a) @IO/wrap = a & @IO/Done ~ (@IO/MAGIC a) @String/Cons = (a (b ((@String/Cons/tag (a (b c))) c))) @String/Cons/tag = 1 @String/Nil = ((@String/Nil/tag a) a) @String/Nil/tag = 0 @call = (a (b c)) & @IO/Call ~ (@IO/MAGIC (a (b (@call__C0 c)))) @call__C0 = a & @IO/Done ~ (@IO/MAGIC a) @main = i & @IO/bind ~ (e ((((f f) (@IO/wrap h)) h) i)) & @call ~ (d (123 e)) & @String/Cons ~ (79 (c d)) & @String/Cons ~ (80 (b c)) & @String/Cons ~ (69 (a b)) & @String/Cons ~ (78 (@String/Nil a)) @test-io = 1 @undefer = (((a a) b) b) ================================================ FILE: tests/programs/list.hvm ================================================ 
@List/Cons = (a (b ((@List/Cons/tag (a (b c))) c))) @List/Cons/tag = 1 @List/Nil = ((@List/Nil/tag a) a) @List/Nil/tag = 0 @id = (a a) @list = c & @List/Cons ~ (1 (b c)) & @List/Cons ~ (2 (a b)) & @List/Cons ~ (3 (@List/Nil a)) @main = e & @map ~ ((a b) (d e)) & @map ~ (a (@list b)) & @List/Cons ~ (@id (@List/Nil d)) // @main__C0 = (a b) // & @map ~ (a (@list b)) @map = (a ((@map__C1 (a b)) b)) @map__C0 = (* (a (d ({(a b) c} f)))) & @List/Cons ~ (b (e f)) & @map ~ (c (d e)) @map__C1 = (?(((* @List/Nil) @map__C0) a) a) ================================================ FILE: tests/programs/numeric-casts.hvm ================================================ @main = x & @tu0 ~ (* x) // casting to u24 @tu0 = (* {n x}) & @tu1 ~ (* x) & 0 ~ $([u24] n) // 0 @tu1 = (* {n x}) & @tu2 ~ (* x) & 1234 ~ $([u24] n) // 1234 @tu2 = (* {n x}) & @tu3 ~ (* x) & +4321 ~ $([u24] n) // 4321 @tu3 = (* {n x}) & @tu4 ~ (* x) & -5678 ~ $([u24] n) // 16771538 (reinterprets bits) @tu4 = (* {n x}) & @tu5 ~ (* x) & 2.8 ~ $([u24] n) // 2 (rounds to zero) @tu5 = (* {n x}) & @tu6 ~ (* x) & -12.5 ~ $([u24] n) // 0 (saturates) @tu6 = (* {n x}) & @tu7 ~ (* x) & 16777216.0 ~ $([u24] n) // 16777215 (saturates) @tu7 = (* {n x}) & @tu8 ~ (* x) & +inf ~ $([u24] n) // 16777215 (saturates) @tu8 = (* {n x}) & @tu9 ~ (* x) & -inf ~ $([u24] n) // 0 (saturates) @tu9 = (* {n x}) & @ti0 ~ (* x) & +NaN ~ $([u24] n) // 0 // casting to i24 @ti0 = (* {n x}) & @ti1 ~ (* x) & 0 ~ $([i24] n) // +0 @ti1 = (* {n x}) & @ti2 ~ (* x) & 1234 ~ $([i24] n) // +1234 @ti2 = (* {n x}) & @ti3 ~ (* x) & +4321 ~ $([i24] n) // +4321 @ti3 = (* {n x}) & @ti4 ~ (* x) & -5678 ~ $([i24] n) // -5678 @ti4 = (* {n x}) & @ti5 ~ (* x) & 2.8 ~ $([i24] n) // +2 (rounds to zero) @ti5 = (* {n x}) & @ti6 ~ (* x) & -12.7 ~ $([i24] n) // -12 (rounds to zero) @ti6 = (* {n x}) & @ti7 ~ (* x) & 8388610.0 ~ $([i24] n) // +8388607 (saturates) @ti7 = (* {n x}) & @ti8 ~ (* x) & -8388610.0 ~ $([i24] n) // -8388608 (saturates) @ti8 = (* {n x}) & @ti9 ~ (* x) & 
+inf ~ $([i24] n) // +8388607 (saturates) @ti9 = (* {n x}) & @ti10 ~ (* x) & -inf ~ $([i24] n) // -8388608 (saturates) @ti10 = (* {n x}) & @tf0 ~ (* x) & +NaN ~ $([i24] n) // +0 // casting to f24 @tf0 = (* {n x}) & @tf1 ~ (* x) & +NaN ~ $([f24] n) // +NaN @tf1 = (* {n x}) & @tf2 ~ (* x) & +inf ~ $([f24] n) // +inf @tf2 = (* {n x}) & @tf3 ~ (* x) & -inf ~ $([f24] n) // -inf @tf3 = (* {n x}) & @tf4 ~ (* x) & 2.15 ~ $([f24] n) // 2.15 @tf4 = (* {n x}) & @tf5 ~ (* x) & -2.15 ~ $([f24] n) // -2.15 @tf5 = (* {n x}) & @tf6 ~ (* x) & 0.15 ~ $([f24] n) // 0.15 @tf6 = (* {n x}) & @tf7 ~ (* x) & -1234 ~ $([f24] n) // -1234.0 @tf7 = (* {n x}) & @tf8 ~ (* x) & +1234 ~ $([f24] n) // +1234.0 @tf8 = (* {n x}) & @tf9 ~ (* x) & 123456 ~ $([f24] n) // 123456.0 @tf9 = (* {n x}) & @tp0 ~ (* x) & 16775982 ~ $([f24] n) // 16775936.0 // printing @tp0 = (* {n x}) & @tp1 ~ (* x) & n ~ [u24] // [u24] @tp1 = (* {n x}) & @tp2 ~ (* x) & n ~ [i24] // [i24] @tp2 = (* {n x}) & @t ~ (* x) & n ~ [f24] // [f24] @t = * ================================================ FILE: tests/programs/numerics/f24.hvm ================================================ @half = xN & 1.0 ~ $([/] $(2.0 xN)) @nan = xN & 0.0 ~ $([/] $(0.0 xN)) @main = x & @t0 ~ (* x) // nan and inf divisions @t0 = (* {n x}) & @t1 ~ (* x) & 1.0 ~ $([/] $(0.0 n)) // +inf @t1 = (* {n x}) & @t2 ~ (* x) & -1.0 ~ $([/] $(0.0 n)) // -inf @t2 = (* {n x}) & @t3 ~ (* x) & 0.0 ~ $([/] $(0.0 n)) // NaN // general operators @t3 = (* {n x}) & @t4 ~ (* x) & @half ~ $([+] $(2.0 n)) // 2.5 @t4 = (* {n x}) & @t5 ~ (* x) & @half ~ $([-] $(2.0 n)) // -1.5 @t5 = (* {n x}) & @t6 ~ (* x) & @half ~ $([*] $(2.3 n)) // 1.15 @t6 = (* {n x}) & @t7 ~ (* x) & @half ~ $([/] $(2.0 n)) // 0.25 @t7 = (* {n x}) & @t8 ~ (* x) & @half ~ $([%] $(2.0 n)) // 0.5 // comparisons (returning ints) @t8 = (* {n x}) & @t9 ~ (* x) & @half ~ $([=] $(2.0 n)) // 0 @t9 = (* {n x}) & @tA ~ (* x) & @half ~ $([!] 
$(2.0 n)) // 1 @tA = (* {n x}) & @tB ~ (* x) & @half ~ $([<] $(2.0 n)) // 1 @tB = (* {n x}) & @tC ~ (* x) & @half ~ $([>] $(2.0 n)) // 0 // ieee nan comparisons @tC = (* {n x}) & @tD ~ (* x) & @nan ~ $([=] $(@nan n)) // 0 @tD = (* {n x}) & @tE ~ (* x) & @nan ~ $([<] $(@nan n)) // 0 @tE = (* {n x}) & @tF ~ (* x) & @nan ~ $([>] $(@nan n)) // 0 // parsing @tF = (* {n x}) & @tG ~ (* x) & +NaN ~ $([+] $(0.0 n)) // NaN @tG = (* {n x}) & @tH ~ (* x) & +inf ~ $([+] $(0.0 n)) // inf @tH = (* {n x}) & @tI ~ (* x) & -inf ~ $([+] $(0.0 n)) // -inf @tI = (* {n x}) & @tJ ~ (* x) & 1.02 ~ $([+] $(0.0 n)) // 1.02 // modulo @tJ = (* {n x}) & @tK ~ (* x) & +1.2 ~ $([%] $(+1.1 n)) // +0.1 @tK = (* {n x}) & @tL ~ (* x) & +1.2 ~ $([%] $(-1.1 n)) // +0.1 @tL = (* {n x}) & @tM ~ (* x) & -1.2 ~ $([%] $(+1.1 n)) // -0.1 @tM = (* {n x}) & @tN ~ (* x) & -1.2 ~ $([%] $(-1.1 n)) // -0.1 // trigonometry: on f24, << and >> appear to compute sin(a+b) and tan(a+b) (expected values below match; confirm) @tN = (* {n x}) & @tO ~ (* x) & +0.0 ~ $([<<] $(+3.14159265 n)) // ~0 @tO = (* {n x}) & @tP ~ (* x) & +1.570796325 ~ $([<<] $(+0.0 n)) // 1.0 @tP = (* {n x}) & @tQ ~ (* x) & +0.0 ~ $([>>] $(+3.14159265 n)) // ~0 @tQ = (* {n x}) & @tR ~ (* x) & +0.785398162 ~ $([>>] $(+0.0 n)) // 1.0 @tR = * ================================================ FILE: tests/programs/numerics/i24.hvm ================================================ @main = x & @t0 ~ (* x) // all ops @t0 = (* {n x}) & @t1 ~ (* x) & +10 ~ $([+] $(+2 n)) // 12 @t1 = (* {n x}) & @t2 ~ (* x) & +10 ~ $([-] $(+2 n)) // 8 @t2 = (* {n x}) & @t3 ~ (* x) & +10 ~ $([*] $(+2 n)) // 20 @t3 = (* {n x}) & @t4 ~ (* x) & +10 ~ $([/] $(+2 n)) // 5 @t4 = (* {n x}) & @t5 ~ (* x) & +10 ~ $([%] $(+2 n)) // 0 @t5 = (* {n x}) & @t6 ~ (* x) & +10 ~ $([=] $(+2 n)) // 0 @t6 = (* {n x}) & @t7 ~ (* x) & +10 ~ $([!] 
$(+2 n)) // 1 @t7 = (* {n x}) & @t8 ~ (* x) & +10 ~ $([<] $(+2 n)) // 0 @t8 = (* {n x}) & @t9 ~ (* x) & +10 ~ $([>] $(+2 n)) // 1 @t9 = (* {n x}) & @tA ~ (* x) & +10 ~ $([&] $(+2 n)) // 2 @tA = (* {n x}) & @tB ~ (* x) & +10 ~ $([|] $(+2 n)) // 10 @tB = (* {n x}) & @tC ~ (* x) & +10 ~ $([^] $(+2 n)) // 8 // underflow @tC = (* {n x}) & @tD ~ (* x) & -8388608 ~ $([-] $(+1 n)) // 8388607 // overflow @tD = (* {n x}) & @tE ~ (* x) & +8388607 ~ $([+] $(+1 n)) // -8388608 // modulo @tE = (* {n x}) & @tF ~ (* x) & +3 ~ $([%] $(+2 n)) // +1 @tF = (* {n x}) & @tG ~ (* x) & +3 ~ $([%] $(-2 n)) // +1 @tG = (* {n x}) & @tH ~ (* x) & -3 ~ $([%] $(+2 n)) // -1 @tH = (* {n x}) & @tI ~ (* x) & -3 ~ $([%] $(-2 n)) // -1 @tI = * ================================================ FILE: tests/programs/numerics/u24.hvm ================================================ @main = x & @t0 ~ (* x) // all ops @t0 = (* {n x}) & @t1 ~ (* x) & 10 ~ $([+] $(2 n)) // 12 @t1 = (* {n x}) & @t2 ~ (* x) & 10 ~ $([-] $(2 n)) // 8 @t2 = (* {n x}) & @t3 ~ (* x) & 10 ~ $([*] $(2 n)) // 20 @t3 = (* {n x}) & @t4 ~ (* x) & 10 ~ $([/] $(2 n)) // 5 @t4 = (* {n x}) & @t5 ~ (* x) & 10 ~ $([%] $(2 n)) // 0 @t5 = (* {n x}) & @t6 ~ (* x) & 10 ~ $([=] $(2 n)) // 0 @t6 = (* {n x}) & @t7 ~ (* x) & 10 ~ $([!] 
$(2 n)) // 1 @t7 = (* {n x}) & @t8 ~ (* x) & 10 ~ $([<] $(2 n)) // 0 @t8 = (* {n x}) & @t9 ~ (* x) & 10 ~ $([>] $(2 n)) // 1 @t9 = (* {n x}) & @tA ~ (* x) & 10 ~ $([&] $(2 n)) // 2 @tA = (* {n x}) & @tB ~ (* x) & 10 ~ $([|] $(2 n)) // 10 @tB = (* {n x}) & @tC ~ (* x) & 10 ~ $([^] $(2 n)) // 8 @tC = (* {n x}) & @tD ~ (* x) & 10 ~ $([<<] $(2 n)) // 40 @tD = (* {n x}) & @tE ~ (* x) & 10 ~ $([>>] $(2 n)) // 2 // underflow @tE = (* {n x}) & @tF ~ (* x) & 0 ~ $([-] $(1 n)) // 16777215 // overflow @tF = (* {n x}) & @tG ~ (* x) & 16777215 ~ $([+] $(1 n)) // 0 // no sign extension @tG = (* {n x}) & @tH ~ (* x) & 16777215 ~ $([>>] $(22 n)) // 3 @tH = * ================================================ FILE: tests/programs/safety-check.hvm ================================================ @List/Cons = (a (b ((@List/Cons/tag (a (b c))) c))) @List/Cons/tag = 1 @List/Nil = ((@List/Nil/tag a) a) @List/Nil/tag = 0 @id = (a a) @list = c & @List/Cons ~ (1 (b c)) & @List/Cons ~ (2 (@List/Nil b)) @main = b & @map ~ (@main__C0 (a b)) & @List/Cons ~ (@id (@List/Nil a)) @main__C0 = (a b) & @map ~ (a (@list b)) @map = (a ((@map__C1 (a b)) b)) @map__C0 = (* (a (d ({(a b) c} f)))) & @List/Cons ~ (b (e f)) & @map ~ (c (d e)) @map__C1 = (?(((* @List/Nil) @map__C0) a) a) // Test flags @test-rust-only = 1 ================================================ FILE: tests/run.rs ================================================
use std::{
  collections::HashMap,
  error::Error,
  ffi::OsStr,
  fs,
  io::{Read, Write},
  path::{Path, PathBuf},
  process::{Command, Stdio},
};

use hvm::ast::Tree;
use insta::assert_snapshot;
use TSPL::Parser;

/// Runs every `.hvm` file found under `tests/programs/`.
#[test]
fn test_run_programs() {
  test_dir(&manifest_relative("tests/programs/"));
}

/// Runs every `.hvm` file found under `examples/`.
#[test]
fn test_run_examples() {
  test_dir(&manifest_relative("examples/"));
}

/// Applies `test_file` to each file matching `**/*.hvm` below `dir`.
fn test_dir(dir: &Path) {
  insta::glob!(dir, "**/*.hvm", test_file)
}

/// Builds a path from `CARGO_MANIFEST_DIR` joined with `sub`.
fn manifest_relative(sub: &str) -> PathBuf {
  format!("{}/{}", env!("CARGO_MANIFEST_DIR"), sub).into()
}

/// Tests a single `.hvm` program.
///
/// The program text may contain flag definitions that steer the test:
/// - `@test-skip = 1`: the file is not tested at all.
/// - `@test-io = 1`: the file is handed to `test_io_file` instead.
/// - `@test-rust-only = 1`: only the `run` output is snapshotted.
///
/// Otherwise the `run` output is snapshotted and the `run-c` output (and the
/// `run-cu` output when the `cuda` feature is enabled) must be equal to it.
fn test_file(path: &Path) {
  let contents = fs::read_to_string(path).unwrap();
  if contents.contains("@test-skip = 1") {
    println!("skipping {path:?}");
    return;
  }
  if contents.contains("@test-io = 1") {
    test_io_file(path);
    return;
  }

  println!("testing {path:?}...");
  let rust_output = execute_hvm(&["run".as_ref(), path.as_os_str()], false).unwrap();
  assert_snapshot!(rust_output);

  if contents.contains("@test-rust-only = 1") {
    println!("only testing rust implementation for {path:?}");
    return;
  }

  println!(" testing {path:?}, C...");
  let c_output = execute_hvm(&["run-c".as_ref(), path.as_os_str()], false).unwrap();
  assert_eq!(c_output, rust_output, "{path:?}: C output does not match rust output");

  if cfg!(feature = "cuda") {
    println!(" testing {path:?}, CUDA...");
    let cuda_output = execute_hvm(&["run-cu".as_ref(), path.as_os_str()], false).unwrap();
    assert_eq!(
      cuda_output, rust_output,
      "{path:?}: CUDA output does not match rust output"
    );
  }
}

/// Tests a program flagged with `@test-io = 1`.
///
/// The `run-c` output is snapshotted; when the `cuda` feature is enabled the
/// `run-cu` output must be equal to it. Both runs receive input on stdin.
fn test_io_file(path: &Path) {
  println!(" testing (io) {path:?}, C...");
  let c_output = execute_hvm(&["run-c".as_ref(), path.as_os_str()], true).unwrap();
  assert_snapshot!(c_output);

  if cfg!(feature = "cuda") {
    println!(" testing (io) {path:?}, CUDA...");
    let cuda_output = execute_hvm(&["run-cu".as_ref(), path.as_os_str()], true).unwrap();
    assert_eq!(cuda_output, c_output, "{path:?}: CUDA output does not match C output");
  }
}

/// Runs the `hvm` binary built by cargo with `args` and returns its output.
///
/// When `send_io` is true, the line `io from the tests` is written to the
/// child's stdin. Stdout followed by stderr is collected into one string.
///
/// Returns:
/// - on a non-success exit status: the status, a newline, and the raw output;
/// - otherwise: the output after `parse_output` (panics if that fails).
///
/// Errors: spawning, pipe I/O, and waiting failures are propagated.
fn execute_hvm(args: &[&OsStr], send_io: bool) -> Result<String, Box<dyn Error>> {
  // Spawn the command
  let mut child = Command::new(env!("CARGO_BIN_EXE_hvm"))
    .args(args)
    .stdin(Stdio::piped())
    .stdout(Stdio::piped())
    .stderr(Stdio::piped())
    .spawn()?;

  // Capture the output of the command
  let mut stdout = child.stdout.take().ok_or("Couldn't capture stdout!")?;
  let mut stderr = child.stderr.take().ok_or("Couldn't capture stderr!")?;

  // Send the test input if requested, then always close stdin so that a child
  // reading from it sees EOF instead of blocking forever.
  let mut stdin = child.stdin.take();
  if send_io {
    stdin.as_mut().ok_or("Couldn't capture stdin!")?.write_all(b"io from the tests\n")?;
  }
  drop(stdin);

  // Drain both pipes BEFORE waiting. Waiting first can deadlock: once the
  // child has filled a pipe it blocks on write while we block on its exit.
  // Stderr is read on a helper thread so neither pipe can stall the other.
  let stderr_reader = std::thread::spawn(move || {
    let mut captured = String::new();
    stderr.read_to_string(&mut captured).map(|_| captured)
  });
  let mut output = String::new();
  let stdout_read = stdout.read_to_string(&mut output);
  let stderr_read = stderr_reader.join();

  // Wait for the command to finish and get the exit status
  let status = child.wait()?;

  // Report read failures only after the child has been reaped
  stdout_read?;
  output.push_str(&stderr_read.map_err(|_| "stderr reader thread panicked")??);

  Ok(if !status.success() {
    format!("{status}\n{output}")
  } else {
    parse_output(&output).unwrap_or_else(|err| panic!("error parsing output:\n{err}\n\n{output}"))
  })
}

/// Normalizes raw `hvm` output so that it is stable enough to snapshot.
///
/// - `Result:` lines are parsed as a tree, their variables are renamed by
///   `normalize_vars`, and the tree is printed again.
/// - `- ITRS:`, `- TIME:`, `- MIPS:` and `- LEAK:` lines are dropped.
/// - Every other line is kept unchanged.
///
/// Errors: a `Result:` line that fails to parse aborts with the parser error.
fn parse_output(output: &str) -> Result<String, String> {
  let mut lines = Vec::new();

  for line in output.lines() {
    if line.starts_with("Result:") {
      let mut parser = hvm::ast::CoreParser::new(line);
      parser.consume("Result:")?;
      let mut tree = parser.parse_tree()?;
      normalize_vars(&mut tree, &mut HashMap::new());
      lines.push(format!("Result: {}", tree.show()));
    } else if !line.starts_with("- ITRS:")
      && !line.starts_with("- TIME:")
      && !line.starts_with("- MIPS:")
      && !line.starts_with("- LEAK:")
    {
      // TODO: include iteration count in snapshot once consistent
      lines.push(line.to_string())
    }
  }

  Ok(lines.join("\n"))
}

/// Renames every variable in `tree` to `x0`, `x1`, ... in order of first
/// appearance. `vars` maps each original name to the index it was given, so
/// repeated occurrences of one name receive the same new name.
fn normalize_vars(tree: &mut Tree, vars: &mut HashMap<String, usize>) {
  match tree {
    Tree::Var { nam } => {
      // Computed before `entry` borrows the map; used only for unseen names
      let next_var = vars.len();
      *nam = format!("x{}", vars.entry(std::mem::take(nam)).or_insert(next_var));
    }
    Tree::Era | Tree::Ref { .. } | Tree::Num { .. } => {}
    Tree::Con { fst, snd }
    | Tree::Dup { fst, snd }
    | Tree::Opr { fst, snd }
    | Tree::Swi { fst, snd } => {
      normalize_vars(fst, vars);
      normalize_vars(snd, vars);
    }
  }
}
================================================ FILE: tests/snapshots/run__file@empty.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: tests/programs/empty.hvm --- exit status: 101 thread 'main' panicked at src/ast.rs:545:41: missing `@main` definition note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace ================================================ FILE: tests/snapshots/run__file@hello-world.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: tests/programs/hello-world.hvm --- Result: ((@String/Cons/tag (104 (((@String/Cons/tag (101 (((@String/Cons/tag (108 (((@String/Cons/tag (108 (((@String/Cons/tag (111 (((@String/Cons/tag (44 (((@String/Cons/tag (32 (((@String/Cons/tag (119 (((@String/Cons/tag (111 (((@String/Cons/tag (114 (((@String/Cons/tag (108 (((@String/Cons/tag (100 (@String/Nil x0))) x0) x1))) x1) x2))) x2) x3))) x3) x4))) x4) x5))) x5) x6))) x6) x7))) x7) x8))) x8) x9))) x9) x10))) x10) x11))) x11) ================================================ FILE: tests/snapshots/run__file@list.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: tests/programs/list.hvm --- Result: ((@List/Cons/tag (((@List/Cons/tag (1 (((@List/Cons/tag (* (((@List/Cons/tag (* (@List/Nil x0))) x0) x1))) x1) x2))) x2) (@List/Nil x3))) x3) ================================================ FILE: tests/snapshots/run__file@numeric-casts.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: tests/programs/numeric-casts.hvm --- Result: {0 {1234 {4321 {16771538 {2 {0 {16777215 {16777215 {0 {0 {+0 {+1234 {+4321 {-5678 {+2 {-12 
{+8388607 {-8388608 {+8388607 {-8388608 {+0 {+NaN {+inf {-inf {2.1500244 {-2.1500244 {0.15000153 {-1234.0 {1234.0 {123456.0 {16775936.0 {[u24] {[i24] {[f24] *}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} ================================================ FILE: tests/snapshots/run__file@numerics__f24.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: tests/programs/numerics/f24.hvm --- Result: {+inf {-inf {+NaN {2.5 {-1.5 {1.1499939 {0.25 {0.5 {0 {1 {1 {0 {0 {0 {0 {+NaN {+inf {-inf {1.019989 {0.1000061 {0.1000061 {-0.1000061 {-0.1000061 {-8.908799e-6 {1.0 {8.908799e-6 {1.0 *}}}}}}}}}}}}}}}}}}}}}}}}}}} ================================================ FILE: tests/snapshots/run__file@numerics__i24.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: tests/programs/i24.hvm --- Result: {+12 {+8 {+20 {+5 {+0 {0 {1 {0 {1 {+2 {+10 {+8 {+8388607 {-8388608 {+1 {+1 {-1 {-1 *}}}}}}}}}}}}}}}}}} ================================================ FILE: tests/snapshots/run__file@numerics__u24.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: tests/programs/u24.hvm --- Result: {12 {8 {20 {5 {0 {0 {1 {0 {1 {2 {10 {8 {40 {2 {16777215 {0 {3 *}}}}}}}}}}}}}}}}} ================================================ FILE: tests/snapshots/run__file@safety-check.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: tests/programs/safety-check.hvm --- ERROR: attempt to clone a non-affine global reference. 
================================================ FILE: tests/snapshots/run__file@sort_bitonic__main.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: examples/sort_bitonic/main.hvm --- Result: 8386560 ================================================ FILE: tests/snapshots/run__file@sort_radix__main.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: examples/sort_radix/main.hvm --- Result: 16744448 ================================================ FILE: tests/snapshots/run__file@stress__main.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: examples/stress/main.hvm --- Result: 0 ================================================ FILE: tests/snapshots/run__file@sum_rec__main.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: examples/sum_rec/main.hvm --- Result: 16252928 ================================================ FILE: tests/snapshots/run__file@sum_tree__main.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: examples/sum_tree/main.hvm --- Result: 1048576 ================================================ FILE: tests/snapshots/run__file@tuples__tuples.hvm.snap ================================================ --- source: tests/run.rs expression: rust_output input_file: examples/tuples/tuples.hvm --- Result: ((0 (3 (4 (5 (6 (7 (8 (1 (2 x0))))))))) x0) ================================================ FILE: tests/snapshots/run__io_file@demo_io__main.hvm.snap ================================================ --- source: tests/run.rs expression: c_output input_file: examples/demo_io/main.hvm --- Apache License Result: ((@IO/Done/tag (@IO/MAGIC (((0 (* x0)) x0) x1))) x1) ================================================ FILE: 
tests/snapshots/run__io_file@io__basic.hvm.snap ================================================ --- source: tests/run.rs expression: c_output input_file: tests/programs/io/basic.hvm --- Apache License Result: ((@IO/Done/tag (@IO/MAGIC (((0 (* x0)) x0) x1))) x1) ================================================ FILE: tests/snapshots/run__io_file@io__invalid-name.hvm.snap ================================================ --- source: tests/run.rs expression: c_output input_file: tests/programs/io/invalid-name.hvm --- Result: ((@IO/Done/tag (@IO/MAGIC (((1 (((1 x0) x0) x1)) x1) x2))) x2) ================================================ FILE: tests/snapshots/run__io_file@io__open1.hvm.snap ================================================ --- source: tests/run.rs expression: c_output input_file: tests/programs/io/open1.hvm --- Result: ((@IO/Done/tag (@IO/MAGIC (((0 (3 x0)) x0) x1))) x1) ================================================ FILE: tests/snapshots/run__io_file@io__open2.hvm.snap ================================================ --- source: tests/run.rs expression: c_output input_file: tests/programs/io/open2.hvm --- Result: ((@IO/Done/tag (@IO/MAGIC (((1 (((2 (+2 x0)) x0) x1)) x1) x2))) x2) ================================================ FILE: tests/snapshots/run__io_file@io__open3.hvm.snap ================================================ --- source: tests/run.rs expression: c_output input_file: tests/programs/io/open3.hvm --- Result: ((@IO/Done/tag (@IO/MAGIC (((1 (((0 x0) x0) x1)) x1) x2))) x2) ================================================ FILE: tests/snapshots/run__io_file@io__read_and_print.hvm.snap ================================================ --- source: tests/run.rs expression: c_output input_file: tests/programs/io/read_and_print.hvm --- What is your name? io fr from Result: 42