Repository: etaoins/arret Branch: master Commit: 3b3bae27ca72 Files: 328 Total size: 1.6 MB Directory structure: gitextract_wp8wxvol/ ├── .arret-root ├── .buildkite/ │ ├── build-and-test.sh │ ├── llvm-assert.Dockerfile │ ├── pipeline.yml │ ├── sync-rustdoc.sh │ ├── update-multiarch-manifest.sh │ └── vscode-extension-tests.sh ├── .dockerignore ├── .github/ │ ├── CODEOWNERS │ └── renovate.json ├── .gitignore ├── .prettierrc ├── Cargo.toml ├── Dockerfile ├── LICENSE ├── README.md ├── compiler/ │ ├── Cargo.toml │ ├── arret_root.rs │ ├── codegen/ │ │ ├── alloc/ │ │ │ ├── core.rs │ │ │ ├── mod.rs │ │ │ ├── plan.rs │ │ │ └── types.rs │ │ ├── analysis/ │ │ │ ├── escape.rs │ │ │ ├── mod.rs │ │ │ └── names.rs │ │ ├── box_layout.rs │ │ ├── callee.rs │ │ ├── const_gen.rs │ │ ├── debug_info.rs │ │ ├── fun_gen.rs │ │ ├── jit.rs │ │ ├── libcstr.rs │ │ ├── math_gen.rs │ │ ├── mod.rs │ │ ├── mod_gen.rs │ │ ├── op_gen.rs │ │ ├── panic_gen.rs │ │ ├── program.rs │ │ ├── range_md.rs │ │ ├── record_struct.rs │ │ ├── target_gen.rs │ │ ├── target_machine.rs │ │ └── vector_gen.rs │ ├── context.rs │ ├── hir/ │ │ ├── destruc.rs │ │ ├── error.rs │ │ ├── exports.rs │ │ ├── import/ │ │ │ ├── filter.rs │ │ │ ├── mod.rs │ │ │ └── parse.rs │ │ ├── loader.rs │ │ ├── lowering.rs │ │ ├── macros/ │ │ │ ├── expander.rs │ │ │ ├── linker.rs │ │ │ ├── matcher.rs │ │ │ └── mod.rs │ │ ├── mod.rs │ │ ├── ns.rs │ │ ├── prim.rs │ │ ├── records.rs │ │ ├── scope.rs │ │ ├── types.rs │ │ ├── util.rs │ │ ├── var_id.rs │ │ └── visitor.rs │ ├── id_type.rs │ ├── lib.rs │ ├── mir/ │ │ ├── app_purity.rs │ │ ├── arg_list.rs │ │ ├── builder.rs │ │ ├── costing.rs │ │ ├── env_values.rs │ │ ├── equality.rs │ │ ├── error.rs │ │ ├── eval_hir.rs │ │ ├── inliner.rs │ │ ├── intrinsic/ │ │ │ ├── bitwise.rs │ │ │ ├── list.rs │ │ │ ├── math.rs │ │ │ ├── mod.rs │ │ │ ├── num_utils.rs │ │ │ ├── number.rs │ │ │ ├── panics.rs │ │ │ ├── partial_print.rs │ │ │ ├── print.rs │ │ │ ├── testing.rs │ │ │ └── vector.rs │ │ ├── mod.rs │ │ ├── 
ops.rs │ │ ├── optimise/ │ │ │ ├── duplicate_alloc_ops.rs │ │ │ ├── mod.rs │ │ │ └── unused_ops.rs │ │ ├── polymorph.rs │ │ ├── printer.rs │ │ ├── record_field.rs │ │ ├── ret_value.rs │ │ ├── rust_fun.rs │ │ ├── specific_abi_type.rs │ │ ├── tagset.rs │ │ ├── typred.rs │ │ ├── value/ │ │ │ ├── arret_fun.rs │ │ │ ├── build_reg.rs │ │ │ ├── from_reg.rs │ │ │ ├── list.rs │ │ │ ├── mod.rs │ │ │ ├── plan_phi.rs │ │ │ ├── synthetic_fun.rs │ │ │ ├── to_const.rs │ │ │ └── types.rs │ │ └── vector_member.rs │ ├── promise.rs │ ├── repl.rs │ ├── reporting.rs │ ├── rfi/ │ │ └── mod.rs │ ├── source.rs │ ├── tests/ │ │ ├── compile-error/ │ │ │ ├── arity.arret │ │ │ ├── bit-shift-left-negative.arret │ │ │ ├── bit-shift-right-overflow.arret │ │ │ ├── destruc-errors.arret │ │ │ ├── fn-lowering-errors.arret │ │ │ ├── if-errors.arret │ │ │ ├── import-parse-errors.arret │ │ │ ├── macro-errors.arret │ │ │ ├── misc-body-errors.arret │ │ │ ├── misc-top-level-errors.arret │ │ │ ├── missing-module.arret │ │ │ ├── no-main.arret │ │ │ ├── overflow-add.arret │ │ │ ├── overflow-multiply.arret │ │ │ ├── overflow-quot.arret │ │ │ ├── overflow-subtract.arret │ │ │ ├── quot-by-zero.arret │ │ │ ├── record-errors.arret │ │ │ ├── recur-errors.arret │ │ │ ├── reference-errors.arret │ │ │ ├── rem-by-zero.arret │ │ │ ├── syntax-error.arret │ │ │ ├── type-checking-errors.arret │ │ │ ├── type-lowering-errors.arret │ │ │ ├── vector-assoc-negative.arret │ │ │ ├── vector-assoc-out-of-bounds.arret │ │ │ ├── vector-ref-negative.arret │ │ │ ├── vector-ref-out-of-bounds.arret │ │ │ └── wrong-main-type.arret │ │ ├── integration.rs │ │ ├── optimise/ │ │ │ ├── application.arret │ │ │ ├── bitwise.arret │ │ │ ├── const.arret │ │ │ ├── equality.arret │ │ │ ├── inliner.arret │ │ │ ├── list.arret │ │ │ ├── math.arret │ │ │ ├── number.arret │ │ │ ├── typred.arret │ │ │ └── vector.arret │ │ ├── run-error/ │ │ │ ├── impure-panic.arret │ │ │ ├── impure-panic.stderr │ │ │ ├── infinite-to-int.arret │ │ │ ├── 
infinite-to-int.stderr │ │ │ ├── nan-to-int.arret │ │ │ ├── nan-to-int.stderr │ │ │ ├── overflow-add.arret │ │ │ ├── overflow-add.stderr │ │ │ ├── overflow-multiply.arret │ │ │ ├── overflow-multiply.stderr │ │ │ ├── overflow-quot.arret │ │ │ ├── overflow-quot.stderr │ │ │ ├── overflow-subtract.arret │ │ │ ├── overflow-subtract.stderr │ │ │ ├── pure-panic.arret │ │ │ ├── pure-panic.stderr │ │ │ ├── quot-by-zero.arret │ │ │ ├── quot-by-zero.stderr │ │ │ ├── rem-by-zero.arret │ │ │ └── rem-by-zero.stderr │ │ └── run-pass/ │ │ ├── application.arret │ │ ├── binding.arret │ │ ├── bitwise.arret │ │ ├── closure-typing.arret │ │ ├── closure.arret │ │ ├── comments.arret │ │ ├── conditionals.arret │ │ ├── divergence.arret │ │ ├── empty.arret │ │ ├── equality.arret │ │ ├── hash.arret │ │ ├── list.arret │ │ ├── macros.arret │ │ ├── math.arret │ │ ├── number.arret │ │ ├── occurrence-typing.arret │ │ ├── read.arret │ │ ├── record.arret │ │ ├── recursion.arret │ │ ├── set.arret │ │ ├── type-definitions.arret │ │ ├── typred.arret │ │ ├── vector.arret │ │ └── write.arret │ ├── ty/ │ │ ├── conv_abi.rs │ │ ├── datum.rs │ │ ├── intersect.rs │ │ ├── is_a.rs │ │ ├── list_iter.rs │ │ ├── mod.rs │ │ ├── pred.rs │ │ ├── props.rs │ │ ├── purity.rs │ │ ├── record.rs │ │ ├── select.rs │ │ ├── subst.rs │ │ ├── subtract.rs │ │ ├── ty_args.rs │ │ ├── unify.rs │ │ └── var_usage.rs │ └── typeck/ │ ├── dce.rs │ ├── destruc.rs │ ├── error.rs │ ├── infer.rs │ └── mod.rs ├── docker-compose.yml ├── docs/ │ └── language-design.md ├── driver/ │ ├── Cargo.toml │ ├── main.rs │ ├── subcommand/ │ │ ├── compile.rs │ │ ├── eval.rs │ │ ├── mod.rs │ │ └── repl/ │ │ ├── arret_helper.rs │ │ ├── command.rs │ │ ├── history.rs │ │ ├── mod.rs │ │ └── syntax.rs │ └── tests/ │ └── integration/ │ ├── hello-world.arret │ └── run.sh ├── editors/ │ └── code/ │ ├── .dockerignore │ ├── .eslintrc.yml │ ├── .gitignore │ ├── .vscode/ │ │ ├── launch.json │ │ ├── settings.json │ │ └── tasks.json │ ├── Dockerfile │ ├── 
language-configuration.json │ ├── package.json │ ├── src/ │ │ ├── extension.ts │ │ └── test/ │ │ ├── colorize-fixtures/ │ │ │ └── sample.arret │ │ ├── colorize-results/ │ │ │ └── sample_arret.json │ │ ├── downloadVsCode.ts │ │ ├── runTest.ts │ │ ├── suite/ │ │ │ ├── colorization.test.ts │ │ │ ├── extension.test.ts │ │ │ └── index.ts │ │ └── vsCodeVersion.ts │ ├── syntaxes/ │ │ └── arret.tmLanguage.json │ └── tsconfig.json ├── lsp-server/ │ ├── Cargo.toml │ ├── capabilities.rs │ ├── handler/ │ │ ├── mod.rs │ │ ├── text_synchronisation.rs │ │ └── workspace.rs │ ├── json_rpc.rs │ ├── main.rs │ ├── model/ │ │ ├── document.rs │ │ ├── mod.rs │ │ └── workspace.rs │ ├── session.rs │ ├── transport/ │ │ ├── bytestream.rs │ │ └── mod.rs │ └── watcher/ │ ├── mod.rs │ └── syntax.rs ├── rfi-derive/ │ ├── Cargo.toml │ └── lib.rs ├── runtime/ │ ├── Cargo.toml │ ├── abitype.rs │ ├── binding.rs │ ├── boxed/ │ │ ├── heap/ │ │ │ ├── collect.rs │ │ │ ├── mod.rs │ │ │ └── type_info.rs │ │ ├── mod.rs │ │ ├── refs.rs │ │ └── types/ │ │ ├── char.rs │ │ ├── field_value.rs │ │ ├── float.rs │ │ ├── fun.rs │ │ ├── int.rs │ │ ├── list.rs │ │ ├── map.rs │ │ ├── mod.rs │ │ ├── record.rs │ │ ├── record_data.rs │ │ ├── set.rs │ │ ├── shared_str.rs │ │ ├── str.rs │ │ ├── sym.rs │ │ └── vector.rs │ ├── callback.rs │ ├── class_map.rs │ ├── compiler_support.rs │ ├── intern.rs │ ├── lib.rs │ ├── persistent/ │ │ ├── mod.rs │ │ └── vector.rs │ └── task.rs ├── runtime-syntax/ │ ├── Cargo.toml │ ├── lib.rs │ ├── reader.rs │ └── writer.rs ├── stdlib/ │ ├── arret/ │ │ ├── base.arret │ │ ├── set.arret │ │ └── test.arret │ └── rust/ │ ├── Cargo.toml │ ├── bitwise.rs │ ├── hash.rs │ ├── lib.rs │ ├── list.rs │ ├── math.rs │ ├── number.rs │ ├── read.rs │ ├── set.rs │ ├── testing.rs │ ├── vector.rs │ └── write.rs └── syntax/ ├── Cargo.toml ├── anon_fun.rs ├── datum.rs ├── error.rs ├── lib.rs ├── parser.rs └── span.rs ================================================ FILE CONTENTS 
================================================ ================================================ FILE: .arret-root ================================================ ================================================ FILE: .buildkite/build-and-test.sh ================================================ #!/bin/sh set -eu # Deny warnings on CI export RUSTFLAGS="-D warnings" echo '--- :cargo: Compiling debug' cargo build echo '--- :pray: Testing debug' cargo test echo '--- :keyboard: Testing driver' ./driver/tests/integration/run.sh target/debug/arret ================================================ FILE: .buildkite/llvm-assert.Dockerfile ================================================ ARG LLVM_VERSION=11.1.0 ARG LLVM_ROOT=/opt/llvm-11 ## FROM fedora:33 AS fedora-common RUN dnf install -y gcc-c++ # `dnf clean all` happens in later stages ## FROM fedora-common AS llvm-build ARG LLVM_VERSION ARG LLVM_ROOT RUN dnf install -y file cmake ninja-build xz && \ dnf clean all WORKDIR /usr/src RUN curl https://github.com/llvm/llvm-project/releases/download/llvmorg-11.1.0/llvm-11.1.0.src.tar.xz -sSL | \ tar -Jx --no-same-owner WORKDIR /usr/src/llvm-build # We need to be careful to use less than 4GiB on our build agents RUN cmake \ -GNinja \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=${LLVM_ROOT} \ -DLLVM_ENABLE_ASSERTIONS=ON \ -DLLVM_TARGETS_TO_BUILD=AArch64 \ -DLLVM_ENABLE_WARNINGS=OFF \ # Disable a spammy ABI change warning on GCC 7 that `ENABLE_WARNINGS=OFF` # doesn't suppress. 
-DCMAKE_CXX_FLAGS=-Wno-psabi \ -DLLVM_USE_LINKER=gold \ ../llvm-${LLVM_VERSION}.src RUN ninja install ## FROM fedora-common ARG LLVM_ROOT COPY --from=llvm-build ${LLVM_ROOT} ${LLVM_ROOT} RUN curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain 1.53.0 --profile=minimal --component rustfmt ENV PATH "/root/.cargo/bin:${PATH}" ENV LLVM_SYS_100_PREFIX "${LLVM_ROOT}" ================================================ FILE: .buildkite/pipeline.yml ================================================ cached-ecr-build-env: &cached-ecr-build-env plugins: - seek-oss/docker-ecr-cache#v1.11.0: target: build-env cache-on: - Cargo.lock - docker#v3.9.0 docker-ghcr-login: &docker-ghcr-login docker-login#v2.0.1: server: ghcr.io username: etaoins password-env: CR_PAT steps: - label: ':muscle: Test ARM64' agents: { queue: arm64 } command: - ./.buildkite/build-and-test.sh - "echo '--- :sleuth_or_spy: Checking release'" - RUSTFLAGS="-Copt-level=0" cargo check --release <<: *cached-ecr-build-env - label: ':ubuntu: Test AMD64' agents: { queue: amd64 } command: - ./.buildkite/build-and-test.sh <<: *cached-ecr-build-env - label: ':fedora: Test LLVM assert' branches: '!master' agents: { queue: arm64 } command: - "echo '--- :prettier: Checking rustfmt'" - cargo fmt -- --check - ./.buildkite/build-and-test.sh plugins: - seek-oss/docker-ecr-cache#v1.11.0: dockerfile: ./.buildkite/llvm-assert.Dockerfile - docker#v3.9.0 - label: ':typescript: Test VS Code extension' agents: { queue: amd64 } command: - ./.buildkite/vscode-extension-tests.sh plugins: - seek-oss/docker-ecr-cache#v1.11.0: dockerfile: ./editors/code/Dockerfile ecr-name: build-cache/arret/vscode-extension cache-on: - ./editors/code/yarn.lock - ./editors/code/src/test/vsCodeVersion.ts - docker#v3.9.0: volumes: - '/workdir/editors/code/node_modules' - '/workdir/editors/code/.vscode-test' - wait - label: ':mechanical_arm: Push ARM64 REPL image' key: 'push-arm64-repl-image' branches: 'master' agents: { queue: arm64 } plugins: - 
*docker-ghcr-login - docker-compose#v3.9.0: push: - repl:ghcr.io/etaoins/arret-repl-arm64 env: - BUILDKITE_COMMIT - label: ':rocket: Push AMD64 REPL image' key: 'push-amd64-repl-image' branches: 'master' agents: { queue: amd64 } plugins: - *docker-ghcr-login - docker-compose#v3.9.0: push: - repl:ghcr.io/etaoins/arret-repl-amd64 env: - BUILDKITE_COMMIT - label: ':docker: Update multiarch manifest' branches: 'master' agents: { queue: arm64 } depends_on: - 'push-amd64-repl-image' - 'push-arm64-repl-image' command: - ./.buildkite/update-multiarch-manifest.sh plugins: - *docker-ghcr-login - label: ':rust: Check (Rust Beta)' branches: 'master' agents: { queue: arm64 } command: - "echo '--- :rust: Installing Rust beta'" - rustup default beta - "echo '--- :male-detective: Checking debug'" - cargo check - "echo '--- :female-detective: Checking release'" - RUSTFLAGS="-Copt-level=0" cargo check --release <<: *cached-ecr-build-env - label: ':books: Update Rustdoc' branches: 'master' agents: { queue: arm64 } command: - "echo '--- :book: Building rustdoc'" - cargo doc --no-deps - "echo '--- :rust: Installing awscli'" - apt-get update - DEBIAN_FRONTEND=noninteractive apt-get -y install awscli - ./.buildkite/sync-rustdoc.sh <<: *cached-ecr-build-env concurrency_group: 'update-rustdoc' concurrency: 1 ================================================ FILE: .buildkite/sync-rustdoc.sh ================================================ #!/bin/sh set -eu S3_BUCKET_NAME=arret-lang-rustdoc CLOUDFRONT_DISTRIBUTION_ID=E1FFCMKSLRZAZ echo '--- :s3: Updating S3' aws s3 sync --only-show-errors --delete --cache-control "max-age=3600" \ target/doc/ "s3://${S3_BUCKET_NAME}" echo '--- :cloudfront: Invalidating CloudFront' aws cloudfront create-invalidation \ --distribution-id "${CLOUDFRONT_DISTRIBUTION_ID}" \ --paths '/*' ================================================ FILE: .buildkite/update-multiarch-manifest.sh ================================================ #!/usr/bin/env bash set -eu # Needed 
for `docker manifest` export DOCKER_CLI_EXPERIMENTAL=enabled manifest=ghcr.io/etaoins/arret-repl docker manifest create -a "${manifest}" "${manifest}-arm64" "${manifest}-amd64" docker manifest push "${manifest}" ================================================ FILE: .buildkite/vscode-extension-tests.sh ================================================ #!/usr/bin/env bash set -eu export DISPLAY=':99.0' /usr/bin/Xvfb :99 -screen 0 1024x768x24 > /dev/null 2>&1 & cd editors/code yarn test yarn lint yarn vscode:package ================================================ FILE: .dockerignore ================================================ /target/ /.git # These are build files that aren't needed inside the Docker container. Ignore them so we don't # trigger a recompile when experimenting with Docker /.dockerignore /docker-compose.yml /Dockerfile ================================================ FILE: .github/CODEOWNERS ================================================ * @etaoins ================================================ FILE: .github/renovate.json ================================================ { "extends": ["config:base", "docker:disable"], "cargo": { "enabled": true }, "timezone": "Australia/Melbourne", "schedule": ["before 5am every 2 weeks on Sunday"], "prCreation": "not-pending", "packageRules": [ { "managers": ["npm"], "depTypeList": ["devDependencies"], "packagePatterns": ["^@typescript-eslint/", "^eslint-"], "packageNames": ["eslint"], "groupName": "eslint deps" }, { "managers": ["npm"], "depTypeList": ["devDependencies"], "packageNames": ["vsce", "vscode-test", "@types/vscode"], "groupName": "VS Code deps" }, { "managers": ["npm"], "depTypeList": ["devDependencies"], "packagePatterns": ["^@types/"], "groupName": "npm Definitely Typed deps" } ] } ================================================ FILE: .gitignore ================================================ /target/ /rls/ **/*.rs.bk ================================================ FILE: .prettierrc 
================================================ { "singleQuote": true, "trailingComma": "all" } ================================================ FILE: Cargo.toml ================================================ [workspace] members = [ "syntax", "compiler", "driver", "lsp-server", "runtime", "runtime-syntax", "rfi-derive", "stdlib/rust" ] ================================================ FILE: Dockerfile ================================================ FROM ubuntu:20.04 AS build-env RUN \ apt-get update && \ apt-get -y install --no-install-recommends ca-certificates curl gcc zlib1g-dev libstdc++-9-dev llvm-10 llvm-10-dev && \ apt-get clean ENV LLVM_SYS_10_PREFIX /usr/lib/llvm-10 RUN curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain 1.53.0 --profile=minimal ENV PATH "/root/.cargo/bin:${PATH}" # These are the minimum required files for `cargo fetch` # This allows the `cargo fetch` to be cached between other source code changes ADD Cargo.toml Cargo.lock /opt/arret/ ADD syntax/Cargo.toml /opt/arret/syntax/ ADD runtime/Cargo.toml /opt/arret/runtime/ ADD runtime-syntax/Cargo.toml /opt/arret/runtime-syntax/ ADD rfi-derive/Cargo.toml /opt/arret/rfi-derive/ ADD stdlib/rust/Cargo.toml /opt/arret/stdlib/rust/ ADD compiler/Cargo.toml /opt/arret/compiler/ ADD driver/Cargo.toml /opt/arret/driver/ ADD lsp-server/Cargo.toml /opt/arret/lsp-server/ WORKDIR /opt/arret RUN cargo fetch ADD . 
/opt/arret ### FROM build-env as full-compiler RUN cargo build --release ### FROM ubuntu:20.04 AS repl ARG vcs_ref COPY --from=full-compiler /opt/arret/.arret-root /opt/arret/.arret-root COPY --from=full-compiler /opt/arret/stdlib/arret /opt/arret/stdlib/arret COPY --from=full-compiler /opt/arret/target/release/arret /opt/arret/target/release/arret COPY --from=full-compiler /opt/arret/target/release/*.so /opt/arret/target/release/ RUN groupadd arret && useradd -r -g arret arret USER arret:arret WORKDIR /opt/arret ENTRYPOINT ["/opt/arret/target/release/arret"] CMD ["repl"] # Label the commit that was used to build this LABEL \ org.label-schema.vcs-ref=$vcs_ref \ org.label-schema.vcs-url="https://github.com/etaoins/arret" ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Arret [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Build status](https://badge.buildkite.com/bcda02e06b6795e669edae4264bdecbb11ff98b4f5afb1fa4b.svg?branch=master)](https://buildkite.com/arret/arret) ## Overview Arret is a pure functional, strongly typed language with Lisp-like syntax. It aims to combine the expressiveness of Lisp with guarantees provided by functional programming. The [language design documentation](./docs/language-design.md) has a high-level summary of the language's design choices. The Arret compiler and parts of its standard library are written in Rust. The mechanism for calling Rust code from Arret is referred to as the Rust Function Interface or RFI. Documentation for the [`arret_runtime` crate](https://rustdoc.arret-lang.org/arret_runtime/index.html) describes the core concepts of the RFI. ## Installation ### Docker REPL Image There is a public Docker image at `ghcr.io/etaoins/arret-repl` that runs the Arret REPL. Whenever `cargo run repl` appears in the documentation this command can be used instead: ```shell > docker run -ti ghcr.io/etaoins/arret-repl ``` It can also evaluate single file programs: ```shell > cat hello-world.arret (import [stdlib base]) (defn main! () (println! 
"Hello, world!")) > docker run -i ghcr.io/etaoins/arret-repl eval - < hello-world.arret Hello, world! ``` ### Build Requirements 1. A Unix-like host running on ARM64, x86-64 or x86-32. These are the platforms supporting lazy compilation with LLVM's ORC JIT. 1. [LLVM](http://releases.llvm.org) 10 or 11 1. [Rust](https://www.rust-lang.org) ### Building with rustup and Cargo ```shell > curl https://sh.rustup.rs -sSf | sh > cd ~/path/to/repo/root > cargo run repl ``` ## Usage ### REPL The REPL provides an interactive environment for exploring Arret. It's supported as a first class environment in Arret; the REPL is just as powerful as the compiler. ```text > cargo run repl arret> (length '(1 2 3 4 5)) => 5 arret> (defn identity #{T} ([x T]) -> T x) defined arret> /type identity => (All #{T} T -> T) arret> (identity "Hello, world!") => "Hello, world!" arret> /type (identity [one two three]) => (Vector 'one 'two 'three) arret> /quit ``` ### Compiler Compiled programs have a `(main!)` function as their entry point: ```clojure (import [stdlib base]) (defn main! () (println! "Hello, world!")) ``` These can be compiled to a static binary by running Arret with the path name: ```sh > cargo run compile hello-world.arret > ./hello-world "Hello, world!" ``` ### Editors A basic [Visual Studio Code](https://code.visualstudio.com) extension is bundled in [editors/code](./editors/code). This uses the [Language Server](https://microsoft.github.io/language-server-protocol/) from the [lsp-server crate](./lsp-server). ```sh # Install `arret-lsp-server` cargo install --path lsp-server # Install the Visual Studio code extension cd editors/code yarn yarn vscode:install ``` ## Examples The Arret language is still rapidly evolving. This makes it impractical to provide accurate documentation of the language and standard library. However, the test programs in [run-pass](compiler/tests/run-pass) give examples of working Arret code. 
================================================ FILE: compiler/Cargo.toml ================================================ [package] name = "arret-compiler" version = "0.1.0" edition = "2018" authors = ["Ryan Cumming "] [lib] path = "lib.rs" crate-type = ["lib"] [dependencies] llvm-sys = "100" libc = "0.2" libloading = "0.7" arret-syntax = { path = "../syntax" } arret-runtime = { path = "../runtime" } arret-runtime-syntax = { path = "../runtime-syntax" } codespan-reporting = "0.11" crossbeam-channel = "0.5" termcolor = "1" [dev-dependencies] tempfile = "3" num_cpus = "1.13" ================================================ FILE: compiler/arret_root.rs ================================================ use std::{env, path}; const ARRET_ROOT_ENV_VAR: &str = "ARRET_ROOT"; fn is_arret_root(path: &path::Path) -> bool { path.join("./.arret-root").is_file() } pub struct InvalidOptionError { invalid_path: path::PathBuf, } impl InvalidOptionError { /// Path to the invalid Arret root pub fn invalid_path(&self) -> &path::Path { &self.invalid_path } } pub struct InvalidEnvVarError { invalid_path: path::PathBuf, } impl InvalidEnvVarError { /// Environment variable that contained the invalid root pub fn env_var_name(&self) -> &'static str { ARRET_ROOT_ENV_VAR } /// Path to the invalid Arret root pub fn invalid_path(&self) -> &path::Path { &self.invalid_path } } pub enum FindArretRootError { /// Explicitly specified option was not an Arret root InvalidOption(InvalidOptionError), /// Environment variable with the given name is not an Arret root InvalidEnvVar(InvalidEnvVarError), /// Heuristic search failed NotFound, } /// Attempts to find the path to Arret root directory /// /// The search order is: /// 1. The `arret_root_option` parameter /// 2. The `ARRET_ROOT` environment variable /// 3. The path this binary was originally built in and all of its parents /// 4. 
The current directory and all of its parents pub fn find_arret_root( arret_root_option: Option<&str>, ) -> Result { if let Some(arg_root) = arret_root_option { let arg_path = path::PathBuf::from(arg_root); if !is_arret_root(&arg_path) { return Err(FindArretRootError::InvalidOption(InvalidOptionError { invalid_path: arg_path, })); } return Ok(arg_path); } if let Some(env_root) = env::var_os(ARRET_ROOT_ENV_VAR) { let env_path = path::PathBuf::from(env_root); if !is_arret_root(&env_path) { return Err(FindArretRootError::InvalidEnvVar(InvalidEnvVarError { invalid_path: env_path, })); } return Ok(env_path); } if let Some(manifest_dir) = option_env!("CARGO_MANIFEST_DIR") { for candidate in path::Path::new(manifest_dir).ancestors() { if is_arret_root(candidate) { return Ok(candidate.to_owned()); } } } let current_dir = env::current_dir().expect("Cannot determine current directory"); for candidate in path::Path::new(¤t_dir).ancestors() { if is_arret_root(candidate) { return Ok(candidate.to_owned()); } } Err(FindArretRootError::NotFound) } ================================================ FILE: compiler/codegen/alloc/core.rs ================================================ use std::{mem, ptr}; use llvm_sys::core::*; use llvm_sys::prelude::*; use llvm_sys::{LLVMAttributeFunctionIndex, LLVMAttributeReturnIndex, LLVMIntPredicate}; use arret_runtime::boxed; use crate::codegen::alloc::{ActiveAlloc, AllocAtom, BoxSource}; use crate::codegen::mod_gen::ModCtx; use crate::codegen::target_gen::TargetCtx; use crate::libcstr; fn init_alloced_box_header( tcx: &mut TargetCtx, builder: LLVMBuilderRef, alloced_box: LLVMValueRef, header: boxed::Header, ) { unsafe { let header_ptr = LLVMBuildStructGEP(builder, alloced_box, 0, libcstr!("header_ptr")); LLVMBuildStore(builder, tcx.llvm_box_header(header), header_ptr); } } fn gen_stack_alloced_box( tcx: &mut TargetCtx, builder: LLVMBuilderRef, llvm_type: LLVMTypeRef, value_name: &[u8], ) -> LLVMValueRef { unsafe { let type_tag = T::TYPE_TAG; let 
alloced_box = LLVMBuildAlloca(builder, llvm_type, value_name.as_ptr() as *const _); LLVMSetAlignment(alloced_box, mem::align_of::() as u32); init_alloced_box_header( tcx, builder, alloced_box, boxed::Header::new(type_tag, boxed::AllocType::Stack), ); alloced_box } } fn gen_heap_alloced_box( tcx: &mut TargetCtx, builder: LLVMBuilderRef, active_alloc: &mut ActiveAlloc<'_>, box_size: boxed::BoxSize, llvm_type: LLVMTypeRef, value_name: &[u8], ) -> LLVMValueRef { unsafe { assert!( !active_alloc.is_empty(), "attempt to create heap box with empty active heap allocation" ); let cell_count = box_size.cell_count(); let slot_index = active_alloc.used_cells; let llvm_slot = if slot_index == 0 { active_alloc.box_slots } else { let gep_indices = &mut [LLVMConstInt( LLVMInt32TypeInContext(tcx.llx), slot_index as u64, 0, )]; LLVMBuildInBoundsGEP( builder, active_alloc.box_slots, gep_indices.as_mut_ptr(), gep_indices.len() as u32, libcstr!("slot"), ) }; active_alloc.used_cells += cell_count; assert!(active_alloc.used_cells <= active_alloc.total_cells); let type_tag = T::TYPE_TAG; let alloced_box = LLVMBuildBitCast( builder, llvm_slot, LLVMPointerType(llvm_type, 0), value_name.as_ptr() as *const _, ); init_alloced_box_header( tcx, builder, alloced_box, boxed::Header::new(type_tag, box_size.to_heap_alloc_type()), ); alloced_box } } pub fn gen_alloced_box_with_llvm_type( tcx: &mut TargetCtx, builder: LLVMBuilderRef, active_alloc: &mut ActiveAlloc<'_>, box_source: BoxSource, llvm_type: LLVMTypeRef, value_name: &[u8], ) -> LLVMValueRef { match box_source { BoxSource::Stack => gen_stack_alloced_box::(tcx, builder, llvm_type, value_name), BoxSource::Heap(box_size) => { gen_heap_alloced_box::(tcx, builder, active_alloc, box_size, llvm_type, value_name) } } } pub fn gen_alloced_box( tcx: &mut TargetCtx, builder: LLVMBuilderRef, active_alloc: &mut ActiveAlloc<'_>, box_source: BoxSource, value_name: &[u8], ) -> LLVMValueRef { let llvm_type = 
tcx.boxed_abi_to_llvm_struct_type(&T::TYPE_TAG.into()); gen_alloced_box_with_llvm_type::( tcx, builder, active_alloc, box_source, llvm_type, value_name, ) } /// Allocates cells by invoking a function at runtime /// /// This is the slow path; it is only used when our current heap segment is full. fn gen_runtime_heap_alloc( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, builder: LLVMBuilderRef, llvm_task: LLVMValueRef, required_cells: usize, ) -> LLVMValueRef { use arret_runtime::abitype; unsafe { let llvm_i32 = LLVMInt32TypeInContext(tcx.llx); let llvm_param_types = &mut [tcx.task_llvm_ptr_type(), llvm_i32]; let alloc_cells_llvm_type = LLVMFunctionType( tcx.boxed_abi_to_llvm_ptr_type(&abitype::BoxedAbiType::Any), llvm_param_types.as_mut_ptr(), llvm_param_types.len() as u32, 0, ); let alloc_cells_fun = mcx.get_function_or_insert( alloc_cells_llvm_type, b"arret_runtime_alloc_cells\0", |alloc_cells_fun| { LLVMAddAttributeAtIndex( alloc_cells_fun, LLVMAttributeFunctionIndex, tcx.llvm_enum_attr_for_name("cold", 0), ); LLVMAddAttributeAtIndex( alloc_cells_fun, LLVMAttributeReturnIndex, tcx.llvm_boxed_align_attr(), ); LLVMAddAttributeAtIndex( alloc_cells_fun, LLVMAttributeReturnIndex, tcx.llvm_noalias_attr(), ); }, ); let alloc_cells_args = &mut [llvm_task, LLVMConstInt(llvm_i32, required_cells as u64, 0)]; let runtime_box_slots = LLVMBuildCall( builder, alloc_cells_fun, alloc_cells_args.as_mut_ptr(), alloc_cells_args.len() as u32, libcstr!("runtime_box_slots"), ); // We can dereference the entire allocation immediately let dereferenceable_attr = tcx.llvm_enum_attr_for_name( "dereferenceable", (mem::size_of::() * required_cells) as u64, ); LLVMAddCallSiteAttribute( runtime_box_slots, LLVMAttributeReturnIndex, dereferenceable_attr, ); runtime_box_slots } } /// Generates an `ActiveAlloc` containing the required allocations for the passed `AllocAtom` /// /// This will first attempt a bump allocation on the task's current segment. 
If that fails it will /// fallback to the runtime. pub fn atom_into_active_alloc<'op>( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, builder: LLVMBuilderRef, llvm_task: LLVMValueRef, atom: AllocAtom<'op>, ) -> ActiveAlloc<'op> { use arret_runtime::abitype; let required_cells = atom .box_sources .iter() .map(|box_source| match box_source { BoxSource::Stack => 0, BoxSource::Heap(box_size) => box_size.cell_count(), }) .sum(); if required_cells == 0 { return ActiveAlloc { box_slots: ptr::null_mut(), total_cells: 0, used_cells: 0, box_source_iter: atom.box_sources.into_iter(), cond_plan_iter: atom.cond_plans.into_iter(), }; } unsafe { let function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)); let mut bump_alloc_block = LLVMAppendBasicBlockInContext(tcx.llx, function, libcstr!("bump_alloc")); let mut runtime_alloc_block = LLVMAppendBasicBlockInContext(tcx.llx, function, libcstr!("runtime_alloc")); let cont_block = LLVMAppendBasicBlockInContext(tcx.llx, function, libcstr!("alloc_cont")); let seg_next_ptr = LLVMBuildStructGEP(builder, llvm_task, 0, libcstr!("seg_next_ptr")); let mut seg_old_next = LLVMBuildLoad(builder, seg_next_ptr, libcstr!("seg_old_next")); let gep_indices = &mut [LLVMConstInt( LLVMInt32TypeInContext(tcx.llx), required_cells as u64, 0, )]; let seg_new_next = LLVMBuildInBoundsGEP( builder, seg_old_next, gep_indices.as_mut_ptr(), gep_indices.len() as u32, libcstr!("seg_new_next"), ); let seg_end_ptr = LLVMBuildStructGEP(builder, llvm_task, 1, libcstr!("seg_end_ptr")); let seg_end = LLVMBuildLoad(builder, seg_end_ptr, libcstr!("seg_end")); let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let seg_new_next_int = LLVMBuildPtrToInt( builder, seg_new_next, llvm_i64, libcstr!("seg_new_next_int"), ); let seg_end_int = LLVMBuildPtrToInt(builder, seg_end, llvm_i64, libcstr!("seg_end_int")); let seg_has_space = LLVMBuildICmp( builder, LLVMIntPredicate::LLVMIntULE, seg_new_next_int, seg_end_int, libcstr!("seg_has_space"), ); LLVMBuildCondBr( builder, 
seg_has_space, bump_alloc_block, runtime_alloc_block, ); // Bump alloc succeeded; update the segment LLVMPositionBuilderAtEnd(builder, bump_alloc_block); LLVMBuildStore(builder, seg_new_next, seg_next_ptr); LLVMBuildBr(builder, cont_block); // Bump alloc failed; call the runtime LLVMPositionBuilderAtEnd(builder, runtime_alloc_block); let mut runtime_box_slots = gen_runtime_heap_alloc(tcx, mcx, builder, llvm_task, required_cells); LLVMBuildBr(builder, cont_block); LLVMPositionBuilderAtEnd(builder, cont_block); let box_slots = LLVMBuildPhi( builder, tcx.boxed_abi_to_llvm_ptr_type(&abitype::BoxedAbiType::Any), libcstr!("box_slots"), ); LLVMAddIncoming( box_slots, &mut seg_old_next as *mut _, &mut bump_alloc_block as *mut _, 1, ); LLVMAddIncoming( box_slots, &mut runtime_box_slots as *mut _, &mut runtime_alloc_block as *mut _, 1, ); ActiveAlloc { box_slots, total_cells: required_cells, used_cells: 0, box_source_iter: atom.box_sources.into_iter(), cond_plan_iter: atom.cond_plans.into_iter(), } } } ================================================ FILE: compiler/codegen/alloc/mod.rs ================================================ use std::vec; use llvm_sys::prelude::*; use arret_runtime::boxed; use crate::mir::ops; pub mod core; pub mod plan; pub mod types; /// Indicates where memory for a box allocation should come from #[derive(PartialEq, Debug, Clone, Copy)] pub enum BoxSource { Stack, Heap(boxed::BoxSize), } /// Contains the sub-plans for a conditional branch #[derive(PartialEq, Debug)] pub struct CondPlan<'op> { pub true_subplan: Vec>, pub false_subplan: Vec>, } /// Represents a sequence of MIR ops that begin and end with the heap in a consistent state #[derive(PartialEq, Debug, Default)] pub struct AllocAtom<'op> { box_sources: Vec, cond_plans: Vec>, ops_base: &'op [ops::Op], ops_count: usize, } impl<'op> AllocAtom<'op> { /// Creates a new `AllocAtom` with its ops starting at the specified slice fn new(ops_base: &'op [ops::Op]) -> Self { Self { ops_base, 
..Default::default() } } pub fn ops(&self) -> &'op [ops::Op] { &self.ops_base[0..self.ops_count] } /// Increments the used size of our ops by one fn push_op(&mut self) { self.ops_count += 1 } fn is_empty(&self) -> bool { self.ops_count == 0 } } pub struct ActiveAlloc<'op> { box_slots: LLVMValueRef, total_cells: usize, used_cells: usize, box_source_iter: vec::IntoIter, cond_plan_iter: vec::IntoIter>, } impl<'op> ActiveAlloc<'op> { pub fn is_empty(&self) -> bool { self.total_cells == self.used_cells } pub fn next_box_source(&mut self) -> BoxSource { self.box_source_iter.next().unwrap() } pub fn next_cond_plan(&mut self) -> CondPlan<'op> { self.cond_plan_iter.next().unwrap() } } ================================================ FILE: compiler/codegen/alloc/plan.rs ================================================ use arret_runtime::boxed; use crate::codegen::alloc::{AllocAtom, BoxSource, CondPlan}; use crate::codegen::analysis::escape::{CaptureKind, Captures}; use crate::codegen::target_gen::TargetCtx; use crate::mir::ops; struct AllocInfo { output_reg: ops::RegId, box_size: boxed::BoxSize, } /// Determines if an op requires the heap to be in a consistent state before it's executed /// /// Our `AllocAtom`s cannot span these operations fn op_needs_heap_checkpoint(tcx: &mut TargetCtx, op: &ops::Op) -> bool { use crate::mir::ops::OpKind; match op.kind() { OpKind::Ret(_) | OpKind::RetVoid | OpKind::Unreachable | OpKind::Call(_, _) | OpKind::Panic(_) | OpKind::Int64CheckedAdd(_, _) | OpKind::Int64CheckedSub(_, _) | OpKind::Int64CheckedMul(_, _) | OpKind::Int64CheckedDiv(_, _) | OpKind::Int64CheckedRem(_, _) => true, OpKind::Cond(cond_op) => cond_op .true_ops .iter() .chain(cond_op.false_ops.iter()) // We additionally need to make sure we don't allocate in our branches. Otherwise we // might need to plan an allocation of a dynamic size to cover each branch. Instead // just start a new atom for each branch. 
.any(|op| op_needs_heap_checkpoint(tcx, op) || op_alloc_info(tcx, op).is_some()), _ => false, } } /// Returns the output reg for an allocating op, or `None` otherwise fn op_alloc_info(tcx: &mut TargetCtx, op: &ops::Op) -> Option { use crate::mir::ops::OpKind; match op.kind() { OpKind::AllocBoxedInt(output_reg, _) => Some(AllocInfo { output_reg: *output_reg, box_size: boxed::Int::size(), }), OpKind::AllocBoxedFloat(output_reg, _) => Some(AllocInfo { output_reg: *output_reg, box_size: boxed::Float::size(), }), OpKind::AllocBoxedChar(output_reg, _) => Some(AllocInfo { output_reg: *output_reg, box_size: boxed::Char::size(), }), OpKind::AllocBoxedSym(output_reg, _) => Some(AllocInfo { output_reg: *output_reg, box_size: boxed::Sym::size(), }), OpKind::AllocBoxedPair(output_reg, _) => Some(AllocInfo { output_reg: *output_reg, box_size: boxed::Pair::::size(), }), OpKind::AllocBoxedFunThunk(output_reg, _) => Some(AllocInfo { output_reg: *output_reg, box_size: boxed::FunThunk::size(), }), OpKind::AllocBoxedRecord(output_reg, box_record_op) => { let record_storage = tcx .target_record_struct(&box_record_op.record_struct) .record_storage; Some(AllocInfo { output_reg: *output_reg, box_size: record_storage.box_size(), }) } _ => None, } } pub fn plan_allocs<'op>( tcx: &mut TargetCtx, captures: &Captures, ops: &'op [ops::Op], ) -> Vec> { use std::mem; let mut atoms = vec![]; let mut current_atom = AllocAtom::new(&ops[0..]); for (i, op) in ops.iter().enumerate() { let checkpointing_op = op_needs_heap_checkpoint(tcx, op); if checkpointing_op && !current_atom.is_empty() { atoms.push(mem::replace(&mut current_atom, AllocAtom::new(&ops[i..]))); } if let ops::OpKind::Cond(ops::CondOp { true_ops, false_ops, .. 
}) = op.kind() { current_atom.cond_plans.push(CondPlan { true_subplan: plan_allocs(tcx, captures, true_ops), false_subplan: plan_allocs(tcx, captures, false_ops), }); } else if let Some(AllocInfo { output_reg, box_size, }) = op_alloc_info(tcx, op) { if captures.get(output_reg) == CaptureKind::Never { current_atom.box_sources.push(BoxSource::Stack); } else { current_atom.box_sources.push(BoxSource::Heap(box_size)); } } current_atom.push_op(); if checkpointing_op { atoms.push(mem::replace( &mut current_atom, AllocAtom::new(&ops[i + 1..]), )); } } if !current_atom.is_empty() { atoms.push(current_atom); } atoms } #[cfg(test)] mod test { use super::*; /// Plans allocations assuming the native data layout fn plan_native_allocs(ops: &[ops::Op]) -> Vec> { use llvm_sys::target_machine::*; use crate::codegen::target_machine::create_target_machine; use crate::codegen::test::initialise_test_llvm; initialise_test_llvm(); let target_machine = create_target_machine( None, LLVMRelocMode::LLVMRelocDynamicNoPic, LLVMCodeModel::LLVMCodeModelDefault, ); let mut tcx = TargetCtx::new(target_machine, false); let atoms = plan_allocs(&mut tcx, &Captures::new(), ops); unsafe { LLVMDisposeTargetMachine(target_machine); } atoms } #[test] fn empty_ops() { let actual_atoms = plan_native_allocs(&[]); assert_eq!(0, actual_atoms.len()); } #[test] fn condless_allocs() { let reg1 = ops::RegId::alloc(); let reg2 = ops::RegId::alloc(); let reg3 = ops::RegId::alloc(); let reg4 = ops::RegId::alloc(); let input_ops = [ ops::OpKind::AllocBoxedInt(reg1, reg1).into(), ops::OpKind::ConstBoxedTrue(reg2, ()).into(), ops::OpKind::RetVoid.into(), ops::OpKind::AllocBoxedInt(reg3, reg3).into(), ops::OpKind::AllocBoxedInt(reg4, reg4).into(), ]; let expected_atoms = vec![ AllocAtom { box_sources: vec![BoxSource::Stack], cond_plans: vec![], ops_base: &input_ops[0..], ops_count: 2, }, AllocAtom { box_sources: vec![], cond_plans: vec![], ops_base: &input_ops[2..], ops_count: 1, }, AllocAtom { box_sources: 
vec![BoxSource::Stack, BoxSource::Stack], cond_plans: vec![], ops_base: &input_ops[3..], ops_count: 2, }, ]; let actual_atoms = plan_native_allocs(&input_ops); assert_eq!(expected_atoms, actual_atoms); } #[test] fn non_allocating_cond() { let output_reg = ops::RegId::alloc(); let true_result_reg = ops::RegId::alloc(); let false_result_reg = ops::RegId::alloc(); let test_reg = ops::RegId::alloc(); let true_ops = Box::new([ops::OpKind::ConstBoxedNil(true_result_reg, ()).into()]); let false_ops = Box::new([ops::OpKind::ConstBoxedNil(false_result_reg, ()).into()]); let input_ops = [ ops::OpKind::AllocBoxedInt(test_reg, test_reg).into(), ops::OpKind::Cond(ops::CondOp { reg_phi: Some(ops::RegPhi { output_reg, true_result_reg, false_result_reg, }), test_reg, true_ops, false_ops, }) .into(), ]; let actual_atoms = plan_native_allocs(&input_ops); // We should place the `AllocBoxedInt` and `Cond` in the same atom assert_eq!(1, actual_atoms.len()); } #[test] fn allocating_cond() { let output_reg = ops::RegId::alloc(); let test_reg = ops::RegId::alloc(); let true_result_reg = ops::RegId::alloc(); let false_result_reg = ops::RegId::alloc(); let true_ops = Box::new([ops::OpKind::ConstBoxedNil(true_result_reg, ()).into()]); let false_ops = Box::new([ops::OpKind::AllocBoxedInt(false_result_reg, false_result_reg).into()]); let input_ops = [ ops::OpKind::AllocBoxedInt(test_reg, test_reg).into(), ops::OpKind::Cond(ops::CondOp { reg_phi: Some(ops::RegPhi { output_reg, true_result_reg, false_result_reg, }), test_reg, true_ops, false_ops, }) .into(), ]; let actual_atoms = plan_native_allocs(&input_ops); // We should place the `AllocBoxedInt` and `Cond` in different atoms assert_eq!(2, actual_atoms.len()); } } ================================================ FILE: compiler/codegen/alloc/types.rs ================================================ use llvm_sys::core::*; use llvm_sys::prelude::*; use llvm_sys::LLVMAttributeReturnIndex; use arret_runtime::boxed; use 
crate::codegen::alloc::core::{gen_alloced_box, gen_alloced_box_with_llvm_type}; use crate::codegen::alloc::{ActiveAlloc, BoxSource}; use crate::codegen::mod_gen::ModCtx; use crate::codegen::record_struct; use crate::codegen::target_gen::TargetCtx; use crate::libcstr; use crate::mir::ops::RecordStructId; pub struct PairInput { pub llvm_head: LLVMValueRef, pub llvm_rest: LLVMValueRef, pub llvm_list_len: LLVMValueRef, } pub struct FunThunkInput { pub llvm_captures: LLVMValueRef, pub llvm_entry_point: LLVMValueRef, } pub struct RecordInput<'rs> { pub record_struct: &'rs RecordStructId, pub llvm_fields: Box<[LLVMValueRef]>, } pub fn gen_alloc_int( tcx: &mut TargetCtx, builder: LLVMBuilderRef, active_alloc: &mut ActiveAlloc<'_>, box_source: BoxSource, llvm_int_value: LLVMValueRef, ) -> LLVMValueRef { unsafe { let alloced_int = gen_alloced_box::(tcx, builder, active_alloc, box_source, b"alloced_int\0"); let value_ptr = LLVMBuildStructGEP(builder, alloced_int, 1, libcstr!("value_ptr")); LLVMBuildStore(builder, llvm_int_value, value_ptr); alloced_int } } pub fn gen_alloc_char( tcx: &mut TargetCtx, builder: LLVMBuilderRef, active_alloc: &mut ActiveAlloc<'_>, box_source: BoxSource, llvm_char_value: LLVMValueRef, ) -> LLVMValueRef { unsafe { let alloced_char = gen_alloced_box::( tcx, builder, active_alloc, box_source, b"alloced_char\0", ); let value_ptr = LLVMBuildStructGEP(builder, alloced_char, 1, libcstr!("value_ptr")); LLVMBuildStore(builder, llvm_char_value, value_ptr); alloced_char } } pub fn gen_alloc_sym( tcx: &mut TargetCtx, builder: LLVMBuilderRef, active_alloc: &mut ActiveAlloc<'_>, box_source: BoxSource, llvm_interned_sym: LLVMValueRef, ) -> LLVMValueRef { unsafe { let alloced_sym = gen_alloced_box::(tcx, builder, active_alloc, box_source, b"alloced_sym\0"); let interned_sym_ptr = LLVMBuildStructGEP(builder, alloced_sym, 1, libcstr!("interned_sym_ptr")); LLVMBuildStore(builder, llvm_interned_sym, interned_sym_ptr); alloced_sym } } pub fn gen_alloc_float( tcx: &mut 
TargetCtx, builder: LLVMBuilderRef, active_alloc: &mut ActiveAlloc<'_>, box_source: BoxSource, llvm_float_value: LLVMValueRef, ) -> LLVMValueRef { unsafe { let alloced_float = gen_alloced_box::( tcx, builder, active_alloc, box_source, b"alloced_float\0", ); let value_ptr = LLVMBuildStructGEP(builder, alloced_float, 1, libcstr!("value_ptr")); LLVMBuildStore(builder, llvm_float_value, value_ptr); alloced_float } } pub fn gen_alloc_boxed_pair( tcx: &mut TargetCtx, builder: LLVMBuilderRef, active_alloc: &mut ActiveAlloc<'_>, box_source: BoxSource, input: &PairInput, ) -> LLVMValueRef { let PairInput { llvm_head, llvm_rest, llvm_list_len, } = input; unsafe { let alloced_pair = gen_alloced_box::( tcx, builder, active_alloc, box_source, b"alloced_pair\0", ); let list_len_ptr = LLVMBuildStructGEP(builder, alloced_pair, 1, libcstr!("list_len_ptr")); LLVMBuildStore(builder, *llvm_list_len, list_len_ptr); let head_ptr = LLVMBuildStructGEP(builder, alloced_pair, 2, libcstr!("head_ptr")); LLVMBuildStore(builder, *llvm_head, head_ptr); let rest_ptr = LLVMBuildStructGEP(builder, alloced_pair, 3, libcstr!("rest_ptr")); LLVMBuildStore(builder, *llvm_rest, rest_ptr); alloced_pair } } pub fn gen_alloc_boxed_fun_thunk( tcx: &mut TargetCtx, builder: LLVMBuilderRef, active_alloc: &mut ActiveAlloc<'_>, box_source: BoxSource, input: &FunThunkInput, ) -> LLVMValueRef { let FunThunkInput { llvm_captures, llvm_entry_point, } = input; unsafe { let alloced_fun_thunk = gen_alloced_box::( tcx, builder, active_alloc, box_source, b"alloced_fun_thunk\0", ); let captures_ptr = LLVMBuildStructGEP(builder, alloced_fun_thunk, 1, libcstr!("captures_ptr")); LLVMBuildStore(builder, *llvm_captures, captures_ptr); let entry_point_ptr = LLVMBuildStructGEP(builder, alloced_fun_thunk, 2, libcstr!("entry_point_ptr")); LLVMBuildStore(builder, *llvm_entry_point, entry_point_ptr); alloced_fun_thunk } } pub fn gen_alloc_boxed_record( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, builder: LLVMBuilderRef, 
active_alloc: &mut ActiveAlloc<'_>, box_source: BoxSource, input: &RecordInput<'_>, ) -> LLVMValueRef { let RecordInput { record_struct, llvm_fields, } = input; let record_class_id = mcx.record_class_id_for_struct(record_struct); unsafe { let llvm_i8 = LLVMInt8TypeInContext(tcx.llx); let llvm_i32 = LLVMInt32TypeInContext(tcx.llx); let record_struct::TargetRecordStruct { data_layout, record_storage, llvm_data_type, .. } = *tcx.target_record_struct(record_struct); let may_contain_gc_refs = record_struct .field_abi_types .iter() .zip(llvm_fields.iter()) .any(|(field_abi_type, llvm_field)| { field_abi_type.may_contain_gc_refs() && LLVMIsConstant(*llvm_field) == 0 }); let boxed_record_name = format!("alloced_{}_record\0", record_struct.source_name); let llvm_box_type = tcx.record_struct_llvm_box_type(record_struct); let alloced_boxed_record = gen_alloced_box_with_llvm_type::( tcx, builder, active_alloc, box_source, llvm_box_type, boxed_record_name.as_bytes(), ); let may_contain_gc_refs_ptr = LLVMBuildStructGEP( builder, alloced_boxed_record, record_struct::CONTAINS_GC_REFS_INDEX, libcstr!("may_contain_gc_refs_ptr"), ); let llvm_may_contain_gc_refs = LLVMConstInt(llvm_i8, may_contain_gc_refs as u64, 1); LLVMBuildStore(builder, llvm_may_contain_gc_refs, may_contain_gc_refs_ptr); let record_class_id_ptr = LLVMBuildStructGEP( builder, alloced_boxed_record, record_struct::RECORD_CLASS_ID_INDEX, libcstr!("record_class_id_ptr"), ); let llvm_record_class_id = LLVMConstInt( tcx.record_class_id_llvm_type(), u64::from(record_class_id), 1, ); LLVMBuildStore(builder, llvm_record_class_id, record_class_id_ptr); // This is used by both inline and external records let record_data_gep_indices = &mut [ LLVMConstInt(llvm_i32, 0, 0), LLVMConstInt(llvm_i32, u64::from(record_struct::DATA_INDEX), 0), ]; let (llvm_record_data_ptr, inline_byte_len) = match (record_storage, box_source) { (boxed::RecordStorage::Inline(_), _) => { let llvm_inline_record_data_ptr = LLVMBuildInBoundsGEP( builder, 
alloced_boxed_record, record_data_gep_indices.as_mut_ptr(), record_data_gep_indices.len() as u32, libcstr!("inline_record_data"), ); let inline_byte_len = match data_layout { Some(data_layout) => data_layout.size(), None => 0, }; (llvm_inline_record_data_ptr, inline_byte_len) } (boxed::RecordStorage::External, BoxSource::Stack) => { // Allocate the record data let llvm_stack_record_data_ptr = LLVMBuildAlloca(builder, llvm_data_type, libcstr!("stack_record_data")); // Update our record data pointer let llvm_record_data_ptr_ptr = LLVMBuildInBoundsGEP( builder, alloced_boxed_record, record_data_gep_indices.as_mut_ptr(), record_data_gep_indices.len() as u32, libcstr!("record_data_ptr_ptr"), ); LLVMBuildStore( builder, llvm_stack_record_data_ptr, llvm_record_data_ptr_ptr, ); ( llvm_stack_record_data_ptr, boxed::Record::EXTERNAL_INLINE_LEN as usize, ) } (boxed::RecordStorage::External, BoxSource::Heap(_)) => { let data_layout = data_layout.unwrap(); let llvm_i8 = LLVMInt8TypeInContext(tcx.llx); let llvm_i32 = LLVMInt32TypeInContext(tcx.llx); let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let llvm_param_types = &mut [llvm_i64, llvm_i32]; let alloc_record_data_llvm_type = LLVMFunctionType( LLVMPointerType(llvm_i8, 0), llvm_param_types.as_mut_ptr(), llvm_param_types.len() as u32, 0, ); let alloc_record_data_fun = mcx.get_function_or_insert( alloc_record_data_llvm_type, b"arret_runtime_alloc_record_data\0", |alloc_record_data_fun| { LLVMAddAttributeAtIndex( alloc_record_data_fun, LLVMAttributeReturnIndex, tcx.llvm_noalias_attr(), ); }, ); let alloc_record_data_args = &mut [ LLVMConstInt(llvm_i64, data_layout.size() as u64, 0), LLVMConstInt(llvm_i32, data_layout.align() as u64, 0), ]; let llvm_untyped_record_data_ptr = LLVMBuildCall( builder, alloc_record_data_fun, alloc_record_data_args.as_mut_ptr(), alloc_record_data_args.len() as u32, libcstr!("external_record_data"), ); // Convert the record data pointer to the correct type let llvm_typed_record_data_ptr = 
LLVMBuildBitCast( builder, llvm_untyped_record_data_ptr, LLVMPointerType(llvm_data_type, 0), libcstr!("typed_record_data_ptr"), ); // Save the record data pointer let llvm_record_data_ptr_ptr = LLVMBuildInBoundsGEP( builder, alloced_boxed_record, record_data_gep_indices.as_mut_ptr(), record_data_gep_indices.len() as u32, libcstr!("record_data_ptr_ptr"), ); LLVMBuildStore( builder, llvm_typed_record_data_ptr, llvm_record_data_ptr_ptr, ); // Save the compact layout let record_compact_layout_gep_indices = &mut [ LLVMConstInt(llvm_i32, 0, 0), LLVMConstInt( llvm_i32, u64::from(record_struct::EXTERNAL_COMPACT_LAYOUT_INDEX), 0, ), ]; let llvm_record_compact_layout_ptr = LLVMBuildInBoundsGEP( builder, alloced_boxed_record, record_compact_layout_gep_indices.as_mut_ptr(), record_compact_layout_gep_indices.len() as u32, libcstr!("record_compact_layout_ptr"), ); LLVMBuildStore( builder, LLVMConstInt( llvm_i64, boxed::RecordData::alloc_layout_to_compact(Some(data_layout)), 0, ), llvm_record_compact_layout_ptr, ); ( llvm_typed_record_data_ptr, boxed::Record::EXTERNAL_INLINE_LEN as usize, ) } }; let inline_byte_len_ptr = LLVMBuildStructGEP( builder, alloced_boxed_record, record_struct::IS_INLINE_INDEX, libcstr!("inline_byte_len_ptr"), ); let llvm_inline_byte_len = LLVMConstInt(llvm_i8, inline_byte_len as u64, 1); LLVMBuildStore(builder, llvm_inline_byte_len, inline_byte_len_ptr); for (field_index, llvm_field) in llvm_fields.iter().enumerate() { let field_gep_indices = &mut [ LLVMConstInt(llvm_i32, 0, 0), LLVMConstInt(llvm_i32, field_index as u64, 0), ]; let llvm_field_ptr = LLVMBuildInBoundsGEP( builder, llvm_record_data_ptr, field_gep_indices.as_mut_ptr(), field_gep_indices.len() as u32, libcstr!("init_record_field_ptr"), ); LLVMBuildStore(builder, *llvm_field, llvm_field_ptr); } let boxed_record_name = format!("alloced_{}_record\0", record_struct.source_name); LLVMBuildBitCast( builder, alloced_boxed_record, tcx.boxed_abi_to_llvm_ptr_type(&boxed::TypeTag::Record.into()), 
boxed_record_name.as_ptr() as *const _, ) } } ================================================ FILE: compiler/codegen/analysis/escape.rs ================================================ use std::collections::{HashMap, HashSet}; use arret_runtime::abitype::{AbiType, ParamAbiType, ParamCapture, RetAbiType}; use crate::codegen::GenAbi; use crate::mir::ops; /// Describes the capture behaviour of a function parameter #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug)] pub enum CaptureKind { /// This reg is always captured Always = 2, /// This reg is captured if the function's return value is captured ViaRet = 1, /// This reg is never captured Never = 0, } impl CaptureKind { /// Calculates the capture of a passed parameter based on the the call's return capture fn capture_for_call_param(self, return_capture: CaptureKind) -> CaptureKind { match self { CaptureKind::Always => CaptureKind::Always, CaptureKind::ViaRet => return_capture, CaptureKind::Never => CaptureKind::Never, } } } /// Tracks the captures for all regs in a function #[derive(Debug)] pub struct Captures { inner: HashMap, } impl Captures { pub fn new() -> Captures { Captures { inner: HashMap::new(), } } /// Adds the capture of a reg to the capture state /// /// If there is an existing capture the "stronger" capture kind of the two will be used. pub fn add(&mut self, reg_id: ops::RegId, capture: CaptureKind) { use std::cmp::max; // It's not worthwhile to track never captures; that's the default capture type if capture != CaptureKind::Never { self.inner .entry(reg_id) .and_modify(|e| { *e = max(*e, capture); }) .or_insert(capture); } } pub fn get(&self, reg_id: ops::RegId) -> CaptureKind { self.inner .get(®_id) .cloned() .unwrap_or(CaptureKind::Never) } } /// Infers if a function can capture a parameter based on its return type /// /// This is used for Rust functions where we don't have precise capture information. 
/// This uses a very conservative algorithm where any function returning a box is assumed to
/// capture all of its arguments.
pub fn infer_param_capture_kind(
    ret_abi_type: &RetAbiType,
    param_abi_type: &ParamAbiType,
) -> CaptureKind {
    // A boxed return value may alias a boxed argument, so boxes can escape via the return
    let returns_box = matches!(ret_abi_type, RetAbiType::Inhabited(AbiType::Boxed(_)));

    match param_abi_type.capture {
        ParamCapture::Auto => {
            if returns_box {
                CaptureKind::ViaRet
            } else {
                CaptureKind::Never
            }
        }
        // Explicit annotations on the param override the inference above
        ParamCapture::Always => CaptureKind::Always,
        ParamCapture::Never => CaptureKind::Never,
    }
}

/// Adds the captures implied by a call to a static (Rust) symbol.
///
/// Each argument register receives the capture kind inferred from the callee's ABI, combined
/// with how the call's own return value is captured.
fn add_static_symbol_call_captures(
    captures: &mut Captures,
    return_capture: CaptureKind,
    static_symbol_abi: &GenAbi,
    args: &[ops::RegId],
) {
    let arg_iter = args.iter();
    assert_eq!(arg_iter.len(), static_symbol_abi.params.len());

    for (arg_reg, param_abi_type) in arg_iter.zip(static_symbol_abi.params.iter()) {
        let param_capture = infer_param_capture_kind(&static_symbol_abi.ret, param_abi_type);
        captures.add(
            *arg_reg,
            param_capture.capture_for_call_param(return_capture),
        );
    }
}

// Tracks capture state while walking the program's funs.
//
// NOTE(review): the generic parameters of `HashMap`/`HashSet` below appear to have been lost in
// extraction — presumably keyed by `ops::PrivateFunId`; verify against the repository.
struct ProgramCaptureCtx<'of> {
    private_funs: &'of HashMap,
    private_fun_captures: HashMap,
    // Funs we're currently recursing in to
    recursing_private_funs: HashSet,
}

impl<'of> ProgramCaptureCtx<'of> {
    /// Propagates capture information backwards through a single op.
    ///
    /// This is driven in reverse op order (see `calc_fun_captures`) so the capture kind of an
    /// op's output register is already known when its input registers are visited.
    fn add_op_captures(&mut self, captures: &mut Captures, ret_type: &RetAbiType, op: &ops::Op) {
        use crate::mir::ops::OpKind;

        match op.kind() {
            OpKind::Ret(ret_reg) => {
                if let RetAbiType::Inhabited(AbiType::Boxed(_)) = ret_type {
                    // `Ret` captures boxes unconditionally
                    captures.add(*ret_reg, CaptureKind::ViaRet);
                }
            }
            // These ops merely alias their input box: the input is captured exactly as the
            // output is
            OpKind::CastBoxed(reg, ops::CastBoxedOp { from_reg, .. })
            | OpKind::Alias(reg, from_reg)
            | OpKind::LoadBoxedPairHead(reg, from_reg)
            | OpKind::LoadBoxedPairRest(reg, from_reg)
            | OpKind::LoadBoxedVectorMember(
                reg,
                ops::LoadBoxedVectorMemberOp {
                    vector_reg: from_reg,
                    ..
                },
            ) => {
                captures.add(*from_reg, captures.get(*reg));
            }
            OpKind::AllocBoxedPair(
                reg,
                ops::BoxPairOp {
                    head_reg, rest_reg, ..
                },
            ) => {
                // The pair holds its head and rest, so they are captured however the pair is
                let output_capture = captures.get(*reg);
                captures.add(*head_reg, output_capture);
                captures.add(*rest_reg, output_capture);
            }
            OpKind::Cond(ops::CondOp {
                reg_phi,
                true_ops,
                false_ops,
                ..
            }) => {
                if let Some(reg_phi) = reg_phi {
                    let output_capture = captures.get(reg_phi.output_reg);

                    // Propagate captures through the phi
                    captures.add(reg_phi.true_result_reg, output_capture);
                    captures.add(reg_phi.false_result_reg, output_capture);
                }

                // Both branches are walked in reverse, like the top-level op list
                for op in true_ops.iter().rev().chain(false_ops.iter().rev()) {
                    self.add_op_captures(captures, ret_type, op);
                }
            }
            OpKind::Call(reg, ops::CallOp { callee, args, .. }) => {
                let return_capture = captures.get(*reg);

                match callee {
                    ops::Callee::StaticSymbol(ops::StaticSymbol { abi, .. }) => {
                        add_static_symbol_call_captures(captures, return_capture, abi, args);
                    }
                    ops::Callee::PrivateFun(private_fun_id) => {
                        let ops_fun = &self.private_funs[private_fun_id];

                        if !self.recursing_private_funs.contains(private_fun_id) {
                            // Map every argument to the capture kind of the matching param reg
                            // in the callee
                            let callee_captures =
                                self.captures_for_private_fun_id(*private_fun_id);

                            for (arg_reg, param_reg) in
                                args.iter().zip(ops_fun.param_regs.iter())
                            {
                                captures.add(*arg_reg, callee_captures.get(*param_reg));
                            }
                        } else {
                            // This is part of a recursive loop; assume everything is captured.
                            // While this seems like an easy way out this is probably the right
                            // thing to do. If there are many loop iterations at runtime we do
                            // not want to allocate boxes on the stack. This both prevents tail
                            // recursion and can lead to a stack overflow. By claiming that
                            // everything is captured we force them to be heap allocated.
                            for arg_reg in args.iter() {
                                captures.add(*arg_reg, CaptureKind::Always);
                            }
                        }
                    }
                    ops::Callee::BoxedFunThunk(_) => {
                        // We know nothing about the actual captures. We need to assume the worst.
                        for arg_reg in args.iter() {
                            captures.add(*arg_reg, CaptureKind::Always);
                        }
                    }
                };
            }
            OpKind::TailCall(_, ops::TailCallOp { args, .. }) => {
                // This is the same justification as the recursive case in `OpKind::Call`
                for arg_reg in args.iter() {
                    captures.add(*arg_reg, CaptureKind::Always);
                }
            }
            OpKind::MakeCallback(_, ops::MakeCallbackOp { callee, .. })
            | OpKind::AllocBoxedFunThunk(_, ops::BoxFunThunkOp { callee, .. })
            | OpKind::ConstBoxedFunThunk(_, ops::BoxFunThunkOp { callee, .. }) => {
                // We don't actually care about these captures; we just pull them in for dependencies
                if let ops::Callee::PrivateFun(private_fun_id) = callee {
                    // If we're already recursing we'll only loop if we re-enter
                    if !self.recursing_private_funs.contains(private_fun_id) {
                        self.captures_for_private_fun_id(*private_fun_id);
                    }
                }
            }
            OpKind::AllocBoxedRecord(reg, ops::BoxRecordOp { field_regs, .. }) => {
                // The record holds every field, so each field is captured however the record is
                let output_capture = captures.get(*reg);
                for field_reg in field_regs.iter() {
                    captures.add(*field_reg, output_capture);
                }
            }
            OpKind::LoadBoxedRecordField(
                reg,
                ops::LoadBoxedRecordFieldOp {
                    record_reg,
                    record_struct,
                    field_index,
                },
            ) => {
                let output_capture = captures.get(*reg);

                // Don't capture the record if we're loading a non-GCed value
                if record_struct.field_abi_types[*field_index].may_contain_gc_refs() {
                    captures.add(*record_reg, output_capture);
                }
            }
            _ => {}
        }
    }

    /// Returns the memoised captures for a private fun, computing them on first use.
    fn captures_for_private_fun_id(&mut self, private_fun_id: ops::PrivateFunId) -> &Captures {
        if self.private_fun_captures.contains_key(&private_fun_id) {
            return &self.private_fun_captures[&private_fun_id];
        }

        // Record that we're inside this fun so recursive calls are detected in
        // `add_op_captures` above
        self.recursing_private_funs.insert(private_fun_id);
        let ops_fun = &self.private_funs[&private_fun_id];
        let captures = self.calc_fun_captures(ops_fun);
        self.recursing_private_funs.remove(&private_fun_id);

        self.private_fun_captures
            .entry(private_fun_id)
            .or_insert(captures)
    }

    /// Calculates the captures for a single fun by walking its ops in reverse order.
    fn calc_fun_captures(&mut self, fun: &ops::Fun) -> Captures {
        let mut captures = Captures::new();

        for op in fun.ops.iter().rev() {
            self.add_op_captures(&mut captures, &fun.abi.ret, op);
        }

        captures
    }
}

// Capture results for the entry fun and every (used) private fun.
//
// NOTE(review): the `HashMap` generic parameters below were lost in extraction.
pub struct ProgramCaptures {
    pub entry_fun_captures: Captures,
    pub private_fun_captures: HashMap,
}

/// Calculates the captured registers for the passed fun and every fun it references
pub fn calc_program_captures(
    private_funs: &HashMap,
    entry_fun: &ops::Fun,
) -> ProgramCaptures {
    let mut ctx = ProgramCaptureCtx {
        private_funs,
        private_fun_captures: HashMap::new(),
        recursing_private_funs: HashSet::new(),
    };

    let entry_fun_captures = ctx.calc_fun_captures(entry_fun);

    ProgramCaptures {
        private_fun_captures: ctx.private_fun_captures,
        entry_fun_captures,
    }
}

#[cfg(test)]
mod test {
    use super::*;

    use arret_runtime::boxed;

    use crate::source::EMPTY_SPAN;

    // Convenience wrapper: analyse a fun with no private funs and return its captures
    fn calc_single_fun_captures(fun: &ops::Fun) -> Captures {
        calc_program_captures(&HashMap::new(), fun).entry_fun_captures
    }

    #[test]
    fn infer_param_capture() {
        // Boxed return type can capture boxed parameter
        assert_eq!(
            CaptureKind::ViaRet,
            infer_param_capture_kind(&boxed::TypeTag::Int.into(), &boxed::TypeTag::Int.into())
        );

        // Unboxed return type cannot capture boxed parameter
        assert_eq!(
            CaptureKind::Never,
            infer_param_capture_kind(&AbiType::Bool.into(), &boxed::TypeTag::Int.into())
        );
    }

    #[test]
    fn empty_fun_captures() {
        let param_reg = ops::RegId::alloc();

        let test_fun = ops::Fun {
            span: EMPTY_SPAN,
            source_name: None,
            abi: ops::OpsAbi {
                call_conv: ops::CallConv::FastCc,
                params: Box::new([boxed::TypeTag::Int.into()]),
                ret: RetAbiType::Void,
            },
            param_regs: Box::new([param_reg]),
            ops: Box::new([]),
        };

        let captures = calc_single_fun_captures(&test_fun);
        assert_eq!(CaptureKind::Never, captures.get(param_reg));
    }

    #[test]
    fn capture_param_via_ret() {
        let capture_reg = ops::RegId::alloc();

        let test_fun = ops::Fun {
            span: EMPTY_SPAN,
            source_name: None,
            abi: ops::OpsAbi {
                call_conv: ops::CallConv::FastCc,
                params: Box::new([boxed::TypeTag::Int.into()]),
                ret: boxed::TypeTag::Int.into(),
            },
            param_regs: Box::new([capture_reg]),
            ops: Box::new([ops::OpKind::Ret(capture_reg).into()]),
        };

        let captures = calc_single_fun_captures(&test_fun);
        assert_eq!(CaptureKind::ViaRet, captures.get(capture_reg));
    }

    #[test]
    fn capture_param_via_pair() {
        let param_reg = ops::RegId::alloc();
        let ret_reg = ops::RegId::alloc();

        let test_fun = ops::Fun {
            span: EMPTY_SPAN,
            source_name: None,
            abi: ops::OpsAbi {
                call_conv: ops::CallConv::FastCc,
                params: Box::new([boxed::TypeTag::Int.into()]),
                ret: boxed::TypeTag::Pair.into(),
            },
            param_regs: Box::new([param_reg]),
            ops: Box::new([
                ops::OpKind::AllocBoxedPair(
                    ret_reg,
                    ops::BoxPairOp {
                        head_reg: param_reg,
                        rest_reg: param_reg,
                        list_len_reg: param_reg,
                    },
                )
                .into(),
                ops::OpKind::Ret(ret_reg).into(),
            ]),
        };

        let captures = calc_single_fun_captures(&test_fun);
        assert_eq!(CaptureKind::ViaRet, captures.get(param_reg));
        assert_eq!(CaptureKind::ViaRet, captures.get(ret_reg));
    }

    #[test]
    fn capture_param_via_box_thunk_call() {
        let param_reg = ops::RegId::alloc();
        let ret_reg = ops::RegId::alloc();

        let test_fun = ops::Fun {
            span: EMPTY_SPAN,
            source_name: None,
            abi: ops::OpsAbi {
                call_conv: ops::CallConv::FastCc,
                params: Box::new([boxed::TypeTag::Int.into()]),
                ret: boxed::TypeTag::Pair.into(),
            },
            param_regs: Box::new([param_reg]),
            ops: Box::new([
                ops::OpKind::Call(
                    ret_reg,
                    ops::CallOp {
                        callee: ops::Callee::BoxedFunThunk(param_reg),
                        impure: true,
                        args: Box::new([param_reg, param_reg, param_reg]),
                    },
                )
                .into(),
                ops::OpKind::Ret(ret_reg).into(),
            ]),
        };

        let captures = calc_single_fun_captures(&test_fun);
        assert_eq!(CaptureKind::Always, captures.get(param_reg));
        assert_eq!(CaptureKind::ViaRet, captures.get(ret_reg));
    }

    #[test]
    fn capture_param_via_static_symbol_call() {
        // These are passed to the first call with an unused ret
        let param_reg1 = ops::RegId::alloc();
        let param_reg2 = ops::RegId::alloc();
        let param_reg3 = ops::RegId::alloc();

        // These are passed to the second call which does have its ret captured
        let param_reg4 = ops::RegId::alloc();
        let param_reg5 = ops::RegId::alloc();
        let param_reg6 = ops::RegId::alloc();

        let unused_reg = ops::RegId::alloc();
        let ret_reg = ops::RegId::alloc();

        // One param of each `ParamCapture` flavour so all three code paths are exercised
        let static_symbol_abi = GenAbi {
            takes_task: false,
            params: Box::new([
                ParamAbiType {
                    abi_type: boxed::TypeTag::Int.into(),
                    capture: ParamCapture::Never,
                },
                ParamAbiType {
                    abi_type: boxed::TypeTag::Int.into(),
                    capture: ParamCapture::Auto,
                },
                ParamAbiType {
                    abi_type: boxed::TypeTag::Int.into(),
                    capture: ParamCapture::Always,
                },
            ]),
            ret: boxed::TypeTag::Int.into(),
        };

        let static_symbol = ops::StaticSymbol {
            symbol: "test",
            impure: true,
            abi: static_symbol_abi,
        };

        let test_fun = ops::Fun {
            span: EMPTY_SPAN,
            source_name: None,
            abi: ops::OpsAbi {
                call_conv: ops::CallConv::FastCc,
                params: Box::new([
                    boxed::TypeTag::Int.into(),
                    boxed::TypeTag::Int.into(),
                    boxed::TypeTag::Int.into(),
                    boxed::TypeTag::Int.into(),
                    boxed::TypeTag::Int.into(),
                    boxed::TypeTag::Int.into(),
                ]),
                ret: boxed::TypeTag::Int.into(),
            },
            param_regs: Box::new([param_reg1]),
            ops: Box::new([
                ops::OpKind::Call(
                    unused_reg,
                    ops::CallOp {
                        callee: ops::Callee::StaticSymbol(static_symbol.clone()),
                        impure: true,
                        args: Box::new([param_reg1, param_reg2, param_reg3]),
                    },
                )
                .into(),
                ops::OpKind::Call(
                    ret_reg,
                    ops::CallOp {
                        callee: ops::Callee::StaticSymbol(static_symbol),
                        impure: true,
                        args: Box::new([param_reg4, param_reg5, param_reg6]),
                    },
                )
                .into(),
                ops::OpKind::Ret(ret_reg).into(),
            ]),
        };

        let captures = calc_single_fun_captures(&test_fun);
        assert_eq!(CaptureKind::Never, captures.get(param_reg1));
        assert_eq!(CaptureKind::Never, captures.get(param_reg2));
        assert_eq!(CaptureKind::Always, captures.get(param_reg3));
        assert_eq!(CaptureKind::Never, captures.get(param_reg4));
        assert_eq!(CaptureKind::ViaRet, captures.get(param_reg5));
        assert_eq!(CaptureKind::Always, captures.get(param_reg6));
        assert_eq!(CaptureKind::Never, captures.get(unused_reg));
        assert_eq!(CaptureKind::ViaRet, captures.get(ret_reg));
    }

    #[test]
    fn capture_param_via_cond() {
        let param_reg = ops::RegId::alloc();
        let ret_reg = ops::RegId::alloc();

        let test_fun = ops::Fun {
            span: EMPTY_SPAN,
            source_name: None,
            abi: ops::OpsAbi {
                call_conv: ops::CallConv::FastCc,
                params: Box::new([boxed::TypeTag::Int.into()]),
                ret: boxed::TypeTag::Pair.into(),
            },
            param_regs: Box::new([param_reg]),
            ops: Box::new([
                ops::OpKind::Cond(ops::CondOp {
                    reg_phi: Some(ops::RegPhi {
                        output_reg: ret_reg,
                        true_result_reg: param_reg,
                        false_result_reg: param_reg,
                    }),
                    test_reg: param_reg,
                    true_ops: Box::new([]),
                    false_ops: Box::new([]),
                })
                .into(),
                ops::OpKind::Ret(ret_reg).into(),
            ]),
        };

        let captures = calc_single_fun_captures(&test_fun);
        assert_eq!(CaptureKind::ViaRet, captures.get(param_reg));
        assert_eq!(CaptureKind::ViaRet, captures.get(ret_reg));
    }
}

================================================
FILE: compiler/codegen/analysis/mod.rs
================================================
pub mod escape;
pub mod names;

use std::collections::{BTreeMap, HashMap};
use std::rc::Rc;

use arret_runtime::intern;

use crate::codegen::analysis::escape::Captures;
use crate::mir::ops;

// Analysis results for an entire module.
//
// NOTE(review): generic parameters for `HashMap`/`BTreeMap` below were lost in extraction;
// verify the exact key/value types against the repository.
pub struct AnalysedMod<'of> {
    private_funs: HashMap>,
    entry_fun: AnalysedFun<'of>,
    global_interned_names: BTreeMap, intern::InternedSym>,
}

// A fun paired with its calculated captures
pub struct AnalysedFun<'of> {
    pub ops_fun: &'of ops::Fun,
    pub captures: Captures,
}

impl<'of> AnalysedMod<'of> {
    /// Runs escape and name analysis over the entry fun and its referenced private funs.
    pub fn new(
        private_funs: &'of HashMap,
        entry_fun: &'of ops::Fun,
    ) -> AnalysedMod<'of> {
        // This also determines which private funs are used; private_fun_captures won't contain
        // entries for unused funs
        let escape::ProgramCaptures {
            private_fun_captures,
            entry_fun_captures,
        } = escape::calc_program_captures(private_funs, entry_fun);

        // Pair every used private fun with its captures
        let private_funs = private_fun_captures
            .into_iter()
            .map(|(private_fun_id, captures)| {
                (
                    private_fun_id,
                    AnalysedFun {
                        ops_fun: &private_funs[&private_fun_id],
                        captures,
                    },
                )
            })
            .collect();

        let global_interned_names =
            names::calc_program_global_interned_names(&private_funs, entry_fun);

        AnalysedMod {
            private_funs,
            entry_fun: AnalysedFun {
                ops_fun: entry_fun,
                captures: entry_fun_captures,
            },
            global_interned_names,
        }
    }

    // Iterates over the used private funs and their analysis results
    pub fn private_funs(&self) -> impl Iterator)> {
        self.private_funs.iter()
    }

    pub fn entry_fun(&self) -> &AnalysedFun<'of> {
        &self.entry_fun
    }

    pub fn global_interned_names(&self) -> &BTreeMap, intern::InternedSym> {
        &self.global_interned_names
    }
}

================================================
FILE: compiler/codegen/analysis/names.rs
================================================
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::rc::Rc;

use arret_runtime::intern;

use super::AnalysedFun;
use crate::mir::ops;

// Collects the interned sym names used by a single op, recursing into `Cond` branches
fn add_op_global_interned_names(names: &mut BTreeSet>, op: &ops::Op) {
    use ops::OpKind;

    match op.kind() {
        OpKind::ConstInternedSym(_, name) | OpKind::ConstBoxedSym(_, name) => {
            // Names that can be encoded inline don't need a global index
            if intern::InternedSym::try_from_inline_name(name).is_none() {
                names.insert(name.clone());
            }
        }
        OpKind::Cond(ops::CondOp {
            true_ops,
            false_ops,
            ..
        }) => {
            for op in true_ops.iter().rev().chain(false_ops.iter().rev()) {
                add_op_global_interned_names(names, op);
            }
        }
        _ => {}
    }
}

// Collects the interned sym names used by every op in a fun
fn add_fun_global_interned_names(fun: &ops::Fun, names: &mut BTreeSet>) {
    for op in fun.ops.iter() {
        add_op_global_interned_names(names, op);
    }
}

/// Finds all global interned names in the program and returns them in sorted order
pub fn calc_program_global_interned_names(
    private_funs: &HashMap>,
    entry_fun: &ops::Fun,
) -> BTreeMap, intern::InternedSym> {
    let mut names: BTreeSet> = BTreeSet::new();

    for fun in private_funs
        .values()
        .map(|af| af.ops_fun)
        .chain(std::iter::once(entry_fun))
    {
        add_fun_global_interned_names(fun, &mut names);
    }

    // `BTreeSet` iterates in sorted order so global indices are assigned deterministically
    names
        .into_iter()
        .enumerate()
        .map(|(idx, name)| (name, intern::InternedSym::from_global_index(idx as u32)))
        .collect()
}

#[cfg(test)]
mod test {
    use super::*;

    use arret_runtime::abitype::RetAbiType;
    use arret_runtime::boxed;

    use crate::source::EMPTY_SPAN;

    #[test]
    fn simple_global_interned_names() {
        let param_reg = ops::RegId::alloc();
        let inline_reg = ops::RegId::alloc();
        let alpha_reg = ops::RegId::alloc();
        let beta_reg = ops::RegId::alloc();
        let gamma_reg = ops::RegId::alloc();

        let test_fun = ops::Fun {
            span: EMPTY_SPAN,
            source_name: None,
            abi: ops::OpsAbi {
                call_conv: ops::CallConv::FastCc,
                params: Box::new([boxed::TypeTag::Int.into()]),
                ret: RetAbiType::Void,
            },
            param_regs: Box::new([]),
            ops: Box::new([
                ops::OpKind::ConstBoxedSym(inline_reg, "inline".into()).into(),
                ops::OpKind::Cond(ops::CondOp {
                    reg_phi: None,
                    test_reg: param_reg,
                    true_ops: Box::new([ops::OpKind::ConstBoxedSym(
                        beta_reg,
                        "beta NOT INLINE".into(),
                    )
                    .into()]),
                    false_ops: Box::new([ops::OpKind::ConstInternedSym(
                        gamma_reg,
                        "gamma NOT INLINE".into(),
                    )
                    .into()]),
                })
                .into(),
                ops::OpKind::ConstBoxedSym(alpha_reg, "alpha NOT INLINE".into()).into(),
            ]),
        };

        let global_interned_names = calc_program_global_interned_names(&HashMap::new(), &test_fun);

        // Names are indexed in sorted order regardless of the order they appear in the ops
        assert_eq!(
            [
                ("alpha NOT INLINE", 0u32),
                ("beta NOT INLINE", 1u32),
                ("gamma NOT INLINE", 2u32)
            ]
            .iter()
            .map(|(name, idx)| ((*name).into(), intern::InternedSym::from_global_index(*idx)))
            .collect::, _>>(),
            global_interned_names
        );
    }
}

================================================
FILE: compiler/codegen/box_layout.rs
================================================
use llvm_sys::core::*;
use llvm_sys::prelude::*;

use arret_runtime::abitype::{BoxedAbiType, EncodeBoxedAbiType, TOP_LIST_BOXED_ABI_TYPE};
use arret_runtime::boxed;
use arret_runtime::boxed::TypeTag;

use crate::codegen::record_struct;
use crate::codegen::target_gen::TargetCtx;
use crate::codegen::GenAbi;

/// Represents the runtime layout of a boxed data structure
///
/// There are many boxed ABI types that can correspond to the same type name and layout. For
/// example, all boxed pair types currently share a layout.
#[derive(Clone, Hash, PartialEq, Eq)]
pub enum BoxLayout {
    Any,
    Bool,
    Num,
    List,
    Union,
    ConstTagged(boxed::TypeTag),
}

impl BoxLayout {
    /// Returns a NULL terminated type name for the box layout
    ///
    /// This is used to make the LLVM IR more descriptive. Some boxes with identical layouts have
    /// distinct enum values and names for the purposes of making LLVM IR more readable.
    pub fn type_name(&self) -> &'static [u8] {
        match self {
            BoxLayout::Any => b"boxed_any\0",
            BoxLayout::Bool => b"boxed_bool\0",
            BoxLayout::Num => b"boxed_num\0",
            BoxLayout::List => b"boxed_list\0",
            BoxLayout::Union => b"boxed_union\0",
            BoxLayout::ConstTagged(TypeTag::Nil) => b"boxed_nil\0",
            BoxLayout::ConstTagged(TypeTag::True) => b"boxed_true\0",
            BoxLayout::ConstTagged(TypeTag::False) => b"boxed_false\0",
            BoxLayout::ConstTagged(TypeTag::Int) => b"boxed_int\0",
            BoxLayout::ConstTagged(TypeTag::Float) => b"boxed_float\0",
            BoxLayout::ConstTagged(TypeTag::Char) => b"boxed_char\0",
            BoxLayout::ConstTagged(TypeTag::Set) => b"boxed_set\0",
            BoxLayout::ConstTagged(TypeTag::Str) => b"boxed_str\0",
            BoxLayout::ConstTagged(TypeTag::Sym) => b"boxed_sym\0",
            BoxLayout::ConstTagged(TypeTag::FunThunk) => b"boxed_fun_thunk\0",
            BoxLayout::ConstTagged(TypeTag::Pair) => b"boxed_pair\0",
            BoxLayout::ConstTagged(TypeTag::Vector) => b"boxed_vector\0",
            BoxLayout::ConstTagged(TypeTag::Record) => b"boxed_record\0",
            BoxLayout::ConstTagged(TypeTag::Map) => b"boxed_map\0",
        }
    }

    /// Appends member types to the passed `Vec`
    ///
    /// This presumes `members` already contains the box header
    pub fn append_members(&self, tcx: &mut TargetCtx, members: &mut Vec) {
        unsafe {
            match self {
                BoxLayout::Any => {
                    use std::mem;

                    // Pads the layout out with a byte array after the header.
                    // NOTE(review): the `size_of::()` type arguments were lost in extraction;
                    // verify which types the padding is computed from.
                    let llvm_byte = LLVMInt8TypeInContext(tcx.llx);
                    let padding_bytes = mem::size_of::() - mem::size_of::();

                    members.push(LLVMArrayType(llvm_byte, padding_bytes as u32));
                }
                BoxLayout::ConstTagged(TypeTag::Int) => {
                    members.push(LLVMInt64TypeInContext(tcx.llx));
                }
                BoxLayout::ConstTagged(TypeTag::Float) => {
                    members.push(LLVMDoubleTypeInContext(tcx.llx));
                }
                BoxLayout::ConstTagged(TypeTag::Char) => {
                    members.push(LLVMInt32TypeInContext(tcx.llx));
                }
                BoxLayout::ConstTagged(TypeTag::Str) => {
                    members.push(LLVMInt8TypeInContext(tcx.llx));
                }
                BoxLayout::ConstTagged(TypeTag::Sym) => {
                    members.push(LLVMInt64TypeInContext(tcx.llx));
                }
                BoxLayout::ConstTagged(TypeTag::FunThunk) => {
                    // captures followed by a pointer to the thunk entry point
                    members.extend_from_slice(&[
                        tcx.captures_llvm_type(),
                        LLVMPointerType(tcx.fun_abi_to_llvm_type(&GenAbi::thunk_abi()), 0),
                    ]);
                }
                BoxLayout::ConstTagged(TypeTag::Pair) => {
                    let llvm_i64 = LLVMInt64TypeInContext(tcx.llx);
                    let llvm_any_ptr = tcx.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any);
                    let llvm_any_list_ptr =
                        tcx.boxed_abi_to_llvm_ptr_type(&TOP_LIST_BOXED_ABI_TYPE);

                    members.extend_from_slice(&[llvm_i64, llvm_any_ptr, llvm_any_list_ptr]);
                }
                BoxLayout::ConstTagged(TypeTag::Record) => {
                    record_struct::append_common_internal_members(tcx, members);
                }
                BoxLayout::List => {
                    members.push(LLVMInt64TypeInContext(tcx.llx));
                }
                BoxLayout::ConstTagged(TypeTag::Set) => {
                    let llvm_i32 = LLVMInt32TypeInContext(tcx.llx);
                    let llvm_any_ptr = tcx.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any);

                    members.extend_from_slice(&[llvm_i32, llvm_any_ptr, llvm_any_ptr, llvm_any_ptr])
                }
                BoxLayout::ConstTagged(TypeTag::Vector) => {
                    // inline_len
                    members.push(LLVMInt32TypeInContext(tcx.llx));
                }
                // These layouts have no members beyond the box header
                BoxLayout::ConstTagged(TypeTag::Nil)
                | BoxLayout::ConstTagged(TypeTag::True)
                | BoxLayout::ConstTagged(TypeTag::False)
                | BoxLayout::ConstTagged(TypeTag::Map)
                | BoxLayout::Bool
                | BoxLayout::Num
                | BoxLayout::Union => {}
            };
        }
    }
}

impl From<&BoxedAbiType> for BoxLayout {
    /// Maps a boxed ABI type to the runtime layout used to represent it
    fn from(boxed_abi_type: &BoxedAbiType) -> BoxLayout {
        match boxed_abi_type {
            BoxedAbiType::Any => BoxLayout::Any,
            BoxedAbiType::List(_) => BoxLayout::List,
            &boxed::Num::BOXED_ABI_TYPE => BoxLayout::Num,
            &boxed::Bool::BOXED_ABI_TYPE => BoxLayout::Bool,
            BoxedAbiType::Union(_, _) => BoxLayout::Union,
            BoxedAbiType::UniqueTagged(type_tag) => BoxLayout::ConstTagged(*type_tag),
            BoxedAbiType::Pair(_) => BoxLayout::ConstTagged(TypeTag::Pair),
            BoxedAbiType::Set(_) => BoxLayout::ConstTagged(TypeTag::Set),
            BoxedAbiType::Vector(_) => BoxLayout::ConstTagged(TypeTag::Vector),
            BoxedAbiType::Map(_, _) => BoxLayout::ConstTagged(TypeTag::Map),
        }
    }
}

================================================
FILE: compiler/codegen/callee.rs
================================================
use std::ffi;
use llvm_sys::core::*; use llvm_sys::prelude::*; use llvm_sys::{LLVMAttributeFunctionIndex, LLVMCallConv}; use crate::codegen::mod_gen::ModCtx; use crate::codegen::target_gen::TargetCtx; use crate::libcstr; use crate::mir::ops; pub fn gen_static_symbol_entry_point( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, static_symbol: &ops::StaticSymbol, ) -> LLVMValueRef { use crate::codegen::analysis::escape::{infer_param_capture_kind, CaptureKind}; use arret_runtime::abitype::{AbiType, RetAbiType}; let ops::StaticSymbol { abi, impure, symbol, } = static_symbol; let function_type = tcx.fun_abi_to_llvm_type(abi); let function_name = ffi::CString::new(*symbol).unwrap(); unsafe { mcx.get_function_or_insert( function_type, function_name.as_bytes_with_nul(), |function| { let param_attr_offset = abi.takes_task as usize; for (index, param_abi_type) in abi.params.iter().enumerate() { if let AbiType::Boxed(_) = param_abi_type.abi_type { let no_capture = infer_param_capture_kind(&abi.ret, param_abi_type) == CaptureKind::Never; tcx.add_boxed_param_attrs( function, (param_attr_offset + index) as u32, no_capture, ) } } if !impure { let speculatable_attr = tcx.llvm_enum_attr_for_name("speculatable", 0); LLVMAddAttributeAtIndex( function, LLVMAttributeFunctionIndex, speculatable_attr, ); } match abi.ret { RetAbiType::Inhabited(AbiType::Boxed(_)) => { tcx.add_boxed_return_attrs(function); } RetAbiType::Never => { let noreturn_attr = tcx.llvm_enum_attr_for_name("noreturn", 0); LLVMAddAttributeAtIndex( function, LLVMAttributeFunctionIndex, noreturn_attr, ); } _ => {} } }, ) } } pub fn gen_boxed_fun_thunk_entry_point( builder: LLVMBuilderRef, llvm_fun_thunk: LLVMValueRef, ) -> LLVMValueRef { unsafe { let entry_ptr = LLVMBuildStructGEP(builder, llvm_fun_thunk, 2, libcstr!("fun_thunk_entry_ptr")); LLVMBuildLoad(builder, entry_ptr, libcstr!("fun_thunk_entry")) } } pub fn callee_takes_task(callee: &ops::Callee) -> bool { match callee { ops::Callee::BoxedFunThunk(_) => true, 
ops::Callee::PrivateFun(_) => true, ops::Callee::StaticSymbol(ops::StaticSymbol { abi, .. }) => abi.takes_task, } } pub fn callee_call_conv(mcx: &mut ModCtx<'_, '_, '_>, callee: &ops::Callee) -> u32 { match callee { ops::Callee::BoxedFunThunk(_) | ops::Callee::StaticSymbol(_) => { LLVMCallConv::LLVMCCallConv as u32 } ops::Callee::PrivateFun(private_fun_id) => unsafe { LLVMGetFunctionCallConv(mcx.llvm_private_fun(*private_fun_id)) }, } } ================================================ FILE: compiler/codegen/const_gen.rs ================================================ use std::rc::Rc; use std::{iter, mem}; use llvm_sys::core::*; use llvm_sys::prelude::*; use llvm_sys::{LLVMLinkage, LLVMUnnamedAddr}; use arret_runtime::boxed; use crate::codegen::mod_gen::ModCtx; use crate::codegen::record_struct; use crate::codegen::target_gen::TargetCtx; use crate::libcstr; use crate::mir::ops; pub fn annotate_private_global(llvm_global: LLVMValueRef) { unsafe { LLVMSetUnnamedAddress(llvm_global, LLVMUnnamedAddr::LLVMGlobalUnnamedAddr); LLVMSetGlobalConstant(llvm_global, 1); LLVMSetLinkage(llvm_global, LLVMLinkage::LLVMPrivateLinkage) } } pub fn gen_boxed_pair( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, llvm_head: LLVMValueRef, llvm_rest: LLVMValueRef, llvm_list_len: LLVMValueRef, ) -> LLVMValueRef { unsafe { let type_tag = boxed::TypeTag::Pair; let llvm_type = tcx.boxed_abi_to_llvm_struct_type(&type_tag.into()); let members = &mut [ tcx.llvm_box_header(type_tag.to_const_header()), llvm_list_len, llvm_head, llvm_rest, ]; let llvm_value = LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32); let global = LLVMAddGlobal(mcx.module, llvm_type, libcstr!("const_pair")); LLVMSetInitializer(global, llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); global } } fn gen_boxed_external_str( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, value: &str, ) -> LLVMValueRef { unsafe { let llvm_i64 = 
LLVMInt64TypeInContext(tcx.llx); let shared_str_members = &mut [ // ref_count LLVMConstInt(llvm_i64, std::u64::MAX, 0), // len LLVMConstInt(llvm_i64, value.len() as u64, 0), // data LLVMConstStringInContext(tcx.llx, value.as_ptr() as *mut _, value.len() as u32, 1), ]; let shared_str_llvm_value = LLVMConstStructInContext( tcx.llx, shared_str_members.as_mut_ptr(), shared_str_members.len() as u32, 0, ); let shared_str_global = LLVMAddGlobal( mcx.module, LLVMTypeOf(shared_str_llvm_value), libcstr!("shared_str"), ); LLVMSetInitializer(shared_str_global, shared_str_llvm_value); annotate_private_global(shared_str_global); let type_tag = boxed::TypeTag::Str; let external_llvm_type = tcx.boxed_external_str_llvm_type(); let llvm_i8 = LLVMInt8TypeInContext(tcx.llx); let external_members = &mut [ tcx.llvm_box_header(type_tag.to_const_header()), LLVMConstInt(llvm_i8, boxed::Str::EXTERNAL_INLINE_BYTE_LEN as u64, 0), LLVMConstBitCast( shared_str_global, LLVMPointerType(tcx.shared_str_llvm_type(), 0), ), ]; let external_llvm_value = LLVMConstNamedStruct( external_llvm_type, external_members.as_mut_ptr(), external_members.len() as u32, ); let global = LLVMAddGlobal(mcx.module, external_llvm_type, libcstr!("const_str")); LLVMSetInitializer(global, external_llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); LLVMConstBitCast(global, tcx.boxed_abi_to_llvm_ptr_type(&type_tag.into())) } } fn gen_boxed_inline_str( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, value: &str, ) -> LLVMValueRef { unsafe { const MAX_INLINE_BYTES: usize = boxed::Str::MAX_INLINE_BYTES; let mut inline_buffer: [u8; MAX_INLINE_BYTES] = [0; MAX_INLINE_BYTES]; inline_buffer[0..value.len()].copy_from_slice(value.as_bytes()); let type_tag = boxed::TypeTag::Str; let inline_llvm_type = tcx.boxed_inline_str_llvm_type(); let llvm_i8 = LLVMInt8TypeInContext(tcx.llx); let members = &mut [ tcx.llvm_box_header(type_tag.to_const_header()), LLVMConstInt(llvm_i8, value.len() as 
u64, 0), LLVMConstStringInContext( tcx.llx, inline_buffer.as_mut_ptr() as *mut _, MAX_INLINE_BYTES as u32, 1, ), ]; let inline_llvm_value = LLVMConstNamedStruct(inline_llvm_type, members.as_mut_ptr(), members.len() as u32); let global = LLVMAddGlobal(mcx.module, inline_llvm_type, libcstr!("const_str")); LLVMSetInitializer(global, inline_llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); LLVMConstBitCast(global, tcx.boxed_abi_to_llvm_ptr_type(&type_tag.into())) } } pub fn gen_boxed_str( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, value: &str, ) -> LLVMValueRef { match boxed::Str::storage_for_byte_len(value.len()) { boxed::StrStorage::Inline(_) => gen_boxed_inline_str(tcx, mcx, value), boxed::StrStorage::External => gen_boxed_external_str(tcx, mcx, value), } } pub fn gen_boxed_sym( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, value: &str, ) -> LLVMValueRef { let interned_sym = mcx.intern_name(value); unsafe { let type_tag = boxed::TypeTag::Sym; let boxed_llvm_type = tcx.boxed_abi_to_llvm_struct_type(&type_tag.into()); let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let members = &mut [ tcx.llvm_box_header(type_tag.to_const_header()), LLVMConstInt(llvm_i64, interned_sym.to_raw_u64(), 0), ]; let boxed_llvm_value = LLVMConstNamedStruct(boxed_llvm_type, members.as_mut_ptr(), members.len() as u32); let global = LLVMAddGlobal(mcx.module, boxed_llvm_type, libcstr!("const_sym")); LLVMSetInitializer(global, boxed_llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); global } } /// Generates a table of global interned names /// /// `names` must be pre-sorted pub fn gen_global_interned_names<'a>( tcx: &mut TargetCtx, llvm_module: LLVMModuleRef, names: impl ExactSizeIterator>, ) -> LLVMValueRef { unsafe { let names_len = names.len(); if names_len == 0 { return LLVMConstPointerNull(LLVMPointerType(tcx.global_interned_name_llvm_type(), 0)); } let llvm_i32 = 
LLVMInt32TypeInContext(tcx.llx); let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let global_interned_name_llvm_type = tcx.global_interned_name_llvm_type(); let first_element_gep_indices = &mut [LLVMConstInt(llvm_i32, 0, 0), LLVMConstInt(llvm_i32, 0, 0)]; let mut llvm_names: Vec = names .map(|name| { let llvm_name_string = LLVMConstStringInContext( tcx.llx, name.as_bytes().as_ptr() as *mut _, name.len() as u32, 1, ); let name_global_name = format!("global_interned_name_{}\0", name); let llvm_name_global = LLVMAddGlobal( llvm_module, LLVMTypeOf(llvm_name_string), name_global_name.as_ptr() as *const _, ); LLVMSetInitializer(llvm_name_global, llvm_name_string); annotate_private_global(llvm_name_global); let llvm_name_string_ptr = LLVMConstGEP( llvm_name_global, first_element_gep_indices.as_mut_ptr(), first_element_gep_indices.len() as u32, ); let llvm_name_members = &mut [ LLVMConstInt(llvm_i64, name.len() as u64, 0), llvm_name_string_ptr, ]; LLVMConstNamedStruct( global_interned_name_llvm_type, llvm_name_members.as_mut_ptr(), llvm_name_members.len() as u32, ) }) .collect(); let llvm_names_array = LLVMConstArray( global_interned_name_llvm_type, llvm_names.as_mut_ptr(), llvm_names.len() as u32, ); let global_names_members = &mut [ // len LLVMConstInt(llvm_i32, names_len as u64, 0), // names llvm_names_array, ]; let llvm_global_names = LLVMConstStructInContext( tcx.llx, global_names_members.as_mut_ptr(), global_names_members.len() as u32, 0, ); let global = LLVMAddGlobal( llvm_module, LLVMTypeOf(llvm_global_names), libcstr!("global_interned_names"), ); LLVMSetInitializer(global, llvm_global_names); annotate_private_global(global); global } } pub fn gen_boxed_int( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, value: i64, ) -> LLVMValueRef { unsafe { let type_tag = boxed::TypeTag::Int; let llvm_type = tcx.boxed_abi_to_llvm_struct_type(&type_tag.into()); let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let box_name = format!("const_int_{}\0", value); let global = 
mcx.get_global_or_insert(llvm_type, box_name.as_bytes(), || { let members = &mut [ tcx.llvm_box_header(type_tag.to_const_header()), LLVMConstInt(llvm_i64, value as u64, 1), ]; LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32) }); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); global } } pub fn gen_boxed_float( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, value: f64, ) -> LLVMValueRef { unsafe { let type_tag = boxed::TypeTag::Float; let llvm_type = tcx.boxed_abi_to_llvm_struct_type(&type_tag.into()); let llvm_double = LLVMDoubleTypeInContext(tcx.llx); let members = &mut [ tcx.llvm_box_header(type_tag.to_const_header()), LLVMConstReal(llvm_double, value), ]; let llvm_value = LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32); let global = LLVMAddGlobal(mcx.module, llvm_type, libcstr!("const_float")); LLVMSetInitializer(global, llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); global } } pub fn gen_boxed_char( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, value: char, ) -> LLVMValueRef { unsafe { let type_tag = boxed::TypeTag::Char; let llvm_type = tcx.boxed_abi_to_llvm_struct_type(&type_tag.into()); let llvm_i32 = LLVMInt32TypeInContext(tcx.llx); let box_name = format!("const_char_{}\0", value); let global = mcx.get_global_or_insert(llvm_type, box_name.as_bytes(), || { let members = &mut [ tcx.llvm_box_header(type_tag.to_const_header()), LLVMConstInt(llvm_i32, value as u64, 1), ]; LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32) }); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); global } } pub fn gen_boxed_nil(tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>) -> LLVMValueRef { tcx.ptr_to_singleton_box(mcx.module, boxed::TypeTag::Nil, b"ARRET_NIL\0") } pub fn gen_boxed_fun_thunk( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, llvm_captures: LLVMValueRef, 
llvm_entry_point: LLVMValueRef, ) -> LLVMValueRef { unsafe { let type_tag = boxed::TypeTag::FunThunk; let llvm_type = tcx.boxed_abi_to_llvm_struct_type(&type_tag.into()); let members = &mut [ tcx.llvm_box_header(type_tag.to_const_header()), llvm_captures, llvm_entry_point, ]; let llvm_value = LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32); let global = LLVMAddGlobal(mcx.module, llvm_type, libcstr!("const_fun_thunk")); LLVMSetInitializer(global, llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); global } } pub fn gen_boxed_record( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, record_struct: &ops::RecordStructId, llvm_fields: &[LLVMValueRef], ) -> LLVMValueRef { let type_tag = boxed::TypeTag::Record; let record_class_id = mcx.record_class_id_for_struct(record_struct); let record_struct::TargetRecordStruct { data_layout, record_storage, llvm_data_type, .. } = *tcx.target_record_struct(record_struct); let llvm_box_type = tcx.record_struct_llvm_box_type(record_struct); unsafe { let box_name = format!("const_{}\0", record_struct.source_name); let llvm_data_struct = LLVMConstNamedStruct( llvm_data_type, llvm_fields.as_ptr() as *mut _, llvm_fields.len() as u32, ); let llvm_box_header = tcx.llvm_box_header(type_tag.to_const_header()); // Constant records by definition cannot have GC refs let llvm_i8 = LLVMInt8TypeInContext(tcx.llx); let llvm_has_gc_refs = LLVMConstInt(llvm_i8, 0, 1); let llvm_record_class_id = LLVMConstInt( tcx.record_class_id_llvm_type(), u64::from(record_class_id), 1, ); let llvm_box_value = if let boxed::RecordStorage::Inline(_) = record_storage { let llvm_inline_byte_len = LLVMConstInt( llvm_i8, match data_layout { Some(data_layout) => data_layout.size() as u64, None => 0, }, 1, ); let inline_box_members = &mut [ llvm_box_header, llvm_inline_byte_len, llvm_has_gc_refs, llvm_record_class_id, llvm_data_struct, ]; LLVMConstNamedStruct( llvm_box_type, 
inline_box_members.as_mut_ptr(), inline_box_members.len() as u32, ) } else { // Create a global containing our data and return a pointer to it let data_global_name = format!("const_{}_data\0", record_struct.source_name); let data_global = LLVMAddGlobal( mcx.module, llvm_data_type, data_global_name.as_ptr() as *const _, ); LLVMSetInitializer(data_global, llvm_data_struct); annotate_private_global(data_global); let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let external_box_members = &mut [ llvm_box_header, LLVMConstInt(llvm_i8, boxed::Record::EXTERNAL_INLINE_LEN as u64, 1), llvm_has_gc_refs, llvm_record_class_id, data_global, LLVMConstInt(llvm_i64, 0, 0), ]; LLVMConstNamedStruct( llvm_box_type, external_box_members.as_mut_ptr(), external_box_members.len() as u32, ) }; let global = LLVMAddGlobal(mcx.module, llvm_box_type, box_name.as_ptr() as *const _); LLVMSetInitializer(global, llvm_box_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); LLVMConstBitCast(global, tcx.boxed_abi_to_llvm_ptr_type(&type_tag.into())) } } fn gen_persistent_vector_leaf( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, llvm_elements: &[LLVMValueRef], ) -> LLVMValueRef { use arret_runtime::abitype::BoxedAbiType; use arret_runtime::persistent::vector::GLOBAL_CONSTANT_REFCOUNT; use arret_runtime::persistent::vector::NODE_SIZE; unsafe { let llvm_type = tcx.persistent_vector_leaf_llvm_type(); let llvm_any_ptr = tcx.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any); let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let mut padded_llvm_elements: Vec = llvm_elements .iter() .copied() .chain(iter::repeat(LLVMGetUndef(llvm_any_ptr)).take(NODE_SIZE - llvm_elements.len())) .collect(); let mut members = vec![ LLVMConstInt(llvm_i64, GLOBAL_CONSTANT_REFCOUNT as u64, 0), LLVMConstArray( llvm_any_ptr, padded_llvm_elements.as_mut_ptr(), padded_llvm_elements.len() as u32, ), ]; let global = LLVMAddGlobal(mcx.module, llvm_type, libcstr!("const_vector_leaf")); let 
llvm_value =
            LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32);

        LLVMSetInitializer(global, llvm_value);
        annotate_private_global(global);

        global
    }
}

/// Generates a constant boxed vector in its "external" representation: the
/// box itself holds the length plus pointers to persistent-vector leaf nodes.
///
/// When more than `NODE_SIZE` elements are present the first `NODE_SIZE` go in
/// a root leaf and the remainder in the tail leaf; otherwise the root pointer
/// is null and everything lives in the tail.
///
/// NOTE(review): the generic parameters on `ExactSizeIterator`, `Vec` and
/// `boxed::Vector` were lost to extraction stripping; they are restored here
/// as `LLVMValueRef` / `boxed::Any` based on the surrounding usage — confirm
/// against the upstream source.
fn gen_boxed_external_vector(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    llvm_elements_iter: impl ExactSizeIterator<Item = LLVMValueRef>,
) -> LLVMValueRef {
    use arret_runtime::persistent::vector::NODE_SIZE;

    let llvm_elements: Vec<LLVMValueRef> = llvm_elements_iter.collect();

    unsafe {
        let type_tag = boxed::TypeTag::Vector;
        let llvm_type = tcx.boxed_external_vector_llvm_type();
        let llvm_i32 = LLVMInt32TypeInContext(tcx.llx);
        let llvm_i64 = LLVMInt64TypeInContext(tcx.llx);
        let llvm_persistent_vector_leaf_ptr =
            LLVMPointerType(tcx.persistent_vector_leaf_llvm_type(), 0);

        let (root_ptr, tail_ptr) = if llvm_elements.len() > NODE_SIZE {
            // Need both a root and a tail
            (
                gen_persistent_vector_leaf(tcx, mcx, &llvm_elements[0..NODE_SIZE]),
                gen_persistent_vector_leaf(tcx, mcx, &llvm_elements[NODE_SIZE..]),
            )
        } else {
            // Need just the tail
            (
                LLVMConstPointerNull(llvm_persistent_vector_leaf_ptr),
                gen_persistent_vector_leaf(tcx, mcx, &llvm_elements),
            )
        };

        let mut members = [
            tcx.llvm_box_header(type_tag.to_const_header()),
            // The sentinel inline length marking this box as external
            LLVMConstInt(
                llvm_i32,
                boxed::Vector::<boxed::Any>::EXTERNAL_INLINE_LEN as u64,
                0,
            ),
            LLVMConstInt(llvm_i64, llvm_elements.len() as u64, 0),
            root_ptr,
            tail_ptr,
        ];

        let llvm_value =
            LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32);

        let global = LLVMAddGlobal(mcx.module, llvm_type, libcstr!("const_vector"));
        LLVMSetInitializer(global, llvm_value);
        LLVMSetAlignment(global, mem::align_of::<boxed::Vector<boxed::Any>>() as u32);
        annotate_private_global(global);

        global
    }
}

/// Generates a constant boxed vector with all elements stored inline,
/// padded out to `MAX_INLINE_LEN` with undef values.
fn gen_boxed_inline_vector(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    llvm_elements: impl ExactSizeIterator<Item = LLVMValueRef>,
) -> LLVMValueRef {
    use arret_runtime::abitype::BoxedAbiType;

    let elements_len = llvm_elements.len();

    unsafe {
        let type_tag = boxed::TypeTag::Vector;
        let llvm_type = tcx.boxed_inline_vector_llvm_type();
        let llvm_i32 = LLVMInt32TypeInContext(tcx.llx);
        let
llvm_any_ptr = tcx.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any); let mut members: Vec = vec![ tcx.llvm_box_header(type_tag.to_const_header()), LLVMConstInt(llvm_i32, elements_len as u64, 0), ]; members.extend( llvm_elements.chain( iter::repeat(LLVMGetUndef(llvm_any_ptr)) .take(boxed::Vector::::MAX_INLINE_LEN - elements_len), ), ); let llvm_value = LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32); let global = LLVMAddGlobal(mcx.module, llvm_type, libcstr!("const_vector")); LLVMSetInitializer(global, llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); global } } pub fn gen_boxed_vector( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, llvm_elements: impl ExactSizeIterator, ) -> LLVMValueRef { use arret_runtime::persistent::vector::NODE_SIZE; let elements_len = llvm_elements.len(); if elements_len <= boxed::Vector::::MAX_INLINE_LEN { gen_boxed_inline_vector(tcx, mcx, llvm_elements) } else if elements_len <= (NODE_SIZE * 2) { gen_boxed_external_vector(tcx, mcx, llvm_elements) } else { todo!("generating constant vector of length {}", elements_len); } } pub fn gen_boxed_set( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, llvm_elements: impl ExactSizeIterator, ) -> LLVMValueRef { use arret_runtime::abitype::BoxedAbiType; let elements_len = llvm_elements.len(); if elements_len > boxed::Set::::MAX_INLINE_LEN { todo!("generating constant set of length {}", elements_len); } unsafe { let type_tag = boxed::TypeTag::Set; let llvm_type = tcx.boxed_abi_to_llvm_struct_type(&type_tag.into()); let llvm_i32 = LLVMInt32TypeInContext(tcx.llx); let llvm_any_ptr = tcx.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any); let mut members: Vec = vec![ tcx.llvm_box_header(type_tag.to_const_header()), LLVMConstInt(llvm_i32, elements_len as u64, 0), ]; members.extend( llvm_elements.chain( iter::repeat(LLVMGetUndef(llvm_any_ptr)) .take(boxed::Set::::MAX_INLINE_LEN - elements_len), ), ); let llvm_value = 
LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32); let global = LLVMAddGlobal(mcx.module, llvm_type, libcstr!("const_set")); LLVMSetInitializer(global, llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); global } } pub fn gen_boxed_map( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, llvm_elements: impl ExactSizeIterator, ) -> LLVMValueRef { if llvm_elements.len() > 0 { todo!("generating non-empty map"); } unsafe { let type_tag = boxed::TypeTag::Map; let llvm_type = tcx.boxed_abi_to_llvm_struct_type(&type_tag.into()); let mut members: Vec = vec![tcx.llvm_box_header(type_tag.to_const_header())]; let llvm_value = LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32); let global = LLVMAddGlobal(mcx.module, llvm_type, libcstr!("const_map")); LLVMSetInitializer(global, llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); annotate_private_global(global); global } } ================================================ FILE: compiler/codegen/debug_info.rs ================================================ use std::collections::HashMap; use std::os::unix::ffi::OsStrExt; use std::{env, ffi, ptr}; use codespan_reporting::files::Files as _; use llvm_sys::core::*; use llvm_sys::debuginfo::*; use llvm_sys::prelude::*; use arret_syntax::datum::DataStr; use arret_syntax::span::{FileId, Span}; use crate::source::SourceLoader; pub struct DebugInfoBuilder<'sl> { pub llvm_dib: LLVMDIBuilderRef, source_loader: &'sl SourceLoader, current_dir: ffi::OsString, file_metadata: HashMap, } impl<'sl> DebugInfoBuilder<'sl> { pub fn new( source_loader: &'sl SourceLoader, optimised: bool, main_span: Span, module: LLVMModuleRef, ) -> DebugInfoBuilder<'sl> { // This is needed for all of our file metadata so the debugger can resolve relative paths let current_dir = env::current_dir() .ok() .map(|current_dir| current_dir.as_os_str().to_owned()) .unwrap_or_else(ffi::OsString::new); let llvm_dib = 
unsafe { LLVMCreateDIBuilderDisallowUnresolved(module) }; let mut di_builder = DebugInfoBuilder { llvm_dib, source_loader, current_dir, file_metadata: HashMap::new(), }; di_builder.add_compile_unit_metadata(optimised, main_span); di_builder } fn add_compile_unit_metadata(&mut self, optimised: bool, main_span: Span) { let main_file_id = if let Some(file_id) = main_span.file_id() { file_id } else { return; }; let main_file_metadata = self.file_metadata(main_file_id); let producer = b"arret\0"; unsafe { // This is implicitly added to the LLVM module LLVMDIBuilderCreateCompileUnit( self.llvm_dib, LLVMDWARFSourceLanguage::LLVMDWARFSourceLanguageC, main_file_metadata, producer.as_ptr() as *const _, producer.len(), optimised as i32, // `isOptimized` ptr::null(), // `Flags` 0, // `FlagsLen` 0, // `RuntimeVer` ptr::null(), // `SplitName` 0, // `SplitNameLen` LLVMDWARFEmissionKind::LLVMDWARFEmissionKindFull, // `LLVMDWARFEmissionKind::LLVMDWARFEmissionKindLineTablesOnly`, 0, // `DWOId` 0, // `SplitDebugInlining` 0, // `DebugInfoForProfiling` ); } } pub fn file_metadata(&mut self, file_id: FileId) -> LLVMMetadataRef { if let Some(metadata) = self.file_metadata.get(&file_id) { return *metadata; } let filename = self.source_loader.files().name(file_id).unwrap(); let metadata = unsafe { LLVMDIBuilderCreateFile( self.llvm_dib, filename.as_ptr() as *const _, filename.len(), self.current_dir.as_bytes().as_ptr() as *const _, self.current_dir.as_bytes().len(), ) }; self.file_metadata.insert(file_id, metadata); metadata } /// Returns a subroutine type containing no parameters pub fn placeholder_subroutine_type( &mut self, file_metadata: LLVMMetadataRef, ) -> LLVMMetadataRef { // This includes no parameter types unsafe { LLVMDIBuilderCreateSubroutineType( self.llvm_dib, file_metadata, ptr::null_mut(), 0, LLVMDIFlagZero, ) } } pub fn add_function_debug_info( &mut self, span: Span, source_name: Option<&DataStr>, llvm_function: LLVMValueRef, ) { let file_id = if let Some(file_id) = 
span.file_id() { file_id } else { return; }; let location = if let Ok(location) = self .source_loader .files() .location(file_id, span.start() as usize) { location } else { return; }; let line_index = location.line_number - 1; let file_metadata = self.file_metadata(file_id); unsafe { let mut linkage_name_len: usize = 0; let linkage_name_ptr = LLVMGetValueName2(llvm_function, &mut linkage_name_len); let function_metadata = LLVMDIBuilderCreateFunction( self.llvm_dib, file_metadata, // `Scope` source_name .map(|source_name| source_name.as_ptr() as *const _) .unwrap_or(linkage_name_ptr), source_name .as_ref() .map(|source_name| source_name.len()) .unwrap_or(linkage_name_len), linkage_name_ptr, linkage_name_len, file_metadata, line_index as u32, self.placeholder_subroutine_type(file_metadata), source_name.is_none() as i32, // `IsLocalToUnit` 1, // `IsDefinition` line_index as u32, // `ScopeLine` LLVMDIFlagZero, 1, // `IsOptimized` ); LLVMSetSubprogram(llvm_function, function_metadata); } } pub fn finalise(&mut self) { unsafe { LLVMDIBuilderFinalize(self.llvm_dib); } } } impl Drop for DebugInfoBuilder<'_> { fn drop(&mut self) { unsafe { LLVMDisposeDIBuilder(self.llvm_dib) } } } ================================================ FILE: compiler/codegen/fun_gen.rs ================================================ use std::collections::HashMap; use std::ffi; use llvm_sys::core::*; use llvm_sys::prelude::*; use llvm_sys::LLVMCallConv; use crate::mir::ops; use crate::codegen::analysis::escape::{CaptureKind, Captures}; use crate::codegen::mod_gen::ModCtx; use crate::codegen::target_gen::TargetCtx; use crate::codegen::GenAbi; use crate::libcstr; pub(crate) struct FunCtx { pub regs: HashMap, pub function: LLVMValueRef, pub builder: LLVMBuilderRef, pub current_task: LLVMValueRef, } impl FunCtx { pub(crate) fn new( function: LLVMValueRef, builder: LLVMBuilderRef, current_task: LLVMValueRef, ) -> FunCtx { FunCtx { regs: HashMap::new(), function, builder, current_task, } } } impl Drop 
for FunCtx { fn drop(&mut self) { unsafe { LLVMDisposeBuilder(self.builder); } } } pub(crate) fn declare_fun( tcx: &mut TargetCtx, llvm_module: LLVMModuleRef, fun: &ops::Fun, ) -> LLVMValueRef { let gen_abi: GenAbi = (&fun.abi).into(); let function_type = tcx.fun_abi_to_llvm_type(&gen_abi); let fun_symbol = fun .source_name .as_ref() .map(|source_name| ffi::CString::new(source_name.as_bytes()).unwrap()) .unwrap_or_else(|| ffi::CString::new("anon_fun").unwrap()); unsafe { let llvm_fun = LLVMAddFunction(llvm_module, fun_symbol.as_ptr() as *const _, function_type); let llvm_call_conv = match fun.abi.call_conv { ops::CallConv::Ccc => LLVMCallConv::LLVMCCallConv, ops::CallConv::FastCc => LLVMCallConv::LLVMFastCallConv, }; LLVMSetFunctionCallConv(llvm_fun, llvm_call_conv as u32); llvm_fun } } pub(crate) fn define_fun( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, fun: &ops::Fun, captures: &Captures, llvm_fun: LLVMValueRef, ) { use crate::codegen::alloc::plan::plan_allocs; use crate::codegen::op_gen; use arret_runtime::abitype::{AbiType, RetAbiType}; let alloc_plan = plan_allocs(tcx, captures, &fun.ops); unsafe { let builder = LLVMCreateBuilderInContext(tcx.llx); let bb = LLVMAppendBasicBlockInContext(tcx.llx, llvm_fun, libcstr!("entry")); LLVMPositionBuilderAtEnd(builder, bb); let mut fcx = FunCtx::new(llvm_fun, builder, LLVMGetParam(llvm_fun, 0)); fcx.regs.reserve(fun.param_regs.len()); for (param_index, (reg, param_abi_type)) in fun.param_regs.iter().zip(fun.abi.params.iter()).enumerate() { // Our implicit task param shifts our params by 1 let llvm_offset = (1 + param_index) as u32; fcx.regs.insert(*reg, LLVMGetParam(llvm_fun, llvm_offset)); if let AbiType::Boxed(_) = param_abi_type { let no_capture = captures.get(*reg) == CaptureKind::Never; tcx.add_boxed_param_attrs(llvm_fun, llvm_offset, no_capture); } } if let RetAbiType::Inhabited(AbiType::Boxed(_)) = fun.abi.ret { tcx.add_boxed_return_attrs(llvm_fun); } for alloc_atom in alloc_plan { 
op_gen::gen_alloc_atom(tcx, mcx, &mut fcx, alloc_atom); } mcx.optimise_function(llvm_fun); } } ================================================ FILE: compiler/codegen/jit.rs ================================================ use std::collections::HashMap; use std::{alloc, env, ffi, io, ptr}; use llvm_sys::core::*; use llvm_sys::execution_engine::*; use llvm_sys::orc::*; use llvm_sys::target_machine::*; use arret_runtime::boxed; use arret_runtime::class_map::ClassMap; use arret_runtime::intern::Interner; use crate::codegen::analysis::AnalysedMod; use crate::codegen::mod_gen::{gen_mod, GeneratedMod}; use crate::codegen::target_gen::TargetCtx; use crate::mir::ops; use crate::mir::printer::print_fun; extern "C" fn orc_sym_resolve(name_ptr: *const libc::c_char, jcx_void: *mut libc::c_void) -> u64 { unsafe { let jcx: &JitCtx = &*(jcx_void as *mut _); let name = ffi::CStr::from_ptr(name_ptr); jcx.symbols .get(name) .cloned() .unwrap_or_else(|| panic!("unable to lookup symbol {:?}", name)) } } pub struct JitCtx { tcx: TargetCtx, orc: LLVMOrcJITStackRef, target_machine: LLVMTargetMachineRef, symbols: HashMap, record_struct_class_ids: HashMap, module_counter: usize, } pub struct RegisteredRecordStruct { /// Allocation layout of the record's data pub data_layout: Option, /// Record class ID that was dynamically registered in the class map pub record_class_id: boxed::RecordClassId, } impl JitCtx { pub fn new(optimising: bool) -> JitCtx { #[allow(clippy::fn_to_numeric_cast)] unsafe { use crate::codegen::target_machine::create_target_machine; use arret_runtime::compiler_support; LLVMLinkInMCJIT(); let target_machine = create_target_machine( None, // Can't cross compile in the JIT LLVMRelocMode::LLVMRelocDefault, LLVMCodeModel::LLVMCodeModelJITDefault, ); let orc = LLVMOrcCreateInstance(target_machine); let mut jcx = JitCtx { tcx: TargetCtx::new(target_machine, optimising), orc, target_machine, symbols: HashMap::new(), record_struct_class_ids: HashMap::new(), module_counter: 0, }; 
jcx.add_symbol(b"ARRET_TRUE\0", &boxed::TRUE_INSTANCE as *const _ as u64); jcx.add_symbol(b"ARRET_FALSE\0", &boxed::FALSE_INSTANCE as *const _ as u64); jcx.add_symbol(b"ARRET_NIL\0", &boxed::NIL_INSTANCE as *const _ as u64); jcx.add_symbol( b"arret_runtime_alloc_cells\0", compiler_support::alloc_cells as u64, ); jcx.add_symbol( b"arret_runtime_alloc_record_data\0", compiler_support::alloc_record_data as u64, ); jcx.add_symbol(b"arret_runtime_equals\0", compiler_support::equals as u64); jcx.add_symbol( b"arret_runtime_panic_with_string\0", compiler_support::panic_with_string as u64, ); jcx } } pub fn compile_fun( &mut self, private_funs: &HashMap, interner: &mut Interner, fun: &ops::Fun, ) -> u64 { if env::var_os("ARRET_DUMP_MIR").is_some() { print_fun(&mut io::stdout().lock(), private_funs, fun, None).unwrap(); } let tcx = &mut self.tcx; self.module_counter += 1; let module_counter = self.module_counter; let module_name = fun .source_name .as_ref() .map(|source_name| format!("JIT Module #{} for `{}`\0", module_counter, source_name)) .unwrap_or_else(|| format!("Anonymous JIT Module #{}\0", module_counter)); // Create the module let analysed_mod = AnalysedMod::new(private_funs, fun); unsafe { // Generate our Arret funs let GeneratedMod { llvm_module, llvm_entry_fun, .. 
} = gen_mod( tcx, module_name.as_bytes(), &analysed_mod, Some(interner), self.record_struct_class_ids.clone(), None, ); // We need to take ownership before we transfer the module to ORC let mut function_name_len: usize = 0; let function_name_ptr = LLVMGetValueName2(llvm_entry_fun, &mut function_name_len); let function_name = ffi::CStr::from_ptr(function_name_ptr).to_owned(); tcx.finish_module(llvm_module); let mut orc_module: LLVMOrcModuleHandle = 0; if !LLVMOrcAddEagerlyCompiledIR( self.orc, &mut orc_module, llvm_module, Some(orc_sym_resolve), self as *mut JitCtx as *mut _, ) .is_null() { panic!("Unable to add module"); } let mut target_address: LLVMOrcTargetAddress = 0; if !LLVMOrcGetSymbolAddressIn( self.orc, &mut target_address, orc_module, function_name.as_ptr() as *const _, ) .is_null() { panic!("Unable to get symbol address") } target_address } } pub fn add_symbol(&mut self, unmangled_name: &[u8], address: u64) { unsafe { let mut mangled_pointer: *mut libc::c_char = ptr::null_mut(); LLVMOrcGetMangledSymbol( self.orc, &mut mangled_pointer, unmangled_name.as_ptr() as *const _, ); let mangled_string = ffi::CStr::from_ptr(mangled_pointer); self.symbols.insert(mangled_string.to_owned(), address); LLVMOrcDisposeMangledSymbol(mangled_pointer); } } pub fn register_record_struct( &mut self, record_struct: &ops::RecordStructId, class_map: &mut ClassMap, ) -> RegisteredRecordStruct { let target_record_struct = self.tcx.target_record_struct(record_struct); let record_class_id = class_map.push_dynamic_class(target_record_struct.classmap_class.clone()); self.record_struct_class_ids .insert(record_struct.clone(), record_class_id); RegisteredRecordStruct { data_layout: target_record_struct.data_layout, record_class_id, } } } impl Drop for JitCtx { fn drop(&mut self) { unsafe { LLVMDisposeTargetMachine(self.target_machine); LLVMOrcDisposeInstance(self.orc); } } } ================================================ FILE: compiler/codegen/libcstr.rs 
================================================

/// Builds a static NULL terminated `*const libc::c_char` with the given contents
#[macro_export]
macro_rules! libcstr {
    ($s:expr) => {
        // `concat!` appends the NUL at compile time, yielding a C string
        concat!($s, "\0").as_ptr() as *const libc::c_char
    };
}

================================================ FILE: compiler/codegen/math_gen.rs ================================================

use llvm_sys::core::*;
use llvm_sys::prelude::*;
use llvm_sys::LLVMIntPredicate;

use crate::codegen::fun_gen::FunCtx;
use crate::codegen::mod_gen::ModCtx;
use crate::codegen::panic_gen::gen_panic;
use crate::codegen::target_gen::TargetCtx;
use crate::libcstr;

/// Describes one overflow-checked i64 operation: the LLVM intrinsic that
/// implements it and the panic raised when the intrinsic reports overflow
pub struct CheckedIntOp {
    // NUL-terminated name of the `llvm.*.with.overflow.i64` intrinsic
    math_intrinsic_name: &'static [u8],
    // NUL-terminated LLVM value name given to the arithmetic result
    result_name: &'static [u8],
    // Message passed to `gen_panic` on overflow
    panic_message: &'static str,
}

pub const CHECKED_ADD: CheckedIntOp = CheckedIntOp {
    math_intrinsic_name: b"llvm.sadd.with.overflow.i64\0",
    result_name: b"sum\0",
    panic_message: "attempt to add with overflow",
};

pub const CHECKED_SUB: CheckedIntOp = CheckedIntOp {
    math_intrinsic_name: b"llvm.ssub.with.overflow.i64\0",
    result_name: b"difference\0",
    panic_message: "attempt to subtract with overflow",
};

pub const CHECKED_MUL: CheckedIntOp = CheckedIntOp {
    math_intrinsic_name: b"llvm.smul.with.overflow.i64\0",
    result_name: b"product\0",
    panic_message: "attempt to multiply with overflow",
};

/// Emits an overflow-checked i64 operation described by `int_op`, branching to
/// a panic block when the intrinsic's overflow flag is set and otherwise
/// continuing in a new block with the result value
pub(crate) fn gen_checked_int_math(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    fcx: &mut FunCtx,
    int_op: &'static CheckedIntOp,
    llvm_lhs: LLVMValueRef,
    llvm_rhs: LLVMValueRef,
) -> LLVMValueRef {
    let CheckedIntOp {
        math_intrinsic_name,
        result_name,
        panic_message,
    } = int_op;

    unsafe {
        let llvm_i1 = LLVMInt1TypeInContext(tcx.llx);
        let llvm_i64 = LLVMInt64TypeInContext(tcx.llx);

        // The `*.with.overflow` intrinsics return a `{ i64, i1 }` pair of
        // (result, overflow flag)
        let mut return_type_members = [llvm_i64, llvm_i1];
        let llvm_return_type = LLVMStructTypeInContext(
            tcx.llx,
            return_type_members.as_mut_ptr(),
            return_type_members.len() as u32,
            0,
        );

        let llvm_param_types = &mut [llvm_i64, llvm_i64];
        let math_intrinsic_llvm_type = LLVMFunctionType(
llvm_return_type, llvm_param_types.as_mut_ptr(), llvm_param_types.len() as u32, 0, ); let math_intrinsic_fun = mcx.get_function_or_insert(math_intrinsic_llvm_type, math_intrinsic_name, |_| {}); let math_intrinsic_args = &mut [llvm_lhs, llvm_rhs]; let llvm_result_with_overflow = LLVMBuildCall( fcx.builder, math_intrinsic_fun, math_intrinsic_args.as_mut_ptr(), math_intrinsic_args.len() as u32, libcstr!("result_with_overflow"), ); let llvm_math_result = LLVMBuildExtractValue( fcx.builder, llvm_result_with_overflow, 0, result_name.as_ptr() as *const _, ); let llvm_overflow = LLVMBuildExtractValue( fcx.builder, llvm_result_with_overflow, 1, libcstr!("overflow_flag"), ); let overflow_block = LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("overflow")); let cont_block = LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("no_overflow")); LLVMBuildCondBr(fcx.builder, llvm_overflow, overflow_block, cont_block); LLVMPositionBuilderAtEnd(fcx.builder, overflow_block); gen_panic(tcx, mcx, fcx, panic_message); LLVMPositionBuilderAtEnd(fcx.builder, cont_block); llvm_math_result } } pub(crate) fn gen_checked_int_rem( tcx: &mut TargetCtx, mcx: &mut ModCtx<'_, '_, '_>, fcx: &mut FunCtx, llvm_numer: LLVMValueRef, llvm_denom: LLVMValueRef, ) -> LLVMValueRef { unsafe { let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let denom_is_zero = LLVMBuildICmp( fcx.builder, LLVMIntPredicate::LLVMIntEQ, llvm_denom, LLVMConstInt(llvm_i64, 0, 0), libcstr!("denom_is_zero"), ); let rem_by_zero_block = LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("rem_by_zero")); let valid_rem_block = LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("valid_rem")); LLVMBuildCondBr( fcx.builder, denom_is_zero, rem_by_zero_block, valid_rem_block, ); LLVMPositionBuilderAtEnd(fcx.builder, rem_by_zero_block); gen_panic(tcx, mcx, fcx, "division by zero"); LLVMPositionBuilderAtEnd(fcx.builder, valid_rem_block); LLVMBuildSRem(fcx.builder, llvm_numer, llvm_denom, 
libcstr!("rem"))
    }
}

/// Generates an i64 division (`quot`) that panics instead of hitting LLVM's
/// undefined behaviour.
///
/// Two cases are guarded: a zero denominator, and the single overflowing
/// division `i64::MIN / -1`. Both branch to one shared panic block whose
/// runtime message is "division by zero".
pub(crate) fn gen_checked_int_div(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    fcx: &mut FunCtx,
    llvm_numer: LLVMValueRef,
    llvm_denom: LLVMValueRef,
) -> LLVMValueRef {
    unsafe {
        let llvm_i64 = LLVMInt64TypeInContext(tcx.llx);

        // Build our blocks
        let div_by_zero_block =
            LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("div_by_zero"));
        let non_zero_denom_block =
            LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("non_zero_denom"));
        let neg_one_denom_block =
            LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("neg_one_denom"));
        let valid_div_block =
            LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("valid_div_block"));

        // Test if the denominator is 0
        let denom_is_zero = LLVMBuildICmp(
            fcx.builder,
            LLVMIntPredicate::LLVMIntEQ,
            llvm_denom,
            LLVMConstInt(llvm_i64, 0, 0),
            libcstr!("denom_is_zero"),
        );

        // If the denominator is 0 then raise a divide by zero error
        LLVMBuildCondBr(
            fcx.builder,
            denom_is_zero,
            div_by_zero_block,
            non_zero_denom_block,
        );

        // Test if the denominator is -1
        LLVMPositionBuilderAtEnd(fcx.builder, non_zero_denom_block);
        let denom_is_neg_one = LLVMBuildICmp(
            fcx.builder,
            LLVMIntPredicate::LLVMIntEQ,
            llvm_denom,
            // `as` has well-defined two's-complement semantics here; this
            // replaces an equivalent but needlessly unsafe `mem::transmute`
            LLVMConstInt(llvm_i64, (-1i64) as u64, 0),
            libcstr!("denom_is_neg_one"),
        );

        // If the denominator is -1 then we need to test the numerator
        LLVMBuildCondBr(
            fcx.builder,
            denom_is_neg_one,
            neg_one_denom_block,
            valid_div_block,
        );

        // Test if the numerator is i64::MIN
        LLVMPositionBuilderAtEnd(fcx.builder, neg_one_denom_block);
        let numer_is_int_min = LLVMBuildICmp(
            fcx.builder,
            LLVMIntPredicate::LLVMIntEQ,
            llvm_numer,
            LLVMConstInt(llvm_i64, std::i64::MIN as u64, 0),
            libcstr!("numer_is_int_min"),
        );

        // `i64::MIN / -1` overflows; route it to the shared panic block too
        // (the runtime message is still "division by zero")
        LLVMBuildCondBr(
            fcx.builder,
            numer_is_int_min,
            div_by_zero_block,
            valid_div_block,
        );

        // Build the common panic block
        LLVMPositionBuilderAtEnd(fcx.builder, div_by_zero_block);
gen_panic(tcx, mcx, fcx, "division by zero");

        LLVMPositionBuilderAtEnd(fcx.builder, valid_div_block);
        LLVMBuildSDiv(fcx.builder, llvm_numer, llvm_denom, libcstr!("quot"))
    }
}

/// Emits a call to the `llvm.sqrt.f64` intrinsic for the given radicand
pub(crate) fn gen_float_sqrt(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    fcx: &mut FunCtx,
    llvm_radicand: LLVMValueRef,
) -> LLVMValueRef {
    unsafe {
        let llvm_double = LLVMDoubleTypeInContext(tcx.llx);
        let llvm_param_types = &mut [llvm_double];
        let double_sqrt_llvm_type = LLVMFunctionType(
            llvm_double,
            llvm_param_types.as_mut_ptr(),
            llvm_param_types.len() as u32,
            0,
        );

        // Declare (or reuse) the intrinsic in the current module
        let double_sqrt_fun =
            mcx.get_function_or_insert(double_sqrt_llvm_type, b"llvm.sqrt.f64\0", |_| {});

        let llvm_sqrt_args = &mut [llvm_radicand];
        LLVMBuildCall(
            fcx.builder,
            double_sqrt_fun,
            llvm_sqrt_args.as_mut_ptr(),
            llvm_sqrt_args.len() as u32,
            libcstr!("sqrt"),
        )
    }
}

================================================ FILE: compiler/codegen/mod.rs ================================================

mod alloc;
mod analysis;
mod box_layout;
mod callee;
mod const_gen;
mod debug_info;
mod fun_gen;
pub(crate) mod jit;
mod libcstr;
mod math_gen;
mod mod_gen;
mod op_gen;
mod panic_gen;
pub(crate) mod program;
mod range_md;
mod record_struct;
pub(crate) mod target_gen;
mod target_machine;
mod vector_gen;

use crate::mir::ops::OpsAbi;
use arret_runtime::abitype;

/// ABI of a generated function: whether it takes the implicit task parameter,
/// its parameter ABI types and its return ABI type
#[derive(Debug, PartialEq, Clone)]
pub struct GenAbi {
    pub takes_task: bool,
    pub params: Box<[abitype::ParamAbiType]>,
    pub ret: abitype::RetAbiType,
}

impl GenAbi {
    // ABI shared by all function thunks: (captures, boxed arg list) -> Any,
    // plus the implicit task
    pub fn thunk_abi() -> GenAbi {
        GenAbi {
            takes_task: true,
            params: Box::new([
                abitype::BoxedAbiType::Any.into(),
                abitype::TOP_LIST_BOXED_ABI_TYPE.into(),
            ]),
            ret: abitype::BoxedAbiType::Any.into(),
        }
    }
}

impl<'a> From<&'a OpsAbi> for GenAbi {
    fn from(ops_abi: &'a OpsAbi) -> GenAbi {
        GenAbi {
            // Generated code always receives the implicit task parameter
            takes_task: true,
            params: ops_abi
                .params
                .iter()
                .map(|abi_type| abi_type.clone().into())
                .collect(),
            ret: ops_abi.ret.clone(),
        }
    }
}

/// Initialises LLVM
///
/// This must be called before anything else in this module.
It can only be called from a single /// thread at once. pub fn initialise_llvm(support_cross_compilation: bool) { use llvm_sys::target::*; unsafe { if support_cross_compilation { LLVM_InitializeAllTargetInfos(); LLVM_InitializeAllTargets(); LLVM_InitializeAllTargetMCs(); LLVM_InitializeAllAsmPrinters(); } else { LLVM_InitializeNativeTarget(); LLVM_InitializeNativeAsmPrinter(); } } } #[cfg(test)] pub(crate) mod test { use super::*; use std::sync::Once; static INITIALISE_TEST_LLVM: Once = Once::new(); pub fn initialise_test_llvm() { INITIALISE_TEST_LLVM.call_once(|| { initialise_llvm(false); }); } } ================================================ FILE: compiler/codegen/mod_gen.rs ================================================ use std::collections::HashMap; use llvm_sys::core::*; use llvm_sys::prelude::*; use llvm_sys::target::*; use llvm_sys::target_machine::*; use llvm_sys::LLVMLinkage; use arret_runtime::boxed::RecordClassId; use arret_runtime::intern; use crate::codegen::analysis::AnalysedMod; use crate::codegen::debug_info::DebugInfoBuilder; use crate::codegen::record_struct; use crate::codegen::target_gen::TargetCtx; use crate::mir::ops; use crate::source::SourceLoader; pub struct ModCtx<'am, 'sl, 'interner> { pub module: LLVMModuleRef, analysed_mod: &'am AnalysedMod<'am>, di_builder: Option>, llvm_private_funs: HashMap, jit_interner: Option<&'interner mut intern::Interner>, has_jit_record_struct_class_ids: bool, record_struct_class_ids: HashMap, record_structs: Vec, record_class_id_llvm_values: Vec, function_pass_manager: LLVMPassManagerRef, } pub struct GeneratedMod { pub llvm_module: LLVMModuleRef, pub llvm_entry_fun: LLVMValueRef, pub llvm_global_interned_names: LLVMValueRef, pub llvm_classmap_classes: LLVMValueRef, } impl<'am, 'sl, 'interner> ModCtx<'am, 'sl, 'interner> { /// Constructs a new module context with the given name /// /// Note that the module name in LLVM is not arbitrary. 
For instance, in the ORC JIT it will /// shadow exported symbol names. This identifier should be as unique and descriptive as /// possible. fn new( tcx: &mut TargetCtx, name: &[u8], analysed_mod: &'am AnalysedMod<'am>, jit_interner: Option<&'interner mut intern::Interner>, jit_record_struct_class_ids: HashMap, debug_source_loader: Option<&'sl SourceLoader>, ) -> Self { use crate::codegen::fun_gen::declare_fun; use llvm_sys::transforms::pass_manager_builder::*; // Hoist these out of the unsafe block let module; let function_pass_manager; unsafe { module = LLVMModuleCreateWithNameInContext(name.as_ptr() as *const _, tcx.llx); LLVMSetModuleDataLayout(module, tcx.target_data()); let target_triple = LLVMGetTargetMachineTriple(tcx.target_machine()); LLVMSetTarget(module, target_triple); LLVMDisposeMessage(target_triple); function_pass_manager = LLVMCreateFunctionPassManagerForModule(module); if tcx.optimising() { let fpmb = LLVMPassManagerBuilderCreate(); LLVMPassManagerBuilderSetOptLevel(fpmb, 2); LLVMPassManagerBuilderPopulateFunctionPassManager(fpmb, function_pass_manager); LLVMPassManagerBuilderDispose(fpmb); } } let di_builder = debug_source_loader.map(|source_loader| { DebugInfoBuilder::new( source_loader, tcx.optimising(), analysed_mod.entry_fun().ops_fun.span, module, ) }); // Forward declare all our private funs // Analysis has determined all of these are used let llvm_private_funs = analysed_mod .private_funs() .map(|(private_fun_id, analysed_fun)| { let llvm_fun = declare_fun(tcx, module, analysed_fun.ops_fun); (*private_fun_id, llvm_fun) }) .collect(); ModCtx { module, analysed_mod, di_builder, llvm_private_funs, jit_interner, has_jit_record_struct_class_ids: !jit_record_struct_class_ids.is_empty(), record_struct_class_ids: jit_record_struct_class_ids, record_structs: vec![], record_class_id_llvm_values: vec![], function_pass_manager, } } pub fn intern_name(&mut self, name: &str) -> intern::InternedSym { if let Some(ref mut jit_interner) = self.jit_interner { 
jit_interner.intern_static(name) } else if let Some(interned_sym) = intern::InternedSym::try_from_inline_name(name) { interned_sym } else { *self .analysed_mod .global_interned_names() .get(name) .expect("encountered name not found during analysis") } } pub fn record_class_id_for_struct( &mut self, record_struct: &ops::RecordStructId, ) -> RecordClassId { if let Some(record_class_id) = self.record_struct_class_ids.get(record_struct) { return *record_class_id; } let record_class_id = self.record_structs.len() as u32; self.record_structs.push(record_struct.clone()); self.record_struct_class_ids .insert(record_struct.clone(), record_class_id); record_class_id } pub fn add_record_class_id_range_metadata(&mut self, record_class_id_llvm_value: LLVMValueRef) { // This is a bit of a hack - we don't know the range of the record class IDs until we // finish generating the module. self.record_class_id_llvm_values .push(record_class_id_llvm_value); } pub fn llvm_private_fun(&self, private_fun_id: ops::PrivateFunId) -> LLVMValueRef { self.llvm_private_funs[&private_fun_id] } pub fn get_global_or_insert( &mut self, llvm_type: LLVMTypeRef, name: &[u8], initial_value: F, ) -> LLVMValueRef where F: FnOnce() -> LLVMValueRef, { unsafe { let global = LLVMGetNamedGlobal(self.module, name.as_ptr() as *const _); if !global.is_null() { return global; } let global = LLVMAddGlobal(self.module, llvm_type, name.as_ptr() as *const _); LLVMSetInitializer(global, initial_value()); global } } pub fn get_function_or_insert( &mut self, function_type: LLVMTypeRef, name: &[u8], initialise: F, ) -> LLVMValueRef where F: FnOnce(LLVMValueRef), { unsafe { let function = LLVMGetNamedFunction(self.module, name.as_ptr() as *const _); if !function.is_null() { return function; } let function = LLVMAddFunction(self.module, name.as_ptr() as *const _, function_type); initialise(function); function } } pub fn optimise_function(&mut self, function: LLVMValueRef) { unsafe { 
LLVMRunFunctionPassManager(self.function_pass_manager, function); } } fn finalise_record_class_id_range_metadata(&mut self, tcx: &mut TargetCtx) { unsafe { if self.has_jit_record_struct_class_ids { // These are from a distinct range; it's too much effort to include them in the JIT // case so just skip generating metadata. return; } let mut llvm_range_values = [ LLVMValueAsMetadata(LLVMConstInt(tcx.record_class_id_llvm_type(), 0, 0)), LLVMValueAsMetadata(LLVMConstInt( tcx.record_class_id_llvm_type(), self.record_structs.len() as u64, 0, )), ]; let range_md_kind_id = tcx.llvm_md_kind_id_for_name("range"); let record_class_id_range_md = LLVMMDNodeInContext2( tcx.llx, llvm_range_values.as_mut_ptr(), llvm_range_values.len(), ); for llvm_value in self.record_class_id_llvm_values.iter() { LLVMSetMetadata( *llvm_value, range_md_kind_id, LLVMMetadataAsValue(tcx.llx, record_class_id_range_md), ); } } } /// Finalise the module and return the LLVMModuleRef /// /// This will verify the module's correctness and dump the LLVM IR to stdout if the /// `ARRET_DUMP_LLVM` environment variable is set fn into_generated_mod(mut self, tcx: &mut TargetCtx) -> GeneratedMod { use crate::codegen::analysis::AnalysedFun; use crate::codegen::const_gen::gen_global_interned_names; use crate::codegen::fun_gen::{declare_fun, define_fun}; // Define our entry fun let AnalysedFun { ops_fun: entry_ops_fun, captures: entry_captures, } = self.analysed_mod.entry_fun(); let llvm_entry_fun = declare_fun(tcx, self.module, entry_ops_fun); define_fun( tcx, &mut self, entry_ops_fun, entry_captures, llvm_entry_fun, ); if let Some(ref mut di_builder) = self.di_builder { di_builder.add_function_debug_info( entry_ops_fun.span, entry_ops_fun.source_name.as_ref(), llvm_entry_fun, ); } // Define all of our private funs for (private_fun_id, analysed_fun) in self.analysed_mod.private_funs() { let AnalysedFun { ops_fun, captures } = analysed_fun; let llvm_fun = self.llvm_private_funs[private_fun_id]; define_fun(tcx, &mut 
self, ops_fun, captures, llvm_fun);

            if let Some(ref mut di_builder) = self.di_builder {
                di_builder.add_function_debug_info(
                    ops_fun.span,
                    ops_fun.source_name.as_ref(),
                    llvm_fun,
                );
            }

            // Private funs are only referenced from within this module
            unsafe {
                LLVMSetLinkage(llvm_fun, LLVMLinkage::LLVMPrivateLinkage);
            }
        }

        let llvm_global_interned_names = gen_global_interned_names(
            tcx,
            self.module,
            self.analysed_mod.global_interned_names().keys(),
        );

        let llvm_classmap_classes =
            record_struct::gen_classmap_classes(tcx, self.module, &self.record_structs);

        // Must run after all funs are defined so every class ID value is collected
        self.finalise_record_class_id_range_metadata(tcx);

        if let Some(ref mut di_builder) = self.di_builder {
            di_builder.finalise();
        }

        GeneratedMod {
            llvm_module: self.module,
            llvm_entry_fun,
            llvm_global_interned_names,
            llvm_classmap_classes,
        }
    }
}

/// Generates a new LLVM module for the given `AnalysedMod`
///
/// `jit_interner` / `jit_record_struct_class_ids` are presumably only supplied
/// when generating for the JIT, and `debug_source_loader` only when emitting
/// debug info — TODO confirm against callers (see `program_to_module`, which
/// passes `None` / an empty map).
pub fn gen_mod<'am, 'sl, 'interner>(
    tcx: &mut TargetCtx,
    name: &[u8],
    analysed_mod: &'am AnalysedMod<'am>,
    jit_interner: Option<&'interner mut intern::Interner>,
    jit_record_struct_class_ids: HashMap,
    debug_source_loader: Option<&'sl SourceLoader>,
) -> GeneratedMod {
    ModCtx::new(
        tcx,
        name,
        analysed_mod,
        jit_interner,
        jit_record_struct_class_ids,
        debug_source_loader,
    )
    .into_generated_mod(tcx)
}

impl Drop for ModCtx<'_, '_, '_> {
    fn drop(&mut self) {
        // Release the LLVM function pass manager this context created
        unsafe {
            LLVMDisposePassManager(self.function_pass_manager);
        }
    }
}

================================================ FILE: compiler/codegen/op_gen.rs ================================================

use llvm_sys::core::*;
use llvm_sys::prelude::*;
use llvm_sys::{LLVMCallConv, LLVMIntPredicate, LLVMRealPredicate};

use arret_runtime::boxed;

use crate::mir::ops::*;

use crate::codegen::fun_gen::FunCtx;
use crate::codegen::math_gen;
use crate::codegen::mod_gen::ModCtx;
use crate::codegen::panic_gen::gen_panic;
use crate::codegen::record_struct;
use crate::codegen::target_gen::TargetCtx;
use crate::codegen::{alloc, const_gen};
use crate::libcstr;

/// Maps a MIR `Comparison` to the corresponding signed LLVM integer predicate
fn comparison_to_llvm_int_pred(comparison: Comparison) -> LLVMIntPredicate {
    match comparison {
        Comparison::Lt => LLVMIntPredicate::LLVMIntSLT,
        Comparison::Le =>
LLVMIntPredicate::LLVMIntSLE,
        Comparison::Eq => LLVMIntPredicate::LLVMIntEQ,
        Comparison::Gt => LLVMIntPredicate::LLVMIntSGT,
        Comparison::Ge => LLVMIntPredicate::LLVMIntSGE,
    }
}

/// Maps a MIR `Comparison` to the corresponding ordered LLVM float predicate
fn comparison_to_llvm_real_pred(comparison: Comparison) -> LLVMRealPredicate {
    match comparison {
        Comparison::Lt => LLVMRealPredicate::LLVMRealOLT,
        Comparison::Le => LLVMRealPredicate::LLVMRealOLE,
        Comparison::Eq => LLVMRealPredicate::LLVMRealOEQ,
        Comparison::Gt => LLVMRealPredicate::LLVMRealOGT,
        Comparison::Ge => LLVMRealPredicate::LLVMRealOGE,
    }
}

/// Builds a signed integer comparison and binds its result to `reg`
///
/// `reg_name` is forwarded to LLVM as a raw C string; callers pass
/// NUL-terminated literals such as `"int_equal\0"`.
fn gen_int_compare(
    fcx: &mut FunCtx,
    reg: RegId,
    comparison: Comparison,
    lhs_reg: RegId,
    rhs_reg: RegId,
    reg_name: &str,
) {
    unsafe {
        fcx.regs.insert(
            reg,
            LLVMBuildICmp(
                fcx.builder,
                comparison_to_llvm_int_pred(comparison),
                fcx.regs[&lhs_reg],
                fcx.regs[&rhs_reg],
                reg_name.as_ptr() as *const _,
            ),
        );
    }
}

/// Generates LLVM IR for a single MIR op, binding any result in `fcx.regs`
///
/// Ops that allocate boxed values draw their box source from `active_alloc`.
fn gen_op(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    fcx: &mut FunCtx,
    active_alloc: &mut alloc::ActiveAlloc<'_>,
    op: &Op,
) {
    unsafe {
        match &op.kind {
            OpKind::ConstBoxedNil(reg, _) => {
                let llvm_value = const_gen::gen_boxed_nil(tcx, mcx);
                fcx.regs.insert(*reg, llvm_value);
            }
            OpKind::ConstBoxedTrue(reg, _) => {
                let llvm_value =
                    tcx.ptr_to_singleton_box(mcx.module, boxed::TypeTag::True, b"ARRET_TRUE\0");
                fcx.regs.insert(*reg, llvm_value);
            }
            OpKind::ConstBoxedFalse(reg, _) => {
                let llvm_value =
                    tcx.ptr_to_singleton_box(mcx.module, boxed::TypeTag::False, b"ARRET_FALSE\0");
                fcx.regs.insert(*reg, llvm_value);
            }
            OpKind::ConstInt64(reg, value) => {
                let llvm_value = LLVMConstInt(LLVMInt64TypeInContext(tcx.llx), *value as u64, 1);
                fcx.regs.insert(*reg, llvm_value);
            }
            OpKind::ConstFloat(reg, value) => {
                let llvm_value = LLVMConstReal(LLVMDoubleTypeInContext(tcx.llx), *value);
                fcx.regs.insert(*reg, llvm_value);
            }
            OpKind::ConstChar(reg, value) => {
                let llvm_value = LLVMConstInt(LLVMInt32TypeInContext(tcx.llx), *value as u64, 1);
                fcx.regs.insert(*reg, llvm_value);
            }
            OpKind::ConstBool(reg, value) => {
                let llvm_value =
LLVMConstInt(LLVMInt1TypeInContext(tcx.llx), *value as u64, 1); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstInternedSym(reg, value) => { let interned_sym = mcx.intern_name(value); let llvm_value = LLVMConstInt( LLVMInt64TypeInContext(tcx.llx), interned_sym.to_raw_u64(), 1, ); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstTypeTag(reg, type_tag) => { let llvm_value = LLVMConstInt(LLVMInt8TypeInContext(tcx.llx), *type_tag as u64, 1); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstRecordClassId(reg, record_struct) => { let record_class_id = mcx.record_class_id_for_struct(record_struct); let llvm_value = LLVMConstInt( tcx.record_class_id_llvm_type(), u64::from(record_class_id), 1, ); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedInt(reg, value) => { let llvm_value = const_gen::gen_boxed_int(tcx, mcx, *value); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedFloat(reg, value) => { let llvm_value = const_gen::gen_boxed_float(tcx, mcx, *value); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedChar(reg, value) => { let llvm_value = const_gen::gen_boxed_char(tcx, mcx, *value); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedFunThunk( reg, BoxFunThunkOp { captures_reg, callee, }, ) => { let llvm_entry_point = gen_callee_entry_point(tcx, mcx, fcx, callee); let llvm_env = fcx.regs[captures_reg]; let llvm_value = const_gen::gen_boxed_fun_thunk(tcx, mcx, llvm_env, llvm_entry_point); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedPair( reg, BoxPairOp { head_reg, rest_reg, list_len_reg, }, ) => { let llvm_head = fcx.regs[head_reg]; let llvm_rest = fcx.regs[rest_reg]; let llvm_list_len = fcx.regs[list_len_reg]; let llvm_value = const_gen::gen_boxed_pair(tcx, mcx, llvm_head, llvm_rest, llvm_list_len); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedStr(reg, value) => { let llvm_value = const_gen::gen_boxed_str(tcx, mcx, value.as_ref()); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedSym(reg, value) => { let llvm_value = 
const_gen::gen_boxed_sym(tcx, mcx, value.as_ref()); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedVector(reg, elements) => { let llvm_value = const_gen::gen_boxed_vector( tcx, mcx, elements.iter().map(|element| fcx.regs[element]), ); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedSet(reg, elements) => { let llvm_value = const_gen::gen_boxed_set( tcx, mcx, elements.iter().map(|element| fcx.regs[element]), ); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstBoxedMap(reg, entries) => { let llvm_value = const_gen::gen_boxed_map( tcx, mcx, entries .iter() .map(|(key, value)| (fcx.regs[key], fcx.regs[value])), ); fcx.regs.insert(*reg, llvm_value); } OpKind::ConstCastBoxed(reg, CastBoxedOp { from_reg, to_type }) => { let from_llvm_value = fcx.regs[from_reg]; let to_llvm_type = tcx.boxed_abi_to_llvm_ptr_type(to_type); let to_llvm_value = LLVMConstBitCast(from_llvm_value, to_llvm_type); fcx.regs.insert(*reg, to_llvm_value); } OpKind::CastBoxed(reg, CastBoxedOp { from_reg, to_type }) => { let from_llvm_value = fcx.regs[from_reg]; let to_llvm_type = tcx.boxed_abi_to_llvm_ptr_type(to_type); let to_llvm_value = LLVMBuildBitCast( fcx.builder, from_llvm_value, to_llvm_type, libcstr!("box_bitcast"), ); fcx.regs.insert(*reg, to_llvm_value); } OpKind::Alias(reg, from_reg) => { let from_llvm_value = fcx.regs[from_reg]; fcx.regs.insert(*reg, from_llvm_value); } OpKind::Call(reg, CallOp { callee, args, .. 
}) => { use crate::codegen::callee; let llvm_fun = gen_callee_entry_point(tcx, mcx, fcx, callee); let takes_task = callee::callee_takes_task(callee); let task_reg_iter = Some(fcx.current_task).filter(|_| takes_task).into_iter(); let mut llvm_args = task_reg_iter .chain(args.iter().map(|param_reg| fcx.regs[param_reg])) .collect::>(); let llvm_ret = LLVMBuildCall( fcx.builder, llvm_fun, llvm_args.as_mut_ptr(), llvm_args.len() as u32, libcstr!(""), ); let call_conv = callee::callee_call_conv(mcx, callee); LLVMSetInstructionCallConv(llvm_ret, call_conv); fcx.regs.insert(*reg, llvm_ret); } OpKind::TailCall(reg, TailCallOp { args, .. }) => { let mut llvm_args = std::iter::once(fcx.current_task) .chain(args.iter().map(|param_reg| fcx.regs[param_reg])) .collect::>(); let llvm_ret = LLVMBuildCall( fcx.builder, fcx.function, llvm_args.as_mut_ptr(), llvm_args.len() as u32, libcstr!(""), ); LLVMSetTailCall(llvm_ret, 1); LLVMSetInstructionCallConv(llvm_ret, LLVMCallConv::LLVMFastCallConv as u32); fcx.regs.insert(*reg, llvm_ret); } OpKind::Ret(reg) => { let llvm_value = fcx.regs[reg]; LLVMBuildRet(fcx.builder, llvm_value); } OpKind::RetVoid => { LLVMBuildRetVoid(fcx.builder); } OpKind::Unreachable => { LLVMBuildUnreachable(fcx.builder); } OpKind::Panic(message) => { gen_panic(tcx, mcx, fcx, message); } OpKind::LoadBoxedTypeTag( reg, LoadBoxedTypeTagOp { subject_reg, possible_type_tags, }, ) => { use crate::codegen::range_md::int_range_md_node; let llvm_any = fcx.regs[subject_reg]; let gep_indices = &mut [ LLVMConstInt(LLVMInt32TypeInContext(tcx.llx), 0, 0), LLVMConstInt(LLVMInt32TypeInContext(tcx.llx), 0, 0), LLVMConstInt(LLVMInt32TypeInContext(tcx.llx), 0, 0), ]; let llvm_type_tag_ptr = LLVMBuildInBoundsGEP( fcx.builder, llvm_any, gep_indices.as_mut_ptr(), gep_indices.len() as u32, libcstr!("type_tag_ptr"), ); let llvm_type_tag = LLVMBuildLoad(fcx.builder, llvm_type_tag_ptr, libcstr!("type_tag")); let llvm_i8 = LLVMInt8TypeInContext(tcx.llx); let possible_type_tag_md = 
int_range_md_node( tcx.llx, llvm_i8, possible_type_tags .into_iter() .map(|type_tag| type_tag as i64), ); let range_md_kind_id = tcx.llvm_md_kind_id_for_name("range"); LLVMSetMetadata( llvm_type_tag, range_md_kind_id, LLVMMetadataAsValue(tcx.llx, possible_type_tag_md), ); tcx.add_invariant_load_metadata(llvm_type_tag); fcx.regs.insert(*reg, llvm_type_tag); } OpKind::LoadBoxedListLen( reg, LoadBoxedListLenOp { list_reg, min_list_len, }, ) => { let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let llvm_list = fcx.regs[list_reg]; let list_len_ptr = LLVMBuildStructGEP(fcx.builder, llvm_list, 1, libcstr!("list_len_ptr")); let llvm_list_len = LLVMBuildLoad(fcx.builder, list_len_ptr, libcstr!("list_len")); tcx.add_invariant_load_metadata(llvm_list_len); // Every list element needs at least one pair. This means there's a maximum list // length that can fit in our address space. let max_list_len = std::u64::MAX / std::mem::size_of::() as u64; let mut llvm_range_values = [ LLVMValueAsMetadata(LLVMConstInt(llvm_i64, *min_list_len as u64, 0)), LLVMValueAsMetadata(LLVMConstInt(llvm_i64, max_list_len + 1, 0)), ]; let range_md_kind_id = tcx.llvm_md_kind_id_for_name("list_len_range"); let list_len_range_md = LLVMMDNodeInContext2( tcx.llx, llvm_range_values.as_mut_ptr(), llvm_range_values.len(), ); LLVMSetMetadata( llvm_list_len, range_md_kind_id, LLVMMetadataAsValue(tcx.llx, list_len_range_md), ); fcx.regs.insert(*reg, llvm_list_len); } OpKind::LoadBoxedPairHead(reg, pair_reg) => { let llvm_pair = fcx.regs[pair_reg]; let head_ptr = LLVMBuildStructGEP(fcx.builder, llvm_pair, 2, libcstr!("head_ptr")); let llvm_head = LLVMBuildLoad(fcx.builder, head_ptr, libcstr!("head")); tcx.add_invariant_load_metadata(llvm_head); tcx.add_boxed_load_metadata(llvm_head); fcx.regs.insert(*reg, llvm_head); } OpKind::LoadBoxedPairRest(reg, pair_reg) => { let llvm_pair = fcx.regs[pair_reg]; let head_ptr = LLVMBuildStructGEP(fcx.builder, llvm_pair, 3, libcstr!("rest_ptr")); let llvm_rest = 
LLVMBuildLoad(fcx.builder, head_ptr, libcstr!("rest")); tcx.add_invariant_load_metadata(llvm_rest); tcx.add_boxed_load_metadata(llvm_rest); fcx.regs.insert(*reg, llvm_rest); } OpKind::LoadBoxedIntValue(reg, boxed_int_reg) => { let llvm_boxed_int = fcx.regs[boxed_int_reg]; let value_ptr = LLVMBuildStructGEP(fcx.builder, llvm_boxed_int, 1, libcstr!("int_value_ptr")); let llvm_value = LLVMBuildLoad(fcx.builder, value_ptr, libcstr!("int_value")); tcx.add_invariant_load_metadata(llvm_value); fcx.regs.insert(*reg, llvm_value); } OpKind::LoadBoxedSymInterned(reg, boxed_sym_reg) => { let llvm_boxed_sym = fcx.regs[boxed_sym_reg]; let value_ptr = LLVMBuildStructGEP( fcx.builder, llvm_boxed_sym, 1, libcstr!("interned_sym_ptr"), ); let llvm_value = LLVMBuildLoad(fcx.builder, value_ptr, libcstr!("interned_sym")); tcx.add_invariant_load_metadata(llvm_value); fcx.regs.insert(*reg, llvm_value); } OpKind::LoadBoxedFloatValue(reg, boxed_float_reg) => { let llvm_boxed_float = fcx.regs[boxed_float_reg]; let value_ptr = LLVMBuildStructGEP( fcx.builder, llvm_boxed_float, 1, libcstr!("float_value_ptr"), ); let llvm_value = LLVMBuildLoad(fcx.builder, value_ptr, libcstr!("float_value")); tcx.add_invariant_load_metadata(llvm_value); fcx.regs.insert(*reg, llvm_value); } OpKind::LoadBoxedCharValue(reg, boxed_char_reg) => { let llvm_boxed_char = fcx.regs[boxed_char_reg]; let value_ptr = LLVMBuildStructGEP(fcx.builder, llvm_boxed_char, 1, libcstr!("char_value_ptr")); let llvm_value = LLVMBuildLoad(fcx.builder, value_ptr, libcstr!("char_value")); tcx.add_invariant_load_metadata(llvm_value); tcx.add_char_codepoint_range_metadata(llvm_value); fcx.regs.insert(*reg, llvm_value); } OpKind::LoadBoxedFunThunkCaptures(reg, boxed_fun_thunk_reg) => { let llvm_boxed_fun_thunk = fcx.regs[boxed_fun_thunk_reg]; let captures_ptr = LLVMBuildStructGEP( fcx.builder, llvm_boxed_fun_thunk, 1, libcstr!("boxed_fun_thunk_captures_ptr"), ); let llvm_value = LLVMBuildLoad( fcx.builder, captures_ptr, 
libcstr!("boxed_fun_thunk_captures"), ); fcx.regs.insert(*reg, llvm_value); } OpKind::LoadBoxedRecordClassId(reg, boxed_record_reg) => { let llvm_boxed_record = fcx.regs[boxed_record_reg]; let value_ptr = LLVMBuildStructGEP( fcx.builder, llvm_boxed_record, record_struct::RECORD_CLASS_ID_INDEX, libcstr!("record_class_id_ptr"), ); let llvm_value = LLVMBuildLoad(fcx.builder, value_ptr, libcstr!("record_class_id")); mcx.add_record_class_id_range_metadata(llvm_value); tcx.add_invariant_load_metadata(llvm_value); fcx.regs.insert(*reg, llvm_value); } OpKind::LoadBoxedRecordField(reg, load_boxed_record_field_op) => { let LoadBoxedRecordFieldOp { record_reg, record_struct, field_index, } = load_boxed_record_field_op; let record_struct::TargetRecordStruct { record_storage, .. } = *tcx.target_record_struct(record_struct); let boxed_record_name = format!("boxed_{}_record\0", record_struct.source_name); let boxed_record_ptr_type = LLVMPointerType(tcx.record_struct_llvm_box_type(record_struct), 0); let llvm_boxed_record = LLVMBuildBitCast( fcx.builder, fcx.regs[record_reg], boxed_record_ptr_type, boxed_record_name.as_ptr() as *const _, ); let field_ptr = record_struct::gen_record_field_ptr( tcx, fcx.builder, record_storage, llvm_boxed_record, *field_index, b"record_field_ptr\0", ); let llvm_value = LLVMBuildLoad(fcx.builder, field_ptr, libcstr!("record_field_value")); tcx.add_invariant_load_metadata(llvm_value); fcx.regs.insert(*reg, llvm_value); } OpKind::LoadBoxedVectorLen(reg, vector_reg) => { use crate::codegen::vector_gen::load_boxed_vector_len; let llvm_boxed_vector = fcx.regs[vector_reg]; let llvm_vector_len = load_boxed_vector_len(tcx, fcx, llvm_boxed_vector); fcx.regs.insert(*reg, llvm_vector_len); } OpKind::LoadBoxedVectorMember( reg, LoadBoxedVectorMemberOp { vector_reg, known_vector_len, member_index, }, ) => { use crate::codegen::vector_gen::load_boxed_vector_member; let llvm_boxed_vector = fcx.regs[vector_reg]; let llvm_vector_member = load_boxed_vector_member( 
tcx, fcx, llvm_boxed_vector, *known_vector_len, *member_index, ); fcx.regs.insert(*reg, llvm_vector_member); } OpKind::Cond(cond_op) => { let cond_alloc_plan = active_alloc.next_cond_plan(); gen_cond(tcx, mcx, fcx, cond_op, cond_alloc_plan); } OpKind::AllocBoxedInt(reg, int_reg) => { let box_source = active_alloc.next_box_source(); let llvm_int = fcx.regs[int_reg]; let llvm_alloced = alloc::types::gen_alloc_int( tcx, fcx.builder, active_alloc, box_source, llvm_int, ); fcx.regs.insert(*reg, llvm_alloced); } OpKind::AllocBoxedFloat(reg, float_reg) => { let box_source = active_alloc.next_box_source(); let llvm_float = fcx.regs[float_reg]; let llvm_alloced = alloc::types::gen_alloc_float( tcx, fcx.builder, active_alloc, box_source, llvm_float, ); fcx.regs.insert(*reg, llvm_alloced); } OpKind::AllocBoxedChar(reg, char_reg) => { let box_source = active_alloc.next_box_source(); let llvm_char = fcx.regs[char_reg]; let llvm_alloced = alloc::types::gen_alloc_char( tcx, fcx.builder, active_alloc, box_source, llvm_char, ); fcx.regs.insert(*reg, llvm_alloced); } OpKind::AllocBoxedSym(reg, interned_sym_reg) => { let box_source = active_alloc.next_box_source(); let llvm_interned_sym = fcx.regs[interned_sym_reg]; let llvm_alloced = alloc::types::gen_alloc_sym( tcx, fcx.builder, active_alloc, box_source, llvm_interned_sym, ); fcx.regs.insert(*reg, llvm_alloced); } OpKind::AllocBoxedPair( reg, BoxPairOp { head_reg, rest_reg, list_len_reg, }, ) => { let box_source = active_alloc.next_box_source(); let input = alloc::types::PairInput { llvm_head: fcx.regs[head_reg], llvm_rest: fcx.regs[rest_reg], llvm_list_len: fcx.regs[list_len_reg], }; let llvm_value = alloc::types::gen_alloc_boxed_pair( tcx, fcx.builder, active_alloc, box_source, &input, ); fcx.regs.insert(*reg, llvm_value); } OpKind::AllocBoxedFunThunk( reg, BoxFunThunkOp { captures_reg, callee, }, ) => { let box_source = active_alloc.next_box_source(); let input = alloc::types::FunThunkInput { llvm_captures: 
fcx.regs[captures_reg], llvm_entry_point: gen_callee_entry_point(tcx, mcx, fcx, callee), }; let llvm_value = alloc::types::gen_alloc_boxed_fun_thunk( tcx, fcx.builder, active_alloc, box_source, &input, ); fcx.regs.insert(*reg, llvm_value); } OpKind::IntCompare( reg, CompareOp { comparison, lhs_reg, rhs_reg, }, ) => { let reg_name = if comparison == &Comparison::Eq { "int_equal\0" } else { "int_compare\0" }; gen_int_compare(fcx, *reg, *comparison, *lhs_reg, *rhs_reg, reg_name) } OpKind::BoolEqual(reg, BinaryOp { lhs_reg, rhs_reg }) => gen_int_compare( fcx, *reg, Comparison::Eq, *lhs_reg, *rhs_reg, "bool_equal\0", ), OpKind::CharEqual(reg, BinaryOp { lhs_reg, rhs_reg }) => gen_int_compare( fcx, *reg, Comparison::Eq, *lhs_reg, *rhs_reg, "char_equal\0", ), OpKind::InternedSymEqual(reg, BinaryOp { lhs_reg, rhs_reg }) => gen_int_compare( fcx, *reg, Comparison::Eq, *lhs_reg, *rhs_reg, "interned_sym_equal\0", ), OpKind::TypeTagEqual(reg, BinaryOp { lhs_reg, rhs_reg }) => gen_int_compare( fcx, *reg, Comparison::Eq, *lhs_reg, *rhs_reg, "type_tag_equal\0", ), OpKind::RecordClassIdEqual(reg, BinaryOp { lhs_reg, rhs_reg }) => gen_int_compare( fcx, *reg, Comparison::Eq, *lhs_reg, *rhs_reg, "record_class_id_equal\0", ), OpKind::FloatCompare( reg, CompareOp { comparison, lhs_reg, rhs_reg, }, ) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let reg_name = if comparison == &Comparison::Eq { "float_equal\0" } else { "float_compare\0" }; let llvm_value = LLVMBuildFCmp( fcx.builder, comparison_to_llvm_real_pred(*comparison), llvm_lhs, llvm_rhs, reg_name.as_ptr() as *const _, ); fcx.regs.insert(*reg, llvm_value); } OpKind::BoxIdentical(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let i64_lhs = LLVMBuildPtrToInt(fcx.builder, llvm_lhs, llvm_i64, libcstr!("lhs_as_int")); let i64_rhs = LLVMBuildPtrToInt(fcx.builder, llvm_rhs, llvm_i64, 
libcstr!("rhs_as_int")); let llvm_value = LLVMBuildICmp( fcx.builder, LLVMIntPredicate::LLVMIntEQ, i64_lhs, i64_rhs, libcstr!("box_identical"), ); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64ToFloat(reg, int64_reg) => { let llvm_i64 = fcx.regs[int64_reg]; let llvm_double = LLVMBuildSIToFP( fcx.builder, llvm_i64, LLVMDoubleTypeInContext(tcx.llx), libcstr!("i64_as_double"), ); fcx.regs.insert(*reg, llvm_double); } OpKind::FloatAdd(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildFAdd(fcx.builder, llvm_lhs, llvm_rhs, libcstr!("float_sum")); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64Add(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildNUWAdd(fcx.builder, llvm_lhs, llvm_rhs, libcstr!("sum")); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64CheckedAdd(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = math_gen::gen_checked_int_math( tcx, mcx, fcx, &math_gen::CHECKED_ADD, llvm_lhs, llvm_rhs, ); fcx.regs.insert(*reg, llvm_value); } OpKind::FloatMul(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildFMul(fcx.builder, llvm_lhs, llvm_rhs, libcstr!("float_product")); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64CheckedMul(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = math_gen::gen_checked_int_math( tcx, mcx, fcx, &math_gen::CHECKED_MUL, llvm_lhs, llvm_rhs, ); fcx.regs.insert(*reg, llvm_value); } OpKind::FloatSub(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildFSub( fcx.builder, llvm_lhs, llvm_rhs, libcstr!("float_difference"), ); fcx.regs.insert(*reg, llvm_value); } 
OpKind::Int64CheckedSub(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = math_gen::gen_checked_int_math( tcx, mcx, fcx, &math_gen::CHECKED_SUB, llvm_lhs, llvm_rhs, ); fcx.regs.insert(*reg, llvm_value); } OpKind::FloatDiv(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildFDiv(fcx.builder, llvm_lhs, llvm_rhs, libcstr!("float_quotient")); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64Div(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_numer = fcx.regs[lhs_reg]; let llvm_denom = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildSDiv(fcx.builder, llvm_numer, llvm_denom, libcstr!("quot")); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64CheckedDiv(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_numer = fcx.regs[lhs_reg]; let llvm_denom = fcx.regs[rhs_reg]; let llvm_value = math_gen::gen_checked_int_div(tcx, mcx, fcx, llvm_numer, llvm_denom); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64Rem(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_numer = fcx.regs[lhs_reg]; let llvm_denom = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildSRem(fcx.builder, llvm_numer, llvm_denom, libcstr!("rem")); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64CheckedRem(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_numer = fcx.regs[lhs_reg]; let llvm_denom = fcx.regs[rhs_reg]; let llvm_value = math_gen::gen_checked_int_rem(tcx, mcx, fcx, llvm_numer, llvm_denom); fcx.regs.insert(*reg, llvm_value); } OpKind::FloatSqrt(reg, radicand) => { let llvm_radicand = fcx.regs[radicand]; let llvm_value = math_gen::gen_float_sqrt(tcx, mcx, fcx, llvm_radicand); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64BitwiseAnd(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildAnd(fcx.builder, llvm_lhs, llvm_rhs, libcstr!("int_and")); fcx.regs.insert(*reg, 
llvm_value); } OpKind::Int64BitwiseOr(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildOr(fcx.builder, llvm_lhs, llvm_rhs, libcstr!("int_or")); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64BitwiseXor(reg, BinaryOp { lhs_reg, rhs_reg }) => { let llvm_lhs = fcx.regs[lhs_reg]; let llvm_rhs = fcx.regs[rhs_reg]; let llvm_value = LLVMBuildXor(fcx.builder, llvm_lhs, llvm_rhs, libcstr!("int_xor")); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64BitwiseNot(reg, int_reg) => { let llvm_int = fcx.regs[int_reg]; let llvm_value = LLVMBuildNot(fcx.builder, llvm_int, libcstr!("int_not")); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64ShiftLeft(reg, ShiftOp { int_reg, bit_count }) => { let llvm_int = fcx.regs[int_reg]; let llvm_value = LLVMBuildShl( fcx.builder, llvm_int, LLVMConstInt(LLVMInt64TypeInContext(tcx.llx), *bit_count as u64, 0), libcstr!("int_shl"), ); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64ArithmeticShiftRight(reg, ShiftOp { int_reg, bit_count }) => { let llvm_int = fcx.regs[int_reg]; let llvm_value = LLVMBuildAShr( fcx.builder, llvm_int, LLVMConstInt(LLVMInt64TypeInContext(tcx.llx), *bit_count as u64, 0), libcstr!("int_ashr"), ); fcx.regs.insert(*reg, llvm_value); } OpKind::Int64LogicalShiftRight(reg, ShiftOp { int_reg, bit_count }) => { let llvm_int = fcx.regs[int_reg]; let llvm_value = LLVMBuildLShr( fcx.builder, llvm_int, LLVMConstInt(LLVMInt64TypeInContext(tcx.llx), *bit_count as u64, 0), libcstr!("int_lshr"), ); fcx.regs.insert(*reg, llvm_value); } OpKind::MakeCallback( reg, MakeCallbackOp { callee, captures_reg, }, ) => { let llvm_captures = fcx.regs[captures_reg]; let llvm_entry_point = gen_callee_entry_point(tcx, mcx, fcx, callee); let entry_point_llvm_type = LLVMTypeOf(llvm_entry_point); let callback_type = tcx.callback_llvm_type(entry_point_llvm_type); let llvm_undef = LLVMGetUndef(callback_type); let llvm_with_captures = LLVMBuildInsertValue( 
fcx.builder,
                    llvm_undef,
                    llvm_captures,
                    0,
                    libcstr!("captures"),
                );
                let llvm_callback = LLVMBuildInsertValue(
                    fcx.builder,
                    llvm_with_captures,
                    llvm_entry_point,
                    1,
                    libcstr!("callback"),
                );
                fcx.regs.insert(*reg, llvm_callback);
            }
            OpKind::ConstBoxedRecord(
                reg,
                BoxRecordOp {
                    record_struct,
                    field_regs,
                },
            ) => {
                let llvm_fields: Box<[LLVMValueRef]> = field_regs
                    .iter()
                    .map(|field_reg| fcx.regs[field_reg])
                    .collect();

                let llvm_value = const_gen::gen_boxed_record(tcx, mcx, record_struct, &llvm_fields);
                fcx.regs.insert(*reg, llvm_value);
            }
            OpKind::AllocBoxedRecord(
                reg,
                BoxRecordOp {
                    record_struct,
                    field_regs,
                },
            ) => {
                let box_source = active_alloc.next_box_source();

                let llvm_fields = field_regs
                    .iter()
                    .map(|field_reg| fcx.regs[field_reg])
                    .collect();

                let input = alloc::types::RecordInput {
                    record_struct,
                    llvm_fields,
                };

                let llvm_value = alloc::types::gen_alloc_boxed_record(
                    tcx,
                    mcx,
                    fcx.builder,
                    active_alloc,
                    box_source,
                    &input,
                );
                fcx.regs.insert(*reg, llvm_value);
            }
        }
    }
}

/// Fills `block` with the ops of one `Cond` branch, then jumps to `cont_block`
///
/// The jump is skipped when the branch's last op is already a terminator
/// (e.g. `Ret` or `Panic`) — LLVM blocks may only have one terminator.
fn gen_cond_branch(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    fcx: &mut FunCtx,
    block: LLVMBasicBlockRef,
    alloc_plan: Vec>,
    cont_block: LLVMBasicBlockRef,
) {
    unsafe {
        LLVMPositionBuilderAtEnd(fcx.builder, block);

        // We can't branch if we terminated
        let will_terminate = alloc_plan
            .last()
            .and_then(|alloc_atom| alloc_atom.ops().last())
            .filter(|op| op.kind().is_terminator())
            .is_some();

        for alloc_atom in alloc_plan {
            gen_alloc_atom(tcx, mcx, fcx, alloc_atom);
        }

        if !will_terminate {
            LLVMBuildBr(fcx.builder, cont_block);
        }
    }
}

/// Generates both branches of a MIR `Cond` plus its continuation block
///
/// When the op carries a `RegPhi`, a phi in the continuation block joins the
/// two branch result registers into the output register.
fn gen_cond(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    fcx: &mut FunCtx,
    cond_op: &CondOp,
    cond_alloc_plan: alloc::CondPlan<'_>,
) {
    let CondOp {
        reg_phi, test_reg, ..
} = cond_op;

    let alloc::CondPlan {
        true_subplan: true_alloc_subplan,
        false_subplan: false_alloc_subplan,
    } = cond_alloc_plan;

    unsafe {
        let true_block =
            LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("cond_true"));
        let false_block =
            LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("cond_false"));
        let cont_block =
            LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("cond_cont"));

        let test_llvm = fcx.regs[test_reg];
        LLVMBuildCondBr(fcx.builder, test_llvm, true_block, false_block);

        // Branch bodies may open further basic blocks; capture the block the
        // builder actually finished in so the phi's incoming edges are correct
        gen_cond_branch(tcx, mcx, fcx, true_block, true_alloc_subplan, cont_block);
        let mut final_true_block = LLVMGetInsertBlock(fcx.builder);

        gen_cond_branch(tcx, mcx, fcx, false_block, false_alloc_subplan, cont_block);
        let mut final_false_block = LLVMGetInsertBlock(fcx.builder);

        LLVMPositionBuilderAtEnd(fcx.builder, cont_block);

        if let Some(RegPhi {
            output_reg,
            true_result_reg,
            false_result_reg,
        }) = reg_phi
        {
            let mut true_result_llvm = fcx.regs[true_result_reg];
            let mut false_result_llvm = fcx.regs[false_result_reg];

            let phi_value = LLVMBuildPhi(
                fcx.builder,
                LLVMTypeOf(true_result_llvm),
                libcstr!("cond_phi"),
            );

            LLVMAddIncoming(
                phi_value,
                &mut true_result_llvm as *mut _,
                &mut final_true_block as *mut _,
                1,
            );
            LLVMAddIncoming(
                phi_value,
                &mut false_result_llvm as *mut _,
                &mut final_false_block as *mut _,
                1,
            );

            fcx.regs.insert(*output_reg, phi_value);
        }
    }
}

/// Resolves a MIR `Callee` to the LLVM value used as the call's entry point
fn gen_callee_entry_point(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    fcx: &mut FunCtx,
    callee: &Callee,
) -> LLVMValueRef {
    use crate::codegen::callee::*;
    match callee {
        Callee::PrivateFun(private_fun_id) => mcx.llvm_private_fun(*private_fun_id),
        Callee::BoxedFunThunk(fun_thunk_reg) => {
            // Entry point is loaded out of the thunk box at runtime
            let llvm_fun_thunk = fcx.regs[fun_thunk_reg];
            gen_boxed_fun_thunk_entry_point(fcx.builder, llvm_fun_thunk)
        }
        Callee::StaticSymbol(static_symbol) => {
            gen_static_symbol_entry_point(tcx, mcx, static_symbol)
        }
    }
}

/// Generates all ops in an `AllocAtom`, materialising its planned heap
/// allocation first; every op then consumes its box source from that allocation
pub(crate) fn gen_alloc_atom(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    fcx: &mut FunCtx,
alloc_atom: alloc::AllocAtom<'_>,
) {
    let ops = alloc_atom.ops();
    let mut active_alloc =
        alloc::core::atom_into_active_alloc(tcx, mcx, fcx.builder, fcx.current_task, alloc_atom);

    for op in ops {
        gen_op(tcx, mcx, fcx, &mut active_alloc, op);
    }

    // Catch planner/codegen mismatches: every planned box must have been used
    assert!(
        active_alloc.is_empty(),
        "did not consume entire active heap allocation"
    );
}

================================================ FILE: compiler/codegen/panic_gen.rs ================================================

use llvm_sys::core::*;
use llvm_sys::LLVMAttributeFunctionIndex;

use crate::codegen::const_gen::annotate_private_global;
use crate::codegen::fun_gen::FunCtx;
use crate::codegen::mod_gen::ModCtx;
use crate::codegen::target_gen::TargetCtx;
use crate::libcstr;

/// Generates a call to `arret_runtime_panic_with_string` followed by `unreachable`
///
/// The runtime function is declared on first use and annotated `cold` +
/// `noreturn`. `message` is embedded in the module as a private global.
pub(crate) fn gen_panic(
    tcx: &mut TargetCtx,
    mcx: &mut ModCtx<'_, '_, '_>,
    fcx: &mut FunCtx,
    message: &str,
) {
    unsafe {
        let llvm_i8 = LLVMInt8TypeInContext(tcx.llx);
        let llvm_i32 = LLVMInt32TypeInContext(tcx.llx);

        // `void panic_with_string(Task*, i8* message, i32 message_len)`
        let llvm_param_types = &mut [
            tcx.task_llvm_ptr_type(),
            LLVMPointerType(llvm_i8, 0),
            llvm_i32,
        ];

        let panic_with_string_llvm_type = LLVMFunctionType(
            LLVMVoidTypeInContext(tcx.llx),
            llvm_param_types.as_mut_ptr(),
            llvm_param_types.len() as u32,
            0,
        );

        let panic_with_string_fun = mcx.get_function_or_insert(
            panic_with_string_llvm_type,
            b"arret_runtime_panic_with_string\0",
            |panic_with_string_fun| {
                // Keep the panic path out of the hot path's branch layout
                LLVMAddAttributeAtIndex(
                    panic_with_string_fun,
                    LLVMAttributeFunctionIndex,
                    tcx.llvm_enum_attr_for_name("cold", 0),
                );
                LLVMAddAttributeAtIndex(
                    panic_with_string_fun,
                    LLVMAttributeFunctionIndex,
                    tcx.llvm_enum_attr_for_name("noreturn", 0),
                );
            },
        );

        let llvm_message_string =
            LLVMConstStringInContext(tcx.llx, message.as_ptr() as *mut _, message.len() as u32, 1);

        let llvm_message_global = LLVMAddGlobal(
            mcx.module,
            LLVMTypeOf(llvm_message_string),
            libcstr!("panic_message"),
        );
        LLVMSetInitializer(llvm_message_global, llvm_message_string);
        annotate_private_global(llvm_message_global);

        // GEP to the first byte of the message global
        let llvm_first_byte_gep_indices = &mut [LLVMConstInt(llvm_i32, 0, 0),
LLVMConstInt(llvm_i32, 0, 0)];
        let message_pointer = LLVMConstInBoundsGEP(
            llvm_message_global,
            llvm_first_byte_gep_indices.as_mut_ptr(),
            llvm_first_byte_gep_indices.len() as u32,
        );

        let panic_with_string_args = &mut [
            fcx.current_task,
            message_pointer,
            LLVMConstInt(llvm_i32, message.len() as u64, 0),
        ];

        LLVMBuildCall(
            fcx.builder,
            panic_with_string_fun,
            panic_with_string_args.as_mut_ptr(),
            panic_with_string_args.len() as u32,
            libcstr!(""),
        );

        // The panic never returns, so terminate the block
        LLVMBuildUnreachable(fcx.builder);
    }
}

================================================ FILE: compiler/codegen/program.rs ================================================

use std::collections::HashMap;
use std::ffi::{CStr, CString};
use std::sync::Arc;
use std::{env, fs, io, path, process, ptr};

use llvm_sys::core::*;
use llvm_sys::prelude::*;
use llvm_sys::target_machine::*;
use llvm_sys::LLVMLinkage;

use crate::codegen::analysis::AnalysedMod;
use crate::codegen::mod_gen::{gen_mod, GeneratedMod};
use crate::codegen::target_gen::TargetCtx;
use crate::context::LinkedLibrary;
use crate::libcstr;
use crate::mir;
use crate::SourceLoader;

/// Kind of artifact produced by `gen_program`
#[derive(Copy, Clone, PartialEq)]
pub enum OutputType {
    None,
    LlvmIr,
    Assembly,
    Object,
    Executable,
}

/// Code generation options, assembled builder-style via the `with_*` methods
#[derive(Copy, Clone, PartialEq)]
pub struct Options<'target> {
    // NOTE(review): `None` presumably selects the host target — confirm
    // against `create_target_machine`
    target_triple: Option<&'target str>,
    output_type: OutputType,
    llvm_opt: bool,
}

impl<'target> Options<'target> {
    /// Creates options with defaults: executable output, LLVM optimisation on
    pub fn new() -> Options<'static> {
        Options {
            target_triple: None,
            output_type: OutputType::Executable,
            llvm_opt: true,
        }
    }

    pub fn with_target_triple(self, target_triple: Option<&'target str>) -> Options<'target> {
        Options {
            target_triple,
            ..self
        }
    }

    pub fn with_llvm_opt(self, llvm_opt: bool) -> Options<'target> {
        Options { llvm_opt, ..self }
    }

    pub fn with_output_type(self, output_type: OutputType) -> Options<'target> {
        Options {
            output_type,
            ..self
        }
    }

    pub fn output_type(&self) -> OutputType {
        self.output_type
    }
}

impl Default for Options<'static> {
    fn default() -> Options<'static> {
        Options::new()
    }
}

/// Returns the LLVM function type of the generated Arret `main`: a function
/// taking a single task pointer and returning void
fn
arret_main_llvm_type(tcx: &mut TargetCtx) -> LLVMTypeRef { unsafe { let llvm_arg_types = &mut [tcx.task_llvm_ptr_type()]; LLVMFunctionType( LLVMVoidTypeInContext(tcx.llx), llvm_arg_types.as_mut_ptr(), llvm_arg_types.len() as u32, 0, ) } } fn c_main_llvm_type(tcx: &mut TargetCtx) -> LLVMTypeRef { unsafe { let llvm_argc_type = LLVMInt32TypeInContext(tcx.llx); let llvm_argv_type = LLVMPointerType(LLVMPointerType(LLVMInt8TypeInContext(tcx.llx), 0), 0); let llvm_ret_type = LLVMInt32TypeInContext(tcx.llx); let llvm_arg_types = &mut [llvm_argc_type, llvm_argv_type]; LLVMFunctionType( llvm_ret_type, llvm_arg_types.as_mut_ptr(), llvm_arg_types.len() as u32, 0, ) } } fn program_to_module( tcx: &mut TargetCtx, program: &mir::BuiltProgram, debug_source_loader: Option<&SourceLoader>, ) -> LLVMModuleRef { unsafe { let analysed_mod = AnalysedMod::new(&program.private_funs, &program.main); // Build our Arret funs let GeneratedMod { llvm_module, llvm_entry_fun: llvm_arret_main, llvm_global_interned_names, llvm_classmap_classes, } = gen_mod( tcx, b"program\0", &analysed_mod, None, HashMap::new(), debug_source_loader, ); LLVMSetLinkage(llvm_arret_main, LLVMLinkage::LLVMPrivateLinkage); // Declare our C main let builder = LLVMCreateBuilderInContext(tcx.llx); let c_main = LLVMAddFunction(llvm_module, libcstr!("main"), c_main_llvm_type(tcx)); let bb = LLVMAppendBasicBlockInContext(tcx.llx, c_main, libcstr!("entry")); LLVMPositionBuilderAtEnd(builder, bb); let classmap_class_ptr_type = LLVMPointerType(tcx.classmap_class_llvm_type(), 0); // Declare arret_runtime_launch_task let launch_task_llvm_arg_types = &mut [ LLVMTypeOf(llvm_global_interned_names), classmap_class_ptr_type, LLVMPointerType(arret_main_llvm_type(tcx), 0), ]; let launch_task_llvm_type = LLVMFunctionType( LLVMVoidTypeInContext(tcx.llx), launch_task_llvm_arg_types.as_mut_ptr(), launch_task_llvm_arg_types.len() as u32, 0, ); // And launch the task from C main let launch_task_llvm_fun = LLVMAddFunction( llvm_module, 
libcstr!("arret_runtime_launch_task"), launch_task_llvm_type, ); let launch_task_llvm_args = &mut [ llvm_global_interned_names, llvm_classmap_classes, llvm_arret_main, ]; LLVMBuildCall( builder, launch_task_llvm_fun, launch_task_llvm_args.as_mut_ptr(), launch_task_llvm_args.len() as u32, libcstr!(""), ); LLVMBuildRet(builder, LLVMConstInt(LLVMInt32TypeInContext(tcx.llx), 0, 0)); LLVMDisposeBuilder(builder); llvm_module } } fn target_triple_to_cc_args(target_triple: &str) -> Vec<&str> { // Try to use -m32 when possible for compatibility with GCC if (cfg!(target_arch = "x86_64") && target_triple.starts_with("i686-")) || (cfg!(target_arch = "aarch64") && target_triple.starts_with("arm")) { vec!["-m32"] } else { vec!["-target", target_triple] } } /// Generates code for the program with the given output type /// /// `codegen::initialise_llvm()` must be called before this. pub fn gen_program( options: Options<'_>, linked_libraries: &[Arc], program: &mir::BuiltProgram, output_file: &path::Path, debug_source_loader: Option<&SourceLoader>, ) { use crate::codegen::target_machine::create_target_machine; if env::var_os("ARRET_DUMP_MIR").is_some() { mir::print_program(&mut io::stdout().lock(), program, debug_source_loader).unwrap(); } let Options { target_triple, output_type, llvm_opt, } = options; let llvm_output_path = if output_type == OutputType::Executable { // When outputting an executable this is an intermediate file that we pass to our linker output_file.with_extension("o") } else { // Otherwise this is the final destination output_file.to_owned() }; let llvm_output_path_cstring = CString::new(llvm_output_path.to_str().unwrap()).unwrap(); let target_machine = create_target_machine( target_triple, LLVMRelocMode::LLVMRelocDynamicNoPic, LLVMCodeModel::LLVMCodeModelDefault, ); let mut tcx = TargetCtx::new(target_machine, llvm_opt); let module = program_to_module(&mut tcx, program, debug_source_loader); tcx.finish_module(module); unsafe { let mut error: *mut libc::c_char = 
ptr::null_mut(); let llvm_code_gen_file_type = match output_type { OutputType::None => { LLVMDisposeTargetMachine(target_machine); return; } OutputType::LlvmIr => { if LLVMPrintModuleToFile( module, llvm_output_path_cstring.as_ptr() as *mut _, &mut error as *mut _, ) != 0 { panic!( "LLVMPrintModuleToFile: {}", CStr::from_ptr(error).to_str().unwrap() ); } return; } OutputType::Assembly => LLVMCodeGenFileType::LLVMAssemblyFile, OutputType::Object | OutputType::Executable => LLVMCodeGenFileType::LLVMObjectFile, }; if LLVMTargetMachineEmitToFile( target_machine, module, llvm_output_path_cstring.as_ptr() as *mut _, llvm_code_gen_file_type, &mut error as *mut _, ) != 0 { panic!( "LLVMTargetMachineEmitToFile: {}", CStr::from_ptr(error).to_str().unwrap() ); } LLVMDisposeTargetMachine(target_machine); } if output_type == OutputType::Executable { let target_args = match target_triple { Some(triple) => target_triple_to_cc_args(triple), None => vec![], }; let status = process::Command::new("cc") .arg(llvm_output_path.clone()) .args(target_args) .arg("-o") .arg(output_file) .args(linked_libraries.iter().map(|l| l.target_path())) .arg("-pthread") .arg("-ldl") .arg("-lm") .status() .unwrap(); let _ = fs::remove_file(llvm_output_path); if !status.success() { panic!("Error invoking linker"); } } } ================================================ FILE: compiler/codegen/range_md.rs ================================================ use std::iter; use std::ops::Range; use llvm_sys::core::*; use llvm_sys::prelude::*; type Int = i64; struct IntRangeIter where I: Iterator, { inner_iter: iter::Peekable, } impl Iterator for IntRangeIter where I: Iterator, { type Item = Range; fn next(&mut self) -> Option> { let range_start = self.inner_iter.next()?; let mut range_length: Int = 1; while self.inner_iter.peek() == Some(&(range_start + range_length)) { self.inner_iter.next(); range_length += 1; } Some(range_start..range_start + (range_length as Int)) } } /// Finds ranges of consecutive integers 
from a sorted iterator fn find_int_ranges(input: impl Iterator) -> impl Iterator> { IntRangeIter { inner_iter: input.peekable(), } } /// Generates a range metadata node from a sorted iterator of possible values /// /// This does not handle minimum or maximum values of `Int` correctly! pub fn int_range_md_node( llx: LLVMContextRef, llvm_int_type: LLVMTypeRef, input: impl Iterator, ) -> LLVMMetadataRef { unsafe { let mut llvm_range_values: Vec = find_int_ranges(input) .flat_map(|range| iter::once(range.start).chain(iter::once(range.end))) .map(|value| LLVMConstInt(llvm_int_type, value as u64, 0)) .map(|value| LLVMValueAsMetadata(value)) .collect(); LLVMMDNodeInContext2(llx, llvm_range_values.as_mut_ptr(), llvm_range_values.len()) } } #[cfg(test)] mod test { use super::*; #[test] fn empty() { let iter = find_int_ranges([].iter().cloned()); assert_eq!(0, iter.count()); } #[test] fn single_value() { let mut iter = find_int_ranges([-5].iter().cloned()); assert_eq!(Some(-5..-4), iter.next()); assert_eq!(None, iter.next()); } #[test] fn single_range() { let mut iter = find_int_ranges([-1, 0, 1].iter().cloned()); assert_eq!(Some(-1..2), iter.next()); assert_eq!(None, iter.next()); } #[test] fn multi_range() { let mut iter = find_int_ranges([-5, -1, 0, 1, 90, 91, 92].iter().cloned()); assert_eq!(Some(-5..-4), iter.next()); assert_eq!(Some(-1..2), iter.next()); assert_eq!(Some(90..93), iter.next()); assert_eq!(None, iter.next()); } } ================================================ FILE: compiler/codegen/record_struct.rs ================================================ use std::alloc; use llvm_sys::core::*; use llvm_sys::prelude::*; use llvm_sys::target::*; use arret_runtime::boxed; use arret_runtime::class_map; use crate::codegen::const_gen::annotate_private_global; use crate::codegen::target_gen::TargetCtx; use crate::libcstr; use crate::mir::ops; pub const IS_INLINE_INDEX: u32 = 1; pub const CONTAINS_GC_REFS_INDEX: u32 = 2; pub const RECORD_CLASS_ID_INDEX: u32 = 3; pub 
const DATA_INDEX: u32 = 4; pub const EXTERNAL_COMPACT_LAYOUT_INDEX: u32 = 5; /// Adds internal member fields common to all inline and external records pub fn append_common_internal_members(tcx: &mut TargetCtx, members: &mut Vec) { unsafe { members.extend_from_slice(&[ // is_inline LLVMInt8TypeInContext(tcx.llx), // may_contain_gc_refs LLVMInt8TypeInContext(tcx.llx), // record_class_id tcx.record_class_id_llvm_type(), ]); } } #[derive(Clone)] pub struct TargetRecordStruct { pub data_layout: Option, pub record_storage: boxed::RecordStorage, pub llvm_data_type: LLVMTypeRef, pub classmap_class: class_map::BoxedClass, } impl TargetRecordStruct { pub fn from_mir_record_struct( tcx: &mut TargetCtx, record_struct: &ops::RecordStructId, ) -> Self { let mut members: Box<[LLVMTypeRef]> = record_struct .field_abi_types .iter() .map(|abi_type| tcx.abi_to_llvm_type(abi_type)) .collect(); let record_data_name = format!("{}_data\0", record_struct.source_name); unsafe { let llvm_data_type = LLVMStructCreateNamed(tcx.llx, record_data_name.as_ptr() as *const _); LLVMStructSetBody( llvm_data_type, members.as_mut_ptr(), members.len() as u32, 0, ); let data_layout = if members.is_empty() { None } else { // Convert our LLVM layout information to Rust's `std::alloc::Layout` let align = LLVMABIAlignmentOfType(tcx.target_data(), llvm_data_type) as usize; let size = LLVMABISizeOfType(tcx.target_data(), llvm_data_type) as usize; Some(alloc::Layout::from_size_align_unchecked(size, align)) }; let record_storage = boxed::Record::storage_for_data_layout(data_layout); let classmap_class = class_map::BoxedClass::from_fields( record_struct .field_abi_types .iter() .enumerate() .map(|(index, field_abi_type)| { let field_type = class_map::FieldType::from_abi_type(field_abi_type); let offset = LLVMOffsetOfElement(tcx.target_data(), llvm_data_type, index as u32) as usize; class_map::Field::new(field_type, offset) }), ); Self { data_layout, record_storage, llvm_data_type, classmap_class, } } } } pub fn 
gen_classmap_classes( tcx: &mut TargetCtx, llvm_module: LLVMModuleRef, record_structs: &[ops::RecordStructId], ) -> LLVMValueRef { if record_structs.is_empty() { return unsafe { LLVMConstPointerNull(LLVMPointerType(tcx.classmap_class_llvm_type(), 0)) }; } let llvm_classmap_field_type = tcx.classmap_field_llvm_type(); let llvm_i8 = unsafe { LLVMInt8TypeInContext(tcx.llx) }; let llvm_i32 = unsafe { LLVMInt32TypeInContext(tcx.llx) }; let llvm_first_element_gep_indices = unsafe { &mut [LLVMConstInt(llvm_i32, 0, 0), LLVMConstInt(llvm_i32, 0, 0)] }; let mut llvm_classmap_classes: Vec = record_structs .iter() .map(|record_struct| { let classmap_class = tcx .target_record_struct(record_struct) .classmap_class .as_ref(); if classmap_class.is_empty() { return unsafe { LLVMConstPointerNull(LLVMPointerType(llvm_classmap_field_type, 0)) }; } let mut llvm_classmap_class_fields: Vec = classmap_class .field_iter() .map(|field| unsafe { // This is the layout of `class_map::Field` let llvm_offset = LLVMConstInt(llvm_i32, field.offset() as u64, 0); let llvm_field_type = LLVMConstInt(llvm_i8, field.field_type() as u64, 0); let llvm_is_last = LLVMConstInt(llvm_i8, field.is_last() as u64, 0); let members = &mut [llvm_offset, llvm_field_type, llvm_is_last]; LLVMConstNamedStruct( llvm_classmap_field_type, members.as_mut_ptr(), members.len() as u32, ) }) .collect(); unsafe { let llvm_classmap_class = LLVMConstArray( llvm_classmap_field_type, llvm_classmap_class_fields.as_mut_ptr(), llvm_classmap_class_fields.len() as u32, ); let classmap_class_global_name = format!("{}_classmap\0", record_struct.source_name); let llvm_classmap_class_global = LLVMAddGlobal( llvm_module, LLVMTypeOf(llvm_classmap_class), classmap_class_global_name.as_ptr() as *const _, ); LLVMSetInitializer(llvm_classmap_class_global, llvm_classmap_class); annotate_private_global(llvm_classmap_class_global); LLVMConstInBoundsGEP( llvm_classmap_class_global, llvm_first_element_gep_indices.as_mut_ptr(), 
llvm_first_element_gep_indices.len() as u32, ) } }) .collect(); unsafe { let llvm_classmap = LLVMConstArray( LLVMPointerType(llvm_classmap_field_type, 0), llvm_classmap_classes.as_mut_ptr(), llvm_classmap_classes.len() as u32, ); let llvm_classmap_global = LLVMAddGlobal( llvm_module, LLVMTypeOf(llvm_classmap), libcstr!("classmap_classes"), ); LLVMSetInitializer(llvm_classmap_global, llvm_classmap); annotate_private_global(llvm_classmap_global); LLVMConstInBoundsGEP( llvm_classmap_global, llvm_first_element_gep_indices.as_mut_ptr(), llvm_first_element_gep_indices.len() as u32, ) } } pub fn gen_record_field_ptr( tcx: &TargetCtx, builder: LLVMBuilderRef, record_storage: boxed::RecordStorage, llvm_boxed_record: LLVMValueRef, field_index: usize, pointer_name: &[u8], ) -> LLVMValueRef { unsafe { let llvm_i32 = LLVMInt32TypeInContext(tcx.llx); match record_storage { boxed::RecordStorage::Inline(_) => { let field_gep_indices = &mut [ LLVMConstInt(llvm_i32, 0, 0), LLVMConstInt(llvm_i32, u64::from(DATA_INDEX), 0), LLVMConstInt(llvm_i32, field_index as u64, 0), ]; LLVMBuildInBoundsGEP( builder, llvm_boxed_record, field_gep_indices.as_mut_ptr(), field_gep_indices.len() as u32, pointer_name.as_ptr() as *const _, ) } boxed::RecordStorage::External => { let data_ptr_gep_indices = &mut [ LLVMConstInt(llvm_i32, 0, 0), LLVMConstInt(llvm_i32, u64::from(DATA_INDEX), 0), ]; let llvm_record_data_ptr_ptr = LLVMBuildInBoundsGEP( builder, llvm_boxed_record, data_ptr_gep_indices.as_mut_ptr(), data_ptr_gep_indices.len() as u32, libcstr!("record_data_ptr_ptr"), ); let llvm_record_data_ptr = LLVMBuildLoad( builder, llvm_record_data_ptr_ptr, libcstr!("record_data_ptr"), ); tcx.add_invariant_load_metadata(llvm_record_data_ptr); let field_gep_indices = &mut [ LLVMConstInt(llvm_i32, 0, 0), LLVMConstInt(llvm_i32, field_index as u64, 0), ]; LLVMBuildInBoundsGEP( builder, llvm_record_data_ptr, field_gep_indices.as_mut_ptr(), field_gep_indices.len() as u32, pointer_name.as_ptr() as *const _, ) } } } } 
================================================ FILE: compiler/codegen/target_gen.rs ================================================ use std::collections::HashMap; use llvm_sys::core::*; use llvm_sys::prelude::*; use llvm_sys::target::*; use llvm_sys::target_machine::*; use llvm_sys::{LLVMAttributeReturnIndex, LLVMLinkage}; use arret_runtime::abitype::{AbiType, BoxedAbiType, RetAbiType}; use arret_runtime::boxed; use arret_runtime::callback::EntryPointAbiType as CallbackEntryPointAbiType; use crate::codegen::box_layout::BoxLayout; use crate::codegen::record_struct; use crate::codegen::GenAbi; use crate::libcstr; use crate::mir::ops; fn llvm_enum_attr_for_name( llx: LLVMContextRef, attr_name: &str, attr_value: u64, ) -> LLVMAttributeRef { unsafe { let kind_id = LLVMGetEnumAttributeKindForName(attr_name.as_ptr() as *const _, attr_name.len()); LLVMCreateEnumAttribute(llx, kind_id, attr_value) } } fn llvm_md_kind_id_for_name(llx: LLVMContextRef, md_name: &str) -> u32 { unsafe { LLVMGetMDKindIDInContext(llx, md_name.as_ptr() as *const _, md_name.len() as u32) } } fn llvm_i64_md_node(llx: LLVMContextRef, values: &[u64]) -> LLVMMetadataRef { unsafe { let llvm_i64 = LLVMInt64TypeInContext(llx); let mut node_values: Vec = values .iter() .map(|value| LLVMConstInt(llvm_i64, *value as u64, 0)) .map(|value| LLVMValueAsMetadata(value)) .collect(); LLVMMDNodeInContext2(llx, node_values.as_mut_ptr(), node_values.len()) } } #[derive(Default)] struct CachedTypes { task: Option, box_header: Option, boxed: HashMap, shared_str: Option, boxed_inline_str: Option, boxed_external_str: Option, persistent_vector_leaf: Option, boxed_inline_vector: Option, boxed_external_vector: Option, global_interned_name: Option, record_struct_box: HashMap, classmap_field: Option, } /// Context for building against a given target machine /// /// During compilation there will typically be two instances of `TargetCtx`: one for the eval JIT /// and one for generating the program. 
/// /// This has a number of responsibilities: /// /// 1. Storing information about the target machine and its data layout /// 2. Wrapping the global `LLVMContextRef` /// 3. Caching complex types, attributes and metadata nodes /// 4. Optimising modules /// /// These are only vaguely related; this is a bit of a God Object. pub struct TargetCtx { pub llx: LLVMContextRef, target_machine: LLVMTargetMachineRef, target_data: LLVMTargetDataRef, optimising: bool, module_pass_manager: LLVMPassManagerRef, boxed_dereferenceable_attr: LLVMAttributeRef, boxed_align_attr: LLVMAttributeRef, readonly_attr: LLVMAttributeRef, noalias_attr: LLVMAttributeRef, nocapture_attr: LLVMAttributeRef, invariant_load_md_kind_id: u32, dereferenceable_md_kind_id: u32, align_md_kind_id: u32, empty_md_node: LLVMMetadataRef, boxed_dereferenceable_md_node: LLVMMetadataRef, boxed_align_md_node: LLVMMetadataRef, cached_types: CachedTypes, target_record_structs: HashMap, } impl TargetCtx { /// Construct a new `TargetCtx` /// /// `target_machine` remains owned by the caller and must outlive this instance. 
pub fn new(target_machine: LLVMTargetMachineRef, optimising: bool) -> TargetCtx { use llvm_sys::transforms::pass_manager_builder::*; use std::mem; unsafe { let llx = LLVMContextCreate(); let module_pass_manager = LLVMCreatePassManager(); let target_data = LLVMCreateTargetDataLayout(target_machine); if optimising { let fpmb = LLVMPassManagerBuilderCreate(); LLVMPassManagerBuilderSetOptLevel(fpmb, 2); LLVMPassManagerBuilderPopulateModulePassManager(fpmb, module_pass_manager); LLVMPassManagerBuilderDispose(fpmb); } TargetCtx { llx, target_machine, target_data, optimising, module_pass_manager, boxed_dereferenceable_attr: llvm_enum_attr_for_name( llx, "dereferenceable", mem::size_of::() as u64, ), boxed_align_attr: llvm_enum_attr_for_name( llx, "align", mem::align_of::() as u64, ), readonly_attr: llvm_enum_attr_for_name(llx, "readonly", 0), noalias_attr: llvm_enum_attr_for_name(llx, "noalias", 0), nocapture_attr: llvm_enum_attr_for_name(llx, "nocapture", 0), invariant_load_md_kind_id: llvm_md_kind_id_for_name(llx, "invariant.load"), dereferenceable_md_kind_id: llvm_md_kind_id_for_name(llx, "dereferenceable"), align_md_kind_id: llvm_md_kind_id_for_name(llx, "align"), empty_md_node: llvm_i64_md_node(llx, &[]), boxed_dereferenceable_md_node: llvm_i64_md_node( llx, &[mem::size_of::() as u64], ), boxed_align_md_node: llvm_i64_md_node(llx, &[mem::align_of::() as u64]), cached_types: Default::default(), target_record_structs: HashMap::new(), } } } pub fn optimising(&self) -> bool { self.optimising } pub fn target_machine(&self) -> LLVMTargetMachineRef { self.target_machine } pub fn target_data(&self) -> LLVMTargetDataRef { self.target_data } pub fn task_llvm_ptr_type(&mut self) -> LLVMTypeRef { let llvm_any_ptr = self.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any); let llx = self.llx; *self.cached_types.task.get_or_insert_with(|| unsafe { let members = &mut [llvm_any_ptr, llvm_any_ptr]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("task")); LLVMStructSetBody(llvm_type, 
members.as_mut_ptr(), members.len() as u32, 0); LLVMPointerType(llvm_type, 0) }) } pub fn global_interned_name_llvm_type(&mut self) -> LLVMTypeRef { let llx = self.llx; *self .cached_types .global_interned_name .get_or_insert_with(|| unsafe { let llvm_i64 = LLVMInt64TypeInContext(llx); let llvm_i8 = LLVMInt8TypeInContext(llx); let members = &mut [llvm_i64, LLVMPointerType(llvm_i8, 0)]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("global_interned_name")); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); llvm_type }) } pub fn classmap_field_llvm_type(&mut self) -> LLVMTypeRef { let llx = self.llx; *self .cached_types .classmap_field .get_or_insert_with(|| unsafe { let llvm_i32 = LLVMInt32TypeInContext(llx); let llvm_i8 = LLVMInt8TypeInContext(llx); let members = &mut [llvm_i32, llvm_i8, llvm_i8]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("classmap_field")); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); llvm_type }) } pub fn classmap_class_llvm_type(&mut self) -> LLVMTypeRef { unsafe { LLVMPointerType(self.classmap_field_llvm_type(), 0) } } pub fn captures_llvm_type(&mut self) -> LLVMTypeRef { self.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any) } pub fn record_class_id_llvm_type(&self) -> LLVMTypeRef { unsafe { LLVMInt32TypeInContext(self.llx) } } fn box_header_llvm_type(&mut self) -> LLVMTypeRef { let llx = self.llx; *self.cached_types.box_header.get_or_insert_with(|| unsafe { let llvm_i8 = LLVMInt8TypeInContext(llx); let members = &mut [llvm_i8, llvm_i8]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("box_header")); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); llvm_type }) } pub fn shared_str_llvm_type(&mut self) -> LLVMTypeRef { let llx = self.llx; *self.cached_types.shared_str.get_or_insert_with(|| unsafe { let llvm_i8 = LLVMInt8TypeInContext(llx); let llvm_i64 = LLVMInt64TypeInContext(llx); let members = &mut [ // ref_count llvm_i64, // len llvm_i64, 
// data LLVMArrayType(llvm_i8, 0), ]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("shared_str")); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); llvm_type }) } pub fn boxed_external_str_llvm_type(&mut self) -> LLVMTypeRef { let llx = self.llx; let llvm_header = self.box_header_llvm_type(); let shared_str_llvm_type = self.shared_str_llvm_type(); *self .cached_types .boxed_external_str .get_or_insert_with(|| unsafe { let llvm_i8 = LLVMInt8TypeInContext(llx); let members = &mut [ llvm_header, llvm_i8, LLVMPointerType(shared_str_llvm_type, 0), ]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("boxed_external_str")); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); llvm_type }) } pub fn boxed_inline_str_llvm_type(&mut self) -> LLVMTypeRef { let llx = self.llx; let llvm_header = self.box_header_llvm_type(); *self .cached_types .boxed_inline_str .get_or_insert_with(|| unsafe { let llvm_i8 = LLVMInt8TypeInContext(llx); let members = &mut [ llvm_header, llvm_i8, LLVMArrayType(llvm_i8, boxed::Str::MAX_INLINE_BYTES as u32), ]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("boxed_inline_str")); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); llvm_type }) } pub fn persistent_vector_leaf_llvm_type(&mut self) -> LLVMTypeRef { use arret_runtime::persistent::vector::NODE_SIZE; let llx = self.llx; let llvm_any_ptr = self.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any); *self .cached_types .persistent_vector_leaf .get_or_insert_with(|| unsafe { let llvm_i64 = LLVMInt64TypeInContext(llx); let mut members = [llvm_i64, LLVMArrayType(llvm_any_ptr, NODE_SIZE as u32)]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("persistent_vector_leaf")); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); llvm_type }) } pub fn boxed_external_vector_llvm_type(&mut self) -> LLVMTypeRef { let llx = self.llx; let llvm_header = self.box_header_llvm_type(); let 
persistent_vector_leaf_type = self.persistent_vector_leaf_llvm_type(); *self .cached_types .boxed_external_vector .get_or_insert_with(|| unsafe { let llvm_i32 = LLVMInt32TypeInContext(llx); let llvm_i64 = LLVMInt64TypeInContext(llx); let persistent_vector_leaf_ptr = LLVMPointerType(persistent_vector_leaf_type, 0); let members = &mut [ llvm_header, llvm_i32, llvm_i64, persistent_vector_leaf_ptr, persistent_vector_leaf_ptr, ]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("boxed_external_vector")); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); llvm_type }) } pub fn boxed_inline_vector_llvm_type(&mut self) -> LLVMTypeRef { let llx = self.llx; let llvm_header = self.box_header_llvm_type(); let llvm_any_ptr = self.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any); *self .cached_types .boxed_inline_vector .get_or_insert_with(|| unsafe { let llvm_i32 = LLVMInt32TypeInContext(llx); let members = &mut [ llvm_header, llvm_i32, llvm_any_ptr, llvm_any_ptr, llvm_any_ptr, ]; let llvm_type = LLVMStructCreateNamed(llx, libcstr!("boxed_inline_vector")); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); llvm_type }) } pub fn boxed_abi_to_llvm_struct_type(&mut self, boxed_abi_type: &BoxedAbiType) -> LLVMTypeRef { let box_layout: BoxLayout = boxed_abi_type.into(); if let Some(llvm_struct) = self.cached_types.boxed.get(&box_layout) { return *llvm_struct; } unsafe { let llvm_header = self.box_header_llvm_type(); let mut members = vec![llvm_header]; box_layout.append_members(self, &mut members); let llvm_type = LLVMStructCreateNamed(self.llx, box_layout.type_name().as_ptr() as *const _); LLVMStructSetBody(llvm_type, members.as_mut_ptr(), members.len() as u32, 0); self.cached_types.boxed.insert(box_layout, llvm_type); llvm_type } } fn callback_entry_point_llvm_type( &mut self, entry_point_abi_type: &CallbackEntryPointAbiType, ) -> LLVMTypeRef { let mut llvm_param_types = vec![ self.task_llvm_ptr_type(), 
self.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any), ]; llvm_param_types.extend( entry_point_abi_type .params .iter() .map(|abi_type| self.abi_to_llvm_type(abi_type)), ); let llvm_ret_type = self.ret_abi_to_llvm_type(&entry_point_abi_type.ret); unsafe { LLVMPointerType( LLVMFunctionType( llvm_ret_type, llvm_param_types.as_mut_ptr(), llvm_param_types.len() as u32, 0, ), 0, ) } } pub fn callback_llvm_type(&mut self, entry_point_llvm_type: LLVMTypeRef) -> LLVMTypeRef { let mut members = [ self.boxed_abi_to_llvm_ptr_type(&BoxedAbiType::Any), entry_point_llvm_type, ]; unsafe { LLVMStructTypeInContext(self.llx, members.as_mut_ptr(), members.len() as u32, 0) } } pub fn boxed_abi_to_llvm_ptr_type(&mut self, boxed_abi_type: &BoxedAbiType) -> LLVMTypeRef { unsafe { LLVMPointerType(self.boxed_abi_to_llvm_struct_type(boxed_abi_type), 0) } } pub fn abi_to_llvm_type(&mut self, abi_type: &AbiType) -> LLVMTypeRef { unsafe { match abi_type { AbiType::Bool => LLVMInt1TypeInContext(self.llx), AbiType::Int => LLVMInt64TypeInContext(self.llx), AbiType::Char => LLVMInt32TypeInContext(self.llx), AbiType::Float => LLVMDoubleTypeInContext(self.llx), AbiType::InternedSym => LLVMInt64TypeInContext(self.llx), AbiType::Boxed(boxed) => self.boxed_abi_to_llvm_ptr_type(boxed), AbiType::Callback(entry_point_abi_type) => { let entry_point_llvm_type = self.callback_entry_point_llvm_type(entry_point_abi_type); self.callback_llvm_type(entry_point_llvm_type) } } } } fn ret_abi_to_llvm_type(&mut self, ret_abi_type: &RetAbiType) -> LLVMTypeRef { match ret_abi_type { RetAbiType::Inhabited(abi_type) => self.abi_to_llvm_type(abi_type), RetAbiType::Void | RetAbiType::Never => unsafe { LLVMVoidTypeInContext(self.llx) }, } } pub fn fun_abi_to_llvm_type(&mut self, fun_abi: &GenAbi) -> LLVMTypeRef { let mut llvm_param_types = vec![]; if fun_abi.takes_task { llvm_param_types.push(self.task_llvm_ptr_type()); } llvm_param_types.extend( fun_abi .params .iter() .map(|param_abi_type| 
self.abi_to_llvm_type(¶m_abi_type.abi_type)), ); let llvm_ret_type = self.ret_abi_to_llvm_type(&fun_abi.ret); unsafe { LLVMFunctionType( llvm_ret_type, llvm_param_types.as_mut_ptr(), llvm_param_types.len() as u32, 0, ) } } pub fn target_record_struct<'a>( &'a mut self, mir_record_struct: &ops::RecordStructId, ) -> &'a record_struct::TargetRecordStruct { if self.target_record_structs.contains_key(mir_record_struct) { return &self.target_record_structs[mir_record_struct]; } let target_record_struct = record_struct::TargetRecordStruct::from_mir_record_struct(self, mir_record_struct); self.target_record_structs .entry(mir_record_struct.clone()) .or_insert(target_record_struct) } pub fn record_struct_llvm_box_type( &mut self, record_struct: &ops::RecordStructId, ) -> LLVMTypeRef { if let Some(record_struct_box_type) = self.cached_types.record_struct_box.get(record_struct) { return *record_struct_box_type; } let record_struct::TargetRecordStruct { llvm_data_type, record_storage, .. } = *self.target_record_struct(record_struct); let llvm_header = self.box_header_llvm_type(); let mut members = vec![llvm_header]; record_struct::append_common_internal_members(self, &mut members); match record_storage { boxed::RecordStorage::Inline(_) => { members.push(llvm_data_type); } boxed::RecordStorage::External => unsafe { members.extend(&[ LLVMPointerType(llvm_data_type, 0), LLVMInt64TypeInContext(self.llx), ]); }, } let box_name = format!("boxed_{}\0", record_struct.source_name); let record_struct_box_type = unsafe { let record_struct_box_type = LLVMStructCreateNamed(self.llx, box_name.as_ptr() as *const _); LLVMStructSetBody( record_struct_box_type, members.as_mut_ptr(), members.len() as u32, 0, ); record_struct_box_type }; self.cached_types .record_struct_box .insert(record_struct.clone(), record_struct_box_type); record_struct_box_type } pub fn ptr_to_singleton_box( &mut self, module: LLVMModuleRef, type_tag: boxed::TypeTag, name: &[u8], ) -> LLVMValueRef { use std::mem; unsafe { 
let global = LLVMGetNamedGlobal(module, name.as_ptr() as *const _); if !global.is_null() { return global; } let llvm_type = self.boxed_abi_to_llvm_struct_type(&type_tag.into()); let global = LLVMAddGlobal(module, llvm_type, name.as_ptr() as *const _); let members = &mut [self.llvm_box_header(type_tag.to_const_header())]; let llvm_value = LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32); LLVMSetInitializer(global, llvm_value); LLVMSetAlignment(global, mem::align_of::() as u32); LLVMSetGlobalConstant(global, 1); LLVMSetLinkage(global, LLVMLinkage::LLVMAvailableExternallyLinkage); global } } pub fn llvm_box_header(&mut self, header: boxed::Header) -> LLVMValueRef { unsafe { let llvm_i8 = LLVMInt8TypeInContext(self.llx); let llvm_type = self.box_header_llvm_type(); let members = &mut [ LLVMConstInt(llvm_i8, header.type_tag() as u64, 0), LLVMConstInt(llvm_i8, header.alloc_type() as u64, 0), ]; LLVMConstNamedStruct(llvm_type, members.as_mut_ptr(), members.len() as u32) } } pub fn llvm_enum_attr_for_name( &mut self, attr_name: &str, attr_value: u64, ) -> LLVMAttributeRef { llvm_enum_attr_for_name(self.llx, attr_name, attr_value) } pub fn llvm_md_kind_id_for_name(&mut self, md_name: &str) -> u32 { llvm_md_kind_id_for_name(self.llx, md_name) } pub fn llvm_boxed_align_attr(&self) -> LLVMAttributeRef { self.boxed_align_attr } pub fn llvm_noalias_attr(&self) -> LLVMAttributeRef { self.noalias_attr } pub fn add_invariant_load_metadata(&self, loaded_value: LLVMValueRef) { unsafe { LLVMSetMetadata( loaded_value, self.invariant_load_md_kind_id, LLVMMetadataAsValue(self.llx, self.empty_md_node), ); } } /// Adds range metadata to annotate our native `Char` type with valid Unicode codepoint ranges pub fn add_char_codepoint_range_metadata(&mut self, llvm_value: LLVMValueRef) { unsafe { // Valid Unicode codepoints are effectively 21bit values with a hole in the middle let llvm_char_type = LLVMInt32TypeInContext(self.llx); let mut llvm_range_values: Vec = 
[0x0000, 0xD800, 0xE000, 0x11_0000] .iter() .map(|value| LLVMConstInt(llvm_char_type, *value as u64, 0)) .map(|value| LLVMValueAsMetadata(value)) .collect(); let codepoint_range_md = LLVMMDNodeInContext2( self.llx, llvm_range_values.as_mut_ptr(), llvm_range_values.len(), ); let range_md_kind_id = self.llvm_md_kind_id_for_name("range"); LLVMSetMetadata( llvm_value, range_md_kind_id, LLVMMetadataAsValue(self.llx, codepoint_range_md), ); } } pub fn add_boxed_param_attrs( &mut self, function: LLVMValueRef, param_index: u32, no_capture: bool, ) { unsafe { for &common_attr in &[ self.boxed_dereferenceable_attr, self.boxed_align_attr, self.readonly_attr, self.noalias_attr, ] { // Parameters are offset by 1 LLVMAddAttributeAtIndex(function, param_index + 1, common_attr); } if no_capture { LLVMAddAttributeAtIndex(function, param_index + 1, self.nocapture_attr); } } } pub fn add_boxed_return_attrs(&mut self, function: LLVMValueRef) { unsafe { for &common_attr in &[ self.boxed_dereferenceable_attr, self.boxed_align_attr, self.noalias_attr, ] { LLVMAddAttributeAtIndex(function, LLVMAttributeReturnIndex, common_attr); } } } pub fn add_boxed_load_metadata(&mut self, loaded_value: LLVMValueRef) { unsafe { for &(kind_id, md_node) in &[ ( self.dereferenceable_md_kind_id, self.boxed_dereferenceable_md_node, ), (self.align_md_kind_id, self.boxed_align_md_node), ] { LLVMSetMetadata( loaded_value, kind_id, LLVMMetadataAsValue(self.llx, md_node), ); } } } pub fn finish_module(&mut self, module: LLVMModuleRef) { use llvm_sys::analysis::*; use std::{env, ptr}; unsafe { let mut error: *mut libc::c_char = ptr::null_mut(); // Dump if env::var_os("ARRET_DUMP_LLVM").is_some() { LLVMDumpModule(module); } // Verify LLVMVerifyModule( module, LLVMVerifierFailureAction::LLVMAbortProcessAction, &mut error as *mut _, ); LLVMDisposeMessage(error); // Optimise LLVMRunPassManager(self.module_pass_manager, module); } } } impl Drop for TargetCtx { fn drop(&mut self) { unsafe { 
LLVMDisposeTargetData(self.target_data); LLVMDisposePassManager(self.module_pass_manager); LLVMContextDispose(self.llx); } } } ================================================ FILE: compiler/codegen/target_machine.rs ================================================ use std::{ffi, ptr}; use llvm_sys::core::*; use llvm_sys::target_machine::*; enum TripleString { Cross(ffi::CString), LlvmDefault(*mut libc::c_char), } impl TripleString { fn as_ptr(&self) -> *const libc::c_char { match self { TripleString::Cross(cross_triple) => cross_triple.as_ptr() as *const _, TripleString::LlvmDefault(llvm_default) => *llvm_default, } } } impl Drop for TripleString { fn drop(&mut self) { if let TripleString::LlvmDefault(llvm_default) = self { unsafe { LLVMDisposeMessage(*llvm_default); } } } } pub fn create_target_machine( cross_triple: Option<&str>, reloc_mode: LLVMRelocMode, code_model: LLVMCodeModel, ) -> LLVMTargetMachineRef { let cross_triple = cross_triple.map(|cross_triple| ffi::CString::new(cross_triple).unwrap()); unsafe { let mut target: LLVMTargetRef = ptr::null_mut(); let triple_string = cross_triple .map(|cross_triple| TripleString::Cross(ffi::CString::new(cross_triple).unwrap())) .unwrap_or_else(|| TripleString::LlvmDefault(LLVMGetDefaultTargetTriple())); let mut error: *mut libc::c_char = ptr::null_mut(); if LLVMGetTargetFromTriple(triple_string.as_ptr(), &mut target, &mut error as *mut _) != 0 { panic!( "LLVMGetTargetFromTriple({:?}): {}", ffi::CStr::from_ptr(triple_string.as_ptr()) .to_str() .unwrap(), ffi::CStr::from_ptr(error).to_str().unwrap() ); } LLVMCreateTargetMachine( target, triple_string.as_ptr(), ptr::null(), ptr::null(), LLVMCodeGenOptLevel::LLVMCodeGenLevelDefault, reloc_mode, code_model, ) } } ================================================ FILE: compiler/codegen/vector_gen.rs ================================================ use llvm_sys::core::*; use llvm_sys::prelude::*; use llvm_sys::LLVMIntPredicate; use arret_runtime::boxed; use 
crate::codegen::fun_gen::FunCtx; use crate::codegen::target_gen::TargetCtx; use crate::libcstr; fn load_boxed_external_vector_len( tcx: &mut TargetCtx, fcx: &mut FunCtx, llvm_boxed_vector: LLVMValueRef, ) -> LLVMValueRef { unsafe { let boxed_external_vector_ptr_type = LLVMPointerType(tcx.boxed_external_vector_llvm_type(), 0); let llvm_boxed_external_vector = LLVMBuildBitCast( fcx.builder, llvm_boxed_vector, boxed_external_vector_ptr_type, libcstr!("boxed_external_vector"), ); let vector_external_len_ptr = LLVMBuildStructGEP( fcx.builder, llvm_boxed_external_vector, 2, libcstr!("vector_external_len_ptr"), ); let llvm_vector_external_len = LLVMBuildLoad( fcx.builder, vector_external_len_ptr, libcstr!("vector_external_len"), ); tcx.add_invariant_load_metadata(llvm_vector_external_len); llvm_vector_external_len } } pub(crate) fn load_boxed_vector_len( tcx: &mut TargetCtx, fcx: &mut FunCtx, llvm_boxed_vector: LLVMValueRef, ) -> LLVMValueRef { use arret_runtime::boxed::Vector; unsafe { let llvm_i32 = LLVMInt32TypeInContext(tcx.llx); let llvm_i64 = LLVMInt64TypeInContext(tcx.llx); let vector_inline_len_ptr = LLVMBuildStructGEP( fcx.builder, llvm_boxed_vector, 1, libcstr!("vector_inline_len_ptr"), ); let llvm_vector_inline_len = LLVMBuildLoad( fcx.builder, vector_inline_len_ptr, libcstr!("vector_inline_len"), ); tcx.add_invariant_load_metadata(llvm_vector_inline_len); let mut llvm_range_values = [ LLVMValueAsMetadata(LLVMConstInt(llvm_i32, 0, 0)), LLVMValueAsMetadata(LLVMConstInt( llvm_i32, (Vector::::EXTERNAL_INLINE_LEN + 1) as u64, 0, )), ]; let range_md_kind_id = tcx.llvm_md_kind_id_for_name("vector_inline_len_range"); let vector_inline_len_range_md = LLVMMDNodeInContext2( tcx.llx, llvm_range_values.as_mut_ptr(), llvm_range_values.len(), ); LLVMSetMetadata( llvm_vector_inline_len, range_md_kind_id, LLVMMetadataAsValue(tcx.llx, vector_inline_len_range_md), ); let llvm_vector_is_external = LLVMBuildICmp( fcx.builder, LLVMIntPredicate::LLVMIntEQ, llvm_vector_inline_len, 
LLVMConstInt( llvm_i32, Vector::::EXTERNAL_INLINE_LEN as u64, 0, ), libcstr!("vector_is_external"), ); let mut external_block = LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("external_vector")); let mut inline_block = LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("inline_vector")); let cont_block = LLVMAppendBasicBlockInContext(tcx.llx, fcx.function, libcstr!("vector_len_cont")); LLVMBuildCondBr( fcx.builder, llvm_vector_is_external, external_block, inline_block, ); let mut llvm_external_vector_len = { LLVMPositionBuilderAtEnd(fcx.builder, external_block); let llvm_value = load_boxed_external_vector_len(tcx, fcx, llvm_boxed_vector); LLVMBuildBr(fcx.builder, cont_block); llvm_value }; let mut llvm_vector_inline_len_ext = { LLVMPositionBuilderAtEnd(fcx.builder, inline_block); let llvm_value = LLVMBuildZExt( fcx.builder, llvm_vector_inline_len, llvm_i64, libcstr!("vector_inline_len_ext"), ); LLVMBuildBr(fcx.builder, cont_block); llvm_value }; LLVMPositionBuilderAtEnd(fcx.builder, cont_block); let phi_value = LLVMBuildPhi(fcx.builder, llvm_i64, libcstr!("vector_len")); LLVMAddIncoming( phi_value, &mut llvm_external_vector_len as *mut _, &mut external_block as *mut _, 1, ); LLVMAddIncoming( phi_value, &mut llvm_vector_inline_len_ext as *mut _, &mut inline_block as *mut _, 1, ); phi_value } } fn load_boxed_inline_vector_member( tcx: &mut TargetCtx, fcx: &mut FunCtx, llvm_boxed_vector: LLVMValueRef, member_index: usize, ) -> LLVMValueRef { unsafe { let boxed_inline_vector_ptr_type = LLVMPointerType(tcx.boxed_inline_vector_llvm_type(), 0); let llvm_boxed_inline_vector = LLVMBuildBitCast( fcx.builder, llvm_boxed_vector, boxed_inline_vector_ptr_type, libcstr!("boxed_inline_vector"), ); let value_ptr = LLVMBuildStructGEP( fcx.builder, llvm_boxed_inline_vector, // Skip the header and inline len (2 + member_index) as u32, libcstr!("vector_member_ptr"), ); let llvm_value = LLVMBuildLoad(fcx.builder, value_ptr, libcstr!("vector_member")); 
tcx.add_invariant_load_metadata(llvm_value); tcx.add_boxed_load_metadata(llvm_value); llvm_value } } fn load_boxed_external_vector_member( tcx: &mut TargetCtx, fcx: &mut FunCtx, llvm_boxed_vector: LLVMValueRef, known_vector_len: usize, member_index: usize, ) -> LLVMValueRef { use arret_runtime::persistent::vector::NODE_SIZE; const TREE_PTR_INDEX: u32 = 3; const TAIL_PTR_INDEX: u32 = 4; let (node_gep_index, element_array_index) = if known_vector_len <= NODE_SIZE { (TAIL_PTR_INDEX, member_index as u64) } else if known_vector_len <= (NODE_SIZE * 2) { if member_index < NODE_SIZE { (TREE_PTR_INDEX, member_index as u64) } else { (TAIL_PTR_INDEX, (member_index - NODE_SIZE) as u64) } } else { todo!("loading member of vector of length {}", known_vector_len); }; unsafe { let llvm_i32 = LLVMInt32TypeInContext(tcx.llx); let boxed_external_vector_ptr_type = LLVMPointerType(tcx.boxed_external_vector_llvm_type(), 0); let llvm_boxed_external_vector = LLVMBuildBitCast( fcx.builder, llvm_boxed_vector, boxed_external_vector_ptr_type, libcstr!("boxed_external_vector"), ); let vector_node_ptr_ptr = LLVMBuildStructGEP( fcx.builder, llvm_boxed_external_vector, node_gep_index, libcstr!("vector_node_ptr_ptr"), ); let vector_node_ptr = LLVMBuildLoad( fcx.builder, vector_node_ptr_ptr, libcstr!("vector_node_ptr"), ); tcx.add_invariant_load_metadata(vector_node_ptr); let element_ptr_gep_indices = &mut [ LLVMConstInt(llvm_i32, 0, 0), // Skip the refcount LLVMConstInt(llvm_i32, 1, 0), LLVMConstInt(llvm_i32, element_array_index, 0), ]; let vector_node_element_ptr = LLVMBuildInBoundsGEP( fcx.builder, vector_node_ptr, element_ptr_gep_indices.as_mut_ptr(), element_ptr_gep_indices.len() as u32, libcstr!("vector_node_element_ptr"), ); let llvm_value = LLVMBuildLoad( fcx.builder, vector_node_element_ptr, libcstr!("vector_member"), ); tcx.add_invariant_load_metadata(llvm_value); tcx.add_boxed_load_metadata(llvm_value); llvm_value } } pub(crate) fn load_boxed_vector_member( tcx: &mut TargetCtx, fcx: &mut 
FunCtx, llvm_boxed_vector: LLVMValueRef, known_vector_len: usize, member_index: usize, ) -> LLVMValueRef { if known_vector_len <= boxed::Vector::::MAX_INLINE_LEN { load_boxed_inline_vector_member(tcx, fcx, llvm_boxed_vector, member_index) } else { load_boxed_external_vector_member( tcx, fcx, llvm_boxed_vector, known_vector_len, member_index, ) } } ================================================ FILE: compiler/context.rs ================================================ use crate::hir::PackagePaths; use crate::rfi; use crate::source::SourceLoader; use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::{hash, path}; use codespan_reporting::diagnostic::Diagnostic; use arret_syntax::datum::Datum; use arret_syntax::span::{FileId, Span}; use crate::hir; use crate::hir::exports::Exports; use crate::hir::import; use crate::hir::loader::{LoadedModule, ModuleName}; use crate::hir::lowering::LoweredModule; use crate::promise::PromiseMap; use crate::reporting::diagnostic_for_syntax_error; use crate::reporting::errors_to_diagnostics; use crate::source::SourceFile; use crate::ty; use crate::typeck::infer; new_global_id_type!( ModuleId, u32, std::sync::atomic::AtomicU32, std::num::NonZeroU32 ); pub(crate) type ModuleImports = HashMap>; pub struct LinkedLibrary { _loaded: libloading::Library, target_path: Box, } impl LinkedLibrary { pub fn target_path(&self) -> &path::Path { &self.target_path } } /// Module being compiled until type inference /// /// This represents both Arret and RFI libraries pub(crate) struct Module { pub module_id: ModuleId, pub imports: ModuleImports, pub defs: Vec>, pub inferred_locals: Arc>>, pub exports: Exports, pub main_local_id: Option, pub linked_library: Option>, } impl PartialEq for Module { fn eq(&self, other: &Self) -> bool { self.module_id == other.module_id } } impl Eq for Module {} impl hash::Hash for Module { fn hash(&self, state: &mut H) { state.write_u32(self.module_id.get()); } } type CachedModule = Result, Vec>>; type 
UncachedModule = Result>>; /// Finds all transitive dependencies for a set of imports /// /// This is inclusive of the imports themselves. fn transitive_deps(imports: &ModuleImports) -> HashSet> { let mut all_deps: HashSet> = imports.values().cloned().collect(); for import in imports.values() { all_deps.extend(transitive_deps(&import.imports).into_iter()); } all_deps } pub(crate) fn prims_to_module(exports: Exports) -> Module { Module { module_id: ModuleId::alloc(), imports: HashMap::new(), defs: vec![], inferred_locals: Arc::new(HashMap::new()), exports, main_local_id: None, linked_library: None, } } fn rfi_library_to_module(span: Span, rfi_library: rfi::Library) -> Module { use crate::hir::var_id::LocalIdAlloc; use crate::ty::Ty; use arret_syntax::datum::DataStr; let rfi::Library { loaded, target_path, exported_funs, } = rfi_library; let mut lia = LocalIdAlloc::new(); let mut exports = HashMap::with_capacity(exported_funs.len()); let mut defs = Vec::with_capacity(exported_funs.len()); let mut inferred_locals = HashMap::with_capacity(exported_funs.len()); for (fun_name, rust_fun) in exported_funs.into_vec().into_iter() { let local_id = lia.alloc_mut(); let arret_type: ty::Ref = Ty::Fun(Box::new(rust_fun.arret_fun_type().clone())).into(); let fun_name_data_str: DataStr = fun_name.into(); let def = hir::Def:: { span, macro_invocation_span: None, destruc: hir::destruc::Destruc::Scalar( span, hir::destruc::Scalar::new( Some(local_id), fun_name_data_str.clone(), arret_type.clone(), ), ), value_expr: hir::Expr { result_ty: arret_type.clone(), kind: hir::ExprKind::RustFun(rust_fun), }, }; defs.push(def); inferred_locals.insert(local_id, arret_type); exports.insert(fun_name_data_str, hir::scope::Binding::Var(None, local_id)); } Module { module_id: ModuleId::alloc(), imports: HashMap::new(), defs, inferred_locals: Arc::new(inferred_locals), exports, main_local_id: None, linked_library: Some(Arc::new(LinkedLibrary { _loaded: loaded, target_path, })), } } /// Shared context 
for compilation /// /// This isn't specific to a given program or REPL session. It acts as a global cache of compiled /// source files and Rust libraries; it should be reused whenever possible. pub struct CompileCtx { package_paths: PackagePaths, enable_optimisations: bool, source_loader: SourceLoader, rfi_loader: rfi::Loader, modules_by_name: PromiseMap, } impl CompileCtx { pub fn new(package_paths: PackagePaths, enable_optimisations: bool) -> Self { use crate::hir::exports; use std::iter; // These modules are always loaded let initial_modules = iter::once(("primitives", exports::prims_exports())) .chain(iter::once(("types", exports::tys_exports()))) .map(|(terminal_name, exports)| { let prims_module = prims_to_module(exports); ( ModuleName::new( "arret".into(), vec!["internal".into()], (*terminal_name).into(), ), Ok(Arc::new(prims_module)), ) }); Self { package_paths, enable_optimisations, source_loader: SourceLoader::new(), rfi_loader: rfi::Loader::new(), modules_by_name: PromiseMap::new(initial_modules), } } pub fn package_paths(&self) -> &PackagePaths { &self.package_paths } pub fn enable_optimisations(&self) -> bool { self.enable_optimisations } pub fn source_loader(&self) -> &SourceLoader { &self.source_loader } pub(crate) fn rfi_loader(&self) -> &rfi::Loader { &self.rfi_loader } /// Returns a module for the given module name /// /// This returns a cached module; the module will only be compiled once per `CompileCtx` /// instance. If the module is being compiled on another thread this will block until the /// compilation is finished. 
/// Returns the cached module for `module_name`, compiling it on first request.
///
/// `PromiseMap::get_or_insert_with` guarantees the closure runs at most once per
/// name; concurrent callers for the same name block until the first finishes.
fn get_module_by_name(&self, span: Span, module_name: ModuleName) -> CachedModule {
    self.modules_by_name.get_or_insert_with(
        module_name.clone(),
        move || match hir::loader::load_module_by_name(self, span, &module_name) {
            // Arret source file: run the full lower + infer pipeline
            Ok(LoadedModule::Source(source_file)) => {
                self.source_file_to_module(&source_file).map(Arc::new)
            }
            // Native Rust (RFI) library: synthesise a module from its exported funs
            Ok(LoadedModule::Rust(rfi_library)) => {
                Ok(Arc::new(rfi_library_to_module(span, rfi_library)))
            }
            Err(err) => Err(vec![err.into()]),
        },
    )
}

/// Returns an uncached module for a source file.
///
/// Syntax errors are converted to diagnostics here; semantic errors surface from
/// `data_to_module`.
pub(crate) fn source_file_to_module(&self, source_file: &SourceFile) -> UncachedModule {
    let data = source_file
        .parsed()
        .map_err(|err| vec![diagnostic_for_syntax_error(&err)])?;

    self.data_to_module(data)
}

/// Collects all imports for a module's syntax data.
///
/// Every import error is gathered before returning so the caller sees all
/// diagnostics at once rather than failing on the first bad import.
// NOTE(review): generic parameters in this span were reconstructed from usage after
// extraction stripped all angle-bracket text — verify against the upstream repo.
pub(crate) fn imports_for_data<'a>(
    &self,
    data: impl Iterator<Item = &'a Datum>,
) -> Result<ModuleImports, Vec<Diagnostic<FileId>>> {
    let imported_module_names =
        import::collect_imported_module_names(data).map_err(errors_to_diagnostics)?;
    let import_count = imported_module_names.len();

    // Resolve every named module before inspecting any result so errors accumulate
    let loaded_module_results: Vec<(ModuleName, CachedModule)> = imported_module_names
        .into_iter()
        .map(|(module_name, span)| {
            let module = self.get_module_by_name(span, module_name.clone());
            (module_name, module)
        })
        .collect();

    let mut diagnostics = Vec::<Diagnostic<FileId>>::new();
    let mut imports = HashMap::<ModuleName, Arc<Module>>::with_capacity(import_count);

    for (module_name, loaded_module_result) in loaded_module_results {
        match loaded_module_result {
            Ok(module) => {
                imports.insert(module_name, module);
            }
            Err(mut new_diagnostics) => diagnostics.append(&mut new_diagnostics),
        }
    }

    if !diagnostics.is_empty() {
        return Err(diagnostics);
    }

    Ok(imports)
}

/// Returns an uncached module for syntax data.
///
/// Pipeline: resolve imports -> lower to HIR -> infer types, converting each
/// stage's errors to diagnostics.
fn data_to_module(&self, data: &[Datum]) -> UncachedModule {
    let imports = self.imports_for_data(data.iter())?;

    let lowered_module =
        hir::lowering::lower_data(&imports, data).map_err(errors_to_diagnostics)?;

    let LoweredModule {
        defs: lowered_defs,
        exports,
        main_local_id,
    } = lowered_module;

    // Inference needs the inferred locals of every transitive dependency, not
    // just direct imports
    let imported_inferred_vars = transitive_deps(&imports)
        .into_iter()
        .map(|module| (module.module_id, module.inferred_locals.clone()))
        .collect();

    let inferred_module = infer::infer_module(&imported_inferred_vars, lowered_defs)
        .map_err(errors_to_diagnostics)?;

    let infer::InferredModule {
        defs: inferred_defs,
        inferred_locals,
    } = inferred_module;

    Ok(Module {
        module_id: ModuleId::alloc(),
        imports,
        defs: inferred_defs,
        inferred_locals: Arc::new(inferred_locals),
        exports,
        main_local_id,
        linked_library: None,
    })
}
}

================================================
FILE: compiler/hir/destruc.rs
================================================
use arret_syntax::datum::DataStr;
use arret_syntax::span::Span;

use crate::hir;
use crate::ty;
use crate::ty::Ty;

// NOTE(review): the `P: hir::Phase` bounds and angle-bracket parameters below were
// reconstructed from field usage (`P::DeclType`, `hir::DeclTy::Known`) after
// extraction stripped the generics — confirm names against upstream.

/// Destructuring target for a binding: either a single scalar or a list pattern.
#[derive(Debug, PartialEq, Clone)]
pub enum Destruc<P: hir::Phase> {
    Scalar(Span, Scalar<P>),
    List(Span, List<P>),
}

/// List destructure: fixed member patterns plus an optional rest binding.
#[derive(Debug, PartialEq, Clone)]
pub struct List<P: hir::Phase> {
    fixed: Vec<Destruc<P>>,
    rest: Option<Box<Scalar<P>>>,
}

impl<P: hir::Phase> List<P> {
    pub fn new(fixed: Vec<Destruc<P>>, rest: Option<Box<Scalar<P>>>) -> List<P> {
        List { fixed, rest }
    }

    pub fn fixed(&self) -> &Vec<Destruc<P>> {
        &self.fixed
    }

    pub fn rest(&self) -> &Option<Box<Scalar<P>>> {
        &self.rest
    }
}

/// Scalar destructure: binds one value to a (possibly wildcard) local.
#[derive(Debug, PartialEq, Clone)]
pub struct Scalar<P: hir::Phase> {
    /// ID of the local. If this is None it's treated as a wildcard.
    local_id: Option<hir::LocalId>,
    source_name: DataStr,
    ty: P::DeclType,
}

impl<P: hir::Phase> Scalar<P> {
    pub fn new(
        local_id: Option<hir::LocalId>,
        source_name: DataStr,
        ty: P::DeclType,
    ) -> Scalar<P> {
        Scalar {
            local_id,
            source_name,
            ty,
        }
    }

    pub fn local_id(&self) -> &Option<hir::LocalId> {
        &self.local_id
    }

    pub fn source_name(&self) -> &DataStr {
        &self.source_name
    }

    pub fn ty(&self) -> &P::DeclType {
        &self.ty
    }
}

/// Substitutes free types in a list destruc with their inferred types.
pub fn subst_list_destruc(
    free_ty_polys: &mut impl Iterator<Item = ty::Ref<ty::Poly>>,
    list: List<hir::Lowered>,
) -> List<hir::Inferred> {
    let fixed = list
        .fixed
        .into_iter()
        .map(|fixed_destruc| subst_destruc(free_ty_polys, fixed_destruc))
        .collect();

    let rest = list
        .rest
        .map(|rest_destruc| Box::new(subst_scalar_destruc(free_ty_polys, *rest_destruc)));

    List::new(fixed, rest)
}

/// Substitutes a free type in a scalar destruc with its inferred type.
///
/// A declared (`Known`) type is kept as-is; a `Free` type consumes the next
/// inferred type from `free_ty_polys`.
pub fn subst_scalar_destruc(
    free_ty_polys: &mut impl Iterator<Item = ty::Ref<ty::Poly>>,
    scalar: Scalar<hir::Lowered>,
) -> Scalar<hir::Inferred> {
    let Scalar {
        local_id,
        ty,
        source_name,
    } = scalar;

    let poly_type = match ty {
        hir::DeclTy::Known(poly) => poly,
        hir::DeclTy::Free => free_ty_polys.next().unwrap(),
    };

    Scalar::new(local_id, source_name, poly_type)
}

/// Substitutes free types with their inferred types
///
/// `free_ty_polys` must be ordered in the same way the types appear in the destruc type in
/// depth-first order
pub fn subst_destruc(
    free_ty_polys: &mut impl Iterator<Item = ty::Ref<ty::Poly>>,
    destruc: Destruc<hir::Lowered>,
) -> Destruc<hir::Inferred> {
    match destruc {
        Destruc::Scalar(span, scalar) => {
            Destruc::Scalar(span, subst_scalar_destruc(free_ty_polys, scalar))
        }
        Destruc::List(span, list) => Destruc::List(span, subst_list_destruc(free_ty_polys, list)),
    }
}

/// Builds the polymorphic list type an inferred list destruc will accept.
///
/// A missing rest binding contributes a `never` rest type (i.e. no extra members).
pub fn poly_for_list_destruc(list: &List<hir::Inferred>) -> ty::List<ty::Poly> {
    let fixed_polys = list.fixed().iter().map(poly_for_destruc).collect();

    let rest_poly = match list.rest() {
        Some(rest) => rest.ty().clone(),
        None => Ty::never().into(),
    };

    ty::List::new(fixed_polys, rest_poly)
}

/// Builds the polymorphic type an inferred destruc will accept.
pub fn poly_for_destruc(destruc: &Destruc<hir::Inferred>) -> ty::Ref<ty::Poly> {
    match destruc {
        Destruc::Scalar(_, scalar) => scalar.ty().clone(),
        Destruc::List(_, list) => poly_for_list_destruc(list).into(),
    }
}

================================================
FILE: compiler/hir/error.rs
================================================
use std::{error, fmt, io, iter, path, result};

use codespan_reporting::diagnostic::Diagnostic;
use arret_syntax::datum::DataStr; use arret_syntax::error::Error as SyntaxError; use arret_syntax::span::{FileId, Span}; use crate::hir::types::{str_for_purity, str_for_ty_ref}; use crate::reporting::{ diagnostic_for_syntax_error, new_primary_label, new_secondary_label, LocTrace, }; use crate::ty; use crate::ty::purity; #[derive(Debug, PartialEq, Clone)] pub struct ExpectedSym { pub found: &'static str, pub usage: &'static str, } #[derive(Debug, PartialEq, Clone)] pub struct PolyArgIsNotTy { pub arg_type: ty::Ref, pub param_bound: ty::Ref, pub param_span: Span, } #[derive(Debug, PartialEq, Clone)] pub struct PolyArgIsNotPure { pub arg_purity: purity::Ref, pub param_span: Span, } #[derive(Debug, PartialEq, Clone)] pub struct ExpectedPolyPurityArg { pub found: &'static str, pub param_span: Span, } #[derive(Debug, PartialEq, Clone)] pub enum ErrorKind { ExpectedValue(&'static str), ExpectedTy(&'static str), ExpectedTyCons(&'static str), ExpectedParamList(&'static str), ExpectedPolyVarsDecl(&'static str), ExpectedMacroSpecList(&'static str), ExpectedMacroRuleVec(&'static str), ExpectedMacroRulePatternList(&'static str), ExpectedMacroEllipsisEscape(&'static str), ExpectedCompileErrorString(&'static str), ExpectedImportFilterKeyword(&'static str), ExpectedImportRenameMap(&'static str), ExpectedRecordTyConsDecl(&'static str), ExpectedRecordValueConsDecl(&'static str), ExpectedRecordFieldDecl(&'static str), ExpectedSym(Box), UnboundIdent(DataStr), WrongArgCount(usize), WrongCondArgCount, WrongDefLikeArgCount(&'static str), WrongDefRecordArgCount, DefOutsideBody, ExportOutsideModule, NonDefInsideModule, ExportInsideRepl, PackageNotFound, ModuleNotFound(Box), NoMacroRule(Box<[Span]>), DuplicateDef(Option, DataStr), MultipleZeroOrMoreMatch(Span), NoVecDestruc, UserError(DataStr), ReadError(Box), SyntaxError(SyntaxError), RustFunError(Box), BadListDestruc, BadRestDestruc, NoBindingVec, BindingsNotVec(&'static str), UnevenBindingVec, BadPolyVarDecl, UnsupportedLiteralType, 
VarPurityBound, NoParamDecl, NoPolyVarsDecl, UnsupportedImportFilter, MacroMultiPatternRef(Box<[Span]>), MacroNoPatternRef, MacroNoTemplateVars, MacroBadEllipsis, MacroBadSetPattern, WrongMacroRuleVecCount(usize), NoMacroType, BadMacroType, BadImportSet, NonFunPolyTy, ShortModuleName, AnonymousPolymorphicParam, PolyArgIsNotTy(Box), PolyArgIsNotPure(Box), ExpectedPolyPurityArg(Box), UnusedPolyPurityParam(purity::PVarId), UnusedPolyTyParam(ty::TVarId), } #[derive(Debug, PartialEq, Clone)] pub struct Error { loc_trace: LocTrace, kind: ErrorKind, } pub type Result = result::Result; impl Error { pub fn new(span: Span, kind: ErrorKind) -> Error { Error { loc_trace: span.into(), kind, } } pub fn kind(&self) -> &ErrorKind { &self.kind } pub fn from_module_io(span: Span, path: &path::Path, error: &io::Error) -> Error { match error.kind() { io::ErrorKind::NotFound => Error::new(span, ErrorKind::ModuleNotFound(path.into())), _ => Error::new(span, ErrorKind::ReadError(path.into())), } } pub fn with_macro_invocation_span(self, span: Span) -> Error { Error { loc_trace: self.loc_trace.with_macro_invocation(span), ..self } } } impl From for Diagnostic { fn from(error: Error) -> Diagnostic { let Error { loc_trace, kind } = error; let origin = loc_trace.origin(); let diagnostic = match kind { ErrorKind::ExpectedValue(found) => Diagnostic::error() .with_message(format!("cannot take the value of a {}", found)) .with_labels(vec![new_primary_label(origin, "expected value")]), ErrorKind::ExpectedTy(found) => Diagnostic::error() .with_message(format!("{} cannot be used as a type", found)) .with_labels(vec![new_primary_label(origin, "expected type")]), ErrorKind::ExpectedTyCons(found) => Diagnostic::error() .with_message(format!("{} cannot be used as a type constructor", found)) .with_labels(vec![new_primary_label(origin, "expected type constructor")]), ErrorKind::ExpectedSym(details) => { let ExpectedSym { found, usage } = *details; Diagnostic::error() .with_message(format!("expected 
symbol, found {}", found)) .with_labels(vec![new_primary_label( origin, format!("expected {}", usage), )]) } ErrorKind::ExpectedParamList(found) => Diagnostic::error() .with_message(format!( "expected parameter declaration list, found {}", found )) .with_labels(vec![new_primary_label(origin, "expected parameter list")]), ErrorKind::ExpectedPolyVarsDecl(found) => Diagnostic::error() .with_message(format!( "expected polymorphic variable set, found {}", found )) .with_labels(vec![new_primary_label( origin, "expected polymorphic variable set", )]), ErrorKind::ExpectedMacroSpecList(found) => Diagnostic::error() .with_message(format!( "expected macro specification list, found {}", found )) .with_labels(vec![new_primary_label( origin, "expected `(macro-rules ...)`", )]), ErrorKind::ExpectedMacroRuleVec(found) => Diagnostic::error() .with_message(format!("expected macro rule vector, found {}", found)) .with_labels(vec![new_primary_label( origin, "expected `[pattern template]`", )]), ErrorKind::ExpectedMacroRulePatternList(found) => Diagnostic::error() .with_message(format!("expected macro rule pattern list, found {}", found)) .with_labels(vec![new_primary_label( origin, "expected macro rule pattern list", )]), ErrorKind::ExpectedMacroEllipsisEscape(found) => Diagnostic::error() .with_message(format!("expected macro symbol to escape, found {}", found)) .with_labels(vec![new_primary_label(origin, "expected symbol")]), ErrorKind::ExpectedCompileErrorString(found) => Diagnostic::error() .with_message(format!("expected error message string, found {}", found)) .with_labels(vec![new_primary_label(origin, "expected string")]), ErrorKind::ExpectedImportFilterKeyword(found) => Diagnostic::error() .with_message(format!("expected import filter keyword, found {}", found)) .with_labels(vec![new_primary_label( origin, "expected `:only`, `:exclude`, `:rename`, `:prefix` or `:prefixed`", )]), ErrorKind::ExpectedImportRenameMap(found) => Diagnostic::error() .with_message(format!("expected 
identifier rename map, found {}", found)) .with_labels(vec![new_primary_label( origin, "expected identifier rename map", )]), ErrorKind::ExpectedRecordTyConsDecl(found) => Diagnostic::error() .with_message(format!( "expected record type constuctor declaration, found {}", found )) .with_labels(vec![new_primary_label( origin, "expected symbol or polymorphic constructor list", )]), ErrorKind::ExpectedRecordValueConsDecl(found) => Diagnostic::error() .with_message(format!( "expected record value constructor declaration, found {}", found )) .with_labels(vec![new_primary_label( origin, "expected record field list", )]), ErrorKind::ExpectedRecordFieldDecl(found) => Diagnostic::error() .with_message(format!( "expected record field declaration, found {}", found )) .with_labels(vec![new_primary_label( origin, "expected record field declaration", )]), ErrorKind::UnboundIdent(ref ident) => { let diagnostic = Diagnostic::error() .with_message(format!("unable to resolve `{}`", ident)) .with_labels(vec![new_primary_label(origin, "not found in this scope")]); if ident.as_ref() == "nil" { diagnostic.with_notes(vec![ "Arret does not have a distinct `nil` value; consider using `()` instead" .to_owned(), ]) } else { diagnostic } } ErrorKind::WrongArgCount(expected) => { let label_message = if expected == 1 { "expected 1 argument".to_owned() } else { format!("expected {} arguments", expected) }; Diagnostic::error() .with_message(format!("wrong argument count; expected {}", expected)) .with_labels(vec![new_primary_label(origin, label_message)]) } ErrorKind::WrongCondArgCount => Diagnostic::error() .with_message("wrong argument count; expected 3") .with_labels(vec![new_primary_label( origin, "expected `(if test-expr true-expr false-expr)`", )]), ErrorKind::WrongDefLikeArgCount(name) => Diagnostic::error() .with_message("wrong argument count; expected 2") .with_labels(vec![new_primary_label( origin, format!("expected `({} name definition)`", name), )]), ErrorKind::WrongDefRecordArgCount 
=> Diagnostic::error() .with_message("wrong argument count; expected 2") .with_labels(vec![new_primary_label( origin, "expected `(defrecord ty-cons-decl value-cons-decl)`", )]), ErrorKind::WrongMacroRuleVecCount(found) => Diagnostic::error() .with_message(format!( "expected macro rule vector with 2 elements, found {}", found )) .with_labels(vec![new_primary_label( origin, "expected `[pattern template]`", )]), ErrorKind::DefOutsideBody => Diagnostic::error() .with_message("definition outside module body") .with_labels(vec![new_primary_label( origin, "not at top-level of module", )]), ErrorKind::DuplicateDef(first_def_span, ref ident) => { let diagnostic = Diagnostic::error() .with_message(format!("duplicate definition of `{}`", ident)); let primary_label = new_primary_label(origin, "second definition here"); if let Some(first_def_span) = first_def_span { let secondary_label = new_secondary_label(first_def_span, "first definition here"); diagnostic.with_labels(vec![primary_label, secondary_label]) } else { diagnostic.with_labels(vec![primary_label]) } } ErrorKind::ExportOutsideModule => Diagnostic::error() .with_message("(export) outside of module body") .with_labels(vec![new_primary_label( origin, "not at top-level of module", )]), ErrorKind::NonDefInsideModule => Diagnostic::error() .with_message("value at top-level of module body") .with_labels(vec![new_primary_label( origin, "(import), (export) or definition expected", )]), ErrorKind::ExportInsideRepl => Diagnostic::error() .with_message("export not supported within REPL") .with_labels(vec![new_primary_label(origin, "export not supported")]), ErrorKind::PackageNotFound => Diagnostic::error() .with_message("package not found") .with_labels(vec![new_primary_label(origin, "at this import")]), ErrorKind::ModuleNotFound(ref filename) => Diagnostic::error() .with_message(format!( "module not found at `{}`", filename.to_string_lossy() )) .with_labels(vec![new_primary_label(origin, "at this import")]), 
ErrorKind::NoMacroRule(pattern_spans) => Diagnostic::error() .with_message("no matching macro rule") .with_labels( iter::once(new_primary_label(origin, "at this macro invocation")) .chain(pattern_spans.iter().map(|pattern_span| { new_secondary_label(*pattern_span, "unmatched macro rule") })) .collect(), ), ErrorKind::MultipleZeroOrMoreMatch(first_zero_or_more_span) => Diagnostic::error() .with_message("multiple zero or more matches in the same sequence") .with_labels(vec![ new_primary_label(origin, "second zero or more match"), new_secondary_label(first_zero_or_more_span, "first zero or more match"), ]), ErrorKind::NoVecDestruc => Diagnostic::error() .with_message("vectors can only be used in a destructure in the form `[name Type]`") .with_labels(vec![new_primary_label(origin, "unexpected vector")]), ErrorKind::UserError(ref message) => Diagnostic::error() .with_message(message.as_ref()) .with_labels(vec![new_primary_label(origin, "user error raised here")]), ErrorKind::ReadError(ref filename) => Diagnostic::error() .with_message(format!("error reading `{}`", filename.to_string_lossy())) .with_labels(vec![new_primary_label(origin, "at this import")]), ErrorKind::SyntaxError(ref err) => { // Just proxy this return diagnostic_for_syntax_error(err); } ErrorKind::RustFunError(ref message) => Diagnostic::error() .with_message("error loading RFI module") .with_labels(vec![new_primary_label( origin, message.clone().into_string(), )]), ErrorKind::BadListDestruc => Diagnostic::error() .with_message("unsupported destructuring binding") .with_labels(vec![new_primary_label( origin, "expected variable name, list or `[name Type]`", )]), ErrorKind::BadRestDestruc => Diagnostic::error() .with_message("unsupported rest destructuring") .with_labels(vec![new_primary_label( origin, "expected variable name or `[name Type]`", )]), ErrorKind::NoBindingVec => Diagnostic::error() .with_message("binding vector expected") .with_labels(vec![new_primary_label(origin, "expected vector 
argument")]), ErrorKind::BindingsNotVec(found) => Diagnostic::error() .with_message(format!("binding vector expected, found {}", found)) .with_labels(vec![new_primary_label(origin, "vector expected")]), ErrorKind::UnevenBindingVec => Diagnostic::error() .with_message("binding vector must have an even number of forms") .with_labels(vec![new_primary_label(origin, "extra binding form")]), ErrorKind::BadPolyVarDecl => Diagnostic::error() .with_message("bad polymorphic variable declaration") .with_labels(vec![new_primary_label( origin, "expected polymorphic variable name or `[name Bound]`", )]), ErrorKind::UnsupportedLiteralType => Diagnostic::error() .with_message("unsupported literal type") .with_labels(vec![new_primary_label( origin, "expected boolean, symbol, keyword, list or vector", )]), ErrorKind::VarPurityBound => Diagnostic::error() .with_message("purity variables cannot be bound by other variables") .with_labels(vec![new_primary_label(origin, "expected `->` or `->!`")]), ErrorKind::NoParamDecl => Diagnostic::error() .with_message("parameter declaration missing") .with_labels(vec![new_primary_label( origin, "expected parameter list argument", )]), ErrorKind::NoPolyVarsDecl => Diagnostic::error() .with_message("polymorphic variable declaration missing") .with_labels(vec![new_primary_label( origin, "expected polymorphic variable set argument", )]), ErrorKind::UnsupportedImportFilter => Diagnostic::error() .with_message("unsupported import filter") .with_labels(vec![new_primary_label( origin, "expected `:only`, `:exclude`, `:rename`, `:prefix` or `:prefixed`", )]), ErrorKind::MacroMultiPatternRef(sub_var_spans) => Diagnostic::error() .with_message("subtemplate references macro variables from multiple subpatterns") .with_labels( iter::once(new_primary_label( origin, "subtemplate references multiple subpatterns", )) .chain(sub_var_spans.iter().map(|sub_var_span| { new_secondary_label(*sub_var_span, "referenced macro variable") })) .collect(), ), 
ErrorKind::MacroNoTemplateVars => Diagnostic::error() .with_message("subtemplate does not include any macro variables") .with_labels(vec![new_primary_label( origin, "subtemplate includes no variables", )]), ErrorKind::MacroNoPatternRef => Diagnostic::error() .with_message("subtemplate does not reference macro variables from any subpattern") .with_labels(vec![new_primary_label( origin, "subtemplate does not reference subpatterns", )]), ErrorKind::MacroBadEllipsis => Diagnostic::error() .with_message("unexpected ellipsis in macro rule") .with_labels(vec![new_primary_label(origin, "expected `var ...`")]), ErrorKind::MacroBadSetPattern => Diagnostic::error() .with_message("set patterns must either be empty or a zero or more match") .with_labels(vec![new_primary_label( origin, "expected `#{}` or `#{var ...}`", )]), ErrorKind::NoMacroType => Diagnostic::error() .with_message("missing macro type") .with_labels(vec![new_primary_label( origin, "expected `(macro-rules ...)`", )]), ErrorKind::BadMacroType => Diagnostic::error() .with_message("unsupported macro type") .with_labels(vec![new_primary_label(origin, "expected `macro-rules`")]), ErrorKind::BadImportSet => Diagnostic::error() .with_message("bad import set") .with_labels(vec![new_primary_label( origin, "expected module name vector or applied filter", )]), ErrorKind::NonFunPolyTy => Diagnostic::error() .with_message("polymorphism on non-function type") .with_labels(vec![new_primary_label(origin, "expected function type")]), ErrorKind::ShortModuleName => Diagnostic::error() .with_message("module name requires a least two components") .with_labels(vec![new_primary_label( origin, "expected vector of 2 or more symbols", )]), ErrorKind::AnonymousPolymorphicParam => Diagnostic::error() .with_message("polymorphic parameters must have a name") .with_labels(vec![new_primary_label( origin, "expected polymorphic parameter name", )]), ErrorKind::PolyArgIsNotTy(boxed_details) => { let PolyArgIsNotTy { arg_type, param_bound, 
param_span, } = *boxed_details; Diagnostic::error() .with_message("mismatched types") .with_labels(vec![ new_primary_label( origin, format!( "`{}` does not satisfy the lower bound of `{}`", str_for_ty_ref(&arg_type), str_for_ty_ref(¶m_bound) ), ), new_secondary_label(param_span, "type parameter declared here"), ]) } ErrorKind::PolyArgIsNotPure(boxed_details) => { let PolyArgIsNotPure { arg_purity, param_span, } = *boxed_details; Diagnostic::error() .with_message("mismatched purities") .with_labels(vec![ new_primary_label( origin, format!("`{}` is not pure", str_for_purity(&arg_purity)), ), new_secondary_label(param_span, "purity parameter declared here"), ]) } ErrorKind::ExpectedPolyPurityArg(boxed_details) => { let ExpectedPolyPurityArg { found, param_span } = *boxed_details; Diagnostic::error() .with_message(format!("{} cannot be used as a purity", found)) .with_labels(vec![ new_primary_label(origin, "expected purity"), new_secondary_label(param_span, "purity parameter declared here"), ]) } ErrorKind::UnusedPolyPurityParam(pvar) => Diagnostic::error() .with_message(format!( "unused polymorphic purity parameter `{}`", pvar.source_name() )) .with_labels(vec![new_primary_label( pvar.span(), "purity parameter declared here", )]), ErrorKind::UnusedPolyTyParam(tvar) => Diagnostic::error() .with_message(format!( "unused polymorphic type parameter `{}`", tvar.source_name() )) .with_labels(vec![new_primary_label( tvar.span(), "type parameter declared here", )]), }; loc_trace.label_macro_invocation(diagnostic) } } impl error::Error for Error {} impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let diagnostic: Diagnostic = self.clone().into(); f.write_str(&diagnostic.message) } } impl From for Error { fn from(err: SyntaxError) -> Error { Error::new(err.span(), ErrorKind::SyntaxError(err)) } } impl From for Vec { fn from(error: Error) -> Vec { vec![error] } } ================================================ FILE: compiler/hir/exports.rs 
================================================
use std::collections::HashMap;

use arret_syntax::datum::DataStr;

use crate::hir::prim::PRIM_EXPORTS;
use crate::hir::scope::Binding;
use crate::hir::types::TY_EXPORTS;

/// Map from an exported name to the binding it refers to.
///
/// NOTE(review): the generic parameters were lost in extraction; restored as
/// `HashMap<DataStr, Binding>` from the `(DataStr, Binding)` pairs collected
/// by the functions below — confirm against the original file.
pub type Exports = HashMap<DataStr, Binding>;

/// Returns the exports of the primitives pseudo-module (`PRIM_EXPORTS`).
pub fn prims_exports() -> Exports {
    PRIM_EXPORTS
        .iter()
        .map(|(name, binding)| ((*name).into(), binding.clone()))
        .collect()
}

/// Returns the exports of the built-in types pseudo-module (`TY_EXPORTS`).
pub fn tys_exports() -> Exports {
    TY_EXPORTS
        .iter()
        .map(|(name, binding)| ((*name).into(), binding.clone()))
        .collect()
}

================================================ FILE: compiler/hir/import/filter.rs ================================================
use std::result;

use crate::hir::error::{Error, ErrorKind};
use crate::hir::exports::Exports;
use crate::hir::import::parse::{ParsedFilter, ParsedImportSet};

/// Filtering collects every error it encounters rather than stopping at the
/// first, so the error type is a `Vec<Error>`.
type Result<T> = result::Result<T, Vec<Error>>;

/// Applies a single parsed import filter to a module's exports, producing a
/// new export map.
///
/// `:only` and `:exclude` report an `UnboundIdent` error for each listed name
/// that the module does not actually export; `:rename` does the same for each
/// missing rename source. `:prefix` cannot fail.
fn apply_filter(filter: ParsedFilter, exports: &Exports) -> Result<Exports> {
    match filter {
        ParsedFilter::Only(only_spanned_names) => Ok(only_spanned_names
            .into_vec()
            .into_iter()
            .map(|(span, name)| {
                if let Some(binding) = exports.get(&name) {
                    Ok((name, binding.clone()))
                } else {
                    Err(Error::new(span, ErrorKind::UnboundIdent(name)))
                }
            })
            // `?` converts the single `Error` to `Vec<Error>` via `From`
            .collect::<result::Result<Exports, Error>>()?),
        ParsedFilter::Exclude(exclude_spanned_names) => {
            let mut exports = exports.clone();
            let mut errors = vec![];

            for (span, name) in exclude_spanned_names.into_vec().into_iter() {
                if exports.remove(&name).is_none() {
                    errors.push(Error::new(span, ErrorKind::UnboundIdent(name)));
                }
            }

            if errors.is_empty() {
                Ok(exports)
            } else {
                Err(errors)
            }
        }
        ParsedFilter::Rename(rename_spanned_names) => {
            let mut exports = exports.clone();
            let mut errors = vec![];

            for ((from_span, from_name), to_name) in rename_spanned_names.into_vec().into_iter() {
                match exports.remove(&from_name) {
                    Some(binding) => {
                        exports.insert(to_name, binding);
                    }
                    None => {
                        errors.push(Error::new(from_span, ErrorKind::UnboundIdent(from_name)));
                    }
                }
            }

            if errors.is_empty() {
                Ok(exports)
            } else {
                Err(errors)
            }
        }
        ParsedFilter::Prefix(prefix_name) => Ok(exports
            .iter()
            .map(|(name, binding)| (format!("{}{}", prefix_name, name).into(), binding.clone()))
            .collect()),
    }
}

/// Applies the parsed import to the passed exports
///
/// If there are no filters to apply then `exports` will be directly returned.
pub fn filter_imported_exports(
    parsed_import_set: ParsedImportSet,
    exports: &Exports,
) -> Result<Exports> {
    match parsed_import_set {
        ParsedImportSet::Module(_, _) => Ok(exports.clone()),
        ParsedImportSet::Filter(filter, inner_parsed_import) => {
            // Filters nest outermost-first; apply the inner filters before this one
            let inner_exports = filter_imported_exports(*inner_parsed_import, exports)?;
            Ok(apply_filter(filter, &inner_exports)?)
        }
    }
}

================================================ FILE: compiler/hir/import/mod.rs ================================================
mod filter;
mod parse;

use std::collections::HashMap;

use arret_syntax::datum::Datum;
use arret_syntax::span::Span;

use crate::hir::error::Error;
use crate::hir::loader::ModuleName;

pub use filter::filter_imported_exports;
pub use parse::{parse_import_set, ParsedFilter, ParsedImportSet};

/// Returns the argument data of an `(import ...)` form, or `None` if the
/// datum is not an import form.
pub fn try_extract_import_set(datum: &Datum) -> Option<&[Datum]> {
    if let Datum::List(_, vs) = datum {
        match vs.as_ref() {
            [Datum::Sym(_, name), import_set @ ..] if name.as_ref() == "import" => Some(import_set),
            _ => None,
        }
    } else {
        None
    }
}

/// Returns all unique imported module names for the passed module data
///
/// The value of the `HashMap` will be the first span where that module name occurs. This is
/// intended to provide a stable location for error reporting.
pub fn collect_imported_module_names<'a>(
    data: impl Iterator<Item = &'a Datum>,
) -> Result<HashMap<ModuleName, Span>, Vec<Error>> {
    let mut imported_module_names = HashMap::new();
    let mut errors = vec![];

    for datum in data {
        if let Some(arg_data) = try_extract_import_set(datum) {
            for arg_datum in arg_data {
                match parse_import_set(arg_datum) {
                    Ok(parsed_import) => {
                        let (span, module_name) = parsed_import.into_spanned_module_name();
                        // Keep only the first span per module name for stable reporting
                        imported_module_names.entry(module_name).or_insert(span);
                    }
                    Err(error) => {
                        errors.push(error);
                    }
                }
            }
        }
    }

    if !errors.is_empty() {
        return Err(errors);
    }

    Ok(imported_module_names)
}

#[cfg(test)]
mod test {
    use super::*;

    use std::collections::HashMap;
    use std::result;

    use arret_syntax::span::t2s;

    use crate::hir::error::{Error, ErrorKind};
    use crate::hir::exports::Exports;
    use crate::hir::loader::ModuleName;
    use crate::hir::prim::Prim;
    use crate::hir::scope::Binding;

    type Result<T> = result::Result<T, Vec<Error>>;

    /// Parses and filters an import set against a fixed `[lib test]` module
    /// exporting `quote` and `if`.
    fn exports_for_import_set(datum: &str) -> Result<Exports> {
        use arret_syntax::parser::datum_from_str;

        let parsed_import = parse::parse_import_set(&datum_from_str(None, datum).unwrap())?;
        let (span, module_name) = parsed_import.spanned_module_name();

        if module_name == &ModuleName::new("lib".into(), vec![], "test".into()) {
            let mut exports = HashMap::new();
            exports.insert("quote".into(), Binding::Prim(Prim::Quote));
            exports.insert("if".into(), Binding::Prim(Prim::If));

            Ok(filter::filter_imported_exports(parsed_import, &exports)?)
        } else {
            Err(vec![Error::new(span, ErrorKind::PackageNotFound)])
        }
    }

    /// Asserts that `name` is exported and bound to the expected prim.
    fn assert_exports_prim(exports: &Exports, name: &'static str, expected_prim: Prim) {
        match exports.get(name) {
            Some(Binding::Prim(actual_prim)) => {
                assert_eq!(actual_prim, &expected_prim);
            }
            Some(other) => {
                panic!("Non-prim binding {:?} for {}", other, name);
            }
            None => {
                panic!("Missing binding for {}", name);
            }
        }
    }

    #[test]
    fn basic_import() {
        let j = "[lib test]";
        let exports = exports_for_import_set(j).unwrap();

        assert_exports_prim(&exports, "quote", Prim::Quote);
        assert_exports_prim(&exports, "if", Prim::If);
    }

    #[test]
    fn package_not_found() {
        let j = "[not found]";
        let t = "^^^^^^^^^^^";

        let err = vec![Error::new(t2s(t), ErrorKind::PackageNotFound)];
        assert_eq!(err, exports_for_import_set(j).unwrap_err());
    }

    #[test]
    fn only_filter() {
        let j = "(:only [lib test] quote)";
        let exports = exports_for_import_set(j).unwrap();

        assert_exports_prim(&exports, "quote", Prim::Quote);
        assert!(!exports.contains_key("if"));

        let j = "(:only [lib test] quote ifz)";
        let t = "                        ^^^ ";

        let err = vec![Error::new(t2s(t), ErrorKind::UnboundIdent("ifz".into()))];
        assert_eq!(err, exports_for_import_set(j).unwrap_err());
    }

    #[test]
    fn exclude_filter() {
        let j = "(:exclude [lib test] if)";
        let exports = exports_for_import_set(j).unwrap();

        assert_exports_prim(&exports, "quote", Prim::Quote);
        assert!(!exports.contains_key("if"));

        let j = "(:exclude [lib test] ifz)";
        let t = "                     ^^^ ";

        let err = vec![Error::new(t2s(t), ErrorKind::UnboundIdent("ifz".into()))];
        assert_eq!(err, exports_for_import_set(j).unwrap_err());
    }

    #[test]
    fn rename_filter() {
        let j = "(:rename [lib test] {quote new-quote, if new-if})";
        let exports = exports_for_import_set(j).unwrap();

        assert_exports_prim(&exports, "new-quote", Prim::Quote);
        assert_exports_prim(&exports, "new-if", Prim::If);

        let j = "(:rename [lib test] {ifz new-ifz})";
        let t = "                     ^^^          ";

        let err = vec![Error::new(t2s(t), ErrorKind::UnboundIdent("ifz".into()))];
        assert_eq!(err, exports_for_import_set(j).unwrap_err());
    }

    #[test]
    fn prefix_filter() {
        let j = "(:prefix [lib test] new-)";
        let exports = exports_for_import_set(j).unwrap();

        assert_exports_prim(&exports, "new-quote", Prim::Quote);
        assert_exports_prim(&exports, "new-if", Prim::If);
    }

    #[test]
    fn prefixed_filter() {
        let j = "(:prefixed [lib test])";
        let exports = exports_for_import_set(j).unwrap();

        assert_exports_prim(&exports, "test/quote", Prim::Quote);
        assert_exports_prim(&exports, "test/if", Prim::If);
    }
}

================================================ FILE: compiler/hir/import/parse.rs ================================================
use std::result;

use arret_syntax::datum::{DataStr, Datum};
use arret_syntax::span::Span;

use crate::hir::error::{Error, ErrorKind};
use crate::hir::loader::ModuleName;
use crate::hir::util::{expect_ident, expect_spanned_ident};

type Result<T> = result::Result<T, Error>;

/// Parsed AST of an import set: a plain module name, or a filter applied to
/// an inner import set.
pub enum ParsedImportSet {
    Module(Span, ModuleName),
    Filter(ParsedFilter, Box<ParsedImportSet>),
}

impl ParsedImportSet {
    /// Consumes the import set, returning the innermost module name and its span.
    pub fn into_spanned_module_name(self) -> (Span, ModuleName) {
        match self {
            ParsedImportSet::Module(span, module_name) => (span, module_name),
            ParsedImportSet::Filter(_, inner_import) => inner_import.into_spanned_module_name(),
        }
    }

    /// Returns the innermost module name and its span by reference.
    pub fn spanned_module_name(&self) -> (Span, &ModuleName) {
        match self {
            ParsedImportSet::Module(span, module_name) => (*span, module_name),
            ParsedImportSet::Filter(_, inner_import) => inner_import.spanned_module_name(),
        }
    }

    pub fn module_name(&self) -> &ModuleName {
        self.spanned_module_name().1
    }
}

pub enum ParsedFilter {
    Only(Box<[(Span, DataStr)]>),
    Exclude(Box<[(Span, DataStr)]>),
    Rename(Box<[((Span, DataStr), DataStr)]>),
    Prefix(DataStr),
}

/// Parses `[package-name component... terminal-name]` into a `ModuleName`.
fn parse_module_name(span: Span, name: &[Datum]) -> Result<ModuleName> {
    // Need at least a package name and a terminal name
    if name.len() < 2 {
        return Err(Error::new(span, ErrorKind::ShortModuleName));
    }

    let mut name_idents = name
        .iter()
        .map(|datum| Ok(expect_ident(datum, "module name component")?.clone()));

    let package_name = name_idents.next().unwrap()?;
    let terminal_name = name_idents.next_back().unwrap()?;
    let name_components = name_idents.collect::<result::Result<Vec<DataStr>, Error>>()?;

    Ok(ModuleName::new(
        package_name,
        name_components,
        terminal_name,
    ))
}

/// Parses one filter application `(:<filter> inner-import args...)`.
///
/// `filter_input` is the already-parsed inner import set; `:prefixed` uses its
/// terminal module name to build the prefix.
fn parse_filter(
    apply_span: Span,
    filter_datum: &Datum,
    filter_input: &ParsedImportSet,
    arg_data: &[Datum],
) -> Result<ParsedFilter> {
    let (filter_span, filter_name) = match filter_datum {
        Datum::Sym(span, name) if name.starts_with(':') => (span, name),
        _ => {
            return Err(Error::new(
                filter_datum.span(),
                ErrorKind::ExpectedImportFilterKeyword(filter_datum.description()),
            ));
        }
    };

    match filter_name.as_ref() {
        ":only" => {
            let only_spanned_names = arg_data
                .iter()
                .map(|arg_datum| {
                    expect_spanned_ident(arg_datum, "identifier to include")
                        .map(|(span, ident)| (span, ident.clone()))
                })
                .collect::<Result<Box<[_]>>>()?;

            Ok(ParsedFilter::Only(only_spanned_names))
        }
        ":exclude" => {
            let exclude_spanned_names = arg_data
                .iter()
                .map(|arg_datum| {
                    expect_spanned_ident(arg_datum, "identifier to exclude")
                        .map(|(span, ident)| (span, ident.clone()))
                })
                .collect::<Result<Box<[_]>>>()?;

            Ok(ParsedFilter::Exclude(exclude_spanned_names))
        }
        ":rename" => match arg_data {
            [Datum::Map(_, vs)] => {
                let rename_spanned_names = vs
                    .iter()
                    .map(|(from_datum, to_datum)| {
                        let (from_span, from_ident) =
                            expect_spanned_ident(from_datum, "identifier to rename from")?;
                        let to_ident = expect_ident(to_datum, "identifier to rename to")?;
                        Ok(((from_span, from_ident.clone()), to_ident.clone()))
                    })
                    .collect::<Result<Box<[_]>>>()?;

                Ok(ParsedFilter::Rename(rename_spanned_names))
            }
            [other] => Err(Error::new(
                other.span(),
                ErrorKind::ExpectedImportRenameMap(other.description()),
            )),
            _ => Err(Error::new(apply_span, ErrorKind::WrongArgCount(2))),
        },
        ":prefix" => match arg_data {
            [prefix_datum] => {
                let prefix_ident = expect_ident(prefix_datum, "identifier prefix")?;
                Ok(ParsedFilter::Prefix(prefix_ident.clone()))
            }
            _ => Err(Error::new(apply_span, ErrorKind::WrongArgCount(2))),
        },
        ":prefixed" => {
            if !arg_data.is_empty() {
                return Err(Error::new(apply_span, ErrorKind::WrongArgCount(1)));
            }
Ok(ParsedFilter::Prefix( format!("{}/", filter_input.module_name().terminal_name()).into(), )) } _ => Err(Error::new(*filter_span, ErrorKind::UnsupportedImportFilter)), } } /// Parses the passed import set datum /// /// This produces an AST without performing the import itself. pub fn parse_import_set(import_set_datum: &Datum) -> Result { let span = import_set_datum.span(); match import_set_datum { Datum::Vector(_, vs) => { let module_name = parse_module_name(span, vs.as_ref())?; Ok(ParsedImportSet::Module(span, module_name)) } Datum::List(_, vs) if vs.len() >= 2 => { let filter_datum = &vs[0]; let inner_import_datum = &vs[1]; let filter_input = parse_import_set(inner_import_datum)?; let filter = parse_filter(span, filter_datum, &filter_input, &vs[2..])?; Ok(ParsedImportSet::Filter(filter, Box::new(filter_input))) } _ => Err(Error::new(span, ErrorKind::BadImportSet)), } } ================================================ FILE: compiler/hir/loader.rs ================================================ use std::collections::HashMap; use std::path; use arret_syntax::datum::DataStr; use arret_syntax::span::Span; use crate::hir::error::{Error, ErrorKind, Result}; use crate::rfi; use crate::source::SourceFile; use crate::CompileCtx; pub struct PackagePath { arret_base: Box, native_rust_base: Box, target_rust_base: Box, } pub struct PackagePaths { paths: HashMap, PackagePath>, } impl PackagePaths { pub fn empty() -> PackagePaths { PackagePaths { paths: HashMap::new(), } } /// Creates an instance including the `stdlib` package pub fn with_stdlib(arret_root_dir: &path::Path, target_triple: Option<&str>) -> PackagePaths { let mut pp = PackagePaths::empty(); let native_rust_base = arret_root_dir.join("target"); let target_rust_base = if let Some(target_triple) = target_triple { native_rust_base.join(target_triple) } else { native_rust_base.clone() }; let stdlib_path = PackagePath { arret_base: arret_root_dir.join("stdlib/arret").into(), native_rust_base: native_rust_base.into(), 
target_rust_base: target_rust_base.into(), }; pp.add_package("stdlib", stdlib_path); pp } /// Creates an instance for use in our internal unit and integration tests pub fn test_paths(target_triple: Option<&str>) -> PackagePaths { let parent_path = path::Path::new(".."); Self::with_stdlib(parent_path, target_triple) } pub fn add_package(&mut self, package_name: &str, path: PackagePath) { self.paths.insert(package_name.into(), path); } } #[derive(PartialEq, Eq, Hash, Clone)] pub struct ModuleName { package_name: DataStr, path: Vec, terminal_name: DataStr, } #[derive(Debug)] pub enum LoadedModule { Source(SourceFile), Rust(rfi::Library), } impl ModuleName { pub fn new(package_name: DataStr, path: Vec, terminal_name: DataStr) -> ModuleName { ModuleName { package_name, path, terminal_name, } } pub fn is_rfi(&self) -> bool { self.path.is_empty() && self.terminal_name.as_ref() == "rust" } pub fn terminal_name(&self) -> &DataStr { &self.terminal_name } } pub fn load_module_by_name( ccx: &CompileCtx, span: Span, module_name: &ModuleName, ) -> Result { let package_path = if let Some(package_path) = ccx .package_paths() .paths .get(module_name.package_name.as_ref()) { package_path } else { return Err(Error::new(span, ErrorKind::PackageNotFound)); }; if module_name.is_rfi() { ccx.rfi_loader() .load( span, ccx.source_loader(), &package_path.native_rust_base, &package_path.target_rust_base, &module_name.package_name, ) .map(LoadedModule::Rust) } else { // Look for files starting in the package path let mut path_buf = path::PathBuf::new(); path_buf.push(&package_path.arret_base); for path_component in &module_name.path { path_buf.push(path_component.as_ref()); } path_buf.push(format!("{}.arret", module_name.terminal_name)); let path = path_buf.as_path(); let source_file = ccx .source_loader() .load_path(path) .map_err(|err| Error::from_module_io(span, path, &err))?; Ok(LoadedModule::Source(source_file)) } } #[cfg(test)] mod test { use super::*; use crate::source::EMPTY_SPAN; fn 
load_stdlib_module(name: &'static str) -> Result { let ccx = CompileCtx::new(PackagePaths::test_paths(None), true); let module_name = ModuleName::new("stdlib".into(), vec![], name.into()); load_module_by_name(&ccx, EMPTY_SPAN, &module_name) } #[test] fn load_stdlib_base() { let loaded_module = load_stdlib_module("base").unwrap(); if let LoadedModule::Source(data) = loaded_module { assert!(!data.parsed().unwrap().is_empty()); } else { panic!("Did not get source module; got {:?}", loaded_module); } } #[test] fn load_stdlib_rust() { // Ensure we can locate and load the module. The RFI itself is tested separately. let loaded_module = load_stdlib_module("rust").expect( "unable to load stdlib library; you may need to `cargo build` before running tests", ); if let LoadedModule::Rust(rfi_module) = loaded_module { assert!(!rfi_module.exported_funs.is_empty()); } else { panic!("Did not get Rust module; got {:?}", loaded_module); } } #[test] fn load_stdlib_missing() { let err = load_stdlib_module("notamodule").unwrap_err(); if let ErrorKind::ModuleNotFound(_) = err.kind() { } else { panic!("Unexpected error kind: {:?}", err.kind()) } } } ================================================ FILE: compiler/hir/lowering.rs ================================================ use std::collections::HashMap; use codespan_reporting::diagnostic::Diagnostic; use arret_syntax::datum::Datum; use arret_syntax::span::{FileId, Span}; use crate::ty; use crate::ty::purity; use crate::CompileCtx; use crate::context::ModuleImports; use crate::hir::destruc; use crate::hir::error::{Error, ErrorKind, ExpectedSym, Result}; use crate::hir::exports::Exports; use crate::hir::import; use crate::hir::macros::{expand_macro, lower_macro_rules}; use crate::hir::ns::{Ident, NsDataIter, NsDatum}; use crate::hir::prim::Prim; use crate::hir::records::lower_record; use crate::hir::scope::{Binding, Scope}; use crate::hir::types::{lower_poly, lower_polymorphic_var_set, try_lower_purity}; use 
crate::hir::util::{expect_one_arg, expect_spanned_ns_ident, try_take_rest_arg}; use crate::hir::var_id::{ExportId, LocalIdAlloc}; use crate::hir::Lowered; use crate::hir::{ App, Cond, DeclPurity, DeclTy, Def, Expr, ExprKind, FieldAccessor, Fun, Let, LocalId, Recur, }; #[cfg(test)] use crate::source::EMPTY_SPAN; /// Module lowered to HIR pub struct LoweredModule { /// Defs in the order the were lowered pub defs: Vec>, pub exports: Exports, pub main_local_id: Option, } struct DeferredDef { span: Span, macro_invocation_span: Option, destruc: destruc::Destruc, value_datum: NsDatum, } struct DeferredExport { span: Span, ident: Ident, } enum DeferredModulePrim { Def(DeferredDef), Exports(Vec), } impl DeferredModulePrim { fn with_macro_invocation_span(self, span: Span) -> DeferredModulePrim { match self { DeferredModulePrim::Def(deferred_def) => DeferredModulePrim::Def(DeferredDef { macro_invocation_span: Some(span), ..deferred_def }), other => other, } } } pub(crate) enum LoweredReplDatum { /// One or more modules were imported Import(ModuleImports), /// An evaluable definition EvaluableDef(Def), /// A non-evalable definition handled by HIR lowering NonEvaluableDef, /// An expression Expr(Expr), } // This would be less ugly as Result once it's stabilised fn lower_user_compile_error(span: Span, arg_iter: NsDataIter) -> Error { match expect_one_arg(span, arg_iter) { Ok(NsDatum::Str(_, user_message)) => Error::new(span, ErrorKind::UserError(user_message)), Ok(other) => Error::new( other.span(), ErrorKind::ExpectedCompileErrorString(other.description()), ), Err(error) => error, } } fn lower_macro( scope: &mut Scope<'_>, self_datum: NsDatum, transformer_spec: NsDatum, ) -> Result<()> { let (self_span, self_ident) = expect_spanned_ns_ident(self_datum, "new macro name")?; let macro_rules_data = if let NsDatum::List(span, vs) = transformer_spec { let mut transformer_data = vs.into_vec(); let macro_type_datum = if let Some(macro_type_datum) = transformer_data.first() { 
macro_type_datum } else { return Err(Error::new(span, ErrorKind::NoMacroType)); }; if let Some(Binding::Prim(Prim::MacroRules)) = scope.get_datum(macro_type_datum) { } else { return Err(Error::new(macro_type_datum.span(), ErrorKind::BadMacroType)); } transformer_data.remove(0); transformer_data } else { return Err(Error::new( transformer_spec.span(), ErrorKind::ExpectedMacroSpecList(transformer_spec.description()), )); }; let mac = lower_macro_rules(scope, &self_ident, macro_rules_data)?; scope.insert_binding(self_span, self_ident, Binding::Macro(None, mac))?; Ok(()) } fn lower_defmacro(scope: &mut Scope<'_>, span: Span, mut arg_iter: NsDataIter) -> Result<()> { if arg_iter.len() != 2 { return Err(Error::new( span, ErrorKind::WrongDefLikeArgCount("defmacro"), )); } let self_datum = arg_iter.next().unwrap(); let transformer_spec = arg_iter.next().unwrap(); lower_macro(scope, self_datum, transformer_spec) } fn lower_letmacro( lia: &LocalIdAlloc, scope: &Scope<'_>, span: Span, arg_iter: NsDataIter, ) -> Result> { lower_let_like(lia, scope, span, arg_iter, lower_macro, |expr, _| expr) } fn lower_type(scope: &mut Scope<'_>, self_datum: NsDatum, ty_datum: NsDatum) -> Result<()> { let (span, ident) = expect_spanned_ns_ident(self_datum, "new type name")?; let ty = lower_poly(scope, ty_datum)?; scope.insert_binding(span, ident, Binding::Ty(ty))?; Ok(()) } fn lower_deftype(scope: &mut Scope<'_>, span: Span, mut arg_iter: NsDataIter) -> Result<()> { if arg_iter.len() != 2 { return Err(Error::new(span, ErrorKind::WrongDefLikeArgCount("deftype"))); } let self_datum = arg_iter.next().unwrap(); let ty_datum = arg_iter.next().unwrap(); lower_type(scope, self_datum, ty_datum) } fn lower_lettype( lia: &LocalIdAlloc, scope: &Scope<'_>, span: Span, arg_iter: NsDataIter, ) -> Result> { lower_let_like(lia, scope, span, arg_iter, lower_type, |expr, _| expr) } fn lower_defrecord(scope: &mut Scope<'_>, span: Span, mut arg_iter: NsDataIter) -> Result<()> { if arg_iter.len() != 2 { return 
Err(Error::new(span, ErrorKind::WrongDefRecordArgCount)); } let ty_cons_datum = arg_iter.next().unwrap(); let value_cons_datum = arg_iter.next().unwrap(); lower_record(scope, ty_cons_datum, value_cons_datum) } fn lower_letrecord( lia: &LocalIdAlloc, scope: &Scope<'_>, span: Span, arg_iter: NsDataIter, ) -> Result> { lower_let_like(lia, scope, span, arg_iter, lower_record, |expr, _| expr) } /// Lowers an identifier in to a scalar destruc with the passed type fn lower_ident_destruc( lia: &LocalIdAlloc, scope: &mut Scope<'_>, span: Span, ident: Ident, decl_ty: DeclTy, ) -> Result> { if ident.is_underscore() { Ok(destruc::Scalar::new(None, ident.into_name(), decl_ty)) } else { let local_id = lia.alloc(); let source_name = ident.name().clone(); scope.insert_local(span, ident, local_id)?; Ok(destruc::Scalar::new(Some(local_id), source_name, decl_ty)) } } fn lower_scalar_destruc( lia: &LocalIdAlloc, scope: &mut Scope<'_>, destruc_datum: NsDatum, ) -> Result> { match destruc_datum { NsDatum::Ident(span, ident) => lower_ident_destruc(lia, scope, span, ident, DeclTy::Free), NsDatum::Vector(span, vs) => { let mut data = vs.into_vec(); if data.len() != 2 { return Err(Error::new(span, ErrorKind::NoVecDestruc)); } let ty = lower_poly(scope, data.pop().unwrap())?; let (span, ident) = expect_spanned_ns_ident(data.pop().unwrap(), "new variable name")?; lower_ident_destruc(lia, scope, span, ident, ty.into()) } _ => Err(Error::new(destruc_datum.span(), ErrorKind::BadRestDestruc)), } } fn lower_list_destruc( lia: &LocalIdAlloc, scope: &mut Scope<'_>, mut data_iter: NsDataIter, ) -> Result> { let rest = try_take_rest_arg(&mut data_iter); let fixed_destrucs = data_iter .map(|v| lower_destruc(lia, scope, v)) .collect::>>>()?; let rest_destruc = match rest { Some(rest) => Some(Box::new(lower_scalar_destruc(lia, scope, rest)?)), None => None, }; Ok(destruc::List::new(fixed_destrucs, rest_destruc)) } fn lower_destruc( lia: &LocalIdAlloc, scope: &mut Scope<'_>, destruc_datum: NsDatum, ) -> 
Result> { match destruc_datum { NsDatum::Ident(span, _) | NsDatum::Vector(span, _) => { lower_scalar_destruc(lia, scope, destruc_datum) .map(|scalar| destruc::Destruc::Scalar(span, scalar)) } NsDatum::List(span, vs) => lower_list_destruc(lia, scope, vs.into_vec().into_iter()) .map(|list_destruc| destruc::Destruc::List(span, list_destruc)), NsDatum::Keyword(span, _) => Err(Error::new( span, ErrorKind::ExpectedSym( ExpectedSym { found: "keyword", usage: "new variable name", } .into(), ), )), _ => Err(Error::new(destruc_datum.span(), ErrorKind::BadListDestruc)), } } fn lower_let_like( lia: &LocalIdAlloc, outer_scope: &Scope<'_>, span: Span, mut arg_iter: NsDataIter, binder: B, fold_output: C, ) -> Result> where B: Fn(&mut Scope<'_>, NsDatum, NsDatum) -> Result, C: Fn(Expr, O) -> Expr, { let bindings_datum = arg_iter .next() .ok_or_else(|| Error::new(span, ErrorKind::NoBindingVec))?; let bindings_data = if let NsDatum::Vector(_, vs) = bindings_datum { vs.into_vec() } else { return Err(Error::new( bindings_datum.span(), ErrorKind::BindingsNotVec(bindings_datum.description()), )); }; let mut scope = outer_scope.child(); let mut outputs = Vec::::with_capacity(bindings_data.len() / 2); let mut bindings_iter = bindings_data.into_iter(); while let Some(target_datum) = bindings_iter.next() { let value_datum = bindings_iter .next() .ok_or_else(|| Error::new(target_datum.span(), ErrorKind::UnevenBindingVec))?; outputs.push(binder(&mut scope, target_datum, value_datum)?); } let body_expr = lower_body(lia, &scope, arg_iter)?; // This is to build nested `Let` expressions. Types/macros don't need this Ok(outputs.into_iter().rfold(body_expr, fold_output)) } fn lower_body( lia: &LocalIdAlloc, scope: &Scope<'_>, body_data: NsDataIter, ) -> Result> { let mut flattened_exprs = vec![]; for body_datum in body_data { match lower_expr(lia, scope, body_datum)? { Expr { kind: ExprKind::Do(mut exprs), .. 
} => { flattened_exprs.append(&mut exprs); } other => { flattened_exprs.push(other); } } } if flattened_exprs.len() == 1 { Ok(flattened_exprs.pop().unwrap()) } else { Ok(ExprKind::Do(flattened_exprs).into()) } } fn lower_let( lia: &LocalIdAlloc, scope: &Scope<'_>, span: Span, arg_iter: NsDataIter, ) -> Result> { lower_let_like( lia, scope, span, arg_iter, |scope, target_datum, value_datum| { let value_expr = lower_expr(lia, scope, value_datum)?; let destruc = lower_destruc(lia, scope, target_datum)?; Ok((destruc, value_expr)) }, |body_expr, (destruc, value_expr)| { ExprKind::Let(Box::new(Let { span, destruc, value_expr, body_expr, })) .into() }, ) } fn lower_fun( lia: &LocalIdAlloc, outer_scope: &Scope<'_>, span: Span, mut arg_iter: NsDataIter, ) -> Result> { let mut fun_scope = outer_scope.child(); let mut next_datum = arg_iter .next() .ok_or_else(|| Error::new(span, ErrorKind::NoParamDecl))?; // We can either begin with a set of type variables or a list of parameters let (pvars, tvars) = if let NsDatum::Set(_, vs) = next_datum { next_datum = arg_iter .next() .ok_or_else(|| Error::new(span, ErrorKind::NoParamDecl))?; lower_polymorphic_var_set(outer_scope, &mut fun_scope, vs.into_vec().into_iter())? 
} else { (purity::PVars::new(), ty::TVars::new()) }; // Pull out our params let params = match next_datum { NsDatum::List(_, vs) => lower_list_destruc(lia, &mut fun_scope, vs.into_vec().into_iter())?, other => { return Err(Error::new( other.span(), ErrorKind::ExpectedParamList(other.description()), )); } }; // Determine if we have a purity and return type after the parameters, eg (param) -> RetTy let mut purity = DeclPurity::Free; let mut ret_ty = DeclTy::Free; let mut ret_ty_span = None; if arg_iter.len() >= 2 { if let Some(poly_purity) = try_lower_purity(&fun_scope, &arg_iter.as_slice()[0]) { arg_iter.next(); purity = poly_purity.into(); match arg_iter.next().unwrap() { NsDatum::Ident(_, ref ident) if ident.is_underscore() => {} ret_datum => { ret_ty_span = Some(ret_datum.span()); ret_ty = lower_poly(&fun_scope, ret_datum)?.into(); } } } } // Extract the body let body_expr = lower_body(lia, &fun_scope, arg_iter)?; Ok(ExprKind::Fun(Box::new(Fun { span, pvars, tvars, purity, params, ret_ty, ret_ty_span, body_expr, })) .into()) } fn lower_expr_prim_apply( lia: &LocalIdAlloc, scope: &Scope<'_>, span: Span, prim: Prim, mut arg_iter: NsDataIter, ) -> Result> { match prim { Prim::Def | Prim::DefMacro | Prim::DefType | Prim::ImportPlaceholder | Prim::DefRecord => { Err(Error::new(span, ErrorKind::DefOutsideBody)) } Prim::Let => lower_let(lia, scope, span, arg_iter), Prim::LetMacro => lower_letmacro(lia, scope, span, arg_iter), Prim::LetType => lower_lettype(lia, scope, span, arg_iter), Prim::LetRecord => lower_letrecord(lia, scope, span, arg_iter), Prim::Export => Err(Error::new(span, ErrorKind::ExportOutsideModule)), Prim::Quote => { let literal_datum = expect_one_arg(span, arg_iter)?; Ok(literal_datum.into_syntax_datum().into()) } Prim::Fun => lower_fun(lia, scope, span, arg_iter), Prim::If => { if arg_iter.len() != 3 { return Err(Error::new(span, ErrorKind::WrongCondArgCount)); } Ok(ExprKind::Cond(Box::new(Cond { span, test_expr: lower_expr(lia, scope, 
arg_iter.next().unwrap())?, true_expr: lower_expr(lia, scope, arg_iter.next().unwrap())?, false_expr: lower_expr(lia, scope, arg_iter.next().unwrap())?, })) .into()) } Prim::Do => lower_body(lia, scope, arg_iter), Prim::Recur => lower_recur(lia, scope, span, arg_iter), Prim::CompileError => Err(lower_user_compile_error(span, arg_iter)), Prim::MacroRules | Prim::All => { Err(Error::new(span, ErrorKind::ExpectedValue("primitive"))) } } } fn lower_expr_apply( lia: &LocalIdAlloc, scope: &Scope<'_>, span: Span, fun_expr: Expr, mut arg_iter: NsDataIter, ) -> Result> { let rest_arg_datum = try_take_rest_arg(&mut arg_iter); let fixed_arg_exprs = arg_iter .map(|arg_datum| lower_expr(lia, scope, arg_datum)) .collect::>>>()?; let rest_arg_expr = match rest_arg_datum { Some(rest_arg_datum) => Some(lower_expr(lia, scope, rest_arg_datum)?), None => None, }; Ok(ExprKind::App(Box::new(App { span, fun_expr, ty_args: (), fixed_arg_exprs, rest_arg_expr, })) .into()) } fn lower_recur( lia: &LocalIdAlloc, scope: &Scope<'_>, span: Span, mut arg_iter: NsDataIter, ) -> Result> { let rest_arg_datum = try_take_rest_arg(&mut arg_iter); let fixed_arg_exprs = arg_iter .map(|arg_datum| lower_expr(lia, scope, arg_datum)) .collect::>>>()?; let rest_arg_expr = match rest_arg_datum { Some(rest_arg_datum) => Some(lower_expr(lia, scope, rest_arg_datum)?), None => None, }; Ok(ExprKind::Recur(Box::new(Recur { span, fixed_arg_exprs, rest_arg_expr, })) .into()) } fn lower_expr(lia: &LocalIdAlloc, scope: &Scope<'_>, datum: NsDatum) -> Result> { match datum { NsDatum::Ident(span, ident) => match scope.get_or_err(span, &ident)? 
{ Binding::Var(Some(module_id), local_id) => { Ok(ExprKind::ExportRef(span, ExportId::new(*module_id, *local_id)).into()) } Binding::Var(None, local_id) => Ok(ExprKind::LocalRef(span, *local_id).into()), Binding::TyPred(test_ty) => Ok(ExprKind::TyPred(span, test_ty.clone()).into()), Binding::EqPred => Ok(ExprKind::EqPred(span).into()), Binding::RecordValueCons(record_cons) => { Ok(ExprKind::RecordCons(span, record_cons.clone()).into()) } Binding::FieldAccessor(record_cons, field_index) => { Ok(ExprKind::FieldAccessor(Box::new(FieldAccessor { span, record_cons: record_cons.clone(), field_index: *field_index, })) .into()) } other => Err(Error::new( span, ErrorKind::ExpectedValue(other.description()), )), }, NsDatum::List(span, vs) => { let mut data_iter = vs.into_vec().into_iter(); let fn_datum = if let Some(fn_datum) = data_iter.next() { fn_datum } else { return Ok(Datum::List(span, Box::new([])).into()); }; if let NsDatum::Ident(fn_span, ref ident) = fn_datum { match scope.get_or_err(fn_span, ident)? 
{ Binding::Prim(prim) => { return lower_expr_prim_apply(lia, scope, span, *prim, data_iter); } Binding::Macro(module_id, mac) => { let mut macro_scope = scope.child(); let expanded_datum = expand_macro( &mut macro_scope, span, *module_id, mac, data_iter.as_slice(), )?; return lower_expr(lia, ¯o_scope, expanded_datum) .map(|expr| ExprKind::MacroExpand(span, Box::new(expr)).into()) .map_err(|e| e.with_macro_invocation_span(span)); } _ => {} } } let fn_expr = lower_expr(lia, scope, fn_datum)?; lower_expr_apply(lia, scope, span, fn_expr, data_iter) } other => Ok(other.into_syntax_datum().into()), } } fn lower_module_prim_apply( lia: &LocalIdAlloc, scope: &mut Scope<'_>, span: Span, prim: Prim, mut arg_iter: NsDataIter, ) -> Result, Vec> { match prim { Prim::Export => { let deferred_exports = arg_iter .map(|datum| { let (span, ident) = expect_spanned_ns_ident(datum, "identifier to export")?; Ok(DeferredExport { span, ident }) }) .collect::>>()?; Ok(Some(DeferredModulePrim::Exports(deferred_exports))) } Prim::Def => { if arg_iter.len() != 2 { return Err(vec![Error::new( span, ErrorKind::WrongDefLikeArgCount("def"), )]); } let destruc_datum = arg_iter.next().unwrap(); let destruc = lower_destruc(lia, scope, destruc_datum)?; let value_datum = arg_iter.next().unwrap(); let deferred_def = DeferredDef { span, macro_invocation_span: None, destruc, value_datum, }; Ok(Some(DeferredModulePrim::Def(deferred_def))) } Prim::DefMacro => Ok(lower_defmacro(scope, span, arg_iter).map(|_| None)?), Prim::DefType => Ok(lower_deftype(scope, span, arg_iter).map(|_| None)?), Prim::DefRecord => Ok(lower_defrecord(scope, span, arg_iter).map(|_| None)?), Prim::CompileError => Err(vec![lower_user_compile_error(span, arg_iter)]), _ => Err(vec![Error::new(span, ErrorKind::NonDefInsideModule)]), } } fn lower_module_def( lia: &LocalIdAlloc, scope: &mut Scope<'_>, datum: NsDatum, ) -> Result, Vec> { let span = datum.span(); if let NsDatum::List(span, vs) = datum { let mut data_iter = 
vs.into_vec().into_iter(); if let Some(NsDatum::Ident(fn_span, ref ident)) = data_iter.next() { match scope.get_or_err(fn_span, ident)? { Binding::Prim(prim) => { let prim = *prim; return lower_module_prim_apply(lia, scope, span, prim, data_iter); } Binding::Macro(module_id, mac) => { let module_id = *module_id; let mac = &mac.clone(); let expanded_datum = expand_macro(scope, span, module_id, mac, data_iter.as_slice())?; return lower_module_def(lia, scope, expanded_datum) .map(|def| def.map(|def| def.with_macro_invocation_span(span))) .map_err(|errs| { errs.into_iter() .map(|e| e.with_macro_invocation_span(span)) .collect() }); } _ => { // Non-def } } } } Err(vec![Error::new(span, ErrorKind::NonDefInsideModule)]) } fn insert_import_bindings( imports: &ModuleImports, scope: &mut Scope<'_>, arg_data: &[Datum], ) -> Result<(), Vec> { for arg_datum in arg_data { let span = arg_datum.span(); let parsed_import = import::parse_import_set(arg_datum)?; let import_module = &imports[parsed_import.module_name()]; let exports = import::filter_imported_exports(parsed_import, &import_module.exports)?; scope.insert_bindings( span, exports.into_iter().map(|(name, binding)| { ( Ident::new(Scope::root_ns_id(), name), binding.import_from(import_module.module_id), ) }), )?; } Ok(()) } pub(crate) fn lower_data( imports: &ModuleImports, data: &[Datum], ) -> Result> { let lia = LocalIdAlloc::new(); let mut scope = Scope::root(); // Build up a list of errors to return at once let mut errors: Vec = vec![]; // Extract all of our definitions. 
//
    // This occurs in two passes:
    // - Imports, types and macros are resolved immediately and cannot refer to bindings later
    //   in the body
    // - Definitions are resolved after the module has been loaded
    let mut deferred_exports = Vec::<DeferredExport>::new();
    let mut deferred_defs = Vec::<DeferredDef>::new();

    for input_datum in data {
        if let Some(arg_data) = import::try_extract_import_set(input_datum) {
            if let Err(mut new_errors) = insert_import_bindings(imports, &mut scope, arg_data) {
                errors.append(&mut new_errors);
            }
            continue;
        }

        let ns_datum = NsDatum::from_syntax_datum(input_datum);
        match lower_module_def(&lia, &mut scope, ns_datum) {
            Ok(Some(DeferredModulePrim::Exports(mut exports))) => {
                deferred_exports.append(&mut exports);
            }
            Ok(Some(DeferredModulePrim::Def(deferred_def))) => {
                deferred_defs.push(deferred_def);
            }
            Ok(None) => {}
            Err(mut new_errors) => {
                errors.append(&mut new_errors);
            }
        };
    }

    // Process any exports
    let mut exports = HashMap::with_capacity(deferred_exports.len());
    for deferred_export in deferred_exports {
        let DeferredExport { span, ident } = deferred_export;

        match scope.get_or_err(span, &ident) {
            Ok(binding) => {
                exports.insert(ident.into_name(), binding.clone());
            }
            Err(err) => {
                errors.push(err);
            }
        };
    }

    // And now process any deferred defs
    let mut defs = Vec::with_capacity(deferred_defs.len());
    for deferred_def in deferred_defs {
        match resolve_deferred_def(&lia, &scope, deferred_def) {
            Ok(def) => {
                defs.push(def);
            }
            Err(error) => {
                errors.push(error);
            }
        }
    }

    // Try to find `main!`. If we're not the entry module this will be ignored.
    let main_ident = Ident::new(Scope::root_ns_id(), "main!".into());
    let main_local_id = if let Some(Binding::Var(None, local_id)) = scope.get(&main_ident) {
        Some(*local_id)
    } else {
        None
    };

    if errors.is_empty() {
        Ok(LoweredModule {
            defs,
            exports,
            main_local_id,
        })
    } else {
        Err(errors)
    }
}

/// Lowers the value datum of a deferred def now the whole module is in scope
fn resolve_deferred_def(
    lia: &LocalIdAlloc,
    scope: &Scope<'_>,
    deferred_def: DeferredDef,
) -> Result<Def<Lowered>> {
    let DeferredDef {
        span,
        macro_invocation_span,
        destruc,
        value_datum,
    } = deferred_def;

    lower_expr(lia, scope, value_datum).map(|value_expr| Def {
        span,
        macro_invocation_span,
        destruc,
        value_expr,
    })
}

// REPL interface
/// Lowers a single REPL datum, which may be an import, a def or an expression
pub(crate) fn lower_repl_datum(
    ccx: &CompileCtx,
    scope: &mut Scope<'_>,
    datum: &Datum,
) -> Result<LoweredReplDatum, Vec<Diagnostic<FileId>>> {
    use crate::reporting::errors_to_diagnostics;

    // For the purposes of type inference every datum is a new module
    let lia = LocalIdAlloc::new();

    if let Some(arg_data) = import::try_extract_import_set(datum) {
        let imports = ccx.imports_for_data(std::iter::once(datum))?;
        insert_import_bindings(&imports, scope, arg_data).map_err(errors_to_diagnostics)?;
        return Ok(LoweredReplDatum::Import(imports));
    }

    // Try interpreting this as a module def
    let ns_datum = NsDatum::from_syntax_datum(datum);
    match lower_module_def(&lia, scope, ns_datum.clone()) {
        Ok(Some(DeferredModulePrim::Def(deferred_def))) => {
            let def =
                resolve_deferred_def(&lia, scope, deferred_def).map_err(|err| vec![err.into()])?;
            Ok(LoweredReplDatum::EvaluableDef(def))
        }
        Ok(Some(DeferredModulePrim::Exports(_))) => {
            Err(vec![
                Error::new(datum.span(), ErrorKind::ExportInsideRepl).into()
            ])
        }
        Ok(None) => Ok(LoweredReplDatum::NonEvaluableDef),
        Err(mut errs) => {
            // `NonDefInsideModule` doesn't apply because we allow non-defs in the REPL
            errs.retain(|err| err.kind() != &ErrorKind::NonDefInsideModule);

            if errs.is_empty() {
                // Re-interpret as an expression
                let expr = lower_expr(&lia, scope, ns_datum).map_err(|err| vec![err.into()])?;
                Ok(LoweredReplDatum::Expr(expr))
            } else {
                Err(errors_to_diagnostics(errs))
            }
        }
    }
}
//// #[cfg(test)] fn import_statement_for_module(names: &[&'static str]) -> Datum { Datum::List( EMPTY_SPAN, Box::new([ Datum::Sym(EMPTY_SPAN, "import".into()), Datum::Vector( EMPTY_SPAN, names .iter() .map(|&n| Datum::Sym(EMPTY_SPAN, n.into())) .collect(), ), ]), ) } #[cfg(test)] fn module_for_str(data_str: &str) -> Result { use std::iter; use std::sync::Arc; use arret_syntax::parser::data_from_str; use crate::context; use crate::hir::exports; use crate::hir::loader::ModuleName; let mut program_data = vec![]; let mut imports: ModuleImports = HashMap::new(); for (terminal_name, exports) in iter::once(("primitives", exports::prims_exports())) .chain(iter::once(("types", exports::tys_exports()))) { program_data.push(import_statement_for_module(&[ "arret", "internal", terminal_name, ])); imports.insert( ModuleName::new( "arret".into(), vec!["internal".into()], terminal_name.into(), ), Arc::new(context::prims_to_module(exports)), ); } let mut test_data = data_from_str(None, data_str).unwrap(); program_data.append(&mut test_data); imports.insert( ModuleName::new("arret".into(), vec!["internal".into()], "types".into()), Arc::new(context::prims_to_module(exports::tys_exports())), ); lower_data(&imports, &program_data).map_err(|mut errors| errors.remove(0)) } #[cfg(test)] pub fn expr_for_str(data_str: &str) -> Expr { use arret_syntax::parser::datum_from_str; let lia = LocalIdAlloc::new(); let scope = Scope::new_with_primitives(); let test_datum = datum_from_str(None, data_str).unwrap(); let test_nsdatum = NsDatum::from_syntax_datum(&test_datum); lower_expr(&lia, &scope, test_nsdatum).unwrap() } #[allow(clippy::many_single_char_names)] #[cfg(test)] mod test { use super::*; use arret_syntax::span::t2s; use crate::ty::purity::Purity; use crate::ty::Ty; #[test] fn self_quoting_bool() { let j = "false"; let t = "^^^^^"; let expected: Expr<_> = Datum::Bool(t2s(t), false).into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn self_quoting_empty_list() { let j = "()"; let t = 
"^^"; let expected: Expr<_> = Datum::List(t2s(t), Box::new([])).into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn quoted_datum_shorthand() { let j = "'foo"; let t = " ^^^"; let expected: Expr<_> = Datum::Sym(t2s(t), "foo".into()).into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn quoted_datum_explicit() { let j = "(quote foo)"; let t = " ^^^ "; let expected: Expr<_> = Datum::Sym(t2s(t), "foo".into()).into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn self_evaluating_keyword() { let j = ":foo"; let t = "^^^^"; let expected: Expr<_> = Datum::Sym(t2s(t), ":foo".into()).into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn wildcard_let() { let j = "(let [_ 1])"; let t = " ^ "; let u = "^^^^^^^^^^^"; let v = " ^ "; let destruc = destruc::Destruc::Scalar(t2s(t), destruc::Scalar::new(None, "_".into(), DeclTy::Free)); let expected: Expr<_> = ExprKind::Let(Box::new(Let { span: t2s(u), destruc, value_expr: Datum::Int(t2s(v), 1).into(), body_expr: ExprKind::Do(vec![]).into(), })) .into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn empty_fn() { let j = "(fn ())"; let t = "^^^^^^^"; let expected: Expr<_> = ExprKind::Fun(Box::new(Fun { span: t2s(t), pvars: purity::PVars::new(), tvars: ty::TVars::new(), purity: DeclPurity::Free, params: destruc::List::new(vec![], None), ret_ty: DeclTy::Free, ret_ty_span: None, body_expr: ExprKind::Do(vec![]).into(), })) .into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn empty_fn_with_purity() { let j = "(fn () -> _ 1)"; let t = "^^^^^^^^^^^^^^"; let u = " ^ "; let expected: Expr<_> = ExprKind::Fun(Box::new(Fun { span: t2s(t), pvars: purity::PVars::new(), tvars: ty::TVars::new(), purity: Purity::Pure.into(), params: destruc::List::new(vec![], None), ret_ty: DeclTy::Free, ret_ty_span: None, body_expr: Datum::Int(t2s(u), 1).into(), })) .into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn empty_fn_with_ret_ty() { let j = "(fn () -> Int 1)"; let t = "^^^^^^^^^^^^^^^^"; let u = " ^^^ "; let v 
= " ^ "; let expected: Expr<_> = ExprKind::Fun(Box::new(Fun { span: t2s(t), pvars: purity::PVars::new(), tvars: ty::TVars::new(), purity: Purity::Pure.into(), params: destruc::List::new(vec![], None), ret_ty: Ty::Int.into(), ret_ty_span: Some(t2s(u)), body_expr: Datum::Int(t2s(v), 1).into(), })) .into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn fixed_expr_apply() { let j = "(1 2 3)"; let t = "^^^^^^^"; let u = " ^ "; let v = " ^ "; let w = " ^ "; let expected: Expr<_> = ExprKind::App(Box::new(App { span: t2s(t), fun_expr: Datum::Int(t2s(u), 1).into(), ty_args: (), fixed_arg_exprs: vec![Datum::Int(t2s(v), 2).into(), Datum::Int(t2s(w), 3).into()], rest_arg_expr: None, })) .into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn rest_expr_apply() { let j = "(1 2 & 3)"; let t = "^^^^^^^^^"; let u = " ^ "; let v = " ^ "; let w = " ^ "; let expected: Expr<_> = ExprKind::App(Box::new(App { span: t2s(t), fun_expr: Datum::Int(t2s(u), 1).into(), ty_args: (), fixed_arg_exprs: vec![Datum::Int(t2s(v), 2).into()], rest_arg_expr: Some(Datum::Int(t2s(w), 3).into()), })) .into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn recur_expr() { let j = "(recur 1 2 3)"; let t = "^^^^^^^^^^^^^"; let u = " ^ "; let v = " ^ "; let w = " ^ "; let expected: Expr<_> = ExprKind::Recur(Box::new(Recur { span: t2s(t), fixed_arg_exprs: vec![ Datum::Int(t2s(u), 1).into(), Datum::Int(t2s(v), 2).into(), Datum::Int(t2s(w), 3).into(), ], rest_arg_expr: None, })) .into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn if_expr() { let j = "(if true 1 2)"; let t = "^^^^^^^^^^^^^"; let u = " ^^^^ "; let v = " ^ "; let w = " ^ "; let expected: Expr<_> = ExprKind::Cond(Box::new(Cond { span: t2s(t), test_expr: ExprKind::Lit(Datum::Bool(t2s(u), true)).into(), true_expr: ExprKind::Lit(Datum::Int(t2s(v), 1)).into(), false_expr: ExprKind::Lit(Datum::Int(t2s(w), 2)).into(), })) .into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn expand_trivial_macro() { let j = "(letmacro [one 
(macro-rules [() 1])] (one))"; let t = " ^^^^^ "; let u = " ^ "; let expected: Expr<_> = ExprKind::MacroExpand(t2s(t), Box::new(Datum::Int(t2s(u), 1).into())).into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn mutual_module_def() { let j1 = "(export x y)"; let j2 = "(def x y)"; let j3 = "(def y x)"; let j = &[j1, j2, j3].join(""); let module = module_for_str(j).unwrap(); assert_eq!(2, module.exports.len()); } #[test] fn type_predicate() { let j = "bool?"; let t = "^^^^^"; let expected: Expr<_> = ExprKind::TyPred(t2s(t), ty::pred::TestTy::Bool).into(); assert_eq!(expected, expr_for_str(j)); } #[test] fn equality_predicate() { let j = "="; let t = "^"; let expected: Expr<_> = ExprKind::EqPred(t2s(t)).into(); assert_eq!(expected, expr_for_str(j)); } } ================================================ FILE: compiler/hir/macros/expander.rs ================================================ use std::collections::HashMap; use std::sync::Arc; use arret_syntax::span::Span; use crate::context::ModuleId; use crate::hir::macros::linker::{TemplateIdent, VarLinks}; use crate::hir::macros::matcher::MatchData; use crate::hir::macros::{get_escaped_ident, starts_with_zero_or_more, Macro}; use crate::hir::ns::{Ident, NsDatum, NsId}; use crate::hir::scope::{Binding, Scope}; struct ExpandCursor<'data, 'links> { match_data: &'data MatchData<'data>, var_links: &'links VarLinks, ident_index: usize, subtemplate_index: usize, } struct ExpandCtx<'scope, 'parent> { scope: &'scope mut Scope<'parent>, module_id: Option, ns_mapping: HashMap, } impl<'scope, 'parent> ExpandCtx<'scope, 'parent> { fn new(scope: &'scope mut Scope<'parent>, module_id: Option) -> Self { ExpandCtx { scope, module_id, ns_mapping: HashMap::new(), } } fn expand_ident( &mut self, self_mac: &Arc, cursor: &mut ExpandCursor<'_, '_>, span: Span, ident: &Ident, ) -> NsDatum { let binding = if !ident.is_underscore() { let template_ident = cursor.var_links.template_ident(cursor.ident_index); cursor.ident_index += 1; match 
template_ident {
                TemplateIdent::SubpatternVar(var_index) => {
                    // Substitute the matched datum directly
                    return cursor.match_data.var(*var_index).clone();
                }
                TemplateIdent::SelfIdent => Some(Binding::Macro(self.module_id, self_mac.clone())),
                TemplateIdent::Bound(binding) => Some(if let Some(module_id) = self.module_id {
                    binding.import_from(module_id)
                } else {
                    binding.clone()
                }),
                TemplateIdent::Unbound => None,
            }
        } else {
            None
        };

        // Re-scope this ident
        let old_ns_id = ident.ns_id();

        let scope = &mut self.scope;
        let new_ns_id = self
            .ns_mapping
            .entry(old_ns_id)
            .or_insert_with(|| scope.alloc_ns_id());

        let new_ident = ident.with_ns_id(*new_ns_id);

        if let Some(binding) = binding {
            scope.replace_binding(span, new_ident.clone(), binding);
        };

        NsDatum::Ident(span, new_ident)
    }

    /// Expands a zero-or-more (`...`) template against its matched subpatterns
    fn expand_zero_or_more(
        &mut self,
        self_mac: &Arc<Macro>,
        cursor: &mut ExpandCursor<'_, '_>,
        template: &NsDatum,
    ) -> Vec<NsDatum> {
        // Find our subpattern index from our subtemplate index
        let subtemplate_index = cursor.subtemplate_index;
        let subvar_links = &cursor.var_links.subtemplates()[subtemplate_index];
        let subpattern_index = subvar_links.subpattern_index();

        let submatches = &cursor.match_data.subpattern(subpattern_index);

        cursor.subtemplate_index += 1;

        submatches
            .iter()
            .map(|m| {
                // Build a new cursor pointing to our subpattern
                let mut subcursor = ExpandCursor {
                    match_data: m,
                    var_links: subvar_links,
                    ident_index: 0,
                    subtemplate_index: 0,
                };

                self.expand_datum(self_mac, &mut subcursor, template)
            })
            .collect()
    }

    /// Expands a slice of templates, splicing in zero-or-more expansions
    fn expand_slice(
        &mut self,
        self_mac: &Arc<Macro>,
        cursor: &mut ExpandCursor<'_, '_>,
        mut templates: &[NsDatum],
    ) -> Box<[NsDatum]> {
        let mut result: Vec<NsDatum> = vec![];

        while !templates.is_empty() {
            if starts_with_zero_or_more(templates) {
                let mut expanded = self.expand_zero_or_more(self_mac, cursor, &templates[0]);
                result.append(&mut expanded);

                // Skip the ellipsis as well
                templates = &templates[2..];
            } else {
                let expanded = self.expand_datum(self_mac, cursor, &templates[0]);
                result.push(expanded);
                templates = &templates[1..];
            }
        }

        result.into_boxed_slice()
    }

    /// Expands a list template, handling the `(... ident)` escape form
    fn expand_list(
        &mut self,
        self_mac: &Arc<Macro>,
        cursor: &mut ExpandCursor<'_, '_>,
        span: Span,
        templates: &[NsDatum],
    ) -> NsDatum {
        if let Some(ident) = get_escaped_ident(templates) {
            NsDatum::Ident(span, ident.clone())
        } else {
            NsDatum::List(span, self.expand_slice(self_mac, cursor, templates))
        }
    }

    fn expand_datum(
        &mut self,
        self_mac: &Arc<Macro>,
        cursor: &mut ExpandCursor<'_, '_>,
        template: &NsDatum,
    ) -> NsDatum {
        match template {
            NsDatum::Ident(span, ident) => self.expand_ident(self_mac, cursor, *span, ident),
            NsDatum::List(span, vs) => self.expand_list(self_mac, cursor, *span, vs),
            NsDatum::Vector(span, vs) => {
                NsDatum::Vector(*span, self.expand_slice(self_mac, cursor, vs))
            }
            NsDatum::Set(span, vs) => NsDatum::Set(*span, self.expand_slice(self_mac, cursor, vs)),
            other => other.clone(),
        }
    }
}

/// Expands a matched macro rule's template in to a datum
pub fn expand_rule<'scope, 'parent, 'data>(
    scope: &'scope mut Scope<'parent>,
    module_id: Option<ModuleId>,
    self_mac: &Arc<Macro>,
    match_data: &'data MatchData<'data>,
    var_links: &VarLinks,
    template: &NsDatum,
) -> NsDatum {
    let mut mcx = ExpandCtx::new(scope, module_id);

    let mut cursor = ExpandCursor {
        match_data,
        var_links,
        ident_index: 0,
        subtemplate_index: 0,
    };

    mcx.expand_datum(self_mac, &mut cursor, template)
}

================================================ FILE: compiler/hir/macros/linker.rs ================================================

use std::collections::HashMap;
use std::result;

use arret_syntax::span::Span;

use crate::hir::error::{Error, ErrorKind, Result};
use crate::hir::macros::starts_with_zero_or_more;
use crate::hir::ns::{Ident, NsDatum};
use crate::hir::scope::{Binding, Scope};

/// Indicates the meaning of a given ident in the template
#[derive(Debug)]
pub enum TemplateIdent {
    /// Refers to a template variable with a given index
    SubpatternVar(usize),
    /// Ident of the macro being expanded
    SelfIdent,
    /// Ident bound to the given binding
    Bound(Binding),
    /// Unbound ident
    Unbound,
}

/// Precomputed links from variables in the template to the pattern
#[derive(Debug)] pub struct VarLinks { subpattern_index: usize, template_idents: Box<[TemplateIdent]>, subtemplates: Box<[VarLinks]>, } impl VarLinks { /// Index of the subpattern for this subtemplate pub fn subpattern_index(&self) -> usize { self.subpattern_index } /// Returns the template ident for the given index pub fn template_ident(&self, i: usize) -> &TemplateIdent { &self.template_idents[i] } /// Links for our subtemplates in visit order pub fn subtemplates(&self) -> &[VarLinks] { &self.subtemplates } } #[derive(Debug)] struct FoundVars<'data> { span: Span, idents: Vec<&'data Ident>, subs: Vec>, } impl<'data> FoundVars<'data> { fn new(span: Span) -> Self { FoundVars { span, idents: vec![], subs: vec![], } } } /// Tracks which type of input is being provided to `FindVarsCtx` #[derive(Clone, Copy, PartialEq)] enum FindVarsInputType { Pattern, Template, } struct FindVarsCtx<'data> { input_type: FindVarsInputType, var_spans: Option>, } type FindVarsResult = result::Result<(), Error>; impl<'data> FindVarsCtx<'data> { fn new(input_type: FindVarsInputType) -> Self { let var_spans = if input_type == FindVarsInputType::Template { // Duplicate vars are allowed in the template as they must all resolve to the same // value. 
None } else { // This tracks the name of variables and where they were first used (for error // reporting) Some(HashMap::<&'data Ident, Span>::new()) }; FindVarsCtx { input_type, var_spans, } } fn visit_ident( &mut self, pattern_vars: &mut FoundVars<'data>, span: Span, ident: &'data Ident, ) -> FindVarsResult { if ident.is_underscore() { // This is a wildcard return Ok(()); } if ident.is_ellipsis() { return Err(Error::new(span, ErrorKind::MacroBadEllipsis)); } if let Some(ref mut var_spans) = self.var_spans { if let Some(old_span) = var_spans.insert(ident, span) { return Err(Error::new( span, ErrorKind::DuplicateDef(Some(old_span), ident.name().clone()), )); } } pattern_vars.idents.push(ident); Ok(()) } fn visit_zero_or_more( &mut self, pattern_vars: &mut FoundVars<'data>, pattern: &'data NsDatum, ) -> FindVarsResult { let mut sub_vars = FoundVars::new(pattern.span()); self.visit_datum(&mut sub_vars, pattern)?; pattern_vars.subs.push(sub_vars); Ok(()) } fn visit_datum( &mut self, pattern_vars: &mut FoundVars<'data>, pattern: &'data NsDatum, ) -> FindVarsResult { match pattern { NsDatum::Ident(span, ident) => self.visit_ident(pattern_vars, *span, ident), NsDatum::List(_, vs) => self.visit_list(pattern_vars, vs), NsDatum::Vector(_, vs) => self.visit_seq(pattern_vars, vs), NsDatum::Set(span, vs) => self.visit_set(pattern_vars, *span, vs), _ => { // Can't contain a pattern var Ok(()) } } } fn visit_seq( &mut self, pattern_vars: &mut FoundVars<'data>, mut patterns: &'data [NsDatum], ) -> FindVarsResult { let mut zero_or_more_span: Option = None; while !patterns.is_empty() { if starts_with_zero_or_more(patterns) { let pattern = &patterns[0]; // Make sure we don't have multiple zero or more matches in the same slice if self.input_type == FindVarsInputType::Pattern { if let Some(old_span) = zero_or_more_span.replace(pattern.span()) { // We've already had a zero-or-more match return Err(Error::new( pattern.span(), ErrorKind::MultipleZeroOrMoreMatch(old_span), )); } } 
self.visit_zero_or_more(pattern_vars, pattern)?; patterns = &patterns[2..]; } else { self.visit_datum(pattern_vars, &patterns[0])?; patterns = &patterns[1..]; } } Ok(()) } fn visit_list( &mut self, pattern_vars: &mut FoundVars<'data>, patterns: &'data [NsDatum], ) -> FindVarsResult { match patterns { [NsDatum::Ident(_, ellipsis_ident), escaped_datum] if ellipsis_ident.is_ellipsis() => { if let NsDatum::Ident(_, _) = escaped_datum { // This isn't actually a list Ok(()) } else { Err(Error::new( escaped_datum.span(), ErrorKind::ExpectedMacroEllipsisEscape(escaped_datum.description()), )) } } _ => self.visit_seq(pattern_vars, patterns), } } fn visit_set( &mut self, pattern_vars: &mut FoundVars<'data>, span: Span, patterns: &'data [NsDatum], ) -> FindVarsResult { if self.input_type == FindVarsInputType::Template { // Sets are expanded the same way as any other sequence return self.visit_seq(pattern_vars, patterns); } match patterns.len() { 0 => Ok(()), 2 if starts_with_zero_or_more(patterns) => { self.visit_zero_or_more(pattern_vars, &patterns[0]) } _ => Err(Error::new(span, ErrorKind::MacroBadSetPattern)), } } } fn link_template_ident( scope: &Scope<'_>, self_ident: &Ident, template_ident: &Ident, pattern_idents: &[&Ident], ) -> TemplateIdent { // First, see if this corresponds to a var in the pattern if let Some(subpattern_index) = pattern_idents .iter() .position(|pattern_ident| *pattern_ident == template_ident) { TemplateIdent::SubpatternVar(subpattern_index) } else if template_ident == self_ident { TemplateIdent::SelfIdent } else if let Some(binding) = scope.get(template_ident) { TemplateIdent::Bound(binding.clone()) } else { TemplateIdent::Unbound } } fn link_found_vars( scope: &Scope<'_>, self_ident: &Ident, subpattern_index: usize, pattern_vars: &FoundVars<'_>, template_vars: &FoundVars<'_>, ) -> Result { let template_idents = template_vars .idents .iter() .map(|template_ident| { link_template_ident(scope, self_ident, template_ident, &pattern_vars.idents) }) 
.collect(); let subtemplates = template_vars .subs .iter() .map(|subtemplate_vars| { if subtemplate_vars.idents.is_empty() { return Err(Error::new( template_vars.span, ErrorKind::MacroNoTemplateVars, )); } // Find possible indices for subpatterns in our pattern let possible_indices = pattern_vars .subs .iter() .enumerate() .filter(|(_, subpattern_vars)| { subpattern_vars .idents .iter() .any(|subpattern_var| subtemplate_vars.idents.contains(subpattern_var)) }) .collect::)>>(); if possible_indices.is_empty() { return Err(Error::new(template_vars.span, ErrorKind::MacroNoPatternRef)); } else if possible_indices.len() > 1 { let sub_var_spans = possible_indices .iter() .map(|(_, subpattern_vars)| subpattern_vars.span) .collect(); return Err(Error::new( template_vars.span, ErrorKind::MacroMultiPatternRef(sub_var_spans), )); } // Iterate over our subpatterns let (pattern_index, subpattern_vars) = possible_indices[0]; link_found_vars( scope, self_ident, pattern_index, subpattern_vars, subtemplate_vars, ) }) .collect::>>()?; Ok(VarLinks { subpattern_index, template_idents, subtemplates, }) } pub fn link_rule_vars( scope: &Scope<'_>, self_ident: &Ident, pattern_span: Span, patterns: &[NsDatum], template: &NsDatum, ) -> Result { let mut fpvcx = FindVarsCtx::new(FindVarsInputType::Pattern); let mut pattern_vars = FoundVars::new(pattern_span); fpvcx.visit_seq(&mut pattern_vars, patterns)?; let mut ftvcx = FindVarsCtx::new(FindVarsInputType::Template); let mut template_vars = FoundVars::new(template.span()); ftvcx.visit_datum(&mut template_vars, template)?; link_found_vars(scope, self_ident, 0, &pattern_vars, &template_vars) } ================================================ FILE: compiler/hir/macros/matcher.rs ================================================ use std::result; use crate::hir::macros::{get_escaped_ident, starts_with_zero_or_more, Rule}; use crate::hir::ns::{Ident, NsDatum}; #[derive(Debug)] pub struct MatchData<'data> { vars: Vec<&'data NsDatum>, // The outside 
vector is the subpatterns; the inside slice contains the zero or more matches subpatterns: Vec]>>, } impl<'data> MatchData<'data> { fn new() -> MatchData<'data> { MatchData { vars: vec![], subpatterns: vec![], } } pub fn var(&self, i: usize) -> &'data NsDatum { self.vars[i] } pub fn subpattern(&self, i: usize) -> &[MatchData<'data>] { &self.subpatterns[i] } } struct MatchCtx<'data> { match_data: MatchData<'data>, } type Result = result::Result; impl<'data> MatchCtx<'data> { fn new() -> Self { MatchCtx { match_data: MatchData::new(), } } fn match_ident(&mut self, pattern_ident: &'data Ident, arg: &'data NsDatum) -> bool { if pattern_ident.is_underscore() { // This is a wildcard; just discard } else { self.match_data.vars.push(arg); } true } // TODO: Maps #[allow(clippy::float_cmp)] fn match_datum(&mut self, pattern: &'data NsDatum, arg: &'data NsDatum) -> bool { match (pattern, arg) { (NsDatum::Ident(_, pattern_ident), arg) => self.match_ident(pattern_ident, arg), (NsDatum::Keyword(_, pv), NsDatum::Keyword(_, av)) => pv == av, (NsDatum::List(_, pvs), NsDatum::List(_, avs)) => self.match_slice(pvs, avs), (NsDatum::Vector(_, pvs), NsDatum::Vector(_, avs)) => self.match_slice(pvs, avs), (NsDatum::Set(_, pvs), NsDatum::Set(_, avs)) => self.match_slice(pvs, avs), (NsDatum::Bool(_, pv), NsDatum::Bool(_, av)) => pv == av, (NsDatum::Int(_, pv), NsDatum::Int(_, av)) => pv == av, // Don't match NaNs against other NaNs. This is consistent with `=`. 
(NsDatum::Float(_, pv), NsDatum::Float(_, av)) => pv == av,
            (NsDatum::Char(_, pv), NsDatum::Char(_, av)) => pv == av,
            (NsDatum::Str(_, pv), NsDatum::Str(_, av)) => pv == av,
            (NsDatum::List(_, pv), NsDatum::Ident(_, arg)) => {
                // An escaped ident pattern (`(... ident)`) matches that ident literally
                if let Some(escaped_ident) = get_escaped_ident(pv) {
                    escaped_ident.name() == arg.name()
                } else {
                    false
                }
            }
            _ => false,
        }
    }

    /// Matches a zero-or-more pattern (`pattern ...`) against a run of args
    ///
    /// Every arg must match `pattern`; each match is collected as its own `MatchData`.
    fn match_zero_or_more(&mut self, pattern: &'data NsDatum, args: &'data [NsDatum]) -> bool {
        let submatch_result = args
            .iter()
            .map(|arg| {
                let mut subcontext = MatchCtx {
                    match_data: MatchData::new(),
                };

                if !subcontext.match_datum(pattern, arg) {
                    Err(())
                } else {
                    Ok(subcontext.match_data)
                }
            })
            .collect::<result::Result<Box<[MatchData<'data>]>, ()>>();

        match submatch_result {
            Ok(submatch_data) => {
                self.match_data.subpatterns.push(submatch_data);
                true
            }
            Err(()) => false,
        }
    }

    /// Matches a pattern slice against an arg slice, handling embedded zero-or-more patterns
    fn match_slice(&mut self, mut patterns: &'data [NsDatum], mut args: &'data [NsDatum]) -> bool {
        loop {
            if starts_with_zero_or_more(patterns) {
                // The fixed patterns after the `...` consume args from the end
                let rest_patterns_len = patterns.len() - 2;
                if args.len() < rest_patterns_len {
                    // Cannot match
                    break false;
                }

                let (zero_or_more_args, rest_args) =
                    args.split_at(args.len() - rest_patterns_len);

                if !self.match_zero_or_more(&patterns[0], zero_or_more_args) {
                    break false;
                }

                patterns = &patterns[2..];
                args = rest_args;
            } else {
                let (pattern, arg) = match (patterns.first(), args.first()) {
                    (Some(pattern), Some(arg)) => (pattern, arg),
                    (None, None) => {
                        // Patterns and args ran out at the same time
                        break true;
                    }
                    _ => {
                        // Mismatched lengths
                        break false;
                    }
                };

                if !self.match_datum(pattern, arg) {
                    break false;
                }

                patterns = &patterns[1..];
                args = &args[1..];
            }
        }
    }

    fn visit_rule(
        mut self,
        rule: &'data Rule,
        arg_data: &'data [NsDatum],
    ) -> Result<MatchData<'data>> {
        if self.match_slice(&rule.pattern, arg_data) {
            Ok(self.match_data)
        } else {
            Err(())
        }
    }
}

/// Attempts to match a single macro rule against the invocation's args
pub fn match_rule<'data>(
    rule: &'data Rule,
    arg_data: &'data [NsDatum],
) -> Result<MatchData<'data>> {
    let mcx = MatchCtx::new();
    mcx.visit_rule(rule, arg_data)
}

================================================
FILE:
compiler/hir/macros/mod.rs
================================================

mod expander;
mod linker;
mod matcher;

use std::sync::Arc;

use arret_syntax::span::Span;

use crate::context::ModuleId;
use crate::hir::error::{Error, ErrorKind, Result};
use crate::hir::macros::expander::expand_rule;
use crate::hir::macros::linker::{link_rule_vars, VarLinks};
use crate::hir::macros::matcher::match_rule;
use crate::hir::ns::{Ident, NsDatum};
use crate::hir::scope::Scope;

/// Single `[pattern template]` rule of a macro
#[derive(Debug)]
pub struct Rule {
    pattern_span: Span,
    pattern: Box<[NsDatum]>,
    template: NsDatum,
    var_links: VarLinks,
}

#[derive(Debug)]
pub struct Macro {
    rules: Box<[Rule]>,
}

impl Macro {
    pub fn new(rules: Box<[Rule]>) -> Arc<Macro> {
        Arc::new(Self { rules })
    }
}

/// Returns true if the data begins with a zero-or-more match (`pattern ...`)
fn starts_with_zero_or_more(data: &[NsDatum]) -> bool {
    match data {
        [_, NsDatum::Ident(_, ident), ..] => ident.is_ellipsis(),
        _ => false,
    }
}

/// Returns the escaped ident if the data is an ellipsis escape of the form `(... ident)`
fn get_escaped_ident(data: &[NsDatum]) -> Option<&Ident> {
    match data {
        [NsDatum::Ident(_, ellipsis_ident), NsDatum::Ident(_, escaped_ident)]
            if ellipsis_ident.is_ellipsis() =>
        {
            Some(escaped_ident)
        }
        _ => None,
    }
}

/// Lowers a single `[pattern template]` rule vector in to a linked `Rule`
fn lower_macro_rule_datum(
    scope: &Scope<'_>,
    self_ident: &Ident,
    rule_datum: NsDatum,
) -> Result<Rule> {
    let (span, mut rule_values) = if let NsDatum::Vector(span, vs) = rule_datum {
        (span, vs.into_vec())
    } else {
        return Err(Error::new(
            rule_datum.span(),
            ErrorKind::ExpectedMacroRuleVec(rule_datum.description()),
        ));
    };

    if rule_values.len() != 2 {
        return Err(Error::new(
            span,
            ErrorKind::WrongMacroRuleVecCount(rule_values.len()),
        ));
    }

    let template = rule_values.pop().unwrap();
    let pattern_datum = rule_values.pop().unwrap();

    let (pattern_span, pattern) = if let NsDatum::List(span, vs) = pattern_datum {
        (span, vs)
    } else {
        return Err(Error::new(
            pattern_datum.span(),
            ErrorKind::ExpectedMacroRulePatternList(pattern_datum.description()),
        ));
    };

    let var_links = link_rule_vars(scope, self_ident, pattern_span, &pattern, &template)?;

    Ok(Rule {
        pattern_span,
        pattern,
        template,
        var_links,
    })
}

pub fn
lower_macro_rules(
    scope: &Scope<'_>,
    self_ident: &Ident,
    macro_rules_data: Vec<NsDatum>,
) -> Result<Arc<Macro>> {
    let rules = macro_rules_data
        .into_iter()
        .map(|rule_datum| lower_macro_rule_datum(scope, self_ident, rule_datum))
        .collect::<Result<Box<[Rule]>>>()?;

    Ok(Macro::new(rules))
}

/// Expands a macro invocation using the first rule whose pattern matches the args
///
/// If no rule matches an error containing every rule's pattern span is returned.
pub fn expand_macro<'s, 'p>(
    scope: &'s mut Scope<'p>,
    invocation_span: Span,
    module_id: Option<ModuleId>,
    mac: &Arc<Macro>,
    arg_data: &[NsDatum],
) -> Result<NsDatum> {
    for rule in mac.rules.iter() {
        let match_result = match_rule(rule, arg_data);

        if let Ok(match_data) = match_result {
            return Ok(expand_rule(
                scope,
                module_id,
                mac,
                &match_data,
                &rule.var_links,
                &rule.template,
            ));
        }
    }

    Err(Error::new(
        invocation_span,
        ErrorKind::NoMacroRule(mac.rules.iter().map(|rule| rule.pattern_span).collect()),
    ))
}

================================================
FILE: compiler/hir/mod.rs
================================================

pub(crate) mod destruc;
pub(crate) mod error;
pub(crate) mod exports;
pub(crate) mod import;
pub(crate) mod loader;
pub(crate) mod lowering;
mod macros;
pub(crate) mod ns;
mod prim;
mod records;
pub(crate) mod scope;
mod types;
mod util;
pub(crate) mod var_id;
pub(crate) mod visitor;

use std::sync::Arc;

use arret_syntax::datum::Datum;
use arret_syntax::span::Span;

use crate::rfi;
use crate::ty;
use crate::ty::purity;
use crate::ty::record;
use crate::ty::Ty;

pub use crate::hir::var_id::{ExportId, LocalId};

/// DeclTy is a type declared by a user
///
/// The `Known` variant indicates the type is specified while `Free` indicates it must be inferred.
#[derive(PartialEq, Debug, Clone)]
pub enum DeclTy {
    // NOTE(review): the generic argument was lost in extraction; reconstructed as the polymorphic
    // type reference used elsewhere in this module — confirm against upstream
    Known(ty::Ref<ty::Poly>),
    Free,
}

impl From<Ty<ty::Poly>> for DeclTy {
    fn from(ty: Ty<ty::Poly>) -> Self {
        DeclTy::Known(ty::Ref::Fixed(ty))
    }
}

impl From<ty::Ref<ty::Poly>> for DeclTy {
    fn from(poly: ty::Ref<ty::Poly>) -> Self {
        DeclTy::Known(poly)
    }
}

/// Decl is a purity declared by a user
///
/// The `Known` variant indicates the purity is specified while `Free` indicates it must be
/// inferred.
#[derive(PartialEq, Eq, Debug, Clone)] pub enum DeclPurity { Known(purity::Ref), Free, } impl From for DeclPurity { fn from(poly: purity::Ref) -> Self { DeclPurity::Known(poly) } } #[cfg(test)] impl From for DeclPurity { fn from(purity: purity::Purity) -> Self { DeclPurity::Known(purity::Ref::Fixed(purity)) } } pub trait Phase: Clone + std::cmp::PartialEq + std::fmt::Debug { type Purity: Clone + std::cmp::PartialEq + std::fmt::Debug; type DeclType: Clone + std::cmp::PartialEq + std::fmt::Debug; type ResultType: Clone + std::cmp::PartialEq + std::fmt::Debug; type TyArgs: Clone + std::cmp::PartialEq + std::fmt::Debug; } #[derive(Clone, PartialEq, Debug)] pub struct Inferred {} impl Phase for Inferred { type Purity = purity::Ref; type DeclType = ty::Ref; type ResultType = ty::Ref; type TyArgs = ty::ty_args::TyArgs; } #[derive(Clone, PartialEq, Debug)] pub struct Lowered {} impl Phase for Lowered { type Purity = DeclPurity; type DeclType = DeclTy; type ResultType = (); type TyArgs = (); } #[derive(PartialEq, Debug, Clone)] pub struct Fun { pub span: Span, pub pvars: purity::PVars, pub tvars: ty::TVars, pub purity: P::Purity, pub params: destruc::List

, pub ret_ty: P::DeclType, pub ret_ty_span: Option, pub body_expr: Expr

, } #[derive(PartialEq, Debug, Clone)] pub struct Cond { pub span: Span, pub test_expr: Expr

, pub true_expr: Expr

, pub false_expr: Expr

, } #[derive(PartialEq, Debug, Clone)] pub struct Let { pub span: Span, pub destruc: destruc::Destruc

, pub value_expr: Expr

, pub body_expr: Expr

, } #[derive(PartialEq, Debug, Clone)] pub struct App { pub span: Span, pub fun_expr: Expr

, pub ty_args: P::TyArgs, pub fixed_arg_exprs: Vec>, pub rest_arg_expr: Option>, } #[derive(PartialEq, Debug, Clone)] pub struct Recur { pub span: Span, pub fixed_arg_exprs: Vec>, pub rest_arg_expr: Option>, } #[derive(PartialEq, Debug, Clone)] pub struct FieldAccessor { pub span: Span, pub record_cons: record::ConsId, pub field_index: usize, } #[derive(PartialEq, Debug, Clone)] pub struct Expr { pub result_ty: P::ResultType, pub kind: ExprKind

, } impl From for Expr { fn from(datum: Datum) -> Expr { ExprKind::Lit(datum).into() } } #[derive(PartialEq, Debug, Clone)] pub enum ExprKind { Lit(Datum), App(Box>), Recur(Box>), Fun(Box>), RustFun(Arc), Let(Box>), Cond(Box>), ExportRef(Span, ExportId), LocalRef(Span, LocalId), TyPred(Span, ty::pred::TestTy), EqPred(Span), RecordCons(Span, record::ConsId), FieldAccessor(Box), Do(Vec>), /// Used for tracing macro expansion for error report and debug information /// /// Other than the above this should be treated identically to the inner expression. MacroExpand(Span, Box>), } impl From> for Expr { fn from(kind: ExprKind) -> Expr { Expr { result_ty: (), kind, } } } #[derive(PartialEq, Debug)] pub struct Def { pub span: Span, pub macro_invocation_span: Option, pub destruc: destruc::Destruc

, pub value_expr: Expr

, } pub use self::loader::PackagePaths; pub use self::types::lower_poly; pub use self::types::str_for_purity; pub use self::types::str_for_ty_ref; #[cfg(test)] pub use self::types::{lower_polymorphic_var_set, poly_for_str, try_lower_purity, tvar_bounded_by}; #[cfg(test)] pub use self::lowering::expr_for_str; ================================================ FILE: compiler/hir/ns.rs ================================================ use std::vec; use arret_syntax::datum::{DataStr, Datum}; use arret_syntax::span::Span; use crate::hir::scope::Scope; new_counting_id_type!(NsIdCounter, NsId); #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct Ident { ns_id: NsId, data_name: DataStr, } impl Ident { pub fn new(ns_id: NsId, data_name: DataStr) -> Ident { Ident { ns_id, data_name } } pub fn ns_id(&self) -> NsId { self.ns_id } pub fn name(&self) -> &DataStr { &self.data_name } pub fn into_name(self) -> DataStr { self.data_name } pub fn is_underscore(&self) -> bool { self.data_name.as_ref() == "_" } pub fn is_ellipsis(&self) -> bool { self.data_name.as_ref() == "..." 
}

    pub fn is_ampersand(&self) -> bool {
        self.data_name.as_ref() == "&"
    }

    /// Returns a copy of this ident with the same name but bound to `new_ns_id`
    pub fn with_ns_id(&self, new_ns_id: NsId) -> Ident {
        Ident {
            ns_id: new_ns_id,
            data_name: self.data_name.clone(),
        }
    }
}

/// Namespaced analogue of `arret_syntax::datum::Datum`
///
/// Symbols are split in to keywords (starting with `:`) and namespaced idents; all other variants
/// mirror their syntax counterparts.
#[derive(Clone, PartialEq, Debug)]
pub enum NsDatum {
    Bool(Span, bool),
    Char(Span, char),
    Int(Span, i64),
    Float(Span, f64),
    List(Span, Box<[NsDatum]>),
    Str(Span, DataStr),
    Keyword(Span, DataStr),
    Ident(Span, Ident),
    Vector(Span, Box<[NsDatum]>),
    Map(Span, Box<[(NsDatum, NsDatum)]>),
    Set(Span, Box<[NsDatum]>),
}

impl NsDatum {
    /// Converts a slice of syntax data in to `NsDatum`s
    fn map_syntax_data(vs: &[Datum]) -> Box<[NsDatum]> {
        vs.iter().map(Self::from_syntax_datum).collect()
    }

    /// Converts a syntax datum, placing every ident in the root namespace
    pub fn from_syntax_datum(value: &Datum) -> NsDatum {
        match value {
            Datum::Bool(span, v) => NsDatum::Bool(*span, *v),
            Datum::Char(span, v) => NsDatum::Char(*span, *v),
            Datum::Int(span, v) => NsDatum::Int(*span, *v),
            Datum::Float(span, v) => NsDatum::Float(*span, *v),
            Datum::Str(span, v) => NsDatum::Str(*span, v.clone()),
            Datum::Sym(span, v) => {
                // Symbols starting with `:` become keywords; everything else becomes an ident
                if v.starts_with(':') {
                    NsDatum::Keyword(*span, v.clone())
                } else {
                    NsDatum::Ident(*span, Ident::new(Scope::root_ns_id(), v.clone()))
                }
            }
            Datum::List(span, vs) => NsDatum::List(*span, Self::map_syntax_data(vs)),
            Datum::Vector(span, vs) => NsDatum::Vector(*span, Self::map_syntax_data(vs)),
            Datum::Set(span, vs) => NsDatum::Set(*span, Self::map_syntax_data(vs)),
            Datum::Map(span, vs) => NsDatum::Map(
                *span,
                vs.iter()
                    .map(|(k, v)| (NsDatum::from_syntax_datum(k), NsDatum::from_syntax_datum(v)))
                    .collect(),
            ),
        }
    }

    /// Converts a boxed slice of `NsDatum`s back in to syntax data
    fn map_nsdata(vs: Box<[NsDatum]>) -> Box<[Datum]> {
        vs.into_vec()
            .into_iter()
            .map(NsDatum::into_syntax_datum)
            .collect()
    }

    /// Converts this datum back to a syntax datum, discarding namespace information
    pub fn into_syntax_datum(self) -> Datum {
        match self {
            NsDatum::Bool(span, v) => Datum::Bool(span, v),
            NsDatum::Char(span, v) => Datum::Char(span, v),
            NsDatum::Int(span, v) => Datum::Int(span, v),
            NsDatum::Float(span, v) => Datum::Float(span, v),
            NsDatum::Str(span, v) => Datum::Str(span, v),
            NsDatum::Keyword(span, v) => Datum::Sym(span, v),
            NsDatum::Ident(span, v) => Datum::Sym(span,
v.into_name()), NsDatum::List(span, vs) => Datum::List(span, Self::map_nsdata(vs)), NsDatum::Vector(span, vs) => Datum::Vector(span, Self::map_nsdata(vs)), NsDatum::Set(span, vs) => Datum::Set(span, Self::map_nsdata(vs)), NsDatum::Map(span, vs) => Datum::Map( span, vs.into_vec() .into_iter() .map(|(k, v)| (k.into_syntax_datum(), v.into_syntax_datum())) .collect(), ), } } pub fn span(&self) -> Span { match self { NsDatum::Bool(span, _) | NsDatum::Char(span, _) | NsDatum::Int(span, _) | NsDatum::Float(span, _) | NsDatum::Str(span, _) | NsDatum::Keyword(span, _) | NsDatum::Ident(span, _) | NsDatum::List(span, _) | NsDatum::Vector(span, _) | NsDatum::Set(span, _) | NsDatum::Map(span, _) => *span, } } pub fn description(&self) -> &'static str { match self { NsDatum::Bool(_, true) => "boolean true", NsDatum::Bool(_, false) => "boolean false", NsDatum::Char(_, _) => "character", NsDatum::Int(_, _) => "integer", NsDatum::Float(_, _) => "floating point number", NsDatum::Str(_, _) => "string", NsDatum::Keyword(_, _) => "keyword", NsDatum::Ident(_, _) => "symbol", NsDatum::List(_, vs) if vs.is_empty() => "empty list", NsDatum::List(_, _) => "list", NsDatum::Vector(_, vs) if vs.is_empty() => "empty vector", NsDatum::Vector(_, _) => "vector", NsDatum::Set(_, vs) if vs.is_empty() => "empty set", NsDatum::Set(_, _) => "set", NsDatum::Map(_, vs) if vs.is_empty() => "empty map", NsDatum::Map(_, _) => "map", } } } /// Iterator for NsDatum used inside the HIR /// /// This is a specific type as we use .as_slice() to peek at data in certain places for /// context-sensitive parsing. pub type NsDataIter = vec::IntoIter; ================================================ FILE: compiler/hir/prim.rs ================================================ use crate::hir::scope::Binding; macro_rules! export_prims { ( $( ($n:expr, $i:ident) ),* ) => { #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] pub enum Prim { // `(import)` is magically added to every scope. 
If we add it again in `PRIM_EXPORTS` // we will trigger duplicate definition errors if someone imports the prim modules. // Note that `(import)` is actually parsed specially; this is here to catch duplicate // definitions. ImportPlaceholder, $($i,)* } pub const PRIM_EXPORTS: &[(&str, Binding)] = &[ // This is a pseudo-primitive. Unlike normal primitives we can take the value of `=`. ("=", Binding::EqPred), $( ($n, Binding::Prim(Prim::$i)) ),* ]; } } export_prims!( ("def", Def), ("let", Let), ("fn", Fun), ("if", If), ("do", Do), ("recur", Recur), ("quote", Quote), ("export", Export), ("defmacro", DefMacro), ("letmacro", LetMacro), ("macro-rules", MacroRules), ("deftype", DefType), ("lettype", LetType), ("defrecord", DefRecord), ("letrecord", LetRecord), ("compile-error", CompileError), ("All", All) ); ================================================ FILE: compiler/hir/records.rs ================================================ use arret_syntax::datum::DataStr; use arret_syntax::span::Span; use crate::hir::error::{Error, ErrorKind, Result}; use crate::hir::ns::Ident; use crate::hir::ns::{NsDataIter, NsDatum}; use crate::hir::scope::{Binding, Scope}; use crate::hir::types::lower_poly; use crate::hir::types::lower_polymorphic_var_list; use crate::hir::util::{expect_ns_ident, expect_spanned_ns_ident}; use crate::ty; use crate::ty::record; use crate::ty::Ty; enum LoweredRecordCons { Parameterised(Span, Ident, NsDataIter), Singleton(Span, Ident), } fn lower_record_field_decl(scope: &Scope<'_>, field_datum: NsDatum) -> Result { let datum_span = field_datum.span(); let datum_description = field_datum.description(); let (ident, poly) = match field_datum { NsDatum::Ident(_, ident) => (ident, Ty::Any.into()), NsDatum::Vector(span, vs) => { let mut data = vs.into_vec(); if data.len() != 2 { return Err(Error::new( span, ErrorKind::ExpectedRecordFieldDecl(datum_description), )); } let poly = lower_poly(scope, data.pop().unwrap())?; let ident = expect_ns_ident(data.pop().unwrap(), 
"new record field name")?; (ident, poly) } other => { return Err(Error::new( other.span(), ErrorKind::ExpectedRecordFieldDecl(datum_description), )); } }; Ok(record::Field::new(datum_span, ident.into_name(), poly)) } /// Lowers either the type or value constructor for a `(defrecord)` fn lower_record_cons_decl(cons_datum: NsDatum, error_kind_cons: F) -> Result where F: Fn(&'static str) -> ErrorKind, { let datum_description = cons_datum.description(); match cons_datum { NsDatum::Ident(span, ident) => Ok(LoweredRecordCons::Singleton(span, ident)), NsDatum::List(span, vs) => { let mut param_data_iter = vs.into_vec().into_iter(); if let Some(name_datum) = param_data_iter.next() { let (ident_span, ident) = expect_spanned_ns_ident(name_datum, "new record constructor name")?; Ok(LoweredRecordCons::Parameterised( ident_span, ident, param_data_iter, )) } else { Err(Error::new(span, error_kind_cons(datum_description))) } } other => Err(Error::new(other.span(), error_kind_cons(datum_description))), } } pub fn lower_record( outer_scope: &mut Scope<'_>, ty_cons_datum: NsDatum, value_cons_datum: NsDatum, ) -> Result<()> { use crate::hir::types::PolymorphicVar; use crate::ty::ty_args::TyArgs; let mut inner_scope = outer_scope.child(); // Lower our type constructor let ty_cons_span = ty_cons_datum.span(); let ty_cons_decl = lower_record_cons_decl(ty_cons_datum, ErrorKind::ExpectedRecordTyConsDecl)?; let (ty_ident_span, ty_ident, poly_vars) = match ty_cons_decl { LoweredRecordCons::Singleton(span, ident) => (span, ident, None), LoweredRecordCons::Parameterised(span, ident, param_data_iter) => { let poly_params = lower_polymorphic_var_list(outer_scope, &mut inner_scope, param_data_iter)?; (span, ident, Some(poly_params)) } }; // Lower our value destructor let value_cons_decl = lower_record_cons_decl(value_cons_datum, ErrorKind::ExpectedRecordValueConsDecl)?; let fields: Box<[record::Field]>; let (value_cons_ident_span, value_cons_ident) = match value_cons_decl { 
LoweredRecordCons::Singleton(_, _) => { todo!("singleton record values"); } LoweredRecordCons::Parameterised(span, ident, param_data_iter) => { fields = param_data_iter .map(|field_datum| lower_record_field_decl(&inner_scope, field_datum)) .collect::>>()?; (span, ident) } }; // Convert our lowered polymorphic vars to polymorphic parameters let poly_params_list = match poly_vars { Some(poly_vars) => { use crate::ty::var_usage::VarUsages; let mut var_usages = VarUsages::new(); for field in fields.iter() { var_usages.add_poly_usages(field.ty_ref()); } let poly_params_list = poly_vars .into_vec() .into_iter() .map(|poly_var| { match poly_var { PolymorphicVar::PVar(pvar) => { if let Some(variance) = var_usages.pvar_variance(&pvar) { Ok(record::PolyParam::PVar(variance, pvar)) } else { Err(Error::new( pvar.span(), ErrorKind::UnusedPolyPurityParam(pvar), )) } } // It'd be nice to check if the param was used but it's been erased to // `Pure` by this point PolymorphicVar::Pure(span) => Ok(record::PolyParam::Pure(span)), PolymorphicVar::TVar(tvar) => { if let Some(variance) = var_usages.tvar_variance(&tvar) { Ok(record::PolyParam::TVar(variance, tvar)) } else { Err(Error::new(tvar.span(), ErrorKind::UnusedPolyTyParam(tvar))) } } PolymorphicVar::TFixed(span, fixed_poly) => { Ok(record::PolyParam::TFixed(span, fixed_poly)) } } }) .collect::>>()?; Some(poly_params_list) } None => None, }; let predicate_name: DataStr = format!("{}?", value_cons_ident.name()).into(); let predicate_ident = Ident::new(value_cons_ident.ns_id(), predicate_name); let record_ty_cons = record::Cons::new( ty_cons_span, ty_ident.name().clone(), value_cons_ident.name().clone(), poly_params_list, fields, ); for (idx, field) in record_ty_cons.fields().iter().enumerate() { if field.name().as_ref() != "_" { let accessor_name = format!("{}-{}", value_cons_ident.name(), field.name()); let accessor_ident = Ident::new(value_cons_ident.ns_id(), accessor_name.into()); outer_scope.insert_binding( field.span(), 
accessor_ident, Binding::FieldAccessor(record_ty_cons.clone(), idx), )?; } } outer_scope.insert_binding( value_cons_ident_span, predicate_ident, Binding::TyPred(ty::pred::TestTy::RecordClass(record_ty_cons.clone())), )?; outer_scope.insert_binding( value_cons_ident_span, value_cons_ident, Binding::RecordValueCons(record_ty_cons.clone()), )?; if record_ty_cons.is_singleton() { // We were used as a singleton; bind a type let record_instance = record::Instance::new(record_ty_cons, TyArgs::empty()); outer_scope.insert_binding(ty_ident_span, ty_ident, Binding::Ty(record_instance.into()))?; } else { // We were used as a type constructor; bind a type constructor outer_scope.insert_binding( ty_ident_span, ty_ident, Binding::RecordTyCons(record_ty_cons), )?; }; Ok(()) } ================================================ FILE: compiler/hir/scope.rs ================================================ use std::collections::HashMap; use std::sync::Arc; use arret_syntax::span::Span; use crate::context::ModuleId; use crate::hir::error::{Error, ErrorKind}; use crate::hir::macros::Macro; use crate::hir::ns::{Ident, NsDatum, NsId, NsIdCounter}; use crate::hir::prim::Prim; use crate::hir::{types, LocalId}; use crate::ty; use crate::ty::purity; use crate::ty::record; #[derive(Clone, Debug)] pub enum Binding { Var(Option, LocalId), Prim(Prim), Macro(Option, Arc), Ty(ty::Ref), TyCons(types::TyCons), TyPred(ty::pred::TestTy), EqPred, RecordValueCons(record::ConsId), RecordTyCons(record::ConsId), FieldAccessor(record::ConsId, usize), Purity(purity::Ref), } impl Binding { pub fn description(&self) -> &'static str { match self { Binding::Var(_, _) | Binding::TyPred(_) | Binding::EqPred => "value", Binding::Prim(_) => "primitive", Binding::Macro(_, _) => "macro", Binding::Ty(_) => "type", Binding::TyCons(_) => "type constructor", Binding::RecordValueCons(_) => "record value constructor", Binding::RecordTyCons(_) => "record type constructor", Binding::FieldAccessor(_, _) => "record field 
accessor", Binding::Purity(_) => "purity", } } pub fn import_from(&self, module_id: ModuleId) -> Binding { match self { Binding::Var(None, local_id) => Binding::Var(Some(module_id), *local_id), Binding::Macro(None, macro_id) => Binding::Macro(Some(module_id), macro_id.clone()), other => other.clone(), } } } pub struct SpannedBinding { span: Option, binding: Binding, } pub struct Scope<'parent> { ns_id_counter: NsIdCounter, entries: HashMap, parent: Option<&'parent Scope<'parent>>, } impl<'parent> Scope<'parent> { pub fn root_ns_id() -> NsId { NsId::new(0) } /// Creates a new root scope with entries pub fn new_with_entries(entries: I) -> Scope<'static> where I: Iterator, { let entries = entries .map(|(name, binding)| { ( Ident::new(Self::root_ns_id(), (*name).into()), SpannedBinding { span: None, binding, }, ) }) .collect::>(); Scope { ns_id_counter: NsIdCounter::new(), entries, parent: None, } } /// Creates a root scope containing `import` pub fn root() -> Scope<'static> { // The default root scope only consists of a placeholder for (import) let entries = std::iter::once(("import", Binding::Prim(Prim::ImportPlaceholder))); Self::new_with_entries(entries) } /// Creates a new root scope containing all primitives and types pub fn new_with_primitives() -> Scope<'static> { use crate::hir::prim::PRIM_EXPORTS; use crate::hir::types::TY_EXPORTS; let entries = PRIM_EXPORTS .iter() .chain(TY_EXPORTS.iter()) .map(|(name, binding)| (*name, binding.clone())); Self::new_with_entries(entries) } pub fn child(&'parent self) -> Scope<'parent> { Scope { ns_id_counter: self.ns_id_counter.clone(), entries: HashMap::new(), parent: Some(self), } } /// Returns the binding for a given datum if it exists /// /// Only idents can have bindings; other data will return None. 
pub fn get_datum<'a>(&'a self, datum: &NsDatum) -> Option<&'a Binding> { if let NsDatum::Ident(_, ident) = datum { self.get(ident) } else { None } } /// Returns the binding for a given ident if it exists pub fn get<'a>(&'a self, ident: &Ident) -> Option<&'a Binding> { self.entries.get(ident).map(|e| &e.binding).or_else(|| { if let Some(parent) = self.parent { parent.get(ident) } else { None } }) } /// Returns the binding for a given ident if it exists, otherwise returns an error pub fn get_or_err<'a>(&'a self, span: Span, ident: &Ident) -> Result<&'a Binding, Error> { self.get(ident) .ok_or_else(|| Error::new(span, ErrorKind::UnboundIdent(ident.name().clone()))) } /// Inserts a new binding if it doesn't exist or redefinition is allowed pub fn insert_binding( &mut self, span: Span, ident: Ident, binding: Binding, ) -> Result<(), Error> { use std::iter; self.insert_bindings(span, iter::once((ident, binding))) } pub fn insert_bindings(&mut self, span: Span, new_bindings: I) -> Result<(), Error> where I: Iterator, { use std::collections::hash_map::Entry; self.entries.reserve(new_bindings.size_hint().0); for (ident, binding) in new_bindings.filter(|b| !b.0.is_underscore()) { let entry = SpannedBinding { span: Some(span), binding, }; match self.entries.entry(ident) { Entry::Occupied(occupied) => { return Err(Error::new( span, ErrorKind::DuplicateDef(occupied.get().span, occupied.key().name().clone()), )); } Entry::Vacant(vacant) => { vacant.insert(entry); } } } Ok(()) } /// Unconditionally replaces a binding pub fn replace_binding(&mut self, span: Span, ident: Ident, binding: Binding) { self.entries.insert( ident, SpannedBinding { span: Some(span), binding, }, ); } pub fn insert_local( &mut self, span: Span, ident: Ident, local_id: LocalId, ) -> Result<(), Error> { self.insert_binding(span, ident, Binding::Var(None, local_id)) } /// Returns all bound idents pub fn bound_idents(&self) -> impl Iterator { self.entries.iter().map(|(ident, _)| ident) } /// Allocates a new 
NsId /// /// This is not globally unique; it will only be unique in the current scope chain pub fn alloc_ns_id(&mut self) -> NsId { self.ns_id_counter.alloc() } /// Exports our entire contents as bindings suitable to be imported in to another scope /// /// This is used to fold child REPL scopes back in to their parent pub fn into_exported_bindings(self) -> HashMap { self.entries } pub fn import_bindings( &mut self, exported_bindings: impl IntoIterator, module_id: ModuleId, ) { self.entries.extend(exported_bindings.into_iter().map( |(ident, SpannedBinding { span, binding })| { ( ident, SpannedBinding { span, binding: binding.import_from(module_id), }, ) }, )); } } ================================================ FILE: compiler/hir/types.rs ================================================ use arret_syntax::span::Span; use crate::hir::error::{ Error, ErrorKind, ExpectedPolyPurityArg, PolyArgIsNotPure, PolyArgIsNotTy, Result, }; use crate::hir::ns::{Ident, NsDataIter, NsDatum}; use crate::hir::prim::Prim; use crate::hir::scope::{Binding, Scope}; use crate::hir::util::{ expect_arg_count, expect_one_arg, expect_spanned_ns_ident, try_take_rest_arg, }; use crate::ty; use crate::ty::purity; use crate::ty::purity::Purity; use crate::ty::record; use crate::ty::ty_args::TyArgs; use crate::ty::Ty; #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] pub enum TyCons { List, Vector, Vectorof, Set, Map, Union, #[cfg(test)] RawU, } #[derive(Clone)] pub enum PolymorphicVar { TVar(ty::TVarId), PVar(purity::PVarId), TFixed(Span, ty::Ref), Pure(Span), } struct LoweredPolymorphicVar { ident: Ident, polymorphic_var: PolymorphicVar, } fn lower_polymorphic_var(scope: &Scope<'_>, tvar_datum: NsDatum) -> Result { let span = tvar_datum.span(); match tvar_datum { NsDatum::Ident(span, ident) => { if ident.is_underscore() { return Err(Error::new(span, ErrorKind::AnonymousPolymorphicParam)); } let source_name = ident.name().clone(); return Ok(LoweredPolymorphicVar { ident, polymorphic_var: 
PolymorphicVar::TVar(ty::TVar::new( span, source_name, Ty::Any.into(), )), }); } NsDatum::Vector(vector_span, vs) => { let mut arg_data = vs.into_vec(); if arg_data.len() == 2 { let bound_datum = arg_data.pop().unwrap(); let (ident_span, ident) = expect_spanned_ns_ident( arg_data.pop().unwrap(), "new polymorphic parameter name", )?; if ident.is_underscore() { return Err(Error::new(ident_span, ErrorKind::AnonymousPolymorphicParam)); } let source_name = ident.name().clone(); match try_lower_purity(scope, &bound_datum) { Some(purity::Ref::Fixed(Purity::Impure)) => { return Ok(LoweredPolymorphicVar { ident, polymorphic_var: PolymorphicVar::PVar(purity::PVar::new( vector_span, source_name, )), }); } Some(purity::Ref::Fixed(Purity::Pure)) => { // Emulate bounding to pure in case the purity comes from e.g. a macro // expansion return Ok(LoweredPolymorphicVar { ident, polymorphic_var: PolymorphicVar::Pure(vector_span), }); } Some(_) => { return Err(Error::new(bound_datum.span(), ErrorKind::VarPurityBound)); } None => { let bound_ty = lower_poly(scope, bound_datum)?; let polymorphic_var = if ty::props::has_subtypes(&bound_ty) { PolymorphicVar::TVar(ty::TVar::new(vector_span, source_name, bound_ty)) } else { PolymorphicVar::TFixed(vector_span, bound_ty) }; return Ok(LoweredPolymorphicVar { ident, polymorphic_var, }); } } } } _ => {} } Err(Error::new(span, ErrorKind::BadPolyVarDecl)) } fn lower_list_cons(scope: &Scope<'_>, mut arg_iter: NsDataIter) -> Result> { let rest = try_take_rest_arg(&mut arg_iter); let fixed_polys = arg_iter .map(|fixed_datum| lower_poly(scope, fixed_datum)) .collect::]>>>()?; let rest_poly = match rest { Some(rest_datum) => lower_poly(scope, rest_datum)?, None => Ty::never().into(), }; Ok(ty::List::new(fixed_polys, rest_poly)) } fn lower_fun_cons( scope: &Scope<'_>, purity: purity::Ref, mut arg_iter: NsDataIter, ) -> Result> { let ret_ty = lower_poly(scope, arg_iter.next_back().unwrap())?; // Discard the purity arg_iter.next_back(); let top_fun = 
ty::TopFun::new(purity, ret_ty); if arg_iter.len() == 1 { if let NsDatum::Ident(_, ident) = &arg_iter.as_slice()[0] { if ident.is_ellipsis() { // Top function type in the form `(... -> ReturnType)` return Ok(top_fun.into()); } } } let params = lower_list_cons(scope, arg_iter)?; Ok(ty::Fun::new(purity::PVars::new(), ty::TVars::new(), top_fun, params).into()) } fn lower_ty_cons_apply( scope: &Scope<'_>, span: Span, ty_cons: TyCons, mut arg_iter: NsDataIter, ) -> Result> { Ok(match ty_cons { TyCons::List => lower_list_cons(scope, arg_iter)?.into(), TyCons::Vector => { let member_tys = arg_iter .map(|arg_datum| lower_poly(scope, arg_datum)) .collect::]>>>()?; Ty::Vector(member_tys).into() } TyCons::Vectorof => { let start_datum = expect_one_arg(span, arg_iter)?; let start_ty = lower_poly(scope, start_datum)?; Ty::Vectorof(Box::new(start_ty)).into() } TyCons::Set => { let member_datum = expect_one_arg(span, arg_iter)?; let member_ty = lower_poly(scope, member_datum)?; Ty::Set(Box::new(member_ty)).into() } TyCons::Map => { expect_arg_count(span, 2, arg_iter.len())?; let key_ty = lower_poly(scope, arg_iter.next().unwrap())?; let value_ty = lower_poly(scope, arg_iter.next().unwrap())?; ty::Map::new(key_ty, value_ty).into() } TyCons::Union => { let member_tys = arg_iter .map(|arg_datum| lower_poly(scope, arg_datum)) .collect::>>>()?; ty::unify::unify_ty_ref_iter(member_tys.into_iter()) } #[cfg(test)] TyCons::RawU => { // This performs a union *without* unifying the types. This is used when testing the // union code itself let member_tys = arg_iter .map(|arg_datum| lower_poly(scope, arg_datum)) .collect::]>>>()?; Ty::Union(member_tys).into() } }) } fn lower_record_ty_cons_purity_arg( scope: &Scope<'_>, param_span: Span, arg_datum: &NsDatum, ) -> Result { (match arg_datum { NsDatum::Ident(span, ident) => match scope.get_or_err(*span, ident)? 
{ Binding::Purity(purity) => Ok(purity.clone()), other => Err(other.description()), }, other => Err(other.description()), }) .map_err(|found| { let details = Box::new(ExpectedPolyPurityArg { found, param_span }); Error::new(arg_datum.span(), ErrorKind::ExpectedPolyPurityArg(details)) }) } fn lower_record_ty_cons_apply( scope: &Scope<'_>, span: Span, record_cons: &record::ConsId, arg_iter: NsDataIter, ) -> Result> { use crate::ty::is_a::{ty_ref_is_a, ty_refs_equivalent}; use std::collections::HashMap; expect_arg_count(span, record_cons.poly_params().len(), arg_iter.len())?; let mut pvar_purities = HashMap::new(); let mut tvar_types = HashMap::new(); for (poly_param, arg_datum) in record_cons.poly_params().iter().zip(arg_iter) { let arg_span = arg_datum.span(); match poly_param { record::PolyParam::PVar(_, pvar) => { let purity_ref = lower_record_ty_cons_purity_arg(scope, pvar.span(), &arg_datum)?; pvar_purities.insert(pvar.clone(), purity_ref); } record::PolyParam::Pure(span) => { let purity_ref = lower_record_ty_cons_purity_arg(scope, *span, &arg_datum)?; if purity_ref != Purity::Pure.into() { let details = Box::new(PolyArgIsNotPure { arg_purity: purity_ref, param_span: *span, }); return Err(Error::new(arg_span, ErrorKind::PolyArgIsNotPure(details))); } } record::PolyParam::TVar(_, tvar) => { let arg_type = lower_poly(scope, arg_datum)?; if !ty_ref_is_a(&arg_type, tvar.bound()) { let details = Box::new(PolyArgIsNotTy { arg_type, param_bound: tvar.bound().clone(), param_span: tvar.span(), }); return Err(Error::new(arg_span, ErrorKind::PolyArgIsNotTy(details))); } tvar_types.insert(tvar.clone(), arg_type); } record::PolyParam::TFixed(span, fixed_poly) => { let arg_type = lower_poly(scope, arg_datum)?; if !ty_refs_equivalent(&arg_type, fixed_poly) { let details = Box::new(PolyArgIsNotTy { arg_type, param_bound: fixed_poly.clone(), param_span: *span, }); return Err(Error::new(arg_span, ErrorKind::PolyArgIsNotTy(details))); } } } } 
Ok(record::Instance::new(record_cons.clone(), TyArgs::new(pvar_purities, tvar_types)).into()) } fn lower_literal_vec(literal_data: Vec) -> Result>> { literal_data.into_iter().map(lower_literal).collect() } fn lower_literal(datum: NsDatum) -> Result> { match datum { NsDatum::Bool(_, v) => Ok(Ty::LitBool(v).into()), NsDatum::Keyword(_, name) => Ok(Ty::LitSym(name).into()), NsDatum::Ident(_, ident) => Ok(Ty::LitSym(ident.into_name()).into()), NsDatum::List(_, vs) => { let fixed_literals = lower_literal_vec(vs.into_vec())?; Ok(ty::List::new_tuple(fixed_literals.into_boxed_slice()).into()) } NsDatum::Vector(_, vs) => { let fixed_literals = lower_literal_vec(vs.into_vec())?; Ok(Ty::Vector(fixed_literals.into_boxed_slice()).into()) } _ => Err(Error::new(datum.span(), ErrorKind::UnsupportedLiteralType)), } } fn lower_ident(scope: &Scope<'_>, span: Span, ident: &Ident) -> Result> { match scope.get_or_err(span, ident)? { Binding::Ty(ty) => Ok(ty.clone()), Binding::TyPred(test_ty) => Ok(Ty::TyPred(test_ty.clone()).into()), Binding::EqPred => Ok(Ty::EqPred.into()), other => Err(Error::new(span, ErrorKind::ExpectedTy(other.description()))), } } fn lower_polymorphic_poly( scope: &Scope<'_>, span: Span, mut data_iter: NsDataIter, ) -> Result> { let polymorphic_vars_datum = if let Some(datum) = data_iter.next() { datum } else { return Err(Error::new(span, ErrorKind::NoPolyVarsDecl)); }; let polymorphic_var_data = if let NsDatum::Set(_, vs) = polymorphic_vars_datum { vs } else { return Err(Error::new( polymorphic_vars_datum.span(), ErrorKind::ExpectedPolyVarsDecl(polymorphic_vars_datum.description()), )); }; let mut inner_scope = scope.child(); let (pvars, tvars) = lower_polymorphic_var_set( scope, &mut inner_scope, polymorphic_var_data.into_vec().into_iter(), )?; let inner_poly = lower_poly_data_iter(&inner_scope, span, data_iter)?; if let ty::Ref::Fixed(Ty::Fun(fun)) = inner_poly { Ok(Ty::Fun(Box::new(fun.with_polymorphic_vars(pvars, tvars))).into()) } else { Err(Error::new(span, 
ErrorKind::NonFunPolyTy)) } } fn lower_poly_data_iter( scope: &Scope<'_>, span: Span, mut data_iter: NsDataIter, ) -> Result> { let data_len = data_iter.len(); if data_len == 0 { // This is by analogy with () being self-evaluating in expressions return Ok(ty::List::empty().into()); } if let Some(Binding::Prim(Prim::All)) = scope.get_datum(&data_iter.as_slice()[0]) { // Discard the `All` data_iter.next(); return lower_polymorphic_poly(scope, span, data_iter); } if data_len >= 2 { if let Some(purity) = try_lower_purity(scope, &data_iter.as_slice()[data_len - 2]) { // This is a function type return lower_fun_cons(scope, purity, data_iter); }; } let fn_datum = data_iter.next().unwrap(); let (ident_span, ident) = expect_spanned_ns_ident(fn_datum, "type constructor name")?; match scope.get_or_err(ident_span, &ident)? { Binding::Prim(Prim::Quote) => { let literal_datum = expect_one_arg(span, data_iter)?; lower_literal(literal_datum) } Binding::TyCons(ty_cons) => lower_ty_cons_apply(scope, span, *ty_cons, data_iter), Binding::RecordTyCons(record_cons) => { lower_record_ty_cons_apply(scope, span, record_cons, data_iter) } other => Err(Error::new( ident_span, ErrorKind::ExpectedTyCons(other.description()), )), } } pub fn lower_poly(scope: &Scope<'_>, datum: NsDatum) -> Result> { match datum { NsDatum::List(span, vs) => lower_poly_data_iter(scope, span, vs.into_vec().into_iter()), NsDatum::Ident(span, ident) => lower_ident(scope, span, &ident), _ => lower_literal(datum), } } fn bind_polymorphic_vars( scope: &mut Scope<'_>, lowered_poly_vars: Vec, ) -> Result<()> { for LoweredPolymorphicVar { ident, polymorphic_var, } in lowered_poly_vars { let (span, binding) = match polymorphic_var { PolymorphicVar::PVar(pvar) => (pvar.span(), Binding::Purity(pvar.into())), PolymorphicVar::TVar(tvar) => (tvar.span(), Binding::Ty(tvar.into())), PolymorphicVar::TFixed(fixed_span, poly) => (fixed_span, Binding::Ty(poly)), PolymorphicVar::Pure(pure_span) => (pure_span, 
Binding::Purity(Purity::Pure.into())), }; scope.insert_binding(span, ident, binding)?; } Ok(()) } /// Lowers a set of polymorphic variables defined in `outer_scope` and places them in `inner_scope` /// /// This is used for functions and function types pub fn lower_polymorphic_var_set( outer_scope: &Scope<'_>, inner_scope: &mut Scope<'_>, polymorphic_var_data: NsDataIter, ) -> Result<(purity::PVars, ty::TVars)> { let mut pvars = purity::PVars::new(); let mut tvars = ty::TVars::new(); let lowered_poly_vars = polymorphic_var_data .map(|var_datum| lower_polymorphic_var(outer_scope, var_datum)) .collect::>>()?; for lowered_poly_var in lowered_poly_vars.iter() { match &lowered_poly_var.polymorphic_var { PolymorphicVar::PVar(pvar) => { pvars.push(pvar.clone()); } PolymorphicVar::TVar(tvar) => { tvars.push(tvar.clone()); } PolymorphicVar::Pure(_) | PolymorphicVar::TFixed(_, _) => {} } } bind_polymorphic_vars(inner_scope, lowered_poly_vars)?; Ok((pvars, tvars)) } /// Lowers a list of polymorphic variables defined in `outer_scope` and places them in `inner_scope` /// /// This is used for record types pub fn lower_polymorphic_var_list( outer_scope: &Scope<'_>, inner_scope: &mut Scope<'_>, param_data: NsDataIter, ) -> Result> { let lowered_poly_vars = param_data .map(|var_datum| lower_polymorphic_var(outer_scope, var_datum)) .collect::>>()?; let poly_vars = lowered_poly_vars .iter() .map(|lpv| lpv.polymorphic_var.clone()) .collect(); bind_polymorphic_vars(inner_scope, lowered_poly_vars)?; Ok(poly_vars) } pub fn try_lower_purity(scope: &Scope<'_>, datum: &NsDatum) -> Option { scope.get_datum(datum).and_then(|binding| match binding { Binding::Purity(purity) => Some(purity.clone()), _ => None, }) } macro_rules! export_ty { ($name:expr, $type:expr) => { ($name, Binding::Ty(ty::Ref::Fixed($type))) }; } macro_rules! export_ty_cons { ($name:expr, $ty_cons:expr) => { ($name, Binding::TyCons($ty_cons)) }; } macro_rules! 
export_purity { ($name:expr, $purity:expr) => { ($name, Binding::Purity(purity::Ref::Fixed($purity))) }; } macro_rules! export_ty_pred { ($name:expr, $test_ty:expr) => { ($name, Binding::TyPred($test_ty)) }; } pub const TY_EXPORTS: &[(&str, Binding)] = &[ export_ty!("Any", Ty::Any), export_ty!("Bool", Ty::Bool), export_ty!("Sym", Ty::Sym), export_ty!("Str", Ty::Str), export_ty!("Int", Ty::Int), export_ty!("Float", Ty::Float), export_ty!("Num", Ty::Num), export_ty!("Char", Ty::Char), export_ty!("Record", Ty::TopRecord), export_ty_cons!("List", TyCons::List), export_ty_cons!("Vector", TyCons::Vector), export_ty_cons!("Vectorof", TyCons::Vectorof), export_ty_cons!("Setof", TyCons::Set), export_ty_cons!("Map", TyCons::Map), export_ty_cons!("U", TyCons::Union), export_purity!("->", Purity::Pure), export_purity!("->!", Purity::Impure), export_ty_pred!("str?", ty::pred::TestTy::Str), export_ty_pred!("sym?", ty::pred::TestTy::Sym), export_ty_pred!("bool?", ty::pred::TestTy::Bool), export_ty_pred!("num?", ty::pred::TestTy::Num), export_ty_pred!("int?", ty::pred::TestTy::Int), export_ty_pred!("float?", ty::pred::TestTy::Float), export_ty_pred!("char?", ty::pred::TestTy::Char), export_ty_pred!("list?", ty::pred::TestTy::List), export_ty_pred!("vector?", ty::pred::TestTy::Vector), export_ty_pred!("set?", ty::pred::TestTy::Set), export_ty_pred!("map?", ty::pred::TestTy::Map), export_ty_pred!("fn?", ty::pred::TestTy::Fun), export_ty_pred!("nil?", ty::pred::TestTy::Nil), export_ty_pred!("record?", ty::pred::TestTy::TopRecord), #[cfg(test)] export_ty_cons!("RawU", TyCons::RawU), ]; /// Pushes the arguments for a list constructor on to the passed `Vec` /// /// This is used to share code between list and function types fn push_list_parts(list_parts: &mut Vec, list_ref: &ty::List) { for fixed in list_ref.fixed() { list_parts.push(str_for_ty_ref(fixed)); } let rest = list_ref.rest(); if !rest.is_never() { list_parts.push("&".to_owned()); list_parts.push(str_for_ty_ref(rest)); } } fn 
str_for_bounds(bound_pvars: &[purity::PVarId], bound_tvars: &[ty::TVarId]) -> String { let pvar_parts = bound_pvars .iter() .map(|pvar| format!("[{} ->!]", pvar.source_name())); let tvar_parts = bound_tvars.iter().map(|tvar| { if tvar.bound() == &Ty::Any.into() { return tvar.source_name().into(); } format!("[{} {}]", tvar.source_name(), str_for_ty_ref(tvar.bound())) }); let all_parts = pvar_parts.chain(tvar_parts).collect::>(); format!("#{{{}}}", all_parts.join(" ")) } fn str_for_record_poly_arg( instance: &record::Instance, poly_param: &record::PolyParam, ) -> String { let ty_args = instance.ty_args(); match poly_param { record::PolyParam::PVar(_, pvar) => str_for_purity(&ty_args.pvar_purities()[pvar]), record::PolyParam::Pure(_) => str_for_purity(&Purity::Pure.into()), record::PolyParam::TVar(_, tvar) => str_for_ty_ref(&ty_args.tvar_types()[tvar]), record::PolyParam::TFixed(_, fixed_poly) => str_for_ty_ref(fixed_poly), } } fn str_for_ty(ty: &Ty) -> String { match ty { Ty::Any => "Any".to_owned(), Ty::Bool => "Bool".to_owned(), Ty::Char => "Char".to_owned(), Ty::Int => "Int".to_owned(), Ty::Sym => "Sym".to_owned(), Ty::Str => "Str".to_owned(), Ty::Float => "Float".to_owned(), Ty::Num => "Num".to_owned(), Ty::LitBool(false) => "false".to_owned(), Ty::LitBool(true) => "true".to_owned(), Ty::LitSym(name) => { if name.starts_with(':') { name.to_string() } else { format!("'{}", name) } } Ty::Map(map) => format!( "(Map {} {})", str_for_ty_ref(map.key()), str_for_ty_ref(map.value()) ), Ty::Set(member) => format!("(Setof {})", str_for_ty_ref(member)), Ty::Vector(members) => { let result_parts: Vec = members .iter() .map(|member| format!(" {}", str_for_ty_ref(member))) .collect(); format!("(Vector{})", result_parts.join("")) } Ty::Vectorof(member) => format!("(Vectorof {})", str_for_ty_ref(member)), Ty::TopFun(top_fun) => format!( "(... 
{} {})", str_for_purity(top_fun.purity()), str_for_ty_ref(top_fun.ret()) ), Ty::Fun(fun) => { let mut fun_parts = Vec::with_capacity(2); push_list_parts(&mut fun_parts, fun.params()); fun_parts.push(str_for_purity(fun.purity())); fun_parts.push(str_for_ty_ref(fun.ret())); if fun.has_polymorphic_vars() { format!( "(All {} {})", str_for_bounds(fun.pvars(), fun.tvars()), fun_parts.join(" ") ) } else { format!("({})", fun_parts.join(" ")) } } Ty::TyPred(test_ty) => test_ty.to_string(), Ty::EqPred => "=".to_owned(), Ty::Union(members) => { let member_strs: Vec = members .iter() .map(|m| format!(" {}", str_for_ty_ref(m))) .collect(); format!("(U{})", member_strs.join("")) } Ty::Intersect(members) => { let member_strs: Vec = members .iter() .map(|m| format!(" {}", str_for_ty_ref(m))) .collect(); format!("(∩{})", member_strs.join("")) } Ty::List(list) => { // While all list types can be expressed using `(List)` we try to find the shortest // representation if list.is_empty() { "()".to_owned() } else { let mut list_parts = Vec::with_capacity(2); list_parts.push("List".to_owned()); push_list_parts(&mut list_parts, list); format!("({})", list_parts.join(" ")) } } Ty::TopRecord => "Record".to_owned(), Ty::RecordClass(record_cons) => format!("({} ...)", record_cons.ty_cons_name()), Ty::Record(instance) => { let record_cons = instance.cons(); if record_cons.is_singleton() { // This is bound as its name return record_cons.ty_cons_name().to_string(); } let record_parts: Vec = std::iter::once(record_cons.ty_cons_name().to_string()) .chain( record_cons .poly_params() .iter() .map(|poly_param| str_for_record_poly_arg(instance, poly_param)), ) .collect(); format!("({})", record_parts.join(" ")) } } } pub fn str_for_ty_ref(ty_ref: &ty::Ref) -> String { match ty_ref { ty::Ref::Var(tvar, _) => tvar.source_name().to_owned(), ty::Ref::Fixed(ty) => str_for_ty(ty), } } pub fn str_for_purity(purity: &purity::Ref) -> String { match purity { purity::Ref::Fixed(Purity::Pure) => "->".to_owned(), 
purity::Ref::Fixed(Purity::Impure) => "->!".to_owned(), purity::Ref::Var(pvar) => pvar.source_name().into(), } } #[cfg(test)] pub fn poly_for_str(datum_str: &str) -> ty::Ref { use crate::hir::prim::PRIM_EXPORTS; use arret_syntax::parser::datum_from_str; let prim_entries = PRIM_EXPORTS .iter() .chain(TY_EXPORTS.iter()) .map(|(name, binding)| { if *name == "U" { // Using `U` in tests is very dubious as it invokes a lot of type system logic. It's // easy to write tautological tests due to `U` creating a simplified type. Rename to // `UnifyingU` so it's clear what's happening. ("UnifyingU", binding.clone()) } else { (*name, binding.clone()) } }); let scope = Scope::new_with_entries(prim_entries); let test_datum = datum_from_str(None, datum_str).unwrap(); lower_poly(&scope, NsDatum::from_syntax_datum(&test_datum)).unwrap() } #[cfg(test)] pub fn tvar_bounded_by(bound: ty::Ref) -> ty::Ref { ty::TVar::new(crate::source::EMPTY_SPAN, "TVar".into(), bound).into() } #[cfg(test)] mod test { use super::*; use std::collections::HashMap; use crate::source::EMPTY_SPAN; use crate::ty::var_usage::Variance; fn assert_ty_for_str(expected: Ty, datum_str: &str) { let expected_poly = expected.into(); assert_eq!(expected_poly, poly_for_str(datum_str)); // Try to round trip this to make sure str_for_ty_ref works let recovered_str = str_for_ty_ref(&expected_poly); assert_eq!(expected_poly, poly_for_str(&recovered_str)); } /// This asserts that a type uses an exact string in str_for_ty_ref fn assert_exact_str_repr(datum_str: &str) { assert_eq!(datum_str, str_for_ty_ref(&poly_for_str(datum_str))); } #[test] fn true_literal() { let j = "true"; let expected = Ty::LitBool(true); assert_ty_for_str(expected, j); } #[test] fn false_literal() { let j = "false"; let expected = Ty::LitBool(false); assert_ty_for_str(expected, j); } #[test] fn sym_literal() { let j = "'foo"; let expected = Ty::LitSym("foo".into()); assert_ty_for_str(expected, j); } #[test] fn keyword_literal() { let j = ":foo"; let 
expected = Ty::LitSym(":foo".into()); assert_ty_for_str(expected, j); // Make sure we don't quote this needlessly assert_exact_str_repr(j); } #[test] fn empty_list_literal() { let j = "()"; let expected = ty::List::empty().into(); assert_ty_for_str(expected, j); } #[test] fn quoted_list_literal() { let j = "'(true false)"; let expected = ty::List::new_tuple(Box::new([ Ty::LitBool(true).into(), Ty::LitBool(false).into(), ])) .into(); assert_ty_for_str(expected, j); } #[test] fn empty_vector_literal() { let j = "[]"; let expected = Ty::Vector(Box::new([])); assert_ty_for_str(expected, j); } #[test] fn vector_literal() { let j = "[true false]"; let expected = Ty::Vector(Box::new([ Ty::LitBool(true).into(), Ty::LitBool(false).into(), ])); assert_ty_for_str(expected, j); } #[test] fn ty_ref() { let j = "Sym"; let expected = Ty::Sym; assert_ty_for_str(expected, j); } #[test] fn fixed_list_cons() { let j = "(List true false)"; let expected = ty::List::new_tuple(Box::new([ Ty::LitBool(true).into(), Ty::LitBool(false).into(), ])) .into(); assert_ty_for_str(expected, j); } #[test] fn rest_list_cons() { let j = "(List true & false)"; let expected = ty::List::new( Box::new([Ty::LitBool(true).into()]), Ty::LitBool(false).into(), ) .into(); assert_ty_for_str(expected, j); } #[test] fn vectorof_cons() { let j = "(Vectorof true)"; let inner_poly = Ty::LitBool(true).into(); let expected = Ty::Vectorof(Box::new(inner_poly)); assert_ty_for_str(expected, j); } #[test] fn vector_cons() { let j = "(Vector true false)"; let expected = Ty::Vector(Box::new([ Ty::LitBool(true).into(), Ty::LitBool(false).into(), ])); assert_ty_for_str(expected, j); } #[test] fn pure_fun() { let j = "(-> true)"; let expected = ty::Fun::new_mono( ty::List::empty(), Purity::Pure.into(), Ty::LitBool(true).into(), ) .into(); assert_ty_for_str(expected, j); } #[test] fn impure_fun() { let j = "(->! 
true)"; let expected = ty::Fun::new_mono( ty::List::empty(), Purity::Impure.into(), Ty::LitBool(true).into(), ) .into(); assert_ty_for_str(expected, j); } #[test] fn fixed_fun() { let j = "(false -> true)"; let expected = ty::Fun::new_mono( ty::List::new_tuple(Box::new([Ty::LitBool(false).into()])), Purity::Pure.into(), Ty::LitBool(true).into(), ) .into(); assert_ty_for_str(expected, j); } #[test] fn rest_impure_fun() { let j = "(Str & Sym ->! true)"; let expected = ty::Fun::new_mono( ty::List::new(Box::new([Ty::Str.into()]), Ty::Sym.into()), Purity::Impure.into(), Ty::LitBool(true).into(), ) .into(); assert_ty_for_str(expected, j); } #[test] fn top_impure_fun() { let j = "(... ->! true)"; let expected = ty::TopFun::new(Purity::Impure.into(), Ty::LitBool(true).into()).into(); assert_ty_for_str(expected, j); } #[test] fn type_predicate() { let j = "str?"; let expected = Ty::TyPred(ty::pred::TestTy::Str); assert_ty_for_str(expected, j); } #[test] fn equality_predicate() { let j = "="; let expected = Ty::EqPred; assert_ty_for_str(expected, j); } #[test] fn set_cons() { let j = "(Setof true)"; let inner_poly = Ty::LitBool(true).into(); let expected = Ty::Set(Box::new(inner_poly)); assert_ty_for_str(expected, j); } #[test] fn map_cons() { let j = "(Map true false)"; let key_ty = Ty::LitBool(true); let value_ty = Ty::LitBool(false); let expected = ty::Map::new(key_ty.into(), value_ty.into()).into(); assert_ty_for_str(expected, j); } #[test] fn merged_union_cons() { let j = "(UnifyingU true false)"; let expected = Ty::Bool; assert_ty_for_str(expected, j); } #[test] fn simpifying_str_for_ty_ref() { assert_exact_str_repr("(List Int Float)"); assert_exact_str_repr("(List Int & Float)"); assert_exact_str_repr("(List & Float)"); assert_exact_str_repr("(Float & Int -> Sym)"); } #[test] fn polymorphic_fun_str() { assert_exact_str_repr("(All #{[->? ->!] A [B Bool] C} B C ->? 
A)"); } #[test] fn singleton_record_type() { let mono_record_cons = record::Cons::new( EMPTY_SPAN, "MonoCons".into(), "mono-cons?".into(), None, Box::new([record::Field::new(EMPTY_SPAN, "num".into(), Ty::Num.into())]), ); let record_class_ref: ty::Ref = mono_record_cons.clone().into(); assert_eq!("(MonoCons ...)", str_for_ty_ref(&record_class_ref)); let int_record_instance_ref: ty::Ref = record::Instance::new(mono_record_cons, TyArgs::empty()).into(); assert_eq!("MonoCons", str_for_ty_ref(&int_record_instance_ref)); } #[test] fn poly_record_type() { let tvar = ty::TVar::new(EMPTY_SPAN, "tvar".into(), Ty::Any.into()); let poly_record_cons = record::Cons::new( EMPTY_SPAN, "PolyCons".into(), "poly-cons?".into(), Some(Box::new([ record::PolyParam::Pure(EMPTY_SPAN), record::PolyParam::TVar(Variance::Covariant, tvar.clone()), ])), Box::new([record::Field::new( EMPTY_SPAN, "num".into(), tvar.clone().into(), )]), ); // Record class type let record_class_ref: ty::Ref = poly_record_cons.clone().into(); assert_eq!("(PolyCons ...)", str_for_ty_ref(&record_class_ref)); // Instance parameterised with an `Int` let mut int_tvars = HashMap::new(); int_tvars.insert(tvar, Ty::Int.into()); let int_ty_args = TyArgs::new(HashMap::new(), int_tvars); let poly_record_instance_ref: ty::Ref = record::Instance::new(poly_record_cons, int_ty_args).into(); assert_eq!( "(PolyCons -> Int)", str_for_ty_ref(&poly_record_instance_ref) ); } } ================================================ FILE: compiler/hir/util.rs ================================================ use arret_syntax::datum::{DataStr, Datum}; use arret_syntax::span::Span; use crate::hir::error::{Error, ErrorKind, ExpectedSym, Result}; use crate::hir::ns::{Ident, NsDataIter, NsDatum}; /// Removes the rest argument from the passed iterator and returns it /// /// The rest argument is denoted by using `&` before a final datum pub fn try_take_rest_arg(data_iter: &mut NsDataIter) -> Option { let data_len = data_iter.len(); if data_len < 2 { 
return None;
    }

    // This is gross because we need to "peek" at the end of the iterator
    if let NsDatum::Ident(_, ident) = &data_iter.as_slice()[data_len - 2] {
        if ident.is_ampersand() {
            let rest = data_iter.next_back();
            // Remove the & completely
            data_iter.next_back();
            return rest;
        }
    }

    None
}

/// Ensures exactly `expected_arg_count` arguments were provided
///
/// Returns a `WrongArgCount` error pointing at `span` otherwise.
pub fn expect_arg_count(
    span: Span,
    expected_arg_count: usize,
    actual_arg_count: usize,
) -> Result<()> {
    if actual_arg_count != expected_arg_count {
        Err(Error::new(
            span,
            ErrorKind::WrongArgCount(expected_arg_count),
        ))
    } else {
        Ok(())
    }
}

/// Ensures exactly one argument was provided and returns it
// NOTE(review): the `<NsDatum>` return parameter was lost in extraction; restored
// from the body, which returns `iter.next().unwrap()`.
pub fn expect_one_arg(span: Span, mut iter: NsDataIter) -> Result<NsDatum> {
    expect_arg_count(span, 1, iter.len())?;
    Ok(iter.next().unwrap())
}

/// Expects `datum` to be a non-keyword identifier, returning its span and `Ident`
///
/// `usage` is a human-readable description used to build the error message.
pub fn expect_spanned_ns_ident(datum: NsDatum, usage: &'static str) -> Result<(Span, Ident)> {
    if let NsDatum::Ident(span, ident) = datum {
        Ok((span, ident))
    } else {
        Err(Error::new(
            datum.span(),
            ErrorKind::ExpectedSym(
                ExpectedSym {
                    found: datum.description(),
                    usage,
                }
                .into(),
            ),
        ))
    }
}

/// Expects `datum` to be an identifier, discarding its span
// NOTE(review): the `<Ident>` return parameter was lost in extraction; restored
// from the `map(|(_, ident)| ident)` projection below.
pub fn expect_ns_ident(datum: NsDatum, usage: &'static str) -> Result<Ident> {
    expect_spanned_ns_ident(datum, usage).map(|(_, ident)| ident)
}

/// Expects a raw syntax `Datum` to be a non-keyword symbol, returning its span and name
///
/// Symbols starting with `:` are keywords and are rejected.
pub fn expect_spanned_ident<'a>(
    datum: &'a Datum,
    usage: &'static str,
) -> Result<(Span, &'a DataStr)> {
    if let Datum::Sym(span, name) = datum {
        if !name.starts_with(':') {
            return Ok((*span, name));
        }
    }

    Err(Error::new(
        datum.span(),
        ErrorKind::ExpectedSym(
            ExpectedSym {
                found: datum.description(),
                usage,
            }
            .into(),
        ),
    ))
}

/// Expects a raw syntax `Datum` to be a non-keyword symbol, discarding its span
pub fn expect_ident<'a>(datum: &'a Datum, usage: &'static str) -> Result<&'a DataStr> {
    expect_spanned_ident(datum, usage).map(|(_, ident)| ident)
}

================================================ FILE: compiler/hir/var_id.rs ================================================

use std::num::NonZeroU32;
use std::sync::atomic::{AtomicU32, Ordering};

use crate::context::ModuleId;

/// Identifier for a local variable within a module
///
/// This is not globally unique; it must be combined with a `ModuleId`
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
pub
struct LocalId(NonZeroU32); /// Identifier for a variable exported from another module #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] pub struct ExportId(ModuleId, LocalId); impl ExportId { pub fn new(module_id: ModuleId, local_id: LocalId) -> Self { Self(module_id, local_id) } pub fn module_id(self) -> ModuleId { self.0 } pub fn local_id(self) -> LocalId { self.1 } } pub struct LocalIdAlloc { local_id_counter: AtomicU32, } impl LocalIdAlloc { pub fn new() -> Self { Self { local_id_counter: AtomicU32::new(1), } } /// Allocates a `LocalId` using atomic operations on a shared instance pub fn alloc(&self) -> LocalId { LocalId(NonZeroU32::new(self.local_id_counter.fetch_add(1, Ordering::Relaxed)).unwrap()) } /// Allocates a `LocalId` using non-atomic operations on an exclusive instance pub fn alloc_mut(&mut self) -> LocalId { let local_id_counter = self.local_id_counter.get_mut(); let raw_id = *local_id_counter; *local_id_counter += 1; LocalId(NonZeroU32::new(raw_id).unwrap()) } } ================================================ FILE: compiler/hir/visitor.rs ================================================ use crate::hir; /// Visits an expression and all of its subexpressions pub fn visit_exprs<'a, P: hir::Phase, F>(expr: &'a hir::Expr

, visitor: &mut F) where F: FnMut(&'a hir::Expr

), { visitor(expr); use crate::hir::ExprKind; match &expr.kind { ExprKind::Cond(cond) => { visit_exprs(&cond.test_expr, visitor); visit_exprs(&cond.true_expr, visitor); visit_exprs(&cond.false_expr, visitor); } ExprKind::Fun(fun) => { visit_exprs(&fun.body_expr, visitor); } ExprKind::App(app) => { visit_exprs(&app.fun_expr, visitor); for fixed_arg_expr in &app.fixed_arg_exprs { visit_exprs(fixed_arg_expr, visitor); } for rest_arg_expr in &app.rest_arg_expr { visit_exprs(rest_arg_expr, visitor); } } ExprKind::Recur(recur) => { for fixed_arg_expr in &recur.fixed_arg_exprs { visit_exprs(fixed_arg_expr, visitor); } for rest_arg_expr in &recur.rest_arg_expr { visit_exprs(rest_arg_expr, visitor); } } ExprKind::Let(hir_let) => { visit_exprs(&hir_let.value_expr, visitor); visit_exprs(&hir_let.body_expr, visitor); } ExprKind::Do(exprs) => { for expr in exprs { visit_exprs(expr, visitor); } } ExprKind::MacroExpand(_, expr) => { visit_exprs(expr, visitor); } ExprKind::ExportRef(_, _) | ExprKind::LocalRef(_, _) | ExprKind::Lit(_) | ExprKind::RustFun(_) | ExprKind::TyPred(_, _) | ExprKind::EqPred(_) | ExprKind::RecordCons(_, _) | ExprKind::FieldAccessor(_) => { // Terminal expression } }; } ================================================ FILE: compiler/id_type.rs ================================================ use std::sync::Arc; use std::{fmt, hash, ops}; /// Builds a new ID type based off indexing in to a `Vec` lookup table /// /// This stores the value internally at $ty (typically `u32`) while the interface uses `usize` to /// support easy indexing. #[macro_export] macro_rules! 
new_indexing_id_type { ($name:ident, $ty:ty) => { #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd)] pub struct $name($ty); impl $name { pub fn new(value: usize) -> $name { $name(value as $ty) } #[allow(unused)] pub fn new_entry_id(lookup_vec: &mut Vec, entry: T) -> $name { let id = Self::new(lookup_vec.len()); lookup_vec.push(entry); id } #[allow(unused)] pub fn to_usize(self) -> usize { self.0 as usize } } }; } /// Builds a new ID type using a global counter /// /// This allows allocating IDs without threading a mutable counter through multiple layers of /// code. #[macro_export] macro_rules! new_global_id_type { ($id_name:ident) => { new_global_id_type!( $id_name, usize, std::sync::atomic::AtomicUsize, std::num::NonZeroUsize ); }; ($id_name:ident, $native_type:ty, $atomic_type:ty, $non_zero_type:ty) => { // These counters are very hot and shared between threads // They're not strongly correlated with each other so put them on different cachelines to // avoid bouncing them between CPUs. The value of 64 is just a guess; it's a typical value // and isn't needed for correctness. #[repr(align(64))] struct AlignedAtomic($atomic_type); static NEXT_VALUE: AlignedAtomic = AlignedAtomic(<$atomic_type>::new(1)); #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] pub struct $id_name($non_zero_type); impl $id_name { /// Allocates a ID unique for the duration of compiler's execution pub fn alloc() -> Self { use std::sync::atomic::Ordering; // We used relaxed ordering because the order doesn't actually matter; these are // used only for uniqueness let raw_id = NEXT_VALUE.0.fetch_add(1, Ordering::Relaxed); Self::new(raw_id) } #[allow(unused)] pub fn get(&self) -> $native_type { self.0.into() } fn new(raw_id: $native_type) -> Self { $id_name(<$non_zero_type>::new(raw_id).unwrap()) } } }; } /// Builds a new ID type based off an arbitrary counter #[macro_export] macro_rules! 
new_counting_id_type {
    ($counter_name:ident, $id_name:ident) => {
        #[derive(Clone)]
        pub struct $counter_name(u32);

        impl $counter_name {
            pub fn new() -> $counter_name {
                $counter_name(1)
            }

            pub fn alloc(&mut self) -> $id_name {
                let id = $id_name(self.0);
                self.0 += 1;
                id
            }
        }

        #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd)]
        pub struct $id_name(u32);

        impl $id_name {
            #[allow(unused)]
            pub fn new(value: u32) -> $id_name {
                $id_name(value)
            }

            #[allow(unused)]
            pub fn to_u32(self) -> u32 {
                self.0
            }
        }
    };
}

/// Reference-counted pointer that uses pointer identity
///
/// Traits such as `Hash`, `Eq`, `Ord` etc. are implemented in terms of the value's memory location.
/// This means that the value returned by `ArcId::new()` is considered equal to itself and its
/// clones regardless of the value it points to.
// NOTE(review): the `<T>` parameter lists on this type and its impls were lost in
// extraction; restored from the `Arc::new`/`Arc::ptr_eq`/`*const T` usages below.
pub struct ArcId<T> {
    inner: Arc<T>,
}

impl<T> ArcId<T> {
    pub fn new(value: T) -> Self {
        ArcId {
            inner: Arc::new(value),
        }
    }
}

impl<T> Clone for ArcId<T> {
    fn clone(&self) -> Self {
        ArcId {
            inner: self.inner.clone(),
        }
    }
}

impl<T> ops::Deref for ArcId<T> {
    type Target = T;

    fn deref(&self) -> &T {
        self.inner.deref()
    }
}

impl<T> PartialEq for ArcId<T> {
    // Pointer identity, not structural equality
    fn eq(&self, other: &Self) -> bool {
        Arc::ptr_eq(&self.inner, &other.inner)
    }
}

impl<T> Eq for ArcId<T> {}

impl<T> hash::Hash for ArcId<T> {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        // Hash the allocation's address so hashing agrees with `eq`
        state.write_usize(self.inner.as_ref() as *const T as usize)
    }
}

impl<T> PartialOrd for ArcId<T> {
    fn partial_cmp(&self, other: &ArcId<T>) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<T> Ord for ArcId<T> {
    // Orders by allocation address; stable for a given value and its clones
    fn cmp(&self, other: &ArcId<T>) -> std::cmp::Ordering {
        (self.inner.as_ref() as *const T as usize).cmp(&(other.inner.as_ref() as *const T as usize))
    }
}

impl<T: fmt::Debug> fmt::Debug for ArcId<T> {
    // Delegates to the pointee's `Debug`; the identity semantics are invisible here
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.inner.fmt(formatter)
    }
}

================================================ FILE: compiler/lib.rs ================================================

#![warn(clippy::all)]
#![warn(rust_2018_idioms)]

#[macro_use]
mod id_type;

mod arret_root;
mod codegen;
mod
context; mod hir; mod mir; mod promise; pub mod repl; mod reporting; mod rfi; mod source; mod ty; mod typeck; use std::collections::HashSet; use std::sync::Arc; use codespan_reporting::diagnostic::Diagnostic; use arret_syntax::span::FileId; pub use crate::arret_root::{find_arret_root, FindArretRootError}; pub use crate::codegen::initialise_llvm; pub use crate::codegen::program::{gen_program, Options as GenProgramOptions, OutputType}; pub use crate::context::{CompileCtx, LinkedLibrary}; pub use crate::hir::PackagePaths; pub use crate::id_type::ArcId; pub use crate::mir::eval_hir::{BuiltProgram, EvalHirCtx}; pub use crate::mir::print_program as print_program_mir; pub use crate::reporting::emit_diagnostics_to_stderr; pub use crate::source::{SourceFile, SourceLoader, SourceText}; pub struct EvaluableProgram { pub ehx: EvalHirCtx, pub main_export_id: hir::ExportId, pub linked_libraries: Vec>, } /// Visits a subtree of modules, evaluates their definitions and collects their RFI libraries fn include_imports( ehx: &mut EvalHirCtx, visited_modules: &mut HashSet, linked_libraries: &mut Vec>, root_module: &context::Module, ) -> Result<(), Vec>> { if visited_modules.contains(&root_module.module_id) { return Ok(()); } visited_modules.insert(root_module.module_id); if let Some(ref linked_library) = root_module.linked_library { linked_libraries.push(linked_library.clone()); } // Make sure our imports are first for import in root_module.imports.values() { include_imports(ehx, visited_modules, linked_libraries, import)?; } ehx.visit_module_defs(root_module.module_id, &root_module.defs)?; Ok(()) } pub fn program_to_evaluable( ccx: &CompileCtx, source_file: &SourceFile, ) -> Result>> { use arret_syntax::span::Span; use crate::typeck::infer; let entry_module = ccx.source_file_to_module(source_file)?; let main_local_id = if let Some(local_id) = entry_module.main_local_id { local_id } else { use codespan_reporting::diagnostic::Label; return Err(vec![Diagnostic::error() .with_message("no 
main! function defined in entry module") .with_labels(vec![Label::primary(source_file.file_id(), 0..1) .with_message("main! function expected in this file")])]); }; let inferred_main_type = &entry_module.inferred_locals[&main_local_id]; infer::ensure_main_type( Span::new(Some(source_file.file_id()), 0, 0), &entry_module.defs, main_local_id, inferred_main_type, ) .map_err(|err| vec![err.into()])?; let mut ehx = EvalHirCtx::new(ccx.enable_optimisations()); let mut linked_libraries = vec![]; let mut visited_modules = HashSet::new(); for import in entry_module.imports.values() { include_imports( &mut ehx, &mut visited_modules, &mut linked_libraries, import, )?; } // We can consume here because we own the entry module ehx.consume_module_defs(entry_module.module_id, entry_module.defs)?; if ehx.should_collect() { ehx.collect_garbage(); } Ok(EvaluableProgram { ehx, main_export_id: hir::ExportId::new(entry_module.module_id, main_local_id), linked_libraries, }) } ================================================ FILE: compiler/mir/app_purity.rs ================================================ use std::collections::HashMap; use crate::ty; use crate::ty::purity; use crate::ty::purity::Purity; fn resolve_ref_to_purity( pvar_purities: &HashMap, poly: &purity::Ref, ) -> Purity { match poly { purity::Ref::Fixed(purity) => *purity, purity::Ref::Var(pvar) => { let inner_ref = pvar_purities .get(pvar) .expect("Unable to find PVar determining fun apply purity"); resolve_ref_to_purity(pvar_purities, inner_ref) } } } /// Returns the purity for a fun application pub fn fun_app_purity( pvar_purities: &HashMap, fun_purity: &purity::Ref, fun_ret_ty: &ty::Ref, ) -> Purity { if fun_ret_ty.is_never() { // This is a hack for things like `panic`. Pure funs are allowed to panic but if they // return `(U)` they're likely only called to terminate the program. Without this `panic` // would be optimised away. 
return Purity::Impure; } resolve_ref_to_purity(pvar_purities, fun_purity) } ================================================ FILE: compiler/mir/arg_list.rs ================================================ use arret_syntax::span::Span; use arret_runtime::abitype; use crate::mir::builder::{Builder, BuiltReg}; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::ops; use crate::mir::polymorph::PolymorphAbi; use crate::mir::value::Value; use crate::ty; pub struct LoadedArgList { /// Reg holding the captures parameter pub captures_reg: Option, /// All regs the function takes including the captures pub param_regs: Box<[ops::RegId]>, /// Built list value of the arguments pub arg_list_value: Value, } /// Builds the regs and ops for loading the argument list of a function /// /// This results in an argument list value which contains all arguments passed to the function. pub fn build_load_arg_list_value( ehx: &mut EvalHirCtx, b: &mut Builder, polymorph_abi: &PolymorphAbi, param_list_poly: &ty::List, ) -> LoadedArgList { use crate::mir::value::from_reg::reg_to_value; use crate::ty::list_iter::ListIterator; let captures_reg: Option = if polymorph_abi.has_captures { Some(b.alloc_local()) } else { None }; let mut param_list_poly_iter = ListIterator::new(param_list_poly); let fixed_reg_values: Vec<(ops::RegId, Value)> = polymorph_abi .fixed_params .iter() .map(|abi_type| { let reg = b.alloc_local(); let param_type = param_list_poly_iter.next().unwrap(); (reg.into(), reg_to_value(ehx, reg, abi_type, param_type)) }) .collect(); let rest_reg_value: Option<(ops::RegId, Value)> = polymorph_abi.rest_param.as_ref().map(|abi_type| { let reg = b.alloc_local(); let tail_type = param_list_poly_iter.tail_type(); ( reg.into(), reg_to_value(ehx, reg, abi_type, &tail_type.into()), ) }); let param_regs = captures_reg .into_iter() .map(Into::into) .chain(fixed_reg_values.iter().map(|(reg, _)| *reg)) .chain(rest_reg_value.iter().map(|(reg, _)| *reg)) .collect(); let arg_list_value = Value::List( 
fixed_reg_values .into_iter() .map(|(_, value)| value) .collect(), rest_reg_value.map(|(_, value)| Box::new(value)), ); LoadedArgList { captures_reg, param_regs, arg_list_value, } } pub fn build_save_arg_list_to_regs<'a>( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, arg_list_value: Value, fixed_abi_types: impl ExactSizeIterator, rest_abi_type: Option<&'a abitype::AbiType>, ) -> Vec { use crate::mir::value::build_reg::value_to_reg; let mut list_iter = arg_list_value.into_unsized_list_iter(); let mut arg_regs = vec![]; for abi_type in fixed_abi_types { let fixed_value = list_iter.next_unchecked(b, span); let reg_id = value_to_reg(ehx, b, span, &fixed_value, abi_type); arg_regs.push(reg_id.into()); } if let Some(rest_abi_type) = rest_abi_type { let reg_id = value_to_reg(ehx, b, span, &list_iter.into_rest(), rest_abi_type); arg_regs.push(reg_id.into()); }; arg_regs } ================================================ FILE: compiler/mir/builder.rs ================================================ use std::fmt; use crate::mir::ops::{CastBoxedOp, CondOp, Op, OpKind, RegId, RegPhi}; use arret_runtime::abitype; use arret_syntax::span::Span; pub struct Builder { ops: Vec, } #[derive(Clone, Copy)] pub enum BuiltReg { Const(RegId), Local(RegId), } impl BuiltReg { pub fn into_reg_id(self) -> RegId { match self { BuiltReg::Const(reg_id) | BuiltReg::Local(reg_id) => reg_id, } } pub fn is_const(self) -> bool { match self { BuiltReg::Const(_) => true, BuiltReg::Local(_) => false, } } } impl From for RegId { fn from(built_reg: BuiltReg) -> RegId { built_reg.into_reg_id() } } impl fmt::Debug for BuiltReg { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // This is by analogy with LLVM if self.is_const() { write!(f, "@{}", self.into_reg_id().get()) } else { write!(f, "%{}", self.into_reg_id().get()) } } } impl Builder { pub fn new() -> Builder { Builder { ops: vec![] } } pub fn alloc_local(&mut self) -> BuiltReg { BuiltReg::Local(RegId::alloc()) } pub fn push_reg(&mut 
self, span: Span, kind_cons: F, kind_param: P) -> BuiltReg where F: FnOnce(RegId, P) -> OpKind, { let reg_id = RegId::alloc(); let kind = kind_cons(reg_id, kind_param); let output_reg_is_const = kind.const_output(); self.push(span, kind); if output_reg_is_const { BuiltReg::Const(reg_id) } else { BuiltReg::Local(reg_id) } } pub fn push(&mut self, span: Span, kind: OpKind) { self.ops.push(Op::new(span, kind)); } pub fn append(&mut self, ops: impl IntoIterator) { self.ops.extend(ops); } pub fn into_ops(self) -> Box<[Op]> { self.ops.into_boxed_slice() } pub fn push_cond( &mut self, span: Span, test_reg: RegId, true_cons: T, false_cons: F, ) -> BuiltReg where T: FnOnce(&mut Builder) -> RegId, F: FnOnce(&mut Builder) -> RegId, { let mut true_builder = Builder::new(); let true_result_reg = true_cons(&mut true_builder); let mut false_builder = Builder::new(); let false_result_reg = false_cons(&mut false_builder); let output_reg = RegId::alloc(); self.push( span, OpKind::Cond(CondOp { reg_phi: Some(RegPhi { output_reg, true_result_reg, false_result_reg, }), test_reg, true_ops: true_builder.into_ops(), false_ops: false_builder.into_ops(), }), ); BuiltReg::Local(output_reg) } pub fn cast_boxed( &mut self, span: Span, from_reg: BuiltReg, to_type: abitype::BoxedAbiType, ) -> BuiltReg { let kind_cons = if from_reg.is_const() { OpKind::ConstCastBoxed } else { OpKind::CastBoxed }; let cast_boxed_op = CastBoxedOp { from_reg: from_reg.into(), to_type, }; self.push_reg(span, kind_cons, cast_boxed_op) } pub fn cast_boxed_cond( &mut self, span: Span, from_type: &abitype::BoxedAbiType, from_reg: BuiltReg, to_type: abitype::BoxedAbiType, ) -> BuiltReg { if from_type == &to_type { from_reg } else { self.cast_boxed(span, from_reg, to_type) } } } impl Default for Builder { fn default() -> Builder { Builder::new() } } pub trait TryToBuilder { fn try_to_builder(&mut self) -> Option<&mut Builder>; } impl TryToBuilder for Option { fn try_to_builder(&mut self) -> Option<&mut Builder> { 
self.as_mut() } } impl TryToBuilder for Builder { fn try_to_builder(&mut self) -> Option<&mut Builder> { Some(self) } } ================================================ FILE: compiler/mir/costing.rs ================================================ use crate::mir::ops; /// Abstract unit for measuring the runtime cost of ops pub type OpCost = u32; /// Abstract unit for a multiplier of an `OpCost` pub type OpCostFactor = f32; /// Returns the approximate runtime cost of an operation category /// /// This isn't adjusted for any specifics of a given op. `cost_for_ops` should be used when costing /// a known sequence of ops. pub fn cost_for_op_category(category: ops::OpCategory) -> OpCost { use crate::mir::ops::OpCategory; match category { OpCategory::Unreachable => 0, OpCategory::ConstCastBoxed | OpCategory::CastBoxed => 0, OpCategory::ConstReg => 1, OpCategory::RegOp => 2, OpCategory::ConstBox => 4, OpCategory::Cond => 5, // Adjusted below to include branches OpCategory::MakeCallback => 5, OpCategory::MemLoad => 5, OpCategory::Ret => 5, OpCategory::Call => 9, // Adjusted below based on the call purity // This is tricky. This could either do a stack allocation (which is cheap) or a heap // allocation (which is very expensive). This depends on the type and escape analysis // in codegen. We need to make use compromise between those two costs here. OpCategory::AllocBoxed => 15, } } /// Returns the approximate runtime cost of an operation in an abstract unit fn cost_for_op(op: &ops::Op) -> OpCost { let category_cost = cost_for_op_category(op.kind().category()); let op_adjustment = match op.kind() { ops::OpKind::Cond(cond_op) => { // Only one branch can be taken so the runtime cost is the average of the branches. On // the other hand, the code size (and thus icache footprint) is the sum of the // branches. Compromise by using the most expensive branch. 
std::cmp::max( cost_for_ops(cond_op.true_ops.iter()), cost_for_ops(cond_op.false_ops.iter()), ) } ops::OpKind::Call(_, call_op) => { // Impure calls are harder to optimise. Penalise them. let impure_penalty = if call_op.impure { 2 } else { 0 }; let callee_penalty = match call_op.callee { // These cannot be inlined and need to use the standard calling convention ops::Callee::BoxedFunThunk(_) | ops::Callee::StaticSymbol(_) => 2, _ => 0, }; impure_penalty + callee_penalty } _ => 0, }; category_cost + op_adjustment } /// Returns the cost for a sequence of ops pub fn cost_for_ops<'o>(ops: impl Iterator) -> OpCost { ops.map(cost_for_op).sum() } ================================================ FILE: compiler/mir/env_values.rs ================================================ use std::collections::HashMap; use arret_syntax::datum::DataStr; use arret_syntax::span::Span; use crate::hir; use crate::mir::builder::{Builder, BuiltReg}; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::ops::{RecordStruct, RecordStructId}; use crate::mir::specific_abi_type::specific_abi_type_for_value; use crate::mir::value::Value; type Values = Box<[(hir::LocalId, Value)]>; /// Indicates the layout of captured values #[derive(Clone, Debug)] enum CapturesRepr { Empty, SingleBox, RecordStruct(RecordStructId), } /// Tracks the constant and free values captured by an expression #[derive(Clone, Debug)] pub struct EnvValues { pub const_values: Values, pub free_values: Values, captures_repr: CapturesRepr, } impl EnvValues { pub fn empty() -> EnvValues { EnvValues { const_values: Box::new([]), free_values: Box::new([]), captures_repr: CapturesRepr::Empty, } } } fn can_reference_local_regs(value: &Value) -> bool { match value { Value::Const(_) | Value::EqPred | Value::TyPred(_) | Value::RecordCons(_) | Value::FieldAccessor(_, _) | Value::RustFun(_) => false, Value::Reg(_) => true, Value::ArretFun(arret_fun) => !arret_fun.env_values().free_values.is_empty(), Value::List(fixed, rest) => fixed .iter() 
.chain(rest.iter().map(AsRef::as_ref)) .any(can_reference_local_regs), Value::Record(_, fields) => fields.iter().any(can_reference_local_regs), } } /// Calculates the values captured from the environment by the passed expression pub fn calculate_env_values( local_values: &HashMap, capturing_expr: &hir::Expr, source_name: Option<&DataStr>, ) -> EnvValues { let mut captured_values = HashMap::new(); // Only process captures if there are local values. This is to avoid visiting the expression // when capturing isn't possible if !local_values.is_empty() { // Look for references to variables inside the function hir::visitor::visit_exprs(capturing_expr, &mut |expr| { if let hir::ExprKind::LocalRef(_, local_id) = &expr.kind { if !captured_values.contains_key(local_id) { if let Some(value) = local_values.get(local_id) { captured_values.insert(*local_id, value.clone()); } } } }); } // Determine which captures are constants let (free_values, const_values): (Vec<_>, Vec<_>) = captured_values .into_iter() .partition(|(_, value)| can_reference_local_regs(value)); let captures_repr = match free_values.len() { 0 => CapturesRepr::Empty, 1 => { // Single field records can never box as efficiently as our native box representation CapturesRepr::SingleBox } _ => { let captures_source_name = source_name .map(|source_name| format!("{}_captures", source_name).into()) .unwrap_or_else(|| "anon_captures".into()); let field_abi_types = free_values .iter() .map(|(_, value)| specific_abi_type_for_value(value)) .collect(); let record_struct_id = RecordStruct::new(captures_source_name, field_abi_types); CapturesRepr::RecordStruct(record_struct_id) } }; EnvValues { const_values: const_values.into_boxed_slice(), free_values: free_values.into_boxed_slice(), captures_repr, } } /// Builds code to save local values into a captures reg pub fn save_to_captures_reg( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, env_values: &EnvValues, ) -> Option { use crate::mir::value::build_reg::value_to_reg; use 
arret_runtime::abitype; match &env_values.captures_repr { CapturesRepr::Empty => None, CapturesRepr::SingleBox => { let value = &env_values.free_values[0].1; Some(value_to_reg( ehx, b, span, value, &abitype::BoxedAbiType::Any.into(), )) } CapturesRepr::RecordStruct(record_struct) => { use crate::mir::ops::*; let field_regs = env_values .free_values .iter() .zip(record_struct.field_abi_types.iter()) .map(|((_, value), abi_type)| value_to_reg(ehx, b, span, value, abi_type).into()) .collect(); let record_reg = b.push_reg( span, OpKind::AllocBoxedRecord, BoxRecordOp { record_struct: record_struct.clone(), field_regs, }, ); Some(b.cast_boxed(span, record_reg, abitype::BoxedAbiType::Any)) } } } /// Loads env values assuming all captured variables are still inside the local function pub fn load_from_current_fun( local_values: &mut HashMap, env_values: &EnvValues, ) { local_values.extend( env_values .const_values .iter() .chain(env_values.free_values.iter()) .map(|(local_id, value)| (*local_id, value.clone())), ); } /// Loads environment values from an env parameter pub fn load_from_env_param( b: &mut Builder, span: Span, local_values: &mut HashMap, env_values: &mut EnvValues, captures_reg: Option, ) { use crate::mir::value; use arret_runtime::abitype; // Include the const values directly local_values.extend( env_values .const_values .iter() .map(|(local_id, value)| (*local_id, value.clone())), ); match &env_values.captures_repr { CapturesRepr::Empty => {} CapturesRepr::SingleBox => { let var_id = &env_values.free_values[0].0; let captures_reg = captures_reg.unwrap(); let new_value: Value = value::RegValue::new(captures_reg, abitype::BoxedAbiType::Any.into()).into(); local_values.insert(*var_id, new_value.clone()); env_values.free_values[0].1 = new_value; } CapturesRepr::RecordStruct(record_struct) => { use crate::mir::ops::*; let record_reg: RegId = captures_reg.unwrap().into(); for (field_index, (local_id, free_value)) in env_values.free_values.iter_mut().enumerate() { 
let field_reg = b.push_reg( span, OpKind::LoadBoxedRecordField, LoadBoxedRecordFieldOp { record_reg, record_struct: record_struct.clone(), field_index, }, ); let field_abi_type = record_struct.field_abi_types[field_index].clone(); let new_value: Value = value::RegValue::new(field_reg, field_abi_type).into(); local_values.insert(*local_id, new_value.clone()); *free_value = new_value; } } } } ================================================ FILE: compiler/mir/equality.rs ================================================ use arret_syntax::span::Span; use arret_runtime::abitype; use arret_runtime::boxed; use arret_runtime::boxed::prelude::*; use crate::codegen::GenAbi; use crate::mir::builder::{Builder, BuiltReg, TryToBuilder}; use crate::mir::costing::{cost_for_op_category, cost_for_ops}; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::ops::*; use crate::mir::tagset::TypeTagSet; use crate::mir::value; use crate::mir::value::build_reg::value_to_reg; use crate::mir::value::to_const::value_to_const; use crate::mir::value::Value; use crate::ty::record; pub enum EqualityResult { Static(bool), Dynamic(Value), } impl EqualityResult { fn from_bool_reg(reg: BuiltReg) -> EqualityResult { EqualityResult::Dynamic(value::RegValue::new(reg, abitype::AbiType::Bool).into()) } } impl From for Value { fn from(er: EqualityResult) -> Value { match er { EqualityResult::Static(true) => boxed::TRUE_INSTANCE.as_any_ref().into(), EqualityResult::Static(false) => boxed::FALSE_INSTANCE.as_any_ref().into(), EqualityResult::Dynamic(value) => value, } } } fn runtime_compare( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, left_value: &Value, right_value: &Value, ) -> BuiltReg { let left_reg = value_to_reg(ehx, b, span, left_value, &abitype::BoxedAbiType::Any.into()); let right_reg = value_to_reg( ehx, b, span, right_value, &abitype::BoxedAbiType::Any.into(), ); let abi = GenAbi { takes_task: true, params: Box::new([ abitype::BoxedAbiType::Any.into(), abitype::BoxedAbiType::Any.into(), ]), 
ret: abitype::AbiType::Bool.into(), }; let callee = Callee::StaticSymbol(StaticSymbol { symbol: "arret_runtime_equals", impure: false, abi, }); b.push_reg( span, OpKind::Call, CallOp { callee, impure: false, args: Box::new([left_reg.into(), right_reg.into()]), }, ) } fn build_native_compare( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, left_value: &Value, right_value: &Value, abi_type: &abitype::AbiType, op_kind: F, ) -> BuiltReg where F: FnOnce(RegId, BinaryOp) -> OpKind, { let left_reg = value_to_reg(ehx, b, span, left_value, abi_type); let right_reg = value_to_reg(ehx, b, span, right_value, abi_type); b.push_reg( span, op_kind, BinaryOp { lhs_reg: left_reg.into(), rhs_reg: right_reg.into(), }, ) } fn build_record_equality( ehx: &mut EvalHirCtx, parent_b: &mut Builder, span: Span, record_cons: &record::ConsId, left_value: &Value, right_value: &Value, ) -> EqualityResult { use crate::mir::record_field::load_record_field; // Try a fieldwise comparison let field_count = record_cons.fields().len(); let mut fieldwise_b = Builder::new(); let mut fieldwise_regs = Vec::::with_capacity(field_count); for field_index in 0..field_count { let left_field = load_record_field( ehx, &mut fieldwise_b, span, record_cons, left_value, field_index, ); let right_field = load_record_field( ehx, &mut fieldwise_b, span, record_cons, right_value, field_index, ); match eval_equality(ehx, &mut fieldwise_b, span, &left_field, &right_field) { EqualityResult::Static(false) => { // The whole comparison is false; we don't need to build anything return EqualityResult::Static(false); } EqualityResult::Static(true) => { // We can ignore this comparison } EqualityResult::Dynamic(value) => { let fieldwise_reg = value_to_reg(ehx, &mut fieldwise_b, span, &value, &abitype::AbiType::Bool); fieldwise_regs.push(fieldwise_reg); } } } let mut fieldwise_reg_iter = fieldwise_regs.into_iter(); let first_fieldwise_reg = if let Some(fieldwise_reg) = fieldwise_reg_iter.next() { fieldwise_reg } else { // This 
is statically true return EqualityResult::Static(true); }; let combined_fieldwise_reg = fieldwise_reg_iter.fold(first_fieldwise_reg, |acc_reg, fieldwise_reg| { let phi_result_reg = fieldwise_b.alloc_local(); fieldwise_b.push( span, OpKind::Cond(CondOp { reg_phi: Some(RegPhi { output_reg: phi_result_reg.into(), true_result_reg: acc_reg.into(), false_result_reg: fieldwise_reg.into(), }), test_reg: fieldwise_reg.into(), true_ops: Box::new([]), false_ops: Box::new([]), }), ); phi_result_reg }); // Try a runtime compare let mut runtime_b = Builder::new(); let runtime_reg = runtime_compare(ehx, &mut runtime_b, span, left_value, right_value); // Build ops for both options and cost them let fieldwise_ops = fieldwise_b.into_ops(); let fieldwise_cost = cost_for_ops(fieldwise_ops.iter()); let runtime_ops = runtime_b.into_ops(); // Favour fieldwise comparisons. Runtime comparisons of records are more expensive than other // types but this wouldn't be captured by `cost_for_ops`. Account for at least the cost of // loading the class map. 
let runtime_cost = cost_for_ops(runtime_ops.iter()) + cost_for_op_category(OpCategory::MemLoad); if runtime_cost < fieldwise_cost { parent_b.append(runtime_ops.into_vec().into_iter()); EqualityResult::from_bool_reg(runtime_reg) } else { parent_b.append(fieldwise_ops.into_vec().into_iter()); EqualityResult::from_bool_reg(combined_fieldwise_reg) } } /// Builds a comparison between two values known to be boolean fn build_bool_equality( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, left_value: &Value, right_value: &Value, ) -> EqualityResult { enum ValueClass { ConstTrue, Boxed, Other, } fn classify_value(value: &Value) -> ValueClass { match value { Value::Const(any_ref) if any_ref.header().type_tag() == boxed::TypeTag::True => { ValueClass::ConstTrue } Value::Reg(reg_value) => { if let abitype::AbiType::Boxed(_) = ®_value.abi_type { ValueClass::Boxed } else { ValueClass::Other } } _ => ValueClass::Other, } } let left_class = classify_value(left_value); let right_class = classify_value(right_value); let result_reg = match (left_class, right_class) { // Comparing a boolean to constant true can be simplified to a no-op (ValueClass::ConstTrue, _) => { return EqualityResult::Dynamic(right_value.clone()); } (_, ValueClass::ConstTrue) => { return EqualityResult::Dynamic(left_value.clone()); } (ValueClass::Boxed, ValueClass::Boxed) => { // If both values are boxed we can just compare the pointers build_native_compare( ehx, b, span, left_value, right_value, &abitype::BoxedAbiType::Any.into(), OpKind::BoxIdentical, ) } _ => { // Fall back to a native comparison of the unboxed values build_native_compare( ehx, b, span, left_value, right_value, &abitype::AbiType::Bool, OpKind::BoolEqual, ) } }; EqualityResult::from_bool_reg(result_reg) } /// Determines if two values are statically equal pub fn values_statically_equal( ehx: &mut EvalHirCtx, left_value: &Value, right_value: &Value, ) -> Option { match (left_value, right_value) { (Value::Reg(left_reg), Value::Reg(right_reg)) => 
{ if [left_reg, right_reg] .iter() .any(|reg| reg.possible_type_tags == boxed::TypeTag::FunThunk.into()) { // Functions are equal to nothing, including themselves return Some(false); } if left_reg.reg.into_reg_id() != right_reg.reg.into_reg_id() { // We can't determine if these are statically equal return None; } for partial_equal_type_tag in TypeTagSet::all().into_iter().filter(|type_tag| { match type_tag { // Functions never compare equal boxed::TypeTag::FunThunk => true, // NaN != NaN boxed::TypeTag::Float => true, // Can contain partial equal values boxed::TypeTag::Pair | boxed::TypeTag::Record | boxed::TypeTag::Set | boxed::TypeTag::Map | boxed::TypeTag::Vector => true, // The rest can be compared. Add them explicitly so we will be forced to // classify new types boxed::TypeTag::Int | boxed::TypeTag::Char | boxed::TypeTag::Str | boxed::TypeTag::Sym | boxed::TypeTag::True | boxed::TypeTag::False | boxed::TypeTag::Nil => false, } }) { if [left_reg, right_reg] .iter() .all(|reg| reg.possible_type_tags.contains(partial_equal_type_tag)) { return None; } } Some(true) } // Functions never compare equal (Value::ArretFun(_) | Value::RustFun(_) | Value::TyPred(_) | Value::EqPred, _) | (_, Value::ArretFun(_) | Value::RustFun(_) | Value::TyPred(_) | Value::EqPred) => { Some(false) } _ => { if let Some(const_left) = value_to_const(ehx, left_value) { if let Some(const_right) = value_to_const(ehx, right_value) { return Some(const_left.eq_in_heap(ehx.as_heap(), &const_right)); } } None } } } /// Evaluates if two values are equal /// /// This attempts `values_statically_equal` before building a runtime comparison. 
pub fn eval_equality( ehx: &mut EvalHirCtx, b: &mut impl TryToBuilder, span: Span, left_value: &Value, right_value: &Value, ) -> EqualityResult { use crate::mir::value::types::{known_record_cons_for_value, possible_type_tags_for_value}; if let Some(static_result) = values_statically_equal(ehx, left_value, right_value) { return EqualityResult::Static(static_result); } let b = if let Some(some_b) = b.try_to_builder() { some_b } else { panic!("runtime equality without builder") }; let left_type_tags = possible_type_tags_for_value(left_value); let right_type_tags = possible_type_tags_for_value(right_value); let all_type_tags = left_type_tags | right_type_tags; let common_type_tags = left_type_tags & right_type_tags; if common_type_tags.is_empty() { // No types in common return EqualityResult::Static(false); } if [left_type_tags, right_type_tags].contains(&boxed::TypeTag::FunThunk.into()) { // Functions always compare false return EqualityResult::Static(false); } if all_type_tags == abitype::AbiType::Bool.into() { // Build a specialised comparison for `Bool` return build_bool_equality(ehx, b, span, left_value, right_value); } let boxed_singleton_type_tags: TypeTagSet = [ boxed::TypeTag::True, boxed::TypeTag::False, boxed::TypeTag::Nil, ] .iter() .collect(); let result_reg = if common_type_tags.is_subset(boxed_singleton_type_tags) { // We an do a direct pointer comparison build_native_compare( ehx, b, span, left_value, right_value, &abitype::BoxedAbiType::Any.into(), OpKind::BoxIdentical, ) } else if all_type_tags == boxed::TypeTag::Int.into() { build_native_compare( ehx, b, span, left_value, right_value, &abitype::AbiType::Int, |reg_id, BinaryOp { lhs_reg, rhs_reg }| { OpKind::IntCompare( reg_id, CompareOp { comparison: Comparison::Eq, lhs_reg, rhs_reg, }, ) }, ) } else if all_type_tags == boxed::TypeTag::Char.into() { build_native_compare( ehx, b, span, left_value, right_value, &abitype::AbiType::Char, OpKind::CharEqual, ) } else if all_type_tags == 
boxed::TypeTag::Sym.into() {
        // Interned symbols compare by identity
        build_native_compare(
            ehx,
            b,
            span,
            left_value,
            right_value,
            &abitype::AbiType::InternedSym,
            OpKind::InternedSymEqual,
        )
    } else if all_type_tags == boxed::TypeTag::Float.into() {
        build_native_compare(
            ehx,
            b,
            span,
            left_value,
            right_value,
            &abitype::AbiType::Float,
            |reg_id, BinaryOp { lhs_reg, rhs_reg }| {
                OpKind::FloatCompare(
                    reg_id,
                    CompareOp {
                        comparison: Comparison::Eq,
                        lhs_reg,
                        rhs_reg,
                    },
                )
            },
        )
    } else if all_type_tags == boxed::TypeTag::Record.into() {
        let known_left_cons = known_record_cons_for_value(ehx, left_value);
        let known_right_cons = known_record_cons_for_value(ehx, right_value);

        match (known_left_cons, known_right_cons) {
            (Some(left_cons), Some(right_cons)) => {
                if left_cons == right_cons {
                    // Same record constructor; compare fieldwise
                    let common_cons = left_cons.clone();
                    return build_record_equality(
                        ehx,
                        b,
                        span,
                        &common_cons,
                        left_value,
                        right_value,
                    );
                } else {
                    // Different record constructors can never be equal
                    return EqualityResult::Static(false);
                }
            }
            _ => runtime_compare(ehx, b, span, left_value, right_value),
        }
    } else {
        runtime_compare(ehx, b, span, left_value, right_value)
    };

    EqualityResult::from_bool_reg(result_reg)
}

================================================
FILE: compiler/mir/error.rs
================================================
use std::{error, fmt, result};

use codespan_reporting::diagnostic::Diagnostic;

use arret_syntax::span::{FileId, Span};

use crate::mir::inliner::ApplyCookie;
use crate::reporting::{new_primary_label, LocTrace};

/// A user-visible panic raised during MIR evaluation
#[derive(Debug, PartialEq)]
pub struct Panic {
    loc_trace: LocTrace,
    message: String,
}

impl Panic {
    pub fn new(span: Span, message: String) -> Panic {
        Panic {
            loc_trace: span.into(),
            message,
        }
    }
}

#[derive(Debug, PartialEq)]
pub enum Error {
    Panic(Panic),
    /// Internal error used to abort a recursive function application when a loop is detected
    AbortRecursion(ApplyCookie),
    /// Internal error indicating that a divergent value was encountered
    Diverged,
}

pub type Result<T> = result::Result<T, Error>;

impl Error {
    /// Annotates a panic with the span of the macro invocation that expanded to the panicking code
    ///
    /// The internal flow-control variants are returned unchanged.
    pub fn with_macro_invocation_span(self, span: Span) -> Error {
        match self {
            Error::Panic(Panic { loc_trace, message }) => Error::Panic(Panic {
                loc_trace: loc_trace.with_macro_invocation(span),
                message,
            }),
            other => other,
        }
    }
}

impl From<Error> for Diagnostic<FileId> {
    fn from(error: Error) -> Self {
        if let Error::Panic(panic) = error {
            let diagnostic = Diagnostic::error()
                .with_message(panic.message)
                .with_labels(vec![new_primary_label(
                    panic.loc_trace.origin(),
                    "panicked here",
                )]);

            return panic.loc_trace.label_macro_invocation(diagnostic);
        }

        // Only `Panic` is user visible; the other variants are internal flow control
        panic!(
            "attempted to convert an internal {:?} flow control error to a diagnostic",
            error
        );
    }
}

impl From<Error> for Vec<Diagnostic<FileId>> {
    fn from(error: Error) -> Self {
        vec![error.into()]
    }
}

impl error::Error for Panic {}

impl fmt::Display for Panic {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.message)
    }
}

================================================
FILE: compiler/mir/eval_hir.rs
================================================
use std::collections::HashMap;
use std::sync::Arc;
use std::{alloc, ffi, panic};

use arret_runtime::boxed;
use arret_runtime::boxed::prelude::*;
use arret_runtime::boxed::refs::Gc;
use arret_runtime::callback::EntryPointAbiType as CallbackEntryPointAbiType;
use arret_runtime::intern::{AsInterner, Interner};

use arret_runtime::abitype;
use arret_runtime_syntax::reader;
use arret_syntax::datum::{DataStr, Datum};
use arret_syntax::span::Span;

use crate::codegen;
use crate::context::ModuleId;
use crate::hir;
use crate::mir::builder::{Builder, BuiltReg, TryToBuilder};
use crate::mir::error::{Error, Result};
use crate::mir::inliner;
use crate::mir::ops;
use crate::mir::polymorph::PolymorphAbi;
use crate::mir::value;
use crate::mir::value::synthetic_fun::SyntheticFuns;
use crate::mir::value::types::TypeHint;
use crate::mir::{Expr, Value};
use crate::rfi;
use crate::source::EMPTY_SPAN;
use crate::ty;
use crate::ty::purity;
use crate::ty::purity::Purity;
use crate::ty::record;
use crate::ty::ty_args::TyArgs;
use crate::ty::Ty;

#[derive(PartialEq, Eq, Hash)]
struct
RustFunKey { symbol: &'static str, polymorph_abi: PolymorphAbi, } #[derive(PartialEq, Eq, Hash)] struct ArretFunKey { arret_fun_id: value::ArretFunId, polymorph_abi: PolymorphAbi, } #[derive(PartialEq, Eq)] pub struct EvaledRecordClass { pub jit_record_class_id: boxed::RecordClassId, pub jit_data_layout: Option, pub record_struct: ops::RecordStructId, } pub struct EvalHirCtx { runtime_task: arret_runtime::task::Task, global_values: HashMap, private_fun_id_counter: ops::PrivateFunIdCounter, private_funs: HashMap, rust_funs: HashMap, arret_funs: HashMap, synthetic_funs: SyntheticFuns, rust_fun_thunks: HashMap, arret_fun_thunks: HashMap, // This uses pointers because `FunThunk` is always inequal to itself thunk_fun_values: HashMap<*const boxed::FunThunk, Value>, thunk_jit: codegen::jit::JitCtx, pub(super) record_class_for_cons: HashMap, cons_for_jit_record_class_id: HashMap, } /// Context for performing a tail call in `(recur)` struct TailCallCtx { self_abi: PolymorphAbi, captures_reg: Option, } struct RecurSelf<'af> { arret_fun: &'af value::ArretFun, /// Return ABI type of expected by tail calls, if they're allowed tail_call_ctx: Option, } pub struct FunCtx<'rs> { /// Optional module to find local variables in /// /// If this isn't specified the function cannot refer to other top-level definitions in the /// same module. 
module_id: Option, mono_ty_args: TyArgs, local_values: HashMap, recur_self: Option>>, pub(super) inliner_stack: inliner::ApplyStack, } impl<'sv> FunCtx<'sv> { pub fn new(module_id: Option) -> FunCtx<'static> { FunCtx { module_id, mono_ty_args: TyArgs::empty(), local_values: HashMap::new(), recur_self: None, inliner_stack: inliner::ApplyStack::new(), } } pub fn monomorphise(&self, poly: &ty::Ref) -> ty::Ref { ty::subst::monomorphise(&self.mono_ty_args, poly) } } struct BuiltCondBranch { b: Builder, result: Result, } pub struct BuiltProgram { pub main: ops::Fun, pub private_funs: HashMap, } impl BuiltProgram { /// Returns true if the program always executes successfully with no output or side effects pub fn is_empty(&self) -> bool { matches!( self.main.ops.as_ref(), [ops::Op { kind: ops::OpKind::RetVoid, .. }] ) } } #[derive(Clone)] pub(super) struct ApplyArgs<'tyargs> { ty_args: &'tyargs TyArgs, pub(super) list_value: Value, } fn merge_apply_purity_into_scope( scope: &HashMap, apply_purities: &HashMap, subst_with: &TyArgs, ) -> HashMap { use crate::ty::subst; scope .iter() .map(|(pvar, v)| (pvar.clone(), v.clone())) .chain(apply_purities.iter().map(|(pvar, poly_purity)| { let subst_purity = subst::monomorphise_purity(subst_with, poly_purity); (pvar.clone(), subst_purity) })) .collect() } /// Merge poly type args in to existing mono type args /// /// This is used when applying a polymorphic function. The `subst_with` are used to monomorphise /// the `apply_ty_args` which are then added to the existing `scope` and returned. 
fn merge_apply_ty_args_into_scope( scope: &TyArgs, apply_ty_args: &TyArgs, subst_with: &TyArgs, ) -> TyArgs { use crate::ty::subst; let pvar_purities = merge_apply_purity_into_scope( scope.pvar_purities(), apply_ty_args.pvar_purities(), subst_with, ); let tvar_types = scope .tvar_types() .iter() .map(|(tvar, mono)| (tvar.clone(), mono.clone())) .chain(apply_ty_args.tvar_types().iter().map(|(tvar, poly_type)| { let mono_ty = subst::monomorphise(subst_with, poly_type); (tvar.clone(), mono_ty) })) .collect(); TyArgs::new(pvar_purities, tvar_types) } impl EvalHirCtx { pub fn new(optimising: bool) -> EvalHirCtx { let thunk_jit = codegen::jit::JitCtx::new(optimising); EvalHirCtx { runtime_task: arret_runtime::task::Task::new(), global_values: HashMap::new(), private_fun_id_counter: ops::PrivateFunIdCounter::new(), private_funs: HashMap::new(), rust_funs: HashMap::new(), arret_funs: HashMap::new(), synthetic_funs: SyntheticFuns::new(), rust_fun_thunks: HashMap::new(), arret_fun_thunks: HashMap::new(), thunk_fun_values: HashMap::new(), thunk_jit, record_class_for_cons: HashMap::new(), cons_for_jit_record_class_id: HashMap::new(), } } fn destruc_scalar( scalar: &hir::destruc::Scalar, value: Value, insert_local: &mut F, ) where F: FnMut(hir::LocalId, Value), { if let Some(local_id) = scalar.local_id() { insert_local(*local_id, value); } } fn destruc_list( b: &mut Option, span: Span, list: &hir::destruc::List, value: Value, insert_local: &mut F, ) where F: FnMut(hir::LocalId, Value), { let mut iter = value.into_unsized_list_iter(); for fixed_destruc in list.fixed() { let value = iter.next_unchecked(b, span); Self::destruc_value(b, fixed_destruc, value, insert_local); } if let Some(rest_destruc) = list.rest() { Self::destruc_scalar(rest_destruc, iter.into_rest(), insert_local) } } fn destruc_value( b: &mut Option, destruc: &hir::destruc::Destruc, value: Value, insert_local: &mut F, ) where F: FnMut(hir::LocalId, Value), { use crate::hir::destruc::Destruc; match destruc { 
Destruc::Scalar(_, scalar) => Self::destruc_scalar(scalar, value, insert_local), Destruc::List(span, list) => Self::destruc_list(b, *span, list, value, insert_local), } } fn destruc_source_name(destruc: &hir::destruc::Destruc) -> Option<&DataStr> { use crate::hir::destruc::Destruc; match destruc { Destruc::Scalar(_, scalar) => Some(scalar.source_name()), Destruc::List(_, _) => None, } } fn eval_local_ref(&self, fcx: &FunCtx<'_>, local_id: hir::LocalId) -> Value { // Try local values if let Some(local_value) = fcx.local_values.get(&local_id) { return local_value.clone(); } let module_id = fcx .module_id .expect("could not fall back to global for missing local"); // If this is a top-level def from the same module it will be a global fcx.local_values .get(&local_id) .unwrap_or_else(|| &self.global_values[&hir::ExportId::new(module_id, local_id)]) .clone() } fn eval_do( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, exprs: &[Expr], ) -> Result { let initial_value = Value::List(Box::new([]), None); exprs .iter() .try_fold(initial_value, |_, expr| self.eval_expr(fcx, b, expr)) } fn eval_let( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, hir_let: &hir::Let, ) -> Result { let source_name = Self::destruc_source_name(&hir_let.destruc); let value = self.eval_expr_with_source_name(fcx, b, &hir_let.value_expr, source_name)?; Self::destruc_value(b, &hir_let.destruc, value, &mut |local_id, value| { fcx.local_values.insert(local_id, value); }); self.eval_expr(fcx, b, &hir_let.body_expr) } fn eval_lit(&mut self, literal: &Datum) -> Value { reader::box_syntax_datum(self, literal).into() } pub(super) fn synthetic_funs(&mut self) -> &mut SyntheticFuns { &mut self.synthetic_funs } pub(super) fn build_arret_fun_app( &mut self, fcx: &FunCtx<'_>, b: &mut Builder, span: Span, ret_ty: &ty::Ref, arret_fun: &value::ArretFun, apply_args: &ApplyArgs<'_>, ) -> Result { use crate::hir::destruc::poly_for_list_destruc; use crate::mir::app_purity::fun_app_purity; use 
crate::mir::arg_list::build_save_arg_list_to_regs; use crate::mir::env_values; use crate::mir::ops::*; use crate::mir::polymorph::polymorph_abi_for_list_ty; use crate::mir::ret_value::ret_reg_to_value; use crate::ty::subst; let ApplyArgs { list_value: arg_list_value, ty_args: apply_ty_args, } = apply_args; let mono_ty_args = merge_apply_ty_args_into_scope( arret_fun.env_ty_args(), apply_ty_args, &fcx.mono_ty_args, ); let captures_reg = env_values::save_to_captures_reg(self, b, span, arret_fun.env_values()); let param_list_poly = poly_for_list_destruc(&arret_fun.fun_expr().params); let param_list_mono = subst::monomorphise_list(&mono_ty_args, ¶m_list_poly); let wanted_abi = polymorph_abi_for_list_ty(captures_reg.is_some(), ¶m_list_mono, ret_ty); let ret_abi = wanted_abi.ret.clone(); let mut arg_regs: Vec = vec![]; if let Some(captures_reg) = captures_reg { arg_regs.push(captures_reg.into()); } arg_regs.extend(build_save_arg_list_to_regs( self, b, span, arg_list_value.clone(), wanted_abi.fixed_params.iter(), wanted_abi.rest_param.as_ref(), )); let private_fun_id = self.id_for_arret_fun(arret_fun, wanted_abi); let fun_expr = arret_fun.fun_expr(); let app_purity = fun_app_purity( mono_ty_args.pvar_purities(), &fun_expr.purity, &fun_expr.ret_ty, ); let ret_reg = b.push_reg( span, OpKind::Call, CallOp { callee: Callee::PrivateFun(private_fun_id), impure: app_purity == Purity::Impure, args: arg_regs.into_boxed_slice(), }, ); ret_reg_to_value(ret_reg, ret_abi) } pub(super) fn inline_arret_fun_app( &mut self, outer_fcx: &FunCtx<'_>, b: &mut Option, span: Span, arret_fun: &value::ArretFun, apply_args: ApplyArgs<'_>, inliner_stack: inliner::ApplyStack, ) -> Result { let fun_expr = arret_fun.fun_expr(); let mut inner_fcx = FunCtx { module_id: arret_fun.module_id(), mono_ty_args: merge_apply_ty_args_into_scope( arret_fun.env_ty_args(), apply_args.ty_args, &outer_fcx.mono_ty_args, ), local_values: outer_fcx.local_values.clone(), recur_self: Some(Box::new(RecurSelf { arret_fun, 
tail_call_ctx: None, })), inliner_stack, }; Self::destruc_list( b, span, &fun_expr.params, apply_args.list_value, &mut |local_id, value| { inner_fcx.local_values.insert(local_id, value); }, ); self.eval_expr(&mut inner_fcx, b, &fun_expr.body_expr) } fn eval_arret_fun_app( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, span: Span, ret_ty: &ty::Ref, arret_fun: &value::ArretFun, apply_args: ApplyArgs<'_>, ) -> Result { if arret_fun.has_multiple_usages() { if let Some(outer_b) = b { return inliner::cond_inline( self, fcx, outer_b, span, ret_ty, arret_fun, apply_args, ); } } // Always inline self.inline_arret_fun_app( fcx, b, span, arret_fun, apply_args, inliner::ApplyStack::new(), ) } fn eval_ty_pred_app( &mut self, b: &mut Option, span: Span, arg_list_value: &Value, test_ty: &ty::pred::TestTy, ) -> Value { use crate::mir::typred::eval_ty_pred; let subject_value = arg_list_value.unsized_list_iter().next_unchecked(b, span); eval_ty_pred(self, b, span, &subject_value, test_ty) } fn eval_eq_pred_app( &mut self, b: &mut impl TryToBuilder, span: Span, arg_list_value: &Value, ) -> Value { use crate::mir::equality::eval_equality; let mut iter = arg_list_value.unsized_list_iter(); let left_value = iter.next_unchecked(b, span); let right_value = iter.next_unchecked(b, span); eval_equality(self, b, span, &left_value, &right_value).into() } fn eval_record_cons_app( &mut self, b: &mut Option, span: Span, record_cons: &record::ConsId, arg_list_value: &Value, ) -> Value { let mut iter = arg_list_value.unsized_list_iter(); let field_values = record_cons .fields() .iter() .map(|_| iter.next_unchecked(b, span)) .collect(); Value::Record(record_cons.clone(), field_values) } fn eval_field_accessor_app( &mut self, b: &mut Option, span: Span, record_cons: &record::ConsId, field_index: usize, arg_list_value: &Value, ) -> Value { use crate::mir::record_field::load_record_field; let mut iter = arg_list_value.unsized_list_iter(); let record_value = iter.next_unchecked(b, span); 
load_record_field(self, b, span, record_cons, &record_value, field_index) } pub fn rust_fun_to_jit_boxed(&mut self, rust_fun: Arc) -> Gc { let captures = boxed::NIL_INSTANCE.as_any_ref(); let entry = self.jit_thunk_for_rust_fun(&rust_fun); let new_boxed = boxed::FunThunk::new(self, captures, entry); self.thunk_fun_values .insert(new_boxed.as_ptr(), Value::RustFun(rust_fun)); new_boxed } fn jit_thunk_for_rust_fun(&mut self, rust_fun: &rfi::Fun) -> boxed::ThunkEntry { // Create a dynamic thunk to this Rust function if it doesn't exist if let Some(thunk) = self.rust_fun_thunks.get(&rust_fun.entry_point()) { return *thunk; } let thunk = unsafe { use crate::mir::rust_fun::ops_for_rust_fun; use std::mem; let wanted_abi = PolymorphAbi::thunk_abi(); let ops_fun = ops_for_rust_fun(self, rust_fun, wanted_abi); let address = self.thunk_jit.compile_fun( &self.private_funs, self.runtime_task.heap_mut().type_info_mut().interner_mut(), &ops_fun, ); mem::transmute(address as usize) }; self.rust_fun_thunks.insert(rust_fun.entry_point(), thunk); thunk } /// Ensures the passed `RustFun` is known by the JIT /// /// This can be called multiple times with the same Rust fun. 
Calling it with a fun that's /// never used by the JIT is harmless pub fn register_rust_fun_with_jit(&mut self, rust_fun: &rfi::Fun) { let symbol_cstring = ffi::CString::new(rust_fun.symbol()).unwrap(); // Add the inner symbol self.thunk_jit.add_symbol( symbol_cstring.as_bytes_with_nul(), rust_fun.entry_point() as u64, ); } /// Returns a private fun ID for the wanted Rust fun and ABI /// /// This will return a cached ID if available fn id_for_rust_fun( &mut self, rust_fun: &rfi::Fun, wanted_abi: PolymorphAbi, ) -> ops::PrivateFunId { use crate::mir::rust_fun::ops_for_rust_fun; let rust_fun_key = RustFunKey { symbol: rust_fun.symbol(), polymorph_abi: wanted_abi.clone(), }; if let Some(private_fun_id) = self.rust_funs.get(&rust_fun_key) { return *private_fun_id; } let private_fun_id = self.private_fun_id_counter.alloc(); self.rust_funs.insert(rust_fun_key, private_fun_id); let ops_fun = ops_for_rust_fun(self, rust_fun, wanted_abi); self.private_funs.insert(private_fun_id, ops_fun); private_fun_id } pub fn rust_fun_to_thunk_reg( &mut self, b: &mut Builder, span: Span, rust_fun: &rfi::Fun, ) -> BuiltReg { use crate::mir::ops::*; let wanted_abi = PolymorphAbi::thunk_abi(); let private_fun_id = self.id_for_rust_fun(rust_fun, wanted_abi); let nil_reg = b.push_reg(span, OpKind::ConstBoxedNil, ()); let captures_reg = b.cast_boxed(span, nil_reg, abitype::BoxedAbiType::Any); b.push_reg( span, OpKind::ConstBoxedFunThunk, BoxFunThunkOp { captures_reg: captures_reg.into(), callee: ops::Callee::PrivateFun(private_fun_id), }, ) } pub fn rust_fun_to_callback_reg( &mut self, b: &mut Builder, span: Span, rust_fun: &rfi::Fun, entry_point_abi: &CallbackEntryPointAbiType, ) -> BuiltReg { use crate::mir::ops::*; let wanted_abi = entry_point_abi.clone().into(); let private_fun_id = self.id_for_rust_fun(rust_fun, wanted_abi); let nil_reg = b.push_reg(span, OpKind::ConstBoxedNil, ()); let captures_reg = b.cast_boxed(span, nil_reg, abitype::BoxedAbiType::Any); b.push_reg( span, 
OpKind::MakeCallback, MakeCallbackOp { captures_reg: captures_reg.into(), callee: ops::Callee::PrivateFun(private_fun_id), }, ) } fn call_native_fun(span: Span, block: F) -> Result where F: FnOnce() -> Gc, { // By convention convert string panics in to our `ErrorKind::Panic` panic::catch_unwind(panic::AssertUnwindSafe(block)) .map(Value::Const) .map_err(|err| { use crate::mir::error; let message = if let Some(message) = err.downcast_ref::() { message.clone() } else { "Unexpected panic type".to_owned() }; Error::Panic(error::Panic::new(span, message)) }) } fn eval_rust_fun_app( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, span: Span, ret_ty: &ty::Ref, rust_fun: &rfi::Fun, apply_args: ApplyArgs<'_>, ) -> Result { use crate::mir::app_purity::fun_app_purity; use crate::mir::intrinsic; use crate::mir::rust_fun::build_rust_fun_app; use crate::mir::value::to_const::value_to_const; let ApplyArgs { list_value: arg_list_value, ty_args: apply_ty_args, } = apply_args; let arret_fun_type = rust_fun.arret_fun_type(); let mono_purities = merge_apply_purity_into_scope( &HashMap::new(), apply_ty_args.pvar_purities(), &fcx.mono_ty_args, ); let call_purity = fun_app_purity( &mono_purities, arret_fun_type.purity(), arret_fun_type.ret(), ); let can_const_eval = b.is_none() || (call_purity == Purity::Pure); if let Some(intrinsic_name) = rust_fun.intrinsic_name() { // Attempt specialised evaluation if let Some(value) = intrinsic::try_eval(self, b, span, intrinsic_name, &arg_list_value)? { return Ok(value); } } if can_const_eval { let boxed_arg_list = value_to_const(self, &arg_list_value); if let Some(boxed_arg_list) = boxed_arg_list { let thunk = self.jit_thunk_for_rust_fun(rust_fun); let runtime_task = &mut self.runtime_task; let native_result = Self::call_native_fun(span, || { let captures = boxed::NIL_INSTANCE.as_any_ref(); thunk(runtime_task, captures, boxed_arg_list) }); // If we receive a panic while building we want to still build the function call. 
// This `panic` could be in conditional code and we want to build all the // expressions before this panic for their side effects. if native_result.is_ok() || !b.is_some() { return native_result; } } } if let Some(b) = b { let arg_list_value = if let Some(intrinsic_name) = rust_fun.intrinsic_name() { match intrinsic::try_build(self, b, span, intrinsic_name, &arg_list_value)? { intrinsic::BuildOutcome::None => arg_list_value, intrinsic::BuildOutcome::SimplifiedArgs(simplified_arg_list_value) => { simplified_arg_list_value } intrinsic::BuildOutcome::ReturnValue(return_value) => { return Ok(return_value); } } } else { arg_list_value }; build_rust_fun_app(self, b, span, ret_ty, rust_fun, call_purity, arg_list_value) } else { panic!("Need builder for non-const function application"); } } fn eval_const_fun_thunk_app( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, span: Span, ret_ty: &ty::Ref, fun_thunk: Gc, apply_args: ApplyArgs<'_>, ) -> Result { use crate::mir::value::to_const::value_to_const; if let Some(actual_value) = self.thunk_fun_values.get(&fun_thunk.as_ptr()) { let actual_value = actual_value.clone(); return self.eval_value_app(fcx, b, span, ret_ty, &actual_value, apply_args); } if b.is_some() { panic!("attempt to apply unknown fun thunk during compile phase"); } let const_arg_list = value_to_const(self, &apply_args.list_value).expect("non-constant value during apply"); Self::call_native_fun(span, || { fun_thunk.apply(&mut self.runtime_task, const_arg_list) }) } fn build_reg_fun_thunk_app( &mut self, b: &mut Builder, span: Span, fun_reg_value: &value::RegValue, arg_list_value: &Value, ) -> Value { use crate::mir::ops::*; use crate::mir::value::build_reg::value_to_reg; let fun_boxed_abi_type = if let abitype::AbiType::Boxed(ref fun_boxed_abi_type) = fun_reg_value.abi_type { fun_boxed_abi_type } else { panic!( "Attempted to apply reg value with unboxed ABI type of {:?}", fun_reg_value.abi_type ) }; let fun_thunk_reg = b.cast_boxed_cond( span, 
fun_boxed_abi_type, fun_reg_value.reg, boxed::TypeTag::FunThunk.into(), ); let captures_reg = b.push_reg( span, OpKind::LoadBoxedFunThunkCaptures, fun_thunk_reg.into(), ); let arg_list_reg = value_to_reg( self, b, span, arg_list_value, &abitype::TOP_LIST_BOXED_ABI_TYPE.into(), ); let ret_reg = b.push_reg( span, OpKind::Call, CallOp { callee: Callee::BoxedFunThunk(fun_thunk_reg.into()), impure: true, args: Box::new([captures_reg.into(), arg_list_reg.into()]), }, ); value::RegValue::new(ret_reg, abitype::BoxedAbiType::Any.into()).into() } fn eval_value_app( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, span: Span, ret_ty: &ty::Ref, fun_value: &Value, apply_args: ApplyArgs<'_>, ) -> Result { match fun_value { Value::ArretFun(arret_fun) => { use crate::mir::env_values; env_values::load_from_current_fun(&mut fcx.local_values, arret_fun.env_values()); self.eval_arret_fun_app(fcx, b, span, ret_ty, arret_fun, apply_args) } Value::RustFun(rust_fun) => { self.eval_rust_fun_app(fcx, b, span, ret_ty, rust_fun, apply_args) } Value::TyPred(test_ty) => { Ok(self.eval_ty_pred_app(b, span, &apply_args.list_value, test_ty)) } Value::EqPred => Ok(self.eval_eq_pred_app(b, span, &apply_args.list_value)), Value::RecordCons(record_cons) => { Ok(self.eval_record_cons_app(b, span, record_cons, &apply_args.list_value)) } Value::FieldAccessor(record_cons, field_index) => Ok(self.eval_field_accessor_app( b, span, record_cons, *field_index, &apply_args.list_value, )), Value::Const(boxed_fun) => { let fun_thunk = boxed_fun .downcast_ref::() .expect("applying non-function box"); self.eval_const_fun_thunk_app(fcx, b, span, ret_ty, fun_thunk, apply_args) } Value::Reg(reg_value) => { if let Some(b) = b { Ok(self.build_reg_fun_thunk_app(b, span, reg_value, &apply_args.list_value)) } else { panic!("Need builder for reg function application"); } } other => { unimplemented!("applying function value type: {:?}", other); } } } fn eval_app( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, result_ty: 
&ty::Ref, app: &hir::App, ) -> Result { let fun_value = self.eval_expr(fcx, b, &app.fun_expr)?; let fixed_values = app .fixed_arg_exprs .iter() .map(|arg| self.eval_expr(fcx, b, arg)) .collect::>>()?; let rest_value = match &app.rest_arg_expr { Some(rest_arg) => Some(Box::new(self.eval_expr(fcx, b, rest_arg)?)), None => None, }; let ret_ty = fcx.monomorphise(result_ty); let arg_list_value = Value::List(fixed_values, rest_value); self.eval_value_app( fcx, b, app.span, &ret_ty, &fun_value, ApplyArgs { ty_args: &app.ty_args, list_value: arg_list_value, }, ) } /// Evaluates a `(recur)` within a fun body /// /// While `(recur)` is semantically equivalent to calling the fun by name it has a different /// implication about programmer intent. `(recur)` is used in positions where unbounded tail /// recursion can occur, typically to iteratively process a data structure. This is used where /// a loop would be used in other languages. /// /// For this reason they are evaluated quite differently from normal applies. Four things are /// tried in order of preference: /// /// 1. If we don't have a builder we will JIT a thunk and call into it. This is important /// because our MIR evaluation is not tail recursive; we could exhaust our Rust stack if /// we attempted to MIR evaluate until the end of recursion. /// /// 2. If the arg list is constant and the apply is pure then we will also evaluate through a /// thunk. This is the same way Rust funs are treated. /// /// 3. If we have a `tail_call_ctx` we will built a special `TailCall` op and immediately /// return its value. This maximises the chance that codegen and LLVM will be able to /// optimise the tail call in to a loop. We're also able to reuse our existing captures arg /// directly. /// /// 4. If we don't have a `tail_call_ctx` we will treat this as if it was an Arret fun apply. /// /// This can happen if we're being inlined or we're inside a thunk or callback. 
This will /// only happen for one recursion; the next one will have a `tail_call_ctx` and use one of /// the above cases. /// /// We directly build a fun app instead of attempting inlining. Inlining a recursion with a /// non-constant arg list will nearly always hit the maximum inline depth and abort. This is /// wasteful of compiler time. fn eval_recur( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, result_ty: &ty::Ref, recur: &hir::Recur, ) -> Result { use crate::mir::app_purity::fun_app_purity; let span = recur.span; let fixed_values = recur .fixed_arg_exprs .iter() .map(|arg| self.eval_expr(fcx, b, arg)) .collect::>>()?; let rest_value = match &recur.rest_arg_expr { Some(rest_arg) => Some(Box::new(self.eval_expr(fcx, b, rest_arg)?)), None => None, }; let (arret_fun, tail_call_ctx) = if let Some(recur_self) = &fcx.recur_self { (&recur_self.arret_fun, &recur_self.tail_call_ctx) } else { panic!("`(recur)` outside function"); }; let ret_ty = fcx.monomorphise(result_ty); let arg_list_value = Value::List(fixed_values, rest_value); // Determine our purity to see if we can const eval let recur_purity = fun_app_purity( fcx.mono_ty_args.pvar_purities(), &arret_fun.fun_expr().purity, &arret_fun.fun_expr().ret_ty, ); // If we're impure or we have dynamic environment values we need to be evaluated at runtime let can_const_eval = b.is_none() || (recur_purity == Purity::Pure && arret_fun.env_values().free_values.is_empty()); if can_const_eval { use crate::mir::value::to_const::value_to_const; if let Some(boxed_arg_list) = value_to_const(self, &arg_list_value) { let thunk = self.jit_thunk_for_arret_fun(arret_fun); return Self::call_native_fun(span, || { let captures = boxed::NIL_INSTANCE.as_any_ref(); thunk(&mut self.runtime_task, captures, boxed_arg_list) }); } } let some_b = if let Some(some_b) = b { some_b } else { panic!("failed to const eval (recur) during eval"); }; if let Some(tail_call_ctx) = tail_call_ctx { use crate::mir::arg_list::build_save_arg_list_to_regs; use 
crate::mir::ops::*; let self_abi = &tail_call_ctx.self_abi; let mut arg_regs: Vec = vec![]; if let Some(captures_reg) = tail_call_ctx.captures_reg { arg_regs.push(captures_reg.into()); } arg_regs.extend(build_save_arg_list_to_regs( self, some_b, span, arg_list_value, self_abi.fixed_params.iter(), self_abi.rest_param.as_ref(), )); // All of the context for `TailCall` is implicit except the arg regs let ret_reg = some_b.push_reg( span, OpKind::TailCall, TailCallOp { impure: recur_purity == Purity::Impure, args: arg_regs.into_boxed_slice(), }, ); match &self_abi.ret { abitype::RetAbiType::Inhabited(_) => { some_b.push(span, OpKind::Ret(ret_reg.into())); } abitype::RetAbiType::Never => { some_b.push(span, OpKind::Unreachable); } abitype::RetAbiType::Void => { some_b.push(span, OpKind::RetVoid); } } Err(Error::Diverged) } else { // By definition our ty args are the fun's type args pointed to themselves let pvar_purities = arret_fun .fun_expr() .pvars .iter() .map(|pvar| (pvar.clone(), pvar.clone().into())) .collect(); let tvar_types = arret_fun .fun_expr() .tvars .iter() .map(|tvar| (tvar.clone(), tvar.clone().into())) .collect(); let ty_args = TyArgs::new(pvar_purities, tvar_types); // We can't do a native tail call. This can happen if we're inside a thunk or callback // since they don't have the `FastCC` calling convention. 
self.build_arret_fun_app( fcx, some_b, recur.span, &ret_ty, arret_fun, &ApplyArgs { ty_args: &ty_args, list_value: arg_list_value, }, ) } } fn eval_cond( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, cond: &hir::Cond, ) -> Result { let test_value = self.eval_expr(fcx, b, &cond.test_expr)?; match test_value { Value::Const(any_ref) => { let bool_ref = any_ref.downcast_ref::().unwrap(); if bool_ref.as_bool() { self.eval_expr(fcx, b, &cond.true_expr) } else { self.eval_expr(fcx, b, &cond.false_expr) } } dynamic_value => { if let Some(b) = b { self.build_cond(fcx, b, &dynamic_value, cond) } else { panic!("need builder for dynamic cond"); } } } } fn build_cond_branch( &mut self, fcx: &mut FunCtx<'_>, branch_expr: &hir::Expr, ) -> BuiltCondBranch { let b = Builder::new(); let mut some_b = Some(b); let result = self.eval_expr(fcx, &mut some_b, branch_expr); let b = some_b.unwrap(); BuiltCondBranch { b, result } } fn build_cond( &mut self, fcx: &mut FunCtx<'_>, b: &mut Builder, test_value: &Value, cond: &hir::Cond, ) -> Result { use arret_runtime::boxed::TypeTag; use crate::mir::equality::values_statically_equal; use crate::mir::ops::{BinaryOp, OpKind}; use crate::mir::value::build_reg::value_to_reg; use crate::mir::value::plan_phi::*; use crate::mir::value::types::{possible_type_tags_for_value, type_hint_for_value}; let span = cond.span; let test_reg = value_to_reg(self, b, span, test_value, &abitype::AbiType::Bool); let mut built_true = self.build_cond_branch(fcx, &cond.true_expr); let mut built_false = self.build_cond_branch(fcx, &cond.false_expr); let output_value; let reg_phi; match (built_true.result, built_false.result) { (Ok(true_value), Ok(false_value)) => { let possible_true_type_tags = possible_type_tags_for_value(&true_value); let possible_false_type_tags = possible_type_tags_for_value(&false_value); if values_statically_equal(self, &true_value, &false_value) == Some(true) { output_value = true_value; reg_phi = None; } else if possible_true_type_tags == 
TypeTag::True.into() && possible_false_type_tags == TypeTag::False.into() { // Our output value is our test // Use the unboxed value because LLVM has trouble reasoning about our boxed bools let reg_value = value::RegValue::new(test_reg, abitype::AbiType::Bool); output_value = reg_value.into(); reg_phi = None; } else if possible_true_type_tags == TypeTag::False.into() && possible_false_type_tags == TypeTag::True.into() { // Our output value is the negation of our test let const_false_reg = b.push_reg(span, OpKind::ConstBool, false); let negated_test_reg = b.push_reg( span, OpKind::BoolEqual, BinaryOp { lhs_reg: test_reg.into(), rhs_reg: const_false_reg.into(), }, ); let reg_value = value::RegValue::new(negated_test_reg, abitype::AbiType::Bool); output_value = reg_value.into(); reg_phi = None; } else { let phi_abi_type = plan_phi_abi_type(&true_value, &false_value); let true_result_reg = value_to_reg(self, &mut built_true.b, span, &true_value, &phi_abi_type); let false_result_reg = value_to_reg(self, &mut built_false.b, span, &false_value, &phi_abi_type); let output_reg = b.alloc_local(); let possible_type_tags = possible_true_type_tags | possible_false_type_tags; let true_type_hint = type_hint_for_value(self, &true_value); let false_type_hint = type_hint_for_value(self, &false_value); let common_type_hint = if true_type_hint == false_type_hint { true_type_hint } else { TypeHint::None }; let reg_value = value::RegValue { reg: output_reg, abi_type: phi_abi_type, possible_type_tags, type_hint: common_type_hint, }; output_value = reg_value.into(); reg_phi = Some(ops::RegPhi { output_reg: output_reg.into(), true_result_reg: true_result_reg.into(), false_result_reg: false_result_reg.into(), }); } } (Ok(true_value), Err(Error::Diverged)) => { output_value = true_value; reg_phi = None; } (Err(Error::Diverged), Ok(false_value)) => { output_value = false_value; reg_phi = None; } (Err(true_error), _) => { return Err(true_error); } (_, Err(false_error)) => { return 
Err(false_error); } }; let true_ops = built_true.b.into_ops(); let false_ops = built_false.b.into_ops(); // Avoid adding an empty `Cond` we'd have to optimise away later if reg_phi.is_some() || !true_ops.is_empty() || !false_ops.is_empty() { b.push( span, ops::OpKind::Cond(ops::CondOp { reg_phi, test_reg: test_reg.into(), true_ops, false_ops, }), ); } Ok(output_value) } fn eval_arret_fun( &mut self, fcx: &mut FunCtx<'_>, fun_expr: hir::Fun, source_name: Option<&DataStr>, ) -> Value { use crate::mir::env_values; let env_values = env_values::calculate_env_values(&fcx.local_values, &fun_expr.body_expr, source_name); Value::ArretFun(value::ArretFun::new( fcx.module_id, source_name.cloned(), fcx.mono_ty_args.clone(), env_values, fun_expr, )) } pub fn arret_fun_to_jit_boxed( &mut self, arret_fun: &value::ArretFun, ) -> Option> { // If we have non-const (i.e. "free") values in our environment we can't be const if !arret_fun.env_values().free_values.is_empty() { return None; } let entry = self.jit_thunk_for_arret_fun(arret_fun); let captures = boxed::NIL_INSTANCE.as_any_ref(); let new_boxed = boxed::FunThunk::new(self, captures, entry); let arret_fun_value = Value::ArretFun(arret_fun.clone()); self.thunk_fun_values .insert(new_boxed.as_ptr(), arret_fun_value); Some(new_boxed) } fn jit_thunk_for_arret_fun(&mut self, arret_fun: &value::ArretFun) -> boxed::ThunkEntry { // Create a dynamic thunk to this Arret function if it doesn't exist if let Some(thunk) = self.arret_fun_thunks.get(&arret_fun.id()) { return *thunk; } let thunk = unsafe { use std::mem; let wanted_abi = PolymorphAbi::thunk_abi(); let ops_fun = self.ops_for_arret_fun(arret_fun, wanted_abi); let address = self.thunk_jit.compile_fun( &self.private_funs, self.runtime_task.heap_mut().type_info_mut().interner_mut(), &ops_fun, ); mem::transmute(address as usize) }; self.arret_fun_thunks.insert(arret_fun.id(), thunk); thunk } /// Returns a private fun ID for the wanted Arret fun and ABI /// /// This will return a 
cached ID if available fn id_for_arret_fun( &mut self, arret_fun: &value::ArretFun, wanted_abi: PolymorphAbi, ) -> ops::PrivateFunId { let arret_fun_key = ArretFunKey { arret_fun_id: arret_fun.id(), polymorph_abi: wanted_abi.clone(), }; if let Some(private_fun_id) = self.arret_funs.get(&arret_fun_key) { return *private_fun_id; } // Allocate and track our private fun ID before we actually build the fun. // This is to prevent a compile-time loop if this fun ends up recursing. let private_fun_id = self.private_fun_id_counter.alloc(); self.arret_funs.insert(arret_fun_key, private_fun_id); let ops_fun = self.ops_for_arret_fun(arret_fun, wanted_abi); self.private_funs.insert(private_fun_id, ops_fun); private_fun_id } pub fn evaled_record_class_for_cons( &mut self, record_cons: &record::ConsId, ) -> &EvaledRecordClass { use crate::mir::specific_abi_type::specific_abi_type_for_ty_ref; if self.record_class_for_cons.contains_key(record_cons) { return &self.record_class_for_cons[record_cons]; } let field_abi_types = record_cons .fields() .iter() .map(|field| specific_abi_type_for_ty_ref(field.ty_ref())) .collect(); let record_struct = ops::RecordStruct::new(record_cons.ty_cons_name().clone(), field_abi_types); let registered_record_struct = self.thunk_jit.register_record_struct( &record_struct, self.runtime_task.heap_mut().type_info_mut().class_map_mut(), ); let evaled_record_class = EvaledRecordClass { jit_record_class_id: registered_record_struct.record_class_id, jit_data_layout: registered_record_struct.data_layout, record_struct, }; self.cons_for_jit_record_class_id.insert( registered_record_struct.record_class_id, record_cons.clone(), ); self.record_class_for_cons .entry(record_cons.clone()) .or_insert(evaled_record_class) } pub fn cons_for_jit_record_class_id( &self, record_class_id: boxed::RecordClassId, ) -> Option<&record::ConsId> { self.cons_for_jit_record_class_id.get(&record_class_id) } pub fn arret_fun_to_thunk_reg( &mut self, b: &mut Builder, span: Span, 
arret_fun: &value::ArretFun, ) -> BuiltReg { use crate::mir::env_values; use crate::mir::ops::*; let wanted_abi = PolymorphAbi::thunk_abi(); let private_fun_id = self.id_for_arret_fun(arret_fun, wanted_abi); let captures_reg = env_values::save_to_captures_reg(self, b, span, arret_fun.env_values()); if let Some(captures_reg) = captures_reg { b.push_reg( span, OpKind::AllocBoxedFunThunk, BoxFunThunkOp { captures_reg: captures_reg.into(), callee: ops::Callee::PrivateFun(private_fun_id), }, ) } else { let nil_reg = b.push_reg(span, OpKind::ConstBoxedNil, ()); let outer_captures_reg = b.cast_boxed(span, nil_reg, abitype::BoxedAbiType::Any); b.push_reg( span, OpKind::ConstBoxedFunThunk, BoxFunThunkOp { captures_reg: outer_captures_reg.into(), callee: ops::Callee::PrivateFun(private_fun_id), }, ) } } pub fn arret_fun_to_callback_reg( &mut self, b: &mut Builder, span: Span, arret_fun: &value::ArretFun, entry_point_abi: &CallbackEntryPointAbiType, ) -> BuiltReg { use crate::mir::env_values; use crate::mir::ops::*; let wanted_abi = entry_point_abi.clone().into(); let private_fun_id = self.id_for_arret_fun(arret_fun, wanted_abi); let captures_reg = env_values::save_to_captures_reg(self, b, span, arret_fun.env_values()) .unwrap_or_else(|| { let nil_reg = b.push_reg(span, OpKind::ConstBoxedNil, ()); b.cast_boxed(span, nil_reg, abitype::BoxedAbiType::Any) }); b.push_reg( span, OpKind::MakeCallback, MakeCallbackOp { captures_reg: captures_reg.into(), callee: ops::Callee::PrivateFun(private_fun_id), }, ) } pub(super) fn ops_for_arret_fun( &mut self, arret_fun: &value::ArretFun, wanted_abi: PolymorphAbi, ) -> ops::Fun { use crate::hir::destruc::poly_for_list_destruc; use crate::mir::arg_list::{build_load_arg_list_value, LoadedArgList}; use crate::mir::env_values; use crate::mir::optimise::optimise_fun; use crate::mir::ret_value::build_value_ret; let mut b = Builder::new(); let fun_expr = arret_fun.fun_expr(); let span = fun_expr.span; let param_list_poly = 
poly_for_list_destruc(&arret_fun.fun_expr().params); let LoadedArgList { captures_reg, param_regs, arg_list_value, } = build_load_arg_list_value(self, &mut b, &wanted_abi, ¶m_list_poly); // Start by loading the captures let mut local_values: HashMap = HashMap::new(); let mut recur_env_values = arret_fun.env_values().clone(); env_values::load_from_env_param( &mut b, span, &mut local_values, &mut recur_env_values, captures_reg, ); // Our env values have been updated with its new reg IDs let recur_arret_fun = arret_fun.with_env_values(recur_env_values); // Try to refine our polymorphic type variables based on our requested op ABI let mut stx = ty::select::SelectCtx::new(&fun_expr.pvars, &fun_expr.tvars); let fun_param_poly = hir::destruc::poly_for_list_destruc(&fun_expr.params); let wanted_abi_poly = wanted_abi.param_ty_ref(); stx.add_evidence(&fun_param_poly.into(), &wanted_abi_poly.into()); let ty_args = stx.into_poly_ty_args(); let tail_call_ctx = if wanted_abi.call_conv == ops::CallConv::FastCc { Some(TailCallCtx { self_abi: wanted_abi.clone(), captures_reg, }) } else { None }; // Now build a function context let mut fcx = FunCtx { module_id: arret_fun.module_id(), mono_ty_args: merge_apply_ty_args_into_scope( arret_fun.env_ty_args(), &ty_args, &TyArgs::empty(), ), local_values, recur_self: Some(Box::new(RecurSelf { arret_fun: &recur_arret_fun, tail_call_ctx, })), inliner_stack: inliner::ApplyStack::new(), }; let mut some_b = Some(b); Self::destruc_list( &mut some_b, span, &fun_expr.params, arg_list_value, &mut |local_id, value| { fcx.local_values.insert(local_id, value); }, ); let app_result = self.eval_expr(&mut fcx, &mut some_b, &fun_expr.body_expr); let mut b = some_b.unwrap(); build_value_ret(self, &mut b, span, app_result, &wanted_abi.ret); optimise_fun(ops::Fun { span: arret_fun.fun_expr().span, source_name: arret_fun.source_name().clone(), abi: wanted_abi.into(), param_regs, ops: b.into_ops(), }) } /// Builds a function with a callback ABI that calls a 
thunk passed as its captures fn ops_for_callback_to_thunk_adapter( &mut self, entry_point_abi: CallbackEntryPointAbiType, ) -> ops::Fun { use crate::mir::arg_list::{build_load_arg_list_value, LoadedArgList}; use crate::mir::optimise::optimise_fun; use crate::mir::ret_value::build_value_ret; let span = EMPTY_SPAN; let wanted_abi = entry_point_abi.into(); let mut b = Builder::new(); let LoadedArgList { captures_reg, param_regs, arg_list_value, } = build_load_arg_list_value( self, &mut b, &wanted_abi, &ty::List::new_uniform(Ty::Any.into()), ); let fun_reg_value = value::RegValue::new(captures_reg.unwrap(), abitype::BoxedAbiType::Any.into()); let result_value = self.build_reg_fun_thunk_app(&mut b, span, &fun_reg_value, &arg_list_value); build_value_ret(self, &mut b, span, Ok(result_value), &wanted_abi.ret); optimise_fun(ops::Fun { span, source_name: Some("callback_to_thunk_adapter".into()), abi: wanted_abi.into(), param_regs, ops: b.into_ops(), }) } pub fn thunk_reg_to_callback_reg( &mut self, b: &mut Builder, span: Span, thunk_reg_abi_type: &arret_runtime::abitype::BoxedAbiType, thunk_reg: BuiltReg, entry_point_abi: &CallbackEntryPointAbiType, ) -> BuiltReg { use crate::mir::ops::*; // Captures are of type `Any` let captures_reg = b.cast_boxed_cond( span, thunk_reg_abi_type, thunk_reg, abitype::BoxedAbiType::Any, ); let private_fun_id = self.private_fun_id_counter.alloc(); let ops_fun = self.ops_for_callback_to_thunk_adapter(entry_point_abi.clone()); self.private_funs.insert(private_fun_id, ops_fun); b.push_reg( span, OpKind::MakeCallback, MakeCallbackOp { captures_reg: captures_reg.into(), callee: ops::Callee::PrivateFun(private_fun_id), }, ) } pub fn jit_boxed_to_fun_value(&self, boxed_thunk: Gc) -> Option<&Value> { self.thunk_fun_values.get(&boxed_thunk.as_ptr()) } pub fn visit_module_defs<'a>( &mut self, module_id: ModuleId, defs: impl IntoIterator>, ) -> Result<()> { for def in defs { let hir::Def { destruc, value_expr, .. 
} = def; let mut fcx = FunCtx::new(Some(module_id)); // Don't pass a builder; we should never generate ops based on a def let source_name = Self::destruc_source_name(destruc); let value = self.eval_expr_with_source_name(&mut fcx, &mut None, value_expr, source_name)?; Self::destruc_value(&mut None, destruc, value, &mut |local_id, value| { self.global_values .insert(hir::ExportId::new(module_id, local_id), value); }); } Ok(()) } pub fn consume_module_defs( &mut self, module_id: ModuleId, defs: impl IntoIterator>, ) -> Result<()> { for def in defs { let hir::Def { destruc, value_expr, .. } = def; let mut fcx = FunCtx::new(Some(module_id)); // Don't pass a builder; we should never generate ops based on a def let source_name = Self::destruc_source_name(&destruc); let value = self.consume_expr_with_source_name(&mut fcx, &mut None, value_expr, source_name)?; Self::destruc_value(&mut None, &destruc, value, &mut |local_id, value| { self.global_values .insert(hir::ExportId::new(module_id, local_id), value); }); } Ok(()) } pub fn should_collect(&self) -> bool { self.runtime_task.heap().should_collect() } /// Collect any boxed values that are no longer reachable pub fn collect_garbage(&mut self) { use arret_runtime::boxed::collect; use std::mem; let old_heap = mem::take(self.runtime_task.heap_mut()); let mut strong_pass = collect::StrongPass::new(old_heap); // Move all of our global values to the new heap for value_ref in self.global_values.values_mut() { value::visit_value_root(&mut strong_pass, value_ref); } for value_ref in self.thunk_fun_values.values_mut() { // TODO: This can cause a circular reference with the weak pass below value::visit_value_root(&mut strong_pass, value_ref); } // Any function values that are still live need to be updated let weak_pass = strong_pass.into_weak_pass(); let old_thunk_fun_values = mem::take(&mut self.thunk_fun_values); self.thunk_fun_values = old_thunk_fun_values .into_iter() .filter_map(|(fun_thunk, value)| unsafe { weak_pass 
.new_heap_ref_for(Gc::new(fun_thunk)) .map(|new_fun_thunk| (new_fun_thunk.as_ptr(), value)) }) .collect(); *self.runtime_task.heap_mut() = weak_pass.into_new_heap(); } fn eval_expr_with_source_name( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, expr: &Expr, source_name: Option<&DataStr>, ) -> Result { use crate::mir::value::types::value_with_arret_ty; use crate::hir::ExprKind; let value = match &expr.kind { ExprKind::Lit(literal) => Ok(self.eval_lit(literal)), ExprKind::Do(exprs) => self.eval_do(fcx, b, exprs), ExprKind::Fun(fun_expr) => { Ok(self.eval_arret_fun(fcx, fun_expr.as_ref().clone(), source_name)) } ExprKind::RustFun(rust_fun) => Ok(Value::RustFun(rust_fun.clone())), ExprKind::TyPred(_, test_ty) => Ok(Value::TyPred(test_ty.clone())), ExprKind::EqPred(_) => Ok(Value::EqPred), ExprKind::RecordCons(_, record_cons) => Ok(Value::RecordCons(record_cons.clone())), ExprKind::FieldAccessor(field_accessor) => Ok(Value::FieldAccessor( field_accessor.record_cons.clone(), field_accessor.field_index, )), ExprKind::LocalRef(_, local_id) => Ok(self.eval_local_ref(fcx, *local_id)), ExprKind::ExportRef(_, export_id) => Ok(self.global_values[export_id].clone()), ExprKind::Let(hir_let) => self.eval_let(fcx, b, hir_let), ExprKind::App(app) => self.eval_app(fcx, b, &expr.result_ty, app), ExprKind::Recur(recur) => self.eval_recur(fcx, b, &expr.result_ty, recur), ExprKind::MacroExpand(span, expr) => self .eval_expr(fcx, b, expr) .map_err(|err| err.with_macro_invocation_span(*span)), ExprKind::Cond(cond) => self.eval_cond(fcx, b, cond), }?; // Annotate this value with the expression's result type as it passes through Ok(value_with_arret_ty(self, value, || { fcx.monomorphise(&expr.result_ty) })) } pub fn eval_expr( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, expr: &Expr, ) -> Result { self.eval_expr_with_source_name(fcx, b, expr, None) } fn consume_expr_with_source_name( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, expr: Expr, source_name: Option<&DataStr>, ) -> Result 
{ use crate::hir::ExprKind; match expr.kind { ExprKind::Fun(fun_expr) => Ok(self.eval_arret_fun(fcx, *fun_expr, source_name)), ExprKind::RustFun(rust_fun) => Ok(Value::RustFun(rust_fun)), _ => self.eval_expr_with_source_name(fcx, b, &expr, source_name), } } pub fn consume_expr( &mut self, fcx: &mut FunCtx<'_>, b: &mut Option, expr: Expr, ) -> Result { self.consume_expr_with_source_name(fcx, b, expr, None) } /// Evaluates the main function of a program pub fn eval_main_fun(&mut self, main_export_id: hir::ExportId) -> Result<()> { let mut fcx = FunCtx::new(Some(main_export_id.module_id())); let main_value = self.eval_local_ref(&fcx, main_export_id.local_id()); let empty_list_value = Value::List(Box::new([]), None); self.eval_value_app( &mut fcx, &mut None, EMPTY_SPAN, &Ty::unit().into(), &main_value, ApplyArgs { ty_args: &TyArgs::empty(), list_value: empty_list_value, }, )?; Ok(()) } /// Builds the main function of the program pub fn into_built_program(mut self, main_export_id: hir::ExportId) -> Result { let fcx = FunCtx::new(Some(main_export_id.module_id())); let main_value = self.eval_local_ref(&fcx, main_export_id.local_id()); let main_arret_fun = if let Value::ArretFun(main_arret_fun) = main_value { main_arret_fun } else { unimplemented!("Non-Arret main!"); }; let main_abi = PolymorphAbi { call_conv: ops::CallConv::Ccc, // Main is a top-level function; it can't capture has_captures: false, fixed_params: Box::new([]), rest_param: None, ret: abitype::RetAbiType::Void, }; let main = self.ops_for_arret_fun(&main_arret_fun, main_abi); Ok(BuiltProgram { main, private_funs: self.private_funs, }) } pub fn value_to_const(&mut self, value: &Value) -> Option> { use crate::mir::value::to_const::value_to_const; value_to_const(self, value) } } impl AsHeap for EvalHirCtx { fn as_heap(&self) -> &boxed::Heap { self.runtime_task.heap() } fn as_heap_mut(&mut self) -> &mut boxed::Heap { self.runtime_task.heap_mut() } } impl AsInterner for EvalHirCtx { fn as_interner(&self) -> 
&Interner { self.runtime_task.heap().type_info().interner() } } ================================================ FILE: compiler/mir/inliner.rs ================================================ use std::hash::{Hash, Hasher}; use arret_syntax::span::Span; use arret_runtime::boxed::prelude::*; use arret_runtime::boxed::Heap; use crate::mir::builder::Builder; use crate::mir::costing::{cost_for_ops, OpCost, OpCostFactor}; use crate::mir::env_values::EnvValues; use crate::mir::error::{Error, Result}; use crate::mir::eval_hir::ApplyArgs; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::eval_hir::FunCtx; use crate::mir::optimise::optimise_inlined_fun; use crate::mir::value; use crate::mir::value::Value; use crate::ty; /// Maximum number of consecutive inlinings in a call stack const MAX_INLINE_DEPTH: usize = 16; /// Opaque hash of an Arret fun application /// /// This is used to heuristically detect recursion loops #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub struct ApplyCookie { arret_fun_id: value::ArretFunId, arg_hash: u64, } impl ApplyCookie { pub fn new(heap: &Heap, arret_fun: &value::ArretFun, arg_list_value: &Value) -> Self { ApplyCookie { arret_fun_id: arret_fun.id(), arg_hash: hash_for_arg_list_value(heap, arg_list_value), } } } /// Tracks a stack of inline Arret fun applications /// /// This is used to make inlining decisions pub struct ApplyStack { entries: Vec, } impl ApplyStack { pub fn new() -> ApplyStack { ApplyStack { entries: vec![] } } fn with_apply_cookie(&self, apply_cookie: ApplyCookie) -> ApplyStack { use std::iter; ApplyStack { entries: self .entries .iter() .cloned() .chain(iter::once(apply_cookie)) .collect(), } } } /// Returns the scaling factor we should use to prefer inlining for a given value /// /// This is used to identify values that lose significant information when converted to a reg and /// back as part of a call. 
This is intended as a proxy for the "hidden" cost of calling to an /// out-of-line function that has lost optimisation-related information. /// /// These factors seem small but the we take the product of all args and captured free values. /// This can cause the final factor to be quite large. fn inline_preference_factor_for_value(arg_value: &Value) -> OpCostFactor { match arg_value { Value::Reg(_) => 1.0, // Consts allow for const evaling, const propagation, dead code elimination, etc Value::Const(_) => 1.1, // Lists and records can partially evaluate their type-specific operations Value::List(_, _) | Value::Record(_, _) => 1.2, // RustFuns can be const eval'ed Value::RustFun(_) => 1.5, // These can be const eval'ed or completely inlined Value::ArretFun(_) | Value::TyPred(_) | Value::EqPred | Value::RecordCons(_) | Value::FieldAccessor(_, _) => 2.0, } } /// Returns the product of the inline scaling factor for each of our arguments fn inline_preference_factor_for_arg_list_value(arg_list_value: &Value) -> OpCostFactor { match arg_list_value { Value::List(fixed, rest) => fixed .iter() .chain(rest.iter().map(AsRef::as_ref)) .map(inline_preference_factor_for_value) .product(), Value::Const(_) => 1.5, _ => 1.0, } } /// Returns the product of the inline scaling factor for each captured value fn inline_preference_factor_for_env_values(env_values: &EnvValues) -> OpCostFactor { env_values .free_values .iter() .map(|(_, value)| inline_preference_factor_for_value(value)) .product() } /// Returns the scaling factor we should use to prefer inlining for the given application fn calc_inline_preference_factor( arret_fun: &value::ArretFun, arg_list_value: &Value, ) -> OpCostFactor { inline_preference_factor_for_arg_list_value(arg_list_value) * inline_preference_factor_for_env_values(arret_fun.env_values()) } /// Hashes the passed value, poorly /// /// This can only distinguish constants; regs hash to the same value. 
It's possible for constants /// would compare as equal to receive different hashes depending on their representation. fn hash_value(heap: &Heap, value: &Value, state: &mut H) { match value { Value::List(fixed, rest) => { state.write_u8(0); state.write_usize(fixed.len()); for member in fixed.iter() { hash_value(heap, member, state); } state.write_u8(rest.is_some() as u8); if let Some(rest_value) = rest { hash_value(heap, rest_value, state); } } Value::Record(record_cons, fields) => { state.write_u8(1); record_cons.hash(state); for field in fields.iter() { hash_value(heap, field, state); } } Value::Const(any_ref) => { state.write_u8(2); any_ref.hash_in_heap(heap, state); } Value::EqPred => { state.write_u8(3); } Value::TyPred(test_ty) => { state.write_u8(4); test_ty.hash(state); } Value::RecordCons(record_cons) => { state.write_u8(5); record_cons.hash(state); } Value::FieldAccessor(record_cons, field_index) => { state.write_u8(6); record_cons.hash(state); field_index.hash(state); } Value::RustFun(rust_fun) => { state.write_u8(7); rust_fun.symbol().hash(state); } Value::ArretFun(arret_fun) => { state.write_u8(8); state.write_usize(arret_fun.env_values().const_values.len()); for (_, const_value) in arret_fun.env_values().const_values.iter() { hash_value(heap, const_value, state); } } Value::Reg(_) => { state.write_u8(9); } }; } /// Hashes the arg list, poorly /// /// This is used to detect if a recursive loop is making forward progress. Collisions will cause us /// to abort recursive inlining. fn hash_for_arg_list_value(heap: &Heap, arg_list_value: &Value) -> u64 { use std::collections::hash_map::DefaultHasher; let mut state = DefaultHasher::new(); hash_value(heap, arg_list_value, &mut state); state.finish() } /// Conditionally inlines an Arret fun /// /// This makes an inlining decision based on four criteria: /// /// 1. The approximate cost of performing a call versus inlining. 
This is calculated by attempting /// both options and measuring the cost of the ops they build. /// /// 2. The amount of knowledge lost by calling through a fun and arg regs. This is referred to as /// the inlining preference factor. This is multiplied against the call cost. /// /// 3. If the inlining limit has been reached. This is a basic fixed threshold. /// /// 4. If we've seen this exact call before in our inlining call stack. In that case we use /// `Err::AbortRecursion` to "unwind" back to the original inlining and replace it with a call. /// /// Due to the amount of partial evaluation we support this is very eager to inline. Out-of-line /// calls should only be used in extreme cases. pub(super) fn cond_inline<'a>( ehx: &mut EvalHirCtx, fcx: &mut FunCtx<'_>, outer_b: &mut Builder, span: Span, ret_ty: &ty::Ref, arret_fun: &value::ArretFun, apply_args: ApplyArgs<'a>, ) -> Result { // We need to build an out-of-line call in every case let mut call_b = Builder::new(); let call_result = ehx.build_arret_fun_app(fcx, &mut call_b, span, ret_ty, arret_fun, &apply_args); let call_ops = call_b.into_ops(); let apply_stack = &fcx.inliner_stack; let apply_cookie = ApplyCookie::new(ehx.as_heap(), arret_fun, &apply_args.list_value); if apply_stack.entries.len() >= MAX_INLINE_DEPTH || apply_stack.entries.contains(&apply_cookie) { // Abort recursion all the way back to the original call of this function // This prevents us from doing a "partial unroll" where we recurse in to one iteration // of the fun application and then bail out to a call. This is a bit gnarly as we're // using errors for flow control but it's isolated to this function. 
let abort_to = apply_stack .entries .iter() .find(|apply_cookie| apply_cookie.arret_fun_id == arret_fun.id()); if let Some(abort_to) = abort_to { return Err(Error::AbortRecursion(*abort_to)); } else { // Inline limit reached; don't attempt another inline outer_b.append(call_ops.into_vec().into_iter()); return call_result; } } let mut inline_b = Some(Builder::new()); let apply_stack = apply_stack.with_apply_cookie(apply_cookie); // Figure out how much we should prefer an inline version let inline_preference_factor = calc_inline_preference_factor(arret_fun, &apply_args.list_value); // Build an inline version let inline_result = ehx.inline_arret_fun_app(fcx, &mut inline_b, span, arret_fun, apply_args, apply_stack); let inline_ops = inline_b.unwrap().into_ops(); let inline_ops = if let Ok(ref return_value) = inline_result { // In order to cost the inline function accurately we need to optimise it first optimise_inlined_fun(inline_ops, return_value) } else { inline_ops }; // Determine if calling is cheaper let call_cost = cost_for_ops(call_ops.iter()); let inline_cost = cost_for_ops(inline_ops.iter()); if ((call_cost as OpCostFactor * inline_preference_factor) as OpCost) < inline_cost { // Calling is cheaper than inlining outer_b.append(call_ops.into_vec().into_iter()); return call_result; } match inline_result { Ok(_) | Err(Error::Diverged) => { // We either succeeded or hit a divergence - use the steps outer_b.append(inline_ops.into_vec().into_iter()); inline_result } Err(Error::AbortRecursion(abort_to_cookie)) if abort_to_cookie == apply_cookie => { // We detected another application of this fun and requested recursion is aborted back // to this point outer_b.append(call_ops.into_vec().into_iter()); call_result } Err(other) => Err(other), } } ================================================ FILE: compiler/mir/intrinsic/bitwise.rs ================================================ use arret_syntax::span::Span; use arret_runtime::abitype; use 
crate::mir::builder::Builder;
use crate::mir::error::Result;
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::intrinsic::num_utils::try_value_to_i64;
use crate::mir::intrinsic::BuildOutcome;
use crate::mir::ops::{BinaryOp, OpKind, RegId, ShiftOp};
use crate::mir::value;
use crate::mir::value::build_reg::value_to_reg;
use crate::mir::value::Value;

/// Left-folds the args with a binary 64-bit bitwise op, returning an `Int` reg value
// NOTE(review): `<O>` generic parameter list restored from extraction mangling
fn fold_bitwise_operands<O>(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
    bitwise_op: O,
) -> BuildOutcome
where
    O: Fn(RegId, BinaryOp) -> OpKind + Copy,
{
    // We can only fold a list whose length is statically known
    let mut list_iter = if let Some(list_iter) = arg_list_value.try_sized_list_iter() {
        list_iter
    } else {
        return BuildOutcome::None;
    };

    let initial_value = list_iter.next(b, span).unwrap();
    let mut acc_reg = value_to_reg(ehx, b, span, &initial_value, &abitype::AbiType::Int);

    while let Some(next_value) = list_iter.next(b, span) {
        let next_reg = value_to_reg(ehx, b, span, &next_value, &abitype::AbiType::Int);

        acc_reg = b.push_reg(
            span,
            bitwise_op,
            BinaryOp {
                lhs_reg: acc_reg.into(),
                rhs_reg: next_reg.into(),
            },
        );
    }

    BuildOutcome::ReturnValue(value::RegValue::new(acc_reg, abitype::AbiType::Int).into())
}

/// Builds a shift op for a `(value, bit-count)` arg list with a constant in-range bit count
fn bit_shift_op<O>(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
    shift_op: O,
) -> BuildOutcome
where
    O: Fn(RegId, ShiftOp) -> OpKind + Copy,
{
    let mut iter = arg_list_value.unsized_list_iter();

    let int_value = iter.next_unchecked(b, span);
    let int_reg = value_to_reg(ehx, b, span, &int_value, &abitype::AbiType::Int);

    let bit_count_value = iter.next_unchecked(b, span);
    // The bit count must be a compile-time constant to build a shift op here
    let bit_count = if let Some(bit_count) = try_value_to_i64(bit_count_value) {
        bit_count
    } else {
        return BuildOutcome::None;
    };

    // Out-of-range shifts fall back to the stdlib which handles the error case
    if !(0..=64).contains(&bit_count) {
        return BuildOutcome::None;
    }

    let result_reg = b.push_reg(
        span,
        shift_op,
        ShiftOp {
            int_reg: int_reg.into(),
            bit_count: bit_count as u32,
        },
    );

    BuildOutcome::ReturnValue(value::RegValue::new(result_reg, abitype::AbiType::Int).into())
}

pub fn bit_and(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;
    Ok(fold_bitwise_operands(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64BitwiseAnd,
    ))
}

pub fn bit_or(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;
    Ok(fold_bitwise_operands(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64BitwiseOr,
    ))
}

pub fn bit_xor(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;
    Ok(fold_bitwise_operands(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64BitwiseXor,
    ))
}

pub fn bit_not(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;

    let mut iter = arg_list_value.unsized_list_iter();

    let int_value = iter.next_unchecked(b, span);
    let int_reg = value_to_reg(ehx, b, span, &int_value, &abitype::AbiType::Int);

    let result_reg = b.push_reg(span, OpKind::Int64BitwiseNot, int_reg.into());

    Ok(BuildOutcome::ReturnValue(
        value::RegValue::new(result_reg, abitype::AbiType::Int).into(),
    ))
}

pub fn bit_shift_left(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(bit_shift_op(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64ShiftLeft,
    ))
}

pub fn bit_shift_right(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(bit_shift_op(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64ArithmeticShiftRight,
    ))
}

pub fn unsigned_bit_shift_right(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(bit_shift_op(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64LogicalShiftRight,
    ))
}

================================================
FILE: compiler/mir/intrinsic/list.rs
================================================

use arret_syntax::span::Span;

use arret_runtime::boxed;

use crate::mir::builder::Builder;
use crate::mir::error::Result;
use crate::mir::eval_hir::EvalHirCtx;
use
crate::mir::value::list::{list_value_len, ListValueLen}; use crate::mir::Value; pub fn length( ehx: &mut EvalHirCtx, b: &mut Option, span: Span, arg_list_value: &Value, ) -> Result> { let mut iter = arg_list_value.unsized_list_iter(); let single_arg = iter.next_unchecked(b, span); let list_len = list_value_len(&single_arg); if let ListValueLen::Exact(known_len) = list_len { return Ok(Some(boxed::Int::new(ehx, known_len as i64).into())); } if let Some(b) = b { use crate::mir::ops::*; use crate::mir::value; use crate::mir::value::build_reg::value_to_reg; use arret_runtime::abitype; let list_reg = value_to_reg( ehx, b, span, &single_arg, &abitype::TOP_LIST_BOXED_ABI_TYPE.into(), ); let list_len_reg = b.push_reg( span, OpKind::LoadBoxedListLen, LoadBoxedListLenOp { list_reg: list_reg.into(), min_list_len: list_len.lower_bound(), }, ); return Ok(Some( value::RegValue::new(list_len_reg, abitype::AbiType::Int).into(), )); } Ok(None) } pub fn cons( _ehx: &mut EvalHirCtx, b: &mut Option, span: Span, arg_list_value: &Value, ) -> Result> { let mut iter = arg_list_value.unsized_list_iter(); let head = iter.next_unchecked(b, span); let rest = iter.next_unchecked(b, span); Ok(Some(Value::List(Box::new([head]), Some(Box::new(rest))))) } pub fn repeat( _ehx: &mut EvalHirCtx, b: &mut Option, span: Span, arg_list_value: &Value, ) -> Result> { // Avoid creating giant constants at compile time const MAX_REPEAT_EVAL_LEN: i64 = 64; use crate::mir::intrinsic::num_utils::try_value_to_i64; let mut iter = arg_list_value.unsized_list_iter(); let count_value = iter.next_unchecked(b, span); let count = if let Some(count) = try_value_to_i64(count_value) { count } else { return Ok(None); }; let value = iter.next_unchecked(b, span); if count <= 0 { return Ok(Some(Value::List(Box::new([]), None))); } else if count > MAX_REPEAT_EVAL_LEN { return Ok(None); } // This lets us build a list of a known length and MIR values Ok(Some(Value::List( std::iter::repeat(value).take(count as usize).collect(), 
None, ))) } ================================================ FILE: compiler/mir/intrinsic/math.rs ================================================ //! Intrinsics for math operations on numbers //! //! This strives to match the behaviour of the stdlib in term of operation and conversion order. //! For multi-operand math operations we behave as if we reduce our input pairwise from left to //! right. If either pairwise operand is a `Float` then both operands are converted to `Float` and //! the result is a `Float`. If both operands are `Int`s then we perform checked math to ensure //! the value doesn't overflow its `Int` result. //! //! The input-dependent result type makes it difficult for us to build operations on values that //! aren't definite `Int` or `Float` (i.e. `Num`). Once we encounter a known `Float` we can safely //! convert every remaining operand to `Float` with at most a single branch per operand. However, if //! we encounter an unknown `Num` along with another `Num` or `Int` we don't know the result type //! of the pairwise operation. This can produce a combinatorial explosion of branches. //! //! For this reason this code will return `None` if it encounters a `Num` before a `Float`. This //! will cause us to fallback to the stdlib at runtime. //! //! This also makes no attempt at simplification or strength reduction. The presumption is LLVM is //! much better at this than we are. 
use arret_syntax::span::Span;

use arret_runtime::abitype;

use crate::mir::builder::{Builder, BuiltReg};
use crate::mir::error::Result;
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::intrinsic::num_utils::{num_value_to_float_reg, try_value_to_i64, NumOperand};
use crate::mir::intrinsic::BuildOutcome;
use crate::mir::ops::{BinaryOp, OpKind, RegId};
use crate::mir::value;
use crate::mir::value::build_reg::value_to_reg;
use crate::mir::value::list::SizedListIterator;
use crate::mir::value::Value;

/// Folds a series of numerical operands as `Float`s
///
/// This is used once we know our result will be a `Float`
fn fold_float_operands<F>(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    mut acc_float_reg: BuiltReg,
    mut list_iter: SizedListIterator,
    float_op: F,
) -> Value
where
    F: Fn(RegId, BinaryOp) -> OpKind + Copy,
{
    while let Some(value) = list_iter.next(b, span) {
        // Any remaining operand (Int, Float or runtime Num) can be coerced to a float reg
        let operand_reg = num_value_to_float_reg(ehx, b, span, &value);

        acc_float_reg = b.push_reg(
            span,
            float_op,
            BinaryOp {
                lhs_reg: acc_float_reg.into(),
                rhs_reg: operand_reg.into(),
            },
        );
    }

    value::RegValue::new(acc_float_reg, abitype::AbiType::Float).into()
}

/// Folds a series of numerical operands with the given reducers for `Int` and `Float`s
///
/// This is used when the precise type of the result is still unknown
fn fold_num_operands<I, F>(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    mut acc_int_reg: BuiltReg,
    mut list_iter: SizedListIterator,
    int64_op: I,
    float_op: F,
) -> BuildOutcome
where
    I: Fn(RegId, BinaryOp) -> OpKind + Copy,
    F: Fn(RegId, BinaryOp) -> OpKind + Copy,
{
    while let Some(value) = list_iter.next(b, span) {
        let operand = if let Some(operand) = NumOperand::try_from_value(ehx, b, span, &value) {
            operand
        } else {
            // Can't continue. Use the work we've done so far to simplify the
            // stdlib call.
            return BuildOutcome::SimplifiedArgs(Value::List(
                Box::new([
                    value::RegValue::new(acc_int_reg, abitype::AbiType::Int).into(),
                    value,
                ]),
                Some(Box::new(list_iter.into_rest())),
            ));
        };

        acc_int_reg = match operand {
            NumOperand::Int(operand_int_reg) => b.push_reg(
                span,
                int64_op,
                BinaryOp {
                    lhs_reg: acc_int_reg.into(),
                    rhs_reg: operand_int_reg.into(),
                },
            ),
            NumOperand::Float(operand_float_reg) => {
                // First definite Float encountered: promote the Int accumulator and switch
                // to the pure-float fold for the rest of the operands
                let int_as_float_reg = b.push_reg(span, OpKind::Int64ToFloat, acc_int_reg.into());

                let result_reg = b.push_reg(
                    span,
                    float_op,
                    BinaryOp {
                        lhs_reg: int_as_float_reg.into(),
                        rhs_reg: operand_float_reg.into(),
                    },
                );

                return BuildOutcome::ReturnValue(fold_float_operands(
                    ehx, b, span, result_reg, list_iter, float_op,
                ));
            }
        }
    }

    BuildOutcome::ReturnValue(value::RegValue::new(acc_int_reg, abitype::AbiType::Int).into())
}

/// Reduces a series of numerical operands with the given reducer ops for `Int` and `Float`s
///
/// This does not assume the reducers are associative
fn reduce_operands<I, F>(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    mut list_iter: SizedListIterator,
    int64_op: I,
    float_op: F,
) -> BuildOutcome
where
    I: Fn(RegId, BinaryOp) -> OpKind + Copy,
    F: Fn(RegId, BinaryOp) -> OpKind + Copy,
{
    let initial_value = list_iter.next(b, span).unwrap();

    let initial_operand =
        if let Some(initial_operand) = NumOperand::try_from_value(ehx, b, span, &initial_value) {
            initial_operand
        } else {
            // First operand is an unknown `Num`; we can't pick a result type (see module docs)
            return BuildOutcome::None;
        };

    match initial_operand {
        NumOperand::Int(int_reg) => {
            fold_num_operands(ehx, b, span, int_reg, list_iter, int64_op, float_op)
        }
        NumOperand::Float(float_reg) => BuildOutcome::ReturnValue(fold_float_operands(
            ehx, b, span, float_reg, list_iter, float_op,
        )),
    }
}

/// Reduces a series of numerical operands with the given reducer ops for `Int` and `Float`s
///
/// This assumes the reducers are associative
fn reduce_assoc_operands<I, F>(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
    int64_op: I,
    float_op: F,
) -> BuildOutcome
where
    I: Fn(RegId, BinaryOp) -> OpKind + Copy,
    F: Fn(RegId, BinaryOp) -> OpKind + Copy,
{
    let mut list_iter = if let Some(list_iter) = arg_list_value.try_sized_list_iter() {
        list_iter
    } else {
        return BuildOutcome::None;
    };

    if list_iter.len() == 1 {
        // The associative math functions (`+` and `*`) act as the identity function with 1 arg.
        // We check here so even if the value doesn't have a definite type it's still returned.
        list_iter
            .next(b, span)
            .map_or(BuildOutcome::None, BuildOutcome::ReturnValue)
    } else {
        reduce_operands(ehx, b, span, list_iter, int64_op, float_op)
    }
}

/// Builds the `+` intrinsic using checked integer / float addition
pub fn add(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;

    Ok(reduce_assoc_operands(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64CheckedAdd,
        OpKind::FloatAdd,
    ))
}

/// Builds the `*` intrinsic using checked integer / float multiplication
pub fn mul(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;

    Ok(reduce_assoc_operands(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64CheckedMul,
        OpKind::FloatMul,
    ))
}

/// Builds the `-` intrinsic; `(- x)` is treated as negation via `(- 0 x)`
pub fn sub(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;

    let list_iter = if let Some(list_iter) = arg_list_value.try_sized_list_iter() {
        list_iter
    } else {
        return Ok(BuildOutcome::None);
    };

    if list_iter.len() == 1 {
        // Rewrite `(- x)` to `(- 0 x)`
        let int_zero_reg = b.push_reg(span, OpKind::ConstInt64, 0);

        Ok(fold_num_operands(
            ehx,
            b,
            span,
            int_zero_reg,
            list_iter,
            OpKind::Int64CheckedSub,
            OpKind::FloatSub,
        ))
    } else {
        Ok(reduce_operands(
            ehx,
            b,
            span,
            list_iter,
            OpKind::Int64CheckedSub,
            OpKind::FloatSub,
        ))
    }
}

/// Builds the `/` intrinsic
///
/// Division always produces a `Float` so every operand is converted to a float reg up front.
pub fn div(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;

    let mut list_iter = if let Some(list_iter) = arg_list_value.try_sized_list_iter() {
        list_iter
    } else {
        return Ok(BuildOutcome::None);
    };

    let initial_value = list_iter.next(b, span).unwrap();
    let initial_reg = value_to_reg(ehx, b, span, &initial_value, &abitype::AbiType::Float);

    let result_reg = if list_iter.is_empty() {
        // Rewrite `(/ x)` to `(/ 1.0 x)`
        let const_one_reg = b.push_reg(span, OpKind::ConstFloat, 1.0f64);

        b.push_reg(
            span,
            OpKind::FloatDiv,
            BinaryOp {
                lhs_reg: const_one_reg.into(),
                rhs_reg: initial_reg.into(),
            },
        )
    } else {
        // Left-to-right pairwise division; not associative so no reduce helper is used
        let mut acc = initial_reg;
        while let Some(value) = list_iter.next(b, span) {
            let value_reg = value_to_reg(ehx, b, span, &value, &abitype::AbiType::Float);

            acc = b.push_reg(
                span,
                OpKind::FloatDiv,
                BinaryOp {
                    lhs_reg: acc.into(),
                    rhs_reg: value_reg.into(),
                },
            );
        }
        acc
    };

    Ok(BuildOutcome::ReturnValue(
        value::RegValue::new(result_reg, abitype::AbiType::Float).into(),
    ))
}

/// Shared implementation for `quot` and `rem`
///
/// Uses the unchecked op when the denominator is a compile-time constant that can neither
/// be zero nor cause `i64::MIN / -1` overflow; otherwise emits the checked op.
fn int_division_op<CI, UI>(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
    checked_op_kind: CI,
    unchecked_op_kind: UI,
) -> BuildOutcome
where
    CI: FnOnce(RegId, BinaryOp) -> OpKind,
    UI: FnOnce(RegId, BinaryOp) -> OpKind,
{
    let mut iter = arg_list_value.unsized_list_iter();

    let numer_value = iter.next_unchecked(b, span);
    let numer_reg = value_to_reg(ehx, b, span, &numer_value, &abitype::AbiType::Int);

    let denom_value = iter.next_unchecked(b, span);
    let denom_reg = value_to_reg(ehx, b, span, &denom_value, &abitype::AbiType::Int);

    let needs_checked = match try_value_to_i64(denom_value) {
        None => {
            // Completely unknown, we need a check
            true
        }
        Some(0) | Some(-1) => {
            // Definite divide-by-zero or possible overflow
            true
        }
        Some(_) => {
            // Don't need a check
            false
        }
    };

    let div_binary_op = BinaryOp {
        lhs_reg: numer_reg.into(),
        rhs_reg: denom_reg.into(),
    };

    let result_reg = if needs_checked {
        b.push_reg(span, checked_op_kind, div_binary_op)
    } else {
        b.push_reg(span, unchecked_op_kind, div_binary_op)
    };

    BuildOutcome::ReturnValue(value::RegValue::new(result_reg, abitype::AbiType::Int).into())
}

/// Builds the `quot` intrinsic (integer division)
pub fn quot(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(int_division_op(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64CheckedDiv,
        OpKind::Int64Div,
    ))
}

/// Builds the `rem` intrinsic (integer remainder)
pub fn rem(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(int_division_op(
        ehx,
        b,
        span,
        arg_list_value,
        OpKind::Int64CheckedRem,
        OpKind::Int64Rem,
    ))
}

/// Builds the `sqrt` intrinsic on a float-converted radicand
pub fn sqrt(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    let radicand_value = arg_list_value.unsized_list_iter().next_unchecked(b, span);
    let radicand_reg = value_to_reg(ehx, b, span, &radicand_value, &abitype::AbiType::Float);
    let result_reg = b.push_reg(span, OpKind::FloatSqrt, radicand_reg.into());

    Ok(BuildOutcome::ReturnValue(
        value::RegValue::new(result_reg, abitype::AbiType::Float).into(),
    ))
}

================================================
FILE: compiler/mir/intrinsic/mod.rs
================================================
mod bitwise;
mod list;
mod math;
mod num_utils;
mod number;
mod panics;
mod partial_print;
mod print;
mod testing;
mod vector;

use arret_syntax::span::Span;

use crate::mir::builder::Builder;
use crate::mir::error::Result;
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::Value;

/// Generates `try_eval` dispatching intrinsic names to "eval" handlers
///
/// Eval handlers may run without a builder (`b` is an `Option`) and return a replacement
/// `Value` or `None` to fall back to the stdlib implementation.
macro_rules! define_eval_intrinsics {
    ( $($name:expr => $handler:path),* ) => {
        pub fn try_eval(
            ehx: &mut EvalHirCtx,
            b: &mut Option<Builder>,
            span: Span,
            intrinsic_name: &'static str,
            arg_list_value: &Value,
        ) -> Result<Option<Value>> {
            match intrinsic_name {
                $(
                    $name => {
                        $handler(ehx, b, span, arg_list_value)
                    }
                ),*
                _ => Ok(None),
            }
        }
    };
}

/// Result of attempting to build an intrinsic inline
pub enum BuildOutcome {
    /// Nothing was built; call the stdlib implementation with the original args
    None,
    /// The intrinsic was fully built; use this value as its result
    ReturnValue(Value),
    /// The intrinsic couldn't be fully built but its args were simplified; call the
    /// stdlib implementation with these args instead
    SimplifiedArgs(Value),
}

/// Generates `try_build` dispatching intrinsic names to "build" handlers
///
/// Build handlers always have a builder available and return a [`BuildOutcome`].
macro_rules! define_build_intrinsics {
    ( $($name:expr => $handler:path),* ) => {
        pub fn try_build(
            ehx: &mut EvalHirCtx,
            b: &mut Builder,
            span: Span,
            intrinsic_name: &'static str,
            arg_list_value: &Value,
        ) -> Result<BuildOutcome> {
            match intrinsic_name {
                $(
                    $name => {
                        $handler(ehx, b, span, arg_list_value)
                    }
                ),*
                _ => Ok(BuildOutcome::None),
            }
        }
    };
}

define_eval_intrinsics! {
    "length" => list::length,
    "cons" => list::cons,
    "repeat" => list::repeat,
    "fn-op-categories" => testing::fn_op_categories
}

define_build_intrinsics!
{
    "+" => math::add,
    "*" => math::mul,
    "-" => math::sub,
    "/" => math::div,
    "quot" => math::quot,
    "rem" => math::rem,
    "sqrt" => math::sqrt,
    "int" => number::int,
    "float" => number::float,
    "<" => number::num_lt,
    "<=" => number::num_le,
    "==" => number::num_eq,
    ">" => number::num_gt,
    ">=" => number::num_ge,
    // Purity doesn't matter at the MIR level; these are both treated as impure so they're not
    // optimised away.
    "panic" => panics::panics,
    "panic!" => panics::panics,
    "print!" => print::print,
    "println!" => print::print,
    "print-str" => print::print_str,
    "vector-length" => vector::vector_length,
    "vector-ref" => vector::vector_ref,
    "bit-and" => bitwise::bit_and,
    "bit-or" => bitwise::bit_or,
    "bit-xor" => bitwise::bit_xor,
    "bit-not" => bitwise::bit_not,
    "bit-shift-left" => bitwise::bit_shift_left,
    "bit-shift-right" => bitwise::bit_shift_right,
    "unsigned-bit-shift-right" => bitwise::unsigned_bit_shift_right
}

================================================
FILE: compiler/mir/intrinsic/num_utils.rs
================================================
use arret_syntax::span::Span;

use arret_runtime::abitype;
use arret_runtime::boxed;

use crate::mir::builder::{Builder, BuiltReg};
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::value::build_reg::value_to_reg;
use crate::mir::value::types::possible_type_tags_for_value;
use crate::mir::value::Value;

/// Represents a numerical operand of a known type
pub enum NumOperand {
    Int(BuiltReg),
    Float(BuiltReg),
}

impl NumOperand {
    /// Attempts to build numerical operand from a value
    ///
    /// If the value isn't a definite `Int` or `Float` this will return `None`
    pub fn try_from_value(
        ehx: &mut EvalHirCtx,
        b: &mut Builder,
        span: Span,
        value: &Value,
    ) -> Option<NumOperand> {
        let possible_type_tags = possible_type_tags_for_value(value);

        // Classification is by exclusion: "can't be Float" means definitely Int and
        // vice versa. Values that could be either produce `None`.
        if !possible_type_tags.contains(boxed::TypeTag::Float) {
            let int64_reg = value_to_reg(ehx, b, span, value, &abitype::AbiType::Int);
            Some(NumOperand::Int(int64_reg))
        } else if !possible_type_tags.contains(boxed::TypeTag::Int) {
            let float_reg = value_to_reg(ehx, b, span, value, &abitype::AbiType::Float);
            Some(NumOperand::Float(float_reg))
        } else {
            None
        }
    }
}

/// Converts a value of type `Num` to a float reg
pub fn num_value_to_float_reg(
    ehx: &mut EvalHirCtx,
    outer_b: &mut Builder,
    span: Span,
    value: &Value,
) -> BuiltReg {
    use crate::mir::ops::*;

    let num_type_tags = [boxed::TypeTag::Int, boxed::TypeTag::Float]
        .iter()
        .collect();

    let possible_type_tags = possible_type_tags_for_value(value) & num_type_tags;

    if possible_type_tags == boxed::TypeTag::Float.into() {
        // Statically a Float; no conversion needed
        value_to_reg(ehx, outer_b, span, value, &abitype::AbiType::Float)
    } else if possible_type_tags == boxed::TypeTag::Int.into() {
        // Statically an Int; convert unconditionally
        let int64_reg = value_to_reg(ehx, outer_b, span, value, &abitype::AbiType::Int);
        outer_b.push_reg(span, OpKind::Int64ToFloat, int64_reg.into())
    } else {
        // Could be either at runtime: load the boxed type tag, branch on it, and
        // phi the two conversions back together with a `Cond` op
        let boxed_any_reg = value_to_reg(
            ehx,
            outer_b,
            span,
            value,
            &abitype::BoxedAbiType::Any.into(),
        )
        .into();

        let value_type_tag_reg = outer_b.push_reg(
            span,
            OpKind::LoadBoxedTypeTag,
            LoadBoxedTypeTagOp {
                subject_reg: boxed_any_reg,
                possible_type_tags: num_type_tags,
            },
        );

        let float_tag_reg = outer_b.push_reg(span, OpKind::ConstTypeTag, boxed::TypeTag::Float);

        let is_float_reg = outer_b.push_reg(
            span,
            OpKind::TypeTagEqual,
            BinaryOp {
                lhs_reg: value_type_tag_reg.into(),
                rhs_reg: float_tag_reg.into(),
            },
        );

        let mut is_float_b = Builder::new();
        let is_float_result_reg =
            value_to_reg(ehx, &mut is_float_b, span, value, &abitype::AbiType::Float);

        let mut is_int_b = Builder::new();
        let int64_reg = value_to_reg(ehx, &mut is_int_b, span, value, &abitype::AbiType::Int);
        let is_int_result_reg = is_int_b.push_reg(span, OpKind::Int64ToFloat, int64_reg.into());

        let output_reg = RegId::alloc();
        outer_b.push(
            span,
            OpKind::Cond(CondOp {
                reg_phi: Some(RegPhi {
                    output_reg,
                    true_result_reg: is_float_result_reg.into(),
                    false_result_reg: is_int_result_reg.into(),
                }),
                test_reg: is_float_reg.into(),
                true_ops: is_float_b.into_ops(),
                false_ops: is_int_b.into_ops(),
            }),
        );

        BuiltReg::Local(output_reg)
    }
}

/// Tries to convert a `Value` to a constant `i64`
pub fn try_value_to_i64(value: Value) -> Option<i64> {
    match value {
        Value::Const(any_ref) => any_ref
            .downcast_ref::<boxed::Int>()
            .map(|int_ref| int_ref.value()),
        _ => None,
    }
}

================================================
FILE: compiler/mir/intrinsic/number.rs
================================================
use arret_syntax::span::Span;

use arret_runtime::abitype;
use arret_runtime::boxed;
use arret_runtime::boxed::prelude::*;

use crate::mir::builder::{Builder, BuiltReg};
use crate::mir::error::Result;
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::intrinsic::num_utils::{num_value_to_float_reg, NumOperand};
use crate::mir::intrinsic::BuildOutcome;
use crate::mir::ops::Comparison;
use crate::mir::value;
use crate::mir::value::list::SizedListIterator;
use crate::mir::value::types::possible_type_tags_for_value;
use crate::mir::value::Value;

/// Builds a comparison of two numerical operands
///
/// Mixed `Int`/`Float` pairs are compared as floats; returns `None` if either operand
/// isn't a definite `Int` or `Float`.
fn build_operand_pair_compare(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    left_value: &Value,
    right_value: &Value,
    comparison: Comparison,
) -> Option<BuiltReg> {
    use crate::mir::ops::*;

    let left_num_operand = NumOperand::try_from_value(ehx, b, span, left_value)?;
    let right_num_operand = NumOperand::try_from_value(ehx, b, span, right_value)?;

    match (left_num_operand, right_num_operand) {
        (NumOperand::Int(left_int_reg), NumOperand::Int(right_int_reg)) => Some(b.push_reg(
            span,
            OpKind::IntCompare,
            CompareOp {
                comparison,
                lhs_reg: left_int_reg.into(),
                rhs_reg: right_int_reg.into(),
            },
        )),
        (NumOperand::Float(left_float_reg), NumOperand::Int(right_int_reg)) => {
            let right_float_reg = b.push_reg(span, OpKind::Int64ToFloat, right_int_reg.into());

            Some(b.push_reg(
                span,
                OpKind::FloatCompare,
                CompareOp {
                    comparison,
                    lhs_reg: left_float_reg.into(),
                    rhs_reg: right_float_reg.into(),
                },
            ))
        }
        (NumOperand::Int(left_int_reg), NumOperand::Float(right_float_reg)) => {
            let left_float_reg = b.push_reg(span,
OpKind::Int64ToFloat, left_int_reg.into());

            Some(b.push_reg(
                span,
                OpKind::FloatCompare,
                CompareOp {
                    comparison,
                    lhs_reg: left_float_reg.into(),
                    rhs_reg: right_float_reg.into(),
                },
            ))
        }
        (NumOperand::Float(left_float_reg), NumOperand::Float(right_float_reg)) => {
            Some(b.push_reg(
                span,
                OpKind::FloatCompare,
                CompareOp {
                    comparison,
                    lhs_reg: left_float_reg.into(),
                    rhs_reg: right_float_reg.into(),
                },
            ))
        }
    }
}

/// Recursively builds a chained comparison over the remaining operands
///
/// Each pairwise result short-circuits: the next comparison only runs (inside a `Cond`)
/// when the current one is true, matching `(< a b c ...)` semantics.
fn build_operand_iter_compare(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    left_value: &Value,
    rest_iter: &mut SizedListIterator,
    comparison: Comparison,
) -> Option<BuiltReg> {
    use crate::mir::ops::*;

    let right_value = rest_iter.next(b, span).unwrap();
    let compare_result_reg =
        build_operand_pair_compare(ehx, b, span, left_value, &right_value, comparison)?;

    let combined_result_reg;
    if rest_iter.is_empty() {
        // We're terminal, this is simple
        combined_result_reg = compare_result_reg;
    } else {
        // Build the rest of the chain in its own block; only executed when the
        // current pairwise comparison succeeded
        let mut rest_b = Builder::new();
        let rest_result_reg = build_operand_iter_compare(
            ehx,
            &mut rest_b,
            span,
            &right_value,
            rest_iter,
            comparison,
        )?;

        combined_result_reg = b.alloc_local();
        b.push(
            span,
            OpKind::Cond(CondOp {
                reg_phi: Some(RegPhi {
                    output_reg: combined_result_reg.into(),
                    true_result_reg: rest_result_reg.into(),
                    // This is known false
                    false_result_reg: compare_result_reg.into(),
                }),
                test_reg: compare_result_reg.into(),
                true_ops: rest_b.into_ops(),
                false_ops: Box::new([]),
            }),
        );
    };

    Some(combined_result_reg)
}

/// Shared implementation for the `<`, `<=`, `==`, `>`, `>=` intrinsics
fn compare_operand_list(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
    comparison: Comparison,
) -> BuildOutcome {
    let mut list_iter = if let Some(list_iter) = arg_list_value.try_sized_list_iter() {
        list_iter
    } else {
        return BuildOutcome::None;
    };

    if list_iter.len() == 1 {
        // A single operand is trivially ordered/equal
        return BuildOutcome::ReturnValue(boxed::TRUE_INSTANCE.as_any_ref().into());
    }

    let left_value = list_iter.next(b, span).unwrap();
    match build_operand_iter_compare(ehx, b, span, &left_value, &mut list_iter, comparison) {
        Some(result_reg) => BuildOutcome::ReturnValue(
            value::RegValue::new(result_reg, abitype::AbiType::Bool).into(),
        ),
        None => BuildOutcome::None,
    }
}

/// Builds the `int` intrinsic
///
/// Only handles the no-op case where the value is already a definite `Int`.
pub fn int(
    _ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    let value = arg_list_value.unsized_list_iter().next_unchecked(b, span);

    Ok(
        if possible_type_tags_for_value(&value) == boxed::TypeTag::Int.into() {
            BuildOutcome::ReturnValue(value)
        } else {
            BuildOutcome::None
        },
    )
}

/// Builds the `float` intrinsic by converting the argument to a float reg
pub fn float(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    let value = arg_list_value.unsized_list_iter().next_unchecked(b, span);

    Ok(BuildOutcome::ReturnValue(
        value::RegValue::new(
            num_value_to_float_reg(ehx, b, span, &value),
            abitype::AbiType::Float,
        )
        .into(),
    ))
}

/// Builds the `<` intrinsic
pub fn num_lt(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(compare_operand_list(
        ehx,
        b,
        span,
        arg_list_value,
        Comparison::Lt,
    ))
}

/// Builds the `<=` intrinsic
pub fn num_le(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(compare_operand_list(
        ehx,
        b,
        span,
        arg_list_value,
        Comparison::Le,
    ))
}

/// Builds the `==` intrinsic
pub fn num_eq(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(compare_operand_list(
        ehx,
        b,
        span,
        arg_list_value,
        Comparison::Eq,
    ))
}

/// Builds the `>` intrinsic
pub fn num_gt(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(compare_operand_list(
        ehx,
        b,
        span,
        arg_list_value,
        Comparison::Gt,
    ))
}

/// Builds the `>=` intrinsic
pub fn num_ge(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    Ok(compare_operand_list(
        ehx,
        b,
        span,
        arg_list_value,
        Comparison::Ge,
    ))
}

================================================
FILE: compiler/mir/intrinsic/panics.rs
================================================
use arret_syntax::span::Span;

use crate::mir::builder::Builder;
use crate::mir::error::{Error, Result};
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::intrinsic::BuildOutcome;
use crate::mir::value::Value;

use
crate::mir::intrinsic::partial_print::{partial_pretty_print, PartialPrint};

/// Builds the `panic`/`panic!` intrinsics
///
/// A fully constant message becomes a single `Panic` op (and the evaluation diverges);
/// a partially constant message simplifies the stdlib call's arguments.
pub fn panics(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;

    match partial_pretty_print(ehx, b, span, arg_list_value) {
        // Can simplify this to a single MIR opt
        Some(PartialPrint::Constant(message)) => {
            b.push(span, OpKind::Panic(message));
            Err(Error::Diverged)
        }
        // Still contains variables. Simplify the arguments.
        Some(partial_print) => Ok(BuildOutcome::SimplifiedArgs(
            partial_print.into_arg_list_value(ehx),
        )),
        // Declined to partial print
        None => Ok(BuildOutcome::None),
    }
}

================================================
FILE: compiler/mir/intrinsic/partial_print.rs
================================================
use arret_syntax::span::Span;

use arret_runtime::boxed;

use crate::mir::builder::Builder;
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::value::to_const::value_to_const;
use crate::mir::value::types::possible_type_tags_for_value;
use crate::mir::value::Value;

/// Result of partially pretty printing an arg list
pub enum PartialPrint {
    /// Every arg was constant; this is the complete printed output
    Constant(String),
    /// Some args were non-constant; constant runs were collapsed into string literals
    SimplifiedArgs(Box<[Value]>),
}

impl PartialPrint {
    /// Converts the partial print to an arg list suitable for passing to a stdlib print function
    pub fn into_arg_list_value(self, ehx: &mut EvalHirCtx) -> Value {
        match self {
            PartialPrint::Constant(string) => {
                Value::List(Box::new([boxed::Str::new(ehx, &string).into()]), None)
            }
            PartialPrint::SimplifiedArgs(args) => Value::List(args, None),
        }
    }
}

/// Partially pretty prints an arg list value
///
/// If partial printing isn't possible or isn't an improvement over the original arg list, `None`
/// will be returned.
pub fn partial_pretty_print(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Option<PartialPrint> {
    let mut list_iter = arg_list_value.try_sized_list_iter()?;

    let original_arg_count = list_iter.len();
    if original_arg_count < 2 {
        // Nothing we can do to simplify this further
        return None;
    }

    // Accumulates our string literal
    let mut literal_acc = String::new();
    let mut simplified_args: Vec<Value> = vec![];

    while let Some(value) = list_iter.next(b, span) {
        // Check early for function values
        // This is to avoid doing a full JIT just to print `#fn` and to allow
        // printing non-constant function values.
        if possible_type_tags_for_value(&value) == boxed::TypeTag::FunThunk.into() {
            literal_acc.push_str("#fn");
            continue;
        }

        match value_to_const(ehx, &value) {
            Some(boxed) => {
                // Constant: render it now and append to the literal run
                let mut output: Vec<u8> = vec![];
                arret_runtime_syntax::writer::pretty_print_boxed(&mut output, ehx, boxed);

                literal_acc.push_str(
                    std::str::from_utf8(&output)
                        .expect("pretty printed invalid UTF-8 during partial print"),
                );
            }
            None => {
                // Non-constant: flush the accumulated literal as a string arg,
                // then pass the value through unchanged
                if !literal_acc.is_empty() {
                    simplified_args.push(boxed::Str::new(ehx, &literal_acc).into());
                    literal_acc.clear();
                }

                simplified_args.push(value);
            }
        };
    }

    if simplified_args.is_empty() {
        Some(PartialPrint::Constant(literal_acc))
    } else {
        // Push on the end of the accumulator
        if !literal_acc.is_empty() {
            simplified_args.push(boxed::Str::new(ehx, &literal_acc).into());
        }

        if simplified_args.len() < original_arg_count {
            Some(PartialPrint::SimplifiedArgs(
                simplified_args.into_boxed_slice(),
            ))
        } else {
            // Didn't improve anything
            None
        }
    }
}

================================================
FILE: compiler/mir/intrinsic/print.rs
================================================
use arret_syntax::span::Span;

use arret_runtime::boxed;

use crate::mir::builder::Builder;
use crate::mir::error::Result;
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::intrinsic::BuildOutcome;
use crate::mir::value::Value;

use crate::mir::intrinsic::partial_print::{partial_pretty_print,
PartialPrint};

// `print!` & `println!`
/// Builds the print intrinsics by partially pretty printing their args;
/// the stdlib is still called, but with simplified arguments.
pub fn print(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    match partial_pretty_print(ehx, b, span, arg_list_value) {
        Some(partial_print) => Ok(BuildOutcome::SimplifiedArgs(
            partial_print.into_arg_list_value(ehx),
        )),
        None => Ok(BuildOutcome::None),
    }
}

// `print-str`
/// Builds `print-str`; a fully constant print becomes the result string itself.
pub fn print_str(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    match partial_pretty_print(ehx, b, span, arg_list_value) {
        Some(PartialPrint::Constant(literal_str)) => Ok(BuildOutcome::ReturnValue(
            boxed::Str::new(ehx, &literal_str).into(),
        )),
        Some(partial_print @ PartialPrint::SimplifiedArgs(_)) => Ok(BuildOutcome::SimplifiedArgs(
            partial_print.into_arg_list_value(ehx),
        )),
        None => Ok(BuildOutcome::None),
    }
}

================================================
FILE: compiler/mir/intrinsic/testing.rs
================================================
use std::collections::BTreeSet;

use arret_syntax::span::Span;

use arret_runtime::boxed;

use crate::mir::builder::Builder;
use crate::mir::error;
use crate::mir::error::{Error, Result};
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::ops;
use crate::mir::polymorph::PolymorphAbi;
use crate::mir::value;
use crate::mir::value::Value;

/// Derives the ideal polymorph ABI for a function from its param/return types
fn ideal_polymorph_abi_for_arret_fun(arret_fun: &value::ArretFun) -> PolymorphAbi {
    use crate::hir::destruc::poly_for_list_destruc;
    use crate::mir::polymorph::polymorph_abi_for_list_ty;

    let has_env = !arret_fun.env_values().free_values.is_empty();
    let fun_expr = arret_fun.fun_expr();

    let param_list_type = poly_for_list_destruc(&fun_expr.params);
    polymorph_abi_for_list_ty(has_env, &param_list_type, &fun_expr.ret_ty)
}

/// Recursively collects the categories of `ops`, descending into `Cond` branches
fn add_ops_categories<'a>(
    categories: &mut BTreeSet<ops::OpCategory>,
    ops: impl IntoIterator<Item = &'a ops::Op>,
) {
    for op in ops {
        categories.insert(op.kind().category());

        if let ops::OpKind::Cond(cond_op) = op.kind() {
            add_ops_categories(categories, cond_op.true_ops.iter());
            add_ops_categories(categories, cond_op.false_ops.iter());
        }
    }
}

/// Maps an op category to the keyword symbol name exposed to test code
fn op_category_to_string(category: ops::OpCategory) -> &'static str {
    use crate::mir::ops::OpCategory;

    match category {
        OpCategory::AllocBoxed => ":alloc-boxed",
        OpCategory::Call => ":call",
        OpCategory::ConstBox => ":const-box",
        OpCategory::ConstCastBoxed => ":const-cast-box",
        OpCategory::ConstReg => ":const-reg",
        OpCategory::Cond => ":cond",
        OpCategory::MakeCallback => ":make-callback",
        OpCategory::MemLoad => ":mem-load",
        OpCategory::CastBoxed => ":cast-boxed",
        OpCategory::RegOp => ":reg-op",
        OpCategory::Ret => ":ret",
        OpCategory::Unreachable => ":unreachable",
    }
}

/// Implements the `fn-op-categories` testing intrinsic
///
/// Compiles the given Arret function with its ideal ABI and returns the sorted,
/// de-duplicated set of op categories it uses as a list of symbols.
pub fn fn_op_categories(
    ehx: &mut EvalHirCtx,
    b: &mut Option<Builder>,
    span: Span,
    arg_list_value: &Value,
) -> Result<Option<Value>> {
    let mut iter = arg_list_value.unsized_list_iter();
    let single_arg = iter.next_unchecked(b, span);

    let arret_fun = if let Value::ArretFun(arret_fun) = single_arg {
        arret_fun
    } else {
        return Err(Error::Panic(error::Panic::new(
            span,
            "argument must be an Arret function".to_owned(),
        )));
    };

    let ideal_polymorph_abi = ideal_polymorph_abi_for_arret_fun(&arret_fun);
    let ops_fun = ehx.ops_for_arret_fun(&arret_fun, ideal_polymorph_abi);

    let mut categories = BTreeSet::<ops::OpCategory>::new();
    add_ops_categories(&mut categories, ops_fun.ops.iter());

    let category_list = boxed::List::from_values(
        ehx,
        categories.into_iter().map(op_category_to_string),
        boxed::Sym::new,
    );

    Ok(Some(category_list.into()))
}

================================================
FILE: compiler/mir/intrinsic/vector.rs
================================================
use arret_syntax::span::Span;

use arret_runtime::boxed;

use crate::mir::builder::Builder;
use crate::mir::error::Result;
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::intrinsic::num_utils::try_value_to_i64;
use crate::mir::intrinsic::BuildOutcome;
use crate::mir::value::types::known_vector_len_for_value;
use crate::mir::Value;

/// Builds the `vector-length` intrinsic
///
/// Returns a constant when the length is statically known; otherwise emits a
/// `LoadBoxedVectorLen` op.
pub fn vector_length(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::ops::*;
    use crate::mir::value;
    use crate::mir::value::build_reg::value_to_reg;
    use arret_runtime::abitype;

    let mut iter = arg_list_value.unsized_list_iter();
    let vector_value = iter.next_unchecked(b, span);

    if let Some(known_len) = known_vector_len_for_value(&vector_value) {
        return Ok(BuildOutcome::ReturnValue(
            boxed::Int::new(ehx, known_len as i64).into(),
        ));
    }

    let vector_reg = value_to_reg(
        ehx,
        b,
        span,
        &vector_value,
        &abitype::BoxedAbiType::Vector(&abitype::BoxedAbiType::Any).into(),
    )
    .into();

    let vector_len_reg = b.push_reg(span, OpKind::LoadBoxedVectorLen, vector_reg);

    Ok(BuildOutcome::ReturnValue(
        value::RegValue::new(vector_len_reg, abitype::AbiType::Int).into(),
    ))
}

/// Builds the `vector-ref` intrinsic
///
/// Only handles the case where both the vector's length and the index are known at
/// compile time; everything else falls back to the stdlib.
pub fn vector_ref(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    arg_list_value: &Value,
) -> Result<BuildOutcome> {
    use crate::mir::vector_member;

    let mut iter = arg_list_value.unsized_list_iter();
    let vector_value = iter.next_unchecked(b, span);
    let index_value = iter.next_unchecked(b, span);

    let known_len = if let Some(known_len) = known_vector_len_for_value(&vector_value) {
        known_len
    } else {
        return Ok(BuildOutcome::None);
    };

    let index = if let Some(index) = try_value_to_i64(index_value) {
        index as usize
    } else {
        return Ok(BuildOutcome::None);
    };

    Ok(BuildOutcome::ReturnValue(
        vector_member::load_vector_member(ehx, b, span, known_len, &vector_value, index),
    ))
}

================================================
FILE: compiler/mir/mod.rs
================================================
mod app_purity;
mod arg_list;
mod builder;
mod costing;
mod env_values;
mod equality;
pub mod error;
pub mod eval_hir;
mod inliner;
mod intrinsic;
pub mod ops;
mod optimise;
mod polymorph;
pub mod printer;
mod record_field;
mod ret_value;
mod rust_fun;
mod specific_abi_type;
mod tagset;
mod typred;
mod value;
mod vector_member;

pub use eval_hir::BuiltProgram;
pub use printer::print_program;
pub use value::Value;

use crate::hir;

// NOTE(review): the generic argument was lost in extraction — confirm the phase parameter
type Expr = hir::Expr<hir::Inferred>;

================================================
FILE:
compiler/mir/ops.rs
================================================
use std::rc::Rc;

use arret_runtime::abitype;
use arret_runtime::boxed;

use arret_syntax::datum::DataStr;
use arret_syntax::span::Span;

use crate::codegen::GenAbi;
use crate::id_type::ArcId;
use crate::mir::tagset::TypeTagSet;

new_counting_id_type!(PrivateFunIdCounter, PrivateFunId);
new_global_id_type!(RegId);

#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum CallConv {
    /// C calling convention
    ///
    /// This is required for thunks and callbacks because they can be called from Rust.
    Ccc,
    /// Fast calling convention
    ///
    /// This supports tail recursion.
    FastCc,
}

/// Calling convention, parameter and return ABI types for a block of ops
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct OpsAbi {
    pub call_conv: CallConv,
    pub params: Box<[abitype::AbiType]>,
    pub ret: abitype::RetAbiType,
}

impl OpsAbi {
    /// Returns the fixed ABI shared by all function thunks
    pub fn thunk_abi() -> OpsAbi {
        OpsAbi {
            call_conv: CallConv::Ccc,
            params: Box::new([
                // Captures
                abitype::BoxedAbiType::Any.into(),
                // Rest argument
                abitype::TOP_LIST_BOXED_ABI_TYPE.into(),
            ]),
            ret: abitype::BoxedAbiType::Any.into(),
        }
    }
}

/// A named symbol linked in at build time, with its purity and generated ABI
#[derive(Debug, PartialEq, Clone)]
pub struct StaticSymbol {
    pub symbol: &'static str,
    pub impure: bool,
    pub abi: GenAbi,
}

/// Represents a callable function
///
/// This is used instead of a `RegId` to make the ABI of the called function obvious for codegen's
/// analysis passes.
#[derive(Debug, PartialEq, Clone)]
pub enum Callee {
    PrivateFun(PrivateFunId),
    BoxedFunThunk(RegId),
    StaticSymbol(StaticSymbol),
}

/// Represents a structure tagged with a class ID
///
/// This is the MIR analog of [`record::Cons`](crate::ty::record::Cons)
#[derive(Debug, PartialEq, Clone)]
pub struct RecordStruct {
    pub source_name: DataStr,
    pub field_abi_types: Box<[abitype::AbiType]>,
}

pub type RecordStructId = ArcId<RecordStruct>;

impl RecordStruct {
    pub fn new(source_name: DataStr, field_abi_types: Box<[abitype::AbiType]>) -> RecordStructId {
        ArcId::new(RecordStruct {
            source_name,
            field_abi_types,
        })
    }
}

/// Call to a [`Callee`] with argument regs
#[derive(Debug, PartialEq, Clone)]
pub struct CallOp {
    pub callee: Callee,
    pub impure: bool,
    pub args: Box<[RegId]>,
}

/// Self tail call with argument regs
#[derive(Debug, PartialEq, Clone)]
pub struct TailCallOp {
    pub impure: bool,
    pub args: Box<[RegId]>,
}

/// Operands for building a boxed pair
#[derive(Debug, PartialEq, Clone)]
pub struct BoxPairOp {
    pub head_reg: RegId,
    pub rest_reg: RegId,
    pub list_len_reg: RegId,
}

/// Operands for building a boxed function thunk
#[derive(Debug, PartialEq, Clone)]
pub struct BoxFunThunkOp {
    pub captures_reg: RegId,
    pub callee: Callee,
}

/// Operands for building a boxed record of a given struct
#[derive(Debug, PartialEq, Clone)]
pub struct BoxRecordOp {
    pub record_struct: RecordStructId,
    pub field_regs: Box<[RegId]>,
}

/// Load of a single record field by index
#[derive(Debug, PartialEq, Clone)]
pub struct LoadBoxedRecordFieldOp {
    pub record_reg: RegId,
    pub record_struct: RecordStructId,
    pub field_index: usize,
}

/// Load of a single vector member from a vector of statically known length
#[derive(Debug, PartialEq, Clone)]
pub struct LoadBoxedVectorMemberOp {
    pub vector_reg: RegId,
    /// Exact known length of the vector we're loading from
    pub known_vector_len: usize,
    pub member_index: usize,
}

/// Cast of a boxed reg to another boxed ABI type
#[derive(Debug, PartialEq, Clone)]
pub struct CastBoxedOp {
    pub from_reg: RegId,
    pub to_type: abitype::BoxedAbiType,
}

/// Phi node merging the result regs of a `Cond`'s two branches
#[derive(Debug, PartialEq, Clone)]
pub struct RegPhi {
    pub output_reg: RegId,
    pub true_result_reg: RegId,
    pub false_result_reg: RegId,
}

/// Conditional branch with optional result phi
#[derive(Debug, PartialEq, Clone)]
pub struct CondOp {
    pub reg_phi: Option<RegPhi>,
    pub test_reg: RegId,
    pub true_ops: Box<[Op]>,
    pub false_ops: Box<[Op]>,
}

#[derive(Debug, PartialEq, Clone)]
pub
struct BinaryOp { pub lhs_reg: RegId, pub rhs_reg: RegId, } #[derive(Debug, PartialEq, Clone)] pub struct CompareOp { pub comparison: Comparison, pub lhs_reg: RegId, pub rhs_reg: RegId, } #[derive(Debug, PartialEq, Clone)] pub struct LoadBoxedTypeTagOp { pub subject_reg: RegId, pub possible_type_tags: TypeTagSet, } #[derive(Debug, PartialEq, Clone)] pub struct LoadBoxedListLenOp { pub list_reg: RegId, pub min_list_len: usize, } #[derive(Debug, PartialEq, Clone)] pub struct MakeCallbackOp { pub captures_reg: RegId, pub callee: Callee, } #[derive(Debug, PartialEq, Clone)] pub struct ShiftOp { pub int_reg: RegId, pub bit_count: u32, } #[derive(Debug, PartialEq, Clone, Copy)] pub enum Comparison { Lt, Le, Eq, Gt, Ge, } #[derive(Debug, PartialEq, Clone)] pub enum OpKind { ConstInt64(RegId, i64), ConstFloat(RegId, f64), ConstChar(RegId, char), ConstBool(RegId, bool), ConstInternedSym(RegId, Rc), ConstTypeTag(RegId, boxed::TypeTag), ConstRecordClassId(RegId, RecordStructId), ConstBoxedNil(RegId, ()), ConstBoxedTrue(RegId, ()), ConstBoxedFalse(RegId, ()), ConstBoxedInt(RegId, i64), ConstBoxedFloat(RegId, f64), ConstBoxedChar(RegId, char), ConstBoxedStr(RegId, Box), ConstBoxedSym(RegId, Rc), ConstBoxedPair(RegId, BoxPairOp), ConstBoxedFunThunk(RegId, BoxFunThunkOp), ConstBoxedVector(RegId, Box<[RegId]>), ConstBoxedSet(RegId, Box<[RegId]>), ConstBoxedMap(RegId, Box<[(RegId, RegId)]>), AllocBoxedInt(RegId, RegId), AllocBoxedFloat(RegId, RegId), AllocBoxedChar(RegId, RegId), AllocBoxedSym(RegId, RegId), AllocBoxedPair(RegId, BoxPairOp), AllocBoxedFunThunk(RegId, BoxFunThunkOp), ConstCastBoxed(RegId, CastBoxedOp), CastBoxed(RegId, CastBoxedOp), Alias(RegId, RegId), // TODO: This is a hack for `duplicate_alloc_ops` Call(RegId, CallOp), TailCall(RegId, TailCallOp), LoadBoxedTypeTag(RegId, LoadBoxedTypeTagOp), LoadBoxedListLen(RegId, LoadBoxedListLenOp), LoadBoxedPairHead(RegId, RegId), LoadBoxedPairRest(RegId, RegId), LoadBoxedIntValue(RegId, RegId), LoadBoxedFloatValue(RegId, 
RegId), LoadBoxedCharValue(RegId, RegId), LoadBoxedSymInterned(RegId, RegId), LoadBoxedFunThunkCaptures(RegId, RegId), LoadBoxedRecordClassId(RegId, RegId), LoadBoxedVectorLen(RegId, RegId), LoadBoxedVectorMember(RegId, LoadBoxedVectorMemberOp), Cond(CondOp), MakeCallback(RegId, MakeCallbackOp), BoolEqual(RegId, BinaryOp), CharEqual(RegId, BinaryOp), InternedSymEqual(RegId, BinaryOp), TypeTagEqual(RegId, BinaryOp), RecordClassIdEqual(RegId, BinaryOp), BoxIdentical(RegId, BinaryOp), Int64ToFloat(RegId, RegId), IntCompare(RegId, CompareOp), FloatCompare(RegId, CompareOp), FloatAdd(RegId, BinaryOp), Int64Add(RegId, BinaryOp), Int64CheckedAdd(RegId, BinaryOp), FloatMul(RegId, BinaryOp), Int64CheckedMul(RegId, BinaryOp), FloatSub(RegId, BinaryOp), Int64CheckedSub(RegId, BinaryOp), FloatDiv(RegId, BinaryOp), Int64Div(RegId, BinaryOp), Int64CheckedDiv(RegId, BinaryOp), Int64Rem(RegId, BinaryOp), Int64CheckedRem(RegId, BinaryOp), FloatSqrt(RegId, RegId), Int64BitwiseAnd(RegId, BinaryOp), Int64BitwiseOr(RegId, BinaryOp), Int64BitwiseXor(RegId, BinaryOp), Int64BitwiseNot(RegId, RegId), Int64ShiftLeft(RegId, ShiftOp), Int64LogicalShiftRight(RegId, ShiftOp), Int64ArithmeticShiftRight(RegId, ShiftOp), ConstBoxedRecord(RegId, BoxRecordOp), AllocBoxedRecord(RegId, BoxRecordOp), LoadBoxedRecordField(RegId, LoadBoxedRecordFieldOp), Ret(RegId), RetVoid, Unreachable, Panic(String), } /// Indicates the high-level category of an op #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] pub enum OpCategory { ConstReg, ConstBox, ConstCastBoxed, AllocBoxed, Call, Cond, MemLoad, CastBoxed, RegOp, MakeCallback, Ret, Unreachable, } impl OpKind { pub fn output_reg(&self) -> Option { use crate::mir::ops::OpKind::*; match self { ConstBoxedNil(reg_id, _) | ConstBoxedTrue(reg_id, _) | ConstBoxedFalse(reg_id, _) | ConstInt64(reg_id, _) | ConstFloat(reg_id, _) | ConstChar(reg_id, _) | ConstBool(reg_id, _) | ConstInternedSym(reg_id, _) | ConstTypeTag(reg_id, _) | ConstBoxedInt(reg_id, _) | 
ConstBoxedFloat(reg_id, _) | ConstBoxedChar(reg_id, _) | ConstBoxedStr(reg_id, _) | ConstBoxedSym(reg_id, _) | ConstBoxedPair(reg_id, _) | ConstBoxedFunThunk(reg_id, _) | ConstBoxedVector(reg_id, _) | ConstBoxedSet(reg_id, _) | ConstBoxedMap(reg_id, _) | ConstRecordClassId(reg_id, _) | AllocBoxedInt(reg_id, _) | AllocBoxedFloat(reg_id, _) | AllocBoxedChar(reg_id, _) | AllocBoxedSym(reg_id, _) | AllocBoxedPair(reg_id, _) | AllocBoxedFunThunk(reg_id, _) | ConstCastBoxed(reg_id, _) | CastBoxed(reg_id, _) | Alias(reg_id, _) | Call(reg_id, _) | TailCall(reg_id, _) | LoadBoxedTypeTag(reg_id, _) | LoadBoxedListLen(reg_id, _) | LoadBoxedPairHead(reg_id, _) | LoadBoxedPairRest(reg_id, _) | LoadBoxedIntValue(reg_id, _) | LoadBoxedSymInterned(reg_id, _) | LoadBoxedFloatValue(reg_id, _) | LoadBoxedCharValue(reg_id, _) | LoadBoxedFunThunkCaptures(reg_id, _) | LoadBoxedRecordClassId(reg_id, _) | LoadBoxedRecordField(reg_id, _) | LoadBoxedVectorLen(reg_id, _) | LoadBoxedVectorMember(reg_id, _) | FloatAdd(reg_id, _) | Int64Add(reg_id, _) | Int64CheckedAdd(reg_id, _) | FloatMul(reg_id, _) | Int64CheckedMul(reg_id, _) | FloatSub(reg_id, _) | Int64CheckedSub(reg_id, _) | FloatDiv(reg_id, _) | Int64Div(reg_id, _) | Int64CheckedDiv(reg_id, _) | Int64Rem(reg_id, _) | Int64CheckedRem(reg_id, _) | FloatSqrt(reg_id, _) | Int64BitwiseAnd(reg_id, _) | Int64BitwiseOr(reg_id, _) | Int64BitwiseXor(reg_id, _) | Int64BitwiseNot(reg_id, _) | Int64ShiftLeft(reg_id, _) | Int64ArithmeticShiftRight(reg_id, _) | Int64LogicalShiftRight(reg_id, _) | IntCompare(reg_id, _) | BoolEqual(reg_id, _) | CharEqual(reg_id, _) | InternedSymEqual(reg_id, _) | TypeTagEqual(reg_id, _) | RecordClassIdEqual(reg_id, _) | FloatCompare(reg_id, _) | BoxIdentical(reg_id, _) | Int64ToFloat(reg_id, _) | MakeCallback(reg_id, _) | ConstBoxedRecord(reg_id, _) | AllocBoxedRecord(reg_id, _) => Some(*reg_id), Cond(cond_op) => cond_op.reg_phi.clone().map(|reg_phi| reg_phi.output_reg), Ret(_) | RetVoid | Unreachable | Panic(_) => 
None, } } pub fn add_input_regs(&self, coll: &mut impl Extend) { use crate::mir::ops::OpKind::*; use std::iter; match self { ConstBoxedNil(_, _) | ConstBoxedTrue(_, _) | ConstBoxedFalse(_, _) | ConstInt64(_, _) | ConstFloat(_, _) | ConstChar(_, _) | ConstBool(_, _) | ConstInternedSym(_, _) | ConstTypeTag(_, _) | ConstRecordClassId(_, _) | ConstBoxedInt(_, _) | ConstBoxedFloat(_, _) | ConstBoxedChar(_, _) | ConstBoxedStr(_, _) | ConstBoxedSym(_, _) | RetVoid | Unreachable | Panic(_) => {} ConstBoxedPair(_, box_pair_op) | AllocBoxedPair(_, box_pair_op) => { coll.extend( [ box_pair_op.list_len_reg, box_pair_op.head_reg, box_pair_op.rest_reg, ] .iter() .cloned(), ); } ConstBoxedFunThunk(_, box_fun_thunk_op) | AllocBoxedFunThunk(_, box_fun_thunk_op) => { coll.extend(iter::once(box_fun_thunk_op.captures_reg)); } ConstBoxedVector(_, element_regs) | ConstBoxedSet(_, element_regs) => { coll.extend(element_regs.iter().copied()) } ConstBoxedMap(_, entry_regs) => { coll.extend( entry_regs .iter() .map(|(key, _)| *key) .chain(entry_regs.iter().map(|(_, value)| *value)), ); } AllocBoxedInt(_, reg_id) | AllocBoxedFloat(_, reg_id) | AllocBoxedChar(_, reg_id) | AllocBoxedSym(_, reg_id) | ConstCastBoxed( _, CastBoxedOp { from_reg: reg_id, .. }, ) | CastBoxed( _, CastBoxedOp { from_reg: reg_id, .. }, ) | Alias(_, reg_id) | Ret(reg_id) | LoadBoxedTypeTag( _, LoadBoxedTypeTagOp { subject_reg: reg_id, .. }, ) | LoadBoxedListLen( _, LoadBoxedListLenOp { list_reg: reg_id, .. }, ) | LoadBoxedPairHead(_, reg_id) | LoadBoxedPairRest(_, reg_id) | LoadBoxedIntValue(_, reg_id) | LoadBoxedFloatValue(_, reg_id) | LoadBoxedCharValue(_, reg_id) | LoadBoxedSymInterned(_, reg_id) | LoadBoxedFunThunkCaptures(_, reg_id) | LoadBoxedRecordClassId(_, reg_id) | LoadBoxedRecordField( _, LoadBoxedRecordFieldOp { record_reg: reg_id, .. }, ) | LoadBoxedVectorLen(_, reg_id) | LoadBoxedVectorMember( _, LoadBoxedVectorMemberOp { vector_reg: reg_id, .. 
}, ) | Int64ToFloat(_, reg_id) | FloatSqrt(_, reg_id) | Int64BitwiseNot(_, reg_id) | Int64ShiftLeft( _, ShiftOp { int_reg: reg_id, .. }, ) | Int64ArithmeticShiftRight( _, ShiftOp { int_reg: reg_id, .. }, ) | Int64LogicalShiftRight( _, ShiftOp { int_reg: reg_id, .. }, ) | MakeCallback( _, MakeCallbackOp { captures_reg: reg_id, .. }, ) => { coll.extend(iter::once(*reg_id)); } Call(_, CallOp { args, .. }) | TailCall(_, TailCallOp { args, .. }) => { coll.extend(args.iter().cloned()); } Cond(cond_op) => { coll.extend(iter::once(cond_op.test_reg)); if let Some(reg_phi) = &cond_op.reg_phi { coll.extend( [reg_phi.true_result_reg, reg_phi.false_result_reg] .iter() .cloned(), ); } for op in cond_op.true_ops.iter().chain(cond_op.false_ops.iter()) { op.kind().add_input_regs(coll); } } FloatAdd(_, binary_op) | Int64Add(_, binary_op) | Int64CheckedAdd(_, binary_op) | FloatMul(_, binary_op) | Int64CheckedMul(_, binary_op) | FloatSub(_, binary_op) | Int64CheckedSub(_, binary_op) | FloatDiv(_, binary_op) | Int64Div(_, binary_op) | Int64CheckedDiv(_, binary_op) | Int64Rem(_, binary_op) | Int64CheckedRem(_, binary_op) | Int64BitwiseAnd(_, binary_op) | Int64BitwiseOr(_, binary_op) | Int64BitwiseXor(_, binary_op) | BoolEqual(_, binary_op) | CharEqual(_, binary_op) | InternedSymEqual(_, binary_op) | TypeTagEqual(_, binary_op) | RecordClassIdEqual(_, binary_op) | BoxIdentical(_, binary_op) => { coll.extend([binary_op.lhs_reg, binary_op.rhs_reg].iter().cloned()); } IntCompare(_, compare_op) | FloatCompare(_, compare_op) => { coll.extend([compare_op.lhs_reg, compare_op.rhs_reg].iter().cloned()); } ConstBoxedRecord(_, box_record_op) | AllocBoxedRecord(_, box_record_op) => { coll.extend(box_record_op.field_regs.iter().cloned()); } } } /// Indicates if the output of this op is a constant pub fn const_output(&self) -> bool { [ OpCategory::ConstBox, OpCategory::ConstReg, OpCategory::ConstCastBoxed, ] .contains(&self.category()) } pub fn has_side_effects(&self) -> bool { use 
crate::mir::ops::OpKind::*; match self { Ret(_) | RetVoid | Unreachable | Panic(_) => true, Call(_, CallOp { impure, .. }) | TailCall(_, TailCallOp { impure, .. }) => *impure, Cond(cond_op) => cond_op .true_ops .iter() .chain(cond_op.false_ops.iter()) .any(|op| op.kind().has_side_effects()), _ => false, } } pub fn is_terminator(&self) -> bool { let category = self.category(); category == OpCategory::Ret || category == OpCategory::Unreachable } pub fn category(&self) -> OpCategory { use crate::mir::ops::OpKind::*; match self { ConstInt64(_, _) | ConstFloat(_, _) | ConstChar(_, _) | ConstBool(_, _) | ConstInternedSym(_, _) | ConstTypeTag(_, _) | ConstRecordClassId(_, _) => OpCategory::ConstReg, ConstBoxedNil(_, _) | ConstBoxedTrue(_, _) | ConstBoxedFalse(_, _) | ConstBoxedInt(_, _) | ConstBoxedFloat(_, _) | ConstBoxedChar(_, _) | ConstBoxedStr(_, _) | ConstBoxedSym(_, _) | ConstBoxedPair(_, _) | ConstBoxedFunThunk(_, _) | ConstBoxedRecord(_, _) | ConstBoxedVector(_, _) | ConstBoxedSet(_, _) | ConstBoxedMap(_, _) => OpCategory::ConstBox, AllocBoxedInt(_, _) | AllocBoxedFloat(_, _) | AllocBoxedChar(_, _) | AllocBoxedSym(_, _) | AllocBoxedPair(_, _) | AllocBoxedFunThunk(_, _) | AllocBoxedRecord(_, _) => OpCategory::AllocBoxed, LoadBoxedTypeTag(_, _) | LoadBoxedListLen(_, _) | LoadBoxedPairHead(_, _) | LoadBoxedPairRest(_, _) | LoadBoxedIntValue(_, _) | LoadBoxedFloatValue(_, _) | LoadBoxedCharValue(_, _) | LoadBoxedSymInterned(_, _) | LoadBoxedFunThunkCaptures(_, _) | LoadBoxedRecordClassId(_, _) | LoadBoxedRecordField(_, _) | LoadBoxedVectorLen(_, _) | LoadBoxedVectorMember(_, _) => OpCategory::MemLoad, FloatAdd(_, _) | Int64Add(_, _) | Int64CheckedAdd(_, _) | FloatSub(_, _) | Int64CheckedSub(_, _) | FloatMul(_, _) | Int64CheckedMul(_, _) | FloatDiv(_, _) | Int64Div(_, _) | Int64CheckedDiv(_, _) | Int64Rem(_, _) | Int64CheckedRem(_, _) | FloatSqrt(_, _) | IntCompare(_, _) | BoolEqual(_, _) | CharEqual(_, _) | InternedSymEqual(_, _) | TypeTagEqual(_, _) | 
RecordClassIdEqual(_, _) | FloatCompare(_, _) | BoxIdentical(_, _) | Int64ToFloat(_, _) | Int64BitwiseAnd(_, _) | Int64BitwiseOr(_, _) | Int64BitwiseXor(_, _) | Int64BitwiseNot(_, _) | Int64ShiftLeft(_, _) | Int64ArithmeticShiftRight(_, _) | Int64LogicalShiftRight(_, _) => OpCategory::RegOp, Ret(_) | RetVoid => OpCategory::Ret, Cond(_) => OpCategory::Cond, MakeCallback(_, _) => OpCategory::MakeCallback, ConstCastBoxed(_, _) => OpCategory::ConstCastBoxed, CastBoxed(_, _) | Alias(_, _) => OpCategory::CastBoxed, Call(_, _) | TailCall(_, _) => OpCategory::Call, Unreachable | Panic(_) => OpCategory::Unreachable, } } } #[derive(Debug, PartialEq, Clone)] pub struct Op { pub span: Span, pub kind: OpKind, } impl Op { pub fn new(span: Span, kind: OpKind) -> Op { Op { span, kind } } pub fn span(&self) -> Span { self.span } pub fn kind(&self) -> &OpKind { &self.kind } } pub struct Fun { pub span: Span, pub source_name: Option, pub abi: OpsAbi, pub param_regs: Box<[RegId]>, pub ops: Box<[Op]>, } #[cfg(test)] mod test { use super::*; use std::collections::HashSet; impl From for Op { fn from(op_kind: OpKind) -> Self { Op::new(crate::source::EMPTY_SPAN, op_kind) } } #[test] fn output_reg() { let reg1 = RegId::alloc(); assert_eq!(None, OpKind::RetVoid.output_reg()); assert_eq!(None, OpKind::Ret(reg1).output_reg()); assert_eq!(Some(reg1), OpKind::ConstInt64(reg1, 14).output_reg()); } #[test] fn has_side_effects() { let reg1 = RegId::alloc(); assert!(OpKind::RetVoid.has_side_effects()); assert!(!OpKind::ConstInt64(reg1, 14).has_side_effects()); let cond_op_with_no_side_effects = CondOp { reg_phi: None, test_reg: reg1, true_ops: Box::new([]), false_ops: Box::new([]), }; assert!(!OpKind::Cond(cond_op_with_no_side_effects).has_side_effects()); let cond_op_with_true_side_effects = CondOp { reg_phi: None, test_reg: reg1, true_ops: Box::new([OpKind::RetVoid.into()]), false_ops: Box::new([]), }; assert!(OpKind::Cond(cond_op_with_true_side_effects).has_side_effects()); let 
cond_op_with_false_side_effects = CondOp { reg_phi: None, test_reg: reg1, true_ops: Box::new([]), false_ops: Box::new([OpKind::RetVoid.into()]), }; assert!(OpKind::Cond(cond_op_with_false_side_effects).has_side_effects()); } #[test] fn const_output() { assert!(OpKind::ConstBool(RegId::alloc(), true).const_output()); assert!(OpKind::ConstBoxedFalse(RegId::alloc(), ()).const_output()); assert!(OpKind::ConstCastBoxed( RegId::alloc(), CastBoxedOp { from_reg: RegId::alloc(), to_type: abitype::BoxedAbiType::Any } ) .const_output()); assert!(!OpKind::AllocBoxedInt(RegId::alloc(), RegId::alloc()).const_output()); assert!(!OpKind::LoadBoxedListLen( RegId::alloc(), LoadBoxedListLenOp { list_reg: RegId::alloc(), min_list_len: 0 } ) .const_output()); } #[test] fn ret_input_regs() { let mut used_regs = HashSet::::new(); let ret_reg = RegId::alloc(); OpKind::Ret(ret_reg).add_input_regs(&mut used_regs); assert_eq!(1, used_regs.len()); assert!(used_regs.contains(&ret_reg)); } #[test] fn cond_input_regs() { let mut used_regs = HashSet::::new(); let output_reg = RegId::alloc(); let test_reg = RegId::alloc(); let true_input_reg = RegId::alloc(); let true_result_reg = RegId::alloc(); let false_input_reg = RegId::alloc(); let false_result_reg = RegId::alloc(); let cond_op = CondOp { reg_phi: Some(RegPhi { output_reg, true_result_reg, false_result_reg, }), test_reg, true_ops: Box::new([OpKind::Ret(true_input_reg).into()]), false_ops: Box::new([OpKind::Ret(false_input_reg).into()]), }; OpKind::Cond(cond_op).add_input_regs(&mut used_regs); for used_reg in &[ test_reg, true_input_reg, true_result_reg, false_input_reg, false_result_reg, ] { assert!(used_regs.contains(used_reg)); } assert!(!used_regs.contains(&output_reg)); } } ================================================ FILE: compiler/mir/optimise/duplicate_alloc_ops.rs ================================================ use std::collections::HashMap; use crate::mir::ops; fn visit_simple_alloc_op_kind( op_kind: &mut ops::OpKind, 
boxed_reg: ops::RegId,
    native_reg: ops::RegId,
    // Generic parameters restored: this map tracks native reg -> boxed reg, as shown by the
    // `Alias(boxed_reg, *existing_output.get())` replacement below.
    native_to_boxed: &mut HashMap<ops::RegId, ops::RegId>,
) {
    use std::collections::hash_map::Entry;

    match native_to_boxed.entry(native_reg) {
        // We already boxed this native value; alias the new output to the existing box
        Entry::Occupied(existing_output) => {
            *op_kind = ops::OpKind::Alias(boxed_reg, *existing_output.get())
        }
        // First time boxing this native value; remember its box for later
        Entry::Vacant(vacant_entry) => {
            vacant_entry.insert(boxed_reg);
        }
    }
}

/// Walks a branch of ops replacing allocs of already-boxed native values with aliases
///
/// `Cond` branches are visited with clones of the map so facts learned inside a branch don't leak
/// to the other branch or to the ops after the `Cond`.
fn remove_branch_redundant_alloc_ops(
    ops: &mut [ops::Op],
    // We can use a single `HashMap` because simple alloc ops all take distinct native reg types
    native_to_boxed: &mut HashMap<ops::RegId, ops::RegId>,
) {
    for op in ops.iter_mut() {
        match op.kind {
            ops::OpKind::AllocBoxedInt(boxed_reg, native_reg)
            | ops::OpKind::AllocBoxedFloat(boxed_reg, native_reg)
            | ops::OpKind::AllocBoxedChar(boxed_reg, native_reg)
            | ops::OpKind::AllocBoxedSym(boxed_reg, native_reg) => {
                visit_simple_alloc_op_kind(&mut op.kind, boxed_reg, native_reg, native_to_boxed);
            }
            // Loading a native value out of a box also tells us which box holds it
            ops::OpKind::LoadBoxedIntValue(native_reg, boxed_reg)
            | ops::OpKind::LoadBoxedFloatValue(native_reg, boxed_reg)
            | ops::OpKind::LoadBoxedCharValue(native_reg, boxed_reg)
            | ops::OpKind::LoadBoxedSymInterned(native_reg, boxed_reg) => {
                native_to_boxed.insert(native_reg, boxed_reg);
            }
            ops::OpKind::Cond(ref mut cond_op) => {
                remove_branch_redundant_alloc_ops(
                    &mut cond_op.true_ops,
                    &mut native_to_boxed.clone(),
                );
                remove_branch_redundant_alloc_ops(
                    &mut cond_op.false_ops,
                    &mut native_to_boxed.clone(),
                );
            }
            _ => {}
        }
    }
}

/// Updates `ops` in-place to replace allocs of the same native value with `OpKind::Alias`
pub fn remove_redundant_alloc_ops(ops: &mut [ops::Op]) {
    let mut native_to_boxed = HashMap::new();
    remove_branch_redundant_alloc_ops(ops, &mut native_to_boxed)
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::source::EMPTY_SPAN;

    #[test]
    fn test_box_different_native_regs() {
        let native_reg1 = ops::RegId::alloc();
        let boxed_reg1 = ops::RegId::alloc();
        let native_reg2 = ops::RegId::alloc();
        let boxed_reg2 = ops::RegId::alloc();

        let ops = &mut [
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::AllocBoxedInt(boxed_reg1,
native_reg1),
            ),
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::AllocBoxedInt(boxed_reg2, native_reg2),
            ),
        ];

        // Should be identical because we're boxing different native values
        let expected_ops = ops.clone();

        remove_redundant_alloc_ops(ops);
        assert_eq!(&expected_ops, ops);
    }

    #[test]
    fn test_box_same_native_regs() {
        let native_reg1 = ops::RegId::alloc();
        let boxed_reg1 = ops::RegId::alloc();
        let boxed_reg2 = ops::RegId::alloc();

        let ops = &mut [
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::AllocBoxedFloat(boxed_reg1, native_reg1),
            ),
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::AllocBoxedFloat(boxed_reg2, native_reg1),
            ),
        ];

        let expected_ops = &[
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::AllocBoxedFloat(boxed_reg1, native_reg1),
            ),
            // Should remove the redundant alloc of the same native value
            ops::Op::new(EMPTY_SPAN, ops::OpKind::Alias(boxed_reg2, boxed_reg1)),
        ];

        remove_redundant_alloc_ops(ops);
        assert_eq!(expected_ops, ops);
    }

    #[test]
    fn test_reboxing() {
        let native_reg1 = ops::RegId::alloc();
        let boxed_reg1 = ops::RegId::alloc();
        let boxed_reg2 = ops::RegId::alloc();

        let ops = &mut [
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::LoadBoxedSymInterned(native_reg1, boxed_reg1),
            ),
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::AllocBoxedSym(boxed_reg2, native_reg1),
            ),
        ];

        let expected_ops = &[
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::LoadBoxedSymInterned(native_reg1, boxed_reg1),
            ),
            // Should re-use the original box we got the native value from
            ops::Op::new(EMPTY_SPAN, ops::OpKind::Alias(boxed_reg2, boxed_reg1)),
        ];

        remove_redundant_alloc_ops(ops);
        assert_eq!(expected_ops, ops);
    }

    #[test]
    fn test_cond_branch() {
        let outer_native_reg1 = ops::RegId::alloc();
        let outer_boxed_reg1 = ops::RegId::alloc();
        let test_reg = ops::RegId::alloc();
        let branch_boxed_reg1 = ops::RegId::alloc();
        let branch_native_reg2 = ops::RegId::alloc();
        let branch_boxed_reg2 = ops::RegId::alloc();
        let outer_boxed_reg2 = ops::RegId::alloc();

        let ops = &mut [
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::AllocBoxedChar(outer_boxed_reg1, outer_native_reg1),
            ),
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::Cond(ops::CondOp {
                    reg_phi: None,
                    test_reg,
                    true_ops: Box::new([
                        ops::Op::new(
                            EMPTY_SPAN,
                            ops::OpKind::AllocBoxedChar(branch_boxed_reg1, outer_native_reg1),
                        ),
                        ops::Op::new(
                            EMPTY_SPAN,
                            ops::OpKind::AllocBoxedChar(branch_boxed_reg2, branch_native_reg2),
                        ),
                    ]),
                    false_ops: Box::new([]),
                }),
            ),
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::AllocBoxedChar(outer_boxed_reg2, branch_native_reg2),
            ),
        ];

        let expected_ops = &[
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::AllocBoxedChar(outer_boxed_reg1, outer_native_reg1),
            ),
            ops::Op::new(
                EMPTY_SPAN,
                ops::OpKind::Cond(ops::CondOp {
                    reg_phi: None,
                    test_reg,
                    true_ops: Box::new([
                        ops::Op::new(
                            EMPTY_SPAN,
                            // We can use the alloc from outside this branch
                            ops::OpKind::Alias(branch_boxed_reg1, outer_boxed_reg1),
                        ),
                        ops::Op::new(
                            EMPTY_SPAN,
                            ops::OpKind::AllocBoxedChar(branch_boxed_reg2, branch_native_reg2),
                        ),
                    ]),
                    false_ops: Box::new([]),
                }),
            ),
            ops::Op::new(
                EMPTY_SPAN,
                // We can't use an alloc from within the branch
                ops::OpKind::AllocBoxedChar(outer_boxed_reg2, branch_native_reg2),
            ),
        ];

        remove_redundant_alloc_ops(ops);
        assert_eq!(expected_ops, ops);
    }
}
================================================ FILE: compiler/mir/optimise/mod.rs ================================================
use crate::mir::ops;
use crate::mir::value::Value;

mod duplicate_alloc_ops;
mod unused_ops;

/// Runs all ops-level optimisation passes over a complete function
pub fn optimise_fun(fun: ops::Fun) -> ops::Fun {
    let mut used_ops = unused_ops::remove_unused_fun_ops(fun.ops);
    duplicate_alloc_ops::remove_redundant_alloc_ops(&mut used_ops);

    // Aliasing duplicate allocs can leave more ops unused; sweep again
    ops::Fun {
        ops: unused_ops::remove_unused_fun_ops(used_ops),
        ..fun
    }
}

/// Optimise a function that has been inlined and returned the provided value
pub fn optimise_inlined_fun(ops: Box<[ops::Op]>, return_value: &Value) -> Box<[ops::Op]> {
    let mut used_ops = unused_ops::remove_unused_value_ops(ops, return_value);
    duplicate_alloc_ops::remove_redundant_alloc_ops(&mut used_ops);
    used_ops
}
================================================
FILE: compiler/mir/optimise/unused_ops.rs ================================================
use std::collections::HashSet;

use crate::mir::ops;
use crate::mir::value::Value;

/// Removes unused ops from a `Cond`, returning `None` if the whole `Cond` can be dropped
///
/// A `Cond` can be dropped when both branches end up empty and its phi output (if any) is unused.
fn remove_unused_cond_ops(
    cond_op: ops::CondOp,
    // Generic parameters restored: the set tracks used reg IDs (inserted/queried with RegIds)
    used_regs: &mut HashSet<ops::RegId>,
) -> Option<ops::CondOp> {
    let ops::CondOp {
        reg_phi,
        test_reg,
        true_ops,
        false_ops,
    } = cond_op;

    // Determine if our output is used
    let used_reg_phi = reg_phi.filter(|reg_phi| used_regs.contains(&reg_phi.output_reg));

    // Instead of cloning `used_regs` just rollback any changes we make to it
    let (rollback_true_reg, rollback_false_reg) = match used_reg_phi {
        Some(ops::RegPhi {
            true_result_reg,
            false_result_reg,
            ..
        }) => (
            Some(true_result_reg).filter(|_| !used_regs.insert(true_result_reg)),
            Some(false_result_reg).filter(|_| !used_regs.insert(false_result_reg)),
        ),
        _ => (None, None),
    };

    let used_true_ops = remove_unused_branch_ops(true_ops, used_regs);
    let used_false_ops = remove_unused_branch_ops(false_ops, used_regs);

    if used_true_ops.is_empty() && used_false_ops.is_empty() && used_reg_phi.is_none() {
        // We can disappear!
        // NOTE(review): the rollback regs are only `Some` when `used_reg_phi` is `Some`, so these
        // removals look unreachable on this path — confirm intent against the repo history.
        if let Some(rollback_true_reg) = rollback_true_reg {
            used_regs.remove(&rollback_true_reg);
        }
        if let Some(rollback_false_reg) = rollback_false_reg {
            used_regs.remove(&rollback_false_reg);
        }
        None
    } else {
        used_regs.insert(test_reg);
        Some(ops::CondOp {
            reg_phi: used_reg_phi,
            test_reg,
            true_ops: used_true_ops,
            false_ops: used_false_ops,
        })
    }
}

/// Removes side-effect-free ops whose outputs are never used, scanning in reverse
fn remove_unused_branch_ops(
    ops: Box<[ops::Op]>,
    used_regs: &mut HashSet<ops::RegId>,
) -> Box<[ops::Op]> {
    let mut reverse_ops = ops
        .into_vec()
        .into_iter()
        .rev()
        .filter_map(|op| {
            let ops::Op { span, kind } = op;
            match kind {
                ops::OpKind::Cond(cond_op) => remove_unused_cond_ops(cond_op, used_regs)
                    .map(|cond_op| ops::Op::new(span, ops::OpKind::Cond(cond_op))),
                _ => {
                    // Does this have no side effects and its output is unused?
if !kind.has_side_effects()
                        && kind
                            .output_reg()
                            .map(|output_reg| !used_regs.contains(&output_reg))
                            .unwrap_or(true)
                    {
                        None
                    } else {
                        // Keeping this op; its inputs become used
                        kind.add_input_regs(used_regs);
                        Some(ops::Op { span, kind })
                    }
                }
            }
        })
        // Turbofish restored: the extraction had stripped this to `collect::>()`
        .collect::<Vec<_>>();

    // If we do .rev() on the iterator before we collect it will change the order we iterate in
    reverse_ops.reverse();
    reverse_ops.into_boxed_slice()
}

/// Removes unused ops from a complete function body
pub fn remove_unused_fun_ops(ops: Box<[ops::Op]>) -> Box<[ops::Op]> {
    // Nothing is used at the beginning of a function
    let mut used_regs = HashSet::new();
    remove_unused_branch_ops(ops, &mut used_regs)
}

/// Adds regs referenced by the passed value
fn add_value_used_regs(value: &Value, used_regs: &mut HashSet<ops::RegId>) {
    match value {
        // These value kinds reference no regs
        Value::Const(_)
        | Value::RustFun(_)
        | Value::TyPred(_)
        | Value::EqPred
        | Value::RecordCons(_)
        | Value::FieldAccessor(_, _) => {}
        Value::ArretFun(arret_fun) => {
            for (_, free_value) in arret_fun.env_values().free_values.iter() {
                add_value_used_regs(free_value, used_regs);
            }
        }
        Value::Reg(reg_value) => {
            used_regs.insert(reg_value.reg.into());
        }
        Value::Record(_, field_values) => {
            for field_value in field_values.iter() {
                add_value_used_regs(field_value, used_regs);
            }
        }
        Value::List(fixed_values, rest_value) => {
            for fixed_value in fixed_values.iter() {
                add_value_used_regs(fixed_value, used_regs);
            }
            if let Some(rest_value) = rest_value {
                add_value_used_regs(rest_value, used_regs);
            }
        }
    }
}

/// Removes unused ops from an inlined body whose result is `value`
pub fn remove_unused_value_ops(ops: Box<[ops::Op]>, value: &Value) -> Box<[ops::Op]> {
    let mut used_regs = HashSet::new();
    add_value_used_regs(value, &mut used_regs);
    remove_unused_branch_ops(ops, &mut used_regs)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn empty_ops() {
        let ops = remove_unused_fun_ops(Box::new([]));
        assert!(ops.is_empty());
    }

    #[test]
    fn simple_unused() {
        let reg1 = ops::RegId::alloc();
        let reg2 = ops::RegId::alloc();
        let reg3 = ops::RegId::alloc();

        let input_ops = Box::new([
            ops::OpKind::ConstBoxedNil(reg1, ()).into(),
            ops::OpKind::ConstBoxedNil(reg2, ()).into(),
ops::OpKind::ConstBoxedNil(reg3, ()).into(),
            ops::OpKind::Ret(reg2).into(),
        ]);

        let output_ops = remove_unused_fun_ops(input_ops);

        let expected_ops: Box<[ops::Op]> = Box::new([
            ops::OpKind::ConstBoxedNil(reg2, ()).into(),
            ops::OpKind::Ret(reg2).into(),
        ]);
        assert_eq!(expected_ops, output_ops);
    }

    #[test]
    fn fully_used_cond() {
        let output_reg = ops::RegId::alloc();
        let test_reg = ops::RegId::alloc();
        let true_result_reg = ops::RegId::alloc();
        let false_result_reg = ops::RegId::alloc();

        let true_ops = Box::new([ops::OpKind::ConstBoxedNil(true_result_reg, ()).into()]);
        let false_ops = Box::new([ops::OpKind::ConstBoxedNil(false_result_reg, ()).into()]);

        let input_ops: Box<[ops::Op]> = Box::new([
            ops::OpKind::ConstBoxedNil(test_reg, ()).into(),
            ops::OpKind::Cond(ops::CondOp {
                reg_phi: Some(ops::RegPhi {
                    output_reg,
                    true_result_reg,
                    false_result_reg,
                }),
                test_reg,
                true_ops,
                false_ops,
            })
            .into(),
            ops::OpKind::Ret(output_reg).into(),
        ]);

        // Everything feeds the returned phi output, so nothing should be removed
        let expected_ops = input_ops.clone();
        let output_ops = remove_unused_fun_ops(input_ops);
        assert_eq!(expected_ops, output_ops);
    }

    #[test]
    fn partially_used_cond() {
        let output_reg = ops::RegId::alloc();
        let test_reg = ops::RegId::alloc();
        let true_result_reg = ops::RegId::alloc();
        let false_result_reg = ops::RegId::alloc();

        let true_ops = Box::new([ops::OpKind::ConstBoxedNil(true_result_reg, ()).into()]);
        let false_ops = Box::new([ops::OpKind::ConstBoxedNil(false_result_reg, ()).into()]);

        let input_ops = Box::new([
            ops::OpKind::ConstBoxedNil(test_reg, ()).into(),
            ops::OpKind::Cond(ops::CondOp {
                reg_phi: Some(ops::RegPhi {
                    output_reg,
                    true_result_reg: test_reg, // This makes the true branch unused
                    false_result_reg,
                }),
                test_reg,
                true_ops,
                false_ops: false_ops.clone(),
            })
            .into(),
            ops::OpKind::Ret(output_reg).into(),
        ]);

        let expected_ops: Box<[ops::Op]> = Box::new([
            ops::OpKind::ConstBoxedNil(test_reg, ()).into(),
            ops::OpKind::Cond(ops::CondOp {
                reg_phi: Some(ops::RegPhi {
                    output_reg,
                    true_result_reg: test_reg,
                    false_result_reg,
                }),
                test_reg,
                true_ops: Box::new([]),
                false_ops,
            })
            .into(),
            ops::OpKind::Ret(output_reg).into(),
        ]);

        let output_ops = remove_unused_fun_ops(input_ops);
        assert_eq!(expected_ops, output_ops);
    }

    #[test]
    fn output_only_cond() {
        let output_reg = ops::RegId::alloc();
        let test_reg = ops::RegId::alloc();
        let true_result_reg = ops::RegId::alloc();
        let false_result_reg = ops::RegId::alloc();

        let true_ops = Box::new([ops::OpKind::ConstBoxedNil(true_result_reg, ()).into()]);
        let false_ops = Box::new([ops::OpKind::ConstBoxedNil(false_result_reg, ()).into()]);

        let input_ops = Box::new([
            ops::OpKind::ConstBoxedNil(test_reg, ()).into(),
            ops::OpKind::Cond(ops::CondOp {
                reg_phi: Some(ops::RegPhi {
                    output_reg,
                    true_result_reg: test_reg,  // This makes the true branch unused
                    false_result_reg: test_reg, // This makes the false branch unused
                }),
                test_reg,
                true_ops,
                false_ops,
            })
            .into(),
            // However, the output of the `Cond` is still used
            ops::OpKind::Ret(output_reg).into(),
        ]);

        let expected_ops: Box<[ops::Op]> = Box::new([
            ops::OpKind::ConstBoxedNil(test_reg, ()).into(),
            ops::OpKind::Cond(ops::CondOp {
                reg_phi: Some(ops::RegPhi {
                    output_reg,
                    true_result_reg: test_reg,
                    false_result_reg: test_reg,
                }),
                test_reg,
                true_ops: Box::new([]),
                false_ops: Box::new([]),
            })
            .into(),
            ops::OpKind::Ret(output_reg).into(),
        ]);

        let output_ops = remove_unused_fun_ops(input_ops);
        assert_eq!(expected_ops, output_ops);
    }

    #[test]
    fn output_unused_cond() {
        let output_reg = ops::RegId::alloc();
        let test_reg = ops::RegId::alloc();

        let true_ops = Box::new([ops::OpKind::RetVoid.into()]);
        let false_ops = Box::new([ops::OpKind::RetVoid.into()]);

        let input_ops = Box::new([
            ops::OpKind::ConstBoxedNil(test_reg, ()).into(),
            ops::OpKind::Cond(ops::CondOp {
                reg_phi: Some(ops::RegPhi {
                    output_reg,
                    true_result_reg: test_reg,  // This makes the true result unused
                    false_result_reg: test_reg, // This makes the false result unused
                }),
                test_reg,
                true_ops: true_ops.clone(),
                false_ops: false_ops.clone(),
            })
            .into(),
        ]);

        let expected_ops: Box<[ops::Op]> = Box::new([
            ops::OpKind::ConstBoxedNil(test_reg, ()).into(),
            ops::OpKind::Cond(ops::CondOp {
                reg_phi: None,
                test_reg,
                true_ops,
                false_ops,
            })
            .into(),
        ]);

        let output_ops = remove_unused_fun_ops(input_ops);
        assert_eq!(expected_ops, output_ops);
    }

    #[test]
    fn fully_unused_cond() {
        let output_reg = ops::RegId::alloc();
        let test_reg = ops::RegId::alloc();
        let true_result_reg = ops::RegId::alloc();
        let false_result_reg = ops::RegId::alloc();

        let true_ops = Box::new([ops::OpKind::ConstBoxedNil(true_result_reg, ()).into()]);
        let false_ops = Box::new([ops::OpKind::ConstBoxedNil(false_result_reg, ()).into()]);

        let input_ops = Box::new([
            ops::OpKind::ConstBoxedNil(test_reg, ()).into(),
            ops::OpKind::Cond(ops::CondOp {
                reg_phi: Some(ops::RegPhi {
                    output_reg,
                    true_result_reg: test_reg,  // This makes the true branch unused
                    false_result_reg: test_reg, // This makes the false branch unused
                }),
                test_reg,
                true_ops,
                false_ops,
            })
            .into(),
        ]);

        let output_ops = remove_unused_fun_ops(input_ops);
        assert!(output_ops.is_empty());
    }
}
================================================ FILE: compiler/mir/polymorph.rs ================================================
use arret_runtime::abitype;
use arret_runtime::callback;

use crate::mir::ops;
use crate::ty;
use crate::ty::Ty;

/// PolymorphAbi annotates OpsAbi with information about if a function expects a captures or rest
///
/// This is information that's useful while generating MIR but can be discarded when building Ops.
#[derive(PartialEq, Eq, Hash, Clone)]
pub struct PolymorphAbi {
    pub call_conv: ops::CallConv,
    pub has_captures: bool,
    pub fixed_params: Box<[abitype::AbiType]>,
    // Generic parameter restored: `thunk_abi` and `polymorph_abi_for_list_ty` both assign
    // `Some(<abi type>.into())` here and it's chained with `fixed_params` below, so this is
    // `Option<abitype::AbiType>`.
    pub rest_param: Option<abitype::AbiType>,
    pub ret: abitype::RetAbiType,
}

impl PolymorphAbi {
    /// Returns the polymorph ABI corresponding to `OpsAbi::thunk_abi`
    pub fn thunk_abi() -> PolymorphAbi {
        PolymorphAbi {
            call_conv: ops::CallConv::Ccc,
            has_captures: true,
            fixed_params: Box::new([]),
            rest_param: Some(abitype::TOP_LIST_BOXED_ABI_TYPE.into()),
            ret: abitype::BoxedAbiType::Any.into(),
        }
    }

    /// Returns the Arret type for our parameter list
    // NOTE(review): the return type's generic parameter was lost in extraction (likely
    // `ty::List<ty::Poly>` given the `poly_for_str` comparison in the tests) — confirm against
    // the repo before relying on this signature.
    pub fn param_ty_ref(&self) -> ty::List {
        use crate::ty::conv_abi::ConvertableAbiType;

        let fixed_refs = self
            .fixed_params
            .iter()
            .map(ConvertableAbiType::to_ty_ref)
            .collect();

        let rest_ref = match &self.rest_param {
            Some(abitype::AbiType::Boxed(abitype::BoxedAbiType::List(member_abi_type))) => {
                member_abi_type.to_ty_ref()
            }
            Some(other) => {
                panic!("cannot determine member type for ABI rest list {:?}", other);
            }
            None => Ty::never().into(),
        };

        // If our rest type uses a Pair this can have fixed members
        ty::List::new(fixed_refs, rest_ref)
    }
}

// Source type restored: the extraction had stripped this to `impl From for PolymorphAbi`; the
// `from` parameter shows it converts from `callback::EntryPointAbiType`.
impl From<callback::EntryPointAbiType> for PolymorphAbi {
    fn from(abi_type: callback::EntryPointAbiType) -> Self {
        PolymorphAbi {
            call_conv: ops::CallConv::Ccc,
            has_captures: true,
            fixed_params: abi_type.params.iter().cloned().collect(),
            rest_param: None,
            ret: abi_type.ret,
        }
    }
}

// Source type restored: converts from `PolymorphAbi`, flattening the optional captures and rest
// params into the plain `OpsAbi` parameter list.
impl From<PolymorphAbi> for ops::OpsAbi {
    fn from(polymorph_abi: PolymorphAbi) -> Self {
        ops::OpsAbi {
            params: Some(abitype::BoxedAbiType::Any.into())
                .filter(|_| polymorph_abi.has_captures)
                .into_iter()
                .chain(polymorph_abi.fixed_params.iter().cloned())
                .chain(polymorph_abi.rest_param.iter().cloned())
                .collect(),
            call_conv: polymorph_abi.call_conv,
            ret: polymorph_abi.ret,
        }
    }
}

/// Recommends a polymorph ABI for a given list and ret type
// NOTE(review): the generic parameters of `ty::List` and `ty::Ref` were lost in extraction —
// confirm the concrete parameters against the repo.
pub fn polymorph_abi_for_list_ty(
    has_captures: bool,
    list_ty: &ty::List,
    ret_ty: &ty::Ref,
) -> PolymorphAbi {
    use crate::mir::specific_abi_type::*;

    PolymorphAbi {
        call_conv: ops::CallConv::FastCc,
        has_captures,
        fixed_params: list_ty
            .fixed()
.iter() .map(specific_abi_type_for_ty_ref) .collect(), rest_param: Some( abitype::BoxedAbiType::List(specific_boxed_abi_type_for_ty_ref(list_ty.rest())).into(), ) .filter(|_| list_ty.has_rest()), ret: specific_ret_abi_type_for_ty_ref(ret_ty), } } #[cfg(test)] mod test { use super::*; use crate::hir; #[test] fn polymorph_abi_param_ty_ref() { use arret_runtime::abitype::EncodeBoxedAbiType; use arret_runtime::boxed; let thunk_param_poly = PolymorphAbi::thunk_abi().param_ty_ref(); let expected_poly = hir::poly_for_str("(List & Any)"); assert_eq!(expected_poly, thunk_param_poly.into()); let mul_param_poly = PolymorphAbi { call_conv: ops::CallConv::FastCc, has_captures: false, fixed_params: Box::new([boxed::Num::BOXED_ABI_TYPE.into()]), rest_param: Some(abitype::BoxedAbiType::List(&boxed::Num::BOXED_ABI_TYPE).into()), ret: boxed::Num::BOXED_ABI_TYPE.into(), } .param_ty_ref(); let expected_poly = hir::poly_for_str("(List Num & Num)"); assert_eq!(expected_poly, mul_param_poly.into()); } } ================================================ FILE: compiler/mir/printer.rs ================================================ use std::collections::HashMap; use std::io::{Result, Write}; use std::iter; use codespan_reporting::files::Files as _; use arret_syntax::span::Span; use crate::codegen::GenAbi; use crate::mir::ops; use crate::mir::BuiltProgram; use crate::source::SourceLoader; use crate::ty::conv_abi::ConvertableAbiType; fn span_to_human_location(source_loader: Option<&SourceLoader>, span: Span) -> Option { let source_loader = source_loader?; let file_id = span.file_id()?; let files = source_loader.files(); let location = files.location(file_id, span.start() as usize).ok()?; Some(format!( "{}:{}:{}", files.name(file_id).ok()?, location.line_number, location.column_number )) } fn private_fun_to_string( private_funs: &HashMap, private_fun_id: ops::PrivateFunId, ) -> String { private_funs[&private_fun_id] .source_name .clone() .map(|s| format!("%{}", s)) .unwrap_or_else(|| 
format!("[private-{}]", private_fun_id.to_u32())) } fn callee_to_string( private_funs: &HashMap, callee: &ops::Callee, ) -> String { match callee { ops::Callee::StaticSymbol(static_symbol) => format!("@{}", static_symbol.symbol), ops::Callee::PrivateFun(private_fun_id) => { private_fun_to_string(private_funs, *private_fun_id) } ops::Callee::BoxedFunThunk(thunk_reg) => { format!("<%{} as boxed::FunThunk>.entry", thunk_reg.get()) } } } fn callee_to_gen_abi( private_funs: &HashMap, callee: &ops::Callee, ) -> GenAbi { match callee { ops::Callee::StaticSymbol(static_symbol) => static_symbol.abi.clone(), ops::Callee::PrivateFun(private_fun_id) => (&private_funs[private_fun_id].abi).into(), ops::Callee::BoxedFunThunk(_) => GenAbi::thunk_abi(), } } fn box_pair_op_to_string( ops::BoxPairOp { head_reg, rest_reg, list_len_reg, }: &ops::BoxPairOp, ) -> String { format!( "boxed::Pair {{ head: %{}, rest: %{}, list_len: %{} }}", head_reg.get(), rest_reg.get(), list_len_reg.get() ) } fn box_fun_thunk_op_to_string( private_funs: &HashMap, ops::BoxFunThunkOp { captures_reg, callee, }: &ops::BoxFunThunkOp, ) -> String { format!( "boxed::FunThunk {{ captures: %{captures_reg}, entry: {entry} }}", captures_reg = captures_reg.get(), entry = callee_to_string(private_funs, callee) ) } fn box_record_op_to_string( ops::BoxRecordOp { record_struct, field_regs, }: &ops::BoxRecordOp, ) -> String { let field_strings = field_regs .iter() .zip(record_struct.field_abi_types.iter()) .map(|(field_reg, field_abi_type)| { format!("%{}: {}", field_reg.get(), field_abi_type.to_rust_str()) }) .collect::>() .join(", "); format!( "record::{} {{ {} }}", record_struct.source_name, field_strings ) } fn comparison_to_str(comparison: ops::Comparison) -> &'static str { match comparison { ops::Comparison::Lt => "<", ops::Comparison::Le => "<=", ops::Comparison::Eq => "==", ops::Comparison::Gt => ">", ops::Comparison::Ge => ">=", } } fn print_cond_branch( w: &mut dyn Write, private_funs: &HashMap, ident_level: 
usize, ops: &[ops::Op], result_reg: Option, ) -> Result<()> { let ident_level = ident_level + 1; print_branch(w, private_funs, ident_level, ops)?; if let Some(result_reg) = result_reg { for _ in 0..ident_level { write!(w, " ")?; } writeln!(w, "%{}", result_reg.get())?; } Ok(()) } fn print_branch( w: &mut dyn Write, private_funs: &HashMap, ident_level: usize, ops: &[ops::Op], ) -> Result<()> { for op in ops.iter() { for _ in 0..ident_level { write!(w, " ")?; } match &op.kind { ops::OpKind::ConstBoxedNil(reg, _) => { writeln!(w, "%{} = const boxed::NIL_INSTANCE;", reg.get())?; } ops::OpKind::ConstInt64(reg, value) => { writeln!(w, "%{} = const {}i64;", reg.get(), value)? } ops::OpKind::ConstChar(reg, value) => { writeln!(w, "%{} = const {:?}", reg.get(), value)? } ops::OpKind::ConstFloat(reg, value) => { writeln!(w, "%{} = const {}f64;", reg.get(), value)? } ops::OpKind::ConstInternedSym(reg, name) => { writeln!( w, "%{} = const interned::InternedSym {{ name: {:?} }};", reg.get(), name )?; } ops::OpKind::ConstTypeTag(reg, type_tag) => { writeln!(w, "%{} = const TypeTag::{:?};", reg.get(), type_tag)? } ops::OpKind::ConstBool(reg, value) => writeln!(w, "%{} = const {};", reg.get(), value)?, ops::OpKind::ConstBoxedTrue(reg, ()) => { writeln!(w, "%{} = const boxed::TRUE_INSTANCE;", reg.get())? } ops::OpKind::ConstBoxedFalse(reg, ()) => { writeln!(w, "%{} = const boxed::FALSE_INSTANCE;", reg.get())? 
} ops::OpKind::ConstBoxedVector(reg, element_regs) => writeln!( w, "%{} = const boxed::Vector {{ elements: [{}] }};", reg.get(), element_regs .iter() .map(|element_reg| format!("%{}", element_reg.get())) .collect::>() .join(", ") )?, ops::OpKind::ConstBoxedSet(reg, element_regs) => writeln!( w, "%{} = const boxed::Set {{ elements: [{}] }};", reg.get(), element_regs .iter() .map(|element_reg| format!("%{}", element_reg.get())) .collect::>() .join(", ") )?, ops::OpKind::ConstBoxedMap(reg, entry_regs) => writeln!( w, "%{} = const boxed::Map {{ elements: [{}] }};", reg.get(), entry_regs .iter() .map(|(key_reg, value_reg)| format!( "(%{}, %{})", key_reg.get(), value_reg.get() )) .collect::>() .join(", ") )?, ops::OpKind::ConstRecordClassId(reg, record_class_id) => writeln!( w, "%{} = const record::{}::CLASS_ID;", reg.get(), record_class_id.source_name )?, ops::OpKind::CastBoxed(reg, ops::CastBoxedOp { from_reg, to_type }) => writeln!( w, "%{} = %{} as {};", reg.get(), from_reg.get(), to_type.to_rust_str() )?, ops::OpKind::ConstCastBoxed(reg, ops::CastBoxedOp { from_reg, to_type }) => writeln!( w, "%{} = const %{} as {};", reg.get(), from_reg.get(), to_type.to_rust_str() )?, ops::OpKind::Alias(reg, from_reg) => { writeln!(w, "%{} = %{};", reg.get(), from_reg.get(),)? } ops::OpKind::Int64ToFloat(reg, from_reg) => { writeln!(w, "%{} = (%{}: i64) as f64;", reg.get(), from_reg.get(),)? 
} ops::OpKind::ConstBoxedPair(reg, box_pair_op) => { writeln!( w, "%{} = const {};", reg.get(), box_pair_op_to_string(box_pair_op) )?; } ops::OpKind::AllocBoxedPair(reg, box_pair_op) => { writeln!( w, "%{} = alloc {};", reg.get(), box_pair_op_to_string(box_pair_op) )?; } ops::OpKind::ConstBoxedInt(reg, value) => { writeln!( w, "%{} = const boxed::Int {{ value: {}i64 }};", reg.get(), value )?; } ops::OpKind::AllocBoxedInt(reg, value_reg) => { writeln!( w, "%{} = alloc boxed::Int {{ value: %{} }};", reg.get(), value_reg.get() )?; } ops::OpKind::ConstBoxedChar(reg, value) => { writeln!( w, "%{} = const boxed::Char {{ value: {:?} }};", reg.get(), value )?; } ops::OpKind::AllocBoxedChar(reg, value_reg) => { writeln!( w, "%{} = alloc boxed::Char {{ value: %{} }};", reg.get(), value_reg.get() )?; } ops::OpKind::ConstBoxedFloat(reg, value) => { writeln!( w, "%{} = const boxed::Float {{ value: {}f64 }};", reg.get(), value )?; } ops::OpKind::AllocBoxedFloat(reg, value_reg) => { writeln!( w, "%{} = alloc boxed::Float {{ value: %{} }};", reg.get(), value_reg.get() )?; } ops::OpKind::ConstBoxedFunThunk(reg, box_fun_thunk_op) => { writeln!( w, "%{} = const {};", reg.get(), box_fun_thunk_op_to_string(private_funs, box_fun_thunk_op) )?; } ops::OpKind::AllocBoxedFunThunk(reg, box_fun_thunk_op) => { writeln!( w, "%{} = alloc {};", reg.get(), box_fun_thunk_op_to_string(private_funs, box_fun_thunk_op) )?; } ops::OpKind::MakeCallback( reg, ops::MakeCallbackOp { captures_reg, callee, }, ) => { writeln!( w, "%{} = callback::Callback {{ captures: %{captures_reg}, entry_point: {entry_point} }};", reg.get(), captures_reg = captures_reg.get(), entry_point = callee_to_string(private_funs, callee) )?; } ops::OpKind::ConstBoxedRecord(reg, box_record_op) => { writeln!( w, "%{} = const {};", reg.get(), box_record_op_to_string(box_record_op) )?; } ops::OpKind::AllocBoxedRecord(reg, box_record_op) => { writeln!( w, "%{} = alloc {};", reg.get(), box_record_op_to_string(box_record_op) )?; } 
ops::OpKind::ConstBoxedSym(reg, name) => { writeln!( w, "%{} = const boxed::Sym {{ name: {:?} }};", reg.get(), name )?; } ops::OpKind::AllocBoxedSym(reg, interned_sym_reg) => { writeln!( w, "%{} = alloc boxed::Sym {{ interned: %{} }};", reg.get(), interned_sym_reg.get() )?; } ops::OpKind::ConstBoxedStr(reg, name) => { writeln!( w, "%{} = const boxed::Str {{ value: {:?} }};", reg.get(), name )?; } ops::OpKind::LoadBoxedRecordField( reg, ops::LoadBoxedRecordFieldOp { record_reg, record_struct, field_index, }, ) => { writeln!( w, "%{} = <%{} as record::{}>.{}: {};", reg.get(), record_reg.get(), record_struct.source_name, field_index, record_struct.field_abi_types[*field_index].to_rust_str() )?; } ops::OpKind::LoadBoxedTypeTag( reg, ops::LoadBoxedTypeTagOp { subject_reg, possible_type_tags, }, ) => { let type_tags_string = possible_type_tags .into_iter() .map(|type_tag| format!("{:?}", type_tag)) .collect::>() .join(", "); writeln!( w, "%{} = <%{} as boxed::Any>.type_tag in [{}];", reg.get(), subject_reg.get(), type_tags_string )?; } ops::OpKind::LoadBoxedPairHead(reg, pair_reg) => { writeln!( w, "%{} = <%{} as boxed::Pair>.head;", reg.get(), pair_reg.get() )?; } ops::OpKind::LoadBoxedPairRest(reg, pair_reg) => { writeln!( w, "%{} = <%{} as boxed::Pair>.rest;", reg.get(), pair_reg.get() )?; } ops::OpKind::LoadBoxedVectorMember( reg, ops::LoadBoxedVectorMemberOp { vector_reg, known_vector_len, member_index, }, ) => { writeln!( w, "%{reg} = <%{vector_reg} as boxed::Vector>[{member_index}] where <${vector_reg} as boxed::Vector>.len == {known_vector_len};", reg = reg.get(), vector_reg = vector_reg.get(), known_vector_len = known_vector_len, member_index = member_index )?; } ops::OpKind::LoadBoxedListLen( reg, ops::LoadBoxedListLenOp { list_reg, min_list_len, }, ) => { writeln!( w, "%{} = <%{} as boxed::List>.list_len where > {};", reg.get(), list_reg.get(), min_list_len )?; } ops::OpKind::LoadBoxedVectorLen(reg, list_reg) => { writeln!( w, "%{} = <%{} as 
boxed::Vector>.len;", reg.get(), list_reg.get(), )?; } ops::OpKind::LoadBoxedSymInterned(reg, sym_reg) => { writeln!( w, "%{} = <%{} as boxed::Sym>.interned;", reg.get(), sym_reg.get() )?; } ops::OpKind::LoadBoxedIntValue(reg, int_reg) => { writeln!( w, "%{} = <%{} as boxed::Int>.value;", reg.get(), int_reg.get() )?; } ops::OpKind::LoadBoxedFloatValue(reg, float_reg) => { writeln!( w, "%{} = <%{} as boxed::Float>.value;", reg.get(), float_reg.get() )?; } ops::OpKind::LoadBoxedCharValue(reg, float_reg) => { writeln!( w, "%{} = <%{} as boxed::Char>.value;", reg.get(), float_reg.get() )?; } ops::OpKind::LoadBoxedFunThunkCaptures(reg, fun_thunk_reg) => { writeln!( w, "%{} = <%{} as boxed::FunThunk>.env;", reg.get(), fun_thunk_reg.get() )?; } ops::OpKind::LoadBoxedRecordClassId(reg, record_reg) => { writeln!( w, "%{} = <%{} as boxed::Record>.class_id;", reg.get(), record_reg.get() )?; } ops::OpKind::IntCompare( reg, ops::CompareOp { lhs_reg, rhs_reg, comparison, }, ) => { writeln!( w, "%{} = (%{}: i64) {} (%{}: i64);", reg.get(), lhs_reg.get(), comparison_to_str(*comparison), rhs_reg.get(), )?; } ops::OpKind::FloatCompare( reg, ops::CompareOp { lhs_reg, rhs_reg, comparison, }, ) => { writeln!( w, "%{} = (%{}: f64) {} (%{}: f64);", reg.get(), lhs_reg.get(), comparison_to_str(*comparison), rhs_reg.get(), )?; } ops::OpKind::FloatAdd(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: f64) + (%{}: f64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::FloatSub(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: f64) - (%{}: f64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::FloatMul(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: f64) * (%{}: f64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::FloatDiv(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: f64) / (%{}: f64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64Add(reg, 
ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = unchecked (%{}: i64) + (%{}: i64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64CheckedAdd(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = checked (%{}: i64) + (%{}: i64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64CheckedSub(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = checked (%{}: i64) - (%{}: i64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64CheckedMul(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = checked (%{}: i64) * (%{}: i64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64Div(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = unchecked (%{}: i64) / (%{}: i64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64CheckedDiv(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = checked (%{}: i64) / (%{}: i64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64Rem(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = unchecked (%{}: i64) % (%{}: i64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64CheckedRem(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = checked (%{}: i64) % (%{}: i64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64BitwiseAnd(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: u64) & (%{}: u64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64BitwiseOr(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: u64) | (%{}: u64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64BitwiseXor(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: u64) ^ (%{}: u64);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Int64BitwiseNot(reg, int_reg) => { writeln!(w, "%{} = ~(%{}: u64);", reg.get(), int_reg.get())?; } 
ops::OpKind::Int64ShiftLeft(reg, ops::ShiftOp { int_reg, bit_count }) => { writeln!( w, "%{} = (%{}: u64) << {};", reg.get(), int_reg.get(), bit_count )?; } ops::OpKind::Int64ArithmeticShiftRight(reg, ops::ShiftOp { int_reg, bit_count }) => { writeln!( w, "%{} = (%{}: i64) >> {};", reg.get(), int_reg.get(), bit_count )?; } ops::OpKind::Int64LogicalShiftRight(reg, ops::ShiftOp { int_reg, bit_count }) => { writeln!( w, "%{} = (%{}: u64) >> {};", reg.get(), int_reg.get(), bit_count )?; } ops::OpKind::FloatSqrt(reg, radicand_reg) => { writeln!(w, "%{} = sqrt(%{}: f64);", reg.get(), radicand_reg.get(),)?; } ops::OpKind::TypeTagEqual(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: TypeTag) == (%{}: TypeTag);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::BoxIdentical(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = &(%{}: boxed::Any) == &(%{}: boxed::Any);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::BoolEqual(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: bool) == (%{}: bool);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::CharEqual(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: char) == (%{}: char);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::InternedSymEqual(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: interned::InternedSym) == (%{}: interned::InternedSym);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::RecordClassIdEqual(reg, ops::BinaryOp { lhs_reg, rhs_reg }) => { writeln!( w, "%{} = (%{}: boxed::RecordClassId) == (%{}: boxed::RecordClassId);", reg.get(), lhs_reg.get(), rhs_reg.get(), )?; } ops::OpKind::Call( reg, ops::CallOp { callee, impure, args, }, ) => { let purity = if *impure { "impure" } else { "pure" }; let callee_abi = callee_to_gen_abi(private_funs, callee); let args = Some("%task".to_owned()) .into_iter() .filter(|_| callee_abi.takes_task) 
.chain(callee_abi.params.iter().zip(args.iter()).map( |(param_abi_type, arg_reg)| { format!("%{}: {}", arg_reg.get(), param_abi_type.to_rust_str()) }, )) .collect::>() .join(", "); writeln!( w, "%{} = {} {}({}): {};", reg.get(), purity, callee_to_string(private_funs, callee), args, callee_abi.ret.to_rust_str() )?; } ops::OpKind::TailCall(reg, ops::TailCallOp { impure, args }) => { let purity = if *impure { "impure" } else { "pure" }; let args = iter::once("%task".to_owned()) .chain(args.iter().map(|arg_reg| format!(", %{}", arg_reg.get()))) .collect::(); writeln!(w, "%{} = {} recur({});", reg.get(), purity, args,)?; } ops::OpKind::Cond(cond_op) => { if let Some(reg_phi) = &cond_op.reg_phi { write!(w, "%{} = ", reg_phi.output_reg.get())?; } writeln!(w, "if %{} {{", cond_op.test_reg.get())?; print_cond_branch( w, private_funs, ident_level, &cond_op.true_ops, cond_op.reg_phi.as_ref().map(|rp| rp.true_result_reg), )?; if !(cond_op.false_ops.is_empty() && cond_op.reg_phi.is_none()) { for _ in 0..ident_level { write!(w, " ")?; } writeln!(w, "}} else {{")?; print_cond_branch( w, private_funs, ident_level, &cond_op.false_ops, cond_op.reg_phi.as_ref().map(|rp| rp.false_result_reg), )?; } for _ in 0..ident_level { write!(w, " ")?; } if cond_op.reg_phi.is_some() { writeln!(w, "}};")?; } else { writeln!(w, "}}")?; } } ops::OpKind::Ret(reg) => { writeln!(w, "return %{};", reg.get())?; } ops::OpKind::RetVoid => { writeln!(w, "return;")?; } ops::OpKind::Unreachable => { writeln!(w, "unreachable;")?; } ops::OpKind::Panic(message) => { writeln!(w, "panic({:?});", message)?; } } } Ok(()) } /// Prints a textual representation of a function's MIR to to `w` pub fn print_fun( w: &mut dyn Write, private_funs: &HashMap, ops_fun: &ops::Fun, private_fun_id: Option, ) -> Result<()> { let fun_name = ops_fun .source_name .clone() .map(|s| s.to_string()) .or_else(|| private_fun_id.map(|pfi| private_fun_to_string(private_funs, pfi))) .unwrap_or_else(|| "[anonymous]".into()); let call_conv_name = 
match ops_fun.abi.call_conv { ops::CallConv::Ccc => "extern \"C\" ", ops::CallConv::FastCc => "", }; let params = ops_fun .abi .params .iter() .zip(ops_fun.param_regs.iter()) .map(|(abi_type, param_reg)| format!(", %{}: {}", param_reg.get(), abi_type.to_rust_str())) .collect::(); writeln!( w, "{}fn {}(%task{}) -> {} {{", call_conv_name, fun_name, params, ops_fun.abi.ret.to_rust_str() )?; print_branch(w, private_funs, 1, &ops_fun.ops)?; writeln!(w, "}}")?; Ok(()) } /// Prints a textual representation of a program's MIR to to `w` /// /// This is an internal, undocumented and unstable format that has no equivalent parser. It's /// intended to aid human debugging an optimisation. pub fn print_program( w: &mut dyn Write, program: &BuiltProgram, source_loader: Option<&SourceLoader>, ) -> Result<()> { for (private_fun_id, private_fun) in &program.private_funs { if private_fun.source_name.is_none() { if let Some(human_location) = span_to_human_location(source_loader, private_fun.span) { writeln!(w, "// Anonymous function defined at {}", human_location)?; } } print_fun(w, &program.private_funs, private_fun, Some(*private_fun_id))?; writeln!(w)?; } print_fun(w, &program.private_funs, &program.main, None) } ================================================ FILE: compiler/mir/record_field.rs ================================================ use arret_syntax::span::Span; use arret_runtime::boxed; use arret_runtime::boxed::prelude::*; use crate::mir::builder::TryToBuilder; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::value; use crate::mir::value::Value; use crate::ty::record; pub fn load_record_field( ehx: &mut EvalHirCtx, b: &mut impl TryToBuilder, span: Span, record_cons: &record::ConsId, record_value: &Value, field_index: usize, ) -> Value { match record_value { Value::Record(_, fields) => fields[field_index].clone(), Value::Const(boxed_any) => { use boxed::FieldValue; let boxed_record = boxed_any .downcast_ref::() .expect("unexpected type when accessing record 
field"); match boxed_record .field_values(ehx.as_heap()) .nth(field_index) .unwrap() { FieldValue::Bool(bool_value) => boxed::Bool::singleton_ref(bool_value).into(), FieldValue::Int(int_value) => boxed::Int::new(ehx, int_value).into(), FieldValue::Float(float_value) => boxed::Float::new(ehx, float_value).into(), FieldValue::Char(char_value) => boxed::Char::new(ehx, char_value).into(), FieldValue::Boxed(boxed_any) => boxed_any.into(), FieldValue::InternedSym(interned) => { boxed::Sym::from_interned_sym(ehx, interned).into() } } } other_value => { use crate::mir::ops::*; use crate::mir::value::build_reg::value_to_reg; let record_struct = ehx .evaled_record_class_for_cons(record_cons) .record_struct .clone(); let b = if let Some(b) = b.try_to_builder() { b } else { panic!("need builder to access field of boxed record reg"); }; let record_reg = value_to_reg(ehx, b, span, other_value, &boxed::TypeTag::Record.into()); let field_reg = b.push_reg( span, OpKind::LoadBoxedRecordField, LoadBoxedRecordFieldOp { field_index, record_reg: record_reg.into(), record_struct: record_struct.clone(), }, ); let field_abi_type = record_struct.field_abi_types[field_index].clone(); value::RegValue::new(field_reg, field_abi_type).into() } } } ================================================ FILE: compiler/mir/ret_value.rs ================================================ use arret_runtime::abitype; use arret_syntax::span::Span; use crate::mir::builder::{Builder, BuiltReg}; use crate::mir::error::{Error, Result}; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::ops; use crate::mir::value; use crate::mir::value::Value; /// Builds the ops to return a value from a function /// /// This deals with uninhabited and void return values which require special handling. 
pub fn build_value_ret( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, result: Result, ret_abi: &abitype::RetAbiType, ) { use crate::mir::value::build_reg::value_to_reg; match result { Ok(result_value) => match ret_abi { abitype::RetAbiType::Inhabited(abi_type) => { let ret_reg = value_to_reg(ehx, b, span, &result_value, abi_type); b.push(span, ops::OpKind::Ret(ret_reg.into())); } abitype::RetAbiType::Never => { b.push(span, ops::OpKind::Unreachable); } abitype::RetAbiType::Void => { b.push(span, ops::OpKind::RetVoid); } }, Err(Error::Diverged) => {} Err(other) => { panic!("unexpected error when returning value: {:?}", other); } } } pub fn ret_reg_to_value(ret_reg: BuiltReg, ret_abi: abitype::RetAbiType) -> Result { match ret_abi { abitype::RetAbiType::Inhabited(abi_type) => { Ok(value::RegValue::new(ret_reg, abi_type).into()) } abitype::RetAbiType::Never => Err(Error::Diverged), abitype::RetAbiType::Void => Ok(Value::List(Box::new([]), None)), } } ================================================ FILE: compiler/mir/rust_fun.rs ================================================ use arret_syntax::span::Span; use crate::codegen::GenAbi; use crate::mir::builder::Builder; use crate::mir::error::{Error, Result}; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::ops; use crate::mir::polymorph::PolymorphAbi; use crate::mir::value::Value; use crate::rfi; use crate::ty; use crate::ty::purity::Purity; use crate::ty::Ty; /// Returns the upper bound on the purity for a Rust fun pub fn rust_fun_purity_upper_bound(rust_fun: &rfi::Fun) -> Purity { let arret_fun_type = rust_fun.arret_fun_type(); if arret_fun_type.ret().is_never() { Purity::Impure } else if arret_fun_type.purity() == &Purity::Pure.into() { Purity::Pure } else { Purity::Impure } } pub fn build_rust_fun_app( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, ret_ty: &ty::Ref, rust_fun: &rfi::Fun, call_purity: Purity, arg_list_value: Value, ) -> Result { use crate::mir::arg_list::build_save_arg_list_to_regs; use 
crate::mir::ops::*; use crate::mir::value::from_reg::reg_to_value; use arret_runtime::abitype::RetAbiType; let mut arg_abi_types = rust_fun .params() .iter() .map(|param_abi_type| ¶m_abi_type.abi_type); let rest_abi_type = if rust_fun.has_rest() { arg_abi_types.next_back() } else { None }; let arg_regs = build_save_arg_list_to_regs(ehx, b, span, arg_list_value, arg_abi_types, rest_abi_type); let purity_upper_bound = rust_fun_purity_upper_bound(rust_fun); let abi = GenAbi { takes_task: rust_fun.takes_task(), params: rust_fun.params().to_owned().into(), ret: rust_fun.ret().clone(), }; ehx.register_rust_fun_with_jit(rust_fun); let callee = ops::Callee::StaticSymbol(ops::StaticSymbol { symbol: rust_fun.symbol(), impure: purity_upper_bound == Purity::Impure, abi, }); let ret_reg = b.push_reg( span, OpKind::Call, CallOp { callee, impure: call_purity == Purity::Impure, args: arg_regs.into_boxed_slice(), }, ); match rust_fun.ret() { RetAbiType::Void => Ok(Value::List(Box::new([]), None)), RetAbiType::Never => { b.push(span, OpKind::Unreachable); Err(Error::Diverged) } RetAbiType::Inhabited(abi_type) => Ok(reg_to_value(ehx, ret_reg, abi_type, ret_ty)), } } pub fn ops_for_rust_fun( ehx: &mut EvalHirCtx, rust_fun: &rfi::Fun, wanted_abi: PolymorphAbi, ) -> ops::Fun { use crate::mir::arg_list::{build_load_arg_list_value, LoadedArgList}; use crate::mir::optimise::optimise_fun; use crate::mir::ret_value::build_value_ret; let mut b = Builder::new(); let span = rust_fun.span(); let fun_symbol = format!("{}_adapter", rust_fun.symbol()); let LoadedArgList { param_regs, arg_list_value, .. 
} = build_load_arg_list_value(ehx, &mut b, &wanted_abi, rust_fun.arret_fun_type().params()); let purity_upper_bound = rust_fun_purity_upper_bound(rust_fun); let ret_ty = Ty::Any.into(); let app_result = build_rust_fun_app( ehx, &mut b, span, &ret_ty, rust_fun, purity_upper_bound, arg_list_value, ); build_value_ret(ehx, &mut b, span, app_result, &wanted_abi.ret); optimise_fun(ops::Fun { span, source_name: Some(fun_symbol.into()), abi: wanted_abi.into(), param_regs, ops: b.into_ops(), }) } ================================================ FILE: compiler/mir/specific_abi_type.rs ================================================ use arret_runtime::abitype; use arret_runtime::boxed::TypeTag; use crate::mir::tagset::TypeTagSet; use crate::mir::value::Value; use crate::ty; use crate::ty::Ty; const ANY_BOXED_ABI_TYPE: abitype::BoxedAbiType = abitype::BoxedAbiType::Any; const TOP_RECORD_BOXED_ABI_TYPE: abitype::BoxedAbiType = abitype::BoxedAbiType::UniqueTagged(TypeTag::Record); fn specific_boxed_abi_type_for_type_tag(type_tag: TypeTag) -> &'static abitype::BoxedAbiType { use arret_runtime::abitype::EncodeBoxedAbiType; use arret_runtime::boxed; match type_tag { TypeTag::Pair => &boxed::Pair::::BOXED_ABI_TYPE, TypeTag::Vector => &boxed::Vector::::BOXED_ABI_TYPE, TypeTag::Char => &boxed::Char::BOXED_ABI_TYPE, TypeTag::Int => &boxed::Int::BOXED_ABI_TYPE, TypeTag::Float => &boxed::Float::BOXED_ABI_TYPE, TypeTag::Str => &boxed::Str::BOXED_ABI_TYPE, TypeTag::Sym => &boxed::Sym::BOXED_ABI_TYPE, TypeTag::True => &boxed::True::BOXED_ABI_TYPE, TypeTag::False => &boxed::False::BOXED_ABI_TYPE, TypeTag::Nil => &boxed::Nil::BOXED_ABI_TYPE, TypeTag::FunThunk => &boxed::FunThunk::BOXED_ABI_TYPE, TypeTag::Record => &TOP_RECORD_BOXED_ABI_TYPE, TypeTag::Set => &boxed::Set::::BOXED_ABI_TYPE, TypeTag::Map => &boxed::Map::::BOXED_ABI_TYPE, } } fn specific_abi_type_for_type_tag(type_tag: TypeTag) -> abitype::AbiType { match type_tag { TypeTag::Int => abitype::AbiType::Int, TypeTag::Float => 
abitype::AbiType::Float, TypeTag::Char => abitype::AbiType::Char, TypeTag::Sym => abitype::AbiType::InternedSym, other_tag => specific_boxed_abi_type_for_type_tag(other_tag) .clone() .into(), } } fn specific_boxed_abi_type_for_type_tags( possible_type_tags: TypeTagSet, ) -> &'static abitype::BoxedAbiType { use arret_runtime::abitype::EncodeBoxedAbiType; use arret_runtime::boxed; if possible_type_tags.len() == 1 { let single_type_tag = possible_type_tags.into_iter().next().unwrap(); specific_boxed_abi_type_for_type_tag(single_type_tag) } else if possible_type_tags == [TypeTag::Pair, TypeTag::Nil].iter().collect() { &boxed::List::::BOXED_ABI_TYPE } else if possible_type_tags == [TypeTag::Float, TypeTag::Int].iter().collect() { &boxed::Num::BOXED_ABI_TYPE } else if possible_type_tags == [TypeTag::True, TypeTag::False].iter().collect() { &boxed::Bool::BOXED_ABI_TYPE } else { &ANY_BOXED_ABI_TYPE } } pub fn specific_boxed_abi_type_for_ty_ref( ty_ref: &ty::Ref, ) -> &'static abitype::BoxedAbiType { specific_boxed_abi_type_for_type_tags(ty_ref.into()) } fn specific_abi_type_for_type_tags(possible_type_tags: TypeTagSet) -> abitype::AbiType { if possible_type_tags.is_subset([TypeTag::True, TypeTag::False].iter().collect()) { abitype::AbiType::Bool } else if possible_type_tags.len() == 1 { let single_type_tag = possible_type_tags.into_iter().next().unwrap(); specific_abi_type_for_type_tag(single_type_tag) } else { specific_boxed_abi_type_for_type_tags(possible_type_tags) .clone() .into() } } /// Returns a specific ABI type to encode the given ty_ref pub fn specific_abi_type_for_ty_ref(ty_ref: &ty::Ref) -> abitype::AbiType { use crate::ty::list_iter::ListIterator; match ty_ref.resolve_to_ty() { Ty::List(list_ty) if !list_ty.is_empty() => { let member_ty_ref = ListIterator::new(list_ty).collect_rest(); let member_boxed_abi_type = specific_boxed_abi_type_for_ty_ref(&member_ty_ref); if list_ty.fixed().is_empty() { abitype::BoxedAbiType::List(member_boxed_abi_type).into() } else { 
abitype::BoxedAbiType::Pair(member_boxed_abi_type).into()
            }
        }
        Ty::Vectorof(member_ty) => {
            let member_boxed_abi_type = specific_boxed_abi_type_for_ty_ref(member_ty.as_ref());
            abitype::BoxedAbiType::Vector(member_boxed_abi_type).into()
        }
        Ty::Vector(member_tys) => {
            // Unify the fixed members into a single member type for the vector
            let member_ty_ref = ty::unify::unify_ty_ref_iter(member_tys.iter().cloned());
            let member_boxed_abi_type = specific_boxed_abi_type_for_ty_ref(&member_ty_ref);
            abitype::BoxedAbiType::Vector(member_boxed_abi_type).into()
        }
        _ => specific_abi_type_for_type_tags(ty_ref.into()),
    }
}

/// Returns a specific return ABI type to encode the given ty_ref
///
/// The empty list (unit) return is encoded as `Void`.
pub fn specific_ret_abi_type_for_ty_ref<M: ty::Pm>(ty_ref: &ty::Ref<M>) -> abitype::RetAbiType {
    if ty_ref == &ty::List::empty().into() {
        abitype::RetAbiType::Void
    } else {
        specific_abi_type_for_type_tags(ty_ref.into()).into()
    }
}

/// Folds the possible type tags of a set of values and maps them through `tagset_to_type`
// NOTE(review): `Item = &'v Value` restored; the iterator generic was stripped by
// extraction but is implied by `possible_type_tags_for_value` and the `'v` lifetime.
fn specific_type_for_values<'v, F, T>(
    possible_values: impl Iterator<Item = &'v Value>,
    tagset_to_type: F,
) -> T
where
    F: FnOnce(TypeTagSet) -> T,
{
    use crate::mir::value::types::possible_type_tags_for_value;

    let possible_type_tags = possible_values
        .map(possible_type_tags_for_value)
        .fold(TypeTagSet::new(), |acc, type_tags| acc | type_tags);

    tagset_to_type(possible_type_tags)
}

/// Returns a specific boxed ABI type to encode the given set of possible values
pub fn specific_boxed_abi_type_for_values<'v>(
    possible_values: impl Iterator<Item = &'v Value>,
) -> abitype::BoxedAbiType {
    specific_type_for_values(possible_values, specific_boxed_abi_type_for_type_tags).clone()
}

/// Returns a specific ABI type to compactly encode the given set of possible values
pub fn specific_abi_type_for_values<'v>(
    possible_values: impl Iterator<Item = &'v Value>,
) -> abitype::AbiType {
    specific_type_for_values(possible_values, specific_abi_type_for_type_tags)
}

/// Return a specific ABI type to compactly encode the given value
pub fn specific_abi_type_for_value(value: &Value) -> abitype::AbiType {
    specific_abi_type_for_values(std::iter::once(value))
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::hir::poly_for_str;

    use arret_runtime::abitype::EncodeBoxedAbiType;
    use arret_runtime::boxed;
fn assert_abi_type_for_str(abi_type: abitype::AbiType, ty_str: &'static str) { let poly = poly_for_str(ty_str); assert_eq!(abi_type, specific_abi_type_for_ty_ref(&poly)); } #[test] fn test_specific_abi_type_for_ty_ref() { assert_abi_type_for_str(abitype::AbiType::Bool, "true"); assert_abi_type_for_str(abitype::AbiType::Bool, "false"); assert_abi_type_for_str(abitype::AbiType::Bool, "Bool"); assert_abi_type_for_str(abitype::AbiType::Float, "Float"); assert_abi_type_for_str(abitype::AbiType::Int, "Int"); assert_abi_type_for_str(boxed::Num::BOXED_ABI_TYPE.into(), "Num"); assert_abi_type_for_str(abitype::AbiType::Char, "Char"); assert_abi_type_for_str(abitype::AbiType::InternedSym, "Sym"); assert_abi_type_for_str(abitype::BoxedAbiType::Any.into(), "(RawU Num Bool)"); assert_abi_type_for_str(boxed::Nil::BOXED_ABI_TYPE.into(), "(List)"); assert_abi_type_for_str( abitype::BoxedAbiType::List(&boxed::Bool::BOXED_ABI_TYPE).into(), "(List & Bool)", ); assert_abi_type_for_str( abitype::BoxedAbiType::Pair(&boxed::Num::BOXED_ABI_TYPE).into(), "(List Float & Int)", ); assert_abi_type_for_str( abitype::BoxedAbiType::Vector(&boxed::Str::BOXED_ABI_TYPE).into(), "(Vectorof Str)", ); assert_abi_type_for_str( abitype::BoxedAbiType::Vector(&boxed::Sym::BOXED_ABI_TYPE).into(), "(Vector 'foo 'bar)", ); } } ================================================ FILE: compiler/mir/tagset.rs ================================================ use std::{fmt, iter, ops}; use crate::ty; use crate::ty::Ty; use arret_runtime::abitype; use arret_runtime::boxed::{TypeTag, ALL_TYPE_TAGS}; const INNER_BITS: u8 = 32; type Inner = u32; #[derive(Clone, Copy, PartialEq, Eq, Default)] pub struct TypeTagSet(Inner); /// Efficient representation of a set of TypeTag impl TypeTagSet { pub fn new() -> TypeTagSet { TypeTagSet(0) } pub fn all() -> TypeTagSet { ALL_TYPE_TAGS.iter().collect() } pub fn is_empty(self) -> bool { self.0 == 0 } pub fn len(self) -> usize { self.0.count_ones() as usize } pub fn insert(&mut self, 
type_tag: TypeTag) {
        // The tag value is used as a bit index, so it must fit inside the
        // 32-bit backing integer. The compiler is smart enough to eliminate
        // this when the invariant is statically known.
        assert!((type_tag as u8) < INNER_BITS);
        self.0 |= 1 << type_tag as u8;
    }

    /// Returns true if every tag in `self` is also present in `superset`
    pub fn is_subset(self, superset: Self) -> bool {
        (self.0 & superset.0) == self.0
    }

    /// Returns true if `self` and `other` share no tags
    pub fn is_disjoint(self, other: Self) -> bool {
        self.intersection(other).is_empty()
    }

    pub fn intersection(self, other: Self) -> TypeTagSet {
        TypeTagSet(self.0 & other.0)
    }

    pub fn union(self, other: Self) -> TypeTagSet {
        TypeTagSet(self.0 | other.0)
    }

    pub fn contains(self, type_tag: TypeTag) -> bool {
        let type_tag_set: TypeTagSet = type_tag.into();
        type_tag_set.is_subset(self)
    }

    /// Returns an iterator over all type tags in the set
    ///
    /// These are returned in sorted order.
    pub fn into_iter(self) -> impl Iterator<Item = TypeTag> {
        ALL_TYPE_TAGS
            .iter()
            .cloned()
            .filter(move |type_tag| self.contains(*type_tag))
    }
}

impl fmt::Debug for TypeTagSet {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
        formatter.write_str("TypeTagSet(")?;
        formatter.debug_list().entries(self.into_iter()).finish()?;
        formatter.write_str(")")
    }
}

impl From<TypeTag> for TypeTagSet {
    fn from(type_tag: TypeTag) -> TypeTagSet {
        let mut type_tag_set = TypeTagSet::new();
        type_tag_set.insert(type_tag);
        type_tag_set
    }
}

/// Conservatively approximates a type reference as a set of type tags
impl<'a, M> From<&'a ty::Ref<M>> for TypeTagSet
where
    M: ty::Pm,
{
    fn from(ty_ref: &'a ty::Ref<M>) -> TypeTagSet {
        match ty_ref.resolve_to_ty() {
            Ty::Any => TypeTagSet::all(),
            Ty::Int => TypeTag::Int.into(),
            Ty::Float => TypeTag::Float.into(),
            Ty::Char => TypeTag::Char.into(),
            Ty::Bool => [TypeTag::True, TypeTag::False].iter().collect(),
            Ty::Num => [TypeTag::Int, TypeTag::Float].iter().collect(),
            Ty::LitBool(true) => TypeTag::True.into(),
            Ty::LitBool(false) => TypeTag::False.into(),
            Ty::Sym | Ty::LitSym(_) => TypeTag::Sym.into(),
            Ty::Str => TypeTag::Str.into(),
            Ty::Fun(_) | Ty::TopFun(_) | Ty::TyPred(_) | Ty::EqPred => TypeTag::FunThunk.into(),
            Ty::Vector(_) | Ty::Vectorof(_) => TypeTag::Vector.into(),
            Ty::Set(_) => TypeTag::Set.into(),
            Ty::Map(_) =>
TypeTag::Map.into(),
            Ty::TopRecord | Ty::RecordClass(_) | Ty::Record(_) => TypeTag::Record.into(),
            Ty::List(list) => {
                if list.is_empty() {
                    // Only the empty list
                    TypeTag::Nil.into()
                } else if !list.fixed().is_empty() {
                    // At least one fixed member; must be a pair
                    TypeTag::Pair.into()
                } else {
                    // Rest-only list; may be empty or not
                    [TypeTag::Nil, TypeTag::Pair].iter().collect()
                }
            }
            // A union may be any of its members' tags
            Ty::Union(members) => members
                .iter()
                .map(TypeTagSet::from)
                .fold(TypeTagSet::new(), |a, b| a | b),
            // An intersection may only be tags common to all members
            Ty::Intersect(members) => members
                .iter()
                .map(TypeTagSet::from)
                .fold(TypeTagSet::all(), |a, b| a & b),
        }
    }
}

/// Maps a boxed ABI type to the set of type tags it can carry
impl<'a> From<&'a abitype::BoxedAbiType> for TypeTagSet {
    fn from(boxed_abi_type: &'a abitype::BoxedAbiType) -> TypeTagSet {
        use arret_runtime::abitype::BoxedAbiType;

        match boxed_abi_type {
            BoxedAbiType::Any => TypeTagSet::all(),
            BoxedAbiType::UniqueTagged(type_tag) => (*type_tag).into(),
            BoxedAbiType::List(_) => [TypeTag::Pair, TypeTag::Nil].iter().collect(),
            BoxedAbiType::Pair(_) => TypeTag::Pair.into(),
            BoxedAbiType::Vector(_) => TypeTag::Vector.into(),
            BoxedAbiType::Set(_) => TypeTag::Set.into(),
            BoxedAbiType::Map(_, _) => TypeTag::Map.into(),
            BoxedAbiType::Union(_, type_tags) => type_tags.iter().collect(),
        }
    }
}

/// Maps an (possibly unboxed) ABI type to the set of type tags it can carry
impl<'a> From<&'a abitype::AbiType> for TypeTagSet {
    fn from(abi_type: &'a abitype::AbiType) -> TypeTagSet {
        use arret_runtime::abitype::AbiType;

        match abi_type {
            AbiType::Int => TypeTag::Int.into(),
            AbiType::Float => TypeTag::Float.into(),
            AbiType::Char => TypeTag::Char.into(),
            AbiType::Bool => [TypeTag::True, TypeTag::False].iter().collect(),
            AbiType::InternedSym => TypeTag::Sym.into(),
            AbiType::Boxed(boxed_abi_type) => boxed_abi_type.into(),
            AbiType::Callback(_) => TypeTag::FunThunk.into(),
        }
    }
}

impl From<abitype::AbiType> for TypeTagSet {
    fn from(abi_type: abitype::AbiType) -> TypeTagSet {
        (&abi_type).into()
    }
}

impl iter::FromIterator<TypeTag> for TypeTagSet {
    fn from_iter<I: IntoIterator<Item = TypeTag>>(iter: I) -> TypeTagSet {
        let mut type_tag_set = TypeTagSet::new();
        for type_tag in iter {
            type_tag_set.insert(type_tag);
        }
        type_tag_set
    }
}

impl<'a> iter::FromIterator<&'a TypeTag> for TypeTagSet {
    fn from_iter<I: IntoIterator<Item = &'a TypeTag>>(iter: I)
-> TypeTagSet { iter.into_iter().cloned().collect() } } impl ops::BitOr for TypeTagSet { type Output = Self; fn bitor(self, rhs: Self) -> Self { self.union(rhs) } } impl ops::BitAnd for TypeTagSet { type Output = Self; fn bitand(self, rhs: Self) -> Self { self.intersection(rhs) } } #[cfg(test)] mod test { use super::*; use crate::source::EMPTY_SPAN; #[test] fn basic_operations() { let empty_set = TypeTagSet::new(); let list_set: TypeTagSet = [TypeTag::Nil, TypeTag::Pair].iter().cloned().collect(); let nil_sym_set: TypeTagSet = [TypeTag::Nil, TypeTag::Sym].iter().cloned().collect(); let pair_set: TypeTagSet = TypeTag::Pair.into(); let nil_set: TypeTagSet = TypeTag::Nil.into(); let full_set = TypeTagSet::all(); assert!(empty_set.is_empty()); assert!(!full_set.is_empty()); assert!(!nil_sym_set.is_empty()); assert!(!pair_set.is_empty()); assert!(!full_set.is_empty()); assert!(empty_set.is_subset(full_set)); assert!(empty_set.is_subset(nil_sym_set)); assert!(empty_set.is_subset(empty_set)); assert!(list_set.is_subset(full_set)); assert!(list_set.is_subset(list_set)); assert!(!list_set.is_subset(pair_set)); assert!(!list_set.is_subset(empty_set)); assert!(empty_set.is_disjoint(full_set)); assert!(nil_sym_set.is_disjoint(pair_set)); assert!(!nil_sym_set.is_disjoint(list_set)); assert_eq!(nil_set, list_set & nil_sym_set); assert_eq!(list_set, pair_set | nil_set); } #[test] fn set_into_iter() { use std::collections::HashSet; let empty_set = TypeTagSet::new(); let list_set: TypeTagSet = [TypeTag::Nil, TypeTag::Pair].iter().collect(); let nil_set: TypeTagSet = TypeTag::Nil.into(); let full_set = TypeTagSet::all(); assert_eq!(None, empty_set.into_iter().next()); let mut nil_set_iter = nil_set.into_iter(); assert_eq!(Some(TypeTag::Nil), nil_set_iter.next()); assert_eq!(None, nil_set_iter.next()); let list_hash_set: HashSet = list_set.into_iter().collect(); assert_eq!(2, list_hash_set.len()); assert!(list_hash_set.contains(&TypeTag::Pair)); 
assert!(list_hash_set.contains(&TypeTag::Nil)); assert_eq!(ALL_TYPE_TAGS.len(), full_set.into_iter().count()); } #[test] fn from_ty_ref() { let int_ty_ref: ty::Ref = Ty::Int.into(); assert_eq!( TypeTagSet::from(TypeTag::Int), TypeTagSet::from(&int_ty_ref) ); let poly_sym_ref: ty::Ref = ty::TVar::new(EMPTY_SPAN, "tvar1".into(), Ty::Sym.into()).into(); assert_eq!( TypeTagSet::from(TypeTag::Sym), TypeTagSet::from(&poly_sym_ref) ); let num_float_intersect: ty::Ref = Ty::Intersect(Box::new([Ty::Num.into(), Ty::Float.into()])).into(); assert_eq!( TypeTagSet::from(TypeTag::Float), TypeTagSet::from(&num_float_intersect) ); } } ================================================ FILE: compiler/mir/typred.rs ================================================ use arret_syntax::span::Span; use arret_runtime::abitype; use arret_runtime::boxed; use arret_runtime::boxed::prelude::*; use crate::mir::builder::{Builder, BuiltReg}; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::ops::*; use crate::mir::tagset::TypeTagSet; use crate::mir::value::build_reg::value_to_reg; use crate::mir::value::from_reg::reg_to_value; use crate::mir::value::types::TypeHint; use crate::mir::value::Value; use crate::ty; use crate::ty::record; use crate::ty::Ty; /// Returns a set of type tags that would satisfy the type predicate fn type_tags_for_test_ty(test_ty: &ty::pred::TestTy) -> TypeTagSet { use crate::ty::pred::TestTy; match test_ty { TestTy::Str => boxed::TypeTag::Str.into(), TestTy::Sym => boxed::TypeTag::Sym.into(), TestTy::Int => boxed::TypeTag::Int.into(), TestTy::Float => boxed::TypeTag::Float.into(), TestTy::Char => boxed::TypeTag::Char.into(), TestTy::Nil => boxed::TypeTag::Nil.into(), TestTy::Fun => boxed::TypeTag::FunThunk.into(), TestTy::Bool => [boxed::TypeTag::True, boxed::TypeTag::False] .iter() .collect(), TestTy::Num => [boxed::TypeTag::Int, boxed::TypeTag::Float] .iter() .collect(), TestTy::List => [boxed::TypeTag::Pair, boxed::TypeTag::Nil].iter().collect(), TestTy::Vector => 
boxed::TypeTag::Vector.into(),
        TestTy::Set => boxed::TypeTag::Set.into(),
        TestTy::Map => boxed::TypeTag::Map.into(),
        TestTy::TopRecord => boxed::TypeTag::Record.into(),
        TestTy::RecordClass(_) => {
            // Record class predicates are dispatched to `eval_record_ty_pred`
            // by `eval_ty_pred` before this function is reached
            todo!("record classes");
        }
    }
}

/// Builds ops loading the type tag of `value`
///
/// The value is first converted to a boxed `Any` register. The loaded tag is
/// annotated with `possible_type_tags` so later passes can reason about it.
fn build_load_type_tag(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    value: &Value,
    possible_type_tags: TypeTagSet,
) -> BuiltReg {
    let subject_reg = value_to_reg(ehx, b, span, value, &abitype::BoxedAbiType::Any.into()).into();

    b.push_reg(
        span,
        OpKind::LoadBoxedTypeTag,
        LoadBoxedTypeTagOp {
            subject_reg,
            possible_type_tags,
        },
    )
}

/// Builds ops comparing a previously loaded type tag against `test_tag`
///
/// Returns a register containing the boolean result of the comparison.
fn build_is_type_tag(
    b: &mut Builder,
    span: Span,
    subject_type_tag_reg: BuiltReg,
    test_tag: boxed::TypeTag,
) -> BuiltReg {
    let test_tag_reg = b.push_reg(span, OpKind::ConstTypeTag, test_tag);

    b.push_reg(
        span,
        OpKind::TypeTagEqual,
        BinaryOp {
            lhs_reg: subject_type_tag_reg.into(),
            rhs_reg: test_tag_reg.into(),
        },
    )
}

/// Evaluates a type predicate expressible purely in terms of type tags
///
/// If the subject's possible tags are a subset of (or disjoint from) the
/// qualifying tags the result is folded to a constant boolean. Otherwise
/// runtime tag comparisons are built with `b`, which must then be `Some`;
/// a missing builder in that case is a compiler bug and panics.
fn eval_tagged_ty_pred(
    ehx: &mut EvalHirCtx,
    b: &mut Option,
    span: Span,
    subject_value: &Value,
    qualifying_type_tags: TypeTagSet,
) -> Value {
    use crate::mir::value::types::possible_type_tags_for_value;

    let possible_type_tags = possible_type_tags_for_value(subject_value);

    if possible_type_tags.is_subset(qualifying_type_tags) {
        // Statically true
        return boxed::TRUE_INSTANCE.as_any_ref().into();
    } else if qualifying_type_tags.is_disjoint(possible_type_tags) {
        // Statically false
        return boxed::FALSE_INSTANCE.as_any_ref().into();
    }

    let b = if let Some(some_b) = b {
        some_b
    } else {
        panic!(
            "runtime tagged type predicate without builder: {:?} is in type tag set {:?}",
            subject_value, qualifying_type_tags
        );
    };

    let subject_type_tag_reg = build_load_type_tag(ehx, b, span, subject_value, possible_type_tags);

    // Test each candidate tag in turn, OR-ing the results together.
    // Only tags both possible and qualifying need a runtime test.
    let result_reg = (qualifying_type_tags & possible_type_tags)
        .into_iter()
        .fold(None, |tail_result_reg: Option, test_tag| {
            let is_test_tag = build_is_type_tag(b, span, subject_type_tag_reg, test_tag);

            if let Some(tail_result_reg) = tail_result_reg {
                // Logically or this with our tail result
                let
or_result_reg = b.alloc_local(); let cond_op_kind = OpKind::Cond(CondOp { reg_phi: Some(RegPhi { output_reg: or_result_reg.into(), true_result_reg: is_test_tag.into(), false_result_reg: tail_result_reg.into(), }), test_reg: is_test_tag.into(), true_ops: Box::new([]), false_ops: Box::new([]), }); b.push(span, cond_op_kind); Some(or_result_reg) } else { // We are the first result Some(is_test_tag) } }) .unwrap(); reg_to_value( ehx, result_reg, &abitype::AbiType::Bool, &Ty::::Bool.into(), ) } fn eval_record_ty_pred( ehx: &mut EvalHirCtx, b: &mut Option, span: Span, subject_value: &Value, test_cons: &record::ConsId, ) -> Value { use crate::mir::value::types::{possible_type_tags_for_value, type_hint_for_value}; let possible_type_tags = possible_type_tags_for_value(subject_value); if !possible_type_tags.contains(boxed::TypeTag::Record) { // Cannot be a record return boxed::FALSE_INSTANCE.as_any_ref().into(); } let is_definite_record = possible_type_tags == boxed::TypeTag::Record.into(); let definite_matching_cons = if let TypeHint::KnownRecordCons(subject_cons) = type_hint_for_value(ehx, subject_value) { let known_matching_cons = test_cons == &subject_cons; if !known_matching_cons { // This cannot possibly match regardless of if it's record return boxed::FALSE_INSTANCE.as_any_ref().into(); } known_matching_cons } else { false }; if is_definite_record && definite_matching_cons { // This is a record with a matching cons return boxed::TRUE_INSTANCE.as_any_ref().into(); } let b = if let Some(some_b) = b { some_b } else { panic!("runtime record type predicate without builder"); }; let is_record_reg = if is_definite_record { None } else { // If this isn't guaranteed to be a record we need to test its type tag first let subject_type_tag_reg = build_load_type_tag(ehx, b, span, subject_value, possible_type_tags); Some(build_is_type_tag( b, span, subject_type_tag_reg, boxed::TypeTag::Record, )) }; let is_record_class_b_and_reg = if definite_matching_cons { None } else { // Create 
a builder for testing the record class let mut is_record_class_b = Builder::new(); let record_reg = value_to_reg( ehx, &mut is_record_class_b, span, subject_value, &abitype::BoxedAbiType::UniqueTagged(boxed::TypeTag::Record).into(), ) .into(); // Load our subject record class ID let subject_record_class_id_reg = is_record_class_b.push_reg(span, OpKind::LoadBoxedRecordClassId, record_reg); // Create our test record class ID let test_record_class_id_reg = is_record_class_b.push_reg( span, OpKind::ConstRecordClassId, ehx.evaled_record_class_for_cons(test_cons) .record_struct .clone(), ); // Compare them for equality let is_record_class_reg = is_record_class_b.push_reg( span, OpKind::RecordClassIdEqual, BinaryOp { lhs_reg: subject_record_class_id_reg.into(), rhs_reg: test_record_class_id_reg.into(), }, ); Some((is_record_class_b, is_record_class_reg)) }; let result_reg = match (is_record_reg, is_record_class_b_and_reg) { (Some(is_record_reg), Some((is_record_class_b, is_record_class_reg))) => { // Need to merge the type tag test with the record class test let and_result_reg = b.alloc_local(); let cond_op_kind = OpKind::Cond(CondOp { reg_phi: Some(RegPhi { output_reg: and_result_reg.into(), true_result_reg: is_record_class_reg.into(), false_result_reg: is_record_reg.into(), }), test_reg: is_record_reg.into(), true_ops: is_record_class_b.into_ops(), false_ops: Box::new([]), }); b.push(span, cond_op_kind); and_result_reg } (Some(is_record_reg), None) => is_record_reg, (None, Some((is_record_class_b, is_record_class_reg))) => { b.append(is_record_class_b.into_ops().into_vec()); is_record_class_reg } (None, None) => { // This is unreachable but has a sane answer anyway return boxed::TRUE_INSTANCE.as_any_ref().into(); } }; reg_to_value( ehx, result_reg, &abitype::AbiType::Bool, &Ty::::Bool.into(), ) } pub fn eval_ty_pred( ehx: &mut EvalHirCtx, b: &mut Option, span: Span, subject_value: &Value, test_ty: &ty::pred::TestTy, ) -> Value { match test_ty { 
ty::pred::TestTy::RecordClass(record_cons) => { eval_record_ty_pred(ehx, b, span, subject_value, record_cons) } tagged_ty => { let qualifying_type_tags = type_tags_for_test_ty(tagged_ty); eval_tagged_ty_pred(ehx, b, span, subject_value, qualifying_type_tags) } } } ================================================ FILE: compiler/mir/value/arret_fun.rs ================================================ use std::rc::Rc; use arret_syntax::datum::DataStr; use crate::context::ModuleId; use crate::hir; use crate::mir::env_values::EnvValues; use crate::ty; use crate::ty::ty_args::TyArgs; new_global_id_type!(ArretFunId); #[derive(Clone, Debug)] struct ArretFunConsts { id: ArretFunId, module_id: Option, source_name: Option, env_ty_args: TyArgs, fun_expr: hir::Fun, } #[derive(Clone, Debug)] pub struct ArretFun { consts: Rc, env_values: EnvValues, } impl ArretFun { pub fn new( module_id: Option, source_name: Option, env_ty_args: TyArgs, env_values: EnvValues, fun_expr: hir::Fun, ) -> Self { Self { consts: Rc::new(ArretFunConsts { id: ArretFunId::alloc(), module_id, source_name, env_ty_args, fun_expr, }), env_values, } } pub fn id(&self) -> ArretFunId { self.consts.id } /// Returns the optional module ID this function occurs in /// /// This is used to look up local variables from other definitions in the same module. 
pub fn module_id(&self) -> Option {
        self.consts.module_id
    }

    /// Returns the name this function was bound to in source, if any
    pub fn source_name(&self) -> &Option {
        &self.consts.source_name
    }

    /// Returns the type arguments captured from the enclosing environment
    pub fn env_ty_args(&self) -> &TyArgs {
        &self.consts.env_ty_args
    }

    /// Returns the values captured from the enclosing environment
    pub fn env_values(&self) -> &EnvValues {
        &self.env_values
    }

    pub fn env_values_mut(&mut self) -> &mut EnvValues {
        &mut self.env_values
    }

    /// Returns the HIR expression for this function
    pub fn fun_expr(&self) -> &hir::Fun {
        &self.consts.fun_expr
    }

    /// Returns a copy of this function with its captured values replaced
    ///
    /// The reference-counted `consts` are shared; only `env_values` differs.
    pub fn with_env_values(&self, env_values: EnvValues) -> ArretFun {
        ArretFun {
            consts: self.consts.clone(),
            env_values,
        }
    }

    /// Indicates if this `ArretFun` is used in multiple places
    ///
    /// This is a heuristic; if a `Fun` is bound to a variable this will return true regardless
    /// of the number of usages.
    pub fn has_multiple_usages(&self) -> bool {
        // This is a hack but has the benefit of not requiring a separate analysis pass
        Rc::strong_count(&self.consts) > 1
    }
}

================================================ FILE: compiler/mir/value/build_reg.rs ================================================
use arret_syntax::span::Span;

use arret_runtime::abitype;
use arret_runtime::boxed;
use arret_runtime::boxed::refs::Gc;

use crate::mir::builder::Builder;
use crate::mir::builder::BuiltReg;
use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::value;
use crate::mir::value::Value;
use crate::rfi;
use crate::ty::record;

// Length of the rest portion of a list being built: either known at compile
// time or loaded from a register at runtime
enum RestLen {
    Known(usize),
    Loaded(BuiltReg),
}

/// Converts a constant boxed value into a register of the given ABI type
///
/// Unboxed targets (`Int`, `Float`, `Char`, `Bool`, `InternedSym`) load the
/// contained value directly; boxed targets (visible in the following arms)
/// build a boxed constant and cast it to the requested pointer type.
fn const_to_reg(
    ehx: &mut EvalHirCtx,
    b: &mut Builder,
    span: Span,
    any_ref: Gc,
    abi_type: &abitype::AbiType,
) -> BuiltReg {
    use crate::mir::ops::*;
    use arret_runtime::boxed::prelude::*;

    let subtype = any_ref.as_subtype();

    match (subtype, abi_type) {
        (boxed::AnySubtype::Int(int_ref), abitype::AbiType::Int) => {
            b.push_reg(span, OpKind::ConstInt64, int_ref.value())
        }
        (boxed::AnySubtype::Float(float_ref), abitype::AbiType::Float) => {
            b.push_reg(span, OpKind::ConstFloat, float_ref.value())
        }
        (boxed::AnySubtype::Char(char_ref), abitype::AbiType::Char) => {
            b.push_reg(span, OpKind::ConstChar, char_ref.value())
        }
(boxed::AnySubtype::True(_), abitype::AbiType::Bool) => { b.push_reg(span, OpKind::ConstBool, true) } (boxed::AnySubtype::False(_), abitype::AbiType::Bool) => { b.push_reg(span, OpKind::ConstBool, false) } (boxed::AnySubtype::Sym(sym_ref), abitype::AbiType::InternedSym) => { b.push_reg(span, OpKind::ConstInternedSym, sym_ref.name(ehx).into()) } (boxed::AnySubtype::Int(int_ref), abitype::AbiType::Boxed(to_abi_type)) => { let from_abi_type = boxed::TypeTag::Int.into(); let from_reg = b.push_reg(span, OpKind::ConstBoxedInt, int_ref.value()); b.cast_boxed_cond(span, &from_abi_type, from_reg, to_abi_type.clone()) } (boxed::AnySubtype::Float(float_ref), abitype::AbiType::Boxed(to_abi_type)) => { let from_abi_type = boxed::TypeTag::Float.into(); let from_reg = b.push_reg(span, OpKind::ConstBoxedFloat, float_ref.value()); b.cast_boxed_cond(span, &from_abi_type, from_reg, to_abi_type.clone()) } (boxed::AnySubtype::Char(char_ref), abitype::AbiType::Boxed(to_abi_type)) => { let from_abi_type = boxed::TypeTag::Char.into(); let from_reg = b.push_reg(span, OpKind::ConstBoxedChar, char_ref.value()); b.cast_boxed_cond(span, &from_abi_type, from_reg, to_abi_type.clone()) } (boxed::AnySubtype::Str(str_ref), abitype::AbiType::Boxed(to_abi_type)) => { let from_abi_type = boxed::TypeTag::Str.into(); let from_reg = b.push_reg(span, OpKind::ConstBoxedStr, str_ref.as_str().into()); b.cast_boxed_cond(span, &from_abi_type, from_reg, to_abi_type.clone()) } (boxed::AnySubtype::Sym(sym_ref), abitype::AbiType::Boxed(to_abi_type)) => { let from_abi_type = boxed::TypeTag::Sym.into(); let from_reg = b.push_reg(span, OpKind::ConstBoxedSym, sym_ref.name(ehx).into()); b.cast_boxed_cond(span, &from_abi_type, from_reg, to_abi_type.clone()) } (boxed::AnySubtype::False(_), abitype::AbiType::Boxed(to_abi_type)) => { let from_abi_type = boxed::TypeTag::False.into(); let from_reg = b.push_reg(span, OpKind::ConstBoxedFalse, ()); b.cast_boxed_cond(span, &from_abi_type, from_reg, to_abi_type.clone()) } 
(boxed::AnySubtype::True(_), abitype::AbiType::Boxed(to_abi_type)) => { let from_abi_type = boxed::TypeTag::True.into(); let from_reg = b.push_reg(span, OpKind::ConstBoxedTrue, ()); b.cast_boxed_cond(span, &from_abi_type, from_reg, to_abi_type.clone()) } (boxed::AnySubtype::Nil(_), abitype::AbiType::Boxed(to_abi_type)) => { let from_abi_type = boxed::TypeTag::Nil.into(); let from_reg = b.push_reg(span, OpKind::ConstBoxedNil, ()); b.cast_boxed_cond(span, &from_abi_type, from_reg, to_abi_type.clone()) } (boxed::AnySubtype::Pair(pair_ref), abitype::AbiType::Boxed(to_abi_type)) => { let head_reg = const_to_reg( ehx, b, span, pair_ref.head(), &abitype::BoxedAbiType::Any.into(), ); let rest_reg = const_to_reg( ehx, b, span, pair_ref.rest().as_any_ref(), &abitype::TOP_LIST_BOXED_ABI_TYPE.into(), ); let list_len_reg = b.push_reg(span, OpKind::ConstInt64, pair_ref.len() as i64); let from_reg = b.push_reg( span, OpKind::ConstBoxedPair, BoxPairOp { head_reg: head_reg.into(), rest_reg: rest_reg.into(), list_len_reg: list_len_reg.into(), }, ); b.cast_boxed_cond( span, &boxed::TypeTag::Pair.into(), from_reg, to_abi_type.clone(), ) } (boxed::AnySubtype::Record(record_ref), abitype::AbiType::Boxed(to_abi_type)) => { let record_cons = ehx .cons_for_jit_record_class_id(record_ref.class_id()) .expect("unable to lookup record cons for JIT record class ID"); let record_struct = ehx .record_class_for_cons .get(record_cons) .expect("unable to lookup record class for cons") .record_struct .clone(); let field_values: Vec<_> = record_ref.field_values(ehx.as_heap()).collect(); let field_regs = field_values .into_iter() .zip(record_struct.field_abi_types.iter()) .map(|(field_value, abi_type)| { let built_reg = record_field_value_to_const_reg(ehx, b, span, &field_value, abi_type); built_reg.into() }) .collect(); let box_record_op = BoxRecordOp { record_struct, field_regs, }; let from_abi_type = boxed::TypeTag::Record.into(); let from_reg = b.push_reg(span, OpKind::ConstBoxedRecord, 
box_record_op); b.cast_boxed_cond(span, &from_abi_type, from_reg, to_abi_type.clone()) } (boxed::AnySubtype::Vector(vector_ref), abitype::AbiType::Boxed(to_abi_type)) => { let element_regs = vector_ref .iter() .map(|element_ref| { const_to_reg( ehx, b, span, element_ref, &abitype::BoxedAbiType::Any.into(), ) .into() }) .collect(); let from_reg = b.push_reg(span, OpKind::ConstBoxedVector, element_regs); b.cast_boxed_cond( span, &boxed::TypeTag::Vector.into(), from_reg, to_abi_type.clone(), ) } (boxed::AnySubtype::Set(set_ref), abitype::AbiType::Boxed(to_abi_type)) => { let element_regs = set_ref .iter() .map(|element_ref| { const_to_reg( ehx, b, span, element_ref, &abitype::BoxedAbiType::Any.into(), ) .into() }) .collect(); let from_reg = b.push_reg(span, OpKind::ConstBoxedSet, element_regs); b.cast_boxed_cond( span, &boxed::TypeTag::Set.into(), from_reg, to_abi_type.clone(), ) } (boxed::AnySubtype::Map(map_ref), abitype::AbiType::Boxed(to_abi_type)) => { let entry_regs = map_ref .iter() .map(|(key_ref, value_ref)| { let key_reg = const_to_reg(ehx, b, span, key_ref, &abitype::BoxedAbiType::Any.into()) .into(); let value_reg = const_to_reg(ehx, b, span, value_ref, &abitype::BoxedAbiType::Any.into()) .into(); (key_reg, value_reg) }) .collect(); let from_reg = b.push_reg(span, OpKind::ConstBoxedMap, entry_regs); b.cast_boxed_cond( span, &boxed::TypeTag::Map.into(), from_reg, to_abi_type.clone(), ) } (boxed::AnySubtype::FunThunk(fun_thunk_ref), abi_type) => { let fun_value = ehx .jit_boxed_to_fun_value(unsafe { Gc::new(fun_thunk_ref as *const _) }) .expect("attempt to convert unknown fun thunk to reg") .clone(); value_to_reg(ehx, b, span, &fun_value, abi_type) } (subtype, abi_type) => unimplemented!( "Unimplemented const {:?} to reg {:?} conversion", subtype, abi_type ), } } fn list_to_reg( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, fixed: &[Value], rest: Option<&Value>, boxed_abi_type: &abitype::BoxedAbiType, ) -> BuiltReg { use crate::mir::ops::*; use 
crate::mir::value::list::{list_value_len, ListValueLen}; use arret_runtime::abitype::TOP_LIST_BOXED_ABI_TYPE; let tail_reg = if let Some(rest) = rest { value_to_reg( ehx, b, span, rest, &abitype::AbiType::Boxed(TOP_LIST_BOXED_ABI_TYPE), ) } else { let nil_reg = b.push_reg(span, OpKind::ConstBoxedNil, ()); b.cast_boxed(span, nil_reg, TOP_LIST_BOXED_ABI_TYPE) }; let list_reg = if fixed.is_empty() { tail_reg } else { let rest_len = match rest { Some(rest) => match list_value_len(rest) { ListValueLen::Exact(known) => RestLen::Known(known), ListValueLen::Min(min_list_len) => { let len_reg = b.push_reg( span, OpKind::LoadBoxedListLen, LoadBoxedListLenOp { list_reg: tail_reg.into(), min_list_len, }, ); RestLen::Loaded(len_reg) } }, None => RestLen::Known(0), }; fixed .iter() .rev() .enumerate() .fold(tail_reg, |tail_reg, (i, fixed)| { let list_len_reg = match rest_len { RestLen::Known(known) => { b.push_reg(span, OpKind::ConstInt64, (known + i + 1) as i64) } RestLen::Loaded(rest_len_reg) => { let index_reg = b.push_reg(span, OpKind::ConstInt64, (i + 1) as i64); b.push_reg( span, OpKind::Int64Add, BinaryOp { lhs_reg: rest_len_reg.into(), rhs_reg: index_reg.into(), }, ) } }; let fixed_reg = value_to_reg(ehx, b, span, fixed, &abitype::BoxedAbiType::Any.into()); let box_pair_op = BoxPairOp { head_reg: fixed_reg.into(), rest_reg: tail_reg.into(), list_len_reg: list_len_reg.into(), }; let pair_head_reg = if fixed_reg.is_const() && tail_reg.is_const() { b.push_reg(span, OpKind::ConstBoxedPair, box_pair_op) } else { b.push_reg(span, OpKind::AllocBoxedPair, box_pair_op) }; b.cast_boxed(span, pair_head_reg, TOP_LIST_BOXED_ABI_TYPE.clone()) }) }; b.cast_boxed_cond( span, &TOP_LIST_BOXED_ABI_TYPE, list_reg, boxed_abi_type.clone(), ) } fn record_to_reg( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, record_cons: &record::ConsId, fields: &[Value], boxed_abi_type: &abitype::BoxedAbiType, ) -> BuiltReg { use crate::mir::ops::*; let record_struct = ehx 
.evaled_record_class_for_cons(record_cons) .record_struct .clone(); let mut has_non_const_fields = false; let field_regs = fields .iter() .zip(record_struct.field_abi_types.iter()) .map(|(field, abi_type)| { let built_reg = value_to_reg(ehx, b, span, field, abi_type); has_non_const_fields = has_non_const_fields || !built_reg.is_const(); built_reg.into() }) .collect(); let box_record_op = BoxRecordOp { record_struct, field_regs, }; let record_reg = if has_non_const_fields { b.push_reg(span, OpKind::AllocBoxedRecord, box_record_op) } else { b.push_reg(span, OpKind::ConstBoxedRecord, box_record_op) }; b.cast_boxed(span, record_reg, boxed_abi_type.clone()) } fn record_field_value_to_const_reg( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, field_value: &boxed::FieldValue, abi_type: &abitype::AbiType, ) -> BuiltReg { use crate::mir::ops::*; use arret_runtime::boxed::prelude::*; use boxed::FieldValue; // This depends on the fact we're encoding the exact record layout we're reading from. We only // need `abi_type` to find the specific pointer type for boxed values. 
match field_value { FieldValue::Int(v) => b.push_reg(span, OpKind::ConstInt64, *v), FieldValue::Float(v) => b.push_reg(span, OpKind::ConstFloat, *v), FieldValue::Bool(v) => b.push_reg(span, OpKind::ConstBool, *v), FieldValue::Char(v) => b.push_reg(span, OpKind::ConstChar, *v), FieldValue::InternedSym(interned) => { let name = ehx.as_heap().type_info().interner().unintern(interned); b.push_reg(span, OpKind::ConstInternedSym, name.into()) } FieldValue::Boxed(any_ref) => const_to_reg(ehx, b, span, *any_ref, abi_type), } } pub fn reg_to_boxed_reg( b: &mut Builder, span: Span, reg_value: &value::RegValue, to_boxed: &abitype::BoxedAbiType, ) -> BuiltReg { use crate::mir::ops::*; use arret_runtime::boxed::TypeTag; match ®_value.abi_type { abitype::AbiType::Boxed(from_boxed) => { b.cast_boxed_cond(span, from_boxed, reg_value.reg, to_boxed.clone()) } abitype::AbiType::Int => { let boxed_int_reg = b.push_reg(span, OpKind::AllocBoxedInt, reg_value.reg.into()); b.cast_boxed_cond(span, &TypeTag::Int.into(), boxed_int_reg, to_boxed.clone()) } abitype::AbiType::Char => { let boxed_char_reg = b.push_reg(span, OpKind::AllocBoxedChar, reg_value.reg.into()); b.cast_boxed_cond( span, &TypeTag::Char.into(), boxed_char_reg, to_boxed.clone(), ) } abitype::AbiType::InternedSym => { let boxed_sym_reg = b.push_reg(span, OpKind::AllocBoxedSym, reg_value.reg.into()); b.cast_boxed_cond(span, &TypeTag::Sym.into(), boxed_sym_reg, to_boxed.clone()) } abitype::AbiType::Float => { let boxed_float_reg = b.push_reg(span, OpKind::AllocBoxedFloat, reg_value.reg.into()); b.cast_boxed_cond( span, &TypeTag::Float.into(), boxed_float_reg, to_boxed.clone(), ) } abitype::AbiType::Bool => b.push_cond( span, reg_value.reg.into(), |b| { let const_true_reg = b.push_reg(span, OpKind::ConstBoxedTrue, ()); b.cast_boxed_cond( span, &TypeTag::True.into(), const_true_reg, to_boxed.clone(), ) .into() }, |b| { let const_false_reg = b.push_reg(span, OpKind::ConstBoxedFalse, ()); b.cast_boxed_cond( span, 
&TypeTag::False.into(), const_false_reg, to_boxed.clone(), ) .into() }, ), // Callbacks are ephemeral unboxed types. They cannot be returned from functions and should // never need to be boxed. abitype::AbiType::Callback(_) => { unimplemented!("callback to boxed reg {:?} conversion", to_boxed) } } } fn boxed_to_bool( b: &mut Builder, span: Span, from_boxed: &abitype::BoxedAbiType, reg_value: &value::RegValue, ) -> BuiltReg { use crate::mir::ops::*; use arret_runtime::boxed::TypeTag; let possible_type_tags = reg_value.possible_type_tags & [TypeTag::True, TypeTag::False].iter().collect(); if possible_type_tags == TypeTag::True.into() { b.push_reg(span, OpKind::ConstBool, true) } else if possible_type_tags == TypeTag::False.into() { b.push_reg(span, OpKind::ConstBool, false) } else { let boxed_any_reg = b.cast_boxed_cond(span, from_boxed, reg_value.reg, abitype::BoxedAbiType::Any); let boxed_type_tag_reg = b.push_reg( span, OpKind::LoadBoxedTypeTag, LoadBoxedTypeTagOp { subject_reg: boxed_any_reg.into(), possible_type_tags, }, ); let true_type_tag_reg = b.push_reg(span, OpKind::ConstTypeTag, TypeTag::True); b.push_reg( span, OpKind::TypeTagEqual, BinaryOp { lhs_reg: boxed_type_tag_reg.into(), rhs_reg: true_type_tag_reg.into(), }, ) } } fn reg_to_reg( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, reg_value: &value::RegValue, abi_type: &abitype::AbiType, ) -> BuiltReg { use crate::mir::ops::*; use arret_runtime::boxed::TypeTag; match (®_value.abi_type, abi_type) { (from, to) if from == to => reg_value.reg, (_, abitype::AbiType::Boxed(to_boxed)) => reg_to_boxed_reg(b, span, reg_value, to_boxed), (abitype::AbiType::Boxed(from_boxed), abitype::AbiType::Int) => { let boxed_int_reg = b.cast_boxed_cond(span, from_boxed, reg_value.reg, TypeTag::Int.into()); b.push_reg(span, OpKind::LoadBoxedIntValue, boxed_int_reg.into()) } (abitype::AbiType::Boxed(from_boxed), abitype::AbiType::Float) => { let boxed_float_reg = b.cast_boxed_cond(span, from_boxed, reg_value.reg, 
TypeTag::Float.into()); b.push_reg(span, OpKind::LoadBoxedFloatValue, boxed_float_reg.into()) } (abitype::AbiType::Boxed(from_boxed), abitype::AbiType::Char) => { let boxed_char_reg = b.cast_boxed_cond(span, from_boxed, reg_value.reg, TypeTag::Char.into()); b.push_reg(span, OpKind::LoadBoxedCharValue, boxed_char_reg.into()) } (abitype::AbiType::Boxed(from_boxed), abitype::AbiType::Bool) => { boxed_to_bool(b, span, from_boxed, reg_value) } (abitype::AbiType::Boxed(from_boxed), abitype::AbiType::InternedSym) => { let boxed_sym_reg = b.cast_boxed_cond(span, from_boxed, reg_value.reg, TypeTag::Sym.into()); b.push_reg(span, OpKind::LoadBoxedSymInterned, boxed_sym_reg.into()) } (abitype::AbiType::Boxed(from_boxed), abitype::AbiType::Callback(entry_point_abi)) => { ehx.thunk_reg_to_callback_reg(b, span, from_boxed, reg_value.reg, entry_point_abi) } (from, to) => unimplemented!("reg {:?} to reg {:?} conversion", from, to), } } fn thunk_reg_to_reg( b: &mut Builder, span: Span, boxed_thunk_reg: BuiltReg, boxed_abi_type: &abitype::BoxedAbiType, ) -> BuiltReg { use arret_runtime::boxed::TypeTag; b.cast_boxed_cond( span, &TypeTag::FunThunk.into(), boxed_thunk_reg, boxed_abi_type.clone(), ) } fn arret_fun_to_reg( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, arret_fun: &value::ArretFun, abi_type: &abitype::AbiType, ) -> BuiltReg { match abi_type { abitype::AbiType::Boxed(boxed_abi_type) => { let thunk_reg = ehx.arret_fun_to_thunk_reg(b, span, arret_fun); thunk_reg_to_reg(b, span, thunk_reg, boxed_abi_type) } abitype::AbiType::Callback(entry_point_abi) => { ehx.arret_fun_to_callback_reg(b, span, arret_fun, entry_point_abi) } other => { panic!("Attempt to convert Arret fun to {:?}", other); } } } fn rust_fun_to_reg( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, rust_fun: &rfi::Fun, abi_type: &abitype::AbiType, ) -> BuiltReg { match abi_type { abitype::AbiType::Boxed(boxed_abi_type) => { let thunk_reg = ehx.rust_fun_to_thunk_reg(b, span, rust_fun); thunk_reg_to_reg(b, 
span, thunk_reg, boxed_abi_type) } abitype::AbiType::Callback(entry_point_abi) => { ehx.rust_fun_to_callback_reg(b, span, rust_fun, entry_point_abi) } other => { panic!("Attempt to convert Rust fun to {:?}", other); } } } pub fn value_to_reg( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, value: &Value, abi_type: &abitype::AbiType, ) -> BuiltReg { match value { Value::Reg(reg_value) => reg_to_reg(ehx, b, span, reg_value, abi_type), Value::Const(any_ref) => const_to_reg(ehx, b, span, *any_ref, abi_type), Value::List(fixed, rest) => { if let abitype::AbiType::Boxed(boxed_abi_type) = abi_type { list_to_reg( ehx, b, span, fixed, rest.as_ref().map(AsRef::as_ref), boxed_abi_type, ) } else { panic!("Attempt to construct non-boxed list"); } } Value::Record(record_cons, fields) => { if let abitype::AbiType::Boxed(boxed_abi_type) = abi_type { record_to_reg(ehx, b, span, record_cons, fields, boxed_abi_type) } else { panic!("Attempt to construct non-boxed record"); } } Value::ArretFun(ref arret_fun) => arret_fun_to_reg(ehx, b, span, arret_fun, abi_type), Value::TyPred(test_ty) => { let ty_pred_arret_fun = ehx .synthetic_funs() .ty_pred_arret_fun(test_ty.clone()) .clone(); arret_fun_to_reg(ehx, b, span, &ty_pred_arret_fun, abi_type) } Value::EqPred => { let eq_pred_arret_fun = ehx.synthetic_funs().eq_pred_arret_fun().clone(); arret_fun_to_reg(ehx, b, span, &eq_pred_arret_fun, abi_type) } Value::RecordCons(cons) => { let record_cons_arret_fun = ehx.synthetic_funs().record_cons_arret_fun(cons).clone(); arret_fun_to_reg(ehx, b, span, &record_cons_arret_fun, abi_type) } Value::FieldAccessor(cons, field_index) => { let field_accessor_arret_fun = ehx .synthetic_funs() .field_accessor_arret_fun(cons, *field_index) .clone(); arret_fun_to_reg(ehx, b, span, &field_accessor_arret_fun, abi_type) } Value::RustFun(ref rust_fun) => rust_fun_to_reg(ehx, b, span, rust_fun, abi_type), } } ================================================ FILE: compiler/mir/value/from_reg.rs 
================================================ use arret_runtime::abitype; use arret_runtime::boxed; use arret_runtime::boxed::prelude::*; use crate::mir::builder::BuiltReg; use crate::mir::tagset::TypeTagSet; use crate::mir::value; use crate::mir::value::types::TypeHint; use crate::mir::value::Value; use crate::ty; use crate::ty::Ty; fn reg_to_value_with_constraints( heap: &mut impl boxed::AsHeap, reg: BuiltReg, abi_type: &abitype::AbiType, arret_ty: &ty::Ref, constrain_possible_type_tags: TypeTagSet, fallback_type_hint: &TypeHint, ) -> Value where M: ty::Pm, { use crate::mir::value::types::type_hint_for_ty_ref; let type_hint_from_ty_ref = type_hint_for_ty_ref(arret_ty); let type_hint = if type_hint_from_ty_ref == TypeHint::None { // Hopefully our fallback type hint has more information fallback_type_hint.clone() } else { type_hint_from_ty_ref }; if let Ty::LitSym(value) = arret_ty.resolve_to_ty() { // Unlike other literal types we can't encode a literal sym in a `RegValue` without losing // information. This means we will need to rebuild the sym every time it's referenced but // this should be a net win. boxed::Sym::new(heap, value.as_ref()).as_any_ref().into() } else { value::RegValue { reg, abi_type: abi_type.clone(), possible_type_tags: TypeTagSet::from(abi_type) & TypeTagSet::from(arret_ty) & constrain_possible_type_tags, type_hint, } .into() } } /// Creates a Value from a register of the given ABI and Arret type /// /// Supported literal types will be converted to `Value::Const`. Everything else will become a /// `Value::Reg`. 
pub fn reg_to_value( heap: &mut impl boxed::AsHeap, reg: BuiltReg, abi_type: &abitype::AbiType, arret_ty: &ty::Ref, ) -> Value where M: ty::Pm, { reg_to_value_with_constraints( heap, reg, abi_type, arret_ty, TypeTagSet::all(), &TypeHint::None, ) } pub fn refine_reg_value_with_arret_ty( heap: &mut impl boxed::AsHeap, reg_value: &value::RegValue, arret_ty: &ty::Ref, ) -> Value where M: ty::Pm, { reg_to_value_with_constraints( heap, reg_value.reg, ®_value.abi_type, arret_ty, reg_value.possible_type_tags, ®_value.type_hint, ) } ================================================ FILE: compiler/mir/value/list.rs ================================================ use std::vec; use arret_syntax::span::Span; use crate::mir::builder::{Builder, TryToBuilder}; use crate::mir::value; use crate::mir::value::types::TypeHint; use crate::mir::value::Value; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ListValueLen { Exact(usize), Min(usize), } impl ListValueLen { pub fn lower_bound(&self) -> usize { match self { ListValueLen::Exact(len) => *len, ListValueLen::Min(len) => *len, } } } impl std::ops::Add for ListValueLen { type Output = ListValueLen; fn add(self, other: ListValueLen) -> ListValueLen { match (self, other) { (ListValueLen::Exact(self_len), ListValueLen::Exact(other_len)) => { ListValueLen::Exact(self_len + other_len) } _ => ListValueLen::Min(self.lower_bound() + other.lower_bound()), } } } pub fn list_value_len(value: &Value) -> ListValueLen { use arret_runtime::boxed; match value { Value::List(fixed, rest) => { let fixed_len = ListValueLen::Exact(fixed.len()); match rest { Some(rest) => fixed_len + list_value_len(rest), None => fixed_len, } } Value::Const(any_ref) => match any_ref.downcast_ref::>() { Some(list_ref) => ListValueLen::Exact(list_ref.len()), None => ListValueLen::Min(0), }, Value::Reg(reg_value) => { if !reg_value.possible_type_tags.contains(boxed::TypeTag::Pair) { // Must be empty ListValueLen::Exact(0) } else if 
!reg_value.possible_type_tags.contains(boxed::TypeTag::Nil) { if let TypeHint::KnownListLen(len) = reg_value.type_hint { ListValueLen::Exact(len) } else { // Cannot be empty ListValueLen::Min(1) } } else { ListValueLen::Min(0) } } _ => ListValueLen::Min(0), } } pub struct UnsizedListIterator { fixed: vec::IntoIter, rest: Option, } impl UnsizedListIterator { pub fn new(value: Value) -> Self { Self { fixed: Vec::new().into_iter(), rest: Some(value), } } /// Returns the next element in the list /// /// It is undefined if the list has no more elements. This function may panic, generate /// nonsense code, generate code that crashes at runtime, etc. #[must_use] pub fn next_unchecked(&mut self, b: &mut impl TryToBuilder, span: Span) -> Value { if let Some(next) = self.fixed.next() { return next; } let rest_value = self .rest .take() .expect("ran off the end of list with no rest argument"); match rest_value { Value::List(fixed, rest) => { // Become our tail self.fixed = fixed.into_vec().into_iter(); self.rest = rest.map(|rest| *rest); self.next_unchecked(b, span) } Value::Const(any_ref) => { use arret_runtime::boxed; let const_pair = any_ref .downcast_ref::>() .expect("tried to pop off non-pair constant"); let tail = const_pair.rest(); self.rest = if tail.is_empty() { None } else { Some(tail.into()) }; const_pair.head().into() } Value::Reg(reg_value) => { let b = b .try_to_builder() .expect("popping rest argument without builder"); self.build_rest_next(b, span, ®_value) } other => unimplemented!("popping rest argument off value {:?}", other), } } /// Returns a Value containing the rest of the iterator #[must_use] pub fn into_rest(self) -> Value { Value::List(self.fixed.collect(), self.rest.map(Box::new)) } fn build_rest_next( &mut self, b: &mut Builder, span: Span, current_rest_value: &value::RegValue, ) -> Value { use crate::mir::ops::*; use crate::mir::value::build_reg::reg_to_boxed_reg; use arret_runtime::abitype; let needed_pair_type = 
abitype::BoxedAbiType::Pair(&abitype::BoxedAbiType::Any); let current_rest_reg = reg_to_boxed_reg(b, span, current_rest_value, &needed_pair_type); let head_reg = b.push_reg(span, OpKind::LoadBoxedPairHead, current_rest_reg.into()); let rest_reg = b.push_reg(span, OpKind::LoadBoxedPairRest, current_rest_reg.into()); self.rest = Some(value::RegValue::new(rest_reg, abitype::TOP_LIST_BOXED_ABI_TYPE.into()).into()); value::RegValue::new(head_reg, abitype::BoxedAbiType::Any.into()).into() } } pub struct SizedListIterator { size: usize, unsized_list_iterator: UnsizedListIterator, } impl SizedListIterator { pub fn try_new(value: &Value) -> Option { match list_value_len(value) { ListValueLen::Exact(size) => Some(Self { size, unsized_list_iterator: UnsizedListIterator::new(value.clone()), }), _ => None, } } } impl SizedListIterator { pub fn next(&mut self, b: &mut impl TryToBuilder, span: Span) -> Option { if self.size == 0 { return None; } self.size -= 1; Some(self.unsized_list_iterator.next_unchecked(b, span)) } #[must_use] pub fn into_rest(self) -> Value { self.unsized_list_iterator.into_rest() } pub fn len(&self) -> usize { self.size } pub fn is_empty(&self) -> bool { self.size == 0 } } #[cfg(test)] mod test { use super::*; use arret_runtime::boxed; use arret_runtime::boxed::prelude::*; use crate::source::EMPTY_SPAN; #[test] fn list_len() { use crate::mir::builder::BuiltReg; use crate::mir::ops::RegId; use arret_runtime::abitype; let mut heap = boxed::Heap::empty(); let elements = &[1, 2, 3]; // Start with three fixed values let fixed_values: Box<[Value]> = elements .iter() .map(|element| boxed::Int::new(&mut heap, *element).into()) .collect(); // Have a constant list tail let boxed_list_tail = boxed::List::from_values(&mut heap, elements.iter().cloned(), boxed::Int::new); let const_list_tail = Value::List(Box::new([]), Some(Box::new(boxed_list_tail.into()))); // Add the fixed values (3 elements) to the constant tail (3 elements) let list_value = 
Value::List(fixed_values.clone(), Some(Box::new(const_list_tail))); // The length should be 6 assert_eq!(ListValueLen::Exact(6), list_value_len(&list_value)); // Try 3 fixed values with a completely unknown tail let list_with_unknown_tail = Value::List( fixed_values.clone(), Some(Box::new( value::RegValue::new( BuiltReg::Local(RegId::alloc()), abitype::BoxedAbiType::Any.into(), ) .into(), )), ); // Length should be at least 3 assert_eq!( ListValueLen::Min(3), list_value_len(&list_with_unknown_tail) ); // Try 3 fixed values with a pair tail let list_with_pair_tail = Value::List( fixed_values.clone(), Some(Box::new( value::RegValue::new(BuiltReg::Local(RegId::alloc()), boxed::TypeTag::Pair.into()) .into(), )), ); // Length should be at least 4 assert_eq!(ListValueLen::Min(4), list_value_len(&list_with_pair_tail)); // Try 3 fixed values with a nil tail let list_with_nil_tail = Value::List( fixed_values, Some(Box::new( value::RegValue::new(BuiltReg::Local(RegId::alloc()), boxed::TypeTag::Nil.into()) .into(), )), ); // Length should be at exactly 3 assert_eq!(ListValueLen::Exact(3), list_value_len(&list_with_nil_tail)); } #[test] fn const_unsized_list_iter() { let mut heap = boxed::Heap::empty(); let elements = &[1, 2, 3]; let boxed_list = boxed::List::from_values(&mut heap, elements.iter().cloned(), boxed::Int::new); let mut iter = UnsizedListIterator { fixed: Vec::new().into_iter(), rest: Some(boxed_list.into()), }; for expected in elements { let next_value = iter.next_unchecked(&mut None, EMPTY_SPAN); if let Value::Const(next_ref) = next_value { let expected_ref = boxed::Int::new(&mut heap, *expected).as_any_ref(); assert!(expected_ref.eq_in_heap(&heap, &next_ref)); } else { panic!("expected const value, got {:?}", next_value); } } } #[test] fn fixed_list_value_unsized_iter() { let mut heap = boxed::Heap::empty(); let elements = &[1, 2, 3]; let element_values: Vec = elements .iter() .map(|element| boxed::Int::new(&mut heap, *element).into()) .collect(); let mut iter 
= UnsizedListIterator { fixed: element_values.into_iter(), rest: None, }; for expected in elements { let next_value = iter.next_unchecked(&mut None, EMPTY_SPAN); if let Value::Const(next_ref) = next_value { let expected_ref = boxed::Int::new(&mut heap, *expected).as_any_ref(); assert!(expected_ref.eq_in_heap(&heap, &next_ref)); } else { panic!("expected const value, got {:?}", next_value); } } } } ================================================ FILE: compiler/mir/value/mod.rs ================================================ mod arret_fun; pub mod build_reg; pub mod from_reg; pub mod list; pub mod plan_phi; pub mod synthetic_fun; pub mod to_const; pub mod types; use std::rc::Rc; use std::sync::Arc; use arret_runtime::abitype; use arret_runtime::boxed; use arret_runtime::boxed::refs::Gc; use crate::mir::builder::BuiltReg; use crate::mir::tagset::TypeTagSet; use crate::rfi; use crate::ty; use crate::ty::record; pub use arret_fun::{ArretFun, ArretFunId}; #[derive(Clone, Debug)] pub struct RegValue { pub reg: BuiltReg, pub abi_type: abitype::AbiType, pub possible_type_tags: TypeTagSet, pub type_hint: types::TypeHint, } impl RegValue { pub fn new(reg: BuiltReg, abi_type: abitype::AbiType) -> RegValue { RegValue { reg, possible_type_tags: (&abi_type).into(), abi_type, type_hint: types::TypeHint::None, } } } #[derive(Clone, Debug)] pub enum Value { Const(Gc), // This uses `Box<[]>` because we can't convert from a `Vec<>` to `Rc<[]>` without reallocating List(Box<[Value]>, Option>), Record(record::ConsId, Box<[Value]>), ArretFun(ArretFun), RustFun(Arc), Reg(Rc), TyPred(ty::pred::TestTy), EqPred, RecordCons(record::ConsId), FieldAccessor(record::ConsId, usize), } impl Value { pub fn unsized_list_iter(&self) -> list::UnsizedListIterator { self.clone().into_unsized_list_iter() } pub fn into_unsized_list_iter(self) -> list::UnsizedListIterator { list::UnsizedListIterator::new(self) } pub fn try_sized_list_iter(&self) -> Option { list::SizedListIterator::try_new(self) } } impl 
From> for Value { fn from(boxed_ref: Gc) -> Self { Value::Const(boxed_ref.as_any_ref()) } } impl From for Value { fn from(reg_value: RegValue) -> Self { Value::Reg(Rc::new(reg_value)) } } pub fn visit_value_root(strong_pass: &mut boxed::collect::StrongPass, value: &mut Value) { match value { Value::Const(ref mut any_ref) => strong_pass.visit_box(any_ref), Value::List(ref mut fixed, ref mut rest) => { for any_ref in fixed.iter_mut() { visit_value_root(strong_pass, any_ref); } for any_ref in rest { visit_value_root(strong_pass, any_ref); } } Value::ArretFun(ref mut arret_fun) => { for (_, value) in arret_fun.env_values_mut().const_values.iter_mut() { visit_value_root(strong_pass, value); } for (_, value) in arret_fun.env_values_mut().free_values.iter_mut() { visit_value_root(strong_pass, value); } } _ => {} } } ================================================ FILE: compiler/mir/value/plan_phi.rs ================================================ use arret_runtime::abitype; use arret_runtime::boxed; use crate::mir::value::Value; pub fn plan_phi_abi_type(lhs: &Value, rhs: &Value) -> abitype::AbiType { use crate::mir::specific_abi_type::*; match (lhs, rhs) { (Value::Reg(lhs_reg_value), Value::Reg(rhs_reg_value)) if lhs_reg_value.abi_type == rhs_reg_value.abi_type => { // We have identical ABI types; this is easy rhs_reg_value.abi_type.clone() } (lhs, rhs) => { use std::iter; // We prefer working with unboxed values whenever possible. However, having to box a // value is much worse than working with a boxed temporary. Boxing requires calling in // to the allocator which is one of the most expensive things we can do. // // If both values are boxed then create an boxed phi. This prevents us from "wasting" // a box from prematurely unboxing it and then having to allocate to re-box it later. 
let both_boxed_non_bools = [lhs, rhs].iter().all(|value| { match value { Value::Const(any_ref) => { match any_ref.as_subtype() { boxed::AnySubtype::True(_) | boxed::AnySubtype::False(_) => { // LLVM has trouble following bool values through boxing and // unboxing. Also, boxing bools is relatively cheap because we just // need to return a pointer to the correct singleton value. false } _ => { // `Const`s can be either boxed or unboxed // This effectively means "whatever the other value wants" true } } } Value::Reg(reg_value) => { matches!(reg_value.abi_type, abitype::AbiType::Boxed(_)) } _ => true, } }); let values_iter = iter::once(lhs).chain(iter::once(rhs)); if both_boxed_non_bools { specific_boxed_abi_type_for_values(values_iter).into() } else { specific_abi_type_for_values(values_iter) } } } } ================================================ FILE: compiler/mir/value/synthetic_fun.rs ================================================ use std::collections::HashMap; use arret_syntax::datum::DataStr; use arret_syntax::span::Span; use crate::hir; use crate::hir::var_id::LocalIdAlloc; use crate::mir::env_values::EnvValues; use crate::mir::value; use crate::source::EMPTY_SPAN; use crate::ty; use crate::ty::purity; use crate::ty::purity::Purity; use crate::ty::record; use crate::ty::ty_args::TyArgs; use crate::ty::Ty; struct ExprParam { source_name: DataStr, poly_type: ty::Ref, } fn wrap_poly_expr_in_arret_fun( span: Span, source_name: DataStr, ty_args: TyArgs, expr_params: &[ExprParam], ret_ty: ty::Ref, wrapped_expr: hir::Expr, ) -> value::ArretFun { let mut lia = LocalIdAlloc::new(); let pvars: purity::PVars = ty_args.pvar_purities().keys().cloned().collect(); let tvars: ty::TVars = ty_args.tvar_types().keys().cloned().collect(); let expr_params_with_local_id: Vec<(&ExprParam, hir::LocalId)> = expr_params .iter() .map(|expr_param| (expr_param, lia.alloc_mut())) .collect(); let params = hir::destruc::List::new( expr_params_with_local_id .iter() .map(|(expr_param, 
param_local_id)| { hir::destruc::Destruc::Scalar( span, hir::destruc::Scalar::new( Some(*param_local_id), expr_param.source_name.clone(), expr_param.poly_type.clone(), ), ) }) .collect(), None, ); let fixed_arg_exprs = expr_params_with_local_id .iter() .map(|(expr_param, param_local_id)| hir::Expr { result_ty: expr_param.poly_type.clone(), kind: hir::ExprKind::LocalRef(span, *param_local_id), }) .collect(); value::ArretFun::new( None, Some(source_name), // These are the environment type args, not our own TyArgs::empty(), EnvValues::empty(), hir::Fun { span, pvars, tvars, purity: Purity::Pure.into(), params, ret_ty: ret_ty.clone(), ret_ty_span: None, body_expr: hir::Expr { result_ty: ret_ty, kind: hir::ExprKind::App(Box::new(hir::App { span, fun_expr: wrapped_expr, ty_args, fixed_arg_exprs, rest_arg_expr: None, })), }, }, ) } fn wrap_mono_expr_in_arret_fun( span: Span, source_name: DataStr, expr_params: &[ExprParam], ret_ty: ty::Ref, wrapped_expr: hir::Expr, ) -> value::ArretFun { wrap_poly_expr_in_arret_fun( span, source_name, TyArgs::empty(), expr_params, ret_ty, wrapped_expr, ) } fn new_eq_pred_arret_fun(span: Span) -> value::ArretFun { let expr_params = [ ExprParam { source_name: "left".into(), poly_type: Ty::Any.into(), }, ExprParam { source_name: "right".into(), poly_type: Ty::Any.into(), }, ]; let wrapped_expr = hir::Expr { result_ty: Ty::EqPred.into(), kind: hir::ExprKind::EqPred(span), }; wrap_mono_expr_in_arret_fun( span, "=".into(), &expr_params, Ty::Bool.into(), wrapped_expr, ) } fn new_ty_pred_arret_fun(span: Span, test_ty: ty::pred::TestTy) -> value::ArretFun { let expr_params = [ExprParam { source_name: "subject".into(), poly_type: Ty::Any.into(), }]; let wrapped_expr = hir::Expr { result_ty: Ty::TyPred(test_ty.clone()).into(), kind: hir::ExprKind::TyPred(span, test_ty.clone()), }; wrap_mono_expr_in_arret_fun( span, test_ty.to_string().into(), &expr_params, Ty::Bool.into(), wrapped_expr, ) } fn new_record_cons_arret_fun(span: Span, cons: 
&record::ConsId) -> value::ArretFun { let ty_args = cons.identity_ty_args(); let cons_fun_ty = record::Cons::value_cons_fun_type(cons); let record_instance_ty = Ty::Record(Box::new(record::Instance::new( cons.clone(), ty_args.clone(), ))); let expr_params: Vec = cons .fields() .iter() .map(|field| ExprParam { source_name: field.name().clone(), poly_type: field.ty_ref().clone(), }) .collect(); let wrapped_expr = hir::Expr { result_ty: cons_fun_ty.into(), kind: hir::ExprKind::RecordCons(span, cons.clone()), }; wrap_poly_expr_in_arret_fun( span, cons.value_cons_name().clone(), ty_args, &expr_params, record_instance_ty.into(), wrapped_expr, ) } fn new_field_accessor_arret_fun( span: Span, cons: &record::ConsId, field_index: usize, ) -> value::ArretFun { let ty_args = cons.identity_ty_args(); let field = &cons.fields()[field_index]; let accessor_fun_ty = field.accessor_fun_type(cons); let record_instance_ty = Ty::Record(Box::new(record::Instance::new( cons.clone(), ty_args.clone(), ))); let expr_params = &[ExprParam { source_name: cons.value_cons_name().clone(), poly_type: record_instance_ty.clone().into(), }]; let wrapped_expr = hir::Expr { result_ty: accessor_fun_ty.into(), kind: hir::ExprKind::FieldAccessor(Box::new(hir::FieldAccessor { span, record_cons: cons.clone(), field_index, })), }; wrap_poly_expr_in_arret_fun( span, format!("{}-{}", cons.value_cons_name(), field.name()).into(), ty_args, expr_params, record_instance_ty.into(), wrapped_expr, ) } pub struct SyntheticFuns { eq_pred_arret_fun: Option, ty_pred_arret_fun: HashMap, record_cons_arret_fun: HashMap, field_accessor_arret_fun: HashMap<(record::ConsId, usize), value::ArretFun>, } impl SyntheticFuns { pub fn new() -> Self { Self { eq_pred_arret_fun: None, ty_pred_arret_fun: HashMap::new(), record_cons_arret_fun: HashMap::new(), field_accessor_arret_fun: HashMap::new(), } } pub fn eq_pred_arret_fun(&mut self) -> &value::ArretFun { self.eq_pred_arret_fun .get_or_insert_with(|| 
new_eq_pred_arret_fun(EMPTY_SPAN)) } pub fn ty_pred_arret_fun(&mut self, test_ty: ty::pred::TestTy) -> &value::ArretFun { self.ty_pred_arret_fun .entry(test_ty.clone()) .or_insert_with(|| new_ty_pred_arret_fun(EMPTY_SPAN, test_ty)) } pub fn record_cons_arret_fun(&mut self, cons: &record::ConsId) -> &value::ArretFun { self.record_cons_arret_fun .entry(cons.clone()) .or_insert_with(|| new_record_cons_arret_fun(EMPTY_SPAN, cons)) } pub fn field_accessor_arret_fun( &mut self, cons: &record::ConsId, field_index: usize, ) -> &value::ArretFun { let lookup_key = (cons.clone(), field_index); self.field_accessor_arret_fun .entry(lookup_key) .or_insert_with(|| new_field_accessor_arret_fun(EMPTY_SPAN, cons, field_index)) } } ================================================ FILE: compiler/mir/value/to_const.rs ================================================ use arret_runtime::boxed; use arret_runtime::boxed::prelude::*; use arret_runtime::boxed::refs::Gc; use arret_runtime::boxed::TypeTag; use arret_runtime::class_map; use arret_runtime::intern::InternedSym; use crate::mir::eval_hir::{EvalHirCtx, EvaledRecordClass}; use crate::mir::value::Value; use crate::ty::record; fn record_to_const( ehx: &mut EvalHirCtx, record_cons: &record::ConsId, field_values: &[Value], ) -> Option> { let EvaledRecordClass { jit_record_class_id, jit_data_layout, .. 
} = *ehx.evaled_record_class_for_cons(record_cons); let classmap_class = ehx .as_heap() .type_info() .class_map() .class_for_record_class_id(jit_record_class_id); let data = boxed::RecordData::alloc(jit_data_layout); let classmap_fields: Vec = classmap_class.field_iter().collect(); for (classmap_field, field_value) in classmap_fields.iter().zip(field_values.iter()) { unsafe { use class_map::FieldType; let field_ptr = data.as_ptr().add(classmap_field.offset()); match classmap_field.field_type() { FieldType::Bool => { let bool_ref = &mut *(field_ptr as *mut bool); let boxed_field = value_to_const(ehx, field_value)?; *bool_ref = boxed_field .downcast_ref::() .expect("unexpected value field type while boxing constant bool") .as_bool(); } FieldType::Int => { let int_ref = &mut *(field_ptr as *mut i64); let boxed_field = value_to_const(ehx, field_value)?; *int_ref = boxed_field .downcast_ref::() .expect("unexpected value field type while boxing constant int") .value(); } FieldType::Float => { let float_ref = &mut *(field_ptr as *mut f64); let boxed_field = value_to_const(ehx, field_value)?; *float_ref = boxed_field .downcast_ref::() .expect("unexpected value field type while boxing constant float") .value(); } FieldType::Char => { let char_ref = &mut *(field_ptr as *mut char); let boxed_field = value_to_const(ehx, field_value)?; *char_ref = boxed_field .downcast_ref::() .expect("unexpected value field type while boxing constant char") .value(); } FieldType::InternedSym => { let interned_sym_ref = &mut *(field_ptr as *mut InternedSym); let boxed_field = value_to_const(ehx, field_value)?; *interned_sym_ref = boxed_field .downcast_ref::() .expect("unexpected value field type while boxing constant interned sym") .interned(); } FieldType::Boxed => { let boxed_ref = &mut *(field_ptr as *mut Gc); let boxed_field = value_to_const(ehx, field_value)?; *boxed_ref = boxed_field; } } } } Some(boxed::Record::new(ehx, jit_record_class_id, data).as_any_ref()) } pub fn list_to_const( 
ehx: &mut EvalHirCtx, fixed: &[Value], rest: Option<&Value>, ) -> Option> { let fixed_boxes = fixed .iter() .map(|value| value_to_const(ehx, value)) .collect::>>>()?; let rest_box = match rest { Some(rest) => { let rest_boxed = value_to_const(ehx, rest)?; if let Some(list_ref) = rest_boxed.downcast_ref::>() { list_ref } else { panic!("Attempted to build list with non-list tail"); } } None => boxed::List::::empty(), }; let list = boxed::List::::new_with_tail(ehx, fixed_boxes.into_iter(), rest_box); Some(list.as_any_ref()) } /// Attempts to convert a MIR value to a constant boxed values /// /// Non-singleton regs do not have a constant value at compile time; they will return None pub fn value_to_const(ehx: &mut EvalHirCtx, value: &Value) -> Option> { match value { Value::Const(boxed) => Some(*boxed), Value::List(fixed, Some(rest)) => list_to_const(ehx, fixed, Some(&*rest)), Value::List(fixed, None) => list_to_const(ehx, fixed, None), Value::Record(record_cons, field_values) => record_to_const(ehx, record_cons, field_values), Value::TyPred(test_ty) => { let ty_pred_arret_fun = ehx .synthetic_funs() .ty_pred_arret_fun(test_ty.clone()) .clone(); ehx.arret_fun_to_jit_boxed(&ty_pred_arret_fun) .map(|f| f.as_any_ref()) } Value::EqPred => { let eq_pred_arret_fun = ehx.synthetic_funs().eq_pred_arret_fun().clone(); ehx.arret_fun_to_jit_boxed(&eq_pred_arret_fun) .map(|f| f.as_any_ref()) } Value::ArretFun(ref arret_fun) => ehx .arret_fun_to_jit_boxed(arret_fun) .map(|f| f.as_any_ref()), Value::RecordCons(cons) => { let record_cons_arret_fun = ehx.synthetic_funs().record_cons_arret_fun(cons).clone(); ehx.arret_fun_to_jit_boxed(&record_cons_arret_fun) .map(|f| f.as_any_ref()) } Value::FieldAccessor(cons, field_index) => { let field_accessor_arret_fun = ehx .synthetic_funs() .field_accessor_arret_fun(cons, *field_index) .clone(); ehx.arret_fun_to_jit_boxed(&field_accessor_arret_fun) .map(|f| f.as_any_ref()) } Value::RustFun(ref rust_fun) => { 
Some(ehx.rust_fun_to_jit_boxed(rust_fun.clone()).as_any_ref()) } Value::Reg(ref reg_value) => { if reg_value.possible_type_tags == TypeTag::Nil.into() { Some(boxed::NIL_INSTANCE.as_any_ref()) } else if reg_value.possible_type_tags == TypeTag::True.into() { Some(boxed::TRUE_INSTANCE.as_any_ref()) } else if reg_value.possible_type_tags == TypeTag::False.into() { Some(boxed::FALSE_INSTANCE.as_any_ref()) } else { None } } } } ================================================ FILE: compiler/mir/value/types.rs ================================================ use arret_runtime::boxed; use arret_runtime::boxed::prelude::*; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::tagset::TypeTagSet; use crate::mir::value::Value; use crate::ty; use crate::ty::record; use crate::ty::Ty; /// Compact hint for `RegValue`'s type that can't be captured in its type tags /// /// To allow type hints to apply to unions, each hint is predicated on the value having the /// appropriate type. For example, `KnownRecordCons` does not imply that the value is a record, its /// type tag must be checked first. /// /// It's possible for multiple `TypeHint`s to be applicable to the same type. However, this is /// unlikely so only a single type hint will be stored. The choice of type hint in these cases is /// arbitrary. 
#[derive(Debug, Clone, PartialEq)] pub enum TypeHint { /// Record of a known class KnownRecordCons(record::ConsId), /// List of a known length KnownListLen(usize), /// Vector of a known length KnownVectorLen(usize), /// No type hint None, } #[derive(PartialEq, Debug)] enum FoundRecordConses<'a> { Multi, Single(&'a record::ConsId), None, } /// Looks for the possible record conses of a type reference fn find_record_conses_for_ty_ref(ty_ref: &ty::Ref) -> FoundRecordConses<'_> where M: ty::Pm, { match ty_ref.try_to_fixed() { Some(Ty::Union(members)) => members .iter() .map(|member| find_record_conses_for_ty_ref(member)) .fold(FoundRecordConses::None, |member1, member2| { match (member1, member2) { (FoundRecordConses::Multi, _) | (_, FoundRecordConses::Multi) => { FoundRecordConses::Multi } (FoundRecordConses::None, FoundRecordConses::Single(single)) | (FoundRecordConses::Single(single), FoundRecordConses::None) => { FoundRecordConses::Single(single) } (FoundRecordConses::Single(single1), FoundRecordConses::Single(single2)) => { if single1 == single2 { FoundRecordConses::Single(single1) } else { FoundRecordConses::Multi } } (FoundRecordConses::None, FoundRecordConses::None) => FoundRecordConses::None, } }), Some(Ty::Record(instance)) => FoundRecordConses::Single(instance.cons()), Some(Ty::RecordClass(cons)) => FoundRecordConses::Single(cons), // These could be anything None | Some(Ty::Any) | Some(Ty::TopRecord) => FoundRecordConses::Multi, Some(_) => FoundRecordConses::None, } } pub fn type_hint_for_ty_ref(ty_ref: &ty::Ref) -> TypeHint where M: ty::Pm, { if let FoundRecordConses::Single(known_record_cons) = find_record_conses_for_ty_ref(ty_ref) { return TypeHint::KnownRecordCons(known_record_cons.clone()); } if let Some(Ty::List(list)) = ty_ref.try_to_fixed() { let std::ops::Range { start, end } = list.size_range(); if start == end { return TypeHint::KnownListLen(start); } } if let Some(Ty::Vector(members)) = ty_ref.try_to_fixed() { return 
TypeHint::KnownVectorLen(members.len()); } TypeHint::None } pub fn known_record_cons_for_value<'a>( ehx: &'a EvalHirCtx, value: &'a Value, ) -> Option<&'a record::ConsId> { match value { Value::Const(any_ref) => any_ref.downcast_ref::().map(|record_ref| { ehx.cons_for_jit_record_class_id(record_ref.class_id()) .expect("unable to lookup record cons for JIT record class ID") }), Value::Record(cons, _) => Some(cons), Value::Reg(reg_value) => { if let TypeHint::KnownRecordCons(ref cons) = reg_value.type_hint { Some(cons) } else { None } } _ => None, } } pub fn known_vector_len_for_value(value: &Value) -> Option { match value { Value::Const(any_ref) => any_ref .downcast_ref::() .map(|vector_ref| vector_ref.len()), Value::Reg(reg_value) => { if let TypeHint::KnownVectorLen(known_len) = reg_value.type_hint { Some(known_len) } else { None } } _ => None, } } pub fn type_hint_for_value(ehx: &EvalHirCtx, value: &Value) -> TypeHint { if let Some(cons) = known_record_cons_for_value(ehx, value) { return TypeHint::KnownRecordCons(cons.clone()); } match value { Value::Const(any_ref) => any_ref .downcast_ref::() .map(|vector_ref| TypeHint::KnownVectorLen(vector_ref.len())) .unwrap_or(TypeHint::None), Value::Reg(reg_value) => reg_value.type_hint.clone(), _ => TypeHint::None, } } /// Returns a TypeTagSet containing the possible type tags for a given value pub fn possible_type_tags_for_value(value: &Value) -> TypeTagSet { match value { Value::Const(any_ref) => any_ref.header().type_tag().into(), Value::ArretFun(_) | Value::RustFun(_) | Value::TyPred(_) | Value::EqPred | Value::RecordCons(_) | Value::FieldAccessor(_, _) => boxed::TypeTag::FunThunk.into(), Value::List(fixed, rest) => { if !fixed.is_empty() { // Non-empty list boxed::TypeTag::Pair.into() } else if let Some(tail) = rest { possible_type_tags_for_value(tail) } else { // Empty list boxed::TypeTag::Nil.into() } } Value::Record(_, _) => boxed::TypeTag::Record.into(), Value::Reg(reg_value) => reg_value.possible_type_tags, } } 
/// Annotates an existing value with Arret type information /// /// For the majority of values this is a no-op. For this reason this function takes a builder for /// the Arret type that is only invoked if the type information can be used. pub fn value_with_arret_ty( heap: &mut impl boxed::AsHeap, value: Value, build_arret_ty: F, ) -> Value where F: FnOnce() -> ty::Ref, { if let Value::Reg(reg_value) = value { use crate::mir::value::from_reg::refine_reg_value_with_arret_ty; // This could be useful; request the type let arret_ty = build_arret_ty(); refine_reg_value_with_arret_ty(heap, ®_value, &arret_ty) } else { value } } #[cfg(test)] mod test { use super::*; use crate::hir::tvar_bounded_by; use crate::source::EMPTY_SPAN; use crate::ty::ty_args::TyArgs; #[test] fn test_find_record_conses_for_ty_ref() { let cons1 = record::Cons::new( EMPTY_SPAN, "cons1".into(), "cons1?".into(), None, Box::new([]), ); let cons2 = record::Cons::new( EMPTY_SPAN, "cons2".into(), "cons2?".into(), None, Box::new([]), ); let class1_poly: ty::Ref = cons1.clone().into(); let class2_poly: ty::Ref = cons2.clone().into(); let instance1_poly: ty::Ref = record::Instance::new(cons1.clone(), TyArgs::empty()).into(); let instance2_poly: ty::Ref = record::Instance::new(cons2.clone(), TyArgs::empty()).into(); // Unit type can't contain a record type assert_eq!( FoundRecordConses::None, find_record_conses_for_ty_ref::(&Ty::unit().into()) ); // `Any` could contain any record cons assert_eq!( FoundRecordConses::Multi, find_record_conses_for_ty_ref::(&Ty::Any.into()) ); // `TopRecord` could contain any record cons assert_eq!( FoundRecordConses::Multi, find_record_conses_for_ty_ref::(&Ty::TopRecord.into()) ); // TVar could contain any record cons assert_eq!( FoundRecordConses::Multi, find_record_conses_for_ty_ref(&tvar_bounded_by(Ty::Any.into())) ); // Class type can have the record cons assert_eq!( FoundRecordConses::Single(&cons1), find_record_conses_for_ty_ref(&class1_poly) ); // Instance type can have 
the record cons assert_eq!( FoundRecordConses::Single(&cons2), find_record_conses_for_ty_ref(&instance2_poly) ); // Union of class and instance of the same class has the record cons assert_eq!( FoundRecordConses::Single(&cons1), find_record_conses_for_ty_ref( &Ty::Union(Box::new([class1_poly, instance1_poly.clone()])).into() ) ); // Bool + record could only have the record cons assert_eq!( FoundRecordConses::Single(&cons2), find_record_conses_for_ty_ref( &Ty::Union(Box::new([Ty::Bool.into(), instance2_poly.clone()])).into() ) ); // Multiple record types assert_eq!( FoundRecordConses::Multi, find_record_conses_for_ty_ref( &Ty::Union(Box::new([class2_poly, instance1_poly])).into() ) ); // TVar inside a union could be any record type assert_eq!( FoundRecordConses::Multi, find_record_conses_for_ty_ref( &Ty::Union(Box::new([tvar_bounded_by(Ty::Any.into()), instance2_poly])).into() ) ); } } ================================================ FILE: compiler/mir/vector_member.rs ================================================ use arret_syntax::span::Span; use arret_runtime::abitype; use crate::mir::builder::Builder; use crate::mir::eval_hir::EvalHirCtx; use crate::mir::value::Value; fn vector_member_type(vector_value: &Value) -> &abitype::BoxedAbiType { if let Value::Reg(reg_value) = vector_value { if let abitype::AbiType::Boxed(abitype::BoxedAbiType::Vector(member_boxed_abi_type)) = ®_value.abi_type { return *member_boxed_abi_type; } } &abitype::BoxedAbiType::Any } /// Loads a vector member from a vector of known length /// /// [`vector_length`] must be less than [`MAX_DIRECT_ACCESS_LENGTH`] pub fn load_vector_member( ehx: &mut EvalHirCtx, b: &mut Builder, span: Span, vector_len: usize, vector_value: &Value, member_index: usize, ) -> Value { use crate::mir::ops::*; use crate::mir::tagset::TypeTagSet; use crate::mir::value::build_reg::value_to_reg; use crate::mir::value::types::TypeHint; use crate::mir::value::RegValue; let member_possible_type_tags: TypeTagSet = 
vector_member_type(vector_value).into();

    let vector_reg = value_to_reg(
        ehx,
        b,
        span,
        vector_value,
        &abitype::BoxedAbiType::Vector(&abitype::BoxedAbiType::Any).into(),
    );

    let member_reg = b.push_reg(
        span,
        OpKind::LoadBoxedVectorMember,
        LoadBoxedVectorMemberOp {
            vector_reg: vector_reg.into(),
            // Both fields are already `usize`; the original `as usize` casts were no-ops
            known_vector_len: vector_len,
            member_index,
        },
    );

    (RegValue {
        reg: member_reg,
        possible_type_tags: member_possible_type_tags,
        abi_type: abitype::BoxedAbiType::Any.into(),
        type_hint: TypeHint::None,
    })
    .into()
}

================================================
FILE: compiler/promise.rs
================================================
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::{Arc, Condvar, Mutex, RwLock};

/// Shared state between a `Completer` and its `Promise`s
struct Inner<T>
where
    T: Send + Clone,
{
    /// Completed value, or `None` while the computation is still in progress
    value: Mutex<Option<T>>,
    /// Wakes blocked `Promise::value` callers once the value is set
    waker: Condvar,
}

/// Handle used to complete an associated promise
struct Completer<T>
where
    T: Send + Clone,
{
    inner: Arc<Inner<T>>,
}

impl<T> Completer<T>
where
    T: Send + Clone,
{
    /// Sets the associated promise as complete
    pub fn set(self, new_value: T) {
        let mut value_lock = self.inner.value.lock().unwrap();
        value_lock.replace(new_value);
        // Release the lock before waking waiters so they can immediately acquire it
        drop(value_lock);

        self.inner.waker.notify_all();
    }
}

/// Promise of a future computation
///
/// The Promise is initially incomplete. Once it's been completed it will return clones of the
/// completed value until it's dropped.
#[derive(Clone)]
struct Promise<T>
where
    T: Send + Clone,
{
    inner: Arc<Inner<T>>,
}

impl<T> Promise<T>
where
    T: Send + Clone,
{
    /// Waits until the promise is complete and returns a clone of its value
    pub fn value(&self) -> T {
        let mut value_lock = self.inner.value.lock().unwrap();

        // Loop to tolerate spurious condvar wakeups
        loop {
            match value_lock.deref() {
                Some(value) => break value.clone(),
                None => {
                    value_lock = self.inner.waker.wait(value_lock).unwrap();
                }
            }
        }
    }
}

/// Creates new completer and promise
fn promise<T>() -> (Completer<T>, Promise<T>)
where
    T: Send + Clone,
{
    let inner = Arc::new(Inner {
        value: Mutex::new(None),
        waker: Condvar::new(),
    });

    (
        Completer {
            inner: inner.clone(),
        },
        Promise { inner },
    )
}

/// Create an immediately completed promise
fn completed<T>(value: T) -> Promise<T>
where
    T: Send + Clone,
{
    Promise {
        inner: Arc::new(Inner {
            value: Mutex::new(Some(value)),
            waker: Condvar::new(),
        }),
    }
}

/// Concurrent map of keys to values where each key is only calculated once
pub struct PromiseMap<K, V>
where
    K: std::hash::Hash,
    V: Send + Clone,
{
    promises: RwLock<HashMap<K, Promise<V>>>,
}

impl<K, V> PromiseMap<K, V>
where
    K: std::hash::Hash + Eq,
    V: Send + Clone,
{
    /// Creates a new `PromiseMap` with the passed values
    pub fn new(values: impl IntoIterator<Item = (K, V)>) -> Self {
        PromiseMap {
            promises: RwLock::new(values.into_iter().map(|(k, v)| (k, completed(v))).collect()),
        }
    }

    /// Fetches the value from the promise map or inserts it if it does not exist
    ///
    /// Each key will only be calculated once. If a calculation is already in progress on another
    /// thread the current thread will block until the existing calculation completes.
    pub fn get_or_insert_with<F>(&self, key: K, func: F) -> V
    where
        F: FnOnce() -> V,
    {
        // Opportunistically try to fetch the promise with a read lock
        let promises_read = self.promises.read().unwrap();
        if let Some(promise) = promises_read.get(&key) {
            let cloned_promise = promise.clone();
            // Don't hold the map lock while blocking on the promise
            drop(promises_read);
            return cloned_promise.value();
        }
        drop(promises_read);

        // Try again with a write lock to ensure another thread didn't already insert
        let mut promises_write = self.promises.write().unwrap();
        if let Some(promise) = promises_write.get(&key) {
            let cloned_promise = promise.clone();
            drop(promises_write);
            return cloned_promise.value();
        }

        // Publish an incomplete promise before computing so concurrent callers block on it
        let (completer, promise) = promise();
        promises_write.insert(key, promise);
        drop(promises_write);

        // Build a new value. This is presumably expensive
        let value = func();
        completer.set(value.clone());

        value
    }
}

================================================
FILE: compiler/repl.rs
================================================
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::thread;

use codespan_reporting::diagnostic::Diagnostic;

use arret_syntax::datum::DataStr;
use arret_syntax::span::FileId;

use crate::context;
use crate::context::ModuleId;
use crate::hir;
use crate::hir::scope::Scope;
use crate::reporting::{diagnostic_for_syntax_error, errors_to_diagnostics, new_primary_label};
use crate::ty;
use crate::CompileCtx;

use crate::mir::eval_hir::EvalHirCtx;
use crate::mir::Value;
use crate::typeck::infer::{infer_module, infer_repl_expr};

/// Indicates the kind of evaluation to perform on the input
///
/// This applies to expressions; it has no effect on empty input or definitions
#[derive(Clone, Copy)]
pub enum EvalKind {
    /// Infers the type of the expression
    ///
    /// This only runs as far as type checking
    Type,

    /// Fully evaluates the expression
    Value,
}

#[derive(Debug, PartialEq)]
pub struct EvaledExprValue {
    /// Rendered type of the expression
    pub type_str: String,

    /// Rendered value of the expression
    pub value_str:
String, /// Indicates if the type is a literal /// /// REPL implementations may want to suppress printing the type of literal values as they /// contain no additional information. pub type_is_literal: bool, } #[derive(Debug, PartialEq)] pub enum EvaledLine { /// Line was all whitepsace EmptyInput, /// Line added new definitions /// /// All bound identifiers in the root scope are returned. This is useful for tab & /// autocompletion. Defs(Vec), /// Line was evaluated to a type with the given name ExprType(String), /// Line was evaluate to a value ExprValue(EvaledExprValue), } struct ReplEngine<'ccx> { root_scope: Scope<'static>, ccx: &'ccx CompileCtx, inferred_module_vars: HashMap>>>, seen_modules: HashSet, ehx: EvalHirCtx, } impl<'ccx> ReplEngine<'ccx> { fn new(ccx: &'ccx CompileCtx) -> Self { Self { root_scope: Scope::root(), ccx, seen_modules: HashSet::new(), inferred_module_vars: HashMap::new(), ehx: EvalHirCtx::new(ccx.enable_optimisations()), } } /// Returns all names bound in the root scope and namespace fn bound_names(&self) -> Vec { self.root_scope .bound_idents() .filter_map(move |ident| { if ident.ns_id() == Scope::root_ns_id() { Some(ident.name().clone()) } else { None } }) .collect() } /// Visits a subtree of modules and adds any missing defs and inferred module vars fn visit_module_tree( &mut self, root_module: &Arc, ) -> Result<(), Vec>> { if self.seen_modules.contains(&root_module.module_id) { return Ok(()); } self.seen_modules.insert(root_module.module_id); // Make sure our imports are first for import in root_module.imports.values() { self.visit_module_tree(import)?; } self.inferred_module_vars .insert(root_module.module_id, root_module.inferred_locals.clone()); self.ehx .visit_module_defs(root_module.module_id, &root_module.defs)?; Ok(()) } fn eval_line( &mut self, input: String, kind: EvalKind, ) -> Result>> { use std::io::Write; use crate::hir::lowering::LoweredReplDatum; let source_file = self.ccx.source_loader().load_string("repl".into(), 
input); let input_data = source_file .parsed() .map_err(|err| vec![diagnostic_for_syntax_error(&err)])?; let input_datum = match input_data { [] => { return Ok(EvaledLine::EmptyInput); } [input_datum] => input_datum, _ => { let extra_span = input_data[1].span(); return Err(vec![Diagnostic::error() .with_message("unexpected trailing datum") .with_labels(vec![new_primary_label( extra_span, "trailing datum", )])]); } }; let module_id = ModuleId::alloc(); let mut child_scope = Scope::child(&self.root_scope); let lowered_repl_datum = hir::lowering::lower_repl_datum(self.ccx, &mut child_scope, input_datum) .map_err(errors_to_diagnostics)?; // Bring all the defs back in to root scope let exported_bindings = child_scope.into_exported_bindings(); self.root_scope .import_bindings(exported_bindings, module_id); match lowered_repl_datum { LoweredReplDatum::Import(modules) => { for module in modules.values() { self.visit_module_tree(module)?; } Ok(EvaledLine::Defs(self.bound_names())) } LoweredReplDatum::EvaluableDef(def) => { let inferred_module = infer_module(&self.inferred_module_vars, vec![def]) .map_err(errors_to_diagnostics)?; self.inferred_module_vars .insert(module_id, Arc::new(inferred_module.inferred_locals)); self.ehx .consume_module_defs(module_id, inferred_module.defs)?; Ok(EvaledLine::Defs(self.bound_names())) } LoweredReplDatum::NonEvaluableDef => { // This was handled entirely by HIR lowering Ok(EvaledLine::Defs(self.bound_names())) } LoweredReplDatum::Expr(decl_expr) => { let node = infer_repl_expr(&self.inferred_module_vars, decl_expr)?; let type_str = hir::str_for_ty_ref(node.result_ty()); match kind { EvalKind::Type => Ok(EvaledLine::ExprType(type_str)), EvalKind::Value => { use crate::mir::eval_hir::FunCtx; use arret_runtime_syntax::writer; use std::str; let type_is_literal = ty::props::is_literal(node.result_ty()); // Evaluate the expression let mut fcx = FunCtx::new(None); let value = self .ehx .consume_expr(&mut fcx, &mut None, node.into_expr())?; let 
boxed = self .ehx .value_to_const(&value) .expect("Received register from MIR evaluation"); // Write the result to a string let mut output_buf: Vec = vec![]; writer::write_boxed(&mut output_buf, &self.ehx, boxed).unwrap(); // Just `#fn` isn't very useful, even with a type. Add the source name. match value { Value::ArretFun(arret_fun) => { if let Some(source_name) = arret_fun.source_name() { write!(&mut output_buf, "/{}", source_name).unwrap(); } } Value::RustFun(rust_fun) => { write!(&mut output_buf, "/{}", rust_fun.symbol()).unwrap(); } _ => {} } let value_str = str::from_utf8(output_buf.as_slice()).unwrap().to_owned(); Ok(EvaledLine::ExprValue(EvaledExprValue { type_str, value_str, type_is_literal, })) } } } } } } pub struct ReplCtx { send_line: crossbeam_channel::Sender<(String, EvalKind)>, receive_result: crossbeam_channel::Receiver>>>, } #[derive(Debug)] pub struct EngineDisconnected; impl ReplCtx { /// Creates a new `ReplCtx` /// /// This will launch a REPL engine thread which can asynchronously evaluate lines. pub fn new(ccx: Arc) -> Self { let (send_line, receive_line) = crossbeam_channel::unbounded(); let (send_result, receive_result) = crossbeam_channel::unbounded(); thread::spawn(move || { let mut engine = ReplEngine::new(&ccx); for (input, kind) in receive_line.iter() { let result = engine.eval_line(input, kind); send_result.send(result).unwrap(); if engine.ehx.should_collect() { engine.ehx.collect_garbage(); } } }); Self { send_line, receive_result, } } /// Sends a line to be evaluated by the REPL engine /// /// This is asynchronous and an unlimited number of lines can be sent before reading their /// results. This allows the calling thread to remain responsive to user input while evaluation, /// garbage collection, etc occurs. 
pub fn send_line(&self, input: String, kind: EvalKind) -> Result<(), EngineDisconnected> { self.send_line .send((input, kind)) .map_err(|_| EngineDisconnected) } /// Receives the next result from the REPL engine /// /// These will be returned in the order they were submitted with `send_line`. pub fn receive_result(&self) -> Result>> { self.receive_result.recv().unwrap() } } #[cfg(test)] mod test { use super::*; fn eval_line_sync( rcx: &mut ReplCtx, input: String, kind: EvalKind, ) -> Result>> { rcx.send_line(input, kind).unwrap(); rcx.receive_result() } fn assert_defs(rcx: &mut ReplCtx, line: &'static str) { match eval_line_sync(rcx, line.to_owned(), EvalKind::Value).unwrap() { EvaledLine::Defs(_) => {} other => { panic!("Expected defs, got {:?}", other); } } } fn assert_empty(rcx: &mut ReplCtx, line: &'static str) { assert_eq!( EvaledLine::EmptyInput, eval_line_sync(rcx, line.to_owned(), EvalKind::Value).unwrap() ); } fn assert_expr( rcx: &mut ReplCtx, expected_value: &'static str, expected_type: &'static str, line: &'static str, ) { assert_eq!( EvaledLine::ExprType(expected_type.to_owned()), eval_line_sync(rcx, line.to_owned(), EvalKind::Type).unwrap() ); match eval_line_sync(rcx, line.into(), EvalKind::Value).unwrap() { EvaledLine::ExprValue(EvaledExprValue { value_str, type_str, .. 
}) => { assert_eq!(value_str, expected_value.to_owned()); assert_eq!(type_str, expected_type.to_owned()); } other => { panic!("unexpected REPL result: {:?}", other); } } } #[test] fn basic_session() { use crate::codegen::test::initialise_test_llvm; use crate::PackagePaths; initialise_test_llvm(); let ccx = Arc::new(CompileCtx::new(PackagePaths::test_paths(None), true)); let mut rcx = ReplCtx::new(ccx); assert_empty(&mut rcx, " "); assert_empty(&mut rcx, "; COMMENT!"); assert_expr(&mut rcx, "1", "Int", "1"); eval_line_sync( &mut rcx, "(import [stdlib base])".to_owned(), EvalKind::Value, ) .expect( "unable to load stdlib library; you may need to `cargo build` before running tests", ); // Make sure we can references vars from the imported module assert_expr(&mut rcx, "true", "true", "(int? 5)"); // Make sure we can redefine assert_defs(&mut rcx, "(def x 'first)"); assert_defs(&mut rcx, "(def x 'second)"); assert_expr(&mut rcx, "second", "'second", "x"); // `(do)` at the expression level assert_expr(&mut rcx, "baz", "'baz", "(do 'foo 'bar 'baz)"); // Polymorphic capturing closures assert_defs( &mut rcx, "(def return-constant (fn #{T} ([x T]) (fn () -> T x)))", ); assert_defs(&mut rcx, "(def return-one (return-constant 1))"); assert_defs(&mut rcx, "(def return-two (return-constant 'two))"); assert_expr(&mut rcx, "1", "Int", "(return-one)"); assert_expr(&mut rcx, "two", "'two", "(return-two)"); } } ================================================ FILE: compiler/reporting.rs ================================================ use codespan_reporting::diagnostic::{Diagnostic, Label}; use arret_syntax::span::{FileId, Span}; use crate::source::SourceLoader; /// Traces the location of report through macro expansions #[derive(Debug, PartialEq, Clone)] pub struct LocTrace { origin: Span, macro_invocation: Option, } impl LocTrace { pub fn new(origin: Span, macro_invocation: Option) -> LocTrace { LocTrace { origin, macro_invocation, } } pub fn with_macro_invocation(self, 
macro_invocation: Span) -> LocTrace { LocTrace { macro_invocation: Some(macro_invocation), ..self } } pub fn origin(&self) -> Span { self.origin } pub fn macro_invocation(&self) -> Option { self.macro_invocation } pub fn label_macro_invocation(&self, mut diagnostic: Diagnostic) -> Diagnostic { match self.macro_invocation { Some(macro_invocation_span) if !macro_invocation_span.contains(self.origin) => { let secondary_label = new_secondary_label(macro_invocation_span, "in this macro invocation"); diagnostic.labels.push(secondary_label); diagnostic } _ => diagnostic, } } } impl From for LocTrace { fn from(span: Span) -> LocTrace { LocTrace::new(span, None) } } /// Helper for converting a series of errors in to diagnostics /// /// This is intended for use with `map_err` pub fn errors_to_diagnostics>>( errors: Vec, ) -> Vec> { errors.into_iter().map(Into::into).collect() } /// Returns a diagnostic for the passed syntax errror /// /// This is required because `arret-syntax` doesn't depend on `codespan-reporting`. It requires /// its consumers to handle reporting themselves. 
pub fn diagnostic_for_syntax_error(error: &arret_syntax::error::Error) -> Diagnostic<FileId> {
    let origin = error.span();
    let within = error.kind().within_context();

    // Prefer a description of what the parser expected next; fall back to a generic label
    let primary_label_message = within
        .and_then(|within| within.expected_next())
        .map(|en| en.description())
        .unwrap_or_else(|| "syntax error".to_owned());

    let primary_label = new_primary_label(origin, primary_label_message);

    let diagnostic = Diagnostic::error()
        .with_message(error.kind().message())
        .with_labels(vec![]);

    // If the error occurred inside a delimited context, point at its opening character too
    if let Some(within) = within {
        if let Some(open_char_span) = within.open_char_span() {
            let secondary_label = new_secondary_label(
                open_char_span,
                format!("{} starts here", within.description()),
            );

            return diagnostic.with_labels(vec![primary_label, secondary_label]);
        }
    }

    diagnostic.with_labels(vec![primary_label])
}

/// Creates a primary label covering `span` with the given message
pub fn new_primary_label(span: Span, message: impl Into<String>) -> Label<FileId> {
    Label::primary(span.file_id().unwrap(), span.byte_range()).with_message(message)
}

/// Creates a secondary label covering `span` with the given message
pub fn new_secondary_label(span: Span, message: impl Into<String>) -> Label<FileId> {
    Label::secondary(span.file_id().unwrap(), span.byte_range()).with_message(message)
}

/// Emits a series of diagnostics to standard error
///
/// This ensures the diagnostics are emitted as a contiguous group even when multiple threads
/// are emitting concurrently.
pub fn emit_diagnostics_to_stderr(
    source_loader: &SourceLoader,
    diagnostics: impl IntoIterator<Item = Diagnostic<FileId>>,
) {
    use codespan_reporting::term;
    use termcolor::{ColorChoice, StandardStream};

    let config = term::Config::default();

    // Holding the stream lock for the whole batch keeps the group contiguous
    let stderr = StandardStream::stderr(ColorChoice::Auto);
    let mut stderr_lock = stderr.lock();

    for diagnostic in diagnostics {
        // Ignore emit failures; diagnostics output is best-effort
        let _ = codespan_reporting::term::emit(
            &mut stderr_lock,
            &config,
            &source_loader.files(),
            &diagnostic,
        );
    }
}

================================================
FILE: compiler/rfi/mod.rs
================================================
use std::ffi::OsString;
use std::sync::Arc;
use std::{fmt, path};

use arret_syntax::datum::Datum;
use arret_syntax::span::Span;

use crate::hir;
use crate::hir::error::{Error, ErrorKind};
use crate::hir::ns::NsDatum;
use crate::hir::scope::Scope;
use crate::source::SourceLoader;
use crate::ty;
use crate::ty::Ty;

use arret_runtime::{abitype, binding};

/// Rust library loaded over the RFI
pub struct Library {
    /// Handle to the dynamically loaded native library
    pub loaded: libloading::Library,
    /// Path to the static library used when building for the target
    pub target_path: Box<path::Path>,
    /// Functions exported by the library, keyed by their Arret-visible name
    pub exported_funs: Box<[(&'static str, Arc<Fun>)]>,
}

impl fmt::Debug for Library {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "rfi::Library({})", self.target_path.to_string_lossy())
    }
}

#[derive(Debug, PartialEq, Clone)]
pub struct Fun {
    /// Name of this function if it corresponds to an intrinsic
    ///
    /// Intrinsics may have optimised partial evaluation in MIR. However, they should be
    /// semantically equivalent to the non-intrinsic version.
intrinsic_name: Option<&'static str>, span: Span, arret_fun_type: ty::Fun, takes_task: bool, params: &'static [abitype::ParamAbiType], ret: &'static abitype::RetAbiType, symbol: &'static str, entry_point: usize, } impl Fun { pub fn intrinsic_name(&self) -> Option<&'static str> { self.intrinsic_name } pub fn span(&self) -> Span { self.span } pub fn arret_fun_type(&self) -> &ty::Fun { &self.arret_fun_type } pub fn symbol(&self) -> &'static str { self.symbol } pub fn entry_point(&self) -> usize { self.entry_point } pub fn takes_task(&self) -> bool { self.takes_task } pub fn params(&self) -> &'static [abitype::ParamAbiType] { self.params } pub fn has_rest(&self) -> bool { self.arret_fun_type.params().has_rest() } pub fn ret(&self) -> &'static abitype::RetAbiType { self.ret } } pub struct Loader { type_scope: Scope<'static>, } /// Ensure that the specified Arret type is compatible with the corresponding Rust type /// /// The Arret types are strictly more expressive than the Rust types. This simply checks that the /// Arret type is more specific than the Rust type. 
fn ensure_types_compatible( span: Span, arret_poly: &ty::Ref, abi_type: &T, ) -> Result<(), Error> where T: ty::conv_abi::ConvertableAbiType, { if ty::is_a::ty_ref_is_a(arret_poly, &abi_type.to_ty_ref()) { Ok(()) } else { Err(Error::new( span, ErrorKind::RustFunError( format!( "Rust type `{}` does not match declared Arret type of `{}`", abi_type.to_rust_str(), hir::str_for_ty_ref(arret_poly), ) .into_boxed_str(), ), )) } } #[derive(Clone, Copy)] enum LibType { Static, Dynamic, } fn build_rfi_lib_path(base: &path::Path, package_name: &str, lib_type: LibType) -> path::PathBuf { let mut path_buf = path::PathBuf::new(); path_buf.push(base); #[cfg(debug_assertions)] path_buf.push("debug"); #[cfg(not(debug_assertions))] path_buf.push("release"); match lib_type { LibType::Dynamic => { #[cfg(any(target_os = "macos", target_os = "ios"))] path_buf.push(format!("lib{}.dylib", package_name)); #[cfg(all(not(target_os = "macos"), not(target_os = "ios"),))] path_buf.push(format!("lib{}.so", package_name)); } LibType::Static => { path_buf.push(format!("lib{}.a", package_name)); } } path_buf } impl Loader { pub fn new() -> Loader { Loader { type_scope: Scope::new_with_primitives(), } } fn process_rust_fun( &self, arret_type_datum: &Datum, entry_point: usize, rust_fun: &'static binding::RustFun, intrinsic_name: Option<&'static str>, ) -> Result { let ns_datum = NsDatum::from_syntax_datum(arret_type_datum); let span = ns_datum.span(); // Lower the Arret type using a fixed scope let poly_type = hir::lower_poly(&self.type_scope, ns_datum)?; // Ensure the type is actually a function type let poly_fun_type = if let ty::Ref::Fixed(Ty::Fun(fun_type)) = poly_type { fun_type } else { return Err(Error::new( span, ErrorKind::RustFunError("function type expected".into()), )); }; let pvars = poly_fun_type.pvars(); let tvars = poly_fun_type.tvars(); // The Rust function signature should satisfy the upper bound of the Arret type let pta = ty::ty_args::TyArgs::from_upper_bound(pvars, tvars); let 
upper_fun_type = ty::subst::subst_poly_fun(&pta, &*poly_fun_type); // Calculate how many parameters the Rust function should accept let expected_rust_params = upper_fun_type.params().fixed().len() + upper_fun_type.params().has_rest() as usize; if expected_rust_params != rust_fun.params.len() { return Err(Error::new( span, ErrorKind::RustFunError( format!( "expected Rust function to have {} parameters; has {}", expected_rust_params, rust_fun.params.len() ) .into_boxed_str(), ), )); } let mut abi_params_iter = rust_fun.params.iter(); // If there are rest types ensure they're compatible let arret_rest = upper_fun_type.params().rest(); if !arret_rest.is_never() { use arret_runtime::abitype::{AbiType, BoxedAbiType}; let last_rust_param = abi_params_iter.next_back().unwrap(); if let AbiType::Boxed(BoxedAbiType::List(elem)) = &last_rust_param.abi_type { ensure_types_compatible(span, arret_rest, *elem)?; } else { return Err(Error::new( span, ErrorKind::RustFunError("expected Rust function to have `boxed::List` as last parameter to receive the rest argument".into()) )); } }; // Ensure the fixed types are compatible for (arret_fixed_poly, rust_fixed_poly) in upper_fun_type.params().fixed().iter().zip(abi_params_iter) { ensure_types_compatible(span, arret_fixed_poly, &rust_fixed_poly.abi_type)?; } // And the return type // // Note that we don't care about contravariance here; simply that the types are compatible ensure_types_compatible(span, upper_fun_type.ret(), &rust_fun.ret)?; Ok(Fun { intrinsic_name, span: arret_type_datum.span(), arret_fun_type: *poly_fun_type, takes_task: rust_fun.takes_task, params: rust_fun.params, ret: &rust_fun.ret, symbol: rust_fun.symbol, entry_point, }) } pub fn load( &self, span: Span, source_loader: &SourceLoader, native_base_path: &path::Path, target_base_path: &path::Path, package_name: &str, ) -> Result { let native_path = build_rfi_lib_path(native_base_path, package_name, LibType::Dynamic); let target_path = 
build_rfi_lib_path(target_base_path, package_name, LibType::Static); let map_loader_err = |err: libloading::Error| match err { libloading::Error::DlOpen { .. } | libloading::Error::DlOpenUnknown => Error::new( span, ErrorKind::ModuleNotFound(native_path.clone().into_boxed_path()), ), _ => Error::new( span, ErrorKind::ReadError(native_path.clone().into_boxed_path()), ), }; let loaded = unsafe { libloading::Library::new(&native_path).map_err(map_loader_err)? }; let exports_symbol_name = format!("ARRET_{}_RUST_EXPORTS", package_name.to_uppercase()); let exports: binding::RustExports = unsafe { let exports_symbol = loaded .get::<*const binding::RustExports>(exports_symbol_name.as_bytes()) .map_err(map_loader_err)?; **exports_symbol }; source_loader.reserve(exports.len()); let exported_funs = exports .iter() .map(|(fun_name, rust_fun)| { let entry_point_address = unsafe { *loaded .get::(rust_fun.symbol.as_bytes()) .map_err(map_loader_err)? }; // Parse the declared Arret type string as a datum let mut file_map_name = OsString::with_capacity(native_path.as_os_str().len() + 1 + fun_name.len()); file_map_name.push(native_path.as_os_str()); file_map_name.push(":"); file_map_name.push(fun_name); let arret_type_source_file = source_loader.load_string(file_map_name, rust_fun.arret_type); let arret_type_datum = match arret_type_source_file.parsed()? 
// NOTE(review): this span begins mid-way through a `Loader` method declared
// before the first visible line. The `match` arms below select the single
// parsed datum for an exported Rust function's declared Arret type string;
// anything other than exactly one datum is a binding error.
{
    [arret_type_datum] => arret_type_datum,
    _ => {
        return Err(Error::new(
            Span::from_str(
                Some(arret_type_source_file.file_id()),
                rust_fun.arret_type,
            ),
            ErrorKind::RustFunError("expected exactly one Arret type datum".into()),
        ));
    }
};

// Treat every native function in the stdlib as an intrinsic
let intrinsic_name = Some(*fun_name).filter(|_| package_name == "stdlib");

let fun = self.process_rust_fun(
    arret_type_datum,
    entry_point_address,
    rust_fun,
    intrinsic_name,
)?;

Ok((*fun_name, Arc::new(fun)))
})
// NOTE(review): the turbofish target below was mangled by text extraction —
// its angle-bracketed type parameters were stripped; tokens left byte-identical.
.collect::)]>, Error>>()?;

Ok(Library {
    loaded,
    target_path: target_path.into_boxed_path(),
    exported_funs,
})
}
}

#[cfg(test)]
mod test {
    use super::*;

    use arret_runtime::abitype::{AbiType, BoxedAbiType, ParamAbiType, ParamCapture, RetAbiType};
    use arret_runtime::boxed::TypeTag;
    use arret_syntax::parser::datum_from_str;

    /// Parses the binding's declared Arret type string and lowers it through
    /// `Loader::process_rust_fun`, keeping only the resulting Arret function
    /// type (or the lowering error).
    ///
    /// NOTE(review): the `Result` return's type parameters were stripped by
    /// text extraction; tokens left byte-identical.
    fn binding_fun_to_poly_type(rust_fun: &'static binding::RustFun) -> Result {
        let loader = Loader::new();
        // `None` file ID: the type string has no backing source file in tests
        let arret_type_datum = datum_from_str(None, rust_fun.arret_type).unwrap();

        loader
            .process_rust_fun(&arret_type_datum, 0, rust_fun, None)
            .map(|rfi_fun| rfi_fun.arret_fun_type)
    }

    /// Asserts that the binding lowers without error.
    fn assert_valid_binding_fun(rust_fun: &'static binding::RustFun) {
        binding_fun_to_poly_type(rust_fun).unwrap();
    }

    /// Asserts that lowering fails with exactly `expected_kind`.
    fn assert_binding_fun_error(expected_kind: &ErrorKind, rust_fun: &'static binding::RustFun) {
        assert_eq!(
            expected_kind,
            binding_fun_to_poly_type(rust_fun).unwrap_err().kind()
        );
    }

    // Simple monomorphic fun with matching Rust/Arret types
    #[test]
    fn exact_rust_fun() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(Int -> Int)",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Int,
                capture: ParamCapture::Never,
            }],
            ret: RetAbiType::Inhabited(AbiType::Boxed(BoxedAbiType::UniqueTagged(TypeTag::Int))),
            symbol: "",
        };

        assert_valid_binding_fun(&BINDING_RUST_FUN);
    }

    // Rest argument received as a boxed list in the final Rust parameter
    #[test]
    fn inexact_rust_fun_with_rest() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(& Int -> false)",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Boxed(BoxedAbiType::List(&BoxedAbiType::UniqueTagged(
                    TypeTag::Int,
                ))),
                capture: ParamCapture::Auto,
            }],
            ret: RetAbiType::Inhabited(AbiType::Bool),
            symbol: "",
        };

        assert_valid_binding_fun(&BINDING_RUST_FUN);
    }

    // `()` return on the Arret side maps to a void Rust return
    #[test]
    fn void_rust_fun() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(Float -> '())",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Float,
                capture: ParamCapture::Never,
            }],
            ret: RetAbiType::Void,
            symbol: "",
        };

        assert_valid_binding_fun(&BINDING_RUST_FUN);
    }

    // `(U)` (bottom) return maps to a never-returning Rust fun
    #[test]
    fn diverging_fun() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(-> (U))",
            takes_task: false,
            params: &[],
            ret: RetAbiType::Never,
            symbol: "",
        };

        assert_valid_binding_fun(&BINDING_RUST_FUN);
    }

    // Polymorphic Arret type erased to boxed `Any` on the Rust side
    #[test]
    fn polymorphic_rust_fun() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(All #{A} (List A & Any) -> A)",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Boxed(BoxedAbiType::Pair(&BoxedAbiType::Any)),
                capture: ParamCapture::Auto,
            }],
            ret: RetAbiType::Inhabited(AbiType::Boxed(BoxedAbiType::Any)),
            symbol: "",
        };

        assert_valid_binding_fun(&BINDING_RUST_FUN);
    }

    // Arret list type that may be empty can't be received as a Rust `Pair`
    #[test]
    fn incompatible_polymorphic_rust_fun() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(All #{A} (List & Any) -> A)",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Boxed(BoxedAbiType::Pair(&BoxedAbiType::Any)),
                capture: ParamCapture::Auto,
            }],
            ret: RetAbiType::Inhabited(AbiType::Boxed(BoxedAbiType::Any)),
            symbol: "",
        };

        // NOTE(review): the `Gc>` inside this literal was garbled by
        // extraction (its type parameter stripped); string kept byte-identical.
        let kind = ErrorKind::RustFunError(
            "Rust type `Gc>` does not match declared Arret type of `(List & Any)`".into(),
        );
        assert_binding_fun_error(&kind, &BINDING_RUST_FUN);
    }

    // Referencing an unknown identifier in the Arret type string
    #[test]
    fn unbound_arret_type() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(unbound)",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Boxed(BoxedAbiType::UniqueTagged(TypeTag::Int)),
                capture: ParamCapture::Auto,
            }],
            ret: RetAbiType::Inhabited(AbiType::Bool),
            symbol: "",
        };

        let kind = ErrorKind::UnboundIdent("unbound".into());
        assert_binding_fun_error(&kind, &BINDING_RUST_FUN);
    }

    // Declared Arret type must be a function type
    #[test]
    fn non_fun_type() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "Str",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Boxed(BoxedAbiType::UniqueTagged(TypeTag::Int)),
                capture: ParamCapture::Auto,
            }],
            ret: RetAbiType::Inhabited(AbiType::Bool),
            symbol: "",
        };

        let kind = ErrorKind::RustFunError("function type expected".into());
        assert_binding_fun_error(&kind, &BINDING_RUST_FUN);
    }

    // Rest param declared in Arret but last Rust param isn't a boxed list
    #[test]
    fn non_list_rust_rest_param() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(& Int -> true)",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Boxed(BoxedAbiType::UniqueTagged(TypeTag::Int)),
                capture: ParamCapture::Auto,
            }],
            ret: RetAbiType::Inhabited(AbiType::Bool),
            symbol: "",
        };

        let kind = ErrorKind::RustFunError("expected Rust function to have `boxed::List` as last parameter to receive the rest argument".into());
        assert_binding_fun_error(&kind, &BINDING_RUST_FUN);
    }

    // Arret declares one fixed param; the Rust fun takes two
    #[test]
    fn mismatched_fixed_param_count() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(Int -> Int)",
            takes_task: false,
            params: &[
                ParamAbiType {
                    abi_type: AbiType::Int,
                    capture: ParamCapture::Never,
                },
                ParamAbiType {
                    abi_type: AbiType::Int,
                    capture: ParamCapture::Never,
                },
            ],
            ret: RetAbiType::Inhabited(AbiType::Boxed(BoxedAbiType::UniqueTagged(TypeTag::Int))),
            symbol: "",
        };

        let kind =
            ErrorKind::RustFunError("expected Rust function to have 1 parameters; has 2".into());
        assert_binding_fun_error(&kind, &BINDING_RUST_FUN);
    }

    // Arret `Char` param can't be passed as a native Rust `i64`
    #[test]
    fn incompatible_fixed_param_type() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(Char -> Int)",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Int,
                capture: ParamCapture::Never,
            }],
            ret:
            // NOTE(review): this initializer continues on the next source line
// NOTE(review): continuation of `incompatible_fixed_param_type` from the
// previous line — the `ret:` field initializer resumes here.
            RetAbiType::Inhabited(AbiType::Boxed(BoxedAbiType::UniqueTagged(TypeTag::Int))),
            symbol: "",
        };

        let kind = ErrorKind::RustFunError(
            "Rust type `i64` does not match declared Arret type of `Char`".into(),
        );
        assert_binding_fun_error(&kind, &BINDING_RUST_FUN);
    }

    // Boxed `Int` Rust return can't satisfy a declared Arret `Char` return
    #[test]
    fn incompatible_ret_type() {
        const BINDING_RUST_FUN: binding::RustFun = binding::RustFun {
            arret_type: "(Int -> Char)",
            takes_task: false,
            params: &[ParamAbiType {
                abi_type: AbiType::Int,
                capture: ParamCapture::Never,
            }],
            ret: RetAbiType::Inhabited(AbiType::Boxed(BoxedAbiType::UniqueTagged(TypeTag::Int))),
            symbol: "",
        };

        // NOTE(review): `Gc` here likely lost a stripped type parameter in
        // extraction; string kept byte-identical.
        let kind = ErrorKind::RustFunError(
            "Rust type `Gc` does not match declared Arret type of `Char`".into(),
        );
        assert_binding_fun_error(&kind, &BINDING_RUST_FUN);
    }
}

================================================
FILE: compiler/source.rs
================================================
use std::ffi::OsString;
use std::ops::Range;
use std::sync::{Arc, RwLock, RwLockReadGuard};
use std::{fmt, fs, io, path};

use codespan_reporting::files::Error as CodespanError;

use arret_syntax::datum::Datum;
use arret_syntax::span::{FileId, Span};

// Span with no file and zero length; used where no real source position exists
pub const EMPTY_SPAN: Span = Span::new(None, 0, 0);

/// Source text that is either borrowed from a `'static` string or shared via
/// an `Arc`, so files can be handed around without copying their contents.
///
/// NOTE(review): several generic parameters in this file (e.g. on `Arc`,
/// `Vec`, `Result`, `RwLock`) were stripped by text extraction; all tokens
/// are kept byte-identical.
#[derive(Clone)]
pub enum SourceText {
    Static(&'static str),
    Shared(Arc),
}

impl AsRef for SourceText {
    fn as_ref(&self) -> &str {
        match self {
            SourceText::Shared(shared) => shared.as_ref(),
            SourceText::Static(static_str) => static_str,
        }
    }
}

impl From> for SourceText {
    fn from(s: Arc) -> Self {
        SourceText::Shared(s)
    }
}

impl From for SourceText {
    fn from(s: String) -> Self {
        SourceText::Shared(s.into())
    }
}

impl From<&'static str> for SourceText {
    fn from(s: &'static str) -> Self {
        SourceText::Static(s)
    }
}

/// A loaded source file: its `FileId`, raw text, and eagerly-parsed data.
///
/// Parsing happens once at load time (see `SourceLoader::load_string`); the
/// result — data or syntax error — is stored and handed out on demand.
pub struct SourceFile {
    file_id: FileId,
    source: SourceText,
    parsed: Result, arret_syntax::error::Error>,
}

impl SourceFile {
    pub fn file_id(&self) -> FileId {
        self.file_id
    }

    pub fn source(&self) -> &'_ str {
        self.source.as_ref()
    }

    /// Returns the parsed data, or a clone of the syntax error captured at
    /// load time.
    pub fn parsed(&self) -> Result<&[Datum], arret_syntax::error::Error> {
        match &self.parsed {
            Ok(data) => Ok(data),
            Err(err) => Err(err.clone()),
        }
    }
}

impl fmt::Debug for SourceFile {
    // Debug-print just the file ID; the source text would be overwhelming
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.file_id.fmt(formatter)
    }
}

/// Per-file bookkeeping needed to satisfy `codespan-reporting`'s `Files`
/// trait: display name, text, and precomputed line-start offsets.
struct ReportableFile {
    filename: OsString,
    source: SourceText,
    // Byte offsets of each line start, ascending; computed once at load
    line_offsets: Vec,
}

impl ReportableFile {
    fn name(&self) -> String {
        self.filename.to_string_lossy().into()
    }

    fn source(&self) -> &str {
        self.source.as_ref()
    }

    /// Maps a byte offset to its 0-based line index via binary search on the
    /// line-start table. `Err(line) - 1` lands on the containing line; the
    /// first entry is offset 0, so `Err(0)` can't occur for valid offsets.
    fn line_index(&self, offset: usize) -> usize {
        match self
            .line_offsets
            .binary_search_by(|line_start| line_start.cmp(&offset))
        {
            Ok(line) => line,
            Err(line) => line - 1,
        }
    }

    /// Byte range of the given line, or `None` if `line_index` is past the
    /// end. The final line extends to the end of the source text.
    fn line_range(&self, line_index: usize) -> Option> {
        let start = self.line_offsets.get(line_index)?;
        let end = self
            .line_offsets
            .get(line_index + 1)
            .cloned()
            .unwrap_or_else(|| self.source.as_ref().len());

        Some(*start..end)
    }
}

/// Thread-safe registry of loaded source files, keyed by `FileId`.
///
/// File IDs are 1-based: `load_string` assigns `files.len()` *after* pushing,
/// and `ReportableFiles::get_file` subtracts 1 when indexing.
#[derive(Default)]
pub struct SourceLoader {
    files: RwLock>,
}

impl SourceLoader {
    pub fn new() -> Self {
        Self::default()
    }

    /// Synchronously read path into a `SourceFile`
    pub fn load_path(&self, path: &path::Path) -> Result {
        let source = fs::read_to_string(path)?;
        Ok(self.load_string(
            path.as_os_str().to_owned(),
            SourceText::Shared(source.into()),
        ))
    }

    /// Loads a caller-provided string into a `SourceFile`
    pub fn load_string(&self, filename: OsString, source: impl Into) -> SourceFile {
        use arret_syntax::parser::data_from_str;

        let source = source.into();

        // Precompute line starts before taking the write lock
        let reportable_file = ReportableFile {
            filename,
            line_offsets: codespan_reporting::files::line_starts(source.as_ref()).collect(),
            source: source.clone(),
        };

        // Hold the write lock only long enough to push and read the new length
        let file_index = {
            let mut files_write = self.files.write().unwrap();
            files_write.push(reportable_file);
            files_write.len()
        };
        let file_id = FileId::new(file_index as u32).unwrap();

        // Parse eagerly outside the lock; errors are stored for `parsed()`
        SourceFile {
            file_id,
            parsed: data_from_str(Some(file_id), source.as_ref()),
            source,
        }
    }

    /// Reserves space for `additional` more files
    ///
    /// This can be used to avoid allocating memory under our instance's write lock.
    pub fn reserve(&self, additional: usize) {
        self.files.write().unwrap().reserve(additional)
    }

    /// Returns a `ReportableFiles` instance usable with `codespan-reporting`
    ///
    /// This will take our instance's read lock.
    pub fn files(&self) -> ReportableFiles<'_> {
        ReportableFiles {
            files: self.files.read().unwrap(),
        }
    }
}

/// Read-locked view over the loader's files; implements the
/// `codespan_reporting::files::Files` trait for diagnostic rendering.
pub struct ReportableFiles<'a> {
    files: RwLockReadGuard<'a, Vec>,
}

impl<'a> ReportableFiles<'a> {
    // File IDs are 1-based (see `SourceLoader`), hence the `- 1`
    fn get_file(&self, file_id: FileId) -> Option<&ReportableFile> {
        self.files.get((file_id.get() - 1) as usize)
    }
}

type CodespanResult = Result;

impl<'a> codespan_reporting::files::Files<'a> for ReportableFiles<'a> {
    type FileId = FileId;
    type Source = &'a str;
    type Name = String;

    fn name(&self, file_id: FileId) -> CodespanResult {
        self.get_file(file_id)
            .ok_or(CodespanError::FileMissing)
            .map(|f| f.name())
    }

    fn source(&self, file_id: FileId) -> CodespanResult<&str> {
        self.get_file(file_id)
            .ok_or(CodespanError::FileMissing)
            .map(|f| f.source())
    }

    fn line_index(&self, file_id: FileId, offset: usize) -> CodespanResult {
        self.get_file(file_id)
            .ok_or(CodespanError::FileMissing)
            .map(|f| f.line_index(offset))
    }

    fn line_range(&self, file_id: FileId, line_index: usize) -> CodespanResult> {
        self.get_file(file_id)
            .ok_or(CodespanError::FileMissing)
            .and_then(|f| {
                f.line_range(line_index).ok_or(CodespanError::LineTooLarge {
                    given: line_index,
                    max: f.line_offsets.len(),
                })
            })
    }
}

================================================
FILE: compiler/tests/compile-error/arity.arret
================================================
(import [stdlib base])

(defn not-enough-fixed (_) ())
(def _ (not-enough-fixed))
;^^^^^^^^^^^^^^^^^^ ERROR incorrect number of arguments: wanted 1, have 0

(defn not-enough-fixed-and-rest (_ & _) ())
(def _ (not-enough-fixed-and-rest))
;^^^^^^^^^^^^^^^^^^^^^^^^^^^ ERROR incorrect number of arguments: wanted at least 1, have 0

(defn too-many-fixed (_) ())
(def _ (too-many-fixed 1 2 3))
;^^^^^^^^^^^^^^^^^^^^^^ ERROR incorrect number of arguments:
wanted 1, have 3 (def _ (bool?)) ;^^^^^^^ ERROR incorrect number of arguments: wanted 1, have 0 (def _ (bool? true true)) ;^^^^^^^^^^^^^^^^^ ERROR incorrect number of arguments: wanted 1, have 2 (defn apply-top-fun ([x (... -> Bool)]) (x 5)) ;^^^^^ ERROR cannot determine parameter types for `(... -> Bool)` (defn apply-with-rest (_ [_ Int] _)) (def _ (apply-with-rest & '(1 2))) ;^^^^^ ERROR mismatched types (def _ (apply-with-rest & '(1 2 3 4))) ;^^^^^^^^^ ERROR mismatched types (defn main! ()) ================================================ FILE: compiler/tests/compile-error/bit-shift-left-negative.arret ================================================ (import [stdlib base]) (def _ (bit-shift-left 0 -1)) ;^^^^^^^^^^^^^^^^^^^^^ ERROR shift left by negative bit count -1 (defn main! () ->! ()) ================================================ FILE: compiler/tests/compile-error/bit-shift-right-overflow.arret ================================================ (import [stdlib base]) (def _ (bit-shift-right 0 65)) ;^^^^^^^^^^^^^^^^^^^^^^ ERROR shift right by 65 bits exceeds 64 bits (defn main! () ->! 
()) ================================================ FILE: compiler/tests/compile-error/destruc-errors.arret ================================================ (import [stdlib base]) (def [x y] [1 2]) ;^ ERROR unable to resolve `y` (def [x / y] [1 2 3]) ;^^^^^^^ ERROR vectors can only be used in a destructure in the form `[name Type]` (def [x y z] [1 2 3 4]) ;^^^^^^^ ERROR vectors can only be used in a destructure in the form `[name Type]` (def [1 Int] 1) ;^ ERROR expected symbol, found integer (def 1 1) ;^ ERROR unsupported destructuring binding (def :foo 1) ;^^^^ ERROR expected symbol, found keyword ; Double type annotation (def [[x Int] Int] 1) ;^^^^^^^ ERROR expected symbol, found vector (def bad-param-destruc (fn (1))) ;^ ERROR unsupported destructuring binding (def keyword-param-destruc (fn (:foo))) ;^^^^ ERROR expected symbol, found keyword ================================================ FILE: compiler/tests/compile-error/fn-lowering-errors.arret ================================================ (import [stdlib base]) (def _ (fn)) ;^^^^ ERROR parameter declaration missing (def _ (fn #{A})) ;^^^^^^^^^ ERROR parameter declaration missing (def _ (fn [])) ;^^ ERROR expected parameter declaration list, found empty vector (def _ (fn #{123} ())) ;^^^ ERROR bad polymorphic variable declaration ================================================ FILE: compiler/tests/compile-error/if-errors.arret ================================================ (import [stdlib base]) (def _ (if)) ;~ ERROR wrong argument count; expected 3 (def _ (if true)) ;~ ERROR wrong argument count; expected 3 (def _ (if true false)) ;~ ERROR wrong argument count; expected 3 ================================================ FILE: compiler/tests/compile-error/import-parse-errors.arret ================================================ (import (unknown [stdlib base])) ;^^^^^^^ ERROR expected import filter keyword, found symbol (import (:rename [stdlib base] true)) ;^^^^ ERROR expected identifier rename map, 
found boolean true (import (:rename [stdlib base] {foo bar} 1)) ;^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ERROR wrong argument count; expected 2 (import (:prefix [stdlib base] test- 1)) ;^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ERROR wrong argument count; expected 2 (import (:prefixed [stdlib base] 1)) ;^^^^^^^^^^^^^^^^^^^^^^^^^^^ ERROR wrong argument count; expected 1 (import 4) ;^ ERROR bad import set (import []) ;~ ERROR module name requires a least two components (import [just-one]) ;~ ERROR module name requires a least two components ================================================ FILE: compiler/tests/compile-error/macro-errors.arret ================================================ (import [arret internal primitives]) (defmacro 1 (macro-rules)) ;^ ERROR expected symbol, found integer (defmacro a b) ;~ ERROR expected macro specification list, found symbol (defmacro a ()) ;^^ ERROR missing macro type (defmacro a (macro-fn)) ;^^^^^^^^ ERROR unsupported macro type (defmacro a (macro-rules [(...) false] ;^^^ ERROR unexpected ellipsis in macro rule )) (defmacro a (macro-rules [(1) (... 1)] ;^ ERROR expected macro symbol to escape, found integer )) (defmacro no-rules (macro-rules)) (no-rules) ;~ ERROR no matching macro rule (defmacro _ (macro-rules 1)) ;^ ERROR expected macro rule vector, found integer (defmacro no-template-datum (macro-rules [()] ;^^^^ ERROR expected macro rule vector with 2 elements, found 1 )) (defmacro more-than-one-template-datum (macro-rules [() 1 2] ;^^^^^^^^ ERROR expected macro rule vector with 2 elements, found 3 )) (defmacro non-list-pattern (macro-rules [self 1] ;^^^^ ERROR expected macro rule pattern list )) (defmacro m (macro-rules [((list1 ...) (list2 ...)) ([list1 list2] ...)] ;^^^^^^^^^^^^^^^^^^^ ERROR subtemplate references macro variables from multiple subpatterns )) (defmacro m (macro-rules [(expr ...) (5 ...)] ;^^^^^^^ ERROR subtemplate does not include any macro variables )) (defmacro vm (macro-rules [((l ... 
r ...)) true])) ;^ ERROR multiple zero or more matches in the same sequence (defmacro a (macro-rules [(x x) x])) ;^ ERROR duplicate definition (defmacro return-one (macro-rules [() 1])) (return-one extra-arg) ;~ ERROR no matching macro rule ; Keyword doesn't match (defmacro for (macro-rules [(x :in y) [x y]])) (for 1 :for 2) ;~ ERROR no matching macro rule (defmacro two-set? (macro-rules [(#{_ _}) false] ;^^^^^^ ERROR set patterns must either be empty or a zero or more match )) ================================================ FILE: compiler/tests/compile-error/misc-body-errors.arret ================================================ (import [stdlib base]) (defn too-many-quote-args () (quote 1 2 3)) ;^^^^^^^^^^^^^ ERROR wrong argument count; expected 1 (defn def-in-body-context () (def foo 1)) ;^^^^^^^^^^^ ERROR definition outside module body (defn let-without-binding-vector () (let x 1)) ;^ ERROR binding vector expected (def missing-param-list (fn)) ;^^^^ ERROR parameter declaration missing (def duplicate-param (fn (x x))) ;^ ERROR duplicate definition (def user-compile-error (compile-error "Some message")) ;~ ERROR Some message (def uneven-let (let [x 1 y])) ;^ ERROR binding vector must have an even number of forms ================================================ FILE: compiler/tests/compile-error/misc-top-level-errors.arret ================================================ (import [stdlib base]) (import (:only [stdlib base] do)) ;^^^^^^^^^^^^^^^^^^^^^^^^ ERROR duplicate definition 1 ;~ ERROR value at top-level of module body 'foo ;~ ERROR value at top-level of module body (list) ;~ ERROR value at top-level of module body (if true true false) ;~ ERROR value at top-level of module body (fn ()) ;~ ERROR value at top-level of module body (export unbound) ;^^^^^^^ ERROR unable to resolve `unbound` (compile-error "Some message") ;~ ERROR Some message (compile-error 1234) ;^^^^ ERROR expected error message string, found integer (def x 1) (def x 2) ;^ ERROR duplicate 
definition (deftype x Int) ;^ ERROR duplicate definition ; This is special because it's part of our prelude (def import 5) ;^^^^^^ ERROR duplicate definition ================================================ FILE: compiler/tests/compile-error/missing-module.arret ================================================ (import [package does not exist]) ;~ ERROR package not found (import [stdlib module does not exist]) ;~ ERROR module not found ================================================ FILE: compiler/tests/compile-error/no-main.arret ================================================ ;~ ERROR no main! function defined in entry module ================================================ FILE: compiler/tests/compile-error/overflow-add.arret ================================================ (import [stdlib base]) (def maximum-int 9223372036854775807) (def _ (+ maximum-int 1)) ;^^^^^^^^^^^^^^^^^ ERROR attempt to add with overflow (defn main! () ->! ()) ================================================ FILE: compiler/tests/compile-error/overflow-multiply.arret ================================================ (import [stdlib base]) (def maximum-int 9223372036854775807) (def _ (* maximum-int 2)) ;^^^^^^^^^^^^^^^^^ ERROR attempt to multiply with overflow (defn main! () ->! ()) ================================================ FILE: compiler/tests/compile-error/overflow-quot.arret ================================================ (import [stdlib base]) (def minimum-int -9223372036854775808) ; This message isn't quite right but it's not worth distinguishing these cases (def _ (quot minimum-int -1)) ;^^^^^^^^^^^^^^^^^^^^^ ERROR division by zero (defn main! () ->! ()) ================================================ FILE: compiler/tests/compile-error/overflow-subtract.arret ================================================ (import [stdlib base]) (def minimum-int -9223372036854775808) (def _ (- minimum-int 1)) ;^^^^^^^^^^^^^^^^^ ERROR attempt to subtract with overflow (defn main! () ->! 
()) ================================================ FILE: compiler/tests/compile-error/quot-by-zero.arret ================================================ (import [stdlib base]) (def _ (quot 1 0)) ;^^^^^^^^^^ ERROR division by zero (defn main! () ->! ()) ================================================ FILE: compiler/tests/compile-error/record-errors.arret ================================================ (import [stdlib base]) (defrecord foo) ;~ ERROR wrong argument count; expected 2 (defrecord :keyword ()) ;^^^^^^^^ ERROR expected record type constuctor declaration, found keyword (defrecord _ []) ;^^ ERROR expected record value constructor declaration, found empty vector (defrecord _ ()) ;^^ ERROR expected record value constructor declaration, found empty list (defrecord _ (1)) ;^ ERROR expected symbol, found integer (defrecord Foo (foo 1)) ;^ ERROR expected record field declaration, found integer (defrecord Foo (foo [])) ;^^ ERROR expected record field declaration, found empty vector (defrecord (Foo 1) ()) ;^ ERROR bad polymorphic variable declaration (defrecord (Point [N Num]) (point [x N] [y N])) (def [_ (Point Int Float)] (point 1 2.0)) ;^^^^^^^^^^^^^^^^^ ERROR wrong argument count; expected 1 (def [_ (Point Bool)] (point true)) ;^^^^ ERROR mismatched types ; These polymorphic variables are actually fixed to a single type (defrecord (FixedPolyRecord [A Int] [->_ ->]) (fixed-poly-record [pred (A ->_ Bool)])) (def [_ (FixedPolyRecord Float ->)] 1) ;^^^^^ ERROR mismatched types (def [_ (FixedPolyRecord Int ->!)] 1) ;^^^ ERROR mismatched purities (def [_ (FixedPolyRecord Int Int)] 1) ;^^^ ERROR type cannot be used as a purity (def [_ (FixedPolyRecord Int [])] 1) ;^^ ERROR empty vector cannot be used as a purity (defrecord (AnonymousPolyParam [_ Int]) (_)) ;^ ERROR polymorphic parameters must have a name (defrecord (UnusedPolyPurityParam [->_ ->!]) (_)) ;^^^^^^^^^ ERROR unused polymorphic purity parameter `->_` (defrecord (UnusedPolyTyParam [A Num]) (_)) ;^^^^^^^ 
ERROR unused polymorphic type parameter `A` (defrecord DuplicateFieldName (duplicate-field-name field field)) ;^^^^^ ERROR duplicate definition of `duplicate-field-name-field` ================================================ FILE: compiler/tests/compile-error/recur-errors.arret ================================================ (import [stdlib base]) (defn non-tail-reverse #{T} ([lst (List & T)]) -> (List & T) (if (nil? lst) lst (concat (recur (rest lst)) (list (first lst))))) ;^^^^^^^^^^^^^^^^^^ ERROR non-tail `(recur)` (defn recur-without-fun-ty-decl () (recur)) ;^^^^^^^ ERROR type annotation needed (defn recur-with-non-generic-argument #{[T Num]} ([v T]) -> () (recur 5)) ;^ ERROR mismatched types (defn main! () ->! () ()) ================================================ FILE: compiler/tests/compile-error/reference-errors.arret ================================================ (import [stdlib base]) (def _ nopenopenope) ;^^^^^^^^^^^^ ERROR unable to resolve `nopenopenope` (def _ Str) ;^^^ ERROR cannot take the value of a type (def _ List) ;^^^^ ERROR cannot take the value of a type constructor (def _ ->!) ;^^^ ERROR cannot take the value of a purity (def _ fn) ;^^ ERROR cannot take the value of a primitive (def _ (let [x x])) ;^ ERROR unable to resolve `x` ================================================ FILE: compiler/tests/compile-error/rem-by-zero.arret ================================================ (import [stdlib base]) (def _ (rem 1 0)) ;^^^^^^^^^ ERROR division by zero (defn main! () ->! 
()) ================================================ FILE: compiler/tests/compile-error/syntax-error.arret ================================================ (this is invalid] ;^ ERROR unexpected `]` while parsing list ================================================ FILE: compiler/tests/compile-error/type-checking-errors.arret ================================================ (import [stdlib base]) (def [wrong-ascription Int] 'foo) ;^^^ ERROR mismatched types (def ([one 'one] [two 'two]) '(two one)) ;^^^^^^^^^ ERROR mismatched types (defn wrong-ret () -> Str 'foo) ;^^^ ERROR mismatched types (def non-bool-if-test (if 'foo true false)) ;^^^ ERROR mismatched types (defn wrong-if-branch-type ([test Bool]) -> Sym (if test 'foo "foo")) ;^^^^^ ERROR mismatched types (def [wrong-do-type Int] (do 1 2 'three)) ;^^^^^ ERROR mismatched types (def wrong-arg-type ((fn ([input Str])) 'foo)) ;^^^ ERROR mismatched types (defn conflicting-free-type (free-input) (ann free-input Str) (ann free-input Sym)) ;^^^^^^^^^^ ERROR type annotation needed ; `input` will gain type information from the function type annotation (def [conflicting-closure-type (Sym -> Str)] (fn (input) (ann input Str))) ;^^^^^ ERROR type annotation needed (def not-fun-def ("foo")) ;^^^^^ ERROR expected function, found `Str` (defn impure-fun! () ->! 
()) (def impure-def (impure-fun!)) ;^^^^^^^^^^^ ERROR mismatched purities (defn impure-app () -> () (impure-fun!)) ;^^^^^^^^^^^ ERROR mismatched purities (defn polymorphic-impure-app #{[->A ->!]} () ->A () (impure-fun!)) ;^^^^^^^^^^^ ERROR mismatched purities (defn bad-apply-in-return-position () -> () (length '(1 2 3))) ;^^^^^^ ERROR mismatched types (defn non-list-rest-type (& x) (ann x (Vector Any))) ;^ ERROR mismatched types (defn specific-rest-list-type (& x) (ann x (List Int Int Int))) ;^ ERROR mismatched types ; The compiler should suppress this error as it's a cascade error (def depends-on-type-error specific-rest-list-type) ; This generic parameter isn't sufficiently bound (defn takes-symbol ([x Sym])) (defn takes-generic #{T} ([x T]) (takes-symbol x)) ;^ ERROR mismatched types ; Applying a fun with incorrect polymorphic purity inside a pure context (def _ (map println! '(0 1 2 3))) ;^^^^^^^^ ERROR mismatched types ; `input` is used both as a poly `Sym` and a poly `Num` (defn conflicting-poly-types #{[A Sym] [B Num]} (input [sym-is-foo? (A -> Bool)] [num-is-zero? (B -> Bool)]) -> () (sym-is-foo? input) (num-is-zero? 
input) ;^^^^^ ERROR type annotation needed ()) (defn unselected-purity-variable () ((fn #{[->_ ->!]} ())) ;^^^^^^^^^^^^^^^^^^^^^^ ERROR cannot determine purity of purity variable `->_` ()) (defn unselected-type-variable () ((fn #{T} ())) ;^^^^^^^^^^^^^^ ERROR cannot determine type of type variable `T` ()) ; Make sure we still type check the branches (defn divergent-cond-test () -> Sym (if (panic "Divergent") ; This isn't a `Sym` but we should allow it as it's never returned true ; This isn't well-typed because a test must be a `Bool` (if 'foo 1 2))) ;^^^ ERROR mismatched types (defrecord RecordOne (record1)) (defrecord RecordTwo (record2)) (defrecord RecordThree (record3)) ; Make sure we don't unify this to `Record` (def [_ (U RecordOne RecordTwo)] (record3)) ;^^^^^^^ ERROR mismatched types ; `(not)` should only take a `Bool` (def _ (not 5)) ;^ ERROR mismatched types (defn main! ()) ================================================ FILE: compiler/tests/compile-error/type-lowering-errors.arret ================================================ (import [stdlib base]) (deftype _ unbound) ;^^^^^^^ ERROR unable to resolve `unbound` (deftype _ (unbound)) ;^^^^^^^ ERROR unable to resolve `unbound` (deftype _ quote) ;^^^^^ ERROR primitive cannot be used as a type (deftype _ 1) ;^ ERROR unsupported literal type (deftype _ '(1)) ;^ ERROR unsupported literal type (deftype _ [1]) ;^ ERROR unsupported literal type (deftype _ (->)) ;^^ ERROR purity cannot be used as a type constructor ; This isn't public yet but still test it (import (:only [arret internal primitives] All)) (deftype _ (All)) ;^^^^^ ERROR polymorphic variable declaration missing (deftype _ (All [])) ;^^ ERROR expected polymorphic variable set, found empty vector (deftype _ (All #{} List Int)) ;^^^^^^^^^^^^^^^^^^ ERROR polymorphism on non-function type ================================================ FILE: compiler/tests/compile-error/vector-assoc-negative.arret ================================================ 
(import [stdlib base]) (def _ (vector-assoc [1 2 3] -5 0)) ;^^^^^^^^^^^^^^^^^^^^^^^^^^^ ERROR index -5 is negative (defn main! () ->! ()) ================================================ FILE: compiler/tests/compile-error/vector-assoc-out-of-bounds.arret ================================================ (import [stdlib base]) (def _ (vector-assoc [1 2 3] 3 0)) ;^^^^^^^^^^^^^^^^^^^^^^^^^^ ERROR index 3 out of bounds for vector of length 3 (defn main! () ->! ()) ================================================ FILE: compiler/tests/compile-error/vector-ref-negative.arret ================================================ (import [stdlib base]) (def _ (vector-ref [1 2 3] -5)) ;^^^^^^^^^^^^^^^^^^^^^^^ ERROR index -5 is negative (defn main! () ->! ()) ================================================ FILE: compiler/tests/compile-error/vector-ref-out-of-bounds.arret ================================================ (import [stdlib base]) (def _ (vector-ref [1 2 3] 5)) ;^^^^^^^^^^^^^^^^^^^^^^ ERROR index 5 out of bounds for vector of length 3 (defn main! () ->! ()) ================================================ FILE: compiler/tests/compile-error/wrong-main-type.arret ================================================ (import [stdlib base]) (defn main! 
() -> Int 1) ;~ ERROR mismatched types ================================================ FILE: compiler/tests/integration.rs ================================================ #![warn(clippy::all)] #![warn(rust_2018_idioms)] use std::io::Write; use std::ops::Range; use std::sync::Arc; use std::{fs, io, path, process}; use codespan_reporting::diagnostic::{Diagnostic, Label, Severity}; use codespan_reporting::files::Files as _; use tempfile::NamedTempFile; use arret_syntax::span::{FileId, Span}; use arret_compiler::{emit_diagnostics_to_stderr, CompileCtx, SourceText}; #[derive(Clone, PartialEq)] struct RunOutput { stdout: Vec, stderr: Vec, } #[derive(Clone, PartialEq)] enum RunType { Pass(RunOutput), Error(RunOutput), } impl RunType { fn expected_output(&self) -> &RunOutput { match self { RunType::Pass(run_output) => run_output, RunType::Error(run_output) => run_output, } } } #[derive(Clone, PartialEq)] enum TestType { CompileError, Optimise, Run(RunType), } #[derive(Debug)] enum ExpectedSpan { Exact(FileId, Range), StartRange(FileId, Range), } impl ExpectedSpan { fn matches(&self, actual_file_id: FileId, actual_range: Range) -> bool { match self { ExpectedSpan::Exact(expected_file_id, expected_range) => { actual_file_id == *expected_file_id && actual_range == *expected_range } ExpectedSpan::StartRange(expected_file_id, expected_start_range) => { let actual_range_start: usize = actual_range.start; actual_file_id == *expected_file_id && actual_range_start >= expected_start_range.start && actual_range_start < expected_start_range.end } } } } #[derive(Debug)] struct ExpectedDiagnostic { expected_severity: Severity, message_prefix: String, span: ExpectedSpan, } impl ExpectedDiagnostic { fn matches(&self, actual: &Diagnostic) -> bool { if self.expected_severity != actual.severity { return false; } if !actual.message.starts_with(&self.message_prefix[..]) { return false; } actual.labels.iter().any(|candidate_label| { self.span .matches(candidate_label.file_id, 
candidate_label.range.clone()) }) } /// Returns a diagnostic for reporting missing expectation fn to_error_diagnostic(&self) -> Diagnostic { let (file_id, span_range) = match self.span { ExpectedSpan::Exact(ref file_id, ref span_range) => (file_id, span_range), ExpectedSpan::StartRange(ref file_id, ref span_range) => (file_id, span_range), }; let span = Span::new( Some(*file_id), span_range.start as u32, span_range.end as u32, ); Diagnostic::error() .with_message(format!( "expected {}", severity_name(self.expected_severity) )) .with_labels(vec![Label::primary( span.file_id().unwrap(), span.byte_range(), ) .with_message(format!("{} ...", self.message_prefix))]) } } fn take_severity(marker_string: &str) -> (Severity, &str) { for (prefix, severity) in &[ (" BUG ", Severity::Bug), (" ERROR ", Severity::Error), (" WARNING ", Severity::Warning), (" HELP ", Severity::Help), (" NOTE ", Severity::Note), ] { if let Some(message_prefix) = marker_string.strip_prefix(prefix) { return (*severity, message_prefix); } } panic!("Unknown severity prefix for `{}`", marker_string) } fn severity_name(severity: Severity) -> &'static str { match severity { Severity::Bug => "bug", Severity::Error => "error", Severity::Warning => "warning", Severity::Help => "help", Severity::Note => "note", } } fn extract_expected_diagnostics( source_file: &arret_compiler::SourceFile, ) -> Vec { let source = source_file.source(); source .match_indices(";~") .map(|(index, _)| { let start_of_line_index = &source[..index].rfind('\n').map(|i| i + 1).unwrap_or(0); let end_of_line_index = &source[index..] 
.find('\n') .map(|i| i + index) .unwrap_or_else(|| source.len()); // Take from after the ;~ to the end of the line let marker_string = &source[index + 2..*end_of_line_index]; let (severity, marker_string) = take_severity(marker_string); ExpectedDiagnostic { expected_severity: severity, message_prefix: marker_string.into(), span: ExpectedSpan::StartRange(source_file.file_id(), *start_of_line_index..index), } }) .chain(source.match_indices(";^").map(|(index, _)| { let span_length = source[index..].find(' ').expect("Cannot find severity") - 1; let start_of_line_index = &source[..index] .rfind('\n') .expect("Cannot have a spanned error on first line"); let start_of_previous_line_index = &source[..*start_of_line_index] .rfind('\n') .map(|i| i + 1) .unwrap_or(0); let end_of_line_index = &source[index..] .find('\n') .map(|i| i + index) .unwrap_or_else(|| source.len()); let span_line_offset = index - start_of_line_index; let span_start = start_of_previous_line_index + span_line_offset; let span_end = span_start + span_length; // Take from after the ;^^ to the end of the line let marker_string = &source[index + span_length + 1..*end_of_line_index]; let (severity, message_prefix) = take_severity(marker_string); ExpectedDiagnostic { expected_severity: severity, message_prefix: message_prefix.into(), span: ExpectedSpan::Exact(source_file.file_id(), span_start..span_end), } })) .collect() } fn unexpected_diag_to_error_diagnostic(unexpected_diag: Diagnostic) -> Diagnostic { let unexpected_primary_label = unexpected_diag .labels .iter() .find(|label| label.style == codespan_reporting::diagnostic::LabelStyle::Primary) .cloned(); Diagnostic::error() .with_message(format!( "unexpected {}", severity_name(unexpected_diag.severity) )) .with_labels( unexpected_primary_label .into_iter() .map(|unexpected_primary_label| { Label::primary( unexpected_primary_label.file_id, unexpected_primary_label.range, ) .with_message(unexpected_diag.message.clone()) }) .collect(), ) } fn 
exit_with_run_output_difference( source_filename: String, stream_name: &str, expected: &[u8], actual: &[u8], ) { use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; let mut expected_color = ColorSpec::new(); expected_color.set_fg(Some(Color::Red)); let mut actual_color = ColorSpec::new(); actual_color.set_fg(Some(Color::Green)); let stderr = StandardStream::stderr(ColorChoice::Auto); let mut stderr_lock = stderr.lock(); writeln!( stderr_lock, "unexpected {} output from integration test {}\n", stream_name, source_filename ) .unwrap(); write!(stderr_lock, "Expected: \"").unwrap(); let _ = stderr_lock.set_color(&expected_color); stderr_lock.write_all(expected).unwrap(); let _ = stderr_lock.reset(); writeln!(stderr_lock, "\"").unwrap(); write!(stderr_lock, "Actual: \"").unwrap(); let _ = stderr_lock.set_color(&actual_color); stderr_lock.write_all(actual).unwrap(); let _ = stderr_lock.reset(); writeln!(stderr_lock, "\"").unwrap(); std::process::exit(1); } fn result_for_single_test( ccx: &CompileCtx, source_file: &arret_compiler::SourceFile, test_type: TestType, ) -> Result<(), Vec>> { let (output_path, run_type) = { let arret_compiler::EvaluableProgram { mut ehx, main_export_id, linked_libraries, } = arret_compiler::program_to_evaluable(ccx, source_file)?; // Try evaluating if we're not supposed to panic if !matches!(test_type, TestType::Run(RunType::Error(_))) { ehx.eval_main_fun(main_export_id)?; } let run_type = if let TestType::Run(run_type) = test_type { run_type } else { return Ok(()); }; // And now compiling and running let mir_program = ehx.into_built_program(main_export_id)?; if mir_program.is_empty() { // Don't bother building return Ok(()); } let gen_program_opts = arret_compiler::GenProgramOptions::new(); let output_path = NamedTempFile::new().unwrap().into_temp_path(); arret_compiler::gen_program( gen_program_opts, &linked_libraries, &mir_program, &output_path, None, ); (output_path, run_type) }; let mut process = 
        process::Command::new(output_path.as_os_str());
    let expected_output = run_type.expected_output();

    // Run the compiled test binary, capturing both output streams for comparison
    let output = process
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .output()
        .unwrap();

    match run_type {
        RunType::Pass(_) => {
            // Pass tests must exit successfully
            if !output.status.success() {
                // Dump any panic message from the test
                let _ = io::stderr().write_all(&output.stderr);

                return Err(vec![Diagnostic::error()
                    .with_message(format!(
                        "unexpected status {} returned from integration test",
                        output.status,
                    ))
                    .with_labels(vec![Label::primary(source_file.file_id(), 0..1)
                        .with_message("integration test file")])]);
            }
        }
        RunType::Error(_) => {
            // Code 1 is used by panic. This makes sure we didn't e.g. SIGSEGV.
            if output.status.code() != Some(1) {
                return Err(vec![Diagnostic::error()
                    .with_message(format!(
                        "unexpected status {} returned from integration test",
                        output.status,
                    ))
                    .with_labels(vec![Label::primary(source_file.file_id(), 0..1)
                        .with_message("integration test file")])]);
            }
        }
    }

    // Any mismatch against the expected output aborts the whole harness with a diff
    if expected_output.stderr != output.stderr {
        exit_with_run_output_difference(
            ccx.source_loader()
                .files()
                .name(source_file.file_id())
                .unwrap(),
            "stderr",
            &expected_output.stderr,
            &output.stderr,
        );
    }

    if expected_output.stdout != output.stdout {
        exit_with_run_output_difference(
            ccx.source_loader()
                .files()
                .name(source_file.file_id())
                .unwrap(),
            "stdout",
            &expected_output.stdout,
            &output.stdout,
        );
    }

    Ok(())
}

/// Runs a test that's expected to compile (and possibly run) successfully.
///
/// Returns `true` on success; on failure the diagnostics are printed to stderr.
fn run_single_pass_test(
    ccx: &CompileCtx,
    source_file: &arret_compiler::SourceFile,
    test_type: TestType,
) -> bool {
    let result = result_for_single_test(ccx, source_file, test_type);

    if let Err(diagnostics) = result {
        emit_diagnostics_to_stderr(ccx.source_loader(), diagnostics);
        false
    } else {
        true
    }
}

/// Runs a test that's expected to fail compilation with exactly the diagnostics its
/// `;~`/`;^` markers declare. Returns `true` when actual and expected diagnostics match.
fn run_single_compile_fail_test(
    ccx: &CompileCtx,
    source_file: &arret_compiler::SourceFile,
) -> bool {
    let result = result_for_single_test(ccx, source_file, TestType::CompileError);
    let mut expected_diags = extract_expected_diagnostics(source_file);

    let actual_diags = if let Err(diags) =
result { diags } else { eprintln!( "Compilation unexpectedly succeeded for {}", ccx.source_loader() .files() .name(source_file.file_id()) .unwrap() ); return false; }; let mut unexpected_diags = vec![]; for actual_diag in actual_diags.into_iter() { let expected_report_index = expected_diags .iter() .position(|expected_report| expected_report.matches(&actual_diag)); match expected_report_index { Some(index) => { expected_diags.swap_remove(index); } None => { unexpected_diags.push(actual_diag); } } } if unexpected_diags.is_empty() && expected_diags.is_empty() { return true; } let all_diags = unexpected_diags .into_iter() .map(unexpected_diag_to_error_diagnostic) .chain( expected_diags .into_iter() .map(|expected_diag| expected_diag.to_error_diagnostic()), ); emit_diagnostics_to_stderr(ccx.source_loader(), all_diags); false } fn run_single_test(ccx: &CompileCtx, input_path: &path::Path, test_type: TestType) -> bool { let source = fs::read_to_string(input_path).unwrap(); let source_file = ccx.source_loader().load_string( input_path.as_os_str().to_owned(), SourceText::Shared(source.into()), ); if test_type == TestType::CompileError { run_single_compile_fail_test(ccx, &source_file) } else { run_single_pass_test(ccx, &source_file, test_type) } } fn entry_is_arret_source(entry: &fs::DirEntry) -> bool { entry .file_name() .to_str() .map(|file_name| !file_name.starts_with('.') && file_name.ends_with(".arret")) .unwrap_or(false) } fn read_or_empty_vec(filename: &path::Path) -> Result, io::Error> { match fs::read(filename) { Ok(data) => Ok(data), Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(vec![]), Err(err) => Err(err), } } fn entry_to_compile_test_tuple( entry: io::Result, test_type: TestType, ) -> Option<(path::PathBuf, TestType)> { let entry = entry.unwrap(); if !entry_is_arret_source(&entry) { None } else { Some((entry.path(), test_type)) } } fn entry_to_run_test_tuple( entry: io::Result, run_type: RT, ) -> Option<(path::PathBuf, TestType)> where RT: 
FnOnce(RunOutput) -> RunType, { let entry = entry.unwrap(); if !entry_is_arret_source(&entry) { return None; } let stderr_filename = entry.path().with_extension("stderr"); let stdout_filename = entry.path().with_extension("stdout"); // These files may not exist - we'll treat them as empty let stderr = read_or_empty_vec(&stderr_filename).unwrap(); let stdout = read_or_empty_vec(&stdout_filename).unwrap(); let expected_output = RunOutput { stdout, stderr }; Some((entry.path(), TestType::Run(run_type(expected_output)))) } #[test] fn integration() { let package_paths = arret_compiler::PackagePaths::test_paths(None); let ccx = Arc::new(arret_compiler::CompileCtx::new(package_paths, true)); use arret_compiler::initialise_llvm; initialise_llvm(false); let (send_test, recv_test) = crossbeam_channel::unbounded::<(path::PathBuf, TestType)>(); let (send_failed_test, recv_failed_test) = crossbeam_channel::unbounded::(); let worker_threads: Vec> = (0..num_cpus::get()) .map(|i| { let ccx = Arc::clone(&ccx); let recv_test = recv_test.clone(); let send_failed_test = send_failed_test.clone(); std::thread::Builder::new() .name(format!("integration test worker thread {}", i)) .spawn(move || { for (input_path, test_type) in recv_test.iter() { let test_successful = run_single_test(&ccx, input_path.as_path(), test_type); if !test_successful { send_failed_test .send(input_path.to_string_lossy().to_string()) .unwrap(); } } }) .unwrap() }) .collect(); // The main thread doesn't need these drop(send_failed_test); drop(recv_test); fs::read_dir("./tests/compile-error") .unwrap() .filter_map(|entry| entry_to_compile_test_tuple(entry, TestType::CompileError)) .for_each(|t| send_test.send(t).unwrap()); fs::read_dir("./tests/optimise") .unwrap() .filter_map(|entry| entry_to_compile_test_tuple(entry, TestType::Optimise)) .for_each(|t| send_test.send(t).unwrap()); fs::read_dir("./tests/run-pass") .unwrap() .filter_map(|entry| entry_to_run_test_tuple(entry, RunType::Pass)) .for_each(|t| 
send_test.send(t).unwrap());

    fs::read_dir("./tests/run-error")
        .unwrap()
        .filter_map(|entry| entry_to_run_test_tuple(entry, RunType::Error))
        .for_each(|t| send_test.send(t).unwrap());

    // Closing the queue lets the workers' `recv_test.iter()` loops terminate
    drop(send_test);

    for thread in worker_threads {
        thread.join().unwrap();
    }

    // NOTE(review): the element type annotation was mangled by extraction; restored to
    // `Vec<String>` to match the `String` failure channel and `join(", ")` below.
    let failed_tests: Vec<String> = recv_failed_test.iter().collect();
    if !failed_tests.is_empty() {
        let _ = writeln!(
            io::stderr(),
            "integration tests failed: {}",
            failed_tests.join(", ")
        );
        std::process::exit(1);
    }
}


================================================
FILE: compiler/tests/optimise/application.arret
================================================
(import [stdlib base])
(import [stdlib test])

(defn main! () ->! ()
  ; Even if the outer function is impure we should optimise inner pure applications
  (assert-fn-doesnt-contain-op! :call (fn () ->! Bool (every? (fn (_) false) '(1 2 3)))))


================================================
FILE: compiler/tests/optimise/bitwise.arret
================================================
(import [stdlib base])
(import [stdlib test])

(defn main! () ->! ()
  (assert-fn-doesnt-contain-op! :call (fn ([lhs Int] [rhs Int]) (bit-and lhs rhs)))
  (assert-fn-doesnt-contain-op! :call (fn ([lhs Int] [rhs Int]) (bit-or lhs rhs)))
  (assert-fn-doesnt-contain-op! :call (fn ([lhs Int] [rhs Int]) (bit-xor lhs rhs)))
  (assert-fn-doesnt-contain-op! :call (fn ([i Int]) (bit-not i)))
  (assert-fn-doesnt-contain-op! :call (fn ([i Int]) (bit-shift-left i 16)))
  (assert-fn-doesnt-contain-op! :call (fn ([i Int]) (bit-shift-right i 32)))
  (assert-fn-doesnt-contain-op! :call (fn ([i Int]) (unsigned-bit-shift-right i 32))))


================================================
FILE: compiler/tests/optimise/const.arret
================================================
(import [stdlib base])
(import [stdlib test])

(defn main! () ->! ()
  ; Make sure we pass a constant list when calling `(member?)`
  ; This is stupid but we've broken it in the past
  (assert-fn-doesnt-contain-op! :alloc-boxed (fn ([needle Any]) (member?
needle '("cat" "dog" "fish")))) ()) ================================================ FILE: compiler/tests/optimise/equality.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defmacro assert-native-compare-fn! (macro-rules [(f) (do (assert-fn-doesnt-contain-op! :call f) (assert-fn-doesnt-contain-op! :const-box f) (assert-fn-doesnt-contain-op! :alloc-boxed f))])) (defn main! () ->! () ; This requires calling in to the runtime (assert-fn-contains-op! :call (fn ([left Any] [right Any]) -> Bool (= left right))) ; Ints can be directly compared (assert-native-compare-fn! (fn ([left Int] [right Int]) -> Bool (= left right))) ; This should optimise similarly (assert-native-compare-fn! (fn ([left Int] [right Int]) -> Bool (not= left right))) ; Floats can be directly compared (assert-native-compare-fn! (fn ([left Float] [right Float]) -> Bool (= left right))) ; Bools can be directly compared (assert-native-compare-fn! (fn ([left Bool] [right Bool]) -> Bool (= left right))) ; Chars can be directly compared (assert-native-compare-fn! (fn ([left Char] [right Char]) -> Bool (= left right))) ; These should be optimised away entirely (assert-fn-doesnt-contain-op! :reg-op (fn ([v Bool]) -> Bool (= true v))) (assert-fn-doesnt-contain-op! :reg-op (fn ([v Bool]) -> Bool (= v true))) ; Syms can be directly compared (assert-native-compare-fn! (fn ([left Sym] [right Sym]) -> Bool (= left right))) ; Fns can be constantly compared because they're always inequal (assert-fn-returns-constant! (fn ([left (... -> Any)] [right (... -> Any)]) -> Bool (= left right))) ; Records can be compared fieldwise (letrecord [OneField (one-field [one Int])] (assert-native-compare-fn! (fn ([left OneField] [right OneField]) -> Bool (= left right)))) ; If one field compares false the entire comparison is false (letrecord [TwoField (two-field [one Int] [two Bool])] (assert-fn-returns-constant! 
(fn ([left Int] [right Int]) -> Bool (= (two-field left true) (two-field right false)))))) ================================================ FILE: compiler/tests/optimise/inliner.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn recursive-member? ([item Any] [l (List & Any)]) -> Bool (if (nil? l) false (or (= item (first l)) (recursive-member? item (rest l))))) (defn infinite-loop () -> Bool (infinite-loop)) (defn main! () ->! () ; We should be able to evaluate this at compile time ; This is recursive but does not exceed our inline limit and every iteration makes progress (assert-fn-doesnt-contain-op! :call (fn () (recursive-member? "dog" '("cat" "dog" "fish")))) ; This does not make progress; we should compile this in to a loop (assert-fn-contains-op! :call (fn () (infinite-loop)))) ================================================ FILE: compiler/tests/optimise/list.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! () ; This should just need to load the length from the cell (assert-fn-doesnt-contain-op! :call (fn ([l (List & Any)]) -> Int (length l))) ; We should know this is a constant value (assert-fn-doesnt-contain-op! :mem-load (fn ([l (List & Any)]) -> Int (if (nil? l) (length l) 0))) ; This should come from the type (assert-fn-doesnt-contain-op! :mem-load (fn ([l (List Any Any Any)]) -> Int (length l)))) ================================================ FILE: compiler/tests/optimise/math.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! () ; This should just pass the value through directly (assert-fn-doesnt-contain-op! :call (fn ([n Num]) (+ (* n)))) ; ; These should all be converted to MIR ops ; (assert-fn-doesnt-contain-op! :call (fn ([left Int] [right Int]) (+ left right))) (assert-fn-doesnt-contain-op! 
:call (fn ([left Int] [right Float]) (+ left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Float] [right Float]) (+ left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Float] [right Num]) (+ left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Int] [right Int]) (* left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Int] [right Float]) (* left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Float] [right Float]) (* left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Float] [right Num]) (* left right))) (assert-fn-doesnt-contain-op! :call (fn ([value Int]) (- value))) (assert-fn-doesnt-contain-op! :call (fn ([value Float]) (- value))) (assert-fn-doesnt-contain-op! :call (fn ([left Int] [right Int]) (- left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Int] [right Float]) (- left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Float] [right Float]) (- left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Float] [right Num]) (- left right))) (assert-fn-doesnt-contain-op! :call (fn ([value Float]) (/ value))) (assert-fn-doesnt-contain-op! :call (fn ([left Float] [right Float]) (/ left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Int] [right Int]) (quot left right))) (assert-fn-doesnt-contain-op! :call (fn ([left Int] [right Int]) (rem left right))) (assert-fn-doesnt-contain-op! :call (fn ([radicand Float]) (sqrt radicand))) ()) ================================================ FILE: compiler/tests/optimise/number.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! () ; This should be the identity function for `Int` (assert-fn-doesnt-contain-op! :call (fn ([i Int]) (int i))) ; This should be the identity function for `Float` (assert-fn-doesnt-contain-op! :call (fn ([f Float]) (float f))) ; We can build specific MIR ops for these (assert-fn-doesnt-contain-op! 
:call (fn ([i Int]) (float i))) (assert-fn-doesnt-contain-op! :call (fn ([n Num]) (float n))) ; Single arguments should be optimised to true for comparisons (assert-fn-doesnt-contain-op! :call (fn ([i Int]) (< i))) (assert-fn-doesnt-contain-op! :call (fn ([n Num]) (<= n))) (assert-fn-doesnt-contain-op! :call (fn ([f Float]) (== f))) (assert-fn-doesnt-contain-op! :call (fn ([i Int]) (>= i))) (assert-fn-doesnt-contain-op! :call (fn ([f Float]) (> f))) ; These can generate MIR ops after testing the parameter's type (assert-fn-doesnt-contain-op! :call zero?) (assert-fn-doesnt-contain-op! :call neg?) (assert-fn-doesnt-contain-op! :call pos?) ; These can generate comparison MIR ops (assert-fn-doesnt-contain-op! :call (fn ([i1 Int] [i2 Int]) (< i1 i2))) (assert-fn-doesnt-contain-op! :call (fn ([f1 Float] [i2 Int]) (<= f1 i2))) (assert-fn-doesnt-contain-op! :call (fn ([i1 Int] [f2 Float]) (== i1 f2))) (assert-fn-doesnt-contain-op! :call (fn ([f1 Float] [f2 Float]) (>= f1 f2))) (assert-fn-doesnt-contain-op! :call (fn ([i1 Int] [f2 Float] [i3 Int]) (> i1 f2 i3)))) ================================================ FILE: compiler/tests/optimise/typred.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn test-record-ty-preds! () ->! () (letrecord [RecordOne (record-one)] ; We should only need to test the type tag here ; `:cond` would indicate we're also attempting to load the record class ID (assert-fn-doesnt-contain-op! :cond (fn ([sub (U RecordOne false)]) -> Bool (record-one? sub))))) (defn main! () ->! () (test-record-ty-preds!)) ================================================ FILE: compiler/tests/optimise/vector.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! () ; This should just need to load the length from the cell (assert-fn-doesnt-contain-op! 
:call (fn ([v (Vectorof Any)]) -> Int (vector-length v))) ; This should come from the type (assert-fn-doesnt-contain-op! :call (fn ([v (Vector Any Any Any)]) -> Int (vector-length v))) ; For a vector of known length we should be able to read directly (assert-fn-doesnt-contain-op! :call (fn ([v (Vector Int Int Int)]) -> Int (vector-ref v 2)))) ================================================ FILE: compiler/tests/run-error/impure-panic.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! () (panic! "Impure " \p \a (black-box! \n) (black-box! \i) \c (black-box! \!))) ================================================ FILE: compiler/tests/run-error/impure-panic.stderr ================================================ Impure panic! ================================================ FILE: compiler/tests/run-error/infinite-to-int.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! () (black-box! (int (black-box! ##-Inf))) ()) ================================================ FILE: compiler/tests/run-error/infinite-to-int.stderr ================================================ Float value `-inf` is infinite; cannot convert to Int ================================================ FILE: compiler/tests/run-error/nan-to-int.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! () (black-box! (int (black-box! ##NaN))) ()) ================================================ FILE: compiler/tests/run-error/nan-to-int.stderr ================================================ Float value `NaN` is not a number; cannot convert to Int ================================================ FILE: compiler/tests/run-error/overflow-add.arret ================================================ (import [stdlib base]) (import [stdlib test]) (def maximum-int 9223372036854775807) (defn main! () ->! () (black-box! 
(+ (black-box! maximum-int) 1)) ()) ================================================ FILE: compiler/tests/run-error/overflow-add.stderr ================================================ attempt to add with overflow ================================================ FILE: compiler/tests/run-error/overflow-multiply.arret ================================================ (import [stdlib base]) (import [stdlib test]) (def maximum-int 9223372036854775807) (defn main! () ->! () (black-box! (* (black-box! maximum-int) 2)) ()) ================================================ FILE: compiler/tests/run-error/overflow-multiply.stderr ================================================ attempt to multiply with overflow ================================================ FILE: compiler/tests/run-error/overflow-quot.arret ================================================ (import [stdlib base]) (import [stdlib test]) (def minimum-int -9223372036854775808) (defn main! () ->! () (black-box! (quot (black-box! minimum-int) -1)) ()) ================================================ FILE: compiler/tests/run-error/overflow-quot.stderr ================================================ division by zero ================================================ FILE: compiler/tests/run-error/overflow-subtract.arret ================================================ (import [stdlib base]) (import [stdlib test]) (def minimum-int -9223372036854775808) (defn main! () ->! () (black-box! (- (black-box! minimum-int) 1)) ()) ================================================ FILE: compiler/tests/run-error/overflow-subtract.stderr ================================================ attempt to subtract with overflow ================================================ FILE: compiler/tests/run-error/pure-panic.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! 
() (panic "Pure " \p \a (black-box \n) (black-box \i) \c (black-box \!))) ================================================ FILE: compiler/tests/run-error/pure-panic.stderr ================================================ Pure panic! ================================================ FILE: compiler/tests/run-error/quot-by-zero.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! () (black-box! (quot 1 (black-box! 0))) ()) ================================================ FILE: compiler/tests/run-error/quot-by-zero.stderr ================================================ division by zero ================================================ FILE: compiler/tests/run-error/rem-by-zero.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn main! () ->! () (black-box! (rem 1 (black-box! 0))) ()) ================================================ FILE: compiler/tests/run-error/rem-by-zero.stderr ================================================ division by zero ================================================ FILE: compiler/tests/run-pass/application.arret ================================================ (import [stdlib base]) (import [stdlib test]) ; Applying a fun with correct polymorphic purity inside a pure context (def _ (filter zero? '(0 1 2 3))) (defn take-exactly-three (_ _ _) false) (defn return-rest (& x) x) (defn invert-pred #{[->_ ->!] T} ([pred (T ->_ Bool)] [input T]) ->_ Bool (false? (pred input))) (defn wrapped-every? #{[->_ ->!] T} ([pred (T ->_ Bool)] [lst (List & T)]) ->_ Bool ; This is forcing `(every?)` to have the same polymorphic purity as the outer function (every? pred lst)) (defn main! () ->! () ; Stress test various ways of passing arguments (assert-eq! false (take-exactly-three 1 2 3)) (assert-eq! false (take-exactly-three & '(1 2 3))) (assert-eq! 
false (take-exactly-three 1 2 & '(3))) ; Make sure we can figure out this is a `(Listof Int)` (let [l (concat & '((1 2 3) (4 5)))] (assert-eq! '(1 2 3 4 5) l) (ann l (List & Int))) ; Make sure we select the return type correctly (let [[ret-str Str] (identity "Hello polymorphism!")] (assert-eq! "Hello polymorphism!" ret-str)) ; Make sure we can return our rest argument (let [[rest-list (List & Any)] (return-rest 1 2 3)] (assert-eq! '(1 2 3) rest-list)) ; Treating functions as first-class values (assert-eq! '(1 2 3) ((black-box return-rest) 1 2 3)) ; Make sure we can apply functions with polymorphic purity (assert-eq! false (invert-pred int? 5)) ; Polymorphic purity Rust fun apply inside a polymorphic purity Arret fun (assert-eq! true (wrapped-every? (fn (_) true) '(1 2 3)))) ================================================ FILE: compiler/tests/run-pass/binding.arret ================================================ (import [stdlib base]) ; Ensure values can be passed through a multi-binding let (def [_ :original] (let [x :original y x z y] z)) ; We should be able to infer the type for rest bindings (def (& x) '(1 2 3)) (def [_ (List & Int)] x) (defn main! ()) ================================================ FILE: compiler/tests/run-pass/bitwise.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn test-bit-and! () ->! () (assert-eq! 8 (bit-and 12 9)) (assert-eq! 4 (bit-and (black-box! 12) 21)) (assert-eq! 0 ((black-box! bit-and) 9 12 21))) (defn test-bit-or! () ->! () (assert-eq! 13 (bit-or 12 9)) (assert-eq! 29 (bit-or 12 (black-box! 21))) (assert-eq! 29 (bit-or (black-box! 9) 12 (black-box! 21)))) (defn test-bit-xor! () ->! () (assert-eq! 5 (bit-xor 12 9)) (assert-eq! 25 (bit-xor (black-box! 12) 21)) (assert-eq! 16 (bit-xor (black-box! 9) (black-box! 12) (black-box! 21)))) (defn test-bit-not! () ->! () (assert-eq! -13 (bit-not 12)) (assert-eq! 21 (bit-not (black-box! -22)))) (defn test-bit-shift-left! () ->! 
() (assert-eq! 24 (bit-shift-left 12 1)) (assert-eq! 84 (bit-shift-left 21 (black-box! 2)))) (defn test-bit-shift-right! () ->! () (assert-eq! 6 (bit-shift-right 12 1)) (assert-eq! -6 (bit-shift-right (black-box! -22) 2))) (defn test-unsigned-bit-shift-right! () ->! () (assert-eq! 6 (unsigned-bit-shift-right 12 1)) (assert-eq! 4611686018427387898 (unsigned-bit-shift-right (black-box! -22) 2))) (defn main! () ->! () (test-bit-and!) (test-bit-or!) (test-bit-xor!) (test-bit-not!) (test-bit-shift-left!) (test-bit-shift-right!) (test-unsigned-bit-shift-right!)) ================================================ FILE: compiler/tests/run-pass/closure-typing.arret ================================================ (import [stdlib base]) (defn direct-required-type () (let [[closure (Sym -> Sym)] (fn (x) x)] (ann closure (Sym -> Sym)))) (defn union-required-type () (let [[closure (U false (Sym -> Sym))] (fn (x) x)] (ann closure (Sym -> Sym)))) (defn main! ()) ================================================ FILE: compiler/tests/run-pass/closure.arret ================================================ (import [stdlib base]) (import [stdlib test]) ; This is a monomorphic, fixed arity form of `(constantly)` (defn return-const (x) (fn () x)) ; This would have an inline record struct captures (defn return-two-int-values ([one Int] [two Int]) -> (-> (List & Int)) (fn () (list one two))) ; This would have a external record struct captures (defn return-four-int-values ([one Int] [two Int] [three Int] [four Int]) -> (-> (List & Int)) (fn () (list one two three four))) ; This recaptures a closure pointing to a value that's fallen out of scope (defn recapture-inner-temporary! () ->! () (black-box! (let [captures-inner (let [inner (black-box! 4)] (fn () inner))] (fn () captures-inner))) ()) (defn main! () ->! () (assert-eq! true (fn? (black-box! return-const))) (assert-eq! 1 ((return-const 1))) (assert-eq! 2 (((black-box! return-const) 2))) (assert-eq! 3 ((return-const (black-box! 
3)))) (assert-eq! 4 (((black-box! return-const) (black-box! 4)))) (assert-eq! true (fn? (black-box! constantly))) (assert-eq! 1 ((constantly 1))) (assert-eq! 2 (((black-box! constantly) 2))) (assert-eq! 3 ((constantly (black-box! 3)))) (assert-eq! 4 (((black-box! constantly) (black-box! 4)))) (assert-eq! '(123 456) ((black-box! (return-two-int-values (black-box! 123) (black-box! 456))))) (assert-eq! '(1 2 3 4) ((black-box! (return-four-int-values (black-box! 1) (black-box! 2) (black-box! 3) (black-box! 4))))) (recapture-inner-temporary!)) ================================================ FILE: compiler/tests/run-pass/comments.arret ================================================ (import [stdlib base]) (import [stdlib test]) ; This is a line comment #_"This is an ignored form" (defn main! () ->! () (assert-eq! '() (comment this is a macro that discards its body))) ================================================ FILE: compiler/tests/run-pass/conditionals.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn pos-neg-or-zero ([n Int]) -> (U 'positive 'negative 'zero) (cond (< n 0) 'negative (> n 0) 'positive :else 'zero)) (defn test-cond! () ->! () (assert-eq! 'positive (pos-neg-or-zero 5)) (assert-eq! 'negative (pos-neg-or-zero -1)) (assert-eq! 'zero (pos-neg-or-zero 0)) (assert-eq! () (cond))) (defn main! () ->! () (test-cond!)) ================================================ FILE: compiler/tests/run-pass/divergence.arret ================================================ (import [stdlib base]) (import [stdlib test]) ; This needs a `(do)` wrapper otherwise reverse type propagation will complain about `expr` not ; returning `(U)` before fully evaluating it. Real code shouldn't be expecting `(U)` so it would be ; unaffected. 
(defmacro assert-diverges
  (macro-rules
   [(expr) (ann (do expr '()) (U))]
   ))

; This isn't `(main!)` to make sure it doesn't actually run
(defn compile-only ([x Bool])
  ; Direct divergence
  (assert-diverges (panic "Hello"))

  ; Divergence within a `(do)`
  (assert-diverges (do (panic "(do)") 1 2))

  ; Divergence within `(if)` branches
  (assert-diverges (if x (do (panic "Left") 'left) (do (panic "Right") 'right)))

  ; Divergence within `(if)` condition
  (assert-diverges (if (panic "Test") 'left 'right))
  (assert-diverges (if (do (panic "Test") true) 'left 'right))
  (assert-diverges (if (do (panic "Test") false) 'left 'right))
  (ann (if true 'left (panic "Test")) 'left)
  (ann (if false (panic "Test") 'right) 'right)
  (ann (if x 'other (panic "Test")) 'other)

  ; Divergence within normal application
  (assert-diverges (length (panic "Normal apply")))

  ; Divergence within type predicate
  (assert-diverges (str? (panic "Type predicate apply")))

  ; Divergence within equality predicate
  (assert-diverges (= true (panic "Type predicate apply")))

  ; Divergence within `(let)`
  (assert-diverges (let [_ (panic "(let)")])))

(defn always-panics () -> (U)
  (panic "This panics!"))

(defn panics-inside-let () -> Int
  (let [_ (panic "This panics!")])
  5)

(defn panics-inside-cond () -> Int
  ; This is tricky because our typeck doesn't know this is constant but `eval_hir` does
  (if (= 0 1) () (panic "HERE"))
  5)

(defn panics-inside-app () -> (List & Any)
  (list (panic "HERE") 2 3))

(defn main! () ->! ()
  ; Boxing these should not panic us
  (black-box! panic)
  (black-box! always-panics)
  (black-box! panics-inside-let)
  (black-box! panics-inside-cond)
  (black-box! panics-inside-app)
  ())


================================================
FILE: compiler/tests/run-pass/empty.arret
================================================
(import [stdlib base])

(defn main!
()) ================================================ FILE: compiler/tests/run-pass/equality.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn arg-is-self-equal (arg) -> Bool ; This is very tempting to simpify to `true` but that's not valid for NaN (= arg arg)) (defn num-arg-is-nan? #{[N Num]} ([arg N]) -> Bool (and (float? arg) (not= arg arg))) (defn test-int-equality! () ->! () (assert-eq! (black-box! 1) (black-box! 1)) (assert-ne! (black-box! 1) (black-box! 2))) (defn test-bool-equality! () ->! () (assert-eq! (black-box! true) (black-box! true)) (assert-ne! (black-box! true) (black-box! false)) (assert-eq! (black-box! false) (black-box! false))) (defn test-sym-equality! () ->! () (assert-eq! (black-box-untyped! 'inline1) (black-box-untyped! 'inline1)) (assert-ne! (black-box-untyped! 'inline1) (black-box-untyped! 'inline2)) (assert-eq! (black-box-untyped! 'definitely-out-of-line1) (black-box-untyped! 'definitely-out-of-line1)) (assert-ne! (black-box-untyped! 'definitely-out-of-line1) (black-box-untyped! 'definitely-out-of-line2))) (defn test-list-equality! () ->! () (assert-eq! (black-box! ()) (black-box! ())) (assert-ne! (black-box! ()) (black-box! '(1 2 3))) (assert-eq! true ((black-box! =) (black-box! ()) (black-box! ()))) ; Same variable list containing NaN (let [nan-list (black-box! '(##NaN))] (assert-ne! nan-list nan-list))) (defn test-nan-equality! () ->! () (assert-eq! false (= ##NaN ##NaN)) (assert-eq! false ((black-box! =) ##NaN ##NaN)) (assert-eq! false (= (black-box! ##NaN) (black-box! ##NaN))) (assert-eq! false ((black-box! =) (black-box! ##NaN) (black-box! ##NaN))) (assert-eq! false (arg-is-self-equal (black-box! ##NaN))) (assert-eq! true (num-arg-is-nan? (black-box! ##NaN)))) (defn test-float-zero-equality! () ->! () (assert-eq! true (= 0.0 -0.0)) (assert-eq! true ((black-box! =) -0.0 0.0)) (assert-eq! true (= (black-box! 0.0) (black-box! -0.0))) (assert-eq! true ((black-box! =) (black-box! 
-0.0) (black-box! 0.0)))) (defn test-char-equality! () ->! () (assert-eq! (black-box! \space) (black-box! \space)) (assert-eq! \space (black-box! \space)) (assert-ne! (black-box! \newline) \space) (assert-ne! (black-box! \newline) (black-box! \space))) ; Make sure functions never compare equal (defn test-fn-equality! () ->! () ; Synthetic fun (assert-ne! = =) ; Rust fun (assert-ne! + +) ; Self fun (assert-ne! test-fn-equality! test-fn-equality!) ; Arret fun (assert-ne! test-sym-equality! test-sym-equality!) ; Same variable fun (let [anon-fun (black-box! (fn ()))] (assert-ne! anon-fun anon-fun)) (assert-eq! false ((black-box! =) = =)) (assert-eq! false ((black-box! =) + +)) (assert-eq! false ((black-box! =) test-fn-equality! test-fn-equality!)) (assert-eq! false ((black-box! =) test-sym-equality! test-sym-equality!))) (defn test-empty-record-equality! () ->! () (letrecord [EmptyOne (empty-one) EmptyTwo (empty-two)] (assert-eq! (empty-one) (empty-one)) (assert-ne! (empty-one) (empty-two)) (assert-eq! (empty-two) (empty-two)) (assert-eq! (empty-one) (black-box! (empty-one))) (assert-ne! (empty-two) (black-box! (empty-one))))) (defn test-dynamic-record-equality! () ->! () (letrecord [OneField (one-field [one Int])] (assert-ne! (one-field (black-box! 1)) (one-field (black-box! 2))) (assert-eq! (one-field (black-box! 1)) (one-field (black-box! 1))))) (defn main! () ->! () (test-int-equality!) (test-bool-equality!) (test-sym-equality!) (test-list-equality!) (test-nan-equality!) (test-float-zero-equality!) (test-char-equality!) (test-fn-equality!) (test-empty-record-equality!) (test-dynamic-record-equality!)) ================================================ FILE: compiler/tests/run-pass/hash.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn assert-eq-hash! ([left Any] [right Any]) ->! () (assert-eq! (hash left) (hash right)) (assert-eq! (hash (black-box! left)) (hash right)) (assert-eq! (hash (black-box! 
left)) (hash (black-box! right)))) (defn main! () ->! () ; This would be catastrophic for performance (assert-ne! (hash true) (hash false)) (assert-ne! (hash false) (hash ())) (assert-eq-hash! true true) (assert-eq-hash! false false) (assert-eq-hash! -1 -1) (assert-eq-hash! 0 0) (assert-eq-hash! 8 8) (assert-eq-hash! 12.0 12.0) (assert-eq-hash! ##Inf ##Inf) (assert-eq-hash! ##-Inf ##-Inf) ; Zero and negative zero are `=` (assert-eq-hash! 0.0 -0.0) (assert-eq-hash! 'small 'small) (assert-eq-hash! 'very-long-out-of-line-symbol 'very-long-out-of-line-symbol) (assert-eq-hash! "Small" "Small") (assert-eq-hash! "Very long out-of-line string" "Very long out-of-line string") (assert-eq-hash! () ()) (assert-eq-hash! '(1 2 3) '(1 2 3)) (assert-eq-hash! [] []) (assert-eq-hash! [one two three] [one two three]) (assert-eq-hash! \newline \newline) (assert-eq-hash! \tab \tab) (letrecord [Record (record [field Int])] (let [twelve-record (record 12)] (assert-eq-hash! twelve-record twelve-record))) ; Functions & NaN don't have a defined hash equality - don't test them ) ================================================ FILE: compiler/tests/run-pass/list.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn test-length! () ->! () (assert-eq! 0 (length ())) (assert-eq! 4 (length '(1 2 3 4))) (assert-eq! 4 (length (black-box! '(1 2 3 4)))) (assert-eq! 3 (length (list 1 2 3)))) (defn test-first-second-rest! () ->! () (assert-eq! 'one (ann (first '(one two three)) 'one)) (assert-eq! 'two (ann (second '(one two three)) 'two)) (assert-eq! '(two three) (ann (rest '(one two three)) (List & Sym))) (assert-eq! () (ann (rest '(one)) '()))) (defn test-cons! () ->! () (assert-eq! '(1) (ann (cons 1 '()) (List Int))) (assert-eq! '((1 2) 3) (cons '(1 2) '(3)))) (defn test-map! () ->! () (assert-eq! '() (ann (map (fn (x) x) '()) '())) (assert-eq! '(1 2 3 4 5) (map (fn (x) x) '(1 2 3 4 5))) (assert-eq! 
'(0 1 2 3) (map #(length %) '(() (1) (1 2) (1 2 3)))) (assert-eq! '(4 4 4) (map (constantly 4) '(1 2 3))) (assert-eq! '(() () ()) (map (constantly ()) '(1 2 3))) (assert-eq! '(true false true) (map #(= % "yes") '("yes" "no" "yes"))) (assert-eq! '(false true false) (map #(not= % "yes") '("yes" "no" "yes"))) ; This should be safe because `panic` won't be called (map panic '())) (defn test-filter! () ->! () (assert-eq! '() (filter (constantly true) '())) (assert-eq! '() (filter (constantly false) '())) (assert-eq! '(1 2 3) (filter (constantly true) '(1 2 3))) (assert-eq! '() (filter (constantly false) '(1 2 3))) (assert-eq! '(true true) (filter identity '(true false true false))) (assert-eq! '(3) (filter #(= % 3) '(1 2 3 4)))) (defn test-some? () ->! () (assert-eq! false (some? (constantly true) '())) (assert-eq! false (some? (constantly false) '())) (assert-eq! true (some? (constantly true) '(1 2 3))) (assert-eq! false (some? (constantly false) '(1 2 3))) (assert-eq! true (some? identity '(true false true false))) (assert-eq! true (some? #(= % 3) '(1 2 3 4)))) (defn test-fold! () ->! () (assert-eq! 7 (fold + 1 '(1 2 3))) (assert-eq! 5.0 (fold + -1.0 '(1.0 2.0 3.0)))) (defn test-every? () ->! () (assert-eq! true (every? int? '())) (assert-eq! true (every? int? '(1))) (assert-eq! true (every? int? '(1 2))) (assert-eq! false (every? int? '(1.0 2))) (assert-eq! false (every? int? '(1 2.0)))) (defn test-concat! () ->! () (assert-eq! '() (ann (concat) '())) (assert-eq! '(1 2 3) (concat '(1 2 3))) (assert-eq! '(1 2 3 4 5 6) (concat '(1 2 3) '() '(4 5 6)))) (defn test-member! () ->! () (assert-eq! false (member? 1 '())) (assert-eq! true (member? 1 '(1 2 3))) (assert-eq! true (member? 1 '(3 2 1))) (assert-eq! false (member? 1 '(4 5 6))) (assert-eq! false (member? ##NaN '(1 2 ##NaN)))) (defn test-threading! () ->! () (assert-eq! '(3 5 7 9) (->> '(0 1 2 3 4) ; Make sure these are run in the correct order (map #(* % 2)) (filter #(not (zero? 
%))) (map #(+ % 1)))))

(defn test-take! () ->! ()
  (assert-eq! '() (take -1 '(1 2 3)))
  (assert-eq! '() (take 0 '(1 2 3)))
  (assert-eq! '(1) (take 1 '(1 2 3)))
  (assert-eq! '(1 2) (take 2 '(1 2 3)))
  (assert-eq! '(1 2 3) (take 3 '(1 2 3)))
  (assert-eq! '(1 2 3) (take 4 '(1 2 3))))

(defn test-drop! () ->! ()
  (assert-eq! '(1 2 3) (drop -1 '(1 2 3)))
  (assert-eq! '(1 2 3) (drop 0 '(1 2 3)))
  (assert-eq! '(2 3) (drop 1 '(1 2 3)))
  (assert-eq! '(3) (drop 2 '(1 2 3)))
  (assert-eq! '() (drop 3 '(1 2 3)))
  (assert-eq! '() (drop 4 '(1 2 3))))

(defn test-drop-last! () ->! ()
  (assert-eq! '(1 2 3) (drop-last -1 '(1 2 3)))
  (assert-eq! '(1 2 3) (drop-last 0 '(1 2 3)))
  (assert-eq! '(1 2) (drop-last 1 '(1 2 3)))
  (assert-eq! '(1) (drop-last 2 '(1 2 3)))
  (assert-eq! '() (drop-last 3 '(1 2 3)))
  (assert-eq! '() (drop-last 4 '(1 2 3))))

(defn test-reverse! () ->! ()
  (assert-eq! '() (reverse '()))
  (assert-eq! '(3 2 1) (reverse '(1 2 3))))

(defn test-nth! () ->! ()
  (assert-eq! 1 (nth '(1 2 3) 0))
  (assert-eq! 2 (nth '(1 2 3) 1))
  (assert-eq! 3 (nth '(1 2 3) 2)))

(defn test-repeat! () ->! ()
  (assert-eq! '() (repeat -1 false))
  (assert-eq! '() (repeat 0 true))
  (assert-eq! '(one one one) (repeat 3 'one)))

(defn main! () ->! ()
  (test-length!)
  (test-first-second-rest!)
  (test-cons!)
  (test-map!)
  (test-filter!)
  (test-some?)
  (test-fold!)
  ; `test-every?` was defined but previously never invoked; call it so its
  ; assertions actually run
  (test-every?)
  (test-concat!)
  (test-member!)
  (test-threading!)
  (test-take!)
  (test-drop!)
  (test-drop-last!)
  (test-reverse!)
  (test-nth!)
  (test-repeat!))
================================================ FILE: compiler/tests/run-pass/macros.arret ================================================ (import [stdlib base]) (import [stdlib test])

; Make sure _ discards the macro
(defmacro _ (macro-rules))
(defmacro _ (macro-rules))

(defn main! () ->! ()
  (letmacro [return-one (macro-rules [() 'one])]
    (assert-eq! (return-one) 'one))
  (letmacro [identity (macro-rules [(x) x])]
    (assert-eq! (identity 'one) 'one))
  (letmacro [swap (macro-rules [(x y) '(y x)])]
    (assert-eq!
(swap one two) '(two one))) (letmacro [for (macro-rules [(x :in y) [x y]])] (assert-eq! (for two :in one) [two one])) (letmacro [return-ellipsis (macro-rules [() '(... ...)])] (assert-eq! (return-ellipsis) '...)) (letmacro [list-third (macro-rules [(_ _ x) x])] (assert-eq! (list-third 'one 'two 'three) 'three)) (letmacro [vector-second (macro-rules [([_ x _]) x])] (assert-eq! (vector-second ['one 'two 'three]) 'two)) (letmacro [recurse (macro-rules [() 'end] [(_) (recurse)])] (assert-eq! (recurse 1) 'end)) (letmacro [empty-set? (macro-rules [(#{}) true] [(#{_ ...}) false])] (assert-eq! (empty-set? #{}) true) (assert-eq! (empty-set? #{one}) false)) (letmacro [set->list (macro-rules [(#{v ...}) '(v ...)])] (assert-eq! (set->list #{one two three}) '(one two three))) (letmacro [num->name (macro-rules [(1) 'one-int] [(2) 'two-int] [(3) 'three-int] [(1.0) 'one-float] [(2.0) 'two-float] [(3.0) 'three-float] [(##NaN) 'not-a-number] [(_) 'no-match])] (assert-eq! (num->name 1) 'one-int) (assert-eq! (num->name 2) 'two-int) (assert-eq! (num->name 3) 'three-int) (assert-eq! (num->name 1.0) 'one-float) (assert-eq! (num->name 2.0) 'two-float) (assert-eq! (num->name 3.0) 'three-float) ; NaNs never match (assert-eq! (num->name ##NaN) 'no-match)) (letmacro [return-all (macro-rules [(values ...) '(values ...)])] (assert-eq! (return-all one two three) '(one two three))) (letmacro [mid (macro-rules [([_ vals ... _]) [true vals ... false]])] (assert-eq! (mid [one two three four]) [true two three false])) (letmacro [combine-lists (macro-rules [((l ...) (r ...)) [r ... l ...]])] (assert-eq! (combine-lists (one two) (three four)) [three four one two])) (letmacro [nested-patterns (macro-rules [((a b rest ...) ...) [(rest ... b a) ...]])] (assert-eq! (nested-patterns (one two three four) (five six)) [(three four two one) (six five)])) (letmacro [rebind-same-ident (macro-rules [() (quote (quote 2))])] (assert-eq! 
(rebind-same-ident) ''2))

  ; This ensures that unbound idents (`x`) are hygienic
  (letmacro [set-x-to-inner (macro-rules [(body) (let [x :inner] body)])]
    (assert-eq! :outer (let [x :outer] (set-x-to-inner x))))

  ; This ensures the bound idents (`+`) are hygienic
  (letmacro [set-+-to-* (macro-rules [(body) (let [+ *] body)])]
    (assert-eq! 2 (set-+-to-* (+ 1 1))))

  ; Use the `(... ident)` literal syntax
  ; This uses `&` as it's a common candidate for a literal but it could be an arbitrary symbol
  (letmacro [match-literal-& (macro-rules [((... &)) '(... literal-ampersand)] [(&) '(... var-ampersand)])]
    (assert-eq! 'literal-ampersand (match-literal-& &))
    (assert-eq! 'var-ampersand (match-literal-& 1))))
================================================ FILE: compiler/tests/run-pass/math.arret ================================================ (import [stdlib base]) (import [stdlib test])

(defn test-add! () ->! ()
  (assert-eq! 4 (+ 4))
  (assert-eq! 7 (+ 4 (black-box! 3)))
  ; Mixing Int and Float operands yields a Float result
  (assert-eq! 7.0 (+ (black-box! 4.0) 3))
  (assert-eq! 7.0 (+ (black-box! 4) (black-box! 3.0)))
  (assert-eq! 7.0 (+ 4.0 3.0))
  (assert-eq! true (nan? (+ ##NaN)))
  (assert-eq! true (nan? (+ 1.0 (black-box! ##NaN)))))

(defn test-mul! () ->! ()
  (assert-eq! 4 (* (black-box! 4)))
  (assert-eq! 12 (* (black-box! 4) 3))
  (assert-eq! 12.0 (* (black-box! 4.0) (black-box! 3)))
  (assert-eq! 12.0 (* 4 3.0))
  (assert-eq! 12.0 (* 4.0 3.0))
  (assert-eq! true (nan? (* (black-box! ##NaN))))
  (assert-eq! true (nan? (* 1.0 ##NaN))))

(defn test-sub! () ->! ()
  ; Unary minus negates
  (assert-eq! -3.0 (- 3.0))
  (assert-eq! 3 (- (black-box! -3)))
  (assert-eq! 4 (- (black-box! 7) 3))
  (assert-eq! 4.0 (- 7 (black-box! 3.0)))
  (assert-eq! 4.0 (- (black-box! 7.0) (black-box! 3)))
  (assert-eq! 4.0 (- 7.0 3.0))
  (assert-eq! true (nan? (- (black-box! ##NaN))))
  (assert-eq! true (nan? (- 1.0 ##NaN))))

(defn test-div! () ->! ()
  ; Unary divide takes the reciprocal
  (assert-eq! 1.0 (/ 1.0))
  (assert-eq! 0.5 (/ (black-box! 2.0)))
  (assert-eq! 0.25 (/ 1.0 (black-box! 2.0) 2.0))
  (assert-eq! true (nan? (/ ##NaN)))
  (assert-eq! true (nan?
(/ (black-box! 1.0) ##NaN)))) (defn test-inc-dec! () ->! () (assert-eq! -1 (inc -2)) (assert-eq! 0 (inc (black-box! -1))) (assert-eq! 1 (inc 0)) (assert-eq! 2 (inc (black-box! 1))) (assert-eq! -2 (dec (black-box! -1))) (assert-eq! -1 (dec 0)) (assert-eq! 0 (dec (black-box! 1))) (assert-eq! 1 (dec 2))) (defn test-rem-mod! () ->! () (assert-eq! 3 (quot 10 (black-box! 3))) (assert-eq! 1 (rem (black-box! 10) 3)) (assert-eq! 3 (quot (black-box! 11) (black-box! 3))) (assert-eq! 2 (rem 11 3)) (assert-eq! 3 (quot 11 (black-box! 3))) (assert-eq! 2 (rem (black-box! 11) 3)) (assert-eq! -3 (quot (black-box! -10) (black-box! 3))) (assert-eq! -1 (rem -10 3))) (defn test-sqrt! () ->! () (assert-eq! 0.0 (sqrt 0.0)) (assert-eq! 0.0 (sqrt (black-box! 0.0))) (assert-eq! 1.0 (sqrt 1.0)) (assert-eq! 1.0 (sqrt (black-box! 1.0))) (assert-eq! 2.0 (sqrt 4.0)) (assert-eq! 4.0 (black-box! (sqrt 16.0))) (assert-eq! true (nan? (sqrt ##-Inf))) (assert-eq! true (nan? (sqrt (black-box! ##-Inf)))) (assert-eq! true (nan? (sqrt -1.0))) (assert-eq! true (nan? (sqrt (black-box! -100.0)))) (assert-eq! ##Inf (sqrt ##Inf)) (assert-eq! ##Inf (sqrt (black-box! ##Inf)))) (defn main! () ->! () (test-add!) (test-mul!) (test-sub!) (test-div!) (test-inc-dec!) (test-rem-mod!) (test-sqrt!)) ================================================ FILE: compiler/tests/run-pass/number.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn test-zero? () ->! () (assert-eq! true (zero? 0)) (assert-eq! true (zero? (black-box! 0.0))) (assert-eq! false (zero? ##NaN)) (assert-eq! false (zero? 10)) (assert-eq! false (zero? (black-box! 10.0))) (assert-eq! false (zero? (black-box! ##Inf))) (assert-eq! false (zero? ##-Inf))) (defn test-pos? () ->! () (assert-eq! false (pos? 0)) (assert-eq! false (pos? (black-box! 0.0))) (assert-eq! false (pos? ##NaN)) (assert-eq! true (pos? 10)) (assert-eq! false (pos? (black-box! -10.0))) (assert-eq! true (pos? (black-box! ##Inf))) (assert-eq! 
false (pos? ##-Inf))) (defn test-neg? () ->! () (assert-eq! false (neg? 0)) (assert-eq! false (neg? (black-box! 0.0))) (assert-eq! false (neg? ##NaN)) (assert-eq! false (neg? 10)) (assert-eq! true (neg? (black-box! -10.0))) (assert-eq! false (neg? (black-box! ##Inf))) (assert-eq! true (neg? ##-Inf))) (defn test-nan? () ->! () (assert-eq! false (nan? 10.0)) (assert-eq! true (nan? (black-box! ##NaN))) (assert-eq! true (nan? ##NaN)) (assert-eq! false (nan? ##Inf)) (assert-eq! false (nan? (black-box! ##-Inf)))) (defn test-infinite? () ->! () (assert-eq! false (infinite? 3.0)) (assert-eq! true (infinite? (black-box! ##Inf))) (assert-eq! true (infinite? ##-Inf)) (assert-eq! false (infinite? ##NaN))) (defn test-float! () ->! () (assert-eq! 1.0 (float 1)) (assert-eq! 0.0 (float (black-box! 0))) (assert-eq! -1.0 (float -1)) (assert-eq! 1.0 (float (black-box! 1.0))) (assert-eq! 0.0 (float 0.0)) (assert-eq! -1.0 (float -1.0))) (defn test-int! () ->! () (assert-eq! 1 (int (black-box! 1))) (assert-eq! 0 (int 0)) (assert-eq! -1 (int -1)) (assert-eq! 1 (int 1.0)) (assert-eq! 0 (int (black-box! 0.0))) (assert-eq! -1 (int -1.0))) (defn test-comparisons! () ->! () ; These are all always true (assert-eq! true (< -1)) (assert-eq! true (<= (black-box! 0))) (assert-eq! true (== 1.0)) (assert-eq! true (> (black-box! -1.0))) (assert-eq! true (>= ##NaN)) ; NaNs always compare false (assert-eq! false (< ##NaN (black-box! ##NaN))) (assert-eq! false (<= (black-box! ##NaN) ##NaN)) (assert-eq! false (== (black-box! ##NaN) (black-box! ##NaN))) (assert-eq! false (> ##NaN ##NaN)) (assert-eq! false (>= ##NaN ##NaN)) (assert-eq! true (< 1 2.0 (black-box! 3))) (assert-eq! true (<= 1 (black-box! 2.0) 3)) (assert-eq! false (== 1 (black-box! 2.0) (black-box! 3))) (assert-eq! false (> (black-box! 1) 2.0 3)) (assert-eq! false (>= (black-box! 1) 2.0 (black-box! 3))) (assert-eq! false (< 1 (black-box! 2) (black-box! 2.0))) (assert-eq! true (<= (black-box! 1) (black-box! 2) (black-box! 2.0))) (assert-eq! 
false (== 1 2 2.0)) (assert-eq! false (> 1 2 (black-box! 2.0))) (assert-eq! false (>= 1 (black-box! 2) 2.0)) (assert-eq! false (< (black-box! 1) (black-box! 1.0))) (assert-eq! true (<= 1 (black-box! 1.0))) (assert-eq! true (== (black-box! 1) 1.0)) (assert-eq! false (> (black-box! 1) (black-box! 1.0))) (assert-eq! true (>= 1 1.0)) (assert-eq! false (< ##Inf (black-box! ##-Inf))) (assert-eq! false (<= (black-box! ##Inf) ##-Inf)) (assert-eq! false (== (black-box! ##Inf) (black-box! ##-Inf))) (assert-eq! true (> ##Inf (black-box! ##-Inf))) (assert-eq! true (>= (black-box! ##Inf) ##-Inf)) (assert-eq! false (< ##Inf (black-box! ##-Inf) (black-box! ##-Inf))) (assert-eq! false (<= (black-box! ##Inf) ##-Inf ##-Inf)) (assert-eq! false (== (black-box! ##Inf) (black-box! ##-Inf) (black-box! ##-Inf))) (assert-eq! false (> ##Inf ##-Inf (black-box! ##-Inf))) (assert-eq! true (>= ##Inf ##-Inf ##-Inf))) (defn test-even-odd! () ->! () (assert-eq! false (even? -1)) (assert-eq! true (odd? -1)) (assert-eq! true (even? 0)) (assert-eq! false (odd? 0)) (assert-eq! false (even? 1)) (assert-eq! true (odd? 1)) (assert-eq! true (even? 2)) (assert-eq! false (odd? 2))) (defn test-min-max! () ->! () (assert-eq! 0 (min 0)) (assert-eq! 0 (max 0)) (assert-eq! true (nan? (min ##NaN))) (assert-eq! true (nan? (max ##NaN))) (assert-eq! true (nan? (float (min ##NaN 0)))) (assert-eq! true (nan? (float (min 0 ##NaN)))) (assert-eq! true (nan? (float (max ##NaN 0)))) (assert-eq! true (nan? (float (max 0 ##NaN)))) (assert-eq! 1 (min 1 2 3)) (assert-eq! 3 (max 1 2 3)) (assert-eq! 1.0 (min 3.0 2 1.0)) (assert-eq! 3.0 (max 3.0 2 1.0)) (assert-eq! ##-Inf (min ##-Inf ##Inf)) (assert-eq! ##Inf (max ##-Inf ##Inf))) (defn main! () ->! () (test-zero?) (test-pos?) (test-neg?) (test-nan?) (test-infinite?) (test-float!) (test-int!) (test-comparisons!) (test-even-odd!) 
(test-min-max!)) ================================================ FILE: compiler/tests/run-pass/occurrence-typing.arret ================================================ (import [stdlib base]) (defn test-type-in-branches ([test Bool]) (if test (ann test true) (ann test false))) (defn trivial-type-predicate ([input (U Sym Str)]) (if (sym? input) (ann input Sym) (ann input Str))) (defn non-literal-equality-predicate ([input (U Int Float)]) (if (= input 123) (ann input Int) (ann input (U Int Float)))) (defn literal-equality-predicate ([input (U 'foo 'bar)]) (if (= input 'foo) (ann input 'foo) (ann input 'bar))) (defn two-var-equality-predicate ([left (U 'foo 'bar)] [right (U 'bar 'baz)]) (when (= left right) (ann left 'bar) (ann right 'bar))) (defn let-preserves-type-information ([input Bool]) (if (let [_ ()] 'foo input) (ann input true) (ann input false))) (defn bool-equality-with-true ([input Num]) (if (= true (int? input)) (ann input Int) (ann input Float)) (if (= (int? input) true) (ann input Int) (ann input Float))) (defn bool-equality-with-false ([input Num]) (if (= false (int? input)) (ann input Float) (ann input Int)) (if (= (int? input) false) (ann input Float) (ann input Int))) (defn same-var-and-typing ([lst (U (List & Any) false)]) -> (U Any false) (if (and (list? lst) (not (nil? lst))) (first lst) false)) (defn two-var-and-typing ([left Num] [right Num]) (when (and (int? left) (float? right)) (ann left Int) (ann right Float))) (defn cond-typing ([input Num]) (cond (int? input) (ann input Int) (float? input) (ann input Float))) (defn or-typing ([input (U Sym Str Int)]) (if (or (sym? input) (str? input)) (ann input (U Sym Str)) (ann input (U Sym Str Int)))) (defn not-typing ([input (U Sym Str)]) (if-not (str? input) (ann input Sym) (ann input Str))) (defn partial-not-typing ([input (U Sym Str)] [other-bool Bool]) ; This doesn't correspond to a logical operation such as `(and)` or `(not)`; it's just tested for ; completeness (when (if (str? 
input) false other-bool) (ann input Sym))) (defn unbounded-generic-typing #{T} ([input T]) -> T (if (str? input) (ann input Str) input)) (defn bounded-generic-typing #{[T Num]} ([input T]) -> T (if (float? input) (ann input Float) (ann input Int))) (defn main! ()) ================================================ FILE: compiler/tests/run-pass/read.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn test-read-str! () ->! () (assert-eq! 1 (read-str "1")) (assert-eq! -1.5 (read-str "-1.5")) (assert-eq! ##Inf (read-str "##Inf")) (assert-eq! ##-Inf (read-str "##-Inf")) (let [read-nan (read-str "##NaN")] (assert-eq! true (and (float? read-nan) (nan? read-nan)))) (assert-eq! 'tiny (read-str "tiny")) (assert-eq! 'large-so-it-wont-intern-inline (read-str "large-so-it-wont-intern-inline")) ; The `black-box!` forces us to re-intern these at runtime (assert-eq! 'large-so-it-wont-intern-inline (read-str (black-box! "large-so-it-wont-intern-inline"))) (assert-ne! 'large-so-it-wont-intern-inline (read-str (black-box! "new-large-symbol-that-didnt-appear-at-compile-time"))) (assert-eq! "tiny" (read-str "\"tiny\"")) (assert-eq! "Really quite large so it wont fit inside a cell" (read-str "\"Really quite large so it wont fit inside a cell\"")) (assert-eq! "Really quite large so it wont fit inside a cell" (read-str (black-box! "\"Really quite large so it wont fit inside a cell\""))) (assert-eq! '() (read-str "()")) (assert-eq! '(1 2 3) (read-str "(1 2 3)")) (assert-eq! [] (read-str "[]")) (assert-eq! [one two three] (read-str "[one two three]")) (assert-eq! \newline (read-str "\\newline")) (assert-eq! \λ (read-str "\\λ"))) (defn main! () ->! () (test-read-str!)) ================================================ FILE: compiler/tests/run-pass/record.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn test-poly-records! () ->! 
() (letrecord [(Record A) (record [poly A] [int Int] any)] (let [test-record (record 'foo 2 'bar)] (assert-eq! 'foo (ann (record-poly test-record) 'foo)) (assert-eq! 2 (ann (record-int test-record) Int)) (assert-eq! 'bar (ann (record-any test-record) Any)) (assert-eq! 'foo (ann ((black-box! record-poly) test-record) 'foo)) (assert-eq! 2 (ann ((black-box! record-int) test-record) Int)) (assert-eq! 'bar (ann ((black-box! record-any) test-record) Any))))) (defn test-bool-record-fields! () ->! () (letrecord [Record (record [field Bool])] (let [true-record (record true) false-record (record false) black-box-true-record ((black-box! record) true) black-box-false-record ((black-box! record) false)] (assert-eq! false (record-field false-record)) (assert-eq! true (record-field true-record)) (assert-eq! false (record-field black-box-false-record)) (assert-eq! true (record-field black-box-true-record)) (assert-eq! true-record true-record) (assert-eq! true-record black-box-true-record) (assert-ne! true-record false-record) (assert-ne! true-record black-box-false-record) (assert-eq! false ((black-box! record-field) false-record)) (assert-eq! true ((black-box! record-field) true-record))))) (defn test-int-record-fields! () ->! () (letrecord [Record (record [field Int])] (let [twelve-record (record 12)] (assert-eq! 12 (record-field twelve-record)) (assert-eq! 12 (record-field (black-box! twelve-record))) (assert-eq! 12 ((black-box! record-field) twelve-record)) (assert-eq! twelve-record twelve-record) (assert-eq! (black-box! twelve-record) (black-box! twelve-record)) ; Using a pure `(black-box)` tests that we can codegen record constants based on boxed values (assert-eq! (black-box! twelve-record) (black-box twelve-record))))) (defn test-char-record-fields! () ->! () (letrecord [Record (record [field Char])] (let [newline-record (record \newline)] (assert-eq! \newline (record-field newline-record)) (assert-eq! \newline (record-field (black-box! newline-record))) (assert-eq! 
\newline ((black-box! record-field) newline-record)) (assert-eq! newline-record newline-record) (assert-eq! (black-box! newline-record) (black-box! newline-record))))) (defn test-float-record-fields! () ->! () (letrecord [Record (record [field Float])] (let [fourteen-record (record 14.0)] (assert-eq! 14.0 (record-field fourteen-record)) (assert-eq! 14.0 (record-field (black-box! fourteen-record))) (assert-eq! 14.0 ((black-box! record-field) fourteen-record)) (assert-eq! fourteen-record fourteen-record) (assert-eq! (black-box! fourteen-record) (black-box! fourteen-record))))) (defn test-sym-record-fields! () ->! () (letrecord [Record (record [field Sym])] (let [foo-record (record 'foo)] (assert-eq! 'foo (record-field foo-record)) (assert-eq! 'foo (record-field (black-box! foo-record))) (assert-eq! 'foo (black-box! (record-field (black-box! foo-record)))) (assert-eq! 'foo ((black-box! record-field) foo-record)) (assert-eq! foo-record foo-record) (assert-eq! (black-box! foo-record) (black-box! foo-record))))) (defn test-boxed-record-fields! () ->! () (letrecord [Record (record [field Any])] (let [list-record (record '(1 2 3))] (assert-eq! '(1 2 3) (record-field list-record)) (assert-eq! '(1 2 3) (record-field (black-box! list-record))) (assert-eq! '(1 2 3) ((black-box! record-field) list-record)) (assert-eq! list-record list-record) (assert-eq! (black-box! list-record) (black-box! list-record))))) (defn test-external-record! () ->! () (letrecord [Record (record [first Char] [second Float] [third Int] [fourth Sym] [fifth Bool])] (let [external-record (record \tab 2.0 3 'four true)] (assert-eq! \tab (record-first external-record)) (assert-eq! \tab (record-first (black-box! external-record))) (assert-eq! 2.0 (record-second external-record)) (assert-eq! 2.0 (record-second (black-box! external-record))) (assert-eq! 3 (record-third external-record)) (assert-eq! 3 (record-third (black-box! external-record))) (assert-eq! 'four (record-fourth external-record)) (assert-eq! 
'four (record-fourth (black-box! external-record))) (assert-eq! true (record-fifth external-record)) (assert-eq! true (record-fifth (black-box! external-record))) (assert-eq! external-record external-record) (assert-eq! (black-box! external-record) (black-box! external-record)) (assert-eq! (black-box! external-record) (black-box external-record)) ; Force this to be allocated on the heap (let [heap-external-record ((black-box! record) \tab 2.0 (black-box! 3) 'four true)] (assert-eq! external-record heap-external-record))))) ; This makes sure we implement records and first-class functions correctly (defn test-record-function-callbacks! () ->! () (letrecord [Record (record [field Int])] (assert-eq! '(1 2 3) (->> '(1 2 3) (map record) (map record-field))) (assert-eq! '(4 5 6) (->> '(4 5 6) (map (black-box! record)) (map (black-box! record-field)))) (assert-eq! '(7 8 9) (->> (black-box! '(7 8 9)) (map record) (map record-field))) (assert-eq! '(10 11 12) (->> '(10 11 12) ((black-box! map) record) ((black-box! map) record-field))))) (defn main! () ->! () (test-poly-records!) (test-bool-record-fields!) (test-int-record-fields!) (test-char-record-fields!) (test-float-record-fields!) (test-sym-record-fields!) (test-boxed-record-fields!) (test-external-record!) (test-record-function-callbacks!)) ================================================ FILE: compiler/tests/run-pass/recursion.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn recursive-reverse #{T} ([lst (List & T)]) -> (List & T) (if (nil? lst) lst (concat (recursive-reverse (rest lst)) (list (first lst))))) (defn even-length? ([l (List & Any)]) -> Bool (if (nil? l) true (let [tail (rest l)] (if (nil? tail) false (recur (rest tail)))))) ; This has a polymorphic purity (defn recursive-every? #{T [->_ ->!]} ([pred (T ->_ Bool)] [l (List & T)]) ->_ Bool (if (nil? l) true (if (pred (first l)) (recur pred (rest l)) false))) (defn wrapped-recursive-every? #{[->_ ->!] 
T} ([pred (T ->_ Bool)] [lst (List & T)]) ->_ Bool ; This is forcing `(recursive-every?)` to have the same polymorphic purity as the outer function (recursive-every? pred lst)) ; This makes sure `(recur)` works properly capturing an outer variable (defn return-infinite-print ([output Str]) -> (->! ()) (fn () ->! () (println! output) (recur))) (defn main! () ->! () (assert-eq! '() ((black-box! recursive-reverse) '())) (assert-eq! '(()) ((black-box! recursive-reverse) (black-box! '(())))) (assert-eq! '("three" "two" "one") (recursive-reverse (black-box! '("one" "two" "three")))) (assert-eq! '(7 6 5 4 3 2 1) (recursive-reverse '(1 2 3 4 5 6 7))) (assert-eq! true (even-length? '())) (assert-eq! false (even-length? (black-box! '(1)))) (assert-eq! true ((black-box! even-length?) '(1 2))) (assert-eq! false ((black-box! even-length?) (black-box! '(1 2 3)))) (assert-eq! true (even-length? '(1 2 3 4))) (assert-eq! true (recursive-every? int? '(1 2 3))) (assert-eq! false (recursive-every? int? '(1 2.0 3))) (assert-eq! true ((black-box! recursive-every?) int? '(1 2 3))) (assert-eq! false ((black-box! recursive-every?) int? '(1 2.0 3))) (assert-eq! true (recursive-every? (black-box! int?) '(1 2 3))) (assert-eq! false (recursive-every? (black-box! int?) '(1 2.0 3))) (assert-eq! true (recursive-every? int? (black-box! '(1 2 3)))) (assert-eq! false (recursive-every? int? (black-box! '(1 2.0 3)))) (assert-eq! true (wrapped-recursive-every? (fn (_) true) '(1 2 3))) ; Just building this function would previously crash (assert-eq! true (fn? (black-box! return-infinite-print))) ; We should select an ABI for `even-length?` which doesn't require allocaton (let [black-box-list (black-box! '(1 2 3 4 5)) (alloc-count is-even) (heap-alloc-count (fn () (even-length? black-box-list)))] (assert-eq! 0 alloc-count) (assert-eq! 
false is-even)) ()) ================================================ FILE: compiler/tests/run-pass/set.arret ================================================ (import [stdlib base]) (import [stdlib set]) (import [stdlib test]) (defn test-set! () ->! () (assert-eq! #{} (set)) (assert-eq! #{1} (set 1)) (assert-eq! #{1 2} ((black-box! set) 1 2)) (assert-eq! #{1 2} ((black-box! set) 1 1 2 2)) (assert-eq! #{1 2 3} (set 1 2 (black-box! 3))) (assert-eq! #{1 2 3} (set (black-box! 3) 2 1))) (defn test-set-contains! () ->! () (assert-eq! false (set-contains? #{1 2 3} 0)) (assert-eq! true ((black-box! set-contains?) #{1 2 3} 1)) (assert-eq! true (set-contains? (black-box! #{1 2 3}) 2)) ; ##NaN never compares equal (assert-eq! false (set-contains? (set ##NaN ##NaN ##NaN) ##NaN))) (defn test-set-length! () ->! () (assert-eq! 0 (set-length #{})) (assert-eq! 1 ((black-box! set-length) #{1})) (assert-eq! 2 (set-length (black-box! #{1 2}))) (assert-eq! 3 ((black-box! set-length) (black-box! #{1 2 3}))) ; Functions never compare equal (assert-eq! 3 (set-length (set zero? zero? zero?)))) (defn test-set->list! () ->! () (assert-eq! '() (set->list #{})) (assert-eq! '(1) (set->list #{1})) (assert-eq! #{1 2} (set & ((black-box! set->list) #{1 2}))) (assert-eq! #{1 2 3} ((black-box! set) & (set->list #{1 2 3})))) (defn test-subset! () ->! () (assert-eq! true (subset? #{} #{1 2 3})) (assert-eq! true (subset? #{1 3} #{1 2 3})) (assert-eq! true (subset? #{1 2 3} #{1 2 3})) (assert-eq! false (subset? #{1 2 3 4} #{1 2 3})) (assert-eq! false (subset? #{##NaN} #{##NaN}))) (defn test-superset! () ->! () (assert-eq! true (superset? #{'one 'two 'three} #{})) (assert-eq! true (superset? #{'one 'two 'three} #{'one 'three})) (assert-eq! true (superset? #{'one 'two 'three} #{'one 'two 'three})) (assert-eq! false (superset? #{'one 'two 'three} #{'one 'two 'three 'four})) (assert-eq! false (superset? #{##NaN} #{##NaN}))) (defn main! () ->! () (test-set!) (test-set-contains!) (test-set-length!) 
(test-set->list!) (test-subset!) (test-superset!)) ================================================ FILE: compiler/tests/run-pass/type-definitions.arret ================================================ (import [stdlib base]) ; Make sure _ eats the type (deftype _ Int) (deftype _ Float) (defn takes-int ([x Int]) (lettype [Integer Int] (ann x Integer))) (defn main! ()) ================================================ FILE: compiler/tests/run-pass/typred.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn test-tagged-ty-preds! () ->! () (assert-eq! true (any? (black-box-untyped! "foo"))) (assert-eq! true (any? (black-box-untyped! 'foo))) (assert-eq! true (any? (black-box-untyped! :foo))) (assert-eq! true (any? (black-box-untyped! 1))) (assert-eq! true (any? (black-box-untyped! 2.0))) (assert-eq! true (str? (black-box-untyped! "foo"))) (assert-eq! false (str? (black-box-untyped! 123))) (assert-eq! false (sym? (black-box-untyped! "foo"))) (assert-eq! true (sym? (black-box-untyped! 'foo))) (assert-eq! true (sym? (black-box-untyped! :foo))) (assert-eq! true (true? (black-box-untyped! true))) (assert-eq! false (true? (black-box-untyped! false))) (assert-eq! false (true? (black-box-untyped! 123))) (assert-eq! false (false? (black-box-untyped! true))) (assert-eq! true (false? (black-box-untyped! false))) (assert-eq! false (false? (black-box-untyped! 123))) (assert-eq! true (bool? (black-box-untyped! true))) (assert-eq! false (bool? (black-box-untyped! 123))) (assert-eq! true (num? (black-box-untyped! 123))) (assert-eq! true (num? (black-box-untyped! -456.7))) (assert-eq! false (num? (black-box-untyped! "twelve"))) (assert-eq! true (int? (black-box-untyped! 123))) (assert-eq! false (int? (black-box-untyped! false))) (assert-eq! true (fn? (black-box-untyped! cons))) (assert-eq! false (fn? (black-box-untyped! 123))) (assert-eq! true (list? (black-box-untyped! '()))) (assert-eq! true (list? (black-box-untyped! 
'(1 2 3)))) (assert-eq! false (list? (black-box-untyped! 123))) (assert-eq! true (nil? (black-box-untyped! '()))) (assert-eq! false (nil? (black-box-untyped! '(1 2 3)))) (assert-eq! false (nil? (black-box-untyped! 123))) (assert-eq! true ((black-box! str?) (black-box-untyped! "foo"))) (assert-eq! false ((black-box! str?) (black-box-untyped! 123))) (assert-eq! true (set? (black-box-untyped! #{}))) (assert-eq! false (set? (black-box-untyped! {}))) (assert-eq! true (map? (black-box-untyped! {}))) (assert-eq! false (map? (black-box-untyped! #{}))) ; Make sure type predicates can be treated as first-class functions (assert-eq! '(true false true) (map int? '(1 2.0 3))) (assert-eq! '(true false true) (map (black-box! int?) '(1 2.0 3))) (assert-eq! '(true false true) ((black-box! map) int? '(1 2.0 3)))) (defn test-record-ty-preds! () ->! () (letrecord [RecordOne (record-one) RecordTwo (record-two)] (let [record-one-instance (record-one) record-two-instance (record-two) record-number (fn ([r (U RecordOne RecordTwo)]) -> Int (if (record-one? r) 1 2))] (assert-eq! true (record? record-one-instance)) (assert-eq! true (record? (black-box! record-one-instance))) (assert-eq! true (record-one? record-one-instance)) (assert-eq! false (record-two? record-one-instance)) (assert-eq! false (record-one? 123)) (assert-eq! true (record-one? (black-box-untyped! record-one-instance))) (assert-eq! false (record-two? (black-box-untyped! record-one-instance))) (assert-eq! false (record-two? (black-box-untyped! 123))) (assert-eq! 1 (record-number (black-box! record-one-instance))) (assert-eq! 2 (record-number (black-box! record-two-instance)))))) (defn main! () ->! () (test-tagged-ty-preds!) 
(test-record-ty-preds!)) ================================================ FILE: compiler/tests/run-pass/vector.arret ================================================ (import [stdlib base]) (import [stdlib test]) ; This is our node size + 1 ; It's represented as a persistent vector with a tree node and 1 element tail node (def thirty-three-element-vector [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33]) (defn test-vector! () ->! () ; Inline vectors (assert-eq! [] (vector)) (assert-eq! [1] (vector 1)) (assert-eq! [1 2] ((black-box! vector) 1 2)) (assert-eq! [1 2 3] (vector 1 2 (black-box! 3))) ; Tail-only vectors (assert-eq! [1 2 3 4] (vector 1 2 3 4)) (assert-eq! [1 2 3 4 5] ((black-box! vector) 1 2 3 4 5)) ; Tail & tree vectors ; This is testing constant vector generation as well as `(vector!)` (assert-eq! thirty-three-element-vector ((black-box! vector) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33))) (defn test-vector-ref! () ->! () ; Inline vectors (assert-eq! 1 (vector-ref [1 2 3] 0)) (assert-eq! 2 ((black-box! vector-ref) [1 2 3] 1)) (assert-eq! 3 (vector-ref (black-box! [1 2 3]) 2)) ; Tail-only vectors (assert-eq! 4 (vector-ref (black-box! [1 2 3 4]) 3)) (assert-eq! 5 ((black-box! vector-ref) [1 2 3 4 5] 4)) (assert-eq! 6 ((black-box! vector-ref) (black-box! [1 2 3 4 5 6]) 5)) ; Tail & tree vector (assert-eq! 1 (vector-ref (black-box! thirty-three-element-vector) 0)) (assert-eq! 32 ((black-box! vector-ref) thirty-three-element-vector 31)) (assert-eq! 33 (vector-ref (black-box! thirty-three-element-vector) 32))) (defn test-vector-length! () ->! () (assert-eq! 0 (vector-length [])) (assert-eq! 1 ((black-box! vector-length) [1])) (assert-eq! 2 (vector-length (black-box! [1 2]))) (assert-eq! 3 ((black-box! vector-length) (black-box! [1 2 3]))) (assert-eq! 4 ((black-box! vector-length) (black-box! [1 2 3 4])))) (defn test-vector->list! () ->! () (assert-eq! 
'(1) (vector->list [1])) (assert-eq! '(1 2) ((black-box! vector->list) [1 2])) (assert-eq! '(1 2 3) (vector->list (black-box! [1 2 3]))) (assert-eq! '(1 2 3 4) ((black-box! vector->list) (black-box! [1 2 3 4])))) (defn test-vector-extend! () ->! () (assert-eq! [1 2 3 4 5 6] (vector-extend (vector-extend [1 2 3] 4) 5 6))) (defn test-vector-assoc! () ->! () ; Inline vector (assert-eq! [one two three] (vector-assoc (vector-assoc [two two two] 0 'one) 2 'three)) ; External vector (assert-eq! [false false false false false] (vector-assoc [false false true false false] 2 false))) (defn test-vector-append! () ->! () (assert-eq! [] (vector-append)) (assert-eq! [1 2 3] (vector-append [] [1 2 3])) (assert-eq! [1 2 3] (vector-append [1] [2 3])) (assert-eq! [1 2 3] (vector-append [1 2] [3])) (assert-eq! [1 2 3 4 5 6] (vector-append [1 2 3] [4 5 6])) (assert-eq! [1 2 3 4 5 6 7 8] (vector-append (black-box! [1 2 3]) [4 5 6 7] (black-box! [8]))) (assert-eq! [1 2 3] (vector-append [1] [2] [3]))) (defn test-vector-take! () ->! () (assert-eq! [] (vector-take -1 (black-box! [1 2 3 4 5 6]))) (assert-eq! [] (vector-take (black-box! 0) [1 2 3 4 5 6])) (assert-eq! [1 2 3] (vector-take (black-box! 3) (black-box! [1 2 3 4 5 6]))) (assert-eq! [1 2 3 4 5] ((black-box! vector-take) 5 [1 2 3 4 5 6])) (assert-eq! [1 2 3 4 5 6] ((black-box! vector-take) 6 (black-box! [1 2 3 4 5 6]))) (assert-eq! [1 2 3 4 5 6] ((black-box! vector-take) (black-box! 100) (black-box! [1 2 3 4 5 6])))) (defn main! () ->! () (test-vector!) (test-vector-ref!) (test-vector-length!) (test-vector->list!) (test-vector-assoc!) (test-vector-extend!) (test-vector-append!) (test-vector-take!)) ================================================ FILE: compiler/tests/run-pass/write.arret ================================================ (import [stdlib base]) (import [stdlib test]) (defn test-write-to-stdout! () ->! () ; This is dumb but it makes us produce a binary (print! 
"") ; We should be able to call through a thunk as well ((black-box! print!) "") ; And through a callback (map print! '("")) ; And print an unknown value (print! (black-box! "")) ; And do both at once ((black-box! print!) (black-box! ""))) (defn test-write-to-str! () ->! () (assert-eq! "" (print-str)) (assert-eq! "" (write-str)) (assert-eq! "hello" (print-str 'hello)) (assert-eq! "hello" (write-str 'hello)) (assert-eq! "Hello, world!" (print-str "Hello, world!")) (assert-eq! "\"Hello, world!\"" (write-str "Hello, world!")) (assert-eq! " " (print-str \space)) (assert-eq! "\\space" (write-str \space)) (assert-eq! "123456" (print-str 1 2 (black-box! 3) (black-box! 4) 5 (black-box! 6))) (assert-eq! "1 2 3 4 5 6" (write-str 1 2 (black-box! 3) (black-box! 4) 5 (black-box! 6))) (assert-eq! "#fn#fn#fn" (print-str + (black-box +) (black-box! +))) (assert-eq! "#fn #fn #fn" (write-str + (black-box +) (black-box! +)))) (defn main! () ->! () (test-write-to-stdout!) (test-write-to-str!)) ================================================ FILE: compiler/ty/conv_abi.rs ================================================ use arret_runtime::callback; use arret_runtime::{abitype, boxed}; use crate::ty; use crate::ty::purity::Purity; use crate::ty::Ty; fn type_tag_to_ty(type_tag: boxed::TypeTag) -> Ty { use arret_runtime::boxed::TypeTag; match type_tag { TypeTag::Float => Ty::Float, TypeTag::Char => Ty::Char, TypeTag::Str => Ty::Str, TypeTag::Sym => Ty::Sym, TypeTag::True => Ty::LitBool(true), TypeTag::False => Ty::LitBool(false), TypeTag::Int => Ty::Int, TypeTag::Vector => Ty::Vectorof(Box::new(Ty::Any.into())), TypeTag::Nil => ty::List::empty().into(), TypeTag::Pair => ty::List::new(Box::new([Ty::Any.into()]), Ty::Any.into()).into(), TypeTag::FunThunk => ty::TopFun::new(Purity::Impure.into(), Ty::Any.into()).into(), TypeTag::Record => Ty::TopRecord, TypeTag::Set => Ty::Set(Box::new(Ty::Any.into())), TypeTag::Map => Ty::Map(Box::new(ty::Map { key: Ty::Any.into(), value: Ty::Any.into(), 
})), } } pub trait ConvertableAbiType { fn to_ty_ref(&self) -> ty::Ref; fn to_rust_str(&self) -> String; } impl ConvertableAbiType for abitype::AbiType { fn to_ty_ref(&self) -> ty::Ref { use arret_runtime::abitype::AbiType; match self { AbiType::Bool => Ty::Bool.into(), AbiType::Char => Ty::Char.into(), AbiType::Float => Ty::Float.into(), AbiType::Int => Ty::Int.into(), AbiType::InternedSym => Ty::Sym.into(), AbiType::Boxed(boxed) => boxed.to_ty_ref(), AbiType::Callback(entry_point_abi) => entry_point_abi.to_ty_ref(), } } fn to_rust_str(&self) -> String { use arret_runtime::abitype::AbiType; match self { AbiType::Bool => "bool".to_owned(), AbiType::Char => "char".to_owned(), AbiType::Float => "f64".to_owned(), AbiType::Int => "i64".to_owned(), AbiType::InternedSym => "InternedSym".to_owned(), AbiType::Boxed(boxed) => format!("Gc<{}>", boxed.to_rust_str()), AbiType::Callback(entry_point_abi) => entry_point_abi.to_rust_str(), } } } impl ConvertableAbiType for abitype::BoxedAbiType { fn to_ty_ref(&self) -> ty::Ref { use arret_runtime::abitype::BoxedAbiType; match self { BoxedAbiType::Any => Ty::Any.into(), BoxedAbiType::Vector(member) => Ty::Vectorof(Box::new(member.to_ty_ref())).into(), BoxedAbiType::Set(member) => Ty::Set(Box::new(member.to_ty_ref())).into(), BoxedAbiType::Map(key, value) => Ty::Map(Box::new(ty::Map { key: key.to_ty_ref(), value: value.to_ty_ref(), })) .into(), BoxedAbiType::List(member) => ty::List::new_uniform(member.to_ty_ref()).into(), BoxedAbiType::Pair(member) => { let member_ty_ref: ty::Ref = member.to_ty_ref(); ty::List::new(Box::new([member_ty_ref.clone()]), member_ty_ref).into() } BoxedAbiType::UniqueTagged(type_tag) => type_tag_to_ty(*type_tag).into(), BoxedAbiType::Union(_, tags) => { let members = tags.iter().map(|type_tag| type_tag_to_ty(*type_tag).into()); ty::unify::unify_ty_ref_iter(members) } } } fn to_rust_str(&self) -> String { use arret_runtime::abitype::BoxedAbiType; match self { BoxedAbiType::Any => "boxed::Any".to_owned(), 
BoxedAbiType::Vector(member) => format!("boxed::Vector<{}>", member.to_rust_str()), BoxedAbiType::List(member) => format!("boxed::List<{}>", member.to_rust_str()), BoxedAbiType::Pair(member) => format!("boxed::Pair<{}>", member.to_rust_str()), BoxedAbiType::Set(member) => format!("boxed::Set<{}>", member.to_rust_str()), BoxedAbiType::UniqueTagged(type_tag) => format!("boxed::{}", type_tag.to_str()), BoxedAbiType::Union(name, _) => format!("boxed::{}", name), BoxedAbiType::Map(key, value) => { format!("boxed::Map<{}, {}>", key.to_rust_str(), value.to_rust_str()) } } } } impl ConvertableAbiType for abitype::RetAbiType { fn to_ty_ref(&self) -> ty::Ref { use arret_runtime::abitype::RetAbiType; match self { RetAbiType::Void => Ty::unit().into(), RetAbiType::Never => Ty::never().into(), RetAbiType::Inhabited(abi_type) => abi_type.to_ty_ref(), } } fn to_rust_str(&self) -> String { use arret_runtime::abitype::RetAbiType; match self { RetAbiType::Void => "()".to_owned(), RetAbiType::Never => "Never".to_owned(), RetAbiType::Inhabited(abi_type) => abi_type.to_rust_str(), } } } impl ConvertableAbiType for abitype::ParamAbiType { fn to_ty_ref(&self) -> ty::Ref { self.abi_type.to_ty_ref() } fn to_rust_str(&self) -> String { use arret_runtime::abitype::AbiType; use arret_runtime::abitype::ParamCapture; match &self.abi_type { AbiType::Boxed(boxed) => match self.capture { ParamCapture::Auto => format!("Gc<{}>", boxed.to_rust_str()), ParamCapture::Never => format!("NoCapture<{}>", boxed.to_rust_str()), ParamCapture::Always => format!("Capture<{}>", boxed.to_rust_str()), }, other => other.to_rust_str(), } } } impl ConvertableAbiType for callback::EntryPointAbiType { fn to_ty_ref(&self) -> ty::Ref { // TODO: How do we deal with rest params? 
let fixed_param_ty_refs = self .params .iter() .map(ConvertableAbiType::to_ty_ref) .collect(); ty::Fun::new_mono( ty::List::new_tuple(fixed_param_ty_refs), Purity::Impure.into(), self.ret.to_ty_ref(), ) .into() } fn to_rust_str(&self) -> String { let params_str = if self.params.is_empty() { "".to_owned() } else { self.params .iter() .map(|abi_type| format!(", {}", abi_type.to_rust_str())) .collect::>() .join("") }; format!( "extern \"C\" fn(&mut Task, boxed::Captures{}) -> {}", params_str, self.ret.to_rust_str() ) } } #[cfg(test)] mod test { use super::*; #[test] fn pair_abi_type() { use arret_runtime::abitype::EncodeBoxedAbiType; use arret_runtime::boxed; let boxed_abi_type = as EncodeBoxedAbiType>::BOXED_ABI_TYPE; assert_eq!("boxed::Pair", boxed_abi_type.to_rust_str()); let int_pair_poly: ty::Ref = ty::List::new(Box::new([Ty::Int.into()]), Ty::Int.into()).into(); assert_eq!(int_pair_poly, boxed_abi_type.to_ty_ref()); } #[test] fn bool_abi_type() { use arret_runtime::abitype::EncodeBoxedAbiType; use arret_runtime::boxed; let boxed_abi_type = ::BOXED_ABI_TYPE; assert_eq!("boxed::Bool", boxed_abi_type.to_rust_str()); let bool_poly: ty::Ref = Ty::Bool.into(); assert_eq!(bool_poly, boxed_abi_type.to_ty_ref()); } #[test] fn nil_abi_type() { use arret_runtime::abitype::EncodeBoxedAbiType; use arret_runtime::boxed; let boxed_abi_type = ::BOXED_ABI_TYPE; assert_eq!("boxed::Nil", boxed_abi_type.to_rust_str()); let nil_poly: ty::Ref = ty::List::empty().into(); assert_eq!(nil_poly, boxed_abi_type.to_ty_ref()); } #[test] fn callback_abi_type() { use arret_runtime::task; let entry_point_abi_type = char as callback::EncodeEntryPointAbiType>::ENTRY_POINT_ABI_TYPE; assert_eq!( "extern \"C\" fn(&mut Task, boxed::Captures, i64) -> char", entry_point_abi_type.to_rust_str() ); let arret_poly: ty::Ref = ty::Fun::new_mono( ty::List::new_tuple(Box::new([Ty::Int.into()])), Purity::Impure.into(), Ty::Char.into(), ) .into(); assert_eq!(arret_poly, entry_point_abi_type.to_ty_ref()); } 
#[test]
    fn captured_int_abi_type() {
        use arret_runtime::abitype::{EncodeBoxedAbiType, ParamAbiType};
        use arret_runtime::boxed;

        let param_abi_type = ParamAbiType {
            // NOTE(review): extraction stripped the qualified path here; `boxed::Int` is
            // implied by the test name and the `Ty::Int` expectation below.
            abi_type: <boxed::Int as EncodeBoxedAbiType>::BOXED_ABI_TYPE.into(),
            capture: abitype::ParamCapture::Always,
        };
        assert_eq!("Capture<boxed::Int>", param_abi_type.to_rust_str());

        let int_poly: ty::Ref<ty::Poly> = Ty::Int.into();
        assert_eq!(int_poly, param_abi_type.to_ty_ref());
    }
}
================================================ FILE: compiler/ty/datum.rs ================================================
use arret_syntax::datum::Datum;

use crate::ty;
use crate::ty::Ty;

/// Returns the most specific type reference for a literal datum
///
/// Literal bools and symbols map to their literal types; lists, vectors, sets and maps are
/// typed recursively from their members. Set and map member types are unified so a single
/// member type can describe the whole collection.
pub fn ty_ref_for_datum<M: ty::Pm>(datum: &Datum) -> ty::Ref<M> {
    (match datum {
        Datum::Bool(_, val) => Ty::LitBool(*val),
        Datum::Sym(_, val) => Ty::LitSym(val.clone()),
        Datum::Char(_, _) => Ty::Char,
        Datum::Int(_, _) => Ty::Int,
        Datum::Float(_, _) => Ty::Float,
        Datum::Str(_, _) => Ty::Str,
        Datum::List(_, vs) => {
            // Fixed-length tuple type; each element keeps its own specific type
            ty::List::new_tuple(vs.iter().map(|datum| ty_ref_for_datum(datum)).collect()).into()
        }
        Datum::Vector(_, vs) => Ty::Vector(vs.iter().map(|v| ty_ref_for_datum(v)).collect()),
        Datum::Set(_, vs) => {
            let unified_type =
                ty::unify::unify_ty_ref_iter(vs.iter().map(|v| ty_ref_for_datum(v)));

            Ty::Set(Box::new(unified_type))
        }
        Datum::Map(_, vs) => {
            // Unify keys and values separately so `{true 1, false foo}` becomes
            // `(Map Bool (RawU Int 'foo))`
            let unified_key =
                ty::unify::unify_ty_ref_iter(vs.iter().map(|(k, _)| ty_ref_for_datum(k)));
            let unified_value =
                ty::unify::unify_ty_ref_iter(vs.iter().map(|(_, v)| ty_ref_for_datum(v)));

            ty::Map::new(unified_key, unified_value).into()
        }
    })
    .into()
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::hir::poly_for_str;

    fn assert_poly_for_str(ty_str: &str, datum_str: &str) {
        use arret_syntax::parser::datum_from_str;

        let datum = datum_from_str(None, datum_str).unwrap();
        assert_eq!(poly_for_str(ty_str), ty_ref_for_datum(&datum));
    }

    #[test]
    fn trivial_types() {
        assert_poly_for_str("Int", "1");
        assert_poly_for_str("Int", "-51");
        assert_poly_for_str("Char", "\\newline");
        assert_poly_for_str("Str", r#""Test string""#);
    }

    #[test]
    fn bool_literal() {
assert_poly_for_str("true", "true"); assert_poly_for_str("false", "false"); } #[test] fn sym_literal() { assert_poly_for_str("'foo", "foo"); } #[test] fn fixed_list() { assert_poly_for_str("()", "()"); assert_poly_for_str("(List Int Int 'foo)", "(1 2 foo)"); } #[test] fn fixed_vec() { assert_poly_for_str("[]", "[]"); assert_poly_for_str("(Vector false Int 'foo)", "[false 2 foo]"); } #[test] fn fixed_set() { assert_poly_for_str("(Setof (RawU))", "#{}"); assert_poly_for_str("(Setof Bool)", "#{true false}"); } #[test] fn fixed_map() { assert_poly_for_str("(Map (RawU) (RawU))", "{}"); assert_poly_for_str("(Map Bool (RawU Int 'foo))", "{true 1, false foo}"); } } ================================================ FILE: compiler/ty/intersect.rs ================================================ use std::cmp; use std::iter; use std::result; use crate::ty; use crate::ty::list_iter::ListIterator; use crate::ty::purity; use crate::ty::purity::Purity; use crate::ty::record; use crate::ty::ty_args::TyArgs; use crate::ty::var_usage::Variance; use crate::ty::Ty; #[derive(PartialEq, Debug)] pub enum Error { Disjoint, } type Result = result::Result; /// Flattens an intersection between two type references /// /// This has no type logic; it only flattens the structure of the refs. 
fn flatten_ref_intersect(ref1: &ty::Ref, ref2: &ty::Ref) -> ty::Ref { let mut members: Vec> = vec![]; if let Some(Ty::Intersect(members1)) = ref1.try_to_fixed() { members.extend(members1.iter().cloned()); } else { members.push(ref1.clone()); } if let Some(Ty::Intersect(members2)) = ref2.try_to_fixed() { members.extend(members2.iter().cloned()); } else { members.push(ref2.clone()); } match members.len() { 0 => Ty::Any.into(), 1 => members.pop().unwrap(), _ => Ty::Intersect(members.into_boxed_slice()).into(), } } fn unify_list( list1: &ty::List, list2: &ty::List, ) -> Result> { match ty::unify::unify_list(list1, list2) { ty::unify::UnifiedList::Merged(merged) => Ok(merged), ty::unify::UnifiedList::Discerned => Err(Error::Disjoint), } } /// Intersects a vector of refs with an iterator /// /// `lefts` is a slice as it needs to be iterated over multiple times. `rights` is only visited /// once so it can be an arbitrary iterator. fn intersect_union_iter<'a, M, I>(lefts: &[ty::Ref], rights: I) -> Result> where M: ty::Pm + 'a, I: Iterator>, { let mut intersected_types: Vec> = vec![]; for right in rights { for left in lefts { match intersect_ty_refs(left, right) { Err(Error::Disjoint) => {} Ok(intersected) => { intersected_types.push(intersected); } } } } match intersected_types.len() { 0 => Err(Error::Disjoint), 1 => Ok(intersected_types.pop().unwrap()), _ => Ok(Ty::Union(intersected_types.into_boxed_slice()).into()), } } fn intersect_ty_ref_iter<'a, M, I>(mut ty_refs: I) -> Result> where M: ty::Pm + 'a, I: Iterator>, { let mut acc = if let Some(acc) = ty_refs.next() { acc.clone() } else { return Ok(Ty::Any.into()); }; for ty_ref in ty_refs { acc = intersect_ty_refs(&acc, ty_ref)?; } Ok(acc) } fn intersect_record_field_purities( variance: Variance, pvar: &purity::PVarId, ty_args1: &TyArgs, ty_args2: &TyArgs, ) -> Result { use crate::ty::is_a::purity_refs_equivalent; use crate::ty::unify::unify_purity_refs; let purity_ref1 = &ty_args1.pvar_purities()[pvar]; let purity_ref2 
= &ty_args2.pvar_purities()[pvar]; match variance { Variance::Covariant => Ok(intersect_purity_refs(purity_ref1, purity_ref2)), Variance::Contravariant => Ok(unify_purity_refs(purity_ref1, purity_ref2)), Variance::Invariant => { if purity_refs_equivalent(purity_ref1, purity_ref2) { Ok(purity_ref1.clone()) } else { Err(Error::Disjoint) } } } } fn intersect_record_field_ty_refs( variance: Variance, tvar: &ty::TVarId, ty_args1: &TyArgs, ty_args2: &TyArgs, ) -> Result> { use crate::ty::is_a::ty_refs_equivalent; use crate::ty::unify::unify_to_ty_ref; let ty_ref1 = &ty_args1.tvar_types()[tvar]; let ty_ref2 = &ty_args2.tvar_types()[tvar]; match variance { Variance::Covariant => intersect_ty_refs(ty_ref1, ty_ref2), Variance::Contravariant => Ok(unify_to_ty_ref(ty_ref1, ty_ref2)), Variance::Invariant => { if ty_refs_equivalent(ty_ref1, ty_ref2) { Ok(ty_ref1.clone()) } else { Err(Error::Disjoint) } } } } fn intersect_record_instance( instance1: &record::Instance, instance2: &record::Instance, ) -> Result> { use crate::ty::record::PolyParam; use std::collections::HashMap; if instance1.cons() != instance2.cons() { return Err(Error::Disjoint); } let mut merged_pvar_purities = HashMap::new(); let mut merged_tvar_types = HashMap::new(); for poly_param in instance1.cons().poly_params() { match poly_param { PolyParam::PVar(variance, pvar) => { merged_pvar_purities.insert( pvar.clone(), intersect_record_field_purities( *variance, pvar, instance1.ty_args(), instance2.ty_args(), )?, ); } PolyParam::TVar(variance, tvar) => { merged_tvar_types.insert( tvar.clone(), intersect_record_field_ty_refs( *variance, tvar, instance1.ty_args(), instance2.ty_args(), )?, ); } PolyParam::Pure(_) | PolyParam::TFixed(_, _) => {} } } Ok(record::Instance::new( instance1.cons().clone(), TyArgs::new(merged_pvar_purities, merged_tvar_types), )) } /// Intersects two types under the assumption that they are not subtypes fn non_subty_intersect( ref1: &ty::Ref, ty1: &Ty, ref2: &ty::Ref, ty2: &Ty, ) -> Result> { 
match (ty1, ty2) { // Union types (Ty::Union(refs1), Ty::Union(refs2)) => intersect_union_iter(refs1, refs2.iter()), (Ty::Union(refs1), _) => intersect_union_iter(refs1, iter::once(ref2)), (_, Ty::Union(refs2)) => intersect_union_iter(refs2, iter::once(ref1)), // Intersection types (Ty::Intersect(refs1), Ty::Intersect(refs2)) => { intersect_ty_ref_iter(refs1.iter().chain(refs2.iter())) } (Ty::Intersect(refs1), _) => { let mut acc = ref2.clone(); for ty_ref in refs1.iter() { acc = intersect_ty_refs(&acc, ty_ref)?; } Ok(acc) } (_, Ty::Intersect(refs2)) => { let mut acc = ref1.clone(); for ty_ref in refs2.iter() { acc = intersect_ty_refs(&acc, ty_ref)?; } Ok(acc) } // Set type (Ty::Set(member1), Ty::Set(member2)) => Ok(Ty::Set(Box::new(intersect_ty_refs( member1.as_ref(), member2.as_ref(), )?)) .into()), // Map type (Ty::Map(map1), Ty::Map(map2)) => Ok(ty::Map::new( intersect_ty_refs(map1.key(), map2.key())?, intersect_ty_refs(map1.value(), map2.value())?, ) .into()), // Vector types (Ty::Vectorof(member1), Ty::Vectorof(member2)) => Ok(Ty::Vectorof(Box::new( intersect_ty_refs(member1.as_ref(), member2.as_ref())?, )) .into()), (Ty::Vector(members1), Ty::Vector(members2)) => { if members1.len() != members2.len() { Err(Error::Disjoint) } else { let intersected_members = members1 .iter() .zip(members2.iter()) .map(|(member1, member2)| intersect_ty_refs(member1, member2)) .collect::]>>>()?; Ok(Ty::Vector(intersected_members).into()) } } (Ty::Vectorof(member1), Ty::Vector(members2)) | (Ty::Vector(members2), Ty::Vectorof(member1)) => { let intersected_members = members2 .iter() .map(|member2| intersect_ty_refs(member1.as_ref(), member2)) .collect::]>>>()?; Ok(Ty::Vector(intersected_members).into()) } // List types (Ty::List(list1), Ty::List(list2)) => Ok(intersect_list(list1, list2)?.into()), // Function types (Ty::TopFun(top_fun1), Ty::TopFun(top_fun2)) => { let intersected_purity = intersect_purity_refs(top_fun1.purity(), top_fun2.purity()); let intersected_ret = 
intersect_ty_refs(top_fun1.ret(), top_fun2.ret())?; Ok(ty::TopFun::new(intersected_purity, intersected_ret).into()) } (Ty::TopFun(top_fun), Ty::Fun(fun)) | (Ty::Fun(fun), Ty::TopFun(top_fun)) => { if fun.has_polymorphic_vars() { // TODO: This might be possible but we would have to recalculate the tvars for // the intersected function return Err(Error::Disjoint); } let intersected_purity = intersect_purity_refs(top_fun.purity(), fun.purity()); let intersected_params = fun.params().clone(); let intersected_ret = intersect_ty_refs(top_fun.ret(), fun.ret())?; Ok(ty::Fun::new_mono(intersected_params, intersected_purity, intersected_ret).into()) } (Ty::Fun(fun1), Ty::Fun(fun2)) => { if fun1.has_polymorphic_vars() || fun2.has_polymorphic_vars() { // TODO: Same issue as top functions Err(Error::Disjoint) } else { let intersected_purity = intersect_purity_refs(fun1.purity(), fun2.purity()); let intersected_params = unify_list(fun1.params(), fun2.params())?; let intersected_ret = intersect_ty_refs(fun1.ret(), fun2.ret())?; Ok( ty::Fun::new_mono(intersected_params, intersected_purity, intersected_ret) .into(), ) } } (Ty::Record(instance1), Ty::Record(instance2)) => { Ok(Ty::Record(Box::new(intersect_record_instance(instance1, instance2)?)).into()) } (_, _) => Err(Error::Disjoint), } } pub fn intersect_list(list1: &ty::List, list2: &ty::List) -> Result> { if list1.has_disjoint_arity(list2) { return Err(ty::intersect::Error::Disjoint); } let mut iter1 = ListIterator::new(list1); let mut iter2 = ListIterator::new(list2); let mut merged_fixed: Vec> = Vec::with_capacity(cmp::max(iter1.fixed_len(), iter2.fixed_len())); while iter1.fixed_len() > 0 || iter2.fixed_len() > 0 { let next1 = iter1.next().unwrap(); let next2 = iter2.next().unwrap(); let merged_next = intersect_ty_refs(next1, next2)?; merged_fixed.push(merged_next); } let merged_rest = intersect_ty_refs(list1.rest(), list2.rest())?; Ok(ty::List::new(merged_fixed.into_boxed_slice(), merged_rest)) } pub fn intersect_ty_refs( 
ty_ref1: &ty::Ref, ty_ref2: &ty::Ref, ) -> Result> { if ty::is_a::ty_ref_is_a(ty_ref1, ty_ref2) { return Ok(ty_ref1.clone()); } else if ty::is_a::ty_ref_is_a(ty_ref2, ty_ref1) { return Ok(ty_ref2.clone()); } match (ty_ref1, ty_ref2) { (ty::Ref::Fixed(ty1), ty::Ref::Fixed(ty2)) => { // We can invoke full intersection logic if we have fixed types non_subty_intersect(ty_ref1, ty1, ty_ref2, ty2) } _ => { let bound1 = ty_ref1.resolve_to_ty(); let bound2 = ty_ref2.resolve_to_ty(); // Make sure the bounds aren't disjoint // We can't simply `non_subty_intersect` because the bounds may be subtypes intersect_ty_refs(&bound1.clone().into(), &bound2.clone().into())?; Ok(flatten_ref_intersect(ty_ref1, ty_ref2)) } } } pub fn intersect_purity_refs(purity1: &purity::Ref, purity2: &purity::Ref) -> purity::Ref { if purity1 == purity2 { purity1.clone() } else { Purity::Pure.into() } } #[cfg(test)] mod test { use super::*; use crate::hir::{poly_for_str, tvar_bounded_by}; use crate::source::EMPTY_SPAN; fn assert_disjoint_poly(poly1: &ty::Ref, poly2: &ty::Ref) { assert_eq!( Error::Disjoint, intersect_ty_refs(poly1, poly2).unwrap_err() ); } fn assert_merged_poly( expected: &ty::Ref, poly1: &ty::Ref, poly2: &ty::Ref, ) { // This is the basic invariant we're testing - each of our merged type satisfies each of // our input types. 
assert!( ty::is_a::ty_ref_is_a(expected, poly1), "The expected type does not definitely satisfy the first input type; the test is incorrect" ); assert!( ty::is_a::ty_ref_is_a(expected, poly2), "The expected type does not definitely satisfy the second input type; the test is incorrect" ); assert_eq!(expected, &intersect_ty_refs(poly1, poly2).unwrap()); } fn assert_disjoint(ty_str1: &str, ty_str2: &str) { let poly1 = poly_for_str(ty_str1); let poly2 = poly_for_str(ty_str2); assert_disjoint_poly(&poly1, &poly2) } fn assert_merged(expected_str: &str, ty_str1: &str, ty_str2: &str) { let expected = poly_for_str(expected_str); let poly1 = poly_for_str(ty_str1); let poly2 = poly_for_str(ty_str2); assert_merged_poly(&expected, &poly1, &poly2); } fn assert_disjoint_iter(ty_strs: &[&str]) { let polys: Vec<_> = ty_strs.iter().map(|&s| poly_for_str(s)).collect(); assert_eq!( Error::Disjoint, intersect_ty_ref_iter(polys.iter()).unwrap_err() ); } fn assert_merged_iter(expected_str: &str, ty_strs: &[&str]) { let expected = poly_for_str(expected_str); let polys: Vec<_> = ty_strs.iter().map(|&s| poly_for_str(s)).collect(); assert_eq!(expected, intersect_ty_ref_iter(polys.iter()).unwrap()); } #[test] fn disjoint_types() { assert_disjoint("Sym", "Str"); } #[test] fn simple_subtypes() { assert_merged("true", "Bool", "true"); assert_merged("Float", "Num", "Float"); assert_merged("Bool", "Bool", "Any"); } #[test] fn union_types() { assert_merged("'bar", "(RawU 'foo 'bar)", "(RawU 'bar 'baz)"); assert_merged( "(RawU 'bar 'baz)", "(RawU 'foo 'bar 'baz)", "(RawU 'bar 'baz 'foobar)", ); assert_merged("true", "(RawU true 'foo)", "Bool"); } #[test] fn intersect_types() { let ptype = tvar_bounded_by(Ty::Any.into()); let any_int = poly_for_str("Int"); let any_float = poly_for_str("Float"); // These two intersections become disjoint assert_eq!( Error::Disjoint, intersect_ty_refs::( &Ty::Intersect(Box::new([ptype.clone(), any_int])).into(), &Ty::Intersect(Box::new([ptype, any_float])).into(), ) 
.unwrap_err() ) } #[test] fn map_types() { assert_disjoint("(Map Int Float)", "(Map Float Int)"); assert_merged( "(Map 'foo Int)", "(Map (RawU 'foo 'bar) Int)", "(Map (RawU 'foo 'baz) Int)", ); } #[test] fn set_types() { assert_disjoint("(Setof Sym)", "(Setof Str)"); assert_merged( "(Setof 'foo)", "(Setof (RawU 'foo 'bar))", "(Setof (RawU 'foo 'baz))", ); } #[test] fn list_types() { assert_disjoint("(List Sym)", "(List Str)"); assert_merged("(List Sym Sym)", "(List Any Sym)", "(List & Sym)"); assert_merged( "(List false true)", "(List Bool true)", "(List false Bool & Any)", ); assert_disjoint("(List Sym Sym)", "(List Sym)"); } #[test] fn vec_types() { assert_disjoint("(Vector Int)", "(Vector Float)"); assert_merged("(Vector true)", "(Vector Bool)", "(Vectorof true)"); assert_merged("(Vectorof false)", "(Vectorof Bool)", "(Vectorof false)"); } #[test] fn top_fun_types() { assert_disjoint("(... -> Float)", "(... -> Int)"); assert_merged("(... -> true)", "(... -> Bool)", "(... ->! true)"); } #[test] fn fun_types() { assert_merged("(Num -> Int)", "(Float -> Int)", "(Int -> Int)"); assert_disjoint("(Str -> Sym)", "(Str Str -> Sym)"); assert_merged("(-> true)", "(-> Bool)", "(->! true)"); assert_merged("(Bool -> Str)", "(true -> Str)", "(false ->! Str)"); assert_merged("(->! true)", "(... ->! true)", "(->! Any)"); } #[test] fn ty_pred_types() { assert_disjoint("str?", "sym?"); assert_merged("str?", "str?", "str?"); assert_merged("str?", "str?", "(Any -> Bool)"); assert_merged("str?", "str?", "(... -> Bool)"); } #[test] fn eq_pred_types() { assert_merged("=", "=", "="); assert_merged("=", "=", "(Any Any -> Bool)"); assert_merged("=", "=", "(... 
-> Bool)"); } #[test] fn unbounded_poly_var() { let ptype1 = tvar_bounded_by(Ty::Any.into()); let ptype2 = tvar_bounded_by(Ty::Any.into()); let ptype_intersect = Ty::Intersect(Box::new([ptype1.clone(), ptype2.clone()])).into(); let any_sym = poly_for_str("Sym"); // These are equal; it should just return the original type assert_merged_poly(&ptype1, &ptype1, &ptype1); // These create an intersect type assert_merged_poly( &Ty::Intersect(Box::new([ptype1.clone(), ptype2.clone()])).into(), &ptype1, &ptype2, ); assert_merged_poly( &Ty::Intersect(Box::new([any_sym.clone(), ptype2.clone()])).into(), &any_sym, &ptype2, ); // These extend an existing intersection assert_merged_poly( &Ty::Intersect(Box::new([any_sym.clone(), ptype1, ptype2])).into(), &any_sym, &ptype_intersect, ); } #[test] fn bounded_poly_vars() { let ptype1_any = tvar_bounded_by(Ty::Any.into()); let ptype2_sym = tvar_bounded_by(Ty::Sym.into()); let ptype3_str = tvar_bounded_by(Ty::Str.into()); let any_sym = poly_for_str("Sym"); assert_merged_poly( &Ty::Intersect(Box::new([ptype1_any.clone(), ptype2_sym.clone()])).into(), &ptype1_any, &ptype2_sym, ); assert_merged_poly(&ptype2_sym, &any_sym, &ptype2_sym); // These have disjoint bounds assert_disjoint_poly(&ptype2_sym, &ptype3_str); assert_disjoint_poly(&ptype3_str, &any_sym); } #[test] fn polymorphic_funs() { let pidentity_fun = poly_for_str("(All #{A} A -> A)"); let pidentity_impure_bool_fun = poly_for_str("(All #{[A Bool]} A ->! A)"); let top_pure_fun = poly_for_str("(... 
-> Any)"); // We should intersect polymorphic functions with themselves assert_merged_poly(&pidentity_fun, &pidentity_fun, &pidentity_fun); // The intersection of the pure identity function and the top pure function is the identity // function assert_merged_poly(&pidentity_fun, &pidentity_fun, &top_pure_fun); // The intersection of the pure identity function and the impure bool identity function is // the identity function // TODO: This seems like it should be `(All #{[A Bool]} A -> A)` assert_merged_poly(&pidentity_fun, &pidentity_fun, &pidentity_impure_bool_fun); // These have no subtype relationship // TODO: This also seems like it should be `(All #{[A Bool]} A -> A)` assert_disjoint_poly(&pidentity_impure_bool_fun, &top_pure_fun); } #[test] fn intersect_iter() { assert_merged_iter("Any", &[]); assert_merged_iter("Sym", &["Sym"]); assert_merged_iter("true", &["true", "Bool"]); assert_disjoint_iter(&["true", "false"]); } #[test] fn record_instances() { use crate::ty::ty_args::TyArgs; use std::collections::HashMap; let tvar1 = ty::TVar::new(EMPTY_SPAN, "tvar1".into(), Ty::Any.into()); let tvar2 = ty::TVar::new(EMPTY_SPAN, "tvar2".into(), Ty::Any.into()); let cons1 = record::Cons::new( EMPTY_SPAN, "cons1".into(), "cons1?".into(), Some(Box::new([record::PolyParam::TVar( Variance::Covariant, tvar1.clone(), )])), Box::new([record::Field::new( EMPTY_SPAN, "cons1-field1".into(), tvar1.clone().into(), )]), ); let cons2 = record::Cons::new( EMPTY_SPAN, "cons2".into(), "cons2?".into(), Some(Box::new([ record::PolyParam::TVar(Variance::Covariant, tvar1.clone()), record::PolyParam::TVar(Variance::Contravariant, tvar2.clone()), ])), Box::new([ record::Field::new(EMPTY_SPAN, "cons2-covariant".into(), tvar1.clone().into()), record::Field::new( EMPTY_SPAN, "cons2-contravariant".into(), tvar2.clone().into(), ), ]), ); let float_instance1_poly: ty::Ref = record::Instance::new( cons1, TyArgs::new( HashMap::new(), std::iter::once((tvar1.clone(), Ty::Float.into())).collect(), ), ) 
.into(); let float_true_instance2_poly: ty::Ref = record::Instance::new( cons2.clone(), TyArgs::new( HashMap::new(), std::iter::once((tvar1.clone(), Ty::Float.into())) .chain(std::iter::once((tvar2.clone(), Ty::LitBool(true).into()))) .collect(), ), ) .into(); let int_true_instance2_poly: ty::Ref = record::Instance::new( cons2.clone(), TyArgs::new( HashMap::new(), std::iter::once((tvar1.clone(), Ty::Int.into())) .chain(std::iter::once((tvar2.clone(), Ty::LitBool(true).into()))) .collect(), ), ) .into(); let int_bool_instance2_poly: ty::Ref = record::Instance::new( cons2.clone(), TyArgs::new( HashMap::new(), std::iter::once((tvar1.clone(), Ty::Int.into())) .chain(std::iter::once((tvar2.clone(), Ty::Bool.into()))) .collect(), ), ) .into(); let num_bool_instance2_poly: ty::Ref = record::Instance::new( cons2, TyArgs::new( HashMap::new(), std::iter::once((tvar1, Ty::Num.into())) .chain(std::iter::once((tvar2, Ty::Bool.into()))) .collect(), ), ) .into(); // Different record constructors assert_disjoint_poly(&float_instance1_poly, &float_true_instance2_poly); // Disjoint record instances assert_disjoint_poly(&float_true_instance2_poly, &int_bool_instance2_poly); // Intersectable record types assert_merged_poly( &int_bool_instance2_poly, &int_true_instance2_poly, &num_bool_instance2_poly, ) } } ================================================ FILE: compiler/ty/is_a.rs ================================================ use crate::ty; use crate::ty::list_iter::ListIterator; use crate::ty::purity; use crate::ty::purity::Purity; use crate::ty::record; use crate::ty::var_usage::Variance; use crate::ty::Ty; fn top_fun_is_a(sub_top_fun: &ty::TopFun, par_top_fun: &ty::TopFun) -> bool { purity_ref_is_a(sub_top_fun.purity(), par_top_fun.purity()) && ty_ref_is_a(sub_top_fun.ret(), par_top_fun.ret()) } fn list_is_a(sub_list: &ty::List, par_list: &ty::List) -> bool { if (sub_list.fixed().len() > par_list.fixed().len()) && !par_list.has_rest() { // sub is longer than par return false; } 
    if sub_list.fixed().len() < par_list.fixed().len() {
        // sub is less specific due to less fixed types
        return false;
    }

    if !ty_ref_is_a(sub_list.rest(), par_list.rest()) {
        return false;
    }

    // Compare our fixed types. If the par fixed ends early we'll use the par rest.
    sub_list
        .fixed()
        .iter()
        .zip(ListIterator::new(par_list))
        .all(|(sub, par)| ty_ref_is_a(sub, par))
}

// Compares one record field's type argument pair according to the field's declared variance.
// NOTE(review): the generic parameter list (`<F, R>` or similar) appears to have been stripped
// by extraction — restore before compiling.
fn record_field_is_a(variance: Variance, is_a: &F, sub: &R, par: &R) -> bool
where
    F: Fn(&R, &R) -> bool,
{
    match variance {
        Variance::Covariant => is_a(sub, par),
        Variance::Contravariant => is_a(par, sub),
        Variance::Invariant => is_a(sub, par) && is_a(par, sub),
    }
}

// A record instance is a subtype of another only when both come from the same constructor and
// every polymorphic parameter's argument satisfies its declared variance.
fn record_instance_is_a(
    sub_instance: &record::Instance,
    par_instance: &record::Instance,
) -> bool {
    // Make sure they came from the same constructor and satisfy their params
    sub_instance.cons() == par_instance.cons()
        && sub_instance
            .cons()
            .poly_params()
            .iter()
            .all(|poly_param| match poly_param {
                record::PolyParam::PVar(variance, pvar) => record_field_is_a(
                    *variance,
                    &purity_ref_is_a,
                    &sub_instance.ty_args().pvar_purities()[pvar],
                    &par_instance.ty_args().pvar_purities()[pvar],
                ),
                record::PolyParam::TVar(variance, tvar) => record_field_is_a(
                    *variance,
                    &ty_ref_is_a,
                    &sub_instance.ty_args().tvar_types()[tvar],
                    &par_instance.ty_args().tvar_types()[tvar],
                ),
                // Fixed purities/types are identical by construction so always satisfy
                record::PolyParam::Pure(_) | record::PolyParam::TFixed(_, _) => true,
            })
}

// Function subtyping with no polymorphic variables: purity/return are covariant, parameters
// contravariant.
fn monomorphic_fun_is_a(sub_fun: &ty::Fun, par_fun: &ty::Fun) -> bool {
    top_fun_is_a(sub_fun.top_fun(), par_fun.top_fun())
        // Note that parameters are contravariant
        && list_is_a(par_fun.params(), sub_fun.params())
}

// Function subtyping; a polymorphic sub is first instantiated against the parent's top type.
fn fun_is_a(sub_fun: &ty::Fun, par_fun: &ty::Fun) -> bool {
    if sub_fun.has_polymorphic_vars() {
        let sub_mono = inst_polymorphic_fun(sub_fun, par_fun.top_fun());
        monomorphic_fun_is_a(&sub_mono, par_fun)
    } else {
        monomorphic_fun_is_a(sub_fun, par_fun)
    }
}

// Core structural subtype test over already-resolved `Ty`s. The original refs are also passed
// so union/intersection members can recurse through `ty_ref_is_a`.
fn ty_is_a(
    sub_ref: &ty::Ref,
    sub_ty: &Ty,
    parent_ref: &ty::Ref,
    parent_ty: &Ty,
) -> bool {
    if sub_ty == parent_ty {
        return true;
    }

    match (sub_ty, parent_ty) {
        // Union types
        (Ty::Union(sub_members), _) => sub_members
            .iter()
            .all(|sub_member| ty_ref_is_a(sub_member, parent_ref)),
        (_, Ty::Union(par_members)) => par_members
            .iter()
            .any(|par_member| ty_ref_is_a(sub_ref, par_member)),

        // Intersection types
        (_, Ty::Intersect(par_members)) => par_members
            .iter()
            .all(|par_member| ty_ref_is_a(sub_ref, par_member)),
        (Ty::Intersect(sub_members), _) => sub_members
            .iter()
            .any(|sub_member| ty_ref_is_a(sub_member, parent_ref)),

        // Any type
        (_, Ty::Any) => true,

        // Sym types
        (Ty::LitSym(_), Ty::Sym) => true,

        // Bool types
        (Ty::LitBool(_), Ty::Bool) => true,

        // Floats
        (Ty::Float, Ty::Num) => true,

        // Ints
        (Ty::Int, Ty::Num) => true,

        // Sets
        (Ty::Set(sub), Ty::Set(par)) => ty_ref_is_a(sub.as_ref(), par.as_ref()),

        // Maps
        (Ty::Map(sub_map), Ty::Map(par_map)) => {
            ty_ref_is_a(sub_map.key(), par_map.key())
                && ty_ref_is_a(sub_map.value(), par_map.value())
        }

        // Vector types
        (Ty::Vector(sub_members), Ty::Vector(par_members)) => {
            (sub_members.len() == par_members.len())
                && sub_members
                    .iter()
                    .zip(par_members.iter())
                    .all(|(sub_member, par_member)| ty_ref_is_a(sub_member, par_member))
        }
        (Ty::Vectorof(sub_member), Ty::Vectorof(par_member)) => {
            ty_ref_is_a(sub_member.as_ref(), par_member.as_ref())
        }
        (Ty::Vector(sub_members), Ty::Vectorof(par_member)) => sub_members
            .iter()
            .all(|sub_member| ty_ref_is_a(sub_member, par_member)),

        // Functions
        (Ty::TopFun(sub_top_fun), Ty::TopFun(par_top_fun)) => {
            top_fun_is_a(sub_top_fun, par_top_fun)
        }
        (Ty::Fun(sub_fun), Ty::TopFun(par_top_fun)) => {
            if sub_fun.has_polymorphic_vars() {
                let sub_mono = inst_polymorphic_fun(sub_fun, par_top_fun);
                top_fun_is_a(sub_mono.top_fun(), par_top_fun)
            } else {
                top_fun_is_a(sub_fun.top_fun(), par_top_fun)
            }
        }
        (Ty::Fun(sub_fun), Ty::Fun(par_fun)) => fun_is_a(sub_fun, par_fun),

        // All predicate types
        (Ty::TyPred(_), Ty::TopFun(par_top_fun)) | (Ty::EqPred, Ty::TopFun(par_top_fun)) => {
            top_fun_is_a(&ty::TopFun::new_for_pred(), par_top_fun)
        }

        // Type predicate types
        (Ty::TyPred(_), Ty::Fun(par_fun)) => fun_is_a(&ty::Fun::new_for_ty_pred(), par_fun),

        // Equality predicate type
        (Ty::EqPred, Ty::Fun(par_fun)) => fun_is_a(&ty::Fun::new_for_eq_pred(), par_fun),

        // List types
        (Ty::List(sub_list), Ty::List(par_list)) => list_is_a(sub_list, par_list),

        // Record types
        (Ty::RecordClass(_), Ty::TopRecord) => true,
        (Ty::Record(_), Ty::TopRecord) => true,
        (Ty::Record(sub_instance), Ty::Record(par_instance)) => {
            record_instance_is_a(sub_instance, par_instance)
        }
        (Ty::RecordClass(sub_cons), Ty::RecordClass(par_cons)) => sub_cons == par_cons,
        (Ty::Record(sub_instance), Ty::RecordClass(par_cons)) => sub_instance.cons() == par_cons,
        (Ty::RecordClass(sub_cons), Ty::Record(par_instance)) => {
            // If the record class has no polymorphic params then it only has one instance
            sub_cons == par_instance.cons() && sub_cons.poly_params().is_empty()
        }

        _ => false,
    }
}

// Walks the chain of type variable bounds to see if `sub_tvar` is (transitively) bounded by
// `parent_tvar`.
fn tvar_is_bounded_by(sub_tvar: &ty::TVarId, parent_tvar: &ty::TVarId) -> bool {
    if sub_tvar == parent_tvar {
        return true;
    }

    match &sub_tvar.bound {
        ty::Ref::Fixed(_) => false,
        ty::Ref::Var(tvar, _) => tvar_is_bounded_by(tvar, parent_tvar),
    }
}

// Purity subtyping: `Pure` is the bottom, `Impure` the top; otherwise the refs must be equal.
fn purity_ref_is_a(sub: &purity::Ref, parent: &purity::Ref) -> bool {
    sub == &purity::Ref::Fixed(Purity::Pure)
        || sub == parent
        || parent == &purity::Ref::Fixed(Purity::Impure)
}

// Instantiates a polymorphic function against the parent's top function type by collecting
// evidence from the return type and purity, then substituting the selected type args.
fn inst_polymorphic_fun(sub_fun: &ty::Fun, par_top_fun: &ty::TopFun) -> ty::Fun {
    let mut stx = ty::select::SelectCtx::new(sub_fun.pvars(), sub_fun.tvars());

    stx.add_evidence(sub_fun.ret(), par_top_fun.ret());
    stx.add_evidence_purity(sub_fun.purity(), par_top_fun.purity());

    let pta = stx.into_poly_ty_args();
    ty::subst::subst_poly_fun(&pta, sub_fun)
}

/// Determines if `sub` is a definite subtype of `parent`
pub fn ty_ref_is_a(sub: &ty::Ref, parent: &ty::Ref) -> bool {
    if let ty::Ref::Var(parent_tvar, _) = parent {
        // Typically `parent_is_bound` makes the best result for a polymorphic parent `May`.
        // These are overrides for cases where they can be `Yes`.
pub fn ty_refs_equivalent(ty_ref1: &ty::Ref, ty_ref2: &ty::Ref) -> bool {
    // Mutual subtyping is the semantic definition of type equivalence
    ty_ref_is_a(ty_ref1, ty_ref2) && ty_ref_is_a(ty_ref2, ty_ref1)
}

/// Determines if two purity refs are equivalent
///
/// This is for symmetry with [`ty_refs_equivalent`]
pub fn purity_refs_equivalent(purity_ref1: &purity::Ref, purity_ref2: &purity::Ref) -> bool {
    purity_ref1 == purity_ref2
}

#[cfg(test)]
mod test {
    use super::*;

    use crate::hir::{poly_for_str, tvar_bounded_by};
    use crate::source::EMPTY_SPAN;

    // Literal symbols are subtypes of `Sym`; distinct literals and unrelated types are not
    // related.
    #[test]
    fn sym_types() {
        let foo_sym = poly_for_str("'foo");
        let bar_sym = poly_for_str("'bar");
        let any_sym = poly_for_str("Sym");
        let any_int = poly_for_str("Int");

        assert!(ty_ref_is_a(&foo_sym, &foo_sym));
        assert!(!ty_ref_is_a(&foo_sym, &bar_sym));
        assert!(ty_ref_is_a(&foo_sym, &any_sym));
        assert!(!ty_ref_is_a(&any_sym, &foo_sym));
        assert!(!ty_ref_is_a(&any_sym, &any_int));
        assert!(!ty_ref_is_a(&any_int, &any_sym));
    }

    // Sets are covariant in their member type
    #[test]
    fn set_types() {
        let foo_set = poly_for_str("(Setof 'foo)");
        let bar_set = poly_for_str("(Setof 'bar)");
        let sym_set = poly_for_str("(Setof Sym)");

        assert!(ty_ref_is_a(&foo_set, &foo_set));
        assert!(!ty_ref_is_a(&foo_set, &bar_set));
        assert!(ty_ref_is_a(&foo_set, &sym_set));
        assert!(!ty_ref_is_a(&sym_set, &foo_set));
    }

    // Maps are covariant in both key and value types
    #[test]
    fn map_types() {
        let foo_sym = poly_for_str("'foo");
        let any_sym = poly_for_str("Sym");
        let any_int = poly_for_str("Int");

        let int_to_any_sym = ty::Map::new(any_int.clone(), any_sym.clone()).into();
        let int_to_foo_sym = ty::Map::new(any_int, foo_sym).into();
        let any_sym_to_any_sym = ty::Map::new(any_sym.clone(), any_sym).into();

        assert!(ty_ref_is_a(&int_to_foo_sym, &int_to_any_sym));
        assert!(!ty_ref_is_a(&int_to_any_sym, &int_to_foo_sym));
        assert!(!ty_ref_is_a(&int_to_any_sym, &any_sym_to_any_sym));
    }

    // A union sub must have every member inside the parent; a member is inside a union parent
    // if any parent member accepts it. `(RawU)` is the never type.
    #[test]
    fn union_types() {
        let foo_sym = poly_for_str("'foo");
        let baz_sym = poly_for_str("'baz");

        let foo_bar_union = poly_for_str("(RawU 'foo 'bar)");
        let bar_baz_union = poly_for_str("(RawU 'bar 'baz)");
        let foo_bar_baz_union = poly_for_str("(RawU 'foo 'bar 'baz)");
        let never = poly_for_str("(RawU)");

        assert!(ty_ref_is_a(&foo_sym, &foo_bar_union));
        assert!(!ty_ref_is_a(&baz_sym, &foo_bar_union));
        assert!(!ty_ref_is_a(&baz_sym, &never));

        assert!(!ty_ref_is_a(&foo_bar_union, &foo_sym));
        assert!(!ty_ref_is_a(&foo_bar_union, &baz_sym));
        assert!(ty_ref_is_a(&never, &foo_sym));

        assert!(!ty_ref_is_a(&foo_bar_union, &bar_baz_union));
        assert!(ty_ref_is_a(&foo_bar_union, &foo_bar_union));
        assert!(ty_ref_is_a(&never, &foo_bar_union));
        assert!(ty_ref_is_a(&foo_bar_union, &foo_bar_baz_union));
    }

    // Intersections: the sub must satisfy every parent member; an intersection sub satisfies a
    // parent if any of its members does.
    #[test]
    fn intersect_types() {
        let ptype1 = tvar_bounded_by(Ty::Any.into());
        let ptype2 = tvar_bounded_by(Ty::Any.into());

        let any_sym = poly_for_str("Sym");
        let foo_sym = poly_for_str("'foo");

        let sym_poly1_intersection =
            Ty::Intersect(Box::new([ptype1.clone(), any_sym.clone()])).into();
        let sym_poly2_intersection =
            Ty::Intersect(Box::new([ptype2.clone(), any_sym.clone()])).into();
        let sym_poly1_poly2_intersection =
            Ty::Intersect(Box::new([ptype1.clone(), ptype2, any_sym.clone()])).into();

        // `Sym` might not be `Poly`
        assert!(!ty_ref_is_a(&any_sym, &sym_poly1_intersection));

        // Our intersection must be both `Sym` and `Poly`
        assert!(ty_ref_is_a(&sym_poly1_intersection, &any_sym));
        assert!(ty_ref_is_a(&sym_poly1_intersection, &ptype1));

        // However, it might not be a 'foo
        assert!(!ty_ref_is_a(&sym_poly1_intersection, &foo_sym));

        // A more specific intersection must satisfy a less specific one
        assert!(ty_ref_is_a(
            &sym_poly1_poly2_intersection,
            &sym_poly1_intersection
        ));

        // A less specific intersection may satisfy a more specific one
        assert!(!ty_ref_is_a(
            &sym_poly1_intersection,
            &sym_poly1_poly2_intersection,
        ));

        // Partially disjoint intersections may satisfy each other
        assert!(!ty_ref_is_a(
            &sym_poly1_intersection,
            &sym_poly2_intersection
        ));
    }

    // `Any` is the top type; never (`(RawU)`) is the bottom type
    #[test]
    fn any_and_never_types() {
        let any = poly_for_str("Any");
        let never = Ty::never().into();
        let foo_sym = poly_for_str("'foo");

        assert!(ty_ref_is_a(&foo_sym, &any));
        assert!(!ty_ref_is_a(&any, &foo_sym));
        assert!(ty_ref_is_a(&never, &any));
        assert!(ty_ref_is_a(&never, &never));
        assert!(!ty_ref_is_a(&any, &never));
    }

    // List subtyping over fixed members and `&` rest types
    #[test]
    fn list_types() {
        let empty_list = poly_for_str("()");
        let listof_any = poly_for_str("(List & Any)");
        let listof_int = poly_for_str("(List & Int)");
        let two_ints_list = poly_for_str("(List Int Int)");
        let three_ints_list = poly_for_str("(List Int Int Int)");
        let at_least_one_int_list = poly_for_str("(List Int & Int)");

        assert!(ty_ref_is_a(&empty_list, &listof_any));
        assert!(!ty_ref_is_a(&listof_any, &empty_list));

        assert!(ty_ref_is_a(&listof_int, &listof_any));
        assert!(!ty_ref_is_a(&listof_any, &listof_int));

        assert!(ty_ref_is_a(&two_ints_list, &listof_int));
        assert!(!ty_ref_is_a(&listof_int, &two_ints_list));
        assert!(ty_ref_is_a(&two_ints_list, &listof_any));

        assert!(!ty_ref_is_a(&two_ints_list, &three_ints_list));
        assert!(!ty_ref_is_a(&three_ints_list, &two_ints_list));

        assert!(ty_ref_is_a(&at_least_one_int_list, &listof_int));
        assert!(!ty_ref_is_a(&listof_int, &at_least_one_int_list));
    }

    // Fixed-length vectors are subtypes of uniform `Vectorof` but lengths must match exactly
    // between fixed vectors
    #[test]
    fn vec_types() {
        let vecof_any = poly_for_str("(Vectorof Any)");
        let vecof_int = poly_for_str("(Vectorof Int)");
        let two_ints_vec = poly_for_str("(Vector Int Int)");
        let three_ints_vec = poly_for_str("(Vector Int Int Int)");

        assert!(ty_ref_is_a(&vecof_int, &vecof_any));
        assert!(!ty_ref_is_a(&vecof_any, &vecof_int));

        assert!(ty_ref_is_a(&two_ints_vec, &vecof_int));
        assert!(!ty_ref_is_a(&vecof_int, &two_ints_vec));
        assert!(ty_ref_is_a(&two_ints_vec, &vecof_any));

        assert!(!ty_ref_is_a(&two_ints_vec, &three_ints_vec));
        assert!(!ty_ref_is_a(&three_ints_vec, &two_ints_vec));
    }

    // `Int` and `Float` are disjoint subtypes of `Num`
    #[test]
    fn num_types() {
        let int = poly_for_str("Int");
        let float = poly_for_str("Float");
        let num = poly_for_str("Num");

        assert!(ty_ref_is_a(&int, &num));
        assert!(ty_ref_is_a(&float, &num));
        assert!(ty_ref_is_a(&num, &num));
        assert!(!ty_ref_is_a(&float, &int));
        assert!(!ty_ref_is_a(&num, &int));
        assert!(!ty_ref_is_a(&num, &float));
    }
    // Function subtyping: contravariant parameters, covariant return, pure <: impure
    #[test]
    fn fun_types() {
        let impure_any_to_sym = poly_for_str("(Any ->! Sym)");
        let impure_sym_to_any = poly_for_str("(Sym ->! Any)");
        let impure_sym_to_sym = poly_for_str("(Sym ->! Sym)");
        let pure_sym_to_sym = poly_for_str("(Sym -> Sym)");

        assert!(ty_ref_is_a(&impure_sym_to_sym, &impure_sym_to_any));
        assert!(ty_ref_is_a(&impure_any_to_sym, &impure_sym_to_sym));
        assert!(!ty_ref_is_a(&impure_sym_to_any, &impure_sym_to_sym));

        assert!(ty_ref_is_a(&pure_sym_to_sym, &impure_sym_to_sym));
        assert!(!ty_ref_is_a(&impure_sym_to_sym, &pure_sym_to_sym));
    }

    #[test]
    fn ty_pred_types() {
        let sym_ty_pred = poly_for_str("sym?");
        let str_ty_pred = poly_for_str("str?");
        let general_ty_pred = poly_for_str("(Any -> Bool)");
        let pred_top_fun = poly_for_str("(... -> Bool)");

        // Type predicates always equal themselves
        assert!(ty_ref_is_a(&sym_ty_pred, &sym_ty_pred));

        // Type predicates never equal other type predicates
        assert!(!ty_ref_is_a(&sym_ty_pred, &str_ty_pred));
        assert!(!ty_ref_is_a(&str_ty_pred, &sym_ty_pred));

        // Type predicates are a subtype of (Any -> Bool)
        assert!(ty_ref_is_a(&sym_ty_pred, &general_ty_pred));
        assert!(!ty_ref_is_a(&general_ty_pred, &sym_ty_pred));

        // Type predicates are a subtype of (... -> Bool)
        assert!(ty_ref_is_a(&sym_ty_pred, &pred_top_fun));
        assert!(!ty_ref_is_a(&pred_top_fun, &sym_ty_pred));
    }

    #[test]
    fn eq_pred_type() {
        let eq_pred = poly_for_str("=");
        let general_eq_pred = poly_for_str("(Any Any -> Bool)");
        let pred_top_fun = poly_for_str("(... -> Bool)");

        // Equality predicate equals itself
        assert!(ty_ref_is_a(&eq_pred, &eq_pred));

        // Equality predicate is a subtype of (Any Any -> Bool)
        assert!(ty_ref_is_a(&eq_pred, &general_eq_pred));
        assert!(!ty_ref_is_a(&general_eq_pred, &eq_pred));

        // Equality predicate is a subtype of (... -> Bool)
        assert!(ty_ref_is_a(&eq_pred, &pred_top_fun));
        assert!(!ty_ref_is_a(&pred_top_fun, &eq_pred));
    }

    // Literal booleans are subtypes of `Bool` and disjoint from each other
    #[test]
    fn bool_types() {
        let true_type = poly_for_str("true");
        let false_type = poly_for_str("false");
        let bool_type = poly_for_str("Bool");

        assert!(ty_ref_is_a(&true_type, &bool_type));
        assert!(!ty_ref_is_a(&bool_type, &true_type));
        assert!(!ty_ref_is_a(&false_type, &true_type));
    }

    // Same assertions as `bool_types` exercised via the polymorphic reference representation
    #[test]
    fn poly_bool_types() {
        let true_type = poly_for_str("true");
        let false_type = poly_for_str("false");
        let bool_type = poly_for_str("Bool");

        assert!(ty_ref_is_a(&true_type, &bool_type));
        assert!(!ty_ref_is_a(&bool_type, &true_type));
        assert!(!ty_ref_is_a(&false_type, &true_type));
    }

    // Distinct unbounded type variables only satisfy themselves
    #[test]
    fn unbounded_poly_vars() {
        let ptype1 = tvar_bounded_by(Ty::Any.into());
        let ptype2 = tvar_bounded_by(Ty::Any.into());
        let poly_bool = poly_for_str("Bool");

        assert!(ty_ref_is_a(&ptype1, &ptype1));
        assert!(!ty_ref_is_a(&ptype1, &ptype2));
        assert!(!ty_ref_is_a(&ptype1, &poly_bool));
    }

    #[test]
    fn bounded_poly_vars() {
        let ptype1_sym = tvar_bounded_by(Ty::Sym.into());
        let ptype2_str = tvar_bounded_by(Ty::Str.into());
        let poly_foo_sym = poly_for_str("'foo");

        // A type var always satisfies itself
        assert!(ty_ref_is_a(&ptype1_sym, &ptype1_sym));

        // The bounds of these vars are disjoint
        assert!(!ty_ref_is_a(&ptype1_sym, &ptype2_str));

        // The type var may satisfy a more specific bound
        assert!(!ty_ref_is_a(&ptype1_sym, &poly_foo_sym));

        // A sub never satisfies a type var with a disjoint bound
        assert!(!ty_ref_is_a(&poly_foo_sym, &ptype2_str));

        // The sub has a fixed type while the parent has a poly type. We can't ensure that 'foo
        // satisfies all possible Sym subtypes (such as 'bar)
        assert!(!ty_ref_is_a(&poly_foo_sym, &ptype1_sym));
    }

    #[test]
    fn related_poly_bounds() {
        let ptype1_unbounded = tvar_bounded_by(Ty::Any.into());
        let ptype2_bounded_by_1 = tvar_bounded_by(ptype1_unbounded.clone());
        let ptype3_bounded_by_2 = tvar_bounded_by(ptype2_bounded_by_1.clone());

        // Direct bounding
        assert!(ty_ref_is_a(&ptype2_bounded_by_1, &ptype1_unbounded));
        assert!(ty_ref_is_a(&ptype3_bounded_by_2, &ptype2_bounded_by_1));

        // Commutative bounding
        assert!(ty_ref_is_a(&ptype3_bounded_by_2, &ptype1_unbounded));

        // Inverse bounding relationship may not satisfy - the bounded type can have arbitrary
        // subtypes
        assert!(!ty_ref_is_a(&ptype1_unbounded, &ptype2_bounded_by_1));
    }

    // `(All ...)` functions instantiated against various monomorphic parents
    #[test]
    fn polymorphic_funs() {
        let pidentity_fun = poly_for_str("(All #{A} A -> A)");
        let pidentity_sym_fun = poly_for_str("(All #{[A Sym]} A -> A)");
        let pidentity_impure_string_fun = poly_for_str("(All #{[A Str]} A ->! A)");

        // All functions should have the top function type
        let top_fun = poly_for_str("(... ->! Any)");
        assert!(ty_ref_is_a(&pidentity_fun, &top_fun));
        assert!(ty_ref_is_a(&pidentity_sym_fun, &top_fun));
        assert!(ty_ref_is_a(&pidentity_impure_string_fun, &top_fun));

        // We should take into account purity
        let top_pure_fun = poly_for_str("(... -> Any)");
        assert!(ty_ref_is_a(&pidentity_fun, &top_pure_fun));
        assert!(!ty_ref_is_a(&pidentity_impure_string_fun, &top_pure_fun));

        // All functions should have the top one param function type except panys
        let top_one_param_fun = poly_for_str("((RawU) ->! Any)");
        assert!(ty_ref_is_a(&pidentity_fun, &top_one_param_fun));
        assert!(ty_ref_is_a(&pidentity_sym_fun, &top_one_param_fun));
        assert!(ty_ref_is_a(
            &pidentity_impure_string_fun,
            &top_one_param_fun
        ));

        // The identity function is (Any -> Any)
        let any_to_any_fun = poly_for_str("(Any ->! Any)");
        assert!(ty_ref_is_a(&pidentity_fun, &any_to_any_fun));

        // However, (Any -> Any) is not the identity function because it can take mismatched types
        // (e.g. Int -> Float)
        assert!(!ty_ref_is_a(&any_to_any_fun, &pidentity_fun));

        // The identity function is (true -> true)
        let true_to_true_fun = poly_for_str("(true ->! true)");
        assert!(ty_ref_is_a(&pidentity_fun, &true_to_true_fun));
        assert!(!ty_ref_is_a(&true_to_true_fun, &pidentity_fun));

        // The identity function is not (true -> false)
        let true_to_true_fun = poly_for_str("(true ->! false)");
        assert!(!ty_ref_is_a(&pidentity_fun, &true_to_true_fun));
        assert!(!ty_ref_is_a(&true_to_true_fun, &pidentity_fun));

        // The symbol function satisfies ((U) -> Sym) as all of its returns must be bounded by
        // that
        let top_to_sym_fun = poly_for_str("(... ->! Sym)");
        assert!(ty_ref_is_a(&pidentity_fun, &top_to_sym_fun));
        assert!(ty_ref_is_a(&pidentity_sym_fun, &top_to_sym_fun));

        // The identity string function satisfies (Str -> Str)
        let str_to_str_fun = poly_for_str("(Str ->! Str)");
        assert!(ty_ref_is_a(&pidentity_fun, &str_to_str_fun));
        assert!(!ty_ref_is_a(&pidentity_sym_fun, &str_to_str_fun));
        assert!(ty_ref_is_a(&pidentity_impure_string_fun, &str_to_str_fun));

        // The polymorphic identity string function satisfies (... ->! Str)
        let top_impure_str_fun = poly_for_str("(... ->! Str)");
        assert!(ty_ref_is_a(
            &pidentity_impure_string_fun,
            &top_impure_str_fun
        ));

        // As does the unbounded identity function
        assert!(ty_ref_is_a(&pidentity_fun, &top_impure_str_fun));

        // But not the polymorphic symbol function
        assert!(!ty_ref_is_a(&pidentity_sym_fun, &top_impure_str_fun));
    }

    // Polymorphic purity variables (`->_`) instantiate against concrete purities
    #[test]
    fn polymorphic_purity_funs() {
        let poly_purity_fun = poly_for_str("(All #{[->_ ->!]} (->_ Str) ->_ Str)");

        // This is the upper bound of `poly_purity_fun`
        let mono_purity_fun = poly_for_str("((->! Str) -> Str)");

        let top_to_str_fun = poly_for_str("(... -> Str)");
        assert!(ty_ref_is_a(&poly_purity_fun, &top_to_str_fun));
        assert!(ty_ref_is_a(&mono_purity_fun, &poly_purity_fun));
        assert!(!ty_ref_is_a(&top_to_str_fun, &poly_purity_fun));
    }

    // Instances of different record constructors are unrelated but both satisfy the top record
    // type
    #[test]
    fn distinct_record_cons_instances() {
        use crate::ty::ty_args::TyArgs;

        let cons1 = record::Cons::new(
            EMPTY_SPAN,
            "cons1".into(),
            "cons1?".into(),
            None,
            Box::new([]),
        );

        let cons2 = record::Cons::new(
            EMPTY_SPAN,
            "cons2".into(),
            "cons2?".into(),
            None,
            Box::new([]),
        );

        let instance1_poly: ty::Ref = record::Instance::new(cons1, TyArgs::empty()).into();
        let instance2_poly: ty::Ref = record::Instance::new(cons2, TyArgs::empty()).into();

        // Different record constructors
        assert!(!ty_ref_is_a(&instance1_poly, &instance2_poly));
        assert!(!ty_ref_is_a(&instance2_poly, &instance1_poly));

        // They're both top records
        assert!(ty_ref_is_a(&instance1_poly, &Ty::TopRecord.into()));
        assert!(ty_ref_is_a(&instance2_poly, &Ty::TopRecord.into()));
    }

    // Instances of the same constructor compare per-parameter using the declared variance
    #[test]
    fn same_cons_record_instances() {
        use crate::ty::ty_args::TyArgs;
        use std::collections::HashMap;

        let tvar1 = ty::TVar::new(EMPTY_SPAN, "tvar1".into(), Ty::Any.into());
        let tvar2 = ty::TVar::new(EMPTY_SPAN, "tvar2".into(), Ty::Any.into());
        let tvar3 = ty::TVar::new(EMPTY_SPAN, "tvar3".into(), Ty::Any.into());

        let cons = record::Cons::new(
            EMPTY_SPAN,
            "cons".into(),
            "cons?".into(),
            Some(Box::new([
                record::PolyParam::TVar(Variance::Covariant, tvar1.clone()),
                record::PolyParam::TVar(Variance::Contravariant, tvar2.clone()),
                record::PolyParam::TVar(Variance::Invariant, tvar3.clone()),
            ])),
            Box::new([
                record::Field::new(EMPTY_SPAN, "covariant".into(), tvar1.clone().into()),
                record::Field::new(EMPTY_SPAN, "contravariant".into(), tvar2.clone().into()),
                record::Field::new(EMPTY_SPAN, "invariant".into(), tvar3.clone().into()),
            ]),
        );

        let num_num_num_instance_poly: ty::Ref = record::Instance::new(
            cons.clone(),
            TyArgs::new(
                HashMap::new(),
                std::iter::once((tvar1.clone(), Ty::Num.into()))
                    .chain(std::iter::once((tvar2.clone(), Ty::Num.into())))
                    .chain(std::iter::once((tvar3.clone(), Ty::Num.into())))
                    .collect(),
            ),
        )
        .into();

        let int_any_num_instance_poly: ty::Ref = record::Instance::new(
            cons,
            TyArgs::new(
                HashMap::new(),
                std::iter::once((tvar1, Ty::Int.into()))
                    .chain(std::iter::once((tvar2, Ty::Any.into())))
                    .chain(std::iter::once((tvar3, Ty::Num.into())))
                    .collect(),
            ),
        )
        .into();

        // Covariant Int <: Num, contravariant Any :> Num, invariant Num == Num
        assert!(ty_ref_is_a(
            &int_any_num_instance_poly,
            &num_num_num_instance_poly
        ));
        assert!(!ty_ref_is_a(
            &num_num_num_instance_poly,
            &int_any_num_instance_poly
        ));
        assert!(ty_ref_is_a(
            &num_num_num_instance_poly,
            &num_num_num_instance_poly
        ));
        assert!(ty_ref_is_a(
            &int_any_num_instance_poly,
            &int_any_num_instance_poly
        ));
    }
}

================================================
FILE: compiler/ty/list_iter.rs
================================================
use crate::ty;
use crate::ty::Ty;

/// Iterates through the member types of a list
///
/// Once the fixed members are exhausted, the rest type (if any) is yielded indefinitely.
#[derive(Clone)]
pub struct ListIterator<'list, M: ty::Pm> {
    fixed: &'list [ty::Ref],
    rest: &'list ty::Ref,
}

impl<'list, M: ty::Pm> ListIterator<'list, M> {
    pub fn new(list: &'list ty::List) -> ListIterator<'list, M> {
        ListIterator {
            fixed: list.fixed(),
            rest: list.rest(),
        }
    }

    /// Builds an iterator if the passed ref is a fixed list type; `None` otherwise
    pub fn try_new_from_ty_ref(ty_ref: &'list ty::Ref) -> Option> {
        match ty_ref.try_to_fixed() {
            Some(Ty::List(list)) => Some(Self::new(list)),
            _ => None,
        }
    }

    /// Returns the number of fixed members remaining
    pub fn fixed_len(&self) -> usize {
        self.fixed.len()
    }

    /// Returns true if the list has a (non-never) rest type
    pub fn has_rest(&self) -> bool {
        !self.rest.is_never()
    }

    /// Consumes the iterator, returning a list of the remaining members
    pub fn tail_type(self) -> ty::List {
        ty::List::new(self.fixed.to_vec().into_boxed_slice(), self.rest.clone())
    }

    /// Consumes the iterator, unifying all remaining members in to a single type
    pub fn collect_rest(self) -> ty::Ref {
        use std::iter;

        if self.fixed.is_empty() {
            self.rest.clone()
        } else {
            ty::unify::unify_ty_ref_iter(self.fixed.iter().chain(iter::once(self.rest)).cloned())
        }
    }
}

impl<'list, M: ty::Pm> Iterator for ListIterator<'list, M> {
    type Item = &'list ty::Ref;

    fn next(&mut self) -> Option<&'list ty::Ref> {
        if self.fixed.is_empty() {
            // Fixed members exhausted: keep yielding the rest type forever unless it's never
            if self.rest.is_never() {
                None
            } else {
                Some(self.rest)
            }
        } else {
            let next = self.fixed.first();
            self.fixed = &self.fixed[1..];
            next
        }
    }
}

================================================
FILE: compiler/ty/mod.rs
================================================
pub mod conv_abi;
pub mod datum;
pub mod intersect;
pub mod is_a;
pub mod list_iter;
pub mod pred;
pub mod props;
pub mod purity;
pub mod record;
pub mod select;
pub mod subst;
pub mod subtract;
pub mod ty_args;
pub mod unify;
pub mod var_usage;

use std::fmt;
use std::ops::Range;

use arret_syntax::datum::DataStr;
use arret_syntax::span::Span;

use crate::id_type::ArcId;

// NOTE(review): generic parameter lists appear stripped by extraction throughout this file
// (e.g. `Ref` is likely `Ref<M>`, `ArcId` likely `ArcId<TVar>`) — restore before compiling.

/// A type variable: a named, span-tracked variable with an upper bound
#[derive(PartialEq, Debug, Clone)]
pub struct TVar {
    span: Span,
    source_name: DataStr,
    bound: Ref,
}

pub type TVarId = ArcId;
pub type TVars = Vec;

impl TVar {
    pub fn new(span: Span, source_name: DataStr, bound: Ref) -> TVarId {
        TVarId::new(TVar {
            span,
            source_name,
            bound,
        })
    }

    pub fn span(&self) -> Span {
        self.span
    }

    pub fn source_name(&self) -> &str {
        &self.source_name
    }

    pub fn bound(&self) -> &Ref {
        &self.bound
    }
}

/// Marker that determines if type variables are allowed within a type
pub trait Pm: PartialEq + Clone + Copy + Sized + fmt::Debug {
    /// Resolves a possibly variable type to its bound
    fn resolve_ref_to_ty(ty_ref: &Ref) -> &Ty;
}

/// Monomorphic marker: type variables cannot occur (uninhabited enum)
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Mono {}

impl Pm for Mono {
    fn resolve_ref_to_ty(ty_ref: &Ref) -> &Ty {
        match ty_ref {
            Ref::Fixed(ty) => ty,
            // A Mono ref can never hold a variable by construction
            Ref::Var(_, _) => unreachable!(),
        }
    }
}

/// Polymorphic marker: type variables resolve through their bounds
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Poly {}

impl Pm for Poly {
    fn resolve_ref_to_ty(ty_ref: &Ref) -> &Ty {
        match ty_ref {
            Ref::Fixed(ty) => ty,
            // Walk the chain of bounds until a fixed type is reached
            Ref::Var(tvar, _) => Self::resolve_ref_to_ty(tvar.bound()),
        }
    }
}

/// Reference to a type: either a type variable or a fixed type
#[derive(PartialEq, Debug, Clone)]
pub enum Ref {
    Var(TVarId, M),
    Fixed(Ty),
}

impl Ref {
    /// Tries to convert the TyRef to a fixed Ty
    pub fn try_to_fixed(&self) -> Option<&Ty> {
        match self {
            Ref::Var(_, _) => None,
            Ref::Fixed(ty) => Some(ty),
        }
    }

    /// Constructs a fixed TyRef from a union of the passed vector `members`
    ///
    /// `members` should already be unified by the type system; this cannot be used to construct
    /// arbitrary valid unions.
    pub fn from_vec(mut members: Vec) -> Self {
        if members.len() == 1 {
            // A single-member union is just that member
            members.pop().unwrap()
        } else {
            Ty::Union(members.into_boxed_slice()).into()
        }
    }

    /// Combination of find + map looking for a particular fixed type
    ///
    /// This is identical to `try_to_fixed().and_then(pred)` except it iterates inside unions.
    pub fn find_member<'a, F, T>(&'a self, f: F) -> Option
    where
        F: Fn(&'a Ty) -> Option + Copy,
        T: 'a,
    {
        match self.try_to_fixed() {
            Some(Ty::Union(members)) => members
                .iter()
                .filter_map(|member| member.find_member(f))
                .next(),
            Some(other) => f(other),
            None => None,
        }
    }

    /// Resolves to a concrete type according to the marker's resolution rules
    pub fn resolve_to_ty(&self) -> &Ty {
        M::resolve_ref_to_ty(self)
    }

    /// Returns true if this is a fixed never type; a variable is never considered never
    pub fn is_never(&self) -> bool {
        self.try_to_fixed().map_or(false, |fixed| fixed.is_never())
    }
}

impl Ref {
    /// Returns the fixed type; panics on a variable (monomorphic-only accessor)
    pub fn as_ty(&self) -> &Ty {
        match self {
            Ref::Fixed(ty) => ty,
            Ref::Var(_, _) => {
                unreachable!();
            }
        }
    }

    /// Consumes the ref, returning the fixed type; panics on a variable
    pub fn into_ty(self) -> Ty {
        match self {
            Ref::Fixed(ty) => ty,
            Ref::Var(_, _) => {
                unreachable!();
            }
        }
    }
}

impl From> for Ref {
    fn from(ty: Ty) -> Self {
        Ref::Fixed(ty)
    }
}

impl From for Ref {
    fn from(tvar: TVarId) -> Self {
        Ref::Var(tvar, Poly {})
    }
}

/// The core type language of the compiler
#[derive(PartialEq, Debug, Clone)]
pub enum Ty {
    Any,
    Bool,
    Char,
    Float,
    Map(Box>),
    Int,
    Num,
    LitBool(bool),
    LitSym(DataStr),
    Set(Box>),
    Str,
    Sym,
    Union(Box<[Ref]>),
    Intersect(Box<[Ref]>),

    // Function types
    TopFun(Box),
    Fun(Box),
    TyPred(pred::TestTy),
    EqPred,

    // Vector types
    Vector(Box<[Ref]>),
    Vectorof(Box>),

    // List types
    List(List),

    // Record types
    TopRecord,
    RecordClass(record::ConsId),
    Record(Box>),
}

impl Ty {
    /// Returns the canonical unit type
    pub fn unit() -> Ty {
        List::empty().into()
    }

    /// Returns the canonical never type
    pub fn never() -> Ty {
        Ty::Union(Box::new([]))
    }

    /// Returns if this is the never type
    pub fn is_never(&self) -> bool {
        self == &Ty::Union(Box::new([]))
    }
}

/// Map type with a key and value member type
#[derive(PartialEq, Debug, Clone)]
pub struct Map {
    key: Ref,
    value: Ref,
}

impl Map {
    pub fn new(key: Ref,
        value: Ref) -> Map {
        Map { key, value }
    }

    pub fn key(&self) -> &Ref {
        &self.key
    }

    pub fn value(&self) -> &Ref {
        &self.value
    }
}

impl From> for Ty {
    fn from(map: Map) -> Self {
        Ty::Map(Box::new(map))
    }
}

impl From> for Ref {
    fn from(map: Map) -> Self {
        Ref::Fixed(Ty::Map(Box::new(map)))
    }
}

/// List type with zero or more fixed member types followed by a uniform rest type
#[derive(PartialEq, Debug, Clone)]
pub struct List {
    fixed: Box<[Ref]>,
    rest: Box>,
}

impl List {
    /// Creates a list with the given fixed member types and a uniform tail member type
    pub fn new(fixed: Box<[Ref]>, rest: Ref) -> List {
        List {
            fixed,
            rest: Box::new(rest),
        }
    }

    /// Creates a list of zero or more members with a uniform type
    pub fn new_uniform(rest: Ref) -> List {
        List {
            fixed: Box::new([]),
            rest: Box::new(rest),
        }
    }

    /// Creates a fixed sized list with the given member types
    pub fn new_tuple(fixed: Box<[Ref]>) -> List {
        List {
            fixed,
            rest: Box::new(Ty::never().into()),
        }
    }

    /// Creates an empty list
    pub fn empty() -> List {
        List::new_tuple(Box::new([]))
    }

    pub fn fixed(&self) -> &[Ref] {
        &self.fixed
    }

    /// Returns the member type of our uniform tail
    ///
    /// This will be [`Ty::never()`] if the list has no tail.
    pub fn rest(&self) -> &Ref {
        self.rest.as_ref()
    }

    /// Returns the range of possible lengths; unbounded above when there is a rest type
    pub fn size_range(&self) -> Range {
        if self.rest.is_never() {
            self.fixed.len()..self.fixed.len()
        } else {
            self.fixed.len()..usize::max_value()
        }
    }

    /// Returns true if no length is valid for both lists simultaneously
    pub fn has_disjoint_arity(&self, other: &Self) -> bool {
        let range1 = self.size_range();
        let range2 = other.size_range();

        range2.start > range1.end || range2.end < range1.start
    }

    /// Returns true if the list is empty
    pub fn is_empty(&self) -> bool {
        self.fixed.is_empty() && self.rest.is_never()
    }

    /// Returns true if the list has a uniform tail of zero or more members
    pub fn has_rest(&self) -> bool {
        !self.rest.is_never()
    }
}

impl From> for Ty {
    fn from(list: List) -> Self {
        Ty::List(list)
    }
}

impl From> for Ref {
    fn from(list: List) -> Self {
        Ref::Fixed(Ty::List(list))
    }
}

/// Top function type: purity and return type only, no parameter list
#[derive(PartialEq, Debug, Clone)]
pub struct TopFun {
    purity: purity::Ref,
    ret: Ref,
}

impl TopFun {
    /// Returns a top function type
    pub fn new(purity: purity::Ref, ret: Ref) -> TopFun {
        TopFun { purity, ret }
    }

    /// Returns the `TopFun` top type for all predicate functions, i.e. `(... -> Bool)`
    pub fn new_for_pred() -> TopFun {
        Self::new(purity::Purity::Pure.into(), Ty::Bool.into())
    }

    pub fn purity(&self) -> &purity::Ref {
        &self.purity
    }

    pub fn ret(&self) -> &Ref {
        &self.ret
    }
}

impl From for Ty {
    fn from(top_fun: TopFun) -> Self {
        Ty::TopFun(Box::new(top_fun))
    }
}

impl From for Ref {
    fn from(top_fun: TopFun) -> Self {
        Ref::Fixed(Ty::TopFun(Box::new(top_fun)))
    }
}

/// Full function type: polymorphic variables, top function (purity + return) and parameters
#[derive(PartialEq, Debug, Clone)]
pub struct Fun {
    pvars: purity::PVars,
    tvars: TVars,

    top_fun: TopFun,
    params: List,
}

impl Fun {
    pub fn new(pvars: purity::PVars, tvars: TVars, top_fun: TopFun, params: List) -> Fun {
        Fun {
            pvars,
            tvars,
            top_fun,
            params,
        }
    }

    /// Creates a new function without polymorphic variables
    pub fn new_mono(params: List, purity: purity::Ref, ret: Ref) -> Fun {
        Fun {
            pvars: purity::PVars::new(),
            tvars: TVars::new(),
            top_fun: TopFun::new(purity, ret),
            params,
        }
    }

    /// Returns the `Fun` type for the `(main!)` function
    pub fn new_for_main() -> Fun {
        Self::new_mono(
            List::empty(),
            purity::Purity::Impure.into(),
            Ty::unit().into(),
        )
    }

    /// Returns the `Fun` supertype for all type predicate functions
    ///
    /// This is the type `(Any -> Bool)`. It captures the signature of the type predicates; however,
    /// it does not support occurrence typing.
    pub fn new_for_ty_pred() -> Fun {
        Self::new(
            purity::PVars::new(),
            TVars::new(),
            TopFun::new_for_pred(),
            List::new_tuple(Box::new([Ty::Any.into()])),
        )
    }

    /// Returns the `Fun` supertype for the equality predicate
    ///
    /// This is the type `(Any Any -> Bool)`. It captures the signature of the equality predicate;
    /// however, it does not support occurrence typing.
    pub fn new_for_eq_pred() -> Fun {
        Self::new(
            purity::PVars::new(),
            TVars::new(),
            TopFun::new_for_pred(),
            List::new_tuple(Box::new([Ty::Any.into(), Ty::Any.into()])),
        )
    }

    pub fn pvars(&self) -> &[purity::PVarId] {
        &self.pvars
    }

    pub fn tvars(&self) -> &[TVarId] {
        &self.tvars
    }

    pub fn top_fun(&self) -> &TopFun {
        &self.top_fun
    }

    pub fn purity(&self) -> &purity::Ref {
        &self.top_fun.purity
    }

    pub fn params(&self) -> &List {
        &self.params
    }

    pub fn ret(&self) -> &Ref {
        &self.top_fun.ret
    }

    pub fn has_polymorphic_vars(&self) -> bool {
        !self.pvars.is_empty() || !self.tvars.is_empty()
    }

    pub fn with_polymorphic_vars(self, pvars: purity::PVars, tvars: TVars) -> Fun {
        Fun {
            pvars,
            tvars,
            ..self
        }
    }
}

impl From for Ty {
    fn from(fun: Fun) -> Self {
        Ty::Fun(Box::new(fun))
    }
}

impl From for Ref {
    fn from(fun: Fun) -> Self {
        Ref::Fixed(Ty::Fun(Box::new(fun)))
    }
}

================================================
FILE: compiler/ty/pred.rs
================================================
use std::fmt;

use crate::ty;
use crate::ty::purity::Purity;
use crate::ty::record;
use crate::ty::Ty;

/// The type tested by a type predicate function (e.g. `sym?`, `list?`)
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
pub enum TestTy {
    Sym,
    Str,
    Bool,
    Num,
    Int,
    Float,
    Char,
    List,
    Vector,
    Set,
    Map,
    Fun,
    Nil,
    TopRecord,
    RecordClass(record::ConsId),
}

impl TestTy {
    /// Statically evaluates this predicate against a subject type
    ///
    /// Returns `Some(true)`/`Some(false)` when the result is known for every possible value of
    /// the subject type, or `None` when it depends on the runtime value.
    pub fn match_subject_ref(&self, ty_ref: &ty::Ref) -> Option {
        let
resolved_ty = ty_ref.resolve_to_ty(); match resolved_ty { Ty::Any => None, Ty::Sym | Ty::LitSym(_) => Some(self == &TestTy::Sym), Ty::Bool | Ty::LitBool(_) => Some(self == &TestTy::Bool), Ty::Char => Some(self == &TestTy::Char), Ty::Float => Some(self == &TestTy::Float || self == &TestTy::Num), Ty::Map(_) => Some(self == &TestTy::Map), Ty::Int => Some(self == &TestTy::Int || self == &TestTy::Num), Ty::Num => match self { TestTy::Num => Some(true), TestTy::Int | TestTy::Float => None, _ => Some(false), }, Ty::Set(_) => Some(self == &TestTy::Set), Ty::Str => Some(self == &TestTy::Str), Ty::Fun(_) | Ty::TopFun(_) | Ty::TyPred(_) | Ty::EqPred => Some(self == &TestTy::Fun), Ty::List(list) => match self { TestTy::Nil => { if list.is_empty() { Some(true) } else if list.fixed().is_empty() { None } else { Some(false) } } TestTy::List => Some(true), _ => Some(false), }, Ty::Vector(_) | Ty::Vectorof(_) => Some(self == &TestTy::Vector), Ty::TopRecord => match self { TestTy::TopRecord => Some(true), TestTy::RecordClass(_) => None, _ => Some(false), }, Ty::RecordClass(subject_cons) => match self { TestTy::TopRecord => Some(true), TestTy::RecordClass(test_cons) => Some(test_cons == subject_cons), _ => Some(false), }, Ty::Record(instance) => match self { TestTy::TopRecord => Some(true), TestTy::RecordClass(test_cons) => Some(instance.cons() == test_cons), _ => Some(false), }, Ty::Union(members) => { let results: Vec> = members .iter() .map(|member| self.match_subject_ref(member)) .collect(); if results.contains(&None) { None } else if !results.contains(&Some(false)) { Some(true) } else if !results.contains(&Some(true)) { Some(false) } else { None } } Ty::Intersect(members) => { let results: Vec> = members .iter() .map(|member| self.match_subject_ref(member)) .collect(); if results.contains(&Some(true)) { Some(true) } else if results.contains(&None) { None } else { Some(false) } } } } pub fn to_ty(&self) -> Ty { use crate::ty::ty_args::TyArgs; match self { TestTy::Sym => Ty::Sym, 
TestTy::Str => Ty::Str, TestTy::Bool => Ty::Bool, TestTy::Num => Ty::Num, TestTy::Int => Ty::Int, TestTy::Float => Ty::Float, TestTy::Char => Ty::Char, TestTy::List => ty::List::new_uniform(Ty::Any.into()).into(), TestTy::Vector => Ty::Vectorof(Box::new(Ty::Any.into())), TestTy::Set => Ty::Set(Box::new(Ty::Any.into())), TestTy::Map => ty::Map::new(Ty::Any.into(), Ty::Any.into()).into(), TestTy::Fun => ty::TopFun::new(Purity::Impure.into(), Ty::Any.into()).into(), TestTy::Nil => ty::List::empty().into(), TestTy::TopRecord => Ty::TopRecord, TestTy::RecordClass(cons) => { if cons.poly_params().is_empty() { // There's a single instance of this record; we can return the instance type. // Instance types can be used in more situations than top types. record::Instance::new(cons.clone(), TyArgs::empty()).into() } else { Ty::RecordClass(cons.clone()) } } } } } impl fmt::Display for TestTy { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { match self { TestTy::Str => write!(formatter, "str?"), TestTy::Sym => write!(formatter, "sym?"), TestTy::Num => write!(formatter, "num?"), TestTy::Int => write!(formatter, "int?"), TestTy::Float => write!(formatter, "float?"), TestTy::Bool => write!(formatter, "bool?"), TestTy::Char => write!(formatter, "char?"), TestTy::List => write!(formatter, "list?"), TestTy::Vector => write!(formatter, "vector?"), TestTy::Set => write!(formatter, "set?"), TestTy::Map => write!(formatter, "map?"), TestTy::Fun => write!(formatter, "fn?"), TestTy::Nil => write!(formatter, "nil?"), TestTy::TopRecord => write!(formatter, "record?"), TestTy::RecordClass(cons) => write!(formatter, "{}?", cons.value_cons_name()), } } } #[cfg(test)] mod test { use super::*; use crate::source::EMPTY_SPAN; fn assert_test_ty_will_match(test_ty: &TestTy, subject_ref: impl Into>) { let subject_ref = subject_ref.into(); assert!( ty::is_a::ty_ref_is_a(&subject_ref, &test_ty.to_ty().into()), "Subject type is not a definite subtype of the test type" ); 
assert_eq!(Some(true), test_ty.match_subject_ref(&subject_ref),); } fn assert_test_ty_may_match(test_ty: &TestTy, subject_ref: impl Into>) { let subject_ref = subject_ref.into(); assert!( !ty::is_a::ty_ref_is_a(&subject_ref, &test_ty.to_ty().into()), "Subject type is a definite subtype of the test type" ); assert_eq!(None, test_ty.match_subject_ref(&subject_ref),); } fn assert_test_ty_wont_match(test_ty: &TestTy, subject_ref: impl Into>) { let subject_ref = subject_ref.into(); assert!( !ty::is_a::ty_ref_is_a(&subject_ref, &test_ty.to_ty().into()), "Subject type is a definite subtype of the test type" ); assert_eq!(Some(false), test_ty.match_subject_ref(&subject_ref),); } fn assert_trivial_test_ty(expected_ty: Ty, test_ty: TestTy) { let unrelated_ty: Ty = if expected_ty == Ty::Char { Ty::Str } else { Ty::Char }; assert_eq!(expected_ty, test_ty.to_ty()); assert_test_ty_will_match(&test_ty, expected_ty); assert_test_ty_wont_match(&test_ty, unrelated_ty); } #[test] fn sym_test_ty() { let test_ty = TestTy::Sym; assert_test_ty_will_match(&test_ty, Ty::Sym); assert_test_ty_will_match(&test_ty, Ty::LitSym("foo".into())); assert_test_ty_wont_match(&test_ty, Ty::Str); } #[test] fn str_test_ty() { assert_trivial_test_ty(Ty::Str, TestTy::Str); } #[test] fn bool_test_ty() { let test_ty = TestTy::Bool; assert_test_ty_will_match(&test_ty, Ty::Bool); assert_test_ty_will_match(&test_ty, Ty::LitBool(false)); assert_test_ty_wont_match(&test_ty, Ty::Str); } #[test] fn num_test_ty() { let test_ty = TestTy::Num; assert_test_ty_will_match(&test_ty, Ty::Num); assert_test_ty_will_match(&test_ty, Ty::Int); assert_test_ty_will_match(&test_ty, Ty::Float); assert_test_ty_wont_match(&test_ty, Ty::Str); } #[test] fn int_test_ty() { let test_ty = TestTy::Int; assert_test_ty_will_match(&test_ty, Ty::Int); assert_test_ty_may_match(&test_ty, Ty::Num); assert_test_ty_wont_match(&test_ty, Ty::Float); } #[test] fn float_test_ty() { let test_ty = TestTy::Float; assert_test_ty_will_match(&test_ty, 
Ty::Float); assert_test_ty_may_match(&test_ty, Ty::Num); assert_test_ty_wont_match(&test_ty, Ty::Int); } #[test] fn char_test_ty() { assert_trivial_test_ty(Ty::Char, TestTy::Char); } #[test] fn list_test_ty() { assert_trivial_test_ty(ty::List::new_uniform(Ty::Any.into()).into(), TestTy::List); } #[test] fn vector_test_ty() { let test_ty = TestTy::Vector; assert_test_ty_will_match(&test_ty, Ty::Vector(Box::new([]))); assert_test_ty_will_match(&test_ty, Ty::Vectorof(Box::new(Ty::Any.into()))); assert_test_ty_wont_match(&test_ty, Ty::Int); } #[test] fn set_test_ty() { assert_trivial_test_ty(Ty::Set(Box::new(Ty::Any.into())), TestTy::Set); } #[test] fn map_test_ty() { assert_trivial_test_ty( ty::Map::new(Ty::Any.into(), Ty::Any.into()).into(), TestTy::Map, ); } #[test] fn fun_test_ty() { let test_ty = TestTy::Fun; assert_test_ty_will_match(&test_ty, ty::TopFun::new_for_pred()); assert_test_ty_will_match(&test_ty, ty::Fun::new_for_main()); assert_test_ty_wont_match(&test_ty, Ty::Str); } #[test] fn nil_test_ty() { let test_ty = TestTy::Nil; assert_test_ty_will_match(&test_ty, ty::List::empty()); assert_test_ty_may_match(&test_ty, ty::List::new_uniform(Ty::Any.into())); assert_test_ty_wont_match(&test_ty, ty::List::new_tuple(Box::new([Ty::Any.into()]))); } #[test] fn top_record_test_ty() { use crate::ty::ty_args::TyArgs; let cons = record::Cons::new( EMPTY_SPAN, "cons".into(), "cons?".into(), None, Box::new([]), ); let test_ty = TestTy::TopRecord; let test_class_poly: ty::Ref = cons.clone().into(); let test_instance_poly: ty::Ref = record::Instance::new(cons, TyArgs::empty()).into(); assert_test_ty_may_match(&test_ty, Ty::Any); assert_test_ty_will_match(&test_ty, Ty::TopRecord); assert_test_ty_will_match(&test_ty, test_class_poly); assert_test_ty_will_match(&test_ty, test_instance_poly); } #[test] fn record_class_test_ty() { use crate::ty::ty_args::TyArgs; let cons = record::Cons::new( EMPTY_SPAN, "cons".into(), "cons?".into(), None, Box::new([]), ); let other_cons = 
record::Cons::new( EMPTY_SPAN, "other_cons".into(), "other_cons?".into(), None, Box::new([]), ); let test_ty = TestTy::RecordClass(cons.clone()); let test_class_poly: ty::Ref = cons.clone().into(); let other_class_poly: ty::Ref = other_cons.clone().into(); let test_instance_poly: ty::Ref = record::Instance::new(cons, TyArgs::empty()).into(); let other_instance_poly: ty::Ref = record::Instance::new(other_cons, TyArgs::empty()).into(); assert_test_ty_may_match(&test_ty, Ty::Any); assert_test_ty_may_match(&test_ty, Ty::TopRecord); assert_test_ty_will_match(&test_ty, test_class_poly); assert_test_ty_wont_match(&test_ty, other_class_poly); assert_test_ty_will_match(&test_ty, test_instance_poly); assert_test_ty_wont_match(&test_ty, other_instance_poly); } #[test] fn union_subject_ref() { let str_sym_union: ty::Ref = Ty::Union(Box::new([Ty::Str.into(), Ty::Sym.into()])).into(); assert_test_ty_may_match(&TestTy::Str, str_sym_union.clone()); assert_test_ty_may_match(&TestTy::Sym, str_sym_union.clone()); assert_test_ty_wont_match(&TestTy::Int, str_sym_union); let list_false_union: ty::Ref = Ty::Union(Box::new([ ty::List::new_uniform(Ty::Any.into()).into(), Ty::LitBool(false).into(), ])) .into(); assert_test_ty_may_match(&TestTy::Nil, list_false_union); let never: ty::Ref = Ty::never().into(); assert_test_ty_will_match(&TestTy::Str, never.clone()); assert_test_ty_will_match(&TestTy::Sym, never); } #[test] fn intersect_subject_ref() { let str_num_intersect: ty::Ref = Ty::Intersect(Box::new([Ty::Str.into(), Ty::Num.into()])).into(); assert_test_ty_will_match(&TestTy::Str, str_num_intersect.clone()); assert_test_ty_may_match(&TestTy::Int, str_num_intersect.clone()); assert_test_ty_wont_match(&TestTy::Sym, str_num_intersect); } } ================================================ FILE: compiler/ty/props.rs ================================================ use crate::ty; use crate::ty::purity::Purity; use crate::ty::var_usage::Variance; use crate::ty::Ty; fn ty_has_subtypes(ty: &Ty) 
-> bool {
    match ty {
        // Top-like and class-like types always admit strict subtypes
        Ty::Any
        | Ty::Bool
        | Ty::Num
        | Ty::Sym
        | Ty::TopFun(_)
        | Ty::TopRecord
        | Ty::RecordClass(_) => true,
        // Leaf types and literals have no strict subtypes
        Ty::Char
        | Ty::Float
        | Ty::Int
        | Ty::LitBool(_)
        | Ty::LitSym(_)
        | Ty::Str
        | Ty::TyPred(_)
        | Ty::EqPred => false,
        Ty::Fun(fun) => {
            // Only the bottom function type `(& Any -> <no-subtype ret>)` with a pure purity
            // has no subtypes; anything else can be narrowed in purity, params or return
            fun.purity() != &Purity::Pure.into()
                || !fun.params().fixed().is_empty()
                || fun.params().rest() != &Ty::Any.into()
                || has_subtypes(fun.ret())
        }
        Ty::Map(map) => has_subtypes(map.key()) || has_subtypes(map.value()),
        Ty::Set(member) => has_subtypes(member.as_ref()),
        Ty::Vector(members) => members.iter().any(has_subtypes),
        // The empty union (never) has no subtypes; any non-empty union does
        Ty::Union(members) => !members.is_empty(),
        Ty::List(list) => {
            // Any arbitrary fixed length list is a subtype of a list with rest
            list.has_rest() || list.fixed().iter().any(has_subtypes)
        }
        // Any record type supporting variance has subtypes
        Ty::Record(instance) => instance
            .cons()
            .poly_params()
            .iter()
            .any(|poly_param| poly_param.variance() != Variance::Invariant),
        Ty::Vectorof(_) => {
            // Any arbitrary fixed length vector is a subtype of this vector
            true
        }
        Ty::Intersect(_) => {
            // If we're correctly normalised we should have subtypes
            true
        }
    }
}

/// Returns true if the type reference has strict subtypes
///
/// Unresolved type variables conservatively report `true`.
pub fn has_subtypes(ty_ref: &ty::Ref) -> bool {
    ty_ref
        .try_to_fixed()
        .map(ty_has_subtypes)
        .unwrap_or(true)
}

fn ty_is_literal(ty: &Ty) -> bool {
    match ty {
        Ty::LitBool(_) | Ty::LitSym(_) => true,
        // Aggregates are literal only if every member is literal and the length is fixed
        Ty::Vector(members) => members.iter().all(is_literal),
        Ty::List(list) => !list.has_rest() && list.fixed().iter().all(is_literal),
        _ => false,
    }
}

/// Returns true if the type reference corresponds to exactly one literal value
///
/// Unresolved type variables conservatively report `false`.
pub fn is_literal(ty_ref: &ty::Ref) -> bool {
    ty_ref
        .try_to_fixed()
        .map(ty_is_literal)
        .unwrap_or(false)
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::hir::poly_for_str;
    use crate::source::EMPTY_SPAN;
    use crate::ty::record;
    use crate::ty::ty_args::TyArgs;

    fn str_has_subtypes(datum_str: &str) -> bool {
        let poly = poly_for_str(datum_str);
        has_subtypes(&poly)
    }

    fn str_is_literal(datum_str: &str) -> bool {
        let poly = poly_for_str(datum_str);
        is_literal(&poly)
    }

    #[test]
    fn poly_subtypes() {
assert!(str_has_subtypes("Any")); assert!(str_has_subtypes("Bool")); assert!(!str_has_subtypes("true")); assert!(!str_has_subtypes("Char")); assert!(!str_has_subtypes("Float")); assert!(!str_has_subtypes("Str")); assert!(str_has_subtypes("Sym")); assert!(str_has_subtypes("Num")); assert!(!str_has_subtypes("(& Any -> true)")); assert!(str_has_subtypes("(& Any ->! true)")); assert!(str_has_subtypes("(Any -> true)")); assert!(str_has_subtypes("(& Int -> true)")); assert!(str_has_subtypes("(& Any -> Any)")); assert!(str_has_subtypes("(Map Sym Int)")); assert!(!str_has_subtypes("(Map Float Int)")); assert!(str_has_subtypes("(List Sym Int)")); assert!(str_has_subtypes("(List Str & Int)")); assert!(!str_has_subtypes("(List Str Int)")); assert!(str_has_subtypes("(Setof Sym)")); assert!(!str_has_subtypes("(Setof Float)")); assert!(str_has_subtypes("(Vectorof false)")); assert!(!str_has_subtypes("(Vector false true)")); assert!(!str_has_subtypes("(RawU)")); let tvar = ty::TVar::new(EMPTY_SPAN, "test".into(), Ty::Any.into()); assert!(has_subtypes(&tvar.into())); } #[test] fn poly_literal() { assert!(!str_is_literal("Any")); assert!(!str_is_literal("Bool")); assert!(str_is_literal("true")); assert!(!str_is_literal("Char")); assert!(!str_is_literal("Float")); assert!(!str_is_literal("Str")); assert!(!str_is_literal("Sym")); assert!(!str_is_literal("(& Any -> true)")); assert!(!str_is_literal("(Map Sym Int)")); assert!(!str_is_literal("(Map false true)")); assert!(!str_is_literal("(List Sym Int)")); assert!(!str_is_literal("(List Str & Int)")); assert!(str_is_literal("(List true false)")); assert!(!str_is_literal("(List true & false)")); assert!(str_is_literal("()")); assert!(!str_is_literal("(Setof ())")); assert!(!str_is_literal("(Setof Float)")); assert!(!str_is_literal("(Vectorof false)")); assert!(str_is_literal("(Vector false true)")); let tvar = ty::TVar::new(EMPTY_SPAN, "test".into(), Ty::Any.into()); assert!(!is_literal(&tvar.into())); } #[test] fn mono_record_type() { 
let mono_record_cons = record::Cons::new(
            EMPTY_SPAN,
            "record_cons".into(),
            "record_cons?".into(),
            None,
            Box::new([record::Field::new(EMPTY_SPAN, "num".into(), Ty::Num.into())]),
        );

        // A record with no poly params has no subtypes and is not a literal
        let int_record_instance_ref: ty::Ref<ty::Poly> =
            record::Instance::new(mono_record_cons, TyArgs::empty()).into();

        assert!(!has_subtypes(&int_record_instance_ref));
        assert!(!is_literal(&int_record_instance_ref));
    }

    #[test]
    fn poly_record_type() {
        let tvar = ty::TVar::new(EMPTY_SPAN, "tvar".into(), Ty::Any.into());

        let poly_record_cons = record::Cons::new(
            EMPTY_SPAN,
            "record_cons".into(),
            "record_cons?".into(),
            Some(Box::new([record::PolyParam::TVar(
                Variance::Covariant,
                tvar.clone(),
            )])),
            Box::new([record::Field::new(EMPTY_SPAN, "num".into(), tvar.into())]),
        );

        // A covariant poly param admits subtypes, but the record is still not a literal
        let poly_record_instance_ref: ty::Ref<ty::Poly> =
            record::Instance::new(poly_record_cons, TyArgs::empty()).into();

        assert!(has_subtypes(&poly_record_instance_ref));
        assert!(!is_literal(&poly_record_instance_ref));
    }
}



================================================
FILE: compiler/ty/purity.rs
================================================
use arret_syntax::datum::DataStr;
use arret_syntax::span::Span;

use crate::id_type::ArcId;

/// Computational purity of a function
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum Purity {
    Pure,
    Impure,
}

/// Polymorphic purity variable
#[derive(PartialEq, Debug, Clone)]
pub struct PVar {
    span: Span,
    source_name: DataStr,
}

pub type PVarId = ArcId<PVar>;
pub type PVars = Vec<PVarId>;

impl PVar {
    pub fn new(span: Span, source_name: DataStr) -> PVarId {
        PVarId::new(PVar { span, source_name })
    }

    /// Returns the span where the purity variable was declared
    pub fn span(&self) -> Span {
        self.span
    }

    /// Returns the source name of the purity variable, used for diagnostics
    pub fn source_name(&self) -> &str {
        &self.source_name
    }
}

/// Reference to either a fixed purity or a purity variable
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub enum Ref {
    Fixed(Purity),
    Var(PVarId),
}

impl From<Purity> for Ref {
    fn from(purity: Purity) -> Self {
        Ref::Fixed(purity)
    }
}

impl From<PVarId> for Ref {
    fn from(pvar: PVarId) -> Self {
        Ref::Var(pvar)
    }
}



================================================
FILE: compiler/ty/record.rs
================================================
use arret_syntax::datum::DataStr;
use
arret_syntax::span::Span; use crate::id_type::ArcId; use crate::ty; use crate::ty::purity; use crate::ty::purity::Purity; use crate::ty::ty_args::TyArgs; use crate::ty::var_usage::Variance; use crate::ty::Ty; /// Record field of a record constructor #[derive(PartialEq, Debug, Clone)] pub struct Field { span: Span, name: DataStr, ty_ref: ty::Ref, } impl Field { pub fn new(span: Span, name: DataStr, ty_ref: ty::Ref) -> Self { Self { span, name, ty_ref } } /// Returns the span where the constructor was defined pub fn span(&self) -> Span { self.span } /// Returns the name of the record field /// /// Unlike other source names that are used for diagnostics, this is semantically meaningful. /// It's used for keyword-based field access syntax. pub fn name(&self) -> &DataStr { &self.name } /// Returns the type of the record field pub fn ty_ref(&self) -> &ty::Ref { &self.ty_ref } /// Returns the type of the field accessor function pub fn accessor_fun_type(&self, cons_id: &ConsId) -> ty::Fun { let ty_args = cons_id.identity_ty_args(); let pvars: purity::PVars = ty_args.pvar_purities().keys().cloned().collect(); let tvars: ty::TVars = ty_args.tvar_types().keys().cloned().collect(); let top_fun = ty::TopFun::new(Purity::Pure.into(), self.ty_ref().clone()); let params = ty::List::new_tuple(Box::new([Instance::new(cons_id.clone(), ty_args).into()])); ty::Fun::new(pvars, tvars, top_fun, params) } } /// Polymorphic parameter to a record constructor /// /// This doesn't use separate [`TVar`](ty::TVar) and [`PVar`](purity::PVar) vectors because they /// appear in the same parameter list and their ordering is important. 
#[derive(PartialEq, Debug, Clone)]
pub enum PolyParam {
    /// Polymorphic purity variable
    PVar(Variance, purity::PVarId),
    /// Declared polymorphic purity fixed to `Pure`
    Pure(Span),
    /// Polymorphic type variable
    TVar(Variance, ty::TVarId),
    /// Declared polymorphic type fixed to a known type
    TFixed(Span, ty::Ref<ty::Poly>),
}

impl PolyParam {
    /// Returns the variance of this polymorphic parameter
    pub fn variance(&self) -> Variance {
        match self {
            PolyParam::PVar(variance, _) => *variance,
            PolyParam::TVar(variance, _) => *variance,
            // This is arbitrary as every instance will have the same value. `Invariant` is
            // probably the least confusing thing to return.
            PolyParam::Pure(_) | PolyParam::TFixed(_, _) => Variance::Invariant,
        }
    }
}

/// Record type constructor
///
/// This is a collection of fields and polymorphic parameters that can be used to construct
/// [instance types](Instance). This should not be confused with the record constructor function
/// used to build record values.
#[derive(PartialEq, Debug, Clone)]
pub struct Cons {
    span: Span,
    ty_cons_name: DataStr,
    value_cons_name: DataStr,
    // `None` indicates the constructor was declared as a singleton (see `is_singleton`)
    poly_params_list: Option<Box<[PolyParam]>>,
    fields: Box<[Field]>,
}

impl Cons {
    pub fn new(
        span: Span,
        ty_cons_name: DataStr,
        value_cons_name: DataStr,
        poly_params_list: Option<Box<[PolyParam]>>,
        fields: Box<[Field]>,
    ) -> ConsId {
        ConsId::new(Self {
            span,
            ty_cons_name,
            value_cons_name,
            poly_params_list,
            fields,
        })
    }

    /// Returns the span where the constructor was defined
    pub fn span(&self) -> Span {
        self.span
    }

    /// Returns the name of the type constructor
    ///
    /// Unlike other source names that are used for diagnostics, this is semantically meaningful.
    /// It's used to define a type constructor.
pub fn ty_cons_name(&self) -> &DataStr { &self.ty_cons_name } /// Returns the name of the value constructor pub fn value_cons_name(&self) -> &DataStr { &self.value_cons_name } /// Returns the polymorphic parameters this constructor accepts pub fn poly_params(&self) -> &[PolyParam] { match self.poly_params_list { Some(ref poly_params) => poly_params.as_ref(), None => &[], } } /// Returns true if the constructor was declared as a singleton /// /// This has no effect on the type system; it's only used to accurately print the type. pub fn is_singleton(&self) -> bool { self.poly_params_list.is_none() } /// Returns an ordered list of fields of every record type instance pub fn fields(&self) -> &[Field] { self.fields.as_ref() } /// Returns an identity map of polymorphic variables associated with the constructor pub fn identity_ty_args(&self) -> TyArgs { use std::collections::HashMap; let mut pvar_purities = HashMap::new(); let mut tvar_types = HashMap::new(); // Create an identity map of our polymorphic variables. When we substitute in the selected // types the keys will stay the same while the values will be replaced. 
        // Fixed purity/type params contribute nothing: they aren't substitutable variables
        for poly_param in self.poly_params() {
            match poly_param {
                PolyParam::PVar(_, pvar) => {
                    pvar_purities.insert(pvar.clone(), pvar.clone().into());
                }
                PolyParam::TVar(_, tvar) => {
                    tvar_types.insert(tvar.clone(), tvar.clone().into());
                }
                PolyParam::Pure(_) | PolyParam::TFixed(_, _) => {}
            }
        }

        TyArgs::new(pvar_purities, tvar_types)
    }

    /// Returns the type of the value constructor function
    pub fn value_cons_fun_type(cons_id: &ConsId) -> ty::Fun {
        let ty_args = cons_id.identity_ty_args();

        // The constructor is polymorphic over the same variables as the record itself
        let pvars: purity::PVars = ty_args.pvar_purities().keys().cloned().collect();
        let tvars: ty::TVars = ty_args.tvar_types().keys().cloned().collect();

        let ret_type = Instance::new(cons_id.clone(), ty_args).into();
        let top_fun = ty::TopFun::new(Purity::Pure.into(), ret_type);

        // One fixed parameter per field, in field declaration order
        let params = ty::List::new_tuple(
            cons_id
                .fields
                .iter()
                .map(|field| field.ty_ref.clone())
                .collect(),
        );

        ty::Fun::new(pvars, tvars, top_fun, params)
    }
}

pub type ConsId = ArcId;

impl From for Ty {
    fn from(cons_id: ConsId) -> Self {
        Ty::RecordClass(cons_id)
    }
}

impl From for ty::Ref {
    fn from(cons_id: ConsId) -> Self {
        ty::Ref::Fixed(Ty::RecordClass(cons_id))
    }
}

/// Instance of a record type constructed from a [`Cons`] and concrete type arguments
#[derive(PartialEq, Debug, Clone)]
pub struct Instance {
    cons: ConsId,
    ty_args: TyArgs,
}

impl Instance {
    pub fn new(cons: ConsId, ty_args: TyArgs) -> Self {
        Self { cons, ty_args }
    }

    /// Returns the record constructor this instance was constructed from
    pub fn cons(&self) -> &ConsId {
        &self.cons
    }

    /// Returns the type arguments to the record type constructor
    ///
    /// Every [polymorphic parameter](Cons::poly_params) must be specified in the type arguments.
pub fn ty_args(&self) -> &TyArgs { &self.ty_args } } impl From> for Ty { fn from(instance: Instance) -> Self { Ty::Record(Box::new(instance)) } } impl From> for ty::Ref { fn from(instance: Instance) -> Self { ty::Ref::Fixed(Ty::Record(Box::new(instance))) } } ================================================ FILE: compiler/ty/select.rs ================================================ use std::collections::HashMap; use crate::ty; use crate::ty::list_iter::ListIterator; use crate::ty::purity; use crate::ty::purity::Purity; use crate::ty::record; use crate::ty::ty_args::TyArgs; use crate::ty::Ty; pub enum Error<'vars> { UnselectedPVar(&'vars purity::PVarId), UnselectedTVar(&'vars ty::TVarId), } /// Selects a set of polymorphic variables for a function application /// /// This context is constructed with a set of purity and type variables the applied function is /// polymorphic on. Evidence from the return and argument types can then be incrementally added to /// the context. The calculated polymorphic types and purities can be retrieved from the /// `pvar_purities` and `tvar_types` methods. 
#[derive(Clone, Debug)] pub struct SelectCtx<'vars> { selecting_pvars: &'vars [purity::PVarId], selecting_tvars: &'vars [ty::TVarId], pvar_purities: HashMap, tvar_types: HashMap>, } impl<'vars> SelectCtx<'vars> { pub fn new( selecting_pvars: &'vars [purity::PVarId], selecting_tvars: &'vars [ty::TVarId], ) -> SelectCtx<'vars> { SelectCtx { selecting_pvars, selecting_tvars, pvar_purities: HashMap::with_capacity(selecting_pvars.len()), tvar_types: HashMap::with_capacity(selecting_tvars.len()), } } fn add_evidence_top_fun(&mut self, target_top_fun: &ty::TopFun, evidence_top_fun: &ty::TopFun) { self.add_evidence_purity(target_top_fun.purity(), evidence_top_fun.purity()); self.add_evidence(target_top_fun.ret(), evidence_top_fun.ret()); } fn add_evidence_fun(&mut self, target_top_fun: &ty::TopFun, evidence_fun: &ty::Fun) { // We have three options for dealing with polymorphic functions: // // 1. Do nothing and treat them normally. This can leak the evidence fun's polymorphic // return type in to the our selected type which results in an illegal type. // 2. Do a recursive selection where we pass the known types from the target fun in to // the evidence fun and use that to calculate the return type. This is possible but // complex. // 3. Do nothing and depend on the fact the target fun is probably already polymorphic // and expresses the type relationship we care about. 
This is the option implemented // below if evidence_fun.pvars().is_empty() { self.add_evidence_purity(target_top_fun.purity(), evidence_fun.purity()); } if evidence_fun.tvars().is_empty() { self.add_evidence(target_top_fun.ret(), evidence_fun.ret()); } } fn add_evidence_record( &mut self, target_instance: &record::Instance, evidence_instance: &record::Instance, ) { if target_instance.cons() != evidence_instance.cons() { return; } for (pvar, target_purity) in target_instance.ty_args().pvar_purities().iter() { let evidence_purity = &evidence_instance.ty_args().pvar_purities()[pvar]; self.add_evidence_purity(target_purity, evidence_purity); } for (tvar, target_poly) in target_instance.ty_args().tvar_types().iter() { let evidence_poly = &evidence_instance.ty_args().tvar_types()[tvar]; self.add_evidence(target_poly, evidence_poly); } } fn add_evidence_list( &mut self, target_list: &ty::List, evidence_list: &ty::List, ) { let mut target_iter = ListIterator::new(target_list); let mut evidence_iter = ListIterator::new(evidence_list); while target_iter.fixed_len() > 0 { let target_fixed = target_iter.next().unwrap(); let evidence_fixed = if let Some(evidence_fixed) = evidence_iter.next() { evidence_fixed } else { return; }; self.add_evidence(target_fixed, evidence_fixed); } if let Some(target_rest) = target_iter.next() { self.add_evidence(target_rest, &evidence_iter.collect_rest()); } } /// Adds evidence that the target is a never /// /// The propagates the never in to nested types fn add_evidence_never(&mut self, target_ty: &Ty) { match target_ty { Ty::Set(target_member) | Ty::Vectorof(target_member) => { self.add_evidence(target_member, &Ty::never().into()); } Ty::Map(target_map) => { self.add_evidence(target_map.key(), &Ty::never().into()); self.add_evidence(target_map.value(), &Ty::never().into()); } Ty::List(target_list) => { for target_fixed in target_list.fixed() { self.add_evidence(target_fixed, &Ty::never().into()); } self.add_evidence(target_list.rest(), 
&Ty::never().into()); } Ty::Vector(target_members) => { for target_member in target_members.iter() { self.add_evidence(target_member, &Ty::never().into()); } } _ => {} } } fn add_evidence_ty( &mut self, target_poly: &ty::Ref, target_ty: &Ty, evidence_poly: &ty::Ref, evidence_ty: &Ty, ) { match (target_ty, evidence_ty) { (Ty::Set(target_member), Ty::Set(evidence_member)) => { self.add_evidence(target_member, evidence_member); } (Ty::Map(target_map), Ty::Map(evidence_map)) => { self.add_evidence(target_map.key(), evidence_map.key()); self.add_evidence(target_map.value(), evidence_map.value()); } (Ty::List(target_list), Ty::List(evidence_list)) => { self.add_evidence_list(target_list, evidence_list); } (Ty::Vector(target_members), Ty::Vector(evidence_members)) => { for (target_member, evidence_member) in target_members.iter().zip(evidence_members.iter()) { self.add_evidence(target_member, evidence_member); } } (Ty::Vectorof(target_member), Ty::Vectorof(evidence_member)) => { self.add_evidence(target_member, evidence_member); } (Ty::Vectorof(target_member), Ty::Vector(evidence_members)) => { for evidence_member in evidence_members.iter() { self.add_evidence(target_member, evidence_member); } } (Ty::TopFun(target_top_fun), Ty::TopFun(evidence_top_fun)) => { self.add_evidence_top_fun(target_top_fun, evidence_top_fun); } (Ty::TopFun(target_top_fun), Ty::Fun(evidence_fun)) => { self.add_evidence_fun(target_top_fun, evidence_fun); } (Ty::TopFun(target_top_fun), Ty::TyPred(_) | Ty::EqPred) => { self.add_evidence_top_fun(target_top_fun, &ty::TopFun::new_for_pred()); } (Ty::Fun(target_fun), Ty::Fun(evidence_fun)) => { self.add_evidence_fun(target_fun.top_fun(), evidence_fun); } (Ty::Fun(target_fun), Ty::TyPred(_) | Ty::EqPred) => { self.add_evidence_top_fun(target_fun.top_fun(), &ty::TopFun::new_for_pred()); } (Ty::Record(target_instance), Ty::Record(evidence_instance)) => { self.add_evidence_record(target_instance, evidence_instance) } (Ty::Union(target_members), _) => { for 
target_member in target_members.iter() { self.add_evidence(target_member, evidence_poly); } } (_, Ty::Union(evidence_members)) => { if evidence_members.is_empty() { self.add_evidence_never(target_ty); } else { for evidence_member in evidence_members.iter() { self.add_evidence(target_poly, evidence_member); } } } _ => {} } } fn add_var_evidence(&mut self, tvar: &ty::TVarId, evidence_poly: &ty::Ref) { if !self.selecting_tvars.contains(tvar) || !ty::is_a::ty_ref_is_a(evidence_poly, tvar.bound()) { return; } self.tvar_types .entry(tvar.clone()) .and_modify(|existing| { *existing = ty::unify::unify_to_ty_ref(existing, evidence_poly); }) .or_insert_with(|| evidence_poly.clone()); } pub fn add_evidence( &mut self, target_poly: &ty::Ref, evidence_poly: &ty::Ref, ) { match target_poly { ty::Ref::Var(tvar, _) => self.add_var_evidence(tvar, evidence_poly), ty::Ref::Fixed(target_ty) => { let evidence_ty = evidence_poly.resolve_to_ty(); self.add_evidence_ty(target_poly, target_ty, evidence_poly, evidence_ty) } } } pub fn add_evidence_purity( &mut self, target_purity: &purity::Ref, evidence_purity: &purity::Ref, ) { let pvar = if let purity::Ref::Var(pvar) = target_purity { pvar } else { return; }; if !self.selecting_pvars.contains(pvar) { return; } self.pvar_purities .entry(pvar.clone()) .and_modify(|existing| { *existing = ty::unify::unify_purity_refs(existing, evidence_purity); }) .or_insert_with(|| evidence_purity.clone()); } /// Creates a `TyArgs` instance with any unselected variables set to their bound pub fn into_poly_ty_args(mut self) -> TyArgs { if self.selecting_pvars.len() != self.pvar_purities.len() { for pvar in self.selecting_pvars { if !self.pvar_purities.contains_key(pvar) { self.pvar_purities .insert(pvar.clone(), Purity::Impure.into()); } } } if self.selecting_tvars.len() != self.tvar_types.len() { for tvar in self.selecting_tvars { if !self.tvar_types.contains_key(tvar) { self.tvar_types.insert(tvar.clone(), tvar.bound().clone()); } } } 
        TyArgs::new(self.pvar_purities, self.tvar_types)
    }

    /// Creates a `TyArgs` instance
    ///
    /// Any unselected polymorphic variables will return an error unless they have a non-`Any`
    /// bound to use as a default.
    pub fn into_complete_poly_ty_args(mut self) -> Result, Error<'vars>> {
        // Length check first so fully-selected contexts skip the scan entirely
        if self.selecting_pvars.len() != self.pvar_purities.len() {
            for pvar in self.selecting_pvars {
                if !self.pvar_purities.contains_key(pvar) {
                    // Purity variables have no usable default; always an error
                    return Err(Error::UnselectedPVar(pvar));
                }
            }
        }

        if self.selecting_tvars.len() != self.tvar_types.len() {
            for tvar in self.selecting_tvars {
                if !self.tvar_types.contains_key(tvar) {
                    if tvar.bound() == &Ty::Any.into() {
                        // An `Any` bound carries no information; refuse to default to it
                        return Err(Error::UnselectedTVar(tvar));
                    }

                    // Fall back to the variable's declared bound
                    self.tvar_types.insert(tvar.clone(), tvar.bound().clone());
                }
            }
        }

        Ok(TyArgs::new(self.pvar_purities, self.tvar_types))
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::hir::ns::NsDatum;
    use crate::hir::scope::Scope;
    use crate::ty::purity::Purity;
    use arret_syntax::parser::{data_from_str, datum_from_str};

    // Parses a set of polymorphic variable declarations and exposes helpers to lower
    // type/purity strings in the resulting scope
    struct TestScope {
        scope: Scope<'static>,
        pvars: purity::PVars,
        tvars: ty::TVars,
    }

    impl TestScope {
        #[allow(clippy::needless_collect)]
        fn new(polymorphic_str: &str) -> TestScope {
            use crate::hir::lower_polymorphic_var_set;

            let outer_scope = Scope::new_with_primitives();
            let mut inner_scope = Scope::new_with_primitives();

            let polymorphic_data = data_from_str(None, polymorphic_str)
                .unwrap()
                .iter()
                .map(NsDatum::from_syntax_datum)
                .collect::>();

            let (pvars, tvars) = lower_polymorphic_var_set(
                &outer_scope,
                &mut inner_scope,
                polymorphic_data.into_iter(),
            )
            .unwrap();

            TestScope {
                scope: inner_scope,
                pvars,
                tvars,
            }
        }

        // Lowers a type datum string into a polymorphic type reference
        fn poly_for_str(&self, poly_str: &str) -> ty::Ref {
            use crate::hir::lower_poly;
            let test_datum = datum_from_str(None, poly_str).unwrap();

            lower_poly(&self.scope, NsDatum::from_syntax_datum(&test_datum)).unwrap()
        }

        // Lowers a purity datum string into a purity reference
        fn purity_for_str(&self, poly_str: &str) -> purity::Ref {
            use crate::hir::try_lower_purity;
            let test_datum = datum_from_str(None, poly_str).unwrap();
try_lower_purity(&self.scope, &NsDatum::from_syntax_datum(&test_datum)).unwrap() } fn select_ctx(&self) -> SelectCtx<'_> { SelectCtx::new(&self.pvars, &self.tvars) } } fn assert_unselected_type(ctx: &SelectCtx<'_>, poly_var: &ty::Ref) { let tvar = if let ty::Ref::Var(tvar, _) = poly_var { tvar } else { panic!("Can't find tvar ID") }; assert_eq!(None, ctx.tvar_types.get(tvar)); } fn assert_selected_type( ctx: &SelectCtx<'_>, poly_var: &ty::Ref, selected_poly: &ty::Ref, ) { let tvar = if let ty::Ref::Var(tvar, _) = poly_var { tvar } else { panic!("Can't find tvar ID") }; assert_eq!(Some(selected_poly), ctx.tvar_types.get(tvar)); } fn assert_unselected_purity(ctx: &SelectCtx<'_>, poly_var: &purity::Ref) { let pvar = if let purity::Ref::Var(pvar) = poly_var { pvar } else { panic!("Can't find pvar ID") }; assert_eq!(None, ctx.pvar_purities.get(pvar)); } fn assert_selected_purity( ctx: &SelectCtx<'_>, poly_var: &purity::Ref, selected_purity: Purity, ) { let pvar = if let purity::Ref::Var(pvar) = poly_var { pvar } else { panic!("Can't find pvar ID") }; assert_eq!( Some(&purity::Ref::Fixed(selected_purity)), ctx.pvar_purities.get(pvar) ); } #[test] fn trivial_tvar() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); assert_unselected_type(&stx, &poly_a); stx.add_evidence(&poly_a, &scope.poly_for_str("true")); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("true")); stx.add_evidence(&poly_a, &scope.poly_for_str("false")); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("Bool")); } #[test] fn poly_conflicing_tvar() { let scope = TestScope::new("[A (... -> Any)] [B (... -> Sym)] [C (... 
-> 'foo)]"); let poly_a = scope.poly_for_str("A"); let poly_b = scope.poly_for_str("B"); let poly_c = scope.poly_for_str("C"); let mut stx = scope.select_ctx(); assert_unselected_type(&stx, &poly_a); // We can handle one tvar as evidence stx.add_evidence(&poly_a, &poly_b); assert_selected_type(&stx, &poly_a, &poly_b); stx.add_evidence(&poly_a, &poly_c); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("(U B C)")); } #[test] fn set_types() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(Setof A)"), &scope.poly_for_str("(Setof Bool)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("Bool")); } #[test] fn map_types() { let scope = TestScope::new("A B"); let poly_a = scope.poly_for_str("A"); let poly_b = scope.poly_for_str("B"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(Map A B)"), &scope.poly_for_str("(Map true false)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("true")); assert_selected_type(&stx, &poly_b, &scope.poly_for_str("false")); } #[test] fn fixed_list_types() { let scope = TestScope::new("A B"); let poly_a = scope.poly_for_str("A"); let poly_b = scope.poly_for_str("B"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(List A B)"), &scope.poly_for_str("(List true false)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("true")); assert_selected_type(&stx, &poly_b, &scope.poly_for_str("false")); } #[test] fn listof_types() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(List & A)"), &scope.poly_for_str("(List & true)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("true")); } #[test] fn listof_from_fixed_list() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( 
&scope.poly_for_str("(List & A)"), &scope.poly_for_str("(List true false)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("Bool")); } #[test] fn listof_from_list_union() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(List & A)"), &scope.poly_for_str("(U (List Int Int) (List Int Int Int))"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("Int")); } #[test] fn fixed_vector_types() { let scope = TestScope::new("A B"); let poly_a = scope.poly_for_str("A"); let poly_b = scope.poly_for_str("B"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(Vector A B)"), &scope.poly_for_str("(Vector true false)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("true")); assert_selected_type(&stx, &poly_b, &scope.poly_for_str("false")); } #[test] fn vectorof_types() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(Vectorof A)"), &scope.poly_for_str("(Vectorof true)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("true")); } #[test] fn vectorof_from_fixed_vector() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(Vectorof A)"), &scope.poly_for_str("(Vector true false)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("Bool")); } #[test] fn union_types() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(U A Sym)"), &scope.poly_for_str("true"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("true")); } #[test] fn bounded_union_types() { let scope = TestScope::new("[A Sym] [B Bool]"); let poly_a = scope.poly_for_str("A"); let poly_b = scope.poly_for_str("B"); let mut stx = scope.select_ctx(); // A and B 
are bounded. We should ensure we only use evidence on the members with satisfied // bounds. stx.add_evidence(&scope.poly_for_str("(U A B)"), &scope.poly_for_str("'foo")); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("'foo")); assert_unselected_type(&stx, &poly_b); stx.add_evidence(&scope.poly_for_str("(U A B)"), &scope.poly_for_str("true")); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("'foo")); assert_selected_type(&stx, &poly_b, &scope.poly_for_str("true")); stx.add_evidence(&scope.poly_for_str("(U A B)"), &scope.poly_for_str("false")); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("'foo")); assert_selected_type(&stx, &poly_b, &scope.poly_for_str("Bool")); } #[test] fn top_fun_types() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(... -> A)"), &scope.poly_for_str("(... -> true)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("true")); } #[test] fn top_fun_purities() { let scope = TestScope::new("[->A ->!]"); let purity_a = scope.purity_for_str("->A"); let mut stx = scope.select_ctx(); assert_unselected_purity(&stx, &purity_a); stx.add_evidence( &scope.poly_for_str("(... ->A true)"), &scope.poly_for_str("(... -> true)"), ); assert_selected_purity(&stx, &purity_a, Purity::Pure); stx.add_evidence( &scope.poly_for_str("(... ->A true)"), &scope.poly_for_str("(... ->! true)"), ); assert_selected_purity(&stx, &purity_a, Purity::Impure); } #[test] fn top_fun_from_fun() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(... 
-> A)"), &scope.poly_for_str("(false -> true)"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("true")); } #[test] fn top_fun_from_poly_fun() { let scope = TestScope::new("Outer [->_ ->!]"); let poly_outer = scope.poly_for_str("Outer"); let poly_purity = scope.purity_for_str("->_"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(... ->_ Outer)"), // This has polymorphic types but monomorphic purity &scope.poly_for_str("(All #{[Inner Num]} Inner -> Inner)"), ); assert_unselected_type(&stx, &poly_outer); assert_selected_purity(&stx, &poly_purity, Purity::Pure); } #[test] fn top_fun_from_ty_pred() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(... -> A)"), &scope.poly_for_str("sym?"), ); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("Bool")); } #[test] fn top_fun_from_eq_pred() { let scope = TestScope::new("A"); let poly_a = scope.poly_for_str("A"); let mut stx = scope.select_ctx(); stx.add_evidence(&scope.poly_for_str("(... -> A)"), &scope.poly_for_str("=")); assert_selected_type(&stx, &poly_a, &scope.poly_for_str("Bool")); } #[test] fn fun_types() { let scope = TestScope::new("A B"); let poly_a = scope.poly_for_str("A"); let poly_b = scope.poly_for_str("B"); let mut stx = scope.select_ctx(); stx.add_evidence( &scope.poly_for_str("(A -> B)"), &scope.poly_for_str("(true -> false)"), ); // We intentionally do not use function type params as evidence assert_unselected_type(&stx, &poly_a); assert_selected_type(&stx, &poly_b, &scope.poly_for_str("false")); } #[test] fn fun_purities() { let scope = TestScope::new("[->A ->!]"); let purity_a = scope.purity_for_str("->A"); let mut stx = scope.select_ctx(); assert_unselected_purity(&stx, &purity_a); stx.add_evidence( &scope.poly_for_str("(->A true)"), &scope.poly_for_str("(->! 
true)"), ); assert_selected_purity(&stx, &purity_a, Purity::Impure); } #[test] fn fun_purity_conflict() { let scope = TestScope::new("[->A ->!] [->B ->!] [->C ->!]"); let purity_a = scope.purity_for_str("->A"); let mut stx = scope.select_ctx(); assert_unselected_purity(&stx, &purity_a); stx.add_evidence( &scope.poly_for_str("(->A true)"), &scope.poly_for_str("(->B true)"), ); stx.add_evidence( &scope.poly_for_str("(->A true)"), &scope.poly_for_str("(->C true)"), ); assert_selected_purity(&stx, &purity_a, Purity::Impure); } #[test] fn fun_type_from_ty_pred() { let scope = TestScope::new("A B"); let poly_a = scope.poly_for_str("A"); let poly_b = scope.poly_for_str("B"); let mut stx = scope.select_ctx(); stx.add_evidence(&scope.poly_for_str("(A -> B)"), &scope.poly_for_str("sym?")); assert_unselected_type(&stx, &poly_a); assert_selected_type(&stx, &poly_b, &scope.poly_for_str("Bool")); } #[test] fn fun_type_from_eq_pred() { let scope = TestScope::new("A B"); let poly_a = scope.poly_for_str("A"); let poly_b = scope.poly_for_str("B"); let mut stx = scope.select_ctx(); stx.add_evidence(&scope.poly_for_str("(A A -> B)"), &scope.poly_for_str("=")); assert_unselected_type(&stx, &poly_a); assert_selected_type(&stx, &poly_b, &scope.poly_for_str("Bool")); } #[test] fn ty_pred_purity() { let scope = TestScope::new("[->A ->!]"); let purity_a = scope.purity_for_str("->A"); let mut stx = scope.select_ctx(); assert_unselected_purity(&stx, &purity_a); stx.add_evidence( &scope.poly_for_str("(->A true)"), &scope.poly_for_str("sym?"), ); assert_selected_purity(&stx, &purity_a, Purity::Pure); } } ================================================ FILE: compiler/ty/subst.rs ================================================ use crate::ty; use crate::ty::purity; use crate::ty::record; use crate::ty::ty_args::TyArgs; use crate::ty::Ty; fn subst_ty_ref_slice(stx: &S, inputs: &[ty::Ref]) -> Box<[ty::Ref]> where S: Substitution, { inputs.iter().map(|i| stx.subst_ty_ref(i)).collect() } fn 
subst_list(stx: &S, list: &ty::List) -> ty::List where S: Substitution, { ty::List::new( subst_ty_ref_slice(stx, list.fixed()), stx.subst_ty_ref(list.rest()), ) } fn subst_record_instance( stx: &S, instance: &record::Instance, ) -> record::Instance where S: Substitution, { let subst_pvar_purities = instance .ty_args() .pvar_purities() .iter() .map(|(pvar, purity_ref)| (pvar.clone(), stx.subst_purity_ref(purity_ref))) .collect(); let subst_tvar_types = instance .ty_args() .tvar_types() .iter() .map(|(tvar, ty_ref)| (tvar.clone(), stx.subst_ty_ref(ty_ref))) .collect(); record::Instance::new( instance.cons().clone(), TyArgs::new(subst_pvar_purities, subst_tvar_types), ) } fn subst_top_fun(stx: &S, top_fun: &ty::TopFun) -> ty::TopFun where S: Substitution, { let poly_stx = stx.as_poly_subst(); ty::TopFun::new( poly_stx.subst_purity_ref(top_fun.purity()), poly_stx.subst_ty_ref(top_fun.ret()), ) } fn subst_fun(stx: &S, fun: &ty::Fun) -> ty::Fun where S: Substitution, { let poly_stx = stx.as_poly_subst(); // TODO: This doesn't seem right ty::Fun::new( purity::PVars::new(), ty::TVars::new(), subst_top_fun(stx, fun.top_fun()), subst_list(poly_stx, fun.params()), ) } fn subst_ty(stx: &S, ty: &Ty) -> Ty where S: Substitution, { match ty { Ty::Any => Ty::Any, Ty::Bool => Ty::Bool, Ty::Char => Ty::Char, Ty::Float => Ty::Float, Ty::Int => Ty::Int, Ty::Num => Ty::Num, Ty::Str => Ty::Str, Ty::Sym => Ty::Sym, Ty::EqPred => Ty::EqPred, Ty::TopRecord => Ty::TopRecord, Ty::TyPred(test_ty) => Ty::TyPred(test_ty.clone()), Ty::TopFun(top_fun) => subst_top_fun(stx, top_fun).into(), Ty::Fun(fun) => subst_fun(stx, fun).into(), Ty::Map(map) => { ty::Map::new(stx.subst_ty_ref(map.key()), stx.subst_ty_ref(map.value())).into() } Ty::LitBool(val) => Ty::LitBool(*val), Ty::LitSym(val) => Ty::LitSym(val.clone()), Ty::Set(member) => Ty::Set(Box::new(stx.subst_ty_ref(member))), Ty::Union(members) => Ty::Union(subst_ty_ref_slice(stx, members)), Ty::Intersect(members) => 
Ty::Intersect(subst_ty_ref_slice(stx, members)), Ty::Vector(members) => Ty::Vector(subst_ty_ref_slice(stx, members)), Ty::Vectorof(member) => Ty::Vectorof(Box::new(stx.subst_ty_ref(member))), Ty::List(list) => subst_list(stx, list).into(), Ty::RecordClass(cons) => Ty::RecordClass(cons.clone()), Ty::Record(instance) => Ty::Record(Box::new(subst_record_instance(stx, instance))), } } trait Substitution { type InputPM: ty::Pm; type OutputPM: ty::Pm; type AsPolySubst: Substitution; fn subst_purity_ref(&self, poly: &purity::Ref) -> purity::Ref; fn subst_ty_ref(&self, input: &ty::Ref) -> ty::Ref; fn as_poly_subst(&self) -> &Self::AsPolySubst; } impl<'tvars> Substitution for TyArgs { type InputPM = ty::Poly; type OutputPM = ty::Poly; type AsPolySubst = Self; fn subst_purity_ref(&self, poly: &purity::Ref) -> purity::Ref { match poly { purity::Ref::Fixed(_) => poly.clone(), purity::Ref::Var(pvar) => { if let Some(selected) = self.pvar_purities().get(pvar) { selected.clone() } else { poly.clone() } } } } fn subst_ty_ref(&self, poly: &ty::Ref) -> ty::Ref { match poly { ty::Ref::Fixed(fixed) => subst_ty(self, fixed).into(), ty::Ref::Var(tvar, _) => { if let Some(selected) = self.tvar_types().get(tvar) { selected.clone() } else { poly.clone() } } } } fn as_poly_subst(&self) -> &Self { self } } struct PolyIdentity {} impl Substitution for PolyIdentity { type InputPM = ty::Poly; type OutputPM = ty::Poly; type AsPolySubst = Self; fn subst_purity_ref(&self, poly: &purity::Ref) -> purity::Ref { poly.clone() } fn subst_ty_ref(&self, poly: &ty::Ref) -> ty::Ref { poly.clone() } fn as_poly_subst(&self) -> &Self { self } } struct MonoToPoly { poly_identity: PolyIdentity, } impl MonoToPoly { fn new() -> MonoToPoly { MonoToPoly { poly_identity: PolyIdentity {}, } } } impl Substitution for MonoToPoly { type InputPM = ty::Mono; type OutputPM = ty::Poly; type AsPolySubst = PolyIdentity; fn subst_purity_ref(&self, poly: &purity::Ref) -> purity::Ref { poly.clone() } fn subst_ty_ref(&self, mono: 
&ty::Ref) -> ty::Ref { subst_ty(self, mono.as_ty()).into() } fn as_poly_subst(&self) -> &PolyIdentity { &self.poly_identity } } struct Monomorphise<'tyargs> { mono_ty_args: &'tyargs TyArgs, partial: PartialMonomorphise<'tyargs>, } impl<'tyargs> Monomorphise<'tyargs> { fn new(mta: &'tyargs TyArgs) -> Monomorphise<'tyargs> { Monomorphise { mono_ty_args: mta, partial: PartialMonomorphise { mono_ty_args: mta }, } } } impl<'tyargs> Substitution for Monomorphise<'tyargs> { type InputPM = ty::Poly; type OutputPM = ty::Mono; type AsPolySubst = PartialMonomorphise<'tyargs>; fn subst_purity_ref(&self, poly: &purity::Ref) -> purity::Ref { match poly { purity::Ref::Fixed(_) => poly.clone(), purity::Ref::Var(pvar) => self .mono_ty_args .pvar_purities() .get(pvar) .unwrap_or_else(|| { panic!( "unable to find purity argument `{}` during monomorphisation", pvar.source_name() ) }) .clone(), } } fn subst_ty_ref(&self, poly: &ty::Ref) -> ty::Ref { match poly { ty::Ref::Fixed(fixed) => subst_ty(self, fixed).into(), ty::Ref::Var(tvar, _) => self .mono_ty_args .tvar_types() .get(tvar) .unwrap_or_else(|| { panic!( "unable to find type argument `{}` during monomorphisation", tvar.source_name() ) }) .clone(), } } fn as_poly_subst(&self) -> &PartialMonomorphise<'tyargs> { &self.partial } } struct PartialMonomorphise<'tyargs> { mono_ty_args: &'tyargs TyArgs, } impl<'tyargs> Substitution for PartialMonomorphise<'tyargs> { type InputPM = ty::Poly; type OutputPM = ty::Poly; type AsPolySubst = Self; fn subst_purity_ref(&self, poly: &purity::Ref) -> purity::Ref { match poly { purity::Ref::Fixed(_) => poly.clone(), purity::Ref::Var(pvar) => { if let Some(purity) = self.mono_ty_args.pvar_purities().get(pvar) { purity.clone() } else { poly.clone() } } } } fn subst_ty_ref(&self, poly: &ty::Ref) -> ty::Ref { match poly { ty::Ref::Fixed(fixed) => subst_ty(self, fixed).into(), ty::Ref::Var(tvar, _) => { if let Some(mono) = self.mono_ty_args.tvar_types().get(tvar) { subst_ty(&MonoToPoly::new(), 
mono.as_ty()).into() } else { poly.clone() } } } } fn as_poly_subst(&self) -> &Self { self } } pub fn subst_poly(pta: &TyArgs, poly: &ty::Ref) -> ty::Ref { pta.subst_ty_ref(poly) } pub fn subst_poly_fun(pta: &TyArgs, fun: &ty::Fun) -> ty::Fun { subst_fun(pta, fun) } pub fn subst_purity(pta: &TyArgs, purity: &purity::Ref) -> purity::Ref { pta.subst_purity_ref(purity) } pub fn monomorphise(mta: &TyArgs, poly: &ty::Ref) -> ty::Ref { let stx = Monomorphise::new(mta); stx.subst_ty_ref(poly) } pub fn monomorphise_purity(mta: &TyArgs, poly: &purity::Ref) -> purity::Ref { let stx = Monomorphise::new(mta); stx.subst_purity_ref(poly) } pub fn monomorphise_list(mta: &TyArgs, poly: &ty::List) -> ty::List { let stx = Monomorphise::new(mta); subst_list(&stx, poly) } ================================================ FILE: compiler/ty/subtract.rs ================================================ use crate::ty; use crate::ty::Ty; fn subtract_ref_iters<'a, I, M>(minuend_iter: I, subtrahend_ref: &ty::Ref) -> ty::Ref where I: Iterator>, M: ty::Pm + 'a, { ty::unify::unify_ty_ref_iter( minuend_iter.map(|minuend_ref| subtract_ty_refs(minuend_ref, subtrahend_ref)), ) } fn subtract_tys( minuend_ty: &Ty, subtrahend_ref: &ty::Ref, subtrahend_ty: &Ty, ) -> ty::Ref { match (minuend_ty, subtrahend_ty) { (Ty::Bool, _) => subtract_ref_iters( [ Ty::LitBool(false).into(), Ty::LitBool(true).into(), ] .iter(), subtrahend_ref, ), (Ty::Num, _) => subtract_ref_iters( [ Ty::Int.into(), Ty::Float.into(), ] .iter(), subtrahend_ref, ), (Ty::Union(members), _) => subtract_ref_iters(members.iter(), subtrahend_ref), (Ty::List(minuend_list), Ty::List(subtrahend_list)) // Make sure this is even useful or else we can recurse splitting list types // indefinitely if !subtrahend_list.has_rest() && minuend_list.fixed().len() == subtrahend_list.fixed().len() => { // This is required for `(nil?)` to work correctly let minued_rest = minuend_list.rest(); if !minued_rest.is_never() { // This is the list type if we have no 
rest elements let terminated_list = ty::List::new_tuple(minuend_list.fixed().to_vec().into_boxed_slice()); // This is the list type if we have at least one rest element let mut continued_fixed = minuend_list.fixed().to_vec(); continued_fixed.push(minued_rest.clone()); let continued_list = ty::List::new(continued_fixed.into_boxed_slice(), minued_rest.clone()); subtract_ref_iters( [ terminated_list.into(), continued_list.into(), ].iter(), subtrahend_ref, ) } else { minuend_ty.clone().into() } }, _ => minuend_ty.clone().into(), } } pub fn subtract_ty_refs( minuend_ref: &ty::Ref, subtrahend_ref: &ty::Ref, ) -> ty::Ref { use crate::ty::intersect; if ty::is_a::ty_ref_is_a(minuend_ref, subtrahend_ref) { // No type remains Ty::Union(Box::new([])).into() } else { match (minuend_ref, subtrahend_ref) { (ty::Ref::Fixed(minuend_ty), ty::Ref::Fixed(subtrahend_ty)) => { // We can subtract directly subtract_tys(minuend_ty, subtrahend_ref, subtrahend_ty) } (ty::Ref::Var(_, _), ty::Ref::Fixed(subtrahend_ty)) => { // We can refine the bound using an intersection type let minuend_bound_ty = minuend_ref.resolve_to_ty(); let refined_bound_poly = subtract_tys(minuend_bound_ty, subtrahend_ref, subtrahend_ty); intersect::intersect_ty_refs(minuend_ref, &refined_bound_poly) .unwrap_or_else(|_| minuend_ref.clone()) } _ => minuend_ref.clone(), } } } #[cfg(test)] mod test { use super::*; use std::collections::HashMap; use crate::hir::{poly_for_str, tvar_bounded_by}; use crate::source::EMPTY_SPAN; use crate::ty::record; use crate::ty::ty_args::TyArgs; use crate::ty::var_usage::Variance; fn assert_subtraction(expected_str: &str, minuend_str: &str, subrahend_str: &str) { let expected_poly = poly_for_str(expected_str); let minuend_poly = poly_for_str(minuend_str); let subtrahend_poly = poly_for_str(subrahend_str); let actual_poly = subtract_ty_refs(&minuend_poly, &subtrahend_poly); assert_eq!(expected_poly, actual_poly); } #[test] fn trivial_subtraction() { assert_subtraction("Int", "Int", 
"Float"); assert_subtraction("(RawU)", "Int", "Int"); } #[test] fn bool_subtraction() { assert_subtraction("true", "Bool", "false"); assert_subtraction("false", "Bool", "true"); } #[test] fn num_subtraction() { assert_subtraction("Float", "Num", "Int"); assert_subtraction("Int", "Num", "Float"); } #[test] fn union_subtraction() { assert_subtraction("Sym", "(RawU Sym Str)", "Str"); assert_subtraction( "(RawU 'foo 'bar)", "(RawU 'foo 'bar 'baz 'foobar)", "(RawU 'baz 'foobar)", ); } #[test] fn list_subtraction() { assert_subtraction("(List Int & Int)", "(List & Int)", "()"); } #[test] fn poly_substraction() { let ptype1_unbounded = tvar_bounded_by(Ty::Any.into()); let ptype2_sym = tvar_bounded_by(Ty::Sym.into()); let ptype3_num = tvar_bounded_by(Ty::Num.into()); let any_float = poly_for_str("Float"); let any_int = poly_for_str("Int"); let foo_sym = poly_for_str("'foo"); // PType1 - 'foo = PType1 assert_eq!( ptype1_unbounded, subtract_ty_refs(&ptype1_unbounded, &foo_sym) ); // [PType2 Sym] - 'foo = PType1 assert_eq!(ptype2_sym, subtract_ty_refs(&ptype2_sym, &foo_sym)); // [PType3 Num] - Float = (∩ PType3 Int) let ptype3_int_intersect: ty::Ref = Ty::Intersect(Box::new([ptype3_num.clone(), any_int])).into(); assert_eq!( ptype3_int_intersect, subtract_ty_refs(&ptype3_num, &any_float) ); } #[test] fn poly_record_type() { let tvar = ty::TVar::new(EMPTY_SPAN, "tvar".into(), Ty::Any.into()); // Polymorphic record constructor and top type let poly_record_cons = record::Cons::new( EMPTY_SPAN, "record_cons".into(), "record_cons?".into(), Some(Box::new([record::PolyParam::TVar( Variance::Covariant, tvar.clone(), )])), Box::new([record::Field::new( EMPTY_SPAN, "num".into(), tvar.clone().into(), )]), ); let record_class_ref: ty::Ref = poly_record_cons.clone().into(); // Instance parameterised with an `Int` let mut int_tvars = HashMap::new(); int_tvars.insert(tvar, Ty::Int.into()); let int_ty_args = TyArgs::new(HashMap::new(), int_tvars); let int_instance_ref: ty::Ref = 
record::Instance::new(poly_record_cons, int_ty_args).into(); // Record class minus an instance is the record class assert_eq!( record_class_ref, subtract_ty_refs(&record_class_ref, &int_instance_ref) ); // Instance minus the record class is nothing let never_ref: ty::Ref = Ty::never().into(); assert_eq!( never_ref, subtract_ty_refs(&int_instance_ref, &record_class_ref) ); } } ================================================ FILE: compiler/ty/ty_args.rs ================================================ use std::collections::HashMap; use crate::ty; use crate::ty::purity; use crate::ty::purity::Purity; /// Type arguments to a polymorphic function or substitution #[derive(PartialEq, Clone, Debug)] pub struct TyArgs { pvar_purities: HashMap, tvar_types: HashMap>, } impl TyArgs { pub fn new( pvar_purities: HashMap, tvar_types: HashMap>, ) -> Self { Self { pvar_purities, tvar_types, } } pub fn empty() -> Self { Self { pvar_purities: HashMap::new(), tvar_types: HashMap::new(), } } pub fn pvar_purities(&self) -> &HashMap { &self.pvar_purities } pub fn tvar_types(&self) -> &HashMap> { &self.tvar_types } } impl TyArgs { /// Returns the args for the passed pvars/tvars where all args are set to their upper bound pub fn from_upper_bound(pvars: &[purity::PVarId], tvars: &[ty::TVarId]) -> Self { let pvar_purities = pvars .iter() .map(|pvar| (pvar.clone(), Purity::Impure.into())) .collect(); let tvar_types = tvars .iter() .map(|tvar| (tvar.clone(), tvar.bound.clone())) .collect(); Self { pvar_purities, tvar_types, } } } ================================================ FILE: compiler/ty/unify.rs ================================================ //! Builds new types by unifying zero or more input types //! //! Every type can be distinguished from every other type at runtime. This would allow our most //! naive implementation to simply detect any duplicate or subtypes and remove them. //! //! However, while every type can be tested at runtime some type checks are very expensive. A //! 
pathological case would be testing if a long `(List & Any)` is a `(List & Int)`. We need to //! allow these checks for completeness but they should be discouraged. To that end, any types //! that would be expensive to distinguish at runtime are merged by this code. This ensures in the //! general case it should be quick to test for individual members of a union. use std::cmp; use std::iter; use crate::ty; use crate::ty::purity; use crate::ty::purity::Purity; use crate::ty::record; use crate::ty::ty_args::TyArgs; use crate::ty::var_usage::Variance; use crate::ty::Ty; #[derive(Debug, PartialEq)] enum UnifiedTy { /// The types are distinct and have no clean simplification /// /// An example would be Str and Sym. Discerned, /// The types can be simplified in to a single non-union type /// /// A trivial example would be Sym and 'foo because of their subtype relationship. More complex /// per-type logic exists, especially surrounding sequences. Merged(ty::Ref), } #[derive(Debug, PartialEq)] pub enum UnifiedList { Discerned, Merged(ty::List), } fn unify_ty_refs(ref1: &ty::Ref, ref2: &ty::Ref) -> UnifiedTy { if let (ty::Ref::Fixed(ty1), ty::Ref::Fixed(ty2)) = (&ref1, &ref2) { // We can invoke full simplification logic if we have fixed types unify_ty(ref1, ty1, ref2, ty2) } else if ty::is_a::ty_ref_is_a(ref1, ref2) { UnifiedTy::Merged(ref2.clone()) } else if ty::is_a::ty_ref_is_a(ref2, ref1) { UnifiedTy::Merged(ref1.clone()) } else { // Leave these separate UnifiedTy::Discerned } } fn try_list_to_exact_pair(list: &ty::List) -> Option<&ty::Ref> { if list.fixed.len() == 1 && &list.fixed[0] == list.rest.as_ref() { Some(list.rest.as_ref()) } else { None } } /// Unifies a member in to an existing vector of members /// /// It is assumed `output_members` refers to members of an already unified union. 
fn union_push(output_members: &mut Vec>, new_member: ty::Ref) { for i in 0..output_members.len() { match unify_ty_refs(&output_members[i], &new_member) { UnifiedTy::Merged(merged_member) => { // Our merged type may now unify with one of the already processed members of the // union. Remove the member we merged with and recurse using the merged member. output_members.swap_remove(i); return union_push(output_members, merged_member); } UnifiedTy::Discerned => {} } } output_members.push(new_member); } /// Extends an existing union with new members /// /// `existing_members` are assumed to be the members of an existing union. If there is no existing /// union this must be empty. This is an optimisation to avoid processing the already unified /// members. fn union_extend(existing_members: Vec>, new_members: I) -> ty::Ref where M: ty::Pm, I: Iterator>, { let mut output_members = existing_members; for new_member in new_members { union_push(&mut output_members, new_member); } ty::Ref::from_vec(output_members) } fn unify_top_fun(top_fun1: &ty::TopFun, top_fun2: &ty::TopFun) -> UnifiedTy { let unified_purity = unify_purity_refs(top_fun1.purity(), top_fun2.purity()); let unified_ret = unify_to_ty_ref(top_fun1.ret(), top_fun2.ret()); UnifiedTy::Merged(ty::TopFun::new(unified_purity, unified_ret).into()) } fn unify_fun(fun1: &ty::Fun, fun2: &ty::Fun) -> UnifiedTy { let unified_purity = unify_purity_refs(fun1.purity(), fun2.purity()); if fun1.has_polymorphic_vars() || fun2.has_polymorphic_vars() { // TODO: We could do better here by finding our upper bound and unifying them // Preserving the polymorphicness would be very complex UnifiedTy::Merged(ty::TopFun::new(unified_purity, Ty::Any.into()).into()) } else { let unified_ret = unify_to_ty_ref(fun1.ret(), fun2.ret()); match ty::intersect::intersect_list(fun1.params(), fun2.params()) { Ok(unified_params) => UnifiedTy::Merged( ty::Fun::new_mono(unified_params, unified_purity, unified_ret).into(), ), 
Err(ty::intersect::Error::Disjoint) => { UnifiedTy::Merged(ty::TopFun::new(unified_purity, unified_ret).into()) } } } } fn unify_record_field_purities( variance: Variance, pvar: &purity::PVarId, ty_args1: &TyArgs, ty_args2: &TyArgs, ) -> purity::Ref { use crate::ty::intersect::intersect_purity_refs; use crate::ty::is_a::purity_refs_equivalent; let purity_ref1 = &ty_args1.pvar_purities()[pvar]; let purity_ref2 = &ty_args2.pvar_purities()[pvar]; match variance { Variance::Covariant => unify_purity_refs(purity_ref1, purity_ref2), Variance::Contravariant => intersect_purity_refs(purity_ref1, purity_ref2), Variance::Invariant => { if purity_refs_equivalent(purity_ref1, purity_ref2) { purity_ref1.clone() } else { Purity::Impure.into() } } } } fn unify_record_field_ty_refs( variance: Variance, tvar: &ty::TVarId, ty_args1: &TyArgs, ty_args2: &TyArgs, ) -> UnifiedTy { use crate::ty::intersect::intersect_ty_refs; use crate::ty::is_a::ty_refs_equivalent; let ty_ref1 = &ty_args1.tvar_types()[tvar]; let ty_ref2 = &ty_args2.tvar_types()[tvar]; match variance { Variance::Covariant => unify_ty_refs(ty_ref1, ty_ref2), Variance::Contravariant => match intersect_ty_refs(ty_ref1, ty_ref2) { Ok(intersected) => UnifiedTy::Merged(intersected), Err(_) => UnifiedTy::Discerned, }, Variance::Invariant => { if ty_refs_equivalent(ty_ref1, ty_ref2) { UnifiedTy::Merged(ty_ref1.clone()) } else { UnifiedTy::Discerned } } } } fn unify_record_instance( instance1: &record::Instance, instance2: &record::Instance, ) -> UnifiedTy { use crate::ty::record::PolyParam; use std::collections::HashMap; if instance1.cons() != instance2.cons() { return UnifiedTy::Discerned; } let mut merged_pvar_purities = HashMap::new(); let mut merged_tvar_types = HashMap::new(); for poly_param in instance1.cons().poly_params() { match poly_param { PolyParam::PVar(variance, pvar) => { merged_pvar_purities.insert( pvar.clone(), unify_record_field_purities( *variance, pvar, instance1.ty_args(), instance2.ty_args(), ), ); } 
PolyParam::TVar(variance, tvar) => { let unified_ty = unify_record_field_ty_refs( *variance, tvar, instance1.ty_args(), instance2.ty_args(), ); match unified_ty { UnifiedTy::Merged(merged) => { merged_tvar_types.insert(tvar.clone(), merged); } UnifiedTy::Discerned => return UnifiedTy::Discerned, } } PolyParam::Pure(_) | PolyParam::TFixed(_, _) => {} } } UnifiedTy::Merged( record::Instance::new( instance1.cons().clone(), TyArgs::new(merged_pvar_purities, merged_tvar_types), ) .into(), ) } fn unify_ty( ref1: &ty::Ref, ty1: &Ty, ref2: &ty::Ref, ty2: &Ty, ) -> UnifiedTy { if ty1 == ty2 { return UnifiedTy::Merged(ref1.clone()); } match (ty1, ty2) { // Handle supertype relationships (_, Ty::Any) | (Ty::Any, _) => UnifiedTy::Merged(Ty::Any.into()), (Ty::LitSym(_), Ty::Sym) | (Ty::Sym, Ty::LitSym(_)) => UnifiedTy::Merged(Ty::Sym.into()), (Ty::LitBool(_), Ty::Bool) | (Ty::Bool, Ty::LitBool(_)) => { UnifiedTy::Merged(Ty::Bool.into()) } (Ty::Float, Ty::Num) | (Ty::Num, Ty::Float) => UnifiedTy::Merged(Ty::Num.into()), (Ty::Int, Ty::Num) | (Ty::Num, Ty::Int) => UnifiedTy::Merged(Ty::Num.into()), // Simplify (U true false) => Bool (Ty::LitBool(true), Ty::LitBool(false)) | (Ty::LitBool(false), Ty::LitBool(true)) => { UnifiedTy::Merged(Ty::Bool.into()) } // Simplify (U Float Int) => Num (Ty::Float, Ty::Int) | (Ty::Int, Ty::Float) => UnifiedTy::Merged(Ty::Num.into()), // Set type (Ty::Set(ty_ref1), Ty::Set(ty_ref2)) => { let unified_ty_ref = unify_to_ty_ref(ty_ref1.as_ref(), ty_ref2.as_ref()); UnifiedTy::Merged(Ty::Set(Box::new(unified_ty_ref)).into()) } // Map type (Ty::Map(map1), Ty::Map(map2)) => { let unified_key_ref = unify_to_ty_ref(map1.key(), map2.key()); let unified_val_ref = unify_to_ty_ref(map1.value(), map2.value()); UnifiedTy::Merged(ty::Map::new(unified_key_ref, unified_val_ref).into()) } // Vector types (Ty::Vector(members1), Ty::Vector(members2)) => { if members1.len() != members2.len() { // We can quickly check vector lengths at runtime UnifiedTy::Discerned } else 
{ let unified_members = members1 .iter() .zip(members2.iter()) .map(|(member1, member2)| unify_to_ty_ref(member1, member2)) .collect(); UnifiedTy::Merged(Ty::Vector(unified_members).into()) } } (Ty::Vectorof(member1), Ty::Vectorof(member2)) => UnifiedTy::Merged( Ty::Vectorof(Box::new(unify_to_ty_ref( member1.as_ref(), member2.as_ref(), ))) .into(), ), (Ty::Vector(members1), Ty::Vectorof(member2)) | (Ty::Vectorof(member2), Ty::Vector(members1)) => { let unified_member = union_extend(vec![member2.as_ref().clone()], members1.iter().cloned()); UnifiedTy::Merged(Ty::Vectorof(Box::new(unified_member)).into()) } // Function types (Ty::TopFun(top_fun1), Ty::TopFun(top_fun2)) => unify_top_fun(top_fun1, top_fun2), (Ty::Fun(fun), Ty::TopFun(top_fun)) | (Ty::TopFun(top_fun), Ty::Fun(fun)) => { unify_top_fun(fun.top_fun(), top_fun) } (Ty::TyPred(_) | Ty::EqPred, Ty::TopFun(top_fun)) | (Ty::TopFun(top_fun), Ty::TyPred(_) | Ty::EqPred) => { unify_top_fun(&ty::TopFun::new_for_pred(), top_fun) } (Ty::Fun(fun1), Ty::Fun(fun2)) => unify_fun(fun1, fun2), (Ty::TyPred(_), Ty::Fun(fun)) | (Ty::Fun(fun), Ty::TyPred(_)) => { unify_fun(&ty::Fun::new_for_ty_pred(), fun) } (Ty::EqPred, Ty::Fun(fun)) | (Ty::Fun(fun), Ty::EqPred) => { unify_fun(&ty::Fun::new_for_eq_pred(), fun) } (Ty::TyPred(_), Ty::TyPred(_)) => { UnifiedTy::Merged(Ty::Fun(Box::new(ty::Fun::new_for_ty_pred())).into()) } // Union types (Ty::Union(members1), Ty::Union(members2)) => { let new_union = union_extend(members1.to_vec(), members2.iter().cloned()); UnifiedTy::Merged(new_union) } (Ty::Union(members1), _) => { let new_union = union_extend(members1.to_vec(), iter::once(ref2).cloned()); UnifiedTy::Merged(new_union) } (_, Ty::Union(members2)) => { let new_union = union_extend(members2.to_vec(), iter::once(ref1).cloned()); UnifiedTy::Merged(new_union) } // List types (Ty::List(list1), Ty::List(list2)) => match unify_list(list1, list2) { UnifiedList::Discerned => UnifiedTy::Discerned, UnifiedList::Merged(merged_list) => 
UnifiedTy::Merged(merged_list.into()), }, // Record types (Ty::Record(instance1), Ty::Record(instance2)) => { unify_record_instance(instance1, instance2) } _ => UnifiedTy::Discerned, } } pub fn unify_purity_refs(purity1: &purity::Ref, purity2: &purity::Ref) -> purity::Ref { if purity1 == purity2 { return purity1.clone(); } match (purity1, purity2) { // Pure is the "empty type" so this is a no-op (purity::Ref::Fixed(Purity::Pure), other) | (other, purity::Ref::Fixed(Purity::Pure)) => { other.clone() } _ => { // Impure is the "top type" so this becomes impure Purity::Impure.into() } } } pub fn unify_to_ty_ref(ty_ref1: &ty::Ref, ty_ref2: &ty::Ref) -> ty::Ref { match unify_ty_refs(ty_ref1, ty_ref2) { UnifiedTy::Merged(ty_ref) => ty_ref, UnifiedTy::Discerned => Ty::Union(Box::new([ty_ref1.clone(), ty_ref2.clone()])).into(), } } /// Unifies an iterator of types in to a new type pub fn unify_ty_ref_iter(new_members: I) -> ty::Ref where M: ty::Pm, I: Iterator>, { union_extend(vec![], new_members) } pub fn unify_list(list1: &ty::List, list2: &ty::List) -> UnifiedList { if list1.is_empty() { if let Some(member) = try_list_to_exact_pair(list2) { return UnifiedList::Merged(ty::List::new_uniform(member.clone())); } } else if list2.is_empty() { if let Some(member) = try_list_to_exact_pair(list1) { return UnifiedList::Merged(ty::List::new_uniform(member.clone())); } } if list1.has_disjoint_arity(list2) { return UnifiedList::Discerned; } let mut fixed_iter1 = list1.fixed().iter(); let mut fixed_iter2 = list2.fixed().iter(); let mut merged_fixed: Vec> = Vec::with_capacity(cmp::min(fixed_iter1.len(), fixed_iter2.len())); while fixed_iter1.len() > 0 && fixed_iter2.len() > 0 { let fixed1 = fixed_iter1.next().unwrap(); let fixed2 = fixed_iter2.next().unwrap(); merged_fixed.push(unify_to_ty_ref(fixed1, fixed2)); } // Merge all remaining fixed and rest args together let rest_iter = fixed_iter1 .chain(fixed_iter2.chain(iter::once(list1.rest()).chain(iter::once(list2.rest())))); let 
merged_rest = unify_ty_ref_iter(rest_iter.cloned()); UnifiedList::Merged(ty::List::new(merged_fixed.into_boxed_slice(), merged_rest)) } #[cfg(test)] mod test { use super::*; use crate::hir::{poly_for_str, tvar_bounded_by}; use crate::source::EMPTY_SPAN; fn assert_discerned(ty_str1: &str, ty_str2: &str) { let poly1 = poly_for_str(ty_str1); let poly2 = poly_for_str(ty_str2); assert_eq!(UnifiedTy::Discerned, unify_ty_refs(&poly1, &poly2)); } fn assert_merged(expected_str: &str, ty_str1: &str, ty_str2: &str) { let expected = poly_for_str(expected_str); let poly1 = poly_for_str(ty_str1); let poly2 = poly_for_str(ty_str2); // This is the basic invariant we're testing - each of our input types satisfies the merged // type assert!(ty::is_a::ty_ref_is_a(&poly1, &expected)); assert!(ty::is_a::ty_ref_is_a(&poly2, &expected)); assert_eq!(UnifiedTy::Merged(expected), unify_ty_refs(&poly1, &poly2)); } fn assert_merged_iter(expected_str: &str, ty_strs: &[&str]) { let expected = poly_for_str(expected_str); let polys = ty_strs.iter().map(|&s| poly_for_str(s)); assert_eq!(expected, unify_ty_ref_iter(polys)); } #[test] fn disjoint_types() { assert_discerned("Str", "Sym"); } #[test] fn two_sym_types() { assert_discerned("'foo", "'bar"); } #[test] fn literal_sym_and_any_sym() { assert_merged("Sym", "Sym", "'foo"); } #[test] fn two_bool_types() { assert_merged("Bool", "true", "false"); } #[test] fn num_types() { assert_merged("Int", "Int", "Int"); assert_merged("Num", "Int", "Float"); assert_merged("Num", "Float", "Int"); } #[test] fn top_fun_types() { assert_merged("(... ->! Bool)", "(... ->! true)", "(... -> false)"); } #[test] fn fun_types() { // Parameters are contravariant and Float/Int are disjoint assert_merged("(... -> Num)", "(Float -> Int)", "(Int -> Float)"); assert_merged("(true -> Num)", "(Bool -> Int)", "(true -> Float)"); assert_merged("(->! Int)", "(-> Int)", "(->! Int)"); assert_merged("(->! Bool)", "(-> true)", "(->! false)"); assert_merged("(... ->! Bool)", "(... 
-> true)", "(->! false)"); } #[test] fn ty_pred_types() { assert_merged("str?", "str?", "str?"); assert_merged("(Any -> Bool)", "str?", "sym?"); assert_merged("(Int -> Any)", "(Int -> Any)", "sym?"); assert_merged("(... ->! Bool)", "(... ->! Bool)", "sym?"); } #[test] fn eq_pred_type() { assert_merged("=", "=", "="); assert_merged("(Int Int -> Any)", "(Int Int -> Any)", "="); assert_merged("(... ->! Bool)", "(... ->! Bool)", "="); } #[test] fn set_types() { assert_merged("(Setof Bool)", "(Setof true)", "(Setof false)"); assert_merged("(Setof (RawU Str Sym))", "(Setof Str)", "(Setof Sym)"); } #[test] fn map_types() { assert_merged( "(Map Bool (RawU 'bar 'foo))", "(Map true 'bar)", "(Map false 'foo)", ); } #[test] fn union_types() { assert_merged("(RawU 'foo 'bar 'baz)", "(RawU 'foo 'bar)", "'baz"); assert_merged("(RawU 'foo 'bar 'baz)", "'baz", "(RawU 'foo 'bar)"); assert_merged( "(RawU Bool (-> Int))", "(RawU true (-> Int))", "(RawU false (-> Int))", ); assert_merged( "(RawU Char Int Str Sym)", "(RawU Char Int)", "(RawU Str Sym)", ); assert_merged( "(RawU true (... -> Num))", "(RawU true (Int -> Float))", "(RawU true (Float -> Int))", ); assert_merged("(RawU 'foo 'bar Bool)", "(RawU 'foo 'bar)", "Bool"); assert_merged("Sym", "(RawU 'foo 'bar)", "Sym"); assert_merged("(RawU Int Sym)", "(RawU 'foo 'bar Int)", "Sym"); assert_merged("Sym", "(RawU)", "Sym"); assert_merged("(RawU)", "(RawU)", "(RawU)"); assert_merged( "(RawU Char Int Str Sym)", "(RawU Char Int)", "(RawU Str Sym)", ); } #[test] fn unify_iter() { assert_merged_iter("(RawU)", &[]); assert_merged_iter("Sym", &["Sym"]); assert_merged_iter("Bool", &["true", "false"]); assert_merged_iter( "(Setof (RawU Str Sym Int))", &["(Setof Str)", "(Setof Sym)", "(Setof Int)"], ); assert_merged_iter( "(... 
-> (RawU Sym Str))", &["(Str -> Sym)", "(RawU)", "(Sym -> Str)"], ); } #[test] fn list_types() { assert_merged("(List & Any)", "(List Any)", "(List & Any)"); assert_discerned("(List Any)", "(List Any Any)"); assert_merged("(List (RawU Sym Str))", "(List Sym)", "(List Str)"); assert_discerned("(List Str)", "(List Str Str & Str)"); assert_merged( "(List Int & (RawU Float Sym Str))", "(List Int & Sym)", "(List Int Float & Str)", ); assert_merged("(List & Int)", "(List Int & Int)", "(List)"); assert_merged("(List & Sym)", "(List)", "(List Sym & Sym)"); } #[test] fn vec_types() { assert_merged("(Vectorof Bool)", "(Vector true)", "(Vectorof false)"); assert_discerned("(Vector Int Sym)", "(Vector 'bar Int Str)"); } #[test] fn polymorphic_funs() { let pidentity_fun = poly_for_str("(All #{A} A -> A)"); let pidentity_impure_string_fun = poly_for_str("(All #{[A Str]} A ->! A)"); let top_impure_fun = poly_for_str("(... ->! Any)"); assert_eq!( UnifiedTy::Merged(pidentity_fun.clone()), unify_ty_refs(&pidentity_fun, &pidentity_fun) ); assert_eq!( UnifiedTy::Merged(top_impure_fun.clone()), unify_ty_refs(&pidentity_fun, &pidentity_impure_string_fun) ); assert_eq!( UnifiedTy::Merged(top_impure_fun.clone()), unify_ty_refs(&pidentity_fun, &top_impure_fun) ); } #[test] fn purity_refs() { let purity_pure = Purity::Pure.into(); let purity_impure = Purity::Impure.into(); let pvar1 = purity::PVar::new(EMPTY_SPAN, "test".into()); let purity_var1 = purity::Ref::Var(pvar1); let pvar2 = purity::PVar::new(EMPTY_SPAN, "test".into()); let purity_var2 = purity::Ref::Var(pvar2); assert_eq!(purity_pure, unify_purity_refs(&purity_pure, &purity_pure)); assert_eq!( purity_impure, unify_purity_refs(&purity_impure, &purity_impure) ); assert_eq!(purity_var1, unify_purity_refs(&purity_var1, &purity_var1)); assert_eq!( purity_impure, unify_purity_refs(&purity_pure, &purity_impure) ); assert_eq!(purity_var1, unify_purity_refs(&purity_pure, &purity_var1)); assert_eq!( purity_impure, 
unify_purity_refs(&purity_impure, &purity_var1) ); assert_eq!(purity_impure, unify_purity_refs(&purity_var1, &purity_var2)); } #[test] fn related_poly_bounds() { let ptype1_unbounded = tvar_bounded_by(Ty::Any.into()); let ptype2_bounded_by_1 = tvar_bounded_by(ptype1_unbounded.clone()); assert_eq!( UnifiedTy::Merged(ptype1_unbounded.clone()), unify_ty_refs(&ptype1_unbounded, &ptype1_unbounded) ); assert_eq!( UnifiedTy::Merged(ptype2_bounded_by_1.clone()), unify_ty_refs(&ptype2_bounded_by_1, &ptype2_bounded_by_1) ); assert_eq!( UnifiedTy::Merged(ptype1_unbounded.clone()), unify_ty_refs(&ptype2_bounded_by_1, &ptype1_unbounded) ); assert_eq!( UnifiedTy::Merged(ptype1_unbounded.clone()), unify_ty_refs(&ptype1_unbounded, &ptype2_bounded_by_1,) ); } #[test] fn record_instances() { use crate::ty::ty_args::TyArgs; use std::collections::HashMap; let tvar1 = ty::TVar::new(EMPTY_SPAN, "tvar1".into(), Ty::Any.into()); let tvar2 = ty::TVar::new(EMPTY_SPAN, "tvar2".into(), Ty::Any.into()); let cons1 = record::Cons::new( EMPTY_SPAN, "cons1".into(), "cons1?".into(), Some(Box::new([record::PolyParam::TVar( Variance::Covariant, tvar1.clone(), )])), Box::new([record::Field::new( EMPTY_SPAN, "cons1-field1".into(), tvar1.clone().into(), )]), ); let cons2 = record::Cons::new( EMPTY_SPAN, "cons2".into(), "cons2?".into(), Some(Box::new([ record::PolyParam::TVar(Variance::Covariant, tvar1.clone()), record::PolyParam::TVar(Variance::Contravariant, tvar2.clone()), ])), Box::new([ record::Field::new(EMPTY_SPAN, "cons2-covariant".into(), tvar1.clone().into()), record::Field::new( EMPTY_SPAN, "cons2-contravariant".into(), tvar2.clone().into(), ), ]), ); let float_instance1_poly: ty::Ref = record::Instance::new( cons1, TyArgs::new( HashMap::new(), std::iter::once((tvar1.clone(), Ty::Float.into())).collect(), ), ) .into(); let float_bool_instance2_poly: ty::Ref = record::Instance::new( cons2.clone(), TyArgs::new( HashMap::new(), std::iter::once((tvar1.clone(), Ty::Float.into())) 
.chain(std::iter::once((tvar2.clone(), Ty::Bool.into()))) .collect(), ), ) .into(); let int_false_instance2_poly: ty::Ref = record::Instance::new( cons2.clone(), TyArgs::new( HashMap::new(), std::iter::once((tvar1.clone(), Ty::Int.into())) .chain(std::iter::once((tvar2.clone(), Ty::LitBool(false).into()))) .collect(), ), ) .into(); let num_false_instance2_poly: ty::Ref = record::Instance::new( cons2, TyArgs::new( HashMap::new(), std::iter::once((tvar1, Ty::Num.into())) .chain(std::iter::once((tvar2, Ty::LitBool(false).into()))) .collect(), ), ) .into(); // Different record constructors assert_eq!( UnifiedTy::Discerned, unify_ty_refs(&float_instance1_poly, &float_bool_instance2_poly) ); // Different instances of same constructor assert_eq!( UnifiedTy::Merged(num_false_instance2_poly), unify_ty_refs(&int_false_instance2_poly, &float_bool_instance2_poly) ); } } ================================================ FILE: compiler/ty/var_usage.rs ================================================ //! Calculates the usage of a polymorphic variable in type use std::collections::HashMap; use std::ops; use crate::ty; use crate::ty::purity; use crate::ty::record; use crate::ty::Ty; /// Indicates the variance of a polymorphic parameter /// /// By default variables are covariant. Whenever a variable appears inside a function's parameter /// list it becomes contravariant, flipping again for each nested function type. If a variable /// appears in both a covariant and contravariant position then it becomes invariant. 
#[derive(PartialEq, Debug, Clone, Copy)] pub enum Variance { /// Subtypes of the record have subtypes of this variable Covariant, /// Subtypes of the record have supertypes of this variable Contravariant, /// No subtype relationship exists between records with different types of this variable Invariant, } impl ops::Mul for Variance { type Output = Variance; /// Multiplies two variances /// /// This is used to calculate our new polarity when visiting a nested record type's polymorphic /// variables. /// /// When a contravariant relationship appears in a contravariant polarity it's actually /// covariant with respect to the root type. This makes contravariance analogous to a negative /// number for the purposes of multiplication. /// /// An invariant relationship is invariant in any polarity and any relationship is invariant in /// an invariant polarity. This makes invariance analogous to zero. fn mul(self, rhs: Variance) -> Variance { match (self, rhs) { (Variance::Invariant, _) | (_, Variance::Invariant) => Variance::Invariant, (Variance::Contravariant, Variance::Covariant) | (Variance::Covariant, Variance::Contravariant) => Variance::Contravariant, (Variance::Covariant, Variance::Covariant) | (Variance::Contravariant, Variance::Contravariant) => Variance::Covariant, } } } impl ops::BitAndAssign for Variance { /// Combines the variance from two different usages fn bitand_assign(&mut self, rhs: Variance) { if *self != rhs { *self = Variance::Invariant; } } } #[derive(Clone, Debug, Default)] pub struct VarUsages { pvar_variances: HashMap, tvar_variances: HashMap, } fn visit_list(var_usages: &mut VarUsages, polarity: Variance, list: &ty::List) { for member in list.fixed() { visit_poly(var_usages, polarity, member); } visit_poly(var_usages, polarity, list.rest()); } fn visit_top_fun(var_usages: &mut VarUsages, polarity: Variance, top_fun: &ty::TopFun) { visit_poly(var_usages, polarity, top_fun.ret()); } fn visit_ty(var_usages: &mut VarUsages, polarity: Variance, 
poly_ty: &Ty) { match poly_ty { Ty::Any | Ty::Bool | Ty::Char | Ty::LitBool(_) | Ty::Sym | Ty::LitSym(_) | Ty::Int | Ty::Float | Ty::Num | Ty::Str | Ty::TyPred(_) | Ty::EqPred | Ty::TopRecord | Ty::RecordClass(_) => { // Terminal type } Ty::List(list) => { visit_list(var_usages, polarity, list); } Ty::Map(map) => { visit_poly(var_usages, polarity, map.key()); visit_poly(var_usages, polarity, map.value()); } Ty::Set(member) | Ty::Vectorof(member) => { visit_poly(var_usages, polarity, member); } Ty::Union(members) | Ty::Intersect(members) | Ty::Vector(members) => { for member in members.iter() { visit_poly(var_usages, polarity, member); } } Ty::Fun(fun) => { visit_top_fun(var_usages, polarity, fun.top_fun()); visit_list(var_usages, polarity * Variance::Contravariant, fun.params()); } Ty::TopFun(top_fun) => { visit_top_fun(var_usages, polarity, top_fun); } Ty::Record(record_instance) => { let record_cons = record_instance.cons(); for poly_param in record_cons.poly_params() { match poly_param { record::PolyParam::PVar(variance, pvar) => { let purity_ref = &record_instance.ty_args().pvar_purities()[pvar]; visit_purity(var_usages, polarity * *variance, purity_ref); } record::PolyParam::TVar(variance, tvar) => { let poly_ref = &record_instance.ty_args().tvar_types()[tvar]; visit_poly(var_usages, polarity * *variance, poly_ref); } record::PolyParam::Pure(_) | record::PolyParam::TFixed(_, _) => {} } } } } } fn visit_purity(var_usages: &mut VarUsages, polarity: Variance, purity_ref: &purity::Ref) { match purity_ref { purity::Ref::Fixed(_) => {} purity::Ref::Var(pvar) => { var_usages .pvar_variances .entry(pvar.clone()) .and_modify(|existing_usage| *existing_usage &= polarity) .or_insert(polarity); } } } fn visit_poly(var_usages: &mut VarUsages, polarity: Variance, poly_ref: &ty::Ref) { match poly_ref { ty::Ref::Fixed(poly_ty) => { visit_ty(var_usages, polarity, poly_ty); } ty::Ref::Var(tvar, _) => { var_usages .tvar_variances .entry(tvar.clone()) .and_modify(|existing_usage| 
*existing_usage &= polarity) .or_insert(polarity); } } } impl VarUsages { pub fn new() -> Self { Self::default() } pub fn add_poly_usages(&mut self, poly_ref: &ty::Ref) { visit_poly(self, Variance::Covariant, poly_ref); } pub fn pvar_variance(&self, pvar: &purity::PVarId) -> Option { self.pvar_variances.get(pvar).copied() } pub fn tvar_variance(&self, tvar: &ty::TVarId) -> Option { self.tvar_variances.get(tvar).copied() } } #[cfg(test)] mod test { use super::*; use crate::source::EMPTY_SPAN; use crate::ty::purity::Purity; #[test] fn test_variance_mul() { assert_eq!( Variance::Covariant, Variance::Covariant * Variance::Covariant ); assert_eq!( Variance::Contravariant, Variance::Covariant * Variance::Contravariant ); assert_eq!( Variance::Invariant, Variance::Covariant * Variance::Invariant ); } #[test] fn convariant_usage() { let tvar = ty::TVar::new(EMPTY_SPAN, "tvar".into(), Ty::Any.into()); let mut var_usages = VarUsages::new(); var_usages.add_poly_usages(&tvar.clone().into()); assert_eq!(Some(Variance::Covariant), var_usages.tvar_variance(&tvar)); } #[test] fn contravariant_usage() { let tvar = ty::TVar::new(EMPTY_SPAN, "tvar".into(), Ty::Any.into()); let mut var_usages = VarUsages::new(); var_usages.add_poly_usages( &ty::Fun::new_mono( ty::List::new_uniform(tvar.clone().into()), Purity::Pure.into(), Ty::Any.into(), ) .into(), ); assert_eq!( Some(Variance::Contravariant), var_usages.tvar_variance(&tvar) ); } #[test] fn invariant_usage() { let tvar = ty::TVar::new(EMPTY_SPAN, "tvar".into(), Ty::Any.into()); let mut var_usages = VarUsages::new(); var_usages.add_poly_usages( &ty::Fun::new_mono( ty::List::new_uniform(tvar.clone().into()), Purity::Pure.into(), tvar.clone().into(), ) .into(), ); assert_eq!(Some(Variance::Invariant), var_usages.tvar_variance(&tvar)); } } ================================================ FILE: compiler/typeck/dce.rs ================================================ use crate::hir; use crate::ty; use crate::ty::purity::Purity; use 
crate::ty::Ty; /// Returns if an expression can have a side effect /// /// This is used for very basic dead code elimination during type checking. pub fn expr_can_side_effect(expr: &hir::Expr) -> bool { use hir::ExprKind; match &expr.kind { ExprKind::LocalRef(_, _) | ExprKind::ExportRef(_, _) | ExprKind::Lit(_) | ExprKind::EqPred(_) | ExprKind::TyPred(_, _) | ExprKind::RecordCons(_, _) | ExprKind::FieldAccessor(_) | ExprKind::Fun(_) | ExprKind::RustFun(_) => false, ExprKind::Do(exprs) => exprs.iter().any(expr_can_side_effect), ExprKind::MacroExpand(_, inner) => expr_can_side_effect(inner), ExprKind::Cond(cond) => { expr_can_side_effect(&cond.test_expr) || expr_can_side_effect(&cond.true_expr) || expr_can_side_effect(&cond.false_expr) } ExprKind::Let(let_expr) => { expr_can_side_effect(&let_expr.value_expr) || expr_can_side_effect(&let_expr.body_expr) } ExprKind::App(app) => { if let ty::Ref::Fixed(Ty::Fun(ref fun_type)) = app.fun_expr.result_ty { fun_type.top_fun().purity() != &Purity::Pure.into() || fun_type.ret().is_never() || app.fixed_arg_exprs.iter().any(expr_can_side_effect) || app.rest_arg_expr.iter().any(expr_can_side_effect) } else { true } } ExprKind::Recur(_) => { // We don't know if a `(recur)` is pure without knowing the function it appears in. // However, by definition `(recur)` always occurs in a position where its value becomes // the return value of a function. This means that in practice it can never be // eliminated anyway. 
true } } } #[cfg(test)] mod test { use super::*; use crate::source::EMPTY_SPAN; use crate::ty::ty_args::TyArgs; #[test] fn pure_app_expr() { let app_expr = hir::Expr:: { result_ty: ty::List::empty().into(), kind: hir::ExprKind::App(Box::new(hir::App { span: EMPTY_SPAN, fun_expr: hir::Expr { result_ty: ty::Fun::new_mono( ty::List::empty(), Purity::Pure.into(), ty::List::empty().into(), ) .into(), kind: hir::ExprKind::Do(vec![]), }, ty_args: TyArgs::empty(), fixed_arg_exprs: vec![], rest_arg_expr: None, })), }; assert!(!expr_can_side_effect(&app_expr)); } #[test] fn impure_app_expr() { let app_expr = hir::Expr:: { result_ty: ty::List::empty().into(), kind: hir::ExprKind::App(Box::new(hir::App { span: EMPTY_SPAN, fun_expr: hir::Expr { result_ty: ty::Fun::new_mono( ty::List::empty(), Purity::Impure.into(), ty::List::empty().into(), ) .into(), kind: hir::ExprKind::Do(vec![]), }, ty_args: TyArgs::empty(), fixed_arg_exprs: vec![], rest_arg_expr: None, })), }; assert!(expr_can_side_effect(&app_expr)); } } ================================================ FILE: compiler/typeck/destruc.rs ================================================ use crate::hir; use crate::hir::destruc; use crate::ty; use crate::ty::list_iter::ListIterator; use crate::ty::Ty; pub fn type_for_decl_list_destruc( list: &destruc::List, mut guide_type_iter: Option>, ) -> ty::List { let fixed_polys = list .fixed() .iter() .map(|fixed_destruc| { let guide_type = if let Some(guide_type_iter) = guide_type_iter.as_mut() { guide_type_iter.next() } else { None }; type_for_decl_destruc(fixed_destruc, guide_type) }) .collect(); let rest_poly = match list.rest() { Some(rest) => match rest.ty() { hir::DeclTy::Known(poly) => poly.clone(), hir::DeclTy::Free => guide_type_iter .map(ListIterator::collect_rest) .unwrap_or_else(|| Ty::Any.into()), }, None => Ty::never().into(), }; ty::List::new(fixed_polys, rest_poly) } /// Returns the required type for a destruc pub fn type_for_decl_destruc( destruc: &destruc::Destruc, 
guide_type: Option<&ty::Ref>, ) -> ty::Ref { match destruc { destruc::Destruc::Scalar(_, scalar) => match scalar.ty() { hir::DeclTy::Known(poly) => poly.clone(), hir::DeclTy::Free => guide_type.cloned().unwrap_or_else(|| Ty::Any.into()), }, destruc::Destruc::List(_, list) => { let guide_type_iter = guide_type.and_then(|guide_type| ListIterator::try_new_from_ty_ref(guide_type)); type_for_decl_list_destruc(list, guide_type_iter).into() } } } fn visit_scalar_locals(scalar: &destruc::Scalar, visitor: &mut F) where F: FnMut(hir::LocalId, &hir::DeclTy), { if let Some(local_id) = scalar.local_id() { visitor(*local_id, scalar.ty()); } } /// Visits the local variables in the passed destruc with the given visitor function /// /// If the root destruc is scalar its VarId will be returned, otherwise None pub fn visit_locals( destruc: &destruc::Destruc, visitor: &mut F, ) -> Option where F: FnMut(hir::LocalId, &hir::DeclTy), { match destruc { destruc::Destruc::Scalar(_, ref scalar) => { visit_scalar_locals(scalar, visitor); *scalar.local_id() } destruc::Destruc::List(_, ref list) => { for fixed in list.fixed() { visit_locals(fixed, visitor); } if let Some(rest) = list.rest() { visit_scalar_locals(rest, visitor); } None } } } ================================================ FILE: compiler/typeck/error.rs ================================================ use std::fmt::Display; use std::{error, fmt}; use codespan_reporting::diagnostic::Diagnostic; use arret_syntax::span::{FileId, Span}; use crate::hir; use crate::reporting::{new_primary_label, new_secondary_label, LocTrace}; use crate::ty; use crate::ty::purity; #[derive(PartialEq, Debug, Copy, Clone)] pub struct WantedArity { fixed_len: usize, has_rest: bool, } impl WantedArity { pub fn new(fixed_len: usize, has_rest: bool) -> WantedArity { WantedArity { fixed_len, has_rest, } } } impl fmt::Display for WantedArity { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.has_rest { write!(f, "at least {}", 
self.fixed_len) } else { write!(f, "{}", self.fixed_len) } } } #[derive(PartialEq, Debug, Clone)] pub struct IsNotRetTy { value_poly: ty::Ref, ret_poly: ty::Ref, ret_ty_span: Option, } impl IsNotRetTy { pub fn new( value_poly: ty::Ref, ret_poly: ty::Ref, ret_ty_span: Option, ) -> IsNotRetTy { IsNotRetTy { value_poly, ret_poly, ret_ty_span, } } } #[derive(PartialEq, Debug, Clone)] pub enum ErrorKind { IsNotTy(ty::Ref, ty::Ref), IsNotFun(ty::Ref), IsNotPurity(ty::Ref, purity::Ref), IsNotRetTy(IsNotRetTy), VarHasEmptyType(ty::Ref, ty::Ref), TopFunApply(ty::Ref), RecursiveType, RecurWithoutFunTypeDecl, NonTailRecur, DependsOnError, WrongArity(usize, WantedArity), UnselectedPVar(purity::PVarId), UnselectedTVar(ty::TVarId), } #[derive(PartialEq, Debug, Clone)] pub struct Error { loc_trace: LocTrace, kind: ErrorKind, } impl Error { pub fn new(span: Span, kind: ErrorKind) -> Error { Self::new_with_loc_trace(span.into(), kind) } pub fn new_with_loc_trace(loc_trace: LocTrace, kind: ErrorKind) -> Error { Error { loc_trace, kind } } pub fn kind(&self) -> &ErrorKind { &self.kind } pub fn with_macro_invocation_span(self, span: Span) -> Error { Error { loc_trace: self.loc_trace.with_macro_invocation(span), ..self } } } impl From for Diagnostic { fn from(error: Error) -> Self { let origin = error.loc_trace.origin(); let diagnostic = match error.kind() { ErrorKind::IsNotFun(ref sub) => Diagnostic::error().with_message(format!( "expected function, found `{}`", hir::str_for_ty_ref(sub) )).with_labels(vec![new_primary_label(origin, "application requires function")]), ErrorKind::IsNotTy(ref sub, ref parent) => Diagnostic::error() .with_message("mismatched types") .with_labels(vec![new_primary_label(origin, format!( "`{}` is not a `{}`", hir::str_for_ty_ref(sub), hir::str_for_ty_ref(parent) ))]), ErrorKind::IsNotPurity(ref fun, ref purity) => { use crate::ty::purity::Purity; let purity_str = if purity == &Purity::Pure.into() { // `->` might be confusing here "pure".into() } else { 
                    // Name the required purity explicitly in the label text
                    format!("`{}`", hir::str_for_purity(purity))
                };
                Diagnostic::error()
                    .with_message("mismatched purities")
                    .with_labels(vec![new_primary_label(origin, format!(
                        "function of type `{}` is not {}",
                        hir::str_for_ty_ref(fun),
                        purity_str
                    ))] )
            }
            ErrorKind::IsNotRetTy(IsNotRetTy {
                value_poly,
                ret_poly,
                ret_ty_span,
            }) => {
                let ret_poly_str = hir::str_for_ty_ref(ret_poly);
                let diagnostic = Diagnostic::error()
                    .with_message("mismatched types");
                let primary_label = new_primary_label(origin, format!(
                    "`{}` is not a `{}`",
                    hir::str_for_ty_ref(value_poly),
                    ret_poly_str
                ));
                // Also point at the return type annotation when the function declared one
                if let Some(ret_ty_span) = ret_ty_span {
                    let secondary_label = new_secondary_label(*ret_ty_span,format!(
                        "expected `{}` due to return type",
                        ret_poly_str
                    ));
                    diagnostic.with_labels(vec![primary_label, secondary_label])
                } else {
                    diagnostic.with_labels(vec![primary_label])
                }
            }
            ErrorKind::VarHasEmptyType(ref current_type, ref required_type) => {
                Diagnostic::error()
                    .with_message("type annotation needed")
                    .with_labels(vec![
                        new_primary_label(origin,format!(
                            "usage requires `{}` but variable has inferred type of `{}`",
                            hir::str_for_ty_ref(required_type),
                            hir::str_for_ty_ref(current_type)
                        ))]
                    )
            }
            ErrorKind::TopFunApply(ref top_fun) => Diagnostic::error()
                .with_message(format!(
                    "cannot determine parameter types for `{}`",
                    hir::str_for_ty_ref(top_fun)
                ))
                .with_labels(vec![
                    new_primary_label(origin,"at this application")
                ]),
            ErrorKind::WrongArity(have, ref wanted) => {
                // Singular/plural agreement for the label text
                let label_message = if wanted.fixed_len == 1 {
                    format!("expected {} argument", wanted)
                } else {
                    format!("expected {} arguments", wanted)
                };
                Diagnostic::error()
                    .with_message(format!(
                        "incorrect number of arguments: wanted {}, have {}",
                        wanted, have
                    ))
                    .with_labels(vec![new_primary_label(origin, label_message)])
            }
            ErrorKind::RecursiveType => Diagnostic::error()
                .with_message("type annotation needed")
                .with_labels(vec![
                    new_primary_label(origin, "recursive usage requires explicit type annotation")
                ]),
            ErrorKind::RecurWithoutFunTypeDecl =>
                Diagnostic::error()
                    .with_message("type annotation needed")
                    .with_labels(vec![
                        new_primary_label(origin, "`(recur)` requires the function to have a complete type annotation"),
                    ]),
            ErrorKind::NonTailRecur => Diagnostic::error()
                .with_message("non-tail `(recur)`")
                .with_labels(vec![
                    new_primary_label(origin, "`(recur)` must occur in a position where it immediately becomes the return value of a function"),
                ]),
            ErrorKind::DependsOnError => Diagnostic::error()
                .with_message("type cannot be determined due to previous error")
                .with_labels(vec![new_primary_label(origin, "cannot infer type")]),
            ErrorKind::UnselectedPVar(pvar) => Diagnostic::error()
                .with_message(format!(
                    "cannot determine purity of purity variable `{}`",
                    pvar.source_name()
                ))
                .with_labels(vec![
                    new_primary_label(origin,"at this application"),
                    new_secondary_label(pvar.span(),"purity variable defined here"),
                ]),
            ErrorKind::UnselectedTVar(tvar) => Diagnostic::error().with_message(format!(
                "cannot determine type of type variable `{}`",
                tvar.source_name()
            )).with_labels(vec![
                new_primary_label(origin,"at this application"),
                new_secondary_label(tvar.span(), "type variable defined here")
            ])
        };
        // Finally attach macro-expansion context labels from the location trace
        error.loc_trace.label_macro_invocation(diagnostic)
    }
}

// Convenience conversion so a single error can be reported as a diagnostic list.
impl From for Vec> {
    fn from(error: Error) -> Self {
        vec![error.into()]
    }
}

impl error::Error for Error {}

impl Display for Error {
    // `Display` renders just the top-level diagnostic message, no labels
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let diagnostic: Diagnostic = self.clone().into();
        f.write_str(&diagnostic.message)
    }
}
================================================ FILE: compiler/typeck/infer.rs ================================================
use std::collections::HashMap;
use std::result;
use std::sync::Arc;

use crate::context::ModuleId;
use crate::hir;
use crate::hir::destruc;
use crate::rfi;
use crate::ty;
use crate::ty::list_iter::ListIterator;
use crate::ty::purity;
use crate::ty::purity::Purity;
use crate::ty::record;
use crate::ty::ty_args::TyArgs;
use crate::ty::Ty;
use crate::typeck;
use
crate::typeck::dce::expr_can_side_effect;
use crate::typeck::error::{Error, ErrorKind, IsNotRetTy, WantedArity};

use arret_syntax::datum::Datum;
use arret_syntax::span::Span;

// Local alias so fallible helpers can return a bare `Result`
// NOTE(review): generic parameters appear stripped by extraction here and below
// (presumably `result::Result<T, Error>`) — TODO confirm against the repository.
type Result = result::Result;

/// Result of inferring the type for a HIR expression
pub struct InferredNode {
    /// Expression with all free types replaced with poly types
    expr: hir::Expr,
    /// Type conditions depending the poly type of this node
    type_conds: Vec,
}

impl InferredNode {
    // A node diverges when its result type is `never`
    fn is_divergent(&self) -> bool {
        self.expr.result_ty.is_never()
    }

    pub fn into_expr(self) -> hir::Expr {
        self.expr
    }

    pub fn result_ty(&self) -> &ty::Ref {
        &self.expr.result_ty
    }
}

/// Boolean value of a node, used for occurrence typing in `(if)` branches.
#[derive(Clone, Copy, PartialEq)]
enum NodeBool {
    True,
    False,
}

/// Conditional type override: when the owning node evaluates to `when`,
/// `override_local_id` may be assumed to have `override_type`.
struct VarTypeCond {
    when: NodeBool,
    override_local_id: hir::LocalId,
    override_type: ty::Ref,
}

impl VarTypeCond {
    fn with_when(self, when: NodeBool) -> VarTypeCond {
        VarTypeCond { when, ..self }
    }

    // Flips the condition's polarity
    fn into_inverted(self) -> VarTypeCond {
        VarTypeCond {
            when: if self.when == NodeBool::True {
                NodeBool::False
            } else {
                NodeBool::True
            },
            ..self
        }
    }
}

new_indexing_id_type!(FreeTyId, u32);
new_indexing_id_type!(InputDefId, u32);

/// Partially inferred function application
///
/// The function has been inferred while the arguments have not
struct FunApp {
    fun_expr: hir::Expr,
    fixed_arg_exprs: Vec>,
    rest_arg_expr: Option>,
}

/// Inference state of a variable binding.
enum VarType {
    // Introduced a definition that has yet to be processed
    Pending(InputDefId),
    // (def) currently having its type inferred
    Recursive,
    /// Type depends on a value that failed to type check
    Error,
    /// Scalar value being inferred
    ParamScalar(FreeTyId),
    /// Rest list being inferred
    ///
    /// The referenced free type is the member type of the uniform rest list.
    ParamRest(FreeTyId),
    // Declared or previously inferred type
    Known(ty::Ref),
}

/// Purity of the function currently being inferred: still free, or fixed.
#[derive(Clone)]
enum PurityVar {
    Free(purity::Ref),
    Known(purity::Ref),
}

#[derive(Clone)]
struct RetExprResultUse<'a> {
    /// Span of the declared return type if any
    ret_type_span: Option,
    ret_type: &'a ty::Ref,
    known_self_type: Option<&'a ty::Fun>,
}

/// Describes the use of an expression's result value
#[derive(Clone)]
enum ResultUse<'a> {
    /// Non-return expression with a used value
    InnerExpr(&'a ty::Ref),
    /// Expression used as the return value of a function
    RetExpr(RetExprResultUse<'a>),
    /// Expression with an unused value
    ///
    /// The most common example is a non-terminal expression in a `(do)`.
    Unused(&'a ty::Ref),
}

impl<'a> ResultUse<'a> {
    /// Returns the required type for an expression's result value
    fn required_type(&self) -> &'a ty::Ref {
        match self {
            ResultUse::InnerExpr(required_type) => required_type,
            ResultUse::RetExpr(RetExprResultUse { ret_type, .. }) => ret_type,
            ResultUse::Unused(required_type) => required_type,
        }
    }

    /// Returns `true` if the expression's result value is used
    fn is_used(&self) -> bool {
        !matches!(self, ResultUse::Unused(_))
    }
}

impl PurityVar {
    // Collapses either variant into its underlying purity reference
    fn into_poly(self) -> purity::Ref {
        match self {
            PurityVar::Free(poly) => poly,
            PurityVar::Known(poly) => poly,
        }
    }
}

/// Top-level def that is either awaiting inference or already consumed.
enum InputDef {
    Pending(hir::Def),
    Complete,
}

pub type InferredLocals = HashMap>;
pub type InferredModuleVars = HashMap>;

/// Output of inferring a whole module: its local var types plus its typed defs.
pub struct InferredModule {
    pub inferred_locals: InferredLocals,
    pub defs: Vec>,
}

/// Inference context for a group of possibly mutually-recursive defs.
struct RecursiveDefsCtx<'types> {
    input_defs: Vec,
    complete_defs: Vec>,
    // The inferred types for free types in the order they're encountered
    //
    // Each (def), (let) and (fn) push entries to `free_ty_polys` before they evaluate their body
    // and then pop them off afterwards.
    free_ty_polys: Vec>,
    self_locals: HashMap,
    imported_vars: &'types InferredModuleVars,
}

/// Tries to convert a polymorphic type to a literal boolean value
fn try_to_bool(poly: &ty::Ref) -> Option {
    match poly {
        ty::Ref::Fixed(Ty::LitBool(v)) => Some(*v),
        _ => None,
    }
}

// Unifies an application's purity into a free purity variable;
// a `Known` purity is left untouched.
fn unify_app_purity(pv: &mut PurityVar, app_purity: &purity::Ref) {
    if let PurityVar::Free(ref mut free_purity) = pv {
        *free_purity = ty::unify::unify_purity_refs(free_purity, app_purity)
    };
}

/// Inspects the mismatched sub and parent types to attempt to produce an understandable type error
fn error_kind_for_type_error(
    value_poly: &ty::Ref,
    value_use: &ResultUse<'_>,
) -> ErrorKind {
    // Special-case mismatches against a top function type so we can distinguish
    // "not a function" from "right function, wrong purity".
    if let ty::Ref::Fixed(Ty::TopFun(top_fun)) = value_use.required_type() {
        let topmost_fun = ty::TopFun::new(Purity::Impure.into(), Ty::Any.into()).into();
        let impure_top_fun = ty::TopFun::new(Purity::Impure.into(), top_fun.ret().clone()).into();

        if !ty::is_a::ty_ref_is_a(value_poly, &topmost_fun) {
            // We aren't a function at all
            return ErrorKind::IsNotFun(value_poly.clone());
        } else if ty::is_a::ty_ref_is_a(value_poly, &impure_top_fun) {
            // We have the right return type but the wrong purity
            return ErrorKind::IsNotPurity(value_poly.clone(), top_fun.purity().clone());
        }
    }

    match value_use {
        ResultUse::Unused(required_type) | ResultUse::InnerExpr(required_type) => {
            ErrorKind::IsNotTy(value_poly.clone(), (*required_type).clone())
        }
        ResultUse::RetExpr(RetExprResultUse {
            ret_type_span,
            ret_type,
            ..
        // Return-position mismatch: report against the declared return type
        }) => ErrorKind::IsNotRetTy(IsNotRetTy::new(
            value_poly.clone(),
            (*ret_type).clone(),
            *ret_type_span,
        )),
    }
}

/// Ensures `value_poly` is appropriate for `value_use`
fn ensure_is_a(
    span: Span,
    value_poly: &ty::Ref,
    value_use: &ResultUse<'_>,
) -> Result<()> {
    if !value_use.is_used() {
        // We don't throw type errors for unused values
        return Ok(());
    }

    if ty::is_a::ty_ref_is_a(value_poly, value_use.required_type()) {
        return Ok(());
    }

    // Delegate to the helper that picks the most understandable error kind
    let error_kind = error_kind_for_type_error(value_poly, value_use);
    Err(Error::new(span, error_kind))
}

// Extracts the member type of a list-typed value (e.g. for rest parameters),
// erroring when `poly_type` cannot contain a list at all.
fn member_type_for_poly_list(
    span: Span,
    poly_type: &ty::Ref,
) -> Result> {
    // `Any` trivially admits any member type
    if poly_type == &Ty::Any.into() {
        return Ok(Ty::Any.into());
    }

    let list = poly_type
        .find_member(|t| {
            if let Ty::List(list) = t {
                Some(list)
            } else {
                None
            }
        })
        .ok_or_else(|| {
            Error::new(
                span,
                ErrorKind::IsNotTy(
                    poly_type.clone(),
                    ty::List::new_uniform(Ty::Any.into()).into(),
                ),
            )
        })?;

    Ok(ListIterator::new(list).collect_rest())
}

/// Preserves expressions for their side effects
///
/// `side_effect_exprs` are discarded if they can't cause side effects. `value_expr` will be used as
/// the value of the returned expression.
fn keep_exprs_for_side_effects(
    side_effect_exprs: impl IntoIterator>,
    value_expr: hir::Expr,
) -> hir::Expr {
    let mut needed_exprs: Vec<_> = side_effect_exprs
        .into_iter()
        .filter(expr_can_side_effect)
        .collect();

    if needed_exprs.is_empty() {
        // We don't need any of the `side_effect_exprs`
        return value_expr;
    }

    // Wrap the kept effects plus the value expression in a single `(do)`
    let result_ty = value_expr.result_ty.clone();
    needed_exprs.push(value_expr);

    hir::Expr {
        result_ty,
        kind: hir::ExprKind::Do(needed_exprs),
    }
}

impl<'types> RecursiveDefsCtx<'types> {
    fn new(
        imported_vars: &'types InferredModuleVars,
        defs: Vec>,
    ) -> RecursiveDefsCtx<'types> {
        let mut self_locals = HashMap::new();

        // We do this in reverse order because we infer our defs in reverse order. This doesn't
        // matter for correctness. However, presumably most definitions have more dependencies
Visiting them in forward order should cause less // recursive resolution. let input_defs = defs .into_iter() .rev() .enumerate() .map(|(idx, hir_def)| { let def_id = InputDefId::new(idx); typeck::destruc::visit_locals(&hir_def.destruc, &mut |local_id, decl_type| { let var_type = match decl_type { hir::DeclTy::Known(poly_type) => VarType::Known(poly_type.clone()), hir::DeclTy::Free => { // Record the definition ID so we can deal with forward type references VarType::Pending(def_id) } }; self_locals.insert(local_id, var_type); }); InputDef::Pending(hir_def) }) .collect::>(); RecursiveDefsCtx { complete_defs: Vec::with_capacity(input_defs.len()), input_defs, free_ty_polys: vec![], self_locals, imported_vars, } } fn new_local_ref_node( &self, span: Span, local_id: hir::LocalId, poly_type: ty::Ref, ) -> InferredNode { // We can't override type conditions across modules let type_conds = if poly_type == Ty::Bool.into() { // This seems useless but it allows occurrence typing to work if this type // flows through another node such as `(do)` or `(let)` vec![ VarTypeCond { when: NodeBool::True, override_local_id: local_id, override_type: Ty::LitBool(true).into(), }, VarTypeCond { when: NodeBool::False, override_local_id: local_id, override_type: Ty::LitBool(false).into(), }, ] } else { vec![] }; InferredNode { expr: hir::Expr { result_ty: poly_type, kind: hir::ExprKind::LocalRef(span, local_id), }, type_conds, } } fn insert_free_ty(&mut self, initial_type: ty::Ref) -> FreeTyId { FreeTyId::new_entry_id(&mut self.free_ty_polys, initial_type) } fn visit_lit(&mut self, result_use: &ResultUse<'_>, datum: Datum) -> Result { let lit_type = ty::datum::ty_ref_for_datum(&datum); ensure_is_a(datum.span(), &lit_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: lit_type, kind: hir::ExprKind::Lit(datum), }, type_conds: vec![], }) } /// Calls the passed function with var types overridden by the specified type conds /// /// The var types will be restored after the function 
returns. #[allow(clippy::needless_collect)] fn with_type_conds_applied( &mut self, type_conds: &[VarTypeCond], node_bool: NodeBool, inner: F, ) -> R where F: FnOnce(&mut Self) -> R, { let restore_var_types = type_conds .iter() .filter(|tc| tc.when == node_bool) .map(|type_cond| { let VarTypeCond { override_local_id, ref override_type, .. } = *type_cond; ( override_local_id, self.self_locals .insert(override_local_id, VarType::Known(override_type.clone())) .unwrap(), ) }) .collect::>(); let result = inner(self); // Restore the original types // We need to use `rev()` to make sure we restore the original type if multiple conds // applied to a single var. for (local_id, original_var_type) in restore_var_types.into_iter().rev() { self.self_locals.insert(local_id, original_var_type); } result } fn visit_cond( &mut self, pv: &mut PurityVar, result_use: &ResultUse<'_>, cond: hir::Cond, ) -> Result { use std::iter; let hir::Cond { span, test_expr, true_expr, false_expr, .. } = cond; let test_node = self.visit_expr(pv, &ResultUse::InnerExpr(&Ty::Bool.into()), test_expr)?; let test_known_bool = try_to_bool(test_node.result_ty()); // If a branch isn't taken it doesn't need to match the type of the cond expression let unused_use = &ResultUse::Unused(result_use.required_type()); let (true_result_use, false_result_use) = if test_node.is_divergent() { (unused_use, unused_use) } else { match test_known_bool { Some(true) => (result_use, unused_use), Some(false) => (unused_use, result_use), None => (result_use, result_use), } }; let true_node = self.with_type_conds_applied(&test_node.type_conds, NodeBool::True, |s| { s.visit_expr(pv, true_result_use, true_expr) })?; let false_node = self.with_type_conds_applied(&test_node.type_conds, NodeBool::False, |s| { s.visit_expr(pv, false_result_use, false_expr) })?; if test_node.is_divergent() { // Test diverged; we don't need the branches return Ok(test_node); } // If the test is static then we can significantly optimise match 
test_known_bool { Some(true) => Ok(InferredNode { // Preserve the test expr in case it has side effects but remove the cond expr: keep_exprs_for_side_effects(iter::once(test_node.expr), true_node.expr), ..true_node }), Some(false) => Ok(InferredNode { expr: keep_exprs_for_side_effects(iter::once(test_node.expr), false_node.expr), ..false_node }), None => { let result_ty = ty::unify::unify_to_ty_ref(true_node.result_ty(), false_node.result_ty()); let false_node_bool = try_to_bool(false_node.result_ty()); let true_node_bool = try_to_bool(true_node.result_ty()); let mut type_conds: Vec = test_node .type_conds .into_iter() .filter_map(|type_cond| match type_cond.when { NodeBool::True => { if false_node_bool == Some(false) { // If the false node is always false then our result type being // true implies the test was true Some(type_cond.with_when(NodeBool::True)) } else if false_node_bool == Some(true) { // If the false node is always true then our result type being // false implies the test was true Some(type_cond.with_when(NodeBool::False)) } else { None } } NodeBool::False => { if true_node_bool == Some(true) { Some(type_cond.with_when(NodeBool::False)) } else if true_node_bool == Some(false) { Some(type_cond.with_when(NodeBool::True)) } else { None } } }) .collect(); // If the false branch is always false we can move the occurrence typing from the // true branch upwards. The same reasoning applies for the true branch. Note that // this may override conds that we brought in from our test node. These should // already have the outer occurrence typing applied so they will be more specific. 
if false_node_bool == Some(false) { type_conds.extend(true_node.type_conds); } if true_node_bool == Some(true) { type_conds.extend(false_node.type_conds); } Ok(InferredNode { expr: hir::Expr { result_ty, kind: hir::ExprKind::Cond(Box::new(hir::Cond { span, test_expr: test_node.expr, true_expr: true_node.expr, false_expr: false_node.expr, })), }, type_conds, }) } } } fn visit_ty_pred( &self, result_use: &ResultUse<'_>, span: Span, test_ty: ty::pred::TestTy, ) -> Result { let pred_type = Ty::TyPred(test_ty.clone()).into(); ensure_is_a(span, &pred_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: pred_type, kind: hir::ExprKind::TyPred(span, test_ty), }, type_conds: vec![], }) } fn visit_eq_pred(&self, result_use: &ResultUse<'_>, span: Span) -> Result { let pred_type = Ty::EqPred.into(); ensure_is_a(span, &pred_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: pred_type, kind: hir::ExprKind::EqPred(span), }, type_conds: vec![], }) } fn visit_record_cons( &self, result_use: &ResultUse<'_>, span: Span, record_cons: record::ConsId, ) -> Result { let value_cons_fun_type = record::Cons::value_cons_fun_type(&record_cons).into(); ensure_is_a(span, &value_cons_fun_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: value_cons_fun_type, kind: hir::ExprKind::RecordCons(span, record_cons), }, type_conds: vec![], }) } fn visit_field_accessor( &self, result_use: &ResultUse<'_>, field_accessor: Box, ) -> Result { let record_cons = &field_accessor.record_cons; let record_field = &record_cons.fields()[field_accessor.field_index]; let field_accessor_fun_type = record_field.accessor_fun_type(record_cons).into(); ensure_is_a(field_accessor.span, &field_accessor_fun_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: field_accessor_fun_type, kind: hir::ExprKind::FieldAccessor(field_accessor), }, type_conds: vec![], }) } fn type_for_free_ref( &self, required_type: &ty::Ref, span: Span, current_type: &ty::Ref, ) -> Result> { // 
Unlike references to known variables the `current_type` and `required_type` have equal // footing. We intersect here to find the commonality between the two types. This will // become the new type of the variable. ty::intersect::intersect_ty_refs(required_type, current_type).map_err(|_| { Error::new( span, ErrorKind::VarHasEmptyType(current_type.clone(), required_type.clone()), ) }) } fn visit_export_ref( &mut self, result_use: &ResultUse<'_>, span: Span, export_id: hir::ExportId, ) -> Result { // This comes from an imported module let module_id = export_id.module_id(); let local_id = export_id.local_id(); let known_type = &self.imported_vars[&module_id][&local_id]; ensure_is_a(span, known_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: known_type.clone(), kind: hir::ExprKind::ExportRef(span, export_id), }, type_conds: vec![], }) } fn visit_local_ref( &mut self, result_use: &ResultUse<'_>, span: Span, local_id: hir::LocalId, ) -> Result { let pending_def_id = match self.self_locals[&local_id] { VarType::Pending(def_id) => def_id, VarType::Recursive => return Err(Error::new(span, ErrorKind::RecursiveType)), VarType::Error => return Err(Error::new(span, ErrorKind::DependsOnError)), VarType::Known(ref known_type) => { ensure_is_a(span, known_type, result_use)?; return Ok(self.new_local_ref_node(span, local_id, known_type.clone())); } VarType::ParamScalar(free_ty_id) => { let current_type = &self.free_ty_polys[free_ty_id.to_usize()]; let new_free_type = self.type_for_free_ref(result_use.required_type(), span, current_type)?; self.free_ty_polys[free_ty_id.to_usize()] = new_free_type.clone(); return Ok(self.new_local_ref_node(span, local_id, new_free_type)); } VarType::ParamRest(free_ty_id) => { let current_member_type = &self.free_ty_polys[free_ty_id.to_usize()]; let required_member_type = member_type_for_poly_list(span, result_use.required_type())?; let new_free_type = self.type_for_free_ref(&required_member_type, span, current_member_type)?; 
self.free_ty_polys[free_ty_id.to_usize()] = new_free_type.clone(); let rest_list_type = ty::List::new_uniform(new_free_type).into(); // Make sure we didn't require a specific list type e.g. `(List Int Int Int)` ensure_is_a(span, &rest_list_type, result_use)?; return Ok(self.new_local_ref_node(span, local_id, rest_list_type)); } }; self.recurse_into_def_id(pending_def_id)?; // This assumes `recurse_into_def_id` has populated our variables now self.visit_local_ref(result_use, span, local_id) } fn visit_do( &mut self, pv: &mut PurityVar, result_use: &ResultUse<'_>, mut exprs: Vec>, ) -> Result { let terminal_expr = if let Some(terminal_expr) = exprs.pop() { terminal_expr } else { return Ok(InferredNode { expr: hir::Expr { result_ty: Ty::unit().into(), kind: hir::ExprKind::Do(vec![]), }, type_conds: vec![], }); }; let mut is_divergent = false; let mut inferred_exprs = Vec::with_capacity(exprs.len() + 1); for non_terminal_expr in exprs { let was_divergent = is_divergent; // The type of this expression doesn't matter; its value is discarded let node = self.visit_expr(pv, &ResultUse::Unused(&Ty::Any.into()), non_terminal_expr)?; is_divergent = was_divergent || node.is_divergent(); if !was_divergent && expr_can_side_effect(&node.expr) { inferred_exprs.push(node.expr); } } if is_divergent { self.visit_expr(pv, &ResultUse::Unused(&Ty::Any.into()), terminal_expr)?; Ok(InferredNode { expr: hir::Expr { result_ty: Ty::never().into(), kind: hir::ExprKind::Do(inferred_exprs), }, type_conds: vec![], }) } else { let terminal_node = self.visit_expr(pv, result_use, terminal_expr)?; let result_ty = terminal_node.result_ty().clone(); if result_use.is_used() || expr_can_side_effect(&terminal_node.expr) { inferred_exprs.push(terminal_node.expr); } Ok(InferredNode { expr: hir::Expr { result_ty, kind: hir::ExprKind::Do(inferred_exprs), }, type_conds: terminal_node.type_conds, }) } } /// Visits a function expression /// /// This does a limited amount of backwards type propagation; it will 
attempt to fill in any /// free param or ret types from `required_type`. All declared types in the function will be /// taken as-is. fn visit_fun( &mut self, result_use: &ResultUse<'_>, decl_fun: hir::Fun, self_local_id: Option, ) -> Result { let span = decl_fun.span; // This is set to false if we encounter any free types in our params or ret let mut decl_tys_are_known = true; let required_fun_type = result_use.required_type().find_member(|t| { if let Ty::Fun(fun) = t { Some(fun.as_ref()) } else { None } }); let required_top_fun_type = required_fun_type.map(ty::Fun::top_fun).or_else(|| { result_use.required_type().find_member(|t| { if let Ty::TopFun(top_fun) = t { Some(top_fun.as_ref()) } else { None } }) }); let initial_param_type: ty::List = typeck::destruc::type_for_decl_list_destruc( &decl_fun.params, // Use the required type as a guide for any free types in the parameter list required_fun_type.map(|fun| ListIterator::new(fun.params())), ); // Bind all of our parameter variables let free_ty_offset = self.destruc_list_value( &decl_fun.params, ListIterator::new(&initial_param_type), // If a parameter has a free decl type then we can refine the type true, ); if free_ty_offset != self.free_ty_polys.len() { // We have free parameter types decl_tys_are_known = false; } // Use the declared return type if possible let wanted_ret_type = match decl_fun.ret_ty { hir::DeclTy::Known(poly) => poly, hir::DeclTy::Free => { decl_tys_are_known = false; if let Some(required_top_fun_type) = required_top_fun_type { // Fall back to the backwards type required_top_fun_type.ret().clone() } else { // Use Any as a last resort Ty::Any.into() } } }; let mut known_self_type: Option = None; let mut fun_pv = match decl_fun.purity { hir::DeclPurity::Known(poly_purity) => { if decl_tys_are_known { let self_type = ty::Fun::new( decl_fun.pvars.clone(), decl_fun.tvars.clone(), ty::TopFun::new(poly_purity.clone(), wanted_ret_type.clone()), initial_param_type, ); // We have a fully known type; 
allow recursive calls if let Some(self_local_id) = self_local_id { self.self_locals .insert(self_local_id, VarType::Known(self_type.clone().into())); } known_self_type = Some(self_type); } PurityVar::Known(poly_purity) } hir::DeclPurity::Free => { // Functions start pure until proven otherwise PurityVar::Free(Purity::Pure.into()) } }; let body_result_use = ResultUse::RetExpr(RetExprResultUse { ret_type_span: decl_fun.ret_ty_span, ret_type: &wanted_ret_type, known_self_type: known_self_type.as_ref(), }); let body_node = self.visit_expr(&mut fun_pv, &body_result_use, decl_fun.body_expr)?; let revealed_ret_type = body_node.result_ty(); let revealed_purity = fun_pv.into_poly(); let revealed_param_destruc = { let mut inferred_free_types = self.free_ty_polys.drain(free_ty_offset..); destruc::subst_list_destruc(&mut inferred_free_types, decl_fun.params) }; let revealed_param_type = hir::destruc::poly_for_list_destruc(&revealed_param_destruc); let revealed_type = ty::Fun::new( decl_fun.pvars.clone(), decl_fun.tvars.clone(), ty::TopFun::new(revealed_purity.clone(), revealed_ret_type.clone()), revealed_param_type, ) .into(); let revealed_fun = hir::Fun:: { span, pvars: decl_fun.pvars, tvars: decl_fun.tvars, purity: revealed_purity, params: revealed_param_destruc, ret_ty: revealed_ret_type.clone(), ret_ty_span: decl_fun.ret_ty_span, body_expr: body_node.expr, }; ensure_is_a(span, &revealed_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: revealed_type, kind: hir::ExprKind::Fun(Box::new(revealed_fun)), }, type_conds: vec![], }) } /// Visit a function application /// /// This has a fairly convoluted algorithm for resolving type variables. The essential /// problem is we don't know the type of our parameters until we visit their expressions. /// However, we also need to provide the parameters with backwards type information which may /// come from other parameters. 
We need to decide in which order to reveal the types which /// maximise the amount of useful information. /// /// Firstly, we visit every non-function fixed parameter and the rest parameter with the /// evidence from the return type. We collect our evidence in to a staged selection context to /// ensure if a type variable appears in multiple parameters they unify instead of supersede /// each other. /// /// In the next phase we visit every function fixed parameter. This is done in a second phase /// as these functions frequently relate to both the type of the parameters and the return /// type (e.g. `map`). /// /// The final phase selects the return type. This uses all the evidence collected above. fn visit_fun_app( &mut self, pv: &mut PurityVar, result_use: &ResultUse<'_>, span: Span, fun_type: &ty::Fun, fun_app: FunApp, ) -> Result { let FunApp { fun_expr, fixed_arg_exprs, rest_arg_expr, } = fun_app; // The context used to select the types for our non-function parameters let mut non_fun_param_stx = ty::select::SelectCtx::new(fun_type.pvars(), fun_type.tvars()); if let PurityVar::Known(purity_type) = pv { if purity_type != &Purity::Impure.into() { // Add our purity information non_fun_param_stx.add_evidence_purity(fun_type.purity(), purity_type); } } // The context used to select the types for our function parameters. This includes the // evidence gathered when visiting non-function parameters. 
let mut fun_param_stx = non_fun_param_stx.clone(); let non_fun_param_pta = non_fun_param_stx.into_poly_ty_args(); // Iterate over our parameter type to feed type information in to the arguments let mut param_iter = ListIterator::new(fun_type.params()); let supplied_arg_count = fixed_arg_exprs.len(); let wanted_arity = WantedArity::new(param_iter.fixed_len(), param_iter.has_rest()); let mut is_divergent = false; struct PendingFixedArg<'ty> { index: usize, param_type: &'ty ty::Ref, expr: hir::Expr, } let mut fun_fixed_args: Vec> = vec![]; let mut non_fun_fixed_args: Vec> = vec![]; let mut inferred_fixed_arg_exprs: Vec<(usize, hir::Expr)> = Vec::with_capacity(fixed_arg_exprs.len()); // Pre-visit our fixed args and categorise them as fun and non-fun for (index, fixed_arg_expr) in fixed_arg_exprs.into_iter().enumerate() { let param_type = param_iter.next().ok_or_else(|| { Error::new( span, ErrorKind::WrongArity(supplied_arg_count, wanted_arity), ) })?; let pending_fixed_arg = PendingFixedArg { index, param_type, expr: fixed_arg_expr, }; if let ty::Ref::Fixed(Ty::Fun(_)) = param_type { fun_fixed_args.push(pending_fixed_arg); } else { non_fun_fixed_args.push(pending_fixed_arg); } } for PendingFixedArg { index, param_type, expr, } in non_fun_fixed_args { let wanted_arg_type = ty::subst::subst_poly(&non_fun_param_pta, param_type); let fixed_arg_node = self.visit_expr(pv, &ResultUse::InnerExpr(&wanted_arg_type), expr)?; is_divergent = is_divergent || fixed_arg_node.is_divergent(); fun_param_stx.add_evidence(param_type, fixed_arg_node.result_ty()); inferred_fixed_arg_exprs.push((index, fixed_arg_node.expr)); } // Visit our rest arg next so it's grouped in the first phase let inferred_rest_arg_expr = if let Some(rest_arg_expr) = rest_arg_expr { let tail_type = param_iter.tail_type().into(); let wanted_tail_type = ty::subst::subst_poly(&non_fun_param_pta, &tail_type); let rest_arg_node = self.visit_expr(pv, &ResultUse::InnerExpr(&wanted_tail_type), rest_arg_expr)?; is_divergent 
= is_divergent || rest_arg_node.is_divergent(); fun_param_stx.add_evidence(&tail_type, rest_arg_node.result_ty()); Some(rest_arg_node.expr) } else if param_iter.fixed_len() > 0 { // We wanted more args! return Err(Error::new( span, ErrorKind::WrongArity(supplied_arg_count, wanted_arity), )); } else { // We can use the lack of a rest arg as type evidence fun_param_stx.add_evidence(¶m_iter.collect_rest(), &Ty::never().into()); None }; // The context used to select our return type. This includes the evidence gathered when // visiting all parameters. let mut ret_stx = fun_param_stx.clone(); let fun_param_pta = fun_param_stx.into_poly_ty_args(); for PendingFixedArg { index, param_type, expr, } in fun_fixed_args { let wanted_arg_type = ty::subst::subst_poly(&fun_param_pta, param_type); let fixed_arg_node = self.visit_expr(pv, &ResultUse::InnerExpr(&wanted_arg_type), expr)?; is_divergent = is_divergent || fixed_arg_node.is_divergent(); ret_stx.add_evidence(param_type, fixed_arg_node.result_ty()); inferred_fixed_arg_exprs.push((index, fixed_arg_node.expr)); } inferred_fixed_arg_exprs.sort_unstable_by_key(|k| k.0); let inferred_fixed_arg_exprs = inferred_fixed_arg_exprs.into_iter().map(|e| e.1).collect(); let ret_pta = ret_stx .into_complete_poly_ty_args() .map_err(|error| match error { ty::select::Error::UnselectedPVar(pvar) => { Error::new(span, ErrorKind::UnselectedPVar(pvar.clone())) } ty::select::Error::UnselectedTVar(tvar) => { Error::new(span, ErrorKind::UnselectedTVar(tvar.clone())) } })?; let ret_type = if is_divergent { Ty::never().into() } else { ty::subst::subst_poly(&ret_pta, fun_type.ret()) }; // Keep track of the purity from the application let app_purity = ty::subst::subst_purity(&ret_pta, fun_type.purity()); unify_app_purity(pv, &app_purity); ensure_is_a(span, &ret_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: ret_type, kind: hir::ExprKind::App(Box::new(hir::App { span, fun_expr, ty_args: ret_pta, fixed_arg_exprs: 
inferred_fixed_arg_exprs, rest_arg_expr: inferred_rest_arg_expr, })), }, type_conds: vec![], }) } /// Visit a `(recur)` /// /// This is similar to `visit_fun_app`. However, we require that the `(recur)`'s arguments match /// the generic function type. This allows us to tail recurse when monomorphising polymorphic /// functions because we know we can re-enter the same polymorph the `(recur)` occurs in. /// /// This sounds more complicated than normal function application but it's actual significantly /// easier due to not having to perform type variable selection. fn visit_recur( &mut self, pv: &mut PurityVar, result_use: &ResultUse<'_>, recur: hir::Recur, ) -> Result { let hir::Recur { span, fixed_arg_exprs, rest_arg_expr, .. } = recur; let ret_expr_use = if let ResultUse::RetExpr(ret_expr_use) = result_use { ret_expr_use } else { return Err(Error::new(span, ErrorKind::NonTailRecur)); }; let self_type = if let Some(self_type) = ret_expr_use.known_self_type { self_type } else { return Err(Error::new(span, ErrorKind::RecurWithoutFunTypeDecl)); }; // Iterate over our parameter type to feed type information in to the arguments let mut param_iter = ListIterator::new(self_type.params()); let supplied_arg_count = fixed_arg_exprs.len(); let wanted_arity = WantedArity::new(param_iter.fixed_len(), param_iter.has_rest()); let mut is_divergent = false; let inferred_fixed_arg_exprs = fixed_arg_exprs .into_iter() .map(|fixed_arg_expr| { let param_type = param_iter.next().ok_or_else(|| { Error::new( span, ErrorKind::WrongArity(supplied_arg_count, wanted_arity), ) })?; let fixed_arg_node = self.visit_expr(pv, &ResultUse::InnerExpr(param_type), fixed_arg_expr)?; is_divergent = is_divergent || fixed_arg_node.is_divergent(); Ok(fixed_arg_node.expr) }) .collect::>>()?; let inferred_rest_arg_expr = if let Some(rest_arg_expr) = rest_arg_expr { let tail_type = param_iter.tail_type().into(); let rest_arg_node = self.visit_expr(pv, &ResultUse::InnerExpr(&tail_type), rest_arg_expr)?; 
is_divergent = is_divergent || rest_arg_node.is_divergent(); Some(rest_arg_node.expr) } else if param_iter.fixed_len() > 0 { // We wanted more args! return Err(Error::new( span, ErrorKind::WrongArity(supplied_arg_count, wanted_arity), )); } else { None }; let ret_type: ty::Ref = if is_divergent { Ty::never().into() } else { self_type.ret().clone() }; ensure_is_a(span, &ret_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: ret_type, kind: hir::ExprKind::Recur(Box::new(hir::Recur { span, fixed_arg_exprs: inferred_fixed_arg_exprs, rest_arg_expr: inferred_rest_arg_expr, })), }, type_conds: vec![], }) } /// Visit type predicate application with a single fixed arg /// /// This supports full occurrence typing fn visit_fixed_ty_pred_app( &mut self, pv: &mut PurityVar, span: Span, fun_expr: hir::Expr, test_ty: &ty::pred::TestTy, subject_expr: hir::Expr, ) -> Result { use std::iter; let subject_local_id = if let hir::ExprKind::LocalRef(_, local_id) = &subject_expr.kind { Some(*local_id) } else { None }; let subject_node = self.visit_expr(pv, &ResultUse::InnerExpr(&Ty::Any.into()), subject_expr)?; let subject_poly = subject_node.result_ty(); match test_ty.match_subject_ref(subject_poly) { Some(known_result) => { let result_ty = if subject_node.is_divergent() { Ty::never().into() } else { Ty::LitBool(known_result).into() }; // Get rid of the predicate application entirely // Keep the subject expr around for its side effect Ok(InferredNode { expr: keep_exprs_for_side_effects( iter::once(subject_node.expr), hir::Expr { result_ty, kind: hir::ExprKind::Lit(Datum::Bool(span, known_result)), }, ), type_conds: vec![], }) } None => { let poly_type = if subject_node.is_divergent() { Ty::never().into() } else { Ty::Bool.into() }; let type_conds = if let Some(override_local_id) = subject_local_id { let test_poly = test_ty.to_ty().into(); let type_if_true = ty::intersect::intersect_ty_refs(subject_poly, &test_poly) .unwrap_or_else(|_| subject_poly.clone()); let 
type_if_false = ty::subtract::subtract_ty_refs(subject_poly, &test_poly); vec![ VarTypeCond { when: NodeBool::True, override_local_id, override_type: type_if_true, }, VarTypeCond { when: NodeBool::False, override_local_id, override_type: type_if_false, }, ] } else { vec![] }; Ok(InferredNode { expr: hir::Expr { result_ty: poly_type, kind: hir::ExprKind::App(Box::new(hir::App { span, fun_expr, ty_args: TyArgs::empty(), fixed_arg_exprs: vec![subject_node.expr], rest_arg_expr: None, })), }, type_conds, }) } } } /// Visit type predicate application with a rest argument /// /// This can do static evaluation but it does not support occurrence typing. This is only /// included to be orthogonal with other function applications; it's not terribly useful /// otherwise. fn visit_rest_ty_pred_app( &mut self, pv: &mut PurityVar, span: Span, fun_expr: hir::Expr, test_ty: &ty::pred::TestTy, subject_list_expr: hir::Expr, ) -> Result { use std::iter; let wanted_subject_list_type = ty::List::new_tuple(Box::new([Ty::Any.into()])).into(); let subject_list_node = self.visit_expr( pv, &ResultUse::InnerExpr(&wanted_subject_list_type), subject_list_expr, )?; let subject_type = ListIterator::try_new_from_ty_ref(subject_list_node.result_ty()) .and_then(|mut iter| iter.next()) .expect("Unable to extract type argument from type predicate rest list"); match test_ty.match_subject_ref(subject_type) { Some(known_bool) => { let result_ty = if subject_list_node.is_divergent() { Ty::never().into() } else { Ty::LitBool(known_bool).into() }; Ok(InferredNode { expr: keep_exprs_for_side_effects( iter::once(subject_list_node.expr), hir::Expr { result_ty, kind: hir::ExprKind::Lit(Datum::Bool(span, known_bool)), }, ), type_conds: vec![], }) } None => { let poly_type = if subject_list_node.is_divergent() { // The subject diverged so we diverged Ty::never().into() } else { Ty::Bool.into() }; Ok(InferredNode { expr: hir::Expr { result_ty: poly_type, kind: hir::ExprKind::App(Box::new(hir::App { span, fun_expr, 
ty_args: TyArgs::empty(), fixed_arg_exprs: vec![], rest_arg_expr: Some(subject_list_node.expr), })), }, type_conds: vec![], }) } } } /// Visit equality predicate application with two fixed args /// /// This supports full occurrence typing fn visit_fixed_eq_pred_app( &mut self, pv: &mut PurityVar, span: Span, fun_expr: hir::Expr, left_expr: hir::Expr, right_expr: hir::Expr, ) -> Result { use crate::ty::props::is_literal; use std::iter; let left_local_id = if let hir::ExprKind::LocalRef(_, local_id) = &left_expr.kind { Some(*local_id) } else { None }; let right_local_id = if let hir::ExprKind::LocalRef(_, local_id) = &right_expr.kind { Some(*local_id) } else { None }; let left_node = self.visit_expr(pv, &ResultUse::InnerExpr(&Ty::Any.into()), left_expr)?; let left_ty = left_node.result_ty(); let right_node = self.visit_expr(pv, &ResultUse::InnerExpr(&Ty::Any.into()), right_expr)?; let right_ty = right_node.result_ty(); // Optimise away comparisons between booleans and literal true // This allows their type conditions to flow through if try_to_bool(left_ty) == Some(true) && ty::is_a::ty_ref_is_a(right_ty, &ty::Ty::Bool.into()) { return Ok(right_node); } if try_to_bool(right_ty) == Some(true) && ty::is_a::ty_ref_is_a(left_ty, &ty::Ty::Bool.into()) { return Ok(left_node); } let left_is_literal = is_literal(left_ty); let right_is_literal = is_literal(right_ty); let is_divergent = left_node.is_divergent() || right_node.is_divergent(); if left_is_literal && right_is_literal && left_ty == right_ty { // We were comparing literal types; this is a static true let result_ty = if is_divergent { Ty::never().into() } else { Ty::LitBool(true).into() }; return Ok(InferredNode { expr: keep_exprs_for_side_effects( iter::once(left_node.expr).chain(iter::once(right_node.expr)), hir::Expr { result_ty, kind: hir::ExprKind::Lit(Datum::Bool(span, true)), }, ), type_conds: vec![], }); }; let intersected_type = match ty::intersect::intersect_ty_refs(left_ty, right_ty) { Ok(intersected_type) 
=> intersected_type, Err(ty::intersect::Error::Disjoint) => { let result_ty = if is_divergent { Ty::never().into() } else { Ty::LitBool(false).into() }; return Ok(InferredNode { expr: keep_exprs_for_side_effects( iter::once(left_node.expr).chain(iter::once(right_node.expr)), hir::Expr { result_ty, kind: hir::ExprKind::Lit(Datum::Bool(span, false)), }, ), type_conds: vec![], }); } }; let mut type_conds = vec![]; if let Some(override_local_id) = left_local_id { type_conds.push(VarTypeCond { when: NodeBool::True, override_local_id, override_type: intersected_type.clone(), }); if right_is_literal { let subtracted_type = ty::subtract::subtract_ty_refs(left_ty, right_ty); type_conds.push(VarTypeCond { when: NodeBool::False, override_local_id, override_type: subtracted_type, }); } } if let Some(override_local_id) = right_local_id { type_conds.push(VarTypeCond { when: NodeBool::True, override_local_id, override_type: intersected_type, }); if left_is_literal { let subtracted_type = ty::subtract::subtract_ty_refs(right_ty, left_ty); type_conds.push(VarTypeCond { when: NodeBool::False, override_local_id, override_type: subtracted_type, }); } } // Invert type conditions for comparisons between a boolean and non-true value if ty::is_a::ty_ref_is_a(right_ty, &ty::Ty::Bool.into()) && ty::intersect::intersect_ty_refs(left_ty, &ty::Ty::LitBool(true).into()).is_err() { type_conds.extend( right_node .type_conds .into_iter() .map(VarTypeCond::into_inverted), ); } else if ty::is_a::ty_ref_is_a(left_ty, &ty::Ty::Bool.into()) && ty::intersect::intersect_ty_refs(right_ty, &ty::Ty::LitBool(true).into()).is_err() { type_conds.extend( left_node .type_conds .into_iter() .map(VarTypeCond::into_inverted), ); } let result_ty = if is_divergent { Ty::never().into() } else { Ty::Bool.into() }; Ok(InferredNode { expr: hir::Expr { result_ty, kind: hir::ExprKind::App(Box::new(hir::App { span, fun_expr, ty_args: TyArgs::empty(), fixed_arg_exprs: vec![left_node.expr, right_node.expr], rest_arg_expr: 
None, })), }, type_conds, }) } fn visit_app( &mut self, pv: &mut PurityVar, result_use: &ResultUse<'_>, app: hir::App, ) -> Result { let hir::App { span, fun_expr, mut fixed_arg_exprs, rest_arg_expr, .. } = app; // The only type information we can feed back is that we want a function of a certain // purity returning a certain value let wanted_purity = match pv { PurityVar::Free(_) => { // We're inferring the purity; this application can have any purity Purity::Impure.into() } PurityVar::Known(purity_type) => { // We have a specific declared purity purity_type.clone() } }; let wanted_fun_type = ty::TopFun::new(wanted_purity, result_use.required_type().clone()).into(); let fun_node = self.visit_expr(pv, &ResultUse::InnerExpr(&wanted_fun_type), fun_expr)?; let revealed_fun_type = fun_node.result_ty().clone(); match revealed_fun_type.resolve_to_ty() { Ty::TopFun(_) => Err(Error::new(span, ErrorKind::TopFunApply(revealed_fun_type))), Ty::TyPred(test_ty) => { let wanted_arity = WantedArity::new(1, false); match (fixed_arg_exprs.len(), rest_arg_expr) { (1, None) => { let subject_expr = fixed_arg_exprs.pop().unwrap(); self.visit_fixed_ty_pred_app(pv, span, fun_node.expr, test_ty, subject_expr) } (0, Some(subject_list_expr)) => self.visit_rest_ty_pred_app( pv, span, fun_node.expr, test_ty, subject_list_expr, ), (supplied_arg_count, _) => Err(Error::new( span, ErrorKind::WrongArity(supplied_arg_count, wanted_arity), )), } } Ty::EqPred => { if fixed_arg_exprs.len() == 2 && rest_arg_expr.is_none() { let right_expr = fixed_arg_exprs.pop().unwrap(); let left_expr = fixed_arg_exprs.pop().unwrap(); self.visit_fixed_eq_pred_app(pv, span, fun_node.expr, left_expr, right_expr) } else { let fun_app = FunApp { fun_expr: fun_node.expr, fixed_arg_exprs, rest_arg_expr, }; self.visit_fun_app(pv, result_use, span, &ty::Fun::new_for_eq_pred(), fun_app) } } Ty::Fun(fun_type) => { let fun_app = FunApp { fun_expr: fun_node.expr, fixed_arg_exprs, rest_arg_expr, }; self.visit_fun_app(pv, 
result_use, span, fun_type, fun_app) } _ => panic!("Unexpected type"), } } fn visit_let( &mut self, pv: &mut PurityVar, result_use: &ResultUse<'_>, hir_let: hir::Let, ) -> Result { let hir::Let { span, destruc, value_expr, body_expr, } = hir_let; let required_destruc_type = typeck::destruc::type_for_decl_destruc(&destruc, None); // Pre-bind our variables to deal with recursive definitions let self_local_id = typeck::destruc::visit_locals(&destruc, &mut |local_id, decl_type| { let var_type = match decl_type { hir::DeclTy::Known(poly_type) => VarType::Known(poly_type.clone()), hir::DeclTy::Free => VarType::Recursive, }; self.self_locals.insert(local_id, var_type); }); let value_node = self.visit_expr_with_self_local_id( pv, &ResultUse::InnerExpr(&required_destruc_type), value_expr, self_local_id, )?; let free_ty_offset = self.destruc_value(&destruc, value_node.result_ty(), false); let body_node = self.visit_expr(pv, result_use, body_expr)?; let mut inferred_free_types = self.free_ty_polys.drain(free_ty_offset..); let result_ty = if value_node.is_divergent() { // Value was divergent Ty::never().into() } else { body_node.result_ty().clone() }; Ok(InferredNode { expr: hir::Expr { result_ty, kind: hir::ExprKind::Let(Box::new(hir::Let { span, destruc: destruc::subst_destruc(&mut inferred_free_types, destruc), value_expr: value_node.expr, body_expr: body_node.expr, })), }, type_conds: body_node.type_conds, }) } fn visit_rust_fun( &self, result_use: &ResultUse<'_>, rust_fun: Arc, ) -> Result { let span = rust_fun.span(); // Rust functions have their types validated by the RFI system when they're loaded // We just need to make sure we satisfy `result_use` and convert to an `InferredNode` let poly_type = Ty::Fun(Box::new(rust_fun.arret_fun_type().clone())).into(); ensure_is_a(span, &poly_type, result_use)?; Ok(InferredNode { expr: hir::Expr { result_ty: poly_type, kind: hir::ExprKind::RustFun(rust_fun), }, type_conds: vec![], }) } fn visit_expr_with_self_local_id( &mut self, 
pv: &mut PurityVar, result_use: &ResultUse<'_>, expr: hir::Expr, self_local_id: Option, ) -> Result { use crate::hir::ExprKind; match expr.kind { ExprKind::Lit(datum) => self.visit_lit(result_use, datum), ExprKind::Cond(cond) => self.visit_cond(pv, result_use, *cond), ExprKind::Do(exprs) => self.visit_do(pv, result_use, exprs), ExprKind::Fun(fun) => self.visit_fun(result_use, *fun, self_local_id), ExprKind::RustFun(rust_fun) => self.visit_rust_fun(result_use, rust_fun), ExprKind::TyPred(span, test_type) => self.visit_ty_pred(result_use, span, test_type), ExprKind::EqPred(span) => self.visit_eq_pred(result_use, span), ExprKind::RecordCons(span, record_cons) => { self.visit_record_cons(result_use, span, record_cons) } ExprKind::FieldAccessor(field_accessor) => { self.visit_field_accessor(result_use, field_accessor) } ExprKind::Let(hir_let) => self.visit_let(pv, result_use, *hir_let), ExprKind::LocalRef(span, local_id) => self.visit_local_ref(result_use, span, local_id), ExprKind::ExportRef(span, export_id) => { self.visit_export_ref(result_use, span, export_id) } ExprKind::App(app) => self.visit_app(pv, result_use, *app), ExprKind::Recur(recur) => self.visit_recur(pv, result_use, *recur), ExprKind::MacroExpand(span, inner_expr) => self .visit_expr_with_self_local_id(pv, result_use, *inner_expr, self_local_id) .map(|inferred| InferredNode { expr: hir::Expr { result_ty: inferred.expr.result_ty.clone(), kind: ExprKind::MacroExpand(span, Box::new(inferred.expr)), }, ..inferred }) .map_err(|err| err.with_macro_invocation_span(span)), } } fn visit_expr( &mut self, pv: &mut PurityVar, result_use: &ResultUse<'_>, expr: hir::Expr, ) -> Result { self.visit_expr_with_self_local_id(pv, result_use, expr, None) } fn destruc_scalar_value( &mut self, scalar: &destruc::Scalar, value_type: &ty::Ref, is_param: bool, ) -> usize { let start_offset = self.free_ty_polys.len(); let free_ty_id = if *scalar.ty() == hir::DeclTy::Free { Some(self.insert_free_ty(value_type.clone())) } else { 
None }; if let Some(local_id) = *scalar.local_id() { let var_type = if let (Some(free_ty_id), true) = (free_ty_id, is_param) { VarType::ParamScalar(free_ty_id) } else { VarType::Known(value_type.clone()) }; self.self_locals.insert(local_id, var_type); } start_offset } fn destruc_rest_value( &mut self, rest: &destruc::Scalar, value_type_iter: ListIterator<'_, ty::Poly>, is_param: bool, ) { let param_free_ty_id = if *rest.ty() == hir::DeclTy::Free { // Start with member type as a guide let member_type = value_type_iter.clone().collect_rest(); let free_ty_id = self.insert_free_ty(member_type); Some(free_ty_id).filter(|_| is_param) } else { None }; if let Some(local_id) = *rest.local_id() { let var_type = if let Some(param_free_ty_id) = param_free_ty_id { VarType::ParamRest(param_free_ty_id) } else { // If we're not a rest parameter we know our exact tail type. We can't subst // the tail type in to the destruc because it only takes a member type. // However, we can use the exact tail type whenever we reference the var. 
VarType::Known(value_type_iter.tail_type().into()) }; self.self_locals.insert(local_id, var_type); } } fn destruc_list_value( &mut self, list: &destruc::List, mut value_type_iter: ListIterator<'_, ty::Poly>, is_param: bool, ) -> usize { let start_offset = self.free_ty_polys.len(); for fixed_destruc in list.fixed() { let member_type = value_type_iter .next() .expect("Destructured value with unexpected type"); self.destruc_value(fixed_destruc, member_type, is_param); } if let Some(rest) = list.rest() { self.destruc_rest_value(rest, value_type_iter, is_param); } start_offset } fn destruc_value( &mut self, destruc: &destruc::Destruc, value_type: &ty::Ref, is_param: bool, ) -> usize { match destruc { destruc::Destruc::Scalar(_, scalar) => { self.destruc_scalar_value(scalar, value_type, is_param) } destruc::Destruc::List(_, list) => { let value_type_iter = ListIterator::try_new_from_ty_ref(value_type) .expect("Tried to destruc non-list"); self.destruc_list_value(list, value_type_iter, is_param) } } } fn visit_def(&mut self, hir_def: hir::Def) -> Result> { let hir::Def { span, macro_invocation_span, destruc, value_expr, } = hir_def; // Module definitions must be pure let mut pv = PurityVar::Known(Purity::Pure.into()); // Mark all of our free typed variable as recursive let self_local_id = typeck::destruc::visit_locals(&destruc, &mut |local_id, decl_type| { if *decl_type == hir::DeclTy::Free { self.self_locals.insert(local_id, VarType::Recursive); } }); let required_type = typeck::destruc::type_for_decl_destruc(&destruc, None); let value_node = match self.visit_expr_with_self_local_id( &mut pv, &ResultUse::InnerExpr(&required_type), value_expr, self_local_id, ) { Ok(value_node) => value_node, Err(error) => { // Mark this def as an error so we can suppress cascade errors typeck::destruc::visit_locals(&destruc, &mut |local_id, _| { self.self_locals.insert(local_id, VarType::Error); }); return Err(error); } }; let free_ty_offset = self.destruc_value(&destruc, 
value_node.result_ty(), false); let mut inferred_free_types = self.free_ty_polys.drain(free_ty_offset..); Ok(hir::Def { span, macro_invocation_span, destruc: destruc::subst_destruc(&mut inferred_free_types, destruc), value_expr: value_node.expr, }) } fn recurse_into_def_id(&mut self, def_id: InputDefId) -> Result<()> { let def_index = def_id.to_usize(); let previous_state = std::mem::replace(&mut self.input_defs[def_index], InputDef::Complete); if let InputDef::Pending(def) = previous_state { let inferred_def = self.visit_def(def)?; self.complete_defs.push(inferred_def); } else { panic!("Tried to infer already complete def. An error previously occurred?") } Ok(()) } fn into_inferred_module(mut self) -> result::Result> { let mut errs = vec![]; while let Some(def_state) = self.input_defs.pop() { match def_state { InputDef::Pending(def) => match self.visit_def(def) { Ok(inferred_def) => { self.complete_defs.push(inferred_def); } Err(err) => { // If this is due to a previous error it's just noise to report it if err.kind() != &ErrorKind::DependsOnError { errs.push(err); } } }, InputDef::Complete => {} } } if !errs.is_empty() { return Err(errs); } let inferred_locals: InferredLocals = self .self_locals .into_iter() .flat_map(|(local_id, var_type)| match var_type { VarType::Known(poly) => Some((local_id, poly)), _ => None, }) .collect(); Ok(InferredModule { inferred_locals, defs: self.complete_defs, }) } } pub fn ensure_main_type( fallback_span: Span, complete_defs: &[hir::Def], main_local_id: hir::LocalId, inferred_main_type: &ty::Ref, ) -> Result<()> { let expected_main_type = ty::Fun::new_for_main().into(); if !ty::is_a::ty_ref_is_a(inferred_main_type, &expected_main_type) { use crate::reporting::LocTrace; // Try to find where `(main!)` was defined let main_loc_trace = complete_defs .iter() .find_map(|def| { if let destruc::Destruc::Scalar(_, ref scalar) = def.destruc { if scalar.local_id() == &Some(main_local_id) { return Some(LocTrace::new(def.span, 
def.macro_invocation_span)); } } None }) // Fall back to the `Span` we were given .unwrap_or_else(|| fallback_span.into()); return Err(Error::new_with_loc_trace( main_loc_trace, ErrorKind::IsNotTy(inferred_main_type.clone(), expected_main_type), )); }; Ok(()) } pub fn infer_module( imported_inferred_vars: &InferredModuleVars, defs: Vec>, ) -> result::Result> { RecursiveDefsCtx::new(imported_inferred_vars, defs).into_inferred_module() } pub fn infer_repl_expr( all_inferred_vars: &InferredModuleVars, expr: hir::Expr, ) -> Result { let mut rdcx = RecursiveDefsCtx::new(all_inferred_vars, vec![]); let mut pv = PurityVar::Known(Purity::Impure.into()); rdcx.visit_expr(&mut pv, &ResultUse::InnerExpr(&Ty::Any.into()), expr) } #[cfg(test)] mod test { use super::*; use crate::hir::lowering::expr_for_str; use arret_syntax::span::t2s; fn type_for_expr( required_type: &ty::Ref, expr: hir::Expr, ) -> Result> { let imported_vars = HashMap::new(); let mut rdcx = RecursiveDefsCtx::new(&imported_vars, vec![]); let mut pv = PurityVar::Known(Purity::Pure.into()); rdcx.visit_expr(&mut pv, &ResultUse::InnerExpr(required_type), expr) .map(|node| node.expr.result_ty) } fn assert_type_for_expr(ty_str: &str, expr_str: &str) { let expr = expr_for_str(expr_str); let poly = hir::poly_for_str(ty_str); assert_eq!(poly, type_for_expr(&Ty::Any.into(), expr).unwrap()); } fn assert_constrained_type_for_expr(expected_ty_str: &str, expr_str: &str, guide_ty_str: &str) { let expr = expr_for_str(expr_str); let expected_poly = hir::poly_for_str(expected_ty_str); let guide_poly = hir::poly_for_str(guide_ty_str); assert_eq!(expected_poly, type_for_expr(&guide_poly, expr).unwrap()); } fn assert_type_error(err: &Error, expr_str: &str) { let expr = expr_for_str(expr_str); assert_eq!(err, &type_for_expr(&Ty::Any.into(), expr).unwrap_err()) } #[test] fn literal_expr() { assert_type_for_expr("Int", "1"); } #[test] fn do_expr() { assert_type_for_expr("'()", "(do)"); assert_type_for_expr("Int", "(do 'one 'two 3)"); 
// We have no diverging primitives so we can't test this case easily. This is covered in // run-pass. } #[test] fn cond_expr() { assert_type_for_expr("'true-branch", "(if true 'true-branch 'false-branch)"); assert_type_for_expr("'false-branch", "(if false 'true-branch 'false-branch)"); assert_type_for_expr("(Bool -> Bool)", "(fn (x) (if x true false))"); // This is a reduced version of `(and)` // We shouldn't complain about the type in the false branch because it's unreachable assert_constrained_type_for_expr("true", "(if true true false)", "true"); } #[test] fn fun_expr() { assert_type_for_expr("(-> ())", "(fn ())"); assert_type_for_expr("(Any -> true)", "(fn (_) true)"); assert_type_for_expr("(Str -> Str)", "(fn ([x Str]) x)"); // We should feed our wanted type in to the function type assert_constrained_type_for_expr("(Sym -> true)", "(fn (_) true)", "(Sym -> true)"); assert_constrained_type_for_expr("(Sym -> Sym)", "(fn (x) x)", "(Sym -> Any))"); // Function with free types being bound to an incompatible type let j = "(let [[f (Sym -> true)] (fn ([_ Str]) true)])"; let t = " ^^^^^^^^^^^^^^^^^^^ "; let err = Error::new( t2s(t), ErrorKind::IsNotTy( hir::poly_for_str("(Str -> true)"), hir::poly_for_str("(Sym -> true)"), ), ); assert_type_error(&err, j); // Function with a known type being bound to an incompatible type let j = "(let [[f (Sym -> true)] (fn ([_ Str]) -> true true)])"; let t = " ^^^^^^^^^^^^^^^^^^^^^^^^^^^ "; let err = Error::new( t2s(t), ErrorKind::IsNotTy( hir::poly_for_str("(Str -> true)"), hir::poly_for_str("(Sym -> true)"), ), ); assert_type_error(&err, j); let j = "(fn ([x Str]) -> Sym x)"; let t = " ^^^ "; let u = " ^ "; let err = Error::new( t2s(u), ErrorKind::IsNotRetTy(IsNotRetTy::new( hir::poly_for_str("Str"), hir::poly_for_str("Sym"), Some(t2s(t)), )), ); assert_type_error(&err, j); } #[test] fn app_types() { assert_type_for_expr("'foo", "((fn () 'foo))"); assert_type_for_expr("true", "(sym? 'foo)"); assert_type_for_expr("false", "(sym? 
false)"); assert_type_for_expr("Int", "((fn #{A} ([value A]) -> A value) 1)"); assert_type_for_expr("'foo", "((fn #{A} ([value A]) -> A value) & '(foo))"); assert_type_for_expr( "(List & Bool)", "((fn #{A} (& [rest A]) -> (List & A) rest) true false)", ); assert_type_for_expr( "Int", // This is essentially `(map)` without the use of lists "((fn #{I O} ([mapper (I -> O)] [i I]) -> O (mapper i)) (fn (x) x) 1))", ); assert_type_for_expr( "Int", // With the argument positions swapped "((fn #{I O} ([i I] [mapper (I -> O)]) -> O (mapper i)) 1 (fn (x) x)))", ); assert_type_for_expr( "Int", // With explicit type annotations "((fn #{I O} ([i I] [mapper (I -> O)]) -> O (mapper i)) 1 (fn ([x Int]) -> Int x)))", ); } #[test] fn recur_expr() { assert_type_for_expr("'foo", "((fn ([x Int]) -> 'foo (recur x)) 1)"); let j = "((fn () -> () (recur) ()))"; let t = " ^^^^^^^ "; let err = Error::new(t2s(t), ErrorKind::NonTailRecur); assert_type_error(&err, j); let j = "((fn () (recur)))"; let t = " ^^^^^^^ "; let err = Error::new(t2s(t), ErrorKind::RecurWithoutFunTypeDecl); assert_type_error(&err, j); let j = "((fn (x) -> 'foo (recur x)) 1)"; let t = " ^^^^^^^^^ "; let err = Error::new(t2s(t), ErrorKind::RecurWithoutFunTypeDecl); assert_type_error(&err, j); let j = "((fn ([x Int]) (recur x)) 1)"; let t = " ^^^^^^^^^ "; let err = Error::new(t2s(t), ErrorKind::RecurWithoutFunTypeDecl); assert_type_error(&err, j); } #[test] fn app_purity() { // An empty function is pure assert_type_for_expr("(-> false)", "(fn () false)"); // Calling a pure function in an inferred purity should leave us pure assert_type_for_expr("(-> false)", "(fn () ((fn () -> false false)))"); // Calling the impure function in an inferred purity should make us impure assert_type_for_expr("(->! false)", "(fn () ((fn () ->! false false)))"); } #[test] fn impure_app_within_pure() { // Calling an impure function inside a function declared as pure should fail let j = "(fn () -> Bool ((fn () ->! 
false false)))"; let t = " ^^^^^^^^^^^^^^^^^^^^^^^ "; let err = Error::new( t2s(t), ErrorKind::IsNotPurity(hir::poly_for_str("(->! false)"), Purity::Pure.into()), ); assert_type_error(&err, j); } #[test] fn too_many_args() { let j = "((fn ()) 1)"; let t = "^^^^^^^^^^^"; let wanted_arity = WantedArity::new(0, false); let err = Error::new(t2s(t), ErrorKind::WrongArity(1, wanted_arity)); assert_type_error(&err, j); } #[test] fn not_enough_args() { let j = "((fn (_ _)) 1)"; let t = "^^^^^^^^^^^^^^"; let wanted_arity = WantedArity::new(2, false); let err = Error::new(t2s(t), ErrorKind::WrongArity(1, wanted_arity)); assert_type_error(&err, j); } #[test] fn list_destruc() { assert_type_for_expr("Int", "(let [(x) '(1)] x)"); assert_type_for_expr( "(List true false)", "(let [(_ & rest) '(1 true false)] rest)", ); } #[test] fn var_ref() { assert_type_for_expr("Int", "(let [x 1] x)") } #[test] fn ty_pred() { assert_type_for_expr("true", "(sym? & '(foo))"); assert_type_for_expr("true", "(sym? 'foo)"); assert_type_for_expr("false", "(int? & '(foo))"); assert_type_for_expr("false", "(int? 'bar)"); } #[test] fn eq_pred() { assert_type_for_expr("true", "(= 'foo 'foo)"); assert_type_for_expr("false", "(= 'bar 'foo)"); // This looks stupid but we can only evaluate based on types. Both 1 and 2 are `Int`. assert_type_for_expr("Bool", "(= 1 2)"); } } ================================================ FILE: compiler/typeck/mod.rs ================================================ mod dce; mod destruc; pub mod error; pub mod infer; ================================================ FILE: docker-compose.yml ================================================ version: '3.4' services: build-env: build: context: . target: build-env repl: build: context: . 
target: repl args: - vcs_ref=${BUILDKITE_COMMIT} ================================================ FILE: docs/language-design.md ================================================ # Language Design ## Introduction Arret is a strongly typed, pure functional [Lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)). Its mandatory typing distinguishes it from most other Lisp dialects. However, type inference and a relatively simple type system allow type declarations to be omitted in most places. The base syntax is a subset of Clojure's [Extensible Data Notation](https://github.com/edn-format/edn). Many tools that support EDN such as [cljfmt](https://github.com/weavejester/cljfmt) will work without modification on Arret source code. Most of Arret's primitives and standard library functions are also modelled after [Clojure](https://clojure.org). Clojure's functional design and mindshare make it a good source of inspiration. However, Arret does not intend to be source compatible with Clojure. All data structures are immutable and [strictly evaluated](https://en.wikipedia.org/wiki/Eager_evaluation). This is similar to [Elm](https://elm-lang.org) or [PureScript](http://www.purescript.org). In the text below the crystal ball (🔮) indicates planned features of the language. ## Basic Data Types The basic data types are: - `Int` is a signed 64bit integer. - `Float` is a 64bit floating point value. This is known as a “double” in some other languages. - `Num` is the union of `Int` and `Float`. This allows mathematical functions to be generic over number types. Specific numeric types should be used whenever possible to improve type inference and runtime performance. - `Bool` is the union of the `true` and `false` types. Unlike most Lisps there is no concept of [truthy values](https://en.wikipedia.org/wiki/Truth_value#Computing); constructs such as `(if)` will only accept `true` or `false` values for their condition. 
- `Str` is an immutable [UTF-8](https://en.wikipedia.org/wiki/UTF-8) string. - `Sym` is an [interned](https://en.wikipedia.org/wiki/String_interning) symbol. These are used to represent both EDN symbols and identifiers. Additionally, every symbol has its own literal type (named `'foo`) that can be used to construct ad-hoc unions. - `Char` is an [Unicode scalar value](http://www.unicode.org/glossary/#unicode_scalar_value). ## Collections The language provides immutable lists, vectors, maps and sets corresponding to the data types provided by EDN. The `(List)`, `(Vector)`/`(Vectorof)`, `(Set)` and `(Map)` type constructors can be used to specify typed collections with a particular member type. List types can specify zero or more fixed members followed by a uniform rest type. For example, `(List Int Int & Float)` indicates a list of at least two `Int`s followed by zero or more `Float`s. This is closely related to how function arguments are specified. Lists are the primary data type. Most collection functions are only provided for lists; other collections need to be temporarily converted to lists to use them. The compiler aggressively attempts to optimise these temporary lists away - this makes lists fill the role of [iterators](https://en.wikipedia.org/wiki/Iterator) in other languages. ## User Defined Types Users can define their own types in three different ways: 1. `(deftype)` can be used to create an alias of an existing type. 2. The `(U)` type constructor can be used to define a union type. Type predicates can be used to determine which member type a given value has. 3. `(defrecord)` will create a new record type distinct from all other types. These are also known as structs or product types in other languages. ## Functions Arret functions take zero or more parameters and return a single value. [Variadic](https://en.wikipedia.org/wiki/Variadic_function) functions are supported by using `& rest` to capture a list of the variable arguments. 
By convention the empty list (`()` aka nil) is used to indicate no useful value is returned by the function. This is used by functions that are only called for their side effects such as `(println!)`. All functions are either impure or pure: - Pure functions are declared using the `->` function arrow. They are not allowed to have side effects. The compiler may evaluate them at compile time, remove or duplicate calls to them, etc. Pure functions cannot call impure functions. - Impure functions are declared using the `->!` function arrow. They are allowed to have side effects and will never be evaluated at compile time. By convention they should have a name ending with `!`. It's also possible for functions to be [polymorphic](https://en.wikipedia.org/wiki/Polymorphism_(computer_science)) on their purity. For example, the higher-order functions `(map)` and `(filter)` are only impure if passed an impure function. By convention these functions are named as if they were pure, i.e. without the `!` suffix. ## Destructuring Arret supports [destructuring](https://en.wikipedia.org/wiki/Assignment_(computer_science)#Parallel_assignment) lists for variable assignments and function arguments. This can be used to emulate multiple return values by returning a fixed sized list of values. List destructures can be nested or use `& rest` syntax to capture the tail of a list. Vector destructuring is unsupported as vector notation is already used for type annotations. 🔮 Record or map destructuring may be possible in the future. ## Macros Arret provides a [hygienic](https://en.wikipedia.org/wiki/Hygienic_macro) macro system modelled after [Scheme R7RS](http://r7rs.org). This is a powerful macro-by-example system that allows defining new language constructs and flow control patterns. In fact, many language features such as `(defn)`, `(not)` and `(when)` are actually macros implemented on top of a small set of core primitives. 
🔮 One of the goals of Arret is to allow an additional type of macro implemented by user provided functions. These would be pure functions taking a syntax tree and input and returning the replacement syntax tree. ## Occurrence Typing Arret requires that every value is known to have the correct type at compile time. For example, a `Num` cannot be passed to a function expecting an `Int`. This makes Arret [strongly typed](https://en.wikipedia.org/wiki/Strong_and_weak_typing) which distinguishes it from most Lisp dialects. Occurrence typing is used to allow many idiomatic Lisp constructs to work with strong typing. It collects implicit type information from type predicates (e.g. `(str?)` and `(int?)`) and equality comparisons on variables. The variables' types are automatically refined based on if the predicate succeeded or failed. This is inspired by similar features in [Typed Racket](https://docs.racket-lang.org/ts-guide/) and [TypeScript](https://www.typescriptlang.org). ## 🔮 Task & Actors *This section is unimplemented. It's included to explain other design decisions.* Tasks are the atomic unit of concurrency and fault isolation. While Arret code cannot directly use threads it can spawn tasks which are run concurrently. Tasks are scheduled by the runtime across the system's cores as [green threads](https://en.wikipedia.org/wiki/Green_threads). If a task panics it will only terminate that task and allow the program to continue execution. This is important as Arret intentionally doesn't implement exceptions, instead preferring error return values and panics. An [actor system](https://en.wikipedia.org/wiki/Actor_model) is built on top of tasks. Actors are tasks with the additional ability to send and receive messages. They can keep internal state between messages which programs can use to implement a controlled form of mutable state. 
================================================ FILE: driver/Cargo.toml ================================================ [package] name = "arret" version = "0.1.0" edition = "2018" authors = ["Ryan Cumming "] default-run = "arret" [[bin]] name = "arret" path = "main.rs" [dependencies] clap = "2" rustyline = "9" directories-next = "2" ansi_term = "0.12" codespan-reporting = "0.11" arret-syntax = { path = "../syntax" } arret-compiler = { path = "../compiler" } ================================================ FILE: driver/main.rs ================================================ #![warn(clippy::all)] #![warn(rust_2018_idioms)] mod subcommand; use std::sync::Arc; use std::{env, path, process}; use arret_compiler::{find_arret_root, CompileCtx, FindArretRootError}; const ARRET_FILE_EXTENSION: &str = ".arret"; fn input_arg_to_source_file( source_loader: &arret_compiler::SourceLoader, input_param: &str, ) -> arret_compiler::SourceFile { if input_param == "-" { use std::io::prelude::*; let mut input_string = String::new(); std::io::stdin().read_to_string(&mut input_string).unwrap(); source_loader.load_string("".into(), input_string) } else { let input_path = path::Path::new(input_param); source_loader .load_path(input_path) .expect("Unable to read input file") } } fn main() { use arret_compiler::initialise_llvm; use clap::{crate_version, App, AppSettings, Arg, SubCommand}; let matches = App::new("arret") .version(crate_version!()) .setting(AppSettings::SubcommandRequiredElseHelp) .about("Compiler and REPL for the Arret language") .arg( Arg::with_name("NOOPT") .long("no-llvm-opt") .takes_value(false) .help("Disables LLVM optimisation"), ) .arg( Arg::with_name("ARRET_ROOT") .long("arret-root") .takes_value(true) .help("Path to the root of a built `etaoins/arret` repository"), ) .subcommand( SubCommand::with_name("compile") .about("Compiles an Arret program to a standalone binary") .arg( Arg::with_name("INPUT") .required(true) .help("Input source file") .index(1), ) .arg( 
Arg::with_name("OUTPUT") .short("o") .value_name("FILE") .help("Output filename") .long_help( "Output filename.\n\ Four special extensions are recognised to output intermediate formats:\n\ \n\ `.mir` will output a text representation of Arret's internal middle IR\n\ `.ll` will output LLVM IR\n\ `.s` will output assembler for the target architecture\n\ `.o` will output an unlinked object file" ), ) .arg( Arg::with_name("DEBUG") .short("g") .long("debug-info") .help("Generates debugging information"), ) .arg( Arg::with_name("TARGET") .long("target") .value_name("TRIPLE") .help("Generates code for the given target"), ), ) .subcommand( SubCommand::with_name("eval") .about("Evaluates an Arret program once") .arg( Arg::with_name("INPUT") .required(true) .help("Input source file") .index(1), ), ) .subcommand( SubCommand::with_name("repl") .about("Starts an interactive REPL") .arg( Arg::with_name("INCLUDE") .short("i") .long("include") .value_name("FILE") .help("Preloads a file before starting REPL"), ), ) .get_matches(); let arret_root_dir = match find_arret_root(matches.value_of("ARRET_ROOT")) { Ok(arret_root) => arret_root, Err(FindArretRootError::InvalidOption(invalid_option)) => { eprintln!( "`{}` specified by the `--arret-root` option is not an Arret root directory", invalid_option.invalid_path().to_string_lossy(), ); process::exit(1); } Err(FindArretRootError::InvalidEnvVar(invalid_env_var)) => { eprintln!( "`{}` specified by the `{}` environment variable is not an Arret root directory", invalid_env_var.invalid_path().to_string_lossy(), invalid_env_var.env_var_name(), ); process::exit(1); } Err(FindArretRootError::NotFound) => { eprintln!("Unable to find the Arret root directory"); eprintln!("Either specify the `--arret-root` option or set the `ARRET_ROOT` environment variable"); process::exit(1); } }; let enable_optimisations = !matches.is_present("NOOPT"); if let Some(compile_matches) = matches.subcommand_matches("compile") { let package_paths = 
arret_compiler::PackagePaths::with_stdlib( &arret_root_dir, compile_matches.value_of("TARGET"), ); let ccx = CompileCtx::new(package_paths, enable_optimisations); let input_arg = compile_matches.value_of("INPUT").unwrap(); let input_file = input_arg_to_source_file(ccx.source_loader(), input_arg); let output_path = path::Path::new( if let Some(output_param) = compile_matches.value_of("OUTPUT") { output_param } else if input_arg.ends_with(ARRET_FILE_EXTENSION) { &input_arg[0..input_arg.len() - ARRET_FILE_EXTENSION.len()] } else { panic!( "Can't determine output filename from input arg `{}`", input_arg ); }, ); let debug_info = compile_matches.is_present("DEBUG"); let target_triple = compile_matches.value_of("TARGET"); initialise_llvm(target_triple.is_some()); if !subcommand::compile::compile_input_file( &ccx, &input_file, target_triple, output_path, debug_info, ) { process::exit(2); } } else if let Some(repl_matches) = matches.subcommand_matches("repl") { let package_paths = arret_compiler::PackagePaths::with_stdlib(&arret_root_dir, None); let ccx = Arc::new(CompileCtx::new(package_paths, enable_optimisations)); initialise_llvm(false); let include_path = repl_matches .value_of("INCLUDE") .map(|include_param| path::Path::new(include_param).to_owned()); subcommand::repl::interactive_loop(ccx, include_path); } else if let Some(eval_matches) = matches.subcommand_matches("eval") { let package_paths = arret_compiler::PackagePaths::with_stdlib(&arret_root_dir, None); let ccx = CompileCtx::new(package_paths, enable_optimisations); let input_param = eval_matches.value_of("INPUT").unwrap(); let input_file = input_arg_to_source_file(ccx.source_loader(), input_param); initialise_llvm(false); if !subcommand::eval::eval_input_file(&ccx, &input_file) { process::exit(2); } } else { eprintln!("Sub-command not specified"); process::exit(1); } } ================================================ FILE: driver/subcommand/compile.rs ================================================ use 
std::{fs, path};

use codespan_reporting::diagnostic::Diagnostic;

use arret_syntax::span::FileId;

use arret_compiler::{emit_diagnostics_to_stderr, print_program_mir, CompileCtx};

// We don't use this ourselves so overload it for the purposes of dumping MIR
const MIR_OUTPUT_TYPE: arret_compiler::OutputType = arret_compiler::OutputType::None;

/// Compiles `input_file` to `output_path`, returning any diagnostics on failure.
///
/// The program is first lowered to an evaluable form and built to MIR. If the
/// caller requested the MIR output type (see `MIR_OUTPUT_TYPE`) the MIR text is
/// dumped to `output_path` and compilation stops there; otherwise full code
/// generation is performed.
fn try_compile_input_file(
    ccx: &CompileCtx,
    options: arret_compiler::GenProgramOptions<'_>,
    input_file: &arret_compiler::SourceFile,
    output_path: &path::Path,
    debug_info: bool,
) -> Result<(), Vec>> {
    let arret_compiler::EvaluableProgram {
        ehx,
        main_export_id,
        linked_libraries,
    } = arret_compiler::program_to_evaluable(ccx, input_file)?;

    let mir_program = ehx.into_built_program(main_export_id)?;

    if options.output_type() == MIR_OUTPUT_TYPE {
        // `.mir` output: write the textual MIR and skip LLVM entirely
        // NOTE(review): these unwraps will panic on I/O errors (e.g. an
        // unwritable output path) rather than reporting a diagnostic
        let mut output_file = fs::File::create(output_path).unwrap();
        print_program_mir(&mut output_file, &mir_program, Some(ccx.source_loader())).unwrap();
        return Ok(());
    }

    // Only thread the source loader through when debug info was requested;
    // it's used to emit source-level debugging metadata
    let debug_source_loader = if debug_info {
        Some(ccx.source_loader())
    } else {
        None
    };

    arret_compiler::gen_program(
        options,
        &linked_libraries,
        &mir_program,
        output_path,
        debug_source_loader,
    );

    Ok(())
}

/// Compiles `input_file`, printing any diagnostics to stderr.
///
/// The output format is inferred from `output_path`'s extension (`.mir`,
/// `.ll`, `.s`, `.o`, or an executable for anything else). Returns `true`
/// on success and `false` if compilation failed.
pub fn compile_input_file(
    ccx: &CompileCtx,
    input_file: &arret_compiler::SourceFile,
    target_triple: Option<&str>,
    output_path: &path::Path,
    debug_info: bool,
) -> bool {
    use std::ffi;

    // Map the output extension to an intermediate or final output type
    let output_type = match output_path.extension().and_then(ffi::OsStr::to_str) {
        Some("mir") => MIR_OUTPUT_TYPE,
        Some("ll") => arret_compiler::OutputType::LlvmIr,
        Some("s") => arret_compiler::OutputType::Assembly,
        Some("o") => arret_compiler::OutputType::Object,
        _ => arret_compiler::OutputType::Executable,
    };

    let options = arret_compiler::GenProgramOptions::new()
        .with_target_triple(target_triple)
        .with_output_type(output_type)
        .with_llvm_opt(ccx.enable_optimisations());

    let result = try_compile_input_file(ccx, options, input_file, output_path, debug_info);

    if let Err(diagnostics) = result {
        emit_diagnostics_to_stderr(ccx.source_loader(), diagnostics);
false } else { true } }
================================================ FILE: driver/subcommand/eval.rs ================================================
use codespan_reporting::diagnostic::Diagnostic;

use arret_syntax::span::FileId;

use arret_compiler::{emit_diagnostics_to_stderr, CompileCtx};

/// Compiles `input_file` to an evaluable program and runs its `main` export,
/// returning any compile- or run-time diagnostics.
fn try_eval_input_file(
    ccx: &CompileCtx,
    input_file: &arret_compiler::SourceFile,
) -> Result<(), Vec>> {
    let arret_compiler::EvaluableProgram {
        mut ehx,
        main_export_id,
        ..
    } = arret_compiler::program_to_evaluable(ccx, input_file)?;

    // Run `main` immediately in the compiler's evaluator
    ehx.eval_main_fun(main_export_id)?;
    Ok(())
}

/// Evaluates `input_file` once, printing any diagnostics to stderr.
///
/// Returns `true` on success and `false` if compilation or evaluation failed.
pub fn eval_input_file(ccx: &CompileCtx, input_file: &arret_compiler::SourceFile) -> bool {
    let result = try_eval_input_file(ccx, input_file);

    if let Err(diagnostics) = result {
        emit_diagnostics_to_stderr(ccx.source_loader(), diagnostics);
        false
    } else {
        true
    }
}
================================================ FILE: driver/subcommand/mod.rs ================================================
pub mod compile;
pub mod eval;
pub mod repl;
================================================ FILE: driver/subcommand/repl/arret_helper.rs ================================================
use std::borrow::Cow;

use ansi_term::{Colour, Style};
use rustyline::validate::{ValidationContext, ValidationResult};

use arret_syntax::datum::DataStr;

use super::command::{HELP_COMMAND, QUIT_COMMAND, TYPE_ONLY_PREFIX};
use super::syntax::{error_context_for_eol, error_for_line, MAXIMUM_PARSED_LINE_LEN};

/// Completions that don't map to a bound value in scope
const UNBOUND_COMPLETIONS: &[&str] = &[
    TYPE_ONLY_PREFIX,
    QUIT_COMMAND,
    HELP_COMMAND,
    "true",
    "false",
    "##NaN",
    "##Inf",
    "##-Inf",
];

/// Implementation of Rustyline's `Helper` trait
pub struct ArretHelper {
    // Sorted list of every completable name: the bound names passed to
    // `ArretHelper::new` plus `UNBOUND_COMPLETIONS`
    all_names: Vec,
}

/// Returns the contiguous run of entries in the sorted `haystack` that start
/// with `prefix`, as an iterator.
fn sorted_strings_prefixed_by<'a, T: AsRef>(
    haystack: &'a [T],
    prefix: &'a str,
) -> impl Iterator + 'a {
    // Use a binary search to find the start of the strings
    let start_pos = match haystack.binary_search_by(|needle| needle.as_ref().cmp(prefix)) {
Ok(found) => found, Err(insert_idx) => insert_idx, }; haystack[start_pos..] .iter() // Once we stop matching prefixes we're done .take_while(move |needle| needle.as_ref().starts_with(prefix)) } impl ArretHelper { pub fn new(mut bound_names: Vec) -> ArretHelper { bound_names.extend(UNBOUND_COMPLETIONS.iter().map(|unbound| (*unbound).into())); bound_names.sort(); ArretHelper { all_names: bound_names, } } } impl rustyline::completion::Completer for ArretHelper { type Candidate = String; fn complete( &self, line: &str, pos: usize, _: &rustyline::Context<'_>, ) -> rustyline::Result<(usize, Vec)> { use arret_syntax::parser::is_identifier_char; let prefix_start = line[0..pos] .rfind(|c| !is_identifier_char(c)) .map(|i| i + 1) .unwrap_or(0); let prefix = &line[prefix_start..pos]; let suffix = if line.len() > pos { let suffix_end = line[pos..] .find(|c| !is_identifier_char(c)) .map(|i| i + pos) .unwrap_or_else(|| line.len()); &line[pos..suffix_end] } else { "" }; let is_command = prefix.starts_with('/'); let is_first_identifier = pos == prefix.len(); if is_command && !is_first_identifier { // Don't complete commands in illegal positions return Ok((0, vec![])); } let options = sorted_strings_prefixed_by(&self.all_names, prefix) .filter_map(|name| { if name.ends_with(suffix) { Some((&name[0..name.len() - suffix.len()]).to_owned()) } else { None } }) .collect(); Ok((prefix_start, options)) } } impl rustyline::hint::Hinter for ArretHelper { type Hint = String; fn hint(&self, line: &str, pos: usize, _: &rustyline::Context<'_>) -> Option { use arret_syntax::error::WithinContext; use arret_syntax::parser::is_identifier_char; let within_context = error_context_for_eol(line); // If we're inside a string we shouldn't try to hint identifiers if let Some(WithinContext::String(_)) = within_context { return Some("\"".to_owned()); } let last_ident_start = line .rfind(|c| !is_identifier_char(c)) .map(|i| i + 1) .unwrap_or(0); let last_ident = &line[last_ident_start..]; let is_command = 
last_ident.starts_with('/'); let is_first_identifier = pos == last_ident.len(); // Make sure we have at least one character and we don't complete commands mid-line if !(last_ident.is_empty() || (is_command && !is_first_identifier)) { for name in sorted_strings_prefixed_by(&self.all_names, last_ident) { // Don't suggest ourselves if name.len() != last_ident.len() { return Some(name[last_ident.len()..].to_owned()); } } } within_context .and_then(|within| within.expected_next()) .map(|en| en.close_char().to_string()) } } impl rustyline::highlight::Highlighter for ArretHelper { fn highlight<'l>(&self, line: &'l str, _pos: usize) -> Cow<'l, str> { // See if we have an error let error_span = error_for_line(line).and_then(|error| { if let arret_syntax::error::ErrorKind::Eof(ec) = error.kind() { // We'll already be hinting at the end of the line so point to the opening char ec.open_char_span() } else { Some(error.span()) } }); let error_span = if let Some(error_span) = error_span { error_span } else { return line.into(); }; let error_start = error_span.start() as usize; let error_end = error_span.end() as usize; let prefix = &line[0..error_start]; let error = &line[error_start..error_end]; let suffix = &line[error_end..]; let error_style = Colour::Red.bold(); format!("{}{}{}", prefix, error_style.paint(error), suffix).into() } fn highlight_prompt<'b, 's: 'b, 'p: 'b>( &'s self, prompt: &'p str, _default: bool, ) -> Cow<'b, str> { let prompt_style = Colour::Fixed(25); // DeepSkyBlue4 (#005faf) prompt_style.paint(prompt).to_string().into() } fn highlight_hint<'h>(&self, hint: &'h str) -> Cow<'h, str> { use arret_syntax::parser::is_identifier_char; if hint.chars().next().map(is_identifier_char) == Some(true) { // This is a name completion let name_style = Style::new().dimmed(); name_style.paint(hint).to_string().into() } else { // This is an unexpected EOF hint let unexpected_eof_style = Colour::Red.bold(); unexpected_eof_style.paint(hint).to_string().into() } } fn 
highlight_char(&self, line: &str, _pos: usize) -> bool { // Essentially any character can change highlighting: // // - Delimiters can change the structure of input // - Identifier characters can make char literals (e.g. \newline) change validity line.len() <= MAXIMUM_PARSED_LINE_LEN } } impl rustyline::validate::Validator for ArretHelper { fn validate( &self, ctx: &mut ValidationContext<'_>, ) -> Result { match error_context_for_eol(ctx.input()) { Some(_) => Ok(ValidationResult::Incomplete), None => Ok(ValidationResult::Valid(None)), } } } impl rustyline::Helper for ArretHelper {} #[cfg(test)] mod test { use super::*; fn assert_sorted_strings_prefixed_by( expected: &[&'static str], haystack: &[&'static str], needle: &'static str, ) { let expected_vec = expected.to_owned(); let actual_vec: Vec<&str> = sorted_strings_prefixed_by(haystack, needle) .cloned() .collect(); assert_eq!(expected_vec, actual_vec) } #[test] fn sorted_strings_prefixed_by_empty() { let haystack: &[&str] = &[]; assert_sorted_strings_prefixed_by(&[], haystack, "foo"); } #[test] fn sorted_strings_prefixed_by_missing_at_beginning() { // "foo" would be before this one let haystack = &["zoop"]; assert_sorted_strings_prefixed_by(&[], haystack, "foo"); } #[test] fn sorted_strings_prefixed_by_missing_in_middle() { // "foo" would be in the middle of these two let haystack = &["bar", "zoop"]; assert_sorted_strings_prefixed_by(&[], haystack, "foo"); } #[test] fn sorted_strings_prefixed_by_missing_at_end() { // "foo" would be after of these two let haystack = &["bar", "baz"]; assert_sorted_strings_prefixed_by(&[], haystack, "foo"); } #[test] fn sorted_strings_prefixed_by_only_self() { let haystack = &["bar", "baz", "foo"]; assert_sorted_strings_prefixed_by(&["foo"], haystack, "foo"); } #[test] fn strings_prefixed_by_only_other() { let haystack = &["bar", "baz", "foobar", "foobaz"]; assert_sorted_strings_prefixed_by(&["foobar", "foobaz"], haystack, "foo"); } #[test] fn strings_prefixed_by_self_and_other() { 
let haystack = &["bar", "baz", "foo", "foobar", "foobaz", "zoop"]; assert_sorted_strings_prefixed_by(&["foo", "foobar", "foobaz"], haystack, "foo"); } } ================================================ FILE: driver/subcommand/repl/command.rs ================================================ pub const TYPE_ONLY_PREFIX: &str = "/type "; pub const QUIT_COMMAND: &str = "/quit"; pub const HELP_COMMAND: &str = "/help"; pub enum ParsedCommand { EvalValue(String), EvalType(String), Quit, Other, } pub fn parse_command(mut line: String) -> ParsedCommand { match line.as_ref() { _ if line.starts_with(TYPE_ONLY_PREFIX) => { line.drain(0..TYPE_ONLY_PREFIX.len()); ParsedCommand::EvalType(line) } HELP_COMMAND => { println!("Available REPL commands:"); println!(); println!("/help Prints this summary"); println!("/type Evaluates the type of the given expression"); println!("/quit Exits the REPL"); ParsedCommand::Other } QUIT_COMMAND => ParsedCommand::Quit, _ => ParsedCommand::EvalValue(line), } } ================================================ FILE: driver/subcommand/repl/history.rs ================================================ use std::{fs, path}; /// Gets the full path to where our REPL history should be stored /// /// This does very little error handling as history is a "nice to have" feature pub fn repl_history_path() -> Option { let project_dirs = directories_next::ProjectDirs::from("org.arret-lang", "", "arret")?; let data_dir = project_dirs.data_dir(); fs::create_dir_all(data_dir).ok()?; Some(data_dir.join("repl-history")) } ================================================ FILE: driver/subcommand/repl/mod.rs ================================================ mod arret_helper; mod command; mod history; mod syntax; use std::io::prelude::*; use std::io::BufReader; use std::sync::Arc; use std::{fs, path}; use ansi_term::{Colour, Style}; use arret_compiler::{emit_diagnostics_to_stderr, CompileCtx}; use arret_helper::ArretHelper; use command::{parse_command, ParsedCommand}; use 
history::repl_history_path; const PROMPT: &str = "arret> "; pub fn interactive_loop(ccx: Arc, include_path: Option) { use arret_compiler::repl::{EvalKind, EvaledExprValue, EvaledLine}; use rustyline::error::ReadlineError; // Setup our REPL backend let repl_ctx = arret_compiler::repl::ReplCtx::new(ccx.clone()); // Setup Rustyline let mut rl = rustyline::Editor::::new(); // Import [stdlib base] so we have most useful things defined let initial_import = "(import [stdlib base])".to_owned(); repl_ctx.send_line(initial_import, EvalKind::Value).unwrap(); let mut sent_prelude_lines = 1; if let Some(include_path) = include_path { let include_file = fs::File::open(include_path).unwrap(); // Import the include file line-by-line for line in BufReader::new(include_file).lines() { repl_ctx.send_line(line.unwrap(), EvalKind::Value).unwrap(); sent_prelude_lines += 1 } } // Load our history while the REPL engine is thinking let history_path = repl_history_path(); if let Some(ref history_path) = history_path { let _ = rl.load_history(history_path); } // Collect all the responses for _ in 0..sent_prelude_lines { match repl_ctx.receive_result() { Ok(EvaledLine::Defs(bound_names)) => { rl.set_helper(Some(ArretHelper::new(bound_names))); } Ok(_) => {} Err(diagnostics) => emit_diagnostics_to_stderr(ccx.source_loader(), diagnostics), } } // Configure our styles let defs_style = Colour::Purple.bold(); let expr_arrow_style = Colour::Green.bold(); let type_style = Colour::Fixed(166); // DarkOrange3 (#d75f00) let type_brackets_style = Style::new().dimmed(); loop { let mut history_dirty = false; let readline = rl.readline(PROMPT); match readline { Ok(line) => { if !line.chars().all(char::is_whitespace) { history_dirty = rl.add_history_entry(line.clone()); } let (eval_kind, input) = match parse_command(line) { ParsedCommand::EvalValue(input) => (EvalKind::Value, input), ParsedCommand::EvalType(input) => (EvalKind::Type, input), ParsedCommand::Quit => { break; } ParsedCommand::Other => { 
continue; } }; repl_ctx.send_line(input, eval_kind).unwrap(); if history_dirty { // Write our history while the REPL engine is thinking if let Some(ref history_path) = history_path { let _ = rl.save_history(&history_path); } } match repl_ctx.receive_result() { Ok(EvaledLine::EmptyInput) => {} Ok(EvaledLine::Defs(bound_names)) => { // Refresh our completions rl.set_helper(Some(ArretHelper::new(bound_names))); println!("{}", defs_style.paint("defined")) } Ok(EvaledLine::ExprType(type_str)) => { println!( "{} {}", expr_arrow_style.paint("=>"), type_style.paint(type_str) ); } Ok(EvaledLine::ExprValue(evaled_expr)) => { let EvaledExprValue { value_str, type_str, type_is_literal, } = evaled_expr; if type_is_literal { println!( // => value "{} {}", expr_arrow_style.paint("=>"), value_str, ); } else { println!( // => [value Type] "{} {}{} {}{}", expr_arrow_style.paint("=>"), type_brackets_style.paint("["), value_str, type_style.paint(type_str), type_brackets_style.paint("]"), ); } } Err(diagnostics) => { emit_diagnostics_to_stderr(ccx.source_loader(), diagnostics); } } } Err(ReadlineError::Interrupted) | Err(ReadlineError::Eof) => break, Err(other) => { panic!("Readline error: {:?}", other); } } } } ================================================ FILE: driver/subcommand/repl/syntax.rs ================================================ use arret_syntax::span::ByteIndex; use super::command::TYPE_ONLY_PREFIX; /// Maximum line length we'll provide parser hints and error highlighting for /// /// This requires parsing the whole line and we don't support incremental reparsing. This means /// in the worst case of pasting a large line character-by-character we'll behave O(n!) with /// with the size of the pasted line. This seems like a reasonable cutoff where a human isn't /// typing the input. 
pub const MAXIMUM_PARSED_LINE_LEN: usize = 512; pub fn error_for_line(mut line: &str) -> Option { use arret_syntax::parser::datum_from_str_with_span_offset; let span_offset = if line.starts_with(TYPE_ONLY_PREFIX) { line = &line[TYPE_ONLY_PREFIX.len()..]; TYPE_ONLY_PREFIX.len() } else { 0 }; // Is this a command? if line.starts_with('/') || // Or empty? line.chars().all(char::is_whitespace) || // Or is too large to parse interactively? line.len() > MAXIMUM_PARSED_LINE_LEN { return None; } datum_from_str_with_span_offset(None, line, span_offset as ByteIndex).err() } pub fn error_context_for_eol(line: &str) -> Option { error_for_line(line).and_then(|error| { if let arret_syntax::error::ErrorKind::Eof(within_context) = error.kind() { Some(*within_context) } else { None } }) } ================================================ FILE: driver/tests/integration/hello-world.arret ================================================ (import [stdlib base]) (defn main! () ->! () (println! "Hello, world!")) ================================================ FILE: driver/tests/integration/run.sh ================================================ #!/bin/sh set -e HELLO_WORLD_SOURCE=driver/tests/integration/hello-world.arret EXPECTED_HELLO_WORLD_OUTPUT="Hello, world!" 
TEMP_HELLO_WORLD_BINARY=target/hello-world test_binary=${1:-cargo run} assert_outputs_hello_world() { echo $1 output=$($1) if [ "${output}" != "${EXPECTED_HELLO_WORLD_OUTPUT}" ]; then >&2 echo "expected '${EXPECTED_HELLO_WORLD_OUTPUT}', got '${output}'" exit 1 fi } assert_outputs_hello_world "${test_binary} eval ${HELLO_WORLD_SOURCE}" assert_outputs_hello_world "${test_binary} eval -" < ${HELLO_WORLD_SOURCE} ${test_binary} compile ${HELLO_WORLD_SOURCE} -o "${TEMP_HELLO_WORLD_BINARY}" assert_outputs_hello_world "${TEMP_HELLO_WORLD_BINARY}" rm "${TEMP_HELLO_WORLD_BINARY}" ================================================ FILE: editors/code/.dockerignore ================================================ node_modules/ out/ .vscode-test/ ================================================ FILE: editors/code/.eslintrc.yml ================================================ plugins: - '@typescript-eslint' - prettier extends: - 'eslint:recommended' - 'plugin:@typescript-eslint/eslint-recommended' - 'plugin:@typescript-eslint/recommended' - 'plugin:@typescript-eslint/recommended-requiring-type-checking' - 'plugin:prettier/recommended' parserOptions: project: ./tsconfig.json rules: 'prettier/prettier': error 'no-unused-vars': off '@typescript-eslint/no-unused-vars': - error - argsIgnorePattern: '^_' ================================================ FILE: editors/code/.gitignore ================================================ yarn-error.log node_modules/ out/ .vscode-test/ arret-*.vsix ================================================ FILE: editors/code/.vscode/launch.json ================================================ // A launch configuration that compiles the extension and then opens it inside a new window // Use IntelliSense to learn about possible attributes. // Hover to view descriptions of existing attributes. 
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 { "version": "0.2.0", "configurations": [ { "name": "Run Extension", "type": "extensionHost", "request": "launch", "runtimeExecutable": "${execPath}", "args": ["--extensionDevelopmentPath=${workspaceFolder}"], "outFiles": ["${workspaceFolder}/out/**/*.js"], "preLaunchTask": "${defaultBuildTask}" }, { "name": "Extension Tests", "type": "extensionHost", "request": "launch", "runtimeExecutable": "${execPath}", "args": [ "--extensionDevelopmentPath=${workspaceFolder}", "--extensionTestsPath=${workspaceFolder}/out/test/suite/index" ], "outFiles": ["${workspaceFolder}/out/test/**/*.js"], "preLaunchTask": "${defaultBuildTask}" } ] } ================================================ FILE: editors/code/.vscode/settings.json ================================================ // Place your settings in this file to overwrite default and user settings. { "files.exclude": { "out": false // set this to true to hide the "out" folder with the compiled JS files }, "search.exclude": { "out": true // set this to false to include "out" folder in search results }, // Turn off tsc task auto detection since we have the necessary tasks as npm scripts "typescript.tsc.autoDetect": "off" } ================================================ FILE: editors/code/.vscode/tasks.json ================================================ // See https://go.microsoft.com/fwlink/?LinkId=733558 // for the documentation about the tasks.json format { "version": "2.0.0", "tasks": [ { "type": "npm", "script": "watch", "problemMatcher": "$tsc-watch", "isBackground": true, "presentation": { "reveal": "never" }, "group": { "kind": "build", "isDefault": true } } ] } ================================================ FILE: editors/code/Dockerfile ================================================ # Visual Studio Code currently uses Node 14.16 FROM node:14-buster RUN \ apt-get update && \ apt-get -y install --no-install-recommends xvfb libnss3 
libgtk-3-0 libxtst6 libxss1 libasound2 libsecret-1-0 libgbm1 && \ apt-get clean WORKDIR /workdir/editors/code COPY package.json yarn.lock tsconfig.json ./ COPY src/test/ src/test/ RUN yarn install --frozen-lockfile RUN yarn compile && yarn vscode:download ================================================ FILE: editors/code/language-configuration.json ================================================ { "comments": { "lineComment": ";" }, "brackets": [ ["{", "}"], ["[", "]"], ["(", ")"] ], "autoClosingPairs": [ ["{", "}"], ["[", "]"], ["(", ")"], { "open": "\"", "close": "\"", "notIn": ["string"] } ], "surroundingPairs": [ ["{", "}"], ["[", "]"], ["(", ")"], ["\"", "\""] ], "folding": { "offSide": true } } ================================================ FILE: editors/code/package.json ================================================ { "name": "arret", "displayName": "Arret", "description": "Arret language support", "version": "0.0.1", "publisher": "etaoins", "repository": "https://github.com/etaoins/arret", "license": "Apache-2.0", "engines": { "vscode": "^1.60.0" }, "categories": [ "Programming Languages" ], "activationEvents": [ "onLanguage:arret" ], "main": "./out/extension.js", "keywords": [ "multi-root ready" ], "contributes": { "languages": [ { "id": "arret", "aliases": [ "Arret" ], "extensions": [ ".arret" ], "configuration": "./language-configuration.json" } ], "grammars": [ { "language": "arret", "scopeName": "source.arret", "path": "./syntaxes/arret.tmLanguage.json" } ] }, "scripts": { "vscode:download": "node ./out/test/downloadVsCode.js", "vscode:install": "vsce package && code --install-extension arret-0.0.1.vsix", "vscode:package": "vsce package", "vscode:prepublish": "yarn run compile", "compile": "tsc -p ./", "watch": "tsc -watch -p ./", "pretest": "yarn run compile", "test": "node ./out/test/runTest.js", "format": "eslint --fix 'src/**/*.ts'", "lint": "eslint 'src/**/*.ts'" }, "devDependencies": { "@types/glob": "7.1.4", "@types/mocha": "9.0.0", 
"@types/node": "14.17.20", "@types/vscode": "1.60.0", "@typescript-eslint/eslint-plugin": "4.33.0", "@typescript-eslint/parser": "4.33.0", "eslint": "7.32.0", "eslint-config-prettier": "8.3.0", "eslint-plugin-prettier": "3.4.1", "glob": "7.2.0", "mocha": "9.1.3", "prettier": "2.5.1", "typescript": "4.4.4", "vsce": "1.99.0", "vscode-test": "1.6.1" }, "dependencies": { "vscode-languageclient": "7.0.0" } } ================================================ FILE: editors/code/src/extension.ts ================================================ import * as vscode from 'vscode'; import { LanguageClient, LanguageClientOptions, ServerOptions, Trace, } from 'vscode-languageclient/node'; let client: LanguageClient; export const activate = (_context: vscode.ExtensionContext): void => { const command = 'arret-lsp-server'; const serverOptions: ServerOptions = { run: { command }, debug: { command }, }; const clientOptions: LanguageClientOptions = { documentSelector: ['arret'], }; client = new LanguageClient('arret', serverOptions, clientOptions); client.trace = Trace.Verbose; client.start(); }; // this method is called when your extension is deactivated export const deactivate = (): Thenable | undefined => { if (!client) { return; } return client.stop(); }; ================================================ FILE: editors/code/src/test/colorize-fixtures/sample.arret ================================================ (import [arret internal primitives]) ; Primitives (export def let fn if quote export defmacro letmacro macro-rules deftype lettype compile-error do = defrecord letrecord recur) ; Booleans true false ; Numbers -10 10 -100 100 ; Macros (defmacro cond (macro-rules [() ()] [(test-expr body-expr rest-clauses ...) 
(if test-expr body-expr (cond rest-clauses ...))])) ================================================ FILE: editors/code/src/test/colorize-results/sample_arret.json ================================================ [ { "c": "(", "t": "source.arret meta.expression.arret punctuation.section.expression.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "import", "t": "source.arret meta.expression.arret keyword.control.arret", "r": { "dark_plus": "keyword.control: #C586C0", "light_plus": "keyword.control: #AF00DB", "dark_vs": "keyword.control: #569CD6", "light_vs": "keyword.control: #0000FF", "hc_black": "keyword.control: #C586C0" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "[", "t": "source.arret meta.expression.arret meta.vector.arret punctuation.section.vector.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "arret", "t": "source.arret meta.expression.arret meta.vector.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.vector.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "internal", "t": "source.arret meta.expression.arret meta.vector.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": 
"default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.vector.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "primitives", "t": "source.arret meta.expression.arret meta.vector.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "]", "t": "source.arret meta.expression.arret meta.vector.arret punctuation.section.vector.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ")", "t": "source.arret meta.expression.arret punctuation.section.expression.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ";", "t": "source.arret comment.line.semicolon.arret punctuation.definition.comment.arret", "r": { "dark_plus": "comment: #6A9955", "light_plus": "comment: #008000", "dark_vs": "comment: #6A9955", "light_vs": "comment: #008000", "hc_black": "comment: #7CA668" } }, { "c": " Primitives", "t": "source.arret comment.line.semicolon.arret", "r": { "dark_plus": "comment: #6A9955", "light_plus": "comment: #008000", "dark_vs": "comment: #6A9955", "light_vs": "comment: #008000", "hc_black": "comment: #7CA668" } }, { "c": "(", "t": "source.arret meta.expression.arret punctuation.section.expression.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "export", "t": 
"source.arret meta.expression.arret keyword.control.arret", "r": { "dark_plus": "keyword.control: #C586C0", "light_plus": "keyword.control: #AF00DB", "dark_vs": "keyword.control: #569CD6", "light_vs": "keyword.control: #0000FF", "hc_black": "keyword.control: #C586C0" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "def", "t": "source.arret meta.expression.arret keyword.control.arret", "r": { "dark_plus": "keyword.control: #C586C0", "light_plus": "keyword.control: #AF00DB", "dark_vs": "keyword.control: #569CD6", "light_vs": "keyword.control: #0000FF", "hc_black": "keyword.control: #C586C0" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "let", "t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "fn", "t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "if", "t": 
"source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "quote", "t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "export", "t": "source.arret meta.expression.arret keyword.control.arret", "r": { "dark_plus": "keyword.control: #C586C0", "light_plus": "keyword.control: #AF00DB", "dark_vs": "keyword.control: #569CD6", "light_vs": "keyword.control: #0000FF", "hc_black": "keyword.control: #C586C0" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "defmacro", "t": "source.arret meta.expression.arret keyword.control.arret", "r": { "dark_plus": "keyword.control: #C586C0", "light_plus": "keyword.control: #AF00DB", "dark_vs": "keyword.control: #569CD6", "light_vs": "keyword.control: #0000FF", "hc_black": "keyword.control: #C586C0" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": 
"letmacro", "t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "macro-rules", "t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "deftype", "t": "source.arret meta.expression.arret keyword.control.arret", "r": { "dark_plus": "keyword.control: #C586C0", "light_plus": "keyword.control: #AF00DB", "dark_vs": "keyword.control: #569CD6", "light_vs": "keyword.control: #0000FF", "hc_black": "keyword.control: #C586C0" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "lettype", "t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "compile-error", 
"t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "do", "t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "=", "t": "source.arret meta.expression.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "defrecord", "t": "source.arret meta.expression.arret keyword.control.arret", "r": { "dark_plus": "keyword.control: #C586C0", "light_plus": "keyword.control: #AF00DB", "dark_vs": "keyword.control: #569CD6", "light_vs": "keyword.control: #0000FF", "hc_black": "keyword.control: #C586C0" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "letrecord", "t": "source.arret 
meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "recur", "t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": ")", "t": "source.arret meta.expression.arret punctuation.section.expression.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ";", "t": "source.arret comment.line.semicolon.arret punctuation.definition.comment.arret", "r": { "dark_plus": "comment: #6A9955", "light_plus": "comment: #008000", "dark_vs": "comment: #6A9955", "light_vs": "comment: #008000", "hc_black": "comment: #7CA668" } }, { "c": " Booleans", "t": "source.arret comment.line.semicolon.arret", "r": { "dark_plus": "comment: #6A9955", "light_plus": "comment: #008000", "dark_vs": "comment: #6A9955", "light_vs": "comment: #008000", "hc_black": "comment: #7CA668" } }, { "c": "true", "t": "source.arret constant.language.boolean.arret", "r": { "dark_plus": "constant.language: #569CD6", "light_plus": "constant.language: #0000FF", "dark_vs": "constant.language: #569CD6", "light_vs": "constant.language: #0000FF", "hc_black": "constant.language: #569CD6" } }, { "c": " ", "t": "source.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": 
"false", "t": "source.arret constant.language.boolean.arret", "r": { "dark_plus": "constant.language: #569CD6", "light_plus": "constant.language: #0000FF", "dark_vs": "constant.language: #569CD6", "light_vs": "constant.language: #0000FF", "hc_black": "constant.language: #569CD6" } }, { "c": ";", "t": "source.arret comment.line.semicolon.arret punctuation.definition.comment.arret", "r": { "dark_plus": "comment: #6A9955", "light_plus": "comment: #008000", "dark_vs": "comment: #6A9955", "light_vs": "comment: #008000", "hc_black": "comment: #7CA668" } }, { "c": " Numbers", "t": "source.arret comment.line.semicolon.arret", "r": { "dark_plus": "comment: #6A9955", "light_plus": "comment: #008000", "dark_vs": "comment: #6A9955", "light_vs": "comment: #008000", "hc_black": "comment: #7CA668" } }, { "c": "-10", "t": "source.arret constant.numeric.long.arret", "r": { "dark_plus": "constant.numeric: #B5CEA8", "light_plus": "constant.numeric: #098658", "dark_vs": "constant.numeric: #B5CEA8", "light_vs": "constant.numeric: #098658", "hc_black": "constant.numeric: #B5CEA8" } }, { "c": "10", "t": "source.arret constant.numeric.long.arret", "r": { "dark_plus": "constant.numeric: #B5CEA8", "light_plus": "constant.numeric: #098658", "dark_vs": "constant.numeric: #B5CEA8", "light_vs": "constant.numeric: #098658", "hc_black": "constant.numeric: #B5CEA8" } }, { "c": "-100", "t": "source.arret constant.numeric.long.arret", "r": { "dark_plus": "constant.numeric: #B5CEA8", "light_plus": "constant.numeric: #098658", "dark_vs": "constant.numeric: #B5CEA8", "light_vs": "constant.numeric: #098658", "hc_black": "constant.numeric: #B5CEA8" } }, { "c": "100", "t": "source.arret constant.numeric.long.arret", "r": { "dark_plus": "constant.numeric: #B5CEA8", "light_plus": "constant.numeric: #098658", "dark_vs": "constant.numeric: #B5CEA8", "light_vs": "constant.numeric: #098658", "hc_black": "constant.numeric: #B5CEA8" } }, { "c": ";", "t": "source.arret comment.line.semicolon.arret 
punctuation.definition.comment.arret", "r": { "dark_plus": "comment: #6A9955", "light_plus": "comment: #008000", "dark_vs": "comment: #6A9955", "light_vs": "comment: #008000", "hc_black": "comment: #7CA668" } }, { "c": " Macros", "t": "source.arret comment.line.semicolon.arret", "r": { "dark_plus": "comment: #6A9955", "light_plus": "comment: #008000", "dark_vs": "comment: #6A9955", "light_vs": "comment: #008000", "hc_black": "comment: #7CA668" } }, { "c": "(", "t": "source.arret meta.expression.arret punctuation.section.expression.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "defmacro", "t": "source.arret meta.expression.arret keyword.control.arret", "r": { "dark_plus": "keyword.control: #C586C0", "light_plus": "keyword.control: #AF00DB", "dark_vs": "keyword.control: #569CD6", "light_vs": "keyword.control: #0000FF", "hc_black": "keyword.control: #C586C0" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "cond", "t": "source.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "(", "t": "source.arret meta.expression.arret meta.expression.arret punctuation.section.expression.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": 
"default: #FFFFFF" } }, { "c": "macro-rules", "t": "source.arret meta.expression.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "[", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret punctuation.section.vector.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "(", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret punctuation.section.expression.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ")", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret punctuation.section.expression.end.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "(", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret punctuation.section.expression.begin.arret", "r": { "dark_plus": "default: #D4D4D4", 
"light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ")", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret punctuation.section.expression.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "]", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret punctuation.section.vector.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "[", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret punctuation.section.vector.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "(", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret punctuation.section.expression.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "test-expr", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret entity.name.function.arret", "r": { "dark_plus": "entity.name.function: #DCDCAA", "light_plus": "entity.name.function: #795E26", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", 
"hc_black": "entity.name.function: #DCDCAA" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "body-expr", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "rest-clauses", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "...", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ")", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret 
punctuation.section.expression.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "(", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret punctuation.section.expression.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "if", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "test-expr", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", 
"hc_black": "default: #FFFFFF" } }, { "c": "body-expr", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "(", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.expression.arret punctuation.section.expression.begin.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "cond", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.expression.arret storage.control.arret", "r": { "dark_plus": "storage: #569CD6", "light_plus": "storage: #0000FF", "dark_vs": "storage: #569CD6", "light_vs": "storage: #0000FF", "hc_black": "storage: #569CD6" } }, { "c": " ", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "rest-clauses", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.expression.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": " ", 
"t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.expression.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "...", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.expression.arret meta.symbol.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ")", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret meta.expression.arret punctuation.section.expression.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ")", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret meta.expression.arret punctuation.section.expression.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": "]", "t": "source.arret meta.expression.arret meta.expression.arret meta.vector.arret punctuation.section.vector.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ")", "t": "source.arret meta.expression.arret meta.expression.arret punctuation.section.expression.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } }, { "c": ")", "t": "source.arret 
meta.expression.arret punctuation.section.expression.end.trailing.arret", "r": { "dark_plus": "default: #D4D4D4", "light_plus": "default: #000000", "dark_vs": "default: #D4D4D4", "light_vs": "default: #000000", "hc_black": "default: #FFFFFF" } } ] ================================================ FILE: editors/code/src/test/downloadVsCode.ts ================================================ import { downloadAndUnzipVSCode } from 'vscode-test'; import { VSCODE_VERSION } from './vsCodeVersion'; async function main(): Promise { try { await downloadAndUnzipVSCode(VSCODE_VERSION); } catch (err) { console.error('Failed to download VS Code'); process.exit(1); } } void main(); ================================================ FILE: editors/code/src/test/runTest.ts ================================================ import * as path from 'path'; import { runTests } from 'vscode-test'; import { VSCODE_VERSION } from './vsCodeVersion'; async function main(): Promise { try { // The folder containing the Extension Manifest package.json // Passed to `--extensionDevelopmentPath` const extensionDevelopmentPath = path.resolve(__dirname, '../../'); // The path to test runner // Passed to --extensionTestsPath const extensionTestsPath = path.resolve(__dirname, './suite/index'); // Download VS Code, unzip it and run the integration test await runTests({ extensionDevelopmentPath, extensionTestsPath, version: VSCODE_VERSION, }); } catch (err) { console.error('Failed to run tests'); process.exit(1); } } void main(); ================================================ FILE: editors/code/src/test/suite/colorization.test.ts ================================================ /* eslint-disable @typescript-eslint/no-unsafe-assignment */ /* eslint-disable @typescript-eslint/no-unsafe-member-access */ import * as assert from 'assert'; import * as fs from 'fs'; import { join, basename, dirname } from 'path'; import { commands, Uri } from 'vscode'; const hasThemeChange = ( d: { [key: string]: unknown }, p: { 
[key: string]: unknown }, ): boolean => { const keys = Object.keys(d); for (const key of keys) { if (d[key] !== p[key]) { return true; } } return false; }; const assertUnchangedTokens = ( testFixurePath: string, done: (err?: unknown) => void, ): Thenable => { const fileName = basename(testFixurePath); return commands .executeCommand('_workbench.captureSyntaxTokens', Uri.file(testFixurePath)) .then((data) => { try { const resultsFolderPath = join( dirname(dirname(testFixurePath)), 'colorize-results', ); if (!fs.existsSync(resultsFolderPath)) { fs.mkdirSync(resultsFolderPath); } const resultPath = join( resultsFolderPath, fileName.replace('.', '_') + '.json', ); if (fs.existsSync(resultPath)) { const previousData = JSON.parse( fs.readFileSync(resultPath).toString(), ); try { assert.deepEqual(data, previousData); } catch (e) { fs.writeFileSync(resultPath, JSON.stringify(data, null, '\t'), { flag: 'w', }); if ( Array.isArray(data) && Array.isArray(previousData) && data.length === previousData.length ) { for (let i = 0; i < data.length; i++) { const d = data[i]; const p = previousData[i]; if (d.c !== p.c || hasThemeChange(d.r, p.r)) { throw e; } } // different but no tokenization ot color change: no failure } else { throw e; } } } else { fs.writeFileSync(resultPath, JSON.stringify(data, null, '\t')); } done(); } catch (e) { done(e); } }, done); }; suite('colorization', () => { const extensionColorizeFixturePath = join( __dirname, '../../../src/test/colorize-fixtures', ); if (fs.existsSync(extensionColorizeFixturePath)) { const fixturesFiles = fs.readdirSync(extensionColorizeFixturePath); fixturesFiles.forEach((fixturesFile) => { // define a test for each fixture test(fixturesFile, function (done) { void assertUnchangedTokens( join(extensionColorizeFixturePath, fixturesFile), done, ); }); }); } }); ================================================ FILE: editors/code/src/test/suite/extension.test.ts ================================================ import * as assert from 
'assert'; // You can import and use all API from the 'vscode' module // as well as import your extension to test it import * as vscode from 'vscode'; // import * as myExtension from '../extension'; suite('Extension Test Suite', () => { void vscode.window.showInformationMessage('Start all tests.'); test('Sample test', () => { assert.equal(-1, [1, 2, 3].indexOf(5)); assert.equal(-1, [1, 2, 3].indexOf(0)); }); }); ================================================ FILE: editors/code/src/test/suite/index.ts ================================================ import * as path from 'path'; import * as Mocha from 'mocha'; import * as glob from 'glob'; export function run(): Promise { // Create the mocha test const mocha = new Mocha({ ui: 'tdd', color: true, }); const testsRoot = path.resolve(__dirname, '..'); return new Promise((c, e) => { glob('**/**.test.js', { cwd: testsRoot }, (err, files) => { if (err) { return e(err); } // Add files to the test suite files.forEach((f) => mocha.addFile(path.resolve(testsRoot, f))); try { // Run the mocha test mocha.run((failures) => { if (failures > 0) { e(new Error(`${failures} tests failed.`)); } else { c(); } }); } catch (err) { e(err); } }); }); } ================================================ FILE: editors/code/src/test/vsCodeVersion.ts ================================================ export const VSCODE_VERSION = '1.60.0'; ================================================ FILE: editors/code/syntaxes/arret.tmLanguage.json ================================================ { "information_for_contributors": [ "This file is based on https://github.com/microsoft/vscode/blob/master/extensions/clojure/syntaxes/clojure.tmLanguage.json" ], "name": "Arret", "scopeName": "source.arret", "patterns": [ { "include": "#comment" }, { "include": "#quoted-sexp" }, { "include": "#sexp" }, { "include": "#keyfn" }, { "include": "#string" }, { "include": "#vector" }, { "include": "#set" }, { "include": "#map" }, { "include": "#var" }, { "include": 
"#constants" }, { "include": "#namespace-symbol" }, { "include": "#symbol" } ], "repository": { "comment": { "begin": "(?\\<\\/\\!\\?\\*]+(?=(\\s|\\)|\\]|\\}|\\,))", "name": "constant.keyword.arret" }, "keyfn": { "patterns": [ { "match": "(?<=(\\s|\\(|\\[|\\{))(if(-[-\\p{Ll}\\?]*)?|when(-[-\\p{Ll}]*)?|for(-[-\\p{Ll}]*)?|compile-error|cond|do|macro-rules|quote|letmacro|lettype|letrecord|let(-[-\\p{Ll}\\?]*)?|loop|recur|fn|([\\p{Ll}]*case))(?=(\\s|\\)|\\]|\\}))", "name": "storage.control.arret" }, { "match": "(?<=(\\s|\\(|\\[|\\{))(import|export|defmacro|deftype|defmacro|(def[\\p{Ll}\\-]*))(?=(\\s|\\)|\\]|\\}))", "name": "keyword.control.arret" } ] }, "dynamic-variables": { "match": "\\*[\\w\\.\\-\\_\\:\\+\\=\\>\\<\\!\\?\\d]+\\*", "name": "meta.symbol.dynamic.arret" }, "map": { "begin": "(\\{)", "beginCaptures": { "1": { "name": "punctuation.section.map.begin.arret" } }, "end": "(\\}(?=[\\}\\]\\)\\s]*(?:;|$)))|(\\})", "endCaptures": { "1": { "name": "punctuation.section.map.end.trailing.arret" }, "2": { "name": "punctuation.section.map.end.arret" } }, "name": "meta.map.arret", "patterns": [ { "include": "$self" } ] }, "quoted-sexp": { "begin": "(['``]\\()", "beginCaptures": { "1": { "name": "punctuation.section.expression.begin.arret" } }, "end": "(\\))$|(\\)(?=[\\}\\]\\)\\s]*(?:;|$)))|(\\))", "endCaptures": { "1": { "name": "punctuation.section.expression.end.trailing.arret" }, "2": { "name": "punctuation.section.expression.end.trailing.arret" }, "3": { "name": "punctuation.section.expression.end.arret" } }, "name": "meta.quoted-expression.arret", "patterns": [ { "include": "$self" } ] }, "set": { "begin": "(\\#\\{)", "beginCaptures": { "1": { "name": "punctuation.section.set.begin.arret" } }, "end": "(\\}(?=[\\}\\]\\)\\s]*(?:;|$)))|(\\})", "endCaptures": { "1": { "name": "punctuation.section.set.end.trailing.arret" }, "2": { "name": "punctuation.section.set.end.arret" } }, "name": "meta.set.arret", "patterns": [ { "include": "$self" } ] }, "sexp": { "begin": 
"(\\()", "beginCaptures": { "1": { "name": "punctuation.section.expression.begin.arret" } }, "end": "(\\))$|(\\)(?=[\\}\\]\\)\\s]*(?:;|$)))|(\\))", "endCaptures": { "1": { "name": "punctuation.section.expression.end.trailing.arret" }, "2": { "name": "punctuation.section.expression.end.trailing.arret" }, "3": { "name": "punctuation.section.expression.end.arret" } }, "name": "meta.expression.arret", "patterns": [ { "include": "#keyfn" }, { "include": "#constants" }, { "include": "#vector" }, { "include": "#map" }, { "include": "#set" }, { "include": "#sexp" }, { "match": "(?<=\\()(.+?)(?=\\s|\\))", "captures": { "1": { "name": "entity.name.function.arret" } }, "patterns": [ { "include": "$self" } ] }, { "include": "$self" } ] }, "string": { "begin": "(?\\<\\!\\?\\*][\\w\\.\\-\\_\\:\\+\\=\\>\\<\\!\\?\\*\\d]*)/", "captures": { "1": { "name": "meta.symbol.namespace.arret" } } } ] }, "symbol": { "patterns": [ { "match": "([\\p{L}\\.\\-\\_\\+\\=\\>\\<\\!\\?\\*][\\w\\.\\-\\_\\:\\+\\=\\>\\<\\!\\?\\*\\d]*)", "name": "meta.symbol.arret" } ] }, "var": { "match": "(?<=(\\s|\\(|\\[|\\{)\\#)'[\\w\\.\\-\\_\\:\\+\\=\\>\\<\\/\\!\\?\\*]+(?=(\\s|\\)|\\]|\\}))", "name": "meta.var.arret" }, "vector": { "begin": "(\\[)", "beginCaptures": { "1": { "name": "punctuation.section.vector.begin.arret" } }, "end": "(\\](?=[\\}\\]\\)\\s]*(?:;|$)))|(\\])", "endCaptures": { "1": { "name": "punctuation.section.vector.end.trailing.arret" }, "2": { "name": "punctuation.section.vector.end.arret" } }, "name": "meta.vector.arret", "patterns": [ { "include": "$self" } ] } } } ================================================ FILE: editors/code/tsconfig.json ================================================ { "compilerOptions": { "module": "commonjs", "target": "es2019", "outDir": "out", "sourceMap": true, "rootDir": "src", "strict": true }, "exclude": ["node_modules", ".vscode-test"] } ================================================ FILE: lsp-server/Cargo.toml 
================================================ [package] name = "arret-lsp-server" version = "0.1.0" edition = "2018" authors = ["Ryan Cumming "] [[bin]] name = "arret-lsp-server" path = "main.rs" [dependencies] # This matches the version range in `codespan-lsp` lsp-types = "0.84" arret-syntax = { path = "../syntax" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" [dependencies.tokio] version = "1.14" features = [ "rt", "rt-multi-thread", "io-util", "io-std", "macros", "sync" ] ================================================ FILE: lsp-server/capabilities.rs ================================================ pub fn server_capabilities() -> lsp_types::ServerCapabilities { lsp_types::ServerCapabilities { text_document_sync: Some(lsp_types::TextDocumentSyncCapability::Options( lsp_types::TextDocumentSyncOptions { open_close: Some(true), change: Some(lsp_types::TextDocumentSyncKind::Incremental), ..Default::default() }, )), workspace: Some(lsp_types::WorkspaceCapability { workspace_folders: Some(lsp_types::WorkspaceFolderCapability { supported: Some(true), change_notifications: Some( lsp_types::WorkspaceFolderCapabilityChangeNotifications::Bool(true), ), }), }), ..Default::default() } } ================================================ FILE: lsp-server/handler/mod.rs ================================================ mod text_synchronisation; use text_synchronisation::*; mod workspace; use workspace::*; use lsp_types::notification::Notification as LspNotification; use crate::json_rpc::{ErrorCode, Notification, Request, Response}; use crate::session::State; /// Trait for handling notifications pub trait SyncNotificationHandler { type Notification: LspNotification; fn handle(state: &mut State, params: ::Params); } macro_rules! 
build_notification_dispatcher { ($name:ident, { $( $sync_handler:ty ),* }) => { pub fn $name(state: &mut State, notification: Notification) { match notification.method.as_str() { $( <$sync_handler as SyncNotificationHandler>::Notification::METHOD => { let params = serde_json::from_value(notification.params) .expect("Could not parse notification params"); <$sync_handler as SyncNotificationHandler>::handle(state, params); } )*, other => { // Allow optional notifications if !other.starts_with("$/") { eprintln!("Unexpected notification method '{}'", notification.method); } } } } }; } build_notification_dispatcher!(handle_non_lifecycle_notification, { DidOpenTextDocumentHandler, DidChangeTextDocumentHandler, DidCloseTextDocumentHandler, DidChangeWorkspaceFoldersHandler }); pub fn handle_non_lifecycle_request(_state: &mut State, request: Request) -> Response { // We only support lifecycle requests at the moment Response::new_err(request.id, ErrorCode::MethodNotFound, "Method not found") } ================================================ FILE: lsp-server/handler/text_synchronisation.rs ================================================ use std::sync::Arc; use crate::handler::SyncNotificationHandler; use crate::model::Document; use crate::session::State; use crate::watcher::DocumentWatcher; pub struct DidOpenTextDocumentHandler; impl SyncNotificationHandler for DidOpenTextDocumentHandler { type Notification = lsp_types::notification::DidOpenTextDocument; fn handle(state: &mut State, params: lsp_types::DidOpenTextDocumentParams) { let text_document = params.text_document; let document = Arc::new(Document::new(text_document.version, text_document.text)); state.syntax_watcher.did_open(&text_document.uri, &document); state .documents .insert(text_document.uri.to_string(), document); } } pub struct DidChangeTextDocumentHandler; impl SyncNotificationHandler for DidChangeTextDocumentHandler { type Notification = lsp_types::notification::DidChangeTextDocument; fn handle(state: &mut 
State, params: lsp_types::DidChangeTextDocumentParams) { let lsp_types::DidChangeTextDocumentParams { text_document, content_changes, } = params; let orig_document = if let Some(document) = state.documents.remove(text_document.uri.as_str()) { document } else { eprintln!( "Received change notification for unknown document {}", text_document.uri ); return; }; let new_document = content_changes .into_iter() .fold( orig_document, |prev_document, content_change| match content_change.range { Some(range) => { match prev_document.with_range_edit( text_document.version, range, &content_change.text, ) { Ok(new_document) => Arc::new(new_document), Err(()) => { eprintln!( "Could not find range to replace in {}", text_document.uri ); prev_document } } } None => Arc::new(Document::new(text_document.version, content_change.text)), }, ); state .syntax_watcher .did_change(&text_document.uri, &new_document); state .documents .insert(text_document.uri.to_string(), new_document); } } pub struct DidCloseTextDocumentHandler; impl SyncNotificationHandler for DidCloseTextDocumentHandler { type Notification = lsp_types::notification::DidCloseTextDocument; fn handle(state: &mut State, params: lsp_types::DidCloseTextDocumentParams) { let text_document = params.text_document; state.documents.remove(text_document.uri.as_str()); state.syntax_watcher.did_close(&text_document.uri); } } ================================================ FILE: lsp-server/handler/workspace.rs ================================================ use std::sync::Arc; use crate::handler::SyncNotificationHandler; use crate::model::Workspace; use crate::session::State; pub struct DidChangeWorkspaceFoldersHandler; impl SyncNotificationHandler for DidChangeWorkspaceFoldersHandler { type Notification = lsp_types::notification::DidChangeWorkspaceFolders; fn handle(state: &mut State, params: lsp_types::DidChangeWorkspaceFoldersParams) { let lsp_types::DidChangeWorkspaceFoldersParams { event } = params; for added in event.added { 
state .workspaces .insert(added.uri.to_string(), Arc::new(Workspace::new(added.name))); } for removed in event.removed { state.workspaces.remove(removed.uri.as_str()); } } } ================================================ FILE: lsp-server/json_rpc.rs ================================================ use serde::{Deserialize, Serialize}; // This was originally stolen from rust-analyzer/lsp-server #[repr(i32)] pub enum ErrorCode { ServerNotInitialized = -32002, InvalidRequest = -32600, MethodNotFound = -32601, } #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] #[serde(untagged)] pub enum ClientMessage { Request(Request), Notification(Notification), } impl From for ClientMessage { fn from(request: Request) -> ClientMessage { ClientMessage::Request(request) } } impl From for ClientMessage { fn from(notification: Notification) -> ClientMessage { ClientMessage::Notification(notification) } } #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] #[serde(untagged)] pub enum ServerMessage { Response(Response), Notification(Notification), } impl From for ServerMessage { fn from(response: Response) -> ServerMessage { ServerMessage::Response(response) } } impl From for ServerMessage { fn from(notification: Notification) -> ServerMessage { ServerMessage::Notification(notification) } } #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] pub struct Notification { pub method: String, pub params: serde_json::Value, } impl Notification { pub fn new(method: impl Into, params: impl Serialize) -> Self { Notification { method: method.into(), params: serde_json::to_value(params).expect("Could not serialise notification"), } } pub fn new_lsp(params: N::Params) -> Self where N: lsp_types::notification::Notification, N::Params: Serialize, { Self::new(N::METHOD, params) } } #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] #[serde(untagged)] enum IdRepr { U64(u64), String(String), } #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] #[serde(transparent)] 
pub struct RequestId(IdRepr); impl From for RequestId { fn from(id: u64) -> RequestId { RequestId(IdRepr::U64(id)) } } impl From for RequestId { fn from(id: String) -> RequestId { RequestId(IdRepr::String(id)) } } #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] pub struct Request { pub id: RequestId, pub method: String, pub params: serde_json::Value, } impl Request { #[cfg(test)] pub fn new(id: RequestId, method: impl Into, params: impl Serialize) -> Self { Request { id, method: method.into(), params: serde_json::to_value(params).expect("Could not serialise request"), } } #[cfg(test)] pub fn new_lsp(id: RequestId, params: N::Params) -> Self where N: lsp_types::request::Request, N::Params: Serialize, { Self::new(id, N::METHOD, params) } } #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] pub struct Response { pub id: RequestId, #[serde(skip_serializing_if = "Option::is_none")] pub result: Option, #[serde(skip_serializing_if = "Option::is_none")] pub error: Option, } #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] pub struct ResponseError { pub code: i32, pub message: String, #[serde(skip_serializing_if = "Option::is_none")] pub data: Option, } impl Response { pub fn new_ok(id: RequestId, result: impl Serialize) -> Response { Response { id, result: Some(serde_json::to_value(result).expect("Could not serialise result")), error: None, } } pub fn new_err(id: RequestId, code: ErrorCode, message: impl Into) -> Response { let error = ResponseError { code: code as i32, message: message.into(), data: None, }; Response { id, result: None, error: Some(error), } } } ================================================ FILE: lsp-server/main.rs ================================================ mod capabilities; mod handler; mod json_rpc; mod model; mod session; mod transport; mod watcher; use tokio::io; #[tokio::main] async fn main() -> Result<(), ()> { let reader = io::BufReader::new(io::stdin()); let writer = io::stdout(); let connection = 
transport::bytestream::create_connection(reader, writer);
    session::run(connection).await
}

================================================ FILE: lsp-server/model/document.rs ================================================

use arret_syntax::span::Span;

/// In-memory copy of a client document with a cached line offset index
#[derive(Debug)]
pub struct Document {
    version: i32,
    text: String,
    // Byte offset of the start of each line; always begins with 0
    line_offsets: Vec<usize>,
}

/// Returns the byte offsets of every line start in `source`
fn line_offsets_for_str(source: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(source.match_indices('\n').map(|(i, _)| i + 1))
        .collect()
}

impl Document {
    pub fn new(version: i32, text: String) -> Document {
        Document {
            version,
            line_offsets: line_offsets_for_str(&text),
            text,
        }
    }

    /// Returns a new instance of the document with the specified range replaced
    ///
    /// Returns `Err(())` if the range's start position is outside the document.
    pub fn with_range_edit(
        &self,
        new_version: i32,
        range: lsp_types::Range,
        new_range_text: &str,
    ) -> Result<Document, ()> {
        let start_offset = if let Some(start_offset) = self.position_to_offset(range.start) {
            start_offset
        } else {
            return Err(());
        };

        // `None` means the range extends past the end of the document
        let end_offset = self.position_to_offset(range.end);

        // Rebuild the new text
        let mut new_text = self.text[..start_offset].to_string() + new_range_text;
        if let Some(end_offset) = end_offset {
            new_text += &self.text[end_offset..];
        }

        // Preserve the line offsets from before the edit
        let mut new_line_offsets = self.line_offsets[..=range.start.line as usize].to_vec();

        // Add the line offsets inside the new range
        new_line_offsets.extend(
            new_range_text
                .match_indices('\n')
                .map(|(i, _)| i + start_offset + 1),
        );

        if let Some(end_offset) = end_offset {
            // Shift the remaining offsets to account for the size of the new range
            let previous_len = end_offset - start_offset;
            new_line_offsets.extend(
                self.line_offsets[range.end.line as usize + 1..]
                    .iter()
                    .map(|i| i + new_range_text.len() - previous_len),
            )
        }

        Ok(Document {
            version: new_version,
            line_offsets: new_line_offsets,
            text: new_text,
        })
    }

    /// Returns the document version
    pub fn version(&self) -> i32 {
        self.version
    }

    /// Returns the document text
    pub fn text(&self) -> &str {
        self.text.as_ref()
    }

    /// Returns an LSP `Range` for the given `arret-syntax` `Span`
    pub fn span_to_range(&self, span: Span) -> lsp_types::Range {
        lsp_types::Range {
            start: self.offset_to_position(span.start() as usize),
            end: self.offset_to_position(span.end() as usize),
        }
    }

    /// Returns the position for the given byte offset
    ///
    /// LSP positions count characters in UTF-16 code units.
    pub fn offset_to_position(&self, offset: usize) -> lsp_types::Position {
        let line = match self
            .line_offsets
            .binary_search_by(|line_start| line_start.cmp(&offset))
        {
            Ok(line) => line,
            Err(line) => line - 1,
        };

        let line_start = self.line_offsets[line];

        let character: usize = self.text[line_start..offset]
            .chars()
            .map(|c| c.len_utf16())
            .sum();

        lsp_types::Position {
            line: line as u32,
            character: character as u32,
        }
    }

    /// Returns the byte offset for the given position, or `None` if it's past the document's end
    fn position_to_offset(&self, position: lsp_types::Position) -> Option<usize> {
        // Lines are already computed
        let line_offset = *self.line_offsets.get(position.line as usize)?;

        if position.character == 0 {
            return Some(line_offset);
        }

        let mut utf16_chars_remaining = position.character as usize;
        for (char_offset, c) in self.text[line_offset..].char_indices() {
            // NOTE(review): this can underflow if the position points inside a surrogate
            // pair (remaining 1, len_utf16() == 2) — confirm clients can't produce that
            utf16_chars_remaining -= c.len_utf16();

            if utf16_chars_remaining == 0 {
                return Some(line_offset + char_offset + c.len_utf8());
            }
        }

        // Ran out of string
        None
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Asserts the cached line offsets agree with offsets recomputed from the text
    fn assert_consistency(doc: &Document) {
        assert_eq!(line_offsets_for_str(&doc.text), doc.line_offsets);
    }

    #[test]
    fn test_positions() {
        let doc = Document::new(1, "Hello 💣\nNext line\n".into());

        assert_eq!(
            lsp_types::Position {
                line: 0,
                character: 0
            },
            doc.offset_to_position(0)
        );

        assert_eq!(
            lsp_types::Position {
                line: 0,
                character: 6
            },
            doc.offset_to_position(6)
        );

        assert_eq!(
            lsp_types::Position {
                line: 0,
                character: 8
            },
            doc.offset_to_position(10)
        );

        assert_eq!(
            lsp_types::Position {
                line: 1,
                character: 0
            },
            doc.offset_to_position(11)
        );

        assert_eq!(
            lsp_types::Position {
                line: 2,
                character: 0
            },
            doc.offset_to_position(21)
        );
    }

    #[test]
    fn test_append_to_empty() {
        let doc = Document::new(1, "".into())
            .with_range_edit(
                2,
                lsp_types::Range {
                    start: lsp_types::Position {
                        line: 0,
                        character: 0,
                    },
                    end: lsp_types::Position {
                        line: 0,
                        character: 7,
                    },
                },
                "abc-123",
            )
            .unwrap();

        assert_eq!(&doc.text, "abc-123");
        assert_consistency(&doc);
    }

    #[test]
    fn test_append_to_line() {
        let doc = Document::new(1, "Hello".into())
            .with_range_edit(
                2,
                lsp_types::Range {
                    start: lsp_types::Position {
                        line: 0,
                        character: 5,
                    },
                    end: lsp_types::Position {
                        line: 0,
                        character: 5,
                    },
                },
                ", world!",
            )
            .unwrap();

        assert_eq!(&doc.text, "Hello, world!");
        assert_consistency(&doc);
    }

    #[test]
    fn test_erase_all() {
        let doc = Document::new(1, "abc-123".into())
            .with_range_edit(
                2,
                lsp_types::Range {
                    start: lsp_types::Position {
                        line: 0,
                        character: 0,
                    },
                    end: lsp_types::Position {
                        line: 0,
                        character: 7,
                    },
                },
                "",
            )
            .unwrap();

        assert_eq!(&doc.text, "");
        assert_consistency(&doc);
    }

    #[test]
    fn test_replace_line() {
        let doc = Document::new(1, "hello\nnebraska\n".into())
            .with_range_edit(
                2,
                lsp_types::Range {
                    start: lsp_types::Position {
                        line: 1,
                        character: 0,
                    },
                    end: lsp_types::Position {
                        line: 1,
                        character: 8,
                    },
                },
                "world",
            )
            .unwrap();

        assert_eq!(&doc.text, "hello\nworld\n");
        assert_consistency(&doc);
    }

    #[test]
    fn test_insert_line() {
        let doc = Document::new(1, "hello\nworld\n".into())
            .with_range_edit(
                2,
                lsp_types::Range {
                    start: lsp_types::Position {
                        line: 1,
                        character: 0,
                    },
                    end: lsp_types::Position {
                        line: 1,
                        character: 0,
                    },
                },
                "entire\n",
            )
            .unwrap();

        assert_eq!(&doc.text, "hello\nentire\nworld\n");
        assert_consistency(&doc);
    }

    #[test]
    fn test_delete_line() {
        let doc = Document::new(1, "hello\nentire\nworld\n".into())
            .with_range_edit(
                2,
                lsp_types::Range {
                    start: lsp_types::Position {
                        line: 1,
                        character: 0,
                    },
                    end: lsp_types::Position {
                        line: 2,
                        character: 0,
                    },
                },
                "",
            )
            .unwrap();

        assert_eq!(&doc.text, "hello\nworld\n");
        assert_consistency(&doc);
    }

    #[test]
    fn test_delete_utf16() {
        let doc = Document::new(1, "Defuse 💣 me".into())
            .with_range_edit(
                2,
                lsp_types::Range {
                    start: lsp_types::Position {
                        line: 0,
                        character: 7,
                    },
                    end: lsp_types::Position {
                        line: 0,
                        character: 10,
                    },
                },
                "",
            )
            .unwrap();

        assert_eq!(&doc.text, "Defuse me");
        assert_consistency(&doc);
    }
}

================================================ FILE: lsp-server/model/mod.rs ================================================

mod document;
mod workspace;

pub use document::Document;
pub use workspace::Workspace;

================================================ FILE: lsp-server/model/workspace.rs ================================================

/// Workspace folder known to the session
#[derive(Debug)]
pub struct Workspace {
    _name: String,
}

impl Workspace {
    pub fn new(name: String) -> Workspace {
        Workspace { _name: name }
    }
}

================================================ FILE: lsp-server/session.rs ================================================

use std::collections::HashMap;
use std::sync::Arc;

use tokio::sync::mpsc;

use crate::capabilities::server_capabilities;
use crate::handler;
use crate::json_rpc::{ClientMessage, ErrorCode, Response, ServerMessage};
use crate::model::{Document, Workspace};
use crate::transport::Connection;
use crate::watcher::SyntaxWatcher;

/// Mutable session state shared by the message handlers
pub struct State {
    // NOTE(review): key types reconstructed as URL strings — confirm against handler.rs
    pub documents: HashMap<String, Arc<Document>>,
    pub workspaces: HashMap<String, Arc<Workspace>>,
    pub syntax_watcher: SyntaxWatcher,
}

impl State {
    fn new(
        outgoing: mpsc::Sender<ServerMessage>,
        initialize_params: lsp_types::InitializeParams,
    ) -> State {
        let initial_workspaces = initialize_params
            .workspace_folders
            .map(|workspace_folders| {
                workspace_folders
                    .into_iter()
                    .map(|workspace_folder| {
                        (
                            workspace_folder.uri.to_string(),
                            Arc::new(Workspace::new(workspace_folder.name)),
                        )
                    })
                    .collect()
            })
            .unwrap_or_else(HashMap::new);

        State {
documents: HashMap::new(), workspaces: initial_workspaces, syntax_watcher: SyntaxWatcher::new(outgoing), } } async fn shutdown(self) { self.syntax_watcher.shutdown().await; } } pub fn create_initialize_response() -> lsp_types::InitializeResult { lsp_types::InitializeResult { server_info: Some(lsp_types::ServerInfo { name: "arret-lsp-server".to_owned(), version: option_env!("CARGO_PKG_VERSION").map(str::to_owned), }), capabilities: server_capabilities(), } } /// Runs a session loop against the provided connection /// /// On a clean exit (`shutdown` followed by `exit`) this will return `Ok`, otherwise it will return /// `Err`. pub async fn run(connection: Connection) -> Result<(), ()> { let Connection { mut incoming, outgoing, } = connection; /// Receives an incoming message or returns `Err` if the receive channel is closed /// /// This will cause us to exit uncleanly if our connection closes unexpectedly. macro_rules! recv_or_return_err { () => { match incoming.recv().await { Some(incoming_message) => incoming_message, None => { eprintln!("Connection unexpectedly closed"); return Err(()); } } }; } /// Sends the outgoing message or returns `Err` if the send channel is closed macro_rules! 
send_or_return_err { ($outgoing_message:expr) => { if outgoing.send($outgoing_message.into()).await.is_err() { eprintln!("Connection unexpectedly closed"); return Err(()); } }; } // Wait for initialize let initialize_request = loop { match recv_or_return_err!() { ClientMessage::Notification(notification) => { if notification.method == "exit" { // Unclean exit return Err(()); } } ClientMessage::Request(request) if request.method.as_str() == "initialize" => { break request; } ClientMessage::Request(request) => { send_or_return_err!(Response::new_err( request.id, ErrorCode::ServerNotInitialized, "Server not initialized" )); } } }; let params: lsp_types::InitializeParams = serde_json::from_value(initialize_request.params) .expect("Could not parse initialize request params"); let mut state = State::new(outgoing.clone(), params); let initialize_response = create_initialize_response(); send_or_return_err!(Response::new_ok( initialize_request.id.clone(), initialize_response )); // Process normal messages until we receive a shutdown request loop { match recv_or_return_err!() { ClientMessage::Notification(notification) if notification.method == "initialized" => { // Nothing do to } ClientMessage::Notification(notification) if notification.method == "exit" => { // Tear down our state or we'll likely to panic if there are concurrent operations state.shutdown().await; // Unclean exit return Err(()); } ClientMessage::Notification(notification) => { handler::handle_non_lifecycle_notification(&mut state, notification); } ClientMessage::Request(request) if request.method == "shutdown" => { send_or_return_err!(Response::new_ok(request.id, ())); break; } ClientMessage::Request(request) => { send_or_return_err!(handler::handle_non_lifecycle_request(&mut state, request)); } } } // Cleanly shutdown our state state.shutdown().await; // Wait for exit loop { match recv_or_return_err!() { ClientMessage::Notification(notification) => { if notification.method == "exit" { return Ok(()); } } 
ClientMessage::Request(request) => { send_or_return_err!(Response::new_err( request.id, ErrorCode::InvalidRequest, "Shutting down" )); } } } } #[cfg(test)] mod test { use super::*; use std::future::Future; use tokio::sync::mpsc; use crate::json_rpc::{Notification, Request, RequestId, ServerMessage}; struct TestSession where F: Future>, { outgoing: mpsc::Receiver, incoming: mpsc::Sender, exit_future: F, } fn run_test_session() -> TestSession>> { let (send_outgoing, recv_outgoing) = mpsc::channel::(4); let (send_incoming, recv_incoming) = mpsc::channel::(4); let session = run(Connection { outgoing: send_outgoing, incoming: recv_incoming, }); TestSession { outgoing: recv_outgoing, incoming: send_incoming, exit_future: session, } } fn expect_response(server_message: ServerMessage) -> Response { if let ServerMessage::Response(response) = server_message { response } else { panic!("Expected response, got {:?}", server_message); } } #[allow(deprecated)] #[tokio::test] async fn test_clean_lifecycle() { let TestSession { mut outgoing, incoming, exit_future, } = run_test_session(); tokio::spawn(async move { // We should return an error for messages before initialization incoming .send(Request::new_lsp::(123.into(), ()).into()) .await .unwrap(); let response = expect_response(outgoing.recv().await.unwrap()); assert_eq!( Response::new_err( 123.into(), ErrorCode::ServerNotInitialized, "Server not initialized" ), response, ); // Now initialize let initialize_params = lsp_types::InitializeParams { process_id: None, root_path: None, root_uri: None, initialization_options: None, capabilities: Default::default(), trace: None, workspace_folders: None, client_info: None, }; incoming .send( Request::new_lsp::( "123".to_owned().into(), initialize_params, ) .into(), ) .await .unwrap(); let response = expect_response(outgoing.recv().await.unwrap()); // Don't assert the exact body assert_eq!(response.id, RequestId::from("123".to_owned())); assert!(response.error.is_none()); // Send 
initialized notification incoming .send( Notification::new_lsp::( lsp_types::InitializedParams {}, ) .into(), ) .await .unwrap(); // Now shutdown for real incoming .send(Request::new_lsp::(456.into(), ()).into()) .await .unwrap(); let response = expect_response(outgoing.recv().await.unwrap()); assert_eq!(Response::new_ok(456.into(), ()), response,); // We should return an error on duplicate shutdown incoming .send( Request::new_lsp::("456".to_owned().into(), ()) .into(), ) .await .unwrap(); let response = expect_response(outgoing.recv().await.unwrap()); assert_eq!( Response::new_err( "456".to_owned().into(), ErrorCode::InvalidRequest, "Shutting down" ), response, ); // And send exit notification incoming .send(Notification::new_lsp::(()).into()) .await .unwrap(); }); // This should be considered a clean exit assert!(exit_future.await.is_ok()); } } ================================================ FILE: lsp-server/transport/bytestream.rs ================================================ use tokio::io; use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt}; use tokio::sync::mpsc; use crate::json_rpc::{ClientMessage, ServerMessage}; use crate::transport::Connection; fn parse_header_line(header_line: &str) -> (String, String) { let mut parts = header_line.splitn(2, ':'); let name = parts .next() .expect("Did not find header name") .trim() .to_ascii_lowercase(); let value = parts .next() .expect("Did not find header value") .trim() .to_owned(); (name, value) } /// Waits for the passed I/O future and `break`s from the current loop if the pipe is broken /// /// This is useful to propagate closing `stdout`/`stdin` by closing the respective MPSC channel. macro_rules! 
break_on_broken_pipe { ($io_future:expr, $message:expr) => { if let Err(err) = $io_future.await { if err.kind() == io::ErrorKind::BrokenPipe { break; } else { panic!("{}: {:?}", $message, err); } } }; } pub fn create_connection( mut reader: impl io::AsyncBufRead + Unpin + Send + 'static, mut writer: impl io::AsyncWrite + Unpin + Send + 'static, ) -> Connection { // Allow some concurrency with the session but 4 message is a bit excessive // This allows for backpressure on `stdin`/`stdout` let (send_outgoing, mut recv_outgoing) = mpsc::channel::(4); let (send_incoming, recv_incoming) = mpsc::channel::(4); // Write all our responses out sequentially tokio::spawn(async move { while let Some(response) = recv_outgoing.recv().await { let response_bytes = serde_json::to_vec(&response).expect("Could not serialise response"); break_on_broken_pipe!( writer.write_all( format!("Content-Length: {}\r\n\r\n", response_bytes.len()).as_bytes() ), "Could not write response header" ); break_on_broken_pipe!( writer.write_all(&response_bytes), "Could not write response body" ); break_on_broken_pipe!(writer.flush(), "Could not flush writer"); } }); tokio::spawn(async move { loop { let mut content_length: Option = None; let mut line_buffer = String::new(); // Read the header loop { break_on_broken_pipe!( reader.read_line(&mut line_buffer), "Could not read header line from stdin" ); if line_buffer == "\r\n" { // Read full header break; } let (name, value) = parse_header_line(&line_buffer); if name == "content-length" { content_length = Some(value.parse().expect("Cannot parse Content-Length")); } line_buffer.clear(); } let content_length = content_length.expect("Header had no Content-Length"); // Read the entire content let mut read_buffer = Vec::::new(); read_buffer.resize(content_length, 0); break_on_broken_pipe!( reader.read_exact(&mut read_buffer), "Could not read body from stdin" ); let client_message: ClientMessage = serde_json::from_slice(&read_buffer).expect("Invalid JSON"); if 
send_incoming.send(client_message).await.is_err() { // Channel closed break; } } }); Connection { incoming: recv_incoming, outgoing: send_outgoing, } } #[cfg(test)] mod test { use super::*; use crate::json_rpc::Notification; #[tokio::test] async fn test_happy_recv_notification() { let body = br#"{"jsonrpc":"2.0","method":"initialized","params":{}}"#; let mut message = format!("Content-Length: {}\r\n", body.len()).into_bytes(); message.extend_from_slice(b"\r\n"); message.extend_from_slice(body); let Connection { mut incoming, .. } = create_connection( io::BufReader::new(std::io::Cursor::new(message)), Vec::new(), ); let client_message = incoming.recv().await.unwrap(); assert_eq!( ClientMessage::Notification(Notification::new_lsp::< lsp_types::notification::Initialized, >(lsp_types::InitializedParams {})), client_message, ); } } ================================================ FILE: lsp-server/transport/mod.rs ================================================ pub mod bytestream; use tokio::sync::mpsc; use crate::json_rpc::{ClientMessage, ServerMessage}; pub struct Connection { /// Channel producing incoming JSON-RPC messages pub incoming: mpsc::Receiver, /// Channel accepting outgoing JSON-RPC messages pub outgoing: mpsc::Sender, } ================================================ FILE: lsp-server/watcher/mod.rs ================================================ mod syntax; pub use syntax::*; use std::sync::Arc; use crate::model::Document; /// Trait for a loosely coupled component that watches document events pub trait DocumentWatcher { /// Called when a document is opened with the specified initial contents fn did_open(&mut self, _url: &lsp_types::Url, _document: &Arc) {} /// Called when a document has changed with the updated contents fn did_change(&mut self, _url: &lsp_types::Url, _document: &Arc) {} /// Called when a document has closed fn did_close(&mut self, _url: &lsp_types::Url) {} } ================================================ FILE: 
lsp-server/watcher/syntax.rs ================================================

use std::collections::HashMap;
use std::sync::Arc;

use tokio::sync::{mpsc, watch};
use tokio::task;

use arret_syntax::parser::data_from_str;

use crate::json_rpc::{Notification, ServerMessage};
use crate::model::Document;
use crate::watcher::DocumentWatcher;

/// Parses the document and returns syntax diagnostics, empty when it parses cleanly
fn syntax_diagnostics_for_document(
    url: &lsp_types::Url,
    document: &Document,
) -> Vec<lsp_types::Diagnostic> {
    match data_from_str(None, document.text()) {
        Ok(_) => vec![],
        Err(error) => {
            let within = error.kind().within_context();

            let mut related_information = vec![];
            if let Some(within) = within {
                if let Some(open_char_span) = within.open_char_span() {
                    related_information.push(lsp_types::DiagnosticRelatedInformation {
                        location: lsp_types::Location {
                            uri: url.clone(),
                            range: document.span_to_range(open_char_span),
                        },
                        message: format!("{} starts here", within.description()),
                    });
                }

                if let Some(expected_next) = within.expected_next() {
                    related_information.push(lsp_types::DiagnosticRelatedInformation {
                        location: lsp_types::Location {
                            uri: url.clone(),
                            range: document.span_to_range(error.span()),
                        },
                        message: expected_next.description(),
                    });
                }
            }

            vec![lsp_types::Diagnostic {
                range: document.span_to_range(error.span()),
                severity: Some(lsp_types::DiagnosticSeverity::Error),
                message: error.kind().message(),
                related_information: Some(related_information),
                source: Some("arret-syntax".to_owned()),
                ..Default::default()
            }]
        }
    }
}

/// Background task that republishes syntax diagnostics whenever a document changes
struct DocumentTask {
    send_change: watch::Sender<Arc<Document>>,
    join_handle: task::JoinHandle<()>,
}

impl DocumentTask {
    pub fn new(
        outgoing: mpsc::Sender<ServerMessage>,
        url: lsp_types::Url,
        initial_document: Arc<Document>,
    ) -> DocumentTask {
        let (send_change, mut receive_change) = watch::channel(initial_document);

        let join_handle = tokio::spawn(async move {
            loop {
                let document = receive_change.borrow().clone();
                let diagnostics = syntax_diagnostics_for_document(&url, &document);

                if outgoing
                    .send(
                        Notification::new_lsp::<lsp_types::notification::PublishDiagnostics>(
                            lsp_types::PublishDiagnosticsParams {
                                uri: url.clone(),
                                version: Some(document.version()),
                                diagnostics,
                            },
                        )
                        .into(),
                    )
                    .await
                    .is_err()
                {
                    break;
                }

                if receive_change.changed().await.is_err() {
                    break;
                }
            }
        });

        DocumentTask {
            send_change,
            join_handle,
        }
    }

    fn did_change(&self, document: Arc<Document>) {
        self.send_change
            .send(document)
            .expect("Could not send change to document syntax task");
    }

    async fn shutdown(self) {
        // Dropping the sender ends the watch stream, letting the task exit its loop
        drop(self.send_change);
        self.join_handle
            .await
            .expect("Document syntax task panicked");
    }
}

/// [`DocumentWatcher`] publishing syntax diagnostics for every open document
pub struct SyntaxWatcher {
    outgoing: mpsc::Sender<ServerMessage>,
    document_tasks: HashMap<String, DocumentTask>,
}

impl SyntaxWatcher {
    pub fn new(outgoing: mpsc::Sender<ServerMessage>) -> SyntaxWatcher {
        SyntaxWatcher {
            outgoing,
            document_tasks: HashMap::new(),
        }
    }

    pub async fn shutdown(self) {
        let document_task_futures: Vec<_> = self
            .document_tasks
            .into_iter()
            .map(|(_, task)| task.shutdown())
            .collect();

        for document_task_future in document_task_futures {
            document_task_future.await;
        }
    }
}

impl DocumentWatcher for SyntaxWatcher {
    fn did_open(&mut self, url: &lsp_types::Url, document: &Arc<Document>) {
        self.document_tasks.insert(
            url.to_string(),
            DocumentTask::new(self.outgoing.clone(), url.clone(), Arc::clone(document)),
        );
    }

    fn did_change(&mut self, url: &lsp_types::Url, document: &Arc<Document>) {
        if let Some(document_task) = self.document_tasks.get(url.as_str()) {
            document_task.did_change(Arc::clone(document));
        }
    }

    fn did_close(&mut self, url: &lsp_types::Url) {
        self.document_tasks.remove(url.as_str());
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn correct_document_diagnostics() {
        let url = lsp_types::Url::parse("file:///foo/bar").unwrap();
        let doc = Document::new(1, "('hello-world)".to_owned());

        let diags = syntax_diagnostics_for_document(&url, &doc);
        assert_eq!(Vec::<lsp_types::Diagnostic>::new(), diags);
    }

    #[test]
    fn missing_delimiter_diagnostics() {
        let url = lsp_types::Url::parse("file:///foo/bar").unwrap();
        let doc = Document::new(1, "('hello-world".to_owned());

        let diags = syntax_diagnostics_for_document(&url, &doc);
        assert_eq!(
            vec![lsp_types::Diagnostic {
                range: lsp_types::Range {
                    start: lsp_types::Position {
                        line: 0,
                        character: 13,
                    },
                    end: lsp_types::Position {
                        line: 0,
                        character: 13,
                    }
                },
                severity: Some(lsp_types::DiagnosticSeverity::Error),
                message: "unexpected end of file while parsing list".into(),
                related_information: Some(vec![
                    lsp_types::DiagnosticRelatedInformation {
                        location: lsp_types::Location {
                            uri: url.clone(),
                            range: lsp_types::Range {
                                start: lsp_types::Position {
                                    line: 0,
                                    character: 0,
                                },
                                end: lsp_types::Position {
                                    line: 0,
                                    character: 1,
                                }
                            },
                        },
                        message: "list starts here".to_owned(),
                    },
                    lsp_types::DiagnosticRelatedInformation {
                        location: lsp_types::Location {
                            uri: url,
                            range: lsp_types::Range {
                                start: lsp_types::Position {
                                    line: 0,
                                    character: 13,
                                },
                                end: lsp_types::Position {
                                    line: 0,
                                    character: 13,
                                }
                            },
                        },
                        message: "expected datum or `)`".to_owned(),
                    }
                ]),
                source: Some("arret-syntax".to_owned()),
                ..Default::default()
            }],
            diags
        );
    }

    #[test]
    fn unsupported_character_diagnostics() {
        let url = lsp_types::Url::parse("file:///foo/bar").unwrap();
        let doc = Document::new(1, "\\newline \\madeup".to_owned());

        let diags = syntax_diagnostics_for_document(&url, &doc);
        assert_eq!(
            vec![lsp_types::Diagnostic {
                range: lsp_types::Range {
                    start: lsp_types::Position {
                        line: 0,
                        character: 10,
                    },
                    end: lsp_types::Position {
                        line: 0,
                        character: 16,
                    }
                },
                severity: Some(lsp_types::DiagnosticSeverity::Error),
                message: "unsupported character".into(),
                related_information: Some(vec![]),
                source: Some("arret-syntax".to_owned()),
                ..Default::default()
            }],
            diags
        );
    }
}

================================================ FILE: rfi-derive/Cargo.toml ================================================

[package]
name = "arret-rfi-derive"
version = "0.1.0"
edition = "2018"
# NOTE(review): author email appears to have been stripped during extraction
authors = ["Ryan Cumming "]

[lib]
path = "lib.rs"
proc-macro = true

[dependencies]
quote = "1"
proc-macro2 = "1"

[dependencies.syn]
version = "1"
features = ["full"]

================================================ FILE: rfi-derive/lib.rs ================================================
#![warn(clippy::all)] #![warn(rust_2018_idioms)] #[macro_use] extern crate quote; use syn::{parse_macro_input, ItemFn, Token}; fn arg_is_task(arg: &syn::PatType) -> bool { if let syn::Type::Reference(_) = *arg.ty { } else { return false; }; if let syn::Pat::Ident(ref pat_ident) = *arg.pat { pat_ident.ident == "task" } else { false } } /// Annotates a Rust function to be exported via `arret_runtime::define_rust_module!` /// /// This takes a single metadata string containing the full Arret type of the function. This is used /// to express concepts in Arret that don't exist in Rust. These include rest arguments and function /// purity. /// /// The annotated Rust function can optionally take a `arret_runtime::task::Task` as its first /// parameter. An attempt will be made to encode the types of the remaining parameters but only /// certain primitive types and `arret_runtime::boxed` values are allowed. #[proc_macro_attribute] pub fn rust_fun( attrs: proc_macro::TokenStream, input: proc_macro::TokenStream, ) -> proc_macro::TokenStream { let attrs: proc_macro2::TokenStream = attrs.into(); let mut attrs_iter = attrs.into_iter(); let arret_type = attrs_iter.next().expect("Arret type expected"); if attrs_iter.next().is_some() { panic!("unexpected tokens after Arret type"); } // Parse the input tokens into a syntax tree let mut input_fn = parse_macro_input!(input as ItemFn); let mut input_sig = &mut input_fn.sig; let vis = input_fn.vis.clone(); // Rename the function so the descriptor can take its original name let entry_point_name = format!("arret_{}_entry_point", input_sig.ident); let entry_point_ident = proc_macro2::Ident::new(&entry_point_name, input_sig.ident.span()); let descriptor_ident = std::mem::replace(&mut input_sig.ident, entry_point_ident); // RFI assumes a C ABI input_sig.abi = Some(syn::Abi { extern_token: Token![extern](input_sig.ident.span()), name: Some(syn::LitStr::new("C", input_sig.ident.span())), }); let takes_task = input_sig .inputs .first() 
.map(|arg| match arg { syn::FnArg::Typed(typed) => arg_is_task(typed), _ => false, }) .unwrap_or(false); let mut param_iter = input_sig.inputs.iter(); if takes_task { param_iter.next(); } let param_types = param_iter.map(|arg| match arg { syn::FnArg::Typed(typed) => typed.ty.clone(), _ => panic!("unexpected arg type"), }); let ret_type = match input_sig.output { syn::ReturnType::Default => quote!(()), syn::ReturnType::Type(_, ref ret_type) => quote!(#ret_type), }; // Build the output, possibly using quasi-quotation let expanded = quote! { #[allow(non_upper_case_globals)] #vis const #descriptor_ident: RustFun = RustFun { arret_type: #arret_type, takes_task: #takes_task, params: &[#( <#param_types as ::arret_runtime::abitype::EncodeAbiType>::PARAM_ABI_TYPE ),*], ret: <#ret_type as ::arret_runtime::abitype::EncodeRetAbiType>::RET_ABI_TYPE, symbol: #entry_point_name, }; #[no_mangle] #input_fn }; expanded.into() } ================================================ FILE: runtime/Cargo.toml ================================================ [package] name = "arret-runtime" version = "0.1.0" edition = "2018" authors = ["Ryan Cumming "] [lib] path = "lib.rs" crate-type = ["lib"] ================================================ FILE: runtime/abitype.rs ================================================ //! Type encoding for Rust types //! //! This is a system of traits used to encode Rust types in a form understandable by the Arret //! compiler. It's used to ensure type safety across the RFI boundary. 
use crate::binding::Never; use crate::boxed; use crate::boxed::refs; use crate::callback; #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub enum BoxedAbiType { Any, UniqueTagged(boxed::TypeTag), Union(&'static str, &'static [boxed::TypeTag]), Vector(&'static BoxedAbiType), List(&'static BoxedAbiType), Pair(&'static BoxedAbiType), Set(&'static BoxedAbiType), Map(&'static BoxedAbiType, &'static BoxedAbiType), } pub const TOP_LIST_BOXED_ABI_TYPE: BoxedAbiType = BoxedAbiType::List(&BoxedAbiType::Any); /// Encoded type for any boxed or unboxed value #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub enum AbiType { /// Unboxed boolean value /// /// This is identical to [`bool`] in Rust and C++ Bool, /// Unboxed character value /// /// This is identical to [`char`] in Rust and `wchar_t` in C++ Char, /// Unboxed 64bit float /// /// This is identical to [`f64`] in Rust and `double` in C++ Float, /// Unboxed signed 64bit integer /// /// This is identical to in [`i64`] in Rust and `std::int64_t` in C++ Int, /// Interned integer for a `Sym` /// /// While this corresponds to [`InternedSym`](crate::intern::InternedSym) it's currently only /// used internally by the compiler. 
InternedSym, /// [Boxed value](crate::boxed) Boxed(BoxedAbiType), /// [Callback function](crate::callback) Callback(&'static callback::EntryPointAbiType), } #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] pub enum ParamCapture { /// Indicates the capture of this parameter should be automatically inferred Auto, /// Explicitly specifies that this parameter is never captured Never, /// Explicitly specifies that this parameter is captured Always, } #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct ParamAbiType { pub abi_type: AbiType, pub capture: ParamCapture, } #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub enum RetAbiType { Void, Never, Inhabited(AbiType), } pub trait EncodeAbiType { const ABI_TYPE: AbiType; /// Controls the capture type when this is used as a parameter const PARAM_CAPTURE: ParamCapture = ParamCapture::Never; const PARAM_ABI_TYPE: ParamAbiType = ParamAbiType { abi_type: Self::ABI_TYPE, capture: Self::PARAM_CAPTURE, }; } impl EncodeAbiType for f64 { const ABI_TYPE: AbiType = AbiType::Float; } impl EncodeAbiType for i64 { const ABI_TYPE: AbiType = AbiType::Int; } impl EncodeAbiType for char { const ABI_TYPE: AbiType = AbiType::Char; } impl EncodeAbiType for bool { const ABI_TYPE: AbiType = AbiType::Bool; } impl EncodeAbiType for refs::Gc where T: EncodeBoxedAbiType, { const ABI_TYPE: AbiType = AbiType::Boxed(T::BOXED_ABI_TYPE); const PARAM_CAPTURE: ParamCapture = ParamCapture::Auto; } impl EncodeAbiType for refs::NoCapture where T: EncodeBoxedAbiType, { const ABI_TYPE: AbiType = AbiType::Boxed(T::BOXED_ABI_TYPE); const PARAM_CAPTURE: ParamCapture = ParamCapture::Never; } impl EncodeAbiType for refs::Capture where T: EncodeBoxedAbiType, { const ABI_TYPE: AbiType = AbiType::Boxed(T::BOXED_ABI_TYPE); const PARAM_CAPTURE: ParamCapture = ParamCapture::Always; } impl EncodeAbiType for callback::Callback where F: callback::EncodeEntryPointAbiType, { const ABI_TYPE: AbiType = AbiType::Callback(&F::ENTRY_POINT_ABI_TYPE); } pub trait 
EncodeBoxedAbiType { const BOXED_ABI_TYPE: BoxedAbiType; } pub trait EncodeRetAbiType { const RET_ABI_TYPE: RetAbiType; } impl EncodeRetAbiType for T { const RET_ABI_TYPE: RetAbiType = RetAbiType::Inhabited(Self::ABI_TYPE); } impl EncodeRetAbiType for () { const RET_ABI_TYPE: RetAbiType = RetAbiType::Void; } impl EncodeRetAbiType for Never { const RET_ABI_TYPE: RetAbiType = RetAbiType::Never; } impl From for BoxedAbiType { fn from(type_tag: boxed::TypeTag) -> BoxedAbiType { type_tag.to_boxed_abi_type() } } impl BoxedAbiType { pub fn into_abi_type(self) -> AbiType { AbiType::Boxed(self) } } impl From for AbiType { fn from(type_tag: boxed::TypeTag) -> AbiType { type_tag.to_boxed_abi_type().into_abi_type() } } impl From for AbiType { fn from(boxed_abi_type: BoxedAbiType) -> AbiType { boxed_abi_type.into_abi_type() } } impl AbiType { pub fn into_ret_abi_type(self) -> RetAbiType { RetAbiType::Inhabited(self) } pub fn into_param_abi_type(self) -> ParamAbiType { let capture = match self { AbiType::Boxed(_) => ParamCapture::Auto, _ => ParamCapture::Never, }; ParamAbiType { abi_type: self, capture, } } pub fn may_contain_gc_refs(&self) -> bool { matches!( self, AbiType::Boxed(_) | AbiType::InternedSym | AbiType::Callback(_) ) } } impl From for ParamAbiType { fn from(type_tag: boxed::TypeTag) -> ParamAbiType { type_tag .to_boxed_abi_type() .into_abi_type() .into_param_abi_type() } } impl From for ParamAbiType { fn from(boxed_abi_type: BoxedAbiType) -> ParamAbiType { boxed_abi_type.into_abi_type().into_param_abi_type() } } impl From for ParamAbiType { fn from(abi_type: AbiType) -> ParamAbiType { abi_type.into_param_abi_type() } } impl From for RetAbiType { fn from(type_tag: boxed::TypeTag) -> RetAbiType { type_tag .to_boxed_abi_type() .into_abi_type() .into_ret_abi_type() } } impl From for RetAbiType { fn from(boxed_abi_type: BoxedAbiType) -> RetAbiType { boxed_abi_type.into_abi_type().into_ret_abi_type() } } impl From for RetAbiType { fn from(abi_type: AbiType) -> RetAbiType 
{ abi_type.into_ret_abi_type() } } ================================================ FILE: runtime/binding.rs ================================================ //! Macros and types for defining Rust RFI modules use crate::abitype::{ParamAbiType, RetAbiType}; #[allow(unused)] use crate::abitype::{EncodeAbiType, EncodeRetAbiType}; #[derive(Debug)] pub struct RustFun { pub arret_type: &'static str, pub takes_task: bool, pub params: &'static [ParamAbiType], pub ret: RetAbiType, pub symbol: &'static str, } pub type RustExports = &'static [(&'static str, &'static RustFun)]; // TODO: Replace with ! once it's stable pub enum Never {} /// Defines a new Arret module implemented at Rust /// /// Each Arret package can have an optional Rust module accessible as `(import [package-name /// rust])`. These are loaded both at compile-time to support constant evaluation and linked against /// compiled programs. /// /// The first argument should be an identifier in the form of `ARRET_{PACKAGE_NAME}_RUST_EXPORTS` /// where `{PACKAGE_NAME}` is the uppercased name of the package. For example, the package `stdlib` /// uses `ARRET_STDLIB_RUST_EXPORTS`. This must be unique to prevent symbol conflicts when loading /// Rust modules. /// /// The second argument is a mapping of export names to Rust functions. These are defined using /// the `rfi_derive::rust_fun` attribute macro. #[macro_export] macro_rules! define_rust_module { ($exports_sym:ident, { $( $export_name:expr => $desc_name:ident ),* }) => { #[no_mangle] pub static $exports_sym: RustExports = &[ $( ($export_name, &$desc_name) ),* ]; }; } ================================================ FILE: runtime/boxed/heap/collect.rs ================================================ //! Functionality for garbage collecting heaps //! //! This is a basic tracing, moving garbage collector. It doesn't support generations or concurrent //! collection. Every collection starts with a strong pass followed by an optional weak pass. 
use std::ptr; use crate::boxed; use crate::boxed::heap::Heap; use crate::boxed::refs::Gc; use crate::boxed::{AllocType, BoxSize, Boxed, TypeTag}; use crate::intern::InternedSym; #[repr(C, align(16))] struct ForwardingCell { header: boxed::Header, new_location: Gc, } /// Strong pass from an old [`Heap`] in to a new [`Heap`] /// /// [`visit_box`](StrongPass::visit_box) should be called for each GC root that needs to be moved to /// the new heap. Once all roots have been visited [`into_new_heap`](StrongPass::into_new_heap) will /// return the new [`Heap`] or [`into_weak_pass`](StrongPass::into_weak_pass) will start an optional /// weak pass. pub struct StrongPass { old_heap: Heap, new_heap: Heap, } impl StrongPass { /// Consumes an existing heap to begin a garbage collection pass pub fn new(old_heap: Heap) -> StrongPass { let type_info = old_heap.type_info().clone_for_collect_garbage(); StrongPass { old_heap, new_heap: Heap::new(type_info, Heap::DEFAULT_CAPACITY), } } /// Continues as a weak reference pass pub fn into_weak_pass(self) -> WeakPass { WeakPass { _old_heap: self.old_heap, new_heap: self.new_heap, } } /// Finishes garbage collection by returning the new heap pub fn into_new_heap(self) -> Heap { let mut new_heap = self.new_heap; new_heap.save_len_at_gc(); new_heap } /// Visits a garbage collected box as a strong root pub fn visit_box(&mut self, box_ref: &mut Gc) { let any_box_ref = unsafe { &mut *(box_ref as *mut _ as *mut Gc) }; Self::visit_any_box(&self.old_heap, &mut self.new_heap, any_box_ref); } fn move_box_to_new_heap(new_heap: &mut Heap, box_ref: &mut Gc, size: BoxSize) { // Allocate and copy to the new heap let dest_location = new_heap.alloc_cells(size.cell_count()); unsafe { ptr::copy_nonoverlapping(box_ref.as_ptr(), dest_location, size.cell_count()); } let forward_alloc_type = match size { BoxSize::Size16 => AllocType::HeapForward16, BoxSize::Size32 => AllocType::HeapForward32, }; // Create a forwarding cell let forwarding_cell = ForwardingCell { 
header: boxed::Header { // This is arbitrary but could be useful for debugging type_tag: box_ref.header.type_tag, alloc_type: forward_alloc_type, }, new_location: unsafe { Gc::new(dest_location) }, }; // Overwrite the previous box location unsafe { ptr::copy_nonoverlapping( &forwarding_cell as *const ForwardingCell as *const boxed::Any, box_ref.as_ptr() as *mut boxed::Any, 1, ); } // Update the box_ref *box_ref = unsafe { Gc::new(dest_location) }; } /// Re-interns the symbol on a new heap fn visit_interned_sym(old_heap: &Heap, new_heap: &mut Heap, interned_sym: &mut InternedSym) { let old_interner = old_heap.type_info().interner(); let new_interner = new_heap.type_info_mut().interner_mut(); let sym_name = old_interner.unintern(interned_sym); *interned_sym = new_interner.intern(sym_name); } fn visit_any_box(old_heap: &Heap, new_heap: &mut Heap, mut box_ref: &mut Gc) { // This loop is used for ad-hoc tail recursion when visiting Pairs and FunThunks // Everything else will return at the bottom of the loop loop { match box_ref.header.alloc_type { AllocType::Const => { // Return when encountering a const box; they cannot move and cannot refer to the heap return; } AllocType::HeapForward16 | AllocType::HeapForward32 => { // This has already been moved to a new location let forwarding_cell = unsafe { &*(box_ref.as_ptr() as *const ForwardingCell) }; *box_ref = forwarding_cell.new_location; return; } AllocType::Heap16 => { Self::move_box_to_new_heap(new_heap, box_ref, BoxSize::Size16); } AllocType::Heap32 => { Self::move_box_to_new_heap(new_heap, box_ref, BoxSize::Size32); } AllocType::Stack => { // Stack boxes cannot move but they may point to heap boxes } } match box_ref.header.type_tag { TypeTag::Sym => { let sym_ref = unsafe { &mut *(box_ref.as_mut_ptr() as *mut boxed::Sym) }; Self::visit_interned_sym(old_heap, new_heap, sym_ref.interned_mut()); } TypeTag::Pair => { let pair_ref = unsafe { &mut *(box_ref.as_mut_ptr() as *mut boxed::Pair) }; Self::visit_any_box(old_heap, 
new_heap, &mut pair_ref.head); // Start again with the tail of the list box_ref = unsafe { &mut *(&mut pair_ref.rest as *mut Gc> as *mut Gc) }; continue; } TypeTag::Vector => { let vec_ref = unsafe { &mut *(box_ref.as_mut_ptr() as *mut boxed::Vector) }; vec_ref.visit_mut_elements(&mut |elem_ref| { Self::visit_any_box(old_heap, new_heap, elem_ref); }); } TypeTag::FunThunk => { let fun_thunk_ref = unsafe { &mut *(box_ref.as_mut_ptr() as *mut boxed::FunThunk) }; // Start again with the captures box_ref = unsafe { &mut *(&mut fun_thunk_ref.captures as *mut Gc) }; continue; } TypeTag::Record => { use crate::boxed::types::field_value::FieldGcRef; let record_ref = unsafe { &mut *(box_ref.as_mut_ptr() as *mut boxed::Record) }; for field_gc_ref in record_ref.field_gc_refs(old_heap) { match field_gc_ref { FieldGcRef::Boxed(field_box_ref) => { Self::visit_any_box(old_heap, new_heap, field_box_ref); } FieldGcRef::InternedSym(interned_sym) => { Self::visit_interned_sym(old_heap, new_heap, interned_sym); } } } } _ => {} } return; } } } /// Weak pass of a collection to a new [`Heap`] /// /// This will return the location of cells that have been moved to the new heap or [`None`] for /// cells that were not visited during the strong pass. pub struct WeakPass { // We need the old heap to remain allocated so we can follow pointers for old cells _old_heap: Heap, new_heap: Heap, } impl WeakPass { /// Finishes garbage collection by returning the new [`Heap`] pub fn into_new_heap(self) -> Heap { let mut new_heap = self.new_heap; new_heap.save_len_at_gc(); new_heap } /// Visits a garbage collected box /// /// If the box was moved during the strong pass its new location will be returned. Otherwise, /// [`None`] will be returned. 
pub fn new_heap_ref_for(&self, boxed: Gc) -> Option> { let any_boxed = unsafe { boxed.cast::() }; self.new_heap_any_ref_for(any_boxed) .map(|new_box| unsafe { new_box.cast::() }) } fn new_heap_any_ref_for(&self, box_ref: Gc) -> Option> { match box_ref.header.alloc_type { AllocType::Const | AllocType::Stack => { // These aren't managed by the GC; their pointer remains valid Some(box_ref) } AllocType::HeapForward16 | AllocType::HeapForward32 => { // This has already been moved to a new location let forwarding_cell = unsafe { &*(box_ref.as_ptr() as *const ForwardingCell) }; Some(forwarding_cell.new_location) } AllocType::Heap16 | AllocType::Heap32 => None, } } } #[cfg(test)] mod test { use super::*; use crate::boxed::{Int, List, Str}; #[test] fn simple_collect() { let mut old_heap = Heap::empty(); let mut hello = Str::new(&mut old_heap, "HELLO"); let mut world = Str::new(&mut old_heap, "WORLD"); assert_eq!("HELLO", hello.as_str()); assert_eq!("WORLD", world.as_str()); assert_eq!(2, old_heap.len()); // Root everything let mut all_strong = StrongPass::new(old_heap); all_strong.visit_box(&mut hello); all_strong.visit_box(&mut world); let all_heap = all_strong.into_new_heap(); assert_eq!("HELLO", hello.as_str()); assert_eq!("WORLD", world.as_str()); assert_eq!(2, all_heap.len()); // Take aliases to hello and world to simulate weak reference let hello_alias = hello; let world_alias = world; // Root just one string let mut one_strong = StrongPass::new(all_heap); one_strong.visit_box(&mut hello); // Start a weak pass let one_weak = one_strong.into_weak_pass(); assert!(one_weak.new_heap_ref_for(hello_alias).is_some()); assert!(one_weak.new_heap_ref_for(world_alias).is_none()); let one_heap = one_weak.into_new_heap(); assert_eq!("HELLO", hello.as_str()); assert_eq!(1, one_heap.len()); // Root nothing let zero_heap = StrongPass::new(one_heap).into_new_heap(); assert_eq!(0, zero_heap.len()); } #[test] fn sym_collect() { use crate::boxed::Sym; let mut old_heap = Heap::empty(); 
let inline_name = "Hello"; let indexed_name = "This is too long; it will be indexed to the heap's intern table"; let mut inline = Sym::new(&mut old_heap, inline_name); let mut indexed = Sym::new(&mut old_heap, indexed_name); assert_eq!(2, old_heap.len()); let mut all_strong = StrongPass::new(old_heap); all_strong.visit_box(&mut inline); all_strong.visit_box(&mut indexed); let all_heap = all_strong.into_new_heap(); assert_eq!(inline_name, inline.name(&all_heap)); assert_eq!(indexed_name, indexed.name(&all_heap)); assert_eq!(2, all_heap.len()); } #[test] fn list_collect() { use std::mem; // Three 1 cell integers + three pairs const PAIR_CELLS: usize = mem::size_of::>() / mem::size_of::(); const EXPECTED_HEAP_SIZE: usize = 3 + (3 * PAIR_CELLS); let mut old_heap = Heap::empty(); let mut boxed_list = List::from_values(&mut old_heap, [1, 2, 3].iter().cloned(), Int::new); assert_eq!(EXPECTED_HEAP_SIZE, old_heap.len()); assert_eq!(3, boxed_list.len()); let mut all_strong = StrongPass::new(old_heap); all_strong.visit_box(&mut boxed_list); let all_heap = all_strong.into_new_heap(); assert_eq!(3, boxed_list.len()); assert_eq!(EXPECTED_HEAP_SIZE, all_heap.len()); let mut boxed_list_iter = boxed_list.iter(); for expected_num in &[1, 2, 3] { if let Some(boxed_int) = boxed_list_iter.next() { assert_eq!(*expected_num, boxed_int.value()); } else { panic!("Iterator unexpectedly ended"); } } } #[test] fn vector_collect() { // Try empty, 1 cell inline, 2 cell inline, and large vectors let test_contents: [&[i64]; 4] = [&[], &[1], &[1, 2, 3], &[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]]; for &test_content in &test_contents { let mut old_heap = Heap::empty(); let mut boxed_vec = boxed::Vector::from_values(&mut old_heap, test_content.iter().cloned(), Int::new); let mut all_strong = StrongPass::new(old_heap); all_strong.visit_box(&mut boxed_vec); // Need to give this a name so it doesn't Drop let _all_heap = all_strong.into_new_heap(); let mut boxed_list_iter = boxed_vec.iter(); 
assert_eq!(test_content.len(), boxed_list_iter.len()); for expected_num in test_content { if let Some(boxed_int) = boxed_list_iter.next() { assert_eq!(*expected_num, boxed_int.value()); } else { panic!("Iterator unexpectedly ended"); } } } } } ================================================ FILE: runtime/boxed/heap/mod.rs ================================================ pub mod collect; pub mod type_info; use std::{cmp, mem, ptr}; use crate::boxed::heap::type_info::TypeInfo; use crate::boxed::refs::Gc; use crate::boxed::{AllocType, Any, Boxed}; use crate::intern::{AsInterner, Interner}; /// Allocated segment of garbage collected memory /// /// This has a gross pointer-based representation to allow use as a bump allocator from generated /// native code. #[repr(C)] pub struct Segment { next: *mut Any, end: *const Any, backing_vec: Vec, } /// Heap of garbage collected boxes #[repr(C)] pub struct Heap { current_segment: Segment, full_segments: Vec, type_info: TypeInfo, len_at_last_gc: usize, } impl Segment { /// Creates a new segment with capacity for `count` cells fn with_capacity(count: usize) -> Segment { // Allow for an extra cell of red zone capacity. // // This is because variable sized boxes will have a Rust size of 32 bytes, but we might // allocate them only 16 bytes in the segment. If this happens at the end of the segment we // could create a pointer to a Rust object extending past the allocation. This is UB and // could generate code that e.g. does a vector load into unallocated memory. let mut backing_vec = Vec::with_capacity(count + 1); let next: *mut Any = backing_vec.as_mut_ptr(); Segment { next, end: unsafe { next.add(count) }, backing_vec, } } /// Returns contiguous memory for holding `count` cells /// /// If the segment is full this will return [`None`] fn alloc_cells(&mut self, count: usize) -> Option<*mut Any> { let current_next = self.next; // Convert to an integer to avoid the UB of creating a pointer out of bounds. 
let new_next = (self.next as usize) + (count * mem::size_of::()); if new_next > self.end as usize { None } else { self.next = new_next as *mut Any; Some(current_next) } } /// Returns the number of allocated cells fn len(&self) -> usize { // TODO: Replace with `offset_from` once its stable (self.next as usize - self.backing_vec.as_ptr() as usize) / mem::size_of::() } } impl Drop for Segment { fn drop(&mut self) { let mut current = self.backing_vec.as_mut_ptr(); while current < self.next as *mut Any { unsafe { match (*current).header.alloc_type { AllocType::Heap16 | AllocType::Heap32 => ptr::drop_in_place(current), AllocType::HeapForward16 | AllocType::HeapForward32 => {} AllocType::Const | AllocType::Stack => { unreachable!("Unexpected alloc type in heap") } } match (*current).header.alloc_type { AllocType::Heap16 | AllocType::HeapForward16 => { current = current.add(1); } AllocType::Heap32 | AllocType::HeapForward32 => { current = current.add(2); } AllocType::Const | AllocType::Stack => { unreachable!("Unexpected alloc type in heap") } } } } } } impl Heap { /// Capacity of the initial segment and all overflow segments const DEFAULT_SEGMENT_CAPACITY: usize = 1024; /// Default capacity of the heap const DEFAULT_CAPACITY: usize = Self::DEFAULT_SEGMENT_CAPACITY; /// Returns an empty heap with a default capacity pub fn empty() -> Heap { Self::new(TypeInfo::empty(), Self::DEFAULT_CAPACITY) } /// Returns a new heap with the given type information and capacity pub fn new(type_info: TypeInfo, count: usize) -> Heap { Heap { current_segment: Segment::with_capacity(count), full_segments: vec![], type_info, len_at_last_gc: 0, } } /// Hints if this heap should be garbage collected /// /// This is a heuristic based on the number of allocations since the last GC cycle. 
pub fn should_collect(&self) -> bool { let maximum_len = std::cmp::max(Self::DEFAULT_SEGMENT_CAPACITY, self.len_at_last_gc) * 2; self.len() > maximum_len } fn save_len_at_gc(&mut self) { self.len_at_last_gc = self.len(); } /// Allocates space for `count` contiguous cells pub fn alloc_cells(&mut self, count: usize) -> *mut Any { if let Some(alloc) = self.current_segment.alloc_cells(count) { return alloc; } // Make sure we allocate enough to satisfy the request let capacity = cmp::max(count, Self::DEFAULT_SEGMENT_CAPACITY); // Build a new segment and allocate from it let mut new_segment = Segment::with_capacity(capacity); let alloc = new_segment.alloc_cells(count).unwrap(); // Switch the segment and track the old one for finalisation let previous_segment = mem::replace(&mut self.current_segment, new_segment); self.full_segments.push(previous_segment); alloc } /// Returns the runtime type information associated with the heap pub fn type_info(&self) -> &TypeInfo { &self.type_info } /// Returns a mutable reference to the runtime type information associated with the heap pub fn type_info_mut(&mut self) -> &mut TypeInfo { &mut self.type_info } /// Returns the number of allocated cells pub fn len(&self) -> usize { let full_len: usize = self.full_segments.iter().map(Segment::len).sum(); self.current_segment.len() + full_len } /// Returns true if the heap contains no boxes pub fn is_empty(&self) -> bool { self.current_segment.len() == 0 && self.full_segments.is_empty() } /// Places a new boxed value on the heap pub fn place_box(&mut self, boxed: T) -> Gc { let heap_size = boxed .header() .alloc_type() .to_heap_box_size() .expect("non-heap alloc type"); let needed_cells = heap_size.cell_count(); let insert_at = self.alloc_cells(needed_cells); unsafe { ptr::copy_nonoverlapping(&boxed as *const T as *const Any, insert_at, needed_cells); } // Make sure we don't drop the stack version mem::forget(boxed); unsafe { Gc::new(insert_at as *const T) } } } impl Default for Heap { fn 
default() -> Self { Self::empty() } } /// Object that can be used as a heap pub trait AsHeap { /// Returns this object as a heap fn as_heap(&self) -> &Heap; /// Returns this object as a mutable heap fn as_heap_mut(&mut self) -> &mut Heap; } impl AsHeap for Heap { fn as_heap(&self) -> &Heap { self } fn as_heap_mut(&mut self) -> &mut Heap { self } } impl AsInterner for Heap { fn as_interner(&self) -> &Interner { self.type_info().interner() } } #[cfg(test)] mod test { use super::*; #[test] fn basic_alloc() { use crate::boxed::Str; let mut heap = Heap::new(TypeInfo::empty(), 2); let string1 = Str::new(&mut heap, "HELLO"); let string2 = Str::new(&mut heap, "WORLD"); assert_eq!("HELLO", string1.as_str()); assert_eq!("WORLD", string2.as_str()); } } ================================================ FILE: runtime/boxed/heap/type_info.rs ================================================ //! Container for runtime type information for boxed data use crate::class_map::ClassMap; use crate::intern::{AsInterner, Interner}; /// Contains associated runtime type information for boxed data /// /// This is a container for [`Interner`] and [`ClassMap`]. 
pub struct TypeInfo { interner: Interner, class_map: ClassMap, } impl TypeInfo { /// Constructs type information with the given components pub fn new(interner: Interner, class_map: ClassMap) -> TypeInfo { TypeInfo { interner, class_map, } } /// Constructs empty type information pub fn empty() -> TypeInfo { Self::new(Interner::new(), ClassMap::empty()) } /// Returns a clone of this type information suitable for garbage collection pub fn clone_for_collect_garbage(&self) -> Self { Self { interner: self.interner.clone_for_collect_garbage(), class_map: self.class_map.clone(), } } /// Returns the symbol interner pub fn interner(&self) -> &Interner { &self.interner } /// Returns a mutable reference to the symbol interner pub fn interner_mut(&mut self) -> &mut Interner { &mut self.interner } /// Returns the class map pub fn class_map(&self) -> &ClassMap { &self.class_map } /// Returns a mutable reference to the class map pub fn class_map_mut(&mut self) -> &mut ClassMap { &mut self.class_map } } impl AsInterner for TypeInfo { fn as_interner(&self) -> &Interner { self.interner() } } ================================================ FILE: runtime/boxed/mod.rs ================================================ #![warn(missing_docs)] //! Boxed values and heaps //! //! This contains the implementation of our garbage collector and the types it can manage. Some //! types (such as `Int` and `Float`) have corresponding unboxed representations and are only boxed //! for the purposes of runtime dynamic typing. Complex values (such as `Vector` and `Sym`) have no //! unboxed representation. //! //! Boxes can also be placed on the stack on in static constants instead of the heap. This is of //! limited value to Rust code but is frequently used by the compiler to avoid the overhead of //! allocation and garbage collection. 
mod heap; pub mod refs; mod types; use std::hash::{Hash, Hasher}; use std::{fmt, ptr}; use crate::abitype::{BoxedAbiType, EncodeBoxedAbiType}; use crate::boxed::refs::Gc; pub use crate::boxed::heap::{collect, type_info}; pub use crate::boxed::heap::{AsHeap, Heap}; pub use crate::boxed::types::char::Char; pub use crate::boxed::types::field_value::{FieldValue, FieldValueIter}; pub use crate::boxed::types::float::Float; pub use crate::boxed::types::fun::{Captures, FunThunk, ThunkEntry}; pub use crate::boxed::types::int::Int; pub use crate::boxed::types::list::{List, ListSubtype, Nil, Pair, NIL_INSTANCE}; pub use crate::boxed::types::map::Map; pub use crate::boxed::types::record::{Record, RecordClassId, RecordStorage}; pub use crate::boxed::types::record_data::RecordData; pub use crate::boxed::types::set::Set; pub use crate::boxed::types::str::{Str, StrStorage}; pub use crate::boxed::types::sym::Sym; pub use crate::boxed::types::vector::Vector; /// Prelude of common traits useful for working with boxed values pub mod prelude { pub use super::AsHeap; pub use super::Boxed; pub use super::DistinctTagged; pub use super::HashInHeap; pub use super::PartialEqInHeap; } /// Size of a boxed value in bytes #[derive(PartialEq, Debug, Copy, Clone)] pub enum BoxSize { /// 16 byte boxed value Size16, /// 32 byte boxed value Size32, } impl BoxSize { /// Returns the number of 16 byte cells required by this box size pub fn cell_count(self) -> usize { match self { BoxSize::Size16 => 1, BoxSize::Size32 => 2, } } /// Returns the corresponding `AllocType` if this box was allocated on the heap pub fn to_heap_alloc_type(self) -> AllocType { match self { BoxSize::Size16 => AllocType::Heap16, BoxSize::Size32 => AllocType::Heap32, } } } /// Allocation type for boxed values #[repr(u8)] #[derive(Debug, PartialEq, Clone, Copy)] pub enum AllocType { /// Static constant value Const, /// Stack allocated value of unknown length Stack, /// Heap allocated 16 byte value Heap16, /// Heap allocated 32 byte 
value Heap32, /// Box pointing to a new 16 byte heap location /// /// This is a temporary type used during garbage collection. HeapForward16, /// Box pointing to a new 32 byte heap location /// /// This is a temporary type used during garbage collection. HeapForward32, } impl AllocType { /// Returns the corresponding `BoxSize` if this type is heap allocated pub fn to_heap_box_size(self) -> Option { match self { AllocType::Heap16 => Some(BoxSize::Size16), AllocType::Heap32 => Some(BoxSize::Size32), _ => None, } } } /// Header for common boxed value metadata #[repr(C)] #[derive(Debug, Clone, Copy)] pub struct Header { type_tag: TypeTag, alloc_type: AllocType, } impl Header { /// Returns a new header for the given type tag and allocation type pub fn new(type_tag: TypeTag, alloc_type: AllocType) -> Header { Header { type_tag, alloc_type, } } /// Returns the constant type tag for this value pub fn type_tag(self) -> TypeTag { self.type_tag } /// Return the allocation type for this value pub fn alloc_type(self) -> AllocType { self.alloc_type } } /// Equivalent of [`PartialEq`] that receives an additional [`Heap`] parameter /// /// This is required for types that require additional metadata from the heap to perform equality /// checks. pub trait PartialEqInHeap { /// Returns true if the values are equal /// /// Both values will be in the same heap. fn eq_in_heap(&self, heap: &Heap, other: &Self) -> bool; } impl PartialEqInHeap for T where T: PartialEq, { fn eq_in_heap(&self, _heap: &Heap, other: &Self) -> bool { self.eq(other) } } /// Equivalent of [`Hash`] that receives an additional [`Heap`] parameter /// /// This is required for types that require additional metadata from the heap to calculate hashes. 
pub trait HashInHeap { /// Feeds this value into the given [`Hasher`] fn hash_in_heap(&self, heap: &Heap, state: &mut H); } impl HashInHeap for T where T: Hash, { fn hash_in_heap(&self, _heap: &Heap, state: &mut H) { self.hash(state) } } /// Boxed value /// /// Boxes can be allocated on the stack, heap or a static constant. Every box is tagged with a /// top-level type. pub trait Boxed: Sized + PartialEqInHeap + HashInHeap + fmt::Debug { /// Casts this value to an `Any` reference fn as_any_ref(&self) -> Gc { unsafe { Gc::new(&*(self as *const Self as *const Any)) } } /// Returns the header of the box fn header(&self) -> Header { self.as_any_ref().header } } impl EncodeBoxedAbiType for Any { const BOXED_ABI_TYPE: BoxedAbiType = BoxedAbiType::Any; } /// Marks that this boxed struct has a specific constant type tag /// /// For example, [`Vector`] is `ConstTagged` because it always has a type tag of `Vector`. As /// a counterexample, [`Num`] is not because it could either have an `Int` or `Float` type tag. /// /// In mathematical terms this can be thought of as the struct being surjective to the type tag. pub trait ConstTagged: Boxed { /// Type tag for values of this type const TYPE_TAG: TypeTag; } /// Indicates that this boxed struct does not share type tags with unrelated types /// /// For example, [`Num`] is `DistinctTagged` because it only shares type tags with `Any`, `Float` /// and `Int` which are all either subtypes or supertypes. As a counterexample, [`Vector`] is /// not because it shares a type tag with [`Vector`]. /// /// In mathematical terms this can be thought of as the struct being injective to the type tag pub trait DistinctTagged: Boxed { /// Returns if the passed type tag corresponds to this type fn has_tag(type_tag: TypeTag) -> bool; } /// Marks that every boxed value with `TYPE_TAG` corresponds to this boxed struct /// /// For example, [`Str`] is `UniqueTagged` because no other struct has the type tag of `Str`. 
As a /// counterexample, `Vector` is not because it shares a type tag with `Vector`. /// /// In mathematical terms this can be thought of as the struct being bijective with the type tag. pub trait UniqueTagged: ConstTagged + DistinctTagged {} impl EncodeBoxedAbiType for T { const BOXED_ABI_TYPE: BoxedAbiType = BoxedAbiType::UniqueTagged(T::TYPE_TAG); } macro_rules! define_const_tagged_boxes { ($($name:ident),*) => { /// Tag byte identifying top-level types #[repr(u8)] #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] pub enum TypeTag { $( #[allow(missing_docs)] $name ),* } /// Static list of all possible type tags /// /// This is guaranteed to be sorted pub const ALL_TYPE_TAGS: &'static [TypeTag] = &[ $( TypeTag::$name ),* ]; impl TypeTag { /// Returns a string representation for the type pub fn to_str(self) -> &'static str { match self { $( TypeTag::$name => { stringify!($name) } )* } } } $( impl ConstTagged for $name { const TYPE_TAG: TypeTag = TypeTag::$name; } impl DistinctTagged for $name { fn has_tag(type_tag: TypeTag) -> bool { Self::TYPE_TAG == type_tag } } )* define_supertype!( /// Supertype of all boxed types Any, AnySubtype, DistinctTagged, as_any_ref, { $($name),* }); } } impl TypeTag { /// Returns the boxed ABI type corresponding to this type tag pub fn to_boxed_abi_type(self) -> BoxedAbiType { BoxedAbiType::UniqueTagged(self) } /// Returns a header for a constant boxed values of this type pub fn to_const_header(self) -> Header { Header::new(self, AllocType::Const) } /// Returns a header for heap allocated values of this type and size pub fn to_heap_header(self, box_size: BoxSize) -> Header { Header::new(self, box_size.to_heap_alloc_type()) } } macro_rules! 
define_singleton_box { ( $(#[$struct_docs:meta])* $type_name:ident, $(#[$static_docs:meta])* $static_name:ident, $export_name:expr ) => { $(#[$struct_docs])* #[repr(C, align(16))] #[derive(Debug)] pub struct $type_name { header: Header, } impl Boxed for $type_name {} impl UniqueTagged for $type_name {} $(#[$static_docs])* #[export_name = $export_name] pub static $static_name: $type_name = $type_name { header: Header { type_tag: $type_name::TYPE_TAG, alloc_type: AllocType::Const, }, }; impl PartialEq for $type_name { fn eq(&self, _: &$type_name) -> bool { // This is tricky - we're a singleton so if the types match we must be equal true } } impl Hash for $type_name { fn hash(&self, state: &mut H) { Self::TYPE_TAG.hash(state); state.write_usize(&$static_name as *const $type_name as usize); } } }; } macro_rules! define_supertype { ( $(#[$docs:meta])* $name:ident, $subtype_enum:ident, $subtype_trait:ident, $as_enum_ref:ident, { $($member:ident),* } ) => { $(#[$docs])* #[repr(C, align(16))] pub struct $name { header: Header, } impl Boxed for $name {} impl DistinctTagged for $name { fn has_tag(type_tag: TypeTag) -> bool { [$( TypeTag::$member ),*].contains(&type_tag) } } impl $name { /// Returns a subtype of this value based on its type tag pub fn as_subtype(&self) -> $subtype_enum<'_> { #[allow(unreachable_patterns)] match self.header.type_tag { $( TypeTag::$member => { $subtype_enum::$member(unsafe { &*(self as *const $name as *const $member) }) } )* other => { unreachable!("Unexpected type tag: {:?}", other); } } } /// Tries to downcast this reference to a subtype based on its type tag pub fn downcast_ref(&self) -> Option> { if T::has_tag(self.header.type_tag) { Some(unsafe { Gc::new(&*(self as *const $name as *const T)) }) } else { None } } } impl HashInHeap for $name { fn hash_in_heap(&self, heap: &Heap, state: &mut H) { match self.as_subtype() { $( $subtype_enum::$member(subtype) => { subtype.hash_in_heap(heap, state) } )* } } } impl PartialEqInHeap for $name { fn 
eq_in_heap(&self, heap: &Heap, other: &$name) -> bool { match (self.as_subtype(), other.as_subtype()) { $( ($subtype_enum::$member(self_value), $subtype_enum::$member(other_value)) => { self_value.eq_in_heap(heap, other_value) } ),* _ => false } } } impl fmt::Debug for $name { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { match self.as_subtype() { $( $subtype_enum::$member(subtype) => { subtype.fmt(formatter) } )* } } } impl Drop for $name { fn drop(&mut self) { // Cast to the correct type so Rust knows which Drop implementation to call match self.as_subtype() { $( $subtype_enum::$member(subtype) => { unsafe { ptr::drop_in_place(subtype as *const $member as *mut $member); } } )* } } } /// Possible subtypes of this supertype #[derive(Debug)] pub enum $subtype_enum<'a> { $( #[allow(missing_docs)] $member(&'a $member) ),* } } } macro_rules! define_tagged_union { ( $(#[$struct_docs:meta])* $name:ident, $(#[$subtype_docs:meta])* $subtype_enum:ident, $subtype_trait:ident, $as_enum_ref:ident, { $($member:ident),* } ) => { define_supertype!( $(#[$struct_docs])* $name, $subtype_enum, $subtype_trait, $as_enum_ref, { $($member),* } ); $(#[$subtype_docs])* pub trait $subtype_trait : DistinctTagged {} $( impl $member { /// Casts this value to its supertype pub fn $as_enum_ref(&self) -> Gc<$name> { unsafe { Gc::new(&*(self as *const Self as *const $name)) } } } impl $subtype_trait for $member {} )* impl EncodeBoxedAbiType for $name { const BOXED_ABI_TYPE: BoxedAbiType = BoxedAbiType::Union(stringify!($name), &[ $( $member::TYPE_TAG ),* ]); } }; } define_const_tagged_boxes! 
{ Float, Int, Char, Str, Sym, Pair, Nil, True, False, Vector, FunThunk, Record, Set, Map } define_singleton_box!( /// Boolean true True, /// Static constant instance of [`True`] TRUE_INSTANCE, "ARRET_TRUE" ); define_singleton_box!( /// Boolean false False, /// Static constant instance of [`False`] FALSE_INSTANCE, "ARRET_FALSE" ); define_tagged_union!( /// Union of numeric types Num, /// Possible subtypes of [`Num`] NumSubtype, NumMember, as_num_ref, { Int, Float } ); define_tagged_union!( /// Union of boolean types Bool, /// Possible subtypes of [`Bool`] BoolSubtype, BoolMember, as_bool_ref, { True, False } ); impl Bool { /// Returns the singleton box corresponding the boolean value pub fn singleton_ref(value: bool) -> Gc { if value { TRUE_INSTANCE.as_bool_ref() } else { FALSE_INSTANCE.as_bool_ref() } } /// Returns the unboxed value of this boolean pub fn as_bool(&self) -> bool { match self.as_subtype() { BoolSubtype::True(_) => true, BoolSubtype::False(_) => false, } } } #[cfg(test)] mod test { use super::*; use std::mem; #[test] fn sizes() { assert_eq!(2, mem::size_of::

()); assert_eq!(16, mem::size_of::()); assert_eq!(16, mem::size_of::()); assert_eq!(16, mem::size_of::()); } #[test] fn downcast_ref() { let mut heap = Heap::empty(); let box_float = Float::new(&mut heap, 2.0); let box_float_as_any = box_float.as_any_ref(); assert!(!box_float_as_any.downcast_ref::().is_some()); assert!(box_float_as_any.downcast_ref::().is_some()); } #[test] fn as_tagged() { let mut heap = Heap::empty(); let box_float = Float::new(&mut heap, 2.0); let box_float_as_any = box_float.as_any_ref(); if let AnySubtype::Float(_) = box_float_as_any.as_subtype() { } else { panic!("Failed to get tagged representation") } } #[test] fn any_equality() { let mut heap = Heap::empty(); let box_two = Float::new(&mut heap, 2.0); let box_two_as_any = box_two.as_any_ref(); let box_three = Float::new(&mut heap, 3.0); let box_three_as_any = box_three.as_any_ref(); assert!(box_two_as_any.eq_in_heap(&heap, &box_two_as_any)); assert!(!box_two_as_any.eq_in_heap(&heap, &box_three_as_any)); #[allow(clippy::eq_op)] { assert_eq!(TRUE_INSTANCE, TRUE_INSTANCE); } } #[test] fn any_fmt_debug() { let mut heap = Heap::empty(); let boxed_one = Int::new(&mut heap, 1); let boxed_one_as_any = boxed_one.as_any_ref(); assert_eq!("Int(1)", format!("{:?}", boxed_one_as_any)); } #[test] fn union_types() { let mut heap = Heap::empty(); let box_float = Float::new(&mut heap, 2.0); let box_float_as_any = box_float.as_any_ref(); if let Some(stack_num) = box_float_as_any.downcast_ref::() { if let NumSubtype::Float(_) = stack_num.as_subtype() { } else { panic!("Couldn't get tagged Float from Num"); } assert!(!stack_num.downcast_ref::().is_some()); assert!(stack_num.downcast_ref::().is_some()); } else { panic!("Float was not a Num"); } let box_str = Str::new(&mut heap, "Test!"); let box_str_as_any = box_str.as_any_ref(); assert!(!box_str_as_any.downcast_ref::().is_some()); } } ================================================ FILE: runtime/boxed/refs.rs ================================================ 
//! References to boxed values //! //! These are all transparent; they're used for either readability or marker traits. use std::ops::Deref; use std::ptr; use std::{fmt, hash}; use crate::boxed::Boxed; /// Reference to a garbage collected value /// /// This is not memory safe and does not GC root; it's just sugar for a raw pointer. #[repr(transparent)] pub struct Gc { inner: ptr::NonNull, } // Manual Clone implementation to work around Rust issue #26925 impl Clone for Gc { fn clone(&self) -> Self { Gc { inner: self.inner } } } impl Copy for Gc {} impl Deref for Gc { type Target = T; fn deref(&self) -> &T { unsafe { self.inner.as_ref() } } } impl Gc { /// Returns a new instance wrapping a pointer to a garbage collected box /// /// # Safety /// The requires a valid pointer to a box pub unsafe fn new(ptr: *const T) -> Gc { Gc { inner: ptr::NonNull::new_unchecked(ptr as *mut T), } } /// Unchecked cast to the passed type /// /// # Safety /// The requires the box to be of the asserted type pub unsafe fn cast(self) -> Gc { Gc { inner: self.inner.cast::(), } } /// Returns a pointer to the garbage collected box pub fn as_ptr(self) -> *const T { self.inner.as_ptr() } /// Returns a mutable to the garbage collected box pub(super) fn as_mut_ptr(self) -> *mut T { self.inner.as_ptr() } } impl PartialEq for Gc where T: PartialEq, { fn eq(&self, other: &Gc) -> bool { unsafe { *self.as_ptr() == *other.as_ptr() } } } impl Eq for Gc where T: Boxed + Eq {} impl hash::Hash for Gc where T: hash::Hash, { fn hash(&self, state: &mut H) { unsafe { (*self.as_ptr()).hash(state) } } } impl fmt::Debug for Gc where T: fmt::Debug, { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { unsafe { (*self.as_ptr()).fmt(formatter) } } } macro_rules! 
define_marker_ref { ( $(#[$docs:meta])* $ref_name:ident ) => { $(#[$docs])* #[repr(transparent)] pub struct $ref_name { inner: ptr::NonNull, } impl Deref for $ref_name { type Target = T; fn deref(&self) -> &T { unsafe { self.inner.as_ref() } } } impl From<$ref_name> for Gc { fn from(marker_ref: $ref_name) -> Gc { Gc { inner: marker_ref.inner, } } } }; } define_marker_ref!( /// Special marker ref for parameters that are explicitly not captured /// /// This can be used for performance-sensitive functions where the compiler cannot prove the /// parameter can't be captured. NoCapture ); define_marker_ref!( /// Special marker ref for parameters that are explicitly captured /// /// Capturing GC managed values is usually not allowed as the captured values become invisible /// to the garbage collector and will become invalid on the next collection cycle. This is /// intended for use by special runtime functions that expose their captured values to the /// collector via an internal mechanism. Capture ); ================================================ FILE: runtime/boxed/types/char.rs ================================================ use std::fmt; use std::hash::{Hash, Hasher}; use crate::boxed::*; /// Boxed Unicode character /// /// This corresponds more precisely to a /// [Unicode scalar value](http://www.unicode.org/glossary/#unicode_scalar_value). 
#[repr(C, align(16))] pub struct Char { header: Header, value: char, } impl Boxed for Char {} impl UniqueTagged for Char {} impl Char { /// Constructs a new character pub fn new(heap: &mut impl AsHeap, value: char) -> Gc { heap.as_heap_mut().place_box(Char { header: Self::TYPE_TAG.to_heap_header(Self::size()), value, }) } /// Returns the box size for characters pub fn size() -> BoxSize { BoxSize::Size16 } /// Returns the unboxed value of this character pub fn value(&self) -> char { self.value } } impl PartialEq for Char { fn eq(&self, other: &Char) -> bool { self.value == other.value } } impl Hash for Char { fn hash(&self, state: &mut H) { Self::TYPE_TAG.hash(state); self.value().hash(state) } } impl fmt::Debug for Char { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { write!(formatter, "Char({:?})", self.value) } } #[cfg(test)] mod test { use super::*; use crate::boxed::heap::Heap; use std::mem; #[test] fn sizes() { assert_eq!(16, mem::size_of::()); } #[test] fn equality() { let mut heap = Heap::empty(); let boxed_a1 = Char::new(&mut heap, 'a'); let boxed_a2 = Char::new(&mut heap, 'a'); let boxed_b = Char::new(&mut heap, 'b'); assert_ne!(boxed_a1, boxed_b); assert_eq!(boxed_a1, boxed_a2); } #[test] fn fmt_debug() { let mut heap = Heap::empty(); let boxed_a = Char::new(&mut heap, 'a'); assert_eq!("Char('a')", format!("{:?}", boxed_a)); } } ================================================ FILE: runtime/boxed/types/field_value.rs ================================================ use std::hash::{Hash, Hasher}; use std::ptr; use crate::boxed; use crate::boxed::prelude::*; use crate::boxed::refs::Gc; use crate::class_map; use crate::intern::InternedSym; /// Field within a record value pub enum FieldValue { /// Unboxed boolean Bool(bool), /// Unboxed Unicode character Char(char), /// Unboxed 64bit floating point value Float(f64), /// Unboxed 64bit signed integer Int(i64), /// Interned symbol InternedSym(InternedSym), /// Boxed garbage 
collected value Boxed(Gc), } pub(crate) enum FieldGcRef { InternedSym(&'static mut InternedSym), Boxed(&'static mut Gc), } impl PartialEqInHeap for FieldValue { fn eq_in_heap(&self, heap: &boxed::Heap, other: &FieldValue) -> bool { match (self, other) { (FieldValue::Bool(sv), FieldValue::Bool(ov)) => sv == ov, (FieldValue::Char(sv), FieldValue::Char(ov)) => sv == ov, (FieldValue::Float(sv), FieldValue::Float(ov)) => sv == ov, (FieldValue::Int(sv), FieldValue::Int(ov)) => sv == ov, (FieldValue::InternedSym(sv), FieldValue::InternedSym(ov)) => sv == ov, (FieldValue::Boxed(sv), FieldValue::Boxed(ov)) => sv.eq_in_heap(heap, ov), _ => false, } } } impl HashInHeap for FieldValue { fn hash_in_heap(&self, heap: &boxed::Heap, state: &mut H) { match self { FieldValue::Bool(v) => (*v).hash(state), FieldValue::Char(v) => (*v).hash(state), FieldValue::Float(v) => { // See `boxed::Float::hash` if *v == 0.0 { state.write_u64((0.0f64).to_bits()) } else { state.write_u64(v.to_bits()); } } FieldValue::Int(v) => (*v).hash(state), FieldValue::InternedSym(v) => (*v).hash(state), FieldValue::Boxed(v) => v.hash_in_heap(heap, state), } } } /// Iterates over fields in a record value pub struct FieldValueIter<'cm> { pub(super) classmap_field_iter: class_map::FieldIterator<'cm>, pub(super) record_data: *const u8, } impl<'cm> Iterator for FieldValueIter<'cm> { type Item = FieldValue; fn next(&mut self) -> Option { use class_map::FieldType; self.classmap_field_iter .next() .map(|classmap_field| unsafe { let field_base_ptr = self.record_data.add(classmap_field.offset()); match classmap_field.field_type() { FieldType::Bool => FieldValue::Bool(*(field_base_ptr as *const bool)), FieldType::Char => FieldValue::Char(*(field_base_ptr as *const char)), FieldType::Float => FieldValue::Float(*(field_base_ptr as *const f64)), FieldType::Int => FieldValue::Int(*(field_base_ptr as *const i64)), FieldType::InternedSym => { FieldValue::InternedSym(*(field_base_ptr as *const InternedSym)) } FieldType::Boxed 
=> { FieldValue::Boxed(*(field_base_ptr as *const Gc)) } } }) } } pub(crate) struct FieldGcRefIter<'cm> { pub(super) classmap_field_iter: class_map::FieldIterator<'cm>, pub(super) record_data: *const u8, } impl<'cm> FieldGcRefIter<'cm> { pub(crate) fn empty() -> FieldGcRefIter<'static> { FieldGcRefIter { classmap_field_iter: class_map::FieldIterator::empty(), record_data: ptr::null(), } } } impl<'cm> Iterator for FieldGcRefIter<'cm> { type Item = FieldGcRef; fn next(&mut self) -> Option { while let Some(classmap_field) = self.classmap_field_iter.next() { unsafe { use class_map::FieldType; let field_base_ptr = self.record_data.add(classmap_field.offset()); match classmap_field.field_type() { FieldType::InternedSym => { return Some(FieldGcRef::InternedSym( &mut *(field_base_ptr as *mut InternedSym), )); } FieldType::Boxed => { return Some(FieldGcRef::Boxed( &mut *(field_base_ptr as *mut Gc), )); } _ => {} } } } None } } ================================================ FILE: runtime/boxed/types/float.rs ================================================ use std::fmt; use std::hash::{Hash, Hasher}; use crate::boxed::refs::Gc; use crate::boxed::*; /// Boxed 64bit floating point value #[repr(C, align(16))] pub struct Float { header: Header, value: f64, } impl Boxed for Float {} impl UniqueTagged for Float {} impl Float { /// Constructs a new float pub fn new(heap: &mut impl AsHeap, value: f64) -> Gc { heap.as_heap_mut().place_box(Float { header: Self::TYPE_TAG.to_heap_header(Self::size()), value, }) } /// Returns the box size for floats pub fn size() -> BoxSize { BoxSize::Size16 } /// Returns the unboxed value of this float pub fn value(&self) -> f64 { self.value } } impl PartialEq for Float { fn eq(&self, other: &Float) -> bool { self.value() == other.value() } } impl Hash for Float { fn hash(&self, state: &mut H) { Self::TYPE_TAG.hash(state); let value = self.value(); if value == 0.0 { // 0.0 == -0.0 so they need to hash to the same value 
state.write_u64((0.0f64).to_bits()) } else { // NaNs will mostly map to the same value which is allowed but also a collision state.write_u64(value.to_bits()); } } } impl fmt::Debug for Float { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { write!(formatter, "Float({:?})", self.value) } } #[cfg(test)] mod test { use super::*; use crate::boxed::heap::Heap; use std::mem; fn calc_hash(value: f64) -> u64 { use std::collections::hash_map::DefaultHasher; let mut heap = Heap::empty(); let boxed_float = Float::new(&mut heap, value); let mut hasher = DefaultHasher::new(); boxed_float.hash(&mut hasher); hasher.finish() } #[test] fn sizes() { assert_eq!(16, mem::size_of::()); } #[test] fn equality() { let mut heap = Heap::empty(); let boxed_one1 = Float::new(&mut heap, 1.0); let boxed_one2 = Float::new(&mut heap, 1.0); let boxed_two = Float::new(&mut heap, 2.0); assert_ne!(boxed_one1, boxed_two); assert_eq!(boxed_one1, boxed_one2); } #[test] fn hash() { let minus_zero_hash = calc_hash(-0.0); let plus_zero_hash = calc_hash(0.0); let plus_one_hash = calc_hash(1.0); assert_ne!(plus_one_hash, minus_zero_hash); assert_eq!(plus_zero_hash, minus_zero_hash); } #[test] fn fmt_debug() { let mut heap = Heap::empty(); let boxed_one = Float::new(&mut heap, 1.0); assert_eq!("Float(1.0)", format!("{:?}", boxed_one)); } } ================================================ FILE: runtime/boxed/types/fun.rs ================================================ use std::fmt; use std::hash::{Hash, Hasher}; use crate::boxed::refs::Gc; use crate::boxed::*; use crate::task; /// Opaque type for a function's captures /// /// This has a meaning specific to the implementation of the function. This may be a dummy value /// (typically [`Nil`]) for functions that don't capture, a single boxed value or a collection of /// multiple boxed values. The only external contract is that it must be a boxed value to allow for /// garbage collection. 
pub type Captures = Gc; /// Entry point for executing a function pub type ThunkEntry = extern "C" fn(&mut task::Task, Captures, Gc) -> Gc; /// Boxed function value with optional captures /// /// This is typically used in places where functions are used as values or stored in collections. /// For example, placing a function in a list will create a `FunThunk`. When taking an function as a /// parameter to an RFI function it's typically better to use a typed /// [`callback::Callback`](crate::callback::Callback). #[repr(C, align(16))] pub struct FunThunk { header: Header, pub(crate) captures: Captures, entry: ThunkEntry, } impl Boxed for FunThunk {} impl UniqueTagged for FunThunk {} impl FunThunk { /// Constructs a new function value with the given captures and entry point pub fn new(heap: &mut impl AsHeap, captures: Captures, entry: ThunkEntry) -> Gc { heap.as_heap_mut().place_box(FunThunk { header: Self::TYPE_TAG.to_heap_header(Self::size()), captures, entry, }) } /// Returns the box size for functions pub fn size() -> BoxSize { BoxSize::Size32 } /// Applies this function on the passed task with the given arguments pub fn apply(&self, task: &mut task::Task, arg_list: Gc) -> Gc { (self.entry)(task, self.captures, arg_list) } } impl PartialEq for FunThunk { fn eq(&self, _: &FunThunk) -> bool { // There is no reliable way to compare functions so they're always inequal false } } impl Eq for FunThunk {} impl Hash for FunThunk { fn hash(&self, state: &mut H) { Self::TYPE_TAG.hash(state); state.write_usize(self as *const _ as usize); } } impl fmt::Debug for FunThunk { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { write!(formatter, "FunThunk({:p})", self) } } #[cfg(test)] mod test { use super::*; use crate::boxed; use crate::boxed::heap::Heap; use std::mem; extern "C" fn identity_entry( _: &mut task::Task, _captures: Captures, rest: Gc, ) -> Gc { rest } extern "C" fn return_42_entry( task: &mut task::Task, _captures: Captures, _rest: Gc, ) -> 
Gc { Int::new(task, 32).as_any_ref() } #[test] fn sizes() { assert_eq!(32, mem::size_of::()); } #[test] fn equality() { let mut heap = Heap::empty(); let nil_captures = boxed::NIL_INSTANCE.as_any_ref(); let boxed_identity1 = FunThunk::new(&mut heap, nil_captures, identity_entry); let boxed_identity2 = FunThunk::new(&mut heap, nil_captures, identity_entry); let boxed_return = FunThunk::new(&mut heap, nil_captures, return_42_entry); assert_ne!(boxed_identity1, boxed_return); // We use pointer identity for now assert_ne!(boxed_identity1, boxed_identity2); } } ================================================ FILE: runtime/boxed/types/int.rs ================================================ use std::fmt; use std::hash::{Hash, Hasher}; use crate::boxed::refs::Gc; use crate::boxed::*; /// Boxed 64bit signed integer #[repr(C, align(16))] pub struct Int { header: Header, value: i64, } impl Boxed for Int {} impl UniqueTagged for Int {} impl Int { /// Constructs a new integer pub fn new(heap: &mut impl AsHeap, value: i64) -> Gc { heap.as_heap_mut().place_box(Int { header: Self::TYPE_TAG.to_heap_header(Self::size()), value, }) } /// Returns the box size for integers pub fn size() -> BoxSize { BoxSize::Size16 } /// Returns the unboxed value of this integer pub fn value(&self) -> i64 { self.value } } impl PartialEq for Int { fn eq(&self, other: &Int) -> bool { self.value() == other.value() } } impl Hash for Int { fn hash(&self, state: &mut H) { Self::TYPE_TAG.hash(state); self.value().hash(state) } } impl fmt::Debug for Int { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { write!(formatter, "Int({:?})", self.value) } } #[cfg(test)] mod test { use super::*; use crate::boxed::heap::Heap; use std::mem; #[test] fn sizes() { assert_eq!(16, mem::size_of::()); } #[test] fn equality() { let mut heap = Heap::empty(); let boxed_one1 = Int::new(&mut heap, 1); let boxed_one2 = Int::new(&mut heap, 1); let boxed_two = Int::new(&mut heap, 2); assert_ne!(boxed_one1, 
boxed_two); assert_eq!(boxed_one1, boxed_one2); } #[test] fn fmt_debug() { let mut heap = Heap::empty(); let boxed_one = Int::new(&mut heap, 1); assert_eq!("Int(1)", format!("{:?}", boxed_one)); } } ================================================ FILE: runtime/boxed/types/list.rs ================================================ use std::fmt; use std::hash::{Hash, Hasher}; use std::iter::FusedIterator; use std::marker::PhantomData; use crate::abitype::{BoxedAbiType, EncodeBoxedAbiType}; use crate::boxed::refs::Gc; use crate::boxed::*; /// Non-empty list #[repr(C, align(16))] pub struct Pair { header: Header, list_len: i64, pub(crate) head: Gc, pub(crate) rest: Gc>, } impl Boxed for Pair {} impl EncodeBoxedAbiType for Pair where T: EncodeBoxedAbiType, { const BOXED_ABI_TYPE: BoxedAbiType = BoxedAbiType::Pair(&T::BOXED_ABI_TYPE); } impl Pair { /// Constructs a pair with the given `head` and `rest` pub fn new(heap: &mut impl AsHeap, head: Gc, rest: Gc>) -> Gc> { heap.as_heap_mut().place_box(Pair { header: Pair::TYPE_TAG.to_heap_header(Self::size()), head, rest, list_len: (rest.len() + 1) as i64, }) } /// Returns the box size for pairs pub fn size() -> BoxSize { BoxSize::Size32 } /// Returns the length of the list this pair is the head of /// /// Note that this must be at least 1. 
pub fn len(&self) -> usize { self.list_len as usize } /// Returns false pub fn is_empty(&self) -> bool { // This is to make Clippy happy since we have `len` false } /// Returns the head value pub fn head(&self) -> Gc { self.head } /// Returns the tail list pub fn rest(&self) -> Gc> { self.rest } /// Casts this pair to a non-empty list pub fn as_list_ref(&self) -> Gc> { unsafe { Gc::new(&*(self as *const _ as *const List)) } } } impl PartialEqInHeap for Pair { fn eq_in_heap(&self, heap: &Heap, rhs: &Pair) -> bool { self.head.eq_in_heap(heap, &rhs.head) && self.rest.eq_in_heap(heap, &rhs.rest) } } impl HashInHeap for Pair { fn hash_in_heap(&self, task: &Heap, state: &mut H) { TypeTag::Pair.hash(state); self.head().hash_in_heap(task, state); self.rest().hash_in_heap(task, state); } } impl fmt::Debug for Pair { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { self.as_list_ref().fmt(formatter) } } /// List of boxed values /// /// This allows O(n) access to its elements. It has the benefit of allowing constant time prepends /// while sharing the tail of the existing list. 
#[repr(C, align(16))] pub struct List { header: Header, list_len: i64, phantom: PhantomData, } impl Boxed for List {} impl DistinctTagged for List { fn has_tag(type_tag: TypeTag) -> bool { [TypeTag::Pair, TypeTag::Nil].contains(&type_tag) } } impl EncodeBoxedAbiType for List where T: EncodeBoxedAbiType, { const BOXED_ABI_TYPE: BoxedAbiType = BoxedAbiType::List(&T::BOXED_ABI_TYPE); } /// Possible subtypes of [`List`] pub enum ListSubtype<'a, T: Boxed> { /// Non-empty list Pair(&'a Pair), /// Empty list Nil, } impl List { /// Constructs a new fixed sized list containing the passed `elems` pub fn new(heap: &mut impl AsHeap, elems: impl ExactSizeIterator>) -> Gc> { Self::new_with_tail(heap, elems, Self::empty()) } /// Constructs a list with a head of `elems` and the specified tail list pub fn new_with_tail( heap: &mut impl AsHeap, elems: impl ExactSizeIterator>, tail: Gc>, ) -> Gc> { let elems_len = elems.len(); let tail_len = tail.len(); if elems_len == 0 { return tail; } // Allocate the entire list at once let heap_alloc = heap .as_heap_mut() .alloc_cells(Pair::::size().cell_count() * elems_len); unsafe { let pair_alloc = heap_alloc as *mut Pair; for (i, head) in elems.enumerate() { let elems_remaining = elems_len - i; let rest = if elems_remaining == 1 { tail } else { (&*pair_alloc.add(i + 1)).as_list_ref() }; *pair_alloc.add(i) = Pair { header: Pair::TYPE_TAG.to_heap_header(Pair::::size()), head, rest, list_len: (elems_remaining + tail_len) as i64, }; } Gc::new(pair_alloc as *const List) } } /// Returns an empty list pub fn empty() -> Gc> { unsafe { Gc::new(&NIL_INSTANCE as *const Nil as *const List) } } /// Creates a list by constructing an iterator of values pub fn from_values( heap: &mut impl AsHeap, values: impl Iterator, cons: F, ) -> Gc> where F: Fn(&mut Heap, V) -> Gc, { let heap = heap.as_heap_mut(); let elems: Vec> = values.map(|v| cons(heap, v)).collect(); Self::new(heap, elems.into_iter()) } /// Returns a subtype of this list based on its type tag pub fn 
as_subtype(&self) -> ListSubtype<'_, T> { match self.header.type_tag { TypeTag::Pair => { ListSubtype::Pair(unsafe { &*(self as *const List as *const Pair) }) } TypeTag::Nil => ListSubtype::Nil, other => { unreachable!("Unexpected type tag: {:?}", other); } } } /// Returns the length of the list pub fn len(&self) -> usize { self.list_len as usize } /// Returns true if the list is empty pub fn is_empty(&self) -> bool { self.header.type_tag == TypeTag::Nil } /// Returns an iterator to the list's values pub fn iter(&self) -> ListIterator { ListIterator { head: unsafe { Gc::new(self as *const Self) }, } } } impl PartialEqInHeap for List { fn eq_in_heap(&self, heap: &Heap, other: &List) -> bool { if self.len() != other.len() { return false; } self.iter() .zip(other.iter()) .all(|(self_value, other_value)| self_value.eq_in_heap(heap, &other_value)) } } impl HashInHeap for List { fn hash_in_heap(&self, heap: &Heap, state: &mut H) { match self.as_subtype() { ListSubtype::Pair(pair) => pair.hash_in_heap(heap, state), ListSubtype::Nil => NIL_INSTANCE.hash_in_heap(heap, state), } } } impl fmt::Debug for List { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { formatter.write_str("List(")?; formatter.debug_list().entries(self.iter()).finish()?; formatter.write_str(")") } } pub struct ListIterator { head: Gc>, } impl Iterator for ListIterator { type Item = Gc; fn next(&mut self) -> Option> { // If we use `head` directly the borrow checker gets suspicious let head = unsafe { &*(self.head.as_ptr()) }; match head.as_subtype() { ListSubtype::Pair(pair) => { self.head = pair.rest; Some(pair.head) } ListSubtype::Nil => None, } } fn size_hint(&self) -> (usize, Option) { (self.head.len(), Some(self.head.len())) } } impl ExactSizeIterator for ListIterator {} impl FusedIterator for ListIterator {} /// Empty list #[repr(C, align(16))] #[derive(Debug)] pub struct Nil { header: Header, list_len: usize, } /// Static constant instance of [`Nil`] #[export_name = 
"ARRET_NIL"] pub static NIL_INSTANCE: Nil = Nil { header: Header { type_tag: TypeTag::Nil, alloc_type: AllocType::Const, }, list_len: 0, }; impl Boxed for Nil {} impl UniqueTagged for Nil {} impl PartialEq for Nil { fn eq(&self, _: &Nil) -> bool { true } } impl Hash for Nil { fn hash(&self, state: &mut H) { Self::TYPE_TAG.hash(state); state.write_usize(&NIL_INSTANCE as *const _ as usize); } } #[cfg(test)] mod test { use super::*; use crate::boxed::heap::Heap; use crate::boxed::Int; use std::mem; #[test] fn sizes() { assert_eq!(16, mem::size_of::()); assert_eq!(16, mem::size_of::>()); assert_eq!(32, mem::size_of::>()); } #[test] fn equality() { use crate::boxed::Int; let mut heap = Heap::empty(); let forward_list1 = List::from_values(&mut heap, [1, 2, 3].iter().cloned(), Int::new); let forward_list2 = List::from_values(&mut heap, [1, 2, 3].iter().cloned(), Int::new); let reverse_list = List::from_values(&mut heap, [3, 2, 1].iter().cloned(), Int::new); assert!(!forward_list1.eq_in_heap(&heap, &reverse_list)); assert!(forward_list1.eq_in_heap(&heap, &forward_list2)); } #[test] fn fmt_debug() { let mut heap = Heap::empty(); let forward_list = List::from_values(&mut heap, [1, 2, 3].iter().cloned(), Int::new); assert_eq!( "List([Int(1), Int(2), Int(3)])", format!("{:?}", forward_list) ); } #[test] fn construct_and_iter() { let mut heap = Heap::empty(); let boxed_list = List::from_values(&mut heap, [1, 2, 3].iter().cloned(), Int::new); let mut boxed_list_iter = boxed_list.iter(); assert_eq!(3, boxed_list_iter.len()); for expected_num in &[1, 2, 3] { if let Some(boxed_int) = boxed_list_iter.next() { assert_eq!(*expected_num, boxed_int.value()); } else { panic!("Iterator unexpectedly ended"); } } assert_eq!(0, boxed_list_iter.len()); assert!(!boxed_list_iter.next().is_some()); } } ================================================ FILE: runtime/boxed/types/map.rs ================================================ use std::fmt; use std::hash::{Hash, Hasher}; use 
std::marker::PhantomData; use crate::abitype::{BoxedAbiType, EncodeBoxedAbiType}; use crate::boxed::refs::Gc; use crate::boxed::*; /// Immutable map of boxed values #[repr(C, align(16))] pub struct Map { header: Header, _key: PhantomData, _value: PhantomData, } impl Boxed for Map {} impl Map { /// Constructs a new map with the given values pub fn new( heap: &mut impl AsHeap, values: impl ExactSizeIterator, Gc)>, ) -> Gc> { if values.len() != 0 { todo!("non-empty maps"); } heap.as_heap_mut().place_box(Map { header: Map::TYPE_TAG.to_heap_header(Self::size()), _key: PhantomData, _value: PhantomData, }) } /// Constructs a map by constructing an iterator of values pub fn from_values( heap: &mut impl AsHeap, values: impl ExactSizeIterator, _cons: F, ) -> Gc> where F: Fn(&mut Heap, T) -> (Gc, Gc), { if values.len() != 0 { todo!("non-empty maps"); } heap.as_heap_mut().place_box(Map { header: Map::TYPE_TAG.to_heap_header(Self::size()), _key: PhantomData, _value: PhantomData, }) } /// Returns the box size for maps pub fn size() -> BoxSize { BoxSize::Size16 } /// Return if the map is empty pub fn is_empty(&self) -> bool { true } /// Returns the number of the entries in the map pub fn len(&self) -> usize { 0 } /// Returns an iterator over the entries in map pub fn iter(&self) -> impl Iterator, Gc)> + '_ { std::iter::empty() } } impl PartialEqInHeap for Map { fn eq_in_heap(&self, _heap: &Heap, _other: &Map) -> bool { // Both maps must be empty true } } impl HashInHeap for Map { fn hash_in_heap(&self, _heap: &Heap, state: &mut H) { TypeTag::Map.hash(state); } } impl fmt::Debug for Map { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { formatter.write_str("Map(")?; formatter.debug_list().entries(self.iter()).finish()?; formatter.write_str(")") } } impl EncodeBoxedAbiType for Map where K: EncodeBoxedAbiType, V: EncodeBoxedAbiType, { const BOXED_ABI_TYPE: BoxedAbiType = BoxedAbiType::Map(&K::BOXED_ABI_TYPE, &V::BOXED_ABI_TYPE); } #[cfg(test)] mod test { 
use super::*; use std::mem; #[test] fn sizes() { assert_eq!(16, mem::size_of::>()); } } ================================================ FILE: runtime/boxed/types/mod.rs ================================================ pub mod char; pub mod field_value; pub mod float; pub mod fun; pub mod int; pub mod list; pub mod map; pub mod record; pub mod record_data; pub mod set; pub mod shared_str; pub mod str; pub mod sym; pub mod vector; ================================================ FILE: runtime/boxed/types/record.rs ================================================ use std::alloc; use std::hash::{Hash, Hasher}; use std::mem::MaybeUninit; use std::{fmt, mem}; use crate::boxed::refs::Gc; use crate::boxed::types::field_value::FieldGcRefIter; use crate::boxed::*; /// Numeric ID indicating which class the record belongs to /// /// This is used to distinguish record types before each other. pub type RecordClassId = u32; #[repr(C)] struct RecordHeader { header: Header, inline_byte_len: u8, may_contain_gc_refs: bool, class_id: RecordClassId, } /// Describes the storage of a record's data #[derive(Clone, Copy, Debug, PartialEq)] pub enum RecordStorage { /// Record data is stored inline in a box of the given size Inline(BoxSize), /// Record data is stored out-of-line in a 32 byte box External, } impl RecordStorage { /// Returns the box size for a record storage pub fn box_size(self) -> BoxSize { match self { RecordStorage::Inline(box_size) => box_size, RecordStorage::External => BoxSize::Size32, } } } /// User-defined record type #[repr(C, align(16))] pub struct Record { record_header: RecordHeader, padding: [u8; Record::MAX_INLINE_BYTES], } impl Boxed for Record {} impl Record { /// Maximum number of bytes that can be stored directly in a box pub const MAX_INLINE_BYTES: usize = 24; /// Inline byte length used for external vectors pub const EXTERNAL_INLINE_LEN: u8 = (Self::MAX_INLINE_BYTES as u8) + 1; /// Alignment of our inline record data in bytes const INLINE_DATA_ALIGNMENT: 
usize = 8; /// Constructs a new record of the given class and initialises it with the passed data pub fn new(heap: &mut impl AsHeap, class_id: RecordClassId, data: RecordData) -> Gc { let storage = Self::storage_for_data_layout(data.layout()); let box_size = storage.box_size(); let boxed = unsafe { match storage { RecordStorage::External => { mem::transmute(ExternalRecord::new(box_size, class_id, data)) } RecordStorage::Inline(_) => { mem::transmute(InlineRecord::new(box_size, class_id, data)) } } }; heap.as_heap_mut().place_box(boxed) } /// Returns the storage for given data layout pub fn storage_for_data_layout(data_layout: Option) -> RecordStorage { match data_layout { None => RecordStorage::Inline(BoxSize::Size16), Some(data_layout) => { if data_layout.align() > Self::INLINE_DATA_ALIGNMENT { // Requires more alignment than our inline data provides return RecordStorage::External; } match data_layout.size() { 0..=8 => RecordStorage::Inline(BoxSize::Size16), 9..=Record::MAX_INLINE_BYTES => RecordStorage::Inline(BoxSize::Size32), _ => RecordStorage::External, } } } } /// Returns the class ID for the record pub fn class_id(&self) -> RecordClassId { self.record_header.class_id } /// Returns an iterator over the record's field values pub fn field_values<'cm>(&self, heap: &'cm Heap) -> FieldValueIter<'cm> { let classmap_class = heap .type_info() .class_map() .class_for_record_class_id(self.class_id()); FieldValueIter { classmap_field_iter: classmap_class.field_iter(), record_data: self.data_ptr(), } } pub(crate) fn field_gc_refs<'cm>(&mut self, heap: &'cm Heap) -> FieldGcRefIter<'cm> { if !self.record_header.may_contain_gc_refs { return FieldGcRefIter::empty(); } let classmap_class = heap .type_info() .class_map() .class_for_record_class_id(self.class_id()); FieldGcRefIter { classmap_field_iter: classmap_class.field_iter(), record_data: self.data_ptr(), } } fn data_ptr(&self) -> *const u8 { match self.as_repr() { Repr::Inline(inline) => inline.inline_data.as_ptr() as 
*const u8, Repr::External(external) => external.external_data.as_ptr(), } } fn is_empty(&self) -> bool { self.record_header.inline_byte_len == 0 } fn is_inline(&self) -> bool { self.record_header.inline_byte_len <= Self::MAX_INLINE_BYTES as u8 } fn as_repr(&self) -> Repr<'_> { if self.is_inline() { Repr::Inline(unsafe { &*(self as *const Record as *const InlineRecord) }) } else { Repr::External(unsafe { &*(self as *const Record as *const ExternalRecord) }) } } fn as_repr_mut(&mut self) -> ReprMut<'_> { if self.is_inline() { ReprMut::Inline(unsafe { &mut *(self as *mut Record as *mut InlineRecord) }) } else { ReprMut::External(unsafe { &mut *(self as *mut Record as *mut ExternalRecord) }) } } } impl PartialEqInHeap for Record { fn eq_in_heap(&self, heap: &Heap, other: &Record) -> bool { if self.class_id() != other.class_id() { return false; } if self.is_empty() { return true; } self.field_values(heap) .zip(other.field_values(heap)) .all(|(self_field, other_field)| self_field.eq_in_heap(heap, &other_field)) } } impl HashInHeap for Record { fn hash_in_heap(&self, heap: &Heap, state: &mut H) { Self::TYPE_TAG.hash(state); self.class_id().hash(state); if !self.is_empty() { for field in self.field_values(heap) { field.hash_in_heap(heap, state); } } } } impl fmt::Debug for Record { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { write!(formatter, "Record({:?})", self.class_id()) } } #[repr(C, align(16))] struct InlineRecord { record_header: RecordHeader, inline_data: MaybeUninit<[u8; Record::MAX_INLINE_BYTES]>, } impl InlineRecord { fn new(box_size: BoxSize, class_id: RecordClassId, data: RecordData) -> InlineRecord { let header = Record::TYPE_TAG.to_heap_header(box_size); unsafe { let mut inline_data = mem::MaybeUninit::<[u8; Record::MAX_INLINE_BYTES]>::uninit(); if let Some(data_layout) = data.layout() { ptr::copy( data.as_ptr(), inline_data.as_mut_ptr() as *mut _, data_layout.size(), ); } InlineRecord { record_header: RecordHeader { header, 
inline_byte_len: match data.layout() { Some(layout) => layout.size() as u8, None => 0, }, // This is conservative - we don't know if there are GC refs or not may_contain_gc_refs: data.layout().is_some(), class_id, }, inline_data, } } } } #[repr(C, align(16))] struct ExternalRecord { record_header: RecordHeader, external_data: RecordData, } impl ExternalRecord { fn new(box_size: BoxSize, class_id: RecordClassId, data: RecordData) -> ExternalRecord { let header = Record::TYPE_TAG.to_heap_header(box_size); ExternalRecord { record_header: RecordHeader { header, inline_byte_len: std::u8::MAX, // This is conservative - we don't know if there are GC refs or not may_contain_gc_refs: true, class_id, }, external_data: data, } } } enum Repr<'a> { Inline(&'a InlineRecord), External(&'a ExternalRecord), } enum ReprMut<'a> { Inline(&'a mut InlineRecord), External(&'a mut ExternalRecord), } impl Drop for Record { fn drop(&mut self) { match self.as_repr_mut() { ReprMut::Inline(_) => { // Do nothing here; we might've been allocated as a 16 byte box so we can't read // the whole thing. 
use std::alloc;

/// Allocation backing a record's out-of-line field data
#[repr(C)]
pub struct RecordData {
    data_ptr: *mut u8,
    compact_layout: u64,
}

impl RecordData {
    /// Constructs an empty record data with no backing allocation
    pub fn empty() -> Self {
        Self::alloc(None)
    }

    /// Allocates record data for the given layout
    ///
    /// Passing `None` produces an empty record data with a null data pointer.
    ///
    /// Aborts the process (via [`alloc::handle_alloc_error`]) if the
    /// allocation fails, rather than returning a null pointer that would
    /// later be written through.
    pub fn alloc(data_layout: Option<alloc::Layout>) -> Self {
        let data_ptr = match data_layout {
            Some(data_layout) => {
                // SAFETY: assumes record data layouts are non-zero sized —
                // TODO confirm; `alloc` with a zero-size layout is UB
                let ptr = unsafe { alloc::alloc(data_layout) };

                if ptr.is_null() {
                    // Allocation failed; abort instead of handing out null
                    alloc::handle_alloc_error(data_layout);
                }

                ptr
            }
            None => std::ptr::null_mut(),
        };

        Self {
            data_ptr,
            compact_layout: Self::alloc_layout_to_compact(data_layout),
        }
    }

    /// Returns a pointer to the record data
    pub fn as_ptr(&self) -> *const u8 {
        self.data_ptr
    }

    /// Returns a mutable pointer to the record data
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.data_ptr
    }

    /// Returns the layout for the record data, or `None` if the data is empty
    pub fn layout(&self) -> Option<alloc::Layout> {
        Self::compact_to_alloc_layout(self.compact_layout)
    }

    /// Converts an [`alloc::Layout`] to a compact representation
    ///
    /// This is intended for use by the compiler. `None` is encoded as 0,
    /// which cannot collide with a real layout because alignments are
    /// always at least 1.
    pub fn alloc_layout_to_compact(alloc_layout: Option<alloc::Layout>) -> u64 {
        match alloc_layout {
            None => 0,
            Some(alloc_layout) => {
                // This allows for alignments up to 2^16 and sizes up to 2^48
                ((alloc_layout.align() as u64) & 0xFFFF) | ((alloc_layout.size() as u64) << 16)
            }
        }
    }

    /// Inverse of [`RecordData::alloc_layout_to_compact`]
    fn compact_to_alloc_layout(input: u64) -> Option<alloc::Layout> {
        if input == 0 {
            None
        } else {
            let align = (input & 0xFFFF) as usize;
            let size = (input >> 16) as usize;

            // SAFETY: `input` was produced by `alloc_layout_to_compact` from
            // a valid layout, so `align` is a non-zero power of two
            unsafe { Some(alloc::Layout::from_size_align_unchecked(size, align)) }
        }
    }
}

impl Drop for RecordData {
    fn drop(&mut self) {
        if let Some(data_layout) = Self::compact_to_alloc_layout(self.compact_layout) {
            // SAFETY: a non-zero compact layout implies `data_ptr` was
            // allocated in `alloc` with exactly this layout
            unsafe {
                alloc::dealloc(self.data_ptr, data_layout);
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_alloc_layout_to_u64() {
        let u8_layout = alloc::Layout::new::<u8>();
        let u32_layout = alloc::Layout::new::<u32>();
        let u64_layout = alloc::Layout::new::<u64>();
        let empty_array_layout = alloc::Layout::new::<[char; 0]>();
        let large_array_layout = alloc::Layout::new::<[f64; 10000]>();

        for layout in &[
            u8_layout,
            u32_layout,
            u64_layout,
            empty_array_layout,
            large_array_layout,
        ] {
            assert_eq!(
                Some(*layout),
                RecordData::compact_to_alloc_layout(RecordData::alloc_layout_to_compact(Some(
                    *layout
                ))),
            )
        }

        assert_eq!(
            None,
            RecordData::compact_to_alloc_layout(RecordData::alloc_layout_to_compact(None)),
        )
    }
}
/// Set data is stored inline in a box of the given size Inline(BoxSize), /// Set data is stored out-of-line in a 32 byte box External, } impl SetStorage { /// Returns the box size for a set storage pub fn box_size(self) -> BoxSize { match self { SetStorage::Inline(box_size) => box_size, SetStorage::External => BoxSize::Size32, } } } /// Immutable set of boxed values /// /// This is semantically similar to a map of values to the unit type #[repr(C, align(16))] pub struct Set { header: Header, inline_len: u32, padding: [u8; 24], phantom: marker::PhantomData, } impl Boxed for Set {} impl Set { /// Maximum element length of an inline set pub const MAX_INLINE_LEN: usize = MAX_32BYTE_INLINE_LEN; /// Inline element length used for external sets pub const EXTERNAL_INLINE_LEN: u32 = (Self::MAX_INLINE_LEN as u32) + 1; /// Constructs a new set with the passed boxed values pub fn new(heap: &mut impl AsHeap, values: impl ExactSizeIterator>) -> Gc> { let heap = heap.as_heap_mut(); // Calculate the hash of our values let mut hashed_values: Vec<(u64, Gc)> = values .map(|v| { let mut state = DefaultHasher::new(); v.hash_in_heap(heap, &mut state); (state.finish(), v) }) .collect(); // Make the values sorted and unique hashed_values.sort_by_key(|(hash, _)| *hash); hashed_values .dedup_by(|(hash1, v1), (hash2, v2)| hash1 == hash2 && v1.eq_in_heap(heap, v2)); let storage = Self::storage_for_element_len(hashed_values.len()); let header = Set::TYPE_TAG.to_heap_header(storage.box_size()); let boxed = unsafe { match storage { SetStorage::External => mem::transmute(ExternalSet::new(header, hashed_values)), SetStorage::Inline(_) => mem::transmute(InlineSet::new(header, hashed_values)), } }; heap.place_box(boxed) } /// Returns the storage for given element length fn storage_for_element_len(len: usize) -> SetStorage { const MIN_32BYTE_INLINE_LEN: usize = MAX_16BYTE_INLINE_LEN + 1; match len { 0..=MAX_16BYTE_INLINE_LEN => SetStorage::Inline(BoxSize::Size16), 
MIN_32BYTE_INLINE_LEN..=MAX_32BYTE_INLINE_LEN => SetStorage::Inline(BoxSize::Size32), _ => { // Too big to fit inline; this needs to be external SetStorage::External } } } /// Constructs a set by constructing an iterator of values pub fn from_values( heap: &mut impl AsHeap, values: impl Iterator, cons: F, ) -> Gc> where F: Fn(&mut Heap, V) -> Gc, { let heap = heap.as_heap_mut(); let elems: Vec> = values.map(|v| cons(heap, v)).collect(); Self::new(heap, elems.into_iter()) } fn is_inline(&self) -> bool { self.inline_len <= (Self::MAX_INLINE_LEN as u32) } fn as_repr(&self) -> Repr<'_, T> { if self.is_inline() { Repr::Inline(unsafe { &*(self as *const Set as *const InlineSet) }) } else { Repr::External(unsafe { &*(self as *const Set as *const ExternalSet) }) } } fn as_repr_mut(&mut self) -> ReprMut<'_, T> { if self.is_inline() { ReprMut::Inline(unsafe { &mut *(self as *mut Set as *mut InlineSet) }) } else { ReprMut::External(unsafe { &mut *(self as *mut Set as *mut ExternalSet) }) } } /// Returns the length of the set pub fn len(&self) -> usize { match self.as_repr() { Repr::Inline(inline) => inline.len(), Repr::External(external) => external.len(), } } /// Returns true if the set is empty pub fn is_empty(&self) -> bool { self.inline_len == 0 } /// Returns true if the passed value is included in the set pub fn contains(&self, heap: &Heap, value: &Gc) -> bool { match self.as_repr() { Repr::Inline(inline) => inline.contains(heap, value), Repr::External(external) => external.contains(heap, value), } } /// Returns an iterator over the set pub fn iter<'a>(&'a self) -> Box> + 'a> { // TODO: It would be nice not to box here match self.as_repr() { Repr::Inline(inline) => Box::new(inline.iter()), Repr::External(external) => Box::new(external.iter().copied()), } } /// Returns if this set is a subset of the passed set pub fn is_subset(&self, heap: &Heap, other: &Set) -> bool { match (self.as_repr(), other.as_repr()) { (Repr::External(external_self), 
Repr::External(external_other)) => { // Use optimised external/external logic external_self.is_subset(heap, external_other) } _ => { if self.len() > other.len() { return false; } for self_value in self.iter() { if !other.contains(heap, &self_value) { return false; } } true } } } } impl PartialEqInHeap for Set { fn eq_in_heap(&self, heap: &Heap, other: &Set) -> bool { match (self.as_repr(), other.as_repr()) { (Repr::Inline(self_inline), Repr::Inline(other_inline)) => { self_inline.eq_in_heap(heap, other_inline) } (Repr::External(self_external), Repr::External(other_external)) => { self_external.eq_in_heap(heap, other_external) } _ => false, } } } impl HashInHeap for Set { fn hash_in_heap(&self, heap: &Heap, state: &mut H) { match self.as_repr() { Repr::Inline(inline) => inline.hash_in_heap(heap, state), Repr::External(external) => external.hash_in_heap(heap, state), } } } impl fmt::Debug for Set { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { formatter.write_str("Set(")?; formatter.debug_list().entries(self.iter()).finish()?; formatter.write_str(")") } } impl EncodeBoxedAbiType for Set where T: EncodeBoxedAbiType, { const BOXED_ABI_TYPE: BoxedAbiType = BoxedAbiType::Set(&T::BOXED_ABI_TYPE); } #[repr(C, align(16))] pub struct InlineSet { header: Header, inline_len: u32, values: [MaybeUninit>; MAX_32BYTE_INLINE_LEN], } impl InlineSet { fn new(header: Header, hashed_values: Vec<(u64, Gc)>) -> InlineSet { let inline_len = hashed_values.len(); let mut inline_values = [MaybeUninit::uninit(); MAX_32BYTE_INLINE_LEN]; for (inline_value, (_, value)) in inline_values.iter_mut().zip(hashed_values) { *inline_value = MaybeUninit::new(value); } InlineSet { header, inline_len: inline_len as u32, values: inline_values, } } fn len(&self) -> usize { self.inline_len as usize } fn iter(&self) -> impl ExactSizeIterator> + '_ { self.values[0..self.inline_len as usize] .iter() .map(|value| unsafe { value.assume_init() }) } fn contains(&self, heap: &Heap, 
value: &Gc) -> bool { self.iter().any(|v| v.eq_in_heap(heap, value)) } fn eq_in_heap(&self, heap: &Heap, other: &InlineSet) -> bool { if self.len() != other.len() { return false; } self.iter() .zip(other.iter()) .all(|(self_value, other_value)| self_value.eq_in_heap(heap, &other_value)) } fn hash_in_heap(&self, heap: &Heap, state: &mut H) { TypeTag::Set.hash(state); state.write_usize(self.len()); for value in self.iter() { value.hash_in_heap(heap, state); } } } #[repr(C, align(16))] pub struct ExternalSet { header: Header, inline_len: u32, sorted_hashed_values: Vec<(u64, Gc)>, } impl ExternalSet { fn new(header: Header, sorted_hashed_values: Vec<(u64, Gc)>) -> ExternalSet { ExternalSet { header, inline_len: Set::::EXTERNAL_INLINE_LEN, sorted_hashed_values, } } fn len(&self) -> usize { self.sorted_hashed_values.len() } fn iter(&self) -> impl ExactSizeIterator> { self.sorted_hashed_values.iter().map(|(_, v)| v) } fn contains(&self, heap: &Heap, needle_value: &Gc) -> bool { // Hash our value let mut state = DefaultHasher::new(); needle_value.hash_in_heap(heap, &mut state); let needle_hash = state.finish(); // Do a binary search for the index // This will return an arbitrary matching index if there are multiple matches let arbitrary_index = if let Ok(i) = self .sorted_hashed_values .binary_search_by_key(&needle_hash, |(haystack_hash, _)| *haystack_hash) { i } else { return false; }; // Search forwards through hash collisions, including the arbitrary index let mut forwards_index = arbitrary_index; loop { let (hackstack_hash, haystack_value) = self.sorted_hashed_values[forwards_index]; if hackstack_hash != needle_hash { break; } if haystack_value.eq_in_heap(heap, needle_value) { return true; } forwards_index += 1; if forwards_index >= self.sorted_hashed_values.len() { break; } } // Search backwards through hash collisions let mut backwards_index = arbitrary_index; while backwards_index > 0 { backwards_index -= 1; let (hackstack_hash, haystack_value) = 
self.sorted_hashed_values[backwards_index]; if hackstack_hash != needle_hash { break; } if haystack_value.eq_in_heap(heap, needle_value) { return true; } } false } /// Returns if this set is a subset of the passed set fn is_subset(&self, heap: &Heap, other: &ExternalSet) -> bool { let mut self_iter = self.sorted_hashed_values.iter(); let mut other_iter = other.sorted_hashed_values.iter(); loop { let (self_hash, self_value) = if let Some(entry) = self_iter.next() { entry } else { // No more elements left to check return true; }; // Try to find the element in the other set loop { let (other_hash, other_value) = if let Some(entry) = other_iter.next() { entry } else { // Ran past the end of the other set return false; }; if self_iter.len() > other_iter.len() { // Not enough items remaining in the other set return false; } else if other_hash == self_hash && other_value.eq_in_heap(heap, self_value) { // Found corresponding element break; } else if other_hash > self_hash { // We've gone past where the corresponding element should be return false; } } } } fn eq_in_heap(&self, heap: &Heap, other: &ExternalSet) -> bool { if self.len() != other.len() { return false; } self.sorted_hashed_values .iter() .zip(other.sorted_hashed_values.iter()) .all(|((self_hash, self_value), (other_hash, other_value))| { self_hash == other_hash && self_value.eq_in_heap(heap, other_value) }) } fn hash_in_heap(&self, _: &Heap, state: &mut H) { TypeTag::Set.hash(state); state.write_usize(self.len()); // Instead of recursing into values, use their pre-calculated hash for (hash, _) in self.sorted_hashed_values.iter() { state.write_u64(*hash); } } } enum Repr<'a, T: Boxed> { Inline(&'a InlineSet), External(&'a ExternalSet), } enum ReprMut<'a, T: Boxed> { Inline(&'a mut InlineSet), External(&'a mut ExternalSet), } impl Drop for Set { fn drop(&mut self) { match self.as_repr_mut() { ReprMut::Inline(_) => { // Do nothing here; we might've been allocated as a 16 byte box so we can't read // the whole 
thing. } ReprMut::External(external) => unsafe { // Call `ExternalSet`'s drop implementation ptr::drop_in_place(external); }, } } } #[cfg(test)] mod test { use super::*; use crate::boxed::heap::Heap; use std::mem; #[test] fn sizes() { assert_eq!(32, mem::size_of::>()); assert_eq!(32, mem::size_of::>()); assert_eq!(32, mem::size_of::>()); } #[test] fn inline_equality() { use crate::boxed::Int; let mut heap = Heap::empty(); let boxed1 = Int::new(&mut heap, 1); let boxed2 = Int::new(&mut heap, 2); let boxed3 = Int::new(&mut heap, 3); let forward_set1 = Set::new(&mut heap, IntoIterator::into_iter([boxed1, boxed2, boxed3])); let forward_set2 = Set::new( &mut heap, IntoIterator::into_iter([boxed1, boxed2, boxed2, boxed3]), ); let reverse_set = Set::new(&mut heap, IntoIterator::into_iter([boxed3, boxed2, boxed1])); let partial_set = Set::new(&mut heap, IntoIterator::into_iter([boxed1, boxed3])); assert!(forward_set1.eq_in_heap(&heap, &reverse_set)); assert!(forward_set1.eq_in_heap(&heap, &forward_set2)); assert!(!forward_set1.eq_in_heap(&heap, &partial_set)); } #[test] fn inline_contains() { use crate::boxed::Int; let mut heap = Heap::empty(); let boxed1 = Int::new(&mut heap, 1); let boxed2 = Int::new(&mut heap, 2); let boxed3 = Int::new(&mut heap, 3); let boxed4 = Int::new(&mut heap, 4); let boxed5 = Int::new(&mut heap, 5); let empty_set = Set::::new(&mut heap, std::iter::empty()); let odd_set = Set::new(&mut heap, IntoIterator::into_iter([boxed1, boxed3, boxed5])); let even_set = Set::new(&mut heap, IntoIterator::into_iter([boxed2, boxed4])); assert!(!empty_set.contains(&heap, &boxed1)); assert!(odd_set.contains(&heap, &boxed1)); assert!(!even_set.contains(&heap, &boxed1)); assert!(!empty_set.contains(&heap, &boxed2)); assert!(!odd_set.contains(&heap, &boxed2)); assert!(even_set.contains(&heap, &boxed2)); assert!(!empty_set.contains(&heap, &boxed3)); assert!(odd_set.contains(&heap, &boxed3)); assert!(!even_set.contains(&heap, &boxed3)); } #[test] fn external_equality() 
{ use crate::boxed::Int; let mut heap = Heap::empty(); let boxed1 = Int::new(&mut heap, 1); let boxed2 = Int::new(&mut heap, 2); let boxed3 = Int::new(&mut heap, 3); let boxed4 = Int::new(&mut heap, 4); let boxed5 = Int::new(&mut heap, 5); let forward_set = Set::new( &mut heap, IntoIterator::into_iter([boxed1, boxed2, boxed3, boxed4, boxed5]), ); let reverse_set = Set::new( &mut heap, IntoIterator::into_iter([boxed5, boxed4, boxed3, boxed2, boxed1]), ); let inline_set = Set::new( &mut heap, IntoIterator::into_iter([boxed1, boxed2, boxed3, boxed4]), ); let empty_set = Set::::new(&mut heap, std::iter::empty()); assert!(forward_set.eq_in_heap(&heap, &reverse_set)); assert!(!forward_set.eq_in_heap(&heap, &inline_set)); assert!(!forward_set.eq_in_heap(&heap, &empty_set)); } #[test] fn external_contains() { use crate::boxed::Int; let mut heap = Heap::empty(); let boxed1 = Int::new(&mut heap, 1); let boxed2 = Int::new(&mut heap, 2); let boxed3 = Int::new(&mut heap, 3); let boxed4 = Int::new(&mut heap, 4); let boxed5 = Int::new(&mut heap, 5); let boxed6 = Int::new(&mut heap, 6); let boxed7 = Int::new(&mut heap, 7); let boxed8 = Int::new(&mut heap, 8); let empty_set = Set::::new(&mut heap, std::iter::empty()); let odd_set = Set::new( &mut heap, IntoIterator::into_iter([boxed1, boxed3, boxed5, boxed7]), ); let even_set = Set::new( &mut heap, IntoIterator::into_iter([boxed2, boxed4, boxed6, boxed8]), ); assert!(!empty_set.contains(&heap, &boxed1)); assert!(odd_set.contains(&heap, &boxed1)); assert!(!even_set.contains(&heap, &boxed1)); assert!(!empty_set.contains(&heap, &boxed2)); assert!(!odd_set.contains(&heap, &boxed2)); assert!(even_set.contains(&heap, &boxed2)); assert!(!empty_set.contains(&heap, &boxed3)); assert!(odd_set.contains(&heap, &boxed3)); assert!(!even_set.contains(&heap, &boxed3)); } #[test] fn subset() { use crate::boxed::Int; let mut heap = Heap::empty(); let boxed1 = Int::new(&mut heap, 1); let boxed2 = Int::new(&mut heap, 2); let boxed3 = Int::new(&mut 
heap, 3); let boxed4 = Int::new(&mut heap, 4); let boxed5 = Int::new(&mut heap, 5); let boxed6 = Int::new(&mut heap, 6); let boxed7 = Int::new(&mut heap, 7); let boxed8 = Int::new(&mut heap, 8); let empty_set = Set::::new(&mut heap, std::iter::empty()); let one_set = Set::new(&mut heap, IntoIterator::into_iter([boxed1])); let odd_set = Set::new( &mut heap, IntoIterator::into_iter([boxed1, boxed3, boxed5, boxed7]), ); let even_set = Set::new( &mut heap, IntoIterator::into_iter([boxed2, boxed4, boxed6, boxed8]), ); let full_set = Set::new( &mut heap, vec![ boxed1, boxed2, boxed3, boxed4, boxed5, boxed6, boxed7, boxed8, ] .into_iter(), ); assert!(empty_set.is_subset(&heap, &empty_set)); assert!(empty_set.is_subset(&heap, &one_set)); assert!(empty_set.is_subset(&heap, &odd_set)); assert!(empty_set.is_subset(&heap, &even_set)); assert!(empty_set.is_subset(&heap, &full_set)); assert!(!one_set.is_subset(&heap, &empty_set)); assert!(one_set.is_subset(&heap, &one_set)); assert!(one_set.is_subset(&heap, &odd_set)); assert!(!one_set.is_subset(&heap, &even_set)); assert!(one_set.is_subset(&heap, &full_set)); assert!(!odd_set.is_subset(&heap, &empty_set)); assert!(!odd_set.is_subset(&heap, &one_set)); assert!(odd_set.is_subset(&heap, &odd_set)); assert!(!odd_set.is_subset(&heap, &even_set)); assert!(odd_set.is_subset(&heap, &full_set)); assert!(!even_set.is_subset(&heap, &empty_set)); assert!(!even_set.is_subset(&heap, &one_set)); assert!(!even_set.is_subset(&heap, &odd_set)); assert!(even_set.is_subset(&heap, &even_set)); assert!(even_set.is_subset(&heap, &full_set)); assert!(!full_set.is_subset(&heap, &empty_set)); assert!(!full_set.is_subset(&heap, &one_set)); assert!(!full_set.is_subset(&heap, &odd_set)); assert!(!full_set.is_subset(&heap, &even_set)); assert!(full_set.is_subset(&heap, &full_set)); } } ================================================ FILE: runtime/boxed/types/shared_str.rs ================================================ use std::sync::atomic::{fence, 
use std::sync::atomic::{fence, AtomicU64, Ordering};
use std::{alloc, ptr};

/// Reference count used for global constants created by codegen
///
/// Data with this count is never refcounted or freed.
const GLOBAL_CONSTANT_REFCOUNT: u64 = std::u64::MAX;

/// Header for shared string data
///
/// This is only separated out to make it easier to calculate allocation sizes.
#[repr(C)]
struct DataHeader {
    ref_count: AtomicU64,
    len: u64,
}

/// Internal shared string data
#[repr(C)]
struct SharedStrData {
    header: DataHeader,
    // This is actually variable length
    data: [u8; 1],
}

impl SharedStrData {
    /// Allocates new string data for `value` with a reference count of 1
    fn new(value: &str) -> *mut SharedStrData {
        unsafe {
            let layout = Self::layout_for_byte_len(value.len());
            let shared_str = alloc::alloc(layout) as *mut SharedStrData;

            // `alloc` returns null on allocation failure; abort instead of
            // writing through a null pointer below
            if shared_str.is_null() {
                alloc::handle_alloc_error(layout);
            }

            // SAFETY: `layout` always covers `DataHeader` plus at least one
            // data byte. `DataHeader` has no drop glue so this assignment is
            // a plain write into the fresh allocation.
            (*shared_str).header = DataHeader {
                ref_count: AtomicU64::new(1),
                len: value.len() as u64,
            };

            ptr::copy(
                value.as_ptr(),
                &mut (*shared_str).data[0] as *mut _,
                value.len(),
            );

            shared_str
        }
    }

    /// Returns the string contents as a slice
    fn as_str(&self) -> &str {
        unsafe {
            // SAFETY: `new` copied exactly `len` bytes of valid UTF-8
            // starting at `data`
            let utf8 = std::slice::from_raw_parts(&self.data[0], self.header.len as usize);
            std::str::from_utf8_unchecked(utf8)
        }
    }

    /// Returns the allocation layout for a string of `len` bytes
    fn layout_for_byte_len(len: usize) -> alloc::Layout {
        alloc::Layout::new::<DataHeader>()
            // We have a static length of 1 so we need to allocate at least that
            .extend(alloc::Layout::array::<u8>(std::cmp::max(1, len)).unwrap())
            .unwrap()
            .0
            .pad_to_align()
    }

    fn is_global_constant(&self) -> bool {
        // This doesn't need any ordering; constants can't become non-constant and vice-versa
        self.header.ref_count.load(Ordering::Relaxed) == GLOBAL_CONSTANT_REFCOUNT
    }

    /// Atomically takes a new reference to the string data
    fn take_ref(&mut self) -> *mut SharedStrData {
        if self.is_global_constant() {
            return self;
        }

        // In the case of refing to pass to another thread is sufficient to make the refcount
        // increment itself visible. In the case of one thread incrementing and then decrementing
        // later the decrement itself will enforce memory ordering. This ensures other threads
        // won't falsely destroy the instance.
        self.header.ref_count.fetch_add(1, Ordering::Relaxed);
        self
    }

    /// Atomically releases a reference to the string data
    ///
    /// It's unsafe to use the string data after calling this function.
    unsafe fn release_ref(&mut self) {
        if self.is_global_constant() {
            return;
        }

        let should_destroy = self.header.ref_count.fetch_sub(1, Ordering::Release) == 1;

        if should_destroy {
            // Make sure the memory operations from this delete are strictly after the fetch_sub
            fence(Ordering::Acquire);

            alloc::dealloc(
                self as *mut Self as *mut u8,
                Self::layout_for_byte_len(self.header.len as usize),
            );
        }
    }
}

/// Smart pointer for string data
///
/// This is morally equivalent to `Arc<str>` except it has a fixed FFI representation. This allows
/// codegen to create `SharedStr` instances.
// NOTE(review): widened from `pub(super)` to `pub(crate)` — a backward
// compatible visibility increase
#[repr(transparent)]
pub(crate) struct SharedStr {
    data: *mut SharedStrData,
}

impl SharedStr {
    /// Constructs a new shared string containing `value`
    pub(crate) fn new(value: &str) -> SharedStr {
        SharedStr {
            data: SharedStrData::new(value),
        }
    }

    /// Returns the shared string slice
    pub(crate) fn as_str(&self) -> &str {
        unsafe { (*self.data).as_str() }
    }
}

impl From<&str> for SharedStr {
    fn from(value: &str) -> SharedStr {
        SharedStr::new(value)
    }
}

impl Clone for SharedStr {
    fn clone(&self) -> SharedStr {
        unsafe {
            SharedStr {
                data: (*self.data).take_ref(),
            }
        }
    }
}

impl Drop for SharedStr {
    fn drop(&mut self) {
        unsafe {
            (*self.data).release_ref();
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn construction() {
        let empty = SharedStr::new("");
        assert_eq!("", empty.as_str());

        let hello_world = SharedStr::new("Hello, world!");
        assert_eq!("Hello, world!", hello_world.as_str());
    }

    #[test]
    fn cloning() {
        #[allow(clippy::redundant_clone)]
        let hello_clone = SharedStr::new("Hello, clone!").clone();
        assert_eq!("Hello, clone!", hello_clone.as_str());
    }
}
crate::boxed::types::shared_str::SharedStr; use crate::boxed::*; /// Describes the storage of a string's data #[derive(Clone, Copy, Debug, PartialEq)] pub enum StrStorage { /// String data is stored inline in a box of the given size Inline(BoxSize), /// String data is stored out-of-line in a 16 byte box External, } impl StrStorage { /// Returns the box size for a string storage pub fn box_size(self) -> BoxSize { match self { StrStorage::Inline(box_size) => box_size, StrStorage::External => BoxSize::Size16, } } } /// String value encoded as UTF-8 #[repr(C, align(16))] pub struct Str { header: Header, inline_byte_len: u8, padding: [u8; Str::MAX_INLINE_BYTES], } impl Boxed for Str {} impl UniqueTagged for Str {} impl Str { /// Maximum number of bytes that can be stored directly in the box pub const MAX_INLINE_BYTES: usize = 29; /// Inline byte length used for external strings pub const EXTERNAL_INLINE_BYTE_LEN: u8 = (Self::MAX_INLINE_BYTES as u8) + 1; /// Constructs a new string pub fn new(heap: &mut impl AsHeap, value: &str) -> Gc { let storage = Self::storage_for_byte_len(value.len()); let header = Self::TYPE_TAG.to_heap_header(storage.box_size()); let boxed = unsafe { match storage { StrStorage::External => mem::transmute(ExternalStr::new(header, value)), StrStorage::Inline(_) => mem::transmute(InlineStr::new(header, value)), } }; heap.as_heap_mut().place_box(boxed) } /// Returns the storage for given string byte length pub fn storage_for_byte_len(len: usize) -> StrStorage { match len { 0..=13 => StrStorage::Inline(BoxSize::Size16), 14..=Str::MAX_INLINE_BYTES => StrStorage::Inline(BoxSize::Size32), _ => { // Too big to fit inline; this needs to be external StrStorage::External } } } fn is_inline(&self) -> bool { self.inline_byte_len != Self::EXTERNAL_INLINE_BYTE_LEN as u8 } fn as_repr(&self) -> Repr<'_> { if self.is_inline() { Repr::Inline(unsafe { &*(self as *const Str as *const InlineStr) }) } else { Repr::External(unsafe { &*(self as *const Str as *const 
ExternalStr) }) } } fn as_repr_mut(&mut self) -> ReprMut<'_> { if self.is_inline() { ReprMut::Inline(unsafe { &mut *(self as *mut Str as *mut InlineStr) }) } else { ReprMut::External(unsafe { &mut *(self as *mut Str as *mut ExternalStr) }) } } /// Returns the string's content as a slice pub fn as_str(&self) -> &str { match self.as_repr() { Repr::Inline(inline) => inline.as_str(), Repr::External(external) => external.shared_str.as_str(), } } } impl PartialEq for Str { fn eq(&self, other: &Str) -> bool { self.as_str() == other.as_str() } } impl Hash for Str { fn hash(&self, state: &mut H) { Self::TYPE_TAG.hash(state); self.as_str().hash(state); } } impl fmt::Debug for Str { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { write!(formatter, "Str({:?})", self.as_str()) } } impl Drop for Str { fn drop(&mut self) { match self.as_repr_mut() { ReprMut::Inline(_) => { // Do nothing here; we might've been allocated as a 16 byte box so we can't read // the whole thing. 
} ReprMut::External(external) => unsafe { ptr::drop_in_place(external); }, } } } #[repr(C, align(16))] struct InlineStr { header: Header, inline_byte_len: u8, inline_bytes: MaybeUninit<[u8; Str::MAX_INLINE_BYTES]>, } impl InlineStr { fn new(header: Header, value: &str) -> InlineStr { unsafe { let mut inline_bytes = mem::MaybeUninit::<[u8; Str::MAX_INLINE_BYTES]>::uninit(); ptr::copy( value.as_ptr(), inline_bytes.as_mut_ptr() as *mut _, value.len(), ); InlineStr { header, inline_byte_len: value.len() as u8, inline_bytes, } } } fn as_utf8(&self) -> &[u8] { use std::slice; unsafe { slice::from_raw_parts( self.inline_bytes.as_ptr() as *const u8, self.inline_byte_len as usize, ) } } fn as_str(&self) -> &str { use std::str; unsafe { str::from_utf8_unchecked(self.as_utf8()) } } } #[repr(C, align(16))] struct ExternalStr { header: Header, // Once we've determined we're not inline this has no useful value inline_byte_len: u8, shared_str: SharedStr, padding: [u64; 2], } impl ExternalStr { fn new(header: Header, value: &str) -> ExternalStr { ExternalStr { header, inline_byte_len: Str::EXTERNAL_INLINE_BYTE_LEN, shared_str: value.into(), padding: [0, 0], } } } enum Repr<'a> { Inline(&'a InlineStr), External(&'a ExternalStr), } enum ReprMut<'a> { Inline(&'a mut InlineStr), External(&'a mut ExternalStr), } #[cfg(test)] mod test { use super::*; use crate::boxed::heap::Heap; use std::mem; #[test] fn sizes() { assert_eq!(32, mem::size_of::()); assert_eq!(32, mem::size_of::()); assert_eq!(32, mem::size_of::()); } #[test] fn equality() { let mut heap = Heap::empty(); let boxed_one1 = Str::new(&mut heap, "one"); let boxed_one2 = Str::new(&mut heap, "one"); let boxed_two = Str::new(&mut heap, "two"); assert_ne!(boxed_one1, boxed_two); assert_eq!(boxed_one1, boxed_one2); } #[test] fn fmt_debug() { let mut heap = Heap::empty(); let boxed_one = Str::new(&mut heap, "one"); assert_eq!(r#"Str("one")"#, format!("{:?}", boxed_one)); } #[test] fn round_trip() { let mut heap = Heap::empty(); for 
&test_str in &[ "", "1", "smallinline", "largerinlinethattakes32bytes", "This definitely will not fit in any inline string", ] { let boxed_string = Str::new(&mut heap, test_str); assert_eq!(test_str, boxed_string.as_str()); } } } ================================================ FILE: runtime/boxed/types/sym.rs ================================================ use std::fmt; use std::hash::{Hash, Hasher}; use crate::boxed::*; use crate::intern::{AsInterner, InternedSym}; /// Interned symbol /// /// Symbols are immutable strings typically used as keywords or identifiers. #[repr(C, align(16))] pub struct Sym { header: Header, // TODO: We have room to fit a u32 hash value here which should help with re-interning heap // indexed symbols in new heaps pub(crate) interned: InternedSym, } impl Boxed for Sym {} impl UniqueTagged for Sym {} impl Sym { /// Constructs a new symbol with a specified name pub fn new(heap: &mut impl AsHeap, value: &str) -> Gc { let heap = heap.as_heap_mut(); let interned = heap.type_info_mut().interner_mut().intern(value); Self::from_interned_sym(heap, interned) } /// Constructs a new symbol with an interned symbol pub fn from_interned_sym(heap: &mut impl AsHeap, interned: InternedSym) -> Gc { let heap = heap.as_heap_mut(); heap.place_box(Sym { header: Self::TYPE_TAG.to_heap_header(Self::size()), interned, }) } /// Returns the box size for symbols pub fn size() -> BoxSize { BoxSize::Size16 } /// Returns the name of the symbol /// /// `interner` is required to unintern the name. It must be the same interner used to construct /// the symbol. 
pub fn name<'a>(&'a self, interner: &'a impl AsInterner) -> &'a str { interner.as_interner().unintern(&self.interned) } /// Returns the interned symbol value pub fn interned(&self) -> InternedSym { self.interned } /// Returns a mutable reference to the interned symbol value pub(crate) fn interned_mut(&mut self) -> &mut InternedSym { &mut self.interned } } impl PartialEq for Sym { fn eq(&self, other: &Sym) -> bool { self.interned == other.interned } } impl Hash for Sym { fn hash(&self, state: &mut H) { Self::TYPE_TAG.hash(state); self.interned.hash(state); } } impl fmt::Debug for Sym { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { write!(formatter, "Sym({:?})", self.interned) } } #[cfg(test)] mod test { use super::*; use crate::boxed::heap::Heap; use std::mem; #[test] fn sizes() { assert_eq!(16, mem::size_of::()); } #[test] fn equality() { let mut heap = Heap::empty(); let boxed_one1 = Sym::new(&mut heap, "one"); let boxed_one2 = Sym::new(&mut heap, "one"); let boxed_two = Sym::new(&mut heap, "two"); assert_ne!(boxed_one1, boxed_two); assert_eq!(boxed_one1, boxed_one2); } #[test] fn fmt_debug() { let mut heap = Heap::empty(); let boxed_one = Sym::new(&mut heap, "one"); assert_eq!(r#"Sym('one)"#, format!("{:?}", boxed_one)); } } ================================================ FILE: runtime/boxed/types/vector.rs ================================================ use std::hash::{Hash, Hasher}; use std::mem::MaybeUninit; use std::{fmt, marker, mem}; use crate::abitype::{BoxedAbiType, EncodeBoxedAbiType}; use crate::boxed::refs::Gc; use crate::boxed::*; use crate::persistent::Vector as PersistentVector; const MAX_16BYTE_INLINE_LEN: usize = (16 - 8) / mem::size_of::>(); const MAX_32BYTE_INLINE_LEN: usize = (32 - 8) / mem::size_of::>(); const EXTERNAL_INLINE_LEN: u32 = (MAX_32BYTE_INLINE_LEN + 1) as u32; /// Describes the storage of a vector's data #[derive(Clone, Copy, Debug, PartialEq)] pub enum VectorStorage { /// Vector data is stored 
inline in a box of the given size Inline(BoxSize), /// Vector data is stored out-of-line in a 32 byte box External, } impl VectorStorage { /// Returns the box size for a vector storage pub fn box_size(self) -> BoxSize { match self { VectorStorage::Inline(box_size) => box_size, VectorStorage::External => BoxSize::Size32, } } } /// Immutable vector of boxed values /// /// This allows random access to any of its values. #[repr(C, align(16))] pub struct Vector { header: Header, inline_len: u32, padding: [u8; 24], phantom: marker::PhantomData, } impl Boxed for Vector {} impl Vector { /// Maximum element length of an inline vector pub const MAX_INLINE_LEN: usize = MAX_32BYTE_INLINE_LEN; /// Inline element length used for external vectors pub const EXTERNAL_INLINE_LEN: u32 = (Self::MAX_INLINE_LEN as u32) + 1; /// Constructs a new vector with the passed boxed values pub fn new( heap: &mut impl AsHeap, values: impl ExactSizeIterator>, ) -> Gc> { let storage = Self::storage_for_element_len(values.len()); let header = Vector::TYPE_TAG.to_heap_header(storage.box_size()); let boxed = unsafe { match storage { VectorStorage::External => mem::transmute(ExternalVector::new(header, values)), VectorStorage::Inline(_) => mem::transmute(InlineVector::new(header, values)), } }; heap.as_heap_mut().place_box(boxed) } /// Returns the storage for given element length fn storage_for_element_len(len: usize) -> VectorStorage { const MIN_32BYTE_INLINE_LEN: usize = MAX_16BYTE_INLINE_LEN + 1; match len { 0..=MAX_16BYTE_INLINE_LEN => VectorStorage::Inline(BoxSize::Size16), MIN_32BYTE_INLINE_LEN..=MAX_32BYTE_INLINE_LEN => VectorStorage::Inline(BoxSize::Size32), _ => { // Too big to fit inline; this needs to be external VectorStorage::External } } } /// Constructs a vector by constructing an iterator of values pub fn from_values( heap: &mut impl AsHeap, values: impl Iterator, cons: F, ) -> Gc> where F: Fn(&mut Heap, V) -> Gc, { let heap = heap.as_heap_mut(); let elems: Vec> = values.map(|v| 
cons(heap, v)).collect(); Self::new(heap, elems.into_iter()) } fn is_inline(&self) -> bool { self.inline_len <= (Self::MAX_INLINE_LEN as u32) } fn as_repr(&self) -> Repr<'_, T> { if self.is_inline() { Repr::Inline(unsafe { &*(self as *const Vector as *const InlineVector) }) } else { Repr::External(unsafe { &*(self as *const Vector as *const ExternalVector) }) } } fn as_repr_mut(&mut self) -> ReprMut<'_, T> { if self.is_inline() { ReprMut::Inline(unsafe { &mut *(self as *mut Vector as *mut InlineVector) }) } else { ReprMut::External(unsafe { &mut *(self as *mut Vector as *mut ExternalVector) }) } } /// Returns the length of the vector pub fn len(&self) -> usize { match self.as_repr() { Repr::Inline(inline) => inline.inline_len as usize, Repr::External(external) => external.values.len(), } } /// Returns true if the vector is empty pub fn is_empty(&self) -> bool { self.inline_len == 0 } /// Return an element as the provided index pub fn get(&self, index: usize) -> Option> { match self.as_repr() { Repr::Inline(inline) => inline.get(index), Repr::External(external) => external.values.get(index), } } /// Returns an iterator over the vector pub fn iter<'a>(&'a self) -> Box<(dyn ExactSizeIterator> + 'a)> { match self.as_repr() { Repr::Inline(inline) => Box::new(inline.iter()), Repr::External(external) => Box::new(external.values.iter()), } } /// Returns a new vector with the element at the given index replaced pub fn assoc(&self, heap: &mut impl AsHeap, index: usize, value: Gc) -> Gc> { match self.as_repr() { Repr::Inline(inline) => { let mut values = inline.values; values[index] = MaybeUninit::new(value); Vector::new( heap, values[0..self.len()] .iter() .map(|value| unsafe { value.assume_init() }), ) } Repr::External(external) => { let boxed = unsafe { mem::transmute(ExternalVector { header: Vector::TYPE_TAG.to_heap_header(VectorStorage::External.box_size()), inline_len: EXTERNAL_INLINE_LEN, values: external.values.assoc(index, value), }) }; 
heap.as_heap_mut().place_box(boxed) } } } /// Appends the elements in the passed vector and returns a new vector pub fn append(&self, heap: &mut impl AsHeap, other: Gc>) -> Gc> { if self.is_empty() { other } else { self.extend(heap, other.iter()) } } /// Returns a new vector extended with the values in the passed iterator pub fn extend( &self, heap: &mut impl AsHeap, new_values: impl ExactSizeIterator>, ) -> Gc> { if new_values.len() == 0 { return unsafe { Gc::new(self) }; } match self.as_repr() { Repr::External(self_external) => { let new_values = self_external.values.extend(new_values); let boxed = unsafe { mem::transmute(ExternalVector { header: Vector::TYPE_TAG.to_heap_header(VectorStorage::External.box_size()), inline_len: EXTERNAL_INLINE_LEN, values: new_values, }) }; heap.as_heap_mut().place_box(boxed) } _ => { let values: Vec<_> = self.iter().chain(new_values).collect(); Self::new(heap, values.into_iter()) } } } /// Takes the first `count` items from the vector pub fn take(&self, heap: &mut impl AsHeap, count: usize) -> Gc> { let new_len = std::cmp::min(self.len(), count); if new_len <= Self::MAX_INLINE_LEN { return Self::new(heap, self.iter().take(count)); } match self.as_repr() { Repr::External(self_external) => { let boxed = unsafe { mem::transmute(ExternalVector { header: Vector::TYPE_TAG.to_heap_header(VectorStorage::External.box_size()), inline_len: EXTERNAL_INLINE_LEN, values: self_external.values.take(count), }) }; heap.as_heap_mut().place_box(boxed) } // Shouldn't be reachable but is easy to handle _ => Self::new(heap, self.iter().take(count)), } } pub(crate) fn visit_mut_elements(&mut self, visitor: &mut F) where F: FnMut(&mut Gc), { match self.as_repr_mut() { ReprMut::Inline(inline) => { for element in inline.iter_mut() { visitor(element); } } ReprMut::External(external) => external.values.visit_mut_elements(visitor), } } } impl PartialEqInHeap for Vector { fn eq_in_heap(&self, heap: &Heap, other: &Vector) -> bool { if self.len() != other.len() { 
return false; } self.iter() .zip(other.iter()) .all(|(self_value, other_value)| self_value.eq_in_heap(heap, &other_value)) } } impl HashInHeap for Vector { fn hash_in_heap(&self, heap: &Heap, state: &mut H) { TypeTag::Vector.hash(state); state.write_usize(self.len()); for value in self.iter() { value.hash_in_heap(heap, state); } } } impl fmt::Debug for Vector { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { formatter.write_str("Vector(")?; formatter.debug_list().entries(self.iter()).finish()?; formatter.write_str(")") } } impl EncodeBoxedAbiType for Vector where T: EncodeBoxedAbiType, { const BOXED_ABI_TYPE: BoxedAbiType = BoxedAbiType::Vector(&T::BOXED_ABI_TYPE); } #[repr(C, align(16))] pub struct InlineVector { header: Header, inline_len: u32, values: [MaybeUninit>; MAX_32BYTE_INLINE_LEN], } impl InlineVector { fn new(header: Header, values: impl ExactSizeIterator>) -> InlineVector { let inline_len = values.len(); let mut inline_values = [MaybeUninit::uninit(); MAX_32BYTE_INLINE_LEN]; for (inline_value, value) in inline_values.iter_mut().zip(values) { *inline_value = MaybeUninit::new(value); } InlineVector { header, inline_len: inline_len as u32, values: inline_values, } } fn iter(&self) -> impl ExactSizeIterator> + '_ { self.values[0..self.inline_len as usize] .iter() .map(|value| unsafe { value.assume_init() }) } fn iter_mut<'a>(&'a mut self) -> impl ExactSizeIterator> + 'a { self.values[0..self.inline_len as usize] .iter_mut() .map(|value| unsafe { &mut *value.as_mut_ptr() }) } fn get(&self, index: usize) -> Option> { if index > self.inline_len as usize { None } else { Some(unsafe { self.values[index].assume_init() }) } } } #[repr(C, align(16))] pub struct ExternalVector { header: Header, inline_len: u32, values: PersistentVector>, } impl ExternalVector { fn new(header: Header, values: impl ExactSizeIterator>) -> ExternalVector { ExternalVector { header, inline_len: Vector::::EXTERNAL_INLINE_LEN, values: 
PersistentVector::new(values), } } } enum Repr<'a, T: Boxed> { Inline(&'a InlineVector), External(&'a ExternalVector), } enum ReprMut<'a, T: Boxed> { Inline(&'a mut InlineVector), External(&'a mut ExternalVector), } impl Drop for Vector { fn drop(&mut self) { match self.as_repr_mut() { ReprMut::Inline(_) => { // Do nothing here; we might've been allocated as a 16 byte box so we can't read // the whole thing. } ReprMut::External(external) => unsafe { // Call `ExternalVector`'s drop implementation ptr::drop_in_place(external); }, } } } #[cfg(test)] mod test { use super::*; use crate::boxed::heap::Heap; use std::mem; #[test] fn sizes() { assert_eq!(32, mem::size_of::>()); assert_eq!(32, mem::size_of::>()); assert_eq!(32, mem::size_of::>()); } #[test] fn equality() { use crate::boxed::Int; let mut heap = Heap::empty(); let boxed1 = Int::new(&mut heap, 1); let boxed2 = Int::new(&mut heap, 2); let boxed3 = Int::new(&mut heap, 3); let forward_vec1 = Vector::new(&mut heap, IntoIterator::into_iter([boxed1, boxed2, boxed3])); let forward_vec2 = Vector::new(&mut heap, IntoIterator::into_iter([boxed1, boxed2, boxed3])); let reverse_vec = Vector::new(&mut heap, IntoIterator::into_iter([boxed3, boxed2, boxed1])); assert!(!forward_vec1.eq_in_heap(&heap, &reverse_vec)); assert!(forward_vec1.eq_in_heap(&heap, &forward_vec2)); } #[test] fn fmt_debug() { use crate::boxed::Int; let mut heap = Heap::empty(); let forward_vec = Vector::from_values(&mut heap, [1, 2, 3].iter().cloned(), Int::new); assert_eq!( "Vector([Int(1), Int(2), Int(3)])", format!("{:?}", forward_vec) ); } } ================================================ FILE: runtime/callback.rs ================================================ #![warn(missing_docs)] //! Typed callback functions use crate::abitype; use crate::boxed; use crate::task::Task; /// Typed callback function /// /// This is typically when taking a callback as a parameter to an RFI function. 
This is not a /// proper boxed value and can neither be stored in a collection or returned as a value. For those /// cases [`boxed::FunThunk`] should be used instead. #[repr(C)] #[derive(Clone, Copy)] pub struct Callback where F: Copy, { captures: boxed::Captures, entry_point: F, } impl Callback where F: Copy, { /// Returns the captures for this callback pub fn captures(&self) -> boxed::Captures { self.captures } } /// Encoding of an entry point's ABI type /// /// This is used internally by the compiler as a mechanism for reflecting the Rust function type. #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct EntryPointAbiType { /// Types of the entry point's parameters /// /// This is not an [`abitype::ParamAbiType`] as captures should be determined by the callback /// implementation, not the callback's type. pub params: &'static [abitype::AbiType], /// Type of the entry point's return value pub ret: abitype::RetAbiType, } /// Trait used to encode a Rust function type as an [`EntryPointAbiType`] pub trait EncodeEntryPointAbiType: Copy { /// Corresponding [`EntryPointAbiType`] for this Rust function type const ENTRY_POINT_ABI_TYPE: EntryPointAbiType; } macro_rules! define_generic_entry_point { ( $( $generic_param:ident ),* ) => { impl EncodeEntryPointAbiType for for<'s> extern "C" fn(&'s mut Task, captures: boxed::Captures, $( $generic_param ),* ) -> R where R: abitype::EncodeRetAbiType, $( $generic_param: abitype::EncodeAbiType ),* { const ENTRY_POINT_ABI_TYPE: EntryPointAbiType = EntryPointAbiType { params: &[$( $generic_param::ABI_TYPE ),*], ret: R::RET_ABI_TYPE, }; } impl Callback extern "C" fn(&'s mut Task, captures: boxed::Captures, $( $generic_param ),* ) -> R> where R: abitype::EncodeRetAbiType, $( $generic_param: abitype::EncodeAbiType ),* { /// Applies this callback inside the given [`Task`] and returns its value /// /// It's important that the callback was created inside the passed `task` or undefined /// behaviour may result. 
#[allow(unused)] #[allow(non_snake_case)] #[allow(clippy::too_many_arguments)] pub fn apply(&self, task: &mut Task, $( $generic_param: $generic_param ),*) -> R { (self.entry_point)(task, self.captures, $( $generic_param ),*) } } } } define_generic_entry_point!(); define_generic_entry_point!(A); define_generic_entry_point!(A, B); define_generic_entry_point!(A, B, C); define_generic_entry_point!(A, B, C, D); define_generic_entry_point!(A, B, C, D, E); define_generic_entry_point!(A, B, C, D, E, F); define_generic_entry_point!(A, B, C, D, E, F, G); define_generic_entry_point!(A, B, C, D, E, F, G, H); #[cfg(test)] mod test { use super::*; use crate::boxed::refs::*; #[test] fn encode_entry_point_abi_type() { let empty_abi_type = ::ENTRY_POINT_ABI_TYPE; assert_eq!(abitype::RetAbiType::Void, empty_abi_type.ret); assert!(empty_abi_type.params.is_empty()); let two_param_abi_type = , ) -> bool as EncodeEntryPointAbiType>::ENTRY_POINT_ABI_TYPE; assert_eq!( abitype::AbiType::Bool.into_ret_abi_type(), two_param_abi_type.ret ); assert_eq!( &[abitype::AbiType::Int, boxed::TypeTag::Int.into()], two_param_abi_type.params ); } } ================================================ FILE: runtime/class_map.rs ================================================ use std::marker::PhantomData; use std::ptr; use crate::abitype; use crate::boxed::RecordClassId; /// Minimal type information for a class' field /// /// This is used to annotate values with information to support equality, hashing and garbage /// collection. 
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
pub enum FieldType {
    Bool = 0,
    Char = 1,
    Float = 2,
    Int = 3,
    InternedSym = 4,
    Boxed = 5,
}

impl FieldType {
    // Collapses a full ABI type to the minimal per-field type tag.
    // Callback-typed record fields are not yet supported.
    pub fn from_abi_type(abi_type: &abitype::AbiType) -> Self {
        match abi_type {
            abitype::AbiType::Bool => FieldType::Bool,
            abitype::AbiType::Char => FieldType::Char,
            abitype::AbiType::Float => FieldType::Float,
            abitype::AbiType::Int => FieldType::Int,
            abitype::AbiType::InternedSym => FieldType::InternedSym,
            abitype::AbiType::Boxed(_) => FieldType::Boxed,
            abitype::AbiType::Callback(_) => {
                unimplemented!("callback record fields");
            }
        }
    }
}

/// Type information for a class' field
#[repr(C)]
#[derive(Clone, Copy)]
pub struct Field {
    // Byte offset from the start of the record data; stored as `u32` to keep `Field` compact
    offset: u32,
    field_type: FieldType,
    // Marks the final field of a class so field iteration knows where to stop
    is_last: bool,
}

impl Field {
    /// Constructs a new instance with the given field type and offset
    pub fn new(field_type: FieldType, offset: usize) -> Field {
        Self {
            offset: offset as u32,
            field_type,
            // `is_last` is set later on the class' final field (see `BoxedClass::from_fields`)
            is_last: false,
        }
    }

    /// Type information for the class field
    pub fn field_type(self) -> FieldType {
        self.field_type
    }

    /// Offset in bytes of the class field from the start of the record data
    pub fn offset(self) -> usize {
        self.offset as usize
    }

    /// Returns if this is the last field in the class
    pub fn is_last(self) -> bool {
        self.is_last
    }
}

/// Type information for a class
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct ClassRef<'a> {
    // Pointer to the class' first `Field`; null means the class has no fields
    fields: *const Field,
    phantom_lifetime: PhantomData<&'a Field>,
}

impl<'a> ClassRef<'a> {
    /// Returns if the type contains no fields
    pub fn is_empty(self) -> bool {
        self.fields.is_null()
    }

    /// Returns an iterator over the class' fields
    pub fn field_iter(self) -> FieldIterator<'a> {
        FieldIterator {
            fields: self.fields,
            phantom_lifetime: PhantomData,
        }
    }
}

/// Owned version of [`ClassRef`]
///
/// This is used for classes that are built during compile time.
#[derive(Clone)] pub struct BoxedClass { fields: Box<[Field]>, } impl BoxedClass { /// Constructs a new instance containing the provided fields pub fn from_fields(fields_iter: impl Iterator) -> Self { let mut fields: Box<[Field]> = fields_iter.collect(); if let Some(last_field) = fields.last_mut() { last_field.is_last = true; } BoxedClass { fields } } pub fn as_ref(&self) -> ClassRef<'_> { ClassRef { fields: if self.fields.is_empty() { std::ptr::null() } else { self.fields.as_ptr() }, phantom_lifetime: PhantomData, } } } /// Basic iterator of class fields pub struct FieldIterator<'a> { fields: *const Field, phantom_lifetime: PhantomData<&'a Field>, } impl<'a> FieldIterator<'a> { pub fn empty() -> FieldIterator<'static> { FieldIterator { fields: ptr::null(), phantom_lifetime: PhantomData, } } } impl<'a> Iterator for FieldIterator<'a> { type Item = Field; fn next(&mut self) -> Option { if self.fields.is_null() { return None; } let next_field = unsafe { *self.fields }; if next_field.is_last { self.fields = ptr::null(); } else { self.fields = unsafe { self.fields.add(1) }; } Some(next_field) } } /// Mapping of [record class IDs](RecordClassId) to [classes](ClassRef) #[repr(C)] #[derive(Clone)] pub struct ClassMap { const_classes: *const ClassRef<'static>, dynamic_classes: Vec, } impl ClassMap { const DYNAMIC_RECORD_CLASS_ID_BASE: RecordClassId = 1u32 << 30; /// Constructs a new instance containing no classes pub fn empty() -> ClassMap { Self::with_const_classes(std::ptr::null()) } /// Constructs a new instance with the provided constant classes pub(crate) fn with_const_classes(const_classes: *const ClassRef<'static>) -> ClassMap { Self { const_classes, dynamic_classes: vec![], } } /// Registers a new class and returns a distinct [`RecordClassId`] pub fn push_dynamic_class(&mut self, boxed_class: BoxedClass) -> RecordClassId { let record_class_id = (self.dynamic_classes.len() as RecordClassId) + Self::DYNAMIC_RECORD_CLASS_ID_BASE; self.dynamic_classes.push(boxed_class); 
record_class_id } /// Returns a class reference for a given [`RecordClassId`] pub fn class_for_record_class_id(&self, record_class_id: RecordClassId) -> ClassRef<'_> { if record_class_id >= Self::DYNAMIC_RECORD_CLASS_ID_BASE { let dynamic_class_index = (record_class_id - Self::DYNAMIC_RECORD_CLASS_ID_BASE) as usize; self.dynamic_classes[dynamic_class_index].as_ref() } else { unsafe { *self.const_classes.add(record_class_id as usize) } } } } ================================================ FILE: runtime/compiler_support.rs ================================================ //! Internal functions called by compiled Arret code //! //! Calls to these functions are generated by the compiler. They should not be called from user //! code. #![allow(clippy::missing_safety_doc)] use std::{alloc, panic, process}; use crate::boxed; use crate::boxed::prelude::*; use crate::boxed::refs::Gc; use crate::boxed::type_info::TypeInfo; use crate::class_map::{ClassMap, ClassRef}; use crate::intern::{Interner, RawGlobalNames}; use crate::task::Task; type TaskEntry = extern "C" fn(&mut Task); #[export_name = "arret_runtime_launch_task"] pub unsafe extern "C" fn launch_task( global_names: *const RawGlobalNames, classmap_classes: *const ClassRef<'static>, entry: TaskEntry, ) { let interner = Interner::with_global_names(global_names); let class_map = ClassMap::with_const_classes(classmap_classes); let type_info = TypeInfo::new(interner, class_map); let mut task = Task::with_type_info(type_info); if let Err(err) = panic::catch_unwind(panic::AssertUnwindSafe(|| entry(&mut task))) { if let Some(message) = err.downcast_ref::() { eprintln!("{}", message); } else { eprintln!("Unexpected panic type"); }; process::exit(1); }; } #[export_name = "arret_runtime_alloc_cells"] pub extern "C" fn alloc_cells(task: &mut Task, count: u32) -> *mut boxed::Any { task.heap_mut().alloc_cells(count as usize) } #[export_name = "arret_runtime_alloc_record_data"] pub extern "C" fn alloc_record_data(size: u64, align: 
u32) -> *mut u8 { unsafe { let layout = alloc::Layout::from_size_align_unchecked(size as usize, align as usize); alloc::alloc(layout) } } #[export_name = "arret_runtime_equals"] pub extern "C" fn equals(task: &Task, lhs: Gc, rhs: Gc) -> bool { lhs.eq_in_heap(task.as_heap(), &rhs) } #[export_name = "arret_runtime_panic_with_string"] pub unsafe extern "C" fn panic_with_string( task: &mut Task, message_bytes: *const u8, message_len: u32, ) { let message_vec: Vec = std::slice::from_raw_parts(message_bytes, message_len as usize).into(); task.panic(String::from_utf8_unchecked(message_vec)); } ================================================ FILE: runtime/intern.rs ================================================ //! Interned symbols //! //! This uses a fixed 8 byte representation for interned symbol. They are associated with a //! particular `Interner` instance which can return the original [`prim@str`] name of the symbol. //! Interned symbols from the same `Interner` can be compared directly without a reference to //! the `Interner` instance. //! //! Symbol names of 8 bytes or less are encoded directly in the `InternedSym`` instance without //! storing the name in the `Interner`. They are padded with a constant invalid UTF-8 sequence so //! the length of the inline name can be recovered. //! //! The encoding for names larger than 8 bytes uses an index in to a [`Vec`] stored in the //! `Interner`. The indexed representation is invalid UTF-8 so it cannot collide with a valid //! symbol name. use std::collections::HashMap; use std::hash::{Hash, Hasher}; use std::rc::Rc; use std::{fmt, ptr, str}; // UTF-8 sequences cannot start with 10xxxxxxx. This is pattern for the last continuation byte, // but any 1 byte sequences are encoded directly. We can use these values freely without colliding // with inline names. 
const INLINE_FILL_BYTE: u8 = 0x80; const LOCAL_INDEXED_FLAG: u8 = 0x81; const GLOBAL_INDEXED_FLAG: u8 = 0x82; const INLINE_SIZE: usize = 8; #[repr(C)] pub struct RawGlobalNames { len: u32, names: [GlobalName; 1], } #[repr(C)] struct GlobalName { name_byte_len: u64, name_bytes: *const u8, } impl GlobalName { fn as_str(&self) -> &str { unsafe { let byte_slice = std::slice::from_raw_parts(self.name_bytes, self.name_byte_len as usize); std::str::from_utf8_unchecked(byte_slice) } } } #[repr(align(8))] #[derive(Copy, Clone)] struct InternedIndexed { flag_byte: u8, _padding: [u8; 3], name_index: u32, } #[repr(align(8))] #[derive(Copy, Clone)] struct InternedInline { name_bytes: [u8; INLINE_SIZE], } impl InternedInline { fn as_str(&self) -> &str { // Find the first fill byte. If none is found assume our full inline size. let len = self .name_bytes .iter() .position(|byte| *byte == INLINE_FILL_BYTE) .unwrap_or(INLINE_SIZE); unsafe { str::from_utf8_unchecked(&self.name_bytes[0..len]) } } } #[repr(align(8))] #[derive(Copy, Clone)] pub union InternedSym { indexed: InternedIndexed, inline: InternedInline, raw: u64, } enum InternedRepr<'a> { Inline(&'a InternedInline), LocalIndexed(&'a InternedIndexed), GlobalIndexed(&'a InternedIndexed), } impl InternedSym { /// Tries to return an inline interned Sym /// /// This can be accomplished without an [`Interner`] as we don't need to add a name to the /// [`Interner`]'s index. 
pub fn try_from_inline_name(name: &str) -> Option { if name.len() <= INLINE_SIZE { let mut interned_inline = InternedInline { name_bytes: [INLINE_FILL_BYTE; INLINE_SIZE], }; unsafe { ptr::copy_nonoverlapping( name.as_ptr(), &mut interned_inline.name_bytes[0] as *mut u8, name.len(), ); } Some(InternedSym { inline: interned_inline, }) } else { None } } pub fn from_global_index(index: u32) -> InternedSym { InternedSym { indexed: InternedIndexed { flag_byte: GLOBAL_INDEXED_FLAG, _padding: [0; 3], name_index: index, }, } } pub fn from_local_index(index: u32) -> InternedSym { InternedSym { indexed: InternedIndexed { flag_byte: LOCAL_INDEXED_FLAG, _padding: [0; 3], name_index: index, }, } } pub fn to_raw_u64(self) -> u64 { unsafe { self.raw } } fn repr(&self) -> InternedRepr<'_> { unsafe { match self.indexed.flag_byte { LOCAL_INDEXED_FLAG => InternedRepr::LocalIndexed(&self.indexed), GLOBAL_INDEXED_FLAG => InternedRepr::GlobalIndexed(&self.indexed), _ => InternedRepr::Inline(&self.inline), } } } } impl PartialEq for InternedSym { fn eq(&self, other: &InternedSym) -> bool { unsafe { self.raw == other.raw } } } impl Eq for InternedSym {} impl Hash for InternedSym { fn hash(&self, state: &mut H) { unsafe { state.write(&self.inline.name_bytes); } } } impl fmt::Debug for InternedSym { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { match self.repr() { InternedRepr::LocalIndexed(indexed) | InternedRepr::GlobalIndexed(indexed) => { // We don't have access to the `Interner` so we can't print our interned value write!(formatter, "`{:x}", indexed.name_index) } InternedRepr::Inline(inline) => write!(formatter, "'{}", inline.as_str()), } } } pub struct Interner { names: Vec>, name_to_interned: HashMap, InternedSym>, /// Contains the highest static index + 1 static_index_watermark: u32, global_names: Option<&'static [GlobalName]>, } impl Interner { pub fn new() -> Interner { Interner { names: vec![], name_to_interned: HashMap::new(), 
static_index_watermark: 0, global_names: None, } } /// Creates a new `Interner` with a global names struct produced by codegen /// /// # Safety /// `raw_global_names` must be a pointer to a valid [`RawGlobalNames`] pub unsafe fn with_global_names(raw_global_names: *const RawGlobalNames) -> Interner { // Convert from our codegened layout to Rust let global_names = raw_global_names.as_ref().map(|raw_global_names| { std::slice::from_raw_parts(&raw_global_names.names[0], raw_global_names.len as usize) }); Interner { names: vec![], name_to_interned: HashMap::new(), static_index_watermark: 0, global_names, } } fn lookup_global_name(&mut self, name: &str) -> Option { self.global_names.and_then(|global_names| { global_names .binary_search_by(|global_name| global_name.as_str().cmp(name)) .ok() .map(|index| InternedSym::from_global_index(index as u32)) }) } /// Interns a symbol with the given name /// /// The `InternedSym` must be referenced by a boxed `Sym` before the next GC cycle. pub fn intern(&mut self, name: &str) -> InternedSym { if let Some(inline_interned) = InternedSym::try_from_inline_name(name) { return inline_interned; }; // See if this has already been interned locally or is a cached global name if let Some(interned) = self.name_to_interned.get(name) { return *interned; } // See if this is in our global names if let Some(interned) = self.lookup_global_name(name) { // Cache this so we don't have to iterate to find the name again self.name_to_interned.insert(name.into(), interned); return interned; } let shared_name: Rc = name.into(); let index = self.names.len() as u32; self.names.push(shared_name.clone()); let interned = InternedSym::from_local_index(index); self.name_to_interned.insert(shared_name, interned); interned } /// Interns a static symbol with the given name /// /// This should only be used where it's not possible to GC root the [`InternedSym`]. 
This is /// currently only used by the JIT where we can't track [`InternedSym`] references in the /// generated code. pub fn intern_static(&mut self, name: &str) -> InternedSym { let interned_sym = self.intern(name); if let InternedRepr::LocalIndexed(indexed_sym) = interned_sym.repr() { self.static_index_watermark = indexed_sym.name_index + 1; } interned_sym } pub fn unintern<'a>(&'a self, interned: &'a InternedSym) -> &'a str { match interned.repr() { InternedRepr::LocalIndexed(indexed) => &self.names[indexed.name_index as usize], InternedRepr::GlobalIndexed(indexed) => { self.global_names.unwrap()[indexed.name_index as usize].as_str() } InternedRepr::Inline(inline) => inline.as_str(), } } /// Returns a clone of this interner usable for garbage collection /// /// This preserves the index of all static [`InternedSym`]s. pub(crate) fn clone_for_collect_garbage(&self) -> Self { if self.static_index_watermark == 0 { // Avoid iterating over our HashMap return Self::new(); }; let static_index_watermark = self.static_index_watermark; let names = self.names[0..static_index_watermark as usize].to_vec(); let name_to_interned = self .name_to_interned .iter() .filter_map(|(name, interned)| { if let InternedRepr::LocalIndexed(indexed) = interned.repr() { if indexed.name_index < self.static_index_watermark { return Some((name.clone(), *interned)); } } None }) .collect(); Interner { names, name_to_interned, static_index_watermark, global_names: self.global_names, } } } impl Default for Interner { fn default() -> Interner { Self::new() } } /// Type that can be converted to an [`Interner`] pub trait AsInterner { /// Returns this instance as an [`Interner`] fn as_interner(&self) -> &Interner; } impl AsInterner for Interner { fn as_interner(&self) -> &Interner { self } } #[cfg(test)] mod test { use super::*; use std::mem; #[test] fn sizes() { assert_eq!(8, mem::size_of::()); assert_eq!(8, mem::size_of::()); assert_eq!(8, mem::size_of::()); } #[test] fn equality() { let inline_name = 
"inline"; let index_name = "This must be longer than eight bytes"; let mut interner = Interner::new(); let intern_inline1 = interner.intern(inline_name); let intern_inline2 = interner.intern(inline_name); assert_eq!(intern_inline1, intern_inline2); let intern_index1 = interner.intern(index_name); let intern_index2 = interner.intern(index_name); assert_eq!(intern_index1, intern_index2); // These should not be equal assert_ne!(intern_inline1, intern_index1); } #[test] fn fmt_debug() { let mut interner = Interner::new(); let intern_inline = interner.intern("inline"); assert_eq!("'inline", format!("{:?}", intern_inline)); let intern_indexed = interner.intern("This is very long and can't be stored inline"); assert_eq!("`0", format!("{:?}", intern_indexed)); } #[test] fn roundtrip() { let mut interner = Interner::new(); let test_names = [ "", "short1", "short2", "exactly8", "Hello, world!", "This is another long test string", ]; let mut previous_interneds = vec![]; for &name in &test_names { let interned = interner.intern(name); assert_eq!(name, interner.unintern(&interned)); // Make sure we don't equal any of our previous interned symbols assert!(!previous_interneds.contains(&interned)); previous_interneds.push(interned); } } #[test] fn clone_for_collect_garbage() { let mut interner = Interner::new(); interner.intern("one "); interner.intern("two "); interner.intern("three "); assert_eq!(3, interner.names.len()); assert_eq!(3, interner.name_to_interned.len()); // No static symbols; we should collect everything interner = interner.clone_for_collect_garbage(); assert_eq!(0, interner.names.len()); assert_eq!(0, interner.name_to_interned.len()); interner.intern("one "); interner.intern_static("two "); interner.intern("three "); // We need to preserve the second symbol interner = interner.clone_for_collect_garbage(); assert_eq!(2, interner.names.len()); assert_eq!(2, interner.name_to_interned.len()); // We should be able to "promote" an existing symbol to static 
interner.intern("one-two-three-four"); interner.intern_static("one-two-three-four"); assert_eq!(3, interner.names.len()); assert_eq!(3, interner.name_to_interned.len()); interner = interner.clone_for_collect_garbage(); assert_eq!(3, interner.names.len()); assert_eq!(3, interner.name_to_interned.len()); } } ================================================ FILE: runtime/lib.rs ================================================ #![warn(clippy::all)] #![warn(rust_2018_idioms)] pub mod abitype; pub mod binding; pub mod boxed; pub mod callback; pub mod class_map; pub mod compiler_support; pub mod intern; pub mod persistent; pub mod task; ================================================ FILE: runtime/persistent/mod.rs ================================================ pub mod vector; pub use vector::Vector; ================================================ FILE: runtime/persistent/vector.rs ================================================ use std::mem::MaybeUninit; use std::sync::atomic::AtomicU64; use std::sync::atomic::Ordering; use std::{alloc, ptr, sync}; /// Reference count used for global constants created by codegen pub const GLOBAL_CONSTANT_REFCOUNT: u64 = std::u64::MAX; const TRIE_RADIX: u32 = 5; pub const NODE_SIZE: usize = 1 << TRIE_RADIX; const LEVEL_MASK: usize = (1 << TRIE_RADIX) - 1; #[cfg(test)] use std::cell::RefCell; #[cfg(test)] thread_local! { static ALLOCATED_BRANCHES: RefCell = RefCell::new(0); } #[cfg(test)] thread_local! 
{ static ALLOCATED_LEAVES: RefCell = RefCell::new(0); } #[repr(C)] pub struct Vector where T: Copy, { size: u64, root: *const Node, tail: *const Node, } impl Vector where T: Copy, { pub fn new(values: impl ExactSizeIterator) -> Self { let empty_vec = Vector { size: 0, root: std::ptr::null(), tail: std::ptr::null(), }; empty_vec.extend(values) } pub fn len(&self) -> usize { self.size as usize } pub fn is_empty(&self) -> bool { self.len() == 0 } /// Pushes a new leaf containing `added_elements` additional elements fn push_leaf(&self, leaf: *const Node, added_elements: u64) -> Vector { let new_size = self.size + added_elements; debug_assert!(new_size & (LEVEL_MASK as u64) == 0); let new_root = match unsafe { self.root.as_ref() } { None => { // We're the first root node leaf } Some(old_root) => { let old_depth = Self::trie_depth(self.trie_size()); let new_depth = Self::trie_depth(self.trie_size() + NODE_SIZE); if old_depth == new_depth { old_root.push_leaf(old_depth, new_size as usize - 1, leaf) } else { // Need to add a new level let mut root_children: [*const Node; NODE_SIZE] = [ptr::null(); NODE_SIZE]; root_children[0] = Node::take_ptr_ref(self.root); root_children[1] = Node::new_chain(leaf, old_depth); Node::new_branch(root_children) } } }; Self { size: new_size, root: new_root, tail: ptr::null(), } } pub fn get(&self, index: usize) -> Option { if index >= self.len() { return None; } let leaf_node = self.get_leaf(index); unsafe { Some((*leaf_node).elements.leaf[index & LEVEL_MASK].assume_init()) } } pub fn take(&self, count: usize) -> Vector { let new_len = std::cmp::min(count, self.len()); if new_len == self.len() { return self.clone(); } let new_trie_size = Self::trie_size_for_len(new_len); let new_root = if new_trie_size > 0 { let old_depth = Self::trie_depth(self.trie_size()); let new_depth = Self::trie_depth(new_trie_size); // Drill down until we find our new root let new_root = (new_depth..old_depth) .fold(self.root, |root, _| unsafe { 
(*root).elements.branch[0] }); Node::take_ptr_ref(new_root) } else { ptr::null() }; let new_tail_size = Self::tail_size_for_len(new_len); let new_tail = if new_tail_size > 0 { Node::take_ptr_ref(self.get_leaf(new_trie_size)) } else { ptr::null() }; Self { size: new_len as u64, root: new_root, tail: new_tail, } } fn get_leaf(&self, index: usize) -> *const Node { if index >= self.tail_offset() { self.tail } else { let depth = Self::trie_depth(self.trie_size()); unsafe { (&*self.root).get_leaf(depth, index) } } } pub fn assoc(&self, index: usize, value: T) -> Vector { if index >= self.len() { panic!("element {} of out bounds", index); } if index >= self.tail_offset() { let mut new_elements = [MaybeUninit::uninit(); NODE_SIZE]; // Copy the previous leaf elements new_elements[..self.tail_size()] .copy_from_slice(unsafe { &(*self.tail).elements.leaf[..self.tail_size()] }); // Overwrite the element new_elements[index - self.tail_offset()] = MaybeUninit::new(value); return Self { size: self.size, root: Node::take_ptr_ref(self.root), tail: Node::new_leaf(new_elements), }; } let depth = Self::trie_depth(self.trie_size()); let new_root = unsafe { (&*self.root).assoc_value(depth, index, value) }; Self { size: self.size, root: new_root, tail: Node::take_ptr_ref(self.tail), } } pub fn iter(&self) -> impl ExactSizeIterator + '_ { Iter { vec: self, index: 0, current_leaf: self.get_leaf(0), } } pub fn extend(&self, mut values: impl ExactSizeIterator) -> Vector { // This is a three step process: // // 1. Fill the existing tail with values // 2. Push whole NODE_SIZE leaves while enough values remain // 3. 
Place the rest of the values in the tail // // We can run out of values at any phase and return the finished vector if values.len() == 0 { return self.clone(); } let mut vec_acc = if let Some(tail_ref) = unsafe { self.tail.as_ref() } { let old_tail_size = self.tail_size(); let mut tail_elements = [MaybeUninit::uninit(); NODE_SIZE]; unsafe { tail_elements[..old_tail_size] .copy_from_slice(&tail_ref.elements.leaf[..old_tail_size]); } let fill_size = std::cmp::min(NODE_SIZE - old_tail_size, values.len()); let new_tail_size = old_tail_size + fill_size; for tail_element in tail_elements.iter_mut().skip(old_tail_size).take(fill_size) { *tail_element = MaybeUninit::new(values.next().unwrap()); } let new_leaf = Node::new_leaf(tail_elements); if new_tail_size != NODE_SIZE { // We only affected the tail return Self { size: self.size + (fill_size as u64), root: Node::take_ptr_ref(self.root), tail: new_leaf, }; } self.push_leaf(new_leaf, fill_size as u64) } else { self.clone() }; while values.len() >= NODE_SIZE { let mut trie_elements = [MaybeUninit::uninit(); NODE_SIZE]; for trie_element in &mut trie_elements { *trie_element = MaybeUninit::new(values.next().unwrap()); } vec_acc = vec_acc.push_leaf(Node::new_leaf(trie_elements), NODE_SIZE as u64) } let tail_size = values.len(); if tail_size > 0 { let mut tail_elements = [MaybeUninit::uninit(); NODE_SIZE]; for (tail_element, value) in tail_elements.iter_mut().zip(values) { *tail_element = MaybeUninit::new(value); } vec_acc.size += tail_size as u64; vec_acc.tail = Node::new_leaf(tail_elements); } vec_acc } /// Visits each mutable element of the array /// /// This skips global constants pub(crate) fn visit_mut_elements(&mut self, visitor: &mut F) where F: FnMut(&mut T), { unsafe { if let Some(tail_ref) = (self.tail as *mut Node).as_mut() { tail_ref.visit_mut_elements(0, self.tail_size(), visitor); } } unsafe { if let Some(root_ref) = (self.root as *mut Node).as_mut() { let trie_size = self.trie_size(); 
root_ref.visit_mut_elements(Self::trie_depth(trie_size), trie_size, visitor); } } } /// Size of the trie portion of the `Vector` /// /// This is always a multiple of `NODE_SIZE` fn trie_size(&self) -> usize { Self::trie_size_for_len(self.len()) } fn trie_size_for_len(len: usize) -> usize { len - Self::tail_size_for_len(len) } /// Size of the tail portion of the `Vector` /// /// This is always less than `NODE_SIZE` fn tail_size(&self) -> usize { Self::tail_size_for_len(self.len()) } fn tail_size_for_len(len: usize) -> usize { len % NODE_SIZE } /// Index of the first element in the tail portion fn tail_offset(&self) -> usize { self.len() - self.tail_size() } /// Returns the trie depth for trie of the given size fn trie_depth(trie_size: usize) -> u32 { if trie_size <= 1 { // The root is the only node return 0; } (63 - (trie_size as u64 - 1).leading_zeros()) / TRIE_RADIX } } impl Drop for Vector where T: Copy, { fn drop(&mut self) { unsafe { Node::release_ptr_ref(self.root, Self::trie_depth(self.trie_size())); Node::release_ptr_ref(self.tail, 0); } } } union NodeElements where T: Copy, { leaf: [MaybeUninit; NODE_SIZE], branch: [*const Node; NODE_SIZE], } #[repr(C)] struct Node where T: Copy, { ref_count: AtomicU64, elements: NodeElements, } impl Node where T: Copy, { fn new_leaf(elements: [MaybeUninit; NODE_SIZE]) -> *const Node { #[cfg(test)] ALLOCATED_LEAVES.with(|counter| *counter.borrow_mut() += 1); let layout = alloc::Layout::new::(); unsafe { let node = alloc::alloc(layout) as *mut Node; (*node).ref_count = AtomicU64::new(1); (*node).elements.leaf = elements; node } } fn new_branch(elements: [*const Node; NODE_SIZE]) -> *const Node { #[cfg(test)] ALLOCATED_BRANCHES.with(|counter| *counter.borrow_mut() += 1); let layout = alloc::Layout::new::(); debug_assert!(!elements[0].is_null()); unsafe { let node = alloc::alloc(layout) as *mut Node; (*node).ref_count = AtomicU64::new(1); (*node).elements.branch = elements; node } } fn new_chain(leaf_node: *const Node, 
remaining_depth: u32) -> *const Node { if remaining_depth == 0 { return leaf_node; } // Create a one level intermediate node with a single branch let rest_tail = Self::new_chain(leaf_node, remaining_depth - 1); let mut intermediate_elements = [ptr::null::>(); NODE_SIZE]; intermediate_elements[0] = rest_tail; Self::new_branch(intermediate_elements) } fn get_leaf(&self, remaining_depth: u32, index: usize) -> *const Node { if remaining_depth == 0 { return self as *const Node; } let level_radix = TRIE_RADIX * remaining_depth; let branch_index = (index >> level_radix) & LEVEL_MASK; unsafe { (&*self.elements.branch[branch_index]).get_leaf(remaining_depth - 1, index) } } fn assoc_value(&self, remaining_depth: u32, index: usize, value: T) -> *const Node { if remaining_depth == 0 { // Replace the leaf value let mut new_elements = unsafe { self.elements.leaf }; new_elements[index & LEVEL_MASK] = MaybeUninit::new(value); return Self::new_leaf(new_elements); } let level_radix = TRIE_RADIX * remaining_depth; let branch_index = (index >> level_radix) & LEVEL_MASK; // Replace the branch value let new_subtree = unsafe { (&*self.elements.branch[branch_index]).assoc_value(remaining_depth - 1, index, value) }; let mut new_elements: [*const Node; NODE_SIZE] = [ptr::null(); NODE_SIZE]; for (i, new_element) in new_elements.iter_mut().enumerate() { unsafe { *new_element = if i == branch_index { new_subtree } else { Node::take_ptr_ref(self.elements.branch[i]) }; } } Self::new_branch(new_elements) } fn push_leaf( &self, remaining_depth: u32, last_index: usize, leaf: *const Node, ) -> *const Node { if remaining_depth == 0 { return leaf; } let level_radix = TRIE_RADIX * remaining_depth; let branch_index = (last_index >> level_radix) & LEVEL_MASK; // Replace the branch value let new_subtree = unsafe { match self.elements.branch[branch_index].as_ref() { Some(branch) => branch.push_leaf(remaining_depth - 1, last_index, leaf), None => Self::new_chain(leaf, remaining_depth - 1), } }; let mut 
new_elements: [*const Node; NODE_SIZE] = [ptr::null(); NODE_SIZE]; for (new_element, old_element) in new_elements .iter_mut() .zip(unsafe { self.elements.branch }.iter()) .take(branch_index) { *new_element = Node::take_ptr_ref(*old_element) } new_elements[branch_index] = new_subtree; Self::new_branch(new_elements) } fn is_global_constant(&self) -> bool { self.ref_count.load(Ordering::Relaxed) == GLOBAL_CONSTANT_REFCOUNT } fn take_ptr_ref(self_ptr: *const Node) -> *const Node { if let Some(self_ref) = unsafe { self_ptr.as_ref() } { if !self_ref.is_global_constant() { self_ref.ref_count.fetch_add(1, Ordering::Relaxed); } self_ptr } else { ptr::null() } } /// Atomically releases a reference to the node unsafe fn release_ptr_ref(self_ptr: *const Node, depth: u32) { let self_ref = if let Some(self_ref) = self_ptr.as_ref() { self_ref } else { return; }; if self_ref.is_global_constant() { return; } let should_destroy = self_ref.ref_count.fetch_sub(1, Ordering::Release) == 1; if should_destroy { sync::atomic::fence(Ordering::Acquire); if depth > 0 { for i in 0..NODE_SIZE { Self::release_ptr_ref(self_ref.elements.branch[i], depth - 1); } #[cfg(test)] ALLOCATED_BRANCHES.with(|counter| *counter.borrow_mut() -= 1); } else { #[cfg(test)] ALLOCATED_LEAVES.with(|counter| *counter.borrow_mut() -= 1); } alloc::dealloc( self_ref as *const Self as *mut u8, alloc::Layout::new::(), ); } } /// Visits up to `remaining_elements` mutable elements, returning the new remaining count fn visit_mut_elements( &mut self, remaining_depth: u32, mut remaining_elements: usize, visitor: &mut F, ) -> usize where F: FnMut(&mut T), { if self.is_global_constant() { // We're a global constant; skip us return remaining_elements.saturating_sub(NODE_SIZE << (remaining_depth * TRIE_RADIX)); } if remaining_depth == 0 { let leaf_size = std::cmp::min(remaining_elements, NODE_SIZE); unsafe { for element in self.elements.leaf.iter_mut().take(leaf_size) { visitor(&mut (*element.as_mut_ptr())); } } return 
remaining_elements - leaf_size; } for branch in unsafe { self.elements.branch.iter() } { unsafe { remaining_elements = (&mut *(*branch as *mut Node)).visit_mut_elements( remaining_depth - 1, remaining_elements, visitor, ); } if remaining_elements == 0 { return 0; } } remaining_elements } } struct Iter<'a, T> where T: Copy, { vec: &'a Vector, index: usize, current_leaf: *const Node, } impl<'a, T> Iterator for Iter<'a, T> where T: Copy, { type Item = T; fn next(&mut self) -> Option { if self.index >= self.vec.len() { return None; } let item = unsafe { (*self.current_leaf).elements.leaf[self.index & LEVEL_MASK].assume_init() }; self.index += 1; if self.index & LEVEL_MASK == 0 { // Lookup the next node self.current_leaf = self.vec.get_leaf(self.index); } Some(item) } fn size_hint(&self) -> (usize, Option) { let exact_size = self.vec.size as usize - self.index; (exact_size, Some(exact_size)) } } impl<'a, T> ExactSizeIterator for Iter<'a, T> where T: Copy {} impl Clone for Vector where T: Copy, { fn clone(&self) -> Self { Vector { size: self.size, root: Node::take_ptr_ref(self.root), tail: Node::take_ptr_ref(self.tail), } } } #[cfg(test)] mod test { use super::*; use std::iter; fn assert_nodes_deallocated(block: T) where T: FnOnce(), { assert_eq!( 0, ALLOCATED_BRANCHES.with(|counter| *counter.borrow()), "branches allocated before beginning of test" ); assert_eq!( 0, ALLOCATED_LEAVES.with(|counter| *counter.borrow()), "leaves allocated before beginning of test" ); block(); assert_eq!( 0, ALLOCATED_BRANCHES.with(|counter| *counter.borrow()), "branches still allocated after end of test" ); assert_eq!( 0, ALLOCATED_LEAVES.with(|counter| *counter.borrow()), "leaves still allocated after end of test" ); } #[test] fn tail_only_vector() { assert_nodes_deallocated(|| { let empty_vec = Vector::::new(iter::empty()); assert_eq!(0, empty_vec.len()); assert!(empty_vec.is_empty()); let one_vec = empty_vec.extend(iter::once(0)); // Make sure `empty_vec` is still intact assert_eq!(0, 
empty_vec.len()); assert!(empty_vec.is_empty()); assert_eq!(None, empty_vec.get(0)); assert_eq!(1, one_vec.len()); assert!(!one_vec.is_empty()); assert_eq!(Some(0), one_vec.get(0)); // Try modifying the original one item vec let mutated_vec = one_vec.assoc(0, 31337); assert_eq!(1, mutated_vec.len()); assert!(!mutated_vec.is_empty()); assert_eq!(Some(31337), mutated_vec.get(0)); assert_eq!(Some(0), one_vec.get(0)); }); } #[test] fn extended_one_level_vector() { assert_nodes_deallocated(|| { const TEST_LEN: usize = 48; let mut test_vec = Vector::::new(iter::empty()); for i in 0..TEST_LEN { assert_eq!(i, test_vec.len()); test_vec = test_vec.extend(iter::once(i)); } // Check the contents manually for i in 0..TEST_LEN { assert_eq!(Some(i), test_vec.get(i)); } // Check the contents with an iterator { let test_iter = test_vec.iter(); assert_eq!(TEST_LEN, test_iter.len()); for (actual, expected) in test_vec.iter().enumerate() { assert_eq!(expected, actual); } } }) } #[test] fn extended_two_level_vector() { assert_nodes_deallocated(|| { const TEST_LEN: usize = 128; let mut test_vec = Vector::::new(iter::empty()); for i in 0..TEST_LEN { assert_eq!(i, test_vec.len()); test_vec = test_vec.extend(iter::once(i)); } // Check the contents manually for i in 0..TEST_LEN { assert_eq!(Some(i), test_vec.get(i)); } // Check the contents with an iterator { let test_iter = test_vec.iter(); assert_eq!(TEST_LEN, test_iter.len()); for (actual, expected) in test_vec.iter().enumerate() { assert_eq!(expected, actual); } } // Check the contents using take for i in (0..TEST_LEN).step_by(3) { let head_vec = test_vec.take(i); assert_eq!(i, head_vec.len()); if i > 0 { assert_eq!(Some(0), head_vec.get(0)); assert_eq!(Some(i - 1), head_vec.get(i - 1)); } } }) } #[test] fn initialised_three_level_vector() { assert_nodes_deallocated(|| { const TEST_LEN: usize = 2087; let mut test_vec = Vector::::new(0..TEST_LEN); assert_eq!(TEST_LEN, test_vec.len()); // Check the contents manually for i in 0..TEST_LEN { 
assert_eq!(Some(i), test_vec.get(i)); } // Check the contents with an iterator { let test_iter = test_vec.iter(); assert_eq!(TEST_LEN, test_iter.len()); for (actual, expected) in test_vec.iter().enumerate() { assert_eq!(expected, actual); } } // Manually reverse the vector for i in (0..TEST_LEN).rev() { test_vec = test_vec.assoc(i, TEST_LEN - i - 1); } // Make sure it's reversed for i in 0..TEST_LEN { assert_eq!(Some(TEST_LEN - i - 1), test_vec.get(i)); } // Reverse the vector back by mutable ref test_vec.visit_mut_elements(&mut |element| { *element = TEST_LEN - *element - 1; }); // Check the contents using take for i in (0..TEST_LEN).step_by(7) { let head_vec = test_vec.take(i); assert_eq!(i, head_vec.len()); for (actual, expected) in head_vec.iter().enumerate() { assert_eq!(expected, actual); } } }) } #[test] fn vector_extend() { assert_nodes_deallocated(|| { let start_vec = Vector::::new(1..4); let extended_vec = start_vec.extend(4..7); let all_values: Vec = extended_vec.iter().collect(); assert_eq!(vec![1, 2, 3, 4, 5, 6], all_values); let zero_extended_vec = extended_vec.extend(iter::empty()); assert_eq!(6, zero_extended_vec.len()); }) } } ================================================ FILE: runtime/task.rs ================================================ #![warn(missing_docs)] //! Isolated tasks of execution use std::panic; use crate::binding::Never; use crate::boxed::prelude::*; use crate::boxed::type_info::TypeInfo; use crate::boxed::Heap; /// Isolated task of execution /// /// All Arret and RFI code must run inside a task. It provides a dedicated garbage collected /// [`Heap`] as well as an isolation boundary against panics. A task is inherently single threaded; /// it's not possible for one task to be executing on multiple threads at the same time. 
pub struct Task { heap: Heap, } impl Task { const DEFAULT_CAPACITY: usize = 32; /// Creates a new empty task pub fn new() -> Task { Self::with_type_info(TypeInfo::empty()) } pub(crate) fn with_type_info(type_info: TypeInfo) -> Task { Self { heap: Heap::new(type_info, Self::DEFAULT_CAPACITY), } } /// Returns this task's dedicated heap pub fn heap(&self) -> &Heap { &self.heap } /// Returns a mutable reference to this task's dedicated heap pub fn heap_mut(&mut self) -> &mut Heap { &mut self.heap } /// Panics the current task /// /// This destroys the current task and invokes any cleanup required. pub fn panic(&mut self, message: String) -> Never { // Using `resume_unwind` accomplishes two things: // // 1. Avoids printing the panic info to stderr as this is an "intentional" panic // 2. Skips incrementing our panic count. This is important for compile time evaluation of // panics. The compiler and stdlib have a different panic count due to being in separate // binaries. With a normal `panic!` the panic count will be increment on the stdlib and // decremented in the compiler. On the second panic the stdlib thinks it's already // panicking and aborts. This is a hacky workaround. // // TODO: Fix panics uniformly and remove this method. If we panic inside e.g. Rust stdlib // we won't follow this path. 
panic::resume_unwind(Box::new(message)); } } impl Default for Task { fn default() -> Task { Task::new() } } impl AsHeap for Task { fn as_heap(&self) -> &Heap { &self.heap } fn as_heap_mut(&mut self) -> &mut Heap { &mut self.heap } } ================================================ FILE: runtime-syntax/Cargo.toml ================================================ [package] name = "arret-runtime-syntax" version = "0.1.0" edition = "2018" authors = ["Ryan Cumming "] [lib] path = "lib.rs" crate-type = ["lib"] [dependencies] arret-syntax = { path = "../syntax" } arret-runtime = { path = "../runtime" } ================================================ FILE: runtime-syntax/lib.rs ================================================ //! This crate contains functionality for dealing with EDN at runtime #![warn(clippy::all)] #![warn(rust_2018_idioms)] pub mod reader; pub mod writer; ================================================ FILE: runtime-syntax/reader.rs ================================================ use arret_syntax::datum::Datum; use arret_runtime::boxed; use arret_runtime::boxed::prelude::*; use arret_runtime::boxed::refs::Gc; /// Places a syntax datum on a box heap pub fn box_syntax_datum(heap: &mut impl boxed::AsHeap, datum: &Datum) -> Gc { match datum { Datum::Bool(_, value) => boxed::Bool::singleton_ref(*value).as_any_ref(), Datum::Int(_, val) => boxed::Int::new(heap, *val).as_any_ref(), Datum::Float(_, val) => boxed::Float::new(heap, *val).as_any_ref(), Datum::Char(_, val) => boxed::Char::new(heap, *val).as_any_ref(), Datum::Str(_, val) => boxed::Str::new(heap, val.as_ref()).as_any_ref(), Datum::Sym(_, val) => boxed::Sym::new(heap, val.as_ref()).as_any_ref(), Datum::List(_, vs) => { boxed::List::from_values(heap, vs.iter(), box_syntax_datum).as_any_ref() } Datum::Vector(_, vs) => { boxed::Vector::from_values(heap, vs.iter(), box_syntax_datum).as_any_ref() } Datum::Set(_, vs) => { boxed::Set::from_values(heap, vs.iter(), box_syntax_datum).as_any_ref() } 
Datum::Map(_, vs) => boxed::Map::from_values(heap, vs.iter(), |heap, (key, value)| { (box_syntax_datum(heap, key), box_syntax_datum(heap, value)) }) .as_any_ref(), } } // This is indirectly tested by `writer` ================================================ FILE: runtime-syntax/writer.rs ================================================ use std::io::{Result, Write}; use arret_runtime::boxed; use arret_runtime::boxed::prelude::*; use arret_runtime::boxed::refs::Gc; use arret_runtime::intern::InternedSym; macro_rules! process_escaped_chars { ($w:ident, $source:ident, $( $pattern:pat => $escape:expr ),*) => { // Try to write sequential unescaped characters in chunks // This is especially important if $w isn't buffered let mut last_escape_end = 0; for (index, c) in $source.char_indices() { match c { $( $pattern => { $w.write_all(&$source.as_bytes()[last_escape_end..index])?; last_escape_end = index + c.len_utf8(); ($escape)?; } ),* , _ => {} }; } $w.write_all(&$source.as_bytes()[last_escape_end..])?; } } fn write_escaped_str(w: &mut dyn Write, source: &str) -> Result<()> { process_escaped_chars!(w, source, '\t' => write!(w, "\\t"), '\r' => write!(w, "\\r"), '\n' => write!(w, "\\n"), '\\' => write!(w, "\\\\"), '"' => write!(w, "\\\""), c @ '\u{0}'..='\u{19}' => write!(w, "\\x{:X};", c as u32) ); Ok(()) } fn write_boxed_seq( w: &mut dyn Write, heap: &impl AsHeap, elems: impl Iterator>, ) -> Result<()> { let mut has_prev = false; for elem in elems { if has_prev { write!(w, " ")?; } else { has_prev = true; } write_boxed(w, heap, elem)?; } Ok(()) } fn write_boxed_map( w: &mut dyn Write, heap: &impl AsHeap, elems: impl Iterator, Gc)>, ) -> Result<()> { write!(w, "{{")?; let mut has_prev = false; for (key, value) in elems { if has_prev { write!(w, ", ")?; } else { has_prev = true; } write_boxed(w, heap, key)?; write!(w, " ")?; write_boxed(w, heap, value)?; } write!(w, "}}")?; Ok(()) } fn write_char(w: &mut dyn Write, c: char) -> Result<()> { match c { '\n' => write!(w, 
"\\newline"), '\r' => write!(w, "\\return"), ' ' => write!(w, "\\space"), '\t' => write!(w, "\\tab"), '\u{21}'..='\u{126}' => write!(w, "\\{}", c), other => write!(w, "\\u{:04X}", other as u32), } } #[allow(clippy::float_cmp)] fn write_float(w: &mut dyn Write, f: f64) -> Result<()> { if f.is_nan() { write!(w, "##NaN") } else if f.is_infinite() { if f.is_sign_positive() { write!(w, "##Inf") } else { write!(w, "##-Inf") } } else if f == 0.0 && f.is_sign_negative() { write!(w, "-0.0") } else if (f as i64 as f64) == f { // This is has no fractional part; force a .0 to mark it as a float write!(w, "{:.1}", f) } else { write!(w, "{:.}", f) } } fn write_interned_sym( w: &mut dyn Write, heap: &impl AsHeap, interned_sym: InternedSym, ) -> Result<()> { // TODO: We don't support quoted/raw symbols as EDN doesn't // This assumes the symbol is a valid identifier write!( w, "{}", heap.as_heap() .type_info() .interner() .unintern(&interned_sym) ) } fn write_record(w: &mut dyn Write, heap: &impl AsHeap, record: &boxed::Record) -> Result<()> { use boxed::FieldValue; // TODO: Print our source name write!(w, "#record(")?; let mut has_prev = false; for field in record.field_values(heap.as_heap()) { if has_prev { write!(w, " ")?; } else { has_prev = true; } match field { FieldValue::Bool(true) => write!(w, "true")?, FieldValue::Bool(false) => write!(w, "false")?, FieldValue::Char(c) => write_char(w, c)?, FieldValue::Float(f) => write_float(w, f)?, FieldValue::Int(i) => write!(w, "{}", i)?, FieldValue::InternedSym(interned_sym) => write_interned_sym(w, heap, interned_sym)?, FieldValue::Boxed(boxed) => write_boxed(w, heap, boxed)?, } } write!(w, ")") } /// Writes a representation of the passed box to the writer pub fn write_boxed(w: &mut dyn Write, heap: &impl AsHeap, any_ref: Gc) -> Result<()> { use arret_runtime::boxed::AnySubtype; match any_ref.as_subtype() { AnySubtype::True(_) => write!(w, "true"), AnySubtype::False(_) => write!(w, "false"), AnySubtype::Nil(_) => write!(w, "()"), 
AnySubtype::Int(int_ref) => write!(w, "{}", int_ref.value()), AnySubtype::Sym(sym) => write_interned_sym(w, heap, sym.interned()), AnySubtype::Float(float_ref) => write_float(w, float_ref.value()), AnySubtype::Pair(list) => { write!(w, "(")?; write_boxed_seq(w, heap, list.as_list_ref().iter())?; write!(w, ")") } AnySubtype::Vector(vec) => { write!(w, "[")?; write_boxed_seq(w, heap, vec.iter())?; write!(w, "]") } AnySubtype::Set(set) => { write!(w, "#{{")?; write_boxed_seq(w, heap, set.iter())?; write!(w, "}}") } AnySubtype::Char(char_ref) => write_char(w, char_ref.value()), AnySubtype::Str(s) => { write!(w, "\"")?; write_escaped_str(w, s.as_str())?; write!(w, "\"") } AnySubtype::FunThunk(_) => write!(w, "#fn"), AnySubtype::Record(record) => write_record(w, heap, record), AnySubtype::Map(map) => write_boxed_map(w, heap, map.iter()), } } /// Writes a pretty-printed representation of the passed box to the writer pub fn pretty_print_boxed(write: &mut dyn Write, heap: &impl AsHeap, any_ref: Gc) { match any_ref.as_subtype() { boxed::AnySubtype::Str(string) => { write.write_all(string.as_str().as_bytes()).unwrap(); } boxed::AnySubtype::Char(c) => { let mut buffer = [0; 4]; write .write_all(c.value().encode_utf8(&mut buffer).as_bytes()) .unwrap(); } boxed::AnySubtype::Sym(sym) => { write .write_all(sym.name(heap.as_heap()).as_bytes()) .unwrap(); } _ => { write_boxed(write, heap.as_heap(), any_ref).unwrap(); } } } #[cfg(test)] mod test { use super::*; fn string_for_boxed(heap: &boxed::Heap, any_ref: Gc) -> String { use std::str; let mut output_buf: Vec = vec![]; write_boxed(&mut output_buf, heap, any_ref).unwrap(); str::from_utf8(output_buf.as_slice()).unwrap().to_owned() } fn assert_write(heap: &mut boxed::Heap, expected: &'static str, any_ref: Gc) { use crate::reader; use arret_syntax::parser::datum_from_str; let first_output = string_for_boxed(heap, any_ref); assert_eq!(expected, first_output); // Try to round trip this to make sure our output and tests are sane let 
reparsed_syntax = datum_from_str(None, &first_output).unwrap(); let reboxed_ref = reader::box_syntax_datum(heap, &reparsed_syntax); let second_output = string_for_boxed(heap, reboxed_ref); assert_eq!(expected, second_output); } #[test] fn bools() { let mut heap = boxed::Heap::empty(); assert_write(&mut heap, "false", boxed::FALSE_INSTANCE.as_any_ref()); assert_write(&mut heap, "true", boxed::TRUE_INSTANCE.as_any_ref()); } #[test] fn ints() { let mut heap = boxed::Heap::empty(); let boxed_zero = boxed::Int::new(&mut heap, 0); assert_write(&mut heap, "0", boxed_zero.as_any_ref()); let boxed_positive = boxed::Int::new(&mut heap, 120); assert_write(&mut heap, "120", boxed_positive.as_any_ref()); let boxed_negative = boxed::Int::new(&mut heap, -120); assert_write(&mut heap, "-120", boxed_negative.as_any_ref()); } #[test] fn floats() { let mut heap = boxed::Heap::empty(); let test_floats = [ ("0.0", 0.0), ("-0.0", -0.0), ("120.0", 120.0), ("0.25", 0.25), ("-120.0", -120.0), ("9007199254740992.0", 9_007_199_254_740_992.0), ("##NaN", std::f64::NAN), ("##Inf", std::f64::INFINITY), ("##-Inf", std::f64::NEG_INFINITY), ]; for (expected, f) in &test_floats { let boxed_float = boxed::Float::new(&mut heap, *f); assert_write(&mut heap, expected, boxed_float.as_any_ref()); } } #[test] fn sym() { let mut heap = boxed::Heap::empty(); let boxed_foo = boxed::Sym::new(&mut heap, "foo"); assert_write(&mut heap, "foo", boxed_foo.as_any_ref()); let boxed_bar = boxed::Sym::new(&mut heap, "bar"); assert_write(&mut heap, "bar", boxed_bar.as_any_ref()); } #[test] fn lists() { let mut heap = boxed::Heap::empty(); let empty_list = boxed::List::from_values(&mut heap, [].iter().cloned(), boxed::Int::new); assert_write(&mut heap, "()", empty_list.as_any_ref()); let one_list = boxed::List::from_values(&mut heap, [1].iter().cloned(), boxed::Int::new); assert_write(&mut heap, "(1)", one_list.as_any_ref()); let three_list = boxed::List::from_values(&mut heap, [1, 2, 3].iter().cloned(), 
boxed::Int::new); assert_write(&mut heap, "(1 2 3)", three_list.as_any_ref()); } #[test] fn vectors() { let mut heap = boxed::Heap::empty(); let empty_vector = boxed::Vector::from_values(&mut heap, [].iter().cloned(), boxed::Int::new); assert_write(&mut heap, "[]", empty_vector.as_any_ref()); let one_vector = boxed::Vector::from_values(&mut heap, [1].iter().cloned(), boxed::Int::new); assert_write(&mut heap, "[1]", one_vector.as_any_ref()); let three_vector = boxed::Vector::from_values(&mut heap, [1, 2, 3].iter().cloned(), boxed::Int::new); assert_write(&mut heap, "[1 2 3]", three_vector.as_any_ref()); } #[test] fn chars() { let mut heap = boxed::Heap::empty(); let test_chars = [ ("\\newline", '\n'), ("\\return", '\r'), ("\\space", ' '), ("\\tab", '\t'), ("\\a", 'a'), ("\\A", 'A'), ("\\(", '('), ("\\u03BB", '\u{03bb}'), ]; for (expected, c) in &test_chars { let boxed_char = boxed::Char::new(&mut heap, *c); assert_write(&mut heap, expected, boxed_char.as_any_ref()); } } #[test] fn strings() { let mut heap = boxed::Heap::empty(); let test_strings = [ (r#""""#, ""), (r#""Hello, world!""#, "Hello, world!"), (r#""Hello\"World""#, "Hello\"World"), (r#""Hello\\World""#, "Hello\\World"), (r#""Tab\t""#, "Tab\t"), (r#""\n\nnewline""#, "\n\nnewline"), (r#""carriage: \r""#, "carriage: \r"), (r#""lλ""#, "lλ"), (r#""\x0;null!""#, "\u{0}null!"), ( r#""The word \"recursion\" has many meanings.""#, r#"The word "recursion" has many meanings."#, ), ]; for (expected, s) in &test_strings { let boxed_char = boxed::Str::new(&mut heap, *s); assert_write(&mut heap, expected, boxed_char.as_any_ref()); } } } ================================================ FILE: stdlib/arret/base.arret ================================================ (import [arret internal primitives]) (export def let fn if quote export defmacro letmacro macro-rules deftype lettype compile-error do = defrecord letrecord recur) (import [arret internal types]) (export Any Bool Str Sym Int Float Num Char List Vector Vectorof 
Setof Map U Record -> ->! str? sym? bool? num? int? float? char? list? vector? set? map? fn? nil? record?)

(import [stdlib rust])

(export length panic panic! print! println! print-str write! writeln! write-str read-str exit! cons map filter some? every? fold concat take reverse repeat int float < <= == > >= + * - / rem quot sqrt vector vector-length vector->list vector-ref vector-assoc vector-append vector-extend vector-take hash set set-length set->list set-contains? bit-and bit-or bit-xor bit-not bit-shift-left bit-shift-right unsigned-bit-shift-right)

(export defn)
(defmacro defn
  (macro-rules
    [(destruc fn-data ...) (def destruc (fn fn-data ...))]))

(export list)
(defn list #{A} (& [l A]) -> (List & A)
  l)

(export when)
(defmacro when
  (macro-rules
    [(test body-data ...) (if test (do body-data ...) ())]))

(export when-not)
(defmacro when-not
  (macro-rules
    [(test body-data ...) (if test () (do body-data ...))]))

(export cond)
(defmacro cond
  (macro-rules
    [() ()]
    ; Intentionally don't allow a tail here so we throw an error with unreachable clauses
    [(:else body-expr) body-expr]
    [(test-expr body-expr rest-clauses ...) (if test-expr body-expr (cond rest-clauses ...))]))

(export if-not)
(defmacro if-not
  (macro-rules
    [(test-expr false-expr true-expr) (if test-expr true-expr false-expr)]))

(export comment)
(defmacro comment
  (macro-rules
    [(_ ...) '()]))

(export and)
(defmacro and
  (macro-rules
    [() true]
    [(test) test]
    [(test1 test2 ...) (if test1 (and test2 ...) false)]))

(export or)
(defmacro or
  (macro-rules
    [() false]
    [(test) test]
    [(test1 test2 ...) (if test1 true (or test2 ...))]))

(export not)
(defmacro not
  (macro-rules
    [(test) (if test false true)]))

; This is a macro to support occurrence typing
(export not=)
(defmacro not=
  (macro-rules
    [(lhs rhs) (not (= lhs rhs))]))

(export ann)
(defmacro ann
  (macro-rules
    [(val Type) (let [[typed-val Type] val] typed-val)]))

(export first)
(defn first #{T} (([v T] & _)) -> T
  v)

(export second)
(defn second #{T} ((_ [v T] & _)) -> T
  v)

(export nth)
(defn nth #{T} ([l (List & T)] [i Int]) -> T
  (cond
    (nil? l) (panic "index past end of list")
    (<= i 0) (first l)
    :else (recur (rest l) (dec i))))

(export rest)
(defn rest #{T} ((_ & [tail T])) -> (List & T)
  tail)

(export zero?)
(defn zero? ([n Num]) -> Bool
  (if (int? n) (= n 0) (= n 0.0)))

(export pos?)
(defn pos? ([n Num]) -> Bool
  (if (int? n) (> n 0) (> n 0.0)))

(export neg?)
(defn neg? ([n Num]) -> Bool
  (if (int? n) (< n 0) (< n 0.0)))

(export nan?)
(defn nan? ([f Float]) -> Bool
  (not= f f))

(export infinite?)
(defn infinite? ([f Float]) -> Bool
  (or (= f ##Inf) (= f ##-Inf)))

(export even?)
(defn even? ([v Int]) -> Bool
  (zero? (rem v 2)))

(export odd?)
(defn odd? ([v Int]) -> Bool
  (not (zero? (rem v 2))))

(export inc)
(defn inc ([i Int]) -> Int
  (+ i 1))

(export dec)
(defn dec ([i Int]) -> Int
  (- i 1))

(defmacro defextrema
  (macro-rules
    [(name operator)
     (defn name #{[N Num]} ([first N] & [rest N]) -> N
       (fold
         (fn ([acc N] [next N]) -> N
           (cond
             (and (float? next) (nan? next)) next
             (and (float? acc) (nan? acc)) acc
             (operator next acc) next
             :else acc))
         first
         rest))]))

(export min)
(defextrema min <)

(export max)
(defextrema max >)

(export true?)
(defn true? ([v Any]) -> Bool
  (= true v))

(export false?)
(defn false? ([v Any]) -> Bool
  (= false v))

(export any?)
(defn any? ([_ Any]) -> true
  true)

(export identity)
(defn identity #{T} ([v T]) -> T
  v)

(export constantly)
(defn constantly #{T} ([v T]) -> (& Any -> T)
  (fn (& _) v))

; We're polymorphic over both the needle and haystack to build type-specific equality checks
(export member?)
(defn member? #{N H} ([needle N] [haystack (List & H)]) -> Bool
  (if (nil? haystack)
    false
    (or (= needle (first haystack)) (recur needle (rest haystack)))))

(export drop)
(defn drop #{T} ([i Int] [l (List & T)]) -> (List & T)
  (cond
    (<= i 0) l
    (nil? l) l
    :else (recur (dec i) (rest l))))

(export drop-last)
(defn drop-last #{T} ([i Int] [l (List & T)]) -> (List & T)
  (take (- (length l) i) l))

(export ->>)
(defmacro ->>
  (macro-rules
    [(initial) initial]
    [(initial (first-fn args ...) rest ...) (->> (first-fn args ... initial) rest ...)]))

================================================ FILE: stdlib/arret/set.arret ================================================
(import [stdlib base])
(import (:only [stdlib rust] subset?))

(export subset?)

(export superset?)
(defn superset? #{T} ([superset (Setof T)] [subset (Setof T)]) -> Bool
  (subset? subset superset))

================================================ FILE: stdlib/arret/test.arret ================================================
(import [stdlib base])

; Explicitly don't export `fn-op-categories`; make callers use our assertions instead
(import (:only [stdlib rust] black-box black-box! heap-alloc-count fn-op-categories))
(export black-box black-box! heap-alloc-count)

(export black-box-untyped!)
(defn black-box-untyped! ([input Any]) ->! Any
  (black-box! input))

(export assert-eq!)
(defmacro assert-eq!
  (macro-rules
    [(expected-expr actual-expr)
     (let [expected expected-expr
           actual actual-expr]
       (when-not (= expected actual)
         (panic! "`" expected "` does not equal `" actual "`")))]))

(export assert-ne!)
(defmacro assert-ne!
  (macro-rules
    [(expected-expr actual-expr)
     (let [expected expected-expr
           actual actual-expr]
       (when (= expected actual)
         (panic! "`" expected "` equals `" actual "`")))]))

(export assert-fn-contains-op!)
(defmacro assert-fn-contains-op!
  (macro-rules
    [(op-category test-fn)
     (when-not (member? op-category (fn-op-categories test-fn))
       (panic! "expected built function to contain an op of category `" op-category "`"))]))

(export assert-fn-doesnt-contain-op!)
(defmacro assert-fn-doesnt-contain-op!
  (macro-rules
    [(op-category test-fn)
     (when (member? op-category (fn-op-categories test-fn))
       (panic! "built function unexpectedly contained an op of category `" op-category "`"))]))

(export assert-fn-returns-constant!)
(defmacro assert-fn-returns-constant!
  (macro-rules
    [(test-fn)
     (when-not (every? #(member? % '(:const-box :const-cast-box :const-reg :cast-boxed :ret)) (fn-op-categories test-fn))
       (panic! "built function unexpectedly returns non-constant"))]))

================================================ FILE: stdlib/rust/Cargo.toml ================================================
[package]
name = "arret-stdlib"
version = "0.1.0"
edition = "2018"
# NOTE(review): the author email was stripped during extraction — restore from VCS
authors = ["Ryan Cumming "]

[lib]
name = "stdlib"
path = "lib.rs"
crate-type = ["cdylib", "staticlib"]

[dependencies]
arret-syntax = { path = "../../syntax" }
arret-runtime = { path = "../../runtime" }
arret-runtime-syntax = { path = "../../runtime-syntax" }
arret-rfi-derive = { path = "../../rfi-derive" }

================================================ FILE: stdlib/rust/bitwise.rs ================================================
use arret_runtime::binding::*;
use arret_runtime::boxed;
use arret_runtime::boxed::refs::Gc;
use arret_runtime::task::Task;

// NOTE(review): the `Gc<...>` type parameters below were stripped during
// extraction; `boxed::List<boxed::Int>` is inferred from the `(& Int)` rest
// argument in the RFI signature and the `i.value()` usage — confirm against VCS.
#[arret_rfi_derive::rust_fun("(Int Int & Int -> Int)")]
pub fn stdlib_bit_and(lhs: i64, rhs: i64, rest: Gc<boxed::List<boxed::Int>>) -> i64 {
    rest.iter().fold(lhs & rhs, |acc, i| acc & i.value())
}

#[arret_rfi_derive::rust_fun("(Int Int & Int -> Int)")]
pub fn stdlib_bit_or(lhs: i64, rhs: i64, rest: Gc<boxed::List<boxed::Int>>) -> i64 {
    rest.iter().fold(lhs | rhs, |acc, i| acc | i.value())
}

#[arret_rfi_derive::rust_fun("(Int Int & Int -> Int)")]
pub fn
stdlib_bit_xor(lhs: i64, rhs: i64, rest: Gc<boxed::List<boxed::Int>>) -> i64 {
    rest.iter().fold(lhs ^ rhs, |acc, i| acc ^ i.value())
}

/// Returns the bitwise complement of `val`
#[arret_rfi_derive::rust_fun("(Int -> Int)")]
pub fn stdlib_bit_not(val: i64) -> i64 {
    !val
}

/// Shifts `val` left by `bit_count` bits
///
/// Panics the task for a negative or out-of-range shift count.
#[arret_rfi_derive::rust_fun("(Int Int -> Int)")]
pub fn stdlib_bit_shift_left(task: &mut Task, val: i64, bit_count: i64) -> i64 {
    if bit_count < 0 {
        task.panic(format!("shift left by negative bit count {}", bit_count));
    } else if bit_count >= 64 {
        // Fixed off-by-one: a shift count of exactly 64 overflows an `i64`
        // shift in Rust (panic in debug builds, masked to a 0-bit shift in
        // release), so it must be rejected along with larger counts.
        task.panic(format!("shift left by {} bits exceeds 64 bits", bit_count));
    }

    val << (bit_count as u32)
}

/// Arithmetic (sign-extending) right shift of `val` by `bit_count` bits
///
/// Panics the task for a negative or out-of-range shift count.
#[arret_rfi_derive::rust_fun("(Int Int -> Int)")]
pub fn stdlib_bit_shift_right(task: &mut Task, val: i64, bit_count: i64) -> i64 {
    if bit_count < 0 {
        task.panic(format!("shift right by negative bit count {}", bit_count));
    } else if bit_count >= 64 {
        // Fixed off-by-one: see `stdlib_bit_shift_left`
        task.panic(format!("shift right by {} bits exceeds 64 bits", bit_count));
    }

    val >> (bit_count as u32)
}

/// Logical (zero-filling) right shift of `val` by `bit_count` bits
///
/// The shift is performed on the `u64` reinterpretation of `val` so the sign
/// bit is not propagated. Panics the task for a negative or out-of-range
/// shift count.
#[arret_rfi_derive::rust_fun("(Int Int -> Int)")]
pub fn stdlib_unsigned_bit_shift_right(task: &mut Task, val: i64, bit_count: i64) -> i64 {
    if bit_count < 0 {
        task.panic(format!("shift right by negative bit count {}", bit_count));
    } else if bit_count >= 64 {
        // Fixed off-by-one: see `stdlib_bit_shift_left`
        task.panic(format!("shift right by {} bits exceeds 64 bits", bit_count));
    }

    (val as u64 >> (bit_count as u32)) as i64
}

================================================
FILE: stdlib/rust/hash.rs
================================================
use std::collections::hash_map::DefaultHasher;
use std::hash::Hasher as _;

use arret_runtime::binding::*;
use arret_runtime::boxed;
use arret_runtime::boxed::prelude::*;
use arret_runtime::boxed::refs::Gc;
use arret_runtime::task::Task;

/// Hashes an arbitrary boxed value to an `Int`
///
/// Uses the standard library's `DefaultHasher`; the heap is needed so
/// heap-indirect values can be traversed while hashing.
#[arret_rfi_derive::rust_fun("(Any -> Int)")]
pub fn stdlib_hash(task: &mut Task, input: Gc<boxed::Any>) -> i64 {
    let mut state = DefaultHasher::new();
    input.hash_in_heap(task.heap(), &mut state);
    state.finish() as i64
}

================================================
FILE: stdlib/rust/lib.rs
================================================
#![warn(clippy::all)] #![warn(rust_2018_idioms)] #[macro_use] extern crate arret_runtime; pub mod list; use crate::list::*; pub mod math; use crate::math::*; pub mod number; use crate::number::*; pub mod testing; use crate::testing::*; pub mod vector; use crate::vector::*; pub mod write; use crate::write::*; pub mod read; use crate::read::*; pub mod hash; use crate::hash::*; pub mod set; use crate::set::*; pub mod bitwise; use crate::bitwise::*; use arret_runtime_syntax::writer::pretty_print_boxed; use arret_runtime::binding::*; use arret_runtime::boxed; use arret_runtime::boxed::refs::Gc; use arret_runtime::task::Task; pub fn panic_common(task: &mut Task, values: Gc>) -> Never { use std::str; let mut output = Vec::::new(); for value in values.iter() { pretty_print_boxed(&mut output, task, value) } let message = str::from_utf8(output.as_slice()).unwrap().to_owned(); task.panic(message) } #[arret_rfi_derive::rust_fun("(& Any -> (U))")] pub fn stdlib_panic(task: &mut Task, values: Gc>) -> Never { panic_common(task, values) } #[arret_rfi_derive::rust_fun("(& Any ->! (U))")] pub fn stdlib_panic_impure(task: &mut Task, values: Gc>) -> Never { panic_common(task, values) } #[arret_rfi_derive::rust_fun("(Int ->! (U))")] pub fn stdlib_exit(exit_code: i64) { use std::process::exit; exit(exit_code as i32); } define_rust_module!(ARRET_STDLIB_RUST_EXPORTS, { "panic" => stdlib_panic, "panic!" => stdlib_panic_impure, "exit!" => stdlib_exit, "print!" => stdlib_print, "println!" => stdlib_println, "print-str" => stdlib_print_str, "write!" => stdlib_write, "writeln!" => stdlib_writeln, "write-str" => stdlib_write_str, "read-str" => stdlib_read_str, "length" => stdlib_length, "map" => stdlib_map, "filter" => stdlib_filter, "some?" => stdlib_some_p, "every?" 
=> stdlib_every_p, "fold" => stdlib_fold, "cons" => stdlib_cons, "concat" => stdlib_concat, "take" => stdlib_take, "reverse" => stdlib_reverse, "repeat" => stdlib_repeat, "float" => stdlib_float, "int" => stdlib_int, "<" => stdlib_num_lt, "<=" => stdlib_num_le, "==" => stdlib_num_eq, ">" => stdlib_num_gt, ">=" => stdlib_num_ge, "+" => stdlib_add, "*" => stdlib_mul, "-" => stdlib_sub, "/" => stdlib_div, "quot" => stdlib_quot, "rem" => stdlib_rem, "sqrt" => stdlib_sqrt, "black-box" => stdlib_black_box, "black-box!" => stdlib_black_box_impure, "heap-alloc-count" => stdlib_heap_alloc_count, "fn-op-categories" => stdlib_fn_op_categories, "vector" => stdlib_vector, "vector-length" => stdlib_vector_length, "vector->list" => stdlib_vector_to_list, "vector-ref" => stdlib_vector_ref, "vector-assoc" => stdlib_vector_assoc, "vector-extend" => stdlib_vector_extend, "vector-append" => stdlib_vector_append, "vector-take" => stdlib_vector_take, "hash" => stdlib_hash, "set" => stdlib_set, "set-length" => stdlib_set_length, "set->list" => stdlib_set_to_list, "set-contains?" => stdlib_set_contains_p, "subset?" 
=> stdlib_subset_p, "bit-and" => stdlib_bit_and, "bit-or" => stdlib_bit_or, "bit-xor" => stdlib_bit_xor, "bit-not" => stdlib_bit_not, "bit-shift-left" => stdlib_bit_shift_left, "bit-shift-right" => stdlib_bit_shift_right, "unsigned-bit-shift-right" => stdlib_unsigned_bit_shift_right }); ================================================ FILE: stdlib/rust/list.rs ================================================ use arret_runtime::binding::*; use arret_runtime::boxed; use arret_runtime::boxed::refs::Gc; use arret_runtime::callback; use arret_runtime::task::Task; #[arret_rfi_derive::rust_fun("((List & Any) -> Int)")] pub fn stdlib_length(input: Gc>) -> i64 { input.len() as i64 } #[arret_rfi_derive::rust_fun("(All #{H T} H (List & T) -> (List H & T))")] pub fn stdlib_cons( task: &mut Task, head: Gc, tail: Gc>, ) -> Gc> { boxed::Pair::new(task, head, tail) } #[arret_rfi_derive::rust_fun("(All #{I O [->_ ->!]} (I ->_ O) (List & I) ->_ (List & O))")] pub fn stdlib_map( task: &mut Task, mapper: callback::Callback< extern "C" fn(&mut Task, boxed::Captures, Gc) -> Gc, >, input: Gc>, ) -> Gc> { let output_vec: Vec> = input.iter().map(|elem| mapper.apply(task, elem)).collect(); boxed::List::new(task, output_vec.into_iter()) } #[arret_rfi_derive::rust_fun("(All #{T [->_ ->!]} (T ->_ Bool) (List & T) ->_ (List & T))")] pub fn stdlib_filter( task: &mut Task, filter: callback::Callback) -> bool>, input: Gc>, ) -> Gc> { let output_vec: Vec> = input .iter() .filter(|elem| filter.apply(task, *elem)) .collect(); boxed::List::new(task, output_vec.into_iter()) } #[arret_rfi_derive::rust_fun("(All #{T [->_ ->!]} (T ->_ Bool) (List & T) ->_ Bool)")] pub fn stdlib_some_p( task: &mut Task, pred: callback::Callback) -> bool>, input: Gc>, ) -> bool { input.iter().any(|elem| pred.apply(task, elem)) } #[arret_rfi_derive::rust_fun("(All #{T [->_ ->!]} (T ->_ Bool) (List & T) ->_ Bool)")] pub fn stdlib_every_p( task: &mut Task, pred: callback::Callback) -> bool>, input: Gc>, ) -> bool { 
input.iter().all(|elem| pred.apply(task, elem)) } #[arret_rfi_derive::rust_fun("(All #{I O [->_ ->!]} (O I ->_ O) O (List & I) ->_ O)")] pub fn stdlib_fold( task: &mut Task, folder: callback::Callback< extern "C" fn(&mut Task, boxed::Captures, Gc, Gc) -> Gc, >, initial: Gc, input: Gc>, ) -> Gc { input .iter() .fold(initial, |acc, elem| folder.apply(task, acc, elem)) } #[arret_rfi_derive::rust_fun("(All #{T} & (List & T) -> (List & T))")] pub fn stdlib_concat( task: &mut Task, lists: Gc>>, ) -> Gc> { let mut list_iter = lists.iter(); match list_iter.len() { 0 => boxed::List::empty(), 1 => list_iter.next().unwrap(), 2 => { // Avoid building a temporary `Vec` let head_list = list_iter.next().unwrap(); let tail_list = list_iter.next().unwrap(); boxed::List::new_with_tail(task, head_list.iter(), tail_list) } _ => { let mut head_values = vec![]; while list_iter.len() > 1 { head_values.extend(list_iter.next().unwrap().iter()); } // We can reuse our tail boxed::List::new_with_tail(task, head_values.into_iter(), list_iter.next().unwrap()) } } } #[arret_rfi_derive::rust_fun("(All #{T} Int (List & T) -> (List & T))")] pub fn stdlib_take( task: &mut Task, count: i64, input: Gc>, ) -> Gc> { let usize_count = if count < 0 { 0 } else { count as usize }; boxed::List::new(task, input.iter().take(usize_count)) } #[arret_rfi_derive::rust_fun("(All #{T} (List & T) -> (List & T))")] pub fn stdlib_reverse( task: &mut Task, input: Gc>, ) -> Gc> { let output_vec: Vec> = input.iter().collect(); boxed::List::new(task, output_vec.into_iter().rev()) } #[arret_rfi_derive::rust_fun("(All #{T} Int T -> (List & T))")] pub fn stdlib_repeat( task: &mut Task, count: i64, value: Gc, ) -> Gc> { struct RepeatIter { count: i64, value: Gc, } impl Iterator for RepeatIter { type Item = Gc; fn next(&mut self) -> Option> { if self.count > 0 { self.count -= 1; Some(self.value) } else { None } } fn size_hint(&self) -> (usize, Option) { if self.count < 0 { (0, Some(0)) } else { (self.count as usize, 
Some(self.count as usize)) } } } impl ExactSizeIterator for RepeatIter {} boxed::List::new(task, RepeatIter { count, value }) } ================================================ FILE: stdlib/rust/math.rs ================================================ use arret_runtime::binding::*; use arret_runtime::boxed; use arret_runtime::boxed::refs::Gc; use arret_runtime::task::Task; fn fold_float_op( task: &mut Task, operands_iter: impl Iterator>, initial_value: f64, float_reduce: FR, ) -> Gc where FR: Fn(f64, f64) -> f64, { let mut float_acc = initial_value; for operand in operands_iter { match operand.as_subtype() { boxed::NumSubtype::Int(int_ref) => { float_acc = float_reduce(float_acc, int_ref.value() as f64); } boxed::NumSubtype::Float(float_ref) => { // Convert to float and break float_acc = float_reduce(float_acc, float_ref.value()); } } } boxed::Float::new(task, float_acc) } fn fold_num_op( task: &mut Task, op_name: &'static str, mut operands_iter: impl Iterator>, initial_value: i64, int_reduce: IR, float_reduce: FR, ) -> Gc where IR: Fn(i64, i64) -> Option, FR: Fn(f64, f64) -> f64, { // Accumulate as an integer for as long as possible let mut int_acc = initial_value; while let Some(operand) = operands_iter.next() { match operand.as_subtype() { boxed::NumSubtype::Int(int_ref) => { if let Some(reduced_int) = int_reduce(int_acc, int_ref.value()) { int_acc = reduced_int; } else { task.panic(format!("attempt to {} with overflow", op_name)); } } boxed::NumSubtype::Float(float_ref) => { // Switch to float let float_acc = float_reduce(int_acc as f64, float_ref.value()); return fold_float_op(task, operands_iter, float_acc, float_reduce).as_num_ref(); } } } boxed::Int::new(task, int_acc).as_num_ref() } #[arret_rfi_derive::rust_fun("(All #{[N Num]} N & N -> N)")] pub fn stdlib_add( task: &mut Task, initial_num: Gc, rest: Gc>, ) -> Gc { use std::iter; use std::ops::Add; fold_num_op( task, "add", iter::once(initial_num).chain(rest.iter()), 0, i64::checked_add, f64::add, ) } 
#[arret_rfi_derive::rust_fun("(All #{[N Num]} N & N -> N)")] pub fn stdlib_mul( task: &mut Task, initial_num: Gc, rest: Gc>, ) -> Gc { use std::iter; use std::ops::Mul; fold_num_op( task, "multiply", iter::once(initial_num).chain(rest.iter()), 1, i64::checked_mul, f64::mul, ) } #[arret_rfi_derive::rust_fun("(All #{[N Num]} N & N -> N)")] pub fn stdlib_sub( task: &mut Task, initial_num: Gc, rest: Gc>, ) -> Gc { use std::ops::Sub; match initial_num.as_subtype() { boxed::NumSubtype::Int(int_ref) => { if rest.is_empty() { boxed::Int::new(task, -int_ref.value()).as_num_ref() } else { fold_num_op( task, "subtract", rest.iter(), int_ref.value(), i64::checked_sub, f64::sub, ) } } boxed::NumSubtype::Float(float_ref) => { if rest.is_empty() { boxed::Float::new(task, -float_ref.value()).as_num_ref() } else { fold_float_op(task, rest.iter(), float_ref.value(), f64::sub).as_num_ref() } } } } #[arret_rfi_derive::rust_fun("(Float & Float -> Float)")] pub fn stdlib_div(initial_float: f64, rest: Gc>) -> f64 { if rest.is_empty() { initial_float.recip() } else { let mut acc = initial_float; for operand in rest.iter() { acc /= operand.value() } acc } } #[arret_rfi_derive::rust_fun("(Int Int -> Int)")] pub fn stdlib_quot(task: &mut Task, numerator: i64, denominator: i64) -> i64 { match numerator.checked_div(denominator) { Some(result) => result, None => { task.panic("division by zero".to_owned()); unreachable!("returned from panic") } } } #[arret_rfi_derive::rust_fun("(Int Int -> Int)")] pub fn stdlib_rem(task: &mut Task, numerator: i64, denominator: i64) -> i64 { match numerator.checked_rem(denominator) { Some(result) => result, None => { task.panic("division by zero".to_owned()); unreachable!("returned from panic") } } } #[arret_rfi_derive::rust_fun("(Float -> Float)")] pub fn stdlib_sqrt(radicand: f64) -> f64 { radicand.sqrt() } ================================================ FILE: stdlib/rust/number.rs ================================================ use arret_runtime::binding::*; 
use arret_runtime::boxed;
use arret_runtime::boxed::refs::Gc;
use arret_runtime::task::Task;

/// Tests every adjacent pair in `initial` + `rest` with the appropriate
/// comparator, returning `true` only if all pairs satisfy it
///
/// Int/Int pairs use `int_comparator`; any pair involving a `Float` promotes
/// the `Int` side to `f64` and uses `float_comparator`.
fn compare_nums<IC, FC>(
    initial: Gc<boxed::Num>,
    rest: Gc<boxed::List<boxed::Num>>,
    int_comparator: IC,
    float_comparator: FC,
) -> bool
where
    IC: Fn(&i64, &i64) -> bool,
    FC: Fn(&f64, &f64) -> bool,
{
    let mut left = initial;
    for right in rest.iter() {
        use boxed::NumSubtype;

        let result = match (left.as_subtype(), right.as_subtype()) {
            (NumSubtype::Int(left_ref), NumSubtype::Int(right_ref)) => {
                int_comparator(&left_ref.value(), &right_ref.value())
            }
            (NumSubtype::Float(left_ref), NumSubtype::Int(right_ref)) => {
                float_comparator(&left_ref.value(), &(right_ref.value() as f64))
            }
            (NumSubtype::Int(left_ref), NumSubtype::Float(right_ref)) => {
                float_comparator(&(left_ref.value() as f64), &right_ref.value())
            }
            (NumSubtype::Float(left_ref), NumSubtype::Float(right_ref)) => {
                float_comparator(&left_ref.value(), &right_ref.value())
            }
        };

        if !result {
            return false;
        }
        left = right;
    }

    true
}

/// Converts any `Num` to a `Float`
#[arret_rfi_derive::rust_fun("(Num -> Float)")]
pub fn stdlib_float(input: Gc<boxed::Num>) -> f64 {
    match input.as_subtype() {
        boxed::NumSubtype::Int(int_ref) => int_ref.value() as f64,
        boxed::NumSubtype::Float(float_ref) => float_ref.value(),
    }
}

/// Converts any `Num` to an `Int`, truncating `Float`s towards zero
///
/// Panics the task if the `Float` is NaN or infinite as neither has an `Int`
/// representation.
#[arret_rfi_derive::rust_fun("(Num -> Int)")]
pub fn stdlib_int(task: &mut Task, input: Gc<boxed::Num>) -> i64 {
    match input.as_subtype() {
        boxed::NumSubtype::Int(int_ref) => int_ref.value(),
        boxed::NumSubtype::Float(float_ref) => {
            let float_val = float_ref.value();

            if float_val.is_nan() {
                task.panic(format!(
                    "Float value `{}` is not a number; cannot convert to Int",
                    float_val
                ));
            } else if float_val.is_infinite() {
                task.panic(format!(
                    "Float value `{}` is infinite; cannot convert to Int",
                    float_val
                ));
            }

            // `as` saturates for out-of-range finite floats
            float_val as i64
        }
    }
}

#[arret_rfi_derive::rust_fun("(Num & Num -> Bool)")]
pub fn stdlib_num_lt(initial: Gc<boxed::Num>, rest: Gc<boxed::List<boxed::Num>>) -> bool {
    compare_nums(initial, rest, i64::lt, f64::lt)
}

#[arret_rfi_derive::rust_fun("(Num & Num -> Bool)")]
pub fn stdlib_num_le(initial: Gc<boxed::Num>, rest: Gc<boxed::List<boxed::Num>>) -> bool {
    compare_nums(initial, rest, i64::le,
f64::le) } #[arret_rfi_derive::rust_fun("(Num & Num -> Bool)")] pub fn stdlib_num_eq(initial: Gc, rest: Gc>) -> bool { compare_nums(initial, rest, i64::eq, f64::eq) } #[arret_rfi_derive::rust_fun("(Num & Num -> Bool)")] pub fn stdlib_num_gt(initial: Gc, rest: Gc>) -> bool { compare_nums(initial, rest, i64::gt, f64::gt) } #[arret_rfi_derive::rust_fun("(Num & Num -> Bool)")] pub fn stdlib_num_ge(initial: Gc, rest: Gc>) -> bool { compare_nums(initial, rest, i64::ge, f64::ge) } ================================================ FILE: stdlib/rust/read.rs ================================================ use arret_syntax::parser::datum_from_str; use arret_runtime::binding::*; use arret_runtime::boxed; use arret_runtime::boxed::refs::Gc; use arret_runtime::task::Task; use arret_runtime_syntax::reader; #[arret_rfi_derive::rust_fun("(Str -> Any)")] pub fn stdlib_read_str(task: &mut Task, edn_str: Gc) -> Gc { let parsed_syntax = datum_from_str(None, edn_str.as_str()).unwrap(); reader::box_syntax_datum(task, &parsed_syntax) } ================================================ FILE: stdlib/rust/set.rs ================================================ use arret_runtime::binding::*; use arret_runtime::boxed; use arret_runtime::boxed::refs::Gc; use arret_runtime::task::Task; #[arret_rfi_derive::rust_fun("(All #{T} & T -> (Setof T))")] pub fn stdlib_set( task: &mut Task, values: Gc>, ) -> Gc> { boxed::Set::new(task, values.iter()) } #[arret_rfi_derive::rust_fun("(All #{T} (Setof T) T -> Bool)")] pub fn stdlib_set_contains_p( task: &mut Task, set: Gc>, needle: Gc, ) -> bool { set.contains(task.heap(), &needle) } #[arret_rfi_derive::rust_fun("((Setof Any) -> Int)")] pub fn stdlib_set_length(set: Gc>) -> i64 { set.len() as i64 } #[arret_rfi_derive::rust_fun("(All #{T} (Setof T) -> (List & T))")] pub fn stdlib_set_to_list( task: &mut Task, set: Gc>, ) -> Gc> { boxed::List::new(task, set.iter()) } #[arret_rfi_derive::rust_fun("(All #{T} (Setof T) (Setof T) -> Bool)")] pub fn stdlib_subset_p( 
task: &mut Task,
    subset: Gc<boxed::Set<boxed::Any>>,
    superset: Gc<boxed::Set<boxed::Any>>,
) -> bool {
    subset.is_subset(task.heap(), &superset)
}

================================================
FILE: stdlib/rust/testing.rs
================================================
use arret_runtime::binding::*;
use arret_runtime::boxed;
use arret_runtime::boxed::prelude::*;
use arret_runtime::boxed::refs::Gc;
use arret_runtime::callback;
use arret_runtime::task::Task;

/// Returns `value` unchanged while acting as an optimisation barrier
#[arret_rfi_derive::rust_fun("(All #{T} T -> T)")]
pub fn stdlib_black_box(value: Gc<boxed::Any>) -> Gc<boxed::Any> {
    value
}

/// Impure variant of `black-box`; also returns `value` unchanged
#[arret_rfi_derive::rust_fun("(All #{T} T ->! T)")]
pub fn stdlib_black_box_impure(value: Gc<boxed::Any>) -> Gc<boxed::Any> {
    value
}

/// Runs `block` and returns a two element list of the number of heap cells
/// allocated during the call followed by the block's return value
#[arret_rfi_derive::rust_fun("(All #{[->_ ->!] T} (->_ T) ->_ (List Int T))")]
pub fn stdlib_heap_alloc_count(
    task: &mut Task,
    block: callback::Callback<extern "C" fn(&mut Task, boxed::Captures) -> Gc<boxed::Any>>,
) -> Gc<boxed::List<boxed::Any>> {
    let before_len = task.heap().len();
    let ret = block.apply(task);
    let after_len = task.heap().len();

    // NOTE(review): assumes the heap only grows while `block` runs; if a
    // collection could shrink it mid-call this subtraction would underflow —
    // confirm against the runtime's GC behaviour.
    let alloc_count = boxed::Int::new(task, (after_len - before_len) as i64);

    boxed::List::new(task, [alloc_count.as_any_ref(), ret].iter().cloned())
}

// TODO: This should return a `Set` once they're better supported
#[arret_rfi_derive::rust_fun("((... ->!
Any) -> (List & Sym))")] pub fn stdlib_fn_op_categories(_value: Gc) -> Gc> { panic!("cannot call `(fn-op-categories)` at runtime") } ================================================ FILE: stdlib/rust/vector.rs ================================================ use arret_runtime::binding::*; use arret_runtime::boxed; use arret_runtime::boxed::refs::Gc; use arret_runtime::task::Task; #[arret_rfi_derive::rust_fun("(All #{T} & T -> (Vectorof T))")] pub fn stdlib_vector( task: &mut Task, values: Gc>, ) -> Gc> { boxed::Vector::new(task, values.iter()) } #[arret_rfi_derive::rust_fun("(All #{T} (Vectorof T) Int -> T)")] pub fn stdlib_vector_ref( task: &mut Task, vector: Gc>, index: i64, ) -> Gc { let usize_index = if index < 0 { task.panic(format!("index {} is negative", index)); unreachable!("returned from panic") } else { index as usize }; match vector.get(usize_index) { Some(value) => value, None => { task.panic(format!( "index {} out of bounds for vector of length {}", usize_index, vector.len() )); unreachable!("returned from panic") } } } #[arret_rfi_derive::rust_fun("((Vectorof Any) -> Int)")] pub fn stdlib_vector_length(vector: Gc>) -> i64 { vector.len() as i64 } #[arret_rfi_derive::rust_fun("(All #{T} (Vectorof T) -> (List & T))")] pub fn stdlib_vector_to_list( task: &mut Task, vector: Gc>, ) -> Gc> { boxed::List::new(task, vector.iter()) } #[arret_rfi_derive::rust_fun("(All #{T} (Vectorof T) Int T -> (Vectorof T))")] pub fn stdlib_vector_assoc( task: &mut Task, vector: Gc>, index: i64, value: Gc, ) -> Gc> { let usize_index = if index < 0 { task.panic(format!("index {} is negative", index)); unreachable!("returned from panic") } else { index as usize }; if usize_index >= vector.len() { task.panic(format!( "index {} out of bounds for vector of length {}", usize_index, vector.len() )); unreachable!("returned from panic") } vector.assoc(task, usize_index, value) } #[arret_rfi_derive::rust_fun("(All #{T} & (Vectorof T) -> (Vectorof T))")] pub fn stdlib_vector_append( 
task: &mut Task, vectors: Gc>>, ) -> Gc> { let mut vectors_iter = vectors.iter(); let first_vector = if let Some(first_vector) = vectors_iter.next() { first_vector } else { return boxed::Vector::new(task, std::iter::empty()); }; vectors_iter.fold(first_vector, |v1, v2| v1.append(task, v2)) } #[arret_rfi_derive::rust_fun("(All #{T} (Vectorof T) & T -> (Vectorof T))")] pub fn stdlib_vector_extend( task: &mut Task, vector: Gc>, new_values: Gc>, ) -> Gc> { vector.extend(task, new_values.iter()) } #[arret_rfi_derive::rust_fun("(All #{T} Int (Vectorof T) -> (Vectorof T))")] pub fn stdlib_vector_take( task: &mut Task, count: i64, input: Gc>, ) -> Gc> { let usize_count = if count < 0 { 0 } else { count as usize }; input.take(task, usize_count) } ================================================ FILE: stdlib/rust/write.rs ================================================ use std::io; use std::io::prelude::*; use arret_runtime::binding::*; use arret_runtime::boxed; use arret_runtime::boxed::refs::Gc; use arret_runtime::task::Task; fn pretty_print_common( task: &mut Task, values: Gc>, output: &mut dyn Write, ) { for value in values.iter() { arret_runtime_syntax::writer::pretty_print_boxed(output, task, value); } } fn write_boxed_common( task: &mut Task, values: Gc>, output: &mut dyn Write, ) { let mut is_first = true; for value in values.iter() { if !is_first { output.write_all(&[b' ']).unwrap(); } arret_runtime_syntax::writer::write_boxed(output, task, value).unwrap(); is_first = false; } } #[arret_rfi_derive::rust_fun("(& Any ->! ())")] pub fn stdlib_print(task: &mut Task, values: Gc>) { let stdout = io::stdout(); let mut output = stdout.lock(); pretty_print_common(task, values, &mut output); } #[arret_rfi_derive::rust_fun("(& Any ->! 
())")] pub fn stdlib_println(task: &mut Task, values: Gc>) { let stdout = io::stdout(); let mut output = stdout.lock(); pretty_print_common(task, values, &mut output); output.write_all(&[b'\n']).unwrap(); } #[arret_rfi_derive::rust_fun("(& Any ->! ())")] pub fn stdlib_write(task: &mut Task, values: Gc>) { let stdout = io::stdout(); let mut output = stdout.lock(); write_boxed_common(task, values, &mut output); } #[arret_rfi_derive::rust_fun("(& Any ->! ())")] pub fn stdlib_writeln(task: &mut Task, values: Gc>) { let stdout = io::stdout(); let mut output = stdout.lock(); write_boxed_common(task, values, &mut output); output.write_all(&[b'\n']).unwrap(); } #[arret_rfi_derive::rust_fun("(& Any -> Str)")] pub fn stdlib_print_str(task: &mut Task, values: Gc>) -> Gc { let mut output: Vec = vec![]; pretty_print_common(task, values, &mut output); boxed::Str::new( task, std::str::from_utf8(&output).expect("wrote invalid UTF-8"), ) } #[arret_rfi_derive::rust_fun("(& Any -> Str)")] pub fn stdlib_write_str(task: &mut Task, values: Gc>) -> Gc { let mut output: Vec = vec![]; write_boxed_common(task, values, &mut output); boxed::Str::new( task, std::str::from_utf8(&output).expect("wrote invalid UTF-8"), ) } ================================================ FILE: syntax/Cargo.toml ================================================ [package] name = "arret-syntax" version = "0.1.0" edition = "2018" authors = ["Ryan Cumming "] [lib] path = "lib.rs" crate-type = ["lib"] ================================================ FILE: syntax/anon_fun.rs ================================================ use crate::datum::Datum; use crate::error::{Error, ErrorKind, Result}; use crate::span::Span; struct FoundArity { fixed_args: u8, has_rest: bool, } /// Visits all arg literals replacing `%` with `%1` and tracking our arity fn visit_arg_literals(found_arity: &mut FoundArity, datum: Datum) -> Result { match datum { Datum::Sym(span, name) => { if let Some(arg_literal) = name.strip_prefix('%') { match 
arg_literal { "" => { // We need to rewrite this to %1 in case it's also referred to by that name found_arity.fixed_args = std::cmp::max(found_arity.fixed_args, 1); Ok(Datum::Sym(span, "%1".into())) } "&" => { found_arity.has_rest = true; Ok(Datum::Sym(span, name)) } other => other .parse::() .map(|parsed_number| { found_arity.fixed_args = std::cmp::max(found_arity.fixed_args, parsed_number); Datum::Sym(span, name) }) .map_err(|_| Error::new(span, ErrorKind::InvalidArgLiteral)), } } else { Ok(Datum::Sym(span, name)) } } Datum::List(span, content) => { let replaced_content = content .into_vec() .into_iter() .map(|body_datum| visit_arg_literals(found_arity, body_datum)) .collect::>>()?; Ok(Datum::List(span, replaced_content.into())) } other => Ok(other), } } /// Converts body data from a `#()` reader macro in to an anonymous function pub fn convert_anon_fun(outer_span: Span, body_data: impl Iterator) -> Result { use std::iter; let mut found_arity = FoundArity { fixed_args: 0, has_rest: false, }; let replaced_body = body_data .map(|body_datum| visit_arg_literals(&mut found_arity, body_datum)) .collect::>>()?; let mut param_list: Vec = (0..found_arity.fixed_args) .map(|param_index| { let param_ordinal = param_index + 1; Datum::Sym(outer_span, format!("%{}", param_ordinal).into()) }) .collect(); if found_arity.has_rest { param_list.extend( iter::once(Datum::Sym(outer_span, "&".into())) .chain(iter::once(Datum::Sym(outer_span, "%&".into()))), ); } let expanded_fun = vec![ Datum::Sym(outer_span, "fn".into()), Datum::List(outer_span, param_list.into()), Datum::List(outer_span, replaced_body.into()), ]; Ok(Datum::List(outer_span, expanded_fun.into())) } ///////// #[cfg(test)] mod test { use super::*; use crate::parser::data_from_str; use crate::span::t2s; #[test] fn empty_fun() { let j = ""; let t = ""; let body_data = data_from_str(None, j).unwrap(); let outer_span = t2s(t); let expected = Datum::List( outer_span, Box::new([ Datum::Sym(outer_span, "fn".into()), 
Datum::List(outer_span, Box::new([])), Datum::List(outer_span, Box::new([])), ]), ); assert_eq!( expected, convert_anon_fun(outer_span, body_data.into_iter()).unwrap() ); } #[test] fn one_arg_fun() { let j = "%"; let t = "^"; let body_data = data_from_str(None, j).unwrap(); let outer_span = t2s(t); let expected = Datum::List( outer_span, Box::new([ Datum::Sym(outer_span, "fn".into()), Datum::List(outer_span, Box::new([Datum::Sym(outer_span, "%1".into())])), Datum::List( outer_span, Box::new([ // This is converted to %1 Datum::Sym(t2s(t), "%1".into()), ]), ), ]), ); assert_eq!( expected, convert_anon_fun(outer_span, body_data.into_iter()).unwrap() ); } #[test] fn two_arg_fun() { let j = "%1 %2"; let t = "^^^^^"; let u = "^^ "; let v = " ^^"; let body_data = data_from_str(None, j).unwrap(); let outer_span = t2s(t); let expected = Datum::List( outer_span, Box::new([ Datum::Sym(outer_span, "fn".into()), Datum::List( outer_span, Box::new([ Datum::Sym(outer_span, "%1".into()), Datum::Sym(outer_span, "%2".into()), ]), ), Datum::List( outer_span, Box::new([ Datum::Sym(t2s(u), "%1".into()), Datum::Sym(t2s(v), "%2".into()), ]), ), ]), ); assert_eq!( expected, convert_anon_fun(outer_span, body_data.into_iter()).unwrap() ); } #[test] fn rest_fun() { let j = "%1 %&"; let t = "^^^^^"; let u = "^^ "; let v = " ^^"; let body_data = data_from_str(None, j).unwrap(); let outer_span = t2s(t); let expected = Datum::List( outer_span, Box::new([ Datum::Sym(outer_span, "fn".into()), Datum::List( outer_span, Box::new([ Datum::Sym(outer_span, "%1".into()), Datum::Sym(outer_span, "&".into()), Datum::Sym(outer_span, "%&".into()), ]), ), Datum::List( outer_span, Box::new([ Datum::Sym(t2s(u), "%1".into()), Datum::Sym(t2s(v), "%&".into()), ]), ), ]), ); assert_eq!( expected, convert_anon_fun(outer_span, body_data.into_iter()).unwrap() ); } } ================================================ FILE: syntax/datum.rs ================================================ use std::sync::Arc; use 
crate::span::Span; pub type DataStr = Arc; #[derive(PartialEq, Debug, Clone)] pub enum Datum { Bool(Span, bool), Char(Span, char), Int(Span, i64), Float(Span, f64), List(Span, Box<[Datum]>), Str(Span, DataStr), Sym(Span, DataStr), Vector(Span, Box<[Datum]>), Map(Span, Box<[(Datum, Datum)]>), Set(Span, Box<[Datum]>), } impl Datum { pub fn span(&self) -> Span { match self { Datum::Bool(span, _) | Datum::Char(span, _) | Datum::Int(span, _) | Datum::Float(span, _) | Datum::List(span, _) | Datum::Str(span, _) | Datum::Sym(span, _) | Datum::Vector(span, _) | Datum::Map(span, _) | Datum::Set(span, _) => *span, } } pub fn description(&self) -> &'static str { match self { Datum::Bool(_, true) => "boolean true", Datum::Bool(_, false) => "boolean false", Datum::Char(_, _) => "character", Datum::Int(_, _) => "integer", Datum::Float(_, _) => "floating point number", Datum::Str(_, _) => "string", Datum::Sym(_, name) => { if name.starts_with(':') { "keyword" } else { "symbol" } } Datum::List(_, vs) if vs.is_empty() => "empty list", Datum::List(_, _) => "list", Datum::Vector(_, vs) if vs.is_empty() => "empty vector", Datum::Vector(_, _) => "vector", Datum::Set(_, vs) if vs.is_empty() => "empty set", Datum::Set(_, _) => "set", Datum::Map(_, vs) if vs.is_empty() => "empty map", Datum::Map(_, _) => "map", } } } ================================================ FILE: syntax/error.rs ================================================ use std::error; use std::fmt; use std::fmt::Display; use std::result; use crate::span::Span; /// (Spanned)[`Span`] syntax error #[derive(Debug, Clone, PartialEq)] pub struct Error { span: Span, pub(crate) kind: ErrorKind, } impl Error { pub fn new(span: Span, kind: ErrorKind) -> Error { Error { span, kind } } pub fn kind(&self) -> &ErrorKind { &self.kind } pub fn span(&self) -> Span { self.span } } impl error::Error for Error {} impl Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(&self.kind().message()) } } /// 
Syntax error without (span)[`Span`] information #[derive(Debug, Clone, PartialEq)] pub enum ErrorKind { Eof(WithinContext), UnsupportedDispatch, UnsupportedChar, InvalidCodePoint, UnsupportedStringEscape, IntegerOverflow, InvalidFloat, UnexpectedChar(char, WithinContext), UnevenMap, InvalidArgLiteral, } impl ErrorKind { /// Returns a string describing the error pub fn message(&self) -> String { match self { ErrorKind::Eof(ref within) => format!( "unexpected end of file while parsing {}", within.description() ), ErrorKind::UnsupportedDispatch => "unsupported dispatch".to_owned(), ErrorKind::UnsupportedChar => "unsupported character".to_owned(), ErrorKind::InvalidCodePoint => "invalid code point".to_owned(), ErrorKind::UnsupportedStringEscape => "unsupported string escape".to_owned(), ErrorKind::IntegerOverflow => "integer literal does not fit in i64".to_owned(), ErrorKind::InvalidFloat => "unable to parse float".to_owned(), ErrorKind::UnexpectedChar(c, within) => { format!("unexpected `{}` while parsing {}", c, within.description()) } ErrorKind::UnevenMap => "map literal must have an even number of values".to_owned(), ErrorKind::InvalidArgLiteral => { "arg literal must be `%`, `%{integer}` or `%&`".to_owned() } } } /// Returns the context this error was encountered in /// /// This is used to disambiguate errors that can occur in multiple contexts. Other error types /// (e.g. `InvalidFloat`) require no additional context. 
pub fn within_context(&self) -> Option { match self { ErrorKind::Eof(within) | ErrorKind::UnexpectedChar(_, within) => Some(*within), _ => None, } } } pub type Result = result::Result; /// Describes the content an error occurred within, with optional starting span #[derive(Debug, PartialEq, Clone, Copy)] pub enum WithinContext { List(Span), Vector(Span), Set(Span), Map(Span), String(Span), Identifier, Datum, Dispatch, QuoteEscape, CodePoint, } impl WithinContext { /// Returns a description of the content that was being parsed pub fn description(&self) -> &'static str { match self { WithinContext::List(_) => "list", WithinContext::Vector(_) => "vector", WithinContext::Set(_) => "set", WithinContext::Map(_) => "map", WithinContext::String(_) => "string literal", WithinContext::Identifier => "identifier", WithinContext::Datum => "datum", WithinContext::Dispatch => "dispatch", WithinContext::QuoteEscape => "quote escape", WithinContext::CodePoint => "code point", } } /// Returns the normally expected in this context pub fn expected_next(&self) -> Option { match self { WithinContext::List(_) => Some(ExpectedNext::List), WithinContext::Vector(_) => Some(ExpectedNext::Vector), WithinContext::Set(_) => Some(ExpectedNext::Set), WithinContext::Map(_) => Some(ExpectedNext::Map), WithinContext::String(_) => Some(ExpectedNext::String), _ => None, } } /// Returns the character opening the sequence or string pub fn open_char_span(&self) -> Option { match self { WithinContext::List(span) | WithinContext::Vector(span) | WithinContext::Set(span) | WithinContext::Map(span) | WithinContext::String(span) => Some(*span), _ => None, } } } /// Describes the content normally expected within the content #[derive(Debug, PartialEq, Clone, Copy)] pub enum ExpectedNext { List, Vector, Set, Map, String, } impl ExpectedNext { /// Returns the character that would terminate this sequence or string pub fn close_char(self) -> char { match self { ExpectedNext::List => ')', ExpectedNext::Vector => ']', 
ExpectedNext::Set => '}', ExpectedNext::Map => '}', ExpectedNext::String => '"', } } pub fn description(self) -> String { match self { ExpectedNext::String => "expected `\"`".to_owned(), other => format!("expected datum or `{}`", other.close_char()), } } } ================================================ FILE: syntax/lib.rs ================================================ #![warn(clippy::all)] #![warn(rust_2018_idioms)] mod anon_fun; pub mod datum; pub mod error; pub mod parser; pub mod span; ================================================ FILE: syntax/parser.rs ================================================ use crate::datum::Datum; use crate::error::{Error, ErrorKind, Result, WithinContext}; use crate::span::{ByteIndex, FileId, Span}; pub fn data_from_str_with_span_offset( file_id: Option, s: &str, span_offset: ByteIndex, ) -> Result> { Parser::from_str(file_id, s, span_offset).parse_data() } pub fn data_from_str(file_id: Option, s: &str) -> Result> { data_from_str_with_span_offset(file_id, s, 0) } pub fn datum_from_str_with_span_offset( file_id: Option, s: &str, span_offset: ByteIndex, ) -> Result { Parser::from_str(file_id, s, span_offset).parse_datum() } pub fn datum_from_str(file_id: Option, s: &str) -> Result { datum_from_str_with_span_offset(file_id, s, 0) } fn is_whitespace(c: char) -> bool { matches!(c, ',' | ' ' | '\n' | '\t' | '\r') } pub fn is_identifier_char(c: char) -> bool { matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | // Punctuation allowed at beginning of an identifier '.' | '*' | '+' | '!' | '-' | '_' | '?' 
/// Incremental parser over an input string slice
///
/// `consumed_bytes` tracks the absolute byte offset of the front of `input`
/// (including any initial `span_offset`) so every produced `Span` is absolute.
pub struct Parser<'input> {
    file_id: Option<FileId>,
    input: &'input str,
    consumed_bytes: ByteIndex,
}

impl<'input> Parser<'input> {
    fn from_str(file_id: Option<FileId>, input: &'input str, span_offset: ByteIndex) -> Self {
        Parser {
            file_id,
            input,
            consumed_bytes: span_offset,
        }
    }

    /// Builds an EOF error with a zero-width span at the end of the input
    fn eof_err(&self, within: WithinContext) -> Error {
        let eof_pos = self.consumed_bytes + (self.input.len() as ByteIndex);

        Error::new(
            Span::new(self.file_id, eof_pos, eof_pos),
            ErrorKind::Eof(within),
        )
    }

    /// Returns the next character without consuming it
    fn peek_char(&mut self, within: WithinContext) -> Result<char> {
        self.input
            .chars()
            .next()
            .ok_or_else(|| self.eof_err(within))
    }

    /// Returns the `i`th character (0-based) without consuming anything
    fn peek_nth_char(&mut self, i: usize, within: WithinContext) -> Result<char> {
        self.input
            .chars()
            .nth(i)
            .ok_or_else(|| self.eof_err(within))
    }

    /// Advances past `count` bytes; `count` must land on a char boundary
    fn eat_bytes(&mut self, count: usize) {
        self.input = &self.input[count..];
        self.consumed_bytes += count as ByteIndex;
    }

    /// Consumes and returns the next character
    fn consume_char(&mut self, within: WithinContext) -> Result<char> {
        let mut char_indices = self.input.char_indices();

        match char_indices.next() {
            Some((_, c)) => {
                // Eat up to the start of the following char (or all remaining input)
                let next_index = char_indices
                    .next()
                    .map(|t| t.0)
                    .unwrap_or_else(|| self.input.len());
                self.eat_bytes(next_index);

                Ok(c)
            }
            None => Err(self.eof_err(within)),
        }
    }

    /// Skips whitespace, line comments (`;`) and datum comments (`#_`), then
    /// peeks the first significant character without consuming it
    fn skip_until_non_whitespace(&mut self, within: WithinContext) -> Result<char> {
        loop {
            self.consume_while(is_whitespace);

            match self.peek_char(within)? {
                ';' => {
                    // Line comment; discard to end of line
                    self.consume_until(|c| c == '\n');
                }
                '#' => match self.peek_nth_char(1, within) {
                    Ok('_') => {
                        // Discard the #_ and the following datum
                        self.eat_bytes(2);
                        self.parse_datum()?;
                    }
                    _ => {
                        break Ok('#');
                    }
                },
                other => {
                    break Ok(other);
                }
            }
        }
    }

    /// Consumes characters until `predicate` matches, returning the consumed
    /// prefix and its span; consumes everything if it never matches
    fn consume_until<T>(&mut self, predicate: T) -> (Span, &str)
    where
        T: FnMut(char) -> bool,
    {
        let start = self.consumed_bytes;
        let last_index = self
            .input
            .find(predicate)
            .unwrap_or_else(|| self.input.len());

        let (consumed, remaining_input) = self.input.split_at(last_index);
        self.input = remaining_input;
        self.consumed_bytes += last_index as ByteIndex;

        (
            Span::new(self.file_id, start, self.consumed_bytes),
            consumed,
        )
    }

    /// Consumes characters while `predicate` matches
    fn consume_while<T>(&mut self, mut predicate: T) -> (Span, &str)
    where
        T: FnMut(char) -> bool,
    {
        self.consume_until(|c| !predicate(c))
    }

    /// Runs `block` and returns its result together with the span of all
    /// input it consumed
    fn capture_span<F, R>(&mut self, block: F) -> (Span, R)
    where
        F: FnOnce(&mut Parser<'_>) -> R,
    {
        let start = self.consumed_bytes;
        let result = block(self);
        let end = self.consumed_bytes;

        (Span::new(self.file_id, start, end), result)
    }

    /// Parses an integer or float literal using a small sign/whole/fractional
    /// state machine; seeing a `.` switches from integer to float parsing
    fn parse_num(&mut self) -> Result<Datum> {
        enum State {
            Sign,
            Whole,
            Fractional,
        }

        let mut state: State = State::Sign;
        let (span, digits) = self.consume_while(|c| match state {
            State::Sign => match c {
                '+' | '-' | '0'..='9' => {
                    state = State::Whole;
                    true
                }
                _ => false,
            },
            State::Whole => match c {
                '.' => {
                    state = State::Fractional;
                    true
                }
                '0'..='9' => true,
                _ => false,
            },
            State::Fractional => matches!(c, '0'..='9'),
        });

        match state {
            State::Sign => Err(Error::new(span, ErrorKind::InvalidFloat)),
            State::Whole => digits
                .parse::<i64>()
                .map_err(|_| Error::new(span, ErrorKind::IntegerOverflow))
                .map(|i| Datum::Int(span, i)),
            State::Fractional => digits
                .parse::<f64>()
                .map_err(|_| Error::new(span, ErrorKind::InvalidFloat))
                .map(|f| Datum::Float(span, f)),
        }
    }
=> { state = State::Fractional; true } '0'..='9' => true, _ => false, }, State::Fractional => matches!(c, '0'..='9'), }); match state { State::Sign => Err(Error::new(span, ErrorKind::InvalidFloat)), State::Whole => digits .parse::() .map_err(|_| Error::new(span, ErrorKind::IntegerOverflow)) .map(|i| Datum::Int(span, i)), State::Fractional => digits .parse::() .map_err(|_| Error::new(span, ErrorKind::InvalidFloat)) .map(|f| Datum::Float(span, f)), } } fn parse_symbolic_float(&mut self) -> Result { let (span, symbolic_name) = self.consume_while(is_identifier_char); let float_value = match symbolic_name { "#NaN" => std::f64::NAN, "#Inf" => std::f64::INFINITY, "#-Inf" => std::f64::NEG_INFINITY, _ => { return Err(Error::new(span, ErrorKind::UnsupportedDispatch)); } }; Ok(Datum::Float( // Cover the initial # Span::new(self.file_id, span.start() - 1, span.end()), float_value, )) } fn parse_signed_num_or_symbol(&mut self) -> Result { match self.peek_nth_char(1, WithinContext::Identifier) { Ok(digit) if digit.is_ascii_digit() => self.parse_num(), Ok(_) | Err(Error { kind: ErrorKind::Eof(_), .. 
}) => self.parse_identifier(WithinContext::Identifier), Err(other) => Err(other), } } fn parse_char(&mut self) -> Result { let (span, c) = self.capture_span(|s| { // Consume the \ s.eat_bytes(1); // Consume the character name let (span, char_name) = s.consume_until(|c| c == ')' || c == ']' || c == '}' || is_whitespace(c)); let mut char_name_chars = char_name.chars(); if let Some(first_char) = char_name_chars.next() { if char_name_chars.next().is_none() { // There is only a single character; return it return Ok(first_char); } if first_char == 'u' { // This is a hex code point let hex_string = &char_name[1..]; let code_point = u32::from_str_radix(hex_string, 16) .map_err(|_| Error::new(span, ErrorKind::UnsupportedChar))?; return std::char::from_u32(code_point) .ok_or_else(|| Error::new(span, ErrorKind::InvalidCodePoint)); } } match char_name { "newline" => Ok('\n'), "return" => Ok('\r'), "space" => Ok(' '), "tab" => Ok('\t'), _ => Err(Error::new(span, ErrorKind::UnsupportedChar)), } }); c.map(|c| Datum::Char(span, c)) } fn parse_dispatch(&mut self) -> Result { // Consume the # // This means we need to adjust our spans below to cover it for reporting self.eat_bytes(1); match self.peek_char(WithinContext::Dispatch)? 
{ '{' => self.parse_set(), '(' => self.parse_anon_fun(), '#' => self.parse_symbolic_float(), _ => { let (span, _) = self.capture_span(|s| s.consume_char(WithinContext::Dispatch)); Err(Error::new( Span::new(self.file_id, span.start() - 1, span.end()), ErrorKind::UnsupportedDispatch, )) } } } fn parse_seq(&mut self, terminator: char, make_ec: F) -> Result> where F: FnOnce(Span) -> WithinContext, { // Consume the opening bracket let (open_bracket_span, _) = self.capture_span(|s| { s.eat_bytes(1); }); let ec = make_ec(open_bracket_span); let mut content = Vec::new(); // Keep eating data until we hit the terminator loop { let next_char = self.skip_until_non_whitespace(ec)?; if next_char == terminator { // End of the sequence self.eat_bytes(1); break Ok(content); } else { content.push(self.parse_datum_starting_with(next_char, ec)?); } } } fn parse_list(&mut self) -> Result { let (outer_span, contents) = self.capture_span(|s| s.parse_seq(')', WithinContext::List)); contents.map(|contents| Datum::List(outer_span, contents.into())) } fn parse_vector(&mut self) -> Result { let (outer_span, contents) = self.capture_span(|s| s.parse_seq(']', WithinContext::Vector)); contents.map(|contents| Datum::Vector(outer_span, contents.into())) } fn parse_map(&mut self) -> Result { // First get the contents without splitting pairwise let (span, unpaired_contents) = self.capture_span(|s| s.parse_seq('}', WithinContext::Map)); let unpaired_contents = unpaired_contents?; if unpaired_contents.len() % 2 == 1 { return Err(Error::new(span, ErrorKind::UnevenMap)); } let mut paired_contents = Vec::with_capacity(unpaired_contents.len() / 2); let mut unpaired_contents_iter = unpaired_contents.into_iter(); while let Some(key) = unpaired_contents_iter.next() { let value = unpaired_contents_iter.next().unwrap(); paired_contents.push((key, value)); } Ok(Datum::Map(span, paired_contents.into_boxed_slice())) } fn parse_set(&mut self) -> Result { let (outer_span, contents) = self.capture_span(|s| 
s.parse_seq('}', WithinContext::Set)); contents.map(|contents| { Datum::Set( // Cover the # in our span Span::new(self.file_id, outer_span.start() - 1, outer_span.end()), contents.into(), ) }) } fn parse_anon_fun(&mut self) -> Result { use crate::anon_fun::convert_anon_fun; let (outer_span, body_contents) = self.capture_span(|s| s.parse_seq(')', WithinContext::List)); let body_contents = body_contents?; convert_anon_fun( // Cover the # in our span Span::new(self.file_id, outer_span.start() - 1, outer_span.end()), body_contents.into_iter(), ) } fn parse_quote_escape(&mut self) -> Result { let escape_start = self.consumed_bytes as ByteIndex; match self.consume_char(WithinContext::QuoteEscape)? { 't' => Ok('\t'), 'r' => Ok('\r'), 'n' => Ok('\n'), '\\' => Ok('\\'), '"' => Ok('"'), 'x' => { let (span, hex_string) = self.consume_until(|c| c == ';'); let code_point = u32::from_str_radix(hex_string, 16); let code_point = code_point.map_err(|_| Error::new(span, ErrorKind::UnsupportedChar))?; if self.consume_char(WithinContext::CodePoint)? != ';' { return Err(Error::new(span, ErrorKind::UnsupportedChar)); } std::char::from_u32(code_point) .ok_or_else(|| Error::new(span, ErrorKind::InvalidCodePoint)) } _ => Err(Error::new( Span::new(self.file_id, escape_start, self.consumed_bytes), ErrorKind::UnsupportedStringEscape, )), } } fn parse_string(&mut self) -> Result { let (span, contents) = self.capture_span(|s| { let (open_quote_span, _) = s.capture_span(|s| { // Eat the opening quote s.eat_bytes(1); }); let mut contents = String::new(); loop { let (_, unescaped_contents) = s.consume_until(|c| c == '"' || c == '\\'); contents.push_str(unescaped_contents); match s.consume_char(WithinContext::String(open_quote_span))? 
{ '"' => { return Ok(contents); } '\\' => contents.push(s.parse_quote_escape()?), _ => { unreachable!("Shouldn't be here"); } } } }); contents.map(|contents| Datum::Str(span, contents.into())) } fn parse_identifier(&mut self, within: WithinContext) -> Result { let (span, content) = self.consume_while(is_identifier_char); if content.is_empty() { let (span, next_char) = self.capture_span(|s| s.consume_char(within)); return Err(Error::new( span, ErrorKind::UnexpectedChar(next_char?, within), )); } match content { "true" => Ok(Datum::Bool(span, true)), "false" => Ok(Datum::Bool(span, false)), _ => Ok(Datum::Sym(span, content.into())), } } fn parse_symbol_shorthand(&mut self, expansion: &str) -> Result { let (outer_span, (shorthand_span, quoted_datum)) = self.capture_span(|s| { let (shorthand_span, _) = s.capture_span(|s| { // Discard the shorthand. Note this must be ASCII. s.eat_bytes(1); }); (shorthand_span, s.parse_datum()) }); quoted_datum.map(|quoted_datum| { Datum::List( outer_span, Box::new([Datum::Sym(shorthand_span, expansion.into()), quoted_datum]), ) }) } fn parse_datum_starting_with(&mut self, c: char, within: WithinContext) -> Result { match c { '(' => self.parse_list(), '[' => self.parse_vector(), '{' => self.parse_map(), '0'..='9' => self.parse_num(), '-' | '+' => self.parse_signed_num_or_symbol(), '\'' => self.parse_symbol_shorthand("quote"), '"' => self.parse_string(), '\\' => self.parse_char(), '#' => self.parse_dispatch(), _ => self.parse_identifier(within), } } fn parse_datum(&mut self) -> Result { let ec = WithinContext::Datum; let start_char = self.skip_until_non_whitespace(ec)?; self.parse_datum_starting_with(start_char, ec) } fn parse_data(&mut self) -> Result> { let mut datum_vec = Vec::new(); // Keep eating datums until we hit EOF loop { match self.parse_datum() { Ok(datum) => { datum_vec.push(datum); } Err(err) if err.kind() == &ErrorKind::Eof(WithinContext::Datum) => { break Ok(datum_vec) } Err(err) => break Err(err), } } } } ///////// 
// NOTE(review): the caret-diagram string literals below are reproduced exactly
// as they appear in this extraction; interior run-length whitespace may have
// been collapsed by the extraction tool — verify against the original file.
#[allow(clippy::many_single_char_names)]
#[cfg(test)]
mod test {
    use super::*;
    use crate::span::t2s;

    // Span covering the entire test string
    fn whole_str_span(v: &str) -> Span {
        Span::new(None, 0, v.len() as ByteIndex)
    }

    #[test]
    fn bool_datum() {
        let j = "false";
        let t = "^^^^^";
        let expected = Datum::Bool(t2s(t), false);
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "true";
        let t = "^^^^";
        let expected = Datum::Bool(t2s(t), true);
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = " false";
        let t = " ^^^^^";
        let expected = Datum::Bool(t2s(t), false);
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "\ttrue\t";
        let t = "\t^^^^\t";
        let expected = Datum::Bool(t2s(t), true);
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = " trueorfalse ";
        let t = " ^^^^^^^^^^^ ";
        let expected = Datum::Sym(t2s(t), "trueorfalse".into());
        assert_eq!(expected, datum_from_str(None, j).unwrap());
    }

    #[test]
    fn list_datum() {
        let j = "() ; with a comment";
        let t = "^^ ";
        let expected = Datum::List(t2s(t), Box::new([]));
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "( true false )";
        let t = "^^^^^^^^^^^^^^^^";
        let u = " ^^^^ ";
        let v = " ^^^^^ ";
        let expected = Datum::List(
            t2s(t),
            Box::new([Datum::Bool(t2s(u), true), Datum::Bool(t2s(v), false)]),
        );
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "(1, 2, (3))";
        let t = "^^^^^^^^^^^";
        let u = " ^ ";
        let v = " ^ ";
        let w = " ^^^ ";
        let x = " ^ ";
        let expected = Datum::List(
            t2s(t),
            Box::new([
                Datum::Int(t2s(u), 1),
                Datum::Int(t2s(v), 2),
                Datum::List(t2s(w), Box::new([Datum::Int(t2s(x), 3)])),
            ]),
        );
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "(true";
        let t = " >";
        let u = "^ ";
        let err = Error::new(t2s(t), ErrorKind::Eof(WithinContext::List(t2s(u))));
        assert_eq!(err, datum_from_str(None, j).unwrap_err());

        let j = ")";
        let t = "^";
        let err = Error::new(t2s(t), ErrorKind::UnexpectedChar(')', WithinContext::Datum));
        assert_eq!(err, datum_from_str(None, j).unwrap_err());

        let j = "(]";
        let t = "^ ";
        let u = " ^";
        let err = Error::new(
            t2s(u),
            ErrorKind::UnexpectedChar(']', WithinContext::List(t2s(t))),
        );
        assert_eq!(err, datum_from_str(None, j).unwrap_err());
    }

    #[test]
    fn vector_datum() {
        let j = " []";
        let t = " ^^";
        let expected = Datum::Vector(t2s(t), Box::new([]));
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "[ true (true false) ]";
        let t = "^^^^^^^^^^^^^^^^^^^^^^^";
        let u = " ^^^^ ";
        let v = " ^^^^^^^^^^^^ ";
        let w = " ^^^^ ";
        let x = " ^^^^^ ";
        let expected = Datum::Vector(
            t2s(t),
            Box::new([
                Datum::Bool(t2s(u), true),
                Datum::List(
                    t2s(v),
                    Box::new([Datum::Bool(t2s(w), true), Datum::Bool(t2s(x), false)]),
                ),
            ]),
        );
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "[true []";
        let t = " >";
        let u = "^ ";
        let err = Error::new(t2s(t), ErrorKind::Eof(WithinContext::Vector(t2s(u))));
        assert_eq!(err, datum_from_str(None, j).unwrap_err());

        let j = "]";
        let t = "^";
        let err = Error::new(t2s(t), ErrorKind::UnexpectedChar(']', WithinContext::Datum));
        assert_eq!(err, datum_from_str(None, j).unwrap_err());
    }

    #[test]
    fn symbol_datum() {
        for &test_symbol in &[
            "HELLO",
            "HELLO123",
            "predicate?",
            "mutate!",
            "from->to",
            "!$%&*+-./:<=>?",
            // These are nearly numbers
            ".",
            "+",
            "+.",
            "+.5",
            "-",
            "-.",
            "-.5",
        ] {
            let s = whole_str_span(test_symbol);
            let expected = Datum::Sym(s, test_symbol.into());
            assert_eq!(expected, datum_from_str(None, test_symbol).unwrap());
        }
    }

    #[test]
    fn keyword_symbol_datum() {
        for &test_symbol in &[":HELLO", ":HELLO123", ":predicate?", ":mutate!"] {
            let s = whole_str_span(test_symbol);
            let expected = Datum::Sym(s, test_symbol.into());
            assert_eq!(expected, datum_from_str(None, test_symbol).unwrap());
        }
    }

    #[test]
    fn string_datum() {
        let test_strings = [
            (r#""""#, ""),
            (r#""Hello, world!""#, "Hello, world!"),
            (r#""Hello\"World""#, "Hello\"World"),
            (r#""Hello\\World""#, "Hello\\World"),
            (r#""Tab\t""#, "Tab\t"),
            (r#""\nnewline""#, "\nnewline"),
            (r#""carriage: \r""#, "carriage: \r"),
            (r#""Space\x20;Bar""#, "Space Bar"),
            (r#""l\x03BB;""#, "l\u{03bb}"),
            (r#""\x0;null!""#, "\u{0000}null!"),
            (
                r#""The word \"recursion\" has many meanings.""#,
                r#"The word "recursion" has many meanings."#,
            ),
        ];

        for (test_string, expected_contents) in &test_strings {
            let s = whole_str_span(test_string);
            let expected = Datum::Str(s, (*expected_contents).into());
            assert_eq!(expected, datum_from_str(None, test_string).unwrap());
        }

        let j = r#" "foo "#;
        let t = r#" >"#;
        let u = r#" ^ "#;
        let err = Error::new(t2s(t), ErrorKind::Eof(WithinContext::String(t2s(u))));
        assert_eq!(err, datum_from_str(None, j).unwrap_err());

        let j = r#""\p""#;
        let t = r#" ^ "#;
        let err = Error::new(t2s(t), ErrorKind::UnsupportedStringEscape);
        assert_eq!(err, datum_from_str(None, j).unwrap_err());
    }

    #[test]
    fn char_datum() {
        let test_chars = [
            ("\\newline", '\u{0a}'),
            ("\\return", '\u{0d}'),
            ("\\space", '\u{20}'),
            ("\\tab", '\u{09}'),
            ("\\a", 'a'),
            ("\\A", 'A'),
            ("\\(", '('),
            ("\\☃", '\u{2603}'),
            ("\\u03BB", '\u{03bb}'),
        ];

        for (j, expected_char) in &test_chars {
            let s = whole_str_span(j);
            let expected = Datum::Char(s, *expected_char);
            assert_eq!(expected, datum_from_str(None, j).unwrap());
        }

        let j = r#"\SPACE"#;
        let t = r#" ^^^^^"#;
        let err = Error::new(t2s(t), ErrorKind::UnsupportedChar);
        assert_eq!(err, datum_from_str(None, j).unwrap_err());

        let j = r#"\u110000"#;
        let t = r#" ^^^^^^^"#;
        let err = Error::new(t2s(t), ErrorKind::InvalidCodePoint);
        assert_eq!(err, datum_from_str(None, j).unwrap_err());

        let j = r#"[\newline]"#;
        let t = r#" ^^^^^^^^ "#;
        let expected = Datum::Vector(whole_str_span(j), Box::new([Datum::Char(t2s(t), '\n')]));
        assert_eq!(expected, datum_from_str(None, j).unwrap());
    }

    #[test]
    fn int_datum() {
        let test_ints = [
            ("0", 0),
            ("000", 0),
            ("1000", 1000),
            ("+1000", 1000),
            ("-1000", -1000),
            ("9223372036854775807", 9223372036854775807),
            ("+9223372036854775807", 9223372036854775807),
            ("-9223372036854775808", -9223372036854775808),
        ];

        for &(j, expected_int) in &test_ints {
            let s = whole_str_span(j);
            let expected = Datum::Int(s, expected_int);
            assert_eq!(expected, datum_from_str(None, j).unwrap());
        }

        let j = "10223372036854775807";
        let t = "^^^^^^^^^^^^^^^^^^^^";
        let err = Error::new(t2s(t), ErrorKind::IntegerOverflow);
        assert_eq!(err, datum_from_str(None, j).unwrap_err());

        let j = "-10223372036854775807";
        let t = "^^^^^^^^^^^^^^^^^^^^^";
        let err = Error::new(t2s(t), ErrorKind::IntegerOverflow);
        assert_eq!(err, datum_from_str(None, j).unwrap_err());

        let j = "4545894549584910223372036854775807";
        let t = "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^";
        let err = Error::new(t2s(t), ErrorKind::IntegerOverflow);
        assert_eq!(err, datum_from_str(None, j).unwrap_err());
    }

    #[test]
    fn float_datum() {
        let test_floats = [
            ("0.", 0.0),
            ("0.0", 0.0),
            ("000.000", 0.0),
            ("+16.", 16.0),
            ("+16.5", 16.5),
            ("+016.500", 16.5),
            ("-32.", -32.0),
            ("-32.25", -32.25),
            ("-032.2500", -32.25),
            ("##Inf", std::f64::INFINITY),
            ("##-Inf", std::f64::NEG_INFINITY),
        ];

        for &(j, expected_float) in &test_floats {
            let s = whole_str_span(j);
            let expected = Datum::Float(s, expected_float);
            assert_eq!(expected, datum_from_str(None, j).unwrap());
        }

        // This can't be compared using normal equality
        if let Datum::Float(_, f) = datum_from_str(None, "##NaN").unwrap() {
            assert!(f.is_nan());
        } else {
            panic!("Expected ##NaN to parse as float");
        }
    }

    #[test]
    fn map_datum() {
        let j = "{}";
        let t = "^^";
        let expected = Datum::Map(t2s(t), Box::new([]));
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "{ 1,2 ,, 3 4}";
        let t = "^^^^^^^^^^^^^^";
        let u = " ^ ";
        let v = " ^ ";
        let w = " ^ ";
        let x = " ^ ";
        let expected_contents = Box::new([
            (Datum::Int(t2s(u), 1), Datum::Int(t2s(v), 2)),
            (Datum::Int(t2s(w), 3), Datum::Int(t2s(x), 4)),
        ]);
        let expected = Datum::Map(t2s(t), expected_contents);
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "{1 {2 3}}";
        let t = "^^^^^^^^^";
        let u = " ^ ";
        let v = " ^^^^^ ";
        let w = " ^ ";
        let x = " ^ ";
        let inner_contents = Box::new([(Datum::Int(t2s(w), 2), Datum::Int(t2s(x), 3))]);
        let inner = Datum::Map(t2s(v), inner_contents);
        let outer_contents = Box::new([(Datum::Int(t2s(u), 1), inner)]);
        let expected = Datum::Map(t2s(t), outer_contents);
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "{1}";
        let t = "^^^";
        let err = Error::new(t2s(t), ErrorKind::UnevenMap);
        assert_eq!(err, datum_from_str(None, j).unwrap_err());
    }

    #[test]
    fn set_datum() {
        let j = "#{}";
        let t = "^^^";
        let expected = Datum::Set(t2s(t), Box::new([]));
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "#{ 1 2 3 4}";
        let t = "^^^^^^^^^^^^";
        let u = " ^ ";
        let v = " ^ ";
        let w = " ^ ";
        let x = " ^ ";
        let expected_contents = Box::new([
            Datum::Int(t2s(u), 1),
            Datum::Int(t2s(v), 2),
            Datum::Int(t2s(w), 3),
            Datum::Int(t2s(x), 4),
        ]);
        let expected = Datum::Set(t2s(t), expected_contents);
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "#{1 #{2 3}}";
        let t = "^^^^^^^^^^^";
        let u = " ^ ";
        let v = " ^^^^^^ ";
        let w = " ^ ";
        let x = " ^ ";
        let inner_contents = Box::new([(Datum::Int(t2s(w), 2)), (Datum::Int(t2s(x), 3))]);
        let inner = Datum::Set(t2s(v), inner_contents);
        let outer_contents = Box::new([Datum::Int(t2s(u), 1), inner]);
        let expected = Datum::Set(t2s(t), outer_contents);
        assert_eq!(expected, datum_from_str(None, j).unwrap());
    }

    #[test]
    fn quote_shorthand() {
        let j = "'foo";
        let t = "^^^^";
        let u = "^ ";
        let v = " ^^^";
        let expected = Datum::List(
            t2s(t),
            Box::new([
                Datum::Sym(t2s(u), "quote".into()),
                Datum::Sym(t2s(v), "foo".into()),
            ]),
        );
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "' (1 2 3)";
        let t = "^^^^^^^^^";
        let u = "^ ";
        let v = " ^^^^^^^";
        let w = " ^ ";
        let x = " ^ ";
        let y = " ^ ";
        let expected = Datum::List(
            t2s(t),
            Box::new([
                Datum::Sym(t2s(u), "quote".into()),
                Datum::List(
                    t2s(v),
                    Box::new([
                        Datum::Int(t2s(w), 1),
                        Datum::Int(t2s(x), 2),
                        Datum::Int(t2s(y), 3),
                    ]),
                ),
            ]),
        );
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "'";
        let t = ">";
        let err = Error::new(t2s(t), ErrorKind::Eof(WithinContext::Datum));
        assert_eq!(err, datum_from_str(None, j).unwrap_err());
    }

    #[test]
    fn unsupported_dispatch() {
        let j = r#"#loop"#;
        let t = r#"^^ "#;
        let err = Error::new(t2s(t), ErrorKind::UnsupportedDispatch);
        assert_eq!(err, datum_from_str(None, j).unwrap_err());

        let j = "#";
        let t = ">";
        let err = Error::new(t2s(t), ErrorKind::Eof(WithinContext::Dispatch));
        assert_eq!(err, datum_from_str(None, j).unwrap_err());
    }

    #[test]
    fn datum_comment() {
        let j = "(Hello #_(you jerk))";
        let t = "^^^^^^^^^^^^^^^^^^^^";
        let u = " ^^^^^ ";
        let expected = Datum::List(t2s(t), Box::new([Datum::Sym(t2s(u), "Hello".into())]));
        assert_eq!(expected, datum_from_str(None, j).unwrap());

        let j = "(Hello #_ you jerk)";
        let t = "^^^^^^^^^^^^^^^^^^^^";
        let u = " ^^^^^ ";
        let v = " ^^^^ ";
        let expected = Datum::List(
            t2s(t),
            Box::new([
                Datum::Sym(t2s(u), "Hello".into()),
                Datum::Sym(t2s(v), "jerk".into()),
            ]),
        );
        assert_eq!(expected, datum_from_str(None, j).unwrap());
    }

    #[test]
    fn multiple_data() {
        let j = " 1 #_two 3 ";
        let t = " ^ ";
        let u = " ^ ";
        let expected = vec![Datum::Int(t2s(t), 1), Datum::Int(t2s(u), 3)];
        assert_eq!(expected, data_from_str(None, j).unwrap());

        let j = "(true)))";
        let t = " ^ ";
        let err = Error::new(t2s(t), ErrorKind::UnexpectedChar(')', WithinContext::Datum));
        assert_eq!(err, data_from_str(None, j).unwrap_err());

        let j = "(true";
        let t = " >";
        let u = "^ ";
        let err = Error::new(t2s(t), ErrorKind::Eof(WithinContext::List(t2s(u))));
        assert_eq!(err, data_from_str(None, j).unwrap_err());
    }
}

================================================
FILE: syntax/span.rs
================================================
use std::num::NonZeroU32;
use std::ops::Range;

pub type FileId = NonZeroU32;
pub type ByteIndex = u32;

/// Byte range within an optional source file
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Span {
    file_id: Option<FileId>,
    start: ByteIndex,
    end: ByteIndex,
}

impl Span {
    pub const fn new(file_id: Option<FileId>, start: ByteIndex, end: ByteIndex) -> Self {
        Self {
            file_id,
            start,
            end,
        }
    }
{ Self { file_id, start, end, } } pub const fn from_str(file_id: Option, s: &str) -> Self { Self { file_id, start: 0, end: s.len() as ByteIndex, } } pub fn file_id(&self) -> Option { self.file_id } pub fn start(&self) -> ByteIndex { self.start } pub fn end(&self) -> ByteIndex { self.end } pub fn byte_range(&self) -> Range { self.start as usize..self.end as usize } pub fn contains(&self, other: Span) -> bool { self.file_id == other.file_id && self.start() <= other.start() && self.end() >= other.end() } } // This isn't #[cfg(test)] because it's used in other crates pub fn t2s(v: &str) -> Span { let (start, end) = if v.is_empty() { // Used for empty files (0, 0) } else if let Some(zero_size_off) = v.find('>') { let byte_pos = (zero_size_off + 1) as ByteIndex; (byte_pos, byte_pos) } else { let start = v.find('^').expect("Positioning character not found") as ByteIndex; let end = v.rfind('^').map(|i| i + 1).unwrap() as ByteIndex; (start, end) }; Span::new(None, start, end) }