Repository: HigherOrderCO/HVM3
Branch: main
Commit: fba2e9c82faf
Files: 99
Total size: 284.8 KB

Directory structure:
gitextract_w2c5eac0/

├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   └── feature_request.md
│   └── workflows/
│       └── CI.yml
├── .gitignore
├── CLAUDE.md
├── HVM.cabal
├── HVM.md
├── IC.md
├── INTERS.md
├── LICENSE
├── MODES.md
├── README.md
├── app/
│   └── Main.hs
├── bug.hvm
├── cabal.project
├── examples/
│   ├── _test_.js
│   ├── bench_cnots.hvm
│   ├── bench_count.hs
│   ├── bench_count.hvm
│   ├── bench_sum_range.hs
│   ├── bench_sum_range.hvm
│   ├── bench_sum_range.py
│   ├── enum_1D_match.hvm
│   ├── enum_bin.hvm
│   ├── enum_coc_smart.hvm
│   ├── enum_invert_add.hvm
│   ├── enum_lam_naive_blc.hs
│   ├── enum_lam_naive_blc.hvm
│   ├── enum_lam_smart.hvm
│   ├── enum_nat.hvm
│   ├── enum_path_finder.hvm
│   ├── enum_primes.hs
│   ├── enum_primes.hvm
│   ├── feat_affine_ctx.hvm
│   ├── feat_cmul.hvm
│   ├── feat_hoas.hvm
│   ├── feat_mut_ref.hvm
│   ├── fuse_inc.hvm
│   ├── fuse_inc.hvm1
│   ├── fuse_mul.hvm
│   ├── fuse_rot.hvm
│   └── main.hvm
└── src/
    └── HVM/
        ├── API.hs
        ├── Adjust.hs
        ├── Collapse.hs
        ├── Compile.hs
        ├── Extract.hs
        ├── Foreign.hs
        ├── Inject.hs
        ├── Parse.hs
        ├── Reduce.hs
        ├── Runtime.c
        ├── Runtime.h
        ├── Type.hs
        └── runtime/
            ├── heap.c
            ├── memory.c
            ├── prim/
            │   ├── DUP.c
            │   ├── LOG.c
            │   └── SUP.c
            ├── print.c
            ├── reduce/
            │   ├── app_ctr.c
            │   ├── app_era.c
            │   ├── app_lam.c
            │   ├── app_sup.c
            │   ├── app_una.c
            │   ├── app_w32.c
            │   ├── dup_ctr.c
            │   ├── dup_era.c
            │   ├── dup_lam.c
            │   ├── dup_ref.c
            │   ├── dup_sup.c
            │   ├── dup_una.c
            │   ├── dup_w32.c
            │   ├── let.c
            │   ├── mat_ctr.c
            │   ├── mat_era.c
            │   ├── mat_lam.c
            │   ├── mat_sup.c
            │   ├── mat_una.c
            │   ├── mat_w32.c
            │   ├── opx_ctr.c
            │   ├── opx_era.c
            │   ├── opx_lam.c
            │   ├── opx_sup.c
            │   ├── opx_una.c
            │   ├── opx_w32.c
            │   ├── opy_ctr.c
            │   ├── opy_era.c
            │   ├── opy_lam.c
            │   ├── opy_sup.c
            │   ├── opy_una.c
            │   ├── opy_w32.c
            │   ├── ref.c
            │   └── ref_sup.c
            ├── reduce.c
            ├── stack.c
            ├── state.c
            └── term.c

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: Bug report
description: Create a report to help us improve.
body:
 - type: markdown
   attributes: 
     value: |
      ### Bug Report
      Note, your issue might have been already reported, please check [issues](https://github.com/HigherOrderCO/HVM3/issues). If you find a similar issue, respond with a reaction or any additional information that you feel may be helpful.

      ### For Windows Users
      There is currently no native way to install HVM, as a temporary workaround, please use [WSL2](https://learn.microsoft.com/en-us/windows/wsl/install).
  
 - type: textarea
   attributes:
    label: Reproducing the behavior
    description: A clear and concise description of what the bug is.
    value: |
      Example:
       Running command...
       With code....
       Error...
       Expected behavior....
   validations:
    required: true
 
 - type: textarea
   attributes:
    label: System Settings
    description: Your System's settings
    value: |
     Example:
      - OS: [e.g. Linux (Ubuntu 22.04)]
      - CPU: [e.g. Intel i9-14900KF]
      - GPU: [e.g. RTX 4090]
      - Cuda Version [e.g. release 12.4, V12.4.131]
   validations:
     required: true
 - type: textarea
   attributes:
    label: Additional context
    description: Add any other context about the problem here (Optional). 
   

================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links: 
    - name: Bend Related Issues
      url: https://github.com/HigherOrderCO/Bend/issues/new/choose
      about: For Bend related Issues, please Report them on the Bend repository.  


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest a feature that you think should be added.
title: ''
labels: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.


================================================
FILE: .github/workflows/CI.yml
================================================
name: HVM3 CI
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
jobs:
  hvml-pipeline:
    name: HVM3 CI (${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]
    
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        
      - name: Cache GHC and Cabal
        uses: actions/cache@v3
        id: cache-ghc
        with:
          path: |
            ~/.ghcup
          key: ${{ runner.os }}-ghcup-${{ hashFiles('.github/workflows/ci.yml') }}
          restore-keys: |
            ${{ runner.os }}-ghcup-
          
      - name: Set up GHC and Cabal
        uses: haskell-actions/setup@v2
        id: setup-haskell
        with:
          ghc-version: '9.12.2'
          cabal-version: '3.14.1.1'
          setup-haskell-bin: ${{ steps.cache-ghc.outputs.cache-hit != 'true' }}
          setup-haskell-cabal: ${{ steps.cache-ghc.outputs.cache-hit != 'true' }}
          setup-haskell-ghc: ${{ steps.cache-ghc.outputs.cache-hit != 'true' }}
          
      - name: Update Cabal package list
        run: cabal update
        
      - name: Install system dependencies
        run: |
          if [ "${{ matrix.os }}" == "ubuntu-latest" ]; then
            sudo apt-get update
            sudo apt-get install -y libffi-dev
          elif [ "${{ matrix.os }}" == "macos-latest" ]; then
            brew install libffi
          fi
        shell: bash
      
      - name: Cache Cabal packages
        uses: actions/cache@v3
        with:
          path: |
            ${{ steps.setup-haskell.outputs.cabal-store }}
            ~/.cabal/packages
            ~/.cabal/config
          key: ${{ runner.os }}-cabal-${{ hashFiles('**/*.cabal', 'cabal.project*') }}-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-cabal-${{ hashFiles('**/*.cabal', 'cabal.project*') }}-
            ${{ runner.os }}-cabal-

      - name: Build dependencies
        run: cabal build --only-dependencies
          
      - name: Build the project
        run: cabal build
        
      - name: Lazy - Run interpreted normalization
        run: cabal run hvm -- run ./examples/feat_hoas.hvm -s
        
      - name: Lazy - Run compiled normalization
        run: cabal run hvm -- run ./examples/feat_hoas.hvm -c -s
        
      - name: Lazy - Run interpreted collapse
        run: cabal run hvm -- run ./examples/enum_lam_smart.hvm -C -s
        
      - name: Lazy - Run compiled collapse
        run: cabal run hvm -- run ./examples/enum_lam_smart.hvm -C -c -s


================================================
FILE: .gitignore
================================================
dist-newstyle/
dist-install/
*.o
*.hi
main
Main1
cabal-dev/
.cabal-sandbox/
cabal.sandbox.config
.stack-work/
*.prof
*.aux
*.hp
*.eventlog
.HTF/
.ghc.environment.*
.log.txt
.msg.txt
.sys.txt
.out.hvm
tmp/
TIMES/
.main*
.build/
dist/


================================================
FILE: CLAUDE.md
================================================
# HVM3

This project is an efficient implementation of the Interaction Calculus.

Before doing any work, read the `HVM.md` file to learn more about it.


================================================
FILE: HVM.cabal
================================================
cabal-version:      3.0
name:               HVM
version:            0.1.0.0
homepage:           https://higherorderco.com/
license:            MIT
license-file:       LICENSE
author:             Victor Taelin
maintainer:         victor.taelin@gmail.com
category:           Language
build-type:         Simple
extra-source-files:
  src/HVM/Runtime.h
  src/HVM/runtime/*.c
  src/HVM/runtime/reduce/*.c
  src/HVM/runtime/prim/*.c


library
    default-language: GHC2024
    build-depends:    base ^>=4.21.0.0,
                      mtl ^>=2.3.1,
                      containers ^>=0.7,
                      parsec ^>=3.1.17.0,
                      hs-highlight ^>= 1.0.5,
                      ansi-terminal == 1.1.1,
                      file-embed,
                      process,
                      libffi,
                      unix,
                      deepseq
    exposed-modules:  HVM.Type
                    , HVM.Collapse
                    , HVM.Compile
                    , HVM.Extract
                    , HVM.Foreign
                    , HVM.Inject
                    , HVM.Parse
                    , HVM.Adjust
                    , HVM.Reduce
                    , HVM.API
    other-modules:
    hs-source-dirs:   src
    include-dirs:     src/HVM
    includes:         Runtime.h
    install-includes: Runtime.h
    -- Build the runtime as a single translation unit for performance
    c-sources:        src/HVM/Runtime.c
    extra-libraries:  c
    ghc-options:     -Wno-all

executable hvm
    default-language: GHC2024
    build-depends:    base ^>=4.21.0.0,
                      containers ^>=0.7,
                      network
    main-is:          Main.hs
    build-depends:    HVM
    hs-source-dirs:   app
    ghc-options:     -Wno-all -threaded


================================================
FILE: HVM.md
================================================
# HVM

The HVM is a extension, and efficient runtime, for the Interaction Calculus.

## Project Organization

- `README.md`: introduction and general information

- `IC.md`: full spec of the Interaction Calculus (read it!)

- `HVM.md`: full spec of the HVM runtime (read it!)

- `Inters.md`: the complete interaction table

- `examples/`: many example `.hvm` files

- `src/`: Haskell and C implementation
  - `Type.hs`: defines the Term and Book types, used in all files (read it!)
  - `Show.hs`: converts a Term to a String
  - `Parse.hs`: converts a String to a Term
  - `Reduce.hs`: evaluates a Term to weak head normal form (WHNF)
  - `Inject.hs`: converts a Haskell-side Term to a C-side Term
  - `Extract.hs`: normalize and extracts a C-side Term into a Haskell-side IC Term
  - `Collapse.hs`: normalizes and collapses a C-side Term into a list of Haskell-side λC (i.e., dup/sup-free) Terms
  - `Foreign.hs`: imports C-side functions on Haskell-side
  - `Runtime.c`: the complete C runtime, including memory types, interactions, and a faster WHNF evaluator
  - `Compile.hs`: converts a top-level Book into a list of optimized, native C-code

- `dist/`, `dist-newstyle`: Haskell artifacts

## Memory Layout

On HVM, each Term is represented as a word, with the following fields:

- `sub`: true if this is a substitution
- `tag`: the tag identifying the term type
- `lab`: a label, used to trigger commutations
- `val`: the value (a node address, or an unboxed number)

The length of each field depends on the version:

- **32-bit**: 1-bit sub | 5-bit tag | 2-bit lab | 24-bit val
- **64-bit**: 1-bit sub | 5-bit tag | 18-bit lab | 40-bit val

The meaning of the val field depends on the term's tag, as follows:

Tag | ID   | Value points to / stores ...
--- | ---- | --------------------------------------
DP0 | 0x00 | Dup Node ({val: Term}) or substitution
DP1 | 0x01 | Dup Node ({val: Term}) or substitution
VAR | 0x02 | Lam Node ({bod: Term}) or substitution
FWD | 0x03 | TODO: document
REF | 0x04 | Ref Node ({arg0: Term, ... argN: Term})
LET | 0x05 | Let Node
APP | 0x06 | App Node ({fun: Term, arg: Term})
MAT | 0x08 | Mat Node
IFL | 0x09 | IfL Node
SWI | 0x0A | Swi Node
OPX | 0x0B | OpX Node
OPY | 0x0C | OpY Node
ERA | 0x0D | Unused
LAM | 0x0E | Lam Node ({bod: Term})
SUP | 0x0F | Sup Node
CTR | 0x10 | Ctr Node ({x0: Term, ... xN: Term})
W32 | 0x11 | Unboxed U32 Number
CHR | 0x12 | Unboxed U32 Number

A Node is a consecutive block of its child terms. For example, the SUP term
points to the memory location where its two child terms are stored.

Variable terms (`VAR`, `DP0`, and `DP1`) point to an entry on the subst map. As
an optimization, HVM doesn't have a separate subst map. Instead, variables point
to the location of the corresponding binder node (like a Lam or Dup). When an
interaction occurs, that location is reused as a subst map entry, and we set the
'sub' bit of the stored term to '1'. When a variable points to a term with the
bit flag set, we it is a substitution, so we retrieve it and clear the flag.

Note that there is no explicit DUP term. That's because Dup nodes are special:
they aren't part of the AST, and they don't store a body; they "float" on the
heap.  In other words, `λx. !&0{x0,x1}=x; &0{x0,x1}` and `!&0{x0,x1}=x; λx.
&0{x0,x1}` are both valid, and stored identically in memory. As such, the only
way to access a Dup node is via its bound variables, `DP0` and `DP1`.

Note that, when a Dup Node interacts, it usually generates two substitutions.
So, how can we store them in its location, given that a Dup Node has only one
word? The answer is: we don't. Dup Nodes only interact when we access them
via either a `DP0` or a `DP1`. As such, we immediatelly return one of the
substitutions to the variable that triggered the interaction, and store the
other substitution on the Dup Node's location.

For example, the DUP-SUP interaction could be implemented as:

```
def dup_sup(dup, sup):
  dup_lab = dup.tag & 0x3
  sup_lab = sup.tag & 0x3
  if dup_lab == sup_lab:
    tm0 = heap[sup.loc + 0]
    tm1 = heap[sup.loc + 1]
    heap[dup.loc] = as_sub(tm1 if (dup.tag & 0x4) == 0 else tm0)
    return (tm0 if (dup.tag & 0x4) == 0 else tm1)
  else:
    co0_loc = alloc(1)
    co1_loc = alloc(1)
    su0_loc = alloc(2)
    su1_loc = alloc(2)
    su0_val = Term(SP0 + sup_lab, su0_loc)
    su1_val = Term(SP0 + sup_lab, su1_loc)
    heap[co0_loc] = heap[sup.loc + 0]
    heap[co1_loc] = heap[sup.loc + 1]
    heap[su0_loc + 0] = Term(CX0 + dup_lab, co0_loc)
    heap[su0_loc + 1] = Term(CX0 + dup_lab, co1_loc)
    heap[su1_loc + 0] = Term(CY0 + dup_lab, co0_loc)
    heap[su1_loc + 1] = Term(CY0 + dup_lab, co1_loc)
    heap[dup.loc] = as_sub(su1_val if (dup.tag & 0x4) == 0 else su0_val)
    return (su0_val if (dup.tag & 0x4) == 0 else su1_val)
```

Note that HVM extends the Interaction Calculus with many new types and
interactions. The complete Interaction Table is at INTERS.md.

## C FFI and Compiler

In this project, Terms have two representations:

- A Haskell-side representation (the Term type on `Type.hs`)

- A C-side representation (the pointer format specified on `HVM.md`)

Functions are implemented on Haskell, C, or both:

- Interactions are exclusively implemented on C, except for CALL.

- The WHNF function is implemented on both Haskell and C.

- All other functions are implemented on Haskell only.

There are two evaluation modes:

- Interpreted Mode:
  - uses Haskell-side parser, WHNF, collapser, and stringifier
  - uses Haskell-side CALL interaction
  - uses C-side for other interactions

- Compiled Mode:
  - uses Haskell-side parser, collapser and strinigifier
  - uses C-side CALL interaction
  - uses C-side WHNF and interactions

To run HVM3 on Compiled Mode, we generate a new copy of `Runtime.c` with the
compiled functions inlined, and then compile with GCC and reload as a dylib.
This allows the C-side WHNF to dispatch CALL interactions to native C
procedures, which run much faster than `inject`.

## The CALL interaction

The CALL interaction performs a global function call. For example, given:

```
@mul2(x) = ~ x { 0:0 p:(+ 2 @mul2(p)) }
```

When we evaluate the expression:

```
@foo(4)
```

It will expand to:

```
~ 4 { 0:0 p:(+ 2 @mul2(p)) }
```

On Interpreted Mode, this is done by `inject`, which allocates the body of the
function, substituting variables by the respective arguments.

On Compiled Mode, this is done by calling a native C function, via two paths:

`Slow Path`: a C function that just allocates the body, like `inject`. Example:

```c
Term mul2_t(Term ref) {
  Term arg0 = got(term_loc(ref) + 0);
  Loc mat1 = alloc_node(3);
  set_new(mat1 + 0, arg0);
  set_new(mat1 + 1, term_new(W32, 0, 0));
  Loc lam2 = alloc_node(1);
  Loc opx3 = alloc_node(2);
  Loc ref4 = alloc_node(1);
  set_new(ref4 + 0, term_new(VAR, 0, lam2 + 0));
  set_new(opx3 + 0, term_new(W32, 0, 2));
  set_new(opx3 + 1, term_new(REF, 0, ref4));
  set_new(lam2 + 0, term_new(OPX, 0, opx3));
  set_new(mat1 + 2, term_new(LAM, 0, lam2));
  return term_new(SWI, 2, mat1);
}
```

`Fast Path`: a C function that attempts to perform *inline interactions*,
avoiding allocating extra memory. For example, in the `mul2` case, it will check
if the argument is a number. If so, it will perform a native C switch, instead
of allocating a `SWI` node. It also performs inline arithmetic and even loops,
when the function is tail-call recursive. Example:

```c
Term mul2_f(Term ref) {
  u64 itrs = 0;
  Term arg0 = got(term_loc(ref) + 0);
  while (1) {
    Term val1 = (arg0);
    if (term_tag(val1) == W32) {
      u32 num2 = term_loc(val1);
      switch (num2) {
        case 0: {
          itrs += 1;
          *HVM.itrs += itrs;
          return term_new(W32, 0, 0);
          break;
        }
        default: {
          Term pre3 = term_new(W32, 0, num2 - 1);
          itrs += 2;
          Loc ref8 = alloc_node(1);
          set_new(ref8 + 0, pre3);
          Term nu06 = (term_new(W32, 0, 2));
          Term nu17 = (term_new(REF, 0, ref8));
          Term ret5;
          if (term_tag(nu06) == W32 && term_tag(nu17) == W32) {
            itrs += 2;
            ret5 = term_new(W32, 0, term_loc(nu06) + term_loc(nu17));
          } else {
            Loc opx4 = alloc_node(2);
            set_new(opx4 + 0, nu06);
            set_new(opx4 + 1, nu17);
            ret5 = term_new(OPX, 0, opx4);
          }
          *HVM.itrs += itrs;
          return ret5;
          break;
        }
      }
    }
    set_old(term_loc(ref) + 0, arg0);
    return mul2_t(ref);
  }
}
```

# Parser

On HVM, all bound variables have global range. For example, consider the term:

```
λt.((t x) λx.λy.y)
```

Here, the `x` variable appears before its binder, `λx`. Since runtime variables
must point to their bound λ's, linking them correctly requires caution. A way to
do it is to store two structures at parse-time: a list from names to locations,
and a map from names to variable terms.

Whenever we parse a name, we add the current location to the 'uses' array, and
whenever we parse a binder (lams, lets, etc.), we add a variable term pointing
to it to the 'vars' map. Then, once the parsing is done, we run iterate through
the 'uses' array, and write, to each location, the corresponding term. Below
are some example parsers using this strategy:

```python
def parse_var(loc):
  nam = parse_name()
  uses.push((nam,loc))

def parse_lam(loc):
  lam = alloc(1)
  consume("λ")
  nam = parse_name()
  consume(".")
  vars[nam] = Term(VAR, 0, lam)
  parse_term(lam)
  heap[loc] = Term(LAM, 0, lam)

def parse_app(loc):
  app = alloc(2)
  consume("(")
  parse_term(app + 0)
  consume(" ")
  parse_term(app + 1)
  consume(")")
  heap[loc] = Term(APP, 0, app)

...
```

# Stringifier

Converting HVM terms to strings faces two challenges:

First, HVM terms and nodes don't store variable names. As such, we must
generate fresh, unique variable names during stringification, and maintain a
mapping from each binder's memory location to its assigned name.

Second, on HVM, Dup nodes aren't part of the main program's AST. Instead, they
"float" on the heap, and are only reachable via DP0 and DP1 variables. Because
of that, by stringifying a term naively, Col nodes will be missing.

To solve these, we proceed as follows:

1. Before stringifying, we pass through the full term, and assign a id to each
variable binder we find (on lam, let, dup, etc.)

2. We also register every Dup node we found, avoiding duplicates (remember the
same dup node is pointed to by up to 2 variables, DP0 and DP1)

Then, to stringify the term, we first stringify each DUP node, and then we
stringify the actual term. As such, the result will always be in the form:

! &{x0 x1} = t0
! &{x2 x3} = t1
! &{x4 x5} = t2
...
term

With no Dup nodes inside the ASTs of t0, t1, t2 ... and term.


================================================
FILE: IC.md
================================================
# The Interaction Calculus

The [Interaction Calculus](https://github.com/VictorTaelin/Interaction-Calculus)
is a minimal term rewriting system inspired by the Lambda Calculus (λC), but
with some key differences:
1. Vars are affine: they can only occur up to one time.
2. Vars are global: they can occur anywhere in the program.
3. There is a new core primitive: the superposition.

An Interaction Calculus term is defined by the following grammar:

```haskell
Term ::=
  | VAR: Name
  | ERA: "*"
  | LAM: "λ" Name "." Term
  | APP: "(" Term " " Term ")"
  | SUP: "&" Label "{" Term "," Term "}"
  | DUP: "!" "&" Label "{" Name "," Name "}" "=" Term ";" Term
```

Where:
- VAR represents a variable.
- ERA represents an erasure.
- LAM represents a lambda.
- APP represents a application.
- SUP represents a superposition.
- DUP represents a duplication.

Lambdas are curried, and work like their λC counterpart, except with a relaxed
scope, and with affine usage. Applications eliminate lambdas, like in λC,
through the beta-reduce (APP-LAM) interaction.

Superpositions work like pairs. Duplications eliminate superpositions through
the DUP-SUP interaction, which works exactly like a pair projection.

What makes SUPs and DUPs unique is how they interact with LAMs and APPs. When a
SUP is applied to an argument, it reduces through the APP-SUP interaction, and
when a LAM is projected, it reduces through the DUP-LAM interaction. This gives
a computational behavior for every possible interaction: there are no runtime
errors on the core Interaction Calculus.

The 'Label' is a numeric value that affects some interactions, like DUP-SUP,
causing terms to commute, instead of annihilate. Read Lafont's Interaction
Combinators paper to learn more.

The core interaction rules are listed below:

```haskell
(* a)
----- APP-ERA
*

(λx.f a)
-------- APP-LAM
x <- a
f

(&L{a,b} c)
----------------- APP-SUP
! &L{c0,c1} = c;
&L{(a c0),(b c1)}

! &L{r,s} = *;
K
-------------- DUP-ERA
r <- *
s <- *
K

! &L{r,s} = λx.f;
K
----------------- DUP-LAM
r <- λx0.f0
s <- λx1.f1
x <- &L{x0,x1}
! &L{f0,f1} = f;
K

! &L{x,y} = &L{a,b};
K
-------------------- DUP-SUP (if equal labels)
x <- a
y <- b
K

! &L{x,y} = &R{a,b};
K
-------------------- DUP-SUP (if different labels)
x <- &R{a0,b0} 
y <- &R{a1,b1}
! &L{a0,a1} = a;
! &L{b0,b1} = b;
K
```

Where `x <- t` stands for a global substitution of `x` by `t`.

Since variables are affine, substitutions can be implemented efficiently by just
inserting an entry in a global substitution map (`sub[var] = value`). There is
no need to traverse the target term, or to handle name capture, as long as fresh
variable names are globally unique. Thread-safe substitutions can be performed
with a single atomic-swap.

Below is a pseudocode implementation of these interaction rules:

```python
def app_lam(app, lam):
  sub[lam.nam] = app.arg
  return lam.bod

def app_sup(app, sup):
  x0 = fresh()
  x1 = fresh()
  a0 = App(sup.lft, Var(x0))
  a1 = App(sup.rgt, Var(x1))
  return Dup(sup.lab, x0, x1, app.arg, Sup(a0, a1))

def dup_lam(dup, lam):
  x0 = fresh()
  x1 = fresh()
  f0 = fresh()
  f1 = fresh()
  sub[dup.lft] = Lam(x0, Var(f0))
  sub[dup.rgt] = Lam(x1, Var(f1))
  sub[lam.nam] = Sup(dup.lab, Var(x0), Var(x1))
  return Dup(dup.lab, f0, f1, lam.bod, dup.bod)

def dup_sup(dup, sup):
  if dup.lab == sup.lab:
    sub[dup.lft] = sup.lft
    sub[dup.rgt] = sup.rgt
    return dup.bod
```

Terms can be reduced to weak head normal form, which means reducing until the
outermost constructor is a value (LAM, SUP, etc.), or until no more reductions
are possible. Example:

```python
def whnf(term):
  while True:
    match term:
      case Var(nam):
        if nam in sub:
          term = sub[nam]
        else:
          return term
      case App(fun, arg):
        fun = whnf(fun)
        match fun.tag:
          case LAM: term = app_lam(term, fun)
          case SUP: term = app_sup(term, fun)
          case _  : return App(fun, arg)
      case Dup(lft, rgt, val, bod):
        val = whnf(val)
        match val.tag:
          case LAM: term = dup_lam(term, val)
          case SUP: term = dup_sup(term, val)
          case _  : return Dup(lft, rgt, val, bod)
      case _:
        return term
```

Terms can be reduced to full normal form by recursively taking the whnf:

```python
def normal(term):
  term = whnf(term)
  match term:
    case Lam(nam, bod):
      bod_nf = normal(bod)
      return Lam(nam, bod_nf)
    case App(fun, arg):
      fun_nf = normal(fun)
      arg_nf = normal(arg)
      return App(fun_nf, arg_nf)
    ...
    case _:
      return term
```

Below are some normalization examples.

Example 0: (simple λ-term)

```haskell
(λx.λt.(t x) λy.y)
------------------ APP-LAM
λt.(t λy.y)
```

Example 1: (larger λ-term)

```haskell
(λb.λt.λf.((b f) t) λT.λF.T)
---------------------------- APP-LAM
λt.λf.((λT.λF.T f) t)
----------------------- APP-LAM
λt.λf.(λF.t f)
-------------- APP-LAM
λt.λf.t
```

Example 2: (global scopes)

```haskell
{x,(λx.λy.y λk.k)}
------------------ APP-LAM
{λk.k,λy.y}
```

Example 3: (superposition)

```haskell
!{a,b} = {λx.x,λy.y}; (a b)
--------------------------- DUP-SUP
(λx.x λy.y)
----------- APP-LAM
λy.y
```

Example 4: (overlap)

```haskell
({λx.x,λy.y} λz.z)
------------------ APP-SUP  
! {x0,x1} = λz.z; {(λx.x x0),(λy.y x1)}  
--------------------------------------- DUP-LAM  
! {f0,f1} = {r,s}; {(λx.x λr.f0),(λy.y λs.f1)}  
---------------------------------------------- DUP-SUP  
{(λx.x λr.r),(λy.y λs.s)}  
------------------------- APP-LAM  
{λr.r,(λy.y λs.s)}  
------------------ APP-LAM  
{λr.r,λs.s}  
```

Example 5: (default test term)

The following term can be used to test all interactions:

```haskell
((λf.λx.!{f0,f1}=f;(f0 (f1 x)) λB.λT.λF.((B F) T)) λa.λb.a)
----------------------------------------------------------- 16 interactions
λa.λb.a
```

# Collapsing

An Interaction Calculus term can be collapsed to a superposed tree of pure
Lambda Calculus terms without SUPs and DUPs, by extending the evaluator with the
following collapse interactions:

```haskell
λx.*
------ ERA-LAM
x <- *
*

(f *)
----- ERA-APP
*

λx.&L{f0,f1}
----------------- SUP-LAM
x <- &L{x0,x1}
&L{λx0.f0,λx1.f1}

(f &L{x0,x1})
------------------- SUP-APP
!&L{f0,f1} = f;
&L{(f0 x0),(f1 x1)}

&R{&L{x0,x1},y}
----------------------- SUP-SUP-X (if R>L)
!&R{y0,y1} = y;
&L{&R{x0,x1},&R{y0,y1}}

&R{x,&L{y0,y1}}
----------------------- SUP-SUP-Y (if R>L)
!&R{x0,x1} = x;
&L{&R{x0,x1},&R{y0,y1}}

!&L{x0,x1} = x; K
----------------- DUP-VAR
x0 <- x
x1 <- x
K

!&L{a0,a1} = (f x); K
--------------------- DUP-APP
a0 <- (f0 x0)
a1 <- (f1 x1)
!&L{f0,f1} = f;
!&L{x0,x1} = x;
K
```


================================================
FILE: INTERS.md
================================================
# HVM - Interaction Table

TODO: this document is a WIP. It is not complete yet.

## Core Interactions

Lambdas and Superpositions:

```haskell
(* a)
----- APP-ERA
*

(λx.f a)
-------- APP-LAM
x <- a
f

(&L{a,b} c)
----------------- APP-SUP
! &L{c0,c1} = c;
&L{(a c0),(b c1)}

! &L{r,s} = *;
K
-------------- DUP-ERA
r <- *
s <- *
K

! &L{r,s} = λx.f;
K
----------------- DUP-LAM
r <- λx0.f0
s <- λx1.f1
x <- &L{x0,x1}
! &L{f0,f1} = f;
K

! &L{x,y} = &L{a,b};
K
-------------------- DUP-SUP (if equal labels)
x <- a
y <- b
K

! &L{x,y} = &R{a,b};
K
-------------------- DUP-SUP (if different labels)
x <- &R{a0,b0} 
y <- &R{a1,b1}
! &L{a0,a1} = a;
! &L{b0,b1} = b;
K
```

Numbers:

```haskell
+N
--- SUC-NUM
N+1

+*
-- SUC-ERA
*

+&L{x,y}
--------- SUC-SUP
&L{+x,+y}

?N{0:z;+:s;}
------------ SWI-NUM (if N==0)
z

?N{0:z;+:s;}
------------ SWI-NUM (if N>0)
(s N-1)

?*{0:z;+:s;}
------------ SWI-ERA
*

?&L{x,y}{0:z;+:s;}
--------------------------------- SWI-SUP
!&L{z0,z1} = z;
!&L{s0,s1} = s;
&L{?x{0:z0;+:s0;},?y{0:z1;+:s1;}}

! &L{x,y} = N;
K
-------------- DUP-NUM
x <- N
y <- N
K
```

## Collapsing Interactions

These interactions are NOT part of the WHNF. They're called by the collapser.

```haskell
λx.*
------ ERA-LAM
x <- *
*

(f *)
----- ERA-APP
*

λx.&L{f0,f1}
----------------- SUP-LAM
x <- &L{x0,x1}
&L{λx0.f0,λx1.f1}

(f &L{x0,x1})
------------------- SUP-APP
!&L{f0,f1} = f;
&L{(f0 x0),(f1 x1)}

~N{0:&L{z0,z1};+:s;}
--------------------------------- SUP-SWI-Z
!&L{N0,N1} = N;
!&L{S0,S1} = S;
&L{~N0{0:z0;+:S0},~N1{0:z1;+:S1}}

~N{0:z;+:&0{s0,s1};}
--------------------------------- SUP-SWI-S
!&L{N0,N1} = N;
!&L{Z0,Z1} = Z;
&L{~N0{0:z0;+:S0},~N1{0:z1;+:S1}}

&R{&L{x0,x1},y}
----------------------- SUP-SUP-X (if R>L)
!&R{y0,y1} = y;
&L{&R{x0,x1},&R{y0,y1}}

&R{x,&L{y0,y1}}
----------------------- SUP-SUP-Y (if R>L)
!&R{x0,x1} = x;
&L{&R{x0,x1},&R{y0,y1}}

!&L{x0,x1} = x; K
----------------- DUP-VAR
x0 <- x
x1 <- x
K

!&L{a0,a1} = (f x); K
--------------------- DUP-APP
a0 <- (f0 x0)
a1 <- (f1 x1)
!&L{f0,f1} = f;
!&L{x0,x1} = x;
K
```


================================================
FILE: LICENSE
================================================
Copyright (c) 2024 Victor Taelin

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


================================================
FILE: MODES.md
================================================
# Evaluation Modes

## Lazy Mode

Pointers represent positive-to-negative ports in polarized nets. This causes the
memory format to coincide perfectly with how IC terms are written textually. It
is a direct improvement of [HVM1](https://github.com/HigherOrderCO/hvm1). It is
implemented in this repository.

### Strengths:

- Efficient lazy evaluation
- Lévy Optimality (minimal β-reduction)
- Very fast single-core evaluation
- Compiles to efficient C (often, faster than GHC)

### Drawbacks:

- WHNF may return a pending variable
- Requires global garbage collection
- Parallelism is still an open problem

## Strict Mode

Pointers represent aux-to-main ports, resulting in a tree-like memory format. It
is implemented in a [separate repository](https://github.com/HigherOrderCO/hvm3-strict),
and will be merged later.

### Strengths:

- Efficient parallel evaluation
- Does not require global garbage collection

### Drawbacks:

- Lazy evaluation is impossible

- Not Lévy Optimal (can waste β-reductions)


================================================
FILE: README.md
================================================
# HVM3 - Work In Progress

The **HVM** is an efficient implementation of the [Interaction Calculus](https://github.com/VictorTaelin/Interaction-Calculus) (IC).

The Interaction Calculus is a new foundation for computing, similar to the
Lambda Calculus, but theoretically optimal. The HVM is an efficient
implementation of this new paradigm, and can be seen as a fast engine for
symbolic computations.

In some ways, it is very similar to Haskell, but it has some key differences:
- Lambdas must be linear or affine, making it resource-aware
- Lambdas have no scope boundaries, enabling global substitutions
- First-class duplications allow a term to be copied into two locations
- First-class superpositions allow 2 terms to be stored in 1 location

These primitives allow HVM to natively represent concepts that are not present
in the traditional λ-Calculus, including continuations, linear HOAS interpreters
and mutable references. Moreover, superpositions and duplications allow it to
perform optimal beta-reduction, allowing some expressions to be evaluated with
an exponential speedup. Finally, being fully affine makes its garbage collector
very efficient, and greatly simplifies parallelism.

The HVM3 is the successor to HVM1 and HVM2, combining their strengths. It aims
to be the main compile target of [Bend](https://github.com/HigherOrderCO/Bend).
It is a WIP under active development.

## Specifications

- [IC.md](./IC.md): Interaction Calculus, the theoretical foundation behind HVM

- [HVM.md](./HVM.md): the HVM language, which extends IC with pragmatic primitives

## Install

1. Install Cabal.

3. Clone this repository.

3. Run `cabal install`.

## Usage

```bash
hvm run file.hvml    # runs interpreted (slow)
hvm run file.hvml -c # runs compiled (fast)
```

Note: the `-c` flag will also generate a standalone `.main.c` file, which if you
want, you can compile and run it independently. See examples in the [book/](book/) directory.


================================================
FILE: app/Main.hs
================================================
module Main where

import Network.Socket as Network
import System.IO (hSetEncoding, utf8, hPutStrLn, stderr)
import Control.Exception (try, fromException, SomeException, finally, AsyncException(UserInterrupt))
import Control.Monad (when, forM_, unless)
import Data.List (partition, isPrefixOf, find)
import HVM.API
import HVM.Collapse
import HVM.Extract
import HVM.Foreign
import HVM.Parse
import HVM.Reduce
import HVM.Type
import System.Environment (getArgs)
import System.Exit (exitWith, ExitCode(ExitSuccess, ExitFailure))
import System.IO
import Text.Printf
import Text.Read (readMaybe)
import qualified Data.Map.Strict as MS

-- Main
-- ----

main :: IO ()
main = do
  args <- getArgs
  result <- case args of
    ("run" : file : rest) -> do
      let (flags, sArgs) = partition ("-" `isPrefixOf`) rest
      let compiled       = "-c" `elem` flags
      let collapseFlag   = Data.List.find (isPrefixOf "-C") flags >>= parseCollapseFlag
      let stats          = "-s" `elem` flags
      let debug          = "-d" `elem` flags
      let hideQuotes     = "-Q" `elem` flags
      let mode           = case collapseFlag of { Just n -> Collapse n ; Nothing -> Normalize }
      cliRun file debug compiled mode stats hideQuotes sArgs
    ("serve" : file : rest) -> do
      let (flags, _)     = partition ("-" `isPrefixOf`) rest
      let compiled       = "-c" `elem` flags
      let collapseFlag   = Data.List.find (isPrefixOf "-C") flags >>= parseCollapseFlag
      let stats          = "-s" `elem` flags
      let debug          = "-d" `elem` flags
      let hideQuotes     = "-Q" `elem` flags
      let mode           = case collapseFlag of { Just n -> Collapse n ; Nothing -> Normalize }
      cliServe file debug compiled mode stats hideQuotes
    ["help"] -> printHelp
    _ -> printHelp
  case result of
    Left err -> do
      putStrLn err
      exitWith (ExitFailure 1)
    Right _ -> do
      exitWith ExitSuccess

parseCollapseFlag :: String -> Maybe (Maybe Int)
parseCollapseFlag ('-':'C':rest) = 
  case rest of
    "" -> Just Nothing
    n  -> Just (readMaybe n)
parseCollapseFlag _ = Nothing

printHelp :: IO (Either String ())
printHelp = do
  putStrLn "HVM usage:"
  putStrLn "  hvm3 help       # Shows this help message"
  putStrLn "  hvm3 run <file> [flags] [args...] # Evals main"
  putStrLn "  hvm3 serve <file> [flags] # Starts socket server on port 8080"
  putStrLn "    -c  # Runs with compiled mode (fast)"
  putStrLn "    -C  # Collapse the result to a list of λ-Terms"
  putStrLn "    -CN # Same as above, but show only first N results"
  putStrLn "    -s  # Show statistics"
  putStrLn "    -d  # Print execution steps (debug mode)"
  putStrLn "    -Q  # Hide quotes in output"
  return $ Right ()

-- CLI Commands
-- ------------

cliRun :: FilePath -> Bool -> Bool -> RunMode -> Bool -> Bool -> [String] -> IO (Either String ())
cliRun filePath debug compiled mode showStats hideQuotes strArgs = do
  code <- readFile' filePath
  book <- doParseBook filePath code
  hvmInit
  initBook filePath book compiled
  checkHasMain book
  args <- doParseArguments book strArgs
  checkMainArgs book args
  (_, stats) <- withRunStats $ do
    injectRoot book (Ref "main" maxBound args)
    rxAt <- if compiled
      then return (reduceCAt debug)
      else return (reduceAt debug)
    case mode of
      Collapse limit -> do
        core <- doCollapseFlatAt rxAt book 0
        let vals = maybe id Prelude.take limit core
        forM_ vals $ \val -> do -- Collapse and print the result line by line lazily
          let out = if hideQuotes then removeQuotes (show val) else show val
          printf "%s\n" out
      Normalize -> do
        core <- doExtractCoreAt rxAt book 0
        let val = doLiftDups core
        let out = if hideQuotes then removeQuotes (show val) else show val
        printf "%s\n" out
  hvmFree
  when showStats $ do
    print stats
  return $ Right ()

cliServe :: FilePath -> Bool -> Bool -> RunMode -> Bool -> Bool -> IO (Either String ())
cliServe filePath debug compiled mode showStats hideQuotes = do
  code <- readFile' filePath
  book <- doParseBook filePath code
  hvmInit
  initBook filePath book compiled
  checkHasMain book
  putStrLn "HVM serve mode. Listening on port 8080."
  sock <- socket AF_INET Stream 0
  setSocketOption sock ReuseAddr 1
  Network.bind sock (SockAddrInet 8080 0)
  listen sock 5
  putStrLn "Server started. Listening on port 8080."
  serverLoop sock book `finally` do
    close sock
    hvmFree
    putStrLn "\nServer terminated."
  return $ Right ()
  where
    serverLoop sock book = do
      result <- try $ do
        (conn, _) <- accept sock
        h <- socketToHandle conn ReadWriteMode
        hSetBuffering h LineBuffering
        hSetEncoding h utf8
        input <- hGetLine h
        unless (input == "exit" || input == "quit") $ do
          oldSize <- getLen
          args <- doParseArguments book [input]
          checkMainArgs book args
          let root = Ref "main" maxBound args
          (vals, stats) <- runBook book root mode compiled debug
          let out = unlines $ map (\t -> if hideQuotes then removeQuotes (show t) else show t) vals
          hPutStrLn h out
          when showStats $ do
            hPutStrLn h (show stats)
          setItr 0
          setLen oldSize
        hClose h
      case result of
        Left e -> case fromException e of
          Just UserInterrupt -> return () -- Exit loop on Ctrl+C
          _ -> do
            hPutStrLn stderr $ "Connection error: " ++ show (e :: SomeException)
            serverLoop sock book
        Right _ -> serverLoop sock book

removeQuotes :: String -> String
removeQuotes s = case s of
  '"':rest -> init rest  -- Remove first and last quote if present
  _        -> s          -- Otherwise return as-is

checkHasMain :: Book -> IO ()
checkHasMain book = do
  when (not $ MS.member "main" (namToFid book)) $ do
    putStrLn "Error: 'main' not found."
    exitWith (ExitFailure 1)

checkMainArgs :: Book -> [Core] -> IO ()
checkMainArgs book args = do
  let ((_, mainArgs), _) = mget (fidToFun book) (mget (namToFid book) "main")
  when (length args /= length mainArgs) $ do
    putStrLn $ "Error: 'main' expects " ++ show (length mainArgs) ++ " arguments, found " ++ show (length args)
    exitWith (ExitFailure 1)


================================================
FILE: bug.hvm
================================================
@main =
  !c2_0 = λf !&0{f0 f1}=f λx(f0 (f1 x))
  !c2_1 = λf !&1{f0 f1}=f λx(f0 (f1 x))
  (c2_0 c2_1)


================================================
FILE: cabal.project
================================================
packages: .

package *
  optimization: 2

source-repository-package
  type: git
  location: https://github.com/HigherOrderCO-archive/hs-highlight


================================================
FILE: examples/_test_.js
================================================
/**
 * HVM Test Script
 *
 * This script tests the HVM implementation by running a series of .hvm files
 * in both interpreted and compiled modes, as specified in the test table.
 * For each test case:
 * - If a 'main' line is provided, it replaces the '@main' line in the original file
 *   and saves it as '_test_.hvm'. Otherwise, it copies the original file to '_test_.hvm'.
 * - Runs the test 4 times:
 *   - First run: Extracts the result (first line of output) and checks it against
 *     the expected 'norm'. Logs an error if they don't match.
 *   - Next 3 runs: Measures the execution time (from the 'TIME:' line in output).
 * - Averages the times from the last 3 runs.
 * - Compares the average time with the previous run's time stored in '_perf_.json'.
 *   If the new time is >5% slower and the total run time is >= 0.05 seconds, logs a warning.
 * - Updates '_perf_.json' with the new average times after all tests.
 *
 * Key Notes:
 * - Only runs modes ('intr' or 'comp') specified in the test table for each file.
 * - Some tests lack a 'main' line, meaning no replacement is needed—just copy the file.
 * - Uses Node.js 'fs' for file operations and 'child_process.execSync' to run commands.
 * - Assumes the script runs in a directory containing the .hvm files.
 * - Commands use a hardcoded project directory (/Users/v/vic/dev/HVM) for interpreted mode.
 */

const fs = require('fs');
const { execSync } = require('child_process');

// ### Test Specifications
// Array of test objects, each defining a file and its test cases for 'intr' (interpreted)
// and/or 'comp' (compiled) modes. Each mode object has an expected 'norm' and an optional 'main'.
const tests = [
  {
    file: 'bench_cnots.hvm',
    intr: { main: '@main = (@P20 @not @true)', norm: 'λa λb a' },
    comp: { main: '@main = (@P24 @not @true)', norm: 'λa λb a' }
  },
  {
    file: 'bench_count.hvm',
    intr: { main: '@main = @count(2_000_000 0)', norm: '4000000' },
    comp: { main: '@main = @count(2_000_000_000 0)', norm: '4000000000' }
  },
  {
    file: 'bench_sum_range.hvm',
    intr: { main: '@main = @sum(@range(1_000_000 #Nil) 0)', norm: '1783293664' },
    comp: { main: '@main = @sum(@range(300_000_000 #Nil) 0)', norm: '3992170112' }
  },
  {
    file: 'enum_coc_smart.hvm',
    intr: { norm: '"λλ(0 λλ((1 3) 2))"' },
    comp: { norm: '"λλ(0 λλ((1 3) 2))"' }
  },
  {
    file: 'enum_lam_naive_blc.hvm',
    comp: { norm: '"λλ(1 λλ((2 0) 1))"' }
  },
  {
    file: 'enum_lam_smart.hvm',
    intr: { norm: '"λ(0 λλλ((0 1) 2))"' },
    comp: { norm: '"λ(0 λλλ((0 1) 2))"' }
  },
  {
    file: 'enum_nat.hvm',
    intr: { main: '@main = @if(@eq(@mul(@X @nat(20)) @nat(12000)) @u32(@X) *)', norm: '600' },
    comp: { main: '@main = @if(@eq(@mul(@X @nat(20)) @nat(20000)) @u32(@X) *)', norm: '1000' }
  },
  {
    file: 'enum_primes.hvm',
    intr: { main: '@main = @if(@eq(@mul(@X_B @Y_B) @P_B) λt(t @u32(@X_B) @u32(@Y_B)) *)', norm: 'λa ((a 853) 947)' },
    comp: { main: '@main = @if(@eq(@mul(@X_C @Y_C) @P_C) λt(t @u32(@X_C) @u32(@Y_C)) *)', norm: 'λa ((a 25997) 27299)' }
  },
  {
    file: 'feat_affine_ctx.hvm',
    intr: { norm: '1' },
    comp: { norm: '1' }
  },
  {
    file: 'feat_cmul.hvm',
    intr: { norm: 'λa λb (a (a (a (a b))))' },
    comp: { norm: 'λa λb (a (a (a (a b))))' }
  },
  {
    file: 'feat_hoas.hvm',
    intr: { norm: '"λx λy (x (x (x (x y))))"' },
    comp: { norm: '"λx λy (x (x (x (x y))))"' }
  },
  {
    file: 'feat_mut_ref.hvm',
    intr: { norm: '2' },
    comp: { norm: '2' }
  },
  {
    file: 'fuse_inc.hvm',
    intr: { norm: '1234567' },
    comp: { norm: '1234567' }
  },
  {
    file: 'fuse_mul.hvm',
    intr: { main: '@main = @mul(12345 12345)', norm: '152399025' },
    comp: { main: '@main = @mul(23232 32323)', norm: '750927936' }
  },
  {
    file: 'fuse_rot.hvm',
    intr: { main: '@main = (@read(@S) @sqr(12345 (@add(@S) @KA) @KB))', norm: '209865' },
    comp: { main: '@main = (@read(@S) @sqr(54321 (@add(@S) @KA) @KB))', norm: '923457' }
  },
  {
    file: 'enum_1D_match.hvm',
    intr: { main: '@main = @solve(16)', norm: '64' },
    comp: { main: '@main = @solve(18)', norm: '64' }
  },
];

// ### Load Previous Performance Data
// Load '_perf_.json' if it exists, otherwise initialize an empty object.
let perfData = {};
try {
  perfData = JSON.parse(fs.readFileSync('_perf_.json', 'utf8'));
} catch (e) {
  console.log('[INFO] _perf_.json not found, initializing as empty.');
}

// ### Counters for Errors and Warnings
// Track the number of errors (result mismatches) and warnings (perf regressions).
let errorCount = 0;
let warningCount = 0;

// ### Main Test Loop
for (const test of tests) {
  const file = test.file;
  // Check each mode: 'intr' (interpreted) and 'comp' (compiled).
  for (const mode of ['intr', 'comp']) {
    if (test[mode]) { // Only run if the mode is specified in the test.
      console.log(`Running ${file} in ${mode} mode...`);
      const { main, norm } = test[mode];

      // Prepare the test file by adjusting '@main' or copying as needed.
      prepareTestFile(file, main);

      let times = [];
      for (let i = 0; i < 4; i++) {
        const output = runTest(mode);
        const { result, time } = parseOutput(output);

        console.log("- time:", time.toFixed(7), "| norm: " + result);

        if (i === 0) {
          // First run: Check the result against the expected norm.
          if (result !== norm) {
            console.log(`[ERROR] For ${file} in ${mode} mode, expected "${norm}", but got "${result}"`);
            errorCount++;
          }
        } else {
          // Next 3 runs: Collect execution times.
          times.push(time);
        }
      }

      // Calculate average time from the last 3 runs.
      const averageTime = times.reduce((a, b) => a + b, 0) / times.length;
      const key = `${file}_${mode}`; // Unique key for this test and mode.
      const previousTime = perfData[key];

      // Check for performance regression (>5% slower) if previous data exists and time >= 0.05s.
      if (previousTime && averageTime > previousTime * 1.05 && averageTime >= 0.05) {
        console.log(`[WARNING] Performance regression for ${file} in ${mode} mode: ` +
                    `previous ${previousTime.toFixed(6)}s, now ${averageTime.toFixed(6)}s`);
        warningCount++;
      }

      // Update performance data with the new average time.
      perfData[key] = averageTime;
    }
  }
}

// ### Save Updated Performance Data
// Write the new performance data to '_perf_.json' with readable formatting.
fs.writeFileSync('_perf_.json', JSON.stringify(perfData, null, 2), 'utf8');

// ### Summary
// Log the total number of errors and warnings.
console.log(`All tests completed with ${errorCount} errors and ${warningCount} warnings.`);

// ### Helper Functions

/**
 * Prepares the test file by replacing the '@main' line or copying the original file.
 * @param {string} originalFile - The original .hvm file path.
 * @param {string|undefined} mainLine - The new '@main' line to use, if provided.
 */
function prepareTestFile(originalFile, mainLine) {
  if (mainLine) {
    // Read the original file and split into lines.
    const lines = fs.readFileSync(originalFile, 'utf8').split('\n');
    // Find the line starting with '@main'.
    const index = lines.findIndex(line => line.startsWith('@main'));
    if (index !== -1) {
      lines[index] = mainLine; // Replace the '@main' line.
    } else {
      // If no '@main' line exists, append the new one and warn.
      console.log(`[WARNING] No @main line found in ${originalFile}, adding at the end.`);
      lines.push(mainLine);
    }
    // Save the modified content to '_test_.hvm'.
    fs.writeFileSync('_test_.hvm', lines.join('\n'), 'utf8');
  } else {
    // If no mainLine is provided, copy the original file as is.
    fs.copyFileSync(originalFile, '_test_.hvm');
  }
}

/**
 * Runs the HVM test in the specified mode and returns the output.
 * @param {string} mode - 'intr' for interpreted, 'comp' for compiled.
 * @returns {string} - The command output.
 */
function runTest(mode) {
  let command;
  if (mode === 'intr') {
    command = 'cabal run -v0 hvm --project-dir=/Users/v/vic/dev/HVM -- run _test_.hvm -C -s';
  } else if (mode === 'comp') {
    command = 'hvm run _test_.hvm -c -C -s';
  } else {
    throw new Error(`Unknown mode: ${mode}`);
  }
  // Execute the command and capture the output as a UTF-8 string.
  return execSync(command, { encoding: 'utf8' });
}

/**
 * Parses the command output to extract the result and time.
 * @param {string} output - The output from running the HVM command.
 * @returns {{result: string, time: number}} - The result (first line) and time in seconds.
 */
function parseOutput(output) {
  const lines = output.split('\n');
  const result = lines[0].trim(); // First line is the result.
  const timeLine = lines.find(line => line.startsWith('TIME:'));
  if (!timeLine) {
    throw new Error('TIME line not found in output');
  }
  const timeStr = timeLine.split(' ')[1]; // Extract time value after 'TIME:'.
  const time = parseFloat(timeStr);
  if (isNaN(time)) {
    throw new Error(`Failed to parse time: ${timeStr}`);
  }
  return { result, time };
}


================================================
FILE: examples/bench_cnots.hvm
================================================
@main  = (@P24 @not @true)
@true  = λt λf t
@false = λt λf f
@not   = λX (X @false @true)

@P04 = λf
  ! &0{f00x f00y} = f
  ! &0{f01x f01y} = λk01 (f00x (f00y k01))
  ! &0{f02x f02y} = λk02 (f01x (f01y k02))
  ! &0{f03x f03y} = λk03 (f02x (f02y k03))
  λk04 (f03x (f03y k04))

@P08 = λf
  ! &0{f00x f00y} = f
  ! &0{f01x f01y} = λk01 (f00x (f00y k01))
  ! &0{f02x f02y} = λk02 (f01x (f01y k02))
  ! &0{f03x f03y} = λk03 (f02x (f02y k03))
  ! &0{f04x f04y} = λk04 (f03x (f03y k04))
  ! &0{f05x f05y} = λk05 (f04x (f04y k05))
  ! &0{f06x f06y} = λk06 (f05x (f05y k06))
  ! &0{f07x f07y} = λk07 (f06x (f06y k07))
  λk08 (f07x (f07y k08))

@P16 = λf
  ! &0{f00x f00y} = f
  ! &0{f01x f01y} = λk01 (f00x (f00y k01))
  ! &0{f02x f02y} = λk02 (f01x (f01y k02))
  ! &0{f03x f03y} = λk03 (f02x (f02y k03))
  ! &0{f04x f04y} = λk04 (f03x (f03y k04))
  ! &0{f05x f05y} = λk05 (f04x (f04y k05))
  ! &0{f06x f06y} = λk06 (f05x (f05y k06))
  ! &0{f07x f07y} = λk07 (f06x (f06y k07))
  ! &0{f08x f08y} = λk08 (f07x (f07y k08))
  ! &0{f09x f09y} = λk09 (f08x (f08y k09))
  ! &0{f10x f10y} = λk10 (f09x (f09y k10))
  ! &0{f11x f11y} = λk11 (f10x (f10y k11))
  ! &0{f12x f12y} = λk12 (f11x (f11y k12))
  ! &0{f13x f13y} = λk13 (f12x (f12y k13))
  ! &0{f14x f14y} = λk14 (f13x (f13y k14))
  ! &0{f15x f15y} = λk15 (f14x (f14y k15))
  λk16 (f15x (f15y k16))

@P20 = λf
  ! &0{f00x f00y} = f
  ! &0{f01x f01y} = λk01 (f00x (f00y k01))
  ! &0{f02x f02y} = λk02 (f01x (f01y k02))
  ! &0{f03x f03y} = λk03 (f02x (f02y k03))
  ! &0{f04x f04y} = λk04 (f03x (f03y k04))
  ! &0{f05x f05y} = λk05 (f04x (f04y k05))
  ! &0{f06x f06y} = λk06 (f05x (f05y k06))
  ! &0{f07x f07y} = λk07 (f06x (f06y k07))
  ! &0{f08x f08y} = λk08 (f07x (f07y k08))
  ! &0{f09x f09y} = λk09 (f08x (f08y k09))
  ! &0{f10x f10y} = λk10 (f09x (f09y k10))
  ! &0{f11x f11y} = λk11 (f10x (f10y k11))
  ! &0{f12x f12y} = λk12 (f11x (f11y k12))
  ! &0{f13x f13y} = λk13 (f12x (f12y k13))
  ! &0{f14x f14y} = λk14 (f13x (f13y k14))
  ! &0{f15x f15y} = λk15 (f14x (f14y k15))
  ! &0{f16x f16y} = λk16 (f15x (f15y k16))
  ! &0{f17x f17y} = λk17 (f16x (f16y k17))
  ! &0{f18x f18y} = λk18 (f17x (f17y k18))
  ! &0{f19x f19y} = λk19 (f18x (f18y k19))
  λk20 (f19x (f19y k20))

@P24 = λf
  ! &0{f00x f00y} = f
  ! &0{f01x f01y} = λk01 (f00x (f00y k01))
  ! &0{f02x f02y} = λk02 (f01x (f01y k02))
  ! &0{f03x f03y} = λk03 (f02x (f02y k03))
  ! &0{f04x f04y} = λk04 (f03x (f03y k04))
  ! &0{f05x f05y} = λk05 (f04x (f04y k05))
  ! &0{f06x f06y} = λk06 (f05x (f05y k06))
  ! &0{f07x f07y} = λk07 (f06x (f06y k07))
  ! &0{f08x f08y} = λk08 (f07x (f07y k08))
  ! &0{f09x f09y} = λk09 (f08x (f08y k09))
  ! &0{f10x f10y} = λk10 (f09x (f09y k10))
  ! &0{f11x f11y} = λk11 (f10x (f10y k11))
  ! &0{f12x f12y} = λk12 (f11x (f11y k12))
  ! &0{f13x f13y} = λk13 (f12x (f12y k13))
  ! &0{f14x f14y} = λk14 (f13x (f13y k14))
  ! &0{f15x f15y} = λk15 (f14x (f14y k15))
  ! &0{f16x f16y} = λk16 (f15x (f15y k16))
  ! &0{f17x f17y} = λk17 (f16x (f16y k17))
  ! &0{f18x f18y} = λk18 (f17x (f17y k18))
  ! &0{f19x f19y} = λk19 (f18x (f18y k19))
  ! &0{f20x f20y} = λk20 (f19x (f19y k20))
  ! &0{f21x f21y} = λk21 (f20x (f20y k21))
  ! &0{f22x f22y} = λk22 (f21x (f21y k22))
  ! &0{f23x f23y} = λk23 (f22x (f22y k23))
  λk24 (f23x (f23y k24))


================================================
FILE: examples/bench_count.hs
================================================
import Data.Word

count :: Word32 -> Word32 -> Word32
count 0 k = k
count p k = count (p - 1) (k + 1)

main :: IO ()
main = print $ count 2_000_000_000 0


================================================
FILE: examples/bench_count.hvm
================================================
@count(!n k) = ~n !k {
  0: k
  1+p: @count(p,(+ k 2))
}

@main = @count(2_000_000_000 0)

//WORK: 12000000004 interactions
//TIME: 1.1376750 seconds
//SIZE: 3 nodes
//PERF: 10547.828 MIPS


================================================
FILE: examples/bench_sum_range.hs
================================================
import Data.Word

data List = Nil | Cons !Word32 List

range :: Word32 -> List -> List
range 0 xs = xs
range n xs = range (n - 1) (Cons (n - 1) xs)

sum' :: List -> Word32 -> Word32
sum' Nil              r = r
sum' (Cons head tail) r = sum' tail (head + r)

main :: IO ()
main = do
  let !a = range 50_000_000 Nil
  print $ sum' a 0


================================================
FILE: examples/bench_sum_range.hvm
================================================
data List { #Nil #Cons{ head tail } }

@sum(!xs r) = ~xs !r {
  #Nil: r
  #Cons{head tail}: @sum(tail (+ head r))
}

@range(n xs) = ~n !xs {
  0: xs
  1+p: !&0{p0 p1}=p @range(p0 #Cons{p1 xs})
}

@main = @sum(@range(50_000_000 #Nil) 0)

//WORK: 600000007 interactions
//TIME: 0.1968570 seconds
//SIZE: 100000005 nodes
//PERF: 3047.898 MIPS


================================================
FILE: examples/bench_sum_range.py
================================================
class Nil:
    def __init__(self):
        self.tag = "Nil"

class Cons:
    def __init__(self, head, tail):
        self.tag = "Cons"
        self.head = head
        self.tail = tail

def range_custom(n, xs):
    result = xs
    for i in range(n, 0, -1):
        result = Cons(i - 1, result)
    return result

def sum_custom(lst):
    total = 0
    while lst.tag != "Nil":
        total = (total + lst.head) & 0xFFFFFFFF
        lst = lst.tail
    return total

def main():
    print(sum_custom(range_custom(50000000, Nil())))

if __name__ == "__main__":
    main()


================================================
FILE: examples/enum_1D_match.hvm
================================================
// This is similar to `enum_invert_add`, except the goal is harder:
// > Can we find a displacement (rotations) that will make two arrays equal?
// This file includes interesting insights, such as:
// - How to represent a space that can be rotated with binary trees
// - How to compress such space by compacting identical values
// - How to fuse the tree rotation (rotr/rotl) operations
// - How to recursively perform a bit-reversal permutation of a tree
// - How to implement equality efficiently assuming normalized trees
// Read: https://discord.com/channels/912426566838013994/915345481675186197/1351306002699390976

data Bool { #T #F }
data Pair { #P{x y} }
data Bin { #O{p} #I{p} #E }
data Nat { #S{p} #Z }
data Map { #BR{x y} #M0 #M1 }

// Prelude
// -------

// If/Exit
@when(!c t) = ~ c {
  0: *
  1+p: t
}

// And
!@and(!a b) = ~ a {
  0: 0
  1+p: b
}

// Repeated Application
@rep(n f x) = ~ n !f !x {
  0: x
  1+p: !&0{f0 f1}=f (f0 @rep(p f1 x))
}

// Squared Application
@sqr(n f x) = ~ n !f !x {
  0: x
  1+p: !&0{p0 p1}=(+ p 1)
     !&0{fA f0}=f
     !&0{f1 f2}=fA
     @sqr((/ p0 2) λk(f0 (f1 k)) @rep((% p1 2) f2 x))
}

// Church Nat (with fusion)
@nat(n) = λs λz @sqr(n s z)

// Fixed Point
@fix(f) = !&{f f0}=f !&{f f1}=f (f0 @fix(f1))

// λ-Encoded Booleans
// ------------------

// Booleans
@tru = λt λf t
@fal = λt λf f
@idf = λb λt λf (b t f)
@not = λb λt λf (b f t)

// Below, '@foo' represents two possible functions:
// - @foo(N x) = match N !x { #Z:x #S{n}:@foo(n (id  x)) }
// - @foo(N x) = match N !x { #Z:x #S{n}:@foo(n (not x)) }
// The question is: if we apply @foo to a large N, how long will it take to
// compute? In particular, will it fuse `not` and `id` correctly, even though
// they're behind a superposition?
@foo = λN
  ! step = λfoo_N λx
    // Universe 1-L: apply 'id' to 'x'
    // Universe 1-R: apply 'not' to 'x'
    ! &1{F0 F1} = foo_N
    ! &1{x0 x1} = x
    &1{
      (F0 (@idf x0))
      (F1 (@not x1))
    }
  ! base = λx x
  (N step base)

//@main = (@foo @nat(100001) @tru)

// λ-Encoded Nats
// --------------

// Constructors
@S(n) = λs λz (s n)
@Z    = λs λz z

// Nat
@nat_all = &1{@Z @S(@nat_all)}

// Nat → Nat
@nat_view(n) =
  ! case_s = λp #S{@nat_view(p)}
  ! case_z = #Z
  (n case_s case_z)

// U32 → Nat
@nat(n) = ~ n {
  0: @Z
  1+n: @S(@nat(n))
}

// λ-Encoded Bitstrings
// --------------------

// Constructors
@E     = λo λi λe e
@O(xs) = λo λi λe (o xs)
@I(xs) = λo λi λe (i xs)

// Bin
@bin_zero(n) = ~ n {
  0: @E
  1+n: @O(@bin_zero(n))
}

// U32 → U32 → Bin
@bin(l n) =
  @sqr(n λx@bin_inc(x) @bin_zero(l))

// Bin → U32
@bin_to_u32(x) =
  ! case_o = λp (+ (* @bin_to_u32(p) 2) 0)
  ! case_i = λp (+ (* @bin_to_u32(p) 2) 1)
  ! case_e = 0
  (x case_o case_i case_e)

// Bin → Bin
@bin_id(x) = λo λi λe
  (x o i e)

// Bin → Bin
@bin_inc(x) = λo λi λe 
  ! case_o = λp (i p)
  ! case_i = λp (o @bin_inc(p))
  ! case_e = e
  (x case_o case_i case_e)

// Bin → Bin → Bin
@bin_add(a b) =
  !case_o = λaP λb λo λi λe
    !case_o = λbP λaP (o @bin_add(aP bP))
    !case_i = λbP λaP (i @bin_add(aP bP))
    !case_e = λaP e
    (b case_o case_i case_e aP)
  !case_i = λaP λb λo λi λe
    !case_o = λbP λaP (i @bin_add(aP bP))
    !case_i = λbP λaP (o @bin_inc(@bin_add(aP bP)))
    !case_e = λaP e
    (b case_o case_i case_e aP)
  !case_e = λb b
  (a case_o case_i case_e b)

// Bin → Bin → Bin
@bin_add_2(a b) =
  @bin_sqr(a λx(@bin_inc(x)) b)

// Bin → Bin -> Bool
@bin_eql(a b) =
  !case_o = λaP λb
    !case_o = λbP λaP @bin_eql(aP bP)
    !case_i = λbP λaP 0
    !case_e = λaP 0
    (b case_o case_i case_e aP)
  !case_i = λaP λb
    !case_o = λbP λaP 0
    !case_i = λbP λaP @bin_eql(aP bP)
    !case_e = λaP 0
    (b case_o case_i case_e aP)
  !case_e = λb
    !case_o = λbP 0
    !case_i = λbP 0
    !case_e = 1
    (b case_o case_i case_e)
  (a case_o case_i case_e b)

// Bin → Bin
@bin_view(x) =
  ! case_o = λp #O{@bin_view(p)}
  ! case_i = λp #I{@bin_view(p)}
  ! case_e = #E
  (x case_o case_i case_e)

// U32 → Bin
@bin_all(n) = ~ n {
  0: λo λi λe e
  1+n:
    ! &1{n0 n1} = n
    &1{
      λo λi λe (o @bin_all(n0))
      λo λi λe (i @bin_all(n1))
    }
}

// Bin → U32
@bin_len(xs) =
  ! case_o = λxs (+ 1 @bin_len(xs))
  ! case_i = λxs (+ 1 @bin_len(xs))
  ! case_e = 0
  (xs case_o case_i case_e)

// Squared Application (with a bitstring)
@bin_sqr(xs f x) =
  ! case_o = λxs λf λx !&{f0 f1}=f @bin_sqr(xs λk(f0 (f1 k)) x)
  ! case_i = λxs λf λx !&{F f01}=f !&{f0 f1}=f01 @bin_sqr(xs λk(f0 (f1 k)) (F x))
  ! case_e = λf λx x
  (xs case_o case_i case_e f x)

////Test:
//@L = 64
//@A = @bin(@L 10000000)
//@X = @bin_all(@L)
//@B = @bin(@L 99999999)
//@main =
  //! solved = @bin_eql(@bin_add_2(@A @X) @B) // A + X = B
  //@when(solved @bin_to_u32(@X)) // Prints X

// λ-Encoded Bit Maps
// ------------------

// Constructors
@M0 = λbr λm0 λm1 m0
@M1 = λbr λm0 λm1 m1
@Br(a b) = λbr λm0 λm1 (br a b)
@BR(a b) =
  ! case_br_a = λax λay λb @Br(@Br(ax ay) b)
  ! case_m0_a = λb
    ! case_br_b = λbx λby @Br(@M0 @Br(bx by))
    ! case_m0_b = @M0
    ! case_m1_b = @Br(@M0 @M1)
    (b case_br_b case_m0_b case_m1_b)
  ! case_m1_a = λb
    ! case_br_b = λbx λby @Br(@M1 @Br(bx by))
    ! case_m0_b = @Br(@M1 @M0)
    ! case_m1_b = @M1
    (b case_br_b case_m0_b case_m1_b)
  (a case_br_a case_m0_a case_m1_a b)

// map_view : U32 → Map → Map
@map_view(map) =
  ! case_br = λx λy #BR{@map_view(x) @map_view(y)}
  ! case_m0 = #M0
  ! case_m1 = #M1
  (map case_br case_m0 case_m1)

// map_set : Bin → Map → Map
@map_set(bs map) =
  ! case_o = λbsP λmap 
    ! case_br = λx λy λbsP @BR((@map_set(bsP x)) y)
    ! case_m0 = λbsP @BR((@map_set(bsP @M0)) @M0)
    ! case_m1 = λbsP @BR((@map_set(bsP @M1)) @M1)
    (map case_br case_m0 case_m1 bsP)
  ! case_i = λbsP λmap 
    ! case_br = λx λy λbsP @BR(x (@map_set(bsP y)))
    ! case_m0 = λbsP @BR(@M0 (@map_set(bsP @M0)))
    ! case_m1 = λbsP @BR(@M1 (@map_set(bsP @M1)))
    (map case_br case_m0 case_m1 bsP)
  ! case_e = λmap @M1
  (bs case_o case_i case_e map)

// map_get : Bin → Map → U32
@map_get(bs map) =
  ! case_o = λbsP λmap 
    ! case_br = λx λy @map_get(bsP x)
    ! case_m0 = 0
    ! case_m1 = 1
    (map case_br case_m0 case_m1)
  ! case_i = λbsP λmap 
    ! case_br = λx λy @map_get(bsP y)
    ! case_m0 = 0
    ! case_m1 = 1
    (map case_br case_m0 case_m1)
  ! case_e = λmap 
    ! case_br = λx λy 0
    ! case_m0 = 0
    ! case_m1 = 1
    (map case_br case_m0 case_m1)
  (bs case_o case_i case_e map)

// map_eql : Map → Map → Bool
@map_eql(a b) =
  ! case_br_a = λax λay λb 
    ! case_br_b = λbx λby @and(@map_eql(ax bx) @map_eql(ay by))
    ! case_m0_b = 0
    ! case_m1_b = 0
    (b case_br_b case_m0_b case_m1_b)
  ! case_m0_a = λb
    ! case_br_b = λbx λby 0
    ! case_m0_b = 1
    ! case_m1_b = 0
    (b case_br_b case_m0_b case_m1_b)
  ! case_m1_a = λb 
    ! case_br_b = λbx λby 0
    ! case_m0_b = 0
    ! case_m1_b = 1
    (b case_br_b case_m0_b case_m1_b)
  (a case_br_a case_m0_a case_m1_a b)

// map_zero : Map
@map_zero = @M0

// map_alloc : U32 → Map
@map_alloc(d) = ~ d {
  0: @M0
  1+d: !&{d0 d1}=d @BR(@map_alloc(d0) @map_alloc(d1))
}

// map_swp : Map → Map
@map_swp(map) =
  ! case_br = λa λb
    ! case_br_a = λax λay λb
      ! case_br_b = λbx λby λax λay @BR(@BR(ax bx) @BR(ay by))
      ! case_m0_b = λax λay 0 // TODO
      ! case_m1_b = λax λay 0 // TODO
      (@map_swp(b) case_br_b case_m0_b case_m1_b ax ay)
    ! case_m0_a = λb
      ! case_br_b = λbx λby 0 // TODO
      ! case_m0_b = @BR(@M0 @M0)
      ! case_m1_b = @BR(@M0 @M1)
      (@map_swp(b) case_br_b case_m0_b case_m1_b)
    ! case_m1_a = λb
      ! case_br_b = λbx λby 0 // TODO
      ! case_m0_b = @BR(@M1 @M0)
      ! case_m1_b = @BR(@M1 @M1)
      (@map_swp(b) case_br_b case_m0_b case_m1_b)
    (@map_swp(a) case_br_a case_m0_a case_m1_a b)
  ! case_m0 = @M0
  ! case_m1 = @M1
  (map case_br case_m0 case_m1)

// map_inv : Map → Map
@map_inv(map) = λbr λm0 λm1
  ! case_br = λx λy (br @map_inv(x) @map_inv(y))
  ! case_m0 = m0
  ! case_m1 = m1
  (@map_swp(map) case_br case_m0 case_m1)

// map_rotr : Map → Map
@map_rotr(map) = λbr λm0 λm1
  ! case_br = λx λy (br @map_rotr(y) x)
  ! case_m0 = m0
  ! case_m1 = m1
  (map case_br case_m0 case_m1)

// map_rotl : Map → Map
@map_rotl(map) = λbr λm0 λm1
  ! case_br = λx λy (br y @map_rotl(x))
  ! case_m0 = m0
  ! case_m1 = m1
  (map case_br case_m0 case_m1)

// map_spinr : Bin → Map → Map
@map_spinr(xs map) =
  @bin_sqr(xs λx(@map_rotr(x)) map)

@solve(!&L) =
  ! A = @map_set(@bin(L 0x00) @map_set(@bin(L 0x80) @map_zero))
  ! B = @map_set(@bin(L 0x40) @map_set(@bin(L 0xC0) @map_zero))
  ! E = @map_eql(@map_spinr(@bin_all(L) A) B)
  @when(E @bin_to_u32(@bin_all(L)))

@main = @solve(20)


================================================
FILE: examples/enum_bin.hvm
================================================
// Bitstrings
data Bin { #O{pred} #I{pred} #E }

// Pairs
data Pair { #Pair{fst snd} }

// If-Then-Else
@if(b t f) = ~b {
  0: f
  _: t
}

// Not
@not(a) = ~a {
  0: 1
  _: 0
}

// And
@and(a b) = ~a {
  0: 0
  _: b
}

// Or
@or(a b) = ~a {
  0: b
  _: 1
}

// Converts a Bin to an U32
@u32(b) = ~b{
  #O{p}: (+ (* 2 @u32(p)) 0)
  #I{p}: (+ (* 2 @u32(p)) 1)
  #E: 0
}

// Converts an U32 to a Bin of given size
@bin(s n) = ~s{
  0: #E
  1+p: !&0{n0 n1}=n ~(% n0 2) !p !n1 {
    0: #O{@bin(p (/ n1 2))}
    _: #I{@bin(p (/ n1 2))}
  }
}

// Bin Equality
@eq(a b) = ~a !b {
  #E: ~b {
    #O{bp}: 0
    #I{bp}: 0
    #E: 1
  }
  #O{ap}: ~b{
    #O{bp}: @eq(ap bp)
    #I{bp}: 0
    #E: 0
  }
  #I{ap}: ~b{
    #O{bp}: 0
    #I{bp}: @eq(ap bp)
    #E: 0
  }
}

// Increments a Bin
@inc(a) = ~a{
  #O{p}: #I{p}
  #I{p}: #O{@inc(p)}
  #E: #E
}

// Decrements a Bin
@dec(a) = ~a{
  #O{p}: #O{@dec(p)}
  #I{p}: #I{p}
  #E: #E
}

// Adds two Bins
@add(a b) = ~a !b {
  #O{ap}: ~b !ap {
    #O{bp}: #O{@add(ap bp)}
    #I{bp}: #I{@add(ap bp)}
    #E: #O{ap}
  }
  #I{ap}: ~b !ap {
    #O{bp}: #I{@add(ap bp)}
    #I{bp}: #O{@inc(@add(ap bp))}
    #E: #I{ap}
  }
  #E: #E
}

// Muls two Bins
@mul(a b) = ~b !a {
  #E: #E
  #O{bp}: #O{@mul(a bp)}
  #I{bp}: !&0{a0 a1}=a @add(a0 #O{@mul(a1 bp)})
}

// Concatenates two Bins
@cat(a b) = ~a !b {
  #O{ap}: #O{@cat(ap b)}
  #I{ap}: #I{@cat(ap b)}
  #E: b
}

// Lt two Bins (a < b)
@lt(a b) = @lt_go(0 a b)

@lt_go(&k a b) = ~a !b {
  #E: ~b {
    #E: &k
    #O{bp}: 1
    #I{bp}: 1
  }
  #O: λap ~b !ap {
    #E: 0
    #O{bp}: @lt_go(&k ap bp)
    #I{bp}: @lt_go(1 ap bp)
  }
  #I: λap ~b !ap {
    #E: 0
    #O{bp}: @lt_go(0 ap bp)
    #I{bp}: @lt_go(&k ap bp)
  }
}

// Take the first N bits of a Bin
@take(n b) = ~n {
  0: #E
  1+p: ~b !p {
    #O{bp}: #O{@take(p bp)}
    #I{bp}: #I{@take(p bp)}
    #E: #E
  }
}

// Enums all Bins of given size (label 1)
@all1(s) = ~s{
  0: #E
  1+p: !&1{p0 p1}=p &1{
    #O{@all1(p0)}
    #I{@all1(p1)}
  }
}

// Enums all Bins of given size (label 2)
@all2(s) = ~s{
  0: #E
  1+p: !&2{p0 p1}=p &2{
    #O{@all2(p0)}
    #I{@all2(p1)}
  }
}

//// 4:
//@K = 1
//@H = 2
//@S = 4
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 3) @bin(@H 0))
//@B = @cat(@bin(@H 3) @bin(@H 0))
//@P = @bin(@S 9)

//// 6:
//@K = 2
//@H = 3
//@S = 6
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 5) @bin(@H 0))
//@B = @cat(@bin(@H 5) @bin(@H 0))
//@P = @bin(@S 25)

//// 8:
//@K = 3
//@H = 4
//@S = 8
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 13) @bin(@H 0))
//@B = @cat(@bin(@H 13) @bin(@H 0))
//@P = @bin(@S 169)

//// 10:
//@K = 4
//@H = 5
//@S = 10
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 31) @bin(@H 0))
//@B = @cat(@bin(@H 19) @bin(@H 0))
//@P = @bin(@S 589)

//// 12:
//@K = 5
//@H = 6
//@S = 12
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 53) @bin(@H 0))
//@B = @cat(@bin(@H 37) @bin(@H 0))
//@P = @bin(@S 1961)

//// 14:
//@K = 6
//@H = 7
//@S = 14
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 109) @bin(@H 0))
//@B = @cat(@bin(@H 97) @bin(@H 0))
//@P = @bin(@S 10573)

//// 16:
//@K = 7
//@H = 8
//@S = 16
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 173) @bin(@H 0))
//@B = @cat(@bin(@H 233) @bin(@H 0))
//@P = @bin(@S 40309)

//// 18:
//@K = 8
//@H = 9
//@S = 18
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 449) @bin(@H 0))
//@B = @cat(@bin(@H 389) @bin(@H 0))
//@P = @bin(@S 174661)

//// 20:
//@K = 9
//@H = 10
//@S = 20
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 769) @bin(@H 0))
//@B = @cat(@bin(@H 569) @bin(@H 0))
//@P = @bin(@S 437561)

//// 22:
//@K = 10
//@H = 11
//@S = 22
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 1423) @bin(@H 0))
//@B = @cat(@bin(@H 1229) @bin(@H 0))
//@P = @bin(@S 1748867)

//// 24:
//@K = 11
//@H = 12
//@S = 24
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 2437) @bin(@H 0))
//@B = @cat(@bin(@H 2333) @bin(@H 0))
//@P = @bin(@S 5685521)

//// 26:
//@K = 12
//@H = 13
//@S = 26
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 4987) @bin(@H 0))
//@B = @cat(@bin(@H 6203) @bin(@H 0))
//@P = @bin(@S 30934361)

//// 28:
//@K = 13
//@H = 14
//@S = 28
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 13513) @bin(@H 0))
//@B = @cat(@bin(@H 13721) @bin(@H 0))
//@P = @bin(@S 185411873)

//// 30:
//@K = 14
//@H = 15
//@S = 30
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 24923) @bin(@H 0))
//@B = @cat(@bin(@H 19489) @bin(@H 0))
//@P = @bin(@S 485724347)

//// 32:
//@K = 15
//@H = 16
//@S = 32
//@X = @cat(@all1(@H) @bin(@H 0))
//@Y = @cat(@all2(@H) @bin(@H 0))
//@A = @cat(@bin(@K 47791) @bin(@H 0))
//@B = @cat(@bin(@H 54881) @bin(@H 0))
//@P = @bin(@S 2622817871)

@main = 
  ! cond = @eq(@mul(@X @Y) @P)
  ! not1 = @not(@eq(@X @bin(@S 1)))
  @if(@and(cond not1) λt(t @u32(@X) @u32(@Y)) *)


================================================
FILE: examples/enum_coc_smart.hvm
================================================
// Superposes dependently typed λ-terms. With it, solving:
//   (?X λt(t A B)) == λt(t B A)
// Where
//   ?X : ∀A. (∀P. A -> A -> P) -> (∀P. A -> A -> P)
// Is down to 3k interactions. Of course, that's not too surprising given there
// are only two functions of that type, but the real win is that now we only
// need to make a choice when selecting an element from context. Intros and
// elims follow directly from types, no need for choices / superpositions.

data List {
  #Nil
  #Cons{head tail}
}

data Bits {
  #O{pred}
  #I{pred}
  #E
}

data Term {
  #Var{idx}
  #Pol{bod}
  #All{inp bod}
  #Lam{bod}
  #App{fun arg}
  #U32
  #Num{val}
}

data Pair {
  #Pair{fst snd}
}

data Maybe {
  #None
  #Some{value}
}

// Prelude
// -------

@if(c t f) = ~ c {
  0: f
  _: t
}

@when(c t) = ~ c {
  0: *
  _: t
}

@tail(xs) = ~ xs {
  #Nil: *
  #Cons{h t}: t
}

@and(a b) = ~ a !b {
  0: 0
  _: b
}

@unwrap(mb) = ~mb {
  #None: *
  #Some{x}: x
}

@seq(str) = ~ str {
  #Nil: #Nil
  #Cons{h t}:
    !! h = h
    !! t = @seq(t)
    #Cons{h t}
}

@tm0(x) = !&0{a b}=x a
@tm1(x) = !&0{a b}=x b

// Stringification
// ---------------

@show_nat(nat) = ~nat { 
  0: λk #Cons{'Z' k}
  1+p: λk #Cons{'S' (@show_nat(p) k)}
}

@show_dec(&n r) =
  ! chr = (+ (% n 10) '0')
  ~ (< n 10) !chr !r {
    0: @show_dec((/ n 10) #Cons{chr r})
    _: #Cons{chr r}
  }

@do_show_dec(n) = @show_dec(n #Nil)

@show_bits(bits) = ~bits {
  #O{pred}: λk #Cons{'#' #Cons{'O' #Cons{'{' (@show_bits(pred) #Cons{'}' k})}}}
  #I{pred}: λk #Cons{'#' #Cons{'I' #Cons{'{' (@show_bits(pred) #Cons{'}' k})}}}
  #E: λk #Cons{'#' #Cons{'E' k}}
}

@do_show_bits(bits) = (@show_bits(bits) #Nil)

@show_term(term dep) = ~term !&dep {
  #Var{idx}: λk
    @show_dec(idx k)
  #Pol{bod}: λk
    #Cons{'∀' (@show_term((bod #Var{dep}) (+ dep 1)) k)}
  #All{inp bod}: λk
    #Cons{'Π'
    #Cons{'('
    (@show_term(inp dep)
    #Cons{')'
    (@show_term((bod #Var{dep}) (+ dep 1))
    k)})}}
  #Lam{bod}: λk
    #Cons{'λ' (@show_term((bod #Var{dep}) (+ dep 1)) k)}
  #App{fun arg}: λk
    #Cons{'(' (@show_term(fun dep)
    #Cons{' ' (@show_term(arg dep)
    #Cons{')' k})})}
  #U32: λk
    #Cons{'U' k}
  #Num{val}: λk
    #Cons{'#' @show_dec(val k)}
}

@do_show_term(term) = (@show_term(term 0) #Nil)

// Equality
// --------

@eq(a b d) = ~ @wnf(a) !b !d {
  #Var{aI}: ~ @wnf(b) !d {
    #Var{bI}: (== aI bI)
    else: 0
  }
  #Pol{aB}: ~ @wnf(b) !&d {
    #Pol{bB}:
      @eq((aB #Var{d}) (bB #Var{d}) (+ d 1))
    else: 0
  }
  #All{aI aB}: ~ @wnf(b) !&d {
    #All{bI bB}:
      @and(@eq(aI bI d) @eq((aB #Var{d}) (bB #Var{d}) (+ d 1)))
    else: 0
  }
  #Lam{aB}: ~ @wnf(b) !&d {
    #Lam{bB}:
      @eq((aB #Var{d}) (bB #Var{d}) (+ d 1))
    else: 0
  }
  #App{aF aX}: ~ @wnf(b) !&d {
    #App{bF bX}:
      @and(@eq(aF bF d) @eq(aX bX d))
    else: 0
  }
  #U32: ~ @wnf(b) !d {
    #U32: 1
    else: 0
  }
  #Num{aV}: ~ @wnf(b) !d {
    #Num{bV}: (== aV bV)
    else: 0
  }
}

// Evaluation
// ----------

@wnf(term) = ~ term { 
  #Var{idx}: #Var{idx}
  #Pol{bod}: #Pol{bod}
  #All{inp bod}: #All{inp bod}
  #Lam{bod}: #Lam{bod}
  #App{fun arg}: @wnf_app(@wnf(fun) arg)
  #U32: #U32
  #Num{val}: #Num{val}
}

@wnf_app(f x) = ~ f !x {
  #Var{idx}: #App{#Var{idx} x}
  #Pol{bod}: #App{#Pol{bod} x}
  #All{inp bod}: #App{#All{inp bod} x}
  #Lam{bod}: @wnf((bod @wnf(x)))
  #App{fun arg}: #App{#App{fun arg} x}
  #U32: #U32
  #Num{val}: #App{#Num{val} x}
}

// Enumeration
// -----------

@all(&L typ &dep ctx) =
  @intr(L typ dep ctx)

@intr(!&L typ !&dep ctx) =
  ~ typ !ctx {
    #All{t_inp t_bod}: 
      !&0{ctx bod} = @all(L (t_bod #Var{dep}) (+ dep 1) #Cons{#Some{&0{$x t_inp}} ctx})
      &0{@tail(ctx) #Lam{λ$x(bod)}}
    #Pol{t_bod}:
      @intr(L (t_bod #Var{dep}) (+ dep 1) ctx)
    #U32:
      @pick(L #U32 dep ctx λk(k))
    #Var{idx}:
      @pick(L #Var{idx} dep ctx λk(k))
    #App{fun arg}:
      @pick(L #App{fun arg} dep ctx λk(k))
    #Lam{bod}: *
    #Num{val}: *
  }

@pick(!&L typ !&dep ctx rem) = 
  ~ctx {
    #Nil: *
    #Cons{ann ctx}:
      !&L{typL typR} = typ
      !&L{remL remR} = rem
      !&L{annL annR} = ann
      !&L{ctxL ctxR} = ctx
      &L{
        @elim(L typL dep (remL ctxL) annL)
        @pick(L typR dep ctxR λk(remR #Cons{annR k}))
      }
  }

@elim(!&L typ !&dep ctx ann) = ~ann {
  #None: *
  #Some{ann}:
    ! &0{v t} = ann
    ~ t !typ !ctx !v {
      #Pol{t_bod}:
        ! &{typ0 typ1} = typ
        @elim(L typ0 dep ctx #Some{&0{v (t_bod typ1)}})
      #All{t_inp t_bod}:
        ! &0{ctx arg}   = @all((+(* L 2)1) t_inp dep ctx)
        ! &{arg0 arg1}  = arg
        @elim((+(* L 2)0) typ dep ctx #Some{&0{#App{v arg0} (t_bod arg1)}})
      #U32: 
        @when(@eq(typ #U32 dep) &0{ctx v})
      #Var{idx}:
        @when(@eq(typ #Var{idx} dep) &0{ctx v})
      #App{fun arg}:
        @when(@eq(typ #App{fun arg} dep) &0{ctx v})
      #Lam{bod}: *
      #Num{val}: *
    }
}

// T0 = Π(x:U32) Π(y:U32) Π(z:U32) U32
@T0 =
  #All{#U32 λx
  #All{#U32 λy
  #All{#U32 λz
  #U32}}}

// CBool = ∀P Π(x:P) Π(y:P) P
@CBool =
  #Pol{λ&p
  #All{p λx
  #All{p λy
  p}}}

// Tup(A B) = ∀P Π(p: Π(x:A) Π(y:B) P) P
@Tup(A B) =
  #Pol{λ&p
  #All{
    #All{A λx
    #All{B λy
    p}} λ_
  p}}

// TupF = ∀A Π(x: (Tup A A)) (Tup A A)
@TupF =
  #Pol{λ&A
  #Pol{λ&B
  #All{@Tup(A A) λx
  @Tup(A A)}}}

// Tests
// -----

//A= λt(t 1 2)
@A = #Lam{λt #App{#App{t #Num{1}} #Num{2}}}

//B= λt(t 2 1)
@B = #Lam{λt #App{#App{t #Num{2}} #Num{1}}}

//R= λx(x λaλbλt(t b a))
@R = #Lam{λx #App{x #Lam{λa #Lam{λb #Lam{λt #App{#App{t b} a}}}}}}

// X : ∀A. (Tup A A) -> (Tup A A) = <all terms>
@T = @TupF
@X = @tm1(@all(1 @T 0 #Nil))

// Solves for `?X` in `(?X λt(t A B)) == λt(t B A)`.
// It finds `?X = λλ(0 λλ((1 3) 2))` in 3k interactions.
@main = @when(@eq(#App{@X @A} @B 0) @do_show_term(@X))

//@main = @X


================================================
FILE: examples/enum_invert_add.hvm
================================================
// This file shows how we can use superpositionn to apply multiple functions to
// the same input, in a way that "shares computations" across different calls.
// In this example, we apply `add(0)`, `add(1)`, ..., `add(2^64-1)` - i.e., 2^64
// different functions, to the same input, 10,000,000. Then, we check
// `addN(10,000,000) = 99,999,999`, and eliminate all universes that don't
// satisfy this equation. Since `inc` fuses, and since `addN` is a superposition
// of every function, there is a *massive* amount of sharing between different
// universes (i.e., the universe where we check `add1(10,000,00) = 99,999,999`,
// the universe where we check `add2(10,000,00) = 99,999,999`, and so on). Thus,
// after trying all possible universes, we eventually find out that the only
// universe that is *not* destroyed is `add89999999(10,000,00) = 99,999,999`.
// We then use this information to output `89,999,999`, which is the solution to
// the `10,000,000 + X = 99,999,999` equation. And the magic is: this happens in
// just 196411 interactions (0.001 seconds). In any other language, this would
// take *at least* enough time to call `add` 2^64 times, which is probably
// weeks. In other words, we effectivelly implemented 'sub' efficiently, by
// using superpositions to enumerate the domain of 'add(N,x)', inverting it.  Of
// course, 'add' is a very simple function. Can this technique be used to invert
// more complex functions efficiently? See 'enum_1D_match.hvml' for an attempt.

data Bool { #T #F }
data Pair { #P{x y} }
data Bin { #O{p} #I{p} #E }
data Nat { #S{p} #Z }
data Map { #BR{x y} #M0 #M1 }

// Prelude
// -------

// If/Exit
@when(!c t) = ~ c {
  0: *
  _: t
}

// And
!@and(!a b) = ~ a {
  0: 0
  _: b
}

// Repeated Application
@rep(n f x) = ~ n !f !x {
  0: x
  1+p: !&0{f0 f1}=f (f0 @rep(p f1 x))
}

// Squared Application
@sqr(n f x) = ~ n !f !x {
  0: x
  1+p:
    !&0{p0 p1}=(+ p 1)
    !&0{fA f0}=f
    !&0{f1 f2}=fA
    @sqr((/ p0 2) λk(f0 (f1 k)) @rep((% p1 2) f2 x))
}

// Church Nat (with fusion)
@nat(n) = λs λz @sqr(n s z)

// Fixed Point
@fix(f) = !&{f f0}=f !&{f f1}=f (f0 @fix(f1))

// λ-Encoded Nats
// --------------

// Constructors
@S(n) = λs λz (s n)
@Z    = λs λz z

// Nat
@nat_all = &1{@Z @S(@nat_all)}

// Nat → Nat
@nat_view(n) =
  ! case_s = λp #S{@nat_view(p)}
  ! case_z = #Z
  (n case_s case_z)

// U32 → Nat
@nat(n) = ~ n {
  0: @Z
  1+n: @S(@nat(n))
}

// λ-Encoded Bitstrings
// --------------------

// Constructors
@E     = λo λi λe e
@O(xs) = λo λi λe (o xs)
@I(xs) = λo λi λe (i xs)

// Bin
@bin_zero(n) = ~ n {
  0: @E
  1+n: @O(@bin_zero(n))
}

// U32 → U32 → Bin
@bin(l n) =
  @sqr(n λx@bin_inc(x) @bin_zero(l))

// Bin → U32
@bin_to_u32(x) =
  ! case_o = λp (+ (* @bin_to_u32(p) 2) 0)
  ! case_i = λp (+ (* @bin_to_u32(p) 2) 1)
  ! case_e = 0
  (x case_o case_i case_e)

// Bin → Bin
@bin_id(x) = λo λi λe
  (x o i e)

// Bin → Bin
@bin_inc(x) = λo λi λe 
  ! case_o = λp (i p)
  ! case_i = λp (o @bin_inc(p))
  ! case_e = e
  (x case_o case_i case_e)

// Bin → Bin → Bin
@bin_add(a b) =
  !case_o = λaP λb λo λi λe
    !case_o = λbP λaP (o @bin_add(aP bP))
    !case_i = λbP λaP (i @bin_add(aP bP))
    !case_e = λaP e
    (b case_o case_i case_e aP)
  !case_i = λaP λb λo λi λe
    !case_o = λbP λaP (i @bin_add(aP bP))
    !case_i = λbP λaP (o @bin_inc(@bin_add(aP bP)))
    !case_e = λaP e
    (b case_o case_i case_e aP)
  !case_e = λb b
  (a case_o case_i case_e b)

// Bin → Bin → Bin
@bin_add_2(a b) =
  @bin_sqr(a λx(@bin_inc(x)) b)

// Bin → Bin -> Bool
@bin_eql(a b) =
  !case_o = λaP λb
    !case_o = λbP λaP @bin_eql(aP bP)
    !case_i = λbP λaP 0
    !case_e = λaP 0
    (b case_o case_i case_e aP)
  !case_i = λaP λb
    !case_o = λbP λaP 0
    !case_i = λbP λaP @bin_eql(aP bP)
    !case_e = λaP 0
    (b case_o case_i case_e aP)
  !case_e = λb
    !case_o = λbP 0
    !case_i = λbP 0
    !case_e = 1
    (b case_o case_i case_e)
  (a case_o case_i case_e b)

// Bin → Bin
@bin_view(x) =
  ! case_o = λp #O{@bin_view(p)}
  ! case_i = λp #I{@bin_view(p)}
  ! case_e = #E
  (x case_o case_i case_e)

// U32 → Bin
@bin_all(n) = ~ n {
  0: λo λi λe e
  1+n:
    ! &1{n0 n1} = n
    &1{
      λo λi λe (o @bin_all(n0))
      λo λi λe (i @bin_all(n1))
    }
}

// Bin → U32
@bin_len(xs) =
  ! case_o = λxs (+ 1 @bin_len(xs))
  ! case_i = λxs (+ 1 @bin_len(xs))
  ! case_e = 0
  (xs case_o case_i case_e)

// Squared Application (with a bitstring)
@bin_sqr(xs f x) =
  ! case_o = λxs λf λx !&{f0 f1}=f @bin_sqr(xs λk(f0 (f1 k)) x)
  ! case_i = λxs λf λx !&{F f01}=f !&{f0 f1}=f01 @bin_sqr(xs λk(f0 (f1 k)) (F x))
  ! case_e = λf λx x
  (xs case_o case_i case_e f x)

//Test:
@L = 64
@A = @bin(@L 10000000)
@X = @bin_all(@L)
@B = @bin(@L 99999999)
@main =
  ! solved = @bin_eql(@bin_add_2(@A @X) @B) // A + X = B
  @when(solved @bin_to_u32(@X)) // Prints X


================================================
FILE: examples/enum_lam_naive_blc.hs
================================================
-- This is the Haskell version of the naive λ-Calculus enumerator, that just
-- generates all BLC strings and attempts one by one in a loop.

{-# LANGUAGE PatternSynonyms #-}

import Control.Monad (forM_, when)
import Data.Bits (testBit)
import System.Exit (exitSuccess)

data Bits = O Bits | I Bits | E deriving Show
data Term = Lam Term | App Term Term | Var Int deriving Show
data HTerm = HLam (HTerm -> HTerm) | HApp HTerm HTerm | HVar Int | HSub HTerm
data Pair a b = Pair a b deriving Show
data Result a r = Result a r deriving Show

-- Prelude
-- -------

bits :: Int -> Int -> Bits
bits 0 _ = E
bits n i
  | testBit i (n-1) = I (bits (n-1) i)
  | otherwise       = O (bits (n-1) i)

-- Parser
-- ------

parseTerm :: Bits -> Maybe (Result Bits Term)
parseTerm (O src) = do
  Result src nat <- parseInt src
  return $ Result src (Var nat)
parseTerm (I src) = case src of
  O src -> do
    Result src bod <- parseTerm src
    return $ Result src (Lam bod)
  I src -> do
    Result src fun <- parseTerm src
    Result src arg <- parseTerm src
    return $ Result src (App fun arg)
  E -> Nothing
parseTerm E = Nothing

parseInt :: Bits -> Maybe (Result Bits Int)
parseInt (O src) = Just $ Result src 0
parseInt (I src) = do
  Result src nat <- parseInt src
  return $ Result src (1 + nat)
parseInt E = Just $ Result E 0

doParseTerm :: Bits -> Maybe Term
doParseTerm src = do
  Result _ term <- parseTerm src
  return term

doParseHTerm :: Bits -> Maybe HTerm
doParseHTerm src = do
  Result _ term <- parseTerm src
  doBindTerm term

-- Binding
-- -------
-- NOTE: since Haskell doesn't have global variables ($x), we'll bind in two passes
-- The first pass just binds all variables
-- The second pass excludes non-affine terms

uses :: Term -> Int -> Int
uses (Lam bod)     idx = uses bod (idx + 1)
uses (App fun arg) idx = uses fun idx + uses arg idx
uses (Var n)       idx = if n == idx then 1 else 0

affine :: Term -> Bool
affine term = go term 0 where
  go (Lam bod)     dep = uses bod 0 <= 1 && go bod (dep + 1)
  go (App fun arg) dep = go fun dep && go arg dep
  go (Var n)       dep = n < dep

doBindTerm :: Term -> Maybe HTerm
doBindTerm term | affine term = Just (bindTerm term [])
doBindTerm term | otherwise   = Nothing

bindTerm :: Term -> [HTerm] -> HTerm
bindTerm (Lam bod)     ctx = HLam $ \x -> bindTerm bod (x : ctx)
bindTerm (App fun arg) ctx = HApp (bindTerm fun ctx) (bindTerm arg ctx)
bindTerm (Var idx)     ctx = get idx ctx

get :: Int -> [HTerm] -> HTerm
get 0 (x:_) = x
get n (_:t) = get (n-1) t
get _ []    = error "*"

-- Stringification
-- ---------------

showBits :: Bits -> String -> String
showBits (O pred) k = '#':'O':'{': showBits pred ('}':k)
showBits (I pred) k = '#':'I':'{': showBits pred ('}':k)
showBits E        k = '#':'E':k

doShowBits :: Bits -> String
doShowBits bits = showBits bits []

showTerm :: HTerm -> Int -> String -> String
showTerm (HVar idx)     dep k = show (dep - idx - 1) ++ k
showTerm (HLam bod)     dep k = 'λ' : showTerm (bod (HVar dep)) (dep+1) k
showTerm (HApp fun arg) dep k = '(' : showTerm fun dep (' ' : showTerm arg dep (')':k))
showTerm (HSub _)       _   _ = error "*"

doShowTerm :: HTerm -> String
doShowTerm term = showTerm term 0 []

-- Equality
-- --------

eq :: HTerm -> HTerm -> Int -> Bool
eq (HLam aBod)      (HLam bBod)      dep = eq (aBod (HVar dep)) (bBod (HVar dep)) (dep+1)
eq (HApp aFun aArg) (HApp bFun bArg) dep = eq aFun bFun dep && eq aArg bArg dep
eq (HVar aIdx)      (HVar bIdx)      _   = aIdx == bIdx
eq _                _                _   = False

-- Evaluation
-- ----------

wnf :: HTerm -> HTerm
wnf (HLam bod)     = HLam bod
wnf (HApp fun arg) = app (wnf fun) arg
wnf (HVar idx)     = HVar idx
wnf (HSub val)     = HSub val

app :: HTerm -> HTerm -> HTerm
app (HLam bod)     x = wnf (bod (wnf x))
app (HApp fun arg) x = HApp (HApp fun arg) x
app (HVar idx)     x = HApp (HVar idx) x
app (HSub val)     x = HApp (HSub val) x

-- Normalization
-- -------------

nf :: HTerm -> HTerm
nf term = case wnf term of
  HLam bod     -> HLam $ \x -> nf (bod (HSub x))
  HApp fun arg -> HApp (nf fun) (nf arg)
  HVar idx     -> HVar idx
  HSub val     -> val

-- Main
-- ----

a :: HTerm
a = HLam $ \t -> HApp (HApp t (HVar 1)) (HVar 2)

b :: HTerm
b = HLam $ \t -> HApp (HApp t (HVar 2)) (HVar 1)

r :: HTerm
r = HLam $ \x -> HApp x (HLam $ \a -> HLam $ \b -> HLam $ \t -> HApp (HApp t b) a)

-- Solve `?x` in `λaλb(?x λt(t a b)) == λaλbλt(t b a)`
main :: IO ()
main = forM_ [0..2^25-1] $ \i -> do
  let bs = bits 25 i
  case doParseHTerm bs of
    Nothing -> do
      return ()
    Just x -> do
      let solved = eq (nf (HApp x a)) b 0
      -- putStrLn $ show bs
      -- putStrLn $ doShowTerm x
      -- putStrLn $ doShowTerm (nf x)
      -- putStrLn $ show solved
      -- putStrLn $ "----------"
      when solved $ do
        putStrLn (doShowTerm x)
        exitSuccess


================================================
FILE: examples/enum_lam_naive_blc.hvm
================================================
// This is the HVM version of the naive λ-Calculus enumerator. It superposes all
// binary λ-calculus strings, parses, and applies to the equation we want to
// solve. Despite the use of superpositions, this performs about the same as the
// Haskell version, since HVM is forced to enumerate all terms anyway, and not a
// lot of sharing is possible. This takes about 32 million interactions. A
// better approach is provided in the lambda_enumerator_optimal.hvml file, which
// brings this number down to just 72k interactions.
// UPDATE: actually - by just avoiding the issue depicted on:
// https://gist.github.com/VictorTaelin/fb798a5bd182f8c57dd302380f69777a
// We can bring this naive BLC enumerator down to 1.7m interactions. Not quite
// as fast as 72k, but this makes it ~37x faster than the Haskell version.
// UPDATE: somehow this needs only ~111k interactions after using a lazy @and...

data List {
  #Nil
  #Cons{head tail}
}

data Bits {
  #O{pred}
  #I{pred}
  #E
}

data Term {
  #Lam{bod}
  #App{fun arg}
  #Var{idx}
}

data pair {
  #Pair{fst snd}
}

data Maybe {
  #None
  #Some{value}
}

// Prelude
// -------

@if(c t f) = ~ c {
  0: f
  _: t
}

@when(c t) = ~ c {
  0: *
  _: t
}

@tail(xs) = ~ xs {
  #Nil: *
  #Cons{h t}: t
}

@and(a b) = ~ a !b {
  0: 0
  _: b
}

// Parsing
// -------

@do_parse_term(src) =
  ! &0{src term} = @parse_term(src)
  @do_bind_term(term)

@parse_term(src) = ~src {
  #O{src}:
    ! &0{src nat} = @parse_nat(src)
    &0{src #Var{nat}}
  #I{src}: ~src {
    #O{src}:
      ! &0{src bod} = @parse_term(src)
      &0{src #Lam{bod}}
    #I{src}:
      ! &0{src fun} = @parse_term(src)
      ! &0{src arg} = @parse_term(src)
      &0{src #App{fun arg}}
    #E: * 
  }
  #E: *
}

@parse_nat(src) = ~src {
  #O{src}: &0{src 0}
  #I{src}:
    ! &0{src nat} = @parse_nat(src)
    &0{src (+ 1 nat)}
  #E: &0{#E 0}
}

// Binding
// -------

@do_bind_term(term) =
  ! &0{ctx term} = @bind_term(term #Nil)
  term

@bind_term(term ctx) = ~term !ctx {
  #Lam{bod}:
    ! &0{ctx bod} = @bind_term(bod #Cons{#Some{$x} ctx})
    &0{@tail(ctx) #Lam{λ$x bod}}
  #App{fun arg}:
    ! &0{ctx fun} = @bind_term(fun ctx)
    ! &0{ctx arg} = @bind_term(arg ctx)
    &0{ctx #App{fun arg}}
  #Var{idx}: @get(idx ctx)
}

@get(idx ctx) = ~ idx !ctx {
  0: ~ ctx {
    #Nil: *
    #Cons{h t}: ~ h {
      #None: *
      #Some{x}: &0{#Cons{#None t} x}
    }
  }
  1+p: ~ ctx {
    #Nil: *
    #Cons{h t}:
      ! &0{t x} = @get(p t)
      &0{#Cons{h t} x}
  }
}

// Stringification
// ---------------

@show_nat(nat) = ~nat { 
  0: λk #Cons{'Z' k}
  1+p: λk #Cons{'S' (@show_nat(p) k)}
}

@show_dec(n r) =
  ! &{n n0} = n
  ! &{n n1} = n
  ! chr = (+ (% n 10) '0')
  ~ (< n0 10) !chr !r {
    0: @show_dec((/ n1 10) #Cons{chr r})
    _: #Cons{chr r}
  }

@do_show_dec(n) = @show_dec(n #Nil)

@show_bits(bits) = ~bits {
  #O{pred}: λk #Cons{'#' #Cons{'O' #Cons{'{' (@show_bits(pred) #Cons{'}' k})}}}
  #I{pred}: λk #Cons{'#' #Cons{'I' #Cons{'{' (@show_bits(pred) #Cons{'}' k})}}}
  #E: λk #Cons{'#' #Cons{'E' k}}
}

@do_show_bits(bits) = (@show_bits(bits) #Nil)

@show_term(term dep) = ~term !dep {
  #Var{idx}: λk
    @show_dec((- (- dep idx) 1) k)
  #Lam{bod}: λk
    !&0{d0 d1}=dep
    #Cons{'λ' (@show_term((bod #Var{d0}) (+ d1 1)) k)}
  #App{fun arg}: λk
    !&0{d0 d1}=dep
    #Cons{'(' (@show_term(fun d0)
    #Cons{' ' (@show_term(arg d1)
    #Cons{')' k})})}
}

@do_show_term(term) = (@show_term(term 0) #Nil)

// Equality
// --------

@eq(a b dep) = ~ @wnf(a) !b !dep {
  #Lam{a_bod}: ~ @wnf(b) !dep {
    #Lam{b_bod}:
      !&1{dep d0}=dep
      !&1{dep d1}=dep
      !&1{dep d2}=dep
      @eq((a_bod #Var{d0}) (b_bod #Var{d1}) (+ 1 d2))
    #App{b_fun b_arg}: 0
    #Var{b_idx}: 0
  }
  #App{a_fun a_arg}: ~ b !dep {
    #Lam{b_bod}: 0
    #App{b_fun b_arg}:
      !&1{dep d0}=dep
      !&1{dep d1}=dep
      @and(@eq(a_fun b_fun d0) @eq(a_arg b_arg d1))
    #Var{b_idx}: 0
  }
  #Var{a_idx}: ~ b !dep {
    #Lam{b_bod}: 0
    #App{b_fun b_arg}: 0
    #Var{b_idx}: (== a_idx b_idx)
  }
}

// Evaluation
// ----------

@wnf(term) = ~ term { 
  #Lam{bod}: #Lam{bod}
  #App{fun arg}: @app(@wnf(fun) arg)
  #Var{idx}: #Var{idx}
}

@app(f x) = ~ f !x {
  #Lam{bod}: @wnf((bod @wnf(x)))
  #App{fun arg}: #App{#App{fun arg} x}
  #Var{idx}: #App{#Var{idx} x}
}

// Enumeration
// -----------

// Enums all Bins of given size (label 1)
@all1(s) = ~s{
  0: #E
  1+p: !&2{p0 p1}=p &2{
    #O{@all1(p0)}
    #I{@all1(p1)}
  }
}

// Tests
// -----

//A= λt (t ^1 ^2)
//A= λ((Z SZ) SSZ)
@A = #Lam{λt #App{#App{t #Var{1}} #Var{2}}}

//B= λt (t ^2 ^1)
//B= λ((Z SSZ) SZ)
@B = #Lam{λt #App{#App{t #Var{2}} #Var{1}}}

//R= λx (x λa λb λt (t b a))
//R= λ(Z λλλ((SSSZ SSZ) SZ))
@R = #Lam{λx #App{x #Lam{λa #Lam{λb #Lam{λt #App{#App{t b} a}}}}}}

@X = @all1(25)

// Solve `?x` in `λaλb(?x λt(t a b)) == λaλbλt(t b a)`
@main =
  ! &5{x0 x1} = @do_parse_term(@X)
  ! solved    = @eq(#App{x0 @A} @B 0) // (?x A) == B
  @when(solved @do_show_term(x1))


================================================
FILE: examples/enum_lam_smart.hvm
================================================
// An Optimal λ-Calculus Enumerator for Program Search
// ---------------------------------------------------
// This file shows a template on how to enumerate superposed terms in a
// higher-order language (here, the affine λ-Calculus) for proof search.
// Instead of generating the source syntax (like superposing all binary strings
// and parsing it as a binary λ-calculus), we create a superposition of all
// λ-terms *directly*, in a way that head constructors are emitted as soon as
// possible. This allows HVM to prune branches and backtrack efficiently as it
// computes the application of all λ-terms to some expression. The result is a
// reduction in the interactions needed to solve the equation:
// > (?X λt(t 1 2)) == λt(t 2 1)
// From ~32 million to just 76k, a 420x speedup*, which increases the harder the
// problem is. This generator works by synthesizing a λ-term in layers. On each
// layer, we either generate a lambda and extend a context, or select one of the
// variables in the context to return. When we select a variable, we will apply
// it to either 0, 1 or 2 other variables in the context (we don't generate
// terms with >2 arity apps here). This is not ideal; in a typed version, we
// would be able to tell the arity of the context variable, and generate the
// right amount of applications, without making a guess.
// * NOTE: we're actually able to bring the naive approach down to 1.7 million
// interactions. So, the numbers are:
// - Enum binary λC with loops (Haskell): 0.992s 
// - Enum binary λC with sups (HVM): 0.026s (38x speedup)
// - Enum λC directly with sups (HVM): 0.0011s (862x speedup)
// The main contribution of this file is on the shape of the superposer. There
// are a few things that one must get right to achieve the desired effect.
// First, how do we split a linear context? It depends: when generating an
// application like `(f ?A ?B)`, we need to pass the context to `?A`, get the
// leftover, and pass it to `?B`, in such a way that `?A` and `?B` won't use the
// same variable twice. This happens within the same "universe". Yet, when
// making a choice, like "do we return a lambda or a variable here", we need to
// clone the linear context with the same label forked the universe itself,
// allowing a variable to be used more than once, as long as its occurrences are
// in different universes. Handling this correctly is very subtle, which is why
// this file can be useful for study.
// Second, how do we handle labels? As discussed recently on Discord:
// https://discord.com/channels/912426566838013994/915345481675186197/1311434500911403109
// We only need one label to fully enumerate all natural numbers. Yet, that
// doesn't work for binary trees. My conclusion is that we need to "fork" the
// label whenever we enumerate a constructor that branches; i.e., that has more
// than one field. Nats and Bits are safe because their constructors only have
// one field, but a binary Tree needs forking. To fork a label, we just mul by 2
// and add 0 or 1, and the seed has to be 1, so that forked branches never use
// the same label. We apply this here to the arity-2 app case.
// Third, how do we emit constructors as soon as possible, while still passing a
// context down? It is easy to accidentally mess this up by making the enum
// monadic. This will cause it to sequentialize its execution, meaning no ctor
// is emitted until the entire enumerator returns. That's a big problem, since
// we need head ctors to be available as soon as possible. That's how HVM is
// able to invalidate universes and backtrack. While this is a silly issue, it
// can spoil the whole thing, so I've elaborated it here:
// https://gist.github.com/VictorTaelin/fb798a5bd182f8c57dd302380f69777a
// The enumerator in this file is the simplest "template" enumerator that has
// everything a higher order language needs and is structured in a way that can
// be studied and extended with more sophisticate approaches, like types.
// EDIT: the dependently typed version has been pushed. It reduces the rewrite
// count to 3k, and greatly improves the enumerator shape.

data List {
  #Nil
  #Cons{head tail}
}

data Bits {
  #O{pred}
  #I{pred}
  #E
}

data Term {
  #Lam{bod}
  #App{fun arg}
  #Var{idx}
  #Sub{val}
}

data pair {
  #Pair{fst snd}
}

data Result {
  #Result{src val}
}

data Maybe {
  #None
  #Some{value}
}

// Prelude
// -------

@if(c t f) = ~ c {
  0: f
  _: t
}

@when(c t) = ~ c {
  0: *
  _: t
}

@tail(xs) = ~ xs {
  #Nil: *
  #Cons{h t}: t
}

@and(a b) = ~ a !b {
  0: 0
  _: b
}

@unwrap(mb) = ~ mb {
  #None: *
  #Some{x}: x
}

@tm0(x) = !&0{a b}=x a
@tm1(x) = !&0{a b}=x b

// Stringification
// ---------------

@show_nat(nat) = ~nat { 
  0: λk #Cons{'Z' k}
  1+p: λk #Cons{'S' (@show_nat(p) k)}
}

@show_dec(&n r) =
  ! chr = (+ (% n 10) '0')
  ~ (< n 10) !chr !r {
    0: @show_dec((/ n 10) #Cons{chr r})
    _: #Cons{chr r}
  }

@do_show_dec(n) = @show_dec(n #Nil)

@show_bits(bits) = ~bits {
  #O{pred}: λk #Cons{'#' #Cons{'O' #Cons{'{' (@show_bits(pred) #Cons{'}' k})}}}
  #I{pred}: λk #Cons{'#' #Cons{'I' #Cons{'{' (@show_bits(pred) #Cons{'}' k})}}}
  #E: λk #Cons{'#' #Cons{'E' k}}
}

@do_show_bits(bits) = (@show_bits(bits) #Nil)

@show_term(term dep) = ~term !&dep {
  #Var{idx}: λk
    @show_dec((- (- dep idx) 1) k)
  #Lam{bod}: λk
    #Cons{'λ' (@show_term((bod #Var{dep}) (+ dep 1)) k)}
  #App{fun arg}: λk
    #Cons{'(' (@show_term(fun dep)
    #Cons{' ' (@show_term(arg dep)
    #Cons{')' k})})}
  #Sub{val}: *
}

@do_show_term(term) = (@show_term(term 0) #Nil)

// Equality
// --------

@eq(a b dep) = ~ a !b !dep {
  #Lam{a_bod}: ~ b !&dep {
    #Lam{b_bod}:
      @eq((a_bod #Var{dep}) (b_bod #Var{dep}) (+ 1 dep))
    else: 0
  }
  #App{a_fun a_arg}: ~ b !&dep {
    #App{b_fun b_arg}:
      @and(@eq(a_fun b_fun dep) @eq(a_arg b_arg dep))
    else: 0
  }
  #Var{a_idx}: ~ b !&dep {
    #Var{b_idx}: (== a_idx b_idx)
    else: 0
  }
  #Sub{a_val}: *
}

// Evaluation
// ----------

@wnf(term) = ~ term { 
  #Lam{bod}: #Lam{bod}
  #App{fun arg}: @wnf_app(@wnf(fun) arg)
  #Var{idx}: #Var{idx}
  #Sub{val}: #Sub{val}
}

@wnf_app(f x) = ~ f !x {
  #Lam{bod}: @wnf((bod @wnf(x)))
  #App{fun arg}: #App{#App{fun arg} x}
  #Var{idx}: #App{#Var{idx} x}
  #Sub{val}: #App{#Sub{val} x}
}

// Normalization
// -------------

@nf(term) = ~ @wnf(term) {
  #Lam{bod}: #Lam{λx @nf((bod #Sub{x}))}
  #App{fun arg}: #App{@nf(fun) @nf(arg)}
  #Var{idx}: #Var{idx}
  #Sub{val}: val
}

// Enumeration
// -----------

// Enumerates affine λ-terms.
// - lim: max context length (i.e., nested lambdas)
// - lab: superposition label. should be 1 initially.
// - ctx: the current scope. should be [] initially.
// If the binder limit has been reached, destroy this universe.
// Otherwise, make a choice.
// - A. We generate a fresh lambda.
// - B. We select a variable from context.
// Note that, every time we make a choice, we "fork" the current context by
// using DUP nodes with the same label that we used in the choice SUP node.
@all(!&L !&lim ctx) = ~lim {
  0: *
  1+&lim:
    !&L{ctxL ctxR} = ctx
    &L{
      @lam(L lim ctxL)
      @ret(L (+ lim 1) ctxR λk(k))
    }
}

// Generate a fresh lambda and extend the context with its variable.
@lam(!&L !&lim ctx) =
  !&0{ctx bod} = @all(L lim #Cons{#Some{$x} ctx})
  &0{@tail(ctx) #Lam{λ$x(bod)}}

// Return a variable from the context.
// If the context is empty, destroy this universe.
// Otherwise, make a choice.
// - A. We emit the head of the context, and apply it to things.
// - B. We keep the head of the context, and go to the next element.
@ret(!&L !&lim ctx rem) = ~ctx {
  #Nil: *
  #Cons{val ctx}:
    !&L{remL remR} = rem
    !&L{valL valR} = val
    !&L{ctxL ctxR} = ctx
    &L{
      @app(L lim (remL #Cons{#None ctxL}) valL)
      @ret(L lim ctxR λk(remR #Cons{valR k}))
    }
}

// To apply a value to things, we will make a triple choice.
// - A. Just return it directly.
// - B. Apply it to 1 argument.
// - C. Apply it to 2 arguments.
// When we apply it to 2 arguments, as in `(App ?A ?B)`, we need to fork the
// label, so that DUPs/SUPs in `?A` and `?B` never use the same label.
@app(!&L !&lim ctx val) = ~ val {
  #None: *
  #Some{val}: 
    !&L{val val0} = val
    !&L{val val1} = val
    !&L{val val2} = val
    !&L{ctx ctx0} = ctx
    !&L{ctx ctx1} = ctx
    !&L{ctx ctx2} = ctx
    ! arity_0 =
      &0{ctx0 val0}
    ! arity_1 = 
      !&0{ctx1 argX} = @all(L lim ctx1)
      &0{ctx1 #App{val1 argX}}
    ! arity_2 =
      !&0{ctx2 argX} = @all((+(* L 2) 0) lim ctx2)
      !&0{ctx2 argY} = @all((+(* L 2) 1) lim ctx2)
      &0{ctx2 #App{#App{val2 argX} argY}}
    &L{arity_0 &L{arity_1 arity_2}}
}

// Tests
// -----

//A= λt (t ^1 ^2)
//A= λ((Z SZ) SSZ)
@A = #Lam{λt #App{#App{t #Var{1}} #Var{2}}}

//B= λt (t ^2 ^1)
//B= λ((Z SSZ) SZ)
@B = #Lam{λt #App{#App{t #Var{2}} #Var{1}}}

//R= λx (x λa λb λt (t b a))
//R= λ(Z λλλ((SSSZ SSZ) SZ))
@R = #Lam{λx #App{x #Lam{λa #Lam{λb #Lam{λt #App{#App{t b} a}}}}}}

//X= (all terms)
@X = @tm1(@all(1 5 #Nil))

// Solves for `?X` in `(?X λt(t A B)) == λt(t B A)`.
// It finds `?X = λλ(1 λλ((2 0) 1))` in 76k interactions.
@main =
  ! solved = @eq(@nf(#App{@X @A}) @B 0)
  @when(solved @do_show_term(@X))


================================================
FILE: examples/enum_nat.hvm
================================================
// This shows how to use a 'pseudo-metavar' to invert the binary add function,
// and solve the equation: 'X * 20 = 5000'. Run it with collapse mode:
// $ hvml run pseudo_metavar_nat.hvml -C -s

// Unary Peano Nats
data Nat { #Z #S{pred} }

// If-Then-Else
@if(b t f) = ~b {
  0: f
  _: t
}

// Converts an U32 to a Nat
@nat(n) = ~n{
  0: #Z
  1+p: #S{@nat(p)}
}

// Converts a Nat to an U32
@u32(n) = ~n{
  #Z: 0
  #S{np}: (+ 1 @u32(np))
}

// Adds two Nats
@add(a b) = ~a !b {
  #Z: b
  #S{ap}: #S{@add(ap b)}
}

// Muls two Nats
@mul(a b) = ~a !b {
  #Z: #Z
  #S{ap}: !&1{b0 b1}=b @add(b0 @mul(ap b1))
}

// Compares two Nats for equality
@eq(a b) = ~a !b {
  #Z: ~b{
    #Z: 1
    #S{bp}: 0
  }
  #S{ap}: ~b{
    #Z: 0
    #S{bp}: @eq(ap bp)
  }
}

// A superposition of all Nats (pseudo-metavar)
@X = &0{#Z #S{@X}}

// Solves 'X * 20 = 20000'
@main = @if(@eq(@mul(@X @nat(20)) @nat(12000)) @u32(@X) *)

// This is quadratic. In the post below, I discuss solutions to make it linear:
// https://gist.github.com/VictorTaelin/93c327e5b4e752b744d7798687977f8a
// These solutions are implemented on the branches:
// - oportunistic_swaps
// - unordered_superpositions
// Sadly they don't work as I expected in all cases. More clarity is needed.


================================================
FILE: examples/enum_path_finder.hvm
================================================
// Simple path finding with superpositions

// Lists
data List { #Nil #Cons{head tail} }

// Directions (Left/Right/Up/Down)
data Dir { #L #R #U #D }

// Collapses an universe when c=0
@when(!c t) =
  ~ c {
    0: *
    _: t
  }

// Decrements/increments a number
@dec(x) = ~ x { 0:*; 1+x:x }
@inc(x) = (+ x 1)

// Swaps an element in an array
@swap(xs i v) = ~ i !xs !v {
  0: ~ xs {
    #Nil: *
    #Cons{x xs}:
      &0{#Cons{v xs} x}
  }
  1+i: ~ xs {
    #Nil: *
    #Cons{x xs}:
      ! &0{xs v} = @swap(xs i v)
      &0{#Cons{x xs} v}
  }
}

// Swaps an element in a 2D grid
@swap2D(xs pos v) =
  ! &0{i  j } = pos
  ! & {i0 i1} = i
  ! & {j0 j1} = j
  ! &0{xs ys} = @swap(xs j0 *)
  ! &0{ys k } = @swap(ys i0 *)
  ! &0{ys _ } = @swap(ys i1 v)
  ! &0{xs _ } = @swap(xs j1 ys)
  &0{xs k}

// Moves a position to a direction
@move(pos dir) =
  ! &0{x y} = pos
  ~ dir !x !y {
    #L: &0{@dec(x) y}
    #R: &0{@inc(x) y}
    #U: &0{x @dec(y)}
    #D: &0{x @inc(y)}
  }

@diff(&a &b) =
  ~ (< a b) {
    0: (- a b)
    _: (- b a)
  }

@gdist(pos goal) =
  ! &0{px py} = pos
  ! &0{gx gy} = goal
  ! dx = @diff(px gx)
  ! dy = @diff(py gy)
  (+ dx dy)

@closer(prev curr &goal) =
  ! d_prev = @gdist(prev goal) // distance from prev → goal
  ! d_curr = @gdist(curr goal) // distance from cur  → goal
  (< d_curr d_prev)

!@walk(map &pos !path &goal) =
  ~ path {
    #Nil:
      pos
    #Cons{dir path}:
      ! &new_pos    = @move(pos dir)
      ! &0{map got} = @swap2D(map new_pos 0) // <- 0 means we don't replace the prev tile by a wall, making it much harder
      ~ got {
        0: ↑@walk(map new_pos path goal) // <- generates a weird path? and ↓ hangs...
          //~ @closer(pos new_pos goal) !map !path {
            //0: ↓↓@walk(map new_pos path goal)
            //_: ↓@walk(map new_pos path goal)
          //}
        _: *
      }
  }

@map = [
  [0 1 0 1 0 0 0 0 0]
  [0 1 0 1 0 1 1 1 0]
  [0 0 0 1 0 0 0 1 0]
  [0 1 1 1 0 1 1 0 0]
  [0 0 0 0 0 1 0 1 0]
  [0 1 1 1 0 1 0 1 0]
  [0 1 0 0 0 1 0 1 0]
  [0 1 0 1 1 1 0 1 0]
  [0 0 0 0 0 0 0 1 0]
]

// Superposition of all possible paths
@dirs(&L) = ~ L {
  0: #Nil
  1+&L: &1{#Nil &1{#Cons{#L @dirs(L)} &1{#Cons{#R @dirs(L)} &1{#Cons{#U @dirs(L)} #Cons{#D @dirs(L)}}}}}
}

// Finds a path from (0,0) to (8,8)
@main =
  ! &lim = 32
  ! &ini = &0{0 0}
  ! &end = &0{4 0}
  ! &0{i j} = @walk(@map ini @dirs(lim) end)
  ! &0{I J} = end
  @when((& (== i I) (== j J)) @dirs(lim))
  //@when((& (== i I) (== j J)) 1)


================================================
FILE: examples/enum_primes.hs
================================================
-- //./pseudo_metavar_factors.hvml//

import Control.Monad (forM_, when)
import Data.Time.Clock (getCurrentTime, diffUTCTime)
import System.Exit (exitSuccess)
import Text.Printf (printf)

data Bin = O Bin | I Bin | E deriving (Show, Eq)

u32 :: Bin -> Int
u32 (O p) = 2 * u32 p + 0
u32 (I p) = 2 * u32 p + 1
u32 E     = 0

bin :: Int -> Int -> Bin
bin 0 _ = E
bin s n = case n `mod` 2 of
  0 -> O (bin (s-1) (n `div` 2))
  _ -> I (bin (s-1) (n `div` 2))

eq :: Bin -> Bin -> Bool
eq E     E     = True
eq (O a) (O b) = eq a b
eq (I a) (I b) = eq a b
eq _     _     = False

inc :: Bin -> Bin
inc (O p) = I p
inc (I p) = O (inc p)
inc E     = E

add :: Bin -> Bin -> Bin
add (O a) (O b) = O (add a b)
add (O a) (I b) = I (add a b)
add (I a) (O b) = I (add a b)
add (I a) (I b) = O (inc (add a b))
add E     b     = E
add a     E     = E

mul :: Bin -> Bin -> Bin
mul _ E     = E
mul a (O b) = O (mul a b)
mul a (I b) = add a (O (mul a b))

cat :: Bin -> Bin -> Bin
cat (O a) b = O (cat a b)
cat (I a) b = I (cat a b)
cat E     b = b

k = 14
h = 15
s = 30
p = I(I(I(O(O(O(I(I(O(I(O(I(O(O(O(O(I(O(I(I(O(O(I(O(O(I(O(I(O(I(E))))))))))))))))))))))))))))))

--INJECT--

main :: IO ()
main = do
  start <- getCurrentTime
  forM_ [0..2^h-1] $ \a -> do
    forM_ [0..2^h-1] $ \b -> do
      let binA = cat (bin h a) (bin h 0)
      let binB = cat (bin h b) (bin h 0)
      when (eq (mul binA binB) p) $ do
        end <- getCurrentTime
        let duration = diffUTCTime end start
        putStrLn $ "FACT: " ++ show a ++ " " ++ show b
        putStrLn $ "TIME: " ++ printf "%.7f seconds" (realToFrac duration :: Double)
        exitSuccess


================================================
FILE: examples/enum_primes.hvm
================================================
// Bitstrings
data Bin { #O{pred} #I{pred} #E }

// If-Then-Else
@if(b t f) = ~b {
  0: f
  _: t
}

// Converts a Bin to an U32
@u32(b) = ~b{
  #O{p}: (+ (* 2 @u32(p)) 0)
  #I{p}: (+ (* 2 @u32(p)) 1)
  #E: 0
}

// Converts an U32 to a Bin of given size
@bin(s n) = ~s{
  0: #E
  1+p: !&0{n0 n1}=n ~(% n0 2) !p !n1 {
    0: #O{@bin(p (/ n1 2))}
    _: #I{@bin(p (/ n1 2))}
  }
}

// Bin Equality
@eq(a b) = ~a !b {
  #E: ~b {
    #O{bp}: 0
    #I{bp}: 0
    #E: 1
  }
  #O{ap}: ~b{
    #O{bp}: @eq(ap bp)
    #I{bp}: 0
    #E: 0
  }
  #I{ap}: ~b{
    #O{bp}: 0
    #I{bp}: @eq(ap bp)
    #E: 0
  }
}

// Increments a Bin
@inc(a) = ~a{
  #O{p}: #I{p}
  #I{p}: #O{@inc(p)}
  #E: #E
}

// Decrements a Bin
@dec(a) = ~a{
  #O{p}: #O{@dec(p)}
  #I{p}: #I{p}
  #E: #E
}

// Adds two Bins
@add(a b) = ~a !b {
  #O{ap}: ~b !ap {
    #O{bp}: #O{@add(ap bp)}
    #I{bp}: #I{@add(ap bp)}
    #E: #E
  }
  #I{ap}: ~b !ap {
    #O{bp}: #I{@add(ap bp)}
    #I{bp}: #O{@inc(@add(ap bp))}
    #E: #E
  }
  #E: #E
}

// Muls two Bins
@mul(a b) = ~b !a {
  #O{bp}: #O{@mul(a bp)}
  #I{bp}: !&0{a0 a1}=a @add(a0 #O{@mul(a1 bp)})
  #E: #E
}

// Concatenates two Bins
@cat(a b) = ~a !b {
  #O{ap}: #O{@cat(ap b)}
  #I{ap}: #I{@cat(ap b)}
  #E: b
}

// Enums all Bins of given size (label 1)
@all1(s) = ~s{
  0: #E
  1+p: !&1{p0 p1}=p &1{ #O{@all1(p0)} #I{@all1(p1)} }
}

// Enums all Bins of given size (label 2)
@all2(s) = ~s{
  0: #E
  1+p: !&2{p0 p1}=p &2{ #O{@all2(p0)} #I{@all2(p1)} }
}

// 8:
@K_A = 3
@H_A = 4
@S_A = 8
@X_A = @cat(@all1(@H_A) @bin(@H_A 0))
@Y_A = @cat(@all2(@H_A) @bin(@H_A 0))
@P_A = #I{#O{#O{#I{#O{#I{#O{#I{#E}}}}}}}}

// 20:
@K_B = 9
@H_B = 10
@S_B = 20
@X_B = @cat(@all1(@H_B) @bin(@H_B 0))
@Y_B = @cat(@all2(@H_B) @bin(@H_B 0))
@P_B = #I{#I{#I{#I{#O{#I{#I{#O{#I{#I{#O{#O{#I{#O{#I{#O{#O{#O{#I{#I{#E}}}}}}}}}}}}}}}}}}}}

// 30:
@K_C = 14
@H_C = 15
@S_C = 30
@X_C = @cat(@all1(@H_C) @bin(@H_C 0))
@Y_C = @cat(@all2(@H_C) @bin(@H_C 0))
@P_C = #I{#I{#I{#O{#O{#O{#I{#I{#O{#I{#O{#I{#O{#O{#O{#O{#I{#O{#I{#I{#O{#O{#I{#O{#O{#I{#O{#I{#O{#I{#E}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}

////INJECT//

@main = @if(@eq(@mul(@X_B @Y_B) @P_B) λt(t @u32(@X_B) @u32(@Y_B)) *)


================================================
FILE: examples/feat_affine_ctx.hvm
================================================
// Optimal recursive context passing with HVM's "pure mutable references"
// Article: https://gist.github.com/VictorTaelin/fb798a5bd182f8c57dd302380f69777a

data Pair { #Pair{fst snd} }
data List { #Nil #Cons{head tail} }
data Tree { #Leaf #Node{lft rgt} }

// Utils
// -----

@is_node(tree) = ~tree {
  #Leaf: 0
  #Node{lft rgt}: 1
}

@range(n r) = ~n !r {
  0: r
  1+p: !&0{p0 p1}=p @range(p0 #Cons{p1 r})
}

@fst(p) = ~p {
  #Pair{fst snd}: fst
}

@snd(p) = ~p {
  #Pair{fst snd}: snd
}

@tm0(sup) = !&0{tm0 tm1}=sup tm0
@tm1(sup) = !&0{tm0 tm1}=sup tm1

// Mutable references
@mut(ref fn) = !! $new = (fn (ref $new)) *
@spt(ref fn) = (fn λ$y(ref $z) λ$z($y))

// Slow Version
// ------------

// The slow version passes a context monadically, with a pair state.
@list_to_tree_slow(n ctx) = ~n !ctx {
  // Base Case:
  // - take the ctx's head
  // - return the context's tail and '#Leaf{head}'
  0: ~ctx {
    #Nil: *
    #Cons{head tail}: #Pair{tail #Leaf{head}}
  }
  // Step Case:
  // - recurse to the lft, get the new ctx and 'lft' tree
  // - recurse to the rgt, get the new ctx and 'rgt' tree
  // - return the final context and a '#Node{lft rgt}'
  1+p:
    !&0{p0 p1}=p
    ~ @list_to_tree_slow(p0 ctx) {
      #Pair{ctx lft}: ~ @list_to_tree_slow(p1 ctx) {
        #Pair{ctx rgt}: #Pair{ctx #Node{lft rgt}}
      }
    }
}

// Fast Version: parallel destructing
// ----------------------------------

// This version uses a superposition instead of a pair. It is faster because it
// allows us to destruct in parallel (which isn't available for native ADTs),
// preventing the sequential chaining issue.
@list_to_tree_fast_par(n ctx) = ~n !ctx {
  0: ~ctx {
    #Nil: *
    #Cons{head tail}: &0{tail #Leaf{head}}
  }
  1+p:
    ! &0{p0 p1}   = p
    ! &0{ctx lft} = @list_to_tree_fast_par(p0 ctx)
    ! &0{ctx rgt} = @list_to_tree_fast_par(p1 ctx)
    &0{ctx #Node{lft rgt}}
}

// Fast Version: mutable references
// --------------------------------

// This version passes the context as a mutable reference.
// It avoids pair entirely.
@list_to_tree_fast_mut(n ctx) = ~n !ctx {
  // Base case:
  // - mutably replace the context by its tail, and extract its head
  // - return just '#Leaf{head}' (no pairs!)
  0: 
    !! @mut(ctx λctx ~ctx { #Nil:* #Cons{$head tail}:tail })
    #Leaf{$head}
  // Step Case:
  // - split the mutable reference into two
  // - recurse to the lft and rgt, passing the split mut refs
  // - return just '#Node{lft rgt}' directly (no pairs!)
  1+p:
    !&0{pL pR}=p
    !! @spt(ctx λ$ctxL λ$ctxR *)
    #Node{
      @list_to_tree_fast_mut(pL $ctxL)
      @list_to_tree_fast_mut(pR $ctxR)
    }
}

// Main
// ----

// Tree Depth
@depth = 16

// Tests slow version
//@main = @is_node(@snd(@list_to_tree_slow(@depth (@range((<< 1 @depth) 0)))))

// Tests fast version with parallel destruct
@main = @is_node(@tm1(@list_to_tree_fast_par(@depth (@range((<< 1 @depth) 0)))))

// Tests fast version with mutable refs
//@main = @is_node(@list_to_tree_fast_mut(@depth λ$ctx(@range((<< 1 @depth) 0))))


================================================
FILE: examples/feat_cmul.hvm
================================================
@main =
  !c2_0 = λf !&0{f0 f1}=f λx(f0 (f1 x))
  !c2_1 = λf !&1{f0 f1}=f λx(f0 (f1 x))
  (c2_0 c2_1)


================================================
FILE: examples/feat_hoas.hvm
================================================
data List {
  #Nil
  #Cons{head tail}
}

data Term {
  #Var{nam}
  #Lam{nam bod}
  #App{fun arg}
  #Sub{val}
}

@cat(xs ys) = ~xs !ys {
  #Nil: ys
  #Cons{h t}: #Cons{h @cat(t ys)}
}

@join(xs) = ~xs {
  #Nil: #Nil
  #Cons{h t}: @cat(h @join(t))
}

@show(term) = ~term {
  #Var{nam}: nam
  #Lam{nam bod}: !&0{n0 n1}=nam @join(["λ" n0 " " @show((bod #Var{n1}))])
  #App{fun arg}: @join(["(" @show(fun) " " @show(arg) ")"])
  #Sub{val}: @show(val)
}

@wnf(term) = ~term {
  #Var{nam}: #Var{nam}
  #Lam{nam bod}: #Lam{nam bod}
  #App{fun arg}: @app(@wnf(fun) arg)
  #Sub{val}: @wnf(val)
}

@nf(term) = ~ @wnf(term) {
  #Var{nam}: #Var{nam}
  #Lam{nam bod}: #Lam{nam λx @nf((bod #Sub{x}))}
  #App{fun arg}: #App{@nf(fun) @nf(arg)}
  #Sub{val}: val
}

@app(f x) = ~f !x {
  #Var{nam}: #App{#Var{nam} x}
  #Lam{nam bod}: @wnf((bod @wnf(x)))
  #App{fun arg}: #App{#App{fun arg} x}
  #Sub{val}: #Var{"TODO"}
}

@ID = #Lam{"x" λx(x)}
@c2 = #Lam{"f" λf #Lam{"x" λx !&1{f0 f1}=f #App{f0 #App{f1 x}}}}
@k2 = #Lam{"g" λf #Lam{"y" λx !&2{f0 f1}=f #App{f0 #App{f1 x}}}}
@fn = #App{@c2 @k2}

@main = @show(@nf(@fn))


================================================
FILE: examples/feat_mut_ref.hvm
================================================
// Article: https://gist.github.com/VictorTaelin/fb798a5bd182f8c57dd302380f69777a

@mut(ref fn) = !! $new = (fn (ref $new)) *
@spt(ref fn) = (fn λ$y(ref $z) λ$z($y))

@main =
  ! $X = λ$x(0) // u32* X = &0;
  !! @spt($X λ$X0 λ$X1 *) // u32* X0 = X; u32* X1 = X;
  !! @mut($X0 λx(+ x 1)) // *X += 1;
  !! @mut($X1 λx(+ x 1)) // *X += 1;
  $x // *X

// The '!! x = val' notation represents a seq operator.
// It reduces 'val' to whnf and assigns the result to 'x'.
// The '!! val' notation is a shortcut for '!! _ = val'.
// The '$var' notation is for globally scoped variables.


================================================
FILE: examples/fuse_inc.hvm
================================================
// A minimal example of Optimal Evaluation (:

// Bits (Native)
data Bits {
  #O{pred}
  #I{pred}
  #E{}
}

// Repeated Application
@rep(n f x) = ~ n !f !x {
  0: x
  1+p: !&0{f0 f1}=f (f0 @rep(p f1 x))
}

// Squared Application
@sqr(n f x) = ~ n !f !x {
  0: x
  1+p:
    !&0{p0 p1}=(+ p 1)
    !&0{fA f0}=f
    !&0{f1 f2}=fA
    @sqr((/ p0 2) λk(f0 (f1 k)) @rep((% p1 2) f2 x))
}

// Bits (Scott)
@o(x) = λo λi λe (o x)
@i(x) = λo λi λe (i x)
@e    = λo λi λe e

// Bits increment
@inc(x) = λo λi λe (x i λop(o @inc(op)) e)

// Creates an all-zero Bits
@zero(s) = ~s{
  0: @e
  1+p: @o(@zero(p))
}

// Converts a Bits to an U32
@bits_to_u32(xs) = (xs
  λp0 (+ (* 2 @bits_to_u32(p0)) 0)
  λp1 (+ (* 2 @bits_to_u32(p1)) 1)
  0)

// Applies 'inc' N times to zero
@main = @bits_to_u32(@sqr(1234567 λx@inc(x) @zero(32)))


================================================
FILE: examples/fuse_inc.hvm1
================================================
// Repeated Application
(Rep 0 f x) = x
(Rep n f x) = (f (Rep (- n 1) f x))

// Squared Application
(Sqr 0 f x) = x
(Sqr n f x) = (Sqr (/ n 2) λk(f (f k)) (Rep (% n 2) f x))

// Bits (Scott-Encoded)
(O x) = λo λi λe (o x)
(I x) = λo λi λe (i x)
E     = λo λi λe e

// Bits increment
(Inc x) = λo λi λe (x i λop(o (Inc op)) e)

// Converts a Bits to a U60
(BitsToU60 x) = (x
  λp0 (+ (* 2 (BitsToU60 p0)) 0)
  λp1 (+ (* 2 (BitsToU60 p1)) 1)
  0)

(Zero 0) = E
(Zero s) = (O (Zero (- s 1)))

// Applies 'Inc' N times to zero
Main = (BitsToU60 (Sqr 1234567 λx(Inc x) (Zero 60)))


================================================
FILE: examples/fuse_mul.hvm
================================================
// Multiplication by squared addition with optimal evaluation

// Repeated Application
@rep(n f x) = ~ n !f !x {
  0: x
  1+p: !&0{f0 f1}=f (f0 @rep(p f1 x))
}

// Squared Application
@sqr(n f x) = ~ n !f !x {
  0: x
  1+p:
    !&0{p0 p1}=(+ p 1)
    !&0{fA f0}=f
    !&0{f1 f2}=fA
    @sqr((/ p0 2) λk(f0 (f1 k)) @rep((% p1 2) f2 x))
}

// Bits (Scott)
@o(x) = λo λi λe (o x)
@i(x) = λo λi λe (i x)
@e    = λo λi λe e

// Creates an all-zero Bits
@zero(s) = ~s{
  0: @e
  1+p: @o(@zero(p))
}

// U32 -> Bits
@bits(n) = ~ n {
  0: @e
  1+p: !&0{n0 n1}=(+ p 1) ~ (% n0 2) !n1 {
    0: @o(@bits((/ n1 2)))
    1: @i(@bits((/ n1 2)))
    2+p: *
  }
}

// Bits -> U32
@u32(xs) = (xs
  λp0 (+ (* 2 @u32(p0)) 0)
  λp1 (+ (* 2 @u32(p1)) 1)
  0)

// Bits increment
@inc(x) = λo λi λe (x i λop(o @inc(op)) e)

// Addition with carry
@add = λa (a
  λap λb (b λbp λap @o((@add ap bp)) λbp λap @i(     (@add ap bp))  @e ap)
  λap λb (b λbp λap @i((@add ap bp)) λbp λap @o(@inc((@add ap bp))) @e ap)
  λb b)

// Multiplication by squared addition
@mul(a b) = @u32(@sqr(a (@add @bits(b)) @zero(64)))

@main = @mul(23232 32323)

//WORK: 233357193 interactions
//TIME: 3.202 seconds
//SIZE: 1402714952 nodes
//PERF: 72.890 MIPS


================================================
FILE: examples/fuse_rot.hvm
================================================
// Relevant discussion:
// https://discord.com/channels/912426566838013994/915345481675186197/1312147864373301329

data List { #Nil #Cons{head tail} }

// Repeated Application
@rep(n f x) = ~ n !f !x {
  0: x
  1+p: !&0{f0 f1}=f (f0 @rep(p f1 x))
}

// Squared Application
@sqr(n f x) = ~ n !f !x {
  0: x
  1+p:!&0{p0 p1}=(+ p 1)
    !&0{fA f0}=f
    !&0{f1 f2}=fA
    @sqr((/ p0 2) λk(f0 (f1 k)) @rep((% p1 2) f2 x))
}

@O(a b) = λo λi (o a b)
@I(a b) = λo λi (i a b)
@L(x)   = x

@view(!&s) = ~s {
  0: λx x
  1+&p: λx (x
    λa λb [0 (@view(p) a) (@view(p) b)]
    λa λb [1 (@view(p) a) (@view(p) b)])
}

@read(!&s) = ~s {
  0: λx 0
  1+&p: λx (x
    λa λb (+ 0 (* (@read(p) a) 2))
    λa λb (+ 1 (* (@read(p) a) 2)))
}

@zero(s) = ~s {
  0: 0
  1+&p: @O(@zero(p) @zero(p))
}

@inc(s) = ~s {
  0: λx x
  1+&p: λx λo λi (x
    λa λb (i b (@inc(p) a))
    λa λb (o b (@inc(p) a)))
}

@dec(s) = ~s {
  0: λx x
  1+&p: λx λo λi (x
    λa λb (i (@dec(p) b) a)
    λa λb (o (@dec(p) b) a))
}

@neg(!&s) = ~s {
  0: λx x
  1+&p: λx λo λi (x
    λa λb (o (@neg(p) a) (@neg(p) b))
    λa λb (i (@neg(p) b) (@neg(p) a)))
}

// What this does?
@foo(!&s) = ~s {
  0: λx λy x
  1+&p: λx (x
    λxa λxb !&1{xa0 xa1}=xa !&1{xb0 xb1}=xb λy λo λi (y
      λya λyb (o (@foo(p) xa0 ya) (@foo(p) xb0 yb))
      λya λyb (i (@foo(p) xb1 yb) (@foo(p) xa1 ya)))
    λxa λxb !&1{xa0 xa1}=xa !&1{xb0 xb1}=xb λy λo λi (y
      λya λyb (i (@foo(p) xb0 yb) (@foo(p) xa0 ya))
      λya λyb (o (@foo(p) xa1 ya) (@foo(p) xb1 yb))))
}

// Removing the recursive calls to @dec/@inc makes this
// fuse; but since we can't, it doesn't, making it slow
@add(!&s) = ~s {
  0: λx λy y
  1+&p: λx (x
    λxa λxb !&1{xa0 xa1}=xa !&1{xb0 xb1}=xb λy λo λi (y
      λya λyb (o (@add(p) xa0 ya) (@add(p) xb0 yb))
      λya λyb (i (@add(p) xb1 ya) (@add(p) xa1 yb)))
    λxa λxb !&1{xa0 xa1}=xa !&1{xb0 xb1}=xb λy λo λi (y
      λya λyb (i (@add(p) xa0 yb) (@add(p) xb0 ya))
      λya λyb (o (@dec(p) (@add(p) xb1 yb)) (@inc(p) (@add(p) xa1 ya)))))
}

@S    = 32
@K(n) = @rep(n @inc(@S) @zero(@S))
@KA   = @K(17)
@KB   = @K(0)

// meh
@main = (@read(@S) @sqr(54321 (@add(@S) @KA) @KB))


================================================
FILE: examples/main.hvm
================================================
data List{ #Nil #Cons{head tail} }
data Foo{ #A #B }
data Bar{ #C #D }

data Term {
  #Var{idx}
  #Pol{bod}
  #All{inp bod}
  #Lam{bod}
  #App{fun arg}
  #U32
  #Num{val}
}

@main = ~ (#Pol{123}) {
  #Var{idx}: *
  #Pol{bod}: 123
  #All{inp bod}: *
  #Lam{bod}: *
  #App{fun arg}: *
  #U32: *
  #Num{val}: *
}

////@main = ~ (#A) {
  ////#A: 1
  ////#B: 2
////}

////@main = ~ 3 {
  ////0: 10
  ////1: 20
  ////p: p
////}

//@main = 
  //~ [1 2] {
    //#Cons{a b}: a
    //#Nil: 0
  //}

//!@foo(k a) =
  //! &0{x y} = a
  //! &1{_ _} = 1
  //! &2{_ _} = 2
  //(+ x y)
//@main = @foo(1 &0{1 2})

//data Foo {
  //#A{k}
  //#B{k}
//}

//@bar(x) = ~ x {
  //#A{k}: (+ 1 k)
  //#B{k}: (+ 2 k)
//}

//@main = @bar(*)

//@foo(n) = ~n { 0: 0 p: (+ 1 @foo(p)) }

////@main = @foo(&0{1000 1000})
//@main = &0{@foo(1000) @foo(1000)}

// ~ &0{#A{...} #B{...}} {
//  #A: ...
//  #B: ...
// }

//@foo(&0{1 2} x y)
//-----------------
//&0{x0 x1}=x
//&0{y0 y1}=y
//&0{@foo(1 x0 y0) @foo(2 x1 y1)}

//(&L{x y} arg)
//-------------
//&0{arg0 arg1}=arg
//&{(x arg0) (y arg1)}

//data Foo { #A{a b} #B{a b} #C{a b} #D{a b} }

//@main = ~ 7 {
  //0: 3
  //p: p
//}

//@main = ~ #A{1 2} {
  //#A{a b}: ["A" a b]
  //#B{a b}: ["B" a b]
  //#C{a b}: ["C" a b]
//}

//@main = λx ~ #D{1 2} {
  //#A{a b}: ["A" a b]
  //#C{a b}: ["C" a b]
  //term: term
//}

//@main = ~ #A{1 2} {
  //#A{a b}: ["A" a b]
  //x: 123
//}

//data List { #Nil #Cons{head tail} }

//@Foo = #Cons{1 @Foo}
//@main = 50

//@main = λx
  //!! @LOG(&{1 (+ x 1)})
  //123

//@main = λt(t &0{&0{1 2} &0{3 4}} &0{&0{1 2} &0{3 4}})

//λa ((a 1) 1)
//λa ((a 2) 2)
//λa ((a 3) 3)
//λa ((a 4) 4)

//@foo(&a x) = ~ x {
  //0: &a
  //p: &a
//}

//@main = @foo(7 1)

//@main = λa
  //@DUP((+ 5 7) a λx λy [x y])
  //@DUP(11 @SUP(10 50 60) λa λb [a b])

//data Tree { #Leaf #Node{lft rgt} }

//@all(s) = ~s {
  //0: #Leaf
  //p: &0{ #Leaf #Node{@all(p) @all(p)} }
//}

//@main = @all(4)

//data Tree { #Leaf #Node{lft rgt} }

//@all(s l) = ~s {
  //0:
    //#Leaf
  //p:
    //!&0{p p0}=p
    //!&0{p p1}=p
    //!&0{l l0}=l
    //!&0{l l1}=l
    //!&0{l l2}=l
    //! lL = (+ (* l1 2) 0)
    //! lR = (+ (* l2 2) 1)
    //@SUP(l0 #Leaf #Node{ @all(p0 lL) @all(p1 lR) })
//}

//@main = @all(4 1)

//@dyn(n lab) = ~n {
  //0: 0
  //p: !&0{l0 l1}=lab @SUP(l0 0 @dyn(p (+ l1 1)))
//}

//@main = @dyn(8 0)

//// Simple Function, `A -> B`
//@Fun(A B) = %f λx {(f {x::A}) :: B}

//// Dependent Function, `∀(x: A) -> (B x)`
//@All(A B) = %f λx !&0{x0 x1}=x {(f {x0::A}) :: (B x)}

//// main
//// : ∀A (A -> A) -> (A -> A)
//// = λf λx (f x)
//@main = λA
  //! &0{A A0} = A
  //! &0{A A1} = A
  //! &0{A A2} = A
  //! &0{A A3} = A
  //! typ = @Fun(@Fun(A0 A1) @Fun(A2 A3))
  //! val = λf λx (f x)
  //{ val :: typ }

//@main = λA λB
  //! &0{A A0} = A
  //! &0{A A1} = A
  //! &0{A A2} = A
  //! &0{A A3} = A
  //{  λf λx !&0{f0 f1}=f (f0 (f1 x))
  //:: @Fun(@Fun(A0 A1) @Fun(A2 A3)) }

//@main = λA λB {λx x :: @Fun(A B)}

//@main = λA λB
  //!&0{A0 A1}=A
  //{λa λb (a b) :: %f λa λb {(f {a::A0} {b::B})::A1}}

//@count(n k) = ~n !k {
  //0: k
  //p: @count(p (+ k 2))
//}

//@main = @count(2_000_000_000 0)

//data L { #N #C{h t} }

//@foo = λx λa λb λc ~x !a !b !c {
  //#N: λt(t a b c)
  //#C{H T}: λt(t a b c H T)
//}

//@bar = λx ~x {
  //0: 10
  //1+p: p
//}

////@main = (+ (+ 2 3) 4)
////@main = (@bar 10)
////@main = (@foo #C{1 #C{2 #C{3 #N}}} 10 20 30)
////@main = λx ~x { #N: 0 #C{h t}: λK(K h t) }

////@main = λt(t (+ 1 (+ 2 (+ 3 4))) (+ 1 (+ 2 (+ 3 4))))
//@main = (+ 1 2)

//@true = λt λf t
//@false = λt λf f
//@not = λb ((b @false) @true)
//@dups = ! &1{ x y } = &0{ #12{} #24{} } &0{ y x }
//@main = (+ 10 ~7{ 0 1 2 λx x })
//@main = (* 100 123)
//@main = λt(t #5{(λx x 7)} λk k)
//@main = (λa λb(λt λf t &0{a b} &0{b a}) 1 2)
//@succ = λn λs λz (s n)
//@zero = λs λz z
//@mul2 = λn (n λp(@succ (@succ (@mul2 p))) @zero)
//@main = (@mul2 (@succ (@succ (@succ @zero))))
//@foo(x y) = #0{y x}
//@main = @foo(@foo(5 8) @foo(1 2))

//data List { #Nil #Cons{head tail} }

//@sum(xs r) = ~xs{
  //#Nil: r
  //#Cons: λhead λtail @sum(tail (+ head r))
//}

//@range(n xs) = ~n{
  //0: xs
  //p: !&0{p0 p1}=p @range(p0 #Cons{p1 xs})
//}

//@main =
  //!list = #Cons{0 #Cons{1 #Cons{&0{10 20} #Cons{3 #Cons{4 #Cons{5 #Cons{6 #Cons{7 #Cons{8 #Cons{9 #Nil}}}}}}}}}}
  //@sum(list 0)

//@main = !&0{a b}=(+ 1 2) λt(t a b)


================================================
FILE: src/HVM/API.hs
================================================
module HVM.API where

import Control.DeepSeq (deepseq)
import Control.Monad (when, forM_)
import Data.Word (Word64)
import Data.List (isPrefixOf)
import Foreign.LibFFI
import GHC.Clock
import HVM.Adjust
import HVM.Collapse
import HVM.Compile
import HVM.Extract
import HVM.Foreign
import HVM.Inject
import HVM.Parse
import HVM.Reduce
import HVM.Type
import System.Exit (exitWith, ExitCode(ExitFailure))
import System.IO (readFile')
import System.IO.Error (tryIOError)
import System.Posix.DynamicLinker
import System.Process (callCommand)
import Text.Printf

import qualified Data.Map.Strict as MS

data RunMode
  = Normalize
  | Collapse (Maybe Int)
  deriving Eq

data RunStats = RunStats {
  rsItrs :: Word64,
  rsTime :: Double,
  rsSize :: Word64,
  rsPerf :: Double
}

-- Main external API for running HVM
runHVM :: FilePath -> Core -> RunMode -> IO ([Core], RunStats)
runHVM filePath root mode = do
  code   <- readFile' filePath
  book   <- doParseBook filePath code
  hvmInit
  initBook filePath book True
  (vals, stats) <- runBook book root mode True False
  hvmFree
  return (vals, stats)

-- Initializes the runtime with the definitions from a book
initBook :: FilePath -> Book -> Bool -> IO ()
initBook filePath book compiled = do
  forM_ (MS.toList (cidToAri book)) $ \(cid, ari) -> hvmSetCari cid (fromIntegral ari)
  forM_ (MS.toList (cidToLen book)) $ \(cid, len) -> hvmSetClen cid (fromIntegral len)
  forM_ (MS.toList (cidToADT book)) $ \(cid, adt) -> hvmSetCadt cid (fromIntegral adt)
  forM_ (MS.toList (fidToFun book)) $ \(fid, ((_, args), _)) -> hvmSetFari fid (fromIntegral $ length args)
  when compiled $ do
    oPath <- compileBookToBin filePath book
    dylib <- dlopen oPath [RTLD_NOW]
    forM_ (MS.keys (fidToFun book)) $ \fid -> do
      funPtr <- dlsym dylib (mget (fidToNam book) fid ++ "_f")
      hvmDefine fid funPtr
    hvmGotState <- hvmGetState
    hvmSetState <- dlsym dylib "hvm_set_state"
    callFFI hvmSetState retVoid [argPtr hvmGotState]

runBook :: Book -> Core -> RunMode -> Bool -> Bool -> IO ([Core], RunStats)
runBook book root mode compiled debug =
  withRunStats $ do
    injectRoot book root
    rxAt <- if compiled
      then return (reduceCAt debug)
      else return (reduceAt debug)
    vals <- case mode of
      Collapse limit -> do
        core <- doCollapseFlatAt rxAt book 0
        let vals = maybe id Prelude.take limit core
        vals `deepseq` return vals
      Normalize -> do
        core <- doExtractCoreAt rxAt book 0
        let vals = [core]
        vals `deepseq` return vals
    return vals

compileBookToBin :: FilePath -> Book -> IO FilePath
compileBookToBin filePath book = do
  -- Use the embedded runtime sources so compiled mode doesn't depend on CWD
  let mainC = compileBook book runtime_c
  callCommand "mkdir -p .build"
  let fName = last $ words $ map (\c -> if c == '/' then ' ' else c) filePath
  let cPath = ".build/" ++ fName ++ ".c"
  let oPath = ".build/" ++ fName ++ ".so"
  oldCFile <- tryIOError (readFile' cPath)
  when (oldCFile /= Right mainC) $ do
    writeFile cPath mainC
    callCommand $ "gcc -O2 -fPIC -flto -shared " ++ cPath ++ " -o " ++ oPath
  return oPath

injectRoot :: Book -> Core -> IO ()
injectRoot book root = do
  let (book', root') = adjust "" book root []
  doInjectCoreAt book' root' 0 []
  return ()

withRunStats :: IO a -> IO (a, RunStats)
withRunStats action = do
  init <- getMonotonicTimeNSec
  res  <- action
  end  <- getMonotonicTimeNSec
  itrs <- getItr
  size <- getLen
  let time  = fromIntegral (end - init) / (10^9) :: Double
  let mips  = (fromIntegral itrs / 1000000.0) / time
  let stats = RunStats { rsItrs = itrs, rsSize = size , rsTime = time, rsPerf = mips }
  return (res, stats)

instance Show RunStats where
  show stats = printf "WORK: %llu interactions\n" (rsItrs stats) ++
               printf "TIME: %.7f seconds\n" (rsTime stats) ++
               printf "SIZE: %llu nodes\n" (rsSize stats) ++
               printf "PERF: %.3f MIPS\n" (rsPerf stats)


================================================
FILE: src/HVM/Adjust.hs
================================================
module HVM.Adjust where

import Control.Monad
import Control.Monad.State
import Data.List (sortOn)
import Data.Word
import HVM.Type
import qualified Data.Map as MS
import Debug.Trace (trace)

-- External API
----------------

adjustBook :: Book -> Book
adjustBook book = foldr adjustFunc book (MS.toList (fidToFun book))

adjustFunc :: (Word16, Func) -> Book -> Book
adjustFunc (fid, ((cp, ars), cr)) book =
  let nam       = mget (fidToNam book) fid in
  let (b', cr') = adjust nam book cr (map snd ars) in
  let ars'      = map (\(s, n) -> (s, stripName n)) ars in
  b' { fidToFun = MS.insert fid ((cp, ars'), cr') (fidToFun b') }

adjust :: Name -> Book -> Core -> [String] -> (Book, Core)
adjust orig book term binds =
  let termA       = setRefIds (namToFid book) term
      termB       = setCtrIds (ctrToCid book) (cidToADT book) termA
      termC       = sortCases (ctrToCid book) termB
      (fr, termD) = insertDups (freshLab book) binds termC
      termE       = lexify termD
      termF       = validate orig book termE
  in (book { freshLab = fr }, termF)


-- Adjusters
-------------

-- Adds the function id to Ref constructors
setRefIds :: MS.Map String Word16 -> Core -> Core
setRefIds fids term = go term
  where
    go :: Core -> Core
    go (Var nam)         = Var nam
    go (Let m x v b)     = Let m x (go v) (go b)
    go (Lam x bod)       = Lam x (go bod)
    go (App f x)         = App (go f) (go x)
    go (Sup l x y)       = Sup l (go x) (go y)
    go (Dup l x y v b)   = Dup l x y (go v) (go b)
    go (Ctr nam fds)     = Ctr nam (map go fds)
    go (Mat k x mov css) = Mat k (go x) (map (\ (k,v) -> (k, go v)) mov) (map (\ (ctr,fds,cs) -> (ctr, fds, go cs)) css)
    go (Op2 op x y)      = Op2 op (go x) (go y)
    go (U32 n)           = U32 n
    go (Chr c)           = Chr c
    go Era               = Era
    go (Inc x)           = Inc (go x)
    go (Dec x)           = Dec (go x)
    go (Ref nam fid arg) =
      case MS.lookup nam fids of
        Just fid -> Ref nam fid (map go arg)
        Nothing  -> error $ "Unknown function: " ++ show nam


-- Adds the constructor id to Mat and IFL terms
setCtrIds :: MS.Map String Word16 -> MS.Map Word16 Word16 -> Core -> Core
setCtrIds cids adts term = go term
  where
    go :: Core -> Core
    go (Var nam)         = Var nam
    go (Let m x v b)     = Let m x (go v) (go b)
    go (Lam x bod)       = Lam x (go bod)
    go (App f x)         = App (go f) (go x)
    go (Sup l x y)       = Sup l (go x) (go y)
    go (Dup l x y v b)   = Dup l x y (go v) (go b)
    go (Ctr nam fds)     = Ctr nam (map go fds)
    go (Mat k x mov css) = Mat k' (go x) mov' css' where
      getCtr (ctr, _, _) = ctr
      mov' = map (\(k,v) -> (k, go v)) mov
      css' = map (\(ctr,fds,cs) -> (ctr, fds, go cs)) css
      k'   = case k of
        SWI   -> SWI
        MAT _ -> MAT (mget adts (mget cids (getCtr (head css))))
        IFL _ -> IFL (mget cids (getCtr (head css)))
        _     -> k
    go (Op2 op x y)      = Op2 op (go x) (go y)
    go (U32 n)           = U32 n
    go (Chr c)           = Chr c
    go Era               = Era
    go (Inc x)           = Inc (go x)
    go (Dec x)           = Dec (go x)
    go (Ref nam fid arg) = Ref nam fid (map go arg)


-- Sorts match cases by constructor ID or numeric value
sortCases :: MS.Map String Word16 -> Core -> Core
sortCases cids term = go term
  where
    go :: Core -> Core
    go (Var nam)         = Var nam
    go (Let m x v b)     = Let m x (go v) (go b)
    go (Lam x bod)       = Lam x (go bod)
    go (App f x)         = App (go f) (go x)
    go (Sup l x y)       = Sup l (go x) (go y)
    go (Dup l x y v b)   = Dup l x y (go v) (go b)
    go (Ctr nam fds)     = Ctr nam (map go fds)
    go (Mat k x mov css) = Mat k (go x) mov' css' where
      mov' = map (\(k,v) -> (k, go v)) mov
      css' = map (\(ctr,fds,bod) -> (ctr, fds, go bod)) sort
      sort = sortOn sortKey css
      sortKey (name, _, _) =
        case name of
          ('#':_) -> case MS.lookup name cids of
            Nothing -> maxBound
            Just id -> id
          _ -> case reads name of
            [(num :: Word16, "")] -> num
            _                     -> maxBound
    go (Op2 op x y)      = Op2 op (go x) (go y)
    go (U32 n)           = U32 n
    go (Chr c)           = Chr c
    go Era               = Era
    go (Inc x)           = Inc (go x)
    go (Dec x)           = Dec (go x)
    go (Ref nam fid arg) = Ref nam fid (map go arg)


-- Inserts Dup nodes for vars that have been used more than once.
-- Renames vars according to the new Dup bindings.
-- Gives fresh labels to the new Dup nodes.
insertDups :: Lab -> [String] -> Core -> (Lab, Core)
insertDups fresh binds term =
  let (term', (fresh', _)) = runState (withBinds binds term) (fresh, MS.empty)
  in (fresh', term')
  where
    go :: Core -> State (Lab, MS.Map String [String]) Core
    go (Var nam)         = do
      nam <- useVar nam
      return $ (Var nam)
    go (Let m x v b)     = do
      v <- go v
      b <- withBinds [x] b
      return $ Let m (stripName x) v b
    go (Lam x bod)       = do
      bod <- withBinds [x] bod
      return $ Lam (stripName x) bod
    go (App fun arg)     = do
      fun <- go fun
      arg <- go arg
      return $ App fun arg
    go (Sup lab tm0 tm1) = do
      tm0 <- go tm0
      tm1 <- go tm1
      return $ Sup lab tm0 tm1
    go (Dup lab x y v b) = do
      v <- go v
      b <- withBinds [x, y] b
      return $ Dup lab (stripName x) (stripName y) v b
    go (Ctr nam fds)     = do
      fds <- mapM go fds
      return $ Ctr nam fds
    go (Mat k x mov css) = do
      x   <- go x
      mov <- forM mov (\(k,v) -> do
        v <- go v
        return (k, v))
      css <- forM css (\(ctr,fds,bod) -> do
        bod <- withBinds ((map fst mov) ++ fds) bod
        return (ctr, map stripName fds, bod))
      let mov' = map (\(k,v) -> (stripName k, v)) mov
      return $ Mat k x mov' css
    go (Op2 op x y)      = do
      x <- go x
      y <- go y
      return $ Op2 op x y
    go (U32 n)           = do
      return $ U32 n
    go (Chr c)           = do
      return $ Chr c
    go Era               = do
      return Era
    go (Inc x)           = do
      x <- go x
      return $ Inc x
    go (Dec x)           = do
      x <- go x
      return $ Dec x
    go (Ref nam fid arg) = do
      arg <- mapM go arg
      return $ Ref nam fid arg

    -- Recurses on the body of a term that binds variables.
    -- Adds Dups if the new vars are used more than once.
    withBinds :: [String] -> Core -> State (Lab, MS.Map String [String]) Core
    withBinds vars term = do
      (lab, prev) <- get
      -- Add the new binds
      let bfor = foldr addVar prev vars
      put (lab, bfor)
      term <- go term
      term <- foldM applyDups term vars
      -- Remove the new binds
      (lab, aftr) <- get
      let next = foldr (restoreVar prev) (foldr remVar aftr vars) vars
      put (lab, next)
      return term
      where
        addVar var uses = MS.insert (stripName var) [] uses
        remVar var uses = MS.delete (stripName var) uses
        restoreVar old var new = 
          case MS.lookup (stripName var) old of
            Just val -> MS.insert (stripName var) val new
            Nothing  -> new

    applyDups :: Core -> String -> State (Lab, MS.Map String [String]) Core
    applyDups body var = do
      (_, uses) <- get
      let vUse = mget uses (stripName var)
      when ((head var /= '&') && (length vUse > 1)) $
        error $ "Linear variable " ++ show var ++ " used " ++ show (length vUse) ++ " times"
      case (reverse vUse) of
        [] -> do
          return body
        [_] -> do
          return body
        (name:dups) -> do
          foldM (\acc currName -> do
            label <- genFresh
            return $ Dup label name currName (Var name) acc) body dups

    genFresh :: State (Lab, MS.Map String [String]) Lab
    genFresh = do
      (lab, _) <- get
      when (lab > 0x7FFF) $ do
        error "Label overflow: generated label would be too large"
      modify (\(lab, uses) -> (lab + 1, uses))
      return $ 0x8000 + lab

    useVar :: String -> State (Lab, MS.Map String [String]) String
    useVar nam@('$':_) = do
      return nam
    useVar nam = do
      (_, uses) <- get
      case mget uses nam of
        [] -> do
          modify (\(lab, uses) -> (lab, MS.insert nam [nam] uses))
          return nam
        vUse -> do
          let dupNam = nam ++ "$dup" ++ show (length vUse)
          modify (\(lab, uses) -> (lab, MS.insert nam (dupNam : vUse) uses))
          return dupNam

-- Strip the & prefix from a non-linear variable name
-- e.g., "&x" -> "x", "x" -> "x"
stripName :: String -> String
stripName ('&':nam) = nam
stripName nam       = nam


-- Gives unique names to lexically scoped vars, unless they start with '$'.
-- Example: `λx λt (t λx(x) x)` will read as `λx0 λt1 (t1 λx2(x2) x0)`.
lexify :: Core -> Core
lexify term = evalState (go term MS.empty) 0 where
  fresh :: String -> State Int String
  fresh nam@('$':_) = return $ nam
  fresh nam         = do i <- get; put (i+1); return $ nam++"$"++show i

  extend :: String -> String -> MS.Map String String -> State Int (MS.Map String String)
  extend old@('$':_) new ctx = return $ ctx
  extend old         new ctx = return $ MS.insert old new ctx

  go :: Core -> MS.Map String String -> State Int Core
  go term ctx = case term of
    Var nam -> 
      return $ Var (MS.findWithDefault nam nam ctx)
    Ref nam fid arg -> do
      arg <- mapM (\x -> go x ctx) arg
      return $ Ref nam fid arg
    Let mod nam val bod -> do
      val  <- go val ctx
      nam' <- fresh nam
      ctx  <- extend nam nam' ctx
      bod  <- go bod ctx
      return $ Let mod nam' val bod
    Lam nam bod -> do
      nam' <- fresh nam
      ctx  <- extend nam nam' ctx
      bod  <- go bod ctx
      return $ Lam nam' bod
    App fun arg -> do
      fun <- go fun ctx
      arg <- go arg ctx
      return $ App fun arg
    Sup lab tm0 tm1 -> do
      tm0 <- go tm0 ctx
      tm1 <- go tm1 ctx
      return $ Sup lab tm0 tm1
    Dup lab dp0 dp1 val bod -> do
      val  <- go val ctx
      dp0' <- fresh dp0
      dp1' <- fresh dp1
      ctx  <- extend dp0 dp0' ctx
      ctx  <- extend dp1 dp1' ctx
      bod  <- go bod ctx
      return $ Dup lab dp0' dp1' val bod
    Ctr nam fds -> do
      fds <- mapM (\x -> go x ctx) fds
      return $ Ctr nam fds
    Mat kin val mov css -> do
      val' <- go val ctx
      mov' <- forM mov $ \ (k,v) -> do
        k' <- fresh k
        v  <- go v ctx
        return $ (k', v)
      css' <- forM css $ \ (ctr,fds,bod) -> do
        fds' <- mapM fresh fds
        ctx  <- foldM (\ ctx (fd,fd') -> extend fd fd' ctx) ctx (zip fds fds')
        ctx  <- foldM (\ ctx ((k,_),(k',_)) -> extend k k' ctx) ctx (zip mov mov')
        bod <- go bod ctx
        return (ctr, fds', bod)
      return $ Mat kin val' mov' css'
    Op2 op nm0 nm1 -> do
      nm0 <- go nm0 ctx
      nm1 <- go nm1 ctx
      return $ Op2 op nm0 nm1
    U32 n -> 
      return $ U32 n
    Chr c ->
      return $ Chr c
    Era -> 
      return Era
    Inc x -> do
      x <- go x ctx
      return $ Inc x
    Dec x -> do
      x <- go x ctx
      return $ Dec x

validate :: Name -> Book -> Core -> Core
validate orig book term = go term where
  go :: Core -> Core
  go (Var nam)         = Var nam
  go (Let m x v b)     = Let m x (go v) (go b)
  go (Lam x bod)       = Lam x (go bod)
  go (App f x)         = App (go f) (go x)
  go (Sup l x y)       = Sup l (go x) (go y)
  go (Dup l x y v b)   = Dup l x y (go v) (go b)
  go (Ctr nam fds)     =
    case MS.lookup nam (ctrToCid book) of
      Nothing ->
        error $ header ++ "Unknown constructor: " ++ show nam
      Just cid ->
        if length fds /= fromIntegral (mget (cidToAri book) cid) then
          error $ header ++ "Arity mismatch on Ctr: " ++ show (Ctr nam fds) ++ ". " ++ "Expected " ++ show (mget (cidToAri book) cid) ++ " arguments, got " ++ show (length fds)
        else
          Ctr nam (map go fds)
  go (Mat k x mov css) =
    if not uniqueCss then error $ header ++ "Duplicate match case: " ++ show (Mat k x mov css) ++ "."
    else Mat k (go x) mov' css'
    where
      mov' = map (\(k,v) -> (k, go v)) mov
      css' = map (\(ctr,fds,bod) -> (ctr, fds, go bod)) css
      ctrs = map (\(ctr, _, _) -> ctr) css
      uniqueCss = null (filter (\ctr -> length (filter (== ctr) ctrs) > 1) ctrs)
  go (Op2 op x y)      = Op2 op (go x) (go y)
  go (U32 n)           = U32 n
  go (Chr c)           = Chr c
  go Era               = Era
  go (Inc x)           = Inc (go x)
  go (Dec x)           = Dec (go x)
  go (Ref nam fid arg) =
    if not ariOk then
      error $ header ++ "Arity mismatch on Ref: " ++ show (Ref nam fid arg) ++ ". " ++ "Expected " ++ show (funArity book fid) ++ " arguments, got " ++ show (length arg)
    else
      Ref nam fid (map go arg)
    where
      ariOk = length arg == fromIntegral (funArity book fid)

  header = if null orig then "" else "In function @" ++ orig ++ ": "


================================================
FILE: src/HVM/Collapse.hs
================================================
{-./Type.hs-}
{-# LANGUAGE BangPatterns #-}

module HVM.Collapse where

import Control.Monad (ap, forM, forM_)
import Control.Monad.IO.Class
import Data.Char (chr, ord)
import Data.IORef
import Data.Bits ((.&.), xor, (.|.), complement, shiftR)
import Data.Word
import Debug.Trace
import GHC.Conc
import HVM.Foreign
import HVM.Type
import System.Exit (exitFailure)
import System.IO.Unsafe (unsafeInterleaveIO)
import qualified Data.IntMap.Strict as IM
import qualified Data.Map.Strict as MS

-- The Collapse Monad 
-- ------------------
-- See: https://gist.github.com/VictorTaelin/60d3bc72fb4edefecd42095e44138b41

-- A bit-string
data Bin
  = O Bin
  | I Bin
  | E
  deriving Show

-- A Collapse is a tree of superposed values
data Collapse a
  = CSup !Lab (Collapse a) (Collapse a)
  | CInc (Collapse a)
  | CDec (Collapse a)
  | CVal !a
  | CEra
  deriving Show

bind :: Collapse a -> (a -> Collapse b) -> Collapse b
bind k f = fork k IM.empty where
  -- fork :: Collapse a -> IntMap (Bin -> Bin) -> Collapse b
  fork CEra         paths = CEra
  fork (CVal v)     paths = pass (f v) (IM.map (\x -> x E) paths)
  fork (CInc x)     paths = CInc (fork x paths)
  fork (CDec x)     paths = CDec (fork x paths)
  fork (CSup k x y) paths =
    let lft = fork x $ IM.alter (\x -> Just (maybe (putO id) putO x)) (fromIntegral k) paths in
    let rgt = fork y $ IM.alter (\x -> Just (maybe (putI id) putI x)) (fromIntegral k) paths in
    CSup k lft rgt 
  -- pass :: Collapse b -> IntMap Bin -> Collapse b
  pass CEra         paths = CEra
  pass (CVal v)     paths = CVal v
  pass (CInc x)     paths = CInc (pass x paths)
  pass (CDec x)     paths = CDec (pass x paths)
  pass (CSup k x y) paths = case IM.lookup (fromIntegral k) paths of
    Just (O p) -> pass x (IM.insert (fromIntegral k) p paths)
    Just (I p) -> pass y (IM.insert (fromIntegral k) p paths)
    Just E     -> CSup k (pass x paths) (pass y paths)
    Nothing    -> CSup k (pass x paths) (pass y paths)
  -- putO :: (Bin -> Bin) -> (Bin -> Bin)
  putO bs = \x -> bs (O x)
  -- putI :: (Bin -> Bin) -> (Bin -> Bin) 
  putI bs = \x -> bs (I x)

instance Functor Collapse where
  fmap f (CVal v)     = CVal (f v)
  fmap f (CSup k x y) = CSup k (fmap f x) (fmap f y)
  fmap f (CInc x)     = CInc (fmap f x)
  fmap f (CDec x)     = CDec (fmap f x)
  fmap _ CEra         = CEra

instance Applicative Collapse where
  pure  = CVal
  (<*>) = ap

instance Monad Collapse where
  return = pure
  (>>=)  = bind

-- Dup Collapser
-- -------------

collapseDupsAt :: IM.IntMap [Int] -> ReduceAt -> Book -> Loc -> HVM Core

collapseDupsAt state@(paths) reduceAt book host = unsafeInterleaveIO $ do
  term <- reduceAt book host
  case termTag term of
    t | t == _ERA_ -> do
      return Era

    t | t == _LET_ -> do
      let loc = termLoc term
      let mode = modeT (termLab term)
      name <- return $ "$" ++ show (loc + 0)
      val0 <- collapseDupsAt state reduceAt book (loc + 1)
      bod0 <- collapseDupsAt state reduceAt book (loc + 2)
      return $ Let mode name val0 bod0

    t | t == _LAM_ -> do
      let loc = termLoc term
      name <- return $ "$" ++ show (loc + 0)
      bod0 <- collapseDupsAt state reduceAt book (loc + 0)
      return $ Lam name bod0

    t | t == _APP_ -> do
      let loc = termLoc term
      fun0 <- collapseDupsAt state reduceAt book (loc + 0)
      arg0 <- collapseDupsAt state reduceAt book (loc + 1)
      return $ App fun0 arg0

    t | t == _SUP_ -> do
      let loc = termLoc term
      let lab = termLab term
      case IM.lookup (fromIntegral lab) paths of
        Just (p:ps) -> do
          let newPaths = IM.insert (fromIntegral lab) ps paths
          collapseDupsAt (newPaths) reduceAt book (loc + fromIntegral p)
        _ -> do
          tm00 <- collapseDupsAt state reduceAt book (loc + 0)
          tm11 <- collapseDupsAt state reduceAt book (loc + 1)
          return $ Sup lab tm00 tm11

    t | t == _VAR_ -> do
      let loc = termLoc term
      sub <- got loc
      if termGetBit sub /= 0
      then do
        set (loc + 0) (termRemBit sub)
        collapseDupsAt state reduceAt book (loc + 0)
      else do
        name <- return $ "$" ++ show loc
        return $ Var name

    t | t == _DP0_ -> do
      let loc = termLoc term
      let lab = termLab term
      sb0 <- got (loc+0)
      if termGetBit sb0 /= 0
      then do
        set (loc + 0) (termRemBit sb0)
        collapseDupsAt state reduceAt book (loc + 0)
      else do
        let newPaths = IM.alter (Just . maybe [0] (0:)) (fromIntegral lab) paths
        collapseDupsAt (newPaths) reduceAt book (loc + 0)

    t | t == _DP1_ -> do
      let loc = termLoc term
      let lab = termLab term
      sb1 <- got (loc+0)
      if termGetBit sb1 /= 0
      then do
        set (loc + 0) (termRemBit sb1)
        collapseDupsAt state reduceAt book (loc + 0)
      else do
        let newPaths = IM.alter (Just . maybe [1] (1:)) (fromIntegral lab) paths
        collapseDupsAt (newPaths) reduceAt book (loc + 0)

    t | t == _CTR_ -> do
      let loc = termLoc term
      let lab = termLab term
      let cid = fromIntegral lab
      let nam = MS.findWithDefault "?" cid (cidToCtr book)
      let ari = mget (cidToAri book) cid
      let aux = if ari == 0 then [] else [0 .. ari-1]
      fds0 <- forM aux (\i -> collapseDupsAt state reduceAt book (loc + fromIntegral i))
      return $ Ctr nam fds0

    t | t == _MAT_ -> do
      let loc = termLoc term
      let lab = termLab term
      let cid = fromIntegral lab
      let len = fromIntegral $ mget (cidToLen book) cid
      val0 <- collapseDupsAt state reduceAt book (loc + 0)
      css0 <- forM [0..len-1] $ \i -> do
        let ctr = mget (cidToCtr book) (cid + i)
        let ari = fromIntegral $ mget (cidToAri book) (cid + i)
        let fds = if ari == 0 then [] else ["$" ++ show (loc + 1 + j) | j <- [0..ari-1]]
        bod0 <- collapseDupsAt state reduceAt book (loc + 1 + fromIntegral i)
        return (ctr, fds, bod0)
      return $ Mat (MAT cid) val0 [] css0

    t | t == _IFL_ -> do
      let loc = termLoc term
      let lab = termLab term
      let cid = fromIntegral lab
      val0 <- collapseDupsAt state reduceAt book (loc + 0)
      cs00 <- collapseDupsAt state reduceAt book (loc + 1)
      cs10 <- collapseDupsAt state reduceAt book (loc + 2)
      return $ Mat (IFL cid) val0 [] [(mget (cidToCtr book) cid, [], cs00), ("_", [], cs10)]

    t | t == _SWI_ -> do
      let loc = termLoc term
      let lab = termLab term
      let len = fromIntegral lab
      val0 <- collapseDupsAt state reduceAt book (loc + 0)
      css0 <- forM [0..len-1] $ \i -> do
        bod0 <- collapseDupsAt state reduceAt book (loc + 1 + i)
        return (show i, [], bod0)
      return $ Mat SWI val0 [] css0

    t | t == _W32_ -> do
      let val = termLoc term
      return $ U32 (fromIntegral val)

    t | t == _CHR_ -> do
      let val = termLoc term
      return $ Chr (chr (fromIntegral val))

    t | t == _OPX_ -> do
      let loc = termLoc term
      let opr = toEnum (fromIntegral (termLab term))
      nm00 <- collapseDupsAt state reduceAt book (loc + 0)
      nm10 <- collapseDupsAt state reduceAt book (loc + 1)
      return $ Op2 opr nm00 nm10

    t | t == _OPY_ -> do
      let loc = termLoc term
      let opr = toEnum (fromIntegral (termLab term))
      nm00 <- collapseDupsAt state reduceAt book (loc + 0)
      nm10 <- collapseDupsAt state reduceAt book (loc + 1)
      return $ Op2 opr nm00 nm10

    t | t == _REF_ -> do
      let loc = termLoc term
      let lab = termLab term
      let fid = fromIntegral lab
      let ari = fromIntegral (funArity book fid)
      arg0 <- forM [0..ari-1] (\i -> collapseDupsAt state reduceAt book (loc + i))
      let name = MS.findWithDefault "?" fid (fidToNam book)
      return $ Ref name fid arg0

    t | t == _INC_ -> do
      let loc = termLoc term
      val0 <- collapseDupsAt state reduceAt book (loc + 0)
      return $ Inc val0

    t | t == _DEC_ -> do
      let loc = termLoc term
      val0 <- collapseDupsAt state reduceAt book (loc + 0)
      return $ Dec val0

    tag -> do
      return $ Var "?"
      -- exitFailure

-- Sup Collapser
-- -------------

collapseSups :: Book -> Core -> Collapse Core

collapseSups book core = case core of

  Var name -> do
    return $ Var name

  Ref name fid args -> do
    args <- mapM (collapseSups book) args
    return $ Ref name fid args

  Lam name body -> do
    body <- collapseSups book body
    return $ Lam name body

  App fun arg -> do
    fun <- collapseSups book fun
    arg <- collapseSups book arg
    return $ App fun arg

  Dup lab x y val body -> do
    val <- collapseSups book val
    body <- collapseSups book body
    return $ Dup lab x y val body

  Ctr nam fields -> do
    fields <- mapM (collapseSups book) fields
    return $ Ctr nam fields

  Mat kin val mov css -> do
    val <- collapseSups book val
    mov <- mapM (\(key, expr) -> do
      expr <- collapseSups book expr
      return (key, expr)) mov
    css <- mapM (\(ctr, fds, bod) -> do
      bod <- collapseSups book bod
      return (ctr, fds, bod)) css
    return $ Mat kin val mov css

  U32 val -> do
    return $ U32 val

  Chr val -> do
    return $ Chr val

  Op2 op x y -> do
    x <- collapseSups book x
    y <- collapseSups book y
    return $ Op2 op x y

  Let mode name val body -> do
    val <- collapseSups book val
    body <- collapseSups book body
    return $ Let mode name val body

  Era -> do
    CEra

  Sup lab tm0 tm1 -> do
    let tm0' = collapseSups book tm0
    let tm1' = collapseSups book tm1
    CSup lab tm0' tm1'

  Inc val -> do
    let val' = collapseSups book val
    CInc val'

  Dec val -> do
    let val' = collapseSups book val
    CDec val'

-- Tree Collapser
-- --------------

doCollapseAt :: ReduceAt -> Book -> Loc -> HVM (Collapse Core)
doCollapseAt reduceAt book host = do
  -- namesRef <- newIORef MS.empty
  let state = (IM.empty)
  core <- collapseDupsAt state reduceAt book host
  return $ collapseSups book core

-- Simple Queue
-- ------------
-- Allows pushing to an end, and popping from another.
-- Simple purely functional implementation.
-- Includes sqPop and sqPut.

data SQ a = SQ [a] [a]

sqNew :: SQ a
sqNew = SQ [] []

sqPop :: SQ a -> Maybe (a, SQ a)
sqPop (SQ [] [])     = Nothing
sqPop (SQ [] ys)     = sqPop (SQ (reverse ys) [])
sqPop (SQ (x:xs) ys) = Just (x, SQ xs ys)

sqPut :: a -> SQ a -> SQ a
sqPut x (SQ xs ys) = SQ xs (x:ys)

-- Priority Queue
-- --------------
-- A stable min-heap implemented with a radix tree.
-- Orders by an Int priority and a unique Word64 key.
-- Based on IntPSQ from the the psqueues library (https://hackage.haskell.org/package/psqueues-0.2.8.1)

data PQ p v
    = Bin !Word64 !p v !Word64 !(PQ p v) !(PQ p v)
    | Tip !Word64 !p v
    | Nil

pqPush :: Ord p => Word64 -> p -> v -> PQ p v -> PQ p v
pqPush k1 p1 x1 t = case t of
  Nil                                      -> Tip k1 p1 x1
  (Tip k2 p2 x2)
    | (p1, k1) < (p2, k2)                  -> link k1 p1 x1 k2 (Tip k2 p2 x2) Nil
    | otherwise                            -> link k2 p2 x2 k1 (Tip k1 p1 x1) Nil
  (Bin k2 p2 x2 m l r)
    | nomatch k1 k2 m, (p1, k1) < (p2, k2) -> link k1 p1 x1 k2 (Bin k2 p2 x2 m l r) Nil
    | nomatch k1 k2 m                      -> link k2 p2 x2 k1 (Tip k1 p1 x1) (pqMerge m l r)
    | (p1, k1) < (p2, k2), zero k2 m       -> Bin k1 p1 x1 m (pqPush k2 p2 x2 l) r
    | (p1, k1) < (p2, k2)                  -> Bin k1 p1 x1 m l (pqPush k2 p2 x2 r)
    | zero k1 m                            -> Bin k2 p2 x2 m (pqPush k1 p1 x1 l) r
    | otherwise                            -> Bin k2 p2 x2 m l (pqPush k1 p1 x1 r)
  where
    nomatch :: Word64 -> Word64 -> Word64 -> Bool
    nomatch k1 k2 m =
      let maskW = complement (m-1) `xor` m
      in (k1 .&. maskW) /= (k2 .&. maskW)

    zero :: Word64 -> Word64 -> Bool
    zero i m = i .&. m == 0

    link :: Word64 -> p -> v -> Word64 -> (PQ p v) -> (PQ p v) -> (PQ p v)
    link k p x k' fst snd =
      let m = highestBitMask (k `xor` k')
      in if zero m k'
         then Bin k p x m fst snd
         else Bin k p x m snd fst

    highestBitMask :: Word64 -> Word64
    highestBitMask x1 =
      let x2 = x1 .|. x1 `shiftR` 1
          x3 = x2 .|. x2 `shiftR` 2
          x4 = x3 .|. x3 `shiftR` 4
          x5 = x4 .|. x4 `shiftR` 8
          x6 = x5 .|. x5 `shiftR` 16
          x7 = x6 .|. x6 `shiftR` 32
      in x7 `xor` (x7 `shiftR` 1)

pqPop :: Ord p => PQ p v -> Maybe (Word64, p, v, PQ p v)
pqPop t = case t of
  Nil             -> Nothing
  Tip k p x       -> Just (k, p, x, Nil)
  Bin k p x m l r -> Just (k, p, x, pqMerge m l r)

pqMerge :: Ord p => Word64 -> PQ p v -> PQ p v -> PQ p v
pqMerge m l r = case (l, r) of
  (Nil, r)                     -> r
  (l, Nil)                     -> l
  (Tip lk lp lx, Tip rk rp rx)
    | (lp, lk) < (rp, rk)      -> Bin lk lp lx m Nil r
    | otherwise                -> Bin rk rp rx m l Nil
  (Tip lk lp lx, Bin rk rp rx rm rl rr)
    | (lp, lk) < (rp, rk)      -> Bin lk lp lx m Nil r
    | otherwise                -> Bin rk rp rx m l (pqMerge rm rl rr)
  (Bin lk lp lx lm ll lr, Tip rk rp rx)
    | (lp, lk) < (rp, rk)      -> Bin lk lp lx m (pqMerge lm ll lr) r
    | otherwise                -> Bin rk rp rx m l Nil
  (Bin lk lp lx lm ll lr, Bin rk rp rx rm rl rr)
    | (lp, lk) < (rp, rk)      -> Bin lk lp lx m (pqMerge lm ll lr) r
    | otherwise                -> Bin rk rp rx m l (pqMerge rm rl rr)


-- Flattener
-- ---------

flattenDFS :: Collapse a -> [a]
flattenDFS (CSup k a b) = flatten a ++ flatten b
flattenDFS (CVal x)     = [x]
flattenDFS (CInc x)     = flattenDFS x
flattenDFS (CDec x)     = flattenDFS x
flattenDFS CEra         = []

flattenBFS :: Collapse a -> [a]
flattenBFS term = go term (sqNew :: SQ (Collapse a)) where
  go (CSup k a b) sq = go CEra (sqPut b $ sqPut a $ sq)
  go (CVal x)     sq = x : go CEra sq
  go (CInc x)     sq = go x sq
  go (CDec x)     sq = go x sq
  go CEra         sq = case sqPop sq of
    Just (v,sq) -> go v sq
    Nothing     -> []

-- Priority-Queue Flattener
-- ------------------------
-- * priority starts at 0
-- * since PQ is a min-queue, we invert the scoers:
-- * passing through (CInc t) subs 1 ; (CDec t) adds 1
-- * when no Inc/Dec are present every node has priority == depth
--   hence the order matches plain BFS exactly (stable heap).

flattenPRI :: Collapse a -> [a]
flattenPRI term = go 1 (Tip 0 0 term) where
  go i pq = case pqPop pq of
    Nothing -> []
    Just (_, pri, node, pq') -> case node of
      CEra   -> go i pq'
      CVal v -> v : go i pq'
      CInc t -> go (i + 1) (pqPush i (pri - 1) t pq')
      CDec t -> go (i + 1) (pqPush i (pri + 1) t pq')
      CSup _ a b ->
        let pq1 = (pqPush (i + 0) (pri + 1) a pq')
            pq2 = (pqPush (i + 1) (pri + 1) b pq1)
        in go (i + 2) pq2

-- Default Flattener
-- -----------------

flatten :: Collapse a -> [a]
flatten = flattenPRI

-- Flat Collapser
-- --------------

doCollapseFlatAt :: ReduceAt -> Book -> Loc -> HVM [Core]
doCollapseFlatAt reduceAt book host = do
  coll <- doCollapseAt reduceAt book host
  return $ flatten coll


================================================
FILE: src/HVM/Compile.hs
================================================
{-./../IC.md-}
{-./Type.hs-}
{-./Inject.hs-}

{-# LANGUAGE TemplateHaskell #-}

module HVM.Compile where

import Control.Monad (forM_, forM, foldM, when)
import Control.Monad.State
import Data.Bits (shiftL, (.|.))
import Data.List
import Data.Word
import Data.FileEmbed
import Debug.Trace
import HVM.Foreign hiding (fresh)
import HVM.Type
import qualified Data.Map.Strict as MS

-- The Runtime.c file content embedded in the binary
-- Embed the entire runtime as a single translation unit for compiled mode.
-- We inline Runtime.h and concatenate all .c modules with the include line removed.
runtime_h      :: String
runtime_h      = $(embedStringFile "./src/HVM/Runtime.h")
rt_state_c     :: String; rt_state_c  = $(embedStringFile "./src/HVM/runtime/state.c")
rt_heap_c      :: String; rt_heap_c   = $(embedStringFile "./src/HVM/runtime/heap.c")
rt_term_c      :: String; rt_term_c   = $(embedStringFile "./src/HVM/runtime/term.c")
rt_stack_c     :: String; rt_stack_c  = $(embedStringFile "./src/HVM/runtime/stack.c")
rt_print_c     :: String; rt_print_c  = $(embedStringFile "./src/HVM/runtime/print.c")
rt_memory_c    :: String; rt_memory_c = $(embedStringFile "./src/HVM/runtime/memory.c")
rt_reduce_c    :: String; rt_reduce_c = $(embedStringFile "./src/HVM/runtime/reduce.c")
rt_pr_SUP_c    :: String; rt_pr_SUP_c = $(embedStringFile "./src/HVM/runtime/prim/SUP.c")
rt_pr_DUP_c    :: String; rt_pr_DUP_c = $(embedStringFile "./src/HVM/runtime/prim/DUP.c")
rt_pr_LOG_c    :: String; rt_pr_LOG_c = $(embedStringFile "./src/HVM/runtime/prim/LOG.c")
rt_red_app_ctr :: String; rt_red_app_ctr = $(embedStringFile "./src/HVM/runtime/reduce/app_ctr.c")
rt_red_app_era :: String; rt_red_app_era = $(embedStringFile "./src/HVM/runtime/reduce/app_era.c")
rt_red_app_lam :: String; rt_red_app_lam = $(embedStringFile "./src/HVM/runtime/reduce/app_lam.c")
rt_red_app_sup :: String; rt_red_app_sup = $(embedStringFile "./src/HVM/runtime/reduce/app_sup.c")
rt_red_app_una :: String; rt_red_app_una = $(embedStringFile "./src/HVM/runtime/reduce/app_una.c")
rt_red_app_w32 :: String; rt_red_app_w32 = $(embedStringFile "./src/HVM/runtime/reduce/app_w32.c")
rt_red_dup_ctr :: String; rt_red_dup_ctr = $(embedStringFile "./src/HVM/runtime/reduce/dup_ctr.c")
rt_red_dup_era :: String; rt_red_dup_era = $(embedStringFile "./src/HVM/runtime/reduce/dup_era.c")
rt_red_dup_lam :: String; rt_red_dup_lam = $(embedStringFile "./src/HVM/runtime/reduce/dup_lam.c")
rt_red_dup_ref :: String; rt_red_dup_ref = $(embedStringFile "./src/HVM/runtime/reduce/dup_ref.c")
rt_red_dup_sup :: String; rt_red_dup_sup = $(embedStringFile "./src/HVM/runtime/reduce/dup_sup.c")
rt_red_dup_una :: String; rt_red_dup_una = $(embedStringFile "./src/HVM/runtime/reduce/dup_una.c")
rt_red_dup_w32 :: String; rt_red_dup_w32 = $(embedStringFile "./src/HVM/runtime/reduce/dup_w32.c")
rt_red_let     :: String; rt_red_let     = $(embedStringFile "./src/HVM/runtime/reduce/let.c")
rt_red_ref     :: String; rt_red_ref     = $(embedStringFile "./src/HVM/runtime/reduce/ref.c")
rt_red_ref_sup :: String; rt_red_ref_sup = $(embedStringFile "./src/HVM/runtime/reduce/ref_sup.c")
rt_red_mat_ctr :: String; rt_red_mat_ctr = $(embedStringFile "./src/HVM/runtime/reduce/mat_ctr.c")
rt_red_mat_era :: String; rt_red_mat_era = $(embedStringFile "./src/HVM/runtime/reduce/mat_era.c")
rt_red_mat_lam :: String; rt_red_mat_lam = $(embedStringFile "./src/HVM/runtime/reduce/mat_lam.c")
rt_red_mat_sup :: String; rt_red_mat_sup = $(embedStringFile "./src/HVM/runtime/reduce/mat_sup.c")
rt_red_mat_una :: String; rt_red_mat_una = $(embedStringFile "./src/HVM/runtime/reduce/mat_una.c")
rt_red_mat_w32 :: String; rt_red_mat_w32 = $(embedStringFile "./src/HVM/runtime/reduce/mat_w32.c")
rt_red_opx_ctr :: String; rt_red_opx_ctr = $(embedStringFile "./src/HVM/runtime/reduce/opx_ctr.c")
rt_red_opx_era :: String; rt_red_opx_era = $(embedStringFile "./src/HVM/runtime/reduce/opx_era.c")
rt_red_opx_lam :: String; rt_red_opx_lam = $(embedStringFile "./src/HVM/runtime/reduce/opx_lam.c")
rt_red_opx_sup :: String; rt_red_opx_sup = $(embedStringFile "./src/HVM/runtime/reduce/opx_sup.c")
rt_red_opx_una :: String; rt_red_opx_una = $(embedStringFile "./src/HVM/runtime/reduce/opx_una.c")
rt_red_opx_w32 :: String; rt_red_opx_w32 = $(embedStringFile "./src/HVM/runtime/reduce/opx_w32.c")
rt_red_opy_ctr :: String; rt_red_opy_ctr = $(embedStringFile "./src/HVM/runtime/reduce/opy_ctr.c")
rt_red_opy_era :: String; rt_red_opy_era = $(embedStringFile "./src/HVM/runtime/reduce/opy_era.c")
rt_red_opy_lam :: String; rt_red_opy_lam = $(embedStringFile "./src/HVM/runtime/reduce/opy_lam.c")
rt_red_opy_sup :: String; rt_red_opy_sup = $(embedStringFile "./src/HVM/runtime/reduce/opy_sup.c")
rt_red_opy_una :: String; rt_red_opy_una = $(embedStringFile "./src/HVM/runtime/reduce/opy_una.c")
rt_red_opy_w32 :: String; rt_red_opy_w32 = $(embedStringFile "./src/HVM/runtime/reduce/opy_w32.c")

stripIncl :: String -> String
stripIncl = unlines . filter (not . isPrefixOf "#include \"Runtime.h\"") . lines

-- Remove header-only pragmas that are noisy in a main TU
stripHdr :: String -> String
stripHdr = unlines . filter (not . isPrefixOf "#pragma once") . lines

runtime_c :: String
runtime_c = unlines
  [ stripHdr runtime_h
  , stripIncl rt_state_c
  , stripIncl rt_heap_c
  , stripIncl rt_term_c
  , stripIncl rt_stack_c
  , stripIncl rt_print_c
  , stripIncl rt_memory_c
  , stripIncl rt_reduce_c
  , stripIncl rt_pr_SUP_c
  , stripIncl rt_pr_DUP_c
  , stripIncl rt_pr_LOG_c
  , stripIncl rt_red_app_ctr
  , stripIncl rt_red_app_era
  , stripIncl rt_red_app_lam
  , stripIncl rt_red_app_sup
  , stripIncl rt_red_app_una
  , stripIncl rt_red_app_w32
  , stripIncl rt_red_dup_ctr
  , stripIncl rt_red_dup_era
  , stripIncl rt_red_dup_lam
  , stripIncl rt_red_dup_ref
  , stripIncl rt_red_dup_sup
  , stripIncl rt_red_dup_una
  , stripIncl rt_red_dup_w32
  , stripIncl rt_red_let
  , stripIncl rt_red_ref
  , stripIncl rt_red_ref_sup
  , stripIncl rt_red_mat_ctr
  , stripIncl rt_red_mat_era
  , stripIncl rt_red_mat_lam
  , stripIncl rt_red_mat_sup
  , stripIncl rt_red_mat_una
  , stripIncl rt_red_mat_w32
  , stripIncl rt_red_opx_ctr
  , stripIncl rt_red_opx_era
  , stripIncl rt_red_opx_lam
  , stripIncl rt_red_opx_sup
  , stripIncl rt_red_opx_una
  , stripIncl rt_red_opx_w32
  , stripIncl rt_red_opy_ctr
  , stripIncl rt_red_opy_era
  , stripIncl rt_red_opy_lam
  , stripIncl rt_red_opy_sup
  , stripIncl rt_red_opy_una
  , stripIncl rt_red_opy_w32
  ]


-- Generates the complete C code for a Book
compileBook :: Book -> String -> String
compileBook book runtime_c =
  let decls = compileHeaders book
      funcs = map (\ (fid, _) -> compile book fid) (MS.toList (fidToFun book))
  in unlines $ [runtime_c] ++ [decls] ++ funcs ++ [genMain book]

-- Compilation
-- -----------

data CompileState = CompileState
  { next :: Word64
  , tabs :: Int
  , bins :: MS.Map String String  -- var_name => binder_host
  , vars :: [(String, String)]    -- [(var_name, var_host)]
  , code :: [String]
  , reus :: MS.Map Int [String]   -- arity => [reuse_loc]
  , tco  :: Bool                  -- tail-call optimization
  }

type Compile = State CompileState

compile :: Book -> Word16 -> String
compile book fid =
  let full = compileWith compileFull book fid in
  let fast = compileWith compileFast book fid in
  if "<ERR>" `isInfixOf` fast then full else fast

-- Compiles a function using either Fast-Mode or Full-Mode
compileWith :: (Book -> Word16 -> Core -> Bool -> [(Bool,String)] -> Compile ()) -> Book -> Word16 -> String
compileWith cmp book fid = 
  let copy   = fst (fst (mget (fidToFun book) fid)) in
  let args   = snd (fst (mget (fidToFun book) fid)) in
  let core   = snd (mget (fidToFun book) fid) in
  let tco    = isTailRecursive core fid in
  let state  = CompileState 0 0 MS.empty [] [] MS.empty tco in
  let result = runState (cmp book fid core copy args) state in
  unlines $ reverse $ code (snd result)

emit :: String -> Compile ()
emit line = modify $ \st -> st { code = (replicate (tabs st * 2) ' ' ++ line) : code st }

tabInc :: Compile ()
tabInc = modify $ \st -> st { tabs = tabs st + 1 }

tabDec :: Compile ()
tabDec = modify $ \st -> st { tabs = tabs st - 1 }

bind :: String -> String -> Compile ()
bind var host = modify $ \st -> st { bins = MS.insert var host (bins st) }

fresh :: String -> Compile String
fresh name = do
  uid <- gets next
  modify $ \s -> s { next = uid + 1 }
  return $ name ++ show uid

reuse :: Int -> String -> Compile ()
reuse arity loc = modify $ \st -> st { reus = MS.insertWith (++) arity [loc] (reus st) }

-- Full Compiler
-- -------------

compileFull :: Book -> Word16 -> Core -> Bool -> [(Bool,String)] -> Compile ()
compileFull book fid core copy args = do
  emit $ "Term " ++ mget (fidToNam book) fid ++ "_f(Term ref) {"
  tabInc
  forM_ (zip [0..] args) $ \(i, arg) -> do
    argVar <- fresh "arg"
    if fst arg
      then emit $ "Term " ++ argVar ++ " = reduce_at(term_loc(ref) + " ++ show i ++ ");"
      else emit $ "Term " ++ argVar ++ " = got(term_loc(ref) + " ++ show i ++ ");"
    let argName = snd arg
    bind argName argVar
  result <- compileFullCore book fid core "root"
  st <- get
  forM_ (vars st) $ \ (var,host) -> do
    let varTerm = MS.findWithDefault "" var (bins st)
    emit $ "set(" ++ host ++ ", " ++ varTerm ++ ");"
  emit $ "return " ++ result ++ ";"
  tabDec
  emit "}"

compileFullVar :: String -> String -> Compile String
compileFullVar var host = do
  bins <- gets bins
  case MS.lookup var bins of
    Just entry -> do
      return entry
    Nothing -> do
      modify $ \s -> s { vars = (var, host) : vars s }
      return "0"

compileFullCore :: Book -> Word16 -> Core -> String -> Compile String

compileFullCore book fid Era _ = do
  return $ "term_new(ERA, 0, 0)"

compileFullCore book fid (Var name) host = do
  compileFullVar name host

compileFullCore book fid (Let mode var val bod) host = do
  letNam <- fresh "let"
  emit $ "Loc " ++ letNam ++ " = alloc_node(2);"
  valT <- compileFullCore book fid val (letNam ++ " + 0")
  emit $ "set(" ++ letNam ++ " + 0, " ++ valT ++ ");"
  bind var $ "term_new(VAR, 0, " ++ letNam ++ " + 0)"
  bodT <- compileFullCore book fid bod (letNam ++ " + 1")
  emit $ "set(" ++ letNam ++ " + 1, " ++ bodT ++ ");"
  return $ "term_new(LET, " ++ show (fromEnum mode) ++ ", " ++ letNam ++ ")"

compileFullCore book fid (Lam var bod) host = do
  lamNam <- fresh "lam"
  emit $ "Loc " ++ lamNam ++ " = alloc_node(1);"
  bind var $ "term_new(VAR, 0, " ++ lamNam ++ " + 0)"
  bodT <- compileFullCore book fid bod (lamNam ++ " + 0")
  emit $ "set(" ++ lamNam ++ " + 0, " ++ bodT ++ ");"
  return $ "term_new(LAM, 0, " ++ lamNam ++ ")"

compileFullCore book fid (App fun arg) host = do
  appNam <- fresh "app"
  emit $ "Loc " ++ appNam ++ " = alloc_node(2);"
  funT <- compileFullCore book fid fun (appNam ++ " + 0")
  argT <- compileFullCore book fid arg (appNam ++ " + 1")
  emit $ "set(" ++ appNam ++ " + 0, " ++ funT ++ ");"
  emit $ "set(" ++ appNam ++ " + 1, " ++ argT ++ ");"
  return $ "term_new(APP, 0, " ++ appNam ++ ")"

compileFullCore book fid (Sup lab tm0 tm1) host = do
  supNam <- fresh "sup"
  emit $ "Loc " ++ supNam ++ " = alloc_node(2);"
  tm0T <- compileFullCore book fid tm0 (supNam ++ " + 0")
  tm1T <- compileFullCore book fid tm1 (supNam ++ " + 1")
  emit $ "set(" ++ supNam ++ " + 0, " ++ tm0T ++ ");"
  emit $ "set(" ++ supNam ++ " + 1, " ++ tm1T ++ ");"
  return $ "term_new(SUP, " ++ show lab ++ ", " ++ supNam ++ ")"

compileFullCore book fid (Dup lab dp0 dp1 val bod) host = do
  dupNam <- fresh "dup"
  emit $ "Loc " ++ dupNam ++ " = alloc_node(1);"
  bind dp0 $ "term_new(DP0, " ++ show lab ++ ", " ++ dupNam ++ " + 0)"
  bind dp1 $ "term_new(DP1, " ++ show lab ++ ", " ++ dupNam ++ " + 0)"
  valT <- compileFullCore book fid val (dupNam ++ " + 0")
  emit $ "set(" ++ dupNam ++ " + 0, " ++ valT ++ ");"
  bodT <- compileFullCore book fid bod host
  return bodT

compileFullCore book fid (Ctr nam fds) host = do
  ctrNam <- fresh "ctr"
  let arity = length fds
  let cid = mget (ctrToCid book) nam
  emit $ "Loc " ++ ctrNam ++ " = alloc_node(" ++ show arity ++ ");"
  fdsT <- mapM (\ (i,fd) -> compileFullCore book fid fd (ctrNam ++ " + " ++ show i)) (zip [0..] fds)
  sequence_ [emit $ "set(" ++ ctrNam ++ " + " ++ show i ++ ", " ++ fdT ++ ");" | (i,fdT) <- zip [0..] fdsT]
  return $ "term_new(CTR, " ++ show cid ++ ", " ++ ctrNam ++ ")"

compileFullCore book fid tm@(Mat kin val mov css) host = do
  matNam <- fresh "mat"
  emit $ "Loc " ++ matNam ++ " = alloc_node(" ++ show (1 + length css) ++ ");"
  valT <- compileFullCore book fid val (matNam ++ " + 0")
  emit $ "set(" ++ matNam ++ " + 0, " ++ valT ++ ");"
  forM_ (zip [0..] css) $ \ (i,(ctr,fds,bod)) -> do
    let bod' = foldr (\x b -> Lam x b) (foldr (\x b -> Lam x b) bod (map fst mov)) fds
    bodT <- compileFullCore book fid bod' (matNam ++ " + " ++ show (i+1))
    emit $ "set(" ++ matNam ++ " + " ++ show (i+1) ++ ", " ++ bodT ++ ");"
  let tag = case kin of { SWI -> "SWI" ; (IFL _) -> "IFL" ; (MAT _) -> "MAT" }
  let lab = case kin of { SWI -> fromIntegral (length css) ; (IFL cid) -> cid ; (MAT cid) -> cid }
  let mat = "term_new(" ++ tag ++ ", " ++ show lab ++ ", " ++ matNam ++ ")"
  foldM (\term (key, val) -> do
    appNam <- fresh "app"
    emit $ "Loc " ++ appNam ++ " = alloc_node(2);"
    valT <- compileFullCore book fid val (appNam ++ " + 1")
    emit $ "set(" ++ appNam ++ " + 0, " ++ term ++ ");"
    emit $ "set(" ++ appNam ++ " + 1, " ++ valT ++ ");"
    return $ "term_new(APP, 0, " ++ appNam ++ ")") mat mov

compileFullCore book fid (U32 val) _ =
  return $ "term_new(W32, 0, " ++ show (fromIntegral val) ++ ")"

compileFullCore book fid (Chr val) _ =
  return $ "term_new(CHR, 0, " ++ show (fromEnum val) ++ ")"

compileFullCore book fid (Op2 opr nu0 nu1) host = do
  opxNam <- fresh "opx"
  emit $ "Loc " ++ opxNam ++ " = alloc_node(2);"
  nu0T <- compileFullCore book fid nu0 (opxNam ++ " + 0")
  nu1T <- compileFullCore book fid nu1 (opxNam ++ " + 1")
  emit $ "set(" ++ opxNam ++ " + 0, " ++ nu0T ++ ");"
  emit $ "set(" ++ opxNam ++ " + 1, " ++ nu1T ++ ");"
  return $ "term_new(OPX, " ++ show (fromEnum opr) ++ ", " ++ opxNam ++ ")"

compileFullCore book fid t@(Ref rNam rFid rArg) host = do
  checkRefAri book fid t
  refNam <- fresh "ref"
  let arity = length rArg
  emit $ "Loc " ++ refNam ++ " = alloc_node(" ++ show arity ++ ");"
  argsT <- mapM (\ (i,arg) -> compileFullCore book fid arg (refNam ++ " + " ++ show i)) (zip [0..] rArg)
  sequence_ [emit $ "set(" ++ refNam ++ " + " ++ show i ++ ", " ++ argT ++ ");" | (i,argT) <- zip [0..] argsT]
  return $ "term_new(REF, " ++ show rFid ++ ", " ++ refNam ++ ")"

compileFullCore book fid (Inc val) host = do
  incNam <- fresh "inc"
  emit $ "Loc " ++ incNam ++ " = alloc_node(1);"
  valT <- compileFullCore book fid val (incNam ++ " + 0")
  emit $ "set(" ++ incNam ++ " + 0, " ++ valT ++ ");"
  return $ "term_new(INC, 0, " ++ incNam ++ ")"

compileFullCore book fid (Dec val) host = do
  decNam <- fresh "dec"
  emit $ "Loc " ++ decNam ++ " = alloc_node(1);"
  valT <- compileFullCore book fid val (decNam ++ " + 0")
  emit $ "set(" ++ decNam ++ " + 0, " ++ valT ++ ");"
  return $ "term_new(DEC, 0, " ++ decNam ++ ")"

-- Fast Compiler
-- -------------

-- Compiles a function using Fast-Mode
compileFast :: Book -> Word16 -> Core -> Bool -> [(Bool,String)] -> Compile ()
compileFast book fid core copy args = do
  emit $ "Term " ++ mget (fidToNam book) fid ++ "_f(Term ref) {"
  tabInc
  emit "u64 itrs = 0;"
  args <- forM (zip [0..] args) $ \ (i, (strict, arg)) -> do
    argNam <- fresh "arg"
    if strict then do
      emit $ "Term " ++ argNam ++ " = reduce_at(term_loc(ref) + " ++ show i ++ ");"
    else do
      emit $ "Term " ++ argNam ++ " = got(term_loc(ref) + " ++ show i ++ ");"
    if copy && strict then do
      case MS.lookup fid (fidToLab book) of
        Just labs -> do
          emit $ "if (term_tag(" ++ argNam ++ ") == ERA) {"
          emit $ "  itrs += 1;"
          emit $ "  *HVM.itrs += itrs;"
          emit $ "  return term_new(ERA, 0, 0);"
          emit $ "}"
          emit $ "if (term_tag(" ++ argNam ++ ") == SUP) {"
          tabInc
          emit $ "u64 lab = term_lab(" ++ argNam ++ ");"
          emit $ "if (1"
          forM_ (MS.keys labs) $ \lab -> do
            emit $ "    && lab != " ++ show lab
          emit $ ") {"
          tabInc
          emit $ "return reduce_ref_sup(ref, " ++ show i ++ ");"
          tabDec
          emit $ "}"
          tabDec
          emit $ "}"
        Nothing -> return ()
    else
      return ()
    bind arg argNam
    return argNam
  reuse (length (snd (fst (mget (fidToFun book) fid)))) "term_loc(ref)"
  compileFastArgs book fid core args
  tabDec
  emit "}"

-- Compiles a fast function's argument list
compileFastArgs :: Book -> Word16 -> Core -> [String] -> Compile ()
compileFastArgs book fid body ctx = do
  tco <- gets tco
  if tco then do
    emit $ "_Bool fst_iter = true;"
    emit $ "while (1) {"
    tabInc
    compileFastBody book fid body ctx False 0
    tabDec
    emit $ "}"
  else do
    compileFastBody book fid body ctx False 0

-- Compiles a fast function body (pattern-matching)
compileFastBody :: Book -> Word16 -> Core -> [String] -> Bool -> Int -> Compile ()
compileFastBody book fid term@(Mat kin val mov css) ctx stop@False itr = do
  valT   <- compileFastCore book fid val
  valNam <- fresh "val"
  emit $ "Term " ++ valNam ++ " = reduce(" ++ valT ++ ");"
  let valVar = "scrut%"++valNam
  bind valVar valNam
  let isNumeric = length css > 0 && (let (ctr,fds,bod) = css !! 0 in ctr == "0")

  -- Numeric Pattern-Matching
  if isNumeric then do
    numNam <- fresh "num"
    emit $ "if (term_tag("++valNam++") == W32) {"
    tabInc
    emit $ "u32 " ++ numNam ++ " = term_loc(" ++ valNam ++ ");"
    emit $ "switch (" ++ numNam ++ ") {"
    tabInc
    forM_ (zip [0..] css) $ \ (i, (ctr,fds,bod)) -> do
      if i < length css - 1 then do
        emit $ "case " ++ show i ++ ": {"
        tabInc
        forM_ mov $ \ (key,val) -> do
          valT <- compileFastCore book fid val
          bind key valT
        compileFastBody book fid bod ctx stop (itr + 1 + length mov)
        tabDec
        emit $ "}"
      else do
        emit $ "default: {"
        tabInc
        preNam <- fresh "pre"
        emit $ "Term " ++ preNam ++ " = " ++ "term_new(W32, 0, "++numNam++" - "++show (length css - 1)++");"
        forM_ fds $ \ fd -> do
          bind fd preNam
        forM_ mov $ \ (key,val) -> do
          valT <- compileFastCore book fid val
          bind key valT
        compileFastBody book fid bod ctx stop (itr + 1 + length fds + length mov)
        tabDec
        emit $ "}"
    tabDec
    emit $ "}"
    tabDec
    emit $ "} else if (term_tag(" ++ valNam ++ ") == ERA) {"
    tabInc
    compileFastBody book fid Era ctx stop (itr + 1)
    tabDec
    emit $ "} else {"
    tabInc
    val <- compileFastCore book fid (Mat kin (Var valVar) mov css)
    emit $ "itrs += " ++ show itr ++ ";"
    compileFastSave book fid term ctx itr
    emit $ "return " ++ val ++ ";"
    tabDec
    emit $ "}"

  -- Constructor Pattern-Matching (with IfLet)
  else if (case kin of { (IFL _) -> True ; _ -> False }) then do
    let (Var defNam) = val
    let iflCss       = undoIfLetChain defNam term
    let (_, dflt)    = last iflCss
    let othCss       = init iflCss
    emit $ "if (term_tag(" ++ valNam ++ ") == CTR) {"
    tabInc
    emit $ "switch (term_lab(" ++ valNam ++ ")) {"
    tabInc
    reuse' <- gets reus
    itrA <- foldM (\itr (mov, (ctr, fds, bod)) -> do
      emit $ "case " ++ show (mget (ctrToCid book) ctr) ++ ": {"
      tabInc
      reuse (length fds) ("term_loc(" ++ valNam ++ ")")
      forM_ (zip [0..] fds) $ \(k, fd) -> do
        fdNam <- fresh "fd"
        emit $ "Term " ++ fdNam ++ " = got(term_loc(" ++ valNam ++ ") + " ++ show k ++ ");"
        bind fd fdNam
      forM_ mov $ \(key, val) -> do
        valT <- compileFastCore book fid val
        bind key valT
      compileFastBody book fid bod ctx stop (itr + 1 + length fds + length mov)
      tabDec
      emit $ "}"
      modify $ \st -> st { reus = reuse' }
      return (itr + 1 + 1 + length mov)) itr othCss
    emit $ "default: {"
    tabInc
    let (_, [dflNam], dflBod) = dflt
    fdNam <- fresh "fd"
    emit $ "Term " ++ fdNam ++ " = " ++ valNam ++ ";"
    bind dflNam fdNam
    forM_ mov $ \(key, val) -> do
      valT <- compileFastCore book fid val
      bind key valT
    compileFastBody book fid dflBod ctx stop itrA
    tabDec
    emit $ "}"
    tabDec
    emit $ "}"
    tabDec
    emit $ "} else if (term_tag(" ++ valNam ++ ") == ERA) {"
    tabInc
    compileFastBody book fid Era ctx stop (itr + 1)
    tabDec
    emit $ "} else {"
    tabInc
    val <- compileFastCore book fid (Mat kin (Var valVar) mov css)
    emit $ "itrs += " ++ show itr ++ ";"
    compileFastSave book fid term ctx itr
    emit $ "return " ++ val ++ ";"
    tabDec
    emit $ "}"

  -- Constructor Pattern-Matching (without IfLet)
  else do
    emit $ "if (term_tag(" ++ valNam ++ ") == CTR) {"
    tabInc
    emit $ "switch (term_lab(" ++ valNam ++ ") - " ++ show (case kin of { (IFL c) -> c ; (MAT c) -> c ; _ -> 0 }) ++ ") {"
    tabInc
    reuse' <- gets reus
    forM_ (zip [0..] css) $ \ (i, (ctr,fds,bod)) -> do
      emit $ "case " ++ show i ++ ": {"
      tabInc
      reuse (length fds) ("term_loc(" ++ valNam ++ ")")
      forM_ (zip [0..] fds) $ \ (k,fd) -> do
        fdNam <- fresh "fd"
        emit $ "Term " ++ fdNam ++ " = got(term_loc(" ++ valNam ++ ") + " ++ show k ++ ");"
        bind fd fdNam
      forM_ mov $ \ (key,val) -> do
        valT <- compileFastCore book fid val
        bind key valT
      compileFastBody book fid bod ctx stop (itr + 1 + length fds + length mov)
      tabDec
      emit $ "}"
      modify $ \st -> st { reus = reuse' }
    emit $ "default: { exit(1); }"
    tabDec
    emit $ "}"
    tabDec
    emit $ "} else if (term_tag(" ++ valNam ++ ") == ERA) {"
    tabInc
    compileFastBody book fid Era ctx stop (itr + 1)
    tabDec
    emit $ "} else {"
    tabInc
    val <- compileFastCore book fid (Mat kin (Var valVar) mov css)
    emit $ "itrs += " ++ show itr ++ ";"
    compileFastSave book fid term ctx itr
    emit $ "return " ++ val ++ ";"
    tabDec
    emit $ "}"
  where
    undoIfLetChain :: String -> Core -> [([(String,Core)], (String, [String], Core))]
    undoIfLetChain expNam term@(Mat _ (Var gotNam) mov [(ctr, fds, bod), ("_", [nxtNam], rest)]) =
      if gotNam == expNam
        then (mov, (ctr, fds, bod)) : undoIfLetChain nxtNam rest
        else [([], ("_", [expNam], term))]
    undoIfLetChain expNam term = [([], ("_", [expNam], term))]

compileFastBody book fid term@(Dup lab dp0 dp1 val bod) ctx stop itr = do
  valT <- compileFastCore book fid val
  valNam <- fresh "val"
  dp0Nam <- fresh "dpA"
  dp1Nam <- fresh "dpB"
  emit $ "Term " ++ valNam ++ " = (" ++ valT ++ ");"
  emit $ "Term " ++ dp0Nam ++ ";"
  emit $ "Term " ++ dp1Nam ++ ";"
  emit $ "if (term_is_atom(" ++ valNam ++ ")) {"
  tabInc
  emit $ "itrs += 1;"
  emit $ dp0Nam ++ " = " ++ valNam ++ ";"
  emit $ dp1Nam ++ " = " ++ valNam ++ ";"
  tabDec
  emit $ "} else if (term_tag(" ++ valNam ++ ") == SUP && term_lab(" ++ valNam ++ ") == " ++ show lab ++ ") {"
  tabInc
  emit $ "itrs += 1;"
  emit $ dp0Nam ++ " = got(term_loc(" ++ valNam ++ ") + 0);"
  emit $ dp1Nam ++ " = got(term_loc(" ++ valNam ++ ") + 1);"
  tabDec
  emit $ "} else {"
  tabInc
  dupNam <- fresh "dup"
  compileFastAlloc dupNam 1
  emit $ "set(" ++ dupNam ++ " + 0, " ++ valNam ++ ");"
  emit $ dp0Nam ++ " = term_new(DP0, " ++ show lab ++ ", " ++ dupNam ++ " + 0);"
  emit $ dp1Nam ++ " = term_new(DP1, " ++ show lab ++ ", " ++ dupNam ++ " + 0);"
  tabDec
  emit $ "}"
  bind dp0 dp0Nam
  bind dp1 dp1Nam
  compileFastBody book fid bod ctx stop itr

compileFastBody book fid term@(Let mode var val bod) ctx stop itr = do
  valT <- compileFastCore book fid val
  case mode of
    LAZY -> do
      bind var valT
    STRI -> do
      case val of
        t@(Ref _ rFid _) -> do
          checkRefAri book fid t
          valNam <- fresh "val"
          emit $ "Term " ++ valNam ++ " = reduce(" ++ mget (fidToNam book) rFid ++ "_f(" ++ valT ++ "));"
          bind var valNam
        _ -> do
          valNam <- fresh "val" 
          emit $ "Term " ++ valNam ++ " = reduce(" ++ valT ++ ");"
          bind var valNam
  compileFastBody book fid bod ctx stop itr

compileFastBody book fid term@(Ref fNam fFid fArg) ctx stop itr
  -- Tail-call optimization
  | fFid == fid = do
    checkRefAri book fid term
    forM_ (zip fArg ctx) $ \ (arg, ctxVar) -> do
      argT <- compileFastCore book fid arg
      emit $ "" ++ ctxVar ++ " = " ++ argT ++ ";"
    emit $ "itrs += " ++ show (itr + 1) ++ ";"
    emit $ "fst_iter = false;"
    emit $ "continue;"

  -- Inline Dynamic DUP
  -- The label must be a number at this point (not SUP, ERA, etc).
  | fNam == "DUP" && (case fArg of [_, _, Lam _ (Lam _ _)] -> True ; _ -> False) = do
    let [lab, val, Lam dp0 (Lam dp1 bod)] = fArg
    labNam <- fresh "lab"
    labTm  <- compileFastCore book fid lab
    emit $ "Term " ++ labNam ++ " = reduce(" ++ labTm ++ ");"
    emit $ "if (term_tag(" ++ labNam ++ ") != W32) {"
    emit $ "  printf(\"ERROR:non-numeric-sup-label\\n\");"
    emit $ "}"
    emit $ "itrs += 3;"
    -- Regular dup compilation (need to reimplement here since we don't know the label during compilation)
    valT <- compileFastCore book fid val
    valNam <- fresh "val"
    dp0Nam <- fresh "dpA"
    dp1Nam <- fresh "dpB"
    emit $ "Term " ++ valNam ++ " = (" ++ valT ++ ");"
    emit $ "Term " ++ dp0Nam ++ ";"
    emit $ "Term " ++ dp1Nam ++ ";"
    emit $ "if (term_is_atom(" ++ valNam ++ ")) {"
    tabInc
    emit $ "itrs += 1;"
    emit $ dp0Nam ++ " = " ++ valNam ++ ";"
    emit $ dp1Nam ++ " = " ++ valNam ++ ";"
    tabDec
    emit $ "} else if (term_tag(" ++ valNam ++ ") == SUP && term_lab(" ++ valNam ++ ") == term_loc(" ++ labNam ++ ")) {"
    tabInc
    emit $ "itrs += 1;"
    emit $ dp0Nam ++ " = got(term_loc(" ++ valNam ++ ") + 0);"
    emit $ dp1Nam ++ " = got(term_loc(" ++ valNam ++ ") + 1);"
    tabDec
    emit $ "} else {"
    tabInc
    dupNam <- fresh "dup"
    compileFastAlloc dupNam 1
    emit $ "set(" ++ dupNam ++ " + 0, " ++ valNam ++ ");"
    -- Set to the dynamic label.
    emit $ dp0Nam ++ " = term_new(DP0, term_loc(" ++ labNam ++ "), " ++ dupNam ++ " + 0);"
    emit $ dp1Nam ++ " = term_new(DP1, term_loc(" ++ labNam ++ "), " ++ dupNam ++ " + 0);"
    tabDec
    emit $ "}"
    bind dp0 dp0Nam
    bind dp1 dp1Nam
    compileFastBody book fid bod ctx stop itr

compileFastBody book fid term ctx stop itr = do
  body <- compileFastCore book fid term
  emit $ "itrs += " ++ show itr ++ ";"
  compileFastSave book fid term ctx itr
  emit $ "return " ++ body ++ ";"

-- Completes a fast mode call
compileFastSave :: Book -> Word16 -> Core -> [String] -> Int -> Compile ()
compileFastSave book fid term ctx itr = do
  emit $ "*HVM.itrs += itrs;"

-- Helper function to allocate nodes with reuse
compileFastAlloc :: String -> Int -> Compile ()
compileFastAlloc name 0 = do
  emit $ "Loc " ++ name ++ " = 0;"
compileFastAlloc name arity = do
  reuse <- gets reus
  -- Find the smallest reuse location that's big enough
  -- Very large reuses are usually functions with a lot of state that doesn't need to be moved
  -- Don't fragment those to avoid moving those values (a separate optimization)
  let bigEnough = [(k,locs) | (k,locs) <- MS.toList reuse, k >= arity, k <= arity + 5, not (null locs)]
  case bigEnough of
    [] -> do
      emit $ "Loc " ++ name ++ " = alloc_node(" ++ show arity ++ ");"
    ((k,loc:locs):_) -> do
      emit $ "Loc " ++ name ++ ";"
      -- Too hard to determine statically if reusing is ok in tail-call-optimization
      tco <- gets tco
      if tco then do
        emit $ "if (fst_iter) {"
        emit $ "  " ++ name ++ " = " ++ loc ++ ";"
        emit $ "} else {"
        emit $ "  " ++ name ++ " = alloc_node(" ++ show arity ++ ");"
        emit $ "}"
      else do
        emit $ name ++ " = " ++ loc ++ ";"
      -- Remove the used location
      let reuse' = MS.insert k locs reuse
      -- If we used a location bigger than needed, add the remainder back
      let reuse'' = if k > arity 
                    then MS.insertWith (++) (k - arity) [loc ++ " + " ++ show arity] reuse'
                    else reuse'
      modify $ \st -> st { reus = reuse'' }

-- Compiles a core term in fast mode
compileFastCore :: Book -> Word16 -> Core -> Compile String

compileFastCore book fid Era = 
  return $ "term_new(ERA, 0, 0)"

compileFastCore book fid (Let mode var val bod) = do
  valT <- compileFastCore book fid val
  case mode of
    LAZY -> do
      emit $ "itrs += 1;"
      bind var valT
      compileFastCore book fid bod
    STRI -> do
      letNam <- fresh "let"
      compileFastAlloc letNam 2
      emit $ "set(" ++ letNam ++ " + 0, " ++ valT ++ ");"
      bind var $ "term_new(VAR, 0, " ++ letNam ++ " + 0)"
      bodT <- compileFastCore book fid bod
      emit $ "set(" ++ letNam ++ " + 1, " ++ bodT ++ ");"
      return $ "term_new(LET, " ++ show (fromEnum STRI) ++ ", " ++ letNam ++ ")"

compileFastCore book fid (Var name) = do
  compileFastVar name

compileFastCore book fid (Lam var bod) = do
  lamNam <- fresh "lam"
  compileFastAlloc lamNam 1
  bind var $ "term_new(VAR, 0, " ++ lamNam ++ " + 0)"
  bodT <- compileFastCore book fid bod
  emit $ "set(" ++ lamNam ++ " + 0, " ++ bodT ++ ");"
  return $ "term_new(LAM, 0, " ++ lamNam ++ ")"

compileFastCore book fid (App fun arg) = do
  appNam <- fresh "app"
  compileFastAlloc appNam 2
  funT <- compileFastCore book fid fun
  argT <- compileFastCore book fid arg
  emit $ "set(" ++ appNam ++ " + 0, " ++ funT ++ ");"
  emit $ "set(" ++ appNam ++ " + 1, " ++ argT ++ ");"
  return $ "term_new(APP, 0, " ++ appNam ++ ")"

compileFastCore book fid (Sup lab tm0 tm1) = do
  supNam <- fresh "sup"
  compileFastAlloc supNam 2
  tm0T <- compileFastCore book fid tm0
  tm1T <- compileFastCore book fid tm1
  emit $ "set(" ++ supNam ++ " + 0, " ++ tm0T ++ ");"
  emit $ "set(" ++ supNam ++ " + 1, " ++ tm1T ++ ");"
  return $ "term_new(SUP, " ++ show lab ++ ", " ++ supNam ++ ")"

compileFastCore book fid (Dup lab dp0 dp1 val bod) = do
  dupNam <- fresh "dup"
  dp0Nam <- fresh "dpA"
  dp1Nam <- fresh "dpB"
  valNam <- fresh "val"
  valT   <- compileFastCore book fid val
  emit $ "Term " ++ valNam ++ " = (" ++ valT ++ ");"
  emit $ "Term " ++ dp0Nam ++ ";"
  emit $ "Term " ++ dp1Nam ++ ";"
  emit $ "if (term_is_atom(" ++ valNam ++ ")) {"
  tabInc
  emit $ "itrs += 1;"
  emit $ dp0Nam ++ " = " ++ valNam ++ ";"
  emit $ dp1Nam ++ " = " ++ valNam ++ ";"
  tabDec
  emit $ "} else if (term_tag(" ++ valNam ++ ") == SUP && term_lab(" ++ valNam ++ ") == " ++ show lab ++ ") {"
  tabInc
  emit $ "itrs += 1;"
  emit $ dp0Nam ++ " = got(term_loc(" ++ valNam ++ ") + 0);"
  emit $ dp1Nam ++ " = got(term_loc(" ++ valNam ++ ") + 1);"
  tabDec
  emit $ "} else {"
  tabInc
  compileFastAlloc dupNam 1
  emit $ "set(" ++ dupNam ++ " + 0, " ++ valNam ++ ");"
  emit $ dp0Nam ++ " = term_new(DP0, " ++ show lab ++ ", " ++ dupNam ++ " + 0);"
  emit $ dp1Nam ++ " = term_new(DP1, " ++ show lab ++ ", " ++ dupNam ++ " + 0);"
  tabDec
  emit $ "}"
  bind dp0 dp0Nam
  bind dp1 dp1Nam
  compileFastCore book fid bod

compileFastCore book fid (Ctr nam fds) = do
  ctrNam <- fresh "ctr"
  let ari = length fds
  let cid = mget (ctrToCid book) nam
  compileFastAlloc ctrNam ari
  fdsT <- mapM (\ (i,fd) -> compileFastCore book fid fd) (zip [0..] fds)
  sequence_ [emit $ "set(" ++ ctrNam ++ " + " ++ show i ++ ", " ++ fdT ++ ");" | (i,fdT) <- zip [0..] fdsT]
  return $ "term_new(CTR, " ++ show cid ++ ", " ++ ctrNam ++ ")"

compileFastCore book fid tm@(Mat kin val mov css) = do
  matNam <- fresh "mat"
  compileFastAlloc matNam (1 + length css)
  valT <- compileFastCore book fid val
  emit $ "set(" ++ matNam ++ " + 0, " ++ valT ++ ");"
  forM_ (zip [0..] css) $ \(i,(ctr,fds,bod)) -> do
    let bod' = foldr (\x b -> Lam x b) (foldr (\x b -> Lam x b) bod (map fst mov)) fds
    bodT <- compileFastCore book fid bod'
    emit $ "set(" ++ matNam ++ " + " ++ show (i+1) ++ ", " ++ bodT ++ ");"
  let tag = case kin of { SWI -> "SWI" ; (IFL _) -> "IFL" ; (MAT _) -> "MAT" }
  let lab = case kin of { SWI -> fromIntegral (length css) ; (IFL cid) -> cid ; (MAT cid) -> cid }
  retNam <- fresh "ret"
  emit $ "Term " ++ retNam ++ " = term_new(" ++ tag ++ ", " ++ show lab ++ ", " ++ matNam ++ ");"
  foldM (\acc (_, val) -> do
    appNam <- fresh "app"
    compileFastAlloc appNam 2
    emit $ "set(" ++ appNam ++ " + 0, " ++ acc ++ ");"
    valT <- compileFastCore book fid val
    emit $ "set(" ++ appNam ++ " + 1, " ++ valT ++ ");"
    return $ "term_new(APP, 0, " ++ appNam ++ ")") retNam mov

compileFastCore book fid (U32 val) =
  return $ "term_new(W32, 0, " ++ show (fromIntegral val) ++ ")"

compileFastCore book fid (Chr val) =
  return $ "term_new(CHR, 0, " ++ show (fromEnum val) ++ ")"

compileFastCore book fid (Op2 opr nu0 nu1) = do
  opxNam <- fresh "opx"
  retNam <- fresh "ret"
  nu0Nam <- fresh "nu0"
  nu1Nam <- fresh "nu1"
  nu0T <- compileFastCore book fid nu0
  nu1T <- compileFastCore book fid nu1
  emit $ "Term " ++ nu0Nam ++ " = (" ++ nu0T ++ ");"
  emit $ "Term " ++ nu1Nam ++ " = (" ++ nu1T ++ ");"
  emit $ "Term " ++ retNam ++ ";"
  emit $ "if (term_tag(" ++ nu0Nam ++ ") == W32 && term_tag(" ++ nu1Nam ++ ") == W32) {"
  emit $ "  itrs += 2;"
  let oprStr = case opr of
        OP_ADD -> "+"
        OP_SUB -> "-"
        OP_MUL -> "*"
        OP_DIV -> "/"
        OP_MOD -> "%"
        OP_EQ  -> "=="
        OP_NE  -> "!="
        OP_LT  -> "<"
        OP_GT  -> ">"
        OP_LTE -> "<="
        OP_GTE -> ">="
        OP_AND -> "&"
        OP_OR  -> "|"
        OP_XOR -> "^"
        OP_LSH -> "<<"
        OP_RSH -> ">>"
  emit $ "  " ++ retNam ++ " = term_new(W32, 0, term_loc(" ++ nu0Nam ++ ") " ++ oprStr ++ " term_loc(" ++ nu1Nam ++ "));"
  emit $ "} else {"
  tabInc
  compileFastAlloc opxNam 2
  emit $ "set(" ++ opxNam ++ " + 0, " ++ nu0Nam ++ ");"
  emit $ "set(" ++ opxNam ++ " + 1, " ++ nu1Nam ++ ");"
  emit $ retNam ++ " = term_new(OPX, " ++ show (fromEnum opr) ++ ", " ++ opxNam ++ ");"
  tabDec
  emit $ "}"
  return $ retNam

compileFastCore book fid t@(Ref rNam rFid rArg) = do
  checkRefAri book fid t
  refNam <- fresh "ref"
  let arity = length rArg
  compileFastAlloc refNam arity
  argsT <- mapM (\ (i,arg) -> compileFastCore book fid arg) (zip [0..] rArg)
  sequence_ [emit $ "set(" ++ refNam ++ " + " ++ show i ++ ", " ++ argT ++ ");" | (i,argT) <- zip [0..] argsT]
  return $ "term_new(REF, " ++ show rFid ++ ", " ++ refNam ++ ")"

compileFastCore book fid (Inc val) = do
  incNam <- fresh "inc"
  compileFastAlloc incNam 1
  valT <- compileFastCore book fid val
  emit $ "set(" ++ incNam ++ " + 0, " ++ valT ++ ");"
  return $ "term_new(INC, 0, " ++ incNam ++ ")"

compileFastCore book fid (Dec val) = do
  decNam <- fresh "dec"
  compileFastAlloc decNam 1
  valT <- compileFastCore book fid val
  emit $ "set(" ++ decNam ++ " + 0, " ++ valT ++ ");"
  return $ "term_new(DEC, 0, " ++ decNam ++ ")"

-- Compiles a variable in fast mode
compileFastVar :: String -> Compile String
compileFastVar var = do
  bins <- gets bins
  case MS.lookup var bins of
    Just entry -> do
      return entry
    Nothing -> do
      return $ "<ERR>"

checkRefAri :: Book -> Word16 -> Core -> Compile ()
checkRefAri book orig core = do
  case core of
    Ref nam lab arg -> do
      let fid = fromIntegral lab
      let ari = funArity book fid
      let len = length arg
      when (ari /= fromIntegral len) $ do
        let nam = mget (fidToNam book) orig
        error $ "On function @" ++ nam ++ ": Arity mismatch on term: " ++ show core ++ ". Expected " ++ show ari ++ ", got " ++ show len ++ "."
    _ -> return ()

-- Generates the forward declarations of the compiled C functions
compileHeaders :: Book -> String
compileHeaders book =
  let funcs = MS.toList (fidToNam book)
      decls = map (\(_, name) -> "Term " ++ name ++ "_f(Term);") funcs
  in unlines decls

-- Generates the main function for the compiled C code
genMain :: Book -> String
genMain book = case MS.lookup "main" (namToFid book) of
  Just mainFid ->
    unlines
      [ "int main() {"
      , "  hvm_init();"
      , registerFuncs
      , "  clock_t start = clock();"
      , "  Term root = term_new(REF, "++show mainFid++", 0);"
      , "  normal(root);"
      , "  double time = (double)(clock() - start) / CLOCKS_PER_SEC * 1000;"
      , "  printf(\"WORK: %\"PRIu64\" interactions\\n\", get_itr());"
      , "  printf(\"TIME: %.3fs seconds\\n\", time / 1000.0);"
      , "  printf(\"SIZE: %llu nodes\\n\", get_len());"
      , "  printf(\"PERF: %.3f MIPS\\n\", (get_itr() / 1000000.0) / (time / 1000.0));"
      , "  hvm_free();"
      , "  return 0;"
      , "}"
      ]
  Nothing -> ""
  where 
    registerFuncs = unlines ["  hvm_define(" ++ show fid ++ ", " ++ name ++ "_f);" | (fid, name) <- MS.toList (fidToNam book)]

isTailRecursive :: Core -> Word16 -> Bool
isTailRecursive core fid = case core of
  Ref _ fFid _ | fFid == fid          -> True
  Ref "DUP" _ [_, _, Lam _ (Lam _ f)] -> isTailRecursive f fid
  Dup _ _ _ _ f                       -> isTailRecursive f fid
  Let _ _ _ f                         -> isTailRecursive f fid
  Mat _ _ _ c                         -> any (\(_,_,f) -> isTailRecursive f fid) c
  _                                   -> False


================================================
FILE: src/HVM/Extract.hs
================================================
{-./Type.hs-}
{-./Inject.hs-}

module HVM.Extract where

import Control.Monad (foldM, forM_, forM)
import Control.Monad.State
import Data.Bits (shiftR)
import Data.Char (chr, ord)
import Data.IORef
import Data.Word
import Debug.Trace
import HVM.Foreign
import System.IO.Unsafe (unsafeInterleaveIO)
import HVM.Type
import qualified Data.IntSet as IS
import qualified Data.Map.Strict as MS

extractCoreAt :: IORef IS.IntSet -> ReduceAt -> Book -> Loc -> HVM Core

extractCoreAt dupsRef reduceAt book host = unsafeInterleaveIO $ do
  term <- reduceAt book host
  -- trace ("extract " ++ show host ++ " " ++ termToString term) $
  let tag = termTag term
  case tag of
    t | t == _ERA_ -> do
      return Era

    t | t == _LET_ -> do
      let loc  = termLoc term
      let mode = modeT (termLab term)
      name <- return $ "$" ++ show (loc + 0)
      val  <- extractCoreAt dupsRef reduceAt book (loc + 0)
      bod  <- extractCoreAt dupsRef reduceAt book (loc + 1)
      return $ Let mode name val bod

    t | t == _LAM_ -> do
      let loc = termLoc term
      name <- return $ "$" ++ show (loc + 0)
      bod  <- extractCoreAt dupsRef reduceAt book (loc + 0)
      return $ Lam name bod

    t | t == _APP_ -> do
      let loc = termLoc term
      fun <- extractCoreAt dupsRef reduceAt book (loc + 0)
      arg <- extractCoreAt dupsRef reduceAt book (loc + 1)
      return $ App fun arg

    t | t == _SUP_ -> do
      let loc = termLoc term
      let lab = termLab term
      tm0 <- extractCoreAt dupsRef reduceAt book (loc + 0)
      tm1 <- extractCoreAt dupsRef reduceAt book (loc + 1)
      return $ Sup lab tm0 tm1

    t | t == _VAR_ -> do
      let loc = termLoc term
      sub <- got (loc + 0)
      if termGetBit sub == 0
        then do
          name <- return $ "$" ++ show (loc + 0)
          return $ Var name
        else do
          set (loc + 0) (termRemBit sub)
          extractCoreAt dupsRef reduceAt book (loc + 0)

    t | t == _DP0_ -> do
      let loc = termLoc term
      let lab = termLab term
      dups <- readIORef dupsRef
      if IS.member (fromIntegral loc) dups
      then do
        name <- return $ "$" ++ show (loc + 0) ++ "_0"
        return $ Var name
      else do
        dp0 <- return $ "$" ++ show (loc + 0) ++ "_0"
        dp1 <- return $ "$" ++ show (loc + 0) ++ "_1"
        val <- extractCoreAt dupsRef reduceAt book (loc + 0)
        modifyIORef' dupsRef (IS.insert (fromIntegral loc))
        return $ Dup lab dp0 dp1 val (Var dp0)

    t | t == _DP1_ -> do
      let loc = termLoc term
      let lab = termLab term
      dups <- readIORef dupsRef
      if IS.member (fromIntegral loc) dups
      then do
        name <- return $ "$" ++ show (loc + 0) ++ "_1"
        return $ Var name
      else do
        dp0 <- return $ "$" ++ show (loc + 0) ++ "_0"
        dp1 <- return $ "$" ++ show (loc + 0) ++ "_1"
        val <- extractCoreAt dupsRef reduceAt book (loc + 0)
        modifyIORef' dupsRef (IS.insert (fromIntegral loc))
        return $ Dup lab dp0 dp1 val (Var dp1)

    t | t == _CTR_ -> do
      let loc = termLoc term
      let lab = termLab term
      let cid = fromIntegral lab
      let nam = mget (cidToCtr book) cid
      let ari = mget (cidToAri book) cid
      let ars = if ari == 0 then [] else [0..fromIntegral ari-1]
      fds <- mapM (\i -> extractCoreAt dupsRef reduceAt book (loc + i)) ars
      return $ Ctr nam fds

    t | t == _MAT_ -> do
      let loc = termLoc term
      let lab = termLab term
      let cid = fromIntegral lab
      let len = mget (cidToLen book) cid
      val <- extractCoreAt dupsRef reduceAt book (loc + 0)
      css <- foldM (\css i -> do
        let ctr = mget (cidToCtr book) (cid + i)
        let ari = mget (cidToAri book) (cid + i)
        let fds = if ari == 0 then [] else ["$" ++ show (loc + 1 + j) | j <- [0..fromIntegral ari-1]]
        bod <- extractCoreAt dupsRef reduceAt book (loc + 1 + fromIntegral i)
        return $ (ctr,fds,bod):css) [] [0..len-1]
      return $ Mat (MAT cid) val [] (reverse css)

    t | t == _IFL_ -> do
      let loc = termLoc term
      let lab = termLab term
      let cid = fromIntegral lab
      val <- extractCoreAt dupsRef reduceAt book (loc + 0)
      cs0 <- extractCoreAt dupsRef reduceAt book (loc + 1)
      cs1 <- extractCoreAt dupsRef reduceAt book (loc + 2)
      return $ Mat (IFL cid) val [] [(mget (cidToCtr book) cid, [], cs0), ("_", [], cs1)]

    t | t == _SWI_ -> do
      let loc = termLoc term
      let lab = termLab term
      let len = fromIntegral lab
      val <- extractCoreAt dupsRef reduceAt book (loc + 0)
      css <- foldM (\css i -> do
        bod <- extractCoreAt dupsRef reduceAt book (loc + 1 + i)
        return $ (show i, [], bod):css) [] [0..len-1]
      return $ Mat SWI val [] (reverse css)

    t | t == _W32_ -> do
      let val = termLoc term
      return $ U32 (fromIntegral val)

    t | t == _CHR_ -> do
      let val = termLoc term
      return $ Chr (chr (fromIntegral val))

    t | t == _OPX_ -> do
      let loc = termLoc term
      let opr = toEnum (fromIntegral (termLab term))
      nmx <- extractCoreAt dupsRef reduceAt book (loc + 0)
      nmy <- extractCoreAt dupsRef reduceAt book (loc + 1)
      return $ Op2 opr nmx nmy

    t | t == _OPY_ -> do
      let loc = termLoc term
      let opr = toEnum (fromIntegral (termLab term))
      nmy <- extractCoreAt dupsRef reduceAt book (loc + 0)
      nmx <- extractCoreAt dupsRef reduceAt book (loc + 1)
      return $ Op2 opr nmx nmy

    t | t == _REF_ -> do
      let loc = termLoc term
      let lab = termLab term
      let fid = fromIntegral lab
      let ari = fromIntegral $ funArity book fid
      let aux = if ari == 0 then [] else [0..ari-1]
      arg <- mapM (\i -> extractCoreAt dupsRef reduceAt book (loc + i)) aux
      let name = MS.findWithDefault "?" fid (fidToNam book)
      return $ Ref name fid arg

    t | t == _FWD_ -> do
      return Era

    t | t == _INC_ -> do
      let loc = termLoc term
      val <- extractCoreAt dupsRef reduceAt book (loc + 0)
      return $ Inc val

    t | t == _DEC_ -> do
      let loc = termLoc term
      val <- extractCoreAt dupsRef reduceAt book (loc + 0)
      return $ Dec val

    _ -> do
      return Era

doExtractCoreAt :: ReduceAt -> Book -> Loc -> HVM Core
doExtractCoreAt reduceAt book loc = do
  dupsRef <- newIORef IS.empty
  core    <- extractCoreAt dupsRef reduceAt book loc
  return core
  -- return $ doLiftDups core

-- Lifting Dups
-- ------------

liftDups :: Core -> (Core, Core -> Core)

liftDups (Var nam) =
  (Var nam, id)

liftDups (Ref nam fid arg) =
  let (argT, argD) = liftDupsList arg
  in (Ref nam fid argT, argD)

liftDups Era =
  (Era, id)

liftDups (Lam str bod) =
  let (bodT, bodD) = liftDups bod
  in (Lam str bodT, bodD)

liftDups (App fun arg) =
  let (funT, funD) = liftDups fun
      (argT, argD) = liftDups arg
  in (App funT argT, funD . argD)

liftDups (Sup lab tm0 tm1) =
  let (tm0T, tm0D) = liftDups tm0
      (tm1T, tm1D) = liftDups tm1
  in (Sup lab tm0T tm1T, tm0D . tm1D)

liftDups (Dup lab dp0 dp1 val bod) =
  let (valT, valD) = liftDups val
      (bodT, bodD) = liftDups bod
  in (bodT, \x -> valD (bodD (Dup lab dp0 dp1 valT x)))

liftDups (Ctr nam fds) =
  let (fdsT, fdsD) = liftDupsList fds
  in (Ctr nam fdsT, fdsD)

liftDups (Mat kin val mov css) =
  let (valT, valD) = liftDups val
      (movT, movD) = liftDupsMov mov
      (cssT, cssD) = liftDupsCss css
  in (Mat kin valT movT cssT, valD . movD . cssD)

liftDups (U32 val) =
  (U32 val, id)

liftDups (Chr val) =
  (Chr val, id)

liftDups (Op2 opr nm0 nm1) =
  let (nm0T, nm0D) = liftDups nm0
      (nm1T, nm1D) = liftDups nm1
  in (Op2 opr nm0T nm1T, nm0D . nm1D)

liftDups (Let mod nam val bod) =
  let (valT, valD) = liftDups val
      (bodT, bodD) = liftDups bod
  in (Let mod nam valT bodT, valD . bodD)

liftDups (Inc val) =
  let (valT, valD) = liftDups val
  in (Inc valT, valD)

liftDups (Dec val) =
  let (valT, valD) = liftDups val
  in (Dec valT, valD)

liftDupsList :: [Core] -> ([Core], Core -> Core)

liftDupsList [] = 
  ([], id)

liftDupsList (x:xs) =
  let (xT, xD)   = liftDups x
      (xsT, xsD) = liftDupsList xs
  in (xT:xsT, xD . xsD)

liftDupsMov :: [(String, Core)] -> ([(String, Core)], Core -> Core)

liftDupsMov [] = 
  ([], id)

liftDupsMov ((k,v):xs) =
  let (vT, vD)   = liftDups v
      (xsT, xsD) = liftDupsMov xs
  in ((k,vT):xsT, vD . xsD)

liftDupsCss :: [(String, [String], Core)] -> ([(String, [String], Core)], Core -> Core)

liftDupsCss [] = 
  ([], id)

liftDupsCss ((c,fs,b):xs) =
  let (bT, bD)   = liftDups b
      (xsT, xsD) = liftDupsCss xs
  in ((c,fs,bT):xsT, bD . xsD)

doLiftDups :: Core -> Core
doLiftDups term =
  let (termExpr, termDups) = liftDups term in
  let termBody = termDups (Var "") in
  -- hack to print expr before dups
  Let LAZY "" termExpr termBody


================================================
FILE: src/HVM/Foreign.hs
================================================
{-./Runtime.c-}

module HVM.Foreign where

import Data.Word
import Foreign.Ptr
import HVM.Type

foreign import ccall "set_len" 
  setLen :: Word64 -> IO ()

foreign import ccall "set_itr"
  setItr :: Word64 -> IO ()

foreign import ccall unsafe "Runtime.c hvm_init"
  hvmInit :: IO ()

foreign import ccall unsafe "Runtime.c hvm_free"
  hvmFree :: IO ()

foreign import ccall unsafe "Runtime.c alloc_node"
  allocNode :: Loc -> IO Loc

foreign import ccall unsafe "Runtime.c set"
  set :: Loc -> Term -> IO ()

foreign import ccall unsafe "Runtime.c got"
  got :: Loc -> IO Term

foreign import ccall unsafe "Runtime.c take"
  take :: Loc -> IO Term

foreign import ccall unsafe "Runtime.c swap"
  swap :: Loc -> Term -> IO Term

foreign import ccall unsafe "Runtime.c term_new"
  termNew :: Tag -> Lab -> Loc -> Term

foreign import ccall unsafe "Runtime.c term_tag"
  termTag :: Term -> Tag

foreign import ccall unsafe "Runtime.c term_get_bit"
  termGetBit :: Term -> Word8

foreign import ccall unsafe "Runtime.c term_lab"
  termLab :: Term -> Lab

foreign import ccall unsafe "Runtime.c term_loc"
  termLoc :: Term -> Loc

foreign import ccall unsafe "Runtime.c term_set_bit"
  termSetBit :: Term -> Term

foreign import ccall unsafe "Runtime.c term_rem_bit"
  termRemBit :: Term -> Term

foreign import ccall unsafe "Runtime.c get_len"
  getLen :: IO Word64

foreign import ccall unsafe "Runtime.c get_itr"
  getItr :: IO Word64

foreign import ccall unsafe "Runtime.c inc_itr"
  incItr :: IO ()

foreign import ccall unsafe "Runtime.c fresh"
  fresh :: IO Word64

foreign import ccall unsafe "Runtime.c reduce"
  reduceC :: Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_at"
  reduceAtC :: Loc -> IO Term

foreign import ccall unsafe "Runtime.c reduce_let"
  reduceLet :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_app_era"
  reduceAppEra :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_app_lam"
  reduceAppLam :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_app_sup"
  reduceAppSup :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_app_ctr"
  reduceAppCtr :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_app_w32"
  reduceAppW32 :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_app_inc"
  reduceAppInc :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_app_dec"
  reduceAppDec :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_dup_era"
  reduceDupEra :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_dup_lam"
  reduceDupLam :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_dup_sup"
  reduceDupSup :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_dup_ctr"
  reduceDupCtr :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_dup_w32"
  reduceDupW32 :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_dup_ref"
  reduceDupRef :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_dup_inc"
  reduceDupInc :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_dup_dec"
  reduceDupDec :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_mat_era"
  reduceMatEra :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_mat_lam"
  reduceMatLam :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_mat_sup"
  reduceMatSup :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_mat_ctr"
  reduceMatCtr :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_mat_w32"
  reduceMatW32 :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_mat_inc"
  reduceMatInc :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_mat_dec"
  reduceMatDec :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opx_era"
  reduceOpxEra :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opx_lam"
  reduceOpxLam :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opx_sup"
  reduceOpxSup :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opx_ctr"
  reduceOpxCtr :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opx_w32"
  reduceOpxW32 :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opx_inc"
  reduceOpxInc :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opx_dec"
  reduceOpxDec :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opy_era"
  reduceOpyEra :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opy_lam"
  reduceOpyLam :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opy_sup"
  reduceOpySup :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opy_ctr"
  reduceOpyCtr :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opy_w32"
  reduceOpyW32 :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opy_inc"
  reduceOpyInc :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_opy_dec"
  reduceOpyDec :: Term -> Term -> IO Term

foreign import ccall unsafe "Runtime.c reduce_ref_sup"
  reduceRefSup :: Term -> Word16 -> IO Term

foreign import ccall unsafe "Runtime.c hvm_define"
  hvmDefine :: Word16 -> FunPtr (IO Term) -> IO ()

foreign import ccall unsafe "Runtime.c hvm_get_state"
  hvmGetState :: IO (Ptr ())

foreign import ccall unsafe "Runtime.c hvm_set_state"
  hvmSetState :: Ptr () -> IO ()

foreign import ccall unsafe "Runtime.c hvm_set_cari"
  hvmSetCari :: Word16 -> Word16 -> IO ()

foreign import ccall unsafe "Runtime.c hvm_set_clen"
  hvmSetClen :: Word16 -> Word16 -> IO ()

foreign import ccall unsafe "Runtime.c hvm_set_cadt"
  hvmSetCadt :: Word16 -> Word16 -> IO ()

foreign import ccall unsafe "Runtime.c hvm_set_fari"
  hvmSetFari :: Word16 -> Word16 -> IO ()

showTerm :: Term -> String
showTerm term =
  let tag = showTag (termTag term)
      lab = showLab (termLab term)
      loc = showLoc (termLoc term)
  in "term_new(" ++ tag ++ ",0x" ++ lab ++ ",0x" ++ loc ++ ")"


================================================
FILE: src/HVM/Inject.hs
================================================
{-./Type.hs-}

module HVM.Inject where

import Control.Monad (foldM, when, forM_)
import Control.Monad.State
import Data.Bits (shiftL, (.|.))
import Data.Char (ord)
import Data.List (foldr, take)
import Data.Word
import Debug.Trace
import HVM.Foreign
import HVM.Type
import qualified Data.Map.Strict as MS

type InjectM a = StateT InjectState HVM a

data InjectState = InjectState
  { args :: MS.Map String Term -- maps var names to binder locations
  , vars :: [(String, Loc)]    -- list of (var name, usage location) pairs
  }

emptyState :: InjectState
emptyState = InjectState MS.empty []

injectCore :: Book -> Core -> Loc -> InjectM ()

injectCore _ Era loc = do
  lift $ set loc (termNew _ERA_ 0 0)

injectCore _ (Var nam) loc = do
  argsMap <- gets args
  case MS.lookup nam argsMap of
    Just term -> do
      lift $ set loc term
      modify $ \s -> s { args = MS.delete nam (args s) }
    Nothing -> do
      modify $ \s -> s { vars = (nam, loc) : vars s }

injectCore book (Let mod nam val bod) loc = do
  let_node <- lift $ allocNode 2
  modify $ \s -> s { args = MS.insert nam (termNew _VAR_ 0 (let_node + 0)) (args s) }
  injectCore book val (let_node + 0)
  injectCore book bod (let_node + 1)
  lift $ set loc (termNew _LET_ (fromIntegral $ fromEnum mod) let_node)

injectCore book (Lam vr0 bod) loc = do
  lam <- lift $ allocNode 1
  modify $ \s -> s { args = MS.insert vr0 (termNew _VAR_ 0 (lam + 0)) (args s) }
  injectCore book bod (lam + 0)
  lift $ set loc (termNew _LAM_ 0 lam)

injectCore book (App fun arg) loc = do
  app <- lift $ allocNode 2
  injectCore book fun (app + 0)
  injectCore book arg (app + 1)
  lift $ set loc (termNew _APP_ 0 app)

injectCore book (Sup lab tm0 tm1) loc = do
  sup <- lift $ allocNode 2
  injectCore book tm0 (sup + 0)
  injectCore book tm1 (sup + 1)
  lift $ set loc (termNew _SUP_ lab sup)

injectCore book (Dup lab dp0 dp1 val bod) loc = do
  dup <- lift $ allocNode 1
  modify $ \s -> s 
    { args = MS.insert dp0 (termNew _DP0_ lab dup) 
           $ MS.insert dp1 (termNew _DP1_ lab dup) (args s) 
    }
  injectCore book val (dup + 0)
  injectCore book bod loc

injectCore book (Ref nam fid arg) loc = do
  let ari = funArity book fid
  let lab = fromIntegral fid
  ref <- lift $ allocNode (fromIntegral ari)
  sequence_ [injectCore book x (ref + i) | (i,x) <- zip [0..] arg]
  lift $ set loc (termNew _REF_ lab ref)

injectCore book (Ctr nam fds) loc = do
  let cid = mget (ctrToCid book) nam
  let ari = mget (cidToAri book) cid
  let lab = fromIntegral cid
  ctr <- lift $ allocNode (fromIntegral ari)
  sequence_ [injectCore book fd (ctr + ix) | (ix,fd) <- zip [0..] fds]
  lift $ set loc (termNew _CTR_ lab ctr)

injectCore book tm@(Mat kin val mov css) loc = do
  mat <- lift $ allocNode (1 + fromIntegral (length css))
  injectCore book val (mat + 0)
  forM_ (zip [0..] css) $ \ (idx, (ctr, fds, bod)) -> do
    injectCore book (foldr (\x b -> Lam x b) (foldr (\x b -> Lam x b) bod (map fst mov)) fds) (mat + 1 + fromIntegral idx)
  let tag = case kin of { SWI -> _SWI_ ; (MAT _) -> _MAT_ ; (IFL _) -> _IFL_ }
  let lab = case kin of { SWI -> fromIntegral $ length css ; (MAT cid) -> fromIntegral cid ; (IFL cid) -> fromIntegral cid }
  trm <- return $ termNew tag lab mat
  ret <- foldM (\mat (_, val) -> do
      app <- lift $ allocNode 2
      lift $ set (app + 0) mat
      injectCore book val (app + 1)
      return $ termNew _APP_ 0 app)
    trm
    mov
  lift $ set loc ret

injectCore book (U32 val) loc = do
  lift $ set loc (termNew _W32_ 0 (fromIntegral val))

injectCore book (Chr val) loc = do
  lift $ set loc (termNew _CHR_ 0 (fromIntegral $ ord val))

injectCore book (Op2 opr nm0 nm1) loc = do
  opx <- lift $ allocNode 2
  injectCore book nm0 (opx + 0)
  injectCore book nm1 (opx + 1)
  lift $ set loc (termNew _OPX_ (fromIntegral $ fromEnum opr) opx)

injectCore book (Inc val) loc = do
  inc <- lift $ allocNode 1
  injectCore book val (inc + 0)
  lift $ set loc (termNew _INC_ 0 inc)

injectCore book (Dec val) loc = do
  dec <- lift $ allocNode 1
  injectCore book val (dec + 0)
  lift $ set loc (termNew _DEC_ 0 dec)

doInjectCoreAt :: Book -> Core -> Loc -> [(String,Term)] -> HVM Term
doInjectCoreAt book core host argList = do
  (_, state) <- runStateT (injectCore book core host) (emptyState { args = MS.fromList argList })
  foldM (\m (name, loc) -> do
    case MS.lookup name (args state) of
      Just term -> do
        set loc term
        return $ MS.delete name m
      Nothing -> do
        error $ "Unbound variable: \n\x1b[2m" ++ name ++ "\n\x1b[0mIn term:\n\x1b[2m" ++ Data.List.take 1024 (showCore core) ++ "...\x1b[0m")
    (args state)
    (vars state)
  got host


================================================
FILE: src/HVM/Parse.hs
================================================
{-./Type.hs-}

module HVM.Parse where

import Control.Monad (foldM, forM, forM_, when)
import Control.Monad.State
import Data.IORef
import Data.List
import Data.Word
import Debug.Trace
import Highlight (highlightError)
import System.Console.ANSI
import Text.Parsec hiding (State)
import Text.Parsec.Error
import HVM.Adjust (adjustBook, adjust)
import HVM.Type
import qualified Data.Map.Strict as MS

-- Core Parsers
-- ------------

data ParserState = ParserState
  { pCidToAri  :: MS.Map Word16 Word16
  , pCidToLen  :: MS.Map Word16 Word16
  , pCtrToCid  :: MS.Map String Word16
  , pCidToCtr  :: MS.Map Word16 String
  , pCidToADT  :: MS.Map Word16 Word16
  , imported   :: MS.Map String ()
  , varUsages  :: MS.Map String Int
  , globalVars :: MS.Map String ()
  , pFreshLab  :: Lab
  }

type ParserM = ParsecT String ParserState IO

-- Core Term
parseCore :: ParserM Core
parseCore = do
  skip
  head <- lookAhead anyChar
  case head of
    '*'  -> parseEra
    'λ'  -> parseLam
    '('  -> parseExp
    '@'  -> parseRef
    '&'  -> parseSup
    '!'  -> parseLet
    '#'  -> parseCtr
    '~'  -> parseMat
    '↑'  -> parseInc
    '↓'  -> parseDec
    '['  -> parseLst
    '\'' -> parseChr
    '"'  -> parseStr '"'
    '`'  -> parseStr '`'
    _    -> parseLit

-- Era: `*`
parseEra :: ParserM Core
parseEra = do
  consume "*"
  return Era

-- Lam: `λx.F` or `λ&x.F` for non-linear variables
parseLam :: ParserM Core
parseLam = do
  consume "λ"
  var <- parseName1
  swallow "."
  bod <- bindVars [var] parseCore
  return $ Lam var bod

-- FshSup: `& {a,b}` -- uses a fresh label
-- StaSup: `&0{a,b}` -- uses a static label
-- DynSup: `&L{a,b}` -- uses a dynamic label
-- DynLab: `&L`      -- a dynamic label variable
parseSup :: ParserM Core
parseSup = do
  consume "&"
  name <- parseName
  next <- optionMaybe $ try $ lookAhead anyChar
  case next of
    Just '{' -> do
      consume "{"
      tm0 <- parseCore
      swallow ","
      tm1 <- parseCore
      consume "}"
      if null name then do
        num <- genFreshLabel
        return $ Sup num tm0 tm1
      else case reads name of
        [(num :: Lab, "")] -> do
          return $ Sup num tm0 tm1
        otherwise -> do
          useVar name
          return $ Ref "SUP" (fromIntegral _SUP_F_) [Var name, tm0, tm1]
    _ -> do
      useVar ("&" ++ name)
      return $ Var ("&" ++ name)

-- Exp: `(<op> A B)`
parseExp :: ParserM Core
parseExp = do
  next <- lookAhead (anyChar >> anyChar)
  case next of
    '+' -> parseOper OP_ADD
    '-' -> parseOper OP_SUB
    '*' -> parseOper OP_MUL
    '/' -> parseOper OP_DIV
    '%' -> parseOper OP_MOD
    '=' -> parseOper OP_EQ
    '!' -> parseOper OP_NE
    '&' -> parseOper OP_AND
    '|' -> parseOper OP_OR
    '^' -> parseOper OP_XOR
    '<' -> do
      next <- lookAhead (anyChar >> anyChar >> anyChar)
      case next of
        '<' -> parseOper OP_LSH
        '=' -> parseOper OP_LTE
        _   -> parseOper OP_LT
    '>' -> do
      next <- lookAhead (anyChar >> anyChar >> anyChar)
      case next of
        '>' -> parseOper OP_RSH
        '=' -> parseOper OP_GTE
        _   -> parseOper OP_GT
    _ -> do
      consume "("
      fun <- parseCore
      args <- many $ do
        closeWith ")"
        parseCore
      skip
      char ')'
      return $ foldl (\f a -> App f a) fun args

-- Oper: `(+ a b)`
parseOper :: Oper -> ParserM Core
parseOper op = do
  consume "("
  consume (show op)
  nm0 <- parseCore
  nm1 <- parseCore
  consume ")"
  return $ Op2 op nm0 nm1

-- Ref: `@Fun(x0 x1 ...)`
parseRef :: ParserM Core
parseRef = do
  consume "@"
  name <- parseName1
  args <- option [] $ do
    try $ string "("
    args <- many $ do
      closeWith ")"
      swallow ","
      parseCore
    consume ")"
    return args
  return $ Ref name 0 args

-- Ctr: `#Ctr{x0 x1 ...}`
parseCtr :: ParserM Core
parseCtr = do
  consume "#"
  nam <- parseName1
  fds <- option [] $ do
    try $ consume "{"
    fds <- many $ do
      closeWith "}"
      parseCore
    consume "}"
    return fds
  return $ Ctr ('#':nam) fds

-- Mat: `~ x !m0=v0 !m1=v1 ... { #Ctr{x0 x1 ...}:... ... }`
parseMat :: ParserM Core
parseMat = do
  consume "~"
  val <- parseCore
  mov <- many parseMove
  consume "{"
  cs0 <- parseCase False (map fst mov)
  css <- many $ parseCase (let (n,_,_)=cs0 in n=="0") (map fst mov)
  consume "}"
  buildMatchExpr val mov (cs0:css)

-- Mov: `!m0 = v0` (used inside Mat)
parseMove :: ParserM (String, Core)
parseMove = do
  try $ skip >> consume "!"
  name <- parseName1
  expr <- optionMaybe $ try $ consume "=" >> parseCore
  case expr of
    Just e  -> return (name, e)
    Nothing -> do
      -- !x is shorthand for !x=x
      -- !&x is shorthand for !&x=x
      useVar (stripName name)
      return (name, Var (stripName name))

-- Case: CtrCase | NumCase | DefCase
parseCase :: Bool -> [String] -> ParserM (String, [String], Core)
parseCase isNumMat mov = do
  closeWith "}" >> skip
  c <- lookAhead anyChar
  if c == '#'
    then parseCtrCase mov -- Constructor case
    else if c >= '0' && c <= '9'
      then parseNumCase mov -- Numeric case
      else (parseDefCase isNumMat mov) -- Default case

-- CtrCase: `#Ctr{x0 x1 ...}: f`
parseCtrCase :: [String] -> ParserM (String, [String], Core)
parseCtrCase mov = do
  consume "#"
  name <- parseName1
  skip
  vars <- option [] $ do
    consume "{"
    vars <- many $ do
      closeWith "}"
      parseName1
    consume "}"
    return vars
  consume ":"
  body <- bindVars (mov ++ vars) parseCore
  swallow ";"
  return ('#':name, vars, body)

-- NumCase: LitCase | PreCase
parseNumCase :: [String] -> ParserM (String, [String], Core)
parseNumCase mov = try (parseLitCase mov) <|> try (parsePreCase mov)

-- LitCase: `123: f`
parseLitCase :: [String] -> ParserM (String, [String], Core)
parseLitCase mov = do
  digits <- many1 digit
  consume ":"
  body <- bindVars mov parseCore
  swallow ";"
  return (digits, [], body)

-- PreCase: `123+p: f`
parsePreCase :: [String] -> ParserM (String, [String], Core)
parsePreCase mov = do
  pred <- many1 digit
  consume "+"
  name <- parseName1
  consume ":"
  body <- bindVars (mov ++ [name]) parseCore
  swallow ";"
  return ("_", [name], body)

-- DefCase: `x: f`
parseDefCase :: Bool -> [String] -> ParserM (String, [String], Core)
parseDefCase isNumMat mov = do
  name <- parseName1
  consume ":"
  body <- bindVars (mov ++ [name]) parseCore
  swallow ";"
  if isNumMat && name /= "_" then do
    fail $ concat
      [ "To avoid ambiguity, the switch syntax changed.\n"
      , "- Old Syntax: ~ n { 0:zero_case x:pred_case }\n"
      , "- New Syntax: ~ n { 0:zero_case 1+x:pred_case }\n"
      , "- Please, update your code."
      ]
  else do
    return ("_", [name], body)

-- Inc: `↑x`
parseInc :: ParserM Core
parseInc = do
  consume "↑"
  term <- parseCore
  return $ Inc term

-- Dec: `↓x`
parseDec :: ParserM Core
parseDec = do
  consume "↓"
  term <- parseCore
  return $ Dec term

-- Let: Dup | StriLet | LazyLet
parseLet :: ParserM Core
parseLet = do
  consume "!"
  skip
  next <- lookAhead anyChar
  case next of
    '&' -> try parseDup <|> try parseLazyLet
    '!' -> parseStriLet
    _   -> parseLazyLet

-- Fresh Dup   : `! & {a b}=v f`
-- Static Dup  : `! &0{a b}=v f`
-- Dynamic Dup : `! &L{a b}=v f`
parseDup :: ParserM Core
parseDup = do
  consume "&"
  nam <- parseName
  consume "{"
  dp0 <- parseName1
  dp1 <- parseName1
  consume "}"
  consume "="
  val <- parseCore
  swallow ";"
  bod <- bindVars [dp0, dp1] parseCore
  if null nam then do
    num <- genFreshLabel
    return $ Dup num dp0 dp1 val bod
  else case reads nam of
    [(num :: Lab, "")] -> do
      return $ Dup num dp0 dp1 val bod
    otherwise -> do
      useVar nam
      return $ Ref "DUP" (fromIntegral _DUP_F_) [Var nam, val, Lam dp0 (Lam dp1 bod)]

-- StriLet: `! !x=v f`
parseStriLet :: ParserM Core
parseStriLet = do
  consume "!"
  nam <- option "_" $ try $ do
    nam <- parseName1
    consume "="
    return nam
  val <- parseCore
  swallow ";"
  bod <- bindVars [nam] parseCore
  return $ Let STRI nam val bod

-- LazyLet: `! x=v f`
parseLazyLet :: ParserM Core
parseLazyLet = do
  nam <- parseName1
  consume "="
  val <- parseCore
  swallow ";"
  bod <- bindVars [nam] parseCore
  return $ Let LAZY nam val bod

-- Lit: Var | U32
parseLit :: ParserM Core
parseLit = do
  name <- parseName1
  case name of
    "log" -> parseLogExpr
    _ -> case reads (filter (/= '_') name) of
      [(num, "")] -> do
        return $ U32 (fromIntegral (num :: Integer))
      _           -> do
        useVar name
        return $ Var name

-- Log: `log x f` -> `!! @LOG(x) f`
parseLogExpr :: ParserM Core
parseLogExpr = do
  skip
  expr <- parseCore
  skip
  cont <- parseCore
  let logCall = Ref "LOG" (fromIntegral _LOG_F_) [expr]
  return $ Let STRI "_" logCall cont

-- Chr: 'x'
parseChr :: ParserM Core
parseChr = do
  skip
  char '\''
  c <- escaped
  char '\''
  return $ Chr c

-- -- Str: "abc"
parseStr :: Char -> ParserM Core
parseStr delim = do
  skip
  char delim
  str <- many escaped
  char delim
  return $ foldr (\c acc -> Ctr "#Cons" [Chr c, acc]) (Ctr "#Nil" []) str

-- Lst: `[x0 x1 ...]`
parseLst :: ParserM Core
parseLst = do
  skip
  char '['
  elems <- many $ do
    closeWith "]"
    swallow ","
    parseCore
  skip
  char ']'
  return $ foldr (\x acc -> Ctr "#Cons" [x, acc]) (Ctr "#Nil" []) elems

-- Def: `@foo(x0 x1...) = f`
parseDef :: ParserM (String, ((Bool, [(Bool, String)]), Core))
parseDef = do
  -- Reset global binds
  modifyState $ \st -> st { globalVars = MS.empty, varUsages = MS.empty }
  copy <- option False $ do
    string "!"
    skip
    return True
  string "@"
  name <- parseName1
  args <- option [] $ do
    try $ string "("
    args <- many $ do
      closeWith ")"
      swallow ","
      strict <- option False $ do
        try $ do
          consume "!"
          return True
      arg <- parseName1
      return (strict, arg)
    consume ")"
    return args
  skip
  consume "="
  core <- bindVars (map snd args) parseCore
  return (name, ((copy,args), core))

-- ADT: `data Foo { #Ctr{x0 x1 ...} ... }`
parseADT :: ParserM ()
parseADT = do
  string "data"
  name <- parseName1
  skip
  consume "{"
  constructors <- many parseADTCtr
  consume "}"
  registerADT name constructors

-- ADT-Ctr: `#Ctr{x0 x1 ...}`
parseADTCtr :: ParserM (String, [String])
parseADTCtr = do
  skip
  consume "#"
  name <- parseName1
  st <- getState
  when (MS.member ('#':name) (pCtrToCid st)) $ do
    fail $ "Constructor '" ++ name ++ "' redefined"
  fields <- option [] $ do
    try $ consume "{"
    fds <- many $ do
      closeWith "}"
      parseName1
    skip
    consume "}"
    return fds
  skip
  return ('#':name, fields)

-- Book: [ADT]
parseBook :: ParserM [(String, ((Bool, [(Bool,String)]), Core))]
parseBook = do
  skip
  defs <- many $ do
    def <- choice [parseTopImp, parseTopADT, parseTopDef]
    skip
    return def
  try $ skip >> eof
  return $ concat defs

-- TopADT: ADT
parseTopADT :: ParserM [(String, ((Bool, [(Bool,String)]), Core))]
parseTopADT = do
  parseADT
  return []

-- TopDef: Def
parseTopDef :: ParserM [(String, ((Bool, [(Bool,String)]), Core))]
parseTopDef = do
  def <- parseDef
  return [def]

-- TopImp: 'import Foo/bar.hvm'
parseTopImp :: ParserM [(String, ((Bool, [(Bool,String)]), Core))]
parseTopImp = do
  string "import"
  space
  path <- many1 (noneOf "\n\r")
  st <- getState
  if MS.member path (imported st)
    then return [] -- skip if already imported
    else importFile path
  where
  importFile :: String -> ParserM [(String, ((Bool, [(Bool,String)]), Core))]
  importFile path = do
    modifyState (\s -> s { imported = MS.insert path () (imported s) })
    contents <- liftIO $ readFile path
    st       <- getState
    result   <- liftIO $ runParserT parseBookWithState st path contents
    case result of
      Left err -> do
        liftIO $ showParseError path contents err
        fail $ "Error importing file " ++ show path ++ ": parse failed"
      Right (importedDefs, importedState) -> do
        putState importedState
        skip
        return importedDefs
  parseBookWithState :: ParserM ([(String, ((Bool, [(Bool,String)]), Core))], ParserState)
  parseBookWithState = do
    defs <- parseBook
    state <- getState
    return (defs, state)

-- Utils
-- -----

parseName :: ParserM String
parseName = skip >> many (alphaNum <|> char '_' <|> char '$' <|> char '&')

parseName1 :: ParserM String
parseName1 = skip >> many1 (alphaNum <|> char '_' <|> char '$' <|> char '&')

consume :: String -> ParserM String
consume str = skip >> string str

swallow :: String -> ParserM ()
swallow str = do
  skip
  _ <- optionMaybe $ try (string str)
  return ()

closeWith :: String -> ParserM ()
closeWith str = try $ do
  skip
  notFollowedBy (string str)

skip :: ParserM ()
skip = skipMany (parseSpace <|> parseComment) where
  parseSpace = (try $ do
    space
    return ()) <?> "space"
  parseComment = (try $ do
    string "//"
    skipMany (noneOf "\n")
    (char '\n' >> return ()) <|> eof
    return ()) <?> "Comment"

escaped :: ParserM Char
escaped
  =   parseEscapeSequence
  <|> parseUnicodeEscape
  <|> parseRegularChar
  where
  parseEscapeSequence = try $ do
    char '\\'
    c <- oneOf "\\\"nrtbf0/\'"
    return $ case c of
      '\\' -> '\\'
      '/'  -> '/'
      '"'  -> '"'
      '\'' -> '\''
      'n'  -> '\n'
      'r'  -> '\r'
      't'  -> '\t'
      'b'  -> '\b'
      'f'  -> '\f'
      '0'  -> '\0'
  parseUnicodeEscape = try $ do
    string "\\u"
    code <- count 4 hexDigit
    return $ toEnum (read ("0x" ++ code) :: Int)
  parseRegularChar = noneOf "\"\\"

-- External API
-- ------------

-- Parse Book and Core
doParseBook :: String -> String -> IO Book
doParseBook filePath code = do
  result <- runParserT p (ParserState MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty 0) "" code
  case result of
    Right (defs, st) -> do
      return $ createBook defs (pCtrToCid st) (pCidToCtr st) (pCidToAri st) (pCidToLen st) (pCidToADT st) (pFreshLab st)
    Left err -> do
      showParseError filePath code err
      return $ Book MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty 0
  where
    p = do
      defs <- parseBook
      st <- getState
      return (defs, st)

doParseCore :: String -> IO Core
doParseCore code = do
  result <- runParserT parseCore (ParserState MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty MS.empty 0) "" code
  case result of
    Right core -> return core
    Left err -> do
      showParseError "" code err
      return $ Ref "⊥" 0 []

doParseArguments :: Book -> [String] -> IO [Core]
doParseArguments book [] = return []
doParseArguments book (arg:args) = do
  (book', core) <- parseArg book arg
  rest <- doParseArguments book' args
  return (core : rest)
  where
    parseArg :: Book -> String -> IO (Book, Core)
    parseArg book arg = do
      let st = ParserState
            { pCidToAri = cidToAri book
            , pCidToLen = cidToLen book
            , pCtrToCid = ctrToCid book
            , pCidToCtr = cidToCtr book
            , pCidToADT = cidToADT book
            , imported  = MS.empty
            , varUsages = MS.empty
            , globalVars = MS.empty
            , pFreshLab = freshLab book
            }
      result <- runParserT p st "" arg
      case result of
        Right (book, core) -> do
          return (book, core)
        Left err -> do
          showParseError "" arg err
          return (book, Ref "⊥" 0 [])
    p = do
      core <- parseCore
      st   <- getState
      let (book', core') = adjust "" (book { freshLab = pFreshLab st }) core []
      return (book', core')

-- Errors
-- ------

extractExpectedTokens :: ParseError -> String
extractExpectedTokens err =
    let msgs = errorMessages err
        failMsg = [msg | Message msg <- msgs]
        expectedMsgs = [msg | Expect msg <- msgs, msg /= "space", msg /= "Comment"]
    in if not (null failMsg)
       then head failMsg
       else if null expectedMsgs
            then "syntax error"
            else intercalate " | " expectedMsgs

showParseError :: String -> String -> ParseError -> IO ()
showParseError filename input err = do
  let pos = errorPos err
  let lin = sourceLine pos
  let col = sourceColumn pos
  let errorMsg = extractExpectedTokens err
  putStr $ setSGRCode [SetConsoleIntensity BoldIntensity] ++ "\nPARSE_ERROR" ++ setSGRCode [Reset]
  putStr " ("
  putStr $ setSGRCode [SetUnderlining SingleUnderline] ++ filename ++ ":" ++ show lin ++ ":" ++ show col ++ setSGRCode [Reset]
  putStrLn ")"
  if any isMessage (errorMessages err)
    then putStrLn $ "- " ++ errorMsg
    else do
      putStrLn $ "- expected: " ++ errorMsg
      putStrLn $ "- detected:"
  putStrLn $ highlightError (lin, col) (lin, col + 1) input
  where
    isMessage (Message _) = True
    isMessage _ = False

parseLog :: String -> ParserM ()
parseLog msg = do
  pos <- getPosition
  remaining <- getInput
  let preview = "[[[" ++ Data.List.take 20 remaining ++ (if length remaining > 20 then "..." else "") ++ "]]]"
  trace ("[" ++ show pos ++ "] " ++ msg ++ "\nRemaining code: " ++ preview) $ return ()

-- Book
-- ----

-- Register the parsed ADT in the parser state
registerADT :: String -> [(String, [String])] -> ParserM ()
registerADT name constructors = do
  st <- getState
  let baseCid  = fromIntegral $ MS.size (pCtrToCid st)
  let ctrToCid = zip (map fst constructors) [baseCid..]
  let cidToCtr = map (\ (ctr,cid) -> (cid, ctr)) ctrToCid
  let cidToAri = map (\ (ctr,cid) -> (cid, fromIntegral . length . snd $ head $ filter ((== ctr) . fst) constructors)) ctrToCid
  let cidToLen = (baseCid, fromIntegral $ length constructors)
  let cidToADT = map (\ (_,cid) -> (cid, baseCid)) ctrToCid
  modifyState (\s -> s {
    pCtrToCid = MS.union (MS.fromList ctrToCid) (pCtrToCid s),
    pCidToCtr = MS.union (MS.fromList cidToCtr) (pCidToCtr s),
    pCidToAri = MS.union (MS.fromList cidToAri) (pCidToAri s),
    pCidToLen = MS.insert (fst cidToLen) (snd cidToLen) (pCidToLen s),
    pCidToADT = MS.union (MS.fromList cidToADT) (pCidToADT s) })

-- Book creation and setup functions
createBook :: [(String, ((Bool,[(Bool,String)]), Core))] -> MS.Map String Word16 -> MS.Map Word16 String -> MS.Map Word16 Word16 -> MS.Map Word16 Word16 -> MS.Map Word16 Word16 -> Lab -> Book
createBook defs ctrToCid cidToCtr cidToAri cidToLen cidToADT freshLab =
  let withPrims = \n2i -> MS.union n2i (MS.fromList primitives)
      nameList  = zip (map fst defs) [0..] :: [(String, Word16)]
      namToFid' = withPrims (MS.fromList nameList)
      fidToNam' = MS.fromList (map (\(k,v) -> (v,k)) (MS.toList namToFid'))
      fidToFun' = MS.fromList (map (\(nam, func) -> (mget namToFid' nam, func)) defs)
      fidToLab' = MS.fromList (map (\(nam, ((_, _), cr)) -> (mget namToFid' nam, collectLabels cr)) defs) in
  let book = Book
       { fidToFun = fidToFun'
       , fidToNam = fidToNam'
       , fidToLab = fidToLab'
       , namToFid = namToFid'
       , cidToAri = cidToAri
       , cidToCtr = cidToCtr
       , ctrToCid = ctrToCid
       , cidToLen = cidToLen
       , cidToADT = cidToADT
       , freshLab = freshLab
       } in
  adjustBook book

-- Binding
-- -------

-- Strip the & prefix from a non-linear variable name
-- e.g., "&x" -> "x", "x" -> "x"
stripName :: String -> String
stripName var = if not (null var) && head var == '&' then tail var else var

bindVars :: [String] -> ParserM Core -> ParserM Core
bindVars vars parse = do
  st <- getState
  let prev = varUsages st
  -- Split into scopeless vars (starting with $) and regular vars
  let (svars, rvars) = partition (\v -> head v == '$') vars
  forM_ svars bindScopeless
  -- Add bindings for regular vars
  let tmp = MS.fromList [(stripName var, 0) | var <- rvars]
  modifyState (\st -> st {varUsages = MS.union tmp prev})
  body <- parse
  forM_ rvars checkLinear
  -- Restore the original state for regular vars
  modifyState (\st -> st {varUsages = MS.union (MS.difference (varUsages st) tmp) prev})
  return body
  where
    bindScopeless var = do
      st <- getState
      case MS.lookup var (globalVars st) of
        Just _  -> fail $ "Global variable " ++ show var ++ " already bound"
        Nothing -> putState st {globalVars = MS.insert var () (globalVars st)}

    checkLinear var = do
      st <- getState
      let uses = mget (varUsages st) (stripName var)
      if (head var /= '&') && uses > 1 then
        fail $ "Linear variable " ++ show var ++ " used " ++ show uses ++ " times"
      else return ()

useVar :: String -> ParserM ()
useVar name = do
  st <- getState
  case (head name, MS.lookup name (varUsages st)) of
    ('$', Nothing) -> do -- $-vars can be used before definition
      putState st {varUsages = MS.insert name 1 (varUsages st)}
    (_, Nothing) -> do
      fail $ "Unbound variable " ++ show name
    (_, Just uses) -> do
      putState st {varUsages = MS.insert name (uses + 1) (varUsages st)}

-- Utils
-- -------------------

genFreshLabel :: ParserM Lab
genFreshLabel = do
  st <- getState
  let lbl = pFreshLab st
  putState st { pFreshLab = lbl + 1 }
  when (lbl > 0x7FFF) $ do
    error "Label overflow: generated label would be too large"
  return $ lbl + 0x8000

-- Collects all labels used
collectLabels :: Core -> MS.Map Lab ()
collectLabels term = case term of
  Var _               -> MS.empty
  U32 _               -> MS.empty
  Chr _               -> MS.empty
  Era                 -> MS.empty
  Ref _ _ args        -> MS.unions $ map collectLabels args
  Let _ _ val bod     -> MS.union (collectLabels val) (collectLabels bod)
  Lam _ bod           -> collectLabels bod
  App fun arg         -> MS.union (collectLabels fun) (collectLabels arg)
  Sup lab tm0 tm1     -> MS.insert lab () $ MS.union (collectLabels tm0) (collectLabels tm1)
  Dup lab _ _ val bod -> MS.insert lab () $ MS.union (collectLabels val) (collectLabels bod)
  Ctr _ fds           -> MS.unions $ map collectLabels fds
  Mat kin val mov css -> MS.unions $ collectLabels val : map (collectLabels . snd) mov ++ map (\(_,_,bod) -> collectLabels bod) css
  Op2 _ x y           -> MS.union (collectLabels x) (collectLabels y)
  Inc x               -> collectLabels x
  Dec x               -> collectLabels x

-- Build match expression based on case types
buildMatchExpr :: Core -> [(String, Core)] -> [(String, [String], Core)] -> ParserM Core
buildMatchExpr val mov cases
  | null cases = 
      fail "Match needs at least one case"
  | isSwitch (head cases) = 
      return $ Mat SWI val mov cases  -- Switch case
  | onlyDefault cases =
      fail "Match with only a default case is not allowed"  -- Invalid case
  | hasDefault (last cases) = do  -- Has default: use If-Let chain
      var <- return $ getVar (last cases)
      ifl <- intoIfLetChain (Var (stripName var)) mov (init cases) var (last cases)
      return $ Let LAZY var val ifl
  | otherwise = do  -- All ADT cases covered
      st <- getState
      let ctrs = map getName cases
      let cids = map (mget (pCtrToCid st)) ctrs
      let adt  = mget (pCidToADT st) (head cids)
      let len  = mget (pCidToLen st) adt
      let miss = filter (\c -> not (c `elem` cids)) [adt..adt+len-1]
      case miss of
        [] -> return $ Mat (MAT adt) val mov cases
        _  -> fail $ "Missing match cases: " ++ show (map (mget (pCidToCtr st)) miss)
  where
    isSwitch (name, _, _) = name == "0"
    hasDefault (name, _, _) = name == "_"
    onlyDefault cases = length cases == 1 && hasDefault (head cases)
    getName (name, _, _) = name
    getVar (_, [v], _) = v

intoIfLetChain :: Core -> [(String, Core)] -> [(String, [String], Core)] -> String -> (String, [String], Core) -> ParserM Core
intoIfLetChain _ _ [] defName (_,_,defBody) = return defBody
intoIfLetChain val mov ((ctr,fds,bod):css) defName defCase = do
  st  <- getState
  kin <- return $ IFL (mget (pCtrToCid st) ctr)
  rec <- intoIfLetChain val mov css defName defCase
  css <- return $ [(ctr, fds, bod), ("_", [defName], rec)]
  return $ Mat kin val mov css


================================================
FILE: src/HVM/Reduce.hs
================================================
{-./Type.hs-}
{-./Foreign.hs-}

-- NOTE: THIS FILE IS MISSING HTE INC/DEC INTERACTIONS. LET'S FIX IT

module HVM.Reduce where

import Control.Monad (when, forM, forM_)
import Data.Word
import HVM.Collapse
import HVM.Extract
import HVM.Foreign
import HVM.Inject
import HVM.Type
import System.Exit
import qualified Data.Map.Strict as MS

reduceAt :: Bool -> ReduceAt

reduceAt debug book host = do 
  term <- got host
  let tag = termTag term
  let lab = termLab term
  let loc = termLoc term

  when debug $ do
    root <- doExtractCoreAt gotT book 0
    core <- doExtractCoreAt gotT book host
    putStrLn $ "reduce: " ++ showTerm term
    -- putStrLn $ "---------------- CORE: "
    -- putStrLn $ showCore core
    putStrLn $ "---------------- ROOT: "
    putStrLn $ showCore (doLiftDups root)

  case tag of
    t | t == _LET_ -> do
      case modeT lab of
        LAZY -> do
          val <- got (loc + 0)
          cont host (reduceLet term val)
        STRI -> do
          val <- reduceAt debug book (loc + 0)
          cont host (reduceLet term val)

    t | t == _APP_ -> do
      fun <- reduceAt debug book (loc + 0)
      case termTag fun of
        t | t == _ERA_ -> cont host (reduceAppEra term fun)
        t | t == _LAM_ -> cont host (reduceAppLam term fun)
        t | t == _SUP_ -> cont host (reduceAppSup term fun)
        t | t == _CTR_ -> cont host (reduceAppCtr term fun)
        t | t == _W32_ -> cont host (reduceAppW32 term fun)
        t | t == _CHR_ -> cont host (reduceAppW32 term fun)
        t | t == _INC_ -> cont host (reduceAppInc term fun)
        t | t == _DEC_ -> cont host (reduceAppDec term fun)
        _   -> set (loc + 0) fun >> return term

    t | t == _MAT_ -> do
      val <- reduceAt debug book (loc + 0)
      case termTag val of
        t | t == _ERA_ -> cont host (reduceMatEra term val)
        t | t == _LAM_ -> cont host (reduceMatLam term val)
        t | t == _SUP_ -> cont host (reduceMatSup term val)
        t | t == _CTR_ -> cont host (reduceMatCtr term val)
        t | t == _W32_ -> cont host (reduceMatW32 term val)
        t | t == _CHR_ -> cont host (reduceMatW32 term val)
        t | t == _INC_ -> cont host (reduceMatInc term val)
        t | t == _DEC_ -> cont host (reduceMatDec term val)
        _   -> set (loc + 0) val >> return term

    t | t == _IFL_ -> do
      val <- reduceAt debug book (loc + 0)
      case termTag val of
        t | t == _ERA_ -> cont host (reduceMatEra term val)
        t | t == _LAM_ -> cont host (reduceMatLam term val)
        t | t == _SUP_ -> cont host (reduceMatSup term val)
        t | t == _CTR_ -> cont host (reduceMatCtr term val)
        t | t == _W32_ -> cont host (reduceMatW32 term val)
        t | t == _CHR_ -> cont host (reduceMatW32 term val)
        t | t == _INC_ -> cont host (reduceMatInc term val)
        t | t == _DEC_ -> cont host (reduceMatDec term val)
        _   -> set (loc + 0) val >> return term

    t | t == _SWI_ -> do
      val <- reduceAt debug book (loc + 0)
      case termTag val of
        t | t == _ERA_ -> cont host (reduceMatEra term val)
        t | t == _LAM_ -> cont host (reduceMatLam term val)
        t | t == _SUP_ -> cont host (reduceMatSup term val)
        t | t == _CTR_ -> cont host (reduceMatCtr term val)
        t | t == _W32_ -> cont host (reduceMatW32 term val)
        t | t == _CHR_ -> cont host (reduceMatW32 term val)
        t | t == _INC_ -> cont host (reduceMatInc term val)
        t | t == _DEC_ -> cont host (reduceMatDec term val)
        _   -> set (loc + 0) val >> return term

    t | t == _OPX_ -> do
      val <- reduceAt debug book (loc + 0)
      case termTag val of
        t | t == _ERA_ -> cont host (reduceOpxEra term val)
        t | t == _LAM_ -> cont host (reduceOpxLam term val)
        t | t == _SUP_ -> cont host (reduceOpxSup term val)
        t | t == _CTR_ -> cont host (reduceOpxCtr term val)
        t | t == _W32_ -> cont host (reduceOpxW32 term val)
        t | t == _CHR_ -> cont host (reduceOpxW32 term val)
        t | t == _INC_ -> cont host (reduceOpxInc term val)
        t | t == _DEC_ -> cont host (reduceOpxDec term val)
        _   -> set (loc + 0) val >> return term

    t | t == _OPY_ -> do
      val <- reduceAt debug book (loc + 0)
      case termTag val of
        t | t == _ERA_ -> cont host (reduceOpyEra term val)
        t | t == _LAM_ -> cont host (reduceOpyLam term val)
        t | t == _SUP_ -> cont host (reduceOpySup term val)
        t | t == _CTR_ -> cont host (reduceOpyCtr term val)
        t | t == _W32_ -> cont host (reduceOpyW32 term val)
        t | t == _CHR_ -> cont host (reduceOpyW32 term val)
        t | t == _INC_ -> cont host (reduceOpyInc term val)
        t | t == _DEC_ -> cont host (reduceOpyDec term val)
        _   -> set (loc + 0) val >> return term

    t | t == _DP0_ -> do
      sb0 <- got (loc + 0)
      if termGetBit sb0 == 0
        then do
          val <- reduceAt debug book (loc + 0)
          case termTag val of
            t | t == _ERA_ -> cont host (reduceDupEra term val)
            t | t == _LAM_ -> cont host (reduceDupLam term val)
            t | t == _SUP_ -> cont host (reduceDupSup term val)
            t | t == _CTR_ -> cont host (reduceDupCtr term val)
            t | t == _W32_ -> cont host (reduceDupW32 term val)
            t | t == _CHR_ -> cont host (reduceDupW32 term val)
            t | t == _INC_ -> cont host (reduceDupInc term val)
            t | t == _DEC_ -> cont host (reduceDupDec term val)
            _   -> set (loc + 0) val >> return term
        else do
          set host (termRemBit sb0)
          reduceAt debug book host

    t | t == _DP1_ -> do
      sb1 <- got (loc + 0)
      if termGetBit sb1 == 0
        then do
          val <- reduceAt debug book (loc + 0)
          case termTag val of
            t | t == _ERA_ -> cont host (reduceDupEra term val)
            t | t == _LAM_ -> cont host (reduceDupLam term val)
            t | t == _SUP_ -> cont host (reduceDupSup term val)
            t | t == _CTR_ -> cont host (reduceDupCtr term val)
            t | t == _W32_ -> cont host (reduceDupW32 term val)
            t | t == _CHR_ -> cont host (reduceDupW32 term val)
            t | t == _INC_ -> cont host (reduceDupInc term val)
            t | t == _DEC_ -> cont host (reduceDupDec term val)
            _   -> set (loc + 0) val >> return term
        else do
          set host (termRemBit sb1)
          reduceAt debug book host

    t | t == _VAR_ -> do
      sub <- got (loc + 0)
      if termGetBit sub == 0
        then return term
        else do
          set host (termRemBit sub)
          reduceAt debug book host

    t | t == _REF_ -> do
      reduceRefAt book host
      reduceAt debug book host

    _ -> do
      return term

  where
    cont host action = do
      ret <- action
      set host ret
      reduceAt debug book host

gotT :: Book -> Loc -> HVM Term
gotT book host = got host

reduceRefAt :: Book -> Loc -> HVM Term
reduceRefAt book host = do
  term <- got host
  let lab = termLab term
  let loc = termLoc term
  let fid = fromIntegral lab
  let ari = funArity book fid
  case lab of
    x | x == _DUP_F_ -> reduceRefAt_DupF book host loc ari
    x | x == _SUP_F_ -> reduceRefAt_SupF book host loc ari
    x | x == _LOG_F_ -> reduceRefAt_LogF book host loc ari
    otherwise -> case MS.lookup fid (fidToFun book) of
      Just ((copy, args), core) -> do
        incItr
        when (length args /= fromIntegral ari) $ do
          putStrLn $ "RUNTIME_ERROR: arity mismatch on call to '@" ++ mget (fidToNam book) fid ++ "'."
          exitFailure
        argTerms <- if ari == 0
          then return []
          else forM (zip [0..] args) $ \(i, (strict, _)) -> do
            term <- got (loc + i)
            if strict
              then reduceAt False book (loc + i)
              else return term
        doInjectCoreAt book core host $ zip (map snd args) argTerms
      Nothing -> do
        putStrLn $ "RUNTIME_ERROR: Function ID " ++ show fid ++ " not found in fidToFun book."
        exitFailure

-- Primitive: Dynamic Dup `@DUP(lab val λdp0λdp1(bod))`
reduceRefAt_DupF :: Book -> Loc -> Loc -> Word16 -> HVM Term  
reduceRefAt_DupF book host loc ari = do
  incItr
  when (ari /= 3) $ do
    putStrLn $ "RUNTIME_ERROR: arity mismatch on call to '@DUP'."
    exitFailure
  lab <- reduceAt False book (loc + 0)
  val <- got (loc + 1)
  bod <- got (loc + 2)
  dup <- allocNode 1
  case termTag lab of
    t | t == _W32_ -> do
      when (termLoc lab > 0xFFFF) $ do
        error "RUNTIME_ERROR: dynamic DUP label too large"
      -- Create the DUP node with value
      set (dup + 0) val
      -- Create first APP node for (APP bod DP0)
      app1 <- allocNode 2
      set (app1 + 0) bod
      set (app1 + 1) (termNew _DP0_ (fromIntegral (termLoc lab)) dup)
      -- Create second APP node for (APP (APP bod DP0) DP1)
      app2 <- allocNode 2
      set (app2 + 0) (termNew _APP_ 0 app1)
      set (app2 + 1) (termNew _DP1_ (fromIntegral (termLoc lab)) dup)
      let ret = termNew _APP_ 0 app2
      set host ret
      return ret
    _ -> do
      core <- doExtractCoreAt gotT book (loc + 0)
      putStrLn $ "RUNTIME_ERROR: dynamic DUP without numeric label: " ++ showTerm lab
      putStrLn $ showCore (doLiftDups core)
      exitFailure

-- Primitive: Dynamic Sup `@SUP(lab tm0 tm1)`
reduceRefAt_SupF :: Book -> Loc -> Loc -> Word16 -> HVM Term
reduceRefAt_SupF book host loc ari = do
  incItr
  when (ari /= 3) $ do
    putStrLn $ "RUNTIME_ERROR: arity mismatch on call to '@SUP'."
    exitFailure
  lab <- reduceAt False book (loc + 0)
  tm0 <- got (loc + 1)
  tm1 <- got (loc + 2)
  sup <- allocNode 2
  case termTag lab of
    t | t == _W32_ -> do
      when (termLoc lab > 0xFFFF) $ do
        error "RUNTIME_ERROR: dynamic SUP label too large"
      let ret = termNew _SUP_ (fromIntegral (termLoc lab)) sup
      set (sup + 0) tm0
      set (sup + 1) tm1
      set host ret
      return ret
    _ -> error "RUNTIME_ERROR: dynamic SUP without numeric label."

-- Primitive: Logger `@LOG(msg)`
-- Will extract the term and log it. 
-- Returns 0.
reduceRefAt_LogF :: Book -> Loc -> Loc -> Word16 -> HVM Term
reduceRefAt_LogF book host loc ari = do
  incItr
  when (ari /= 1) $ do
    putStrLn $ "RUNTIME_ERROR: arity mismatch on call to '@LOG'."
    exitFailure
  msg <- doExtractCoreAt (reduceAt False) book (loc + 0)
  putStrLn $ showCore msg
  -- msgs <- doCollapseFlatAt gotT book (loc + 0)
  -- forM_ msgs $ \msg -> do
    -- putStrLn $ showCore msg
  let ret = termNew _W32_ 0 0
  set host ret
  return ret

-- Primitive: Fresh `@FRESH`
-- Returns a fresh dup label.
reduceRefAt_FreshF :: Book -> Loc -> Loc -> Word16 -> HVM Term
reduceRefAt_FreshF book host loc ari = do
  incItr
  when (ari /= 0) $ do
    putStrLn $ "RUNTIME_ERROR: arity mismatch on call to '@Fresh'."
    exitFailure
  num <- fromIntegral <$> fresh
  let ret = termNew _W32_ 0 num
  set host ret
  return ret

reduceCAt :: Bool -> ReduceAt
reduceCAt = \ _ _ host -> reduceAtC host


================================================
FILE: src/HVM/Runtime.c
================================================
#include "Runtime.h"

// Single translation unit aggregator for the C runtime.
// Keeping hot paths in one TU restores inlining and performance.

// Core state and memory
#include "runtime/state.c"
#include "runtime/memory.c"

// Heap, terms, stack, and debug printing
#include "runtime/heap.c"
#include "runtime/term.c"
#include "runtime/stack.c"
#include "runtime/print.c"

// Reductions dispatcher and helpers
#include "runtime/reduce.c"

// Interaction rules, grouped by tag
#include "runtime/reduce/let.c"

// APP
#include "runtime/reduce/app_era.c"
#include "runtime/reduce/app_lam.c"
#include "runtime/reduce/app_sup.c"
#include "runtime/reduce/app_ctr.c"
#include "runtime/reduce/app_w32.c"
#include "runtime/reduce/app_una.c"

// DUP
#include "runtime/reduce/dup_era.c"
#include "runtime/reduce/dup_lam.c"
#include "runtime/reduce/dup_sup.c"
#include "runtime/reduce/dup_ctr.c"
#include "runtime/reduce/dup_w32.c"
#include "runtime/reduce/dup_ref.c"
#include "runtime/reduce/dup_una.c"

// MAT / IFL / SWI
#include "runtime/reduce/mat_era.c"
#include "runtime/reduce/mat_lam.c"
#include "runtime/reduce/mat_sup.c"
#include "runtime/reduce/mat_ctr.c"
#include "runtime/reduce/mat_w32.c"
#include "runtime/reduce/mat_una.c"

// OPX/OPY
#include "runtime/reduce/opx_era.c"
#include "runtime/reduce/opx_lam.c"
#include "runtime/reduce/opx_sup.c"
#include "runtime/reduce/opx_ctr.c"
#include "runtime/reduce/opx_w32.c"
#include "runtime/reduce/opx_una.c"
#include "runtime/reduce/opy_era.c"
#include "runtime/reduce/opy_lam.c"
#include "runtime/reduce/opy_sup.c"
#include "runtime/reduce/opy_ctr.c"
#include "runtime/reduce/opy_w32.c"
#include "runtime/reduce/opy_una.c"

// Primitives
#include "runtime/prim/SUP.c"
#include "runtime/prim/DUP.c"
#include "runtime/prim/LOG.c"


================================================
FILE: src/HVM/Runtime.h
================================================
// Shared runtime declarations for HVM C runtime
#pragma once

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>
#include <time.h>
#include <stdbool.h>

// mmap portability helpers (e.g., macOS)
#ifndef MAP_ANONYMOUS
#  ifdef MAP_ANON
#    define MAP_ANONYMOUS MAP_ANON
#  else
#    define MAP_ANONYMOUS 0
#  endif
#endif
#ifndef MAP_NORESERVE
#  define MAP_NORESERVE 0
#endif

// Limits
#define MAX_HEAP_SIZE (1ULL << 40)
#define MAX_STACK_SIZE (1ULL << 28)

// Basic types
typedef uint8_t  Tag;
typedef uint16_t Lab; // 16-bit label (fits 65536 ctors/ops)
typedef uint64_t Loc; // up to 40-bit heap index in term payload
typedef uint64_t Term;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

// Constants (tags)
#define DP0 0x00
#define DP1 0x01
#define VAR 0x02
#define SUB 0x03
#define REF 0x04
#define LET 0x05
#define APP 0x06
#define MAT 0x08
#define IFL 0x09
#define SWI 0x0A
#define OPX 0x0B
#define OPY 0x0C
#define ERA 0x0D
#define LAM 0x0E
#define SUP 0x0F
#define CTR 0x10
#define W32 0x11
#define CHR 0x12
#define INC 0x13
#define DEC 0x14

// Operators
#define OP_ADD 0x00
#define OP_SUB 0x01
#define OP_MUL 0x02
#define OP_DIV 0x03
#define OP_MOD 0x04
#define OP_EQ  0x05
#define OP_NE  0x06
#define OP_LT  0x07
#define OP_GT  0x08
#define OP_LTE 0x09
#define OP_GTE 0x0A
#define OP_AND 0x0B
#define OP_OR  0x0C
#define OP_XOR 0x0D
#define OP_LSH 0x0E
#define OP_RSH 0x0F

// Builtin function ids
#define DUP_F 0xFFFF
#define SUP_F 0xFFFE
#define LOG_F 0xFFFD

// Let flavours
#define LAZY 0x0
#define STRI 0x1

#define VOID 0x00000000000000ULL

// Runtime State
typedef struct {
  Term*  sbuf; // reduction stack buffer
  u64*   spos; // reduction stack position
  Term*  heap; // global node buffer
  u64*   size; // global node buffer position
  u64*   itrs; // interaction count
  u64*   frsh; // fresh dup label count
  Term (*book[65536])(Term); // functions
  u16    cari[65536]; // arity of each constructor
  u16    clen[65536]; // case length of each constructor
  u16    cadt[65536]; // ADT id of each constructor
  u16    fari[65536]; // arity of each function
} State;

// Global runtime state
extern State HVM;

// Heap controls
void set_len(u64 size);
void set_itr(u64 itrs);
u64  get_len();
u64  get_itr();
u64  fresh();

// Term helpers
Term term_new(Tag tag, Lab lab, Loc loc);
Tag  term_tag(Term x);
Lab  term_lab(Term x);
Loc  term_loc(Term x);
u64  term_get_bit(Term x);
Term term_set_bit(Term term);
Term term_rem_bit(Term term);
Term term_set_loc(Term x, Loc loc);
_Bool term_is_atom(Term term);

// Heap read/write
Term swap(Loc loc, Term term);
Term got(Loc loc);
void set(Loc loc, Term term);
void sub(Loc loc, Term term);
Term take(Loc loc);

// Allocation and accounting
Loc  alloc_node(Loc arity);
void inc_itr();

// Stack
void spush(Term term, Term* sbuf, u64* spos);
Term spop(Term* sbuf, u64* spos);

// Debug printing
void print_tag(Tag tag);
void print_term(Term term);
void print_heap();

// Reductions (public API)
Term reduce(Term term);
Term reduce_at(Loc host);
Term normal(Term term);

// Interaction functions
Term reduce_ref_sup(Term ref, u16 idx);
Term reduce_ref(Term ref);
Term reduce_let(Term let, Term val);

// APP
Term reduce_app_era(Term app, Term era);
Term reduce_app_lam(Term app, Term lam);
Term reduce_app_sup(Term app, Term sup);
Term reduce_app_ctr(Term app, Term ctr);
Term reduce_app_w32(Term app, Term w32);
Term reduce_app_una(Term app, Term una, Tag tag);
Term reduce_app_inc(Term app, Term inc);
Term reduce_app_dec(Term app, Term dec);

// DUP
Term reduce_dup_era(Term dup, Term era);
Term reduce_dup_lam(Term dup, Term lam);
Term reduce_dup_sup(Term dup, Term sup);
Term reduce_dup_ctr(Term dup, Term ctr);
Term reduce_dup_w32(Term dup, Term w32);
Term reduce_dup_ref(Term dup, Term ref);
Term reduce_dup_una(Term dup, Term una, Tag tag);
Term reduce_dup_inc(Term dup, Term inc);
Term reduce_dup_dec(Term dup, Term dec);

// MAT
Term reduce_mat_era(Term mat, Term era);
Term reduce_mat_lam(Term mat, Term lam);
Term reduce_mat_sup(Term mat, Term sup);
Term reduce_mat_ctr(Term mat, Term ctr);
Term reduce_mat_w32(Term mat, Term w32);
Term reduce_mat_una(Term mat, Term una, Tag tag);
Term reduce_mat_inc(Term mat, Term inc);
Term reduce_mat_dec(Term mat, Term dec);

// OPX
Term reduce_opx_era(Term opx, Term era);
Term reduce_opx_lam(Term opx, Term lam);
Term reduce_opx_sup(Term opx, Term sup);
Term reduce_opx_ctr(Term opx, Term ctr);
Term reduce_opx_w32(Term opx, Term nmx);
Term reduce_opx_una(Term opx, Term una, Tag tag);
Term reduce_opx_inc(Term opx, Term inc);
Term reduce_opx_dec(Term opx, Term dec);

// OPY
Term reduce_opy_era(Term opy, Term era);
Term reduce_opy_lam(Term opy, Term lam);
Term reduce_opy_sup(Term opy, Term sup);
Term reduce_opy_ctr(Term opy, Term ctr);
Term reduce_opy_w32(Term opy, Term w32);
Term reduce_opy_una(Term opy, Term una, Tag tag);
Term reduce_opy_inc(Term opy, Term inc);
Term reduce_opy_dec(Term opy, Term dec);

// Primitives
Term SUP_f(Term ref);
Term DUP_f(Term ref);
Term LOG_f(Term ref);

// Runtime memory API
void hvm_init();
void hvm_free();
State* hvm_get_state();
void hvm_set_state(State* hvm);
void hvm_define(u16 fid, Term (*func)());
void hvm_set_cari(u16 cid, u16 arity);
void hvm_set_fari(u16 fid, u16 arity);
void hvm_set_clen(u16 cid, u16 cases);
void hvm_set_cadt(u16 cid, u16 adt);


================================================
FILE: src/HVM/Type.hs
================================================
module HVM.Type where

import Data.Word
import Foreign.Ptr

import Control.Applicative ((<|>))
import Control.DeepSeq
import Control.Monad (forM)
import Data.Char (chr, ord)
import Data.Char (intToDigit)
import Data.IORef
import Data.List
import Data.Word
import GHC.Stack (HasCallStack)
import Numeric (showIntAtBase)
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Map.Strict as MS

-- Core Types
-- ----------

type Tag  = Word8
type Lab  = Word16
type Loc  = Word64
type Term = Word64

type Name = String
type Move = (Name, Core) -- !x = term
type Case = (Name, [Name], Core) -- #Ctr{x0 x1...}: fn

data LetT = LAZY | STRI deriving (Eq, Enum)
data MatT = SWI | MAT Word16 | IFL Word16 deriving (Show, Eq)

data Core
  = Var Name                    -- x
  | Ref Name Word16 [Core]      -- @fn
  | Era                         -- *
  | Lam Name Core               -- λx(F)
  | App Core Core               -- (f x)
  | Sup Lab Core Core           -- &L{a b}
  | Dup Lab Name Name Core Core -- ! &L{a b} = v body
  | Ctr Name [Core]             -- #Ctr{a b ...}
  | U32 Word32                  -- 123
  | Chr Char                    -- 'a'
  | Op2 Oper Core Core          -- (+ a b)
  | Let LetT Name Core Core     -- ! x = v body
  | Mat MatT Core [Move] [Case] -- ~ v !moves { cases }
  | Inc Core                    -- ↑ x
  | Dec Core                    -- ↓ x
  deriving (Eq)

data Oper
  = OP_ADD | OP_SUB | OP_MUL | OP_DIV
  | OP_MOD | OP_EQ  | OP_NE  | OP_LT
  | OP_GT  | OP_LTE | OP_GTE | OP_AND
  | OP_OR  | OP_XOR | OP_LSH | OP_RSH
  deriving (Eq, Enum)

-- A top-level function, including:
-- - copy: true when ref-copy mode is enabled
-- - args: a list of (isArgStrict, argName) pairs
-- - core: the function's body
-- Note: ref-copy improves C speed, but increases interaction count
type Func = ((Bool, [(Bool,String)]), Core)

-- Set of labels in a function's body
type HasLab = (MS.Map Lab ())

data Book = Book
  { fidToFun :: MS.Map Word16 Func   -- func id to Func object
  , fidToLab :: MS.Map Word16 HasLab -- func id to dup labels used
  , fidToNam :: MS.Map Word16 Name   -- func id to name
  , namToFid :: MS.Map Name   Word16 -- func name to id
  , cidToAri :: MS.Map Word16 Word16 -- ctor id to field count (arity)
  , cidToLen :: MS.Map Word16 Word16 -- ctor id to cases length (ADT ctors)
  , cidToCtr :: MS.Map Word16 Name   -- ctor id to name
  , ctrToCid :: MS.Map Name   Word16 -- ctor name to id
  , cidToADT :: MS.Map Word16 Word16 -- ctor id to ADT id (first ADT cid)
  , freshLab :: Lab                  -- auto dup label counter
  } deriving (Show, Eq)

-- Runtime Types
-- -------------

type HVM = IO
type ReduceAt = Book -> Loc -> HVM Term

-- Constants
-- ---------

-- Tags
_DP0_ = 0x00 :: Tag
_DP1_ = 0x01 :: Tag
_VAR_ = 0x02 :: Tag
_FWD_ = 0x03 :: Tag
_REF_ = 0x04 :: Tag
_LET_ = 0x05 :: Tag
_APP_ = 0x06 :: Tag
_MAT_ = 0x08 :: Tag
_IFL_ = 0x09 :: Tag
_SWI_ = 0x0A :: Tag
_OPX_ = 0x0B :: Tag
_OPY_ = 0x0C :: Tag
_ERA_ = 0x0D :: Tag
_LAM_ = 0x0E :: Tag
_SUP_ = 0x0F :: Tag
_CTR_ = 0x10 :: Tag
_W32_ = 0x11 :: Tag
_CHR_ = 0x12 :: Tag
_INC_ = 0x13 :: Tag
_DEC_ = 0x14 :: Tag

-- Let Types
modeT :: Lab -> LetT
modeT 0x00 = LAZY
modeT 0x01 = STRI
modeT mode = error $ "unknown mode: " ++ show mode

-- Primitive Functions
_DUP_F_ = 0xFFFF :: Lab
_SUP_F_ = 0xFFFE :: Lab
_LOG_F_ = 0xFFFD :: Lab

primitives :: [(String, Word16)]
primitives =
  [ ("SUP", fromIntegral _SUP_F_)
  , ("DUP", fromIntegral _DUP_F_)
  , ("LOG", fromIntegral _LOG_F_)
  ]

-- Utils
-- -----

-- Getter function for maps
mget :: (Ord k, Show k, HasCallStack) => MS.Map k a -> k -> a
mget map key =
  case MS.lookup key map of
    Just val -> val
    Nothing  -> error $ "key not found: " ++ show key

funArity :: Book -> Word16 -> Word16
funArity book fid
  | fid == fromIntegral _SUP_F_ = 3
  | fid == fromIntegral _DUP_F_ = 3
  | fid == fromIntegral _LOG_F_ = 1
  | otherwise = case MS.lookup fid (fidToFun book) of
      Just ((_, args), _) -> fromIntegral (length args)
      Nothing -> error $ "Function ID not found: " ++ show fid

instance NFData Core where
  rnf (Var k)           = rnf k
  rnf (Ref k i xs)      = rnf k `seq` rnf i `seq` rnf xs
  rnf Era               = ()
  rnf (Lam x f)         = rnf x `seq` rnf f
  rnf (App f x)         = rnf f `seq` rnf x
  rnf (Sup l a b)       = rnf l `seq` rnf a `seq` rnf b
  rnf (Dup l x y v f)   = rnf l `seq` rnf x `seq` rnf y `seq` rnf v `seq` rnf f
  rnf (Ctr k xs)        = rnf k `seq` rnf xs
  rnf (U32 v)           = rnf v
  rnf (Chr v)           = rnf v
  rnf (Op2 o a b)       = o `seq` rnf a `seq` rnf b
  rnf (Let m k v f)     = m `seq` rnf k `seq` rnf v `seq` rnf f
  rnf (Mat k v m ks)    = k `seq` rnf v `seq` rnf m `seq` rnf ks
  rnf (Inc x)           = rnf x
  rnf (Dec x)           = rnf x


-- Stringification
-- ---------------

padLeft :: String -> Int -> Char -> String
padLeft str n c = replicate (n - length str) c ++ str

showHex :: Word64 -> String
showHex x = showIntAtBase 16 intToDigit (fromIntegral x) ""

showName :: Int -> String
showName n = go (n + 1) "" where
  go n ac | n == 0    = ac
          | otherwise = go q (chr (ord 'a' + r) : ac)
          where (q,r) = quotRem (n - 1) 26

showTag :: Tag -> String
showTag tag
  | tag == _DP0_ = "DP0"
  | tag == _DP1_ = "DP1"
  | tag == _VAR_ = "VAR"
  | tag == _FWD_ = "FWD"
  | tag == _REF_ = "REF"
  | tag == _LET_ = "LET"
  | tag == _APP_ = "APP"
  | tag == _MAT_ = "MAT"
  | tag == _IFL_ = "IFL"
  | tag == _SWI_ = "SWI"
  | tag == _OPX_ = "OPX"
  | tag == _OPY_ = "OPY"
  | tag == _ERA_ = "ERA"
  | tag == _LAM_ = "LAM"
  | tag == _SUP_ = "SUP"
  | tag == _CTR_ = "CTR"
  | tag == _W32_ = "W32"
  | tag == _CHR_ = "CHR"
  | tag == _INC_ = "INC"
  | tag == _DEC_ = "DEC"
  | otherwise    = error $ "unknown tag: " ++ show tag

showLab :: Lab -> String
showLab lab = padLeft (showHex (fromIntegral lab)) 6 '0'

showLoc :: Loc -> String
showLoc loc = padLeft (showHex (fromIntegral loc)) 8 '0'

instance Show Oper where
  show OP_ADD = "+"
  show OP_SUB = "-"
  show OP_MUL = "*"
  show OP_DIV = "/"
  show OP_MOD = "%"
  show OP_EQ  = "=="
  show OP_NE  = "!="
  show OP_LT  = "<"
  show OP_GT  = ">"
  show OP_LTE = "<="
  show OP_GTE = ">="
  show OP_AND = "&"
  show OP_OR  = "|"
  show OP_XOR = "^"
  show OP_LSH = "<<"
  show OP_RSH = ">>"

instance Show LetT where
  show LAZY = ""
  show STRI = "!"

showCore :: Core -> String
showCore core = maybe (format core) id (sugar core) where

  sugar :: Core -> Maybe String
  sugar core = nil core <|> str core <|> lst core where
    nil :: Core -> Maybe String
    nil (Ctr "#Nil" []) = Just "[]"
    nil _               = Nothing
    str :: Core -> Maybe String
    str (Ctr "#Nil" []) = Just "\"\""
    str (Ctr "#Cons" [Chr h, t]) = do
      rest <- str t
      return $ "\"" ++ h : tail rest
    str _ = Nothing
    lst :: Core -> Maybe String
    lst (Ctr "#Nil" [])       = Just "[]"
    lst (Ctr "#Cons" [x, xs]) = do
      rest <- lst xs
      return $ "[" ++ showCore x ++ if rest == "[]" then "]" else " " ++ tail rest
    lst _ = Nothing

  format :: Core -> String
  format (Var k) =
    k
  format Era =
    "*"
  format (Lam x f) =
    let f' = showCore f in
    concat ["λ", x, " ", f']
  format (App f x) =
    let f' = showCore f in
    let x' = showCore x in
    concat ["(", f', " ", x', ")"]
  format (Sup l a b) =
    let a' = showCore a in
    let b' = showCore b in
    concat ["&", show l, "{", a', " ", b', "}"]
  format (Dup l x y v f) =
    let v' = showCore v in
    let f' = showCore f in
    concat ["! &", show l, "{", x, " ", y, "} = ", v', "\n", f']
  format (Ref k i xs) =
    let xs' = intercalate " " (map showCore xs) in
    concat ["@", k, "(", xs', ")"]
  format (Ctr k xs) =
    let xs' = unwords (map showCore xs) in
    concat [k, "{", xs', "}"]
  format (Mat k v m ks) =
    let v'  = showCore v in
    let m'  = concatMap (\(k,v) -> concat [" !", k, "=", showCore v]) m in
    let ks' = unwords [concat [c, ":", showCore b] | (c, vs, b) <- ks] in
    concat ["(~", v', m', " {", ks', "})"]
  format (U32 v) =
    show v
  format (Chr v) =
    concat ["'", [v], "'"]
  format (Op2 o a b) =
    let a' = showCore a in
    let b' = showCore b in
    concat ["(", show o, " ", a', " ", b', ")"]
  format (Let m k v f)
    | k == "" =
      let v' = showCore v in
      let f' = showCore f in
      concat [v', "\n", f']
    | otherwise =
      let v' = showCore v in
      let f' = showCore f in
      concat ["! ", show m, k, " = ", v', "\n", f']
  format (Inc x) =
    let x' = showCore x in
    concat ["↑", x']
  format (Dec x) =
    let x' = showCore x in
    concat ["↓", x']

rename :: Core -> Core
rename core = unsafePerformIO $ do
  names <- newIORef MS.empty
  renamer names core

renamer :: IORef (MS.Map String String) -> Core -> IO Core
renamer names core = case core of
  Var k -> do
    k' <- genName names k
    return $ Var k'
  Lam x f -> do
    x' <- genName names x
    f' <- renamer names f
    return $ Lam x' f'
  Let m k v f -> do
    k' <- genName names k
    v' <- renamer names v
    f' <- renamer names f
    return $ Let m k' v' f'
  App f x -> do
    f' <- renamer names f
    x' <- renamer names x
    return $ App f' x'
  Sup l a b -> do
    a' <- renamer names a
    b' <- renamer names b
    return $ Sup l a' b'
  Dup l x y v f -> do
    x' <- genName names x
    y' <- genName names y
    v' <- renamer names v
    f' <- renamer names f
    return $ Dup l x' y' v' f'
  Ctr k xs -> do
    xs' <- mapM (renamer names) xs
    return $ Ctr k xs'
  Mat k v m ks -> do
    v'  <- renamer names v
    m'  <- forM m $ \(k,v) -> do v' <- renamer names v; return (k,v')
    ks' <- forM ks $ \(c,vs,t) -> do
      vs' <- mapM (genName names) vs
      t'  <- renamer names t
      return (c,vs',t')
    return $ Mat k v' m' ks'
  Op2 o a b -> do
    a' <- renamer names a
    b' <- renamer names b
    return $ Op2 o a' b'
  Ref k i xs -> do
    xs' <- mapM (renamer names) xs
    return $ Ref k i xs'
  Inc x -> do
    x' <- renamer names x
    return $ Inc x'
  Dec x -> do
    x' <- renamer names x
    return $ Dec x'
  other -> 
    return other

genName :: IORef (MS.Map String String) -> String -> IO String
genName names name =
  atomicModifyIORef' names $ \map ->
    case MS.lookup (strip name) map of
      Just val -> (map, val)
      Nothing  ->
        let new  = showName (MS.size map)
            map' = MS.insert (strip name) new map
        in (map', new)
  where strip name = if "&" `isPrefixOf` name then tail name else name

instance Show Core where
  show = showCore . rename


================================================
FILE: src/HVM/runtime/heap.c
================================================
#include "Runtime.h"

// Heap counters
void set_len(u64 size) { *HVM.size = size; }
void set_itr(u64 itrs) { *HVM.itrs = itrs; }
u64  get_len() { return *HVM.size; }
u64  get_itr() { return *HVM.itrs; }
u64  fresh()   { return (*HVM.frsh)++; }

// Atomics
Term swap(Loc loc, Term term) {
  Term val = HVM.heap[loc];
  HVM.heap[loc] = term;
  if (val == 0) {
    printf("SWAP 0 at %08llx\n", (u64)loc);
    exit(0);
  }
  return val;
}

Term got(Loc loc) {
  Term val = HVM.heap[loc];
  if (val == 0) {
    printf("GOT 0 at %08llx\n", (u64)loc);
    exit(0);
  }
  return val;
}

void set(Loc loc, Term term) { HVM.heap[loc] = term; }
void sub(Loc loc, Term term) { set(loc, term_set_bit(term)); }
Term take(Loc loc) { return swap(loc, VOID); }

// Allocation and accounting
Loc alloc_node(Loc arity) {
  if (*HVM.size + arity > MAX_HEAP_SIZE) {
    printf("Heap memory limit exceeded\n");
    exit(1);
  }
  u64 old = *HVM.size;
  *HVM.size += arity;
  return old;
}

void inc_itr() { (*HVM.itrs)++; }


================================================
FILE: src/HVM/runtime/memory.c
================================================
#include "Runtime.h"

static void *alloc_huge(size_t size) {
    void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
                     -1, 0);
    if (ptr == MAP_FAILED) {
        perror("mmap failed");
        return NULL;
    }
    return ptr;
}

void hvm_init() {
  HVM.sbuf = alloc_huge(MAX_STACK_SIZE * sizeof(Term));
  HVM.heap = alloc_huge(MAX_HEAP_SIZE  * sizeof(Term));
  HVM.spos = alloc_huge(sizeof(u64));
  HVM.size = alloc_huge(sizeof(u64));
  HVM.itrs = alloc_huge(sizeof(u64));
  HVM.frsh = alloc_huge(sizeof(u64));

  #define CHECK_ALLOC(ptr, name) if (!(ptr)) { printf(name " alloc failed\n"); allocs_failed++; }
  int allocs_failed = 0;
  CHECK_ALLOC(HVM.sbuf, "sbuf");
  CHECK_ALLOC(HVM.heap, "heap");
  CHECK_ALLOC(HVM.spos, "spos");
  CHECK_ALLOC(HVM.size, "size");
  CHECK_ALLOC(HVM.itrs, "itrs");
  CHECK_ALLOC(HVM.frsh, "frsh");
  if (allocs_failed > 0) {
    printf("hvm_init alloc's failed: %d allocations failed\n", allocs_failed);
    exit(1);
  }
  #undef CHECK_ALLOC

  *HVM.spos = 0;
  *HVM.size = 1;
  *HVM.itrs = 0;
  *HVM.frsh = 0x20;
  HVM.book[SUP_F] = SUP_f;
  HVM.book[DUP_F] = DUP_f;
  HVM.book[LOG_F] = LOG_f;
  for (int i = 0; i < 65536; i++) {
    HVM.cari[i] = 0;
    HVM.clen[i] = 0;
    HVM.cadt[i] = 0;
    HVM.fari[i] = 0;
  }
}

static void hvm_munmap(void *ptr, size_t size, const char *name) {
    if (ptr != MAP_FAILED) {
        if (munmap(ptr, size) == -1) {
            perror("munmap failed");
        }
    } else {
        printf("%s is already null or invalid.\n", name);
    }
}

void hvm_free() {
    hvm_munmap(HVM.sbuf, MAX_STACK_SIZE * sizeof(Term), "sbuf");
    hvm_munmap(HVM.heap, MAX_HEAP_SIZE  * sizeof(Term), "heap");
    hvm_munmap(HVM.spos, sizeof(u64), "spos");
    hvm_munmap(HVM.size, sizeof(u64), "size");
    hvm_munmap(HVM.itrs, sizeof(u64), "itrs");
    hvm_munmap(HVM.frsh, sizeof(u64), "frsh");
}


================================================
FILE: src/HVM/runtime/prim/DUP.c
================================================
#include "Runtime.h"

// Primitive: Dynamic Dup `@DUP(lab val λdp0λdp1(bod))`
// Creates a DUP node with given label.
Term DUP_f(Term ref) {
  Loc ref_loc = term_loc(ref);
  Term lab = reduce(got(ref_loc + 0));
  Term lab_val = term_loc(lab);
  if (term_tag(lab) != W32) {
    printf("ERROR:non-numeric-dup-label\n");
  }
  if (lab_val > 0xFFFF) {
    printf("ERROR:dup-label-too-large\n");
  }
  Term val = got(ref_loc + 1);
  Term bod = got(ref_loc + 2);
  Loc  dup = alloc_node(1);
  set(dup + 0, val);
  if (term_tag(bod) == LAM) {
    Loc  lam0 = term_loc(bod);
    Term bod0 = got(lam0 + 0);
    if (term_tag(bod0) == LAM) {
      Loc  lam1 = term_loc(bod0);
      Term bod1 = got(lam1 + 0);
      sub(lam0 + 0, term_new(DP0, lab_val, dup));
      sub(lam1 + 0, term_new(DP1, lab_val, dup));
      *HVM.itrs += 3;
      return bod1;
    }
  }
  Loc app0 = alloc_node(2);
  set(app0 + 0, bod);
  set(app0 + 1, term_new(DP0, lab_val, dup));
  Loc app1 = alloc_node(2);
  set(app1 + 0, term_new(APP, 0, app0));
  set(app1 + 1, term_new(DP1, lab_val, dup));
  *HVM.itrs += 1;
  return term_new(APP, 0, app1);
}


================================================
FILE: src/HVM/runtime/prim/LOG.c
================================================
#include "Runtime.h"

Term LOG_f(Term ref) {
  printf("TODO: LOG_f");
  exit(0);
}


================================================
FILE: src/HVM/runtime/prim/SUP.c
================================================
#include "Runtime.h"

// Primitive: Dynamic Sup `@SUP(lab tm0 tm1)`
// Allocates a new SUP node with given label.
Term SUP_f(Term ref) {
  Loc ref_loc = term_loc(ref);
  Term lab = reduce(got(ref_loc + 0));
  Term lab_val = term_loc(lab);
  if (term_tag(lab) != W32) {
    printf("ERROR:non-numeric-sup-label\n");
  }
  if (lab_val > 0xFFFF) {
    printf("ERROR:sup-label-too-large\n");
  }
  Term tm0 = got(ref_loc + 1);
  Term tm1 = got(ref_loc + 2);
  Loc  sup = alloc_node(2);
  Term ret = term_new(SUP, lab_val, sup);
  set(sup + 0, tm0);
  set(sup + 1, tm1);
  *HVM.itrs += 1;
  return ret;
}


================================================
FILE: src/HVM/runtime/print.c
================================================
#include "Runtime.h"

void print_tag(Tag tag) {
  switch (tag) {
    case VAR: printf("VAR"); break;
    case DP0: printf("DP0"); break;
    case DP1: printf("DP1"); break;
    case APP: printf("APP"); break;
    case LAM: printf("LAM"); break;
    case ERA: printf("ERA"); break;
    case SUP: printf("SUP"); break;
    case REF: printf("REF"); break;
    case LET: printf("LET"); break;
    case CTR: printf("CTR"); break;
    case MAT: printf("MAT"); break;
    case IFL: printf("IFL"); break;
    case SWI: printf("SWI"); break;
    case W32: printf("W32"); break;
    case CHR: printf("CHR"); break;
    case OPX: printf("OPX"); break;
    case OPY: printf("OPY"); break;
    case INC: printf("INC"); break;
    case DEC: printf("DEC"); break;
    default : printf("???"); break;
  }
}

void print_term(Term term) {
  printf("term_new(");
  print_tag(term_tag(term));
  printf(",0x%04llx,0x%010llx)", (u64)term_lab(term), (u64)term_loc(term));
}

void print_heap() {
  for (Loc i = 0; i < *HVM.size; i++) {
    Term term = got(i);
    if (term != 0) {
      printf("set(0x%08llx, ", (u64)i);
      print_term(term);
      printf(");\n");
    }
  }
}


================================================
FILE: src/HVM/runtime/reduce/app_ctr.c
================================================
#include "Runtime.h"

// &L(#{x y z ...} a)
// ------------------ APP-CTR
// ⊥
Term reduce_app_ctr(Term app, Term ctr) {
  printf("invalid:app-ctr(%lu)", (unsigned long)term_lab(ctr));
  exit(0);
}


================================================
FILE: src/HVM/runtime/reduce/app_era.c
================================================
#include "Runtime.h"

// (* a)
// ------- APP-ERA
// *
Term reduce_app_era(Term app, Term era) {
  inc_itr();
  return era;
}


================================================
FILE: src/HVM/runtime/reduce/app_lam.c
================================================
#include "Runtime.h"

// (λx(body) arg)
// ---------------- APP-LAM
// x <- arg
// body
Term reduce_app_lam(Term app, Term lam) {
  inc_itr();
  Loc app_loc = term_loc(app);
  Loc lam_loc = term_loc(lam);
  Term bod    = got(lam_loc + 0);
  Term arg    = got(app_loc + 1);
  sub(lam_loc + 0, arg);
  return bod;
}


================================================
FILE: src/HVM/runtime/reduce/app_sup.c
================================================
#include "Runtime.h"

// (&L{a b} c)
// --------------------- APP-SUP
// ! &L{x0 x1} = c
// &L{(a x0) (b x1)}
Term reduce_app_sup(Term app, Term sup) {
  inc_itr();
  Loc app_loc = term_loc(app);
  Loc sup_loc = term_loc(sup);
  Lab sup_lab = term_lab(sup);

  Term arg    = got(app_loc + 1);
  Term tm1    = got(sup_loc + 1);

  Loc loc = alloc_node(3);
  Loc ap0 = sup_loc;
  Loc ap1 = loc + 0;
  Loc su0 = app_loc;
  Loc dup = loc + 2;

  set(ap0 + 1, term_new(DP0, sup_lab, dup));

  set(ap1 + 0, tm1);
  set(ap1 + 1, term_new(DP1, sup_lab, dup));

  // Reuse app_loc for the result superposition
  set(su0 + 0, term_new(APP, 0, ap0));
  set(su0 + 1, term_new(APP, 0, ap1));

  set(dup + 0, arg);

  return term_new(SUP, sup_lab, su0);
}


================================================
FILE: src/HVM/runtime/reduce/app_una.c
================================================
#include "Runtime.h"

// (↑f x) / (↓f x)
// ---------------- APP-INC/DEC
// ↑(f x) / ↓(f x)
Term reduce_app_una(Term app, Term una, Tag tag) {
  inc_itr();
  Loc app_loc = term_loc(app);
  Loc una_loc = term_loc(una);
  Term fun    = got(una_loc + 0);
  Term arg    = got(app_loc + 1);

  // build the inner application in-place, re-using app_loc
  set(app_loc + 0, fun);
  set(app_loc + 1, arg);

  // point INC/DEC to the freshly built APP
  set(una_loc + 0, term_new(APP, 0, app_loc));
  return una;
}

Term reduce_app_inc(Term app, Term inc) {
  return reduce_app_una(app, inc, INC);
}

Term reduce_app_dec(Term app, Term dec) {
  return reduce_app_una(app, dec, DEC);
}


================================================
FILE: src/HVM/runtime/reduce/app_w32.c
================================================
#include "Runtime.h"

// &L(123 a)
// --------- APP-W32
// ⊥
Term reduce_app_w32(Term app, Term w32) {
  printf("invalid:app-w32(%llu)", (unsigned long long)term_loc(w32));
  exit(0);
}


================================================
FILE: src/HVM/runtime/reduce/dup_ctr.c
================================================
#include "Runtime.h"

// ! &L{x y} = #{a b c ...}
// ------------------------ DUP-CTR
// ! &L{a0 a1} = a
// ! &L{b0 b1} = b
// ! &L{c0 c1} = c
// ...
// x <- #{a0 b0 c0 ...} 
// y <- #{a1 b1 c1 ...}
Term reduce_dup_ctr(Term dup, Term ctr) {
  inc_itr();
  Loc dup_loc = term_loc(dup);
  Lab dup_lab = term_lab(dup);
  Loc ctr_loc = term_loc(ctr);
  Lab ctr_lab = term_lab(ctr);
  u64 ctr_ari = HVM.cari[ctr_lab];

  Loc loc     = alloc_node(ctr_ari * 2);
  Loc ctr0    = ctr_loc;
  Loc ctr1    = loc + 0;
  for (u64 i = 0; i < ctr_ari; i++) {
    Loc du0 = loc + ctr_ari + i;
    set(du0 + 0, got(ctr_loc + i));
    set(ctr0 + i, term_new(DP0, dup_lab, du0));
    set(ctr1 + i, term_new(DP1, dup_lab, du0));
  }
  if (term_tag(dup) == DP0) {
    sub(dup_loc + 0, term_new(CTR, ctr_lab, ctr1));
    return term_new(CTR, ctr_lab, ctr0);
  } else {
    sub(dup_loc + 0, term_new(CTR, ctr_lab, ctr0));
    return term_new(CTR, ctr_lab, ctr1);
  }
}


================================================
FILE: src/HVM/runtime/reduce/dup_era.c
================================================
#include "Runtime.h"

// ! &L{x y} = *
// ------------- DUP-ERA
// x <- *
// y <- *
Term reduce_dup_era(Term dup, Term era) {
  inc_itr();
  Loc dup_loc = term_loc(dup);
  sub(dup_loc + 0, era);
  return era;
}


================================================
FILE: src/HVM/runtime/reduce/dup_lam.c
================================================
#include "Runtime.h"

// ! &L{r s} = λx(f)
// ------------------- DUP-LAM
// ! &L{f0 f1} = f
// r <- λx0(f0)
// s <- λx1(f1)
// x <- &L{x0 x1}
Term reduce_dup_lam(Term dup, Term lam) {
  inc_itr();
  Loc dup_loc = term_loc(dup);
  Loc lam_loc = term_loc(lam);
  Lab dup_lab = term_lab(dup);

  Term bod    = got(lam_loc + 0);
  
  Loc loc     = alloc_node(5);
  Loc lm0     = loc + 0;
  Loc lm1     = loc + 1;
  Loc su0     = loc + 2;
  Loc du0     = loc + 4;

  sub(lam_loc + 0, term_new(SUP, dup_lab, su0));

  set(lm0 + 0, term_new(DP0, dup_lab, du0));
  set(lm1 + 0, term_new(DP1, dup_lab, du0));
  set(su0 + 0, term_new(VAR, 0, lm0));
  set(su0 + 1, term_new(VAR, 0, lm1));
  set(du0 + 0, bod);

  if (term_tag(dup) == DP0) {
    sub(dup_loc + 0, term_new(LAM, 0, lm1));
    return term_new(LAM, 0, lm0);
  } else {
    sub(dup_loc + 0, term_new(LAM, 0, lm0));
    return term_new(LAM, 0, lm1);
  }
}


================================================
FILE: src/HVM/runtime/reduce/dup_ref.c
================================================
#include "Runtime.h"

// ! &L{x y} = @foo(a b c ...)
// --------------------------- DUP-REF-COPY (when &L not in @foo)
// ! &L{a0 a1} = a
// ! &L{b0 b1} = b
// ! &L{c0 c1} = c
// ...
// x <- @foo(a0 b0 c0 ...)
// y <- @foo(a1 b1 c1 ...)
Term reduce_dup_ref(Term dup, Term ref) {
  inc_itr();
  Loc dup_loc = term_loc(dup);
  Lab dup_lab = term_lab(dup);
  Loc ref_loc = term_loc(ref);
  Lab ref_lab = term_lab(ref);
  u64 ref_ari = HVM.fari[ref_lab];

  Loc loc     = alloc_node(ref_ari * 2);
  Loc ref0    = ref_loc;
  Loc ref1    = loc + 0;
  for (u64 i = 0; i < ref_ari; i++) {
    Loc du0 = loc + ref_ari + i;
    set(du0 + 0, got(ref_loc + i));
    set(ref0 + i, term_new(DP0, dup_lab, du0));
    set(ref1 + i, term_new(DP1, dup_lab, du0));
  }
  if (term_tag(dup) == DP0) {
    sub(dup_loc + 0, term_new(REF, ref_lab, ref1));
    return term_new(REF, ref_lab, ref0);
  } else {
    sub(dup_loc + 0, term_new(REF, ref_lab, ref0));
    return term_new(REF, ref_lab, ref1);
  }
}


================================================
FILE: src/HVM/runtime/reduce/dup_sup.c
================================================
#include "Runtime.h"

// ! &L{x y} = &R{a b}
// ------------------- DUP-SUP
// if L == R:
//   x <- a
//   y <- b
// else:
//   x <- &R{a0 b0} 
//   y <- &R{a1 b1}
//   ! &L{a0 a1} = a
//   ! &L{b0 b1} = b
Term reduce_dup_sup(Term dup, Term sup) {
  inc_itr();
  Loc dup_loc = term_loc(dup);
  Lab dup_lab = term_lab(dup);
  Lab sup_lab = term_lab(sup);
  Loc sup_loc = term_loc(sup);
  if (dup_lab == sup_lab) {
    Term tm0 = got(sup_loc + 0);
    Term tm1 = got(sup_loc + 1);
    if (term_tag(dup) == DP0) {
      sub(dup_loc + 0, tm1);
      return tm0;
    } else {
      sub(dup_loc + 0, tm0);
      return tm1;
    }
  } else {
    Loc loc = alloc_node(4);
    Loc du0 = sup_loc + 0;
    Loc du1 = sup_loc + 1;
    Loc su0 = loc + 0;
    Loc su1 = loc + 2;
    set(su0 + 0, term_new(DP0, dup_lab, du0));
    set(su0 + 1, term_new(DP0, dup_lab, du1));
    set(su1 + 0, term_new(DP1, dup_lab, du0));
    set(su1 + 1, term_new(DP1, dup_lab, du1));
    if (term_tag(dup) == DP0) {
      sub(dup_loc + 0, term_new(SUP, sup_lab, su1));
      return term_new(SUP, sup_lab, su0);
    } else {
      sub(dup_loc + 0, term_new(SUP, sup_lab, su0));
      return term_new(SUP, sup_lab, su1);
    }
  }
}


================================================
FILE: src/HVM/runtime/reduce/dup_una.c
================================================
#include "Runtime.h"

// ! &L{a b} = ↑x / ↓x
// ------------- DUP-INC/DEC
// ! &L{A B} = x
// a <- ↑A / ↓A
// b <- ↑B / ↓B
Term reduce_dup_una(Term dup, Term una, Tag tag) {
  inc_itr();
  Loc dup_loc = term_loc(dup);
  Lab lab     = term_lab(dup);
  Loc una_loc = term_loc(una);
  Term inner  = got(una_loc + 0);

  // duplicate inner value
  Loc du_loc = una_loc;
  Loc w0_loc = alloc_node(1);
  Loc w1_loc = alloc_node(1);

  // wrap duplicates in INC / DEC
  set(w0_loc + 0, term_new(DP0, lab, du_loc));
  set(w1_loc + 0, term_new(DP1, lab, du_loc));

  if (term_tag(dup) == DP0) {
    sub(dup_loc + 0, term_new(tag, 0, w1_loc));
    return term_new(tag, 0, w0_loc);
  } else {
    sub(dup_loc + 0, term_new(tag, 0, w0_loc));
    return term_new(tag, 0, w1_loc);
  }
}

Term reduce_dup_inc(Term dup, Term inc) {
  return reduce_dup_una(dup, inc, INC);
}

Term reduce_dup_dec(Term dup, Term dec) {
  return reduce_dup_una(dup, dec, DEC);
}


================================================
FILE: src/HVM/runtime/reduce/dup_w32.c
================================================
#include "Runtime.h"

// ! &L{x y} = 123
// --------------- DUP-W32
// x <- 123
// y <- 123
Term reduce_dup_w32(Term dup, Term w32) {
  inc_itr();
  Loc dup_loc = term_loc(dup);
  sub(dup_loc + 0, w32);
  return w32;
}


================================================
FILE: src/HVM/runtime/reduce/let.c
================================================
#include "Runtime.h"

// ! x = val
// bod
// --------- LET
// x <- val
// bod
Term reduce_let(Term let, Term val) {
  inc_itr();
  Loc let_loc = term_loc(let);
  Term bod    = got(let_loc + 1);
  sub(let_loc + 0, val);
  return bod;
}


================================================
FILE: src/HVM/runtime/reduce/mat_ctr.c
================================================
#include "Runtime.h"

Term reduce_mat_ctr(Term mat, Term ctr) {
  inc_itr();
  Tag mat_tag = term_tag(mat);
  Loc mat_loc = term_loc(mat);
  Lab mat_lab = term_lab(mat);
  // If-Let
  if (mat_tag == IFL) {
    Loc ctr_loc = term_loc(ctr);
    Lab ctr_lab = term_lab(ctr);
    u64 mat_ctr = mat_lab;
    u64 ctr_num = ctr_lab;
    u64 ctr_ari = HVM.cari[ctr_num];
    if (mat_ctr == ctr_num) {
      Term app = got(mat_loc + 1);
      Loc loc = alloc_node(ctr_ari * 2);
      for (u64 i = 0; i < ctr_ari; i++) {
        Loc new_app = loc + i * 2;
        set(new_app + 0, app);
        set(new_app + 1, got(ctr_loc + i));
        app = term_new(APP, 0, new_app);
      }
      return app;
    } else {
      Term app = got(mat_loc + 2);
      Loc new_app = mat_loc;
      set(new_app + 0, app);
      set(new_app + 1, ctr);
      app = term_new(APP, 0, new_app);
      return app;
    }
  // Match
  } else {
    Loc ctr_loc = term_loc(ctr);
    Lab ctr_lab = term_lab(ctr);
    u64 ctr_num = ctr_lab;
    u64 ctr_ari = HVM.cari[ctr_num];
    u64 mat_ctr = mat_lab;
    u64 cadt = HVM.cadt[mat_ctr];
    u64 clen = HVM.clen[mat_ctr];
    if (ctr_num < cadt || ctr_num >= cadt + clen) {
      printf("invalid:mat-ctr(%llu, %llu)\n", (unsigned long long)ctr_num, (unsigned long long)cadt);
      exit(1);
    }
    u64 cse_idx = ctr_num - mat_ctr;
    Term app = got(mat_loc + 1 + cse_idx);
    Loc loc = alloc_node(ctr_ari * 2);
    for (u64 i = 0; i < ctr_ari; i++) {
      Loc new_app = loc + i * 2;
      set(new_app + 0, app);
      set(new_app + 1, got(ctr_loc + i));
      app = term_new(APP, 0, new_app);
    }
    return app;
  }
}


================================================
FILE: src/HVM/runtime/reduce/mat_era.c
================================================
#include "Runtime.h"

// ~ * {K0 K1 K2 ...} 
// ------------------ MAT-ERA
// *
Term reduce_mat_era(Term mat, Term era) {
  inc_itr();
  return era;
}


================================================
FILE: src/HVM/runtime/reduce/mat_lam.c
================================================
#include "Runtime.h"

// ~ λx(x) {K0 K1 K2 ...}
// ------------------------ MAT-LAM
// ⊥
Term reduce_mat_lam(Term mat, Term lam) {
  printf("invalid:mat-lam");
  exit(0);
}


================================================
FILE: src/HVM/runtime/reduce/mat_sup.c
================================================
#include "Runtime.h"

// ~ &L{x y} {K0 K1 K2 ...}
// ------------------------ MAT-SUP
// ! &L{k0a k0b} = K0
// ! &L{k1a k1b} = K1
// ! &L{k2a k2b} = K2
// ...
// &L{ ~ x {K0a K1a K2a ...}
//     ~ y {K0b K1b K2b ...} }
Term reduce_mat_sup(Term mat, Term sup) {
  inc_itr();
  Tag mat_tag = term_tag(mat);
  Lab mat_lab = term_lab(mat);
  Loc mat_loc = term_loc(mat);
  Loc sup_loc = term_loc(sup);
  Lab sup_lab = term_lab(sup);

  Term tm0    = got(sup_loc + 0);
  Term tm1    = got(sup_loc + 1);
  u64 mat_len = mat_tag == SWI ? mat_lab : mat_tag == IFL ? 2 : HVM.clen[mat_lab];

  Loc loc     = alloc_node(1 + mat_len + mat_len);
  Loc mat0    = mat_loc;
  Loc mat1    = loc + 0;
  Loc sup0    = sup_loc;

  set(mat0 + 0, tm0);
  set(mat1 + 0, tm1);
  for (u64 i = 0; i < mat_len; i++) {
    Loc du0 = loc + 1 + mat_len + i;
    set(du0 + 0, got(mat_loc + 1 + i));
    set(mat0 + 1 + i, term_new(DP0, sup_lab, du0));
    set(mat1 + 1 + i, term_new(DP1, sup_lab, du0));
  }
  set(sup0 + 0, term_new(mat_tag, mat_lab, mat0));
  set(sup0 + 1, term_new(mat_tag, mat_lab, mat1));
  return term_new(SUP, sup_lab, sup0);
}


================================================
FILE: src/HVM/runtime/reduce/mat_una.c
================================================
#include "Runtime.h"

// ~(↑x) {…} / ~(↓x) {…}
//  →  ↑(~x {…}) / ↓(~x {…})
Term reduce_mat_una(Term mat, Term una, Tag tag) {
  inc_itr();
  Loc mat_loc = term_loc(mat);
  Loc una_loc = term_loc(una);
  Term inner  = got(una_loc + 0);

  set(mat_loc + 0, inner);       // plug x inside the matcher
  set(una_loc + 0, mat);         // re-attach wrapped matcher
  return una;
}

Term reduce_mat_inc(Term mat, Term inc) {
  return reduce_mat_una(mat, inc, INC);
}

Term reduce_mat_dec(Term mat, Term dec) {
  return reduce_mat_una(mat, dec, DEC);
}


================================================
FILE: src/HVM/runtime/reduce/mat_w32.c
================================================
#include "Runtime.h"

// ~ num {K0 K1 K2 ... KN}
// ----------------------- MAT-W32
// if n < N: Kn
// else    : KN(num-N)
Term reduce_mat_w32(Term mat, Term w32) {
  inc_itr();
  Loc mat_loc = term_loc(mat);
  Lab mat_lab = term_lab(mat);
  u64 mat_len = mat_lab;
  u64 w32_val = term_loc(w32);
  if (w32_val < mat_len - 1) {
    return got(mat_loc + 1 + w32_val);
  } else {
    Term fn = got(mat_loc + mat_len);
    Loc app = mat_loc;
    set(app + 0, fn);
    set(app + 1, term_new(W32, 0, w32_val - (mat_len - 1)));
    return term_new(APP, 0, app);
  }
}


================================================
FILE: src/HVM/runtime/reduce/opx_ctr.c
================================================
#include "Runtime.h"

// <op(#{x0 x1 x2...} y)
// --------------------- OPX-CTR
// ⊥
Term reduce_opx_ctr(Term opx, Term ctr) {
  printf("invalid:opx-ctr");
  exit(0);
}


================================================
FILE: src/HVM/runtime/reduce/opx_era.c
================================================
#include "Runtime.h"

// <op(* b)
// -------- OPX-ERA
// *
Term reduce_opx_era(Term opx, Term era) {
  inc_itr();
  return era;
}


================================================
FILE: src/HVM/runtime/reduce/opx_lam.c
================================================
#include "Runtime.h"

// <op(λx(B) y)
// --------------- OPX-LAM
// ⊥
Term reduce_opx_lam(Term opx, Term lam) {
  printf("invalid:opx-lam");
  exit(0);
}


================================================
FILE: src/HVM/runtime/reduce/opx_sup.c
================================================
#include "Runtime.h"

// <op(&L{x0 x1} y)
// ------------------------- OPX-SUP
// ! &L{y0 y1} = y
// &L{<op(x0 y0) <op(x1 y1)}
Term reduce_opx_sup(Term opx, Term sup) {
  inc_itr();
  Loc opx_loc = term_loc(opx);
  Loc sup_loc = term_loc(sup);
  Lab sup_lab = term_lab(sup);
  Term nmy    = got(opx_loc + 1);
  Term tm0    = got(sup_loc + 0);
  Term tm1    = got(sup_loc + 1);
  Loc loc     = alloc_node(3);
  Loc op0     = opx_loc;
  Loc op1     = sup_loc;
  Loc su0     = loc + 0;
  Loc du0     = loc + 2;
  set(op0 + 0, tm0);
  set(op0 + 1, term_new(DP0, sup_lab, du0));
  set(op1 + 0, tm1);
  set(op1 + 1, term_new(DP1, sup_lab, du0));
  set(su0 + 0, term_new(OPX, term_lab(opx), op0));
  set(su0 + 1, term_new(OPX, term_lab(opx), op1));
  set(du0 + 0, nmy);
  return term_new(SUP, sup_lab, su0);
}


================================================
FILE: src/HVM/runtime/reduce/opx_una.c
================================================
#include "Runtime.h"

// <op(↑x y) / <op(↓x y)  →  ↑<op(x y) / ↓<op(x y)
Term reduce_opx_una(Term opx, Term una, Tag tag) {
  inc_itr();
  Loc opx_loc = term_loc(opx);
  Loc una_loc = term_loc(una);
  Term lhs    = got(una_loc + 0);
  Term rhs    = got(opx_loc + 1);         // already stored

  set(opx_loc + 0, lhs);
  set(opx_loc + 1, rhs);
  set(una_loc + 0, term_new(OPX, term_lab(opx), opx_loc));
  return una;
}

Term reduce_opx_inc(Term opx, Term inc) {
  return reduce_opx_una(opx, inc, INC);
}

Term reduce_opx_dec(Term opx, Term dec) {
  return reduce_opx_una(opx, dec, DEC);
}


================================================
FILE: src/HVM/runtime/reduce/opx_w32.c
================================================
#include "Runtime.h"

// <op(x0 x1)
// ---------- OPX-W32
// >op(x0 x1)
Term reduce_opx_w32(Term opx, Term nmx) {
  inc_itr();
  Lab opx_lab = term_lab(opx);
  Loc opx_loc = term_loc(opx);
  Term nmy = got(opx_loc + 1);
  set(opx_loc + 0, nmy);
  set(opx_loc + 1, nmx);
  return term_new(OPY, opx_lab, opx_loc);
}


================================================
FILE: src/HVM/runtime/reduce/opy_ctr.c
================================================
#include "Runtime.h"

// >op(#{x y z ...} b)
// ---------------------- OPY-CTR
// ⊥
Term reduce_opy_ctr(Term opy, Term ctr) {
  printf("invalid:opy-ctr");
  exit(0);
}


================================================
FILE: src/HVM/runtime/reduce/opy_era.c
================================================
#include "Runtime.h"

// >op(a *)
// -------- OPY-ERA
// *
Term reduce_opy_era(Term opy, Term era) {
  inc_itr();
  return era;
}


================================================
FILE: src/HVM/runtime/reduce/opy_lam.c
================================================
#include "Runtime.h"

// >op(a λx(B))
// ------------ OPY-LAM
// ⊥
Term reduce_opy_lam(Term opy, Term era) {
  printf("invalid:opy-lam");
  exit(0);
}


================================================
FILE: src/HVM/runtime/reduce/opy_sup.c
================================================
#include "Runtime.h"

// >op(a &L{x y})
// --------------------- OPY-SUP
// &L{>op(a x) >op(a y)}
Term reduce_opy_sup(Term opy, Term sup) {
  inc_itr();
  Loc opy_loc = term_loc(opy);
  Loc sup_loc = term_loc(sup);
  Lab sup_lab = term_lab(sup);
  Term nmx    = got(opy_loc + 1);
  Term tm0    = got(sup_loc + 0);
  Term tm1    = got(sup_loc + 1);
  Loc op0     = sup_loc;
  Loc op1     = opy_loc;
  Loc su0     = alloc_node(2);
  set(op0 + 1, nmx);
  set(op1 + 0, tm1);
  set(su0 + 0, term_new(OPY, term_lab(opy), op0));
  set(su0 + 1, term_new(OPY, term_lab(opy), op1));
  return term_new(SUP, sup_lab, su0);
}


================================================
FILE: src/HVM/runtime/reduce/opy_una.c
================================================
#include "Runtime.h"

// >op(a ↑y) / >op(a ↓y)  →  ↑>op(a y) / ↓>op(a y)
Term reduce_opy_una(Term opy, Term una, Tag tag) {
  inc_itr();
  Loc opy_loc = term_loc(opy);
  Loc una_loc = term_loc(una);
  Term rhs    = got(una_loc + 0);
  Term lhs    = got(opy_loc + 1);         // first operand stored at +1

  set(opy_loc + 0, rhs);
  set(opy_loc + 1, lhs);
  set(una_loc + 0, term_new(OPY, term_lab(opy), opy_loc));
  return una;
}

Term reduce_opy_inc(Term opy, Term inc) {
  return reduce_opy_una(opy, inc, INC);
}

Term reduce_opy_dec(Term opy, Term dec) {
  return reduce_opy_una(opy, dec, DEC);
}


================================================
FILE: src/HVM/runtime/reduce/opy_w32.c
================================================
#include "Runtime.h"

// >op(x y)
// --------- OPY-W32
// x op y
Term reduce_opy_w32(Term opy, Term w32) {
  inc_itr();
  Loc opy_loc = term_loc(opy);
  Tag t = term_tag(w32);
  u32 x = term_loc(got(opy_loc + 1));
  u32 y = term_loc(w32);
  u32 result;
  switch (term_lab(opy)) {
    case OP_ADD: result = x + y; break;
    case OP_SUB: result = x - y; break;
    case OP_MUL: result = x * y; break;
    case OP_DIV: result = x / y; break;
    case OP_MOD: result = x % y; break;
    case OP_EQ:  result = x == y; break;
    case OP_NE:  result = x != y; break;
    case OP_LT:  result = x < y; break;
    case OP_GT:  result = x > y; break;
    case OP_LTE: result = x <= y; break;
    case OP_GTE: result = x >= y; break;
    case OP_AND: result = x & y; break;
    case OP_OR:  result = x | y; break;
    case OP_XOR: result = x ^ y; break;
    case OP_LSH: result = x << y; break;
    case OP_RSH: result = x >> y; break;
    default: {
      printf("invalid:opy-w32");
      exit(0);
    }
  }
  return term_new(t, 0, result);
}


================================================
FILE: src/HVM/runtime/reduce/ref.c
================================================
#include "Runtime.h"

// @foo(a b c ...)
// -------------------- REF
// book[foo](a b c ...)
Term reduce_ref(Term ref) {
  inc_itr();
  return HVM.book[term_lab(ref)](ref);
}


================================================
FILE: src/HVM/runtime/reduce/ref_sup.c
================================================
#include "Runtime.h"

// @foo(&L{ax ay} b c ...)
// ----------------------- REF-SUP-COPY (when @L not in @foo)
// ! &L{bx by} = b
// ! &L{cx cy} = b
// ...
// &L{@foo(ax bx cx ...) @foo(ay by cy ...)}
Term reduce_ref_sup(Term ref, u16 idx) {
  inc_itr();
  Loc ref_loc = term_loc(ref);
  Lab ref_lab = term_lab(ref);
  u16 fun_id = ref_lab;
  u16 arity  = HVM.fari[fun_id];
  if (idx >= arity) {
    printf("ERROR: Invalid index in reduce_ref_sup\n");
    exit(1);
  }
  Term sup = got(ref_loc + idx);
  if (term_tag(sup) != SUP) {
    printf("ERROR: Expected SUP at index %u\n", idx);
    exit(1);
  }
  Lab sup_lab = term_lab(sup);
  Loc sup_loc = term_loc(sup);
  Term sup0 = got(sup_loc + 0);
  Term sup1 = got(sup_loc + 1);
  // Allocate space for new REF node arguments for the second branch
  Loc ref1_loc = alloc_node(arity);
  for (u64 i = 0; i < arity; ++i) {
    if (i != idx) {
      // Duplicate argument
      Term arg = got(ref_loc + i);
      Loc dup_loc = alloc_node(1);
      set(dup_loc + 0, arg);
      set(ref_loc + i, term_new(DP0, sup_lab, dup_loc));
      set(ref1_loc + i, term_new(DP1, sup_lab, dup_loc));
    } else {
      // Set the SUP components directly
      set(ref_loc + i, sup0);
      set(ref1_loc + i, sup1);
    }
  }
  // Create new REF nodes
  Term ref0 = term_new(REF, ref_lab, ref_loc);
  Term ref1 = term_new(REF, ref_lab, ref1_loc);
  // Reuse sup_loc to create the new SUP node
  set(sup_loc + 0, ref0);
  set(sup_loc + 1, ref1);
  return term_new(SUP, sup_lab, sup_loc);
}


================================================
FILE: src/HVM/runtime/reduce.c
================================================
#include "Runtime.h"

// Core reducer and helpers (dispatcher, WHNF, normal form)

Term reduce(Term term) {
  if (term_tag(term) >= ERA) return term;
  Term  next = term;
  u64   stop = *HVM.spos;
  u64   spos = stop;
  Term* sbuf = HVM.sbuf;

  while (1) {
    Tag tag = term_tag(next);
    Lab lab = term_lab(next);
    Loc loc = term_loc(next);

    // On variables: substitute
    // On eliminators: move to field
    switch (tag) {
      case LET: {
        switch (lab) {
          case LAZY: next = reduce_let(next, got(loc + 0)); continue;
          case STRI: spush(next, sbuf, &spos); next = got(loc + 0); continue;
          default:  printf("invalid:let"); exit(0);
        }
      }
      case APP:
      case MAT:
      case IFL:
      case SWI:
      case OPX:
      case OPY: { spush(next, sbuf, &spos); next = got(loc + 0); continue; }
      case DP0:
      case DP1: {
        Term sub = got(loc + 0);
        if (term_get_bit(sub) == 0) { spush(next, sbuf, &spos); next = sub; continue; }
        next = term_rem_bit(sub); continue;
      }
      case VAR: {
        Term sub = got(loc);
        if (term_get_bit(sub) == 0) break;
        next = term_rem_bit(sub); continue;
      }
      case REF: { *HVM.spos = spos; next = reduce_ref(next); spos = *HVM.spos; continue; }
      default: break;
    }

    // Empty stack: term is in WHNF
    if (spos == stop) { *HVM.spos = spos; return next; }

    // Interaction Dispatcher
    Term prev = spop(sbuf, &spos);
    switch (term_tag(prev)) {
      case LET: next = reduce_let(prev, next); continue;
      case APP: switch (tag) {
        case ERA: next = reduce_app_era(prev, next); continue;
        case LAM: next = reduce_app_lam(prev, next); continue;
        case SUP: next = reduce_app_sup(prev, next); continue;
        case CTR: next = reduce_app_ctr(prev, next); continue;
        case W32: case CHR: next = reduce_app_w32(prev, next); continue;
        case INC: next = reduce_app_inc(prev, next); continue;
        case DEC: next = reduce_app_dec(prev, next); continue;
        default: break;
      }
      case DP0:
      case DP1: switch (tag) {
        case ERA: next = reduce_dup_era(prev, next); continue;
        case LAM: next = reduce_dup_lam(prev, next); continue;
        case SUP: next = reduce_dup_sup(prev, next); continue;
        case CTR: next = reduce_dup_ctr(prev, next); continue;
        case W32: case CHR: next = reduce_dup_w32(prev, next); continue;
        case INC: next = reduce_dup_inc(prev, next); continue;
        case DEC: next = reduce_dup_dec(prev, next); continue;
        default: break;
      }
      case MAT:
      case IFL:
      case SWI: switch (tag) {
        case ERA: next = reduce_mat_era(prev, next); continue;
        case LAM: next = reduce_mat_lam(prev, next); continue;
        case SUP: next = reduce_mat_sup(prev, next); continue;
        case CTR: next = reduce_mat_ctr(prev, next); continue;
        case W32: case CHR: next = reduce_mat_w32(prev, next); continue;
        case INC: next = reduce_mat_inc(prev, next); continue;
        case DEC: next = reduce_mat_dec(prev, next); continue;
        default: break;
      }
      case OPX: switch (tag) {
        case ERA: next = reduce_opx_era(prev, next); continue;
        case LAM: next = reduce_opx_lam(prev, next); continue;
        case SUP: next = reduce_opx_sup(prev, next); continue;
        case CTR: next = reduce_opx_ctr(prev, next); continue;
        case W32: case CHR: next = reduce_opx_w32(prev, next); continue;
        case INC: next = reduce_opx_inc(prev, next); continue;
        case DEC: next = reduce_opx_dec(prev, next); continue;
        default: break;
      }
      case OPY: switch (tag) {
        case ERA: next = reduce_opy_era(prev, next); continue;
        case LAM: next = reduce_opy_lam(prev, next); continue;
        case SUP: next = reduce_opy_sup(prev, next); continue;
        case CTR: next = reduce_opy_ctr(prev, next); continue;
        case W32: case CHR: next = reduce_opy_w32(prev, next); continue;
        case INC: next = reduce_opy_inc(prev, next); continue;
        case DEC: next = reduce_opy_dec(prev, next); continue;
        default: break;
      }
      default: break;
    }

    // No interaction: push term back to stack, update parent chain
    spush(prev, sbuf, &spos);
    while (spos > stop) {
      Term host = spop(sbuf, &spos);
      set(term_loc(host) + 0, next);
      next = host;
    }
    *HVM.spos = spos;
    return next;
  }
}

Term reduce_at(Loc host) {
  Term tm0 = got(host);
  if (term_tag(tm0) >= ERA) return tm0;
  Term tm1 = reduce(tm0);
  set(host, tm1);
  return tm1;
}

Term normal(Term term) {
  Term wnf = reduce(term);
  Tag tag = term_tag(wnf);
  Lab lab = term_lab(wnf);
  Loc loc = term_loc(wnf);
  switch (tag) {
    case LAM: { Term bod = got(loc + 0); bod = normal(bod); set(term_loc(wnf) + 1, bod); return wnf; }
    case APP: { Term fun = got(loc + 0); Term arg = got(loc + 1); fun = normal(fun); arg = normal(arg); set(term_loc(wnf) + 0, fun); set(term_loc(wnf) + 1, arg); return wnf; }
    case SUP: { Term tm0 = got(loc + 0); Term tm1 = got(loc + 1); tm0 = normal(tm0); tm1 = normal(tm1); set(term_loc(wnf) + 0, tm0); set(term_loc(wnf) + 1, tm1); return wnf; }
    case DP0:
    case DP1: { Term val = got(loc + 0); val = normal(val); set(term_loc(wnf) + 0, val); return wnf; }
    case CTR: { u64 cid = lab; u64 ari = HVM.cari[cid]; for (u64 i = 0; i < ari; i++) { Term arg = got(loc + i); arg = normal(arg); set(term_loc(wnf) + i, arg); } return wnf; }
    case MAT:
    case IFL:
    case SWI: { u64 len = tag == SWI ? lab : tag == IFL ? 2 : HVM.clen[lab]; for (u64 i = 0; i <= len; i++) { Term arg = got(loc + i); arg = normal(arg); set(term_loc(wnf) + i, arg); } return wnf; }
    default: return wnf;
  }
}


================================================
FILE: src/HVM/runtime/stack.c
================================================
#include "Runtime.h"

void spush(Term term, Term* sbuf, u64* spos) {
  if (*spos >= MAX_STACK_SIZE) {
    printf("Stack memory limit exceeded\n");
    exit(1);
  }
  sbuf[(*spos)++] = term;
}

Term spop(Term* sbuf, u64* spos) {
  return sbuf[--(*spos)];
}


================================================
FILE: src/HVM/runtime/state.c
================================================
#include "Runtime.h"

State HVM = {
  .sbuf = NULL,
  .spos = NULL,
  .heap = NULL,
  .size = NULL,
  .itrs = NULL,
  .frsh = NULL,
  .book = {NULL},
  .cari = {0},
  .clen = {0},
  .cadt = {0},
  .fari = {0},
};

State* hvm_get_state() {
  return &HVM;
}

void hvm_set_state(State* hvm) {
  HVM.sbuf = hvm->sbuf;
  HVM.spos = hvm->spos;
  HVM.heap = hvm->heap;
  HVM.size = hvm->size;
  HVM.itrs = hvm->itrs;
  HVM.frsh = hvm->frsh;
  for (int i = 0; i < 65536; i++) {
    HVM.book[i] = hvm->book[i];
    HVM.fari[i] = hvm->fari[i];
    HVM.cari[i] = hvm->cari[i];
    HVM.clen[i] = hvm->clen[i];
    HVM.cadt[i] = hvm->cadt[i];
  }
}

void hvm_define(u16 fid, Term (*func)()) {
  HVM.book[fid] = func;
}

void hvm_set_cari(u16 cid, u16 arity) {
  HVM.cari[cid] = arity;
}

void hvm_set_fari(u16 fid, u16 arity) {
  HVM.fari[fid] = arity;
}

void hvm_set_clen(u16 cid, u16 cases) {
  HVM.clen[cid] = cases;
}

void hvm_set_cadt(u16 cid, u16 adt) {
  HVM.cadt[cid] = adt;
}


================================================
FILE: src/HVM/runtime/term.c
================================================
// Term encoding and helpers
// -------------------------
// Layout (least-significant bit on the right):
//   [ 63 ............... 24 ][ 23 .... 8 ][ 7 ][ 6 .... 0 ]
//        location (40b)          lab(16b)  S        tag(7b)
//   - tag: 7-bit node tag (see Runtime.h constants)
//   - S  : substitution/"sub" bit (1 when value is substituted)
//   - lab: constructor/operator label (16 bits)
//   - loc: heap location / payload (40 bits)

#include "Runtime.h"

// Bit masks and shifts for clarity (no behavior change)
#define TAG_MASK   (0x7FULL)
#define SUB_MASK   (1ULL << 7)
#define LAB_MASK   (0xFFFFULL)
#define LOC_MASK   (0xFFFFFFFFFFULL)
#define LAB_SHIFT  (8)
#define LOC_SHIFT  (24)

Term term_new(Tag tag, Lab lab, Loc loc) {
  return ((Term)tag)
       | ((Term)lab << LAB_SHIFT)
       | ((Term)loc << LOC_SHIFT);
}

Tag term_tag(Term x) {
  return (Tag)(x & TAG_MASK);
}

Lab term_lab(Term x) {
  return (Lab)((x >> LAB_SHIFT) & LAB_MASK);
}

Loc term_loc(Term x) {
  return (Loc)((x >> LOC_SHIFT) & LOC_MASK);
}

u64 term_get_bit(Term x) {
  return (x >> 7) & 1ULL;
}

Term term_set_bit(Term x) {
  return x | SUB_MASK;
}

Term term_rem_bit(Term x) {
  return x & ~SUB_MASK;
}

Term term_set_loc(Term x, Loc loc) {
  return (x & 0x0000000000FFFFFFULL) | ((Term)loc << LOC_SHIFT);
}

_Bool term_is_atom(Term t) {
  Tag tg = term_tag(t);
  return (tg == ERA) || (tg == W32) || (tg == CHR);
}