Repository: GiggleLiu/NiLang.jl Branch: master Commit: 9f622819bfd6 Files: 114 Total size: 479.4 KB Directory structure: gitextract_mdwro7z3/ ├── .github/ │ └── workflows/ │ ├── CompatHelper.yml │ ├── TagBot.yml │ └── ci.yml ├── .gitignore ├── LICENSE ├── Makefile ├── Project.toml ├── README.md ├── benchmark/ │ ├── besselj_gpu.jl │ ├── besselj_irreversible.jl │ ├── besselj_reversible.jl │ ├── first_function.jl │ └── stack.jl ├── docs/ │ ├── Project.toml │ ├── make.jl │ └── src/ │ ├── api.md │ ├── extend.md │ ├── faq.md │ ├── grammar.md │ ├── index.md │ ├── instructions.md │ ├── tutorial.md │ └── why.md ├── examples/ │ ├── Adam.jl │ ├── CUDA/ │ │ ├── README.md │ │ ├── rotation_gate.jl │ │ └── swap_gate.jl │ ├── README.md │ ├── Symbolics/ │ │ ├── print_jacobians.jl │ │ ├── symbolic_utils.jl │ │ └── symlib.jl │ ├── _sharedwrite.jl │ ├── batched_tr.jl │ ├── besselj.jl │ ├── boxmuller.jl │ ├── fft.jl │ ├── fib.jl │ ├── fixedlog.jl │ ├── lax_wendroff.jl │ ├── lognumber.jl │ ├── nice.jl │ ├── nice_test.jl │ ├── port_chainrules.jl │ ├── port_zygote.jl │ ├── pyramid.jl │ ├── qr.jl │ ├── realnvp.jl │ ├── sparse.jl │ └── unitary.jl ├── notebooks/ │ ├── README.md │ ├── autodiff.jl │ ├── basic.jl │ ├── documentation.jl │ ├── feynman.jl │ ├── margolus.jl │ └── reversibleprog.jl ├── src/ │ ├── NiLang.jl │ ├── autobcast.jl │ ├── autodiff/ │ │ ├── autodiff.jl │ │ ├── checks.jl │ │ ├── complex.jl │ │ ├── gradfunc.jl │ │ ├── hessian_backback.jl │ │ ├── instructs.jl │ │ ├── jacobian.jl │ │ ├── stack.jl │ │ ├── ulog.jl │ │ └── vars.jl │ ├── complex.jl │ ├── deprecations.jl │ ├── instructs.jl │ ├── macros.jl │ ├── stdlib/ │ │ ├── base.jl │ │ ├── bennett.jl │ │ ├── blas.jl │ │ ├── linalg.jl │ │ ├── mapreduce.jl │ │ ├── nnlib.jl │ │ ├── sorting.jl │ │ ├── sparse.jl │ │ ├── statistics.jl │ │ └── stdlib.jl │ ├── ulog.jl │ ├── utils.jl │ ├── vars.jl │ └── wrappers.jl └── test/ ├── autobcast.jl ├── autodiff/ │ ├── autodiff.jl │ ├── complex.jl │ ├── gradfunc.jl │ ├── hessian_backback.jl │ ├── instructs.jl │ ├── jacobian.jl │ ├── manual.jl │ ├── stack.jl │ ├── ulog.jl │ └── vars.jl ├── complex.jl ├── instructs.jl ├── macros.jl ├── runtests.jl ├── stdlib/ │ ├── base.jl │ ├── bennett.jl │ ├── blas.jl │ ├── linalg.jl │ ├── mapreduce.jl │ ├── nnlib.jl │ ├── sparse.jl │ ├── statistics.jl │ └── stdlib.jl ├── ulog.jl ├── utils.jl ├── vars.jl └── wrappers.jl ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/CompatHelper.yml ================================================ name: CompatHelper on: schedule: - cron: '00 * * * *' issues: types: [opened, reopened] jobs: build: runs-on: ${{ matrix.os }} strategy: matrix: julia-version: [1.5] julia-arch: [x86] os: [ubuntu-latest] steps: - uses: julia-actions/setup-julia@latest with: version: ${{ matrix.julia-version }} - name: Pkg.add("CompatHelper") run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - name: CompatHelper.main() env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: julia -e 'using CompatHelper; CompatHelper.main()' ================================================ FILE: .github/workflows/TagBot.yml ================================================ name: TagBot on: issue_comment: types: - created workflow_dispatch: jobs: TagBot: if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' runs-on: ubuntu-latest steps: - uses: JuliaRegistries/TagBot@v1 with: token: ${{ secrets.GITHUB_TOKEN }} ssh: ${{ secrets.DOCUMENTER_KEY 
}} ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: - push - pull_request jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: version: - '1.5' - 'nightly' os: - ubuntu-latest - macOS-latest - windows-latest arch: - x64 steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - uses: actions/cache@v1 env: cache-name: cache-artifacts with: path: ~/.julia/artifacts key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} restore-keys: | ${{ runner.os }}-test-${{ env.cache-name }}- ${{ runner.os }}-test- ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v1 with: file: lcov.info docs: name: Documentation runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 with: version: '1' - run: | julia --project=docs -e ' using Pkg Pkg.develop(PackageSpec(path=pwd())) Pkg.instantiate()' - run: | julia --project=docs -e ' using Documenter: doctest using NiLang doctest(NiLang)' - run: julia --project=docs docs/make.jl env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} ================================================ FILE: .gitignore ================================================ *.jl.*.cov *.jl.cov *.jl.mem .DS_Store Manifest.toml /dev/ /docs/build/ /docs/site/ /docs/src/examples/ _local/ *.swp .vscode/ ================================================ FILE: LICENSE ================================================ Copyright (c) 2019 JinGuo Liu, thautwarm Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright [year] [fullname]

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

================================================
FILE: Makefile
================================================
JL = julia --project

default: init test

init:
	$(JL) -e 'using Pkg; Pkg.precompile()'
init-docs:
	$(JL) -e 'using Pkg; Pkg.activate("docs"); Pkg.develop(path="."); Pkg.precompile()'

update:
	$(JL) -e 'using Pkg; Pkg.update(); Pkg.precompile()'
update-docs:
	$(JL) -e 'using Pkg; Pkg.activate("docs"); Pkg.update(); Pkg.precompile()'

test:
	$(JL) -e 'using Pkg; Pkg.test("NiLang")'

coverage:
	$(JL) -e 'using Pkg; Pkg.test("NiLang"; coverage=true)'

serve:
	$(JL) -e 'using Pkg; Pkg.activate("docs"); using LiveServer; servedocs(;skip_dirs=["docs/src/assets", "docs/src/generated"], literate_dir="examples")'

clean:
	rm -rf docs/build
	find . -name "*.cov" -type f -print0 | xargs -0 /bin/rm -f
.PHONY: init test coverage serve clean init-docs update update-docs

================================================
FILE: Project.toml
================================================
name = "NiLang"
uuid = "ab4ef3a6-0b42-11ea-31f6-e34652774712"
authors = ["JinGuo Liu", "thautwarm"]
version = "0.9.4"

[deps]
FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LogarithmicNumbers = "aa2f6b4e-9042-5d33-9679-40d3a6b85899"
MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078"
NiLangCore = "575d3204-02a4-11ea-3f62-238caa8bf11e"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"

[compat]
FixedPointNumbers = "0.6, 0.7, 0.8"
LogarithmicNumbers = "0.4, 1.0"
MLStyle = "0.4"
NiLangCore = "0.10.1"
Reexport = "0.2, 1.0"
TupleTools = "1.2"
julia = "1.3"

[extras]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Random", "Statistics", "Distributions", "FiniteDifferences"]

================================================
FILE: README.md
================================================
NiLang.jl (逆lang) is a reversible domain-specific language (DSL) that allows a program to go back to the past. It requires Julia version >= 1.3. NiLang features:

* any program written in NiLang is differentiable,
* a reversible language with abstraction and arrays,
* complex numbers,
* a reversible logarithmic number system.

![CI](https://github.com/GiggleLiu/NiLang.jl/workflows/CI/badge.svg)
[![codecov](https://codecov.io/gh/GiggleLiu/NiLang.jl/branch/master/graph/badge.svg?token=th86D4USSX)](https://codecov.io/gh/GiggleLiu/NiLang.jl)

The main docs can be found here:
[![](https://img.shields.io/badge/docs-stable-blue.svg)](https://giggleliu.github.io/NiLang.jl/stable/)
[![](https://img.shields.io/badge/docs-dev-blue.svg)](https://giggleliu.github.io/NiLang.jl/dev/)

There are also some Pluto-based notebooks:

* [tutorial](https://giggleliu.github.io/NiLang.jl/dev/notebooks/basic.html)
* [documentation](https://giggleliu.github.io/NiLang.jl/dev/notebooks/documentation.html)
* [Billiard ball model cellular automata](https://giggleliu.github.io/NiLang.jl/dev/notebooks/margolus.html)

> The strangeness of reversible computing is mainly due to
> our lack of experience with it. —Henry Baker, 1992

## To Start
```
pkg> add NiLang
```

## An example: Compute the norm of a vector
```julia
julia> using NiLang

julia> @i function f(res, y, x)
           for i=1:length(x)
               y += x[i] ^ 2
           end
           res += sqrt(y)
       end

julia> res_out, y_out, x_out = f(0.0, 0.0, [1, 2, 3.0])
(3.7416573867739413, 14.0, [1.0, 2.0, 3.0])

julia> (~f)(res_out, y_out, x_out)  # automatically generated inverse program.
(0.0, 0.0, [1.0, 2.0, 3.0])

julia> ∂res, ∂y, ∂x = NiLang.AD.gradient(Val(1), f, (0.0, 0.0, [1, 2, 3.0]))  # automatic differentiation, `Val(1)` means the first argument of `f` is the loss.
(1.0, 0.1336306209562122, [0.2672612419124244, 0.5345224838248488, 0.8017837257372732])
```

The performance of reversible-programming-based automatic differentiation is often much better than that of traditional frameworks. The following figure shows why, and how it works:

![how it works](docs/src/asset/adprog.png)
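To gain confidence in a reversible program, one can verify both its reversibility and its gradients. Below is a minimal sketch using `check_inv` and `NiLang.AD.check_grad` on the function `f` defined above (`iloss=1` marks the first argument as the loss):

```julia
using NiLang, NiLang.AD

# running `f` forward and then backward should restore all inputs
check_inv(f, (0.0, 0.0, [1, 2, 3.0]))                      # -> true

# compare the reversible-AD gradient against a numerical gradient
NiLang.AD.check_grad(f, (0.0, 0.0, [1, 2, 3.0]); iloss=1)  # -> true
```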
## Check our [paper](https://arxiv.org/abs/2003.04617)

```bibtex
@misc{Liu2020,
    title={Differentiate Everything with a Reversible Programming Language},
    author={Jin-Guo Liu and Taine Zhao},
    year={2020},
    eprint={2003.04617},
    archivePrefix={arXiv},
    primaryClass={cs.PL}
}
```

================================================
FILE: benchmark/besselj_gpu.jl
================================================
using NiLang, NiLang.AD
using CuArrays, CUDAnative, GPUArrays
using BenchmarkTools

@i @inline function :(-=)(CUDAnative.pow)(out!::GVar{T}, x::GVar{T}, n::GVar) where T
    value(out!) -= CUDAnative.pow(value(x), value(n))
    # grad x
    @routine @invcheckoff begin
        @zeros T anc1 anc2 anc3 jac1 jac2
        DEC(value(n))
        anc1 += CUDAnative.pow(value(x), value(n))
        INC(value(n))
        jac1 += anc1 * value(n)
        # get grad of n
        anc2 += log(value(x))
        anc3 += CUDAnative.pow(value(x), value(n))
        jac2 += anc3*anc2
    end
    grad(x) += grad(out!) * jac1
    grad(n) += grad(out!) * jac2
    ~@routine
end

@i @inline function :(-=)(CUDAnative.pow)(out!::GVar{T}, x::GVar, n) where T
    value(out!) -= CUDAnative.pow(value(x), n)
    @routine @invcheckoff begin
        anc1 ← zero(value(x))
        jac ← zero(value(x))
        DEC(value(n))
        anc1 += CUDAnative.pow(value(x), n)
        INC(value(n))
        jac += anc1 * n
    end
    grad(x) += grad(out!) * jac
    ~@routine
end

@i @inline function :(-=)(CUDAnative.pow)(out!::GVar{T}, x, n::GVar) where T
    value(out!) -= CUDAnative.pow(x, value(n))
    # get jac of n
    @routine @invcheckoff begin
        anc1 ← zero(x)
        anc2 ← zero(x)
        jac ← zero(x)
        anc1 += log(x)
        anc2 += CUDAnative.pow(x, value(n))
        jac += anc1*anc2
    end
    grad(n) += grad(out!) * jac
    ~@routine
end

# You need to replace all "^" operations in `ibesselj` with `CUDAnative.pow`.
# Please remember to turn the invertibility check off, because error handling is not supported in a CUDA thread.
# Functions `i_dirtymul` and `i_factorial` are unchanged.
@i function ibesselj(out!, ν, z; atol=1e-8)
    @routine @invcheckoff begin
        k ← 0
        fact_nu ← zero(ν)
        halfz ← zero(z)
        halfz_power_nu ← zero(z)
        halfz_power_2 ← zero(z)
        out_anc ← zero(z)
        anc1 ← zero(z)
        anc2 ← zero(z)
        anc3 ← zero(z)
        anc4 ← zero(z)
        anc5 ← zero(z)
        halfz += z / 2
        halfz_power_nu += CUDAnative.pow(halfz, ν)
        halfz_power_2 += CUDAnative.pow(halfz, 2)
        i_factorial(fact_nu, ν)
        anc1 += halfz_power_nu/fact_nu
        out_anc += anc1
        @from k==0 while abs(unwrap(anc1)) > atol && abs(unwrap(anc4)) < atol
            INC(k)
            @routine begin
                anc5 += k
                anc5 += ν
                anc2 -= k * anc5
                anc3 += halfz_power_2 / anc2
            end
            i_dirtymul(anc1, anc3, anc4)
            out_anc += anc1
            ~@routine
        end
    end
    out! += out_anc
    ~@routine
end

# Define your reversible kernel function that calls the reversible Bessel function
@i function ibesselj_kernel(out!, ν, z, atol)
    i ← (blockIdx().x-1) * blockDim().x + threadIdx().x
    @inbounds ibesselj(out![i], ν, z[i]; atol=atol)
    @invcheckoff i → (blockIdx().x-1) * blockDim().x + threadIdx().x
end

# To launch this reversible kernel, you also need a reversible host function.
@i function ibesselj(out!::CuVector, ν, z::CuVector; atol=1e-8)
    XY ← GPUArrays.thread_blocks_heuristic(length(out!))
    @cuda threads=XY.:1 blocks=XY.:2 ibesselj_kernel(out!, ν, z, atol)
    @invcheckoff XY → GPUArrays.thread_blocks_heuristic(length(out!))
end

# To test this function, we first define the input parameters `a` and the output `out!`
N = 4096
T = Float64
a = CuArray(ones(T, N))
out! = CuArray(zeros(T, N))

# We wrap the output with a randomly initialized gradient field, assuming the gradients come from a virtual loss function.
# Also, we need to initialize an empty gradient field for the elements in the input CUDA tensor `a`.
out! = ibesselj(out!, 2, GVar.(a))[1]
out_g! = GVar.(out!, CuArray(ones(T, N)))
a_g = GVar.(a)

# Call the inverse program; multiple dispatch will drive you to the goal.
println("Benchmarking NiLang on CUDA, N = $N, T = $T")
display(@benchmark CuArrays.@sync (~ibesselj)($out_g!, 2, $a_g))

================================================
FILE: benchmark/besselj_irreversible.jl
================================================
using Zygote
using ForwardDiff
using BenchmarkTools

function besselj(ν, z; atol=1e-8)
    k = 0
    s = (z/2)^ν / factorial(ν)
    out = s
    while abs(s) > atol
        k += 1
        s *= (-1) / k / (k+ν) * (z/2)^2
        out += s
    end
    out
end

function grad_besselj_manual(ν, z; atol=1e-8)
    (besselj(ν-1, z; atol=atol) - besselj(ν+1, z; atol=atol))/2
end

println("Benchmarking Julia")
display(@benchmark besselj(2, 1.0))
println("Benchmarking Manual")
display(@benchmark grad_besselj_manual(2, 1.0))
println("Benchmarking Zygote")
display(@benchmark Zygote.gradient(besselj, 2, 1.0))
println("Benchmarking ForwardDiff")
display(@benchmark ForwardDiff.derivative(x->besselj(2, x), 1.0))

================================================
FILE: benchmark/besselj_reversible.jl
================================================
using NiLang, NiLang.AD
using BenchmarkTools

include("../examples/besselj.jl")

# To test this function, we first define the input parameter `a` and the output `out!`
a = 1.0
out! = 0.0

# We wrap the output with a randomly initialized gradient field, assuming the gradients come from a virtual loss function.
# Also, we need to initialize an empty gradient field for the input `a`.
out! = ibesselj(out!, 2, a)[1]
out_g! = GVar(out!, 1.0)
a_g = GVar(a)

# Call the inverse program; multiple dispatch will drive you to the goal.
println("Benchmarking NiLang") display(@benchmark ibesselj($out!, 2, $a)) println("Benchmarking NiLang.AD") display(@benchmark (~ibesselj)($out_g!, 2, $a_g)) ================================================ FILE: benchmark/first_function.jl ================================================ t1 = time() using NiLang @i function dot(x, y, z) for i=1:10 x += y[i]' * z[i] end end t2 = time() println("costs $(t2-t1)s") ================================================ FILE: benchmark/stack.jl ================================================ ================================================ FILE: docs/Project.toml ================================================ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" Compose = "a81c6b42-2e10-5240-aca2-a61377ecd94b" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" LogarithmicNumbers = "aa2f6b4e-9042-5d33-9679-40d3a6b85899" NiLang = "ab4ef3a6-0b42-11ea-31f6-e34652774712" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Viznet = "52a3aca4-6234-47fd-b74a-806bdf78ede9" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" ================================================ FILE: docs/make.jl ================================================ using Documenter, NiLang using SparseArrays using Literate tutorialpath = joinpath(@__DIR__, "src/examples") sourcepath = joinpath(dirname(@__DIR__), "examples") for jlfile in ["besselj.jl", "sparse.jl", "qr.jl", "port_zygote.jl", "port_chainrules.jl", "fib.jl", "unitary.jl", "nice.jl", "realnvp.jl", "boxmuller.jl", "lognumber.jl", "pyramid.jl"] Literate.markdown(joinpath(sourcepath, jlfile), tutorialpath) end # # Pluto pages # import Pkg # Pkg.add([ # Pkg.PackageSpec(url="https://github.com/GiggleLiu/PlutoUtils.jl", rev="static-export"), # Pkg.PackageSpec(url="https://github.com/fonsp/Pluto.jl", rev="05e5b68"), # ]); makedocs(; modules=[NiLang], format=Documenter.HTML(), pages=[ "Home" => "index.md", "What and Why" => "why.md", "Tutorial" => Any[ "tutorial.md", "examples/port_zygote.md", "examples/port_chainrules.md" ], "Examples" => Any[ "examples/fib.md", "examples/pyramid.md", "examples/besselj.md", "examples/sparse.md", "examples/lognumber.md", "examples/unitary.md", #"examples/nice.md", #"examples/realnvp.md", "examples/qr.md", "examples/boxmuller.md", ], "API & Manual" => Any[ "instructions.md", "extend.md", "api.md", "faq.md", ] ], repo="https://github.com/GiggleLiu/NiLang.jl/blob/{commit}{path}#L{line}", sitename="NiLang.jl", authors="JinGuo Liu, thautwarm", ) # import PlutoUtils # PlutoUtils.Export.github_action(; notebook_dir=NiLang.project_relative_path("notebooks"), offer_binder=false, export_dir=NiLang.project_relative_path("docs", "build", "notebooks"), generate_default_index=false, project=NiLang.project_relative_path("docs")) deploydocs(; repo="github.com/GiggleLiu/NiLang.jl.git", ) ================================================ FILE: docs/src/api.md 
================================================
```@meta
DocTestSetup = quote
    using NiLangCore, NiLang, NiLang.AD, Test
end
```

# API Manual

## Compiling Tools (Reexported from NiLangCore)
```@autodocs
Modules = [NiLangCore]
Order = [:macro, :function, :type]
```

## Instructions
```@autodocs
Modules = [NiLang]
Order = [:macro, :function, :type]
```

## Automatic Differentiation
```@autodocs
Modules = [NiLang.AD]
Order = [:macro, :function, :type]
```

================================================
FILE: docs/src/extend.md
================================================
# How to extend

## Extend `+=`, `-=` and `⊻=` for irreversible one-out functions

It works directly:

```julia
julia> using SpecialFunctions, NiLang

julia> x, y = 2.1, 1.0
(2.1, 1.0)

julia> @instr y += besselj0(x)
2.1

julia> x, y
(2.1, 1.7492472503018073)

julia> @instr ~(y += besselj0(x))
2.1

julia> x, y
(2.1, 1.0)
```

Here the statement
```julia
@instr y += besselj0(x)
```
is mapped to a call to the instruction object
```julia
PlusEq(besselj0)(y, x)
```

However, doing this does not give you correct gradients. For `y += scalar_out_function(x)`, one can bind the backward rules like

```julia
julia> using ChainRules, NiLang.AD

julia> besselj0_back(x) = ChainRules.rrule(besselj0, x)[2](1.0)[2]
besselj0_back (generic function with 1 method)

julia> primitive_grad(::typeof(besselj0), x::Real) = besselj0_back(x)
primitive_grad (generic function with 1 method)

julia> xg, yg = GVar(x), GVar(y, 1.0)
(GVar(2.1, 0.0), GVar(1.0, 1.0))

julia> @instr yg -= besselj0(xg)
GVar(2.1, -0.5682921357570385)

julia> xg, yg
(GVar(2.1, -0.5682921357570385), GVar(0.8333930196680097, 1.0))

julia> @instr yg += besselj0(xg)
GVar(2.1, 0.0)

julia> xg, yg
(GVar(2.1, 0.0), GVar(1.0, 1.0))

julia> NiLang.AD.check_grad(PlusEq(besselj0), (1.0, 2.1); iloss=1)
true

julia> using BenchmarkTools

julia> @benchmark PlusEq(besselj0)($yg, $xg)
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     451.523 ns (0.00% GC)
  median time:      459.431 ns (0.00% GC)
  mean time:        477.419 ns (0.00% GC)
  maximum time:     857.036 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     197
```

Good!

## Reversible multi-in, multi-out functions

Define two normal Julia functions that are inverses of each other, and use the macro `@dual` to tell the compiler that they are reversible to each other. For example, the pair of dual functions `ROT` (2D rotation) and `IROT` (inverse rotation) below is already defined in NiLang.

```julia
"""
    ROT(a!, b!, θ) -> a!', b!', θ
"""
@inline function ROT(i::Real, j::Real, θ::Real)
    a, b = rot(i, j, θ)
    a, b, θ
end

"""
    IROT(a!, b!, θ) -> ROT(a!, b!, -θ)
"""
@inline function IROT(i::Real, j::Real, θ::Real)
    i, j, _ = ROT(i, j, -θ)
    i, j, θ
end

@dual ROT IROT
```

One can easily check the reversibility by typing

```julia
julia> check_inv(ROT, (1.0, 2.0, 3.0))
true
```
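The same pattern works for user-defined instructions. Here is a minimal sketch (the names `MUL2` and `DIV2` are hypothetical, invented for illustration); doubling and halving are exact in binary floating point, so the two functions form a valid dual pair:

```julia
using NiLang

@inline MUL2(x!::Real) = 2 * x!   # forward: double the argument
@inline DIV2(x!::Real) = x! / 2   # backward: halve the argument
@dual MUL2 DIV2

check_inv(MUL2, (3.0,))  # -> true
```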
For self-reversible functions, one can declare the reversibility like this

```julia
"""
    SWAP(a!, b!) -> b!, a!
"""
@inline function SWAP(a!::Real, b!::Real)
    b!, a!
end
@selfdual SWAP
```

To bind gradients to such a multi-in, multi-out function, the general approach is *binding the backward rule on its inverse*!

```julia
@i @inline function IROT(a!::GVar, b!::GVar, θ::GVar)
    IROT(a!.x, b!.x, θ.x)
    NEG(θ.x)
    θ.x -= π/2
    ROT(a!.g, b!.g, θ.x)
    θ.g += a!.x * a!.g
    θ.g += b!.x * b!.g
    θ.x += π/2
    NEG(θ.x)
    ROT(a!.g, b!.g, π/2)
end

@i @inline function IROT(a!::GVar, b!::GVar, θ::Real)
    IROT(a!.x, b!.x, θ)
    NEG(θ)
    θ -= π/2
    ROT(a!.g, b!.g, θ)
    θ += π/2
    NEG(θ)
    ROT(a!.g, b!.g, π/2)
end

@nograd IROT(a!::Real, b!::Real, θ::GVar)
```

When this inverse function is called, the backward rules are applied automatically. Good! This method can also be extended to linear algebra functions; however, the memory allocation overhead is high, because one needs to wrap each element with `GVar`.

================================================
FILE: docs/src/faq.md
================================================
## Why does the reversibility check fail even though the program is reversible?

Because floating-point arithmetic is not exactly reversible, the invertibility check might sometimes fail due to rounding errors. To fix this issue, you may want to make the check less restrictive

```julia
NiLangCore.GLOBAL_ATOL[] = 1e-6  # default is 1e-8
```

Or just turn off the check in the program (only if you are sure the program is correct)

```julia
@routine @invcheckoff begin
    ...
end
```

Turning off the check will make your program faster too!

## What makes the gradient check fail?

##### Finite difference error due to numeric instability

The `NiLang.AD.check_grad` function sometimes fails due to either rounding error or finite-difference error; you may want to check the gradient manually with the `NiLang.AD.ng` function (numeric gradient).

```julia
julia> NiLang.AD.ng(jin, copy.((out,b,ma,jinzhi,spread,bili)), 6; iloss=1, δ=1e-4)
-5449.643843214744

julia> NiLang.AD.ng(jin, copy.((out,b,ma,jinzhi,spread,bili)), 5; iloss=1, δ=1e-4)
4503-element Array{Float64,1}:
 -0.0023380584934784565
 -0.0021096593627589755
 -0.0019811886886600405
  ⋮
 -0.009526640951662557
 -0.006004695478623034
  0.0
```

and

```julia
julia> NiLang.AD.gradient(Val(1), jin, copy.((out,b,ma,jinzhi,spread,bili)))[end]
-5449.643116967733

julia> NiLang.AD.gradient(Val(1), jin, copy.((out,b,ma,jinzhi,spread,bili)))[end-1]
4503-element Array{Float64,1}:
 -0.0005285958114468947
 -0.00030225263725219137
 -0.00017545437275561654
  ⋮
 -0.010422627668532736
 -0.0069140339974312695
  0.0
```

Here, we can see the `jin` function is numerically sensitive to perturbations, which makes the numeric gradient incorrect. The above code is from https://github.com/HanLi123/NiLang/issues/3

##### Allocating a non-constant ancilla

Another possibility is that a non-constant ancilla is allocated.

```julia
julia> @i function f1(z, y)
           x ← y  # wrong!
           z += x
           x → y
       end

julia> NiLang.AD.gradient(Val(1), f1, (0.0, 1.0))
(1.0, 0.0)

julia> @i function f2(z, y)
           x ← zero(y)
           x += y
           z += x
           x -= y
           x → zero(y)
       end

julia> NiLang.AD.gradient(Val(1), f2, (0.0, 1.0))
(1.0, 1.0)
```

`f1` gives an incorrect gradient because when the ancilla `x` is deallocated, its gradient field is also discarded.
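Both cases can be screened with `NiLang.AD.check_grad`, which compares the reversible-AD gradient against a numeric one. A small sketch (the expected outcomes follow from the discussion above):

```julia
using NiLang, NiLang.AD

NiLang.AD.check_grad(f1, (0.0, 1.0); iloss=1)  # should report a mismatch (false)
NiLang.AD.check_grad(f2, (0.0, 1.0); iloss=1)  # -> true
```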
================================================
FILE: docs/src/grammar.md
================================================
# NiLang Grammar

To define a reversible function, one can use the macro **@i** plus a function definition like below

```julia
"""
docstring...
"""
@i function f(args..., kwargs...) where {...}
    <Stmts>
end
```

where the definition of **<Stmts>** is shown in the grammar below. The following is a list of terminologies used in the definition of the grammar

* `<Ident>`, symbols
* `<Num>`, numbers
* 0, empty statement
* `<JuliaExpr>`, native Julia expression
* [ ... ], zero or one repetition of the enclosed pattern.

Here, all `<JuliaExpr>` should be pure, otherwise reversibility is not guaranteed. A dataview is a view of data; it can be a bijective mapping of an object, an item of an array or a field of an object.

```bnf
Stmts : 0
      | Stmt
      | Stmts Stmt
      ;
Stmt : BlockStmt
     | IfStmt
     | WhileStmt
     | ForStmt
     | InstrStmt
     | RevStmt
     | AncillaStmt
     | TypecastStmt
     | @routine Stmt
     | @safe <JuliaExpr>
     | CallStmt
     ;
BlockStmt : 'begin' Stmts 'end';
RevCond : '(' <JuliaExpr> ',' <JuliaExpr> ')';
IfStmt : 'if' RevCond Stmts ['else' Stmts] 'end';
WhileStmt : 'while' RevCond Stmts 'end';
Range : <JuliaExpr> ':' <JuliaExpr> [':' <JuliaExpr>];
ForStmt : 'for' <Ident> '=' Range Stmts 'end';
KwArg : <Ident> '=' <JuliaExpr>;
KwArgs : [KwArgs ','] KwArg;
CallStmt : <JuliaExpr> '(' [DataViews] [';' KwArgs] ')';
Constant : <Num> | 'π';
InstrBinOp : '+=' | '-=' | '⊻=';
InstrTrailer : ['.'] '(' [DataViews] ')';
InstrStmt : DataView InstrBinOp <JuliaExpr> [InstrTrailer];
RevStmt : '~' Stmt;
AncillaStmt : <Ident> '←' <JuliaExpr>
            | <Ident> '→' <JuliaExpr>
            ;
TypecastStmt : '(' <JuliaExpr> '=>' <JuliaExpr> ')' '(' <Ident> ')';
@routine : '@routine' Stmt;
@safe : '@safe' <JuliaExpr>;
DataViews : 0
          | DataView
          | DataViews ',' DataView
          | DataViews ',' DataView '...'
          ;
DataView : DataView '[' <JuliaExpr> ']'
         | DataView '.' <Ident>
         | DataView '|>' <JuliaExpr>
         | DataView '\''
         | '-' DataView
         | Constant
         | <Ident>
         ;
```
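As an illustration of these constructs, the following sketch (our example, not part of the formal grammar) exercises an `AncillaStmt`, a `@routine`/`~@routine` pair, and an `IfStmt` whose `RevCond` carries a precondition and a postcondition:

```julia
using NiLang

@i function clipped_square(y!, x)
    anc ← zero(x)          # AncillaStmt: allocate an ancilla with a constant value
    @routine begin         # record a routine
        anc += x * x
    end
    if (x > 0, x > 0)      # RevCond: (precondition, postcondition); here they coincide
        y! += anc
    end
    ~@routine              # uncompute the routine, restoring `anc` to zero
    anc → zero(x)          # AncillaStmt: deallocate the ancilla
end

clipped_square(0.0, 2.0)   # -> (4.0, 2.0)
clipped_square(0.0, -2.0)  # -> (0.0, -2.0)
```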
================================================
FILE: docs/src/index.md
================================================
# NiLang.jl

NiLang is a reversible eDSL whose programs can run backwards. The motivation is to support source-to-source AD. Check [our paper](https://arxiv.org/abs/2003.04617)!

Discussions are welcome in the [Julia slack](https://slackinvite.julialang.org/), in the **#autodiff** and **#reversible-computing** channels.

## Tutorials
```@contents
Pages = [
    "tutorial.md",
    "examples/port_zygote.md",
]
Depth = 1
```

Also see blog posts

* [How to write a program differentiably](https://nextjournal.com/giggle/how-to-write-a-program-differentiably)
* [Simulate a reversible Turing machine in 50 lines of code](https://nextjournal.com/giggle/rtm50)

## Documentation

## Examples
```@contents
Pages = [
    "examples/fib.md",
    "examples/besselj.md",
    "examples/sparse.md",
    "examples/lognumber.md",
    "examples/unitary.md",
    "examples/qr.md",
    "examples/nice.md",
    "examples/realnvp.md",
    "examples/boxmuller.md",
]
Depth = 1
```

## Manual
```@contents
Pages = [
    "grammar.md",
    "instructions.md",
    "extend.md",
    "examples/sharedwrite.md",
    "api.md",
    "faq.md",
]
Depth = 1
```

================================================
FILE: docs/src/instructions.md
================================================
# Instruction Reference

## Instruction definitions

The Julia functions and symbols for instructions:

| instruction | translated | symbol |
| ----------- | ---------- | ------ |
| $y \mathrel{+}= f(args...)$ | `PlusEq(f)(args...)` | $\oplus$ |
| $y \mathrel{-}= f(args...)$ | `MinusEq(f)(args...)` | $\ominus$ |
| $y \mathrel{\veebar}= f(args...)$ | `XorEq(f)(args...)` | $\odot$ |

The list of reversible instructions implemented in NiLang:

| instruction | output |
| ----------- | ------ |
| ${\rm SWAP}(a, b)$ | $b, a$ |
| ${\rm ROT}(a, b, \theta)$ | $a \cos\theta - b\sin\theta, b \cos\theta + a\sin\theta, \theta$ |
| ${\rm IROT}(a, b, \theta)$ | $a \cos\theta + b\sin\theta, b \cos\theta - a\sin\theta, \theta$ |
| $y \mathrel{+}= a^\wedge b$ | $y+a^b, a, b$ |
| $y \mathrel{+}= \exp(x)$ | $y+e^x, x$ |
| $y \mathrel{+}= \log(x)$ | $y+\log x, x$ |
| $y \mathrel{+}= \sin(x)$ | $y+\sin x, x$ |
| $y \mathrel{+}= \cos(x)$ | $y+\cos x, x$ |
| $y \mathrel{+}= {\rm abs}(x)$ | $y+\vert x\vert, x$ |
| ${\rm NEG}(y)$ | $-y$ |

"." denotes broadcasting operations in Julia.

## Jacobians and Hessians for Instructions

See my [blog post](https://giggleliu.github.io/2020/01/18/jacobians.html).
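To make the translation concrete, here is a short sketch (ours) showing that the statement form and the functional form of an instruction are interchangeable, and that prefixing a statement with `~` runs its inverse:

```julia
using NiLang

y, x = 0.0, 0.5
@instr y += sin(x)      # statement form; equivalent to y, x = PlusEq(sin)(y, x)
# now y == sin(0.5), and x is unchanged

@instr ~(y += sin(x))   # the inverse statement, i.e. y -= sin(x)
# y == 0.0 again
```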
================================================
FILE: docs/src/tutorial.md
================================================
# My first NiLang program

## Basic Statements

| Statement | Meaning |
| :------------------------ | :----------------------------------------------------------- |
| x ← val | allocate a new variable `x`, with an initial value `val` (a constant). |
| x → val | deallocate variable `x` with content `val`. |
| x += f(y) | a reversible instruction. |
| x .+= f.(y) | instruction call with broadcasting. |
| f(y) | a reversible function. |
| f.(y) | function call with broadcasting. |
| if (pre, post) ... end | if statement. |
| @from post while pre ... end | while statement. |
| for x=1:3 ... end | for statement. |
| begin ... end | block statement. |
| @safe ... | insert an irreversible statement. |
| ~(...) | invert a statement. |
| @routine ... | record a routine in the **routine stack**. |
| ~@routine | place the inverse of the routine on the **routine stack** top. |

The condition expressions in **if** and **while** statements are a bit hard to digest; please refer to our paper [arXiv:2003.04617](https://arxiv.org/abs/2003.04617).

## A reversible program

Our first program computes a loss function defined as

```math
\mathcal{L} = {\vec z}^T(a\vec{x} + \vec{y}),
```

where $\vec x$, $\vec y$ and $\vec{z}$ are column vectors and $a$ is a scalar.

```julia
@i function r_axpy!(a::T, x::AbstractVector{T}, y!::AbstractVector{T}) where T
    @safe @assert length(x) == length(y!)
    for i=1:length(x)
        y![i] += a * x[i]
    end
end

@i function r_loss(out!, a, x, y!, z)
    r_axpy!(a, x, y!)
    for i=1:length(z)
        out! += z[i] * y![i]
    end
end
```

Functions do not have return statements; they return their input arguments instead. Hence `r_loss` defines a bijection from 5 variables to 5 variables. Let's check the reversibility

```julia
julia> out, a, x, y, z = 0.0, 2.0, randn(3), randn(3), randn(3)
(0.0, 2.0, [0.9265845776642722, 0.8532458027149912, 0.6201064385679095], [1.1142808415540468, 0.5506163710455121, -1.9873779917908814], [1.1603953198942412, 0.5562855137395296, 1.9650050430758796])

julia> out, a, x, y, z = r_loss(out, a, x, y, z)
(3.2308283403544342, 2.0, [0.9265845776642722, 0.8532458027149912, 0.6201064385679095], [2.967449996882591, 2.2571079764754947, -0.7471651146550624], [1.1603953198942412, 0.5562855137395296, 1.9650050430758796])
```

We find the contents of `out` and `y` are changed after calling the loss function. Then we call the inverse loss function `~r_loss`.

```julia
julia> out, a, x, y, z = (~r_loss)(out, a, x, y, z)
(0.0, 2.0, [0.9265845776642722, 0.8532458027149912, 0.6201064385679095], [1.1142808415540466, 0.5506163710455123, -1.9873779917908814], [1.1603953198942412, 0.5562855137395296, 1.9650050430758796])
```

Values are restored. Here, instead of assigning variables one by one, one can also use the macro `@instr`

```julia
@instr r_loss(out, a, x, y, z)
```

The `@instr` macro executes a reversible statement in the current scope.

## My first reversible AD program

```julia
julia> using NiLang.AD: Grad

julia> x, y, z = randn(3), randn(3), randn(3)
([2.2683181471139906, -0.7374245775047469, 0.9568936661385092], [1.0275914704043452, 1.647972121962081, -0.8349079845797637], [1.4272076815911372, 0.5317755971532034, 0.4412421572457776])

julia> Grad(r_loss)(0.0, 0.5, x, y, z; iloss=1)
(GVar(0.0, 1.0), GVar(0.5, 3.2674385142974036), GVar{Float64,Float64}[GVar(2.2683181471139906, 0.7136038407955686), GVar(-0.7374245775047469, 0.2658877985766017), GVar(0.9568936661385092, 0.2206210786228888)], GVar{Float64,Float64}[GVar(2.1617505439613405, 1.4272076815911372), GVar(1.2792598332097076, 0.5317755971532034), GVar(-0.35646115151050906, 0.4412421572457776)], GVar{Float64,Float64}[GVar(1.4272076815911372, 3.295909617518336), GVar(0.5317755971532034, 0.9105475444573341), GVar(0.4412421572457776, 0.12198568155874556)])

julia> gout, ga, gx, gy, gz = Grad(r_loss)(0.0, 0.5, x, y, z; iloss=1)
(GVar(0.0, 1.0), GVar(0.5, 3.2674385142974036), GVar{Float64,Float64}[GVar(2.2683181471139906, 0.7136038407955686), GVar(-0.7374245775047469, 0.2658877985766017), GVar(0.9568936661385092, 0.2206210786228888)], GVar{Float64,Float64}[GVar(3.295909617518336, 1.4272076815911372), GVar(0.9105475444573341, 0.5317755971532034), GVar(0.12198568155874556, 0.4412421572457776)], GVar{Float64,Float64}[GVar(1.4272076815911372, 4.4300686910753315), GVar(0.5317755971532034, 0.5418352557049606), GVar(0.4412421572457776, 0.6004325146280002)])
```

The results are a bit messy, since NiLang wraps each element with a gradient field automatically. We can extract the gradient field with the `grad` function:

```julia
julia> grad(gout)
1.0

julia> grad(ga)
3.2674385142974036

julia> grad(gx)
3-element Array{Float64,1}:
 0.7136038407955686
 0.2658877985766017
 0.2206210786228888

julia> grad(gy)
3-element Array{Float64,1}:
 1.4272076815911372
 0.5317755971532034
 0.4412421572457776

julia> grad(gz)
3-element Array{Float64,1}:
 4.4300686910753315
 0.5418352557049606
 0.6004325146280002
```

================================================
FILE: docs/src/why.md
================================================
# What is Reversible Computing and why do we need it

## What are reversible computing and reversible programming

Reversible computing is a computing paradigm in which any computational process can be undone deterministically; it requires that the user not erase any information during the computation. It boomed during 1970-2005, but ran into a winter after that. It can do anything that a traditional computing device can do, with possible overheads in time and space.

Reversible programming is often considered the computing model designed for reversible computing, while it can also be executed on an irreversible device. The following book covers a lot about reversible programming.

![Introduction to Reversible Computing](asset/revcomp.jpg)

## Why reversible computing is the future of computing: from a physicist's perspective

The driving force behind the study of reversible computing is improving the energy efficiency of our computing devices.
The energy efficiency of computing devices affects the value of [bitcoins](https://www.investopedia.com/news/do-bitcoin-mining-energy-costs-influence-its-price/), the battery size of a [spacecraft](https://ieeexplore.ieee.org/document/7945170) and the artificial intelligence (AI) industry, as we will cover below.

As is well known, the fundamental laws of physics are reversible. Have you ever wondered why our computing model is irreversible while our world is governed by reversible laws? This discrepancy comes from the fact that irreversibility is an emergent phenomenon of statistical physics: we need an ideal heat bath of "infinite size" to create irreversibility. This is why the energy efficiency of traditional devices is getting harder and harder to improve, although they are still several orders of magnitude above Landauer's limit. [Landauer's principle](https://en.wikipedia.org/wiki/Landauer%27s_principle) states that irreversible computing has a lower bound on its energy cost of ~``k_b T \ln 2`` per erased bit.

> Landauer's principle is a physical principle pertaining to the lower theoretical limit of energy consumption of computation. It holds that "any logically irreversible manipulation of information, such as the erasure of a bit or the merging of two computation paths, must be accompanied by a corresponding entropy increase in non-information-bearing degrees of freedom of the information-processing apparatus or its environment". Another way of phrasing Landauer's principle is that if an observer loses information about a physical system, the observer loses the ability to extract work from that system.

Microscopic systems that can be used to build a reversible computing device are ubiquitous, like [fluxons](https://ieeexplore.ieee.org/abstract/document/8990955), cold atoms, [DNA](https://www.amazon.com/Feynman-Lectures-Computation-Frontiers-Physics/dp/0738202967) and quantum dots. Even adiabatic CMOS (a reversible computing device utilizing CMOS technology) can potentially be orders of magnitude more energy efficient than traditional CMOS, and it is [already useful in spacecraft](https://www.osti.gov/servlets/purl/1377599). A detailed analysis of the energy-speed trade-off in adiabatic CMOS can be found [here](https://www3.nd.edu/~lent/pdf/nd/AdiabaticCMOS_HanninenSniderLent2014.pdf).

In reversible programming, [automatically differentiating any program is directly achievable](https://arxiv.org/abs/2003.04617). Automatic differentiation is a building block of artificial intelligence, and cracking this problem can potentially lead to the next boom of AI. Programs are built on top of basic instructions like "+", "*", "/", "-". We can use these basic instructions to write Bessel functions, singular value decompositions, etc. [Traditional autodiff frameworks](https://epubs.siam.org/doi/book/10.1137/1.9780898717761) keep track of intermediate states on a global stack and use them for back-propagation. However, doing this brings a space overhead linear in time, which can easily blow up the memory. Reversible programming reverses the tape directly for you, while providing flexible yet efficient time-space trade-off algorithms to control the memory usage.

I am also optimistic about reversible computing because we have so much room to improve from the energy perspective. Our computers compute one bit of information at an energy cost of ~``10^8 k_b T``, while in our body the DNA copy machinery computes a bit of information at an energy cost of ~``10 k_b T``.
To embrace true artificial intelligence, we still have a long way to go.

================================================
FILE: examples/Adam.jl
================================================
export Adam

using LinearAlgebra: BLAS, lmul!, norm  # needed for `lmul!`, `norm` and `BLAS.axpy!`

mutable struct Adam
    lr::AbstractFloat
    gclip::AbstractFloat
    beta1::AbstractFloat
    beta2::AbstractFloat
    eps::AbstractFloat
    t::Int
    fstm
    scndm
end

Adam(; lr=0.001, gclip=0, beta1=0.9, beta2=0.999, eps=1e-8) = Adam(lr, gclip, beta1, beta2, eps, 0, nothing, nothing)

function update!(w, g, p::Adam)
    gclip!(g, p.gclip)
    if p.fstm === nothing; p.fstm = zero(w); p.scndm = zero(w); end
    p.t += 1
    lmul!(p.beta1, p.fstm)
    BLAS.axpy!(1-p.beta1, g, p.fstm)
    lmul!(p.beta2, p.scndm)
    BLAS.axpy!(1-p.beta2, g .* g, p.scndm)
    fstm_corrected = p.fstm / (1 - p.beta1 ^ p.t)
    scndm_corrected = p.scndm / (1 - p.beta2 ^ p.t)
    BLAS.axpy!(-p.lr, @.(fstm_corrected / (sqrt(scndm_corrected) + p.eps)), w)
end

function gclip!(g, gclip)
    if gclip == 0
        g
    else
        gnorm = norm(g)  # `vecnorm` was renamed to `norm` in Julia 1.0
        if gnorm <= gclip
            g
        else
            lmul!(gclip/gnorm, g)  # scale the gradient in place (`BLAS.scale!` no longer exists)
        end
    end
end

================================================
FILE: examples/CUDA/README.md
================================================
# Reversible programming on GPU

Special Notes:

* please use `@invcheckoff` to turn off all reversibility checks in a kernel.
* be careful about race conditions when automatically differentiating a CUDA program.

## Suggested reading order

1. `swap_gate.jl` simulates a quantum SWAP gate; its irreversible counterpart is here: http://tutorials.yaoquantum.org/dev/generated/developer-guide/2.cuda-acceleration/
2. `rotation_gate.jl` simulates a quantum rotation gate; obtaining the gradients with respect to the rotation angle would suffer from a race condition.

================================================
FILE: examples/CUDA/rotation_gate.jl
================================================
using CUDA, GPUArrays
using NiLang, NiLang.AD

const RotGates = Union{Val{:Rz}, Val{:Rx}, Val{:Ry}}

@i @inline function instruct!(state::CuVector, gate::RotGates, loc::Int, theta::Real)
    mask ← 1<<(loc-1)
    @cuda threads=256 blocks=ceil(Int, length(state)/256) rot_kernel(gate, state, mask, theta)
end

# @launchkernel CUDADevice() 256 length(out!) bessel_kernel(out!, v, z)
@i @inline function rot_kernel(gate::Val{:Rz}, state, mask, θ)
    @invcheckoff b ← (blockIdx().x-1) * blockDim().x + threadIdx().x
    @invcheckoff if (b < length(state) && b & mask == 0, ~)
        ROT_INSTRUCT(gate, state[b+1], state[b⊻mask+1], θ)
    end
end

@i @inline function ROT_INSTRUCT(gate::Val{:Rz}, a::T, b, θ) where T
    # make sure `invcheck` is turned off!
    @routine @invcheckoff begin
        @zeros T anc1 anc2 anc3 anc4
        anc1 += θ*(0.5im)
        anc2 += CUDA.exp(anc1)
    end
    anc3 += a * anc2'
    anc4 += b * anc2
    NiLang.SWAP(a, anc3)
    NiLang.SWAP(b, anc4)
    anc3 -= a / anc2'
    anc4 -= b / anc2
    ~@routine
end

v = randn(ComplexF64, 128) |> CuArray
v1 = instruct!(copy(v), Val(:Rz), 3, 0.5)[1]
# we cannot obtain the gradient, due to the race condition.

# TODO: Rx and Ry gates, not finished!
@i @inline function ROT_INSTRUCT(gate::Val{:Rx}, a, b, θ)
    ROT_INSTRUCT(Val(:Rz), a, b, π/2)
    ROT_INSTRUCT(Val(:Ry), a, b, θ)
    ROT_INSTRUCT(Val(:Rz), a, b, -π/2)
end

@i @inline function ROT_INSTRUCT(gate::Val{:Ry}, a, b, θ)
    divint(θ, 2)
    ROT(a, b, θ)
    mulint(θ, 2)
end

================================================
FILE: examples/CUDA/swap_gate.jl
================================================
using CUDA, GPUArrays
using NiLang, NiLang.AD

"""
A reversible SWAP kernel for GPU, for the SWAP gate in quantum computing.
See the irreversible version for comparison: http://tutorials.yaoquantum.org/dev/generated/developer-guide/2.cuda-acceleration/
"""
@i @inline function swap_kernel(state::AbstractVector{T}, mask1, mask2) where T
    @invcheckoff b ← (blockIdx().x-1) * blockDim().x + threadIdx().x
    @invcheckoff if (b < length(state), ~)
        if (b&mask1==0 && b&mask2==mask2, ~)
            NiLang.SWAP(state[b+1], state[b ⊻ (mask1|mask2) + 1])
        end
    end
end

# TODO: support ::Type like argument.
"""
SWAP gate in quantum computing.
"""
@i function instruct!(state::CuVector, gate::Val{:SWAP}, locs::Tuple{Int,Int})
    mask1 ← 1 << (locs[1]-1)
    mask2 ← 1 << (locs[2]-1)
    @cuda threads=256 blocks=ceil(Int,length(state)/256) swap_kernel(state, mask1, mask2)
end

using Test
@testset "swap gate" begin
    v = cu(randn(128))
    v1 = instruct!(copy(v), Val(:SWAP), (3,4))[1]
    v2 = instruct!(copy(v1), Val(:SWAP), (3,4))[1]
    v3 = (~instruct!)(copy(v1), Val(:SWAP), (3,4))[1]
    @test !(v ≈ v1)
    @test v ≈ v2
    @test v ≈ v3
end

@i function loss(out!, state::CuVector)
    instruct!(state, Val(:SWAP), (3,4))
    out! += state[4]
end

loss(0.0, CuArray(randn(128)))
Grad(loss)(Val(1), 0.0, CuArray(randn(128)))

####################### A different loss ###############
@i function loss(out!, state::CuVector, target::CuVector)
    instruct!(state, Val(:SWAP), (3,4))
    out! += state' * target
end

# Requires defining a new primitive; we don't know how to parallelize a CUDA program automatically yet.
using LinearAlgebra: Adjoint
function (_::MinusEq{typeof(*)})(out!::GVar, x::Adjoint{<:Any, <:CuVector{<:GVar}}, y::CuVector{<:GVar})
    chfield(out!, value, value(out!)-(value.(x) * value.(y))[]),
    chfield.(parent(x), grad, grad.(parent(x)) .+ grad(out!)' .* conj.(value.(y)))',
    chfield.(y, grad, grad.(y) .+ grad(out!) .* conj.(value.(x')))
end

function (_::PlusEq{typeof(*)})(out!::GVar, x::Adjoint{<:Any, <:CuVector{<:GVar}}, y::CuVector{<:GVar})
    chfield(out!, value, value(out!)+(value.(x) * value.(y))[]),
    chfield.(parent(x), grad, grad.(parent(x)) .- grad(out!)' .* conj.(value.(y)))',
    chfield.(y, grad, grad.(y) .- grad(out!) .* conj.(value.(x')))
end

function (_::PlusEq{typeof(*)})(out!, x, y)
    out! += x * y
    out!, x, y
end

function (_::MinusEq{typeof(*)})(out!, x, y)
    out! -= x * y
    out!, x, y
end

loss(0.0, CuArray(randn(128)), CuArray(randn(128)))
Grad(loss)(Val(1), 0.0, CuArray(randn(128)), CuArray(randn(128)))

================================================
FILE: examples/README.md
================================================
# Examples

1. Reversible CUDA programming: [CUDA/](CUDA/)
2. Generate backward rules for Zygote: [port_zygote.jl](port_zygote.jl)
3. Obtaining symbolic gradients: [Symbolics/](Symbolics/)
4. Solving the graph embedding problem: [graph_embeding.jl](graph_embeding.jl) and [graph_embeding_zygote.jl](graph_embeding_zygote.jl)
5. NICE network: [nice.jl](nice.jl)
6. [Gaussian mixture model](https://github.com/JuliaReverse/NiGaussianMixture.jl)
7. 
[Bundle Adjustment](https://github.com/JuliaReverse/NiBundleAdjustment.jl) ================================================ FILE: examples/Symbolics/print_jacobians.jl ================================================ using NiLang, NiLang.AD include("symlib.jl") NiLang.AD.isvar(sym::Basic) = true NiLang.AD.GVar(sym::Basic) = GVar(sym, zero(sym)) # a patch for symbolic IROT @i @inline function NiLang.IROT(a!::GVar{<:Basic}, b!::GVar{<:Basic}, θ::GVar{<:Basic}) IROT(a!.x, b!.x, θ.x) NEG(θ.x) θ.x -= Basic(π)/2 ROT(a!.g, b!.g, θ.x) θ.g += a!.x * a!.g θ.g += b!.x * b!.g θ.x += Basic(π)/2 NEG(θ.x) ROT(a!.g, b!.g, Basic(π)/2) end NiLang.INC(x::Basic) = x + one(x) NiLang.DEC(x::Basic) = x - one(x) @inline function NiLang.ROT(i::Basic, j::Basic, θ::Basic) a, b = rot(i, j, θ) a, b, θ end @inline function NiLang.IROT(i::Basic, j::Basic, θ::Basic) i, j, _ = ROT(i, j, -θ) i, j, θ end Base.sincos(x::Basic) = (sin(x), cos(x)) function printall() syms = [Basic(:a), Basic(:b), Basic(:c)] for (subop, nargs) in [(identity, 2), (*, 3), (/, 3), (^, 3), (exp, 2), (log, 2), (sin, 2), (cos, 2)] for opm in [PlusEq, MinusEq] op = opm(subop) @show op printone(op, syms, nargs) end end for (op, nargs) in [(-, 1), (ROT, 3), (IROT, 3)] printone(op, syms, nargs) end # abs, conj end @i function jf1(op, x) op(x[1]) end @i function jf2(op, x) op(x[1], x[2]) end @i function jf3(op, x) op(x[1], x[2], x[3]) end """print the jacobian of one operator""" function printone(op, syms, n) if n==1 jac = jacobian_repeat(jf1, op, syms[1:1]; iin=2, iout=2) elseif n==2 jac = jacobian_repeat(jf2, op, syms[1:2]; iin=2, iout=2) elseif n==3 jac = jacobian_repeat(jf3, op, syms[1:3]; iin=2, iout=2) end println("------ $op ------") pretty_print_matrix(jac) end printall() ================================================ FILE: examples/Symbolics/symbolic_utils.jl ================================================ using NiLang, NiLang.AD using SymbolicUtils using SymbolicUtils: Term, Sym using LinearAlgebra const SymReal = Sym{Real} const TermReal = Term{Real} const SReals = Union{Term{Real}, Sym{Real}} import NiLang: INC, DEC, ROT, IROT, FLIP @inline FLIP(b::Sym{Bool}) = !b @inline function INC(a!::SReals) a! + one(a!) end @inline function DEC(a!::SReals) a! - one(a!) 
end @inline function ROT(i::SReals, j::SReals, θ::SReals) a, b = rot(i, j, θ) a, b, θ end @inline function IROT(i::SReals, j::SReals, θ::SReals) i, j, _ = ROT(i, j, -θ) i, j, θ end NiLang.AD.GVar(x::SReals) = NiLang.AD.GVar(x, zero(x)) Base.convert(::Type{SymReal}, x::Integer) = SymReal(Symbol(x)) Base.convert(::Type{Term{Real}}, x::Integer) = TermReal(Symbol(x)) Base.zero(x::Sym{T}) where T = zero(Sym{T}) Base.one(x::Sym{T}) where T = one(Sym{T}) Base.zero(::Type{<:Sym{T}}) where T = Sym{T}(Symbol(0)) Base.zero(::Type{<:Term{T}}) where T = Term{T}(Symbol(0)) Base.one(::Type{<:Sym{T}}) where T = Sym{T}(Symbol(1)) Base.one(::Type{<:Term{T}}) where T = Term{T}(Symbol(1)) Base.iszero(x::Sym{T}) where T = x === zero(x) Base.adjoint(x::SReals) = x SymbolicUtils.Term{T}(x::Sym{T}) where T = Term{T}(x.name) LinearAlgebra.dot(a::T, b::T) where T<:SReals = a * b include("sparse.jl") using BenchmarkTools, Random syms = @syms a::Real b::Real c::Real d::Real e::Real f::Real g::Real Base.rand(r::Random.AbstractRNG, ::Type{SymReal}, i::Integer) = rand(r, syms, i) Base.rand(r::Random.AbstractRNG, ::Type{TermReal}, i::Integer) = rand(r, TermReal.(syms), i) a = sprand(TermReal, 100, 100, 0.05); b = sprand(TermReal, 100, 100, 0.05); @benchmark SparseArrays.dot($a, $b) @benchmark idot(TermReal(Symbol(0)), $a, $b) @benchmark Grad(idot)(Val(1), TermReal(Symbol(0)), $a, $b) GVar(a) include("Symbolics/symlib.jl") syms = @vars a b c d e f g Base.rand(r::Random.AbstractRNG, ::Type{<:Basic}, i::Integer) = rand(r, syms, i) a = sprand(Basic, 100, 100, 0.05); b = sprand(Basic, 100, 100, 0.05); @benchmark SparseArrays.dot($a, $b) @benchmark idot(Basic(0), $a, $b) @benchmark Grad(idot)(Val(1), Basic(0), $a, $b) ================================================ FILE: examples/Symbolics/symlib.jl ================================================ using SymEngine using SymEngine: BasicType sconj = SymFunction("conj") Base.conj(x::Basic) = Basic(conj(SymEngine.BasicType(x))) Base.conj(x::BasicType) = real(x) - im * imag(x) Base.imag(x::BasicType{Val{:Constant}}) = Basic(0) Base.imag(x::BasicType{Val{:Symbol}}) = Basic(0) pretty_print_number(x; lengthonly=false) = pretty_print_number(stdout, x; lengthonly=lengthonly) function pretty_print_number(io::IO, x; lengthonly=false) sx = string(x) lengthonly || print(io, sx) return length(sx) end function pretty_print_number(io::IO, x::AbstractFloat; lengthonly=false) closest_int = round(Int, x) if isapprox(x, closest_int, atol=1e-12) si = string(closest_int) lengthonly || print(io, si) return length(si) else sx = string(x) lengthonly || print(io, sx) return length(sx) end end function pretty_print_number(io::IO, x::Complex; atol::Real = 1e-12, lengthonly=false) l = 0 if !isapprox(real(x), 0, atol=atol) l += pretty_print_number(io, real(x), lengthonly=lengthonly) end if !isapprox(imag(x), 0, atol=atol) if !isapprox(real(x), 0, atol=atol) lengthonly || print(imag(x) > 0 ? 
"+" : "") l += 1 end l += pretty_print_number(io, imag(x), lengthonly=lengthonly) lengthonly || print(io, "I") l += 1 else if isapprox(real(x), 0, atol=atol) lengthonly || print(io, "0") l += 1 end end return l end pretty_print_matrix(m) = pretty_print_matrix(stdout, m) function pretty_print_matrix(io::IO, m) minlen = maximum(pretty_print_number.(m, lengthonly=true))+1 for i in 1:size(m,1) print(io, "[") for j in 1:size(m,2) l = pretty_print_number(m[i,j]) print(" "^(minlen-l-(j==size(m,1)))) end println(io, "]") end end ================================================ FILE: examples/_sharedwrite.jl ================================================ # # The shared write problem on GPU # We will write a GPU version of `axpy!` function. # ## The main program using NiLang, NiLang.AD using CUDA using KernelAbstractions CUDA.allowscalar(true) # so far, this example requires patch: https://github.com/JuliaGPU/KernelAbstractions.jl/pull/52 @i @kernel function axpy_kernel(y!, α, x) ## invcheckoff to turn of `reversibility checker` ## GPU can not handle errors! @invcheckoff begin i ← @index(Global) y![i] += x[i] * α i → @index(Global) end end @i function cu_axpy!(y!::AbstractVector, α, x::AbstractVector) @launchkernel CUDADevice() 256 length(y!) axpy_kernel(y!, α, x) end @i function loss(out, y!, α, x) cu_axpy!(y!, α, x) ## Note: the following code is stupid scalar operations on CuArray, ## They are only for testing. for i=1:length(y!) out += y![i] end end y! = rand(100) x = rand(100) cuy! = y! |> CuArray cux = x |> CuArray α = 0.4 # ## Check the correctness of results using Test cu_axpy!(cuy!, α, cux) @test Array(cuy!) ≈ y! .+ α .* x (~cu_axpy!)(cuy!, α, cux) @test Array(cuy!) ≈ y! # Let's check the gradients lsout = 0.0 @instr Grad(loss)(Val(1), lsout, cuy!, α, cux) # you will see a correct vector `[0.4, 0.4, 0.4 ...]` grad.(cux) # you will see `0.0`. grad(α) # ## Why some gradients not correct? # In the above example, `α` is a scalar, whereas a scalar is not allowed to change in a CUDA kernel. # What if we change `α` to a CuArray? # ## This one works: using a vector of `α` @i @kernel function axpy_kernel(y!, α, x) @invcheckoff begin i ← @index(Global) y![i] += x[i] * α[i] i → @index(Global) end end cuy! = y! |> CuArray cux = x |> CuArray cuβ = repeat([0.4], 100) |> CuArray lsout = 0.0 @instr Grad(loss)(Val(1), lsout, cuy!, cuβ, cux) # You will see correct answer grad.(cuβ) # ## This one has the shared write problem: using a vector of `α`, but shared read. @i @kernel function axpy_kernel(y!, α, x) @invcheckoff begin i ← @index(Global) y![i] += x[i] * α[i] i → @index(Global) end end cuy! = y! |> CuArray cux = x |> CuArray cuβ = repeat([0.4], 100) |> CuArray lsout = 0.0 cuβ = [0.4] |> CuArray # Run the following will give you a happy error # # > ERROR: a exception was thrown during kernel execution. # > Run Julia on debug level 2 for device stack traces. # ```julia # @instr Grad(loss)(Val(1), lsout, cuy!, cuβ, cux) # ``` # Because, shared write is not allowed. We need someone clever enough to solve this problem for us. # ## Conclusion # * Shared scalar: the gradient of a scalar will not be updated. # * Expanded vector: works properly, but costs more memory. # * Shared 1-element vector: error on shared write. 
================================================ FILE: examples/batched_tr.jl ================================================ using NiLang, NiLang.AD using KernelAbstractions, CUDA, CUDAKernels @i @kernel function kernel_f(A, B::AbstractVector{TB}) where TB # turn off the reversibility check, since the GPU cannot handle errors @invcheckoff begin # allocate batch ← @index(Global) s ← zero(TB) # computing for i in axes(A, 1) s += A[i, i, batch] end B[batch] += s # deallocate safely s → zero(TB) batch → @index(Global) end end @i function batched_tr!(A::CuArray{T, 3}, B::CuVector{T}) where T @launchkernel CUDADevice() 256 length(B) kernel_f(A, B) end A = CuArray(randn(ComplexF32, 10, 10, 100)) B = CUDA.zeros(ComplexF32, 100) A_out, B_out = batched_tr!(A, B) # put random values in the gradient field of B grad_B = CuArray(randn(ComplexF32, 100)) A_with_g, B_with_g = (~batched_tr!)(GVar(A_out), GVar(B_out, grad_B)) # we will see nonzero gradients on the (complex) diagonal entries of A grad_A = grad(A_with_g |> Array) ================================================ FILE: examples/besselj.jl ================================================ # # Bessel function # A Bessel function of the first kind of order ``\nu`` can be computed using the Taylor expansion # ```math # J_\nu(z) = \sum\limits_{k=0}^{\infty} \frac{(z/2)^\nu}{\Gamma(k+1)\Gamma(k+\nu+1)} (-z^2/4)^{k} # ``` # where ``\Gamma(n) = (n-1)!`` is the Gamma function. One can compute the accumulated term iteratively as ``s_k = -\frac{z^2/4}{k(k+\nu)} s_{k-1}``; in the code below the magnitude of ``s_k`` is tracked with a logarithmic number and the alternating sign is handled separately. using NiLang, NiLang.AD using ForwardDiff: Dual # Since we need logarithmic numbers to handle the sequential multiplication, let's first add a patch for the conversion between `ULogarithmic` and `Dual` numbers. function Base.convert(::Type{Dual{T,V,N}}, x::ULogarithmic) where {T,V,N} Dual{T,V,N}(exp(x.log)) end function Base.exp(::Type{ULogarithmic{Dual{T,V,N}}}, d::Dual) where {T,V,N} invoke(Base.exp, Tuple{Type{ULogarithmic{T}}, T} where T<:Real, ULogarithmic{Dual{T,V,N}}, d) end @i function ibesselj(y!::T, ν, z::T; atol=1e-8) where T if z == 0 if ν == 0 y! += 1 end else @routine @invcheckoff begin k ← 0 @ones ULogarithmic{T} lz halfz halfz_power_2 s @zeros T out_anc lz *= convert(z) halfz *= lz / 2 halfz_power_2 *= halfz ^ 2 ## s *= (z/2)^ν/ factorial(ν) s *= halfz ^ ν for i=1:ν s /= i end out_anc += convert(s) @from k==0 while s.log > -25 # up to precision e^-25 k += 1 ## s *= 1 / k / (k+ν) * (z/2)^2 s *= halfz_power_2 / (@const k*(k+ν)) if k%2 == 0 out_anc += convert(s) else out_anc -= convert(s) end end end y! += out_anc ~@routine end end # To obtain gradients, one calls **Grad(ibesselj)** y, x = 0.0, 1.0 Grad(ibesselj)(Val(1), y, 2, x) # Here, **Grad(ibesselj)** is a callable instance of type **Grad{typeof(ibesselj)}**. # The first parameter `Val(1)` indicates the first argument is the loss. # To obtain second order gradients, one can feed dual numbers to this gradient function. _, hxy, _, hxx = Grad(ibesselj)(Val(1), Dual(y, zero(y)), 2, Dual(x, one(x))) println("The hessian d^2y/dx^2 is $(grad(hxx).partials[1])") # Here, the gradient field is a Dual number; its `partials` field stores the derivative with respect to `x`. # This is the Hessian that we need. # ## CUDA programming # The AD in NiLang avoids most heap allocations, so it is able to execute on a GPU device. # We suggest using [KernelAbstractions](https://github.com/JuliaGPU/KernelAbstractions.jl); it provides compatibility between CPU and GPU. # To execute the above function on GPU, we need only 11 lines of code.
# ```julia # using CUDA, GPUArrays, KernelAbstractions # # @i @kernel function bessel_kernel(out!, v, z) # @invcheckoff i ← @index(Global) # ibesselj(out![i], v, z[i]) # @invcheckoff i → @index(Global) # end # ``` # NiLang has macro support for KernelAbstractions, # so it is possible to launch a kernel directly like # ```julia # @i function befunc(out!, v::Integer, z) # @launchkernel CUDADevice() 256 length(out!) bessel_kernel(out!, v, z) # end # ``` # It is equivalent to calling # ```julia # (~bessel_kernel)(CUDADevice(), 256)(out!, v, z; ndrange=length(out!)) # ``` # but it will execute the job eagerly for you. # We will consider better support in the future. # Moreover, it is reversible # ```julia repl # julia> @code_reverse @launchkernel CUDA() 256 length(out!) bessel_kernel(out!, v, z) # :(#= REPL[4]:1 =# @launchkernel CUDA() 256 length(out!) (~bessel_kernel)(out!, v, z)) # ``` # To test this function, we first define the input parameters `a` and the output `out!` # ```julia # a = CuArray(rand(128)) # out! = CuArray(zeros(128)) # ``` # We wrap the output with a randomly initialized gradient field, supposing we get the gradients from a virtual loss function. # Also, we need to initialize an empty gradient field for the elements of the input CUDA tensor `a`. # ```julia # out! = ibesselj(out!, 2, GVar.(a))[1] # out_g! = GVar.(out!, CuArray(randn(128))) # ``` # Call the inverse program; multiple dispatch will drive you to the goal. # ```julia # (~ibesselj)(out_g!, 2, GVar.(a)) # ``` # You will get CUDA arrays with `GVar` elements as output; their gradient fields are what you want. # Cheers! Now you have an adjoint-mode differentiable CUDA kernel. # ## Benchmark # We benchmarked different source-to-source automatic differentiation implementations of the Bessel function of the first kind ``J_2(1.0)`` and show the results below. # # # | Package | Tangent/Adjoint | ``T_{\rm min}``/ns | Space/KB | # | --------- | --------------- | ------------------- | --------- | # | Julia | - | 22 | 0 | # | NiLang | - | 59 | 0 | # | ForwardDiff | Tangent | 35 | 0 | # | Manual | Adjoint | 83 | 0 | # | NiLang.AD | Adjoint | 213 | 0 | # | NiLang.AD (GPU) | Adjoint | 1.4 | 0 | # | Zygote | Adjoint | 31201 | 13.47 | # | Tapenade | Adjoint | ? | ? | # Julia is the CPU time for running the irreversible forward program; it is the baseline of the benchmark. # NiLang is the reversible implementation; it is 2.7 times slower than its irreversible counterpart. Here, we have removed the reversibility check. # ForwardDiff gives the best performance because it is designed for functions with a single input. # It is even faster than the manually derived gradient # ```math # \frac{\partial J_{\nu}(z)}{\partial z} = \frac{J_{\nu-1} - J_{\nu+1}}{2} # ``` # NiLang.AD is the reversible differential-programming implementation; its benchmark considers only the backward pass. # The benchmark of its GPU version is estimated on an Nvidia Titan V by broadcasting the gradient function over a CUDA array of size ``2^17`` and taking the average. # The Zygote benchmark considers both the forward and backward passes. # Tapenade is not yet ready. ================================================ FILE: examples/boxmuller.jl ================================================ # # Box-Muller method to generate a normal distribution using NiLang # In this tutorial, we use the Box-Muller method to transform a uniform distribution into a normal distribution.
# The transformation and inverse transformation of the `Box-Muller` method can be found in # [this blog](https://mathworld.wolfram.com/Box-MullerTransformation.html) @i function boxmuller(x::T, y::T) where T @routine @invcheckoff begin @zeros T θ logx _2logx θ += 2π * y logx += log(x) _2logx += -2 * logx end ## store results z1 ← zero(T) z2 ← zero(T) z1 += _2logx ^ 0.5 ROT(z1, z2, θ) ~@routine SWAP(x, z1) SWAP(y, z2) ## arithmetic uncomputing: recomputing the original values of `x` and `y` to deallocate z1 and z2 @routine @invcheckoff begin @zeros T at sq _halfsq at += atan(y, x) if (y < 0, ~) at += T(2π) end sq += x ^ 2 sq += y ^ 2 _halfsq -= sq / 2 end z1 -= exp(_halfsq) z2 -= at / (2π) @invcheckoff z1 → zero(T) @invcheckoff z2 → zero(T) ~@routine end # One may wonder why this implementation is so long; # shouldn't NiLang generate the inverse for the user? # The fact is that although Box-Muller is arithmetically reversible, # it is not reversible in finite precision. # Hence we need to "uncompute" it manually, # and this trick may introduce reversibility errors. using Plots N = 5000 x = rand(2*N) Plots.histogram(x, bins = -3:0.1:3, label="uniform", legendfontsize=16, xtickfontsize=16, ytickfontsize=16) # forward @instr boxmuller.(x[1:N], x[N+1:end]) Plots.histogram(x, bins = -3:0.1:3, label="normal", legendfontsize=16, xtickfontsize=16, ytickfontsize=16) # backward @instr (~boxmuller).(x[1:N], x[N+1:end]) Plots.histogram(x, bins = -3:0.1:3, label="uniform", legendfontsize=16, xtickfontsize=16, ytickfontsize=16) # ## Check the probability distribution function using LinearAlgebra, Test normalpdf(x) = sqrt(1/2π)*exp(-x^2/2) # obtain `log(abs(det(jacobian)))` @i function f(x::Vector) boxmuller(x[1], x[2]) end jac = NiLang.AD.jacobian(f, [0.5, 0.5], iin=1) ladj = log(abs(det(jac))) # check if it matches the `log(p/q)`. z1, z2 = boxmuller(0.5, 0.5) @test ladj ≈ log(1.0 / (normalpdf(z1) * normalpdf(z2))) # ## Obtaining the Jacobian - a simpler approach # We can define a function that is exactly reversible at the instruction level, # but costs more space for storing the output. @i function boxmuller2(x1::T, x2::T, z1::T, z2::T) where T @routine @invcheckoff begin @zeros T θ logx _2logx θ += 2π * x2 logx += log(x1) _2logx += -2 * logx end ## store results z1 += _2logx ^ 0.5 ROT(z1, z2, θ) ~@routine end # However, this is not a bijector that maps `x` to `z`, # because running it backward just erases the contents of `z`. # Nevertheless, this function can be used to obtain `log(abs(det(jacobian)))` @i function f2(x::Vector, z::Vector) boxmuller2(x[1], x[2], z[1], z[2]) end jac = NiLang.AD.jacobian(f2, [0.5, 0.5], [0.0, 0.0], iin=1, iout=2) ladj = log(abs(det(jac))) # check if it matches the `log(p/q)`.
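# (A short derivation of this check: for a bijector `z = f(x)` with input density `q(x)` and target density `p(z)`, the change of variables gives `q(x) = p(z)·|det J|`; here `q` is uniform on the unit square, so `q(x) = 1` and `log|det J| = log(1/p(z))`, which is the quantity tested below.)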
_, _, z1, z2 = boxmuller2(0.5, 0.5, 0.0, 0.0) @test ladj ≈ log(1.0 / (normalpdf(z1) * normalpdf(z2))) ================================================ FILE: examples/fft.jl ================================================ # https://rosettacode.org/wiki/Fast_Fourier_transform#Fortran # In place Cooley-Tukey FFT function fft!(x::AbstractVector{T}) where T N = length(x) @inbounds if N <= 1 return x elseif N == 2 t = x[2] oi = x[1] x[1] = oi + t x[2] = oi - t return x end # divide odd = x[1:2:N] even = x[2:2:N] # conquer fft!(odd) fft!(even) # combine @inbounds for i=1:N÷2 t = exp(T(-2im*π*(i-1)/N)) * even[i] oi = odd[i] x[i] = oi + t x[i+N÷2] = oi - t end return x end using NiLang @i function i_fft!(x::AbstractVector{T}) where T @routine @invcheckoff N ← length(x) @safe @assert N%2 == 0 @invcheckoff @inbounds if N <= 1 elseif N == 2 HADAMARD(x[1].re, x[2].re) HADAMARD(x[1].im, x[2].im) else # divide and conquer i_fft!(x[1:2:N]) i_fft!(x[2:2:N]) x2 ← zeros(T, N) for i=1:N÷2 x2[i] += x[2i-1] x2[i+N÷2] += x[2i] end for i=1:N SWAP(x[i], x2[i]) end for i=1:N÷2 x2[2i-1] -= x[i] x2[2i] -= x[i+N÷2] end # combine for i=1:N÷2 @routine θ ← -2*π*(i-1)/N ROT(x[i+N÷2].re, x[i+N÷2].im, θ) HADAMARD(x[i].re, x[i+N÷2].re) HADAMARD(x[i].im, x[i+N÷2].im) ~@routine end x2 → zeros(T, N) end ~@routine end using Test, FFTW @testset "fft" begin x = randn(ComplexF64, 64) @test fft!(copy(x)) ≈ FFTW.fft(x) @test i_fft!(copy(x)) .* sqrt(length(x)) ≈ FFTW.fft(x) end ================================================ FILE: examples/fib.jl ================================================ # # Computing Fibonacci Numbers # The following is an example that everyone likes, computing Fibonacci numbers recursively. using NiLang @i function rfib(out!, n::T) where T @routine begin n1 ← zero(T) n2 ← zero(T) n1 += n - 1 n2 += n - 2 end if (value(n) <= 2, ~) out! += 1 else rfib(out!, n1) rfib(out!, n2) end ~@routine end # The time complexity of this recursive algorithm is exponential in the input `n`. It is also possible to write a reversible linear-time version with for loops. # A slightly non-trivial task is finding the first Fibonacci number that is greater than or equal to a certain number `z`, where a `while` statement is required. @i function rfibn(n!, z) @safe @assert n! == 0 out ← 0 rfib(out, n!) @from n! == 0 while out < z ~rfib(out, n!) n! += 1 rfib(out, n!) end ~rfib(out, n!) out → 0 end # In this example, the condition `n! == 0` in the `@from` clause holds before entering the loop and is violated in later iterations. In the reverse program, the `while` statement keeps running until `n! == 0` holds again. # If executed correctly, a user will see the following result. rfib(0, 10) # compute the index of the first Fibonacci number that is greater than or equal to 100. rfibn(0, 100) # and uncompute (~rfibn)(rfibn(0, 100)...) # This example shows how an additional postcondition provided by the user can help to reverse a control flow without caching the control decisions. ================================================ FILE: examples/fixedlog.jl ================================================ using FixedPointNumbers, Test """ Reference ------------------- [1] C. S. Turner, "A Fast Binary Logarithm Algorithm", IEEE Signal Processing Mag., pp. 124,140, Sep. 2010.
""" function log2fix(x::Fixed{T, P}) where {T, P} PREC = UInt(P) x.i == 0 && return typemin(T) # represents negative infinity y = zero(T) xi = x.i while xi < 1 << PREC xi <<= 1 y -= T(1) << PREC end while xi >= 2 << PREC xi >>= 1 y += T(1) << PREC end z = xi b = T(1) << (PREC - UInt(1)) for i = 1:P temp = Base.widemul(z, z) >> PREC z = T(temp) if z >= T(2) << PREC z >>= 1 y += b end b >>= 1 end return Fixed{T,PREC}(y, nothing) end @test log2fix(Fixed{Int, 43}(2^1.24)) ≈ 1.24 ================================================ FILE: examples/lax_wendroff.jl ================================================ """ solve the 1D linear advection equation ```math ∂q/∂t=−u∂q/∂x ``` in a periodic domain, where ``q`` is the quantity being advected, ``t`` is time, ``x`` is the spatial coordinate and ``u`` is the velocity, which is constant with ``x``. """ function lax_wendroff!(nt::Int, c, q_init::AbstractVector{T}, q::AbstractVector{T}) where T nx = length(q) flux = zeros(T, nx-1) # Fluxes between boxes @inbounds for i=1:nx q[i] = q_init[i] # Initialize q end @inbounds for j=1:nt # Main loop in time for i=1:nx-1 flux[i] = 0.5*c*(q[i]+q[i+1]+c*(q[i]-q[i+1])) end for i=2:nx-1 q[i] += flux[i-1]-flux[i] end q[1] = q[nx-1]; q[nx] = q[2] # Treat boundary conditions end return q end using Random Random.seed!(2) q_init = randn(100) q = zeros(100) @show lax_wendroff!(2000, 1.0, q_init, zero(q_init)) using BenchmarkTools @benchmark lax_wendroff!(2000, 1.0, $q_init, x) setup=(x=zero(q_init)) @time lax_wendroff!(2000, 1.0, q_init, q) using NiLang @i function i_lax_wendroff!(nt::Int, c, q_init::AbstractVector{T}, q::AbstractVector{T}, cache::AbstractMatrix{T}) where T nx ← length(q) @inbounds for i=1:nx q[i] += q_init[i] # Initialize q end @inbounds for j=1:nt # Main loop in time for i=1:nx-1 @routine begin @zeros T anc1 anc2 anc3 anc1 += 0.5 * c anc2 += q[i] - q[i+1] anc3 += q[i] + q[i+1] anc3 += c * anc2 end cache[i,j] += anc1 * anc3 ~@routine end for i=2:nx-1 q[i] += cache[i-1,j]-cache[i,j] end # Treat boundary conditions cache[nx,j] += q[nx-1] SWAP(q[1], cache[nx,j]) cache[nx+1,j] += q[2] SWAP(q[nx], cache[nx+1,j]) end nx → length(q) end nt = 2000 i_lax_wendroff!(nt, 1.0, q_init, zero(q_init), zeros(length(q_init)+1,nt)) ================================================ FILE: examples/lognumber.jl ================================================ # # Logarithmic number system # Computing basic functions like `power`, `exp` and `besselj` is not trivial for reversible programming. # There is no efficient constant memory algorithm using pure fixed point numbers only. # For example, to compute `x ^ n` reversiblly with fixed point numbers, # we need to allocate a vector of size $O(n)$. # With logarithmic numbers, the above computation is straight forward. using LogarithmicNumbers using NiLang, NiLang.AD using FixedPointNumbers @i function i_power(y::T, x::T, n::Int) where T if !iszero(x) @routine begin lx ← one(ULogarithmic{T}) ly ← one(ULogarithmic{T}) ## convert `x` to a logarithmic number ## Here, `*=` is reversible for log numbers if x > 0 lx *= convert(x) else lx *= convert(-x) end for i=1:n ly *= lx end end ## convert back to fixed point numbers y += convert(ly) if x < 0 && n%2 == 1 NEG(y) end ~@routine end end # To check the function i_power(Fixed43(0.0), Fixed43(0.4), 3) # ## `exp` function as an example # The following example computes `exp(x)`. 
@i function i_exp(y!::T, x::T) where T<:Union{Fixed, GVar{<:Fixed}} @invcheckoff begin @routine begin s ← one(ULogarithmic{T}) lx ← one(ULogarithmic{T}) k ← 0 end lx *= convert(x) y! += convert(s) @from k==0 while s.log > -20 k += 1 s *= lx / k y! += convert(s) end ~(@from k==0 while s.log > -20 k += 1 s *= lx / k end) lx /= convert(x) ~@routine end end x = Fixed43(3.5) # We can check the reversibility out, _ = i_exp(Fixed43(0.0), x) @assert out ≈ exp(3.5) # Computing the gradients _, gx = NiLang.AD.gradient(Val(1), i_exp, (Fixed43(0.0), x)) @assert gx ≈ exp(3.5) ================================================ FILE: examples/nice.jl ================================================ # # NICE network # For the definition of this network and concepts of normalizing flow, # please refer to this nice blog: https://lilianweng.github.io/lil-log/2018/10/13/flow-based-deep-generative-models.html, # and the pytorch notebook: https://github.com/GiggleLiu/marburg/blob/master/notebooks/nice.ipynb using NiLang, NiLang.AD using LinearAlgebra using DelimitedFiles using Plots # `include` the optimizer; you can find it in the `Adam.jl` file in the `examples/` folder. include(NiLang.project_relative_path("examples", "Adam.jl")) # ## Model definition # First, define the single layer transformation and its behavior under `GVar` - the gradient wrapper. struct NiceLayer{T} W1::Matrix{T} b1::Vector{T} W2::Matrix{T} b2::Vector{T} y1::Vector{T} y1a::Vector{T} end """Apply a single NICE transformation.""" @i function nice_layer!(x::AbstractVector{T}, layer::NiceLayer{T}, y!::AbstractVector{T}) where T @routine @invcheckoff begin i_affine!(layer.y1, layer.W1, layer.b1, x) @inbounds for i=1:length(layer.y1) if (layer.y1[i] > 0, ~) layer.y1a[i] += layer.y1[i] end end end i_affine!(y!, layer.W2, layer.b2, layer.y1a) ~@routine ## clean up accumulated rounding error, since this memory is reused. @safe layer.y1 .= zero(T) end # Here, in each layer, we use the information in `x` to update `y!`. # During computing, we use the `y1` and `y1a` fields of the network as ancilla space; # both of them can be uncomputed at the end of the function. # However, we need to erase small numbers to make sure the rounding error does not accumulate. # A NICE network always transforms its inputs reversibly. # We update one half of `x!` at a time, so that the input and output memory spaces do not clash. const NiceNetwork{T} = Vector{NiceLayer{T}} """Apply the whole NICE network.""" @i function nice_network!(x!::AbstractVector{T}, network::NiceNetwork{T}) where T @invcheckoff for i=1:length(network) np ← length(x!) if (i%2==0, ~) @inbounds nice_layer!(x! |> subarray(np÷2+1:np), network[i], x! |> subarray(1:np÷2)) else @inbounds nice_layer!(x! |> subarray(1:np÷2), network[i], x! |> subarray(np÷2+1:np)) end np → length(x!)
end end function random_nice_network(nparams::Int, nhidden::Int, nlayer::Int; scale=0.1) random_nice_network(Float64, nparams, nhidden, nlayer; scale=scale) end function random_nice_network(::Type{T}, nparams::Int, nhidden::Int, nlayer::Int; scale=0.1) where T nin = nparams÷2 scale = T(scale) y1 = zeros(T, nhidden) NiceLayer{T}[NiceLayer(randn(T, nhidden, nin)*scale, randn(T, nhidden)*scale, randn(T, nin, nhidden)*scale, randn(T, nin)*scale, y1, zero(y1)) for _ = 1:nlayer] end # ## Parameter management nparameters(n::NiceLayer) = length(n.W1) + length(n.b1) + length(n.W2) + length(n.b2) nparameters(n::NiceNetwork) = sum(nparameters, n) """collect parameters in the `layer` into a vector `out`.""" function collect_params!(out, layer::NiceLayer) a, b, c, d = length(layer.W1), length(layer.b1), length(layer.W2), length(layer.b2) out[1:a] .= vec(layer.W1) out[a+1:a+b] .= layer.b1 out[a+b+1:a+b+c] .= vec(layer.W2) out[a+b+c+1:end] .= layer.b2 return out end """dispatch vectorized parameters `out` into the `layer`.""" function dispatch_params!(layer::NiceLayer, out) a, b, c, d = length(layer.W1), length(layer.b1), length(layer.W2), length(layer.b2) vec(layer.W1) .= out[1:a] layer.b1 .= out[a+1:a+b] vec(layer.W2) .= out[a+b+1:a+b+c] layer.b2 .= out[a+b+c+1:end] return layer end function collect_params(n::NiceNetwork{T}) where T out = zeros(T, nparameters(n)) k = 0 for layer in n np = nparameters(layer) collect_params!(view(out, k+1:k+np), layer) k += np end return out end function dispatch_params!(network::NiceNetwork, out) k = 0 for layer in network np = nparameters(layer) dispatch_params!(layer, view(out, k+1:k+np)) k += np end return network end # ## Loss function # To obtain the log-probability of a data. @i function logp!(out!::T, x!::AbstractVector{T}, network::NiceNetwork{T}) where T (~nice_network!)(x!, network) @invcheckoff for i = 1:length(x!) @routine begin xsq ← zero(T) @inbounds xsq += x![i]^2 end out! -= 0.5 * xsq ~@routine end end # The negative-log-likelihood loss function @i function nice_nll!(out!::T, cum!::T, xs!::Matrix{T}, network::NiceNetwork{T}) where T @invcheckoff for i=1:size(xs!, 2) @inbounds logp!(cum!, xs! |> subarray(:,i), network) end out! 
-= cum!/(@const size(xs!, 2)) end # ## Training function train(x_data, model; num_epochs = 800) num_vars = size(x_data, 1) params = collect_params(model) optimizer = Adam(; lr=0.01) for epoch = 1:num_epochs loss, a, b, c = nice_nll!(0.0, 0.0, copy(x_data), model) if epoch % 50 == 1 println("epoch = $epoch, loss = $loss") display(showmodel(x_data, model)) end _, _, _, gmodel = (~nice_nll!)(GVar(loss, 1.0), GVar(a), GVar(b), GVar(c)) g = grad.(collect_params(gmodel)) update!(params, grad.(collect_params(gmodel)), optimizer) dispatch_params!(model, params) end return model end function showmodel(x_data, model; nsamples=2000) scatter(x_data[1,1:nsamples], x_data[2,1:nsamples]; xlims=(-5,5), ylims=(-5,5)) zs = randn(2, nsamples) for i=1:nsamples nice_network!(view(zs, :, i), model) end scatter!(zs[1,:], zs[2,:]) end # you can find the training data in `examples/` folder x_data = Matrix(readdlm(NiLang.project_relative_path("examples", "train.dat"))') import Random; Random.seed!(22) model = random_nice_network(Float64, size(x_data, 1), 10, 4; scale=0.1) # Before training, the distribution looks like # ![before](../asset/nice_before.png) model = train(x_data, model; num_epochs=800) # After training, the distribution looks like # ![before](../asset/nice_after.png) ================================================ FILE: examples/nice_test.jl ================================================ # bijectivity check using Test include("nice.jl") @testset "nice" begin num_vars = 4 model = random_nice_network(num_vars, 10, 3) z = randn(num_vars) x, _ = nice_network!(z, model) z_infer, _ = (~nice_network!)(x, model) @test z_infer ≈ z newparams = randn(nparameters(model)) dispatch_params!(model, newparams) @test collect_params(model) ≈ newparams @test check_inv(logp!, (0.0, x, model)) end @testset "nice logp" begin z1 = [0.5, 0.2] z2 = [-0.5, 1.2] model = random_nice_network(2, 10, 4) x1 = nice_network!(copy(z1), model)[1] x2 = nice_network!(copy(z2), model)[1] p1 = logp!(0.0, copy(x1), model)[1] p2 = logp!(0.0, copy(x2), model)[1] pz1 = exp(-sum(abs2, z1)/2) pz2 = exp(-sum(abs2, z2)/2) @test exp(p1 - p2) ≈ pz1/pz2 @test nice_nll!(0.0, 0.0, hcat(x1, x2), model)[1] ≈ -log(pz1 * pz2)/2 xs = hcat(x1, x2) gmodel = Grad(nice_nll!)(Val(1), 0.0, 0.0, copy(xs), model)[end] for i=1:10, j=1:4 model[j].W2[i] -= 1e-4 a = nice_nll!(0.0, 0.0, copy(xs), model)[1] model[j].W2[i] += 2e-4 b = nice_nll!(0.0, 0.0, copy(xs), model)[1] model[j].W2[i] -= 1e-4 ng = (b-a)/2e-4 @test gmodel[j].W2[i].g ≈ ng end for i=1:10, j=1:4 model[j].W1[i] -= 1e-4 a = nice_nll!(0.0, 0.0, copy(xs), model)[1] model[j].W1[i] += 2e-4 b = nice_nll!(0.0, 0.0, copy(xs), model)[1] model[j].W1[i] -= 1e-4 ng = (b-a)/2e-4 @test gmodel[j].W1[i].g ≈ ng end end ================================================ FILE: examples/port_chainrules.jl ================================================ # # [How to port NiLang to ChainRules](@id port_chainrules) # # In [How to port NiLang to Zygote](@ref port_zygote) we showed the way to insert Nilang-based # gradient as Zygote's pullback/adjoint. Given that [ChainRules](https://github.com/JuliaDiff/ChainRules.jl) # is now the core of many AD packages including Zygote, extending `ChainRules.rrule` with Nilang # does the same job, except that it affects all ChainRules-based AD packages and not just Zygote. # # We'll use the same example as [How to port NiLang to Zygote](@ref port_zygote), so you might need # to restart your Julia to get a fresh environment. 
using NiLang, NiLang.AD, Zygote, ChainRules # Let's start from the Julia native implementation of the `norm2` function. function norm2(x::AbstractArray{T}) where T out = zero(T) for i=1:length(x) @inbounds out += x[i]^2 end return out end # Zygote is able to generate the correct gradients, but it is much slower than the primal # function `norm2` using BenchmarkTools x = randn(1000); original_grad = norm2'(x) @benchmark norm2'($x) seconds=1 # The primal function is @benchmark norm2($x) seconds=1 # Then we have the reversible implementation @i function r_norm2(out::T, x::AbstractArray{T}) where T for i=1:length(x) @inbounds out += x[i]^2 end end # The gradient generated by NiLang is much faster, comparable to the forward program @benchmark (~r_norm2)(GVar($(norm2(x)), 1.0), $(GVar(x))) seconds=1 # By defining our custom `rrule` using NiLang's gradient implementation, `Zygote` automatically # gets boosted because it internally uses the available ChainRules ruleset. # We need to create a new symbol here because otherwise Zygote will still use the # previously generated slow implementation. norm2_faster(x) = norm2(x) function ChainRules.rrule(::typeof(norm2_faster), x::AbstractArray{T}) where T out = norm2_faster(x) function pullback(ȳ) ChainRules.NoTangent(), grad((~r_norm2)(GVar(out, ȳ), GVar(x))[2]) end out, pullback end @assert norm2_faster'(x) ≈ original_grad # See, much faster @benchmark norm2_faster'($x) seconds=1 ================================================ FILE: examples/port_zygote.jl ================================================ # # [How to port NiLang to Zygote](@id port_zygote) # # In this demo we'll show how to insert NiLang's gradient implementation to boost Zygote's gradient. # A similar demo for ChainRules can be found in [How to port NiLang to ChainRules](@ref port_chainrules). using NiLang, NiLang.AD, Zygote # Let's start from the Julia native implementation of the `norm2` function. function norm2(x::AbstractArray{T}) where T out = zero(T) for i=1:length(x) @inbounds out += x[i]^2 end return out end # Zygote is able to generate the correct gradients, but it is much slower than the primal # function `norm2` using BenchmarkTools x = randn(1000); original_grad = norm2'(x) @benchmark norm2'($x) seconds=1 # The primal function is @benchmark norm2($x) seconds=1 # Then we have the reversible implementation @i function r_norm2(out::T, x::AbstractArray{T}) where T for i=1:length(x) @inbounds out += x[i]^2 end end # The gradient generated by NiLang is much faster, comparable to the forward program @benchmark (~r_norm2)(GVar($(norm2(x)), 1.0), $(GVar(x))) seconds=1 # To enjoy the speed of `NiLang` in `Zygote`, just bind the adjoint rule Zygote.@adjoint function norm2(x::AbstractArray{T}) where T out = norm2(x) out, δy -> (grad((~r_norm2)(GVar(out, δy), GVar(x))[2]),) end @assert norm2'(x) ≈ original_grad # See, much faster @benchmark norm2'($x) seconds=1 ================================================ FILE: examples/pyramid.jl ================================================ # # Pyramid example # # This is the Pyramid example in the book "Evaluating Derivatives", Sec. 3.5. using NiLang, NiLang.AD @i function pyramid!(y!, v!, x::AbstractVector{T}) where T @safe @assert size(v!,2) == size(v!,1) == length(x) @invcheckoff @inbounds for j=1:length(x) v![1,j] += x[j] end @invcheckoff @inbounds for i=1:size(v!,1)-1 for j=1:size(v!,2)-i @routine begin @zeros T c s c += cos(v![i,j+1]) s += sin(v![i,j]) end v![i+1,j] += c * s ~@routine end end y!
+= v![end,1] end x = randn(20) pyramid!(0.0, zeros(20, 20), x) # Let's benchmark the gradient of the pyramid function using BenchmarkTools @benchmark gradient(Val(1), pyramid!, (0.0, zeros(20, 20), $x)) ================================================ FILE: examples/qr.jl ================================================ # # A simple QR decomposition # ## Functions used in this example using NiLang, NiLang.AD, Test # ## The QR decomposition # Let us consider a naive implementation of QR decomposition from scratch. # This implementation is just a proof of principle which does not consider reorthogonalization and other practical issues. @i function qr(Q, R, A::Matrix{T}) where T @routine begin anc_norm ← zero(T) anc_dot ← zeros(T, size(A,2)) ri ← zeros(T, size(A,1)) end for col = 1:size(A, 1) ri .+= A[:,col] for precol = 1:col-1 i_dot(anc_dot[precol], Q[:,precol], ri) R[precol,col] += anc_dot[precol] for row = 1:size(Q,1) ri[row] -= anc_dot[precol] * Q[row, precol] end end i_norm2(anc_norm, ri) R[col, col] += anc_norm^0.5 for row = 1:size(Q,1) Q[row,col] += ri[row] / R[col, col] end ~begin ri .+= A[:,col] for precol = 1:col-1 i_dot(anc_dot[precol], Q[:,precol], ri) for row = 1:size(Q,1) ri[row] -= anc_dot[precol] * Q[row, precol] end end i_norm2(anc_norm, ri) end end ~@routine end # Here, in order to avoid frequent uncomputing, we allocate the ancillas `ri` and `anc_dot` as vectors. # The expression in `~` is used to uncompute `ri`, `anc_dot` and `anc_norm`. # `i_dot` and `i_norm2` are reversible functions to compute the dot product and the vector norm. # One can quickly check the correctness of the gradient function A = randn(4,4) q, r = zero(A), zero(A) @i function test1(out, q, r, A) qr(q, r, A) i_sum(out, q) end @test check_grad(test1, (0.0, q, r, A); iloss=1) # Here, the loss function `test1` is defined as the sum of the output unitary matrix `q`. # The `check_grad` function is a gradient checker function defined in module `NiLang.AD`. ================================================ FILE: examples/realnvp.jl ================================================ # # RealNVP network # For the definition of this network and concepts of normalizing flow, # please refer to this realnvp blog: https://lilianweng.github.io/lil-log/2018/10/13/flow-based-deep-generative-models.html, # and the pytorch notebook: https://github.com/GiggleLiu/marburg/blob/master/solutions/realnvp.ipynb using NiLang, NiLang.AD using LinearAlgebra using DelimitedFiles using Plots # `include` the optimizer; you can find it in the `Adam.jl` file in the `examples/` folder. include(NiLang.project_relative_path("examples", "Adam.jl")) # ## Model definition # First, define the single layer transformation and its behavior under `GVar` - the gradient wrapper.
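# Mathematically, the coupling layer implemented in `onelayer!` below updates one half of the variables roughly as
# ```math
# y \leftarrow y \odot e^{s(x)} + t(x),
# ```
# where ``s`` is the scaling network (squashed by ``\tanh`` in the last layer) and ``t`` is the transform network; the log-jacobian accumulator collects ``\sum_i s(x)_i``.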
struct RealNVPLayer{T} ## transform network W1::Matrix{T} b1::Vector{T} W2::Matrix{T} b2::Vector{T} y1::Vector{T} y1a::Vector{T} ## scaling network sW1::Matrix{T} sb1::Vector{T} sW2::Matrix{T} sb2::Vector{T} sy1::Vector{T} sy1a::Vector{T} end """collect parameters in the `layer` into a vector `out`.""" function collect_params!(out, layer::RealNVPLayer) k=0 for field in [:W1, :b1, :W2, :b2, :sW1, :sb1, :sW2, :sb2] v = getfield(layer, field) nv = length(v) out[k+1:k+nv] .= vec(v) k += nv end return out end """dispatch vectorized parameters `out` into the `layer`.""" function dispatch_params!(layer::RealNVPLayer, out) k=0 for field in [:W1, :b1, :W2, :b2, :sW1, :sb1, :sW2, :sb2] v = getfield(layer, field) nv = length(v) vec(v) .= out[k+1:k+nv] k += nv end return out end function nparameters(n::RealNVPLayer) sum(x->length(getfield(n, x)), [:W1, :b1, :W2, :b2, :sW1, :sb1, :sW2, :sb2]) end # Then, we define the network and how to access its parameters. const RealNVP{T} = Vector{RealNVPLayer{T}} nparameters(n::RealNVP) = sum(nparameters, n) function collect_params(n::RealNVP{T}) where T out = zeros(T, nparameters(n)) k = 0 for layer in n np = nparameters(layer) collect_params!(view(out, k+1:k+np), layer) k += np end return out end function dispatch_params!(network::RealNVP, out) k = 0 for layer in network np = nparameters(layer) dispatch_params!(layer, view(out, k+1:k+np)) k += np end return network end function random_realnvp(nparams::Int, nhidden::Int, nhidden_s::Int, nlayer::Int; scale=0.1) random_realnvp(Float64, nparams, nhidden, nhidden_s::Int, nlayer; scale=scale) end function random_realnvp(::Type{T}, nparams::Int, nhidden::Int, nhidden_s::Int, nlayer::Int; scale=0.1) where T nin = nparams÷2 scale = T(scale) y1 = zeros(T, nhidden) sy1 = zeros(T, nhidden_s) RealNVPLayer{T}[RealNVPLayer( randn(T, nhidden, nin)*scale, randn(T, nhidden)*scale, randn(T, nin, nhidden)*scale, randn(T, nin)*scale, y1, zero(y1), randn(T, nhidden_s, nin)*scale, randn(T, nhidden_s)*scale, randn(T, nin, nhidden_s)*scale, randn(T, nin)*scale, sy1, zero(sy1), ) for _ = 1:nlayer] end # ## Loss function # # In each layer, we use the information in `x` to update `y!`. # During computing, we use two vector-type ancillas `y1` and `y1a`; # both of them can be uncomputed at the end of the function. @i function onelayer!(x::AbstractVector{T}, layer::RealNVPLayer{T}, y!::AbstractVector{T}, logjacobian!::T; islast) where T @routine @invcheckoff begin ## scale network scale ← zero(y!) ytemp2 ← zero(y!) i_affine!(layer.sy1, layer.sW1, layer.sb1, x) @inbounds for i=1:length(layer.sy1) if (layer.sy1[i] > 0, ~) layer.sy1a[i] += layer.sy1[i] end end i_affine!(scale, layer.sW2, layer.sb2, layer.sy1a) ## transform network i_affine!(layer.y1, layer.W1, layer.b1, x) ## relu @inbounds for i=1:length(layer.y1) if (layer.y1[i] > 0, ~) layer.y1a[i] += layer.y1[i] end end end ## inplace multiply exp of scale! -- dangerous @inbounds @invcheckoff for i=1:length(scale) @routine begin expscale ← zero(T) tanhscale ← zero(T) if (islast, ~) tanhscale += tanh(scale[i]) else tanhscale += scale[i] end expscale += exp(tanhscale) end logjacobian! += tanhscale ## inplace multiply!!! temp ← zero(T) temp += y![i] * expscale SWAP(temp, y![i]) temp -= y![i] / expscale temp → zero(T) ~@routine end ## affine the transform layer i_affine!(y!, layer.W2, layer.b2, layer.y1a) ~@routine ## clean up accumulated rounding error, since this memory is reused. @safe layer.y1 .= zero(T) @safe layer.sy1 .= zero(T) end # A RealNVP network always transforms its inputs reversibly.
# We update one half of `x!` at a time, so that the input and output memory spaces do not clash. @i function realnvp!(x!::AbstractVector{T}, network::RealNVP{T}, logjacobian!) where T @invcheckoff for i=1:length(network) np ← length(x!) if (i%2==0, ~) @inbounds onelayer!(x! |> subarray(np÷2+1:np), network[i], x! |> subarray(1:np÷2), logjacobian!; islast=i==length(network)) else @inbounds onelayer!(x! |> subarray(1:np÷2), network[i], x! |> subarray(np÷2+1:np), logjacobian!; islast=i==length(network)) end np → length(x!) end end # How to obtain the log-probability of a data sample: @i function logp!(out!::T, x!::AbstractVector{T}, network::RealNVP{T}) where T (~realnvp!)(x!, network, out!) @invcheckoff for i = 1:length(x!) @routine begin xsq ← zero(T) @inbounds xsq += x![i]^2 end out! -= 0.5 * xsq ~@routine end end # The negative-log-likelihood loss function @i function nll_loss!(out!::T, cum!::T, xs!::Matrix{T}, network::RealNVP{T}) where T @invcheckoff for i=1:size(xs!, 2) @inbounds logp!(cum!, xs! |> subarray(:,i), network) end out! -= cum!/(@const size(xs!, 2)) end # ## Training function train(x_data, model; num_epochs = 800) num_vars = size(x_data, 1) params = collect_params(model) optimizer = Adam(; lr=0.01) for epoch = 1:num_epochs loss, a, b, c = nll_loss!(0.0, 0.0, copy(x_data), model) if epoch % 50 == 1 println("epoch = $epoch, loss = $loss") display(showmodel(x_data, model)) end _, _, _, gmodel = (~nll_loss!)(GVar(loss, 1.0), GVar(a), GVar(b), GVar(c)) g = grad.(collect_params(gmodel)) update!(params, grad.(collect_params(gmodel)), optimizer) dispatch_params!(model, params) end return model end function showmodel(x_data, model; nsamples=2000) scatter(x_data[1,1:nsamples], x_data[2,1:nsamples]; xlims=(-5,5), ylims=(-5,5)) zs = randn(2, nsamples) for i=1:nsamples realnvp!(view(zs, :, i), model, 0.0) end scatter!(zs[1,:], zs[2,:]) end # You can find the training data in the `examples/` folder x_data = Matrix(readdlm(NiLang.project_relative_path("examples", "train.dat"))') import Random; Random.seed!(22) model = random_realnvp(Float64, size(x_data, 1), 10, 10, 4; scale=0.1) # Before training, the distribution looks like # ![before](../asset/nice_before.png) model = train(x_data, model; num_epochs=800) # After training, the distribution looks like # ![after](../asset/realnvp_after.png) ================================================ FILE: examples/sparse.jl ================================================ # # Sparse matrices # # Source-to-source automatic differentiation is useful for differentiating sparse matrices. It is a well-known problem that sparse matrix operations cannot benefit directly from generic backward rules for dense matrices, because such general rules do not keep the sparse structure. # In the following, we will show that reversible AD can differentiate the Frobenius dot product between two sparse matrices with state-of-the-art performance. Here, the Frobenius dot product is defined as `trace(A'B)`. # Its native Julia (irreversible) implementation is `SparseArrays.dot`.
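# For reference, the irreversible two-pointer column sweep that such a Frobenius dot product boils down to looks roughly like the following sketch (our own `frob_dot` for illustration, not the actual `SparseArrays.dot` source):
using SparseArrays
function frob_dot(A::SparseMatrixCSC{T}, B::SparseMatrixCSC{T}) where T
    r = zero(T)
    for j = 1:size(A, 2)
        ia, ib = A.colptr[j], B.colptr[j]
        # walk the j-th columns of A and B simultaneously
        while ia < A.colptr[j+1] && ib < B.colptr[j+1]
            ra, rb = A.rowval[ia], B.rowval[ib]
            if ra == rb
                r += A.nzval[ia]' * B.nzval[ib]
                ia += 1
                ib += 1
            elseif ra < rb  # advance the pointer that lags behind
                ia += 1
            else
                ib += 1
            end
        end
    end
    return r
end
# The data-dependent branches in this loop are exactly what a reversible version must remember; this is the role of the `branch_keeper` vector below.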
# # The following is a reversible counterpart using NiLang, NiLang.AD using SparseArrays @i function idot(r::T, A::SparseMatrixCSC{T},B::SparseMatrixCSC{T}) where {T} @routine begin m, n ← size(A) branch_keeper ← zeros(Bool, 2*m) end @safe size(B) == (m,n) || throw(DimensionMismatch("matrices must have the same dimensions")) @invcheckoff @inbounds for j = 1:n @routine begin ia1 ← A.colptr[j] ib1 ← B.colptr[j] ia2 ← A.colptr[j+1] ib2 ← B.colptr[j+1] ia ← ia1 ib ← ib1 end @inbounds for i=1:ia2-ia1+ib2-ib1-1 ra ← A.rowval[ia] rb ← B.rowval[ib] if (ra == rb, ~) r += A.nzval[ia]' * B.nzval[ib] end ## b move -> true, a move -> false branch_keeper[i] ⊻= @const ia == ia2-1 || ra > rb ra → A.rowval[ia] rb → B.rowval[ib] if (branch_keeper[i], ~) INC(ib) else INC(ia) end end ~@inbounds for i=1:ia2-ia1+ib2-ib1-1 ## b move -> true, a move -> false branch_keeper[i] ⊻= @const ia == ia2-1 || A.rowval[ia] > B.rowval[ib] if (branch_keeper[i], ~) INC(ib) else INC(ia) end end ~@routine end ~@routine end # Here, the key point is using a `branch_keeper` vector to cache the branch decisions. # The time used by the native implementation is using BenchmarkTools a = sprand(1000, 1000, 0.01); b = sprand(1000, 1000, 0.01); @benchmark SparseArrays.dot($a, $b) # To compute the gradients, we wrap each matrix element with `GVar`, and send them to the reversible backward pass out! = SparseArrays.dot(a, b) @benchmark (~idot)($(GVar(out!, 1.0)), $(GVar.(a)), $(GVar.(b))) # The time used for computing the backward pass is approximately 1.6 times Julia's native forward pass. # Here, we have turned the reversibility check off to achieve better performance. # By writing sparse matrix multiplication and other sparse matrix operations reversibly, # we will have a differentiable sparse matrix library with proper performance. # See another blog post of mine for [reversible sparse matrix multiplication](https://nextjournal.com/giggle/how-to-write-a-program-differentiably). ================================================ FILE: examples/unitary.jl ================================================ # # Unitary matrix operations without allocation # A unitary matrix has eigenvalues of unit modulus and is reversible. It is widely used as an approach to ease the gradient exploding and vanishing problem and the memory wall problem. # One of the simplest ways to parametrize a unitary matrix is representing it as a product of two-level unitary operations. A real unitary matrix of size $N$ can be parametrized compactly by $N(N-1)/2$ rotation operations # # ```math # {\rm ROT}(a!, b!, \theta) = \left(\begin{matrix} # \cos(\theta) & - \sin(\theta)\\ # \sin(\theta) & \cos(\theta) # \end{matrix}\right) # \left(\begin{matrix} # a!\\ # b! # \end{matrix}\right), # ``` # # where $\theta$ is the rotation angle, and `a!` and `b!` are the target registers. using NiLang, NiLang.AD @i function umm!(x!, θ) @safe @assert length(θ) == length(x!)*(length(x!)-1)/2 k ← 0 for j=1:length(x!) for i=length(x!)-1:-1:j k += 1 ROT(x![i], x![i+1], θ[k]) end end k → length(θ) end # Here, the ancilla `k` is deallocated manually by specifying its value, because we know the loop size is $N(N-1)/2$. # We define the test functions in order to check gradients. @i function isum(out!, x::AbstractArray) for i=1:length(x) out! += x[i] end end @i function test!(out!, x!::Vector, θ::Vector) umm!(x!, θ) isum(out!, x!)
end # Let's print the program output out, x, θ = 0.0, randn(4), randn(6); @instr Grad(test!)(Val(1), out, x, θ) x # We can erase the gradient fields by uncomputing the gradient function. # If you want, you can differentiate it twice to obtain Hessians. # However, we suggest using forward-mode differentiation over our NiLang program; it is more efficient. @instr (~Grad(test!))(Val(1), out, x, θ) x # In the above testing code, `Grad(test!)` attaches a gradient field to each element of `x`. `~Grad(test!)` is the inverse program that erases the gradient fields. # Notably, this reversible implementation costs zero memory allocation, although it changes the target variables in place. ================================================ FILE: notebooks/README.md ================================================ # How to use notebooks 1. Install the Pluto notebook from [here](https://github.com/fonsp/Pluto.jl), 2. Open this file in a Pluto notebook. ================================================ FILE: notebooks/autodiff.jl ================================================ ### A Pluto.jl notebook ### # v0.14.5 using Markdown using InteractiveUtils # ╔═╡ f11023e5-8f7b-4f40-86d3-3407b61863d9 begin using PlutoUI, Viznet, Compose, Plots function shrink(a, b, da, db) d = b .- a r = sqrt(sum(abs2, d)) unitd = d ./ r a .+ unitd .* da, b .- unitd .* db end end; # ╔═╡ ce44f8bd-692e-4eab-9ba4-055b25e40c81 using ForwardDiff: Dual # ╔═╡ 9a46597c-b1ee-4e3b-aed1-fd2874b6e77a using BenchmarkTools # ╔═╡ ccd38f52-104d-434a-aea3-dd94e571374f using NiLang # ╔═╡ f4230251-ba54-434a-b86b-f972c7389217 using MacroTools # ╔═╡ 69dc2685-b70f-4a81-af30-f02e0054bd52 using NiLang.AD # ╔═╡ 200f1848-0980-4185-919a-93ab2e7f788f using SparseArrays # ╔═╡ 30c191c5-642b-4062-98f3-643d314a054d using LinearAlgebra # ╔═╡ 864dbde7-b689-4165-a08e-6bbbd72190de using Test # ╔═╡ a1ef579e-4b66-4042-944e-7e27c660095e md""" ```math \newcommand{\comment}[1]{{\bf \color{blue}{\text{◂~ #1}}}} ``` """ # ╔═╡ 100b4293-fd1e-4b9c-a831-5b79bc2a5ebe begin # left right layout function leftright(a, b; width=600) HTML("""
<div style="display: flex; width: $(width)px;"><div>$(html(a))</div><div>$(html(b))</div></div>
""") end # up down layout function updown(a, b; width=nothing) HTML("""
<div $(width === nothing ? "" : "style='width: $(width)px;'")>
<div>$(html(a))</div>
<div>$(html(b))</div>
</div>
""") end function highlight(str) HTML("""$(str)""") end end; # ╔═╡ 9d11e058-a7d0-11eb-1d78-6592ff7a1b43 md"# An introduction to automatic differentiation -- GiggleLiu" # ╔═╡ b73157bf-1a77-47b8-8a06-8d6ec2045023 html"" # ╔═╡ ec13e0a9-64ff-4f66-a5a6-5fef53428fa1 md""" * What is automatic differentiation (AD)? * A true history of AD * Forward mode AD * Reverse mode AD * primitves on tensors (including Jax, pytorch et al.) * primitves on elementary instructions (usually source code transformation based) * defined on a reversible program * Some applications in **scientific computing** * solving the graph embedding problem * inverse engineering a hamiltonian * obtaining maximum independent set (MIS) configurations * towards differentiating `expmv` ``\comment{will be used in our emulator}`` """ # ╔═╡ f8b0d1ce-99f7-4729-b46e-126da540cbbe md""" ## The true history of automatic differentiation """ # ╔═╡ 435ac19e-1c0c-4ee5-942d-f2a97c8c4d80 md""" * 1964 ~ Robert Edwin Wengert, A simple automatic derivative evaluation program. ``\comment{first forward mode AD}`` * 1970 ~ Seppo Linnainmaa, Taylor expansion of the accumulated rounding error. ``\comment{first backward mode AD}`` * 1986 ~ Rumelhart, D. E., Hinton, G. E., and Williams, R. J., Learning representations by back-propagating errors. * 1992 ~ Andreas Griewank, Achieving logarithmic growth of temporal and spatial complexity in reverse automatic differentiation. ``\comment{foundation of source code transformation based AD.}`` * 2000s ~ The boom of tensor based AD frameworks for machine learning. * 2018 ~ People re-invented AD as differential programming ([wiki](https://en.wikipedia.org/wiki/Differentiable_programming) and this [quora answer](https://www.quora.com/What-is-Differentiable-Programming).) ![](https://qph.fs.quoracdn.net/main-qimg-fb2f8470f2120eb49c8142b08d9c4132) * 2020 ~ Me, Differentiate everything with a reversible embeded domain-specific language ``\comment{AD based on reversible programming}``. """ # ╔═╡ 48ecd619-d01d-43ff-8b52-7c2566c3fa2b md"## Forward mode automatic differentiation" # ╔═╡ 4878ce45-40ff-4fae-98e7-1be41e930e4d md""" Forward mode AD attaches a infitesimal number $\epsilon$ to a variable, when applying a function $f$, it does the following transformation ```math \begin{align} f(x+g \epsilon) = f(x) + f'(x) g\epsilon + \mathcal{O}(\epsilon^2) \end{align} ``` The higher order infinitesimal is ignored. **In the program**, we can define a *dual number* with two fields, just like a complex number ``` f((x, g)) = (f(x), f'(x)*g) ``` """ # ╔═╡ b2c1936c-2c27-4fbb-8183-e38c5e858483 res = sin(Dual(π/4, 2.0)) # ╔═╡ 8be1b812-fcac-404f-98aa-0571cb990f34 res === Dual(sin(π/4), cos(π/4)*2.0) # ╔═╡ 33e0c762-c75e-44aa-bfe2-bff92dd1ace8 md" We can apply this transformation consecutively, it reflects the chain rule. 
```math \begin{align} \frac{\partial \vec y_{i+1}}{\partial x} &= \boxed{\frac{\partial \vec y_{i+1}}{\partial \vec y_i}}\frac{\partial \vec y_i}{\partial x}\\ &\text{local Jacobian} \end{align} ``` " # ╔═╡ c59c35ee-1907-4736-9893-e22c052150ca let lb = textstyle(:math, fontsize(8), width=0.5, height=0.5) tb = textstyle(:default, fontsize(10), Compose.font("monospace")) tb_big = textstyle(:default, fontsize(3.5), fill("white"), Compose.font("monospace")) nb = nodestyle(:circle, fill("white"), Compose.stroke("black"); r=0.08) tri = nodestyle(:triangle, Compose.stroke("transparent"), fill("black"); r=0.02) eb = bondstyle(:default, linewidth(0.5mm)) ebr = bondstyle(:default, Compose.stroke("red"), linewidth(0.5mm)) ebd = bondstyle(:default, linewidth(0.5mm), dashed=true) eba = bondstyle(:default, linewidth(0.5mm), Compose.arrow(), Compose.stroke("red"), Compose.fill("red")) function arrow(x, y) mid = (x .+ y) ./ 2 t = nodestyle(:triangle, fill("red"), θ=π/2-atan((y .- x)...)-1π/6) ebr >> (x, y) t >> mid end Compose.set_default_graphic_size(15cm, 5cm) x = (0.1, 0.5) fi0 = (0.35, 0.5) fi1 = (0.7, 0.5) fi2 = (1.0, 0.5) img = canvas() do nb >> fi0 nb >> fi1 lb >> (fi0 .- (0.05, 0.1), "f_{i-1}") lb >> (fi1 .- (0.02, 0.1), "f_{i}") lb >> (x, "x") lb >> ((fi1 .+ fi0) ./ 2 .- (0.02, 0.0), raw"\vec{y}_{i}") lb >> ((fi1 .+ fi2) ./ 2 .- (0.05, 0.0), raw"\vec{y}_{i+1}") lb >> ((fi1 .+ fi2) ./ 2 .- (0.05, 0.0), "\\vec{y}_{i+1}") lb >> (x .- (0.00, 0.25), raw"\color{red}{1}") lb >> ((fi1 .+ fi0) ./ 2 .- (0.05, 0.45), raw"\color{red}{\frac{\partial \vec{y}_{i}}{\partial x}}") lb >> ((fi1 .+ fi2) ./ 2 .- (0.08, 0.45), raw"\color{red}{\frac{\partial \vec{y}_{i+1}}{\partial x}}") ebd >> (x, fi0) eb >> (fi0, fi1) eb >> (fi1, fi2) #arrow((fi1 .+ fi0) ./ 2 .+ (0.08, -0.3), (fi1 .+ fi2) ./ 2 .+ (-0.08, -0.3)) arrow((fi1 .+ fi0) ./ 2 .+ (0.08, -0.3), (fi1 .+ fi2) ./ 2 .+ (-0.08, -0.3)) end img end # ╔═╡ 0ae13734-b826-4dbf-93d1-11044ce88bd4 x_ = Dual(π/4, 1.0) # ╔═╡ 99187515-c8be-49c2-8d70-9c2998d9993c sin(x_) # ╔═╡ 78ca6b08-84c4-4e4d-8412-ae6c28bfafce md"when automatic comes in" # ╔═╡ f12b25d8-7c78-4686-b46d-00b34e565605 let x = Dual(π/4, 1.0) z = Dual(1.1) for i=1:10 x = sin(x) * z end x end # ╔═╡ d90c3cc9-084d-4cf7-9db7-42cea043030b md""" **Example:** Computing two gradients $\frac{\partial z\sin x}{\partial x}$ and $\frac{\partial \sin^2x}{\partial x}$ at one sweep """ # ╔═╡ 93c98cb2-18af-47df-afb3-8c5a34b4723c let lb = textstyle(:math, fontsize(8), width=1.0, height=0.5) tb = textstyle(:default, fontsize(3.5), Compose.font("monospace")) tb_big = textstyle(:default, fontsize(4.5), fill("white"), Compose.font("monospace")) nb = nodestyle(:circle, fill("black"), Compose.stroke("transparent"); r=0.05) tri = nodestyle(:triangle, Compose.stroke("transparent"), fill("black"); r=0.02) eb = bondstyle(:default, linewidth(0.5mm)) x_x = (0.1, 0.25) x_y = (0.9, 0.5) x_y2 = (0.9, 0.25) x_z = (0.3, 0.5) x_sin = (0.3, 0.25) x_mul = (0.5, 0.5) x_square = (0.5, 0.25) function arrow(x, y) mid = (x .+ y) ./ 2 t = nodestyle(:triangle, θ=π/2-atan((y .- x)...)-1π/6) eb >> (x, y) t >> mid end img = canvas() do nb >> x_sin nb >> x_mul nb >> x_square tb_big >> (x_sin, "sin") tb_big >> (x_mul .+ (0, 0.01), "*") tb_big >> (x_square, "^2") arrow(x_sin, x_mul) arrow(x_x, x_sin) arrow(x_mul, x_y) arrow(x_square, x_y2) arrow(x_z, x_mul) arrow(x_sin, x_square) tb >> ((x_x .+ x_sin) ./ 2 .- (0.02, 0.04), "x+ϵˣ") tb >> ((x_sin .+ x_mul) ./ 2 .- (0.08, 0.04), "sin(x)+cos(x)*ϵˣ") tb >> ((x_y .+ x_mul) ./ 2 .- (-0.04, 0.055), 
"z*sin(x)\n+z*cos(x)*ϵˣ") tb >> ((x_y2 .+ x_square) ./ 2 .- (-0.04, 0.055), "sin(x)^2\n+2*sin(x)*cos(x)*ϵˣ") tb >> ((x_z .+ x_mul) ./ 2 .- (0.05, 0.02), "z") end Compose.set_default_graphic_size(100mm, 100mm/2) Compose.compose(context(0, -0.15, 1, 2), img) end # ╔═╡ 2dc74e15-e2ea-4961-b43f-0ada1a73d80a md"so the gradients are $z\cos x$ and $2\sin x\cos x$" # ╔═╡ 7ee75a15-eaea-462a-92b6-293813d2d4d7 md""" **What if we want to compute gradients for multiple inputs?** The computing time grows **linearly** as the number of variables that we want to differentiate. But does not grow significantly with the number of outputs. """ # ╔═╡ 02a25b73-7353-43b1-8738-e7ca472d0cc7 md""" ## Reverse mode automatic differentiation """ # ╔═╡ 2afb984f-624e-4381-903f-ccc1d8a66a17 md"On the other side, the back-propagation can differentiate **many inputs** with respect to a **single output** efficiently" # ╔═╡ 7e5d5e69-90f2-4106-8edf-223c150a8168 md""" ```math \begin{align} \frac{\partial \mathcal{L}}{\partial \vec y_i} = \frac{\partial \mathcal{L}}{\partial \vec y_{i+1}}&\boxed{\frac{\partial \vec y_{i+1}}{\partial \vec y_i}}\\ &\text{local jacobian?} \end{align} ``` """ # ╔═╡ 92d7a938-9463-4eee-8839-0b8c5f762c79 let lb = textstyle(:math, fontsize(8), width=0.5, height=0.5) tb = textstyle(:default, fontsize(10), Compose.font("monospace")) tb_big = textstyle(:default, fontsize(3.5), fill("white"), Compose.font("monospace")) nb = nodestyle(:circle, fill("white"), Compose.stroke("black"); r=0.08) tri = nodestyle(:triangle, Compose.stroke("transparent"), fill("black"); r=0.02) eb = bondstyle(:default, linewidth(0.5mm)) ebr = bondstyle(:default, Compose.stroke("red"), linewidth(0.5mm)) ebd = bondstyle(:default, linewidth(0.5mm), dashed=true) eba = bondstyle(:default, linewidth(0.5mm), Compose.arrow(), Compose.stroke("red"), Compose.fill("red")) function arrow(x, y) mid = (x .+ y) ./ 2 t = nodestyle(:triangle, fill("red"), θ=π/2-atan((y .- x)...)-1π/6) ebr >> (x, y) t >> mid end Compose.set_default_graphic_size(15cm, 5cm) x = (0.1, 0.5) fi0 = (0.35, 0.5) fi1 = (0.7, 0.5) fi2 = (0.9, 0.5) img = canvas() do nb >> fi0 nb >> fi1 lb >> (fi0 .- (0.02, 0.1), "f_{i}") lb >> (fi1 .- (0.05, 0.1), "f_{i+1}") lb >> (fi2 .- (0.05, 0.0), raw"\mathcal{L}") lb >> ((fi0 .+ x) ./ 2 .- (0.05, 0.0), raw"\vec{y}_{i}") lb >> ((fi0 .+ fi1) ./ 2 .- (0.05, 0.0), raw"\vec{y}_{i+1}") lb >> ((fi0 .+ fi1) ./ 2 .- (0.05, 0.0), "\\vec{y}_{i+1}") lb >> (fi2 .- (0.05, 0.25), raw"\color{red}{1}") lb >> ((fi0 .+ x) ./ 2 .- (0.08, 0.45), raw"\color{red}{\frac{\partial \mathcal{L}}{\partial \vec{y}_{i}}}") lb >> ((fi0 .+ fi1) ./ 2 .- (0.08, 0.45), raw"\color{red}{\frac{\partial \mathcal{L}}{\partial \vec{y}_{i+1}}}") ebd >> (fi1, fi2) eb >> (fi0, fi1) eb >> (x, fi0) #arrow((fi1 .+ fi0) ./ 2 .+ (0.08, -0.3), (fi1 .+ fi2) ./ 2 .+ (-0.08, -0.3)) arrow( (fi0 .+ fi1) ./ 2 .+ (-0.08, -0.3), (fi0 .+ x) ./ 2 .+ (0.05, -0.3),) end img end # ╔═╡ 4b1a0b59-ddc6-4b2d-b5f5-d92084c31e46 md"### How to visit local Jacobians in the reversed order? " # ╔═╡ 81f16b8b-2f0b-4ba3-8c26-6669eabf48aa md"The naive approach is to store everything." # ╔═╡ fb6c3a48-550a-4d2e-a00b-a1e40d86b535 md""" **Example:** Computing the gradient $\frac{\partial z\sin x}{\partial x}$ and $\frac{\partial z\sin x}{\partial z}$ by back propagating cached local information. 
""" # ╔═╡ ab6fa4ac-29ed-4722-88ed-fa1caf2072f3 let lb = textstyle(:math, fontsize(10), width=1.0, height=0.5) tb = textstyle(:default, fontsize(3.5), Compose.font("monospace")) tbc = textstyle(:default, fontsize(3.5), fill("red"), Compose.font("monospace")) tb_big = textstyle(:default, fontsize(4), fill("white"), Compose.font("monospace")) nb = nodestyle(:circle, fill("black"), Compose.stroke("transparent"); r=0.05) tri = nodestyle(:triangle, Compose.stroke("transparent"), fill("black"); r=0.02) eb = bondstyle(:default, linewidth(0.5mm)) x_x = (0.1, 0.2) x_y = (0.9, 0.5) x_z = (0.1, 0.7) x_sin = (0.3, 0.3) x_mul = (0.5, 0.5) function arrow(x, y) mid = (x .+ y) ./ 2 t = nodestyle(:triangle, θ=π/2-atan((y .- x)...)-1π/6) eb >> (x, y) t >> mid end img1 = canvas() do nb >> x_sin nb >> x_mul tb_big >> (x_sin, "sin") tb_big >> (x_mul .+ (0, 0.01), "*") arrow(x_sin, x_mul) arrow(x_x, x_sin) arrow(x_mul, x_y) arrow(x_z, x_mul) tb >> ((x_x .+ x_sin) ./ 2 .- (0.0, 0.1), "x \n push(Σ,x)") tb >> ((x_sin .+ x_mul) ./ 2 .- (-0.15, 0.04), "s = sin(x) \n push(Σ,s)") tb >> ((x_y .+ x_mul) ./ 2 .- (-0.05, 0.04), "y = z*sin(x)") tb >> ((x_z .+ x_mul) ./ 2 .- (0.05, 0.07), "z\n push(Σ,z)") end img2 = canvas() do nb >> x_sin nb >> x_mul tb_big >> (x_sin, "sin") tb_big >> (x_mul .+ (0, 0.01), "*") arrow(x_mul, x_sin) arrow(x_sin, x_x) arrow(x_y, x_mul) arrow(x_mul, x_z) tb >> ((x_x .+ x_sin) ./ 2 .- (0.0, 0.1), "x = pop(Σ)\nx̄ = cos(x)*s̄") tb >> ((x_sin .+ x_mul) ./ 2 .- (-0.12, 0.04), "z = pop(Σ)\ns̄ = z*ȳ") tb >> ((x_y .+ x_mul) ./ 2 .- (-0.05, 0.06), "y\nȳ=1") tb >> ((x_z .+ x_mul) ./ 2 .- (0.05, 0.07), "s = pop(Σ)\nz̄ = s*ȳ") end Compose.set_default_graphic_size(150mm, 75mm/1.4) Compose.compose(context(), (context(0, -0.1, 0.5, 1.4), img1), (context(0.5, -0.1, 0.5, 1.4), img2) ) end # ╔═╡ 8e72d934-e307-4505-ac82-c06734415df6 md"Here, we use $\overline y$ for $\frac{\partial \mathcal{L}}{\partial y}$, which is also called the adjoint." # ╔═╡ e6ff86a9-9f54-474b-8111-a59a25eda506 md"### Primitives on different scales" # ╔═╡ 9c1d9607-a634-4350-aacd-2d40984d647d md"We call the leaf nodes defining AD rules \"**primitives**\"" # ╔═╡ 63db2fa2-50b2-4940-b8ee-0dc6e3966a57 md" **Design Decision** * A: If we define primitives on **arrays**, we need tons of manually defined backward rules. (Jax, Pytorch, Zygote.jl, ReverseDiff.jl et al.) * B: If we define primitives on **scalar instructions**, we will have worse tensor performance. (Tapenade, Adept, NiLang et al.) *Note*: Here, implementing AD on scalars means specifically the **optimal checkpointing** approach, rather than a package like Jax, Zygote and ReverseDiff that having scalar support. " # ╔═╡ 693167e7-e80c-401d-af89-55b5fae30848 let w, h = 0.22, 0.1 lb = Compose.compose(context(), polygon([(-w, -h), (-w, h), (w, h), (w, -h)]), Compose.stroke("transparent")) lb2 = Compose.compose(context(), polygon([(-w, -h), (-w, h), (w, h), (w, -h)]), Compose.stroke("transparent"), fill("red")) tb = Compose.compose(context(), Compose.text(0.0, 0.0, ""), fontsize(3), Compose.font("monospace")) tb_big = textstyle(:default, fontsize(3), fill("white"), Compose.font("monospace")) eb = bondstyle(:default, linewidth(0.5mm)) ar = bondstyle(:default, linewidth(0.3mm), Compose.arrow()) xprog = (0.25, 0.15) xtensors = (0.25, 0.5) t1 = (0.5, 0.15) t2 = (0.5, 0.5) t3 = (0.5, 0.85) xscalars2 = (0.25, 0.85) function box(loc, text; color="black") (color=="black" ? 
lb : lb2) >> loc tb_big >> (loc, text) end Compose.set_default_graphic_size(10cm, 5cm) canvas() do box(xprog, "Program") ar >> (xprog, xtensors .+ (0, -h-0.03)) #ar >> (xprog, xscalars .+ (-w/2, -h-0.03)) ar >> (xtensors, xscalars2 .+ (0, -h-0.05)) box(xtensors, "Functions on arrays") #box(xscalars, "Functions on Scalars") box(xscalars2, "Finite instructions"; color="red") tb >> (t1, "Neural networks") tb >> (t2, "matrix multiplication") tb >> (t3, "+, -, *") end end # ╔═╡ 4cd70901-2142-4868-9a33-c46ca0d064ec html"""
<table>
<tr><th></th><th>on tensors</th><th>on finite instructions</th></tr>
<tr><td>meaning</td><td>defining backward rules manually for functions on tensors</td><td>defining backward rules on a limited set of basic scalar operations, and generating gradient code using source code transformation</td></tr>
<tr><td>pros and cons</td><td>1. Good tensor performance<br>2. Mature machine learning ecosystem<br>3. Need to define backward rules manually</td><td>1. Reasonable scalar performance<br>2. Hard to utilize GPU kernels (except NiLang.jl) and BLAS</td></tr>
<tr><td>packages</td><td>Jax<br>PyTorch</td><td>Tapenade<br>Adept<br>NiLang.jl</td></tr>
</table>
""" # ╔═╡ 89018a35-76f4-4f23-b15a-a600db046d6f md"## A book" # ╔═╡ 1d219222-0778-4c37-9182-ed5ccbb3ef32 leftright(html""" """, md"**Evaluating derivatives: principles and techniques of algorithmic differentiation** By: Griewank, Andreas, and Andrea Walther (2008)") # ╔═╡ 4ff09f7c-aeac-48bd-9d58-8446137c3acd md""" ## The AD ecosystem in Julia Please check JuliaDiff: [https://juliadiff.org/](https://juliadiff.org/) A short list: * Forward mode AD: ForwardDiff.jl * Reverse mode AD (tensor): ReverseDiff.jl/Zygote.jl * Reverse mode AD (scalar): NiLang.jl Warnings * The main authors of `Tracker`, `ReverseDiff` and `Zygote` are not maintaining them anymore. """ #= | | Rules | Favors Tensor? | Type | | ---- | ---- | --- | --- | | Zygote | C | ✓ | R | | ReverseDiff | D | ✓ | R | | Nabla | D→C | ✓ | R | | Tracker | D | ✓ | R | | Yota | C | ✓ | R | | NiLang | - | × | R | | Enzyme | - | × | R | | ForwardDiff | - | × | F | | Diffractor | ? | ? | ? | * R: reverse mode * F: forward mode * C: ChainRules * D: DiffRules """ =# # ╔═╡ ea44037b-9359-4fbd-990f-529d88d54351 md"# Quick summary 1. The history of AD is longer than many people have thought. People are most familar with *reverse mode AD with primitives implemented on tensors* that brings the boom of machine learning. There are also AD frameworks that can differentiate a general program directly, which does not require users defining AD rules manually. 2. **Forward mode AD** propagate gradients forward, it has a computational overhead propotional to the number of input parameters. 2. **Backward mode AD** propagate gradients backward, it has a computational overhead propotional to the number of output parameters. * primitives on **tensors** v.s. **scalars** * it is very expensive to reverse the program 4. Julia has one of the most active AD community! #### Forward v.s. Backward when is forward mode AD more useful? * It is often combined with backward mode AD for obtaining Hessians (forward over backward). * Having <20 input parameters. when is backward mode AD more useful? * In most variational optimizations, especially when we are training a neural network with ~ 100M parameters. " # ╔═╡ e731a8e3-6462-4a60-83e9-6ab7ddfff50e md"# How do AD libraries work?" # ╔═╡ 685c2b28-b071-452c-a881-801128dcb6c3 md"`ForwardDiff` is operator overloading based, many of its overheads can be optimized by Julia's JIT compiler." # ╔═╡ 177ddfc2-2cbe-4dba-9d05-2857633dd1ae md"# [Tapenade](http://tapenade.inria.fr:8080/tapenade/index.jsp) ![](http://tapenade.inria.fr:8080/tapenade/tapenadelogo.gif)" # ╔═╡ 6c2a3a93-385f-4758-9b6e-4cb594a8e856 md"## Example 1: Bessel Example" # ╔═╡ fb8168c2-8489-418b-909b-cede57b5ae64 md"bessel.f90" # ╔═╡ fdb39284-dbb1-49fa-9a1c-f360f9e6b765 md""" ```fortran subroutine besselj(res, v, z, atol) implicit none integer, intent(in) :: v real*8, intent(in) :: z, atol real*8, intent(out) :: res real*8 :: s integer :: k, i, factv k = 0 factv = 1 do i = 2,v factv = factv * i enddo s = (z/2.0)**v / factv res = s do while(abs(s) > atol) k = k + 1 s = -s / k / (k+v) * ((z/2) ** 2) res = res + s enddo endsubroutine besselj ``` """ # ╔═╡ 60214f22-c8bb-4a32-a882-4e6c727b29a9 md""" besselj_d.f90 (forward mode) ```fortran ! Generated by TAPENADE (INRIA, Ecuador team) ! Tapenade 3.15 (master) - 15 Apr 2020 11:54 ! ! Differentiation of besselj in forward (tangent) mode: ! variations of useful results: res ! with respect to varying inputs: z ! 
RW status of diff variables: res:out z:in SUBROUTINE BESSELJ_D(res, resd, v, z, zd, atol) IMPLICIT NONE INTEGER, INTENT(IN) :: v REAL*8, INTENT(IN) :: z, atol REAL*8, INTENT(IN) :: zd REAL*8, INTENT(OUT) :: res REAL*8, INTENT(OUT) :: resd REAL*8 :: s REAL*8 :: sd INTEGER :: k, i, factv INTRINSIC ABS REAL*8 :: abs0 REAL*8 :: pwx1 REAL*8 :: pwx1d REAL*8 :: pwr1 REAL*8 :: pwr1d INTEGER :: temp k = 0 factv = 1 DO i=2,v factv = factv*i END DO pwx1d = zd/2.0 pwx1 = z/2.0 IF (pwx1 .LE. 0.0 .AND. (v .EQ. 0.0 .OR. v .NE. INT(v))) THEN pwr1d = 0.0_8 ELSE pwr1d = v*pwx1**(v-1)*pwx1d END IF pwr1 = pwx1**v sd = pwr1d/factv s = pwr1/factv resd = sd res = s DO WHILE (.true.) IF (s .GE. 0.) THEN abs0 = s ELSE abs0 = -s END IF IF (abs0 .GT. atol) THEN k = k + 1 temp = k*(k+v)*(2*2) sd = -((z**2*sd+s*2*z*zd)/temp) s = -(s*(z*z)/temp) resd = resd + sd res = res + s ELSE EXIT END IF END DO END SUBROUTINE BESSELJ_D ``` besselj_b.f90 (backward mode) ```fortran ! Generated by TAPENADE (INRIA, Ecuador team) ! Tapenade 3.15 (master) - 15 Apr 2020 11:54 ! ! Differentiation of besselj in reverse (adjoint) mode: ! gradient of useful results: res z ! with respect to varying inputs: res z ! RW status of diff variables: res:in-zero z:incr SUBROUTINE BESSELJ_B(res, resb, v, z, zb, atol) IMPLICIT NONE INTEGER, INTENT(IN) :: v REAL*8, INTENT(IN) :: z, atol REAL*8 :: zb REAL*8 :: res REAL*8 :: resb REAL*8 :: s REAL*8 :: sb INTEGER :: k, i, factv INTRINSIC ABS REAL*8 :: abs0 REAL*8 :: tempb INTEGER :: ad_count INTEGER :: i0 INTEGER :: branch k = 0 factv = 1 DO i=2,v factv = factv*i END DO s = (z/2.0)**v/factv ad_count = 1 DO WHILE (.true.) IF (s .GE. 0.) THEN abs0 = s ELSE abs0 = -s END IF IF (abs0 .GT. atol) THEN CALL PUSHINTEGER4(k) k = k + 1 CALL PUSHREAL8(s) s = -(s/k/(k+v)*(z/2)**2) ad_count = ad_count + 1 ELSE GOTO 100 END IF END DO CALL PUSHCONTROL1B(0) GOTO 110 100 CALL PUSHCONTROL1B(1) 110 DO i0=1,ad_count IF (i0 .EQ. 1) THEN CALL POPCONTROL1B(branch) IF (branch .EQ. 0) THEN sb = 0.0_8 ELSE sb = 0.0_8 END IF ELSE sb = sb + resb CALL POPREAL8(s) tempb = -(sb/(k*(k+v)*2**2)) sb = z**2*tempb zb = zb + 2*z*s*tempb CALL POPINTEGER4(k) END IF END DO sb = sb + resb IF (.NOT.(z/2.0 .LE. 0.0 .AND. (v .EQ. 0.0 .OR. v .NE. INT(v)))) zb = & & zb + v*(z/2.0)**(v-1)*sb/(2.0*factv) resb = 0.0_8 END SUBROUTINE BESSELJ_B ``` """ # ╔═╡ 7a6dbe09-cb7f-405f-b9b5-b350ca170e5f md"## Example 2: Matrix multiplication" # ╔═╡ 5dc4a849-76dd-4c4f-8828-755671839e5e md""" matmul_b.f90 ```fortran ! Generated by TAPENADE (INRIA, Ecuador team) ! Tapenade 3.16 (develop) - 9 Apr 2021 17:40 ! ! Differentiation of mymatmul in reverse (adjoint) mode: ! gradient of useful results: x y z ! with respect to varying inputs: x y z ! 
RW status of diff variables: x:incr y:incr z:in-out SUBROUTINE MYMATMUL_B(z, zb, x, xb, y, yb, m, n, o) IMPLICIT NONE INTEGER, INTENT(IN) :: m, n, o REAL*8, DIMENSION(:, :) :: z(m, n) REAL*8 :: zb(m, n) REAL*8, DIMENSION(:, :), INTENT(IN) :: x(m, o), y(o, n) REAL*8 :: xb(m, o), yb(o, n) REAL*8 :: temp REAL*8 :: tempb INTEGER :: i, j, k DO j=n,1,-1 DO i=m,1,-1 tempb = zb(i, j) zb(i, j) = 0.0_8 DO k=o,1,-1 xb(i, k) = xb(i, k) + y(k, j)*tempb yb(k, j) = yb(k, j) + x(i, k)*tempb END DO END DO END DO END SUBROUTINE MYMATMUL_B ``` """ # ╔═╡ b053f11b-9ed7-47ff-ab32-0c70b87e71ed md"## Example 3: Pyramid" # ╔═╡ 7b1aa6dd-647f-44cb-b580-b58e23e8b5a6 html""" """ # ╔═╡ b96bac75-b4ad-45f7-aeec-cb6a387eebf0 md"You will see a lot allocation" # ╔═╡ 5fe022eb-6a17-466e-a6d0-d67e82af23cd md"pyramid.f90" # ╔═╡ 92047e95-7eba-4021-9668-9bb4b92261d7 md""" ```fortran ! Differentiation of pyramid in reverse (adjoint) mode: ! gradient of useful results: v x ! with respect to varying inputs: v x ! RW status of diff variables: v:in-out x:incr SUBROUTINE PYRAMID_B(v, vb, x, xb, n) IMPLICIT NONE INTEGER, INTENT(IN) :: n REAL*8 :: v(n, n) REAL*8 :: vb(n, n) REAL*8, INTENT(IN) :: x(n) REAL*8 :: xb(n) INTEGER :: i, j INTRINSIC SIN INTRINSIC COS INTEGER :: ad_to DO j=1,n v(1, j) = x(j) END DO DO i=1,n-1 DO j=1,n-i CALL PUSHREAL8(v(i+1, j)) v(i+1, j) = SIN(v(i, j))*COS(v(i, j+1)) END DO CALL PUSHINTEGER4(j - 1) END DO DO i=n-1,1,-1 CALL POPINTEGER4(ad_to) DO j=ad_to,1,-1 CALL POPREAL8(v(i+1, j)) vb(i, j) = vb(i, j) + COS(v(i, j))*COS(v(i, j+1))*vb(i+1, j) vb(i, j+1) = vb(i, j+1) - SIN(v(i, j+1))*SIN(v(i, j))*vb(i+1, j) vb(i+1, j) = 0.0_8 END DO END DO DO j=n,1,-1 xb(j) = xb(j) + vb(1, j) vb(1, j) = 0.0_8 END DO END SUBROUTINE PYRAMID_B ``` """ # ╔═╡ e2ae1084-8759-4f27-8ad1-43a88e434a3d md"## How does NiLang avoid too many allocation?" # ╔═╡ edd3aea8-abdb-4e12-9ef9-12ac0fff835b @i function pyramid!(y!, v!, x::AbstractVector{T}) where T @safe @assert size(v!,2) == size(v!,1) == length(x) @inbounds for j=1:length(x) v![1,j] += x[j] end @invcheckoff @inbounds for i=1:size(v!,1)-1 for j=1:size(v!,2)-i @routine begin @zeros T c s c += cos(v![i,j+1]) s += sin(v![i,j]) end v![i+1,j] += c * s ~@routine end end y! 
+= v![end,1] end # ╔═╡ a2904efb-186c-449d-b1aa-caf530f88e91 @i function power(x3, x) @routine begin x2 ← zero(x) x2 += x^2 end x3 += x2 * x ~@routine end # ╔═╡ 14faaf82-ad3e-4192-8d48-84adfa30442d ex = NiLangCore.precom_ex(NiLang, :(for j=1:size(v!,2)-i @routine begin @zeros T c s c += cos(v![i,j+1]) s += sin(v![i,j]) end v![i+1,j] += c * s ~@routine end)) |> NiLangCore.rmlines # ╔═╡ 5d141b88-ec07-4a02-8eb3-37405e5c9f5d NiLangCore.dual_ex(NiLang, ex) # ╔═╡ 0907e683-f216-4cf6-a210-ae5181fdc487 function pyramid0!(v!, x::AbstractVector{T}) where T @assert size(v!,2) == size(v!,1) == length(x) for j=1:length(x) v![1,j] = x[j] end @inbounds for i=1:size(v!,1)-1 for j=1:size(v!,2)-i v![i+1,j] = cos(v![i,j+1]) * sin(v![i,j]) end end end # ╔═╡ 0bbfa106-f465-4a7b-80a7-7732ba435822 x = randn(20); # ╔═╡ 805c7072-98fa-4086-a69d-2e126c55af36 let @benchmark pyramid0!(v, x) seconds=1 setup=(x=randn(1000); v=zeros(1000, 1000)) end # ╔═╡ 7e527024-c294-4c16-8626-9953588d9b6a let @benchmark pyramid!(0.0, v, x) seconds=1 setup=(x=10*randn(1000); v=zeros(1000, 1000)) end # ╔═╡ 3e59c65a-ceed-42ed-be64-a6964db016e7 pyramid!(0.0, zeros(20, 20), x) # ╔═╡ 29f85d05-99fd-4843-9be0-5663e681dad7 html""" """ # ╔═╡ e7830e55-bd9e-4a8a-9239-4191a5f0b1d1 let @benchmark NiLang.AD.gradient(Val(1), pyramid!, (0.0, v, x)) seconds=1 setup=(x=randn(1000); v=zeros(1000, 1000)) end # ╔═╡ de2cd247-ba68-4ba4-9784-27a743478635 md"## NiLang's implementation" # ╔═╡ dc929c23-7434-4848-847a-9fa696e84776 md""" ```math \begin{align} &v_{−1} &= & x_1 &=&1.5000\\ &v_0 &= & x_2 &=&0.5000\\ &v_1 &= & v_{−1}/v_0 &=&1.5000/0.5000 &= 3.0000\\ &v_2 &= & \sin(v1)&=& \sin(3.0000) &= 0.1411\\ &v_3 &= & \exp(v0)&=& \exp(0.5000) &= 1.6487\\ &v_4 &= & v_1 − v_3 &=&3.0000 − 1.6487 &= 1.3513\\ &v_5 &= & v_2 + v_4 &=&0.1411 + 1.3513 &= 1.4924\\ &v_6 &= & v_5 ∗ v_4 &=&1.4924 ∗ 1.3513 &= 2.0167\\ &y &= & v_6 &=&2.0167 \end{align} ``` """ # ╔═╡ 4f1df03f-c315-47b1-b181-749e1231594c html""" """ # ╔═╡ 7eccba6a-3ad5-440b-9c5d-392dc8dc7aba @i function example_linear(y::T, x1::T, x2::T) where T @routine begin @zeros T v1 v2 v3 v4 v5 v1 += x1 / x2 v2 += sin(v1) v3 += exp(x2) v4 += v1 - v3 v5 += v2 + v4 end y += v5 * v4 ~@routine end # ╔═╡ 4a858a3e-ce28-4642-b061-3975a3ed99ff md"NOTES: * a statement changes values inplace directly, * no return statement, returns the input arguments directly * `@routine ; ; ~@routine` is the Bennett's compute copy uncompute design pattern " # ╔═╡ 674bb3bb-637b-44f2-bf6d-d1678da03fbd PlusEq(identity)(2, 3) # ╔═╡ 5a59d96f-b2f1-4564-82c7-7f0fe181afb8 prettify(@macroexpand @i function f(y::T, x::T) where T y.re += x.re end) # ╔═╡ 55d2f8ee-4f77-4d44-b704-30643dbbab84 @i function f3(y::T, x::T) where T y.re += x.re end # ╔═╡ 14951168-97c2-43ae-8d5e-5506408a2bb2 f3(1+2im, 2+3im) # ╔═╡ 4f564581-6032-449c-8b15-3c741f44237a x5 = GVar(3+4.0im) # ╔═╡ a36516e8-76c1-4bff-8a12-3e1e621b857d ~example_linear # ╔═╡ 402b861c-d363-4d23-b9e9-eb088f57b5c4 expre = NiLangCore.precom_ex(@__MODULE__, :(begin @routine begin @zeros T v1 v2 v3 v4 v5 v1 += x1 / x2 v2 += sin(v1) v3 += exp(x2) v4 += v1 - v3 v5 += v2 + v4 end y += v5 * v4 ~@routine end), NiLangCore.PreInfo(Symbol[])) |> NiLangCore.rmlines # ╔═╡ 63975a80-1b41-4f55-91a1-4a316ad7bf26 example_linear(0.0, 1.5, 0.5) # ╔═╡ 6f688f88-432a-42b2-a2db-19d6bb282e0a NiLangCore.dual_ex(@__MODULE__, expre) # ╔═╡ fb46db14-f7e0-4f01-9096-02334c62942d (~example_linear)(example_linear(0.0, 1.5, 0.5)...) 
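Under the hood, an accumulating statement such as `y += f(x)` dispatches to the bijection `PlusEq(f)`, and its uncall to `MinusEq(f)`. A small sketch of how these callables behave (using only the exported `PlusEq`/`MinusEq` wrappers seen in the cells above and below):

```julia
using NiLang

PlusEq(identity)(2, 3)        # (5, 3): the map (y, x) -> (y + x, x)
MinusEq(identity)(5, 3)       # (2, 3): the inverse accumulator

# the same pattern for a unary rule, `y += exp(x)`
y, x = PlusEq(exp)(0.0, 1.5)  # y == exp(1.5)
MinusEq(exp)(y, x)            # back to (0.0, 1.5)
```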
# ╔═╡ b2c3db3d-c250-4daa-8453-3c9a2734aede md"**How to get gradients?**" # ╔═╡ 9a986264-5ba7-4697-a00d-711f8efe29f0 let y, x1, x2 = 0.0, 1.5, 0.5 # compute (y_out, x1_out, x2_out) = example_linear(y, x1, x2) # wrap elements with GVar y_out_with_g = GVar(y_out, 1.0) x1_out_with_g = GVar(x1_out, 0.0) x2_out_with_g = GVar(x2_out, 0.0) # uncompute (y_with_g, x1_with_g, x2_with_g) = (~example_linear)(y_out_with_g, x1_out_with_g, x2_out_with_g) # get gradients grad(y_with_g), grad(x1_with_g), grad(x2_with_g) end # ╔═╡ 560cf3e9-0c14-4497-85b9-f07045eea32a with_terminal() do dump(GVar) end # ╔═╡ 8ab79efc-e8d0-4c6f-81df-a89008142bb7 gvar1 = GVar(1.5, 0.0) # ╔═╡ 0eec318c-2c09-4dd6-9187-9c0273d29915 grad(gvar1) # ╔═╡ 1f0ef29c-0ad5-4d97-aeed-5ff44e86577a gvar2 = GVar(1.0, 2.0) # ╔═╡ 603d8fc2-5e7b-4d55-92b6-208b25ea6569 grad(gvar2) # ╔═╡ 2b3c765e-b505-4f07-9bcb-3c8cc47364ad md"To differentiate the operation `y += exp(x)`, we bind the backward rule to its inverse `y -= exp(x)`, i.e. `MinusEq(exp)` in the program." # ╔═╡ e0f266da-7e65-4398-bfd4-a6c0b54e626b MinusEq(exp)(gvar2, gvar1) # ╔═╡ e1d35886-79d0-40a5-bd33-1c4e5f4a0a9a md""" ```math \left(\begin{matrix}\overline y& \overline x\end{matrix}\right) \rightarrow \left(\begin{matrix}\overline y& \overline x\end{matrix}\right)\left(\begin{matrix} 1 & \exp(x) \\ 0 & 1 \end{matrix}\right) = \left(\begin{matrix}\overline y& \overline x + \exp(x) \overline y\end{matrix}\right) ``` """ # ╔═╡ b63a30b0-c75b-4998-a2b2-0b79574cab81 exp(1.5) * 2 # ╔═╡ 139bf020-c4a8-45c8-96fa-aeebc7ddaedc md"*one line version*" # ╔═╡ 8967c0f0-89f8-4893-b11b-253333d1a823 NiLang.AD.gradient(example_linear, (0.0, 1.5, 0.5); iloss=1) # ╔═╡ f2540450-5a07-4fb8-93fb-a6d48dd36a56 md"## Control Flows" # ╔═╡ 3acb2cfd-fa29-4a2b-8f23-f5aaf474edd0 (@code_julia for i=1:10 x += y end) |> NiLangCore.rmlines # ╔═╡ aa1547f2-5edd-4b7e-b93e-bdfc4e4fc6d5 md"""# Memory Management""" # ╔═╡ 6e76a107-4f51-4e32-b133-7b6e04d7d107 md"True reverse mode autodiff has to handle the memory wall problem." # ╔═╡ 999f7a8f-d72e-4ccd-8cbf-b5bbb7db1842 md""" ## Checkpointing """ # ╔═╡ 32772c2a-6b80-4779-963c-06974ff0d832 html""" """ # ╔═╡ 41642bd5-1321-490a-95ad-4c1d6363456f md" * red arrow: back propagation * black dot: cached * white dot: not cached " # ╔═╡ 2a553e32-05ef-4c2d-aba7-41185c6035d4 md"Most time efficient (checkpoint every step)" # ╔═╡ ab8345ce-e038-4d6b-9e1f-57e4f33bb67b html""" """ # ╔═╡ bb9c9a4c-601a-4708-9b2d-04d1583938f2 md"Most space efficient (only checkpoint the first step)" # ╔═╡ b9917e94-c33d-423f-a478-3252bacc2494 html""" """ # ╔═╡ 4978f404-11ff-41b8-a673-f2d051b1f526 md"Restricting the number of checkpoints, is an evenly checkpointed program optimal?" # ╔═╡ 73bd2e3b-902f-461b-860f-246257608ecd html""" """ # ╔═╡ 4dd47dc8-6dfa-47a4-a088-689b4b870762 md"## Optimal checkpointing" # ╔═╡ ecd975d2-9374-4f40-80ac-2cceda11e7fb md""" 1992 ~ Andreas Griewank, Achieving logarithmic growth of temporal and spatial complexity in reverse automatic differentiation. Julia implementation: [TreeverseAlgorithm.jl](https://github.com/GiggleLiu/TreeverseAlgorithm.jl) """ # ╔═╡ 832cc81d-a49d-46e7-9d2b-d8bde9bb1273 html""" """ # ╔═╡ 2192a1de-1042-4b13-a313-b67de489124c md""" 1. Divide the program into ``\delta`` segments, each segment having size $\eta(\delta, \tau) = \frac{(\delta+\tau)!}{\delta! \tau!}$, where ``\delta=1,...,d`` and ``\tau=t-1``. 2. Cache the first state of each segment, 3. Compute gradients in the last segment, 4. Deallocate the last checkpoint, 5. Divide the second-to-last segment into two parts. 6.
Recursively apply treeverse (Step 2-5). """ # ╔═╡ 01c709c7-806c-4389-bbb2-4081e64426d9 md"total number of steps ``T = \eta(d, t)``, both ``t`` and ``d`` can be logarithmic" # ╔═╡ b1e0cf83-4337-4044-a7d1-5fca8ae79268 md"## An example" # ╔═╡ 71f4b476-027d-4c8f-b561-1ee418bc9e61 html""" """ # ╔═╡ 042013cf-9cd2-409d-827f-a311a2f8ce62 md""" * black dot: current step, * gray dot: checkpointed state, * empty dot: state deallocated in current step, * red square: gradient computed. """ # ╔═╡ 82593cd0-1403-4597-8370-919c80494479 md"# Program is not always linear!" # ╔═╡ f58720b5-2bcb-4950-b453-bd59f648c66a md"You think your program is like" # ╔═╡ 4576d791-6af7-4ba5-9b80-fe99c0bb2e88 let Compose.set_default_graphic_size(15cm, 3cm) nb = nodestyle(:circle, r=0.01) eb = compose(context(), bondstyle(:default, r=0.1), Compose.arrow(), linewidth(0.2mm)) loc(i) = (i/11, 0.5) eloc(i) = (loc(i-1) .- (-0.02, 0.0), loc(i) .- (0.025, 0.0)) canvas() do for i=1:10 nb >> loc(i) i == 1 || eb >> eloc(i) end end end # ╔═╡ 6e9d17f1-b17d-4e8d-82a3-921558a20c0f md"or a DAG (directed acyclic graph)" # ╔═╡ f18d89f5-1129-43e0-8b4a-5c1fcd618eab let Compose.set_default_graphic_size(15cm, 3cm) nb = nodestyle(:circle, r=0.01) eb = compose(context(), bondstyle(:default, r=0.1), Compose.arrow(), linewidth(0.2mm)) loc(i) = (i/11, 0.2) loc2(i) = (i/11, 0.7) eloc(i, j) = shrink(loc(i), loc(j), 0.02, 0.025) eloc2(i, j) = shrink(loc2(i), loc2(j), 0.02, 0.025) eloc12(i, j) = shrink(loc(i), loc2(j), 0.1, 0.15) eloc21(i, j) = shrink(loc2(i), loc(j), 0.05, 0.1) canvas() do for i=1:10 nb >> loc(i) i == 1 || eb >> eloc(i-1,i) end for i=2:5 nb >> loc2(i) i == 2 || eb >> eloc2(i-1, i) end eb >> eloc12(2,2) eb >> eloc12(4,5) eb >> eloc21(5,7) end end # ╔═╡ 2912c7ed-75e3-4dfd-9c40-92115cc08194 md"The truth is" # ╔═╡ 5d1517c0-562b-40db-bec2-32b5494de1b8 let Compose.set_default_graphic_size(15cm, 3cm) nb = nodestyle(:circle, r=0.01) tb = textstyle(:default) eb = compose(context(), bondstyle(:default, r=0.1), Compose.arrow(), linewidth(0.2mm)) eb2 = compose(context(), bondstyle(:dcurve, r=0.8), Compose.arrow(), linewidth(0.2mm)) loc(i) = (i/11, 0.2) loc2(i) = (i/11, 0.7) eloc(i, j) = shrink(loc(i), loc(j), 0.02, 0.025) eloc2(i, j) = shrink(loc2(j), loc2(i), 0.02, 0.025) eloc12(i, j) = shrink(loc2(j), loc(i), 0.1, 0.15) eloc21(i, j) = shrink(loc(j), loc2(i), 0.05, 0.1) canvas() do for i=1:10 nb >> loc(i) i == 1 || eb >> eloc(i-1,i) end for i=2:5 nb >> loc2(i) i == 2 || eb >> eloc2(i-1, i) end eb >> eloc12(2,2) eb >> eloc12(4,5) tb >> ((0.3, 0.45), "× n") for i=7:8 nb >> loc2(i) i == 7 || eb >> eloc2(i-1, i) end eb >> eloc12(7,7) eb >> eloc12(8,8) tb >> ((0.68, 0.45), "× ∞") eb2 >> (loc(6) .+ (0.0, 0.1), loc(9) .+ (0, 0.15)) end end # ╔═╡ ae096ad2-3ae9-4440-a959-0d7d9a174f1d md"## Example 3: Sparse matrix multiplication" # ╔═╡ 8148bc1f-ef99-40a4-a5ce-0a42643f703d md"original implementation: [https://github.com/JuliaLang/julia/blob/master/stdlib/SparseArrays/src/linalg.jl](https://github.com/JuliaLang/julia/blob/master/stdlib/SparseArrays/src/linalg.jl) " # ╔═╡ bd86c5c2-16be-4cfd-ba7a-a0e2544d82d1 @i function mul!(C::StridedVecOrMat{T}, A::SparseMatrixCSC{T}, B::StridedVecOrMat{T}, α::Number) where T @safe A.n == size(B, 1) || throw(DimensionMismatch()) @safe A.m == size(C, 1) || throw(DimensionMismatch()) @safe size(B, 2) == size(C, 2) || throw(DimensionMismatch()) @invcheckoff for k = 1:size(C, 2) @inbounds for col = 1:A.n @routine begin αxj ← zero(T) αxj += α*B[col,k] end for j = A.colptr[col]:(A.colptr[col + 1] - 1) C[A.rowval[j], k] += 
A.nzval[j]*αxj end ~@routine end end end # ╔═╡ 11557d6b-3a1e-416d-874f-b8d217976f76 md"## Example 4: How to differentiate QR" # ╔═╡ 48a10ea2-5d32-4a55-b8c0-f6a5e82eace9 md"original implementation: [https://github.com/JuliaLang/julia/blob/master/stdlib/LinearAlgebra/src/qr.jl](https://github.com/JuliaLang/julia/blob/master/stdlib/LinearAlgebra/src/qr.jl) " # ╔═╡ fafc1b0f-6469-4b6c-a00d-5272a45fc69b md"See also" # ╔═╡ ad6cff7b-5cbf-4ab1-94f7-d21cbc171000 leftright(html"", md"**Matrix computations** Golub, Gene H., and Charles F. Van Loan (2013)") # ╔═╡ 4d373cf6-9b39-44bc-8f13-220933fc8f5c function qrfactPivotedUnblocked!(A::AbstractMatrix) m, n = size(A) piv = Vector(UnitRange{BlasInt}(1,n)) τ = Vector{eltype(A)}(undef, min(m,n)) for j = 1:min(m,n) # Find column with maximum norm in trailing submatrix jm = indmaxcolumn(view(A, j:m, j:n)) + j - 1 if jm != j # Flip elements in pivoting vector tmpp = piv[jm] piv[jm] = piv[j] piv[j] = tmpp # Update matrix with for i = 1:m tmp = A[i,jm] A[i,jm] = A[i,j] A[i,j] = tmp end end # Compute reflector of columns j x = view(A, j:m, j) τj = LinearAlgebra.reflector!(x) τ[j] = τj # Update trailing submatrix with reflector LinearAlgebra.reflectorApply!(x, τj, view(A, j:m, j+1:n)) end return LinearAlgebra.QRPivoted{eltype(A), typeof(A)}(A, τ, piv) end # ╔═╡ 293a68ca-e02f-47b3-85ed-aeeb8995f3ec struct Reflector{T,RT,VT<:AbstractVector{T}} ξ::T normu::RT sqnormu::RT r::T y::VT end # ╔═╡ fa5716f9-8bff-4295-812b-691ccdc12832 struct QRPivotedRes{T,RT,VT} factors::Matrix{T} τ::Vector{T} jpvt::Vector{Int} reflectors::Vector{Reflector{T,RT,VT}} vAs::Vector{Vector{T}} jms::Vector{Int} end # ╔═╡ 8324f365-fd12-4ca3-8ca6-657e5917f946 # Elementary reflection similar to LAPACK. The reflector is not Hermitian but # ensures that tridiagonalization of Hermitian matrices become real. 
See lawn72 @i function reflector!(R::Reflector{T,RT}, x::AbstractVector{T}) where {T,RT} n ← length(x) @inbounds @invcheckoff if n != 0 @zeros T ξ1 @zeros RT normu sqnormu ξ1 += x[1] sqnormu += abs2(ξ1) for i = 2:n sqnormu += abs2(x[i]) end if !iszero(sqnormu) normu += sqrt(sqnormu) if real(ξ1) < 0 NEG(normu) end ξ1 += normu R.y[1] -= normu for i = 2:n R.y[i] += x[i] / ξ1 end R.r += ξ1/normu end SWAP(R.ξ, ξ1) SWAP(R.normu, normu) SWAP(R.sqnormu, sqnormu) end end # ╔═╡ 70fb10ea-9229-46ef-8ba3-b1d3874b7929 # apply reflector from left @i function reflectorApply!(vA::AbstractVector{T}, x::AbstractVector, τ::Number, A::StridedMatrix{T}) where T (m, n) ← size(A) if length(x) != m || length(vA) != n @safe throw(DimensionMismatch("reflector has length ($(length(x)), $(length(vA))), which must match the first dimension of matrix A, ($m, $n)")) end @inbounds @invcheckoff if m != 0 for j = 1:n # dot @zeros T vAj vAj_τ vAj += A[1, j] for i = 2:m vAj += x[i]'*A[i, j] end vAj_τ += τ' * vAj # ger A[1, j] -= vAj_τ for i = 2:m A[i, j] -= x[i]*vAj_τ end vAj_τ -= τ' * vAj SWAP(vA[j], vAj) end end end # ╔═╡ 51504ba4-4711-48b7-aab9-d4f26c009659 function alloc(::typeof(reflector!), x::AbstractVector{T}) where T RT = real(T) Reflector(zero(T), zero(RT), zero(RT), zero(T), zero(x)) end # ╔═╡ f267e315-3c19-4345-8fba-641bb0ea515b @i function qr_pivoted!(res::QRPivotedRes, A::StridedMatrix{T}) where T m, n ← size(A) @invcheckoff @inbounds for j = 1:min(m,n) # Find column with maximum norm in trailing submatrix jm ← LinearAlgebra.indmaxcolumn(NiLang.value.(view(A, j:m, j:n))) + j - 1 if jm != j # Flip elements in pivoting vector SWAP(res.jpvt[jm], res.jpvt[j]) # Update matrix with for i = 1:m SWAP(A[i, jm], A[i, j]) end end # Compute reflector of columns j R ← alloc(reflector!, A |> subarray(j:m, j)) vA ← zeros(T, n-j) reflector!(R, A |> subarray(j:m, j)) # Update trailing submatrix with reflector reflectorApply!(vA, R.y, R.r, A |> subarray(j:m, j+1:n)) for i=1:length(R.y) SWAP(R.y[i], A[j+i-1, j]) end PUSH!(res.reflectors, R) PUSH!(res.vAs, vA) PUSH!(res.jms, jm) R → _zero(Reflector{T,real(T),Vector{T}}) vA → zeros(T, 0) jm → 0 end @inbounds for i=1:length(res.reflectors) res.τ[i] += res.reflectors[i].r end res.factors += A end # ╔═╡ a07b93b1-742b-41d4-bd0f-bc899de55338 function alloc_qr(A::AbstractMatrix{T}) where T (m, n) = size(A) τ = zeros(T, min(m,n)) jpvt = collect(1:n) reflectors = Reflector{T,real(T),Vector{T}}[] vAs = Vector{T}[] jms = Int[] QRPivotedRes(zero(A), τ, jpvt, reflectors, vAs, jms) end # ╔═╡ 5f207f59-b9f4-477f-b79f-0aee743bdb8e A = randn(ComplexF64, 20, 20); # ╔═╡ f88517d6-b87d-45ba-bf3f-67074fa51fca @test qr_pivoted!(alloc_qr(A), copy(A))[1].factors ≈ LinearAlgebra.qrfactPivotedUnblocked!(copy(A)).factors # ╔═╡ 45aef837-9b2c-49b2-b815-e4d60f103f58 let @testset "qr pivoted gradient" begin # rank deficient initial matrix n = 50 U = LinearAlgebra.qr(randn(n, n)).Q Σ = Diagonal((x=randn(n); x[n÷2+1:end] .= 0; x)) A = U*Σ*U' res = alloc_qr(A) @test rank(A) == n ÷ 2 qrres = qr_pivoted!(deepcopy(res), copy(A))[1] @test count(x->(x>1e-12), sum(abs2, QRPivoted(qrres.factors, qrres.τ, qrres.jpvt).R, dims=2)) == n ÷ 2 @i function loss(y, qrres, A) qr_pivoted!(qrres, A) y += abs(qrres.factors[1]) end nrloss(A) = loss(0.0, deepcopy(res), A)[1] ngA = zero(A) δ = 1e-5 for j=1:size(A, 2) for i=1:size(A, 1) A_ = copy(A) A_[i,j] -= δ/2 l1 = nrloss(copy(A_)) A_[i,j] += δ l2 = nrloss(A_) ngA[i,j] = (l2-l1)/δ end end gA = NiLang.AD.gradient(loss, (0.0, res, A); iloss=1)[3] @test real.(gA) ≈ ngA end end # ╔═╡ Cell 
order: # ╟─a1ef579e-4b66-4042-944e-7e27c660095e # ╟─100b4293-fd1e-4b9c-a831-5b79bc2a5ebe # ╟─f11023e5-8f7b-4f40-86d3-3407b61863d9 # ╟─9d11e058-a7d0-11eb-1d78-6592ff7a1b43 # ╟─b73157bf-1a77-47b8-8a06-8d6ec2045023 # ╟─ec13e0a9-64ff-4f66-a5a6-5fef53428fa1 # ╟─f8b0d1ce-99f7-4729-b46e-126da540cbbe # ╟─435ac19e-1c0c-4ee5-942d-f2a97c8c4d80 # ╟─48ecd619-d01d-43ff-8b52-7c2566c3fa2b # ╟─4878ce45-40ff-4fae-98e7-1be41e930e4d # ╠═ce44f8bd-692e-4eab-9ba4-055b25e40c81 # ╠═b2c1936c-2c27-4fbb-8183-e38c5e858483 # ╠═8be1b812-fcac-404f-98aa-0571cb990f34 # ╟─33e0c762-c75e-44aa-bfe2-bff92dd1ace8 # ╟─c59c35ee-1907-4736-9893-e22c052150ca # ╠═0ae13734-b826-4dbf-93d1-11044ce88bd4 # ╠═99187515-c8be-49c2-8d70-9c2998d9993c # ╟─78ca6b08-84c4-4e4d-8412-ae6c28bfafce # ╠═f12b25d8-7c78-4686-b46d-00b34e565605 # ╟─d90c3cc9-084d-4cf7-9db7-42cea043030b # ╟─93c98cb2-18af-47df-afb3-8c5a34b4723c # ╟─2dc74e15-e2ea-4961-b43f-0ada1a73d80a # ╟─7ee75a15-eaea-462a-92b6-293813d2d4d7 # ╟─02a25b73-7353-43b1-8738-e7ca472d0cc7 # ╟─2afb984f-624e-4381-903f-ccc1d8a66a17 # ╟─7e5d5e69-90f2-4106-8edf-223c150a8168 # ╟─92d7a938-9463-4eee-8839-0b8c5f762c79 # ╟─4b1a0b59-ddc6-4b2d-b5f5-d92084c31e46 # ╟─81f16b8b-2f0b-4ba3-8c26-6669eabf48aa # ╟─fb6c3a48-550a-4d2e-a00b-a1e40d86b535 # ╟─ab6fa4ac-29ed-4722-88ed-fa1caf2072f3 # ╟─8e72d934-e307-4505-ac82-c06734415df6 # ╟─e6ff86a9-9f54-474b-8111-a59a25eda506 # ╟─9c1d9607-a634-4350-aacd-2d40984d647d # ╟─63db2fa2-50b2-4940-b8ee-0dc6e3966a57 # ╟─693167e7-e80c-401d-af89-55b5fae30848 # ╟─4cd70901-2142-4868-9a33-c46ca0d064ec # ╟─89018a35-76f4-4f23-b15a-a600db046d6f # ╟─1d219222-0778-4c37-9182-ed5ccbb3ef32 # ╟─4ff09f7c-aeac-48bd-9d58-8446137c3acd # ╟─ea44037b-9359-4fbd-990f-529d88d54351 # ╟─e731a8e3-6462-4a60-83e9-6ab7ddfff50e # ╟─685c2b28-b071-452c-a881-801128dcb6c3 # ╟─177ddfc2-2cbe-4dba-9d05-2857633dd1ae # ╟─6c2a3a93-385f-4758-9b6e-4cb594a8e856 # ╟─fb8168c2-8489-418b-909b-cede57b5ae64 # ╟─fdb39284-dbb1-49fa-9a1c-f360f9e6b765 # ╟─60214f22-c8bb-4a32-a882-4e6c727b29a9 # ╟─7a6dbe09-cb7f-405f-b9b5-b350ca170e5f # ╟─5dc4a849-76dd-4c4f-8828-755671839e5e # ╟─b053f11b-9ed7-47ff-ab32-0c70b87e71ed # ╟─7b1aa6dd-647f-44cb-b580-b58e23e8b5a6 # ╟─b96bac75-b4ad-45f7-aeec-cb6a387eebf0 # ╟─5fe022eb-6a17-466e-a6d0-d67e82af23cd # ╟─92047e95-7eba-4021-9668-9bb4b92261d7 # ╟─e2ae1084-8759-4f27-8ad1-43a88e434a3d # ╠═edd3aea8-abdb-4e12-9ef9-12ac0fff835b # ╠═a2904efb-186c-449d-b1aa-caf530f88e91 # ╠═14faaf82-ad3e-4192-8d48-84adfa30442d # ╠═5d141b88-ec07-4a02-8eb3-37405e5c9f5d # ╠═0907e683-f216-4cf6-a210-ae5181fdc487 # ╠═805c7072-98fa-4086-a69d-2e126c55af36 # ╠═7e527024-c294-4c16-8626-9953588d9b6a # ╠═0bbfa106-f465-4a7b-80a7-7732ba435822 # ╠═3e59c65a-ceed-42ed-be64-a6964db016e7 # ╟─29f85d05-99fd-4843-9be0-5663e681dad7 # ╠═9a46597c-b1ee-4e3b-aed1-fd2874b6e77a # ╠═e7830e55-bd9e-4a8a-9239-4191a5f0b1d1 # ╟─de2cd247-ba68-4ba4-9784-27a743478635 # ╟─dc929c23-7434-4848-847a-9fa696e84776 # ╟─4f1df03f-c315-47b1-b181-749e1231594c # ╠═ccd38f52-104d-434a-aea3-dd94e571374f # ╠═7eccba6a-3ad5-440b-9c5d-392dc8dc7aba # ╠═f4230251-ba54-434a-b86b-f972c7389217 # ╟─4a858a3e-ce28-4642-b061-3975a3ed99ff # ╠═674bb3bb-637b-44f2-bf6d-d1678da03fbd # ╠═5a59d96f-b2f1-4564-82c7-7f0fe181afb8 # ╠═55d2f8ee-4f77-4d44-b704-30643dbbab84 # ╠═14951168-97c2-43ae-8d5e-5506408a2bb2 # ╠═4f564581-6032-449c-8b15-3c741f44237a # ╠═a36516e8-76c1-4bff-8a12-3e1e621b857d # ╠═402b861c-d363-4d23-b9e9-eb088f57b5c4 # ╠═63975a80-1b41-4f55-91a1-4a316ad7bf26 # ╠═6f688f88-432a-42b2-a2db-19d6bb282e0a # ╠═fb46db14-f7e0-4f01-9096-02334c62942d # ╟─b2c3db3d-c250-4daa-8453-3c9a2734aede # 
╠═69dc2685-b70f-4a81-af30-f02e0054bd52 # ╠═9a986264-5ba7-4697-a00d-711f8efe29f0 # ╠═560cf3e9-0c14-4497-85b9-f07045eea32a # ╠═8ab79efc-e8d0-4c6f-81df-a89008142bb7 # ╠═0eec318c-2c09-4dd6-9187-9c0273d29915 # ╠═1f0ef29c-0ad5-4d97-aeed-5ff44e86577a # ╠═603d8fc2-5e7b-4d55-92b6-208b25ea6569 # ╟─2b3c765e-b505-4f07-9bcb-3c8cc47364ad # ╠═e0f266da-7e65-4398-bfd4-a6c0b54e626b # ╟─e1d35886-79d0-40a5-bd33-1c4e5f4a0a9a # ╠═b63a30b0-c75b-4998-a2b2-0b79574cab81 # ╟─139bf020-c4a8-45c8-96fa-aeebc7ddaedc # ╠═8967c0f0-89f8-4893-b11b-253333d1a823 # ╟─f2540450-5a07-4fb8-93fb-a6d48dd36a56 # ╠═3acb2cfd-fa29-4a2b-8f23-f5aaf474edd0 # ╟─aa1547f2-5edd-4b7e-b93e-bdfc4e4fc6d5 # ╟─6e76a107-4f51-4e32-b133-7b6e04d7d107 # ╟─999f7a8f-d72e-4ccd-8cbf-b5bbb7db1842 # ╟─32772c2a-6b80-4779-963c-06974ff0d832 # ╟─41642bd5-1321-490a-95ad-4c1d6363456f # ╟─2a553e32-05ef-4c2d-aba7-41185c6035d4 # ╟─ab8345ce-e038-4d6b-9e1f-57e4f33bb67b # ╟─bb9c9a4c-601a-4708-9b2d-04d1583938f2 # ╟─b9917e94-c33d-423f-a478-3252bacc2494 # ╟─4978f404-11ff-41b8-a673-f2d051b1f526 # ╟─73bd2e3b-902f-461b-860f-246257608ecd # ╟─4dd47dc8-6dfa-47a4-a088-689b4b870762 # ╟─ecd975d2-9374-4f40-80ac-2cceda11e7fb # ╟─832cc81d-a49d-46e7-9d2b-d8bde9bb1273 # ╟─2192a1de-1042-4b13-a313-b67de489124c # ╟─01c709c7-806c-4389-bbb2-4081e64426d9 # ╟─b1e0cf83-4337-4044-a7d1-5fca8ae79268 # ╟─71f4b476-027d-4c8f-b561-1ee418bc9e61 # ╟─042013cf-9cd2-409d-827f-a311a2f8ce62 # ╟─82593cd0-1403-4597-8370-919c80494479 # ╟─f58720b5-2bcb-4950-b453-bd59f648c66a # ╟─4576d791-6af7-4ba5-9b80-fe99c0bb2e88 # ╟─6e9d17f1-b17d-4e8d-82a3-921558a20c0f # ╟─f18d89f5-1129-43e0-8b4a-5c1fcd618eab # ╟─2912c7ed-75e3-4dfd-9c40-92115cc08194 # ╟─5d1517c0-562b-40db-bec2-32b5494de1b8 # ╟─ae096ad2-3ae9-4440-a959-0d7d9a174f1d # ╟─8148bc1f-ef99-40a4-a5ce-0a42643f703d # ╠═200f1848-0980-4185-919a-93ab2e7f788f # ╠═bd86c5c2-16be-4cfd-ba7a-a0e2544d82d1 # ╟─11557d6b-3a1e-416d-874f-b8d217976f76 # ╟─48a10ea2-5d32-4a55-b8c0-f6a5e82eace9 # ╟─fafc1b0f-6469-4b6c-a00d-5272a45fc69b # ╟─ad6cff7b-5cbf-4ab1-94f7-d21cbc171000 # ╠═30c191c5-642b-4062-98f3-643d314a054d # ╠═fa5716f9-8bff-4295-812b-691ccdc12832 # ╠═f267e315-3c19-4345-8fba-641bb0ea515b # ╠═4d373cf6-9b39-44bc-8f13-220933fc8f5c # ╠═293a68ca-e02f-47b3-85ed-aeeb8995f3ec # ╠═8324f365-fd12-4ca3-8ca6-657e5917f946 # ╠═70fb10ea-9229-46ef-8ba3-b1d3874b7929 # ╠═51504ba4-4711-48b7-aab9-d4f26c009659 # ╠═a07b93b1-742b-41d4-bd0f-bc899de55338 # ╠═864dbde7-b689-4165-a08e-6bbbd72190de # ╠═5f207f59-b9f4-477f-b79f-0aee743bdb8e # ╠═f88517d6-b87d-45ba-bf3f-67074fa51fca # ╠═45aef837-9b2c-49b2-b815-e4d60f103f58 ================================================ FILE: notebooks/basic.jl ================================================ ### A Pluto.jl notebook ### # v0.14.5 using Markdown using InteractiveUtils # This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). macro bind(def, element) quote local el = $(esc(element)) global $(esc(def)) = Core.applicable(Base.get, el) ? 
Base.get(el) : missing el end end # ╔═╡ 1ef174fa-16f0-11eb-328a-afc201effd2f using Pkg, Printf # ╔═╡ 55cfdab8-d792-11ea-271f-e7383e19997c using PlutoUI; # ╔═╡ 9e509f80-d485-11ea-0044-c5b7e750aacb using NiLang # ╔═╡ 37ed073a-d492-11ea-156f-1fb155128d0f using Zygote, BenchmarkTools # ╔═╡ 4d75f302-d492-11ea-31b9-bbbdb43f344e using NiLang.AD # ╔═╡ 627ea2fb-6530-4ea0-98ee-66be3db54411 html""" """ # ╔═╡ 94b2b962-e02a-11ea-09a5-81b3226891ed md"""# 连猩猩都能懂的可逆编程 ### (Reversible programming made simple) [https://github.com/JuliaReverse/NiLangTutorial/](https://github.com/JuliaReverse/NiLangTutorial/) $(html"
") **Jinguo Liu** (github: [GiggleLiu](https://github.com/GiggleLiu/)) *Postdoc, Institute of physics, Chinese academy of sciences* (when doing this project) *Consultant, QuEra Computing* (current) *Postdoc, Havard* (soon) """ # ╔═╡ a5ee60c8-e02a-11ea-3512-7f481e499f23 md""" # Table of Contents 1. Reversible programming basics 2. Differentiate everything with a reversible programming language 4. Real world applications and benchmarks """ # ╔═╡ a11c4b60-d77d-11ea-1afe-1f2ab9621f42 md""" ## In this talk, We use the reversible eDSL [NiLang](https://github.com/GiggleLiu/NiLang.jl) is a [Julia](https://julialang.org/) as our reversible programming tool. A package that can differentiate everything. ![NiLang](https://raw.githubusercontent.com/GiggleLiu/NiLang.jl/master/docs/src/asset/logo3.png) Authors: [GiggleLiu](https://github.com/GiggleLiu), [Taine Zhao](https://github.com/thautwarm) """ # ╔═╡ e54a1be6-d485-11ea-0262-034c56e0fda8 md""" ## Sec I. Reversible programming basic ### Reversible function definition A reversible function `f` is defined as ```julia (~f)(f(x, y, z...)...) == (x, y, z...) ``` """ # ╔═╡ d1628f08-ddfb-11ea-241a-c7e6c1a22212 md""" ## Example 1: reversible adder ```math \begin{align} f &: x, y → x+y, y\\ {\small \mathrel{\sim}}f &: x, y → x-y, y \end{align} ``` """ # ╔═╡ 278ac6b6-e02c-11ea-1354-cd7ecd1099be md"The reversible macro `@i` defines two functions, the function itself and its inverse." # ╔═╡ a28d38be-d486-11ea-2c40-a377b74a05c1 @i function reversible_plus(x, y) x += y end # ╔═╡ e93f0bf6-d487-11ea-1baa-21d51ddb4a20 reversible_plus(2.0, 3.0) # ╔═╡ fc932606-d487-11ea-303e-75ca8b7a02f6 (~reversible_plus)(5.0, 3.0) # ╔═╡ e3d2b23a-ddfb-11ea-0f5e-e72ed299bb45 md"## The difference to a regular programming language" # ╔═╡ a961e048-ddf2-11ea-0262-6d19eb82b36b md"**Comment 1**: The return statement is not allowed, a reversible function returns input arguments directly." # ╔═╡ 2d22f504-ddf1-11ea-28ec-5de6f4ee79bb md"**Comment 2**: Every operation is reversible. `+=` is considered as reversible for integers and floating point numbers in NiLang, although for floating point numbers, there are *rounding errors*." 
# ╔═╡ 7d08ac24-e143-11ea-2085-539fd9e35889 md"### A case where `+=` is not reversible" # ╔═╡ 9fcdd77c-e0df-11ea-09e6-49a2861137e5 let x, y = 1e-20, 1e20 x += y x -= y (x, y) end # ╔═╡ 0a1a8594-ddfc-11ea-119a-1997c86cd91b md""" ## Use this function """ # ╔═╡ 0b4edb1a-ddf0-11ea-220c-91f2df7452e7 @i function reversible_plus2(x, y) reversible_plus(x, y) # equivalent to `reversible_plus(x, y)` reversible_plus(x, y) end # ╔═╡ f875ecd6-ddef-11ea-22a1-619809d15b37 md"**Comment**: Inside a reversible function definition, a statement changes a variable *inplace*" # ╔═╡ e7557bee-e0cc-11ea-1788-411e759b4766 reversible_plus2(2.0, 3.0) # ╔═╡ cd7b2a2e-ddf5-11ea-04c4-f7583bbb5a53 md"A statement can be **uncalled** with `~`" # ╔═╡ bc98a824-ddf5-11ea-1a6a-1f795452d3d0 @i function do_nothing(x, y) reversible_plus(x, y) ~reversible_plus(x, y) # uncall the expression end # ╔═╡ 05f8b91c-e0cd-11ea-09e3-f3c5c0e07e63 do_nothing(2.0, 3.0) # ╔═╡ ac302844-e07b-11ea-35dd-e3e06054401b md"## Example 2: Compute $x^5$" # ╔═╡ b722e098-e07b-11ea-3483-01360fb6954e @i function naive_power5(y, x::T) where T y = one(T) # error 1: `=` is not reversible for i=1:5 y *= x # error 2: `*=` is not reversible end end # ╔═╡ bf8b722c-dfa4-11ea-196a-719802bc23c5 md""" ## Compute $x^5$ reversibly """ # ╔═╡ 330edc28-dfac-11ea-35a5-3144c4afbfcf md"note: `*=` is not reversible for usual number systems" # ╔═╡ 0a679e04-dfa7-11ea-0288-a1fa490c4387 @i function power5(x5, x4, x3, x2, x1, x) x1 += x x2 += x1 * x x3 += x2 * x x4 += x3 * x x5 += x4 * x end # ╔═╡ cc32cae8-dfab-11ea-0d0b-c70ea8de720a power5(0.0, 0.0, 0.0, 0.0, 0.0, 2.0) # ╔═╡ b4240c16-dfac-11ea-3a40-33c54436e3a3 md"# Don't make me so many input arguments!" # ╔═╡ ade52358-dfac-11ea-2dd3-d3a691e7a8a2 @i function power5_twoinputs(x5, x::T) where T x1 ← zero(T) x2 ← zero(T) x3 ← zero(T) x4 ← zero(T) x1 += x x2 += x1 * x x3 += x2 * x x4 += x3 * x x5 += x4 * x x4 -= x3 * x x3 -= x2 * x x2 -= x1 * x x1 -= x x4 → zero(T) x3 → zero(T) x2 → zero(T) x1 → zero(T) end # ╔═╡ d86e2e5e-dfab-11ea-0053-6d52f1164bc5 power5_twoinputs(0.0, 2.0) # ╔═╡ 7951b9ec-e030-11ea-32ee-b1de49378186 md""" **Comment**: `n ← zero(T)` is the variable allocation operation. It means ``` if n is defined error else n = zero(T) end ``` Its inverse is `n → zero(T)`. It means ``` @assert n == zero(T) deallocate(n) ``` """ # ╔═╡ 6bc97f5e-dfad-11ea-0c43-e30b6620e6e8 md"# Shorter: compute-copy-uncompute" # ╔═╡ 80d24e9e-dfad-11ea-1dae-49568d534f10 @i function power5_twoinputs_shorter(x5, x::T) where T @routine begin # compute @zeros T x1 x2 x3 x4 x1 += x x2 += x1 * x x3 += x2 * x x4 += x3 * x end x5 += x4 * x # copy ~@routine # uncompute end # ╔═╡ a8092b18-dfad-11ea-0989-474f37d05f73 power5_twoinputs_shorter(0.0, 2.0) # ╔═╡ 43f0c2fc-e030-11ea-25d9-b323e6496a35 md"""**Comment**: ``` @routine statement ~@routine ``` is equivalent to ``` statement ~(statement) ``` This is the famous `compute-copy-uncompute` design pattern in reversible computing. Check this [reference](https://epubs.siam.org/doi/10.1137/0219046). """ # ╔═╡ b4ad5830-dfad-11ea-0057-055dda8cc9be md"# How to compute x^1000?" # ╔═╡ cf576d38-dfad-11ea-2682-7bd540db44a5 @i function power1000(x1000, x::T) where T @routine begin xs ← zeros(T, 1000) xs[1] += 1 for i=2:1000 xs[i] += xs[i-1] * x end end x1000 += xs[1000] * x ~@routine end # ╔═╡ 35fff53c-dfae-11ea-3602-918a17d5a5fa power1000(0.0, 1.001) # ╔═╡ 9b9b5328-e030-11ea-1d00-f3341572734a html"""
<h4>For loop</h4>
<b>Forward</b>
<pre>
for i=start:step:stop
    # do something
end
</pre>
<b>Reverse</b>
<pre>
for i=stop:-step:start
    # undo something
end
</pre>
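<p>For the reversed loop to undo the forward one, the loop range must stay fixed
while the loop runs. A loop that mutates its own range has no well-defined reverse;
the <code>power1000_bad</code> example below breaks for exactly this reason:</p>
<pre>
for i=2:length(xs)              # the range depends on length(xs) ...
    xs[i] += xs[i-1] * x
    PUSH!(xs, @const zero(T))   # ... but the body keeps growing xs
end
</pre>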
""" # ╔═╡ f3b87892-e080-11ea-353d-8d81c52cf9ac md"### Sometimes, a for loop can break down" # ╔═╡ b27a3974-e030-11ea-0bcd-7f7035d55165 @i function power1000_bad(x1000, x::T) where T @routine begin xs ← zeros(T, 1000) xs[1] += 1 for i=2:length(xs) xs[i] += xs[i-1] * x PUSH!(xs, @const zero(T)) end end x1000 += xs[1000] * x ~@routine end # ╔═╡ e5d47096-e030-11ea-1e87-5b9b1dbecfe0 power1000_bad(0.0, 1.001) # ╔═╡ 9c62289a-dfae-11ea-0fe0-b1cb80a87704 md"# Don't allocate for me!" # ╔═╡ 88838bce-dfaf-11ea-1a72-7d15629cfcb0 md""" Multipling two unsigned logarithmic numbers `x = exp(lx)` and `y = exp(ly)` ``` x * y = exp(lx) * exp(ly) = exp(lx + ly) ``` """ # ╔═╡ a593f970-dfae-11ea-2d79-876030850dee @i function power1000_noalloc(x1000, x::T) where T if x!= 0 @routine begin absx ← zero(T) lx ← one(ULogarithmic{T}) lx1000 ← one(ULogarithmic{T}) absx += abs(x) lx *= convert(absx) for i=1:1000 lx1000 *= lx end end x1000 += convert(lx1000) ~@routine end end # ╔═╡ f448548e-dfaf-11ea-05c0-d5d177683445 power1000_noalloc(0.0, 1.001) # ╔═╡ 65cd13ca-e031-11ea-3fc6-977792eb5f8c html"""
<h4>If statement</h4>
<b>Forward</b>
<pre>
if (precondition, postcondition)
    # do A
else
    # do B
end
</pre>
<b>Reverse</b>
<pre>
if (postcondition, precondition)
    # undo A
else
    # undo B
end
</pre>
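<p>The postcondition is evaluated after the branch body and must agree with the
branch that was taken, so that the reversed program can decide which branch to undo.
In the <code>happy_if</code> example below, <code>x%2 == 0</code> holds after
<code>x += 1</code> exactly when <code>x%2 == 1</code> held before it:</p>
<pre>
if (x%2 == 1, x%2 == 0)    # (precondition, postcondition)
    x += 1
else
    x -= 1
end
</pre>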
""" # ╔═╡ 53c02100-e08f-11ea-1f5d-8b2311b095d2 md"![](https://user-images.githubusercontent.com/6257240/116341762-78a31080-a7af-11eb-8376-d2ba0bf2b454.png)" # ╔═╡ 75751b24-e0b8-11ea-2b37-9d138121345c md"### You should not do" # ╔═╡ 76b84de4-e031-11ea-0bcf-39b86a6b4552 @i function break_if(x) if x%2 == 1 x += 1 else x -= 1 end end # ╔═╡ b1984d24-e031-11ea-3b13-3bd0119a2bcb break_if(3) # ╔═╡ 7f163d82-e0b8-11ea-2fe7-332bb4dee586 md"### You should do" # ╔═╡ ddc6329e-e031-11ea-0e6e-e7332fa26e22 @i function happy_if(x) if (x%2 == 1, x%2 == 0) x += 1 else x -= 1 end end # ╔═╡ f3d5e1b0-e031-11ea-1a90-7bed88e28bad happy_if(3) # ╔═╡ ab67419a-dfae-11ea-27ba-09321303ad62 md"""# Wrap up 1. reversible arithmetic instructions `+=` and `-=`, besides, we have `SWAP`, `NEG`, `INC` and `ROT` et. al. 2. inverse statement `~` 3. there is no "`=`" operation in reversible computing, use "`←`" to allocate a new variable, and use "`→`" to deallocate an pre-emptied variable. 4. compute-uncompute macro `@routine` and `~@routine` 5. reversible control flow: `for` loop and `if` statement, the `while` statement is also available. 6. logarithmic number is reversible under `*=` and `/=` """ # ╔═╡ d5c2efbc-d779-11ea-11ad-1f5873b95628 md""" ![yeah](https://media1.tenor.com/images/40147f2eac14c0a7f18c34ecba73fa34/tenor.gif?itemid=7805520) """ # ![yeah](https://pic.chinesefontdesign.com/uploads/2017/03/chinesefontdesign.com_2017-03-07_08-19-24.gif) # ╔═╡ 30af9642-e084-11ea-1f92-b52abfddcf06 md"# Sec II. Automatic differentiation in NiLang ### References * Nextjournal [https://nextjournal.com/giggle/reverse-checkpointing](https://nextjournal.com/giggle/reverse-checkpointing) * arXiv: 2003.04617 " # ╔═╡ db1fab1c-e084-11ea-0bf0-b1fbe9e74b3f html"""

<h2>Automatic differentiation?</h2>

""" # ╔═╡ e1370f80-e0bc-11ea-2a90-d50cc762cbcb md"When we start learning AD, we start by learning the backward rules of the matrix multiplication" # ╔═╡ 3098411c-e0bc-11ea-2754-eb0afbd663de function mymul!(out::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix) @assert size(A, 2) == size(B, 1) && size(out) == (size(A, 1), size(B, 2)) for k=1:size(B, 2) for j=1:size(B, 1) for i=size(A, 1) @inbounds out[i, k] += A[i, j] * B[j, k] end end end return out end # ╔═╡ 3d0150ee-e0bd-11ea-0a5a-339465b496dc md"Then, we learning how to use chain rule to chain different utilities." # ╔═╡ 8016ff94-e0bc-11ea-3b9e-4f0676587edf md"##### But wait! Why don't we start from the backward rules of `+` and `*`, then use the chain rule to derive the backward rule for matrix multiplication?" # ╔═╡ 99108ace-e0bc-11ea-2744-d1b18db50ae1 md"# They are different" # ╔═╡ b2337f26-e0bb-11ea-3da0-9507c35101ae md""" ### Domain-specific autodiff (DS-AD) * **Tensor**Flow * **PyTorch** * **Jax** * **Flux (Zygote backended)** ### General Purposed autodiff (GP-AD) * **Tapenade** * **NiLang** """ # ╔═╡ 48db515c-e084-11ea-2eec-018b8545fa34 md"## Traditional AD uses checkpointing Checkpoint every 100 steps. Blue and yellow objects are computing and re-computing. Here states 1 and state 101 are cached. Blue objects are computing, and yellow ones are re-computing. The state 100 is the desired state. " # ╔═╡ f531f556-e083-11ea-2f7e-77e110d6c53a md"![](https://nextjournal.com/data/Qmes4v3ic2VrYQt6W9mWu4p6W53Gd1DmbDcYCuafbwTe7Y?filename=checkpointing.png&content-type=image/png)" # ╔═╡ 62643fbc-e084-11ea-1b1f-39b87ff32b9e md"## Reverse Computing Reversible computing approach to free up memories (a) when no operations are reversible. (b) when all operations are reversible. Blue and yellow diamonds are reversible operations executed in forward and backward directions, red cubics are garbage variables. 
" # ╔═╡ 0bf54b08-e084-11ea-3d11-7be65f3ec022 md"![](https://nextjournal.com/data/QmPsgm4Z4mqVw2h2eC3RkGf96xTQp13KE9rdzmPeUe5KWN?filename=reversecomputing.png&content-type=image/png)" # ╔═╡ 15f7c60a-e08e-11ea-31ea-a5cd055644db md"## Difference Explained" # ╔═╡ 55a3a260-d48e-11ea-06e2-1b7bd7bba6f5 md""" ![adprog](https://github.com/GiggleLiu/NiLang.jl/raw/master/docs/src/asset/adprog.png) """ # ╔═╡ 38014ad0-e08e-11ea-1905-198038ab7e5f md"# Obtaining the gradient of norm in Zygote" # ╔═╡ 2e6fe4da-d79d-11ea-1e90-f5215190395c md"**Obtaining the gradient of the norm function**" # ╔═╡ 6560c28c-e08e-11ea-1094-d333b88071ce function regular_norm(x::AbstractArray{T}) where T res = zero(T) for i=1:length(x) @inbounds res += x[i]^2 end return sqrt(res) end # ╔═╡ 744dd3c6-d492-11ea-0ed5-0fe02f99db1f @benchmark Zygote.gradient($regular_norm, $(randn(1000))) seconds=1 # ╔═╡ f72246f8-e08e-11ea-3aa0-53f47a64f3e9 md"## The reversible counterpart" # ╔═╡ f025e454-e08e-11ea-20d6-d139b9a6b301 @i function reversible_norm(res, y, x::AbstractArray{T}) where {T} for i=1:length(x) @inbounds y += x[i]^2 end res += sqrt(y) end # ╔═╡ 8fedd65a-e08e-11ea-27f4-03bf9ed65875 let x = randn(1000) @assert Zygote.gradient(regular_norm, x)[1] ≈ NiLang.AD.gradient(reversible_norm, (0.0, 0.0, x), iloss=1)[3] end # ╔═╡ 8ad60dc0-d492-11ea-2cb3-1750b39ddf86 @benchmark NiLang.AD.gradient($reversible_norm, (0.0, 0.0, $(randn(1000))), iloss=1) # ╔═╡ 7bab4614-d77e-11ea-037c-8d1f432fc3b8 md""" ![yeah](https://media1.tenor.com/images/40147f2eac14c0a7f18c34ecba73fa34/tenor.gif?itemid=7805520) """ # ![yeah](https://pic.chinesefontdesign.com/uploads/2017/03/chinesefontdesign.com_2017-03-07_08-19-24.gif) # ╔═╡ fcca27ba-d4a4-11ea-213a-c3e2305869f1 #**1. The bundle adjustment jacobian benchmark** #$(LocalResource("ba-origin.png")) #![ba](https://github.com/JuliaReverse/NiBundleAdjustment.jl/raw/master/benchmarks/adbench.png) #**2. The Gaussian mixture model benchmark** #$(LocalResource("gmm-origin.png")) #![gmm](https://github.com/JuliaReverse/NiGaussianMixture.jl/raw/master/benchmarks/adbench.png) md""" # Sec III. Applications in real world and benchmarks """ # ╔═╡ 519dc834-e092-11ea-2151-57ef23810b84 md""" ## 1. Bundle Adjustment (Jacobian) ![bundle adjustment](https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRgpGSCWRjHDDaIYQX5ejhMvyKY_GFhynVoQg&usqp=CAU) *Srajer, Filip, Zuzana Kukelova, and Andrew Fitzgibbon. "A benchmark of selected algorithmic differentiation tools on some problems in computer vision and machine learning." Optimization Methods and Software 33.4-6 (2018): 889-906.* ### Benchmarks **Devices** * CPU: Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz * GPU: Nvidia Titan V. **Github Repos** * [https://github.com/microsoft/ADBench](https://github.com/microsoft/ADBench) * [https://github.com/JuliaReverse/NiBundleAdjustment.jl](https://github.com/JuliaReverse/NiBundleAdjustment.jl) """ # ╔═╡ c89108f0-e092-11ea-0fe2-efad85008b28 html"""
""" # ╔═╡ 2ec4c700-e093-11ea-06ff-47d2c21a068f md"""##### NiLang.AD and Tapenade ![](https://user-images.githubusercontent.com/6257240/116341804-907a9480-a7af-11eb-934f-7eb94803f5f2.png)""" # ╔═╡ 474aa228-e092-11ea-042b-bdfaeb99f16f md""" ## 2. Gaussian Mixture Model (Gradient) ![gmm](https://prateekvjoshi.files.wordpress.com/2013/06/multimodal.jpg) """ # ╔═╡ 2baaff10-d56c-11ea-2a23-bfa3a7ae2e4b md""" ### Benchmarks *Srajer, Filip, Zuzana Kukelova, and Andrew Fitzgibbon. "A benchmark of selected algorithmic differentiation tools on some problems in computer vision and machine learning." Optimization Methods and Software 33.4-6 (2018): 889-906.* **Devices** * CPU: Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz **Github Repos** * [https://github.com/microsoft/ADBench](https://github.com/microsoft/ADBench) * [https://github.com/JuliaReverse/NiGaussianMixture.jl](https://github.com/JuliaReverse/NiGaussianMixture.jl) """ # ╔═╡ 102fbf2e-d56b-11ea-189d-c78d56c0a924 html"""
<p>Results from the original benchmark</p>
""" # ╔═╡ cc0d5622-d788-11ea-19cd-3bf6864d9263 md"""##### Including NiLang.AD ![](https://github.com/JuliaReverse/NiLangTutorial/blob/master/notebooks/asset/benchmarks_gmm.png?raw=true)""" # ╔═╡ a1646ef0-e091-11ea-00f1-e7c246e191ff md"## 3. Solve the memory wall problem in machine learning" # ╔═╡ b18b3ae8-e091-11ea-24a1-e968b70b217c html""" Learning a ring distribution with NICE network, before and after training
References
""" # ╔═╡ bf3774de-e091-11ea-3372-ef56452158e6 md""" ## 4. Solve the spinglass ground state configuration Obtaining the optimal configuration of a spinglass problem on a $28 \times 28$ square lattice. ![](https://user-images.githubusercontent.com/6257240/116342088-067efb80-a7b0-11eb-935e-0b5e29010a22.png) ##### References Jin-Guo Liu, Lei Wang, Pan Zhang, **arXiv 2008.06888** """ # ╔═╡ c8e4f7a6-e091-11ea-24a3-4399635a41a5 md""" ## 5. Optimizing problems in finance Gradient based optimization of Sharpe rate. 600x acceleration comparing with using pure Zygote. ##### References * Han Li's Github repo: [https://github.com/HanLi123/NiLang](https://github.com/HanLi123/NiLang) and his Zhihu blog [猴子掷骰子](https://zhuanlan.zhihu.com/c_1092471228488634368). """ # ╔═╡ bc872296-e09f-11ea-143b-9bfd5e52b14f md"""## 6. Accelerate the performance critical part of variational mean field [https://github.com/quantumlang/NiLangTest/pull/1](https://github.com/quantumlang/NiLangTest/pull/1) 600x acceleration comparing with using pure Zygote. """ # ╔═╡ e7b21fce-e091-11ea-180c-7b42e00598a9 md"""# Thank you! Special thanks to my collaborator **Taine Zhao** and (ex-)advisor **Lei Wang**. QuEra computing (a quantum computing company located in Boston) is hiring people. """ # ╔═╡ 7c79975c-d789-11ea-30b1-67ff05418cdb md""" ![yeah](https://media1.tenor.com/images/40147f2eac14c0a7f18c34ecba73fa34/tenor.gif?itemid=7805520) """ # ![yeah](https://pic.chinesefontdesign.com/uploads/2017/03/chinesefontdesign.com_2017-03-07_08-19-24.gif) # ╔═╡ 5f1c3f6c-d48b-11ea-3eb0-357fd3ece4fc md""" ## Sec IV. More about number systems * Integers are reversible under (`+=`, `-=`). * Floating point number system is **irreversible** under (`+=`, `-=`) and (`*=`, `/=`). * [Fixedpoint number system](https://github.com/JuliaMath/FixedPointNumbers.jl) are reversible under (`+=`, `-=`) * [Logarithmic number system](https://github.com/cjdoris/LogarithmicNumbers.jl) is reversible under (`*=`, `/=`) """ # ╔═╡ 11ddebfe-d488-11ea-223a-e9403f6ec8de md""" ##### Example 1: Affine transformation with rounding error ```julia y = A * x + b ``` """ # ╔═╡ 030e592e-d488-11ea-060d-97a3bb6353b7 @i function reversible_affine!(y!::AbstractVector{T}, W::AbstractMatrix{T}, b::AbstractVector{T}, x::AbstractVector{T}) where T @safe @assert size(W) == (length(y!), length(x)) && length(b) == length(y!) for j=1:size(W, 2) for i=1:size(W, 1) @inbounds y![i] += W[i,j]*x[j] end end for i=1:size(W, 1) @inbounds y![i] += b[i] end end # ╔═╡ c8d26856-d48a-11ea-3cd3-1124cd172f3a begin W = randn(10, 10) b = randn(10) x = randn(10) end; # ╔═╡ 37c4394e-d489-11ea-174c-b13bdddbe741 yout, Wout, bout, xout = reversible_affine!(zeros(10), W, b, x) # ╔═╡ fef54688-d48a-11ea-340b-295b88d21382 # should be restored to 0, but not! yin, Win, bin, xin = (~reversible_affine!)(yout, Wout, bout, xout) # ╔═╡ 259a2852-d48c-11ea-0f01-b9634850e09d md""" ### Reversible arithmetic functions Computing basic functions like `power`, `exp` and `besselj` is not trivial for reversible programming. There is no efficient constant memory algorithm using pure fixed point numbers only. """ # ╔═╡ f06fb004-d79f-11ea-0d60-8151019bf8c7 md""" ##### Example 2: Computing power function To compute `x ^ n` reversiblly with fixed point numbers, we need to either allocate a vector of size $O(n)$ or suffer from polynomial time overhead. It does not show the advantage to checkpointing. 
""" # ╔═╡ 26a8a42c-d7a1-11ea-24a3-45bc6e0674ea @i function i_power_cache(y!::T, x::T, n::Int) where T @routine @invcheckoff begin cache ← zeros(T, n) # allocate a buffer of size n cache[1] += x for i=2:n cache[i] += cache[i-1] * x end end y! += cache[n] ~@routine # uncompute cache end # ╔═╡ 399552c4-d7a1-11ea-36bb-ad5ca42043cb # To check the function i_power_cache(Fixed43(0.0), Fixed43(0.99), 100) # ╔═╡ 4bb19760-d7bf-11ea-12ed-4d9e4efb3482 md""" ##### Example 3: reversible thinker, the logarithmic number approach With **logarithmic numbers**, we can still utilize reversibility. Fixed point numbers and logarithmic numbers can be converted via "a fast binary logarithm algorithm". ##### References * [1] C. S. Turner, "A Fast Binary Logarithm Algorithm", IEEE Signal Processing Mag., pp. 124,140, Sep. 2010. """ # ╔═╡ 5a8ba8f4-d493-11ea-1839-8ba81f86799d @i function i_power_lognumber(y::T, x::T, n::Int) where T @routine @invcheckoff begin lx ← one(ULogarithmic{T}) ly ← one(ULogarithmic{T}) ## convert `x` to a logarithmic number ## Here, `*=` is reversible for log numbers lx *= convert(x) for i=1:n ly *= lx end end ## convert back to fixed point numbers y += convert(ly) ~@routine end # ╔═╡ a625a922-d493-11ea-1fe9-bdd4a694cde0 # To check the function i_power_lognumber(Fixed43(0.0), Fixed43(0.99), 100) # ╔═╡ 4fd20ed2-d7a2-11ea-206e-13799234913f md"**Less allocation, better speed**" # ╔═╡ 692dfb44-d7a1-11ea-00da-af6550bc0622 @benchmark i_power_cache(Fixed43(0.0), Fixed43(0.99), 100) # ╔═╡ 7e4ee09c-d7a1-11ea-0e56-c1921012bc30 @benchmark i_power_lognumber(Fixed43(0.0), Fixed43(0.99), 100) # ╔═╡ 4c209bbe-d7b1-11ea-0628-33eb8d664f5b md"""##### Example 4: The first kind Bessel function computed with Taylor expansion ```math J_\nu(z) = \sum\limits_{n=0}^{\infty} \frac{(z/2)^\nu}{\Gamma(k+1)\Gamma(k+\nu+1)} (-z^2/4)^{n} ``` """ # ╔═╡ fd44a3d4-d7a4-11ea-24ea-09456ff2c53d @i function ibesselj(y!::T, ν, z::T; atol=1e-8) where T if z == 0 if ν == 0 out! += 1 end else @routine @invcheckoff begin k ← 0 @ones ULogarithmic{T} lz halfz halfz_power_2 s @zeros T out_anc lz *= convert(z) halfz *= lz / 2 halfz_power_2 *= halfz ^ 2 # s *= (z/2)^ν/ factorial(ν) s *= halfz ^ ν for i=1:ν s /= i end out_anc += convert(s) while (s.log > -25, k!=0) # upto precision e^-25 k += 1 # s *= 1 / k / (k+ν) * (z/2)^2 @routine begin @zeros Int kkv kv kv += k+ ν kkv += kv*k end s *= halfz_power_2 / kkv if k%2 == 0 out_anc += convert(s) else out_anc -= convert(s) end ~@routine end end y! 
+= out_anc ~@routine end end # ╔═╡ 84272664-d7b7-11ea-2e37-dffd2023d8d6 md"z = $(@bind z Slider(0:0.01:10; default=1.0))" # ╔═╡ 900e2ea4-d7b8-11ea-3511-6f12d95e638a begin y = ibesselj(Fixed43(0.0), 2, Fixed43(z))[1] gz = NiLang.AD.gradient(Val(1), ibesselj, (Fixed43(0.0), 2, Fixed43(z)))[3] end; # ╔═╡ d76be888-d7b4-11ea-2989-2174682ead76 let md""" | ``z`` | ``y`` | ``\partial y/\partial z`` | | ---- | ----- | -------- | | $(@sprintf "%.5f" z) | $(@sprintf "%.5f" y) | $(@sprintf "%.5f" gz) | """ end # ╔═╡ 85c9edcc-d789-11ea-14c8-71697cd6a047 md""" ![yeah](https://media1.tenor.com/images/40147f2eac14c0a7f18c34ecba73fa34/tenor.gif?itemid=7805520) """ # ![yeah](https://pic.chinesefontdesign.com/uploads/2017/03/chinesefontdesign.com_2017-03-07_08-19-24.gif) # ╔═╡ Cell order: # ╟─1ef174fa-16f0-11eb-328a-afc201effd2f # ╟─627ea2fb-6530-4ea0-98ee-66be3db54411 # ╟─94b2b962-e02a-11ea-09a5-81b3226891ed # ╟─a5ee60c8-e02a-11ea-3512-7f481e499f23 # ╟─a11c4b60-d77d-11ea-1afe-1f2ab9621f42 # ╟─e54a1be6-d485-11ea-0262-034c56e0fda8 # ╟─55cfdab8-d792-11ea-271f-e7383e19997c # ╟─d1628f08-ddfb-11ea-241a-c7e6c1a22212 # ╠═9e509f80-d485-11ea-0044-c5b7e750aacb # ╟─278ac6b6-e02c-11ea-1354-cd7ecd1099be # ╠═a28d38be-d486-11ea-2c40-a377b74a05c1 # ╠═e93f0bf6-d487-11ea-1baa-21d51ddb4a20 # ╠═fc932606-d487-11ea-303e-75ca8b7a02f6 # ╟─e3d2b23a-ddfb-11ea-0f5e-e72ed299bb45 # ╟─a961e048-ddf2-11ea-0262-6d19eb82b36b # ╟─2d22f504-ddf1-11ea-28ec-5de6f4ee79bb # ╟─7d08ac24-e143-11ea-2085-539fd9e35889 # ╠═9fcdd77c-e0df-11ea-09e6-49a2861137e5 # ╟─0a1a8594-ddfc-11ea-119a-1997c86cd91b # ╠═0b4edb1a-ddf0-11ea-220c-91f2df7452e7 # ╟─f875ecd6-ddef-11ea-22a1-619809d15b37 # ╠═e7557bee-e0cc-11ea-1788-411e759b4766 # ╟─cd7b2a2e-ddf5-11ea-04c4-f7583bbb5a53 # ╠═bc98a824-ddf5-11ea-1a6a-1f795452d3d0 # ╠═05f8b91c-e0cd-11ea-09e3-f3c5c0e07e63 # ╟─ac302844-e07b-11ea-35dd-e3e06054401b # ╠═b722e098-e07b-11ea-3483-01360fb6954e # ╟─bf8b722c-dfa4-11ea-196a-719802bc23c5 # ╟─330edc28-dfac-11ea-35a5-3144c4afbfcf # ╠═0a679e04-dfa7-11ea-0288-a1fa490c4387 # ╠═cc32cae8-dfab-11ea-0d0b-c70ea8de720a # ╟─b4240c16-dfac-11ea-3a40-33c54436e3a3 # ╠═ade52358-dfac-11ea-2dd3-d3a691e7a8a2 # ╠═d86e2e5e-dfab-11ea-0053-6d52f1164bc5 # ╟─7951b9ec-e030-11ea-32ee-b1de49378186 # ╟─6bc97f5e-dfad-11ea-0c43-e30b6620e6e8 # ╠═80d24e9e-dfad-11ea-1dae-49568d534f10 # ╠═a8092b18-dfad-11ea-0989-474f37d05f73 # ╟─43f0c2fc-e030-11ea-25d9-b323e6496a35 # ╟─b4ad5830-dfad-11ea-0057-055dda8cc9be # ╠═cf576d38-dfad-11ea-2682-7bd540db44a5 # ╠═35fff53c-dfae-11ea-3602-918a17d5a5fa # ╟─9b9b5328-e030-11ea-1d00-f3341572734a # ╟─f3b87892-e080-11ea-353d-8d81c52cf9ac # ╠═b27a3974-e030-11ea-0bcd-7f7035d55165 # ╠═e5d47096-e030-11ea-1e87-5b9b1dbecfe0 # ╟─9c62289a-dfae-11ea-0fe0-b1cb80a87704 # ╟─88838bce-dfaf-11ea-1a72-7d15629cfcb0 # ╠═a593f970-dfae-11ea-2d79-876030850dee # ╠═f448548e-dfaf-11ea-05c0-d5d177683445 # ╟─65cd13ca-e031-11ea-3fc6-977792eb5f8c # ╟─53c02100-e08f-11ea-1f5d-8b2311b095d2 # ╟─75751b24-e0b8-11ea-2b37-9d138121345c # ╠═76b84de4-e031-11ea-0bcf-39b86a6b4552 # ╠═b1984d24-e031-11ea-3b13-3bd0119a2bcb # ╟─7f163d82-e0b8-11ea-2fe7-332bb4dee586 # ╠═ddc6329e-e031-11ea-0e6e-e7332fa26e22 # ╠═f3d5e1b0-e031-11ea-1a90-7bed88e28bad # ╟─ab67419a-dfae-11ea-27ba-09321303ad62 # ╟─d5c2efbc-d779-11ea-11ad-1f5873b95628 # ╟─30af9642-e084-11ea-1f92-b52abfddcf06 # ╟─db1fab1c-e084-11ea-0bf0-b1fbe9e74b3f # ╟─e1370f80-e0bc-11ea-2a90-d50cc762cbcb # ╠═3098411c-e0bc-11ea-2754-eb0afbd663de # ╟─3d0150ee-e0bd-11ea-0a5a-339465b496dc # ╟─8016ff94-e0bc-11ea-3b9e-4f0676587edf # ╟─99108ace-e0bc-11ea-2744-d1b18db50ae1 # 
╟─b2337f26-e0bb-11ea-3da0-9507c35101ae # ╟─48db515c-e084-11ea-2eec-018b8545fa34 # ╟─f531f556-e083-11ea-2f7e-77e110d6c53a # ╟─62643fbc-e084-11ea-1b1f-39b87ff32b9e # ╟─0bf54b08-e084-11ea-3d11-7be65f3ec022 # ╟─15f7c60a-e08e-11ea-31ea-a5cd055644db # ╟─55a3a260-d48e-11ea-06e2-1b7bd7bba6f5 # ╟─38014ad0-e08e-11ea-1905-198038ab7e5f # ╟─2e6fe4da-d79d-11ea-1e90-f5215190395c # ╠═6560c28c-e08e-11ea-1094-d333b88071ce # ╠═37ed073a-d492-11ea-156f-1fb155128d0f # ╠═744dd3c6-d492-11ea-0ed5-0fe02f99db1f # ╟─f72246f8-e08e-11ea-3aa0-53f47a64f3e9 # ╠═f025e454-e08e-11ea-20d6-d139b9a6b301 # ╠═4d75f302-d492-11ea-31b9-bbbdb43f344e # ╠═8fedd65a-e08e-11ea-27f4-03bf9ed65875 # ╠═8ad60dc0-d492-11ea-2cb3-1750b39ddf86 # ╟─7bab4614-d77e-11ea-037c-8d1f432fc3b8 # ╟─fcca27ba-d4a4-11ea-213a-c3e2305869f1 # ╟─519dc834-e092-11ea-2151-57ef23810b84 # ╟─c89108f0-e092-11ea-0fe2-efad85008b28 # ╟─2ec4c700-e093-11ea-06ff-47d2c21a068f # ╟─474aa228-e092-11ea-042b-bdfaeb99f16f # ╟─2baaff10-d56c-11ea-2a23-bfa3a7ae2e4b # ╟─102fbf2e-d56b-11ea-189d-c78d56c0a924 # ╟─cc0d5622-d788-11ea-19cd-3bf6864d9263 # ╟─a1646ef0-e091-11ea-00f1-e7c246e191ff # ╟─b18b3ae8-e091-11ea-24a1-e968b70b217c # ╟─bf3774de-e091-11ea-3372-ef56452158e6 # ╟─c8e4f7a6-e091-11ea-24a3-4399635a41a5 # ╟─bc872296-e09f-11ea-143b-9bfd5e52b14f # ╟─e7b21fce-e091-11ea-180c-7b42e00598a9 # ╟─7c79975c-d789-11ea-30b1-67ff05418cdb # ╟─5f1c3f6c-d48b-11ea-3eb0-357fd3ece4fc # ╟─11ddebfe-d488-11ea-223a-e9403f6ec8de # ╠═030e592e-d488-11ea-060d-97a3bb6353b7 # ╠═c8d26856-d48a-11ea-3cd3-1124cd172f3a # ╠═37c4394e-d489-11ea-174c-b13bdddbe741 # ╠═fef54688-d48a-11ea-340b-295b88d21382 # ╟─259a2852-d48c-11ea-0f01-b9634850e09d # ╟─f06fb004-d79f-11ea-0d60-8151019bf8c7 # ╠═26a8a42c-d7a1-11ea-24a3-45bc6e0674ea # ╠═399552c4-d7a1-11ea-36bb-ad5ca42043cb # ╟─4bb19760-d7bf-11ea-12ed-4d9e4efb3482 # ╠═5a8ba8f4-d493-11ea-1839-8ba81f86799d # ╠═a625a922-d493-11ea-1fe9-bdd4a694cde0 # ╟─4fd20ed2-d7a2-11ea-206e-13799234913f # ╠═692dfb44-d7a1-11ea-00da-af6550bc0622 # ╠═7e4ee09c-d7a1-11ea-0e56-c1921012bc30 # ╟─4c209bbe-d7b1-11ea-0628-33eb8d664f5b # ╠═fd44a3d4-d7a4-11ea-24ea-09456ff2c53d # ╟─84272664-d7b7-11ea-2e37-dffd2023d8d6 # ╠═900e2ea4-d7b8-11ea-3511-6f12d95e638a # ╟─d76be888-d7b4-11ea-2989-2174682ead76 # ╟─85c9edcc-d789-11ea-14c8-71697cd6a047 ================================================ FILE: notebooks/documentation.jl ================================================ ### A Pluto.jl notebook ### # v0.14.5 using Markdown using InteractiveUtils # ╔═╡ d941d6c2-55bf-11eb-0002-35c7474e4050 using NiLang, Test # ╔═╡ 2061b434-0ad1-46eb-a0c7-1a5f432bfa62 begin twocol(left, right; llabel="forward", rlabel="backward") = HTML("
<table><tr><th>$llabel</th><th>$rlabel</th></tr>
<tr><td>$(html(left))</td><td>$(html(right))</td></tr></table>
") example(str) = HTML("""<div class="example">
$str
</div>""") title1(str) = HTML("""
<h1 id=$(replace(str, ' '=>'_'))>$str</h1>
""") title2(str) = HTML("""
<h2 id=$(replace(str, ' '=>'_'))>$str</h2>
""") titleref(str) = HTML("""$str""") using PlutoUI: TableOfContents using Pkg pkgversion(m::Module) = Pkg.TOML.parsefile(NiLang.project_relative_path("Project.toml"))["version"] hightlight(str) = HTML("$str") end; # ╔═╡ 8c2c4fa6-172f-4dde-a279-5d0aecfdbe46 module M using NiLang # define two functions function new_forward(x) if x > 0 return x * 2 elseif x < 0 return x / 2 end end function new_backward(x) if x > 0 return x / 2 elseif x < 0 return x * 2 end end # declare them as reversible to each other @dual new_forward new_backward # The following is need only when your function is differentiable using NiLang.AD: GVar function new_backward(x::GVar) if x.x > 0 GVar(new_backward(x.x), x.g * 2) elseif x.x < 0 GVar(new_backward(x.x), x.g / 2) end end function new_forward(x::GVar) if x.x > 0 GVar(new_forward(x.x), x.g / 2) elseif x.x < 0 GVar(new_forward(x.x), x.g * 2) end end end # ╔═╡ 3199a048-7b39-40f8-8183-6a54cccd91b6 using BenchmarkTools # ╔═╡ 0e1ba158-a6bc-401c-9ba7-ed78020ad068 using Base.Threads # ╔═╡ a4e76427-f051-4b29-915a-fdfce3a299bb html""" """ # ╔═╡ c2c7b4d4-f8c9-4ebf-8da2-0103f03136e7 md"# NiLang's (v$(pkgversion(NiLang))) Documentation NiLang is a embeded domain specific language (eDSL) in Julia, so one need to install [Julia](https://julialang.org/) first. Before reading this documentation, you need to know basic Julia grammar, and how to install and use packages. Also, it might be good to read the [README](https://github.com/GiggleLiu/NiLang.jl) first. In this tutorial, we focus on * NiLang grammar and design patterns, * Automatic differentiation based on reversible programming. " # ╔═╡ 12f07cc7-979c-43c3-9dc9-36ea1463c1f6 md"The symbols used in this notebook" # ╔═╡ 611b577f-4722-42bf-8f8e-aeb2fb30be71 md""" | symbol | meaning | how to type | | ------- | --------- | ----------- | | ← | allocate | \leftarrow + TAB | | → | deallocate | \rightarrow + TAB | | ↔ | exchange | \leftrightarrow + TAB | | ∅ | empty variable | \emptyset + TAB | | ~ | inverse | ~ | """ # ╔═╡ 605872cf-f3fd-462e-a2b1-7d1c5ae45efd title1("Getting started") # ╔═╡ fb3dee44-5fa9-4773-8b7f-a83c44358545 md" After installing NiLang in a Julia REPL by typing `]add NiLang`, one can use NiLang and use the macro `@i` to define a reversible function ``f_1: (x, y) → (x+5y, y)``." # ╔═╡ 70088425-6779-4a2d-ba6d-b0a34c8e93a6 @i @inline function f1(x, y; constant) x += y * constant end # ╔═╡ af738f89-3214-429c-9c7d-18a6ea0d9401 f1(1.0, 2.0; constant=5.0) # call # ╔═╡ 48d7ebc1-5def-4a57-9ec1-3fc370a4543f (~f1)(11.0, 2.0; constant = 5.0) # uncall # ╔═╡ f0e94247-f615-472b-8218-3fa287b38aa1 md"A NiLang function defines a ``\mathbb{R}^n\rightarrow\mathbb{R}^n`` (notice inputs and outputs have the same shapes) mapping, it can take both keyword and positional arguments, where positional arguments can only be used as constants. There is no `return` statement because this function returns input non-keyword variables automatically, it is forbiden to write `return` statement inside a NiLang function. One can aslo put macros like `@inline` after NiLang's `@i` macro. NiLang's macro will render the body of the function first and pass it to other macros. " # ╔═╡ 2581aa33-1dc5-40b1-aa9f-6a11cc750c93 md"`x += y * constant` is an instruction that defines a bijective mapping ``\mathbb{R}^3\rightarrow\mathbb{R}^3``. All NiLang instructions change the variable inplace. Here, we accumulate the result to `x` rather than using `y *= constant` to modify the variable directly. 
This is because in a regular number system, one can easily use the zero element as an eraser to wipe out all information, which would cause irreversibility."

# ╔═╡ 60575978-081a-4bca-a3ed-2b51cd6abc92
md"One can differentiate a NiLang function with `NiLang.AD.gradient(f, args; iloss, kwargs...)`, where `args` and `kwargs` are the positional and keyword arguments for `f`, and `iloss` is the index of the loss variable."

# ╔═╡ f98305cb-4ba2-404a-a5c3-65510e059504
NiLang.AD.gradient(f1, (1.0, 2.0); iloss=1, constant=5.0)

# ╔═╡ e8cd6667-597f-458b-8465-1822e09a7891
md"Here, we specify the first variable as the one that stores the loss. We get
```math
\begin{cases}
\frac{\partial (x+5y)}{\partial x}=1\\
\frac{\partial (x+5y)}{\partial y}=5\\
\end{cases}
```
"

# ╔═╡ 20145d75-004a-4c2f-b7ff-c400ca846d42
let content = md"""The above macro generates two functions: one is `f` and the other is `~f` (or `Inv(f)`). The statement `x += y * constant` is translated to the function call `(x, y, constant) = PlusEq(*)(x, y, constant)`, where the function `PlusEq(*)` is bijective.

```julia
julia> using MacroTools

julia> MacroTools.prettify(@macroexpand @i function f(x, y; constant)
           x += y * constant
       end)
quote
    $(Expr(:meta, :doc))
    function $(Expr(:where, :(f(x, y; constant))))
        hare = wrap_tuple(((PlusEq)(*))(x, y, constant))
        x = hare[1]
        y = hare[2]
        constant = hare[3]
        (x, y)
    end
    if (NiLangCore)._typeof(f) != _typeof(~f)
        function $(Expr(:where, :((newt::_typeof(~f))(x, y; constant))))
            boar = wrap_tuple(((MinusEq)(*))(x, y, constant))
            x = boar[1]
            y = boar[2]
            constant = boar[3]
            (x, y)
        end
    end
end
```
"""
HTML("
<details><summary>How does the compiler work?</summary>$(html(content))</details>
") end # ╔═╡ c682a17f-600f-4034-bfe3-a851ab645c10 title1("Instructions and operands") # ╔═╡ 5239dfe2-ea6d-4e07-a1b1-90954fe8ddc9 md""" The basic form of an instruction is `y ⊙= f(args...)`, where `⊙` can be `+`, `-`, `*`, `/` or `⊻`, where `*=` and `/=` are only reversible in the logarithmic number systems. See section $(titleref("Integers, floating-point numbers, fixed-point numbers and logarithmic numbers")) for details. A function/instruction can be used in NiLang only if its reverse is defined. A NiLang function is differentiable if the function body is composed of differentiable instructions, differentiable NiLang functions and NiLang control flows. A list of differentiable NiLang instructions are | instruction | output | | ----------- | ---------- | | ``{\rm FLIP}(y)`` | ``\sim y`` | | ``{\rm NEG}(y)`` | ``-y`` | | ``{\rm INC}(y)`` | ``y+1`` | | ``{\rm DEC}(y)`` | ``y-1`` | | ``{\rm INV}(y)`` | ``y ^ {-1}`` | | ``{\rm HADAMARD}(x, y)`` | ``\frac{1}{\sqrt{2}}(x+y), \frac{1}{\sqrt{2}}(x-y)`` | ``{\rm SWAP}(a, b)`` | ``b, a`` | | ``{\rm ROT}(a, b, \theta)`` | ``a \cos\theta - b\sin\theta, b \cos\theta + a\sin\theta, \theta`` | | ``{\rm IROT}(a, b, \theta)`` | ``a \cos\theta + b\sin\theta, b \cos\theta - a\sin\theta, \theta`` | | ``y \mathrel{\{+,-\}}= f_{+-}(args...)`` | ``y\{+, -\}f_{+-}(args...), args...`` | | ``y \mathrel{\{*, /\}}= f_{*/}(args...)`` | ``y\{*, /\}f_{*/}(args...), args...`` | Functions ``f_{+-} ∈ \rm \{identity, +, -, *, /, ^\wedge, abs, abs2, sqrt, exp, log, sin, sinh, asin, cos, cosh,`` ``\rm acos, tan, tanh, atan, sincos, convert\}`` and ``f_{*/}∈\rm \{identity, +, -, *, /, ^\wedge, convert\}.`` Functions `FLIP`, `NEG`, `INV`, `HADAMARD`, `SWAP` and `y ⊻= f_{⊻}(args...)` are self-reversible (or reflexive). {`ROT`, `IROT`} and {`INC`, `DEC`}, {`y += f_{+-}(args...)`, `y -= f_{+-}(args...)`} and {`y *= f_{*/}(args...)`, `y /= f_{*/}(args...)`} are pair-wise reversible. For Jacobians and Hessians defined on these instructions, please check this [blog post](https://giggleliu.github.io/2020/01/18/jacobians.html). This set of instructions is extensible, see section $(titleref("Extending the instruction set")) for an example. """ # ╔═╡ e4d86a5a-e820-4a70-8a87-08bac416291b md"The operands of an instruction can be a composition of the following expressions" # ╔═╡ c307b6a4-906d-4be7-9fd7-57c942aded51 md""" | expression | meaning | | ---------- | ------- | | x | change a variable | | x.field | change a field | | x.:1 | change a tuple element | | x' | change the adjoint | | -x | change the inverse | | x[i] | change an array/dict element | | (x, y, z) | change multiple variables | | @fields x | change the fields of an object | | A{Float64}(x, y) | wrap, update fields and unwrap | | x \|> subarray(:,i) | change the view of an array | | x \|> f | change a field map or a bijective mapping | | @const 3 | target is a constant that can not be changed | | @skip! f(x) | an expression that can not be assigned back | """ # ╔═╡ 47b502d4-e8af-4d58-9067-9700784ea435 md"In the following example, one modifies the negated real part of a complex number in a vector inside a tuple directly." 
# ╔═╡ faecc0a7-55d7-42a1-8e9f-7e30143eef9c
@i function dataview_func1(x::Vector, y::Tuple, θ)
    (-x[2].re, y.:1) += sincos(θ)
end

# ╔═╡ 4cbe69d6-b68f-4bda-a0dd-209f9ee54f18
dataview_func1([1+2.0im, 3+4im], (1.0,2.0), π/6)

# ╔═╡ 7dc82f28-77bf-40da-b520-800ed1bc80c9
md"""One can check that the real part of the second element of `x` is decreased by `sin(π/6)`, while the first element of the tuple is increased by `cos(π/6)`. A NiLang instruction is different from a regular Julia instruction in that $(hightlight("a NiLang instruction changes variables inplace")), even though `ComplexF64`, `-x` and `tuple` are all considered immutable in Julia."""

# ╔═╡ 9f5f9de3-9558-4c18-9d98-b77d19b570ec
example("Complex valued log")

# ╔═╡ 6dfcfa19-f78f-4dac-89f7-d3c5dbe17987
@i function complex_log(y!::Complex{T}, x::Complex{T}) where T
    n ← zero(T)
    n += abs(x)
    y!.re += log(n)
    y!.im += angle(x)
    n -= abs(x)
    n → zero(T)
end;

@test complex_log(0.0im, 3.0+2.0im)[1] ≈ log(3.0+2.0im)

# ╔═╡ dad1c6c0-d61b-4f9f-a71e-e683fe143aaa
title2("Broadcasting")

# ╔═╡ 3e4a3916-8fe4-4262-bcd3-3014822717a3
md"The interface is similar to Julia's native broadcasting, but it is expanded to native NiLang loops."

# ╔═╡ 34906208-e6f1-4a67-860a-a7b056a86dde
@i function complex_log_broadcast!(ys!, xs)
    complex_log.(ys!, xs)
end;

@test (x=[1.0+2im, 3.0+4im, 4.0+5im]; complex_log_broadcast!(zeros(ComplexF64, 3), x)[1] ≈ log.(x))

# ╔═╡ 4601df35-679f-465d-9191-c18748b2fd83
title2("Avoid shared read-write")

# ╔═╡ 4fc72b9d-19a2-40f1-a4a8-5e97d3d5e529
md"Shared read-write is not allowed:"

# ╔═╡ a0fde16f-8454-4f5c-a29c-a9e415c0c311
# `y -= y` effectively clears the content in `y`; this is why shared read-write is so dangerous.
@test_throws LoadError macroexpand(NiLang, :(@i function shared_readwrite_error(y)
    y -= y
end))

# ╔═╡ 633ff8f3-8d93-4f73-bec2-c42070e6ece9
md"NiLang is more restrictive: it forbids shared read too. This is on purpose; a shared read becomes a shared write to the gradient field in the back-propagation of gradients, the main goal of NiLang."

# ╔═╡ 57f2d890-b5a0-47b7-9e3d-af4d03b10605
# shared read is also forbidden
@test_throws LoadError macroexpand(NiLang, :(@i function shared_read_error(x, y)
    x -= y * y # should be written as `x -= y^2`
end))

# ╔═╡ 34063cd0-171e-46ce-80dd-52a341fa50a1
md"The correct way to avoid a shared read is renaming one of the variables."

# ╔═╡ 5d5d01db-8ff9-434c-8771-1fec6393e1fb
@i function avoid_shared_read(x, y)
    tmp ← zero(y)
    tmp += y
    x -= y * tmp
    tmp -= y
    tmp → zero(y)
end

# ╔═╡ 10d85a50-f2f9-403e-8f6c-baef61cf702a
avoid_shared_read(0.0, 3.0)

# ╔═╡ b52648bf-a28a-48af-8912-31729d943ce0
md"The shared read-write issue is trickier when one uses NiLang to write kernel functions in a parallel program (multi-threading, MPI and CUDA). See $(titleref(\"Multi-threading and CUDA\")) for details."

# ╔═╡ f45db10f-a836-40f3-9d8d-054ea6540e87
title2("Protect constant variables")

# ╔═╡ 1903563e-ccc2-44d9-9dbe-e5dede275b3c
md"""
To achieve the goal of "everything is mutable", NiLang assigns the output value back to the input variable after a call. Sometimes this causes issues for special variables like functions, type parameters and constants (including results generated from a function call). One can use `@const` (assert a variable is a constant) or `@skip!` (skip assigning back) to avoid such complications.
"""

# ╔═╡ 583f2585-15a3-47c6-a70e-e2f002754028
md"When using a function (e.g. `exp`, as shown below) as a variable, one should be careful about the scope issue."
# ╔═╡ 60e6ff80-3593-4ae4-a273-914847f692db
@i function func_arg(y, f, x)
    y += f(x)
end

# ╔═╡ 9e5cfd68-b58d-4d83-aae2-447e5f805c97
@i function use_func_arg(y, x)
    func_arg(y, exp, x)
end

# ╔═╡ dc85a942-cf52-4405-ad03-32a768e1b6e7
@test_throws UndefVarError use_func_arg(0.0, 3.0)

# ╔═╡ 5cdd346b-10a5-485c-ba78-4c0b3cb0e02f
md"We see an error, but why does calling `use_func_arg` cause an error? If one checks the generated code with `macroexpand`, one will see that `exp` is assigned in the local scope. The compiler takes it as a local variable and complains that `exp` is not defined."

# ╔═╡ af9287b7-6131-46f6-beb8-6885e55e1975
macroexpand(NiLang, :(@i function use_func_arg(y, x)
    func_arg(y, exp, x)
end)) |> NiLangCore.rmlines

# ╔═╡ e20eeabf-1c80-431e-8cfc-4d1b79c52b5a
@i function avoid_assignback(y, x)
    func_arg(y, (@const exp), x)
end;

@test avoid_assignback(0.0, 3.0)[1] == exp(3)

# ╔═╡ 90d30eea-53de-48a0-9700-ff35681fdf38
md"A type parameter cannot be assigned back either."

# ╔═╡ 390f58a5-6f5f-4d3a-bb16-ba04e43a07e7
@test_throws ErrorException Core.eval(NiLang, :(@i function type_arg(t::Type{T}, x) where T
    x += one(T)
end))

# ╔═╡ 2b57443e-a516-434b-be86-80616a98e2f5
@i function avoid_type_assignback(t::Type{T}, x) where T
    x += one(@skip! T)
end;

@test avoid_type_assignback(Float64, 0.0)[2] == 1.0

# ╔═╡ fc2e27f9-b7ba-44cc-a953-6745548ad733
md"A function call that returns a constant should also be decorated with the `@const` or `@skip!` macro."

# ╔═╡ fc744931-360b-4478-9f77-c50f048de243
@test_throws LoadError macroexpand(NiLang, :(@i function funccall_arg(y, x::Matrix) where T
    y += size(x, 1) * size(x, 2)
end))

# ╔═╡ 9a152b36-f377-44da-9700-ca9e05e365ff
@i function avoid_funccall_assignback(y, x::Matrix) where T
    y += (@const size(x, 1)) * (@const size(x, 2))
end;

@test avoid_funccall_assignback(0, randn(3,4))[1] == 12

# ╔═╡ b6dcd18c-606f-4340-b2ec-163e8bad03f5
title1("Variable manipulation")

# ╔═╡ a1a29f34-f8a9-4e9f-9afe-7d0096771440
title2("Allocate and deallocate a variable")

# ╔═╡ 90bd6ad4-3dd8-4e7c-b445-aed1d248a2ec
md"""
One can allocate a new variable `x` like `x ← constant` and deallocate a variable with known value with `x → known value`. They are reversible to each other with the following relation.

$(
twocol(md"
```julia
x ← constant
```",
md"
```julia
x → constant
```
")
)

For example
"""

# ╔═╡ c0259e48-1973-486c-a828-1fcd3e4331c6
@i function alloc_func1()
    tmp ← 1
    # some code that uses `tmp` for computing and restores it to `1`
    tmp → 1
end

# ╔═╡ 8bbffa31-04a6-49ca-b36f-4d4140d75992
md"Allocate multiple variables of the same type at one time:"

# ╔═╡ a6f18c34-80ee-4b52-9ff8-f3c1b1d80f90
@i function power12(y, x::T) where T
    @zeros T a b c # three variables of type `T`
    a += x^2
    b += a^2
    c += b*a
    y += c^2
    c -= b*a
    b -= a^2
    a -= x^2
    @safe @show a b c x y
    ~@zeros T a b c
end

# ╔═╡ a694132b-4f52-467f-8bc4-dc32fe2812db
@test power12(0, 2)[1] == 4096

# ╔═╡ 8c2c82f2-1240-4f2f-830e-ee8021c1a41a
md"One can copy and push a value into a stack and use it later. Its inverse operation will pop a variable and assert its value."

# ╔═╡ 6203cf10-f8cc-4fb9-b814-7552b68c01dc
twocol(md"
```julia
stack[end+1] ← variable
```",
md"
```julia
stack[end] → variable
```
")

# ╔═╡ f97a6bab-b9f9-4b95-98a9-381c51397526
@i function stack_push_and_pop!(stack, x, y, z)
    z += y
    stack[end+1] ← x # copy a variable into a stack
    stack[end+1] ← y
    stack[end] → z # pop a variable from a stack, `z` must have the same value as the variable.
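    # after the pop above, only the copy of `x` remains on the stack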
end

# ╔═╡ 2a2970f4-ab01-486b-89a2-6ff96f734018
md"A less recommended approach is to use the global stacks in NiLang; since NiLang is an eDSL, it cannot guarantee the access order. Available global stacks are `FLOAT64_STACK`, `COMPLEXF64_STACK`, `INT64_STACK` and their 32-bit counterparts, as well as a `BOOL_STACK`."

# ╔═╡ 0b80d9be-53d7-4bf3-a558-659607af4709
@i function stack_push_and_pop!(x, y)
    GLOBAL_STACK[end+1] ← x # copy a variable into a stack
    FLOAT64_STACK[end+1] ← y
end

# ╔═╡ 4ca48a2e-43da-457a-8e9f-6476097e4d7b
let stack = FastStack{Float64}(1000) # a preallocated stack of size 1000
    stack_push_and_pop!(stack, 5.0, 1.0, 0.0) # you will get a stack of size 1
    @test length(stack) == 1
end

# ╔═╡ 92362fda-bae2-4e35-bfe4-dcaea853d50b
let
    NiLang.empty_global_stacks!() # empty stacks
    stack_push_and_pop!(5.0, 1.0)
    @test length(GLOBAL_STACK) == 1
    @test length(FLOAT64_STACK) == 1
end

# ╔═╡ db9e7940-39f1-4ccf-ac70-146a521daa6e
md"One can also allocate and deallocate on dictionaries."

# ╔═╡ 93936612-1447-4114-b864-aba43adef4bd
md"""
The forward pass of dictionary allocation adds an entry to the dictionary (it raises a key error if the key already exists). The backward pass checks that the variable in the dictionary is consistent with the asserted variable, and deletes the key.

$(
twocol(md"
```julia
dict[key] ← variable
```",
md"
```julia
dict[key] → variable
```
")
)
"""

# ╔═╡ b2add70c-c5d6-4e0f-a153-43e21a197181
@i function dict_assign(dict::Dict)
    var1 ← 3.14
    # copy a new variable to a dict
    dict["data"] ← var1
    # deallocate the original variables
    var1 → dict["data"]
end

# ╔═╡ 5c03d5a5-99f0-4efd-9a32-ce6d7c2b266c
dict_assign(Dict())

# ╔═╡ 2349e3ea-3053-42a4-b9d9-f97a76e4abd7
title2("Exchange two variables")

# ╔═╡ 269f18ee-3cd8-466a-a522-7c624503e31b
let expr = twocol(md"
```julia
var1 ↔ var2
```",
md"
```julia
var1 ↔ var2
```
")
md"""One can exchange two variables using `↔`.

$expr
"""
end

# ╔═╡ 89139719-c478-4066-9452-f9893f36d561
@i function exchange_func1(x, y)
    x ↔ y
end

# ╔═╡ d620c5ee-7d9c-4d3f-9e87-0c828dfab9ca
exchange_func1(3, 5)

# ╔═╡ 255d01b9-a873-4e63-9298-9d8f073348b0
md"One can also make a \"link\" by exchanging a variable with an empty variable such as `var::∅` or `stack[end+1]`. The forward pass pushes a variable to the stack and deallocates it. The backward pass pops a variable and asserts its value."

# ╔═╡ f6cf1729-766c-4ed7-b004-c8c8ec6c7e07
let expr = twocol(md"
```julia
stack[end+1] ↔ var2
var1::∅ ↔ var2
```",
md"
```julia
stack[end] ↔ var2::∅
var1 ↔ var2::∅
```
")
end

# ╔═╡ 3645d672-423f-4ac8-805f-0452793fee5a
@i function exchange_func2(x, y)
    anc ← 0.0
    anc += x * y
    anc ↔ z::∅ # declare `z` as an empty variable
    # after the exchange, `anc` is empty and deallocated automatically.
    z -= x * y
    z → 0.0
end

# ╔═╡ c2a0024e-11dd-4ef7-8346-4374d98cafc0
exchange_func2(3, 4)

# ╔═╡ b20004e9-3c73-4dfb-8fd5-f377786fd53b
md"Exchanging with the stack top + 1 means push and deallocate."
# ╔═╡ 5c1952b1-5016-4c87-b23c-8e6a235bf8cd
@i function stack_exchange(stack, y, x)
    stack[end+1] ↔ y # push a variable into a stack and deallocate `y`
    y ← 1.2 # since `y` is deallocated, you can assign any value to it
    stack[end] ↔ anc::∅ # pop a variable to `anc`
    anc ↔ x # exchange `anc` and `x`
    stack[end+1] ↔ anc # push `anc` back to the stack
end

# ╔═╡ 8e4470ee-01da-4547-b091-c4f65cd729b0
let stack = FastStack{Float64}(1000) # a preallocated stack
    stack_exchange(stack, 2.0, 3.0) # you will get a stack of size 1
    @test length(stack) == 1 && stack.data[1] == 3.0
end

# ╔═╡ 0863bd06-cc70-4dde-b3b2-0a466805a356
md"""$(title2("Integers, floating-point numbers, fixed-point numbers and logarithmic numbers"))

A fixed-point zero with 43 fraction bits can be declared as `x ← Fixed43(0.0)` or `x ← zero(Fixed43)`; a logarithmic one can be declared as `x ← ULogarithmic(1.0)`, `x ← one(ULogarithmic{Float64})` or `x ← ULogarithmic(Fixed43(1.0))`. A complex number zero can be defined as `x ← Complex(0.0, 0.0)`.

| Number Type | += | *= | ⊻= | Source |
| ---- | ---- | ---- | ---- | ---- |
| boolean | - | - | ✓ | JuliaLang |
| integer | ✓ | × | ✓ | JuliaLang |
| floating-point number | ✓ (rounding error) | × | - | JuliaLang |
| fixed-point number | ✓ | × | - | [FixedPointNumbers.jl](https://github.com/JuliaMath/FixedPointNumbers.jl) |
| logarithmic number | × | ✓ | - | [LogarithmicNumbers.jl](https://github.com/cjdoris/LogarithmicNumbers.jl) |

* `✓`: the operation has its reverse when operating on this number type.
* `×`: the operation does not have a reverse when operating on this number type.
* `-`: the operation does not apply to this number type.

The `+=` operation is not rigorously reversible on floating point numbers; we ignore the rounding errors in NiLang and use the reversibility check to detect potentially too-large rounding errors. Whether a logarithmic number has rounding errors depends on its content type: it does if it uses a floating point number as storage, and it does not if it uses a fixed point number.

One can use the `y ⊙= convert(x)` statement to convert `x` to the target type `typeof(y)` and accumulate it to `y`. Here `⊙=` can be one of `+=`, `*=` and `⊻=` that has its reverse on type `typeof(y)`.
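
For example, here is a minimal sketch (the function names are illustrative, not from the notebook) of moving between the two number systems with `⊙= convert`:

```julia
@i function fixed_to_log(ly!::ULogarithmic{T}, x::T) where T
    ly! *= convert(x) # `*=` has its reverse on logarithmic numbers
end

@i function log_to_fixed(y!::T, lx::ULogarithmic{T}) where T
    y! += convert(lx) # `+=` has its reverse on fixed-point numbers
end
```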
""" # ╔═╡ a0bae195-04e1-4642-9e14-fe4691e0906b md"Fixed point numbers and Floating point numbers are reversible under the `+=` operation" # ╔═╡ 20d6e8a0-2cf5-48ad-9549-60506b42b970 @i function fixed_pluseq(y1::T, x::T) where T<:Union{Fixed43, AbstractFloat} y1 += x end; @test fixed_pluseq(Fixed43(0.5), Fixed43(0.6))[1] === Fixed43(1.1) && fixed_pluseq(0.5, 0.6)[1] === 1.1 # ╔═╡ 7dee5748-ed73-4e13-aa80-7a50efbc8449 @i function fixed_muleq(y1::T, x::T) where T<:Union{Fixed43, AbstractFloat} y1 *= x end; @test_throws MethodError fixed_muleq(Fixed43(1.0), Fixed43(2.0)) # ╔═╡ 4c719e9b-641e-404e-9ab7-59e89135f3ba md"Logarithmic numbers are reversible under the `*=` operation" # ╔═╡ 77947e00-42c3-4c9e-b62a-b4b29489db43 @i function ulog_pluseq(y1::ULogarithmic{T}, x::ULogarithmic{T}) where T y1 += x end; @test_throws MethodError ulog_pluseq(ULogarithmic(1.0), ULogarithmic(3.0)) # ╔═╡ 2614127d-34fb-4c3d-b678-42693f3c9341 @i function ulog_muleq(y1::ULogarithmic{T}, x::ULogarithmic{T}) where T y1 *= x end; @test ulog_muleq(ULogarithmic(1.0), ULogarithmic(3.0))[1] === ULogarithmic(3.0) # ╔═╡ 8f169235-3bd1-4cc4-a083-79736d306ad5 example("computing x ^ 3 with logarithmic numbers") # ╔═╡ dfc9d305-5bce-4555-bfa3-d8d61fe4ca09 @i function power3(y::ULogarithmic{T}, x::T) where T for i=1:3 y *= convert(x) end end; @test power3(ULogarithmic(1.0), 3.0)[1] ≈ 27 # ╔═╡ f6bfa015-c101-45e8-995c-2bb6a3b7dc7d title2("Types and arrays") # ╔═╡ 8651d7ec-6bcd-4dbe-a062-c4bde32e5e91 md" A Julia type can be accessed in NiLang if its default constructor is not overloaded, because NiLang requires the default constructor to \"modify\" a field of a immutable type. " # ╔═╡ edaa9fdb-3af8-4554-a701-0e3bff2107a5 md"It is also possbile to extract the fields directly." # ╔═╡ 7551a880-340e-4e3f-815b-188e73f7eb9a @i function complex_add(y::Complex{T}, x::Complex{T}) where T ((a, b), (c, d))::∅ ↔ ((@fields x), (@fields y)) a += c b += d ((a, b), (c, d)) ↔ ((@fields x), (@fields y))::∅ end # ╔═╡ 0489e51b-781f-4441-bb7f-ff3bd2e848ad @test complex_add(1+2im, 3+4im) == (1+2im, 4+6im) # ╔═╡ 7b0d30d6-39ff-4f6e-b13c-0ddbfcb576e5 md"Type cast is also possible" # ╔═╡ 042297d8-6ab3-4ae6-b6e7-3b1ab2d5553b @i function add4(a, b, c, d) complex_add(Complex{}(a, b), Complex{}(c, d)) # do not omit `{}` end # ╔═╡ 57d65a36-bfa8-4dc2-8e11-d87fa1324122 @test add4(1, 2, 3, 4) == (1, 2, 4, 6) # ╔═╡ c21d81c3-981f-4472-ad61-d1661bfe5c4e example("Implementing \"axpy\" function") # ╔═╡ 99d6fe7b-d704-48f3-b115-2b3159a78068 md"`axpy` function is defined as ``\vec y += a \vec x``. One can modify the `Array` directly" # ╔═╡ 1950ff70-54eb-4ece-a26d-a23fd0e90f5a @i function arrayaxpy!(y!::Vector{T}, a::T, x::Vector{T}) where T for i=1:length(x) y![i] += a * x[i] end end; @test arrayaxpy!(zeros(10), 2.0, collect(1.0:10.0))[1] ≈ collect(2.0:2.0:20.0) # ╔═╡ 21458f81-9007-46f8-92e0-7a17c60beb36 md"To modify an element of a `Tuple`, we need to use a different style to avoid confusion with array" # ╔═╡ 7813f4ce-6e98-45f3-94a8-7f5981129f2b @i function tupleaxpy!(y!::NTuple{N,T}, a::T, x::NTuple{N,T}) where {N, T} for i=1:length(x) (y! |> tget(i)) += a * (x |> tget(i)) end end; @test tupleaxpy!((0,0,0), 2, (1,2,3))[1] == (2, 4, 6) # ╔═╡ 59ec7cb7-6011-456d-9f57-a55bb8ea51a0 md"Here `data |> tget(i)` represents the `i`th field of the tuple (note it is not allowed to write `data.:i`)." 
# ╔═╡ aacf63a2-9708-40db-8928-049621a7bbc4
md"## Control flows"

# ╔═╡ ad0097e7-c8ad-457a-82a9-18b998a9e9fb
md"""
#### If statement
The condition expression in an `if` statement contains two parts: a precondition and a postcondition.

$(
twocol(md"
```julia
if (precondition[, postcondition])
    ...
end
```
",
md"
```julia
if (postcondition[, precondition])
    ~(...)
end
```
")
)

where `...` are statements and `~(...)` is the backward execution of them.
"""

# ╔═╡ 94cd1345-3132-4882-86fe-d2429f610d1d
md"""If no postcondition is provided, the precondition is taken to be the same as the postcondition, i.e. the statement is translated to `if (cond, cond) ... end`. `elseif` is also supported."""

# ╔═╡ 4a558bd3-6e42-4c61-bd23-888b7f33ae25
@i function f7a(x)
    if x > 1
        x -= 1
    end
end;

@test_throws InvertibilityError f7a(1.2)

# ╔═╡ 004f727a-e0c8-49cb-8858-dfdf4d3ac57a
@i function f7b(x, branch_keeper)
    branch_keeper ⊻= x > 1
    if (x > 1, branch_keeper)
        x += 1
    end
end;

@test f7b(1.2, false)[1] == 2.2

# ╔═╡ 4c03cde9-b643-40ff-b275-f1795f88949e
title2("For statement")

# ╔═╡ fae7c74e-d25e-4c1e-ac97-199e6dae3365
md"""
The reversible `for` statement is similar to its irreversible counterpart.

$(
twocol(md"
```julia
for iter = start:step:stop
    ...
end
```
",
md"
```julia
for iter = stop:-step:start
    ~(...)
end
```
")
)
"""

# ╔═╡ 2f2b24ea-66d0-4b3b-a460-53b6b3f28ef0
md"The iterator length must not be changed during the iteration."

# ╔═╡ e9dbb64a-27b9-443a-b917-69d55c290235
@i function f7c(x, y)
    for i=1:length(x)
        POP!(x, y[i])
    end
end;

@test_throws InvertibilityError f7c([1,2,3], [0,0,0])

# ╔═╡ 0d56ce96-81a5-4102-acbc-7d88f80adcb3
md"There is an `InvertibilityError` because the length of `x` has been changed; the inverse execution would give an incorrect result."

# ╔═╡ 95c41bd1-e50a-42e8-93c3-3b754a458c13
title2("While statement")

# ╔═╡ b8629aeb-6c9a-44ed-87a1-9ab22d9485ed
md"""
The reversible `while` statement starts with the `@from` macro.

$(
twocol(md"
```julia
@from condition1 while condition2
    ...
end
```
",
md"
```julia
@from !(condition2) while !(condition1)
    ~(...)
end
```
")
)
"""

# ╔═╡ 3b211406-041f-4b41-acae-3958e4a37224
md"Here `condition1` is a condition that holds before entering the loop body in the forward pass and is broken after the first iteration, while `condition2` is just a normal while condition. In the backward pass, `!(condition1)` becomes the criterion for breaking the loop."

# ╔═╡ 62522772-cb59-4d13-acdd-d5067b223910
@i function f7d(x, i)
    @from i==0 while i<10
        i += 1
        x += 1
    end
end

# ╔═╡ 2ba68a0f-6e36-4ea2-a91d-6af43741bad1
f7d(1, 0)

# ╔═╡ 75cebaf1-38de-475f-892e-346fd2b46f6f
@test (~f7d)(11, 10) == (1, 0)

# ╔═╡ 72dcf2fe-eb48-4dee-8121-efafc87637e3
title2("Compute-copy-uncompute statement")

# ╔═╡ 84321198-93d4-4d22-8c0f-a5a10b884e1f
md"The *compute-copy-uncompute* statement is a widely used design pattern in reversible programming. We compute the forward pass for the result, copy the result to the output variable, and run the backward pass to erase the intermediate results. For example, to compute `y = x * exp(k)`, we might write the following code"

# ╔═╡ 32244789-afbf-4215-97cd-15483f438eee
@i function f7e(y, x, k)
    expk ← zero(k)
    expk += exp(k)
    y += x * expk
    # uncompute the ancilla and deallocate it
    expk -= exp(k)
    expk → zero(k)
end;

@test f7e(0.0, 2.0, 3.0)[1] ≈ 2.0 * exp(3.0)

# ╔═╡ 4b7f0baf-0316-4da7-9ded-50c064ddbaa3
md"It is equivalent to the following statement, which generates the backward pass automatically for you."
# ╔═╡ 0e02952c-7589-4606-b006-16a9f3e52ae1
@i function f7f(y, x, k)
    # record the forward pass
    @routine begin
        expk ← zero(k)
        expk += exp(k)
    end
    y += x * expk
    # reverse-execute the recorded program
    ~@routine
end;

@test f7f(0.0, 2.0, 3.0)[1] ≈ 2.0 * exp(3.0)

# ╔═╡ f0904d3f-1bf1-459c-9959-b53c0f774e3f
example("Computing Fibonacci numbers")

# ╔═╡ 19bb2af5-2a67-453d-82b0-7d3059b1fa47
md"The sequence of Fibonacci numbers is: 1, 1, 2, 3, 5, 8, ..."

# ╔═╡ 5b5858bf-63ac-4e31-a516-055a9cd18ffe
@i function rfib(out!, n::T) where T
    @routine begin
        n1 ← zero(T)
        n2 ← zero(T)
        n1 += n - 1
        n2 += n - 2
    end
    if (value(n) <= 2, ~)
        out! += 1
    else
        rfib(out!, n1)
        rfib(out!, n2)
    end
    ~@routine
end

# ╔═╡ 95060588-f24b-4eeb-9b0b-ed7159962a3c
@test rfib(0, 6)[1] == 8

# ╔═╡ c4cd9f88-9cd6-4364-b016-78f90aba6a66
title1("Extending the instruction set")
# `y += f(args...)` is translated to `PlusEq(f)(y, args...)`, `y -= f(args...)` to `MinusEq(f)(y, args...)`, `y *= f(args...)` to `MulEq(f)(y, args...)`, `y /= f(args...)` to `DivEq(f)(y, args...)`, and `y ⊻= f(args...)` to `XorEq(f)(y, args...)`.

# ╔═╡ f6049c78-7468-47ce-a4a5-84fab34d115a
title2("How to create a new elementary reversible function")

# ╔═╡ b0f73825-bbb1-448c-b491-bf634fdd398a
md"To define a pair of elementary functions that **reverse to each other**,

1. declare two functions `f` and `g`, each of which defines a mapping ``\mathbb{R}^n \rightarrow \mathbb{R}^n``,
2. use `@dual f g` to tell NiLang they are reversible to each other,
3. if you want to make `f` and `g` differentiable, specify backward rules for these two functions by defining two mappings ``\mathbb{G}^n\rightarrow \mathbb{G}^n``, where ``\mathbb{G}`` is a 2-tuple of ``\mathbb{R}`` (or `NiLang.AD.GVar`) in NiLang. It is similar to `ForwardDiff.Dual` (check [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl)) but defined for the backward pass.

To define a **self-reversible** elementary function,

1. declare a function `f` that defines a mapping ``\mathbb{R}^n \rightarrow \mathbb{R}^n``,
2. use `@selfdual f`,
3. define the backward rule on `f` to make it differentiable.
"

# ╔═╡ 648cdcd6-f4f5-461f-a525-4b350cae9eb0
example("defining a new elementary function")

# ╔═╡ d6b1abd6-749d-4591-99e8-64aaa9199ab5
md"""
One can use the invertibility checker to check whether the function is really reversible (under a certain tolerance `NiLangCore.GLOBAL_ATOL[]` = $(NiLangCore.GLOBAL_ATOL[])).
"""

# ╔═╡ f502b8c1-9b80-4e67-80e8-a64ddb88fb0f
@test NiLang.check_inv(M.new_forward, (3.0,))

# ╔═╡ 0bce342e-9a8e-4005-8b88-82da2d2c7163
md"""
To check whether the gradients are properly defined, one can use `NiLang.AD.check_grad`.
"""

# ╔═╡ fd9cf757-2698-4886-9f0a-c6c23ff0d331
@test NiLang.AD.check_grad(M.new_forward, (3.0,); iloss=1)

# ╔═╡ dda6652a-d063-4511-8041-e869bb88ca26
@test NiLang.AD.check_grad(M.new_backward, (3.0,); iloss=1)

# ╔═╡ a7d47e83-7f44-49d0-a43d-e01316fc6eba
title1("Performance Tips")

# ╔═╡ eca3efef-f35b-4623-8af8-0b830a55566d
md"The following tricks still work in NiLang:

* removing bounds checks with `@inbounds` (it works on `FastStack`),
* adding `@inline` before short functions,
* adding `@simd` before a `for` loop.

Other tricks like ensuring type stability are introduced in the [Julia documentation](https://docs.julialang.org/en/v1/manual/performance-tips/).
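
As a small sketch combining these tips (illustrative, not from the notebook; it uses `@inbounds` the same way the CUDA kernel in the next section does):

```julia
@i @inline function isum(out!, x::Vector{Float64})
    @invcheckoff for i=1:length(x)
        @inbounds out! += x[i]
    end
end
```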
" # ╔═╡ 45985244-adbf-4d6d-9732-a963cca62212 title2("Remove reversibility check") # ╔═╡ 83d7e75f-7273-4c6a-bec1-a2180ebc3fb9 md"This can be done by putting an `@invcheckoff` before a code block." # ╔═╡ 7fb05c65-f47c-430a-b588-c2f9bade40a9 example("computing the exp function by Taylor expansion") # ╔═╡ 14c0caa1-51ea-448c-a7dc-d06e34dd0895 md"Note: this is not a clever implementation. There is an approach of defining it without allocation." # ╔═╡ 457d07bb-e999-413e-8f29-58714670296f @i function exp_with_reversibility_check(y::T, x::T) where T @routine begin N ← 1_000 anc ← zeros(T, N) anc[1] += 1 anc_y ← T(2.0) for i=2:N @routine begin temp ← zero(T) temp += x * anc[i-1] end anc[i] += temp / i anc_y += anc[i] ~@routine end end y += anc_y ~@routine end # ╔═╡ ac53eac0-1a59-4407-8bf6-3d8b966a9bff @benchmark exp_with_reversibility_check(0.0, 1.0) seconds=0.3 # ╔═╡ 85c8ac7b-54f5-47dc-bd50-e78ffd6cf1cf @i function exp_without_reversibility_check(y::T, x::T) where T @routine @invcheckoff begin N ← 1_000 anc ← zeros(T, N) anc[1] += 1 anc_y ← T(2.0) for i=2:N @routine begin temp ← zero(T) temp += x * anc[i-1] end anc[i] += temp / i anc_y += anc[i] ~@routine end end y += anc_y ~@routine end # ╔═╡ 95c55847-0591-4f7f-b9a1-aa974ccfef69 @benchmark exp_without_reversibility_check(0.0, 1.0) seconds=0.3 # ╔═╡ 91f8cfc6-e261-4945-8506-eed8caa607c2 title1("Multi-threading and CUDA") # ╔═╡ c82b3b5c-c4e2-4bf6-b4ec-0d05ba9a669b @i function multi_thread_exp(y::Vector, x::Vector) # check the size of `x` and `y`. `@assert` is not a valid statement in NiLang, so one should decorate it with `@safe` to tell the compiler, doing this is safe, do not check this statement. @safe @assert length(x) == length(y) @threads for i=1:length(y) y[i] += exp(x[i]) end end; @test multi_thread_exp(zeros(3), [1.0, 2.0, 3.0])[1] ≈ [exp(1.0), exp(2.0), exp(3.0)] # ╔═╡ 32d75270-60d7-4326-a4ff-8674d0fbd491 md"With [CUDA](https://github.com/JuliaGPU/CUDA.jl), one can also define parallel reversile and differentiable GPU device functions. Use the broadcasting version `y += x` as an example." # ╔═╡ 4b8834c1-8bb3-49f2-ae9e-1dbb8832d7f0 @i function addkernel(target, source) @invcheckoff begin @routine b ← (blockIdx().x-1) * blockDim().x + threadIdx().x if (b <= length(target), ~) @inbounds target[b] += source[b] end ~@routine end end # ╔═╡ d8f0ae56-e643-48a1-86ee-1cd907ecb662 md"One can launch the kernel function in NiLang with `@cuda`" # ╔═╡ e72b9dc2-dfac-4631-b114-01ec14297427 md""" ```julia using CUDA @i function :(+=)(identity)(target::CuArray, source::CuArray) @safe @assert length(target) == length(source) @cuda threads=256 blocks=ceil(Int,length(target)/256) addkernel(target, source) end ``` """ # ╔═╡ 8c93a773-edc0-4ec2-88ef-1b58b7deddc5 title2("Shared read-write in parallel computing and autodiff") # ╔═╡ 16d08950-0575-4a4b-afc8-11ddca3198c7 md"The parallel code may suffer from the shared read issue when computing gradients. Let's take a look at a parallel code that computes the loss ``\mathcal{L} = \sum x \vec z``." # ╔═╡ 7c594d19-59fc-433a-bffa-c63bad46869e @i function shared_read(loss::Real, y::Vector, x::Real, z::Vector) @safe @assert length(z) == length(y) @threads for i=1:length(y) y[i] += x * z[i] end for i=1:length(y) loss += y[i] end end; @test shared_read(0.0, zeros(3), 2.0, [1.0, 2.0, 3.0])[1] ≈ 12 # ╔═╡ 345b344d-afda-4ce1-a0e7-ce6063a69206 md"However, when computing the gradients, the gradient on `x` will not be computed correctly." 
# ╔═╡ c55eb045-daca-42f9-a357-095edef24644 let z = randn(100) _, gy, gx, gz = NiLang.AD.gradient(shared_read, (0.0, zeros(100), 2.0, z); iloss=1) @test gx ≈ sum(z) end # ╔═╡ c6903b65-a8b8-4aef-8c43-c822077b9d0e md"The error is expected. Because the variable `x` is shared by multiple threads, when updating the gradient field of `x` in the backward pass, all threads will try to update the same gradient field, this is famous [race condition](https://en.wikipedia.org/wiki/Race_condition) in parallel computing." # ╔═╡ f80353d6-0dfe-4b0a-a1af-655d344473bf title1("Resources") # ╔═╡ 4ca276fb-859d-4c5a-81c3-8e4b28922fa4 title2("Help and Discussion") # ╔═╡ 3d020209-b8dd-4605-9329-78a985f0a6a3 md""" `reversible-computing` channel of [Julia slack](https://julialang.org/slack/) and [Julia Zulip](https://julialang.zulipchat.com/register/). """ # ╔═╡ c7ec3496-79ea-4956-976c-b88dd22207c7 title2("Learning") # ╔═╡ 8530a9d1-5a27-4a1d-883f-4b033a6f8fe4 md""" 1. Reversible computing book: Perumalla, Kalyan S. Introduction to reversible computing. CRC Press, 2013. 2. Our paper: Liu, Jin-Guo, and Taine Zhao. "Differentiate Everything with a Reversible Programming Language." arXiv:2003.04617 (2020). """ # ╔═╡ 7ce31932-0447-4445-99aa-7ebced7d0bad TableOfContents() # ╔═╡ Cell order: # ╟─2061b434-0ad1-46eb-a0c7-1a5f432bfa62 # ╟─a4e76427-f051-4b29-915a-fdfce3a299bb # ╟─c2c7b4d4-f8c9-4ebf-8da2-0103f03136e7 # ╟─12f07cc7-979c-43c3-9dc9-36ea1463c1f6 # ╟─611b577f-4722-42bf-8f8e-aeb2fb30be71 # ╟─605872cf-f3fd-462e-a2b1-7d1c5ae45efd # ╟─fb3dee44-5fa9-4773-8b7f-a83c44358545 # ╠═d941d6c2-55bf-11eb-0002-35c7474e4050 # ╠═70088425-6779-4a2d-ba6d-b0a34c8e93a6 # ╠═af738f89-3214-429c-9c7d-18a6ea0d9401 # ╠═48d7ebc1-5def-4a57-9ec1-3fc370a4543f # ╟─f0e94247-f615-472b-8218-3fa287b38aa1 # ╟─2581aa33-1dc5-40b1-aa9f-6a11cc750c93 # ╟─60575978-081a-4bca-a3ed-2b51cd6abc92 # ╠═f98305cb-4ba2-404a-a5c3-65510e059504 # ╟─e8cd6667-597f-458b-8465-1822e09a7891 # ╟─20145d75-004a-4c2f-b7ff-c400ca846d42 # ╟─c682a17f-600f-4034-bfe3-a851ab645c10 # ╟─5239dfe2-ea6d-4e07-a1b1-90954fe8ddc9 # ╟─e4d86a5a-e820-4a70-8a87-08bac416291b # ╟─c307b6a4-906d-4be7-9fd7-57c942aded51 # ╟─47b502d4-e8af-4d58-9067-9700784ea435 # ╠═faecc0a7-55d7-42a1-8e9f-7e30143eef9c # ╠═4cbe69d6-b68f-4bda-a0dd-209f9ee54f18 # ╟─7dc82f28-77bf-40da-b520-800ed1bc80c9 # ╟─9f5f9de3-9558-4c18-9d98-b77d19b570ec # ╠═6dfcfa19-f78f-4dac-89f7-d3c5dbe17987 # ╟─dad1c6c0-d61b-4f9f-a71e-e683fe143aaa # ╟─3e4a3916-8fe4-4262-bcd3-3014822717a3 # ╠═34906208-e6f1-4a67-860a-a7b056a86dde # ╟─4601df35-679f-465d-9191-c18748b2fd83 # ╟─4fc72b9d-19a2-40f1-a4a8-5e97d3d5e529 # ╠═a0fde16f-8454-4f5c-a29c-a9e415c0c311 # ╟─633ff8f3-8d93-4f73-bec2-c42070e6ece9 # ╠═57f2d890-b5a0-47b7-9e3d-af4d03b10605 # ╟─34063cd0-171e-46ce-80dd-52a341fa50a1 # ╠═5d5d01db-8ff9-434c-8771-1fec6393e1fb # ╠═10d85a50-f2f9-403e-8f6c-baef61cf702a # ╟─b52648bf-a28a-48af-8912-31729d943ce0 # ╟─f45db10f-a836-40f3-9d8d-054ea6540e87 # ╟─1903563e-ccc2-44d9-9dbe-e5dede275b3c # ╟─583f2585-15a3-47c6-a70e-e2f002754028 # ╠═60e6ff80-3593-4ae4-a273-914847f692db # ╠═9e5cfd68-b58d-4d83-aae2-447e5f805c97 # ╠═dc85a942-cf52-4405-ad03-32a768e1b6e7 # ╟─5cdd346b-10a5-485c-ba78-4c0b3cb0e02f # ╠═af9287b7-6131-46f6-beb8-6885e55e1975 # ╠═e20eeabf-1c80-431e-8cfc-4d1b79c52b5a # ╟─90d30eea-53de-48a0-9700-ff35681fdf38 # ╠═390f58a5-6f5f-4d3a-bb16-ba04e43a07e7 # ╠═2b57443e-a516-434b-be86-80616a98e2f5 # ╟─fc2e27f9-b7ba-44cc-a953-6745548ad733 # ╠═fc744931-360b-4478-9f77-c50f048de243 # ╠═9a152b36-f377-44da-9700-ca9e05e365ff # ╟─b6dcd18c-606f-4340-b2ec-163e8bad03f5 # 
╟─a1a29f34-f8a9-4e9f-9afe-7d0096771440 # ╟─90bd6ad4-3dd8-4e7c-b445-aed1d248a2ec # ╠═c0259e48-1973-486c-a828-1fcd3e4331c6 # ╟─8bbffa31-04a6-49ca-b36f-4d4140d75992 # ╠═a6f18c34-80ee-4b52-9ff8-f3c1b1d80f90 # ╠═a694132b-4f52-467f-8bc4-dc32fe2812db # ╟─8c2c82f2-1240-4f2f-830e-ee8021c1a41a # ╟─6203cf10-f8cc-4fb9-b814-7552b68c01dc # ╠═f97a6bab-b9f9-4b95-98a9-381c51397526 # ╠═4ca48a2e-43da-457a-8e9f-6476097e4d7b # ╟─2a2970f4-ab01-486b-89a2-6ff96f734018 # ╠═0b80d9be-53d7-4bf3-a558-659607af4709 # ╠═92362fda-bae2-4e35-bfe4-dcaea853d50b # ╟─db9e7940-39f1-4ccf-ac70-146a521daa6e # ╟─93936612-1447-4114-b864-aba43adef4bd # ╠═b2add70c-c5d6-4e0f-a153-43e21a197181 # ╠═5c03d5a5-99f0-4efd-9a32-ce6d7c2b266c # ╟─2349e3ea-3053-42a4-b9d9-f97a76e4abd7 # ╟─269f18ee-3cd8-466a-a522-7c624503e31b # ╠═89139719-c478-4066-9452-f9893f36d561 # ╠═d620c5ee-7d9c-4d3f-9e87-0c828dfab9ca # ╟─255d01b9-a873-4e63-9298-9d8f073348b0 # ╟─f6cf1729-766c-4ed7-b004-c8c8ec6c7e07 # ╠═3645d672-423f-4ac8-805f-0452793fee5a # ╠═c2a0024e-11dd-4ef7-8346-4374d98cafc0 # ╟─b20004e9-3c73-4dfb-8fd5-f377786fd53b # ╠═5c1952b1-5016-4c87-b23c-8e6a235bf8cd # ╠═8e4470ee-01da-4547-b091-c4f65cd729b0 # ╟─0863bd06-cc70-4dde-b3b2-0a466805a356 # ╟─a0bae195-04e1-4642-9e14-fe4691e0906b # ╠═20d6e8a0-2cf5-48ad-9549-60506b42b970 # ╠═7dee5748-ed73-4e13-aa80-7a50efbc8449 # ╟─4c719e9b-641e-404e-9ab7-59e89135f3ba # ╠═77947e00-42c3-4c9e-b62a-b4b29489db43 # ╠═2614127d-34fb-4c3d-b678-42693f3c9341 # ╟─8f169235-3bd1-4cc4-a083-79736d306ad5 # ╠═dfc9d305-5bce-4555-bfa3-d8d61fe4ca09 # ╟─f6bfa015-c101-45e8-995c-2bb6a3b7dc7d # ╟─8651d7ec-6bcd-4dbe-a062-c4bde32e5e91 # ╟─edaa9fdb-3af8-4554-a701-0e3bff2107a5 # ╠═7551a880-340e-4e3f-815b-188e73f7eb9a # ╠═0489e51b-781f-4441-bb7f-ff3bd2e848ad # ╟─7b0d30d6-39ff-4f6e-b13c-0ddbfcb576e5 # ╠═042297d8-6ab3-4ae6-b6e7-3b1ab2d5553b # ╠═57d65a36-bfa8-4dc2-8e11-d87fa1324122 # ╟─c21d81c3-981f-4472-ad61-d1661bfe5c4e # ╟─99d6fe7b-d704-48f3-b115-2b3159a78068 # ╠═1950ff70-54eb-4ece-a26d-a23fd0e90f5a # ╟─21458f81-9007-46f8-92e0-7a17c60beb36 # ╠═7813f4ce-6e98-45f3-94a8-7f5981129f2b # ╟─59ec7cb7-6011-456d-9f57-a55bb8ea51a0 # ╟─aacf63a2-9708-40db-8928-049621a7bbc4 # ╟─ad0097e7-c8ad-457a-82a9-18b998a9e9fb # ╟─94cd1345-3132-4882-86fe-d2429f610d1d # ╠═4a558bd3-6e42-4c61-bd23-888b7f33ae25 # ╠═004f727a-e0c8-49cb-8858-dfdf4d3ac57a # ╟─4c03cde9-b643-40ff-b275-f1795f88949e # ╟─fae7c74e-d25e-4c1e-ac97-199e6dae3365 # ╟─2f2b24ea-66d0-4b3b-a460-53b6b3f28ef0 # ╠═e9dbb64a-27b9-443a-b917-69d55c290235 # ╟─0d56ce96-81a5-4102-acbc-7d88f80adcb3 # ╟─95c41bd1-e50a-42e8-93c3-3b754a458c13 # ╟─b8629aeb-6c9a-44ed-87a1-9ab22d9485ed # ╟─3b211406-041f-4b41-acae-3958e4a37224 # ╠═62522772-cb59-4d13-acdd-d5067b223910 # ╠═2ba68a0f-6e36-4ea2-a91d-6af43741bad1 # ╠═75cebaf1-38de-475f-892e-346fd2b46f6f # ╟─72dcf2fe-eb48-4dee-8121-efafc87637e3 # ╟─84321198-93d4-4d22-8c0f-a5a10b884e1f # ╠═32244789-afbf-4215-97cd-15483f438eee # ╟─4b7f0baf-0316-4da7-9ded-50c064ddbaa3 # ╠═0e02952c-7589-4606-b006-16a9f3e52ae1 # ╟─f0904d3f-1bf1-459c-9959-b53c0f774e3f # ╟─19bb2af5-2a67-453d-82b0-7d3059b1fa47 # ╠═5b5858bf-63ac-4e31-a516-055a9cd18ffe # ╠═95060588-f24b-4eeb-9b0b-ed7159962a3c # ╟─c4cd9f88-9cd6-4364-b016-78f90aba6a66 # ╟─f6049c78-7468-47ce-a4a5-84fab34d115a # ╟─b0f73825-bbb1-448c-b491-bf634fdd398a # ╟─648cdcd6-f4f5-461f-a525-4b350cae9eb0 # ╠═8c2c4fa6-172f-4dde-a279-5d0aecfdbe46 # ╟─d6b1abd6-749d-4591-99e8-64aaa9199ab5 # ╠═f502b8c1-9b80-4e67-80e8-a64ddb88fb0f # ╟─0bce342e-9a8e-4005-8b88-82da2d2c7163 # ╠═fd9cf757-2698-4886-9f0a-c6c23ff0d331 # ╠═dda6652a-d063-4511-8041-e869bb88ca26 # 
╟─a7d47e83-7f44-49d0-a43d-e01316fc6eba # ╟─eca3efef-f35b-4623-8af8-0b830a55566d # ╟─45985244-adbf-4d6d-9732-a963cca62212 # ╟─83d7e75f-7273-4c6a-bec1-a2180ebc3fb9 # ╟─7fb05c65-f47c-430a-b588-c2f9bade40a9 # ╟─14c0caa1-51ea-448c-a7dc-d06e34dd0895 # ╠═457d07bb-e999-413e-8f29-58714670296f # ╠═3199a048-7b39-40f8-8183-6a54cccd91b6 # ╠═ac53eac0-1a59-4407-8bf6-3d8b966a9bff # ╠═85c8ac7b-54f5-47dc-bd50-e78ffd6cf1cf # ╠═95c55847-0591-4f7f-b9a1-aa974ccfef69 # ╟─91f8cfc6-e261-4945-8506-eed8caa607c2 # ╠═0e1ba158-a6bc-401c-9ba7-ed78020ad068 # ╠═c82b3b5c-c4e2-4bf6-b4ec-0d05ba9a669b # ╟─32d75270-60d7-4326-a4ff-8674d0fbd491 # ╠═4b8834c1-8bb3-49f2-ae9e-1dbb8832d7f0 # ╟─d8f0ae56-e643-48a1-86ee-1cd907ecb662 # ╟─e72b9dc2-dfac-4631-b114-01ec14297427 # ╟─8c93a773-edc0-4ec2-88ef-1b58b7deddc5 # ╟─16d08950-0575-4a4b-afc8-11ddca3198c7 # ╠═7c594d19-59fc-433a-bffa-c63bad46869e # ╟─345b344d-afda-4ce1-a0e7-ce6063a69206 # ╠═c55eb045-daca-42f9-a357-095edef24644 # ╟─c6903b65-a8b8-4aef-8c43-c822077b9d0e # ╟─f80353d6-0dfe-4b0a-a1af-655d344473bf # ╟─4ca276fb-859d-4c5a-81c3-8e4b28922fa4 # ╟─3d020209-b8dd-4605-9329-78a985f0a6a3 # ╟─c7ec3496-79ea-4956-976c-b88dd22207c7 # ╟─8530a9d1-5a27-4a1d-883f-4b033a6f8fe4 # ╟─7ce31932-0447-4445-99aa-7ebced7d0bad ================================================ FILE: notebooks/feynman.jl ================================================ ### A Pluto.jl notebook ### # v0.15.0 using Markdown using InteractiveUtils # This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error). macro bind(def, element) quote local el = $(esc(element)) global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing el end end # ╔═╡ f3e235e7-76b9-4c39-bc70-038539838ff4 begin using Revise, Viznet, Compose, PlutoUI, Random, TikzPictures function leftright(a, b; width=600, leftcellwidth=0.5) HTML("""
<table style="width:$(width)px"><tr><td style="width:$(100*leftcellwidth)%">$(html(a))</td><td>$(html(b))</td></tr></table>
""") end # up down layout function updown(a, b; width=nothing) HTML("""
<div>$(html(a))</div>
<div>$(html(b))</div>
""") end function highlight(str) HTML("""$(str)""") end end; # ╔═╡ e81de385-0070-49a9-a889-8fcf9d9e2951 using Plots; gr(); # ╔═╡ db9a97b1-f76d-4f51-96c6-0159469c5adb using NiLang # ╔═╡ e20e2d2e-4b28-4e32-8d80-ce029928a094 html""" """ # ╔═╡ 8308df59-3faa-4abf-8f05-119bbae48f64 let github = html""" GiggleLiu """ md"# Feynman's Lectures on computing, Chap. 5 -- Jinguo Liu ($github) " end # ╔═╡ a3532a83-9fd3-4d24-b1bb-b52457317e51 html"""A postdoc in Mikhail Lukin's group, department of physics
<br>
Harvard university
<br>
Quera Computing """

# ╔═╡ 15657e4b-848e-43ad-a99f-37143d11705e
md"# Table of contents
1. Irreversible computing requires dissipating heat to the environment
    * The relation between reversibility and energy cost
    * Compressing boxes
    * An engine driven by information
    * The relation between computing speed and energy cost
2. Several reversible computing models
    * Copy machine
    * Magnetic dipole
    * DNA copying is a type of copy machine
    * General reversible computing
    * Billiard ball model
"

# ╔═╡ f9675365-36aa-430c-b747-3bc4f602e6fb
md"## Information erasure requires dissipating heat to the environment!"

# ╔═╡ 94c5eaa1-432c-4553-829e-f78d97f3c0ca
md"""
* *Information*: the uncertainty of a system; it can be quantified by entropy,
* *Information erasure*: a decrease of information entropy,
* *Knowledge*: the complement of information; the more knowledge, the less uncertainty.
"""

# ╔═╡ bef6978d-e654-4364-b5eb-e9608cf68464
md"*setup*: a collection of boxes of the same size ``V``, immersed in a heat bath of temperature ``T``, each having a jiggling particle inside."

# ╔═╡ 046f7559-4af9-4982-b5c3-335add0911d7
html"""
""" # ╔═╡ 0a039bfa-571e-4fad-b73c-1324d08777fc html"""
""" # ╔═╡ bf7abacc-5b0a-4623-b2c5-af60183ad4b0 md""" A piston attached to each box. """ # ╔═╡ 3f1e4d7a-32a7-4c7e-92dd-465bac925e63 md""" *goal*: Compress the boxes from size $V$ to size $V/2$, the process is isothermal. """ # ╔═╡ 95a21058-0b07-4859-af68-8ca5b48b2a77 md"## Case 1: Ideal Gas" # ╔═╡ f68bcfb6-97ce-48d1-b0b8-e8466d4ac879 md""" Case 1: we know nothing about the system. The gas does work ```math \begin{align} &pV = N \underbrace{k}_{\substack{\text{Boltzman constant}\\\sim 1.38 × 10-23 m^2 {\rm kg} s^{-2} K^{-1}}} T\\ &W_{\rm gas} = \int_{V}^{V/2} p dV = -NkT\log 2 \end{align} ``` """ # ╔═╡ 2fe7c298-4c5d-464c-980b-6cd9a537ac1e let img = TikzPicture(L""" \draw (-1.05, -1.05) rectangle (1.1,1.1); \foreach[evaluate={\a=rand; \b=rand;\c=rand;\d=rand;}] \x in {1,...,20}{ \fill (\a, \b) circle [radius=0.05]; \draw[->,thick] (\a, \b) -- (\a+\c*0.2, \b+\d*0.2); } \node[draw, single arrow, minimum height=10mm, minimum width=3mm, single arrow head extend=2mm, rotate=90] at (0.0,1.3) {presure}; \node[draw, single arrow, minimum height=10mm, minimum width=3mm, single arrow head extend=2mm, rotate=-90] at (0.0,-1.3) {presure}; \node[draw, single arrow, minimum height=10mm, minimum width=3mm, single arrow head extend=2mm, rotate=0] at (1.3,0.0) {presure}; \node[draw, single arrow, minimum height=10mm, minimum width=3mm, single arrow head extend=2mm, rotate=180] at (-1.3,0.0) {presure}; """; options="scale=2.0", preamble=raw"\usetikzlibrary{shapes.arrows}") leftright(img, md"the microscopic explaination of the presure ``kT \sim \text{average kinetic energy}``") end # ╔═╡ 49dab78a-7bd9-4faa-8a30-9af8a96e0c5b md"## Case 2: With prior knowledge" # ╔═╡ 3d4ba750-8d62-48ac-bf96-691397689ddc md""" Case 2: we know one bit knowledge about each box: * 1: the atom is in the right half * 0: the atom is in the left half ```math W_{\rm gas} = 0 ``` """ # ╔═╡ 7aa7b0ee-beeb-4a3e-abf1-aa71e916f4cd md""" ## Compare Case 1: * erase information (the left-right information of the atom), * consumes energy Case 2: * do not erase information, * does not consume energy """ # ╔═╡ f4cb9212-181f-4338-b858-1d99c7f415e9 md"Erasing each bit information comes along with $kT \log 2$ heat dissipation!!" # ╔═╡ c1bbaec8-4fb9-4ab8-a30d-06a286597de0 md""" ## Maxwell's demon The *Second Law of Thermodynamics* states that the state of entropy of the entire universe, as an isolated system, will always increase over time. The second law also states that the changes in the entropy in the universe can never be negative. ![](https://user-images.githubusercontent.com/6257240/124372430-b14fe200-dc57-11eb-9e8d-75385e2c621b.png) """ # ╔═╡ 6d7a07ff-be1b-4902-8a6d-7d9257c1157f md""" Before observing: (s, t), number of possible configurations ``2^{|s|+|t|}`` After observing: (s, t=s), number of possible configurations ``2^{|s|}`` """ # ╔═╡ 0577d67f-648f-407c-8abf-507d086445bd md""" ## Proving from the quantum setup """ # ╔═╡ eb10e436-bcce-4d81-891e-15158219fe80 md"Reeb (2014)" # ╔═╡ 7c5a30fd-95f9-4bb8-b34f-b10b0f2a27f2 md""" 1. the process involves a “system” ``S`` and a “reservoir” ``R``, both described by Hilbert spaces, 2. the reservoir ``R`` is initially in a thermal state, ``\rho_R = e^{−\beta H}/{\rm tr}[e^{−\beta H}]`` , where H is a Hermitian operator on ``R`` (“Hamiltonian”) and ``\beta \in [−\infty, +\infty]`` is the “inverse temperature”, 3. the system ``S`` and the reservoir ``R`` are initially uncorrelated, ``\rho_{SR} = \rho_S \otimes \rho_R``, 4. the process itself proceeds by unitary evolution, ``\rho_{SR}'=U\rho_{SR}U^\dagger``. 
""" # ╔═╡ abee1bee-ed01-4b05-a848-3aeb695a24ba md""" ![](https://user-images.githubusercontent.com/6257240/124468410-0e868900-dd67-11eb-91b4-b9ab92f21152.png) """ # ╔═╡ b05538cc-de01-4b1e-a602-feb780cddf4a md""" Main result: ``\Delta > \Delta S``, because ```math \begin{align} [S(\rho_S') - S(\rho_S)] + [S(\rho_R')-S(\rho_R)] &=[S(\rho_S') + S(\rho_R')-S(\rho_{SR})] \ldots (3)\\ &=[S(\rho_S') + S(\rho_R')-S(\rho_{SR}')] \ldots (4)\\ &=I(S': R') \geq 0 \end{align} ``` """ # ╔═╡ 42c398ab-bb45-423f-b030-404e7582df5a md""" ## An information driven car """ # ╔═╡ 48081dd4-2bf4-43a1-899c-0303b4fcedd3 md""" ![](https://user-images.githubusercontent.com/6257240/124372207-2bcc3200-dc57-11eb-840e-1bf2c85abf9b.png)""" # ╔═╡ c6ef8479-639b-45c1-9b48-a5d2c233d3b8 md"1. set up the initial state to ``0``, contacting with a heat bath of temperature ``T``, 2. place a piston at the half way of the box 3. the environment warm up the box 4. the particle isothermally push the piston outwards" # ╔═╡ 6f01cdc2-6ce9-41da-b279-b047c9779405 md"## An example of reversible computing: Copy machine" # ╔═╡ 876ad6cf-84c1-4e34-89de-6f9273ba3479 md"*setup*: A copier (state known) and a model (state unknown), both being double well potentials. In figure * ``x`` axis is the parameter, * ``y`` axis is the energy. " # ╔═╡ 9ca8912d-5fc5-4066-adb8-ad02f75c2cbe md"``0`` state, left well ``1`` state, right well" # ╔═╡ 9aab5751-e9e0-46c0-8e66-4b98258fed08 md""" ![](https://user-images.githubusercontent.com/6257240/124372454-cc225680-dc57-11eb-8526-ed397ce10583.png) """ # ╔═╡ a29af398-ff22-44cb-a5aa-0b0409312be9 md"There is a tilt force when we make two double wells close" # ╔═╡ c3db622f-e9ff-4d99-afb6-9db65c6cae7a md" *goal*: set the state of copier to the same state as the model. " # ╔═╡ 12cbf4b7-9b55-423c-bf59-5cb18e167afd md"*procedure*" # ╔═╡ 89a5ff44-1b04-4bd8-a40a-83382a027fb3 md"Step 1: lowering the copier's potential barrier." # ╔═╡ 51e7b853-8640-4415-a9a4-8c0e06ad916a md"Step 2: bring the model close to copier (above illustration)." # ╔═╡ a8fe838e-727d-4068-887d-17b1bf99f90b md"Step 3: raise the copier's potential barrier." # ╔═╡ c7cd75cb-4c64-4704-b839-c5a556f89be7 md"Step 4: take the model away" # ╔═╡ ec14fba6-0cb9-483f-b3ea-cc4c5e83c965 md"## Magnetic dipole [ref](https://en.wikipedia.org/wiki/Magnetic_dipole)" # ╔═╡ f1abc5c1-2c34-422a-86c4-5ad8e7df8b7e md""" *setup*: two magnetic dipoles pointing to the same direction. 
""" # ╔═╡ 8ad4e7c0-c496-4d29-ac09-e6525b1b4c0f md""" ![](https://user-images.githubusercontent.com/6257240/124498319-23c0df00-dd8a-11eb-9fca-51a87fde6ec0.png) """ # ╔═╡ 757e2d78-c5ee-4b40-bfd6-1b39af338d9d md""" ```math \text{potential energy} \approx \sin^2 \phi ``` """ # ╔═╡ cbea35c7-c3c8-48e7-bb47-d5e193aee2c4 md""" state ``0``: ← ← state ``1``: → → """ # ╔═╡ 81013954-1c48-4c05-82c9-49b4bfafda95 let x = 0:1000 y = map(x->sin(x/1000*2π)^2, x) Plots.plot(x./1000, y, xlabel="ϕ/2π", ylabel="potential energy") end # ╔═╡ 1924eff7-1423-4e90-8005-43113d9deb3d md""" Step 1: Introduce a vertical magnetic field ``B``, we have ```math \text{potential energy (magnetic field)} = -B \sin \phi ``` """ # ╔═╡ c7edbc15-cd59-45fd-a0dc-c48aadb1c096 md"B = $(@bind B Slider(0:0.01:2; show_value=true))" # ╔═╡ bf2c9da7-8c45-409f-82a3-979cd63ea993 let x = 0:1000 y = map(x) do x ϕ = x/1000*2π sin(ϕ)^2 - B * sin(ϕ) end Plots.plot(x./1000, y, xlabel="ϕ/2π", ylabel="potential energy") end # ╔═╡ 1c32a491-ac85-4132-82fd-9b846a8485df md""" copier state is ``\uparrow \uparrow`` """ # ╔═╡ 6cbf202f-34e4-42b6-a7a7-5d766bfdfc37 md""" Step 2: bring the model (assume it is in state 1) close to the copier ```math \text{potential energy (model)} = -b \cos \phi ``` """ # ╔═╡ d40b318f-bff2-4d0b-b2a6-d00933ac7567 md"b = $(@bind b Slider(-0.5:0.01:0.5; default=0.0,show_value=true))" # ╔═╡ 54f53a7b-74e8-433b-94fd-9fa7192dfca5 let x = 0:1000 y = map(x) do x ϕ = x/1000*2π sin(ϕ)^2 - B * sin(ϕ) - b*cos(ϕ) end Plots.plot(x./1000, y, xlabel="ϕ/2π", ylabel="potential energy") end # ╔═╡ b0dcad96-e439-4e09-9e92-8cad7ede79af md""" # Last time * Erase information -> make the system into a more certain state -> a decrease of entropy in the system -> requires dissipating heat to the heat bath (Landauer's principle), and this can be proved regorously from the quantum perspective. * Introduce a type of reversible computing model: copy machine * Magnetic dipole * Protein synthesis """ # ╔═╡ 0a15a2cf-2e7a-4bd7-ac78-0803fc3d5c73 md""" # This time * The relation between energy and speed in reversible computing. * General reversible gates and reversible programming, * The billiard ball model * Reversible control flows * Several reversible computing architectures """ # ╔═╡ ffbc5616-d2d9-4ce4-996f-d1a743bb89b3 md"## Protein synthesis (Brownian computer)" # ╔═╡ 2602d857-4a21-478a-97a2-58a177666f52 md""" *setup*: we only consider the first stage of protein synthesis, copying information from DNA to m-RNA. A DNA strand is immersed to a biological soup with lots of triphostrates such as ATP, CTP, GTP and UTP. A DNA strand is made up of alternating phosphate and pentose sugar groups. 
To each sugar group is attached one of four bases: A (adenine), T (thymine), C (cytosine) and G (guanine).
"""

# ╔═╡ cf27e340-578a-440d-8d4a-e5a2277d5205
md"""
![](https://user-images.githubusercontent.com/6257240/122641081-f957fc00-d0d0-11eb-9c7b-180e11f9bc33.png)
"""

# ╔═╡ aa53fd68-5acd-488d-a096-5ce39759f481
md"lowering the potential barrier: the enzyme"

# ╔═╡ cb9a9ef0-c0dc-487c-8008-0f73f9910ef8
md"""
key point: the chemical reaction is reversible; the direction in which it evolves depends on the relative concentrations of pyrophosphates and triphosphates in the soup
"""

# ╔═╡ f7e0478d-1839-4684-9265-ee990fe9da45
md"## Speed and energy in a Brownian computer"

# ╔═╡ 751e32d6-2582-4b1d-9558-124b1ef54f81
md"""
![](https://user-images.githubusercontent.com/6257240/124506870-8c17bc80-dd9a-11eb-9144-4116cf00f1c2.png)
"""

# ╔═╡ 5f18987d-a69e-4db9-96d3-426ed298d9b8
md"""
Thermal fluctuation helps overcome the barrier
```math
\text{forward rate} = C X e^{-(A-E_1)/kT}
```
```math
\text{backward rate} = C X e^{-(A-E_2)/kT}
```

``C`` is a factor that carries information about the thermal fluctuations in the environment,

``X`` is a factor that depends on a variety of molecular properties of the particular substance.
"""

# ╔═╡ 66495c77-3bbc-4731-b9e1-db11bbc24283
md"*analysis*: The ratio of the forward and backward computing rates is
```math
r = \frac{\text{forward rate}}{\text{backward rate}} = e^{(E_1-E_2)/kT}
```
The minimum free energy per step we need to pay is ``kT \log r = (E_1-E_2)``.
"

# ╔═╡ 84b867a3-804e-4e7e-a56c-0ffc1f4e6683
md"""
## Speed vs. energy efficiency - the entropy perspective
"""

# ╔═╡ 4642d311-ef0b-4c29-901d-b5398a3ca7b6
md"""
![](https://user-images.githubusercontent.com/6257240/124663469-200b8600-de78-11eb-8501-8ce97ea140c5.png)
"""

# ╔═╡ 96c3d50b-8a79-4de0-b7e0-c63c3b769b74
md"The ratio of the forward rate and the backward rate"

# ╔═╡ 066aa825-81e4-404d-bf5a-6a9431969702
md"``r = n_2/n_1`` (determines the speed)"

# ╔═╡ 6e99ed64-a896-450e-8bab-845e0fe971ae
md"``kT\log r = \underbrace{(S_2 - S_1) T}_{\text{the cost of free energy}}``"

# ╔═╡ 7f803113-653a-4dfd-93f0-83babb253b32
md"$F = E - TS$"

# ╔═╡ 7eb29d49-05f5-47e9-b4f5-4f31c5cd37ce
md"## A more concrete example of the energy-efficiency/speed trade-off"

# ╔═╡ aefdec07-dcef-4e00-bcb0-4747250cdd9b
md"Charging up the capacitor to store signal energy in adiabatic CMOS."

# ╔═╡ cbe4abaf-46f9-4726-97ae-cf3c378abaaf
md"""
![](https://user-images.githubusercontent.com/6257240/122668453-287c7500-d186-11eb-962f-cc478be1dafe.png)
"""

# ╔═╡ 4f0c81f5-ce5f-4f73-a528-9feff4a7fc14
md"Case 1: do it fast
```math
E = CU^2
```
"

# ╔═╡ 8249b820-8fb1-45d4-a95c-9c81e62e8216
let x = 0:1000
    y = map(x->abs(x-500)<250 ? 1.0 : 0.0, x)
    Plots.plot(x./1000, y, xlabel="time", ylabel="voltage")
end

# ╔═╡ 6503b377-b2d5-48be-90a4-97947afb4e5f
md"Case 2: do it slow
```math
E = \frac{CU^2}{2}
```
"

# ╔═╡ 53a571dd-cac7-432a-869c-b93a8fe05e17
let x = 0:1000
    y = map(x->abs(x-500)<200 ? 1.0 : (abs(x-500) < 400 ? 1/200 * (x < 500 ? abs(x-100) : abs(900-x)) : 0.0), x)
    Plots.plot(x./1000, y, xlabel="time", ylabel="voltage")
end

# ╔═╡ 2e5c7f59-dd35-4846-815a-b92eabeee089
md"Conclusion: fast ramping dissipates heat to the environment."
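As a numeric aside (not part of the lecture notes; the component values are illustrative):

```julia
let C = 1e-12, U = 1.0   # a 1 pF capacitor charged to 1 V
    E_fast = C * U^2      # energy drawn from the supply by a sudden step
    E_slow = C * U^2 / 2  # energy needed when ramping slowly
    E_fast - E_slow       # ≈ 5.0e-13 J dissipated as heat by the fast ramp
end
```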
# ╔═╡ 7b326477-43b6-4a6e-8862-12e8b70e1ad9
md"## Universal reversible gate set"

# ╔═╡ 47cd7560-a29e-4b55-bef5-28daa1cdb834
md"Toffoli gate is universal"

# ╔═╡ 59ab4431-ea4d-4707-9a42-d50eafa40b56
md"truth table ``(A, B, C) \mapsto (A, B, C')``

| A | B | C | C' |
| --- | --- | --- | --- |
| 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 1 |
| 0 | 1 | 0 | 0 |
| 0 | 1 | 1 | 1 |
| 1 | 0 | 0 | 0 |
| 1 | 0 | 1 | 1 |
| 1 | 1 | 0 | 1 |
| 1 | 1 | 1 | 0 |
"

# ╔═╡ 3630b412-beeb-455a-a4b8-1e1d50860266
md"
```julia
if A && B
    C = ¬C
end
```
"

# ╔═╡ ba6347d6-4ad0-403b-824f-dcf290a7c002
md"proved by constructing a NAND gate (a classical universal gate)"

# ╔═╡ a2f4975d-eeee-4a2d-97dd-dd0cfd29d665
TikzPicture(L"""
\draw (0,0) rectangle (2,3);
\draw (-0.5,0.5) -- (0.0, 0.5); \node at (-0.8, 0.5) {1};
\draw (-0.5,1.5) -- (0.0, 1.5); \node at (-0.8, 1.5) {B};
\draw (-0.5,2.5) -- (0.0, 2.5); \node at (-0.8, 2.5) {A};
\draw (2.5,0.5) -- (2.0, 0.5); \node at (3.1, 0.5) {$\overline{A\land B}$};
\draw (2.5,1.5) -- (2.0, 1.5); \node at (2.8, 1.5) {B};
\draw (2.5,2.5) -- (2.0, 2.5); \node at (2.8, 2.5) {A};
\node at (1.0, 1.5) {Toffoli};
""")

# ╔═╡ 32d411e9-b01d-4ad2-b4aa-2f091034e6c0
md"Fredkin gate is universal"

# ╔═╡ e5b83421-dd94-43ad-84eb-ca558bff6a2d
md"truth table ``(A, B, C) \mapsto (A, B', C')``

| A | B | C | B' | C' |
| --- | --- | --- | --- | --- |
| 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 | 1 |
| 0 | 1 | 0 | 1 | 0 |
| 0 | 1 | 1 | 1 | 1 |
| 1 | 0 | 0 | 0 | 0 |
| 1 | 0 | 1 | 1 | 0 |
| 1 | 1 | 0 | 0 | 1 |
| 1 | 1 | 1 | 1 | 1 |
"

# ╔═╡ 118642ad-1aad-4f91-8da0-55a417b67750
md"
```julia
if A
    B, C = C, B
end
```
"

# ╔═╡ 45171ecc-9d34-4ab6-a00b-ec9c9afc33f8
md"proved by constructing an AND gate and a NOT gate"

# ╔═╡ 6cd60f7d-d7ce-4189-a2dd-e47ce6825741
let
	img1 = TikzPicture(L"""
\draw (0,0) rectangle (2,3);
\draw (-0.5,0.5) -- (0.0, 0.5); \node at (-0.8, 0.5) {$C$};
\draw (-0.5,1.5) -- (0.0, 1.5); \node at (-0.8, 1.5) {$0$};
\draw (-0.5,2.5) -- (0.0, 2.5); \node at (-0.8, 2.5) {$A$};
\draw (2.5,0.5) -- (2.0, 0.5); \node at (3.1, 0.5) {$\overline{A}\land C$};
\draw (2.5,1.5) -- (2.0, 1.5); \node at (3.1, 1.5) {$A\land C$};
\draw (2.5,2.5) -- (2.0, 2.5); \node at (2.8, 2.5) {$A$};
\node at (1.0, 1.5) {Fredkin};
""")
	img2 = TikzPicture(L"""
\draw (0,0) rectangle (2,3);
\draw (-0.5,0.5) -- (0.0, 0.5); \node at (-0.8, 0.5) {$1$};
\draw (-0.5,1.5) -- (0.0, 1.5); \node at (-0.8, 1.5) {$0$};
\draw (-0.5,2.5) -- (0.0, 2.5); \node at (-0.8, 2.5) {$A$};
\draw (2.5,0.5) -- (2.0, 0.5); \node at (2.8, 0.5) {$\overline{A}$};
\draw (2.5,1.5) -- (2.0, 1.5); \node at (2.8, 1.5) {$A$};
\draw (2.5,2.5) -- (2.0, 2.5); \node at (2.8, 2.5) {$A$};
\node at (1.0, 1.5) {Fredkin};
""")
	leftright(img1, img2)
end
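# A brute-force truth-table check of the two gates above (a plain-Julia
# sketch; `toffoli` and `fredkin` are helper names made up here): both gates
# are their own inverse, and each yields a classical universal gate once one
# input is pinned to a constant ancilla.
let
	toffoli(a, b, c) = (a, b, xor(c, a & b))      # flip C iff A ∧ B
	fredkin(a, b, c) = a ? (a, c, b) : (a, b, c)  # swap B, C iff A
	for a in (false, true), b in (false, true), c in (false, true)
		@assert toffoli(toffoli(a, b, c)...) == (a, b, c)  # reversibility
		@assert fredkin(fredkin(a, b, c)...) == (a, b, c)  # reversibility
		@assert toffoli(a, b, true)[3] == !(a && b)        # NAND with C = 1
		@assert fredkin(a, b, false)[3] == (a && b)        # AND with C = 0
	end
end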
img2 = md""" ![](https://user-images.githubusercontent.com/6257240/124670724-6960d300-de82-11eb-8ebb-4747e0d43fff.png) """ leftright(img1, img2) end # ╔═╡ 908f19a2-6d32-4776-95eb-b249a8155ddc md"""prove by mapping it to a Fredkin gate""" # ╔═╡ 05fc1fae-b378-4c39-b060-74ca635745ec md"""![](https://user-images.githubusercontent.com/6257240/124670762-78478580-de82-11eb-8061-409e0db1388c.png)""" # ╔═╡ d0573bf9-0fd6-4512-bc13-17aa23a3265b md""" ## General reversible computing """ # ╔═╡ d8998d5f-65b2-4850-aef9-f19ecc192eca md"""*Problem 5.4*: A related problem concerns how to get "if' clauses to work. What if, after having followed an "if... then ... " command, the machine starts to reverse? How can the machine get back to the original condition that dictated which way the "if' branched? Of course, a set of initial conditions can result in a single "if' output ("if x = 2, 3, 4 or 6.159 let F= d"), so this condition may not be uniquely specified. Here is a nice way to analyze things. Simply bring in a new variable at each branch, and assign a unique value to this variable for each choice at a branch point. You might like to work this through in detail.""" # ╔═╡ 6b4c180c-9a12-4e3d-9336-1431e7c5875a TikzPicture(L""" \draw [black, thick,->] (0, 0) -- (1, 0); \draw [black, thick,->,dashed] (1, 0) .. controls (1.5, 0.5) .. (2, 0); \node at (1.5, 0.5) {goto $\ldots$}; \draw [black, thick,->] (2, 0) -- (3, 0); \def\x{4}; \draw [black, thick,<-] (\x, 0) -- (\x+1, 0); \draw [black, thick,<-,dashed] (\x+1, 0) .. controls (\x+1.5, 0.5) .. (\x+2, 0); \draw [black, thick,<-,dashed] (\x+1, 0) -- (\x+2, 0); \node at (\x+1.5, 0.5) {comefrom?}; \draw [black, thick,<-] (\x+2, 0) -- (\x+3, 0); \node at (1.5, -0.2) {call}; \node at (\x+1.5, -0.2) {uncall}; """, options="scale=2.0", preamble="") # ╔═╡ 0eb66cc9-93c0-4f07-b31b-a9bf9000260e md"Reversible branching statement" # ╔═╡ 85bf9f92-30f1-4e05-8d07-d8e481f20ccb TikzPicture(L""" \node [test] (pre) {precondition}; \node [proc, it] (st1) [right=of pre] {statements 1}; \node [proc, it] (st2) {statements 2}; \node [test] (post1) [right=of st1] {postcondition}; \node [test] (post2) [right=of st2] {postcondition}; \node [proc,red] (err1) [above=of post1] {invertibility error}; \node [proc,red] (err2) [below=of post2] {invertibility error}; \draw [->,black] (pre.east) -- (st1) node[midway,above] {T}; \draw [->,black] (pre.south) |- (st2) node[midway,below] {F}; \draw [->,black] (-2.5, 0.0) -- (pre.west); \draw [->,black] (st1) -- (post1); \draw [->,black] (st2) -- (post2); \draw [->,red] (post1) -- (err1) node[midway,right] {F}; \draw [->,red] (post2) -- (err2) node[midway,right] {T}; \draw [->,black] (post1.east) -- (12, 0) node[midway,above] {T}; \draw [black] (post2.east) -| (11, 0) node[midway,right] {F}; """, options=raw" font=\sffamily\small, >={Triangle[]}, */.tip={Circle[]}, start chain=going below, node distance=18mm and 40mm, every join/.style={norm}, base/.style={draw, on chain, on grid, align=center, minimum height=4ex, inner color=black!50!gray!10, outer color=black!50!gray!15}, proc/.style={base, rectangle, text width=8em}, test/.style={base, diamond, text centered, aspect=2.6,inner sep=-0ex}, norm/.style={->, draw, black}, it/.style={font={\sffamily\small\itshape}}", preamble=raw"\usetikzlibrary{shapes.geometric,arrows.meta,chains,positioning,quotes}") # ╔═╡ fbba0a91-9f48-4d91-90e7-f6a7df3227f9 md"## Bennett's compute copy uncompute scheme" # ╔═╡ 7fc81b9f-73ed-4780-9204-ddf39467e58f md"*setup*: we have a long linear program. 
# ╔═╡ fbba0a91-9f48-4d91-90e7-f6a7df3227f9
md"## Bennett's compute-copy-uncompute scheme"

# ╔═╡ 7fc81b9f-73ed-4780-9204-ddf39467e58f
md"*setup*: we have a long linear program.
We use the reversible embedded domain-specific language (eDSL) NiLang in Julia."

# ╔═╡ d08ae188-937f-474b-92d7-cb8eeda063fe
html""""""

# ╔═╡ 7ee8cfc9-26b2-4fe4-8263-1f4d2f7c276d
md"Initially written by GiggleLiu and Taine Zhao (the author of MLStyle)"

# ╔═╡ 1669f5e3-efe1-4b79-a2b6-11ed7476a2a1
md"the program for finding the maximum number"

# ╔═╡ 5000f4c3-5416-4e53-88ae-e30d8d09827e
@i function i_find_maximum_v1(s₂, s₃, s₄, x₁, x₂, x₃, x₄)
	s₂ += max(x₁, x₂)  # step 1
	s₃ += max(s₂, x₃)  # step 2
	s₄ += max(s₃, x₄)  # step 3
end

# ╔═╡ 2413c061-89de-403f-8011-e458f5a9859d
i_find_maximum_v1(0, 0, 0, 3, 2, 8, 1)

# ╔═╡ d4704779-9261-478b-bbf6-551220783e12
md"the basic building block of compute-copy-uncompute"

# ╔═╡ 4be065b5-0841-4d54-b9ab-d6770d4d9d94
@i function i_find_maximum_v2(s₄, x₁, x₂, x₃, x₄)
	# compute
	s₂ ← 0   # variable on the working tape
	s₃ ← 0
	s₂ += max(x₁, x₂)  # step 1
	s₃ += max(s₂, x₃)  # step 2
	# copy
	s₄ += max(s₃, x₄)  # step 3
	# uncompute
	s₃ -= max(s₂, x₃)  # step 4
	s₂ -= max(x₁, x₂)  # step 5
	s₂ → 0
	s₃ → 0
end

# ╔═╡ e850e53d-cf61-4fc7-9cb3-e318ae957f0b
i_find_maximum_v2(0, 3, 2, 8, 1)

# ╔═╡ a267ea5f-8bd5-4ee0-9c8d-47e2d3b81692
TikzPicture(L"""
\def\r{0.15};
\foreach \x in {1,4}{
	\fill[fill=black] (\x, 0) circle [radius=\r];
	\node[white] at (\x, 0) {$s_{\x}$};
}
\foreach \x in {2,3}{
	\draw (\x, 0) circle [radius=\r];
	\node[black] at (\x, 0) {$s_{\x}$};
}
\fill[fill=white] (5.5, 0) circle [radius=\r];
\foreach \x in {1,...,3}{
	\draw [black, thick, ->] (\x+\r, \r) .. controls (\x+0.5, 0.3) .. (\x+1-\r, \r);
	\node at (\x+0.5, 0.4) {\x};
}
\foreach[evaluate={\y=int(6-\x)}] \x in {1,...,2}{
	\draw [red, thick, <-] (\x+\r, -\r) .. controls (\x+0.5, -0.3) .. (\x+1-\r, -\r);
	\node at (\x+0.5, -0.4) {\y};
}
""", options="scale=1.8", preamble="")
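# The same compute-copy-uncompute pattern written with NiLang's
# `@routine`/`~@routine` sugar (a sketch; `i_find_maximum_v3` is a name
# invented here): the compute stage is recorded once, and `~@routine` emits
# steps 4-5 of `v2` automatically, deallocating s₂ and s₃.
@i function i_find_maximum_v3(s₄, x₁, x₂, x₃, x₄)
	@routine begin
		s₂ ← 0
		s₃ ← 0
		s₂ += max(x₁, x₂)   # compute
		s₃ += max(s₂, x₃)
	end
	s₄ += max(s₃, x₄)       # copy
	~@routine               # uncompute
end

# i_find_maximum_v3(0, 3, 2, 8, 1) == (8, 3, 2, 8, 1)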
# ╔═╡ c02520a3-3375-4d83-a0dc-1aeac2aa7d5f
md"Recursively apply Bennett's time-space trade-off scheme"

# ╔═╡ 2e18fc92-4185-493b-9ce8-cca63dad7d2d
TikzPicture(L"""
\def\r{0.15};
\def\n{10};
\foreach \x in {1,4,7,10}{
	\fill[fill=black] (\x, 0) circle [radius=\r];
	\node[white] at (\x, 0) {$s_{\x}$};
}
\foreach \x in {2,3,5,6,8,9}{
	\draw (\x, 0) circle [radius=\r];
	\node[black] at (\x, 0) {$s_{\x}$};
}
\fill[fill=white] (\n+0.5, 0) circle [radius=\r];
\foreach \x/\t in {1/1,2/2,3/3,4/6,5/7,6/8,7/11,8/12,9/13}{
	\draw [black, thick, ->] (\x+\r, \r) .. controls (\x+0.5, 0.3) .. (\x+1-\r, \r);
	\node[black] at (\x+0.5, 0.4) {\t};
}
\foreach \x/\t in {1/5,2/4,4/10,5/9,7/15,8/14}{
	\draw [black, thick, <-] (\x+\r, -\r) .. controls (\x+0.5, -0.3) .. (\x+1-\r, -\r);
	\node[black] at (\x+0.5, -0.4) {\t};
}
""", options="scale=2.0", preamble="")

# ╔═╡ acc7b185-e4df-4aca-aa42-554215065384
TikzPicture(L"""
\def\r{0.15};
\def\n{10};
\foreach \x in {1,4,7,10}{
	\fill[fill=black] (\x, 0) circle [radius=\r];
	\node[white] at (\x, 0) {$s_{\x}$};
}
\fill[fill=white] (\n+0.5, 0) circle [radius=\r];
\foreach \x in {1,4,7}{
	\draw [black, thick, ->] (\x+\r, \r) .. controls (\x+1.5, 0.6) .. (\x+3-\r, \r);
}
\foreach \x in {1,4}{
	\draw [black, thick, <-] (\x+\r, -\r) .. controls (\x+1.5, -0.6) .. (\x+3-\r, -\r);
}
""", options="scale=2.0", preamble="")

# ╔═╡ 00c9e973-7e06-4483-bf4c-be7374707118
md"
* Space complexity: ``O(S \log T)``
* Time complexity: ``O(T^{1+\epsilon})``"

# ╔═╡ 83ff3fc3-bcd8-4235-a42f-1d75c7d6aa5b
md"## Computing architectures"

# ╔═╡ b308e270-6b40-4946-ac92-c705823f2c1e
let
	txt1 = md"Traditional irreversible computer $E \sim 10^8 kT$"
	img1 = html""""""
	txt2 = md"DNA copying is a living copy machine $E \sim 100 kT$"
	img2 = html"""
"""
	txt3 = md"""
Adiabatic CMOS [Athas, 1994] $E \sim 10^6 kT$
"""
	img3 = html"""
"""
	txt4 = md"""Adiabatic superconducting devices [Takeuchi, 2014] $E \sim kT$
"""
	img4 = html"""
"""
	updown(leftright(updown(img1, txt1), updown(img2, txt2)), leftright(updown(img3, txt3), updown(img4, txt4)))
end

# ╔═╡ e483b3d4-d01c-4a98-8e68-e8120a7d95a7
md"## More

![](https://user-images.githubusercontent.com/6257240/123520518-22ebc700-d67f-11eb-8af1-a452605cc1d8.png)

*Youtube*: Michael P. Frank: Fundamental Physics of Reversible Computing — An Introduction, Part 1
"

# ╔═╡ 74017e78-0f02-41bb-a160-5f2d26c18268
md"""
![](https://user-images.githubusercontent.com/6257240/125467165-d3ec7c24-18cb-48d6-99b6-708326789bf9.png)

Kenichi Morita, How can we construct reversible Turing machines in a very simple reversible cellular automaton?

Video can be found on this conference page: [https://reversible-computation-2021.github.io/program/](https://reversible-computation-2021.github.io/program/)
"""

# ╔═╡ 0b3735c2-695c-4225-843e-16ca17aac0eb
md"""## Take home message
1. Irreversible computing -> information erasure -> dissipate heat (``kT\log 2`` per bit)
2. Reversible computing -> requires operations being adiabatic -> slow
3. Reversible programming suffers from **polynomial time overhead and logarithmic space overhead** when differentiating an irreversible linear program
4. Brownian computer
    * mRNA copy
    * Magnetic dipole
5. General reversible computer
    * Billiard ball model
    * Reversible cellular automata
    * Adiabatic CMOS
6. **How to find this notebook?** In NiLang's Github repo, file: `notebooks/feynman.jl`
"""

# ╔═╡ d7942b37-f821-494a-8f18-5f267aa3457a
md"""
### References
* Reeb, David, and Michael M. Wolf. "An improved Landauer principle with finite-size corrections." (2014).
* Athas, William C., and L. J. Svensson. "Reversible logic issues in adiabatic CMOS." Proceedings Workshop on Physics and Computation (1994).
* Takeuchi, N., Y. Yamanashi, and N. Yoshikawa. "Reversible logic gate using adiabatic superconducting devices." (2014).
* Griewank, Andreas. "Achieving logarithmic growth of temporal and spatial complexity in reverse automatic differentiation." Optimization Methods and Software 1.1 (1992): 35-54.
* Ming Li, John Tromp, Paul Vitanyi.
"Reversible Simulation of Irreversible Computation by Pebble Games" (1997) """ # ╔═╡ 00000000-0000-0000-0000-000000000001 PLUTO_PROJECT_TOML_CONTENTS = """ [deps] Compose = "a81c6b42-2e10-5240-aca2-a61377ecd94b" NiLang = "ab4ef3a6-0b42-11ea-31f6-e34652774712" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" PlutoUI = "7f904dfe-b85e-4ff6-b463-dae2292396a8" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" TikzPictures = "37f6aa50-8035-52d0-81c2-5a1d08754b2d" Viznet = "52a3aca4-6234-47fd-b74a-806bdf78ede9" [compat] Compose = "~0.9.2" NiLang = "~0.9.1" Plots = "~1.18.0" PlutoUI = "~0.7.9" Revise = "~3.1.17" TikzPictures = "~3.3.3" Viznet = "~0.3.3" """ # ╔═╡ 00000000-0000-0000-0000-000000000002 PLUTO_MANIFEST_TOML_CONTENTS = """ # This file is machine-generated - editing it directly is not advised [[Adapt]] deps = ["LinearAlgebra"] git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" version = "3.3.1" [[ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" [[Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" [[Bzip2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "c3598e525718abcc440f69cc6d5f60dda0a1b61e" uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" version = "1.0.6+5" [[Cairo_jll]] deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] git-tree-sha1 = "e2f47f6d8337369411569fd45ae5753ca10394c6" uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" version = "1.16.0+6" [[CodeTracking]] deps = ["InteractiveUtils", "UUIDs"] git-tree-sha1 = "8ad457cfeb0bca98732c97958ef81000a543e73e" uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" version = "1.0.5" [[ColorSchemes]] deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random", "StaticArrays"] git-tree-sha1 = "c8fd01e4b736013bc61b704871d20503b33ea402" uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" version = "3.12.1" [[ColorTypes]] deps = ["FixedPointNumbers", "Random"] git-tree-sha1 = "32a2b8af383f11cbb65803883837a149d10dfe8a" uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" version = "0.10.12" [[Colors]] deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] git-tree-sha1 = "417b0ed7b8b838aa6ca0a87aadf1bb9eb111ce40" uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" version = "0.12.8" [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] git-tree-sha1 = "dc7dedc2c2aa9faf59a55c622760a25cbefbe941" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" version = "3.31.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" [[Compose]] deps = ["Base64", "Colors", "DataStructures", "Dates", "IterTools", "JSON", "LinearAlgebra", "Measures", "Printf", "Random", "Requires", "Statistics", "UUIDs"] git-tree-sha1 = "c6461fc7c35a4bb8d00905df7adafcff1fe3a6bc" uuid = "a81c6b42-2e10-5240-aca2-a61377ecd94b" version = "0.9.2" [[Contour]] deps = ["StaticArrays"] git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7" uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" version = "0.5.7" [[DataAPI]] git-tree-sha1 = "ee400abb2298bd13bfc3df1c412ed228061a2385" uuid = 
"9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" version = "1.7.0" [[DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" version = "0.18.9" [[DataValueInterfaces]] git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" version = "1.0.0" [[Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" [[DelimitedFiles]] deps = ["Mmap"] uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" [[Dierckx]] deps = ["Dierckx_jll"] git-tree-sha1 = "5fefbe52e9a6e55b8f87cb89352d469bd3a3a090" uuid = "39dd38d3-220a-591b-8e3c-4c3a8c710a94" version = "0.5.1" [[Dierckx_jll]] deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] git-tree-sha1 = "a580560f526f6fc6973e8bad2b036514a4e3b013" uuid = "cd4c43a9-7502-52ba-aa6d-59fb2a88580b" version = "0.0.1+0" [[Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[Downloads]] deps = ["ArgTools", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" [[EarCut_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "92d8f9f208637e8d2d28c664051a00569c01493d" uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" version = "2.1.5+1" [[Expat_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "b3bfd02e98aedfa5cf885665493c5598c350cd2f" uuid = "2e619515-83b5-522b-bb60-26c02a35a201" version = "2.2.10+0" [[FFMPEG]] deps = ["FFMPEG_jll"] git-tree-sha1 = "b57e3acbe22f8484b4b5ff66a7499717fe1a9cc8" uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" version = "0.4.1" [[FFMPEG_jll]] deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "LibVPX_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "Pkg", "Zlib_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] git-tree-sha1 = "3cc57ad0a213808473eafef4845a74766242e05f" uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" version = "4.3.1+4" [[FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" [[FixedPointNumbers]] deps = ["Statistics"] git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" version = "0.8.4" [[Fontconfig_jll]] deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] git-tree-sha1 = "35895cf184ceaab11fd778b4590144034a167a2f" uuid = "a3f928ae-7b40-5064-980b-68af3947d34b" version = "2.13.1+14" [[Formatting]] deps = ["Printf"] git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" version = "0.4.2" [[FreeType2_jll]] deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] git-tree-sha1 = "cbd58c9deb1d304f5a245a0b7eb841a2560cfec6" uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" version = "2.10.1+5" [[FriBidi_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "aa31987c2ba8704e23c6c8ba8a4f769d5d7e4f91" uuid = "559328eb-81f9-559d-9380-de523a88c83c" version = "1.0.10+0" [[GLFW_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll"] git-tree-sha1 = "dba1e8614e98949abfa60480b13653813d8f0157" uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" version = "3.3.5+0" [[GR]] deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Printf", "Random", 
"Serialization", "Sockets", "Test", "UUIDs"] git-tree-sha1 = "b83e3125048a9c3158cbb7ca423790c7b1b57bea" uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" version = "0.57.5" [[GR_jll]] deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Pkg", "Qt5Base_jll", "Zlib_jll", "libpng_jll"] git-tree-sha1 = "e14907859a1d3aee73a019e7b3c98e9e7b8b5b3e" uuid = "d2c73de3-f751-5644-a686-071e5b155ba9" version = "0.57.3+0" [[GeometryBasics]] deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] git-tree-sha1 = "15ff9a14b9e1218958d3530cc288cf31465d9ae2" uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" version = "0.3.13" [[Gettext_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] git-tree-sha1 = "9b02998aba7bf074d14de89f9d37ca24a1a0b046" uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" version = "0.21.0+0" [[Glib_jll]] deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE_jll", "Pkg", "Zlib_jll"] git-tree-sha1 = "47ce50b742921377301e15005c96e979574e130b" uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" version = "2.68.1+0" [[Grisu]] git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2" uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" version = "1.0.2" [[HTTP]] deps = ["Base64", "Dates", "IniFile", "Logging", "MbedTLS", "NetworkOptions", "Sockets", "URIs"] git-tree-sha1 = "c6a1fff2fd4b1da29d3dccaffb1e1001244d844e" uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" version = "0.9.12" [[IniFile]] deps = ["Test"] git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" version = "0.5.0" [[InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[IterTools]] git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18" uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" version = "1.3.0" [[IteratorInterfaceExtensions]] git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" version = "1.0.0" [[JLLWrappers]] deps = ["Preferences"] git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" version = "1.3.0" [[JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" version = "0.21.1" [[JpegTurbo_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "d735490ac75c5cb9f1b00d8b5509c11984dc6943" uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" version = "2.1.0+0" [[JuliaInterpreter]] deps = ["CodeTracking", "InteractiveUtils", "Random", "UUIDs"] git-tree-sha1 = "31c2eee64c1eee6e8e3f30d5a03d4b5b7086ab29" uuid = "aa1ae85d-cabe-5617-a682-6adf51b2e16a" version = "0.8.18" [[LAME_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "f6250b16881adf048549549fba48b1161acdac8c" uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" version = "3.100.1+0" [[LZO_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "e5b909bcf985c5e2605737d2ce278ed791b89be6" uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" version = "2.10.1+0" [[LaTeXStrings]] git-tree-sha1 = "c7f1c695e06c01b95a67f0cd1d34994f3e7db104" uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" version = "1.2.1" [[Latexify]] deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "Printf", 
"Requires"] git-tree-sha1 = "a4b12a1bd2ebade87891ab7e36fdbce582301a92" uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" version = "0.15.6" [[LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" [[LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" [[LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" [[LibVPX_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "12ee7e23fa4d18361e7c2cde8f8337d4c3101bc7" uuid = "dd192d2f-8180-539f-9fb4-cc70b1dcf69a" version = "1.10.0+0" [[Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [[Libffi_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "761a393aeccd6aa92ec3515e428c26bf99575b3b" uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" version = "3.2.2+0" [[Libgcrypt_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"] git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae" uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" version = "1.8.7+0" [[Libglvnd_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"] git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf" uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29" version = "1.3.0+3" [[Libgpg_error_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9" uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" version = "1.42.0+0" [[Libiconv_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778" uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" version = "1.16.1+1" [[Libmount_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "9c30530bf0effd46e15e0fdcf2b8636e78cbbd73" uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" version = "2.35.0+0" [[Libtiff_jll]] deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Pkg", "Zlib_jll", "Zstd_jll"] git-tree-sha1 = "340e257aada13f95f98ee352d316c3bed37c8ab9" uuid = "89763e89-9b03-5906-acba-b20f662cd828" version = "4.3.0+0" [[Libuuid_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "7f3efec06033682db852f8b3bc3c1d2b0a0ab066" uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" version = "2.36.0+0" [[LinearAlgebra]] deps = ["Libdl"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[LittleCMS_jll]] deps = ["JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pkg"] git-tree-sha1 = "e6ea89d915cdad8d264f7f9158c6664f879edcde" uuid = "d3a379c0-f9a3-5b72-a4c0-6bf4d2e8af0f" version = "2.9.0+0" [[LogarithmicNumbers]] deps = ["Random", "Requires"] git-tree-sha1 = "d88b70111754e3660f80d3596a343ce42bf5ee84" uuid = "aa2f6b4e-9042-5d33-9679-40d3a6b85899" version = "0.4.2" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[LoweredCodeUtils]] deps = ["JuliaInterpreter"] git-tree-sha1 = "4bfb8b57df913f3b28a6bd3bdbebe9a50538e689" uuid = "6f1432cf-f94c-5a45-995e-cdbf5db27b0b" version = "2.1.0" [[MacroTools]] deps = ["Markdown", "Random"] git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" version = "0.5.6" [[Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[MatchCore]] git-tree-sha1 = "90af9fe333f8c9851f952dfa7f335185c94567c0" uuid = 
"5dd3f0b1-72a9-48ad-ae6e-79f673da005f" version = "0.1.1" [[MbedTLS]] deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"] git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe" uuid = "739be429-bea8-5141-9913-cc70e7f3736d" version = "1.0.3" [[MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" [[Measures]] git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" version = "0.3.1" [[Missings]] deps = ["DataAPI"] git-tree-sha1 = "4ea90bd5d3985ae1f9a908bd4500ae88921c5ce7" uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" version = "1.0.0" [[Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" [[NaNMath]] git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" version = "0.3.5" [[NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" [[NiLang]] deps = ["FixedPointNumbers", "LinearAlgebra", "LogarithmicNumbers", "MatchCore", "NiLangCore", "Reexport", "SparseArrays", "TupleTools"] git-tree-sha1 = "3fe439482d8c08a15f929ae7278a6c7f737672d5" uuid = "ab4ef3a6-0b42-11ea-31f6-e34652774712" version = "0.9.1" [[NiLangCore]] deps = ["MatchCore", "TupleTools"] git-tree-sha1 = "239f97ea947531cfe7a596746e31c8429c7169b9" uuid = "575d3204-02a4-11ea-3f62-238caa8bf11e" version = "0.10.3" [[Ogg_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "7937eda4681660b4d6aeeecc2f7e1c81c8ee4e2f" uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" version = "1.3.5+0" [[OpenJpeg_jll]] deps = ["Libdl", "Libtiff_jll", "LittleCMS_jll", "Pkg", "libpng_jll"] git-tree-sha1 = "e330ffff1c6a593fa44cc40c29900bee82026406" uuid = "643b3616-a352-519d-856d-80112ee9badc" version = "2.3.1+0" [[OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "15003dcb7d8db3c6c857fda14891a539a8f2705a" uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" version = "1.1.10+0" [[Opus_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "51a08fb14ec28da2ec7a927c4337e4332c2a4720" uuid = "91d4177d-7536-5919-b921-800302f37372" version = "1.3.2+0" [[OrderedCollections]] git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.4.1" [[PCRE_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "b2a7af664e098055a7529ad1a900ded962bca488" uuid = "2f80f16e-611a-54ab-bc61-aa92de5b98fc" version = "8.44.0+0" [[Parsers]] deps = ["Dates"] git-tree-sha1 = "c8abc88faa3f7a3950832ac5d6e690881590d6dc" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" version = "1.1.0" [[Pixman_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "b4f5d02549a10e20780a24fce72bea96b6329e29" uuid = "30392449-352a-5448-841d-b1acce4e97dc" version = "0.40.1+0" [[Pkg]] deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[PlotThemes]] deps = ["PlotUtils", "Requires", "Statistics"] git-tree-sha1 = "a3a964ce9dc7898193536002a6dd892b1b5a6f1d" uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" version = "2.0.1" [[PlotUtils]] deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"] git-tree-sha1 = "501c20a63a34ac1d015d5304da0e645f42d91c9f" uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" version = "1.0.11" [[Plots]] deps = ["Base64", "Contour", "Dates", 
"FFMPEG", "FixedPointNumbers", "GR", "GeometryBasics", "JSON", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"] git-tree-sha1 = "9f126950870ef24ce75cdd841f4b7cf34affc6d2" uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" version = "1.18.0" [[PlutoUI]] deps = ["Base64", "Dates", "InteractiveUtils", "JSON", "Logging", "Markdown", "Random", "Reexport", "Suppressor"] git-tree-sha1 = "44e225d5837e2a2345e69a1d1e01ac2443ff9fcb" uuid = "7f904dfe-b85e-4ff6-b463-dae2292396a8" version = "0.7.9" [[Poppler_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "OpenJpeg_jll", "Pkg", "libpng_jll"] git-tree-sha1 = "e11443687ac151ac6ef6699eb75f964bed8e1faa" uuid = "9c32591e-4766-534b-9725-b71a8799265b" version = "0.87.0+2" [[Preferences]] deps = ["TOML"] git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a" uuid = "21216c6a-2e73-6563-6e65-726566657250" version = "1.2.2" [[Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[Qt5Base_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "xkbcommon_jll"] git-tree-sha1 = "ad368663a5e20dbb8d6dc2fddeefe4dae0781ae8" uuid = "ea2cea3b-5b76-57ae-a6ef-0a8af62496e1" version = "5.15.3+0" [[REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[Random]] deps = ["Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[RecipesBase]] git-tree-sha1 = "b3fb709f3c97bfc6e948be68beeecb55a0b340ae" uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" version = "1.1.1" [[RecipesPipeline]] deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"] git-tree-sha1 = "2a7a2469ed5d94a98dea0e85c46fa653d76be0cd" uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" version = "0.3.4" [[Reexport]] git-tree-sha1 = "5f6c21241f0f655da3952fd60aa18477cf96c220" uuid = "189a3867-3050-52da-a836-e630ba90ab69" version = "1.1.0" [[Requires]] deps = ["UUIDs"] git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621" uuid = "ae029012-a4dd-5104-9daa-d747884805df" version = "1.1.3" [[Revise]] deps = ["CodeTracking", "Distributed", "FileWatching", "JuliaInterpreter", "LibGit2", "LoweredCodeUtils", "OrderedCollections", "Pkg", "REPL", "Requires", "UUIDs", "Unicode"] git-tree-sha1 = "410bbe13d9a7816e862ed72ac119bda7fb988c08" uuid = "295af30f-e4ad-537b-8983-00126c2a3abe" version = "3.1.17" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" [[Scratch]] deps = ["Dates"] git-tree-sha1 = "0b4b7f1393cff97c33891da2a0bf69c6ed241fda" uuid = "6c6a2e73-6563-6170-7368-637461726353" version = "1.1.0" [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [[SharedArrays]] deps = ["Distributed", "Mmap", "Random", "Serialization"] uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" [[Showoff]] deps = ["Dates", "Grisu"] git-tree-sha1 = "91eddf657aca81df9ae6ceb20b959ae5653ad1de" uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" version = "1.0.3" [[Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" [[SortingAlgorithms]] deps = ["DataStructures"] git-tree-sha1 = "2ec1962eba973f383239da22e75218565c390a96" uuid = 
"a2af1166-a08f-5f64-846c-94a0d3cef48c" version = "1.0.0" [[SparseArrays]] deps = ["LinearAlgebra", "Random"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[StaticArrays]] deps = ["LinearAlgebra", "Random", "Statistics"] git-tree-sha1 = "896d55218776ab8f23fb7b222a5a4a946d4aafc2" uuid = "90137ffa-7385-5640-81b9-e52037218182" version = "1.2.5" [[Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [[StatsAPI]] git-tree-sha1 = "1958272568dc176a1d881acb797beb909c785510" uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" version = "1.0.0" [[StatsBase]] deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] git-tree-sha1 = "2f6792d523d7448bbe2fec99eca9218f06cc746d" uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" version = "0.33.8" [[StructArrays]] deps = ["Adapt", "DataAPI", "StaticArrays", "Tables"] git-tree-sha1 = "000e168f5cc9aded17b6999a560b7c11dda69095" uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" version = "0.6.0" [[Suppressor]] git-tree-sha1 = "a819d77f31f83e5792a76081eee1ea6342ab8787" uuid = "fd094767-a336-5f1f-9728-57cf17d0bbfb" version = "0.2.0" [[TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" [[TableTraits]] deps = ["IteratorInterfaceExtensions"] git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" version = "1.0.1" [[Tables]] deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] git-tree-sha1 = "8ed4a3ea724dac32670b062be3ef1c1de6773ae8" uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" version = "1.4.4" [[Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" [[Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[TikzPictures]] deps = ["LaTeXStrings", "Poppler_jll", "Requires"] git-tree-sha1 = "06b36e2baa9b97814ef1993207b71e2e23e9efb5" uuid = "37f6aa50-8035-52d0-81c2-5a1d08754b2d" version = "3.3.3" [[TupleTools]] git-tree-sha1 = "3c712976c47707ff893cf6ba4354aa14db1d8938" uuid = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" version = "1.3.0" [[URIs]] git-tree-sha1 = "97bbe755a53fe859669cd907f2d96aee8d2c1355" uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" version = "1.3.0" [[UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [[Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[Viznet]] deps = ["Compose", "Dierckx"] git-tree-sha1 = "7a022ae6ac8b153d47617ed8c196ce60645689f1" uuid = "52a3aca4-6234-47fd-b74a-806bdf78ede9" version = "0.3.3" [[Wayland_jll]] deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"] git-tree-sha1 = "3e61f0b86f90dacb0bc0e73a0c5a83f6a8636e23" uuid = "a2964d1f-97da-50d4-b82a-358c7fce9d89" version = "1.19.0+0" [[Wayland_protocols_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll"] git-tree-sha1 = "2839f1c1296940218e35df0bbb220f2a79686670" uuid = "2381bf8a-dfd0-557d-9999-79630e7b1b91" version = "1.18.0+4" [[XML2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a" uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" version = "2.9.12+0" [[XSLT_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"] git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a" uuid = 
"aed1982a-8fda-507f-9586-7b0439959a61" version = "1.1.34+0" [[Xorg_libX11_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527" uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" version = "1.6.9+4" [[Xorg_libXau_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e" uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" version = "1.0.9+4" [[Xorg_libXcursor_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXfixes_jll", "Xorg_libXrender_jll"] git-tree-sha1 = "12e0eb3bc634fa2080c1c37fccf56f7c22989afd" uuid = "935fb764-8cf2-53bf-bb30-45bb1f8bf724" version = "1.2.0+4" [[Xorg_libXdmcp_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4" uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" version = "1.1.3+4" [[Xorg_libXext_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3" uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" version = "1.3.4+4" [[Xorg_libXfixes_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] git-tree-sha1 = "0e0dc7431e7a0587559f9294aeec269471c991a4" uuid = "d091e8ba-531a-589c-9de9-94069b037ed8" version = "5.0.3+4" [[Xorg_libXi_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXfixes_jll"] git-tree-sha1 = "89b52bc2160aadc84d707093930ef0bffa641246" uuid = "a51aa0fd-4e3c-5386-b890-e753decda492" version = "1.7.10+4" [[Xorg_libXinerama_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll"] git-tree-sha1 = "26be8b1c342929259317d8b9f7b53bf2bb73b123" uuid = "d1454406-59df-5ea1-beac-c340f2130bc3" version = "1.1.4+4" [[Xorg_libXrandr_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll"] git-tree-sha1 = "34cea83cb726fb58f325887bf0612c6b3fb17631" uuid = "ec84b674-ba8e-5d96-8ba1-2a689ba10484" version = "1.5.2+4" [[Xorg_libXrender_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] git-tree-sha1 = "19560f30fd49f4d4efbe7002a1037f8c43d43b96" uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" version = "0.9.10+4" [[Xorg_libpthread_stubs_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb" uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" version = "0.1.0+3" [[Xorg_libxcb_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6" uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" version = "1.13.0+3" [[Xorg_libxkbfile_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] git-tree-sha1 = "926af861744212db0eb001d9e40b5d16292080b2" uuid = "cc61e674-0454-545c-8b26-ed2c68acab7a" version = "1.1.0+4" [[Xorg_xcb_util_image_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] git-tree-sha1 = "0fab0a40349ba1cba2c1da699243396ff8e94b97" uuid = "12413925-8142-5f55-bb0e-6d7ca50bb09b" version = "0.4.0+1" [[Xorg_xcb_util_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll"] git-tree-sha1 = "e7fd7b2881fa2eaa72717420894d3938177862d1" uuid = "2def613f-5ad1-5310-b15b-b15d46f528f5" version = "0.4.0+1" [[Xorg_xcb_util_keysyms_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] 
git-tree-sha1 = "d1151e2c45a544f32441a567d1690e701ec89b00" uuid = "975044d2-76e6-5fbe-bf08-97ce7c6574c7" version = "0.4.0+1" [[Xorg_xcb_util_renderutil_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] git-tree-sha1 = "dfd7a8f38d4613b6a575253b3174dd991ca6183e" uuid = "0d47668e-0667-5a69-a72c-f761630bfb7e" version = "0.3.9+1" [[Xorg_xcb_util_wm_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] git-tree-sha1 = "e78d10aab01a4a154142c5006ed44fd9e8e31b67" uuid = "c22f9ab0-d5fe-5066-847c-f4bb1cd4e361" version = "0.4.1+1" [[Xorg_xkbcomp_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxkbfile_jll"] git-tree-sha1 = "4bcbf660f6c2e714f87e960a171b119d06ee163b" uuid = "35661453-b289-5fab-8a00-3d9160c6a3a4" version = "1.4.2+4" [[Xorg_xkeyboard_config_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xkbcomp_jll"] git-tree-sha1 = "5c8424f8a67c3f2209646d4425f3d415fee5931d" uuid = "33bec58e-1273-512f-9401-5d533626f822" version = "2.27.0+4" [[Xorg_xtrans_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845" uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" version = "1.4.0+3" [[Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" [[Zstd_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "cc4bf3fdde8b7e3e9fa0351bdeedba1cf3b7f6e6" uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" version = "1.5.0+0" [[libass_jll]] deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] git-tree-sha1 = "acc685bcf777b2202a904cdcb49ad34c2fa1880c" uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" version = "0.14.0+4" [[libfdk_aac_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "7a5780a0d9c6864184b3a2eeeb833a0c871f00ab" uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" version = "0.1.6+4" [[libpng_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] git-tree-sha1 = "94d180a6d2b5e55e447e2d27a29ed04fe79eb30c" uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f" version = "1.6.38+0" [[libvorbis_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"] git-tree-sha1 = "c45f4e40e7aafe9d086379e5578947ec8b95a8fb" uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" version = "1.3.7+0" [[nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" [[p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" [[x264_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "d713c1ce4deac133e3334ee12f4adff07f81778f" uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" version = "2020.7.14+2" [[x265_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "487da2f8f2f0c8ee0e83f39d13037d6bbf0a45ab" uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" version = "3.0.0+3" [[xkbcommon_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll", "Wayland_protocols_jll", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"] git-tree-sha1 = "ece2350174195bb31de1a63bea3a41ae1aa593b6" uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd" version = "0.9.1+5" """ # ╔═╡ Cell order: # ╟─e20e2d2e-4b28-4e32-8d80-ce029928a094 # ╟─f3e235e7-76b9-4c39-bc70-038539838ff4 # ╟─8308df59-3faa-4abf-8f05-119bbae48f64 # ╟─a3532a83-9fd3-4d24-b1bb-b52457317e51 # ╟─15657e4b-848e-43ad-a99f-37143d11705e # ╟─f9675365-36aa-430c-b747-3bc4f602e6fb # ╟─94c5eaa1-432c-4553-829e-f78d97f3c0ca # ╟─bef6978d-e654-4364-b5eb-e9608cf68464 # 
╟─046f7559-4af9-4982-b5c3-335add0911d7 # ╟─0a039bfa-571e-4fad-b73c-1324d08777fc # ╟─bf7abacc-5b0a-4623-b2c5-af60183ad4b0 # ╟─3f1e4d7a-32a7-4c7e-92dd-465bac925e63 # ╟─95a21058-0b07-4859-af68-8ca5b48b2a77 # ╟─f68bcfb6-97ce-48d1-b0b8-e8466d4ac879 # ╟─2fe7c298-4c5d-464c-980b-6cd9a537ac1e # ╟─49dab78a-7bd9-4faa-8a30-9af8a96e0c5b # ╟─3d4ba750-8d62-48ac-bf96-691397689ddc # ╟─7aa7b0ee-beeb-4a3e-abf1-aa71e916f4cd # ╟─f4cb9212-181f-4338-b858-1d99c7f415e9 # ╟─c1bbaec8-4fb9-4ab8-a30d-06a286597de0 # ╟─6d7a07ff-be1b-4902-8a6d-7d9257c1157f # ╟─0577d67f-648f-407c-8abf-507d086445bd # ╟─eb10e436-bcce-4d81-891e-15158219fe80 # ╟─7c5a30fd-95f9-4bb8-b34f-b10b0f2a27f2 # ╟─abee1bee-ed01-4b05-a848-3aeb695a24ba # ╟─b05538cc-de01-4b1e-a602-feb780cddf4a # ╟─42c398ab-bb45-423f-b030-404e7582df5a # ╟─48081dd4-2bf4-43a1-899c-0303b4fcedd3 # ╟─c6ef8479-639b-45c1-9b48-a5d2c233d3b8 # ╟─6f01cdc2-6ce9-41da-b279-b047c9779405 # ╟─876ad6cf-84c1-4e34-89de-6f9273ba3479 # ╟─9ca8912d-5fc5-4066-adb8-ad02f75c2cbe # ╟─9aab5751-e9e0-46c0-8e66-4b98258fed08 # ╟─a29af398-ff22-44cb-a5aa-0b0409312be9 # ╟─c3db622f-e9ff-4d99-afb6-9db65c6cae7a # ╟─12cbf4b7-9b55-423c-bf59-5cb18e167afd # ╟─89a5ff44-1b04-4bd8-a40a-83382a027fb3 # ╟─51e7b853-8640-4415-a9a4-8c0e06ad916a # ╟─a8fe838e-727d-4068-887d-17b1bf99f90b # ╟─c7cd75cb-4c64-4704-b839-c5a556f89be7 # ╟─ec14fba6-0cb9-483f-b3ea-cc4c5e83c965 # ╟─f1abc5c1-2c34-422a-86c4-5ad8e7df8b7e # ╟─8ad4e7c0-c496-4d29-ac09-e6525b1b4c0f # ╟─757e2d78-c5ee-4b40-bfd6-1b39af338d9d # ╟─cbea35c7-c3c8-48e7-bb47-d5e193aee2c4 # ╟─81013954-1c48-4c05-82c9-49b4bfafda95 # ╟─1924eff7-1423-4e90-8005-43113d9deb3d # ╟─c7edbc15-cd59-45fd-a0dc-c48aadb1c096 # ╟─bf2c9da7-8c45-409f-82a3-979cd63ea993 # ╟─1c32a491-ac85-4132-82fd-9b846a8485df # ╟─6cbf202f-34e4-42b6-a7a7-5d766bfdfc37 # ╟─d40b318f-bff2-4d0b-b2a6-d00933ac7567 # ╟─54f53a7b-74e8-433b-94fd-9fa7192dfca5 # ╟─b0dcad96-e439-4e09-9e92-8cad7ede79af # ╟─0a15a2cf-2e7a-4bd7-ac78-0803fc3d5c73 # ╟─ffbc5616-d2d9-4ce4-996f-d1a743bb89b3 # ╟─2602d857-4a21-478a-97a2-58a177666f52 # ╟─cf27e340-578a-440d-8d4a-e5a2277d5205 # ╟─aa53fd68-5acd-488d-a096-5ce39759f481 # ╟─cb9a9ef0-c0dc-487c-8008-0f73f9910ef8 # ╟─f7e0478d-1839-4684-9265-ee990fe9da45 # ╟─751e32d6-2582-4b1d-9558-124b1ef54f81 # ╟─5f18987d-a69e-4db9-96d3-426ed298d9b8 # ╟─66495c77-3bbc-4731-b9e1-db11bbc24283 # ╟─84b867a3-804e-4e7e-a56c-0ffc1f4e6683 # ╟─4642d311-ef0b-4c29-901d-b5398a3ca7b6 # ╟─96c3d50b-8a79-4de0-b7e0-c63c3b769b74 # ╟─066aa825-81e4-404d-bf5a-6a9431969702 # ╟─6e99ed64-a896-450e-8bab-845e0fe971ae # ╟─7f803113-653a-4dfd-93f0-83babb253b32 # ╟─7eb29d49-05f5-47e9-b4f5-4f31c5cd37ce # ╟─aefdec07-dcef-4e00-bcb0-4747250cdd9b # ╟─cbe4abaf-46f9-4726-97ae-cf3c378abaaf # ╟─4f0c81f5-ce5f-4f73-a528-9feff4a7fc14 # ╠═e81de385-0070-49a9-a889-8fcf9d9e2951 # ╟─8249b820-8fb1-45d4-a95c-9c81e62e8216 # ╟─6503b377-b2d5-48be-90a4-97947afb4e5f # ╟─53a571dd-cac7-432a-869c-b93a8fe05e17 # ╟─2e5c7f59-dd35-4846-815a-b92eabeee089 # ╟─7b326477-43b6-4a6e-8862-12e8b70e1ad9 # ╟─47cd7560-a29e-4b55-bef5-28daa1cdb834 # ╟─59ab4431-ea4d-4707-9a42-d50eafa40b56 # ╟─3630b412-beeb-455a-a4b8-1e1d50860266 # ╟─ba6347d6-4ad0-403b-824f-dcf290a7c002 # ╟─a2f4975d-eeee-4a2d-97dd-dd0cfd29d665 # ╟─32d411e9-b01d-4ad2-b4aa-2f091034e6c0 # ╟─e5b83421-dd94-43ad-84eb-ca558bff6a2d # ╟─118642ad-1aad-4f91-8da0-55a417b67750 # ╟─45171ecc-9d34-4ab6-a00b-ec9c9afc33f8 # ╟─6cd60f7d-d7ce-4189-a2dd-e47ce6825741 # ╟─ff3fc929-f448-41be-8f60-65de33dff36a # ╟─5ec2649e-9988-4f38-896a-64ef6ed91d82 # ╟─aa8475c3-c68b-4200-8634-ace33f525417 # ╟─aa22f905-b69d-405e-b09a-a765d60f6079 # 
╟─2dcbaac0-2fad-4292-ad31-8188a60876da
# ╟─6bad4f5f-806f-480a-ae16-2582761ce5e3
# ╟─e200dde3-9033-45b5-bfe0-2d03753b2c11
# ╟─d7a4b342-ef0a-44f9-b88d-bbb04483e8b3
# ╟─908f19a2-6d32-4776-95eb-b249a8155ddc
# ╟─05fc1fae-b378-4c39-b060-74ca635745ec
# ╟─d0573bf9-0fd6-4512-bc13-17aa23a3265b
# ╟─d8998d5f-65b2-4850-aef9-f19ecc192eca
# ╟─6b4c180c-9a12-4e3d-9336-1431e7c5875a
# ╟─0eb66cc9-93c0-4f07-b31b-a9bf9000260e
# ╟─85bf9f92-30f1-4e05-8d07-d8e481f20ccb
# ╟─fbba0a91-9f48-4d91-90e7-f6a7df3227f9
# ╟─7fc81b9f-73ed-4780-9204-ddf39467e58f
# ╟─d08ae188-937f-474b-92d7-cb8eeda063fe
# ╟─7ee8cfc9-26b2-4fe4-8263-1f4d2f7c276d
# ╠═db9a97b1-f76d-4f51-96c6-0159469c5adb
# ╟─1669f5e3-efe1-4b79-a2b6-11ed7476a2a1
# ╠═5000f4c3-5416-4e53-88ae-e30d8d09827e
# ╠═2413c061-89de-403f-8011-e458f5a9859d
# ╟─d4704779-9261-478b-bbf6-551220783e12
# ╠═4be065b5-0841-4d54-b9ab-d6770d4d9d94
# ╠═e850e53d-cf61-4fc7-9cb3-e318ae957f0b
# ╟─a267ea5f-8bd5-4ee0-9c8d-47e2d3b81692
# ╟─c02520a3-3375-4d83-a0dc-1aeac2aa7d5f
# ╟─2e18fc92-4185-493b-9ce8-cca63dad7d2d
# ╟─acc7b185-e4df-4aca-aa42-554215065384
# ╟─00c9e973-7e06-4483-bf4c-be7374707118
# ╟─83ff3fc3-bcd8-4235-a42f-1d75c7d6aa5b
# ╟─b308e270-6b40-4946-ac92-c705823f2c1e
# ╟─e483b3d4-d01c-4a98-8e68-e8120a7d95a7
# ╟─74017e78-0f02-41bb-a160-5f2d26c18268
# ╟─0b3735c2-695c-4225-843e-16ca17aac0eb
# ╟─d7942b37-f821-494a-8f18-5f267aa3457a
# ╟─00000000-0000-0000-0000-000000000001
# ╟─00000000-0000-0000-0000-000000000002

================================================
FILE: notebooks/margolus.jl
================================================
### A Pluto.jl notebook ###
# v0.12.21

using Markdown
using InteractiveUtils

# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error).
macro bind(def, element)
	quote
		local el = $(esc(element))
		global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing
		el
	end
end

# ╔═╡ 845b7d0a-7ca2-11eb-2683-cd370811bf68
using NiLang

# ╔═╡ 88912bea-7ca2-11eb-1cde-db111d594c20
using Viznet, PlutoUI, Compose

# ╔═╡ f42405a8-7ca2-11eb-133b-df4f1ce9bb19
html""""""

# ╔═╡ 66e972c2-7ca2-11eb-0a7a-a9305dd20511
md"# BBMCA - NiLang implementation"

# ╔═╡ c8722cea-7ca5-11eb-3e7d-0f39322ea40a
md"Check [Physics-like models of computation](https://www.sciencedirect.com/science/article/abs/pii/0167278984902525) (Norman Margolus, 1984) for theories about billiard ball cellular automata (BBMCA)."
# ╔═╡ 845d1458-7ca2-11eb-0e69-11a10b9894a8
@i function load_and_clear!(x, config, i, j)
	m, n ← size(config)
	x ⊻= config[i,j] # to make it faster, put `@inbounds` before this statement
	config[i,j] ⊻= x
	x ⊻= config[i,mod1(j+1, n)] << 1
	config[i,mod1(j+1, n)] ⊻= x >> 1
	x ⊻= config[mod1(i+1, m),j] << 2
	config[mod1(i+1, m),j] ⊻= x >> 2
	x ⊻= config[mod1(i+1, m),mod1(j+1, n)] << 3
	config[mod1(i+1, m),mod1(j+1, n)] ⊻= x >> 3
end

# ╔═╡ 84680136-7ca2-11eb-1df5-ffa0b4b9d126
@i function margolus_rule(y, x)
	# remove the reversibility check to make it run faster
	@invcheckoff if x==6
		y ⊻= 9
	elseif x==9
		y ⊻= 6
	elseif x==4
		y ⊻= 2
	elseif x==2
		y ⊻= 4
	elseif x==1
		y ⊻= 8
	elseif x==8
		y ⊻= 1
	else
		y ⊻= x
	end
end

# ╔═╡ 845c1292-7ca2-11eb-3e56-996aa5229b4e
@i function update_bbmca!(config, iseven)
	# compute the offset, borrowing some ancillas from the system
	@routine begin
		offset ← 1
		m, n ← size(config)
		if !iseven
			offset += 1
		end
	end
	for j=offset:2:n
		for i=offset:2:m
			x ← 0
			y ← 0
			# load the block to `x` and clean up the original data
			load_and_clear!(x, config, i, j)
			# compute the new configuration to `y`
			margolus_rule(y, x)
			# clean up `x` with the following observation:
			# applying the Margolus rule twice restores the configuration
			margolus_rule(x, y)
			# store `y` to the block
			(~load_and_clear!)(y, config, i, j)
			# ancillas `x` and `y` are returned to the pool automatically
		end
	end
	# uncompute `offset`
	~@routine
end

# ╔═╡ 34006d60-7ca3-11eb-3c51-c9758394b838
md"# Visualization"

# ╔═╡ 846e21da-7ca2-11eb-05a3-51e22ed04147
function showconfig(ba::AbstractMatrix)
	m, n = size(ba, 1), size(ba, 2)
	lt = Viznet.SquareLattice(n, m)
	brush1 = nodestyle(:square, fill("black"), stroke("#888888"), linewidth(unit(lt)*mm); r=unit(lt)/2.2)
	brush0 = nodestyle(:square, fill("white"), stroke("#888888"), linewidth(unit(lt)*mm); r=unit(lt)/2.2)
	canvas() do
		for i=1:m, j=1:n
			(ba[i, j] == 1 ? brush1 : brush0) >> lt[j,i]
		end
	end
end

# ╔═╡ 846e8170-7ca2-11eb-351e-eb45c629f6a6
@bind btn Clock(0.1)

# ╔═╡ 84730f04-7ca2-11eb-3b13-3fd82a9e2109
# initial configuration
config = let
	x = zeros(Int, 10, 10)
	x[1,1] = 1
	x
end;

# ╔═╡ 84763e9c-7ca2-11eb-356c-c391966cdc98
# parity - note: the BBMCA is a two-state CA
bbmca_parity = Ref(true)

# ╔═╡ 847711e6-7ca2-11eb-326a-15f6bfc05347
let btn
	# update
	update_bbmca!(config, bbmca_parity[])
	# change parity
	bbmca_parity[] = !(bbmca_parity[])
	# visualize
	Compose.set_default_graphic_size(10cm, 10cm)
	showconfig(config)
end
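# A small sanity check (a sketch reusing the cells above): `margolus_rule`
# swaps 6↔9, 4↔2 and 1↔8 and fixes every other block value, so applying it
# twice is the identity; this is exactly the property `update_bbmca!` relies
# on to clean up its ancilla `x`.
let
	for x in 0:15
		y = margolus_rule(0, x)[1]       # NiLang functions return all arguments
		@assert margolus_rule(0, y)[1] == x
	end
end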
# ╔═╡ Cell order:
# ╟─f42405a8-7ca2-11eb-133b-df4f1ce9bb19
# ╟─66e972c2-7ca2-11eb-0a7a-a9305dd20511
# ╟─c8722cea-7ca5-11eb-3e7d-0f39322ea40a
# ╠═845b7d0a-7ca2-11eb-2683-cd370811bf68
# ╠═845c1292-7ca2-11eb-3e56-996aa5229b4e
# ╠═845d1458-7ca2-11eb-0e69-11a10b9894a8
# ╠═84680136-7ca2-11eb-1df5-ffa0b4b9d126
# ╟─34006d60-7ca3-11eb-3c51-c9758394b838
# ╠═88912bea-7ca2-11eb-1cde-db111d594c20
# ╠═846e21da-7ca2-11eb-05a3-51e22ed04147
# ╟─846e8170-7ca2-11eb-351e-eb45c629f6a6
# ╠═84730f04-7ca2-11eb-3b13-3fd82a9e2109
# ╠═84763e9c-7ca2-11eb-356c-c391966cdc98
# ╠═847711e6-7ca2-11eb-326a-15f6bfc05347

================================================
FILE: notebooks/reversibleprog.jl
================================================
### A Pluto.jl notebook ###
# v0.15.0

using Markdown
using InteractiveUtils

# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error).
macro bind(def, element)
	quote
		local el = $(esc(element))
		global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : missing
		el
	end
end

# ╔═╡ f3e235e7-76b9-4c39-bc70-038539838ff4
begin
	using Revise, Viznet, Compose, PlutoUI, Random, TikzPictures
	# left-right layout as a plain flexbox container
	function leftright(a, b; width=600, leftcellwidth=0.5)
		HTML("""
<div style="display:flex; width:$(width)px">
<div style="flex:$(leftcellwidth)">$(html(a))</div>
<div style="flex:$(1-leftcellwidth)">$(html(b))</div>
</div>
""")
	end
	# up down layout
	function updown(a, b; width=nothing)
		HTML("""
<div>$(html(a))</div>
<div>$(html(b))</div>
""")
	end
	function highlight(str)
		HTML("""<span style="background-color:yellow">$(str)</span>""")
	end
end;

# ╔═╡ 3b0fd2b5-5c6d-4d56-9e48-cda1493b4c72
using NiLang

# ╔═╡ a7810352-7967-460d-abd7-361a324c20a9
using ForwardDiff: Dual

# ╔═╡ a56445b5-e530-4035-9ac6-a2d196a6276a
using NiLang.AD: GVar

# ╔═╡ c6bd40af-50ed-4cee-8043-60b2bac05058
using NiLang: bennett

# ╔═╡ 873ef2c2-653e-425e-9732-b1ed19f7a0b7
using TreeverseAlgorithm

# ╔═╡ 141e21c0-1bdf-4e6b-b76d-129567a1180f
using LinearAlgebra

# ╔═╡ ac35b26c-0585-4d2a-8fbf-bda9b141d6af
using Test

# ╔═╡ d4726239-81af-4792-8472-c680508449c6
using BenchmarkTools

# ╔═╡ e20e2d2e-4b28-4e32-8d80-ce029928a094
html"""
"""

# ╔═╡ 8308df59-3faa-4abf-8f05-119bbae48f64
let
	github = html"""<a href="https://github.com/GiggleLiu">GiggleLiu</a>"""
	md"# Pebble games - Time and space to differentiate a program

-- Jinguo Liu ($github)
"
end

# ╔═╡ a3532a83-9fd3-4d24-b1bb-b52457317e51
html"""A postdoc in Mikhail Lukin's group, Department of Physics<br>
Harvard University<br>
QuEra Computing"""

# ╔═╡ 15657e4b-848e-43ad-a99f-37143d11705e
md"# Table of contents
1. An introduction to reversible computing
2. A reversible eDSL NiLang
3. Automatically differentiating a reversible computing language
"

# ╔═╡ 34a6b7f4-7d72-485d-86dc-f4b1ba6174eb
md"## Let's start from physics"

# ╔═╡ 67d1b500-964e-4668-a7d0-ed93886446ca
let
	img1 = html"""
"""
	img2 = html"""
"""
	leftright(updown(img2, html"electromagnetic force"), updown(img1, html"friction"))
end

# ╔═╡ 673992ed-6963-400a-a69b-d65d26c4f443
md"Both can be explained by reversible quantum dynamics."

# ╔═╡ 6078758e-b392-4bdb-a1e7-44b135ce900e
md"""
## How come our programming style is irreversible?
"""

# ╔═╡ 41e06e2a-b482-4e0f-8569-fee2ffd8aaaf
function find_maximum(x::AbstractVector)
	@assert !isempty(x)   # error handling
	m = x[1]              # assignment (a)
	for i=2:length(x)
		m = max(m, x[i])  # assignment (b)
	end
	return m              # function return
end

# ╔═╡ dcf53d46-e259-4101-8530-9621094ee586
TikzPicture(L"""
\draw [black, thick,->] (0, 0) -- (1, 0);
\draw [black, thick,->,dashed] (1, 0) .. controls (1.5, 0.5) .. (2, 0);
\node at (1.5, 0.5) {goto $\ldots$};
\draw [black, thick,->] (2, 0) -- (3, 0);
\def\x{4};
\draw [black, thick,<-] (\x, 0) -- (\x+1, 0);
\draw [black, thick,<-,dashed] (\x+1, 0) .. controls (\x+1.5, 0.5) .. (\x+2, 0);
\draw [black, thick,<-,dashed] (\x+1, 0) -- (\x+2, 0);
\node at (\x+1.5, 0.5) {comefrom?};
\draw [black, thick,<-] (\x+2, 0) -- (\x+3, 0);
\node at (1.5, -0.2) {call};
\node at (\x+1.5, -0.2) {uncall};
""", options="scale=2.0", preamble="")

# ╔═╡ 6a88d26c-c895-4852-ab4f-37297b848731
md"""
* **Information**: the uncertainty, quantified by *information entropy*.
* **Information erasure**: making the system more certain, e.g.
```julia
m = max(m, x[i])
```
quantified by the decrease of information entropy.
"""

# ╔═╡ f9675365-36aa-430c-b747-3bc4f602e6fb
md"## Information erasure requires dissipating heat to the environment!"

# ╔═╡ 46eb4ba9-dce6-4711-9c4d-3f16de6240de
leftright(html"""
""", md"Feynman, Richard P. **Feynman Lectures on Computation** (2018)")

# ╔═╡ 046f7559-4af9-4982-b5c3-335add0911d7
html"""
"""

# ╔═╡ 0a039bfa-571e-4fad-b73c-1324d08777fc
html"""
""" # ╔═╡ 3f1e4d7a-32a7-4c7e-92dd-465bac925e63 md""" Compress the boxes from size $V$ to size $V/2$, the process is isothermal. """ # ╔═╡ f68bcfb6-97ce-48d1-b0b8-e8466d4ac879 md""" Case 1: we know nothing about the system. The gas does work ```math \begin{align} &pV = N k T\\ &W_{\rm gas} = \int_{V}^{V/2} p dV = -NkT\log 2 \end{align} ``` """ # ╔═╡ 3d4ba750-8d62-48ac-bf96-691397689ddc md""" Case 2: we know one bit knowledge about each box: * 1: the atom is in the right half * 0: the atom is in the left half ```math W_{\rm gas} = 0 ``` """ # ╔═╡ f4cb9212-181f-4338-b858-1d99c7f415e9 md"Erasing each bit information comes along with $kT \log 2$ heat dissipation!!" # ╔═╡ 31bde262-6352-4be0-b5cc-1781e3df2268 md"Later people proved it from the microscopic picture. [Reeb, 2014]" # ╔═╡ 83ff3fc3-bcd8-4235-a42f-1d75c7d6aa5b md"## Computing architectures" # ╔═╡ b308e270-6b40-4946-ac92-c705823f2c1e let txt1 = md"Traditional irreversible computer $E \sim 10^8 kT$" img1 = html"""""" txt2 = md"DNA copying is a living copy machine $E \sim 100k T$" img2 = html""" """ txt3 = md""" Adiabatic CMOS [Athas, 1994] $E \sim 10^6 kT$ """ img3 = html""" """ txt4 = md"""Adiabatic superconducting devices [Takeuchi, 2014] $E \sim kT$ """ img4 = html""" """ updown(leftright(updown(img1, txt1), updown(img2, txt2)), leftright(updown(img3, txt3), updown(img4, txt4))) end # ╔═╡ e483b3d4-d01c-4a98-8e68-e8120a7d95a7 md"# Summary * An isolated system is reversible, * Our programs are not reversible, * Need a heatbath * Dissipate heat to heat bath: ``kT \log 2``/bit (Landauer's principle), ![](https://user-images.githubusercontent.com/6257240/123520518-22ebc700-d67f-11eb-8af1-a452605cc1d8.png) *Youtube*: Michael P. Frank: Fundamental Physics of Reversible Computing — An Introduction, Part 1 *Loophole*: need to take algorithmic overheads into consideration! " # ╔═╡ 20c34526-c7c4-11eb-21fa-d706fd684a4c md"# A short introduction to the reversible programming" # ╔═╡ 3f96abdf-fb5f-4d79-a288-e20b8c1f55d1 html"""""" # ╔═╡ 5d51231a-8bf0-4414-9a39-cea264df84f2 md"Initially written by Jinguo Liu and Taine Zhao (The author of MLStyle)" # ╔═╡ e10e0be8-b26e-4719-92dc-8ca46af0b4b5 md"## Feature 1. one function for two" # ╔═╡ b1a9946b-82b4-4954-8bb9-5df035eaefe4 md"Example: an identity mapping ``(x, y) \mapsto (x,y)`` " # ╔═╡ 00342a51-36d8-4fdd-aab7-ee02e2122c49 @i function f1(x1, x2) # will return inputs automatically for you end # ╔═╡ 40c1c48d-0e5a-4a47-b7e4-8f7666281249 f1(2, 3) # ╔═╡ 59df1f80-9be1-4b26-b263-ca0c7a0b9ab7 (~f1)(2, 3) # ╔═╡ 8320d326-c1ab-4807-befb-13dda3480bf5 md"## Feature 2. every instruction is reversible, every object is ''mutable''" # ╔═╡ 93238608-3b86-49f1-ad60-9360e12cff1c md"General design patterns * `y += f(x)` * `y -= f(x)` * `y ⊻= f(x)` There are also instructions like `SWAP`, `ROT`." # ╔═╡ f657c3fb-e140-4c76-8065-54f1cb6d05eb md"Example: mutating fields of complex numbers" # ╔═╡ 629a2549-745c-48a2-9bbc-a8f5fb046d11 @i function f2(x1::Complex, x2::Complex) x2 += exp(x1) # accumulative form SWAP(x1.im, x2.im) # other primitive functions f1(x1, x2) # other reversible functions end # ╔═╡ 640e0029-7931-4afd-bdf9-fed317efbd8e md" $(@bind expand_f2 CheckBox()) macroexpand" # ╔═╡ 6930345b-6e93-4b35-8d4f-91ad49141fa1 if expand_f2 macroexpand(NiLang, :(@i function f2(x1::Complex, x2::Complex) x2 += exp(x1) SWAP(x1.im, x2.im) end)) |> NiLangCore.rmlines end # ╔═╡ 090522bf-0ff2-4022-8460-aec6e37e936a f2(1.0+2im, 2.0+4.9im) # ╔═╡ 88c30609-2f42-405e-a14c-dfab44aef23b (~f2)(f2(1.0+2im, 2.0+4.9im)...) 
# ╔═╡ 2dc665a9-b131-4fef-acde-db346eb0f48b md"## Feature 3. One can reverse the control flows too" # ╔═╡ 4e479f48-42cd-476d-8604-08ecbb503a90 md""" #### Reversible `if` statement """ # ╔═╡ dc41e99a-f598-4bf6-9f76-ecdb04f5f40c leftright(md" ```julia if (precondition[, postcondition]) ... end ``` ", md" ```julia if (postcondition[, precondition]) ~(...) end ``` ") # ╔═╡ 97e0bae1-69ac-4cbf-b9d9-6b38180edd78 TikzPicture(L""" \node [test] (pre) {precondition}; \node [proc, it] (st1) [right=of pre] {statements 1}; \node [proc, it] (st2) {statements 2}; \node [test] (post1) [right=of st1] {postcondition}; \node [test] (post2) [right=of st2] {postcondition}; \node [proc,red] (err1) [above=of post1] {invertibility error}; \node [proc,red] (err2) [below=of post2] {invertibility error}; \draw [->,black] (pre.east) -- (st1) node[midway,above] {T}; \draw [->,black] (pre.south) |- (st2) node[midway,below] {F}; \draw [->,black] (-2.5, 0.0) -- (pre.west); \draw [->,black] (st1) -- (post1); \draw [->,black] (st2) -- (post2); \draw [->,red] (post1) -- (err1) node[midway,right] {F}; \draw [->,red] (post2) -- (err2) node[midway,right] {T}; \draw [->,black] (post1.east) -- (12, 0) node[midway,above] {T}; \draw [black] (post2.east) -| (11, 0) node[midway,right] {F}; """, options=raw" font=\sffamily\small, >={Triangle[]}, */.tip={Circle[]}, start chain=going below, node distance=18mm and 40mm, every join/.style={norm}, base/.style={draw, on chain, on grid, align=center, minimum height=4ex, inner color=black!50!gray!10, outer color=black!50!gray!15}, proc/.style={base, rectangle, text width=8em}, test/.style={base, diamond, text centered, aspect=2.6,inner sep=-0ex}, norm/.style={->, draw, black}, it/.style={font={\sffamily\small\itshape}}", preamble=raw"\usetikzlibrary{shapes.geometric,arrows.meta,chains,positioning,quotes}") # ╔═╡ 355ba831-6be0-456a-8f94-36acd2365f17 md"Example: obtaining the absolute value ``x \mapsto |x|``" # ╔═╡ 003c3e68-600e-4688-832b-5e061572b128 @i function abs_incorrect(x) if x < 0 NEG(x) end end # ╔═╡ 1fb196f9-0f0a-42dc-b094-077cdf18d13d abs_incorrect(-3) # ╔═╡ aa9a679e-63bc-4951-b6f4-65316e212bc8 @i function abs_correct(x, sgn) if (x < 0, sgn) NEG(x) sgn ⊻= true end end # ╔═╡ e6fd3c2d-cadd-40d7-a575-b1e68c45ee13 abs_correct(-3, false) # ╔═╡ 02b7e1b4-4622-4e68-966e-ff79817557d1 md"#### Reversible `while` statement" # ╔═╡ 364fd613-0ebd-4b45-a3fd-f9baa8c487e3 leftright(md" ```julia @from condition1 while condition2 ... end ``` ", md" ```julia @from !(condition2) while !(condition1) ~(...) 
end
```
")

# ╔═╡ 75d8283a-b331-4648-84a8-489e168e33f9
TikzPicture(L"""
\node [test] (c1) {condition 1};
\node [test] (c2) [right=of c1] {condition 2};
\node [test] (c3) [right=of c2] {condition 1};
\node [proc, it] (st1) [above=of c2] {statements};
\node [proc,red] (err1) [below=of c1] {invertibility error};
\node [proc,red] (err2) [right=of c3] {invertibility error};
\draw [->,black] (c2) -- (st1) node[midway,right] {T};
\draw [->,black] (st1) -| (c3);
\draw [->,black] (-2.5, 0.0) -- (c1.west);
\draw [->,black] (c1) -- (c2) node[midway,above] {T};
\draw [->,black] (c3) -- (c2) node[midway,above] {F};
\draw [->,red] (c1) -- (err1) node[midway,right] {F};
\draw [->,red] (c3) -- (err2) node[midway,above] {T};
\draw [->,black] (c2.south) |- (11, -2) node[midway,below] {F};
""", options=raw"
font=\sffamily\small, >={Triangle[]}, */.tip={Circle[]},
start chain=going below, node distance=18mm and 40mm,
every join/.style={norm},
base/.style={draw, on chain, on grid, align=center, minimum height=4ex, inner color=black!50!gray!10, outer color=black!50!gray!15},
proc/.style={base, rectangle, text width=8em},
test/.style={base, diamond, text centered, aspect=2.6,inner sep=-0ex},
norm/.style={->, draw, black},
it/.style={font={\sffamily\small\itshape}}",
preamble=raw"\usetikzlibrary{shapes.geometric,arrows.meta,chains,positioning,quotes}")

# ╔═╡ 2207c2fb-4a52-4766-8dd3-03872744aa74
md"Example: computing Fibonacci numbers"

# ╔═╡ 288331c3-2dfb-4941-985f-554be409c0ab
@i function fib(y, n)
    @invcheckoff if (n >= 1, ~)
        counter ← 0
        counter += n
        @from counter==n while counter > 1
            counter -= 1
            fib(y, counter)
            counter -= 1
        end
        counter -= n % 2
        counter → 0
    end
    y += 1
end

# ╔═╡ 12a30359-d6ff-4113-bab5-b198e908cf1a
fib(0, 10)

# ╔═╡ 23ea88b4-6b89-462a-92da-0e8bdf5c73b5
(~fib)(89, 10)

# ╔═╡ 4bfdabd6-b7ff-40fb-b567-52910acb5a07
md"""
#### Reversible `for` statement
$(
leftright(md"
```julia
for iter = start:step:stop
    ...
end
```
", md"
```julia
for iter = stop:-step:start
    ~(...)
end
```
")
)
"""

# ╔═╡ 2603147b-e7a2-4cae-b88f-2cfebe16bacb
md"## Feature 4. Storage access should also be reversible"

# ╔═╡ 12d49e2e-cc6e-48d6-b11a-e7c311453bfc
md"""
$(
leftright(updown(md"
```julia
var ← zero(T)
```", md"borrow some memory from the system and allocate it to variable `var` of type `T`."),
updown(md"
```julia
var → zero(T)
```
", md"return the zero-cleared variable to the system."))
)
"""

# ╔═╡ 10312dc7-e861-4c89-b2fd-672cfe8850bf
md"""
$(
leftright(updown(md"
```julia
dict[key] ← variable
```", md"create a new entry, asserting that `key` does not exist"),
updown(md"
```julia
dict[key] → variable
```
", md"assert the value of an existing key and delete it"))
)

(A minimal allocation sketch appears below.)
"""

# ╔═╡ 03f58f2a-24b6-4235-9102-71a19b9679ac
md"Example: implementing `y += log(x)` for complex numbers.
```math
\log(z) = \log(|z|) + i {\rm Arg}(z)
```"

# ╔═╡ 22a5853e-4f9a-4da0-bc03-84a6b0061cfe
@i function clog_v1(y::Complex{T}, squaren::T, n::T, x::Complex{T}) where T
    squaren += x.re^2
    squaren += x.im^2
    n += sqrt(squaren)
    y.re += log(n)
    y.im += atan(x.im, x.re)
end

# ╔═╡ 1ecdc4d2-b3ca-4f5c-a454-0f0bc51b6ec2
@test clog_v1(0.0im, 0.0, 0.0, 3.0im)[1] ≈ log(3.0im)

# ╔═╡ 927ea209-2ccd-48d5-b69e-0a3c735bb496
md"""Bennett, Charles H. "Logical reversibility of computation." (1973)."""

# ╔═╡ 962b204c-8195-4938-944c-b7c4a52e70bd
TikzPicture(L"""
\def\r{0.15};
\foreach \x in {1,...,5}{
\fill[fill=black] (\x, 0) circle [radius=\r];
\node[white] at (\x, 0) {$s_{\x}$};
}
\fill[fill=white] (5.5, 0) circle [radius=\r];
\foreach \x in {1,...,4}{
\draw [black, thick, ->] (\x+\r, \r) .. controls (\x+0.5, 0.3) .. (\x+1-\r, \r);
\node at (\x+0.5, 0.4) {\x};
}
\foreach[evaluate={\y=int(8-\x)}] \x in {1,...,3}{
\draw [red, thick, <-] (\x+\r, -\r) .. controls (\x+0.5, -0.3) .. (\x+1-\r, -\r);
\node at (\x+0.5, -0.4) {\y};
}
""", options="scale=2.0", preamble="")
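# How the accumulative instructions above execute: a minimal sketch, assuming
# only the exported `PlusEq`/`MinusEq` instruction types (the literal values are
# illustrative). `y += exp(x)` runs as `PlusEq(exp)(y, x)`, and the inverse
# program runs `MinusEq(exp)`, restoring the inputs up to floating-point roundoff.
let
    y, x = PlusEq(exp)(0.0, 0.3)  # forward: y == exp(0.3), x is unchanged
    MinusEq(exp)(y, x)            # inverse: returns (0.0, 0.3)
end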
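# The allocation sketch promised above: `anc ← zero(x)` borrows a zero-initialized
# ancilla, and `anc → zero(x)` asserts it is zero again before returning it to the
# system. `alloc_demo` is a hypothetical name for this sketch; its body mirrors the
# compute/copy/uncompute pattern of `clog_v2` below.
@i function alloc_demo(y!, x)
    anc ← zero(x)
    anc += x^2   # compute
    y! += anc    # copy the result out
    anc -= x^2   # uncompute, so the ancilla is zero again
    anc → zero(x)
end

# usage: alloc_demo(0.0, 3.0) == (9.0, 3.0); (~alloc_demo)(9.0, 3.0) restores (0.0, 3.0)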
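# What `clog_v1` above leaves behind: its two ancilla outputs come back nonzero
# ("garbage"), so they cannot be discarded reversibly. This sketch (with the
# illustrative input `3.0im`) makes the garbage explicit; the uncomputing version
# below clears it, following the Bennett scheme sketched above.
let
    y, squaren, n, x = clog_v1(0.0im, 0.0, 0.0, 3.0im)
    (squaren, n)  # == (9.0, 3.0) rather than (0.0, 0.0)
end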
# ╔═╡ af58a0f8-e3fd-465f-b1ae-6fbd94123c91
@i function clog_v2(y::Complex{T}, x::Complex{T}) where T
    ######### compute ########
    n ← zero(T)
    squaren ← zero(T)
    squaren += x.re^2
    squaren += x.im^2
    n += sqrt(squaren)
    ########## copy ##########
    y.re += log(n)
    y.im += atan(x.im, x.re)
    ####### uncompute ########
    n -= sqrt(squaren)
    squaren -= x.im^2
    squaren -= x.re^2
    n → zero(T)
    squaren → zero(T)
end

# ╔═╡ 4f993061-c6c3-4acb-aef3-8453e7b83997
@test clog_v2(0.0im, 3.0im)[1] ≈ log(3.0im)

# ╔═╡ 6e5cf9bb-7cab-4da1-8831-541e0ee3bde8
@i @inline function clog_v3(y::Complex{T}, x::Complex{T}) where T
    # @invcheckoff turns off the reversibility check and accelerates the code
    @routine @invcheckoff begin
        @zeros T squaren n
        squaren += x.re^2
        squaren += x.im^2
        n += sqrt(squaren)
    end
    y.re += log(n)
    y.im += atan(x.im, x.re)
    ~@routine
end

# ╔═╡ 320e4114-f0da-4106-90ab-a9f7b0ef0099
@test clog_v3(0.0im, 3.0im)[1] ≈ log(3.0im)

# ╔═╡ 2d944e8d-e19d-48be-ab7f-c3e54e9f43ef
md"# III. Automatic differentiation in NiLang"

# ╔═╡ 4f53fa8e-ea9b-461f-8199-7bbe2a3ef544
md"## Scalar or tensor"

# ╔═╡ 56ea4f5f-ea88-46d6-beed-b9a7afed315d
md"Differentiating matrix-vector multiplication"

# ╔═╡ 124a9ecd-0bda-4823-9507-92efcf449d9c
md"""
```math
y = A x
```
"""

# ╔═╡ 6819498c-7a46-48fa-9eec-38341dca72f9
let
    tl = md"tensor-level view"
    tl2 = md"
```julia
y = A * x
```
"
    sl = md"scalar-level view"
    sl2 = md"
```julia
for j=1:n
    for i=1:m
        y[i] += A[i,j] * x[j]
    end
end
```"
    leftright(updown(tl, tl2, width=300), updown(sl, sl2, width=300))
end

# ╔═╡ 400f79cf-9260-4195-9582-0e8c486ddb5a
html"""Implementing AD on scalars:
  • limited primitive functions
  • hard to utilize BLAS
  • harder to manage memory caching
"""

# ╔═╡ 2dad3acd-332c-46b9-8f84-21c076bdef41
md"## Forward-mode autodiff and reverse-mode autodiff"

# ╔═╡ a674bde5-70e1-4d21-aedf-8977f8039c36
md"
A program ``\vec p \mapsto \vec q``, composed of instructions with the following forward/backward forms.

```math
\begin{cases}
\vec y = f(\vec x) \\
\vec x = f^{-1}(\vec y)
\end{cases}
```
"

# ╔═╡ bf696784-23d4-42b2-8ee1-bfec11ff8d78
let
    fd = md"""
```math
\begin{align}
\frac{d \vec y}{d p_i} = \underbrace{\frac{d \vec y}{d \vec x}}_{\text{local Jacobian}}\frac{d \vec x}{d p_i}
\end{align}
```
ForwardDiff: ``(\vec x, \frac{d\vec x}{dp_i}) \mapsto (\vec y, \frac{d\vec y}{dp_i})``
"""
    bd = md"""
```math
\begin{align*}
\frac{d q_j}{d \vec x} &\mathrel{+}= \frac{\partial q_j}{\partial \vec y}\underbrace{\frac{d \vec y}{d \vec x}}_{\text{local Jacobian}}
\end{align*}
```
NiLang: ``(\vec y, \frac{d\mathcal{L}}{d\vec y}) \mapsto (\vec x, \frac{d\mathcal{L}}{d\vec x})``
"""
    leftright(fd, bd)
end

# ╔═╡ 9193fcbb-ec4f-41b4-8fca-be8e183dea31
g_forwarddiff = sin(Dual(π/3, 1.0))

# ╔═╡ 3fadf1d4-8fa2-4c02-aa21-b7969b465536
# note: `y += sin(x)` is translated to `PlusEq(sin)(y, x)` in NiLang.
g_nilang = MinusEq(sin)(GVar(sin(π/3), 1.0), GVar(π/3, 0.0))

# ╔═╡ 12e933ca-13f3-414c-926a-f1bb1bbe66cf
@test g_forwarddiff.partials[1] ≈ g_nilang[2].g

# ╔═╡ 4403c183-5eeb-4fd0-87a4-4ad29e1f4dc2
md"## Differentiating the complex-valued log"

# ╔═╡ 4c3f9b91-4f27-4fb8-a9db-1ddbfd62dbdd
@i function real_of_clog(loss::Real, y::Complex, x::Complex)
    clog_v3(y, x)
    loss += y.re
end

# ╔═╡ af71e2d7-a600-46fd-9a46-1b9d4607f06d
@test let
    # forward pass to compute results
    loss_out, y_out, x_out = real_of_clog(0.0, 0.0im, 2+3.0im)
    # backward pass to compute inputs from results, using element type `GVar`
    gloss_out = GVar(loss_out, 1.0)
    gy_out = GVar(y_out)
    gx_out = GVar(x_out)
    gloss_out, gy_out, gx_out = (~real_of_clog)(gloss_out, gy_out, gx_out)
    # forward diff
    dloss_out = Dual(loss_out, 0.0)
    dy_out = Complex(Dual(0.0, 0.0), Dual(0.0, 0.0))
    dx_out = Complex(Dual(2.0, 1.0), Dual(3.0, 0.0))
    dloss_out, dy_out, dx_out = real_of_clog(dloss_out, dy_out, dx_out)
    gx_out.re.g ≈ dloss_out.partials[1]
end

# ╔═╡ 1d3c7324-6828-47aa-b30e-bcac0e052213
md"A shortcut"

# ╔═╡ 2993fe1f-042b-4c85-85b8-bcc7ed449a54
NiLang.AD.gradient(real_of_clog, (0.0, 0.0im, 2+3.0im); iloss=1)

# ╔═╡ 048d482e-3e5b-496f-95d7-17589a5f6f11
md"# Overheads matter!"

# ╔═╡ e22bbe66-56f5-40be-b464-1f8651e6a6ac
md"""
* Case 1: an intrinsically irreversible linear program,
* Case 2: a linear algebra function: QR decomposition
"""

# ╔═╡ 28767d73-47a8-4f4b-b3dd-146c4ae3e038
md"## Case 1: differentiating a linear program"

# ╔═╡ ea907d51-d4a9-48ca-90a5-bd91309ccfad
md"Imagine we have a very long linear program that is intrinsically irreversible"

# ╔═╡ 3aa99be5-6747-4163-9bb6-ed8cd5ce19f6
TikzPicture(L"""
\def\r{0.15};
\def\n{10};
\foreach \x in {\n}{
\fill[fill=black] (\x, 0) circle [radius=\r];
\node[white] at (\x, 0) {$s_{\x}$};
}
\foreach \x in {1,...,9}{
\draw (\x, 0) circle [radius=\r];
\node[black] at (\x, 0) {$s_{\x}$};
}
\fill[fill=white] (\n+0.5, 0) circle [radius=\r];
\foreach \x/\t in {1/1,2/2,3/3,4/4,5/5,6/6,7/7,8/8,9/9}{
\draw [black, thick, ->] (\x+\r, \r) .. controls (\x+0.5, 0.3) ..
(\x+1-\r, \r); \node[black] at (\x+0.5, 0.4) {\t}; } """ , options="scale=2.0", preamble="") # ╔═╡ 2f0263f5-2ace-4d4b-8d71-cee26c03122e md"The accumulative version" # ╔═╡ 03a21468-c2fe-4df7-ae8a-38b28a0efe2f TikzPicture(L""" \def\r{0.15}; \def\n{10}; \foreach \x in {1,...,\n}{ \fill[fill=black] (\x, 0) circle [radius=\r]; \node[white] at (\x, 0) {$s_{\x}$}; } \fill[fill=white] (\n+0.5, 0) circle [radius=\r]; \foreach \x/\t in {1/1,2/2,3/3,4/4,5/5,6/6,7/7,8/8,9/9}{ \draw [black, thick, ->] (\x+\r, \r) .. controls (\x+0.5, 0.3) .. (\x+1-\r, \r); \node[black] at (\x+0.5, 0.4) {\t}; } """ , options="scale=2.0", preamble="") # ╔═╡ f326d8e5-7117-4eed-b30d-0f64e5974426 md"With uncomputing" # ╔═╡ b9af3db6-5725-4190-b96c-f3fa41f07c93 TikzPicture(L""" \def\r{0.15}; \def\n{10}; \foreach \x in {1,4,7,10}{ \fill[fill=black] (\x, 0) circle [radius=\r]; \node[white] at (\x, 0) {$s_{\x}$}; } \foreach \x in {2,3,5,6,8,9}{ \draw (\x, 0) circle [radius=\r]; \node[black] at (\x, 0) {$s_{\x}$}; } \fill[fill=white] (\n+0.5, 0) circle [radius=\r]; \foreach \x/\t in {1/1,2/2,3/3,4/6,5/7,6/8,7/11,8/12,9/13}{ \draw [black, thick, ->] (\x+\r, \r) .. controls (\x+0.5, 0.3) .. (\x+1-\r, \r); \node[black] at (\x+0.5, 0.4) {\t}; } \foreach \x/\t in {1/5,2/4,4/10,5/9,7/15,8/14}{ \draw [black, thick, <-] (\x+\r, -\r) .. controls (\x+0.5, -0.3) .. (\x+1-\r, -\r); \node[black] at (\x+0.5, -0.4) {\t}; } """ , options="scale=2.0", preamble="") # ╔═╡ 47cf7e85-c49f-4618-9689-e0de789625f6 md"With uncomputing: the coarser grain" # ╔═╡ 2fcf48fd-bedc-41d9-a433-68efd5dd0d20 TikzPicture(L""" \def\r{0.15}; \def\n{10}; \foreach \x in {1,4,7,10}{ \fill[fill=black] (\x, 0) circle [radius=\r]; \node[white] at (\x, 0) {$s_{\x}$}; } \fill[fill=white] (\n+0.5, 0) circle [radius=\r]; \foreach \x in {1,4,7}{ \draw [black, thick, ->] (\x+\r, \r) .. controls (\x+1.5, 0.6) .. (\x+3-\r, \r); } \foreach \x in {1,4}{ \draw [black, thick, <-] (\x+\r, -\r) .. controls (\x+1.5, -0.6) .. (\x+3-\r, -\r); } """ , options="scale=2.0", preamble="") # ╔═╡ 5060f5bb-8430-42aa-b61d-c88249edb323 md"## Pebble game" # ╔═╡ 55dd53ed-6420-4426-95ae-feb47bf50f22 md"The optimal time-space tradeoff corresponds to the optimal solution to the pebble game." # ╔═╡ c62a9f94-457f-496b-bee0-bb0db02aca5d TikzPicture(L""" \def\y{0} \node at (4, \y-1) {initial configuration}; \foreach \x in {0,...,16}{ \draw (0.5*\x-0.25, 0.5*\y-0.25) rectangle (0.5*\x+0.25, 0.5*\y+0.25); \ifnum \x > 0 \node at (0.5*\x, 0.5*\y) {\x}; \fi } \fill (0, 0) ellipse (0.2 and 0.15); \def\dx{11} \foreach \a/\b in {-0.1/0.3, 0.2/0.5, -0.5/0.4, 0.1/0.24, 0.6/0.1, -0.3/-0.3} \fill (\dx+\a, \y+\b) ellipse (0.2 and 0.15); \node at (11, -1) {free pool of pebbles}; \node at (13, -1) {}; \node (goal) at (9, \y+1) {goal}; \draw[<-,thick] (8, \y+0.3) .. controls (8.2, \y+0.7) .. (goal); """, options="scale=1.0") # ╔═╡ bccadcb5-6d9f-4a70-b7c4-74e0e3d5f8c8 TikzPicture(L""" \def\y{0} \node at (4, \y-1) {put rule (if and only if the previous grid is occupied)}; \foreach \x in {0,...,16} \draw (0.5*\x-0.25, 0.5*\y-0.25) rectangle (0.5*\x+0.25, 0.5*\y+0.25); \fill (0, 0) ellipse (0.2 and 0.15); \fill (2, 0) ellipse (0.2 and 0.15); \draw[dashed] (2.5, 0) ellipse (0.2 and 0.15); \def\dx{11} \foreach \a/\b in {-0.1/0.3, 0.2/0.5, -0.5/0.4, 0.1/0.24, 0.6/0.1, -0.3/-0.3} \fill (\dx+\a, \y+\b) ellipse (0.2 and 0.15); \node at (13, -1) {}; \draw[<-,thick] (2.5, \y+0.3) .. controls (2.8, \y+1) and (8.0, \y+1) .. 
(10, 0.5); """, options="scale=1.0") # ╔═╡ b308ecb0-070e-4ad8-8009-dc60e75bbe01 TikzPicture(L""" \def\y{0} \node at (4, \y-1) {remove rule (if and only if the previous grid is occupied)}; \foreach \x in {0,...,16} \draw (0.5*\x-0.25, 0.5*\y-0.25) rectangle (0.5*\x+0.25, 0.5*\y+0.25); \fill (0, 0) ellipse (0.2 and 0.15); \fill (2, 0) ellipse (0.2 and 0.15); \fill (2.5, 0) ellipse (0.2 and 0.15); \def\dx{11} \foreach \a/\b in {-0.1/0.3, 0.2/0.5, -0.5/0.4, 0.1/0.24, 0.6/0.1, -0.3/-0.3} \fill (\dx+\a, \y+\b) ellipse (0.2 and 0.15); \node at (13, -1) {}; \draw[->,thick] (2.5, \y+0.3) .. controls (2.8, \y+1) and (8.0, \y+1) .. (10, 0.5); """, options="scale=1.0") # ╔═╡ 9123e669-19c7-47c1-a924-0c618b4a9c1f md" Space complexity: ``O(\log(T)S)`` Time complexity: ``O(T^{1+\epsilon})`` " # ╔═╡ 83d0c5fc-9cb7-4e37-bfdb-ed630b94d9b4 md" The recursive Bennett's time-space tradeoff scheme is probably optimal for a reversible program. (Li 1997) " # ╔═╡ 3d2feca5-43d3-4a46-ba1c-849c5ceeb676 md"nstep = $(@bind nstep Slider(2:20000; show_value=true, default=10000))" # ╔═╡ 7ca616d1-caed-40cf-b768-b07af81a654d md"k = $(@bind bennett_k Slider(2:100; show_value=true, default=2))" # ╔═╡ a54d5fc4-643c-47d2-be97-4626a060c9b4 md"Optimal checkpointing is recursive, Griewank (1992). Julia Implementation: [https://github.com/GiggleLiu/TreeverseAlgorithm.jl](https://github.com/GiggleLiu/TreeverseAlgorithm.jl)" # ╔═╡ 8b556fbe-275f-4e7b-94a0-7434ce81ad8b md"Time complexity is ``O(T\log(T))``" # ╔═╡ 8e9c55f6-8175-4cb4-8798-91107c4d16ee md"Space complexity is ``O(S\log(T))``" # ╔═╡ 972d889c-c48c-470a-b710-aba9ecaacdaa md"nstep = $(@bind treeverse_nstep Slider(2:20000; show_value=true, default=10000))" # ╔═╡ 54b3a283-7d42-431f-9ecb-48f37198409a md"number of checkpoints = $(@bind treeverse_δ Slider(2:100; show_value=true, default=2))" # ╔═╡ 590f56f7-5654-4493-9103-02a0fce6e945 let logger = TreeverseLog() treeverse(identity, (x,gy)->0, 0; δ=treeverse_δ, N=treeverse_nstep, logger=logger) logger end # ╔═╡ 11964529-8e88-4743-a725-57fc5c525649 html""" """ # ╔═╡ 45448141-214d-4e08-978e-5d1d25f763cd md"## Case 2: differentiating linear algebra functions" # ╔═╡ dfe3cc09-6e27-4166-9a4e-2dd22f1e08a2 md"The definition of QR factorization ```math A = QR ``` " # ╔═╡ 3d07e6d1-2964-4718-b7bc-114d82389aa4 md"We implement Householder QR" # ╔═╡ c00cd648-d685-4a17-9ddf-39c06dd5f066 md""" $Q = H_1H_2 \ldots H_n$ """ # ╔═╡ 5390c3ba-1507-4db9-9972-8295cbe493bc md""" ```math \begin{align} &H = 1-\beta vv^T \end{align} ``` """ # ╔═╡ 33a0bda1-c943-4792-bc3d-fbf1adf16d0a let img = TikzPicture(L""" \draw[->,thick] (0, 0) -- (1, 1); \draw[->,thick] (0, 0) -- ({sqrt(2)}, 0); \draw[thick,dashed] (0, 0) -- (1.5, {1.5*tan(22.5)}); \node at (1.5, 0) {$e$}; \node at (1.1, 1.1) {$x$}; \node at (1.4, {1.6*tan(22.5)}) {$v$}; \node at (2.6, 0.5) {$v = x-\|x\|_2 e$}; """, options="scale=2.0", preamble="") HTML("""
$(html(img))
""") end # ╔═╡ a2e81e79-3f85-4eef-8639-f455f9165a25 md"Apply reflector, step $(@bind house_step NumberField(0:4))" # ╔═╡ 0a04c470-8f5a-4b56-b2e2-5b32e3b944f3 let num(i, j) = let sym = j>=i || house_step < j ? raw"\times" : raw"0" if i>=house_step && j>=house_step sym = "\\color{red}{$sym}" end sym end elements = join([join([num(i,j) for j=1:5], " & ") for i=1:5], raw"\\\\") diag(i) = if i == house_step raw"\boldsymbol{\times}" else raw"\times" end Markdown.parse(""" ```math \\begin{align} $(join(["H_$i" for i in house_step:-1:1], "")) A = \\left(\\begin{matrix} $elements\\end{matrix}\\right) \\end{align} ``` """) end # ╔═╡ dced36eb-3b84-4d08-8268-0ffb831e39b5 md"the following code is adapted from the Julia standard library" # ╔═╡ 0dc268fa-2b71-4915-b73b-3f1de9fbc157 struct Reflector{T,RT,VT<:AbstractVector{T}} ξ::T normu::RT sqnormu::RT r::T y::VT # reflector vector end # ╔═╡ 871f62a3-2c9c-445e-b1dc-4cba363fa604 # compute "Householder" reflector @i function reflector!(R::Reflector{T,RT}, x::AbstractVector{T}) where {T,RT} @inbounds @invcheckoff if length(x) != 0 R.ξ += x[1] R.sqnormu += abs2(R.ξ) for i = 2:length(x) R.sqnormu += abs2(x[i]) end if !iszero(R.sqnormu) R.normu += sqrt(R.sqnormu) if real(R.ξ) < 0 NEG(R.normu) end R.ξ += R.normu R.y[1] -= R.normu for i = 2:length(x) R.y[i] += x[i] / R.ξ end R.r += R.ξ/R.normu end end end # ╔═╡ 6a50f7ac-b5a5-444d-9042-81b82cb66aec # apply reflector from left @i function reflectorApply!(vA::AbstractVector{T}, x::AbstractVector, τ::Number, A::StridedMatrix{T}) where T (m, n) ← size(A) @safe if length(x) != m || length(vA) != n throw(DimensionMismatch("reflector has length ($(length(x)), $(length(vA))), which must match the first dimension of matrix A, ($m, $n)")) end @inbounds @invcheckoff if m != 0 for j = 1:n # dot @routine @zeros T vAj vAj_τ vAj += A[1, j] for i = 2:m vAj += x[i]'*A[i, j] end @routine vAj_τ += τ' * vAj # `vAj_τ` can be uncomputed easily # ger A[1, j] -= vAj_τ for i = 2:m A[i, j] -= x[i]*vAj_τ end ~@routine SWAP(vA[j], vAj) ~@routine end end (m, n) → size(A) end # ╔═╡ bace7924-3aff-463f-9351-cc59191b469a struct QRPivotedRes{T,RT,VT} factors::Matrix{T} # resulting matrix τ::Vector{T} jpvt::Vector{Int} # pivot vector reflectors::Vector{Reflector{T,RT,VT}} # ~ half size of A (overhead) vAs::Vector{Vector{T}} # ~ half size of A (overhead) jms::Vector{Int} end # ╔═╡ b12bcfbb-c7f2-4dc9-821a-e59365bf6fa4 begin _norm(v) = sqrt(sum(x->abs2(NiLang.value(x)), v)) function indmaxcolumn(A::AbstractMatrix) mm = _norm(view(A, :, 1)) ii = 1 for i = 2:size(A, 2) mi = _norm(view(A, :, i)) if abs(mi) > mm mm = mi ii = i end end return ii end end; # ╔═╡ 12d52dc8-da0b-46dc-9251-eb0cc9a39a7e begin function alloc_qr(A::AbstractMatrix{T}) where T (m, n) = size(A) τ = zeros(T, min(m,n)) jpvt = collect(1:n) reflectors = Reflector{T,real(T),Vector{T}}[] vAs = Vector{T}[] jms = Int[] QRPivotedRes(zero(A), τ, jpvt, reflectors, vAs, jms) end function alloc_reflector(x::AbstractVector{T}) where T RT = real(T) Reflector(zero(T), zero(RT), zero(RT), zero(T), zero(x)) end end # ╔═╡ 923a5e2a-cc91-4404-913a-6e8012fa5834 @i function qr_pivoted!(res::QRPivotedRes, A::StridedMatrix{T}) where T m, n ← size(A) res.factors += A @inbounds @invcheckoff for j = 1:min(m,n) # Find column with maximum norm in trailing submatrix jm ← indmaxcolumn(view(res.factors, j:m, j:n)) + j - 1 # pivot columns if jm != j # Flip elements in pivoting vector SWAP(res.jpvt[jm], res.jpvt[j]) # Update matrix SWAP.(res.factors |> subarray(:, jm), res.factors |> subarray(:, j)) end # Compute 
# reflector of column j
R ← alloc_reflector(res.factors |> subarray(j:m, j))
vA ← zeros(T, n-j)
reflector!(R, res.factors |> subarray(j:m, j))
# Update trailing submatrix with the reflector
reflectorApply!(vA, R.y, R.r, res.factors |> subarray(j:m, j+1:n))
for i=1:length(R.y)
    SWAP(R.y[i], res.factors[j+i-1, j])
end
res.reflectors[end+1] ↔ R   # stack push
res.vAs[end+1] ↔ vA
res.jms[end+1] ↔ jm
end
@inbounds for i=1:length(res.reflectors)
    res.τ[i] += res.reflectors[i].r
end
m, n → size(A)
end

# ╔═╡ f8616bf4-02e6-4d66-a0f6-d35db701e82c
@testset "qr" begin
    for A in [randn(5, 5), randn(6, 4), randn(4, 6)]
        res = alloc_qr(A)
        res, = qr_pivoted!(res, copy(A))
        res2 = LinearAlgebra.qrfactPivotedUnblocked!(copy(A))
        @test res.factors ≈ res2.factors
        @test res.τ ≈ res2.τ
        @test res.jpvt ≈ res2.jpvt
    end
end

# ╔═╡ bee4ab06-4c60-4bec-89d0-b3c9a512f7a6
md"## Comparing with manual AD"

# ╔═╡ 717f51fb-bd0a-4bb4-a5fc-1f1cf5d56ed8
html"""
  • slower,
  • an extra space overhead of the size of the input matrix,
  • more stable, e.g. it can handle rank-deficient matrices,
  • works consistently for complex numbers.
"""

# ╔═╡ db6b6481-7e67-4418-b907-13b38c77bac7
md"## Performance"

# ╔═╡ a0be4807-52ed-4626-8009-97a79e36e2f1
let
    # Note: approximately 2x slower than the BLAS version
    Random.seed!(3)
    A = randn(ComplexF64, 200, 200)
    @benchmark LinearAlgebra.qrfactPivotedUnblocked!(copy($A))
end

# ╔═╡ 52dec342-d3de-4a88-8acb-0aa186bcc086
let
    Random.seed!(3)
    A = randn(ComplexF64, 200, 200)
    @benchmark qr_pivoted!(alloc_qr($A), copy($A))
end

# ╔═╡ 280c9363-9b49-44f1-a240-b7f205ffc56b
@benchmark let
    res = alloc_qr(A)
    res, A = qr_pivoted!(res, A)
    (~qr_pivoted!)(GVar(res), GVar(A))
end setup=(Random.seed!(3); A = randn(ComplexF64, 200, 200))

# ╔═╡ 0b3735c2-695c-4225-843e-16ca17aac0eb
md"""## Take-home message

1. Compared to irreversible computing, reversible computing is more **energy efficient**.
2. Reversible programming suffers from **polynomial time overhead and logarithmic space overhead** when differentiating an irreversible linear program. The overhead is much smaller when writing linear algebra functions.
3. Reversible embedded domain-specific language NiLang.jl: $(html"
") 2. It is easy to balance time and space when differentiating a program in a reversible programming language. 4. It is not always more reliable to differentiate a program by deriving the backward rule manually. 5. TODOs - Is it possible to port `LoopVectorization` to NiLang to write blas level reversible program? 6. **How to find this notebook?** In NiLang's Github repo, file: `notebooks/reversibleprog.jl` """ # ╔═╡ d7942b37-f821-494a-8f18-5f267aa3457a md""" ### References * Reeb, David, and Michael M. Wolf. "An improved Landauer principle with finite-size corrections." (2014). * Athas, William C., and L. J. Svensson. "Reversible logic issues in adiabatic CMOS." Proceedings Workshop on Physics and Computation. (1994). * Takeuchi, N., Y. Yamanashi, and N. Yoshikawa. "Reversible logic gate using adiabatic superconducting devices." (2014) * Griewank, Andreas. "Achieving logarithmic growth of temporal and spatial complexity in reverse automatic differentiation." Optimization Methods and software 1.1 (1992): 35-54. * Ming Li, John Tromp, Paul Vitanyi. "Reversible Simulation of Irreversible Computation by Pebble Games" (1997) """ # ╔═╡ 865f049f-54d0-4d21-860e-062262edcb58 md"# Removed" # ╔═╡ c3b730a4-d5b4-471e-bd06-30ace6e8b8fe let nodes_list = [[0], [0,1], [0,1,2], [0,2], [0,2,3], [0,2,3,4], [0,2,4], [0,1,2,4], [0,1,4], [0,4], [0,4,5], [0,4,5,6], [0,4,6], [0,4,6,7], [0,4,6,7,8], [0,4,6,8], [0,4,5,6,8], [0,4,5,8], [0,4,8], [0,1,4,8], [0,1,2,4,8], [0,2,4,8], [0,2,3,4,8], [0,2,3,8], [0,2,8], [0,1,2,8], [0,1,8], [0,8],[0,8,9], [0,8,9,10], [0,8,10], [0,8,10,11], [0,8,10,11,12], [0,8,10,12], [0,8,9,10,12], [0,8,9,12], [0,8,12], [0,8,12,13], [0,8,12,13,14], [0,8,12,14], [0,8,12,14,15], [0,8,12,14,15,16]] s = join([raw""" \def\y{"""*string(j-1)*raw"""} \node at (-1, -0.7*\y) {step \y}; \foreach \x in {0,...,16}{ \draw (0.5*\x-0.25, -0.7*\y-0.25) rectangle (0.5*\x+0.25, -0.7*\y+0.25); } \foreach \x in {"""*join(nodes, ",")*raw"""}{ \fill (0.5*\x, -0.7*\y) ellipse (0.2 and 0.15); } """ for (j, nodes) in enumerate(nodes_list)], "\n") TikzPicture(LaTeXString(s), options="scale=1.0") end # ╔═╡ 98f42f60-7870-4813-b0c1-728285c25f01 md"## Finding maximum, the reversible programming implementation" # ╔═╡ c3c63865-f538-4d93-bef3-6b9c69cb177f md"The naive implementation with linear space overhead" # ╔═╡ 66225c05-165e-4051-bb5e-4cfba579fd5b @i function i_find_maximum(m, y::AbstractVector, x::AbstractVector) where T @safe @assert !isempty(x) && length(y) == length(x) # error handling y[1] += x[1] for i=2:length(x) if x[i] > y[i-1] y[i] += x[i] else y[i] += y[i-1] end end m += y[end] end # ╔═╡ 14291e8d-001f-4e26-b094-addb970cf530 x = randn(17) # ╔═╡ bf670e51-06f8-4094-949c-ca2d02fd0d01 find_maximum(x) # ╔═╡ 416e53e4-6123-430f-b433-4334b7e85298 i_find_maximum(0.0, zero(x), x) # ╔═╡ edd5f8df-9abd-4254-abf6-33ae31d88a8d struct FindMaxState{T} m::T step::Int end # ╔═╡ c0ff9103-2aa8-45fd-bea5-1903c53196de let x, y = FindMaxState(5.0, 1), FindMaxState(2.0, 3) @instr x += y x, y end # ╔═╡ e17e70cb-2d82-4a08-9f6e-e50dcaa325e3 @i function i_find_maximum_step(t, s, x) t.step += s.step + 1 if x[t.step] > s.m t.m += x[t.step] # everything is mutable in NiLang else t.m += s.m end end # ╔═╡ ddc662d7-6901-4885-9d2c-1876a2c9d2ff let Random.seed!(3) x = randn(nstep) logger = NiLang.BennettLog() output = NiLang.bennett(i_find_maximum_step, FindMaxState(0.0, 0), FindMaxState(x[1], 1), x; k=bennett_k, N=length(x)-1, logger=logger)[2] Text("output = $(output.m)\n\n$logger") end # ╔═╡ d2001eb2-45cb-4f07-aa99-dd84996359b7 md"## The 
connection to automatic differentiation" # ╔═╡ fa3b6d6a-a55d-4097-8ad2-7dafb5f01d8c md""" * put rule: Only if there exists a pebble in grid $i$, you can move a pebble from your own pool to the grid $i+1$, * take rule: you can take a pebble from the board any time, * doodle rule: you can doodle grid $i$ only it when this grid has a pebble in it and grid $i+1$ is doodled, * end rule: doodle all grids. """ # ╔═╡ dabb4656-4aed-4168-8659-c0472528c41d md""" ## Optimal time """ # ╔═╡ 0367be06-4185-4add-a04b-f696c5a43638 TikzPicture(L""" \foreach[evaluate={\j=int(16-\y)}] \y in {0,...,16}{ \node at (-1, 0.7*\y) {step \j}; \foreach \x in {0,...,16}{ \draw (0.5*\x-0.25, 0.7*\y-0.25) rectangle (0.5*\x+0.25, 0.7*\y+0.25); } \foreach \x in {0,...,\j}{ \fill (0.5*\x, 0.7*\y) ellipse (0.2 and 0.15); } } """, options="scale=1.0") # ╔═╡ c79e7651-975b-407c-8c8c-d0c5653ec570 md" Space complexity: ``O(TS)`` Time complexity: ``O(T)`` " # ╔═╡ 3fed55d7-dbed-4fc9-8410-2633d5200db6 md""" ## Limited space """ # ╔═╡ 663180d2-8fe1-4996-a194-d38120ae05fd md"The recursive Bennett's time-space tradeoff" # ╔═╡ 39884e8a-bc83-4ff4-85bc-cfaccb4674f2 html""" """ # ╔═╡ c24e7391-a187-4a7e-aab7-93027f7db965 md"The space optimal solution for 16 grids requires recursive Bennett's algorithm" # ╔═╡ 12734842-d66f-4bfc-a9ad-01adeb2450e0 # stepfunc: step function # state: state dictionary, initial value should contain entry `state[base]` # k: compute `k` chunks forward and `k-1` chunks backward # base: starting point # len: number of steps to compute @i function bennett_alg!(stepfunc, state::Dict{Int,T}, k::Int, base, len, args...; kwargs...) where T if len == 1 # lowest level state[base+1] ← _zero(state[base]) stepfunc(state[base+1], state[base], args...; kwargs...) else @safe @assert len % k == 0 @routine begin # compute block size chunksize ← 0 start ← 0 chunksize += len ÷ k start += base for j=1:k-1 bennett_alg!(stepfunc, state, k, start, chunksize, args...; kwargs...) start += chunksize end end bennett_alg!(stepfunc, state, k, start, chunksize, args...; kwargs...) ~@routine end end # ╔═╡ 357fd442-6e6e-4b14-b2d0-efd3fa775d0b bennett_alg!(i_find_maximum_step, Dict(0=>FindMaxState(x[1], 1)), 2, 0, length(x)-1, x)[2] # ╔═╡ 1bca53ef-3438-4db5-9900-9fee71936a62 @i function loss(result, state, x) nstep ← length(x)-1 bennett_alg!((@skip! 
i_find_maximum_step), state, 2, 0, nstep, x) result += state[nstep].m nstep → length(x)-1 end # ╔═╡ 4e273d1f-a00e-49ca-9f8d-a0f6930550fb let @testset "qr pivoted gradient" begin Random.seed!(3) A = randn(ComplexF64, 5, 5) res = alloc_qr(A) res, = qr_pivoted!(res, copy(A)) res2 = LinearAlgebra.qrfactPivotedUnblocked!(copy(A)) @test res.factors ≈ res2.factors @test res.τ ≈ res2.τ @test res.jpvt ≈ res2.jpvt # rank deficient initial matrix n = 50 U = LinearAlgebra.qr(randn(n, n)).Q Σ = Diagonal((x=randn(n); x[n÷2+1:end] .= 0; x)) A = U*Σ*U' res = alloc_qr(A) @test rank(A) == n ÷ 2 qrres = qr_pivoted!(deepcopy(res), copy(A))[1] @test count(x->(x>1e-12), sum(abs2, QRPivoted(qrres.factors, qrres.τ, qrres.jpvt).R, dims=2)) == n ÷ 2 #A = randn(ComplexF64, n, n) @i function loss(y, qrres, A) qr_pivoted!(qrres, A) y += abs(qrres.factors[1]) end nrloss(A) = loss(0.0, deepcopy(res), A)[1] ngA = zero(A) δ = 1e-5 for j=1:size(A, 2) for i=1:size(A, 1) A_ = copy(A) A_[i,j] -= δ/2 l1 = nrloss(copy(A_)) A_[i,j] += δ l2 = nrloss(A_) ngA[i,j] = (l2-l1)/δ end end gA = NiLang.AD.gradient(loss, (0.0, res, A); iloss=1)[3] @test real.(gA) ≈ ngA end end # ╔═╡ 50f7070d-9f6f-4025-9be3-13812c3000eb let x = [1.0, 3.0, 2.0, 1.3, -1.0] NiLang.gradient(loss, (0.0, Dict(0=>FindMaxState(x[1], 1)), x); iloss=1) end # ╔═╡ 00000000-0000-0000-0000-000000000001 PLUTO_PROJECT_TOML_CONTENTS = """ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Compose = "a81c6b42-2e10-5240-aca2-a61377ecd94b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NiLang = "ab4ef3a6-0b42-11ea-31f6-e34652774712" PlutoUI = "7f904dfe-b85e-4ff6-b463-dae2292396a8" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TikzPictures = "37f6aa50-8035-52d0-81c2-5a1d08754b2d" TreeverseAlgorithm = "e1c63c57-2fea-45bf-a8bf-df3ea6afb545" Viznet = "52a3aca4-6234-47fd-b74a-806bdf78ede9" [compat] BenchmarkTools = "~1.0.0" Compose = "~0.9.2" ForwardDiff = "~0.10.18" NiLang = "~0.9.1" PlutoUI = "~0.7.9" Revise = "~3.1.17" TikzPictures = "~3.3.3" TreeverseAlgorithm = "~0.1.0" Viznet = "~0.3.3" """ # ╔═╡ 00000000-0000-0000-0000-000000000002 PLUTO_MANIFEST_TOML_CONTENTS = """ # This file is machine-generated - editing it directly is not advised [[ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" [[Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" [[BenchmarkTools]] deps = ["JSON", "Logging", "Printf", "Statistics", "UUIDs"] git-tree-sha1 = "01ca3823217f474243cc2c8e6e1d1f45956fe872" uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" version = "1.0.0" [[Bzip2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2" uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" version = "1.0.8+0" [[Cairo_jll]] deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] git-tree-sha1 = "f2202b55d816427cd385a9a4f3ffb226bee80f99" uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" version = "1.16.1+0" [[ChainRulesCore]] deps = ["Compat", "LinearAlgebra", "SparseArrays"] git-tree-sha1 = "be770c08881f7bb928dfd86d1ba83798f76cf62a" uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" version = "0.10.9" [[CodeTracking]] deps = ["InteractiveUtils", "UUIDs"] git-tree-sha1 = 
"8ad457cfeb0bca98732c97958ef81000a543e73e" uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" version = "1.0.5" [[ColorTypes]] deps = ["FixedPointNumbers", "Random"] git-tree-sha1 = "024fe24d83e4a5bf5fc80501a314ce0d1aa35597" uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" version = "0.11.0" [[Colors]] deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] git-tree-sha1 = "417b0ed7b8b838aa6ca0a87aadf1bb9eb111ce40" uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" version = "0.12.8" [[CommonSubexpressions]] deps = ["MacroTools", "Test"] git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" version = "0.3.0" [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] git-tree-sha1 = "dc7dedc2c2aa9faf59a55c622760a25cbefbe941" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" version = "3.31.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" [[Compose]] deps = ["Base64", "Colors", "DataStructures", "Dates", "IterTools", "JSON", "LinearAlgebra", "Measures", "Printf", "Random", "Requires", "Statistics", "UUIDs"] git-tree-sha1 = "c6461fc7c35a4bb8d00905df7adafcff1fe3a6bc" uuid = "a81c6b42-2e10-5240-aca2-a61377ecd94b" version = "0.9.2" [[DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" version = "0.18.9" [[Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" [[DelimitedFiles]] deps = ["Mmap"] uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" [[Dierckx]] deps = ["Dierckx_jll"] git-tree-sha1 = "5fefbe52e9a6e55b8f87cb89352d469bd3a3a090" uuid = "39dd38d3-220a-591b-8e3c-4c3a8c710a94" version = "0.5.1" [[Dierckx_jll]] deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] git-tree-sha1 = "a580560f526f6fc6973e8bad2b036514a4e3b013" uuid = "cd4c43a9-7502-52ba-aa6d-59fb2a88580b" version = "0.0.1+0" [[DiffResults]] deps = ["StaticArrays"] git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" version = "1.0.3" [[DiffRules]] deps = ["NaNMath", "Random", "SpecialFunctions"] git-tree-sha1 = "214c3fcac57755cfda163d91c58893a8723f93e9" uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" version = "1.0.2" [[Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[DocStringExtensions]] deps = ["LibGit2"] git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.8.5" [[Downloads]] deps = ["ArgTools", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" [[Expat_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "b3bfd02e98aedfa5cf885665493c5598c350cd2f" uuid = "2e619515-83b5-522b-bb60-26c02a35a201" version = "2.2.10+0" [[FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" [[FixedPointNumbers]] deps = ["Statistics"] git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" version = "0.8.4" [[Fontconfig_jll]] deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] git-tree-sha1 = "21efd19106a55620a188615da6d3d06cd7f6ee03" uuid = 
"a3f928ae-7b40-5064-980b-68af3947d34b" version = "2.13.93+0" [[ForwardDiff]] deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "NaNMath", "Printf", "Random", "SpecialFunctions", "StaticArrays"] git-tree-sha1 = "e2af66012e08966366a43251e1fd421522908be6" uuid = "f6369f11-7733-5829-9624-2563aa707210" version = "0.10.18" [[FreeType2_jll]] deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] git-tree-sha1 = "87eb71354d8ec1a96d4a7636bd57a7347dde3ef9" uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" version = "2.10.4+0" [[Gettext_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] git-tree-sha1 = "9b02998aba7bf074d14de89f9d37ca24a1a0b046" uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" version = "0.21.0+0" [[Glib_jll]] deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE_jll", "Pkg", "Zlib_jll"] git-tree-sha1 = "47ce50b742921377301e15005c96e979574e130b" uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" version = "2.68.1+0" [[InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[IterTools]] git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18" uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" version = "1.3.0" [[JLLWrappers]] deps = ["Preferences"] git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" version = "1.3.0" [[JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" version = "0.21.1" [[JpegTurbo_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "d735490ac75c5cb9f1b00d8b5509c11984dc6943" uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" version = "2.1.0+0" [[JuliaInterpreter]] deps = ["CodeTracking", "InteractiveUtils", "Random", "UUIDs"] git-tree-sha1 = "31c2eee64c1eee6e8e3f30d5a03d4b5b7086ab29" uuid = "aa1ae85d-cabe-5617-a682-6adf51b2e16a" version = "0.8.18" [[LZO_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "e5b909bcf985c5e2605737d2ce278ed791b89be6" uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" version = "2.10.1+0" [[LaTeXStrings]] git-tree-sha1 = "c7f1c695e06c01b95a67f0cd1d34994f3e7db104" uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" version = "1.2.1" [[LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" [[LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" [[LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" [[Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [[Libffi_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "761a393aeccd6aa92ec3515e428c26bf99575b3b" uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" version = "3.2.2+0" [[Libgcrypt_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"] git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae" uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" version = "1.8.7+0" [[Libgpg_error_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9" uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" version = "1.42.0+0" [[Libiconv_jll]] deps = 
["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778" uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" version = "1.16.1+1" [[Libmount_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "9c30530bf0effd46e15e0fdcf2b8636e78cbbd73" uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" version = "2.35.0+0" [[Libtiff_jll]] deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Pkg", "Zlib_jll", "Zstd_jll"] git-tree-sha1 = "340e257aada13f95f98ee352d316c3bed37c8ab9" uuid = "89763e89-9b03-5906-acba-b20f662cd828" version = "4.3.0+0" [[Libuuid_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "7f3efec06033682db852f8b3bc3c1d2b0a0ab066" uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" version = "2.36.0+0" [[LinearAlgebra]] deps = ["Libdl"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[LittleCMS_jll]] deps = ["JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pkg"] git-tree-sha1 = "e6ea89d915cdad8d264f7f9158c6664f879edcde" uuid = "d3a379c0-f9a3-5b72-a4c0-6bf4d2e8af0f" version = "2.9.0+0" [[LogExpFunctions]] deps = ["DocStringExtensions", "LinearAlgebra"] git-tree-sha1 = "1ba664552f1ef15325e68dc4c05c3ef8c2d5d885" uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" version = "0.2.4" [[LogarithmicNumbers]] deps = ["Random", "Requires"] git-tree-sha1 = "d88b70111754e3660f80d3596a343ce42bf5ee84" uuid = "aa2f6b4e-9042-5d33-9679-40d3a6b85899" version = "0.4.2" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[LoweredCodeUtils]] deps = ["JuliaInterpreter"] git-tree-sha1 = "4bfb8b57df913f3b28a6bd3bdbebe9a50538e689" uuid = "6f1432cf-f94c-5a45-995e-cdbf5db27b0b" version = "2.1.0" [[MacroTools]] deps = ["Markdown", "Random"] git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" version = "0.5.6" [[Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[MatchCore]] git-tree-sha1 = "90af9fe333f8c9851f952dfa7f335185c94567c0" uuid = "5dd3f0b1-72a9-48ad-ae6e-79f673da005f" version = "0.1.1" [[MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" [[Measures]] git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" version = "0.3.1" [[Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" [[NaNMath]] git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" version = "0.3.5" [[NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" [[NiLang]] deps = ["FixedPointNumbers", "LinearAlgebra", "LogarithmicNumbers", "MatchCore", "NiLangCore", "Reexport", "SparseArrays", "TupleTools"] git-tree-sha1 = "3fe439482d8c08a15f929ae7278a6c7f737672d5" uuid = "ab4ef3a6-0b42-11ea-31f6-e34652774712" version = "0.9.1" [[NiLangCore]] deps = ["MatchCore", "TupleTools"] git-tree-sha1 = "239f97ea947531cfe7a596746e31c8429c7169b9" uuid = "575d3204-02a4-11ea-3f62-238caa8bf11e" version = "0.10.3" [[OpenJpeg_jll]] deps = ["Libdl", "Libtiff_jll", "LittleCMS_jll", "Pkg", "libpng_jll"] git-tree-sha1 = "e330ffff1c6a593fa44cc40c29900bee82026406" uuid = "643b3616-a352-519d-856d-80112ee9badc" version = "2.3.1+0" [[OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" version = "0.5.5+0" [[OrderedCollections]] git-tree-sha1 = 
"85f8e6578bf1f9ee0d11e7bb1b1456435479d47c" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.4.1" [[PCRE_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "b2a7af664e098055a7529ad1a900ded962bca488" uuid = "2f80f16e-611a-54ab-bc61-aa92de5b98fc" version = "8.44.0+0" [[Parsers]] deps = ["Dates"] git-tree-sha1 = "c8abc88faa3f7a3950832ac5d6e690881590d6dc" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" version = "1.1.0" [[Pixman_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "b4f5d02549a10e20780a24fce72bea96b6329e29" uuid = "30392449-352a-5448-841d-b1acce4e97dc" version = "0.40.1+0" [[Pkg]] deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[PlutoUI]] deps = ["Base64", "Dates", "InteractiveUtils", "JSON", "Logging", "Markdown", "Random", "Reexport", "Suppressor"] git-tree-sha1 = "44e225d5837e2a2345e69a1d1e01ac2443ff9fcb" uuid = "7f904dfe-b85e-4ff6-b463-dae2292396a8" version = "0.7.9" [[Poppler_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "OpenJpeg_jll", "Pkg", "libpng_jll"] git-tree-sha1 = "e11443687ac151ac6ef6699eb75f964bed8e1faa" uuid = "9c32591e-4766-534b-9725-b71a8799265b" version = "0.87.0+2" [[Preferences]] deps = ["TOML"] git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a" uuid = "21216c6a-2e73-6563-6e65-726566657250" version = "1.2.2" [[Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[Random]] deps = ["Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[Reexport]] git-tree-sha1 = "5f6c21241f0f655da3952fd60aa18477cf96c220" uuid = "189a3867-3050-52da-a836-e630ba90ab69" version = "1.1.0" [[Requires]] deps = ["UUIDs"] git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621" uuid = "ae029012-a4dd-5104-9daa-d747884805df" version = "1.1.3" [[Revise]] deps = ["CodeTracking", "Distributed", "FileWatching", "JuliaInterpreter", "LibGit2", "LoweredCodeUtils", "OrderedCollections", "Pkg", "REPL", "Requires", "UUIDs", "Unicode"] git-tree-sha1 = "410bbe13d9a7816e862ed72ac119bda7fb988c08" uuid = "295af30f-e4ad-537b-8983-00126c2a3abe" version = "3.1.17" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [[SharedArrays]] deps = ["Distributed", "Mmap", "Random", "Serialization"] uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" [[Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" [[SparseArrays]] deps = ["LinearAlgebra", "Random"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[SpecialFunctions]] deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"] git-tree-sha1 = "a50550fa3164a8c46747e62063b4d774ac1bcf49" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" version = "1.5.1" [[StaticArrays]] deps = ["LinearAlgebra", "Random", "Statistics"] git-tree-sha1 = "745914ebcd610da69f3cb6bf76cb7bb83dcb8c9a" uuid = "90137ffa-7385-5640-81b9-e52037218182" version = "1.2.4" [[Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [[Suppressor]] git-tree-sha1 = "a819d77f31f83e5792a76081eee1ea6342ab8787" uuid = "fd094767-a336-5f1f-9728-57cf17d0bbfb" version = "0.2.0" [[TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" 
[[Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" [[Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[TikzPictures]] deps = ["LaTeXStrings", "Poppler_jll", "Requires"] git-tree-sha1 = "06b36e2baa9b97814ef1993207b71e2e23e9efb5" uuid = "37f6aa50-8035-52d0-81c2-5a1d08754b2d" version = "3.3.3" [[TreeverseAlgorithm]] deps = ["Requires"] git-tree-sha1 = "4292bc608573c2047fd12b0a611787e77f5595ba" uuid = "e1c63c57-2fea-45bf-a8bf-df3ea6afb545" version = "0.1.0" [[TupleTools]] git-tree-sha1 = "62a7a6cd5a608ff71cecfdb612e67a0897836069" uuid = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" version = "1.2.0" [[UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [[Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[Viznet]] deps = ["Compose", "Dierckx"] git-tree-sha1 = "7a022ae6ac8b153d47617ed8c196ce60645689f1" uuid = "52a3aca4-6234-47fd-b74a-806bdf78ede9" version = "0.3.3" [[XML2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a" uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" version = "2.9.12+0" [[XSLT_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"] git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a" uuid = "aed1982a-8fda-507f-9586-7b0439959a61" version = "1.1.34+0" [[Xorg_libX11_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527" uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" version = "1.6.9+4" [[Xorg_libXau_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e" uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" version = "1.0.9+4" [[Xorg_libXdmcp_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4" uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" version = "1.1.3+4" [[Xorg_libXext_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3" uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" version = "1.3.4+4" [[Xorg_libXrender_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] git-tree-sha1 = "19560f30fd49f4d4efbe7002a1037f8c43d43b96" uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" version = "0.9.10+4" [[Xorg_libpthread_stubs_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb" uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" version = "0.1.0+3" [[Xorg_libxcb_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6" uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" version = "1.13.0+3" [[Xorg_xtrans_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845" uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" version = "1.4.0+3" [[Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" [[Zstd_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "cc4bf3fdde8b7e3e9fa0351bdeedba1cf3b7f6e6" uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" version = "1.5.0+0" [[libpng_jll]] deps = ["Artifacts", 
"JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] git-tree-sha1 = "94d180a6d2b5e55e447e2d27a29ed04fe79eb30c" uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f" version = "1.6.38+0" [[nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" [[p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" """ # ╔═╡ Cell order: # ╟─e20e2d2e-4b28-4e32-8d80-ce029928a094 # ╟─f3e235e7-76b9-4c39-bc70-038539838ff4 # ╟─8308df59-3faa-4abf-8f05-119bbae48f64 # ╟─a3532a83-9fd3-4d24-b1bb-b52457317e51 # ╟─15657e4b-848e-43ad-a99f-37143d11705e # ╟─34a6b7f4-7d72-485d-86dc-f4b1ba6174eb # ╟─67d1b500-964e-4668-a7d0-ed93886446ca # ╟─673992ed-6963-400a-a69b-d65d26c4f443 # ╟─6078758e-b392-4bdb-a1e7-44b135ce900e # ╠═41e06e2a-b482-4e0f-8569-fee2ffd8aaaf # ╟─dcf53d46-e259-4101-8530-9621094ee586 # ╟─6a88d26c-c895-4852-ab4f-37297b848731 # ╟─f9675365-36aa-430c-b747-3bc4f602e6fb # ╟─46eb4ba9-dce6-4711-9c4d-3f16de6240de # ╟─046f7559-4af9-4982-b5c3-335add0911d7 # ╟─0a039bfa-571e-4fad-b73c-1324d08777fc # ╟─3f1e4d7a-32a7-4c7e-92dd-465bac925e63 # ╟─f68bcfb6-97ce-48d1-b0b8-e8466d4ac879 # ╟─3d4ba750-8d62-48ac-bf96-691397689ddc # ╟─f4cb9212-181f-4338-b858-1d99c7f415e9 # ╟─31bde262-6352-4be0-b5cc-1781e3df2268 # ╟─83ff3fc3-bcd8-4235-a42f-1d75c7d6aa5b # ╟─b308e270-6b40-4946-ac92-c705823f2c1e # ╟─e483b3d4-d01c-4a98-8e68-e8120a7d95a7 # ╟─20c34526-c7c4-11eb-21fa-d706fd684a4c # ╟─3f96abdf-fb5f-4d79-a288-e20b8c1f55d1 # ╟─5d51231a-8bf0-4414-9a39-cea264df84f2 # ╠═3b0fd2b5-5c6d-4d56-9e48-cda1493b4c72 # ╟─e10e0be8-b26e-4719-92dc-8ca46af0b4b5 # ╟─b1a9946b-82b4-4954-8bb9-5df035eaefe4 # ╠═00342a51-36d8-4fdd-aab7-ee02e2122c49 # ╠═40c1c48d-0e5a-4a47-b7e4-8f7666281249 # ╠═59df1f80-9be1-4b26-b263-ca0c7a0b9ab7 # ╟─8320d326-c1ab-4807-befb-13dda3480bf5 # ╟─93238608-3b86-49f1-ad60-9360e12cff1c # ╟─f657c3fb-e140-4c76-8065-54f1cb6d05eb # ╠═629a2549-745c-48a2-9bbc-a8f5fb046d11 # ╟─640e0029-7931-4afd-bdf9-fed317efbd8e # ╟─6930345b-6e93-4b35-8d4f-91ad49141fa1 # ╠═090522bf-0ff2-4022-8460-aec6e37e936a # ╠═88c30609-2f42-405e-a14c-dfab44aef23b # ╟─2dc665a9-b131-4fef-acde-db346eb0f48b # ╟─4e479f48-42cd-476d-8604-08ecbb503a90 # ╟─dc41e99a-f598-4bf6-9f76-ecdb04f5f40c # ╟─97e0bae1-69ac-4cbf-b9d9-6b38180edd78 # ╟─355ba831-6be0-456a-8f94-36acd2365f17 # ╠═003c3e68-600e-4688-832b-5e061572b128 # ╠═1fb196f9-0f0a-42dc-b094-077cdf18d13d # ╠═aa9a679e-63bc-4951-b6f4-65316e212bc8 # ╠═e6fd3c2d-cadd-40d7-a575-b1e68c45ee13 # ╟─02b7e1b4-4622-4e68-966e-ff79817557d1 # ╟─364fd613-0ebd-4b45-a3fd-f9baa8c487e3 # ╟─75d8283a-b331-4648-84a8-489e168e33f9 # ╟─2207c2fb-4a52-4766-8dd3-03872744aa74 # ╠═288331c3-2dfb-4941-985f-554be409c0ab # ╠═12a30359-d6ff-4113-bab5-b198e908cf1a # ╠═23ea88b4-6b89-462a-92da-0e8bdf5c73b5 # ╟─4bfdabd6-b7ff-40fb-b567-52910acb5a07 # ╟─2603147b-e7a2-4cae-b88f-2cfebe16bacb # ╟─12d49e2e-cc6e-48d6-b11a-e7c311453bfc # ╟─10312dc7-e861-4c89-b2fd-672cfe8850bf # ╟─03f58f2a-24b6-4235-9102-71a19b9679ac # ╠═22a5853e-4f9a-4da0-bc03-84a6b0061cfe # ╠═1ecdc4d2-b3ca-4f5c-a454-0f0bc51b6ec2 # ╟─927ea209-2ccd-48d5-b69e-0a3c735bb496 # ╟─962b204c-8195-4938-944c-b7c4a52e70bd # ╠═af58a0f8-e3fd-465f-b1ae-6fbd94123c91 # ╠═4f993061-c6c3-4acb-aef3-8453e7b83997 # ╠═6e5cf9bb-7cab-4da1-8831-541e0ee3bde8 # ╠═320e4114-f0da-4106-90ab-a9f7b0ef0099 # ╟─2d944e8d-e19d-48be-ab7f-c3e54e9f43ef # ╟─4f53fa8e-ea9b-461f-8199-7bbe2a3ef544 # ╟─56ea4f5f-ea88-46d6-beed-b9a7afed315d # ╟─124a9ecd-0bda-4823-9507-92efcf449d9c # ╟─6819498c-7a46-48fa-9eec-38341dca72f9 # ╟─400f79cf-9260-4195-9582-0e8c486ddb5a # ╟─2dad3acd-332c-46b9-8f84-21c076bdef41 # 
╟─a674bde5-70e1-4d21-aedf-8977f8039c36 # ╟─bf696784-23d4-42b2-8ee1-bfec11ff8d78 # ╠═a7810352-7967-460d-abd7-361a324c20a9 # ╠═9193fcbb-ec4f-41b4-8fca-be8e183dea31 # ╠═a56445b5-e530-4035-9ac6-a2d196a6276a # ╠═3fadf1d4-8fa2-4c02-aa21-b7969b465536 # ╠═12e933ca-13f3-414c-926a-f1bb1bbe66cf # ╟─4403c183-5eeb-4fd0-87a4-4ad29e1f4dc2 # ╠═4c3f9b91-4f27-4fb8-a9db-1ddbfd62dbdd # ╠═af71e2d7-a600-46fd-9a46-1b9d4607f06d # ╟─1d3c7324-6828-47aa-b30e-bcac0e052213 # ╠═2993fe1f-042b-4c85-85b8-bcc7ed449a54 # ╟─048d482e-3e5b-496f-95d7-17589a5f6f11 # ╟─e22bbe66-56f5-40be-b464-1f8651e6a6ac # ╟─28767d73-47a8-4f4b-b3dd-146c4ae3e038 # ╟─ea907d51-d4a9-48ca-90a5-bd91309ccfad # ╟─3aa99be5-6747-4163-9bb6-ed8cd5ce19f6 # ╟─2f0263f5-2ace-4d4b-8d71-cee26c03122e # ╟─03a21468-c2fe-4df7-ae8a-38b28a0efe2f # ╟─f326d8e5-7117-4eed-b30d-0f64e5974426 # ╟─b9af3db6-5725-4190-b96c-f3fa41f07c93 # ╟─47cf7e85-c49f-4618-9689-e0de789625f6 # ╟─2fcf48fd-bedc-41d9-a433-68efd5dd0d20 # ╟─5060f5bb-8430-42aa-b61d-c88249edb323 # ╟─55dd53ed-6420-4426-95ae-feb47bf50f22 # ╟─c62a9f94-457f-496b-bee0-bb0db02aca5d # ╟─bccadcb5-6d9f-4a70-b7c4-74e0e3d5f8c8 # ╟─b308ecb0-070e-4ad8-8009-dc60e75bbe01 # ╟─9123e669-19c7-47c1-a924-0c618b4a9c1f # ╟─83d0c5fc-9cb7-4e37-bfdb-ed630b94d9b4 # ╠═c6bd40af-50ed-4cee-8043-60b2bac05058 # ╟─3d2feca5-43d3-4a46-ba1c-849c5ceeb676 # ╟─7ca616d1-caed-40cf-b768-b07af81a654d # ╠═ddc662d7-6901-4885-9d2c-1876a2c9d2ff # ╟─a54d5fc4-643c-47d2-be97-4626a060c9b4 # ╟─8b556fbe-275f-4e7b-94a0-7434ce81ad8b # ╟─8e9c55f6-8175-4cb4-8798-91107c4d16ee # ╠═873ef2c2-653e-425e-9732-b1ed19f7a0b7 # ╟─972d889c-c48c-470a-b710-aba9ecaacdaa # ╟─54b3a283-7d42-431f-9ecb-48f37198409a # ╠═590f56f7-5654-4493-9103-02a0fce6e945 # ╟─11964529-8e88-4743-a725-57fc5c525649 # ╟─45448141-214d-4e08-978e-5d1d25f763cd # ╟─dfe3cc09-6e27-4166-9a4e-2dd22f1e08a2 # ╟─3d07e6d1-2964-4718-b7bc-114d82389aa4 # ╟─c00cd648-d685-4a17-9ddf-39c06dd5f066 # ╟─5390c3ba-1507-4db9-9972-8295cbe493bc # ╟─33a0bda1-c943-4792-bc3d-fbf1adf16d0a # ╟─a2e81e79-3f85-4eef-8639-f455f9165a25 # ╟─0a04c470-8f5a-4b56-b2e2-5b32e3b944f3 # ╟─dced36eb-3b84-4d08-8268-0ffb831e39b5 # ╠═141e21c0-1bdf-4e6b-b76d-129567a1180f # ╠═0dc268fa-2b71-4915-b73b-3f1de9fbc157 # ╠═871f62a3-2c9c-445e-b1dc-4cba363fa604 # ╠═6a50f7ac-b5a5-444d-9042-81b82cb66aec # ╠═bace7924-3aff-463f-9351-cc59191b469a # ╠═923a5e2a-cc91-4404-913a-6e8012fa5834 # ╟─b12bcfbb-c7f2-4dc9-821a-e59365bf6fa4 # ╠═12d52dc8-da0b-46dc-9251-eb0cc9a39a7e # ╠═ac35b26c-0585-4d2a-8fbf-bda9b141d6af # ╠═f8616bf4-02e6-4d66-a0f6-d35db701e82c # ╠═4e273d1f-a00e-49ca-9f8d-a0f6930550fb # ╟─bee4ab06-4c60-4bec-89d0-b3c9a512f7a6 # ╟─717f51fb-bd0a-4bb4-a5fc-1f1cf5d56ed8 # ╟─db6b6481-7e67-4418-b907-13b38c77bac7 # ╠═d4726239-81af-4792-8472-c680508449c6 # ╠═a0be4807-52ed-4626-8009-97a79e36e2f1 # ╠═52dec342-d3de-4a88-8acb-0aa186bcc086 # ╠═280c9363-9b49-44f1-a240-b7f205ffc56b # ╟─0b3735c2-695c-4225-843e-16ca17aac0eb # ╟─d7942b37-f821-494a-8f18-5f267aa3457a # ╟─865f049f-54d0-4d21-860e-062262edcb58 # ╟─c3b730a4-d5b4-471e-bd06-30ace6e8b8fe # ╟─98f42f60-7870-4813-b0c1-728285c25f01 # ╟─c3c63865-f538-4d93-bef3-6b9c69cb177f # ╠═66225c05-165e-4051-bb5e-4cfba579fd5b # ╠═14291e8d-001f-4e26-b094-addb970cf530 # ╠═bf670e51-06f8-4094-949c-ca2d02fd0d01 # ╠═416e53e4-6123-430f-b433-4334b7e85298 # ╠═edd5f8df-9abd-4254-abf6-33ae31d88a8d # ╠═c0ff9103-2aa8-45fd-bea5-1903c53196de # ╠═e17e70cb-2d82-4a08-9f6e-e50dcaa325e3 # ╠═357fd442-6e6e-4b14-b2d0-efd3fa775d0b # ╟─d2001eb2-45cb-4f07-aa99-dd84996359b7 # ╠═1bca53ef-3438-4db5-9900-9fee71936a62 # ╠═50f7070d-9f6f-4025-9be3-13812c3000eb # 
╟─fa3b6d6a-a55d-4097-8ad2-7dafb5f01d8c # ╟─dabb4656-4aed-4168-8659-c0472528c41d # ╟─0367be06-4185-4add-a04b-f696c5a43638 # ╟─c79e7651-975b-407c-8c8c-d0c5653ec570 # ╟─3fed55d7-dbed-4fc9-8410-2633d5200db6 # ╟─663180d2-8fe1-4996-a194-d38120ae05fd # ╟─39884e8a-bc83-4ff4-85bc-cfaccb4674f2 # ╟─c24e7391-a187-4a7e-aab7-93027f7db965 # ╠═12734842-d66f-4bfc-a9ad-01adeb2450e0 # ╟─00000000-0000-0000-0000-000000000001 # ╟─00000000-0000-0000-0000-000000000002 ================================================ FILE: src/NiLang.jl ================================================ module NiLang using Reexport @reexport using NiLangCore import NiLangCore: invtype using FixedPointNumbers: Q20f43, Fixed import NiLangCore: empty_global_stacks!, loaddata export Fixed43 const Fixed43 = Q20f43 include("utils.jl") include("wrappers.jl") include("vars.jl") include("instructs.jl") include("ulog.jl") include("complex.jl") include("autobcast.jl") include("macros.jl") include("autodiff/autodiff.jl") include("stdlib/stdlib.jl") include("deprecations.jl") export AD project_relative_path(xs...) = normpath(joinpath(dirname(dirname(pathof(@__MODULE__))), xs...)) end # module ================================================ FILE: src/autobcast.jl ================================================ export AutoBcast """ AutoBcast{T,N} <: IWrapper{T} A vectorized variable. """ struct AutoBcast{T,N} <: IWrapper{T} x::Vector{T} end AutoBcast(x::Vector{T}) where {T} = AutoBcast{T, length(x)}(x) AutoBcast(x::AutoBcast{T,N}) where {T,N} = x # to avoid ambiguity error AutoBcast{T,N}(x::AutoBcast{T,N}) where {T,N} = x value(x::AutoBcast) = x.x NiLangCore.chfield(x::AutoBcast, ::typeof(value), xval) = chfield(x, Val(:x), xval) Base.zero(x::AutoBcast) = AutoBcast(zero(x.x)) Base.zero(::Type{AutoBcast{T,N}}) where {T,N} = AutoBcast{T,N}(zeros(T, N)) Base.length(ab::AutoBcast{T,N}) where {T, N} = N for F1 in [:(Base.:-), :INC, :FLIP, :DEC] @eval function $F1(a!::AutoBcast) @instr @invcheckoff @inbounds for i=1:length(a!) $F1(a!.x[i]) end a! 
end end for F2 in [:SWAP, :((inf::PlusEq)), :((inf::MinusEq)), :((inf::XorEq))] F2 != :SWAP && @eval function $F2(a::AutoBcast, b::Real) @instr @invcheckoff @inbounds for i=1:length(a) $F2(a.x[i], b) end a, b end @eval function $F2(a::AutoBcast, b::AutoBcast) @instr @invcheckoff @inbounds for i=1:length(a) $F2(a.x[i], b.x[i]) end a, b end end for F3 in [:ROT, :IROT, :((inf::PlusEq)), :((inf::MinusEq)), :((inf::XorEq))] if !(F3 in [:ROT, :IROT]) @eval function $F3(a::AutoBcast, b::Real, c::Real) @instr @invcheckoff @inbounds for i=1:length(a) $F3(a.x[i], b, c) end a, b, c end @eval function $F3(a::AutoBcast, b::Real, c::AutoBcast) @instr @invcheckoff for i=1:length(a) $F3(a.x[i], b, c.x[i]) end a, b, c end end @eval function $F3(a::AutoBcast, b::AutoBcast, c::Real) @instr @invcheckoff @inbounds for i=1:length(a) $F3(a.x[i], b.x[i], c) end a, b, c end @eval function $F3(a::AutoBcast, b::AutoBcast, c::AutoBcast) @instr @invcheckoff @inbounds for i=1:length(a) $F3(a.x[i], b.x[i], c.x[i]) end a, b, c end end (f::PlusEq{typeof(identity)})(x::T, y::T) where T<:AutoBcast = invoke(f, Tuple{T,T} where T, x, y) (f::MinusEq{typeof(identity)})(x::T, y::T) where T<:AutoBcast = invoke(f, Tuple{T,T} where T, x, y) ================================================ FILE: src/autodiff/autodiff.jl ================================================ module AD using ..NiLang using NiLangCore using MLStyle, TupleTools import ..NiLang: ROT, IROT, SWAP, chfield, value, NoGrad, INC, DEC, HADAMARD, AddConst, SubConst, NEG, INV using NiLangCore: default_constructor export GVar, grad, Loss, NoGrad, @nograd include("vars.jl") include("stack.jl") include("gradfunc.jl") include("checks.jl") include("instructs.jl") include("ulog.jl") include("jacobian.jl") include("hessian_backback.jl") include("complex.jl") end ================================================ FILE: src/autodiff/checks.jl ================================================ export check_grad, nparams export gradient_numeric using FixedPointNumbers: Fixed @nospecialize isvar(x) = nparams(x) != 0 nparams(model) = nparams(NiLangCore.type2tuple(model)) nparams(x::AbstractArray{<:AbstractFloat}) = length(x) nparams(x::AbstractArray{<:GVar}) = length(x) nparams(x::AbstractArray) = sum(nparams, x) nparams(x::Fixed) = 1 function nparams(x::Union{Tuple,NamedTuple}) res = 0 for xi in x res += nparams(xi) end res end nparams(x::AbstractFloat) = 1 nparams(x::GVar) = 1 function tset(vfunc::Function, tp::Tuple, iloss) map(i->i===iloss ? vfunc(tp[i]) : tp[i], (1:length(tp)...,)) end function tset(value, tp::Tuple, iloss) map(i->i===iloss ? value : tp[i], (1:length(tp)...,)) end function update_var(args, iarg, i::Int, val) args[iarg][i] += val args end function update_var(args, iarg, ::Nothing, val) tset(x->chfield(x, value, value(x) + val), args, iarg) end function ng_single(::Type{T}, f, args, kwargs, iarg, i, iloss, δ) where T args = update_var(args, iarg, i, T(δ/2)) @instr f(args...; kwargs...) pos = value(args[iloss]) @instr (~f)(args...; kwargs...) args = update_var(args, iarg, i, -T(δ)) @instr f(args...; kwargs...) neg = value(args[iloss]) @instr (~f)(args...; kwargs...) args = update_var(args, iarg, i, T(δ/2)) (pos - neg)/δ end function ng_single(::Type{T}, f, args, kwargs, iarg, i, iloss, δ) where T<:Complex res = zero(T) for dd = [δ, im*δ] args = update_var(args, iarg, i, dd/2) @instr f(args...; kwargs...) pos = value(args[iloss]) @instr (~f)(args...; kwargs...) args = update_var(args, iarg, i, -dd) @instr f(args...; kwargs...) 
neg = value(args[iloss]) @instr (~f)(args...; kwargs...) args = update_var(args, iarg, i, dd/2) if dd == δ res += (pos - neg)/δ else res += im*(pos - neg)/δ end end res end function ng(f, args, iarg; iloss::Int, δ=1e-5, kwargs...) x = args[iarg] T = eltype(x) if x isa AbstractArray res = zero(x) for i = 1:length(x) res[i] = ng_single(T, f, args, kwargs, iarg, i, iloss, δ) end return res else ng_single(T, f, args, kwargs, iarg, nothing, iloss, δ) end end """ gradient_numeric(f, args; iloss, kwargs...) Numerically differentiate `f(args..., kwargs...)` and return the gradients with respect to the differentiable arguments. """ function gradient_numeric(f, args; iloss::Int, kwargs...) map(1:length(args)) do iarg if isvar(args[iarg]) ng(f, args, iarg; iloss=iloss, kwargs...) else 0 end end end """ check_grad(f, args; atol::Real=1e-4, verbose::Bool=false, iloss::Int, kwargs...) Return true if the reversible gradient of `f(args..., kwargs...)` matches the numeric gradient, and the input variables are restored after the gradient computation. """ function check_grad(f, args; atol::Real=1e-4, verbose::Bool=false, iloss::Int, kwargs...) vars = ((iarg for iarg in 1:length(args) if isvar(args[iarg]))...,) initial_vars = deepcopy(vars) ngs = gradient_numeric(f, args; kwargs..., iloss=iloss) gs = gradient(f, args; kwargs..., iloss=iloss) verbose && @show ngs verbose && @show gs if !all(isapprox.(ngs, gs, atol=atol)) verbose && println("gradients do not match: $ngs vs $gs") return false end if !world_similar(initial_vars, vars, atol=atol, verbose=verbose) verbose && println("world changed during obtaining gradient.") return false end return true end @specialize ================================================ FILE: src/autodiff/complex.jl ================================================ @i @inline function :(+=)(angle)(r!::T, x::Complex{T}) where T<:GVar r! += atan(x.im, x.re) end @i @inline function :(+=)(abs2)(y!::T, a::Complex{T}) where T<:GVar y! += a.re^2 y! += a.im^2 end @i @inline function :(+=)(abs)(y!::T, a::Complex{T}) where T<:GVar @routine @invcheckoff begin y2 ← zero(y!) y2 += abs2(a) end y! += sqrt(y2) ~@routine end Base.zero(x::Complex{T}) where T<:GVar = Complex(zero(T), zero(T)) Base.zero(::Type{Complex{T}}) where T<:GVar = Complex(zero(T), zero(T)) Base.one(x::Complex{T}) where T<:GVar = Complex(one(T), zero(T)) Base.one(::Type{Complex{T}}) where T<:GVar = Complex(one(T), zero(T)) ================================================ FILE: src/autodiff/gradfunc.jl ================================================ export Grad, NGrad, Hessian, gradient """ NGrad{N,FT} <: Function Obtain gradients with `Grad(f)(Val(i), args..., kwargs...)`, where `i` is the index of the loss in `args`. A `Grad` object runs the forward pass first, and then the backward pass. !!! note `Val(1)` is specially optimized, so putting the loss as the first parameter can avoid potential overhead. """ struct NGrad{N,FT} <: Function f::FT end function NGrad{N}(f::FT) where {N,FT} NGrad{N,FT}(f) end const Grad{FT} = NGrad{1,FT} const Hessian{FT} = NGrad{2,FT} Base.show_function(io::IO, b::NGrad{N}, compact::Bool) where {N} = print(io, "$(b.f)"*"'"^N) Base.show_function(io::IO, ::MIME"text/plain", b::NGrad{N}, compact::Bool) where {N} = print(io, b) Base.display(bf::NGrad) = print(bf) (_::Type{Inv{NGrad{N}}})(f::NGrad{M}) where {M, N} = NGrad{M-N}(f.f) (_::Type{Inv{NGrad{M}}})(f::NGrad{M}) where {M} = f.f @i function (g::Grad)(il::Val{iloss}, args...; kwargs...) where iloss protectf(g).f(args...; kwargs...) GVar.(args) INC(args |> tget(iloss) |> grad) (~protectf(g).f)(args...; kwargs...) end @i function (g::Grad)(il::Val{1}, x, ys...; kwargs...) protectf(g).f(x, ys...; kwargs...)
GVar(x) INC(x |> grad) GVar.(ys) (~protectf(g).f)(x, ys...; kwargs...) end @i function (g::Grad)(args...; iloss::Int, kwargs...) protectf(g).f(args...; kwargs...) GVar.(args) INC(args |> tget(iloss) |> grad) (~protectf(g).f)(args...; kwargs...) end @generated function gradient(::Val{iloss}, f, args::NTuple{N,Any}; kwargs...) where {iloss,N} newres = gensym() newargs = Any[:(GVar($newres[$i])) for i=1:N] newargs[iloss] = :(GVar($newres[$iloss], one($newres[$iloss]))) quote $newres = f(args...; kwargs...) grad((~f)($(newargs...); kwargs...)) end end gradient(f, args; iloss::Int, kwargs...) = gradient(Val(iloss), f, args; kwargs...) ================================================ FILE: src/autodiff/hessian_backback.jl ================================================ export hessian_backback @i function backback(f, args...; index::Int, iloss::Int, kwargs...) # forward Grad(f)(args...; kwargs..., iloss=iloss) for i = 1:length(args) GVar(args |> tget(i) |> grad) GVar(args |> tget(i) |> value) end (args |> tget(index) |> grad |> grad) += 1 # backward#2 (~Grad(f))(args...; kwargs..., iloss=iloss) end """ hessian_backback(f, args; iloss::Int, kwargs...) Obtain the Hessian matrix of `f(args..., kwargs...)` by back propagating adjoint program. """ function hessian_backback(f, args; iloss::Int, kwargs...) N = length(args) hmat = zeros(N, N) for i=1:N if !(args[i] isa Integer || args[i] isa AbstractVector) res = backback(f, args...; kwargs..., index=i, iloss=iloss) hmat[:,i] .= map(x->grad(value(x)), res[2:end]) end end hmat end function hessian_numeric(f, args; iloss::Int, η=1e-5, kwargs...) narg = length(args) res = zeros(narg, narg) largs = [args...] for i = 1:narg if nparams(args[i]) == 1 @instr (largs[i] |> value) += η/2 gpos = gradient(f, (largs...,); iloss=iloss, kwargs...) @instr (largs[i] |> value) -= η gneg = gradient(f, (largs...,); iloss=iloss, kwargs...) @instr (largs[i] |> value) += η/2 res[:,i] .= (gpos .- gneg)./η end end return res end function local_hessian_numeric(f, args; kwargs...) nargs = length(args) hes = zeros(nargs,nargs,nargs) for j=1:nargs if nparams(args[j]) == 1 hes[:,:,j] .= hessian_numeric(f, args; kwargs..., iloss=j) end end mask = BitArray(nparams.(args) .== 1) hes[mask, mask, mask] end ================================================ FILE: src/autodiff/instructs.jl ================================================ # unary @i @inline function NEG(a!::GVar) NEG(a!.x) NEG(a!.g) end function INV(x!::GVar{T}) where T x2 = x!.x ^ 2 GVar(INV(x!.x), -x!.g * x2) end @i @inline function DEC(a!::GVar) DEC(a!.x) end # +- @i @inline function :(-=)(identity)(a!::GVar, b::GVar) a!.x -= b.x b.g += a!.g end # inv @eval @i @inline function :(-=)(inv)(out!::GVar{T}, y::GVar) where T out!.x -= inv(y.x) @routine @invcheckoff begin @zeros T a1 a1 += y.x ^ 2 end y.g -= out!.g / a1 ~@routine end # +- (triple) @i @inline function :(-=)(+)(out!::GVar, x::GVar, y::GVar) out!.x -= x.x + y.x x.g += out! |> grad y.g += out! |> grad end @i @inline function :(-=)(+)(out!::GVar, x::GVar, y::Real) out!.x -= (x |> value) + (y |> value) x.g += out! |> grad end @i @inline function :(-=)(+)(out!::GVar, x::Real, y::GVar) out!.x -= (x |> value) + (y |> value) y.g += out! |> grad end @i @inline function :(-=)(-)(out!::GVar, x::GVar, y::GVar) out!.x -= x.x - y.x x.g += out! |> grad y.g -= out! |> grad end @i @inline function :(-=)(-)(out!::GVar, x::Real, y::GVar) out!.x -= (x |> value) - y.x y.g -= out!.g end @i @inline function :(-=)(-)(out!::GVar, x::GVar, y::Real) out!.x -= x.x - (y |> value) x.g += out! 
|> grad end # NOTE: it will error on `SWAP(a!::GVar, b)` or `SWAP(a!, b:GVar)` @i @inline function SWAP(a!::GVar, b!::GVar) SWAP(a! |> value, b! |> value) SWAP(a!.g, b!.g) end # */ @i @inline function :(-=)(*)(out!::GVar, x::GVar, y::GVar) out!.x -= x.x * y.x x.g += out!.g * y.x y.g += x.x * out!.g end @i @inline function :(-=)(*)(out!::GVar, x::Real, y::GVar) out!.x -= (x |> value) * y.x y.g += (x |> value) * out!.g end @i @inline function :(-=)(*)(out!::GVar, x::GVar, y::Real) out!.x -= x.x * (y |> value) x.g += out!.g * (y |> value) end for DIV in [:/, :÷] @eval @i @inline function :(-=)($DIV)(out!::GVar{T}, x::GVar, y::GVar) where T out!.x -= $DIV(x.x, y.x) @routine @invcheckoff begin a1 ← zero(out! |> grad) a2 ← zero(out! |> grad) a1 += x.x * out!.g a2 += $DIV(a1, y.x) end x.g += $DIV(out!.g, y.x) y.g -= $DIV(a2, y.x) ~@routine end @eval @i @inline function :(-=)($DIV)(out!::GVar{T}, x::Real, y::GVar) where T out!.x -= $DIV(x, y.x) @routine @invcheckoff begin a1 ← zero(out!.g) a2 ← zero(out!.g) a1 += x * out!.g a2 += $DIV(a1, y.x) end y.g -= $DIV(a2, y.x) ~@routine end @eval @i @inline function :(-=)($DIV)(out!::GVar, x::GVar, y::Real) out!.x -= $DIV(x.x, y) x.g += $DIV(out!.g, y) end end @i @inline function :(-=)(^)(out!::GVar{T}, x::GVar, n::GVar) where T # grad x @routine @invcheckoff begin @zeros T anc1 anc2 anc3 jac1 jac2 nx_1 nx_1 += n.x - 1 anc1 += x.x ^ nx_1 jac1 += anc1 * n.x # get grad of n anc2 += log(x.x) anc3 += anc1 * x.x jac2 += anc3 * anc2 end out!.x -= anc1 * x.x x.g += out!.g * jac1 n.g += out!.g * jac2 ~@routine end @i @inline function :(-=)(^)(out!::GVar{T}, x::GVar, n::Real) where T @routine @invcheckoff begin anc1 ← zero(x.x) jac ← zero(x.x) nx_1 ← zero(n) nx_1 += n - 1 anc1 += x.x ^ nx_1 jac += anc1 * n end out!.x -= anc1 * x.x x.g += out!.g * jac ~@routine end @i @inline function :(-=)(^)(out!::GVar{T}, x::Real, n::GVar) where T # get jac of n @routine @invcheckoff begin anc1 ← zero(x) anc2 ← zero(x) jac ← zero(x) anc1 += log(x) anc2 += x ^ n.x jac += anc1*anc2 end out!.x -= anc2 n.g += out!.g * jac ~@routine end for (OP, F) in [(:min, :<), (:max, :>)] @eval @i @inline function :(-=)($OP)(out!::GVar{T}, x::GVar, y::GVar) where T if $F(x, y) out!.x -= x.x x.g += out!.g else out!.x -= y.x y.g += out!.g end end @eval @i @inline function :(-=)($OP)(out!::GVar{T}, x::GVar, y::Real) where T if $F(x, y) out!.x -= x.x x.g += out!.g else out!.x -= y.x end end @eval @i @inline function :(-=)($OP)(out!::GVar{T}, x::Real, y::GVar) where T if $F(x, y) out!.x -= x.x else out!.x -= y.x y.g += out!.g end end end @i @inline function :(-=)(atan)(out!::GVar{T}, y::GVar, x::GVar) where T out!.x -= atan(y.x, x.x) @routine @invcheckoff begin @zeros T xy2 jac_x jac_y xy2 += abs2(x.x) xy2 += abs2(y.x) jac_y += x.x / xy2 jac_x += (-y.x) / xy2 end y.g += out!.g * jac_y x.g += out!.g * jac_x ~@routine end @i @inline function :(-=)(atan)(out!::GVar{T}, y::Real, x::GVar) where T out!.x -= atan(y, x.x) @routine @invcheckoff begin @zeros T xy2 jac_x xy2 += abs2(x.x) xy2 += abs2(y) jac_x += (-y) / xy2 end x.g += out!.g * jac_x ~@routine end @i @inline function :(-=)(atan)(out!::GVar{T}, y::GVar, x::Real) where T out!.x -= atan(y.x, x) @routine @invcheckoff begin @zeros T xy2 jac_y xy2 += abs2(x) xy2 += abs2(y.x) jac_y += x / xy2 end y.g += out!.g * jac_y ~@routine end @i @inline function :(-=)(atan)(out!::GVar{T}, x::GVar) where T out!.x -= atan(x.x) @routine @invcheckoff begin xy2 ← one(T) xy2 += abs2(x.x) end x.g += out!.g / xy2 ~@routine end @i @inline function :(-=)(abs)(out!::GVar, 
x::GVar{T}) where T out!.x -= abs(x.x) if (x > 0, ~) x.g += out!.g else x.g -= out!.g end end @i @inline function :(-=)(abs2)(out!::GVar, x::GVar{T}) where T out!.x -= abs2(x.x) x.g += out!.g * x.x x.g += out!.g * x.x end for op in [:*, :/, :^, :+, :-, :atan, :max, :min] @eval @nograd :(-=)($op)(out!::GVar, x::Real, y::Real) @eval @nograd :(-=)($op)(out!::Real, x::Real, y::GVar) @eval @nograd :(-=)($op)(out!::Real, x::GVar, y::GVar) @eval @nograd :(-=)($op)(out!::Real, x::GVar, y::Real) end @i @inline function :(-=)(sqrt)(out!::GVar, x::GVar{T}) where T if x.x != 0 @routine @invcheckoff begin @zeros T anc1 anc2 anc1 += sqrt(x.x) anc2 += 2 * anc1 end out!.x -= anc1 x.g += out!.g / anc2 ~@routine end end @i @inline function :(-=)(exp)(out!::GVar, x::GVar{T}) where T @routine @invcheckoff begin anc1 ← zero(T) anc1 += exp(x.x) end out!.x -= anc1 x.g += out!.g * anc1 ~@routine end @i @inline function :(-=)(log)(out!::GVar, x::GVar{T}) where T out!.x -= log(x.x) x.g += out!.g / x.x end @i @inline function :(-=)(sin)(out!::GVar, x::GVar{T}) where T @routine @invcheckoff begin @zeros T s c (s, c) += sincos(x.x) end out!.x -= s x.g += out!.g * c ~@routine end @i @inline function :(-=)(sinh)(out!::GVar, x::GVar{T}) where T out!.x -= sinh(x.x) @routine @invcheckoff begin anc1 ← zero(x.x) anc1 += cosh(x.x) end x.g += out!.g * anc1 ~@routine end @i @inline function (:-=)(asin)(out!::GVar, x::GVar{T}) where T out!.x -= asin(x.x) @routine @invcheckoff begin @zeros T sqrt_1_x2 x2 x2 += x.x ^ 2 sqrt_1_x2 += sqrt(x2 |> NEG |> AddConst(1)) end x.g += out!.g / sqrt_1_x2 ~@routine end @i @inline function (:-=)(cos)(out!::GVar, x::GVar{T}) where T @routine @invcheckoff begin @zeros T s c (s, c) += sincos(x.x) end out!.x -= c x.g -= out!.g * s ~@routine end @i @inline function :(-=)(cosh)(out!::GVar, x::GVar{T}) where T out!.x -= cosh(x.x) @routine @invcheckoff begin anc1 ← zero(x.x) anc1 += sinh(x.x) end x.g += out!.g * anc1 ~@routine end @i @inline function :(-=)(acos)(out!::GVar, x::GVar{T}) where T out!.x -= acos(x.x) @routine @invcheckoff begin @zeros T sqrt_1_x2 x2 x2 += x.x ^ 2 sqrt_1_x2 += sqrt(x2 |> NEG |> AddConst(1)) end x.g -= out!.g / sqrt_1_x2 ~@routine end @i @inline function :(-=)(tan)(out!::GVar, x::GVar{T}) where T @routine @invcheckoff begin anc1 ← zero(x.x) anc2 ← one(x.x) anc1 += tan(x.x) anc2 += anc1^2 end out!.x -= anc1 x.g += out!.g * anc2 ~@routine end @i @inline function :(-=)(tanh)(out!::GVar, x::GVar{T}) where T @routine @invcheckoff begin anc1 ← zero(x.x) anc2 ← one(x.x) anc1 += tanh(x.x) anc2 -= anc1^2 end out!.x -= anc1 x.g += out!.g * anc2 ~@routine end @i @inline function :(-=)(sincos)(out!::Tuple{T1,T1}, x::GVar{T}) where {T1<:GVar, T} @routine @invcheckoff begin s ← zero(T) c ← zero(T) (s, c) += sincos(x.x) end (out! 
.|> value) -= (s, c) x.g += (out!.:1 |> grad) * c x.g -= (out!.:2 |> grad) * s ~@routine end for op in [:sqrt, :exp, :log, :sin, :cos, :tanh, :abs, :abs2, :identity, :inv] @eval @nograd :(-=)($op)(out!::Real, x::GVar) @eval @nograd :(-=)($op)(out!::GVar, x::Real) end @nograd :(-=)(sincos)(out!::Tuple{<:Real,<:Real}, x::GVar) @nograd :(-=)(sincos)(out!::Tuple{<:GVar,<:GVar}, x::Real) @i @inline function IROT(a!::GVar, b!::GVar, θ::GVar) IROT(a!.x, b!.x, θ.x) NEG(θ |> value) θ.x -= π/2 ROT(a!.g, b!.g, θ.x) θ.g += a!.x * a!.g θ.g += b!.x * b!.g θ.x += π/2 NEG(θ |> value) ROT(a!.g, b!.g, π/2) end @i @inline function IROT(a!::GVar, b!::GVar, θ::Real) IROT(a!.x, b!.x, θ) NEG(θ) θ -= π/2 ROT(a!.g, b!.g, θ) θ += π/2 NEG(θ) ROT(a!.g, b!.g, π/2) end @nograd IROT(a!::Real, b!::Real, θ::GVar) export primitive_grad function primitive_grad end @i @inline function (mf::MinusEq)(out!::GVar, args...; kwargs...) out!.x -= mf.f((args .|> value)...; kwargs...) (args .|> grad) .+= (@skip! ntuple(x->out!.g, length(args))) .* (@skip! primitive_grad(mf.f, (args .|> value)...; kwargs...)) # unsafe statement, error on recursive gradient end @i @inline function (mf::MinusEq)(out!::GVar, x::GVar; kwargs...) out!.x -= mf.f(x |> value; kwargs...) x.g += (@skip! out!.g) * (@skip! primitive_grad(mf.f, x.x; kwargs...)) # unsafe statement end @i @inline function :(-=)(convert)(out!::GVar{Tx, Tg}, y::GVar) where {Tx, Tg} out!.x -= convert(y.x) y.g += convert(out!.g) end @i @inline function HADAMARD(x::GVar, y::GVar) HADAMARD(x.x, y.x) HADAMARD(x.g, y.g) end @i @inline function (f::AddConst)(y::GVar) y.x += f.x end # more data views for (DT, OP, NOP) in [(:AddConst, :+, :-), (:SubConst, :-, :+)] @eval chfield(x::GVar, ac::$DT, xval::GVar) = GVar($NOP(xval.x, ac.x), xval.g) end #chfield(x::T, ::typeof(INV), xval::T) where T<:GVar = GVar(INV(xval.x), -xval.g*(xval.x^2)) #chfield(x::T, ::typeof(NEG), xval::T) where T<:GVar = GVar(-xval.x, -xval.g) for F in [:INV, :NEG, :FLIP, :INC, :DEC] @eval NiLangCore.chfield(x::T, ::typeof($F), xval::T) where T<:GVar = (~$F)(xval) end ================================================ FILE: src/autodiff/jacobian.jl ================================================ export jacobian, jacobian_repeat wrap_tuple(x, args) = length(args) == 1 ? (x,) : x """ jacobian_repeat(f, args...; iin::Int, iout::Int=iin, kwargs...) Get the Jacobian matrix for the function `f(args..., kwargs...)` by repeatedly computing the gradient for each output element. One can use the keyword arguments `iin` and `iout` to specify the input and output tensors. """ function jacobian_repeat(f, args...; iin::Int, iout::Int=iin, kwargs...) _check_input(args, iin, iout) N = length(args[iout]) res = zeros(eltype(args[iin]), length(args[iin]), N) xargs = wrap_tuple(f(args...; kwargs...), args) for i = 1:N gxargs = GVar.(xargs) @inbounds gxargs[iout][i] = GVar(value(gxargs[iout][i]), one(eltype(xargs[iout]))) @inbounds res[:,i] .= vec(grad.(wrap_tuple((~f)(gxargs...; kwargs...), gxargs)[iin])) end return res end _copy(x) = x _copy(x::AbstractArray) = copy(x) """ jacobian(f, args...; iin::Int, iout::Int=iin, kwargs...) Get the Jacobian matrix for the function `f(args..., kwargs...)` using vectorized variables in the gradient field. One can use the keyword arguments `iin` and `iout` to specify the input and output tensors. """ function jacobian(f, args...; iin::Int, iout::Int=iin, kwargs...) _check_input(args, iin, iout) args = wrap_tuple(f(args...; kwargs...), args) ABT = AutoBcast{eltype(args[iout]), length(args[iout])} _args = map(i-> i==iout ?
wrap_jacobian(ABT, args[i]) : wrap_bcastgrad(ABT, args[i]), 1:length(args)) _args = wrap_tuple((~f)(_args...; kwargs...), args) out = zeros(eltype(args[iin]), length(args[iin]), length(args[iout])) for i=1:length(args[iin]) @inbounds out[i,:] .= grad(_args[iin][i]).x end out end function wrap_jacobian(::Type{AutoBcast{T,N}}, outarray::AbstractArray{T}) where {T,N} map(k->GVar(outarray[k], AutoBcast{T,N}(onehot(T, N, k))), LinearIndices(outarray)) end function wrap_bcastgrad(::Type{AutoBcast{T,N}}, x::XT) where {T,N,XT} GVar(x, zero(AutoBcast{XT,N})) end function wrap_bcastgrad(::Type{AutoBcast{T,N}}, x::Union{Integer, Function}) where {T,N} x end function wrap_bcastgrad(::Type{AutoBcast{T,N}}, x::NoGrad) where {T,N} (~NoGrad)(x) end function wrap_bcastgrad(::Type{AutoBcast{T,N}}, x::Union{Tuple,AbstractArray}) where {T,N} wrap_bcastgrad.(AutoBcast{T,N}, x) end function onehot(::Type{T}, N::Int, k::Int) where T res = zeros(T, N) res[k] = one(T) res end function _check_input(args, iin, iout) if !(args[iin] isa AbstractArray && args[iout] isa AbstractArray) throw(ArgumentError("arguments at positions $iin and $iout are not arrays.")) elseif (eltype(args[iin]) != eltype(args[iout])) throw(ArgumentError("arguments at positions $iin and $iout do not have the same element type.")) end end ================================================ FILE: src/autodiff/stack.jl ================================================ # This is a patch for loading data into GVar correctly. import NiLangCore NiLangCore.loaddata(::Type{GT}, x::T) where {T, GT<:GVar{T}} = convert(GT, x) function NiLangCore.loaddata(t::Type{VT}, x::AbstractVector) where {T, VT<:AbstractVector{T}} convert.(T, x) end function NiLangCore.loaddata(t::VT, x::AbstractVector) where {T, VT<:AbstractVector{T}} convert(VT, NiLangCore.loaddata.(t, x)) end function NiLangCore.loaddata(::Type{T}, x::XT) where {N, T<:NTuple{N,Any}, XT<:NTuple{N,Any}} ntuple(i->NiLangCore.loaddata(T.parameters[i], x[i]), N) end ================================================ FILE: src/autodiff/ulog.jl ================================================ @i function (:-=)(gaussian_log)(y!::GVar{T}, x::GVar{T}) where T y!.x -= gaussian_log(x.x) @routine @invcheckoff begin exp_x ← zero(x) jac ← zero(x) exp_x += exp(-x) end x.g += y!.g * (exp_x |> AddConst(1) |> INV) ~@routine end @i function (:-=)(gaussian_nlog)(y!::GVar{T}, x::GVar{T}) where T y!.x -= gaussian_nlog(x.x) @routine @invcheckoff begin exp_x ← zero(x) exp_x += exp(-x) end x.g -= y!.g * (exp_x |> SubConst(1) |> INV) ~@routine end @i function :(-=)(convert)(out!::GVar{Tx, Tg}, y::ULogarithmic) where {Tx, Tg} out! -= exp(y.log) end ================================================ FILE: src/autodiff/vars.jl ================================================ ######## GVar, a bundle that records gradient """ GVar{T,GT} <: IWrapper{T} GVar(x) Add gradient information to variable `x`, where `x` can be a real number or a general structure. If it is a non-integer real number, it will be wrapped with a gradient field; otherwise `GVar` will propagate into the type and wrap its elements. Running a program backward will update the gradient fields of `GVar`s. The following is a toy use case.
### Example ```jldoctest; setup=:(using NiLang) julia> using NiLang.AD: GVar, grad julia> struct A{T} x::T end julia> GVar(A(2.0+3im), A(3.0+3im)) A{Complex{GVar{Float64, Float64}}}(GVar(2.0, 3.0) + GVar(3.0, 3.0)*im) julia> @i function f(a::A, b::A) a.x += log(b.x) end julia> outputs = f(A(2.0+3im), A(2.0-1im)) # forward pass (A{ComplexF64}(2.8047189562170503 + 2.536352390999194im), A{ComplexF64}(2.0 - 1.0im)) julia> outputs_with_gradients = (GVar(outputs[1], A(3.0+3im)), GVar(outputs[2])) # wrap `GVar` (A{Complex{GVar{Float64, Float64}}}(GVar(2.8047189562170503, 3.0) + GVar(2.536352390999194, 3.0)*im), A{Complex{GVar{Float64, Float64}}}(GVar(2.0, 0.0) - GVar(1.0, -0.0)*im)) julia> inputs_with_gradients = (~f)(outputs_with_gradients...) # backward pass (A{Complex{GVar{Float64, Float64}}}(GVar(2.0, 3.0) + GVar(3.0, 3.0)*im), A{Complex{GVar{Float64, Float64}}}(GVar(2.0, 1.8) - GVar(1.0, -0.6000000000000002)*im)) julia> grad(inputs_with_gradients) (A{ComplexF64}(3.0 + 3.0im), A{ComplexF64}(1.8 + 0.6000000000000002im)) ``` The outputs of `~f` are gradients for input variables, one can use `grad` to take the gradient fields recursively. """ struct GVar{T,GT} <: IWrapper{T} x::T g::GT function GVar{T,GT}(x::T, g::GT) where {T,GT} new{T,GT}(x, g) end function GVar(x::T, g::T) where T<:Real new{T,T}(x, g) end function GVar{T,GT}(x::T2) where {T,T2,GT} new{T,GT}(T(x), zero(GT)) end function GVar(x::T, g::GT) where {T,GT} new{T,GT}(x, g) end end # `GVar` and `~GVar` on composite types @generated function GVar(x::Type{T}) where T ps = GVar.(T.parameters) if length(ps) == 0 :($(getfield(T.name.module, nameof(T)))) else :($(getfield(T.name.module, nameof(T))){$(ps...)}) end end @generated function GVar(x::Type{T}, y::Type{T}) where T :($(getfield(T.name.module, nameof(T))){$(GVar.(T.parameters, T.parameters)...)}) end @generated function (_::Type{Inv{GVar}})(x::Type{T}) where T :($(getfield(T.name.module, nameof(T))){$((~GVar).(T.parameters)...)}) end # `GVar` and `~GVar` on composite vars @generated function GVar(x::T) where T Expr(:new, GVar(T), [:(GVar(x.$NAME)) for NAME in fieldnames(T)]...) end @generated function GVar(x::T, g::T) where T Expr(:new, GVar(T, T), [:(GVar(x.$NAME, g.$NAME)) for NAME in fieldnames(T)]...) end @generated function (_::Type{Inv{GVar}})(x::T) where T Expr(:new, (~GVar)(T), [:((~GVar)(x.$NAME)) for NAME in fieldnames(T)]...) 
end for T in [:Real] ## differentiable elementary types @eval GVar(::Type{ET}) where ET<:$T = GVar{ET,ET} @eval GVar(::Type{ET}, ::Type{ET}) where ET<:$T = GVar{ET,ET} @eval (_::Type{Inv{GVar}})(::Type{GVar{ET,GT}}) where {ET<:$T,GT} = ET ## differentiable elementary vars @eval GVar(x::$T) = GVar(x, zero(x)) @eval @inline function (_::Type{Inv{GVar}})(x::GVar{<:$T}) @invcheck x.g zero(x.x) x.x end end for T in [:Integer, :Bool, :Function, :String, :Char, :Nothing] ## non-differentiable elementary types @eval GVar(::Type{ET}) where ET<:$T = ET @eval GVar(::Type{ET}, ::Type{ET}) where ET<:$T = GVar{ET,ET} @eval (_::Type{Inv{GVar}})(::Type{ET}) where ET<:$T = ET ## non-differentiable elementary vars @eval GVar(x::$T) = x @eval (_::Type{Inv{GVar}})(x::$T) = x end for T in [:Tuple, :AbstractArray] ## broadcastable elementary types @eval GVar(x::$T) = GVar.(x) @eval GVar(x::$T, y::$T) = GVar.(x, y) @eval (_::Type{Inv{GVar}})(x::$T) = (~GVar).(x) end # no gradient wrapper GVar(x::NoGrad) = (~NoGrad)(x) # define on complex numbers to fix ambiguity errors GVar(x::Complex) = Complex(GVar(x.re), GVar(x.im)) GVar(x::Complex, y::Complex) = Complex(GVar(x.re, y.re), GVar(x.im, y.im)) (_::Type{Inv{GVar}})(x::Complex) = Complex((~GVar)(x.re), (~GVar)(x.im)) Base.copy(b::GVar) = GVar(b.x, copy(b.g)) Base.zero(x::GVar) = GVar(Base.zero(x.x), Base.zero(x.g)) Base.zero(::Type{<:GVar{T,GT}}) where {T,GT} = GVar(zero(T), zero(GT)) Base.one(x::GVar) = GVar(Base.one(x.x), Base.zero(x.g)) Base.one(::Type{<:GVar{T}}) where T = GVar(one(T)) Base.adjoint(b::GVar) = GVar(b.x', b.g') Base.:-(b::GVar) = GVar(-b.x, -b.g) Base.isapprox(x::GVar, y::GVar; kwargs...) = isapprox(x.x, y.x; kwargs...) && isapprox(x.g, y.g; kwargs...) # define kernel and field views """ grad(var) Get the gradient field of `var`. """ @fieldview grad(gv::GVar) = gv.g @fieldview value(gv::GVar) = gv.x # TODO: fix the problem causing this patch, the field type can not change?! chfield(x::GVar, ::typeof(value), xval::GVar) = GVar(xval, x.g) @generated function grad(x::T) where T isprimitivetype(T) && throw("not supported type to obtain gradients: $T.") Expr(:new, typegrad(T), [:(grad(x.$NAME)) for NAME in fieldnames(T)]...) end typegrad(x) = x @generated function typegrad(x::Type{T}) where T if isprimitivetype(T) T else ps = typegrad.(T.parameters) if length(ps) == 0 :($(getfield(T.name.module, nameof(T)))) else :($(getfield(T.name.module, nameof(T))){$(ps...)}) end end end typegrad(::Type{GVar{ET,GT}}) where {ET,GT} = ET grad(gv::T) where T<:Real = zero(T) grad(gv::AbstractArray{T}) where T = grad.(gv) grad(gv::Function) = 0 grad(gv::String) = "" grad(t::Tuple) = grad.(t) chfield(x::T, ::typeof(grad), g::T) where T = (@invcheck g zero(g); x) chfield(x::GVar, ::typeof(grad), g::GVar) = GVar(x.x, g) #chfield(x::GVar, ::typeof(-), val::GVar) = GVar(-val.x, -val.g) chfield(x::Complex{<:GVar}, ::typeof(grad), g::Complex) = Complex(GVar(value(x.re), g.re), GVar(value(x.im), g.im)) # NOTE: superwarning: check value only to make ancilla gradient descardable. 
NiLangCore.deanc(x::GVar{T}, val::GVar{T}) where T = NiLangCore.deanc(value(x), value(val)) function deanc(x::T, val::T) where {T<:AbstractArray} x === val || deanc.(x, val) end # constructors and deconstructors Base.iszero(x::GVar) = iszero(x.x) ## variable mapping function (_::Type{Inv{GVar}})(x::GVar{<:GVar,<:GVar}) Partial{:x}(x) end Base.show(io::IO, gv::GVar) = print(io, "GVar($(gv.x), $(gv.g))") Base.show(io::IO, ::MIME"text/plain", gv::GVar) = Base.show(io, gv) # used in log number iszero function. Base.isfinite(x::GVar) = isfinite(x.x) # interfaces _replace_opmx_callable(ex) = @match ex begin :(:+=($f)) => :(PlusEq($f)) :(:-=($f)) => :(MinusEq($f)) :(:*=($f)) => :(MulEq($f)) :(:/=($f)) => :(DivEq($f)) :(:⊻=($f)) => :(XorEq($f)) _ => ex end """ @nograd f(args...) Mark `f(args...)` as having no gradients. """ macro nograd(ex) @match ex begin :($f($(args...))) => begin f2 = _replace_opmx_callable(f) newargs = [] for arg in args push!(newargs, @match arg begin :($x::GVar) => :($x.x) :($x::VecGVar) => :($x.x) :($x::GVar{$tp}) => :($x.x) _ => NiLangCore.get_argname(arg) end ) end esc(quote @i function $f($(args...)) $f2($(newargs...)) end end) end _ => error("expect `f(args...)`, got $ex") end end # ULogarithmic _content(x::ULogarithmic) = x.log NiLang.AD.GVar(x::ULogarithmic) = exp(ULogarithmic, GVar(_content(x), zero(_content(x)))) (_::Type{Inv{GVar}})(x::ULogarithmic{GVar{TE}}) where TE = exp(ULogarithmic{TE}, (~GVar)(_content(x))) Base.one(x::ULogarithmic{GVar{T,GT}}) where {T, GT} = one(ULogarithmic{GVar{T,GT}}) Base.one(::Type{ULogarithmic{GVar{T,GT}}}) where {T,GT} = exp(ULogarithmic, GVar(zero(T), zero(GT))) Base.zero(x::ULogarithmic{GVar{T,GT}}) where {T,GT} = zero(ULogarithmic{GVar{T,GT}}) Base.zero(::Type{ULogarithmic{GVar{T,T}}}) where T = exp(ULogarithmic, GVar(zero(T), zero(T))) # the patch for dicts function GVar(d::Dict) Dict([(k=>GVar(v)) for (k, v) in d]) end function (_::Type{Inv{GVar}})(d::Dict) Dict([(k=>(~GVar)(v)) for (k, v) in d]) end function grad(d::Dict) Dict([(k=>grad(v)) for (k, v) in d]) end ================================================ FILE: src/complex.jl ================================================ export CONJ NiLangCore.chfield(x::Complex, ::typeof(real), r) = chfield(x, Val{:re}(), r) NiLangCore.chfield(x::Complex, ::typeof(imag), r) = chfield(x, Val{:im}(), r) @i @inline function NEG(y!::Complex) NEG(y!.re) NEG(y!.im) end @i @inline function CONJ(y!::Complex{T}) where T NEG(y!.im) end @i @inline function :(+=)(angle)(r!::Real, x::Complex) r! += atan(x.im, x.re) end @i @inline function :(+=)(identity)(y!::Complex, a::Complex) y!.re += a.re y!.im += a.im end @inline function SWAP(a!::Complex, b!::Complex) b!, a! end @i @inline function :(+=)(abs2)(y!::Real, a::Complex) y! += a.re^2 y! += a.im^2 end @i @inline function :(+=)(abs)(y!::Real, a::Complex) @routine @invcheckoff begin y2 ← zero(y!) y2 += abs2(a) end y!
+= sqrt(y2) ~@routine end @i @inline function :(+=)(*)(y!::Complex{T}, a::Complex, b::Complex) where T @routine @invcheckoff begin @zeros T rere imim reim imre rere += a.re * b.re imim += a.im * b.im reim += a.re * b.im imre += a.im * b.re end y!.re += rere - imim y!.im += reim + imre ~@routine end @i @inline function :(+=)(*)(y!::Complex, a::Real, b::Complex) y!.re += a * b.re y!.im += a * b.im end @i @inline function :(+=)(*)(y!::Complex, a::Complex, b::Real) y!.re += a.re * b y!.im += a.im * b end for OP in [:+, :-] @eval @i @inline function :(+=)($OP)(y!::Complex, a::Complex, b::Complex) y!.re += $OP(a.re, b.re) y!.im += $OP(a.im, b.im) end @eval @i @inline function :(+=)($OP)(y!::Complex, a::Complex, b::Real) y!.re += $OP(a.re, b) end @eval @i @inline function :(+=)($OP)(y!::Complex, a::Real, b::Complex) y!.re += $OP(a, b.re) end end @i @inline function :(+=)(/)(y!::Complex, a::Complex, b::Complex{T}) where T @routine @invcheckoff begin b2 ← zero(T) ab ← zero(y!) b2 += abs2(b) CONJ(b) ab += a * b end y! += ab / b2 ~@routine end @i @inline function :(+=)(/)(y!::Complex, a::Complex, b::Real) y!.re += a.re / b y!.im += a.im / b end @i @inline function :(+=)(/)(y!::Complex, a::Real, b::Complex{T}) where T @routine @invcheckoff begin b2 ← zero(T) ab ← zero(y!) b2 += abs2(b) CONJ(b) ab += a * b end y! += ab / b2 ~@routine end @i @inline function :(+=)(inv)(y!::Complex, b::Complex{T}) where T @routine @invcheckoff begin b2 ← zero(real(T)) b2 += abs2(b) end y! += b' / b2 ~@routine end @i @inline function :(+=)(exp)(y!::Complex, x::Complex{T}) where T @routine @invcheckoff begin @zeros T s c expn z ← zero(y!) (s, c) += sincos(x.im) SWAP(z.re, c) SWAP(z.im, s) expn += exp(x.re) end y! += expn * z ~@routine end @i @inline function :(+=)(log)(y!::Complex, x::Complex{T}) where T @routine @invcheckoff begin n ← zero(T) n += abs(x) end y!.re += log(n) y!.im += angle(x) ~@routine end @i @inline function :(+=)(^)(y!::Complex, a::Complex{T}, b::Real) where T @routine @invcheckoff begin @zeros T r θ s c absy bθ r += abs(a) θ += angle(a) bθ += θ * b (s, c) += sincos(bθ) absy += r ^ b end y!.re += absy * c y!.im += absy * s ~@routine end @i @inline function :(+=)(complex)(y!::Complex, a::Real, b::Real) y!.re += a y!.im += b end for OP in [:*, :/, :+, :-, :^] @eval @i @inline function :(+=)($OP)(y!::Complex, a::Real, b::Real) y!.re += $OP(a, b) end end for OP in [:identity, :cos, :sin, :log, :exp] @eval @i @inline function :(+=)($OP)(y!::Complex, a::Real) y!.re += $OP(a) end end @i @inline function HADAMARD(x::Complex, y::Complex) HADAMARD(x.re, y.re) HADAMARD(x.im, y.im) end ================================================ FILE: src/deprecations.jl ================================================ @deprecate simple_hessian hessian_backback @deprecate hessian_repeat hessian_backback @deprecate ngradient gradient_numeric @deprecate nhessian hessian_numeric @deprecate NEG Base.:- @deprecate ipush! PUSH! @deprecate ipop! POP! ================================================ FILE: src/instructs.jl ================================================ export SWAP, FLIP export ROT, IROT export INC, DEC, NEG, INV, AddConst, SubConst export HADAMARD export PUSH!, POP!, COPYPOP!, COPYPUSH! """ NoGrad{T} <: IWrapper{T} NoGrad(x) A `NoGrad(x)` is equivalent to `GVar^{-1}(x)`, which cancels the `GVar` wrapper. 
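For example (a minimal sketch of the intended behavior):

```julia
using NiLang, NiLang.AD

x = NoGrad(3.0)   # mark 3.0 as gradient-free
value(x)          # 3.0
GVar(x)           # returns 3.0: `GVar` unwraps a `NoGrad` instead of attaching a gradient field
```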
""" struct NoGrad{T} <: IWrapper{T} x::T end NoGrad(x::NoGrad{T}) where T = x # to avoid ambiguity error NoGrad{T}(x::NoGrad{T}) where T = x # to avoid ambiguity error (_::Type{Inv{NoGrad}})(x) = x.x @fieldview value(x::NoGrad) = x.x const NullType{T} = Union{NoGrad{T}, Partial{T}} NEG(a!) = -(a!) @selfdual NEG @selfdual - INV(a!) = inv(a!) @selfdual INV @inline FLIP(b::Bool) = !b @selfdual FLIP """ INC(a!) -> a! + 1 """ @inline function INC(a!::Number) a! + one(a!) end """ DEC(a!) -> a! - 1 """ @inline function DEC(a!::Number) a! - one(a!) end @dual INC DEC """ SWAP(a!, b!) -> b!, a! """ @inline function SWAP(a!::T, b!::T) where T b!, a! end @selfdual SWAP """ ROT(a!, b!, θ) -> a!', b!', θ ```math \\begin{align} {\\rm ROT}(a!, b!, \\theta) = \\begin{bmatrix} \\cos(\\theta) & - \\sin(\\theta)\\\\ \\sin(\\theta) & \\cos(\\theta) \\end{bmatrix} \\begin{bmatrix} a!\\\\ b! \\end{bmatrix}, \\end{align} ``` """ @inline function ROT(i::Real, j::Real, θ::Real) a, b = rot(i, j, θ) a, b, θ end """ IROT(a!, b!, θ) -> ROT(a!, b!, -θ) """ @inline function IROT(i::Real, j::Real, θ::Real) i, j, _ = ROT(i, j, -θ) i, j, θ end @dual ROT IROT """ HADAMARD(x::Real, y::Real) Hadamard transformation that returns `(x + y)/√2, (x - y)/√2` """ function HADAMARD(x::Real, y::Real) sqrt(0.5) * (x + y), sqrt(0.5) * (x - y) end @selfdual HADAMARD # more data views for (DT, OP, NOP) in [(:AddConst, :+, :-), (:SubConst, :-, :+)] @eval struct $DT{T} x::T end @eval function (f::$DT)(y::Real) $OP(y, f.x) end @eval NiLangCore.chfield(x::T, ac::$DT, xval::T) where T<:Real = $NOP(xval, ac.x) end for F1 in [:(Base.:-), :NEG, :(ac::AddConst), :(sc::SubConst)] @eval @inline function $F1(a!::NullType) @instr $F1(a! |> value) a! end end for (OP, F, f) in [(:(PlusEq{typeof(identity)}), :(PlusEq(identity)), :+), (:(MinusEq{typeof(identity)}), :(MinusEq(identity)), :-)] @eval @inline @generated function (::$OP)(x::T, y::T) where T if isprimitivetype(T) Expr(:tuple, Expr(:call, $f, :x, :y), :y) else res = gensym("results") computes = Any[:($($F)(x.$field, y.$field)) for field in fieldnames(T)] comp = Expr(:(=), res, Expr(:tuple, computes...)) res1 = Expr(:new, T, [:($res[$i][1]) for i=1:length(computes)]...) res2 = Expr(:new, T, [:($res[$i][2]) for i=1:length(computes)]...) quote $comp ($res1, $res2) end end end @eval (f::$OP)(x::T, y::T) where T<:Tuple = invoke(f, Tuple{T,T} where T, x, y) @eval (f::$OP)(x::T, y::T) where T<:Real = $f(x, y), y end for F2 in [:SWAP, :HADAMARD, :((inf::PlusEq)), :((inf::MinusEq)), :((inf::XorEq))] @eval @inline function $F2(a::NullType, b::Real) @instr $(NiLangCore.get_argname(F2))(a |> value, b) a, b end @eval @inline function $F2(a::NullType, b::NullType) @instr $(NiLangCore.get_argname(F2))(a |> value, b |> value) a, b end @eval @inline function $F2(a::Real, b::NullType) @instr $(NiLangCore.get_argname(F2))(a, b |> value) a, b end end function type_except(::Type{TT}, ::Type{T2}) where {TT, T2} N = length(TT.parameters) setdiff(Base.Iterators.product(zip(TT.parameters, repeat([T2], N))...), [ntuple(x->T2, N)]) end for F3 in [:ROT, :IROT, :((inf::PlusEq)), :((inf::MinusEq)), :((inf::XorEq))] PS = (:a, :b, :c) for PTS in type_except(Tuple{NullType, NullType, NullType}, Real) params = map((P,PT)->PT <: NullType ? 
:($P |> value) : P, PS, PTS) params_ts = map((P,PT)->:($P::$PT), PS, PTS) @eval @inline function $F3($(params_ts...)) @instr $F3($(params...)) ($(PS...),) end end end # patch for fixed point numbers function (f::PlusEq{typeof(/)})(out!::T, x::Integer, y::Integer) where T<:Fixed out!+T(x)/y, x, y end function (f::MinusEq{typeof(/)})(out!::T, x::Integer, y::Integer) where T<:Fixed out!-T(x)/y, x, y end for F in [:exp, :log, :sin, :sinh, :asin, :cos, :cosh, :acos, :tan, :tanh, :atan] @eval Base.$F(x::Fixed43) = Fixed43($F(Float64(x))) @eval (f::PlusEq{typeof($F)})(out!::Fixed43, x::Real) = out! + Fixed43($F(x)), x @eval (f::MinusEq{typeof($F)})(out!::Fixed43, x::Real) = out! - Fixed43($F(x)), x end Base.:^(x::Integer, y::Fixed43) = Fixed43(x^(Float64(y))) Base.:^(x::Fixed43, y::Fixed43) = Fixed43(x^(Float64(y))) Base.:^(x::T, y::Fixed43) where T<:AbstractFloat = x^(T(y)) function (::PlusEq{typeof(convert)})(out!::T, y) where T<:Real out! + convert(T, y), y end function (::MinusEq{typeof(convert)})(out!::T, y) where T<:Real out! - convert(T, y), y end Base.:~(ac::AddConst) = SubConst(ac.x) Base.:~(ac::SubConst) = AddConst(ac.x) @dualtype AddConst SubConst for F in [:INV, :NEG, :FLIP, :INC, :DEC] @eval NiLangCore.chfield(x::T, ::typeof($F), xval::T) where T<:Real = (~$F)(xval) end #### The following functions are not safe! @i @inline function PUSH!(x::T) where T PUSH!((@skip! GLOBAL_STACK), x) end @i @inline function POP!(x::T) where T POP!((@skip! GLOBAL_STACK), x) end @i @inline function COPYPUSH!(x) COPYPUSH!((@skip! GLOBAL_STACK), x) end @i @inline function COPYPOP!(x) COPYPOP!((@skip! GLOBAL_STACK), x) end # reversibility turned off, in principle, we can not deallocate `GVar{T}` to `T` @i @inline function PUSH!(st, x::T) where T @invcheckoff st[end+1] ↔ x @invcheckoff x ← _zero(T) end @i @inline function POP!(st, x::T) where T @invcheckoff x → _zero(T) @invcheckoff st[end] ↔ (x::T)::∅ end @i @inline function COPYPUSH!(st, x) @invcheckoff st[end+1] ← x end @i @inline function COPYPOP!(st, x) @invcheckoff st[end] → x end # accumulation on arrays: initially for Bennett algorithm # TODO: also define it for composite types. or maybe a macro for it. @i function :(+=)(identity)(target::AbstractArray, source::AbstractArray) @safe @assert length(target) == length(source) @inbounds for i=1:length(target) target[i] += source[i] end end ================================================ FILE: src/macros.jl ================================================ using MLStyle, NiLang export alloc, @auto_alloc, @auto_expand """ alloc(f, args...) allocate function output space (the first argument), where `args` only contains the last `N-1` arguments. 
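For example (a minimal sketch of the default rules defined below):

```julia
using NiLang

alloc(PlusEq(*), 2.0, 3f0)    # 0.0, the zero of promote_type(Float64, Float32)
alloc(PlusEq(sincos), 1.0)    # (0.0, 0.0)
```

These rules are consumed when a macro such as `@auto_expand` emits an `out ← alloc(f, args...)` statement to allocate output space automatically.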
""" function alloc end macro auto_alloc(ex) esc(auto_alloc(ex)) end function auto_alloc(ex) @match ex begin :($f($out, $(args...))) => begin Expr(:block, :($out ← $alloc($f, $(args...))), ex) end :($out = $f($(args...))) => begin if length(args) == 0 error("number of arguments must be >= 1.") else Expr(:block, :($out ← $alloc($f, $(args...))), :($out += $f($(args...)))) end end _ => error("can not allocate automatically for expression: `$ex`") end end for OPM in [:PlusEq, :MinusEq] for OP in [:+, :-, :*, :/, :^] @eval alloc(::$OPM{typeof($OP)}, x::T1, ::T2) where {T1<:Number,T2<:Number} = zero(promote_type(T1, T2)) end for OP in [:sin, :cos, :tan, :asin, :atan, :acos, :sinh, :cosh, :tanh, :identity, :sqrt, :exp, :log] @eval alloc(::$OPM{typeof($OP)}, x::T) where T<:Number = zero(T) end for OP in [:abs, :abs2] @eval alloc(::$OPM{typeof($OP)}, x::T) where T<:Number = zero(real(T)) end @eval alloc(::$OPM{typeof(sincos)}, x::T) where T<:Number = (zero(T), zero(T)) end function auto_expand(ex) res = Expr[] auto_expand!(copy(ex), res) Expr(:block, res..., NiLangCore.dual_body(@__MODULE__, res[1:end-1])...) end function auto_expand!(ex, exprs, sym=nothing, addnew=true) @match ex begin :($f($(args...))) => begin for (i, arg) in enumerate(args) @match arg begin :($_{$(_...)}($(_...))) => begin auto_expand!(arg, exprs, nothing, false) end :($f2($(vs...))) => begin sym2 = gensym() auto_expand!(:(PlusEq($f2)($sym2, $(vs...))), exprs, sym2, true) args[i] = sym2 end _ => nothing end end if sym !== nothing push!(exprs, :($sym ← $alloc($f, $(args[2:end]...)))) end if addnew push!(exprs, :($f($(args...)))) end end :($a += $b) || :($a -= $b) || :($a *= $b) || :($a /= $b) || :($a ⊻= $b) => begin auto_expand!(NiLangCore.to_standard_format(ex), exprs, sym, addnew) end _ => error("Can only expand an expression like `f(args...)`, got $(ex)!") end end macro auto_expand(ex) esc(auto_expand(ex)) end ================================================ FILE: src/stdlib/base.jl ================================================ export i_sqdistance, i_dirtymul, i_factorial """ i_sqdistance(dist!, x1, x2) Squared distance between two points `x1` and `x2`. """ @i function i_sqdistance(dist!, x1::AbstractVector{T}, x2::AbstractVector) where T @inbounds for i=1:length(x1) x1[i] -= x2[i] dist! += x1[i] ^ 2 x1[i] += x2[i] end end """ i_dirtymul(out!, x, anc!) "dirty" reversible multiplication that computes `out! *= x` approximately for floating point numbers, the `anc!` is anticipated as a number ~0. """ @i @inline function i_dirtymul(out!, x, anc!) anc! += out! * x out! -= anc! / x SWAP(out!, anc!) end @i @inline function i_dirtymul(out!::Int, x::Int, anc!::Int) anc! += out! * x out! -= anc! ÷ x SWAP(out!, anc!) end """ i_factorial(out!, n) Compute the factorial `out! = factorial(n)`. """ @i function i_factorial(out!::Int, n::Int) INC(out!) @invcheckoff for i=1:n i_dirtymul(out!, i, 0) end end ================================================ FILE: src/stdlib/bennett.jl ================================================ export bennett, bennett! function direct_emulate(step, x0::T, args...; N::Int, kwargs...) where T xpre = copy(x0) local x for i=1:N x = _zero(xpre) res = step(x, xpre, args...; kwargs...) 
xpre = res[1] args = res[3:end] end return xpre end struct BennettLog fcalls::Vector{NTuple{3,Any}} # depth, function index f_i := s_{i-1} -> s_{i}, length should be `(2k-1)^n` and function peak_mem::Base.RefValue{Int} # should be `n*(k-1)+2` depth::Base.RefValue{Int} end BennettLog() = BennettLog(NTuple{3,Any}[], Ref(0), Ref(0)) # hacking the reversible program function logfcall(l::BennettLog, i, f) push!(l.fcalls, (l.depth[], i, f)) l, i, f end function ilogfcall(l::BennettLog, i, f) push!(l.fcalls, (l.depth[], i, ~f)) l, i, f end @dual logfcall ilogfcall Base.show(io::IO, ::MIME"text/plain", logger::BennettLog) = Base.show(io, logger) function Base.show(io::IO, logger::BennettLog) nreverse = count(x->x[3] isa Inv, logger.fcalls) print(io, """Bennett log | peak memory usage = $(logger.peak_mem[]) | number of function forward/backward calls = $(length(logger.fcalls)-nreverse)/$nreverse""") end """ bennett(step, y, x, args...; k, N, logger=BennettLog(), kwargs...) * `step` is a reversible step function, * `y` is the output state, * `x` is the input state, * `k` is the number of steps in each Bennett's recursion, * `N` is the total number of steps, * `logger=BennettLog()` is the logging of Bennett's algorithm, * `args...` and `kwargs...` are additional arguments for steps. """ @i function bennett(step, y::T, x::T, args...; k::Int, N::Int, logger=BennettLog(), kwargs...) where T state ← Dict{Int, T}() state[1] ← _zero(x) state[1] += x bennett!((@skip! step), state, k, 1, N, args...; do_uncomputing=true, logger=logger, kwargs...) SWAP(y, state[N+1]) state[1] -= x state[1] → _zero(x) state[N+1] → _zero(x) state → Dict{Int, T}() end """ bennett!(step, state::Dict, args...; k, N, logger=BennettLog(), do_uncomputing=false, kwargs...) * `step` is a reversible step function, * `state` is the dictionary state, with `state[1]` the input state, the return value is stored in `state[N+1]`, * `k` is the number of steps in each Bennett's recursion, * `N` is the total number of steps, * `logger=BennettLog()` is the logging of Bennett's algorithm, * `args...` and `kwargs...` are additional arguments for steps. """ @i function bennett!(step, state::Dict{Int,T}, args...; k::Int, N::Int, logger=BennettLog(), do_uncomputing=false, kwargs...) where T bennett!(step, state, k, 1, N, args...; logger=logger, do_uncomputing=do_uncomputing, kwargs...) end @i function bennett!(step, state::Dict{Int,T}, k::Int, base, len, args...; logger, do_uncomputing, kwargs...) where T @safe logger !== nothing && (logger.depth[] += 1) @invcheckoff if len == 1 state[base+1] ← _zero(state[base]) @safe logger !== nothing && (logger.peak_mem[] = max(logger.peak_mem[], length(state))) getf(step, base)(state[base+1], state[base], args...; kwargs...) if logger !== nothing logfcall(logger, (@const base+1), (@const getf(step, base))) end else @routine begin @zeros Int nstep n n += ceil((@skip! Int), (@const len / k)) nstep += ceil((@skip! Int), (@const len / n)) end for j=1:nstep bennett!(step, state, k, (@const base+n*(j-1)), (@const min(n,len-n*(j-1))), args...; logger=logger, do_uncomputing=true, kwargs...) end if do_uncomputing for j=nstep-1:-1:1 ~bennett!(step, state, k, (@const base+n*(j-1)), n, args...; logger=logger, do_uncomputing=true, kwargs...) 
end end ~@routine end end getf(f, i::Int) = f getf(f::AbstractArray, i::Int) = f[i] ================================================ FILE: src/stdlib/blas.jl ================================================ export i_sum, i_mul!, i_dot, i_axpy!, i_umm!, i_norm2 """ i_sum(out!, x) get the sum of `x`. """ @i function i_sum(out!, x::AbstractArray) @invcheckoff for i=1:length(x) @inbounds out! += x[i] end end @i function i_sum(out!, f, x::AbstractArray) @invcheckoff for i=1:length(x) @inbounds out! += f(x[i]) end end """ i_mul!(out!, x, y) compute `x * y` (`x` and `y` are matrices, and store results in `out!`. """ @i function i_mul!(out!::AbstractMatrix{T}, x::AbstractMatrix{T}, y::AbstractMatrix{T}) where T @safe size(x, 2) == size(y, 1) || throw(DimensionMismatch()) @invcheckoff @inbounds for k=1:size(y,2) for j=1:size(x,2) for i=1:size(x,1) out![i,k] += x[i,j] * y[j,k] end end end end @i function i_mul!(out!::AbstractVector{T}, x::AbstractMatrix, y::AbstractVector) where T @safe size(x, 2) == size(y, 1) || throw(DimensionMismatch()) @invcheckoff @inbounds for j=1:size(x,2) @routine begin yj ← zero(T) yj += y[j] end for i=1:size(x,1) out![i] += x[i,j] * yj end ~@routine end end @i function i_dot(out!, x, y) @safe @assert length(x) == length(y) @invcheckoff @inbounds for i=1:length(x) out! += x[i]' * y[i] end end """ i_norm2(out!, x) get the squared norm of `x`. """ @i function i_norm2(out!, x) @invcheckoff @inbounds for i=1:length(x) out! += abs2(x[i]) end end """ i_axpy!(a, x, y!) compute `y! += a * x`, where `x` and `y` are vectors. """ @i function i_axpy!(a, X, Y) @safe @assert length(X) == length(Y) @invcheckoff @inbounds for i=1:length(Y) Y[i] += a * X[i] end end """ i_umm!(x!, θ) Compute unitary matrix multiplication on `x`, where the unitary matrix is parameterized by (N+1)*N/2 `θ`s. """ @i function i_umm!(x!::AbstractArray, θ) @routine begin M ← size(x!, 1) N ← size(x!, 2) end k ← 0 @safe @assert length(θ) == M*(M-1)/2 for l = 1:N for j=1:M for i=M-1:-1:j INC(k) ROT(x![i,l], x![i+1,l], θ[k]) end end end k → length(θ) ~@routine end ================================================ FILE: src/stdlib/linalg.jl ================================================ export i_inv!, i_affine! """ i_inv!(out!, A) Get the inverse of `A`. ```note!!! this function is implemented as a primitive. ``` """ @i function i_inv!(out!::AbstractMatrix{T}, A::AbstractMatrix{T}) where T @invcheckoff invA ← inv(A) out! .+= invA @invcheckoff invA → inv(A) end @i function i_inv!(out!::AbstractMatrix{T}, A::AbstractMatrix{T}) where T<:GVar @routine @invcheckoff begin invA ← inv(value.(A)) gA ← -transpose(invA) * grad(out!) * transpose(invA) end for i=1:length(out!) (out![i] |> value) -= invA[i] end for i=1:length(A) (A[i] |> grad) -= gA[i] end ~@routine end @i function :(-=)(det)(out!::T, A::AbstractMatrix{T}) where T<:GVar @routine @invcheckoff begin vA ← value.(A) detA ← det(vA) gA ← detA * grad(out!) * transpose(inv(vA)) end (out! |> value) -= detA for i=1:length(A) (A[i] |> grad) += gA[i] end ~@routine end @i function :(-=)(logdet)(out!::T, A::AbstractMatrix{T}) where T<:GVar @routine @invcheckoff begin gA ← grad(out!) * transpose(inv(value.(A))) end (out! |> value) -= det(A |> grad) for i=1:length(A) (A[i] |> grad) += gA[i] end ~@routine end """ i_affine!(y!, W, b, x) `affine!` transformation `y! += W*x + b`. """ @i function i_affine!(y!::AbstractVector{T}, W::AbstractMatrix{T}, b::AbstractVector{T}, x::AbstractVector{T}) where T @safe @assert size(W) == (length(y!), length(x)) && length(b) == length(y!) 
@invcheckoff for j=1:size(W, 2) for i=1:size(W, 1) @inbounds y![i] += W[i,j]*x[j] end end @invcheckoff for i=1:size(W, 1) @inbounds y![i] += b[i] end end ================================================ FILE: src/stdlib/mapreduce.jl ================================================ export i_mapfoldl, i_filter!, i_map! """ i_mapfoldl(map, fold, out!, iter) Reversible `mapfoldl` function: `map` can be irreversible, but `fold` must be reversible. """ @i function i_mapfoldl(map, fold, out!::T, iter) where T anc ← zero(T) for i=1:length(iter) anc += map(iter[i]) fold(out!, anc) anc -= map(iter[i]) end anc → zero(T) end """ i_filter!(f, out!, x) Reversible `filter` function; `out!` must be an empty vector, which stores the filtered results. """ @i function i_filter!(f, out!::AbstractVector, x::AbstractVector{T}) where T @invcheckoff @inbounds for i = 1:length(x) if (f(x[i]), ~) COPYPUSH!(out!, x[i]) end end end ================================================ FILE: src/stdlib/nnlib.jl ================================================ export i_softmax_crossentropy, i_relu, i_logsumexp function (_::PlusEq{typeof(argmax)})(out!, x::AbstractArray) out! += argmax(x) out!, x end function (_::MinusEq{typeof(argmax)})(out!, x::AbstractArray) out! -= argmax(x) out!, x end """ i_softmax_crossentropy(x, p, imax, xmax, Z, out) Softmax cross-entropy function. """ @i function i_softmax_crossentropy(x, p, imax, xmax, Z, out::T) where T # subtract maximum imax += argmax(x) # trade the space of xmax for time xmax += x[imax] # accumulate exp(x) to Z, and finally get logZ for i=1:length(x) x[i] -= xmax Z += Base.exp(x[i]) end @routine begin yi ← zero(T) logZ ← zero(T) logZ += log(Z) end for i=1:length(x) yi += logZ yi -= x[i] out += yi * p[i] yi += x[i] yi -= logZ end ~@routine end """ i_relu(out!, x) ReLU in machine learning. """ @i function i_relu(out!, x) @invcheckoff if (x > 0, ~) out! += x end end """ i_logsumexp(logout!, out!, xs!, inds!, x) Compute `logout! = log(sum(exp(x)))`. # Arguments * `out!`, output, * `logout!`, the log of the output, * `xs!`, an empty vector to cache the ascending values (same type as `x`), * `inds!`, an empty vector to cache the ascending indices (integer type), * `x`, input vector. """ @i function i_logsumexp(logout!, out!, xs!, inds!, x::AbstractArray{T}) where T i_ascending!(xs!, inds!, x) @routine begin mx ← zero(T) mx += xs![end] end @invcheckoff @inbounds for i=1:length(x) x[i] -= mx out! += exp(x[i]) x[i] += mx end logout! += log(out!) logout! += mx ~@routine end ================================================ FILE: src/stdlib/sorting.jl ================================================ export i_ascending! """ i_ascending!(xs!, inds!, arr) Find the ascending sequence (running maximum) in `arr`, storing the values in `xs!` and the corresponding indices in `inds!`. This function can be used to get the maximum value and its index.
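For example (a sketch of the expected result):

```julia
using NiLang

xs, inds = Float64[], Int[]
i_ascending!(xs, inds, [2.0, 1.0, 3.0])
xs    # [2.0, 3.0] -- the running-maximum values
inds  # [1, 3]     -- their positions; `inds[end]` is the argmax
```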
""" @i function i_ascending!(xs!::AbstractVector{T}, inds!, arr::AbstractArray{T}) where T @invcheckoff if (length(arr) > 0, ~) y ← zero(T) y += arr[1] xs![end+1] ↔ y anc ← 1 inds![end+1] ↔ anc @inbounds for i = 2:length(arr) if (arr[i] > xs![end], i==inds![end]) ind ← i x ← zero(T) x += arr[i] xs![end+1] ↔ x inds![end+1] ↔ ind end end end end ================================================ FILE: src/stdlib/sparse.jl ================================================ using SparseArrays @i function i_mul!(C::StridedVecOrMat, A::AbstractSparseMatrix, B::StridedVector{T}, α::Number, β::Number) where T @safe size(A, 2) == size(B, 1) || throw(DimensionMismatch()) @safe size(A, 1) == size(C, 1) || throw(DimensionMismatch()) @safe size(B, 2) == size(C, 2) || throw(DimensionMismatch()) @routine begin nzv ← nonzeros(A) rv ← rowvals(A) end if (β != 1, ~) @safe error("only β = 1 is supported, got β = $(β).") end # Here, we close the reversibility check inside the loop to increase performance @invcheckoff for k = 1:size(C, 2) @inbounds for col = 1:size(A, 2) @routine begin αxj ← zero(T) αxj += B[col,k] * α end for j = SparseArrays.getcolptr(A)[col]:(SparseArrays.getcolptr(A)[col + 1] - 1) C[rv[j], k] += nzv[j]*αxj end ~@routine end end ~@routine end @i function i_dot(r::T, A::SparseMatrixCSC{T},B::SparseMatrixCSC{T}) where {T} @routine @invcheckoff begin (m, n) ← size(A) branch_keeper ← zeros(Bool, 2*m) end @safe size(B) == (m,n) || throw(DimensionMismatch("matrices must have the same dimensions")) @invcheckoff @inbounds for j = 1:n @routine begin ia1 ← A.colptr[j] ib1 ← B.colptr[j] ia2 ← A.colptr[j+1] ib2 ← B.colptr[j+1] ia ← ia1 ib ← ib1 end @inbounds for i=1:ia2-ia1+ib2-ib1-1 ra ← A.rowval[ia] rb ← B.rowval[ib] if (ra == rb, ~) r += A.nzval[ia]' * B.nzval[ib] end ## b move -> true, a move -> false branch_keeper[i] ⊻= @const ia == ia2-1 || (ib != ib2-1 && ra > rb) ra → A.rowval[ia] rb → B.rowval[ib] if (branch_keeper[i], ~) INC(ib) else INC(ia) end end ~@inbounds for i=1:ia2-ia1+ib2-ib1-1 ## b move -> true, a move -> false branch_keeper[i] ⊻= @const ia == ia2-1 || (ib != ib2-1 && A.rowval[ia] > B.rowval[ib]) if (branch_keeper[i], ~) INC(ib) else INC(ia) end end ~@routine end ~@routine end ================================================ FILE: src/stdlib/statistics.jl ================================================ export i_mean_sum, i_var_mean_sum, i_normal_logpdf, i_cor_cov export VarianceInfo """ i_mean_sum(out!, sum!, x) get the `mean` and `sum` of `x`. """ @i function i_mean_sum(out!, sum!, x) for i=1:length(x) sum! += x[i] end out! += sum!/(@const length(x)) end struct VarianceInfo{T} variance::T variance_accumulated::T mean::T sum::T end function VarianceInfo(::Type{T}) where T VarianceInfo(zero(T), zero(T), zero(T), zero(T)) end """ i_var_mean_sum(varinfo, sqv) i_var_mean_sum(var!, varsum!, mean!, sum!, v) Compute the variance, the accumulated variance, mean and sum. `varinfo` is the `VarianceInfo` object to store outputs. """ @i function i_var_mean_sum(varinfo::VarianceInfo{T}, v::AbstractVector{T}) where T i_var_mean_sum(varinfo.variance, varinfo.variance_accumulated, varinfo.mean, varinfo.sum, v) end @i function i_var_mean_sum(var!, varsum!, mean!, sum!, v::AbstractVector{T}) where T i_mean_sum(mean!, sum!, v) for i=1:length(v) @routine @invcheckoff begin x ← zero(T) x += v[i] - mean! end varsum! += x ^ 2 ~@routine end var! += varsum! / (@const length(v)-1) end """ i_normal_logpdf(out, x, μ, σ) get the pdf of `Normal(μ, σ)` at point `x`. 
""" @i function i_normal_logpdf(out, x::T, μ, σ) where T @routine @invcheckoff begin @zeros T anc1 anc2 anc3 anc1 += x anc1 -= μ anc2 += anc1 / σ # (x- μ)/σ anc3 += anc2^2 # (x-μ)^2/σ^2 end out -= anc3 * 0.5 # -(x-μ)^2/2σ^2 out -= log(σ) # -(x-μ)^2/2σ^2 - log(σ) out -= log(2π)/2 # -(x-μ)^2/2σ^2 - log(σ) - log(2π)/2 ~@routine end """ i_cor_cov(rho!,cov!,a,b) get Pearson correlation and covariance of two vectors `a` and `b` """ @i function i_cor_cov(rho!::T, cov!::T, a::AbstractVector{T}, b::AbstractVector{T}) where T @safe @assert length(a) == length(b) @routine @invcheckoff begin @zeros T std1 std2 info1 ← _zero(VarianceInfo{T}) i_var_mean_sum(info1, a) std1 += sqrt(info1.variance) info2 ← _zero(VarianceInfo{T}) i_var_mean_sum(info2, b) std2 += sqrt(info2.variance) @zeros T anc5 anc6 anc7 @inbounds for i=1:length(b) @routine begin @zeros T anc3 anc4 anc3 += a[i] - info1.mean anc4 += b[i] - info2.mean end anc5 += anc3 * anc4 ~@routine end anc6 += std1 * std2 anc7 += anc6 * (@const length(b)-1) end cov! += anc5 / (@const length(b)-1) rho! += anc5 / anc7 ~@routine end ================================================ FILE: src/stdlib/stdlib.jl ================================================ using .NiLang.AD using LinearAlgebra include("base.jl") include("blas.jl") include("linalg.jl") include("statistics.jl") include("nnlib.jl") include("sparse.jl") include("mapreduce.jl") include("sorting.jl") include("bennett.jl") ================================================ FILE: src/ulog.jl ================================================ using LogarithmicNumbers export gaussian_log, gaussian_nlog export ULogarithmic @i @inline function (:*=(identity))(x::ULogarithmic, y::ULogarithmic) x.log += y.log end @i @inline function (:*=(identity))(x::ULogarithmic, y::Real) x.log += log(y) end for (OP1, OP2, OP3) in [(:*, :+, :(+=)), (:/, :-, :(-=))] @eval @i @inline function (:*=($OP1))(out!::ULogarithmic, x::ULogarithmic, y::ULogarithmic) out!.log += $OP2(x.log, y.log) end @eval @i @inline function (:*=($OP1))(out!::ULogarithmic, x::Real, y::Real) out!.log += log(x) $(Expr(OP3, :(out!.log), :(log(y)))) end @eval @i @inline function (:*=($OP1))(out!::ULogarithmic, x::ULogarithmic, y::Real) out!.log += x.log $(Expr(OP3, :(out!.log), :(log(y)))) end @eval @i @inline function (:*=($OP1))(out!::ULogarithmic, x::Real, y::ULogarithmic) out!.log += log(x) $(Expr(OP3, :(out!.log), :(y.log))) end end @i @inline function (:*=(^))(out!::ULogarithmic, x::ULogarithmic, y::Real) out!.log += x.log * y end gaussian_log(x) = log1p(exp(x)) gaussian_nlog(x) = log1p(-exp(x)) @i function (:*=)(+)(out!::ULogarithmic{T}, x::ULogarithmic{T}, y::ULogarithmic{T}) where {T} @invcheckoff if (x.log == y.log, ~) out!.log += x.log out!.log += log(2) elseif (x.log ≥ y.log, ~) out!.log += x.log y.log -= x.log out!.log += gaussian_log(y.log) y.log += x.log else out!.log += y.log x.log -= y.log out!.log += gaussian_log(x.log) x.log += y.log end end @i function (:*=)(-)(out!::ULogarithmic{T}, x::ULogarithmic{T}, y::ULogarithmic{T}) where {T} @safe @assert x.log ≥ y.log @invcheckoff if (!iszero(x), ~) out!.log += x.log y.log -= x.log out!.log += gaussian_nlog(y.log) y.log += x.log end end @i function :(*=)(convert)(out!::ULogarithmic{T}, y::ULogarithmic) where T out!.log += convert((@skip! T), y.log) end @i function :(*=)(convert)(out!::ULogarithmic{T}, y::T) where T<:Real out!.log += log(y) end function (f::PlusEq)(out!::ULogarithmic{T}, args...) 
where T throw(MethodError(f, (out!, args...))) end function (f::MinusEq)(out!::ULogarithmic{T}, args...) where T throw(MethodError(f, (out!, args...))) end Base.convert(::Type{T}, x::ULogarithmic{T}) where {T<:Fixed} = exp(x.log) function NiLangCore.deanc(x::T, v::T) where T<:ULogarithmic x === v || NiLangCore.deanc(x.log, v.log) end ================================================ FILE: src/utils.jl ================================================ export rot, plshift, prshift, arshift """ rot(a, b, θ) rotate variables `a` and `b` by an angle `θ`. """ function rot(a, b, θ) s, c = sincos(θ) a*c-b*s, a*s+b*c end """ plshift(x, n) periodic left shift. """ plshift(x, n) = (x << n) | (x >> (sizeof(x)*8-n)) """ prshift(x, n) periodic right shift. """ prshift(x, n) = (x >> n) | (x << (sizeof(x)*8-n)) """ arshift(x, n) right shift, sign-extending. """ arshift(x::T, n) where T = (x >> n) | (x & (T(1) << (sizeof(x)*8-1))) ================================================ FILE: src/vars.jl ================================================ # variable manipulation export @zeros, @ones """ Create zeros of a specific type. ```julia julia> @i function f(x) @zeros Float64 a b c # do something end ``` """ macro zeros(T, args...) esc(Expr(:block, map(x->:($x ← zero($T)), args)...)) end macro ones(T, args...) esc(Expr(:block, map(x->:($x ← one($T)), args)...)) end function NiLangCore.chfield(a::AbstractArray, ::typeof(vec), val) reshape(val, size(a)...) end ================================================ FILE: src/wrappers.jl ================================================ export IWrapper, Partial, unwrap, value """ value(x) Get the `value` from a wrapper instance. """ value(x) = x NiLangCore.chfield(x::T, ::typeof(value), y::T) where T = y """ IWrapper{T} <: Real IWrapper{T} is a wrapper for data of type `T`. It forwards the comparison operations `>, <, >=, <=, ≈`. """ abstract type IWrapper{T} <: Real end NiLangCore.chfield(x, ::Type{T}, v) where {T<:IWrapper} = (~T)(v) Base.eps(::Type{<:IWrapper{T}}) where T = Base.eps(T) """ unwrap(x) Unwrap a wrapper instance (recursively) to get the content value. """ unwrap(x::IWrapper) = unwrap(value(x)) unwrap(x) = x for op in [:>, :<, :>=, :<=, :isless, :(==), :≈] @eval Base.$op(a::IWrapper, b::IWrapper) = $op(unwrap(a), unwrap(b)) @eval Base.$op(a::IWrapper, b::Real) = $op(unwrap(a), b) @eval Base.$op(a::IWrapper, b::AbstractFloat) = $op(unwrap(a), b) @eval Base.$op(a::Real, b::IWrapper) = $op(a, unwrap(b)) @eval Base.$op(a::AbstractFloat, b::IWrapper) = $op(a, unwrap(b)) end """ Partial{FIELD, T, T2} <: IWrapper{T2} Take a field `FIELD` without dropping information. This operation can be undone by calling `~Partial{FIELD}`.
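A sketch (cf. `test/wrappers.jl`):

```julia
p = Partial{:im}(3 + 2im)  # view the imaginary part, keeping the full number
value(p)                   # 2
(~Partial{:im})(p)         # 3 + 2im, the original value is recovered
```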
""" struct Partial{FIELD, T, T2} <: IWrapper{T2} x::T function Partial{FIELD,T,T2}(x::T) where {T,T2,FIELD} new{FIELD,T,T2}(x) end function Partial{FIELD,T,T2}(x::T) where {T<:Complex,T2,FIELD} new{FIELD,T,T2}(x) end end Partial{FIELD}(x::T) where {T,FIELD} = Partial{FIELD,T,typeof(getfield(x,FIELD))}(x) Partial{FIELD}(x::T) where {T<:Complex,FIELD} = Partial{FIELD,T,typeof(getfield(x,FIELD))}(x) @generated function (_::Type{Inv{Partial{FIELD}}})(x::Partial{FIELD}) where {FIELD} :(x.x) end function NiLangCore.chfield(hd::Partial{FIELD}, ::typeof(value), val) where FIELD chfield(hd, Val(:x), chfield(hd.x, Val(FIELD), val)) end @generated function value(hv::Partial{FIELD}) where FIELD :(hv.x.$FIELD) end function Base.zero(x::T) where T<:Partial zero(T) end function Base.zero(x::Type{<:Partial{FIELD,T}}) where {FIELD, T} Partial{FIELD}(Base.zero(T)) end Base.show(io::IO, gv::Partial{FIELD}) where FIELD = print(io, "$(gv.x).$FIELD") Base.show(io::IO, ::MIME"plain/text", gv::Partial) = Base.show(io, gv) ================================================ FILE: test/autobcast.jl ================================================ using NiLang using Test @testset "auto bcast" begin a = AutoBcast([1.0, 2.0, 3.0]) @instr NEG(a) @test a.x == [-1.0,-2.0,-3.0] a = AutoBcast([1.0, 2.0, 3.0]) @instr INC(a) @test a.x == [2.0,3.0,4.0] @instr DEC(a) @test a.x == [1.0,2.0,3.0] a = AutoBcast([false, true, true]) @instr FLIP(a) @test a.x == [true, false, false] a = AutoBcast([1.0, 2.0, 3.0]) b = AutoBcast([1.0, 2.0, 4.0]) @instr a += b @test a.x == [2,4,7.0] @test b.x == [1,2,4.0] @instr SWAP(a, b) @test b.x == [2,4,7.0] @test a.x == [1,2,4.0] a = AutoBcast([1.0, 2.0, 3.0]) b = 2.0 @instr a += b @test a.x == [3,4,5.0] @test b == 2.0 a = AutoBcast([1.0, 2.0, 3.0]) b = AutoBcast([1.0, 2.0, 4.0]) c = AutoBcast([1.0, 2.0, 1.0]) @instr a += b * c @test a.x == [2,6,7.0] @test b.x == [1,2,4.0] @test c.x == [1,2,1.0] a = AutoBcast([1.0, 2.0, 3.0]) b = 2.0 c = AutoBcast([1.0, 2.0, 1.0]) @instr a += b * c @test a.x == [3,6,5.0] @test b == 2.0 @test c.x == [1,2,1.0] a = AutoBcast([1.0, 2.0, 3.0]) b = AutoBcast([1.0, 2.0, 4.0]) c = 3.0 @instr a += b * c @test a.x == [4,8,15.0] @test b.x == [1,2,4.0] @test c == 3.0 a = AutoBcast([1.0, 2.0, 3.0]) b = 2.0 c = 3.0 @instr a += b * c @test a.x == [7,8,9.0] @test b == 2.0 @test c == 3.0 @test zero(AutoBcast{Int,3}) == AutoBcast([0, 0, 0]) end ================================================ FILE: test/autodiff/autodiff.jl ================================================ using Test, NiLang, NiLang.AD include("vars.jl") include("stack.jl") include("gradfunc.jl") include("instructs.jl") include("ulog.jl") include("complex.jl") include("manual.jl") include("jacobian.jl") include("hessian_backback.jl") ================================================ FILE: test/autodiff/complex.jl ================================================ using Test, NiLang, NiLang.AD @testset "complex GVar" begin a = 1.0+ 2im @test GVar(a) == Complex(GVar(1.0), GVar(2.0)) @test GVar(a, a) == Complex(GVar(1.0, 1.0), GVar(2.0, 2.0)) gx = GVar(1.0 + 1.0im) gx2 = chfield(gx, grad, 1.0+0.0im) @test gx2 == Complex(GVar(1.0, 1.0), GVar(1.0, 0.0)) end @i function fr(f, loss, args...; il) f(args...) loss += (args |> tget(il)).re end @i function fi(f, loss, args...; il) f(args...) 
loss += (args |> tget(il)).im end function ccheck_grad(f, args; verbose=true, iloss=1) check_grad(fr, (f, 0.0, args...); verbose=verbose, iloss=2, il=1) && check_grad(fi, (f, 0.0, args...); verbose=verbose, iloss=2, il=1) end @testset "check grad" begin x = 1.0 - 4.0im y = 2.0 - 2.3im z = 3.0 + 1.0im r = 4.0 for opm in [PlusEq, MinusEq] @test check_inv(opm(complex), (1+2.0im, 2.0, 3.0); verbose=true) @test ccheck_grad(opm(complex), (1+2.0im, 2.0, 3.0); verbose=true, iloss=1) for (subop, args) in [ (opm(identity), (x,y)), (opm(+), (x, y, z)), (opm(-), (x, y, z)), (opm(*), (x, y, z)), (opm(/), (x, y, z)), (opm(^), (x, y, r)), (opm(exp), (x, y)), (opm(log), (x, y)), (opm(inv), (x, y)) ] @test ccheck_grad(subop, args; verbose=true, iloss=1) r1 = subop(args...) r2 = [(opm == (PlusEq) ? Base.:+ : Base.:-)(args[1], subop.f(args[2:end]...)), args[2:end]...] @test all(r1 .≈ r2) end for (subop, args) in [ (opm(angle), (r, y)), (opm(abs), (r, y)), (opm(abs), (r, 0.0im)), (opm(abs2), (r, y)) ] @show subop, args r1 = [subop(args...)...] r2 = [(opm == (PlusEq) ? Base.:+ : Base.:-)(args[1], subop.f(args[2:end]...)), args[2:end]...] @test r1 ≈ r2 @test check_grad(subop, args; verbose=true, iloss=1) end end for op in [NEG] @test check_inv(op, (x,); verbose=true) @test ccheck_grad(op, (x,); verbose=true, iloss=1) end end ================================================ FILE: test/autodiff/gradfunc.jl ================================================ using Test, NiLang, NiLang.AD const add = PlusEq(identity) @testset "NGrad" begin @test NGrad{3}(exp) isa NGrad{3,typeof(exp)} end @testset "instr" begin x, y = 3.0, 4.0 @instr Grad(add)(x, y; iloss=1) @test grad(x) == 1.0 @test grad(y) == 1.0 @test check_inv(Grad(add), (3.0, 4.0); verbose=true, atol=1e-5, iloss=1) x, y = 3.0, 4.0 @test check_grad(add, (x, y); iloss=1) x, y = 3.0, 4.0 Grad(add)(x, NoGrad(y); iloss=1) @test grad(y) === 0.0 @test check_inv(PlusEq(*), (0.4, 0.4, 0.5)) @test MinusEq(*)(GVar(0.0, 1.0), GVar(0.4), GVar(0.6)) == (GVar(-0.24, 1.0), GVar(0.4, 0.6), GVar(0.6, 0.4)) @test check_grad(PlusEq(*), (0.4, 0.4, 0.5); iloss=1) @test check_grad(MinusEq(*), (0.4, 0.4, 0.5); iloss=1) end @testset "i" begin @i function test1(a, b, out) a += b out += a * b end @i function tt(a, b) out ← 0.0 test1(a, b, out) (~test1)(a, b, out) a += b out → 0.0 end # compute (a+b)*b -> out x = 3.0 y = 4.0 out = 0.0 @test check_grad(test1, (x, y, out); iloss=3) @test check_grad(tt, (x, y); iloss=1) end @testset "broadcast" begin # compute (a+b)*b -> out @i function test1(a, b) a .+= b end @i function test2(a, b, out, loss) a .+= b out .+= (a .* b) loss += out[1] end x = [3, 1.0] y = [4, 2.0] out = [0.0, 1.0] loss = 0.0 # gradients @test check_grad(test2, (x, y, out, loss); iloss=4) end @testset "broadcast 2" begin # compute (a+b)*b -> out @i function test1(a, b) a += b end @i function test2(a, b, out) a += b out += (a * b) end # gradients a = 1.0 b = 1.3 c = 1.9 @test check_grad(test2, (a,b,c); iloss=3) x = GVar([3, 1.0]) y = GVar([4, 2.0]) lout = GVar.([0.0, 1.0], [0.0, 2.0]) @instr (~test2).(x, y, lout) @test grad.(lout) == [0,2.0] @test grad.(x) == [0, 4.0] @test grad.(y) == [0, 6.0] end @testset "function call function" begin # compute (a+b)*b -> out @i function test1(a, b) a += b end @i function test2(a, b, out) test1(a, out) (~test1)(a, out) out += (a * b) end a = 1.0 b = 1.3 c = 1.9 @test check_grad(test2, (a,b,c); iloss=3) end @testset "neg sign" begin @i function test(out, x, y) out += x * (-y) end @test check_grad(test, (0.1, 2.0, -2.5); verbose=true, iloss=1) 
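# Note: `iloss=1` marks the first argument as the scalar loss; `check_grad` compares
# the reverse-mode gradient against a numerical one. For `out += x * (-y)` above,
# the expected gradients are ∂out/∂x == -y and ∂out/∂y == -x.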
end @testset "i" begin @i function test1(a::T, b, out) where T<:Number add(a, b) out += a * b end @test isreversible(Grad(test1), Tuple{Number, Any,Any}) @test isreversible(~Grad(test1), Tuple{Number, Any,Any}) @test Grad(~test1) != ~(Grad(test1)) # this is not true end @testset "gradient" begin @test gradient((PlusEq(*)), (0.0, 2.0, 3.0); iloss=1) == (1.0, 3.0, 2.0) end ================================================ FILE: test/autodiff/hessian_backback.jl ================================================ using NiLang, NiLang.AD, Test using NiLang.AD: hessian_numeric @testset "hessian" begin h1 = hessian_backback(PlusEq(*), (0.0, 2.0, 3.0); iloss=1) h2 = hessian_numeric(PlusEq(*), (0.0, 2.0, 3.0); iloss=1) @test h1 ≈ h2 @i function test(a,b,c,d) a += b*c a += b^d c += b/d ROT(a, c, d) b += d ^ 2 a += c * d end h1 = hessian_backback(test, (0.0, 2.0, 1.0, 3.0); iloss=1) h2 = hessian_numeric(test, (0.0, 2.0, 1.0, 3.0); iloss=1) @show h2 @test isapprox(h1, h2, atol=1e-8) end ================================================ FILE: test/autodiff/instructs.jl ================================================ using NiLang, NiLang.AD using Test @testset "check grad" begin for opm in [PlusEq, MinusEq] @test check_grad(opm(identity), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(*), (1.0, 2.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(+), (1.0, 2.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(-), (1.0, 2.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(^), (1.0, 2.0, 2); verbose=true, iloss=1) @test check_grad(opm(^), (1.0, 2.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(inv), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(sqrt), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(abs), (1.0, -2.0); verbose=true, iloss=1) @test check_grad(opm(abs2), (1.0, -2.0); verbose=true, iloss=1) @test check_grad(opm(exp), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(log), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(sin), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(sinh), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(asin), (1.0, 0.2); verbose=true, iloss=1) @test check_grad(opm(cos), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(cosh), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(acos), (1.0, 0.2); verbose=true, iloss=1) @test check_grad(opm(tan), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(tanh), (1.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(atan), (1.0, -2.0); verbose=true, iloss=1) @test check_grad(opm(atan), (1.0, -2.0, 1.5); verbose=true, iloss=1) @test check_grad(opm(convert), (Fixed43(0.5), 2.0); verbose=true, iloss=1) @test check_grad(opm(/), (1.0, 2.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(min), (1.0, 2.0, 3.0); verbose=true, iloss=1) @test check_grad(opm(max), (1.0, 2.0, 3.0); verbose=true, iloss=1) @test check_grad(opm(min), (1.0, 3.0, 2.0); verbose=true, iloss=1) @test check_grad(opm(max), (1.0, 3.0, 2.0); verbose=true, iloss=1) @test_broken check_grad(opm(÷), (1.0, 2.0, 2.0); verbose=true, iloss=1) @test gradient(opm(sqrt), (1.0, 0.0); iloss=1)[2] == 0 end @test check_grad(NEG, (1.0,); verbose=true, iloss=1) @test check_grad(INV, (3.0,); verbose=true, iloss=1) @test check_grad(AddConst(2.0), (3.0,); verbose=true, iloss=1) @test check_grad(SubConst(2.0), (3.0,); verbose=true, iloss=1) @test check_grad(INC, (1.0,); verbose=true, iloss=1) @test check_grad(DEC, (1.0,); verbose=true, iloss=1) @test check_grad(ROT, (1.0, 2.0, 2.0); verbose=true, iloss=1) @test 
check_grad(ROT, (1.0, 2.0, 2.0); verbose=true, iloss=2) @test check_grad(IROT, (1.0, 2.0, 2.0); verbose=true, iloss=1) @test check_grad(IROT, (1.0, 2.0, 2.0); verbose=true, iloss=2) @test check_grad(HADAMARD, (3.0, 2.0); verbose=true, iloss=1) @test check_grad(HADAMARD, (3.0, 2.0); verbose=true, iloss=2) end @testset "partial gvar" begin @i function testf1(f, a, b) f(a, b, 2.0) end @i function testf2(f, a, b) f(a, 2.0, b) end for testf in [testf1, testf2] for opm in [PlusEq, MinusEq] @test check_grad(testf, (opm(*), 1.0, 2.0); verbose=true, iloss=2) @test check_grad(testf, (opm(+), 1.0, 2.0); verbose=true, iloss=2) @test check_grad(testf, (opm(-), 1.0, 2.0); verbose=true, iloss=2) @test check_grad(testf, (opm(^), 1.0, 2.0); verbose=true, iloss=2) @test check_grad(testf, (opm(atan), 1.0, -2.0); verbose=true, iloss=2) @test check_grad(testf, (opm(/), 1.0, 2.0); verbose=true, iloss=2) end end @test check_grad(testf1, (ROT, 1.0, 2.0); verbose=true, iloss=2) @test check_grad(testf1, (ROT, 1.0, 2.0); verbose=true, iloss=3) @test check_grad(testf1, (IROT, 1.0, 2.0); verbose=true, iloss=2) @test check_grad(testf1, (IROT, 1.0, 2.0); verbose=true, iloss=3) # ROT and HADAMARD do not allow different types of rotation elements end @testset "sincos" begin @i function f(s, c, x) (s, c) += sincos(x) end @test check_grad(f, (1.0, 2.0, 2.0); verbose=true, iloss=1) @test check_grad(f, (1.0, 2.0, 2.0); verbose=true, iloss=2) end @testset "AD over pop" begin @i function mean(out!::T, x) where T anc ← zero(out!) for i=1:length(x) anc += x[i] end out! += anc / (@const length(x)) FLOAT64_STACK[end+1] ↔ anc::T end @test check_grad(mean, (0.0, [1,2,3.0, 4.0]); iloss=1) end @testset "AD over pipe" begin @i function mean(out!, anc, x) for i=1:length(x) PlusEq(identity)(anc, x[i]) SWAP(anc, x[i]) end out! += anc / (@const length(x)) end @test check_grad(mean, (0.0, 0.0, [1,2,3.0, 4.0]); iloss=1, verbose=true) end @testset "push, load data" begin stack = [] val = [1,2,3] @instr PUSH!(stack, val) @test val == Int[] val = 3.0 @instr PUSH!(stack, val) @test val == 0.0 val = 3.0 @instr PUSH!(stack, val) x = GVar(3.0) #@test_throws InvertibilityError @instr POP!(stack, x) z = 3.0 @instr PUSH!(stack, z) z = GVar(0.0) @instr POP!(stack, z) @test z == GVar(3.0) x = [1.0, 2.0, 3.0] @instr PUSH!(stack, x) y = empty(x) @instr POP!(stack, y) @test y == GVar.([1,2,3.0]) x = [1.0, 2.0, 3.0] @instr PUSH!(stack, x) y = empty(x) @instr POP!(stack, y) @test y == [1,2,3.0] end @testset "dataviews" begin @i function f(z, y, x) y += cos(x |> INV) z += tan(y |> AddConst(4.0)) z += y * (x |> NEG |> SubConst(0.5) |> INV) z += sin(x |> INV) end @test check_grad(f, (0.2, 0.5, 0.8); iloss=1) end @testset "additive identity" begin struct TestAdd2{T} x::T y::Vector{T} end x = TestAdd2(GVar(1.0, 2.0), [GVar(2.0, 1.2)]) y = TestAdd2(GVar(6.0, 3.0), [GVar(4.0, 4.1)]) @test getfield.(MinusEq(identity)(x, y), :x) == getfield.((TestAdd2(GVar(-5.0, 2.0), [GVar(-2.0, 1.2)]), TestAdd2(GVar(6.0, 5.0), [GVar(4.0, 5.3)])), :x) x = TestAdd2(GVar(1.0, 2.0), [GVar(2.0, 1.2)]) y = TestAdd2(GVar(6.0, 3.0), [GVar(4.0, 4.1)]) @test getfield.(MinusEq(identity)(x, y), :y) == getfield.((TestAdd2(GVar(-5.0, 2.0), [GVar(-2.0, 1.2)]), TestAdd2(GVar(6.0, 5.0), [GVar(4.0, 5.3)])), :y) end ================================================ FILE: test/autodiff/jacobian.jl ================================================ using NiLang, NiLang.AD using Test using NiLang.AD: wrap_bcastgrad @i function asarrayfunc(params; f, kwargs...)
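# Note: `asarrayfunc` forwards the entries of `params` to a 1-, 2-, or 3-argument
# reversible instruction `f`, so the jacobian routines below can treat different
# instructions uniformly as maps over a parameter vector.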
if (length(params) == 1, ~) f(params[1]; kwargs...) elseif (length(params) == 2, ~) f(params[1], params[2]; kwargs...) elseif (length(params) == 3, ~) f(params[1], params[2], params[3]; kwargs...) end end @testset "bcastgrad" begin T = AutoBcast{Int, 4} @test wrap_bcastgrad(T, ones(10)) == [GVar(1.0, AutoBcast(ones(4))) for i=1:10] @test wrap_bcastgrad(T, 3) == 3 @test wrap_bcastgrad(T, NoGrad(3.0)) == 3.0 @test wrap_bcastgrad(T, 3.0) == GVar(3.0, AutoBcast(ones(4))) @test wrap_bcastgrad(T, (3.0,)) == (GVar(3.0, AutoBcast(ones(4))),) @test wrap_bcastgrad(T, exp) == exp @test wrap_bcastgrad(T, Inv(exp)) == Inv(exp) end @testset "jacobians" begin for op in [PlusEq(*), PlusEq(/), PlusEq(^), ROT] j1 = NiLang.AD.jacobian(asarrayfunc, [0.3, 0.4, 2.0]; iin=1, f=op) j2 = NiLang.AD.jacobian_repeat(asarrayfunc, [0.3, 0.4, 2.0]; iin=1, f=op) @test j1 ≈ j2 end for op in [PlusEq(identity), PlusEq(abs), SWAP, PlusEq(exp), PlusEq(log), PlusEq(sin), PlusEq(cos)] j1 = NiLang.AD.jacobian(asarrayfunc, [0.3, 0.4]; iin=1, f=op) j2 = NiLang.AD.jacobian_repeat(asarrayfunc, [0.3, 0.4]; iin=1, f=op) @test j1 ≈ j2 end for op in [-, NEG] j1 = NiLang.AD.jacobian(asarrayfunc, [0.3]; iin=1, f=op) j2 = NiLang.AD.jacobian_repeat(asarrayfunc, [0.3]; iin=1, f=op) @test j1 ≈ j2 end end @testset "nograd" begin @test AddConst(3.0)(NoGrad(2.0)) == NoGrad(5.0) @test SWAP(NoGrad(2.0), NoGrad(3.0)) == (NoGrad(3.0), NoGrad(2.0)) @test PlusEq(*)(NoGrad(2.0), NoGrad(3.0), NoGrad(4.0)) == (NoGrad(14.0), NoGrad(3.0), NoGrad(4.0)) end ================================================ FILE: test/autodiff/manual.jl ================================================ using NiLang, Test using NiLang.AD test_func(x) = exp(x) NiLang.AD.primitive_grad(::typeof(test_func), x) = exp(x) test_g(x, y; k=0) = x^k * y function NiLang.AD.primitive_grad(::typeof(test_g), x, y; k=0) return k*x^(k-1)*y, x^k end @testset "primitive grad" begin @test check_grad(PlusEq(test_func), (1.0, 1.0), iloss=1) @test check_grad(PlusEq(test_g), (1.0, 3.0, 2.0), k=2, iloss=1) end ================================================ FILE: test/autodiff/stack.jl ================================================ using NiLang, Test, NiLang.AD @testset "loaddata" begin @test NiLang.loaddata(GVar(0.1), 0.3) == GVar(0.3) @test NiLang.loaddata(Complex(GVar(0.1, 0.2), GVar(0.2)), 0.3+0.6im) == Complex(GVar(0.3), GVar(0.6)) @test NiLang.loaddata(typeof(Complex(GVar(0.1, 0.2), GVar(0.2))), 0.3+0.6im) == Complex(GVar(0.3), GVar(0.6)) @test NiLang.loaddata(GVar(0.2, AutoBcast{Float64,3}(zeros(3))), 0.3) == GVar(0.3, AutoBcast{Float64,3}(zeros(3))) @test NiLang.loaddata((GVar(0.2, AutoBcast{Float64,3}(zeros(3))), 7), (0.3, 4)) == (GVar(0.3, AutoBcast{Float64,3}(zeros(3))), 4) @test NiLang.loaddata(typeof((GVar(0.2, AutoBcast{Float64,3}(zeros(3))), 7)), (0.3, 4)) == (GVar(0.3, AutoBcast{Float64,3}(zeros(3))), 4) @test NiLang.loaddata(4, 2.0) == 2 end @testset "push load" begin x = (0.3, 3.0, [1,2,3.0]) @instr PUSH!(x) t = (0.0, 0.0, Float64[]) @test x == t && typeof(x) == typeof(t) y = (0.0, GVar(0.0), GVar{Float64,Float64}[]) @instr POP!(y) t = (0.3, GVar(3.0), GVar([1,2, 3.0])) @test y == t && typeof(y) == typeof(t) x = [0.3, 3.0, [1,2,3.0]] @instr PUSH!(x) t = [] @test x == t && typeof(x) == typeof(t) y = [] @instr POP!(y) t = [0.3, GVar(3.0), GVar([1,2, 3.0])] @test y == t && typeof(y) == typeof(t) x = (0.3, 3.0, [1,2,3.0]) @instr @invcheckoff PUSH!(x) t = (0.0, 0.0, Float64[]) @test x == t && typeof(x) == typeof(t) y = (0.0, GVar(0.0), GVar(zeros(0))) @instr @invcheckoff POP!(y) t = 
(0.3, GVar(3.0), GVar([1,2, 3.0])) @test y == t && typeof(y) == typeof(t) x = (0.3, 3.0, [1,2,3.0]) @instr @invcheckoff COPYPUSH!(x) t = (0.3, 3.0, [1,2,3.0]) @test x == t && typeof(x) == typeof(t) y = (0.3, GVar(t[2]), GVar(t[3])) @instr @invcheckoff COPYPOP!(y) t = (0.3, GVar(3.0), GVar([1,2, 3.0])) @test y == t && typeof(y) == typeof(t) x = (0.3, 3.0, [1,2,3.0]) @instr COPYPUSH!(x) t = (0.3, 3.0, [1,2,3.0]) @test x == t && typeof(x) == typeof(t) y = (0.3, GVar(t[2]), GVar(t[3])) @instr COPYPOP!(y) t = (0.3, GVar(3.0), GVar([1,2, 3.0])) @test y == t && typeof(y) == typeof(t) x = [0.3, 3.0, [1,2,3.0]] @instr COPYPUSH!(x) t = [0.3, 3.0, [1,2,3.0]] @test x == t && typeof(x) == typeof(t) y = [0.3, GVar(t[2]), GVar(t[3])] @instr COPYPOP!(y) t = [0.3, GVar(3.0), GVar([1,2, 3.0])] @test y == t && typeof(y) == typeof(t) end ================================================ FILE: test/autodiff/ulog.jl ================================================ using NiLang, NiLang.AD using Test, Random using FixedPointNumbers using NiLangCore: default_constructor using FiniteDifferences @testset "ULogarithmic" begin @test check_grad(PlusEq(gaussian_log), (1.0, 2.0); iloss=1) function muleq(f, x::T, y::T, z::T) where T x = default_constructor(ULogarithmic{T}, x) y = default_constructor(ULogarithmic{T}, y) z = default_constructor(ULogarithmic{T}, z) x *= f(y, z) x.log end g1, = FiniteDifferences.grad(central_fdm(5,1), arr->muleq(+, arr...), [7.0, 5.0, 3.0]) x, y, z = default_constructor(ULogarithmic{Float64}, 7.0), default_constructor(ULogarithmic{Float64}, 5.0), default_constructor(ULogarithmic{Float64}, 3.0) @instr (MulEq(+))(x, y, z) @instr GVar(x) @instr GVar(y) @instr GVar(z) @instr x.log.g += 1 @instr (~MulEq(+))(x, y, z) @test grad(x.log) ≈ g1[1] @test grad(y.log) ≈ g1[2] @test grad(z.log) ≈ g1[3] g2, = FiniteDifferences.grad(central_fdm(5,1), arr->muleq(-, arr...), [7.0, 5.0, 3.0]) x, y, z = default_constructor(ULogarithmic{Float64}, 2.0), default_constructor(ULogarithmic{Float64}, 5.0), default_constructor(ULogarithmic{Float64}, 3.0) @instr (MulEq(-))(x, y, z) @instr GVar(x) @instr GVar(y) @instr GVar(z) @instr x.log.g += 1 @instr (~MulEq(-))(x, y, z) @test grad(x.log) ≈ g2[1] @test grad(y.log) ≈ g2[2] @test grad(z.log) ≈ g2[3] end @testset "iexp" begin @i function i_exp(y!::T, x::T) where T<:Union{Fixed, GVar{<:Fixed}} @invcheckoff begin @routine begin s ← one(ULogarithmic{T}) lx ← one(ULogarithmic{T}) k ← 0 end lx *= convert(x) y! += convert(s) @from k==0 while s.log > -20 k += 1 s *= lx / k y! 
+= convert(s) end ~(@from k==0 while s.log > -20 k += 1 s *= x / k end) lx /= convert(x) ~@routine end end x = Fixed43(3.5) res = i_exp(Fixed43(0.0), x)[1] gx = grad(Grad(i_exp)(Val(1), Fixed43(0.0), x)[3]) @test res ≈ exp(3.5) @test gx ≈ exp(3.5) end ================================================ FILE: test/autodiff/vars.jl ================================================ using NiLang, NiLang.AD using Test @testset "gvar" begin g1 = GVar(0.0) @test (~GVar)(g1) === 0.0 @assign (g1 |> grad) 0.5 @test g1 === GVar(0.0, 0.5) @test_throws InvertibilityError (~GVar)(g1) @test !almost_same(GVar(0.0), GVar(0.0, 1.0)) @test zero(GVar(3.0, 2.0)) == GVar(0.0) @test one(GVar(3.0, 2.0)) == GVar(1.0) @test iszero(GVar(0.0, 2.0)) @test zero(GVar(2, AutoBcast([1, 0, 0]))) == GVar(0, AutoBcast([0, 0, 0])) @test GVar(true) == true @test grad("x") == "" @test grad((1.0, GVar(1.0, 2.0))) == (0.0,2.0) @test grad(grad) == 0 @test grad((1.0, 2.0)) == (0.0,0.0) @test grad([1.0, 2.0]) == [0.0,0.0] @test grad([GVar(1.0, 3.0), GVar(2.0, 1.0)]) == [3.0,1.0] @test grad(Complex(GVar(1.0, 3.0), GVar(2.0, 1.0))) == Complex(3.0,1.0) @test grad(Complex(1.0, 2.0)) == Complex(0.0,0.0) end @testset "assign" begin arg = (1,2,GVar(3.0)) @assign (arg.:3).g 4.0 @test arg[3].g == 4.0 gv = GVar(1.0, GVar(0.0)) @test gv.g.g === 0.0 @assign gv.g.g 7.0 @test gv.g.g === 7.0 gv = GVar(1.0, GVar(0.0)) @assign gv |> grad |> grad 0.0 @test gv.g.g === 0.0 args = (GVar(0.0, 1.0),) @assign (args.:1 |> grad) 0.0 @test args[1].g == 0.0 arr = [1.0] arr0 = arr @assign arr[] 0.0 @test arr[] == 0.0 @test arr === arr0 end @testset "assign tuple" begin x = 0.3 @instr for i=1:length(x) GVar(x) end @test x === GVar(0.3) end @testset "assign bcast func" begin # vector bcast x = [GVar(0.1, 0.1), GVar(0.2, 0.2)] res = [1.0, 2.0] @assign (x .|> value) res @test x == [GVar(1.0, 0.1), GVar(2.0, 0.2)] # tuple bcast x = (GVar(0.1, 0.1), GVar(0.2, 0.2)) res = (1.0, 2.0) @assign (x .|> value) res @test x == (GVar(1.0, 0.1), GVar(2.0, 0.2)) end @testset "GVar over general type" begin struct ABC{T1, T2} a::T1 b::T1 c::T2 end x = ABC(1, 2, 3.0) @test GVar(x) == ABC(1, 2, GVar(3.0)) @test GVar(x, x) == ABC(GVar(1, 1), GVar(2, 2), GVar(3.0, 3.0)) @test (~GVar)(ABC(1, 2, GVar(3.0))) == x @test grad(ABC(1, 2, GVar(3.0, 2.0))) == ABC(0, 0, 2.0) @test GVar(1.0 + 2.0im , 2.0im + 4.0im) == Complex(GVar(1.0, 2.0), GVar(2.0, 4.0)) @test GVar((1.0, 2.0im) , (2.0im, 4.0im)) == (GVar(1.0, 2.0), Complex(GVar(0.0), GVar(2.0, 4.0))) # without type parameter struct EFG x end @test GVar(EFG) == EFG @test grad(EFG(GVar(2.0, 3.0))) == EFG(3.0) end @testset "dict" begin @i function f() d ← Dict(1=>GVar(1.0, 2.0)) d → Dict(1=>GVar(1.0)) end @test f() == () end @testset "NoGrad" begin a = NoGrad(0.5) @test a isa NoGrad @test zero(a) == NoGrad(0.0) @test (~NoGrad)(a) === 0.5 @test -NoGrad(0.5) == NoGrad(-0.5) a2 = NoGrad{Float64}(a) @test a2 === a println(a2) @test chfield(a2, NoGrad, NoGrad(0.4)) === 0.4 @test unwrap(NoGrad(a)) == 0.5 @test NoGrad(a) < 0.6 @test NoGrad(a) <= 0.6 @test NoGrad(a) >= 0.4 @test a ≈ 0.5 @test a == 0.5 @test a > 0.4 @test isless(a, 0.6) end ================================================ FILE: test/complex.jl ================================================ using Test, NiLang @testset "complex" begin a = 1.0+ 2im @instr (a |> real) += 2 @instr (a |> imag) += 2 @test a == 3.0 + 4im a = 1.0+ 2im @instr a += complex(2.0, 2.0) @test a == 3.0 + 4.0im @i function f(loss, a::Complex{T}, b) where T @routine begin c ← zero(a) sq ← zero(T) c += a * b sq += (c |> 
real) ^ 2 sq += (c |> imag) ^ 2 end loss += sq ^ 0.5 ~@routine end a = 1.0 + 2.0im b = 2.0 + 1.0im loss = 0.0 @instr f(loss, a, b) @test loss ≈ abs(a*b) end @testset "complex arithmetics" begin for op in [exp, log, identity] x, y = 2.0+1.0im, 0.5+0.2im @instr x += op(y) @test x ≈ 2.0+1.0im + op(0.5+0.2im) end for op in [SWAP, HADAMARD] x, y = 2.0+1.0im, 0.5+0.2im @instr op(x, y) @test x ≈ op(2.0+1.0im, 0.5+0.2im)[1] @test y ≈ op(2.0+1.0im, 0.5+0.2im)[2] end for op in [NEG, INC, DEC] x = 2.0+1.0im @instr op(x) @test x ≈ op(2.0+1.0im) end for op in [^, /, +, -] x, y, z = 2.0+1.0im, 0.5+0.2im, 0.8-2.0im @instr PlusEq(op)(x, y, z) @test x ≈ 2.0+1.0im + op(0.5+0.2im, 0.8-2.0im) end end ================================================ FILE: test/instructs.jl ================================================ using NiLang using Test @testset "identity" begin x, y = 0.2, 0.5 @instr x += identity(y) @test x == 0.7 && y==0.5 end @testset "*, /" begin x, y, out = 2.0, 2.0, 1.0 @instr out += x * y @test x == 2.0 && y == 2.0 && out == 5.0 x, y, out = 2.0, 2.0, 1.0 @instr out += x / y @test x == 2.0 && y == 2.0 && out == 2.0 out = Fixed43(0.0) x = 1 @instr out += x/2 @test out === Fixed43(0.5) @instr out -= x/2 @test out === Fixed43(0.0) end @testset "SWAP" begin x, y = 1, 2 @instr SWAP(x, y) @test x == 2 && y == 1 end @testset "NEG" begin x = 0.3 @instr NEG(x) @test x == -0.3 @test check_inv(NEG, (x,)) end @testset "INV" begin x = 0.2 @instr INV(x) @test x == 5.0 @test check_inv(INV, (x,)) end @testset "AddConst" begin x = 0.3 @instr AddConst(4.0)(x) @test x == 4.3 @test check_inv(AddConst(4.0), (x,)) x = 0.3 @instr SubConst(4.0)(x) @test x == -3.7 @test check_inv(SubConst(4.0), (x,)) end @testset "FLIP" begin x = false @instr FLIP(x) @test x == true @test check_inv(FLIP, (x,)) end @testset "ROT" begin x, y, θ = 0.0, 1.0, π @test check_inv(ROT, (x, y, θ); verbose=true) @test check_inv(IROT, (x, y, θ); verbose=true) end @testset "INC, DEC" begin x = Int32(2) @instr INC(x) @test x === Int32(3) @instr DEC(x) @test x === Int32(2) end @testset "HADAMARD" begin x = 0.5 y = 0.8 @test check_inv(HADAMARD, (x, y)) end @testset "dataviews" begin @i function f(z, y, x) y += cos(x |> INV) z += tan(y |> AddConst(4.0)) z += y * (x |> NEG |> SubConst(0.5) |> INV) z += sin(x |> INV) end @test check_inv(f, (0.2, 0.5, 0.8)) end @testset "fixed point arithmetics" begin for op in [exp, log, sin, sinh, asin, cos, cosh, acos, tan, tanh, atan] x, y = Fixed43(2.0), Fixed43(0.5) @instr x += op(y) @test x ≈ 2.0 + op(0.5) end for op in [SWAP, HADAMARD] x, y = Fixed43(2.0), Fixed43(0.5) @instr op(x, y) @test x ≈ op(2.0, 0.5)[1] @test y ≈ op(2.0, 0.5)[2] end for op in [NEG, INC, DEC] x = Fixed43(2.0) @instr op(x) @test x ≈ op(2.0) end for op in [^, /] x, y, z = Fixed43(2.0), Fixed43(0.5), Fixed43(0.8) @instr PlusEq(op)(x, y, z) @test x ≈ 2.0 + op(0.5, 0.8) end end @testset "additive identity" begin struct TestAdd{T} x::T y::Vector{T} end @test getfield.(PlusEq(identity)(TestAdd(1, [2]), TestAdd(10, [2])), :x) == (TestAdd(11, [4]).x, TestAdd(10, [2]).x) @test getfield.(PlusEq(identity)(TestAdd(1, [2]), TestAdd(10, [2])), :y) == (TestAdd(11, [4]).y, TestAdd(10, [2]).y) end ================================================ FILE: test/macros.jl ================================================ using Test, NiLang using NiLang: auto_alloc, auto_expand NiLang.alloc(::typeof(NiLang.i_sum), x::AbstractArray{T}) where T = zero(T) @testset begin @test auto_alloc(:(y = exp(x))) == Expr(:block, :(y ← $alloc(exp, x)), :(y += exp(x))) ex1 = 
:(PlusEq(sin)(z, sin(x + 2y))) ex2 = auto_expand(ex1) @test length(ex2.args) == 13 @i function test(x, y, z) #@auto_expand z += sin(x + 2y) @invcheckoff @auto_expand z += sin(x + 2y) end x, y, z = 1.0, 2.0, 3.0 @test test(x, y, z)[3] == z + sin(x + 2y) @test check_inv(test, (x, y, z)) @i function test(x, y, z, a) @auto_expand PlusEq(sin)(Complex{}(x, y), Complex{}(z, sin(a))) end x, y, z, a = 1.0, 2.0, 3.0, 4.0 @test Complex(test(x, y, z, a)[1:2]...) == 1+im*y + sin(z+im*sin(a)) @test check_inv(test, (x, y, z, a)) @i function test2(y, x) @routine begin @auto_alloc i_sum(z, x) end y += z ~@routine end @test test2(1.0, [2,3.0])[1] == 6.0 @test check_inv(test2, (1.0, [2,3.0])) end ================================================ FILE: test/runtests.jl ================================================ using NiLang using Test @testset "vars.jl" begin include("vars.jl") end @testset "utils.jl" begin include("utils.jl") end @testset "wrappers.jl" begin include("wrappers.jl") end @testset "instructs.jl" begin include("instructs.jl") end @testset "complex.jl" begin include("complex.jl") end @testset "ulog.jl" begin include("ulog.jl") end @testset "macros.jl" begin include("macros.jl") end @testset "autobcast.jl" begin include("autobcast.jl") end @testset "autodiff" begin include("autodiff/autodiff.jl") end @testset "stdlib" begin include("stdlib/stdlib.jl") end ================================================ FILE: test/stdlib/base.jl ================================================ using NiLang, NiLang.AD using Test @testset "sqdistance" begin @test i_sqdistance(0.0, [1.0, 0.0], [0.0, 1.0])[1] == 2.0 end ================================================ FILE: test/stdlib/bennett.jl ================================================ using Test using NiLang, NiLang.AD @testset "integrate" begin FT = Float64 n = 100 h = FT(π/n) dt = FT(0.01) α = FT(4e-2) @i function step!(dest::AbstractArray{T}, src::AbstractArray{T}; α, h, dt) where T n ← length(dest) @invcheckoff for i=1:n @routine begin @zeros T cum g h2 αcum cum += src[mod1(i+1, n)] + src[mod1(i-1, n)] cum -= 2*src[i] αcum += cum * α h2 += h^2 g += αcum/h2 end dest[i] += src[i] dest[i] += dt*g ~@routine end n → length(dest) end x = zeros(FT, n) x[n÷2] = 1 #state = Dict{Int,Vector{FT}}() k = 4 N = 100 x_last = NiLang.direct_emulate(step!, FT.(x); N=N, α=α, h=h, dt=dt) log1 = NiLang.BennettLog() log2 = NiLang.BennettLog() log3 = NiLang.BennettLog() _, x_last_b, _ = bennett(step!, zero(FT.(x)), FT.(x); k=k, N=N, α=α, h=h, dt=dt, logger=log1) _, x_last_b2 = bennett!(step!, Dict(1=>FT.(x)); k=k, N=N, α=α, h=h, dt=dt, logger=log2) _, x_last_b3 = bennett!([step! for _=1:100], Dict(1=>FT.(x)); k=k, N=N, α=α, h=h, dt=dt, logger=log3) @test sum(x_last_b) ≈ 1 @test x_last ≈ x_last_b @test x_last ≈ x_last_b2[N+1] @test x_last ≈ x_last_b3[N+1] @test length(log1.fcalls) > length(log2.fcalls) @test length(log1.fcalls) < 2*length(log2.fcalls) @test length(log3.fcalls) == length(log2.fcalls) @i function loss(out, step, y, x; kwargs...) bennett((@skip! step), y, x; kwargs...) out += y[n÷2] end @i function loss2(out, step, d; N, kwargs...) bennett!((@skip! step), d; N, kwargs...) 
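# Note: `bennett!` above runs Bennett's time-space tradeoff scheme, checkpointing
# intermediate states into the dictionary `d`; `d[N+1]` below is the state after all
# `N` steps, from which the scalar loss is read.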
out += d[N+1][n÷2] end _, _, _, gx = NiLang.AD.gradient(loss, (0.0, step!, zero(x), copy(x)); iloss=1, k=k, N=N, α=α, h=h, dt=dt) _, _, gx2 = NiLang.AD.gradient(loss2, (0.0, step!, Dict(1=>copy(x))); iloss=1, k=k, N=N, α=α, h=h, dt=dt) x_last_2 = NiLang.direct_emulate(step!, (x2=copy(x); x2[n÷2]+=1e-5; FT.(x2)); N=N, α=α, h=h, dt=dt) @test gx[n÷2] ≈ (x_last_2 - x_last)[n÷2]/1e-5 @test gx2[1][n÷2] ≈ (x_last_2 - x_last)[n÷2]/1e-5 end ================================================ FILE: test/stdlib/blas.jl ================================================ using Test using LinearAlgebra using NiLang, NiLang.AD @testset "i_norm2, dot" begin out = 0.0im vec = [1.0im, 2.0, 3.0] vec2 = [1.0, 2.0im, 5.0] @instr i_norm2(out, vec) @test out ≈ norm(vec)^2 @test check_inv(i_norm2, (out, vec)) out = 0.0im vec = [1.0im, 2.0, 3.0] vec2 = [1.0, 2.0im, 5.0] @instr i_dot(out, vec, vec2) @test out ≈ dot(vec, vec2) @test check_inv(i_dot, (out, vec, vec2)) out = 0.0 vec = [1.0, 2.0, 3.0] vec2 = [1.0, 2.0, 5.0] @test check_grad(i_norm2, (out, vec); verbose=true, iloss=1) out = 0.0 @instr i_dot(out, vec, vec2) @test out ≈ dot(vec, vec2) @test check_inv(i_dot, (out, vec, vec2)) @test check_grad(i_dot, (0.0, vec, vec2); verbose=true, iloss=1) m = randn(4,4) n = randn(4,4) out = 0.0 @instr i_dot(out, m[:,2], n[:,4]) @test out ≈ dot(m[:,2], n[:,4]) @test check_inv(i_dot, (out, m[:,2], n[:,4])) @test check_grad(i_dot, (0.0, vec, vec2); verbose=true, iloss=1) end function naive_umm!(x, params) N = size(x, 1) k = 0 for j=1:N for i=N-1:-1:j k += 1 a, b = rot(x[i],x[i+1],params[k]) x[i], x[i+1] = a, b end end end function inv_naive_umm!(x, params) N = size(x, 1) k = N*(N-1) ÷ 2 for j=N:-1:1 for i=j:N-1 a, b = rot(x[i],x[i+1],-params[k]) x[i], x[i+1] = a, b k -= 1 end end end @testset "naive unitary" begin x = randn(200) params = randn(100*199).*2π x0 = copy(x) params0 = copy(params) naive_umm!(x, params) inv_naive_umm!(x, params) @test params ≈ params0 @test x ≈ x0 end @testset "unitary" begin x = randn(20) params = randn(10*19) * 2π x0 = copy(x) params0 = copy(params) Nx = length(x) @instr i_umm!(x, params) x1 = copy(x0) params1 = copy(params0) naive_umm!(x1, params1) @test params ≈ params1 @test x ≈ x1 @instr (~i_umm!)(x, params) @test params ≈ params0 @test x ≈ x0 @test check_inv(i_umm!, (x, params)) end ================================================ FILE: test/stdlib/linalg.jl ================================================ using Test using NiLang, NiLang.AD using LinearAlgebra using Random @testset "inv" begin Random.seed!(2) id = [1 0 0; 0 1 0; 0 0 1.0] @test check_inv(i_inv!, (randn(3, 3), randn(3, 3))) @test check_inv(PlusEq(det), (0.3, randn(3, 3))) @test check_inv(PlusEq(logdet), (0.3, rand(3, 3) .+ id)) @test check_grad(PlusEq(det), (0.3, randn(3, 3)), iloss=1) @test check_grad(PlusEq(logdet), (0.3, rand(3, 3) .+ id), iloss=1) @i function loss(out!, y, A) i_inv!(y, A) out! += y[1,1] end @test check_grad(loss, (0.0, randn(3, 3), randn(3, 3)); iloss=1) end @testset "affine" begin Random.seed!(2) A = randn(5, 5) b = randn(5) x = randn(5) y! = zeros(5) @test i_affine!(y!, A, b, x)[1] ≈ A*x + b end ================================================ FILE: test/stdlib/mapreduce.jl ================================================ using NiLang, Test @testset "filter and mapfoldl" begin @i function f(z, y, x) i_filter!((@skip! x -> x < 0), y, x) i_mapfoldl((@skip! exp), (@skip! 
PlusEq(identity)), z, y) end @test f(0.0, Float64[], [-1, -0.5, 3])[1] == exp(-0.5) + exp(-1) end ================================================ FILE: test/stdlib/nnlib.jl ================================================ using Test, Random using NiLang, NiLang.AD function _sce(x::AbstractArray{T,N}, p) where {T,N} x = x .- maximum(x; dims=N) # avoid data overflow rho = exp.(x) Z = sum(rho; dims=N) return dropdims(sum((log.(Z) .- x) .* p; dims=N), dims=N) end @testset "softmax_crossentropy" begin Random.seed!(2) x = randn(10) x0 = copy(x) p = randn(10); p=p./maximum(p) res = _sce(x, p) imax = 0 Z = 0.0 out = 0.0 xmax = 0.0 x_ = x p_ = p @instr i_softmax_crossentropy(x_, p_, imax, xmax, Z, out) @show Z @test isapprox(imax, argmax(x0), atol=1e-8) @test isapprox(out, res[], atol=1e-8) @instr (~i_softmax_crossentropy)(x_, p_, imax, xmax, Z, out) args = x_, p_, imax, xmax, Z, out @test check_inv(i_softmax_crossentropy, args) args = x_, p_, imax, xmax, Z, out @test check_grad(i_softmax_crossentropy, args; iloss=6, verbose=true) end @testset "logsumexp" begin function logsumexp2(x) mx = maximum(x) log.(sum(exp.(x .- mx))) .+ mx end x = randn(100) @test i_ascending!(Float64[], Int[], x)[1][end] == maximum(x) @test i_logsumexp(0.0, 0.0, Float64[], Int[], x)[1] ≈ logsumexp2(x) @test check_inv(i_logsumexp, (0.0, 0.0, Float64[], Int[], x)) end ================================================ FILE: test/stdlib/sparse.jl ================================================ using NiLang using SparseArrays using Test, Random @testset "dot" begin Random.seed!(2) sp1 = sprand(10, 10,0.3) sp2 = sprand(10, 10,0.3) @test SparseArrays.dot(sp1, sp2) ≈ i_dot(0.0, sp1, sp2)[1] end @testset "mul!" begin Random.seed!(2) sp1 = sprand(10, 10,0.3) v = randn(10) out = zero(v) @test SparseArrays.mul!(copy(out), sp1, v, 0.5, 1) ≈ i_mul!(copy(out), sp1, v, 0.5, 1)[1] end ================================================ FILE: test/stdlib/statistics.jl ================================================ import Statistics using Test, Random using NiLang, NiLang.AD using Distributions @testset "statistics" begin x = randn(100) y = randn(100) @test i_mean_sum(0.0, 0.0, x)[1] ≈ Statistics.mean(x) info = VarianceInfo(Float64) @test almost_same(i_var_mean_sum(info, copy(x))[1], VarianceInfo(Statistics.var(x), Statistics.var(x)*99, Statistics.mean(x), sum(x))) @test almost_same((~i_var_mean_sum)(i_var_mean_sum(info, copy(x))...), (info, x)) @test almost_same(i_cor_cov(0.0, 0.0, copy(x), copy(y)), (Statistics.cor(x,y), Statistics.cov(x,y), x, y)) @test almost_same((~i_cor_cov)(i_cor_cov(0.0, 0.0, copy(x), copy(y))...), (0.0, 0.0, x, y)) end @testset "normal log pdf" begin out = 0.0 x = 1.0 μ = 0.3 σ = 1.5 l1 = i_normal_logpdf(out, x, μ, σ) distri = Normal(μ, σ) l2 = logpdf(distri, x) @test l1[1] ≈ l2 @test check_inv(i_normal_logpdf, (out, x, μ, σ)) end ================================================ FILE: test/stdlib/stdlib.jl ================================================ include("base.jl") include("blas.jl") include("linalg.jl") include("statistics.jl") include("nnlib.jl") include("sparse.jl") include("mapreduce.jl") include("bennett.jl") ================================================ FILE: test/ulog.jl ================================================ using NiLang, NiLang.AD using Test, Random using NiLangCore: default_constructor @testset "basic instructions, ULogarithmic" begin x, y = default_constructor(ULogarithmic{Int}, 1), default_constructor(ULogarithmic{Int}, 2) @instr x *= y @test x == 
default_constructor(ULogarithmic{Int}, 3) @test y == default_constructor(ULogarithmic{Int}, 2) @test PlusEq(gaussian_log)(1.0, 2.0) == (1.0+log(1+exp(2.0)), 2.0) x, y, z = default_constructor(ULogarithmic{Float64}, 7.0), default_constructor(ULogarithmic{Float64}, 2.0), default_constructor(ULogarithmic{Float64}, 3.0) @instr x *= y + z @test check_inv(MulEq(+), (x, y, z)) @test x.log ≈ log(exp(7.0) * (exp(2.0) + exp(3.0))) x, y, z = default_constructor(ULogarithmic{Float64}, 7.0), default_constructor(ULogarithmic{Float64}, 5.0), default_constructor(ULogarithmic{Float64}, 3.0) @instr x *= y - z @test x.log ≈ log(exp(7.0) * (exp(5.0) - exp(3.0))) x, y, z = default_constructor(ULogarithmic{Float64}, 7.0), default_constructor(ULogarithmic{Float64}, 5.0), default_constructor(ULogarithmic{Float64}, 3.0) @instr x *= y^3.4 @test x.log ≈ log(exp(5.0)^3.4 * exp(7.0)) x, y, z = default_constructor(ULogarithmic{Float64}, 7.0), default_constructor(ULogarithmic{Float64}, 5.0), default_constructor(ULogarithmic{Float64}, 3.0) @instr x *= 3 @test x.log ≈ log(exp(7.0) * 3) end @testset "error on += and -=" begin @i function f(x::ULogarithmic) x += ULogarithmic(3.0) end @test_throws MethodError f(ULogarithmic(2.0)) @test_throws MethodError (~f)(ULogarithmic(2.0)) end ================================================ FILE: test/utils.jl ================================================ using NiLang using Test @testset "vec dataview" begin @i function f(x::AbstractVector, y::AbstractMatrix) x .+= (y |> vec) vec(y)[5] += x[4] end x = zeros(25) y = ones(5,5) z = ones(5,5) z[5] = 2.0 @instr f(x, y) @test y == z end ================================================ FILE: test/vars.jl ================================================ using Test, NiLang, NiLangCore @testset "@zeros" begin @test (@macroexpand @zeros Float64 a b c) == :(begin a ← zero(Float64) b ← zero(Float64) c ← zero(Float64) end) |> NiLangCore.rmlines @test (@macroexpand @ones Float64 a b c) == :(begin a ← one(Float64) b ← one(Float64) c ← one(Float64) end) |> NiLangCore.rmlines end ================================================ FILE: test/wrappers.jl ================================================ using NiLang, Test @testset "partial" begin x = Partial{:im}(3+2im) println(x) @test x === Partial{:im,Complex{Int64},Int64}(3+2im) @test value(x) == 2 @test chfield(x, value, 4) == Partial{:im}(3+4im) @test zero(x) == Partial{:im}(0.0+0.0im) @test (~Partial{:im})(x) == 3+2im end @testset "value" begin x = 1.0 @test value(x) === 1.0 @assign (x |> value) 0.2 @test x == 0.2 end struct NiTypeTest{T} <: IWrapper{T} x::T g::T end NiTypeTest(x) = NiTypeTest(x, zero(x)) @fieldview NiLang.value(invtype::NiTypeTest) = invtype.x @fieldview gg(invtype::NiTypeTest) = invtype.g @testset "inv type" begin it = NiTypeTest(0.5) @test eps(typeof(it)) === eps(Float64) @test value(it) == 0.5 @test it ≈ NiTypeTest(0.5) @test it > 0.4 @test it < NiTypeTest(0.6) @test it < 7 @test 0.4 < it @test 7 > it @test chfield(it, value, 0.3) == NiTypeTest(0.3) it = chfield(it, Val(:g), 0.2) @test almost_same(NiTypeTest(0.5+1e-15), NiTypeTest(0.5)) @test !almost_same(NiTypeTest(1.0), NiTypeTest(1)) it = NiTypeTest(0.5) @test chfield(it, gg, 0.3) == NiTypeTest(0.5, 0.3) end
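# Note: a minimal sketch of the `@fieldview` mechanism exercised above. `@fieldview`
# defines both the reader `gg` and the matching `chfield` method, so the view acts as
# an invertible handle on a single field (reusing `NiTypeTest` defined above):
it2 = NiTypeTest(0.5)
gg(it2)                 # 0.0, reads the `g` field
chfield(it2, gg, 0.3)   # NiTypeTest(0.5, 0.3), a functional update of that field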