Full Code of erikgrinaker/toydb for AI

main 473afbdb4aea cached

284 files

1.4 MB

475.6k tokens

962 symbols

1 requests

Download .txt

Showing preview only (1,523K chars total). Download the full file or copy to clipboard to get everything.

Repository: erikgrinaker/toydb
Branch: main
Commit: 473afbdb4aea
Files: 284
Total size: 1.4 MB

Directory structure:
gitextract_nc06cv1f/

├── .github/
│   └── workflows/
│       └── ci.yml
├── .gitignore
├── Cargo.toml
├── LICENSE
├── README.md
├── cluster/
│   ├── run.sh
│   ├── toydb1/
│   │   └── toydb.yaml
│   ├── toydb2/
│   │   └── toydb.yaml
│   ├── toydb3/
│   │   └── toydb.yaml
│   ├── toydb4/
│   │   └── toydb.yaml
│   └── toydb5/
│       └── toydb.yaml
├── config/
│   └── toydb.yaml
├── docs/
│   ├── architecture/
│   │   ├── README.md
│   │   ├── client.md
│   │   ├── encoding.md
│   │   ├── index.md
│   │   ├── mvcc.md
│   │   ├── overview.md
│   │   ├── raft.md
│   │   ├── server.md
│   │   ├── sql-data.md
│   │   ├── sql-execution.md
│   │   ├── sql-optimizer.md
│   │   ├── sql-parser.md
│   │   ├── sql-planner.md
│   │   ├── sql-raft.md
│   │   ├── sql-storage.md
│   │   ├── sql.md
│   │   └── storage.md
│   ├── architecture.md
│   ├── crate/
│   │   ├── Cargo.toml
│   │   ├── README.md
│   │   └── src/
│   │       └── lib.rs
│   ├── examples.md
│   ├── references.md
│   ├── sql.md
│   └── tools/
│       └── update-links.py
├── rust-toolchain
├── rustfmt.toml
├── src/
│   ├── bin/
│   │   ├── toydb.rs
│   │   ├── toydump.rs
│   │   ├── toysql.rs
│   │   └── workload.rs
│   ├── client.rs
│   ├── encoding/
│   │   ├── bincode.rs
│   │   ├── format.rs
│   │   ├── keycode.rs
│   │   └── mod.rs
│   ├── error.rs
│   ├── lib.rs
│   ├── raft/
│   │   ├── log.rs
│   │   ├── message.rs
│   │   ├── mod.rs
│   │   ├── node.rs
│   │   ├── state.rs
│   │   └── testscripts/
│   │       ├── log/
│   │       │   ├── append
│   │       │   ├── commit
│   │       │   ├── get
│   │       │   ├── has
│   │       │   ├── init
│   │       │   ├── scan
│   │       │   ├── scan_apply
│   │       │   ├── splice
│   │       │   ├── status
│   │       │   └── term
│   │       └── node/
│   │           ├── append
│   │           ├── append_base_missing
│   │           ├── append_base_missing_all
│   │           ├── append_commit_quorum
│   │           ├── append_initial
│   │           ├── append_max_entries
│   │           ├── append_pipeline
│   │           ├── append_probe_divergent_first
│   │           ├── append_probe_divergent_long
│   │           ├── append_probe_divergent_short
│   │           ├── append_probe_divergent_single
│   │           ├── append_response_beyond_last_index_panics
│   │           ├── append_response_stale_reject
│   │           ├── election
│   │           ├── election_candidate_behind_leader
│   │           ├── election_candidate_behind_quorum
│   │           ├── election_contested
│   │           ├── election_tie
│   │           ├── election_tie_even
│   │           ├── heartbeat_commits_follower
│   │           ├── heartbeat_converts_candidate
│   │           ├── heartbeat_converts_follower
│   │           ├── heartbeat_converts_follower_leaderless
│   │           ├── heartbeat_converts_leader
│   │           ├── heartbeat_lost_append_duplicate
│   │           ├── heartbeat_lost_append_multiple
│   │           ├── heartbeat_lost_append_single
│   │           ├── heartbeat_lost_read
│   │           ├── heartbeat_match_commits
│   │           ├── heartbeat_multiple_leaders_panic
│   │           ├── heartbeat_old_commit_index
│   │           ├── heartbeat_old_last_index
│   │           ├── heartbeat_probe_divergent
│   │           ├── old_campaign_rejected
│   │           ├── old_campaign_response_ignored
│   │           ├── old_heartbeat_ignored
│   │           ├── request_candidate_abort
│   │           ├── request_follower
│   │           ├── request_follower_campaign_abort
│   │           ├── request_follower_disconnect_stall
│   │           ├── request_follower_leaderless_abort
│   │           ├── request_leader
│   │           ├── request_leader_campaign_abort
│   │           ├── request_leader_change_linearizability
│   │           ├── request_leader_disconnect
│   │           ├── request_leader_read_quorum
│   │           ├── request_leader_read_quorum_sequence
│   │           ├── request_leader_single
│   │           ├── request_status
│   │           ├── request_status_single
│   │           ├── restart
│   │           ├── restart_apply
│   │           ├── restart_commit_recover
│   │           ├── restart_term_vote
│   │           ├── tick_candidate
│   │           ├── tick_follower
│   │           ├── tick_follower_leaderless
│   │           └── tick_leader
│   ├── server.rs
│   ├── sql/
│   │   ├── engine/
│   │   │   ├── engine.rs
│   │   │   ├── local.rs
│   │   │   ├── mod.rs
│   │   │   └── raft.rs
│   │   ├── execution/
│   │   │   ├── aggregator.rs
│   │   │   ├── executor.rs
│   │   │   ├── join.rs
│   │   │   ├── mod.rs
│   │   │   └── session.rs
│   │   ├── mod.rs
│   │   ├── parser/
│   │   │   ├── ast.rs
│   │   │   ├── lexer.rs
│   │   │   ├── mod.rs
│   │   │   └── parser.rs
│   │   ├── planner/
│   │   │   ├── mod.rs
│   │   │   ├── optimizer.rs
│   │   │   ├── plan.rs
│   │   │   └── planner.rs
│   │   ├── testscripts/
│   │   │   ├── expressions/
│   │   │   │   ├── cnf
│   │   │   │   ├── func
│   │   │   │   ├── func_sqrt
│   │   │   │   ├── literals
│   │   │   │   ├── op_compare_equal
│   │   │   │   ├── op_compare_greater
│   │   │   │   ├── op_compare_greater_equal
│   │   │   │   ├── op_compare_is_nan
│   │   │   │   ├── op_compare_is_null
│   │   │   │   ├── op_compare_lesser
│   │   │   │   ├── op_compare_lesser_equal
│   │   │   │   ├── op_compare_not_equal
│   │   │   │   ├── op_logic_and
│   │   │   │   ├── op_logic_not
│   │   │   │   ├── op_logic_or
│   │   │   │   ├── op_math_add
│   │   │   │   ├── op_math_divide
│   │   │   │   ├── op_math_exponentiate
│   │   │   │   ├── op_math_factorial
│   │   │   │   ├── op_math_identity
│   │   │   │   ├── op_math_multiply
│   │   │   │   ├── op_math_negate
│   │   │   │   ├── op_math_remainder
│   │   │   │   ├── op_math_subtract
│   │   │   │   ├── op_precedence
│   │   │   │   └── op_string_like
│   │   │   ├── optimizers/
│   │   │   │   ├── constant_folder
│   │   │   │   ├── filter_pushdown
│   │   │   │   ├── hash_join
│   │   │   │   ├── index_lookup
│   │   │   │   └── short_circuit
│   │   │   ├── queries/
│   │   │   │   ├── aggregate
│   │   │   │   ├── clauses
│   │   │   │   ├── group_by
│   │   │   │   ├── having
│   │   │   │   ├── join_cross
│   │   │   │   ├── join_inner
│   │   │   │   ├── join_outer
│   │   │   │   ├── limit
│   │   │   │   ├── offset
│   │   │   │   ├── order
│   │   │   │   ├── select
│   │   │   │   ├── where_
│   │   │   │   ├── where_index
│   │   │   │   └── where_primary_key
│   │   │   ├── schema/
│   │   │   │   ├── create_table
│   │   │   │   ├── create_table_datatypes
│   │   │   │   ├── create_table_default
│   │   │   │   ├── create_table_index
│   │   │   │   ├── create_table_names
│   │   │   │   ├── create_table_null
│   │   │   │   ├── create_table_primary_key
│   │   │   │   ├── create_table_reference
│   │   │   │   ├── create_table_transaction
│   │   │   │   ├── create_table_unique
│   │   │   │   ├── drop_table
│   │   │   │   ├── drop_table_index
│   │   │   │   ├── drop_table_ref
│   │   │   │   └── drop_table_transaction
│   │   │   ├── transactions/
│   │   │   │   ├── anomaly_dirty_read
│   │   │   │   ├── anomaly_dirty_write
│   │   │   │   ├── anomaly_fuzzy_read
│   │   │   │   ├── anomaly_lost_update
│   │   │   │   ├── anomaly_phantom_read
│   │   │   │   ├── anomaly_read_skew
│   │   │   │   ├── anomaly_write_skew
│   │   │   │   ├── begin
│   │   │   │   ├── commit
│   │   │   │   ├── isolation
│   │   │   │   ├── rollback
│   │   │   │   └── schema
│   │   │   └── writes/
│   │   │       ├── delete
│   │   │       ├── delete_index
│   │   │       ├── delete_reference
│   │   │       ├── delete_where
│   │   │       ├── insert
│   │   │       ├── insert_datatypes
│   │   │       ├── insert_default
│   │   │       ├── insert_index
│   │   │       ├── insert_null
│   │   │       ├── insert_primary_key
│   │   │       ├── insert_reference
│   │   │       ├── insert_unique
│   │   │       ├── update
│   │   │       ├── update_datatypes
│   │   │       ├── update_default
│   │   │       ├── update_expression
│   │   │       ├── update_index
│   │   │       ├── update_null
│   │   │       ├── update_primary_key
│   │   │       ├── update_reference
│   │   │       ├── update_unique
│   │   │       └── update_where
│   │   └── types/
│   │       ├── expression.rs
│   │       ├── mod.rs
│   │       ├── schema.rs
│   │       └── value.rs
│   └── storage/
│       ├── bitcask.rs
│       ├── engine.rs
│       ├── memory.rs
│       ├── mod.rs
│       ├── mvcc.rs
│       └── testscripts/
│           ├── bitcask/
│           │   ├── compact
│           │   ├── compact_open
│           │   ├── log
│           │   └── status
│           ├── engine/
│           │   ├── keys
│           │   ├── point
│           │   ├── scan
│           │   └── scan_prefix
│           ├── memory/
│           │   └── status
│           └── mvcc/
│               ├── anomaly_dirty_read
│               ├── anomaly_dirty_write
│               ├── anomaly_fuzzy_read
│               ├── anomaly_lost_update
│               ├── anomaly_phantom_read
│               ├── anomaly_read_skew
│               ├── anomaly_write_skew
│               ├── bank
│               ├── begin
│               ├── begin_as_of
│               ├── begin_readonly
│               ├── delete
│               ├── delete_conflict
│               ├── get
│               ├── get_isolation
│               ├── resume
│               ├── rollback
│               ├── scan
│               ├── scan_isolation
│               ├── scan_key_version_encoding
│               ├── scan_prefix
│               ├── set
│               ├── set_conflict
│               └── unversioned
└── tests/
    ├── scripts/
    │   ├── anomalies
    │   ├── client
    │   ├── errors
    │   ├── isolation
    │   └── queries
    ├── testcluster.rs
    └── tests.rs

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/ci.yml
================================================
# Continuous integration workflow: builds the crate, runs the tests, and checks
# lints, formatting, and documentation.
name: CI
# Triggered by pushes, pull requests, and manual dispatch.
on: [push, pull_request, workflow_dispatch]
# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
    - uses: actions/checkout@v3
    # Install a pinned Rust toolchain, including the clippy and rustfmt
    # components that the lint and format steps below rely on.
    - uses: dtolnay/rust-toolchain@1.93.1
      id: toolchain
      with:
        components: clippy, rustfmt
    # Cache the build output directory. The key combines the runner OS, the
    # toolchain step's cache key, and a hash of Cargo.lock.
    - uses: actions/cache@v3
      with:
        path: target
        key: ${{runner.os}}-target-${{steps.toolchain.outputs.cachekey}}-${{hashFiles('Cargo.lock')}}
    - run: cargo build --bins --tests
    - run: cargo test
    # -D warnings turns every clippy warning into an error.
    - run: cargo clippy --tests --no-deps -- -D warnings
    - run: cargo fmt --check
    # RUSTDOCFLAGS below turns every rustdoc warning into an error.
    - run: cargo doc --no-deps 
      env:
        RUSTDOCFLAGS: -D warnings

================================================
FILE: .gitignore
================================================
/cluster/toydb*/data
/data
/docs/crate/target
/target
.DS_Store
.vscode/
**/*.rs.bk

================================================
FILE: Cargo.toml
================================================
[package]
name = "toydb"
version = "1.0.0"
description = "A simple distributed SQL database, built for education"
authors = ["Erik Grinaker <erik@grinaker.org>"]
license = "Apache-2.0"
homepage = "https://github.com/erikgrinaker/toydb"
repository = "https://github.com/erikgrinaker/toydb"
edition = "2024"
default-run = "toydb"
publish = false

[lib]
doctest = false

[dependencies]
bincode = { version = "2.0", features = ["serde"] }
clap = { version = "4.5", features = ["cargo", "derive"] }
config = "0.15"
crossbeam = { version = "0.8", features = ["crossbeam-channel"] }
dyn-clone = "1.0"
fs4 = "0.13"
hdrhistogram = "7.5"
itertools = "0.14"
log = "0.4"
petname = "2.0.2"
rand = "0.10"
regex = "1.12"
rustyline = "17.0"
rustyline-derive = "0.11"
serde = { version = "1.0", features = ["derive"] }
serde_bytes = "0.11"
simplelog = "0.12"
uuid = { version = "1.21", features = ["serde", "v4"] }

[dev-dependencies]
escargot = "0.5"
goldenscript = "0.7"
hex = "0.4"
paste = "1.0"
serde_json = "1.0"
tempfile = "3.25"
test-case = "3.3"
test_each_file = "0.3"

================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

================================================
FILE: README.md
================================================
# <a><img src="./docs/architecture/images/toydb.svg" height="40" valign="top" /></a> toyDB

Distributed SQL database in Rust, built from scratch as an educational project. Main features:

* [Raft distributed consensus][raft] for linearizable state machine replication.

* [ACID transactions][txn] with MVCC-based snapshot isolation.

* [Pluggable storage engine][storage] with [BitCask][bitcask] and [in-memory][memory] backends.

* [Iterator-based query engine][query] with [heuristic optimization][optimizer] and time-travel 
  support.

* [SQL interface][sql] including joins, aggregates, and transactions.

toyDB is intended to be simple and understandable, and also functional and correct. Other aspects
like performance, scalability, and availability are non-goals -- these are major sources of
complexity in production-grade databases, and obscure the basic underlying concepts. Shortcuts have
been taken where possible.

I originally wrote toyDB in 2020 to learn more about database internals. Since then, I've spent
several years building real distributed SQL databases at
[CockroachDB](https://github.com/cockroachdb/cockroach) and
[Neon](https://github.com/neondatabase/neon). Based on this experience, I've rewritten toyDB as a
simple illustration of the architecture and concepts behind distributed SQL databases.

[raft]: https://github.com/erikgrinaker/toydb/blob/main/src/raft/mod.rs
[txn]: https://github.com/erikgrinaker/toydb/blob/main/src/storage/mvcc.rs
[storage]: https://github.com/erikgrinaker/toydb/blob/main/src/storage/engine.rs
[bitcask]: https://github.com/erikgrinaker/toydb/blob/main/src/storage/bitcask.rs
[memory]: https://github.com/erikgrinaker/toydb/blob/main/src/storage/memory.rs
[query]: https://github.com/erikgrinaker/toydb/blob/main/src/sql/execution/executor.rs
[optimizer]: https://github.com/erikgrinaker/toydb/blob/main/src/sql/planner/optimizer.rs
[sql]: https://github.com/erikgrinaker/toydb/blob/main/src/sql/parser/parser.rs

## Documentation

* [Architecture guide](docs/architecture/index.md): a guided tour of toyDB's code and architecture.

* [SQL examples](docs/examples.md): walkthrough of toyDB's SQL features.

* [SQL reference](docs/sql.md): reference documentation for toyDB's SQL dialect.

* [References](docs/references.md): research materials used while building toyDB.

## Usage

With a [Rust compiler](https://www.rust-lang.org/tools/install) installed, a local five-node 
cluster can be built and started as:

```
$ ./cluster/run.sh
Starting 5 nodes on ports 9601-9605 with data under cluster/*/data/.
To connect to node 1, run: cargo run --release --bin toysql

toydb4 21:03:55 [INFO] Listening on [::1]:9604 (SQL) and [::1]:9704 (Raft)
toydb1 21:03:55 [INFO] Listening on [::1]:9601 (SQL) and [::1]:9701 (Raft)
toydb2 21:03:55 [INFO] Listening on [::1]:9602 (SQL) and [::1]:9702 (Raft)
toydb3 21:03:55 [INFO] Listening on [::1]:9603 (SQL) and [::1]:9703 (Raft)
toydb5 21:03:55 [INFO] Listening on [::1]:9605 (SQL) and [::1]:9705 (Raft)
toydb2 21:03:56 [INFO] Starting new election for term 1
[...]
toydb2 21:03:56 [INFO] Won election for term 1, becoming leader
```

A command-line client can be built and used with node 1 on `localhost:9601`:

```
$ cargo run --release --bin toysql
Connected to toyDB node n1. Enter !help for instructions.
toydb> CREATE TABLE movies (id INTEGER PRIMARY KEY, title VARCHAR NOT NULL);
toydb> INSERT INTO movies VALUES (1, 'Sicario'), (2, 'Stalker'), (3, 'Her');
toydb> SELECT * FROM movies;
1, 'Sicario'
2, 'Stalker'
3, 'Her'
```

toyDB supports most common SQL features, including joins, aggregates, and transactions. Below is an
`EXPLAIN` query plan of a more complex query (fetches all movies from studios that have released any
movie with an IMDb rating of 8 or more):

```
toydb> EXPLAIN SELECT m.title, g.name AS genre, s.name AS studio, m.rating
  FROM movies m JOIN genres g ON m.genre_id = g.id,
    studios s JOIN movies good ON good.studio_id = s.id AND good.rating >= 8
  WHERE m.studio_id = s.id
  GROUP BY m.title, g.name, s.name, m.rating, m.released
  ORDER BY m.rating DESC, m.released ASC, m.title ASC;

Remap: m.title, genre, studio, m.rating (dropped: m.released)
└─ Order: m.rating desc, m.released asc, m.title asc
   └─ Projection: m.title, g.name as genre, s.name as studio, m.rating, m.released
      └─ Aggregate: m.title, g.name, s.name, m.rating, m.released
         └─ HashJoin: inner on m.studio_id = s.id
            ├─ HashJoin: inner on m.genre_id = g.id
            │  ├─ Scan: movies as m
            │  └─ Scan: genres as g
            └─ HashJoin: inner on s.id = good.studio_id
               ├─ Scan: studios as s
               └─ Scan: movies as good (good.rating > 8 OR good.rating = 8)
```

## Architecture

toyDB's architecture is fairly typical for a distributed SQL database: a transactional
key/value store managed by a Raft cluster with a SQL query engine on top. See the
[architecture guide](./docs/architecture/index.md) for more details.

[![toyDB architecture](./docs/architecture/images/architecture.svg)](./docs/architecture/index.md)

## Tests

toyDB mainly uses [Goldenscripts](https://github.com/erikgrinaker/goldenscript) for tests. These
scripts run various scenarios, capture events and output, and later assert that the behavior remains
the same. See e.g.:

* [Raft cluster tests](https://github.com/erikgrinaker/toydb/tree/main/src/raft/testscripts/node)
* [MVCC transaction tests](https://github.com/erikgrinaker/toydb/tree/main/src/storage/testscripts/mvcc)
* [SQL execution tests](https://github.com/erikgrinaker/toydb/tree/main/src/sql/testscripts)
* [End-to-end tests](https://github.com/erikgrinaker/toydb/tree/main/tests/scripts)

Run tests with `cargo test`, or have a look at the latest 
[CI run](https://github.com/erikgrinaker/toydb/actions/workflows/ci.yml).

## Benchmarks

toyDB is not optimized for performance, but comes with a `workload` benchmark tool that can run 
various workloads against a toyDB cluster. For example:

```sh
# Start a 5-node toyDB cluster.
$ ./cluster/run.sh
[...]

# Run a read-only benchmark via all 5 nodes.
$ cargo run --release --bin workload read
Preparing initial dataset... done (0.179s)
Spawning 16 workers... done (0.006s)
Running workload read (rows=1000 size=64 batch=1)...

Time   Progress     Txns      Rate       p50       p90       p99      pMax
1.0s      13.1%    13085   13020/s     1.3ms     1.5ms     1.9ms     8.4ms
2.0s      27.2%    27183   13524/s     1.3ms     1.5ms     1.8ms     8.4ms
3.0s      41.3%    41301   13702/s     1.2ms     1.5ms     1.8ms     8.4ms
4.0s      55.3%    55340   13769/s     1.2ms     1.5ms     1.8ms     8.4ms
5.0s      70.0%    70015   13936/s     1.2ms     1.5ms     1.8ms     8.4ms
6.0s      84.7%    84663   14047/s     1.2ms     1.4ms     1.8ms     8.4ms
7.0s      99.6%    99571   14166/s     1.2ms     1.4ms     1.7ms     8.4ms
7.1s     100.0%   100000   14163/s     1.2ms     1.4ms     1.7ms     8.4ms

Verifying dataset... done (0.002s)
```

The available workloads are:

* `read`: single-row primary key lookups.
* `write`: single-row inserts to sequential primary keys.
* `bank`: bank transfers between various customers and accounts. To make things interesting, this
  includes joins, secondary indexes, sorting, and conflicts.

For more information about workloads and parameters, run `cargo run --bin workload -- --help`.

Example workload results are listed below. Write performance is atrocious, due to
[fsync](https://en.wikipedia.org/wiki/Sync_(Unix)) and a lack of write batching in the Raft layer.
Disabling fsync, or using the in-memory engine, significantly improves write performance (at the
expense of durability).

| Workload | BitCask     | BitCask w/o fsync | Memory      |
|----------|-------------|-------------------|-------------|
| `read`   | 14163 txn/s | 13941 txn/s       | 13949 txn/s |
| `write`  | 35 txn/s    | 4719 txn/s        | 7781 txn/s  |
| `bank`   | 21 txn/s    | 1120 txn/s        | 1346 txn/s  |

## Debugging

[VSCode](https://code.visualstudio.com) and the [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb)
extension can be used to debug toyDB, with the debug configuration under `.vscode/launch.json`.

Under the "Run and Debug" tab, select e.g. "Debug executable 'toydb'" or "Debug unit tests in
library 'toydb'".

## Credits

The toyDB logo is courtesy of [@jonasmerlin](https://github.com/jonasmerlin).

================================================
FILE: cluster/run.sh
================================================
#!/usr/bin/env bash
#
# This script builds and runs a 5-node toyDB cluster listening on ports
# 9601-9605. Config and data are stored under the toydb* directories.
# To connect a toysql client to node 1 on port 9601, run:
#
# cargo run --release --bin toysql

# Abort on command errors, unset variables, and failures anywhere in a pipeline.
set -euo pipefail

# Change into the script directory. $0 is quoted so that a checkout path
# containing spaces or glob characters does not get word-split.
cd "$(dirname "$0")"

# Build toyDB using release optimizations.
cargo build --release --bin toydb

# Start nodes 1-5 in the background, prefixing their output with the node ID.
echo "Starting 5 nodes on ports 9601-9605 with data under cluster/*/data/."
echo "To connect to node 1, run: cargo run --release --bin toysql"
echo ""

for ID in 1 2 3 4 5; do
    # Each node runs in its own subshell; stderr is merged into stdout so that
    # sed can prefix every output line with "toydb<ID> ". The binary is named
    # explicitly to match the build step above.
    (cargo run -q --release --bin toydb -- -c "toydb$ID/toydb.yaml" 2>&1 | sed -e "s/\\(.*\\)/toydb$ID \\1/g") &
done

# Wait for the background processes to exit. Kill all toyDB processes when the
# script exits (e.g. via Ctrl-C). The negative PID signals the whole process
# group of this script.
trap 'kill -TERM -- -$$ 2>/dev/null' INT TERM EXIT
wait

================================================
FILE: cluster/toydb1/toydb.yaml
================================================
# Node 1 of the local 5-node cluster started by cluster/run.sh.
id: 1
# Relative data directory; run.sh starts the node from the cluster/ directory,
# so data ends up under cluster/toydb1/data.
data_dir: toydb1/data
# Addresses to listen for SQL and Raft connections on.
listen_sql: localhost:9601
listen_raft: localhost:9701
# Raft addresses of the other four nodes, keyed by node ID.
peers:
  '2': localhost:9702
  '3': localhost:9703
  '4': localhost:9704
  '5': localhost:9705

================================================
FILE: cluster/toydb2/toydb.yaml
================================================
# Node 2 of the local 5-node cluster started by cluster/run.sh.
id: 2
# Relative data directory; run.sh starts the node from the cluster/ directory,
# so data ends up under cluster/toydb2/data.
data_dir: toydb2/data
# Addresses to listen for SQL and Raft connections on.
listen_sql: localhost:9602
listen_raft: localhost:9702
# Raft addresses of the other four nodes, keyed by node ID.
peers:
  '1': localhost:9701
  '3': localhost:9703
  '4': localhost:9704
  '5': localhost:9705

================================================
FILE: cluster/toydb3/toydb.yaml
================================================
# Node 3 of the local 5-node cluster started by cluster/run.sh.
id: 3
# Relative data directory; run.sh starts the node from the cluster/ directory,
# so data ends up under cluster/toydb3/data.
data_dir: toydb3/data
# Addresses to listen for SQL and Raft connections on.
listen_sql: localhost:9603
listen_raft: localhost:9703
# Raft addresses of the other four nodes, keyed by node ID.
peers:
  '1': localhost:9701
  '2': localhost:9702
  '4': localhost:9704
  '5': localhost:9705

================================================
FILE: cluster/toydb4/toydb.yaml
================================================
# Node 4 of the local 5-node cluster started by cluster/run.sh.
id: 4
# Relative data directory; run.sh starts the node from the cluster/ directory,
# so data ends up under cluster/toydb4/data.
data_dir: toydb4/data
# Addresses to listen for SQL and Raft connections on.
listen_sql: localhost:9604
listen_raft: localhost:9704
# Raft addresses of the other four nodes, keyed by node ID.
peers:
  '1': localhost:9701
  '2': localhost:9702
  '3': localhost:9703
  '5': localhost:9705

================================================
FILE: cluster/toydb5/toydb.yaml
================================================
# Node 5 of the local 5-node cluster started by cluster/run.sh.
id: 5
# Relative data directory; run.sh starts the node from the cluster/ directory,
# so data ends up under cluster/toydb5/data.
data_dir: toydb5/data
# Addresses to listen for SQL and Raft connections on.
listen_sql: localhost:9605
listen_raft: localhost:9705
# Raft addresses of the other four nodes, keyed by node ID.
peers:
  '1': localhost:9701
  '2': localhost:9702
  '3': localhost:9703
  '4': localhost:9704

================================================
FILE: config/toydb.yaml
================================================
# The node ID (must be unique in the cluster), and map of peer IDs and Raft
# addresses (empty for single node).
id: 1
peers: {}

# Addresses to listen for SQL and Raft connections on.
listen_sql: localhost:9601
listen_raft: localhost:9701

# The log level. Valid values are DEBUG, INFO, WARN, and ERROR.
log_level: INFO

# Node data directory. The Raft log is stored in the file "raft", and the SQL
# database in "sql".
data_dir: data

# Storage engine to use for the Raft log and SQL database.
#
# * bitcask (default): an append-only log-structured store.
# * memory: an in-memory store using the Rust standard library's BTreeMap.
storage_raft: bitcask
storage_sql: bitcask

# Whether to fsync writes to disk. Disabling this yields much better write
# performance, but may lose data on host crashes and violate Raft guarantees. It
# only affects Raft log writes (the SQL state machine is never fsynced since it
# can be reconstructed from the Raft log).
fsync: true

# The minimum garbage fraction and bytes to trigger Bitcask log compaction on
# node startup.
compact_threshold: 0.2
compact_min_bytes: 1000000

================================================
FILE: docs/architecture/README.md
================================================
See [`index.md`](index.md).

================================================
FILE: docs/architecture/client.md
================================================
# Client

The toyDB client is in the [`client`](https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/client.rs)
module. It uses the same Bincode-based protocol that we saw in the server section, sending
`toydb::Request` and receiving `toydb::Response`.

## Client Library

The main client library `toydb::Client` is used to communicate with a toyDB server:

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/client.rs#L15-L24

When initialized, it connects to a toyDB server over TCP, which establishes a SQL session for it:

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/client.rs#L27-L33

It can then send Bincode-encoded `toydb::Request` to the server, and receive `toydb::Response`
back.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/client.rs#L35-L40

In particular, `Client::execute` can be used to execute arbitrary SQL statements in the client's
current session:

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/client.rs#L42-L56

## `toysql` Binary

However, `toydb::Client` is a programmatic API, and we want a more convenient user interface.
The `toysql` client in [`src/bin/toysql.rs`](https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/bin/toysql.rs)
provides a typical [REPL](https://en.wikipedia.org/wiki/Read–eval–print_loop) (read-evaluate-print loop) where users can enter SQL statements and view the results.

Like `toydb`, `toysql` is a tiny [`clap`](https://docs.rs/clap/latest/clap/) command that takes a
toyDB server address to connect to and starts an interactive shell:

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/bin/toysql.rs#L29-L53

It first attempts to connect to the toyDB server using the `toydb::Client` client, and then starts
an interactive shell using the [Rustyline](https://docs.rs/rustyline/latest/rustyline/) library.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/bin/toysql.rs#L55-L81

The shell is simply a loop that prompts the user to input a SQL statement:

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/bin/toysql.rs#L216-L250

Each statement is then executed against the server via `toydb::Client::execute`, and the response
is formatted and printed as output:

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/bin/toysql.rs#L83-L92

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/bin/toysql.rs#L175-L204

And with that, we have a fully functional SQL database system and can run queries to our heart's
content. Have fun!

---

<p align="center">
← <a href="server.md">Server</a>
</p>

================================================
FILE: docs/architecture/encoding.md
================================================
# Key/Value Encoding

The key/value store uses binary `Vec<u8>` keys and values, so we need an encoding scheme to 
translate between in-memory Rust data structures and the on-disk binary data. This is provided by
the [`encoding`](https://github.com/erikgrinaker/toydb/tree/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/encoding)
module, with separate schemes for key and value encoding.

## `Bincode` Value Encoding

Values are encoded using [Bincode](https://github.com/bincode-org/bincode), a third-party binary
encoding scheme for Rust. Bincode is convenient because it can easily encode any arbitrary Rust
data type. But we could also have chosen e.g. [JSON](https://en.wikipedia.org/wiki/JSON),
[Protobuf](https://protobuf.dev), [MessagePack](https://msgpack.org/), or any other encoding.

We won't dwell on the actual binary format here, see the [Bincode specification](https://git.sr.ht/~stygianentity/bincode/tree/trunk/item/docs/spec.md)
for details.

To use a consistent configuration for all encoding and decoding, we provide helper functions in
the [`encoding::bincode`](https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/encoding/bincode.rs)
module which use `bincode::config::standard()`.

https://github.com/erikgrinaker/toydb/blob/0ce1fb34349fda043cb9905135f103bceb4395b4/src/encoding/bincode.rs#L15-L27

Bincode uses the very common [Serde](https://serde.rs) framework for its API. toyDB also provides an
`encoding::Value` helper trait for value types which adds automatic `encode()` and `decode()`
methods:

https://github.com/erikgrinaker/toydb/blob/b57ae6502e93ea06df00d94946a7304b7d60b977/src/encoding/mod.rs#L39-L68

Here's an example of how this can be used to encode and decode an arbitrary `Dog` data type:

```rust
#[derive(serde::Serialize, serde::Deserialize)]
struct Dog {
    name: String,
    age: u8,
    good_boy: bool,
}

impl encoding::Value for Dog {}

let pluto = Dog { name: "Pluto".into(), age: 4, good_boy: true };
let bytes = pluto.encode();
println!("{bytes:02x?}");

// Outputs [05, 50, 6c, 75, 74, 6f, 04, 01]:
//
// * Length of string "Pluto": 05.
// * String "Pluto": 50 6c 75 74 6f.
// * Age 4: 04.
// * Good boy: 01 (true).

let pluto = Dog::decode(&bytes)?; // gives us back Pluto
```

## `Keycode` Key Encoding

Unlike values, keys can't just use any binary encoding like Bincode. As mentioned in the storage
section, the storage engine sorts data by key to enable range scans. The key encoding must therefore
preserve the [lexicographical order](https://en.wikipedia.org/wiki/Lexicographic_order) of the
encoded values: the binary byte slices must sort in the same order as the original values.

As an example of why we can't just use Bincode, consider the strings "house" and "key". These should
be sorted in alphabetical order: "house" before "key". However, Bincode encodes strings prefixed by
their length, so "key" would be sorted before "house" in binary form:

```
03 6b 65 79        ← 3 bytes: key
05 68 6f 75 73 65  ← 5 bytes: house
```

For similar reasons, we can't just encode numbers in their native binary form: the
[little-endian](https://en.wikipedia.org/wiki/Endianness) representation will order very large
numbers before small numbers, and the [sign bit](https://en.wikipedia.org/wiki/Sign_bit) will order
positive numbers before negative numbers. This would violate the natural ordering of numbers.

We also have to be careful with value sequences, which should be ordered element-wise. For example,
the pair ("a", "xyz") should be ordered before ("ab", "cd"), so we can't just encode the strings
one after the other like "axyz" and "abcd" since that would sort ("ab", "cd") first.

toyDB provides an order-preserving encoding called "Keycode" in the [`encoding::keycode`](https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/encoding/keycode.rs)
module. Like Bincode, the Keycode encoding is not self-describing: the binary data does not say what
the data type is, the caller must provide a type to decode into. It only supports a handful of
primitive data types, and only needs to order values of the same type.

Keycode is implemented as a [Serde](https://serde.rs) (de)serializer, which requires a lot of
boilerplate code to satisfy the trait, but we'll just focus on the actual encoding. The encoding
scheme is as follows:

* `bool`: `00` for `false` and `01` for `true`.

    https://github.com/erikgrinaker/toydb/blob/2027641004989355c2162bbd9eeefcc991d6b29b/src/encoding/keycode.rs#L113-L117

* `u64`: the [big-endian](https://en.wikipedia.org/wiki/Endianness) binary encoding.

    https://github.com/erikgrinaker/toydb/blob/2027641004989355c2162bbd9eeefcc991d6b29b/src/encoding/keycode.rs#L157-L161

* `i64`: the [big-endian](https://en.wikipedia.org/wiki/Endianness) binary encoding, but with the
   sign bit flipped to order negative numbers before positive ones.

    https://github.com/erikgrinaker/toydb/blob/2027641004989355c2162bbd9eeefcc991d6b29b/src/encoding/keycode.rs#L131-L143

* `f64`: the [big-endian IEEE 754](https://en.wikipedia.org/wiki/Double-precision_floating-point_format)
  binary encoding, but with the sign bit flipped, and all bits flipped for negative numbers, to
  order negative numbers correctly.

    https://github.com/erikgrinaker/toydb/blob/2027641004989355c2162bbd9eeefcc991d6b29b/src/encoding/keycode.rs#L167-L179

* `Vec<u8>`: terminated by `00 00`, with `00` escaped as `00 ff` to disambiguate it.

    https://github.com/erikgrinaker/toydb/blob/2027641004989355c2162bbd9eeefcc991d6b29b/src/encoding/keycode.rs#L190-L205

* `String`: like `Vec<u8>`.

    https://github.com/erikgrinaker/toydb/blob/2027641004989355c2162bbd9eeefcc991d6b29b/src/encoding/keycode.rs#L185-L188

* `Vec<T>`, `[T]`, `(T,)`: the concatenation of the inner values.

    https://github.com/erikgrinaker/toydb/blob/2027641004989355c2162bbd9eeefcc991d6b29b/src/encoding/keycode.rs#L295-L307

* `enum`: the variant's numerical index as a `u8`, then the inner values (if any).

    https://github.com/erikgrinaker/toydb/blob/2027641004989355c2162bbd9eeefcc991d6b29b/src/encoding/keycode.rs#L223-L227

Like `encoding::Value`, there is also an `encoding::Key` helper trait:

https://github.com/erikgrinaker/toydb/blob/b57ae6502e93ea06df00d94946a7304b7d60b977/src/encoding/mod.rs#L20-L37

Different kinds of keys are usually represented as enums. For example, if we wanted to store cars
and video games, we could use:

```rust
#[derive(serde::Serialize, serde::Deserialize)]
enum Key {
    Car(String, String, u64),    // make, model, year
    Game(String, u64, Platform), // name, year, platform
}

#[derive(serde::Serialize, serde::Deserialize)]
enum Platform {
    PC,
    PS5,
    Switch,
    Xbox,
}

impl encoding::Key for Key {}

let returnal = Key::Game("Returnal".into(), 2021, Platform::PS5);
let bytes = returnal.encode();
println!("{bytes:02x?}");

// Outputs [01, 52, 65, 74, 75, 72, 6e, 61, 6c, 00, 00, 00, 00, 00, 00, 00, 00, 07, e5, 01].
//
// * Key::Game: 01
// * Returnal: 52 65 74 75 72 6e 61 6c 00 00
// * 2021: 00 00 00 00 00 00 07 e5
// * Platform::PS5: 01

let returnal = Key::decode(&bytes)?;
```

Because the keys are sorted in element-wise order, this would allow us to e.g. perform a prefix
scan to fetch all platforms which Returnal (2021) was released on, or perform a range scan to fetch 
all models of Nissan Altima released between 2010 and 2015.

---

<p align="center">
← <a href="storage.md">Storage Engine</a> &nbsp; | &nbsp; <a href="mvcc.md">MVCC Transactions</a> →
</p>

================================================
FILE: docs/architecture/index.md
================================================
# toyDB Architecture

toyDB is a simple distributed SQL database, intended to illustrate how such systems are built. The
overall structure is similar to real-world distributed databases, but the design and implementation
has been kept as simple as possible for understandability. Performance and scalability are explicit
non-goals, as these are major sources of complexity in real-world systems.

This guide will walk through toyDB's architecture and code from the bottom up, with plenty of links
to the actual source code.

> ℹ️ View on GitHub with a desktop browser for inline code listings.

* [Overview](overview.md)
  * [Properties](overview.md#properties)
  * [Components](overview.md#components)
* [Storage Engine](storage.md)
  * [`Memory` Storage Engine](storage.md#memory-storage-engine)
  * [`BitCask` Storage Engine](storage.md#bitcask-storage-engine)
* [Key/Value Encoding](encoding.md)
  * [`Bincode` Value Encoding](encoding.md#bincode-value-encoding)
  * [`Keycode` Key Encoding](encoding.md#keycode-key-encoding)
* [MVCC Transactions](mvcc.md)
* [Raft Consensus](raft.md)
  * [Log Storage](raft.md#log-storage)
  * [State Machine Interface](raft.md#state-machine-interface)
  * [Node Roles](raft.md#node-roles)
  * [Node Interface and Communication](raft.md#node-interface-and-communication)
  * [Leader Election and Terms](raft.md#leader-election-and-terms)
  * [Client Requests and Forwarding](raft.md#client-requests-and-forwarding)
  * [Write Replication and Application](raft.md#write-replication-and-application)
  * [Read Processing](raft.md#read-processing)
* [SQL Engine](sql.md)
  * [Data Model](sql-data.md)
    * [Data Types](sql-data.md#data-types)
    * [Schemas](sql-data.md#schemas)
    * [Expressions](sql-data.md#expressions)
  * [Storage](sql-storage.md)
    * [Key/Value Representation](sql-storage.md#keyvalue-representation)
    * [Schema Catalog](sql-storage.md#schema-catalog)
    * [Row Storage and Transactions](sql-storage.md#row-storage-and-transactions)
  * [Raft Replication](sql-raft.md)
  * [Parsing](sql-parser.md)
    * [Lexer](sql-parser.md#lexer)
    * [Abstract Syntax Tree](sql-parser.md#abstract-syntax-tree)
    * [Parser](sql-parser.md#parser)
  * [Planning](sql-planner.md)
    * [Execution Plan](sql-planner.md#execution-plan)
    * [Scope and Name Resolution](sql-planner.md#scope-and-name-resolution)
    * [Planner](sql-planner.md#planner)
  * [Optimization](sql-optimizer.md)
    * [Constant Folding](sql-optimizer.md#constant-folding)
    * [Filter Pushdown](sql-optimizer.md#filter-pushdown)
    * [Index Lookups](sql-optimizer.md#index-lookups)
    * [Hash Join](sql-optimizer.md#hash-join)
    * [Short Circuiting](sql-optimizer.md#short-circuiting)
  * [Execution](sql-execution.md)
    * [Plan Executor](sql-execution.md#plan-executor)
    * [Session Management](sql-execution.md#session-management)
* [Server](server.md)
  * [Raft Routing](server.md#raft-routing)
  * [SQL Service](server.md#sql-service)
  * [`toydb` Binary](server.md#toydb-binary)
* [Client](client.md)
  * [Client Library](client.md#client-library)
  * [`toysql` Binary](client.md#toysql-binary)

---

<p align="center">
<a href="overview.md">Overview</a> →
</p>

================================================
FILE: docs/architecture/mvcc.md
================================================
# MVCC Transactions

Transactions are groups of reads and writes (e.g. to different keys) that are submitted together as
a single unit. For example, a bank transaction that transfers $100 from account A to account B might
consist of this group of reads and writes:

```
a = get(A)
b = get(B)
if a < 100:
    error("insufficient balance")
set(A, a - 100)
set(B, b + 100)
```

toyDB provides [ACID](https://en.wikipedia.org/wiki/ACID) transactions, a set of very strong
guarantees:

* **Atomicity:** all of the writes take effect as a single, atomic unit, at the same instant, when
  they are _committed_. Other users will never see some of the writes without the others.

* **Consistency:** database constraints are never violated (e.g. referential integrity or uniqueness
  constraints). We'll see how this is implemented later in the SQL execution layer.

* **Isolation:** users should appear to have the entire database to themselves, unaffected by other
  simultaneous users. Two transactions may conflict, in which case one has to retry, but if a
  transaction succeeds then the user knows with certainty that the operations were executed without
  interference by anyone else. This eliminates the risk of [race conditions](https://en.wikipedia.org/wiki/Race_condition).

* **Durability:** committed writes are never lost (even if the system crashes).

To illustrate how transactions work, here's an example MVCC test script where two concurrent users
modify a set of bank accounts (there are many [other test scripts](https://github.com/erikgrinaker/toydb/tree/aa14deb71f650249ce1cab8828ed7bcae2c9206e/src/storage/testscripts/mvcc)
there too):

https://github.com/erikgrinaker/toydb/blob/a73e24b7e77671b9f466e0146323cd69c3e27bdf/src/storage/testscripts/mvcc/bank#L1-L69

To provide these guarantees, toyDB uses a common technique called
[Multi-Version Concurrency Control](https://en.wikipedia.org/wiki/Multiversion_concurrency_control)
(MVCC). It is implemented at the key/value storage level, in the [`storage::mvcc`](https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs)
module. It uses a `storage::Engine` for actual data storage.

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L220-L231

MVCC provides an [isolation level](https://en.wikipedia.org/wiki/Isolation_(database_systems)#Isolation_levels)
called [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation): a transaction sees a
snapshot of the database as it was when the transaction began. Any later changes are invisible to
it.

It does this by storing historical versions of key/value pairs. The version number is simply a
number that's incremented for every new transaction:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L155-L158

Each transaction has its own unique version number. When it writes a key/value pair it appends its
version number to the key as `Key::Version(&[u8], Version)` (using the Keycode encoding we've seen
previously). If an old version of the key already exists, it will have a different version number
suffix and therefore be stored as a separate key in the storage engine. Deleted keys are versions
with a special tombstone value.

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L183-L189

Here's a simple diagram of what a history of versions 1 to 5 of keys `a` to `d` might look like:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L11-L26

Additionally, we need to keep track of the currently ongoing (uncommitted) transaction versions,
known as the "active set".

With versioning and the active set, we can summarize the MVCC protocol with a few simple rules:

1. When a new transaction begins, it:
    * Obtains the next available version number.
    * Takes a snapshot of the active set (other uncommitted transactions).
    * Adds its version number to the active set.

2. When the transaction reads a key, it:
    * Returns the latest version of the key at or below its own version.
    * Ignores versions above its own version.
    * Ignores versions in its active set snapshot.

3. When the transaction writes a key, it:
    * Looks for a key version above its own version; errors if found.
    * Looks for a key version in its active set snapshot; errors if found.
    * Writes a key/value pair with its own version.

4. When the transaction commits, it:
    * Flushes all writes to disk.
    * Removes itself from the active set.

The magic happens when the transaction removes itself from the active set. This is a single, atomic
operation, and when it completes all of its writes immediately become visible to _new_ transactions.
However, ongoing transactions still won't see these writes, because the version is still in their
active set snapshot or at a later version (hence they are isolated from this transaction).

Furthermore, the transaction could see its own uncommitted writes even though no one else could, and
if any writes conflicted with another transaction it would error out and have to retry.

Not only that, this also allows us to do time-travel queries, where we can query the database as it
was at any time in the past: we simply pick a version number to read at.

There are a few more details that we've left out here: transaction rollbacks need to keep track of
the writes and undo them, and read-only queries can avoid allocating new version numbers. We also
don't garbage collect old versions, for simplicity. See the module documentation for more details:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L1-L140

Let's walk through a simple example with code pointers to get a feel for how this is implemented.
Notice how we don't have to deal with any version numbers when we're using the MVCC API -- this is
an internal MVCC implementation detail.

```rust
// Open a BitCask database in the file "toy.db" with MVCC support.
let path = PathBuf::from("toy.db");
let db = MVCC::new(BitCask::new(path)?);

// Begin a new transaction.
let txn = db.begin()?;

// Read the key "foo", and decode the binary value as a u64 with bincode.
let bytes = txn.get(b"foo")?.expect("foo not found");
let mut value: u64 = bincode::deserialize(&bytes)?;

// Delete "foo".
txn.delete(b"foo")?;

// Add 1 to the value, and write it back to the key "bar". Serialization is
// fallible just like deserialization, so propagate its error with `?` too.
value += 1;
let bytes = bincode::serialize(&value)?;
txn.set(b"bar", bytes)?;

// Commit the transaction.
txn.commit()?;
```

First, we begin a new transaction with `MVCC::begin()`, which calls through to
`Transaction::begin()`. This obtains a version number stored in `Key::NextVersion` and increments
it, then takes a snapshot of the active set in `Key::ActiveSet` and adds itself to it:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L368-L391

This returns a `Transaction` object which provides the main key/value API, with get/set/delete
methods. It keeps track of the main state of the transaction: its version number and active set.

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L294-L327

Next, we call `Transaction::get(b"foo")` to read the value of the key `foo`. This finds the latest
version that's visible to us (ignoring future versions and the active set). Recall that we store
multiple versions of each key as `Key::Version(key, version)`. The Keycode encoding ensures that all
versions are stored in sorted order, so we can do a reverse range scan from `Key::Version(b"foo",
self.version)` to `Key::Version(b"foo", 0)` and return the latest version that's visible to us:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L564-L581

We then call `Transaction::delete(b"foo")` and `Transaction::set(b"bar", value)`. Both of these just
call through to the same `Transaction::write_version()` method, but use `Some(value)` for a regular
key/value pair and `None` as a deletion tombstone:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L514-L522

To write a new version of a key, we first have to check for conflicts by seeing if there's a
version of the key that's invisible to us -- if there is, we conflicted with a concurrent transaction.
We use a range scan for this, like we did in `Transaction::get()`.

If there are no conflicts, we go on to write `Key::Version(b"foo", self.version)` and encode the
value as an `Option<value>` to accommodate the `None` tombstone marker. We also write a
`Key::TxnWrite(version, key)` to keep track of the keys we've written in case we have to roll back.

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L524-L562

Finally, `Transaction::commit()` will make our transaction take effect and become visible. It does
this simply by removing itself from the active set in `Key::ActiveSet`, and also cleaning up its
`Key::TxnWrite` write tracking. As the comment says, we don't actually have to flush to durable
storage here, because the Raft log will provide durability for us -- we'll get back to this later.

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/mvcc.rs#L466-L485

---

<p align="center">
← <a href="encoding.md">Key/Value Encoding</a> &nbsp; | &nbsp; <a href="raft.md">Raft Consensus</a> →
</p>

================================================
FILE: docs/architecture/overview.md
================================================
# Overview

toyDB consists of a cluster of nodes that execute [SQL](https://en.wikipedia.org/wiki/SQL)
transactions against a replicated state machine. Clients can connect to any node in the cluster and
submit SQL statements. The cluster remains available if a minority of nodes crash or disconnect,
but halts if a majority of nodes fail.

## Properties

* **Distributed:** runs across a cluster of nodes.
* **Highly available:** tolerates failure of a minority of nodes.
* **SQL compliant:** correctly supports most common [SQL](https://en.wikipedia.org/wiki/SQL)
  features.
* **Strongly consistent:** committed writes are immediately visible to all readers ([linearizability](https://en.wikipedia.org/wiki/Linearizability)).
* **Transactional:** provides [ACID](https://en.wikipedia.org/wiki/ACID) transactions
  * **Atomic:** groups of writes are applied as a single, atomic unit.
  * **Consistent:** database constraints and referential integrity are always enforced.
  * **Isolated:** concurrent transactions don't affect each other ([snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation)).
  * **Durable:** committed writes are never lost.

For simplicity, toyDB is:

* **Not scalable:** every node stores the full dataset, and reads/writes execute on one node.
* **Not reliable:** only handles crash failures, not e.g. partial network partitions or node stalls.
* **Not performant:** data processing is slow, and not optimized at all.
* **Not efficient:** loads entire tables into memory, no compression or garbage collection, etc.
* **Not full-featured:** only basic SQL functionality is implemented.
* **Not backwards compatible:** changes to data formats and protocols will break databases.
* **Not flexible:** nodes can't be added or removed while running, and take a long time to join.
* **Not secure:** there is no authentication, authorization, nor encryption.

## Components

Internally, toyDB is made up of a few main components:

* **Storage engine:** stores data on disk and manages transactions.
* **Raft consensus engine:** replicates data and coordinates cluster nodes.
* **SQL engine:** organizes SQL data, manages SQL sessions, and executes SQL statements.
* **Server:** manages network communication, both with SQL clients and Raft nodes.
* **Client:** provides a SQL user interface and communicates with the server.

This diagram illustrates the internal structure of a single toyDB node:

![toyDB architecture](./images/architecture.svg)

We will go through each of these components from the bottom up.

---

<p align="center">
← <a href="index.md">toyDB Architecture</a> &nbsp; | &nbsp; <a href="storage.md">Storage Engine</a> →
</p>

================================================
FILE: docs/architecture/raft.md
================================================
# Raft Consensus

[Raft](https://raft.github.io) is a distributed consensus protocol which replicates data across a
cluster of nodes in a consistent and durable manner. It is described in the very readable
[Raft paper](https://raft.github.io/raft.pdf), and in the more comprehensive
[Raft thesis](https://web.stanford.edu/~ouster/cgi-bin/papers/OngaroPhD.pdf).

The toyDB Raft implementation is in the [`raft`](https://github.com/erikgrinaker/toydb/tree/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/raft)
module, and is described in the module documentation:

https://github.com/erikgrinaker/toydb/blob/d96c6dd5ae7c0af55ee609760dcd958c289a44f2/src/raft/mod.rs#L1-L240

Raft is fundamentally the same protocol as [Paxos](https://lamport.azurewebsites.net/pubs/paxos-simple.pdf)
and [Viewstamped Replication](https://pmg.csail.mit.edu/papers/vr-revisited.pdf), but an
opinionated variant designed to be simple, understandable, and practical. It is widely used in the
industry: [CockroachDB](https://www.cockroachlabs.com), [TiDB](https://www.pingcap.com),
[etcd](https://etcd.io), [Consul](https://developer.hashicorp.com/consul), and many others.

Briefly, Raft elects a leader node which coordinates writes and replicates them to followers. Once a
majority (>50%) of nodes have acknowledged a write, it is considered durably committed. It is common
for the leader to also serve reads, since it always has the most recent data and is thus strongly
consistent.

A cluster must have a majority of nodes (known as a [quorum](https://en.wikipedia.org/wiki/Quorum_(distributed_computing)))
live and connected to remain available, otherwise it will not commit writes in order to guarantee
data consistency and durability. Since there can only be one majority in the cluster, this prevents
a [split brain](https://en.wikipedia.org/wiki/Split-brain_(computing)) scenario where two active
leaders can exist concurrently (e.g. during a [network partition](https://en.wikipedia.org/wiki/Network_partition))
and store conflicting values.

The Raft leader appends writes to an ordered command log, which is then replicated to followers.
Once a majority has replicated the log up to a given entry, that log prefix is committed and then
applied to a state machine. This ensures that all nodes will apply the same commands in the same
order and eventually reach the same state (assuming the commands are deterministic). Raft itself
doesn't care what the state machine and commands are, but in toyDB's case it's SQL tables and rows
stored in an MVCC key/value store.

This diagram from the Raft paper illustrates how a Raft node receives a command from a client (1),
adds it to its log and reaches consensus with other nodes (2), then applies it to its state machine
(3) before returning a result to the client (4):

<img src="./images/raft.svg" alt="Raft node" width="400" style="display: block; margin: 30px auto;">

You may notice that Raft is not very scalable, since all reads and writes go via the leader node,
and every node must store the entire dataset. Raft solves replication and availability, but not
scalability. Real-world systems typically provide horizontal scalability by splitting a large
dataset across many separate Raft clusters (i.e. sharding), but this is out of scope for toyDB.

For simplicity, toyDB implements the bare minimum of Raft, and omits optimizations described in
the paper such as state snapshots, log truncation, leader leases, and more. The implementation is
in the [`raft`](https://github.com/erikgrinaker/toydb/blob/d96c6dd5ae7c0af55ee609760dcd958c289a44f2/src/raft/mod.rs)
module, and we'll walk through the main components next.

There is a comprehensive set of Raft test scripts in [`src/raft/testscripts/node`](https://github.com/erikgrinaker/toydb/blob/386153f5c00cb1a88b1ac8489ae132674d96f68a/src/raft/testscripts/node),
which illustrate the protocol in a wide variety of scenarios.

## Log Storage

Raft replicates an ordered command log consisting of `raft::Entry`:

https://github.com/erikgrinaker/toydb/blob/90a6cae47ac20481ac4eb2f20eea50f02e6c2b33/src/raft/log.rs#L10-L28

`index` specifies the position in the log, and `command` contains the binary command to apply to the
state machine. The `term` identifies the leadership term in which the command was proposed: a new
term begins when a new leader election is held (we'll get back to this later).

Entries are appended to the log by the leader and replicated to followers. Once acknowledged by a
quorum, the log up to that index is committed and will never change. Entries that are not yet
committed may be replaced or removed if the leader changes.

The Raft log enforces the following invariants:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/log.rs#L80-L91

`raft::Log` implements a Raft log, and stores log entries in a `storage::Engine` key/value store:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/log.rs#L43-L116

It also stores some additional metadata that we'll need later: the current term, vote, and commit
index. These are stored as separate keys:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/log.rs#L30-L39

Individual entries are appended to the log via `Log::append`, typically when the leader wants to
replicate a new write:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/log.rs#L190-L203

Entries can also be appended in bulk via `Log::splice`, typically when entries are replicated to
followers. This also allows replacing existing uncommitted entries, e.g. after a leader change:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/log.rs#L269-L343

Committed entries are marked by `Log::commit`, making them immutable and eligible for state machine
application:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/log.rs#L205-L222

The log also has methods to read entries from the log, either individually as `Log::get` or by
iterating over a range with `Log::scan`:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/log.rs#L224-L267

## State Machine Interface

Raft doesn't know or care what the log commands are, nor what the state machine does with them. It
simply takes `raft::Entry` from the log and gives them to the state machine.

The Raft state machine is represented by the `raft::State` trait. Raft will ask about the last
applied entry via `State::get_applied_index`, and feed it newly committed entries via
`State::apply`. It also allows reads via `State::read`, but we'll get back to that later.

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/state.rs#L4-L51

The state machine does not have to flush its state to durable storage after each transition; on node
crashes, the state machine is allowed to regress, and will be caught up by replaying the unapplied
log entries. It is also possible to implement a purely in-memory state machine (and in fact, toyDB
allows running the state machine with a `Memory` storage engine).

The state machine must take care to be deterministic: the same commands applied in the same order
must result in the same state across all nodes. This means that a command can't e.g. read the
current time or generate a random number -- these values must be included in the command. It also
means that non-deterministic errors, such as an IO error, must halt command application (in toyDB's
case, we just panic and crash the node).

In toyDB, the state machine is an MVCC key/value store that stores SQL tables and rows, as we'll
see in the SQL Raft replication section.

## Node Roles

In Raft, a node can have one out of three roles:

* **Leader:** replicates writes to followers and serves client requests.
* **Follower:** replicates writes from a leader.
* **Candidate:** campaigns for leadership.

The Raft paper summarizes these roles and transitions in the following diagram (we'll discuss
leader election in detail below):

<img src="./images/raft-states.svg" alt="Raft states" width="400" style="display: block; margin: 30px auto;">

In toyDB, a node is represented by the `raft::Node` enum, with variants for each state:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L47-L66

This wraps the `raft::RawNode<Role>` type which contains the inner node state. It is generic over
the role, and uses the [typestate pattern](http://cliffle.com/blog/rust-typestate/) to provide
methods and transitions depending on the node's current role. This enforces state transitions and
invariants at compile time via Rust's type system -- for example, only `RawNode<Candidate>` has an
`into_leader()` method, since only candidates can transition to leaders (when they win an election).

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L156-L177

The `RawNode::role` field contains role-specific state as structs implementing the `Role` marker
trait:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L661-L680

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L242-L255

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L523-L531

We'll see what the various fields are used for in the following sections.

## Node Interface and Communication

The `raft::Node` enum has two main methods that drive the node: `tick()` and `step()`. These consume
the current node and return a new node, possibly with a different role.

`tick()` advances time by a logical tick. This is used to measure the passage of time, e.g. to
trigger election timeouts or periodic leader heartbeats. toyDB uses a tick interval of 100
milliseconds (see `raft::TICK_INTERVAL`), and will call `tick()` on the node at this rate.

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L125-L132

`step()` processes an inbound message from a different node or client:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L107-L123

Outbound messages to other nodes are sent via the `RawNode::tx` channel:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L171-L172

Nodes are identified by a unique node ID, which is given at node startup:

https://github.com/erikgrinaker/toydb/blob/90a6cae47ac20481ac4eb2f20eea50f02e6c2b33/src/raft/node.rs#L17-L18

Messages are wrapped in a `raft::Envelope` specifying the sender and recipient:

https://github.com/erikgrinaker/toydb/blob/d96c6dd5ae7c0af55ee609760dcd958c289a44f2/src/raft/message.rs#L10-L21

The envelope contains a `raft::Message`, an enum which encodes the Raft message protocol. We won't
dwell on the specific message types here, but discuss them individually in the following sections.
Raft does not require reliable message delivery, so messages may be dropped or reordered at any
time, although toyDB's use of TCP provides stronger delivery guarantees.

https://github.com/erikgrinaker/toydb/blob/d96c6dd5ae7c0af55ee609760dcd958c289a44f2/src/raft/message.rs#L25-L152

This is an entirely synchronous and deterministic model -- the same sequence of calls on a given
node in a given initial state will always produce the same result. This is very convenient for
testing and understandability. We will see in the server section how toyDB drives the node on a
separate thread, provides a network transport for messages, and ticks it at regular intervals.

## Leader Election and Terms

In the steady state, Raft simply has a leader which replicates writes to followers. But to reach
this steady state, we must elect a leader, which is where much of the subtle complexity lies. See
the Raft paper for comprehensive details and safety arguments; we'll summarize it briefly below.

Raft divides time into _terms_. The term is a monotonically increasing number starting at 1. There
can only be one leader in a term (or none if an election fails), and the term can never regress.
Replicated commands belong to the specific term under which they were proposed.

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L20-L21

Let's walk through an election, where we bootstrap a brand new, empty toyDB cluster with 3 nodes.

Nodes are initialized by calling `Node::new()`. Since this is a new cluster, they are given an empty
`raft::Log` and `raft::State`, at term 0. Nodes start with role `Follower`, but without a leader.

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L68-L87

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L266-L290

Now, nothing really happens for a while, as the nodes are waiting to maybe hear from an existing
leader (there is none). Every 100 ms we call `tick()`, until we reach `election_timeout`:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L489-L497

Notice how `new()` set `election_timeout` to a random value (in the range `ELECTION_TIMEOUT_RANGE`
of 10-20 ticks, i.e. 1-2 seconds). If all nodes had the same timeout, they would likely campaign for
leadership simultaneously, resulting in an election tie -- Raft uses randomized election timeouts to
avoid such ties.

Once a node reaches `election_timeout` it transitions to role `Candidate`:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L292-L312

When it becomes a candidate it campaigns for leadership by increasing its term to 1, voting for
itself, and sending `Message::Campaign` to all peers asking for their vote:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L647-L658

In Raft, the term can't regress, and a node can only cast a single vote in each term (even across
restarts), so both of these are persisted to disk via `Log::set_term_vote()`.

When the two other nodes (still in state `Follower`) receive the `Message::Campaign` asking for a
vote, they will first increase their term to 1 (since this is a newer term than their local term 0):

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L347-L351

They then grant the vote since they haven't yet voted for anyone else in term 1. They persist the
vote to disk via `Log::set_term_vote()` and return a `Message::CampaignResponse { vote: true }` to
the candidate:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L424-L449

They also check that the candidate's log is at least as long as theirs, which is trivially true in
this case since the log is empty. This is necessary to ensure that a leader has all committed
entries (see section 5.4.1 in the Raft paper).

When the candidate receives the `Message::CampaignResponse` it records the vote from each node. Once
it has a quorum (in this case 2 out of 3 votes including its own vote) it becomes leader in term 1:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L599-L606

When it becomes leader, it sends a `Message::Heartbeat` to all peers to tell them it is now the
leader in term 1. It also appends an empty entry to its log and replicates it, but we will ignore
this for now (see section 5.4.2 in the Raft paper for why).

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L563-L583

When the other nodes receive the heartbeat, they become followers of the new leader in its term:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L359-L384

From now on, the leader will send periodic `Message::Heartbeat` every 4 ticks (see
`HEARTBEAT_INTERVAL`) to assert its leadership:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L945-L953

The followers record when they last received any message from the leader (including heartbeats), and
will hold a new election if they haven't heard from the leader in an election timeout (e.g. due to a
leader crash or network partition):

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L353-L356

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L489-L497

This entire process is illustrated in the test script [`election`](https://github.com/erikgrinaker/toydb/blob/cb234a0b776484608118fd9382869ee5bc30d4f0/src/raft/testscripts/node/election),
along with several other test scripts that show e.g. [election ties](https://github.com/erikgrinaker/toydb/blob/cb234a0b776484608118fd9382869ee5bc30d4f0/src/raft/testscripts/node/election_tie),
[contested elections](https://github.com/erikgrinaker/toydb/blob/cb234a0b776484608118fd9382869ee5bc30d4f0/src/raft/testscripts/node/election_contested),
and other scenarios:

https://github.com/erikgrinaker/toydb/blob/cb234a0b776484608118fd9382869ee5bc30d4f0/src/raft/testscripts/node/election#L1-L72

## Client Requests and Forwarding

Once a leader has been elected, we can submit read and write requests to it. This is done by
stepping a `Message::ClientRequest` into the node using the local node ID, with a unique request ID
(toyDB uses UUIDv4), and waiting for an outbound response message with the same ID:

https://github.com/erikgrinaker/toydb/blob/d96c6dd5ae7c0af55ee609760dcd958c289a44f2/src/raft/message.rs#L134-L151

https://github.com/erikgrinaker/toydb/blob/d96c6dd5ae7c0af55ee609760dcd958c289a44f2/src/raft/message.rs#L164-L188

The requests and responses themselves are arbitrary binary data which is interpreted by the state
machine. For our purposes here, let's pretend the requests are:

* `Request::Write("key=value")` → `Response::Write("ok")`
* `Request::Read("key")` → `Response::Read("value")`

The fundamental difference between read and write requests is that write requests are replicated
through Raft and executed on all nodes, while read requests are only executed on the leader without
being appended to the log. It would be possible to execute reads on followers too, for load
balancing, but these reads would be eventually consistent and thus violate linearizability, so toyDB
only executes reads on the leader.

If a request is submitted to a follower, it will be forwarded to the leader and the response
forwarded back to the client (distinguished by the sender/recipient node ID -- a local client always
uses the local node ID):

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L451-L474

For simplicity, we cancel the request with `Error::Abort` if a request is submitted to a candidate,
and similarly if a follower changes its role to candidate or discovers a new leader. We could have
held on to these and redirected them to a new leader, but we keep it simple and ask the client to
retry.

We'll look at the actual read and write request processing next.

## Write Replication and Application

When the leader receives a write request, it proposes the command for replication to followers. It
keeps track of the in-flight write and its log entry index in `writes`, such that it can respond to
the client with the command result once the entry has been committed and applied.

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L895-L904

To propose the command, the leader appends it to its log and sends a `Message::Append` to each
follower to replicate it to their logs:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L966-L980

In steady state, `Message::Append` just contains the single log entry we appended above:

https://github.com/erikgrinaker/toydb/blob/d96c6dd5ae7c0af55ee609760dcd958c289a44f2/src/raft/message.rs#L87-L108

However, sometimes followers may be lagging behind the leader (e.g. after a crash), or their log may
have diverged from the leader (e.g. unsuccessful proposals from a stale leader after a network
partition). To handle these cases, the leader tracks the replication progress of each follower as
`raft::Progress`:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L682-L698

We'll gloss over these cases here (see the Raft paper and the code in `raft::Progress` and
`maybe_send_append()` for details). In the steady state, where each entry is successfully appended
and replicated one at a time, `maybe_send_append()` will fall through to the bottom and send a
single entry:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L1068-L1128

The `Message::Append` contains the index/term of the entry immediately before the new entry as
`base_index` and `base_term`. If the follower's log also contains an entry with this index and term
then its log is guaranteed to match (be equal to) the leader's log up to this entry (see section 5.3
in the Raft paper). The follower can then append the new log entry and return a
`Message::AppendResponse` confirming that the entry was appended and that its log matches the
leader's log up to `match_index`:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L386-L410

When the leader receives the `Message::AppendResponse`, it will update its view of the follower's
`match_index`.

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L844-L858

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L701-L710

Once a quorum of nodes (in our case 2 out of 3 including the leader) have the entry in their log,
the leader can commit the entry and apply it to the state machine. It also looks up the in-flight
write request from `writes` and sends the command result back to the client as
`Message::ClientResponse`:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L982-L1032

The leader will also propagate the new commit index to followers via the next heartbeat, so that
they can also apply any pending log entries to their state machine. This isn't strictly necessary,
since reads are executed on the leader and nodes have to apply pending entries before becoming
leaders, but we do it anyway so that they don't fall too far behind on application.

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L359-L384

This process is illustrated in the test scripts [`append`](https://github.com/erikgrinaker/toydb/blob/cb234a0b776484608118fd9382869ee5bc30d4f0/src/raft/testscripts/node/append) and [`heartbeat_commits_follower`](https://github.com/erikgrinaker/toydb/blob/cb234a0b776484608118fd9382869ee5bc30d4f0/src/raft/testscripts/node/heartbeat_commits_follower)
(along with many other scenarios):

https://github.com/erikgrinaker/toydb/blob/cb234a0b776484608118fd9382869ee5bc30d4f0/src/raft/testscripts/node/append#L1-L43

https://github.com/erikgrinaker/toydb/blob/cb234a0b776484608118fd9382869ee5bc30d4f0/src/raft/testscripts/node/heartbeat_commits_follower#L1-L50

## Read Processing

For linearizable (aka strongly consistent) reads, we must execute read requests on the leader, as
mentioned above. However, this is not sufficient: under e.g. a network partition, a node may think
it's still the leader while in fact a different leader has been elected elsewhere (in a later term)
and executed writes there.

To handle this case, the leader must confirm that it is still the leader for each read, by sending a
`Message::Read` to its followers containing a read sequence number. Only if a quorum confirms that
it is still the leader can the read be executed. This incurs an additional network roundtrip, which
is clearly inefficient, so real-world systems often use leader leases instead (see section 6.4.1 of
the Raft _thesis_, not the paper) -- but it's fine for toyDB.

https://github.com/erikgrinaker/toydb/blob/d96c6dd5ae7c0af55ee609760dcd958c289a44f2/src/raft/message.rs#L125-L132

When the leader receives the read request, it increments the read sequence number, stores the
pending read request in `reads`, and sends a `Message::Read` to all followers:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L906-L917

When the followers receive the `Message::Read`, they simply respond with a `Message::ReadResponse`
if it's from their current leader (messages from stale terms are ignored):

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L342-L346

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L412-L422

When the leader receives the `Message::ReadResponse` it records it in the peer's `Progress`, and
executes the read once a quorum have confirmed the sequence number:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L860-L866

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/node.rs#L1034-L1066

We now have a Raft-managed state machine with replicated writes and linearizable reads.

---

<p align="center">
← <a href="mvcc.md">MVCC Transactions</a> &nbsp; | &nbsp; <a href="sql.md">SQL Engine</a> →
</p>

================================================
FILE: docs/architecture/server.md
================================================
# Server

Now that we've gone over the individual components, we'll tie them all together in the toyDB
server `toydb::Server`, located in the [`server`](https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs) module.

The server wraps an inner Raft node `raft::Node`, which manages the SQL state machine, and is
responsible for routing network traffic between the Raft node, its Raft peers, and SQL clients.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L27-L44

For the network protocol, the server uses the Bincode encoding that we've discussed in the encoding
section, sent over a TCP connection. There's no need for any further framing, since Bincode knows
how many bytes to expect for each message depending on the type it's decoding into.

The server does not use [async Rust](https://rust-lang.github.io/async-book/) and e.g.
[Tokio](https://tokio.rs), instead opting for regular OS threads. Async Rust can significantly
complicate the code, which would obscure the main concepts, and any efficiency gains would be
entirely irrelevant for toyDB.

Internally in the server, messages are passed around between threads using
[Crossbeam channels](https://docs.rs/crossbeam/latest/crossbeam/channel/index.html).

The main server loop `Server::serve()` listens for inbound TCP connections on port 9705 for Raft
peers and 9605 for SQL clients, and spawns threads to process them. We'll look at Raft and SQL
services separately.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L66-L110

## Raft Routing

The heart of the server is the Raft processing thread `Server::raft_route()`. This is responsible
for periodically ticking the Raft node via `raft::Node::tick()`, stepping inbound messages from
Raft peers into the node via `raft::Node::step()`, and sending outbound messages to peers.

It also takes inbound Raft client requests from the `sql::engine::Raft` SQL engine, steps them
into the Raft node via `raft::Node::step()`, and passes responses back to the appropriate client
as the node emits them.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L169-L249

When the node starts up, it spawns a `Server::raft_send_peer()` thread for each Raft peer to send
outbound messages to them.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L84-L91

These threads continually attempt to connect to the peer via TCP, and then read any outbound
`raft::Envelope(raft::Message)` messages from `Server::raft_route()` via a channel and write the
messages into the TCP connection using Bincode:

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L146-L167

The server also continually listens for inbound Raft TCP connections from peers in
`Server::raft_accept()`:

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L112-L134

When an inbound connection is accepted, a `Server::raft_receive_peer()` thread is spawned that reads
Bincode-encoded `raft::Envelope(raft::Message)` messages from the TCP connection and sends them to
`Server::raft_route()` via a channel.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L136-L144

The Raft cluster is now fully connected, and the nodes can all talk to each other.

## SQL Service

Next, let's serve some SQL clients. The SQL service uses the enums `toydb::Request` and
`toydb::Response` as a client protocol, again Bincode-encoded over TCP.

The primary request type is `Request::Execute` which executes a SQL statement against a
`sql::execution::Session` and returns a `sql::execution::StatementResult`, as we've seen previously.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L312-L337

The server sets up a `sql::engine::Raft` SQL engine, with a Crossbeam channel that's used to send
`raft::Request` Raft client requests to `Server::raft_route()` and onwards to the local
`raft::Node`.  It then spawns a `Server::sql_accept()` thread to listen for inbound SQL client
connections:

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L104-L106

When a SQL client connection is accepted, a new client session `sql::execution::Session` is set up
for the client, and we spawn a `Server::sql_session()` thread to serve the connection:

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L251-L272

These session threads continually read `Request` messages from the client, execute them against the
SQL session (and ultimately the Raft node), before sending a `Response` back to the client.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/server.rs#L274-L309

## `toydb` Binary

The `toydb` binary in `src/bin/toydb.rs` launches the server, and is a thin wrapper around
`toydb::Server`. It is a tiny [`clap`](https://docs.rs/clap/latest/clap/) command:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/bin/toydb.rs#L82-L89

It first parses a server configuration from the `toydb.yaml` file:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/bin/toydb.rs#L30-L59

Then it initializes the Raft log storage and SQL state machine:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/bin/toydb.rs#L105-L133

And finally it launches the `toydb::Server`:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/bin/toydb.rs#L135-L137

toyDB is now up and running!

---

<p align="center">
← <a href="sql-execution.md">SQL Execution</a> &nbsp; | &nbsp; <a href="client.md">Client</a> →
</p>

================================================
FILE: docs/architecture/sql-data.md
================================================
# SQL Data Model

The SQL data model represents user data in tables and rows. It is made up of data types and schemas,
in the [`sql::types`](https://github.com/erikgrinaker/toydb/tree/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/types)
module.

## Data Types

toyDB supports four basic scalar data types as `sql::types::DataType`: booleans, integers, floats,
and strings.

https://github.com/erikgrinaker/toydb/blob/b2fe7b76ee634ca6ad31616becabfddb1c03d34b/src/sql/types/value.rs#L15-L27

Specific values are represented as `sql::types::Value`, using the corresponding Rust types. toyDB
also supports SQL `NULL` values, i.e. unknown values, following the rules of
[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).

https://github.com/erikgrinaker/toydb/blob/b2fe7b76ee634ca6ad31616becabfddb1c03d34b/src/sql/types/value.rs#L40-L64

The `Value` type provides basic formatting, conversion, and mathematical operations.

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/types/value.rs#L68-L79

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/types/value.rs#L164-L370

It also specifies comparison and ordering semantics, but these are subtly different from the SQL
semantics. For example, in Rust code `Value::Null == Value::Null` yields `true`, while in SQL
`NULL = NULL` yields `NULL`.  This mismatch is necessary for the Rust code to properly detect and
process `Null` values, and the desired SQL semantics are implemented during expression evaluation
which we'll cover below.

https://github.com/erikgrinaker/toydb/blob/b2fe7b76ee634ca6ad31616becabfddb1c03d34b/src/sql/types/value.rs#L91-L162

During execution, a row of values is represented as `sql::types::Row`, with multiple rows emitted
via `sql::types::Rows` row iterators:

https://github.com/erikgrinaker/toydb/blob/b2fe7b76ee634ca6ad31616becabfddb1c03d34b/src/sql/types/value.rs#L378-L388

## Schemas

toyDB schemas only support tables. There are no named indexes or constraints, and there's only a
single unnamed database.

Tables are represented by `sql::types::Table`:

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/types/schema.rs#L12-L25

A table is made up of a set of columns, represented by `sql::types::Column`. These support the data
types described above, along with unique constraints, foreign keys, and secondary indexes.

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/types/schema.rs#L29-L53

The table name serves as a unique identifier, and can't be changed later. In fact, table schemas
are entirely static: they can only be created or dropped (there are no schema changes).

Table schemas are stored in the catalog, represented by the `sql::engine::Catalog` trait. We'll
revisit the implementation of this trait in the SQL storage section.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/engine/engine.rs#L60-L79

Table schemas are validated when created via `Table::validate()`, which enforces invariants and
internal consistency. It uses the catalog to look up information about other tables, e.g. that
foreign key references point to a valid target column in a different table.

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/types/schema.rs#L98-L170

Table rows are validated via `Table::validate_row()`, which ensures that a `sql::types::Row`
conforms to the schema (e.g. that value types match the column data types). It uses a
`sql::engine::Transaction` to look up other rows in the database, e.g. to check for primary key
conflicts (we'll get back to this later).

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/types/schema.rs#L172-L236

## Expressions

During SQL execution, we also have to model _expressions_, such as `1 + 2 * 3`. These are
represented as values and operations on them, and can be nested as a tree to represent compound
operations.

https://github.com/erikgrinaker/toydb/blob/9419bcf6aededf0e20b4e7485e2a5fa3e975d79f/src/sql/types/expression.rs#L11-L64

For example, the expression `1 + 2 * 3` (taking [precedence](https://en.wikipedia.org/wiki/Order_of_operations)
into account) is represented as:

```rust
//    +
//   / \
//  1   *
//     /  \
//    2    3
Expression::Add(
    Expression::Constant(Value::Integer(1)),
    Expression::Multiply(
        Expression::Constant(Value::Integer(2)),
        Expression::Constant(Value::Integer(3)),
    ),
)
```

An `Expression` can contain two kinds of values: constant values as
`Expression::Constant(sql::types::Value)`, and dynamic values as `Expression::Column(usize)` column
references. The latter will fetch a `sql::types::Value` from a `sql::types::Row` at the specified
index during evaluation.

We'll see later how the SQL parser and planner transform text expressions like `1 + 2 * 3` into an
`Expression`, and how they resolve column names to row indexes, e.g. `price * 0.25` to
`row[3] * 0.25`.

Expressions are evaluated recursively via `Expression::evaluate()`, given a `sql::types::Row` with
input values for column references, and return a final `sql::types::Value` result:

https://github.com/erikgrinaker/toydb/blob/9419bcf6aededf0e20b4e7485e2a5fa3e975d79f/src/sql/types/expression.rs#L73-L208

Many of the comparison operations like `==` are implemented explicitly here instead of using
`sql::types::Value` comparisons. This is where we implement the SQL semantics of special values like
`NULL`, such that `NULL = NULL` yields `NULL` instead of `TRUE`.

For mathematical operations however, we generally dispatch to these methods on `sql::types::Value`:

https://github.com/erikgrinaker/toydb/blob/b2fe7b76ee634ca6ad31616becabfddb1c03d34b/src/sql/types/value.rs#L185-L295

Expression parsing and evaluation is tested via test scripts in
[`sql/testscripts/expressions`](https://github.com/erikgrinaker/toydb/tree/9419bcf6aededf0e20b4e7485e2a5fa3e975d79f/src/sql/testscripts/expressions).

---

<p align="center">
← <a href="sql.md">SQL Engine</a> &nbsp; | &nbsp; <a href="sql-storage.md">SQL Storage</a> →
</p>

================================================
FILE: docs/architecture/sql-execution.md
================================================
# SQL Execution

Now that the planner and optimizer have done all the hard work of figuring out how to execute a
query, it's time to actually execute it.

## Plan Executor

Plan execution is done by `sql::execution::Executor` in the
[`sql::execution`](https://github.com/erikgrinaker/toydb/tree/9419bcf6aededf0e20b4e7485e2a5fa3e975d79f/src/sql/execution)
module, using a `sql::engine::Transaction` to access the SQL storage engine.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/execution/executor.rs#L14-L49

The executor takes a `sql::planner::Plan` as input, and will return an `ExecutionResult` depending
on the statement type.

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/execution/executor.rs#L331-L339

When executing the plan, the executor will branch off depending on the statement type:

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/execution/executor.rs#L57-L101

We'll focus on `SELECT` queries here, which are the most interesting.

toyDB uses the iterator model (also known as the volcano model) for query execution. In the case of
a `SELECT` query, the result is a row iterator, and pulling from this iterator by calling `next()`
will drive the entire execution pipeline by recursively calling `next()` on the child nodes' row
iterators. This maps very naturally onto Rust's iterators, and we leverage these to construct the
execution pipeline as nested iterators.

Execution itself is fairly straightforward, since we're just doing exactly what the planner tells us
to do in the plan. We call `Executor::execute_node` recursively on each `sql::planner::Node`,
starting with the root node. Each node returns a result row iterator that the parent node can pull
its input rows from, process them, and output the resulting rows via its own row iterator (with the
root node's iterator being returned to the caller):

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/execution/executor.rs#L103-L104

`Executor::execute_node()` will simply look at the type of `Node`, recursively call
`Executor::execute_node()` on any child nodes, and then process the rows accordingly.

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/execution/executor.rs#L103-L212

We won't discuss every plan node in detail, but let's consider the movie plan we've looked at
previously:

```
Select
└─ Order: movies.released desc
   └─ Projection: movies.title, movies.released, genres.name as genre
      └─ HashJoin: inner on movies.genre_id = genres.id
         ├─ Scan: movies (released >= 2000)
         └─ Scan: genres
```

We'll recursively call `execute_node()` until we end up in the two `Scan` nodes. These simply
call through to the SQL engine (either using Raft or local disk) via `Transaction::scan()`, passing
in the scan predicate if any, and return the resulting row iterator:

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/execution/executor.rs#L203-L204

`HashJoin` will then join the output rows from the `movies` and `genres` iterators by using a
hash join. This builds an in-memory table for `genres` and then iterates over `movies`, joining
the rows:

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/execution/executor.rs#L128-L141

https://github.com/erikgrinaker/toydb/blob/889aef9f24c0fa4d58e314877fa17559a9f3d5d2/src/sql/execution/join.rs#L103-L183

The `Projection` node will simply evaluate the (trivial) column expressions using each joined
row as input:

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/execution/executor.rs#L179-L186

And finally the `Order` node will sort the results (which requires buffering them all in memory):

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/execution/executor.rs#L173-L177

https://github.com/erikgrinaker/toydb/blob/686d3971a253bfc9facc2ba1b0e716cff5c109fb/src/sql/execution/executor.rs#L298-L328

The output row iterator of `Order` is returned via `ExecutionResult::Select`, and the caller can now
go ahead and pull the resulting rows from it.

## Session Management

The entry point to the SQL engine is the `sql::execution::Session`, which represents a single user
session. It is obtained via `sql::engine::Engine::session()`.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/execution/session.rs#L14-L21

The session takes a series of raw SQL statement strings as input and parses them:

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/execution/session.rs#L29-L33

For each statement, it returns a result depending on the kind of statement:

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/execution/session.rs#L132-L148

The session itself performs transaction control. It handles `BEGIN`, `COMMIT`, and `ROLLBACK`
statements, and modifies the transaction accordingly.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/execution/session.rs#L34-L70

Any other statements are processed by the SQL planner, optimizer, and executor as we've seen in
previous sections.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/execution/session.rs#L77-L83

These statements are always executed using the session's current transaction. If there is no active
transaction, the session will create a new, implicit transaction for each statement.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/execution/session.rs#L87-L112

And with that, we have a fully functional SQL engine!

---

<p align="center">
← <a href="sql-optimizer.md">SQL Optimization</a> &nbsp; | &nbsp; <a href="server.md">Server</a> →
</p>

================================================
FILE: docs/architecture/sql-optimizer.md
================================================
# SQL Optimization

[Query optimization](https://en.wikipedia.org/wiki/Query_optimization) attempts to improve query
performance and efficiency by altering the execution plan. This is a deep and complex field, and
we can only scratch the surface here.

toyDB's query optimizer is very basic -- it only has a handful of rudimentary heuristic
optimizations to illustrate how the process works. Real-world optimizers use much more sophisticated
methods, including statistical analysis, cost estimation, adaptive execution, etc.

The optimizers are located in the [`sql::planner::optimizer`](https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs) module.
An optimizer `sql::planner::Optimizer` just takes in a plan node `sql::planner::Node` (the root node
in the plan), and returns an optimized node:

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L20-L25

Optimizations are always implemented as recursive node transformations. To help with this, `Node`
has the helper methods `Node::transform` and `Node::transform_expressions` which recurse into a node
or expression tree and call a given transformation closure on each node, as either
[pre-order](https://en.wikipedia.org/wiki/Tree_traversal#Pre-order,_NLR) or
[post-order](https://en.wikipedia.org/wiki/Tree_traversal#Post-order,_LRN) transforms:

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/plan.rs#L269-L371

A technique that's often useful during optimization is to convert expressions into
[conjunctive normal form](https://en.wikipedia.org/wiki/Conjunctive_normal_form), i.e. "an AND of
ORs". For example, the two following expressions are equivalent, but the latter is in conjunctive
normal form (it's a chain of ANDs):

```
(a AND b) OR (c AND d)  →  (a OR c) AND (a OR d) AND (b OR c) AND (b OR d)
```

This is useful because we can often move each AND operand independently around in the plan tree
and still get the same result -- we'll see this in action later. Expressions are converted into
conjunctive normal form via `Expression::into_cnf`, which is implemented using
[De Morgan's laws](https://en.wikipedia.org/wiki/De_Morgan%27s_laws):

https://github.com/erikgrinaker/toydb/blob/9419bcf6aededf0e20b4e7485e2a5fa3e975d79f/src/sql/types/expression.rs#L289-L351

We'll have a brief look at all of toyDB's optimizers, which are listed here in the order they're
applied:

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L9-L18

Test scripts for the optimizers are in [`src/sql/testscripts/optimizers`](https://github.com/erikgrinaker/toydb/tree/9419bcf6aededf0e20b4e7485e2a5fa3e975d79f/src/sql/testscripts/optimizers),
and show how query plans evolve as each optimizer is applied.

## Constant Folding

The `ConstantFolding` optimizer performs [constant folding](https://en.wikipedia.org/wiki/Constant_folding).
This pre-evaluates constant expressions in the plan during planning, instead of evaluating them
for every row during execution.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L27-L30

For example, consider the query `SELECT 1 + 2 * 3 - foo FROM bar`. There is no point in
re-evaluating `1 + 2 * 3` for every row in `bar`, because the result is always the same, so we can
just evaluate this once during planning, transforming the expression into `7 - foo`.

Concretely, this plan:

```
Select
└─ Projection: 1 + 2 * 3 - bar.foo
   └─ Scan: bar
```

Should be transformed into this plan:

```
Select
└─ Projection: 7 - bar.foo
   └─ Scan: bar
```

To do this, `ConstantFolding` simply checks whether an `Expression` tree contains an
`Expression::Column` node -- if it doesn't, then it must be a constant expression (since that's the
only dynamic value in an expression), and we can evaluate it with a `None` input row and replace the
original expression node with an `Expression::Constant` node.

This is done recursively for each plan node, and recursively for each expression node (so it does
this for `SELECT`, `WHERE`, `ORDER BY`, and all other parts of the query). Notably, it does a
post-order expression transform, so it starts at the expression leaf nodes and attempts to transform
each expression node as it moves back up the tree -- this allows it to iteratively evaluate constant
parts as far as possible for each branch.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L32-L56

Additionally, `ConstantFolding` also short-circuits logical expressions. For example, the expression
`foo AND FALSE` will always be `FALSE`, regardless of what `foo` is, so we can replace it with
`FALSE`:

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L58-L84

As the code comment mentions though, this doesn't fold optimally: it doesn't attempt to rearrange
expressions, which would require knowledge of precedence rules. For example, `(1 + foo) - 2` could
be folded into `foo - 1` by first rearranging it as `foo + (1 - 2)`, but we don't do this currently.

## Filter Pushdown

The `FilterPushdown` optimizer attempts to push filter predicates as far down into the plan as
possible, to reduce the number of rows each node has to process.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L90-L95

Recall the `movies` query plan from the planning section:

```
Select
└─ Order: movies.released desc
   └─ Projection: movies.title, movies.released, genres.name as genre
      └─ Filter: movies.released >= 2000
         └─ NestedLoopJoin: inner on movies.genre_id = genres.id
            ├─ Scan: movies
            └─ Scan: genres
```

Even though we're filtering on `released >= 2000`, the `Scan` node still has to read all movies from
disk and send them via Raft, and the `NestedLoopJoin` node still has to join all of them. It would
be nice if we could push this filtering into the `NestedLoopJoin` and `Scan` nodes and avoid this
extra work, and this is exactly what `FilterPushdown` does.

The only plan nodes that have predicates that can be pushed down are `Filter` nodes and
`NestedLoopJoin` nodes, so we recurse through the plan tree and look for these nodes, attempting
to push down.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L97-L110

When it encounters the `Filter` node, it will extract the predicate and attempt to push it down
into its `source` node:

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L139-L153

If the source node is a `Filter`, `NestedLoopJoin`, or `Scan` node, then we can push the predicate
down into it by `AND`ing it with the existing predicate (if any).

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L112-L137

In our case, we were able to push the `Filter` into the `NestedLoopJoin`, and our plan now looks
like this:

```
Select
└─ Order: movies.released desc
   └─ Projection: movies.title, movies.released, genres.name as genre
      └─ NestedLoopJoin: inner on movies.genre_id = genres.id AND movies.released >= 2000
         ├─ Scan: movies
         └─ Scan: genres
```

But we're still not done, as we'd like to push `movies.released >= 2000` down into the `Scan` node.
Pushdown for join nodes is a little more tricky, because we can only push down parts of the
expression that reference one of the source nodes.

We first have to convert the expression into conjunctive normal form, i.e. an AND of ORs, as we've
discussed previously. This allows us to examine and push down each AND part in isolation, because it
has the same effect regardless of whether it is evaluated in the `NestedLoopJoin` node or one of
the source nodes. Our expression is already in conjunctive normal form, though.

We then look at each AND part, and check which side of the join it has column references for.  If it
only references one of the sides, then the expression can be pushed down into it. We also make some
effort here to move primary/foreign key constants across to both sides, but we'll gloss over that.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L155-L247

This allows us to push down the `movies.released >= 2000` predicate into the corresponding `Scan`
node, significantly reducing the amount of data transferred across Raft:

```
Select
└─ Order: movies.released desc
   └─ Projection: movies.title, movies.released, genres.name as genre
      └─ NestedLoopJoin: inner on movies.genre_id = genres.id
         ├─ Scan: movies (released >= 2000)
         └─ Scan: genres
```

## Index Lookups

The `IndexLookup` optimizer uses primary key or secondary index lookups instead of full table
scans where possible.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L250-L252

The optimizer itself is fairly straightforward. It assumes that `FilterPushdown` has already pushed
predicates down into `Scan` nodes, so it only needs to examine these. It converts the predicate into
conjunctive normal form, and looks for any parts that are direct column lookups -- i.e.
`column = value` (possibly a long OR chain of these).

If it finds any, and the column is either a primary key or secondary index column, then we convert
the `Scan` node into either a `KeyLookup` or `IndexLookup` node respectively. If there are any
further AND predicates remaining, we add a parent `Filter` node to keep these predicates.

For example, the following plan:

```
Select
└─ Scan: movies ((id = 1 OR id = 7 OR id = 3) AND released >= 2000)
```

Will be transformed into one that does individual key lookups rather than a full table scan:

```
Select
└─ Filter: movies.released >= 2000
   └─ KeyLookup: movies (1, 3, 7)
```

The code is as outlined above:

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L254-L303

Helped by `Expression::is_column_lookup()` and `Expression::into_column_values()`:

https://github.com/erikgrinaker/toydb/blob/9419bcf6aededf0e20b4e7485e2a5fa3e975d79f/src/sql/types/expression.rs#L363-L421

## Hash Join

The `HashJoin` optimizer will replace a `NestedLoopJoin` with a `HashJoin` where possible.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L305-L307

A [nested loop join](https://en.wikipedia.org/wiki/Nested_loop_join) is a very inefficient O(n²)
algorithm, which iterates over all rows in the right source for each row in the left source to see
if they match. However, it is completely general, and can join on arbitrarily complex predicates.

In the common case where the join predicate is an equality comparison such as
`movies.genre_id = genres.id` (i.e. an [equijoin](https://en.wikipedia.org/wiki/Relational_algebra#θ-join_and_equijoin)),
then we can instead use a [hash join](https://en.wikipedia.org/wiki/Hash_join). This scans the right
table once, builds an in-memory hash table from it, and for each left row it looks up any right rows
in the hash table. This is a much more efficient O(n) algorithm.

In our previous movie example, we are in fact doing an equijoin:

```
Select
└─ Order: movies.released desc
   └─ Projection: movies.title, movies.released, genres.name as genre
      └─ NestedLoopJoin: inner on movies.genre_id = genres.id
         ├─ Scan: movies (released >= 2000)
         └─ Scan: genres
```

And so our `NestedLoopJoin` can be replaced by a `HashJoin`:

```
Select
└─ Order: movies.released desc
   └─ Projection: movies.title, movies.released, genres.name as genre
      └─ HashJoin: inner on movies.genre_id = genres.id
         ├─ Scan: movies (released >= 2000)
         └─ Scan: genres
```

The `HashJoin` optimizer is extremely simple: if the join predicate is an equijoin, use a hash join.
This isn't always a good idea (the right source can be huge and we can run out of memory for the
hash table), but we keep it simple.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L309-L348

Of course there are many other join algorithms out there, and one of the harder problems in SQL
optimization is how to efficiently perform large N-way multijoins. We don't attempt to tackle these
problems here -- the `HashJoin` optimizer is just a very simple example of such join optimization.

## Short Circuiting

The `ShortCircuit` optimizer tries to find nodes that can't possibly do any useful work, and either
removes them from the plan, or replaces them with trivial nodes that don't do anything. It is kind
of similar to the `ConstantFolding` optimizer in spirit, but works on plan nodes rather than
expression nodes.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L350-L354

For example, `Filter` nodes with a `TRUE` predicate won't actually filter anything:

```
Select
└─ Filter: true
   └─ Scan: movies
```

So we can just remove them:

```
Select
└─ Scan: movies
```

Similarly, `Filter` nodes with a `FALSE` predicate will never emit anything:

```
Select
└─ Filter: false
   └─ Scan: movies
```

There's no point doing a scan in this case, so we can just replace it with a `Nothing` node that
does no work and doesn't emit anything:

```
Select
└─ Nothing
```

The optimizer tries to find a bunch of such patterns. This can also tidy up query plans a fair bit
by removing unnecessary cruft.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/optimizer.rs#L356-L438

---

<p align="center">
← <a href="sql-planner.md">SQL Planning</a> &nbsp; | &nbsp; <a href="sql-execution.md">SQL Execution</a> →
</p>

================================================
FILE: docs/architecture/sql-parser.md
================================================
# SQL Parsing

We finally arrive at SQL. The SQL parser is the first stage in processing SQL queries and
statements, located in the [`sql::parser`](https://github.com/erikgrinaker/toydb/tree/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser)
module.

The SQL parser's job is to take a raw SQL string and turn it into a structured form that's more
convenient to work with. In doing so, it will validate that the string is in fact valid SQL
_syntax_. However, it doesn't know if the SQL statement actually makes sense -- it has no idea which
tables or columns exist, what their data types are, and so on. That's the job of the planner, which
we'll look at later.

For example, let's say the parser is given the following SQL query:

```sql
SELECT name, price, price * 25 / 100 AS vat
FROM products JOIN categories ON products.category_id = categories.id
WHERE categories.code = 'BLURAY' AND stock > 0
ORDER BY price DESC
LIMIT 10
```

It will generate a structure that looks something like this (in simplified syntax):

```rust
// A SELECT statement.
Statement::Select {
    // SELECT name, price, price * 25 / 100 AS vat
    select: [
        (Column("name"), None),
        (Column("price"), None),
        (
            Divide(
                Multiply(Column("price"), Integer(25)),
                Integer(100)
            ),
            Some("vat"),
        ),
    ]

    // FROM products JOIN categories ON products.category_id = categories.id
    from: [
        Join {
            left: Table("products"),
            right: Table("categories"),
            type: Inner,
            predicate: Some(
                Equal(
                    Column("products.category_id"),
                    Column("categories.id"),
                )
            )
        }
    ]

    // WHERE categories.code = 'BLURAY' AND stock > 0
    where: Some(
        And(
            Equal(
                Column("categories.code"),
                String("BLURAY"),
            ),
            GreaterThan(
                Column("stock"),
                Integer(0),
            )
        )
    )

    // ORDER BY price DESC
    order: [
        (Column("price"), Descending),
    ]

    // LIMIT 10
    limit: Some(Integer(10))
}
```

Let's have a look at how this happens.

## Lexer

We begin with the `sql::parser::Lexer`, which takes the raw SQL string and performs
[lexical analysis](https://en.wikipedia.org/wiki/Lexical_analysis) to convert it into a sequence of
tokens. These tokens are things like number, string, identifier, SQL keyword, and so on.

This preprocessing is useful to deal with some of the "noise" of SQL text, such as whitespace,
string quotes, identifier normalization, and so on. It also specifies which symbols and keywords are
valid in our SQL queries. This makes the parser's life a lot easier.

The lexer doesn't care about SQL structure at all, only that the individual pieces (tokens) of a
string are well-formed. For example, the following input string:

```
'foo' ) 3.14 SELECT + x
```

Will result in these tokens:

```
String("foo") CloseParen Number("3.14") Keyword(Select) Plus Ident("x")
```

Tokens and keywords are represented by the `sql::parser::Token` and `sql::parser::Keyword` enums
respectively:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/lexer.rs#L8-L47

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/lexer.rs#L86-L155

The lexer takes an input string and emits tokens as an iterator:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/lexer.rs#L311-L337

It does this by repeatedly attempting to scan the next token until it reaches the end of the string
(or errors). It can determine the kind of token by looking at the first character:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/lexer.rs#L358-L373

And then scan across the following characters as appropriate to generate a valid token. For example,
this is how a quoted string (e.g. `'foo'`) is lexed into a `Token::String` (including handling of
any escaped quotes inside the string):

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/lexer.rs#L435-L451

These tokens become the input to the parser.

## Abstract Syntax Tree

The end result of the parsing process will be an [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree)
(AST), which is a structured representation of a SQL statement, located in the
[`sql::parser::ast`](https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/ast.rs) module.

The root of this tree is the `sql::parser::ast::Statement` enum, which represents all the different
kinds of SQL statements that we support, along with their contents:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/ast.rs#L6-L145

The nested tree structure is particularly apparent with expressions, which represent values and
operations on them. For example, the expression `2 * 3 - 4 / 2` evaluates to the value `4`.

We've seen in the data model section how such expressions are represented as
`sql::types::Expression`, but before we get there we have to parse them. The parser has its own
representation `sql::parser::ast::Expression` -- this is necessary e.g. because in the AST, we
represent columns as names rather than numeric indexes (we don't know yet which columns exist or
what their names are, we'll get to that during planning).

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/ast.rs#L147-L170

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/ast.rs#L204-L234

For example, `2 * 3 - 4 / 2` is represented as:

```rust
Expression::Operator(Operator::Subtract(
    // The left-hand operand of -
    Expression::Operator(Operator::Multiply(
        // The left-hand operand of *
        Expression::Literal(Literal::Integer(2)),
        // The right-hand operand of *
        Expression::Literal(Literal::Integer(3)),
    )),
    // The right-hand operand of -
    Expression::Operator(Operator::Divide(
        // The left-hand operand of /
        Expression::Literal(Literal::Integer(4)),
        // The right-hand operand of /
        Expression::Literal(Literal::Integer(2)),
    )),
))
```

## Parser

The parser, `sql::parser::Parser`, takes lexer tokens as input and builds an `ast::Statement`
from them:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/parser.rs#L9-L32

We can determine the kind of statement we're parsing simply by looking at the first keyword:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/parser.rs#L109-L130

Let's see how a `SELECT` statement is parsed. The different clauses in a `SELECT` (e.g. `FROM`,
`WHERE`, etc.) must always be given in a specific order, and they always begin with the appropriate
keyword, so we can simply try to parse each clause in the expected order:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/parser.rs#L330-L342

Parsing each clause is also just a matter of parsing the expected parts in order. For example, the
initial `SELECT` clause is just a comma-separated list of expressions with an optional alias:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/parser.rs#L344-L365

The `FROM` clause is a comma-separated list of table names, optionally joined with other tables:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/parser.rs#L367-L427

And the `WHERE` clause is just a predicate expression to filter by:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/parser.rs#L429-L435

Expression parsing is where this gets tricky, because we have to respect the rules of operator
precedence and associativity. For example, according to mathematical order of operations (aka
"PEMDAS") the expression `2 * 3 - 4 / 2` must be parsed as `(2 * 3) - (4 / 2)` which yields 4, not
`2 * (3 - 4) / 2` which yields -1.

toyDB does this using the [precedence climbing algorithm](https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method),
which is a fairly simple and compact algorithm as far as these things go. In a nutshell, it will
greedily and recursively group operators together as long as their precedence is the same or higher
than that of the operators preceding them (hence "precedence climbing"). For example:

```
-----   ----- Precedence 2: * and /
------------- Precedence 1: -
2 * 3 - 4 / 2
```

The algorithm is documented in more detail on `Parser::parse_expression()`:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/parser/parser.rs#L501-L696

---

<p align="center">
← <a href="sql-raft.md">SQL Raft Replication</a> &nbsp; | &nbsp; <a href="sql-planner.md">SQL Planning</a> →
</p>

================================================
FILE: docs/architecture/sql-planner.md
================================================
# SQL Planning

The SQL planner in the [`sql::planner`](https://github.com/erikgrinaker/toydb/tree/c64012e29c5712d6fe028d3d5375a98b8faea266/src/sql/planner)
module takes a SQL statement AST from the parser and generates an execution plan for it. We won't
actually execute it just yet though, only figure out how to execute it.

## Execution Plan

A plan is represented by the `sql::planner::Plan` enum. The variant specifies the operation to
execute (e.g. `SELECT`, `INSERT`, `UPDATE`, `DELETE`):

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/plan.rs#L15-L73

Below the root, the plan is typically made up of a tree of nested `sql::planner::Node`. Each node
emits a stream of SQL rows as output, and may take streams of input rows from child nodes.

https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/planner/plan.rs#L106-L175

Here is an example, taken from the `Plan` code comment above:

```sql
SELECT title, released, genres.name AS genre
FROM movies INNER JOIN genres ON movies.genre_id = genres.id
WHERE released >= 2000
ORDER BY released
```

Which results in this query plan:

```
Select
└─ Order: movies.released asc
   └─ Projection: movies.title, movies.released, genres.name as genre
      └─ Filter: movies.released >= 2000
         └─ NestedLoopJoin: inner on movies.genre_id = genres.id
            ├─ Scan: movies
            └─ Scan: genres
```

Rows flow from the tree leaves to the root:

1. `Scan` nodes read rows from the tables `movies` and `genres`.
2. `NestedLoopJoin` joins the rows from `movies` and `genres`.
3. `Filter` discards rows with release dates older than 2000.
4. `Projection` picks out the requested column values from the rows.
5. `Order` sorts the rows by release date.
6. `Select` returns the final rows to the client.

## Scope and Name Resolution

One of the main jobs of the planner is to resolve column names to column indexes in the input rows
of each node.

In the query example above, the `WHERE released >= 2000` filter may refer to a column `released`
from either the joined `movies` table or the `genres` table. The planner needs to figure out which
table has a `released` column, and also figure out which column number in the `NestedLoopJoin`
output rows corresponds to the `released` column (for example column number 2).

This job is further complicated by the fact that many nodes can alias, reorder, or drop columns,
and some nodes may also refer to columns that shouldn't be part of the result at all (for example,
it's possible to `ORDER BY` a column that won't be output by a `SELECT` projection at all, but
the `Order` node still needs access to the column data to sort by it).

The planner uses a `sql::planner::Scope` to keep track of which column names are currently visible,
and which column indexes they refer to. For each node the planner builds, starting from the leaves,
it creates a new `Scope` that contains the currently visible columns, tracking how they are modified
and rearranged by each node.

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L577-L610

When an AST expression refers to a column name, the planner can use `Scope::lookup_column()` to find
out which column number the expression should take its input value from.

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L660-L686

## Planner

The planner itself is `sql::planner::Planner`. It uses a `sql::engine::Catalog` to look up
information about tables and columns from storage.

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L12-L20

To build an execution plan, the planner first looks at the `ast::Statement` kind to determine
what kind of plan to build:

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L28-L47

Let's build this `SELECT` plan from above:

```sql
SELECT title, released, genres.name AS genre
FROM movies INNER JOIN genres ON movies.genre_id = genres.id
WHERE released >= 2000
ORDER BY released
```

Which should result in this plan:

```
Select
└─ Order: movies.released asc
   └─ Projection: movies.title, movies.released, genres.name as genre
      └─ Filter: movies.released >= 2000
         └─ NestedLoopJoin: inner on movies.genre_id = genres.id
            ├─ Scan: movies
            └─ Scan: genres
```

The planner is given the following (simplified) AST from the parser as input:

```rust
// A SELECT statement.
Statement::Select {
    // SELECT title, released, genres.name AS genre
    select: [
        (Column("title"), None),
        (Column("released"), None),
        (Column("genres.name"), Some("genre")),
    ]

    // FROM movies INNER JOIN genres ON movies.genre_id = genres.id
    from: [
        Join {
            left: Table("movies"),
            right: Table("genres"),
            type: Inner,
            predicate: Some(
                Equal(
                    Column("movies.genre_id"),
                    Column("genres.id"),
                )
            )
        }
    ]

    // WHERE released >= 2000
    where: Some(
        GreaterThanOrEqual(
            Column("released"),
            Integer(2000),
        )
    )

    // ORDER BY released
    order: [
        (Column("released"), Ascending),
    ]
}
```

The first thing `Planner::build_select` does is to create an empty scope (which will track column
names and indexes) and build the `FROM` clause which will generate the initial input rows:

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L170-L179

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L283-L289

`Planner::build_from()` first encounters the `ast::From::Join` item, which joins `movies` and
`genres`. This will build a `Node::NestedLoopJoin` plan node for the join, which is the simplest and
most straightforward join algorithm -- it simply iterates over all rows in the `genres` table for
every row in the `movies` table and emits the joined rows (we'll see how to optimize it with a
better join algorithm later).

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L319-L344

It first recurses into `Planner::build_from()` to build each of the `ast::From::Table` nodes for
each table.  This will look up the table schemas in the catalog, add them to the current scope, and
build a `Node::Scan` node which will emit all rows from each table. The `Node::Scan` nodes are
placed into the `Node::NestedLoopJoin` above.

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L312-L317

While building the `Node::NestedLoopJoin`, it also needs to convert the join expression
`movies.genre_id = genres.id` into a proper `sql::types::Expression`. This is done by
`Planner::build_expression()`:

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L493-L568

Expression building is mostly a direct translation from an `ast::Expression` variant to a
corresponding `sql::types::Expression` variant (for example from
`ast::Expression::Operator(ast::Operator::Equal)` to `sql::types::Expression::Equal`). However, as
mentioned earlier, `ast::Expression` contains column references by name, while
`sql::types::Expression` contains column references as row indexes. This name resolution is done
here, by looking up the column names in the scope:

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L521-L523

The expression we're building is the join predicate of `Node::NestedLoopJoin`, so it operates on
joined rows containing all columns of `movies` then all columns of `genres`. It also operates on all
combinations of joined rows (the [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product)),
and the purpose of the join predicate is to determine which joined rows to actually keep. For
example, the full set of joined rows that are evaluated might be:

| movies.id | movies.title | movies.released | movies.genre_id | genres.id | genres.name |
|-----------|--------------|-----------------|-----------------|-----------|-------------|
| 1         | Sicario      | 2015            | 2               | 1         | Drama       |
| 1         | Sicario      | 2015            | 2               | 2         | Action      |
| 2         | 21 Grams     | 2003            | 1               | 1         | Drama       |
| 2         | 21 Grams     | 2003            | 1               | 2         | Action      |
| 3         | Heat         | 1995            | 2               | 1         | Drama       |
| 3         | Heat         | 1995            | 2               | 2         | Action      |

The join predicate should pick out the rows where `movies.genre_id = genres.id`. The scope will
reflect the column layout in the example above, and can resolve the column names to zero-based row
indexes as `#3 = #4`, which will be the final built `Expression`.

Now that we've built the `FROM` clause into a `Node::NestedLoopJoin` of two `Node::Scan` nodes, we
move on to the `WHERE` clause. This simply builds the `WHERE` expression `released >= 2000`, like
we've already seen with the join predicate, and creates a `Node::Filter` node which takes its input
rows from the `Node::NestedLoopJoin` and filters them by the given expression. Again, the scope
keeps track of which input columns we're getting from the join node and resolves the `released`
column reference in the expression.

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L202-L206

We then build the `SELECT` clause, which emits the `title, released, genres.name AS genre` columns.
This is just a list of expressions that are built in the current scope and placed into a
`Node::Projection` (the expressions could be arbitrarily complex). However, we also have to make
sure to update the scope with the final three columns that are output to subsequent nodes, taking
into account the `genre` alias for the original `genres.name` column (we won't dwell on the "hidden
columns" mentioned there -- they're not relevant for our query).

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L214-L234

Finally, we build the `ORDER BY` clause. Again, this just builds a trivial expression for `released`
and places it into a `Node::Order` node which takes input rows from the `Node::Projection` and
sorts them by the order expression.

https://github.com/erikgrinaker/toydb/blob/6f6cec4db10bc015a37ee47ff6c7dae383147dd5/src/sql/planner/planner.rs#L245-L252

And that's it. The `Node::Order` is placed into the root `Plan::Select`, and we have our final plan.

```
Select
└─ Order: movies.released asc
   └─ Projection: movies.title, movies.released, genres.name as genre
      └─ Filter: movies.released >= 2000
         └─ NestedLoopJoin: inner on movies.genre_id = genres.id
            ├─ Scan: movies
            └─ Scan: genres
```

We'll see how to execute it soon, but first we should optimize it to see if we can make it run
faster -- in particular, to see if we can avoid reading all movies from storage, and if we can do
better than the very slow nested loop join.

---

<p align="center">
← <a href="sql-parser.md">SQL Parsing</a> &nbsp; | &nbsp; <a href="sql-optimizer.md">SQL Optimization</a> →
</p>

================================================
FILE: docs/architecture/sql-raft.md
================================================
# SQL Raft Replication

toyDB uses Raft to replicate SQL storage across a cluster of nodes (see the Raft section for
details). All nodes will store a full copy of the SQL database, and the Raft leader will replicate
writes across nodes and execute reads.

Recall the Raft state machine interface `raft::State`:

https://github.com/erikgrinaker/toydb/blob/8782c2b05f11333c1586ef248f1a13dc1c8dec4a/src/raft/state.rs#L4-L51

In toyDB, the state machine is just a `sql::engine::Local` storage engine with a thin wrapper:

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/engine/raft.rs#L278-L291

Raft will submit read and write commands to this state machine as binary `Vec<u8>` data, so we have
to represent the methods of `sql::engine::Engine` as binary Raft commands. We do this as two
enums, `sql::engine::raft::Read` and `sql::engine::raft::Write`, which we'll Bincode-encode:

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/engine/raft.rs#L16-L71

Notice that almost all requests include a `mvcc::TransactionState`. Most of the useful methods of
`sql::engine::Engine` are on the `sql::engine::Transaction`, but unlike the `Local` engine, below
Raft we can't hold on to a `Transaction` object in memory between each command -- nodes may restart
and leadership may move, and we want client transactions to keep working despite this. Instead, we
will use the client-supplied `mvcc::TransactionState` to reconstruct a `Transaction` for every
command via `mvcc::Transaction::resume()` and call methods on it.

When the state machine receives a write command, it decodes it as a `Write` and calls the
appropriate `Local` method. The result is Bincode-encoded and returned to the caller, who knows what
return type to expect for a given command. The state machine also keeps track of the Raft applied
index of each command as a separate key in the key/value store.

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/engine/raft.rs#L346-L367

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/engine/raft.rs#L306-L338

Similarly, read commands are decoded as a `Read` and the appropriate `Local` method is called:

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/engine/raft.rs#L369-L404

That's the state machine running below Raft. But how do we actually send these commands to Raft and
receive results? That's handled by the `sql::engine::Raft` implementation, which uses a channel to
send requests to the local Raft node (we'll see how this plumbing works in the server section):

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/engine/raft.rs#L80-L95

The channel takes a `raft::Request` containing binary Raft client requests and a return channel
where the Raft node can send back a `raft::Response`. The Raft engine has a few convenience methods
to send requests and receive responses, for both read and write requests:

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/engine/raft.rs#L114-L135

And the implementation of the `sql::engine::Engine` and `sql::engine::Transaction` traits simply
send these requests via Raft:

https://github.com/erikgrinaker/toydb/blob/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/engine/raft.rs#L194-L276

One thing to note here is that we don't support streaming data via Raft, so e.g. the
`Transaction::scan` method will buffer the entire result in a `Vec`. With a full table scan, this
will load the entire table into memory -- that's unfortunate, but we keep it simple.

To summarize, this is what happens when `Transaction::insert()` is called to insert a row via Raft:

1. `sql::engine::raft::Transaction::insert()`: called to insert a row.
2. `sql::engine::raft::Write::Insert`: enum representation of the insert command.
3. `raft::Request::Write`: raft request containing the Bincode-encoded `Write::Insert` command.
4. `sql::engine::raft::Engine::tx`: sends the `Request::Write` and response channel to Raft.
5. `raft::Node::step()`: the `Request::Write` is given to Raft in a `Message::ClientRequest`.
6. Raft does its replication thing, and commits the command's log entry.
7. `raft::State::apply()`: the Bincode-encoded `Write::Insert` is passed to the state machine.
8. `sql::engine::raft::State::apply()`: decodes the command to a `Write::Insert`.
9. `sql::engine::raft::State::local`: contains the `Local` engine on each node.
10. `sql::engine::local::Engine::resume()`: called to obtain the SQL/MVCC transaction.
11. `sql::engine::local::Transaction::insert()`: the row is inserted to the local engine.
12. `raft::RawNode::tx`: the `Ok(())` result is sent as a Bincode-encoded `Message::ClientResponse`.
13. `sql::engine::raft::Transaction::insert()`: receives the result and returns it to the caller.

The plumbing here will be covered in more detail in the server section.

---

<p align="center">
← <a href="sql-storage.md">SQL Storage</a> &nbsp; | &nbsp; <a href="sql-parser.md">SQL Parsing</a> →
</p>

================================================
FILE: docs/architecture/sql-storage.md
================================================
# SQL Storage

The SQL storage engine, in the [`sql::engine`](https://github.com/erikgrinaker/toydb/tree/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/sql/engine)
module, stores tables and rows. toyDB has two SQL storage implementations:

* `sql::engine::Local`: local storage using a `storage::Engine` key/value store.
* `sql::engine::Raft`: Raft-replicated storage, using `Local` on each node below Raft.

These implement the `sql::engine::Engine` trait, which specifies the SQL storage API. SQL execution
can use either simple local storage or Raft-replicated storage -- toyDB itself always uses the
Raft-replicated engine, but many tests use a local in-memory engine.

The `sql::engine::Engine` trait is fully transactional, based on the `storage::MVCC` transaction
engine discussed previously. As such, the trait just has a few methods that begin transactions --
the storage logic itself is implemented in the transaction, which we'll cover next. The trait
also has a `session()` method to start SQL sessions for query execution, which we'll revisit in the
execution section.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/engine/engine.rs#L9-L29

Here, we'll only look at the `Local` engine, and we'll discuss Raft replication afterwards. `Local`
itself is just a thin wrapper around a `storage::MVCC<storage::Engine>` to create transactions:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L50-L97

## Key/Value Representation

`Local` uses a `storage::Engine` key/value store to store SQL table schemas, table rows, and
secondary index entries. But how do we represent these as keys and values?

The keys are represented by the `sql::engine::Key` enum, and encoded using the Keycode encoding
that we've discussed in the encoding section:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L15-L31

The values are encoded using the Bincode encoding, where the value type is given by the key:

* `Key::Table` → `sql::types::Table` (table schemas)
* `Key::Index` → `BTreeSet<sql::types::Value>` (indexed primary keys)
* `Key::Row` → `sql::types::Row` (table rows)

Recall that the Keycode encoding will store keys in sorted order. This means that all `Key::Table`
entries come first, then all `Key::Index`, then all `Key::Row`. These are further grouped and
sorted by their fields.

For example, consider these SQL tables containing movies and genres, with a secondary index on
`movies.genre_id` for fast lookups of movies with a given genre:

```sql
CREATE TABLE genres (
    id INTEGER PRIMARY KEY,
    name STRING NOT NULL
);

CREATE TABLE movies (
    id INTEGER PRIMARY KEY,
    title STRING NOT NULL,
    released INTEGER NOT NULL,
    genre_id INTEGER NOT NULL INDEX REFERENCES genres
);

INSERT INTO genres VALUES (1, 'Drama'), (2, 'Action');

INSERT INTO movies VALUES
    (1, 'Sicario', 2015, 2),
    (2, '21 Grams', 2003, 1),
    (3, 'Heat', 1995, 2);
```

This would result in the following illustrated keys and values, in the given order:

```
/Table/genres → Table { name: "genres", primary_key: 0, columns: ... }
/Table/movies → Table { name: "movies", primary_key: 0, columns: ... }
/Index/movies/genre_id/Integer(1) → BTreeSet { Integer(2) }
/Index/movies/genre_id/Integer(2) → BTreeSet { Integer(1), Integer(3) }
/Row/genres/Integer(1) → Row { Integer(1), String("Drama") }
/Row/genres/Integer(2) → Row { Integer(2), String("Action") }
/Row/movies/Integer(1) → Row { Integer(1), String("Sicario"), Integer(2015), Integer(2) }
/Row/movies/Integer(2) → Row { Integer(2), String("21 Grams"), Integer(2003), Integer(1) }
/Row/movies/Integer(3) → Row { Integer(3), String("Heat"), Integer(1995), Integer(2) }
```

Thus, if we want to do a full table scan of the `movies` table, we just do a prefix scan of
`/Row/movies/`. If we want to do a secondary index lookup of all movies with `genre_id = 2`, we
fetch `/Index/movies/genre_id/Integer(2)` and find that movies with `id = {1,3}` have this genre.

To help with prefix scans, the valid key prefixes are represented as `sql::engine::KeyPrefix`:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L35-L48

For a look at the actual on-disk binary storage format, see the test scripts under
[`src/sql/testscripts/writes`](https://github.com/erikgrinaker/toydb/tree/c2b0f7f1d6cbf6e2cdc09fc0aec7b050e840ec21/src/sql/testscripts/writes),
which output the logical and raw binary representation of write operations.

## Schema Catalog

The `sql::engine::Catalog` trait is used to store table schemas, i.e. `sql::types::Table`. It has a
handful of methods for creating, dropping and fetching tables (recall that toyDB does not support
schema changes). The `Table::name` field is used as a unique table identifier throughout.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/engine/engine.rs#L60-L79

The `Catalog` trait is also fully transactional, as it must be implemented on a transaction via the
`type Transaction: Transaction + Catalog` trait bound on `sql::engine::Engine`.

Creating a table is straightforward: insert a key/value pair with a Keycode-encoded `Key::Table`
for the key and a Bincode-encoded `sql::types::Table` for the value. We first check that the
table doesn't already exist, and validate the table schema using `Table::validate()`.

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L340-L347

Similarly, fetching and listing tables is straightforward: just key/value gets or scans using the
appropriate keys.

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L390-L399

Dropping tables is a bit more involved, since we have to perform some validation and also delete the
actual table rows and any secondary index entries, but it's not terribly complicated:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L349-L388

## Row Storage and Transactions

The workhorse of the SQL storage engine is the `Transaction` trait, which provides
[CRUD](https://en.wikipedia.org/wiki/Create,_read,_update_and_delete) operations (create, read,
update, delete) on table rows and secondary index entries. For performance (especially with Raft),
it operates on row batches rather than individual rows.

https://github.com/erikgrinaker/toydb/blob/0839215770e31f1e693d5cccf20a68210deaaa3f/src/sql/engine/engine.rs#L31-L58

The `Local::Transaction` implementation is just a wrapper around an MVCC transaction, and the
commit/rollback methods just call straight through to it:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L99-L102

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L182-L192

To insert new rows into a table, we first have to perform some validation: check that the table
exists and validate the rows against the table schema (including checking for e.g. primary key
conflicts and foreign key references). We then store the rows as key/value pairs, using a
`Key::Row` with the table name and primary key value. And finally, we update secondary index entries
(if any).

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L252-L268

Row updates are similar to inserts, but in the case of a primary key change we instead delete the
old row and insert a new one, for simplicity. Secondary index updates also have to update both the
old and new entries.

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L296-L337

Row deletions are also similar: validate that the deletion is safe (e.g. check that there are no
foreign key references to it), then delete the `Key::Row` keys and any secondary index entries:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L194-L246

To fetch rows by primary key, we simply call through to key/value gets using the appropriate
`Key::Row`:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L248-L250

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L127-L133

Similarly, index lookups fetch a `Key::Index` for the indexed value, returning matching primary
keys:

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L270-L273

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L115-L125

Scanning table rows just performs a prefix scan with the appropriate `KeyPrefix::Row`, returning a
row iterator. This can optionally also do row filtering via filter pushdowns, which we'll revisit
when we look at the SQL optimizer.

https://github.com/erikgrinaker/toydb/blob/39c6b60afc4c235f19113dc98087176748fa091d/src/sql/engine/local.rs#L275-L294

And with that, we can now store and retrieve SQL tables and rows on disk. Let's see how to replicate
it across nodes via Raft.

---

<p align="center">
← <a href="sql-data.md">SQL Data Model</a> &nbsp; | &nbsp; <a href="sql-raft.md">SQL Raft Replication</a> →
</p>

================================================
FILE: docs/architecture/sql.md
================================================
# SQL Engine

The SQL engine provides support for the SQL query language, and is the main database interface. It
uses a key/value store for data storage, MVCC for transactions, and Raft for replication. The SQL
engine itself consists of several distinct components that form a pipeline:

> Client → Session → Lexer → Parser → Planner → Optimizer → Executor → Storage

The SQL engine is located in the [`sql`](https://github.com/erikgrinaker/toydb/tree/b2fe7b76ee634ca6ad31616becabfddb1c03d34b/src/sql)
module. We'll discuss each of the components in a bottom-up manner.

The SQL engine is tested as a whole by test scripts under
[`src/sql/testscripts`](https://github.com/erikgrinaker/toydb/tree/9419bcf6aededf0e20b4e7485e2a5fa3e975d79f/src/sql/testscripts).
These typically take a raw SQL string as input, execute it against an in-memory storage engine,
and output the result along with intermediate state such as the query plan, storage operations,
and binary key/value data.

---

<p align="center">
← <a href="raft.md">Raft Consensus</a> &nbsp; | &nbsp; <a href="sql-data.md">SQL Data Model</a> →
</p>

================================================
FILE: docs/architecture/storage.md
================================================
# Storage Engine

toyDB uses an embedded [key/value store](https://en.wikipedia.org/wiki/Key–value_database) for data
storage, located in the [`storage`](https://github.com/erikgrinaker/toydb/tree/213e5c02b09f1a3cac6a8bbd0a81773462f367f5/src/storage)
module. This stores arbitrary keys and values as binary byte strings. The storage engine doesn't
know or care what the keys and values contain -- we'll see later how the SQL data model, with tables
and rows, is mapped onto this key/value structure.

The storage engine supports simple set/get/delete operations on individual keys. It does not itself
support transactions -- this is built on top, and we'll get back to it shortly.

Keys are stored in sorted order. This allows range scans, where we can iterate over all key/value
pairs between two specific keys, or with a specific key prefix. This will be needed by other
components in the system, e.g. to scan all rows in a specific SQL table, to scan all versions of an
MVCC key, to scan the tail of the Raft log, etc.

The storage engine is pluggable: there are multiple implementations, and the user can choose which
one to use in the config file. These implement the `storage::Engine` trait:

https://github.com/erikgrinaker/toydb/blob/4804df254034c51f367d1380d389d80695cd7054/src/storage/engine.rs#L8-L58

Let's look at the existing storage engine implementations.

## `Memory` Storage Engine

The simplest storage engine is the `storage::Memory` engine. This is a trivial implementation which
stores data in memory using the Rust standard library's
[`BTreeMap`](https://doc.rust-lang.org/std/collections/struct.BTreeMap.html), without persisting
it to disk. It is primarily used for testing.

Since this is just a wrapper around the `BTreeMap` we can include it in its entirety here:

https://github.com/erikgrinaker/toydb/blob/8f8eae0dcf70b1a0df2e853b1f6600e0c7075340/src/storage/memory.rs#L8-L77

## `BitCask` Storage Engine

The main storage engine is `storage::BitCask`. This is a very simple variant of
[BitCask](https://riak.com/assets/bitcask-intro.pdf), used in the [Riak](https://riak.com/)
database. It is kind of like the [LSM-tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree)'s
baby cousin.

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L15-L55

toyDB's BitCask implementation uses a single append-only log file for storage. To write a key/value
pair, we simply append it to the file. To delete a key, we append a special tombstone value. When
reading a key, the last entry for that key in the file is used.

The file format for a key/value pair is simply:

1. The key length, as a big-endian `u32` (4 bytes).
2. The value length, as a big-endian `i32` (4 bytes). -1 if tombstone.
3. The binary key (n bytes).
4. The binary value (n bytes).

For example, the key/value pair `foo=bar` would be written as follows (in hexadecimal):

```
keylen   valuelen key    value
00000003 00000003 666f6f 626172
```

Because the data file is a simple log, we don't need a separate [write-ahead log](https://en.wikipedia.org/wiki/Write-ahead_logging)
for crash recovery -- the data file _is_ the write-ahead log.

To quickly look up key/value pairs when reading, we maintain an in-memory `KeyDir` index which maps
a key to the latest value's position in the file. All keys must therefore fit in memory.

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L57-L65

We initially generate this index by scanning through the entire file when it is opened:

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L267-L332

To write a key, we append it to the file and update the `KeyDir`:

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L155-L159

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L342-L366

To delete a key, we append a tombstone value instead:

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L122-L126

To read a value for a key, we look up the key's file location in the `KeyDir` index (if the key
exists), and then read it from the file:

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L334-L340

The `KeyDir` uses an inner stdlib `BTreeMap` to keep track of keys. This allows range scans, where
we iterate over a sorted set of keys between the range bounds, loading each key from the file:

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L144-L146

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L207-L225

As keys are updated and deleted, we'll keep accumulating old versions in the log file. To remove
these, the log file is compacted on startup. This writes out the latest value of every live
key/value pair to a new file, and replaces the old file. The keys are written in sorted order, to
make later scans faster.

https://github.com/erikgrinaker/toydb/blob/3e467512dca55843f0b071b3e239f14724f59a41/src/storage/bitcask.rs#L172-L195

---

<p align="center">
← <a href="overview.md">Overview</a> &nbsp; | &nbsp; <a href="encoding.md">Key/Value Encoding</a> →
</p>

================================================
FILE: docs/architecture.md
================================================
Moved to [`architecture/index.md`](architecture/index.md).

================================================
FILE: docs/crate/Cargo.toml
================================================
[package]
name = "toydb"
version = "1.0.1"
description = "A simple distributed SQL database, built for education"
authors = ["Erik Grinaker <erik@grinaker.org>"]
license = "Apache-2.0"
homepage = "https://github.com/erikgrinaker/toydb"
repository = "https://github.com/erikgrinaker/toydb"
edition = "2024"

================================================
FILE: docs/crate/README.md
================================================
# toyDB

toyDB is a distributed SQL database in Rust, built from scratch as an educational project. Main
features:

* Raft distributed consensus for linearizable state machine replication.

* ACID transactions with MVCC-based snapshot isolation.

* Pluggable storage engine with BitCask and in-memory backends.

* Iterator-based query engine with heuristic optimization and time-travel support.

* SQL interface including joins, aggregates, and transactions.

toyDB is not distributed as a crate, see <https://github.com/erikgrinaker/toydb> for more.

This crate used to contain the [joydb](https://crates.io/crates/joydb) database. Thanks to Serhii
Potapov for donating the crate name.

================================================
FILE: docs/crate/src/lib.rs
================================================
//! This crate is just a simple README.md placeholder. toydb is not intended to be used as a
//! library, and is not distributed as a crate. See <https://github.com/erikgrinaker/toydb>.

================================================
FILE: docs/examples.md
================================================
# SQL Examples

The following examples demonstrate some of toyDB's SQL features. For more details, see the
[SQL reference](sql.md).

- [Setup](#setup)
- [Creating Tables and Data](#creating-tables-and-data)
- [Constraints and Referential Integrity](#constraints-and-referential-integrity)
- [Basic SQL Queries](#basic-sql-queries)
- [Expressions](#expressions)
- [Joins](#joins)
- [Explain](#explain)
- [Aggregates](#aggregates)
- [Transactions](#transactions)
- [Time-Travel Queries](#time-travel-queries)

## Setup

To start a five-node cluster on the local machine (requires a working
[Rust compiler](https://www.rust-lang.org/tools/install)), run:

```
$ ./cluster/run.sh
toydb2 19:06:28 [ INFO] Listening on 0.0.0.0:9602 (SQL) and 0.0.0.0:9702 (Raft)
toydb2 19:06:28 [ERROR] Failed connecting to Raft peer 127.0.0.1:9705: Connection refused
toydb5 19:06:28 [ INFO] Listening on 0.0.0.0:9605 (SQL) and 0.0.0.0:9705 (Raft)
[...]
toydb5 19:06:29 [ INFO] Voting for toydb-d in term 1 election
toydb3 19:06:29 [ INFO] Voting for toydb-d in term 1 election
toydb4 19:06:29 [ INFO] Won election for term 1, becoming leader
```

In a separate terminal, start a `toysql` client and check the server status:

```
$ cargo run --release --bin toysql
Connected to toyDB node "toydb-a". Enter !help for instructions.
toydb> !status

Server:    5 (leader 4 in term 1 with 5 nodes)
Raft log:  1 committed, 0 applied, 0.000 MB (hybrid storage)
Node logs: 1:1 2:1 3:1 4:1 5:1
SQL txns:  0 active, 0 total (bitcask storage)
```

The cluster is shut down by pressing Ctrl-C. Data is saved under `cluster/toydb?/data/`;
delete the contents to start over.

## Creating Tables and Data

As a basis for later examples, we'll create a small movie database. The following SQL statements
can be pasted into `toysql`:

```sql
CREATE TABLE genres (
    id INTEGER PRIMARY KEY,
    name STRING NOT NULL
);
INSERT INTO genres VALUES
    (1, 'Science Fiction'),
    (2, 'Action'),
    (3, 'Drama'),
    (4, 'Comedy');

CREATE TABLE studios (
    id INTEGER PRIMARY KEY,
    name STRING NOT NULL
);
INSERT INTO studios VALUES
    (1, 'Mosfilm'),
    (2, 'Lionsgate'),
    (3, 'StudioCanal'),
    (4, 'Warner Bros'),
    (5, 'Focus Features');

CREATE TABLE movies (
    id INTEGER PRIMARY KEY,
    title STRING NOT NULL,
    studio_id INTEGER NOT NULL INDEX REFERENCES studios,
    genre_id INTEGER NOT NULL INDEX REFERENCES genres,
    released INTEGER NOT NULL,
    rating FLOAT
);
INSERT INTO movies VALUES
    (1,  'Stalker',             1, 1, 1979, 8.2),
    (2,  'Sicario',             2, 2, 2015, 7.6),
    (3,  'Primer',              3, 1, 2004, 6.9),
    (4,  'Heat',                4, 2, 1995, 8.2),
    (5,  'The Fountain',        4, 1, 2006, 7.2),
    (6,  'Solaris',             1, 1, 1972, 8.1),
    (7,  'Gravity',             4, 1, 2013, 7.7),
    (8,  '21 Grams',            5, 3, 2003, 7.7),
    (9,  'Birdman',             4, 4, 2014, 7.7),
    (10, 'Inception',           4, 1, 2010, 8.8),
    (11, 'Lost in Translation', 5, 4, 2003, 7.7),
    (12, 'Eternal Sunshine of the Spotless Mind', 5, 3, 2004, 8.3);
```

toyDB supports some basic datatypes, as well as primary keys, foreign keys, and column indexes.
For more information on these, see the [SQL reference](sql.md). Schema changes such as
`ALTER TABLE` are not supported, only `CREATE TABLE` and `DROP TABLE`.

The tables can be inspected via the `!tables` and `!table` commands:

```sql
toydb> !tables
genres
movies
studios

toydb> !table genres
CREATE TABLE genres (
  id INTEGER PRIMARY KEY,
  name STRING NOT NULL
)
```

## Constraints and Referential Integrity

Schemas enforce referential integrity and other constraints:

```sql
toydb> DROP TABLE studios;
Error: Table studios is referenced by table movies column studio_id

toydb> DELETE FROM studios WHERE id = 1;
Error: Primary key 1 is referenced by table movies column studio_id

toydb> UPDATE movies SET id = 1;
Error: Primary key 1 already exists for table movies

toydb> INSERT INTO movies VALUES (13, 'Nebraska', 6, 3, 2013, 7.7);
Error: Referenced primary key 6 in table studios does not exist

toydb> INSERT INTO movies VALUES (13, 'Nebraska', NULL, 3, 2013, 7.7);
Error: NULL value not allowed for column studio_id

toydb> INSERT INTO movies VALUES (13, 'Nebraska', 'Unknown', 3, 2013, 7.7);
Error: Invalid datatype STRING for INTEGER column studio_id
```

## Basic SQL Queries

Most basic SQL query functionality is supported:

```sql
toydb> SELECT * FROM studios;
1|Mosfilm
2|Lionsgate
3|StudioCanal
4|Warner Bros
5|Focus Features

toydb> SELECT title, rating FROM movies WHERE released >= 2000 ORDER BY rating DESC LIMIT 3;
Inception|8.8
Eternal Sunshine of the Spotless Mind|8.3
Gravity|7.7
```

Column headers can be enabled with `!headers on`:

```sql
toydb> !headers on
Headers enabled

toydb> SELECT id, name AS genre FROM genres;
id|genre
1|Science Fiction
2|Action
3|Drama
4|Comedy
```

## Expressions

All common mathematical operators are implemented:

```sql
toydb> SELECT 1 + 2 * 3;
7

toydb> SELECT (1 + 2) * 4 / -3;
-4

toydb> SELECT 3! + 7 % 4 - 2 ^ 3;
1
```

64-bit floating point arithmetic is also supported, including infinity and NaN:

```sql
toydb> SELECT 3.14 * 2.718;
8.53452

toydb> SELECT 1.0 / 0.0;
inf

toydb> SELECT 1e10 ^ 8;
100000000000000000000000000000000000000000000000000000000000000000000000000000000

toydb> SELECT 1e10 ^ 8 / INFINITY, 1e10 ^ 1e10, INFINITY / INFINITY;
0|inf|NaN
```

And of course three-valued logic:

```sql
toydb> SELECT TRUE AND TRUE, TRUE AND FALSE, TRUE AND NULL, FALSE AND NULL;
TRUE|FALSE|NULL|FALSE

toydb> SELECT TRUE OR FALSE, FALSE OR FALSE, TRUE OR NULL, FALSE OR NULL;
TRUE|FALSE|TRUE|NULL

toydb> SELECT NOT TRUE, NOT FALSE, NOT NULL;
FALSE|TRUE|NULL
```

Which would be useless without comparison operators for all types:

```sql
toydb> SELECT 3 > 1, 3 <= 1, 3 = 3.0;
TRUE|FALSE|TRUE

toydb> SELECT 'a' = 'A', 'foo' > 'bar', '👍' != '👎';
FALSE|TRUE|TRUE

toydb> SELECT INFINITY > -INFINITY, NULL = NULL;
TRUE|NULL
```

## Joins

No SQL database would be complete without joins, and toyDB supports most join types such as
inner joins (both implicit and explicit):

```sql
toydb> SELECT m.id, m.title, g.name FROM movies m JOIN genres g ON m.genre_id = g.id LIMIT 4;
1|Stalker|Science Fiction
2|Sicario|Action
3|Primer|Science Fiction
4|Heat|Action

toydb> SELECT m.id, m.title, g.name FROM movies m, genres g WHERE m.genre_id = g.id LIMIT 4;
1|Stalker|Science Fiction
2|Sicario|Action
3|Primer|Science Fiction
4|Heat|Action
```

Left and right outer joins:

```sql
toydb> SELECT s.id, s.name, g.name FROM studios s LEFT JOIN genres g ON s.id = g.id;
1|Mosfilm|Science Fiction
2|Lionsgate|Action
3|StudioCanal|Drama
4|Warner Bros|Comedy
5|Focus Features|NULL

toydb> SELECT g.id, g.name, s.name FROM genres g RIGHT JOIN studios s ON g.id = s.id;
1|Science Fiction|Mosfilm
2|Action|Lionsgate
3|Drama|StudioCanal
4|Comedy|Warner Bros
NULL|NULL|Focus Features
```

And cross joins (both implicit and explicit):

```sql
toydb> SELECT g.name, s.name FROM genres g, studios s WHERE s.name < 'S';
Science Fiction|Mosfilm
Science Fiction|Lionsgate
Science Fiction|Focus Features
Action|Mosfilm
Action|Lionsgate
Action|Focus Features
Drama|Mosfilm
Drama|Lionsgate
Drama|Focus Features
Comedy|Mosfilm
Comedy|Lionsgate
Comedy|Focus Features
```

We can join on arbitrary predicates, such as joining movies with any genres whose name is
ordered after the movie's title:

```sql
toydb>  SELECT   m.title, g.name
        FROM     movies m JOIN genres g ON g.name > m.title
        ORDER BY m.title, g.name;

21 Grams|Action
21 Grams|Comedy
21 Grams|Drama
21 Grams|Science Fiction
Birdman|Comedy
Birdman|Drama
Birdman|Science Fiction
Eternal Sunshine of the Spotless Mind|Science Fiction
Gravity|Science Fiction
Heat|Science Fiction
Inception|Science Fiction
Lost in Translation|Science Fiction
Primer|Science Fiction
```

And we can join multiple tables, even using the same table multiple times - like in this example
where we find all science fiction movies released since 2000 by studios that have released any 
movie rated 8 or higher:

```sql
toydb> SELECT   m.id, m.title, g.name AS genre, m.released, s.name AS studio
       FROM     movies m JOIN genres g ON m.genre_id = g.id,
                studios s JOIN movies good ON good.studio_id = s.id AND good.rating >= 8
       WHERE    m.studio_id = s.id AND m.released >= 2000 AND g.id = 1
       ORDER BY m.title ASC;

7|Gravity|Science Fiction|2013|Warner Bros
10|Inception|Science Fiction|2010|Warner Bros
5|The Fountain|Science Fiction|2006|Warner Bros
```

## Explain

When optimizing complex queries with several joins, it can often be useful to inspect the query
plan via an `EXPLAIN` query:

```sql
toydb> EXPLAIN
       SELECT   m.id, m.title, g.name AS genre, m.released, s.name AS studio
       FROM     movies m JOIN genres g ON m.genre_id = g.id,
                studios s JOIN movies good ON good.studio_id = s.id AND good.rating >= 8
       WHERE    m.studio_id = s.id AND m.released >= 2000 AND g.id = 1
       ORDER BY m.title ASC;

Order: m.title asc
└─ Projection: m.id, m.title, g.name, m.released, s.name
   └─ HashJoin: inner on m.studio_id = s.id
      ├─ HashJoin: inner on m.genre_id = g.id
      │  ├─ Filter: m.released > 2000 OR m.released = 2000
      │  │  └─ IndexLookup: movies as m column genre_id (1)
      │  └─ KeyLookup: genres as g (1)
      └─ HashJoin: inner on s.id = good.studio_id
         ├─ Scan: studios as s
         └─ Scan: movies as good (good.rating > 8 OR good.rating = 8)
```

Here, we can see that the planner does a primary key lookup on `genres` and an index lookup on
`movies.genre_id`, filtering the resulting movies by release year and joining them. It also
does full table scans of `studios` and `movies` (to find the good movies) and joins them, pushing
the `rating >= 8` filter down to the `movies` table scan. The results of these two joins are also
joined to produce the final result, which is then formatted and sorted.

## Aggregates

Most basic aggregate functions are supported:

```sql
toydb> SELECT COUNT(*), MIN(rating), MAX(rating), AVG(rating), SUM(rating) FROM movies;
12|6.9|8.8|7.841666666666668|94.10000000000001
```

We can group by values and filter the aggregate results:

```sql
toydb> SELECT s.id, s.name, AVG(m.rating) AS average
       FROM movies m JOIN studios s ON m.studio_id = s.id
       GROUP BY s.id, s.name
       HAVING average > 7.8
       ORDER BY average DESC, s.name ASC;
1|Mosfilm|8.149999999999999
4|Warner Bros|7.919999999999999
5|Focus Features|7.900000000000001
```

And we can combine aggregate functions with arbitrary expressions, both inside and outside:

```sql
toydb> SELECT s.id, s.name, ((MAX(rating^2) - MIN(rating^2)) / AVG(rating^2)) ^ (0.5) AS spread
       FROM movies m JOIN studios s ON m.studio_id = s.id
       GROUP BY s.id, s.name
       HAVING MAX(rating) - MIN(rating) > 0.5
       ORDER BY spread DESC;
4|Warner Bros|0.6373540990222496
5|Focus Features|0.39194971607693424
```

## Transactions

toyDB supports ACID transactions via MVCC-based snapshot isolation. This provides atomic
transactions with good isolation, without taking out locks or blocking reads on writes. As a basic
example, the below transaction is rolled back without taking effect, as opposed to `COMMIT`
which would make it permanent:

```sql
toydb> BEGIN;
Began transaction 131

toydb:131> INSERT INTO genres VALUES (5, 'Western');
toydb:131> SELECT * FROM genres;
1|Science Fiction
2|Action
3|Drama
4|Comedy
5|Western
toydb:131> ROLLBACK;
Rolled back transaction 131

toydb> SELECT * FROM genres;
1|Science Fiction
2|Action
3|Drama
4|Comedy
```

We'll demonstrate transactions by covering most common transaction anomalies given two
concurrent sessions, and show how toyDB prevents these anomalies in all cases but one. In these
examples, the left half is user A and the right is user B. Time flows downwards such that
commands on the same line happen at the same time.

**Dirty write:** an uncommitted write by A should not be affected by a concurrent B write.

```sql
a> BEGIN;
a> INSERT INTO genres VALUES (5, 'Western');
                                                   b> INSERT INTO genres VALUES (5, 'Romance');
                                                   Error: Serialization failure, retry transaction
a> SELECT * FROM genres WHERE id = 5;
5|Western
```

The serialization failure here occurs because the first write always wins. This may not be an
optimal strategy, but it is correct in terms of preventing serialization anomalies.

**Dirty read:** an uncommitted write by A should not be visible to B until committed.

```sql
a> BEGIN;
a> INSERT INTO genres VALUES (5, 'Western');
                                                  b> SELECT * FROM genres WHERE id = 5;
                                                  No rows returned
a> COMMIT;
                                                  b> SELECT * FROM genres WHERE id = 5;
                                                  5|Western
```

**Lost update:** when A and B both read a value, before updating it in turn, the first write should
not be overwritten by the second.

```sql
a> BEGIN;                                         b> BEGIN;
a> SELECT title, rating FROM movies WHERE id = 2; b> SELECT title, rating FROM movies WHERE id = 2;
Sicario|7.6                                       Sicario|7.6
a> UPDATE movies SET rating = 7.8 WHERE id = 2;
                                                  b> UPDATE movies SET rating = 7.7 WHERE id = 2;
                                                  Error: Serialization failure, retry transaction
a> COMMIT;
```

**Fuzzy read:** B should not see a value suddenly change in its transaction, even if A commits a 
new value.

```sql
a> BEGIN;                                         b> BEGIN;
                                                  b> SELECT * FROM genres WHERE id = 1;
                                                  1|Science Fiction
a> UPDATE genres SET name = 'Scifi' WHERE id = 1;
a> COMMIT;
                                                  b> SELECT * FROM genres WHERE id = 1;
                                                  1|Science Fiction
                                                  b> COMMIT;

                                                  b> SELECT * FROM genres WHERE id = 1;
                                                  1|Scifi
```

**Read skew:** if A reads two values, and B modifies the second value in between the reads, A 
should see the old second value.

```sql
a> BEGIN;
a> SELECT * FROM genres WHERE id = 2;
2|Action
                                                  b> BEGIN;
                                                  b> UPDATE genres SET name = 'Drama' WHERE id = 2;
                                                  b> UPDATE genres SET name = 'Action' WHERE id = 3;
                                                  b> COMMIT;
a> SELECT * FROM genres WHERE id = 3;
3|Drama
```

**Phantom read:** when A runs a query with a predicate, and B commits a matching write, A should
not see the write when rerunning it.

```sql
a> BEGIN;
a> SELECT * FROM genres WHERE id > 2;
3|Drama
4|Comedy
                                                  b> INSERT INTO genres VALUES (5, 'Western');
a> SELECT * FROM genres WHERE id > 2;
3|Drama
4|Comedy
```

**Write skew:** when A reads row X and writes it to row Y, B should not concurrently be able to
read row Y and write it to row X.

```sql
a> BEGIN;                                         b> BEGIN;
a> SELECT * FROM genres WHERE id = 2;
2|Action
                                                  b> SELECT * FROM genres WHERE id = 3;
                                                  3|Drama
                                                  b> UPDATE genres SET name = 'Drama' WHERE id = 2;
a> UPDATE genres SET name = 'Action' WHERE id = 3;
a> COMMIT;                                        b> COMMIT;
```

Here, the writes actually go through. This anomaly is not protected against by snapshot isolation, 
and thus not by toyDB either - doing so would require implementing serializable snapshot isolation. 
However, this is the only common serialization anomaly not handled by toyDB, and is not among the
most severe.

## Time-Travel Queries

Since toyDB uses MVCC for transactions and keeps all historical versions, the state of the database
can be queried at any arbitrary point in the past. toyDB uses incremental transaction IDs as
logical timestamps:

```sql
toydb> SELECT * FROM genres;
1|Science Fiction
2|Drama
3|Action
4|Comedy

toydb> BEGIN;
Began transaction 173
toydb:173> UPDATE genres SET name = 'Scifi' WHERE id = 1;
toydb:173> INSERT INTO genres VALUES (5, 'Western');
toydb:173> COMMIT;
Committed transaction 173

toydb> SELECT * FROM genres;
1|Scifi
2|Drama
3|Action
4|Comedy
5|Western

toydb> BEGIN READ ONLY AS OF SYSTEM TIME 172;
Began read-only transaction 175 in snapshot at version 172
toydb@172> SELECT * FROM genres;
1|Science Fiction
2|Drama
3|Action
4|Comedy
```

================================================
FILE: docs/references.md
================================================
# References

This is the main research material I used while building toyDB. It is a subset of my
[reading list](https://github.com/erikgrinaker/readings).

## Introduction

Andy Pavlo's CMU lectures are an absolutely fantastic introduction to database internals:

- 🎥 [CMU 15-445 Intro to Database Systems](https://www.youtube.com/playlist?list=PLSE8ODhjZXjbohkNBWQs_otTrBTrjyohi) (A Pavlo 2019)
- 🎥 [CMU 15-721 Advanced Database Systems](https://www.youtube.com/playlist?list=PLSE8ODhjZXjasmrEd2_Yi1deeE360zv5O) (A Pavlo 2020)

Martin Kleppmann has written an excellent overview of database technologies and concepts, while Alex
Petrov goes in depth on implementation of storage engines and distributed systems algorithms:

- 📖 [Designing Data-Intensive Applications](https://dataintensive.net/) (M Kleppmann 2017)
- 📖 [Database Internals](https://www.databass.dev) (A Petrov 2019)

## Raft

The Raft consensus algorithm is described in a very readable paper by Diego Ongaro, and in a talk
given by his advisor John Ousterhout:

- 📄 [In Search of an Understandable Consensus Algorithm](https://raft.github.io/raft.pdf) (D Ongaro, J Ousterhout 2014)
- 🎥 [Designing for Understandability: The Raft Consensus Algorithm](https://www.youtube.com/watch?v=vYp4LYbnnW8) (J Ousterhout 2016)

However, Raft has several subtle pitfalls, and Jon Gjengset's student guide was very helpful in
drawing attention to these:

- 🔗 [Students' Guide to Raft](https://thesquareplanet.com/blog/students-guide-to-raft/) (J Gjengset 2016)

## Parsing

Thorsten Ball has written a very enjoyable hands-on introduction to parsers where he implements
first an interpreter and then a compiler for the made-up Monkey programming language (in Go):

- 📖 [Writing An Interpreter In Go](https://interpreterbook.com) (T Ball 2016) 
- 📖 [Writing A Compiler In Go](https://compilerbook.com) (T Ball 2018)

The toyDB expression parser is inspired by a blog post by Eli Bendersky describing the precedence
climbing algorithm, which is the algorithm I found the most elegant:

- 💬 [Parsing Expressions by Precedence Climbing](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing) (E Bendersky 2012)

## Transactions

Jepsen (i.e. Kyle Kingsbury) has an excellent overview of consistency and isolation models, which 
is very helpful in making sense of the jungle of overlapping and ill-defined terms:

- 🔗 [Consistency Models](https://jepsen.io/consistency) (Jepsen 2016)

For more background on this, in particular on how snapshot isolation provided by the MVCC
transaction engine used in toyDB does not fit into the traditional SQL isolation levels, the
following classic papers were useful:

- 📄 [A Critique of ANSI SQL Isolation Levels](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-95-51.pdf) (H Berenson et al 1995)
- 📄 [Generalized Isolation Level Definitions](http://pmg.csail.mit.edu/papers/icde00.pdf) (A Adya, B Liskov, P O'Neil 2000)

As for actually implementing MVCC, I found blog posts to be the most helpful:

- 💬 [Implementing Your Own Transactions with MVCC](https://levelup.gitconnected.com/implementing-your-own-transactions-with-mvcc-bba11cab8e70) (E Chance 2015)
- 💬 [How Postgres Makes Transactions Atomic](https://brandur.org/postgres-atomicity) (B Leach 2017)

================================================
FILE: docs/sql.md
================================================
# SQL Reference

## Data Types

The following data types are supported:

* `BOOLEAN` (`BOOL`): logical truth values, i.e. true and false.
* `FLOAT` (`DOUBLE`): 64-bit signed floating point numbers, using [IEEE 754 `binary64`](https://en.wikipedia.org/wiki/binary64) encoding. Supports magnitudes of 10⁻³⁰⁷ to 10³⁰⁸ with 53-bit precision (~15 significant figures), as well as the special values infinity and NaN.
* `INTEGER` (`INT`): 64-bit signed integer numbers with a range of ±2⁶³-1.
* `STRING` (`TEXT`, `VARCHAR`): UTF-8 encoded strings.

In addition, the special `NULL` value is used for an unknown value, following the rules of [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).

Numeric types are not interchangeable; a float value (even without a fractional part) cannot be stored in an integer column and vice-versa.

## SQL Syntax

### Keywords

Keywords are reserved words with special meaning in SQL statements. They are case-insensitive, and must be quoted with `"` to be used as identifiers. The complete list is:

`AS`, `ASC`, `AND`, `BEGIN`, `BOOL`, `BOOLEAN`, `BY`, `COMMIT`, `CREATE`, `CROSS`, `DEFAULT`, `DELETE`, `DESC`, `DOUBLE`, `DROP`, `EXISTS`, `EXPLAIN`, `FALSE`, `FLOAT`, `FROM`, `GROUP`, `HAVING`, `IF`, `INDEX`, `INFINITY`, `INNER`, `INSERT`, `INT`, `INTEGER`, `INTO`, `IS`, `JOIN`, `KEY`, `LEFT`, `LIKE`, `LIMIT`, `NAN`, `NOT`, `NULL`, `OF`, `OFFSET`, `ON`, `ONLY`, `OR`, `ORDER`, `OUTER`, `PRIMARY`, `READ`, `REFERENCES`, `RIGHT`, `ROLLBACK`, `SELECT`, `SET`, `STRING`, `SYSTEM`, `TABLE`, `TEXT`, `TIME`, `TRANSACTION`, `TRUE`, `UNIQUE`, `UPDATE`, `VALUES`, `VARCHAR`, `WHERE`, `WRITE`

### Identifiers

Identifiers are names for database objects such as tables and columns. Unless quoted with `"`, they must begin with a Unicode letter followed by any combination of letters, numbers, and `_`, and cannot be reserved keywords. `""` can be used to escape a double quote character. They are always converted to lowercase.

### Constants

#### Named constants

The following keywords evaluate to constants:

* `FALSE`: the boolean false value.
* `INFINITY`: the floating-point value for infinity.
* `NAN`: the floating-point value for NaN (not a number).
* `NULL`: an unknown value.
* `TRUE`: the boolean true value.

#### String literals

String literals are surrounded by single quotes `'`, and can contain any valid UTF-8 character. Single quotes must be escaped by an additional single quote, i.e. `''`, no other escape sequences are supported. For example:

```
'A string with ''quotes'' and emojis 😀'
```

#### Numeric literals

Sequences of digits `0-9` are parsed as a 64-bit signed integer. Numbers with decimal points or in scientific notation are parsed as 64-bit floating point numbers. The following pattern is supported:

```
999[.[999]][e[+-]999]
```

The `-` prefix operator can be used to take negative numbers.

### Expressions

Expressions can be used wherever a value is expected, e.g. as `SELECT` columns and `INSERT` values. They are made up of constants, column references, operator invocations, and function calls.

Column references can either be unqualified, e.g. `name`, or prefixed with the relation identifier separated by `.`, e.g. `person.name`. Unqualified identifiers must be unambiguous.

## SQL Operators

### Logical operators

Logical operators apply standard logic operations on boolean operands.

* `AND`: the logical conjunction, e.g. `TRUE AND TRUE` yields `TRUE`.
* `OR`: the logical disjunction, e.g. `TRUE OR FALSE` yields `TRUE`.
* `NOT`: the logical negation, e.g. `NOT TRUE` yields `FALSE`.

The complete truth tables are:

| `AND`       | `TRUE`  | `FALSE` | `NULL`  |
|-------------|---------|---------|---------|
| **`TRUE`**  | `TRUE`  | `FALSE` | `NULL`  |
| **`FALSE`** | `FALSE` | `FALSE` | `FALSE` |
| **`NULL`**  | `NULL`  | `FALSE` | `NULL`  |

| `OR`        | `TRUE` | `FALSE` | `NULL` |
|-------------|--------|---------|--------|
| **`TRUE`**  | `TRUE` | `TRUE`  | `TRUE` |
| **`FALSE`** | `TRUE` | `FALSE` | `NULL` |
| **`NULL`**  | `TRUE` | `NULL`  | `NULL` |

| `NOT`       |         |
|-------------|---------|
| **`TRUE`**  | `FALSE` |
| **`FALSE`** | `TRUE`  |
| **`NULL`**  | `NULL`  |

### Comparison operators

Comparison operators compare values of the same data type, and return `TRUE` if the comparison holds or `FALSE` otherwise. `INTEGER` and `FLOAT` values are interchangeable. `STRING` comparisons use the string's byte values, i.e. case-sensitive with `'B' < 'a'` due to their UTF-8 code points. `FALSE` is considered lesser than `TRUE`. Comparison with `NULL` always yields `NULL` (even `NULL = NULL`).

Binary operators:

* `=`: equality, e.g. `1 = 1` yields `TRUE`.
* `!=`: inequality, e.g. `1 != 2` yields `TRUE`.
* `>`: greater than, e.g. `2 > 1` yields `TRUE`.
* `>=`: greater than or equal, e.g. `1 >= 1` yields `TRUE`.
* `<`: lesser than, e.g. `1 < 2` yields `TRUE`.
* `<=`: lesser than or equal, e.g. `1 <= 1` yields `TRUE`.

Unary operators:

* `IS NULL`: checks if the value is `NULL`, e.g. `NULL IS NULL` yields `TRUE`.
* `IS NOT NULL`: checks if the value is not `NULL`, e.g. `TRUE IS NOT NULL` yields `TRUE`.
* `IS NAN`: checks if the value is a float `NAN`, e.g. `NAN IS NAN` yields `TRUE`. Errors on 
  non-float datatypes, except `NULL` which yields `NULL`.
* `IS NOT NAN`: checks if the value is not a float `NAN`, e.g. `3.14 IS NOT NAN` yields `TRUE`.

### Mathematical operators

Mathematical operators apply standard math operations on numeric (`INTEGER` or `FLOAT`) operands. If either operand is a `FLOAT`, both operands are converted to `FLOAT` and the result is a `FLOAT`. If either operand is `NULL`, the result is `NULL`. The special values `INFINITY` and `NAN` are handled according to the IEEE 754 spec.

For `INTEGER` operands, failure conditions such as overflow and division by zero yield an error. For `FLOAT` operands, these return `INFINITY` or `NAN` as appropriate.

Binary operators:

* `+`: addition, e.g. `1 + 2` yields `3`.
* `-`: subtraction, e.g. `3 - 2` yields `1`.
* `*`: multiplication, e.g. `3 * 2` yields `6`.
* `/`: division, e.g. `6 / 2` yields `3`.
* `^`: exponentiation, e.g. `2 ^ 4` yields `16`.
* `%`: remainder, e.g. `8 % 3` yields `2`. Unlike modulo, the result has the sign of the dividend.

Unary operators:

* `+` (prefix): identity, e.g. `+1` yields `1`.
* `-` (prefix): negation, e.g. `- -2` yields `2`.
* `!` (postfix): factorial, e.g. `5!` yields `120`.

### String operators

String operators operate on string operands.

* `LIKE`: compares a string with the given pattern, using `%` as multi-character wildcard and `_` as single-character wildcard, returning `TRUE` if the string matches the pattern - e.g. `'abc' LIKE 'a%'` yields `TRUE`.

### Operator precedence

The operator precedence (order of operations) is as follows:

| Precedence | Operator                | Associativity |
|------------|-------------------------|---------------|
| 10         | `+`, `-` (prefix)       | Right         |
| 9          | `!` (postfix)           | Left          |
| 8          | `^`                     | Right         |
| 7          | `*`, `/`, `%`           | Left          |
| 6          | `+`, `-`                | Left          |
| 5          | `>`, `>=`, `<`, `<=`    | Left          |
| 4          | `=`, `!=`, `LIKE`, `IS` | Left          |
| 3          | `NOT`                   | Right         |
| 2          | `AND`                   | Left          |
| 1          | `OR`                    | Left          |

Precedence can be overridden by wrapping an expression in parentheses, e.g. `(1 + 2) * 3`.

### Functions

* `sqrt(expr)`: returns the square root of a numerical argument.

### Aggregate functions

Aggregate functions aggregate an expression across all rows, optionally grouped into buckets given by `GROUP BY`, and results can be filtered via `HAVING`.

* `AVG(expr)`: returns the average of numerical values.

* `COUNT(expr)`: returns the number of rows for which ***`expr`*** evaluates to a non-`NULL` value. `COUNT(*)` can be used to count all rows.

* `MAX(expr)`: returns the maximum value, according to the datatype's ordering.

* `MIN(expr)`: returns the minimum value, according to the datatype's ordering.

* `SUM(expr)`: returns the sum of numerical values.

## SQL Statements

### `BEGIN`

Starts a new [transaction](#transactions).

<pre>
BEGIN [ TRANSACTION ] [ READ ONLY | READ WRITE ] [ AS OF SYSTEM TIME <b><i>txn_id</i></b> ]
</pre>

* ***`txn_id`***: A past transaction ID to run a read-only transaction for, for time-travel queries.

### `COMMIT`

Commits an active [transaction](#transactions).

### `CREATE TABLE`

Creates a new table.

<pre>
CREATE TABLE <b><i>table_name</i></b> (
    [ <b><i>column_name</i></b> <b><i>data_type</i></b> [ <b><i>column_constraint</i></b> [ ... ] ]  [ INDEX ] [, ... ] ]
)

where <b><i>column_constraint</i></b> is:

{ NOT NULL | NULL | PRIMARY KEY | DEFAULT <b><i>expr</i></b> | REFERENCES <b><i>ref_table</i></b> | UNIQUE }
</pre>

* ***`table_name`***: The name of the table. Must be a [valid identifier](#identifiers). Errors if a table with this name already exists.

* ***`column_name`***: The name of the column. Must be a [valid identifier](#identifiers), and unique within the table.

* ***`data_type`***: The data type of the column, see [data types](#data-types) for valid types.

* `NOT NULL`: The column may not contain `NULL` values.

* `NULL`: The column may contain `NULL` values. This is the default.

* `PRIMARY KEY`: The column should act as a primary key, i.e. the main row identifier. A table must have exactly one primary key column, and it must be unique and non-nullable.

* `DEFAULT` ***`expr`***: Specifies a default value for the column when `INSERT` statements do not give a value. ***`expr`*** can be any constant expression of an appropriate data type, e.g. `'abc'` or `1 + 2 * 3`. For nullable columns, the default value is `NULL` unless specified otherwise.

* `REFERENCES` ***`ref_table`***: The column is a foreign key to ***`ref_table`***'s primary key, enforcing referential integrity.

* `UNIQUE`: The column may only contain unique (distinct) values. `NULL` values are not considered equal, thus a `UNIQUE` column which allows `NULL` may contain multiple `NULL` values. `PRIMARY KEY` columns are implicitly `UNIQUE`.

* `INDEX`: Create an index for the column.

#### Example

```sql
CREATE TABLE movie (
    id INTEGER PRIMARY KEY,
    title STRING NOT NULL,
    release_year INTEGER INDEX,
    imdb_id STRING INDEX UNIQUE,
    bluray BOOLEAN NOT NULL DEFAULT TRUE
)
```

### `DELETE`

Deletes rows in a table.

<pre>
DELETE FROM <b><i>table_name</i></b>
    [ WHERE <b><i>predicate</i></b> ]
</pre>

Deletes rows where ***`predicate`*** evaluates to `TRUE`, or all rows if no `WHERE` clause is given.

* ***`table_name`***: the table to delete from. Errors if it does not exist.

* ***`predicate`***: an expression which determines which rows to delete by evaluating to `TRUE`. Must evaluate to a `BOOLEAN` or `NULL`, otherwise an error is returned.

#### Example

```sql
DELETE FROM movie
WHERE release_year < 2000 AND bluray = FALSE
```

### `DROP TABLE`

Deletes a table and all contained data. Errors if the table does not
exist, unless `IF EXISTS` is given.

<pre>
DROP TABLE [ IF EXISTS ] <b><i>table_name</i></b>
</pre>

* ***`table_name`***: the table to delete.

### `EXPLAIN`

Outputs the execution plan for the given statement.

<pre>
EXPLAIN [ <b><i>statement</i></b> ]
</pre>

### `INSERT`

Inserts rows into a table.

<pre>
INSERT INTO <b><i>table_name</i></b>
    [ ( <b><i>column_name</i></b> [, ... ] ) ]
    VALUES ( <b><i>expression</i></b> [, ... ] ) [, ... ]
</pre>

If column names are given, an identical number of values must be given. If no column names are given, values must be given in the table's column order. Omitted columns will get a default value if specified, otherwise an error will be returned.

* ***`table_name`***: the table to insert into. Errors if it does not exist.

* ***`column_name`***: a column to insert into in the given table. Errors if it does not exist.

* ***`expression`***: an expression to insert into the corresponding column. Must be a constant expression, i.e. it cannot refer to table columns.

#### Example

```sql
INSERT INTO movie
    (id, title, release_year)
VALUES
    (1, 'Sicario', 2015),
    (2, 'Stalker', 1979),
    (3, 'Her', 2013)
```

### `ROLLBACK`

Rolls back an active [transaction](#transactions).

### `SELECT`

Selects rows from a table.

<pre>
SELECT [ * | <b><i>expression</i></b> [ [ AS ] <b><i>output_name</i></b> [, ...] ] ]
    [ FROM <b><i>from_item</i></b> [, ...] ]
    [ WHERE <b><i>predicate</i></b> ]
    [ GROUP BY <b><i>group_expr</i></b> [, ...] ]
    [ HAVING <b><i>having_expr</i></b> ]
    [ ORDER BY <b><i>order_expr</i></b> [ ASC | DESC ] [, ...] ]
    [ LIMIT <b><i>count</i></b> ]
    [ OFFSET <b><i>start</i></b> ]

where <b><i>from_item</i></b> is one of:

<b><i>table_name</i></b> [ [ AS ] <b><i>alias</i></b> ]
<b><i>from_item</i></b> <b><i>join_type</i></b> <b><i>from_item</i></b> [ ON <b><i>join_predicate</i></b> ]

where <b><i>join_type</i></b> is one of:

CROSS JOIN
[ INNER ] JOIN
LEFT [ OUTER ] JOIN
RIGHT [ OUTER ] JOIN

</pre>

Fetches rows or expressions, either from table ***`table_name`*** (if given) or generated.

* ***`expression`***: [expression](#expressions) to fetch (can be a simple column name).

* ***`output_name`***: output column [identifier](#identifiers), defaults to column name (if single column) otherwise nothing (displayed as `?`).

* ***`table_name`***: table to fetch rows from.

* ***`alias`***: table alias.

* ***`predicate`***: only return rows for which this [expression](#expressions) evaluates to `TRUE`.

* ***`group_expr`***: an expression to group aggregates by. Non-aggregate `SELECT` expressions must either reference a column given in `group_expr`, be identical with a `group_expr`, or have an `output_name` that is referenced by a `group_expr` column.

* ***`having_expr`***: only return aggregate results for which this [expression](#expressions) evaluates to `TRUE`.

* ***`order_expr`***: order rows by this expression (can be a simple column name).

* ***`count`***: maximum number of rows to return. Must be a constant integer expression.

* ***`start`***: number of rows to skip. Must be a constant integer expression.

* ***`join_predicate`***: only return rows for which this [expression](#expressions) evaluates to `TRUE`.

Join types:

* `CROSS JOIN`: returns the Cartesian product of the joined tables. Does not accept a join predicate (`ON` clause).

* `INNER JOIN`: returns the rows of the tables' Cartesian product for which ***`join_predicate`*** evaluates to `TRUE`.

* `LEFT OUTER JOIN`: returns the rows joined on the ***`join_predicate`***. For any row in the left table that does not have a match in the right table, a single row is returned with the right table's columns set to `NULL`.

* `RIGHT OUTER JOIN`: the same as a `LEFT OUTER JOIN` but with the left and right tables switched.

#### Example

```sql
SELECT id, title, 2020 - released AS age
FROM movies
WHERE released >= 2000 AND ultrahd
ORDER BY released DESC, title ASC
LIMIT 10
OFFSET 10
```

### `UPDATE`

Updates rows in a table.

<pre>
UPDATE <b><i>table_name</i></b>
    SET <b><i>column_name</i></b> = <b><i>expression</i></b> | DEFAULT [, ... ]
    [ WHERE <b><i>predicate</i></b> ]
</pre>

Updates columns given by ***`column_name`*** to the corresponding ***`expression`*** for all rows where ***`predicate`*** evaluates to `TRUE`. If no `WHERE` clause is given, all rows are updated.

* ***`table_name`***: the table to update. Errors if it does not exist.

* ***`column_name`***: a column to update. Errors if it does not exist.

* ***`expression`***: an expression whose evaluated value will be set for the corresponding column and row. Expressions can refer to column values, and must evaluate to the same datatype as the updated column. Using `DEFAULT` will set the column's default value, if any.

* ***`predicate`***: an expression which determines which rows to update by evaluating to `TRUE`. Must evaluate to a `BOOLEAN` or `NULL`, otherwise an error is returned.

#### Example

```sql
UPDATE movie
SET bluray = TRUE
WHERE release_year >= 2000 AND bluray = FALSE
```

## Transactions

toyDB supports ACID transactions using MVCC-based snapshot isolation, protecting from the following anomalies: dirty writes, dirty reads, lost updates, fuzzy reads, read skew, and phantom reads. However, write skew anomalies are possible since serializable snapshot isolation is not implemented.

A new transaction is started with `BEGIN`, and ended with either `COMMIT` (atomically writing all changes) or `ROLLBACK` (discarding all changes). If any conflicts occur between concurrent transactions, the lowest transaction ID wins and the others will fail with a serialization error and must retry.

All past data is versioned and retained, and can be queried as of a given transaction ID via `BEGIN TRANSACTION READ ONLY AS OF SYSTEM TIME <txn_id>`.

A transaction is still valid for use if a contained statement returns an error. It is up to the client to take appropriate action.

================================================
FILE: docs/tools/update-links.py
================================================
#!/usr/bin/env python3
#
# Updates GitHub code links to the latest commit SHA.

import os, re, sys, argparse
import requests

# Base URL of the GitHub REST API; get_latest_sha() builds its commits
# endpoint URL from this.
GITHUB_API = "https://api.github.com"

def get_latest_sha(owner, repo, path, token):
    """Return the SHA of the latest commit on ``main`` that touched ``path``.

    Queries ``{GITHUB_API}/repos/{owner}/{repo}/commits`` filtered by path and
    branch, asking for a single result.

    Args:
        owner: GitHub repository owner.
        repo: GitHub repository name.
        path: File path within the repository.
        token: Optional GitHub token. When truthy it is sent in the
            ``Authorization`` header; otherwise the request is unauthenticated.

    Returns:
        The commit SHA string, or ``None`` if the API returned no commits.

    Raises:
        Whatever ``requests`` raises for connection errors and timeouts, and
        whatever ``raise_for_status()`` raises for an error response.
    """
    url = f"{GITHUB_API}/repos/{owner}/{repo}/commits"
    headers = {"Authorization": f"token {token}"} if token else {}
    params = {"path": path, "sha": "main", "per_page": 1}
    # requests has no default timeout; bound the call so a stalled connection
    # cannot hang the script indefinitely.
    resp = requests.get(url, headers=headers, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()
    return data[0]["sha"] if data else None

def process_markdown(text, token):
    """Rewrite GitHub blob links in ``text`` to point at the latest commit SHA.

    Matches URLs of the form
    ``https://github.com/<owner>/<repo>/blob/<sha>/<path>`` where ``<sha>`` is
    7-40 lowercase hex characters. For each match the latest commit for the
    path is looked up via ``get_latest_sha`` and substituted when it differs
    from the SHA in the link. Lookups are cached per ``(owner, repo, path)``,
    so repeated links to the same file cost a single lookup.

    Progress messages are written to stderr, so stdout carries only the
    rewritten Markdown when the script is used as a stdin/stdout filter.

    Args:
        text: Markdown source to process.
        token: Optional GitHub token, passed through to ``get_latest_sha``.

    Returns:
        ``text`` with matching links updated. Links for which no commit was
        found, or whose SHA is already current, are left unchanged.
    """
    pattern = re.compile(
        r"https://github\.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/blob/"
        r"(?P<oldsha>[0-9a-f]{7,40})/(?P<path>[^#)\s]+)"
    )
    cache = {}

    def replacer(m):
        url = m.group(0)
        print(f"Checking {url}", file=sys.stderr)
        owner, repo, oldsha, path = m.group("owner", "repo", "oldsha", "path")
        key = (owner, repo, path)
        print(f"Key: {key}", file=sys.stderr)
        if key not in cache:
            cache[key] = get_latest_sha(owner, repo, path, token)
        newsha = cache[key]
        if not newsha or newsha == oldsha:
            return url
        print(f"Updating {url} to {newsha}", file=sys.stderr)
        # Splice by match position instead of str.replace(), so only the SHA
        # path segment changes even if the same hex string also appears in the
        # owner, repo or file path.
        base = m.start()
        sha_start, sha_end = m.start("oldsha") - base, m.end("oldsha") - base
        return url[:sha_start] + newsha + url[sha_end:]

    return pattern.sub(replacer, text)

def main():
    """Command-line entry point: update GitHub blob links to the latest SHAs.

    With a file argument, the file is read, processed and overwritten in
    place. Without one, Markdown is read from stdin and the processed text is
    written to stdout. The GitHub token is taken from the ``GITHUB_TOKEN``
    environment variable and may be unset.
    """
    p = argparse.ArgumentParser(description="Update GitHub blob links to latest SHAs")
    p.add_argument("file", nargs="?", help="Markdown file to update (defaults to stdin/stdout)")
    args = p.parse_args()
    token = os.getenv("GITHUB_TOKEN")
    if args.file:
        # Read through a context manager so the handle is closed before the
        # same file is reopened for writing.
        with open(args.file, encoding="utf-8") as f:
            text = f.read()
        updated = process_markdown(text, token)
        with open(args.file, "w", encoding="utf-8") as f:
            f.write(updated)
    else:
        text = sys.stdin.read()
        sys.stdout.write(process_markdown(text, token))

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

================================================
FILE: rust-toolchain
================================================
1.93.1

================================================
FILE: rustfmt.toml
================================================
use_small_heuristics = "Max"

================================================
FILE: src/bin/toydb.rs
================================================
//! The toyDB server. Takes configuration from a config file (default
//! config/toydb.yaml) or corresponding TOYDB_ environment variables. Listens
//! for SQL clients (default port 9601) and Raft connections from other toyDB
//! peers (default port 9701). The Raft log and SQL database are stored at
//! data/raft and data/sql by default.
//!
//! Use the toysql command-line client to connect to the server.

#![warn(clippy::all)]

use std::collections::HashMap;
use std::path::Path;

use clap::Parser as _;
use serde::Deserialize;

use toydb::Server;
use toydb::errinput;
use toydb::error::Result;
use toydb::raft;
use toydb::sql;
use toydb::storage;

/// Parses the command-line arguments and runs the server command. On failure
/// the error is printed to stderr and the process exits with status 1.
fn main() {
    if let Err(error) = Command::parse().run() {
        eprintln!("Error: {error}");
        // Report the failure to the caller (shell, supervisor, scripts)
        // instead of falling through and exiting with status 0.
        std::process::exit(1);
    }
}

/// The toyDB server configuration. Can be provided via config file (default
/// config/toydb.yaml) or TOYDB_ environment variables. Default values for
/// most fields are set in `Config::load`.
#[derive(Debug, Deserialize)]
struct Config {
    /// The node ID. Must be unique in the cluster. Defaults to 1.
    id: raft::NodeID,
    /// The other nodes in the cluster, and their Raft TCP addresses. No
    /// default is set in `Config::load`.
    peers: HashMap<raft::NodeID, String>,
    /// The Raft listen address. Defaults to localhost:9701.
    listen_raft: String,
    /// The SQL listen address. Defaults to localhost:9601.
    listen_sql: String,
    /// The log level. Defaults to "info".
    log_level: String,
    /// The path to this node's data directory. The Raft log is stored in
    /// the file "raft", and the SQL state machine in "sql". Defaults to
    /// "data".
    data_dir: String,
    /// The Raft storage engine: bitcask or memory. Defaults to bitcask.
    storage_raft: String,
    /// The SQL storage engine: bitcask or memory. Defaults to bitcask.
    storage_sql: String,
    /// If false, don't fsync Raft log writes to disk. Disabling this
    /// will yield much better write performance, but may lose data on
    /// host crashes which compromises Raft safety guarantees. Defaults to
    /// true.
    fsync: bool,
    /// The garbage fraction threshold at which to trigger compaction.
    /// Defaults to 0.2.
    compact_threshold: f64,
    /// The minimum bytes of garbage before triggering compaction. Defaults
    /// to 1,000,000.
    compact_min_bytes: u64,
}

impl Config {
    /// Builds the configuration by layering, in order: built-in defaults,
    /// the given config file, and TOYDB-prefixed environment variables. The
    /// merged settings are then deserialized into a `Config`.
    fn load(file: &str) -> Result<Self> {
        // Built-in defaults, registered before any source is added.
        let defaults = config::Config::builder()
            .set_default("id", "1")?
            .set_default("listen_sql", "localhost:9601")?
            .set_default("listen_raft", "localhost:9701")?
            .set_default("log_level", "info")?
            .set_default("data_dir", "data")?
            .set_default("storage_raft", "bitcask")?
            .set_default("storage_sql", "bitcask")?
            .set_default("fsync", true)?
            .set_default("compact_threshold", 0.2)?
            .set_default("compact_min_bytes", 1_000_000)?;

        // Config file first, then the environment.
        let settings = defaults
            .add_source(config::File::with_name(file))
            .add_source(config::Environment::with_prefix("TOYDB"))
            .build()?;

        Ok(settings.try_deserialize()?)
    }
}

/// The toyDB server command.
// Parsed from the process arguments in main() via `Command::parse()`.
// NOTE(review): clap's derive presumably uses the `///` comments in this
// struct as help text -- confirm before rewording them.
#[derive(clap::Parser)]
#[command(about = "Starts a toyDB server.", version, propagate_version = true)]
struct Command {
    /// The configuration file path.
    // Short flag is -c; `long` presumably yields --config from the field
    // name (TODO confirm). Falls back to config/toydb.yaml when not given.
    #[arg(short = 'c', long, default_value = "config/toydb.yaml")]
    config: String,
}

impl Command {
    /// Runs the toyDB server.
    fn run(self) -> Result<()> {
        // Load the c

Download .txt

gitextract_nc06cv1f/

├── .github/
│   └── workflows/
│       └── ci.yml
├── .gitignore
├── Cargo.toml
├── LICENSE
├── README.md
├── cluster/
│   ├── run.sh
│   ├── toydb1/
│   │   └── toydb.yaml
│   ├── toydb2/
│   │   └── toydb.yaml
│   ├── toydb3/
│   │   └── toydb.yaml
│   ├── toydb4/
│   │   └── toydb.yaml
│   └── toydb5/
│       └── toydb.yaml
├── config/
│   └── toydb.yaml
├── docs/
│   ├── architecture/
│   │   ├── README.md
│   │   ├── client.md
│   │   ├── encoding.md
│   │   ├── index.md
│   │   ├── mvcc.md
│   │   ├── overview.md
│   │   ├── raft.md
│   │   ├── server.md
│   │   ├── sql-data.md
│   │   ├── sql-execution.md
│   │   ├── sql-optimizer.md
│   │   ├── sql-parser.md
│   │   ├── sql-planner.md
│   │   ├── sql-raft.md
│   │   ├── sql-storage.md
│   │   ├── sql.md
│   │   └── storage.md
│   ├── architecture.md
│   ├── crate/
│   │   ├── Cargo.toml
│   │   ├── README.md
│   │   └── src/
│   │       └── lib.rs
│   ├── examples.md
│   ├── references.md
│   ├── sql.md
│   └── tools/
│       └── update-links.py
├── rust-toolchain
├── rustfmt.toml
├── src/
│   ├── bin/
│   │   ├── toydb.rs
│   │   ├── toydump.rs
│   │   ├── toysql.rs
│   │   └── workload.rs
│   ├── client.rs
│   ├── encoding/
│   │   ├── bincode.rs
│   │   ├── format.rs
│   │   ├── keycode.rs
│   │   └── mod.rs
│   ├── error.rs
│   ├── lib.rs
│   ├── raft/
│   │   ├── log.rs
│   │   ├── message.rs
│   │   ├── mod.rs
│   │   ├── node.rs
│   │   ├── state.rs
│   │   └── testscripts/
│   │       ├── log/
│   │       │   ├── append
│   │       │   ├── commit
│   │       │   ├── get
│   │       │   ├── has
│   │       │   ├── init
│   │       │   ├── scan
│   │       │   ├── scan_apply
│   │       │   ├── splice
│   │       │   ├── status
│   │       │   └── term
│   │       └── node/
│   │           ├── append
│   │           ├── append_base_missing
│   │           ├── append_base_missing_all
│   │           ├── append_commit_quorum
│   │           ├── append_initial
│   │           ├── append_max_entries
│   │           ├── append_pipeline
│   │           ├── append_probe_divergent_first
│   │           ├── append_probe_divergent_long
│   │           ├── append_probe_divergent_short
│   │           ├── append_probe_divergent_single
│   │           ├── append_response_beyond_last_index_panics
│   │           ├── append_response_stale_reject
│   │           ├── election
│   │           ├── election_candidate_behind_leader
│   │           ├── election_candidate_behind_quorum
│   │           ├── election_contested
│   │           ├── election_tie
│   │           ├── election_tie_even
│   │           ├── heartbeat_commits_follower
│   │           ├── heartbeat_converts_candidate
│   │           ├── heartbeat_converts_follower
│   │           ├── heartbeat_converts_follower_leaderless
│   │           ├── heartbeat_converts_leader
│   │           ├── heartbeat_lost_append_duplicate
│   │           ├── heartbeat_lost_append_multiple
│   │           ├── heartbeat_lost_append_single
│   │           ├── heartbeat_lost_read
│   │           ├── heartbeat_match_commits
│   │           ├── heartbeat_multiple_leaders_panic
│   │           ├── heartbeat_old_commit_index
│   │           ├── heartbeat_old_last_index
│   │           ├── heartbeat_probe_divergent
│   │           ├── old_campaign_rejected
│   │           ├── old_campaign_response_ignored
│   │           ├── old_heartbeat_ignored
│   │           ├── request_candidate_abort
│   │           ├── request_follower
│   │           ├── request_follower_campaign_abort
│   │           ├── request_follower_disconnect_stall
│   │           ├── request_follower_leaderless_abort
│   │           ├── request_leader
│   │           ├── request_leader_campaign_abort
│   │           ├── request_leader_change_linearizability
│   │           ├── request_leader_disconnect
│   │           ├── request_leader_read_quorum
│   │           ├── request_leader_read_quorum_sequence
│   │           ├── request_leader_single
│   │           ├── request_status
│   │           ├── request_status_single
│   │           ├── restart
│   │           ├── restart_apply
│   │           ├── restart_commit_recover
│   │           ├── restart_term_vote
│   │           ├── tick_candidate
│   │           ├── tick_follower
│   │           ├── tick_follower_leaderless
│   │           └── tick_leader
│   ├── server.rs
│   ├── sql/
│   │   ├── engine/
│   │   │   ├── engine.rs
│   │   │   ├── local.rs
│   │   │   ├── mod.rs
│   │   │   └── raft.rs
│   │   ├── execution/
│   │   │   ├── aggregator.rs
│   │   │   ├── executor.rs
│   │   │   ├── join.rs
│   │   │   ├── mod.rs
│   │   │   └── session.rs
│   │   ├── mod.rs
│   │   ├── parser/
│   │   │   ├── ast.rs
│   │   │   ├── lexer.rs
│   │   │   ├── mod.rs
│   │   │   └── parser.rs
│   │   ├── planner/
│   │   │   ├── mod.rs
│   │   │   ├── optimizer.rs
│   │   │   ├── plan.rs
│   │   │   └── planner.rs
│   │   ├── testscripts/
│   │   │   ├── expressions/
│   │   │   │   ├── cnf
│   │   │   │   ├── func
│   │   │   │   ├── func_sqrt
│   │   │   │   ├── literals
│   │   │   │   ├── op_compare_equal
│   │   │   │   ├── op_compare_greater
│   │   │   │   ├── op_compare_greater_equal
│   │   │   │   ├── op_compare_is_nan
│   │   │   │   ├── op_compare_is_null
│   │   │   │   ├── op_compare_lesser
│   │   │   │   ├── op_compare_lesser_equal
│   │   │   │   ├── op_compare_not_equal
│   │   │   │   ├── op_logic_and
│   │   │   │   ├── op_logic_not
│   │   │   │   ├── op_logic_or
│   │   │   │   ├── op_math_add
│   │   │   │   ├── op_math_divide
│   │   │   │   ├── op_math_exponentiate
│   │   │   │   ├── op_math_factorial
│   │   │   │   ├── op_math_identity
│   │   │   │   ├── op_math_multiply
│   │   │   │   ├── op_math_negate
│   │   │   │   ├── op_math_remainder
│   │   │   │   ├── op_math_subtract
│   │   │   │   ├── op_precedence
│   │   │   │   └── op_string_like
│   │   │   ├── optimizers/
│   │   │   │   ├── constant_folder
│   │   │   │   ├── filter_pushdown
│   │   │   │   ├── hash_join
│   │   │   │   ├── index_lookup
│   │   │   │   └── short_circuit
│   │   │   ├── queries/
│   │   │   │   ├── aggregate
│   │   │   │   ├── clauses
│   │   │   │   ├── group_by
│   │   │   │   ├── having
│   │   │   │   ├── join_cross
│   │   │   │   ├── join_inner
│   │   │   │   ├── join_outer
│   │   │   │   ├── limit
│   │   │   │   ├── offset
│   │   │   │   ├── order
│   │   │   │   ├── select
│   │   │   │   ├── where_
│   │   │   │   ├── where_index
│   │   │   │   └── where_primary_key
│   │   │   ├── schema/
│   │   │   │   ├── create_table
│   │   │   │   ├── create_table_datatypes
│   │   │   │   ├── create_table_default
│   │   │   │   ├── create_table_index
│   │   │   │   ├── create_table_names
│   │   │   │   ├── create_table_null
│   │   │   │   ├── create_table_primary_key
│   │   │   │   ├── create_table_reference
│   │   │   │   ├── create_table_transaction
│   │   │   │   ├── create_table_unique
│   │   │   │   ├── drop_table
│   │   │   │   ├── drop_table_index
│   │   │   │   ├── drop_table_ref
│   │   │   │   └── drop_table_transaction
│   │   │   ├── transactions/
│   │   │   │   ├── anomaly_dirty_read
│   │   │   │   ├── anomaly_dirty_write
│   │   │   │   ├── anomaly_fuzzy_read
│   │   │   │   ├── anomaly_lost_update
│   │   │   │   ├── anomaly_phantom_read
│   │   │   │   ├── anomaly_read_skew
│   │   │   │   ├── anomaly_write_skew
│   │   │   │   ├── begin
│   │   │   │   ├── commit
│   │   │   │   ├── isolation
│   │   │   │   ├── rollback
│   │   │   │   └── schema
│   │   │   └── writes/
│   │   │       ├── delete
│   │   │       ├── delete_index
│   │   │       ├── delete_reference
│   │   │       ├── delete_where
│   │   │       ├── insert
│   │   │       ├── insert_datatypes
│   │   │       ├── insert_default
│   │   │       ├── insert_index
│   │   │       ├── insert_null
│   │   │       ├── insert_primary_key
│   │   │       ├── insert_reference
│   │   │       ├── insert_unique
│   │   │       ├── update
│   │   │       ├── update_datatypes
│   │   │       ├── update_default
│   │   │       ├── update_expression
│   │   │       ├── update_index
│   │   │       ├── update_null
│   │   │       ├── update_primary_key
│   │   │       ├── update_reference
│   │   │       ├── update_unique
│   │   │       └── update_where
│   │   └── types/
│   │       ├── expression.rs
│   │       ├── mod.rs
│   │       ├── schema.rs
│   │       └── value.rs
│   └── storage/
│       ├── bitcask.rs
│       ├── engine.rs
│       ├── memory.rs
│       ├── mod.rs
│       ├── mvcc.rs
│       └── testscripts/
│           ├── bitcask/
│           │   ├── compact
│           │   ├── compact_open
│           │   ├── log
│           │   └── status
│           ├── engine/
│           │   ├── keys
│           │   ├── point
│           │   ├── scan
│           │   └── scan_prefix
│           ├── memory/
│           │   └── status
│           └── mvcc/
│               ├── anomaly_dirty_read
│               ├── anomaly_dirty_write
│               ├── anomaly_fuzzy_read
│               ├── anomaly_lost_update
│               ├── anomaly_phantom_read
│               ├── anomaly_read_skew
│               ├── anomaly_write_skew
│               ├── bank
│               ├── begin
│               ├── begin_as_of
│               ├── begin_readonly
│               ├── delete
│               ├── delete_conflict
│               ├── get
│               ├── get_isolation
│               ├── resume
│               ├── rollback
│               ├── scan
│               ├── scan_isolation
│               ├── scan_key_version_encoding
│               ├── scan_prefix
│               ├── set
│               ├── set_conflict
│               └── unversioned
└── tests/
    ├── scripts/
    │   ├── anomalies
    │   ├── client
    │   ├── errors
    │   ├── isolation
    │   └── queries
    ├── testcluster.rs
    └── tests.rs

Download .txt

SYMBOL INDEX (962 symbols across 40 files)

FILE: docs/tools/update-links.py
  function get_latest_sha (line 10) | def get_latest_sha(owner, repo, path, token):
  function process_markdown (line 21) | def process_markdown(text, token):
  function main (line 41) | def main():

FILE: src/bin/toydb.rs
  function main (line 24) | fn main() {
  type Config (line 33) | struct Config {
    method load (line 63) | fn load(file: &str) -> Result<Self> {
  type Command (line 85) | struct Command {
    method run (line 93) | fn run(self) -> Result<()> {

FILE: src/bin/toydump.rs
  function main (line 13) | fn main() {
  type Command (line 22) | struct Command {
    method run (line 35) | fn run(self) -> Result<()> {

FILE: src/bin/toysql.rs
  function main (line 23) | fn main() {
  type Command (line 32) | struct Command {
    method run (line 46) | fn run(self) -> Result<()> {
  type Shell (line 56) | struct Shell {
    method new (line 69) | fn new(host: &str, port: u16) -> Result<Self> {
    method execute (line 84) | fn execute(&mut self, input: &str) -> Result<()> {
    method execute_command (line 95) | fn execute_command(&mut self, input: &str) -> Result<()> {
    method execute_sql (line 176) | fn execute_sql(&mut self, statement: &str) -> Result<()> {
    method prompt (line 207) | fn prompt(&mut self) -> rustyline::Result<String> {
    method run (line 217) | fn run(&mut self) -> Result<()> {
  type InputValidator (line 258) | struct InputValidator;
  method validate (line 261) | fn validate(&self, ctx: &mut ValidationContext) -> rustyline::Result<Val...
  method validate_while_typing (line 276) | fn validate_while_typing(&self) -> bool {

FILE: src/bin/workload.rs
  function main (line 28) | fn main() {
  type Command (line 43) | struct Command {
  type Subcommand (line 52) | enum Subcommand {
  type Runner (line 60) | struct Runner {
    method run (line 85) | fn run<W: Workload>(self, workload: W) -> Result<()> {
  type Workload (line 184) | trait Workload: std::fmt::Display {
    method prepare (line 189) | fn prepare(&self, client: &mut Client, rng: &mut StdRng) -> Result<()>;
    method generate (line 192) | fn generate(&self, rng: StdRng) -> Result<impl Iterator<Item = Self::I...
    method execute (line 196) | fn execute(client: &mut Client, item: &Self::Item) -> Result<()>;
    method verify (line 199) | fn verify(&self, _client: &mut Client, _txns: usize) -> Result<()> {
    type Item (line 230) | type Item = HashSet<u64>;
    method prepare (line 232) | fn prepare(&self, client: &mut Client, rng: &mut StdRng) -> Result<()> {
    method generate (line 253) | fn generate(&self, rng: StdRng) -> Result<impl Iterator<Item = Self::I...
    method execute (line 261) | fn execute(client: &mut Client, item: &Self::Item) -> Result<()> {
    method verify (line 272) | fn verify(&self, client: &mut Client, _: usize) -> Result<()> {
    type Item (line 324) | type Item = Vec<(u64, String)>;
    method prepare (line 326) | fn prepare(&self, client: &mut Client, _: &mut StdRng) -> Result<()> {
    method generate (line 334) | fn generate(&self, rng: StdRng) -> Result<impl Iterator<Item = Self::I...
    method execute (line 338) | fn execute(client: &mut Client, item: &Self::Item) -> Result<()> {
    method verify (line 352) | fn verify(&self, client: &mut Client, txns: usize) -> Result<()> {
    type Item (line 415) | type Item = (u64, u64, u64);
    method prepare (line 417) | fn prepare(&self, client: &mut Client, rng: &mut StdRng) -> Result<()> {
    method generate (line 461) | fn generate(&self, rng: StdRng) -> Result<impl Iterator<Item = Self::I...
    method execute (line 473) | fn execute(client: &mut Client, item: &Self::Item) -> Result<()> {
    method verify (line 518) | fn verify(&self, client: &mut Client, _: usize) -> Result<()> {
  type Read (line 209) | struct Read {
    method fmt (line 224) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  type ReadGenerator (line 280) | struct ReadGenerator {
  type Item (line 287) | type Item = <Read as Workload>::Item;
  method next (line 289) | fn next(&mut self) -> Option<Self::Item> {
  type Write (line 307) | struct Write {
    method fmt (line 318) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  type WriteGenerator (line 361) | struct WriteGenerator {
  type Item (line 369) | type Item = <Write as Workload>::Item;
  method next (line 371) | fn next(&mut self) -> Option<Self::Item> {
  type Bank (line 390) | struct Bank {
    method fmt (line 409) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {

FILE: src/client.rs
  type Client (line 17) | pub struct Client {
    method connect (line 28) | pub fn connect(addr: impl ToSocketAddrs) -> Result<Self> {
    method request (line 36) | fn request(&mut self, request: Request) -> Result<Response> {
    method execute (line 43) | pub fn execute(&mut self, statement: &str) -> Result<StatementResult> {
    method get_table (line 59) | pub fn get_table(&mut self, table: &str) -> Result<Table> {
    method list_tables (line 67) | pub fn list_tables(&mut self) -> Result<Vec<String>> {
    method status (line 75) | pub fn status(&mut self) -> Result<Status> {
    method txn (line 83) | pub fn txn(&self) -> Option<&mvcc::TransactionState> {
    method with_retry (line 91) | pub fn with_retry<T>(&mut self, f: impl Fn(&mut Client) -> Result<T>) ...

FILE: src/encoding/bincode.rs
  constant CONFIG (line 16) | const CONFIG: bincode::config::Configuration = bincode::config::standard();
  function serialize (line 19) | pub fn serialize<T: Serialize>(value: &T) -> Vec<u8> {
  function deserialize (line 25) | pub fn deserialize<'de, T: Deserialize<'de>>(bytes: &'de [u8]) -> Result...
  function serialize_into (line 30) | pub fn serialize_into<W: Write, T: Serialize>(mut writer: W, value: &T) ...
  function deserialize_from (line 36) | pub fn deserialize_from<R: Read, T: DeserializeOwned>(mut reader: R) -> ...
  function maybe_deserialize_from (line 42) | pub fn maybe_deserialize_from<R: Read, T: DeserializeOwned>(mut reader: ...

FILE: src/encoding/format.rs
  type Formatter (line 16) | pub trait Formatter {
    method key (line 18) | fn key(key: &[u8]) -> String;
    method value (line 21) | fn value(key: &[u8], value: &[u8]) -> String;
    method key_value (line 24) | fn key_value(key: &[u8], value: &[u8]) -> String {
    method key_maybe_value (line 29) | fn key_maybe_value(key: &[u8], value: Option<&[u8]>) -> String {
    method key (line 48) | fn key(key: &[u8]) -> String {
    method value (line 52) | fn value(_key: &[u8], value: &[u8]) -> String {
    method key (line 69) | fn key(key: &[u8]) -> String {
    method value (line 76) | fn value(key: &[u8], value: &[u8]) -> String {
    method key (line 108) | fn key(key: &[u8]) -> String {
    method value (line 128) | fn value(key: &[u8], value: &[u8]) -> String {
    method key (line 174) | fn key(key: &[u8]) -> String {
    method value (line 193) | fn value(key: &[u8], value: &[u8]) -> String {
    method key (line 231) | fn key(_key: &[u8]) -> String {
    method value (line 236) | fn value(_key: &[u8], value: &[u8]) -> String {
  type Raw (line 37) | pub struct Raw;
    method bytes (line 41) | pub fn bytes(bytes: &[u8]) -> String {
  type Raft (line 58) | pub struct Raft<F: Formatter>(PhantomData<F>);
  function entry (line 62) | pub fn entry(entry: &raft::Entry) -> String {
  type MVCC (line 105) | pub struct MVCC<F: Formatter>(PhantomData<F>);
  type SQL (line 157) | pub struct SQL;
    method values (line 161) | fn values(values: impl IntoIterator<Item = sql::types::Value>) -> Stri...
    method schema (line 166) | fn schema(table: sql::types::Table) -> String {
  type SQLCommand (line 228) | pub struct SQLCommand;

FILE: src/encoding/keycode.rs
  function serialize (line 58) | pub fn serialize<T: Serialize>(key: &T) -> Vec<u8> {
  function deserialize (line 66) | pub fn deserialize<'a, T: Deserialize<'a>>(input: &'a [u8]) -> Result<T> {
  function prefix_range (line 85) | pub fn prefix_range(prefix: &[u8]) -> (Bound<Vec<u8>>, Bound<Vec<u8>>) {
  type Serializer (line 97) | struct Serializer {
  type Ok (line 102) | type Ok = ();
  type Error (line 103) | type Error = Error;
  type SerializeSeq (line 105) | type SerializeSeq = Self;
    type Ok (line 297) | type Ok = ();
    type Error (line 298) | type Error = Error;
    method serialize_element (line 300) | fn serialize_element<T: Serialize + ?Sized>(&mut self, value: &T) -> R...
    method end (line 304) | fn end(self) -> Result<()> {
  type SerializeTuple (line 106) | type SerializeTuple = Self;
    type Ok (line 311) | type Ok = ();
    type Error (line 312) | type Error = Error;
    method serialize_element (line 314) | fn serialize_element<T: Serialize + ?Sized>(&mut self, value: &T) -> R...
    method end (line 318) | fn end(self) -> Result<()> {
  type SerializeTupleVariant (line 107) | type SerializeTupleVariant = Self;
    type Ok (line 325) | type Ok = ();
    type Error (line 326) | type Error = Error;
    method serialize_field (line 328) | fn serialize_field<T: Serialize + ?Sized>(&mut self, value: &T) -> Res...
    method end (line 332) | fn end(self) -> Result<()> {
  type SerializeTupleStruct (line 108) | type SerializeTupleStruct = Impossible<(), Error>;
  type SerializeMap (line 109) | type SerializeMap = Impossible<(), Error>;
  type SerializeStruct (line 110) | type SerializeStruct = Impossible<(), Error>;
  type SerializeStructVariant (line 111) | type SerializeStructVariant = Impossible<(), Error>;
  function serialize_bool (line 114) | fn serialize_bool(self, v: bool) -> Result<()> {
  function serialize_i8 (line 119) | fn serialize_i8(self, _: i8) -> Result<()> {
  function serialize_i16 (line 123) | fn serialize_i16(self, _: i16) -> Result<()> {
  function serialize_i32 (line 127) | fn serialize_i32(self, _: i32) -> Result<()> {
  function serialize_i64 (line 138) | fn serialize_i64(self, v: i64) -> Result<()> {
  function serialize_u8 (line 145) | fn serialize_u8(self, _: u8) -> Result<()> {
  function serialize_u16 (line 149) | fn serialize_u16(self, _: u16) -> Result<()> {
  function serialize_u32 (line 153) | fn serialize_u32(self, _: u32) -> Result<()> {
  function serialize_u64 (line 158) | fn serialize_u64(self, v: u64) -> Result<()> {
  function serialize_f32 (line 163) | fn serialize_f32(self, _: f32) -> Result<()> {
  function serialize_f64 (line 171) | fn serialize_f64(self, v: f64) -> Result<()> {
  function serialize_char (line 181) | fn serialize_char(self, _: char) -> Result<()> {
  function serialize_str (line 186) | fn serialize_str(self, v: &str) -> Result<()> {
  function serialize_bytes (line 195) | fn serialize_bytes(self, v: &[u8]) -> Result<()> {
  function serialize_none (line 207) | fn serialize_none(self) -> Result<()> {
  function serialize_some (line 211) | fn serialize_some<T: Serialize + ?Sized>(self, _: &T) -> Result<()> {
  function serialize_unit (line 215) | fn serialize_unit(self) -> Result<()> {
  function serialize_unit_struct (line 219) | fn serialize_unit_struct(self, _: &'static str) -> Result<()> {
  function serialize_unit_variant (line 224) | fn serialize_unit_variant(self, _: &'static str, index: u32, _: &'static...
  function serialize_newtype_struct (line 229) | fn serialize_newtype_struct<T: Serialize + ?Sized>(self, _: &'static str...
  function serialize_newtype_variant (line 234) | fn serialize_newtype_variant<T: Serialize + ?Sized>(
  function serialize_seq (line 246) | fn serialize_seq(self, _: Option<usize>) -> Result<Self::SerializeSeq> {
  function serialize_tuple (line 251) | fn serialize_tuple(self, _: usize) -> Result<Self::SerializeTuple> {
  function serialize_tuple_struct (line 255) | fn serialize_tuple_struct(
  function serialize_tuple_variant (line 265) | fn serialize_tuple_variant(
  function serialize_map (line 276) | fn serialize_map(self, _: Option<usize>) -> Result<Self::SerializeMap> {
  function serialize_struct (line 280) | fn serialize_struct(self, _: &'static str, _: usize) -> Result<Self::Ser...
  function serialize_struct_variant (line 284) | fn serialize_struct_variant(
  type Deserializer (line 340) | pub struct Deserializer<'de> {
  function from_bytes (line 346) | pub fn from_bytes(input: &'de [u8]) -> Self {
  function take_bytes (line 352) | fn take_bytes(&mut self, len: usize) -> Result<&[u8]> {
  function decode_next_bytes (line 362) | fn decode_next_bytes(&mut self) -> Result<Vec<u8>> {
  type Error (line 383) | type Error = Error;
  function deserialize_any (line 385) | fn deserialize_any<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_bool (line 389) | fn deserialize_bool<V: Visitor<'de>>(self, visitor: V) -> Result<V::Valu...
  function deserialize_i8 (line 397) | fn deserialize_i8<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_i16 (line 401) | fn deserialize_i16<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_i32 (line 405) | fn deserialize_i32<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_i64 (line 409) | fn deserialize_i64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
  function deserialize_u8 (line 415) | fn deserialize_u8<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_u16 (line 419) | fn deserialize_u16<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_u32 (line 423) | fn deserialize_u32<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_u64 (line 427) | fn deserialize_u64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
  function deserialize_f32 (line 431) | fn deserialize_f32<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_f64 (line 435) | fn deserialize_f64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
  function deserialize_char (line 445) | fn deserialize_char<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_str (line 449) | fn deserialize_str<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
  function deserialize_string (line 454) | fn deserialize_string<V: Visitor<'de>>(self, visitor: V) -> Result<V::Va...
  function deserialize_bytes (line 459) | fn deserialize_bytes<V: Visitor<'de>>(self, visitor: V) -> Result<V::Val...
  function deserialize_byte_buf (line 464) | fn deserialize_byte_buf<V: Visitor<'de>>(self, visitor: V) -> Result<V::...
  function deserialize_option (line 469) | fn deserialize_option<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_unit (line 473) | fn deserialize_unit<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_unit_struct (line 477) | fn deserialize_unit_struct<V: Visitor<'de>>(self, _: &'static str, _: V)...
  function deserialize_newtype_struct (line 481) | fn deserialize_newtype_struct<V: Visitor<'de>>(
  function deserialize_seq (line 489) | fn deserialize_seq<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value> {
  function deserialize_tuple (line 493) | fn deserialize_tuple<V: Visitor<'de>>(self, _: usize, visitor: V) -> Res...
  function deserialize_tuple_struct (line 497) | fn deserialize_tuple_struct<V: Visitor<'de>>(
  function deserialize_map (line 506) | fn deserialize_map<V: Visitor<'de>>(self, _: V) -> Result<V::Value> {
  function deserialize_struct (line 510) | fn deserialize_struct<V: Visitor<'de>>(
  function deserialize_enum (line 519) | fn deserialize_enum<V: Visitor<'de>>(
  function deserialize_identifier (line 528) | fn deserialize_identifier<V: Visitor<'de>>(self, _: V) -> Result<V::Valu...
  function deserialize_ignored_any (line 532) | fn deserialize_ignored_any<V: Visitor<'de>>(self, _: V) -> Result<V::Val...
  type Error (line 539) | type Error = Error;
  function next_element_seed (line 541) | fn next_element_seed<T: DeserializeSeed<'de>>(&mut self, seed: T) -> Res...
  type Error (line 551) | type Error = Error;
  type Variant (line 552) | type Variant = Self;
  function variant_seed (line 554) | fn variant_seed<V: DeserializeSeed<'de>>(self, seed: V) -> Result<(V::Va...
  type Error (line 563) | type Error = Error;
  function unit_variant (line 565) | fn unit_variant(self) -> Result<()> {
  function newtype_variant_seed (line 569) | fn newtype_variant_seed<T: DeserializeSeed<'de>>(self, seed: T) -> Resul...
  function tuple_variant (line 573) | fn tuple_variant<V: Visitor<'de>>(self, _: usize, visitor: V) -> Result<...
  function struct_variant (line 577) | fn struct_variant<V: Visitor<'de>>(self, _: &'static [&'static str], _: ...
  type Key (line 595) | enum Key<'a> {

FILE: src/encoding/mod.rs
  type Key (line 22) | pub trait Key<'de>: Serialize + Deserialize<'de> {
    method decode (line 24) | fn decode(bytes: &'de [u8]) -> Result<Self> {
    method encode (line 34) | fn encode(&self) -> Vec<u8> {
  type Value (line 42) | pub trait Value: Serialize + DeserializeOwned {
    method decode (line 44) | fn decode(bytes: &[u8]) -> Result<Self> {
    method decode_from (line 49) | fn decode_from<R: Read>(reader: R) -> Result<Self> {
    method maybe_decode_from (line 55) | fn maybe_decode_from<R: Read>(reader: R) -> Result<Option<Self>> {
    method encode (line 60) | fn encode(&self) -> Vec<u8> {
    method encode_into (line 65) | fn encode_into<W: Write>(&self, writer: W) -> Result<()> {

FILE: src/error.rs
  type Error (line 7) | pub enum Error {
    method is_deterministic (line 46) | pub fn is_deterministic(&self) -> bool {
    method custom (line 89) | fn custom<T: Display>(msg: T) -> Self {
    method custom (line 95) | fn custom<T: Display>(msg: T) -> Self {
    method from (line 101) | fn from(err: bincode::error::DecodeError) -> Self {
    method from (line 107) | fn from(err: bincode::error::EncodeError) -> Self {
    method from (line 113) | fn from(err: config::ConfigError) -> Self {
    method from (line 119) | fn from(err: crossbeam::channel::RecvError) -> Self {
    method from (line 125) | fn from(err: crossbeam::channel::SendError<T>) -> Self {
    method from (line 131) | fn from(err: crossbeam::channel::TryRecvError) -> Self {
    method from (line 137) | fn from(err: crossbeam::channel::TrySendError<T>) -> Self {
    method from (line 143) | fn from(err: hdrhistogram::CreationError) -> Self {
    method from (line 149) | fn from(err: hdrhistogram::RecordError) -> Self {
    method from (line 155) | fn from(err: log::ParseLevelError) -> Self {
    method from (line 161) | fn from(err: log::SetLoggerError) -> Self {
    method from (line 167) | fn from(err: rand::distr::uniform::Error) -> Self {
    method from (line 173) | fn from(err: regex::Error) -> Self {
    method from (line 179) | fn from(err: rustyline::error::ReadlineError) -> Self {
    method from (line 185) | fn from(err: std::array::TryFromSliceError) -> Self {
    method from (line 191) | fn from(err: std::io::Error) -> Self {
    method from (line 197) | fn from(err: std::num::ParseFloatError) -> Self {
    method from (line 203) | fn from(err: std::num::ParseIntError) -> Self {
    method from (line 209) | fn from(err: std::num::TryFromIntError) -> Self {
    method from (line 215) | fn from(err: std::string::FromUtf8Error) -> Self {
    method from (line 221) | fn from(err: std::sync::PoisonError<T>) -> Self {
  method fmt (line 28) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  type Result (line 80) | pub type Result<T> = std::result::Result<T, Error>;
  function from (line 83) | fn from(error: Error) -> Self {

FILE: src/raft/log.rs
  type Index (line 11) | pub type Index = u64;
  type Entry (line 15) | pub struct Entry {
  type Key (line 32) | pub enum Key {
  type Log (line 92) | pub struct Log {
    method new (line 120) | pub fn new(mut engine: Box<dyn storage::Engine>) -> Result<Self> {
    method enable_fsync (line 150) | pub fn enable_fsync(&mut self, fsync: bool) {
    method get_commit_index (line 155) | pub fn get_commit_index(&self) -> (Index, Term) {
    method get_last_index (line 160) | pub fn get_last_index(&self) -> (Index, Term) {
    method get_term_vote (line 165) | pub fn get_term_vote(&self) -> (Term, Option<NodeID>) {
    method set_term_vote (line 172) | pub fn set_term_vote(&mut self, term: Term, vote: Option<NodeID>) -> R...
    method append (line 193) | pub fn append(&mut self, command: Option<Vec<u8>>) -> Result<Index> {
    method commit (line 207) | pub fn commit(&mut self, index: Index) -> Result<Index> {
    method get (line 225) | pub fn get(&mut self, index: Index) -> Result<Option<Entry>> {
    method has (line 230) | pub fn has(&mut self, index: Index, term: Term) -> Result<bool> {
    method scan (line 243) | pub fn scan(&mut self, range: impl RangeBounds<Index>) -> Iterator<'_> {
    method scan_apply (line 259) | pub fn scan_apply(&mut self, applied_index: Index) -> Iterator<'_> {
    method splice (line 278) | pub fn splice(&mut self, entries: Vec<Entry>) -> Result<Index> {
    method status (line 346) | pub fn status(&mut self) -> Result<storage::Status> {
  type Iterator (line 352) | pub struct Iterator<'a> {
  function new (line 357) | fn new(inner: Box<dyn storage::ScanIterator + 'a>) -> Self {
  type Item (line 363) | type Item = Result<Entry>;
  function next (line 365) | fn next(&mut self) -> Option<Self::Item> {
  function test_goldenscript (line 390) | fn test_goldenscript(path: &std::path::Path) {
  type TestRunner (line 395) | struct TestRunner {
    method new (line 403) | fn new() -> Self {
    method parse_index_term (line 417) | fn parse_index_term(s: &str) -> Result<(Index, Term), Box<dyn Error>> {
    method parse_index_range (line 426) | fn parse_index_range(s: &str) -> Result<impl RangeBounds<Index>, Box<d...
    method run (line 447) | fn run(&mut self, command: &goldenscript::Command) -> Result<String, B...
    method end_command (line 639) | fn end_command(&mut self, _: &goldenscript::Command) -> Result<String,...

FILE: src/raft/message.rs
  type Envelope (line 12) | pub struct Envelope {
  type Message (line 33) | pub enum Message {
  type RequestID (line 159) | pub type RequestID = uuid::Uuid;
  type ReadSequence (line 162) | pub type ReadSequence = u64;
  type Request (line 166) | pub enum Request {
  type Response (line 181) | pub enum Response {
  type Status (line 194) | pub struct Status {

FILE: src/raft/mod.rs
  constant TICK_INTERVAL (line 256) | pub const TICK_INTERVAL: Duration = Duration::from_millis(100);
  constant HEARTBEAT_INTERVAL (line 259) | const HEARTBEAT_INTERVAL: Ticks = 4;
  constant ELECTION_TIMEOUT_RANGE (line 263) | const ELECTION_TIMEOUT_RANGE: Range<Ticks> = 10..20;
  constant MAX_APPEND_ENTRIES (line 266) | const MAX_APPEND_ENTRIES: usize = 100;

FILE: src/raft/node.rs
  type NodeID (line 18) | pub type NodeID = u8;
  type Term (line 21) | pub type Term = u64;
  type Ticks (line 24) | pub type Ticks = u8;
  type Options (line 28) | pub struct Options {
  method default (line 38) | fn default() -> Self {
  type Node (line 59) | pub enum Node {
    method new (line 73) | pub fn new(
    method id (line 90) | pub fn id(&self) -> NodeID {
    method term (line 99) | pub fn term(&self) -> Term {
    method step (line 108) | pub fn step(self, msg: Envelope) -> Result<Self> {
    method tick (line 126) | pub fn tick(self) -> Result<Self> {
    method from (line 136) | fn from(node: RawNode<Candidate>) -> Self {
    method from (line 142) | fn from(node: RawNode<Follower>) -> Self {
    method from (line 148) | fn from(node: RawNode<Leader>) -> Self {
    method dismantle (line 1261) | fn dismantle(self) -> (Log, Box<dyn State>) {
    method get_applied_index (line 1265) | fn get_applied_index(&self) -> Index {
    method get_commit_index (line 1269) | fn get_commit_index(&self) -> (Index, Term) {
    method get_last_index (line 1273) | fn get_last_index(&self) -> (Index, Term) {
    method get_term_vote (line 1277) | fn get_term_vote(&self) -> (Term, Option<NodeID>) {
    method options (line 1281) | fn options(&self) -> Options {
    method peers (line 1285) | fn peers(&self) -> HashSet<NodeID> {
    method read (line 1289) | fn read(&self, command: Vec<u8>) -> crate::error::Result<Vec<u8>> {
    method scan_log (line 1293) | fn scan_log(&mut self) -> crate::error::Result<Vec<Entry>> {
  type Role (line 154) | pub trait Role {}
  type RawNode (line 160) | pub struct RawNode<R: Role> {
  function into_role (line 181) | fn into_role<T: Role>(self, role: T) -> RawNode<T> {
  function term (line 194) | fn term(&self) -> Term {
  function cluster_size (line 199) | fn cluster_size(&self) -> usize {
  function quorum_size (line 204) | fn quorum_size(&self) -> usize {
  function quorum_value (line 210) | fn quorum_value<T: Ord + Copy>(&self, mut values: Vec<T>) -> T {
  function random_election_timeout (line 216) | fn random_election_timeout(&self) -> Ticks {
  function send (line 221) | fn send(&self, to: NodeID, message: Message) -> Result<()> {
  function send_via (line 227) | fn send_via(tx: &Sender<Envelope>, msg: Envelope) -> Result<()> {
  function broadcast (line 233) | fn broadcast(&self, message: Message) -> Result<()> {
  type Follower (line 245) | pub struct Follower {
    method new (line 259) | fn new(leader: Option<NodeID>, election_timeout: Ticks) -> Self {
  function new (line 268) | fn new(
  function into_candidate (line 294) | fn into_candidate(mut self) -> Result<RawNode<Candidate>> {
  function into_follower (line 316) | fn into_follower(mut self, term: Term, leader: Option<NodeID>) -> Result...
  function step (line 341) | fn step(mut self, msg: Envelope) -> Result<Node> {
  function tick (line 490) | fn tick(mut self) -> Result<Node> {
  function abort_forwarded (line 500) | fn abort_forwarded(&mut self) -> Result<()> {
  function maybe_apply (line 510) | fn maybe_apply(&mut self) -> Result<()> {
  type Candidate (line 524) | pub struct Candidate {
    method new (line 535) | fn new(election_timeout: Ticks) -> Self {
  function into_follower (line 546) | fn into_follower(mut self, term: Term, leader: Option<NodeID>) -> Result...
  function into_leader (line 564) | fn into_leader(self) -> Result<RawNode<Leader>> {
  function step (line 586) | fn step(mut self, msg: Envelope) -> Result<Node> {
  function tick (line 638) | fn tick(mut self) -> Result<Node> {
  function campaign (line 649) | fn campaign(&mut self) -> Result<()> {
  type Leader (line 663) | pub struct Leader {
    method new (line 754) | fn new(peers: HashSet<NodeID>, last_index: Index) -> Self {
  type Progress (line 683) | struct Progress {
    method advance (line 703) | fn advance(&mut self, match_index: Index) -> bool {
    method advance_read (line 713) | fn advance_read(&mut self, read_seq: ReadSequence) -> bool {
    method regress_next (line 723) | fn regress_next(&mut self, next_index: Index) -> bool {
  type Write (line 733) | struct Write {
  type Read (line 741) | struct Read {
  function into_follower (line 776) | fn into_follower(mut self, term: Term) -> Result<RawNode<Follower>> {
  function step (line 797) | fn step(mut self, msg: Envelope) -> Result<Node> {
  function tick (line 946) | fn tick(mut self) -> Result<Node> {
  function heartbeat (line 956) | fn heartbeat(&mut self) -> Result<()> {
  function propose (line 969) | fn propose(&mut self, command: Option<Vec<u8>>) -> Result<Index> {
  function maybe_commit_and_apply (line 984) | fn maybe_commit_and_apply(&mut self) -> Result<Index> {
  function maybe_read (line 1036) | fn maybe_read(&mut self) -> Result<()> {
  function maybe_send_append (line 1081) | fn maybe_send_append(&mut self, peer: NodeID, mut probe: bool) -> Result...
  function status (line 1131) | fn status(&mut self) -> Result<Status> {
  function progress (line 1149) | fn progress(&mut self, id: NodeID) -> &mut Progress {
  function test_goldenscript (line 1179) | fn test_goldenscript(path: &Path) {
  function quorum_size (line 1192) | fn quorum_size(size: usize) -> usize {
  function quorum_value (line 1204) | fn quorum_value(values: Vec<i8>) -> i8 {
  function new_noop (line 1214) | fn new_noop(id: NodeID, peers: HashSet<NodeID>) -> Self {
  type TestRunner (line 1299) | struct TestRunner {
    method run (line 1323) | fn run(&mut self, command: &goldenscript::Command) -> Result<String, B...
    method new (line 1497) | fn new() -> Self {
    method add_node (line 1512) | fn add_node(
    method add_node_with (line 1530) | fn add_node_with(
    method campaign (line 1550) | fn campaign(&mut self, ids: &[NodeID], output: &mut String) -> Result<...
    method cluster (line 1569) | fn cluster(
    method deliver (line 1613) | fn deliver(
    method heal (line 1643) | fn heal(&mut self, ids: &[NodeID], output: &mut String) -> Result<(), ...
    method heartbeat (line 1655) | fn heartbeat(&mut self, ids: &[NodeID], output: &mut String) -> Result...
    method log (line 1667) | fn log(&mut self, ids: &[NodeID], output: &mut String) -> Result<(), B...
    method partition (line 1688) | fn partition(&mut self, ids: &[NodeID], output: &mut String) -> Result...
    method receive (line 1702) | fn receive(&mut self, id: NodeID, output: &mut String) -> Result<u32, ...
    method request (line 1744) | fn request(
    method restart (line 1767) | fn restart(
    method stabilize (line 1825) | fn stabilize(
    method state (line 1850) | fn state(&mut self, ids: &[NodeID], output: &mut String) -> Result<(),...
    method status (line 1868) | fn status(&self, ids: &[NodeID], output: &mut String) -> Result<(), Bo...
    method transition (line 1896) | fn transition(
    method parse_ids (line 1945) | fn parse_ids<A>(&self, args: &[A]) -> Result<Vec<NodeID>, Box<dyn Error>>
    method parse_ids_or_all (line 1965) | fn parse_ids_or_all<A>(&self, args: &[A]) -> Result<Vec<NodeID>, Box<d...
    method parse_ids_or_error (line 1977) | fn parse_ids_or_error<A>(&self, args: &[A]) -> Result<Vec<NodeID>, Box...
    method format_disconnected (line 1989) | fn format_disconnected(disconnected: &HashMap<NodeID, HashSet<NodeID>>...
    method format_entry (line 2065) | fn format_entry(entry: &Entry) -> String {
    method format_message (line 2074) | fn format_message(msg: &Message) -> String {
    method format_node (line 2135) | fn format_node(node: &Node) -> String {
    method format_node_role (line 2140) | fn format_node_role(node: &Node) -> String {
    method format_request (line 2153) | fn format_request(request: &Request) -> String {
    method format_response (line 2161) | fn format_response(response: &crate::error::Result<Response>) -> String {
    method format_strikethrough (line 2172) | fn format_strikethrough(s: &str) -> String {

FILE: src/raft/state.rs
  type State (line 19) | pub trait State: Send {
    method get_applied_index (line 26) | fn get_applied_index(&self) -> Index;
    method apply (line 43) | fn apply(&mut self, entry: Entry) -> Result<Vec<u8>>;
    method read (line 50) | fn read(&self, command: Vec<u8>) -> Result<Vec<u8>>;
    method get_applied_index (line 79) | fn get_applied_index(&self) -> Index {
    method apply (line 83) | fn apply(&mut self, entry: Entry) -> Result<Vec<u8>> {
    method read (line 89) | fn read(&self, command: Vec<u8>) -> Result<Vec<u8>> {
    method get_applied_index (line 107) | fn get_applied_index(&self) -> Index {
    method apply (line 111) | fn apply(&mut self, entry: Entry) -> Result<Vec<u8>> {
    method read (line 127) | fn read(&self, command: Vec<u8>) -> Result<Vec<u8>> {
    method get_applied_index (line 199) | fn get_applied_index(&self) -> Index {
    method apply (line 203) | fn apply(&mut self, entry: Entry) -> Result<Vec<u8>> {
    method read (line 208) | fn read(&self, _: Vec<u8>) -> Result<Vec<u8>> {
  type Emit (line 67) | pub struct Emit {
    method new (line 73) | pub fn new(inner: Box<dyn State>, tx: Sender<Entry>) -> Box<Self> {
  type KV (line 95) | pub struct KV {
    method new (line 101) | pub fn new() -> Box<Self> {
  type KVCommand (line 140) | pub enum KVCommand {
  method fmt (line 152) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  type KVResponse (line 163) | pub enum KVResponse {
  method fmt (line 175) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  type Noop (line 188) | pub struct Noop {
    method new (line 193) | pub fn new() -> Box<Self> {

FILE: src/server.rs
  constant RAFT_PEER_CHANNEL_CAPACITY (line 22) | const RAFT_PEER_CHANNEL_CAPACITY: usize = 1000;
  constant RAFT_PEER_RETRY_INTERVAL (line 25) | const RAFT_PEER_RETRY_INTERVAL: Duration = Duration::from_secs(1);
  type Server (line 37) | pub struct Server {
    method new (line 48) | pub fn new(
    method serve (line 67) | pub fn serve(self, raft_addr: impl ToSocketAddrs, sql_addr: impl ToSoc...
    method raft_accept (line 114) | fn raft_accept(listener: TcpListener, raft_step_tx: Sender<raft::Envel...
    method raft_receive_peer (line 138) | fn raft_receive_peer(socket: TcpStream, raft_step_tx: Sender<raft::Env...
    method raft_send_peer (line 148) | fn raft_send_peer(addr: String, raft_node_rx: Receiver<raft::Envelope>) {
    method raft_route (line 186) | fn raft_route(
    method sql_accept (line 252) | fn sql_accept(id: raft::NodeID, listener: TcpListener, sql_engine: sql...
    method sql_session (line 276) | fn sql_session(
  type Request (line 314) | pub enum Request {
  type Response (line 329) | pub enum Response {
  type Status (line 341) | pub struct Status {

FILE: src/sql/engine/engine.rs
  type Engine (line 13) | pub trait Engine<'a>: Sized {
    method begin (line 19) | fn begin(&'a self) -> Result<Self::Transaction>;
    method begin_read_only (line 21) | fn begin_read_only(&'a self) -> Result<Self::Transaction>;
    method begin_as_of (line 23) | fn begin_as_of(&'a self, version: mvcc::Version) -> Result<Self::Trans...
    method session (line 26) | fn session(&'a self) -> Session<'a, Self> {
  type Transaction (line 37) | pub trait Transaction: Catalog {
    method state (line 39) | fn state(&self) -> &mvcc::TransactionState;
    method commit (line 42) | fn commit(self) -> Result<()>;
    method rollback (line 44) | fn rollback(self) -> Result<()>;
    method delete (line 47) | fn delete(&self, table: &str, ids: &[Value]) -> Result<()>;
    method get (line 49) | fn get(&self, table: &str, ids: &[Value]) -> Result<Vec<Row>>;
    method insert (line 51) | fn insert(&self, table: &str, rows: Vec<Row>) -> Result<()>;
    method lookup_index (line 53) | fn lookup_index(&self, table: &str, column: &str, values: &[Value]) ->...
    method scan (line 55) | fn scan(&self, table: &str, filter: Option<Expression>) -> Result<Rows>;
    method update (line 57) | fn update(&self, table: &str, rows: BTreeMap<Value, Row>) -> Result<()>;
  type Catalog (line 64) | pub trait Catalog {
    method create_table (line 66) | fn create_table(&self, table: Table) -> Result<()>;
    method drop_table (line 69) | fn drop_table(&self, table: &str, if_exists: bool) -> Result<bool>;
    method get_table (line 71) | fn get_table(&self, table: &str) -> Result<Option<Table>>;
    method list_tables (line 73) | fn list_tables(&self) -> Result<Vec<Table>>;
    method must_get_table (line 76) | fn must_get_table(&self, table: &str) -> Result<Table> {

FILE: src/sql/engine/local.rs
  type Key (line 22) | pub enum Key<'a> {
  type KeyPrefix (line 39) | enum KeyPrefix<'a> {
  type Local (line 53) | pub struct Local<E: storage::Engine + 'static> {
  function new (line 60) | pub fn new(engine: E) -> Self {
  function resume (line 68) | pub fn resume(&self, state: mvcc::TransactionState) -> Result<Transactio...
  function get_unversioned (line 73) | pub fn get_unversioned(&self, key: &[u8]) -> Result<Option<Vec<u8>>> {
  function set_unversioned (line 78) | pub fn set_unversioned(&self, key: &[u8], value: Vec<u8>) -> Result<()> {
  type Transaction (line 84) | type Transaction = Transaction<E>;
  function begin (line 86) | fn begin(&self) -> Result<Self::Transaction> {
  function begin_read_only (line 90) | fn begin_read_only(&self) -> Result<Self::Transaction> {
  function begin_as_of (line 94) | fn begin_as_of(&self, version: mvcc::Version) -> Result<Self::Transactio...
  type Transaction (line 100) | pub struct Transaction<E: storage::Engine + 'static> {
  function new (line 106) | fn new(txn: mvcc::Transaction<E>) -> Self {
  function state (line 111) | pub fn state(&self) -> &mvcc::TransactionState {
  function get_index (line 117) | fn get_index(&self, table: &str, column: &str, value: &Value) -> Result<...
  function get_row (line 128) | fn get_row(&self, table: &str, id: &Value) -> Result<Option<Row>> {
  function has_index (line 136) | fn has_index(&self, table: &str, column: &str) -> Result<bool> {
  function set_index (line 143) | fn set_index(
  function table_references (line 162) | fn table_references(&self, table: &str) -> Result<Vec<(Table, Vec<usize>...
  function state (line 182) | fn state(&self) -> &mvcc::TransactionState {
  function commit (line 186) | fn commit(self) -> Result<()> {
  function rollback (line 190) | fn rollback(self) -> Result<()> {
  function delete (line 194) | fn delete(&self, table: &str, ids: &[Value]) -> Result<()> {
  function get (line 248) | fn get(&self, table: &str, ids: &[Value]) -> Result<Vec<Row>> {
  function insert (line 252) | fn insert(&self, table: &str, rows: Vec<Row>) -> Result<()> {
  function lookup_index (line 270) | fn lookup_index(&self, table: &str, column: &str, values: &[Value]) -> R...
  function scan (line 275) | fn scan(&self, table: &str, filter: Option<Expression>) -> Result<Rows> {
  function update (line 296) | fn update(&self, table: &str, rows: BTreeMap<Value, Row>) -> Result<()> {
  method create_table (line 341) | fn create_table(&self, table: Table) -> Result<()> {
  method drop_table (line 349) | fn drop_table(&self, table: &str, if_exists: bool) -> Result<bool> {
  method get_table (line 390) | fn get_table(&self, table: &str) -> Result<Option<Table>> {
  method list_tables (line 394) | fn list_tables(&self) -> Result<Vec<Table>> {

FILE: src/sql/engine/raft.rs
  type Read (line 20) | pub enum Read<'a> {
  type Write (line 58) | pub enum Write<'a> {
  type Status (line 75) | pub struct Status {
  type Raft (line 92) | pub struct Raft {
    constant APPLIED_INDEX_KEY (line 100) | pub const APPLIED_INDEX_KEY: &'static [u8] = b"applied_index";
    method new (line 104) | pub fn new(tx: Sender<(raft::Request, Sender<Result<raft::Response>>)>...
    method new_state (line 110) | pub fn new_state<E: storage::Engine>(engine: E) -> Result<State<E>> {
    method request (line 115) | fn request(&self, request: raft::Request) -> Result<raft::Response> {
    method write (line 122) | fn write<V: DeserializeOwned>(&self, write: Write) -> Result<V> {
    method read (line 130) | fn read<V: DeserializeOwned>(&self, read: Read) -> Result<V> {
    method status (line 138) | pub fn status(&self) -> Result<Status> {
    type Transaction (line 149) | type Transaction = Transaction<'a>;
    method begin (line 151) | fn begin(&'a self) -> Result<Self::Transaction> {
    method begin_read_only (line 155) | fn begin_read_only(&'a self) -> Result<Self::Transaction> {
    method begin_as_of (line 159) | fn begin_as_of(&'a self, version: mvcc::Version) -> Result<Self::Trans...
  type Transaction (line 171) | pub struct Transaction<'a> {
  function begin (line 180) | fn begin(raft: &'a Raft, read_only: bool, as_of: Option<mvcc::Version>) ...
  function state (line 195) | fn state(&self) -> &mvcc::TransactionState {
  function commit (line 199) | fn commit(self) -> Result<()> {
  function rollback (line 206) | fn rollback(self) -> Result<()> {
  function delete (line 213) | fn delete(&self, table: &str, ids: &[Value]) -> Result<()> {
  function get (line 221) | fn get(&self, table: &str, ids: &[Value]) -> Result<Vec<Row>> {
  function insert (line 229) | fn insert(&self, table: &str, rows: Vec<Row>) -> Result<()> {
  function lookup_index (line 233) | fn lookup_index(&self, table: &str, column: &str, values: &[Value]) -> R...
  function scan (line 242) | fn scan(&self, table: &str, filter: Option<Expression>) -> Result<Rows> {
  function update (line 251) | fn update(&self, table: &str, rows: BTreeMap<Value, Row>) -> Result<()> {
  method create_table (line 257) | fn create_table(&self, schema: Table) -> Result<()> {
  method drop_table (line 261) | fn drop_table(&self, table: &str, if_exists: bool) -> Result<bool> {
  method get_table (line 269) | fn get_table(&self, table: &str) -> Result<Option<Table>> {
  method list_tables (line 273) | fn list_tables(&self) -> Result<Vec<Table>> {
  type State (line 286) | pub struct State<E: storage::Engine + 'static> {
  function new (line 296) | pub fn new(engine: E) -> Result<Self> {
  function write (line 311) | fn write(&self, command: Write) -> Result<Vec<u8>> {
  function get_applied_index (line 342) | fn get_applied_index(&self) -> raft::Index {
  function apply (line 346) | fn apply(&mut self, entry: raft::Entry) -> Result<Vec<u8>> {
  function read (line 369) | fn read(&self, command: Vec<u8>) -> Result<Vec<u8>> {

FILE: src/sql/execution/aggregator.rs
  type Aggregator (line 13) | pub struct Aggregator {
    method new (line 24) | pub fn new(group_by: Vec<Expression>, aggregates: Vec<Aggregate>) -> S...
    method add (line 29) | pub fn add(&mut self, row: &Row) -> Result<()> {
    method add_rows (line 51) | pub fn add_rows(&mut self, rows: Rows) -> Result<()> {
    method into_rows (line 59) | pub fn into_rows(self) -> Rows {
  type Accumulator (line 85) | enum Accumulator {
    method new (line 95) | fn new(aggregate: &Aggregate) -> Self {
    method add (line 106) | fn add(&mut self, value: Value) -> Result<()> {
    method value (line 127) | fn value(self) -> Result<Value> {

FILE: src/sql/execution/executor.rs
  type Executor (line 46) | pub struct Executor<'a, T: Transaction> {
  function new (line 53) | pub fn new(txn: &'a T) -> Self {
  function execute (line 58) | pub fn execute(&mut self, plan: Plan) -> Result<ExecutionResult> {
  function execute_node (line 104) | fn execute_node(&mut self, node: Node) -> Result<Rows> {
  function delete (line 216) | fn delete(&self, table: &str, primary_key: usize, source: Rows) -> Resul...
  function insert (line 230) | fn insert(
  function update (line 277) | fn update(
  function order (line 299) | fn order(source: Rows, order: Vec<(Expression, Direction)>) -> Result<Ro...
  type ExecutionResult (line 332) | pub enum ExecutionResult {

FILE: src/sql/execution/join.rs
  type NestedLoopJoiner (line 21) | pub struct NestedLoopJoiner {
    method new (line 41) | pub fn new(
    method try_next (line 54) | fn try_next(&mut self) -> Result<Option<Row>> {
  type Item (line 96) | type Item = Result<Row>;
  method next (line 98) | fn next(&mut self) -> Option<Self::Item> {
  type HashJoiner (line 112) | pub struct HashJoiner {
    method new (line 129) | pub fn new(
    method try_next (line 153) | fn try_next(&mut self) -> Result<Option<Row>> {
  type Item (line 186) | type Item = Result<Row>;
  method next (line 188) | fn next(&mut self) -> Option<Self::Item> {

FILE: src/sql/execution/session.rs
  type Session (line 16) | pub struct Session<'a, E: Engine<'a>> {
  function new (line 25) | pub fn new(engine: &'a E) -> Self {
  function execute (line 30) | pub fn execute(&mut self, statement: &str) -> Result<StatementResult> {
  function with_txn (line 90) | pub fn with_txn<F, T>(&mut self, read_only: bool, f: F) -> Result<T>
  function status (line 117) | pub fn status(&self) -> Result<Status> {
  method drop (line 124) | fn drop(&mut self) {
  type StatementResult (line 134) | pub enum StatementResult {
    type Error (line 152) | type Error = Error;
    method try_from (line 154) | fn try_from(result: ExecutionResult) -> Result<Self> {
  type Error (line 171) | type Error = Error;
  method try_from (line 173) | fn try_from(result: StatementResult) -> Result<Self> {
  type Error (line 183) | type Error = Error;
  method try_from (line 185) | fn try_from(result: StatementResult) -> Result<Self> {
  type Error (line 193) | type Error = Error;
  method try_from (line 195) | fn try_from(result: StatementResult) -> Result<Self> {
  type Error (line 203) | type Error = Error;
  function try_from (line 205) | fn try_from(result: StatementResult) -> Result<Self> {
  type Error (line 212) | type Error = Error;
  function try_from (line 214) | fn try_from(result: StatementResult) -> Result<Self> {
  type Error (line 221) | type Error = Error;
  function try_from (line 223) | fn try_from(result: StatementResult) -> Result<Self> {
  type Error (line 230) | type Error = Error;
  method try_from (line 232) | fn try_from(result: StatementResult) -> Result<Self> {

FILE: src/sql/mod.rs
  function test_goldenscript (line 108) | fn test_goldenscript(path: &Path) {
  function test_goldenscript_expr (line 125) | fn test_goldenscript_expr(path: &Path) {
  type SQLRunner (line 130) | struct SQLRunner<'a> {
  type TestEngine (line 136) | type TestEngine =
  function new (line 140) | fn new(engine: &'a TestEngine, op_rx: Receiver<testengine::Operation>) -...
  function run (line 146) | fn run(&mut self, command: &goldenscript::Command) -> Result<String, Box...
  function end_command (line 273) | fn end_command(&mut self, _: &goldenscript::Command) -> Result<String, B...
  type ExpressionRunner (line 281) | struct ExpressionRunner;
    method run (line 286) | fn run(&mut self, command: &goldenscript::Command) -> Result<String, B...
  type Catalog (line 283) | type Catalog<'a> = <Local<storage::Memory> as Engine<'a>>::Transaction;

FILE: src/sql/parser/ast.rs
  type Statement (line 12) | pub enum Statement {
  type From (line 85) | pub enum From {
  type Column (line 108) | pub struct Column {
  type JoinType (line 121) | pub enum JoinType {
    method is_outer (line 131) | pub fn is_outer(&self) -> bool {
  type Direction (line 141) | pub enum Direction {
  type Expression (line 149) | pub enum Expression {
    method walk (line 239) | pub fn walk(&self, visitor: &mut impl FnMut(&Expression) -> bool) -> b...
    method contains (line 277) | pub fn contains(&self, visitor: &impl Fn(&Expression) -> bool) -> bool {
    method collect (line 283) | pub fn collect(&self, visitor: &impl Fn(&Expression) -> bool, exprs: &...
    method from (line 324) | fn from(literal: Literal) -> Self {
    method from (line 330) | fn from(op: Operator) -> Self {
  type Literal (line 164) | pub enum Literal {
  method eq (line 177) | fn eq(&self, other: &Self) -> bool {
  method hash (line 192) | fn hash<H: Hasher>(&self, state: &mut H) {
  type Operator (line 210) | pub enum Operator {
  function from (line 336) | fn from(value: Operator) -> Self {

FILE: src/sql/parser/lexer.rs
  type Token (line 17) | pub enum Token {
    method from (line 81) | fn from(keyword: Keyword) -> Self {
  method fmt (line 50) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  type Keyword (line 88) | pub enum Keyword {
    type Error (line 159) | type Error = &'static str;
    method try_from (line 161) | fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
  method fmt (line 238) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  type Lexer (line 315) | pub struct Lexer<'a> {
  type Item (line 321) | type Item = Result<Token>;
  method next (line 323) | fn next(&mut self) -> Option<Result<Token>> {
  function new (line 335) | pub fn new(input: &'a str) -> Lexer<'a> {
  function next_if (line 340) | fn next_if(&mut self, predicate: impl Fn(char) -> bool) -> Option<char> {
  function next_if_map (line 347) | fn next_if_map<T>(&mut self, map: impl Fn(char) -> Option<T>) -> Option<...
  function next_is (line 354) | fn next_is(&mut self, c: char) -> bool {
  function scan (line 359) | fn scan(&mut self) -> Result<Option<Token>> {
  function scan_ident_or_keyword (line 377) | fn scan_ident_or_keyword(&mut self) -> Option<Token> {
  function scan_ident_quoted (line 391) | fn scan_ident_quoted(&mut self) -> Result<Option<Token>> {
  function scan_number (line 409) | fn scan_number(&mut self) -> Option<Token> {
  function scan_string (line 436) | fn scan_string(&mut self) -> Result<Option<Token>> {
  function scan_symbol (line 454) | fn scan_symbol(&mut self) -> Option<Token> {
  function skip_whitespace (line 488) | fn skip_whitespace(&mut self) {
  function is_ident (line 494) | pub fn is_ident(ident: &str) -> bool {

FILE: src/sql/parser/parser.rs
  type Parser (line 17) | pub struct Parser<'a> {
  function parse (line 24) | pub fn parse(statement: &str) -> Result<ast::Statement> {
  function parse_expr (line 37) | pub fn parse_expr(expr: &str) -> Result<ast::Expression> {
  function new (line 47) | fn new(input: &str) -> Parser<'_> {
  function next (line 52) | fn next(&mut self) -> Result<Token> {
  function next_ident (line 57) | fn next_ident(&mut self) -> Result<String> {
  function next_if (line 65) | fn next_if(&mut self, predicate: impl Fn(&Token) -> bool) -> Option<Toke...
  function next_if_map (line 72) | fn next_if_map<T>(&mut self, f: impl Fn(&Token) -> Option<T>) -> Option<...
  function next_if_keyword (line 77) | fn next_if_keyword(&mut self) -> Option<Keyword> {
  function next_is (line 85) | fn next_is(&mut self, token: Token) -> bool {
  function expect (line 90) | fn expect(&mut self, expect: Token) -> Result<()> {
  function skip (line 100) | fn skip(&mut self, token: Token) {
  function peek (line 105) | fn peek(&mut self) -> Result<Option<&Token>> {
  function parse_statement (line 110) | fn parse_statement(&mut self) -> Result<ast::Statement> {
  function parse_begin (line 133) | fn parse_begin(&mut self) -> Result<ast::Statement> {
  function parse_commit (line 160) | fn parse_commit(&mut self) -> Result<ast::Statement> {
  function parse_rollback (line 166) | fn parse_rollback(&mut self) -> Result<ast::Statement> {
  function parse_explain (line 172) | fn parse_explain(&mut self) -> Result<ast::Statement> {
  function parse_create_table (line 181) | fn parse_create_table(&mut self) -> Result<ast::Statement> {
  function parse_create_table_column (line 198) | fn parse_create_table_column(&mut self) -> Result<ast::Column> {
  function parse_drop_table (line 247) | fn parse_drop_table(&mut self) -> Result<ast::Statement> {
  function parse_delete (line 260) | fn parse_delete(&mut self) -> Result<ast::Statement> {
  function parse_insert (line 268) | fn parse_insert(&mut self) -> Result<ast::Statement> {
  function parse_update (line 308) | fn parse_update(&mut self) -> Result<ast::Statement> {
  function parse_select (line 331) | fn parse_select(&mut self) -> Result<ast::Statement> {
  function parse_select_clause (line 345) | fn parse_select_clause(&mut self) -> Result<Vec<(ast::Expression, Option...
  function parse_from_clause (line 368) | fn parse_from_clause(&mut self) -> Result<Vec<ast::From>> {
  function parse_from_table (line 394) | fn parse_from_table(&mut self) -> Result<ast::From> {
  function parse_from_join (line 404) | fn parse_from_join(&mut self) -> Result<Option<ast::JoinType>> {
  function parse_where_clause (line 430) | fn parse_where_clause(&mut self) -> Result<Option<ast::Expression>> {
  function parse_group_by_clause (line 438) | fn parse_group_by_clause(&mut self) -> Result<Vec<ast::Expression>> {
  function parse_having_clause (line 454) | fn parse_having_clause(&mut self) -> Result<Option<ast::Expression>> {
  function parse_order_by_clause (line 462) | fn parse_order_by_clause(&mut self) -> Result<Vec<(ast::Expression, ast:...
  function parse_limit_clause (line 486) | fn parse_limit_clause(&mut self) -> Result<Option<ast::Expression>> {
  function parse_offset_clause (line 494) | fn parse_offset_clause(&mut self) -> Result<Option<ast::Expression>> {
  function parse_expression (line 603) | fn parse_expression(&mut self) -> Result<ast::Expression> {
  function parse_expression_at (line 608) | fn parse_expression_at(&mut self, min_precedence: Precedence) -> Result<...
  function parse_expression_atom (line 652) | fn parse_expression_atom(&mut self) -> Result<ast::Expression> {
  function parse_prefix_operator_at (line 700) | fn parse_prefix_operator_at(&mut self, min_precedence: Precedence) -> Op...
  function parse_infix_operator_at (line 714) | fn parse_infix_operator_at(&mut self, min_precedence: Precedence) -> Opt...
  function parse_postfix_operator_at (line 741) | fn parse_postfix_operator_at(
  type Precedence (line 777) | type Precedence = u8;
    type Output (line 786) | type Output = Self;
    method add (line 788) | fn add(self, rhs: Associativity) -> Self {
  type Associativity (line 780) | enum Associativity {
  type PrefixOperator (line 799) | enum PrefixOperator {
    method precedence (line 807) | fn precedence(&self) -> Precedence {
    method associativity (line 816) | fn associativity(&self) -> Associativity {
    method into_expression (line 821) | fn into_expression(self, rhs: ast::Expression) -> ast::Expression {
  type InfixOperator (line 832) | enum InfixOperator {
    method precedence (line 855) | fn precedence(&self) -> Precedence {
    method associativity (line 872) | fn associativity(&self) -> Associativity {
    method into_expression (line 880) | fn into_expression(self, lhs: ast::Expression, rhs: ast::Expression) -...
  type PostfixOperator (line 903) | enum PostfixOperator {
    method precedence (line 911) | fn precedence(&self) -> Precedence {
    method into_expression (line 919) | fn into_expression(self, lhs: ast::Expression) -> ast::Expression {

FILE: src/sql/planner/optimizer.rs
  type Optimizer (line 22) | pub trait Optimizer: Debug + Send + Sync {
    method optimize (line 24) | fn optimize(&self, node: Node) -> Result<Node>;
    method optimize (line 33) | fn optimize(&self, node: Node) -> Result<Node> {
    method optimize (line 98) | fn optimize(&self, node: Node) -> Result<Node> {
    method optimize (line 255) | fn optimize(&self, node: Node) -> Result<Node> {
    method optimize (line 310) | fn optimize(&self, node: Node) -> Result<Node> {
    method optimize (line 357) | fn optimize(&self, node: Node) -> Result<Node> {
  type ConstantFolding (line 30) | pub struct ConstantFolding;
    method fold (line 43) | pub fn fold(mut expr: Expression) -> Result<Expression> {
  type FilterPushdown (line 95) | pub struct FilterPushdown;
    method push_filters (line 106) | fn push_filters(mut node: Node) -> Node {
    method push_into (line 114) | fn push_into(expr: Expression, target: &mut Node) -> Option<Expression> {
    method maybe_push_filter (line 140) | fn maybe_push_filter(node: Node) -> Node {
    method maybe_push_join (line 157) | fn maybe_push_join(node: Node) -> Node {
  type IndexLookup (line 252) | pub struct IndexLookup;
    method index_lookup (line 265) | fn index_lookup(mut node: Node) -> Node {
  type HashJoin (line 307) | pub struct HashJoin;
    method hash_join (line 317) | pub fn hash_join(node: Node) -> Node {
  type ShortCircuit (line 354) | pub struct ShortCircuit;
    method short_circuit (line 366) | fn short_circuit(mut node: Node) -> Node {

FILE: src/sql/planner/plan.rs
  type Plan (line 44) | pub enum Plan {
    method build (line 77) | pub fn build(statement: ast::Statement, catalog: &impl Catalog) -> Res...
    method execute (line 82) | pub fn execute(self, txn: &impl Transaction) -> Result<ExecutionResult> {
    method optimize (line 88) | pub fn optimize(self) -> Result<Self> {
  type Node (line 108) | pub enum Node {
    method columns (line 179) | pub fn columns(&self) -> usize {
    method column_label (line 213) | pub fn column_label(&self, index: usize) -> Label {
    method transform (line 271) | pub fn transform(
    method transform_expressions (line 320) | pub fn transform_expressions(
    method format (line 469) | pub fn format(
  type Aggregate (line 376) | pub enum Aggregate {
    method format (line 385) | fn format(&self, node: &Node) -> String {
    method expr (line 396) | pub fn expr(&self) -> &Expression {
  type Direction (line 409) | pub enum Direction {
    method from (line 424) | fn from(dir: ast::Direction) -> Self {
  method fmt (line 415) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  method fmt (line 434) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  method fmt (line 460) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  function invert_remap (line 649) | pub fn invert_remap(targets: &[Option<usize>]) -> Vec<Option<usize>> {

FILE: src/sql/planner/planner.rs
  type Planner (line 18) | pub struct Planner<'a, C: Catalog> {
  function new (line 24) | pub fn new(catalog: &'a C) -> Self {
  function build (line 29) | pub fn build(&mut self, statement: ast::Statement) -> Result<Plan> {
  function build_create_table (line 50) | fn build_create_table(&self, name: String, columns: Vec<ast::Column>) ->...
  function build_drop_table (line 81) | fn build_drop_table(&self, name: String, if_exists: bool) -> Result<Plan> {
  function build_delete (line 86) | fn build_delete(&self, table: String, r#where: Option<ast::Expression>) ...
  function build_insert (line 98) | fn build_insert(
  function build_update (line 128) | fn build_update(
  function build_select (line 159) | fn build_select(
  function build_from_clause (line 283) | fn build_from_clause(&self, from: Vec<ast::From>, scope: &mut Scope) -> ...
  function build_from (line 306) | fn build_from(&self, from: ast::From, parent_scope: &mut Scope) -> Resul...
  function build_aggregate (line 365) | fn build_aggregate(
  function build_aggregate_function (line 391) | fn build_aggregate_function(expr: ast::Expression, scope: &Scope) -> Res...
  function is_aggregate_function (line 417) | fn is_aggregate_function(expr: &ast::Expression) -> bool {
  function collect_aggregates (line 425) | fn collect_aggregates(
  function build_select_hidden (line 452) | fn build_select_hidden(
  function build_expression (line 495) | pub fn build_expression(expr: ast::Expression, scope: &Scope) -> Result<...
  function build_constant_value (line 572) | fn build_constant_value(expr: ast::Expression) -> Result<Value> {
  type Scope (line 589) | pub struct Scope {
    method new (line 614) | pub fn new() -> Self {
    method from_table (line 619) | fn from_table(table: &Table) -> Result<Self> {
    method spawn (line 626) | pub fn spawn(&self) -> Self {
    method add_table (line 634) | fn add_table(&mut self, table: &Table, alias: Option<&str>) -> Result<...
    method add_column (line 648) | fn add_column(&mut self, label: Label) -> usize {
    method lookup_column (line 661) | fn lookup_column(&self, table: Option<&str>, name: &str) -> Result<usi...
    method add_aggregate (line 692) | fn add_aggregate(&mut self, expr: &ast::Expression, parent: &Scope) ->...
    method lookup_aggregate (line 712) | fn lookup_aggregate(&self, expr: &ast::Expression) -> Option<usize> {
    method add_passthrough (line 718) | fn add_passthrough(&mut self, parent: &Scope, parent_index: usize, hid...
    method merge (line 732) | fn merge(&mut self, scope: Scope) -> Result<()> {
    method project (line 756) | fn project(&self, expressions: &[(ast::Expression, Option<String>)]) -...
    method remap (line 781) | fn remap(&self, targets: &[Option<usize>]) -> Self {
    method remove_hidden (line 791) | fn remove_hidden(&mut self) -> Option<HashSet<usize>> {
    method remap_hidden (line 813) | fn remap_hidden(&mut self) -> Option<Vec<Option<usize>>> {

FILE: src/sql/types/expression.rs
  type Expression (line 19) | pub enum Expression {
    method display (line 69) | pub fn display<'a>(&'a self, node: &'a Node) -> ExpressionDisplay<'a> {
    method evaluate (line 75) | pub fn evaluate(&self, row: Option<&Row>) -> Result<Value> {
    method walk (line 212) | pub fn walk(&self, visitor: &mut impl FnMut(&Expression) -> bool) -> b...
    method contains (line 244) | pub fn contains(&self, visitor: &impl Fn(&Expression) -> bool) -> bool {
    method transform (line 250) | pub fn transform(
    method into_cnf (line 292) | pub fn into_cnf(self) -> Self {
    method into_cnf_vec (line 314) | pub fn into_cnf_vec(self) -> Vec<Self> {
    method into_nnf (line 331) | pub fn into_nnf(self) -> Self {
    method and_vec (line 354) | pub fn and_vec(exprs: Vec<Expression>) -> Option<Self> {
    method is_column_lookup (line 365) | pub fn is_column_lookup(&self) -> Option<usize> {
    method into_column_values (line 393) | pub fn into_column_values(self, index: usize) -> Vec<Value> {
    method replace_column (line 424) | pub fn replace_column(self, from: usize, to: usize) -> Self {
    method shift_column (line 433) | pub fn shift_column(self, diff: isize) -> Self {
    method from (line 538) | fn from(value: Value) -> Self {
  method fmt (line 446) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  type ExpressionDisplay (line 453) | pub struct ExpressionDisplay<'a> {
  method fmt (line 460) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  function new (line 514) | pub fn new(expr: &'a Expression, node: &'a Node, parent_precedence: u8) ...
  function precedence (line 519) | fn precedence(expr: &Expression) -> u8 {
  function from (line 544) | fn from(value: Value) -> Self {

FILE: src/sql/types/schema.rs
  type Table (line 17) | pub struct Table {
    method validate (line 100) | pub fn validate(&self, catalog: &impl Catalog) -> Result<()> {
    method validate_row (line 181) | pub fn validate_row(&self, row: &Row, update: bool, txn: &impl Transac...
  type Column (line 31) | pub struct Column {
  method fmt (line 57) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {

FILE: src/sql/types/value.rs
  type DataType (line 18) | pub enum DataType {
  method fmt (line 30) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  type Value (line 42) | pub enum Value {
    method datatype (line 166) | pub fn datatype(&self) -> Option<DataType> {
    method is_undefined (line 177) | pub fn is_undefined(&self) -> bool {
    method checked_add (line 186) | pub fn checked_add(&self, other: &Self) -> Result<Self> {
    method checked_div (line 204) | pub fn checked_div(&self, other: &Self) -> Result<Self> {
    method checked_mul (line 220) | pub fn checked_mul(&self, other: &Self) -> Result<Self> {
    method checked_pow (line 238) | pub fn checked_pow(&self, other: &Self) -> Result<Self> {
    method checked_rem (line 264) | pub fn checked_rem(&self, other: &Self) -> Result<Self> {
    method checked_sub (line 280) | pub fn checked_sub(&self, other: &Self) -> Result<Self> {
    method from (line 299) | fn from(v: bool) -> Self {
    method from (line 305) | fn from(v: f64) -> Self {
    method from (line 311) | fn from(v: i64) -> Self {
    method from (line 317) | fn from(v: String) -> Self {
    method from (line 323) | fn from(v: &str) -> Self {
  method fmt (line 69) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  function serialize_f64 (line 83) | fn serialize_f64<S: Serializer>(value: &f64, serializer: S) -> StdResult...
  method eq (line 93) | fn eq(&self, other: &Self) -> bool {
  method hash (line 111) | fn hash<H: Hasher>(&self, hasher: &mut H) {
  method cmp (line 135) | fn cmp(&self, other: &Self) -> Ordering {
  method partial_cmp (line 159) | fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
  type Error (line 329) | type Error = Error;
  function try_from (line 331) | fn try_from(value: Value) -> Result<Self> {
  type Error (line 340) | type Error = Error;
  function try_from (line 342) | fn try_from(value: Value) -> Result<Self> {
  type Error (line 351) | type Error = Error;
  function try_from (line 353) | fn try_from(value: Value) -> Result<Self> {
  type Error (line 362) | type Error = Error;
  method try_from (line 364) | fn try_from(value: Value) -> Result<Self> {
  function from (line 373) | fn from(v: &'a Value) -> Self {
  type Row (line 379) | pub type Row = Vec<Value>;
  type Rows (line 382) | pub type Rows = Box<dyn RowIterator>;
  type RowIterator (line 388) | pub trait RowIterator: Iterator<Item = Result<Row>> + DynClone {}
  type Label (line 396) | pub enum Label {
    method as_header (line 417) | pub fn as_header(&self) -> &str {
    method from (line 437) | fn from(name: Option<String>) -> Self {
  method fmt (line 406) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  function from (line 427) | fn from(label: Label) -> Self {

FILE: src/storage/bitcask.rs
  type BitCask (line 50) | pub struct BitCask {
    method new (line 75) | pub fn new(path: PathBuf) -> Result<Self> {
    method new_maybe_compact (line 84) | pub fn new_maybe_compact(
    method compact (line 173) | pub fn compact(&mut self) -> Result<()> {
  type KeyDir (line 58) | type KeyDir = BTreeMap<Vec<u8>, ValueLocation>;
  type ValueLocation (line 62) | struct ValueLocation {
    method end (line 68) | fn end(&self) -> u64 {
  type ScanIterator (line 119) | type ScanIterator<'a> = ScanIterator<'a>;
  method delete (line 121) | fn delete(&mut self, key: &[u8]) -> Result<()> {
  method flush (line 127) | fn flush(&mut self) -> Result<()> {
  method get (line 136) | fn get(&mut self, key: &[u8]) -> Result<Option<Vec<u8>>> {
  method scan (line 143) | fn scan(&mut self, range: impl RangeBounds<Vec<u8>>) -> Self::ScanIterat...
  method scan_dyn (line 147) | fn scan_dyn(
  method set (line 154) | fn set(&mut self, key: &[u8], value: Vec<u8>) -> Result<()> {
  method status (line 160) | fn status(&mut self) -> Result<Status> {
  method drop (line 199) | fn drop(&mut self) {
  type ScanIterator (line 206) | pub struct ScanIterator<'a> {
  function map (line 212) | fn map(&mut self, item: (&Vec<u8>, &ValueLocation)) -> <Self as Iterator...
  type Item (line 219) | type Item = Result<(Vec<u8>, Vec<u8>)>;
  method next (line 221) | fn next(&mut self) -> Option<Self::Item> {
  method next_back (line 227) | fn next_back(&mut self) -> Option<Self::Item> {
  type Log (line 239) | struct Log {
    method new (line 250) | fn new(path: PathBuf) -> Result<Self> {
    method build_keydir (line 269) | fn build_keydir(&mut self) -> Result<KeyDir> {
    method read_value (line 334) | fn read_value(&mut self, location: ValueLocation) -> Result<Vec<u8>> {
    method write_entry (line 344) | fn write_entry(&mut self, key: &[u8], value: Option<&[u8]>) -> Result<...
  function test_goldenscript (line 387) | fn test_goldenscript(path: &std::path::Path) {
  function lock (line 394) | fn lock() -> Result<()> {
  function recovery (line 410) | fn recovery() -> Result<()> {
  function point_ops_sizes (line 460) | fn point_ops_sizes() -> Result<()> {
  type BitCaskRunner (line 480) | struct BitCaskRunner {
    method run (line 486) | fn run(&mut self, command: &goldenscript::Command) -> StdResult<String...
    method new (line 531) | fn new() -> Self {
    method dump (line 539) | fn dump(&mut self, output: &mut String) -> StdResult<(), Box<dyn StdEr...

FILE: src/storage/engine.rs
  type Engine (line 22) | pub trait Engine: Send {
    method delete (line 29) | fn delete(&mut self, key: &[u8]) -> Result<()>;
    method flush (line 32) | fn flush(&mut self) -> Result<()>;
    method get (line 35) | fn get(&mut self, key: &[u8]) -> Result<Option<Vec<u8>>>;
    method scan (line 38) | fn scan(&mut self, range: impl RangeBounds<Vec<u8>>) -> Self::ScanIter...
    method scan_dyn (line 43) | fn scan_dyn(&mut self, range: (Bound<Vec<u8>>, Bound<Vec<u8>>)) -> Box...
    method scan_prefix (line 46) | fn scan_prefix(&mut self, prefix: &[u8]) -> Self::ScanIterator<'_>
    method set (line 54) | fn set(&mut self, key: &[u8], value: Vec<u8>) -> Result<()>;
    method status (line 57) | fn status(&mut self) -> Result<Status>;
    type ScanIterator (line 258) | type ScanIterator<'a>
    method flush (line 263) | fn flush(&mut self) -> Result<()> {
    method delete (line 269) | fn delete(&mut self, key: &[u8]) -> Result<()> {
    method get (line 275) | fn get(&mut self, key: &[u8]) -> Result<Option<Vec<u8>>> {
    method scan (line 279) | fn scan(&mut self, range: impl RangeBounds<Vec<u8>>) -> Self::ScanIter...
    method scan_dyn (line 283) | fn scan_dyn(
    method set (line 290) | fn set(&mut self, key: &[u8], value: Vec<u8>) -> Result<()> {
    method status (line 296) | fn status(&mut self) -> Result<Status> {
    type ScanIterator (line 316) | type ScanIterator<'a>
    method delete (line 323) | fn delete(&mut self, key: &[u8]) -> Result<()> {
    method flush (line 328) | fn flush(&mut self) -> Result<()> {
    method get (line 333) | fn get(&mut self, key: &[u8]) -> Result<Option<Vec<u8>>> {
    method scan (line 340) | fn scan(&mut self, range: impl RangeBounds<Vec<u8>>) -> Self::ScanIter...
    method scan_dyn (line 349) | fn scan_dyn(
    method set (line 358) | fn set(&mut self, key: &[u8], value: Vec<u8>) -> Result<()> {
    method status (line 363) | fn status(&mut self) -> Result<Status> {
  type ScanIterator (line 61) | pub trait ScanIterator: DoubleEndedIterator<Item = Result<(Vec<u8>, Vec<...
  type Status (line 68) | pub struct Status {
    method garbage_disk_size (line 83) | pub fn garbage_disk_size(&self) -> u64 {
    method garbage_disk_percent (line 88) | pub fn garbage_disk_percent(&self) -> f64 {
  type Runner (line 114) | pub struct Runner<E: Engine> {
  function new (line 119) | pub fn new(engine: E) -> Self {
  function run (line 125) | fn run(&mut self, command: &goldenscript::Command) -> StdResult<String, ...
  function decode_binary (line 204) | pub fn decode_binary(s: &str) -> Vec<u8> {
  function parse_key_range (line 218) | pub fn parse_key_range(s: &str) -> StdResult<impl RangeBounds<Vec<u8>>, ...
  type Emit (line 237) | pub struct Emit<E: Engine> {
  type Operation (line 245) | pub enum Operation {
  function new (line 252) | pub fn new(inner: E, tx: Sender<Operation>) -> Self {
  type Mirror (line 304) | pub struct Mirror<A: Engine, B: Engine> {
  function new (line 310) | pub fn new(a: A, b: B) -> Self {
  type MirrorIterator (line 373) | pub struct MirrorIterator<'a, A: Engine + 'a, B: Engine + 'a> {
  type Item (line 379) | type Item = Result<(Vec<u8>, Vec<u8>)>;
  method next (line 381) | fn next(&mut self) -> Option<Self::Item> {
  method next_back (line 390) | fn next_back(&mut self) -> Option<Self::Item> {

FILE: src/storage/memory.rs
  type Memory (line 11) | pub struct Memory(BTreeMap<Vec<u8>, Vec<u8>>);
    method new (line 15) | pub fn new() -> Self {
  type ScanIterator (line 21) | type ScanIterator<'a> = ScanIterator<'a>;
  method delete (line 23) | fn delete(&mut self, key: &[u8]) -> Result<()> {
  method flush (line 28) | fn flush(&mut self) -> Result<()> {
  method get (line 32) | fn get(&mut self, key: &[u8]) -> Result<Option<Vec<u8>>> {
  method scan (line 36) | fn scan(&mut self, range: impl RangeBounds<Vec<u8>>) -> Self::ScanIterat...
  method scan_dyn (line 40) | fn scan_dyn(
  method set (line 47) | fn set(&mut self, key: &[u8], value: Vec<u8>) -> Result<()> {
  method status (line 52) | fn status(&mut self) -> Result<Status> {
  type ScanIterator (line 63) | pub struct ScanIterator<'a>(Range<'a, Vec<u8>, Vec<u8>>);
  type Item (line 66) | type Item = Result<(Vec<u8>, Vec<u8>)>;
  method next (line 68) | fn next(&mut self) -> Option<Self::Item> {
  method next_back (line 74) | fn next_back(&mut self) -> Option<Self::Item> {
  function test_goldenscript (line 95) | fn test_goldenscript(path: &Path) {

FILE: src/storage/mvcc.rs
  type Version (line 158) | pub type Version = u64;
  type Key (line 167) | pub enum Key<'a> {
  type KeyPrefix (line 205) | enum KeyPrefix<'a> {
  type MVCC (line 229) | pub struct MVCC<E: Engine> {
  function new (line 235) | pub fn new(engine: E) -> Self {
  function begin (line 240) | pub fn begin(&self) -> Result<Transaction<E>> {
  function begin_read_only (line 245) | pub fn begin_read_only(&self) -> Result<Transaction<E>> {
  function begin_as_of (line 250) | pub fn begin_as_of(&self, version: Version) -> Result<Transaction<E>> {
  function resume (line 255) | pub fn resume(&self, state: TransactionState) -> Result<Transaction<E>> {
  function get_unversioned (line 260) | pub fn get_unversioned(&self, key: &[u8]) -> Result<Option<Vec<u8>>> {
  function set_unversioned (line 265) | pub fn set_unversioned(&self, key: &[u8], value: Vec<u8>) -> Result<()> {
  function status (line 270) | pub fn status(&self) -> Result<Status> {
  type Status (line 283) | pub struct Status {
  type Transaction (line 295) | pub struct Transaction<E: Engine> {
  type TransactionState (line 315) | pub struct TransactionState {
    method is_visible (line 344) | fn is_visible(&self, version: Version) -> bool {
  function from (line 356) | fn from(txn: TransactionState) -> Self {
  function from (line 362) | fn from(txn: &'a TransactionState) -> Self {
  function begin (line 371) | fn begin(engine: Arc<Mutex<E>>) -> Result<Self> {
  function begin_read_only (line 397) | fn begin_read_only(engine: Arc<Mutex<E>>, as_of: Option<Version>) -> Res...
  function resume (line 428) | fn resume(engine: Arc<Mutex<E>>, s: TransactionState) -> Result<Self> {
  function scan_active (line 438) | fn scan_active(session: &mut MutexGuard<E>) -> Result<BTreeSet<Version>> {
  function version (line 451) | pub fn version(&self) -> Version {
  function read_only (line 456) | pub fn read_only(&self) -> bool {
  function state (line 462) | pub fn state(&self) -> &TransactionState {
  function commit (line 472) | pub fn commit(self) -> Result<()> {
  function rollback (line 490) | pub fn rollback(self) -> Result<()> {
  function delete (line 515) | pub fn delete(&self, key: &[u8]) -> Result<()> {
  function set (line 520) | pub fn set(&self, key: &[u8], value: Vec<u8>) -> Result<()> {
  function write_version (line 528) | fn write_version(&self, key: &[u8], value: Option<Vec<u8>>) -> Result<()> {
  function get (line 565) | pub fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>> {
  function scan (line 585) | pub fn scan(&self, range: impl RangeBounds<Vec<u8>>) -> ScanIterator<E> {
  function scan_prefix (line 600) | pub fn scan_prefix(&self, prefix: &[u8]) -> ScanIterator<E> {
  type ScanIterator (line 620) | pub struct ScanIterator<E: Engine> {
  method clone (line 635) | fn clone(&self) -> Self {
  constant BUFFER_SIZE (line 648) | const BUFFER_SIZE: usize = if cfg!(test) { 2 } else { 32 };
  function new (line 651) | fn new(
  function fill_buffer (line 661) | fn fill_buffer(&mut self) -> Result<()> {
  type Item (line 702) | type Item = Result<(Vec<u8>, Vec<u8>)>;
  method next (line 704) | fn next(&mut self) -> Option<Self::Item> {
  type VersionIterator (line 716) | struct VersionIterator<'a, I: engine::ScanIterator> {
  function new (line 725) | fn new(txn: &'a TransactionState, inner: I) -> Self {
  function try_next (line 730) | fn try_next(&mut self) -> Result<Option<(Vec<u8>, Version, Vec<u8>)>> {
  type Item (line 745) | type Item = Result<(Vec<u8>, Version, Vec<u8>)>;
  method next (line 747) | fn next(&mut self) -> Option<Self::Item> {
  function test_goldenscript (line 774) | fn test_goldenscript(path: &Path) {
  function key_prefix (line 785) | fn key_prefix(prefix: KeyPrefix, key: Key) {
  type MVCCRunner (line 792) | pub struct MVCCRunner {
    method new (line 802) | fn new() -> Self {
    method get_txn (line 815) | fn get_txn(
    method txn_name (line 824) | fn txn_name(prefix: &Option<String>) -> Result<&str, Box<dyn Error>> {
    method no_txn (line 829) | fn no_txn(command: &goldenscript::Command) -> Result<(), Box<dyn Error...
    method run (line 838) | fn run(&mut self, command: &goldenscript::Command) -> Result<String, B...
    method end_command (line 1067) | fn end_command(&mut self, _: &goldenscript::Command) -> Result<String,...
  type TestEngine (line 799) | type TestEngine = Emit<Mirror<BitCask, Memory>>;

FILE: tests/testcluster.rs
  constant TIMEOUT (line 13) | const TIMEOUT: Duration = Duration::from_secs(5);
  constant SQL_BASE_PORT (line 16) | const SQL_BASE_PORT: u16 = 19600;
  constant RAFT_BASE_PORT (line 19) | const RAFT_BASE_PORT: u16 = 19700;
  type TestCluster (line 29) | pub struct TestCluster {
    method run (line 39) | pub fn run(nodes: u8) -> Result<Self, Box<dyn Error>> {
    method connect (line 73) | pub fn connect(&self) -> Result<Client, Box<dyn Error>> {
  type NodePorts (line 35) | type NodePorts = BTreeMap<NodeID, (u16, u16)>;
  type TestServer (line 80) | pub struct TestServer {
    method run (line 88) | fn run(id: NodeID, dir: &Path, ports: &NodePorts) -> Result<Self, Box<...
    method build_config (line 112) | fn build_config(id: NodeID, dir: &Path, ports: &NodePorts) -> Result<S...
    method assert_alive (line 131) | fn assert_alive(&mut self) {
    method connect (line 138) | fn connect(&self) -> Result<Client, Box<dyn Error>> {
  method drop (line 145) | fn drop(&mut self) {

FILE: tests/tests.rs
  function test_goldenscript (line 27) | fn test_goldenscript(path: &Path) {
  type Runner (line 40) | struct Runner {
    method new (line 46) | fn new() -> Self {
    method get_client (line 51) | fn get_client(&mut self, prefix: &Option<String>) -> Result<&mut Clien...
    method client_name (line 64) | fn client_name(prefix: &Option<String>) -> &str {
    method run (line 71) | fn run(&mut self, command: &goldenscript::Command) -> Result<String, B...

Download .json

Condensed preview — 284 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,580K chars).

[
  {
    "path": ".github/workflows/ci.yml",
    "chars": 704,
    "preview": "name: CI\non: [push, pull_request, workflow_dispatch]\npermissions:\n  contents: read\n\njobs:\n  test:\n    name: Test\n    run"
  },
  {
    "path": ".gitignore",
    "chars": 84,
    "preview": "/cluster/toydb*/data\n/data\n/docs/crate/target\n/target\n.DS_Store\n.vscode/\n**/*.rs.bk\n"
  },
  {
    "path": "Cargo.toml",
    "chars": 1060,
    "preview": "[package]\nname = \"toydb\"\nversion = \"1.0.0\"\ndescription = \"A simple distributed SQL database, built for education\"\nauthor"
  },
  {
    "path": "LICENSE",
    "chars": 11356,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "README.md",
    "chars": 8443,
    "preview": "# <a><img src=\"./docs/architecture/images/toydb.svg\" height=\"40\" valign=\"top\" /></a> toyDB\n\nDistributed SQL database in "
  },
  {
    "path": "cluster/run.sh",
    "chars": 939,
    "preview": "#!/usr/bin/env bash\n#\n# This script builds and runs a 5-node toyDB cluster listening on ports\n# 9601-9605. Config and da"
  },
  {
    "path": "cluster/toydb1/toydb.yaml",
    "chars": 177,
    "preview": "id: 1\ndata_dir: toydb1/data\nlisten_sql: localhost:9601\nlisten_raft: localhost:9701\npeers:\n  '2': localhost:9702\n  '3': l"
  },
  {
    "path": "cluster/toydb2/toydb.yaml",
    "chars": 177,
    "preview": "id: 2\ndata_dir: toydb2/data\nlisten_sql: localhost:9602\nlisten_raft: localhost:9702\npeers:\n  '1': localhost:9701\n  '3': l"
  },
  {
    "path": "cluster/toydb3/toydb.yaml",
    "chars": 177,
    "preview": "id: 3\ndata_dir: toydb3/data\nlisten_sql: localhost:9603\nlisten_raft: localhost:9703\npeers:\n  '1': localhost:9701\n  '2': l"
  },
  {
    "path": "cluster/toydb4/toydb.yaml",
    "chars": 177,
    "preview": "id: 4\ndata_dir: toydb4/data\nlisten_sql: localhost:9604\nlisten_raft: localhost:9704\npeers:\n  '1': localhost:9701\n  '2': l"
  },
  {
    "path": "cluster/toydb5/toydb.yaml",
    "chars": 177,
    "preview": "id: 5\ndata_dir: toydb5/data\nlisten_sql: localhost:9605\nlisten_raft: localhost:9705\npeers:\n  '1': localhost:9701\n  '2': l"
  },
  {
    "path": "config/toydb.yaml",
    "chars": 1112,
    "preview": "# The node ID (must be unique in the cluster), and map of peer IDs and Raft\n# addresses (empty for single node).\nid: 1\np"
  },
  {
    "path": "docs/architecture/README.md",
    "chars": 27,
    "preview": "See [`index.md`](index.md)."
  },
  {
    "path": "docs/architecture/client.md",
    "chars": 2876,
    "preview": "# Client\n\nThe toyDB client is in the [`client`](https://github.com/erikgrinaker/toydb/blob/213e5c02b09f1a3cac6a8bbd0a817"
  },
  {
    "path": "docs/architecture/encoding.md",
    "chars": 7565,
    "preview": "# Key/Value Encoding\n\nThe key/value store uses binary `Vec<u8>` keys and values, so we need an encoding scheme to \ntrans"
  },
  {
    "path": "docs/architecture/index.md",
    "chars": 3203,
    "preview": "# toyDB Architecture\n\ntoyDB is a simple distributed SQL database, intended to illustrate how such systems are built. The"
  },
  {
    "path": "docs/architecture/mvcc.md",
    "chars": 9611,
    "preview": "# MVCC Transactions\n\nTransactions are groups of reads and writes (e.g. to different keys) that are submitted together as"
  },
  {
    "path": "docs/architecture/overview.md",
    "chars": 2677,
    "preview": "# Overview\n\ntoyDB consists of a cluster of nodes that execute [SQL](https://en.wikipedia.org/wiki/SQL)\ntransactions agai"
  },
  {
    "path": "docs/architecture/raft.md",
    "chars": 25854,
    "preview": "# Raft Consensus\n\n[Raft](https://raft.github.io) is a distributed consensus protocol which replicates data across a\nclus"
  },
  {
    "path": "docs/architecture/server.md",
    "chars": 5939,
    "preview": "# Server\n\nNow that we've gone over the individual components, we'll tie them all together in the toyDB\nserver `toydb::Se"
  },
  {
    "path": "docs/architecture/sql-data.md",
    "chars": 6220,
    "preview": "# SQL Data Model\n\nThe SQL data model represents user data in tables and rows. It is made up of data types and schemas,\ni"
  },
  {
    "path": "docs/architecture/sql-execution.md",
    "chars": 6018,
    "preview": "# SQL Execution\n\nNow that the planner and optimizer have done all the hard work of figuring out how to execute a\nquery, "
  },
  {
    "path": "docs/architecture/sql-optimizer.md",
    "chars": 14117,
    "preview": "# SQL Optimization\n\n[Query optimization](https://en.wikipedia.org/wiki/Query_optimization) attempts to improve query\nper"
  },
  {
    "path": "docs/architecture/sql-parser.md",
    "chars": 9237,
    "preview": "# SQL Parsing\n\nWe finally arrive at SQL. The SQL parser is the first stage in processing SQL queries and\nstatements, loc"
  },
  {
    "path": "docs/architecture/sql-planner.md",
    "chars": 11715,
    "preview": "# SQL Planning\n\nThe SQL planner in the [`sql::planner`](https://github.com/erikgrinaker/toydb/tree/c64012e29c5712d6fe028"
  },
  {
    "path": "docs/architecture/sql-raft.md",
    "chars": 5163,
    "preview": "# SQL Raft Replication\n\ntoyDB uses Raft to replicate SQL storage across a cluster of nodes (see the Raft section for\ndet"
  },
  {
    "path": "docs/architecture/sql-storage.md",
    "chars": 9491,
    "preview": "# SQL Storage\n\nThe SQL storage engine, in the [`sql::engine`](https://github.com/erikgrinaker/toydb/tree/213e5c02b09f1a3"
  },
  {
    "path": "docs/architecture/sql.md",
    "chars": 1107,
    "preview": "# SQL Engine\n\nThe SQL engine provides support for the SQL query language, and is the main database interface. It\nuses a "
  },
  {
    "path": "docs/architecture/storage.md",
    "chars": 5429,
    "preview": "# Storage Engine\n\ntoyDB uses an embedded [key/value store](https://en.wikipedia.org/wiki/Key–value_database) for data\nst"
  },
  {
    "path": "docs/architecture.md",
    "chars": 58,
    "preview": "Moved to [`architecture/index.md`](architecture/index.md)."
  },
  {
    "path": "docs/crate/Cargo.toml",
    "chars": 306,
    "preview": "[package]\nname = \"toydb\"\nversion = \"1.0.1\"\ndescription = \"A simple distributed SQL database, built for education\"\nauthor"
  },
  {
    "path": "docs/crate/README.md",
    "chars": 687,
    "preview": "# toyDB\n\ntoyDB is a distributed SQL database in Rust, built from scratch as an educational project. Main\nfeatures:\n\n* Ra"
  },
  {
    "path": "docs/crate/src/lib.rs",
    "chars": 186,
    "preview": "//! This crate is just a simple README.md placeholder. toydb is not intended to be used as a\n//! library, and is not dis"
  },
  {
    "path": "docs/examples.md",
    "chars": 17047,
    "preview": "# SQL Examples\n\nThe following examples demonstrate some of toyDB's SQL features. For more details, see the\n[SQL referenc"
  },
  {
    "path": "docs/references.md",
    "chars": 3301,
    "preview": "# References\n\nThis is the main research material I used while building toyDB. It is a subset of my\n[reading list](https:"
  },
  {
    "path": "docs/sql.md",
    "chars": 17226,
    "preview": "# SQL Reference\n\n## Data Types\n\nThe following data types are supported:\n\n* `BOOLEAN` (`BOOL`): logical truth values, i.e"
  },
  {
    "path": "docs/tools/update-links.py",
    "chars": 1918,
    "preview": "#!/usr/bin/env python3\n#\n# Updates GitHub code links to the latest commit SHA.\n\nimport os, re, sys, argparse\nimport requ"
  },
  {
    "path": "rust-toolchain",
    "chars": 6,
    "preview": "1.93.1"
  },
  {
    "path": "rustfmt.toml",
    "chars": 29,
    "preview": "use_small_heuristics = \"Max\"\n"
  },
  {
    "path": "src/bin/toydb.rs",
    "chars": 5069,
    "preview": "//! The toyDB server. Takes configuration from a config file (default\n//! config/toydb.yaml) or corresponding TOYDB_ env"
  },
  {
    "path": "src/bin/toydump.rs",
    "chars": 1543,
    "preview": "//! toydump is a debug tool that prints a toyDB BitCask database in\n//! human-readable form. It can print both the SQL d"
  },
  {
    "path": "src/bin/toysql.rs",
    "chars": 11032,
    "preview": "//! toySQL is a command-line client for toyDB. It connects to a toyDB node\n//! (default localhost:9601) and executes SQL"
  },
  {
    "path": "src/bin/workload.rs",
    "chars": 18577,
    "preview": "//! Runs toyDB workload benchmarks. By default, it assumes a running 5-node\n//! cluster as launched via cluster/run.sh, "
  },
  {
    "path": "src/client.rs",
    "chars": 4563,
    "preview": "use std::io::{BufReader, BufWriter, Write as _};\nuse std::net::{TcpStream, ToSocketAddrs};\nuse std::time::Duration;\n\nuse"
  },
  {
    "path": "src/encoding/bincode.rs",
    "chars": 2070,
    "preview": "//! Bincode is used to encode values, both in key/value stores and the toyDB\n//! network protocol. It is a Rust-specific"
  },
  {
    "path": "src/encoding/format.rs",
    "chars": 9850,
    "preview": "//! Formats raw keys and values, recursively where necessary. Handles both both\n//! Raft, MVCC, SQL, and raw binary data"
  },
  {
    "path": "src/encoding/keycode.rs",
    "chars": 24555,
    "preview": "//! Keycode is a lexicographical order-preserving binary encoding for use with\n//! keys in key/value stores. It is desig"
  },
  {
    "path": "src/encoding/mod.rs",
    "chars": 2558,
    "preview": "//! Binary data encodings.\n//!\n//! * keycode: used for keys in the key/value store.\n//! * bincode: used for values in th"
  },
  {
    "path": "src/error.rs",
    "chars": 6921,
    "preview": "use std::fmt::Display;\n\nuse serde::{Deserialize, Serialize};\n\n/// toyDB errors.\n#[derive(Clone, Debug, PartialEq, Serial"
  },
  {
    "path": "src/lib.rs",
    "chars": 331,
    "preview": "#![warn(clippy::all)]\n#![allow(clippy::large_enum_variant)]\n#![allow(clippy::module_inception)]\n#![allow(clippy::type_co"
  },
  {
    "path": "src/raft/log.rs",
    "chars": 27981,
    "preview": "use std::ops::{Bound, RangeBounds};\n\nuse serde::{Deserialize, Serialize};\n\nuse super::{NodeID, Term};\nuse crate::encodin"
  },
  {
    "path": "src/raft/message.rs",
    "chars": 8402,
    "preview": "use std::collections::BTreeMap;\n\nuse serde::{Deserialize, Serialize};\n\nuse super::{Entry, Index, NodeID, Term};\nuse crat"
  },
  {
    "path": "src/raft/mod.rs",
    "chars": 14390,
    "preview": "//! Implements the Raft distributed consensus protocol.\n//!\n//! For details, see Diego Ongaro's original writings:\n//!\n/"
  },
  {
    "path": "src/raft/node.rs",
    "chars": 92618,
    "preview": "use std::cmp::{max, min};\nuse std::collections::{HashMap, HashSet, VecDeque};\nuse std::ops::Range;\n\nuse crossbeam::chann"
  },
  {
    "path": "src/raft/state.rs",
    "chars": 7504,
    "preview": "use super::{Entry, Index};\nuse crate::error::Result;\n\n/// A Raft-managed state machine. Raft itself does not care what t"
  },
  {
    "path": "src/raft/testscripts/log/append",
    "chars": 1721,
    "preview": "# Appending an entry with term 0 fails.\n!append foo\n---\nPanic: can't append entry in term 0\n\n# Appending to an empty log"
  },
  {
    "path": "src/raft/testscripts/log/commit",
    "chars": 1920,
    "preview": "# Committing fails on an empty engine.\n!commit 1\n---\nPanic: commit index 1 does not exist\n\n# Add some entries.\nset_term "
  },
  {
    "path": "src/raft/testscripts/log/get",
    "chars": 350,
    "preview": "# get returns None on an empty engine.\nget 1\n---\nNone\n\n# Append a few entries.\nset_term 1\nappend\nappend foo\nset_term 2\na"
  },
  {
    "path": "src/raft/testscripts/log/has",
    "chars": 452,
    "preview": "# has returns false on an empty engine.\nhas 1@1\n---\nfalse\n\n# Append a few entries.\nset_term 1\nappend\nappend foo\nset_term"
  },
  {
    "path": "src/raft/testscripts/log/init",
    "chars": 331,
    "preview": "# Tests that the log correctly initializes cached state when opened.\n\nset_term 1\n---\nok\n\nappend foo\nset_term 2 7\nappend "
  },
  {
    "path": "src/raft/testscripts/log/scan",
    "chars": 835,
    "preview": "# scan works on an empty engine, even when given indexes.\nscan\nscan 3..7\n---\nok\n\n# Append a few entries.\nset_term 1\nappe"
  },
  {
    "path": "src/raft/testscripts/log/scan_apply",
    "chars": 846,
    "preview": "# scan_apply works on an empty engine, even when given an applied index.\nscan_apply 0\nscan_apply 3\n---\nok\n\n# Append a fe"
  },
  {
    "path": "src/raft/testscripts/log/splice",
    "chars": 5173,
    "preview": "# Splicing at index 0 should fail.\n!splice 0@1=foo\n---\nPanic: spliced entry has index or term 0\n\n# Splicing without a te"
  },
  {
    "path": "src/raft/testscripts/log/status",
    "chars": 556,
    "preview": "# Status on empty engine works.\nstatus engine=true\n---\nterm=0 last=0@0 commit=0@0 vote=None engine=Status {\n    name: \"b"
  },
  {
    "path": "src/raft/testscripts/log/term",
    "chars": 1177,
    "preview": "# get_term works on empty engine.\nget_term\n---\nterm=0 vote=None\n\n# Storing a 0 term errors.\n!set_term 0\n---\nPanic: can't"
  },
  {
    "path": "src/raft/testscripts/node/append",
    "chars": 1125,
    "preview": "# Can append single entries in steady state.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 commit=1@1 applied=1 pro"
  },
  {
    "path": "src/raft/testscripts/node/append_base_missing",
    "chars": 1995,
    "preview": "# Appends with a base beyond the node's last log entry should result in a\n# rejection at the index following the last en"
  },
  {
    "path": "src/raft/testscripts/node/append_base_missing_all",
    "chars": 1985,
    "preview": "# Appends to a node with an empty log should result in a rejection of index 1,\n# allowing the leader to send the entire "
  },
  {
    "path": "src/raft/testscripts/node/append_commit_quorum",
    "chars": 7606,
    "preview": "# Append results in a leader-side commit once a quorum is reached for the\n# relevant entries.\n\ncluster nodes=6 leader=1\n"
  },
  {
    "path": "src/raft/testscripts/node/append_initial",
    "chars": 2360,
    "preview": "# An initial append at base 0 can have a single or multiple entries.\n\ncluster nodes=3\n---\nn1@0 follower() last=0@0 commi"
  },
  {
    "path": "src/raft/testscripts/node/append_max_entries",
    "chars": 1722,
    "preview": "# Large appends are limited to MAX_APPEND_ENTRIES, and each successful append\n# triggers the next append batch.\n\ncluster"
  },
  {
    "path": "src/raft/testscripts/node/append_pipeline",
    "chars": 2379,
    "preview": "# Multiple appends are pipelined before acks are received, without\n# retransmitting the unacked entries.\n\ncluster nodes="
  },
  {
    "path": "src/raft/testscripts/node/append_probe_divergent_first",
    "chars": 6783,
    "preview": "# Appends to a previous leader and follower with a divergent tail all\n# the way back to the first entry works.\n\ncluster "
  },
  {
    "path": "src/raft/testscripts/node/append_probe_divergent_long",
    "chars": 7811,
    "preview": "# Appends to a previous leader and follower with a long divergent tail requires\n# the leader to repeatedly probe until i"
  },
  {
    "path": "src/raft/testscripts/node/append_probe_divergent_short",
    "chars": 5734,
    "preview": "# Appends to a previous leader and follower with a shorter divergent tail skips\n# the missing entries before probing.\n\nc"
  },
  {
    "path": "src/raft/testscripts/node/append_probe_divergent_single",
    "chars": 3429,
    "preview": "# An append replaces a conflict at the tail for a single term.\n\ncluster nodes=5 leader=1\n---\nn1@1 leader last=1@1 commit"
  },
  {
    "path": "src/raft/testscripts/node/append_response_beyond_last_index_panics",
    "chars": 661,
    "preview": "# A successful AppendResponse with last index beyond leader's last log\n# should panic.\n\ncluster nodes=3 leader=1\n---\nn1@"
  },
  {
    "path": "src/raft/testscripts/node/append_response_stale_reject",
    "chars": 1880,
    "preview": "# A successful AppendResponse with a reject_index below the match index\n# should be ignored.\n\ncluster nodes=3 leader=1\n-"
  },
  {
    "path": "src/raft/testscripts/node/election",
    "chars": 1922,
    "preview": "# A node campaigns and wins leadership once the election timeout passes. Uses\n# ticks directly to also test tick handlin"
  },
  {
    "path": "src/raft/testscripts/node/election_candidate_behind_leader",
    "chars": 4267,
    "preview": "# A candidate that lags behind the leader can still win the election\n# as long as it isn't behind the quorum.\n\ncluster n"
  },
  {
    "path": "src/raft/testscripts/node/election_candidate_behind_quorum",
    "chars": 2310,
    "preview": "# A candidate that lags behind the quorum can't win an election.\n\ncluster nodes=5 leader=1\n---\nn1@1 leader last=1@1 comm"
  },
  {
    "path": "src/raft/testscripts/node/election_contested",
    "chars": 2731,
    "preview": "# A leader can be elected even when there are multiple candidates.\n\ncluster nodes=5 election_timeout=2\n---\nn1@0 follower"
  },
  {
    "path": "src/raft/testscripts/node/election_tie",
    "chars": 2089,
    "preview": "# No leader can be elected with an election tie.\n\ncluster nodes=3 election_timeout=2\n---\nn1@0 follower() last=0@0 commit"
  },
  {
    "path": "src/raft/testscripts/node/election_tie_even",
    "chars": 2695,
    "preview": "# No leader can be elected with an election tie between an even number of nodes.\n\ncluster nodes=4 election_timeout=2\n---"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_commits_follower",
    "chars": 1561,
    "preview": "# A heartbeat will commit and apply an entry on a follower.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 commit=1@"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_converts_candidate",
    "chars": 1615,
    "preview": "# A heartbeat from a leader should convert a candidate in the same term to a\n# follower.\n\ncluster nodes=3\n---\nn1@0 follo"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_converts_follower",
    "chars": 1626,
    "preview": "# A heartbeat from a leader should convert a follower of a different leader in a\n# past term to a follower.\n\ncluster nod"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_converts_follower_leaderless",
    "chars": 1205,
    "preview": "# A heartbeat from a leader should convert a leaderless follower.\n\ncluster nodes=3\n---\nn1@0 follower() last=0@0 commit=0"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_converts_leader",
    "chars": 1613,
    "preview": "# A heartbeat from a leader should convert a leader in a past term to a\n# follower.\n\ncluster nodes=3 leader=3\n---\nn1@1 f"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_lost_append_duplicate",
    "chars": 2462,
    "preview": "# Duplicate heartbeats and responses with a lost append will\n# trigger duplicate resends, but it will eventually resolve"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_lost_append_multiple",
    "chars": 2824,
    "preview": "# A heartbeat response triggers a probe and resend of lost appends.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 c"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_lost_append_single",
    "chars": 1657,
    "preview": "# A heartbeat response triggers a resend of a lost append.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 commit=1@1"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_lost_read",
    "chars": 1509,
    "preview": "# Heartbeats will recover from a lost read message.\n\ncluster nodes=5 leader=1\n---\nn1@1 leader last=1@1 commit=1@1 applie"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_match_commits",
    "chars": 1962,
    "preview": "# A heartbeat response can advance a follower match index and commit+apply.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader la"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_multiple_leaders_panic",
    "chars": 732,
    "preview": "# A heartbeat will panic if there are multiple leaders in a term.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 com"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_old_commit_index",
    "chars": 737,
    "preview": "# A heartbeat with an old commit index is ignored by a follower.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 comm"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_old_last_index",
    "chars": 733,
    "preview": "# A heartbeat with an old last index is matched by a follower.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 commit"
  },
  {
    "path": "src/raft/testscripts/node/heartbeat_probe_divergent",
    "chars": 5188,
    "preview": "# A heartbeat while the leader is probing a follower with a long divergent tail\n# doesn't disrupt the probing, and won't"
  },
  {
    "path": "src/raft/testscripts/node/old_campaign_rejected",
    "chars": 1719,
    "preview": "# Old campaign messages (in the same term) are ignored by leaders and followers\n# once a leader is elected.\n\ncluster nod"
  },
  {
    "path": "src/raft/testscripts/node/old_campaign_response_ignored",
    "chars": 3321,
    "preview": "# Old campaign responses (in the same term) are ignored by leaders and followers\n# once a leader is elected.\n\ncluster no"
  },
  {
    "path": "src/raft/testscripts/node/old_heartbeat_ignored",
    "chars": 1018,
    "preview": "# A heartbeat from an old leader should be ignored.\n\n# Make n3 leader.\ncluster nodes=3 leader=3\n---\nn1@1 follower(n3) la"
  },
  {
    "path": "src/raft/testscripts/node/request_candidate_abort",
    "chars": 751,
    "preview": "# Client read/write requests fail on candidates.\n\ncluster nodes=3\n---\nn1@0 follower() last=0@0 commit=0@0 applied=0\nn2@0"
  },
  {
    "path": "src/raft/testscripts/node/request_follower",
    "chars": 1603,
    "preview": "# Client read/write requests are proxied by followers.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 commit=1@1 app"
  },
  {
    "path": "src/raft/testscripts/node/request_follower_campaign_abort",
    "chars": 1043,
    "preview": "# A follower aborts in-flight requests when it steps down.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 commit=1@1"
  },
  {
    "path": "src/raft/testscripts/node/request_follower_disconnect_stall",
    "chars": 1187,
    "preview": "# Client read/write requests stall if the follower is disconnected from the\n# leader when the request is submitted. They"
  },
  {
    "path": "src/raft/testscripts/node/request_follower_leaderless_abort",
    "chars": 640,
    "preview": "# Client read/write requests fail on leaderless followers.\n\ncluster nodes=3\n---\nn1@0 follower() last=0@0 commit=0@0 appl"
  },
  {
    "path": "src/raft/testscripts/node/request_leader",
    "chars": 1290,
    "preview": "# Client read/write requests succeed on leaders.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 commit=1@1 applied=1"
  },
  {
    "path": "src/raft/testscripts/node/request_leader_campaign_abort",
    "chars": 1061,
    "preview": "# A leader aborts in-flight requests when it steps down.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 commit=1@1 a"
  },
  {
    "path": "src/raft/testscripts/node/request_leader_change_linearizability",
    "chars": 3125,
    "preview": "# A new leader that's behind on commit/apply shouldn't serve stale reads.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last"
  },
  {
    "path": "src/raft/testscripts/node/request_leader_disconnect",
    "chars": 1547,
    "preview": "# Client read/write requests succeed if the leader is disconnected from the\n# quorum when the request is submitted but i"
  },
  {
    "path": "src/raft/testscripts/node/request_leader_read_quorum",
    "chars": 948,
    "preview": "# Client read requests are only processed once a quorum confirms the read sequence.\n\ncluster nodes=5 leader=1\n---\nn1@1 l"
  },
  {
    "path": "src/raft/testscripts/node/request_leader_read_quorum_sequence",
    "chars": 2265,
    "preview": "# Client read requests are only served once a quorum confirm the read sequence\n# number, including higher sequence numbe"
  },
  {
    "path": "src/raft/testscripts/node/request_leader_single",
    "chars": 770,
    "preview": "# Client read/write requests succeed on a lone leader.\n\ncluster nodes=1\n---\nn1@1 leader last=1@1 commit=1@1 applied=1 pr"
  },
  {
    "path": "src/raft/testscripts/node/request_status",
    "chars": 2140,
    "preview": "# Status requests return the cluster status.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last=1@1 commit=1@1 applied=1 pro"
  },
  {
    "path": "src/raft/testscripts/node/request_status_single",
    "chars": 857,
    "preview": "# Status requests return the cluster status on a single node.\n\ncluster nodes=1\n---\nn1@1 leader last=1@1 commit=1@1 appli"
  },
  {
    "path": "src/raft/testscripts/node/restart",
    "chars": 3080,
    "preview": "# Restarting a cluster that's fully caught up retains the existing state and\n# allows trivially electing a new leader.\n\n"
  },
  {
    "path": "src/raft/testscripts/node/restart_apply",
    "chars": 880,
    "preview": "# Restarting a node and wiping its state machine will reapply the state.\n\ncluster nodes=3 leader=1\n---\nn1@1 leader last="
  },
  {
    "path": "src/raft/testscripts/node/restart_commit_recover",
    "chars": 2152,
    "preview": "# Restarting the cluster and wiping the commit indexes allows\n# a new leader to recover the commit index.\n\ncluster nodes"
  },
  {
    "path": "src/raft/testscripts/node/restart_term_vote",
    "chars": 1052,
    "preview": "# The term/vote is retained across a restart.\n\ncluster nodes=3\n---\nn1@0 follower() last=0@0 commit=0@0 applied=0\nn2@0 fo"
  },
  {
    "path": "src/raft/testscripts/node/tick_candidate",
    "chars": 731,
    "preview": "# Ticking a candidate will eventually hold a new election in a later term.\n\ncluster nodes=3 heartbeat_interval=1 electio"
  },
  {
    "path": "src/raft/testscripts/node/tick_follower",
    "chars": 1080,
    "preview": "# Ticking a follower will transition it to candidate if it hasn't\n# heard from the leader in a while.\n\ncluster nodes=3 l"
  },
  {
    "path": "src/raft/testscripts/node/tick_follower_leaderless",
    "chars": 613,
    "preview": "# Ticking a leaderless follower will eventually transition it to candidate.\n\ncluster nodes=3 heartbeat_interval=1 electi"
  },
  {
    "path": "src/raft/testscripts/node/tick_leader",
    "chars": 982,
    "preview": "# Ticking a leader should cause it to emit heartbeats, even when it doesn't\n# hear back from any followers.\n\ncluster nod"
  },
  {
    "path": "src/server.rs",
    "chars": 13318,
    "preview": "use std::collections::HashMap;\nuse std::io::{BufReader, BufWriter, Write as _};\nuse std::net::{TcpListener, TcpStream, T"
  },
  {
    "path": "src/sql/engine/engine.rs",
    "chars": 3732,
    "preview": "use std::collections::{BTreeMap, BTreeSet};\n\nuse crate::errinput;\nuse crate::error::Result;\nuse crate::sql::execution::S"
  },
  {
    "path": "src/sql/engine/local.rs",
    "chars": 15487,
    "preview": "use std::borrow::Cow;\nuse std::collections::{BTreeMap, BTreeSet};\nuse std::slice;\n\nuse itertools::Itertools as _;\nuse se"
  },
  {
    "path": "src/sql/engine/mod.rs",
    "chars": 440,
    "preview": "//! The SQL engine provides SQL data storage and access, as well as session and\n//! transaction management. The `Local` "
  },
  {
    "path": "src/sql/engine/raft.rs",
    "chars": 15188,
    "preview": "use std::borrow::Cow;\nuse std::collections::{BTreeMap, BTreeSet};\n\nuse crossbeam::channel::Sender;\nuse serde::de::Deseri"
  },
  {
    "path": "src/sql/execution/aggregator.rs",
    "chars": 5036,
    "preview": "use std::collections::BTreeMap;\n\nuse itertools::Itertools as _;\n\nuse crate::error::Result;\nuse crate::sql::planner::Aggr"
  },
  {
    "path": "src/sql/execution/executor.rs",
    "chars": 13555,
    "preview": "use std::cmp::Ordering;\nuse std::collections::{BTreeMap, HashMap};\n\nuse itertools::{Itertools as _, izip};\n\nuse super::a"
  },
  {
    "path": "src/sql/execution/join.rs",
    "chars": 6755,
    "preview": "use std::collections::HashMap;\nuse std::iter::Peekable;\n\nuse crate::errinput;\nuse crate::error::Result;\nuse crate::sql::"
  },
  {
    "path": "src/sql/execution/mod.rs",
    "chars": 182,
    "preview": "//! Executes statements and plans.\n\nmod aggregator;\nmod executor;\nmod join;\nmod session;\n\npub use executor::{ExecutionRe"
  },
  {
    "path": "src/sql/execution/session.rs",
    "chars": 8626,
    "preview": "use itertools::Itertools as _;\nuse log::error;\nuse serde::{Deserialize, Serialize};\n\nuse crate::error::{Error, Result};\n"
  },
  {
    "path": "src/sql/mod.rs",
    "chars": 14309,
    "preview": "//! Implements a SQL execution engine. A SQL statement flows through the engine\n//! as follows:\n//!\n//! 1. The `toySQL` "
  },
  {
    "path": "src/sql/parser/ast.rs",
    "chars": 10951,
    "preview": "use std::collections::BTreeMap;\nuse std::hash::{Hash, Hasher};\n\nuse crate::sql::types::DataType;\n\n/// SQL statements are"
  },
  {
    "path": "src/sql/parser/lexer.rs",
    "chars": 16075,
    "preview": "use std::fmt::Display;\nuse std::iter::Peekable;\nuse std::str::Chars;\n\nuse crate::errinput;\nuse crate::error::Result;\n\n//"
  },
  {
    "path": "src/sql/parser/mod.rs",
    "chars": 179,
    "preview": "//! Parses raw SQL strings into a structured Abstract Syntax Tree.\n\npub mod ast;\nmod lexer;\nmod parser;\n\npub use lexer::"
  },
  {
    "path": "src/sql/parser/parser.rs",
    "chars": 35586,
    "preview": "use std::iter::Peekable;\nuse std::ops::Add;\n\nuse super::{Keyword, Lexer, Token, ast};\nuse crate::errinput;\nuse crate::er"
  },
  {
    "path": "src/sql/planner/mod.rs",
    "chars": 307,
    "preview": "//! The planner builds and optimizes an execution plan based on a SQL\n//! statement's Abstract Syntax Tree (AST) generat"
  },
  {
    "path": "src/sql/planner/optimizer.rs",
    "chars": 17699,
    "preview": "use std::collections::HashMap;\nuse std::fmt::Debug;\nuse std::sync::LazyLock;\n\nuse super::Node;\nuse crate::error::Result;"
  },
  {
    "path": "src/sql/planner/plan.rs",
    "chars": 26956,
    "preview": "use std::collections::HashMap;\nuse std::fmt::Display;\n\nuse itertools::Itertools as _;\nuse serde::{Deserialize, Serialize"
  },
  {
    "path": "src/sql/planner/planner.rs",
    "chars": 36516,
    "preview": "use std::collections::{BTreeMap, HashMap, HashSet};\n\nuse itertools::{Either, Itertools as _};\n\nuse super::plan::{Aggrega"
  },
  {
    "path": "src/sql/testscripts/expressions/cnf",
    "chars": 844,
    "preview": "# Tests conversion of logical expressions into canonical normal form.\n\n# Noop for non-boolean expressions.\n[cnf]> 1 + 2\n"
  },
  {
    "path": "src/sql/testscripts/expressions/func",
    "chars": 771,
    "preview": "# Tests function calls.\n\n# Function names are case-insensitive.\n> sqrt(1)\n> SQRT(1)\n---\n1.0\n1.0\n\n# A space is allowed ar"
  },
  {
    "path": "src/sql/testscripts/expressions/func_sqrt",
    "chars": 799,
    "preview": "# Tests sqrt().\n\n# Integers work, and return floats.\n[expr]> sqrt(2)\n[expr]> sqrt(100)\n---\n1.4142135623730951 ← SquareRo"
  },
  {
    "path": "src/sql/testscripts/expressions/literals",
    "chars": 1708,
    "preview": "# Tests parsing and evaluation of literals and constants.\n\n# Boolean and float constants.\ntrue\nfalse\nnull\ninfinity\nnan\n-"
  },
  {
    "path": "src/sql/testscripts/expressions/op_compare_equal",
    "chars": 837,
    "preview": "# Tests the = equality operator.\n\n# Booleans.\n> TRUE = TRUE\n> TRUE = FALSE\n> FALSE = TRUE\n---\nTRUE\nFALSE\nFALSE\n\n# Intege"
  },
  {
    "path": "src/sql/testscripts/expressions/op_compare_greater",
    "chars": 1346,
    "preview": "# Tests the > greater than operator.\n\n# Booleans.\n> TRUE > FALSE\n> FALSE > TRUE\n> TRUE > TRUE\n> FALSE > FALSE\n---\nTRUE\nF"
  },
  {
    "path": "src/sql/testscripts/expressions/op_compare_greater_equal",
    "chars": 1348,
    "preview": "# Tests the >= greater than operator.\n\n# This is implemented as > OR =, just verify this for a few basic cases.\n\n[expr]>"
  },
  {
    "path": "src/sql/testscripts/expressions/op_compare_is_nan",
    "chars": 383,
    "preview": "# Tests the IS NAN equality operator.\n\n> 0.0 IS NAN\n> NAN IS NAN\n> NULL IS NAN\n---\nFALSE\nTRUE\nNULL\n\n!> FALSE IS NAN\n!> 0"
  },
  {
    "path": "src/sql/testscripts/expressions/op_compare_is_null",
    "chars": 186,
    "preview": "# Tests the IS NULL equality operator.\n\n> FALSE IS NULL\n> 0 IS NULL\n> 0.0 IS NULL\n> '' IS NULL\n> 'null' IS NULL\n> NAN IS"
  },
  {
    "path": "src/sql/testscripts/expressions/op_compare_lesser",
    "chars": 1345,
    "preview": "# Tests the < less than operator.\n\n# Booleans.\n> FALSE < TRUE\n> TRUE < FALSE\n> TRUE < TRUE\n> FALSE < FALSE\n---\nTRUE\nFALS"
  },
  {
    "path": "src/sql/testscripts/expressions/op_compare_lesser_equal",
    "chars": 1327,
    "preview": "# Tests the <= less than or equal operator.\n\n# This is implemented as < OR =, just verify this for a few basic cases.\n\n["
  },
  {
    "path": "src/sql/testscripts/expressions/op_compare_not_equal",
    "chars": 772,
    "preview": "# Tests the != inequality operator.\n\n# != is a combination of NOT and =, just verify that for a few basic cases.\n\n[expr]"
  },
  {
    "path": "src/sql/testscripts/expressions/op_logic_and",
    "chars": 862,
    "preview": "# Tests the AND logical operator.\n\n# Basic truth table.\n> TRUE AND TRUE\n> TRUE AND FALSE\n> FALSE AND TRUE\n> FALSE AND FA"
  },
  {
    "path": "src/sql/testscripts/expressions/op_logic_not",
    "chars": 255,
    "preview": "# Tests the NOT logical operator.\n\n> NOT TRUE\n> NOT FALSE\n> NOT NULL\n---\nFALSE\nTRUE\nNULL\n\n# Non-booleans.\n!> NOT 1\n!> NO"
  },
  {
    "path": "src/sql/testscripts/expressions/op_logic_or",
    "chars": 830,
    "preview": "# Tests the OR logical operator.\n\n# Basic truth table.\n> TRUE OR TRUE\n> TRUE OR FALSE\n> FALSE OR TRUE\n> FALSE OR FALSE\n-"
  },
  {
    "path": "src/sql/testscripts/expressions/op_math_add",
    "chars": 1301,
    "preview": "# Tests the + addition operator.\n\n# Simple integer addition.\n[expr]> 1 + 2\n[expr]> 1 + -3\n[expr]> 1 + -2 + 3\n---\n3 ← Add"
  },
  {
    "path": "src/sql/testscripts/expressions/op_math_divide",
    "chars": 1460,
    "preview": "# Tests the / division operator.\n\n# Integers.\n[expr]> 9 / 3\n[expr]> 8 / 3\n[expr]> 8 / -3\n---\n3 ← Divide(Constant(Integer"
  },
  {
    "path": "src/sql/testscripts/expressions/op_math_exponentiate",
    "chars": 1521,
    "preview": "# Tests the ^ exponentiation operator.\n\n# Integers.\n[expr]> 2 ^ 3\n[expr]> 2 ^ 0\n[expr]> 0 ^ 2\n[expr]> 9 ^ -3\n---\n8 ← Exp"
  },
  {
    "path": "src/sql/testscripts/expressions/op_math_factorial",
    "chars": 964,
    "preview": "# Tests the ! factorial suffix operator.\n\n# Integer works.\n[expr]> 3!\n---\n6 ← Factorial(Constant(Integer(3)))\n\n# But flo"
  },
  {
    "path": "src/sql/testscripts/expressions/op_math_identity",
    "chars": 499,
    "preview": "# Tests the + identity prefix operator.\n\n# Integer and float works.\n[expr]> +1\n[expr]> +3.14\n---\n1 ← Identity(Constant(I"
  },
  {
    "path": "src/sql/testscripts/expressions/op_math_multiply",
    "chars": 1172,
    "preview": "# Tests the * multiplication operator.\n\n# Integers.\n[expr]> 2 * 3\n[expr]> 2 * -3\n---\n6 ← Multiply(Constant(Integer(2)), "
  },
  {
    "path": "src/sql/testscripts/expressions/op_math_negate",
    "chars": 536,
    "preview": "# Tests the - negation prefix operator.\n\n# Integer and float works.\n[expr]> -1\n[expr]> -3.14\n---\n-1 ← Negate(Constant(In"
  },
  {
    "path": "src/sql/testscripts/expressions/op_math_remainder",
    "chars": 1180,
    "preview": "# Tests the % remainder operator.\n#\n# Note that remainder is not the same as modulo: the former has the sign of the\n# di"
  },
  {
    "path": "src/sql/testscripts/expressions/op_math_subtract",
    "chars": 1386,
    "preview": "# Tests the - subtraction operator.\n\n# Simple integer subtraction.\n[expr]> 2 - 1\n[expr]> 2 - 3\n[expr]> 1 - -3 - 2\n---\n1 "
  },
  {
    "path": "src/sql/testscripts/expressions/op_precedence",
    "chars": 6584,
    "preview": "# Tests operator precedence. Test each precedence level against the operators\n# beside and immediately below it, in orde"
  },
  {
    "path": "src/sql/testscripts/expressions/op_string_like",
    "chars": 1981,
    "preview": "# Tests the LIKE string pattern matching operator.\n\n# Multi-character matches.\n> 'abcde' LIKE 'a%e'\n> 'abcde' LIKE 'abc%"
  },
  {
    "path": "src/sql/testscripts/optimizers/constant_folder",
    "chars": 2970,
    "preview": "# Tests the constant folding optimizer.\n\n> CREATE TABLE test (id INT PRIMARY KEY, value STRING)\n> INSERT INTO test VALUE"
  },
  {
    "path": "src/sql/testscripts/optimizers/filter_pushdown",
    "chars": 6294,
    "preview": "# Tests filter pushdown.\n\n> CREATE TABLE test (id INT PRIMARY KEY, value STRING)\n> INSERT INTO test VALUES (1, 'a'), (2,"
  },
  {
    "path": "src/sql/testscripts/optimizers/hash_join",
    "chars": 2120,
    "preview": "# Tests the switch to hash joins where appropriate.\n\n> CREATE TABLE test (id INT PRIMARY KEY, value STRING)\n> INSERT INT"
  },
  {
    "path": "src/sql/testscripts/optimizers/index_lookup",
    "chars": 7630,
    "preview": "# Tests the index_lookup optimizer.\n\n> CREATE TABLE test (id INT PRIMARY KEY, value STRING INDEX, \"float\" FLOAT INDEX)\n>"
  },
  {
    "path": "src/sql/testscripts/optimizers/short_circuit",
    "chars": 4368,
    "preview": "# Tests the short circuiting optimizer.\n\n> CREATE TABLE test (id INT PRIMARY KEY, value STRING)\n> INSERT INTO test VALUE"
  },
  {
    "path": "src/sql/testscripts/queries/aggregate",
    "chars": 6372,
    "preview": "# Tests aggregate functions.\n\n> CREATE TABLE test ( \\\n    id INT PRIMARY KEY, \\\n    \"bool\" BOOLEAN, \\\n    \"int\" INTEGER,"
  },
  {
    "path": "src/sql/testscripts/queries/clauses",
    "chars": 2143,
    "preview": "# Tests the ordering of SELECT clauses.\n\n> CREATE TABLE test ( \\\n    id INT PRIMARY KEY, \\\n    \"bool\" BOOLEAN, \\\n    \"fl"
  },
  {
    "path": "src/sql/testscripts/queries/group_by",
    "chars": 6372,
    "preview": "# Tests GROUP BY clauses. See \"aggregate\" for aggregate function tests.\n\n> CREATE TABLE test ( \\\n    id INT PRIMARY KEY,"
  },
  {
    "path": "src/sql/testscripts/queries/having",
    "chars": 3857,
    "preview": "# Tests HAVING clauses. See \"aggregate\" and \"group_by\" for related tests.\n\n> CREATE TABLE test ( \\\n    id INT PRIMARY KE"
  },
  {
    "path": "src/sql/testscripts/queries/join_cross",
    "chars": 43164,
    "preview": "# Tests cross joins.\n\n# Set up a movies dataset.\n> CREATE TABLE countries ( \\\n    id STRING PRIMARY KEY, \\\n    name STRI"
  },
  {
    "path": "src/sql/testscripts/queries/join_inner",
    "chars": 13080,
    "preview": "# Tests inner joins.\n\n# Set up a movies dataset.\n> CREATE TABLE countries ( \\\n    id STRING PRIMARY KEY, \\\n    name STRI"
  },
  {
    "path": "src/sql/testscripts/queries/join_outer",
    "chars": 8208,
    "preview": "# Tests left/right outer joins.\n\n# Set up a movies dataset.\n> CREATE TABLE countries ( \\\n    id STRING PRIMARY KEY, \\\n  "
  },
  {
    "path": "src/sql/testscripts/queries/limit",
    "chars": 1587,
    "preview": "# Tests LIMIT clauses.\n\n> CREATE TABLE test (id INT PRIMARY KEY, value STRING)\n> INSERT INTO test VALUES (1, 'a'), (2, '"
  },
  {
    "path": "src/sql/testscripts/queries/offset",
    "chars": 1648,
    "preview": "# Tests OFFSET clauses.\n\n> CREATE TABLE test (id INT PRIMARY KEY, value STRING)\n> INSERT INTO test VALUES (1, 'a'), (2, "
  },
  {
    "path": "src/sql/testscripts/queries/order",
    "chars": 10901,
    "preview": "# Tests ORDER BY clauses.\n\n# Create a table with representative values of all types.\n> CREATE TABLE test ( \\\n    id INT "
  },
  {
    "path": "src/sql/testscripts/queries/select",
    "chars": 5861,
    "preview": "# Tests the SELECT part of queries.\n\n# Create a basic test table, and a secondary table for join column lookups.\n> CREAT"
  },
  {
    "path": "src/sql/testscripts/queries/where_",
    "chars": 2751,
    "preview": "# Tests basic WHERE clauses.\n\n> CREATE TABLE test (id INT PRIMARY KEY, value STRING)\n> INSERT INTO test VALUES (1, 'a'),"
  },
  {
    "path": "src/sql/testscripts/queries/where_index",
    "chars": 3912,
    "preview": "# Tests WHERE index lookups.\n\n# Create a table with representative values of all types.\n> CREATE TABLE test ( \\\n    id I"
  },
  {
    "path": "src/sql/testscripts/queries/where_primary_key",
    "chars": 2667,
    "preview": "# Tests WHERE index lookups.\n\n# Boolean lookups.\n> CREATE TABLE \"bool\" (id BOOL PRIMARY KEY)\n> INSERT INTO \"bool\" VALUES"
  },
  {
    "path": "src/sql/testscripts/schema/create_table",
    "chars": 1930,
    "preview": "# Tests basic CREATE TABLE functionality.\n\n# The result contains the table name. The table is written to storage. Also\n#"
  },
  {
    "path": "src/sql/testscripts/schema/create_table_datatypes",
    "chars": 1016,
    "preview": "# Tests CREATE TABLE datatypes.\n\n# Create columns with all datatypes.\n> CREATE TABLE datatypes ( \\\n    id INTEGER PRIMAR"
  },
  {
    "path": "src/sql/testscripts/schema/create_table_default",
    "chars": 1791,
    "preview": "# Tests column defaults.\n\n# All datatypes.\n> CREATE TABLE datatypes ( \\\n    id INT PRIMARY KEY, \\\n    \"bool\" BOOLEAN DEF"
  },
  {
    "path": "src/sql/testscripts/schema/create_table_index",
    "chars": 1430,
    "preview": "# Creating a table with an index only results in a single schema entry (no\n# separate index).\n[ops]> CREATE TABLE indexe"
  },
  {
    "path": "src/sql/testscripts/schema/create_table_names",
    "chars": 1899,
    "preview": "# Tests CREATE TABLE table and column name validation.\n\n# A couple of valid names.\n> CREATE TABLE a_123 (a_123 INTEGER P"
  },
  {
    "path": "src/sql/testscripts/schema/create_table_null",
    "chars": 861,
    "preview": "# Tests column nullability.\n\n# All datatypes can be nullable. Their default value is NULL.\n> CREATE TABLE datatypes ( \\\n"
  },
  {
    "path": "src/sql/testscripts/schema/create_table_primary_key",
    "chars": 1033,
    "preview": "# Tests primary keys.\n\n# There must be exactly one primary key.\n!> CREATE TABLE \"primary\" (id INTEGER)\n!> CREATE TABLE \""
  },
  {
    "path": "src/sql/testscripts/schema/create_table_reference",
    "chars": 4102,
    "preview": "# Tests foreign key references during CREATE TABLE.\n\n# Create two reference tables, with int/string primary keys.\n> CREA"
  },
  {
    "path": "src/sql/testscripts/schema/create_table_transaction",
    "chars": 2166,
    "preview": "# Tests that CREATE TABLE is transactional.\n\n> BEGIN\n[ops]> CREATE TABLE name (id INT PRIMARY KEY, value STRING)\n---\nset"
  },
  {
    "path": "src/sql/testscripts/schema/create_table_unique",
    "chars": 1676,
    "preview": "# Creating a table with a unique index only results in a single schema entry (no\n# separate index).\n[ops]> CREATE TABLE "
  },
  {
    "path": "src/sql/testscripts/schema/drop_table",
    "chars": 3829,
    "preview": "# Basic DROP TABLE tests.\n\n> CREATE TABLE name (id INT PRIMARY KEY, value STRING NOT NULL)\n> INSERT INTO name VALUES (1,"
  },
  {
    "path": "src/sql/testscripts/schema/drop_table_index",
    "chars": 18181,
    "preview": "# Tests that DROP TABLE cleans up secondary indexes of all kinds.\n\n> CREATE TABLE \"ref\" (id INT PRIMARY KEY, value STRIN"
  },
  {
    "path": "src/sql/testscripts/schema/drop_table_ref",
    "chars": 626,
    "preview": "# Tests DROP TABLE with references.\n\n# Create a reference table and foreign key table.\n> CREATE TABLE \"ref\" (id INT PRIM"
  }
]

// ... and 84 more files (download for full content)

About this extraction

This page contains the full source code of the erikgrinaker/toydb GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 284 files (1.4 MB), approximately 475.6k tokens, and a symbol index with 962 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo