Repository: nomic-io/merk Branch: develop Commit: 898cc4e95df1 Files: 40 Total size: 328.6 KB Directory structure: gitextract_1164yhgf/ ├── .github/ │ └── workflows/ │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches/ │ ├── merk.rs │ └── ops.rs ├── docs/ │ └── algorithms.md ├── rustfmt.toml ├── scripts/ │ └── pgo.sh └── src/ ├── error.rs ├── lib.rs ├── merk/ │ ├── chunks.rs │ ├── mod.rs │ ├── restore.rs │ └── snapshot.rs ├── owner.rs ├── proofs/ │ ├── chunk.rs │ ├── encoding.rs │ ├── mod.rs │ ├── query/ │ │ ├── map.rs │ │ └── mod.rs │ └── tree.rs ├── test_utils/ │ ├── crash_merk.rs │ ├── mod.rs │ └── temp_merk.rs └── tree/ ├── commit.rs ├── debug.rs ├── encoding.rs ├── fuzz_tests.rs ├── hash.rs ├── iter.rs ├── kv.rs ├── link.rs ├── mod.rs ├── ops.rs └── walk/ ├── fetch.rs ├── mod.rs └── ref_walker.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: branches: [master, develop] pull_request: branches: [master, develop] env: CARGO_TERM_COLOR: always jobs: test-base: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Use Nightly uses: actions-rs/toolchain@v1 with: toolchain: nightly-2024-04-25 override: true - name: Cache uses: actions/cache@v3 with: path: | ~/.cargo/bin/ ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ ~/.cargo/registry/src/**/librocksdb-sys-* target/ key: ${{ runner.os }}-test-base-${{ hashFiles('Cargo.toml') }} - name: Test uses: actions-rs/cargo@v1 with: command: test args: --verbose test-all-features: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Use Nightly uses: actions-rs/toolchain@v1 with: toolchain: nightly-2024-04-25 override: true - name: Cache uses: actions/cache@v3 with: path: | ~/.cargo/bin/ 
~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ ~/.cargo/registry/src/**/librocksdb-sys-* target/ key: ${{ runner.os }}-test-all-features-${{ hashFiles('Cargo.toml') }} - name: Test uses: actions-rs/cargo@v1 with: command: test args: --verbose --all-features coverage: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Use Nightly uses: actions-rs/toolchain@v1 with: toolchain: nightly-2024-04-25 components: llvm-tools-preview override: true - name: Cache uses: actions/cache@v3 with: path: | ~/.cargo/bin/ ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ ~/.cargo/registry/src/**/librocksdb-sys-* target/ key: ${{ runner.os }}-coverage-${{ hashFiles('Cargo.toml') }} - name: Install Coverage Tooling uses: actions-rs/cargo@v1 with: command: install args: cargo-llvm-cov --force - name: Run Coverage uses: actions-rs/cargo@v1 with: command: llvm-cov args: --all-features --workspace --lcov --output-path lcov.info - name: Upload to codecov.io uses: codecov/codecov-action@v1 with: token: ${{ secrets.CODECOV_TOKEN }} files: lcov.info fail_ci_if_error: true format: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Use Nightly uses: actions-rs/toolchain@v1 with: toolchain: nightly-2024-04-25 components: rustfmt override: true - name: Check uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check clippy: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Use Nightly uses: actions-rs/toolchain@v1 with: toolchain: nightly-2024-04-25 components: clippy override: true - name: Cache uses: actions/cache@v3 with: path: | ~/.cargo/bin/ ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ ~/.cargo/registry/src/**/librocksdb-sys-* target/ key: ${{ runner.os }}-clippy-${{ hashFiles('Cargo.toml') }} - name: Check uses: actions-rs/clippy-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} args: --all-features -- -D warnings benches: runs-on: 
ubuntu-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Use Nightly uses: actions-rs/toolchain@v1 with: toolchain: nightly-2024-04-25 override: true - name: Cache uses: actions/cache@v3 with: path: | ~/.cargo/bin/ ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ ~/.cargo/registry/src/**/librocksdb-sys-* target/ key: ${{ runner.os }}-benches-${{ hashFiles('Cargo.toml') }} - name: Run Benches uses: actions-rs/cargo@v1 with: command: bench ================================================ FILE: .gitignore ================================================ target temp.db .DS_Store Cargo.lock ================================================ FILE: CHANGELOG.md ================================================ # Changelog ## [Unreleased] ### Bug Fixes - Fixed bug where column families would be non-atomically flushed when one memtable was filled, resulting in inconsistency after a crash. [Unreleased]: https://github.com/nomic-io/merk/compare/v1.0.0-alpha.8...HEAD ================================================ FILE: Cargo.toml ================================================ [package] name = "merk" description = "High-performance Merkle key/value store" version = "2.0.0" authors = ["Turbofish "] edition = "2018" license = "Apache-2.0" [dependencies] thiserror= "1.0.58" sha2 = "0.10.8" log = "0.4.21" [dependencies.colored] version = "2.1.0" optional = true [dependencies.num_cpus] version = "1.16.0" optional = true [dependencies.ed] version = "0.3.0" optional = true [dependencies.rand] version = "0.8.5" features = ["small_rng"] optional = true [dependencies.rocksdb] version = "0.22.0" default-features = false optional = true [dependencies.jemallocator] version = "0.5.4" features = ["disable_initial_exec_tls"] optional = true [features] default = ["full", "verify"] full = [ "rand", "rocksdb", "colored", "num_cpus", "ed", ] verify = ["ed"] ================================================ FILE: LICENSE 
================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================

merk

*High-performance Merkle key/value store* ![CI](https://github.com/turbofish-org/merk/actions/workflows/ci.yml/badge.svg) [![codecov](https://codecov.io/gh/turbofish-org/merk/branch/develop/graph/badge.svg?token=TTUTSt2iLz)](https://codecov.io/gh/turbofish-org/merk) [![Crate](https://img.shields.io/crates/v/merk.svg)](https://crates.io/crates/merk) [![API](https://docs.rs/merk/badge.svg)](https://docs.rs/merk) Merk is a crypto key/value store - more specifically, it's a Merkle AVL tree built on top of RocksDB (Facebook's fork of LevelDB). Its priorities are performance and reliability. While Merk was designed to be the state database for blockchains, it can also be used anywhere an auditable key/value store is needed. ### Features - **Fast reads/writes** - Reads have no overhead compared to a normal RocksDB store, and writes are optimized for batch operations (e.g. blocks in a blockchain). - **Fast proof generation** - Since Merk implements an AVL tree rather than a trie, it is very efficient to create and verify proofs for ranges of keys. - **Concurrency** - Unlike most other Merkle stores, all operations utilize all available cores - giving huge performance gains and allowing nodes to scale along with Moore's Law. - **Replication** - The tree is optimized to efficiently build proofs of large chunks, allowing for nodes to download the entire state (e.g. "state syncing"). - **Checkpointing** - Merk can create checkpoints on disk (an immutable view of the entire store at a certain point in time) without blocking, so there are no delays in availability or liveness. - **Web-friendly** - Being written in Rust means it is easy to run the proof-verification code in browsers with WebAssembly, allowing for light-clients that can verify data for themselves. - **Fits any Profile** - Performant on RAM-constrained Raspberry Pi's and beefy validator rigs alike. The algorithms are based on AVL, but optimized for batches of operations and random fetches from the backing store. 
## Usage **Install:** ``` cargo add merk ``` **Example:** ```rust extern crate merk; use merk::*; // load or create a Merk store at the given path let mut merk = Merk::open("./merk.db").unwrap(); // apply some operations let batch = [ (b"key", Op::Put(b"value")), (b"key2", Op::Put(b"value2")), (b"key3", Op::Put(b"value3")), (b"key4", Op::Delete) ]; merk.apply(&batch).unwrap(); ``` Merk is currently used by [Nomic](https://github.com/nomic-io/nomic), a blockchain powering decentralized custody of Bitcoin, built on [Orga](https://github.com/turbofish-org/orga). ## Benchmarks Benchmarks are measured on a 1M node tree, each node having a key length of 16 bytes and value length of 40 bytes. All tests are single-threaded (not counting RocksDB background threads). You can test these yourself by running `cargo bench`. ### 2017 Macbook Pro *(Using 1 Merk thread and 4 RocksDB compaction threads)* **Pruned (no state kept in memory)** *RAM usage:* ~20MB average, ~26MB max | Test | Ops per second | | -------- | ------ | | Random inserts | 23,000 | | Random updates | 32,000 | | Random deletes | 26,000 | | Random reads | 210,000 | | Random proof generation | 133,000 | **Cached (all state kept in memory)** *RAM usage:* ~400MB average, ~1.1GB max | Test | Ops per second | | -------- | ------ | | Random inserts | 58,000 | | Random updates | 81,000 | | Random deletes | 72,000 | | Random reads | 1,565,000 | | Random proof generation | 311,000 | ### i9-9900K Desktop *(Using 1 Merk thread and 16 RocksDB compaction threads)* **Pruned (no state kept in memory)** *RAM usage:* ~20MB average, ~26MB max | Test | Ops per second | | -------- | ------ | | Random inserts | 40,000 | | Random updates | 55,000 | | Random deletes | 45,000 | | Random reads | 383,000 | | Random proof generation | 249,000 | **Cached (all state kept in memory)** *RAM usage:* ~400MB average, ~1.1GB max | Test | Ops per second | | -------- | ------ | | Random inserts | 93,000 | | Random updates | 123,000 | | Random deletes 
| 111,000 | | Random reads | 2,370,000 | | Random proof generation | 497,000 | ## Contributing Merk is an open-source project spearheaded by Turbofish. Anyone is able to contribute to Merk via GitHub. [Contribute to Merk](https://github.com/turbofish-org/merk/contribute) ## Security ### Security Audits | Date | Auditor | Scope | Report | | ---: | :---: | :--- | :---: | | October 2024 | Trail of Bits | `orga` `merk` `ed` `abci2` | [📄](https://github.com/trailofbits/publications/blob/master/reviews/2024-11-orgaandmerk-securityreview.pdf) | Vulnerabilities should not be reported through public channels, including GitHub Issues. You can report a vulnerability via GitHub's Private Vulnerability Reporting or to Turbofish at `security@turbofish.org`. [Report a Vulnerability](https://github.com/turbofish-org/merk/security/advisories/new) ## License Licensed under the Apache License, Version 2.0 (the "License"); you may not use the files in this repository except in compliance with the License. You may obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --- Copyright © 2024 Turbofish, Inc. 
================================================ FILE: benches/merk.rs ================================================ #![feature(test)] extern crate test; use merk::proofs::encode_into as encode_proof_into; use merk::restore::Restorer; use merk::test_utils::*; use merk::{Merk, Result}; use rand::prelude::*; use std::thread; use test::Bencher; #[bench] fn get_1m_rocksdb(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 2_000; let num_batches = initial_size / batch_size; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); let mut batches = vec![]; for i in 0..num_batches { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; batches.push(batch); } let mut i = 0; b.iter(|| { let batch_index = (i % num_batches) as usize; let key_index = (i / num_batches) as usize; let key = &batches[batch_index][key_index].0; merk.get(key).expect("get failed"); i = (i + 1) % initial_size; }); } #[bench] fn insert_1m_2k_seq_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 2_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_seq((i * batch_size)..((i + 1) * batch_size)); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let mut i = initial_size / batch_size; b.iter(|| { let batch = make_batch_seq((i * batch_size)..((i + 1) * batch_size)); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; i += 1; }); } #[bench] fn insert_1m_2k_rand_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 2_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_rand(batch_size, i); 
unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let mut i = initial_size / batch_size; b.iter(|| { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; i += 1; }); } #[bench] fn update_1m_2k_seq_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 2_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_seq((i * batch_size)..((i + 1) * batch_size)); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let mut i = 0; b.iter(|| { let batch = make_batch_seq((i * batch_size)..((i + 1) * batch_size)); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; i = (i + 1) % (initial_size / batch_size); }); } #[bench] fn update_1m_2k_rand_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 2_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let mut i = 0; b.iter(|| { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; i = (i + 1) % (initial_size / batch_size); }); } #[bench] fn delete_1m_2k_rand_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 2_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let mut i = 0; b.iter(|| { if i >= (initial_size / batch_size) { println!("WARNING: too many bench iterations, whole tree 
deleted"); return; } let batch = make_del_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; i = (i + 1) % (initial_size / batch_size); }); } #[bench] fn prove_1m_1_rand_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 1_000; let proof_size = 1; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let mut i = 0; b.iter(|| { let batch = make_batch_rand(proof_size, i); let mut keys = Vec::with_capacity(batch.len()); for (key, _) in batch { keys.push(merk::proofs::query::QueryItem::Key(key)); } merk.prove(keys).expect("prove failed"); i = (i + 1) % (initial_size / batch_size); merk.commit(std::collections::LinkedList::new(), &[]) .unwrap(); }); } #[bench] fn build_trunk_chunk_1m_1_rand_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 1_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let mut bytes = vec![]; b.iter(|| { bytes.clear(); let (ops, _) = merk.walk(|walker| walker.unwrap().create_trunk_proof().unwrap()); encode_proof_into(ops.iter(), &mut bytes); merk.commit(std::collections::LinkedList::new(), &[]) .unwrap(); }); b.bytes = bytes.len() as u64; } #[bench] fn chunkproducer_rand_1m_1_rand_rocksdb_noprune(b: &mut Bencher) { let mut rng = rand::thread_rng(); let initial_size = 1_000_000; let batch_size = 1_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = 
make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let mut chunks = merk.chunks().unwrap(); let mut total_bytes = 0; let mut i = 0; let mut next = || { let index = rng.gen::() % chunks.len(); chunks.chunk(index).unwrap() }; b.iter(|| { let chunk = next(); total_bytes += chunk.len(); i += 1; }); b.bytes = (total_bytes / i) as u64; } #[bench] fn chunk_iter_1m_1_rand_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 1_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let mut chunks = merk.chunks().unwrap().into_iter(); let mut total_bytes = 0; let mut i = 0; let mut next = || match chunks.next() { Some(chunk) => chunk, None => { chunks = merk.chunks().unwrap().into_iter(); chunks.next().unwrap() } }; b.iter(|| { let chunk = next(); total_bytes += chunk.unwrap().len(); i += 1; }); b.bytes = (total_bytes / i) as u64; } #[bench] fn restore_1m_1_rand_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 1_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let chunks = merk .chunks() .unwrap() .into_iter() .collect::>>() .unwrap(); let path = thread::current().name().unwrap().to_owned() + "_restore"; let mut restorer: Option = None; let mut total_bytes = 0; let mut i = 0; b.iter(|| { if i % chunks.len() == 0 { if i != 0 { let restorer_merk = restorer.take().unwrap().finalize(); drop(restorer_merk); std::fs::remove_dir_all(&path).unwrap(); } restorer = Some(Merk::restore(&path, merk.root_hash(), 
chunks.len()).unwrap()); } let restorer = restorer.as_mut().unwrap(); let chunk = chunks[i % chunks.len()].as_slice(); restorer.process_chunk(chunk).unwrap(); total_bytes += chunk.len(); i += 1; }); std::fs::remove_dir_all(&path).unwrap(); b.bytes = (total_bytes / i) as u64; } #[bench] fn checkpoint_create_destroy_1m_1_rand_rocksdb_noprune(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 1_000; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(&path).expect("failed to open merk"); for i in 0..(initial_size / batch_size) { let batch = make_batch_rand(batch_size, i); unsafe { merk.apply_unchecked(&batch, &[]).expect("apply failed") }; } let path = path + ".checkpoint"; b.iter(|| { let checkpoint = merk.checkpoint(&path).unwrap(); checkpoint.destroy().unwrap(); }); } ================================================ FILE: benches/ops.rs ================================================ #![feature(test)] extern crate test; use merk::owner::Owner; use merk::test_utils::*; use test::Bencher; #[bench] fn insert_1m_10k_seq_memonly(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 10_000; let mut tree = Owner::new(make_tree_seq(initial_size)); let mut i = initial_size / batch_size; b.iter(|| { let batch = make_batch_seq((i * batch_size)..((i + 1) * batch_size)); tree.own(|tree| apply_memonly_unchecked(tree, &batch)); i += 1; }); } #[bench] fn insert_1m_10k_rand_memonly(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 10_000; let mut tree = Owner::new(make_tree_rand(initial_size, batch_size, 0)); let mut i = initial_size / batch_size; b.iter(|| { let batch = make_batch_rand(batch_size, i); tree.own(|tree| apply_memonly_unchecked(tree, &batch)); i += 1; }); } #[bench] fn update_1m_10k_seq_memonly(b: &mut Bencher) { let initial_size = 1_000_000; let batch_size = 10_000; let mut tree = Owner::new(make_tree_seq(initial_size)); let mut i = 0; b.iter(|| { let batch = make_batch_seq((i * 
batch_size)..((i + 1) * batch_size)); tree.own(|tree| apply_memonly_unchecked(tree, &batch)); i = (i + 1) % (initial_size / batch_size); }); } #[bench] fn update_1m_10k_rand_memonly(b: &mut Bencher) { let initial_size = 1_010_000; let batch_size = 10_000; let mut tree = Owner::new(make_tree_rand(initial_size, batch_size, 0)); let mut i = 0; b.iter(|| { let batch = make_batch_rand(batch_size, i); tree.own(|tree| apply_memonly_unchecked(tree, &batch)); i = (i + 1) % (initial_size / batch_size); }); } ================================================ FILE: docs/algorithms.md ================================================ # Merk - A High-Performance Merkle AVL Tree **Matt Bell ([@mappum](https://twitter.com/mappum))** • [Nomic Hodlings, Inc.](https://nomic.io) v0.0.4 - _August 5, 2020_ ## Introduction Merk is a Merkle AVL tree designed for performance, running on top of a backing key/value store such as RocksDB. Notable features include concurrent operations for higher throughput, an optimized key/value layout for performant usage of the backing store, and efficient proof generation to enable bulk tree replication. _Note that this document is meant to be a way to grok how Merk works, rather than an authoritative specification._ ## Algorithm Overview The Merk tree was inspired by [`tendermint/iavl`](https://github.com/tendermint/iavl) from the [Tendermint](https://tendermint.com) team but makes various fundamental design changes in the name of performance. ### Tree Structure #### Nodes and Hashing In many Merkle tree designs, only leaf nodes contain key/value pairs (inner nodes only contain child hashes). To contrast, every node in a Merk tree contains a key and a value, including inner nodes. Each node contains a "kv hash", which is the hash of its key/value pair, in addition to its child hashes. 
The hash of the node is just the hash of the concatenation of these three hashes: ``` kv_hash = H(key, value) node_hash = H(kv_hash, left_child_hash, right_child_hash) ``` Note that the `left_child_hash` and/or `right_child_hash` values may be null since it is possible for the node to have no children or only one child. In our implementation, the hash function used is SHA512/256 (SHA512 with output truncated to 256 bits) but this choice is trivially swappable. #### Database Representation In the backing key/value store, nodes are stored using their key/value pair key as the database key, and a binary encoding that contains the fields in the above `Node` structure - minus the `key` field since that is already implied by the database entry. Storing nodes by key rather than by hash is an important optimization, and is the reason why inner nodes each have a key/value pair. The implication is that reading a key does not require traversing through the tree structure but only requires a single read in the backing key/value store, meaning there is practically no overhead versus using the backing store without a tree structure. Additionally, we can efficiently iterate through nodes in the tree in their in-order traversal just by iterating by key in the backing store (which RocksDB and LevelDB are optimized for). This means we lose the "I" compared to the IAVL library - immutability. Since now we operate on the tree nodes in-place in the backing store, we don't by default have views of past states of the tree. However, **in** our implementation we replicate this functionality with RocksDB's snapshot and checkpoint features which provide a consistent view of the store at a certain point in history - either ephemerally in memory or persistently on disk. ### Operations Operating on a Merk tree is optimized for batches - in the real world we will only be updating the tree once per block, applying a batch of many changes from many transactions at the same time. 
#### Concurrent Batch Operator To mutate the tree, we apply batches of operations, each of which can either be `Put(key, value)` or `Delete(key)`. Batches of operations are expected to be sorted by key, with every key appearing only once. Our implementation provides an `apply` method which sorts the batch and checks for duplicate keys, and an `apply_unchecked` method which skips the sorting/checking step for performance reasons when the caller has already ensured the batch is sorted. The algorithm to apply these operations to the tree is called recursively on each relevant node. _Simplified pseudocode for the operation algorithm:_ - Given a node and a batch of operations: - Binary search for the current node's key in the batch: - If this node's key is found in the batch at index `i`: - Apply the operation to this node: - If operation is `Put`, update its `value` and `kv_hash` - If the operation is `Delete`, perform a traditional BST node removal - Split the batch into left and right sub-batches (excluding the operation we just applied): - Left batch from batch start to index `i` - Right batch from index `i + 1` to the end of the batch - If this node's key is not found in the batch, but could be inserted at index `i` maintaining sorted order: - Split the batch into left and right sub-batches: - Left batch from batch start to index `i` - Right batch from index `i` to the end of the batch - Recurse: - Apply the left sub-batch to this node's left child - Apply the right sub-batch to this node's right child - Balance: - If after recursing the left and right subtrees are unbalanced (their heights differ by more than 1), perform an AVL tree rotation (possibly more than one) - Recompute node's hash based on hash of its updated children and `kv_hash`, then return This batch application of operations can happen concurrently - recursing into the left and right subtrees of a node are two fully independent operations (operations on one subtree will never involve reading or 
writing to/from any of the nodes on the other subtree). This means we have an _implicit lock_ - we don't need to coordinate with mutexes but only need to wait for both the left side and right side to finish their operations. ### Proofs Merk was designed with efficient proofs in mind, both for application queries (e.g. a user checking their account balance) and bulk tree replication (a.k.a. "state syncing") between untrusted nodes. #### Structure Merk proofs are a list of stack-based operators and node data, with 3 possible operators: `Push(node)`, `Parent`, and `Child`. A stream of these operators can be processed by a verifier in order to reconstruct a sparse representation of part of the tree, in a way where the data can be verified against a known root hash. The value of `node` in a `Push` operation can be one of three types: - `Hash(hash)` - The hash of a node - `KVHash(hash)` - The key/value hash of a node - `KV(key, value)` - The key and value of a node This proof format can be encoded in a binary format and has negligible space overhead for efficient transport over the network. #### Verification A verifier can process a proof by maintaining a stack of connected tree nodes, and executing the operators in order: - `Push(node)` - Push some node data onto the stack. - `Child` - Pop a value from the stack, `child`. Pop another value from the stack, `parent`. Set `child` as the right child of `parent`, and push the combined result back on the stack. - `Parent` - Pop a value from the stack, `parent`. Pop another value from the stack, `child`. Set `child` as the left child of `parent`, and push the combined result back on the stack. Proof verification will fail if e.g. `Child` or `Parent` try to pop a value from the stack but the stack is empty, `Child` or `Parent` try to overwrite an existing child, or the proof does not result in exactly one stack item. 
This proof language can be used to specify any possible set or subset of the tree's data in a way that can be reconstructed efficiently by the verifier. Proofs can contain either an arbitrary set of selected key/value pairs (e.g. in an application query), or contiguous tree chunks (when replicating the tree).

After processing an entire proof, the verifier should have derived a root hash which can be compared to the root hash they expect (e.g. the one validators committed to in consensus), and have a set of proven key/value pairs. Note that this can be computed in a streaming fashion, e.g. while downloading the proof, which makes the required memory for verification very low even for large proofs. However, the verifier cannot tell if the proof is valid until finishing the entire proof, so very large proofs should be broken up into multiple proofs of smaller size.

#### Generation

Efficient proof generation is important since nodes will likely receive a high volume of queries and constantly be serving proofs, essentially providing an API service to end-user application clients, as well as servicing demand for replication when new nodes come onto the network.

Nodes can generate proofs for a set of keys by traversing through the tree from the root and building up the required proof branches. Much like the batch operator algorithm, this algorithm takes a batch of sorted, unique keys as input.
_Simplified pseudocode for proof generation (based on an in-order traversal):_

- Given a node and a batch of keys to include in the proof:
  - If the batch is empty, append `Push(Hash(node_hash))` to the proof and return
  - Binary search for the current node's key in the batch:
    - If this node's key is found in the batch at index `i`:
      - Partition the batch into left and right sub-batches at index `i` (excluding index `i`)
    - If this node's key is not found in the batch, but could be inserted at index `i` maintaining sorted order:
      - Partition the batch into left and right sub-batches at index `i`
  - **Recurse left:** If there is a left child:
    - If the left sub-batch is not empty, query the left child (appending operators to the proof)
    - If the left sub-batch is empty, append `Push(Hash(left_child_hash))` to the proof
  - Append proof operator:
    - If this node's key is in the batch, or if the left sub-batch was not empty and no left child exists, or if the right sub-batch is not empty and no right child exists, or if the left child's right edge queried a non-existent key, or if the right child's left edge queried a non-existent key, append `Push(KV(key, value))` to the proof
    - Otherwise, append `Push(KVHash(kv_hash))` to the proof
    - If the left child exists, append `Parent` to the proof
  - **Recurse right:** If there is a right child:
    - If the right sub-batch is not empty, query the right child (appending operators to the proof)
    - If the right sub-batch is empty, append `Push(Hash(right_child_hash))` to the proof
    - Append `Child` to the proof

Since RocksDB allows concurrent reading from a consistent snapshot/checkpoint, nodes can concurrently generate proofs on all cores to service a higher volume of queries, even if our algorithm isn't designed for concurrency.
#### Binary Format

We can efficiently encode these proofs by encoding each operator as follows:

```
Push(Hash(hash)) => 0x01 <32-byte hash>
Push(KVHash(hash)) => 0x02 <32-byte hash>
Push(KV(key, value)) => 0x03 <1-byte key length> <2-byte value length>
Parent => 0x10
Child => 0x11
```

This results in a compact binary representation, with a very small space overhead (roughly 2 bytes per node in the proof (1 byte for the `Push` operator type flag, and 1 byte for a `Parent` or `Child` operator), plus 3 bytes per key/value pair (1 byte for the key length, and 2 bytes for the value length)).

#### Efficient Chunk Proofs for Replication

An alternate, optimized proof generation can be used when generating proofs for large contiguous subtrees, e.g. chunks for tree replication. This works by iterating sequentially through keys in the backing store (which is much faster than random lookups). Based on some early benchmarks, I estimate that typical server hardware should be able to generate this kind of range proof at a rate of hundreds of MB/s, which means the bottleneck for bulk replication will likely be bandwidth rather than CPU. To improve performance further, these proofs can be cached and trivially served by a CDN or a P2P swarm (each node of which can easily verify the chunks they pass around).

Due to the tree structure we already use, streaming the entries in key-order gives us all the nodes to construct complete contiguous subtrees. For instance, in the diagram below, streaming from keys `1` to `7` will give us a complete subtree. This subtree can be verified to be a part of the full tree as long as we know the hash of `4`.

```
        8
       / \
      /  ...
     4
   /   \
  2     6
 / \   / \
1   3 5   7
```

Our algorithm builds verifiable chunks by first constructing a chunk of the upper levels of the tree, called the _trunk chunk_, plus each subtree below that (each of which is called a _leaf chunk_). The number of levels to include in the trunk can be chosen to control the size of the leaf chunks.
For example, a tree of height 10 should have approximately 1,023 nodes. If the trunk contains the top 5 levels, the trunk and the 32 resulting leaf chunks will each contain ~31 nodes. We can even prove to the verifier the trunk size was chosen correctly by also including an approximate tree height proof, by including the branch all the way to the leftmost node of the tree (node `1` in the figure) and using this height as our basis to select the number of trunk levels.

After the prover builds the trunk by traversing from the root node and making random lookups down to the chosen level, it can generate the leaf chunks extremely efficiently by reading the database keys sequentially as described a few paragraphs above. We can trivially detect when a chunk should end whenever a node at or above the trunk level is encountered (e.g. encountering node `8` signals we have read a complete subtree). The generated proofs can be efficiently encoded into the same proof format described above. Verifiers only have the added constraint that none of the data should be abridged (all nodes contain a key and value, rather than just a hash or kvhash). After first downloading and verifying the trunk, verifiers can also download leaf chunks in parallel and verify that each connects to the trunk by comparing each subtree's root hash.

Note that this algorithm produces proofs with very little memory requirements, plus little overhead added to the sequential read from disk. In a proof-of-concept benchmark, proof generation was measured to be ~750 MB/s on a modern solid-state drive and processor, meaning a 4GB state tree (the size of the Cosmos Hub state at the time of writing) could be fully proven in ~5 seconds (without considering parallelization). In conjunction with the RocksDB checkpoint feature, this process can happen in the background without blocking the node from executing later blocks.
_Pseudocode for the range proof generation algorithm:_ - Given a tree and a range of keys to prove: - Create a stack of keys (initially empty) - **Range iteration:** for every key/value entry within the query range in the backing store: - Append `Push(KV(key, value))` to the proof - If the current node has a left child, append `Parent` to the proof - If the current node has a right child, push the right child's key onto the key stack - If the current node does not have a right child: - While the current node's key is greater than or equal to the key at the top of the key stack, append `Child` to the proof and pop from the key stack Note that this algorithm produces the proof in a streaming fashion and has very little memory requirements (the only overhead is the key stack, which will be small even for extremely large trees since its length is a maximum of `log N`). #### Example Proofs Let's walk through a concrete proof example. Consider the following tree: ``` 5 / \ / \ 2 9 / \ / \ 1 4 7 11 / / \ / 3 6 8 10 ``` _Small proof:_ First, let's create a proof for a small part of the tree. Let's say the user makes a query for keys `1, 2, 3, 4`. If we follow our proof generation algorithm, we should get a proof that looks like this: ``` Push(KV(1, )), Push(KV(2, )), Parent, Push(KV(3, )), Push(KV(4, )), Parent, Child, Push(KVHash()), Parent, Push(Hash()), Child ``` Let's step through verification to show that this proof works. We'll create a verification stack, which starts out empty, and walk through each operator in the proof, in order: ``` Stack: (empty) ``` We will push a key/value pair on the stack, creating a node. However, note that for verification purposes this node will only need to contain the kv_hash which we will compute at this step. ``` Operator: Push(KV(1, )) Stack: 1 ``` ``` Operator: Push(KV(2, )) Stack: 1 2 ``` Now we connect nodes 1 and 2, with 2 as the parent. 
``` Operator: Parent Stack: 2 / 1 ``` ``` Operator: Push(KV(3, )) Stack: 2 / 1 3 ``` ``` Operator: Push(KV(4, )) Stack: 2 / 1 3 4 ``` ``` Operator: Parent Stack: 2 / 1 4 / 3 ``` Now connect these two graphs with 4 as the child of 2. ``` Operator: Child Stack: 2 / \ 1 4 / 3 ``` Since the user isn't querying the data from node 5, we only need its kv_hash. ``` Operator: Push(KVHash()) Stack: 2 / \ 1 4 / 3 5 ``` ``` Operator: Parent Stack: 5 / 2 / \ 1 4 / 3 ``` We only need the hash of node 9. ``` Operator: Push(Hash()) Stack: 5 / 2 / \ 1 4 / 3 9 ``` ``` Operator: Child Stack: 5 / \ 2 9 / \ 1 4 / 3 ``` Now after going through all these steps, we have sufficient knowlege of the tree's structure and data to compute node hashes in order to verify. At the end, we will have computed a hash for node 5 (the root), and we verify by comparing this hash to the one we expected. ================================================ FILE: rustfmt.toml ================================================ comment_width = 80 wrap_comments = true ================================================ FILE: scripts/pgo.sh ================================================ #!/bin/bash default_host_triple="" default_toolchain="" IFS=" = " while read -r name value do value="${value//\"/}" if [ "${name}" == "default_host_triple" ]; then default_host_triple="${value}" elif [ "${name}" == "default_toolchain" ]; then default_toolchain="${value}" fi done < ~/.rustup/settings.toml echo "default_host_triple=${default_host_triple}" echo "default_toolchain=${default_toolchain}" rustup component add llvm-tools-preview rm -rf /tmp/merk-pgo RUSTFLAGS="-Cprofile-generate=/tmp/merk-pgo" cargo bench rand_rocks ~/.rustup/toolchains/${default_toolchain}/lib/rustlib/${default_host_triple}/bin/llvm-profdata merge -o /tmp/merk-pgo/merged.profdata /tmp/merk-pgo RUSTFLAGS="-Cprofile-use=/tmp/merk-pgo/merged.profdata" cargo bench ================================================ FILE: src/error.rs 
================================================
pub use thiserror::Error;

/// The set of errors that can be produced by any Merk operation.
#[derive(Error, Debug)]
pub enum Error {
    #[error("Attach Error: {0}")]
    Attach(String),
    #[error("Batch Key Error: {0}")]
    BatchKey(String),
    #[error("Bound Error: {0}")]
    Bound(String),
    #[error("Chunk Processing Error: {0}")]
    ChunkProcessing(String),
    #[error(transparent)]
    Ed(#[from] ed::Error),
    #[error("Fetch Error: {0}")]
    Fetch(String),
    /// Expected vs. actual 32-byte (SHA512/256) hashes.
    #[error("Proof did not match expected hash\n\tExpected: {0:?}\n\tActual: {1:?}")]
    HashMismatch([u8; 32], [u8; 32]),
    #[error("Index OoB Error: {0}")]
    IndexOutOfBounds(String),
    #[error("Integer conversion error: {0}")]
    IntegerConversionError(#[from] std::num::TryFromIntError),
    #[error(transparent)]
    IO(#[from] std::io::Error),
    // NOTE(review): the payload's type parameter was lost in extraction;
    // restored as `Vec<u8>` (a raw key), matching the crate's key type.
    #[error("Tried to delete non-existent key {0:?}")]
    KeyDelete(Vec<u8>),
    #[error("Key Error: {0}")]
    Key(String),
    #[error("Key not found: {0}")]
    KeyNotFound(String),
    #[error("Proof is missing data for query")]
    MissingData,
    #[error("Path Error: {0}")]
    Path(String),
    #[error("Proof Error: {0}")]
    Proof(String),
    #[cfg(feature = "full")]
    #[error(transparent)]
    RocksDB(#[from] rocksdb::Error),
    #[error("Stack Underflow")]
    StackUnderflow,
    #[error("Tree Error: {0}")]
    Tree(String),
    #[error("Unexpected Node Error: {0}")]
    UnexpectedNode(String),
    #[error("Unknown Error")]
    Unknown,
    #[error("Version Error: {0}")]
    Version(String),
}

/// Crate-wide result alias using [`Error`] as the error type.
// NOTE(review): type parameters were lost in extraction; restored.
pub type Result<T> = std::result::Result<T, Error>;

================================================
FILE: src/lib.rs
================================================
//! A high-performance Merkle key/value store.
//!
//! Merk is a crypto key/value store - more specifically, it's a Merkle AVL tree
//! built on top of RocksDB (Facebook's fork of LevelDB).
//!
//! Its priorities are performance and reliability. While Merk was designed to
//! be the state database for blockchains, it can also be used anywhere an
//! auditable key/value store is needed.
#![feature(trivial_bounds)] #[global_allocator] #[cfg(feature = "jemallocator")] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; #[cfg(feature = "full")] pub use rocksdb; /// Error and Result types. mod error; /// The top-level store API. #[cfg(feature = "full")] mod merk; /// Provides a container type that allows temporarily taking ownership of a /// value. // TODO: move this into its own crate pub mod owner; /// Algorithms for generating and verifying Merkle proofs. pub mod proofs; /// Various helpers useful for tests or benchmarks. #[cfg(feature = "full")] pub mod test_utils; /// The core tree data structure. pub mod tree; #[cfg(feature = "full")] pub use crate::merk::{chunks, restore, snapshot, Merk, MerkSource, Snapshot}; pub use error::{Error, Result}; pub use tree::{Batch, BatchEntry, Hash, Op, PanicSource, HASH_LENGTH}; #[allow(deprecated)] pub use proofs::query::verify_query; pub use proofs::query::verify; ================================================ FILE: src/merk/chunks.rs ================================================ //! Provides `ChunkProducer`, which creates chunk proofs for full replication of //! a Merk. use super::Merk; use crate::proofs::{chunk::get_next_chunk, Node, Op}; use crate::{Error, Result}; use ed::Encode; use rocksdb::DBRawIterator; /// A `ChunkProducer` allows the creation of chunk proofs, used for trustlessly /// replicating entire Merk trees. /// /// Chunks can be generated on the fly in a random order, or iterated in order /// for slightly better performance. pub struct ChunkProducer<'a> { trunk: Vec, chunk_boundaries: Vec>, raw_iter: DBRawIterator<'a>, index: usize, } impl<'a> ChunkProducer<'a> { /// Creates a new `ChunkProducer` for the given `Merk` instance. In the /// constructor, the first chunk (the "trunk") will be created. 
pub fn new(merk: &'a Merk) -> Result<Self> {
    // Build the trunk proof up front; `has_more` is false for trees small
    // enough to fit in a single chunk.
    let (trunk, has_more) = merk.walk(|maybe_walker| match maybe_walker {
        Some(mut walker) => walker.create_trunk_proof(),
        None => Ok((vec![], false)),
    })?;

    // Each KV in the trunk marks the boundary between two leaf chunks.
    let chunk_boundaries = if has_more {
        trunk
            .iter()
            .filter_map(|op| match op {
                Op::Push(Node::KV(key, _)) => Some(key.clone()),
                _ => None,
            })
            .collect()
    } else {
        vec![]
    };

    let mut raw_iter = merk.raw_iter();
    raw_iter.seek_to_first();

    Ok(ChunkProducer {
        trunk,
        chunk_boundaries,
        raw_iter,
        index: 0,
    })
}

/// Gets the chunk with the given index. Errors if the index is out of
/// bounds or the tree is empty - the number of chunks can be checked by
/// calling `producer.len()`.
pub fn chunk(&mut self, index: usize) -> Result<Vec<u8>> {
    if index >= self.len() {
        return Err(Error::IndexOutOfBounds("Chunk index out-of-bounds".into()));
    }

    self.index = index;

    if index == 0 || index == 1 {
        // Trunk (0) and the first leaf chunk (1) both start at the first key.
        self.raw_iter.seek_to_first();
    } else {
        // Leaf chunk i (i >= 2) starts just past the (i - 2)th boundary key.
        let preceding_key = self.chunk_boundaries.get(index - 2).unwrap();
        self.raw_iter.seek(preceding_key);
        self.raw_iter.next();
    }

    self.next_chunk()
}

/// Returns the total number of chunks for the underlying Merk tree.
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize {
    let boundaries_len = self.chunk_boundaries.len();
    if boundaries_len == 0 {
        // No boundaries: the whole tree fits in the single trunk chunk.
        1
    } else {
        // Trunk + (boundaries + 1) leaf chunks.
        boundaries_len + 2
    }
}

/// Gets the next chunk based on the `ChunkProducer`'s internal index state.
/// This is mostly useful for letting `ChunkIter` yield the chunks in order,
/// optimizing throughput compared to random access.
fn next_chunk(&mut self) -> Result<Vec<u8>> {
    if self.index == 0 {
        if self.trunk.is_empty() {
            return Err(Error::Fetch(
                "Attempted to fetch chunk on empty tree".into(),
            ));
        }
        self.index += 1;
        return Ok(self.trunk.encode()?);
    }

    assert!(self.index < self.len(), "Called next_chunk after end");

    // The last chunk has no end boundary, so `end_key` may be `None`.
    let end_key = self.chunk_boundaries.get(self.index - 1);
    let end_key_slice = end_key.as_ref().map(|k| k.as_slice());

    self.index += 1;

    let chunk = get_next_chunk(&mut self.raw_iter, end_key_slice)?;
    Ok(chunk.encode()?)
}
}

impl<'a> IntoIterator for ChunkProducer<'a> {
    type IntoIter = ChunkIter<'a>;
    // NOTE(review): type parameters here were lost in extraction; restored.
    type Item = <ChunkIter<'a> as Iterator>::Item;

    fn into_iter(self) -> Self::IntoIter {
        ChunkIter(self)
    }
}

/// A `ChunkIter` iterates through all the chunks for the underlying `Merk`
/// instance in order (the first chunk is the "trunk" chunk). Yields `None`
/// after all chunks have been yielded.
pub struct ChunkIter<'a>(ChunkProducer<'a>);

impl<'a> Iterator for ChunkIter<'a> {
    type Item = Result<Vec<u8>>;

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.0.len(), Some(self.0.len()))
    }

    fn next(&mut self) -> Option<Self::Item> {
        if self.0.index >= self.0.len() {
            None
        } else {
            Some(self.0.next_chunk())
        }
    }
}

impl Merk {
    /// Creates a `ChunkProducer` which can return chunk proofs for replicating
    /// the entire Merk tree.
pub fn chunks(&self) -> Result { ChunkProducer::new(self) } } #[cfg(test)] mod tests { use super::*; use crate::{ proofs::{ chunk::{verify_leaf, verify_trunk}, Decoder, }, test_utils::*, }; #[test] fn len_small() { let mut merk = TempMerk::new().unwrap(); let batch = make_batch_seq(1..256); merk.apply(batch.as_slice(), &[]).unwrap(); let chunks = merk.chunks().unwrap(); assert_eq!(chunks.len(), 1); assert_eq!(chunks.into_iter().size_hint().0, 1); } #[test] fn len_big() { let mut merk = TempMerk::new().unwrap(); let batch = make_batch_seq(1..10_000); merk.apply(batch.as_slice(), &[]).unwrap(); let chunks = merk.chunks().unwrap(); assert_eq!(chunks.len(), 129); assert_eq!(chunks.into_iter().size_hint().0, 129); } #[test] fn generate_and_verify_chunks() -> Result<()> { let mut merk = TempMerk::new().unwrap(); let batch = make_batch_seq(1..10_000); merk.apply(batch.as_slice(), &[]).unwrap(); let mut chunks = merk.chunks().unwrap().into_iter().map(Result::unwrap); let chunk = chunks.next().unwrap(); let ops = Decoder::new(chunk.as_slice()); let (trunk, height) = verify_trunk(ops).unwrap(); assert_eq!(height, 14); assert_eq!(trunk.hash()?, merk.root_hash()); assert_eq!(trunk.layer(7).count(), 128); for (chunk, node) in chunks.zip(trunk.layer(height / 2)) { let ops = Decoder::new(chunk.as_slice()); verify_leaf(ops, node.hash()?).unwrap(); } Ok(()) } #[test] fn chunks_from_reopen() { let time = std::time::SystemTime::now() .duration_since(std::time::SystemTime::UNIX_EPOCH) .unwrap() .as_nanos(); let path = format!("chunks_from_reopen_{time}.db"); let original_chunks = { let mut merk = Merk::open(&path).unwrap(); let batch = make_batch_seq(1..10); merk.apply(batch.as_slice(), &[]).unwrap(); merk.chunks() .unwrap() .into_iter() .map(Result::unwrap) .collect::>() .into_iter() }; let merk = TempMerk::open(path).unwrap(); let reopen_chunks = merk.chunks().unwrap().into_iter().map(Result::unwrap); for (original, checkpoint) in original_chunks.zip(reopen_chunks) { 
assert_eq!(original.len(), checkpoint.len()); } } #[test] fn chunks_from_checkpoint() { let mut merk = TempMerk::new().unwrap(); let batch = make_batch_seq(1..10); merk.apply(batch.as_slice(), &[]).unwrap(); let path: std::path::PathBuf = "generate_and_verify_chunks_from_checkpoint.db".into(); if path.exists() { std::fs::remove_dir_all(&path).unwrap(); } let checkpoint = merk.checkpoint(&path).unwrap(); let original_chunks = merk.chunks().unwrap().into_iter().map(Result::unwrap); let checkpoint_chunks = checkpoint.chunks().unwrap().into_iter().map(Result::unwrap); for (original, checkpoint) in original_chunks.zip(checkpoint_chunks) { assert_eq!(original.len(), checkpoint.len()); } std::fs::remove_dir_all(&path).unwrap(); } #[test] fn random_access_chunks() { let mut merk = TempMerk::new().unwrap(); let batch = make_batch_seq(1..111); merk.apply(batch.as_slice(), &[]).unwrap(); let chunks = merk .chunks() .unwrap() .into_iter() .map(Result::unwrap) .collect::>(); let mut producer = merk.chunks().unwrap(); for i in 0..chunks.len() * 2 { let index = i % chunks.len(); assert_eq!(producer.chunk(index).unwrap(), chunks[index]); } } #[test] #[should_panic(expected = "Attempted to fetch chunk on empty tree")] fn test_chunk_empty() { let merk = TempMerk::new().unwrap(); let _chunks = merk .chunks() .unwrap() .into_iter() .map(Result::unwrap) .collect::>(); } #[test] #[should_panic(expected = "Chunk index out-of-bounds")] fn test_chunk_index_oob() { let mut merk = TempMerk::new().unwrap(); let batch = make_batch_seq(1..42); merk.apply(batch.as_slice(), &[]).unwrap(); let mut producer = merk.chunks().unwrap(); let _chunk = producer.chunk(50000).unwrap(); } #[test] fn test_chunk_index_gt_1_access() { let mut merk = TempMerk::new().unwrap(); let batch = make_batch_seq(1..513); merk.apply(batch.as_slice(), &[]).unwrap(); let mut producer = merk.chunks().unwrap(); println!("length: {}", producer.len()); let chunk = producer.chunk(2).unwrap(); assert_eq!( chunk, vec![ 3, 0, 8, 0, 
0, 0, 0, 0, 0, 0, 18, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 19, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 20, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 17, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 21, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 22, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 23, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 
123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 17, 17, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 25, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 26, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 27, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 28, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 17, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 29, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 
123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 30, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 31, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 0, 8, 0, 0, 0, 0, 0, 0, 0, 32, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 17, 17, 17 ] ); } #[test] #[should_panic(expected = "Called next_chunk after end")] fn test_next_chunk_index_oob() { let mut merk = TempMerk::new().unwrap(); let batch = make_batch_seq(1..42); merk.apply(batch.as_slice(), &[]).unwrap(); let mut producer = merk.chunks().unwrap(); let _chunk1 = producer.next_chunk(); let _chunk2 = producer.next_chunk(); } } ================================================ FILE: src/merk/mod.rs ================================================ pub mod chunks; pub mod restore; pub mod snapshot; use std::cmp::Ordering; use std::collections::LinkedList; use std::path::{Path, PathBuf}; use std::sync::RwLock; use rocksdb::DB; use rocksdb::{checkpoint::Checkpoint, ColumnFamilyDescriptor, WriteBatch}; use crate::error::{Error, Result}; use crate::proofs::{encode_into, query::QueryItem}; use crate::tree::{Batch, 
Commit, Fetch, GetResult, Hash, Op, RefWalker, Tree, Walker, NULL_HASH}; pub use self::snapshot::Snapshot; const ROOT_KEY_KEY: &[u8] = b"root"; const FORMAT_VERSION_KEY: &[u8] = b"format"; const AUX_CF_NAME: &str = "aux"; const INTERNAL_CF_NAME: &str = "internal"; const FORMAT_VERSION: u64 = 1; fn column_families() -> Vec { vec![ // TODO: clone opts or take args ColumnFamilyDescriptor::new(AUX_CF_NAME, Merk::default_db_opts()), ColumnFamilyDescriptor::new(INTERNAL_CF_NAME, Merk::default_db_opts()), ] } /// A handle to a Merkle key/value store backed by RocksDB. pub struct Merk { pub(crate) tree: RwLock>, pub(crate) db: rocksdb::DB, pub(crate) path: PathBuf, } pub type UseTreeMutResult = Result, Option>)>>; impl Merk { /// Opens a store with the specified file path. If no store exists at that /// path, one will be created. pub fn open>(path: P) -> Result { let db_opts = Merk::default_db_opts(); Merk::open_opt(path, db_opts) } pub fn open_readonly>(path: P) -> Result { let db_opts = Merk::default_db_opts(); let mut path_buf = PathBuf::new(); path_buf.push(path); let db = rocksdb::DB::open_cf_descriptors_read_only( &db_opts, &path_buf, column_families(), false, )?; let format_version = load_format_version(&db)?; if format_version != FORMAT_VERSION { return Err(Error::Version(format!( "Format version mismatch: expected {}, found {}", FORMAT_VERSION, format_version, ))); } Ok(Merk { tree: RwLock::new(load_root(&db)?), db, path: path_buf, }) } /// Opens a store with the specified file path and the given options. If no /// store exists at that path, one will be created. pub fn open_opt

(path: P, db_opts: rocksdb::Options) -> Result where P: AsRef, { let mut path_buf = PathBuf::new(); path_buf.push(path); let mut db = rocksdb::DB::open_cf_descriptors(&db_opts, &path_buf, column_families())?; let format_version = load_format_version(&db)?; if has_root(&db)? { if format_version == 0 { log::info!("Migrating store from version 0 to {}...", FORMAT_VERSION); drop(db); Merk::migrate_from_v0(&path_buf)?; db = rocksdb::DB::open_cf_descriptors(&db_opts, &path_buf, column_families())?; } else if format_version != FORMAT_VERSION { return Err(Error::Version(format!( "Unknown format version: expected <= {}, found {}", FORMAT_VERSION, format_version, ))); } } Ok(Merk { tree: RwLock::new(load_root(&db)?), db, path: path_buf, }) } pub fn open_and_get_aux

(path: P, key: &[u8]) -> Result>> where P: AsRef, { let db_opts = Merk::default_db_opts(); let db = rocksdb::DB::open_cf_descriptors_read_only(&db_opts, path, column_families(), false)?; let aux_cf = db.cf_handle(AUX_CF_NAME).unwrap(); Ok(db.get_cf(aux_cf, key)?) } pub fn default_db_opts() -> rocksdb::Options { let mut opts = rocksdb::Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_atomic_flush(true); // TODO: tune opts.increase_parallelism(num_cpus::get() as i32); // opts.set_advise_random_on_open(false); opts.set_allow_mmap_writes(true); opts.set_allow_mmap_reads(true); opts.set_max_log_file_size(1_000_000); opts.set_recycle_log_file_num(5); opts.set_keep_log_file_num(5); opts.set_log_level(rocksdb::LogLevel::Warn); opts } /// Gets an auxiliary value. pub fn get_aux(&self, key: &[u8]) -> Result>> { let aux_cf = self.db.cf_handle(AUX_CF_NAME); Ok(self.db.get_cf(aux_cf.unwrap(), key)?) } /// Gets a value for the given key. If the key is not found, `None` is /// returned. /// /// Note that this is essentially the same as a normal RocksDB `get`, so /// should be a fast operation and has almost no tree overhead. pub fn get(&self, key: &[u8]) -> Result>> { self.use_tree(|maybe_tree| { maybe_tree .and_then(|tree| get(tree, self.source(), key).transpose()) .transpose() }) } /// Returns the root hash of the tree (a digest for the entire store which /// proofs can be checked against). If the tree is empty, returns the null /// hash (zero-filled). pub fn root_hash(&self) -> Hash { self.use_tree(root_hash) } /// Applies a batch of operations (puts and deletes) to the tree. /// /// This will fail if the keys in `batch` are not sorted and unique. This /// check creates some overhead, so if you are sure your batch is sorted and /// unique you can use the unsafe `apply_unchecked` for a small performance /// gain. 
/// /// # Example /// ``` /// # let mut store = merk::test_utils::TempMerk::new().unwrap(); /// # store.apply(&[(vec![4,5,6], Op::Put(vec![0]))], &[]).unwrap(); /// /// use merk::Op; /// /// let batch = &[ /// (vec![1, 2, 3], Op::Put(vec![4, 5, 6])), // puts value [4,5,6] to key [1,2,3] /// (vec![4, 5, 6], Op::Delete) // deletes key [4,5,6] /// ]; /// store.apply(batch, &[]).unwrap(); /// ``` pub fn apply(&mut self, batch: &Batch, aux: &Batch) -> Result<()> { // ensure keys in batch are sorted and unique let mut maybe_prev_key: Option> = None; for (key, _) in batch.iter() { if let Some(prev_key) = maybe_prev_key { match prev_key.cmp(key) { Ordering::Greater => { return Err(Error::BatchKey("Keys in batch must be sorted".into())); } Ordering::Equal => { return Err(Error::BatchKey("Keys in batch must be unique".into())); } _ => (), } } maybe_prev_key = Some(key.to_vec()); } unsafe { self.apply_unchecked(batch, aux) } } /// Applies a batch of operations (puts and deletes) to the tree. /// /// # Safety /// This is unsafe because the keys in `batch` must be sorted and unique - /// if they are not, there will be undefined behavior. For a safe version of /// this method which checks to ensure the batch is sorted and unique, see /// `apply`. 
/// /// # Example /// ``` /// # let mut store = merk::test_utils::TempMerk::new().unwrap(); /// # store.apply(&[(vec![4,5,6], Op::Put(vec![0]))], &[]).unwrap(); /// /// use merk::Op; /// /// let batch = &[ /// (vec![1, 2, 3], Op::Put(vec![4, 5, 6])), // puts value [4,5,6] to key [1,2,3] /// (vec![4, 5, 6], Op::Delete) // deletes key [4,5,6] /// ]; /// unsafe { store.apply_unchecked(batch, &[]).unwrap() }; /// ``` pub unsafe fn apply_unchecked(&mut self, batch: &Batch, aux: &Batch) -> Result<()> { let mut tree = self.tree.write().unwrap(); let maybe_walker = tree.take().map(|tree| Walker::new(tree, self.source())); let (maybe_tree, deleted_keys) = Walker::apply_to(maybe_walker, batch, self.source())?; *tree = maybe_tree; drop(tree); // commit changes to db self.commit(deleted_keys, aux) } /// Closes the store and deletes all data from disk. pub fn destroy(self) -> Result<()> { let opts = Merk::default_db_opts(); let path = self.path.clone(); drop(self); rocksdb::DB::destroy(&opts, path)?; Ok(()) } /// Completely rebuilds the tree, keeping all the same stored keys and /// values. 
pub fn repair(self) -> Result { use rocksdb::IteratorMode; let path = self.path.clone(); let create_path = |suffix| { let mut tmp_path = path.clone(); let tmp_file_name = format!("{}-{}", path.file_name().unwrap().to_str().unwrap(), suffix); tmp_path.set_file_name(tmp_file_name); tmp_path }; let tmp_path = create_path("repair1"); let tmp = Merk::open(&tmp_path)?; tmp.destroy()?; // TODO: split up batch let mut node = Tree::new(vec![], vec![])?; let batch: Vec<_> = self .db .iterator(IteratorMode::Start) .map(|entry| { let (key, node_bytes) = entry.unwrap(); // TODO node.decode_into(vec![], &node_bytes); (key.to_vec(), Op::Put(node.value().to_vec())) }) .collect(); let aux_cf = self.db.cf_handle(AUX_CF_NAME).unwrap(); let aux: Vec<_> = self .db .iterator_cf(aux_cf, IteratorMode::Start) .map(|entry| { let (key, value) = entry.unwrap(); // TODO (key.to_vec(), Op::Put(value.to_vec())) }) .collect(); drop(self); let mut tmp = Self::open(&tmp_path)?; tmp.apply(&batch, &aux)?; drop(tmp); let tmp_path2 = create_path("repair2"); std::fs::rename(&path, &tmp_path2)?; std::fs::rename(&tmp_path, &path)?; std::fs::remove_dir_all(&tmp_path2)?; Self::open(path) } pub fn migrate_from_v0>(path: P) -> Result<()> { let path = path.as_ref().to_path_buf(); let db = rocksdb::DB::open_cf_descriptors(&Merk::default_db_opts(), path, column_families())?; let mut iter = db.raw_iterator(); iter.seek_to_first(); while iter.valid() { let key = iter.key().unwrap(); let mut value = iter.value().unwrap(); let node = Tree::decode_v0(&mut value)?; let new_value = node.encode(); db.put(key, new_value.as_slice())?; iter.next(); } db.put_cf( db.cf_handle(INTERNAL_CF_NAME).unwrap(), FORMAT_VERSION_KEY, FORMAT_VERSION.to_be_bytes(), )?; Ok(()) } /// Creates a Merkle proof for the list of queried keys. For each key in the /// query, if the key is found in the store then the value will be proven to /// be in the tree. 
For each key in the query that does not exist in the /// tree, its absence will be proven by including boundary keys. /// /// The proof returned is in an encoded format which can be verified with /// `merk::verify`. pub fn prove(&self, query: I) -> Result> where Q: Into, I: IntoIterator, { self.use_tree_mut(move |maybe_tree| prove(maybe_tree, self.source(), query)) } pub fn flush(&self) -> Result<()> { Ok(self.db.flush()?) } pub fn commit(&mut self, deleted_keys: LinkedList>, aux: &Batch) -> Result<()> { let internal_cf = self.db.cf_handle(INTERNAL_CF_NAME).unwrap(); let aux_cf = self.db.cf_handle(AUX_CF_NAME).unwrap(); let mut batch = rocksdb::WriteBatch::default(); let mut to_batch = self.use_tree_mut(|maybe_tree| -> UseTreeMutResult { // TODO: concurrent commit if let Some(tree) = maybe_tree { // TODO: configurable committer let mut committer = MerkCommitter::new(tree.height(), 21); tree.commit(&mut committer)?; // update pointer to root node batch.put_cf(internal_cf, ROOT_KEY_KEY, tree.key()); Ok(committer.batch) } else { // empty tree, delete pointer to root batch.delete_cf(internal_cf, ROOT_KEY_KEY); Ok(vec![]) } })?; // TODO: move this to MerkCommitter impl? 
for key in deleted_keys { to_batch.push((key, None)); } to_batch.sort_by(|a, b| a.0.cmp(&b.0)); for (key, maybe_value) in to_batch { if let Some(value) = maybe_value { batch.put(key, value); } else { batch.delete(key); } } for (key, value) in aux { match value { Op::Put(value) => batch.put_cf(aux_cf, key, value), Op::Delete => batch.delete_cf(aux_cf, key), }; } // update format version // TODO: shouldn't need a write per commit batch.put_cf( internal_cf, FORMAT_VERSION_KEY, FORMAT_VERSION.to_be_bytes(), ); // write to db self.write(batch)?; Ok(()) } pub fn walk(&self, f: impl FnOnce(Option>) -> T) -> T { let mut tree = self.tree.write().unwrap(); let maybe_walker = tree .as_mut() .map(|tree| RefWalker::new(tree, self.source())); f(maybe_walker) } pub fn raw_iter(&self) -> rocksdb::DBRawIterator { self.db.raw_iterator() } pub fn checkpoint>(&self, path: P) -> Result { Checkpoint::new(&self.db)?.create_checkpoint(&path)?; Merk::open(path) } pub fn snapshot(&self) -> Result { Ok(Snapshot::new(self.db.snapshot(), load_root(&self.db)?)) } pub fn db(&self) -> &DB { &self.db } fn source(&self) -> MerkSource { MerkSource { db: &self.db } } fn use_tree(&self, f: impl FnOnce(Option<&Tree>) -> T) -> T { let tree = self.tree.read().unwrap(); f(tree.as_ref()) } fn use_tree_mut(&self, f: impl FnOnce(Option<&mut Tree>) -> T) -> T { let mut tree_slot = self.tree.write().unwrap(); let mut tree = tree_slot.take(); let res = f(tree.as_mut()); *tree_slot = tree; res } pub(crate) fn write(&mut self, batch: WriteBatch) -> Result<()> { let mut opts = rocksdb::WriteOptions::default(); opts.set_sync(false); // TODO: disable WAL once we can ensure consistency with transactions self.db.write_opt(batch, &opts)?; Ok(()) } pub(crate) fn set_root_key(&mut self, key: Vec) -> Result<()> { let internal_cf = self.db.cf_handle(INTERNAL_CF_NAME).unwrap(); let mut batch = WriteBatch::default(); batch.put_cf(internal_cf, ROOT_KEY_KEY, key); self.write(batch) } pub(crate) fn fetch_node(&self, key: &[u8]) 
-> Result> { self.source().fetch_by_key(key) } pub(crate) fn load_root(&mut self) -> Result<()> { let root = load_root(&self.db)?; *self.tree.write().unwrap() = root; Ok(()) } } #[derive(Clone)] pub struct MerkSource<'a> { db: &'a rocksdb::DB, } impl<'a> Fetch for MerkSource<'a> { fn fetch_by_key(&self, key: &[u8]) -> Result> { Ok(self .db .get_pinned(key)? .map(|bytes| Tree::decode(key.to_vec(), &bytes))) } } struct MerkCommitter { batch: Vec<(Vec, Option>)>, height: u8, levels: u8, } impl MerkCommitter { fn new(height: u8, levels: u8) -> Self { MerkCommitter { batch: Vec::with_capacity(10000), height, levels, } } } impl Commit for MerkCommitter { fn write(&mut self, tree: &Tree) -> Result<()> { let mut buf = Vec::with_capacity(tree.encoding_length()); tree.encode_into(&mut buf); self.batch.push((tree.key().to_vec(), Some(buf))); Ok(()) } fn prune(&self, tree: &Tree) -> (bool, bool) { // keep N top levels of tree let prune = (self.height - tree.height()) >= self.levels; (prune, prune) } } pub fn get(tree: &Tree, source: F, key: &[u8]) -> Result>> { Ok(match tree.get_value(key)? 
{ GetResult::Found(value) => Some(value), GetResult::NotFound => None, GetResult::Pruned => source.fetch_by_key(key)?.map(|node| node.value().to_vec()), }) } fn root_hash(maybe_tree: Option<&Tree>) -> Hash { maybe_tree.map_or(NULL_HASH, |tree| tree.hash()) } fn prove(maybe_tree: Option<&mut Tree>, source: F, query: I) -> Result> where Q: Into, I: IntoIterator, F: Fetch + Send + Clone, { let query_vec: Vec = query.into_iter().map(Into::into).collect(); let tree = maybe_tree.ok_or_else(|| Error::Proof("Cannot create proof for empty tree".into()))?; let mut ref_walker = RefWalker::new(tree, source); let (proof, _) = ref_walker.create_proof(query_vec.as_slice())?; let mut bytes = Vec::with_capacity(128); encode_into(proof.iter(), &mut bytes); Ok(bytes) } fn has_root(db: &DB) -> Result { let internal_cf = db.cf_handle(INTERNAL_CF_NAME).unwrap(); Ok(db.get_pinned_cf(internal_cf, ROOT_KEY_KEY)?.is_some()) } fn load_root(db: &DB) -> Result> { let internal_cf = db.cf_handle(INTERNAL_CF_NAME).unwrap(); db.get_pinned_cf(internal_cf, ROOT_KEY_KEY)? 
.map(|key| MerkSource { db }.fetch_by_key_expect(key.to_vec().as_slice())) .transpose() } fn load_format_version(db: &DB) -> Result { let internal_cf = db.cf_handle(INTERNAL_CF_NAME).unwrap(); let maybe_version = db.get_pinned_cf(internal_cf, FORMAT_VERSION_KEY)?; let Some(version) = maybe_version else { return Ok(0); }; let mut buf = [0; 8]; buf.copy_from_slice(&version); Ok(u64::from_be_bytes(buf)) } #[cfg(test)] mod test { use super::{Merk, MerkSource, RefWalker}; use crate::test_utils::*; use crate::Op; use std::thread; // TODO: Close and then reopen test fn assert_invariants(merk: &TempMerk) { merk.use_tree(|maybe_tree| { let tree = maybe_tree.expect("expected tree"); assert_tree_invariants(tree); }) } #[test] fn simple_insert_apply() { let batch_size = 20; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); let batch = make_batch_seq(0..batch_size); merk.apply(&batch, &[]).expect("apply failed"); assert_invariants(&merk); assert_eq!( merk.root_hash(), [ 29, 99, 91, 248, 54, 96, 47, 252, 39, 203, 208, 163, 199, 30, 34, 251, 247, 34, 241, 203, 17, 252, 127, 44, 155, 83, 22, 54, 117, 85, 252, 200 ] ); } #[test] fn insert_uncached() { let batch_size = 20; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); let batch = make_batch_seq(0..batch_size); merk.apply(&batch, &[]).expect("apply failed"); assert_invariants(&merk); let batch = make_batch_seq(batch_size..(batch_size * 2)); merk.apply(&batch, &[]).expect("apply failed"); assert_invariants(&merk); } #[test] fn insert_rand() { let tree_size = 40; let batch_size = 4; let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); for i in 0..(tree_size / batch_size) { println!("i:{i}"); let batch = make_batch_rand(batch_size, i); merk.apply(&batch, &[]).expect("apply failed"); } } #[test] fn actual_deletes() { 
let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); let batch = make_batch_rand(10, 1); merk.apply(&batch, &[]).expect("apply failed"); let key = batch.first().unwrap().0.clone(); merk.apply(&[(key.clone(), Op::Delete)], &[]).unwrap(); let value = merk.db.get(key.as_slice()).unwrap(); assert!(value.is_none()); } #[test] fn aux_data() { let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); merk.apply(&[], &[(vec![1, 2, 3], Op::Put(vec![4, 5, 6]))]) .expect("apply failed"); let val = merk.get_aux(&[1, 2, 3]).unwrap(); assert_eq!(val, Some(vec![4, 5, 6])); } #[test] fn simulated_crash() { let path = thread::current().name().unwrap().to_owned(); let mut merk = CrashMerk::open(path).expect("failed to open merk"); merk.apply( &[(vec![0], Op::Put(vec![1]))], &[(vec![2], Op::Put(vec![3]))], ) .expect("apply failed"); // make enough changes so that main column family gets auto-flushed for i in 0..250 { merk.apply(&make_batch_seq(i * 2_000..(i + 1) * 2_000), &[]) .expect("apply failed"); } unsafe { merk.crash().unwrap(); } assert_eq!(merk.get_aux(&[2]).unwrap(), Some(vec![3])); merk.destroy().unwrap(); } #[test] fn get_not_found() { let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(path).expect("failed to open merk"); // no root assert!(merk.get(&[1, 2, 3]).unwrap().is_none()); // cached merk.apply(&[(vec![5, 5, 5], Op::Put(vec![]))], &[]) .unwrap(); assert!(merk.get(&[1, 2, 3]).unwrap().is_none()); // uncached merk.apply( &[ (vec![0, 0, 0], Op::Put(vec![])), (vec![1, 1, 1], Op::Put(vec![])), (vec![2, 2, 2], Op::Put(vec![])), ], &[], ) .unwrap(); assert!(merk.get(&[3, 3, 3]).unwrap().is_none()); } #[test] fn reopen() { fn collect(mut node: RefWalker, nodes: &mut Vec>) { nodes.push(node.tree().encode()); node.walk(true) .unwrap() .into_iter() .for_each(|c| collect(c, nodes)); node.walk(false) 
.unwrap() .into_iter() .for_each(|c| collect(c, nodes)); } let time = std::time::SystemTime::now() .duration_since(std::time::SystemTime::UNIX_EPOCH) .unwrap() .as_nanos(); let path = format!("merk_reopen_{time}.db"); let original_nodes = { let mut merk = Merk::open(&path).unwrap(); let batch = make_batch_seq(1..10_000); merk.apply(batch.as_slice(), &[]).unwrap(); let mut tree = merk.tree.write().unwrap().take().unwrap(); let walker = RefWalker::new(&mut tree, merk.source()); let mut nodes = vec![]; collect(walker, &mut nodes); nodes }; let merk = TempMerk::open(&path).unwrap(); let mut tree = merk.tree.write().unwrap().take().unwrap(); let walker = RefWalker::new(&mut tree, merk.source()); let mut reopen_nodes = vec![]; collect(walker, &mut reopen_nodes); assert_eq!(reopen_nodes, original_nodes); } #[test] fn reopen_iter() { fn collect(iter: &mut rocksdb::DBRawIterator, nodes: &mut Vec<(Vec, Vec)>) { while iter.valid() { nodes.push((iter.key().unwrap().to_vec(), iter.value().unwrap().to_vec())); iter.next(); } } let time = std::time::SystemTime::now() .duration_since(std::time::SystemTime::UNIX_EPOCH) .unwrap() .as_nanos(); let path = format!("merk_reopen_{time}.db"); let original_nodes = { let mut merk = Merk::open(&path).unwrap(); let batch = make_batch_seq(1..10_000); merk.apply(batch.as_slice(), &[]).unwrap(); let mut nodes = vec![]; collect(&mut merk.raw_iter(), &mut nodes); nodes }; let merk = TempMerk::open(&path).unwrap(); let mut reopen_nodes = vec![]; collect(&mut merk.raw_iter(), &mut reopen_nodes); assert_eq!(reopen_nodes, original_nodes); } #[test] fn checkpoint() { let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(&path).expect("failed to open merk"); merk.apply(&[(vec![1], Op::Put(vec![0]))], &[]) .expect("apply failed"); let mut checkpoint = merk.checkpoint(path + ".checkpoint").unwrap(); assert_eq!(merk.get(&[1]).unwrap(), Some(vec![0])); assert_eq!(checkpoint.get(&[1]).unwrap(), Some(vec![0])); merk.apply( 
&[(vec![1], Op::Put(vec![1])), (vec![2], Op::Put(vec![0]))], &[], ) .expect("apply failed"); assert_eq!(merk.get(&[1]).unwrap(), Some(vec![1])); assert_eq!(merk.get(&[2]).unwrap(), Some(vec![0])); assert_eq!(checkpoint.get(&[1]).unwrap(), Some(vec![0])); assert_eq!(checkpoint.get(&[2]).unwrap(), None); checkpoint .apply(&[(vec![2], Op::Put(vec![123]))], &[]) .expect("apply failed"); assert_eq!(merk.get(&[1]).unwrap(), Some(vec![1])); assert_eq!(merk.get(&[2]).unwrap(), Some(vec![0])); assert_eq!(checkpoint.get(&[1]).unwrap(), Some(vec![0])); assert_eq!(checkpoint.get(&[2]).unwrap(), Some(vec![123])); checkpoint.destroy().unwrap(); assert_eq!(merk.get(&[1]).unwrap(), Some(vec![1])); assert_eq!(merk.get(&[2]).unwrap(), Some(vec![0])); } #[test] fn checkpoint_iterator() { let path = thread::current().name().unwrap().to_owned(); let mut merk = TempMerk::open(&path).expect("failed to open merk"); merk.apply(&make_batch_seq(1..100), &[]) .expect("apply failed"); let path: std::path::PathBuf = (path + ".checkpoint").into(); if path.exists() { std::fs::remove_dir_all(&path).unwrap(); } let checkpoint = merk.checkpoint(&path).unwrap(); let mut merk_iter = merk.raw_iter(); let mut checkpoint_iter = checkpoint.raw_iter(); loop { assert_eq!(merk_iter.valid(), checkpoint_iter.valid()); if !merk_iter.valid() { break; } assert_eq!(merk_iter.key(), checkpoint_iter.key()); assert_eq!(merk_iter.value(), checkpoint_iter.value()); merk_iter.next(); checkpoint_iter.next(); } std::fs::remove_dir_all(&path).unwrap(); } #[test] fn repair() { let path = thread::current().name().unwrap().to_owned(); let mut merk = Merk::open(&path).expect("failed to open merk"); merk.apply(&make_batch_seq(0..100), &[]) .expect("apply failed"); let merk = merk.repair().unwrap(); merk.walk(|mut maybe_walker| { fn recurse(maybe_walker: &mut Option>) { if let Some(walker) = maybe_walker { recurse(&mut walker.walk(true).unwrap()); recurse(&mut walker.walk(false).unwrap()); } } recurse(&mut maybe_walker); let 
walker = maybe_walker.unwrap(); let exp_value = put_entry_value(); for (i, (key, value)) in walker.tree().iter().enumerate() { let exp_key = seq_key(i as u64); assert_eq!(key, exp_key); assert_eq!(value, exp_value); } }); std::fs::remove_dir_all(&path).unwrap(); } } ================================================ FILE: src/merk/restore.rs ================================================ //! Provides `Restorer`, which can create a replica of a Merk instance by //! receiving chunk proofs. use super::Merk; use crate::{ merk::MerkSource, proofs::{ chunk::{verify_leaf, verify_trunk, MIN_TRUNK_HEIGHT}, tree::{Child, Tree as ProofTree}, Decoder, Node, }, tree::{Link, RefWalker, Tree}, Error, Hash, Result, }; use rocksdb::WriteBatch; use std::iter::Peekable; use std::path::Path; /// A `Restorer` handles decoding, verifying, and storing chunk proofs to /// replicate an entire Merk tree. It expects the chunks to be processed in /// order, retrying the last chunk if verification fails. pub struct Restorer { leaf_hashes: Option>>, parent_keys: Option>>>, trunk_height: Option, merk: Merk, expected_root_hash: Hash, stated_length: usize, } impl Restorer { /// Creates a new `Restorer`, which will initialize a new Merk at the given /// file path. The first chunk (the "trunk") will be compared against /// `expected_root_hash`, then each subsequent chunk will be compared /// against the hashes stored in the trunk, so that the restore process will /// never allow malicious peers to send more than a single invalid chunk. /// /// The `stated_length` should be the number of chunks stated by the peer, /// which will be verified after processing a valid first chunk to make it /// easier to download chunks from peers without needing to trust this /// length. 
pub fn new>( db_path: P, expected_root_hash: Hash, stated_length: usize, ) -> Result { if db_path.as_ref().exists() { return Err(Error::Path("The given path already exists".into())); } Ok(Self { expected_root_hash, stated_length, trunk_height: None, merk: Merk::open(db_path)?, leaf_hashes: None, parent_keys: None, }) } /// Verifies a chunk and writes it to the working RocksDB instance. Expects /// to be called for each chunk in order. Returns the number of remaining /// chunks. /// /// Once there are no remaining chunks to be processed, `finalize` should /// be called. pub fn process_chunk(&mut self, chunk_bytes: &[u8]) -> Result { let ops = Decoder::new(chunk_bytes); match self.leaf_hashes { None => self.process_trunk(ops), Some(_) => self.process_leaf(ops), } } /// Consumes the `Restorer` and returns the newly-created, fully-populated /// Merk instance. This method will return an error if called before /// processing all chunks (e.g. `restorer.remaining_chunks()` is not equal /// to 0). pub fn finalize(mut self) -> Result { if self.remaining_chunks().is_none() || self.remaining_chunks().unwrap() != 0 { return Err(Error::ChunkProcessing( "Called finalize before all chunks were processed".into(), )); } if self.trunk_height.unwrap() >= MIN_TRUNK_HEIGHT { self.rewrite_trunk_child_heights()?; } self.merk.flush()?; self.merk.load_root()?; Ok(self.merk) } /// Returns the number of remaining chunks to be processed. If called before /// the first chunk is processed, this method will return `None` since we do /// not yet have enough information to know about the number of chunks. pub fn remaining_chunks(&self) -> Option { self.leaf_hashes.as_ref().map(|lh| lh.len()) } /// Writes the data contained in `tree` (extracted from a verified chunk /// proof) to the RocksDB. 
fn write_chunk(&mut self, tree: ProofTree) -> Result<()> { let mut batch = WriteBatch::default(); tree.visit_refs(&mut |proof_node| { let (key, mut node) = match &proof_node.node { // TODO: encode tree node without cloning key/value Node::KV(key, value) => match Tree::new(key.clone(), value.clone()) { Ok(node) => (key, node), Err(_) => return, }, _ => return, }; *node.slot_mut(true) = proof_node.left.as_ref().map(Child::as_link); *node.slot_mut(false) = proof_node.right.as_ref().map(Child::as_link); let bytes = node.encode(); batch.put(key, bytes); }); self.merk.write(batch) } /// Verifies the trunk then writes its data to the RocksDB. /// /// The trunk contains a height proof which lets us verify the total number /// of expected chunks is the same as `stated_length` as passed into /// `Restorer::new()`. We also verify the expected root hash at this step. fn process_trunk(&mut self, ops: Decoder) -> Result { let (trunk, height) = verify_trunk(ops)?; if trunk.hash()? != self.expected_root_hash { return Err(Error::HashMismatch(self.expected_root_hash, trunk.hash()?)); } let root_key = trunk.key().to_vec(); let trunk_height = height / 2; self.trunk_height = Some(trunk_height); let chunks_remaining = if trunk_height >= MIN_TRUNK_HEIGHT { let leaf_hashes = trunk .layer(trunk_height) .map(|node| node.hash()) .collect::>>()? 
.into_iter() .peekable(); self.leaf_hashes = Some(leaf_hashes); let parent_keys = trunk .layer(trunk_height - 1) .map(|node| node.key().to_vec()) .collect::>>() .into_iter() .peekable(); self.parent_keys = Some(parent_keys); assert_eq!( self.parent_keys.as_ref().unwrap().len(), self.leaf_hashes.as_ref().unwrap().len() / 2 ); let chunks_remaining = (2_usize).pow(trunk_height as u32); assert_eq!(self.remaining_chunks_unchecked(), chunks_remaining); chunks_remaining } else { self.leaf_hashes = Some(vec![].into_iter().peekable()); self.parent_keys = Some(vec![].into_iter().peekable()); 0 }; if self.stated_length != chunks_remaining + 1 { return Err(Error::ChunkProcessing( "Stated length does not match calculated number of chunks".into(), )); } // note that these writes don't happen atomically, which is fine here // because if anything fails during the restore process we will just // scrap the whole restore and start over self.write_chunk(trunk)?; self.merk.set_root_key(root_key)?; Ok(chunks_remaining) } /// Verifies a leaf chunk then writes it to the RocksDB. This needs to be /// called in order, retrying the last chunk for any failed verifications. fn process_leaf(&mut self, ops: Decoder) -> Result { let leaf_hashes = self.leaf_hashes.as_mut().unwrap(); let leaf_hash = leaf_hashes .peek() .expect("Received more chunks than expected"); let leaf = verify_leaf(ops, *leaf_hash)?; self.rewrite_parent_link(&leaf)?; self.write_chunk(leaf)?; let leaf_hashes = self.leaf_hashes.as_mut().unwrap(); leaf_hashes.next(); Ok(self.remaining_chunks_unchecked()) } /// The parent of the root node of the leaf does not know the key of its /// children when it is first written. Now that we have verified this leaf, /// we can write the key into the parent node's entry. Note that this does /// not need to recalcuate hashes since it already had the child hash. 
fn rewrite_parent_link(&mut self, leaf: &ProofTree) -> Result<()> { let parent_keys = self.parent_keys.as_mut().unwrap(); let parent_key = parent_keys.peek().unwrap().clone(); let mut parent = self .merk .fetch_node(parent_key.as_slice())? .expect("Could not find parent of leaf chunk"); let is_left_child = self.remaining_chunks_unchecked() % 2 == 0; if let Some(Link::Reference { ref mut key, .. }) = parent.link_mut(is_left_child) { *key = leaf.key().to_vec(); } else { panic!("Expected parent links to be type Link::Reference"); }; let parent_bytes = parent.encode(); self.merk.db.put(parent_key, parent_bytes)?; if !is_left_child { let parent_keys = self.parent_keys.as_mut().unwrap(); parent_keys.next(); } Ok(()) } fn rewrite_trunk_child_heights(&mut self) -> Result<()> { fn recurse( mut node: RefWalker, remaining_depth: usize, batch: &mut WriteBatch, ) -> Result<(u8, u8)> { if remaining_depth == 0 { return Ok(node.tree().child_heights()); } let mut cloned_node = Tree::decode(node.tree().key().to_vec(), node.tree().encode().as_slice()); let left_child = node.walk(true)?.unwrap(); let left_child_heights = recurse(left_child, remaining_depth - 1, batch)?; let left_height = left_child_heights.0.max(left_child_heights.1) + 1; *cloned_node.link_mut(true).unwrap().child_heights_mut() = left_child_heights; let right_child = node.walk(false)?.unwrap(); let right_child_heights = recurse(right_child, remaining_depth - 1, batch)?; let right_height = right_child_heights.0.max(right_child_heights.1) + 1; *cloned_node.link_mut(false).unwrap().child_heights_mut() = right_child_heights; let bytes = cloned_node.encode(); batch.put(node.tree().key(), bytes); Ok((left_height, right_height)) } self.merk.flush()?; self.merk.load_root()?; let mut batch = WriteBatch::default(); let depth = self.trunk_height.unwrap(); self.merk.use_tree_mut(|maybe_tree| { let tree = maybe_tree.unwrap(); let walker = RefWalker::new(tree, self.merk.source()); recurse(walker, depth, &mut batch) })?; 
self.merk.write(batch)?; Ok(()) } /// Returns the number of remaining chunks to be processed. This method will /// panic if called before processing the first chunk (since that chunk /// gives us the information to know how many chunks to expect). pub fn remaining_chunks_unchecked(&self) -> usize { self.leaf_hashes.as_ref().unwrap().len() } } impl Merk { /// Creates a new `Restorer`, which can be used to verify chunk proofs to /// replicate an entire Merk tree. A new Merk instance will be initialized /// by creating a RocksDB at `path`. /// /// The restoration process will verify integrity by checking that the /// incoming chunk proofs match `expected_root_hash`. The `stated_length` /// should be the number of chunks as stated by peers, which will also be /// verified during the restoration process. pub fn restore>( path: P, expected_root_hash: Hash, stated_length: usize, ) -> Result { Restorer::new(path, expected_root_hash, stated_length) } } impl ProofTree { fn child_heights(&self) -> (u8, u8) { ( self.left.as_ref().map_or(0, |c| c.tree.height as u8), self.right.as_ref().map_or(0, |c| c.tree.height as u8), ) } } impl Child { fn as_link(&self) -> Link { let key = match &self.tree.node { Node::KV(key, _) => key.as_slice(), // for the connection between the trunk and leaf chunks, we don't // have the child key so we must first write in an empty one. 
once // the leaf gets verified, we can write in this key to its parent _ => &[], }; Link::Reference { hash: self.hash, child_heights: self.tree.child_heights(), key: key.to_vec(), } } } #[cfg(test)] mod tests { use super::*; use crate::test_utils::*; use crate::tree::{Batch, Op}; use std::path::PathBuf; fn restore_test(batches: &[&Batch], expected_nodes: usize) { let mut original = TempMerk::new().unwrap(); for batch in batches { original.apply(batch, &[]).unwrap(); } original.flush().unwrap(); let chunks = original.chunks().unwrap(); let path: PathBuf = std::thread::current().name().unwrap().into(); if path.exists() { std::fs::remove_dir_all(&path).unwrap(); } let mut restorer = Merk::restore(&path, original.root_hash(), chunks.len()).unwrap(); assert_eq!(restorer.remaining_chunks(), None); let mut expected_remaining = chunks.len(); for chunk in chunks { let chunk = chunk.unwrap(); let remaining = restorer.process_chunk(chunk.as_slice()).unwrap(); expected_remaining -= 1; assert_eq!(remaining, expected_remaining); assert_eq!(restorer.remaining_chunks().unwrap(), expected_remaining); } assert_eq!(expected_remaining, 0); let restored = restorer.finalize().unwrap(); assert_eq!(restored.root_hash(), original.root_hash()); assert_raw_db_entries_eq(&restored, &original, expected_nodes); std::fs::remove_dir_all(&path).unwrap(); } #[test] fn restore_10000() { restore_test(&[&make_batch_seq(0..10_000)], 10_000); } #[test] fn restore_3() { restore_test(&[&make_batch_seq(0..3)], 3); } #[test] fn restore_2_left_heavy() { restore_test( &[&[(vec![0], Op::Put(vec![]))], &[(vec![1], Op::Put(vec![]))]], 2, ); } #[test] fn restore_2_right_heavy() { restore_test( &[&[(vec![1], Op::Put(vec![]))], &[(vec![0], Op::Put(vec![]))]], 2, ); } #[test] fn restore_1() { restore_test(&[&make_batch_seq(0..1)], 1); } fn assert_raw_db_entries_eq(restored: &Merk, original: &Merk, length: usize) { let mut original_entries = original.raw_iter(); let mut restored_entries = restored.raw_iter(); 
original_entries.seek_to_first(); restored_entries.seek_to_first(); let mut i = 0; loop { assert_eq!(restored_entries.valid(), original_entries.valid()); if !restored_entries.valid() { break; } assert_eq!(restored_entries.key(), original_entries.key()); assert_eq!(restored_entries.value(), original_entries.value()); restored_entries.next(); original_entries.next(); i += 1; } assert_eq!(i, length); } } ================================================ FILE: src/merk/snapshot.rs ================================================ //! In-memory snapshots of database state. //! //! Snapshots are read-only views of the database state at a particular point in //! time. This can be useful for retaining recent versions of history which can //! be queried against. Merk snapshots are backed by the similar RocksDB //! snapshot, but with the added ability to create proofs. use std::cell::Cell; use crate::{ proofs::query::QueryItem, tree::{Fetch, RefWalker, Tree, NULL_HASH}, Hash, Result, }; /// A read-only view of the database state at a particular point in time. /// /// `Snapshot`s are cheap to create since they are just a handle and don't copy /// any data - they instead just prevent the underlying replaced data from being /// compacted in RocksDB until they are dropped. They are only held in memory, /// and will not be persisted after the process exits. pub struct Snapshot<'a> { /// The underlying RocksDB snapshot. ss: Option>, /// The Merk tree at the time the snapshot was created. tree: Cell>, /// Whether the underlying RocksDB snapshot should be dropped when the /// `Snapshot` is dropped. should_drop_ss: bool, } impl<'a> Snapshot<'a> { /// Creates a new `Snapshot` from a RocksDB snapshot and a Merk tree. /// /// The RocksDB snapshot will be dropped when the [Snapshot] is dropped. 
pub fn new(db: rocksdb::Snapshot<'a>, tree: Option) -> Self { Snapshot { ss: Some(db), tree: Cell::new(tree), should_drop_ss: true, } } /// Converts the [Snapshot] into a [StaticSnapshot], an alternative which /// has easier (but more dangerous) lifetime requirements. pub fn staticize(mut self) -> StaticSnapshot { let ss: RocksDBSnapshot = unsafe { std::mem::transmute(self.ss.take().unwrap()) }; StaticSnapshot { tree: Cell::new(self.tree.take()), inner: ss.inner, should_drop: false, } } /// Gets the value associated with the given key, from the time the snapshot /// was created. pub fn get(&self, key: &[u8]) -> Result>> { self.use_tree(|maybe_tree| { maybe_tree .and_then(|tree| super::get(tree, self.source(), key).transpose()) .transpose() }) } /// Gets the root hash of the tree at the time the snapshot was created. pub fn root_hash(&self) -> Hash { self.use_tree(|tree| tree.map_or(NULL_HASH, |tree| tree.hash())) } /// Proves the given query against the tree at the time the snapshot was /// created. pub fn prove(&self, query: I) -> Result> where Q: Into, I: IntoIterator, { self.use_tree_mut(move |maybe_tree| super::prove(maybe_tree, self.source(), query)) } /// Walks the tree at the time the snapshot was created, fetching the child /// node from the backing store if necessary. pub fn walk(&self, f: impl FnOnce(Option>) -> T) -> T { let mut tree = self.tree.take(); let maybe_walker = tree .as_mut() .map(|tree| RefWalker::new(tree, self.source())); let res = f(maybe_walker); self.tree.set(tree); res } /// Returns an iterator over the keys and values in the backing store from /// the time the snapshot was created. pub fn raw_iter(&self) -> rocksdb::DBRawIterator { self.ss.as_ref().unwrap().raw_iterator() } /// A data source which can be used to fetch values from the backing store, /// from the time the snapshot was created. fn source(&self) -> SnapshotSource { SnapshotSource(self.ss.as_ref().unwrap()) } /// Uses the tree, and then puts it back. 
fn use_tree(&self, f: impl FnOnce(Option<&Tree>) -> T) -> T { let tree = self.tree.take(); let res = f(tree.as_ref()); self.tree.set(tree); res } /// Uses the tree mutably, and then puts it back. fn use_tree_mut(&self, f: impl FnOnce(Option<&mut Tree>) -> T) -> T { let mut tree = self.tree.take(); let res = f(tree.as_mut()); self.tree.set(tree); res } } impl<'a> Drop for Snapshot<'a> { fn drop(&mut self) { if !self.should_drop_ss { std::mem::forget(self.ss.take()); } } } /// A data source which can be used to fetch values from the backing store, from /// the time the snapshot was created. /// /// This implements [Fetch] and should be used with a type such as [RefWalker]. #[derive(Clone)] pub struct SnapshotSource<'a>(&'a rocksdb::Snapshot<'a>); impl<'a> Fetch for SnapshotSource<'a> { fn fetch_by_key(&self, key: &[u8]) -> Result> { Ok(self .0 .get(key)? .map(|bytes| Tree::decode(key.to_vec(), &bytes))) } } /// A read-only view of the database state at a particular point in time, but /// with an internal raw pointer to allow for manual lifetime management. /// /// This is useful when you would otherwise want a [Snapshot], but you want to /// use the database while the snapshot is still alive. This is unsafe because /// it is the caller's responsibility to ensure that the underlying RocksDB /// snapshot outlives the [StaticSnapshot]. /// /// By default, the RocksDB snapshot will not be dropped when the /// [StaticSnapshot] is dropped, resulting in a memory leak. For correct usage, /// you must call [StaticSnapshot::drop] to ensure the RocksDB snapshot gets /// dropped when the [StaticSnapshot] is dropped. pub struct StaticSnapshot { /// A Merk tree based on the database state at the time the snapshot was /// created. tree: Cell>, /// A raw pointer to the RocksDB snapshot. inner: *const (), /// Used to detect whether the `StaticSnapshot` was set to manually drop /// before its [Drop::drop] implementation was called. 
pub should_drop: bool, } /// An equivalent struct to the [rocksdb::Snapshot] struct within the `rocksdb` /// crate. This is used to access the private fields of the foreign crate's /// struct by first transmuting. /// /// To guarantee that breaking changes in the `rocksdb` crate do not affect the /// transmutation into this struct, see the /// [tests::rocksdb_snapshot_struct_format] test. struct RocksDBSnapshot<'a> { /// A reference to the associated RocksDB database. _db: &'a rocksdb::DB, /// A raw pointer to the snapshot handle. inner: *const (), } // We need this because we have a raw pointer to a RocksDB snapshot, but we // know that our usage of it is thread-safe: // https://github.com/facebook/rocksdb/blob/main/include/rocksdb/snapshot.h#L15-L16 unsafe impl Send for StaticSnapshot {} unsafe impl Sync for StaticSnapshot {} impl StaticSnapshot { /// Converts the [StaticSnapshot] to a [Snapshot] by re-associating with the /// database it was originally created from. /// /// # Safety /// This will cause undefined behavior if a database other than the one /// originally used to create the snapshot is passed as an argument. /// /// This will also cause a memory leak if the underlying RocksDB snapshot is /// not dropped by calling [StaticSnapshot::drop]. Unlike most uses of /// [Snapshot], the RocksDB snapshot will not be dropped when the /// [Snapshot] returned by this method is dropped. pub unsafe fn with_db<'a>(&self, db: &'a rocksdb::DB) -> Snapshot<'a> { let db_ss = RocksDBSnapshot { _db: db, inner: self.inner, }; let db_ss: rocksdb::Snapshot<'a> = std::mem::transmute(db_ss); Snapshot { ss: Some(db_ss), tree: self.clone_tree(), should_drop_ss: false, } } /// Drops the [StaticSnapshot] and the underlying RocksDB snapshot. 
/// /// # Safety /// This function is unsafe because it results in the RocksDB snapshot being /// dropped, which could lead to use-after-free bugs if there are still /// references to the snapshot in other [Snapshot] or [StaticSnapshot] /// instances. The caller must be sure this is the last remaining reference /// before calling this method. pub unsafe fn drop(mut self, db: &rocksdb::DB) { let mut ss = self.with_db(db); ss.should_drop_ss = true; self.should_drop = true; // the snapshot drop implementation is now called, which includes // dropping the RocksDB snapshot } /// Clones the root node of the Merk tree into a new [Tree]. fn clone_tree(&self) -> Cell> { let tree = self.tree.take().unwrap(); let tree_clone = Cell::new(Some(Tree::decode( tree.key().to_vec(), tree.encode().as_slice(), ))); self.tree.set(Some(tree)); tree_clone } } impl Drop for StaticSnapshot { fn drop(&mut self) { if !self.should_drop { log::debug!("StaticSnapshot must be manually dropped"); } } } impl Clone for StaticSnapshot { fn clone(&self) -> Self { Self { tree: self.clone_tree(), inner: self.inner, should_drop: self.should_drop, } } } #[cfg(test)] mod tests { use std::mem::transmute; use super::RocksDBSnapshot; use crate::test_utils::TempMerk; #[test] fn rocksdb_snapshot_struct_format() { assert_eq!(std::mem::size_of::(), 16); let merk = TempMerk::new().unwrap(); let exptected_db_ptr = merk.db() as *const _; let ss = merk.db().snapshot(); let ss: RocksDBSnapshot = unsafe { transmute(ss) }; let db_ptr = ss._db as *const _; assert_eq!(exptected_db_ptr, db_ptr); } } ================================================ FILE: src/owner.rs ================================================ use std::ops::{Deref, DerefMut}; /// A container type which holds a value that may be temporarily owned by a /// consumer. pub struct Owner { inner: Option, } impl Owner { /// Creates a new `Owner` which holds the given value. 
pub fn new(value: T) -> Owner { Owner { inner: Some(value) } } /// Takes temporary ownership of the contained value by passing it to `f`. /// The function must return a value of the same type (the same value, or a /// new value to take its place). /// /// # Example /// ``` /// # use merk::owner::Owner; /// # struct SomeType(); /// # impl SomeType { /// # fn method_which_requires_ownership(self) -> SomeType { self } /// # } /// # /// let mut owner = Owner::new(SomeType()); /// owner.own(|value| { /// value.method_which_requires_ownership(); /// SomeType() // now give back a value of the same type /// }); /// ``` pub fn own T>(&mut self, f: F) { let old_value = unwrap(self.inner.take()); let new_value = f(old_value); self.inner = Some(new_value); } /// Takes temporary ownership of the contained value by passing it to `f`. /// The function must return a value of the same type (the same value, or a /// new value to take its place). /// /// Like `own`, but uses a tuple return type which allows specifying a value /// to return from the call to `own_return` for convenience. /// /// # Example /// ``` /// # use merk::owner::Owner; /// let mut owner = Owner::new(123); /// let doubled = owner.own_return(|n| (n, n * 2)); /// ``` pub fn own_return(&mut self, f: F) -> R where R: Sized, F: FnOnce(T) -> (T, R), { let old_value = unwrap(self.inner.take()); let (new_value, return_value) = f(old_value); self.inner = Some(new_value); return_value } /// Takes temporary ownership of the contained value by passing it to `f`. /// The function must return a value of the same type (the same value, or a /// new value to take its place). /// /// Like `own`, but with a fallible operation. 
/// /// # Example /// ``` /// # use merk::owner::Owner; /// # use std::convert::TryFrom; /// let mut owner = Owner::new(123); /// let converted = owner.own_fallible(|n| u32::try_from(n)); /// ``` pub fn own_fallible Result>(&mut self, f: F) -> Result<(), E> { let old_value = unwrap(self.inner.take()); let new_value = f(old_value)?; self.inner = Some(new_value); Ok(()) } /// Sheds the `Owner` container and returns the value it contained. pub fn into_inner(mut self) -> T { unwrap(self.inner.take()) } } impl Deref for Owner { type Target = T; fn deref(&self) -> &T { unwrap(self.inner.as_ref()) } } impl DerefMut for Owner { fn deref_mut(&mut self) -> &mut T { unwrap(self.inner.as_mut()) } } fn unwrap(option: Option) -> T { match option { Some(value) => value, None => unreachable!("value should be Some"), } } // TODO: unit tests ================================================ FILE: src/proofs/chunk.rs ================================================ #[cfg(feature = "full")] use { super::tree::{execute, Tree as ProofTree}, crate::tree::Hash, crate::tree::Tree, rocksdb::DBRawIterator, }; use super::{Node, Op}; use crate::error::{Error, Result}; use crate::tree::{Fetch, RefWalker}; /// The minimum number of layers the trunk will be guaranteed to have before /// splitting into multiple chunks. /// /// If the tree's height is less than double this value, the trunk should be /// verified as a leaf chunk. pub const MIN_TRUNK_HEIGHT: usize = 5; impl<'a, S> RefWalker<'a, S> where S: Fetch + Sized + Send + Clone, { /// Generates a trunk proof by traversing the tree. /// /// Returns a tuple containing the produced proof, and a boolean indicating /// whether or not there will be more chunks to follow. If the chunk /// contains the entire tree, the boolean will be `false`, if the chunk /// is abdriged and will be connected to leaf chunks, it will be `true`. 
pub fn create_trunk_proof(&mut self) -> Result<(Vec, bool)> { let approx_size = 2usize.pow((self.tree().height() / 2) as u32) * 3; let mut proof = Vec::with_capacity(approx_size); let trunk_height = self.traverse_for_height_proof(&mut proof, 1)?; if trunk_height < MIN_TRUNK_HEIGHT { proof.clear(); self.traverse_for_trunk(&mut proof, usize::MAX, true)?; Ok((proof, false)) } else { self.traverse_for_trunk(&mut proof, trunk_height, true)?; Ok((proof, true)) } } /// Traverses down the left edge of the tree and pushes ops to the proof, to /// act as a proof of the height of the tree. This is the first step in /// generating a trunk proof. fn traverse_for_height_proof(&mut self, proof: &mut Vec, depth: usize) -> Result { let maybe_left = self.walk(true)?; let has_left_child = maybe_left.is_some(); let trunk_height = if let Some(mut left) = maybe_left { left.traverse_for_height_proof(proof, depth + 1)? } else { depth / 2 }; if depth > trunk_height { proof.push(Op::Push(self.to_kvhash_node())); if has_left_child { proof.push(Op::Parent); } if let Some(right) = self.tree().link(false) { proof.push(Op::Push(Node::Hash(*right.hash()))); proof.push(Op::Child); } } Ok(trunk_height) } /// Traverses down the tree and adds KV push ops for all nodes up to a /// certain depth. This expects the proof to contain a height proof as /// generated by `traverse_for_height_proof`. 
fn traverse_for_trunk( &mut self, proof: &mut Vec, remaining_depth: usize, is_leftmost: bool, ) -> Result<()> { if remaining_depth == 0 { // return early if we have reached bottom of trunk // for leftmost node, we already have height proof if is_leftmost { return Ok(()); } // add this node's hash proof.push(Op::Push(self.to_hash_node())); return Ok(()); } // traverse left let has_left_child = self.tree().link(true).is_some(); if has_left_child { let mut left = self.walk(true)?.unwrap(); left.traverse_for_trunk(proof, remaining_depth - 1, is_leftmost)?; } // add this node's data proof.push(Op::Push(self.to_kv_node())); if has_left_child { proof.push(Op::Parent); } // traverse right if let Some(mut right) = self.walk(false)? { right.traverse_for_trunk(proof, remaining_depth - 1, false)?; proof.push(Op::Child); } Ok(()) } } /// Builds a chunk proof by iterating over values in a RocksDB, ending the chunk /// when a node with key `end_key` is encountered. /// /// Advances the iterator for all nodes in the chunk and the `end_key` (if any). #[cfg(feature = "full")] pub(crate) fn get_next_chunk(iter: &mut DBRawIterator, end_key: Option<&[u8]>) -> Result> { let mut chunk = Vec::with_capacity(512); let mut stack = Vec::with_capacity(32); let mut node = Tree::new(vec![], vec![])?; while iter.valid() { let key = iter.key().unwrap(); if let Some(end_key) = end_key { if key == end_key { break; } } let encoded_node = iter.value().unwrap(); Tree::decode_into(&mut node, vec![], encoded_node); let kv = Node::KV(key.to_vec(), node.value().to_vec()); chunk.push(Op::Push(kv)); if node.link(true).is_some() { chunk.push(Op::Parent); } if let Some(child) = node.link(false) { stack.push(child.key().to_vec()); } else { while let Some(top_key) = stack.last() { if key < top_key.as_slice() { break; } stack.pop(); chunk.push(Op::Child); } } iter.next(); } if iter.valid() { iter.next(); } Ok(chunk) } /// Verifies a leaf chunk proof by executing its operators. 
Checks that there /// were no abridged nodes (Hash or KVHash) and the proof hashes to /// `expected_hash`. #[cfg(feature = "full")] pub(crate) fn verify_leaf>>( ops: I, expected_hash: Hash, ) -> Result { let tree = execute(ops, false, |node| match node { Node::KV(_, _) => Ok(()), _ => Err(Error::Tree("Leaf chunks must contain full subtree".into())), })?; if tree.hash()? != expected_hash { return Err(Error::HashMismatch(expected_hash, tree.hash()?)); } Ok(tree) } /// Verifies a trunk chunk proof by executing its operators. Ensures the /// resulting tree contains a valid height proof, the trunk is the correct /// height, and all of its inner nodes are not abridged. Returns the tree and /// the height given by the height proof. #[cfg(feature = "full")] pub(crate) fn verify_trunk>>(ops: I) -> Result<(ProofTree, usize)> { fn verify_height_proof(tree: &ProofTree) -> Result { let mut height = 1; let mut cursor = tree; while let Some(child) = cursor.child(true) { if let Node::Hash(_) = child.tree.node { return Err(Error::UnexpectedNode( "Expected height proof to only contain KV and KVHash nodes" .into(), )); } height += 1; cursor = &child.tree; } Ok(height) } fn verify_completeness(tree: &ProofTree, remaining_depth: usize, leftmost: bool) -> Result<()> { let recurse = |left, leftmost| { if let Some(child) = tree.child(left) { verify_completeness(&child.tree, remaining_depth - 1, left && leftmost)?; } Ok(()) }; if remaining_depth > 0 { match tree.node { Node::KV(_, _) => {} _ => { return Err(Error::UnexpectedNode( "Expected trunk inner nodes to contain keys and values".into(), )); } } recurse(true, leftmost)?; recurse(false, false) } else if !leftmost { match tree.node { Node::Hash(_) => Ok(()), _ => Err(Error::UnexpectedNode( "Expected trunk leaves to contain Hash nodes".into(), )), } } else { match &tree.node { Node::KVHash(_) => Ok(()), _ => Err(Error::UnexpectedNode( "Expected leftmost trunk leaf to contain KVHash node".into(), )), } } } let mut kv_only = true; let tree 
= execute(ops, false, |node| { kv_only &= matches!(node, Node::KV(_, _)); Ok(()) })?; let height = verify_height_proof(&tree)?; if height > 64 { // This is a sanity check to prevent stack overflows in `verify_completeness`, // but any tree above 64 is probably an error (~3.7e19 nodes). return Err(Error::Tree("Tree is too large".into())); } let trunk_height = height / 2; if trunk_height < MIN_TRUNK_HEIGHT { if !kv_only { return Err(Error::Tree("Leaf chunks must contain full subtree".into())); } } else { verify_completeness(&tree, trunk_height, true)?; } Ok((tree, height)) } #[cfg(test)] mod tests { use super::super::tree::Tree; use super::*; use crate::test_utils::*; use crate::tree::{NoopCommit, PanicSource, Tree as BaseTree}; use ed::Encode; #[derive(Default)] struct NodeCounts { hash: usize, kvhash: usize, kv: usize, } fn count_node_types(tree: Tree) -> NodeCounts { let mut counts = NodeCounts::default(); tree.visit_nodes(&mut |node| { match node { Node::Hash(_) => counts.hash += 1, Node::KVHash(_) => counts.kvhash += 1, Node::KV(_, _) => counts.kv += 1, }; }); counts } #[test] fn small_trunk_roundtrip() { let mut tree = make_tree_seq(31); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let (proof, has_more) = walker.create_trunk_proof().unwrap(); assert!(!has_more); println!("{:?}", &proof); let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap(); let counts = count_node_types(trunk); assert_eq!(counts.hash, 0); assert_eq!(counts.kv, 32); assert_eq!(counts.kvhash, 0); } #[test] fn big_trunk_roundtrip() { let mut tree = make_tree_seq(2u64.pow(MIN_TRUNK_HEIGHT as u32 * 2 + 1) - 1); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let (proof, has_more) = walker.create_trunk_proof().unwrap(); assert!(has_more); let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap(); let counts = count_node_types(trunk); // are these formulas correct for all values of `MIN_TRUNK_HEIGHT`? 
🤔 assert_eq!( counts.hash, 2usize.pow(MIN_TRUNK_HEIGHT as u32) + MIN_TRUNK_HEIGHT - 1 ); assert_eq!(counts.kv, 2usize.pow(MIN_TRUNK_HEIGHT as u32) - 1); assert_eq!(counts.kvhash, MIN_TRUNK_HEIGHT + 1); } #[test] fn one_node_tree_trunk_roundtrip() -> Result<()> { let mut tree = BaseTree::new(vec![0], vec![])?; tree.commit(&mut NoopCommit {}).unwrap(); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let (proof, has_more) = walker.create_trunk_proof().unwrap(); assert!(!has_more); let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap(); let counts = count_node_types(trunk); assert_eq!(counts.hash, 0); assert_eq!(counts.kv, 1); assert_eq!(counts.kvhash, 0); Ok(()) } #[test] fn two_node_right_heavy_tree_trunk_roundtrip() -> Result<()> { // 0 // \ // 1 let mut tree = BaseTree::new(vec![0], vec![])?.attach(false, Some(BaseTree::new(vec![1], vec![])?)); tree.commit(&mut NoopCommit {}).unwrap(); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let (proof, has_more) = walker.create_trunk_proof().unwrap(); assert!(!has_more); let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap(); let counts = count_node_types(trunk); assert_eq!(counts.hash, 0); assert_eq!(counts.kv, 2); assert_eq!(counts.kvhash, 0); Ok(()) } #[test] fn two_node_left_heavy_tree_trunk_roundtrip() -> Result<()> { // 1 // / // 0 let mut tree = BaseTree::new(vec![1], vec![])?.attach(true, Some(BaseTree::new(vec![0], vec![])?)); tree.commit(&mut NoopCommit {}).unwrap(); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let (proof, has_more) = walker.create_trunk_proof().unwrap(); assert!(!has_more); let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap(); let counts = count_node_types(trunk); assert_eq!(counts.hash, 0); assert_eq!(counts.kv, 2); assert_eq!(counts.kvhash, 0); Ok(()) } #[test] fn three_node_tree_trunk_roundtrip() -> Result<()> { // 1 // / \ // 0 2 let mut tree = BaseTree::new(vec![1], vec![])? 
.attach(true, Some(BaseTree::new(vec![0], vec![])?)) .attach(false, Some(BaseTree::new(vec![2], vec![])?)); tree.commit(&mut NoopCommit {}).unwrap(); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let (proof, has_more) = walker.create_trunk_proof().unwrap(); assert!(!has_more); let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap(); let counts = count_node_types(trunk); assert_eq!(counts.hash, 0); assert_eq!(counts.kv, 3); assert_eq!(counts.kvhash, 0); Ok(()) } #[test] fn leaf_chunk_roundtrip() { let mut merk = TempMerk::new().unwrap(); let batch = make_batch_seq(0..31); merk.apply(batch.as_slice(), &[]).unwrap(); let root_node = merk.tree.read().unwrap(); let root_key = root_node.as_ref().unwrap().key().to_vec(); // whole tree as 1 leaf let mut iter = merk.db.raw_iterator(); iter.seek_to_first(); let chunk = get_next_chunk(&mut iter, None).unwrap(); let ops = chunk.into_iter().map(Ok); let chunk = verify_leaf(ops, merk.root_hash()).unwrap(); let counts = count_node_types(chunk); assert_eq!(counts.kv, 31); assert_eq!(counts.hash, 0); assert_eq!(counts.kvhash, 0); drop(iter); let mut iter = merk.db.raw_iterator(); iter.seek_to_first(); // left leaf let chunk = get_next_chunk(&mut iter, Some(root_key.as_slice())).unwrap(); let ops = chunk.into_iter().map(Ok); let chunk = verify_leaf( ops, [ 222, 93, 128, 149, 117, 136, 34, 175, 204, 82, 228, 113, 242, 144, 152, 190, 210, 27, 195, 34, 24, 196, 210, 99, 250, 119, 219, 114, 52, 167, 191, 249, ], ) .unwrap(); let counts = count_node_types(chunk); assert_eq!(counts.kv, 15); assert_eq!(counts.hash, 0); assert_eq!(counts.kvhash, 0); // right leaf let chunk = get_next_chunk(&mut iter, None).unwrap(); let ops = chunk.into_iter().map(Ok); let chunk = verify_leaf( ops, [ 128, 158, 92, 80, 118, 253, 48, 241, 74, 154, 213, 187, 92, 243, 154, 28, 164, 235, 156, 122, 174, 226, 84, 170, 233, 166, 27, 79, 100, 10, 88, 184, ], ) .unwrap(); let counts = count_node_types(chunk); assert_eq!(counts.kv, 15); 
assert_eq!(counts.hash, 0); assert_eq!(counts.kvhash, 0); } #[test] #[should_panic(expected = "Tree is too large")] fn test_verify_height_stack_overflow() { let height = 5_000u32; let push_op = |i: u32| Op::Push(Node::KV(i.to_be_bytes().to_vec(), vec![])); let mut ops = Vec::with_capacity((height * 2) as usize); ops.push(push_op(0)); for i in 1..height { ops.push(push_op(i)); ops.push(Op::Parent) } assert!(ops.encoding_length().unwrap() < 50_000); println!("Len: {}", ops.encoding_length().unwrap()); let (_, result_height) = verify_trunk(ops.into_iter().map(Ok)).unwrap(); assert_eq!(height, result_height as u32); } } ================================================ FILE: src/proofs/encoding.rs ================================================ use std::io::{Read, Write}; use ed::{Decode, Encode, Terminated}; use super::{Node, Op}; use crate::error::Result; use crate::tree::HASH_LENGTH; impl Encode for Op { fn encode_into(&self, dest: &mut W) -> ed::Result<()> { match self { Op::Push(Node::Hash(hash)) => { dest.write_all(&[0x01])?; dest.write_all(hash)?; } Op::Push(Node::KVHash(kv_hash)) => { dest.write_all(&[0x02])?; dest.write_all(kv_hash)?; } Op::Push(Node::KV(key, value)) => { debug_assert!(key.len() < 65536); debug_assert!(value.len() < 65536); dest.write_all(&[0x03])?; (key.len() as u16).encode_into(dest)?; dest.write_all(key)?; (value.len() as u16).encode_into(dest)?; dest.write_all(value)?; } Op::Parent => dest.write_all(&[0x10])?, Op::Child => dest.write_all(&[0x11])?, }; Ok(()) } fn encoding_length(&self) -> ed::Result { Ok(match self { Op::Push(Node::Hash(_)) => 1 + HASH_LENGTH, Op::Push(Node::KVHash(_)) => 1 + HASH_LENGTH, Op::Push(Node::KV(key, value)) => 5 + key.len() + value.len(), Op::Parent => 1, Op::Child => 1, }) } } impl Decode for Op { fn decode(mut input: R) -> ed::Result { let variant: u8 = Decode::decode(&mut input)?; Ok(match variant { 0x01 => { let mut hash = [0; HASH_LENGTH]; input.read_exact(&mut hash)?; Op::Push(Node::Hash(hash)) } 0x02 => 
{ let mut hash = [0; HASH_LENGTH]; input.read_exact(&mut hash)?; Op::Push(Node::KVHash(hash)) } 0x03 => { let key_len: u16 = Decode::decode(&mut input)?; let mut key = vec![0; key_len as usize]; input.read_exact(key.as_mut_slice())?; let value_len: u16 = Decode::decode(&mut input)?; let mut value = vec![0; value_len as usize]; input.read_exact(value.as_mut_slice())?; Op::Push(Node::KV(key, value)) } 0x10 => Op::Parent, 0x11 => Op::Child, byte => { return Err(ed::Error::UnexpectedByte(byte)); } }) } } impl Terminated for Op {} impl Op { fn encode_into(&self, dest: &mut W) -> Result<()> { Ok(Encode::encode_into(self, dest)?) } fn encoding_length(&self) -> usize { Encode::encoding_length(self).unwrap() } pub fn decode(bytes: &[u8]) -> Result { Ok(Decode::decode(bytes)?) } } pub fn encode_into<'a, T: Iterator>(ops: T, output: &mut Vec) { for op in ops { op.encode_into(output).unwrap(); } } pub struct Decoder<'a> { offset: usize, bytes: &'a [u8], } impl<'a> Decoder<'a> { pub fn new(proof_bytes: &'a [u8]) -> Self { Decoder { offset: 0, bytes: proof_bytes, } } } impl<'a> Iterator for Decoder<'a> { type Item = Result; fn next(&mut self) -> Option { if self.offset >= self.bytes.len() { return None; } Some((|| { let bytes = &self.bytes[self.offset..]; let op = Op::decode(bytes)?; self.offset += op.encoding_length(); Ok(op) })()) } } #[cfg(test)] mod test { use super::super::{Node, Op}; use crate::tree::HASH_LENGTH; #[test] fn encode_push_hash() { let op = Op::Push(Node::Hash([123; HASH_LENGTH])); assert_eq!(op.encoding_length(), 1 + HASH_LENGTH); let mut bytes = vec![]; op.encode_into(&mut bytes).unwrap(); assert_eq!( bytes, vec![ 0x01, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123 ] ); } #[test] fn encode_push_kvhash() { let op = Op::Push(Node::KVHash([123; HASH_LENGTH])); assert_eq!(op.encoding_length(), 1 + HASH_LENGTH); let mut bytes = vec![]; 
op.encode_into(&mut bytes).unwrap(); assert_eq!( bytes, vec![ 0x02, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123 ] ); } #[test] fn encode_push_kv() { let op = Op::Push(Node::KV(vec![1, 2, 3], vec![4, 5, 6])); assert_eq!(op.encoding_length(), 11); let mut bytes = vec![]; op.encode_into(&mut bytes).unwrap(); assert_eq!(bytes, vec![0x03, 0, 3, 1, 2, 3, 0, 3, 4, 5, 6]); } #[test] fn encode_parent() { let op = Op::Parent; assert_eq!(op.encoding_length(), 1); let mut bytes = vec![]; op.encode_into(&mut bytes).unwrap(); assert_eq!(bytes, vec![0x10]); } #[test] fn encode_child() { let op = Op::Child; assert_eq!(op.encoding_length(), 1); let mut bytes = vec![]; op.encode_into(&mut bytes).unwrap(); assert_eq!(bytes, vec![0x11]); } #[test] #[should_panic] fn encode_push_kv_long_key() { let op = Op::Push(Node::KV(vec![123; 70_000], vec![4, 5, 6])); let mut bytes = vec![]; op.encode_into(&mut bytes).unwrap(); } #[test] fn decode_push_hash() { let bytes = [ 0x01, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, ]; let op = Op::decode(&bytes[..]).expect("decode failed"); assert_eq!(op, Op::Push(Node::Hash([123; HASH_LENGTH]))); } #[test] fn decode_push_kvhash() { let bytes = [ 0x02, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, ]; let op = Op::decode(&bytes[..]).expect("decode failed"); assert_eq!(op, Op::Push(Node::KVHash([123; HASH_LENGTH]))); } #[test] fn decode_push_kv() { let bytes = [0x03, 0, 3, 1, 2, 3, 0, 3, 4, 5, 6]; let op = Op::decode(&bytes[..]).expect("decode failed"); assert_eq!(op, Op::Push(Node::KV(vec![1, 2, 3], vec![4, 5, 6]))); } #[test] fn decode_parent() { let bytes = [0x10]; let op = Op::decode(&bytes[..]).expect("decode 
failed"); assert_eq!(op, Op::Parent); } #[test] fn decode_child() { let bytes = [0x11]; let op = Op::decode(&bytes[..]).expect("decode failed"); assert_eq!(op, Op::Child); } #[test] fn decode_unknown() { let bytes = [0x88]; assert!(Op::decode(&bytes[..]).is_err()); } } ================================================ FILE: src/proofs/mod.rs ================================================ pub mod chunk; pub mod encoding; pub mod query; pub mod tree; use crate::tree::Hash; pub use encoding::{encode_into, Decoder}; pub use query::Query; pub use tree::Tree; /// A proof operator, executed to verify the data in a Merkle proof. #[derive(Debug, PartialEq)] pub enum Op { /// Pushes a node on the stack. Push(Node), /// Pops the top stack item as `parent`. Pops the next top stack item as /// `child`. Attaches `child` as the left child of `parent`. Pushes the /// updated `parent` back on the stack. Parent, /// Pops the top stack item as `child`. Pops the next top stack item as /// `parent`. Attaches `child` as the right child of `parent`. Pushes the /// updated `parent` back on the stack. Child, } /// A selected piece of data about a single tree node, to be contained in a /// `Push` operator in a proof. #[derive(Clone, Debug, PartialEq)] pub enum Node { /// Represents the hash of a tree node. Hash(Hash), /// Represents the hash of the key/value pair of a tree node. KVHash(Hash), /// Represents the key and value of a tree node. KV(Vec, Vec), } ================================================ FILE: src/proofs/query/map.rs ================================================ use super::super::Node; use crate::{Error, Result}; use std::collections::btree_map; use std::collections::BTreeMap; use std::iter::Peekable; use std::ops::{Bound, RangeBounds}; /// `MapBuilder` allows a consumer to construct a `Map` by inserting the nodes /// contained in a proof, in key-order. pub(crate) struct MapBuilder(Map); impl MapBuilder { /// Creates a new `MapBuilder` with an empty internal `Map`. 
pub fn new() -> Self { MapBuilder(Map { entries: Default::default(), right_edge: true, }) } /// Adds the node's data to the uncerlying `Map` (if node is type `KV`), or /// makes a note of non-contiguous data (if node is type `KVHash` or /// `Hash`). pub fn insert(&mut self, node: &Node) -> Result<()> { match node { Node::KV(key, value) => { if let Some((prev_key, _)) = self.0.entries.last_key_value() { if key <= prev_key { return Err(Error::Key( "Expected nodes to be in increasing key order".into(), )); } } let value = (self.0.right_edge, value.clone()); self.0.entries.insert(key.clone(), value); self.0.right_edge = true; } _ => self.0.right_edge = false, } Ok(()) } /// Consumes the `MapBuilder` and returns its internal `Map`. pub fn build(self) -> Map { self.0 } } /// `Map` stores data extracted from a proof. /// /// The data (which has already been verified against a known root hash) can be /// accessed by a consumer by looking up individual keys using the `get` method, /// or iterating over ranges using the `range` method. #[derive(Clone, Debug)] pub struct Map { entries: BTreeMap, (bool, Vec)>, right_edge: bool, } impl Map { /// Gets the value for a single key, or `None` if the key was proven to not /// exist in the tree. If the proof does not include the data and also does /// not prove that the key is absent in the tree (meaning the proof is not /// valid), an error will be returned. pub fn get<'a>(&'a self, key: &'a [u8]) -> Result> { // if key is in proof just get from entries if let Some((_, value)) = self.entries.get(key) { return Ok(Some(value.as_slice())); } // otherwise, use range which only includes exact key match to check // absence proof let entry = self .range((Bound::Included(key), Bound::Included(key))) .next() .transpose()? .map(|(_, value)| value); Ok(entry) } /// Returns an iterator over all (key, value) entries in the requested range /// of keys. If during iteration we encounter a gap in the data (e.g. 
the /// proof did not include all nodes within the range), the iterator will /// yield an error. pub fn range<'a>(&self, bounds: impl RangeBounds<&'a [u8]>) -> Range { let start_bound = bound_to_inner(bounds.start_bound()); let end_bound = bound_to_inner(bounds.end_bound()); let outer_bounds = ( start_bound.map_or(Bound::Unbounded, |k| { self.entries .range(..=k.to_vec()) .next_back() .map_or(Bound::Unbounded, |prev| Bound::Included(prev.0.clone())) }), end_bound.map_or(Bound::Unbounded, |k| { self.entries .range(k.to_vec()..) .next() .map_or(Bound::Unbounded, |next| Bound::Included(next.0.clone())) }), ); Range { map: self, bounds: bounds_to_vec(bounds), done: false, iter: self.entries.range(outer_bounds).peekable(), } } /// Joins two `Map`s together, combining the data in both. /// /// If the maps contain contiguous iteration ranges, the contiguous ranges /// will be joined. If the maps have differing values for the same key, this /// will panic (this should never happen if the queries came from the same /// root and the proofs were verified). pub fn join(self, other: Map) -> Map { // TODO: join at the partial tree level, joining with only Map data means // data from different joins which happen to be contiguous (without explicitly // querying based on next/prev) will be marked as non-contiguous let mut entries = self.entries.clone(); entries.extend(other.entries); for (key, (contiguous, val)) in entries.iter_mut() { if let Some(shadowed) = self.entries.get(key) { assert_eq!(val, &shadowed.1, "Maps have different values",); *contiguous = *contiguous || shadowed.0; } } Map { entries, right_edge: self.right_edge || other.right_edge, } } /// Returns `true` if the [Map] can verify that there is no unproven data /// between `key` and the node to its right (or the global tree edge). 
/// /// For example, if the underlying tree contains the key `[a, b, c, d]` and /// the map contains the keys `[a, b, d]`, then `contiguous_right(a)` will /// return `true`, `contiguous_right(b)` and `contiguous_right(c)` will /// return `false`, and `contiguous_right(d)` will return `true`. fn contiguous_right(&self, key: &[u8]) -> bool { self.entries .range((Bound::Excluded(key.to_vec()), Bound::Unbounded)) .next() .map_or(self.right_edge, |(_, (contiguous, _))| *contiguous) } } /// Returns `None` for `Bound::Unbounded`, or the inner key value for /// `Bound::Included` and `Bound::Excluded`. fn bound_to_inner(bound: Bound) -> Option { match bound { Bound::Unbounded => None, Bound::Included(key) | Bound::Excluded(key) => Some(key), } } /// Converts the inner key value of a `Bound` from a byte slice to a `Vec`. fn bound_to_vec(bound: Bound<&&[u8]>) -> Bound> { match bound { Bound::Unbounded => Bound::Unbounded, Bound::Excluded(k) => Bound::Excluded(k.to_vec()), Bound::Included(k) => Bound::Included(k.to_vec()), } } /// Converts the inner key values of a [RangeBounds] from byte slices to /// `Vec`. fn bounds_to_vec<'a, R: RangeBounds<&'a [u8]>>(bounds: R) -> (Bound>, Bound>) { ( bound_to_vec(bounds.start_bound()), bound_to_vec(bounds.end_bound()), ) } /// An iterator over (key, value) entries as extracted from a verified proof. /// /// If during iteration we encounter a gap in the data (e.g. the proof did not /// include all nodes within the range), the iterator will yield an error. 
pub struct Range<'a> { map: &'a Map, bounds: (Bound>, Bound>), done: bool, iter: Peekable>, } type InnerRange<'a> = btree_map::Range<'a, Vec, (bool, Vec)>; impl<'a> Range<'a> { fn yield_entry_if_contiguous( &mut self, entry: (&'a Vec, &'a (bool, Vec)), contiguous: bool, forward: bool, ) -> Option> { if !contiguous { self.done = true; return Some(Err(Error::MissingData)); } self.yield_entry(entry, forward) } fn yield_entry( &mut self, entry: (&'a Vec, &'a (bool, Vec)), forward: bool, ) -> Option> { let (key, (_, value)) = entry; if forward { self.bounds.0 = Bound::Excluded(key.clone()); } else { self.bounds.1 = Bound::Excluded(key.clone()); } Some(Ok((key.as_slice(), value.as_slice()))) } fn yield_none_if_contiguous( &mut self, contiguous: bool, ) -> Option> { self.done = true; if !contiguous { return Some(Err(Error::MissingData)); } None } fn yield_next_if_contiguous(&mut self) -> Option> { if let Some((_, (contiguous, _))) = self.iter.peek() { if !contiguous { self.done = true; return Some(Err(Error::MissingData)); } } self.next() } fn yield_next_back_if_contiguous( &mut self, contiguous: bool, ) -> Option> { if !contiguous { self.done = true; return Some(Err(Error::MissingData)); } self.next_back() } } impl<'a> Iterator for Range<'a> { type Item = Result<(&'a [u8], &'a [u8])>; fn next(&mut self) -> Option { if self.done { return None; } let entry = match self.iter.next() { None => return self.yield_none_if_contiguous(self.map.right_edge), Some(entry) => entry, }; let (key, (contiguous, _)) = entry; let past_start = match bound_to_inner(self.bounds.0.clone()) { None => true, Some(ref start_bound) => key > start_bound, }; let at_start = match self.bounds.0 { Bound::Unbounded => true, Bound::Included(ref start_bound) => key == start_bound, Bound::Excluded(_) => false, }; let past_end = match self.bounds.1 { Bound::Unbounded => false, Bound::Included(ref end_bound) => key > end_bound, Bound::Excluded(ref end_bound) => key >= end_bound, }; if past_end { 
self.yield_none_if_contiguous(*contiguous) } else if past_start { self.yield_entry_if_contiguous(entry, *contiguous, true) } else if at_start { self.yield_entry(entry, true) } else { self.yield_next_if_contiguous() } } } impl<'a> DoubleEndedIterator for Range<'a> { fn next_back(&mut self) -> Option { if self.done { return None; } let entry = match self.iter.next_back() { None => return self.yield_none_if_contiguous(self.map.contiguous_right(&[])), Some(entry) => entry, }; let (key, (contiguous_l, _)) = entry; let contiguous_r = self.map.contiguous_right(key); let past_end = match bound_to_inner(self.bounds.1.clone()) { None => true, Some(ref end_bound) => key < end_bound, }; let at_end = match self.bounds.1 { Bound::Unbounded => true, Bound::Included(ref end_bound) => key == end_bound, Bound::Excluded(_) => false, }; let past_start = match self.bounds.0 { Bound::Unbounded => false, Bound::Included(ref start_bound) => key < start_bound, Bound::Excluded(ref start_bound) => key <= start_bound, }; if past_start { self.yield_none_if_contiguous(contiguous_r) } else if past_end { self.yield_entry_if_contiguous(entry, contiguous_r, false) } else if at_end { self.yield_entry(entry, false) } else { self.yield_next_back_if_contiguous(*contiguous_l) } } } #[cfg(test)] mod tests { use super::*; use crate::HASH_LENGTH; #[test] #[should_panic(expected = "Expected nodes to be in increasing key order")] fn mapbuilder_insert_out_of_order() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 2], vec![])).unwrap(); } #[test] #[should_panic(expected = "Expected nodes to be in increasing key order")] fn mapbuilder_insert_dupe() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 3], vec![])).unwrap(); } #[test] fn mapbuilder_insert_including_edge() { let mut builder = MapBuilder::new(); builder.insert(&Node::Hash([0; 
HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![])).unwrap(); assert!(builder.0.right_edge); } #[test] fn mapbuilder_insert_abridged_edge() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![])).unwrap(); builder.insert(&Node::Hash([0; HASH_LENGTH])).unwrap(); assert!(!builder.0.right_edge); } #[test] fn mapbuilder_build() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::Hash([0; HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); let mut entries = map.entries.iter(); assert_eq!(entries.next(), Some((&vec![1, 2, 3], &(true, vec![1])))); assert_eq!(entries.next(), Some((&vec![1, 2, 4], &(false, vec![2])))); assert_eq!(entries.next(), None); assert!(map.right_edge); } #[test] fn map_get_included() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::Hash([0; HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); assert_eq!(map.get(&[1, 2, 3]).unwrap().unwrap(), vec![1],); assert_eq!(map.get(&[1, 2, 4]).unwrap().unwrap(), vec![2],); } #[test] #[should_panic(expected = "MissingData")] fn map_get_missing_absence_proof() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::Hash([0; HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); map.get(&[1, 2, 3, 4]).unwrap(); } #[test] fn map_get_valid_absence_proof() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); assert!(map.get(&[1, 2, 3, 4]).unwrap().is_none()); } #[test] #[should_panic(expected = "MissingData")] fn range_abridged() { let mut builder = 
MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::Hash([0; HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); let mut range = map.range(&[1u8, 2, 3][..]..&[1u8, 2, 4][..]); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 3][..], &[1][..])); range.next().unwrap().unwrap(); } #[test] fn range_ok() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 5], vec![3])).unwrap(); let map = builder.build(); let mut range = map.range(&[1u8, 2, 3][..]..&[1u8, 2, 5][..]); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 3][..], &[1][..])); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 4][..], &[2][..])); assert!(range.next().is_none()); assert!(range.next_back().is_none()); assert!(range.next().is_none()); } #[test] fn range_empty() { let map = MapBuilder::new().build(); let mut range = map.range(..); assert!(range.next().is_none()); assert!(range.next_back().is_none()); } #[test] #[should_panic(expected = "MissingData")] fn range_lower_unbounded_map_non_contiguous() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::Hash([1; HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![1])).unwrap(); let map = builder.build(); let mut range = map.range(..&[1u8, 2, 5][..]); range.next().unwrap().unwrap(); range.next().unwrap().unwrap(); } #[test] fn range_reach_proof_end() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); let mut range = map.range(&[1u8, 2, 3][..]..); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 3][..], &[1][..])); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 4][..], 
&[2][..])); assert!(range.next().is_none()); } #[test] fn range_unbounded() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); let mut range = map.range(..); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 3][..], &[1][..])); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 4][..], &[2][..])); assert!(range.next().is_none()); } #[test] #[should_panic(expected = "MissingData")] fn range_abridged_rev() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::Hash([0; HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); let mut range = map.range(&[1u8, 2, 3][..]..=&[1u8, 2, 4][..]).rev(); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 4][..], &[2][..])); range.next().unwrap().unwrap(); } #[test] fn range_ok_rev() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 5], vec![3])).unwrap(); let map = builder.build(); let mut range = map.range(&[1u8, 2, 3][..]..&[1u8, 2, 5][..]).rev(); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 4][..], &[2][..])); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 3][..], &[1][..])); assert!(range.next().is_none()); assert!(range.next_back().is_none()); } #[test] #[should_panic(expected = "MissingData")] fn range_upper_unbounded_map_non_contiguous() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::Hash([1; HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![1])).unwrap(); let map = builder.build(); let mut range = map.range(&[1u8, 2, 3][..]..).rev(); range.next().unwrap().unwrap(); range.next().unwrap().unwrap(); } #[test] fn 
range_reach_proof_end_rev() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); let mut range = map.range(..&[1u8, 2, 5][..]).rev(); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 4][..], &[2][..])); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 3][..], &[1][..])); assert!(range.next().is_none()); } #[test] fn range_unbounded_rev() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1, 2, 3], vec![1])).unwrap(); builder.insert(&Node::KV(vec![1, 2, 4], vec![2])).unwrap(); let map = builder.build(); let mut range = map.range(..).rev(); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 4][..], &[2][..])); assert_eq!(range.next().unwrap().unwrap(), (&[1, 2, 3][..], &[1][..])); assert!(range.next().is_none()); } #[test] fn map_join() { let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1], vec![1])).unwrap(); builder.insert(&Node::KV(vec![2], vec![1])).unwrap(); builder.insert(&Node::KV(vec![3], vec![1])).unwrap(); builder.insert(&Node::Hash([0; HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![5], vec![1])).unwrap(); let a = builder.build(); let mut builder = MapBuilder::new(); builder.insert(&Node::KV(vec![1], vec![1])).unwrap(); builder.insert(&Node::Hash([0; HASH_LENGTH])).unwrap(); builder.insert(&Node::KV(vec![3], vec![1])).unwrap(); builder.insert(&Node::KV(vec![4], vec![1])).unwrap(); builder.insert(&Node::Hash([0; HASH_LENGTH])).unwrap(); let b = builder.build(); let joined = a.join(b); let mut range = joined.range(..=&[4][..]); assert_eq!(range.next().unwrap().unwrap(), (&[1][..], &[1][..])); assert_eq!(range.next().unwrap().unwrap(), (&[2][..], &[1][..])); assert_eq!(range.next().unwrap().unwrap(), (&[3][..], &[1][..])); assert_eq!(range.next().unwrap().unwrap(), (&[4][..], &[1][..])); assert!(range.next().is_none()); let mut range = joined.range(&[5][..]..); 
assert_eq!(range.next().unwrap().unwrap(), (&[5][..], &[1][..])); assert!(range.next().is_none()); } } ================================================ FILE: src/proofs/query/mod.rs ================================================ mod map; #[cfg(feature = "full")] use {super::Op, std::collections::LinkedList}; use super::tree::execute; use super::{Decoder, Node}; use crate::error::{Error, Result}; use crate::tree::{Fetch, Hash, Link, RefWalker}; use std::cmp::{max, min, Ordering}; use std::collections::BTreeSet; use std::ops::RangeInclusive; pub use map::*; /// `Query` represents one or more keys or ranges of keys, which can be used to /// resolve a proof which will include all of the requested values. #[derive(Default)] pub struct Query { items: BTreeSet, } impl Query { /// Creates a new query which contains no items. pub fn new() -> Self { Default::default() } pub(crate) fn len(&self) -> usize { self.items.len() } pub(crate) fn iter(&self) -> impl Iterator { self.items.iter() } /// Adds an individual key to the query, so that its value (or its absence) /// in the tree will be included in the resulting proof. /// /// If the key or a range including the key already exists in the query, /// this will have no effect. If the query already includes a range that has /// a non-inclusive bound equal to the key, the bound will be changed to be /// inclusive. pub fn insert_key(&mut self, key: Vec) { let key = QueryItem::Key(key); self.items.insert(key); } /// Adds a range to the query, so that all the entries in the tree with keys /// in the range will be included in the resulting proof. /// /// If a range including the range already exists in the query, this will /// have no effect. If the query already includes a range that overlaps with /// the range, the ranges will be joined together. 
pub fn insert_range(&mut self, range: std::ops::Range>) { let range = QueryItem::Range(range); self.insert_item(range); } /// Adds an inclusive range to the query, so that all the entries in the /// tree with keys in the range will be included in the resulting proof. /// /// If a range including the range already exists in the query, this will /// have no effect. If the query already includes a range that overlaps with /// the range, the ranges will be merged together. pub fn insert_range_inclusive(&mut self, range: RangeInclusive>) { let range = QueryItem::RangeInclusive(range); self.insert_item(range); } /// Adds the `QueryItem` to the query, first checking to see if it collides /// with any existing ranges or keys. All colliding items will be removed /// then merged together so that the query includes the minimum number of /// items (with no items covering any duplicate parts of keyspace) while /// still including every key or range that has been added to the query. pub fn insert_item(&mut self, mut item: QueryItem) { // since `QueryItem::eq` considers items equal if they collide at all // (including keys within ranges or ranges which partially overlap), // `items.take` will remove the first item which collides while let Some(existing) = self.items.take(&item) { item = item.merge(existing); } self.items.insert(item); } } impl> From> for Query { fn from(other: Vec) -> Self { let items = other.into_iter().map(Into::into).collect(); Query { items } } } impl From for Vec { fn from(q: Query) -> Vec { q.into_iter().collect() } } impl IntoIterator for Query { type Item = QueryItem; type IntoIter = as IntoIterator>::IntoIter; fn into_iter(self) -> Self::IntoIter { self.items.into_iter() } } /// A `QueryItem` represents a key or range of keys to be included in a proof. 
#[derive(Clone, Debug)] pub enum QueryItem { Key(Vec), Range(std::ops::Range>), RangeInclusive(RangeInclusive>), } impl QueryItem { pub fn lower_bound(&self) -> &[u8] { match self { QueryItem::Key(key) => key.as_slice(), QueryItem::Range(range) => range.start.as_ref(), QueryItem::RangeInclusive(range) => range.start().as_ref(), } } pub fn upper_bound(&self) -> (&[u8], bool) { match self { QueryItem::Key(key) => (key.as_slice(), true), QueryItem::Range(range) => (range.end.as_ref(), false), QueryItem::RangeInclusive(range) => (range.end().as_ref(), true), } } pub fn contains(&self, key: &[u8]) -> bool { let (bound, inclusive) = self.upper_bound(); return key >= self.lower_bound() && (key < bound || (key == bound && inclusive)); } fn merge(self, other: QueryItem) -> QueryItem { // TODO: don't copy into new vecs let start = min(self.lower_bound(), other.lower_bound()).to_vec(); let end = max(self.upper_bound(), other.upper_bound()); if end.1 { QueryItem::RangeInclusive(RangeInclusive::new(start, end.0.to_vec())) } else { QueryItem::Range(std::ops::Range { start, end: end.0.to_vec(), }) } } } impl PartialEq for QueryItem { fn eq(&self, other: &QueryItem) -> bool { self.cmp(other) == Ordering::Equal } } impl PartialEq<&[u8]> for QueryItem { fn eq(&self, other: &&[u8]) -> bool { matches!(self.partial_cmp(other), Some(Ordering::Equal)) } } impl Eq for QueryItem {} impl Ord for QueryItem { fn cmp(&self, other: &QueryItem) -> Ordering { let cmp_lu = self.lower_bound().cmp(other.upper_bound().0); let cmp_ul = self.upper_bound().0.cmp(other.lower_bound()); let self_inclusive = self.upper_bound().1; let other_inclusive = other.upper_bound().1; match (cmp_lu, cmp_ul) { (Ordering::Less, Ordering::Less) => Ordering::Less, (Ordering::Less, Ordering::Equal) => match self_inclusive { true => Ordering::Equal, false => Ordering::Less, }, (Ordering::Less, Ordering::Greater) => Ordering::Equal, (Ordering::Equal, _) => match other_inclusive { true => Ordering::Equal, false => 
Ordering::Greater, }, (Ordering::Greater, _) => Ordering::Greater, } } } impl PartialOrd for QueryItem { fn partial_cmp(&self, other: &QueryItem) -> Option { Some(self.cmp(other)) } } impl PartialOrd<&[u8]> for QueryItem { fn partial_cmp(&self, other: &&[u8]) -> Option { let other = QueryItem::Key(other.to_vec()); Some(self.cmp(&other)) } } impl From> for QueryItem { fn from(key: Vec) -> Self { QueryItem::Key(key) } } impl Link { /// Creates a `Node::Hash` from this link. Panics if the link is of variant /// `Link::Modified` since its hash has not yet been computed. #[cfg(feature = "full")] fn to_hash_node(&self) -> Node { let hash = match self { Link::Reference { hash, .. } => hash, Link::Modified { .. } => { panic!("Cannot convert Link::Modified to proof hash node"); } Link::Uncommitted { hash, .. } => hash, Link::Loaded { hash, .. } => hash, }; Node::Hash(*hash) } } impl<'a, S> RefWalker<'a, S> where S: Fetch + Sized + Send + Clone, { /// Creates a `Node::KV` from the key/value pair of the root node. pub(crate) fn to_kv_node(&self) -> Node { Node::KV(self.tree().key().to_vec(), self.tree().value().to_vec()) } /// Creates a `Node::KVHash` from the hash of the key/value pair of the root /// node. pub(crate) fn to_kvhash_node(&self) -> Node { Node::KVHash(*self.tree().kv_hash()) } /// Creates a `Node::Hash` from the hash of the node. pub(crate) fn to_hash_node(&self) -> Node { Node::Hash(self.tree().hash()) } /// Generates a proof for the list of queried keys. Returns a tuple /// containing the generated proof operators, and a tuple representing if /// any keys were queried were less than the left edge or greater than the /// right edge, respectively. 
#[cfg(feature = "full")] pub(crate) fn create_proof( &mut self, query: &[QueryItem], ) -> Result<(LinkedList, (bool, bool))> { // TODO: don't copy into vec, support comparing QI to byte slice let node_key = QueryItem::Key(self.tree().key().to_vec()); let search = query.binary_search_by(|key| key.cmp(&node_key)); let (left_items, right_items) = match search { Ok(index) => { let item = &query[index]; let left_bound = item.lower_bound(); let right_bound = item.upper_bound().0; // if range starts before this node's key, include it in left // child's query let left_query = if left_bound < self.tree().key() { &query[..=index] } else { &query[..index] }; // if range ends after this node's key, include it in right // child's query let right_query = if right_bound > self.tree().key() { &query[index..] } else { &query[index + 1..] }; (left_query, right_query) } Err(index) => (&query[..index], &query[index..]), }; let (mut proof, left_absence) = self.create_child_proof(true, left_items)?; let (mut right_proof, right_absence) = self.create_child_proof(false, right_items)?; let (has_left, has_right) = (!proof.is_empty(), !right_proof.is_empty()); proof.push_back(match search { Ok(_) => Op::Push(self.to_kv_node()), Err(_) => { if left_absence.1 || right_absence.0 { Op::Push(self.to_kv_node()) } else { Op::Push(self.to_kvhash_node()) } } }); if has_left { proof.push_back(Op::Parent); } if has_right { proof.append(&mut right_proof); proof.push_back(Op::Child); } Ok((proof, (left_absence.0, right_absence.1))) } /// Similar to `create_proof`. Recurses into the child on the given side and /// generates a proof for the queried keys. #[cfg(feature = "full")] fn create_child_proof( &mut self, left: bool, query: &[QueryItem], ) -> Result<(LinkedList, (bool, bool))> { Ok(if !query.is_empty() { if let Some(mut child) = self.walk(left)? { child.create_proof(query)? 
} else { (LinkedList::new(), (true, true)) } } else if let Some(link) = self.tree().link(left) { let mut proof = LinkedList::new(); proof.push_back(Op::Push(link.to_hash_node())); (proof, (false, false)) } else { (LinkedList::new(), (false, false)) }) } } pub fn verify(bytes: &[u8], expected_hash: Hash) -> Result { let ops = Decoder::new(bytes); let mut map_builder = MapBuilder::new(); let root = execute(ops, true, |node| map_builder.insert(node))?; if root.hash()? != expected_hash { return Err(Error::HashMismatch(expected_hash, root.hash()?)); } Ok(map_builder.build()) } /// Verifies the encoded proof with the given query and expected hash. /// /// Every key in `keys` is checked to either have a key/value pair in the proof, /// or to have its absence in the tree proven. /// /// Returns `Err` if the proof is invalid, or a list of proven values associated /// with `keys`. For example, if `keys` contains keys `A` and `B`, the returned /// list will contain 2 elements, the value of `A` and the value of `B`. Keys /// proven to be absent in the tree will have an entry of `None`, keys that have /// a proven value will have an entry of `Some(value)`. #[deprecated] pub fn verify_query( bytes: &[u8], query: &Query, expected_hash: Hash, ) -> Result, Vec)>> { let mut output = Vec::with_capacity(query.len()); let mut last_push = None; let mut query = query.iter().peekable(); let mut in_range = false; let ops = Decoder::new(bytes); let root = execute(ops, true, |node| { if let Node::KV(key, value) = node { while let Some(item) = query.peek() { // get next item in query let query_item = *item; // we have not reached next queried part of tree if *query_item > key.as_slice() { // continue to next push break; } if !in_range { // this is the first data we have encountered for this query // item. 
ensure lower bound of query item is proven match last_push { // lower bound is proven - we have an exact match _ if key == query_item.lower_bound() => {} // lower bound is proven - this is the leftmost node // in the tree None => {} // lower bound is proven - the preceding tree node // is lower than the bound Some(Node::KV(_, _)) => {} // cannot verify lower bound - we have an abridged // tree so we cannot tell what the preceding key was Some(_) => { return Err(Error::Bound( "Cannot verify lower bound of queried range".into(), )); } } } if key.as_slice() >= query_item.upper_bound().0 { // at or past upper bound of range (or this was an exact // match on a single-key queryitem), advance to next query // item query.next(); in_range = false; } else { // have not reached upper bound, we expect more values // to be proven in the range (and all pushes should be // unabridged until we reach end of range) in_range = true; } // this push matches the queried item if query_item.contains(key) { // add data to output output.push((key.clone(), value.clone())); // continue to next push break; } // continue to next queried item } } else if in_range { // we encountered a queried range but the proof was abridged (saw a // non-KV push), we are missing some part of the range return Err(Error::MissingData); } last_push = Some(node.clone()); Ok(()) })?; // we have remaining query items, check absence proof against right edge of // tree if query.peek().is_some() { match last_push { // last node in tree was less than queried item Some(Node::KV(_, _)) => {} // proof contains abridged data so we cannot verify absence of // remaining query items _ => { return Err(Error::MissingData); } } } if root.hash()? 
!= expected_hash { return Err(Error::HashMismatch(expected_hash, root.hash()?)); } Ok(output) } #[allow(deprecated)] #[cfg(test)] mod test { use super::super::encoding::encode_into; use super::super::*; use super::*; use crate::test_utils::make_tree_seq; use crate::tree::{NoopCommit, PanicSource, RefWalker, Tree}; use ed::Encode; fn make_3_node_tree() -> Result { let mut tree = Tree::new(vec![5], vec![5])? .attach(true, Some(Tree::new(vec![3], vec![3])?)) .attach(false, Some(Tree::new(vec![7], vec![7])?)); tree.commit(&mut NoopCommit {}).expect("commit failed"); Ok(tree) } fn verify_keys_test(keys: Vec>, expected_result: Vec>>) -> Result<()> { let mut tree = make_3_node_tree()?; let mut walker = RefWalker::new(&mut tree, PanicSource {}); let (proof, _) = walker .create_proof( keys.clone() .into_iter() .map(QueryItem::Key) .collect::>() .as_slice(), ) .expect("failed to create proof"); let mut bytes = vec![]; encode_into(proof.iter(), &mut bytes); let expected_hash = [ 185, 181, 28, 21, 108, 13, 202, 48, 129, 184, 3, 8, 157, 78, 213, 241, 94, 200, 205, 95, 179, 177, 195, 177, 216, 233, 164, 73, 102, 32, 141, 37, ]; let mut query = Query::new(); for key in keys.iter() { query.insert_key(key.clone()); } let result = verify_query(bytes.as_slice(), &query, expected_hash).expect("verify failed"); let mut values = std::collections::HashMap::new(); for (key, value) in result { assert!(values.insert(key, value).is_none()); } for (key, expected_value) in keys.iter().zip(expected_result.iter()) { assert_eq!(values.get(key), expected_value.as_ref()); } Ok(()) } #[test] fn root_verify() -> Result<()> { verify_keys_test(vec![vec![5]], vec![Some(vec![5])]) } #[test] fn single_verify() -> Result<()> { verify_keys_test(vec![vec![3]], vec![Some(vec![3])]) } #[test] fn double_verify() -> Result<()> { verify_keys_test(vec![vec![3], vec![5]], vec![Some(vec![3]), Some(vec![5])]) } #[test] fn double_verify_2() -> Result<()> { verify_keys_test(vec![vec![3], vec![7]], vec![Some(vec![3]), 
Some(vec![7])]) }

// Presence check for all three keys of the 3-node tree at once.
#[test]
fn triple_verify() -> Result<()> {
    verify_keys_test(
        vec![vec![3], vec![5], vec![7]],
        vec![Some(vec![3]), Some(vec![5]), Some(vec![7])],
    )
}

// Absence proof for a key below the smallest key in the tree.
#[test]
fn left_edge_absence_verify() -> Result<()> {
    verify_keys_test(vec![vec![2]], vec![None])
}

// Absence proof for a key above the largest key in the tree.
#[test]
fn right_edge_absence_verify() -> Result<()> {
    verify_keys_test(vec![vec![8]], vec![None])
}

// Absence proof for a key that falls between two existing keys.
#[test]
fn inner_absence_verify() -> Result<()> {
    verify_keys_test(vec![vec![6]], vec![None])
}

// Mixed query: one present key (5) and one absent key (6).
#[test]
fn absent_and_present_verify() -> Result<()> {
    verify_keys_test(vec![vec![5], vec![6]], vec![Some(vec![5]), None])
}

// An empty query still produces a valid proof: only hashes are pushed (no raw
// KV data), the proof verifies against the root hash, and the result set is
// empty.
#[test]
fn empty_proof() -> Result<()> {
    let mut tree = make_3_node_tree()?;
    let mut walker = RefWalker::new(&mut tree, PanicSource {});

    let (proof, absence) = walker
        .create_proof(vec![].as_slice())
        .expect("create_proof errored");

    let mut iter = proof.iter();
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::Hash([
            203, 210, 184, 52, 29, 56, 76, 7, 155, 239, 81, 16, 54, 13, 106, 27, 44, 218,
            198, 245, 203, 189, 15, 203, 55, 184, 75, 146, 127, 38, 143, 214
        ])))
    );
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::KVHash([
            169, 4, 73, 65, 62, 49, 160, 159, 37, 166, 195, 249, 63, 31, 23, 11, 169, 0, 24,
            104, 179, 211, 218, 38, 108, 129, 117, 232, 65, 101, 194, 157
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::Hash([
            219, 24, 98, 131, 160, 47, 139, 94, 223, 118, 217, 187, 42, 215, 213, 101, 213,
            225, 169, 57, 224, 210, 17, 135, 220, 63, 160, 42, 148, 0, 121, 115
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Child));
    assert!(iter.next().is_none());
    // neither edge of the (empty) query range touched the tree's edges
    assert_eq!(absence, (false, false));

    let mut bytes = vec![];
    encode_into(proof.iter(), &mut bytes);
    let res = verify_query(bytes.as_slice(), &Query::new(), tree.hash()).unwrap();
    assert!(res.is_empty());

    Ok(())
}

// Querying the root key: root is revealed as a KV node, children as hashes.
#[test]
fn root_proof() -> Result<()> {
    let mut tree = make_3_node_tree()?;
    let mut walker = RefWalker::new(&mut tree, PanicSource {});

    let queryitems = vec![QueryItem::Key(vec![5])];
    let (proof, absence) = walker
        .create_proof(queryitems.as_slice())
        .expect("create_proof errored");

    let mut iter = proof.iter();
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::Hash([
            203, 210, 184, 52, 29, 56, 76, 7, 155, 239, 81, 16, 54, 13, 106, 27, 44, 218,
            198, 245, 203, 189, 15, 203, 55, 184, 75, 146, 127, 38, 143, 214
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![5], vec![5]))));
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::Hash([
            219, 24, 98, 131, 160, 47, 139, 94, 223, 118, 217, 187, 42, 215, 213, 101, 213,
            225, 169, 57, 224, 210, 17, 135, 220, 63, 160, 42, 148, 0, 121, 115
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Child));
    assert!(iter.next().is_none());
    assert_eq!(absence, (false, false));

    let mut bytes = vec![];
    encode_into(proof.iter(), &mut bytes);
    let mut query = Query::new();
    for item in queryitems {
        query.insert_item(item);
    }
    let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap();
    assert_eq!(res, vec![(vec![5], vec![5])]);

    Ok(())
}

// Querying the left leaf: leaf revealed as KV, root as KVHash, sibling as Hash.
#[test]
fn leaf_proof() -> Result<()> {
    let mut tree = make_3_node_tree()?;
    let mut walker = RefWalker::new(&mut tree, PanicSource {});

    let queryitems = vec![QueryItem::Key(vec![3])];
    let (proof, absence) = walker
        .create_proof(queryitems.as_slice())
        .expect("create_proof errored");

    let mut iter = proof.iter();
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![3], vec![3]))));
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::KVHash([
            169, 4, 73, 65, 62, 49, 160, 159, 37, 166, 195, 249, 63, 31, 23, 11, 169, 0, 24,
            104, 179, 211, 218, 38, 108, 129, 117, 232, 65, 101, 194, 157
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::Hash([
            219, 24, 98, 131, 160, 47, 139, 94, 223, 118, 217, 187, 42, 215, 213, 101, 213,
            225, 169, 57, 224, 210, 17, 135, 220, 63, 160, 42, 148, 0, 121, 115
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Child));
    assert!(iter.next().is_none());
    assert_eq!(absence, (false, false));

    let mut bytes = vec![];
    encode_into(proof.iter(), &mut bytes);
    let mut query = Query::new();
    for item in queryitems {
        query.insert_item(item);
    }
    let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap();
    assert_eq!(res, vec![(vec![3], vec![3])]);

    Ok(())
}

// Querying both leaves: both revealed as KV, root abbreviated to its KVHash.
#[test]
fn double_leaf_proof() -> Result<()> {
    let mut tree = make_3_node_tree()?;
    let mut walker = RefWalker::new(&mut tree, PanicSource {});

    let queryitems = vec![QueryItem::Key(vec![3]), QueryItem::Key(vec![7])];
    let (proof, absence) = walker
        .create_proof(queryitems.as_slice())
        .expect("create_proof errored");

    let mut iter = proof.iter();
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![3], vec![3]))));
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::KVHash([
            169, 4, 73, 65, 62, 49, 160, 159, 37, 166, 195, 249, 63, 31, 23, 11, 169, 0, 24,
            104, 179, 211, 218, 38, 108, 129, 117, 232, 65, 101, 194, 157
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![7], vec![7]))));
    assert_eq!(iter.next(), Some(&Op::Child));
    assert!(iter.next().is_none());
    assert_eq!(absence, (false, false));

    let mut bytes = vec![];
    encode_into(proof.iter(), &mut bytes);
    let mut query = Query::new();
    for item in queryitems {
        query.insert_item(item);
    }
    let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap();
    assert_eq!(res, vec![(vec![3], vec![3]), (vec![7], vec![7]),]);

    Ok(())
}

// Querying every key: all three nodes revealed as KV nodes.
#[test]
fn all_nodes_proof() -> Result<()> {
    let mut tree = make_3_node_tree()?;
    let mut walker = RefWalker::new(&mut tree, PanicSource {});

    let queryitems = vec![
        QueryItem::Key(vec![3]),
        QueryItem::Key(vec![5]),
        QueryItem::Key(vec![7]),
    ];
    let (proof, absence) = walker
        .create_proof(queryitems.as_slice())
        .expect("create_proof errored");

    let mut iter = proof.iter();
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![3], vec![3]))));
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![5], vec![5]))));
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![7], vec![7]))));
    assert_eq!(iter.next(), Some(&Op::Child));
    assert!(iter.next().is_none());
    assert_eq!(absence, (false, false));

    let mut bytes = vec![];
    encode_into(proof.iter(), &mut bytes);
    let mut query = Query::new();
    for item in queryitems {
        query.insert_item(item);
    }
    let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap();
    assert_eq!(
        res,
        vec![(vec![3], vec![3]), (vec![5], vec![5]), (vec![7], vec![7]),]
    );

    Ok(())
}

// A key past the right edge of the tree: right edge is revealed (KV(7)) and
// the right absence flag is set, proving nothing exists beyond it.
#[test]
fn global_edge_absence_proof() -> Result<()> {
    let mut tree = make_3_node_tree()?;
    let mut walker = RefWalker::new(&mut tree, PanicSource {});

    let queryitems = vec![QueryItem::Key(vec![8])];
    let (proof, absence) = walker
        .create_proof(queryitems.as_slice())
        .expect("create_proof errored");

    let mut iter = proof.iter();
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::Hash([
            203, 210, 184, 52, 29, 56, 76, 7, 155, 239, 81, 16, 54, 13, 106, 27, 44, 218,
            198, 245, 203, 189, 15, 203, 55, 184, 75, 146, 127, 38, 143, 214
        ])))
    );
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::KVHash([
            169, 4, 73, 65, 62, 49, 160, 159, 37, 166, 195, 249, 63, 31, 23, 11, 169, 0, 24,
            104, 179, 211, 218, 38, 108, 129, 117, 232, 65, 101, 194, 157
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![7], vec![7]))));
    assert_eq!(iter.next(), Some(&Op::Child));
    assert!(iter.next().is_none());
    assert_eq!(absence, (false, true));

    let mut bytes = vec![];
    encode_into(proof.iter(), &mut bytes);
    let mut query = Query::new();
    for item in queryitems {
        query.insert_item(item);
    }
    let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap();
    assert_eq!(res, vec![]);

    Ok(())
}

// An absent inner key (6): both neighbors (5 and 7) are revealed as KV nodes
// so the verifier can see nothing lies between them.
#[test]
fn absence_proof() -> Result<()> {
    let mut tree = make_3_node_tree()?;
    let mut walker = RefWalker::new(&mut tree, PanicSource {});

    let queryitems = vec![QueryItem::Key(vec![6])];
    let (proof, absence) = walker
        .create_proof(queryitems.as_slice())
        .expect("create_proof errored");

    let mut iter = proof.iter();
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::Hash([
            203, 210, 184, 52, 29, 56, 76, 7, 155, 239, 81, 16, 54, 13, 106, 27, 44, 218,
            198, 245, 203, 189, 15, 203, 55, 184, 75, 146, 127, 38, 143, 214
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![5], vec![5]))));
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![7], vec![7]))));
    assert_eq!(iter.next(), Some(&Op::Child));
    assert!(iter.next().is_none());
    assert_eq!(absence, (false, false));

    let mut bytes = vec![];
    encode_into(proof.iter(), &mut bytes);
    let mut query = Query::new();
    for item in queryitems {
        query.insert_item(item);
    }
    let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap();
    assert_eq!(res, vec![]);

    Ok(())
}

// Builds the 11-node example tree from the docs, proves keys 1-4, and checks
// both the exact proof ops and the exact encoded byte layout.
#[test]
fn doc_proof() -> Result<()> {
    let mut tree = Tree::new(vec![5], vec![5])?
        .attach(
            true,
            Some(
                Tree::new(vec![2], vec![2])?
                    .attach(true, Some(Tree::new(vec![1], vec![1])?))
                    .attach(
                        false,
                        Some(
                            Tree::new(vec![4], vec![4])?
                                .attach(true, Some(Tree::new(vec![3], vec![3])?)),
                        ),
                    ),
            ),
        )
        .attach(
            false,
            Some(
                Tree::new(vec![9], vec![9])?
                    .attach(
                        true,
                        Some(
                            Tree::new(vec![7], vec![7])?
                                .attach(true, Some(Tree::new(vec![6], vec![6])?))
                                .attach(false, Some(Tree::new(vec![8], vec![8])?)),
                        ),
                    )
                    .attach(
                        false,
                        Some(
                            Tree::new(vec![11], vec![11])?
                                .attach(true, Some(Tree::new(vec![10], vec![10])?)),
                        ),
                    ),
            ),
        );
    tree.commit(&mut NoopCommit {}).unwrap();

    let mut walker = RefWalker::new(&mut tree, PanicSource {});

    let queryitems = vec![
        QueryItem::Key(vec![1]),
        QueryItem::Key(vec![2]),
        QueryItem::Key(vec![3]),
        QueryItem::Key(vec![4]),
    ];
    let (proof, absence) = walker
        .create_proof(queryitems.as_slice())
        .expect("create_proof errored");

    let mut iter = proof.iter();
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![1], vec![1]))));
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![2], vec![2]))));
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![3], vec![3]))));
    assert_eq!(iter.next(), Some(&Op::Push(Node::KV(vec![4], vec![4]))));
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(iter.next(), Some(&Op::Child));
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::KVHash([
            169, 4, 73, 65, 62, 49, 160, 159, 37, 166, 195, 249, 63, 31, 23, 11, 169, 0, 24,
            104, 179, 211, 218, 38, 108, 129, 117, 232, 65, 101, 194, 157
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Parent));
    assert_eq!(
        iter.next(),
        Some(&Op::Push(Node::Hash([
            148, 241, 151, 144, 247, 220, 92, 79, 70, 252, 168, 222, 27, 218, 53, 156, 0,
            136, 161, 107, 83, 78, 150, 246, 51, 230, 164, 248, 17, 30, 147, 91
        ])))
    );
    assert_eq!(iter.next(), Some(&Op::Child));
    assert!(iter.next().is_none());
    assert_eq!(absence, (false, false));

    let mut bytes = vec![];
    encode_into(proof.iter(), &mut bytes);
    assert_eq!(
        bytes,
        vec![
            3, 0, 1, 1, 0, 1, 1, 3, 0, 1, 2, 0, 1, 2, 16, 3, 0, 1, 3, 0, 1, 3, 3, 0, 1, 4,
            0, 1, 4, 16, 17, 2, 169, 4, 73, 65, 62, 49, 160, 159, 37, 166, 195, 249, 63, 31,
            23, 11, 169, 0, 24, 104, 179, 211, 218, 38, 108, 129, 117, 232, 65, 101, 194,
            157, 16, 1, 148, 241, 151, 144, 247, 220, 92, 79, 70, 252, 168, 222, 27, 218,
            53, 156, 0, 136, 161, 107, 83, 78, 150, 246, 51, 230, 164, 248, 17, 30, 147, 91,
            17
        ]
    );

    let mut bytes = vec![];
    encode_into(proof.iter(), &mut bytes);
    let mut query = Query::new();
    for item in queryitems {
        query.insert_item(item);
    }
    let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap();
    assert_eq!(
        res,
        vec![
            (vec![1], vec![1]),
            (vec![2], vec![2]),
            (vec![3], vec![3]),
            (vec![4], vec![4]),
        ]
    );

    Ok(())
}

// Ordering semantics of QueryItem: a Key compares equal to any range that
// contains it, otherwise items order by their bounds.
#[test]
fn query_item_cmp() {
    assert!(QueryItem::Key(vec![10]) < QueryItem::Key(vec![20]));
    assert!(QueryItem::Key(vec![10]) == QueryItem::Key(vec![10]));
    assert!(QueryItem::Key(vec![20]) > QueryItem::Key(vec![10]));

    assert!(QueryItem::Key(vec![10]) < QueryItem::Range(vec![20]..vec![30]));
    assert!(QueryItem::Key(vec![10]) == QueryItem::Range(vec![10]..vec![20]));
    assert!(QueryItem::Key(vec![15]) == QueryItem::Range(vec![10]..vec![20]));
    // exclusive upper bound: 20 is not inside 10..20
    assert!(QueryItem::Key(vec![20]) > QueryItem::Range(vec![10]..vec![20]));
    assert!(QueryItem::Key(vec![20]) == QueryItem::RangeInclusive(vec![10]..=vec![20]));
    assert!(QueryItem::Key(vec![30]) > QueryItem::Range(vec![10]..vec![20]));

    assert!(QueryItem::Range(vec![10]..vec![20]) < QueryItem::Range(vec![30]..vec![40]));
    assert!(QueryItem::Range(vec![10]..vec![20]) < QueryItem::Range(vec![20]..vec![30]));
    // overlapping (or touching, for inclusive bounds) ranges compare equal
    assert!(
        QueryItem::RangeInclusive(vec![10]..=vec![20]) == QueryItem::Range(vec![20]..vec![30])
    );
    assert!(QueryItem::Range(vec![15]..vec![25]) == QueryItem::Range(vec![20]..vec![30]));
    assert!(QueryItem::Range(vec![20]..vec![30]) > QueryItem::Range(vec![10]..vec![20]));
}

// Merging overlapping QueryItems yields the smallest item covering both.
#[test]
fn query_item_merge() {
    let mine = QueryItem::Range(vec![10]..vec![30]);
    let other = QueryItem::Range(vec![15]..vec![20]);
    assert_eq!(mine.merge(other), QueryItem::Range(vec![10]..vec![30]));

    let mine = QueryItem::RangeInclusive(vec![10]..=vec![30]);
    let other = QueryItem::Range(vec![20]..vec![30]);
    assert_eq!(
        mine.merge(other),
        QueryItem::RangeInclusive(vec![10]..=vec![30])
    );

    let mine = QueryItem::Key(vec![5]);
    let other = QueryItem::Range(vec![1]..vec![10]);
    assert_eq!(mine.merge(other), QueryItem::Range(vec![1]..vec![10]));

    let mine = QueryItem::Key(vec![10]);
    let other = QueryItem::RangeInclusive(vec![1]..=vec![10]);
    assert_eq!(
mine.merge(other), QueryItem::RangeInclusive(vec![1]..=vec![10]) ); } #[test] fn query_insert() { let mut query = Query::new(); query.insert_key(vec![2]); query.insert_range(vec![3]..vec![5]); query.insert_range_inclusive(vec![5]..=vec![7]); query.insert_range(vec![4]..vec![6]); query.insert_key(vec![5]); let mut iter = query.items.iter(); assert_eq!(format!("{:?}", iter.next()), "Some(Key([2]))"); assert_eq!( format!("{:?}", iter.next()), "Some(RangeInclusive([3]..=[7]))" ); assert_eq!(iter.next(), None); } #[test] fn range_proof() { let mut tree = make_tree_seq(10); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let queryitems = vec![QueryItem::Range( vec![0, 0, 0, 0, 0, 0, 0, 5]..vec![0, 0, 0, 0, 0, 0, 0, 7], )]; let (proof, absence) = walker .create_proof(queryitems.as_slice()) .expect("create_proof errored"); let mut iter = proof.iter(); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 131, 182, 249, 107, 5, 43, 253, 172, 175, 5, 92, 100, 112, 7, 61, 179, 216, 127, 180, 104, 127, 239, 76, 175, 20, 208, 82, 101, 163, 177, 107, 229 ]))) ); assert_eq!( iter.next(), Some(&Op::Push(Node::KVHash([ 252, 83, 231, 211, 74, 65, 100, 80, 251, 110, 182, 76, 90, 44, 213, 30, 241, 239, 2, 5, 216, 202, 184, 130, 47, 53, 146, 68, 179, 22, 45, 30 ]))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 71, 142, 184, 184, 188, 130, 2, 241, 17, 17, 179, 82, 112, 27, 31, 20, 92, 69, 145, 176, 112, 235, 30, 16, 54, 157, 64, 114, 154, 54, 63, 253 ]))) ); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 5], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 6], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Child)); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 7], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), 
Some(&Op::Push(Node::Hash([ 150, 100, 68, 82, 53, 2, 5, 199, 230, 152, 77, 216, 114, 30, 205, 210, 226, 140, 161, 62, 235, 10, 116, 142, 115, 201, 56, 218, 44, 151, 86, 154 ]))) ); assert_eq!(iter.next(), Some(&Op::Child)); assert_eq!(iter.next(), Some(&Op::Child)); assert!(iter.next().is_none()); assert_eq!(absence, (false, false)); let mut bytes = vec![]; encode_into(proof.iter(), &mut bytes); let mut query = Query::new(); for item in queryitems { query.insert_item(item); } let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap(); assert_eq!( res, vec![ (vec![0, 0, 0, 0, 0, 0, 0, 5], vec![123; 60]), (vec![0, 0, 0, 0, 0, 0, 0, 6], vec![123; 60]), ] ); } #[test] fn range_proof_inclusive() { let mut tree = make_tree_seq(10); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let queryitems = vec![QueryItem::RangeInclusive( vec![0, 0, 0, 0, 0, 0, 0, 5]..=vec![0, 0, 0, 0, 0, 0, 0, 7], )]; let (proof, absence) = walker .create_proof(queryitems.as_slice()) .expect("create_proof errored"); let mut iter = proof.iter(); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 131, 182, 249, 107, 5, 43, 253, 172, 175, 5, 92, 100, 112, 7, 61, 179, 216, 127, 180, 104, 127, 239, 76, 175, 20, 208, 82, 101, 163, 177, 107, 229 ]))) ); assert_eq!( iter.next(), Some(&Op::Push(Node::KVHash([ 252, 83, 231, 211, 74, 65, 100, 80, 251, 110, 182, 76, 90, 44, 213, 30, 241, 239, 2, 5, 216, 202, 184, 130, 47, 53, 146, 68, 179, 22, 45, 30 ]))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 71, 142, 184, 184, 188, 130, 2, 241, 17, 17, 179, 82, 112, 27, 31, 20, 92, 69, 145, 176, 112, 235, 30, 16, 54, 157, 64, 114, 154, 54, 63, 253 ]))) ); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 5], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 6], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Child)); 
assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 7], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 150, 100, 68, 82, 53, 2, 5, 199, 230, 152, 77, 216, 114, 30, 205, 210, 226, 140, 161, 62, 235, 10, 116, 142, 115, 201, 56, 218, 44, 151, 86, 154 ]))) ); assert_eq!(iter.next(), Some(&Op::Child)); assert_eq!(iter.next(), Some(&Op::Child)); assert!(iter.next().is_none()); assert_eq!(absence, (false, false)); let mut bytes = vec![]; encode_into(proof.iter(), &mut bytes); let mut query = Query::new(); for item in queryitems { query.insert_item(item); } let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap(); assert_eq!( res, vec![ (vec![0, 0, 0, 0, 0, 0, 0, 5], vec![123; 60]), (vec![0, 0, 0, 0, 0, 0, 0, 6], vec![123; 60]), (vec![0, 0, 0, 0, 0, 0, 0, 7], vec![123; 60]), ] ); } #[test] fn range_proof_missing_upper_bound() { let mut tree = make_tree_seq(10); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let queryitems = vec![QueryItem::Range( vec![0, 0, 0, 0, 0, 0, 0, 5]..vec![0, 0, 0, 0, 0, 0, 0, 6, 5], )]; let (proof, absence) = walker .create_proof(queryitems.as_slice()) .expect("create_proof errored"); let mut iter = proof.iter(); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 131, 182, 249, 107, 5, 43, 253, 172, 175, 5, 92, 100, 112, 7, 61, 179, 216, 127, 180, 104, 127, 239, 76, 175, 20, 208, 82, 101, 163, 177, 107, 229 ]))) ); assert_eq!( iter.next(), Some(&Op::Push(Node::KVHash([ 252, 83, 231, 211, 74, 65, 100, 80, 251, 110, 182, 76, 90, 44, 213, 30, 241, 239, 2, 5, 216, 202, 184, 130, 47, 53, 146, 68, 179, 22, 45, 30 ]))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 71, 142, 184, 184, 188, 130, 2, 241, 17, 17, 179, 82, 112, 27, 31, 20, 92, 69, 145, 176, 112, 235, 30, 16, 54, 157, 64, 114, 154, 54, 63, 253 ]))) ); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 
0, 0, 5], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 6], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Child)); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 7], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 150, 100, 68, 82, 53, 2, 5, 199, 230, 152, 77, 216, 114, 30, 205, 210, 226, 140, 161, 62, 235, 10, 116, 142, 115, 201, 56, 218, 44, 151, 86, 154 ]))) ); assert_eq!(iter.next(), Some(&Op::Child)); assert_eq!(iter.next(), Some(&Op::Child)); assert!(iter.next().is_none()); assert_eq!(absence, (false, false)); let mut bytes = vec![]; encode_into(proof.iter(), &mut bytes); let mut query = Query::new(); for item in queryitems { query.insert_item(item); } let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap(); assert_eq!( res, vec![ (vec![0, 0, 0, 0, 0, 0, 0, 5], vec![123; 60]), (vec![0, 0, 0, 0, 0, 0, 0, 6], vec![123; 60]), ] ); } #[test] fn range_proof_missing_lower_bound() { let mut tree = make_tree_seq(10); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let queryitems = vec![ // 7 is not inclusive QueryItem::Range(vec![0, 0, 0, 0, 0, 0, 0, 5, 5]..vec![0, 0, 0, 0, 0, 0, 0, 7]), ]; let (proof, absence) = walker .create_proof(queryitems.as_slice()) .expect("create_proof errored"); let mut iter = proof.iter(); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 131, 182, 249, 107, 5, 43, 253, 172, 175, 5, 92, 100, 112, 7, 61, 179, 216, 127, 180, 104, 127, 239, 76, 175, 20, 208, 82, 101, 163, 177, 107, 229 ]))) ); assert_eq!( iter.next(), Some(&Op::Push(Node::KVHash([ 252, 83, 231, 211, 74, 65, 100, 80, 251, 110, 182, 76, 90, 44, 213, 30, 241, 239, 2, 5, 216, 202, 184, 130, 47, 53, 146, 68, 179, 22, 45, 30 ]))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 71, 142, 184, 184, 188, 
130, 2, 241, 17, 17, 179, 82, 112, 27, 31, 20, 92, 69, 145, 176, 112, 235, 30, 16, 54, 157, 64, 114, 154, 54, 63, 253 ]))) ); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 5], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 6], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Child)); assert_eq!( iter.next(), Some(&Op::Push(Node::KV( vec![0, 0, 0, 0, 0, 0, 0, 7], vec![123; 60] ))) ); assert_eq!(iter.next(), Some(&Op::Parent)); assert_eq!( iter.next(), Some(&Op::Push(Node::Hash([ 150, 100, 68, 82, 53, 2, 5, 199, 230, 152, 77, 216, 114, 30, 205, 210, 226, 140, 161, 62, 235, 10, 116, 142, 115, 201, 56, 218, 44, 151, 86, 154 ]))) ); assert_eq!(iter.next(), Some(&Op::Child)); assert_eq!(iter.next(), Some(&Op::Child)); assert!(iter.next().is_none()); assert_eq!(absence, (false, false)); let mut bytes = vec![]; encode_into(proof.iter(), &mut bytes); let mut query = Query::new(); for item in queryitems { query.insert_item(item); } let res = verify_query(bytes.as_slice(), &query, tree.hash()).unwrap(); assert_eq!(res, vec![(vec![0, 0, 0, 0, 0, 0, 0, 6], vec![123; 60]),]); } #[test] fn query_from_vec() { let queryitems = vec![QueryItem::Range( vec![0, 0, 0, 0, 0, 0, 0, 5, 5]..vec![0, 0, 0, 0, 0, 0, 0, 7], )]; let query = Query::from(queryitems); let mut expected = BTreeSet::new(); expected.insert(QueryItem::Range( vec![0, 0, 0, 0, 0, 0, 0, 5, 5]..vec![0, 0, 0, 0, 0, 0, 0, 7], )); assert_eq!(query.items, expected); } #[test] fn query_into_vec() { let mut query = Query::new(); query.insert_item(QueryItem::Range( vec![0, 0, 0, 0, 0, 0, 5, 5]..vec![0, 0, 0, 0, 0, 0, 0, 7], )); let query_vec: Vec = query.into(); let expected = vec![QueryItem::Range( vec![0, 0, 0, 0, 0, 0, 5, 5]..vec![0, 0, 0, 0, 0, 0, 0, 7], )]; assert_eq!( query_vec.get(0).unwrap().lower_bound(), expected.get(0).unwrap().lower_bound() ); assert_eq!( 
query_vec.get(0).unwrap().upper_bound(), expected.get(0).unwrap().upper_bound() ); } #[test] fn query_item_from_vec_u8() { let queryitems: Vec = vec![42]; let query = QueryItem::from(queryitems); let expected = QueryItem::Key(vec![42]); assert_eq!(query, expected); } #[test] fn verify_ops() -> Result<()> { let mut tree = Tree::new(vec![5], vec![5])?; tree.commit(&mut NoopCommit {}).expect("commit failed"); let root_hash = tree.hash(); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let (proof, _) = walker .create_proof(vec![QueryItem::Key(vec![5])].as_slice()) .expect("failed to create proof"); let mut bytes = vec![]; encode_into(proof.iter(), &mut bytes); let map = verify(&bytes, root_hash).unwrap(); assert_eq!( map.get(vec![5].as_slice()).unwrap().unwrap(), vec![5].as_slice() ); Ok(()) } #[test] #[should_panic(expected = "verify failed")] fn verify_ops_mismatched_hash() { let mut tree = Tree::new(vec![5], vec![5]).expect("tree construction failed"); tree.commit(&mut NoopCommit {}).expect("commit failed"); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let (proof, _) = walker .create_proof(vec![QueryItem::Key(vec![5])].as_slice()) .expect("failed to create proof"); let mut bytes = vec![]; encode_into(proof.iter(), &mut bytes); let _map = verify(&bytes, [42; 32]).expect("verify failed"); } #[test] #[should_panic(expected = "verify failed")] fn verify_query_mismatched_hash() { let mut tree = make_3_node_tree().expect("tree construction failed"); let mut walker = RefWalker::new(&mut tree, PanicSource {}); let keys = vec![vec![5], vec![7]]; let (proof, _) = walker .create_proof( keys.clone() .into_iter() .map(QueryItem::Key) .collect::>() .as_slice(), ) .expect("failed to create proof"); let mut bytes = vec![]; encode_into(proof.iter(), &mut bytes); let mut query = Query::new(); for key in keys.iter() { query.insert_key(key.clone()); } let _result = verify_query(bytes.as_slice(), &query, [42; 32]).expect("verify failed"); } #[test] 
// Attaching a child under a Hash-only node is illegal and must panic through
// the expect below.
#[should_panic(expected = "Tried to attach to Hash node")]
fn hash_attach() {
    let mut target = make_3_node_tree().expect("tree construction failed");

    let mut proof = Vec::new();
    proof.push(Op::Push(Node::KV(vec![42], vec![42])));
    proof.push(Op::Push(Node::Hash(target.hash())));
    proof.push(Op::Parent);

    let map = verify(&proof.encode().unwrap(), target.hash()).unwrap();
    assert_eq!(map.get(&[42]).unwrap().unwrap(), &[42])
}
}

================================================
FILE: src/proofs/tree.rs
================================================
// NOTE(review): generic arguments throughout this file were stripped by
// extraction (`<Ident...>` sequences removed) and have been restored from
// context — confirm each turbofish/bound against the upstream file,
// particularly `node_hash::<Hasher>` / `kv_hash::<Hasher>`.
use super::{Node, Op};
use crate::error::{Error, Result};
use crate::tree::{kv_hash, node_hash, Hash, Hasher, NULL_HASH};

/// Contains a tree's child node and its hash. The hash can always be assumed to
/// be up-to-date.
#[derive(Debug)]
pub struct Child {
    /// The child node.
    pub tree: Box<Tree>,
    /// The hash of the child node.
    pub hash: Hash,
}

/// A binary tree data structure used to represent a select subset of a tree
/// when verifying Merkle proofs.
#[derive(Debug)]
pub struct Tree {
    /// The node at the root of this tree.
    pub node: Node,
    /// The left child of this tree.
    pub left: Option<Child>,
    /// The right child of this tree.
    pub right: Option<Child>,
    /// The height of this tree.
    pub height: usize,
}

impl From<Node> for Tree {
    /// Creates a childless tree with the target node as the `node` field.
    fn from(node: Node) -> Self {
        Tree {
            node,
            left: None,
            right: None,
            height: 1,
        }
    }
}

impl PartialEq for Tree {
    /// Checks equality for the root hashes of the two trees. Hash computation
    /// errors compare as not-equal (`unwrap_or_default` yields `false`).
    fn eq(&self, other: &Self) -> bool {
        self.hash()
            .and_then(|this_hash| other.hash().map(|other_hash| this_hash == other_hash))
            .unwrap_or_default()
    }
}

impl Tree {
    /// Gets or computes the hash for this tree node: `Hash` variants return
    /// their stored hash, `KVHash`/`KV` variants combine the kv-hash with the
    /// (possibly null) child hashes.
    pub fn hash(&self) -> Result<Hash> {
        fn compute_hash(tree: &Tree, kv_hash: Hash) -> Hash {
            node_hash::<Hasher>(&kv_hash, &tree.child_hash(true), &tree.child_hash(false))
        }

        match &self.node {
            Node::Hash(hash) => Ok(*hash),
            Node::KVHash(kv_hash) => Ok(compute_hash(self, *kv_hash)),
            Node::KV(key, value) => kv_hash::<Hasher>(key.as_slice(), value.as_slice())
                .map(|kv_hash| compute_hash(self, kv_hash))
                .map_err(Into::into),
        }
    }

    /// Creates an iterator that yields the in-order traversal of the nodes at
    /// the given depth.
    pub fn layer(&self, depth: usize) -> LayerIter {
        LayerIter::new(self, depth)
    }

    /// Consumes the `Tree` and does an in-order traversal over all the nodes in
    /// the tree, calling `visit_node` for each.
    pub fn visit_nodes<F: FnMut(Node)>(mut self, visit_node: &mut F) {
        if let Some(child) = self.left.take() {
            child.tree.visit_nodes(visit_node);
        }

        // take the right child before consuming self.node
        let maybe_right_child = self.right.take();

        visit_node(self.node);

        if let Some(child) = maybe_right_child {
            child.tree.visit_nodes(visit_node);
        }
    }

    /// Does an in-order traversal over references to all the nodes in the tree,
    /// calling `visit_node` for each.
    pub fn visit_refs<F: FnMut(&Tree)>(&self, visit_node: &mut F) {
        if let Some(child) = &self.left {
            child.tree.visit_refs(visit_node);
        }

        visit_node(self);

        if let Some(child) = &self.right {
            child.tree.visit_refs(visit_node);
        }
    }

    /// Returns an immutable reference to the child on the given side, if any.
    pub fn child(&self, left: bool) -> Option<&Child> {
        if left {
            self.left.as_ref()
        } else {
            self.right.as_ref()
        }
    }

    /// Returns a mutable reference to the child on the given side, if any.
    pub(crate) fn child_mut(&mut self, left: bool) -> &mut Option<Child> {
        if left {
            &mut self.left
        } else {
            &mut self.right
        }
    }

    /// Attaches the child to the `Tree`'s given side. Returns an error if
    /// there is already a child attached to this side, or if this node is a
    /// bare `Hash` node (which cannot carry children).
    pub(crate) fn attach(&mut self, left: bool, child: Tree) -> Result<()> {
        if self.child(left).is_some() {
            return Err(Error::Attach(
                "Tried to attach to left child, but it is already Some".into(),
            ));
        }

        if let Node::Hash(_) = self.node {
            return Err(Error::Attach("Tried to attach to Hash node".into()));
        }

        self.height = self.height.max(child.height + 1);

        let hash = child.hash()?;
        let tree = Box::new(child);
        *self.child_mut(left) = Some(Child { tree, hash });

        Ok(())
    }

    /// Returns the already-computed hash for this tree node's child on the
    /// given side, if any. If there is no child, returns the null hash
    /// (zero-filled).
    #[inline]
    fn child_hash(&self, left: bool) -> Hash {
        self.child(left).map_or(NULL_HASH, |c| c.hash)
    }

    /// Consumes the tree node, calculates its hash, and returns a `Node::Hash`
    /// variant (wrapped in a childless `Tree`).
    fn try_into_hash(self) -> Result<Tree> {
        self.hash().map(Node::Hash).map(Into::into)
    }

    #[cfg(feature = "full")]
    pub(crate) fn key(&self) -> &[u8] {
        match self.node {
            Node::KV(ref key, _) => key,
            _ => panic!("Expected node to be type KV"),
        }
    }
}

/// `LayerIter` iterates over the nodes in a `Tree` at a given depth. Nodes are
/// visited in order.
pub struct LayerIter<'a> {
    // path of ancestors down to the current position (stack top = current)
    stack: Vec<&'a Tree>,
    depth: usize,
}

impl<'a> LayerIter<'a> {
    /// Creates a new `LayerIter` that iterates over `tree` at the given depth.
    fn new(tree: &'a Tree, depth: usize) -> Self {
        let mut iter = LayerIter {
            stack: Vec::with_capacity(depth),
            depth,
        };

        iter.traverse_to_start(tree, depth);
        iter
    }

    /// Builds up the stack by traversing through left children to the desired
    /// depth.
    fn traverse_to_start(&mut self, tree: &'a Tree, remaining_depth: usize) {
        self.stack.push(tree);

        if remaining_depth == 0 {
            return;
        }

        if let Some(child) = tree.child(true) {
            self.traverse_to_start(&child.tree, remaining_depth - 1)
        } else {
            panic!("Could not traverse to given layer")
        }
    }
}

impl<'a> Iterator for LayerIter<'a> {
    type Item = &'a Tree;

    fn next(&mut self) -> Option<Self::Item> {
        let item = self.stack.pop();
        let mut popped = item;

        loop {
            if self.stack.is_empty() {
                return item;
            }

            let parent = self.stack.last().unwrap();
            let left_child = parent.child(true).unwrap();
            let right_child = parent.child(false).unwrap();

            if left_child.tree.as_ref() == popped.unwrap() {
                // finished the left subtree; descend into the right subtree's
                // leftmost path back down to the target depth
                self.stack.push(&right_child.tree);

                while self.stack.len() - 1 < self.depth {
                    let parent = self.stack.last().unwrap();
                    let left_child = parent.child(true).unwrap();
                    self.stack.push(&left_child.tree);
                }

                return item;
            } else {
                popped = self.stack.pop();
            }
        }
    }
}

/// Executes a proof by stepping through its operators, modifying the
/// verification stack as it goes. The resulting stack item is returned.
///
/// If the `collapse` option is set to `true`, nodes will be hashed and pruned
/// from memory during execution. This results in the minimum amount of memory
/// usage, and the returned `Tree` will only contain a single node of type
/// `Node::Hash`. If `false`, the returned `Tree` will contain the entire
/// subtree contained in the proof.
///
/// `visit_node` will be called once for every push operation in the proof, in
/// key-order. If `visit_node` returns an `Err` result, it will halt the
/// execution and `execute` will return the error.
pub(crate) fn execute<I, F>(ops: I, collapse: bool, mut visit_node: F) -> Result<Tree>
where
    I: IntoIterator<Item = Result<Op>>,
    F: FnMut(&Node) -> Result<()>,
{
    let mut stack: Vec<Tree> = Vec::with_capacity(32);
    let mut maybe_last_key = None;

    fn try_pop(stack: &mut Vec<Tree>) -> Result<Tree> {
        match stack.pop() {
            None => Err(Error::StackUnderflow),
            Some(tree) => Ok(tree),
        }
    }

    for op in ops {
        match op? {
            Op::Parent => {
                let (mut parent, child) = (try_pop(&mut stack)?, try_pop(&mut stack)?);
                parent.attach(
                    true,
                    if collapse { child.try_into_hash()? } else { child },
                )?;
                stack.push(parent);
            }
            Op::Child => {
                let (child, mut parent) = (try_pop(&mut stack)?, try_pop(&mut stack)?);
                parent.attach(
                    false,
                    if collapse { child.try_into_hash()? } else { child },
                )?;
                stack.push(parent);
            }
            Op::Push(node) => {
                if let Node::KV(key, _) = &node {
                    // keys should always increase
                    if let Some(last_key) = &maybe_last_key {
                        if key <= last_key {
                            return Err(Error::Key("Incorrect key ordering".into()));
                        }
                    }

                    maybe_last_key = Some(key.clone());
                }

                visit_node(&node)?;

                let tree: Tree = node.into();
                stack.push(tree);
            }
        }
    }

    if stack.len() != 1 {
        // fixed typo in message: "on" -> "one"
        return Err(Error::Proof(
            "Expected proof to result in exactly one stack item".into(),
        ));
    }

    Ok(stack.pop().unwrap())
}

#[cfg(test)]
mod test {
    use super::super::*;
    use super::Tree as ProofTree;
    use super::*;

    fn make_7_node_prooftree() -> ProofTree {
        let make_node = |i| -> super::super::tree::Tree { Node::KV(vec![i], vec![]).into() };

        let mut tree = make_node(3);
        let mut left = make_node(1);
        left.attach(true, make_node(0)).unwrap();
        left.attach(false, make_node(2)).unwrap();
        let mut right = make_node(5);
        right.attach(true, make_node(4)).unwrap();
        right.attach(false, make_node(6)).unwrap();
        tree.attach(true, left).unwrap();
        tree.attach(false, right).unwrap();

        tree
    }

    #[test]
    fn height_counting() {
        fn recurse(tree: &super::Tree, expected_height: usize) {
            assert_eq!(tree.height, expected_height);
            tree.left
                .as_ref()
                .into_iter()
                .for_each(|l| recurse(&l.tree, expected_height - 1));
            tree.right
                .as_ref()
                .into_iter()
                .for_each(|r| recurse(&r.tree, expected_height - 1));
        }

        let tree = make_7_node_prooftree();
        recurse(&tree, 3);
    }

    #[test]
    fn layer_iter() {
        let tree = make_7_node_prooftree();

        let assert_node = |node: &Tree, i| match node.node {
            Node::KV(ref key, _) => assert_eq!(key[0], i),
            _ => unreachable!(),
        };

        let mut iter = tree.layer(0);
        assert_node(iter.next().unwrap(), 3);
        assert!(iter.next().is_none());

        let mut iter = tree.layer(1);
        assert_node(iter.next().unwrap(), 1);
        assert_node(iter.next().unwrap(), 5);
        assert!(iter.next().is_none());

        let mut iter = tree.layer(2);
        assert_node(iter.next().unwrap(), 0);
        assert_node(iter.next().unwrap(), 2);
        assert_node(iter.next().unwrap(), 4);
        assert_node(iter.next().unwrap(), 6);
        assert!(iter.next().is_none());
    }

    #[test]
    fn visit_nodes() {
        let tree = make_7_node_prooftree();

        let assert_node = |node: Node, i| match node {
            Node::KV(ref key, _) => assert_eq!(key[0], i),
            _ => unreachable!(),
        };

        let mut visited = vec![];
        tree.visit_nodes(&mut |node| visited.push(node));

        let mut iter = visited.into_iter();
        for i in 0..7 {
            assert_node(iter.next().unwrap(), i);
        }
        assert!(iter.next().is_none());
    }
}

================================================
FILE: src/test_utils/crash_merk.rs
================================================
use crate::{Merk, Result};
use std::fs;
use std::mem::ManuallyDrop;
use std::ops::{Deref, DerefMut};
use std::path::Path;

/// Wraps a Merk instance and drops it without flushing once it goes out of
/// scope.
pub struct CrashMerk {
    // ManuallyDrop so the inner Merk's destructor (and its flush) can be
    // skipped deliberately in `crash`
    inner: Option<ManuallyDrop<Merk>>,
    path: Box<Path>,
}

impl CrashMerk {
    /// Opens a `CrashMerk` at the given file path, creating a new one if it
    /// does not exist.
pub fn open>(path: P) -> Result { let merk = Merk::open(&path)?; let inner = Some(ManuallyDrop::new(merk)); Ok(CrashMerk { inner, path: path.as_ref().into(), }) } #[allow(clippy::missing_safety_doc)] pub unsafe fn crash(&mut self) -> Result<()> { ManuallyDrop::drop(&mut self.inner.take().unwrap()); // rename to invalidate rocksdb's lock let file_name = format!( "{}_crashed", self.path.file_name().unwrap().to_str().unwrap() ); let new_path = self.path.with_file_name(file_name); fs::rename(&self.path, &new_path)?; let mut new_merk = CrashMerk::open(&new_path)?; self.inner = new_merk.inner.take(); self.path = new_merk.path; Ok(()) } pub fn into_inner(self) -> Merk { ManuallyDrop::into_inner(self.inner.unwrap()) } pub fn destroy(self) -> Result<()> { self.into_inner().destroy() } } impl Deref for CrashMerk { type Target = Merk; fn deref(&self) -> &Merk { self.inner.as_ref().unwrap() } } impl DerefMut for CrashMerk { fn deref_mut(&mut self) -> &mut Merk { self.inner.as_mut().unwrap() } } #[cfg(test)] mod tests { use super::CrashMerk; use crate::Op; #[test] #[ignore] // currently this still works because we enabled the WAL fn crash() { let path = std::thread::current().name().unwrap().to_owned(); let mut merk = CrashMerk::open(path).expect("failed to open merk"); merk.apply(&[(vec![1, 2, 3], Op::Put(vec![4, 5, 6]))], &[]) .expect("apply failed"); unsafe { merk.crash().unwrap(); } assert_eq!(merk.get(&[1, 2, 3]).expect("failed to get"), None); merk.into_inner().destroy().unwrap(); } } ================================================ FILE: src/test_utils/mod.rs ================================================ #![allow(missing_docs)] mod crash_merk; mod temp_merk; use crate::tree::{Batch, BatchEntry, NoopCommit, Op, PanicSource, Tree, Walker}; use rand::prelude::*; use std::convert::TryInto; use std::ops::Range; pub use crash_merk::CrashMerk; pub use temp_merk::TempMerk; pub fn assert_tree_invariants(tree: &Tree) { assert!(tree.balance_factor().abs() < 2); let maybe_left = 
tree.link(true); if let Some(left) = maybe_left { assert!(left.key() < tree.key()); assert!(!left.is_modified()); } let maybe_right = tree.link(false); if let Some(right) = maybe_right { assert!(right.key() > tree.key()); assert!(!right.is_modified()); } if let Some(left) = tree.child(true) { assert_tree_invariants(left); } if let Some(right) = tree.child(false) { assert_tree_invariants(right); } } pub fn apply_memonly_unchecked(tree: Tree, batch: &Batch) -> Tree { let walker = Walker::::new(tree, PanicSource {}); let mut tree = Walker::::apply_to(Some(walker), batch, PanicSource {}) .expect("apply failed") .0 .expect("expected tree"); tree.commit(&mut NoopCommit {}).expect("commit failed"); tree } pub fn apply_memonly(tree: Tree, batch: &Batch) -> Tree { let tree = apply_memonly_unchecked(tree, batch); assert_tree_invariants(&tree); tree } pub fn apply_to_memonly(maybe_tree: Option, batch: &Batch) -> Option { let maybe_walker = maybe_tree.map(|tree| Walker::::new(tree, PanicSource {})); Walker::::apply_to(maybe_walker, batch, PanicSource {}) .expect("apply failed") .0 .map(|mut tree| { tree.commit(&mut NoopCommit {}).expect("commit failed"); println!("{:?}", &tree); assert_tree_invariants(&tree); tree }) } pub fn seq_key(n: u64) -> Vec { n.to_be_bytes().to_vec() } pub fn put_entry_value() -> Vec { vec![123; 60] } pub fn put_entry(n: u64) -> BatchEntry { (seq_key(n), Op::Put(put_entry_value())) } pub fn del_entry(n: u64) -> BatchEntry { (seq_key(n), Op::Delete) } pub fn make_batch_seq(range: Range) -> Vec { let mut batch = Vec::with_capacity((range.end - range.start).try_into().unwrap()); for n in range { batch.push(put_entry(n)); } batch } pub fn make_del_batch_seq(range: Range) -> Vec { let mut batch = Vec::with_capacity((range.end - range.start).try_into().unwrap()); for n in range { batch.push(del_entry(n)); } batch } pub fn make_batch_rand(size: u64, seed: u64) -> Vec { let mut rng: SmallRng = SeedableRng::seed_from_u64(seed); let mut batch = 
Vec::with_capacity(size.try_into().unwrap()); for _ in 0..size { let n = rng.gen::(); batch.push(put_entry(n)); } batch.sort_by(|a, b| a.0.cmp(&b.0)); batch } pub fn make_del_batch_rand(size: u64, seed: u64) -> Vec { let mut rng: SmallRng = SeedableRng::seed_from_u64(seed); let mut batch = Vec::with_capacity(size.try_into().unwrap()); for _ in 0..size { let n = rng.gen::(); batch.push(del_entry(n)); } batch.sort_by(|a, b| a.0.cmp(&b.0)); batch } pub fn make_tree_rand(node_count: u64, batch_size: u64, initial_seed: u64) -> Tree { assert!(node_count >= batch_size); assert!((node_count % batch_size) == 0); let value = vec![123; 60]; let mut tree = Tree::new(vec![0; 20], value).expect("Tree construction failed"); let mut seed = initial_seed; let batch_count = node_count / batch_size; for _ in 0..batch_count { let batch = make_batch_rand(batch_size, seed); tree = apply_memonly(tree, &batch); seed += 1; } tree } pub fn make_tree_seq(node_count: u64) -> Tree { let batch_size = if node_count >= 10_000 { assert!(node_count % 10_000 == 0); 10_000 } else { node_count }; let value = vec![123; 60]; let mut tree = Tree::new(vec![0; 20], value).expect("Tree construction failed"); let batch_count = node_count / batch_size; for i in 0..batch_count { let batch = make_batch_seq((i * batch_size)..((i + 1) * batch_size)); tree = apply_memonly(tree, &batch); } tree } ================================================ FILE: src/test_utils/temp_merk.rs ================================================ use crate::{Merk, Result}; use std::env::temp_dir; use std::ops::{Deref, DerefMut}; use std::path::{Path, PathBuf}; use std::time::SystemTime; /// Wraps a Merk instance and deletes it from disk it once it goes out of scope. pub struct TempMerk { inner: Option, } impl TempMerk { /// Opens a `TempMerk` at the given file path, creating a new one if it does /// not exist. 
pub fn open>(path: P) -> Result { let inner = Some(Merk::open(path)?); Ok(TempMerk { inner }) } /// Opens a `TempMerk` at an autogenerated, temporary file path. pub fn new() -> Result { TempMerk::open(Self::create_path()) } pub fn create_path() -> PathBuf { let time = SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) .unwrap() .as_nanos(); let mut path = temp_dir(); path.push(format!("merk-temp–{time}")); path } } impl Drop for TempMerk { fn drop(&mut self) { self.inner .take() .unwrap() .destroy() .expect("failed to delete db"); } } impl Deref for TempMerk { type Target = Merk; fn deref(&self) -> &Merk { self.inner.as_ref().unwrap() } } impl DerefMut for TempMerk { fn deref_mut(&mut self) -> &mut Merk { self.inner.as_mut().unwrap() } } ================================================ FILE: src/tree/commit.rs ================================================ use super::Tree; use crate::error::Result; /// To be used when committing a tree (writing it to a store after applying the /// changes). pub trait Commit { /// Called once per updated node when a finalized tree is to be written to a /// backing store or cache. fn write(&mut self, tree: &Tree) -> Result<()>; /// Called once per node after writing a node and its children. The returned /// tuple specifies whether or not to prune the left and right child nodes, /// respectively. For example, returning `(true, true)` will prune both /// nodes, removing them from memory. fn prune(&self, _tree: &Tree) -> (bool, bool) { (true, true) } } /// A `Commit` implementation which does not write to a store and does not prune /// any nodes from the Tree. Useful when only keeping a tree in memory. 
pub struct NoopCommit {} impl Commit for NoopCommit { fn write(&mut self, _tree: &Tree) -> Result<()> { Ok(()) } fn prune(&self, _tree: &Tree) -> (bool, bool) { (false, false) } } ================================================ FILE: src/tree/debug.rs ================================================ use super::{Link, Tree}; use colored::Colorize; use std::fmt::{Debug, Formatter, Result}; impl Debug for Tree { // TODO: unwraps should be results that bubble up fn fmt(&self, f: &mut Formatter) -> Result { fn traverse( f: &mut Formatter, cursor: &Tree, stack: &mut Vec<(Vec, Vec)>, left: bool, ) { if let Some(child_link) = cursor.link(true) { stack.push((child_link.key().to_vec(), cursor.key().to_vec())); if let Some(child_tree) = child_link.tree() { traverse(f, child_tree, stack, true); } else { traverse_pruned(f, child_link, stack, true); } stack.pop(); } let depth = stack.len(); if depth > 0 { // draw ancestor's vertical lines for (low, high) in stack.iter().take(depth - 1) { let draw_line = cursor.key() > low && cursor.key() < high; write!(f, "{}", if draw_line { " │ " } else { " " }.dimmed()).unwrap(); } } let prefix = if depth == 0 { "" } else if left { " ┌-" } else { " └-" }; writeln!( f, "{}{}", prefix.dimmed(), format!("{:?}", cursor.key()).on_bright_black() ) .unwrap(); if let Some(child_link) = cursor.link(false) { stack.push((cursor.key().to_vec(), child_link.key().to_vec())); if let Some(child_tree) = child_link.tree() { traverse(f, child_tree, stack, false); } else { traverse_pruned(f, child_link, stack, false); } stack.pop(); } } fn traverse_pruned( f: &mut Formatter, link: &Link, stack: &mut [(Vec, Vec)], left: bool, ) { let depth = stack.len(); if depth > 0 { // draw ancestor's vertical lines for (low, high) in stack.iter().take(depth - 1) { let draw_line = link.key() > low && link.key() < high; write!(f, "{}", if draw_line { " │ " } else { " " }.dimmed()).unwrap(); } } let prefix = if depth == 0 { "" } else if left { " ┌-" } else { " └-" }; writeln!( 
f, "{}{}", prefix.dimmed(), format!("{:?}", link.key()).blue() ) .unwrap(); } let mut stack = vec![]; traverse(f, self, &mut stack, false); writeln!(f) } } ================================================ FILE: src/tree/encoding.rs ================================================ use std::io::Read; use crate::Result; use super::{kv::KV, Link, Tree, TreeInner}; use ed::{Decode, Encode}; impl Tree { #[inline] pub fn encode(&self) -> Vec { // operation is infallible so it's ok to unwrap Encode::encode(self).unwrap() } #[inline] pub fn encode_into(&self, dest: &mut Vec) { // operation is infallible so it's ok to unwrap Encode::encode_into(self, dest).unwrap() } #[inline] pub fn encoding_length(&self) -> usize { // operation is infallible so it's ok to unwrap Encode::encoding_length(self).unwrap() } #[inline] pub fn decode_into(&mut self, key: Vec, input: &[u8]) { // operation is infallible so it's ok to unwrap Decode::decode_into(self, input).unwrap(); self.inner.kv.key = key; } #[inline] pub fn decode(key: Vec, input: &[u8]) -> Tree { // operation is infallible so it's ok to unwrap let mut tree: Tree = Decode::decode(input).unwrap(); tree.inner.kv.key = key; tree } pub fn decode_v0(mut input: R) -> Result { let mut read_link_v0 = || -> Result> { let some = bool::decode(&mut input)?; if some { let link = Link::decode_v0(&mut input)?; Ok(Some(link)) } else { Ok(None) } }; let maybe_left = read_link_v0()?; let maybe_right = read_link_v0()?; let kv = KV::decode(&mut input)?; Ok(Tree { inner: Box::new(TreeInner { left: maybe_left, right: maybe_right, kv, }), }) } } #[cfg(test)] mod tests { use super::super::Link; use super::*; use crate::error::Result; #[test] fn encode_leaf_tree() { let tree = Tree::from_fields(vec![0], vec![1], [55; 32], None, None); assert_eq!(tree.encoding_length(), 35); assert_eq!( tree.encode(), vec![ 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 1, ] ); } #[test] 
#[should_panic] fn encode_modified_tree() { let tree = Tree::from_fields( vec![0], vec![1], [55; 32], Some(Link::Modified { pending_writes: 1, child_heights: (123, 124), tree: Tree::new(vec![2], vec![3]).unwrap(), }), None, ); tree.encode(); } #[test] fn encode_loaded_tree() -> Result<()> { let tree = Tree::from_fields( vec![0], vec![1], [55; 32], Some(Link::Loaded { hash: [66; 32], child_heights: (123, 124), tree: Tree::new(vec![2], vec![3])?, }), None, ); assert_eq!( tree.encode(), vec![ 1, 0, 1, 2, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 123, 124, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 1 ] ); Ok(()) } #[test] fn encode_uncommitted_tree() -> Result<()> { let tree = Tree::from_fields( vec![0], vec![1], [55; 32], Some(Link::Uncommitted { hash: [66; 32], child_heights: (123, 124), tree: Tree::new(vec![2], vec![3])?, }), None, ); assert_eq!( tree.encode(), vec![ 1, 0, 1, 2, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 123, 124, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 1 ] ); Ok(()) } #[test] fn encode_reference_tree() { let tree = Tree::from_fields( vec![0], vec![1], [55; 32], Some(Link::Reference { hash: [66; 32], child_heights: (123, 124), key: vec![2], }), None, ); assert_eq!(tree.encoding_length(), 71); assert_eq!( tree.encode(), vec![ 1, 0, 1, 2, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 123, 124, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 1 ] ); } #[test] fn decode_leaf_tree() { let bytes = vec![ 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ]; let tree = Tree::decode(vec![0], bytes.as_slice()); assert_eq!(tree.key(), &[0]); assert_eq!(tree.value(), &[1]); } #[test] fn decode_reference_tree() { let bytes = vec![ 1, 0, 1, 2, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 123, 124, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 1, ]; let tree = Tree::decode(vec![0], bytes.as_slice()); assert_eq!(tree.key(), &[0]); assert_eq!(tree.value(), &[1]); if let Some(Link::Reference { key, child_heights, hash, }) = tree.link(true) { assert_eq!(*key, [2]); assert_eq!(*child_heights, (123_u8, 124_u8)); assert_eq!(*hash, [66_u8; 32]); } else { panic!("Expected Link::Reference"); } } } ================================================ FILE: src/tree/fuzz_tests.rs ================================================ #![cfg(test)] use crate::test_utils::*; use crate::tree::*; use rand::prelude::*; use std::cell::RefCell; use std::collections::BTreeMap; use std::iter::FromIterator; const ITERATIONS: usize = 2_000; type Map = BTreeMap, Vec>; #[test] fn fuzz() { let mut rng = thread_rng(); for _ in 0..ITERATIONS { let seed = rng.gen::(); fuzz_case(seed); } } #[test] fn fuzz_17391518417409062786() { fuzz_case(17391518417409062786); } #[test] fn fuzz_396148930387069749() { fuzz_case(396148930387069749); } fn fuzz_case(seed: u64) { let mut rng: SmallRng = SeedableRng::seed_from_u64(seed); let initial_size = (rng.gen::() % 10) + 1; let tree = make_tree_rand(initial_size, initial_size, seed); let mut map = Map::from_iter(tree.iter()); let mut maybe_tree = Some(tree); println!("====== MERK FUZZ ======"); println!("SEED: {}", seed); println!("{:?}", maybe_tree.as_ref().unwrap()); for j in 0..3 { let batch_size = (rng.gen::() % 3) + 1; let batch = make_batch(maybe_tree.as_ref(), batch_size, 
rng.gen::()); println!("BATCH {}", j); println!("{:?}", batch); maybe_tree = apply_to_memonly(maybe_tree, &batch); apply_to_map(&mut map, &batch); assert_map(maybe_tree.as_ref(), &map); if let Some(tree) = &maybe_tree { println!("{:?}", &tree); } else { println!("(Empty tree)"); } } } fn make_batch(maybe_tree: Option<&Tree>, size: u64, seed: u64) -> Vec { let rng: RefCell = RefCell::new(SeedableRng::seed_from_u64(seed)); let mut batch = Vec::with_capacity(size as usize); let get_random_key = || { let tree = maybe_tree.as_ref().unwrap(); let entries: Vec<_> = tree.iter().collect(); let index = rng.borrow_mut().gen::() as usize % entries.len(); entries[index].0.clone() }; let random_value = |size| { let mut value = vec![0; size]; rng.borrow_mut().fill_bytes(&mut value[..]); value }; let insert = || (random_value(2), Op::Put(random_value(2))); let update = || { let key = get_random_key(); (key.to_vec(), Op::Put(random_value(2))) }; let delete = || { let key = get_random_key(); (key.to_vec(), Op::Delete) }; for _ in 0..size { let entry = if maybe_tree.is_some() { let kind = rng.borrow_mut().gen::() % 3; if kind == 0 { insert() } else if kind == 1 { update() } else { delete() } } else { insert() }; batch.push(entry); } batch.sort_by(|a, b| a.0.cmp(&b.0)); // remove dupes let mut maybe_prev_key: Option> = None; let mut deduped_batch = Vec::with_capacity(batch.len()); for entry in batch { if let Some(prev_key) = &maybe_prev_key { if *prev_key == entry.0 { continue; } } maybe_prev_key = Some(entry.0.clone()); deduped_batch.push(entry); } deduped_batch } fn apply_to_map(map: &mut Map, batch: &Batch) { for entry in batch.iter() { match entry { (key, Op::Put(value)) => { map.insert(key.to_vec(), value.to_vec()); } (key, Op::Delete) => { map.remove(key); } } } } fn assert_map(maybe_tree: Option<&Tree>, map: &Map) { if map.is_empty() { assert!(maybe_tree.is_none(), "expected tree to be None"); return; } let tree = maybe_tree.expect("expected tree to be Some"); let map_iter = 
map.iter(); let tree_iter = tree.iter(); for (tree_kv, map_kv) in tree_iter.zip(map_iter) { assert_eq!(tree_kv.0, *map_kv.0); assert_eq!(tree_kv.1, *map_kv.1); } assert_eq!(tree.iter().count(), map.len()); } ================================================ FILE: src/tree/hash.rs ================================================ use sha2::{Digest, Sha512_256}; use std::{convert::TryFrom, num::TryFromIntError}; /// The hash algorithm used for both KV hashes and node hashes. pub type Hasher = Sha512_256; /// The length of a `Hash` (in bytes). pub const HASH_LENGTH: usize = 32; /// A zero-filled `Hash`. pub const NULL_HASH: Hash = [0; HASH_LENGTH]; /// A cryptographic hash digest. pub type Hash = [u8; HASH_LENGTH]; /// Hashes a key/value pair. pub fn kv_hash(key: &[u8], value: &[u8]) -> Result { let mut hasher = D::new(); hasher.update([0]); u32::try_from(key.len()) .and_then(|key| u32::try_from(value.len()).map(|value| (key, value))) .map(|(key_length, val_length)| { hasher.update(key_length.to_le_bytes()); hasher.update(key); hasher.update(val_length.to_le_bytes()); hasher.update(value); let res = hasher.finalize(); let mut hash: Hash = Default::default(); hash.copy_from_slice(&res[..]); hash }) } /// Hashes a node based on the hash of its left child (if any), its key/value /// pair, and the hash of its right child (if any). pub fn node_hash(kv: &Hash, left: &Hash, right: &Hash) -> Hash { let mut hasher = D::new(); hasher.update([1]); hasher.update(left); hasher.update(kv); hasher.update(right); let res = hasher.finalize(); let mut hash: Hash = Default::default(); hash.copy_from_slice(&res[..]); hash } ================================================ FILE: src/tree/iter.rs ================================================ use super::Tree; /// An entry stored on an `Iter`'s stack, containing a reference to a `Tree`, /// and its traversal state. 
/// /// The `traversed` field represents whether or not the left child, self, and /// right child have been visited, respectively (`(left, self, right)`). struct StackItem<'a> { tree: &'a Tree, traversed: (bool, bool, bool), } impl<'a> StackItem<'a> { /// Creates a new `StackItem` for the given tree. The `traversed` state will /// be `false` since the children and self have not been visited yet, but /// will default to `true` for sides that do not have a child. fn new(tree: &'a Tree) -> Self { StackItem { tree, traversed: ( tree.child(true).is_none(), false, tree.child(false).is_none(), ), } } /// Gets a tuple to yield from an `Iter`, `(key, value)`. fn to_entry(&self) -> (Vec, Vec) { (self.tree.key().to_vec(), self.tree.value().to_vec()) } } /// An iterator which yields the key/value pairs of the tree, in order, skipping /// any parts of the tree which are pruned (not currently retained in memory). pub struct Iter<'a> { stack: Vec>, } impl<'a> Iter<'a> { /// Creates a new iterator for the given tree. pub fn new(tree: &'a Tree) -> Self { let stack = vec![StackItem::new(tree)]; Iter { stack } } } impl<'a> Tree { /// Creates an iterator which yields `(key, value)` tuples for all of the /// tree's nodes which are retained in memory (skipping pruned subtrees). pub fn iter(&'a self) -> Iter<'a> { Iter::new(self) } } impl<'a> Iterator for Iter<'a> { type Item = (Vec, Vec); /// Traverses to and yields the next key/value pair, in key order. 
fn next(&mut self) -> Option { if self.stack.is_empty() { return None; } let last = self.stack.last_mut().unwrap(); if !last.traversed.0 { last.traversed.0 = true; let tree = last.tree.child(true).unwrap(); self.stack.push(StackItem::new(tree)); self.next() } else if !last.traversed.1 { last.traversed.1 = true; Some(last.to_entry()) } else if !last.traversed.2 { last.traversed.2 = true; let tree = last.tree.child(false).unwrap(); self.stack.push(StackItem::new(tree)); self.next() } else { self.stack.pop(); self.next() } } } ================================================ FILE: src/tree/kv.rs ================================================ use super::hash::{kv_hash, Hash, Hasher, HASH_LENGTH, NULL_HASH}; use ed::{Decode, Encode, Result}; use std::{ io::{Read, Write}, num::TryFromIntError, }; // TODO: maybe use something similar to Vec but without capacity field, // (should save 16 bytes per entry). also, maybe a shorter length // field to save even more. also might be possible to combine key // field and value field. /// Contains a key/value pair, and the hash of the key/value pair. #[derive(Clone, Debug, PartialEq, Eq)] pub struct KV { pub(super) key: Vec, pub(super) value: Vec, pub(super) hash: Hash, } impl KV { /// Creates a new `KV` with the given key and value and computes its hash. #[inline] pub fn new(key: Vec, value: Vec) -> std::result::Result { kv_hash::(key.as_slice(), value.as_slice()).map(|hash| KV { key, value, hash }) } /// Creates a new `KV` with the given key, value, and hash. The hash is not /// checked to be correct for the given key/value. #[inline] pub fn from_fields(key: Vec, value: Vec, hash: Hash) -> Self { KV { key, value, hash } } /// Replaces the `KV`'s value with the given value, updates the hash, and /// returns the modified `KV`. #[inline] pub fn with_value(mut self, value: Vec) -> std::result::Result { self.value = value; self.hash = kv_hash::(self.key(), self.value())?; Ok(self) } /// Returns the key as a slice. 
#[inline] pub fn key(&self) -> &[u8] { self.key.as_slice() } /// Returns the value as a slice. #[inline] pub fn value(&self) -> &[u8] { self.value.as_slice() } /// Returns the hash. #[inline] pub fn hash(&self) -> &Hash { &self.hash } /// Consumes the `KV` and returns its key without allocating or cloning. #[inline] pub fn take_key(self) -> Vec { self.key } } impl Encode for KV { #[inline] fn encode_into(&self, out: &mut W) -> Result<()> { out.write_all(&self.hash[..])?; out.write_all(self.value.as_slice())?; Ok(()) } #[inline] fn encoding_length(&self) -> Result { debug_assert!( self.key().len() < 65536, "Key length must be less than 65536" ); Ok(HASH_LENGTH + self.value.len()) } } impl Decode for KV { #[inline] fn decode(input: R) -> Result { let mut kv = KV { key: Vec::with_capacity(0), value: Vec::with_capacity(128), hash: NULL_HASH, }; KV::decode_into(&mut kv, input)?; Ok(kv) } #[inline] fn decode_into(&mut self, mut input: R) -> Result<()> { self.key.clear(); input.read_exact(&mut self.hash[..])?; self.value.clear(); input.read_to_end(self.value.as_mut())?; Ok(()) } } #[cfg(test)] mod test { use super::*; #[test] fn new_kv() -> std::result::Result<(), TryFromIntError> { let kv = KV::new(vec![1, 2, 3], vec![4, 5, 6])?; assert_eq!(kv.key(), &[1, 2, 3]); assert_eq!(kv.value(), &[4, 5, 6]); assert_ne!(kv.hash(), &super::super::hash::NULL_HASH); Ok(()) } #[test] fn with_value() -> std::result::Result<(), TryFromIntError> { let kv = KV::new(vec![1, 2, 3], vec![4, 5, 6])?.with_value(vec![7, 8, 9])?; assert_eq!(kv.key(), &[1, 2, 3]); assert_eq!(kv.value(), &[7, 8, 9]); assert_ne!(kv.hash(), &super::super::hash::NULL_HASH); Ok(()) } } ================================================ FILE: src/tree/link.rs ================================================ use std::cmp::max; use std::io::{Read, Write}; use ed::{Decode, Encode, Result, Terminated}; use super::hash::Hash; use super::Tree; // TODO: optimize memory footprint /// Represents a reference to a child tree node. 
Links may or may not contain /// the child's `Tree` instance (storing its key if not). #[derive(Debug, Clone, PartialEq, Eq)] pub enum Link { /// Represents a child tree node which has been pruned from memory, only /// retaining a reference to it (its key). The child node can always be /// fetched from the backing store by this key when necessary. Reference { hash: Hash, child_heights: (u8, u8), key: Vec, }, /// Represents a tree node which has been modified since the `Tree`'s last /// hash computation. The child's hash is not stored since it has not yet /// been recomputed. The child's `Tree` instance is stored in the link. Modified { pending_writes: usize, // TODO: rename to `pending_hashes` child_heights: (u8, u8), tree: Tree, }, // Represents a tree node which has been modified since the `Tree`'s last // commit, but which has an up-to-date hash. The child's `Tree` instance is // stored in the link. Uncommitted { hash: Hash, child_heights: (u8, u8), tree: Tree, }, /// Represents a tree node which has not been modified, has an up-to-date /// hash, and which is being retained in memory. Loaded { hash: Hash, child_heights: (u8, u8), tree: Tree, }, } impl Link { /// Creates a `Link::Modified` from the given `Tree`. #[inline] pub fn from_modified_tree(tree: Tree) -> Self { let pending_writes = 1 + tree.child_pending_writes(true) + tree.child_pending_writes(false); Link::Modified { pending_writes, child_heights: tree.child_heights(), tree, } } /// Creates a `Link::Modified` from the given tree, if any. If `None`, /// returns `None`. pub fn maybe_from_modified_tree(maybe_tree: Option) -> Option { maybe_tree.map(Link::from_modified_tree) } /// Returns `true` if the link is of the `Link::Reference` variant. #[inline] pub fn is_reference(&self) -> bool { matches!(self, Link::Reference { .. }) } /// Returns `true` if the link is of the `Link::Modified` variant. #[inline] pub fn is_modified(&self) -> bool { matches!(self, Link::Modified { .. 
}) } /// Returns `true` if the link is of the `Link::Uncommitted` variant. #[inline] pub fn is_uncommitted(&self) -> bool { matches!(self, Link::Uncommitted { .. }) } /// Returns `true` if the link is of the `Link::Loaded` variant. #[inline] pub fn is_stored(&self) -> bool { matches!(self, Link::Loaded { .. }) } /// Returns the key of the tree referenced by this link, as a slice. #[inline] pub fn key(&self) -> &[u8] { match self { Link::Reference { key, .. } => key.as_slice(), Link::Modified { tree, .. } => tree.key(), Link::Uncommitted { tree, .. } => tree.key(), Link::Loaded { tree, .. } => tree.key(), } } /// Returns the `Tree` instance of the tree referenced by the link. If the /// link is of variant `Link::Reference`, the returned value will be `None`. #[inline] pub fn tree(&self) -> Option<&Tree> { match self { // TODO: panic for Reference, don't return Option? Link::Reference { .. } => None, Link::Modified { tree, .. } => Some(tree), Link::Uncommitted { tree, .. } => Some(tree), Link::Loaded { tree, .. } => Some(tree), } } /// Returns the hash of the tree referenced by the link. Panics if link is /// of variant `Link::Modified` since we have not yet recomputed the tree's /// hash. #[inline] pub fn hash(&self) -> &Hash { match self { Link::Modified { .. } => panic!("Cannot get hash from modified link"), Link::Reference { hash, .. } => hash, Link::Uncommitted { hash, .. } => hash, Link::Loaded { hash, .. } => hash, } } /// Returns the height of the children of the tree referenced by the link, /// if any (note: not the height of the referenced tree itself). Return /// value is `(left_child_height, right_child_height)`. #[inline] pub fn height(&self) -> u8 { let (left_height, right_height) = match self { Link::Reference { child_heights, .. } => *child_heights, Link::Modified { child_heights, .. } => *child_heights, Link::Uncommitted { child_heights, .. } => *child_heights, Link::Loaded { child_heights, .. 
} => *child_heights, }; 1 + max(left_height, right_height) } /// Returns the balance factor of the tree referenced by the link. #[inline] pub fn balance_factor(&self) -> i8 { let (left_height, right_height) = match self { Link::Reference { child_heights, .. } => *child_heights, Link::Modified { child_heights, .. } => *child_heights, Link::Uncommitted { child_heights, .. } => *child_heights, Link::Loaded { child_heights, .. } => *child_heights, }; right_height as i8 - left_height as i8 } /// Consumes the link and converts to variant `Link::Reference`. Panics if /// the link is of variant `Link::Modified` or `Link::Uncommitted`. #[inline] pub fn into_reference(self) -> Self { match self { Link::Reference { .. } => self, Link::Modified { .. } => panic!("Cannot prune Modified tree"), Link::Uncommitted { .. } => panic!("Cannot prune Uncommitted tree"), Link::Loaded { hash, child_heights, tree, } => Link::Reference { hash, child_heights, key: tree.take_key(), }, } } #[inline] #[cfg(feature = "full")] pub(crate) fn child_heights_mut(&mut self) -> &mut (u8, u8) { match self { Link::Reference { ref mut child_heights, .. } => child_heights, Link::Modified { ref mut child_heights, .. } => child_heights, Link::Uncommitted { ref mut child_heights, .. } => child_heights, Link::Loaded { ref mut child_heights, .. } => child_heights, } } } impl Encode for Link { #[inline] fn encode_into(&self, out: &mut W) -> Result<()> { let (hash, key, (left_height, right_height)) = match self { Link::Reference { hash, key, child_heights, } => (hash, key.as_slice(), child_heights), Link::Loaded { hash, tree, child_heights, } => (hash, tree.key(), child_heights), Link::Uncommitted { hash, tree, child_heights, } => (hash, tree.key(), child_heights), Link::Modified { .. 
} => panic!("No encoding for Link::Modified"), }; debug_assert!( self.key().len() < 65536, "Key length must be less than 65536" ); out.write_all(&(key.len() as u16).to_be_bytes())?; out.write_all(key)?; out.write_all(hash)?; out.write_all(&[*left_height, *right_height])?; Ok(()) } #[inline] fn encoding_length(&self) -> Result { debug_assert!( self.key().len() < 65536, "Key length must be less than 65536" ); Ok(match self { Link::Reference { key, .. } => 1 + key.len() + 32 + 2, Link::Modified { .. } => panic!("No encoding for Link::Modified"), Link::Uncommitted { tree, .. } => 1 + tree.key().len() + 32 + 2, Link::Loaded { tree, .. } => 1 + tree.key().len() + 32 + 2, }) } } impl Link { #[inline] fn default_reference() -> Self { Link::Reference { key: Vec::with_capacity(64), hash: Default::default(), child_heights: (0, 0), } } pub(crate) fn decode_v0(mut input: R) -> Result { let length = read_u8(&mut input)? as usize; let mut key = vec![0; length]; input.read_exact(&mut key)?; let mut hash = [0; 32]; input.read_exact(&mut hash)?; let left_height = read_u8(&mut input)?; let right_height = read_u8(input)?; Ok(Link::Reference { key, hash, child_heights: (left_height, right_height), }) } } impl Decode for Link { #[inline] fn decode(input: R) -> Result { let mut link = Link::default_reference(); Link::decode_into(&mut link, input)?; Ok(link) } #[inline] fn decode_into(&mut self, mut input: R) -> Result<()> { if !self.is_reference() { // don't create new struct if self is already Link::Reference, // so we can re-use the key vec *self = Link::default_reference(); } if let Link::Reference { ref mut key, ref mut hash, ref mut child_heights, } = self { let length = read_u16(&mut input)? 
as usize; key.resize(length, 0); input.read_exact(key.as_mut())?; input.read_exact(&mut hash[..])?; child_heights.0 = read_u8(&mut input)?; child_heights.1 = read_u8(&mut input)?; } else { unreachable!() } Ok(()) } } impl Terminated for Link {} #[inline] fn read_u16(mut input: R) -> Result { let mut length = [0, 0]; input.read_exact(length.as_mut())?; Ok(u16::from_be_bytes(length)) } #[inline] fn read_u8(mut input: R) -> Result { let mut length = [0]; input.read_exact(length.as_mut())?; Ok(length[0]) } #[cfg(test)] mod test { use super::super::hash::NULL_HASH; use super::super::Tree; use super::*; #[test] fn from_modified_tree() -> std::result::Result<(), &'static str> { let tree = Tree::new(vec![0], vec![1]).map_err(|_| "tree construction failed")?; let link = Link::from_modified_tree(tree); assert!(link.is_modified()); assert_eq!(link.height(), 1); assert_eq!(link.tree().expect("expected tree").key(), &[0]); if let Link::Modified { pending_writes, .. } = link { assert_eq!(pending_writes, 1); Ok(()) } else { Err("Expected Link::Modified") } } #[test] fn maybe_from_modified_tree() -> std::result::Result<(), crate::error::Error> { let link = Link::maybe_from_modified_tree(None); assert!(link.is_none()); let tree = Tree::new(vec![0], vec![1])?; let link = Link::maybe_from_modified_tree(Some(tree)); assert!(link.expect("expected link").is_modified()); Ok(()) } #[test] fn types() -> std::result::Result<(), crate::error::Error> { let hash = NULL_HASH; let child_heights = (0, 0); let pending_writes = 1; let key = vec![0]; let tree = || Tree::new(vec![0], vec![1]); let reference = Link::Reference { hash, child_heights, key, }; let modified = Link::Modified { pending_writes, child_heights, tree: tree()?, }; let uncommitted = Link::Uncommitted { hash, child_heights, tree: tree()?, }; let loaded = Link::Loaded { hash, child_heights, tree: tree()?, }; assert!(reference.is_reference()); assert!(!reference.is_modified()); assert!(!reference.is_uncommitted()); 
    #[test]
    #[should_panic(expected = "Cannot get hash from modified link")]
    fn modified_hash() {
        // A Modified link's hash has not been computed yet, so `hash()`
        // must panic rather than return stale data.
        Tree::new(vec![0], vec![1])
            .map(|tree| Link::Modified {
                pending_writes: 1,
                child_heights: (1, 1),
                tree,
            })
            .map(|link| link.hash().to_vec())
            .map(|_| ())
            .unwrap_or_default()
    }

    #[test]
    #[should_panic]
    fn modified_into_reference() {
        // Collapsing to a hash-only reference requires a computed hash,
        // which a Modified link does not have.
        Link::Modified {
            pending_writes: 1,
            child_heights: (1, 1),
            tree: Tree::new(vec![0], vec![1]).expect("tree construction failed"),
        }
        .into_reference();
    }

    #[test]
    #[should_panic]
    fn uncommitted_into_reference() {
        // Uncommitted links must be committed before being collapsed to a
        // reference.
        Link::Uncommitted {
            hash: [1; 32],
            child_heights: (1, 1),
            tree: Tree::new(vec![0], vec![1]).expect("tree construction failed"),
        }
        .into_reference();
    }

    #[test]
    fn encode_link() {
        let link = Link::Reference {
            key: vec![1, 2, 3],
            child_heights: (123, 124),
            hash: [55; 32],
        };
        // NOTE(review): `encode_into` writes a 2-byte big-endian key length,
        // so the actual encoding below is 39 bytes (2 + 3 + 32 + 2), while
        // `encoding_length` reports 38 (it still counts a 1-byte prefix).
        // Under `ed` this appears to be only a capacity hint, but the two
        // should be reconciled — confirm before relying on this value.
        assert_eq!(link.encoding_length().unwrap(), 38);

        let mut bytes = vec![];
        link.encode_into(&mut bytes).unwrap();
        assert_eq!(
            bytes,
            vec![
                0, 3, 1, 2, 3, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
                55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 123, 124
            ]
        );
    }

    #[test]
    fn encode_link_long_key_valid() {
        // Keys up to 65535 bytes round-trip through the u16 length prefix.
        let link = Link::Reference {
            key: vec![123; 60_000],
            child_heights: (123, 124),
            hash: [55; 32],
        };
        let mut bytes = vec![];
        link.encode_into(&mut bytes).unwrap();

        let decoded = Link::decode(&bytes[..]).unwrap();
        assert_eq!(decoded, link);
    }

    #[test]
    #[should_panic = "Key length must be less than 65536"]
    fn encode_link_long_key_invalid() {
        // Keys of 65536 bytes or more cannot be length-prefixed with a u16.
        // (The guard is a `debug_assert!`, so it only fires in debug builds.)
        let link = Link::Reference {
            key: vec![123; 70_000],
            child_heights: (123, 124),
            hash: [55; 32],
        };
        let mut bytes = vec![];
        link.encode_into(&mut bytes).unwrap();
    }
pub fn new(key: Vec, value: Vec) -> Result { KV::new(key, value).map_err(Into::into).map(|kv| Tree { inner: Box::new(TreeInner { kv, left: None, right: None, }), }) } /// Creates a `Tree` by supplying all the raw struct fields (mainly useful /// for testing). The `kv_hash` and `Link`s are not ensured to be correct. pub fn from_fields( key: Vec, value: Vec, kv_hash: Hash, left: Option, right: Option, ) -> Tree { Tree { inner: Box::new(TreeInner { kv: KV::from_fields(key, value, kv_hash), left, right, }), } } /// Returns the root node's key as a slice. #[inline] pub fn key(&self) -> &[u8] { self.inner.kv.key() } /// Consumes the tree and returns its root node's key, without having to /// clone or allocate. #[inline] pub fn take_key(self) -> Vec { self.inner.kv.take_key() } /// Returns the root node's value as a slice. #[inline] pub fn value(&self) -> &[u8] { self.inner.kv.value() } /// Returns the hash of the root node's key/value pair. #[inline] pub fn kv_hash(&self) -> &Hash { self.inner.kv.hash() } /// Returns a reference to the root node's `Link` on the given side, if any. /// If there is no child, returns `None`. #[inline] pub fn link(&self, left: bool) -> Option<&Link> { if left { self.inner.left.as_ref() } else { self.inner.right.as_ref() } } /// Returns a mutable reference to the root node's `Link` on the given side, /// if any. If there is no child, returns `None`. #[inline] pub fn link_mut(&mut self, left: bool) -> Option<&mut Link> { if left { self.inner.left.as_mut() } else { self.inner.right.as_mut() } } /// Returns a reference to the root node's child on the given side, if any. /// If there is no child, returns `None`. #[inline] pub fn child(&self, left: bool) -> Option<&Self> { match self.link(left) { None => None, Some(link) => link.tree(), } } /// Returns a mutable reference to the root node's child on the given side, /// if any. If there is no child, returns `None`. 
#[inline] pub fn child_mut(&mut self, left: bool) -> Option<&mut Self> { match self.slot_mut(left).as_mut() { None => None, Some(Link::Reference { .. }) => None, Some(Link::Modified { tree, .. }) => Some(tree), Some(Link::Uncommitted { tree, .. }) => Some(tree), Some(Link::Loaded { tree, .. }) => Some(tree), } } /// Returns the hash of the root node's child on the given side, if any. If /// there is no child, returns the null hash (zero-filled). #[inline] pub fn child_hash(&self, left: bool) -> &Hash { self.link(left).map_or(&NULL_HASH, |link| link.hash()) } /// Computes and returns the hash of the root node. #[inline] pub fn hash(&self) -> Hash { node_hash::( self.inner.kv.hash(), self.child_hash(true), self.child_hash(false), ) } /// Returns the number of pending writes for the child on the given side, if /// any. If there is no child, returns 0. #[inline] pub fn child_pending_writes(&self, left: bool) -> usize { match self.link(left) { Some(Link::Modified { pending_writes, .. }) => *pending_writes, _ => 0, } } /// Returns the height of the child on the given side, if any. If there is /// no child, returns 0. #[inline] pub fn child_height(&self, left: bool) -> u8 { self.link(left).map_or(0, |child| child.height()) } #[inline] pub fn child_heights(&self) -> (u8, u8) { (self.child_height(true), self.child_height(false)) } /// Returns the height of the tree (the number of levels). For example, a /// single node has height 1, a node with a single descendant has height 2, /// etc. #[inline] pub fn height(&self) -> u8 { 1 + max(self.child_height(true), self.child_height(false)) } /// Returns the balance factor of the root node. This is the difference /// between the height of the right child (if any) and the height of the /// left child (if any). For example, a balance factor of 2 means the right /// subtree is 2 levels taller than the left subtree. 
#[inline] pub fn balance_factor(&self) -> i8 { let left_height = self.child_height(true) as i8; let right_height = self.child_height(false) as i8; right_height - left_height } /// Attaches the child (if any) to the root node on the given side. Creates /// a `Link` of variant `Link::Modified` which contains the child. /// /// Panics if there is already a child on the given side. #[inline] pub fn attach(mut self, left: bool, maybe_child: Option) -> Self { debug_assert_ne!( Some(self.key()), maybe_child.as_ref().map(|c| c.key()), "Tried to attach tree with same key" ); let slot = self.slot_mut(left); assert!( !slot.is_some(), "Tried to attach to {} tree slot, but it is already Some", side_to_str(left) ); *slot = Link::maybe_from_modified_tree(maybe_child); self } /// Detaches the child on the given side (if any) from the root node, and /// returns `(root_node, maybe_child)`. /// /// One will usually want to reattach (see `attach`) a child on the same /// side after applying some operation to the detached child. #[inline] pub fn detach(mut self, left: bool) -> (Self, Option) { let maybe_child = match self.slot_mut(left).take() { None => None, Some(Link::Reference { .. }) => None, Some(Link::Modified { tree, .. }) => Some(tree), Some(Link::Uncommitted { tree, .. }) => Some(tree), Some(Link::Loaded { tree, .. }) => Some(tree), }; (self, maybe_child) } /// Detaches the child on the given side from the root node, and /// returns `(root_node, child)`. /// /// Panics if there is no child on the given side. /// /// One will usually want to reattach (see `attach`) a child on the same /// side after applying some operation to the detached child. 
#[inline] pub fn detach_expect(self, left: bool) -> (Self, Self) { let (parent, maybe_child) = self.detach(left); if let Some(child) = maybe_child { (parent, child) } else { panic!( "Expected tree to have {} child, but got None", side_to_str(left) ); } } /// Detaches the child on the given side and passes it into `f`, which must /// return a new child (either the same child, a new child to take its /// place, or `None` to explicitly keep the slot empty). /// /// This is the same as `detach`, but with the function interface to enforce /// at compile-time that an explicit final child value is returned. This is /// less error prone that detaching with `detach` and reattaching with /// `attach`. #[inline] pub fn walk(self, left: bool, f: F) -> Self where F: FnOnce(Option) -> Option, { let (tree, maybe_child) = self.detach(left); tree.attach(left, f(maybe_child)) } /// Like `walk`, but panics if there is no child on the given side. #[inline] pub fn walk_expect(self, left: bool, f: F) -> Self where F: FnOnce(Self) -> Option, { let (tree, child) = self.detach_expect(left); tree.attach(left, f(child)) } /// Returns a mutable reference to the child slot for the given side. #[inline] pub(crate) fn slot_mut(&mut self, left: bool) -> &mut Option { if left { &mut self.inner.left } else { &mut self.inner.right } } /// Replaces the root node's value with the given value and returns the /// modified `Tree`. #[inline] pub fn with_value(mut self, value: Vec) -> Result { self.inner.kv = self.inner.kv.with_value(value)?; Ok(self) } // TODO: add compute_hashes method /// Called to finalize modifications to a tree, recompute its hashes, and /// write the updated nodes to a backing store. /// /// Traverses through the tree, computing hashes for all modified links and /// replacing them with `Link::Loaded` variants, writes out all changes to /// the given `Commit` object's `write` method, and calls the its `prune` /// method to test whether or not to keep or prune nodes from memory. 
#[inline] pub fn commit(&mut self, c: &mut C) -> Result<()> { // TODO: make this method less ugly // TODO: call write in-order for better performance in writing batch to db? if let Some(Link::Modified { .. }) = self.inner.left { if let Some(Link::Modified { mut tree, child_heights, .. }) = self.inner.left.take() { tree.commit(c)?; self.inner.left = Some(Link::Loaded { hash: tree.hash(), tree, child_heights, }); } else { unreachable!() } } if let Some(Link::Modified { .. }) = self.inner.right { if let Some(Link::Modified { mut tree, child_heights, .. }) = self.inner.right.take() { tree.commit(c)?; self.inner.right = Some(Link::Loaded { hash: tree.hash(), tree, child_heights, }); } else { unreachable!() } } c.write(self)?; let (prune_left, prune_right) = c.prune(self); if prune_left { self.inner.left = self.inner.left.take().map(|link| link.into_reference()); } if prune_right { self.inner.right = self.inner.right.take().map(|link| link.into_reference()); } Ok(()) } /// Fetches the child on the given side using the given data source, and /// places it in the child slot (upgrading the link from `Link::Reference` /// to `Link::Loaded`). #[inline] pub fn load(&mut self, left: bool, source: &S) -> Result<()> { // TODO: return Err instead of panic? let link = self.link(left).expect("Expected link"); let (child_heights, hash) = match link { Link::Reference { child_heights, hash, .. 
} => (child_heights, hash), _ => panic!("Expected Some(Link::Reference)"), }; let tree = source.fetch(link)?; debug_assert_eq!(tree.key(), link.key()); *self.slot_mut(left) = Some(Link::Loaded { tree, hash: *hash, child_heights: *child_heights, }); Ok(()) } pub fn get_value(&self, key: &[u8]) -> Result { let mut cursor = self; loop { if key == cursor.key() { return Ok(GetResult::Found(cursor.value().to_vec())); } let left = key < cursor.key(); let link = match cursor.link(left) { None => return Ok(GetResult::NotFound), // not found Some(link) => link, }; let maybe_child = link.tree(); match maybe_child { None => return Ok(GetResult::Pruned), /* value is pruned, caller will have to */ // fetch from disk Some(child) => cursor = child, // traverse to child } } } } pub enum GetResult { Found(Vec), Pruned, NotFound, } pub fn side_to_str(left: bool) -> &'static str { if left { "left" } else { "right" } } #[cfg(test)] mod test { use super::commit::NoopCommit; use super::hash::NULL_HASH; use super::Tree; use crate::error::Result; #[test] fn build_tree() -> Result<()> { let tree = Tree::new(vec![1], vec![101])?; assert_eq!(tree.key(), &[1]); assert_eq!(tree.value(), &[101]); assert!(tree.child(true).is_none()); assert!(tree.child(false).is_none()); let tree = tree.attach(true, None); assert!(tree.child(true).is_none()); assert!(tree.child(false).is_none()); let tree = tree.attach(true, Some(Tree::new(vec![2], vec![102])?)); assert_eq!(tree.key(), &[1]); assert_eq!(tree.child(true).unwrap().key(), &[2]); assert!(tree.child(false).is_none()); let tree = Tree::new(vec![3], vec![103])?.attach(false, Some(tree)); assert_eq!(tree.key(), &[3]); assert_eq!(tree.child(false).unwrap().key(), &[1]); assert!(tree.child(true).is_none()); Ok(()) } #[should_panic] #[test] fn attach_existing() { Tree::new(vec![0], vec![1]) .expect("tree construction failed") .attach( true, Some(Tree::new(vec![2], vec![3]).expect("tree construction failed")), ) .attach( true, Some(Tree::new(vec![4], 
    #[test]
    fn modify() -> Result<()> {
        let tree = Tree::new(vec![0], vec![1])?
            .attach(true, Some(Tree::new(vec![2], vec![3])?))
            .attach(false, Some(Tree::new(vec![4], vec![5])?));

        // `walk` hands us the detached child; returning None leaves the
        // slot empty.
        let tree = tree.walk(true, |left_opt| {
            assert_eq!(left_opt.as_ref().unwrap().key(), &[2]);
            None
        });
        assert!(tree.child(true).is_none());
        assert!(tree.child(false).is_some());

        // Built outside the closure since `?` cannot be used inside it.
        let fixed_tree = Some(Tree::new(vec![2], vec![3])?);
        let tree = tree.walk(true, |left_opt| {
            assert!(left_opt.is_none());
            fixed_tree
        });
        assert_eq!(tree.link(true).unwrap().key(), &[2]);

        let tree = tree.walk_expect(false, |right| {
            assert_eq!(right.key(), &[4]);
            None
        });
        assert!(tree.child(true).is_some());
        assert!(tree.child(false).is_none());

        Ok(())
    }

    #[test]
    fn child_and_link() -> Result<()> {
        let mut tree =
            Tree::new(vec![0], vec![1])?.attach(true, Some(Tree::new(vec![2], vec![3])?));
        // Freshly attached children are Modified until committed.
        assert!(tree.link(true).expect("expected link").is_modified());
        assert!(tree.child(true).is_some());
        assert!(tree.link(false).is_none());
        assert!(tree.child(false).is_none());

        // Committing upgrades the link to a stored (Loaded) variant.
        tree.commit(&mut NoopCommit {}).expect("commit failed");
        assert!(tree.link(true).expect("expected link").is_stored());
        assert!(tree.child(true).is_some());

        let tree = tree.walk(true, |_| None);
        assert!(tree.link(true).is_none());
        assert!(tree.child(true).is_none());

        Ok(())
    }

    #[test]
    fn child_hash() -> Result<()> {
        let mut tree =
            Tree::new(vec![0], vec![1])?.attach(true, Some(Tree::new(vec![2], vec![3])?));
        tree.commit(&mut NoopCommit {}).expect("commit failed");

        // Expected values are pinned hash outputs; a mismatch means the
        // hashing scheme changed (a breaking change for stored trees).
        assert_eq!(
            tree.child_hash(true),
            &[
                130, 215, 14, 92, 6, 226, 203, 67, 180, 206, 170, 81, 142, 77, 62, 33, 146, 78,
                29, 252, 100, 149, 207, 172, 89, 254, 96, 166, 159, 49, 169, 106
            ]
        );
        // Missing child hashes as the all-zero null hash.
        assert_eq!(tree.child_hash(false), &NULL_HASH);

        Ok(())
    }

    #[test]
    fn hash() -> Result<()> {
        let tree = Tree::new(vec![0], vec![1])?;
        // Pinned node hash for a single-node tree (see child_hash above).
        assert_eq!(
            tree.hash(),
            [
                115, 223, 9, 212, 135, 4, 223, 163, 244, 126, 25, 190, 255, 217, 132, 76, 219,
                149, 151, 237, 164, 103, 67, 44, 196, 177, 227, 195, 217, 146, 156, 86
            ]
        );
        Ok(())
    }

    #[test]
    fn child_pending_writes() -> Result<()> {
        let tree = Tree::new(vec![0], vec![1])?;
        assert_eq!(tree.child_pending_writes(true), 0);
        assert_eq!(tree.child_pending_writes(false), 0);

        // An attached (Modified) child counts as one pending write.
        let tree = tree.attach(true, Some(Tree::new(vec![2], vec![3])?));
        assert_eq!(tree.child_pending_writes(true), 1);
        assert_eq!(tree.child_pending_writes(false), 0);

        Ok(())
    }

    #[test]
    fn height_and_balance() -> Result<()> {
        let tree = Tree::new(vec![0], vec![1])?;
        assert_eq!(tree.height(), 1);
        assert_eq!(tree.child_height(true), 0);
        assert_eq!(tree.child_height(false), 0);
        assert_eq!(tree.balance_factor(), 0);

        // Left child only: balance factor is negative.
        let tree = tree.attach(true, Some(Tree::new(vec![2], vec![3])?));
        assert_eq!(tree.height(), 2);
        assert_eq!(tree.child_height(true), 1);
        assert_eq!(tree.child_height(false), 0);
        assert_eq!(tree.balance_factor(), -1);

        // Moving the child to the right flips the sign.
        let (tree, maybe_child) = tree.detach(true);
        let tree = tree.attach(false, maybe_child);
        assert_eq!(tree.height(), 2);
        assert_eq!(tree.child_height(true), 0);
        assert_eq!(tree.child_height(false), 1);
        assert_eq!(tree.balance_factor(), 1);

        Ok(())
    }

    #[test]
    fn commit() -> Result<()> {
        let mut tree =
            Tree::new(vec![0], vec![1])?.attach(false, Some(Tree::new(vec![2], vec![3])?));
        tree.commit(&mut NoopCommit {}).expect("commit failed");

        assert!(tree.link(false).expect("expected link").is_stored());

        Ok(())
    }
impl fmt::Debug for Op {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // NOTE(review): `writeln!` appends a trailing newline to the Debug
        // output, which is unusual for a Debug impl — confirm downstream
        // formatting relies on it before changing.
        writeln!(
            f,
            "{}",
            match self {
                Put(value) => format!("Put({value:?})"),
                Delete => "Delete".to_string(),
            }
        )
    }
}

/// A single `(key, operation)` pair.
pub type BatchEntry = (Vec<u8>, Op);

/// A mapping of keys and operations. Keys should be sorted and unique.
pub type Batch = [BatchEntry];

/// A source of data which panics when called. Useful when creating a store
/// which always keeps the state in memory.
#[derive(Clone)]
pub struct PanicSource {}

impl Fetch for PanicSource {
    // PanicSource is only used with fully in-memory trees, so a fetch can
    // never legitimately happen.
    fn fetch_by_key(&self, _: &[u8]) -> Result<Option<Tree>> {
        unreachable!()
    }
}

impl<S> Walker<S>
where
    S: Fetch + Sized + Send + Clone,
{
    /// Applies a batch of operations, possibly creating a new tree if
    /// `maybe_tree` is `None`. This is similar to `Walker::apply`, but does
    /// not require a non-empty tree.
    ///
    /// Keys in batch must be sorted and unique.
    pub fn apply_to(
        maybe_tree: Option<Self>,
        batch: &Batch,
        source: S,
    ) -> Result<(Option<Tree>, LinkedList<Vec<u8>>)> {
        let (maybe_walker, deleted_keys) = if batch.is_empty() {
            (maybe_tree, LinkedList::default())
        } else {
            match maybe_tree {
                // No existing tree: build from scratch. Deletes of
                // nonexistent keys are no-ops, so no keys are deleted.
                None => return Ok((Self::build(batch, source)?, LinkedList::default())),
                Some(tree) => tree.apply(batch)?,
            }
        };

        // Unwrap walkers back into plain trees for the caller.
        let maybe_tree = maybe_walker.map(|walker| walker.into_inner());
        Ok((maybe_tree, deleted_keys))
    }

    /// Builds a `Tree` from a batch of operations.
    ///
    /// Keys in batch must be sorted and unique.
    fn build(batch: &Batch, source: S) -> Result<Option<Tree>> {
        if batch.is_empty() {
            return Ok(None);
        }

        // Use the middle entry as the root so the built tree is balanced.
        let mid_index = batch.len() / 2;
        let (mid_key, mid_op) = &batch[mid_index];
        let mid_value = match mid_op {
            Delete => {
                // The middle op deletes a key that doesn't exist yet: build
                // the two halves and merge them without a middle node.
                let left_batch = &batch[..mid_index];
                let right_batch = &batch[mid_index + 1..];
                let maybe_tree = Self::build(left_batch, source.clone())?
                    .map(|tree| Self::new(tree, source.clone()));
                let maybe_tree = match maybe_tree {
                    Some(tree) => tree.apply(right_batch)?.0,
                    None => Self::build(right_batch, source.clone())?
                        .map(|tree| Self::new(tree, source.clone())),
                };
                return Ok(maybe_tree.map(|tree| tree.into()));
            }
            Put(value) => value,
        };

        // TODO: take from batch so we don't have to clone
        let mid_tree = Tree::new(mid_key.to_vec(), mid_value.to_vec())?;
        // PanicSource is safe here: a freshly built tree has no pruned
        // nodes, so nothing will ever be fetched.
        let mid_walker = Walker::new(mid_tree, PanicSource {});
        Ok(mid_walker
            .recurse(batch, mid_index, true)?
            .0
            // use walker, ignore deleted_keys since it should be empty
            .map(|w| w.into_inner()))
    }

    /// Applies a batch of operations to an existing tree. This is similar
    /// to `Walker::apply_to`, but requires a populated tree.
    ///
    /// Keys in batch must be sorted and unique.
    fn apply(self, batch: &Batch) -> Result<(Option<Self>, LinkedList<Vec<u8>>)> {
        // binary search to see if this node's key is in the batch, and to split
        // into left and right batches
        let search = batch.binary_search_by(|(key, _op)| key.as_slice().cmp(self.tree().key()));

        let tree = if let Ok(index) = search {
            // a key matches this node's key, apply op to this node
            match &batch[index].1 {
                // TODO: take vec from batch so we don't need to clone
                Put(value) => self.with_value(value.to_vec()),
                Delete => {
                    // Removing this node: detach both subtrees, recursively
                    // apply the remaining ops to each, then merge.
                    let source = self.clone_source();
                    let key = self.tree().key().to_vec();
                    let (walker, maybe_left) = self.detach(true)?;
                    let (walker, maybe_right) = walker.detach(false)?;
                    let (maybe_left, mut deleted_keys) =
                        Self::apply_to(maybe_left, &batch[..index], source.clone())?;
                    deleted_keys.push_back(key);
                    let (maybe_right, mut deleted_keys_right) =
                        Self::apply_to(maybe_right, &batch[index + 1..], source)?;
                    deleted_keys.append(&mut deleted_keys_right);
                    let maybe_walker = walker
                        .attach(true, maybe_left)
                        .attach(false, maybe_right)
                        .remove()?
                        .map(|w| w.maybe_balance())
                        .transpose()?;
                    return Ok((maybe_walker, deleted_keys));
                }
            }
        } else {
            Ok(self)
        };

        // `mid` splits the batch for recursion; when this node's own key
        // was found (and handled above), exclude it from the split.
        let (mid, exclusive) = match search {
            Ok(index) => (index, true),
            Err(index) => (index, false),
        };

        tree?.recurse(batch, mid, exclusive)
    }
    /// Recursively applies operations to the tree's children (if there are
    /// any operations for them).
    ///
    /// This recursion executes serially in the same thread, but in the
    /// future will be dispatched to workers in other threads.
    fn recurse(
        self,
        batch: &Batch,
        mid: usize,
        exclusive: bool,
    ) -> Result<(Option<Self>, LinkedList<Vec<u8>>)> {
        // `mid` partitions the batch; `exclusive` skips the entry at `mid`
        // (it applied to this node and was already handled by the caller).
        let left_batch = &batch[..mid];
        let right_batch = if exclusive {
            &batch[mid + 1..]
        } else {
            &batch[mid..]
        };

        let mut deleted_keys = LinkedList::default();

        let tree = if !left_batch.is_empty() {
            let source = self.clone_source();
            self.walk(true, |maybe_left| {
                let (maybe_left, mut deleted_keys_left) =
                    Self::apply_to(maybe_left, left_batch, source)?;
                deleted_keys.append(&mut deleted_keys_left);
                Ok(maybe_left)
            })?
        } else {
            self
        };

        let tree = if !right_batch.is_empty() {
            let source = tree.clone_source();
            tree.walk(false, |maybe_right| {
                let (maybe_right, mut deleted_keys_right) =
                    Self::apply_to(maybe_right, right_batch, source)?;
                deleted_keys.append(&mut deleted_keys_right);
                Ok(maybe_right)
            })?
        } else {
            tree
        };

        // Subtree heights may have changed; restore the AVL invariant.
        let tree = tree.maybe_balance()?;

        Ok((Some(tree), deleted_keys))
    }

    /// Gets the wrapped tree's balance factor.
    #[inline]
    fn balance_factor(&self) -> i8 {
        self.tree().balance_factor()
    }

    /// Checks if the tree is unbalanced and if so, applies AVL tree
    /// rotation(s) to rebalance the tree and its subtrees. Returns the root
    /// node of the balanced tree after applying the rotations.
    fn maybe_balance(self) -> Result<Self> {
        let balance_factor = self.balance_factor();
        // AVL invariant: a balance factor within [-1, 1] needs no rotation.
        if balance_factor.abs() <= 1 {
            return Ok(self);
        }

        let left = balance_factor < 0;

        // maybe do a double rotation: when the taller child leans the
        // opposite way from its parent, rotate the child first.
        let tree = if left == (self.tree().link(left).unwrap().balance_factor() > 0) {
            self.walk_expect(left, |child| Ok(Some(child.rotate(!left)?)))?
        } else {
            self
        };

        tree.rotate(left)
    }
    /// Applies an AVL tree rotation, a constant-time operation which only
    /// needs to swap pointers in order to rebalance a tree.
    fn rotate(self, left: bool) -> Result<Self> {
        let (tree, child) = self.detach_expect(left)?;
        let (child, maybe_grandchild) = child.detach(!left)?;

        // attach grandchild to self
        let tree = tree.attach(left, maybe_grandchild).maybe_balance()?;

        // attach self to child, return child
        child.attach(!left, Some(tree)).maybe_balance()
    }

    /// Removes the root node from the tree. Rearranges and rebalances
    /// descendants (if any) in order to maintain a valid tree.
    pub fn remove(self) -> Result<Option<Self>> {
        let tree = self.tree();
        let has_left = tree.link(true).is_some();
        let has_right = tree.link(false).is_some();
        // Promote from the taller side to keep the result balanced.
        let left = tree.child_height(true) > tree.child_height(false);

        let maybe_tree = if has_left && has_right {
            // two children, promote edge of taller child.
            // The emptied root walker (`_`) is discarded — its node is the
            // one being removed.
            let (tree, tall_child) = self.detach_expect(left)?;
            let (_, short_child) = tree.detach_expect(!left)?;
            Some(tall_child.promote_edge(!left, short_child)?)
        } else if has_left || has_right {
            // single child, promote it (`left` points at the existing
            // child, since the other side has height 0)
            Some(self.detach_expect(left)?.1)
        } else {
            // no child
            None
        };

        Ok(maybe_tree)
    }

    /// Traverses to find the tree's edge on the given side, removes it, and
    /// reattaches it at the top in order to fill in a gap when removing a
    /// root node from a tree with both left and right children. Attaches
    /// `attach` on the opposite side. Returns the promoted node.
    fn promote_edge(self, left: bool, attach: Self) -> Result<Self> {
        let (edge, maybe_child) = self.remove_edge(left)?;
        edge.attach(!left, maybe_child)
            .attach(left, Some(attach))
            .maybe_balance()
    }
fn remove_edge(self, left: bool) -> Result<(Self, Option)> { if self.tree().link(left).is_some() { // this node is not the edge, recurse let (tree, child) = self.detach_expect(left)?; let (edge, maybe_child) = child.remove_edge(left)?; let tree = tree.attach(left, maybe_child).maybe_balance()?; Ok((edge, Some(tree))) } else { // this node is the edge, detach its child if present self.detach(!left) } } } #[cfg(test)] mod test { use super::*; use crate::test_utils::{ apply_memonly, assert_tree_invariants, del_entry, make_tree_seq, put_entry, seq_key, }; use crate::tree::*; #[test] fn simple_insert() -> Result<()> { let batch = [(b"foo2".to_vec(), Op::Put(b"bar2".to_vec()))]; let tree = Tree::new(b"foo".to_vec(), b"bar".to_vec())?; let (maybe_walker, deleted_keys) = Walker::new(tree, PanicSource {}) .apply(&batch) .expect("apply errored"); let walker = maybe_walker.expect("should be Some"); assert_eq!(walker.tree().key(), b"foo"); assert_eq!(walker.into_inner().child(false).unwrap().key(), b"foo2"); assert!(deleted_keys.is_empty()); Ok(()) } #[test] fn simple_update() -> Result<()> { let batch = [(b"foo".to_vec(), Op::Put(b"bar2".to_vec()))]; let tree = Tree::new(b"foo".to_vec(), b"bar".to_vec())?; let (maybe_walker, deleted_keys) = Walker::new(tree, PanicSource {}) .apply(&batch) .expect("apply errored"); let walker = maybe_walker.expect("should be Some"); assert_eq!(walker.tree().key(), b"foo"); assert_eq!(walker.tree().value(), b"bar2"); assert!(walker.tree().link(true).is_none()); assert!(walker.tree().link(false).is_none()); assert!(deleted_keys.is_empty()); Ok(()) } #[test] fn simple_delete() -> Result<()> { let batch = [(b"foo2".to_vec(), Op::Delete)]; let tree = Tree::from_fields( b"foo".to_vec(), b"bar".to_vec(), [123; 32], None, Some(Link::Loaded { hash: [123; 32], child_heights: (0, 0), tree: Tree::new(b"foo2".to_vec(), b"bar2".to_vec())?, }), ); let (maybe_walker, deleted_keys) = Walker::new(tree, PanicSource {}) .apply(&batch) .expect("apply errored"); 
    #[test]
    fn delete_non_existent() -> Result<()> {
        // Deleting a key that isn't in the tree is a no-op, not an error.
        let batch = [(b"foo2".to_vec(), Op::Delete)];
        let tree = Tree::new(b"foo".to_vec(), b"bar".to_vec())?;
        Walker::new(tree, PanicSource {}).apply(&batch).unwrap();
        Ok(())
    }

    #[test]
    fn delete_only_node() -> Result<()> {
        // Deleting the sole node yields an empty tree (None) and reports
        // the deleted key.
        let batch = [(b"foo".to_vec(), Op::Delete)];
        let tree = Tree::new(b"foo".to_vec(), b"bar".to_vec())?;
        let (maybe_walker, deleted_keys) = Walker::new(tree, PanicSource {})
            .apply(&batch)
            .expect("apply errored");
        assert!(maybe_walker.is_none());
        assert_eq!(deleted_keys.len(), 1);
        assert_eq!(deleted_keys.front().unwrap(), b"foo");
        Ok(())
    }

    #[test]
    fn delete_deep() {
        // Delete a node deep in a 50-node sequential tree.
        let tree = make_tree_seq(50);
        let batch = [del_entry(5)];
        let (maybe_walker, deleted_keys) = Walker::new(tree, PanicSource {})
            .apply(&batch)
            .expect("apply errored");
        maybe_walker.expect("should be Some");
        assert_eq!(deleted_keys.len(), 1);
        assert_eq!(*deleted_keys.front().unwrap(), seq_key(5));
    }

    #[test]
    fn delete_recursive() {
        // Two deletes that hit nodes in different subtrees; the deleted
        // keys come back in batch order.
        let tree = make_tree_seq(50);
        let batch = [del_entry(29), del_entry(34)];
        let (maybe_walker, mut deleted_keys) = Walker::new(tree, PanicSource {})
            .apply(&batch)
            .expect("apply errored");
        maybe_walker.expect("should be Some");
        assert_eq!(deleted_keys.len(), 2);
        assert_eq!(deleted_keys.pop_front().unwrap(), seq_key(29));
        assert_eq!(deleted_keys.pop_front().unwrap(), seq_key(34));
    }

    #[test]
    fn delete_recursive_2() {
        let tree = make_tree_seq(10);
        let batch = [del_entry(7), del_entry(9)];
        let (maybe_walker, deleted_keys) = Walker::new(tree, PanicSource {})
            .apply(&batch)
            .expect("apply errored");
        maybe_walker.expect("should be Some");
        // Deleted-key order is not guaranteed here, so sort before
        // comparing.
        let mut deleted_keys: Vec<&Vec<u8>> = deleted_keys.iter().collect();
        deleted_keys.sort();
        assert_eq!(deleted_keys, vec![&seq_key(7), &seq_key(9)]);
    }

    #[test]
    fn rebalanced_delete() {
        let tree = make_tree_seq(7);

        // Remove a padding node first so the subsequent batch forces
        // rebalancing.
        let walker = Walker::new(tree, PanicSource {})
            .apply(&[(vec![0; 20], Delete)])
            .expect("apply errored")
            .0
            .unwrap();

        // Mixed puts and deletes in one batch.
        let batch = [
            put_entry(0),
            put_entry(1),
            put_entry(2),
            put_entry(3),
            del_entry(4),
            del_entry(5),
            del_entry(6),
        ];
        let (maybe_walker, deleted_keys) = walker.apply(&batch).expect("apply errored");
        let walker = maybe_walker.expect("should be Some");

        let mut deleted_keys: Vec<&Vec<u8>> = deleted_keys.iter().collect();
        deleted_keys.sort();
        assert_eq!(deleted_keys, vec![&seq_key(4), &seq_key(5), &seq_key(6)]);

        // Only the four put keys should remain, in order.
        let mut iter = walker.tree().iter();
        assert_eq!(iter.next().unwrap().0, seq_key(0));
        assert_eq!(iter.next().unwrap().0, seq_key(1));
        assert_eq!(iter.next().unwrap().0, seq_key(2));
        assert_eq!(iter.next().unwrap().0, seq_key(3));
        assert!(iter.next().is_none());
    }

    #[test]
    fn apply_empty_none() {
        // An empty batch on an empty tree stays empty.
        let (maybe_tree, deleted_keys) =
            Walker::<PanicSource>::apply_to(None, &[], PanicSource {}).expect("apply_to failed");
        assert!(maybe_tree.is_none());
        assert!(deleted_keys.is_empty());
    }

    #[test]
    fn insert_empty_single() {
        // A single put on an empty tree builds a one-node tree.
        let batch = vec![(vec![0], Op::Put(vec![1]))];
        let (maybe_tree, deleted_keys) =
            Walker::<PanicSource>::apply_to(None, &batch, PanicSource {}).expect("apply_to failed");
        let tree = maybe_tree.expect("expected tree");
        assert_eq!(tree.key(), &[0]);
        assert_eq!(tree.value(), &[1]);
        assert_tree_invariants(&tree);
        assert!(deleted_keys.is_empty());
    }

    #[test]
    fn insert_root_single() -> Result<()> {
        let tree = Tree::new(vec![5], vec![123])?;
        let batch = vec![(vec![6], Op::Put(vec![123]))];
        let tree = apply_memonly(tree, &batch);
        assert_eq!(tree.key(), &[5]);
        assert!(tree.child(true).is_none());
        assert_eq!(tree.child(false).expect("expected child").key(), &[6]);
        Ok(())
    }

    #[test]
    fn insert_root_double() -> Result<()> {
        let tree = Tree::new(vec![5], vec![123])?;
        let batch = vec![(vec![4], Op::Put(vec![123])), (vec![6], Op::Put(vec![123]))];
        let tree = apply_memonly(tree, &batch);
        assert_eq!(tree.key(), &[5]);
        assert_eq!(tree.child(true).expect("expected child").key(), &[4]);
        assert_eq!(tree.child(false).expect("expected child").key(), &[6]);
        Ok(())
    }

    #[test]
    fn insert_rebalance() -> Result<()> {
        let tree = Tree::new(vec![5], vec![123])?;

        let batch = vec![(vec![6], Op::Put(vec![123]))];
        let tree = apply_memonly(tree, &batch);

        // Third sequential insert triggers a left rotation: 6 becomes root.
        let batch = vec![(vec![7], Op::Put(vec![123]))];
        let tree = apply_memonly(tree, &batch);

        assert_eq!(tree.key(), &[6]);
        assert_eq!(tree.child(true).expect("expected child").key(), &[5]);
        assert_eq!(tree.child(false).expect("expected child").key(), &[7]);
        Ok(())
    }

    #[test]
    fn insert_100_sequential() -> Result<()> {
        let mut tree = Tree::new(vec![0], vec![123])?;
        for i in 0..100 {
            let batch = vec![(vec![i + 1], Op::Put(vec![123]))];
            tree = apply_memonly(tree, &batch);
        }
        // The tree rebalances as it grows; the final root/children are the
        // pinned result of 101 sequential inserts.
        assert_eq!(tree.key(), &[63]);
        assert_eq!(tree.child(true).expect("expected child").key(), &[31]);
        assert_eq!(tree.child(false).expect("expected child").key(), &[79]);
        Ok(())
    }

    #[test]
    fn delete_recursive_large() {
        // Bulk-delete a contiguous range from a large tree.
        let tree = make_tree_seq(2_500);
        let mut batch = vec![];
        for i in 500..2_000 {
            batch.push(del_entry(i));
        }
        let (maybe_walker, deleted_keys) = Walker::new(tree, PanicSource {})
            .apply(&batch)
            .expect("apply errored");
        maybe_walker.expect("should be Some");
        assert_eq!(deleted_keys.len(), 1_500);
    }
fn fetch(&self, link: &Link) -> Result { self.fetch_by_key_expect(link.key()) } fn fetch_by_key_expect(&self, key: &[u8]) -> Result { self.fetch_by_key(key)? .ok_or_else(|| Error::Key(format!("Key does not exist: {key:?}"))) } } ================================================ FILE: src/tree/walk/mod.rs ================================================ mod fetch; mod ref_walker; use super::{Link, Tree}; use crate::error::Result; use crate::owner::Owner; pub use fetch::Fetch; pub use ref_walker::RefWalker; /// Allows traversal of a `Tree`, fetching from the given source when traversing /// to a pruned node, detaching children as they are traversed. pub struct Walker where S: Fetch + Sized + Clone + Send, { tree: Owner, source: S, } impl Walker where S: Fetch + Sized + Clone + Send, { /// Creates a `Walker` with the given tree and source. pub fn new(tree: Tree, source: S) -> Self { Walker { tree: Owner::new(tree), source, } } /// Similar to `Tree#detach`, but yields a `Walker` which fetches from the /// same source as `self`. Returned tuple is `(updated_self, /// maybe_child_walker)`. pub fn detach(mut self, left: bool) -> Result<(Self, Option)> { let link = match self.tree.link(left) { None => return Ok((self, None)), Some(link) => link, }; let child = if link.tree().is_some() { match self.tree.own_return(|t| t.detach(left)) { Some(child) => child, _ => unreachable!("Expected Some"), } } else { let link = self.tree.slot_mut(left).take(); match link { Some(Link::Reference { .. }) => (), _ => unreachable!("Expected Some(Link::Reference)"), } self.source.fetch(&link.unwrap())? }; let child = self.wrap(child); Ok((self, Some(child))) } /// Similar to `Tree#detach_expect`, but yields a `Walker` which fetches /// from the same source as `self`. Returned tuple is `(updated_self, /// child_walker)`. 
pub fn detach_expect(self, left: bool) -> Result<(Self, Self)> { let (walker, maybe_child) = self.detach(left)?; if let Some(child) = maybe_child { Ok((walker, child)) } else { panic!( "Expected {} child, got None", if left { "left" } else { "right" } ); } } /// Similar to `Tree#walk`, but yields a `Walker` which fetches from the /// same source as `self`. pub fn walk(self, left: bool, f: F) -> Result where F: FnOnce(Option) -> Result>, T: Into, { let (mut walker, maybe_child) = self.detach(left)?; let new_child = f(maybe_child)?.map(|t| t.into()); walker.tree.own(|t| t.attach(left, new_child)); Ok(walker) } /// Similar to `Tree#walk_expect` but yields a `Walker` which fetches from /// the same source as `self`. pub fn walk_expect(self, left: bool, f: F) -> Result where F: FnOnce(Self) -> Result>, T: Into, { let (mut walker, child) = self.detach_expect(left)?; let new_child = f(child)?.map(|t| t.into()); walker.tree.own(|t| t.attach(left, new_child)); Ok(walker) } /// Returns an immutable reference to the `Tree` wrapped by this walker. pub fn tree(&self) -> &Tree { &self.tree } /// Consumes the `Walker` and returns the `Tree` it wraps. pub fn into_inner(self) -> Tree { self.tree.into_inner() } /// Takes a `Tree` and returns a `Walker` which fetches from the same source /// as `self`. fn wrap(&self, tree: Tree) -> Self { Walker::new(tree, self.source.clone()) } /// Returns a clone of this `Walker`'s source. pub fn clone_source(&self) -> S { self.source.clone() } /// Similar to `Tree#attach`, but can also take a `Walker` since it /// implements `Into`. pub fn attach(mut self, left: bool, maybe_child: Option) -> Self where T: Into, { self.tree .own(|t| t.attach(left, maybe_child.map(|t| t.into()))); self } /// Similar to `Tree#with_value`. 
pub fn with_value(mut self, value: Vec) -> Result { self.tree.own_fallible(|t| t.with_value(value))?; Ok(self) } } impl From> for Tree where S: Fetch + Sized + Clone + Send, { fn from(walker: Walker) -> Tree { walker.into_inner() } } #[cfg(test)] mod test { use super::super::NoopCommit; use super::*; use crate::tree::Tree; #[derive(Clone)] struct MockSource {} impl Fetch for MockSource { fn fetch_by_key(&self, key: &[u8]) -> Result> { Tree::new(key.to_vec(), b"foo".to_vec()).map(Some) } } #[test] fn walk_modified() -> Result<()> { let tree = Tree::new(b"test".to_vec(), b"abc".to_vec())? .attach(true, Some(Tree::new(b"foo".to_vec(), b"bar".to_vec())?)); let source = MockSource {}; let walker = Walker::new(tree, source); let walker = walker .walk(true, |child| -> Result> { assert_eq!(child.expect("should have child").tree().key(), b"foo"); Ok(None) }) .expect("walk failed"); assert!(walker.into_inner().child(true).is_none()); Ok(()) } #[test] fn walk_stored() -> Result<()> { let mut tree = Tree::new(b"test".to_vec(), b"abc".to_vec())? 
.attach(true, Some(Tree::new(b"foo".to_vec(), b"bar".to_vec())?)); tree.commit(&mut NoopCommit {}).expect("commit failed"); let source = MockSource {}; let walker = Walker::new(tree, source); let walker = walker .walk(true, |child| -> Result> { assert_eq!(child.expect("should have child").tree().key(), b"foo"); Ok(None) }) .expect("walk failed"); assert!(walker.into_inner().child(true).is_none()); Ok(()) } #[test] fn walk_pruned() { let tree = Tree::from_fields( b"test".to_vec(), b"abc".to_vec(), Default::default(), Some(Link::Reference { hash: Default::default(), key: b"foo".to_vec(), child_heights: (0, 0), }), None, ); let source = MockSource {}; let walker = Walker::new(tree, source); let walker = walker .walk_expect(true, |child| -> Result> { assert_eq!(child.tree().key(), b"foo"); Ok(None) }) .expect("walk failed"); assert!(walker.into_inner().child(true).is_none()); } #[test] fn walk_none() -> Result<()> { let tree = Tree::new(b"test".to_vec(), b"abc".to_vec())?; let source = MockSource {}; let walker = Walker::new(tree, source); walker .walk(true, |child| -> Result> { assert!(child.is_none()); Ok(None) }) .expect("walk failed"); Ok(()) } } ================================================ FILE: src/tree/walk/ref_walker.rs ================================================ use super::super::{Link, Tree}; use super::Fetch; use crate::error::Result; /// Allows read-only traversal of a `Tree`, fetching from the given source when /// traversing to a pruned node. /// /// The fetched nodes are in memory until they (possibly) get pruned on the next /// commit. /// /// Only finalized trees may be walked (trees which have had `commit` called /// since the last update). pub struct RefWalker<'a, S> where S: Fetch + Sized + Clone + Send, { tree: &'a mut Tree, source: S, } impl<'a, S> RefWalker<'a, S> where S: Fetch + Sized + Clone + Send, { /// Creates a `RefWalker` with the given tree and source. 
pub fn new(tree: &'a mut Tree, source: S) -> Self { // TODO: check if tree has modified links, panic if so RefWalker { tree, source } } /// Gets an immutable reference to the `Tree` wrapped by this `RefWalker`. pub fn tree(&self) -> &Tree { self.tree } /// Traverses to the child on the given side (if any), fetching from the /// source if pruned. When fetching, the link is upgraded from /// `Link::Reference` to `Link::Loaded`. pub fn walk(&mut self, left: bool) -> Result>> { let link = match self.tree.link(left) { None => return Ok(None), Some(link) => link, }; match link { Link::Reference { .. } => { self.tree.load(left, &self.source)?; } Link::Modified { .. } => panic!("Cannot traverse Link::Modified"), Link::Uncommitted { .. } | Link::Loaded { .. } => {} } let child = self.tree.child_mut(left).unwrap(); Ok(Some(RefWalker::new(child, self.source.clone()))) } }