Showing preview only (8,711K chars total). Download the full file or copy to clipboard to get everything.
Repository: quickwit-oss/tantivy
Branch: main
Commit: 545169c0d843
Files: 504
Total size: 29.6 MB
Directory structure:
gitextract_0oj2913e/
├── .claude/
│ └── skills/
│ ├── rationalize-deps/
│ │ └── SKILL.md
│ └── simple-pr/
│ └── SKILL.md
├── .github/
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── actions.md
│ │ ├── bug_report.md
│ │ ├── feature_request.md
│ │ └── question.md
│ ├── dependabot.yml
│ └── workflows/
│ ├── coverage.yml
│ ├── long_running.yml
│ └── test.yml
├── .gitignore
├── ARCHITECTURE.md
├── AUTHORS
├── CHANGELOG.md
├── CITATION.cff
├── Cargo.toml
├── LICENSE
├── Makefile
├── README.md
├── RELEASE.md
├── TODO.txt
├── benches/
│ ├── agg_bench.rs
│ ├── alice.txt
│ ├── analyzer.rs
│ ├── and_or_queries.rs
│ ├── bool_queries_with_range.rs
│ ├── exists_json.rs
│ ├── gh.json
│ ├── hdfs.json
│ ├── index-bench.rs
│ ├── merge_segments.rs
│ ├── range_queries.rs
│ ├── range_query.rs
│ ├── regex_all_terms.rs
│ ├── str_search_and_get.rs
│ └── wiki.json
├── bitpacker/
│ ├── Cargo.toml
│ ├── benches/
│ │ └── bench.rs
│ └── src/
│ ├── bitpacker.rs
│ ├── blocked_bitpacker.rs
│ ├── filter_vec/
│ │ ├── avx2.rs
│ │ ├── mod.rs
│ │ └── scalar.rs
│ └── lib.rs
├── cliff.toml
├── columnar/
│ ├── Cargo.toml
│ ├── README.md
│ ├── benches/
│ │ ├── bench_access.rs
│ │ ├── bench_column_values_get.rs
│ │ ├── bench_create_column_values.rs
│ │ ├── bench_first_vals.rs
│ │ ├── bench_merge.rs
│ │ ├── bench_optional_index.rs
│ │ ├── bench_values_u128.rs
│ │ ├── bench_values_u64.rs
│ │ └── common.rs
│ ├── columnar-cli/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ └── main.rs
│ ├── columnar-cli-inspect/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ └── main.rs
│ ├── compat_tests_data/
│ │ ├── v1.columnar
│ │ └── v2.columnar
│ └── src/
│ ├── TODO.md
│ ├── block_accessor.rs
│ ├── column/
│ │ ├── dictionary_encoded.rs
│ │ ├── mod.rs
│ │ └── serialize.rs
│ ├── column_index/
│ │ ├── merge/
│ │ │ ├── mod.rs
│ │ │ ├── shuffled.rs
│ │ │ └── stacked.rs
│ │ ├── mod.rs
│ │ ├── multivalued_index.rs
│ │ ├── optional_index/
│ │ │ ├── mod.rs
│ │ │ ├── set.rs
│ │ │ ├── set_block/
│ │ │ │ ├── dense.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── sparse.rs
│ │ │ │ └── tests.rs
│ │ │ └── tests.rs
│ │ └── serialize.rs
│ ├── column_values/
│ │ ├── merge.rs
│ │ ├── mod.rs
│ │ ├── monotonic_column.rs
│ │ ├── monotonic_mapping.rs
│ │ ├── monotonic_mapping_u128.rs
│ │ ├── stats.rs
│ │ ├── u128_based/
│ │ │ ├── compact_space/
│ │ │ │ ├── blank_range.rs
│ │ │ │ ├── build_compact_space.rs
│ │ │ │ └── mod.rs
│ │ │ └── mod.rs
│ │ ├── u64_based/
│ │ │ ├── bitpacked.rs
│ │ │ ├── blockwise_linear.rs
│ │ │ ├── line.rs
│ │ │ ├── linear.rs
│ │ │ ├── mod.rs
│ │ │ ├── stats_collector.rs
│ │ │ └── tests.rs
│ │ └── vec_column.rs
│ ├── columnar/
│ │ ├── column_type.rs
│ │ ├── format_version.rs
│ │ ├── merge/
│ │ │ ├── merge_dict_column.rs
│ │ │ ├── merge_mapping.rs
│ │ │ ├── mod.rs
│ │ │ ├── term_merger.rs
│ │ │ └── tests.rs
│ │ ├── mod.rs
│ │ ├── reader/
│ │ │ └── mod.rs
│ │ └── writer/
│ │ ├── column_operation.rs
│ │ ├── column_writers.rs
│ │ ├── mod.rs
│ │ ├── serializer.rs
│ │ └── value_index.rs
│ ├── compat_tests.rs
│ ├── dictionary.rs
│ ├── dynamic_column.rs
│ ├── iterable.rs
│ ├── lib.rs
│ ├── tests.rs
│ ├── utils.rs
│ └── value.rs
├── common/
│ ├── Cargo.toml
│ ├── benches/
│ │ └── bench.rs
│ └── src/
│ ├── bitset.rs
│ ├── bounds.rs
│ ├── byte_count.rs
│ ├── datetime.rs
│ ├── file_slice.rs
│ ├── group_by.rs
│ ├── json_path_writer.rs
│ ├── lib.rs
│ ├── serialize.rs
│ ├── vint.rs
│ └── writer.rs
├── doc/
│ ├── .gitignore
│ ├── book.toml
│ └── src/
│ ├── SUMMARY.md
│ ├── avant-propos.md
│ ├── basis.md
│ ├── best_practise.md.rs
│ ├── examples.md
│ ├── facetting.md
│ ├── faq.md
│ ├── index_sorting.md
│ ├── innerworkings.md
│ ├── inverted_index.md
│ ├── json.md
│ └── schema.md
├── examples/
│ ├── aggregation.rs
│ ├── basic_search.rs
│ ├── custom_collector.rs
│ ├── custom_tokenizer.rs
│ ├── date_time_field.rs
│ ├── deleting_updating_documents.rs
│ ├── faceted_search.rs
│ ├── faceted_search_with_tweaked_score.rs
│ ├── filter_aggregation.rs
│ ├── fuzzy_search.rs
│ ├── index_from_multiple_threads.rs
│ ├── index_with_json.rs
│ ├── integer_range_search.rs
│ ├── ip_field.rs
│ ├── iterating_docs_and_positions.rs
│ ├── json_field.rs
│ ├── phrase_prefix_search.rs
│ ├── pre_tokenized_text.rs
│ ├── snippet.rs
│ ├── stop_words.rs
│ └── warmer.rs
├── ownedbytes/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── query-grammar/
│ ├── Cargo.toml
│ ├── README.md
│ └── src/
│ ├── infallible.rs
│ ├── lib.rs
│ ├── occur.rs
│ ├── query_grammar.rs
│ └── user_input_ast.rs
├── rustfmt.toml
├── src/
│ ├── aggregation/
│ │ ├── README.md
│ │ ├── accessor_helpers.rs
│ │ ├── agg_data.rs
│ │ ├── agg_limits.rs
│ │ ├── agg_req.rs
│ │ ├── agg_result.rs
│ │ ├── agg_tests.rs
│ │ ├── bucket/
│ │ │ ├── composite/
│ │ │ │ ├── accessors.rs
│ │ │ │ ├── calendar_interval.rs
│ │ │ │ ├── collector.rs
│ │ │ │ ├── map.rs
│ │ │ │ ├── mod.rs
│ │ │ │ └── numeric_types.rs
│ │ │ ├── filter.rs
│ │ │ ├── histogram/
│ │ │ │ ├── date_histogram.rs
│ │ │ │ ├── histogram.rs
│ │ │ │ └── mod.rs
│ │ │ ├── mod.rs
│ │ │ ├── range.rs
│ │ │ ├── term_agg.rs
│ │ │ └── term_missing_agg.rs
│ │ ├── cached_sub_aggs.rs
│ │ ├── collector.rs
│ │ ├── date.rs
│ │ ├── error.rs
│ │ ├── intermediate_agg_result.rs
│ │ ├── metric/
│ │ │ ├── average.rs
│ │ │ ├── cardinality.rs
│ │ │ ├── count.rs
│ │ │ ├── extended_stats.rs
│ │ │ ├── max.rs
│ │ │ ├── min.rs
│ │ │ ├── mod.rs
│ │ │ ├── percentiles.rs
│ │ │ ├── stats.rs
│ │ │ ├── sum.rs
│ │ │ └── top_hits.rs
│ │ ├── mod.rs
│ │ └── segment_agg_result.rs
│ ├── collector/
│ │ ├── count_collector.rs
│ │ ├── docset_collector.rs
│ │ ├── facet_collector.rs
│ │ ├── filter_collector_wrapper.rs
│ │ ├── histogram_collector.rs
│ │ ├── mod.rs
│ │ ├── multi_collector.rs
│ │ ├── sort_key/
│ │ │ ├── mod.rs
│ │ │ ├── order.rs
│ │ │ ├── sort_by_bytes.rs
│ │ │ ├── sort_by_erased_type.rs
│ │ │ ├── sort_by_score.rs
│ │ │ ├── sort_by_static_fast_value.rs
│ │ │ ├── sort_by_string.rs
│ │ │ └── sort_key_computer.rs
│ │ ├── sort_key_top_collector.rs
│ │ ├── tests.rs
│ │ ├── top_collector.rs
│ │ └── top_score_collector.rs
│ ├── compat_tests.rs
│ ├── core/
│ │ ├── executor.rs
│ │ ├── json_utils.rs
│ │ ├── mod.rs
│ │ ├── searcher.rs
│ │ └── tests.rs
│ ├── directory/
│ │ ├── composite_file.rs
│ │ ├── directory.rs
│ │ ├── directory_lock.rs
│ │ ├── error.rs
│ │ ├── footer.rs
│ │ ├── managed_directory.rs
│ │ ├── mmap_directory/
│ │ │ ├── file_watcher.rs
│ │ │ └── mod.rs
│ │ ├── mod.rs
│ │ ├── ram_directory.rs
│ │ ├── tests.rs
│ │ └── watch_event_router.rs
│ ├── docset.rs
│ ├── error.rs
│ ├── fastfield/
│ │ ├── alive_bitset.rs
│ │ ├── error.rs
│ │ ├── facet_reader.rs
│ │ ├── mod.rs
│ │ ├── readers.rs
│ │ └── writer.rs
│ ├── fieldnorm/
│ │ ├── code.rs
│ │ ├── mod.rs
│ │ ├── reader.rs
│ │ ├── serializer.rs
│ │ └── writer.rs
│ ├── functional_test.rs
│ ├── future_result.rs
│ ├── index/
│ │ ├── index.rs
│ │ ├── index_meta.rs
│ │ ├── inverted_index_reader.rs
│ │ ├── mod.rs
│ │ ├── segment.rs
│ │ ├── segment_component.rs
│ │ ├── segment_id.rs
│ │ └── segment_reader.rs
│ ├── indexer/
│ │ ├── delete_queue.rs
│ │ ├── doc_id_mapping.rs
│ │ ├── doc_opstamp_mapping.rs
│ │ ├── flat_map_with_buffer.rs
│ │ ├── index_writer.rs
│ │ ├── index_writer_status.rs
│ │ ├── indexing_term.rs
│ │ ├── log_merge_policy.rs
│ │ ├── merge_index_test.rs
│ │ ├── merge_operation.rs
│ │ ├── merge_policy.rs
│ │ ├── merger.rs
│ │ ├── mod.rs
│ │ ├── operation.rs
│ │ ├── path_to_unordered_id.rs
│ │ ├── prepared_commit.rs
│ │ ├── segment_entry.rs
│ │ ├── segment_manager.rs
│ │ ├── segment_register.rs
│ │ ├── segment_serializer.rs
│ │ ├── segment_updater.rs
│ │ ├── segment_writer.rs
│ │ ├── single_segment_index_writer.rs
│ │ └── stamper.rs
│ ├── lib.rs
│ ├── macros.rs
│ ├── positions/
│ │ ├── mod.rs
│ │ ├── reader.rs
│ │ └── serializer.rs
│ ├── postings/
│ │ ├── block_search.rs
│ │ ├── block_segment_postings.rs
│ │ ├── compression/
│ │ │ ├── mod.rs
│ │ │ └── vint.rs
│ │ ├── indexing_context.rs
│ │ ├── json_postings_writer.rs
│ │ ├── loaded_postings.rs
│ │ ├── mod.rs
│ │ ├── per_field_postings_writer.rs
│ │ ├── postings.rs
│ │ ├── postings_writer.rs
│ │ ├── recorder.rs
│ │ ├── segment_postings.rs
│ │ ├── serializer.rs
│ │ ├── skip.rs
│ │ └── term_info.rs
│ ├── query/
│ │ ├── all_query.rs
│ │ ├── automaton_weight.rs
│ │ ├── bitset/
│ │ │ └── mod.rs
│ │ ├── bm25.rs
│ │ ├── boolean_query/
│ │ │ ├── block_wand.rs
│ │ │ ├── boolean_query.rs
│ │ │ ├── boolean_weight.rs
│ │ │ └── mod.rs
│ │ ├── boost_query.rs
│ │ ├── const_score_query.rs
│ │ ├── disjunction.rs
│ │ ├── disjunction_max_query.rs
│ │ ├── empty_query.rs
│ │ ├── exclude.rs
│ │ ├── exist_query.rs
│ │ ├── explanation.rs
│ │ ├── fuzzy_query.rs
│ │ ├── intersection.rs
│ │ ├── mod.rs
│ │ ├── more_like_this/
│ │ │ ├── mod.rs
│ │ │ ├── more_like_this.rs
│ │ │ └── query.rs
│ │ ├── phrase_prefix_query/
│ │ │ ├── mod.rs
│ │ │ ├── phrase_prefix_query.rs
│ │ │ ├── phrase_prefix_scorer.rs
│ │ │ └── phrase_prefix_weight.rs
│ │ ├── phrase_query/
│ │ │ ├── mod.rs
│ │ │ ├── phrase_query.rs
│ │ │ ├── phrase_scorer.rs
│ │ │ ├── phrase_weight.rs
│ │ │ ├── regex_phrase_query.rs
│ │ │ └── regex_phrase_weight.rs
│ │ ├── query.rs
│ │ ├── query_parser/
│ │ │ ├── logical_ast.rs
│ │ │ ├── mod.rs
│ │ │ └── query_parser.rs
│ │ ├── range_query/
│ │ │ ├── fast_field_range_doc_set.rs
│ │ │ ├── mod.rs
│ │ │ ├── range_query.rs
│ │ │ └── range_query_fastfield.rs
│ │ ├── regex_query.rs
│ │ ├── reqopt_scorer.rs
│ │ ├── score_combiner.rs
│ │ ├── scorer.rs
│ │ ├── set_query.rs
│ │ ├── size_hint.rs
│ │ ├── term_query/
│ │ │ ├── mod.rs
│ │ │ ├── term_query.rs
│ │ │ ├── term_scorer.rs
│ │ │ └── term_weight.rs
│ │ ├── union/
│ │ │ ├── bitset_union.rs
│ │ │ ├── buffered_union.rs
│ │ │ ├── mod.rs
│ │ │ └── simple_union.rs
│ │ ├── vec_docset.rs
│ │ └── weight.rs
│ ├── reader/
│ │ ├── mod.rs
│ │ └── warming.rs
│ ├── schema/
│ │ ├── bytes_options.rs
│ │ ├── date_time_options.rs
│ │ ├── document/
│ │ │ ├── de.rs
│ │ │ ├── default_document.rs
│ │ │ ├── existing_type_impls.rs
│ │ │ ├── mod.rs
│ │ │ ├── owned_value.rs
│ │ │ ├── se.rs
│ │ │ └── value.rs
│ │ ├── facet.rs
│ │ ├── facet_options.rs
│ │ ├── field.rs
│ │ ├── field_entry.rs
│ │ ├── field_type.rs
│ │ ├── flags.rs
│ │ ├── index_record_option.rs
│ │ ├── ip_options.rs
│ │ ├── json_object_options.rs
│ │ ├── mod.rs
│ │ ├── named_field_document.rs
│ │ ├── numeric_options.rs
│ │ ├── schema.rs
│ │ ├── term.rs
│ │ └── text_options.rs
│ ├── snippet/
│ │ └── mod.rs
│ ├── space_usage/
│ │ └── mod.rs
│ ├── store/
│ │ ├── compression_lz4_block.rs
│ │ ├── compression_zstd_block.rs
│ │ ├── compressors.rs
│ │ ├── decompressors.rs
│ │ ├── footer.rs
│ │ ├── index/
│ │ │ ├── block.rs
│ │ │ ├── mod.rs
│ │ │ ├── skip_index.rs
│ │ │ └── skip_index_builder.rs
│ │ ├── mod.rs
│ │ ├── reader.rs
│ │ ├── store_compressor.rs
│ │ └── writer.rs
│ ├── termdict/
│ │ ├── fst_termdict/
│ │ │ ├── merger.rs
│ │ │ ├── mod.rs
│ │ │ ├── streamer.rs
│ │ │ ├── term_info_store.rs
│ │ │ └── termdict.rs
│ │ ├── mod.rs
│ │ ├── sstable_termdict/
│ │ │ ├── merger.rs
│ │ │ └── mod.rs
│ │ └── tests.rs
│ └── tokenizer/
│ ├── alphanum_only.rs
│ ├── ascii_folding_filter.rs
│ ├── empty_tokenizer.rs
│ ├── facet_tokenizer.rs
│ ├── lower_caser.rs
│ ├── mod.rs
│ ├── ngram_tokenizer.rs
│ ├── raw_tokenizer.rs
│ ├── regex_tokenizer.rs
│ ├── remove_long.rs
│ ├── simple_tokenizer.rs
│ ├── split_compound_words.rs
│ ├── stemmer.rs
│ ├── stop_word_filter/
│ │ ├── gen_stopwords.py
│ │ ├── mod.rs
│ │ └── stopwords.rs
│ ├── tokenized_string.rs
│ ├── tokenizer.rs
│ ├── tokenizer_manager.rs
│ └── whitespace_tokenizer.rs
├── sstable/
│ ├── Cargo.toml
│ ├── README.md
│ ├── benches/
│ │ ├── ord_to_term.rs
│ │ └── stream_bench.rs
│ ├── src/
│ │ ├── block_match_automaton.rs
│ │ ├── block_reader.rs
│ │ ├── delta.rs
│ │ ├── dictionary.rs
│ │ ├── lib.rs
│ │ ├── merge/
│ │ │ ├── heap_merge.rs
│ │ │ └── mod.rs
│ │ ├── sstable_index_v2.rs
│ │ ├── sstable_index_v3.rs
│ │ ├── streamer.rs
│ │ ├── value/
│ │ │ ├── index.rs
│ │ │ ├── mod.rs
│ │ │ ├── range.rs
│ │ │ ├── u64_monotonic.rs
│ │ │ ├── vec_u32.rs
│ │ │ └── void.rs
│ │ └── vint.rs
│ └── tests/
│ └── sstable_test.rs
├── stacker/
│ ├── Cargo.toml
│ ├── Performance.md
│ ├── benches/
│ │ └── bench.rs
│ ├── example/
│ │ └── hashmap.rs
│ ├── fuzz_test/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ └── main.rs
│ └── src/
│ ├── arena_hashmap.rs
│ ├── expull.rs
│ ├── fastcmp.rs
│ ├── fastcpy.rs
│ ├── lib.rs
│ ├── memory_arena.rs
│ └── shared_arena_hashmap.rs
├── tests/
│ ├── compat_tests_data/
│ │ ├── index_v6/
│ │ │ ├── .managed.json
│ │ │ ├── 00000000000000000000000000000000.fast
│ │ │ ├── 00000000000000000000000000000000.fieldnorm
│ │ │ ├── 00000000000000000000000000000000.idx
│ │ │ ├── 00000000000000000000000000000000.pos
│ │ │ ├── 00000000000000000000000000000000.store
│ │ │ ├── 00000000000000000000000000000000.term
│ │ │ └── meta.json
│ │ └── index_v7/
│ │ ├── .managed.json
│ │ ├── 000002f0000000000000000000000000.fast
│ │ ├── 000002f0000000000000000000000000.fieldnorm
│ │ ├── 000002f0000000000000000000000000.idx
│ │ ├── 000002f0000000000000000000000000.pos
│ │ ├── 000002f0000000000000000000000000.store
│ │ ├── 000002f0000000000000000000000000.term
│ │ └── meta.json
│ └── failpoints/
│ └── mod.rs
└── tokenizer-api/
├── Cargo.toml
├── README.md
└── src/
└── lib.rs
================================================
FILE CONTENTS
================================================
================================================
FILE: .claude/skills/rationalize-deps/SKILL.md
================================================
---
name: rationalize-deps
description: Analyze Cargo.toml dependencies and attempt to remove unused features to reduce compile times and binary size
---
# Rationalize Dependencies
This skill analyzes Cargo.toml dependencies to identify and remove unused features.
## Overview
Many crates enable features by default that may not be needed. This skill:
1. Identifies dependencies with default features enabled
2. Tests if `default-features = false` works
3. Identifies which specific features are actually needed
4. Verifies compilation after changes
## Step 1: Identify the target
Ask the user which crate(s) to analyze:
- A specific crate name (e.g., "tokio", "serde")
- A specific workspace member (e.g., "quickwit-search")
- "all" to scan the entire workspace
## Step 2: Analyze current dependencies
For the workspace Cargo.toml (`quickwit/Cargo.toml`), list dependencies that:
- Do NOT have `default-features = false`
- Have default features that might be unnecessary
Run: `cargo tree -p <crate> -f "{p} {f}" --edges features` to see what features are actually used.
## Step 3: For each candidate dependency
### 3a: Check the crate's default features
Look up the crate on crates.io or check its Cargo.toml to understand:
- What features are enabled by default
- What each feature provides
Use: `cargo metadata --format-version=1 | jq '.packages[] | select(.name == "<crate>") | .features'`
### 3b: Try disabling default features
Modify the dependency in `quickwit/Cargo.toml`:
From:
```toml
some-crate = { version = "1.0" }
```
To:
```toml
some-crate = { version = "1.0", default-features = false }
```
### 3c: Run cargo check
Run: `cargo check --workspace` (or target specific packages for faster feedback)
If compilation fails:
1. Read the error messages to identify which features are needed
2. Add only the required features explicitly:
```toml
some-crate = { version = "1.0", default-features = false, features = ["needed-feature"] }
```
3. Re-run cargo check
### 3d: Binary search for minimal features
If there are many default features, use binary search:
1. Start with no features
2. If it fails, add half the default features
3. Continue until you find the minimal set
## Step 4: Document findings
For each dependency analyzed, report:
- Original configuration
- New configuration (if changed)
- Features that were removed
- Any features that are required
## Step 5: Verify full build
After all changes, run:
```bash
cargo check --workspace --all-targets
cargo test --workspace --no-run
```
## Common Patterns
### Serde
Often only needs `derive`:
```toml
serde = { version = "1.0", default-features = false, features = ["derive", "std"] }
```
### Tokio
Identify which runtime features are actually used:
```toml
tokio = { version = "1.0", default-features = false, features = ["rt-multi-thread", "macros", "sync"] }
```
### Reqwest
Often doesn't need all TLS backends:
```toml
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls", "json"] }
```
## Rollback
If changes cause issues:
```bash
git checkout quickwit/Cargo.toml
cargo check --workspace
```
## Tips
- Start with large crates that have many default features (tokio, reqwest, hyper)
- Use `cargo bloat --crates` to identify large dependencies
- Check `cargo tree -d` for duplicate dependencies that might indicate feature conflicts
- Some features are needed only for tests - consider using `[dev-dependencies]` features
================================================
FILE: .claude/skills/simple-pr/SKILL.md
================================================
---
name: simple-pr
description: Create a simple PR from staged changes with an auto-generated commit message
disable-model-invocation: true
---
# Simple PR
Follow these steps to create a simple PR from staged changes:
## Step 1: Check workspace state
Run: `git status`
Verify that all changes have been staged (no unstaged changes). If there are unstaged changes, abort and ask the user to stage their changes first with `git add`.
Also verify that we are on the `main` branch. If not, abort and ask the user to switch to main first.
## Step 2: Ensure main is up to date
Run: `git pull origin main`
This ensures we're working from the latest code.
## Step 3: Review staged changes
Run: `git diff --cached`
Review the staged changes to understand what the PR will contain.
## Step 4: Generate commit message
Based on the staged changes, generate a concise commit message (1-2 sentences) that describes the "why" rather than the "what".
Display the proposed commit message to the user and ask for confirmation before proceeding.
## Step 5: Create a new branch
Get the git username: `git config user.name | tr ' ' '-' | tr '[:upper:]' '[:lower:]'`
Create a short, descriptive branch name based on the changes (e.g., `fix-typo-in-readme`, `add-retry-logic`, `update-deps`).
Create and checkout the branch: `git checkout -b {username}/{short-descriptive-name}`
## Step 6: Commit changes
Commit with the message from step 4:
```
git commit -m "{commit-message}"
```
## Step 7: Push and open a PR
Push the branch and open a PR:
```
git push -u origin {branch-name}
gh pr create --title "{commit-message-title}" --body "{longer-description-if-needed}"
```
Report the PR URL to the user when complete.
================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
github: fulmicoton
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
================================================
FILE: .github/ISSUE_TEMPLATE/actions.md
================================================
---
name: Actions
about: Actions not directly related to producing code.
---
# Actions title
Action description.
e.g.
- benchmark
- investigate and report
- etc.
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
---
**Describe the bug**
- What did you do?
- What happened?
- What was expected?
**Which version of tantivy are you using?**
If "master", ideally give the specific sha1 revision.
**To Reproduce**
If your bug is deterministic, can you give a minimal reproducing code?
Some bugs are not deterministic. Can you describe with precision in which context it happened?
If this is possible, can you share your code?
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**[Optional] describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
================================================
FILE: .github/ISSUE_TEMPLATE/question.md
================================================
---
name: Question
about: Ask any question about tantivy's usage...
---
Try to be specific about your use case...
================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
- package-ecosystem: cargo
directory: "/"
schedule:
interval: daily
time: "20:00"
open-pull-requests-limit: 10
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: daily
time: "20:00"
open-pull-requests-limit: 10
================================================
FILE: .github/workflows/coverage.yml
================================================
name: Coverage
on:
push:
branches: [main]
# Ensures that we cancel running jobs for the same PR / same workflow.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
coverage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Rust
run: rustup toolchain install nightly-2025-12-01 --profile minimal --component llvm-tools-preview
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@cargo-llvm-cov
- name: Generate code coverage
run: cargo +nightly-2025-12-01 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
continue-on-error: true
with:
token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
files: lcov.info
fail_ci_if_error: true
================================================
FILE: .github/workflows/long_running.yml
================================================
name: Long running tests
on:
push:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
NUM_FUNCTIONAL_TEST_ITERATIONS: 20000
# Ensures that we cancel running jobs for the same PR / same workflow.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install stable
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
override: true
- name: Run indexing_unsorted
run: cargo test indexing_unsorted -- --ignored
- name: Run indexing_sorted
run: cargo test indexing_sorted -- --ignored
================================================
FILE: .github/workflows/test.yml
================================================
name: Unit tests
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
# Ensures that we cancel running jobs for the same PR / same workflow.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install nightly
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
profile: minimal
components: rustfmt
- name: Install stable
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
components: clippy
- uses: Swatinem/rust-cache@v2
- name: Check Formatting
run: cargo +nightly fmt --all -- --check
- name: Check Stable Compilation
run: cargo build --all-features
- name: Check Bench Compilation
run: cargo +nightly bench --no-run --profile=dev --all-features
- uses: actions-rs/clippy-check@v1
with:
toolchain: stable
token: ${{ secrets.GITHUB_TOKEN }}
args: --tests
test:
runs-on: ubuntu-latest
strategy:
matrix:
features:
- { label: "all", flags: "mmap,stopwords,lz4-compression,zstd-compression,failpoints,stemmer" }
- { label: "quickwit", flags: "mmap,quickwit,failpoints" }
- { label: "none", flags: "" }
name: test-${{ matrix.features.label}}
steps:
- uses: actions/checkout@v4
- name: Install stable
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
override: true
- uses: taiki-e/install-action@nextest
- uses: Swatinem/rust-cache@v2
- name: Run tests
run: |
# if matrix.features.flags is empty then run on --lib to avoid compiling examples
# (as most of them rely on mmap) otherwise run all
if [ -z "${{ matrix.features.flags }}" ]; then
cargo +stable nextest run --lib --no-default-features --verbose --workspace
else
cargo +stable nextest run --features ${{ matrix.features.flags }} --no-default-features --verbose --workspace
fi
- name: Run doctests
run: |
# if matrix.features.flags is empty then skip the doctests,
# otherwise run them with the selected features
if [ -z "${{ matrix.features.flags }}" ]; then
echo "no doctest for no feature flag"
else
cargo +stable test --doc --features ${{ matrix.features.flags }} --verbose --workspace
fi
================================================
FILE: .gitignore
================================================
tantivy.iml
.cargo
proptest-regressions
*.swp
target
target/debug
.vscode
target/release
Cargo.lock
benchmark
.DS_Store
*.bk
.idea
trace.dat
cargo-timing*
control
variable
================================================
FILE: ARCHITECTURE.md
================================================
# Tantivy
## What is tantivy?
Tantivy is a library that is meant to build search engines. Although it is by no means a port of Lucene, its architecture is strongly inspired by it. If you are familiar with Lucene, you may be struck by the overlapping vocabulary.
This is not fortuitous.
Tantivy's bread and butter is to address the problem of full-text search :
Given a large set of textual documents, and a text query, return the K-most relevant documents in a very efficient way. To execute these queries rapidly, tantivy needs to build an index beforehand. The relevance score implemented in tantivy is not configurable. Tantivy uses the same score as the default similarity used in Lucene / Elasticsearch, called [BM25](https://en.wikipedia.org/wiki/Okapi_BM25).
But tantivy's scope does not stop there. Numerous features are required to power rich-search applications. For instance, one may want to:
- compute the count of documents matching a query in the different sections of an e-commerce website,
- display an average price per square meter for a real estate search engine,
- take into account historical user data to rank documents in a specific way,
- or even use tantivy to power an OLAP database.
A more abstract description of the problem space tantivy is trying to address is the following.
Ingest a large set of documents, create an index that makes it possible to
rapidly select all documents matching a given predicate (also known as a query) and
collect some information about them ([See collector](#collector-define-what-to-do-with-matched-documents)).
Roughly speaking the design is following these guiding principles:
- Search should be O(1) in memory.
- Indexing should be O(1) in memory. (In practice it is just sublinear)
- Search should be as fast as possible
This comes at the cost of the dynamicity of the index: while it is possible to add, and delete documents from our corpus, tantivy is designed to handle these updates in large batches.
## [core/](src/core): Index, segments, searchers
Core contains all of the high-level code to make it possible to create an index, add documents, delete documents and commit.
This is at once the most high-level part of tantivy, the least performance-sensitive one, the seemingly most mundane code... And paradoxically the most complicated part.
### Index and Segments
A tantivy index is a collection of smaller independent immutable segments.
Each segment contains its own independent set of data structures.
A segment is identified by a segment id that is in fact a UUID.
The file of a segment has the format
```segment-id . ext```
The extension signals which data structure (or [`SegmentComponent`](src/index/segment_component.rs)) is stored in the file.
A small `meta.json` file is in charge of keeping track of the list of segments, as well as the schema.
On commit, one segment per indexing thread is written to disk, and the `meta.json` is then updated atomically.
For a better idea of how indexing works, you may read the [following blog post](https://fulmicoton.com/posts/behold-tantivy-part2/).
### Deletes
Deletes happen by deleting a "term". Tantivy does not offer any notion of primary id, so it is up to the user to use a field in their schema as if it was a primary id, and delete the associated term if they want to delete only one specific document.
On commit, tantivy will find all of the segments with documents matching this existing term and remove them from the [alive bitset file](src/fastfield/alive_bitset.rs) that represents the bitset of the alive document ids.
Like all segment files, this file is immutable. Because it is possible to have more than one alive bitset file at a given instant, the alive bitset filename has the format ```segment_id . commit_opstamp . del```.
An opstamp is simply an incremental id that identifies any operation applied to the index. For instance, performing a commit or adding a document.
### DocId
Within a segment, all documents are identified by a DocId that ranges within `[0, max_doc)`,
where `max_doc` is the number of documents in the segment (deleted or not). Having such a compact `DocId` space is key to the compression of our data structures.
The DocIds are simply allocated in the order documents are added to the index.
### Merges
In separate threads, tantivy's index writer searches for opportunities to merge segments.
The point of segment merge is to:
- eventually get rid of tombstoned documents
- reduce the otherwise ever-growing number of segments.
Indeed, while having several segments instead of one does not hurt search too much, having hundreds can have a measurable impact on the search performance.
### Searcher
The user of the library usually does not need to know about the existence of Segments.
Searching is done through an object called a [`Searcher`](src/core/searcher.rs), that captures a
snapshot of the index at one point in time, by holding a list of [`SegmentReader`](src/index/segment_reader.rs).
In other words, regardless of commits, file garbage collection, or segment merge that might happen, as long as the user holds and reuses the same [Searcher](src/core/searcher.rs), search will happen on an immutable snapshot of the index.
## [directory/](src/directory): Where should the data be stored?
Tantivy, like Lucene, abstracts the place where the data should be stored in a key-trait
called [`Directory`](src/directory/directory.rs).
Contrary to Lucene however, "files" are quite different from some kind of `io::Read` object.
Check out the `Directory` trait in [`src/directory/directory.rs`](src/directory/directory.rs) for more details.
Tantivy ships two main directory implementations: the `MmapDirectory` and the `RamDirectory`,
but users can extend tantivy with their own implementation.
## [schema/](src/schema): What are documents?
Tantivy's document follows a very strict schema, decided before building any index.
The schema defines all of the fields that the index's [`Document`](src/schema/document/mod.rs) may and should contain, their types (`text`, `i64`, `u64`, `Date`, ...) as well as how it should be indexed / represented in tantivy.
Depending on the type of the field, you can decide to
- put it in the docstore
- store it as a fast field
- index it
Practically, tantivy will push values associated with this type to up to 3 respective
data structures.
*Limitations*
As of today, tantivy's schema imposes a 1:1 relationship between a field that is being ingested and a field represented in the search index. In sophisticated search applications, it is fairly common to want to index a field twice using different tokenizers, or to index the concatenation of several fields together into one field.
This is not something tantivy supports, and it is up to the user to duplicate field / concatenate fields before feeding them to tantivy.
## General information about these data structures
All data structures in tantivy have:
- a writer
- a serializer
- a reader
The writer builds an in-memory representation of a batch of documents. This representation is not searchable. It is just meant as an intermediary mutable representation, to which we can sequentially add
the document of a batch. At the end of the batch (or if a memory limit is reached), this representation
is then converted into an on-disk immutable representation, that is extremely compact.
This conversion is done by the serializer.
Finally, the reader is in charge of offering an API to read on this on-disk read-only representation.
In tantivy, readers are designed to require very little anonymous memory. The data is read straight from an mmapped file, and loading an index is as fast as mmapping its files.
## [store/](src/store): Here is my DocId, Gimme my document
The docstore is a row-oriented storage that, for each document, stores a subset of the fields
that are marked as stored in the schema. The docstore is compressed using a general-purpose algorithm
like LZ4.
**Useful for**
In search engines, it is often used to display search results.
Once the top 10 documents have been identified, we fetch them from the store, and display them or their snippet on the search result page (aka SERP).
**Not useful for**
Fetching a document from the store is typically a "slow" operation. It usually consists in
- searching into a compact tree-like data structure to find the position of the right block.
- decompressing a small block
- returning the document from this block.
It is NOT meant to be called for every document matching a query.
As a rule of thumb, if you hit the docstore more than 100 times per search query, you are probably misusing tantivy.
## [fastfield/](src/fastfield): Here is my DocId, Gimme my value
Fast fields are stored in a column-oriented storage that allows for random access.
The only compression applied is bitpacking. The column comes with two pieces of metadata:
the minimum value in the column and the number of bits per doc.
Fetching a value for a `DocId` is then as simple as computing
```rust
min_value + fetch_bits(num_bits * doc_id..num_bits * (doc_id+1))
```
This operation just requires one memory fetch.
Because DocSets are scanned through in order (DocIds are iterated in a sorted manner), this
also helps locality.
In Lucene's jargon, fast fields are called DocValues.
**Useful for**
They are typically integer values that are useful to either rank or compute aggregate over
all of the documents matching a query (aka [DocSet](src/docset.rs)).
For instance, one could define a function to combine upvotes with tantivy's internal relevancy score.
This can be done by fetching a fast field during scoring.
One could also compute the mean price of the items matching a query in an e-commerce website.
This can be done by fetching a fast field in a collector.
Finally one could decide to post-filter a docset to remove documents with a price within a specific range.
If the ratio of filtered out documents is not too low, an efficient way to do this is to fetch the price and apply the filter on the collector side.
Aside from integer values, it is also possible to store an actual byte payload.
For advanced search engines, it is possible to store all of the features required for learning-to-rank in a byte payload, access it during search, and apply the learning-to-rank model.
Finally facets are a specific kind of fast field, and the associated source code is in [`fastfield/facet_reader.rs`](src/fastfield/facet_reader.rs).
# The inverted search index
The inverted index is the core part of full-text search.
When presented with a new document with the text field "Hello, happy tax payer!", tantivy breaks it into a list of so-called tokens. In addition to just splitting these strings into tokens, it might also do different kinds of operations like dropping the punctuation, converting the characters to lowercase, applying stemming, etc. Tantivy makes it possible to configure the operations to be applied in the schema (tokenizer/ is the place where these operations are implemented).
For instance, the default tokenizer of tantivy would break our text into: `[hello, happy, tax, payer]`.
The document will therefore be registered in the inverted index as containing the terms
`[text:hello, text:happy, text:tax, text:payer]`.
The role of the inverted index is, when given a term, to give us in return a very fast iterator over the sorted doc ids that match the term.
Such an iterator is called a posting list. In addition to giving us `DocId`, they can also give us optionally the number of occurrences of the term for each document, also called term frequency or TF.
These iterators being sorted by DocId, one can create an iterator over the documents containing `text:tax AND text:payer`, `(text:tax AND text:payer) OR (text:contribuable)` or any boolean expression.
In order to represent the function
```Term ⟶ Posting```
The inverted index actually consists of two data structures chained together.
- [Term](src/schema/term.rs) ⟶ [TermInfo](src/postings/term_info.rs) is addressed by the term dictionary.
- [TermInfo](src/postings/term_info.rs) ⟶ [Posting](src/postings/postings.rs) is addressed by the posting lists.
Where [TermInfo](src/postings/term_info.rs) is an object containing some meta data about a term.
## [termdict/](src/termdict): Here is a term, give me the [TermInfo](src/postings/term_info.rs)
Tantivy's term dictionary is mainly in charge of supplying the function
[Term](src/schema/term.rs) ⟶ [TermInfo](src/postings/term_info.rs)
It is itself broken into two parts.
- [Term](src/schema/term.rs) ⟶ [TermOrdinal](src/termdict/mod.rs) is addressed by a finite state transducer, implemented by the fst crate.
- [TermOrdinal](src/termdict/mod.rs) ⟶ [TermInfo](src/postings/term_info.rs) is addressed by the term info store.
## [postings/](src/postings): Iterate over documents... very fast
A posting list makes it possible to store a sorted list of doc ids and for each doc store
a term frequency as well.
The posting lists are stored in a separate file. The [TermInfo](src/postings/term_info.rs) contains an offset into that file and a number of documents for the given posting list. Both are required and sufficient to read the posting list.
The posting list is organized in blocks of 128 documents.
One block of doc ids is followed by one block of term frequencies.
The doc ids are delta encoded and bitpacked.
The term frequencies are bitpacked.
Because the number of docs is rarely a multiple of 128, the last block may contain an arbitrary number of documents between 1 and 127. We then use variable int encoding instead of bitpacking.
## [positions/](src/positions): Where are my terms within the documents?
Phrase queries make it possible to search for documents containing a specific sequence of terms.
For instance, the phrase query "the art of war" does not match "the war of art".
To make this possible, the schema can specify that a field should store positions in addition to being indexed.
The token positions of all of the terms are then stored in a separate file with the extension `.pos`.
The [TermInfo](src/postings/term_info.rs) gives an offset (expressed in position this time) in this file. As we iterate through the docset,
we advance the position reader by the number of term frequencies of the current document.
## [fieldnorm/](src/fieldnorm): Here is my doc, how many tokens in this field?
The [BM25](https://en.wikipedia.org/wiki/Okapi_BM25) formula also requires knowing the number of tokens stored in a specific field for a given document. We store this information on one byte per document in the fieldnorm.
The fieldnorm is therefore compressed. Values up to 40 are encoded unchanged.
## [tokenizer/](src/tokenizer): How should we process text?
Text processing is key to a good search experience.
Split or normalize your text too much, and the search results will have a lower precision and a higher recall.
Do not normalize, or under-split your text, and you will end up with a higher precision and a lower recall.
Text processing can be configured by selecting an off-the-shelf [`Tokenizer`](./src/tokenizer/tokenizer.rs) or implementing your own to first split the text into tokens, and then chain different [`TokenFilter`](src/tokenizer/tokenizer.rs)'s to it.
Tantivy comes with a few tokenizers, but external crates are offering advanced tokenizers, such as [Lindera](https://crates.io/crates/lindera) for Japanese.
## [query/](src/query): Define and compose queries
The [Query](src/query/query.rs) trait defines what a query is.
Due to the necessity for some queries to compute some statistics over the entire index, and because the
index is composed of several `SegmentReader`, the path from transforming a `Query` to an iterator over documents is slightly convoluted, but fundamentally, this is what a Query is.
The iterator over documents comes with some scoring function. The resulting trait is called a
[Scorer](src/query/scorer.rs) and is specific to a segment.
Different queries can be combined using the [BooleanQuery](src/query/boolean_query/).
Tantivy comes with different types of queries and can be extended by implementing
the `Query`, `Weight`, and `Scorer` traits.
## [collector](src/collector): Define what to do with matched documents
Collectors define how to aggregate the documents matching a query, in the broadest sense possible.
The search will push matched documents one by one, calling their
`fn collect(doc: DocId, score: Score);` method.
Users may implement their own collectors by implementing the [Collector](src/collector/mod.rs) trait.
## [query-grammar](query-grammar): Defines the grammar of the query parser
While the [QueryParser](src/query/query_parser/query_parser.rs) struct is located in the `query/` directory, the actual parser combinator used to convert user queries into an AST is in an external crate called `query-grammar`. This part was externalized to lighten the work of the compiler.
================================================
FILE: AUTHORS
================================================
# This is the list of authors of tantivy for copyright purposes.
Paul Masurel
Laurentiu Nicola
Dru Sellers
Ashley Mannix
Michael J. Curry
Jason Wolfe
# As an employee of Google I am required to add Google LLC
# in the list of authors, but this project is not affiliated to Google
# in any other way.
Google LLC
================================================
FILE: CHANGELOG.md
================================================
Tantivy 0.25
================================
## Bugfixes
- fix union performance regression in tantivy 0.24 [#2663](https://github.com/quickwit-oss/tantivy/pull/2663)(@PSeitz)
- make zstd optional in sstable [#2633](https://github.com/quickwit-oss/tantivy/pull/2633)(@Parth)
- Fix TopDocs::order_by_string_fast_field for asc order [#2672](https://github.com/quickwit-oss/tantivy/pull/2672)(@stuhood @PSeitz)
## Features/Improvements
- add docs/example and Vec<u32> values to sstable [#2660](https://github.com/quickwit-oss/tantivy/pull/2660)(@PSeitz)
- Add string fast field support to `TopDocs`. [#2642](https://github.com/quickwit-oss/tantivy/pull/2642)(@stuhood)
- update edition to 2024 [#2620](https://github.com/quickwit-oss/tantivy/pull/2620)(@PSeitz)
- Allow optional spaces between the field name and the value in the query parser [#2678](https://github.com/quickwit-oss/tantivy/pull/2678)(@Darkheir)
- Support mixed field types in query parser [#2676](https://github.com/quickwit-oss/tantivy/pull/2676)(@trinity-1686a)
- Add per-field size details [#2679](https://github.com/quickwit-oss/tantivy/pull/2679)(@fulmicoton)
Tantivy 0.24.2
================================
- Fix TopNComputer for reverse order. [#2672](https://github.com/quickwit-oss/tantivy/pull/2672)(@stuhood @PSeitz)
Affected queries are [order_by_fast_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_fast_field) and
[order_by_u64_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_u64_field)
for `Order::Asc`
Tantivy 0.24.1
================================
- Fix: bump required rust version to 1.81
Tantivy 0.24
================================
Tantivy 0.24 will be backwards compatible with indices created with v0.22 and v0.21. The new minimum rust version will be 1.75. Tantivy 0.23 will be skipped.
#### Bugfixes
- fix potential endless loop in merge [#2457](https://github.com/quickwit-oss/tantivy/pull/2457)(@PSeitz)
- fix bug that causes out-of-order sstable key. [#2445](https://github.com/quickwit-oss/tantivy/pull/2445)(@fulmicoton)
- fix ReferenceValue API flaw [#2372](https://github.com/quickwit-oss/tantivy/pull/2372)(@PSeitz)
- fix `OwnedBytes` debug panic [#2512](https://github.com/quickwit-oss/tantivy/pull/2512)(@b41sh)
- catch panics during merges [#2582](https://github.com/quickwit-oss/tantivy/pull/2582)(@rdettai)
- switch from u32 to usize in bitpacker. This enables multivalued columns larger than 4GB, which crashed during merge before. [#2581](https://github.com/quickwit-oss/tantivy/pull/2581) [#2586](https://github.com/quickwit-oss/tantivy/pull/2586)(@fulmicoton-dd @PSeitz)
#### Breaking API Changes
- remove index sorting [#2434](https://github.com/quickwit-oss/tantivy/pull/2434)(@PSeitz)
#### Features/Improvements
- **Aggregation**
- Support for cardinality aggregation [#2337](https://github.com/quickwit-oss/tantivy/pull/2337) [#2446](https://github.com/quickwit-oss/tantivy/pull/2446) (@raphaelcoeffic @PSeitz)
- Support for extended stats aggregation [#2247](https://github.com/quickwit-oss/tantivy/pull/2247)(@giovannicuccu)
- Add Key::I64 and Key::U64 variants in aggregation to avoid f64 precision issues [#2468](https://github.com/quickwit-oss/tantivy/pull/2468)(@PSeitz)
- Faster term aggregation fetch terms [#2447](https://github.com/quickwit-oss/tantivy/pull/2447)(@PSeitz)
- Improve custom order deserialization [#2451](https://github.com/quickwit-oss/tantivy/pull/2451)(@PSeitz)
- Change AggregationLimits behavior [#2495](https://github.com/quickwit-oss/tantivy/pull/2495)(@PSeitz)
- lower contention on AggregationLimits [#2394](https://github.com/quickwit-oss/tantivy/pull/2394)(@PSeitz)
- fix postcard compatibility for top_hits, add postcard test [#2346](https://github.com/quickwit-oss/tantivy/pull/2346)(@PSeitz)
- reduce top hits memory consumption [#2426](https://github.com/quickwit-oss/tantivy/pull/2426)(@PSeitz)
- check unsupported parameters top_hits [#2351](https://github.com/quickwit-oss/tantivy/pull/2351)(@PSeitz)
- Change AggregationLimits to AggregationLimitsGuard [#2495](https://github.com/quickwit-oss/tantivy/pull/2495)(@PSeitz)
- add support for counting non integer in aggregation [#2547](https://github.com/quickwit-oss/tantivy/pull/2547)(@trinity-1686a)
- **Range Queries**
- Support fast field range queries on json fields [#2456](https://github.com/quickwit-oss/tantivy/pull/2456)(@PSeitz)
- Add support for str fast field range query [#2460](https://github.com/quickwit-oss/tantivy/pull/2460) [#2452](https://github.com/quickwit-oss/tantivy/pull/2452) [#2453](https://github.com/quickwit-oss/tantivy/pull/2453)(@PSeitz)
- modify fastfield range query heuristic [#2375](https://github.com/quickwit-oss/tantivy/pull/2375)(@trinity-1686a)
- add FastFieldRangeQuery for explicit range queries on fast field (for `RangeQuery` it is autodetected) [#2477](https://github.com/quickwit-oss/tantivy/pull/2477)(@PSeitz)
- add format backwards-compatibility tests [#2485](https://github.com/quickwit-oss/tantivy/pull/2485)(@PSeitz)
- add columnar format compatibility tests [#2433](https://github.com/quickwit-oss/tantivy/pull/2433)(@PSeitz)
- Improved snippet ranges algorithm [#2474](https://github.com/quickwit-oss/tantivy/pull/2474)(@gezihuzi)
- make find_field_with_default return json fields without path [#2476](https://github.com/quickwit-oss/tantivy/pull/2476)(@trinity-1686a)
- Make `BooleanQuery` support `minimum_number_should_match` [#2405](https://github.com/quickwit-oss/tantivy/pull/2405)(@LebranceBW)
- Make `NUM_MERGE_THREADS` configurable [#2535](https://github.com/quickwit-oss/tantivy/pull/2535)(@Barre)
- **RegexPhraseQuery**
`RegexPhraseQuery` supports phrase queries with regex. E.g. query "b.* b.* wolf" matches "big bad wolf". Slop is supported as well: "b.* wolf"~2 matches "big bad wolf" [#2516](https://github.com/quickwit-oss/tantivy/pull/2516)(@PSeitz)
- **Optional Index in Multivalue Columnar Index**
For mostly empty multivalued indices there was a large overhead during creation when iterating all docids (merge case).
This is alleviated by placing an optional index in the multivalued index to mark documents that have values.
This will slightly increase space and access time. [#2439](https://github.com/quickwit-oss/tantivy/pull/2439)(@PSeitz)
- **Store DateTime as nanoseconds in doc store** DateTime in the doc store was truncated to microseconds previously. This removes this truncation, while still keeping backwards compatibility. [#2486](https://github.com/quickwit-oss/tantivy/pull/2486)(@PSeitz)
- **Performance/Memory**
- lift clauses in LogicalAst for optimized ast during execution [#2449](https://github.com/quickwit-oss/tantivy/pull/2449)(@PSeitz)
- Use Vec instead of BTreeMap to back OwnedValue object [#2364](https://github.com/quickwit-oss/tantivy/pull/2364)(@fulmicoton)
- Replace TantivyDocument with CompactDoc. CompactDoc is much smaller and provides similar performance. [#2402](https://github.com/quickwit-oss/tantivy/pull/2402)(@PSeitz)
- Recycling buffer in PrefixPhraseScorer [#2443](https://github.com/quickwit-oss/tantivy/pull/2443)(@fulmicoton)
- **Json Type**
- JSON supports now all values on the root level. Previously an object was required. This enables support for flat mixed types. allow more JSON values, fix i64 special case [#2383](https://github.com/quickwit-oss/tantivy/pull/2383)(@PSeitz)
- add json path constructor to term [#2367](https://github.com/quickwit-oss/tantivy/pull/2367)(@PSeitz)
- **QueryParser**
- fix de-escaping too much in query parser [#2427](https://github.com/quickwit-oss/tantivy/pull/2427)(@trinity-1686a)
- improve query parser [#2416](https://github.com/quickwit-oss/tantivy/pull/2416)(@trinity-1686a)
- Support field grouping `title:(return AND "pink panther")` [#2333](https://github.com/quickwit-oss/tantivy/pull/2333)(@trinity-1686a)
- allow term starting with wildcard [#2568](https://github.com/quickwit-oss/tantivy/pull/2568)(@trinity-1686a)
- Exist queries match subpath fields [#2558](https://github.com/quickwit-oss/tantivy/pull/2558)(@rdettai)
- add access benchmark for columnar [#2432](https://github.com/quickwit-oss/tantivy/pull/2432)(@PSeitz)
- extend indexwriter proptests [#2342](https://github.com/quickwit-oss/tantivy/pull/2342)(@PSeitz)
- add bench & test for columnar merging [#2428](https://github.com/quickwit-oss/tantivy/pull/2428)(@PSeitz)
- Change in Executor API [#2391](https://github.com/quickwit-oss/tantivy/pull/2391)(@fulmicoton)
- Removed usage of num_cpus [#2387](https://github.com/quickwit-oss/tantivy/pull/2387)(@fulmicoton)
- use binggan for agg and stacker benchmark [#2378](https://github.com/quickwit-oss/tantivy/pull/2378) [#2492](https://github.com/quickwit-oss/tantivy/pull/2492)(@PSeitz)
- cleanup top level exports [#2382](https://github.com/quickwit-oss/tantivy/pull/2382)(@PSeitz)
- make convert_to_fast_value_and_append_to_json_term pub [#2370](https://github.com/quickwit-oss/tantivy/pull/2370)(@PSeitz)
- remove JsonTermWriter [#2238](https://github.com/quickwit-oss/tantivy/pull/2238)(@PSeitz)
- validate sort by field type [#2336](https://github.com/quickwit-oss/tantivy/pull/2336)(@PSeitz)
- Fix trait bound of StoreReader::iter [#2360](https://github.com/quickwit-oss/tantivy/pull/2360)(@adamreichold)
- remove read_postings_no_deletes [#2526](https://github.com/quickwit-oss/tantivy/pull/2526)(@PSeitz)
Tantivy 0.22.1
================================
- Fix TopNComputer for reverse order. [#2672](https://github.com/quickwit-oss/tantivy/pull/2672)(@stuhood @PSeitz)
Affected queries are [order_by_fast_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_fast_field) and
[order_by_u64_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_u64_field)
for `Order::Asc`
Tantivy 0.22
================================
Tantivy 0.22 will be able to read indices created with Tantivy 0.21.
#### Bugfixes
- Fix null byte handling in JSON paths (null bytes in json keys caused panic during indexing) [#2345](https://github.com/quickwit-oss/tantivy/pull/2345)(@PSeitz)
- Fix bug that can cause `get_docids_for_value_range` to panic. [#2295](https://github.com/quickwit-oss/tantivy/pull/2295)(@fulmicoton)
- Avoid 1 document indices by increase min memory to 15MB for indexing [#2176](https://github.com/quickwit-oss/tantivy/pull/2176)(@PSeitz)
- Fix merge panic for JSON fields [#2284](https://github.com/quickwit-oss/tantivy/pull/2284)(@PSeitz)
- Fix bug occurring when merging JSON object indexed with positions. [#2253](https://github.com/quickwit-oss/tantivy/pull/2253)(@fulmicoton)
- Fix empty DateHistogram gap bug [#2183](https://github.com/quickwit-oss/tantivy/pull/2183)(@PSeitz)
- Fix range query end check (fields with less than 1 value per doc are affected) [#2226](https://github.com/quickwit-oss/tantivy/pull/2226)(@PSeitz)
- Handle exclusive out of bounds ranges on fastfield range queries [#2174](https://github.com/quickwit-oss/tantivy/pull/2174)(@PSeitz)
#### Breaking API Changes
- rename ReloadPolicy onCommit to onCommitWithDelay [#2235](https://github.com/quickwit-oss/tantivy/pull/2235)(@giovannicuccu)
- Move exports from the root into modules [#2220](https://github.com/quickwit-oss/tantivy/pull/2220)(@PSeitz)
- Accept field name instead of `Field` in FilterCollector [#2196](https://github.com/quickwit-oss/tantivy/pull/2196)(@PSeitz)
- remove deprecated IntOptions and DateTime [#2353](https://github.com/quickwit-oss/tantivy/pull/2353)(@PSeitz)
#### Features/Improvements
- Tantivy documents as a trait: Index data directly without converting to tantivy types first [#2071](https://github.com/quickwit-oss/tantivy/pull/2071)(@ChillFish8)
- encode some part of posting list as -1 instead of direct values (smaller inverted indices) [#2185](https://github.com/quickwit-oss/tantivy/pull/2185)(@trinity-1686a)
- **Aggregation**
- Support to deserialize f64 from string [#2311](https://github.com/quickwit-oss/tantivy/pull/2311)(@PSeitz)
- Add a top_hits aggregator [#2198](https://github.com/quickwit-oss/tantivy/pull/2198)(@ditsuke)
- Support bool type in term aggregation [#2318](https://github.com/quickwit-oss/tantivy/pull/2318)(@PSeitz)
- Support ip addresses in term aggregation [#2319](https://github.com/quickwit-oss/tantivy/pull/2319)(@PSeitz)
- Support date type in term aggregation [#2172](https://github.com/quickwit-oss/tantivy/pull/2172)(@PSeitz)
- Support escaped dot when addressing field [#2250](https://github.com/quickwit-oss/tantivy/pull/2250)(@PSeitz)
- Add ExistsQuery to check documents that have a value [#2160](https://github.com/quickwit-oss/tantivy/pull/2160)(@imotov)
- Expose TopDocs::order_by_u64_field again [#2282](https://github.com/quickwit-oss/tantivy/pull/2282)(@ditsuke)
- **Memory/Performance**
- Faster TopN: replace BinaryHeap with TopNComputer [#2186](https://github.com/quickwit-oss/tantivy/pull/2186)(@PSeitz)
- reduce number of allocations during indexing [#2257](https://github.com/quickwit-oss/tantivy/pull/2257)(@PSeitz)
- Less Memory while indexing: docid deltas while indexing [#2249](https://github.com/quickwit-oss/tantivy/pull/2249)(@PSeitz)
- Faster indexing: use term hashmap in fastfield [#2243](https://github.com/quickwit-oss/tantivy/pull/2243)(@PSeitz)
- term hashmap remove copy in is_empty, unused unordered_id [#2229](https://github.com/quickwit-oss/tantivy/pull/2229)(@PSeitz)
- add method to fetch block of first values in columnar [#2330](https://github.com/quickwit-oss/tantivy/pull/2330)(@PSeitz)
- Faster aggregations: add fast path for full columns in fetch_block [#2328](https://github.com/quickwit-oss/tantivy/pull/2328)(@PSeitz)
- Faster sstable loading: use fst for sstable index [#2268](https://github.com/quickwit-oss/tantivy/pull/2268)(@trinity-1686a)
- **QueryParser**
- allow newline where we allow space in query parser [#2302](https://github.com/quickwit-oss/tantivy/pull/2302)(@trinity-1686a)
- allow some mixing of occur and bool in strict query parser [#2323](https://github.com/quickwit-oss/tantivy/pull/2323)(@trinity-1686a)
- handle * inside term in lenient query parser [#2228](https://github.com/quickwit-oss/tantivy/pull/2228)(@trinity-1686a)
- add support for exists query syntax in query parser [#2170](https://github.com/quickwit-oss/tantivy/pull/2170)(@trinity-1686a)
- Add shared search executor [#2312](https://github.com/quickwit-oss/tantivy/pull/2312)(@MochiXu)
- Truncate keys to u16::MAX in term hashmap [#2299](https://github.com/quickwit-oss/tantivy/pull/2299)(@PSeitz)
- report if a term matched when warming up posting list [#2309](https://github.com/quickwit-oss/tantivy/pull/2309)(@trinity-1686a)
- Support json fields in FuzzyTermQuery [#2173](https://github.com/quickwit-oss/tantivy/pull/2173)(@PingXia-at)
- Read list of fields encoded in term dictionary for JSON fields [#2184](https://github.com/quickwit-oss/tantivy/pull/2184)(@PSeitz)
- add collect_block to BoxableSegmentCollector [#2331](https://github.com/quickwit-oss/tantivy/pull/2331)(@PSeitz)
- expose collect_block buffer size [#2326](https://github.com/quickwit-oss/tantivy/pull/2326)(@PSeitz)
- Forward regex parser errors [#2288](https://github.com/quickwit-oss/tantivy/pull/2288)(@adamreichold)
- Make FacetCounts defaultable and cloneable. [#2322](https://github.com/quickwit-oss/tantivy/pull/2322)(@adamreichold)
- Derive Debug for SchemaBuilder [#2254](https://github.com/quickwit-oss/tantivy/pull/2254)(@GodTamIt)
- add missing inlines to tantivy options [#2245](https://github.com/quickwit-oss/tantivy/pull/2245)(@PSeitz)
Tantivy 0.21.1
================================
#### Bugfixes
- Range queries on fast fields with less values on that field than documents had an invalid end condition, leading to missing results. [#2226](https://github.com/quickwit-oss/tantivy/issues/2226)(@appaquet @PSeitz)
- Increase the minimum memory budget from 3MB to 15MB to avoid single doc segments (API fix). [#2176](https://github.com/quickwit-oss/tantivy/issues/2176)(@PSeitz)
Tantivy 0.21
================================
#### Bugfixes
- Fix track fast field memory consumption, which led to higher memory consumption than the budget allowed during indexing [#2148](https://github.com/quickwit-oss/tantivy/issues/2148)[#2147](https://github.com/quickwit-oss/tantivy/issues/2147)(@PSeitz)
- Fix a regression from 0.20 where sort index by date wasn't working anymore [#2124](https://github.com/quickwit-oss/tantivy/issues/2124)(@PSeitz)
- Fix getting the root facet on the `FacetCollector`. [#2086](https://github.com/quickwit-oss/tantivy/issues/2086)(@adamreichold)
- Align numerical type priority order of columnar and query. [#2088](https://github.com/quickwit-oss/tantivy/issues/2088)(@fmassot)
#### Breaking Changes
- Remove support for Brotli and Snappy compression [#2123](https://github.com/quickwit-oss/tantivy/issues/2123)(@adamreichold)
#### Features/Improvements
- Implement lenient query parser [#2129](https://github.com/quickwit-oss/tantivy/pull/2129)(@trinity-1686a)
- order_by_u64_field and order_by_fast_field allow sorting in ascending and descending order [#2111](https://github.com/quickwit-oss/tantivy/issues/2111)(@naveenann)
- Allow dynamic filters in text analyzer builder [#2110](https://github.com/quickwit-oss/tantivy/issues/2110)(@fulmicoton @fmassot)
- **Aggregation**
- Add missing parameter for term aggregation [#2149](https://github.com/quickwit-oss/tantivy/issues/2149)[#2103](https://github.com/quickwit-oss/tantivy/issues/2103)(@PSeitz)
- Add missing parameter for percentiles [#2157](https://github.com/quickwit-oss/tantivy/issues/2157)(@PSeitz)
- Add missing parameter for stats,min,max,count,sum,avg [#2151](https://github.com/quickwit-oss/tantivy/issues/2151)(@PSeitz)
- Improve aggregation deserialization error message [#2150](https://github.com/quickwit-oss/tantivy/issues/2150)(@PSeitz)
- Add validation for type Bytes to term_agg [#2077](https://github.com/quickwit-oss/tantivy/issues/2077)(@PSeitz)
- Alternative mixed field collection [#2135](https://github.com/quickwit-oss/tantivy/issues/2135)(@PSeitz)
- Add missing query_terms impl for TermSetQuery. [#2120](https://github.com/quickwit-oss/tantivy/issues/2120)(@adamreichold)
- Minor improvements to OwnedBytes [#2134](https://github.com/quickwit-oss/tantivy/issues/2134)(@adamreichold)
- Remove allocations in split compound words [#2080](https://github.com/quickwit-oss/tantivy/issues/2080)(@PSeitz)
- Ngram tokenizer now returns an error with invalid arguments [#2102](https://github.com/quickwit-oss/tantivy/issues/2102)(@fmassot)
- Make TextAnalyzerBuilder public [#2097](https://github.com/quickwit-oss/tantivy/issues/2097)(@adamreichold)
- Return an error when tokenizer is not found while indexing [#2093](https://github.com/quickwit-oss/tantivy/issues/2093)(@naveenann)
- Delayed column opening during merge [#2132](https://github.com/quickwit-oss/tantivy/issues/2132)(@PSeitz)
Tantivy 0.20.2
================================
- Align numerical type priority order on the search side. [#2088](https://github.com/quickwit-oss/tantivy/issues/2088) (@fmassot)
- Fix is_child_of function not considering the root facet. [#2086](https://github.com/quickwit-oss/tantivy/issues/2086) (@adamreichold)
Tantivy 0.20.1
================================
- Fix building on windows with mmap [#2070](https://github.com/quickwit-oss/tantivy/issues/2070) (@ChillFish8)
Tantivy 0.20
================================
#### Bugfixes
- Fix phrase queries with slop (slop supports now transpositions, algorithm that carries slop so far for num terms > 2) [#2031](https://github.com/quickwit-oss/tantivy/issues/2031)[#2020](https://github.com/quickwit-oss/tantivy/issues/2020)(@PSeitz)
- Handle error for exists on MMapDirectory [#1988](https://github.com/quickwit-oss/tantivy/issues/1988) (@PSeitz)
- Aggregation
- Fix min doc_count empty merge bug [#2057](https://github.com/quickwit-oss/tantivy/issues/2057) (@PSeitz)
- Fix: Sort order for term aggregations (sort order on key was inverted) [#1858](https://github.com/quickwit-oss/tantivy/issues/1858) (@PSeitz)
#### Features/Improvements
- Add PhrasePrefixQuery [#1842](https://github.com/quickwit-oss/tantivy/issues/1842) (@trinity-1686a)
- Add `coerce` option for text and numbers types (convert the value instead of returning an error during indexing) [#1904](https://github.com/quickwit-oss/tantivy/issues/1904) (@PSeitz)
- Add regex tokenizer [#1759](https://github.com/quickwit-oss/tantivy/issues/1759)(@mkleen)
- Move tokenizer API to separate crate. Having a separate crate with a stable API will allow us to use tokenizers with different tantivy versions. [#1767](https://github.com/quickwit-oss/tantivy/issues/1767) (@PSeitz)
- **Columnar crate**: New fast field handling (@fulmicoton @PSeitz) [#1806](https://github.com/quickwit-oss/tantivy/issues/1806)[#1809](https://github.com/quickwit-oss/tantivy/issues/1809)
- Support for fast fields with optional values. Previously tantivy supported only single-valued and multi-value fast fields. The encoding of optional fast fields is now very compact.
- Fast field Support for JSON (schemaless fast fields). Support multiple types on the same column. [#1876](https://github.com/quickwit-oss/tantivy/issues/1876) (@fulmicoton)
- Unified access for fast fields over different cardinalities.
- Unified storage for typed and untyped fields.
- Move fastfield codecs into columnar. [#1782](https://github.com/quickwit-oss/tantivy/issues/1782) (@fulmicoton)
- Sparse dense index for optional values [#1716](https://github.com/quickwit-oss/tantivy/issues/1716) (@PSeitz)
- Switch to nanosecond precision in DateTime fastfield [#2016](https://github.com/quickwit-oss/tantivy/issues/2016) (@PSeitz)
- **Aggregation**
- Add `date_histogram` aggregation (only `fixed_interval` for now) [#1900](https://github.com/quickwit-oss/tantivy/issues/1900) (@PSeitz)
- Add `percentiles` aggregations [#1984](https://github.com/quickwit-oss/tantivy/issues/1984) (@PSeitz)
- [**breaking**] Drop JSON support on intermediate agg result (we use postcard as format in `quickwit` to send intermediate results) [#1992](https://github.com/quickwit-oss/tantivy/issues/1992) (@PSeitz)
- Set memory limit in bytes for aggregations after which they abort (Previously there was only the bucket limit) [#1942](https://github.com/quickwit-oss/tantivy/issues/1942)[#1957](https://github.com/quickwit-oss/tantivy/issues/1957)(@PSeitz)
- Add support for u64,i64,f64 fields in term aggregation [#1883](https://github.com/quickwit-oss/tantivy/issues/1883) (@PSeitz)
- Allow histogram bounds to be passed as Rfc3339 [#2076](https://github.com/quickwit-oss/tantivy/issues/2076) (@PSeitz)
- Add count, min, max, and sum aggregations [#1794](https://github.com/quickwit-oss/tantivy/issues/1794) (@guilload)
- Switch to Aggregation without serde_untagged => better deserialization errors. [#2003](https://github.com/quickwit-oss/tantivy/issues/2003) (@PSeitz)
- Switch to ms in histogram for date type (ES compatibility) [#2045](https://github.com/quickwit-oss/tantivy/issues/2045) (@PSeitz)
- Reduce term aggregation memory consumption [#2013](https://github.com/quickwit-oss/tantivy/issues/2013) (@PSeitz)
- Reduce agg memory consumption: Replace generic aggregation collector (which has a high memory requirement per instance) in aggregation tree with optimized versions behind a trait.
- Split term collection count and sub_agg (Faster term agg with less memory consumption for cases without sub-aggs) [#1921](https://github.com/quickwit-oss/tantivy/issues/1921) (@PSeitz)
- Schemaless aggregations: In combination with stacker tantivy now supports schemaless aggregations via the JSON type.
- Add aggregation support for JSON type [#1888](https://github.com/quickwit-oss/tantivy/issues/1888) (@PSeitz)
- Mixed types support on JSON fields in aggs [#1971](https://github.com/quickwit-oss/tantivy/issues/1971) (@PSeitz)
- Perf: Fetch blocks of vals in aggregation for all cardinality [#1950](https://github.com/quickwit-oss/tantivy/issues/1950) (@PSeitz)
- Allow histogram bounds to be passed as Rfc3339 [#2076](https://github.com/quickwit-oss/tantivy/issues/2076) (@PSeitz)
- `Searcher` with disabled scoring via `EnableScoring::Disabled` [#1780](https://github.com/quickwit-oss/tantivy/issues/1780) (@shikhar)
- Enable tokenizer on json fields [#2053](https://github.com/quickwit-oss/tantivy/issues/2053) (@PSeitz)
- Enforcing "NOT" and "-" queries consistency in UserInputAst [#1609](https://github.com/quickwit-oss/tantivy/issues/1609) (@bazhenov)
- Faster indexing
- Refactor tokenization pipeline to use GATs [#1924](https://github.com/quickwit-oss/tantivy/issues/1924) (@trinity-1686a)
- Faster term hash map [#2058](https://github.com/quickwit-oss/tantivy/issues/2058)[#1940](https://github.com/quickwit-oss/tantivy/issues/1940) (@PSeitz)
- tokenizer-api: reduce Tokenizer allocation overhead [#2062](https://github.com/quickwit-oss/tantivy/issues/2062) (@PSeitz)
- Refactor vint [#2010](https://github.com/quickwit-oss/tantivy/issues/2010) (@PSeitz)
- Faster search
- Work in batches of docs on the SegmentCollector (Only for cases without score for now) [#1937](https://github.com/quickwit-oss/tantivy/issues/1937) (@PSeitz)
- Faster fast field range queries using SIMD [#1954](https://github.com/quickwit-oss/tantivy/issues/1954) (@fulmicoton)
- Improve fast field range query performance [#1864](https://github.com/quickwit-oss/tantivy/issues/1864) (@PSeitz)
- Make BM25 scoring more flexible [#1855](https://github.com/quickwit-oss/tantivy/issues/1855) (@alexcole)
- Switch fs2 to fs4 as it is now unmaintained and does not support illumos [#1944](https://github.com/quickwit-oss/tantivy/issues/1944) (@Toasterson)
- Made BooleanWeight and BoostWeight public [#1991](https://github.com/quickwit-oss/tantivy/issues/1991) (@fulmicoton)
- Make index compatible with virtual drives on Windows [#1843](https://github.com/quickwit-oss/tantivy/issues/1843) (@gyk)
- Add stop words for Hungarian language [#2069](https://github.com/quickwit-oss/tantivy/issues/2069) (@tnxbutno)
- Auto downgrade index record option, instead of vint error [#1857](https://github.com/quickwit-oss/tantivy/issues/1857) (@PSeitz)
- Enable range query on fast field for u64 compatible types [#1762](https://github.com/quickwit-oss/tantivy/issues/1762) (@PSeitz) [#1876]
- sstable
- Isolating sstable and stacker in independent crates. [#1718](https://github.com/quickwit-oss/tantivy/issues/1718) (@fulmicoton)
- New sstable format [#1943](https://github.com/quickwit-oss/tantivy/issues/1943)[#1953](https://github.com/quickwit-oss/tantivy/issues/1953) (@trinity-1686a)
- Use DeltaReader directly to implement Dictionary::ord_to_term [#1928](https://github.com/quickwit-oss/tantivy/issues/1928) (@trinity-1686a)
- Use DeltaReader directly to implement Dictionary::term_ord [#1925](https://github.com/quickwit-oss/tantivy/issues/1925) (@trinity-1686a)
- Add separate tokenizer manager for fast fields [#2019](https://github.com/quickwit-oss/tantivy/issues/2019) (@PSeitz)
- Make construction of LevenshteinAutomatonBuilder for FuzzyTermQuery instances lazy. [#1756](https://github.com/quickwit-oss/tantivy/issues/1756) (@adamreichold)
- Added support for madvise when opening an mmapped Index [#2036](https://github.com/quickwit-oss/tantivy/issues/2036) (@fulmicoton)
- Rename `DatePrecision` to `DateTimePrecision` [#2051](https://github.com/quickwit-oss/tantivy/issues/2051) (@guilload)
- Query Parser
- Quotation mark can now be used for phrase queries. [#2050](https://github.com/quickwit-oss/tantivy/issues/2050) (@fulmicoton)
- PhrasePrefixQuery is supported in the query parser via: `field:"phrase ter"*` [#2044](https://github.com/quickwit-oss/tantivy/issues/2044) (@adamreichold)
- Docs
- Update examples for literate docs [#1880](https://github.com/quickwit-oss/tantivy/issues/1880) (@PSeitz)
- Add ip field example [#1775](https://github.com/quickwit-oss/tantivy/issues/1775) (@PSeitz)
- Fix doc store cache documentation [#1821](https://github.com/quickwit-oss/tantivy/issues/1821) (@PSeitz)
- Fix BooleanQuery document [#1999](https://github.com/quickwit-oss/tantivy/issues/1999) (@RT_Enzyme)
- Update comments in the faceted search example [#1737](https://github.com/quickwit-oss/tantivy/issues/1737) (@DawChihLiou)
Tantivy 0.19
================================
#### Bugfixes
- Fix missing fieldnorms for u64, i64, f64, bool, bytes and date [#1620](https://github.com/quickwit-oss/tantivy/pull/1620) (@PSeitz)
- Fix interpolation overflow in linear interpolation fastfield codec [#1480](https://github.com/quickwit-oss/tantivy/pull/1480) (@PSeitz @fulmicoton)
#### Features/Improvements
- Add support for `IN` in queryparser , e.g. `field: IN [val1 val2 val3]` [#1683](https://github.com/quickwit-oss/tantivy/pull/1683) (@trinity-1686a)
- Skip score calculation, when no scoring is required [#1646](https://github.com/quickwit-oss/tantivy/pull/1646) (@PSeitz)
- Limit fast fields to u32 (`get_val(u32)`) [#1644](https://github.com/quickwit-oss/tantivy/pull/1644) (@PSeitz)
- The `DateTime` type has been updated to hold timestamps with microseconds precision.
`DateOptions` and `DatePrecision` have been added to configure Date fields. The precision is used to hint on fast values compression. Otherwise, seconds precision is used everywhere else (i.e terms, indexing) [#1396](https://github.com/quickwit-oss/tantivy/pull/1396) (@evanxg852000)
- Add IP address field type [#1553](https://github.com/quickwit-oss/tantivy/pull/1553) (@PSeitz)
- Add boolean field type [#1382](https://github.com/quickwit-oss/tantivy/pull/1382) (@boraarslan)
- Remove Searcher pool and make `Searcher` cloneable. (@PSeitz)
- Validate settings on create [#1570](https://github.com/quickwit-oss/tantivy/pull/1570) (@PSeitz)
- Detect and apply gcd on fastfield codecs [#1418](https://github.com/quickwit-oss/tantivy/pull/1418) (@PSeitz)
- Doc store
- use separate thread to compress block store [#1389](https://github.com/quickwit-oss/tantivy/pull/1389) [#1510](https://github.com/quickwit-oss/tantivy/pull/1510) (@PSeitz @fulmicoton)
- Expose doc store cache size [#1403](https://github.com/quickwit-oss/tantivy/pull/1403) (@PSeitz)
- Enable compression levels for doc store [#1378](https://github.com/quickwit-oss/tantivy/pull/1378) (@PSeitz)
- Make block size configurable [#1374](https://github.com/quickwit-oss/tantivy/pull/1374) (@kryesh)
- Make `tantivy::TantivyError` cloneable [#1402](https://github.com/quickwit-oss/tantivy/pull/1402) (@PSeitz)
- Add support for phrase slop in query language [#1393](https://github.com/quickwit-oss/tantivy/pull/1393) (@saroh)
- Aggregation
- Add aggregation support for date type [#1693](https://github.com/quickwit-oss/tantivy/pull/1693)(@PSeitz)
- Add support for keyed parameter in range and histogram aggregations [#1424](https://github.com/quickwit-oss/tantivy/pull/1424) (@k-yomo)
- Add aggregation bucket limit [#1363](https://github.com/quickwit-oss/tantivy/pull/1363) (@PSeitz)
- Faster indexing
- [#1610](https://github.com/quickwit-oss/tantivy/pull/1610) (@PSeitz)
- [#1594](https://github.com/quickwit-oss/tantivy/pull/1594) (@PSeitz)
- [#1582](https://github.com/quickwit-oss/tantivy/pull/1582) (@PSeitz)
- [#1611](https://github.com/quickwit-oss/tantivy/pull/1611) (@PSeitz)
- Added a pre-configured stop word filter for various languages [#1666](https://github.com/quickwit-oss/tantivy/pull/1666) (@adamreichold)
Tantivy 0.18
================================
- For date values `chrono` has been replaced with `time` (@uklotzde) #1304 :
- The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
- The type alias `tantivy::DateTime` has been removed.
- `Value::Date` wraps `time::PrimitiveDateTime` without time zone information.
- Internally date/time values are stored as seconds since UNIX epoch in UTC.
- Converting a `time::OffsetDateTime` to `Value::Date` implicitly converts the value into UTC.
If this is not desired do the time zone conversion yourself and use `time::PrimitiveDateTime`
directly instead.
- Add [histogram](https://github.com/quickwit-oss/tantivy/pull/1306) aggregation (@PSeitz)
- Add support for fastfield on text fields (@PSeitz)
- Add terms aggregation (@PSeitz)
- Add support for zstd compression (@kryesh)
Tantivy 0.18.1
================================
- Hotfix: positions computation. #1629 (@fmassot, @fulmicoton, @PSeitz)
Tantivy 0.17
================================
- LogMergePolicy now triggers merges if the ratio of deleted documents reaches a threshold (@shikhar @fulmicoton) [#115](https://github.com/quickwit-oss/tantivy/issues/115)
- Adds a searcher Warmer API (@shikhar @fulmicoton)
- Change to non-strict schema. Ignore fields in data which are not defined in schema. Previously this returned an error. #1211
- Facets are necessarily indexed. Existing index with indexed facets should work out of the box. Index without facets that are marked with index: false should be broken (but they were already broken in a sense). (@fulmicoton) #1195 .
- Bugfix that could in theory impact durability on some filesystems [#1224](https://github.com/quickwit-oss/tantivy/issues/1224)
- Schema now offers not indexing fieldnorms (@lpouget) [#922](https://github.com/quickwit-oss/tantivy/issues/922)
- Reduce the number of fsync calls [#1225](https://github.com/quickwit-oss/tantivy/issues/1225)
- Fix opening bytes index with dynamic codec (@PSeitz) [#1278](https://github.com/quickwit-oss/tantivy/issues/1278)
- Added an aggregation collector for range, average and stats compatible with Elasticsearch. (@PSeitz)
- Added a JSON schema type @fulmicoton [#1251](https://github.com/quickwit-oss/tantivy/issues/1251)
- Added support for slop in phrase queries @halvorboe [#1068](https://github.com/quickwit-oss/tantivy/issues/1068)
Tantivy 0.16.2
================================
- Bugfix in FuzzyTermQuery. (transposition_cost_one was not doing anything)
Tantivy 0.16.1
========================
- Major Bugfix on multivalued fastfield. #1151
- Demux operation (@PSeitz)
Tantivy 0.16.0
=========================
- Bugfix in the filesum check. (@evanxg852000) #1127
- Bugfix in positions when the index is sorted by a field. (@appaquet) #1125
Tantivy 0.15.3
=========================
- Major bugfix. Deleting documents was broken when the index was sorted by a field. (@appaquet, @fulmicoton) #1101
Tantivy 0.15.2
========================
- Major bugfix. DocStore still panics when a deleted doc is at the beginning of a block. (@appaquet) #1088
Tantivy 0.15.1
=========================
- Major bugfix. DocStore panics when first block is deleted. (@appaquet) #1077
Tantivy 0.15.0
=========================
- API Changes. Using Range instead of (start, end) in the API and internals (`FileSlice`, `OwnedBytes`, `Snippets`, ...)
This change is breaking but migration is trivial.
- Added a Histogram collector. (@fulmicoton) #994
- Added support for Option<TCollector>. (@fulmicoton)
- DocAddress is now a struct (@scampi) #987
- Bugfix consistent tie break handling in facet's topk (@hardikpnsp) #357
- Date field support for range queries (@rihardsk) #516
- Added lz4-flex as the default compression scheme in tantivy (@PSeitz) #1009
- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDirectory -> RamDirectory. (@fulmicoton)
- Simplified positions index format (@fulmicoton) #1022
- Moved bitpacking to bitpacker subcrate and add BlockedBitpacker, which bitpacks blocks of 128 elements (@PSeitz) #1030
- Added support for more-like-this query in tantivy (@evanxg852000) #1011
- Added support for sorting an index, e.g presorting documents in an index by a timestamp field. This can heavily improve performance for certain scenarios, by utilizing the sorted data (Top-n optimizations)(@PSeitz). #1026
- Add iterator over documents in doc store (@PSeitz). #1044
- Fix log merge policy (@PSeitz). #1043
- Add detection to avoid small doc store blocks on merge (@PSeitz). #1054
- Make doc store compression dynamic (@PSeitz). #1060
- Switch to json for footer version handling (@PSeitz). #1060
- Updated TermMerger implementation to rely on the union feature of the FST (@scampi) #469
- Add boolean marking whether position is required in the query_terms API call (@fulmicoton). #1070
Tantivy 0.14.0
=========================
- Remove dependency to atomicwrites #833. (Implemented by @fulmicoton upon suggestion and research from @asafigan).
- Migrated tantivy error from the now deprecated `failure` crate to `thiserror` #760. (@hirevo)
- API Change. Accessing the typed value off a `Schema::Value` now returns an Option instead of panicking if the type does not match.
- Large API Change in the Directory API. Tantivy used to assume that all files could be somehow memory mapped. After this change, Directory returns a `FileSlice` that can be reduced and eventually read into an `OwnedBytes` object. Long and blocking io operations are still required but they do not span over the entire file.
- Added support for Brotli compression in the DocStore. (@ppodolsky)
- Added helper for building intersections and unions in BooleanQuery (@guilload)
- Bugfix in `Query::explain`
- Removed dependency on `notify` #924. Replaced with `FileWatcher` struct that polls meta file every 500ms in background thread. (@halvorboe @guilload)
- Added `FilterCollector`, which wraps another collector and filters docs using a predicate over a fast field (@barrotsteindev)
- Simplified the encoding of the skip reader struct. BlockWAND max tf is now encoded over a single byte. (@fulmicoton)
- `FilterCollector` now supports all Fast Field value types (@barrotsteindev)
- FastField are not all loaded when opening the segment reader. (@fulmicoton)
- Added an API to merge segments, see `tantivy::merge_segments` #1005. (@evanxg852000)
This version breaks compatibility and requires users to reindex everything.
Tantivy 0.13.2
===================
Bugfix. Acquiring a facet reader on a segment that does not contain any
doc with this facet returns `None`. (#896)
Tantivy 0.13.1
===================
Made `Query` and `Collector` `Send + Sync`.
Updated misc dependency versions.
Tantivy 0.13.0
======================
Tantivy 0.13 introduces a change in the index format that will require
you to reindex your index (BlockWAND information are added in the skiplist).
The index size increase is minor as this information is only added for
full blocks.
If you have a massive index for which reindexing is not an option, please contact me
so that we can discuss possible solutions.
- Bugfix in `FuzzyTermQuery` not matching terms by prefix when it should (@Peachball)
- Relaxed constraints on the custom/tweak score functions. At the segment level, they can be mut, and they are not required to be Sync + Send.
- `MMapDirectory::open` does not return a `Result` anymore.
- Change in the DocSet and Scorer API. (@fulmicoton).
A freshly created DocSet points directly to its first doc. A sentinel value called TERMINATED marks the end of a DocSet.
`.advance()` returns the new DocId. `Scorer::skip(target)` has been replaced by `Scorer::seek(target)` and returns the resulting DocId.
As a result, iterating through DocSet now looks as follows
```rust
let mut doc = docset.doc();
while doc != TERMINATED {
// ...
doc = docset.advance();
}
```
The change made it possible to greatly simplify a lot of the docset's code.
- Misc internal optimization and introduction of the `Scorer::for_each_pruning` function. (@fulmicoton)
- Added an offset option to the Top(.*)Collectors. (@robyoung)
- Added Block WAND. Performance on TOP-K on term-unions should be greatly increased. (@fulmicoton, and special thanks
to the PISA team for answering all my questions!)
Tantivy 0.12.0
======================
- Removing static dispatch in tokenizers for simplicity. (#762)
- Added backward iteration for `TermDictionary` stream. (@halvorboe)
- Fixed a performance issue when searching for the posting lists of a missing term (@audunhalland)
- Added a configurable maximum number of docs (10M by default) for a segment to be considered for merge (@hntd187, landed by @halvorboe #713)
- Important Bugfix #777, causing tantivy to retain memory mapping. (diagnosed by @poljar)
- Added support for field boosting. (#547, @fulmicoton)
## How to update?
Crates relying on custom tokenizer, or registering tokenizer in the manager will require some
minor changes. See <https://github.com/quickwit-oss/tantivy/blob/main/examples/custom_tokenizer.rs>
for a code sample.
Tantivy 0.11.3
=======================
- Fixed DateTime as a fast field (#735)
Tantivy 0.11.2
=======================
- The future returned by `IndexWriter::merge` does not borrow `self` mutably anymore (#732)
- Exposing a constructor for `WatchHandle` (#731)
Tantivy 0.11.1
=====================
- Bug fix #729
Tantivy 0.11.0
=====================
- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
- Various bugfixes in the query parser.
- Better handling of hyphens in query parser. (#609)
- Better handling of whitespaces.
- Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types eg. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik)
- API change around `Box<BoxableTokenizer>`. See detail in #629
- Avoid rebuilding Regex automaton whenever a regex query is reused. #639 (@brainlock)
- Add footer with some metadata to index files. #605 (@fdb-hiroshima)
- Add a method to check the compatibility of the footer in the index with the running version of tantivy (@petr-tik)
- TopDocs collector: ensure stable sorting on equal score. #671 (@brainlock)
- Added handling of pre-tokenized text fields (#642), which will enable users to
load tokens created outside tantivy. See usage in examples/pre_tokenized_text. (@kkoziara)
- Fix crash when committing multiple times with deleted documents. #681 (@brainlock)
## How to update?
- The index format is changed. You are required to reindex your data to use tantivy 0.11.
- `Box<dyn BoxableTokenizer>` has been replaced by a `BoxedTokenizer` struct.
- Regex are now compiled when the `RegexQuery` instance is built. As a result, it can now return
an error and handling the `Result` is required.
- `tantivy::version()` now returns a `Version` object. This object implements `ToString()`
Tantivy 0.10.2
=====================
- Closes #656. Solving memory leak.
Tantivy 0.10.1
=====================
- Closes #544. A few users experienced problems with the directory watching system.
Avoid watching the mmap directory until someone effectively creates a reader that uses
this functionality.
Tantivy 0.10.0
=====================
*Tantivy 0.10.0 index format is compatible with the index format in 0.9.0.*
- Added an API to easily tweak or entirely replace the
default score. See `TopDocs::tweak_score` and `TopScore::custom_score` (@fulmicoton)
- Added an ASCII folding filter (@drusellers)
- Bugfix in `query.count` in presence of deletes (@fulmicoton)
- Added `.explain(...)` in `Query` and `Weight`. (@fulmicoton)
- Added an efficient way to `delete_all_documents` in `IndexWriter` (@petr-tik).
All segments are simply removed.
Minor
---------
- Switched to Rust 2018 (@uvd)
- Small simplification of the code.
Calling .freq() or .doc() when .advance() has never been called
on segment postings should panic from now on.
- Tokens exceeding `u16::max_value() - 4` chars are discarded silently instead of panicking.
- Fast fields are now preloaded when the `SegmentReader` is created.
- `IndexMeta` is now public. (@hntd187)
- `IndexWriter` `add_document`, `delete_term`. `IndexWriter` is `Sync`, making it possible to use it with a `Arc<RwLock<IndexWriter>>`. `add_document` and `delete_term` can
only require a read lock. (@fulmicoton)
- Introducing `Opstamp` as an expressive type alias for `u64`. (@petr-tik)
- Stamper now relies on `AtomicU64` on all platforms (@petr-tik)
- Bugfix - Files get deleted slightly earlier
- Compilation resources improved (@fdb-hiroshima)
## How to update?
Your program should be usable as is.
### Fast fields
Fast fields used to be accessed directly from the `SegmentReader`.
The API changed, you are now required to acquire your fast field reader via the
`segment_reader.fast_fields()`, and use one of the typed method:
- `.u64()`, `.i64()` if your field is single-valued ;
- `.u64s()`, `.i64s()` if your field is multi-valued ;
- `.bytes()` if your field is bytes fast field.
Tantivy 0.9.0
=====================
*0.9.0 index format is not compatible with the
previous index format.*
- MAJOR BUGFIX :
Some `Mmap` objects were being leaked, and would never get released. (@fulmicoton)
- Removed most unsafe (@fulmicoton)
- Indexer memory footprint improved. (VInt comp, inlining the first block). (@fulmicoton)
- Stemming in other language possible (@pentlander)
- Segments with no docs are deleted earlier (@barrotsteindev)
- Added grouped add and delete operations.
They are guaranteed to happen together (i.e. they cannot be split by a commit).
In addition, adds are guaranteed to happen on the same segment. (@elbow-jason)
- Removed `INT_STORED` and `INT_INDEXED`. It is now possible to use `STORED` and `INDEXED`
for int fields. (@fulmicoton)
- Added DateTime field (@barrotsteindev)
- Added IndexReader. By default, index is reloaded automatically upon new commits (@fulmicoton)
- SIMD linear search within blocks (@fulmicoton)
## How to update ?
tantivy 0.9 brought some API breaking change.
To update from tantivy 0.8, you will need to go through the following steps.
- `schema::INT_INDEXED` and `schema::INT_STORED` should be replaced by `schema::INDEXED` and `schema::STORED`.
- The index now does not hold the pool of searcher anymore. You are required to create an intermediary object called
`IndexReader` for this.
```rust
// create the reader. You typically need to create 1 reader for the entire
// lifetime of your program.
let reader = index.reader()?;
// Acquiring a searcher (previously `index.searcher()`) is now written:
let searcher = reader.searcher();
// With the default setting of the reader, you are not required to
// call `index.load_searchers()` anymore.
//
// The IndexReader will pick up that change automatically, regardless
// of whether the update was done in a different process or not.
// If this behavior is not wanted, you can create your reader with
// the `ReloadPolicy::Manual`, and manually decide when to reload the index
// by calling `reader.reload()?`.
```
Tantivy 0.8.2
=====================
Fixing build for x86_64 platforms. (#496)
No need to update from 0.8.1 if tantivy
is building on your platform.
Tantivy 0.8.1
=====================
Hotfix of #476.
Merge was reflecting deletes before commit was passed.
Thanks @barrotsteindev for reporting the bug.
Tantivy 0.8.0
=====================
*No change in the index format*
- API Breaking change in the collector API. (@jwolfe, @fulmicoton)
- Multithreaded search (@jwolfe, @fulmicoton)
Tantivy 0.7.1
=====================
*No change in the index format*
- Bugfix: NGramTokenizer panics on non ascii chars
- Added a space usage API
Tantivy 0.7
=====================
- Skip data for doc ids and positions (@fulmicoton),
greatly improving performance
- Tantivy error now relies on the failure crate (@drusellers)
- Added support for `AND`, `OR`, `NOT` syntax in addition to the `+`,`-` syntax
- Added a snippet generator with highlight (@vigneshsarma, @fulmicoton)
- Added a `TopFieldCollector` (@pentlander)
Tantivy 0.6.1
=========================
- Bugfix #324. GC was removing files that were still in use
- Added support for parsing AllQuery and RangeQuery via QueryParser
- AllQuery: `*`
- RangeQuery:
- Inclusive `field:[startIncl to endIncl]`
- Exclusive `field:{startExcl to endExcl}`
- Mixed `field:[startIncl to endExcl}` and vice versa
- Unbounded `field:[start to *]`, `field:[* to end]`
Tantivy 0.6
==========================
Special thanks to @drusellers and @jason-wolfe for their contributions
to this release!
- Removed C code. Tantivy is now pure Rust. (@fulmicoton)
- BM25 (@fulmicoton)
- Approximate field norms encoded over 1 byte. (@fulmicoton)
- Compiles on stable rust (@fulmicoton)
- Add &[u8] fastfield for associating arbitrary bytes to each document (@jason-wolfe) (#270)
- Completely uncompressed
- Internally: One u64 fast field for indexes, one fast field for the bytes themselves.
- Add NGram token support (@drusellers)
- Add Stopword Filter support (@drusellers)
- Add a FuzzyTermQuery (@drusellers)
- Add a RegexQuery (@drusellers)
- Various performance improvements (@fulmicoton)
Tantivy 0.5.2
===========================
- bugfix #274
- bugfix #280
- bugfix #289
Tantivy 0.5.1
==========================
- bugfix #254 : tantivy failed if no documents in a segment contained a specific field.
Tantivy 0.5
==========================
- Faceting
- RangeQuery
- Configurable tokenization pipeline
- Bugfix in PhraseQuery
- Various query optimisation
- Allowing very large indexes
- 64 bits file address
- Smarter encoding of the `TermInfo` objects
Tantivy 0.4.3
==========================
- Bugfix race condition when deleting files. (#198)
Tantivy 0.4.2
==========================
- Prevent usage of AVX2 instructions (#201)
Tantivy 0.4.1
==========================
- Bugfix for non-indexed fields. (#199)
Tantivy 0.4.0
==========================
- Raise the limit of number of fields (previously 256 fields) (@fulmicoton)
- Removed u32 fields. They are replaced by u64 and i64 fields (#65) (@fulmicoton)
- Optimized skip in SegmentPostings (#130) (@lnicola)
- Replacing rustc_serialize by serde. Kudos to @KodrAus and @lnicola
- Using error-chain (@KodrAus)
- QueryParser: (@fulmicoton)
- Explicit error returned when searched for a term that is not indexed
- Searching for a int term via the query parser was broken `(age:1)`
- Searching for a non-indexed field returns an explicit Error
- Phrase query for non-tokenized field are not tokenized by the query parser.
- Faster/Better indexing (@fulmicoton)
- using murmurhash2
- faster merging
- more memory efficient fast field writer (@lnicola )
- better handling of collisions
- lesser memory usage
- Added API, most notably to iterate over ranges of terms (@fulmicoton)
- Bugfix that was preventing to unmap segment files, on index drop (@fulmicoton)
- Made the doc! macro public (@fulmicoton)
- Added an alternative implementation of the streaming dictionary (@fulmicoton)
Tantivy 0.3.1
==========================
- Expose a method to trigger files garbage collection
Tantivy 0.3
==========================
Special thanks to @Kodraus @lnicola @Ameobea @manuel-woelker @celaus
for their contribution to this release.
Thanks also to everyone in tantivy gitter chat
for their advice and company :)
<https://gitter.im/tantivy-search/tantivy>
Warning:
Tantivy 0.3 is NOT backward compatible with tantivy 0.2
code and index format.
You should not expect backward compatibility before
tantivy 1.0.
New Features
------------
- Delete. You can now delete documents from an index.
- Support for windows (Thanks to @lnicola)
Various Bugfixes & small improvements
----------------------------------------
- Added CI for Windows (<https://ci.appveyor.com/project/fulmicoton/tantivy>)
Thanks to @KodrAus ! (#108)
- Various dependency version updates (Thanks to @Ameobea) #76
- Fixed several race conditions in `Index.wait_merge_threads`
- Fixed #72. Mmap were never released.
- Fixed #80. Fast field used to take an amplitude of 32 bits after a merge. (Ouch!)
- Fixed #92. u32 are now encoded using big endian in the fst
in order to make their enumeration consistent with
the natural ordering.
- Building binary targets for tantivy-cli (Thanks to @KodrAus)
- Misc invisible bug fixes, and code cleanup.
- Use
================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- alias: Quickwit Inc.
website: "https://quickwit.io"
title: "tantivy"
version: 0.22.0
doi: 10.5281/zenodo.13942948
date-released: 2024-10-17
url: "https://github.com/quickwit-oss/tantivy"
================================================
FILE: Cargo.toml
================================================
[package]
name = "tantivy"
version = "0.26.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
description = """Search engine library"""
documentation = "https://docs.rs/tantivy/"
homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy"
readme = "README.md"
keywords = ["search", "information", "retrieval"]
edition = "2021"
rust-version = "1.86"
exclude = ["benches/*.json", "benches/*.txt"]
[dependencies]
oneshot = "0.1.13"
base64 = "0.22.0"
byteorder = "1.4.3"
crc32fast = "1.3.2"
once_cell = "1.10.0"
regex = { version = "1.5.5", default-features = false, features = [
"std",
"unicode",
] }
aho-corasick = "1.0"
tantivy-fst = "0.5"
memmap2 = { version = "0.9.0", optional = true }
lz4_flex = { version = "0.12", default-features = false, optional = true }
zstd = { version = "0.13", optional = true, default-features = false }
tempfile = { version = "3.12.0", optional = true }
log = "0.4.16"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
fs4 = { version = "0.13.1", optional = true }
levenshtein_automata = "0.2.1"
uuid = { version = "1.0.0", features = ["v4", "serde"] }
crossbeam-channel = "0.5.4"
rust-stemmers = { version = "1.2.0", optional = true }
downcast-rs = "2.0.1"
bitpacking = { version = "0.9.3", default-features = false, features = [
"bitpacker4x",
] }
census = "0.4.2"
rustc-hash = "2.0.0"
thiserror = "2.0.1"
htmlescape = "0.3.1"
fail = { version = "0.5.0", optional = true }
time = { version = "0.3.47", features = ["serde-well-known"] }
smallvec = "1.8.0"
rayon = "1.5.2"
lru = "0.16.3"
fastdivide = "0.4.0"
itertools = "0.14.0"
measure_time = "0.9.0"
arc-swap = "1.5.0"
bon = "3.3.1"
columnar = { version = "0.6", path = "./columnar", package = "tantivy-columnar" }
sstable = { version = "0.6", path = "./sstable", package = "tantivy-sstable", optional = true }
stacker = { version = "0.6", path = "./stacker", package = "tantivy-stacker" }
query-grammar = { version = "0.25.0", path = "./query-grammar", package = "tantivy-query-grammar" }
tantivy-bitpacker = { version = "0.9", path = "./bitpacker" }
common = { version = "0.10", path = "./common/", package = "tantivy-common" }
tokenizer-api = { version = "0.6", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
sketches-ddsketch = { git = "https://github.com/quickwit-oss/rust-sketches-ddsketch.git", rev = "555caf1", features = ["use_serde"] }
datasketches = "0.2.0"
futures-util = { version = "0.3.28", optional = true }
futures-channel = { version = "0.3.28", optional = true }
fnv = "1.0.7"
typetag = "0.2.21"
[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
[dev-dependencies]
binggan = "0.14.2"
rand = "0.9"
maplit = "1.0.2"
matches = "0.1.9"
pretty_assertions = "1.2.1"
proptest = "1.7.0"
test-log = "0.2.10"
futures = "0.3.21"
paste = "1.0.11"
more-asserts = "0.3.1"
rand_distr = "0.5"
time = { version = "0.3.47", features = ["serde-well-known", "macros"] }
postcard = { version = "1.0.4", features = [
"use-std",
], default-features = false }
[target.'cfg(not(windows))'.dev-dependencies]
criterion = { version = "0.5", default-features = false }
[dev-dependencies.fail]
version = "0.5.0"
features = ["failpoints"]
[profile.release]
opt-level = 3
debug = false
debug-assertions = false
[profile.bench]
opt-level = 3
debug = true
debug-assertions = false
[profile.test]
debug-assertions = true
overflow-checks = true
[features]
default = ["mmap", "stopwords", "lz4-compression", "columnar-zstd-compression", "stemmer"]
stemmer = ["rust-stemmers"]
mmap = ["fs4", "tempfile", "memmap2"]
stopwords = []
lz4-compression = ["lz4_flex"]
zstd-compression = ["zstd"]
# enable zstd-compression in columnar (and sstable)
columnar-zstd-compression = ["columnar/zstd-compression"]
failpoints = ["fail", "fail/failpoints"]
unstable = [] # useful for benches.
quickwit = ["sstable", "futures-util", "futures-channel"]
# Compares only the hash of a string when indexing data.
# Increases indexing speed, but may lead to extremely rare missing terms, when there's a hash collision.
# Uses 64bit ahash.
compare_hash_only = ["stacker/compare_hash_only"]
[workspace]
members = [
"query-grammar",
"bitpacker",
"common",
"ownedbytes",
"stacker",
"sstable",
"tokenizer-api",
"columnar",
]
# Following the "fail" crate best practices, we isolate
# tests that define specific behavior in fail check points
# in a different binary.
#
# We do that because `fail` relies on a global definition of
# failpoints behavior and hence is incompatible with
# multithreading.
[[test]]
name = "failpoints"
path = "tests/failpoints/mod.rs"
required-features = ["failpoints"]
[[bench]]
name = "analyzer"
harness = false
[[bench]]
name = "index-bench"
harness = false
[[bench]]
name = "agg_bench"
harness = false
[[bench]]
name = "exists_json"
harness = false
[[bench]]
name = "range_query"
harness = false
[[bench]]
name = "and_or_queries"
harness = false
[[bench]]
name = "range_queries"
harness = false
[[bench]]
name = "bool_queries_with_range"
harness = false
[[bench]]
name = "str_search_and_get"
harness = false
[[bench]]
name = "merge_segments"
harness = false
[[bench]]
name = "regex_all_terms"
harness = false
================================================
FILE: LICENSE
================================================
Copyright (c) 2018 by the project authors, as listed in the AUTHORS file.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: Makefile
================================================
# Runs `cargo test` restricted to `--tests --lib`; as the echoed message says, examples are skipped.
test:
@echo "Run test only... No examples."
cargo test --tests --lib
# Formats the code with the nightly toolchain's rustfmt (`cargo +nightly fmt --all`).
fmt:
cargo +nightly fmt --all
================================================
FILE: README.md
================================================
[Docs](https://docs.rs/crate/tantivy/)
[Build Status](https://github.com/quickwit-oss/tantivy/actions/workflows/test.yml)
[Coverage](https://codecov.io/gh/quickwit-oss/tantivy)
[Discord](https://discord.gg/MT27AG5EVE)
[License: MIT](https://opensource.org/licenses/MIT)
[Crates.io](https://crates.io/crates/tantivy)
<img src="https://tantivy-search.github.io/logo/tantivy-logo.png" alt="Tantivy, the fastest full-text search engine library written in Rust" height="250">
## Fast full-text search engine library written in Rust
**If you are looking for an alternative to Elasticsearch or Apache Solr, check out [Quickwit](https://github.com/quickwit-oss/quickwit), our distributed search engine built on top of Tantivy.**
Tantivy is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elasticsearch](https://www.elastic.co/products/elasticsearch) or [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
an off-the-shelf search engine server, but rather a crate that can be used to build such a search engine.
Tantivy is, in fact, strongly inspired by Lucene's design.
## Benchmark
The following [benchmark](https://tantivy-search.github.io/bench/) breaks down the
performance for different types of queries/collections.
Your mileage WILL vary depending on the nature of queries and their load.
Details about the benchmark can be found at this [repository](https://github.com/quickwit-oss/search-benchmark-game).
## Features
- Full-text search
- Configurable tokenizer (stemming available for 17 Latin languages) with third party support for Chinese ([tantivy-jieba](https://crates.io/crates/tantivy-jieba) and [cang-jie](https://crates.io/crates/cang-jie)), Japanese ([lindera](https://github.com/lindera-morphology/lindera-tantivy), [Vaporetto](https://crates.io/crates/vaporetto_tantivy), and [tantivy-tokenizer-tiny-segmenter](https://crates.io/crates/tantivy-tokenizer-tiny-segmenter)) and Korean ([lindera](https://github.com/lindera-morphology/lindera-tantivy) + [lindera-ko-dic-builder](https://github.com/lindera-morphology/lindera-ko-dic-builder))
- Fast (check out the :racehorse: :sparkles: [benchmark](https://tantivy-search.github.io/bench/) :sparkles: :racehorse:)
- Tiny startup time (<10ms), perfect for command-line tools
- BM25 scoring (the same as Lucene)
- Natural query language (e.g. `(michael AND jackson) OR "king of pop"`)
- Phrase queries search (e.g. `"michael jackson"`)
- Incremental indexing
- Multithreaded indexing (indexing English Wikipedia takes < 3 minutes on my desktop)
- Mmap directory
- SIMD integer compression when the platform/CPU includes the SSE2 instruction set
- Single valued and multivalued u64, i64, and f64 fast fields (equivalent of doc values in Lucene)
- `&[u8]` fast fields
- Text, i64, u64, f64, dates, ip, bool, and hierarchical facet fields
- Compressed document store (LZ4, Zstd, None)
- Range queries
- Faceted search
- Configurable indexing (optional term frequency and position indexing)
- JSON Field
- Aggregation Collector: histogram, range buckets, average, and stats metrics
- LogMergePolicy with deletes
- Searcher Warmer API
- Cheesy logo with a horse
### Non-features
Distributed search is out of the scope of Tantivy, but if you are looking for this feature, check out [Quickwit](https://github.com/quickwit-oss/quickwit/).
## Getting started
Tantivy works on stable Rust and supports Linux, macOS, and Windows.
- [Tantivy's simple search example](https://tantivy-search.github.io/examples/basic_search.html)
- [tantivy-cli and its tutorial](https://github.com/quickwit-oss/tantivy-cli) - `tantivy-cli` is an actual command-line interface that makes it easy for you to create a search engine,
index documents, and search via the CLI or a small server with a REST API.
It walks you through getting a Wikipedia search engine up and running in a few minutes.
- [Reference doc for the last released version](https://docs.rs/tantivy/)
## How can I support this project?
There are many ways to support this project.
- Use Tantivy and tell us about your experience on [Discord](https://discord.gg/MT27AG5EVE) or by email (paul.masurel@gmail.com)
- Report bugs
- Write a blog post
- Help with documentation by asking questions or submitting PRs
- Contribute code (you can join [our Discord server](https://discord.gg/MT27AG5EVE))
- Talk about Tantivy around you
## Contributing code
We use the GitHub Pull Request workflow: reference a GitHub ticket and/or include a comprehensive commit message when opening a PR.
Feel free to update CHANGELOG.md with your contribution.
### Tokenizer
When implementing a tokenizer for tantivy, depend on the `tantivy-tokenizer-api` crate.
### Clone and build locally
Tantivy compiles on stable Rust.
To check out and run tests, you can simply run:
```bash
git clone https://github.com/quickwit-oss/tantivy.git
cd tantivy
cargo test
```
## Companies Using Tantivy
<p align="left">
<img align="center" src="doc/assets/images/etsy.png" alt="Etsy" height="25" width="auto" />
<img align="center" src="doc/assets/images/paradedb.png" alt="ParadeDB" height="25" width="auto" />
<img align="center" src="doc/assets/images/Nuclia.png#gh-light-mode-only" alt="Nuclia" height="25" width="auto" />
<img align="center" src="doc/assets/images/humanfirst.png#gh-light-mode-only" alt="Humanfirst.ai" height="30" width="auto" />
<img align="center" src="doc/assets/images/element.io.svg#gh-light-mode-only" alt="Element.io" height="25" width="auto" />
<img align="center" src="doc/assets/images/nuclia-dark-theme.png#gh-dark-mode-only" alt="Nuclia" height="35" width="auto" />
<img align="center" src="doc/assets/images/humanfirst.ai-dark-theme.png#gh-dark-mode-only" alt="Humanfirst.ai" height="25" width="auto" />
<img align="center" src="doc/assets/images/element-dark-theme.png#gh-dark-mode-only" alt="Element.io" height="25" width="auto" />
</p>
## FAQ
### Can I use Tantivy in other languages?
- Python → [tantivy-py](https://github.com/quickwit-oss/tantivy-py)
- Ruby → [tantiny](https://github.com/baygeldin/tantiny)
You can also find other bindings on [GitHub](https://github.com/search?q=tantivy) but they may be less maintained.
### What are some examples of Tantivy use?
- [seshat](https://github.com/matrix-org/seshat/): A matrix message database/indexer
- [tantiny](https://github.com/baygeldin/tantiny): Tiny full-text search for Ruby
- [lnx](https://github.com/lnx-search/lnx): adaptable, typo tolerant search engine with a REST API
- [Bichon](https://github.com/rustmailer/bichon): A lightweight, high-performance Rust email archiver with WebUI
- and [more](https://github.com/search?q=tantivy)!
### On average, how much faster is Tantivy compared to Lucene?
- According to our [search latency benchmark](https://tantivy-search.github.io/bench/), Tantivy is approximately 2x faster than Lucene.
### Does tantivy support incremental indexing?
- Yes.
### How can I edit documents?
- Data in tantivy is immutable. To edit a document, the document needs to be deleted and reindexed.
### When will my documents be searchable during indexing?
- Documents will be searchable after a `commit` is called on an `IndexWriter`. Existing `IndexReader`s will also need to be reloaded in order to reflect the changes. Finally, changes are only visible to newly acquired `Searcher`.
================================================
FILE: RELEASE.md
================================================
# Releasing a new Tantivy Version
## Steps
1. Identify new packages in workspace since last release
2. Identify changed packages in workspace since last release
3. Bump version in `Cargo.toml` and their dependents for all changed packages
4. Update version of root `Cargo.toml`
5. Publish version starting with leaf nodes
6. Set git tag with new version
[`cargo-release`](https://github.com/crate-ci/cargo-release) will help us with steps 1-5:
Replace the `--prev-tag-name` value (`0.24` below) with the tag of the previous release:
```bash
cargo release --workspace --no-publish -v --prev-tag-name 0.24 --push-remote origin minor --no-tag
```
Pass `--no-tag`, or it will create tags for all the subpackages.
cargo release will _not_ ignore unchanged packages, but it will print warnings for them.
e.g. "warning: updating ownedbytes to 0.10.0 despite no changes made since tag 0.24"
We need to manually ignore these unchanged packages
```bash
cargo release --workspace --no-publish -v --prev-tag-name 0.24 --push-remote origin minor --no-tag --exclude tokenizer-api
```
Add `--execute` to actually publish the packages, otherwise it will only print the commands that would be run.
### Tag Version
```bash
git tag 0.25.0
git push upstream tag 0.25.0
```
================================================
FILE: TODO.txt
================================================
Make schema_builder API fluent.
fix doc serialization and prevent compression problems
u64, etc. should return Result<Option> now that we support optional columns: a missing column is really not an error
remove fastfield codecs
ditch the first_or_default trick. if it is still useful, improve its implementation.
rename FastFieldReaders::open to load
remove fast field reader
find a way to unify the two DateTime.
re-add type check in the filter wrapper
add unit test on columnar list columns.
make sure sort works
================================================
FILE: benches/agg_bench.rs
================================================
use binggan::plugins::PeakMemAllocPlugin;
use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use rand::distr::weighted::WeightedIndex;
use rand::rngs::StdRng;
use rand::seq::IndexedRandom;
use rand::{Rng, SeedableRng};
use rand_distr::Distribution;
use serde_json::json;
use tantivy::aggregation::agg_req::Aggregations;
use tantivy::aggregation::AggregationCollector;
use tantivy::query::{AllQuery, TermQuery};
use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
use tantivy::{doc, DateTime, Index, Term};
/// Global allocator for this benchmark binary: binggan's `PeakMemAlloc` wrapper around the
/// system allocator. It is handed to `PeakMemAllocPlugin` in `bench_agg`.
#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
/// Registers a benchmark function on a runner, using the function's own name as the
/// benchmark name.
///
/// `register!(runner, average_u64)` expands to
/// `runner.register("average_u64", move |input| { average_u64(input); })`.
macro_rules! register {
    ($group:expr, $bench_fn:ident) => {
        $group.register(stringify!($bench_fn), move |input| {
            $bench_fn(input);
        })
    };
}
/// Builds one test index per cardinality flavour and runs every aggregation benchmark
/// against each of them.
fn main() {
    // Indexes are built in the listed order; the label becomes the input name.
    let inputs: Vec<_> = [
        ("full", Cardinality::Full),
        ("dense", Cardinality::OptionalDense),
        ("sparse", Cardinality::OptionalSparse),
        ("multivalue", Cardinality::Multivalued),
    ]
    .into_iter()
    .map(|(label, cardinality)| (label, get_test_index_bench(cardinality).unwrap()))
    .collect();

    bench_agg(InputGroup::new_with_inputs(inputs));
}
fn bench_agg(mut group: InputGroup<Index>) {
group.add_plugin(PeakMemAllocPlugin::new(GLOBAL));
register!(group, average_u64);
register!(group, average_f64);
register!(group, average_f64_u64);
register!(group, stats_f64);
register!(group, extendedstats_f64);
register!(group, percentiles_f64);
register!(group, terms_7);
register!(group, terms_all_unique);
register!(group, terms_150_000);
register!(group, terms_many_top_1000);
register!(group, terms_many_order_by_term);
register!(group, terms_many_with_top_hits);
register!(group, terms_all_unique_with_avg_sub_agg);
register!(group, terms_many_with_avg_sub_agg);
register!(group, terms_status_with_avg_sub_agg);
register!(group, terms_status_with_histogram);
register!(group, terms_zipf_1000);
register!(group, terms_zipf_1000_with_histogram);
register!(group, terms_zipf_1000_with_avg_sub_agg);
register!(group, terms_many_json_mixed_type_with_avg_sub_agg);
register!(group, composite_term_many_page_1000);
register!(group, composite_term_many_page_1000_with_avg_sub_agg);
register!(group, composite_term_few);
register!(group, composite_histogram);
register!(group, composite_histogram_calendar);
register!(group, cardinality_agg);
register!(group, terms_status_with_cardinality_agg);
register!(group, range_agg);
register!(group, range_agg_with_avg_sub_agg);
register!(group, range_agg_with_term_agg_status);
register!(group, range_agg_with_term_agg_many);
register!(group, histogram);
register!(group, histogram_hard_bounds);
register!(group, histogram_with_avg_sub_agg);
register!(group, histogram_with_term_agg_status);
register!(group, avg_and_range_with_avg_sub_agg);
// Filter aggregation benchmarks
register!(group, filter_agg_all_query_count_agg);
register!(group, filter_agg_term_query_count_agg);
register!(group, filter_agg_all_query_with_sub_aggs);
register!(group, filter_agg_term_query_with_sub_aggs);
group.run();
}
/// Runs the aggregation request `agg_req` over the documents matching the term `cool` in the
/// `text` field.
///
/// Panics if the request cannot be deserialized, the reader cannot be opened, the `text`
/// field is missing from the schema, or the search fails.
fn exec_term_with_agg(index: &Index, agg_req: serde_json::Value) {
    let aggregations: Aggregations = serde_json::from_value(agg_req).unwrap();
    let reader = index.reader().unwrap();
    let text_field = reader.searcher().schema().get_field("text").unwrap();
    let query = TermQuery::new(
        Term::from_field_text(text_field, "cool"),
        IndexRecordOption::Basic,
    );
    let collector = get_collector(aggregations);
    let searcher = reader.searcher();
    let result = searcher.search(&query, &collector).unwrap();
    // Keep the result observable so the search cannot be optimized away.
    black_box(result);
}
/// `avg` over the u64 `score` field, on the documents matched by `exec_term_with_agg`.
fn average_u64(index: &Index) {
    let request = json!({
        "average": {
            "avg": { "field": "score" }
        }
    });
    exec_term_with_agg(index, request)
}
/// `avg` over the f64 `score_f64` field, on the documents matched by `exec_term_with_agg`.
fn average_f64(index: &Index) {
    let request = json!({
        "average": {
            "avg": { "field": "score_f64" }
        }
    });
    exec_term_with_agg(index, request)
}
/// Two `avg` aggregations in one request: one over `score_f64` and one over `score`,
/// on the documents matched by `exec_term_with_agg`.
fn average_f64_u64(index: &Index) {
    let request = json!({
        "average_f64": {
            "avg": { "field": "score_f64" }
        },
        "average": {
            "avg": { "field": "score" }
        }
    });
    exec_term_with_agg(index, request)
}
/// `stats` over `score_f64`, on the documents matched by `exec_term_with_agg`.
/// The result key is `average_f64`.
fn stats_f64(index: &Index) {
    let request = json!({
        "average_f64": {
            "stats": { "field": "score_f64" }
        }
    });
    exec_term_with_agg(index, request)
}
/// `extended_stats` over `score_f64`, on the documents matched by `exec_term_with_agg`.
fn extendedstats_f64(index: &Index) {
    let request = json!({
        "extendedstats_f64": {
            "extended_stats": { "field": "score_f64" }
        }
    });
    exec_term_with_agg(index, request)
}
/// `percentiles` (95, 99 and 99.9) over `score_f64`, on all documents.
fn percentiles_f64(index: &Index) {
    execute_agg(
        index,
        json!({
            "mypercentiles": {
                "percentiles": { "field": "score_f64", "percents": [95, 99, 99.9] }
            }
        }),
    );
}
/// `cardinality` over `text_many_terms`, on all documents.
fn cardinality_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "cardinality": {
                "cardinality": { "field": "text_many_terms" }
            }
        }),
    );
}
/// `terms` over `text_few_terms_status` with a `cardinality` sub-aggregation over
/// `text_many_terms`, on all documents.
fn terms_status_with_cardinality_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_few_terms_status" },
                "aggs": {
                    "cardinality": {
                        "cardinality": { "field": "text_many_terms" }
                    }
                }
            }
        }),
    );
}
/// `terms` over `text_few_terms_status`, on all documents.
fn terms_7(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_few_terms_status" }
            }
        }),
    );
}
/// `terms` over `text_all_unique_terms`, on all documents.
fn terms_all_unique(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_all_unique_terms" }
            }
        }),
    );
}
/// `terms` over `text_many_terms`, on all documents.
fn terms_150_000(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_many_terms" }
            }
        }),
    );
}
/// `terms` over `text_many_terms` with `size` set to 1000, on all documents.
fn terms_many_top_1000(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_many_terms", "size": 1000 }
            }
        }),
    );
}
/// `terms` over `text_many_terms` ordered by `_key` descending, on all documents.
fn terms_many_order_by_term(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": {
                    "field": "text_many_terms",
                    "order": { "_key": "desc" }
                }
            }
        }),
    );
}
/// `terms` over `text_many_terms` with a `top_hits` sub-aggregation (2 hits, sorted by
/// `score` descending, returning `score_f64`), on all documents.
fn terms_many_with_top_hits(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_many_terms" },
                "aggs": {
                    "top_hits": {
                        "top_hits": {
                            "sort": [{ "score": "desc" }],
                            "size": 2,
                            "doc_value_fields": ["score_f64"]
                        }
                    }
                }
            }
        }),
    );
}
/// `terms` over `text_many_terms` with an `avg` sub-aggregation over `score_f64`,
/// on all documents.
fn terms_many_with_avg_sub_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_many_terms" },
                "aggs": {
                    "average_f64": {
                        "avg": { "field": "score_f64" }
                    }
                }
            }
        }),
    );
}
/// `terms` over `text_all_unique_terms` with an `avg` sub-aggregation over `score_f64`,
/// on all documents.
fn terms_all_unique_with_avg_sub_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_all_unique_terms" },
                "aggs": {
                    "average_f64": {
                        "avg": { "field": "score_f64" }
                    }
                }
            }
        }),
    );
}
/// `terms` over `text_few_terms_status` with a `histogram` sub-aggregation over
/// `score_f64` (interval 10), on all documents.
fn terms_status_with_histogram(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_few_terms_status" },
                "aggs": {
                    "histo": {
                        "histogram": { "field": "score_f64", "interval": 10 }
                    }
                }
            }
        }),
    );
}
/// `terms` over `text_1000_terms_zipf` with a `histogram` sub-aggregation over
/// `score_f64` (interval 10), on all documents.
fn terms_zipf_1000_with_histogram(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_1000_terms_zipf" },
                "aggs": {
                    "histo": {
                        "histogram": { "field": "score_f64", "interval": 10 }
                    }
                }
            }
        }),
    );
}
/// `terms` over `text_few_terms_status` with an `avg` sub-aggregation over `score_f64`,
/// on all documents.
fn terms_status_with_avg_sub_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_few_terms_status" },
                "aggs": {
                    "average_f64": {
                        "avg": { "field": "score_f64" }
                    }
                }
            }
        }),
    );
}
/// `terms` over `text_1000_terms_zipf` with an `avg` sub-aggregation over `score_f64`,
/// on all documents.
fn terms_zipf_1000_with_avg_sub_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_1000_terms_zipf" },
                "aggs": {
                    "average_f64": {
                        "avg": { "field": "score_f64" }
                    }
                }
            }
        }),
    );
}
/// `terms` over `text_1000_terms_zipf`, on all documents.
fn terms_zipf_1000(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "text_1000_terms_zipf" }
            }
        }),
    );
}
/// `terms` over the JSON path `json.mixed_type` with an `avg` sub-aggregation over
/// `score_f64`, on all documents.
fn terms_many_json_mixed_type_with_avg_sub_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_texts": {
                "terms": { "field": "json.mixed_type" },
                "aggs": {
                    "average_f64": {
                        "avg": { "field": "score_f64" }
                    }
                }
            }
        }),
    );
}
/// `composite` with a single `terms` source over `text_few_terms` and a page size of 1000,
/// on all documents.
fn composite_term_few(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_ctf": {
                "composite": {
                    "sources": [
                        {
                            "text_few_terms": {
                                "terms": { "field": "text_few_terms" }
                            }
                        }
                    ],
                    "size": 1000
                }
            }
        }),
    );
}
/// `composite` with a single `terms` source over `text_many_terms` and a page size of 1000,
/// on all documents.
fn composite_term_many_page_1000(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_ctmp1000": {
                "composite": {
                    "sources": [
                        {
                            "text_many_terms": {
                                "terms": { "field": "text_many_terms" }
                            }
                        }
                    ],
                    "size": 1000
                }
            }
        }),
    );
}
/// `composite` with a single `terms` source over `text_many_terms` (page size 1000) and an
/// `avg` sub-aggregation over `score_f64`, on all documents.
fn composite_term_many_page_1000_with_avg_sub_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_ctmp1000wasa": {
                "composite": {
                    "sources": [
                        {
                            "text_many_terms": {
                                "terms": { "field": "text_many_terms" }
                            }
                        }
                    ],
                    "size": 1000
                },
                "aggs": {
                    "average_f64": {
                        "avg": { "field": "score_f64" }
                    }
                }
            }
        }),
    );
}
/// `composite` with a single `histogram` source over `score_f64` (interval 1) and a page
/// size of 1000, on all documents.
fn composite_histogram(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_ch": {
                "composite": {
                    "sources": [
                        {
                            "f64_histogram": {
                                "histogram": { "field": "score_f64", "interval": 1 }
                            }
                        }
                    ],
                    "size": 1000
                }
            }
        }),
    );
}
/// `composite` with a single `date_histogram` source over `timestamp` (calendar interval
/// `month`) and a page size of 1000, on all documents.
fn composite_histogram_calendar(index: &Index) {
    execute_agg(
        index,
        json!({
            "my_chc": {
                "composite": {
                    "sources": [
                        {
                            "time_histogram": {
                                "date_histogram": {
                                    "field": "timestamp",
                                    "calendar_interval": "month"
                                }
                            }
                        }
                    ],
                    "size": 1000
                }
            }
        }),
    );
}
/// Runs the aggregation request `agg_req` over every document of `index` (`AllQuery`).
///
/// Panics if the request cannot be deserialized, the reader cannot be opened, or the
/// search fails.
fn execute_agg(index: &Index, agg_req: serde_json::Value) {
    let aggregations: Aggregations = serde_json::from_value(agg_req).unwrap();
    let collector = get_collector(aggregations);
    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    let result = searcher.search(&AllQuery, &collector).unwrap();
    // Keep the result observable so the search cannot be optimized away.
    black_box(result);
}
/// `range` over `score_f64` with six buckets spanning 3 to 60000, on all documents.
fn range_agg(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 30000 },
        { "from": 30000, "to": 40000 },
        { "from": 40000, "to": 50000 },
        { "from": 50000, "to": 60000 }
    ]);
    execute_agg(
        index,
        json!({
            "range_f64": {
                "range": { "field": "score_f64", "ranges": ranges }
            }
        }),
    );
}
/// `range` over `score_f64` (six buckets spanning 3 to 60000) with an `avg` sub-aggregation
/// over `score_f64`, on all documents.
fn range_agg_with_avg_sub_agg(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 30000 },
        { "from": 30000, "to": 40000 },
        { "from": 40000, "to": 50000 },
        { "from": 50000, "to": 60000 }
    ]);
    execute_agg(
        index,
        json!({
            "rangef64": {
                "range": { "field": "score_f64", "ranges": ranges },
                "aggs": {
                    "average_f64": {
                        "avg": { "field": "score_f64" }
                    }
                }
            }
        }),
    );
}
/// `range` over `score_f64` (six buckets spanning 3 to 60000) with a `terms` sub-aggregation
/// over `text_few_terms_status`, on all documents.
fn range_agg_with_term_agg_status(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 30000 },
        { "from": 30000, "to": 40000 },
        { "from": 40000, "to": 50000 },
        { "from": 50000, "to": 60000 }
    ]);
    execute_agg(
        index,
        json!({
            "rangef64": {
                "range": { "field": "score_f64", "ranges": ranges },
                "aggs": {
                    "my_texts": {
                        "terms": { "field": "text_few_terms_status" }
                    }
                }
            }
        }),
    );
}
/// `range` over `score_f64` (six buckets spanning 3 to 60000) with a `terms` sub-aggregation
/// over `text_many_terms`, on all documents.
fn range_agg_with_term_agg_many(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 30000 },
        { "from": 30000, "to": 40000 },
        { "from": 40000, "to": 50000 },
        { "from": 50000, "to": 60000 }
    ]);
    execute_agg(
        index,
        json!({
            "rangef64": {
                "range": { "field": "score_f64", "ranges": ranges },
                "aggs": {
                    "my_texts": {
                        "terms": { "field": "text_many_terms" }
                    }
                }
            }
        }),
    );
}
/// `histogram` over `score_f64` with an interval of 100, on all documents.
fn histogram(index: &Index) {
    execute_agg(
        index,
        json!({
            "rangef64": {
                "histogram": { "field": "score_f64", "interval": 100 }
            }
        }),
    );
}
/// `histogram` over `score_f64` (interval 100) with `hard_bounds` of 1000 to 300000,
/// on all documents.
fn histogram_hard_bounds(index: &Index) {
    execute_agg(
        index,
        json!({
            "rangef64": {
                "histogram": {
                    "field": "score_f64",
                    "interval": 100,
                    "hard_bounds": { "min": 1000, "max": 300000 }
                }
            }
        }),
    );
}
/// `histogram` over `score_f64` (interval 100) with an `avg` sub-aggregation over
/// `score_f64`, on all documents.
fn histogram_with_avg_sub_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "rangef64": {
                "histogram": { "field": "score_f64", "interval": 100 },
                "aggs": {
                    "average_f64": {
                        "avg": { "field": "score_f64" }
                    }
                }
            }
        }),
    );
}
/// `histogram` over `score_f64` (interval 10) with a `terms` sub-aggregation over
/// `text_few_terms_status`, on all documents.
fn histogram_with_term_agg_status(index: &Index) {
    execute_agg(
        index,
        json!({
            "rangef64": {
                "histogram": { "field": "score_f64", "interval": 10 },
                "aggs": {
                    "my_texts": {
                        "terms": { "field": "text_few_terms_status" }
                    }
                }
            }
        }),
    );
}
/// A top-level `avg` over `score` next to a three-bucket `range` over `score_f64` that
/// carries its own `avg` sub-aggregation over `score`, on all documents.
fn avg_and_range_with_avg_sub_agg(index: &Index) {
    let ranges = json!([
        { "from": 3, "to": 7000 },
        { "from": 7000, "to": 20000 },
        { "from": 20000, "to": 60000 }
    ]);
    execute_agg(
        index,
        json!({
            "rangef64": {
                "range": { "field": "score_f64", "ranges": ranges },
                "aggs": {
                    "average_in_range": {
                        "avg": { "field": "score" }
                    }
                }
            },
            "average": {
                "avg": { "field": "score" }
            }
        }),
    );
}
/// Cardinality flavour of the generated benchmark index; selects how
/// `get_test_index_bench` fills the fields.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum Cardinality {
    /// Every document contains exactly one value.
    /// This is the default variant, as it is the strictest cardinality.
    #[default]
    Full = 0,
    /// Every document contains at most one value.
    OptionalDense = 1,
    /// Documents may contain any number of values.
    Multivalued = 2,
    /// Only 1 in 20 documents has a value.
    OptionalSparse = 3,
}
/// Wraps `agg_req` in an `AggregationCollector`, passing `Default::default()` as the second
/// argument of `from_aggs`.
fn get_collector(agg_req: Aggregations) -> AggregationCollector {
    let default_arg = Default::default();
    AggregationCollector::from_aggs(agg_req, default_arg)
}
fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
// Flag to use existing index
let reuse_index = std::env::var("REUSE_AGG_BENCH_INDEX").is_ok();
if reuse_index && std::path::Path::new("agg_bench").exists() {
return Index::open_in_dir("agg_bench");
}
// crreate dir
std::fs::create_dir_all("agg_bench")?;
let mut schema_builder = Schema::builder();
let text_fieldtype = tantivy::schema::TextOptions::default()
.set_indexing_options(
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
)
.set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype);
let json_field = schema_builder.add_json_field("json", FAST);
let text_field_all_unique_terms =
schema_builder.add_text_field("text_all_unique_terms", STRING | FAST);
let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
let text_field_few_terms_status =
schema_builder.add_text_field("text_few_terms_status", STRING | FAST);
let text_field_1000_terms_zipf =
schema_builder.add_text_field("text_1000_terms_zipf", STRING | FAST);
let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast();
let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
let date_field = schema_builder.add_date_field("timestamp", FAST);
// use tmp dir
let index = if reuse_index {
Index::create_in_dir("agg_bench", schema_builder.build())?
} else {
Index::create_from_tempdir(schema_builder.build())?
};
// Approximate log proportions
let status_field_data = [
("INFO", 8000),
("ERROR", 300),
("WARN", 1200),
("DEBUG", 500),
("OK", 500),
("CRITICAL", 20),
("EMERGENCY", 1),
];
let log_level_distribution =
WeightedIndex::new(status_field_data.iter().map(|item| item.1)).unwrap();
let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
let many_terms_data = (0..150_000)
.map(|num| format!("author{num}"))
.collect::<Vec<_>>();
// Prepare 1000 unique terms sampled using a Zipf distribution.
// Exponent ~1.1 approximates top-20 terms covering around ~20%.
let terms_1000: Vec<String> = (1..=1000).map(|i| format!("term_{i}")).collect();
let zipf_1000 = rand_distr::Zipf::new(1000.0, 1.1f64).unwrap();
{
let mut rng = StdRng::from_seed([1u8; 32]);
let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
// To make the different test cases comparable we just change one doc to force the
// cardinality
if cardinality == Cardinality::OptionalDense {
index_writer.add_document(doc!())?;
}
if cardinality == Cardinality::Multivalued {
let log_level_sample_a = status_field_data[log_level_distribution.sample(&mut rng)].0;
let log_level_sample_b = status_field_data[log_level_distribution.sample(&mut rng)].0;
let idx_a = zipf_1000.sample(&mut rng) as usize - 1;
let idx_b = zipf_1000.sample(&mut rng) as usize - 1;
let term_1000_a = &terms_1000[idx_a];
let term_1000_b = &terms_1000[idx_b];
index_writer.add_document(doc!(
json_field => json!({"mixed_type": 10.0}),
json_field => json!({"mixed_type": 10.0}),
text_field => "cool",
text_field => "cool",
text_field_all_unique_terms => "cool",
text_field_all_unique_terms => "coolo",
text_field_many_terms => "cool",
text_field_many_terms => "cool",
text_field_few_terms => "cool",
text_field_few_terms => "cool",
text_field_few_terms_status => log_level_sample_a,
text_field_few_terms_status => log_level_sample_b,
text_field_1000_terms_zipf => term_1000_a.as_str(),
text_field_1000_terms_zipf => term_1000_b.as_str(),
score_field => 1u64,
score_field => 1u64,
score_field_f64 => lg_norm.sample(&mut rng),
score_field_f64 => lg_norm.sample(&mut rng),
score_field_i64 => 1i64,
score_field_i64 => 1i64,
))?;
}
let mut doc_with_value = 1_000_000;
if cardinality == Cardinality::OptionalSparse {
doc_with_value /= 20;
}
let _val_max = 1_000_000.0;
for _ in 0..doc_with_value {
let val: f64 = rng.random_range(0.0..1_000_000.0);
let json = if rng.random_bool(0.1) {
// 10% are numeric values
json!({ "mixed_type": val })
} else {
json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
};
index_writer.add_document(doc!(
text_field => "cool",
json_field => json,
text_field_all_unique_terms => format!("unique_term_{}", rng.random::<u64>()),
text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
text_field_few_terms_status => status_field_data[log_level_distribution.sample(&mut rng)].0,
text_field_1000_terms_zipf => terms_1000[zipf_1000.sample(&mut rng) as usize - 1].as_str(),
score_field => val as u64,
score_field_f64 => lg_norm.sample(&mut rng),
score_field_i64 => val as i64,
date_field => DateTime::from_timestamp_millis((val * 1_000_000.) as i64),
))?;
if cardinality == Cardinality::OptionalSparse {
for _ in 0..20 {
index_writer.add_document(doc!(text_field => "cool"))?;
}
}
}
// writing the segment
index_writer.commit()?;
}
Ok(index)
}
// Filter aggregation benchmarks
/// Runs a `filter` aggregation named `filtered` whose filter is the query string `*`,
/// with a single `value_count` sub-aggregation (`count`) over the `score` field.
///
/// The request is handed to `execute_agg` together with `index`.
fn filter_agg_all_query_count_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "filtered": {
                "filter": "*",
                "aggs": {
                    "count": {
                        "value_count": { "field": "score" }
                    }
                }
            }
        }),
    );
}
/// Runs a `filter` aggregation named `filtered` whose filter is the query string
/// `text:cool`, with a single `value_count` sub-aggregation (`count`) over the
/// `score` field.
///
/// The request is handed to `execute_agg` together with `index`.
fn filter_agg_term_query_count_agg(index: &Index) {
    execute_agg(
        index,
        json!({
            "filtered": {
                "filter": "text:cool",
                "aggs": {
                    "count": {
                        "value_count": { "field": "score" }
                    }
                }
            }
        }),
    );
}
/// Runs a `filter` aggregation named `filtered` whose filter is the query string `*`,
/// carrying three sub-aggregations: `avg` on `score`, `stats` on `score_f64`, and
/// `terms` on `text_few_terms_status`.
///
/// The request is handed to `execute_agg` together with `index`.
fn filter_agg_all_query_with_sub_aggs(index: &Index) {
    // Sub-aggregations evaluated under the filter bucket.
    let sub_aggs = json!({
        "avg_score": { "avg": { "field": "score" } },
        "stats_score": { "stats": { "field": "score_f64" } },
        "terms_text": {
            "terms": { "field": "text_few_terms_status" }
        }
    });
    let request = json!({
        "filtered": {
            "filter": "*",
            "aggs": sub_aggs
        }
    });
    execute_agg(index, request);
}
/// Runs a `filter` aggregation named `filtered` whose filter is the query string
/// `text:cool`, carrying three sub-aggregations: `avg` on `score`, `stats` on
/// `score_f64`, and `terms` on `text_few_terms_status`.
///
/// The request is handed to `execute_agg` together with `index`.
fn filter_agg_term_query_with_sub_aggs(index: &Index) {
    // Sub-aggregations evaluated under the filter bucket.
    let sub_aggs = json!({
        "avg_score": { "avg": { "field": "score" } },
        "stats_score": { "stats": { "field": "score_f64" } },
        "terms_text": {
            "terms": { "field": "text_few_terms_status" }
        }
    });
    let request = json!({
        "filtered": {
            "filter": "text:cool",
            "aggs": sub_aggs
        }
    });
    execute_agg(index, request);
}
================================================
FILE: benches/alice.txt
================================================
The Project Gutenberg EBook of Alice’s Adventures in Wonderland, by Lewis Carroll
This eBook is for the use of anyone anywhere in the United States and most
other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms of
the Project Gutenberg License included with this eBook or online at
www.gutenberg.org. If you are not located in the United States, you'll have
to check the laws of the country where you are located before using this ebook.
Title: Alice’s Adventures in Wonderland
Author: Lewis Carroll
Release Date: June 25, 2008 [EBook #11]
[Most recently updated: October 12, 2020]
Language: English
Character set encoding: UTF-8
*** START OF THIS PROJECT GUTENBERG EBOOK ALICE’S ADVENTURES IN WONDERLAND ***
Produced by Arthur DiBianca and David Widger
[Illustration]
Alice’s Adventures in Wonderland
by Lewis Carroll
THE MILLENNIUM FULCRUM EDITION 3.0
Contents
CHAPTER I. Down the Rabbit-Hole
CHAPTER II. The Pool of Tears
CHAPTER III. A Caucus-Race and a Long Tale
CHAPTER IV. The Rabbit Sends in a Little Bill
CHAPTER V. Advice from a Caterpillar
CHAPTER VI. Pig and Pepper
CHAPTER VII. A Mad Tea-Party
CHAPTER VIII. The Queen’s Croquet-Ground
CHAPTER IX. The Mock Turtle’s Story
CHAPTER X. The Lobster Quadrille
CHAPTER XI. Who Stole the Tarts?
CHAPTER XII. Alice’s Evidence
CHAPTER I.
Down the Rabbit-Hole
Alice was beginning to get very tired of sitting by her sister on the
bank, and of having nothing to do: once or twice she had peeped into
the book her sister was reading, but it had no pictures or
conversations in it, “and what is the use of a book,” thought Alice
“without pictures or conversations?”
So she was considering in her own mind (as well as she could, for the
hot day made her feel very sleepy and stupid), whether the pleasure of
making a daisy-chain would be worth the trouble of getting up and
picking the daisies, when suddenly a White Rabbit with pink eyes ran
close by her.
There was nothing so _very_ remarkable in that; nor did Alice think it
so _very_ much out of the way to hear the Rabbit say to itself, “Oh
dear! Oh dear! I shall be late!” (when she thought it over afterwards,
it occurred to her that she ought to have wondered at this, but at the
time it all seemed quite natural); but when the Rabbit actually _took a
watch out of its waistcoat-pocket_, and looked at it, and then hurried
on, Alice started to her feet, for it flashed across her mind that she
had never before seen a rabbit with either a waistcoat-pocket, or a
watch to take out of it, and burning with curiosity, she ran across the
field after it, and fortunately was just in time to see it pop down a
large rabbit-hole under the hedge.
In another moment down went Alice after it, never once considering how
in the world she was to get out again.
The rabbit-hole went straight on like a tunnel for some way, and then
dipped suddenly down, so suddenly that Alice had not a moment to think
about stopping herself before she found herself falling down a very
deep well.
Either the well was very deep, or she fell very slowly, for she had
plenty of time as she went down to look about her and to wonder what
was going to happen next. First, she tried to look down and make out
what she was coming to, but it was too dark to see anything; then she
looked at the sides of the well, and noticed that they were filled with
cupboards and book-shelves; here and there she saw maps and pictures
hung upon pegs. She took down a jar from one of the shelves as she
passed; it was labelled “ORANGE MARMALADE”, but to her great
disappointment it was empty: she did not like to drop the jar for fear
of killing somebody underneath, so managed to put it into one of the
cupboards as she fell past it.
“Well!” thought Alice to herself, “after such a fall as this, I shall
think nothing of tumbling down stairs! How brave they’ll all think me
at home! Why, I wouldn’t say anything about it, even if I fell off the
top of the house!” (Which was very likely true.)
Down, down, down. Would the fall _never_ come to an end? “I wonder how
many miles I’ve fallen by this time?” she said aloud. “I must be
getting somewhere near the centre of the earth. Let me see: that would
be four thousand miles down, I think—” (for, you see, Alice had learnt
several things of this sort in her lessons in the schoolroom, and
though this was not a _very_ good opportunity for showing off her
knowledge, as there was no one to listen to her, still it was good
practice to say it over) “—yes, that’s about the right distance—but
then I wonder what Latitude or Longitude I’ve got to?” (Alice had no
idea what Latitude was, or Longitude either, but thought they were nice
grand words to say.)
Presently she began again. “I wonder if I shall fall right _through_
the earth! How funny it’ll seem to come out among the people that walk
with their heads downward! The Antipathies, I think—” (she was rather
glad there _was_ no one listening, this time, as it didn’t sound at all
the right word) “—but I shall have to ask them what the name of the
country is, you know. Please, Ma’am, is this New Zealand or Australia?”
(and she tried to curtsey as she spoke—fancy _curtseying_ as you’re
falling through the air! Do you think you could manage it?) “And what
an ignorant little girl she’ll think me for asking! No, it’ll never do
to ask: perhaps I shall see it written up somewhere.”
Down, down, down. There was nothing else to do, so Alice soon began
talking again. “Dinah’ll miss me very much to-night, I should think!”
(Dinah was the cat.) “I hope they’ll remember her saucer of milk at
tea-time. Dinah my dear! I wish you were down here with me! There are
no mice in the air, I’m afraid, but you might catch a bat, and that’s
very like a mouse, you know. But do cats eat bats, I wonder?” And here
Alice began to get rather sleepy, and went on saying to herself, in a
dreamy sort of way, “Do cats eat bats? Do cats eat bats?” and
sometimes, “Do bats eat cats?” for, you see, as she couldn’t answer
either question, it didn’t much matter which way she put it. She felt
that she was dozing off, and had just begun to dream that she was
walking hand in hand with Dinah, and saying to her very earnestly,
“Now, Dinah, tell me the truth: did you ever eat a bat?” when suddenly,
thump! thump! down she came upon a heap of sticks and dry leaves, and
the fall was over.
Alice was not a bit hurt, and she jumped up on to her feet in a moment:
she looked up, but it was all dark overhead; before her was another
long passage, and the White Rabbit was still in sight, hurrying down
it. There was not a moment to be lost: away went Alice like the wind,
and was just in time to hear it say, as it turned a corner, “Oh my ears
and whiskers, how late it’s getting!” She was close behind it when she
turned the corner, but the Rabbit was no longer to be seen: she found
herself in a long, low hall, which was lit up by a row of lamps hanging
from the roof.
There were doors all round the hall, but they were all locked; and when
Alice had been all the way down one side and up the other, trying every
door, she walked sadly down the middle, wondering how she was ever to
get out again.
Suddenly she came upon a little three-legged table, all made of solid
glass; there was nothing on it except a tiny golden key, and Alice’s
first thought was that it might belong to one of the doors of the hall;
but, alas! either the locks were too large, or the key was too small,
but at any rate it would not open any of them. However, on the second
time round, she came upon a low curtain she had not noticed before, and
behind it was a little door about fifteen inches high: she tried the
little golden key in the lock, and to her great delight it fitted!
Alice opened the door and found that it led into a small passage, not
much larger than a rat-hole: she knelt down and looked along the
passage into the loveliest garden you ever saw. How she longed to get
out of that dark hall, and wander about among those beds of bright
flowers and those cool fountains, but she could not even get her head
through the doorway; “and even if my head would go through,” thought
poor Alice, “it would be of very little use without my shoulders. Oh,
how I wish I could shut up like a telescope! I think I could, if I only
knew how to begin.” For, you see, so many out-of-the-way things had
happened lately, that Alice had begun to think that very few things
indeed were really impossible.
There seemed to be no use in waiting by the little door, so she went
back to the table, half hoping she might find another key on it, or at
any rate a book of rules for shutting people up like telescopes: this
time she found a little bottle on it, (“which certainly was not here
before,” said Alice,) and round the neck of the bottle was a paper
label, with the words “DRINK ME,” beautifully printed on it in large
letters.
It was all very well to say “Drink me,” but the wise little Alice was
not going to do _that_ in a hurry. “No, I’ll look first,” she said,
“and see whether it’s marked ‘_poison_’ or not”; for she had read
several nice little histories about children who had got burnt, and
eaten up by wild beasts and other unpleasant things, all because they
_would_ not remember the simple rules their friends had taught them:
such as, that a red-hot poker will burn you if you hold it too long;
and that if you cut your finger _very_ deeply with a knife, it usually
bleeds; and she had never forgotten that, if you drink much from a
bottle marked “poison,” it is almost certain to disagree with you,
sooner or later.
However, this bottle was _not_ marked “poison,” so Alice ventured to
taste it, and finding it very nice, (it had, in fact, a sort of mixed
flavour of cherry-tart, custard, pine-apple, roast turkey, toffee, and
hot buttered toast,) she very soon finished it off.
* * * * * * *
* * * * * *
* * * * * * *
“What a curious feeling!” said Alice; “I must be shutting up like a
telescope.”
And so it was indeed: she was now only ten inches high, and her face
brightened up at the thought that she was now the right size for going
through the little door into that lovely garden. First, however, she
waited for a few minutes to see if she was going to shrink any further:
she felt a little nervous about this; “for it might end, you know,”
said Alice to herself, “in my going out altogether, like a candle. I
wonder what I should be like then?” And she tried to fancy what the
flame of a candle is like after the candle is blown out, for she could
not remember ever having seen such a thing.
After a while, finding that nothing more happened, she decided on going
into the garden at once; but, alas for poor Alice! when she got to the
door, she found she had forgotten the little golden key, and when she
went back to the table for it, she found she could not possibly reach
it: she could see it quite plainly through the glass, and she tried her
best to climb up one of the legs of the table, but it was too slippery;
and when she had tired herself out with trying, the poor little thing
sat down and cried.
“Come, there’s no use in crying like that!” said Alice to herself,
rather sharply; “I advise you to leave off this minute!” She generally
gave herself very good advice, (though she very seldom followed it),
and sometimes she scolded herself so severely as to bring tears into
her eyes; and once she remembered trying to box her own ears for having
cheated herself in a game of croquet she was playing against herself,
for this curious child was very fond of pretending to be two people.
“But it’s no use now,” thought poor Alice, “to pretend to be two
people! Why, there’s hardly enough of me left to make _one_ respectable
person!”
Soon her eye fell on a little glass box that was lying under the table:
she opened it, and found in it a very small cake, on which the words
“EAT ME” were beautifully marked in currants. “Well, I’ll eat it,” said
Alice, “and if it makes me grow larger, I can reach the key; and if it
makes me grow smaller, I can creep under the door; so either way I’ll
get into the garden, and I don’t care which happens!”
She ate a little bit, and said anxiously to herself, “Which way? Which
way?”, holding her hand on the top of her head to feel which way it was
growing, and she was quite surprised to find that she remained the same
size: to be sure, this generally happens when one eats cake, but Alice
had got so much into the way of expecting nothing but out-of-the-way
things to happen, that it seemed quite dull and stupid for life to go
on in the common way.
So she set to work, and very soon finished off the cake.
* * * * * * *
* * * * * *
* * * * * * *
CHAPTER II.
The Pool of Tears
“Curiouser and curiouser!” cried Alice (she was so much surprised, that
for the moment she quite forgot how to speak good English); “now I’m
opening out like the largest telescope that ever was! Good-bye, feet!”
(for when she looked down at her feet, they seemed to be almost out of
sight, they were getting so far off). “Oh, my poor little feet, I
wonder who will put on your shoes and stockings for you now, dears? I’m
sure _I_ shan’t be able! I shall be a great deal too far off to trouble
myself about you: you must manage the best way you can;—but I must be
kind to them,” thought Alice, “or perhaps they won’t walk the way I
want to go! Let me see: I’ll give them a new pair of boots every
Christmas.”
And she went on planning to herself how she would manage it. “They must
go by the carrier,” she thought; “and how funny it’ll seem, sending
presents to one’s own feet! And how odd the directions will look!
_Alice’s Right Foot, Esq., Hearthrug, near the Fender,_ (_with
Alice’s love_).
Oh dear, what nonsense I’m talking!”
Just then her head struck against the roof of the hall: in fact she was
now more than nine feet high, and she at once took up the little golden
key and hurried off to the garden door.
Poor Alice! It was as much as she could do, lying down on one side, to
look through into the garden with one eye; but to get through was more
hopeless than ever: she sat down and began to cry again.
“You ought to be ashamed of yourself,” said Alice, “a great girl like
you,” (she might well say this), “to go on crying in this way! Stop
this moment, I tell you!” But she went on all the same, shedding
gallons of tears, until there was a large pool all round her, about
four inches deep and reaching half down the hall.
After a time she heard a little pattering of feet in the distance, and
she hastily dried her eyes to see what was coming. It was the White
Rabbit returning, splendidly dressed, with a pair of white kid gloves
in one hand and a large fan in the other: he came trotting along in a
great hurry, muttering to himself as he came, “Oh! the Duchess, the
Duchess! Oh! won’t she be savage if I’ve kept her waiting!” Alice felt
so desperate that she was ready to ask help of any one; so, when the
Rabbit came near her, she began, in a low, timid voice, “If you please,
sir—” The Rabbit started violently, dropped the white kid gloves and
the fan, and skurried away into the darkness as hard as he could go.
Alice took up the fan and gloves, and, as the hall was very hot, she
kept fanning herself all the time she went on talking: “Dear, dear! How
queer everything is to-day! And yesterday things went on just as usual.
I wonder if I’ve been changed in the night? Let me think: was I the
same when I got up this morning? I almost think I can remember feeling
a little different. But if I’m not the same, the next question is, Who
in the world am I? Ah, _that’s_ the great puzzle!” And she began
thinking over all the children she knew that were of the same age as
herself, to see if she could have been changed for any of them.
“I’m sure I’m not Ada,” she said, “for her hair goes in such long
ringlets, and mine doesn’t go in ringlets at all; and I’m sure I can’t
be Mabel, for I know all sorts of things, and she, oh! she knows such a
very little! Besides, _she’s_ she, and _I’m_ I, and—oh dear, how
puzzling it all is! I’ll try if I know all the things I used to know.
Let me see: four times five is twelve, and four times six is thirteen,
and four times seven is—oh dear! I shall never get to twenty at that
rate! However, the Multiplication Table doesn’t signify: let’s try
Geography. London is the capital of Paris, and Paris is the capital of
Rome, and Rome—no, _that’s_ all wrong, I’m certain! I must have been
changed for Mabel! I’ll try and say ‘_How doth the little_—’” and she
crossed her hands on her lap as if she were saying lessons, and began
to repeat it, but her voice sounded hoarse and strange, and the words
did not come the same as they used to do:—
“How doth the little crocodile
Improve his shining tail,
And pour the waters of the Nile
On every golden scale!
“How cheerfully he seems to grin,
How neatly spread his claws,
And welcome little fishes in
With gently smiling jaws!”
“I’m sure those are not the right words,” said poor Alice, and her eyes
filled with tears again as she went on, “I must be Mabel after all, and
I shall have to go and live in that poky little house, and have next to
no toys to play with, and oh! ever so many lessons to learn! No, I’ve
made up my mind about it; if I’m Mabel, I’ll stay down here! It’ll be
no use their putting their heads down and saying ‘Come up again, dear!’
I shall only look up and say ‘Who am I then? Tell me that first, and
then, if I like being that person, I’ll come up: if not, I’ll stay down
here till I’m somebody else’—but, oh dear!” cried Alice, with a sudden
burst of tears, “I do wish they _would_ put their heads down! I am so
_very_ tired of being all alone here!”
As she said this she looked down at her hands, and was surprised to see
that she had put on one of the Rabbit’s little white kid gloves while
she was talking. “How _can_ I have done that?” she thought. “I must be
growing small again.” She got up and went to the table to measure
herself by it, and found that, as nearly as she could guess, she was
now about two feet high, and was going on shrinking rapidly: she soon
found out that the cause of this was the fan she was holding, and she
dropped it hastily, just in time to avoid shrinking away altogether.
“That _was_ a narrow escape!” said Alice, a good deal frightened at the
sudden change, but very glad to find herself still in existence; “and
now for the garden!” and she ran with all speed back to the little
door: but, alas! the little door was shut again, and the little golden
key was lying on the glass table as before, “and things are worse than
ever,” thought the poor child, “for I never was so small as this
before, never! And I declare it’s too bad, that it is!”
As she said these words her foot slipped, and in another moment,
splash! she was up to her chin in salt water. Her first idea was that
she had somehow fallen into the sea, “and in that case I can go back by
railway,” she said to herself. (Alice had been to the seaside once in
her life, and had come to the general conclusion, that wherever you go
to on the English coast you find a number of bathing machines in the
sea, some children digging in the sand with wooden spades, then a row
of lodging houses, and behind them a railway station.) However, she
soon made out that she was in the pool of tears which she had wept when
she was nine feet high.
“I wish I hadn’t cried so much!” said Alice, as she swam about, trying
to find her way out. “I shall be punished for it now, I suppose, by
being drowned in my own tears! That _will_ be a queer thing, to be
sure! However, everything is queer to-day.”
Just then she heard something splashing about in the pool a little way
off, and she swam nearer to make out what it was: at first she thought
it must be a walrus or hippopotamus, but then she remembered how small
she was now, and she soon made out that it was only a mouse that had
slipped in like herself.
“Would it be of any use, now,” thought Alice, “to speak to this mouse?
Everything is so out-of-the-way down here, that I should think very
likely it can talk: at any rate, there’s no harm in trying.” So she
began: “O Mouse, do you know the way out of this pool? I am very tired
of swimming about here, O Mouse!” (Alice thought this must be the right
way of speaking to a mouse: she had never done such a thing before, but
she remembered having seen in her brother’s Latin Grammar, “A mouse—of
a mouse—to a mouse—a mouse—O mouse!”) The Mouse looked at her rather
inquisitively, and seemed to her to wink with one of its little eyes,
but it said nothing.
“Perhaps it doesn’t understand English,” thought Alice; “I daresay it’s
a French mouse, come over with William the Conqueror.” (For, with all
her knowledge of history, Alice had no very clear notion how long ago
anything had happened.) So she began again: “Où est ma chatte?” which
was the first sentence in her French lesson-book. The Mouse gave a
sudden leap out of the water, and seemed to quiver all over with
fright. “Oh, I beg your pardon!” cried Alice hastily, afraid that she
had hurt the poor animal’s feelings. “I quite forgot you didn’t like
cats.”
“Not like cats!” cried the Mouse, in a shrill, passionate voice. “Would
_you_ like cats if you were me?”
“Well, perhaps not,” said Alice in a soothing tone: “don’t be angry
about it. And yet I wish I could show you our cat Dinah: I think you’d
take a fancy to cats if you could only see her. She is such a dear
quiet thing,” Alice went on, half to herself, as she swam lazily about
in the pool, “and she sits purring so nicely by the fire, licking her
paws and washing her face—and she is such a nice soft thing to
nurse—and she’s such a capital one for catching mice—oh, I beg your
pardon!” cried Alice again, for this time the Mouse was bristling all
over, and she felt certain it must be really offended. “We won’t talk
about her any more if you’d rather not.”
“We indeed!” cried the Mouse, who was trembling down to the end of his
tail. “As if _I_ would talk on such a subject! Our family always
_hated_ cats: nasty, low, vulgar things! Don’t let me hear the name
again!”
“I won’t indeed!” said Alice, in a great hurry to change the subject of
conversation. “Are you—are you fond—of—of dogs?” The Mouse did not
answer, so Alice went on eagerly: “There is such a nice little dog near
our house I should like to show you! A little bright-eyed terrier, you
know, with oh, such long curly brown hair! And it’ll fetch things when
you throw them, and it’ll sit up and beg for its dinner, and all sorts
of things—I can’t remember half of them—and it belongs to a farmer, you
know, and he says it’s so useful, it’s worth a hundred pounds! He says
it kills all the rats and—oh dear!” cried Alice in a sorrowful tone,
“I’m afraid I’ve offended it again!” For the Mouse was swimming away
from her as hard as it could go, and making quite a commotion in the
pool as it went.
So she called softly after it, “Mouse dear! Do come back again, and we
won’t talk about cats or dogs either, if you don’t like them!” When the
Mouse heard this, it turned round and swam slowly back to her: its face
was quite pale (with passion, Alice thought), and it said in a low
trembling voice, “Let us get to the shore, and then I’ll tell you my
history, and you’ll understand why it is I hate cats and dogs.”
It was high time to go, for the pool was getting quite crowded with the
birds and animals that had fallen into it: there were a Duck and a
Dodo, a Lory and an Eaglet, and several other curious creatures. Alice
led the way, and the whole party swam to the shore.
CHAPTER III.
A Caucus-Race and a Long Tale
They were indeed a queer-looking party that assembled on the bank—the
birds with draggled feathers, the animals with their fur clinging close
to them, and all dripping wet, cross, and uncomfortable.
The first question of course was, how to get dry again: they had a
consultation about this, and after a few minutes it seemed quite
natural to Alice to find herself talking familiarly with them, as if
she had known them all her life. Indeed, she had quite a long argument
with the Lory, who at last turned sulky, and would only say, “I am
older than you, and must know better;” and this Alice would not allow
without knowing how old it was, and, as the Lory positively refused to
tell its age, there was no more to be said.
At last the Mouse, who seemed to be a person of authority among them,
called out, “Sit down, all of you, and listen to me! _I’ll_ soon make
you dry enough!” They all sat down at once, in a large ring, with the
Mouse in the middle. Alice kept her eyes anxiously fixed on it, for she
felt sure she would catch a bad cold if she did not get dry very soon.
“Ahem!” said the Mouse with an important air, “are you all ready? This
is the driest thing I know. Silence all round, if you please! ‘William
the Conqueror, whose cause was favoured by the pope, was soon submitted
to by the English, who wanted leaders, and had been of late much
accustomed to usurpation and conquest. Edwin and Morcar, the earls of
Mercia and Northumbria—’”
“Ugh!” said the Lory, with a shiver.
“I beg your pardon!” said the Mouse, frowning, but very politely: “Did
you speak?”
“Not I!” said the Lory hastily.
“I thought you did,” said the Mouse. “—I proceed. ‘Edwin and Morcar,
the earls of Mercia and Northumbria, declared for him: and even
Stigand, the patriotic archbishop of Canterbury, found it advisable—’”
“Found _what_?” said the Duck.
“Found _it_,” the Mouse replied rather crossly: “of course you know
what ‘it’ means.”
“I know what ‘it’ means well enough, when _I_ find a thing,” said the
Duck: “it’s generally a frog or a worm. The question is, what did the
archbishop find?”
The Mouse did not notice this question, but hurriedly went on, “‘—found
it advisable to go with Edgar Atheling to meet William and offer him
the crown. William’s conduct at first was moderate. But the insolence
of his Normans—’ How are you getting on now, my dear?” it continued,
turning to Alice as it spoke.
“As wet as ever,” said Alice in a melancholy tone: “it doesn’t seem to
dry me at all.”
“In that case,” said the Dodo solemnly, rising to its feet, “I move
that the meeting adjourn, for the immediate adoption of more energetic
remedies—”
“Speak English!” said the Eaglet. “I don’t know the meaning of half
those long words, and, what’s more, I don’t believe you do either!” And
the Eaglet bent down its head to hide a smile: some of the other birds
tittered audibly.
“What I was going to say,” said the Dodo in an offended tone, “was,
that the best thing to get us dry would be a Caucus-race.”
“What _is_ a Caucus-race?” said Alice; not that she wanted much to
know, but the Dodo had paused as if it thought that _somebody_ ought to
speak, and no one else seemed inclined to say anything.
“Why,” said the Dodo, “the best way to explain it is to do it.” (And,
as you might like to try the thing yourself, some winter day, I will
tell you how the Dodo managed it.)
First it marked out a race-course, in a sort of circle, (“the exact
shape doesn’t matter,” it said,) and then all the party were placed
along the course, here and there. There was no “One, two, three, and
away,” but they began running when they liked, and left off when they
liked, so that it was not easy to know when the race was over. However,
when they had been running half an hour or so, and were quite dry
again, the Dodo suddenly called out “The race is over!” and they all
crowded round it, panting, and asking, “But who has won?”
This question the Dodo could not answer without a great deal of
thought, and it sat for a long time with one finger pressed upon its
forehead (the position in which you usually see Shakespeare, in the
pictures of him), while the rest waited in silence. At last the Dodo
said, “_Everybody_ has won, and all must have prizes.”
“But who is to give the prizes?” quite a chorus of voices asked.
“Why, _she_, of course,” said the Dodo, pointing to Alice with one
finger; and the whole party at once crowded round her, calling out in a
confused way, “Prizes! Prizes!”
Alice had no idea what to do, and in despair she put her hand in her
pocket, and pulled out a box of comfits, (luckily the salt water had
not got into it), and handed them round as prizes. There was exactly
one a-piece, all round.
“But she must have a prize herself, you know,” said the Mouse.
“Of course,” the Dodo replied very gravely. “What else have you got in
your pocket?” he went on, turning to Alice.
“Only a thimble,” said Alice sadly.
“Hand it over here,” said the Dodo.
Then they all crowded round her once more, while the Dodo solemnly
presented the thimble, saying “We beg your acceptance of this elegant
thimble;” and, when it had finished this short speech, they all
cheered.
Alice thought the whole thing very absurd, but they all looked so grave
that she did not dare to laugh; and, as she could not think of anything
to say, she simply bowed, and took the thimble, looking as solemn as
she could.
The next thing was to eat the comfits: this caused some noise and
confusion, as the large birds complained that they could not taste
theirs, and the small ones choked and had to be patted on the back.
However, it was over at last, and they sat down again in a ring, and
begged the Mouse to tell them something more.
“You promised to tell me your history, you know,” said Alice, “and why
it is you hate—C and D,” she added in a whisper, half afraid that it
would be offended again.
“Mine is a long and a sad tale!” said the Mouse, turning to Alice, and
sighing.
“It _is_ a long tail, certainly,” said Alice, looking down with wonder
at the Mouse’s tail; “but why do you call it sad?” And she kept on
puzzling about it while the Mouse was speaking, so that her idea of the
tale was something like this:—
“Fury said to a mouse, That he met in the house, ‘Let us both
go to law: _I_ will prosecute _you_.—Come, I’ll take no
denial; We must have a trial: For really this morning I’ve
nothing to do.’ Said the mouse to the cur, ‘Such a trial, dear
sir, With no jury or judge, would be wasting our breath.’
‘I’ll be judge, I’ll be jury,’ Said cunning old Fury: ‘I’ll
try the whole cause, and condemn you to death.’”
“You are not attending!” said the Mouse to Alice severely. “What are
you thinking of?”
“I beg your pardon,” said Alice very humbly: “you had got to the fifth
bend, I think?”
“I had _not!_” cried the Mouse, sharply and very angrily.
“A knot!” said Alice, always ready to make herself useful, and looking
anxiously about her. “Oh, do let me help to undo it!”
“I shall do nothing of the sort,” said the Mouse, getting up and
walking away. “You insult me by talking such nonsense!”
“I didn’t mean it!” pleaded poor Alice. “But you’re so easily offended,
you know!”
The Mouse only growled in reply.
“Please come back and finish your story!” Alice called after it; and
the others all joined in chorus, “Yes, please do!” but the Mouse only
shook its head impatiently, and walked a little quicker.
“What a pity it wouldn’t stay!” sighed the Lory, as soon as it was
quite out of sight; and an old Crab took the opportunity of saying to
her daughter “Ah, my dear! Let this be a lesson to you never to lose
_your_ temper!” “Hold your tongue, Ma!” said the young Crab, a little
snappishly. “You’re enough to try the patience of an oyster!”
“I wish I had our Dinah here, I know I do!” said Alice aloud,
addressing nobody in particular. “She’d soon fetch it back!”
“And who is Dinah, if I might venture to ask the question?” said the
Lory.
Alice replied eagerly, for she was always ready to talk about her pet:
“Dinah’s our cat. And she’s such a capital one for catching mice you
can’t think! And oh, I wish you could see her after the birds! Why,
she’ll eat a little bird as soon as look at it!”
This speech caused a remarkable sensation among the party. Some of the
birds hurried off at once: one old Magpie began wrapping itself up very
carefully, remarking, “I really must be getting home; the night-air
doesn’t suit my throat!” and a Canary called out in a trembling voice
to its children, “Come away, my dears! It’s high time you were all in
bed!” On various pretexts they all moved off, and Alice was soon left
alone.
“I wish I hadn’t mentioned Dinah!” she said to herself in a melancholy
tone. “Nobody seems to like her, down here, and I’m sure she’s the best
cat in the world! Oh, my dear Dinah! I wonder if I shall ever see you
any more!” And here poor Alice began to cry again, for she felt very
lonely and low-spirited. In a little while, however, she again heard a
little pattering of footsteps in the distance, and she looked up
eagerly, half hoping that the Mouse had changed his mind, and was
coming back to finish his story.
CHAPTER IV.
The Rabbit Sends in a Little Bill
It was the White Rabbit, trotting slowly back again, and looking
anxiously about as it went, as if it had lost something; and she heard
it muttering to itself “The Duchess! The Duchess! Oh my dear paws! Oh
my fur and whiskers! She’ll get me executed, as sure as ferrets are
ferrets! Where _can_ I have dropped them, I wonder?” Alice guessed in a
moment that it was looking for the fan and the pair of white kid
gloves, and she very good-naturedly began hunting about for them, but
they were nowhere to be seen—everything seemed to have changed since
her swim in the pool, and the great hall, with the glass table and the
little door, had vanished completely.
Very soon the Rabbit noticed Alice, as she went hunting about, and
called out to her in an angry tone, “Why, Mary Ann, what _are_ you
doing out here? Run home this moment, and fetch me a pair of gloves and
a fan! Quick, now!” And Alice was so much frightened that she ran off
at once in the direction it pointed to, without trying to explain the
mistake it had made.
“He took me for his housemaid,” she said to herself as she ran. “How
surprised he’ll be when he finds out who I am! But I’d better take him
his fan and gloves—that is, if I can find them.” As she said this, she
came upon a neat little house, on the door of which was a bright brass
plate with the name “W. RABBIT,” engraved upon it. She went in without
knocking, and hurried upstairs, in great fear lest she should meet the
real Mary Ann, and be turned out of the house before she had found the
fan and gloves.
“How queer it seems,” Alice said to herself, “to be going messages for
a rabbit! I suppose Dinah’ll be sending me on messages next!” And she
began fancying the sort of thing that would happen: “‘Miss Alice! Come
here directly, and get ready for your walk!’ ‘Coming in a minute,
nurse! But I’ve got to see that the mouse doesn’t get out.’ Only I
don’t think,” Alice went on, “that they’d let Dinah stop in the house
if it began ordering people about like that!”
By this time she had found her way into a tidy little room with a table
in the window, and on it (as she had hoped) a fan and two or three
pairs of tiny white kid gloves: she took up the fan and a pair of the
gloves, and was just going to leave the room, when her eye fell upon a
little bottle that stood near the looking-glass. There was no label
this time with the words “DRINK ME,” but nevertheless she uncorked it
and put it to her lips. “I know _something_ interesting is sure to
happen,” she said to herself, “whenever I eat or drink anything; so
I’ll just see what this bottle does. I do hope it’ll make me grow large
again, for really I’m quite tired of being such a tiny little thing!”
It did so indeed, and much sooner than she had expected: before she had
drunk half the bottle, she found her head pressing against the ceiling,
and had to stoop to save her neck from being broken. She hastily put
down the bottle, saying to herself “That’s quite enough—I hope I shan’t
grow any more—As it is, I can’t get out at the door—I do wish I hadn’t
drunk quite so much!”
Alas! it was too late to wish that! She went on growing, and growing,
and very soon had to kneel down on the floor: in another minute there
was not even room for this, and she tried the effect of lying down with
one elbow against the door, and the other arm curled round her head.
Still she went on growing, and, as a last resource, she put one arm out
of the window, and one foot up the chimney, and said to herself “Now I
can do no more, whatever happens. What _will_ become of me?”
Luckily for Alice, the little magic bottle had now had its full effect,
and she grew no larger: still it was very uncomfortable, and, as there
seemed to be no sort of chance of her ever getting out of the room
again, no wonder she felt unhappy.
“It was much pleasanter at home,” thought poor Alice, “when one wasn’t
always growing larger and smaller, and being ordered about by mice and
rabbits. I almost wish I hadn’t gone down that rabbit-hole—and yet—and
yet—it’s rather curious, you know, this sort of life! I do wonder what
_can_ have happened to me! When I used to read fairy-tales, I fancied
that kind of thing never happened, and now here I am in the middle of
one! There ought to be a book written about me, that there ought! And
when I grow up, I’ll write one—but I’m grown up now,” she added in a
sorrowful tone; “at least there’s no room to grow up any more _here_.”
“But then,” thought Alice, “shall I _never_ get any older than I am
now? That’ll be a comfort, one way—never to be an old woman—but
then—always to have lessons to learn! Oh, I shouldn’t like _that!_”
“Oh, you foolish Alice!” she answered herself. “How can you learn
lessons in here? Why, there’s hardly room for _you_, and no room at all
for any lesson-books!”
And so she went on, taking first one side and then the other, and
making quite a conversation of it altogether; but after a few minutes
she heard a voice outside, and stopped to listen.
“Mary Ann! Mary Ann!” said the voice. “Fetch me my gloves this moment!”
Then came a little pattering of feet on the stairs. Alice knew it was
the Rabbit coming to look for her, and she trembled till she shook the
house, quite forgetting that she was now about a thousand times as
large as the Rabbit, and had no reason to be afraid of it.
Presently the Rabbit came up to the door, and tried to open it; but, as
the door opened inwards, and Alice’s elbow was pressed hard against it,
that attempt proved a failure. Alice heard it say to itself “Then I’ll
go round and get in at the window.”
“_That_ you won’t!” thought Alice, and, after waiting till she fancied
she heard the Rabbit just under the window, she suddenly spread out her
hand, and made a snatch in the air. She did not get hold of anything,
but she heard a little shriek and a fall, and a crash of broken glass,
from which she concluded that it was just possible it had fallen into a
cucumber-frame, or something of the sort.
Next came an angry voice—the Rabbit’s—“Pat! Pat! Where are you?” And
then a voice she had never heard before, “Sure then I’m here! Digging
for apples, yer honour!”
“Digging for apples, indeed!” said the Rabbit angrily. “Here! Come and
help me out of _this!_” (Sounds of more broken glass.)
“Now tell me, Pat, what’s that in the window?”
“Sure, it’s an arm, yer honour!” (He pronounced it “arrum.”)
“An arm, you goose! Who ever saw one that size? Why, it fills the whole
window!”
“Sure, it does, yer honour: but it’s an arm for all that.”
“Well, it’s got no business there, at any rate: go and take it away!”
There was a long silence after this, and Alice could only hear whispers
now and then; such as, “Sure, I don’t like it, yer honour, at all, at
all!” “Do as I tell you, you coward!” and at last she spread out her
hand again, and made another snatch in the air. This time there were
_two_ little shrieks, and more sounds of broken glass. “What a number
of cucumber-frames there must be!” thought Alice. “I wonder what
they’ll do next! As for pulling me out of the window, I only wish they
_could!_ I’m sure _I_ don’t want to stay in here any longer!”
She waited for some time without hearing anything more: at last came a
rumbling of little cartwheels, and the sound of a good many voices all
talking together: she made out the words: “Where’s the other
ladder?—Why, I hadn’t to bring but one; Bill’s got the other—Bill!
fetch it here, lad!—Here, put ’em up at this corner—No, tie ’em
together first—they don’t reach half high enough yet—Oh! they’ll do
well enough; don’t be particular—Here, Bill! catch hold of this
rope—Will the roof bear?—Mind that loose slate—Oh, it’s coming down!
Heads below!” (a loud crash)—“Now, who did that?—It was Bill, I
fancy—Who’s to go down the chimney?—Nay, _I_ shan’t! _You_ do
it!—_That_ I won’t, then!—Bill’s to go down—Here, Bill! the master says
you’re to go down the chimney!”
“Oh! So Bill’s got to come down the chimney, has he?” said Alice to
herself. “Why, they seem to put everything upon Bill! I wouldn’t be in
Bill’s place for a good deal: this fireplace is narrow, to be sure; but
I _think_ I can kick a little!”
She drew her foot as far down the chimney as she could, and waited till
she heard a little animal (she couldn’t guess of what sort it was)
scratching and scrambling about in the chimney close above her: then,
saying to herself “This is Bill,” she gave one sharp kick, and waited
to see what would happen next.
The first thing she heard was a general chorus of “There goes Bill!”
then the Rabbit’s voice alone—“Catch him, you by the hedge!” then
silence, and then another confusion of voices—“Hold up his head—Brandy
now—Don’t choke him—How was it, old fellow? What happened to you? Tell
us all about it!”
Last came a little feeble, squeaking voice, (“That’s Bill,” thought
Alice,) “Well, I hardly know—No more, thank ye; I’m better now—but I’m
a deal too flustered to tell you—all I know is, something comes at me
like a Jack-in-the-box, and up I goes like a sky-rocket!”
“So you did, old fellow!” said the others.
“We must burn the house down!” said the Rabbit’s voice; and Alice
called out as loud as she could, “If you do, I’ll set Dinah at you!”
There was a dead silence instantly, and Alice thought to herself, “I
wonder what they _will_ do next! If they had any sense, they’d take the
roof off.” After a minute or two, they began moving about again, and
Alice heard the Rabbit say, “A barrowful will do, to begin with.”
“A barrowful of _what?_” thought Alice; but she had not long to doubt,
for the next moment a shower of little pebbles came rattling in at the
window, and some of them hit her in the face. “I’ll put a stop to
this,” she said to herself, and shouted out, “You’d better not do that
again!” which produced another dead silence.
Alice noticed with some surprise that the pebbles were all turning into
little cakes as they lay on the floor, and a bright idea came into her
head. “If I eat one of these cakes,” she thought, “it’s sure to make
_some_ change in my size; and as it can’t possibly make me larger, it
must make me smaller, I suppose.”
So she swallowed one of the cakes, and was delighted to find that she
began shrinking directly. As soon as she was small enough to get
through the door, she ran out of the house, and found quite a crowd of
little animals and birds waiting outside. The poor little Lizard, Bill,
was in the middle, being held up by two guinea-pigs, who were giving it
something out of a bottle. They all made a rush at Alice the moment she
appeared; but she ran off as hard as she could, and soon found herself
safe in a thick wood.
“The first thing I’ve got to do,” said Alice to herself, as she
wandered about in the wood, “is to grow to my right size again; and the
second thing is to find my way into that lovely garden. I think that
will be the best plan.”
It sounded an excellent plan, no doubt, and very neatly and simply
arranged; the only difficulty was, that she had not the smallest idea
how to set about it; and while she was peering about anxiously among
the trees, a little sharp bark just over her head made her look up in a
great hurry.
An enormous puppy was looking down at her with large round eyes, and
feebly stretching out one paw, trying to touch her. “Poor little
thing!” said Alice, in a coaxing tone, and she tried hard to whistle to
it; but she was terribly frightened all the time at the thought that it
might be hungry, in which case it would be very likely to eat her up in
spite of all her coaxing.
Hardly knowing what she did, she picked up a little bit of stick, and
held it out to the puppy; whereupon the puppy jumped into the air off
all its feet at once, with a yelp of delight, and rushed at the stick,
and made believe to worry it; then Alice dodged behind a great thistle,
to keep herself from being run over; and the moment she appeared on the
other side, the puppy made another rush at the stick, and tumbled head
over heels in its hurry to get hold of it; then Alice, thinking it was
very like having a game of play with a cart-horse, and expecting every
moment to be trampled under its feet, ran round the thistle again; then
the puppy began a series of short charges at the stick, running a very
little way forwards each time and a long way back, and barking hoarsely
all the while, till at last it sat down a good way off, panting, with
its tongue hanging out of its mouth, and its great eyes half shut.
This seemed to Alice a good opportunity for making her escape; so she
set off at once, and ran till she was quite tired and out of breath,
and till the puppy’s bark sounded quite faint in the distance.
“And yet what a dear little puppy it was!” said Alice, as she leant
against a buttercup to rest herself, and fanned herself with one of the
leaves: “I should have liked teaching it tricks very much, if—if I’d
only been the right size to do it! Oh dear! I’d nearly forgotten that
I’ve got to grow up again! Let me see—how _is_ it to be managed? I
suppose I ought to eat or drink something or other; but the great
question is, what?”
The great question certainly was, what? Alice looked all round her at
the flowers and the blades of grass, but she did not see anything that
looked like the right thing to eat or drink under the circumstances.
There was a large mushroom growing near her, about the same height as
herself; and when she had looked under it, and on both sides of it, and
behind it, it occurred to her that she might as well look and see what
was on the top of it.
She stretched herself up on tiptoe, and peeped over the edge of the
mushroom, and her eyes immediately met those of a large blue
caterpillar, that was sitting on the top with its arms folded, quietly
smoking a long hookah, and taking not the smallest notice of her or of
anything else.
CHAPTER V.
Advice from a Caterpillar
The Caterpillar and Alice looked at each other for some time in
silence: at last the Caterpillar took the hookah out of its mouth, and
addressed her in a languid, sleepy voice.
“Who are _you?_” said the Caterpillar.
This was not an encouraging opening for a conversation. Alice replied,
rather shyly, “I—I hardly know, sir, just at present—at least I know
who I _was_ when I got up this morning, but I think I must have been
changed several times since then.”
“What do you mean by that?” said the Caterpillar sternly. “Explain
yourself!”
“I can’t explain _myself_, I’m afraid, sir,” said Alice, “because I’m
not myself, you see.”
“I don’t see,” said the Caterpillar.
“I’m afraid I can’t put it more clearly,” Alice replied very politely,
“for I can’t understand it myself to begin with; and being so many
different sizes in a day is very confusing.”
“It isn’t,” said the Caterpillar.
“Well, perhaps you haven’t found it so yet,” said Alice; “but when you
have to turn into a chrysalis—you will some day, you know—and then
after that into a butterfly, I should think you’ll feel it a little
queer, won’t you?”
“Not a bit,” said the Caterpillar.
“Well, perhaps your feelings may be different,” said Alice; “all I know
is, it would feel very queer to _me_.”
“You!” said the Caterpillar contemptuously. “Who are _you?_”
Which brought them back again to the beginning of the conversation.
Alice felt a little irritated at the Caterpillar’s making such _very_
short remarks, and she drew herself up and said, very gravely, “I
think you ought to tell me who _you_ are, first.”
“Why?” said the Caterpillar.
Here was another puzzling question; and as Alice could not think of any
good reason, and as the Caterpillar seemed to be in a _very_ unpleasant
state of mind, she turned away.
“Come back!” the Caterpillar called after her. “I’ve something
important to say!”
This sounded promising, certainly: Alice turned and came back again.
“Keep your temper,” said the Caterpillar.
“Is that all?” said Alice, swallowing down her anger as well as she
could.
“No,” said the Caterpillar.
Alice thought she might as well wait, as she had nothing else to do,
and perhaps after all it might tell her something worth hearing. For
some minutes it puffed away without speaking, but at last it unfolded
its arms, took the hookah out of its mouth again, and said, “So you
think you’re changed, do you?”
“I’m afraid I am, sir,” said Alice; “I can’t remember things as I
used—and I don’t keep the same size for ten minutes together!”
“Can’t remember _what_ things?” said the Caterpillar.
“Well, I’ve tried to say ‘How doth the little busy bee,’ but it all
came different!” Alice replied in a very melancholy voice.
“Repeat, ‘_You are old, Father William_,’” said the Caterpillar.
Alice folded her hands, and began:—
“You are old, Father William,” the young man said,
“And your hair has become very white;
And yet you incessantly stand on your head—
Do you think, at your age, it is right?”
“In my youth,” Father William replied to his son,
“I feared it might injure the brain;
But, now that I’m perfectly sure I have none,
Why, I do it again and again.”
“You are old,” said the youth, “as I mentioned before,
And have grown most uncommonly fat;
Yet you turned a back-somersault in at the door—
Pray, what is the reason of that?”
“In my youth,” said the sage, as he shook his grey locks,
“I kept all my limbs very supple
By the use of this ointment—one shilling the box—
Allow me to sell you a couple?”
“You are old,” said the youth, “and your jaws are too weak
For anything tougher than suet;
Yet you finished the goose, with the bones and the beak—
Pray, how did you manage to do it?”
“In my youth,” said his father, “I took to the law,
And argued each case with my wife;
And the muscular strength, which it gave to my jaw,
Has lasted the rest of my life.”
“You are old,” said the youth, “one would hardly suppose
That your eye was as steady as ever;
Yet you balanced an eel on the end of your nose—
What made you so awfully clever?”
“I have answered three questions, and that is enough,”
Said his father; “don’t give yourself airs!
Do you think I can listen all day to such stuff?
Be off, or I’ll kick you down stairs!”
“That is not said right,” said the Caterpillar.
“Not _quite_ right, I’m afraid,” said Alice, timidly; “some of the
words have got altered.”
“It is wrong from beginning to end,” said the Caterpillar decidedly,
and there was silence for some minutes.
The Caterpillar was the first to speak.
“What size do you want to be?” it asked.
“Oh, I’m not particular as to size,” Alice hastily replied; “only one
doesn’t like changing so often, you know.”
“I _don’t_ know,” said the Caterpillar.
Alice said nothing: she had never been so much contradicted in her life
before, and she felt that she was losing her temper.
“Are you content now?” said the Caterpillar.
“Well, I should like to be a _little_ larger, sir, if you wouldn’t
mind,” said Alice: “three inches is such a wretched height to be.”
“It is a very good height indeed!” said the Caterpillar angrily,
rearing itself upright as it spoke (it was exactly three inches high).
“But I’m not used to it!” pleaded poor Alice in a piteous tone. And she
thought to herself, “I wish the creatures wouldn’t be so easily
offended!”
“You’ll get used to it in time,” said the Caterpillar; and it put the
hookah into its mouth and began smoking again.
This time Alice waited patiently until it chose to speak again. In a
minute or two the Caterpillar took the hookah out of its mouth and
yawned once or twice, and shook itself. Then it got down off the
mushroom, and crawled away in the grass, merely remarking as it went,
“One side will make you grow taller, and the other side will make you
grow shorter.”
“One side of _what?_ The other side of _what?_” thought Alice to
herself.
“Of the mushroom,” said the Caterpillar, just as if she had asked it
aloud; and in another moment it was out of sight.
Alice remained looking thoughtfully at the mushroom for a minute,
trying to make out which were the two sides of it; and as it was
perfectly round, she found this a very difficult question. However, at
last she stretched her arms round it as far as they would go, and broke
off a bit of the edge with each hand.
“And now which is which?” she said to herself, and nibbled a little of
the right-hand bit to try the effect: the next moment she felt a
violent blow underneath her chin: it had struck her foot!
She was a good deal frightened by this very sudden change, but she felt
that there was no time to be lost, as she was shrinking rapidly; so she
set to work at once to eat some of the other bit. Her chin was pressed
so closely against her foot, that there was hardly room to open her
mouth; but she did it at last, and managed to swallow a morsel of the
left-hand bit.
* * * * * * *
* * * * * *
* * * * * * *
“Come, my head’s free at last!” said Alice in a tone of delight, which
changed into alarm in another moment, when she found that her shoulders
were nowhere to be found: all she could see, when she looked down, was
an immense length of neck, which seemed to rise like a stalk out of a
sea of green leaves that lay far below her.
“What _can_ all that green stuff be?” said Alice. “And where _have_ my
shoulders got to? And oh, my poor hands, how is it I can’t see you?”
She was moving them about as she spoke, but no result seemed to follow,
except a little shaking among the distant green leaves.
As there seemed to be no chance of getting her hands up to her head,
she tried to get her head down to them, and was delighted to find that
her neck would bend about easily in any direction, like a serpent. She
had just succeeded in curving it down into a graceful zigzag, and was
going to dive in among the leaves, which she found to be nothing but
the tops of the trees under which she had been wandering, when a sharp
hiss made her draw back in a hurry: a large pigeon had flown into her
face, and was beating her violently with its wings.
“Serpent!” screamed the Pigeon.
“I’m _not_ a serpent!” said Alice indignantly. “Let me alone!”
“Serpent, I say again!” repeated the Pigeon, but in a more subdued
tone, and added with a kind of sob, “I’ve tried every way, and nothing
seems to suit them!”
“I haven’t the least idea what you’re talking about,” said Alice.
“I’ve tried the roots of trees, and I’ve tried banks, and I’ve tried
hedges,” the Pigeon went on, without attending to her; “but those
serpents! There’s no pleasing them!”
Alice was more and more puzzled, but she thought there was no use in
saying anything more till the Pigeon had finished.
“As if it wasn’t trouble enough hatching the eggs,” said the Pigeon;
“but I must be on the look-out for serpents night and day! Why, I
haven’t had a wink of sleep these three weeks!”
“I’m very sorry you’ve been annoyed,” said Alice, who was beginning to
see its meaning.
“And just as I’d taken the highest tree in the wood,” continued the
Pigeon, raising its voice to a shriek, “and just as I was thinking I
should be free of them at last, they must needs come wriggling down
from the sky! Ugh, Serpent!”
“But I’m _not_ a serpent, I tell you!” said Alice. “I’m a—I’m a—”
“Well! _What_ are you?” said the Pigeon. “I can see you’re trying to
invent something!”
“I—I’m a little girl,” said Alice, rather doubtfully, as she remembered
the number of changes she had gone through that day.
“A likely story indeed!” said the Pigeon in a tone of the deepest
contempt. “I’ve seen a good many little girls in my time, but never
_one_ with such a neck as that! No, no! You’re a serpent; and there’s
no use denying it. I suppose you’ll be telling me next that you never
tasted an egg!”
“I _have_ tasted eggs, certainly,” said Alice, who was a very truthful
child; “but little girls eat eggs quite as much as serpents do, you
know.”
“I don’t believe it,” said the Pigeon; “but if they do, why then
they’re a kind of serpent, that’s all I can say.”
This was such a new idea to Alice, that she was quite silent for a
minute or two, which gave the Pigeon the opportunity of adding, “You’re
looking for eggs, I know _that_ well enough; and what does it matter to
me whether you’re a little girl or a serpent?”
“It matters a good deal to _me_,” said Alice hastily; “but I’m not
looking for eggs, as it happens; and if I was, I shouldn’t want
_yours_: I don’t like them raw.”
“Well, be off, then!” said the Pigeon in a sulky tone, as it settled
down again into its nest. Alice crouched down among the trees as well
as she could, for her neck kept getting entangled among the branches,
and every now and then she had to stop and untwist it. After a while
she remembered that she still held the pieces of mushroom in her hands,
and she set to work very carefully, nibbling first at one and then at
the other, and growing sometimes taller and sometimes shorter, until
she had succeeded in bringing herself down to her usual height.
It was so long since she had been anything near the right size, that it
felt quite strange at first; but she got used to it in a few minutes,
and began talking to herself, as usual. “Come, there’s half my plan
done now! How puzzling all these changes are! I’m never sure what I’m
going to be, from one minute to another! However, I’ve got back to my
right size: the next thing is, to get into that beautiful garden—how
_is_ that to be done, I wonder?” As she said this, she came suddenly
upon an open place, with a little house in it about four feet high.
“Whoever lives there,” thought Alice, “it’ll never do to come upon them
_this_ size: why, I should frighten them out of their wits!” So she
began nibbling at the righthand bit again, and did not venture to go
near the house till she had brought herself down to nine inches high.
CHAPTER VI.
Pig and Pepper
For a minute or two she stood looking at the house, and wondering what
to do next, when suddenly a footman in livery came running out of the
wood—(she considered him to be a footman because he was in livery:
otherwise, judging by his face only, she would have called him a
fish)—and rapped loudly at the door with his knuckles. It was opened by
another footman in livery, with a round face, and large eyes like a
frog; and both footmen, Alice noticed, had powdered hair that curled
all over their heads. She felt very curious to know what it was all
about, and crept a little way out of the wood to listen.
The Fish-Footman began by producing from under his arm a great letter,
nearly as large as himself, and this he handed over to the other,
saying, in a solemn tone, “For the Duchess. An invitation from the
Queen to play croquet.” The Frog-Footman repeated, in the same solemn
tone, only changing the order of the words a little, “From the Queen.
An invitation for the Duchess to play croquet.”
Then they both bowed low, and their curls got entangled together.
Alice laughed so much at this, that she had to run back into the wood
for fear of their hearing her; and when she next peeped out the
Fish-Footman was gone, and the other was sitting on the ground near the
door, staring stupidly up into the sky.
Alice went timidly up to the door, and knocked.
“There’s no sort of use in knocking,” said the Footman, “and that for
two reasons. First, because I’m on the same side of the door as you
are; secondly, because they’re making such a noise inside, no one could
possibly hear you.” And certainly there _was_ a most extraordinary
noise going on within—a constant howling and sneezing, and every now
and then a great crash, as if a dish or kettle had been broken to
pieces.
“Please, then,” said Alice, “how am I to get in?”
“There might be some sense in your knocking,” the Footman went on
without attending to her, “if we had the door between us. For instance,
if you were _inside_, you might knock, and I could let you out, you
know.” He was looking up into the sky all the time he was speaking, and
this Alice thought decidedly uncivil. “But perhaps he can’t help it,”
she said to herself; “his eyes are so _very_ nearly at the top of his
head. But at any rate he might answer questions.—How am I to get in?”
she repeated, aloud.
“I shall sit here,” the Footman remarked, “till tomorrow—”
At this moment the door of the house opened, and a large plate came
skimming out, straight at the Footman’s head: it just grazed his nose,
and broke to pieces against one of the trees behind him.
“—or next day, maybe,” the Footman continued in the same tone, exactly
as if nothing had happened.
“How am I to get in?” asked Alice again, in a louder tone.
“_Are_ you to get in at all?” said the Footman. “That’s the first
question, you know.”
It was, no doubt: only Alice did not like to be told so. “It’s really
dreadful,” she muttered to herself, “the way all the creatures argue.
It’s enough to drive one crazy!”
The Footman seemed to think this a good opportunity for repeating his
remark, with variations. “I shall sit here,” he said, “on and off, for
days and days.”
“But what am _I_ to do?” said Alice.
“Anything you like,” said the Footman, and began whistling.
“Oh, there’s no use in talking to him,” said Alice desperately: “he’s
perfectly idiotic!” And she opened the door and went in.
The door led right into a large kitchen, which was full of smoke from
one end to the other: the Duchess was sitting on a three-legged stool
in the middle, nursing a baby; the cook was leaning over the fire,
stirring a large cauldron which seemed to be full of soup.
“There’s certainly too much pepper in that soup!” Alice said to
herself, as well as she could for sneezing.
There was certainly too much of it in the air. Even the Duchess sneezed
occasionally; and as for the baby, it was sneezing and howling
alternately without a moment’s pause. The only things in the kitchen
that did not sneeze, were the cook, and a large cat which was sitting
on the hearth and grinning from ear to ear.
“Please would you tell me,” said Alice, a little timidly, for she was
not quite sure whether it was good manners for her to speak first, “why
your cat grins like that?”
“It’s a Cheshire cat,” said the Duchess, “and that’s why. Pig!”
She said the last word with such sudden violence that Alice quite
jumped; but she saw in another moment that it was addressed to the
baby, and not to her, so she took courage, and went on again:—
“I didn’t know that Cheshire cats always grinned; in fact, I didn’t
know that cats _could_ grin.”
“They all can,” said the Duchess; “and most of ’em do.”
“I don’t know of any that do,” Alice said very politely, feeling quite
pleased to have got into a conversation.
“You don’t know much,” said the Duchess; “and that’s a fact.”
Alice did not at all like the tone of this remark, and thought it would
be as well to introduce some other subject of conversation. While she
was trying to fix on one, the cook took the cauldron of soup off the
fire, and at once set to work throwing everything within her reach at
the Duchess and the baby—the fire-irons came first; then followed a
shower of saucepans, plates, and dishes. The Duchess took no notice of
them even when they hit her; and the baby was howling so much already,
that it was quite impossible to say whether the blows hurt it or not.
“Oh, _please_ mind what you’re doing!” cried Alice, jumping up and down
in an agony of terror. “Oh, there goes his _precious_ nose!” as an
unusually large saucepan flew close by it, and very nearly carried it
off.
“If everybody minded their own business,” the Duchess said in a hoarse
growl, “the world would go round a deal faster than it does.”
“Which would _not_ be an advantage,” said Alice, who felt very glad to
get an opportunity of showing off a little of her knowledge. “Just
think of what work it would make with the day and night! You see the
earth takes twenty-four hours to turn round on its axis—”
“Talking of axes,” said the Duchess, “chop off her head!”
Alice glanced rather anxiously at the cook, to see if she meant to take
the hint; but the cook was busily stirring the soup, and seemed not to
be listening, so she went on again: “Twenty-four hours, I _think_; or
is it twelve? I—”
“Oh, don’t bother _me_,” said the Duchess; “I never could abide
figures!” And with that she began nursing her child again, singing a
sort of lullaby to it as she did so, and giving it a violent shake at
the end of every line:
“Speak roughly to your little boy,
And beat him when he sneezes:
He only does it to annoy,
Because he knows it teases.”
CHORUS.
(In which the cook and the baby joined):
“Wow! wow! wow!”
While the Duchess sang the second verse of the song, she kept tossing
the baby violently up and down, and the poor little thing howled so,
that Alice could hardly hear the words:—
“I speak severely to my boy,
I beat him when he sneezes;
For he can thoroughly enjoy
The pepper when he pleases!”
CHORUS.
“Wow! wow! wow!”
“Here! you may nurse it a bit, if you like!” the Duchess said to Alice,
flinging the baby at her as she spoke. “I must go and get ready to play
croquet with the Queen,” and she hurried out of the room. The cook
threw a frying-pan after her as she went out, but it just missed her.
Alice caught the baby with some difficulty, as it was a queer-shaped
little creature, and held out its arms and legs in all directions,
“just like a star-fish,” thought Alice. The poor little thing was
snorting like a steam-engine when she caught it, and kept doubling
itself up and straightening itself out again, so that altogether, for
the first minute or two, it was as much as she could do to hold it.
As soon as she had made out the proper way of nursing it, (which was to
twist it up into a sort of knot, and then keep tight hold of its right
ear and left foot, so as to prevent its undoing itself,) she carried it
out into the open air. “If I don’t take this child away with me,”
thought Alice, “they’re sure to kill it in a day or two: wouldn’t it be
murder to leave it behind?” She said the last words out loud, and the
little thing grunted in reply (it had left off sneezing by this time).
“Don’t grunt,” said Alice; “that’s not at all a proper way of
expressing yourself.”
The baby grunted again, and Alice looked very anxiously into its face
to see what was the matter with it. There could be no doubt that it had
a _very_ turn-up nose, much more like a snout than a real nose; also
its eyes were getting extremely small for a baby: altogether Alice did
not like the look of the thing at all. “But perhaps it was only
sobbing,” she thought, and looked into its eyes again, to see if there
were any tears.
No, there were no tears. “If you’re going to turn into a pig, my dear,”
said Alice, seriously, “I’ll have nothing more to do with you. Mind
now!” The poor little thing sobbed again (or grunted, it was impossible
to say which), and they went on for some while in silence.
Alice was just beginning to think to herself, “Now, what am I to do
with this creature when I get it home?” when it grunted again, so
violently, that she looked down into its face in some alarm. This time
there could be _no_ mistake about it: it was neither more nor less than
a pig, and she felt that it would be quite absurd for her to carry it
further.
So she set the little creature down, and felt quite relieved to see it
trot away quietly into the wood. “If it had grown up,” she said to
herself, “it would have made a dreadfully ugly child: but it makes
rather a handsome pig, I think.” And she began thinking over other
children she knew, who might do very well as pigs, and was just saying
to herself, “if one only knew the right way to change them—” when she
was a little startled by seeing the Cheshire Cat sitting on a bough of
a tree a few yards off.
The Cat only grinned when it saw Alice. It looked good-natured, she
thought: still it had _very_ long claws and a great many teeth, so she
felt that it ought to be treated with respect.
“Cheshire Puss,” she began, rather timidly, as she did not at all know
whether it would like the name: however, it only grinned a little
wider. “Come, it’s pleased so far,” thought Alice, and she went on.
“Would you tell me, please, which way I ought to go from here?”
“That depends a good deal on where you want to get to,” said the Cat.
“I don’t much care where—” said Alice.
“Then it doesn’t matter which way you go,” said the Cat.
“—so long as I get _somewhere_,” Alice added as an explanation.
“Oh, you’re sure to do that,” said the Cat, “if you only walk long
enough.”
Alice felt that this could not be denied, so she tried another
question. “What sort of people live about here?”
“In _that_ direction,” the Cat said, waving its right paw round, “lives
a Hatter: and in _that_ direction,” waving the other paw, “lives a
March Hare. Visit either you like: they’re both mad.”
“But I don’t want to go among mad people,” Alice remarked.
“Oh, you can’t help that,” said the Cat: “we’re all mad here. I’m mad.
You’re mad.”
“How do you know I’m mad?” said Alice.
“You must be,” said the Cat, “or you wouldn’t have come here.”
Alice didn’t think that proved it at all; however, she went on “And how
do you know that you’re mad?”
“To begin with,” said the Cat, “a dog’s not mad. You grant that?”
“I suppose so,” said Alice.
“Well, then,” the Cat went on, “you see, a dog growls when it’s angry,
and wags its tail when it’s pleased. Now _I_ growl when I’m pleased,
and wag my tail when I’m angry. Therefore I’m mad.”
“_I_ call it purring, not growling,” said Alice.
“Call it what you like,” said the Cat. “Do you play croquet with the
Queen to-day?”
“I should like it very much,” said Alice, “but I haven’t been invited
yet.”
“You’ll see me there,” said the Cat, and vanished.
Alice was not much surprised at this, she was getting so used to queer
things happening. While she was looking at the place where it had been,
it suddenly appeared again.
“By-the-bye, what became of the baby?” said the Cat. “I’d nearly
forgotten to ask.”
“It turned into a pig,” Alice quietly said, just as if it had come back
in a natural way.
“I thought it would,” said the Cat, and vanished again.
Alice waited a little, half expecting to see it again, but it did not
appear, and after a minute or two she walked on in the direction in
which the March Hare was said to live. “I’ve seen hatters before,” she
said to herself; “the March Hare will be much the most interesting, and
perhaps as this is May it won’t be raving mad—at least not so mad as it
was in March.” As she said this, she looked up, and there was the Cat
again, sitting on a branch of a tree.
“Did you say pig, or fig?” said the Cat.
“I said pig,” replied Alice; “and I wish you wouldn’t keep appearing
and vanishing so suddenly: you make one quite giddy.”
“All right,” said the Cat; and this time it vanished quite slowly,
beginning with the end of the tail, and ending with the grin, which
remained some time after the rest of it had gone.
“Well! I’ve often seen a cat without a grin,” thought Alice; “but a
grin without a cat! It’s the most curious thing I ever saw in my life!”
She had not gone much farther before she came in sight of th
gitextract_0oj2913e/
├── .claude/
│ └── skills/
│ ├── rationalize-deps/
│ │ └── SKILL.md
│ └── simple-pr/
│ └── SKILL.md
├── .github/
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── actions.md
│ │ ├── bug_report.md
│ │ ├── feature_request.md
│ │ └── question.md
│ ├── dependabot.yml
│ └── workflows/
│ ├── coverage.yml
│ ├── long_running.yml
│ └── test.yml
├── .gitignore
├── ARCHITECTURE.md
├── AUTHORS
├── CHANGELOG.md
├── CITATION.cff
├── Cargo.toml
├── LICENSE
├── Makefile
├── README.md
├── RELEASE.md
├── TODO.txt
├── benches/
│ ├── agg_bench.rs
│ ├── alice.txt
│ ├── analyzer.rs
│ ├── and_or_queries.rs
│ ├── bool_queries_with_range.rs
│ ├── exists_json.rs
│ ├── gh.json
│ ├── hdfs.json
│ ├── index-bench.rs
│ ├── merge_segments.rs
│ ├── range_queries.rs
│ ├── range_query.rs
│ ├── regex_all_terms.rs
│ ├── str_search_and_get.rs
│ └── wiki.json
├── bitpacker/
│ ├── Cargo.toml
│ ├── benches/
│ │ └── bench.rs
│ └── src/
│ ├── bitpacker.rs
│ ├── blocked_bitpacker.rs
│ ├── filter_vec/
│ │ ├── avx2.rs
│ │ ├── mod.rs
│ │ └── scalar.rs
│ └── lib.rs
├── cliff.toml
├── columnar/
│ ├── Cargo.toml
│ ├── README.md
│ ├── benches/
│ │ ├── bench_access.rs
│ │ ├── bench_column_values_get.rs
│ │ ├── bench_create_column_values.rs
│ │ ├── bench_first_vals.rs
│ │ ├── bench_merge.rs
│ │ ├── bench_optional_index.rs
│ │ ├── bench_values_u128.rs
│ │ ├── bench_values_u64.rs
│ │ └── common.rs
│ ├── columnar-cli/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ └── main.rs
│ ├── columnar-cli-inspect/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ └── main.rs
│ ├── compat_tests_data/
│ │ ├── v1.columnar
│ │ └── v2.columnar
│ └── src/
│ ├── TODO.md
│ ├── block_accessor.rs
│ ├── column/
│ │ ├── dictionary_encoded.rs
│ │ ├── mod.rs
│ │ └── serialize.rs
│ ├── column_index/
│ │ ├── merge/
│ │ │ ├── mod.rs
│ │ │ ├── shuffled.rs
│ │ │ └── stacked.rs
│ │ ├── mod.rs
│ │ ├── multivalued_index.rs
│ │ ├── optional_index/
│ │ │ ├── mod.rs
│ │ │ ├── set.rs
│ │ │ ├── set_block/
│ │ │ │ ├── dense.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── sparse.rs
│ │ │ │ └── tests.rs
│ │ │ └── tests.rs
│ │ └── serialize.rs
│ ├── column_values/
│ │ ├── merge.rs
│ │ ├── mod.rs
│ │ ├── monotonic_column.rs
│ │ ├── monotonic_mapping.rs
│ │ ├── monotonic_mapping_u128.rs
│ │ ├── stats.rs
│ │ ├── u128_based/
│ │ │ ├── compact_space/
│ │ │ │ ├── blank_range.rs
│ │ │ │ ├── build_compact_space.rs
│ │ │ │ └── mod.rs
│ │ │ └── mod.rs
│ │ ├── u64_based/
│ │ │ ├── bitpacked.rs
│ │ │ ├── blockwise_linear.rs
│ │ │ ├── line.rs
│ │ │ ├── linear.rs
│ │ │ ├── mod.rs
│ │ │ ├── stats_collector.rs
│ │ │ └── tests.rs
│ │ └── vec_column.rs
│ ├── columnar/
│ │ ├── column_type.rs
│ │ ├── format_version.rs
│ │ ├── merge/
│ │ │ ├── merge_dict_column.rs
│ │ │ ├── merge_mapping.rs
│ │ │ ├── mod.rs
│ │ │ ├── term_merger.rs
│ │ │ └── tests.rs
│ │ ├── mod.rs
│ │ ├── reader/
│ │ │ └── mod.rs
│ │ └── writer/
│ │ ├── column_operation.rs
│ │ ├── column_writers.rs
│ │ ├── mod.rs
│ │ ├── serializer.rs
│ │ └── value_index.rs
│ ├── compat_tests.rs
│ ├── dictionary.rs
│ ├── dynamic_column.rs
│ ├── iterable.rs
│ ├── lib.rs
│ ├── tests.rs
│ ├── utils.rs
│ └── value.rs
├── common/
│ ├── Cargo.toml
│ ├── benches/
│ │ └── bench.rs
│ └── src/
│ ├── bitset.rs
│ ├── bounds.rs
│ ├── byte_count.rs
│ ├── datetime.rs
│ ├── file_slice.rs
│ ├── group_by.rs
│ ├── json_path_writer.rs
│ ├── lib.rs
│ ├── serialize.rs
│ ├── vint.rs
│ └── writer.rs
├── doc/
│ ├── .gitignore
│ ├── book.toml
│ └── src/
│ ├── SUMMARY.md
│ ├── avant-propos.md
│ ├── basis.md
│ ├── best_practise.md.rs
│ ├── examples.md
│ ├── facetting.md
│ ├── faq.md
│ ├── index_sorting.md
│ ├── innerworkings.md
│ ├── inverted_index.md
│ ├── json.md
│ └── schema.md
├── examples/
│ ├── aggregation.rs
│ ├── basic_search.rs
│ ├── custom_collector.rs
│ ├── custom_tokenizer.rs
│ ├── date_time_field.rs
│ ├── deleting_updating_documents.rs
│ ├── faceted_search.rs
│ ├── faceted_search_with_tweaked_score.rs
│ ├── filter_aggregation.rs
│ ├── fuzzy_search.rs
│ ├── index_from_multiple_threads.rs
│ ├── index_with_json.rs
│ ├── integer_range_search.rs
│ ├── ip_field.rs
│ ├── iterating_docs_and_positions.rs
│ ├── json_field.rs
│ ├── phrase_prefix_search.rs
│ ├── pre_tokenized_text.rs
│ ├── snippet.rs
│ ├── stop_words.rs
│ └── warmer.rs
├── ownedbytes/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── query-grammar/
│ ├── Cargo.toml
│ ├── README.md
│ └── src/
│ ├── infallible.rs
│ ├── lib.rs
│ ├── occur.rs
│ ├── query_grammar.rs
│ └── user_input_ast.rs
├── rustfmt.toml
├── src/
│ ├── aggregation/
│ │ ├── README.md
│ │ ├── accessor_helpers.rs
│ │ ├── agg_data.rs
│ │ ├── agg_limits.rs
│ │ ├── agg_req.rs
│ │ ├── agg_result.rs
│ │ ├── agg_tests.rs
│ │ ├── bucket/
│ │ │ ├── composite/
│ │ │ │ ├── accessors.rs
│ │ │ │ ├── calendar_interval.rs
│ │ │ │ ├── collector.rs
│ │ │ │ ├── map.rs
│ │ │ │ ├── mod.rs
│ │ │ │ └── numeric_types.rs
│ │ │ ├── filter.rs
│ │ │ ├── histogram/
│ │ │ │ ├── date_histogram.rs
│ │ │ │ ├── histogram.rs
│ │ │ │ └── mod.rs
│ │ │ ├── mod.rs
│ │ │ ├── range.rs
│ │ │ ├── term_agg.rs
│ │ │ └── term_missing_agg.rs
│ │ ├── cached_sub_aggs.rs
│ │ ├── collector.rs
│ │ ├── date.rs
│ │ ├── error.rs
│ │ ├── intermediate_agg_result.rs
│ │ ├── metric/
│ │ │ ├── average.rs
│ │ │ ├── cardinality.rs
│ │ │ ├── count.rs
│ │ │ ├── extended_stats.rs
│ │ │ ├── max.rs
│ │ │ ├── min.rs
│ │ │ ├── mod.rs
│ │ │ ├── percentiles.rs
│ │ │ ├── stats.rs
│ │ │ ├── sum.rs
│ │ │ └── top_hits.rs
│ │ ├── mod.rs
│ │ └── segment_agg_result.rs
│ ├── collector/
│ │ ├── count_collector.rs
│ │ ├── docset_collector.rs
│ │ ├── facet_collector.rs
│ │ ├── filter_collector_wrapper.rs
│ │ ├── histogram_collector.rs
│ │ ├── mod.rs
│ │ ├── multi_collector.rs
│ │ ├── sort_key/
│ │ │ ├── mod.rs
│ │ │ ├── order.rs
│ │ │ ├── sort_by_bytes.rs
│ │ │ ├── sort_by_erased_type.rs
│ │ │ ├── sort_by_score.rs
│ │ │ ├── sort_by_static_fast_value.rs
│ │ │ ├── sort_by_string.rs
│ │ │ └── sort_key_computer.rs
│ │ ├── sort_key_top_collector.rs
│ │ ├── tests.rs
│ │ ├── top_collector.rs
│ │ └── top_score_collector.rs
│ ├── compat_tests.rs
│ ├── core/
│ │ ├── executor.rs
│ │ ├── json_utils.rs
│ │ ├── mod.rs
│ │ ├── searcher.rs
│ │ └── tests.rs
│ ├── directory/
│ │ ├── composite_file.rs
│ │ ├── directory.rs
│ │ ├── directory_lock.rs
│ │ ├── error.rs
│ │ ├── footer.rs
│ │ ├── managed_directory.rs
│ │ ├── mmap_directory/
│ │ │ ├── file_watcher.rs
│ │ │ └── mod.rs
│ │ ├── mod.rs
│ │ ├── ram_directory.rs
│ │ ├── tests.rs
│ │ └── watch_event_router.rs
│ ├── docset.rs
│ ├── error.rs
│ ├── fastfield/
│ │ ├── alive_bitset.rs
│ │ ├── error.rs
│ │ ├── facet_reader.rs
│ │ ├── mod.rs
│ │ ├── readers.rs
│ │ └── writer.rs
│ ├── fieldnorm/
│ │ ├── code.rs
│ │ ├── mod.rs
│ │ ├── reader.rs
│ │ ├── serializer.rs
│ │ └── writer.rs
│ ├── functional_test.rs
│ ├── future_result.rs
│ ├── index/
│ │ ├── index.rs
│ │ ├── index_meta.rs
│ │ ├── inverted_index_reader.rs
│ │ ├── mod.rs
│ │ ├── segment.rs
│ │ ├── segment_component.rs
│ │ ├── segment_id.rs
│ │ └── segment_reader.rs
│ ├── indexer/
│ │ ├── delete_queue.rs
│ │ ├── doc_id_mapping.rs
│ │ ├── doc_opstamp_mapping.rs
│ │ ├── flat_map_with_buffer.rs
│ │ ├── index_writer.rs
│ │ ├── index_writer_status.rs
│ │ ├── indexing_term.rs
│ │ ├── log_merge_policy.rs
│ │ ├── merge_index_test.rs
│ │ ├── merge_operation.rs
│ │ ├── merge_policy.rs
│ │ ├── merger.rs
│ │ ├── mod.rs
│ │ ├── operation.rs
│ │ ├── path_to_unordered_id.rs
│ │ ├── prepared_commit.rs
│ │ ├── segment_entry.rs
│ │ ├── segment_manager.rs
│ │ ├── segment_register.rs
│ │ ├── segment_serializer.rs
│ │ ├── segment_updater.rs
│ │ ├── segment_writer.rs
│ │ ├── single_segment_index_writer.rs
│ │ └── stamper.rs
│ ├── lib.rs
│ ├── macros.rs
│ ├── positions/
│ │ ├── mod.rs
│ │ ├── reader.rs
│ │ └── serializer.rs
│ ├── postings/
│ │ ├── block_search.rs
│ │ ├── block_segment_postings.rs
│ │ ├── compression/
│ │ │ ├── mod.rs
│ │ │ └── vint.rs
│ │ ├── indexing_context.rs
│ │ ├── json_postings_writer.rs
│ │ ├── loaded_postings.rs
│ │ ├── mod.rs
│ │ ├── per_field_postings_writer.rs
│ │ ├── postings.rs
│ │ ├── postings_writer.rs
│ │ ├── recorder.rs
│ │ ├── segment_postings.rs
│ │ ├── serializer.rs
│ │ ├── skip.rs
│ │ └── term_info.rs
│ ├── query/
│ │ ├── all_query.rs
│ │ ├── automaton_weight.rs
│ │ ├── bitset/
│ │ │ └── mod.rs
│ │ ├── bm25.rs
│ │ ├── boolean_query/
│ │ │ ├── block_wand.rs
│ │ │ ├── boolean_query.rs
│ │ │ ├── boolean_weight.rs
│ │ │ └── mod.rs
│ │ ├── boost_query.rs
│ │ ├── const_score_query.rs
│ │ ├── disjunction.rs
│ │ ├── disjunction_max_query.rs
│ │ ├── empty_query.rs
│ │ ├── exclude.rs
│ │ ├── exist_query.rs
│ │ ├── explanation.rs
│ │ ├── fuzzy_query.rs
│ │ ├── intersection.rs
│ │ ├── mod.rs
│ │ ├── more_like_this/
│ │ │ ├── mod.rs
│ │ │ ├── more_like_this.rs
│ │ │ └── query.rs
│ │ ├── phrase_prefix_query/
│ │ │ ├── mod.rs
│ │ │ ├── phrase_prefix_query.rs
│ │ │ ├── phrase_prefix_scorer.rs
│ │ │ └── phrase_prefix_weight.rs
│ │ ├── phrase_query/
│ │ │ ├── mod.rs
│ │ │ ├── phrase_query.rs
│ │ │ ├── phrase_scorer.rs
│ │ │ ├── phrase_weight.rs
│ │ │ ├── regex_phrase_query.rs
│ │ │ └── regex_phrase_weight.rs
│ │ ├── query.rs
│ │ ├── query_parser/
│ │ │ ├── logical_ast.rs
│ │ │ ├── mod.rs
│ │ │ └── query_parser.rs
│ │ ├── range_query/
│ │ │ ├── fast_field_range_doc_set.rs
│ │ │ ├── mod.rs
│ │ │ ├── range_query.rs
│ │ │ └── range_query_fastfield.rs
│ │ ├── regex_query.rs
│ │ ├── reqopt_scorer.rs
│ │ ├── score_combiner.rs
│ │ ├── scorer.rs
│ │ ├── set_query.rs
│ │ ├── size_hint.rs
│ │ ├── term_query/
│ │ │ ├── mod.rs
│ │ │ ├── term_query.rs
│ │ │ ├── term_scorer.rs
│ │ │ └── term_weight.rs
│ │ ├── union/
│ │ │ ├── bitset_union.rs
│ │ │ ├── buffered_union.rs
│ │ │ ├── mod.rs
│ │ │ └── simple_union.rs
│ │ ├── vec_docset.rs
│ │ └── weight.rs
│ ├── reader/
│ │ ├── mod.rs
│ │ └── warming.rs
│ ├── schema/
│ │ ├── bytes_options.rs
│ │ ├── date_time_options.rs
│ │ ├── document/
│ │ │ ├── de.rs
│ │ │ ├── default_document.rs
│ │ │ ├── existing_type_impls.rs
│ │ │ ├── mod.rs
│ │ │ ├── owned_value.rs
│ │ │ ├── se.rs
│ │ │ └── value.rs
│ │ ├── facet.rs
│ │ ├── facet_options.rs
│ │ ├── field.rs
│ │ ├── field_entry.rs
│ │ ├── field_type.rs
│ │ ├── flags.rs
│ │ ├── index_record_option.rs
│ │ ├── ip_options.rs
│ │ ├── json_object_options.rs
│ │ ├── mod.rs
│ │ ├── named_field_document.rs
│ │ ├── numeric_options.rs
│ │ ├── schema.rs
│ │ ├── term.rs
│ │ └── text_options.rs
│ ├── snippet/
│ │ └── mod.rs
│ ├── space_usage/
│ │ └── mod.rs
│ ├── store/
│ │ ├── compression_lz4_block.rs
│ │ ├── compression_zstd_block.rs
│ │ ├── compressors.rs
│ │ ├── decompressors.rs
│ │ ├── footer.rs
│ │ ├── index/
│ │ │ ├── block.rs
│ │ │ ├── mod.rs
│ │ │ ├── skip_index.rs
│ │ │ └── skip_index_builder.rs
│ │ ├── mod.rs
│ │ ├── reader.rs
│ │ ├── store_compressor.rs
│ │ └── writer.rs
│ ├── termdict/
│ │ ├── fst_termdict/
│ │ │ ├── merger.rs
│ │ │ ├── mod.rs
│ │ │ ├── streamer.rs
│ │ │ ├── term_info_store.rs
│ │ │ └── termdict.rs
│ │ ├── mod.rs
│ │ ├── sstable_termdict/
│ │ │ ├── merger.rs
│ │ │ └── mod.rs
│ │ └── tests.rs
│ └── tokenizer/
│ ├── alphanum_only.rs
│ ├── ascii_folding_filter.rs
│ ├── empty_tokenizer.rs
│ ├── facet_tokenizer.rs
│ ├── lower_caser.rs
│ ├── mod.rs
│ ├── ngram_tokenizer.rs
│ ├── raw_tokenizer.rs
│ ├── regex_tokenizer.rs
│ ├── remove_long.rs
│ ├── simple_tokenizer.rs
│ ├── split_compound_words.rs
│ ├── stemmer.rs
│ ├── stop_word_filter/
│ │ ├── gen_stopwords.py
│ │ ├── mod.rs
│ │ └── stopwords.rs
│ ├── tokenized_string.rs
│ ├── tokenizer.rs
│ ├── tokenizer_manager.rs
│ └── whitespace_tokenizer.rs
├── sstable/
│ ├── Cargo.toml
│ ├── README.md
│ ├── benches/
│ │ ├── ord_to_term.rs
│ │ └── stream_bench.rs
│ ├── src/
│ │ ├── block_match_automaton.rs
│ │ ├── block_reader.rs
│ │ ├── delta.rs
│ │ ├── dictionary.rs
│ │ ├── lib.rs
│ │ ├── merge/
│ │ │ ├── heap_merge.rs
│ │ │ └── mod.rs
│ │ ├── sstable_index_v2.rs
│ │ ├── sstable_index_v3.rs
│ │ ├── streamer.rs
│ │ ├── value/
│ │ │ ├── index.rs
│ │ │ ├── mod.rs
│ │ │ ├── range.rs
│ │ │ ├── u64_monotonic.rs
│ │ │ ├── vec_u32.rs
│ │ │ └── void.rs
│ │ └── vint.rs
│ └── tests/
│ └── sstable_test.rs
├── stacker/
│ ├── Cargo.toml
│ ├── Performance.md
│ ├── benches/
│ │ └── bench.rs
│ ├── example/
│ │ └── hashmap.rs
│ ├── fuzz_test/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ └── main.rs
│ └── src/
│ ├── arena_hashmap.rs
│ ├── expull.rs
│ ├── fastcmp.rs
│ ├── fastcpy.rs
│ ├── lib.rs
│ ├── memory_arena.rs
│ └── shared_arena_hashmap.rs
├── tests/
│ ├── compat_tests_data/
│ │ ├── index_v6/
│ │ │ ├── .managed.json
│ │ │ ├── 00000000000000000000000000000000.fast
│ │ │ ├── 00000000000000000000000000000000.fieldnorm
│ │ │ ├── 00000000000000000000000000000000.idx
│ │ │ ├── 00000000000000000000000000000000.pos
│ │ │ ├── 00000000000000000000000000000000.store
│ │ │ ├── 00000000000000000000000000000000.term
│ │ │ └── meta.json
│ │ └── index_v7/
│ │ ├── .managed.json
│ │ ├── 000002f0000000000000000000000000.fast
│ │ ├── 000002f0000000000000000000000000.fieldnorm
│ │ ├── 000002f0000000000000000000000000.idx
│ │ ├── 000002f0000000000000000000000000.pos
│ │ ├── 000002f0000000000000000000000000.store
│ │ ├── 000002f0000000000000000000000000.term
│ │ └── meta.json
│ └── failpoints/
│ └── mod.rs
└── tokenizer-api/
├── Cargo.toml
├── README.md
└── src/
└── lib.rs
Showing preview only (677K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (7762 symbols across 417 files)
FILE: benches/agg_bench.rs
function main (line 28) | fn main() {
function bench_agg (line 48) | fn bench_agg(mut group: InputGroup<Index>) {
function exec_term_with_agg (line 101) | fn exec_term_with_agg(index: &Index, agg_req: serde_json::Value) {
function average_u64 (line 115) | fn average_u64(index: &Index) {
function average_f64 (line 121) | fn average_f64(index: &Index) {
function average_f64_u64 (line 127) | fn average_f64_u64(index: &Index) {
function stats_f64 (line 134) | fn stats_f64(index: &Index) {
function extendedstats_f64 (line 140) | fn extendedstats_f64(index: &Index) {
function percentiles_f64 (line 146) | fn percentiles_f64(index: &Index) {
function cardinality_agg (line 158) | fn cardinality_agg(index: &Index) {
function terms_status_with_cardinality_agg (line 168) | fn terms_status_with_cardinality_agg(index: &Index) {
function terms_7 (line 184) | fn terms_7(index: &Index) {
function terms_all_unique (line 190) | fn terms_all_unique(index: &Index) {
function terms_150_000 (line 197) | fn terms_150_000(index: &Index) {
function terms_many_top_1000 (line 203) | fn terms_many_top_1000(index: &Index) {
function terms_many_order_by_term (line 209) | fn terms_many_order_by_term(index: &Index) {
function terms_many_with_top_hits (line 215) | fn terms_many_with_top_hits(index: &Index) {
function terms_many_with_avg_sub_agg (line 234) | fn terms_many_with_avg_sub_agg(index: &Index) {
function terms_all_unique_with_avg_sub_agg (line 245) | fn terms_all_unique_with_avg_sub_agg(index: &Index) {
function terms_status_with_histogram (line 256) | fn terms_status_with_histogram(index: &Index) {
function terms_zipf_1000_with_histogram (line 268) | fn terms_zipf_1000_with_histogram(index: &Index) {
function terms_status_with_avg_sub_agg (line 280) | fn terms_status_with_avg_sub_agg(index: &Index) {
function terms_zipf_1000_with_avg_sub_agg (line 292) | fn terms_zipf_1000_with_avg_sub_agg(index: &Index) {
function terms_zipf_1000 (line 304) | fn terms_zipf_1000(index: &Index) {
function terms_many_json_mixed_type_with_avg_sub_agg (line 311) | fn terms_many_json_mixed_type_with_avg_sub_agg(index: &Index) {
function composite_term_few (line 323) | fn composite_term_few(index: &Index) {
function composite_term_many_page_1000 (line 336) | fn composite_term_many_page_1000(index: &Index) {
function composite_term_many_page_1000_with_avg_sub_agg (line 349) | fn composite_term_many_page_1000_with_avg_sub_agg(index: &Index) {
function composite_histogram (line 365) | fn composite_histogram(index: &Index) {
function composite_histogram_calendar (line 378) | fn composite_histogram_calendar(index: &Index) {
function execute_agg (line 392) | fn execute_agg(index: &Index, agg_req: serde_json::Value) {
function range_agg (line 400) | fn range_agg(index: &Index) {
function range_agg_with_avg_sub_agg (line 413) | fn range_agg_with_avg_sub_agg(index: &Index) {
function range_agg_with_term_agg_status (line 435) | fn range_agg_with_term_agg_status(index: &Index) {
function range_agg_with_term_agg_many (line 456) | fn range_agg_with_term_agg_many(index: &Index) {
function histogram (line 478) | fn histogram(index: &Index) {
function histogram_hard_bounds (line 489) | fn histogram_hard_bounds(index: &Index) {
function histogram_with_avg_sub_agg (line 495) | fn histogram_with_avg_sub_agg(index: &Index) {
function histogram_with_term_agg_status (line 506) | fn histogram_with_term_agg_status(index: &Index) {
function avg_and_range_with_avg_sub_agg (line 517) | fn avg_and_range_with_avg_sub_agg(index: &Index) {
type Cardinality (line 538) | enum Cardinality {
function get_collector (line 551) | fn get_collector(agg_req: Aggregations) -> AggregationCollector {
function get_test_index_bench (line 555) | fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Ind...
function filter_agg_all_query_count_agg (line 694) | fn filter_agg_all_query_count_agg(index: &Index) {
function filter_agg_term_query_count_agg (line 706) | fn filter_agg_term_query_count_agg(index: &Index) {
function filter_agg_all_query_with_sub_aggs (line 718) | fn filter_agg_all_query_with_sub_aggs(index: &Index) {
function filter_agg_term_query_with_sub_aggs (line 734) | fn filter_agg_term_query_with_sub_aggs(index: &Index) {
FILE: benches/analyzer.rs
constant ALICE_TXT (line 6) | const ALICE_TXT: &str = include_str!("alice.txt");
function criterion_benchmark (line 8) | pub fn criterion_benchmark(c: &mut Criterion) {
FILE: benches/and_or_queries.rs
type BenchIndex (line 30) | struct BenchIndex {
function build_shared_indices (line 41) | fn build_shared_indices(num_docs: usize, p_a: f32, p_b: f32, p_c: f32) -...
function main (line 126) | fn main() {
function add_bench_task (line 189) | fn add_bench_task<C: Collector + 'static>(
type SearchTask (line 206) | struct SearchTask<C: Collector> {
function run (line 214) | pub fn run(&self) -> usize {
FILE: benches/bool_queries_with_range.rs
type BenchIndex (line 11) | struct BenchIndex {
function build_shared_indices (line 18) | fn build_shared_indices(num_docs: usize, p_title_a: f32, distribution: &...
function main (line 109) | fn main() {
function run_benchmark_tasks (line 191) | fn run_benchmark_tasks(
function add_bench_task (line 239) | fn add_bench_task<C: Collector + 'static>(
type SearchTask (line 256) | struct SearchTask<C: Collector> {
function run (line 264) | pub fn run(&self) -> usize {
FILE: benches/exists_json.rs
function main (line 12) | fn main() {
function exists_json_union (line 36) | fn exists_json_union(index: &Index) {
function build_index_with_json_subfields (line 45) | fn build_index_with_json_subfields(num_docs: usize, num_subfields: usize...
FILE: benches/index-bench.rs
constant HDFS_LOGS (line 5) | const HDFS_LOGS: &str = include_str!("hdfs.json");
constant GH_LOGS (line 6) | const GH_LOGS: &str = include_str!("gh.json");
constant WIKI (line 7) | const WIKI: &str = include_str!("wiki.json");
function benchmark (line 9) | fn benchmark(
function get_index (line 26) | fn get_index(schema: tantivy::schema::Schema) -> Index {
function _benchmark (line 39) | fn _benchmark(
function benchmark_dynamic_json (line 84) | fn benchmark_dynamic_json(
function hdfs_index_benchmark (line 98) | pub fn hdfs_index_benchmark(c: &mut Criterion) {
function gh_index_benchmark (line 158) | pub fn gh_index_benchmark(c: &mut Criterion) {
function wiki_index_benchmark (line 185) | pub fn wiki_index_benchmark(c: &mut Criterion) {
FILE: benches/merge_segments.rs
type NullDirectory (line 27) | struct NullDirectory {
type NullWriter (line 31) | struct NullWriter;
method write (line 34) | fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
method flush (line 38) | fn flush(&mut self) -> io::Result<()> {
method terminate_ref (line 44) | fn terminate_ref(&mut self, _token: AntiCallToken) -> io::Result<()> {
type InMemoryWriter (line 49) | struct InMemoryWriter {
method write (line 56) | fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
method flush (line 61) | fn flush(&mut self) -> io::Result<()> {
method terminate_ref (line 67) | fn terminate_ref(&mut self, _token: AntiCallToken) -> io::Result<()> {
type NullFileHandle (line 75) | struct NullFileHandle;
method len (line 77) | fn len(&self) -> usize {
method read_bytes (line 82) | fn read_bytes(&self, _range: std::ops::Range<usize>) -> io::Result<Owned...
method get_file_handle (line 88) | fn get_file_handle(&self, path: &Path) -> Result<Arc<dyn FileHandle>, Op...
method delete (line 95) | fn delete(&self, _path: &Path) -> Result<(), DeleteError> {
method exists (line 99) | fn exists(&self, _path: &Path) -> Result<bool, OpenReadError> {
method open_write (line 103) | fn open_write(&self, path: &Path) -> Result<WritePtr, OpenWriteError> {
method atomic_read (line 117) | fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> {
method atomic_write (line 124) | fn atomic_write(&self, _path: &Path, _data: &[u8]) -> io::Result<()> {
method sync_directory (line 128) | fn sync_directory(&self) -> io::Result<()> {
method watch (line 132) | fn watch(&self, _watch_callback: WatchCallback) -> tantivy::Result<Watch...
type MergeScenario (line 137) | struct MergeScenario {
function build_index (line 145) | fn build_index(
function main (line 199) | fn main() {
FILE: benches/range_queries.rs
type BenchIndex (line 13) | struct BenchIndex {
function build_shared_indices (line 19) | fn build_shared_indices(num_docs: usize, distribution: &str) -> BenchInd...
function main (line 78) | fn main() {
function run_benchmark_tasks (line 164) | fn run_benchmark_tasks(
function add_bench_task_count (line 216) | fn add_bench_task_count(
function add_bench_task_docset (line 237) | fn add_bench_task_docset(
function add_bench_task_top100_asc (line 258) | fn add_bench_task_top100_asc(
function add_bench_task_top100_desc (line 281) | fn add_bench_task_top100_desc(
type CountSearchTask (line 304) | struct CountSearchTask {
method run (line 311) | pub fn run(&self) -> usize {
type DocSetSearchTask (line 316) | struct DocSetSearchTask {
method run (line 323) | pub fn run(&self) -> usize {
type Top100AscSearchTask (line 329) | struct Top100AscSearchTask {
method run (line 337) | pub fn run(&self) -> usize {
type Top100DescSearchTask (line 348) | struct Top100DescSearchTask {
method run (line 356) | pub fn run(&self) -> usize {
FILE: benches/range_query.rs
function main (line 18) | fn main() {
function bench_range_query (line 22) | fn bench_range_query() {
function test_range (line 54) | fn test_range<T: Display>(
function get_index_0_to_100 (line 95) | fn get_index_0_to_100() -> Index {
type Doc (line 122) | pub struct Doc {
function create_index_from_docs (line 128) | pub fn create_index_from_docs(docs: &[Doc]) -> Index {
function get_90_percent (line 184) | fn get_90_percent() -> RangeInclusive<u64> {
function get_10_percent (line 188) | fn get_10_percent() -> RangeInclusive<u64> {
function get_1_percent (line 192) | fn get_1_percent() -> RangeInclusive<u64> {
function get_90_percent_ip (line 196) | fn get_90_percent_ip() -> RangeInclusive<Ipv6Addr> {
function get_10_percent_ip (line 202) | fn get_10_percent_ip() -> RangeInclusive<Ipv6Addr> {
function get_1_percent_ip (line 208) | fn get_1_percent_ip() -> RangeInclusive<Ipv6Addr> {
type NumHits (line 214) | struct NumHits {
method column_title (line 218) | fn column_title() -> &'static str {
method format (line 221) | fn format(&self) -> Option<String> {
function execute_query (line 226) | fn execute_query<T: Display>(
function execute_query_ (line 246) | fn execute_query_(query: &str, index: &Index) -> NumHits {
FILE: benches/regex_all_terms.rs
constant HEAP_SIZE_BYTES (line 23) | const HEAP_SIZE_BYTES: usize = 200_000_000;
type BenchConfig (line 26) | struct BenchConfig {
function main (line 32) | fn main() {
function default_configs (line 59) | fn default_configs() -> Vec<BenchConfig> {
function build_index (line 84) | fn build_index(config: BenchConfig, heap_size_bytes: usize) -> (Index, t...
FILE: benches/str_search_and_get.rs
type BenchIndex (line 25) | struct BenchIndex {
function build_shared_indices (line 31) | fn build_shared_indices(num_docs: usize, distribution: &str) -> BenchInd...
function main (line 116) | fn main() {
function run_benchmark_tasks (line 210) | fn run_benchmark_tasks(
function add_bench_task_count (line 254) | fn add_bench_task_count(
function add_bench_task_docset (line 270) | fn add_bench_task_docset(
function add_bench_task_fetch_all_strings (line 286) | fn add_bench_task_fetch_all_strings(
function add_bench_task_fetch_all_strings_from_doc (line 309) | fn add_bench_task_fetch_all_strings_from_doc(
type CountSearchTask (line 332) | struct CountSearchTask {
method run (line 339) | pub fn run(&self) -> usize {
type DocSetSearchTask (line 344) | struct DocSetSearchTask {
method run (line 351) | pub fn run(&self) -> usize {
type FetchAllStringsSearchTask (line 357) | struct FetchAllStringsSearchTask {
method run (line 364) | pub fn run(&self) -> Vec<String> {
type FetchAllStringsFromDocTask (line 388) | struct FetchAllStringsFromDocTask {
method run (line 395) | pub fn run(&self) -> Vec<String> {
FILE: bitpacker/benches/bench.rs
function create_bitpacked_data (line 13) | fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
function bench_bitpacking_read (line 25) | fn bench_bitpacking_read(b: &mut Bencher) {
function bench_blockedbitp_read (line 41) | fn bench_blockedbitp_read(b: &mut Bencher) {
function bench_blockedbitp_create (line 56) | fn bench_blockedbitp_create(b: &mut Bencher) {
FILE: bitpacker/src/bitpacker.rs
type BitPacker (line 6) | pub struct BitPacker {
method new (line 17) | pub fn new() -> BitPacker {
method write (line 25) | pub fn write<TWrite: io::Write + ?Sized>(
method flush (line 49) | pub fn flush<TWrite: io::Write + ?Sized>(&mut self, output: &mut TWrit...
method close (line 60) | pub fn close<TWrite: io::Write + ?Sized>(&mut self, output: &mut TWrit...
method default (line 12) | fn default() -> Self {
type BitUnpacker (line 67) | pub struct BitUnpacker {
method new (line 78) | pub fn new(num_bits: u8) -> BitUnpacker {
method bit_width (line 91) | pub fn bit_width(&self) -> u8 {
method get (line 96) | pub fn get(&self, idx: u32, data: &[u8]) -> u64 {
method get_slow_path (line 114) | fn get_slow_path(&self, addr: usize, bit_shift: u32, data: &[u8]) -> u...
method get_batch_u32s (line 131) | fn get_batch_u32s(&self, start_idx: u32, data: &[u8], output: &mut [u3...
method get_ids_for_value_range (line 196) | pub fn get_ids_for_value_range(
method get_ids_for_value_range_slow (line 215) | fn get_ids_for_value_range_slow(
method get_ids_for_value_range_fast (line 233) | fn get_ids_for_value_range_fast(
function create_bitpacker (line 250) | fn create_bitpacker(len: usize, num_bits: u8) -> (BitUnpacker, Vec<u64>,...
function test_bitpacker_util (line 266) | fn test_bitpacker_util(len: usize, num_bits: u8) {
function test_bitpacker (line 274) | fn test_bitpacker() {
function num_bits_strategy (line 284) | fn num_bits_strategy() -> impl Strategy<Value = u8> {
function vals_strategy (line 288) | fn vals_strategy() -> impl Strategy<Value = (u8, Vec<u64>)> {
function test_bitpacker_aux (line 300) | fn test_bitpacker_aux(num_bits: u8, vals: &[u64]) {
function test_get_batch_panics_over_32_bits (line 329) | fn test_get_batch_panics_over_32_bits() {
function test_get_batch_limit (line 336) | fn test_get_batch_limit() {
function test_get_batch_panics_when_off_scope (line 344) | fn test_get_batch_panics_when_off_scope() {
FILE: bitpacker/src/blocked_bitpacker.rs
constant BLOCK_SIZE (line 5) | const BLOCK_SIZE: usize = 128;
type BlockedBitpacker (line 10) | pub struct BlockedBitpacker {
method new (line 66) | pub fn new() -> Self {
method mem_usage (line 75) | pub fn mem_usage(&self) -> usize {
method add (line 83) | pub fn add(&mut self, val: u64) {
method flush (line 90) | pub fn flush(&mut self) {
method get (line 126) | pub fn get(&self, idx: usize) -> u64 {
method iter (line 140) | pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
method default (line 18) | fn default() -> Self {
type BlockedBitpackerEntryMetaData (line 30) | struct BlockedBitpackerEntryMetaData {
method new (line 36) | fn new(offset: u64, num_bits: u8, base_value: u64) -> Self {
method offset (line 43) | fn offset(&self) -> u64 {
method num_bits (line 46) | fn num_bits(&self) -> u8 {
method base_value (line 49) | fn base_value(&self) -> u64 {
function metadata_test (line 55) | fn metadata_test() {
function mem_usage (line 61) | fn mem_usage<T>(items: &Vec<T>) -> usize {
function blocked_bitpacker_empty (line 154) | fn blocked_bitpacker_empty() {
function blocked_bitpacker_one (line 159) | fn blocked_bitpacker_one() {
function blocked_bitpacker_test (line 166) | fn blocked_bitpacker_test() {
FILE: bitpacker/src/filter_vec/avx2.rs
constant NUM_LANES (line 10) | const NUM_LANES: usize = 8;
constant HIGHEST_BIT (line 12) | const HIGHEST_BIT: u32 = 1 << 31;
function u32_to_i32 (line 15) | fn u32_to_i32(val: u32) -> i32 {
function u32_to_i32_avx2 (line 20) | unsafe fn u32_to_i32_avx2(vals_u32x8s: DataType) -> DataType {
function filter_vec_in_place (line 25) | pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, outp...
function filter_vec_avx2_aux (line 48) | unsafe fn filter_vec_avx2_aux(
function compact (line 86) | unsafe fn compact(data: DataType, mask: u8) -> DataType {
function compute_filter_bitset (line 93) | unsafe fn compute_filter_bitset(val: __m256i, range: std::ops::RangeIncl...
function from_u32x8 (line 105) | const fn from_u32x8(vals: [u32; NUM_LANES]) -> DataType {
constant MASK_TO_PERMUTATION (line 109) | const MASK_TO_PERMUTATION: [DataType; 256] = [
FILE: bitpacker/src/filter_vec/mod.rs
type FilterImplPerInstructionSet (line 10) | enum FilterImplPerInstructionSet {
method is_available (line 18) | pub fn is_available(&self) -> bool {
method from (line 40) | fn from(code: u8) -> FilterImplPerInstructionSet {
method filter_vec_in_place (line 49) | fn filter_vec_in_place(self, range: RangeInclusive<u32>, offset: u32, ...
constant IMPLS (line 29) | const IMPLS: [FilterImplPerInstructionSet; 2] = [
constant IMPLS (line 35) | const IMPLS: [FilterImplPerInstructionSet; 1] = [FilterImplPerInstructio...
function get_best_available_instruction_set (line 61) | fn get_best_available_instruction_set() -> FilterImplPerInstructionSet {
function filter_vec_in_place (line 77) | pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, outp...
function test_get_best_available_instruction_set (line 86) | fn test_get_best_available_instruction_set() {
function test_instruction_set_to_code_from_code (line 95) | fn test_instruction_set_to_code_from_code() {
function test_filter_impl_empty_aux (line 105) | fn test_filter_impl_empty_aux(filter_impl: FilterImplPerInstructionSet) {
function test_filter_impl_simple_aux (line 111) | fn test_filter_impl_simple_aux(filter_impl: FilterImplPerInstructionSet) {
function test_filter_impl_simple_aux_shifted (line 117) | fn test_filter_impl_simple_aux_shifted(filter_impl: FilterImplPerInstruc...
function test_filter_impl_simple_outside_i32_range (line 123) | fn test_filter_impl_simple_outside_i32_range(filter_impl: FilterImplPerI...
function test_filter_impl_test_suite (line 129) | fn test_filter_impl_test_suite(filter_impl: FilterImplPerInstructionSet) {
function test_filter_implementation_avx2 (line 138) | fn test_filter_implementation_avx2() {
function test_filter_implementation_scalar (line 145) | fn test_filter_implementation_scalar() {
FILE: bitpacker/src/filter_vec/scalar.rs
function filter_vec_in_place (line 3) | pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, outp...
FILE: bitpacker/src/lib.rs
function compute_num_bits (line 34) | pub fn compute_num_bits(n: u64) -> u8 {
function minmax (line 48) | pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)>
function test_compute_num_bits (line 76) | fn test_compute_num_bits() {
function test_minmax_empty (line 88) | fn test_minmax_empty() {
function test_minmax_one (line 94) | fn test_minmax_one() {
function test_minmax_two (line 99) | fn test_minmax_two() {
function test_minmax_nan (line 105) | fn test_minmax_nan() {
function test_minmax_inf (line 121) | fn test_minmax_inf() {
FILE: columnar/benches/bench_access.rs
constant NUM_DOCS (line 7) | const NUM_DOCS: u32 = 2_000_000;
function generate_columnar_and_open (line 9) | pub fn generate_columnar_and_open(card: Card, num_docs: u32) -> Column {
function main (line 17) | fn main() {
function bench_group (line 36) | fn bench_group(mut runner: InputGroup<Column>) {
FILE: columnar/benches/bench_column_values_get.rs
function get_data (line 9) | fn get_data() -> Vec<u64> {
function value_iter (line 24) | fn value_iter() -> impl Iterator<Item = u64> {
type Col (line 28) | type Col = Arc<dyn ColumnValues<u64>>;
function main (line 30) | fn main() {
FILE: columnar/benches/bench_create_column_values.rs
function get_data (line 6) | fn get_data() -> Vec<u64> {
function main (line 20) | fn main() {
FILE: columnar/benches/bench_first_vals.rs
type Columns (line 8) | struct Columns {
function get_test_columns (line 14) | fn get_test_columns() -> Columns {
constant NUM_VALUES (line 57) | const NUM_VALUES: u64 = 100_000;
function generate_permutation (line 58) | fn generate_permutation() -> Vec<u64> {
function serialize_and_load (line 64) | pub fn serialize_and_load(column: &[u64], codec_type: CodecType) -> Arc<...
function main (line 68) | fn main() {
FILE: columnar/benches/bench_merge.rs
constant NUM_DOCS (line 7) | const NUM_DOCS: u32 = 100_000;
function main (line 9) | fn main() {
FILE: columnar/benches/bench_optional_index.rs
constant TOTAL_NUM_VALUES (line 6) | const TOTAL_NUM_VALUES: u32 = 1_000_000;
function gen_optional_index (line 8) | fn gen_optional_index(fill_ratio: f64) -> OptionalIndex {
function random_range_iterator (line 19) | fn random_range_iterator(
function n_percent_step_iterator (line 33) | fn n_percent_step_iterator(percent: f32, num_values: u32) -> impl Iterat...
function walk_over_data (line 40) | fn walk_over_data(codec: &OptionalIndex, avg_step_size: u32) -> Option<u...
function walk_over_data_from_positions (line 47) | fn walk_over_data_from_positions(
function main (line 58) | fn main() {
FILE: columnar/benches/bench_values_u128.rs
function generate_random (line 12) | fn generate_random() -> Vec<u64> {
function get_u128_column_random (line 20) | fn get_u128_column_random() -> Arc<dyn ColumnValues<u128>> {
function get_u128_column_from_data (line 26) | fn get_u128_column_from_data(data: &[u128]) -> Arc<dyn ColumnValues<u128...
constant FIFTY_PERCENT_RANGE (line 33) | const FIFTY_PERCENT_RANGE: RangeInclusive<u64> = 1..=50;
constant SINGLE_ITEM (line 34) | const SINGLE_ITEM: u64 = 90;
constant SINGLE_ITEM_RANGE (line 35) | const SINGLE_ITEM_RANGE: RangeInclusive<u64> = 90..=90;
function get_data_50percent_item (line 37) | fn get_data_50percent_item() -> Vec<u128> {
function main (line 50) | fn main() {
FILE: columnar/benches/bench_values_u64.rs
function generate_permutation (line 10) | fn generate_permutation() -> Vec<u64> {
function generate_permutation_gcd (line 17) | fn generate_permutation_gcd() -> Vec<u64> {
function serialize_and_load (line 23) | pub fn serialize_and_load(column: &[u64], codec_type: CodecType) -> Arc<...
constant FIFTY_PERCENT_RANGE (line 27) | const FIFTY_PERCENT_RANGE: RangeInclusive<u64> = 1..=50;
constant SINGLE_ITEM (line 28) | const SINGLE_ITEM: u64 = 90;
constant SINGLE_ITEM_RANGE (line 29) | const SINGLE_ITEM_RANGE: RangeInclusive<u64> = 90..=90;
constant ONE_PERCENT_ITEM_RANGE (line 30) | const ONE_PERCENT_ITEM_RANGE: RangeInclusive<u64> = 49..=49;
function get_data_50percent_item (line 32) | fn get_data_50percent_item() -> Vec<u128> {
type VecCol (line 46) | type VecCol = (Vec<u64>, Arc<dyn ColumnValues<u64>>);
function bench_access (line 48) | fn bench_access() {
function bench_range (line 106) | fn bench_range() {
function main (line 158) | fn main() {
FILE: columnar/benches/common.rs
type Card (line 8) | pub enum Card {
method fmt (line 16) | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
function generate_columnar_with_name (line 26) | pub fn generate_columnar_with_name(card: Card, num_docs: u32, column_nam...
FILE: columnar/columnar-cli-inspect/src/main.rs
function main (line 7) | fn main() -> io::Result<()> {
function validate_columnar_reader (line 18) | pub fn validate_columnar_reader(reader: &ColumnarReader) {
function open_and_validate_columnar (line 47) | pub fn open_and_validate_columnar(path: &str) -> io::Result<ColumnarRead...
FILE: columnar/columnar-cli/src/main.rs
type JsonStack (line 11) | struct JsonStack {
method push (line 17) | fn push(&mut self, seg: &str) {
method pop (line 24) | fn pop(&mut self) {
method path (line 30) | fn path(&self) -> &str {
function append_json_to_columnar (line 35) | fn append_json_to_columnar(
function main (line 93) | fn main() -> io::Result<()> {
FILE: columnar/src/block_accessor.rs
type ColumnBlockAccessor (line 6) | pub struct ColumnBlockAccessor<T> {
function fetch_block (line 17) | pub fn fetch_block<'a>(&'a mut self, docs: &'a [u32], accessor: &Column<...
function fetch_block_with_missing (line 32) | pub fn fetch_block_with_missing(
function iter_vals (line 62) | pub fn iter_vals(&self) -> impl Iterator<Item = T> + '_ {
function iter_docid_vals (line 73) | pub fn iter_docid_vals<'a>(
function find_missing_docs (line 91) | fn find_missing_docs<F>(docs: &[u32], hits: &[u32], mut callback: F)
function test_find_missing_docs (line 126) | fn test_find_missing_docs() {
function test_find_missing_docs_empty (line 140) | fn test_find_missing_docs_empty() {
function test_find_missing_docs_all_missing (line 154) | fn test_find_missing_docs_all_missing() {
FILE: columnar/src/column/dictionary_encoded.rs
type BytesColumn (line 19) | pub struct BytesColumn {
method fmt (line 25) | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
method empty (line 33) | pub fn empty(num_docs: u32) -> BytesColumn {
method ord_to_bytes (line 44) | pub fn ord_to_bytes(&self, ord: u64, output: &mut Vec<u8>) -> io::Resu...
method num_rows (line 49) | pub fn num_rows(&self) -> RowId {
method term_ords (line 53) | pub fn term_ords(&self, row_id: RowId) -> impl Iterator<Item = u64> + ...
method ords (line 58) | pub fn ords(&self) -> &Column<u64> {
method num_terms (line 62) | pub fn num_terms(&self) -> usize {
method dictionary (line 66) | pub fn dictionary(&self) -> &Dictionary<VoidSSTable> {
method from (line 81) | fn from(str_column: StrColumn) -> BytesColumn {
type StrColumn (line 72) | pub struct StrColumn(BytesColumn);
method fmt (line 75) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
method wrap (line 87) | pub fn wrap(bytes_column: BytesColumn) -> StrColumn {
method dictionary (line 91) | pub fn dictionary(&self) -> &Dictionary<VoidSSTable> {
method ord_to_str (line 96) | pub fn ord_to_str(&self, term_ord: u64, output: &mut String) -> io::Re...
type Target (line 116) | type Target = BytesColumn;
method deref (line 118) | fn deref(&self) -> &Self::Target {
FILE: columnar/src/column/mod.rs
type Column (line 23) | pub struct Column<T = u64> {
method fmt (line 29) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
function build_empty_column (line 39) | pub fn build_empty_column(num_docs: u32) -> Column<T> {
function to_u64_monotonic (line 48) | pub fn to_u64_monotonic(self) -> Column<u64> {
function get_cardinality (line 62) | pub fn get_cardinality(&self) -> Cardinality {
function num_docs (line 66) | pub fn num_docs(&self) -> RowId {
function min_value (line 79) | pub fn min_value(&self) -> T {
function max_value (line 83) | pub fn max_value(&self) -> T {
function first (line 88) | pub fn first(&self, doc_id: DocId) -> Option<T> {
function first_vals (line 94) | pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
function row_ids_for_docs (line 125) | pub fn row_ids_for_docs(
function values_for_doc (line 136) | pub fn values_for_doc(&self, doc_id: DocId) -> impl Iterator<Item = T> +...
function get_docids_for_value_range (line 144) | pub fn get_docids_for_value_range(
function first_or_default_col (line 163) | pub fn first_or_default_col(self, default_value: T) -> Arc<dyn ColumnVal...
method serialize (line 172) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> std::io::Resul...
method deserialize (line 176) | fn deserialize<R: std::io::Read>(reader: &mut R) -> std::io::Result<Self> {
type FirstValueWithDefault (line 184) | struct FirstValueWithDefault<T: Copy> {
function get_val (line 193) | fn get_val(&self, idx: u32) -> T {
function min_value (line 197) | fn min_value(&self) -> T {
function max_value (line 201) | fn max_value(&self) -> T {
function num_vals (line 205) | fn num_vals(&self) -> u32 {
FILE: columnar/src/column/serialize.rs
function serialize_column_mappable_to_u128 (line 17) | pub fn serialize_column_mappable_to_u128<T: MonotonicallyMappableToU128>(
function serialize_column_mappable_to_u64 (line 28) | pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64>(
function open_column_u64 (line 43) | pub fn open_column_u64<T: MonotonicallyMappableToU64>(
function open_column_u128 (line 63) | pub fn open_column_u128<T: MonotonicallyMappableToU128>(
function open_column_u128_as_compact_u64 (line 86) | pub fn open_column_u128_as_compact_u64(
function open_column_bytes (line 106) | pub fn open_column_bytes(data: OwnedBytes, format_version: Version) -> i...
function open_column_str (line 118) | pub fn open_column_str(data: OwnedBytes, format_version: Version) -> io:...
FILE: columnar/src/column_index/merge/mod.rs
function detect_cardinality_single_column_index (line 11) | fn detect_cardinality_single_column_index(
function detect_cardinality (line 45) | fn detect_cardinality(
function merge_column_index (line 73) | pub fn merge_column_index<'a>(
function test_detect_cardinality (line 110) | fn test_detect_cardinality() {
function test_merge_index_multivalued_sorted (line 158) | fn test_merge_index_multivalued_sorted() {
function test_merge_index_multivalued_sorted_several_segment (line 187) | fn test_merge_index_multivalued_sorted_several_segment() {
FILE: columnar/src/column_index/merge/shuffled.rs
function merge_column_index_shuffled (line 9) | pub fn merge_column_index_shuffled<'a>(
function merge_column_index_shuffled_optional (line 46) | fn merge_column_index_shuffled_optional<'a>(
type ShuffledIndex (line 56) | struct ShuffledIndex<'a> {
function boxed_iter (line 62) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
function merge_column_index_shuffled_multivalued (line 80) | fn merge_column_index_shuffled_multivalued<'a>(
type ShuffledMultivaluedIndex (line 90) | struct ShuffledMultivaluedIndex<'a> {
function iter_num_values (line 95) | fn iter_num_values<'a>(
function integrate_num_vals (line 119) | fn integrate_num_vals(num_vals: impl Iterator<Item = u32>) -> impl Itera...
function boxed_iter (line 131) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
function test_integrate_num_vals_empty (line 144) | fn test_integrate_num_vals_empty() {
function test_integrate_num_vals_one_el (line 149) | fn test_integrate_num_vals_one_el() {
function test_integrate_num_vals_several (line 154) | fn test_integrate_num_vals_several() {
function test_merge_column_index_optional_shuffle (line 159) | fn test_merge_column_index_optional_shuffle() {
FILE: columnar/src/column_index/merge/stacked.rs
function merge_column_index_stacked (line 13) | pub fn merge_column_index_stacked<'a>(
type StackedDocIdsWithValues (line 35) | struct StackedDocIdsWithValues<'a> {
function boxed_iter (line 41) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
function get_doc_ids_with_values (line 50) | fn get_doc_ids_with_values<'a>(
function stack_doc_ids_with_values (line 83) | fn stack_doc_ids_with_values<'a>(
type StackedStartOffsets (line 97) | struct StackedStartOffsets<'a> {
function get_num_values_iterator (line 102) | fn get_num_values_iterator<'a>(
function boxed_iter (line 128) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
function stack_start_offsets (line 144) | fn stack_start_offsets<'a>(
function make_serializable_multivalued_index (line 154) | fn make_serializable_multivalued_index<'a>(
type StackedOptionalIndex (line 164) | struct StackedOptionalIndex<'a> {
function boxed_iter (line 170) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
FILE: columnar/src/column_index/mod.rs
type ColumnIndex (line 24) | pub enum ColumnIndex {
method from (line 36) | fn from(optional_index: OptionalIndex) -> ColumnIndex {
method from (line 42) | fn from(multi_value_index: MultiValueIndex) -> ColumnIndex {
method get_cardinality (line 53) | pub fn get_cardinality(&self) -> Cardinality {
method has_value (line 63) | pub fn has_value(&self, doc_id: DocId) -> bool {
method value_row_ids (line 74) | pub fn value_row_ids(&self, doc_id: DocId) -> Range<RowId> {
method docids_to_rowids (line 97) | pub fn docids_to_rowids(
method docid_range_to_rowids (line 128) | pub fn docid_range_to_rowids(&self, doc_id_range: Range<DocId>) -> Ran...
method select_batch_in_place (line 175) | pub fn select_batch_in_place(&self, doc_id_start: DocId, rank_ids: &mu...
function test_column_index_get_cardinality (line 199) | fn test_column_index_get_cardinality() {
FILE: columnar/src/column_index/multivalued_index.rs
type SerializableMultivalueIndex (line 16) | pub struct SerializableMultivalueIndex<'a> {
function serialize_multivalued_index (line 21) | pub fn serialize_multivalued_index(
function open_multivalued_index (line 46) | pub fn open_multivalued_index(
type MultiValueIndex (line 78) | pub enum MultiValueIndex {
method fmt (line 156) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
method for_test (line 168) | pub fn for_test(start_offsets: &[RowId]) -> MultiValueIndex {
method get_start_index_column (line 192) | pub fn get_start_index_column(&self) -> &Arc<dyn crate::ColumnValues<R...
method range (line 202) | pub(crate) fn range(&self, doc_id: DocId) -> Range<RowId> {
method num_docs (line 211) | pub fn num_docs(&self) -> u32 {
method iter_non_null_docs (line 219) | pub fn iter_non_null_docs(&self) -> Box<dyn Iterator<Item = DocId> + '...
method select_batch_in_place (line 255) | pub(crate) fn select_batch_in_place(&self, docid_start: DocId, ranks: ...
type MultiValueIndexV1 (line 86) | pub struct MultiValueIndexV1 {
method range (line 94) | pub(crate) fn range(&self, doc_id: DocId) -> Range<RowId> {
method num_docs (line 105) | pub fn num_docs(&self) -> u32 {
method select_batch_in_place (line 120) | pub(crate) fn select_batch_in_place(&self, docid_start: DocId, ranks: ...
type MultiValueIndexV2 (line 150) | pub struct MultiValueIndexV2 {
method range (line 270) | pub(crate) fn range(&self, doc_id: DocId) -> Range<RowId> {
method num_docs (line 281) | pub fn num_docs(&self) -> u32 {
method select_batch_in_place (line 296) | pub(crate) fn select_batch_in_place(&self, docid_start: DocId, ranks: ...
function index_to_pos_helper (line 338) | fn index_to_pos_helper(
function test_positions_to_docid (line 349) | fn test_positions_to_docid() {
function test_range_to_rowids (line 365) | fn test_range_to_rowids() {
FILE: columnar/src/column_index/optional_index/mod.rs
constant DENSE_BLOCK_THRESHOLD (line 19) | const DENSE_BLOCK_THRESHOLD: u32 =
constant ELEMENTS_PER_BLOCK (line 22) | const ELEMENTS_PER_BLOCK: u32 = u16::MAX as u32 + 1;
type BlockMeta (line 25) | struct BlockMeta {
type BlockVariant (line 32) | enum BlockVariant {
method empty (line 38) | pub fn empty() -> Self {
method num_bytes_in_block (line 41) | pub fn num_bytes_in_block(&self) -> u32 {
type OptionalIndex (line 82) | pub struct OptionalIndex {
method fmt (line 96) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
type SelectCursor (line 177) | type SelectCursor<'b>
method contains (line 182) | fn contains(&self, row_id: RowId) -> bool {
method rank (line 197) | fn rank(&self, doc_id: DocId) -> RowId {
method rank_if_exists (line 218) | fn rank_if_exists(&self, doc_id: DocId) -> Option<RowId> {
method select (line 233) | fn select(&self, rank: RowId) -> RowId {
method select_cursor (line 246) | fn select_cursor(&self) -> OptionalIndexSelectCursor<'_> {
method for_test (line 261) | pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex {
method num_docs (line 275) | pub fn num_docs(&self) -> RowId {
method num_non_nulls (line 279) | pub fn num_non_nulls(&self) -> RowId {
method iter_non_null_docs (line 283) | pub fn iter_non_null_docs(&self) -> impl Iterator<Item = RowId> + '_ {
method select_batch (line 289) | pub fn select_batch(&self, ranks: &mut [RowId]) {
method block (line 297) | fn block(&self, block_meta: BlockMeta) -> Block<'_> {
method find_block (line 318) | fn find_block(&self, dense_idx: u32, start_block_pos: u16) -> u16 {
function boxed_iter (line 90) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
type RowAddr (line 108) | struct RowAddr {
function row_addr_from_row_id (line 114) | fn row_addr_from_row_id(row_id: RowId) -> RowAddr {
type BlockSelectCursor (line 121) | enum BlockSelectCursor<'a> {
function select (line 127) | fn select(&mut self, rank: u16) -> u16 {
type OptionalIndexSelectCursor (line 134) | pub struct OptionalIndexSelectCursor<'a> {
function search_and_load_block (line 145) | fn search_and_load_block(&mut self, rank: RowId) {
function select (line 169) | fn select(&mut self, rank: RowId) -> RowId {
type Block (line 333) | enum Block<'a> {
function serialize_optional_index_block (line 338) | fn serialize_optional_index_block(block_els: &[u16], out: &mut impl io::...
function serialize_optional_index (line 348) | pub fn serialize_optional_index<W: io::Write>(
constant SERIALIZED_BLOCK_META_NUM_BYTES (line 402) | const SERIALIZED_BLOCK_META_NUM_BYTES: usize = 4;
type SerializedBlockMeta (line 405) | struct SerializedBlockMeta {
method from_bytes (line 413) | fn from_bytes(bytes: [u8; SERIALIZED_BLOCK_META_NUM_BYTES]) -> Seriali...
method to_bytes (line 424) | fn to_bytes(self) -> [u8; SERIALIZED_BLOCK_META_NUM_BYTES] {
function is_sparse (line 436) | fn is_sparse(num_rows_in_block: u32) -> bool {
function deserialize_optional_index_block_metadatas (line 440) | fn deserialize_optional_index_block_metadatas(
function open_optional_index (line 489) | pub fn open_optional_index(bytes: OwnedBytes) -> io::Result<OptionalInde...
FILE: columnar/src/column_index/optional_index/set.rs
type SetCodec (line 5) | pub trait SetCodec {
method serialize (line 12) | fn serialize(els: impl Iterator<Item = Self::Item>, wrt: impl io::Writ...
method open (line 13) | fn open(data: &[u8]) -> Self::Reader<'_>;
type SelectCursor (line 18) | pub trait SelectCursor<T> {
method select (line 21) | fn select(&mut self, rank: T) -> T;
type Set (line 24) | pub trait Set<T> {
method contains (line 29) | fn contains(&self, el: T) -> bool;
method rank (line 33) | fn rank(&self, el: T) -> T;
method rank_if_exists (line 37) | fn rank_if_exists(&self, el: T) -> Option<T>;
method select (line 45) | fn select(&self, rank: T) -> T;
method select_cursor (line 48) | fn select_cursor(&self) -> Self::SelectCursor<'_>;
FILE: columnar/src/column_index/optional_index/set_block/dense.rs
function get_bit_at (line 8) | fn get_bit_at(input: u64, n: u16) -> bool {
function set_bit_at (line 13) | fn set_bit_at(input: &mut u64, n: u16) {
constant ELEMENTS_PER_MINI_BLOCK (line 26) | const ELEMENTS_PER_MINI_BLOCK: u16 = 64;
constant MINI_BLOCK_BITVEC_NUM_BYTES (line 27) | const MINI_BLOCK_BITVEC_NUM_BYTES: usize = 8;
constant MINI_BLOCK_OFFSET_NUM_BYTES (line 28) | const MINI_BLOCK_OFFSET_NUM_BYTES: usize = 2;
constant MINI_BLOCK_NUM_BYTES (line 29) | pub const MINI_BLOCK_NUM_BYTES: usize = MINI_BLOCK_BITVEC_NUM_BYTES + MI...
constant DENSE_BLOCK_NUM_BYTES (line 32) | pub const DENSE_BLOCK_NUM_BYTES: u32 =
type DenseBlockCodec (line 35) | pub struct DenseBlockCodec;
type Item (line 38) | type Item = u16;
type Reader (line 39) | type Reader<'a> = DenseBlock<'a>;
method serialize (line 41) | fn serialize(els: impl Iterator<Item = u16>, wrt: impl io::Write) -> io:...
method open (line 46) | fn open(data: &[u8]) -> Self::Reader<'_> {
function rank_u64 (line 60) | fn rank_u64(bitvec: u64, el: u16) -> u16 {
function select_u64 (line 68) | fn select_u64(mut bitvec: u64, rank: u16) -> u16 {
type DenseMiniBlock (line 83) | struct DenseMiniBlock {
method from_bytes (line 89) | fn from_bytes(data: [u8; MINI_BLOCK_NUM_BYTES]) -> Self {
method to_bytes (line 95) | fn to_bytes(self) -> [u8; MINI_BLOCK_NUM_BYTES] {
type DenseBlock (line 104) | pub struct DenseBlock<'a>(&'a [u8]);
type DenseBlockSelectCursor (line 106) | pub struct DenseBlockSelectCursor<'a> {
function select (line 113) | fn select(&mut self, rank: u16) -> u16 {
type SelectCursor (line 125) | type SelectCursor<'b>
function contains (line 130) | fn contains(&self, el: u16) -> bool {
function rank_if_exists (line 138) | fn rank_if_exists(&self, el: u16) -> Option<u16> {
function rank (line 152) | fn rank(&self, el: u16) -> u16 {
function select (line 161) | fn select(&self, rank: u16) -> u16 {
function select_cursor (line 169) | fn select_cursor(&self) -> Self::SelectCursor<'_> {
function mini_block (line 179) | fn mini_block(&self, mini_block_id: u16) -> DenseMiniBlock {
function iter_miniblocks (line 189) | fn iter_miniblocks(
function find_miniblock_containing_rank (line 210) | fn find_miniblock_containing_rank(&self, rank: u16, from_block_id: u16) ...
function serialize_dense_codec (line 219) | pub fn serialize_dense_codec(
function test_select_bitvec (line 258) | fn test_select_bitvec() {
function test_count_ones (line 268) | fn test_count_ones() {
function test_dense (line 275) | fn test_dense() {
FILE: columnar/src/column_index/optional_index/set_block/sparse.rs
type SparseBlockCodec (line 3) | pub struct SparseBlockCodec;
type Item (line 6) | type Item = u16;
type Reader (line 7) | type Reader<'a> = SparseBlock<'a>;
method serialize (line 9) | fn serialize(
method open (line 19) | fn open(data: &[u8]) -> Self::Reader<'_> {
type SparseBlock (line 25) | pub struct SparseBlock<'a>(&'a [u8]);
function select (line 29) | fn select(&mut self, rank: u16) -> u16 {
type SelectCursor (line 35) | type SelectCursor<'b>
function contains (line 40) | fn contains(&self, el: u16) -> bool {
function rank_if_exists (line 45) | fn rank_if_exists(&self, el: u16) -> Option<u16> {
function rank (line 50) | fn rank(&self, el: u16) -> u16 {
function select (line 55) | fn select(&self, rank: u16) -> u16 {
function select_cursor (line 61) | fn select_cursor(&self) -> Self::SelectCursor<'_> {
function get_u16 (line 67) | fn get_u16(data: &[u8], byte_position: usize) -> u16 {
function value_at_idx (line 74) | fn value_at_idx(&self, data: &[u8], idx: u16) -> u16 {
function num_vals (line 80) | fn num_vals(&self) -> u16 {
function binary_search (line 87) | fn binary_search(&self, target: u16) -> Result<u16, u16> {
FILE: columnar/src/column_index/optional_index/set_block/tests.rs
function test_set_helper (line 7) | fn test_set_helper<C: SetCodec<Item = u16>>(vals: &[u16]) -> usize {
function test_dense_block_set_u16_empty (line 32) | fn test_dense_block_set_u16_empty() {
function test_dense_block_set_u16_max (line 38) | fn test_dense_block_set_u16_max() {
function test_sparse_block_set_u16_empty (line 44) | fn test_sparse_block_set_u16_empty() {
function test_sparse_block_set_u16_max (line 50) | fn test_sparse_block_set_u16_max() {
function test_simple_translate_codec_codec_idx_to_original_idx_dense (line 75) | fn test_simple_translate_codec_codec_idx_to_original_idx_dense() {
function test_simple_translate_codec_idx_to_original_idx_sparse (line 88) | fn test_simple_translate_codec_idx_to_original_idx_sparse() {
function test_simple_translate_codec_idx_to_original_idx_dense (line 100) | fn test_simple_translate_codec_idx_to_original_idx_dense() {
function test_simple_translate_idx_to_value_idx_dense (line 112) | fn test_simple_translate_idx_to_value_idx_dense() {
function test_simple_translate_idx_to_value_idx_sparse (line 129) | fn test_simple_translate_idx_to_value_idx_sparse() {
FILE: columnar/src/column_index/optional_index/tests.rs
function test_optional_index_bug_2293 (line 8) | fn test_optional_index_bug_2293() {
function test_optional_index_with_num_docs (line 14) | fn test_optional_index_with_num_docs(num_docs: u32) {
function test_dense_block_threshold (line 29) | fn test_dense_block_threshold() {
function random_bitvec (line 33) | fn random_bitvec() -> BoxedStrategy<Vec<bool>> {
function test_with_random_sets_simple (line 58) | fn test_with_random_sets_simple() {
function test_optional_index_trailing_empty_blocks (line 72) | fn test_optional_index_trailing_empty_blocks() {
function test_optional_index_one_block_false (line 77) | fn test_optional_index_one_block_false() {
function test_optional_index_one_block_true (line 84) | fn test_optional_index_one_block_true() {
function boxed_iter (line 91) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
function test_null_index (line 102) | fn test_null_index(data: &[bool]) {
function test_optional_index_test_translation (line 130) | fn test_optional_index_test_translation() {
function test_optional_index_translate (line 138) | fn test_optional_index_translate() {
function test_optional_index_small (line 145) | fn test_optional_index_small() {
function test_optional_index_large (line 154) | fn test_optional_index_large() {
function test_optional_index_iter_aux (line 164) | fn test_optional_index_iter_aux(row_ids: &[RowId], num_rows: RowId) {
function test_optional_index_iter_empty (line 175) | fn test_optional_index_iter_empty() {
function test_optional_index_rank_aux (line 179) | fn test_optional_index_rank_aux(row_ids: &[RowId]) {
function test_optional_index_rank (line 194) | fn test_optional_index_rank() {
function test_optional_index_iter_empty_one (line 204) | fn test_optional_index_iter_empty_one() {
function test_optional_index_iter_dense_block (line 210) | fn test_optional_index_iter_dense_block() {
function test_optional_index_for_tests (line 218) | fn test_optional_index_for_tests() {
FILE: columnar/src/column_index/serialize.rs
type SerializableOptionalIndex (line 14) | pub struct SerializableOptionalIndex<'a> {
function from (line 20) | fn from(optional_index: &'a OptionalIndex) -> Self {
type SerializableColumnIndex (line 28) | pub enum SerializableColumnIndex<'a> {
function get_cardinality (line 35) | pub fn get_cardinality(&self) -> Cardinality {
function serialize_column_index (line 45) | pub fn serialize_column_index(
function open_column_index (line 67) | pub fn open_column_index(
FILE: columnar/src/column_values/merge.rs
type MergedColumnValues (line 7) | pub(crate) struct MergedColumnValues<'a, T> {
function boxed_iter (line 14) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
FILE: columnar/src/column_values/mod.rs
type ColumnValues (line 48) | pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
method get_val (line 56) | fn get_val(&self, idx: u32) -> T;
method get_vals (line 65) | fn get_vals(&self, indexes: &[u32], output: &mut [T]) {
method get_vals_opt (line 93) | fn get_vals_opt(&self, indexes: &[u32], output: &mut [Option<T>]) {
method get_range (line 121) | fn get_range(&self, start: u64, output: &mut [T]) {
method get_row_ids_for_value_range (line 130) | fn get_row_ids_for_value_range(
method min_value (line 154) | fn min_value(&self) -> T;
method max_value (line 165) | fn max_value(&self) -> T;
method num_vals (line 168) | fn num_vals(&self) -> u32;
method iter (line 171) | fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = T> + 'a> {
type EmptyColumnValues (line 178) | pub struct EmptyColumnValues;
method get_val (line 181) | fn get_val(&self, _idx: u32) -> T {
method min_value (line 185) | fn min_value(&self) -> T {
method max_value (line 189) | fn max_value(&self) -> T {
method num_vals (line 193) | fn num_vals(&self) -> u32 {
function get_val (line 200) | fn get_val(&self, idx: u32) -> T {
function get_vals_opt (line 205) | fn get_vals_opt(&self, indexes: &[u32], output: &mut [Option<T>]) {
function min_value (line 210) | fn min_value(&self) -> T {
function max_value (line 215) | fn max_value(&self) -> T {
function num_vals (line 220) | fn num_vals(&self) -> u32 {
function iter (line 225) | fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = T> + 'b> {
function get_range (line 230) | fn get_range(&self, start: u64, output: &mut [T]) {
function get_row_ids_for_value_range (line 235) | fn get_row_ids_for_value_range(
FILE: columnar/src/column_values/monotonic_column.rs
type MonotonicMappingColumn (line 8) | struct MonotonicMappingColumn<C, T, Input> {
function monotonic_map_column (line 29) | pub fn monotonic_map_column<C, T, Input, Output>(
function get_val (line 54) | fn get_val(&self, idx: u32) -> Output {
function min_value (line 59) | fn min_value(&self) -> Output {
function max_value (line 64) | fn max_value(&self) -> Output {
function num_vals (line 69) | fn num_vals(&self) -> u32 {
function iter (line 73) | fn iter(&self) -> Box<dyn Iterator<Item = Output> + '_> {
function get_row_ids_for_value_range (line 81) | fn get_row_ids_for_value_range(
function test_monotonic_mapping_iter (line 108) | fn test_monotonic_mapping_iter() {
FILE: columnar/src/column_values/monotonic_mapping.rs
type MonotonicallyMappableToU64 (line 11) | pub trait MonotonicallyMappableToU64: 'static + PartialOrd + Debug + Cop...
method to_u64 (line 15) | fn to_u64(self) -> u64;
method from_u64 (line 21) | fn from_u64(val: u64) -> Self;
method to_u64 (line 117) | fn to_u64(self) -> u64 {
method from_u64 (line 122) | fn from_u64(val: u64) -> Self {
method to_u64 (line 129) | fn to_u64(self) -> u64 {
method from_u64 (line 134) | fn from_u64(val: u64) -> Self {
method to_u64 (line 141) | fn to_u64(self) -> u64 {
method from_u64 (line 146) | fn from_u64(val: u64) -> Self {
method to_u64 (line 153) | fn to_u64(self) -> u64 {
method from_u64 (line 158) | fn from_u64(val: u64) -> Self {
method to_u64 (line 165) | fn to_u64(self) -> u64 {
method from_u64 (line 170) | fn from_u64(val: u64) -> RowId {
method to_u64 (line 179) | fn to_u64(self) -> u64 {
method from_u64 (line 184) | fn from_u64(val: u64) -> Self {
type StrictlyMonotonicFn (line 34) | pub trait StrictlyMonotonicFn<External, Internal> {
method mapping (line 36) | fn mapping(&self, inp: External) -> Internal;
method inverse (line 38) | fn inverse(&self, out: Internal) -> External;
type StrictlyMonotonicMappingInverter (line 49) | pub(crate) struct StrictlyMonotonicMappingInverter<T> {
function from (line 53) | fn from(orig_mapping: T) -> Self {
function mapping (line 62) | fn mapping(&self, val: To) -> From {
function inverse (line 67) | fn inverse(&self, val: From) -> To {
type StrictlyMonotonicMappingToInternal (line 73) | pub(crate) struct StrictlyMonotonicMappingToInternal<T> {
function new (line 78) | pub(crate) fn new() -> StrictlyMonotonicMappingToInternal<T> {
function mapping (line 90) | fn mapping(&self, inp: External) -> u128 {
function inverse (line 95) | fn inverse(&self, out: u128) -> External {
function mapping (line 105) | fn mapping(&self, inp: External) -> u64 {
function inverse (line 110) | fn inverse(&self, out: u64) -> External {
function strictly_monotonic_test (line 195) | fn strictly_monotonic_test() {
function test_round_trip (line 205) | fn test_round_trip<T: StrictlyMonotonicFn<K, L>, K: std::fmt::Debug + Eq...
FILE: columnar/src/column_values/monotonic_mapping_u128.rs
type MonotonicallyMappableToU128 (line 6) | pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Deb...
method to_u128 (line 10) | fn to_u128(self) -> u128;
method from_u128 (line 16) | fn from_u128(val: u128) -> Self;
method to_u128 (line 20) | fn to_u128(self) -> u128 {
method from_u128 (line 24) | fn from_u128(val: u128) -> Self {
method to_u128 (line 30) | fn to_u128(self) -> u128 {
method from_u128 (line 34) | fn from_u128(val: u128) -> Self {
function ip_to_u128 (line 39) | fn ip_to_u128(ip_addr: Ipv6Addr) -> u128 {
FILE: columnar/src/column_values/stats.rs
type ColumnStats (line 11) | pub struct ColumnStats {
method amplitude (line 25) | pub fn amplitude(&self) -> u64 {
method serialize (line 31) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
method deserialize (line 39) | fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
function test_stats_ser_deser_aux (line 65) | fn test_stats_ser_deser_aux(stats: &ColumnStats, num_bytes: usize) {
function test_stats_serialization (line 74) | fn test_stats_serialization() {
FILE: columnar/src/column_values/u128_based/compact_space/blank_range.rs
type BlankRange (line 11) | pub(crate) struct BlankRange {
type Error (line 15) | type Error = &'static str;
method try_from (line 16) | fn try_from(range: RangeInclusive<u128>) -> Result<Self, Self::Error> {
method blank_size (line 26) | pub(crate) fn blank_size(&self) -> u128 {
method blank_range (line 29) | pub(crate) fn blank_range(&self) -> RangeInclusive<u128> {
method cmp (line 35) | fn cmp(&self, other: &Self) -> std::cmp::Ordering {
method partial_cmp (line 40) | fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
FILE: columnar/src/column_values/u128_based/compact_space/build_compact_space.rs
function get_blanks (line 11) | fn get_blanks(values_sorted: &BTreeSet<u128>) -> BinaryHeap<BlankRange> {
type BlankCollector (line 26) | struct BlankCollector {
method new (line 31) | fn new() -> Self {
method stage_blank (line 37) | fn stage_blank(&mut self, blank: BlankRange) {
method drain (line 41) | fn drain(&mut self) -> impl Iterator<Item = BlankRange> + '_ {
method staged_blanks_sum (line 45) | fn staged_blanks_sum(&self) -> u128 {
method num_staged_blanks (line 48) | fn num_staged_blanks(&self) -> usize {
function num_bits (line 52) | fn num_bits(val: u128) -> u8 {
function get_compact_space (line 58) | pub fn get_compact_space(
type CompactSpaceBuilder (line 158) | struct CompactSpaceBuilder {
method new (line 164) | fn new() -> Self {
method add_blanks (line 172) | fn add_blanks(&mut self, blank: impl Iterator<Item = RangeInclusive<u1...
method is_empty (line 176) | fn is_empty(&self) -> bool {
method finish (line 181) | fn finish(mut self) -> CompactSpace {
function test_binary_heap_pop_order (line 237) | fn test_binary_heap_pop_order() {
function test_worst_case_scenario (line 247) | fn test_worst_case_scenario() {
FILE: columnar/src/column_values/u128_based/compact_space/mod.rs
constant COST_PER_BLANK_IN_BITS (line 34) | const COST_PER_BLANK_IN_BITS: usize = 36;
type CompactSpace (line 37) | pub struct CompactSpace {
method amplitude_compact_space (line 112) | fn amplitude_compact_space(&self) -> u128 {
method get_range_mapping (line 119) | fn get_range_mapping(&self, pos: usize) -> &RangeMapping {
method u128_to_compact (line 125) | fn u128_to_compact(&self, value: u128) -> Result<u32, usize> {
method compact_to_u128 (line 145) | fn compact_to_u128(&self, compact: u32) -> u128 {
type RangeMapping (line 43) | struct RangeMapping {
method range_length (line 48) | fn range_length(&self) -> u32 {
method compact_end (line 53) | fn compact_end(&self) -> u32 {
method serialize (line 59) | fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result...
method deserialize (line 80) | fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
type CompactSpaceCompressor (line 159) | pub struct CompactSpaceCompressor {
method num_vals (line 174) | pub fn num_vals(&self) -> RowId {
method train_from (line 179) | pub fn train_from(iter: impl Iterator<Item = u128>) -> Self {
method write_footer (line 219) | fn write_footer(self, writer: &mut impl Write) -> io::Result<()> {
method compress_into (line 229) | pub fn compress_into(
type IPCodecParams (line 164) | pub struct IPCodecParams {
type CompactSpaceDecompressor (line 255) | pub struct CompactSpaceDecompressor {
method get_val (line 372) | fn get_val(&self, doc: u32) -> u128 {
method min_value (line 376) | fn min_value(&self) -> u128 {
method max_value (line 380) | fn max_value(&self) -> u128 {
method num_vals (line 384) | fn num_vals(&self) -> u32 {
method iter (line 389) | fn iter(&self) -> Box<dyn Iterator<Item = u128> + '_> {
method get_row_ids_for_value_range (line 394) | fn get_row_ids_for_value_range(
method open (line 440) | pub fn open(data: OwnedBytes) -> io::Result<CompactSpaceDecompressor> {
method u128_to_compact (line 458) | fn u128_to_compact(&self, value: u128) -> Result<u32, usize> {
method u128_to_next_compact (line 463) | pub fn u128_to_next_compact(&self, value: u128) -> CompactHit {
method compact_to_u128 (line 477) | fn compact_to_u128(&self, compact: u32) -> u128 {
method iter_compact (line 482) | fn iter_compact(&self) -> impl Iterator<Item = u32> + '_ {
method iter (line 488) | fn iter(&self) -> impl Iterator<Item = u128> + '_ {
method get_compact (line 496) | pub fn get_compact(&self, idx: u32) -> u32 {
method get (line 501) | pub fn get(&self, idx: u32) -> u128 {
method min_value (line 506) | pub fn min_value(&self) -> u128 {
method max_value (line 510) | pub fn max_value(&self) -> u128 {
method get_positions_for_compact_value_range (line 514) | fn get_positions_for_compact_value_range(
method serialize (line 261) | fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result...
method deserialize (line 276) | fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
type CompactHit (line 299) | pub enum CompactHit {
type CompactSpaceU64Accessor (line 315) | pub struct CompactSpaceU64Accessor(CompactSpaceDecompressor);
method open (line 317) | pub(crate) fn open(data: OwnedBytes) -> io::Result<CompactSpaceU64Acce...
method compact_to_u128 (line 322) | pub fn compact_to_u128(&self, compact: u32) -> u128 {
method u128_to_next_compact (line 327) | pub fn u128_to_next_compact(&self, value: u128) -> CompactHit {
method get_val (line 334) | fn get_val(&self, doc: u32) -> u64 {
method min_value (line 339) | fn min_value(&self) -> u64 {
method max_value (line 343) | fn max_value(&self) -> u64 {
method num_vals (line 347) | fn num_vals(&self) -> u32 {
method iter (line 352) | fn iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
method get_row_ids_for_value_range (line 357) | fn get_row_ids_for_value_range(
function compact_space_test (line 539) | fn compact_space_test() {
function compact_space_amplitude_test (line 574) | fn compact_space_amplitude_test() {
function test_all (line 581) | fn test_all(mut data: OwnedBytes, expected: &[u128]) {
function test_aux_vals (line 610) | fn test_aux_vals(u128_vals: &[u128]) -> OwnedBytes {
function test_range_1 (line 619) | fn test_range_1() {
function test_empty (line 741) | fn test_empty() {
function test_range_2 (line 748) | fn test_range_2() {
function get_positions_for_value_range_helper (line 776) | fn get_positions_for_value_range_helper<C: ColumnValues<T> + ?Sized, T: ...
function test_range_3 (line 787) | fn test_range_3() {
function test_bug1 (line 830) | fn test_bug1() {
function test_bug2 (line 836) | fn test_bug2() {
function test_bug3 (line 842) | fn test_bug3() {
function test_bug4 (line 848) | fn test_bug4() {
function test_first_large_gaps (line 854) | fn test_first_large_gaps() {
function test_u128_to_next_compact (line 860) | fn test_u128_to_next_compact() {
function num_strategy (line 896) | fn num_strategy() -> impl Strategy<Value = u128> {
FILE: columnar/src/column_values/u128_based/mod.rs
type U128Header (line 21) | pub(crate) struct U128Header {
method serialize (line 27) | fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result...
method deserialize (line 33) | fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
function serialize_column_values_u128 (line 44) | pub fn serialize_column_values_u128<T: MonotonicallyMappableToU128>(
type U128FastFieldCodecType (line 70) | pub(crate) enum U128FastFieldCodecType {
method to_code (line 90) | pub(crate) fn to_code(self) -> u8 {
method from_code (line 94) | pub(crate) fn from_code(code: u8) -> Option<Self> {
method serialize (line 77) | fn serialize<W: Write + ?Sized>(&self, wrt: &mut W) -> io::Result<()> {
method deserialize (line 81) | fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
function open_u128_mapped (line 103) | pub fn open_u128_mapped<T: MonotonicallyMappableToU128 + Debug>(
function open_u128_as_compact_u64 (line 123) | pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc...
function test_serialize_deserialize_u128_header (line 140) | fn test_serialize_deserialize_u128_header() {
function test_serialize_deserialize (line 152) | fn test_serialize_deserialize() {
function test_fastfield_bool_size_bitwidth_1 (line 162) | fn test_fastfield_bool_size_bitwidth_1() {
function test_fastfield_bool_bit_size_bitwidth_0 (line 176) | fn test_fastfield_bool_bit_size_bitwidth_0() {
function test_fastfield_gcd (line 189) | fn test_fastfield_gcd() {
FILE: columnar/src/column_values/u64_based/bitpacked.rs
type BitpackedReader (line 15) | pub struct BitpackedReader {
function div_ceil (line 22) | const fn div_ceil(n: u64, q: NonZeroU64) -> u64 {
function transform_range_before_linear_transformation (line 37) | fn transform_range_before_linear_transformation(
method get_val (line 53) | fn get_val(&self, doc: u32) -> u64 {
method min_value (line 57) | fn min_value(&self) -> u64 {
method max_value (line 61) | fn max_value(&self) -> u64 {
method num_vals (line 65) | fn num_vals(&self) -> RowId {
method get_row_ids_for_value_range (line 69) | fn get_row_ids_for_value_range(
function num_bits (line 90) | fn num_bits(stats: &ColumnStats) -> u8 {
type BitpackedCodecEstimator (line 95) | pub struct BitpackedCodecEstimator;
method collect (line 98) | fn collect(&mut self, _value: u64) {}
method estimate (line 100) | fn estimate(&self, stats: &ColumnStats) -> Option<u64> {
method serialize (line 105) | fn serialize(
type BitpackedCodec (line 123) | pub struct BitpackedCodec;
type ColumnValues (line 126) | type ColumnValues = BitpackedReader;
type Estimator (line 127) | type Estimator = BitpackedCodecEstimator;
method load (line 130) | fn load(mut data: OwnedBytes) -> io::Result<Self::ColumnValues> {
function test_with_codec_data_sets_simple (line 148) | fn test_with_codec_data_sets_simple() {
function test_with_codec_data_sets_simple_gcd (line 153) | fn test_with_codec_data_sets_simple_gcd() {
function test_with_codec_data_sets (line 158) | fn test_with_codec_data_sets() {
function bitpacked_fast_field_rand (line 168) | fn bitpacked_fast_field_rand() {
FILE: columnar/src/column_values/u64_based/blockwise_linear.rs
constant BLOCK_SIZE (line 14) | const BLOCK_SIZE: u32 = 512u32;
type Block (line 17) | struct Block {
method serialize (line 24) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
method deserialize (line 30) | fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
function compute_num_blocks (line 41) | fn compute_num_blocks(num_vals: u32) -> u32 {
type BlockwiseLinearEstimator (line 45) | pub struct BlockwiseLinearEstimator {
method flush_block_estimate (line 62) | fn flush_block_estimate(&mut self) {
method default (line 52) | fn default() -> Self {
method collect (line 83) | fn collect(&mut self, value: u64) {
method estimate (line 90) | fn estimate(&self, stats: &ColumnStats) -> Option<u64> {
method finalize (line 100) | fn finalize(&mut self) {
method serialize (line 104) | fn serialize(
type BlockwiseLinearCodec (line 168) | pub struct BlockwiseLinearCodec;
type ColumnValues (line 171) | type ColumnValues = BlockwiseLinearReader;
type Estimator (line 173) | type Estimator = BlockwiseLinearEstimator;
method load (line 175) | fn load(mut bytes: OwnedBytes) -> io::Result<Self::ColumnValues> {
type BlockwiseLinearReader (line 198) | pub struct BlockwiseLinearReader {
method get_val (line 206) | fn get_val(&self, idx: u32) -> u64 {
method min_value (line 224) | fn min_value(&self) -> u64 {
method max_value (line 229) | fn max_value(&self) -> u64 {
method num_vals (line 234) | fn num_vals(&self) -> u32 {
function test_with_codec_data_sets_simple (line 245) | fn test_with_codec_data_sets_simple() {
function test_with_codec_data_sets_simple_gcd (line 254) | fn test_with_codec_data_sets_simple_gcd() {
function test_with_codec_data_sets (line 264) | fn test_with_codec_data_sets() {
function test_blockwise_linear_fast_field_rand (line 274) | fn test_blockwise_linear_fast_field_rand() {
FILE: columnar/src/column_values/u64_based/line.rs
constant MID_POINT (line 8) | const MID_POINT: u64 = (1u64 << 32) - 1u64;
type Line (line 19) | pub struct Line {
method eval (line 65) | pub fn eval(&self, x: u32) -> u64 {
method train_from (line 71) | pub fn train_from(
method train (line 135) | pub fn train(ys: &dyn ColumnValues) -> Self {
function compute_slope (line 32) | fn compute_slope(y0: u64, y1: u64, num_vals: NonZeroU32) -> u64 {
method serialize (line 148) | fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result...
method deserialize (line 154) | fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
function test_line_interpol_with_translation (line 171) | fn test_line_interpol_with_translation(ys: &[u64], expected: Option<u64>) {
function test_eval_max_err (line 185) | fn test_eval_max_err(ys: &[u64]) -> Option<u64> {
function test_train (line 194) | fn test_train() {
FILE: columnar/src/column_values/u64_based/linear.rs
constant HALF_SPACE (line 12) | const HALF_SPACE: u64 = u64::MAX / 2;
constant LINE_ESTIMATION_BLOCK_LEN (line 13) | const LINE_ESTIMATION_BLOCK_LEN: usize = 512;
type LinearReader (line 18) | pub struct LinearReader {
method get_val (line 26) | fn get_val(&self, doc: u32) -> u64 {
method min_value (line 33) | fn min_value(&self) -> u64 {
method max_value (line 38) | fn max_value(&self) -> u64 {
method num_vals (line 43) | fn num_vals(&self) -> u32 {
type LinearCodec (line 50) | pub struct LinearCodec;
type LinearParams (line 53) | struct LinearParams {
method serialize (line 59) | fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result...
method deserialize (line 65) | fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
type LinearCodecEstimator (line 75) | pub struct LinearCodecEstimator {
method collect_after_line_estimation (line 160) | fn collect_after_line_estimation(&mut self, line: &Line, value: u64) {
method collect_before_line_estimation (line 173) | fn collect_before_line_estimation(&mut self, value: u64) {
method default (line 86) | fn default() -> LinearCodecEstimator {
method finalize (line 100) | fn finalize(&mut self) {
method estimate (line 109) | fn estimate(&self, stats: &ColumnStats) -> Option<u64> {
method serialize (line 124) | fn serialize(
method collect (line 149) | fn collect(&mut self, value: u64) {
type ColumnValues (line 189) | type ColumnValues = LinearReader;
type Estimator (line 191) | type Estimator = LinearCodecEstimator;
method load (line 193) | fn load(mut data: OwnedBytes) -> io::Result<Self::ColumnValues> {
function test_compression_simple (line 212) | fn test_compression_simple() {
function test_compression (line 220) | fn test_compression() {
function test_with_codec_datasets (line 229) | fn test_with_codec_datasets() {
function linear_interpol_fast_field_test_large_amplitude (line 238) | fn linear_interpol_fast_field_test_large_amplitude() {
function overflow_error_test (line 248) | fn overflow_error_test() {
function linear_interpol_fast_concave_data (line 254) | fn linear_interpol_fast_concave_data() {
function linear_interpol_fast_convex_data (line 259) | fn linear_interpol_fast_convex_data() {
function linear_interpol_fast_field_test_simple (line 264) | fn linear_interpol_fast_field_test_simple() {
function linear_interpol_fast_field_rand (line 270) | fn linear_interpol_fast_field_rand() {
FILE: columnar/src/column_values/u64_based/mod.rs
type ColumnCodecEstimator (line 35) | pub trait ColumnCodecEstimator<T = u64>: 'static {
method collect (line 39) | fn collect(&mut self, value: u64);
method finalize (line 41) | fn finalize(&mut self) {}
method estimate (line 44) | fn estimate(&self, stats: &ColumnStats) -> Option<u64>;
method serialize (line 47) | fn serialize(
type ColumnCodec (line 56) | pub trait ColumnCodec<T: PartialOrd = u64> {
method load (line 63) | fn load(bytes: OwnedBytes) -> io::Result<Self::ColumnValues>;
method estimator (line 66) | fn estimator() -> Self::Estimator {
method boxed_estimator (line 71) | fn boxed_estimator() -> Box<dyn ColumnCodecEstimator> {
type CodecType (line 79) | pub enum CodecType {
method to_code (line 99) | fn to_code(self) -> u8 {
method try_from_code (line 103) | fn try_from_code(code: u8) -> Option<CodecType> {
method load (line 112) | fn load<T: MonotonicallyMappableToU64>(
method estimator (line 137) | pub fn estimator(&self) -> Box<dyn ColumnCodecEstimator> {
constant ALL_U64_CODEC_TYPES (line 92) | pub const ALL_U64_CODEC_TYPES: [CodecType; 3] = [
function load_specific_codec (line 124) | fn load_specific_codec<C: ColumnCodec, T: MonotonicallyMappableToU64>(
function serialize_u64_based_column_values (line 147) | pub fn serialize_u64_based_column_values<T: MonotonicallyMappableToU64>(
function load_u64_based_column_values (line 191) | pub fn load_u64_based_column_values<T: MonotonicallyMappableToU64>(
function serialize_and_load_u64_based_column_values (line 204) | pub fn serialize_and_load_u64_based_column_values<T: MonotonicallyMappab...
FILE: columnar/src/column_values/u64_based/stats_collector.rs
function compute_gcd (line 11) | fn compute_gcd(mut large: NonZeroU64, mut small: NonZeroU64) -> NonZeroU...
type StatsCollector (line 23) | pub struct StatsCollector {
method stats (line 36) | pub fn stats(&self) -> ColumnStats {
method update_increment_gcd (line 52) | fn update_increment_gcd(&mut self, value: u64) {
method set_increment_gcd (line 79) | fn set_increment_gcd(&mut self, gcd: NonZeroU64) {
method collect (line 84) | pub fn collect(&mut self, value: u64) {
function compute_stats (line 102) | fn compute_stats(vals: impl Iterator<Item = u64>) -> ColumnStats {
function find_gcd (line 110) | fn find_gcd(vals: impl Iterator<Item = u64>) -> u64 {
function test_compute_gcd (line 115) | fn test_compute_gcd() {
function test_gcd (line 130) | fn test_gcd() {
function test_stats (line 144) | fn test_stats() {
FILE: columnar/src/column_values/u64_based/tests.rs
function test_serialize_and_load_simple (line 6) | fn test_serialize_and_load_simple() {
function test_empty_column_i64 (line 24) | fn test_empty_column_i64() {
function test_empty_column_u64 (line 42) | fn test_empty_column_u64() {
function test_empty_column_f64 (line 60) | fn test_empty_column_f64() {
function create_and_validate (line 78) | pub(crate) fn create_and_validate<TColumnCodec: ColumnCodec>(
function compression_rate (line 149) | fn compression_rate(num_bytes: u64, num_values: u32) -> f32 {
function relative_difference (line 153) | fn relative_difference(left: u64, right: u64) -> f32 {
function test_small_blockwise_linear_example (line 180) | fn test_small_blockwise_linear_example() {
function num_strategy (line 206) | fn num_strategy() -> impl Strategy<Value = u64> {
function get_codec_test_datasets (line 214) | pub fn get_codec_test_datasets() -> Vec<(Vec<u64>, &'static str)> {
function test_codec (line 235) | fn test_codec<C: ColumnCodec>() {
function test_codec_bitpacking (line 249) | fn test_codec_bitpacking() {
function test_codec_interpolation (line 253) | fn test_codec_interpolation() {
function test_codec_multi_interpolation (line 257) | fn test_codec_multi_interpolation() {
function estimate (line 263) | fn estimate<C: ColumnCodec>(vals: &[u64]) -> Option<f32> {
function estimation_good_interpolation_case (line 280) | fn estimation_good_interpolation_case() {
function estimation_test_bad_interpolation_case_monotonically_increasing (line 295) | fn estimation_test_bad_interpolation_case_monotonically_increasing() {
function test_fast_field_codec_type_to_code (line 310) | fn test_fast_field_codec_type_to_code() {
function test_fastfield_gcd_i64_with_codec (line 321) | fn test_fastfield_gcd_i64_with_codec(codec_type: CodecType, num_vals: us...
function test_fastfield_gcd_i64 (line 353) | fn test_fastfield_gcd_i64() -> io::Result<()> {
function test_fastfield_gcd_u64_with_codec (line 364) | fn test_fastfield_gcd_u64_with_codec(codec_type: CodecType, num_vals: us...
function test_fastfield_gcd_u64 (line 395) | fn test_fastfield_gcd_u64() -> io::Result<()> {
function test_fastfield2 (line 407) | pub fn test_fastfield2() {
FILE: columnar/src/column_values/vec_column.rs
type VecColumn (line 8) | pub struct VecColumn<T = u64> {
function get_val (line 15) | fn get_val(&self, position: u32) -> T {
function iter (line 19) | fn iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
function min_value (line 23) | fn min_value(&self) -> T {
function max_value (line 27) | fn max_value(&self) -> T {
function num_vals (line 31) | fn num_vals(&self) -> u32 {
function get_range (line 35) | fn get_range(&self, start: u64, output: &mut [T]) {
function from (line 41) | fn from(values: Vec<T>) -> Self {
function from (line 51) | fn from(column: VecColumn) -> Self {
FILE: columnar/src/columnar/column_type.rs
type ColumnType (line 14) | pub enum ColumnType {
method fmt (line 26) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
method to_code (line 54) | pub fn to_code(self) -> u8 {
method is_date_time (line 57) | pub fn is_date_time(&self) -> bool {
method try_from_code (line 61) | pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidDat...
method from (line 67) | fn from(numerical_type: NumericalType) -> Self {
method numerical_type (line 77) | pub fn numerical_type(&self) -> Option<NumericalType> {
constant COLUMN_TYPES (line 42) | const COLUMN_TYPES: [ColumnType; 8] = [
type HasAssociatedColumnType (line 92) | pub trait HasAssociatedColumnType: 'static + Debug + Send + Sync + Copy ...
method column_type (line 93) | fn column_type() -> ColumnType;
method default_value (line 94) | fn default_value() -> Self;
method column_type (line 98) | fn column_type() -> ColumnType {
method default_value (line 102) | fn default_value() -> Self {
method column_type (line 108) | fn column_type() -> ColumnType {
method default_value (line 112) | fn default_value() -> Self {
method column_type (line 118) | fn column_type() -> ColumnType {
method default_value (line 122) | fn default_value() -> Self {
method column_type (line 128) | fn column_type() -> ColumnType {
method default_value (line 131) | fn default_value() -> Self {
method column_type (line 137) | fn column_type() -> ColumnType {
method default_value (line 140) | fn default_value() -> Self {
method column_type (line 146) | fn column_type() -> ColumnType {
method default_value (line 150) | fn default_value() -> Self {
function test_column_type_to_code (line 161) | fn test_column_type_to_code() {
function test_cardinality_to_code (line 173) | fn test_cardinality_to_code() {
FILE: columnar/src/columnar/format_version.rs
constant VERSION_FOOTER_NUM_BYTES (line 6) | pub const VERSION_FOOTER_NUM_BYTES: usize = MAGIC_BYTES.len() + std::mem...
constant MAGIC_BYTES (line 10) | const MAGIC_BYTES: [u8; 4] = [2, 113, 119, 66];
function footer (line 12) | pub fn footer() -> [u8; VERSION_FOOTER_NUM_BYTES] {
function parse_footer (line 19) | pub fn parse_footer(footer_bytes: [u8; VERSION_FOOTER_NUM_BYTES]) -> Res...
constant CURRENT_VERSION (line 26) | pub const CURRENT_VERSION: Version = Version::V2;
type Version (line 30) | pub enum Version {
method to_bytes (line 45) | fn to_bytes(self) -> [u8; 4] {
method try_from_bytes (line 49) | fn try_from_bytes(bytes: [u8; 4]) -> Result<Version, InvalidData> {
method fmt (line 36) | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
function test_footer_deserialization (line 66) | fn test_footer_deserialization() {
function test_version_serialization (line 72) | fn test_version_serialization() {
FILE: columnar/src/columnar/merge/merge_dict_column.rs
function merge_bytes_or_str_column (line 14) | pub fn merge_bytes_or_str_column(
type RemappedTermOrdinalsValues (line 36) | struct RemappedTermOrdinalsValues<'a> {
method boxed_iter (line 43) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
function boxed_iter_stacked (line 54) | fn boxed_iter_stacked(&self) -> Box<dyn Iterator<Item = u64> + '_> {
function boxed_iter_shuffled (line 75) | fn boxed_iter_shuffled<'b>(
function compute_term_bitset (line 97) | fn compute_term_bitset(column: &BytesColumn, row_bitset: &ReadOnlyBitSet...
function is_term_present (line 108) | fn is_term_present(bitsets: &[Option<BitSet>], term_merger: &TermMerger)...
function serialize_merged_dict (line 121) | fn serialize_merged_dict(
type TermOrdinalMapping (line 196) | struct TermOrdinalMapping {
method add_segment (line 202) | fn add_segment(&mut self, max_term_ord: usize) {
method register_from_to (line 207) | fn register_from_to(&mut self, segment_ord: usize, from_ord: TermOrdin...
method get_segment (line 211) | fn get_segment(&self, segment_ord: u32) -> &[TermOrdinal] {
FILE: columnar/src/columnar/merge/merge_mapping.rs
type StackMergeOrder (line 7) | pub struct StackMergeOrder {
method stack_for_test (line 15) | pub fn stack_for_test(num_rows_per_columnar: &[u32]) -> StackMergeOrder {
method stack (line 25) | pub fn stack(columnars: &[&ColumnarReader]) -> StackMergeOrder {
method num_rows (line 35) | pub fn num_rows(&self) -> RowId {
method offset (line 39) | pub fn offset(&self, columnar_id: usize) -> RowId {
method columnar_range (line 46) | pub fn columnar_range(&self, columnar_id: usize) -> Range<RowId> {
type MergeRowOrder (line 51) | pub enum MergeRowOrder {
method from (line 66) | fn from(stack_merge_order: StackMergeOrder) -> MergeRowOrder {
method from (line 72) | fn from(shuffle_merge_order: ShuffleMergeOrder) -> MergeRowOrder {
method num_rows (line 78) | pub fn num_rows(&self) -> RowId {
type ShuffleMergeOrder (line 86) | pub struct ShuffleMergeOrder {
method for_test (line 92) | pub fn for_test(
method num_rows (line 122) | pub fn num_rows(&self) -> RowId {
method iter_new_to_old_row_addrs (line 126) | pub fn iter_new_to_old_row_addrs(&self) -> impl Iterator<Item = RowAdd...
FILE: columnar/src/columnar/merge/mod.rs
type ColumnTypeCategory (line 34) | pub(crate) enum ColumnTypeCategory {
method from (line 44) | fn from(column_type: ColumnType) -> Self {
function merge_columnar (line 76) | pub fn merge_columnar(
function dynamic_column_to_u64_monotonic (line 119) | fn dynamic_column_to_u64_monotonic(dynamic_column: DynamicColumn) -> Opt...
function merge_column (line 130) | fn merge_column(
type GroupedColumns (line 227) | struct GroupedColumns {
method is_empty (line 234) | fn is_empty(&self) -> bool {
method column_type_after_merge (line 242) | fn column_type_after_merge(&self) -> ColumnType {
type GroupedColumnsHandle (line 268) | struct GroupedColumnsHandle {
method new (line 274) | fn new(num_columnars: usize) -> Self {
method open (line 280) | fn open(self, merge_row_order: &MergeRowOrder) -> io::Result<GroupedCo...
method set_column (line 305) | fn set_column(&mut self, columnar_id: usize, column: DynamicColumnHand...
method require_type (line 310) | fn require_type(&mut self, required_type: ColumnType) -> io::Result<()> {
function merged_numerical_columns_type (line 336) | fn merged_numerical_columns_type<'a>(
function is_empty_after_merge (line 349) | fn is_empty_after_merge(
function group_columns_for_merge (line 397) | fn group_columns_for_merge<'a>(
function coerce_columns (line 424) | fn coerce_columns(
function coerce_column (line 436) | fn coerce_column(column_type: ColumnType, column: DynamicColumn) -> io::...
function min_max_if_numerical (line 463) | fn min_max_if_numerical(column: &DynamicColumn) -> Option<(NumericalValu...
FILE: columnar/src/columnar/merge/term_merger.rs
type TermsWithSegmentOrd (line 9) | pub struct TermsWithSegmentOrd<'a> {
method eq (line 15) | fn eq(&self, other: &Self) -> bool {
method partial_cmp (line 23) | fn partial_cmp(&self, other: &TermsWithSegmentOrd<'a>) -> Option<Orderin...
method cmp (line 29) | fn cmp(&self, other: &TermsWithSegmentOrd<'a>) -> Ordering {
type TermMerger (line 40) | pub struct TermMerger<'a> {
function new (line 47) | pub fn new(term_streams_with_segment: Vec<TermsWithSegmentOrd<'a>>) -> T...
function matching_segments (line 54) | pub(crate) fn matching_segments<'b: 'a>(
function advance_segments (line 62) | fn advance_segments(&mut self) {
function advance (line 75) | pub fn advance(&mut self) -> bool {
function key (line 98) | pub fn key(&self) -> &[u8] {
FILE: columnar/src/columnar/merge/tests.rs
function make_columnar (line 9) | fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(
function test_column_coercion_to_u64 (line 26) | fn test_column_coercion_to_u64() {
function test_column_coercion_to_i64 (line 39) | fn test_column_coercion_to_i64() {
function test_group_columns_with_required_column (line 63) | fn test_group_columns_with_required_column() {
function test_group_columns_required_column_with_no_existing_columns (line 74) | fn test_group_columns_required_column_with_no_existing_columns() {
function test_group_columns_required_column_is_above_all_columns_have_the_same_type_rule (line 92) | fn test_group_columns_required_column_is_above_all_columns_have_the_same...
function test_missing_column (line 103) | fn test_missing_column() {
function make_numerical_columnar_multiple_columns (line 129) | fn make_numerical_columnar_multiple_columns(
function make_byte_columnar_multiple_columns (line 151) | fn make_byte_columnar_multiple_columns(
function make_text_columnar_multiple_columns (line 173) | fn make_text_columnar_multiple_columns(columns: &[(&str, &[&[&str]])]) -...
function test_merge_columnar_numbers (line 193) | fn test_merge_columnar_numbers() {
function test_merge_columnar_texts (line 225) | fn test_merge_columnar_texts() {
function test_merge_columnar_byte (line 274) | fn test_merge_columnar_byte() {
function test_merge_columnar_byte_with_missing (line 323) | fn test_merge_columnar_byte_with_missing() {
function test_merge_columnar_different_types (line 381) | fn test_merge_columnar_different_types() {
function test_merge_columnar_different_empty_cardinality (line 447) | fn test_merge_columnar_different_empty_cardinality() {
type ColumnSpec (line 475) | struct ColumnSpec {
type ColumnarSpec (line 482) | struct ColumnarSpec {
function rowid_and_term_strategy (line 489) | fn rowid_and_term_strategy() -> impl Strategy<Value = (RowId, Vec<u8>)> {
function column_spec_strategy (line 504) | fn column_spec_strategy() -> impl Strategy<Value = ColumnSpec> {
function columnar_strategy (line 525) | fn columnar_strategy() -> impl Strategy<Value = ColumnarSpec> {
function columnars_strategy (line 531) | fn columnars_strategy() -> impl Strategy<Value = Vec<ColumnarSpec>> {
function build_columnar (line 536) | fn build_columnar(spec: &ColumnarSpec) -> ColumnarReader {
FILE: columnar/src/columnar/reader/mod.rs
function io_invalid_data (line 12) | fn io_invalid_data(msg: String) -> io::Error {
type ColumnarReader (line 19) | pub struct ColumnarReader {
method fmt (line 27) | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
method open (line 95) | pub fn open<F>(file_slice: F) -> io::Result<ColumnarReader>
method open_inner (line 100) | fn open_inner(file_slice: FileSlice) -> io::Result<ColumnarReader> {
method num_docs (line 120) | pub fn num_docs(&self) -> RowId {
method iter_columns (line 124) | pub fn iter_columns(
method list_columns (line 157) | pub fn list_columns(&self) -> io::Result<Vec<(String, DynamicColumnHan...
method read_columns_async (line 161) | pub async fn read_columns_async(
method read_columns (line 178) | pub fn read_columns(&self, column_name: &str) -> io::Result<Vec<Dynami...
method read_subpath_columns_async (line 184) | pub async fn read_subpath_columns_async(
method read_subpath_columns (line 202) | pub fn read_subpath_columns(&self, root_path: &str) -> io::Result<Vec<...
method num_columns (line 212) | pub fn num_columns(&self) -> usize {
function read_all_columns_in_stream (line 55) | fn read_all_columns_in_stream(
function column_dictionary_prefix_for_column_name (line 80) | fn column_dictionary_prefix_for_column_name(column_name: &str) -> String {
function column_dictionary_prefix_for_subpath (line 89) | fn column_dictionary_prefix_for_subpath(root_path: &str) -> String {
function test_list_columns (line 224) | fn test_list_columns() {
function test_list_columns_strict_typing_prevents_coercion (line 240) | fn test_list_columns_strict_typing_prevents_coercion() {
function test_read_columns (line 254) | fn test_read_columns() {
function test_read_subpath_columns (line 273) | fn test_read_subpath_columns() {
function test_list_columns_strict_typing_panics_on_wrong_types (line 313) | fn test_list_columns_strict_typing_panics_on_wrong_types() {
FILE: columnar/src/columnar/writer/column_operation.rs
type ColumnOperation (line 17) | pub(super) enum ColumnOperation<T> {
type ColumnOperationMetadata (line 23) | struct ColumnOperationMetadata {
method to_code (line 29) | fn to_code(self) -> u8 {
method try_from_code (line 33) | fn try_from_code(code: u8) -> Result<Self, InvalidData> {
type ColumnOperationType (line 46) | enum ColumnOperationType {
method to_code (line 52) | pub fn to_code(self) -> u8 {
method try_from_code (line 56) | pub fn try_from_code(code: u8) -> Result<Self, InvalidData> {
function serialize (line 66) | pub(super) fn serialize(self) -> impl AsRef<[u8]> {
function deserialize (line 95) | pub(super) fn deserialize(bytes: &mut &[u8]) -> Option<Self> {
function from (line 115) | fn from(value: T) -> Self {
type SymbolValue (line 125) | pub(super) trait SymbolValue: Clone + Copy {
method serialize (line 130) | fn serialize(self, buffer: &mut [u8]) -> u8;
method deserialize (line 132) | fn deserialize(bytes: &[u8]) -> Self;
method serialize (line 136) | fn serialize(self, buffer: &mut [u8]) -> u8 {
method deserialize (line 141) | fn deserialize(bytes: &[u8]) -> Self {
method serialize (line 147) | fn serialize(self, buffer: &mut [u8]) -> u8 {
method deserialize (line 152) | fn deserialize(bytes: &[u8]) -> Self {
method deserialize (line 171) | fn deserialize(mut bytes: &[u8]) -> Self {
method serialize (line 197) | fn serialize(self, output: &mut [u8]) -> u8 {
method serialize (line 222) | fn serialize(self, output: &mut [u8]) -> u8 {
method deserialize (line 228) | fn deserialize(bytes: &[u8]) -> Self {
method serialize (line 236) | fn serialize(self, output: &mut [u8]) -> u8 {
method deserialize (line 240) | fn deserialize(bytes: &[u8]) -> Self {
type MiniBuffer (line 159) | struct MiniBuffer {
method as_ref (line 165) | fn as_ref(&self) -> &[u8] {
function compute_num_bytes_for_u64 (line 245) | fn compute_num_bytes_for_u64(val: u64) -> usize {
function encode_zig_zag (line 250) | fn encode_zig_zag(n: i64) -> u64 {
function decode_zig_zag (line 254) | fn decode_zig_zag(n: u64) -> i64 {
function test_zig_zag_aux (line 263) | fn test_zig_zag_aux(val: i64) {
function test_zig_zag (line 273) | fn test_zig_zag() {
function test_column_op_metadata_byte_serialization (line 293) | fn test_column_op_metadata_byte_serialization() {
function ser_deser_symbol (line 306) | fn ser_deser_symbol(column_op: ColumnOperation<NumericalValue>) {
function test_compute_num_bytes_for_u64 (line 317) | fn test_compute_num_bytes_for_u64() {
function test_symbol_serialization (line 327) | fn test_symbol_serialization() {
function test_column_operation_unordered_aux (line 340) | fn test_column_operation_unordered_aux(val: u32, expected_len: usize) {
function test_column_operation_unordered (line 354) | fn test_column_operation_unordered() {
FILE: columnar/src/columnar/writer/column_writers.rs
type DocumentStep (line 11) | enum DocumentStep {
function delta_with_last_doc (line 18) | fn delta_with_last_doc(last_doc_opt: Option<u32>, doc: u32) -> DocumentS...
type ColumnWriter (line 28) | pub struct ColumnWriter {
method operation_iterator (line 41) | pub(super) fn operation_iterator<'a, V: SymbolValue>(
method record (line 56) | pub(super) fn record<S: SymbolValue>(&mut self, doc: RowId, value: S, ...
method get_cardinality (line 80) | pub(crate) fn get_cardinality(&self, num_docs: RowId) -> Cardinality {
method write_symbol (line 88) | fn write_symbol<V: SymbolValue>(
type NumericalColumnWriter (line 100) | pub(crate) struct NumericalColumnWriter {
method force_numerical_type (line 106) | pub fn force_numerical_type(&mut self, numerical_type: NumericalType) {
method numerical_type (line 193) | pub fn numerical_type(&self) -> NumericalType {
method cardinality (line 197) | pub fn cardinality(&self, num_docs: RowId) -> Cardinality {
method record_numerical_value (line 201) | pub fn record_numerical_value(
method operation_iterator (line 211) | pub(super) fn operation_iterator<'a>(
type CompatibleNumericalTypes (line 118) | pub(crate) enum CompatibleNumericalTypes {
method is_type_accepted (line 136) | pub fn is_type_accepted(&self, numerical_type: NumericalType) -> bool {
method accept_value (line 152) | pub fn accept_value(&mut self, numerical_value: NumericalValue) {
method to_numerical_type (line 182) | pub fn to_numerical_type(self) -> NumericalType {
method default (line 127) | fn default() -> CompatibleNumericalTypes {
type StrOrBytesColumnWriter (line 221) | pub(crate) struct StrOrBytesColumnWriter {
method with_dictionary_id (line 235) | pub(crate) fn with_dictionary_id(dictionary_id: u32) -> StrOrBytesColu...
method record_bytes (line 243) | pub(crate) fn record_bytes(
method operation_iterator (line 255) | pub(super) fn operation_iterator<'a>(
function test_delta_with_last_doc (line 269) | fn test_delta_with_last_doc() {
function test_column_writer_coercion_iter_aux (line 281) | fn test_column_writer_coercion_iter_aux(
function test_column_writer_coercion_aux (line 296) | fn test_column_writer_coercion_aux(
function test_column_writer_coercion (line 305) | fn test_column_writer_coercion() {
function test_compatible_numerical_types_static_incompatible_type (line 320) | fn test_compatible_numerical_types_static_incompatible_type() {
function test_compatible_numerical_types_static_different_type_forbidden (line 327) | fn test_compatible_numerical_types_static_different_type_forbidden() {
function test_compatible_numerical_types_static (line 334) | fn test_compatible_numerical_types_static() {
FILE: columnar/src/columnar/writer/mod.rs
type SpareBuffers (line 30) | struct SpareBuffers {
type ColumnarWriter (line 50) | pub struct ColumnarWriter {
method mem_usage (line 64) | pub fn mem_usage(&self) -> usize {
method record_column_type (line 84) | pub fn record_column_type(
method record_numerical (line 151) | pub fn record_numerical<T: Into<NumericalValue> + Copy>(
method record_ip_addr (line 168) | pub fn record_ip_addr(&mut self, doc: RowId, column_name: &str, ip_add...
method record_bool (line 180) | pub fn record_bool(&mut self, doc: RowId, column_name: &str, val: bool) {
method record_datetime (line 192) | pub fn record_datetime(&mut self, doc: RowId, column_name: &str, datet...
method record_str (line 208) | pub fn record_str(&mut self, doc: RowId, column_name: &str, value: &st...
method record_bytes (line 229) | pub fn record_bytes(&mut self, doc: RowId, column_name: &str, value: &...
method serialize (line 249) | pub fn serialize(&mut self, num_docs: RowId, wrt: &mut dyn io::Write) ...
function serialize_bytes_or_str_column (line 395) | fn serialize_bytes_or_str_column(
function serialize_numerical_column (line 438) | fn serialize_numerical_column(
function serialize_bool_column (line 489) | fn serialize_bool_column(
function serialize_ip_addr_column (line 516) | fn serialize_ip_addr_column(
function send_to_serialize_column_mappable_to_u128 (line 539) | fn send_to_serialize_column_mappable_to_u128<
function send_to_serialize_column_mappable_to_u64 (line 584) | fn send_to_serialize_column_mappable_to_u64(
function sort_values_within_row_in_place (line 633) | fn sort_values_within_row_in_place(
function coerce_numerical_symbol (line 645) | fn coerce_numerical_symbol<T>(
function consume_operation_iterator (line 657) | fn consume_operation_iterator<T: Ord, TIndexBuilder: IndexBuilder>(
function test_column_writer_required_simple (line 683) | fn test_column_writer_required_simple() {
function test_column_writer_optional_cardinality_missing_first (line 713) | fn test_column_writer_optional_cardinality_missing_first() {
function test_column_writer_optional_cardinality_missing_last (line 737) | fn test_column_writer_optional_cardinality_missing_last() {
function test_column_writer_multivalued (line 755) | fn test_column_writer_multivalued() {
FILE: columnar/src/columnar/writer/serializer.rs
type ColumnarSerializer (line 12) | pub struct ColumnarSerializer<W: io::Write> {
function prepare_key (line 20) | fn prepare_key(key: &[u8], column_type: ColumnType, buffer: &mut Vec<u8>) {
function new (line 28) | pub(crate) fn new(wrt: W) -> ColumnarSerializer<W> {
function start_serialize_column (line 39) | pub fn start_serialize_column<'a>(
function finalize (line 52) | pub(crate) fn finalize(mut self, num_rows: RowId) -> io::Result<()> {
type ColumnSerializer (line 65) | pub struct ColumnSerializer<'a, W: io::Write> {
function finalize (line 71) | pub fn finalize(self) -> io::Result<()> {
function write (line 84) | fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
function flush (line 88) | fn flush(&mut self) -> io::Result<()> {
function write_all (line 92) | fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
FILE: columnar/src/columnar/writer/value_index.rs
type IndexBuilder (line 12) | pub(crate) trait IndexBuilder {
method record_row (line 13) | fn record_row(&mut self, doc: RowId);
method record_value (line 15) | fn record_value(&mut self) {}
method record_row (line 24) | fn record_row(&mut self, _doc: RowId) {}
method record_row (line 51) | fn record_row(&mut self, doc: RowId) {
method record_row (line 95) | fn record_row(&mut self, row_id: RowId) {
method record_value (line 100) | fn record_value(&mut self) {
type FullIndexBuilder (line 20) | pub struct FullIndexBuilder;
type OptionalIndexBuilder (line 28) | pub struct OptionalIndexBuilder {
method finish (line 33) | pub fn finish(&mut self, num_rows: RowId) -> impl Iterable<RowId> + '_ {
method reset (line 44) | fn reset(&mut self) {
type MultivaluedIndexBuilder (line 64) | pub struct MultivaluedIndexBuilder {
method finish (line 73) | pub fn finish(&mut self, num_docs: RowId) -> SerializableMultivalueInd...
method reset (line 85) | fn reset(&mut self) {
type PreallocatedIndexBuilders (line 113) | pub struct PreallocatedIndexBuilders {
method borrow_required_index_builder (line 120) | pub fn borrow_required_index_builder(&mut self) -> &mut FullIndexBuild...
method borrow_optional_index_builder (line 124) | pub fn borrow_optional_index_builder(&mut self) -> &mut OptionalIndexB...
method borrow_multivalued_index_builder (line 129) | pub fn borrow_multivalued_index_builder(&mut self) -> &mut Multivalued...
function test_optional_value_index_builder (line 140) | fn test_optional_value_index_builder() {
function test_multivalued_value_index_builder_simple (line 164) | fn test_multivalued_value_index_builder_simple() {
function test_multivalued_value_index_builder (line 190) | fn test_multivalued_value_index_builder() {
FILE: columnar/src/compat_tests.rs
constant NUM_DOCS (line 10) | const NUM_DOCS: u32 = u16::MAX as u32;
function generate_columnar (line 12) | fn generate_columnar(num_docs: u32, value_offset: u64) -> Vec<u8> {
function create_format (line 37) | fn create_format() {
function path_for_version (line 47) | fn path_for_version(version: &str) -> String {
function test_format_v1 (line 52) | fn test_format_v1() {
function test_format_v2 (line 58) | fn test_format_v2() {
function test_format (line 63) | fn test_format(path: &str) {
function check_columns (line 79) | fn check_columns(reader: &ColumnarReader) {
type RowIdAndValue (line 114) | struct RowIdAndValue {
method from (line 119) | fn from((row_id, value): (u32, u64)) -> Self {
function check_column (line 124) | fn check_column<F: Fn(u32) -> Vec<RowIdAndValue>>(column: &Column<u64>, ...
function open_column (line 173) | fn open_column(reader: &ColumnarReader, name: &str) -> Column<u64> {
FILE: columnar/src/dictionary.rs
type TermIdMapping (line 6) | pub(crate) struct TermIdMapping {
method to_ord (line 11) | pub fn to_ord(&self, unordered: UnorderedId) -> OrderedId {
type UnorderedId (line 20) | pub struct UnorderedId(pub u32);
type OrderedId (line 23) | pub struct OrderedId(pub u32);
type DictionaryBuilder (line 33) | pub(crate) struct DictionaryBuilder {
method get_or_allocate_id (line 40) | pub fn get_or_allocate_id(&mut self, term: &[u8], arena: &mut MemoryAr...
method serialize (line 56) | pub fn serialize<'a, W: io::Write + 'a>(
method mem_usage (line 79) | pub(crate) fn mem_usage(&self) -> usize {
function test_dictionary_builder (line 89) | fn test_dictionary_builder() {
FILE: columnar/src/dynamic_column.rs
type DynamicColumn (line 15) | pub enum DynamicColumn {
method fmt (line 27) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
method column_index (line 44) | pub fn column_index(&self) -> &ColumnIndex {
method get_cardinality (line 57) | pub fn get_cardinality(&self) -> Cardinality {
method num_values (line 61) | pub fn num_values(&self) -> u32 {
method column_type (line 74) | pub fn column_type(&self) -> ColumnType {
method coerce_numerical (line 87) | pub fn coerce_numerical(self, target_numerical_type: NumericalType) ->...
method is_numerical (line 95) | pub fn is_numerical(&self) -> bool {
method is_f64 (line 99) | pub fn is_f64(&self) -> bool {
method is_i64 (line 102) | pub fn is_i64(&self) -> bool {
method is_u64 (line 105) | pub fn is_u64(&self) -> bool {
method coerce_to_f64 (line 109) | fn coerce_to_f64(self) -> Option<DynamicColumn> {
method coerce_to_i64 (line 123) | fn coerce_to_i64(self) -> Option<DynamicColumn> {
method coerce_to_u64 (line 138) | fn coerce_to_u64(self) -> Option<DynamicColumn> {
type MapI64ToF64 (line 155) | struct MapI64ToF64;
method mapping (line 158) | fn mapping(&self, inp: i64) -> f64 {
method inverse (line 162) | fn inverse(&self, out: f64) -> i64 {
type MapU64ToF64 (line 167) | struct MapU64ToF64;
method mapping (line 170) | fn mapping(&self, inp: u64) -> f64 {
method inverse (line 174) | fn inverse(&self, out: f64) -> u64 {
type MapU64ToI64 (line 179) | struct MapU64ToI64;
method mapping (line 182) | fn mapping(&self, inp: u64) -> i64 {
method inverse (line 186) | fn inverse(&self, out: i64) -> u64 {
type MapI64ToU64 (line 191) | struct MapI64ToU64;
method mapping (line 194) | fn mapping(&self, inp: i64) -> u64 {
method inverse (line 198) | fn inverse(&self, out: u64) -> i64 {
type DynamicColumnHandle (line 233) | pub struct DynamicColumnHandle {
method open (line 241) | pub fn open(&self) -> io::Result<DynamicColumn> {
method file_slice (line 247) | pub fn file_slice(&self) -> &FileSlice {
method open_u64_lenient (line 261) | pub fn open_u64_lenient(&self) -> io::Result<Option<Column<u64>>> {
method open_internal (line 288) | fn open_internal(&self, column_bytes: OwnedBytes) -> io::Result<Dynami...
method num_bytes (line 320) | pub fn num_bytes(&self) -> ByteCount {
method column_and_dictionary_num_bytes (line 325) | pub fn column_and_dictionary_num_bytes(&self) -> io::Result<ColumnSpac...
method space_usage (line 336) | pub fn space_usage(&self) -> io::Result<ColumnSpaceUsage> {
method column_type (line 355) | pub fn column_type(&self) -> ColumnType {
type ColumnSpaceUsage (line 366) | pub struct ColumnSpaceUsage {
method new (line 372) | pub(crate) fn new(
method column_num_bytes (line 382) | pub fn column_num_bytes(&self) -> ByteCount {
method dictionary_num_bytes (line 386) | pub fn dictionary_num_bytes(&self) -> Option<ByteCount> {
method total_num_bytes (line 390) | pub fn total_num_bytes(&self) -> ByteCount {
method merge (line 395) | pub fn merge(&self, other: &ColumnSpaceUsage) -> ColumnSpaceUsage {
FILE: columnar/src/iterable.rs
type Iterable (line 6) | pub trait Iterable<T = u64> {
method boxed_iter (line 7) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_>;
method boxed_iter (line 25) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
function boxed_iter (line 11) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
function boxed_iter (line 19) | fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
FILE: columnar/src/lib.rs
type RowId (line 53) | pub type RowId = u32;
type DocId (line 54) | pub type DocId = u32;
type RowAddr (line 57) | pub struct RowAddr {
type Streamer (line 63) | pub type Streamer<'a> = sstable::Streamer<'a, VoidSSTable>;
type InvalidData (line 68) | pub struct InvalidData;
function from (line 71) | fn from(_: InvalidData) -> Self {
type Cardinality (line 82) | pub enum Cardinality {
method is_optional (line 105) | pub fn is_optional(&self) -> bool {
method is_multivalue (line 108) | pub fn is_multivalue(&self) -> bool {
method is_full (line 111) | pub fn is_full(&self) -> bool {
method to_code (line 114) | pub(crate) fn to_code(self) -> u8 {
method try_from_code (line 117) | pub(crate) fn try_from_code(code: u8) -> Result<Cardinality, InvalidDa...
method fmt (line 94) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
FILE: columnar/src/tests.rs
function test_dataframe_writer_str (line 19) | fn test_dataframe_writer_str() {
function test_dataframe_writer_bytes (line 33) | fn test_dataframe_writer_bytes() {
function test_dataframe_writer_bool (line 47) | fn test_dataframe_writer_bool() {
function test_dataframe_writer_u64_multivalued (line 68) | fn test_dataframe_writer_u64_multivalued() {
function test_dataframe_writer_ip_addr (line 95) | fn test_dataframe_writer_ip_addr() {
function test_dataframe_writer_numerical (line 125) | fn test_dataframe_writer_numerical() {
function test_dictionary_encoded_str (line 157) | fn test_dictionary_encoded_str() {
function test_dictionary_encoded_bytes (line 191) | fn test_dictionary_encoded_bytes() {
function num_strategy (line 235) | fn num_strategy() -> impl Strategy<Value = NumericalValue> {
type ColumnValue (line 250) | enum ColumnValue {
method from (line 260) | fn from(val: T) -> ColumnValue {
method column_type_category (line 266) | pub(crate) fn column_type_category(&self) -> ColumnTypeCategory {
function column_name_strategy (line 278) | fn column_name_strategy() -> impl Strategy<Value = &'static str> {
function string_strategy (line 282) | fn string_strategy() -> impl Strategy<Value = &'static str> {
function bytes_strategy (line 286) | fn bytes_strategy() -> impl Strategy<Value = &'static [u8]> {
function column_value_strategy (line 291) | fn column_value_strategy() -> impl Strategy<Value = ColumnValue> {
function doc_strategy (line 313) | fn doc_strategy() -> impl Strategy<Value = Vec<(&'static str, ColumnValu...
function num_docs_strategy (line 317) | fn num_docs_strategy() -> impl Strategy<Value = usize> {
function columnar_docs_strategy (line 327) | fn columnar_docs_strategy() -> impl Strategy<Value = Vec<Vec<(&'static s...
function permutation_and_subset_strategy (line 332) | fn permutation_and_subset_strategy(n: usize) -> impl Strategy<Value = Ve...
function build_columnar_with_mapping (line 337) | fn build_columnar_with_mapping(docs: &[Vec<(&'static str, ColumnValue)>]...
function build_columnar (line 370) | fn build_columnar(docs: &[Vec<(&'static str, ColumnValue)>]) -> Columnar...
function assert_columnar_eq_strict (line 374) | fn assert_columnar_eq_strict(left: &ColumnarReader, right: &ColumnarRead...
function assert_columnar_eq (line 378) | fn assert_columnar_eq(
function assert_column_eq (line 396) | fn assert_column_eq<T: Copy + PartialOrd + Debug + Send + Sync + 'static>(
function assert_bytes_column_eq (line 416) | fn assert_bytes_column_eq(left: &BytesColumn, right: &BytesColumn) {
function assert_dyn_column_eq (line 436) | fn assert_dyn_column_eq(
type AssertEqualToColumnValue (line 487) | trait AssertEqualToColumnValue {
method assert_equal_to_column_value (line 488) | fn assert_equal_to_column_value(&self, column_value: &ColumnValue);
method assert_equal_to_column_value (line 492) | fn assert_equal_to_column_value(&self, column_value: &ColumnValue) {
method assert_equal_to_column_value (line 501) | fn assert_equal_to_column_value(&self, column_value: &ColumnValue) {
method assert_equal_to_column_value (line 510) | fn assert_equal_to_column_value(&self, column_value: &ColumnValue) {
method assert_equal_to_column_value (line 519) | fn assert_equal_to_column_value(&self, column_value: &ColumnValue) {
function assert_column_values (line 527) | fn assert_column_values<
function assert_bytes_column_values (line 549) | fn assert_bytes_column_values(
function test_columnar_merging_empty_columnar (line 653) | fn test_columnar_merging_empty_columnar() {
function test_columnar_merging_number_columns (line 678) | fn test_columnar_merging_number_columns() {
function columnar_docs_and_remap (line 719) | fn columnar_docs_and_remap()
function test_columnar_merge_and_remap (line 751) | fn test_columnar_merge_and_remap(
function test_columnar_merge_and_remap_bug_1 (line 785) | fn test_columnar_merge_and_remap_bug_1() {
function test_columnar_merge_empty (line 808) | fn test_columnar_merge_empty() {
function test_columnar_merge_single_str_column (line 828) | fn test_columnar_merge_single_str_column() {
function test_delete_decrease_cardinality (line 854) | fn test_delete_decrease_cardinality() {
FILE: columnar/src/utils.rs
function compute_mask (line 1) | const fn compute_mask(num_bits: u8) -> u8 {
function select_bits (line 11) | pub(crate) fn select_bits<const START: u8, const END: u8>(code: u8) -> u8 {
function place_bits (line 21) | pub(crate) fn place_bits<const START: u8, const END: u8>(code: u8) -> u8 {
function pop_first_byte (line 32) | pub fn pop_first_byte(bytes: &mut &[u8]) -> Option<u8> {
function test_select_bits (line 46) | fn test_select_bits() {
function test_place_bits (line 55) | fn test_place_bits() {
function test_place_bits_overflows (line 63) | fn test_place_bits_overflows() {
function test_pop_first_byte (line 68) | fn test_pop_first_byte() {
FILE: columnar/src/value.rs
type NumericalValue (line 8) | pub enum NumericalValue {
method numerical_type (line 32) | pub fn numerical_type(&self) -> NumericalType {
method normalize (line 42) | pub fn normalize(self) -> Self {
method from (line 67) | fn from(val: u64) -> NumericalValue {
method from (line 73) | fn from(val: i64) -> Self {
method from (line 79) | fn from(val: f64) -> Self {
type Err (line 15) | type Err = ();
method from_str (line 17) | fn from_str(s: &str) -> Result<Self, ()> {
type NumericalType (line 86) | pub enum NumericalType {
method to_code (line 94) | pub fn to_code(self) -> u8 {
method try_from_code (line 98) | pub fn try_from_code(code: u8) -> Result<NumericalType, InvalidData> {
type Coerce (line 119) | pub(crate) trait Coerce {
method coerce (line 120) | fn coerce(numerical_value: NumericalValue) -> Self;
method coerce (line 124) | fn coerce(value: NumericalValue) -> Self {
method coerce (line 134) | fn coerce(value: NumericalValue) -> Self {
method coerce (line 144) | fn coerce(value: NumericalValue) -> Self {
method coerce (line 154) | fn coerce(value: NumericalValue) -> Self {
function test_numerical_type_code (line 166) | fn test_numerical_type_code() {
function test_parse_numerical (line 178) | fn test_parse_numerical() {
function test_normalize_numerical (line 202) | fn test_normalize_numerical() {
FILE: common/benches/bench.rs
function bench_vint (line 6) | fn bench_vint() {
function bench_bitset (line 32) | fn bench_bitset() {
function main (line 61) | fn main() {
FILE: common/src/bitset.rs
type TinySet (line 9) | pub struct TinySet(u64);
method fmt (line 12) | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
method serialize (line 36) | pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
method into_bytes (line 40) | pub fn into_bytes(self) -> [u8; 8] {
method deserialize (line 45) | pub fn deserialize(data: [u8; 8]) -> Self {
method empty (line 52) | pub fn empty() -> TinySet {
method full (line 58) | pub fn full() -> TinySet {
method clear (line 62) | pub fn clear(&mut self) {
method complement (line 71) | fn complement(self) -> TinySet {
method contains (line 77) | pub fn contains(self, el: u32) -> bool {
method len (line 83) | pub fn len(self) -> u32 {
method intersect (line 90) | pub fn intersect(self, other: TinySet) -> TinySet {
method singleton (line 97) | pub fn singleton(el: u32) -> TinySet {
method insert (line 104) | pub fn insert(self, el: u32) -> TinySet {
method remove (line 111) | pub fn remove(self, el: u32) -> TinySet {
method insert_mut (line 119) | pub fn insert_mut(&mut self, el: u32) -> bool {
method remove_mut (line 129) | pub fn remove_mut(&mut self, el: u32) -> bool {
method union (line 138) | pub fn union(self, other: TinySet) -> TinySet {
method is_empty (line 144) | pub fn is_empty(self) -> bool {
method pop_lowest (line 151) | pub fn pop_lowest(&mut self) -> Option<u32> {
method range_lower (line 165) | pub fn range_lower(upper_bound: u32) -> TinySet {
method range_greater_or_equal (line 173) | pub fn range_greater_or_equal(from_included: u32) -> TinySet {
type TinySetIterator (line 17) | pub struct TinySetIterator(TinySet);
type Item (line 19) | type Item = u32;
method next (line 22) | fn next(&mut self) -> Option<Self::Item> {
type Item (line 28) | type Item = u32;
type IntoIter (line 29) | type IntoIter = TinySetIterator;
method into_iter (line 30) | fn into_iter(self) -> Self::IntoIter {
type BitSet (line 179) | pub struct BitSet {
method fmt (line 185) | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
method serialize (line 199) | pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
method with_max_value (line 210) | pub fn with_max_value(max_value: u32) -> BitSet {
method with_max_value_and_full (line 222) | pub fn with_max_value_and_full(max_value: u32) -> BitSet {
method clear (line 239) | pub fn clear(&mut self) {
method intersect_update (line 246) | pub fn intersect_update(&mut self, other: &ReadOnlyBitSet) {
method intersect_update_with_iter (line 251) | fn intersect_update_with_iter(&mut self, other: impl Iterator<Item = T...
method len (line 261) | pub fn len(&self) -> usize {
method insert (line 267) | pub fn insert(&mut self, el: u32) {
method remove (line 276) | pub fn remove(&mut self, el: u32) {
method contains (line 285) | pub fn contains(&self, el: u32) -> bool {
method first_non_empty_bucket (line 294) | pub fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
method max_value (line 303) | pub fn max_value(&self) -> u32 {
method tinyset (line 310) | pub fn tinyset(&self, bucket: u32) -> TinySet {
function num_buckets (line 193) | fn num_buckets(max_val: u32) -> u32 {
type ReadOnlyBitSet (line 317) | pub struct ReadOnlyBitSet {
method open (line 340) | pub fn open(data: OwnedBytes) -> Self {
method len (line 349) | pub fn len(&self) -> usize {
method iter_tinysets (line 357) | fn iter_tinysets(&self) -> impl Iterator<Item = TinySet> + '_ {
method iter (line 366) | pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
method contains (line 380) | pub fn contains(&self, el: u32) -> bool {
method max_value (line 393) | pub fn max_value(&self) -> u32 {
method num_bytes (line 398) | pub fn num_bytes(&self) -> ByteCount {
method from (line 404) | fn from(bitset: &'a BitSet) -> ReadOnlyBitSet {
function intersect_bitsets (line 322) | pub fn intersect_bitsets(left: &ReadOnlyBitSet, other: &ReadOnlyBitSet) ...
function test_read_serialized_bitset_full_multi (line 426) | fn test_read_serialized_bitset_full_multi() {
function test_read_serialized_bitset_full_block (line 438) | fn test_read_serialized_bitset_full_block() {
function test_read_serialized_bitset_full (line 448) | fn test_read_serialized_bitset_full() {
function test_bitset_intersect (line 459) | fn test_bitset_intersect() {
function test_read_serialized_bitset_empty (line 499) | fn test_read_serialized_bitset_empty() {
function test_tiny_set_remove (line 518) | fn test_tiny_set_remove() {
function test_tiny_set (line 545) | fn test_tiny_set() {
function test_bitset (line 583) | fn test_bitset() {
function test_bitset_num_buckets (line 619) | fn test_bitset_num_buckets() {
function test_tinyset_range (line 630) | fn test_tinyset_range() {
function test_bitset_len (line 657) | fn test_bitset_len() {
function sample_with_seed (line 680) | pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
function sample (line 689) | pub fn sample(n: u32, ratio: f64) -> Vec<u32> {
function test_bitset_clear (line 694) | fn test_bitset_clear() {
FILE: common/src/bounds.rs
type BoundsRange (line 5) | pub struct BoundsRange<T> {
function new (line 10) | pub fn new(lower_bound: Bound<T>, upper_bound: Bound<T>) -> Self {
function is_unbounded (line 16) | pub fn is_unbounded(&self) -> bool {
function map_bound (line 19) | pub fn map_bound<TTo>(&self, transform: impl Fn(&T) -> TTo) -> BoundsRan...
function map_bound_res (line 26) | pub fn map_bound_res<TTo, Err>(
function transform_inner (line 36) | pub fn transform_inner<TTo>(
function get_inner (line 48) | pub fn get_inner(&self) -> Option<&T> {
type TransformBound (line 53) | pub enum TransformBound<T> {
function transform_bound_inner_res (line 62) | pub fn transform_bound_inner_res<TFrom, TTo>(
function transform_bound_inner (line 82) | pub fn transform_bound_inner<TFrom, TTo>(
function inner_bound (line 101) | pub fn inner_bound<T>(val: &Bound<T>) -> Option<&T> {
function map_bound (line 108) | pub fn map_bound<TFrom, TTo>(
function map_bound_res (line 120) | pub fn map_bound_res<TFrom, TTo, Err>(
FILE: common/src/byte_count.rs
type ByteCount (line 8) | pub struct ByteCount(u64);
method fmt (line 11) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
method fmt (line 17) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
method get_bytes (line 32) | pub fn get_bytes(&self) -> u64 {
method human_readable (line 36) | pub fn human_readable(&self) -> String {
method from (line 48) | fn from(value: u64) -> Self {
method from (line 53) | fn from(value: usize) -> Self {
method eq (line 67) | fn eq(&self, other: &u64) -> bool {
method partial_cmp (line 74) | fn partial_cmp(&self, other: &u64) -> Option<std::cmp::Ordering> {
constant SUFFIX_AND_THRESHOLD (line 22) | const SUFFIX_AND_THRESHOLD: [(&str, u64); 5] = [
method sum (line 60) | fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
type Output (line 80) | type Output = Self;
method add (line 83) | fn add(self, other: Self) -> Self {
method add_assign (line 90) | fn add_assign(&mut self, other: Self) {
function test_bytes (line 100) | fn test_bytes() {
FILE: common/src/datetime.rs
type DateTimePrecision (line 16) | pub enum DateTimePrecision {
type DateTime (line 39) | pub struct DateTime {
constant MIN (line 46) | pub const MIN: DateTime = DateTime {
constant MAX (line 51) | pub const MAX: DateTime = DateTime {
method from_timestamp_secs (line 56) | pub const fn from_timestamp_secs(seconds: i64) -> Self {
method from_timestamp_millis (line 63) | pub const fn from_timestamp_millis(milliseconds: i64) -> Self {
method from_timestamp_micros (line 70) | pub const fn from_timestamp_micros(microseconds: i64) -> Self {
method from_timestamp_nanos (line 77) | pub const fn from_timestamp_nanos(nanoseconds: i64) -> Self {
method from_utc (line 87) | pub fn from_utc(dt: OffsetDateTime) -> Self {
method from_primitive (line 97) | pub fn from_primitive(dt: PrimitiveDateTime) -> Self {
method into_timestamp_secs (line 102) | pub const fn into_timestamp_secs(self) -> i64 {
method into_timestamp_millis (line 107) | pub const fn into_timestamp_millis(self) -> i64 {
method into_timestamp_micros (line 112) | pub const fn into_timestamp_micros(self) -> i64 {
method into_timestamp_nanos (line 117) | pub const fn into_timestamp_nanos(self) -> i64 {
method into_utc (line 122) | pub fn into_utc(self) -> OffsetDateTime {
method into_offset (line 130) | pub fn into_offset(self, offset: UtcOffset) -> OffsetDateTime {
method into_primitive (line 138) | pub fn into_primitive(self) -> PrimitiveDateTime {
method truncate (line 146) | pub fn truncate(self, precision: DateTimePrecision) -> Self {
method fmt (line 160) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
method serialize (line 167) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> std::io::Resul...
method deserialize (line 172) | fn deserialize<R: Read>(reader: &mut R) -> std::io::Result<Self> {
FILE: common/src/file_slice.rs
type FileHandle (line 22) | pub trait FileHandle: 'static + Send + Sync + HasLen + fmt::Debug {
method read_bytes (line 26) | fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes>;
method read_bytes_async (line 29) | async fn read_bytes_async(&self, _byte_range: Range<usize>) -> io::Res...
method read_bytes (line 53) | fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
method read_bytes (line 95) | fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
method read_bytes_async (line 100) | async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Resu...
method read_bytes (line 316) | fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
method read_bytes_async (line 320) | async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Resu...
method read_bytes (line 333) | fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
method read_bytes_async (line 337) | async fn read_bytes_async(&self, range: Range<usize>) -> io::Result<Ow...
type WrapFile (line 39) | pub struct WrapFile {
method new (line 45) | pub fn new(file: File) -> io::Result<Self> {
method len (line 88) | fn len(&self) -> usize {
type FileSlice (line 117) | pub struct FileSlice {
method from (line 108) | fn from(bytes: B) -> FileSlice {
method fmt (line 123) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
method stream_file_chunks (line 129) | pub fn stream_file_chunks(&self) -> impl Iterator<Item = io::Result<Ow...
method open (line 182) | pub fn open(path: &Path) -> io::Result<FileSlice> {
method new (line 188) | pub fn new(file_handle: Arc<dyn FileHandle>) -> Self {
method new_with_num_bytes (line 196) | pub fn new_with_num_bytes(file_handle: Arc<dyn FileHandle>, num_bytes:...
method slice (line 210) | pub fn slice<R: RangeBounds<usize>>(&self, byte_range: R) -> FileSlice {
method empty (line 218) | pub fn empty() -> FileSlice {
method read_bytes (line 229) | pub fn read_bytes(&self) -> io::Result<OwnedBytes> {
method read_bytes_async (line 234) | pub async fn read_bytes_async(&self) -> io::Result<OwnedBytes> {
method read_bytes_slice (line 241) | pub fn read_bytes_slice(&self, range: Range<usize>) -> io::Result<Owne...
method read_bytes_slice_async (line 253) | pub async fn read_bytes_slice_async(&self, byte_range: Range<usize>) -...
method split (line 269) | pub fn split(self, left_len: usize) -> (FileSlice, FileSlice) {
method split_from_end (line 277) | pub fn split_from_end(self, right_len: usize) -> (FileSlice, FileSlice) {
method slice_from (line 287) | pub fn slice_from(&self, from_offset: usize) -> FileSlice {
method slice_from_end (line 295) | pub fn slice_from_end(&self, from_offset: usize) -> FileSlice {
method slice_to (line 304) | pub fn slice_to(&self, to_offset: usize) -> FileSlice {
method num_bytes (line 309) | pub fn num_bytes(&self) -> ByteCount {
function combine_ranges (line 162) | fn combine_ranges<R: RangeBounds<usize>>(orig_range: Range<usize>, rel_r...
method len (line 326) | fn len(&self) -> usize {
function test_file_slice (line 353) | fn test_file_slice() -> io::Result<()> {
function test_file_slice_trait_slice_len (line 390) | fn test_file_slice_trait_slice_len() {
function test_slice_simple_read (line 397) | fn test_slice_simple_read() -> io::Result<()> {
function test_slice_read_slice (line 406) | fn test_slice_read_slice() -> io::Result<()> {
function test_slice_read_slice_invalid_range_exceeds (line 414) | fn test_slice_read_slice_invalid_range_exceeds() {
function test_combine_range (line 423) | fn test_combine_range() {
function test_combine_range_panics (line 437) | fn test_combine_range_panics() {
FILE: common/src/group_by.rs
type GroupByIteratorExtended (line 5) | pub trait GroupByIteratorExtended: Iterator {
method group_by (line 26) | fn group_by<K, F>(self, key: F) -> GroupByIterator<Self, F, K>
type GroupByIterator (line 38) | pub struct GroupByIterator<I, F, K: Clone>
type GroupByShared (line 53) | struct GroupByShared<I, F, K: Clone>
function new (line 68) | fn new(inner: I, group_by_fn: F) -> Self {
type Item (line 87) | type Item = (K, GroupIterator<I, F, K>);
method next (line 89) | fn next(&mut self) -> Option<Self::Item> {
type GroupIterator (line 104) | pub struct GroupIterator<I, F, K: Clone>
type Item (line 119) | type Item = I::Item;
method next (line 121) | fn next(&mut self) -> Option<Self::Item> {
function group_by_collect (line 137) | fn group_by_collect<I: Iterator<Item = u32>>(iter: I) -> Vec<(I::Item, V...
function group_by_two_groups (line 144) | fn group_by_two_groups() {
function group_by_test_empty (line 151) | fn group_by_test_empty() {
function group_by_three_groups (line 158) | fn group_by_three_groups() {
FILE: common/src/json_path_writer.rs
constant JSON_PATH_SEGMENT_SEP (line 4) | pub const JSON_PATH_SEGMENT_SEP: u8 = 1u8;
constant JSON_PATH_SEGMENT_SEP_STR (line 5) | pub const JSON_PATH_SEGMENT_SEP_STR: &str =
constant JSON_END_OF_PATH (line 10) | pub const JSON_END_OF_PATH: u8 = 0u8;
constant JSON_END_OF_PATH_STR (line 11) | pub const JSON_END_OF_PATH_STR: &str =
type JsonPathWriter (line 16) | pub struct JsonPathWriter {
method with_expand_dots (line 23) | pub fn with_expand_dots(expand_dots: bool) -> Self {
method new (line 31) | pub fn new() -> Self {
method set_expand_dots (line 47) | pub fn set_expand_dots(&mut self, expand_dots: bool) {
method push (line 53) | pub fn push(&mut self, segment: &str) {
method set_end (line 74) | pub fn set_end(&mut self) {
method pop (line 80) | pub fn pop(&mut self) {
method clear (line 88) | pub fn clear(&mut self) {
method as_str (line 95) | pub fn as_str(&self) -> &str {
method from (line 102) | fn from(value: JsonPathWriter) -> Self {
function json_path_writer_test (line 112) | fn json_path_writer_test() {
function test_json_path_expand_dots_enabled_pop_segment (line 135) | fn test_json_path_expand_dots_enabled_pop_segment() {
FILE: common/src/lib.rs
type HasLen (line 30) | pub trait HasLen {
method len (line 32) | fn len(&self) -> usize;
method is_empty (line 35) | fn is_empty(&self) -> bool {
method len (line 41) | fn len(&self) -> usize {
constant HIGHEST_BIT (line 46) | const HIGHEST_BIT: u64 = 1 << 63;
function i64_to_u64 (line 69) | pub fn i64_to_u64(val: i64) -> u64 {
function u64_to_i64 (line 75) | pub fn u64_to_i64(val: u64) -> i64 {
function f64_to_u64 (line 97) | pub fn f64_to_u64(val: f64) -> u64 {
function u64_to_f64 (line 108) | pub fn u64_to_f64(val: u64) -> f64 {
function replace_in_place (line 121) | pub fn replace_in_place(needle: u8, replacement: u8, bytes: &mut [u8]) {
function test_i64_converter_helper (line 139) | fn test_i64_converter_helper(val: i64) {
function test_f64_converter_helper (line 143) | fn test_f64_converter_helper(val: f64) {
function test_i64_converter (line 157) | fn test_i64_converter() {
function test_f64_converter (line 169) | fn test_f64_converter() {
function test_f64_order (line 179) | fn test_f64_order() {
function test_replace_in_place (line 194) | fn test_replace_in_place() {
FILE: common/src/serialize.rs
type Counter (line 10) | struct Counter(u64);
method write (line 13) | fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
method write_all (line 18) | fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
method flush (line 23) | fn flush(&mut self) -> io::Result<()> {
type BinarySerializable (line 29) | pub trait BinarySerializable: fmt::Debug + Sized {
method serialize (line 31) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()>;
method deserialize (line 33) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
method num_bytes (line 35) | fn num_bytes(&self) -> u64 {
method serialize (line 63) | fn serialize<W: Write + ?Sized>(&self, _: &mut W) -> io::Result<()> {
method deserialize (line 66) | fn deserialize<R: Read>(_: &mut R) -> io::Result<Self> {
method serialize (line 76) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 83) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Vec<T>> {
method serialize (line 95) | fn serialize<W: Write + ?Sized>(&self, write: &mut W) -> io::Result<()> {
method deserialize (line 99) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
method serialize (line 110) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 114) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<u32> {
method serialize (line 124) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 128) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<u16> {
method serialize (line 138) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 141) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
method serialize (line 151) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 154) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
method serialize (line 164) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 167) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
method serialize (line 177) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 180) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
method serialize (line 190) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 193) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
method serialize (line 203) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 206) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<u8> {
method serialize (line 216) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 219) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<bool> {
method serialize (line 237) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 243) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<String> {
method serialize (line 254) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 260) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, str>> {
method serialize (line 271) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<(...
method deserialize (line 279) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, [u8]>> {
type DeserializeFrom (line 42) | pub trait DeserializeFrom<T: BinarySerializable> {
method deserialize (line 43) | fn deserialize(&mut self) -> io::Result<T>;
function deserialize (line 51) | fn deserialize(&mut self) -> io::Result<T> {
type FixedSize (line 58) | pub trait FixedSize: BinarySerializable {
constant SIZE_IN_BYTES (line 59) | const SIZE_IN_BYTES: usize;
constant SIZE_IN_BYTES (line 72) | const SIZE_IN_BYTES: usize = 0;
constant SIZE_IN_BYTES (line 106) | const SIZE_IN_BYTES: usize = Left::SIZE_IN_BYTES + Right::SIZE_IN_BYTES;
constant SIZE_IN_BYTES (line 120) | const SIZE_IN_BYTES: usize = 4;
constant SIZE_IN_BYTES (line 134) | const SIZE_IN_BYTES: usize = 2;
constant SIZE_IN_BYTES (line 147) | const SIZE_IN_BYTES: usize = 8;
constant SIZE_IN_BYTES (line 160) | const SIZE_IN_BYTES: usize = 16;
constant SIZE_IN_BYTES (line 173) | const SIZE_IN_BYTES: usize = 4;
constant SIZE_IN_BYTES (line 186) | const SIZE_IN_BYTES: usize = 8;
constant SIZE_IN_BYTES (line 199) | const SIZE_IN_BYTES: usize = 8;
constant SIZE_IN_BYTES (line 212) | const SIZE_IN_BYTES: usize = 1;
constant SIZE_IN_BYTES (line 233) | const SIZE_IN_BYTES: usize = 1;
function fixed_size_test (line 294) | pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
function serialize_test (line 300) | fn serialize_test<T: BinarySerializable + Eq>(v: T) -> usize {
function test_serialize_u8 (line 311) | fn test_serialize_u8() {
function test_serialize_u32 (line 316) | fn test_serialize_u32() {
function test_serialize_i64 (line 324) | fn test_serialize_i64() {
function test_serialize_f64 (line 329) | fn test_serialize_f64() {
function test_serialize_u64 (line 334) | fn test_serialize_u64() {
function test_serialize_bool (line 339) | fn test_serialize_bool() {
function test_serialize_string (line 344) | fn test_serialize_string() {
function test_serialize_vec (line 351) | fn test_serialize_vec() {
function test_serialize_vint (line 357) | fn test_serialize_vint() {
FILE: common/src/vint.rs
function serialize_vint_u128 (line 7) | pub fn serialize_vint_u128(mut val: u128, output: &mut Vec<u8>) {
type VIntU128 (line 22) | pub struct VIntU128(pub u128);
method serialize (line 25) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
method deserialize (line 32) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
type VInt (line 59) | pub struct VInt(pub u64);
method val (line 163) | pub fn val(&self) -> u64 {
method deserialize_u64 (line 167) | pub fn deserialize_u64<R: Read>(reader: &mut R) -> io::Result<u64> {
method serialize_into_vec (line 171) | pub fn serialize_into_vec(&self, output: &mut Vec<u8>) {
method serialize_into (line 177) | pub fn serialize_into(&self, buffer: &mut [u8; 10]) -> usize {
constant STOP_BIT (line 61) | const STOP_BIT: u8 = 128;
function serialize_vint_u32 (line 64) | pub fn serialize_vint_u32(val: u32, buf: &mut [u8; 8]) -> &[u8] {
function vint_len (line 123) | fn vint_len(data: &[u8]) -> usize {
function read_u32_vint (line 139) | pub fn read_u32_vint(data: &mut &[u8]) -> u32 {
function read_u32_vint_no_advance (line 145) | pub fn read_u32_vint_no_advance(data: &[u8]) -> (u32, usize) {
function write_u32_vint (line 156) | pub fn write_u32_vint<W: io::Write + ?Sized>(val: u32, writer: &mut W) -...
method serialize (line 194) | fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
method deserialize (line 201) | fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
function aux_test_vint (line 231) | fn aux_test_vint(val: u64) {
function test_vint (line 249) | fn test_vint() {
function aux_test_serialize_vint_u32 (line 263) | fn aux_test_serialize_vint_u32(val: u32) {
function test_vint_u32 (line 272) | fn test_vint_u32() {
FILE: common/src/writer.rs
type CountingWriter (line 3) | pub struct CountingWriter<W> {
function wrap (line 9) | pub fn wrap(underlying: W) -> CountingWriter<W> {
function written_bytes (line 17) | pub fn written_bytes(&self) -> u64 {
function finish (line 24) | pub fn finish(self) -> W {
method write (line 31) | fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
method write_all (line 38) | fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
method flush (line 45) | fn flush(&mut self) -> io::Result<()> {
type AntiCallToken (line 62) | pub struct AntiCallToken(());
type TerminatingWrite (line 67) | pub trait TerminatingWrite: Write {
method terminate_ref (line 52) | fn terminate_ref(&mut self, token: AntiCallToken) -> io::Result<()> {
method terminate (line 69) | fn terminate(mut self) -> io::Result<()>
method terminate_ref (line 76) | fn terminate_ref(&mut self, _: AntiCallToken) -> io::Result<()>;
method terminate_ref (line 80) | fn terminate_ref(&mut self, token: AntiCallToken) -> io::Result<()> {
method terminate_ref (line 86) | fn terminate_ref(&mut self, a: AntiCallToken) -> io::Result<()> {
method terminate_ref (line 93) | fn terminate_ref(&mut self, _a: AntiCallToken) -> io::Result<()> {
function test_counting_writer (line 106) | fn test_counting_writer() {
FILE: examples/aggregation.rs
function main (line 17) | fn main() -> tantivy::Result<()> {
FILE: examples/basic_search.rs
function main (line 21) | fn main() -> tantivy::Result<()> {
FILE: examples/custom_collector.rs
type Stats (line 20) | struct Stats {
method count (line 27) | pub fn count(&self) -> usize {
method mean (line 31) | pub fn mean(&self) -> f64 {
method square_mean (line 35) | fn square_mean(&self) -> f64 {
method standard_deviation (line 39) | pub fn standard_deviation(&self) -> f64 {
method non_zero_count (line 44) | fn non_zero_count(self) -> Option<Stats> {
type StatsCollector (line 53) | struct StatsCollector {
method with_field (line 58) | fn with_field(field: String) -> StatsCollector {
type Fruit (line 66) | type Fruit = Option<Stats>;
type Child (line 68) | type Child = StatsSegmentCollector;
method for_segment (line 70) | fn for_segment(
method requires_scoring (line 82) | fn requires_scoring(&self) -> bool {
method merge_fruits (line 87) | fn merge_fruits(&self, segment_stats: Vec<Option<Stats>>) -> tantivy::Re...
type StatsSegmentCollector (line 98) | struct StatsSegmentCollector {
type Fruit (line 104) | type Fruit = Option<Stats>;
method collect (line 106) | fn collect(&mut self, doc: u32, _score: Score) {
method harvest (line 117) | fn harvest(self) -> <Self as SegmentCollector>::Fruit {
function main (line 122) | fn main() -> tantivy::Result<()> {
FILE: examples/custom_tokenizer.rs
function main (line 11) | fn main() -> tantivy::Result<()> {
FILE: examples/date_time_field.rs
function main (line 10) | fn main() -> tantivy::Result<()> {
FILE: examples/deleting_updating_documents.rs
function extract_doc_given_isbn (line 19) | fn extract_doc_given_isbn(
function main (line 42) | fn main() -> tantivy::Result<()> {
FILE: examples/faceted_search.rs
function main (line 22) | fn main() -> tantivy::Result<()> {
FILE: examples/faceted_search_with_tweaked_score.rs
function main (line 17) | fn main() -> tantivy::Result<()> {
FILE: examples/filter_aggregation.rs
function main (line 17) | fn main() -> tantivy::Result<()> {
FILE: examples/fuzzy_search.rs
function main (line 20) | fn main() -> tantivy::Result<()> {
FILE: examples/index_from_multiple_threads.rs
function main (line 35) | fn main() -> tantivy::Result<()> {
FILE: examples/index_with_json.rs
function main (line 6) | fn main() -> tantivy::Result<()> {
FILE: examples/integer_range_search.rs
function main (line 12) | fn main() -> Result<()> {
FILE: examples/ip_field.rs
function main (line 11) | fn main() -> tantivy::Result<()> {
FILE: examples/iterating_docs_and_positions.rs
function main (line 16) | fn main() -> tantivy::Result<()> {
FILE: examples/json_field.rs
function main (line 12) | fn main() -> tantivy::Result<()> {
FILE: examples/phrase_prefix_search.rs
function main (line 7) | fn main() -> Result<()> {
FILE: examples/pre_tokenized_text.rs
function pre_tokenize_text (line 19) | fn pre_tokenize_text(text: &str) -> Vec<Token> {
function main (line 29) | fn main() -> tantivy::Result<()> {
FILE: examples/snippet.rs
function main (line 17) | fn main() -> tantivy::Result<()> {
function highlight (line 69) | fn highlight(snippet: Snippet) -> String {
FILE: examples/stop_words.rs
function main (line 20) | fn main() -> tantivy::Result<()> {
FILE: examples/warmer.rs
type ProductId (line 21) | type ProductId = u64;
type Price (line 23) | type Price = u32;
type PriceFetcher (line 25) | pub trait PriceFetcher: Send + Sync + 'static {
method fetch_prices (line 26) | fn fetch_prices(&self, product_ids: &[ProductId]) -> Vec<Price>;
method fetch_prices (line 115) | fn fetch_prices(&self, product_ids: &[ProductId]) -> Vec<Price> {
type SegmentKey (line 29) | type SegmentKey = (SegmentId, Option<Opstamp>);
type DynamicPriceColumn (line 31) | struct DynamicPriceColumn {
method with_product_id_field (line 38) | pub fn with_product_id_field<T: PriceFetcher>(field: String, price_fet...
method price_for_segment (line 46) | pub fn price_for_segment(&self, segment_reader: &SegmentReader) -> Opt...
method warm (line 52) | fn warm(&self, searcher: &Searcher) -> tantivy::Result<()> {
method garbage_collect (line 85) | fn garbage_collect(&self, live_generations: &[&SearcherGeneration]) {
type ExternalPriceTable (line 104) | pub struct ExternalPriceTable {
method update_price (line 109) | pub fn update_price(&self, product_id: ProductId, price: Price) {
function main (line 125) | fn main() -> tantivy::Result<()> {
FILE: ownedbytes/src/lib.rs
type OwnedBytes (line 12) | pub struct OwnedBytes {
method empty (line 19) | pub fn empty() -> OwnedBytes {
method new (line 24) | pub fn new<T: StableDeref + Deref<Target = [u8]> + 'static + Send + Sy...
method slice (line 39) | pub fn slice(&self, range: Range<usize>) -> Self {
method as_slice (line 49) | pub fn as_slice(&self) -> &[u8] {
method len (line 55) | pub fn len(&self) -> usize {
method is_empty (line 61) | pub fn is_empty(&self) -> bool {
method split (line 75) | pub fn split(self, split_len: usize) -> (OwnedBytes, OwnedBytes) {
method rsplit (line 99) | pub fn rsplit(self, split_len: usize) -> (OwnedBytes, OwnedBytes) {
method split_off (line 107) | pub fn split_off(&mut self, split_len: usize) -> OwnedBytes {
method advance (line 120) | pub fn advance(&mut self, advance_len: usize) -> &[u8] {
method read_u8 (line 128) | pub fn read_u8(&mut self) -> u8 {
method read_n (line 133) | fn read_n<const N: usize>(&mut self) -> [u8; N] {
method read_u32 (line 139) | pub fn read_u32(&mut self) -> u32 {
method read_u64 (line 145) | pub fn read_u64(&mut self) -> u64 {
method fmt (line 151) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
method eq (line 172) | fn eq(&self, other: &[u8]) -> bool {
method eq (line 178) | fn eq(&self, other: &str) -> bool {
method eq (line 186) | fn eq(&self, other: &&'a T) -> bool {
method as_ref (line 202) | fn as_ref(&self) -> &[u8] {
method read (line 209) | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
method read_to_end (line 223) | fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
method read_exact (line 230) | fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
method eq (line 164) | fn eq(&self, other: &OwnedBytes) -> bool {
type Target (line 192) | type Target = [u8];
method deref (line 195) | fn deref(&self) -> &Self::Target {
function test_owned_bytes_debug (line 249) | fn test_owned_bytes_debug() {
function test_owned_bytes_read (line 268) | fn test_owned_bytes_read() -> io::Result<()> {
function test_owned_bytes_read_right_at_the_end (line 286) | fn test_owned_bytes_read_right_at_the_end() -> io::Result<()> {
function test_owned_bytes_read_incomplete (line 297) | fn test_owned_bytes_read_incomplete() -> io::Result<()> {
function test_owned_bytes_read_to_end (line 307) | fn test_owned_bytes_read_to_end() -> io::Result<()> {
function test_owned_bytes_read_u8 (line 316) | fn test_owned_bytes_read_u8() -> io::Result<()> {
function test_owned_bytes_read_u64 (line 324) | fn test_owned_bytes_read_u64() -> io::Result<()> {
function test_owned_bytes_split (line 332) | fn test_owned_bytes_split() {
function test_owned_bytes_split_boundary (line 340) | fn test_owned_bytes_split_boundary() {
function test_split_off (line 355) | fn test_split_off() {
FILE: query-grammar/src/infallible.rs
type ErrorList (line 8) | pub(crate) type ErrorList = Vec<LenientErrorInternal>;
type JResult (line 9) | pub(crate) type JResult<I, O> = IResult<I, (O, ErrorList), Infallible>;
type LenientErrorInternal (line 13) | pub(crate) struct LenientErrorInternal {
type LenientError (line 21) | pub struct LenientError {
method from_internal (line 27) | pub(crate) fn from_internal(internal: LenientErrorInternal, str_len: u...
function unwrap_infallible (line 35) | fn unwrap_infallible<T>(res: Result<T, nom::Err<Infallible>>) -> T {
function opt_i (line 48) | pub(crate) fn opt_i<I: Clone, O, F>(mut f: F) -> impl FnMut(I) -> JResul...
function opt_i_err (line 59) | pub(crate) fn opt_i_err<'a, I: Clone + InputLength, O, F>(
function space0_infallible (line 81) | pub(crate) fn space0_infallible<T>(input: T) -> JResult<T, T>
function space1_infallible (line 90) | pub(crate) fn space1_infallible<T>(input: T) -> JResult<T, Option<T>>
function fallible (line 106) | pub(crate) fn fallible<I, O, E: nom::error::ParseError<I>, F>(
function terminated_infallible (line 120) | pub(crate) fn terminated_infallible<I, O1, O2, F, G>(
function delimited_infallible (line 136) | pub(crate) fn delimited_infallible<I, O1, O2, O3, F, G, H>(
function nothing (line 157) | pub(crate) fn nothing(i: &str) -> JResult<&str, ()> {
type TupleInfallible (line 161) | pub(crate) trait TupleInfallible<I, O> {
method parse (line 163) | fn parse(&mut self, input: I) -> JResult<I, O>;
function parse (line 169) | fn parse(&mut self, input: Input) -> JResult<Input, (Output,)> {
function parse (line 256) | fn parse(&mut self, input: I) -> JResult<I, ()> {
function tuple_infallible (line 261) | pub(crate) fn tuple_infallible<I, O, List: TupleInfallible<I, O>>(
function separated_list_infallible (line 267) | pub(crate) fn separated_list_infallible<I, O, O2, F, G>(
type Alt (line 304) | pub(crate) trait Alt<I, O> {
method choice (line 306) | fn choice(&mut self, input: I) -> Option<JResult<I, O>>;
function alt_infallible (line 365) | pub(crate) fn alt_infallible<I: Clone, O, F, List: Alt<I, O>>(
function test_lenient_error_serialization (line 380) | fn test_lenient_error_serialization() {
FILE: query-grammar/src/lib.rs
type Error (line 19) | pub struct Error;
function parse_query (line 22) | pub fn parse_query(query: &str) -> Result<UserInputAst, Error> {
function parse_query_lenient (line 28) | pub fn parse_query_lenient(query: &str) -> (UserInputAst, Vec<LenientErr...
function test_deduplication (line 37) | fn test_deduplication() {
function test_parse_query_serialization (line 47) | fn test_parse_query_serialization() {
function test_parse_query_wrong_query (line 57) | fn test_parse_query_wrong_query() {
function test_parse_query_lenient_wrong_query (line 62) | fn test_parse_query_lenient_wrong_query() {
FILE: query-grammar/src/occur.rs
type Occur (line 10) | pub enum Occur {
method to_char (line 27) | fn to_char(self) -> char {
method compose (line 36) | pub fn compose(left: Occur, right: Occur) -> Occur {
method fmt (line 48) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
function test_occur_compose (line 58) | fn test_occur_compose() {
FILE: query-grammar/src/query_grammar.rs
constant SPECIAL_CHARS (line 23) | const SPECIAL_CHARS: &[char] = &[
function field_name (line 29) | fn field_name(inp: &str) -> IResult<&str, String> {
constant ESCAPE_IN_WORD (line 46) | const ESCAPE_IN_WORD: &[char] = &['^', '`', ':', '{', '}', '"', '\'', '[...
function interpret_escape (line 48) | fn interpret_escape(source: &str) -> String {
function word (line 72) | fn word(inp: &str) -> IResult<&str, Cow<'_, str>> {
function word_infallible (line 92) | fn word_infallible(
function relaxed_word (line 138) | fn relaxed_word(inp: &str) -> IResult<&str, &str> {
function negative_number (line 147) | fn negative_number(inp: &str) -> IResult<&str, &str> {
function simple_term (line 154) | fn simple_term(inp: &str) -> IResult<&str, (Delimiter, String)> {
function simple_term_infallible (line 187) | fn simple_term_infallible(
function term_or_phrase (line 227) | fn term_or_phrase(inp: &str) -> IResult<&str, UserInputLeaf> {
function term_or_phrase_infallible (line 243) | fn term_or_phrase_infallible(inp: &str) -> JResult<&str, Option<UserInpu...
function term_group (line 278) | fn term_group(inp: &str) -> IResult<&str, UserInputAst> {
function term_group_precond (line 293) | fn term_group_precond(inp: &str) -> IResult<&str, (), ()> {
function term_group_infallible (line 305) | fn term_group_infallible(inp: &str) -> JResult<&str, UserInputAst> {
function exists (line 319) | fn exists(inp: &str) -> IResult<&str, UserInputLeaf> {
function exists_precond (line 338) | fn exists_precond(inp: &str) -> IResult<&str, (), ()> {
function exists_infallible (line 357) | fn exists_infallible(inp: &str) -> JResult<&str, UserInputAst> {
function literal (line 365) | fn literal(inp: &str) -> IResult<&str, UserInputAst> {
function literal_no_group_infallible (line 380) | fn literal_no_group_infallible(inp: &str) -> JResult<&str, Option<UserIn...
function literal_infallible (line 423) | fn literal_infallible(inp: &str) -> JResult<&str, Option<UserInputAst>> {
function slop_or_prefix_val (line 439) | fn slop_or_prefix_val(inp: &str) -> JResult<&str, (u32, bool)> {
function range (line 453) | fn range(inp: &str) -> IResult<&str, UserInputLeaf> {
function range_infallible (line 519) | fn range_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
function set (line 633) | fn set(inp: &str) -> IResult<&str, UserInputLeaf> {
function set_infallible (line 650) | fn set_infallible(mut inp: &str) -> JResult<&str, UserInputLeaf> {
function regex (line 699) | fn regex(inp: &str) -> IResult<&str, UserInputLeaf> {
function regex_infallible (line 720) | fn regex_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
function negate (line 762) | fn negate(expr: UserInputAst) -> UserInputAst {
function leaf (line 766) | fn leaf(inp: &str) -> IResult<&str, UserInputAst> {
function leaf_infallible (line 785) | fn leaf_infallible(inp: &str) -> JResult<&str, Option<UserInputAst>> {
function positive_float_number (line 828) | fn positive_float_number(inp: &str) -> IResult<&str, f64> {
function boost (line 837) | fn boost(inp: &str) -> JResult<&str, Option<f64>> {
function boosted_leaf (line 841) | fn boosted_leaf(inp: &str) -> IResult<&str, UserInputAst> {
function boosted_leaf_infallible (line 853) | fn boosted_leaf_infallible(inp: &str) -> JResult<&str, Option<UserInputA...
function occur_symbol (line 866) | fn occur_symbol(inp: &str) -> JResult<&str, Option<Occur>> {
function occur_leaf (line 873) | fn occur_leaf(inp: &str) -> IResult<&str, (Option<Occur>, UserInputAst)> {
function operand_occur_leaf_infallible (line 878) | fn operand_occur_leaf_infallible(
type BinaryOperand (line 890) | enum BinaryOperand {
function binary_operand (line 895) | fn binary_operand(inp: &str) -> IResult<&str, BinaryOperand> {
function aggregate_binary_expressions (line 902) | fn aggregate_binary_expressions(
function aggregate_infallible_expressions (line 923) | fn aggregate_infallible_expressions(
function operand_leaf (line 1037) | fn operand_leaf(inp: &str) -> IResult<&str, (Option<BinaryOperand>, Opti...
function ast (line 1047) | fn ast(inp: &str) -> IResult<&str, UserInputAst> {
function ast_infallible (line 1062) | fn ast_infallible(inp: &str) -> JResult<&str, UserInputAst> {
function parse_to_ast (line 1082) | pub fn parse_to_ast(inp: &str) -> IResult<&str, UserInputAst> {
function parse_to_ast_lenient (line 1088) | pub fn parse_to_ast_lenient(query_str: &str) -> (UserInputAst, Vec<Lenie...
function rewrite_ast (line 1109) | fn rewrite_ast(mut input: UserInputAst) -> UserInputAst {
function rewrite_ast_clause (line 1134) | fn rewrite_ast_clause(input: &mut (Option<Occur>, UserInputAst)) {
function nearly_equals (line 1147) | pub fn nearly_equals(a: f64, b: f64) -> bool {
function assert_nearly_equals (line 1151) | fn assert_nearly_equals(expected: f64, val: f64) {
function test_positive_float_number (line 1167) | fn test_positive_float_number() {
function test_date_time (line 1187) | fn test_date_time() {
function test_parse_query_to_ast_helper (line 1202) | fn test_parse_query_to_ast_helper(query: &str, expected: &str) {
function test_is_parse_err (line 1217) | fn test_is_parse_err(query: &str, lenient_expected: &str) {
function test_parse_empty_to_ast (line 1230) | fn test_parse_empty_to_ast() {
function test_parse_query_to_ast_hyphen (line 1235) | fn test_parse_query_to_ast_hyphen() {
function test_parse_query_lenient_unfinished_quote (line 1246) | fn test_parse_query_lenient_unfinished_quote() {
function test_parse_query_to_ast_not_op (line 1254) | fn test_parse_query_to_ast_not_op() {
function test_boosting (line 1261) | fn test_boosting() {
function test_parse_query_to_ast_binary_op (line 1270) | fn test_parse_query_to_ast_binary_op() {
function test_parse_mixed_bool_occur (line 1285) | fn test_parse_mixed_bool_occur() {
function test_parse_elastic_query_ranges (line 1312) | fn test_parse_elastic_query_ranges() {
function test_occur_leaf (line 1345) | fn test_occur_leaf() {
function test_field_name (line 1352) | fn test_field_name() {
function test_range_parser (line 1411) | fn test_range_parser() {
function test_range_parser_lenient (line 1493) | fn test_range_parser_lenient() {
function test_parse_query_to_trimming_spaces (line 1602) | fn test_parse_query_to_trimming_spaces() {
function test_parse_query_term_group (line 1615) | fn test_parse_query_term_group() {
function field_re_specification (line 1633) | fn field_re_specification() {
function test_parse_query_single_term (line 1638) | fn test_parse_query_single_term() {
function test_parse_query_default_clause (line 1643) | fn test_parse_query_default_clause() {
function test_parse_query_must_default_clause (line 1648) | fn test_parse_query_must_default_clause() {
function test_parse_query_must_single_term (line 1653) | fn test_parse_query_must_single_term() {
function test_single_term_with_field (line 1658) | fn test_single_term_with_field() {
function test_phrase_with_field (line 1663) | fn test_phrase_with_field() {
function test_single_term_with_float (line 1669) | fn test_single_term_with_float() {
function test_must_clause (line 1676) | fn test_must_clause() {
function test_parse_test_query_plus_a_b_plus_d (line 1681) | fn test_parse_test_query_plus_a_b_plus_d() {
function test_parse_test_query_set (line 1686) | fn test_parse_test_query_set() {
function test_parse_test_query_other (line 1699) | fn test_parse_test_query_other() {
function test_parse_query_all (line 1725) | fn test_parse_query_all() {
function test_parse_query_with_range (line 1732) | fn test_parse_query_with_range() {
function test_slop (line 1746) | fn test_slop() {
function test_phrase_prefix (line 1767) | fn test_phrase_prefix() {
function test_exist_query (line 1777) | fn test_exist_query() {
function test_not_queries_are_consistent (line 1794) | fn test_not_queries_are_consistent() {
function test_escaping (line 1800) | fn test_escaping() {
function test_queries_with_colons (line 1814) | fn test_queries_with_colons() {
function test_invalid_field (line 1823) | fn test_invalid_field() {
function test_regex_parser (line 1828) | fn test_regex_parser() {
function test_regex_parser_lenient (line 1858) | fn test_regex_parser_lenient() {
function test_space_before_value (line 1885) | fn test_space_before_value() {
FILE: query-grammar/src/user_input_ast.rs
type UserInputLeaf (line 11) | pub enum UserInputLeaf {
method set_field (line 33) | pub(crate) fn set_field(self, field: Option<String>) -> Self {
method set_default_field (line 57) | pub(crate) fn set_default_field(&mut self, default_field: String) {
method from (line 287) | fn from(literal: UserInputLiteral) -> UserInputLeaf {
method fmt (line 76) | fn fmt(&self, formatter: &mut Formatter) -> Result<(), fmt::Error> {
type Delimiter (line 126) | pub enum Delimiter {
type UserInputLiteral (line 134) | pub struct UserInputLiteral {
method fmt (line 143) | fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
type UserInputBound (line 174) | pub enum UserInputBound {
method display_lower (line 181) | fn display_lower(&self, formatter: &mut fmt::Formatter) -> Result<(), ...
method display_upper (line 190) | fn display_upper(&self, formatter: &mut fmt::Formatter) -> Result<(), ...
method term_str (line 199) | pub fn term_str(&self) -> &str {
type UserInputAst (line 210) | pub enum UserInputAst {
method unary (line 245) | pub fn unary(self, occur: Occur) -> UserInputAst {
method compose (line 249) | fn compose(occur: Occur, asts: Vec<UserInputAst>) -> UserInputAst {
method empty_query (line 263) | pub fn empty_query() -> UserInputAst {
method and (line 267) | pub fn and(asts: Vec<UserInputAst>) -> UserInputAst {
method or (line 271) | pub fn or(asts: Vec<UserInputAst>) -> UserInputAst {
method set_default_field (line 275) | pub(crate) fn set_default_field(&mut self, field: String) {
method from (line 293) | fn from(leaf: UserInputLeaf) -> UserInputAst {
method fmt (line 312) | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
type UserInputAstSerde (line 218) | enum UserInputAstSerde {
method from (line 231) | fn from(ast: UserInputAst) -> Self {
function print_occur_ast (line 298) | fn print_occur_ast(
function test_all_leaf_serialization (line 340) | fn test_all_leaf_serialization() {
function test_literal_leaf_serialization (line 347) | fn test_literal_leaf_serialization() {
function test_range_leaf_serialization (line 364) | fn test_range_leaf_serialization() {
function test_range_leaf_unbounded_serialization (line 379) | fn test_range_leaf_unbounded_serialization() {
function test_boost_serialization (line 394) | fn test_boost_serialization() {
function test_boost_serialization2 (line 405) | fn test_boost_serialization2() {
function test_clause_serialization (line 433) | fn test_clause_serialization() {
FILE: src/aggregation/accessor_helpers.rs
function get_missing_val_as_u64_lenient (line 17) | pub(crate) fn get_missing_val_as_u64_lenient(
function get_numeric_or_date_column_types (line 49) | pub(crate) fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
function get_ff_reader (line 59) | pub(crate) fn get_ff_reader(
function get_dynamic_columns (line 76) | pub(crate) fn get_dynamic_columns(
function get_all_ff_reader_or_empty (line 92) | pub(crate) fn get_all_ff_reader_or_empty(
FILE: src/aggregation/agg_data.rs
type AggregationsSegmentCtx (line 35) | pub struct AggregationsSegmentCtx {
method push_term_req_data (line 43) | pub(crate) fn push_term_req_data(&mut self, data: TermsAggReqData) -> ...
method push_cardinality_req_data (line 47) | pub(crate) fn push_cardinality_req_data(&mut self, data: CardinalityAg...
method push_metric_req_data (line 51) | pub(crate) fn push_metric_req_data(&mut self, data: MetricAggReqData) ...
method push_top_hits_req_data (line 55) | pub(crate) fn push_top_hits_req_data(&mut self, data: TopHitsAggReqDat...
method push_missing_term_req_data (line 59) | pub(crate) fn push_missing_term_req_data(&mut self, data: MissingTermA...
method push_histogram_req_data (line 63) | pub(crate) fn push_histogram_req_data(&mut self, data: HistogramAggReq...
method push_range_req_data (line 69) | pub(crate) fn push_range_req_data(&mut self, data: RangeAggReqData) ->...
method push_filter_req_data (line 73) | pub(crate) fn push_filter_req_data(&mut self, data: FilterAggReqData) ...
method push_composite_req_data (line 77) | pub(crate) fn push_composite_req_data(&mut self, data: CompositeAggReq...
method get_term_req_data (line 85) | pub(crate) fn get_term_req_data(&self, idx: usize) -> &TermsAggReqData {
method get_cardinality_req_data (line 91) | pub(crate) fn get_cardinality_req_data(&self, idx: usize) -> &Cardinal...
method get_metric_req_data (line 95) | pub(crate) fn get_metric_req_data(&self, idx: usize) -> &MetricAggReqD...
method get_top_hits_req_data (line 99) | pub(crate) fn get_top_hits_req_data(&self, idx: usize) -> &TopHitsAggR...
method get_missing_term_req_data (line 103) | pub(crate) fn get_missing_term_req_data(&self, idx: usize) -> &Missing...
method get_histogram_req_data (line 107) | pub(crate) fn get_histogram_req_data(&self, idx: usize) -> &HistogramA...
method get_range_req_data (line 113) | pub(crate) fn get_range_req_data(&self, idx: usize) -> &RangeAggReqData {
method get_composite_req_data (line 119) | pub(crate) fn get_composite_req_data(&self, idx: usize) -> &CompositeA...
method get_metric_req_data_mut (line 128) | pub(crate) fn get_metric_req_data_mut(&mut self, idx: usize) -> &mut M...
method get_cardinality_req_data_mut (line 133) | pub(crate) fn get_cardinality_req_data_mut(
method get_histogram_req_data_mut (line 141) | pub(crate) fn get_histogram_req_data_mut(&mut self, idx: usize) -> &mu...
method take_histogram_req_data (line 151) | pub(crate) fn take_histogram_req_data(&mut self, idx: usize) -> Box<Hi...
method put_back_histogram_req_data (line 159) | pub(crate) fn put_back_histogram_req_data(
method take_range_req_data (line 170) | pub(crate) fn take_range_req_data(&mut self, idx: usize) -> Box<RangeA...
method put_back_range_req_data (line 178) | pub(crate) fn put_back_range_req_data(&mut self, idx: usize, value: Bo...
method take_filter_req_data (line 185) | pub(crate) fn take_filter_req_data(&mut self, idx: usize) -> Box<Filte...
method put_back_filter_req_data (line 193) | pub(crate) fn put_back_filter_req_data(&mut self, idx: usize, value: B...
method take_composite_req_data (line 200) | pub(crate) fn take_composite_req_data(&mut self, idx: usize) -> Box<Co...
method put_back_composite_req_data (line 208) | pub(crate) fn put_back_composite_req_data(
type PerRequestAggSegCtx (line 225) | pub struct PerRequestAggSegCtx {
method get_memory_consumption (line 252) | fn get_memory_consumption(&self) -> usize {
method get_name (line 300) | pub fn get_name(&self, node: &AggRefNode) -> &str {
method get_view_tree (line 344) | pub fn get_view_tree(&self) -> Vec<AggTreeViewNode> {
function build_segment_agg_collectors_root (line 366) | pub(crate) fn build_segment_agg_collectors_root(
function build_segment_agg_collectors (line 372) | pub(crate) fn build_segment_agg_collectors(
function build_segment_agg_collectors_generic (line 379) | fn build_segment_agg_collectors_generic(
function build_segment_agg_collector (line 399) | pub(crate) fn build_segment_agg_collector(
type AggRefNode (line 474) | pub struct AggRefNode {
method get_sub_agg (line 480) | pub fn get_sub_agg(&self, name: &str, pr: &PerRequestAggSegCtx) -> Opt...
type AggKind (line 488) | pub enum AggKind {
method as_str (line 504) | fn as_str(&self) -> &'static str {
function build_aggregations_data_from_req (line 521) | pub(crate) fn build_aggregations_data_from_req(
function build_nodes (line 540) | fn build_nodes(
function build_composite_node (line 805) | fn build_composite_node(
function build_children (line 834) | fn build_children(
function get_term_agg_accessors (line 854) | fn get_term_agg_accessors(
type TermsOrCardinalityRequest (line 890) | enum TermsOrCardinalityRequest {
method as_terms (line 895) | fn as_terms(&self) -> Option<&TermsAggregation> {
function build_terms_or_cardinality_nodes (line 904) | fn build_terms_or_cardinality_nodes(
function build_allowed_term_ids_for_str (line 1028) | fn build_allowed_term_ids_for_str(
function for_each_matching_term_ord (line 1055) | fn for_each_matching_term_ord(
type AggTreeViewNode (line 1088) | pub struct AggTreeViewNode {
function agg_from_json (line 1101) | fn agg_from_json(val: serde_json::Value) -> crate::aggregation::agg_req:...
function test_tree_roots_and_expansion_terms_missing_on_numeric (line 1106) | fn test_tree_roots_and_expansion_terms_missing_on_numeric() -> crate::Re...
FILE: src/aggregation/agg_limits.rs
type MemoryConsumption (line 11) | pub trait MemoryConsumption {
method memory_consumption (line 12) | fn memory_consumption(&self) -> usize;
method memory_consumption (line 16) | fn memory_consumption(&self) -> usize {
type AggregationLimitsGuard (line 27) | pub struct AggregationLimitsGuard {
method new (line 82) | pub fn new(memory_limit: Option<u64>, bucket_limit: Option<u32>) -> Se...
method add_memory_consumed (line 91) | pub(crate) fn add_memory_consumed(&mut self, add_num_bytes: u64) -> cr...
method get_bucket_limit (line 100) | pub(crate) fn get_bucket_limit(&self) -> u32 {
method clone (line 40) | fn clone(&self) -> Self {
method drop (line 53) | fn drop(&mut self) {
method default (line 60) | fn default() -> Self {
function validate_memory_consumption (line 105) | fn validate_memory_consumption(
function test_agg_limits_with_empty_merge (line 126) | fn test_agg_limits_with_empty_merge() {
function test_agg_limits_with_empty_data (line 194) | fn test_agg_limits_with_empty_data() {
FILE: src/aggregation/agg_req.rs
type Aggregations (line 48) | pub type Aggregations = FxHashMap<String, Aggregation>;
type Aggregation (line 55) | pub struct Aggregation {
type Error (line 78) | type Error = serde_json::Error;
method try_from (line 80) | fn try_from(value: AggregationForDeserialization) -> serde_json::Resul...
method sub_aggregation (line 94) | pub(crate) fn sub_aggregation(&self) -> &Aggregations {
method get_fast_field_names (line 98) | fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
type AggregationForDeserialization (line 69) | struct AggregationForDeserialization {
function get_fast_field_names (line 110) | pub fn get_fast_field_names(aggs: &Aggregations) -> HashSet<String> {
type AggregationVariants (line 120) | pub enum AggregationVariants {
method get_fast_field_names (line 179) | pub fn get_fast_field_names(&self) -> Vec<&str> {
method as_range (line 204) | pub(crate) fn as_range(&self) -> Option<&RangeAggregation> {
method as_histogram (line 210) | pub(crate) fn as_histogram(&self) -> crate::Result<Option<HistogramAgg...
method as_term (line 219) | pub(crate) fn as_term(&self) -> Option<&TermsAggregation> {
method as_composite (line 225) | pub(crate) fn as_composite(&self) -> Option<&CompositeAggregation> {
method as_percentile (line 231) | pub(crate) fn as_percentile(&self) -> Option<&PercentilesAggregationRe...
function deser_json_test (line 245) | fn deser_json_test() {
function deser_json_test_bucket (line 258) | fn deser_json_test_bucket() {
function test_metric_aggregations_deser (line 289) | fn test_metric_aggregations_deser() {
function serialize_to_json_test (line 321) | fn serialize_to_json_test() {
function test_get_fast_field_names (line 355) | fn test_get_fast_field_names() {
FILE: src/aggregation/agg_result.rs
type AggregationResults (line 22) | pub struct AggregationResults(pub FxHashMap<String, AggregationResult>);
method get_bucket_count (line 25) | pub(crate) fn get_bucket_count(&self) -> u64 {
method get_value_from_aggregation (line 32) | pub(crate) fn get_value_from_aggregation(
type AggregationResult (line 51) | pub enum AggregationResult {
method get_bucket_count (line 59) | pub(crate) fn get_bucket_count(&self) -> u64 {
method get_value_from_aggregation (line 66) | pub(crate) fn get_value_from_aggregation(
type MetricResult (line 85) | pub enum MetricResult {
method get_value (line 109) | fn get_value(&self, agg_property: &str) -> crate::Result<Option<f64>> {
type BucketResult (line 132) | pub enum BucketResult {
method get_bucket_count (line 174) | pub(crate) fn get_bucket_count(&self) -> u64 {
type BucketEntries (line 203) | pub enum BucketEntries<T> {
function iter (line 211) | fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = &'a T> + 'a> {
type BucketEntry (line 246) | pub struct BucketEntry {
method get_bucket_count (line 259) | pub(crate) fn get_bucket_count(&self) -> u64 {
method doc_count (line 264) | fn doc_count(&self) -> u64 {
method doc_count (line 269) | fn doc_count(&self) -> u64 {
type RangeBucketEntry (line 305) | pub struct RangeBucketEntry {
method get_bucket_count (line 327) | pub(crate) fn get_bucket_count(&self) -> u64 {
type FilterBucketResult (line 346) | pub struct FilterBucketResult {
type CompositeKey (line 358) | pub enum CompositeKey {
method hash (line 374) | fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
method from (line 400) | fn from(value: CompositeIntermediateKey) -> Self {
method eq (line 387) | fn eq(&self, other: &Self) -> bool {
type CompositeBucketEntry (line 422) | pub struct CompositeBucketEntry {
method get_bucket_count (line 433) | pub(crate) fn get_bucket_count(&self) -> u64 {
FILE: src/aggregation/agg_tests.rs
function test_terms_as_subagg_parent_more_vs_child_more (line 22) | fn test_terms_as_subagg_parent_more_vs_child_more() -> crate::Result<()> {
function test_range_as_subagg_parent_more_vs_child_more (line 139) | fn test_range_as_subagg_parent_more_vs_child_more() -> crate::Result<()> {
function test_histogram_as_subagg_parent_more_vs_child_more (line 274) | fn test_histogram_as_subagg_parent_more_vs_child_more() -> crate::Result...
function test_date_histogram_as_subagg_parent_more_vs_child_more (line 373) | fn test_date_histogram_as_subagg_parent_more_vs_child_more() -> crate::R...
function get_avg_req (line 440) | fn get_avg_req(field_name: &str) -> Aggregation {
function get_collector (line 449) | fn get_collector(agg_req: Aggregations) -> AggregationCollector {
function test_aggregation_flushing (line 458) | fn test_aggregation_flushing(
function test_aggregation_flushing_variants (line 660) | fn test_aggregation_flushing_variants() {
function test_aggregation_level1_simple (line 668) | fn test_aggregation_level1_simple() -> crate::Result<()> {
function test_aggregation_level1 (line 736) | fn test_aggregation_level1() -> crate::Result<()> {
function test_aggregation_level2 (line 809) | fn test_aggregation_level2(
function test_aggregation_level2_multi_segments (line 960) | fn test_aggregation_level2_multi_segments() -> crate::Result<()> {
function test_aggregation_level2_single_segment (line 965) | fn test_aggregation_level2_single_segment() -> crate::Result<()> {
function test_aggregation_level2_multi_segments_distributed_collector (line 970) | fn test_aggregation_level2_multi_segments_distributed_collector() -> cra...
function test_aggregation_level2_single_segment_distributed_collector (line 975) | fn test_aggregation_level2_single_segment_distributed_collector() -> cra...
function test_aggregation_invalid_requests (line 980) | fn test_aggregation_invalid_requests() -> crate::Result<()> {
function test_aggregation_on_json_object (line 1052) | fn test_aggregation_on_json_object() {
function test_aggregation_on_nested_json_object (line 1099) | fn test_aggregation_on_nested_json_object() {
function test_aggregation_on_json_object_empty_columns (line 1161) | fn test_aggregation_on_json_object_empty_columns() {
function test_aggregation_on_json_object_mixed_types (line 1279) | fn test_aggregation_on_json_object_mixed_types() {
function test_aggregation_on_json_object_mixed_numerical_segments (line 1384) | fn test_aggregation_on_json_object_mixed_numerical_segments() {
FILE: src/aggregation/bucket/composite/accessors.rs
type CompositeAggReqData (line 19) | pub struct CompositeAggReqData {
method get_memory_consumption (line 30) | pub fn get_memory_consumption(&self) -> usize {
type CompositeAccessor (line 37) | pub struct CompositeAccessor {
type CompositeSourceAccessors (line 51) | pub struct CompositeSourceAccessors {
method build_for_source (line 77) | pub fn build_for_source(
function find_first_column_to_collect (line 264) | fn find_first_column_to_collect<T>(
function precompute_missing_after_key (line 302) | fn precompute_missing_after_key(
type PrecomputedDateInterval (line 323) | pub enum PrecomputedDateInterval {
method from_date_histogram_source_intervals (line 334) | pub fn from_date_histogram_source_intervals(
type PrecomputedAfterKey (line 362) | pub enum PrecomputedAfterKey {
method from (line 374) | fn from(hit: CompactHit) -> Self {
method from (line 384) | fn from(hit: TermOrdHit) -> Self {
method from (line 394) | fn from(num: ProjectedNumber<T>) -> Self {
method equals (line 405) | pub fn equals(&self, column_value: u64) -> bool {
method gt (line 413) | pub fn gt(&self, column_value: u64) -> bool {
method lt (line 421) | pub fn lt(&self, column_value: u64) -> bool {
method precompute_ip_addr (line 430) | fn precompute_ip_addr(column: &Column<u64>, key: &Ipv6Addr) -> crate::...
method precompute_term_ord (line 445) | fn precompute_term_ord(
method precompute (line 468) | pub fn precompute(
method keep_all (line 512) | fn keep_all(order: Order) -> Self {
FILE: src/aggregation/bucket/composite/calendar_interval.rs
constant NS_IN_DAY (line 4) | const NS_IN_DAY: i64 = Nanosecond::per_t::<i128>(Day) as i64;
function try_year_bucket (line 8) | pub(super) fn try_year_bucket(timestamp_ns: i64) -> crate::Result<i64> {
function try_month_bucket (line 19) | pub(super) fn try_month_bucket(timestamp_ns: i64) -> crate::Result<i64> {
function week_bucket (line 30) | pub(super) fn week_bucket(timestamp_ns: i64) -> i64 {
function year_bucket_using_time_crate (line 39) | fn year_bucket_using_time_crate(timestamp_ns: i64) -> Result<i64, time::...
function month_bucket_using_time_crate (line 47) | fn month_bucket_using_time_crate(timestamp_ns: i64) -> Result<i64, time:...
function ts_ns (line 64) | fn ts_ns(iso: &str) -> i64 {
function test_year_bucket (line 71) | fn test_year_bucket() {
function test_month_bucket (line 98) | fn test_month_bucket() {
function test_week_bucket (line 113) | fn test_week_bucket() {
FILE: src/aggregation/bucket/composite/collector.rs
type CompositeBucketCollector (line 34) | struct CompositeBucketCollector {
type InternalValueRepr (line 55) | struct InternalValueRepr {
method new_term (line 64) | fn new_term(raw: u64, accessor_idx: u8, order: Order) -> Self {
method new_histogram (line 77) | fn new_histogram(raw: u64, order: Order) -> Self {
method new_missing (line 89) | fn new_missing(order: Order, missing_order: MissingOrder) -> Self {
method decode (line 103) | fn decode(self, order: Order) -> Option<(u8, u64)> {
type SegmentCompositeCollector (line 118) | pub struct SegmentCompositeCollector {
method get_memory_consumption (line 205) | fn get_memory_consumption(&self) -> u64 {
method from_req_and_validate (line 212) | pub(crate) fn from_req_and_validate(
method add_intermediate_bucket_result (line 239) | fn add_intermediate_bucket_result(
method add_intermediate_aggregation_result (line 129) | fn add_intermediate_aggregation_result(
method collect (line 149) | fn collect(
method flush (line 183) | fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Res...
method prepare_max_bucket (line 190) | fn prepare_max_bucket(
function validate_req (line 293) | fn validate_req(req_data: &mut AggregationsSegmentCtx, accessor_idx: usi...
function collect_bucket_with_limit (line 330) | fn collect_bucket_with_limit(
function resolve_key (line 376) | fn resolve_key(
function resolve_internal_value_repr (line 393) | fn resolve_internal_value_repr(
function resolve_term (line 429) | fn resolve_term(
type CompositeKeyVisitor (line 487) | struct CompositeKeyVisitor<'a> {
function visit (line 503) | fn visit(&mut self, source_idx: usize, is_on_after_key: bool) -> crate::...
FILE: src/aggregation/bucket/composite/map.rs
type ArrayHeapMap (line 13) | struct ArrayHeapMap<K: Ord, V, const S: usize> {
method default (line 19) | fn default() -> Self {
function get_or_insert_with (line 29) | fn get_or_insert_with<F: FnOnce() -> V>(&mut self, key: &[K], f: F) -> &...
function get_mut (line 38) | fn get_mut(&mut self, key: &[K]) -> Option<&mut V> {
function peek_highest (line 43) | fn peek_highest(&self) -> Option<&[K]> {
function evict_highest (line 47) | fn evict_highest(&mut self) {
function memory_consumption (line 53) | fn memory_consumption(&self) -> u64 {
function into_iter (line 62) | fn into_iter(self) -> Box<dyn Iterator<Item = (SmallVec<[K; MAX_DYN_ARRA...
constant MAX_DYN_ARRAY_SIZE (line 71) | pub(super) const MAX_DYN_ARRAY_SIZE: usize = 16;
constant MAX_DYN_ARRAY_SIZE_PLUS_ONE (line 72) | const MAX_DYN_ARRAY_SIZE_PLUS_ONE: usize = MAX_DYN_ARRAY_SIZE + 1;
type DynArrayHeapMap (line 81) | pub(super) struct DynArrayHeapMap<K: Ord, V>(DynArrayHeapMapInner<K, V>);
type DynArrayHeapMapInner (line 85) | enum DynArrayHeapMapInner<K: Ord, V> {
function try_new (line 106) | pub(super) fn try_new(key_dimension: usize) -> crate::Result<Self> {
function size (line 140) | pub(super) fn size(&self) -> usize {
function get_or_insert_with (line 167) | pub(super) fn get_or_insert_with<F: FnOnce() -> V>(&mut self, key: &[K],...
function get_mut (line 191) | pub fn get_mut(&mut self, key: &[K]) -> Option<&mut V> {
function peek_highest (line 213) | pub(super) fn peek_highest(&self) -> Option<&[K]> {
function evict_highest (line 235) | pub(super) fn evict_highest(&mut self) {
function memory_consumption (line 256) | pub(crate) fn memory_consumption(&self) -> u64 {
function into_iter (line 280) | pub fn into_iter(self) -> impl Iterator<Item = (SmallVec<[K; MAX_DYN_ARR...
function test_dyn_array_heap_map (line 307) | fn test_dyn_array_heap_map() {
FILE: src/aggregation/bucket/composite/mod.rs
type MissingOrder (line 34) | pub enum MissingOrder {
function agg_source_default_order (line 44) | fn agg_source_default_order() -> Order {
type TermCompositeAggregationSource (line 50) | pub struct TermCompositeAggregationSource {
type HistogramCompositeAggregationSource (line 70) | pub struct HistogramCompositeAggregationSource {
type CalendarInterval (line 95) | pub enum CalendarInterval {
type DateHistogramCompositeAggregationSource (line 108) | pub struct DateHistogramCompositeAggregationSource {
type CompositeAggregationSource (line 139) | pub enum CompositeAggregationSource {
method field (line 149) | pub(crate) fn field(&self) -> &str {
method name (line 157) | pub(crate) fn name(&self) -> &str {
method order (line 165) | pub(crate) fn order(&self) -> Order {
method missing_order (line 173) | pub(crate) fn missing_order(&self) -> MissingOrder {
method missing_bucket (line 181) | pub(crate) fn missing_bucket(&self) -> bool {
type CompositeAggregation (line 201) | pub struct CompositeAggregation {
type Error (line 219) | type Error = TantivyError;
method try_from (line 221) | fn try_from(value: CompositeAggregationSerde) -> Result<Self, Self::Er...
type CompositeAggregationSerde (line 211) | struct CompositeAggregationSerde {
method from (line 252) | fn from(value: CompositeAggregation) -> Self {
type ColumnPaginationOrder (line 284) | enum ColumnPaginationOrder {
type ToTypePaginationOrder (line 292) | trait ToTypePaginationOrder {
method column_pagination_order (line 297) | fn column_pagination_order(&self) -> ColumnPaginationOrder;
method column_pagination_order (line 301) | fn column_pagination_order(&self) -> ColumnPaginationOrder {
method column_pagination_order (line 314) | fn column_pagination_order(&self) -> ColumnPaginationOrder {
method column_pagination_order (line 329) | fn column_pagination_order(&self) -> ColumnPaginationOrder {
type AfterKey (line 345) | pub struct AfterKey(pub CompositeIntermediateKey);
method deserialize (line 365) | fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
method from (line 428) | fn from(key: CompositeIntermediateKey) -> Self {
method serialize (line 348) | fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
method from (line 434) | fn from(value: AfterKey) -> Self {
function composite_intermediate_key_ordering (line 440) | pub fn composite_intermediate_key_ordering(
function datetime_from_iso_str (line 512) | fn datetime_from_iso_str(date_str: &str) -> common::DateTime {
function ms_timestamp_from_iso_str (line 519) | fn ms_timestamp_from_iso_str(date_str: &str) -> i64 {
function exec_and_assert_all_paginations (line 528) | fn exec_and_assert_all_paginations(
function composite_aggregation_test (line 593) | fn composite_aggregation_test(merge_segments: bool) -> crate::Result<()> {
function composite_aggregation_term_single_segment (line 642) | fn composite_aggregation_term_single_segment() -> crate::Result<()> {
function composite_aggregation_term_multi_segment (line 647) | fn composite_aggregation_term_multi_segment() -> crate::Result<()> {
function composite_aggregation_term_size_limit (line 651) | fn composite_aggregation_term_size_limit(merge_segments: bool) -> crate:...
function composite_aggregation_term_size_limit_single_segment (line 720) | fn composite_aggregation_term_size_limit_single_segment() -> crate::Resu...
function composite_aggregation_term_size_limit_multi_segment (line 725) | fn composite_aggregation_term_size_limit_multi_segment() -> crate::Resul...
function composite_aggregation_term_ordering (line 730) | fn composite_aggregation_term_ordering() -> crate::Result<()> {
function composite_aggregation_term_missing_values (line 829) | fn composite_aggregation_term_missing_values() -> crate::Result<()> {
function composite_aggregation_term_missing_order (line 874) | fn composite_aggregation_term_missing_order() -> crate::Result<()> {
function composite_aggregation_term_multi_source (line 958) | fn composite_aggregation_term_multi_source() -> crate::Result<()> {
function composite_aggregation_term_multi_source_ordering (line 994) | fn composite_aggregation_term_multi_source_ordering() -> crate::Result<(...
function composite_aggregation_term_with_sub_aggregations (line 1027) | fn composite_aggregation_term_with_sub_aggregations() -> crate::Result<(...
function composite_aggregation_term_validation_errors (line 1091) | fn composite_aggregation_term_validation_errors() -> crate::Result<()> {
function composite_aggregation_term_numeric_fields (line 1137) | fn composite_aggregation_term_numeric_fields() -> crate::Result<()> {
function composite_aggregation_term_date_fields (line 1169) | fn composite_aggregation_term_date_fields() -> crate::Result<()> {
function composite_aggregation_term_ip_fields (line 1204) | fn composite_aggregation_term_ip_fields() -> crate::Result<()> {
function composite_aggregation_term_multiple_column_types (line 1242) | fn composite_aggregation_term_multiple_column_types() -> crate::Result<(...
function composite_aggregation_term_json_various_types (line 1277) | fn composite_aggregation_term_json_various_types() -> crate::Result<()> {
function composite_aggregation_term_json_missing_fields (line 1317) | fn composite_aggregation_term_json_missing_fields() -> crate::Result<()> {
function composite_aggregation_term_json_nested_fields (line 1370) | fn composite_aggregation_term_json_nested_fields() -> crate::Result<()> {
function composite_aggregation_term_json_mixed_types (line 1409) | fn composite_aggregation_term_json_mixed_types() -> crate::Result<()> {
function composite_aggregation_term_multi_value_fields (line 1480) | fn composite_aggregation_term_multi_value_fields() -> crate::Result<()> {
function composite_aggregation_histogram_basic (line 1543) | fn composite_aggregation_histogram_basic() -> crate::Result<()> {
function composite_aggregation_histogram_json_mixed_types (line 1575) | fn composite_aggregation_histogram_json_mixed_types() -> crate::Result<(...
function composite_aggregation_date_histogram_calendar_interval (line 1631) | fn composite_aggregation_date_histogram_calendar_interval() -> crate::Re...
function composite_aggregation_date_histogram_fixed_interval (line 1664) | fn composite_aggregation_date_histogram_fixed_interval() -> crate::Resul...
function composite_aggregation_mixed_term_and_date_histogram (line 1699) | fn composite_aggregation_mixed_term_and_date_histogram() -> crate::Resul...
function composite_aggregation_no_matching_columns (line 1801) | fn composite_aggregation_no_matching_columns() -> crate::Result<()> {
FILE: src/aggregation/bucket/composite/numeric_types.rs
function cmp_i64_f64 (line 8) | pub fn cmp_i64_f64(left_i: i64, right_f: f64) -> crate::Result<Ordering> {
function cmp_u64_f64 (line 48) | pub fn cmp_u64_f64(left_u: u64, right_f: f64) -> crate::Result<Ordering> {
function cmp_i64_u64 (line 86) | pub fn cmp_i64_u64(left_i: i64, right_u: u64) -> Ordering {
type ProjectedNumber (line 104) | pub enum ProjectedNumber<T> {
function i64_to_u64 (line 110) | pub fn i64_to_u64(value: i64) -> ProjectedNumber<u64> {
function u64_to_i64 (line 118) | pub fn u64_to_i64(value: u64) -> ProjectedNumber<i64> {
function f64_to_u64 (line 126) | pub fn f64_to_u64(value: f64) -> ProjectedNumber<u64> {
function f64_to_i64 (line 139) | pub fn f64_to_i64(value: f64) -> ProjectedNumber<i64> {
function i64_to_f64 (line 154) | pub fn i64_to_f64(value: i64) -> ProjectedNumber<f64> {
function u64_to_f64 (line 170) | pub fn u64_to_f64(value: u64) -> ProjectedNumber<f64> {
function test_cmp_u64_f64 (line 191) | fn test_cmp_u64_f64() {
function test_cmp_i64_f64 (line 236) | fn test_cmp_i64_f64() {
function test_cmp_i64_u64 (line 312) | fn test_cmp_i64_u64() {
function test_i64_to_u64 (line 338) | fn test_i64_to_u64() {
function test_u64_to_i64 (line 350) | fn test_u64_to_i64() {
function test_f64_to_u64 (line 365) | fn test_f64_to_u64() {
function test_f64_to_i64 (line 380) | fn test_f64_to_i64() {
function test_i64_to_f64 (line 401) | fn test_i64_to_f64() {
function test_u64_to_f64 (line 437) | fn test_u64_to_f64() {
FILE: src/aggregation/bucket/filter.rs
type QueryBuilder (line 85) | pub trait QueryBuilder: Debug + Send + Sync {
method build_query (line 96) | fn build_query(
method box_clone (line 111) | fn box_clone(&self) -> Box<dyn QueryBuilder>;
type FilterAggregation (line 170) | pub struct FilterAggregation {
method new (line 220) | pub fn new(query_string: String) -> Self {
method new_with_builder (line 268) | pub fn new_with_builder(builder: Box<dyn QueryBuilder>) -> Self {
method parse_query (line 278) | pub(crate) fn parse_query(
method parse_query_with_parser (line 306) | pub fn parse_query_with_parser(
method get_fast_field_names (line 323) | pub fn get_fast_field_names(&self) -> Vec<&str> {
method deserialize (line 362) | fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
type FilterQuery (line 176) | pub enum FilterQuery {
method fmt (line 196) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
method clone (line 207) | fn clone(&self) -> Self {
method serialize (line 346) | fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
method eq (line 388) | fn eq(&self, other: &Self) -> bool {
type FilterAggReqData (line 399) | pub struct FilterAggReqData {
method get_memory_consumption (line 416) | pub(crate) fn get_memory_consumption(&self) -> usize {
type DocumentQueryEvaluator (line 427) | pub struct DocumentQueryEvaluator {
method new (line 438) | pub(crate) fn new(
method matches_document (line 475) | pub fn matches_document(&self, doc: DocId) -> bool {
method filter_batch (line 482) | pub fn filter_batch(&self, docs: &[DocId], output: &mut Vec<DocId>) {
method fmt (line 492) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type DocCount (line 500) | struct DocCount {
type SegmentFilterCollector (line 506) | pub struct SegmentFilterCollector<C: SubAggCache> {
function from_req_and_validate (line 518) | pub(crate) fn from_req_and_validate(
function build_segment_filter_collector (line 539) | pub(crate) fn build_segment_filter_collector(
method fmt (line 560) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
method add_intermediate_aggregation_result (line 570) | fn add_intermediate_aggregation_result(
method collect (line 615) | fn collect(
method flush (line 656) | fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Res...
method prepare_max_bucket (line 663) | fn prepare_max_bucket(
type IntermediateFilterBucketResult (line 681) | pub struct IntermediateFilterBucketResult {
function aggregation_results_to_json (line 701) | fn aggregation_results_to_json(results: &AggregationResults) -> Value {
function json_values_match (line 705) | fn json_values_match(actual: &Value, expected: &Value, tolerance: f64) -...
function assert_aggregation_results_match (line 743) | fn assert_aggregation_results_match(
function create_standard_test_index (line 768) | fn create_standard_test_index() -> crate::Result<Index> {
function create_collector (line 804) | fn create_collector(
function test_basic_filter_with_metric_agg (line 820) | fn test_basic_filter_with_metric_agg() -> crate::Result<()> {
function test_filter_with_no_matches (line 850) | fn test_filter_with_no_matches() -> crate::Result<()> {
function test_multiple_independent_filters (line 880) | fn test_multiple_independent_filters() -> crate::Result<()> {
function test_term_query_filter (line 928) | fn test_term_query_filter() -> crate::Result<()> {
function test_range_query_filter (line 956) | fn test_range_query_filter() -> crate::Result<()> {
function test_boolean_query_filter (line 984) | fn test_boolean_query_filter() -> crate::Result<()> {
function test_bool_field_filter (line 1012) | fn test_bool_field_filter() -> crate::Result<()> {
function test_two_level_nested_filters (line 1052) | fn test_two_level_nested_filters() -> crate::Result<()> {
function test_deeply_nested_filters (line 1098) | fn test_deeply_nested_filters() -> crate::Result<()> {
function test_multiple_nested_branches (line 1152) | fn test_multiple_nested_branches() -> crate::Result<()> {
function test_nested_filters_with_multiple_siblings_at_each_level (line 1200) | fn test_nested_filters_with_multiple_siblings_at_each_level() -> crate::...
function test_filter_with_terms_sub_agg (line 1292) | fn test_filter_with_terms_sub_agg() -> crate::Result<()> {
function test_filter_with_multiple_metric_aggs (line 1343) | fn test_filter_with_multiple_metric_aggs() -> crate::Result<()> {
function test_filter_on_empty_index (line 1387) | fn test_filter_on_empty_index() -> crate::Result<()> {
function test_malformed_query_string (line 1423) | fn test_malformed_query_string() -> crate::Result<()> {
function test_filter_with_base_query (line 1449) | fn test_filter_with_base_query() -> crate::Result<()> {
function test_custom_query_builder (line 1489) | fn test_custom_query_builder() -> crate::Result<()> {
function test_query_string_serialization (line 1556) | fn test_query_string_serialization() -> crate::Result<()> {
function test_query_builder_serialization_roundtrip (line 1588) | fn test_query_builder_serialization_roundtrip() -> crate::Result<()> {
function test_filter_result_correctness_vs_separate_query (line 1665) | fn test_filter_result_correctness_vs_separate_query() -> crate::Result<(...
function test_custom_tokenizer_required (line 1723) | fn test_custom_tokenizer_required() -> crate::Result<()> {
FILE: src/aggregation/bucket/histogram/date_histogram.rs
type DateHistogramAggregationReq (line 34) | pub struct DateHistogramAggregationReq {
method to_histogram_req (line 120) | pub(crate) fn to_histogram_req(&self) -> crate::Result<HistogramAggreg...
method validate (line 139) | fn validate(&self) -> crate::Result<()> {
type DateHistogramParseError (line 172) | pub enum DateHistogramParseError {
function parse_offset_into_milliseconds (line 190) | fn parse_offset_into_milliseconds(input: &str) -> Result<i64, Aggregatio...
function parse_into_milliseconds (line 210) | pub(crate) fn parse_into_milliseconds(input: &str) -> Result<i64, Aggreg...
function test_parse_into_millisecs (line 258) | fn test_parse_into_millisecs() {
function test_parse_offset_into_milliseconds (line 277) | fn test_parse_offset_into_milliseconds() {
function test_parse_into_milliseconds_do_not_accept_non_ascii (line 300) | fn test_parse_into_milliseconds_do_not_accept_non_ascii() {
function get_test_index_from_docs (line 304) | pub fn get_test_index_from_docs(
function histogram_test_date_force_merge_segments (line 342) | fn histogram_test_date_force_merge_segments() {
function histogram_test_date (line 347) | fn histogram_test_date() {
function histogram_test_date_merge_segments (line 351) | fn histogram_test_date_merge_segments(merge_segments: bool) {
function histogram_test_invalid_req (line 650) | fn histogram_test_invalid_req() {
FILE: src/aggregation/bucket/histogram/histogram.rs
type HistogramAggReqData (line 24) | pub struct HistogramAggReqData {
method get_memory_consumption (line 42) | pub fn get_memory_consumption(&self) -> usize {
type HistogramAggregation (line 92) | pub struct HistogramAggregation {
method normalize_date_time (line 154) | pub(crate) fn normalize_date_time(&mut self) {
method validate (line 171) | fn validate(&self) -> crate::Result<()> {
method min_doc_count (line 198) | pub fn min_doc_count(&self) -> u64 {
type HistogramBounds (line 205) | pub struct HistogramBounds {
method contains (line 246) | fn contains(&self, val: f64) -> bool {
function deserialize_date_or_num (line 214) | fn deserialize_date_or_num<'de, D>(deserializer: D) -> Result<f64, D::Er...
method fmt (line 240) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type SegmentHistogramBucketEntry (line 252) | pub(crate) struct SegmentHistogramBucketEntry {
method into_intermediate_bucket_entry (line 259) | pub(crate) fn into_intermediate_bucket_entry(
type HistogramBuckets (line 283) | struct HistogramBuckets {
type SegmentHistogramCollector (line 290) | pub struct SegmentHistogramCollector {
method get_memory_consumption (line 398) | fn get_memory_consumption(&self) -> usize {
method add_intermediate_bucket_result (line 404) | fn add_intermediate_bucket_result(
method from_req_and_validate (line 428) | pub(crate) fn from_req_and_validate(
method add_intermediate_aggregation_result (line 300) | fn add_intermediate_aggregation_result(
method collect (line 320) | fn collect(
method flush (line 376) | fn flush(&mut self, agg_data: &mut AggregationsSegmentCtx) -> crate::Res...
method prepare_max_bucket (line 383) | fn prepare_max_bucket(
function get_bucket_pos_f64 (line 459) | fn get_bucket_pos_f64(val: f64, interval: f64, offset: f64) -> f64 {
function get_bucket_key_from_pos (line 464) | fn get_bucket_key_from_pos(bucket_pos: f64, interval: f64, offset: f64) ...
function intermediate_buckets_to_final_buckets_fill_gaps (line 469) | fn intermediate_buckets_to_final_buckets_fill_gaps(
function intermediate_histogram_buckets_to_final_buckets (line 526) | pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
function get_req_min_max (line 578) | fn get_req_min_max(req: &HistogramAggregation, min_max: Option<(f64, f64...
function generate_bucket_pos_with_opt_minmax (line 597) | pub(crate) fn generate_bucket_pos_with_opt_minmax(
function generate_buckets_with_opt_minmax (line 612) | pub(crate) fn generate_buckets_with_opt_minmax(
function histogram_test_crooked_values (line 642) | fn histogram_test_crooked_values() -> crate::Result<()> {
function histogram_test_min_value_positive_force_merge_segments (line 701) | fn histogram_test_min_value_positive_force_merge_segments() -> crate::Re...
function histogram_test_min_value_positive (line 706) | fn histogram_test_min_value_positive() -> crate::Result<()> {
function histogram_test_min_value_positive_merge_segments (line 709) | fn histogram_test_min_value_positive_merge_segments(merge_segments: bool...
function histogram_simple_test (line 742) | fn histogram_simple_test() -> crate::Result<()> {
function histogram_memory_limit (line 768) | fn histogram_memory_limit() -> crate::Result<()> {
function histogram_merge_test (line 796) | fn histogram_merge_test() -> crate::Result<()> {
function histogram_min_doc_test_multi_segments (line 826) | fn histogram_min_doc_test_multi_segments() -> crate::Result<()> {
function histogram_min_doc_test_single_segments (line 830) | fn histogram_min_doc_test_single_segments() -> crate::Result<()> {
function histogram_min_doc_test_with_opt (line 833) | fn histogram_min_doc_test_with_opt(merge_segments: bool) -> crate::Resul...
function histogram_extended_bounds_test_multi_segment (line 861) | fn histogram_extended_bounds_test_multi_segment() -> crate::Result<()> {
function histogram_extended_bounds_test_single_segment (line 865) | fn histogram_extended_bounds_test_single_segment() -> crate::Result<()> {
function histogram_extended_bounds_test_with_opt (line 868) | fn histogram_extended_bounds_test_with_opt(merge_segments: bool) -> crat...
function histogram_hard_bounds_test_multi_segment (line 964) | fn histogram_hard_bounds_test_multi_segment() -> crate::Result<()> {
function histogram_hard_bounds_test_single_segment (line 968) | fn histogram_hard_bounds_test_single_segment() -> crate::Result<()> {
function histogram_hard_bounds_test_with_opt (line 971) | fn histogram_hard_bounds_test_with_opt(merge_segments: bool) -> crate::R...
function histogram_empty_result_behaviour_test_single_segment (line 1062) | fn histogram_empty_result_behaviour_test_single_segment() -> crate::Resu...
function histogram_empty_result_behaviour_test_multi_segment (line 1067) | fn histogram_empty_result_behaviour_test_multi_segment() -> crate::Resul...
function histogram_empty_result_behaviour_test_with_opt (line 1071) | fn histogram_empty_result_behaviour_test_with_opt(merge_segments: bool) ...
function histogram_single_bucket_test_single_segment (line 1250) | fn histogram_single_bucket_test_single_segment() -> crate::Result<()> {
function histogram_single_bucket_test_multi_segment (line 1255) | fn histogram_single_bucket_test_multi_segment() -> crate::Result<()> {
function histogram_single_bucket_test_with_opt (line 1259) | fn histogram_single_bucket_test_with_opt(merge_segments: bool) -> crate:...
function histogram_date_test_single_segment (line 1284) | fn histogram_date_test_single_segment() -> crate::Result<()> {
function histogram_date_test_multi_segment (line 1289) | fn histogram_date_test_multi_segment() -> crate::Result<()> {
function histogram_date_test_with_opt (line 1293) | fn histogram_date_test_with_opt(merge_segments: bool) -> crate::Result<(...
function histogram_invalid_request (line 1337) | fn histogram_invalid_request() -> crate::Result<()> {
function histogram_keyed_buckets_test (line 1358) | fn histogram_keyed_buckets_test() -> crate::Result<()> {
function test_aggregation_histogram_empty_index (line 1394) | fn test_aggregation_histogram_empty_index() -> crate::Result<()> {
FILE: src/aggregation/bucket/mod.rs
type Order (line 45) | pub enum Order {
type OrderTarget (line 58) | pub enum OrderTarget {
method from (line 71) | fn from(val: &str) -> Self {
method fmt (line 81) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
type CustomOrder (lin
Condensed preview — 504 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (9,064K chars).
[
{
"path": ".claude/skills/rationalize-deps/SKILL.md",
"chars": 3463,
"preview": "---\nname: rationalize-deps\ndescription: Analyze Cargo.toml dependencies and attempt to remove unused features to reduce "
},
{
"path": ".claude/skills/simple-pr/SKILL.md",
"chars": 1718,
"preview": "---\nname: simple-pr\ndescription: Create a simple PR from staged changes with an auto-generated commit message\ndisable-mo"
},
{
"path": ".github/FUNDING.yml",
"chars": 644,
"preview": "# These are supported funding model platforms\n\ngithub: fulmicoton\npatreon: # Replace with a single Patreon username\nopen"
},
{
"path": ".github/ISSUE_TEMPLATE/actions.md",
"chars": 167,
"preview": "---\nname: Actions\nabout: Actions not directly related to producing code.\n\n---\n\n# Actions title\n\nAction description. \ne.g"
},
{
"path": ".github/ISSUE_TEMPLATE/bug_report.md",
"chars": 478,
"preview": "---\nname: Bug report\nabout: Create a report to help us improve\n\n---\n\n**Describe the bug**\n- What did you do?\n- What happ"
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.md",
"chars": 478,
"preview": "---\nname: Feature request\nabout: Suggest an idea for this project\n\n---\n\n**Is your feature request related to a problem? "
},
{
"path": ".github/ISSUE_TEMPLATE/question.md",
"chars": 116,
"preview": "---\nname: Question\nabout: Ask any question about tantivy's usage...\n\n---\n\nTry to be specific about your use case...\n"
},
{
"path": ".github/dependabot.yml",
"chars": 282,
"preview": "version: 2\nupdates:\n- package-ecosystem: cargo\n directory: \"/\"\n schedule:\n interval: daily\n time: \"20:00\"\n open"
},
{
"path": ".github/workflows/coverage.yml",
"chars": 979,
"preview": "name: Coverage\n\non:\n push:\n branches: [main]\n\n# Ensures that we cancel running jobs for the same PR / same workflow."
},
{
"path": ".github/workflows/long_running.yml",
"chars": 745,
"preview": "name: Long running tests\n\non:\n push:\n branches: [ main ]\n\nenv:\n CARGO_TERM_COLOR: always\n NUM_FUNCTIONAL_TEST_ITER"
},
{
"path": ".github/workflows/test.yml",
"chars": 2694,
"preview": "name: Unit tests\n\non:\n push:\n branches: [ main ]\n pull_request:\n branches: [ main ]\n\nenv:\n CARGO_TERM_COLOR: al"
},
{
"path": ".gitignore",
"chars": 172,
"preview": "tantivy.iml\n.cargo\nproptest-regressions\n*.swp\ntarget\ntarget/debug\n.vscode\ntarget/release\nCargo.lock\nbenchmark\n.DS_Store\n"
},
{
"path": "ARCHITECTURE.md",
"chars": 16981,
"preview": "# Tantivy\n\n## What is tantivy?\n\nTantivy is a library that is meant to build search engines. Although it is by no means a"
},
{
"path": "AUTHORS",
"chars": 312,
"preview": "# This is the list of authors of tantivy for copyright purposes.\nPaul Masurel\nLaurentiu Nicola\nDru Sellers\nAshley Mannix"
},
{
"path": "CHANGELOG.md",
"chars": 52423,
"preview": "Tantivy 0.25\n================================\n\n## Bugfixes\n- fix union performance regression in tantivy 0.24 [#2663](ht"
},
{
"path": "CITATION.cff",
"chars": 285,
"preview": "cff-version: 1.2.0\nmessage: \"If you use this software, please cite it as below.\"\nauthors:\n - alias: Quickwit Inc.\n w"
},
{
"path": "Cargo.toml",
"chars": 5356,
"preview": "[package]\nname = \"tantivy\"\nversion = \"0.26.0\"\nauthors = [\"Paul Masurel <paul.masurel@gmail.com>\"]\nlicense = \"MIT\"\ncatego"
},
{
"path": "LICENSE",
"chars": 1099,
"preview": "Copyright (c) 2018 by the project authors, as listed in the AUTHORS file. \n\nPermission is hereby granted, free of charge"
},
{
"path": "Makefile",
"chars": 103,
"preview": "test:\n\t@echo \"Run test only... No examples.\"\n\tcargo test --tests --lib\n\nfmt:\n\tcargo +nightly fmt --all\n"
},
{
"path": "README.md",
"chars": 7786,
"preview": "[](https://docs.rs/crate/tantivy/)\n[.\n// -"
},
{
"path": "benches/range_queries.rs",
"chars": 9987,
"preview": "use std::ops::Bound;\n\nuse binggan::{black_box, BenchGroup, BenchRunner};\nuse rand::prelude::*;\nuse rand::rngs::StdRng;\nu"
},
{
"path": "benches/range_query.rs",
"chars": 7955,
"preview": "use std::fmt::Display;\nuse std::net::Ipv6Addr;\nuse std::ops::RangeInclusive;\n\nuse binggan::plugins::PeakMemAllocPlugin;\n"
},
{
"path": "benches/regex_all_terms.rs",
"chars": 3282,
"preview": "// Benchmarks regex query that matches all terms in a synthetic index.\n//\n// Corpus model:\n// - N unique terms: t000000,"
},
{
"path": "benches/str_search_and_get.rs",
"chars": 12289,
"preview": "// This benchmark compares different approaches for retrieving string values:\n//\n// 1. Fast Field Approach: retrieves st"
},
{
"path": "benches/wiki.json",
"chars": 1154470,
"preview": "{\"url\":\"https://en.wikipedia.org/wiki?curid=48687903\",\"title\":\"Jeon Hye-jin (actress, born 1988)\",\"body\":\"\\nJeon Hye-jin"
},
{
"path": "bitpacker/Cargo.toml",
"chars": 657,
"preview": "[package]\nname = \"tantivy-bitpacker\"\nversion = \"0.9.0\"\nedition = \"2024\"\nauthors = [\"Paul Masurel <paul.masurel@gmail.com"
},
{
"path": "bitpacker/benches/bench.rs",
"chars": 1851,
"preview": "#![feature(test)]\n\nextern crate test;\n\n#[cfg(test)]\nmod tests {\n use rand::rng;\n use rand::seq::IteratorRandom;\n "
},
{
"path": "bitpacker/src/bitpacker.rs",
"chars": 13181,
"preview": "use std::io;\nuse std::ops::{Range, RangeInclusive};\n\nuse bitpacking::{BitPacker as ExternalBitPackerTrait, BitPacker1x};"
},
{
"path": "bitpacker/src/blocked_bitpacker.rs",
"chars": 5668,
"preview": "use super::bitpacker::BitPacker;\nuse super::compute_num_bits;\nuse crate::{BitUnpacker, minmax};\n\nconst BLOCK_SIZE: usize"
},
{
"path": "bitpacker/src/filter_vec/avx2.rs",
"chars": 14416,
"preview": "//! SIMD filtering of a vector as described in the following blog post.\n//! <https://quickwit.io/blog/filtering%20a%20ve"
},
{
"path": "bitpacker/src/filter_vec/mod.rs",
"chars": 5915,
"preview": "use std::ops::RangeInclusive;\n\n#[cfg(target_arch = \"x86_64\")]\nmod avx2;\n\nmod scalar;\n\n#[derive(Clone, Copy, Eq, PartialE"
},
{
"path": "bitpacker/src/filter_vec/scalar.rs",
"chars": 476,
"preview": "use std::ops::RangeInclusive;\n\npub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>"
},
{
"path": "bitpacker/src/lib.rs",
"chars": 4334,
"preview": "mod bitpacker;\nmod blocked_bitpacker;\nmod filter_vec;\n\nuse std::cmp::Ordering;\n\npub use crate::bitpacker::{BitPacker, Bi"
},
{
"path": "cliff.toml",
"chars": 3052,
"preview": "# configuration file for git-cliff{ pattern = \"foo\", replace = \"bar\"}\n# see https://github.com/orhun/git-cliff#configura"
},
{
"path": "columnar/Cargo.toml",
"chars": 1304,
"preview": "[package]\nname = \"tantivy-columnar\"\nversion = \"0.6.0\"\nedition = \"2024\"\nlicense = \"MIT\"\nhomepage = \"https://github.com/qu"
},
{
"path": "columnar/README.md",
"chars": 3900,
"preview": "# Columnar format\n\nThis crate describes columnar format used in tantivy.\n\n## Goals\n\nThis format is special in the follow"
},
{
"path": "columnar/benches/bench_access.rs",
"chars": 1789,
"preview": "use binggan::{InputGroup, black_box};\nuse common::*;\nuse tantivy_columnar::Column;\n\npub mod common;\n\nconst NUM_DOCS: u32"
},
{
"path": "columnar/benches/bench_column_values_get.rs",
"chars": 1659,
"preview": "use std::sync::Arc;\n\nuse binggan::{InputGroup, black_box};\nuse rand::rngs::StdRng;\nuse rand::{Rng, SeedableRng};\nuse tan"
},
{
"path": "columnar/benches/bench_create_column_values.rs",
"chars": 1282,
"preview": "use binggan::{InputGroup, black_box};\nuse rand::rngs::StdRng;\nuse rand::{Rng, SeedableRng};\nuse tantivy_columnar::column"
},
{
"path": "columnar/benches/bench_first_vals.rs",
"chars": 3300,
"preview": "use std::sync::Arc;\n\nuse binggan::{InputGroup, black_box};\nuse rand::prelude::*;\nuse tantivy_columnar::column_values::{C"
},
{
"path": "columnar/benches/bench_merge.rs",
"chars": 1617,
"preview": "pub mod common;\n\nuse binggan::BenchRunner;\nuse common::{Card, generate_columnar_with_name};\nuse tantivy_columnar::*;\n\nco"
},
{
"path": "columnar/benches/bench_optional_index.rs",
"chars": 3728,
"preview": "use binggan::{InputGroup, black_box};\nuse rand::rngs::StdRng;\nuse rand::{Rng, SeedableRng};\nuse tantivy_columnar::column"
},
{
"path": "columnar/benches/bench_values_u128.rs",
"chars": 3761,
"preview": "use std::ops::RangeInclusive;\nuse std::sync::Arc;\n\nuse binggan::{InputGroup, black_box};\nuse common::OwnedBytes;\nuse ran"
},
{
"path": "columnar/benches/bench_values_u64.rs",
"chars": 4912,
"preview": "use std::ops::RangeInclusive;\nuse std::sync::Arc;\n\nuse binggan::{InputGroup, black_box};\nuse rand::prelude::*;\nuse tanti"
},
{
"path": "columnar/benches/common.rs",
"chars": 1824,
"preview": "extern crate tantivy_columnar;\n\nuse core::fmt;\nuse std::fmt::{Display, Formatter};\n\nuse tantivy_columnar::{ColumnarReade"
},
{
"path": "columnar/columnar-cli/Cargo.toml",
"chars": 307,
"preview": "[package]\nname = \"tantivy-columnar-cli\"\nversion = \"0.1.0\"\nedition = \"2021\"\nlicense = \"MIT\"\n\n[dependencies]\ncolumnar = {p"
},
{
"path": "columnar/columnar-cli/src/main.rs",
"chars": 3981,
"preview": "use columnar::ColumnarWriter;\nuse columnar::NumericalValue;\nuse serde_json_borrow;\nuse std::fs::File;\nuse std::io;\nuse s"
},
{
"path": "columnar/columnar-cli-inspect/Cargo.toml",
"chars": 371,
"preview": "[package]\nname = \"tantivy-columnar-inspect\"\nversion = \"0.1.0\"\nedition = \"2021\"\nlicense = \"MIT\"\n\n[dependencies]\ntantivy ="
},
{
"path": "columnar/columnar-cli-inspect/src/main.rs",
"chars": 2205,
"preview": "use columnar::ColumnarReader;\nuse common::file_slice::{FileSlice, WrapFile};\nuse std::io;\nuse std::path::Path;\nuse tanti"
},
{
"path": "columnar/src/TODO.md",
"chars": 1452,
"preview": "# zero to one\n\n* revisit line codec\n* add columns from schema on merge\n* Plugging JSON\n* replug examples\n* move datetime"
},
{
"path": "columnar/src/block_accessor.rs",
"chars": 5150,
"preview": "use std::cmp::Ordering;\n\nuse crate::{Column, DocId, RowId};\n\n#[derive(Debug, Default, Clone)]\npub struct ColumnBlockAcce"
},
{
"path": "columnar/src/column/dictionary_encoded.rs",
"chars": 3311,
"preview": "use std::ops::Deref;\nuse std::sync::Arc;\nuse std::{fmt, io};\n\nuse sstable::{Dictionary, VoidSSTable};\n\nuse crate::RowId;"
},
{
"path": "columnar/src/column/mod.rs",
"chars": 6782,
"preview": "mod dictionary_encoded;\nmod serialize;\n\nuse std::fmt::{self, Debug};\nuse std::io::Write;\nuse std::ops::{Range, RangeIncl"
},
{
"path": "columnar/src/column/serialize.rs",
"chars": 4472,
"preview": "use std::io;\nuse std::io::Write;\nuse std::sync::Arc;\n\nuse common::OwnedBytes;\nuse sstable::Dictionary;\n\nuse crate::colum"
},
{
"path": "columnar/src/column_index/merge/mod.rs",
"chars": 8380,
"preview": "mod shuffled;\nmod stacked;\n\nuse common::ReadOnlyBitSet;\nuse shuffled::merge_column_index_shuffled;\nuse stacked::merge_co"
},
{
"path": "columnar/src/column_index/merge/shuffled.rs",
"chars": 6473,
"preview": "use std::iter;\n\nuse crate::column_index::{\n SerializableColumnIndex, SerializableMultivalueIndex, SerializableOptiona"
},
{
"path": "columnar/src/column_index/merge/stacked.rs",
"chars": 7104,
"preview": "use std::ops::Range;\n\nuse crate::column_index::SerializableColumnIndex;\nuse crate::column_index::multivalued_index::{Mul"
},
{
"path": "columnar/src/column_index/mod.rs",
"chars": 7952,
"preview": "//! # `column_index`\n//!\n//! `column_index` provides rank and select operations to associate positions when not all\n//! "
},
{
"path": "columnar/src/column_index/multivalued_index.rs",
"chars": 15438,
"preview": "use std::io;\nuse std::io::Write;\nuse std::ops::Range;\nuse std::sync::Arc;\n\nuse common::{CountingWriter, OwnedBytes};\n\nus"
},
{
"path": "columnar/src/column_index/optional_index/mod.rs",
"chars": 18180,
"preview": "use std::io;\nuse std::sync::Arc;\n\nmod set;\nmod set_block;\n\nuse common::{BinarySerializable, OwnedBytes, VInt};\npub use s"
},
{
"path": "columnar/src/column_index/optional_index/set.rs",
"chars": 1770,
"preview": "use std::io;\n\n/// A codec makes it possible to serialize a set of\n/// elements, and open the resulting Set representatio"
},
{
"path": "columnar/src/column_index/optional_index/set_block/dense.rs",
"chars": 9050,
"preview": "use std::io::{self, Write};\n\nuse common::BinarySerializable;\n\nuse crate::column_index::optional_index::{ELEMENTS_PER_BLO"
},
{
"path": "columnar/src/column_index/optional_index/set_block/mod.rs",
"chars": 167,
"preview": "mod dense;\nmod sparse;\n\npub use dense::{DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec};\npub use sparse::{SparseBloc"
},
{
"path": "columnar/src/column_index/optional_index/set_block/sparse.rs",
"chars": 2872,
"preview": "use crate::column_index::optional_index::{SelectCursor, Set, SetCodec};\n\npub struct SparseBlockCodec;\n\nimpl SetCodec for"
},
{
"path": "columnar/src/column_index/optional_index/set_block/tests.rs",
"chars": 5459,
"preview": "use std::collections::HashMap;\n\nuse crate::column_index::optional_index::set_block::dense::DENSE_BLOCK_NUM_BYTES;\nuse cr"
},
{
"path": "columnar/src/column_index/optional_index/tests.rs",
"chars": 7859,
"preview": "use proptest::prelude::*;\nuse proptest::{prop_oneof, proptest};\n\nuse super::*;\nuse crate::{ColumnarReader, ColumnarWrite"
},
{
"path": "columnar/src/column_index/serialize.rs",
"chars": 3153,
"preview": "use std::io;\nuse std::io::Write;\n\nuse common::{CountingWriter, OwnedBytes};\n\nuse super::OptionalIndex;\nuse super::multiv"
},
{
"path": "columnar/src/column_values/merge.rs",
"chars": 1623,
"preview": "use std::fmt::Debug;\nuse std::sync::Arc;\n\nuse crate::iterable::Iterable;\nuse crate::{ColumnIndex, ColumnValues, MergeRow"
},
{
"path": "columnar/src/column_values/mod.rs",
"chars": 7645,
"preview": "#![warn(missing_docs)]\n\n//! # `fastfield_codecs`\n//!\n//! - Columnar storage of data for tantivy [`crate::Column`].\n//! -"
},
{
"path": "columnar/src/column_values/monotonic_column.rs",
"chars": 4063,
"preview": "use std::fmt::Debug;\nuse std::marker::PhantomData;\nuse std::ops::{Range, RangeInclusive};\n\nuse crate::ColumnValues;\nuse "
},
{
"path": "columnar/src/column_values/monotonic_mapping.rs",
"chars": 5819,
"preview": "use std::fmt::Debug;\nuse std::marker::PhantomData;\n\nuse common::DateTime;\n\nuse super::MonotonicallyMappableToU128;\nuse c"
},
{
"path": "columnar/src/column_values/monotonic_mapping_u128.rs",
"chars": 1072,
"preview": "use std::fmt::Debug;\nuse std::net::Ipv6Addr;\n\n/// Monotonic maps a value to u128 value space\n/// Monotonic mapping enabl"
},
{
"path": "columnar/src/column_values/stats.rs",
"chars": 2927,
"preview": "use std::io;\nuse std::io::Write;\nuse std::num::NonZeroU64;\n\nuse common::{BinarySerializable, VInt};\n\nuse crate::RowId;\n\n"
},
{
"path": "columnar/src/column_values/u128_based/compact_space/blank_range.rs",
"chars": 1275,
"preview": "use std::ops::RangeInclusive;\n\n/// The range of a blank in value space.\n///\n/// A blank is an unoccupied space in the da"
},
{
"path": "columnar/src/column_values/u128_based/compact_space/build_compact_space.rs",
"chars": 9451,
"preview": "use std::collections::{BTreeSet, BinaryHeap};\nuse std::iter;\nuse std::ops::RangeInclusive;\n\nuse itertools::Itertools;\n\nu"
},
{
"path": "columnar/src/column_values/u128_based/compact_space/mod.rs",
"chars": 29672,
"preview": "/// This codec takes a large number space (u128) and reduces it to a compact number space.\n///\n/// It will find spaces i"
},
{
"path": "columnar/src/column_values/u128_based/mod.rs",
"chars": 6525,
"preview": "use std::fmt::Debug;\nuse std::io;\nuse std::io::Write;\nuse std::sync::Arc;\n\nmod compact_space;\n\nuse common::{BinarySerial"
},
{
"path": "columnar/src/column_values/u64_based/bitpacked.rs",
"chars": 5456,
"preview": "use std::io::{self, Write};\nuse std::num::NonZeroU64;\nuse std::ops::{Range, RangeInclusive};\n\nuse common::{BinarySeriali"
},
{
"path": "columnar/src/column_values/u64_based/blockwise_linear.rs",
"chars": 8902,
"preview": "use std::io::Write;\nuse std::sync::Arc;\nuse std::{io, iter};\n\nuse common::{BinarySerializable, CountingWriter, Deseriali"
},
{
"path": "columnar/src/column_values/u64_based/line.rs",
"chars": 7431,
"preview": "use std::io;\nuse std::num::NonZeroU32;\n\nuse common::{BinarySerializable, VInt};\n\nuse crate::column_values::ColumnValues;"
},
{
"path": "columnar/src/column_values/u64_based/linear.rs",
"chars": 8372,
"preview": "use std::io;\n\nuse common::{BinarySerializable, OwnedBytes};\nuse tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_"
},
{
"path": "columnar/src/column_values/u64_based/mod.rs",
"chars": 7568,
"preview": "mod bitpacked;\nmod blockwise_linear;\nmod line;\nmod linear;\nmod stats_collector;\n\nuse std::io;\nuse std::io::Write;\nuse st"
},
{
"path": "columnar/src/column_values/u64_based/stats_collector.rs",
"chars": 6343,
"preview": "use std::num::NonZeroU64;\n\nuse fastdivide::DividerU64;\n\nuse crate::RowId;\nuse crate::column_values::ColumnStats;\n\n/// Co"
},
{
"path": "columnar/src/column_values/u64_based/tests.rs",
"chars": 13607,
"preview": "use proptest::prelude::*;\nuse proptest::{prop_oneof, proptest};\nuse rand::Rng;\n\n#[test]\nfn test_serialize_and_load_simpl"
},
{
"path": "columnar/src/column_values/vec_column.rs",
"chars": 1287,
"preview": "use std::fmt::Debug;\n\nuse tantivy_bitpacker::minmax;\n\nuse crate::ColumnValues;\n\n/// VecColumn provides `Column` over a `"
},
{
"path": "columnar/src/columnar/column_type.rs",
"chars": 4508,
"preview": "use std::fmt;\nuse std::fmt::Debug;\nuse std::net::Ipv6Addr;\n\nuse serde::{Deserialize, Serialize};\n\nuse crate::InvalidData"
},
{
"path": "columnar/src/columnar/format_version.rs",
"chars": 2486,
"preview": "use core::fmt;\nuse std::fmt::{Display, Formatter};\n\nuse crate::InvalidData;\n\npub const VERSION_FOOTER_NUM_BYTES: usize ="
},
{
"path": "columnar/src/columnar/merge/merge_dict_column.rs",
"chars": 8193,
"preview": "use std::io::{self, Write};\n\nuse common::{BitSet, CountingWriter, ReadOnlyBitSet};\nuse sstable::{SSTable, Streamer, Term"
},
{
"path": "columnar/src/columnar/merge/merge_mapping.rs",
"chars": 4156,
"preview": "use std::ops::Range;\n\nuse common::{BitSet, OwnedBytes, ReadOnlyBitSet};\n\nuse crate::{ColumnarReader, RowAddr, RowId};\n\np"
},
{
"path": "columnar/src/columnar/merge/mod.rs",
"chars": 18842,
"preview": "mod merge_dict_column;\nmod merge_mapping;\nmod term_merger;\n\nuse std::collections::{BTreeMap, HashSet};\nuse std::io;\nuse "
},
{
"path": "columnar/src/columnar/merge/term_merger.rs",
"chars": 3096,
"preview": "use std::cmp::Ordering;\nuse std::collections::BinaryHeap;\n\nuse sstable::TermOrdinal;\n\nuse crate::Streamer;\n\n/// The term"
},
{
"path": "columnar/src/columnar/merge/tests.rs",
"chars": 21111,
"preview": "use itertools::Itertools;\nuse proptest::collection::vec;\nuse proptest::prelude::*;\n\nuse super::*;\nuse crate::columnar::{"
},
{
"path": "columnar/src/columnar/mod.rs",
"chars": 388,
"preview": "mod column_type;\nmod format_version;\nmod merge;\nmod reader;\nmod writer;\n\npub use column_type::{ColumnType, HasAssociated"
},
{
"path": "columnar/src/columnar/reader/mod.rs",
"chars": 12383,
"preview": "use std::{fmt, io, mem};\n\nuse common::BinarySerializable;\nuse common::file_slice::FileSlice;\nuse common::json_path_write"
},
{
"path": "columnar/src/columnar/writer/column_operation.rs",
"chars": 11879,
"preview": "use std::net::Ipv6Addr;\n\nuse crate::dictionary::UnorderedId;\nuse crate::utils::{place_bits, pop_first_byte, select_bits}"
},
{
"path": "columnar/src/columnar/writer/column_writers.rs",
"chars": 12307,
"preview": "use std::cmp::Ordering;\n\nuse stacker::{ExpUnrolledLinkedList, MemoryArena};\n\nuse crate::columnar::writer::column_operati"
},
{
"path": "columnar/src/columnar/writer/mod.rs",
"chars": 30573,
"preview": "mod column_operation;\nmod column_writers;\nmod serializer;\nmod value_index;\n\nuse std::io;\nuse std::net::Ipv6Addr;\n\nuse co"
},
{
"path": "columnar/src/columnar/writer/serializer.rs",
"chars": 3077,
"preview": "use std::io;\nuse std::io::Write;\n\nuse common::json_path_writer::JSON_END_OF_PATH;\nuse common::{BinarySerializable, Count"
},
{
"path": "columnar/src/columnar/writer/value_index.rs",
"chars": 6763,
"preview": "use crate::RowId;\nuse crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex};\nuse crate::iterable"
},
{
"path": "columnar/src/compat_tests.rs",
"chars": 5694,
"preview": "use std::path::PathBuf;\n\nuse itertools::Itertools;\n\nuse crate::{\n CURRENT_VERSION, Cardinality, Column, ColumnarReade"
},
{
"path": "columnar/src/dictionary.rs",
"chars": 3495,
"preview": "use std::io;\n\nuse sstable::SSTable;\nuse stacker::{MemoryArena, SharedArenaHashMap};\n\npub(crate) struct TermIdMapping {\n "
},
{
"path": "columnar/src/dynamic_column.rs",
"chars": 14045,
"preview": "use std::net::Ipv6Addr;\nuse std::sync::Arc;\nuse std::{fmt, io};\n\nuse common::file_slice::FileSlice;\nuse common::{ByteCou"
},
{
"path": "columnar/src/iterable.rs",
"chars": 686,
"preview": "use std::ops::Range;\nuse std::sync::Arc;\n\nuse crate::{ColumnValues, RowId};\n\npub trait Iterable<T = u64> {\n fn boxed_"
},
{
"path": "columnar/src/lib.rs",
"chars": 3815,
"preview": "//! # Tantivy-Columnar\n//!\n//! `tantivy-columnar`provides a columnar storage for tantivy.\n//! The crate allows for effic"
},
{
"path": "columnar/src/tests.rs",
"chars": 33731,
"preview": "use std::collections::HashMap;\nuse std::fmt::Debug;\nuse std::net::Ipv6Addr;\n\nuse common::DateTime;\nuse proptest::prelude"
},
{
"path": "columnar/src/utils.rs",
"chars": 2003,
"preview": "const fn compute_mask(num_bits: u8) -> u8 {\n if num_bits == 8 {\n u8::MAX\n } else {\n (1u8 << num_bits"
},
{
"path": "columnar/src/value.rs",
"chars": 6170,
"preview": "use std::str::FromStr;\n\nuse common::DateTime;\n\nuse crate::InvalidData;\n\n#[derive(Copy, Clone, PartialEq, Debug)]\npub enu"
},
{
"path": "common/Cargo.toml",
"chars": 811,
"preview": "[package]\nname = \"tantivy-common\"\nversion = \"0.10.0\"\nauthors = [\"Paul Masurel <paul@quickwit.io>\", \"Pascal Seitz <pascal"
},
{
"path": "common/benches/bench.rs",
"chars": 1859,
"preview": "use binggan::{BenchRunner, black_box};\nuse rand::rng;\nuse rand::seq::IteratorRandom;\nuse tantivy_common::{BitSet, TinySe"
},
{
"path": "common/src/bitset.rs",
"chars": 21236,
"preview": "use std::io::Write;\nuse std::{fmt, io};\n\nuse ownedbytes::OwnedBytes;\n\nuse crate::ByteCount;\n\n#[derive(Clone, Copy, Eq, P"
},
{
"path": "common/src/bounds.rs",
"chars": 4289,
"preview": "use std::io;\nuse std::ops::Bound;\n\n#[derive(Clone, Debug)]\npub struct BoundsRange<T> {\n pub lower_bound: Bound<T>,\n "
},
{
"path": "common/src/byte_count.rs",
"chars": 2852,
"preview": "use std::iter::Sum;\nuse std::ops::{Add, AddAssign};\n\nuse serde::{Deserialize, Serialize};\n\n/// Indicates space usage in "
},
{
"path": "common/src/datetime.rs",
"chars": 6110,
"preview": "use std::fmt;\nuse std::io::{Read, Write};\n\nuse serde::{Deserialize, Serialize};\nuse time::format_description::well_known"
},
{
"path": "common/src/file_slice.rs",
"chars": 14032,
"preview": "use std::fs::File;\nuse std::ops::{Deref, Range, RangeBounds};\nuse std::path::Path;\nuse std::sync::Arc;\nuse std::{fmt, io"
},
{
"path": "common/src/group_by.rs",
"chars": 4590,
"preview": "use std::cell::RefCell;\nuse std::iter::Peekable;\nuse std::rc::Rc;\n\npub trait GroupByIteratorExtended: Iterator {\n ///"
},
{
"path": "common/src/json_path_writer.rs",
"chars": 4186,
"preview": "use crate::replace_in_place;\n\n/// Separates the different segments of a json path.\npub const JSON_PATH_SEGMENT_SEP: u8 ="
},
{
"path": "common/src/lib.rs",
"chars": 5934,
"preview": "#![allow(clippy::len_without_is_empty)]\n\nuse std::ops::Deref;\n\npub use byteorder::LittleEndian as Endianness;\n\nmod bitse"
},
{
"path": "common/src/serialize.rs",
"chars": 10709,
"preview": "use std::borrow::Cow;\nuse std::io::{Read, Write};\nuse std::{fmt, io};\n\nuse byteorder::{ReadBytesExt, WriteBytesExt};\n\nus"
},
{
"path": "common/src/vint.rs",
"chars": 8296,
"preview": "use std::io;\nuse std::io::{Read, Write};\n\nuse super::BinarySerializable;\n\n/// Variable int serializes a u128 number\npub "
},
{
"path": "common/src/writer.rs",
"chars": 3237,
"preview": "use std::io::{self, BufWriter, Write};\n\npub struct CountingWriter<W> {\n underlying: W,\n written_bytes: u64,\n}\n\nimp"
},
{
"path": "doc/.gitignore",
"chars": 5,
"preview": "book\n"
},
{
"path": "doc/book.toml",
"chars": 101,
"preview": "[book]\nauthors = [\"Paul Masurel\"]\nmultilingual = false\nsrc = \"src\"\ntitle = \"Tantivy, the user guide\"\n"
},
{
"path": "doc/src/SUMMARY.md",
"chars": 362,
"preview": "# Summary\n\n[Avant Propos](./avant-propos.md)\n\n- [Segments](./basis.md)\n- [Defining your schema](./schema.md)\n- [Facettin"
},
{
"path": "doc/src/avant-propos.md",
"chars": 1867,
"preview": "# Foreword, what is the scope of tantivy?\n\n> Tantivy is a **search** engine **library** for Rust.\n\nIf you are familiar w"
},
{
"path": "doc/src/basis.md",
"chars": 4442,
"preview": "# Anatomy of an index\n\n## Straight from disk\n\nTantivy accesses its data using an abstracting trait called `Directory`.\nI"
},
{
"path": "doc/src/best_practise.md.rs",
"chars": 0,
"preview": ""
},
{
"path": "doc/src/examples.md",
"chars": 58,
"preview": "# Examples\n\n- [Basic search](/examples/basic_search.html)\n"
},
{
"path": "doc/src/facetting.md",
"chars": 30,
"preview": "# Facetting\n\nwewew\n\n## weeewe\n"
},
{
"path": "doc/src/faq.md",
"chars": 0,
"preview": ""
},
{
"path": "doc/src/index_sorting.md",
"chars": 2847,
"preview": "\n- [Index Sorting](#index-sorting)\n - [Why Sorting](#why-sorting)\n - [Compression](#compression)\n - [Top-N Optimi"
},
{
"path": "doc/src/innerworkings.md",
"chars": 16,
"preview": "# Innerworkings\n"
},
{
"path": "doc/src/inverted_index.md",
"chars": 17,
"preview": "# Inverted index\n"
},
{
"path": "doc/src/json.md",
"chars": 4557,
"preview": "# Json\n\nAs of tantivy 0.17, tantivy supports a json object type.\nThis type can be used to allow for a schema-less search"
},
{
"path": "doc/src/schema.md",
"chars": 23,
"preview": "# Defining your schema\n"
},
{
"path": "examples/aggregation.rs",
"chars": 8704,
"preview": "// # Aggregation example\n//\n// This example shows how you can use built-in aggregations.\n// We will use nested aggregati"
},
{
"path": "examples/basic_search.rs",
"chars": 8878,
"preview": "// # Basic Example\n//\n// This example covers the basic functionalities of\n// tantivy.\n//\n// We will :\n// - define our sc"
},
{
"path": "examples/custom_collector.rs",
"chars": 5685,
"preview": "// # Custom collector example\n//\n// This example shows how you can implement your own\n// collector. As an example, we wi"
},
{
"path": "examples/custom_tokenizer.rs",
"chars": 4635,
"preview": "// # Defining a tokenizer pipeline\n//\n// In this example, we'll see how to define a tokenizer\n// by creating a custom `N"
},
{
"path": "examples/date_time_field.rs",
"chars": 2714,
"preview": "// # DateTime field example\n//\n// This example shows how the DateTime field can be used\n\nuse tantivy::collector::TopDocs"
},
{
"path": "examples/deleting_updating_documents.rs",
"chars": 5378,
"preview": "// # Deleting and Updating (?) documents\n//\n// This example explains how to delete and update documents.\n// In fact ther"
},
{
"path": "examples/faceted_search.rs",
"chars": 4276,
"preview": "// # Faceted Search\n//\n// This example covers the faceted search functionalities of\n// tantivy.\n//\n// We will :\n// - def"
},
{
"path": "examples/faceted_search_with_tweaked_score.rs",
"chars": 3816,
"preview": "// # Faceted Search With Tweak Score\n//\n// This example covers the faceted search functionalities of\n// tantivy.\n//\n// W"
},
{
"path": "examples/filter_aggregation.rs",
"chars": 7323,
"preview": "// # Filter Aggregation Example\n//\n// This example demonstrates filter aggregations - creating buckets of documents\n// m"
},
{
"path": "examples/fuzzy_search.rs",
"chars": 5667,
"preview": "// # Basic Example\n//\n// This example covers the basic functionalities of\n// tantivy.\n//\n// We will :\n// - define our sc"
},
{
"path": "examples/index_from_multiple_threads.rs",
"chars": 4719,
"preview": "// # Indexing from different threads.\n//\n// It is fairly common to have to index from different threads.\n// Tantivy forb"
},
{
"path": "examples/index_with_json.rs",
"chars": 1449,
"preview": "use tantivy::schema::*;\n\n// # Document from json\n//\n// For convenience, `Document` can be parsed directly from json.\nfn "
},
{
"path": "examples/integer_range_search.rs",
"chars": 1601,
"preview": "use std::ops::Bound;\n\n// # Searching a range on an indexed int field.\n//\n// Below is an example of creating an indexed i"
},
{
"path": "examples/ip_field.rs",
"chars": 4048,
"preview": "// # IP Address example\n//\n// This example shows how the ip field can be used\n// with IpV6 and IpV4.\n\nuse tantivy::colle"
},
{
"path": "examples/iterating_docs_and_positions.rs",
"chars": 6018,
"preview": "// # Iterating docs and positions.\n//\n// At its core of tantivy, relies on a data structure\n// called an inverted index."
},
{
"path": "examples/json_field.rs",
"chars": 4072,
"preview": "// # Json field example\n//\n// This example shows how the json field can be used\n// to make tantivy partially schemaless "
},
{
"path": "examples/phrase_prefix_search.rs",
"chars": 3340,
"preview": "use tantivy::collector::TopDocs;\nuse tantivy::query::QueryParser;\nuse tantivy::schema::*;\nuse tantivy::{doc, Index, Inde"
},
{
"path": "examples/pre_tokenized_text.rs",
"chars": 4675,
"preview": "// # Pre-tokenized text example\n//\n// This example shows how to use pre-tokenized text. Sometimes you might\n// want to i"
},
{
"path": "examples/snippet.rs",
"chars": 3244,
"preview": "// # Snippet example\n//\n// This example shows how to return a representative snippet of\n// your hit result.\n// Snippet a"
},
{
"path": "examples/stop_words.rs",
"chars": 4416,
"preview": "// # Stop Words Example\n//\n// This example covers the basic usage of stop words\n// with tantivy\n//\n// We will :\n// - def"
},
{
"path": "examples/warmer.rs",
"chars": 7164,
"preview": "use std::cmp::Reverse;\nuse std::collections::{HashMap, HashSet};\nuse std::sync::{Arc, RwLock, Weak};\n\nuse tantivy::colle"
},
{
"path": "ownedbytes/Cargo.toml",
"chars": 505,
"preview": "[package]\nauthors = [\"Paul Masurel <paul@quickwit.io>\", \"Pascal Seitz <pascal@quickwit.io>\"]\nname = \"ownedbytes\"\nversion"
},
{
"path": "ownedbytes/src/lib.rs",
"chars": 10816,
"preview": "use std::ops::{Deref, Range};\nuse std::sync::Arc;\nuse std::{fmt, io};\n\npub use stable_deref_trait::StableDeref;\n\n/// An "
},
{
"path": "query-grammar/Cargo.toml",
"chars": 569,
"preview": "[package]\nname = \"tantivy-query-grammar\"\nversion = \"0.25.0\"\nauthors = [\"Paul Masurel <paul.masurel@gmail.com>\"]\nlicense "
},
{
"path": "query-grammar/README.md",
"chars": 73,
"preview": "# Tantivy Query Grammar\n\nThis crate is used by tantivy to parse queries.\n"
},
{
"path": "query-grammar/src/infallible.rs",
"chars": 13861,
"preview": "//! nom combinators for infallible operations\n\nuse std::convert::Infallible;\n\nuse nom::{AsChar, IResult, InputLength, In"
},
{
"path": "query-grammar/src/lib.rs",
"chars": 2217,
"preview": "#![allow(clippy::derive_partial_eq_without_eq)]\n\nuse serde::Serialize;\n\nmod infallible;\nmod occur;\nmod query_grammar;\nmo"
},
{
"path": "query-grammar/src/occur.rs",
"chars": 2371,
"preview": "use std::fmt;\nuse std::fmt::Write;\n\nuse serde::Serialize;\n\n/// Defines whether a term in a query must be present,\n/// sh"
},
{
"path": "query-grammar/src/query_grammar.rs",
"chars": 69260,
"preview": "use std::borrow::Cow;\nuse std::iter::once;\n\nuse fnv::FnvHashSet;\nuse nom::IResult;\nuse nom::branch::alt;\nuse nom::bytes:"
},
{
"path": "query-grammar/src/user_input_ast.rs",
"chars": 15351,
"preview": "use std::fmt;\nuse std::fmt::{Debug, Formatter};\n\nuse serde::Serialize;\n\nuse crate::Occur;\n\n#[derive(PartialEq, Eq, Hash,"
},
{
"path": "rustfmt.toml",
"chars": 180,
"preview": "comment_width = 120\nformat_strings = true\ngroup_imports = \"StdExternalCrate\"\nimports_granularity = \"Module\"\nnormalize_co"
},
{
"path": "src/aggregation/README.md",
"chars": 1273,
"preview": "# Contributing\n\nWhen adding new bucket aggregation make sure to extend the \"test_aggregation_flushing\" test for at least"
},
{
"path": "src/aggregation/accessor_helpers.rs",
"chars": 3791,
"preview": "//! This will enhance the request tree with access to the fastfield and metadata.\n\nuse std::io;\n\nuse columnar::{Column, "
},
{
"path": "src/aggregation/agg_data.rs",
"chars": 44124,
"preview": "use columnar::{Column, ColumnBlockAccessor, ColumnType, StrColumn};\nuse common::BitSet;\nuse rustc_hash::FxHashSet;\nuse s"
},
{
"path": "src/aggregation/agg_limits.rs",
"chars": 10466,
"preview": "use std::collections::HashMap;\nuse std::sync::atomic::{AtomicU64, Ordering};\nuse std::sync::Arc;\n\nuse common::ByteCount;"
},
{
"path": "src/aggregation/agg_req.rs",
"chars": 13889,
"preview": "//! Contains the aggregation request tree. Used to build an\n//! [`AggregationCollector`](super::AggregationCollector).\n/"
},
{
"path": "src/aggregation/agg_result.rs",
"chars": 14349,
"preview": "//! Contains the final aggregation tree.\n//!\n//! This tree can be converted via the `into()` method from `IntermediateAg"
},
{
"path": "src/aggregation/agg_tests.rs",
"chars": 46913,
"preview": "use serde_json::Value;\n\nuse crate::aggregation::agg_req::{Aggregation, Aggregations};\nuse crate::aggregation::agg_result"
},
{
"path": "src/aggregation/bucket/composite/accessors.rs",
"chars": 21959,
"preview": "use std::net::Ipv6Addr;\n\nuse columnar::column_values::{CompactHit, CompactSpaceU64Accessor};\nuse columnar::{Column, Colu"
},
{
"path": "src/aggregation/bucket/composite/calendar_interval.rs",
"chars": 4866,
"preview": "use time::convert::{Day, Nanosecond};\nuse time::{Time, UtcDateTime};\n\nconst NS_IN_DAY: i64 = Nanosecond::per_t::<i128>(D"
},
{
"path": "src/aggregation/bucket/composite/collector.rs",
"chars": 25922,
"preview": "use std::fmt::Debug;\nuse std::mem;\nuse std::net::Ipv6Addr;\n\nuse columnar::column_values::CompactSpaceU64Accessor;\nuse co"
},
{
"path": "src/aggregation/bucket/composite/map.rs",
"chars": 15355,
"preview": "use std::collections::BinaryHeap;\nuse std::fmt::Debug;\nuse std::hash::Hash;\n\nuse rustc_hash::FxHashMap;\nuse smallvec::Sm"
},
{
"path": "src/aggregation/bucket/composite/mod.rs",
"chars": 75800,
"preview": "mod accessors;\nmod calendar_interval;\nmod collector;\nmod map;\nmod numeric_types;\n\nuse core::panic;\nuse std::cmp::Orderin"
},
{
"path": "src/aggregation/bucket/composite/numeric_types.rs",
"chars": 18283,
"preview": "/// This module helps comparing numerical values of different types (i64, u64\n/// and f64).\npub(super) mod num_cmp {\n "
},
{
"path": "src/aggregation/bucket/filter.rs",
"chars": 64512,
"preview": "use std::fmt::Debug;\n\nuse common::BitSet;\nuse serde::{Deserialize, Deserializer, Serialize, Serializer};\n\nuse crate::agg"
},
{
"path": "src/aggregation/bucket/histogram/date_histogram.rs",
"chars": 25356,
"preview": "use serde::{Deserialize, Serialize};\n\nuse super::{HistogramAggregation, HistogramBounds};\nuse crate::aggregation::*;\n\n//"
},
{
"path": "src/aggregation/bucket/histogram/histogram.rs",
"chars": 50232,
"preview": "use std::cmp::Ordering;\n\nuse columnar::{Column, ColumnType};\nuse rustc_hash::FxHashMap;\nuse serde::{Deserialize, Seriali"
},
{
"path": "src/aggregation/bucket/histogram/mod.rs",
"chars": 84,
"preview": "mod date_histogram;\nmod histogram;\npub use date_histogram::*;\npub use histogram::*;\n"
},
{
"path": "src/aggregation/bucket/mod.rs",
"chars": 7145,
"preview": "//! Module for all bucket aggregations.\n//!\n//! BucketAggregations create buckets of documents.\n//! Each bucket is assoc"
},
{
"path": "src/aggregation/bucket/range.rs",
"chars": 34011,
"preview": "use std::fmt::Debug;\nuse std::ops::Range;\n\nuse columnar::{Column, ColumnType};\nuse rustc_hash::FxHashMap;\nuse serde::{De"
},
{
"path": "src/aggregation/bucket/term_agg.rs",
"chars": 108552,
"preview": "use std::fmt::Debug;\nuse std::io;\nuse std::net::Ipv6Addr;\n\nuse columnar::column_values::CompactSpaceU64Accessor;\nuse col"
},
{
"path": "src/aggregation/bucket/term_missing_agg.rs",
"chars": 19441,
"preview": "use columnar::{Column, ColumnType};\nuse rustc_hash::FxHashMap;\n\nuse crate::aggregation::agg_data::{\n build_segment_ag"
},
{
"path": "src/aggregation/cached_sub_aggs.rs",
"chars": 7966,
"preview": "use std::fmt::Debug;\n\nuse super::segment_agg_result::SegmentAggregationCollector;\nuse crate::aggregation::agg_data::Aggr"
},
{
"path": "src/aggregation/collector.rs",
"chars": 6826,
"preview": "use super::agg_req::Aggregations;\nuse super::agg_result::AggregationResults;\nuse super::cached_sub_aggs::LowCardCachedSu"
},
{
"path": "src/aggregation/date.rs",
"chars": 575,
"preview": "use time::format_description::well_known::Rfc3339;\nuse time::OffsetDateTime;\n\nuse crate::TantivyError;\n\npub(crate) fn fo"
}
]
// ... and 304 more files (download for full content)
About this extraction
This page contains the full source code of the quickwit-oss/tantivy GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 504 files (29.6 MB), approximately 2.2M tokens, and a symbol index with 7762 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.