Showing preview only (749K chars total). Download the full file or copy to clipboard to get everything.
Repository: valeriansaliou/sonic
Branch: master
Commit: 75ec203693a3
Files: 153
Total size: 708.9 KB
Directory structure:
gitextract_z4cxjv9t/
├── .dockerignore
├── .github/
│ ├── FUNDING.yml
│ └── workflows/
│ ├── build.yml
│ └── test.yml
├── .gitignore
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONFIGURATION.md
├── CONTRIBUTING.md
├── Cargo.toml
├── Dockerfile
├── INNER_WORKINGS.md
├── LICENSE.md
├── PACKAGING.md
├── PROTOCOL.md
├── README.md
├── config.cfg
├── debian/
│ ├── changelog
│ ├── compat
│ ├── control
│ ├── copyright
│ ├── rules
│ ├── sonic.install
│ ├── sonic.postinst
│ ├── sonic.service
│ └── source/
│ └── format
├── scripts/
│ ├── build_packages.sh
│ ├── release_binaries.sh
│ └── sign_binaries.sh
├── src/
│ ├── channel/
│ │ ├── command.rs
│ │ ├── format.rs
│ │ ├── handle.rs
│ │ ├── listen.rs
│ │ ├── macros.rs
│ │ ├── message.rs
│ │ ├── mod.rs
│ │ ├── mode.rs
│ │ └── statistics.rs
│ ├── config/
│ │ ├── defaults.rs
│ │ ├── env_var.rs
│ │ ├── logger.rs
│ │ ├── mod.rs
│ │ ├── options.rs
│ │ └── reader.rs
│ ├── executor/
│ │ ├── count.rs
│ │ ├── flushb.rs
│ │ ├── flushc.rs
│ │ ├── flusho.rs
│ │ ├── list.rs
│ │ ├── macros.rs
│ │ ├── mod.rs
│ │ ├── pop.rs
│ │ ├── push.rs
│ │ ├── search.rs
│ │ └── suggest.rs
│ ├── lexer/
│ │ ├── mod.rs
│ │ ├── ranges.rs
│ │ ├── stopwords.rs
│ │ └── token.rs
│ ├── main.rs
│ ├── query/
│ │ ├── actions.rs
│ │ ├── builder.rs
│ │ ├── mod.rs
│ │ └── types.rs
│ ├── stopwords/
│ │ ├── afr.rs
│ │ ├── aka.rs
│ │ ├── amh.rs
│ │ ├── ara.rs
│ │ ├── aze.rs
│ │ ├── bel.rs
│ │ ├── ben.rs
│ │ ├── bul.rs
│ │ ├── cat.rs
│ │ ├── ces.rs
│ │ ├── cmn.rs
│ │ ├── dan.rs
│ │ ├── deu.rs
│ │ ├── ell.rs
│ │ ├── eng.rs
│ │ ├── epo.rs
│ │ ├── est.rs
│ │ ├── fin.rs
│ │ ├── fra.rs
│ │ ├── guj.rs
│ │ ├── heb.rs
│ │ ├── hin.rs
│ │ ├── hrv.rs
│ │ ├── hun.rs
│ │ ├── hye.rs
│ │ ├── ind.rs
│ │ ├── ita.rs
│ │ ├── jav.rs
│ │ ├── jpn.rs
│ │ ├── kan.rs
│ │ ├── kat.rs
│ │ ├── khm.rs
│ │ ├── kor.rs
│ │ ├── lat.rs
│ │ ├── lav.rs
│ │ ├── lit.rs
│ │ ├── mal.rs
│ │ ├── mar.rs
│ │ ├── mkd.rs
│ │ ├── mod.rs
│ │ ├── mya.rs
│ │ ├── nep.rs
│ │ ├── nld.rs
│ │ ├── nob.rs
│ │ ├── ori.rs
│ │ ├── pan.rs
│ │ ├── pes.rs
│ │ ├── pol.rs
│ │ ├── por.rs
│ │ ├── ron.rs
│ │ ├── rus.rs
│ │ ├── sin.rs
│ │ ├── slk.rs
│ │ ├── slv.rs
│ │ ├── sna.rs
│ │ ├── spa.rs
│ │ ├── srp.rs
│ │ ├── swe.rs
│ │ ├── tam.rs
│ │ ├── tel.rs
│ │ ├── tgl.rs
│ │ ├── tha.rs
│ │ ├── tuk.rs
│ │ ├── tur.rs
│ │ ├── ukr.rs
│ │ ├── urd.rs
│ │ ├── uzb.rs
│ │ ├── vie.rs
│ │ ├── yid.rs
│ │ └── zul.rs
│ ├── store/
│ │ ├── fst.rs
│ │ ├── generic.rs
│ │ ├── identifiers.rs
│ │ ├── item.rs
│ │ ├── keyer.rs
│ │ ├── kv.rs
│ │ ├── macros.rs
│ │ ├── mod.rs
│ │ └── operation.rs
│ └── tasker/
│ ├── mod.rs
│ ├── runtime.rs
│ └── shutdown.rs
└── tests/
└── integration/
├── .gitignore
├── instance/
│ └── config.cfg
├── runner/
│ ├── package.json
│ └── runner.js
├── scenarios/
│ ├── insert.js
│ └── ping.js
└── scripts/
└── run.sh
================================================
FILE CONTENTS
================================================
================================================
FILE: .dockerignore
================================================
# Contents of these directories are excluded from the Docker build context.
tests/*
target/*
data/*
================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
github: valeriansaliou
================================================
FILE: .github/workflows/build.yml
================================================
# Release workflow: runs only when a tag matching "v*.*.*" is pushed.
on:
push:
tags:
- "v*.*.*"
name: Build and Release
jobs:
# Publishes the crate, runs the binary release script and creates the GitHub
# release for the tag. The runner image is pinned to ubuntu-22.04.
build-releases:
runs-on: ubuntu-22.04
steps:
- name: Checkout code
uses: actions/checkout@v2
# Caches Cargo's bin/registry/git directories and target/ under a single
# fixed key per runner OS (the key does not vary with the lockfile or tag).
- name: Cache build artifacts
id: cache-cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/bin
~/.cargo/registry
~/.cargo/git
target
key: build-${{ runner.os }}-cargo-any
- name: Install Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
components: rustfmt
override: true
- name: Verify versions
run: rustc --version && rustup --version && cargo --version
# Exposes the tag as steps.current_tag.outputs.tag, which the release steps
# below consume.
# NOTE(review): presumably resolves to the tag that triggered this run; confirm
# against the action's documentation.
- name: Get current tag
id: current_tag
uses: WyriHaximus/github-action-get-previous-tag@v1
# --no-verify skips building the packaged crate before upload; the registry
# token is read from the CRATES_TOKEN secret through the environment.
- name: Release package
run: cargo publish --no-verify --token ${CRATES_TOKEN}
env:
CRATES_TOKEN: ${{ secrets.CRATES_TOKEN }}
- name: Release binaries
run: ./scripts/release_binaries.sh --version=${{ steps.current_tag.outputs.tag }}
# Creates the GitHub release named "Sonic <tag>" and attaches every file
# matching ./<tag>-*.tar.gz (presumably produced by the release script above).
- name: Release new version
uses: softprops/action-gh-release@v1
with:
tag_name: ${{ steps.current_tag.outputs.tag }}
name: Sonic ${{ steps.current_tag.outputs.tag }}
body: "⚠️ Changelog not yet provided."
files: ./${{ steps.current_tag.outputs.tag }}-*.tar.gz
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Runs only after build-releases has succeeded (needs:). Builds packages with
# scripts/build_packages.sh and uploads the ./packages directory to Packagecloud
# using the repository and token stored in secrets.
build-packages:
needs: build-releases
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Build packages
run: ./scripts/build_packages.sh
- name: Push packages to Packagecloud
uses: faucetsdn/action-packagecloud-upload-debian-packages@v1
with:
path: ./packages
repo: ${{ secrets.PACKAGECLOUD_REPO }}
token: ${{ secrets.PACKAGECLOUD_TOKEN }}
# Builds the Docker image from the repository root and pushes it to the
# valeriansaliou/sonic image. This job declares no needs:, so it does not wait
# for the other jobs in this workflow.
build-docker:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
# Produces the tags and labels that the build step below applies to the image.
- name: Acquire Docker image metadata
id: metadata
uses: docker/metadata-action@v4
with:
images: valeriansaliou/sonic
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
id: build
with:
context: .
tags: ${{ steps.metadata.outputs.tags }}
labels: ${{ steps.metadata.outputs.labels }}
push: true
================================================
FILE: .github/workflows/test.yml
================================================
# CI workflow: runs on every push and on every pull request.
on: [push, pull_request]
name: Test and Build
jobs:
# Builds, unit-tests, style-checks and integration-tests the project.
test:
# Single-entry matrix (ubuntu-latest, stable toolchain). fail-fast is disabled,
# so a failing combination would not cancel the others if entries are added.
strategy:
matrix:
os: [ubuntu-latest]
rust-toolchain: [stable]
fail-fast: false
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v2
# Cargo registry/git directories and target/, keyed by runner OS and toolchain.
- name: Cache build artifacts
id: cache-cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: test-${{ runner.os }}-cargo-${{ matrix.rust-toolchain }}
# node_modules of the integration test runner, cached separately.
- name: Cache integration artifacts
id: cache-integration
uses: actions/cache@v4
with:
path: |
tests/integration/runner/node_modules
key: test-${{ runner.os }}-integration-${{ matrix.rust-toolchain }}
- name: Install Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: ${{ matrix.rust-toolchain }}
components: rustfmt
override: true
- name: Install NodeJS
uses: actions/setup-node@v1
- name: Verify versions
run: rustc --version && rustup --version && cargo --version && node --version && npm --version
- name: Build code
run: cargo build
- name: Test code
run: cargo test
# --check makes rustfmt report formatting differences and fail instead of
# rewriting files.
- name: Check code style
run: cargo fmt -- --check
- name: Run integration tests
run: tests/integration/scripts/run.sh
================================================
FILE: .gitignore
================================================
# Contents of the target/ directory
target/*
# macOS Finder metadata
.DS_Store
# Files ending in "~" or "#" (presumably editor backup/autosave files)
*~
*#
.cargo
# Contents of the FST and KV store directories under data/store/
data/store/fst/*
data/store/kv/*
================================================
FILE: CHANGELOG.md
================================================
Sonic Changelog
===============
## 1.4.9 (2024-06-16)
### Changes
* Update Rust code style to conform to new `rustc` requirements (preventing builds on `rustc 1.79.0` and further) [[@jaseemabid](https://github.com/jaseemabid), [#321](https://github.com/valeriansaliou/sonic/pull/321)].
## 1.4.8 (2023-12-14)
### Changes
* Pull out the `arm64` platform from the Docker image, since it does not build in acceptable time via GitHub Actions due to using QEMU emulation (will wait until GitHub Actions provides a native `arm64` runner) [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.4.7 (2023-12-14)
### Bug Fixes
* Fixed non-working `arm64` builds due to hardcoded `x86_64-unknown-linux-gnu` Rust target in the `Dockerfile` [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.4.6 (2023-12-14)
### New Features
* The Docker image is now also available for the `arm64` platform, in addition to `amd64` [[@PovilasID](https://github.com/PovilasID), [#310](https://github.com/valeriansaliou/sonic/pull/310)].
## 1.4.5 (2023-12-11)
### Bug Fixes
* Fixed an issue where system clock can move back to the past on a virtualized system, resulting in client threads entering a crash loop due to mutex poisoning [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.4.4 (2023-12-08)
### Bug Fixes
* Fixed `rocksdb` not building due to a `rust-bindgen` version which was not compatible with `clang` version 16 [[@anthonyroussel](https://github.com/anthonyroussel), [#316](https://github.com/valeriansaliou/sonic/pull/316)].
### Changes
* Dependencies have been bumped to latest versions (namely: `rocksdb`, `toml`, `regex-syntax`, `hashbrown`, `lindera-core`, `lindera-dictionary`, `lindera-tokenizer`) [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.4.3 (2023-09-04)
### Changes
* Publish `.deb` packages for Debian 12 on `x86_64` architecture [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.4.2 (2023-09-04)
### Changes
* Produce `glibc` builds from GitHub Actions whenever a new Sonic version gets released [[@valeriansaliou](https://github.com/valeriansaliou)].
* Pull out `tokenizer-japanese` from the default features, as it multiplies the final binary size by 10 [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.4.1 (2023-08-12)
### New Features
* Added support for Japanese word segmentation in tokenizer (note that as this adds quite some size overhead to the final binary size, the feature `tokenizer-japanese` can be disabled when building Sonic) [[@nmkj-io](https://github.com/nmkj-io), [#311](https://github.com/valeriansaliou/sonic/pull/311)].
## 1.4.0 (2022-10-20)
### Bug Fixes
* Fixed typo in README abstract [[@remram44](https://github.com/remram44), [#295](https://github.com/valeriansaliou/sonic/pull/295)].
* Fixed typos in code and documentation [[@kianmeng](https://github.com/kianmeng), [#294](https://github.com/valeriansaliou/sonic/pull/294)].
### Changes
* Replaced Docker source image from Debian Slim to lighter Google distroless image [[@0x0x1](https://github.com/0x0x1), [#282](https://github.com/valeriansaliou/sonic/pull/282)].
### New Features
* Added an index enumeration `LIST` command to Sonic Channel [[@trkohler](https://github.com/trkohler), [#293](https://github.com/valeriansaliou/sonic/pull/293)].
## 1.3.5 (2022-07-10)
### Bug Fixes
* Rolled back `rocksdb` version, as the latest version does not link properly in `--release` mode [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.3.4 (2022-07-10)
### Changes
* Dependencies have been bumped to latest versions (namely: `rocksdb`, `clap`, `regex`) [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.3.3 (2022-07-07)
### Changes
* Dependencies have been bumped to latest versions (namely: `hashbrown`, `whatlang`, `regex`) [[@valeriansaliou](https://github.com/valeriansaliou)].
* Moved the release pipeline to GitHub Actions [[@valeriansaliou](https://github.com/valeriansaliou)].
### New Features
* The language detection system is now about 2x faster (due to the upgrade of `whatlang` past `v0.14.0`) [[@valeriansaliou](https://github.com/valeriansaliou)].
* Added Armenian stopwords [[@valeriansaliou](https://github.com/valeriansaliou)].
* Added Georgian stopwords [[@valeriansaliou](https://github.com/valeriansaliou)].
* Added Gujarati stopwords [[@valeriansaliou](https://github.com/valeriansaliou)].
* Added Tagalog stopwords [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.3.2 (2021-11-09)
### Bug Fixes
* Fixed Norwegian stopwords [[@valeriansaliou](https://github.com/valeriansaliou), [#239](https://github.com/valeriansaliou/sonic/issues/239)].
### Changes
* Code has been formatted according to `clippy` recommendations. This does not change the way Sonic behaves [[@pleshevskiy](https://github.com/pleshevskiy), [#233](https://github.com/valeriansaliou/sonic/pull/233)].
### New Features
* Added support for Chinese word segmentation in tokenizer (note that as this adds quite some size overhead to the final binary size, the feature `tokenizer-chinese` can be disabled when building Sonic) [[@vincascm](https://github.com/vincascm), [#209](https://github.com/valeriansaliou/sonic/pull/209)].
## 1.3.1 (2021-11-02)
### Changes
* Apple Silicon is now supported [[@valeriansaliou](https://github.com/valeriansaliou)].
* Added Norwegian stopwords [[@mikalv](https://github.com/mikalv), [#236](https://github.com/valeriansaliou/sonic/pull/236)].
* Added Catalan stopwords [[@coopanio](https://github.com/coopanio), [#227](https://github.com/valeriansaliou/sonic/pull/227)].
* Dependencies have been bumped to latest versions (namely: `rocksdb`, `fst-levenshtein`, `fst-regex`, `hashbrown`, `whatlang`, `byteorder`, `rand`) [[@valeriansaliou](https://github.com/valeriansaliou)].
### Deprecations
* A few rarely-used languages have been removed, following `whatlang` `v0.12.0` release, [see the notes here](https://github.com/greyblake/whatlang-rs/blob/master/CHANGELOG.md#v0120---2021-04-18) [[@valeriansaliou](https://github.com/valeriansaliou), [940d3c3](https://github.com/valeriansaliou/sonic/commit/940d3c3070e144a10f041fcfdf77d15548598eee)].
## 1.3.0 (2020-06-27)
### Changes
* Added support for Slovak, which is now auto-detected from terms [[@valeriansaliou](https://github.com/valeriansaliou), [19412ce](https://github.com/valeriansaliou/sonic/commit/19412ce05a802ef1e6054b751faaef50cab5d36b)].
* Added Slovak stopwords [[@valeriansaliou](https://github.com/valeriansaliou), [19412ce](https://github.com/valeriansaliou/sonic/commit/19412ce05a802ef1e6054b751faaef50cab5d36b)].
* Dependencies have been bumped to latest versions (namely: `whatlang`) [[@valeriansaliou](https://github.com/valeriansaliou), [19412ce](https://github.com/valeriansaliou/sonic/commit/19412ce05a802ef1e6054b751faaef50cab5d36b)].
## 1.2.4 (2020-06-25)
### Bug Fixes
* Fixed multiple deadlocks, which were not noticed in practice by running Sonic at scale, but that are still theoretically possible [[@BurtonQin](https://github.com/BurtonQin), [#213](https://github.com/valeriansaliou/sonic/pull/213), [#211](https://github.com/valeriansaliou/sonic/pull/211)].
### Changes
* Added support for Latin, which is now auto-detected from terms [[@valeriansaliou](https://github.com/valeriansaliou), [e6c5621](https://github.com/valeriansaliou/sonic/commit/e6c5621ba0fabe83b8bc060824951006b373dc3f)].
* Added Latin stopwords [[@valeriansaliou](https://github.com/valeriansaliou), [e6c5621](https://github.com/valeriansaliou/sonic/commit/e6c5621ba0fabe83b8bc060824951006b373dc3f)].
* Dependencies have been bumped to latest versions (namely: `rocksdb`, `radix`, `hashbrown`, `whatlang`) [[@valeriansaliou](https://github.com/valeriansaliou)].
### New Features
* Added a release script, with cross-compilation capabilities (currently for the `x86_64` architecture, dynamically linked against GNU libraries) [[@valeriansaliou](https://github.com/valeriansaliou), [961bab9](https://github.com/valeriansaliou/sonic/commit/961bab92211295e99f1f6052577fa1aeff459d0c)].
## 1.2.3 (2019-10-14)
### Changes
* RocksDB compression algorithm has been changed from LZ4 to Zstandard, for a slightly better compression ratio, and much better read/write performance; this will be used for new SST files only [[@valeriansaliou](https://github.com/valeriansaliou), [cd4cdfb](https://github.com/valeriansaliou/sonic/commit/cd4cdfb756ae9eccd43dc7e73d2c115b33297714)].
* Dependencies have been bumped to latest versions (namely: `rocksdb`) [[@valeriansaliou](https://github.com/valeriansaliou), [cd4cdfb](https://github.com/valeriansaliou/sonic/commit/cd4cdfb756ae9eccd43dc7e73d2c115b33297714)].
## 1.2.2 (2019-07-12)
### Bug Fixes
* Fixed a regression on optional configuration values not working anymore, due to an issue in the environment variable reading system introduced in `v1.2.1` [[@valeriansaliou](https://github.com/valeriansaliou), [#155](https://github.com/valeriansaliou/sonic/issues/155)].
### Changes
* Optimized some aspects of FST consolidation and pending operations management [[@valeriansaliou](https://github.com/valeriansaliou), [#156](https://github.com/valeriansaliou/sonic/issues/156)].
## 1.2.1 (2019-07-08)
### Changes
* FST graph consolidation is now able to ignore new words when the graph is over configured limits, which are set with the new `store.fst.graph.max_size` and `store.fst.graph.max_words` configuration variables [[@valeriansaliou](https://github.com/valeriansaliou), [53db9c1](https://github.com/valeriansaliou/sonic/commit/53db9c186630a6751c0a85e610cebabace1aee2b)].
* An integration testing infrastructure has been added to the Sonic automated test suite [[@vilunov](https://github.com/vilunov), [#154](https://github.com/valeriansaliou/sonic/pull/154)].
* Configuration values can now be sourced from environment variables, using the `${env.VARIABLE}` syntax in `config.cfg` [[@perzanko](https://github.com/perzanko), [#148](https://github.com/valeriansaliou/sonic/pull/148)].
* Dependencies have been bumped to latest versions (namely: `rand`, `radix` and `hashbrown`) [[@valeriansaliou](https://github.com/valeriansaliou), [c1b1f54](https://github.com/valeriansaliou/sonic/commit/c1b1f54ad836df553bec0cd14f041bb34058307c)].
## 1.2.0 (2019-05-03)
### Bug Fixes
* Fixed a rare deadlock occurring when 3 concurrent operations get executed on different threads for the same collection, in the following timely order: `PUSH` then `FLUSHB` then `PUSH` [[@valeriansaliou](https://github.com/valeriansaliou), [d96546b](https://github.com/valeriansaliou/sonic/commit/d96546bd9d8b79332df1106766377e4a4acebd50)].
### Changes
* Reworked the KV store manager to perform periodic memory flushes to disk, thus reducing startup time [[@valeriansaliou](https://github.com/valeriansaliou), [6713488](https://github.com/valeriansaliou/sonic/commit/6713488af3543bca33be6e772936f9668430ba86)].
* Stop accepting Sonic Channel commands when shutting down Sonic [[@valeriansaliou](https://github.com/valeriansaliou), [#131](https://github.com/valeriansaliou/sonic/issues/131)].
### New Features
* Introduced a server statistics `INFO` command to Sonic Channel [[@valeriansaliou](https://github.com/valeriansaliou), [#70](https://github.com/valeriansaliou/sonic/issues/70)].
* Added the ability to disable the lexer for a command with the command modifier `LANG(none)` [[@valeriansaliou](https://github.com/valeriansaliou), [#108](https://github.com/valeriansaliou/sonic/issues/108)].
* Added a backup and restore system for both KV and FST stores, which can be triggered over Sonic Channel with `TRIGGER backup` and `TRIGGER restore` [[@valeriansaliou](https://github.com/valeriansaliou), [#5](https://github.com/valeriansaliou/sonic/issues/5)].
* Added the ability to disable KV store WAL (Write-Ahead Log) with the `write_ahead_log` option, which helps limit write wear on heavily loaded SSD-backed servers [[@valeriansaliou](https://github.com/valeriansaliou), [#130](https://github.com/valeriansaliou/sonic/issues/130)].
## 1.1.9 (2019-03-29)
### Bug Fixes
* RocksDB has been bumped to `v5.18.3`, which fixes a dead-lock occurring in RocksDB at scale when a compaction task is run under heavy disk writes (ie. disk flushes). This dead-lock was causing Sonic to stop responding to any command issued for the frozen collection. This dead-lock was due to a bug in RocksDB internals (not originating from Sonic itself) [[@baptistejamin](https://github.com/baptistejamin), [19c4a10](https://github.com/baptistejamin/sonic/commit/19c4a104a6d6aaed1dd9beb2e51d2639627825cd)].
### Changes
* Reworked the `FLUSHB` command internals, which now use the atomic `delete_range()` operation provided by RocksDB `v5.18` [[@valeriansaliou](https://github.com/valeriansaliou), [660f8b7](https://github.com/valeriansaliou/sonic/commit/660f8b714d968400fb9f88a245752dca02249bf7)].
### New Features
* Added the `LANG(<locale>)` command modifier for `QUERY` and `PUSH`, that lets a Sonic Channel client force a text locale (instead of letting the lexer system guess the text language) [[@valeriansaliou](https://github.com/valeriansaliou), [#75](https://github.com/valeriansaliou/sonic/issues/75)].
* The FST word lookup system, used by the `SUGGEST` command, now supports all scripts via a restricted Unicode range forward scan [[@valeriansaliou](https://github.com/valeriansaliou), [#64](https://github.com/valeriansaliou/sonic/issues/64)].
## 1.1.8 (2019-03-27)
### Bug Fixes
* A store acquire lock has been added to prevent 2 concurrent threads from opening the same collection at the same time [[@valeriansaliou](https://github.com/valeriansaliou), [2628077](https://github.com/valeriansaliou/sonic/commit/2628077ebe7e24155975962471e7653745a0add7)].
## 1.1.7 (2019-03-27)
### Bug Fixes
* A superfluous mutex was removed from KV and FST store managers, in an attempt to solve a rare dead-lock occurring on high-traffic Sonic setups in the KV store [[@valeriansaliou](https://github.com/valeriansaliou), [60566d2](https://github.com/valeriansaliou/sonic/commit/60566d2f087fd6725dba4a60c3c5a3fef7e8399b)].
## 1.1.6 (2019-03-27)
### Changes
* Reverted changes made in `v1.1.5` regarding the open files `rlimit`, as this can be set from outside Sonic [[@valeriansaliou](https://github.com/valeriansaliou), [f6400c6](https://github.com/valeriansaliou/sonic/commit/f6400c61a9a956130ae0bdaa9a164f4955cd2a18)].
* Added Chinese Traditional stopwords [[@dsewnr](https://github.com/dsewnr), [#87](https://github.com/valeriansaliou/sonic/issues/87)].
### Bug Fixes
* Improved the way database locking is handled when calling a pool janitor; this prevents potential dead-locks under high load [[@valeriansaliou](https://github.com/valeriansaliou), [fa78372](https://github.com/valeriansaliou/sonic/commit/fa783728fd27a116b8dcf9a7180740d204b69aa4)].
## 1.1.5 (2019-03-27)
### New Features
* Added the `server.limit_open_files` configuration variable to allow configuring `rlimit` [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.1.4 (2019-03-27)
### Changes
* Added Kannada stopwords [[@dileepbapat](https://github.com/dileepbapat)].
* The Docker image is now much lighter [[@codeflows](https://github.com/codeflows)].
### New Features
* Automatically adjust `rlimit` for the process to the hard limit allowed by the system (allows opening more FSTs in parallel) [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.1.3 (2019-03-25)
### Changes
* Limit the size of words that can hit against the FST graph, as the FST gets slower for long words [[@valeriansaliou](https://github.com/valeriansaliou), [#81](https://github.com/valeriansaliou/sonic/issues/81)].
### Bug Fixes
* Rework Sonic Channel buffer management using a VecDeque (Sonic should now work better in harsh network environments) [[@valeriansaliou](https://github.com/valeriansaliou), [1c2b9c8](https://github.com/valeriansaliou/sonic/commit/1c2b9c8fcd28b033a7cb80d678c388ce78ab989d)].
## 1.1.2 (2019-03-24)
### Changes
* FST graph consolidation locking strategy has been improved even further, based on issues with the previous rework we have noticed at scale in production (now, consolidation locking is done at a lower-priority relative to actual queries and pushes to the index) [[@valeriansaliou](https://github.com/valeriansaliou), [#68](https://github.com/valeriansaliou/sonic/issues/68)].
## 1.1.1 (2019-03-24)
### Changes
* FST graph consolidation locking strategy has been reworked as to allow queries to be executed lock-free when the FST consolidate task takes a lot of time (previously, queries were being deferred due to an ongoing FST consolidate task) [[@valeriansaliou](https://github.com/valeriansaliou), [#68](https://github.com/valeriansaliou/sonic/issues/68)].
* Removed special license clause introduced in `v1.0.2`, Sonic is fully `MPL 2.0` now [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.1.0 (2019-03-21)
### Breaking Changes
* Change how buckets are stored in a KV-based collection (nest them in the same RocksDB database; this is much more efficient on setups with a large number of buckets - **`v1.1.0` is incompatible with the `v1.0.0` KV database format**) [[@valeriansaliou](https://github.com/valeriansaliou)].
### Changes
* Bump `jemallocator` to version `0.3` [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.0.2 (2019-03-20)
### Changes
* Re-license from `MPL 2.0` to `SOSSL 1.0` (Sonic has a special license clause) [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.0.1 (2019-03-19)
### Changes
* Added automated benchmarks (can be run via `cargo bench --features benchmark`) [[@valeriansaliou](https://github.com/valeriansaliou)].
* Reduced the time to query the search index by 50% via optimizations (in multiple methods, eg. the lexer) [[@valeriansaliou](https://github.com/valeriansaliou)].
## 1.0.0 (2019-03-18)
### New Features
* Initial Sonic release [[@valeriansaliou](https://github.com/valeriansaliou)].
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at valerian@valeriansaliou.name. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
================================================
FILE: CONFIGURATION.md
================================================
Sonic Configuration
===================
# File: config.cfg
**All available configuration options are commented below, with allowed values:**
**[server]**
* `log_level` (type: _string_, allowed: `debug`, `info`, `warn`, `error`, default: `error`) — Verbosity of logging, set it to `error` in production
**[channel]**
* `inet` (type: _string_, allowed: IPv4 / IPv6 + port, default: `[::1]:1491`) — Host and TCP port Sonic Channel should listen on
* `tcp_timeout` (type: _integer_, allowed: seconds, default: `300`) — Timeout of idle/dead client connections to Sonic Channel
* `auth_password` (type: _string_, allowed: password values, default: none) — Authentication password required to connect to the channel (optional but recommended)
**[channel.search]**
* `query_limit_default` (type: _integer_, allowed: numbers, default: `10`) — Default search results limit for a query command (if the LIMIT command modifier is not used when issuing a QUERY command)
* `query_limit_maximum` (type: _integer_, allowed: numbers, default: `100`) — Maximum search results limit for a query command (if the LIMIT command modifier is being used when issuing a QUERY command)
* `query_alternates_try` (type: _integer_, allowed: numbers, default: `4`) — Number of alternate words that look like the query word to try if there are not enough query results (if zero, no alternate will be tried; if too high there may be a noticeable performance penalty)
* `suggest_limit_default` (type: _integer_, allowed: numbers, default: `5`) — Default suggested words limit for a suggest command (if the LIMIT command modifier is not used when issuing a SUGGEST command)
* `suggest_limit_maximum` (type: _integer_, allowed: numbers, default: `20`) — Maximum suggested words limit for a suggest command (if the LIMIT command modifier is being used when issuing a SUGGEST command)
* `list_limit_default` (type: _integer_, allowed: numbers, default: `100`) — Default listed words limit for a list command (if the LIMIT command modifier is not used when issuing a LIST command)
* `list_limit_maximum` (type: _integer_, allowed: numbers, default: `500`) — Maximum listed words limit for a list command (if the LIMIT command modifier is being used when issuing a LIST command)
**[store]**
**[store.kv]**
* `path` (type: _string_, allowed: UNIX path, default: `./data/store/kv/`) — Path to the Key-Value database store
* `retain_word_objects` (type: _integer_, allowed: numbers, default: `1000`) — Maximum number of objects a given word in the index can be linked to (older objects are cleared using a sliding window)
**[store.kv.pool]**
* `inactive_after` (type: _integer_, allowed: seconds, default: `1800`) — Time after which a cached database is considered inactive and can be closed (if it is not used, ie. re-activated)
**[store.kv.database]**
* `flush_after` (type: _integer_, allowed: seconds, default: `900`) — Time after which pending database updates should be flushed from memory to disk (increase this delay if you encounter high-CPU usage issues when a flush task kicks-in; this value should be lower than `store.kv.pool.inactive_after`)
* `compress` (type: _boolean_, allowed: `true`, `false`, default: `true`) — Whether to compress database or not (uses Zstandard)
* `parallelism` (type: _integer_, allowed: numbers, default: `2`) — Limit on the number of compaction and flush threads that can run at the same time
* `max_files` (type: _integer_, allowed: numbers, no default) — Maximum number of database files kept open at the same time per-database (if any; otherwise there are no limits)
* `max_compactions` (type: _integer_, allowed: numbers, default: `1`) — Limit on the number of concurrent database compaction jobs
* `max_flushes` (type: _integer_, allowed: numbers, default: `1`) — Limit on the number of concurrent database flush jobs
* `write_buffer` (type: _integer_, allowed: numbers, default: `16384`) — Maximum size in KB of the database write buffer, after which data gets flushed to disk (ie. `16384` is `16MB`; the size should be a multiple of `1024`, eg. `128 * 1024 = 131072` for `128MB`)
* `write_ahead_log` (type: _boolean_, allowed: `true`, `false`, default: `true`) — Whether to enable Write-Ahead Log or not (it avoids losing non-flushed data in case of server crash)
**[store.fst]**
* `path` (type: _string_, allowed: UNIX path, default: `./data/store/fst/`) — Path to the Finite-State Transducer database store
**[store.fst.pool]**
* `inactive_after` (type: _integer_, allowed: seconds, default: `300`) — Time after which a cached graph is considered inactive and can be closed (if it is not used, ie. re-activated)
**[store.fst.graph]**
* `consolidate_after` (type: _integer_, allowed: seconds, default: `180`) — Time after which a graph that has pending updates should be consolidated (increase this delay if you encounter high-CPU usage issues when a consolidation task kicks-in; this value should be lower than `store.fst.pool.inactive_after`)
* `max_size` (type: _integer_, allowed: numbers, default: `2048`) — Maximum size in KB of the graph file on disk, after which further words are not inserted anymore (ie. `2048` is `2MB`; the size should be a multiple of `1024`, eg. `8 * 1024 = 8192` for `8MB`; use this limit to prevent heavy graphs to be consolidating forever; this limit is enforced in pair with `store.fst.graph.max_words`, whichever is reached first)
* `max_words` (type: _integer_, allowed: numbers, default: `250000`) — Maximum number of words that can be held at the same time in the graph, after which further words are not inserted anymore (use this limit to prevent heavy graphs to be consolidating forever; this limit is enforced in pair with `store.fst.graph.max_size`, whichever is reached first)
# Command-Line: Environment variables
You are allowed to use environment variables in the configuration file.
**You can provide them as follows:**
```toml
[channel]
auth_password = "${env.SECRET}"
```
**Then, you can run Sonic providing a defined environment variable:**
```bash
SECRET=secretphrase ./sonic -c /path/to/config.cfg
```
_Note that this can only be used with string-like values._
================================================
FILE: CONTRIBUTING.md
================================================
Sonic Contributing Guide
========================
# Get Started
- First of all, fork and clone this repo;
- Install Rust and Cargo (to build and test Sonic);
- Install NPM (for integration tests);
## Build Sonic
From the repository root, run:
```sh
cargo build
```
## Start Sonic
From the repository root, run:
```sh
cargo run
```
## Run unit tests
From the repository root, run:
```sh
cargo test
```
## Run integration tests
From the directory: `<repository root>/tests/integration/scripts/`, run:
```sh
./run.sh
```
# Report Issues & Request Features
**If you encounter an issue with Sonic, or would like to request a feature to be implemented, please do [open an issue](https://github.com/valeriansaliou/sonic/issues/new).**
Note that before opening an issue, you should always search for other similar issues as to avoid opening a duplicate issue. This makes the life of the project maintainer much easier.
When writing your issue title and comment, make sure to be as precise as possible, giving as many details as you can (even if you have a feeling some details are useless, they might make debugging or understanding easier for us).
# Submit Your Code
**If you would like to contribute directly by writing code, you should fork this repository and edit it right away from your GitHub namespace.**
Once you are done with your work, always ensure to format your Rust code according to guidelines, via the [rustfmt](https://github.com/rust-lang/rustfmt) utility: `rustfmt src/*.rs`
When this is done, you may open a Pull Request (PR), then explain your changes and their purpose precisely. We will finally accept or comment on your Pull Request, if we need more changes done on your code.
================================================
FILE: Cargo.toml
================================================
# Crate metadata for the Sonic server package
[package]
name = "sonic-server"
version = "1.4.9"
description = "Fast, lightweight and schema-less search backend."
readme = "README.md"
license = "MPL-2.0"
edition = "2018"
homepage = "https://github.com/valeriansaliou/sonic"
repository = "https://github.com/valeriansaliou/sonic.git"
keywords = ["search", "query", "server", "index"]
categories = ["database-implementations", "web-programming"]
authors = ["Valerian Saliou <valerian@valeriansaliou.name>", "Baptiste Jamin <baptistejamin@gmail.com>"]
# Single binary target, named `sonic`, built from src/main.rs
[[bin]]
name = "sonic"
path = "src/main.rs"
doc = false
# Dependencies shared by all target platforms
[dependencies]
log = "0.4"
toml = "0.8"
clap = { version = "3.2", features = ["std", "cargo"] }
lazy_static = "1.4"
serde = "1.0"
serde_derive = "1.0"
rand = "0.8"
unicode-segmentation = "1.6"
radix = "0.6"
rocksdb = { version = "0.24", features = ["zstd"] }
fst = "0.3"
fst-levenshtein = "0.3"
fst-regex = "0.3"
regex-syntax = "0.8"
twox-hash = "1.5"
byteorder = "1.4"
hashbrown = "0.14"
linked_hash_set = "0.1"
whatlang = "0.16"
regex = "1.6"
# Optional tokenizer crates; pulled in through the `tokenizer-chinese` and
# `tokenizer-japanese` features declared in the [features] section
jieba-rs = { version = "0.7", optional = true }
lindera-core = { version = "0.31", optional = true }
lindera-dictionary = { version = "0.31", features = ["unidic"], optional = true }
lindera-tokenizer = { version = "0.31", features = ["unidic"], optional = true }
# Dependencies only built for UNIX targets
[target.'cfg(unix)'.dependencies]
nix = { version = "0.31.1", features = ["signal"] }
# Optional; pulled in through the `allocator-jemalloc` feature
tikv-jemallocator = { version = "0.4", optional = true }
# Dependencies only built for Windows targets
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["minwindef", "consoleapi"] }
# Cargo features; `default` lists the features enabled when none are selected explicitly
[features]
default = ["allocator-jemalloc", "tokenizer-chinese"]
allocator-jemalloc = ["tikv-jemallocator"]
tokenizer-chinese = ["jieba-rs"]
tokenizer-japanese = ["lindera-core", "lindera-dictionary", "lindera-tokenizer"]
benchmark = []
# Development builds: unoptimized, with debug information and debug assertions
[profile.dev]
opt-level = 0
debug = true
debug-assertions = true
# Release builds: fully optimized with LTO, symbols stripped, no debug information or assertions
[profile.release]
opt-level = 3
lto = true
debug = false
debug-assertions = false
strip = true
# Benchmark builds: fully optimized, without debug information or assertions
[profile.bench]
opt-level = 3
debug = false
debug-assertions = false
================================================
FILE: Dockerfile
================================================
# Build stage: compiles the release binary using the Rust toolchain image
FROM rust:slim-bullseye AS build
# Refresh the package index and install the build tools in the same layer, so
# that a cached (possibly stale) index is never reused by a later install
RUN apt-get update && \
    apt-get install -y build-essential clang
RUN rustup --version
RUN rustup component add rustfmt
# Print toolchain versions in the build log
RUN rustc --version && \
    rustup --version && \
    cargo --version
WORKDIR /app
COPY . /app
RUN cargo clean && cargo build --release
RUN strip ./target/release/sonic
# Runtime stage: only the compiled binary is copied from the build stage
FROM gcr.io/distroless/cc
WORKDIR /usr/src/sonic
COPY --from=build /app/target/release/sonic /usr/local/bin/sonic
# The configuration file is not copied by this Dockerfile; it is expected to be
# provided at /etc/sonic.cfg when the container is run
CMD [ "sonic", "-c", "/etc/sonic.cfg" ]
# Port exposed by the container
EXPOSE 1491
================================================
FILE: INNER_WORKINGS.md
================================================
Sonic Inner Workings
====================
This document was written with the goal of explaining the inner workings of Sonic, as well as the whys of the design choices that were made while building Sonic.
Anyone reading this documentation should quickly get more familiar with how such a search index can be built from scratch, to the point that they should be able to start building their own Sonic from scratch.
_If you feel something is missing from this document, or if it did not help you understand a concept Sonic implements, please [open an issue](https://github.com/valeriansaliou/sonic/issues/new) and explain precisely which part you did not get and why you think you did not get it._
# The Building Blocks of a Search Index
## Basics of a search index
A search index is nothing more than a specialized database. It should expose primitives such as: query the index, push text in the index, pop text from the index, flush parts of the index.
The search index server is responsible for organizing the index data in a way that makes writes and reads efficient. It makes use of specialized data structures for some very specific operations like typo correction. The overall goal of such a search index system is: speed, lightweightness and data compactness (ie. it should minimize the resulting output database size given a text input size).
To provide flexibility in organizing indexed data, the search index is organized into collections that contain buckets. Buckets contain indexed objects. This means that you can organize your search index within a depth of 2 layers. Objects are actual search results; you could push an object `result_1` to collection `messages` within bucket `user_1`. This would index `messages` for `user_1` with result `result_1`. Later on, one could search for `messages` matching a given query for `user_1`. If the Sonic user use case does not require using buckets, the bucket value can still be set to a generic value, eg. `default`.
Sonic, unlike many other search index systems, does not serve actual documents as search results. A strategic choice was made to store only identifiers referring to primary keys in an external database, which makes the data stored on-disk as compact as it can be. Users can still refer to their external database to fetch actual search result documents, using identifiers provided by Sonic.
It is worth noting that any project initiated as of 2019 should make use of modern server hardware, which is mostly all about multi-core CPUs and SSDs. Also, Sonic should be very mindful of minimizing its resource requirements — _from a cold start to running under high load_ — as a lot of developers nowadays expect to run software on cheap VPS servers with limited CPU time, small disk space and little RAM. Those modern VPS are nonetheless powered by modern SSDs with fast random I/O. Last but not least, it would definitely be a plus if we could make software a bit greener.
In order to address the above, Sonic is capable of running queries over multiple CPUs in parallel. It leverages SSDs' fast random I/O by using RocksDB as its main key-value store. It also avoids eating all available RAM by storing most data on-disk (via memory mapping), which is not an issue anymore as of 2019, as SSDs have low I/O latency and can sustain an unlimited number of reads over their lifetimes. Though, as writes are the Achilles' heel of SSDs, Sonic aims at minimizing writes and buffers a lot of those writes in RAM, which are committed to disk at periodic intervals. This should maximize the lifespan of the SSD under heavy index write load. Unfortunately, the side-effect of doing this is that in case of server power loss, non-committed writes will vanish.
## How do result objects get indexed?
Sonic stores result objects in a key-value database (abbreviated KV), powered by RocksDB.
When a text is pushed to Sonic, this text gets normalized, cleaned up and split in separate words. Each word is then associated to the pushed object result, and committed to the KV database as `word <-> object`.
Upon cleaning the text, overhead is eliminated. For instance, in the text `the lazy dog` there would be no point in indexing the word `the`, which is what is called a _stopword_. Sonic does not push stopwords to the index ([read more on stopwords](https://en.wikipedia.org/wiki/Stop_words)).
When objects are pushed to the search index for a given bucket in a given collection, for instance object `session_77f2e05e-5a81-49f0-89e3-177e9e1d1f32`, Sonic converts this object to a compact 32 bits format, for instance `10292198`. We call the user-provided object identifier the OID, while the compact internal identifier is named the IID. The IID is mapped internally to indexed words, and is much more compact in terms of storage than the OID. You can think of OIDs and IIDs as basically the same thing, except that the IID is the compact version of an OID. OIDs are only used for user-facing input and output objects, while IIDs are only used for internal storage of those objects. On very long indexed texts, this helps save **_a lot_** of disk space.
The KV store has a simple schema, where we associate a binary key to binary data. The following types of keys exist:
1. **Meta-To-Value**: state data for the bucket, eg. stores the count increment of indexed objects (data is in arbitrary format) (_code: [StoreKeyerIdx::MetaToValue](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/store/keyer.rs#L24)_);
2. **Term-To-IIDs**: maps a word (ie. term) to an internal identifier (ie. IID), which is essentially a word-to-result mapping (data is an array of 32 bits numbers encoded to binary as little-endian) (_code: [StoreKeyerIdx::TermToIIDs](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/store/keyer.rs#L25)_);
3. **OID-To-IID**: maps an object identifier (ie. OID) to an internal identifier (ie. IID), which converts a user-provided object to a compact internal object (data is a 32 bits number encoded to binary as little-endian) (_code: [StoreKeyerIdx::OIDToIID](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/store/keyer.rs#L26)_);
4. **IID-To-OID**: this is the reverse mapping of OID-To-IID, which lets Sonic convert an IID back to an OID (data is a variable-length UTF-8 string encoded in binary) (_code: [StoreKeyerIdx::IIDToOID](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/store/keyer.rs#L27)_);
5. **IID-To-Terms**: this lists all words (ie. terms) associated to an internal identifier (ie. IID) (data is an array of 32 bits numbers encoded to binary as little-endian) (_code: [StoreKeyerIdx::IIDToTerms](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/store/keyer.rs#L28)_);
A key is formatted as such, in binary: `[idx<1B> | bucket<4B> | route<4B>]` (_code: [StoreKeyerBuilder::build_key](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/store/keyer.rs#L73)_), which makes it 9-bytes long. The index stands for the type of key, eg. Term-To-IIDs. The bucket and what we call the route are hashed as 32 bits numbers, and appended in little-endian binary format to the key.
Both IIDs and terms are stored as 32 bits numbers in binary format. 64 bits numbers could have been used instead, increasing the total number of objects that can be indexed per-bucket. Though, storing such 64 bits numbers instead of 32 bits numbers would double required storage space. As they make up most of stored space, it was important to keep them as small as possible. Those 32 bits numbers are generated using a fast and low-collision hash family called [XxHash](http://www.xxhash.com), from the OID in the case of the IID, and from the word in the case of the term hash (_code: [StoreTermHash](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/store/identifiers.rs#L32)_).
## How do word suggestion and user typo auto-correction work?
When most users input text to a computer system using an actual keyboard, they make typos and mistakes. A nice property of a good search system should be that those typos can be forgiven and accurate search results still come up for the bogus user query. Sonic implements a data structure that lets it correct typos or autocomplete incomplete words.
For instance, if our index has the word `english` but the user, for some reason, inputs `englich`, Sonic would still return results for `english`. Similarly, if the user inputs an incomplete word eg. `eng`, Sonic would expand this word to `english`, if there were no or not enough exact matches for `eng`.
The store system responsible for such a feat is the FST ([Finite-State Transducer](https://en.wikipedia.org/wiki/Finite-state_transducer)). It can be roughly compared to a graph of characters, where nodes are characters and edges connect those characters to produce words.
Sonic stores a single FST file per bucket. This FST file is memory-mapped, and read directly from the disk when Sonic needs to read it. The [fst](https://crates.io/crates/fst) crate is used to implement the FST data structure.
One downside of the FST implementation that Sonic uses, is that once built, an FST is immutable. It means that in order to add a new word to the search index (for a given bucket), Sonic needs to re-build the entire FST (ie. iterate word-by-word on the existing FST and stream those words plus the added word to a new on-disk FST file). In order to do that in an efficient manner, Sonic implements an FST consolidation tasker, which stores FST changes in-memory and consolidates them to disk at periodic intervals (this interval can be configured) (_code: [StoreFSTPool::consolidate](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/store/fst.rs#L173)_).
## How do texts get cleaned up? (via the lexer)
Any text that gets pushed to Sonic needs to be normalized (eg. lower-cased) and cleaned up (eg. remove stopwords) before it can be added to the index. This task is handled by the lexer system, also called [tokenizer](https://en.wikipedia.org/wiki/Lexical_analysis#Tokenization).
Sonic's tokenizer is built around an iterator pattern (_code: [Iterator->TokenLexer](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/lexer/token.rs#L244)_), and yields lexed words one-by-one. Iteration can be stopped before the end of the text is reached, for instance if we did not get enough search results for the first words of the query. This ensures no extraneous lexing work is done.
Given that stopwords depend on the text language, Sonic first needs to detect the language of the text that is being cleaned up. This is done using a hybrid method of either counting the number of stopwords that appear in the text for long-enough texts (which is faster) (_code: [TokenLexerBuilder::detect_lang_fast](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/lexer/token.rs#L177)_), or performing an [n-gram](https://en.wikipedia.org/wiki/N-gram) pass on the text for smaller texts (which is **_an order of magnitude_** slower) (_code: [TokenLexerBuilder::detect_lang_slow](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/lexer/token.rs#L126)_).
As the n-gram method is better at guessing the language for small texts than the stopwords method is, we prefer it, although it is crazy slow in comparison to the stopwords method. For long-enough texts, the stopwords method becomes reliable enough, so we can use it. In either case, if the first chosen guessing method result is judged as non-reliable, Sonic falls back on the other method (_code: [!detector.is_reliable()](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/lexer/token.rs#L148)_).
By the way, Sonic builds up its own list of stopwords for all supported languages, [which can be found here](https://github.com/valeriansaliou/sonic/tree/master/src/stopwords) (languages are referred to via their ISO 639-3 codes). People are welcome to improve those lists of stopwords by [submitting a Pull Request](https://github.com/valeriansaliou/sonic/pulls).
## What is the purpose of the tasker system?
Looking at the source code of Sonic, you will find a module named `tasker` ([see here](https://github.com/valeriansaliou/sonic/tree/master/src/tasker)). This module performs background tasks, and is triggered periodically.
**The tasker performs the following actions:**
1. **Janitor**: it closes cached collection and bucket stores that were not used recently, freeing up memory (_code: [Tasker::tick](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/tasker/runtime.rs#L48)_);
2. **Consolidate**: it writes in-memory FST changes to the on-disk FST data structure (_code: [Tasker::tick](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/tasker/runtime.rs#L48)_);
As in all databases, a lot of locking is involved while the tasker is performing heavy-duty work on a KV or FST store. Thus, when the tasker system kicks in, stores may experience higher than expected latency for all consumers attempting to read or write to them. The tasker system has been optimized to minimize thread contention caused by locks, so the impact of those locks on Sonic consumers should be minimal.
# On the Sonic Channel Protocol
In order for a client to communicate with the search index system, one needs a protocol. Sonic uses the Sonic Channel protocol, which defines a way for clients to send commands (ie. requests) to a Sonic server over the network (via a raw TCP socket); and get responses from the Sonic server. For instance, a client may send a search query command such as `QUERY collection bucket "search query"` and get a response with search results such as `EVENT QUERY isgsHQYu result_1 result_2`.
**On that Sonic Channel protocol, technical choices that may seem to go against common sense were made:**
1. **Sonic does not expose any HTTP API interface**, as it adds a network and processing overhead cost we do not want to bear;
2. **Sonic only exposes a raw TCP socket** with which clients interact via the Sonic Channel protocol, which was designed to be simple, lightweight and extensible;
3. **Most Sonic Channel commands are synchronous**, for simplicity's sake (Redis does the same). You can still run multiple Sonic Channel connections in parallel, and enjoy increased parallelism, but on a given Sonic Channel connection, you must wait for the previous command to return before issuing the next one;
4. **Some Sonic Channel commands are asynchronous**, when a lot of commands may be issued in a short period of time, in a burst pattern. This is typical of read operations such as search queries, which should be submitted as jobs to a dedicated thread pool, which can be upsized and downsized at will. To handle this, a special eventing protocol format should be used;
_The Sonic Channel protocol is specified in a separate document, which [you can read here](https://github.com/valeriansaliou/sonic/blob/master/PROTOCOL.md)._
# The Journey of a Search Query
As always, examples are the way to go to explain any complex system. This section drafts the journey of a search query in Sonic, from receiving the search query command over Sonic Channel, to serving results to the Sonic Channel consumer.
Given a collection `messages` and a bucket `acme_corp` (ie. indexed messages for Acme Corp), John Doe wants to find messages that match the query text `"The robber has stolen our corporate car"`.
First off, John Doe would connect to Sonic over a Sonic Channel client, for instance [node-sonic-channel](https://github.com/valeriansaliou/node-sonic-channel). Using this client, he would issue the following query: `QUERY messages acme_corp "The robber has stolen our corporate car"` to find conversations that contain messages about a recent robbery at Acme Corp.
**After receiving the raw command above, the Sonic server would, in order:**
1. Read the raw command from the Sonic Channel TCP stream buffer (_code: [Self::on_message](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/channel/handle.rs#L163)_);
2. Route the unpacked command message to the proper command handler, which would be `ChannelCommandSearch::dispatch_query` (_code: [ChannelMessage::on](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/channel/message.rs#L39)_);
3. Commit the search query for processing (_code: [ChannelCommandBase::commit_pending_operation](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/channel/command.rs#L428)_);
4. Dispatch the search query to its executor (_code: [StoreOperationDispatch::dispatch](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/channel/command.rs#L351)_);
5. Run the search executor (_code: [ExecutorSearch::search](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/executor/search.rs#L21)_);
6. Open both the KV and FST stores for the collection `messages` and bucket `acme_corp` (_code: [StoreKVPool::acquire + StoreFSTPool::acquire](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/executor/search.rs#L34)_);
7. Perform search query text lexing, and search word-by-word, which would yield in order: `robber`, `stolen`, `corporate`, `car` (_code: [lexer.next](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/executor/search.rs#L50)_);
8. If not enough search results are found, try to suggest other words eg. typo corrections (_code: [fst_action.suggest_words](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/executor/search.rs#L81)_);
9. Perform paging on found OIDs from KV store to limit results (_code: [found_iids.iter](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/executor/search.rs#L163)_);
10. Return found OIDs from the executor (_code: [result_oids](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/executor/search.rs#L180)_);
11. Write back the final results to the TCP stream (_code: [response_args_groups](https://github.com/valeriansaliou/sonic/blob/5320b81afc1598ac1cd2af938df0b2ef6cb96dc4/src/channel/message.rs#L81)_);
_This is it!_ John Doe would receive the following response from Sonic Channel: `EVENT QUERY isgsHQYu conversation_3459 conversation_29398`, which indicates that there are 2 conversations that contain messages matching the search text `"The robber has stolen our corporate car"`.
================================================
FILE: LICENSE.md
================================================
Mozilla Public License Version 2.0
==================================
1. Definitions
--------------
1.1. "Contributor"
means each individual or legal entity that creates, contributes to
the creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used
by a Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached
the notice in Exhibit A, the Executable Form of such Source Code
Form, and Modifications of such Source Code Form, in each case
including portions thereof.
1.5. "Incompatible With Secondary Licenses"
means
(a) that the initial Contributor has attached the notice described
in Exhibit B to the Covered Software; or
(b) that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the
terms of a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible,
whether at the time of the initial grant or subsequently, any and
all of the rights conveyed by this License.
1.10. "Modifications"
means any of the following:
(a) any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered
Software; or
(b) any new file in Source Code Form that contains any Covered
Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the
License, by the making, using, selling, offering for sale, having
made, import, or transfer of either its Contributions or its
Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU
Lesser General Public License, Version 2.1, the GNU Affero General
Public License, Version 3.0, or any later versions of those
licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that
controls, is controlled by, or is under common control with You. For
purposes of this definition, "control" means (a) the power, direct
or indirect, to cause the direction or management of such entity,
whether by contract or otherwise, or (b) ownership of more than
fifty percent (50%) of the outstanding shares or beneficial
ownership of such entity.
2. License Grants and Conditions
--------------------------------
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
(a) under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
(b) under Patent Claims of such Contributor to make, use, sell, offer
for sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
(a) for any code that a Contributor has removed from Covered Software;
or
(b) for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
(c) under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.
3. Responsibilities
-------------------
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
(a) such Covered Software must also be made available in Source Code
Form, as described in Section 3.1, and You must inform recipients of
the Executable Form how they can obtain a copy of such Source Code
Form by reasonable means in a timely manner, at a charge no more
than the cost of distribution to the recipient; and
(b) You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter
the recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------
If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.
5. Termination
--------------
5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.
6. Disclaimer of Warranty
-------------------------
> Covered Software is provided under this License on an "as is"
> basis, without warranty of any kind, either expressed, implied, or
> statutory, including, without limitation, warranties that the
> Covered Software is free of defects, merchantable, fit for a
> particular purpose or non-infringing. The entire risk as to the
> quality and performance of the Covered Software is with You.
> Should any Covered Software prove defective in any respect, You
> (not any Contributor) assume the cost of any necessary servicing,
> repair, or correction. This disclaimer of warranty constitutes an
> essential part of this License. No use of any Covered Software is
> authorized under this License except under this disclaimer.
7. Limitation of Liability
--------------------------
> Under no circumstances and under no legal theory, whether tort
> (including negligence), contract, or otherwise, shall any
> Contributor, or anyone who distributes Covered Software as
> permitted above, be liable to You for any direct, indirect,
> special, incidental, or consequential damages of any character
> including, without limitation, damages for lost profits, loss of
> goodwill, work stoppage, computer failure or malfunction, or any
> and all other commercial damages or losses, even if such party
> shall have been informed of the possibility of such damages. This
> limitation of liability shall not apply to liability for death or
> personal injury resulting from such party's negligence to the
> extent applicable law prohibits such limitation. Some
> jurisdictions do not allow the exclusion or limitation of
> incidental or consequential damages, so this exclusion and
> limitation may not apply to You.
8. Litigation
-------------
Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.
9. Miscellaneous
----------------
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.
10. Versions of the License
---------------------------
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
-------------------------------------------
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.
================================================
FILE: PACKAGING.md
================================================
Packaging
=========
This file contains quick reminders and notes on how to package Sonic.
We consider here the packaging flow of Sonic version `1.0.0` for Linux.
1. **How to bump Sonic version before a release:**
1. Bump version in `Cargo.toml` to `1.0.0`
2. Execute `cargo update` to bump `Cargo.lock`
3. Bump Debian package version in `debian/rules` to `1.0.0`
2. **How to build Sonic, package it and release it on Crates, GitHub, Docker Hub and Packagecloud (multiple architectures):**
1. Tag the latest Git commit corresponding to the release with tag `v1.0.0`, and push the tag
2. Wait for all release jobs to complete on the [actions](https://github.com/valeriansaliou/sonic/actions) page on GitHub
3. Download all release archives, and sign them locally using: `./scripts/sign_binaries.sh --version=1.0.0`
4. Publish a changelog and upload all the built archives, as well as their signatures on the [releases](https://github.com/valeriansaliou/sonic/releases) page on GitHub
================================================
FILE: PROTOCOL.md
================================================
Sonic Protocol
==============
# ⚡️ Sonic Channel
**Sonic Channel is the protocol used to perform searches and ingest index data. You can also use it for Sonic administration operations. Sonic listens on TCP port 1491 by default.**
This document specifies the Sonic Channel protocol. Use it if you are looking to build your own Sonic Channel library, or if you are looking to debug Sonic using eg. `telnet` from the command line.
To start a `telnet` session with your local Sonic instance, execute: `telnet ::1 1491`
_Refer to sections below to interact with Sonic._
---
### 1️⃣ Before you start
**Please consider the following upon integrating the Sonic Channel protocol:**
1. Each command sent must be terminated with a new line character (`\n`) so as to commit the command to the server;
2. Upon starting a Sonic Channel session, your library should read the `buffer(20000)` parameter in the `STARTED` response, and use this value (in bytes) to know when command data should be truncated and split into multiple sub-commands (to avoid buffer overflows, ie. sending too much data in a single command);
---
### 2️⃣ Sonic Channel (uninitialized)
* `START <mode> <password>`: select mode to use for connection (either: `search`, `ingest` or `control`). The password is found in the `config.cfg` file at `channel.auth_password`.
_Issuing any other command — eg. `QUIT` — in this mode will abort the TCP connection, effectively resulting in a `QUIT` with the `ENDED not_recognized` response._
---
### 3️⃣ Sonic Channel (Search mode)
_The Sonic Channel Search mode is used for querying the search index. Once in this mode, you cannot switch to other modes or gain access to commands from other modes._
**➡️ Available commands:**
* `QUERY`: query database (syntax: `QUERY <collection> <bucket> "<terms>" [LIMIT(<count>)]? [OFFSET(<count>)]? [LANG(<locale>)]?`; time complexity: `O(1)` if enough exact word matches or `O(N)` if not enough exact matches where `N` is the number of alternate words tried, in practice it approaches `O(1)`)
* `SUGGEST`: auto-completes word (syntax: `SUGGEST <collection> <bucket> "<word>" [LIMIT(<count>)]?`; time complexity: `O(1)`)
* `LIST`: enumerates all words in an index (syntax: `LIST <collection> <bucket> [LIMIT(<count>)]? [OFFSET(<count>)]?`; time complexity: `O(N)` where `N` is the number of words enumerated, within provided limits)
* `PING`: ping server (syntax: `PING`; time complexity: `O(1)`)
* `HELP`: show help (syntax: `HELP [<manual>]?`; time complexity: `O(1)`)
* `QUIT`: stop connection (syntax: `QUIT`; time complexity: `O(1)`)
**⏩ Syntax terminology:**
* `<collection>`: index collection (ie. what you search in, eg. `messages`, `products`, etc.);
* `<bucket>`: index bucket name (ie. user-specific search classifier in the collection if you have any eg. `user-1, user-2, ..`, otherwise use a common bucket name eg. `generic, default, common, ..`);
* `<terms>`: text for search terms (between quotes);
* `<count>`: a positive integer number; set within allowed maximum & minimum limits;
* `<locale>`: an ISO 639-3 locale code eg. `eng` for English (if set, the locale must be a valid ISO 639-3 code; if set to `none`, lexing will be disabled; if not set, the locale will be guessed from text);
* `<manual>`: help manual to be shown (available manuals: `commands`);
_Notice: the `bucket` terminology may confuse some Sonic users. As we are well-aware Sonic may be used in an environment where end-users may each hold their own search index in a given `collection`, we made it possible to manage per-end-user search indexes with `bucket`. If you only have a single index per `collection` (most Sonic users will), we advise you use a static generic name for your `bucket`, for instance: `default`._
**⬇️ Search flow example (via `telnet`):**
```bash
T1: telnet sonic.local 1491
T2: Trying ::1...
T3: Connected to sonic.local.
T4: Escape character is '^]'.
T5: CONNECTED <sonic-server v1.0.0>
T6: START search SecretPassword
T7: STARTED search protocol(1) buffer(20000)
T8: QUERY messages user:0dcde3a6 "valerian saliou" LIMIT(10)
T9: PENDING Bt2m2gYa
T10: EVENT QUERY Bt2m2gYa conversation:71f3d63b conversation:6501e83a
T11: QUERY helpdesk user:0dcde3a6 "gdpr" LIMIT(50)
T12: PENDING y57KaB2d
T13: QUERY helpdesk user:0dcde3a6 "law" LIMIT(50) OFFSET(200)
T14: PENDING CjPvE5t9
T15: PING
T16: PONG
T17: EVENT QUERY CjPvE5t9
T18: EVENT QUERY y57KaB2d article:28d79959
T19: SUGGEST messages user:0dcde3a6 "val"
T20: PENDING z98uDE0f
T21: EVENT SUGGEST z98uDE0f valerian valala
T22: QUIT
T23: ENDED quit
T24: Connection closed by foreign host.
```
_Notes on what happens:_
* **T6:** we enter `search` mode (this is required to enable `search` commands);
* **T8:** we query collection `messages`, in bucket for platform user `user:0dcde3a6` with search terms `valerian saliou` and a limit of `10` on returned results;
* **T9:** Sonic received the query and stacked it for processing with marker `Bt2m2gYa` (the marker is used to track the asynchronous response);
* **T10:** Sonic processed search query of T8 with marker `Bt2m2gYa` and sends 2 search results (those are conversation identifiers, that refer to a primary key in an external database);
* **T11 + T13:** we query collection `helpdesk` twice (in the example, this one is heavy, so processing of results takes more time);
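* **T17 + T18:** we receive search results for search queries of T13 + T11 respectively (T17 answers T13 with marker `CjPvE5t9` and no results; T18 answers T11 with marker `y57KaB2d` and one result; this took a while!);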
---
### 4️⃣ Sonic Channel (Ingest mode)
_The Sonic Channel Ingest mode is used for altering the search index (push, pop and flush). Once in this mode, you cannot switch to other modes or gain access to commands from other modes._
**➡️ Available commands:**
* `PUSH`: Push search data in the index (syntax: `PUSH <collection> <bucket> <object> "<text>" [LANG(<locale>)]?`; time complexity: `O(1)`)
* `POP`: Pop search data from the index (syntax: `POP <collection> <bucket> <object> "<text>"`; time complexity: `O(1)`)
* `COUNT`: Count indexed search data (syntax: `COUNT <collection> [<bucket> [<object>]?]?`; time complexity: `O(1)`)
* `FLUSHC`: Flush all indexed data from a collection (syntax: `FLUSHC <collection>`; time complexity: `O(1)`)
* `FLUSHB`: Flush all indexed data from a bucket in a collection (syntax: `FLUSHB <collection> <bucket>`; time complexity: `O(N)` where `N` is the number of bucket objects)
* `FLUSHO`: Flush all indexed data from an object in a bucket in collection (syntax: `FLUSHO <collection> <bucket> <object>`; time complexity: `O(1)`)
* `PING`: ping server (syntax: `PING`; time complexity: `O(1)`)
* `HELP`: show help (syntax: `HELP [<manual>]?`; time complexity: `O(1)`)
* `QUIT`: stop connection (syntax: `QUIT`; time complexity: `O(1)`)
**⏩ Syntax terminology:**
* `<collection>`: index collection (ie. what you search in, eg. `messages`, `products`, etc.);
* `<bucket>`: index bucket name (ie. user-specific search classifier in the collection if you have any eg. `user-1, user-2, ..`, otherwise use a common bucket name eg. `generic, default, common, ..`);
* `<object>`: object identifier that refers to an entity in an external database, where the searched object is stored (eg. you use Sonic to index CRM contacts by name; full CRM contact data is stored in a MySQL database; in this case the object identifier in Sonic will be the MySQL primary key for the CRM contact);
* `<text>`: search text to be indexed (can be a single word, or a longer text; within maximum length safety limits; should be quoted using `"` quotes; internal quotes should be escaped using `\"`);
* `<locale>`: an ISO 639-3 locale code eg. `eng` for English (if set, the locale must be a valid ISO 639-3 code; if set to `none`, lexing will be disabled; if not set, the locale will be guessed from text);
* `<manual>`: help manual to be shown (available manuals: `commands`);
_Notice: the `bucket` terminology may confuse some Sonic users. As we are well-aware Sonic may be used in an environment where end-users may each hold their own search index in a given `collection`, we made it possible to manage per-end-user search indexes with `bucket`. If you only have a single index per `collection` (most Sonic users will), we advise you use a static generic name for your `bucket`, for instance: `default`._
**⬇️ Ingest flow example (via `telnet`):**
```bash
T1: telnet sonic.local 1491
T2: Trying ::1...
T3: Connected to sonic.local.
T4: Escape character is '^]'.
T5: CONNECTED <sonic-server v1.0.0>
T6: START ingest SecretPassword
T7: STARTED ingest protocol(1) buffer(20000)
T8: PUSH messages user:0dcde3a6 conversation:71f3d63b Hey Valerian
T9: ERR invalid_format(PUSH <collection> <bucket> <object> "<text>")
T10: PUSH messages user:0dcde3a6 conversation:71f3d63b "Hello Valerian Saliou, how are you today?"
T11: OK
T12: COUNT messages user:0dcde3a6
T13: RESULT 43
T14: COUNT messages user:0dcde3a6 conversation:71f3d63b
T15: RESULT 1
T16: FLUSHO messages user:0dcde3a6 conversation:71f3d63b
T17: RESULT 1
T18: FLUSHB messages user:0dcde3a6
T19: RESULT 42
T20: PING
T21: PONG
T22: QUIT
T23: ENDED quit
T24: Connection closed by foreign host.
```
_Notes on what happens:_
* **T6:** we enter `ingest` mode (this is required to enable `ingest` commands);
* **T8:** we try to push text `Hey Valerian` to the index, in collection `messages`, bucket `user:0dcde3a6` and object `conversation:71f3d63b` (the syntax that was used is invalid);
* **T9:** Sonic refuses the command we issued in T8, and provides us with the correct command format (notice that `<text>` should be quoted);
* **T10:** we attempt to push another text in the same collection, bucket and object as in T8;
* **T11:** this time, our push command in T10 was valid (Sonic acknowledges the push commit to the search index);
* **T12:** we count the number of indexed terms in collection `messages` and bucket `user:0dcde3a6`;
* **T13:** there are 43 terms (ie. words) in index for query in T12;
* **T18:** we flush all index data from collection `messages` and bucket `user:0dcde3a6`;
* **T19:** 42 terms have been flushed from index for command in T18;
---
### 5️⃣ Sonic Channel (Control mode)
_The Sonic Channel Control mode is used for administration purposes. Once in this mode, you cannot switch to other modes or gain access to commands from other modes._
**➡️ Available commands:**
* `TRIGGER`: trigger an action (syntax: `TRIGGER [<action>]? [<data>]?`; time complexity: `O(1)`)
* `INFO`: get server information (syntax: `INFO`; time complexity: `O(1)`)
* `PING`: ping server (syntax: `PING`; time complexity: `O(1)`)
* `HELP`: show help (syntax: `HELP [<manual>]?`; time complexity: `O(1)`)
* `QUIT`: stop connection (syntax: `QUIT`; time complexity: `O(1)`)
**⏩ Syntax terminology:**
* `<action>`: action to be triggered (available actions: `consolidate`, `backup`, `restore`);
* `<data>`: additional data to provide to the action (required for: `backup`, `restore`);
* `<manual>`: help manual to be shown (available manuals: `commands`);
**⬇️ Control flow example (via `telnet`):**
```bash
T1: telnet sonic.local 1491
T2: Trying ::1...
T3: Connected to sonic.local.
T4: Escape character is '^]'.
T5: CONNECTED <sonic-server v1.0.0>
T6: START control SecretPassword
T7: STARTED control protocol(1) buffer(20000)
T8: TRIGGER consolidate
T9: OK
T10: PING
T11: PONG
T12: QUIT
T13: ENDED quit
T14: Connection closed by foreign host.
```
_Notes on what happens:_
* **T6:** we enter `control` mode (this is required to enable `control` commands);
* **T8:** we trigger a database consolidation (instead of waiting for the next automated consolidation tick);
================================================
FILE: README.md
================================================
Sonic
=====
[Test and Build](https://github.com/valeriansaliou/sonic/actions?query=workflow%3A%22Test+and+Build%22) [Build and Release](https://github.com/valeriansaliou/sonic/actions?query=workflow%3A%22Build+and+Release%22) [Dependency Status](https://deps.rs/repo/github/valeriansaliou/sonic) [Buy Me A Coffee](https://www.buymeacoffee.com/valeriansaliou)
**Sonic is a fast, lightweight and schema-less search backend. It ingests search texts and identifier tuples that can then be queried against in a microsecond's time.**
Sonic can be used as a simple alternative to super-heavy and full-featured search backends such as Elasticsearch in some use-cases. It is capable of normalizing natural language search queries, auto-completing a search query and providing the most relevant results for a query. Sonic is an identifier index, rather than a document index; when queried, it returns IDs that can then be used to refer to the matched documents in an external database.
Strong attention has been given to performance and code cleanliness when designing Sonic. It aims at being crash-free and super-fast, while putting minimum strain on server resources (our measurements have shown that Sonic - when under load - responds to search queries in the μs range, eats ~30MB RAM and has a low CPU footprint; [see our benchmarks](https://github.com/valeriansaliou/sonic#how-fast--lightweight-is-it)).
_Tested at Rust version: `rustc 1.74.1 (a28077b28 2023-12-04)`_
**🇫🇷 Crafted in Nantes, France.**
**:newspaper: The Sonic project was initially announced in [a post on my personal journal](https://journal.valeriansaliou.name/announcing-sonic-a-super-light-alternative-to-elasticsearch/).**

> _« Sonic » is the mascot of the Sonic project. I drew it to look like a psychedelic hipster hedgehog._
## Who uses it?
<table>
<tr>
<td align="center"><a href="https://crisp.chat/"><img src="https://valeriansaliou.github.io/sonic/images/logo-crisp.png" width="64" /></a></td>
<td align="center"><a href="https://scrumpy.io/"><img src="https://valeriansaliou.github.io/sonic/images/logo-scrumpy.png" width="64" /></a></td>
</tr>
<tr>
<td align="center">Crisp</td>
<td align="center">Scrumpy</td>
</tr>
</table>
_👋 You use Sonic and you want to be listed there? [Contact me](https://valeriansaliou.name/)._
## Demo
Sonic is integrated in all Crisp search products on the [Crisp](https://crisp.chat/) platform. It is used to index half a billion objects on a $5/mth 1-vCPU SSD cloud server (as of 2019). Crisp users use it to search in their messages, conversations, contacts, helpdesk articles and more.
**You can test Sonic live on: [Crisp Helpdesk](https://help.crisp.chat/), and get an idea of the speed and relevance of Sonic search results. You can also test search suggestions from there: start typing at least 2 characters of a word, and a full word will be suggested (press the tab key to expand the suggestion). _Both search and suggestions are powered by Sonic._**

> _Sonic fuzzy search in helpdesk articles at its best. Look up any word or group of terms, and get results instantly._
## Features
* **Search terms are stored in collections, organized in buckets**; you may use a single bucket, or a bucket per user on your platform if you need to search in separate indexes.
* **Search results return object identifiers**, that can be resolved from an external database if you need to enrich the search results. This makes Sonic a simple word index, that points to identifier results. Sonic doesn't store any direct textual data in its index, but it still holds a word graph for auto-completion and typo corrections.
* **Search query typos are corrected**; if there are not enough exact-match results for a given word in a search query, Sonic tries to correct the word and tries against alternate words. You're allowed to make mistakes when searching.
* **Insert and remove items in the index**; index-altering operations are light and can be committed to the server while it is running. A background tasker handles the job of consolidating the index so that the entries you have pushed or popped are quickly made available for search.
* **Auto-complete any word** in real-time via the suggest operation. This helps build a snappy word suggestion feature in your end-user search interface.
* **Full Unicode compatibility** on 80+ most spoken languages in the world. Sonic removes useless stop words from any text (eg. 'the' in English), after guessing the text language. This ensures any searched or ingested text is clean before it hits the index; [see languages](https://github.com/valeriansaliou/sonic#which-text-languages-are-supported).
* **Simple protocol (Sonic Channel)**, that lets you search your index, manage data ingestion (push in the index, pop from the index, flush a collection, flush a bucket, etc.) and perform administrative actions. Sonic Channel was designed to be lightweight on resources and simple to integrate with; [read protocol specification](https://github.com/valeriansaliou/sonic/blob/master/PROTOCOL.md).
* **Easy-to-use libraries**, that let you connect to Sonic from your apps; [see libraries](https://github.com/valeriansaliou/sonic#-sonic-channel-libraries).
## How to use it?
### Installation
Sonic is built in Rust. To install it, either download a version from the [Sonic releases](https://github.com/valeriansaliou/sonic/releases) page, use `cargo install` or pull the source code from `master`.
👉 _Each release binary comes with an `.asc` signature file, which can be verified using [@valeriansaliou](https://github.com/valeriansaliou) GPG public key: [:key:valeriansaliou.gpg.pub.asc](https://valeriansaliou.name/files/keys/valeriansaliou.gpg.pub.asc)._
**👉 Install from packages:**
Sonic provides [pre-built packages](https://packagecloud.io/valeriansaliou/sonic) for Debian-based systems (Debian, Ubuntu, etc.).
**Important: Sonic only provides 64-bit packages targeting Debian 12 for now (codename: `bookworm`). You might still be able to use them on other Debian versions, as well as Ubuntu (although they rely on a specific `glibc` version that might not be available on older or newer systems).**
First, add the Sonic APT repository (eg. for Debian `bookworm`):
```bash
echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
```
```bash
curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
```
```bash
apt-get update
```
Then, install the Sonic package:
```bash
apt-get install sonic
```
Then, edit the pre-filled Sonic configuration file:
```bash
nano /etc/sonic.cfg
```
Finally, restart Sonic:
```bash
service sonic restart
```
**👉 Install from source:**
If you pulled the source code from Git, you can build it using `cargo`:
```bash
cargo build --release
```
You can find the built binaries in the `./target/release` directory.
_Install `build-essential`, `clang`, `libclang-dev`, `libc6-dev`, `g++` and `llvm-dev` to be able to compile the required RocksDB dependency._
Note that the following optional features can be enabled upon building Sonic: `allocator-jemalloc`, `tokenizer-chinese` and `tokenizer-japanese` (some might be already enabled by default).
**👉 Install from Cargo:**
You can install Sonic directly with `cargo install`:
```bash
cargo install sonic-server
```
Ensure that your `$PATH` is properly configured to source the Crates binaries, and then run Sonic using the `sonic` command.
_Install `build-essential`, `clang`, `libclang-dev`, `libc6-dev`, `g++` and `llvm-dev` to be able to compile the required RocksDB dependency._
**👉 Install from Docker Hub:**
You might find it convenient to run Sonic via Docker. You can find the pre-built Sonic image on Docker Hub as [valeriansaliou/sonic](https://hub.docker.com/r/valeriansaliou/sonic/).
First, pull the `valeriansaliou/sonic` image:
```bash
docker pull valeriansaliou/sonic:v1.4.9
```
Then, seed it with a configuration file and run it (replace `/path/to/your/sonic/config.cfg` with the path to your configuration file):
```bash
docker run -p 1491:1491 -v /path/to/your/sonic/config.cfg:/etc/sonic.cfg -v /path/to/your/sonic/store/:/var/lib/sonic/store/ valeriansaliou/sonic:v1.4.9
```
In the configuration file, ensure that:
* `channel.inet` is set to `0.0.0.0:1491` (this lets Sonic be reached from outside the container)
* `store.kv.path` is set to `/var/lib/sonic/store/kv/` (this lets the external KV store directory be reached by Sonic)
* `store.fst.path` is set to `/var/lib/sonic/store/fst/` (this lets the external FST store directory be reached by Sonic)
Sonic will be reachable from `tcp://localhost:1491`.
**👉 Install from another source (non-official):**
Other installation sources are available:
* **Homebrew (macOS)**: `brew install sonic` ([see formula](https://formulae.brew.sh/formula/sonic))
_Note that those sources are non-official, meaning that they are neither owned nor maintained by the Sonic project owners. The latest Sonic version available on those sources might be outdated, in comparison to the latest version available through the Sonic project._
### Configuration
Use the sample [config.cfg](https://github.com/valeriansaliou/sonic/blob/master/config.cfg) configuration file and adjust it to your own environment.
_If you are looking to fine-tune your configuration, you may read our [detailed configuration documentation](https://github.com/valeriansaliou/sonic/blob/master/CONFIGURATION.md)._
### Run Sonic
Sonic can be run as such:
`./sonic -c /path/to/config.cfg`
## Perform searches and manage objects
Both searches and object management (i.e. data ingestion) are handled via the Sonic Channel protocol only. As we want to keep things simple with Sonic (similarly to how Redis does it), Sonic does not offer an HTTP endpoint or similar; connecting via Sonic Channel is the way to go when you need to interact with the Sonic search database.
Sonic distributes official libraries that let you integrate Sonic into your apps easily. Click on a library below to see library integration documentation and code.
_If you are looking for details on the raw Sonic Channel TCP-based protocol, you can read our [detailed protocol documentation](https://github.com/valeriansaliou/sonic/blob/master/PROTOCOL.md). It can prove handy if you are looking to code your own Sonic Channel library._
### 📦 Sonic Channel Libraries
#### 1️⃣ Official Libraries
Sonic distributes official Sonic integration libraries for your programming language (official means that those libraries have been reviewed and validated by a core maintainer):
* **NodeJS**:
* **[node-sonic-channel](https://www.npmjs.com/package/sonic-channel)** by [@valeriansaliou](https://github.com/valeriansaliou)
* **PHP**:
* **[psonic](https://github.com/ppshobi/psonic)** by [@ppshobi](https://github.com/ppshobi)
* **Rust**:
* **[sonic-channel](https://github.com/pleshevskiy/sonic-channel)** by [@pleshevskiy](https://github.com/pleshevskiy)
#### 2️⃣ Community Libraries
You can find below a list of Sonic integrations provided by the community (many thanks to them!):
* **Rust**:
* **[sonic_client](https://github.com/FrontMage/sonic_client)** by [@FrontMage](https://github.com/FrontMage)
* **Python**:
* **[asonic](https://github.com/moshe/asonic)** by [@moshe](https://github.com/moshe)
* **[python-sonic-client](https://github.com/xmonader/python-sonic-client)** by [@xmonader](https://github.com/xmonader)
* **[pysonic-channel](https://github.com/AlongWY/pysonic)** by [@AlongWY](https://github.com/AlongWY)
* **Ruby**:
* **[sonic-ruby](https://github.com/atipugin/sonic-ruby)** by [@atipugin](https://github.com/atipugin)
* **Go**:
* **[go-sonic](https://github.com/expectedsh/go-sonic)** by [@alexisvisco](https://github.com/alexisvisco)
* **[go-sonic](https://github.com/OGKevin/go-sonic)** by [@OGKevin](https://github.com/OGKevin)
* **PHP**:
* **[php-sonic](https://github.com/php-sonic/php-sonic)** by [@touhonoob](https://github.com/touhonoob)
* **[laravel-scout-sonic](https://github.com/james2doyle/laravel-scout-sonic)** by [@james2doyle](https://github.com/james2doyle)
* **Java**:
* **[java-sonic](https://github.com/twohou/java-sonic)** by [@touhonoob](https://github.com/touhonoob)
* **[jsonic](https://github.com/alohaking/jsonic)** by [@alohaking](https://github.com/alohaking)
* **Deno**:
* **[deno-sonic](https://github.com/erfanium/deno_sonic)** by [@erfanium](https://github.com/erfanium)
* **Bun**:
* **[sonic-bun](https://github.com/emilianscheel/sonic-bun)** by [@emilianscheel](https://github.com/emilianscheel)
* **Elixir**:
* **[sonix](https://github.com/imerkle/sonix)** by [@imerkle](https://github.com/imerkle)
* **Crystal**:
* **[sonic-crystal](https://github.com/babelian/sonic-crystal)** by [@babelian](https://github.com/babelian)
* **Nim**:
* **[nim-sonic-client](https://github.com/xmonader/nim-sonic-client)** by [@xmonader](https://github.com/xmonader)
* **.NET**:
* **[nsonic](https://github.com/spikensbror-dotnet/nsonic)** by [@spikensbror](https://github.com/spikensbror)
_ℹ️ Cannot find the library for your programming language? Build your own and be referenced here! ([contact me](https://valeriansaliou.name/))_
## Which text languages are supported?
Sonic supports a wide range of languages in its lexing system. If a language is not in this list, you will still be able to push this language to the search index, but stop-words will not be elided, which could lead to lower-quality search results.
**The languages supported by the lexing system are:**
* 🇿🇦 Afrikaans
* 🇸🇦 Arabic
* 🇦🇲 Armenian
* 🇦🇿 Azerbaijani
* 🇧🇩 Bengali
* 🇧🇬 Bulgarian
* 🇲🇲 Burmese
* 🏳 Catalan
* 🇨🇳 Chinese (Simplified)
* 🇹🇼 Chinese (Traditional)
* 🇭🇷 Croatian
* 🇨🇿 Czech
* 🇩🇰 Danish
* 🇳🇱 Dutch
* 🇬🇧 English
* 🏳 Esperanto
* 🇪🇪 Estonian
* 🇫🇮 Finnish
* 🇫🇷 French
* 🇬🇪 Georgian
* 🇩🇪 German
* 🇬🇷 Greek
* 🇮🇳 Gujarati
* 🇮🇱 Hebrew
* 🇮🇳 Hindi
* 🇭🇺 Hungarian
* 🇮🇩 Indonesian
* 🇮🇹 Italian
* 🇯🇵 Japanese
* 🇮🇳 Kannada
* 🇰🇭 Khmer
* 🇰🇷 Korean
* 🏳 Latin
* 🇱🇻 Latvian
* 🇱🇹 Lithuanian
* 🇮🇳 Malayalam
* 🇮🇳 Marathi
* 🇳🇵 Nepali
* 🇮🇷 Persian
* 🇵🇱 Polish
* 🇵🇹 Portuguese
* 🇮🇳 Punjabi
* 🇷🇺 Russian
* 🇷🇸 Serbian
* 🇸🇰 Slovak
* 🇸🇮 Slovene
* 🇪🇸 Spanish
* 🇸🇪 Swedish
* 🇵🇭 Tagalog
* 🇮🇳 Tamil
* 🇹🇭 Thai
* 🇹🇷 Turkish
* 🇺🇦 Ukrainian
* 🇵🇰 Urdu
* 🇻🇳 Vietnamese
* 🇮🇱 Yiddish
* 🇿🇦 Zulu
## How fast & lightweight is it?
Sonic was built for [Crisp](https://crisp.chat/) from the start. As Crisp was growing and indexing more and more search data into a full-text search SQL database, we decided it was time to switch to a proper search backend system. When reviewing Elasticsearch (ELS) and others, we found those were full-featured heavyweight systems that did not scale well with Crisp's freemium-based cost structure.
At the end, we decided to build our own search backend, designed to be simple and lightweight on resources.
You can run function-level benchmarks with the command: `cargo bench --features benchmark`
### 👩🔬 Benchmark #1
#### ➡️ Scenario
We performed an extract of all messages from the Crisp team used for [Crisp](https://crisp.chat/) own customer support.
We want to import all those messages into a clean Sonic instance, and then perform searches on the index we built. We will measure the time that Sonic spent executing each operation (ie. each `PUSH` and `QUERY` command over Sonic Channel), and group results per 1,000 operations (this outputs a mean time per 1,000 operations).
#### ➡️ Context
**Our benchmark was run on the following computer:**
* **Device**: MacBook Pro (Retina, 15-inch, Mid 2014)
* **OS**: MacOS 10.14.3
* **Disk**: 512GB SSD (formatted under the APFS file system)
* **CPU**: 2.5 GHz Intel Core i7
* **RAM**: 16 GB 1600 MHz DDR3
**Sonic is compiled as follows:**
* **Sonic version**: 1.0.1
* **Rustc version**: `rustc 1.35.0-nightly (719b0d984 2019-03-13)`
* **Compiler flags**: `release` profile (`-O3` with `lto`)
**Our dataset is as such:**
* **Number of objects**: ~1,000,000 messages
* **Total size**: ~100MB of raw message text (this does not account for identifiers and other metas)
#### ➡️ Scripts
**The scripts we used to perform the benchmark are:**
1. **PUSH script**: [sonic-benchmark_batch-push.js](https://gist.github.com/valeriansaliou/e5ab737b28601ebd70483f904d21aa09)
2. **QUERY script**: [sonic-benchmark_batch-query.js](https://gist.github.com/valeriansaliou/3ef8315d7282bd173c2cb9eba64fa739)
#### ⏬ Results
**Our findings:**
* We imported ~1,000,000 messages of dynamic length (some very long, eg. emails);
* Once imported, the search index weighs 20MB (KV) + 1.4MB (FST) on disk;
* CPU usage during import averaged 75% of a single CPU core;
* RAM usage for the Sonic process peaked at 28MB during our benchmark;
* We used a single Sonic Channel TCP connection, which limits the import to a single thread (we could have load-balanced this across as many Sonic Channel connections as there are CPUs);
* We get an import RPS approaching 4,000 operations per second (per thread);
* We get a search query RPS approaching 1,000 operations per second (per thread);
* On the hyper-threaded 4-cores CPU used, we could have parallelized operations to 8 virtual cores, thus theoretically increasing the import RPS to 32,000 operations / second, while the search query RPS would be increased to 8,000 operations / second (we may be SSD-bound at some point though);
**Compared results per operation (on a single object):**
We took a sample of 8 results from our batched operations, which produced a total of 1,000 results (1,000,000 items, with 1,000 items batched per measurement report).
_This is not very scientific, but it should give you a clear idea of Sonic's performance._
**Time spent per operation:**
Operation | Average | Best | Worst
--------- | ------- | ----- | -----
PUSH | 275μs | 190μs | 363μs
QUERY | 880μs | 852μs | 1ms
**Batch PUSH results as seen from our terminal (from initial index of: 0 objects):**

**Batch QUERY results as seen from our terminal (on index of: 1,000,000 objects):**

## Limitations
* **Indexed data limits**: Sonic is designed for large search indexes split over thousands of search buckets per collection. An IID (ie. Internal-ID) is stored in the index as a 32 bits number, which theoretically allows up to ~4.2 billion objects to be indexed (ie. OID) per bucket. We've observed storage savings of 30% to 40%, which justifies the trade-off on large databases (versus Sonic using 64 bits IIDs). Also, Sonic only keeps the N most recently pushed results for a given word, in a sliding window way (the sliding window width can be configured).
* **Search query limits**: Sonic Natural Language Processing system (NLP) does not work at the sentence-level, for storage compactness reasons (we keep the FST graph shallow as to reduce time and space complexity). It works at the word-level, and is thus able to search per-word and can predict a word based on user input, though it is unable to predict the next word in a sentence.
* **Real-time limits**: the FST needs to be rebuilt every time a word is pushed or popped from the bucket graph. As this is quite heavy, Sonic batches rebuild cycles. If you have just pushed a new word to the index and you are not seeing it in the `SUGGEST` command yet, wait for the next rebuild cycle to kick-in, or force it with `TRIGGER consolidate` in a `control` channel.
* **Interoperability limits**: The Sonic Channel protocol is the only way to read and write search entries to the Sonic search index. Sonic does not expose any HTTP API. Sonic Channel has been designed with performance and minimal network footprint in mind. If you need to access Sonic from an unsupported programming language, you can either [open an issue](https://github.com/valeriansaliou/sonic/issues/new) or look at the reference [node-sonic-channel](https://github.com/valeriansaliou/node-sonic-channel) implementation and build it in your target programming language.
* **Hardware limits**: Sonic performs the search on the file-system directly; ie. it does not fit the index in RAM. A search query results in a lot of random accesses on the disk, which means that it will be quite slow on old-school HDDs and super-fast on newer SSDs. Do store the Sonic database on SSD-backed file systems only.
## :fire: Report A Vulnerability
If you find a vulnerability in Sonic, you are more than welcome to report it directly to [@valeriansaliou](https://github.com/valeriansaliou) by sending an encrypted email to [valerian@valeriansaliou.name](mailto:valerian@valeriansaliou.name). Do not report vulnerabilities in public GitHub issues, as they may be exploited by malicious people to target production servers running an unpatched Sonic instance.
**:warning: You must encrypt your email using [@valeriansaliou](https://github.com/valeriansaliou) GPG public key: [:key:valeriansaliou.gpg.pub.asc](https://valeriansaliou.name/files/keys/valeriansaliou.gpg.pub.asc).**
================================================
FILE: config.cfg
================================================
# Sonic
# Fast, lightweight and schema-less search backend
# Configuration file
# Example: https://github.com/valeriansaliou/sonic/blob/master/config.cfg
# Sample values for a local setup; see CONFIGURATION.md for the detailed documentation of each option.
[server]
log_level = "debug"
# Sonic Channel (TCP) listener
[channel]
# Address and port Sonic Channel is reachable on (the README advises "0.0.0.0:1491" when running \
#   in Docker, so that Sonic can be reached from outside the container)
inet = "[::1]:1491"
tcp_timeout = 300
# NOTE(review): sample value; presumably meant to be replaced before any real deployment
auth_password = "SecretPassword"
# Limits applied to search-mode commands; a QUERY or SUGGEST 'LIMIT' below 1 or above the \
#   configured maximum is rejected, and the default is used when no 'LIMIT' is passed.
[channel.search]
query_limit_default = 10
query_limit_maximum = 100
query_alternates_try = 4
suggest_limit_default = 5
suggest_limit_maximum = 20
list_limit_default = 100
list_limit_maximum = 500
[store]
# Key-value store
[store.kv]
# Directory of the KV store (set to "/var/lib/sonic/store/kv/" in the Docker and Debian setups)
path = "./data/store/kv/"
# NOTE(review): presumably the sliding window width mentioned in the README limitations (N most \
#   recently pushed results kept per word) -- confirm in CONFIGURATION.md
retain_word_objects = 1000
[store.kv.pool]
inactive_after = 1800
[store.kv.database]
flush_after = 900
compress = true
parallelism = 2
max_files = 100
max_compactions = 1
max_flushes = 1
write_buffer = 16384
write_ahead_log = true
# FST (word graph) store
[store.fst]
# Directory of the FST store (set to "/var/lib/sonic/store/fst/" in the Docker and Debian setups)
path = "./data/store/fst/"
[store.fst.pool]
inactive_after = 300
[store.fst.graph]
# NOTE(review): presumably the delay between batched FST rebuild cycles described in the README \
#   ('TRIGGER consolidate' forces a cycle) -- confirm the unit in CONFIGURATION.md
consolidate_after = 180
max_size = 2048
max_words = 250000
================================================
FILE: debian/changelog
================================================
sonic (0.0.0-1) UNRELEASED; urgency=medium
* Initial release.
-- Valerian Saliou <valerian@valeriansaliou.name> Tue, 31 Aug 2023 12:00:00 +0000
================================================
FILE: debian/compat
================================================
10
================================================
FILE: debian/control
================================================
Source: sonic
Section: net
Priority: ext
Maintainer: Valerian Saliou <valerian@valeriansaliou.name>
Standards-Version: 3.9.4
Build-Depends: wget, ca-certificates
Homepage: https://github.com/valeriansaliou/sonic
Package: sonic
Architecture: any
Depends: adduser
Provides: sonic
Description: Fast, lightweight & schema-less search backend. An alternative to Elasticsearch that runs on a few MBs of RAM.
================================================
FILE: debian/copyright
================================================
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: sonic
Upstream-Contact: Valerian Saliou <valerian@valeriansaliou.name>
Source: https://github.com/valeriansaliou/sonic
Files: *
Copyright: 2023 Valerian Saliou
License: MPL-2
License: MPL-2
This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file,
You can obtain one at http://mozilla.org/MPL/2.0/.
================================================
FILE: debian/rules
================================================
#!/usr/bin/make -f
# Debian packaging rules for Sonic: nothing is compiled here, the pre-built release tarball for
# $(VERSION) is downloaded from the GitHub releases page and its contents are repackaged.
DISTRIBUTION = $(shell lsb_release -sr)
VERSION = 1.4.9
# Package version is made of the Sonic version and of the distribution release number
PACKAGEVERSION = $(VERSION)-0~$(DISTRIBUTION)0
URL = https://github.com/valeriansaliou/sonic/releases/download/v$(VERSION)/
# Hand every target over to debhelper (with its systemd add-on), unless overridden below
%:
dh $@ --with systemd
# Clean, test and build steps are intentionally left empty
override_dh_auto_clean:
override_dh_auto_test:
override_dh_auto_build:
# Install step: resolve the tarball name from the dpkg architecture ('amd64' is mapped to
# 'x86_64'), download and extract it, strip the binary, rename 'config.cfg' to 'sonic.cfg',
# create the store directory, and rewrite store paths to live under '/var/lib/sonic/store/'.
override_dh_auto_install:
$(eval ENV_ARCH := $(shell dpkg --print-architecture))
$(eval ENV_ISA := $(shell if [ "$(ENV_ARCH)" = "amd64" ]; then echo "x86_64"; else echo "$(ENV_ARCH)"; fi))
$(eval ENV_TARBALL := v$(VERSION)-$(ENV_ISA)-gnu.tar.gz)
echo "Architecture: $(ENV_ARCH)"
echo "Instruction Set: $(ENV_ISA)"
echo "Target: $(URL)$(ENV_TARBALL)"
wget -N --progress=dot:mega $(URL)$(ENV_TARBALL)
tar -xf $(ENV_TARBALL)
strip sonic/sonic
mv sonic/config.cfg sonic/sonic.cfg
mkdir sonic/store/
sed -i 's/path = ".\/data\/store\//path = "\/var\/lib\/sonic\/store\//g' sonic/sonic.cfg
# Stamp the generated control file with the distribution-specific package version
override_dh_gencontrol:
dh_gencontrol -- -v$(PACKAGEVERSION)
================================================
FILE: debian/sonic.install
================================================
sonic/sonic usr/bin/
sonic/sonic.cfg etc/
sonic/store/ var/lib/sonic/
================================================
FILE: debian/sonic.postinst
================================================
#!/bin/sh
# Post-installation script: on 'configure', creates the 'sonic' system user and group (no home
# directory, no login), then hands ownership of '/var/lib/sonic/' over to that user.
set -e
case "$1" in
configure)
adduser --system --disabled-password --disabled-login --home /var/empty \
--no-create-home --quiet --group sonic && \
chown sonic:sonic -R /var/lib/sonic/
;;
esac
# The marker below is kept verbatim; NOTE(review): presumably substituted by debhelper at build time
#DEBHELPER#
exit 0
================================================
FILE: debian/sonic.service
================================================
# systemd unit running Sonic as the 'sonic' user, with the configuration installed at /etc/sonic.cfg
[Unit]
Description=Sonic Search Index
After=network.target
[Service]
Type=simple
User=sonic
Group=sonic
ExecStart=/usr/bin/sonic -c /etc/sonic.cfg
# Restart only when the process exits with a failure
Restart=on-failure
# No cap on the number of open file descriptors
LimitNOFILE=infinity
[Install]
WantedBy=multi-user.target
================================================
FILE: debian/source/format
================================================
3.0 (quilt)
================================================
FILE: scripts/build_packages.sh
================================================
#!/bin/bash
##
# Sonic
#
# Fast, lightweight and schema-less search backend
# Copyright: 2023, Valerian Saliou <valerian@valeriansaliou.name>
# License: Mozilla Public License v2.0 (MPL v2.0)
##
# Define build pipeline
# Packages Sonic for a single target through `packpack`.
#   $1: architecture, $2: OS, $3: distribution, $4: suffix of the produced package files
# Returns the exit status of `packpack`; produced packages are moved to `./packages/` only when \
#   `packpack` succeeded.
function build_for_target {
  ARCH="$1" OS="$2" DIST="$3" ./packpack/packpack

  release_result=$?

  # Nothing to collect if the build failed
  if [ $release_result -ne 0 ]; then
    return $release_result
  fi

  local target_directory="./packages/${2}_${3}/"

  mkdir -p "$target_directory"
  mv ./build/*$4 "$target_directory"

  echo "Result: Packaged architecture: $1 for OS: $2:$3 (*$4)"

  return $release_result
}
# Run release tasks (from the parent directory of this script)
ABSPATH=$(cd "$(dirname "$0")"; pwd)
BASE_DIR="$ABSPATH/../"

rc=0

pushd "$BASE_DIR" > /dev/null

echo "Executing packages build steps for Sonic..."

# Initialize `packpack` from a fresh clone, then proceed build for each target (only if the \
#   clone went through)
if rm -rf ./packpack && git clone https://github.com/packpack/packpack.git packpack; then
  build_for_target "x86_64" "debian" "bookworm" ".deb"
  rc=$?
else
  rc=$?
fi

# Cleanup environment (whatever the outcome)
rm -rf ./build ./packpack

case $rc in
  0)
    echo "Success: Done executing packages build steps for Sonic"
    ;;
  *)
    echo "Error: Failed executing packages build steps for Sonic"
    ;;
esac

popd > /dev/null

exit $rc
================================================
FILE: scripts/release_binaries.sh
================================================
#!/bin/bash
##
# Sonic
#
# Fast, lightweight and schema-less search backend
# Copyright: 2023, Valerian Saliou <valerian@valeriansaliou.name>
# License: Mozilla Public License v2.0 (MPL v2.0)
##
# Read arguments
# Each argument is expected as 'key=value': the key is whatever comes before the first '=', and
# the value is whatever comes after it (empty if there is no '=').
while [ "$1" != "" ]; do
  argument_key="${1%%=*}"
  argument_value=""

  if [ "$argument_key" != "$1" ]; then
    argument_value="${1#*=}"
  fi

  case "$argument_key" in
    -v | --version)
      # Notice: strip any leading 'v' to the version number (only a leading one; a 'v' found
      # elsewhere in the version, eg. in '1.0.0-dev', must be left untouched)
      SONIC_VERSION="${argument_value#v}"
      ;;
    *)
      echo "Unknown argument received: '$argument_key'"
      exit 1
      ;;
  esac

  shift
done

# Ensure release version is provided
if [ -z "$SONIC_VERSION" ]; then
  echo "No Sonic release version was provided, please provide it using '--version'"
  exit 1
fi
# Define release pipeline
# Builds Sonic in release mode for a target, and packs the binary with the sample configuration.
#   $1: architecture name, $2: libc flavor (both used in the tarball name), $3: Rust target triple
# Returns the exit status of the first failing step, or 0 if the tarball was produced.
function release_for_architecture {
  final_tar="v${SONIC_VERSION}-${1}-${2}.tar.gz"

  # Each step only runs if the previous one succeeded
  if rm -rf ./sonic/ \
    && cargo build --target "$3" --release \
    && mkdir ./sonic \
    && cp -p "target/$3/release/sonic" ./sonic/ \
    && cp -r ./config.cfg sonic/ \
    && tar --owner=0 --group=0 -czvf "$final_tar" ./sonic \
    && rm -r ./sonic/
  then
    release_result=0

    echo "Result: Packed architecture: $1 ($2) to file: $final_tar"
  else
    release_result=$?
  fi

  return $release_result
}
# Run release tasks (from the parent directory of this script)
ABSPATH=$(cd "$(dirname "$0")"; pwd)
BASE_DIR="$ABSPATH/../"

rc=0

pushd "$BASE_DIR" > /dev/null

echo "Executing release steps for Sonic v$SONIC_VERSION..."

release_for_architecture "x86_64" "gnu" "x86_64-unknown-linux-gnu"
rc=$?

case $rc in
  0)
    echo "Success: Done executing release steps for Sonic v$SONIC_VERSION"
    ;;
  *)
    echo "Error: Failed executing release steps for Sonic v$SONIC_VERSION"
    ;;
esac

popd > /dev/null

exit $rc
================================================
FILE: scripts/sign_binaries.sh
================================================
#!/bin/bash
##
# Sonic
#
# Fast, lightweight and schema-less search backend
# Copyright: 2023, Valerian Saliou <valerian@valeriansaliou.name>
# License: Mozilla Public License v2.0 (MPL v2.0)
##
# Read arguments
# Each argument is expected as 'key=value': the key is whatever comes before the first '=', and
# the value is whatever comes after it (empty if there is no '=').
while [ "$1" != "" ]; do
  argument_key="${1%%=*}"
  argument_value=""

  if [ "$argument_key" != "$1" ]; then
    argument_value="${1#*=}"
  fi

  case "$argument_key" in
    -v | --version)
      # Notice: strip any leading 'v' to the version number (only a leading one; a 'v' found
      # elsewhere in the version, eg. in '1.0.0-dev', must be left untouched)
      SONIC_VERSION="${argument_value#v}"
      ;;
    *)
      echo "Unknown argument received: '$argument_key'"
      exit 1
      ;;
  esac

  shift
done

# Ensure release version is provided
if [ -z "$SONIC_VERSION" ]; then
  echo "No Sonic release version was provided, please provide it using '--version'"
  exit 1
fi
# Define sign pipeline
# Produces an ASCII-armored detached GPG signature for a release tarball.
#   $1: architecture name, $2: libc flavor (both used to resolve the tarball name)
# Returns the exit status of `gpg`.
function sign_for_architecture {
  final_tar="v${SONIC_VERSION}-${1}-${2}.tar.gz"
  gpg_signer="valerian@valeriansaliou.name"

  if gpg -u "$gpg_signer" --armor --detach-sign "$final_tar"; then
    sign_result=0

    echo "Result: Signed architecture: $1 ($2) for file: $final_tar"
  else
    sign_result=$?
  fi

  return $sign_result
}
# Run sign tasks (from the parent directory of this script)
ABSPATH=$(cd "$(dirname "$0")"; pwd)
BASE_DIR="$ABSPATH/../"

rc=0

pushd "$BASE_DIR" > /dev/null

echo "Executing sign steps for Sonic v$SONIC_VERSION..."

sign_for_architecture "x86_64" "gnu"
rc=$?

case $rc in
  0)
    echo "Success: Done executing sign steps for Sonic v$SONIC_VERSION"
    ;;
  *)
    echo "Error: Failed executing sign steps for Sonic v$SONIC_VERSION"
    ;;
esac

popd > /dev/null

exit $rc
================================================
FILE: src/channel/command.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use hashbrown::HashMap;
use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use std::fmt;
use std::path::Path;
use std::str::{self, SplitWhitespace};
use std::vec::Vec;
use super::format::unescape;
use super::statistics::ChannelStatistics;
use crate::query::builder::{QueryBuilder, QueryBuilderResult};
use crate::query::types::{
ListMetaData, QueryGenericLang, QueryMetaData, QuerySearchLimit, QuerySearchOffset,
};
use crate::store::fst::StoreFSTPool;
use crate::store::kv::StoreKVPool;
use crate::store::operation::StoreOperationDispatch;
use crate::APP_CONF;
// Errors a channel command can fail with; they are sent back to the client as an 'ERR' \
//   response, using their string representation.
#[derive(PartialEq)]
pub enum ChannelCommandError {
UnknownCommand,
// The requested item does not exist (eg. 'HELP' was asked for a manual that is not known)
NotFound,
// The query could not be built, or the store failed to execute it
QueryError,
// The store executed the operation, but gave back no result where one was required
InternalError,
// NOTE(review): not raised in this part of the file; presumably returned while the daemon stops
ShuttingDown,
// The command is well-formed but violates a configured policy (eg. 'LIMIT' out of bounds); \
//   carries the rejection reason
PolicyReject(&'static str),
// The command arguments could not be parsed; carries the expected command syntax
InvalidFormat(&'static str),
// A meta part was rejected because of its key; carries the '(key, value)' pair
InvalidMetaKey((String, String)),
// A meta part was rejected because of its value; carries the '(key, value)' pair
InvalidMetaValue((String, String)),
}
// Responses a channel command can produce; each one is converted to its wire name and values \
//   by 'to_args()'.
#[derive(PartialEq)]
pub enum ChannelCommandResponse {
// Empty response (empty name, no value)
Void,
// 'OK'
Ok,
// 'PONG' (answer to 'PING')
Pong,
// 'PENDING <id>': carries the identifier of the event that will hold the results
Pending(String),
// 'RESULT <value>'
Result(String),
// 'EVENT <query> <id> <payload>': carries the query type, the event identifier and the results
Event(&'static str, String, String),
// 'ENDED <reason>'
Ended(&'static str),
// 'ERR <reason>'
Err(ChannelCommandError),
}
// Helpers shared by all channel modes (common commands, arguments parsing, operation commit)
pub struct ChannelCommandBase;
// Dispatchers for the search mode commands
pub struct ChannelCommandSearch;
// NOTE(review): presumably the dispatchers for the ingest mode commands (not visible from here)
pub struct ChannelCommandIngest;
// NOTE(review): presumably the dispatchers for the control mode commands (not visible from here)
pub struct ChannelCommandControl;
// Wire form of a response: its name, plus its values (if any)
pub type ChannelCommandResponseArgs = (&'static str, Option<Vec<String>>);
// Outcome of a command dispatcher: the responses to send back, or the error to report
type ChannelResult = Result<Vec<ChannelCommandResponse>, ChannelCommandError>;
// Outcome of parsing a 'KEY(VALUE)' meta part: the '(key, value)' pair in both cases
type MetaPartsResult<'a> = Result<(&'a str, &'a str), (&'a str, &'a str)>;
// Number of alphanumeric characters in a generated event identifier
pub const EVENT_ID_SIZE: usize = 8;
// Character that opens and closes a text argument
const TEXT_PART_BOUNDARY: char = '"';
// Character that escapes the boundary character within a text argument
const TEXT_PART_ESCAPE: char = '\\';
// Characters that wrap the value of a meta part, as in 'KEY(VALUE)'
const META_PART_GROUP_OPEN: char = '(';
const META_PART_GROUP_CLOSE: char = ')';
// NOTE(review): not used in this part of the file; presumably the per-store sub-directories \
//   used by the 'backup' and 'restore' trigger actions
static BACKUP_KV_PATH: &str = "kv";
static BACKUP_FST_PATH: &str = "fst";
lazy_static! {
    // Commands that are accepted in each channel mode
    pub static ref COMMANDS_MODE_SEARCH: Vec<&'static str> =
        vec!["QUERY", "SUGGEST", "LIST", "PING", "HELP", "QUIT"];
    pub static ref COMMANDS_MODE_INGEST: Vec<&'static str> =
        vec!["PUSH", "POP", "COUNT", "FLUSHC", "FLUSHB", "FLUSHO", "PING", "HELP", "QUIT"];
    pub static ref COMMANDS_MODE_CONTROL: Vec<&'static str> =
        vec!["TRIGGER", "INFO", "PING", "HELP", "QUIT"];

    // Actions that are accepted by the 'TRIGGER' command
    pub static ref CONTROL_TRIGGER_ACTIONS: Vec<&'static str> =
        vec!["consolidate", "backup", "restore"];

    // Manuals served by 'HELP' in each channel mode (a single 'commands' manual per mode)
    static ref MANUAL_MODE_SEARCH: HashMap<&'static str, &'static Vec<&'static str>> =
        std::iter::once(("commands", &*COMMANDS_MODE_SEARCH)).collect();
    static ref MANUAL_MODE_INGEST: HashMap<&'static str, &'static Vec<&'static str>> =
        std::iter::once(("commands", &*COMMANDS_MODE_INGEST)).collect();
    static ref MANUAL_MODE_CONTROL: HashMap<&'static str, &'static Vec<&'static str>> =
        std::iter::once(("commands", &*COMMANDS_MODE_CONTROL)).collect();
}
impl ChannelCommandResponse {
pub fn to_args(&self) -> ChannelCommandResponseArgs {
// Convert internal response to channel response arguments; this either gives 'RESPONSE' \
// or 'RESPONSE <value:1> <value:2> <..>' whether there are values or not.
match *self {
ChannelCommandResponse::Void => ("", None),
ChannelCommandResponse::Ok => ("OK", None),
ChannelCommandResponse::Pong => ("PONG", None),
ChannelCommandResponse::Pending(ref id) => ("PENDING", Some(vec![id.to_owned()])),
ChannelCommandResponse::Result(ref id) => ("RESULT", Some(vec![id.to_owned()])),
ChannelCommandResponse::Event(ref query, ref id, ref payload) => (
"EVENT",
Some(vec![query.to_string(), id.to_owned(), payload.to_owned()]),
),
ChannelCommandResponse::Ended(reason) => ("ENDED", Some(vec![reason.to_owned()])),
ChannelCommandResponse::Err(ref reason) => ("ERR", Some(vec![reason.to_string()])),
}
}
}
impl ChannelCommandBase {
// Answers 'PING' with 'PONG'; any argument makes the command invalid.
pub fn dispatch_ping(mut parts: SplitWhitespace) -> ChannelResult {
    if parts.next().is_some() {
        return Err(ChannelCommandError::InvalidFormat("PING"));
    }

    Ok(vec![ChannelCommandResponse::Pong])
}
// Answers 'QUIT' with an 'ENDED quit' response; any argument makes the command invalid.
pub fn dispatch_quit(mut parts: SplitWhitespace) -> ChannelResult {
    if parts.next().is_some() {
        return Err(ChannelCommandError::InvalidFormat("QUIT"));
    }

    Ok(vec![ChannelCommandResponse::Ended("quit")])
}
// Answers 'HELP [<manual>]?' from the provided manuals: without argument, the names of all \
//   manuals are listed; with a single argument, the contents of that manual are listed (or \
//   'NotFound' is raised if it does not exist). More than one argument is a format error.
pub fn generic_dispatch_help(
    mut parts: SplitWhitespace,
    manuals: &HashMap<&str, &Vec<&str>>,
) -> ChannelResult {
    // No manual requested? List available manuals.
    let manual_key = match parts.next() {
        Some(manual_key) => manual_key,
        None => {
            let manual_list: Vec<&str> = manuals.keys().copied().collect();

            return Ok(vec![ChannelCommandResponse::Result(format!(
                "manuals({})",
                manual_list.join(", ")
            ))]);
        }
    };

    // Only a single manual can be requested at once
    if parts.next().is_some() {
        return Err(ChannelCommandError::InvalidFormat("HELP [<manual>]?"));
    }

    match manuals.get(manual_key) {
        Some(manual_data) => Ok(vec![ChannelCommandResponse::Result(format!(
            "{}({})",
            manual_key,
            manual_data.join(", ")
        ))]),
        None => Err(ChannelCommandError::NotFound),
    }
}
// Reads a quoted text argument (ie. '"some text"') from the remaining command parts, consuming \
//   parts up to the one that holds the closing boundary. Returns the unescaped and trimmed \
//   inner text, or nothing if the text is not properly quoted or is empty.
pub fn parse_text_parts(parts: &mut SplitWhitespace) -> Option<String> {
    // Glue parts back together (they were split on whitespaces)
    let mut text_raw = String::new();

    for text_part in parts {
        if !text_raw.is_empty() {
            text_raw.push(' ');
        }

        text_raw.push_str(text_part);

        // End reached? (ie. got boundary character; a lone opening boundary does not count)
        if text_raw.len() > 1 && text_part.ends_with(TEXT_PART_BOUNDARY) {
            // Count the escape characters that directly precede the boundary: an even count \
            //   means that escape characters escape each other, thus the boundary is a real one.
            let count_escapes = text_part[..text_part.len() - 1]
                .bytes()
                .rev()
                .take_while(|byte| *byte as char == TEXT_PART_ESCAPE)
                .count();

            if count_escapes % 2 == 0 {
                break;
            }
        }
    }

    // Ensure parsed text parts are wrapped in boundary characters
    let is_wrapped = text_raw.len() >= 2
        && text_raw.starts_with(TEXT_PART_BOUNDARY)
        && text_raw.ends_with(TEXT_PART_BOUNDARY);

    if !is_wrapped {
        info!("could not properly parse text parts: {}", text_raw);

        return None;
    }

    debug!(
        "parsed text parts (still needs post-processing): {}",
        text_raw
    );

    // Extract inner text (without boundary characters)
    let inner_bytes = &text_raw.as_bytes()[1..text_raw.len() - 1];

    match str::from_utf8(inner_bytes) {
        Ok(text_inner) => {
            let text_inner_string = unescape(text_inner.trim());

            debug!("parsed text parts (post-processed): {}", text_inner_string);

            // Text must not be empty
            Some(text_inner_string).filter(|text| !text.is_empty())
        }
        Err(err) => {
            info!(
                "could not type-cast post-processed text parts: {} because: {}",
                text_raw, err
            );

            None
        }
    }
}
// Reads the next meta part from the remaining command parts (with format: 'KEY(VALUE)'; no \
//   '(' or ')' is allowed in KEY and VALUE). Returns nothing once parts are exhausted, the \
//   '(key, value)' pair if the part is valid, or an error pair otherwise (with '?' as key if \
//   the part could not even be split).
pub fn parse_next_meta_parts<'a>(
    parts: &'a mut SplitWhitespace,
) -> Option<MetaPartsResult<'a>> {
    let part = parts.next()?;

    if let Some(index_open) = part.find(META_PART_GROUP_OPEN) {
        if part.ends_with(META_PART_GROUP_CLOSE) {
            // Key is what comes before the opening character, value is what is wrapped
            let key = &part[..index_open];
            let value = &part[index_open + 1..part.len() - 1];

            // Ensure final key and value do not contain reserved syntax characters
            let is_reserved = |character: char| {
                character == META_PART_GROUP_OPEN || character == META_PART_GROUP_CLOSE
            };

            if key.contains(is_reserved) || value.contains(is_reserved) {
                info!(
                    "parsed meta part, but it contains reserved characters: {} = {}",
                    key, value
                );

                return Some(Err((key, value)));
            }

            debug!("parsed meta part as: {} = {}", key, value);

            return Some(Ok((key, value)));
        }
    }

    info!("could not parse meta part: {}", part);

    Some(Err(("?", part)))
}
// Builds the error reported when the key of a meta part is rejected.
pub fn make_error_invalid_meta_key(meta_key: &str, meta_value: &str) -> ChannelCommandError {
    let meta_pair = (String::from(meta_key), String::from(meta_value));

    ChannelCommandError::InvalidMetaKey(meta_pair)
}
// Builds the error reported when the value of a meta part is rejected.
pub fn make_error_invalid_meta_value(meta_key: &str, meta_value: &str) -> ChannelCommandError {
    let meta_pair = (String::from(meta_key), String::from(meta_value));

    ChannelCommandError::InvalidMetaValue(meta_pair)
}
// Dispatches a built query to the store, and answers 'OK' whatever the store gave back; a \
//   failure to build or to execute the query is reported as a query error.
pub fn commit_ok_operation(query_builder: QueryBuilderResult) -> ChannelResult {
    match query_builder.and_then(StoreOperationDispatch::dispatch) {
        Ok(_) => Ok(vec![ChannelCommandResponse::Ok]),
        Err(_) => Err(ChannelCommandError::QueryError),
    }
}
// Dispatches a built query to the store, and answers with the 'RESULT' it gave back; a \
//   failure to build or to execute the query is reported as a query error, while a missing \
//   result is reported as an internal error.
pub fn commit_result_operation(query_builder: QueryBuilderResult) -> ChannelResult {
    match query_builder.and_then(StoreOperationDispatch::dispatch) {
        Ok(Some(result_inner)) => Ok(vec![ChannelCommandResponse::Result(result_inner)]),
        Ok(None) => Err(ChannelCommandError::InternalError),
        Err(_) => Err(ChannelCommandError::QueryError),
    }
}
// Dispatches a built query to the store, and answers with a 'PENDING' response followed by \
//   the 'EVENT' that holds the results (an empty payload is sent if the store gave back no \
//   result); a failure to build or to execute the query is reported as a query error.
pub fn commit_pending_operation(
    query_type: &'static str,
    query_id: &str,
    query_builder: QueryBuilderResult,
) -> ChannelResult {
    // Idea: the dispatch below is synchronous, and runs on the thread of the channel \
    //   connection. It could be handed over to a pool of worker threads shared by the whole \
    //   daemon in the future, so that Sonic scales to N CPUs rather than 1 CPU per channel \
    //   connection. As of now, scaling Sonic executors to multiple CPUs requires opening \
    //   multiple parallel Sonic Channel connections and dispatching work evenly to each one. \
    //   This does not prevent scaling Sonic vertically, but a worker thread pool would make \
    //   things simpler for the Sonic Channel consumer.
    let results = query_builder
        .and_then(StoreOperationDispatch::dispatch)
        .map_err(|_| ChannelCommandError::QueryError)?;

    let event_id = query_id.to_string();

    Ok(vec![
        ChannelCommandResponse::Pending(event_id.clone()),
        ChannelCommandResponse::Event(query_type, event_id, results.unwrap_or_default()),
    ])
}
// Generates a random event identifier, made of 'EVENT_ID_SIZE' alphanumeric characters.
pub fn generate_event_id() -> String {
    let mut event_id = String::with_capacity(EVENT_ID_SIZE);

    for value in thread_rng().sample_iter(&Alphanumeric).take(EVENT_ID_SIZE) {
        event_id.push(value as char);
    }

    event_id
}
}
impl ChannelCommandSearch {
// Handles 'QUERY <collection> <bucket> "<terms>"' and its optional meta parts: parses the \
//   arguments, checks the limit against configured bounds, then commits the search query as a \
//   pending operation (answered with 'PENDING', then 'EVENT QUERY').
pub fn dispatch_query(mut parts: SplitWhitespace) -> ChannelResult {
    // Read positional arguments (in order: collection, bucket, then quoted terms)
    let (collection, bucket, text) = match (
        parts.next(),
        parts.next(),
        ChannelCommandBase::parse_text_parts(&mut parts),
    ) {
        (Some(collection), Some(bucket), Some(text)) => (collection, bucket, text),
        _ => {
            return Err(ChannelCommandError::InvalidFormat(
                "QUERY <collection> <bucket> \"<terms>\" [LIMIT(<count>)]? [OFFSET(<count>)]? \
                 [LANG(<locale>)]?",
            ));
        }
    };

    // Generate command identifier
    let event_id = ChannelCommandBase::generate_event_id();

    debug!(
        "dispatching search query #{} on collection: {} and bucket: {}",
        event_id, collection, bucket
    );

    // Define query parameters (defaults apply unless overridden by a meta part)
    let mut query_limit = APP_CONF.channel.search.query_limit_default;
    let mut query_offset = 0;
    let mut query_lang = None;

    // Parse meta parts (meta comes after text; all meta parts are consumed, and the last \
    //   parse error wins, if any)
    let mut last_meta_err = None;

    while let Some(meta_result) = ChannelCommandBase::parse_next_meta_parts(&mut parts) {
        match Self::handle_query_meta(meta_result) {
            Ok((Some(query_limit_parsed), None, None)) => query_limit = query_limit_parsed,
            Ok((None, Some(query_offset_parsed), None)) => query_offset = query_offset_parsed,
            Ok((None, None, Some(query_lang_parsed))) => query_lang = Some(query_lang_parsed),
            Err(parse_err) => last_meta_err = Some(parse_err),
            _ => {}
        }
    }

    if let Some(err) = last_meta_err {
        return Err(err);
    }

    // Enforce limit policy
    if query_limit < 1 || query_limit > APP_CONF.channel.search.query_limit_maximum {
        return Err(ChannelCommandError::PolicyReject(
            "LIMIT out of minimum/maximum bounds",
        ));
    }

    debug!(
        "will search for #{} with text: {}, limit: {}, offset: {}, locale: <{:?}>",
        event_id, text, query_limit, query_offset, query_lang
    );

    // Commit 'search' query
    let query = QueryBuilder::search(
        &event_id,
        collection,
        bucket,
        &text,
        query_limit,
        query_offset,
        query_lang,
    );

    ChannelCommandBase::commit_pending_operation("QUERY", &event_id, query)
}
// Handles 'SUGGEST <collection> <bucket> "<word>"' and its optional 'LIMIT' meta part: parses \
//   the arguments, checks the limit against configured bounds, then commits the suggest query \
//   as a pending operation (answered with 'PENDING', then 'EVENT SUGGEST').
pub fn dispatch_suggest(mut parts: SplitWhitespace) -> ChannelResult {
    // Read positional arguments (in order: collection, bucket, then quoted word)
    let (collection, bucket, text) = match (
        parts.next(),
        parts.next(),
        ChannelCommandBase::parse_text_parts(&mut parts),
    ) {
        (Some(collection), Some(bucket), Some(text)) => (collection, bucket, text),
        _ => {
            return Err(ChannelCommandError::InvalidFormat(
                "SUGGEST <collection> <bucket> \"<word>\" [LIMIT(<count>)]?",
            ));
        }
    };

    // Generate command identifier
    let event_id = ChannelCommandBase::generate_event_id();

    debug!(
        "dispatching search suggest #{} on collection: {} and bucket: {}",
        event_id, collection, bucket
    );

    // Define suggest parameters (default applies unless overridden by a meta part)
    let mut suggest_limit = APP_CONF.channel.search.suggest_limit_default;

    // Parse meta parts (meta comes after text; all meta parts are consumed, and the last \
    //   parse error wins, if any)
    let mut last_meta_err = None;

    while let Some(meta_result) = ChannelCommandBase::parse_next_meta_parts(&mut parts) {
        match Self::handle_suggest_meta(meta_result) {
            Ok(Some(suggest_limit_parsed)) => suggest_limit = suggest_limit_parsed,
            Err(parse_err) => last_meta_err = Some(parse_err),
            _ => {}
        }
    }

    if let Some(err) = last_meta_err {
        return Err(err);
    }

    // Enforce limit policy
    if suggest_limit < 1 || suggest_limit > APP_CONF.channel.search.suggest_limit_maximum {
        return Err(ChannelCommandError::PolicyReject(
            "LIMIT out of minimum/maximum bounds",
        ));
    }

    debug!(
        "will suggest for #{} with text: {}, limit: {}",
        event_id, text, suggest_limit
    );

    // Commit 'suggest' query
    let query = QueryBuilder::suggest(&event_id, collection, bucket, &text, suggest_limit);

    ChannelCommandBase::commit_pending_operation("SUGGEST", &event_id, query)
}
/// Dispatches the `LIST` search command.
///
/// Expects `<collection> <bucket>`, optionally followed by `LIMIT(..)` and `OFFSET(..)` meta
/// parts. Every meta part is consumed; the error from the last failing one (if any) is
/// returned.
pub fn dispatch_list(mut parts: SplitWhitespace) -> ChannelResult {
    // Positional arguments are consumed in protocol order
    let collection_part = parts.next();
    let bucket_part = parts.next();

    let (collection, bucket) = match (collection_part, bucket_part) {
        (Some(collection), Some(bucket)) => (collection, bucket),
        _ => {
            return Err(ChannelCommandError::InvalidFormat(
                "LIST <collection> <bucket> [LIMIT(<count>)]? [OFFSET(<count>)]?",
            ));
        }
    };

    // Generate command identifier
    let event_id = ChannelCommandBase::generate_event_id();

    debug!(
        "dispatching search list #{} on collection: {} and bucket: {}",
        event_id, collection, bucket
    );

    // List parameters start from their defaults
    let mut list_limit = APP_CONF.channel.search.list_limit_default;
    let mut list_offset = 0;

    // Meta parts come last; only the last parse failure is remembered
    let mut last_meta_err = None;

    while let Some(meta_result) = ChannelCommandBase::parse_next_meta_parts(&mut parts) {
        match Self::handle_list_meta(meta_result) {
            Ok((Some(limit), None)) => list_limit = limit,
            Ok((None, Some(offset))) => list_offset = offset,
            Ok(_) => {}
            Err(parse_err) => last_meta_err = Some(parse_err),
        }
    }

    if let Some(err) = last_meta_err {
        return Err(err);
    }

    if !(1..=APP_CONF.channel.search.list_limit_maximum).contains(&list_limit) {
        return Err(ChannelCommandError::PolicyReject(
            "LIMIT out of minimum/maximum bounds",
        ));
    }

    // Commit 'list' query
    let query = QueryBuilder::list(&event_id, collection, bucket, list_limit, list_offset);

    ChannelCommandBase::commit_pending_operation("LIST", &event_id, query)
}
/// Dispatches the search-mode `HELP` command by delegating to the generic help dispatcher,
/// handing it the remaining command parts and the search-mode manual.
pub fn dispatch_help(parts: SplitWhitespace) -> ChannelResult {
ChannelCommandBase::generic_dispatch_help(parts, &*MANUAL_MODE_SEARCH)
}
/// Interprets a single meta part of a `QUERY` command.
///
/// On success, exactly one slot of the returned tuple is populated, in `(limit, offset, lang)`
/// order. An unknown key, or a meta part that could not be split, yields an invalid meta key
/// error; a known key with an unparsable value yields an invalid meta value error.
fn handle_query_meta(
    meta_result: MetaPartsResult,
) -> Result<QueryMetaData, ChannelCommandError> {
    let (meta_key, meta_value) = match meta_result {
        Ok(meta_parts) => meta_parts,
        Err(err) => {
            return Err(ChannelCommandBase::make_error_invalid_meta_key(
                err.0, err.1,
            ));
        }
    };

    debug!("handle query meta: {} = {}", meta_key, meta_value);

    // Built lazily, only when a known key carries a value that fails to parse
    let invalid_value =
        || ChannelCommandBase::make_error_invalid_meta_value(meta_key, meta_value);

    match meta_key {
        // 'LIMIT(<count>)' where 0 <= <count> < 2^16
        "LIMIT" => meta_value
            .parse::<QuerySearchLimit>()
            .map(|limit| (Some(limit), None, None))
            .map_err(|_| invalid_value()),
        // 'OFFSET(<count>)' where 0 <= <count> < 2^32
        "OFFSET" => meta_value
            .parse::<QuerySearchOffset>()
            .map(|offset| (None, Some(offset), None))
            .map_err(|_| invalid_value()),
        // 'LANG(<locale>)' where <locale> ∈ ISO 639-3
        "LANG" => QueryGenericLang::from_value(meta_value)
            .map(|lang| (None, None, Some(lang)))
            .ok_or_else(invalid_value),
        _ => Err(ChannelCommandBase::make_error_invalid_meta_key(
            meta_key, meta_value,
        )),
    }
}
/// Interprets a single meta part of a `SUGGEST` command.
///
/// Only `LIMIT(<count>)` is recognized. Any other key, or a meta part that could not be
/// split, yields an invalid meta key error; an unparsable count yields an invalid meta value
/// error.
fn handle_suggest_meta(
    meta_result: MetaPartsResult,
) -> Result<Option<QuerySearchLimit>, ChannelCommandError> {
    let (meta_key, meta_value) = match meta_result {
        Ok(meta_parts) => meta_parts,
        Err(err) => {
            return Err(ChannelCommandBase::make_error_invalid_meta_key(
                err.0, err.1,
            ));
        }
    };

    debug!("handle suggest meta: {} = {}", meta_key, meta_value);

    match meta_key {
        // 'LIMIT(<count>)' where 0 <= <count> < 2^16
        "LIMIT" => meta_value
            .parse::<QuerySearchLimit>()
            .map(Some)
            .map_err(|_| {
                ChannelCommandBase::make_error_invalid_meta_value(meta_key, meta_value)
            }),
        _ => Err(ChannelCommandBase::make_error_invalid_meta_key(
            meta_key, meta_value,
        )),
    }
}
/// Interprets a single meta part of a `LIST` command.
///
/// On success, exactly one slot of the returned tuple is populated, in `(limit, offset)`
/// order. An unknown key, or a meta part that could not be split, yields an invalid meta key
/// error; a known key with an unparsable value yields an invalid meta value error.
fn handle_list_meta(meta_result: MetaPartsResult) -> Result<ListMetaData, ChannelCommandError> {
    let (meta_key, meta_value) = match meta_result {
        Ok(meta_parts) => meta_parts,
        Err(err) => {
            return Err(ChannelCommandBase::make_error_invalid_meta_key(
                err.0, err.1,
            ));
        }
    };

    debug!("handle list meta: {} = {}", meta_key, meta_value);

    // Built lazily, only when a known key carries a value that fails to parse
    let invalid_value =
        || ChannelCommandBase::make_error_invalid_meta_value(meta_key, meta_value);

    match meta_key {
        // 'LIMIT(<count>)' where 0 <= <count> < 2^16
        "LIMIT" => meta_value
            .parse::<QuerySearchLimit>()
            .map(|limit| (Some(limit), None))
            .map_err(|_| invalid_value()),
        // 'OFFSET(<count>)' where 0 <= <count> < 2^32
        "OFFSET" => meta_value
            .parse::<QuerySearchOffset>()
            .map(|offset| (None, Some(offset)))
            .map_err(|_| invalid_value()),
        _ => Err(ChannelCommandBase::make_error_invalid_meta_key(
            meta_key, meta_value,
        )),
    }
}
}
impl ChannelCommandIngest {
/// Dispatches the `PUSH` ingest command.
///
/// Expects `<collection> <bucket> <object> "<text>"`, optionally followed by a `LANG(..)`
/// meta part. Every meta part is consumed; the error from the last failing one (if any) is
/// returned.
pub fn dispatch_push(mut parts: SplitWhitespace) -> ChannelResult {
    // Positional arguments are consumed in protocol order
    let collection_part = parts.next();
    let bucket_part = parts.next();
    let object_part = parts.next();
    let text_part = ChannelCommandBase::parse_text_parts(&mut parts);

    let (collection, bucket, object, text) =
        match (collection_part, bucket_part, object_part, text_part) {
            (Some(collection), Some(bucket), Some(object), Some(text)) => {
                (collection, bucket, object, text)
            }
            _ => {
                return Err(ChannelCommandError::InvalidFormat(
                    "PUSH <collection> <bucket> <object> \"<text>\" [LANG(<locale>)]?",
                ));
            }
        };

    debug!(
        "dispatching ingest push in collection: {}, bucket: {} and object: {}",
        collection, bucket, object
    );
    debug!("ingest push has text: {}", text);

    // No locale hint unless a valid 'LANG' meta part says otherwise
    let mut push_lang = None;

    // Meta parts come after the text; only the last parse failure is remembered
    let mut last_meta_err = None;

    while let Some(meta_result) = ChannelCommandBase::parse_next_meta_parts(&mut parts) {
        match Self::handle_push_meta(meta_result) {
            Ok(Some(lang)) => push_lang = Some(lang),
            Ok(None) => {}
            Err(parse_err) => last_meta_err = Some(parse_err),
        }
    }

    if let Some(err) = last_meta_err {
        return Err(err);
    }

    debug!(
        "will push for text: {} with hinted locale: <{:?}>",
        text, push_lang
    );

    // Commit 'push' query
    let query = QueryBuilder::push(collection, bucket, object, &text, push_lang);

    ChannelCommandBase::commit_ok_operation(query)
}
/// Dispatches the `POP` ingest command.
///
/// Expects exactly `<collection> <bucket> <object> "<text>"`; any part found after the text
/// makes the command invalid.
pub fn dispatch_pop(mut parts: SplitWhitespace) -> ChannelResult {
    // Positional arguments are consumed in protocol order
    let collection_part = parts.next();
    let bucket_part = parts.next();
    let object_part = parts.next();
    let text_part = ChannelCommandBase::parse_text_parts(&mut parts);
    let trailing_part = parts.next();

    if let (Some(collection), Some(bucket), Some(object), Some(text), None) = (
        collection_part,
        bucket_part,
        object_part,
        text_part,
        trailing_part,
    ) {
        debug!(
            "dispatching ingest pop in collection: {}, bucket: {} and object: {}",
            collection, bucket, object
        );
        debug!("ingest pop has text: {}", text);

        // Make 'pop' query
        let query = QueryBuilder::pop(collection, bucket, object, &text);

        ChannelCommandBase::commit_result_operation(query)
    } else {
        Err(ChannelCommandError::InvalidFormat(
            "POP <collection> <bucket> <object> \"<text>\"",
        ))
    }
}
/// Dispatches the `COUNT` ingest command.
///
/// Expects `<collection>`, optionally narrowed by `<bucket>` and then `<object>`. A fourth
/// part makes the command invalid.
pub fn dispatch_count(mut parts: SplitWhitespace) -> ChannelResult {
    let collection_part = parts.next();
    let bucket_part = parts.next();
    let object_part = parts.next();

    // Anything found after the optional object is unexpected
    let has_trailing_part = parts.next().is_some();

    match collection_part {
        Some(collection) if !has_trailing_part => {
            debug!("dispatching ingest count in collection: {}", collection);

            // Make 'count' query
            let query = QueryBuilder::count(collection, bucket_part, object_part);

            ChannelCommandBase::commit_result_operation(query)
        }
        _ => Err(ChannelCommandError::InvalidFormat(
            "COUNT <collection> [<bucket> [<object>]?]?",
        )),
    }
}
/// Dispatches the `FLUSHC` ingest command, which expects exactly one `<collection>` part.
pub fn dispatch_flushc(mut parts: SplitWhitespace) -> ChannelResult {
    let collection_part = parts.next();
    let trailing_part = parts.next();

    if let (Some(collection), None) = (collection_part, trailing_part) {
        debug!(
            "dispatching ingest flush collection in collection: {}",
            collection
        );

        // Make 'flushc' query
        let query = QueryBuilder::flushc(collection);

        ChannelCommandBase::commit_result_operation(query)
    } else {
        Err(ChannelCommandError::InvalidFormat("FLUSHC <collection>"))
    }
}
/// Dispatches the `FLUSHB` ingest command, which expects exactly `<collection> <bucket>`.
pub fn dispatch_flushb(mut parts: SplitWhitespace) -> ChannelResult {
    let collection_part = parts.next();
    let bucket_part = parts.next();
    let trailing_part = parts.next();

    if let (Some(collection), Some(bucket), None) = (collection_part, bucket_part, trailing_part)
    {
        debug!(
            "dispatching ingest flush bucket in collection: {}, bucket: {}",
            collection, bucket
        );

        // Make 'flushb' query
        let query = QueryBuilder::flushb(collection, bucket);

        ChannelCommandBase::commit_result_operation(query)
    } else {
        Err(ChannelCommandError::InvalidFormat(
            "FLUSHB <collection> <bucket>",
        ))
    }
}
/// Dispatches the `FLUSHO` ingest command, which expects exactly
/// `<collection> <bucket> <object>`.
pub fn dispatch_flusho(mut parts: SplitWhitespace) -> ChannelResult {
    let collection_part = parts.next();
    let bucket_part = parts.next();
    let object_part = parts.next();
    let trailing_part = parts.next();

    if let (Some(collection), Some(bucket), Some(object), None) =
        (collection_part, bucket_part, object_part, trailing_part)
    {
        debug!(
            "dispatching ingest flush object in collection: {}, bucket: {}, object: {}",
            collection, bucket, object
        );

        // Make 'flusho' query
        let query = QueryBuilder::flusho(collection, bucket, object);

        ChannelCommandBase::commit_result_operation(query)
    } else {
        Err(ChannelCommandError::InvalidFormat(
            "FLUSHO <collection> <bucket> <object>",
        ))
    }
}
/// Dispatches the ingest-mode `HELP` command by delegating to the generic help dispatcher,
/// handing it the remaining command parts and the ingest-mode manual.
pub fn dispatch_help(parts: SplitWhitespace) -> ChannelResult {
ChannelCommandBase::generic_dispatch_help(parts, &*MANUAL_MODE_INGEST)
}
/// Interprets a single meta part of a `PUSH` command.
///
/// Only `LANG(<locale>)` is recognized. Any other key, or a meta part that could not be
/// split, yields an invalid meta key error; an unrecognized locale yields an invalid meta
/// value error.
fn handle_push_meta(
    meta_result: MetaPartsResult,
) -> Result<Option<QueryGenericLang>, ChannelCommandError> {
    let (meta_key, meta_value) = match meta_result {
        Ok(meta_parts) => meta_parts,
        Err(err) => {
            return Err(ChannelCommandBase::make_error_invalid_meta_key(
                err.0, err.1,
            ));
        }
    };

    debug!("handle push meta: {} = {}", meta_key, meta_value);

    match meta_key {
        // 'LANG(<locale>)' where <locale> ∈ ISO 639-3
        "LANG" => QueryGenericLang::from_value(meta_value)
            .map(Some)
            .ok_or_else(|| {
                ChannelCommandBase::make_error_invalid_meta_value(meta_key, meta_value)
            }),
        _ => Err(ChannelCommandBase::make_error_invalid_meta_key(
            meta_key, meta_value,
        )),
    }
}
}
impl ChannelCommandControl {
/// Dispatches the `TRIGGER` control command.
///
/// Without any argument, the list of available actions is returned. Otherwise the first part
/// names the action (matched case-insensitively):
///
/// * `consolidate` takes no argument and forces a FST consolidation;
/// * `backup <path>` backs up the KV and FST stores under `<path>`;
/// * `restore <path>` restores the KV and FST stores from under `<path>`.
///
/// An unknown action yields a not found error.
pub fn dispatch_trigger(mut parts: SplitWhitespace) -> ChannelResult {
    // At most three parts are ever looked at: action, data, and one trailing part
    let action_part = parts.next();
    let data_part = parts.next();
    let last_part = parts.next();

    let action_key = match action_part {
        Some(action_key) => action_key,
        None => {
            return Ok(vec![ChannelCommandResponse::Result(format!(
                "actions({})",
                CONTROL_TRIGGER_ACTIONS.join(", ")
            ))]);
        }
    };

    let action_key_lower = action_key.to_lowercase();

    match (action_key_lower.as_str(), data_part, last_part) {
        ("consolidate", None, _) => {
            // Force a FST consolidate
            StoreFSTPool::consolidate(true);

            Ok(vec![ChannelCommandResponse::Ok])
        }
        ("consolidate", Some(_), _) => {
            Err(ChannelCommandError::InvalidFormat("TRIGGER consolidate"))
        }
        ("backup", Some(path), None) => {
            // Proceed KV + FST backup (FST is only attempted if KV succeeded)
            let path = Path::new(path);
            let is_backed_up = StoreKVPool::backup(&path.join(BACKUP_KV_PATH)).is_ok()
                && StoreFSTPool::backup(&path.join(BACKUP_FST_PATH)).is_ok();

            if is_backed_up {
                Ok(vec![ChannelCommandResponse::Ok])
            } else {
                Err(ChannelCommandError::InternalError)
            }
        }
        ("backup", _, _) => Err(ChannelCommandError::InvalidFormat("TRIGGER backup <path>")),
        ("restore", Some(path), None) => {
            // Proceed KV + FST restore (FST is only attempted if KV succeeded)
            let path = Path::new(path);
            let is_restored = StoreKVPool::restore(&path.join(BACKUP_KV_PATH)).is_ok()
                && StoreFSTPool::restore(&path.join(BACKUP_FST_PATH)).is_ok();

            if is_restored {
                Ok(vec![ChannelCommandResponse::Ok])
            } else {
                Err(ChannelCommandError::InternalError)
            }
        }
        ("restore", _, _) => Err(ChannelCommandError::InvalidFormat("TRIGGER restore <path>")),
        _ => Err(ChannelCommandError::NotFound),
    }
}
/// Dispatches the `INFO` control command, which takes no argument and responds with a
/// single result line made of `name(value)` statistics pairs.
pub fn dispatch_info(mut parts: SplitWhitespace) -> ChannelResult {
    // 'INFO' accepts no argument at all
    if parts.next().is_some() {
        return Err(ChannelCommandError::InvalidFormat("INFO"));
    }

    let statistics = ChannelStatistics::gather();

    let summary = format!(
        "uptime({}) clients_connected({}) commands_total({}) \
         command_latency_best({}) command_latency_worst({}) \
         kv_open_count({}) fst_open_count({}) fst_consolidate_count({})",
        statistics.uptime,
        statistics.clients_connected,
        statistics.commands_total,
        statistics.command_latency_best,
        statistics.command_latency_worst,
        statistics.kv_open_count,
        statistics.fst_open_count,
        statistics.fst_consolidate_count
    );

    Ok(vec![ChannelCommandResponse::Result(summary)])
}
/// Dispatches the control-mode `HELP` command by delegating to the generic help dispatcher,
/// handing it the remaining command parts and the control-mode manual.
pub fn dispatch_help(parts: SplitWhitespace) -> ChannelResult {
ChannelCommandBase::generic_dispatch_help(parts, &*MANUAL_MODE_CONTROL)
}
}
impl fmt::Display for ChannelCommandError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
match self {
ChannelCommandError::UnknownCommand => write!(f, "unknown_command"),
ChannelCommandError::NotFound => write!(f, "not_found"),
ChannelCommandError::QueryError => write!(f, "query_error"),
ChannelCommandError::InternalError => write!(f, "internal_error"),
ChannelCommandError::ShuttingDown => write!(f, "shutting_down"),
ChannelCommandError::PolicyReject(reason) => write!(f, "policy_reject({})", reason),
ChannelCommandError::InvalidFormat(format) => write!(f, "invalid_format({})", format),
ChannelCommandError::InvalidMetaKey(ref data) => {
write!(f, "invalid_meta_key({}[{}])", data.0, data.1)
}
ChannelCommandError::InvalidMetaValue(ref data) => {
write!(f, "invalid_meta_value({}[{}])", data.0, data.1)
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the leading token produced by `to_args()` for a sample of responses.
    #[test]
    fn it_matches_command_response_string() {
        let expectations = [
            (ChannelCommandResponse::Ok, "OK"),
            (ChannelCommandResponse::Pong, "PONG"),
            (ChannelCommandResponse::Ended(""), "ENDED"),
            (
                ChannelCommandResponse::Err(ChannelCommandError::UnknownCommand),
                "ERR",
            ),
        ];

        for (response, label) in expectations.iter() {
            assert_eq!(response.to_args().0, *label);
        }
    }
}
================================================
FILE: src/channel/format.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
/// Unescapes a command text received over the channel.
///
/// Recognized escape sequences:
///
/// * `\n` becomes a line feed;
/// * `\"` becomes a double quote;
/// * `\\` becomes a single backslash.
///
/// Any other escape sequence is kept verbatim (backslash and following character), and a
/// lone trailing backslash is kept as-is. Previously the character following an unrecognized
/// escape was silently dropped (eg. `C:\path` came out as `C:\ath`), which lost data.
pub fn unescape(text: &str) -> String {
    // Pre-reserve a byte-aware required capacity as to avoid heap resizes (the unescaped \
    //   text is never longer than the escaped one)
    let mut unescaped = String::with_capacity(text.len());
    let mut characters = text.chars();

    while let Some(character) = characters.next() {
        if character != '\\' {
            unescaped.push(character);

            continue;
        }

        // Found escape marker; resolve it against the character that follows
        match characters.next() {
            Some('n') => unescaped.push('\n'),
            Some('"') => unescaped.push('"'),
            Some('\\') => unescaped.push('\\'),
            Some(other) => {
                // Unknown escape: do not lose the escaped character
                unescaped.push('\\');
                unescaped.push(other);
            }
            // Trailing backslash with nothing to escape
            None => unescaped.push('\\'),
        }
    }

    unescaped
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `unescape` over (escaped, expected) pairs.
    #[test]
    fn it_unescapes_command_text() {
        let cases = [
            (r#"hello world!"#, r#"hello world!"#),
            (r#"i'm so good at this"#, r#"i'm so good at this"#),
            (
                r#"look at \\\\"\\\" me i'm \\"\"trying to hack you\""#,
                r#"look at \\"\" me i'm \""trying to hack you""#,
            ),
        ];

        for (escaped, expected) in cases.iter() {
            assert_eq!(unescape(escaped), expected.to_string());
        }
    }
}
// Benchmarks are only compiled for test builds that enable the `benchmark` feature.
#[cfg(all(feature = "benchmark", test))]
mod benches {
extern crate test;
use super::*;
use test::Bencher;
// Measures `unescape` on a short text that mixes escaped backslashes and escaped quotes.
#[bench]
fn bench_unescape_command_text(b: &mut Bencher) {
b.iter(|| unescape(r#"i'm \\"\"trying to hack you\""#));
}
}
================================================
FILE: src/channel/handle.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use std::collections::VecDeque;
use std::io::{ErrorKind, Read, Write};
use std::net::TcpStream;
use std::result::Result;
use std::str;
use std::time::Duration;
use super::message::{
ChannelMessage, ChannelMessageModeControl, ChannelMessageModeIngest, ChannelMessageModeSearch,
ChannelMessageResult,
};
use super::mode::ChannelMode;
use super::statistics::CLIENTS_CONNECTED;
use crate::APP_CONF;
use crate::LINE_FEED;
/// Stateless namespace for the per-connection handling routines (handshake, read loop and
/// message dispatch).
pub struct ChannelHandle;
/// Reasons for which the initial `START` handshake can fail. The matching token (see
/// `to_str`) is sent back to the client in an `ENDED <reason>` line.
enum ChannelHandleError {
// The peer closed the connection (a read returned zero bytes)
Closed,
// 'START' was received, but without a recognized mode
InvalidMode,
// A password is configured, but the client provided none
AuthenticationRequired,
// The provided password does not match the configured one
AuthenticationFailed,
// The first received word was not 'START'
NotRecognized,
// Mapped from the `TimedOut` I/O error kind
TimedOut,
// Mapped from the `ConnectionAborted` I/O error kind
ConnectionAborted,
// Mapped from the `Interrupted` I/O error kind
Interrupted,
// Any other I/O error kind
Unknown,
}
// Extra room added on top of the advertised buffer size (presumably for the line terminator; \
//   confirm)
const LINE_END_GAP: usize = 1;
// Buffer size advertised to clients in the 'STARTED' acknowledgement
const BUFFER_SIZE: usize = 20000;
// Size of the read chunk, and maximum number of pending bytes tolerated in the line buffer
const MAX_LINE_SIZE: usize = BUFFER_SIZE + LINE_END_GAP + 1;
// Read/write timeout (in seconds) applied until the 'START' handshake has succeeded
const TCP_TIMEOUT_NON_ESTABLISHED: u64 = 10;
// Protocol revision advertised to clients in the 'STARTED' acknowledgement
const PROTOCOL_REVISION: u8 = 1;
// Byte that terminates a command line in the incoming stream
const BUFFER_LINE_SEPARATOR: u8 = b'\n';
lazy_static! {
// Banner sent to every client right after it connects; the package name and version are \
//   baked in at compile time from the Cargo environment
static ref CONNECTED_BANNER: String = format!(
"CONNECTED <{} v{}>",
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION")
);
}
impl ChannelHandleError {
pub fn to_str(&self) -> &'static str {
match *self {
ChannelHandleError::Closed => "closed",
ChannelHandleError::InvalidMode => "invalid_mode",
ChannelHandleError::AuthenticationRequired => "authentication_required",
ChannelHandleError::AuthenticationFailed => "authentication_failed",
ChannelHandleError::NotRecognized => "not_recognized",
ChannelHandleError::TimedOut => "timed_out",
ChannelHandleError::ConnectionAborted => "connection_aborted",
ChannelHandleError::Interrupted => "interrupted",
ChannelHandleError::Unknown => "unknown",
}
}
}
impl ChannelHandle {
/// Runs the whole lifecycle of a freshly accepted client connection.
///
/// Sends the connected banner, waits for a valid `START` handshake, acknowledges it with a
/// `STARTED` line (mode, protocol revision and buffer size), and then serves commands until
/// the stream ends. If the handshake fails, an `ENDED <reason>` line is sent instead.
///
/// The connected clients counter is incremented for the duration of the connection. The
/// decrement is performed by a drop guard, so that it also happens when this thread unwinds
/// from a panic (failed writes and stream handling errors panic); the counter used to be
/// left incremented forever in such cases.
pub fn client(mut stream: TcpStream) {
    /// Decrements the connected clients counter when dropped.
    struct ConnectedClientGuard;

    impl ConnectedClientGuard {
        fn acquire() -> Self {
            *CLIENTS_CONNECTED.write().unwrap() += 1;

            ConnectedClientGuard
        }
    }

    impl Drop for ConnectedClientGuard {
        fn drop(&mut self) {
            // Never panic there, as this may run while the thread is already unwinding
            if let Ok(mut clients_connected) = CLIENTS_CONNECTED.write() {
                *clients_connected = clients_connected.saturating_sub(1);
            }
        }
    }

    // Configure stream (non-established)
    ChannelHandle::configure_stream(&stream, false);

    // Send connected banner
    write!(stream, "{}{}", *CONNECTED_BANNER, LINE_FEED).expect("write failed");

    // Increment connected clients count (decremented when the guard goes out of scope)
    let _connected_guard = ConnectedClientGuard::acquire();

    // Ensure channel mode is set
    match Self::ensure_start(&stream) {
        Ok(mode) => {
            // Configure stream (established)
            ChannelHandle::configure_stream(&stream, true);

            // Send started acknowledgement (with environment variables)
            write!(
                stream,
                "STARTED {} protocol({}) buffer({}){}",
                mode.to_str(),
                PROTOCOL_REVISION,
                BUFFER_SIZE,
                LINE_FEED
            )
            .expect("write failed");

            Self::handle_stream(mode, stream);
        }
        Err(err) => {
            write!(stream, "ENDED {}{}", err.to_str(), LINE_FEED).expect("write failed");
        }
    }
}
/// Applies socket options to a client stream: enables `TCP_NODELAY`, and sets both the read
/// and write timeouts.
///
/// The timeout is the configured channel TCP timeout once the connection is established,
/// and `TCP_TIMEOUT_NON_ESTABLISHED` before that. Panics if any option cannot be set.
fn configure_stream(stream: &TcpStream, is_established: bool) {
// Timeout value is expressed in seconds
let tcp_timeout = if is_established {
APP_CONF.channel.tcp_timeout
} else {
TCP_TIMEOUT_NON_ESTABLISHED
};
assert!(stream.set_nodelay(true).is_ok());
assert!(stream
.set_read_timeout(Some(Duration::new(tcp_timeout, 0)))
.is_ok());
assert!(stream
.set_write_timeout(Some(Duration::new(tcp_timeout, 0)))
.is_ok());
}
fn handle_stream(mode: ChannelMode, mut stream: TcpStream) {
// Initialize packet buffer
let mut buffer: VecDeque<u8> = VecDeque::with_capacity(MAX_LINE_SIZE);
// Wait for incoming messages
'handler: loop {
let mut read = [0; MAX_LINE_SIZE];
match stream.read(&mut read) {
Ok(n) => {
// Should close?
if n == 0 {
break;
}
// Buffer overflow?
{
let buffer_len = n + buffer.len();
if buffer_len > MAX_LINE_SIZE {
// Do not continue, as there is too much pending data in the buffer. \
// Most likely the client does not implement a proper back-pressure \
// management system, thus we terminate it.
error!("closing channel thread because of buffer overflow");
panic!("buffer overflow ({}/{} bytes)", buffer_len, MAX_LINE_SIZE);
}
}
// Add chunk to buffer
buffer.extend(&read[0..n]);
// Handle full lines from buffer (keep the last incomplete line in buffer)
{
let mut processed_line = Vec::with_capacity(MAX_LINE_SIZE);
while let Some(byte) = buffer.pop_front() {
// Commit line and start a new one?
if byte == BUFFER_LINE_SEPARATOR {
if Self::on_message(&mode, &stream, &processed_line)
== ChannelMessageResult::Close
{
// Should close?
break 'handler;
}
// Important: clear the contents of the line, as it has just been \
// processed.
processed_line.clear();
} else {
// Append current byte to processed line
processed_line.push(byte);
}
}
// Incomplete line remaining? Put it back in buffer.
if !processed_line.is_empty() {
buffer.extend(processed_line);
}
}
}
Err(err) => {
error!("closing channel thread with traceback: {}", err);
panic!("closing channel");
}
}
}
}
/// Performs the `START <mode> [<password>]` handshake on a freshly connected stream.
///
/// A single read is attempted; its content must begin with `START` (matched
/// case-insensitively), followed by a known mode. When a password is configured, the third
/// word must match it.
///
/// Returns the negotiated mode, or the reason for which the handshake failed.
fn ensure_start(mut stream: &TcpStream) -> Result<ChannelMode, ChannelHandleError> {
    let mut read = [0; MAX_LINE_SIZE];

    let n = match stream.read(&mut read) {
        Ok(n) => n,
        Err(err) => {
            return Err(match err.kind() {
                ErrorKind::TimedOut => ChannelHandleError::TimedOut,
                ErrorKind::ConnectionAborted => ChannelHandleError::ConnectionAborted,
                ErrorKind::Interrupted => ChannelHandleError::Interrupted,
                _ => ChannelHandleError::Unknown,
            });
        }
    };

    if n == 0 {
        return Err(ChannelHandleError::Closed);
    }

    // Invalid UTF-8 degrades to an empty line, which is then not recognized
    let mut parts = str::from_utf8(&read[0..n]).unwrap_or("").split_whitespace();

    let is_start = parts
        .next()
        .map_or(false, |command| command.to_uppercase() == "START");

    if !is_start {
        return Err(ChannelHandleError::NotRecognized);
    }

    let res_mode = parts.next().ok_or(ChannelHandleError::InvalidMode)?;

    debug!("got mode response: {}", res_mode);

    // Extract mode
    let mode = ChannelMode::from_str(res_mode).map_err(|_| ChannelHandleError::InvalidMode)?;

    // Check if authenticated?
    if let Some(ref auth_password) = APP_CONF.channel.auth_password {
        match parts.next() {
            Some(provided_auth) => {
                // Compare provided password with configured password
                if provided_auth != auth_password {
                    info!("password provided, but does not match");

                    return Err(ChannelHandleError::AuthenticationFailed);
                }
            }
            None => {
                info!("no password provided, but one required");

                // No password was provided, but we require one
                return Err(ChannelHandleError::AuthenticationRequired);
            }
        }
    }

    Ok(mode)
}
/// Routes a complete command line to the message handler that matches the channel mode
/// negotiated for this connection, and returns whether the connection should continue or
/// close.
fn on_message(
mode: &ChannelMode,
stream: &TcpStream,
message_slice: &[u8],
) -> ChannelMessageResult {
match mode {
ChannelMode::Search => {
ChannelMessage::on::<ChannelMessageModeSearch>(stream, message_slice)
}
ChannelMode::Ingest => {
ChannelMessage::on::<ChannelMessageModeIngest>(stream, message_slice)
}
ChannelMode::Control => {
ChannelMessage::on::<ChannelMessageModeControl>(stream, message_slice)
}
}
}
}
================================================
FILE: src/channel/listen.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use std::net::TcpListener;
use std::process;
use std::sync::RwLock;
use std::thread;
use super::handle::ChannelHandle;
use crate::{APP_CONF, THREAD_NAME_CHANNEL_CLIENT};
/// Builder for the channel listener (see `ChannelListenBuilder::build`).
pub struct ChannelListenBuilder;
/// Channel TCP listener; `run` accepts clients and serves each of them on its own thread.
pub struct ChannelListen;
lazy_static! {
// Whether the channel still accepts commands; starts as `true`, and is switched to \
//   `false` by `ChannelListen::teardown`
pub static ref CHANNEL_AVAILABLE: RwLock<bool> = RwLock::new(true);
}
impl ChannelListenBuilder {
pub fn build() -> ChannelListen {
ChannelListen {}
}
}
impl ChannelListen {
pub fn run(&self) {
match TcpListener::bind(APP_CONF.channel.inet) {
Ok(listener) => {
info!("listening on tcp://{}", APP_CONF.channel.inet);
for stream in listener.incoming() {
match stream {
Ok(stream) => {
thread::Builder::new()
.name(THREAD_NAME_CHANNEL_CLIENT.to_string())
.spawn(move || {
if let Ok(peer_addr) = stream.peer_addr() {
debug!("channel client connecting: {}", peer_addr);
}
// Create client
ChannelHandle::client(stream);
})
.ok();
}
Err(err) => {
warn!("error handling stream: {}", err);
}
}
}
}
Err(err) => {
error!("error binding channel listener: {}", err);
// Exit Sonic
process::exit(1);
}
}
}
/// Marks the channel as unavailable, so that it cannot be used anymore.
pub fn teardown() {
    let mut is_available = CHANNEL_AVAILABLE.write().unwrap();

    *is_available = false;
}
}
================================================
FILE: src/channel/macros.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
/// Generates the body of a channel mode message handler.
///
/// The command name is extracted from `$message`. An empty command yields a void response; a
/// command that is not listed in `$commands` yields an unknown command error; otherwise the
/// command is routed to the matching `$internal` dispatcher (`PING` and `QUIT` are always
/// wired). A command that is listed in `$commands` but has no route yields an internal error.
#[macro_export]
macro_rules! gen_channel_message_mode_handle {
    ($message:ident, $commands:ident, { $($external:expr => $internal:expr),+, }) => {{
        let (command, parts) = ChannelMessage::extract($message);

        let is_allowed = command.is_empty() || $commands.contains(&command.as_str());

        if !is_allowed {
            Ok(vec![ChannelCommandResponse::Err(
                ChannelCommandError::UnknownCommand,
            )])
        } else {
            match command.as_str() {
                "" => Ok(vec![ChannelCommandResponse::Void]),
                $(
                    $external => $internal(parts),
                )+
                "PING" => ChannelCommandBase::dispatch_ping(parts),
                "QUIT" => ChannelCommandBase::dispatch_quit(parts),
                _ => Ok(vec![ChannelCommandResponse::Err(
                    ChannelCommandError::InternalError,
                )]),
            }
        }
    }};
}
================================================
FILE: src/channel/message.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use std::io::Write;
use std::net::TcpStream;
use std::str::{self, SplitWhitespace};
use std::time::Instant;
use super::command::{
ChannelCommandBase, ChannelCommandControl, ChannelCommandError, ChannelCommandIngest,
ChannelCommandResponse, ChannelCommandResponseArgs, ChannelCommandSearch,
COMMANDS_MODE_CONTROL, COMMANDS_MODE_INGEST, COMMANDS_MODE_SEARCH,
};
use super::listen::CHANNEL_AVAILABLE;
use super::statistics::{COMMANDS_TOTAL, COMMAND_LATENCY_BEST, COMMAND_LATENCY_WORST};
use crate::LINE_FEED;
/// Stateless namespace for the routines that process one received command line.
pub struct ChannelMessage;
/// Marker type selecting the search mode command set.
pub struct ChannelMessageModeSearch;
/// Marker type selecting the ingest mode command set.
pub struct ChannelMessageModeIngest;
/// Marker type selecting the control mode command set.
pub struct ChannelMessageModeControl;
// Commands taking at least that many milliseconds are logged as warnings
const COMMAND_ELAPSED_MILLIS_SLOW_WARN: u128 = 50;
/// Tells the connection handler what to do once a message has been processed.
#[derive(PartialEq)]
pub enum ChannelMessageResult {
// Keep serving the connection
Continue,
// Stop serving the connection (set when an 'ENDED' response has been produced)
Close,
}
/// Command set of a channel mode.
pub trait ChannelMessageMode {
// Handles one raw command line, returning the responses to send back (in order), or the \
//   error to report to the client
fn handle(message: &str) -> Result<Vec<ChannelCommandResponse>, ChannelCommandError>;
}
impl ChannelMessage {
/// Processes one command line received from a client, and writes the responses back.
///
/// The line is handled by the mode `M`, unless the channel has been marked as unavailable,
/// in which case a shutting down error is sent instead. Each non-empty response is written
/// on its own line. Command latency and count statistics are updated afterwards.
///
/// Returns `Close` if one of the responses was an `ENDED` one, `Continue` otherwise. Panics
/// if a response cannot be written to the stream.
pub fn on<M: ChannelMessageMode>(
    mut stream: &TcpStream,
    message_slice: &[u8],
) -> ChannelMessageResult {
    // Invalid UTF-8 degrades to an empty message
    let message = str::from_utf8(message_slice).unwrap_or("");

    debug!("got channel message: {}", message);

    let command_start = Instant::now();

    let mut result = ChannelMessageResult::Continue;

    // Process response for issued command
    let is_available = *CHANNEL_AVAILABLE.read().unwrap();

    let response_args_groups: Vec<ChannelCommandResponseArgs> = if is_available {
        // Handle response arguments to issued command
        match M::handle(message) {
            Ok(responses) => {
                let mut args_groups = Vec::with_capacity(responses.len());

                for response in responses.iter() {
                    // An 'ENDED' response also closes the connection
                    if let ChannelCommandResponse::Ended(_) = response {
                        result = ChannelMessageResult::Close;
                    }

                    args_groups.push(response.to_args());
                }

                args_groups
            }
            Err(reason) => vec![ChannelCommandResponse::Err(reason).to_args()],
        }
    } else {
        // Server going down, reject command
        vec![ChannelCommandResponse::Err(ChannelCommandError::ShuttingDown).to_args()]
    };

    // Serve response messages on socket
    for response_args in response_args_groups {
        // Responses with an empty label are not sent at all
        if response_args.0.is_empty() {
            continue;
        }

        match response_args.1 {
            Some(ref values) => {
                let values_string = values.join(" ");

                write!(stream, "{} {}{}", response_args.0, values_string, LINE_FEED)
                    .expect("write failed");

                debug!(
                    "wrote response with values: {} ({})",
                    response_args.0, values_string
                );
            }
            None => {
                write!(stream, "{}{}", response_args.0, LINE_FEED).expect("write failed");

                debug!("wrote response with no values: {}", response_args.0);
            }
        }
    }

    // Measure and log time it took to execute command
    // Notice: this is critical as to raise developer awareness on the performance bits when \
    //   altering commands-related code, or when making changes to underlying store executors.
    let command_took = command_start.elapsed();

    if command_took.as_millis() < COMMAND_ELAPSED_MILLIS_SLOW_WARN {
        info!(
            "took {}ms/{}us/{}ns to process channel message",
            command_took.as_millis(),
            command_took.as_micros(),
            command_took.as_nanos(),
        );
    } else {
        warn!(
            "took a lot of time: {}ms to process channel message",
            command_took.as_millis(),
        );
    }

    // Update command statistics
    {
        // Update performance measures
        // Notice: commands that take 0ms are not accounted for there (ie. those are usually \
        //   commands that do no work or I/O; they would make statistics less accurate)
        // Important: acquire write locks instead of read + write locks, as to prevent \
        //   deadlocks (explained here: https://github.com/valeriansaliou/sonic/pull/211)
        let elapsed_millis = command_took.as_millis() as u32;

        {
            let mut worst = COMMAND_LATENCY_WORST.write().unwrap();

            *worst = (*worst).max(elapsed_millis);
        }

        {
            let mut best = COMMAND_LATENCY_BEST.write().unwrap();

            // A best latency of zero means that nothing has been measured yet
            let improves_best = *best == 0 || elapsed_millis < *best;

            if elapsed_millis > 0 && improves_best {
                *best = elapsed_millis;
            }
        }

        // Increment total commands
        *COMMANDS_TOTAL.write().unwrap() += 1;
    }

    result
}
/// Splits a raw command line into its upper-cased command name (empty if the line has no
/// word at all) and an iterator over the remaining whitespace-separated parts.
fn extract(message: &str) -> (String, SplitWhitespace<'_>) {
    let mut parts = message.split_whitespace();

    // The first word is the command name; what remains are its arguments
    let command = match parts.next() {
        Some(name) => name.to_uppercase(),
        None => String::new(),
    };

    debug!("will dispatch search command: {}", command);

    (command, parts)
}
}
/// Search mode command set: `QUERY`, `SUGGEST`, `LIST` and `HELP` (the handler macro also
/// wires `PING` and `QUIT`).
impl ChannelMessageMode for ChannelMessageModeSearch {
fn handle(message: &str) -> Result<Vec<ChannelCommandResponse>, ChannelCommandError> {
gen_channel_message_mode_handle!(message, COMMANDS_MODE_SEARCH, {
"QUERY" => ChannelCommandSearch::dispatch_query,
"SUGGEST" => ChannelCommandSearch::dispatch_suggest,
"LIST" => ChannelCommandSearch::dispatch_list,
"HELP" => ChannelCommandSearch::dispatch_help,
})
}
}
/// Ingest mode command set: `PUSH`, `POP`, `COUNT`, `FLUSHC`, `FLUSHB`, `FLUSHO` and `HELP`
/// (the handler macro also wires `PING` and `QUIT`).
impl ChannelMessageMode for ChannelMessageModeIngest {
fn handle(message: &str) -> Result<Vec<ChannelCommandResponse>, ChannelCommandError> {
gen_channel_message_mode_handle!(message, COMMANDS_MODE_INGEST, {
"PUSH" => ChannelCommandIngest::dispatch_push,
"POP" => ChannelCommandIngest::dispatch_pop,
"COUNT" => ChannelCommandIngest::dispatch_count,
"FLUSHC" => ChannelCommandIngest::dispatch_flushc,
"FLUSHB" => ChannelCommandIngest::dispatch_flushb,
"FLUSHO" => ChannelCommandIngest::dispatch_flusho,
"HELP" => ChannelCommandIngest::dispatch_help,
})
}
}
/// Control mode command set: `TRIGGER`, `INFO` and `HELP` (the handler macro also wires
/// `PING` and `QUIT`).
impl ChannelMessageMode for ChannelMessageModeControl {
fn handle(message: &str) -> Result<Vec<ChannelCommandResponse>, ChannelCommandError> {
gen_channel_message_mode_handle!(message, COMMANDS_MODE_CONTROL, {
"TRIGGER" => ChannelCommandControl::dispatch_trigger,
"INFO" => ChannelCommandControl::dispatch_info,
"HELP" => ChannelCommandControl::dispatch_help,
})
}
}
================================================
FILE: src/channel/mod.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
#[macro_use]
mod macros;
mod command;
mod format;
mod handle;
mod message;
mod mode;
pub mod listen;
pub mod statistics;
================================================
FILE: src/channel/mode.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
/// Mode a client selects when starting a channel session; it determines which command set
/// is served on the connection.
pub enum ChannelMode {
    Search,
    Ingest,
    Control,
}

impl ChannelMode {
    /// Parses a mode from its lowercase name (matching is case-sensitive); any other value
    /// is rejected.
    pub fn from_str(value: &str) -> Result<Self, ()> {
        let mode = match value {
            "search" => Self::Search,
            "ingest" => Self::Ingest,
            "control" => Self::Control,
            _ => return Err(()),
        };

        Ok(mode)
    }

    /// Returns the lowercase name of the mode, as accepted by `from_str`.
    pub fn to_str(&self) -> &'static str {
        match self {
            Self::Search => "search",
            Self::Ingest => "ingest",
            Self::Control => "control",
        }
    }
}
================================================
FILE: src/channel/statistics.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use std::ops::Deref;
use std::sync::RwLock;
use std::time::Instant;
use crate::store::fst::StoreFSTPool;
use crate::store::kv::StoreKVPool;
lazy_static! {
// Reference instant for the uptime; set when first dereferenced (see `ensure_states`)
static ref START_TIME: Instant = Instant::now();
// Number of clients currently being served
pub static ref CLIENTS_CONNECTED: RwLock<u32> = RwLock::new(0);
// Number of channel messages processed so far
pub static ref COMMANDS_TOTAL: RwLock<u64> = RwLock::new(0);
// Lowest non-zero command latency observed, in milliseconds (zero: nothing measured yet)
pub static ref COMMAND_LATENCY_BEST: RwLock<u32> = RwLock::new(0);
// Highest command latency observed, in milliseconds
pub static ref COMMAND_LATENCY_WORST: RwLock<u32> = RwLock::new(0);
}
// Point-in-time snapshot of the server statistics, as produced by
// `ChannelStatistics::gather()`.
#[derive(Default)]
pub struct ChannelStatistics {
// Whole seconds elapsed since `START_TIME` was initialized.
pub uptime: u64,
// Copies of the corresponding global counters at snapshot time.
pub clients_connected: u32,
pub commands_total: u64,
pub command_latency_best: u32,
pub command_latency_worst: u32,
// Value returned by `StoreKVPool::count()`.
pub kv_open_count: usize,
// First and second members of the tuple returned by `StoreFSTPool::count()`.
pub fst_open_count: usize,
pub fst_consolidate_count: usize,
}
// Forces initialization of every statistics static. Dereferencing a lazy
// static is enough to run its initializer, so the values are discarded.
pub fn ensure_states() {
    let _ = START_TIME.deref();
    let _ = CLIENTS_CONNECTED.deref();
    let _ = COMMANDS_TOTAL.deref();
    let _ = COMMAND_LATENCY_BEST.deref();
    let _ = COMMAND_LATENCY_WORST.deref();
}
impl ChannelStatistics {
    // Builds a snapshot from the store pool counts and the global counters.
    // Panics if one of the counter locks is poisoned (`unwrap` on `read()`).
    pub fn gather() -> ChannelStatistics {
        // Pool counts are queried first, before any counter lock is read
        let kv_open_count = StoreKVPool::count();
        let fst_counts = StoreFSTPool::count();

        ChannelStatistics {
            uptime: START_TIME.elapsed().as_secs(),
            clients_connected: *CLIENTS_CONNECTED.read().unwrap(),
            commands_total: *COMMANDS_TOTAL.read().unwrap(),
            command_latency_best: *COMMAND_LATENCY_BEST.read().unwrap(),
            command_latency_worst: *COMMAND_LATENCY_WORST.read().unwrap(),
            kv_open_count,
            fst_open_count: fst_counts.0,
            fst_consolidate_count: fst_counts.1,
        }
    }
}
================================================
FILE: src/config/defaults.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use std::net::SocketAddr;
use std::path::PathBuf;
// Fallback for `server.log_level` when the key is absent from the configuration.
pub fn server_log_level() -> String {
    String::from("error")
}
// Fallback for `channel.inet`: the IPv6 loopback address, port 1491.
// Built from typed parts, which yields the same value as parsing "[::1]:1491".
pub fn channel_inet() -> SocketAddr {
    SocketAddr::from((std::net::Ipv6Addr::LOCALHOST, 1491))
}
// Fallback for `channel.tcp_timeout`.
pub fn channel_tcp_timeout() -> u64 {
    300
}

// Fallback for `channel.search.query_limit_default`.
pub fn channel_search_query_limit_default() -> u16 {
    10
}

// Fallback for `channel.search.query_limit_maximum`.
pub fn channel_search_query_limit_maximum() -> u16 {
    100
}

// Fallback for `channel.search.query_alternates_try`.
pub fn channel_search_query_alternates_try() -> usize {
    4
}

// Fallback for `channel.search.suggest_limit_default`.
pub fn channel_search_suggest_limit_default() -> u16 {
    5
}

// Fallback for `channel.search.suggest_limit_maximum`.
pub fn channel_search_suggest_limit_maximum() -> u16 {
    20
}

// Fallback for `channel.search.list_limit_default`.
pub fn channel_search_list_limit_default() -> u16 {
    100
}

// Fallback for `channel.search.list_limit_maximum`.
pub fn channel_search_list_limit_maximum() -> u16 {
    500
}
// Fallback for `store.kv.path`.
pub fn store_kv_path() -> PathBuf {
    "./data/store/kv/".into()
}

// Fallback for `store.kv.retain_word_objects`.
pub fn store_kv_retain_word_objects() -> usize {
    1_000
}

// Fallback for `store.kv.pool.inactive_after`.
pub fn store_kv_pool_inactive_after() -> u64 {
    1_800
}

// Fallback for `store.kv.database.flush_after`. Stays below the pool
// `inactive_after` fallback, as the configuration validation requires.
pub fn store_kv_database_flush_after() -> u64 {
    900
}

// Fallback for `store.kv.database.compress`.
pub fn store_kv_database_compress() -> bool {
    true
}

// Fallback for `store.kv.database.parallelism`.
pub fn store_kv_database_parallelism() -> u16 {
    2
}

// Fallback for `store.kv.database.max_compactions`.
pub fn store_kv_database_max_compactions() -> u16 {
    1
}

// Fallback for `store.kv.database.max_flushes`.
pub fn store_kv_database_max_flushes() -> u16 {
    1
}

// Fallback for `store.kv.database.write_buffer`.
pub fn store_kv_database_write_buffer() -> usize {
    16_384
}

// Fallback for `store.kv.database.write_ahead_log`.
pub fn store_kv_database_write_ahead_log() -> bool {
    true
}
// Fallback for `store.fst.path`.
pub fn store_fst_path() -> PathBuf {
    "./data/store/fst/".into()
}

// Fallback for `store.fst.pool.inactive_after`.
pub fn store_fst_pool_inactive_after() -> u64 {
    300
}

// Fallback for `store.fst.graph.consolidate_after`. Stays below the pool
// `inactive_after` fallback, as the configuration validation requires.
pub fn store_fst_graph_consolidate_after() -> u64 {
    180
}

// Fallback for `store.fst.graph.max_size`.
pub fn store_fst_graph_max_size() -> usize {
    2_048
}

// Fallback for `store.fst.graph.max_words`.
pub fn store_fst_graph_max_words() -> usize {
    250_000
}
================================================
FILE: src/config/env_var.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use regex::Regex;
use serde::{Deserialize, Deserializer};
use std::net::SocketAddr;
use std::path::PathBuf;
// Newtype around `String`, used by `opt_str` as the element type when
// deserializing an `Option`.
#[derive(Deserialize, PartialEq)]
struct WrappedString(String);
// Deserializes a string value. When the whole value has the `${env.NAME}`
// form, it is replaced by the content of the environment variable `NAME`
// (panicking if that variable is not set); otherwise it is returned as-is.
pub fn str<'de, D>(deserializer: D) -> Result<String, D::Error>
where
    D: Deserializer<'de>,
{
    String::deserialize(deserializer).map(|raw| {
        if is_env_var(&raw) {
            get_env_var(&raw)
        } else {
            raw
        }
    })
}
pub fn opt_str<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
D: Deserializer<'de>,
{
Option::<WrappedString>::deserialize(deserializer).map(|option: Option<WrappedString>| {
option.map(|wrapped: WrappedString| {
let value = wrapped.0;
match is_env_var(&value) {
true => get_env_var(&value),
false => value,
}
})
})
}
pub fn socket_addr<'de, D>(deserializer: D) -> Result<SocketAddr, D::Error>
where
D: Deserializer<'de>,
{
let value = String::deserialize(deserializer)?;
match is_env_var(&value) {
true => Ok(get_env_var(&value).parse().unwrap()),
false => Ok(value.parse().unwrap()),
}
}
// Deserializes a filesystem path. When the whole value has the `${env.NAME}`
// form, the path is read from the environment variable `NAME` (panicking if
// that variable is not set); otherwise the value itself is used as the path.
pub fn path_buf<'de, D>(deserializer: D) -> Result<PathBuf, D::Error>
where
    D: Deserializer<'de>,
{
    let raw = String::deserialize(deserializer)?;

    let resolved = if is_env_var(&raw) {
        get_env_var(&raw)
    } else {
        raw
    };

    Ok(PathBuf::from(resolved))
}
// Tells whether `value` is, in its entirety, an environment variable
// reference of the form `${env.NAME}`, where `NAME` is one or more word
// characters.
fn is_env_var(value: &str) -> bool {
    let pattern = Regex::new(r"^\$\{env\.\w+\}$").expect("env_var: regex is invalid");

    pattern.is_match(value)
}
// Resolves an environment variable reference of the form `${env.NAME}` to
// the value of the environment variable `NAME`.
//
// `wrapped_key` must already have been accepted by `is_env_var`; the name is
// obtained by slicing off the fixed `${env.` prefix and `}` suffix, so a
// shorter input panics on the slice.
//
// Panics when the variable is not set (or is not valid Unicode).
fn get_env_var(wrapped_key: &str) -> String {
    const PREFIX: &str = "${env.";
    const SUFFIX: &str = "}";

    // Borrow the variable name straight from the input, no copy needed
    let key = &wrapped_key[PREFIX.len()..(wrapped_key.len() - SUFFIX.len())];

    std::env::var(key).unwrap_or_else(|_| panic!("env_var: variable '{}' is not set", key))
}
#[cfg(test)]
mod tests {
    use super::*;

    // Only a value that is exactly `${env.NAME}` counts as a reference.
    #[test]
    fn it_checks_environment_variable_patterns() {
        assert!(is_env_var("${env.XXX}"));

        let rejected = [
            "${env.XXX",
            "${env.XXX}a",
            "a${env.XXX}",
            "{env.XXX}",
            "$env.XXX}",
            "${envXXX}",
            "${.XXX}",
            "${XXX}",
        ];

        for candidate in rejected.iter() {
            assert!(!is_env_var(candidate));
        }
    }

    // A reference resolves to the value of the named variable.
    #[test]
    fn it_gets_environment_variable() {
        std::env::set_var("TEST", "test");

        let resolved = get_env_var("${env.TEST}");

        assert_eq!(resolved, "test");

        std::env::remove_var("TEST");
    }
}
================================================
FILE: src/config/logger.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use log::{Level, LevelFilter, Metadata, Record, SetLoggerError};
// Minimal logger that prints records to the standard output.
pub struct ConfigLogger;

impl log::Log for ConfigLogger {
    // Accepts every level up to and including `Debug`.
    fn enabled(&self, metadata: &Metadata) -> bool {
        Level::Debug >= metadata.level()
    }

    // Prints the record as "(LEVEL) - message" when its level is accepted.
    fn log(&self, record: &Record) {
        if !self.enabled(record.metadata()) {
            return;
        }

        println!("({}) - {}", record.level(), record.args());
    }

    // Nothing is buffered by this logger, hence nothing to flush.
    fn flush(&self) {}
}

impl ConfigLogger {
    // Sets the global maximum log level, then registers this logger as the
    // global one. The error of `log::set_logger` is returned to the caller.
    pub fn init(level: LevelFilter) -> Result<(), SetLoggerError> {
        log::set_max_level(level);

        log::set_logger(&ConfigLogger)
    }
}
================================================
FILE: src/config/mod.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
mod defaults;
mod env_var;
pub mod logger;
pub mod options;
pub mod reader;
================================================
FILE: src/config/options.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use std::net::SocketAddr;
use std::path::PathBuf;
use super::defaults;
use super::env_var;
// Root of the deserialized configuration. The three sections carry no serde
// default attribute.
#[derive(Deserialize)]
pub struct Config {
pub server: ConfigServer,
pub channel: ConfigChannel,
pub store: ConfigStore,
}
// `server` section of the configuration.
#[derive(Deserialize)]
pub struct ConfigServer {
// Read through `env_var::str` (so it may be an `${env.NAME}` reference);
// falls back to `defaults::server_log_level` when absent.
#[serde(
default = "defaults::server_log_level",
deserialize_with = "env_var::str"
)]
pub log_level: String,
}
// `channel` section of the configuration.
#[derive(Deserialize)]
pub struct ConfigChannel {
// Read through `env_var::socket_addr` (so it may be an `${env.NAME}`
// reference); falls back to `defaults::channel_inet` when absent.
#[serde(
default = "defaults::channel_inet",
deserialize_with = "env_var::socket_addr"
)]
pub inet: SocketAddr,
#[serde(default = "defaults::channel_tcp_timeout")]
pub tcp_timeout: u64,
// Optional; read through `env_var::opt_str`, `None` when absent.
#[serde(default, deserialize_with = "env_var::opt_str")]
pub auth_password: Option<String>,
// Nested `channel.search` section (no serde default attribute).
pub search: ConfigChannelSearch,
}
// `channel.search` section of the configuration. Every field falls back to
// the function of the matching name in `defaults` when absent.
#[derive(Deserialize)]
pub struct ConfigChannelSearch {
#[serde(default = "defaults::channel_search_query_limit_default")]
pub query_limit_default: u16,
#[serde(default = "defaults::channel_search_query_limit_maximum")]
pub query_limit_maximum: u16,
#[serde(default = "defaults::channel_search_query_alternates_try")]
pub query_alternates_try: usize,
#[serde(default = "defaults::channel_search_suggest_limit_default")]
pub suggest_limit_default: u16,
#[serde(default = "defaults::channel_search_suggest_limit_maximum")]
pub suggest_limit_maximum: u16,
#[serde(default = "defaults::channel_search_list_limit_default")]
pub list_limit_default: u16,
#[serde(default = "defaults::channel_search_list_limit_maximum")]
pub list_limit_maximum: u16,
}
// `store` section of the configuration, split into the KV and FST stores.
// Neither sub-section carries a serde default attribute.
#[derive(Deserialize)]
pub struct ConfigStore {
pub kv: ConfigStoreKV,
pub fst: ConfigStoreFST,
}
// `store.kv` section of the configuration.
#[derive(Deserialize)]
pub struct ConfigStoreKV {
// Read through `env_var::path_buf` (so it may be an `${env.NAME}`
// reference); falls back to `defaults::store_kv_path` when absent.
#[serde(
default = "defaults::store_kv_path",
deserialize_with = "env_var::path_buf"
)]
pub path: PathBuf,
#[serde(default = "defaults::store_kv_retain_word_objects")]
pub retain_word_objects: usize,
// Nested `store.kv.pool` and `store.kv.database` sections (no serde
// default attribute).
pub pool: ConfigStoreKVPool,
pub database: ConfigStoreKVDatabase,
}
// `store.kv.pool` section of the configuration.
#[derive(Deserialize)]
pub struct ConfigStoreKVPool {
// Must be strictly greater than `store.kv.database.flush_after`
// (enforced by `ConfigReader::validate`).
#[serde(default = "defaults::store_kv_pool_inactive_after")]
pub inactive_after: u64,
}
// `store.kv.database` section of the configuration. Unless noted, a field
// falls back to the function of the matching name in `defaults` when absent.
#[derive(Deserialize)]
pub struct ConfigStoreKVDatabase {
// Must be strictly lower than `store.kv.pool.inactive_after`
// (enforced by `ConfigReader::validate`).
#[serde(default = "defaults::store_kv_database_flush_after")]
pub flush_after: u64,
#[serde(default = "defaults::store_kv_database_compress")]
pub compress: bool,
#[serde(default = "defaults::store_kv_database_parallelism")]
pub parallelism: u16,
// Optional, with no default function attached.
pub max_files: Option<u32>,
#[serde(default = "defaults::store_kv_database_max_compactions")]
pub max_compactions: u16,
#[serde(default = "defaults::store_kv_database_max_flushes")]
pub max_flushes: u16,
// Must not be zero (enforced by `ConfigReader::validate`).
#[serde(default = "defaults::store_kv_database_write_buffer")]
pub write_buffer: usize,
#[serde(default = "defaults::store_kv_database_write_ahead_log")]
pub write_ahead_log: bool,
}
// `store.fst` section of the configuration.
#[derive(Deserialize)]
pub struct ConfigStoreFST {
// Read through `env_var::path_buf` (so it may be an `${env.NAME}`
// reference); falls back to `defaults::store_fst_path` when absent.
#[serde(
default = "defaults::store_fst_path",
deserialize_with = "env_var::path_buf"
)]
pub path: PathBuf,
// Nested `store.fst.pool` and `store.fst.graph` sections (no serde
// default attribute).
pub pool: ConfigStoreFSTPool,
pub graph: ConfigStoreFSTGraph,
}
// `store.fst.pool` section of the configuration.
#[derive(Deserialize)]
pub struct ConfigStoreFSTPool {
// Must be strictly greater than `store.fst.graph.consolidate_after`
// (enforced by `ConfigReader::validate`).
#[serde(default = "defaults::store_fst_pool_inactive_after")]
pub inactive_after: u64,
}
// `store.fst.graph` section of the configuration. Every field falls back to
// the function of the matching name in `defaults` when absent.
#[derive(Deserialize)]
pub struct ConfigStoreFSTGraph {
// Must be strictly lower than `store.fst.pool.inactive_after`
// (enforced by `ConfigReader::validate`).
#[serde(default = "defaults::store_fst_graph_consolidate_after")]
pub consolidate_after: u64,
#[serde(default = "defaults::store_fst_graph_max_size")]
pub max_size: usize,
#[serde(default = "defaults::store_fst_graph_max_words")]
pub max_words: usize,
}
================================================
FILE: src/config/reader.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use std::fs::File;
use std::io::Read;
use super::options::Config;
use crate::APP_ARGS;
pub struct ConfigReader;
impl ConfigReader {
pub fn make() -> Config {
debug!("reading config file: {}", &APP_ARGS.config);
let mut file = File::open(&APP_ARGS.config).expect("cannot find config file");
let mut conf = String::new();
file.read_to_string(&mut conf)
.expect("cannot read config file");
debug!("read config file: {}", &APP_ARGS.config);
// Parse configuration
let config = toml::from_str(&conf).expect("syntax error in config file");
// Validate configuration
Self::validate(&config);
config
}
fn validate(config: &Config) {
// Check 'write_buffer' for KV
if config.store.kv.database.write_buffer == 0 {
panic!("write_buffer for kv must not be zero");
}
// Check 'flush_after' for KV
if config.store.kv.database.flush_after >= config.store.kv.pool.inactive_after {
panic!("flush_after for kv must be strictly lower than inactive_after");
}
// Check 'consolidate_after' for FST
if config.store.fst.graph.consolidate_after >= config.store.fst.pool.inactive_after {
panic!("consolidate_after for fst must be strictly lower than inactive_after");
}
}
}
================================================
FILE: src/executor/count.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use crate::store::fst::StoreFSTPool;
use crate::store::fst::{StoreFSTActionBuilder, StoreFSTMisc};
use crate::store::item::StoreItem;
use crate::store::kv::StoreKVActionBuilder;
use crate::store::kv::{StoreKVAcquireMode, StoreKVPool};
// Executor for the count operation.
pub struct ExecutorCount;

impl ExecutorCount {
    // Counts, depending on how specific `store` is:
    // - (collection, bucket, object): the terms of the object, from KV;
    // - (collection, bucket): the words of the bucket graph, from FST;
    // - (collection): the buckets of the collection.
    // Any other shape of `store`, or a store that cannot be acquired, is an
    // error.
    pub fn execute(store: StoreItem) -> Result<u32, ()> {
        match store {
            // Count terms in (collection, bucket, object) from KV
            StoreItem(collection, Some(bucket), Some(object)) => {
                // Important: hold the database access read lock for the whole
                // arm, so that the database cannot be erased while in use.
                general_kv_access_lock_read!();

                match StoreKVPool::acquire(StoreKVAcquireMode::OpenOnly, collection) {
                    Ok(kv_store) => {
                        // Important: acquire bucket store read lock
                        executor_kv_lock_read!(kv_store);

                        let kv_action = StoreKVActionBuilder::access(bucket, kv_store);

                        // Resolve OID to IID, then list the terms of that IID; an
                        // unknown OID, a missing term list or a lookup error all
                        // count as zero terms.
                        let term_count = kv_action
                            .get_oid_to_iid(object.as_str())
                            .unwrap_or(None)
                            .and_then(|iid| kv_action.get_iid_to_terms(iid).unwrap_or(None))
                            .map_or(0, |terms| terms.len() as u32);

                        Ok(term_count)
                    }
                    Err(_) => Err(()),
                }
            }
            // Count terms in (collection, bucket) from FST
            StoreItem(collection, Some(bucket), None) => {
                // Important: hold the graph access read lock for the whole arm,
                // so that the graph cannot be erased while in use.
                general_fst_access_lock_read!();

                match StoreFSTPool::acquire(collection, bucket) {
                    Ok(fst_store) => {
                        let fst_action = StoreFSTActionBuilder::access(fst_store);

                        Ok(fst_action.count_words() as u32)
                    }
                    Err(_) => Err(()),
                }
            }
            // Count buckets in (collection) from FS
            StoreItem(collection, None, None) => {
                StoreFSTMisc::count_collection_buckets(collection).map(|count| count as u32)
            }
            _ => Err(()),
        }
    }
}
================================================
FILE: src/executor/flushb.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use crate::store::fst::StoreFSTActionBuilder;
use crate::store::item::StoreItem;
use crate::store::kv::{StoreKVAcquireMode, StoreKVActionBuilder, StoreKVPool};
// Executor for the bucket flush operation.
pub struct ExecutorFlushB;

impl ExecutorFlushB {
    // Erases a whole bucket, from both the KV store and the FST graph.
    //
    // `store` must be of the (collection, bucket) shape. Returns the count
    // reported by the KV batch erasure, or `Ok(0)` when the collection store
    // does not exist. Every other outcome is an error.
    pub fn execute(store: StoreItem) -> Result<u32, ()> {
        if let StoreItem(collection, Some(bucket), None) = store {
            // Important: hold the database access read lock while working, so
            // that the database cannot be erased in the meantime.
            // Notice: the FST lock is taken in write mode, as the graph is
            // going to be erased.
            general_kv_access_lock_read!();
            general_fst_access_lock_write!();

            if let Ok(kv_store) = StoreKVPool::acquire(StoreKVAcquireMode::OpenOnly, collection) {
                // Important: acquire bucket store write lock
                executor_kv_lock_write!(kv_store);

                if kv_store.is_none() {
                    // Store does not exist, consider as already erased.
                    debug!(
                        "collection store does not exist, consider {} from {} already erased",
                        bucket.as_str(),
                        collection.as_str()
                    );

                    return Ok(0);
                }

                // Store exists, proceed erasure.
                debug!(
                    "collection store exists, erasing: {} from {}",
                    bucket.as_str(),
                    collection.as_str()
                );

                let kv_action = StoreKVActionBuilder::access(bucket, kv_store);

                // Notice: the KV bucket erasure helper cannot be used here, as
                // it requires a database lock, which would dead-lock with the
                // one already held; the erasure is thus performed from here.
                if let Ok(erase_count) = kv_action.batch_erase_bucket() {
                    if StoreFSTActionBuilder::erase(collection, Some(bucket)).is_ok() {
                        debug!("done with bucket erasure");

                        return Ok(erase_count);
                    }
                }
            }
        }

        Err(())
    }
}
================================================
FILE: src/executor/flushc.rs
================================================
// Sonic
//
// Fast, lightweight and schema-less search backend
// Copyright: 2019, Valerian Saliou <valerian@valeriansaliou.name>
// License: Mozilla Public License v2.0 (MPL v2.0)
use crate::store::fst::StoreFSTActionBuilder;
use crate::store::item::StoreItem;
use crate::store::kv::StoreKVActionBuilder;
pub struct ExecutorFlushC;
impl ExecutorFlushC {
pub fn execute(store: StoreItem) -> Result<u32, ()> {
// Important: do not acquire the stor
gitextract_z4cxjv9t/
├── .dockerignore
├── .github/
│ ├── FUNDING.yml
│ └── workflows/
│ ├── build.yml
│ └── test.yml
├── .gitignore
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONFIGURATION.md
├── CONTRIBUTING.md
├── Cargo.toml
├── Dockerfile
├── INNER_WORKINGS.md
├── LICENSE.md
├── PACKAGING.md
├── PROTOCOL.md
├── README.md
├── config.cfg
├── debian/
│ ├── changelog
│ ├── compat
│ ├── control
│ ├── copyright
│ ├── rules
│ ├── sonic.install
│ ├── sonic.postinst
│ ├── sonic.service
│ └── source/
│ └── format
├── scripts/
│ ├── build_packages.sh
│ ├── release_binaries.sh
│ └── sign_binaries.sh
├── src/
│ ├── channel/
│ │ ├── command.rs
│ │ ├── format.rs
│ │ ├── handle.rs
│ │ ├── listen.rs
│ │ ├── macros.rs
│ │ ├── message.rs
│ │ ├── mod.rs
│ │ ├── mode.rs
│ │ └── statistics.rs
│ ├── config/
│ │ ├── defaults.rs
│ │ ├── env_var.rs
│ │ ├── logger.rs
│ │ ├── mod.rs
│ │ ├── options.rs
│ │ └── reader.rs
│ ├── executor/
│ │ ├── count.rs
│ │ ├── flushb.rs
│ │ ├── flushc.rs
│ │ ├── flusho.rs
│ │ ├── list.rs
│ │ ├── macros.rs
│ │ ├── mod.rs
│ │ ├── pop.rs
│ │ ├── push.rs
│ │ ├── search.rs
│ │ └── suggest.rs
│ ├── lexer/
│ │ ├── mod.rs
│ │ ├── ranges.rs
│ │ ├── stopwords.rs
│ │ └── token.rs
│ ├── main.rs
│ ├── query/
│ │ ├── actions.rs
│ │ ├── builder.rs
│ │ ├── mod.rs
│ │ └── types.rs
│ ├── stopwords/
│ │ ├── afr.rs
│ │ ├── aka.rs
│ │ ├── amh.rs
│ │ ├── ara.rs
│ │ ├── aze.rs
│ │ ├── bel.rs
│ │ ├── ben.rs
│ │ ├── bul.rs
│ │ ├── cat.rs
│ │ ├── ces.rs
│ │ ├── cmn.rs
│ │ ├── dan.rs
│ │ ├── deu.rs
│ │ ├── ell.rs
│ │ ├── eng.rs
│ │ ├── epo.rs
│ │ ├── est.rs
│ │ ├── fin.rs
│ │ ├── fra.rs
│ │ ├── guj.rs
│ │ ├── heb.rs
│ │ ├── hin.rs
│ │ ├── hrv.rs
│ │ ├── hun.rs
│ │ ├── hye.rs
│ │ ├── ind.rs
│ │ ├── ita.rs
│ │ ├── jav.rs
│ │ ├── jpn.rs
│ │ ├── kan.rs
│ │ ├── kat.rs
│ │ ├── khm.rs
│ │ ├── kor.rs
│ │ ├── lat.rs
│ │ ├── lav.rs
│ │ ├── lit.rs
│ │ ├── mal.rs
│ │ ├── mar.rs
│ │ ├── mkd.rs
│ │ ├── mod.rs
│ │ ├── mya.rs
│ │ ├── nep.rs
│ │ ├── nld.rs
│ │ ├── nob.rs
│ │ ├── ori.rs
│ │ ├── pan.rs
│ │ ├── pes.rs
│ │ ├── pol.rs
│ │ ├── por.rs
│ │ ├── ron.rs
│ │ ├── rus.rs
│ │ ├── sin.rs
│ │ ├── slk.rs
│ │ ├── slv.rs
│ │ ├── sna.rs
│ │ ├── spa.rs
│ │ ├── srp.rs
│ │ ├── swe.rs
│ │ ├── tam.rs
│ │ ├── tel.rs
│ │ ├── tgl.rs
│ │ ├── tha.rs
│ │ ├── tuk.rs
│ │ ├── tur.rs
│ │ ├── ukr.rs
│ │ ├── urd.rs
│ │ ├── uzb.rs
│ │ ├── vie.rs
│ │ ├── yid.rs
│ │ └── zul.rs
│ ├── store/
│ │ ├── fst.rs
│ │ ├── generic.rs
│ │ ├── identifiers.rs
│ │ ├── item.rs
│ │ ├── keyer.rs
│ │ ├── kv.rs
│ │ ├── macros.rs
│ │ ├── mod.rs
│ │ └── operation.rs
│ └── tasker/
│ ├── mod.rs
│ ├── runtime.rs
│ └── shutdown.rs
└── tests/
└── integration/
├── .gitignore
├── instance/
│ └── config.cfg
├── runner/
│ ├── package.json
│ └── runner.js
├── scenarios/
│ ├── insert.js
│ └── ping.js
└── scripts/
└── run.sh
SYMBOL INDEX (500 symbols across 40 files)
FILE: src/channel/command.rs
type ChannelCommandError (line 27) | pub enum ChannelCommandError {
method fmt (line 935) | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
type ChannelCommandResponse (line 40) | pub enum ChannelCommandResponse {
method to_args (line 98) | pub fn to_args(&self) -> ChannelCommandResponseArgs {
type ChannelCommandBase (line 51) | pub struct ChannelCommandBase;
method dispatch_ping (line 118) | pub fn dispatch_ping(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_quit (line 125) | pub fn dispatch_quit(mut parts: SplitWhitespace) -> ChannelResult {
method generic_dispatch_help (line 132) | pub fn generic_dispatch_help(
method parse_text_parts (line 163) | pub fn parse_text_parts(parts: &mut SplitWhitespace) -> Option<String> {
method parse_next_meta_parts (line 246) | pub fn parse_next_meta_parts<'a>(
method make_error_invalid_meta_key (line 291) | pub fn make_error_invalid_meta_key(meta_key: &str, meta_value: &str) -...
method make_error_invalid_meta_value (line 295) | pub fn make_error_invalid_meta_value(meta_key: &str, meta_value: &str)...
method commit_ok_operation (line 299) | pub fn commit_ok_operation(query_builder: QueryBuilderResult) -> Chann...
method commit_result_operation (line 306) | pub fn commit_result_operation(query_builder: QueryBuilderResult) -> C...
method commit_pending_operation (line 319) | pub fn commit_pending_operation(
method generate_event_id (line 348) | pub fn generate_event_id() -> String {
type ChannelCommandSearch (line 52) | pub struct ChannelCommandSearch;
method dispatch_query (line 358) | pub fn dispatch_query(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_suggest (line 434) | pub fn dispatch_suggest(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_list (line 492) | pub fn dispatch_list(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_help (line 544) | pub fn dispatch_help(parts: SplitWhitespace) -> ChannelResult {
method handle_query_meta (line 548) | fn handle_query_meta(
method handle_suggest_meta (line 597) | fn handle_suggest_meta(
method handle_list_meta (line 626) | fn handle_list_meta(meta_result: MetaPartsResult) -> Result<ListMetaDa...
type ChannelCommandIngest (line 53) | pub struct ChannelCommandIngest;
method dispatch_push (line 665) | pub fn dispatch_push(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_pop (line 714) | pub fn dispatch_pop(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_count (line 740) | pub fn dispatch_count(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_flushc (line 758) | pub fn dispatch_flushc(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_flushb (line 773) | pub fn dispatch_flushb(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_flusho (line 792) | pub fn dispatch_flusho(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_help (line 811) | pub fn dispatch_help(parts: SplitWhitespace) -> ChannelResult {
method handle_push_meta (line 815) | fn handle_push_meta(
type ChannelCommandControl (line 54) | pub struct ChannelCommandControl;
method dispatch_trigger (line 846) | pub fn dispatch_trigger(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_info (line 906) | pub fn dispatch_info(mut parts: SplitWhitespace) -> ChannelResult {
method dispatch_help (line 929) | pub fn dispatch_help(parts: SplitWhitespace) -> ChannelResult {
type ChannelCommandResponseArgs (line 56) | pub type ChannelCommandResponseArgs = (&'static str, Option<Vec<String>>);
type ChannelResult (line 58) | type ChannelResult = Result<Vec<ChannelCommandResponse>, ChannelCommandE...
type MetaPartsResult (line 59) | type MetaPartsResult<'a> = Result<(&'a str, &'a str), (&'a str, &'a str)>;
constant EVENT_ID_SIZE (line 61) | pub const EVENT_ID_SIZE: usize = 8;
constant TEXT_PART_BOUNDARY (line 63) | const TEXT_PART_BOUNDARY: char = '"';
constant TEXT_PART_ESCAPE (line 64) | const TEXT_PART_ESCAPE: char = '\\';
constant META_PART_GROUP_OPEN (line 65) | const META_PART_GROUP_OPEN: char = '(';
constant META_PART_GROUP_CLOSE (line 66) | const META_PART_GROUP_CLOSE: char = ')';
function it_matches_command_response_string (line 959) | fn it_matches_command_response_string() {
FILE: src/channel/format.rs
function unescape (line 7) | pub fn unescape(text: &str) -> String {
function it_unescapes_command_text (line 34) | fn it_unescapes_command_text() {
function bench_unescape_command_text (line 55) | fn bench_unescape_command_text(b: &mut Bencher) {
FILE: src/channel/handle.rs
type ChannelHandle (line 23) | pub struct ChannelHandle;
method client (line 69) | pub fn client(mut stream: TcpStream) {
method configure_stream (line 107) | fn configure_stream(stream: &TcpStream, is_established: bool) {
method handle_stream (line 124) | fn handle_stream(mode: ChannelMode, mut stream: TcpStream) {
method ensure_start (line 194) | fn ensure_start(mut stream: &TcpStream) -> Result<ChannelMode, Channel...
method on_message (line 253) | fn on_message(
type ChannelHandleError (line 25) | enum ChannelHandleError {
method to_str (line 53) | pub fn to_str(&self) -> &'static str {
constant LINE_END_GAP (line 37) | const LINE_END_GAP: usize = 1;
constant BUFFER_SIZE (line 38) | const BUFFER_SIZE: usize = 20000;
constant MAX_LINE_SIZE (line 39) | const MAX_LINE_SIZE: usize = BUFFER_SIZE + LINE_END_GAP + 1;
constant TCP_TIMEOUT_NON_ESTABLISHED (line 40) | const TCP_TIMEOUT_NON_ESTABLISHED: u64 = 10;
constant PROTOCOL_REVISION (line 41) | const PROTOCOL_REVISION: u8 = 1;
constant BUFFER_LINE_SEPARATOR (line 42) | const BUFFER_LINE_SEPARATOR: u8 = b'\n';
FILE: src/channel/listen.rs
type ChannelListenBuilder (line 15) | pub struct ChannelListenBuilder;
method build (line 23) | pub fn build() -> ChannelListen {
type ChannelListen (line 16) | pub struct ChannelListen;
method run (line 29) | pub fn run(&self) {
method teardown (line 64) | pub fn teardown() {
FILE: src/channel/message.rs
type ChannelMessage (line 21) | pub struct ChannelMessage;
method on (line 39) | pub fn on<M: ChannelMessageMode>(
method extract (line 153) | fn extract(message: &str) -> (String, SplitWhitespace<'_>) {
type ChannelMessageModeSearch (line 22) | pub struct ChannelMessageModeSearch;
type ChannelMessageModeIngest (line 23) | pub struct ChannelMessageModeIngest;
type ChannelMessageModeControl (line 24) | pub struct ChannelMessageModeControl;
constant COMMAND_ELAPSED_MILLIS_SLOW_WARN (line 26) | const COMMAND_ELAPSED_MILLIS_SLOW_WARN: u128 = 50;
type ChannelMessageResult (line 29) | pub enum ChannelMessageResult {
type ChannelMessageMode (line 34) | pub trait ChannelMessageMode {
method handle (line 35) | fn handle(message: &str) -> Result<Vec<ChannelCommandResponse>, Channe...
method handle (line 165) | fn handle(message: &str) -> Result<Vec<ChannelCommandResponse>, Channe...
method handle (line 176) | fn handle(message: &str) -> Result<Vec<ChannelCommandResponse>, Channe...
method handle (line 190) | fn handle(message: &str) -> Result<Vec<ChannelCommandResponse>, Channe...
FILE: src/channel/mode.rs
type ChannelMode (line 7) | pub enum ChannelMode {
method from_str (line 14) | pub fn from_str(value: &str) -> Result<Self, ()> {
method to_str (line 23) | pub fn to_str(&self) -> &'static str {
FILE: src/channel/statistics.rs
type ChannelStatistics (line 23) | pub struct ChannelStatistics {
method gather (line 46) | pub fn gather() -> ChannelStatistics {
function ensure_states (line 34) | pub fn ensure_states() {
FILE: src/config/defaults.rs
function server_log_level (line 10) | pub fn server_log_level() -> String {
function channel_inet (line 14) | pub fn channel_inet() -> SocketAddr {
function channel_tcp_timeout (line 18) | pub fn channel_tcp_timeout() -> u64 {
function channel_search_query_limit_default (line 22) | pub fn channel_search_query_limit_default() -> u16 {
function channel_search_query_limit_maximum (line 26) | pub fn channel_search_query_limit_maximum() -> u16 {
function channel_search_query_alternates_try (line 30) | pub fn channel_search_query_alternates_try() -> usize {
function channel_search_suggest_limit_default (line 34) | pub fn channel_search_suggest_limit_default() -> u16 {
function channel_search_suggest_limit_maximum (line 38) | pub fn channel_search_suggest_limit_maximum() -> u16 {
function channel_search_list_limit_default (line 42) | pub fn channel_search_list_limit_default() -> u16 {
function channel_search_list_limit_maximum (line 46) | pub fn channel_search_list_limit_maximum() -> u16 {
function store_kv_path (line 50) | pub fn store_kv_path() -> PathBuf {
function store_kv_retain_word_objects (line 54) | pub fn store_kv_retain_word_objects() -> usize {
function store_kv_pool_inactive_after (line 58) | pub fn store_kv_pool_inactive_after() -> u64 {
function store_kv_database_flush_after (line 62) | pub fn store_kv_database_flush_after() -> u64 {
function store_kv_database_compress (line 66) | pub fn store_kv_database_compress() -> bool {
function store_kv_database_parallelism (line 70) | pub fn store_kv_database_parallelism() -> u16 {
function store_kv_database_max_compactions (line 74) | pub fn store_kv_database_max_compactions() -> u16 {
function store_kv_database_max_flushes (line 78) | pub fn store_kv_database_max_flushes() -> u16 {
function store_kv_database_write_buffer (line 82) | pub fn store_kv_database_write_buffer() -> usize {
function store_kv_database_write_ahead_log (line 86) | pub fn store_kv_database_write_ahead_log() -> bool {
function store_fst_path (line 90) | pub fn store_fst_path() -> PathBuf {
function store_fst_pool_inactive_after (line 94) | pub fn store_fst_pool_inactive_after() -> u64 {
function store_fst_graph_consolidate_after (line 98) | pub fn store_fst_graph_consolidate_after() -> u64 {
function store_fst_graph_max_size (line 102) | pub fn store_fst_graph_max_size() -> usize {
function store_fst_graph_max_words (line 106) | pub fn store_fst_graph_max_words() -> usize {
FILE: src/config/env_var.rs
type WrappedString (line 13) | struct WrappedString(String);
function str (line 15) | pub fn str<'de, D>(deserializer: D) -> Result<String, D::Error>
function opt_str (line 27) | pub fn opt_str<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
function socket_addr (line 43) | pub fn socket_addr<'de, D>(deserializer: D) -> Result<SocketAddr, D::Error>
function path_buf (line 55) | pub fn path_buf<'de, D>(deserializer: D) -> Result<PathBuf, D::Error>
function is_env_var (line 67) | fn is_env_var(value: &str) -> bool {
function get_env_var (line 73) | fn get_env_var(wrapped_key: &str) -> String {
function it_checks_environment_variable_patterns (line 86) | fn it_checks_environment_variable_patterns() {
function it_gets_environment_variable (line 99) | fn it_gets_environment_variable() {
FILE: src/config/logger.rs
type ConfigLogger (line 9) | pub struct ConfigLogger;
method enabled (line 12) | fn enabled(&self, metadata: &Metadata) -> bool {
method log (line 16) | fn log(&self, record: &Record) {
method flush (line 22) | fn flush(&self) {}
method init (line 26) | pub fn init(level: LevelFilter) -> Result<(), SetLoggerError> {
FILE: src/config/options.rs
type Config (line 14) | pub struct Config {
type ConfigServer (line 21) | pub struct ConfigServer {
type ConfigChannel (line 30) | pub struct ConfigChannel {
type ConfigChannelSearch (line 47) | pub struct ConfigChannelSearch {
type ConfigStore (line 71) | pub struct ConfigStore {
type ConfigStoreKV (line 77) | pub struct ConfigStoreKV {
type ConfigStoreKVPool (line 92) | pub struct ConfigStoreKVPool {
type ConfigStoreKVDatabase (line 98) | pub struct ConfigStoreKVDatabase {
type ConfigStoreFST (line 124) | pub struct ConfigStoreFST {
type ConfigStoreFSTPool (line 136) | pub struct ConfigStoreFSTPool {
type ConfigStoreFSTGraph (line 142) | pub struct ConfigStoreFSTGraph {
FILE: src/config/reader.rs
type ConfigReader (line 13) | pub struct ConfigReader;
method make (line 16) | pub fn make() -> Config {
method validate (line 36) | fn validate(config: &Config) {
FILE: src/executor/count.rs
type ExecutorCount (line 13) | pub struct ExecutorCount;
method execute (line 16) | pub fn execute(store: StoreItem) -> Result<u32, ()> {
FILE: src/executor/flushb.rs
type ExecutorFlushB (line 11) | pub struct ExecutorFlushB;
method execute (line 14) | pub fn execute(store: StoreItem) -> Result<u32, ()> {
FILE: src/executor/flushc.rs
type ExecutorFlushC (line 11) | pub struct ExecutorFlushC;
method execute (line 14) | pub fn execute(store: StoreItem) -> Result<u32, ()> {
FILE: src/executor/flusho.rs
type ExecutorFlushO (line 10) | pub struct ExecutorFlushO;
method execute (line 13) | pub fn execute(store: StoreItem) -> Result<u32, ()> {
FILE: src/executor/list.rs
type ExecutorList (line 12) | pub struct ExecutorList;
method execute (line 15) | pub fn execute(
FILE: src/executor/pop.rs
type ExecutorPop (line 16) | pub struct ExecutorPop;
method execute (line 19) | pub fn execute<'a>(store: StoreItem<'a>, lexer: TokenLexer<'a>) -> Res...
FILE: src/executor/push.rs
type ExecutorPush (line 17) | pub struct ExecutorPush;
method execute (line 20) | pub fn execute<'a>(store: StoreItem<'a>, lexer: TokenLexer<'a>) -> Res...
FILE: src/executor/search.rs
type ExecutorSearch (line 18) | pub struct ExecutorSearch;
method execute (line 21) | pub fn execute<'a>(
FILE: src/executor/suggest.rs
type ExecutorSuggest (line 12) | pub struct ExecutorSuggest;
method execute (line 15) | pub fn execute<'a>(
FILE: src/lexer/ranges.rs
type LexerRange (line 10) | struct LexerRange;
method from (line 47) | pub fn from(text: &str) -> Option<&'static [(char, char)]> {
type LexerRegexRange (line 13) | pub struct LexerRegexRange(&'static [(char, char)]);
method from (line 79) | pub fn from(text: &str) -> Option<Self> {
method write_to (line 83) | pub fn write_to<W: fmt::Write>(&self, formatter: &mut W) -> Result<(),...
constant RANGE_LATIN (line 15) | const RANGE_LATIN: &[(char, char)] = &[('\u{0000}', '\u{024F}')];
constant RANGE_CYRILLIC (line 16) | const RANGE_CYRILLIC: &[(char, char)] = &[('\u{0400}', '\u{052F}')];
constant RANGE_ARABIC (line 17) | const RANGE_ARABIC: &[(char, char)] = &[('\u{0600}', '\u{06FF}'), ('\u{0...
constant RANGE_ARMENIAN (line 18) | const RANGE_ARMENIAN: &[(char, char)] = &[('\u{0530}', '\u{058F}')];
constant RANGE_DEVANAGARI (line 19) | const RANGE_DEVANAGARI: &[(char, char)] = &[('\u{0900}', '\u{097F}')];
constant RANGE_HIRAGANA (line 20) | const RANGE_HIRAGANA: &[(char, char)] = &[('\u{3040}', '\u{309F}')];
constant RANGE_KATAKANA (line 21) | const RANGE_KATAKANA: &[(char, char)] = &[('\u{30A0}', '\u{30FF}'), ('\u...
constant RANGE_ETHIOPIC (line 22) | const RANGE_ETHIOPIC: &[(char, char)] = &[('\u{1200}', '\u{139F}'), ('\u...
constant RANGE_HEBREW (line 23) | const RANGE_HEBREW: &[(char, char)] = &[('\u{0590}', '\u{05FF}')];
constant RANGE_BENGALI (line 24) | const RANGE_BENGALI: &[(char, char)] = &[('\u{0980}', '\u{09FF}')];
constant RANGE_GEORGIAN (line 25) | const RANGE_GEORGIAN: &[(char, char)] = &[('\u{10A0}', '\u{10FF}'), ('\u...
constant RANGE_MANDARIN (line 26) | const RANGE_MANDARIN: &[(char, char)] = &[
constant RANGE_HANGUL (line 32) | const RANGE_HANGUL: &[(char, char)] = &[('\u{1100}', '\u{11FF}'), ('\u{3...
constant RANGE_GREEK (line 33) | const RANGE_GREEK: &[(char, char)] = &[('\u{0370}', '\u{03FF}'), ('\u{1F...
constant RANGE_KANNADA (line 34) | const RANGE_KANNADA: &[(char, char)] = &[('\u{0C80}', '\u{0CFF}')];
constant RANGE_TAMIL (line 35) | const RANGE_TAMIL: &[(char, char)] = &[('\u{0B80}', '\u{0BFF}')];
constant RANGE_THAI (line 36) | const RANGE_THAI: &[(char, char)] = &[('\u{0E00}', '\u{0E7F}')];
constant RANGE_GUJARATI (line 37) | const RANGE_GUJARATI: &[(char, char)] = &[('\u{0A80}', '\u{0AFF}')];
constant RANGE_GURMUKHI (line 38) | const RANGE_GURMUKHI: &[(char, char)] = &[('\u{0A00}', '\u{0A7F}')];
constant RANGE_TELUGU (line 39) | const RANGE_TELUGU: &[(char, char)] = &[('\u{0C00}', '\u{0C7F}')];
constant RANGE_MALAYALAM (line 40) | const RANGE_MALAYALAM: &[(char, char)] = &[('\u{0D00}', '\u{0D7F}')];
constant RANGE_ORIYA (line 41) | const RANGE_ORIYA: &[(char, char)] = &[('\u{0B00}', '\u{0B7F}')];
constant RANGE_MYANMAR (line 42) | const RANGE_MYANMAR: &[(char, char)] = &[('\u{1000}', '\u{109F}')];
constant RANGE_SINHALA (line 43) | const RANGE_SINHALA: &[(char, char)] = &[('\u{0D80}', '\u{0DFF}')];
constant RANGE_KHMER (line 44) | const RANGE_KHMER: &[(char, char)] = &[('\u{1780}', '\u{17FF}'), ('\u{19...
method default (line 102) | fn default() -> Self {
function it_gives_ranges (line 112) | fn it_gives_ranges() {
function it_gives_regex_range (line 119) | fn it_gives_regex_range() {
function bench_give_ranges_latin (line 135) | fn bench_give_ranges_latin(b: &mut Bencher) {
function bench_give_ranges_mandarin (line 140) | fn bench_give_ranges_mandarin(b: &mut Bencher) {
function bench_give_ranges_cyrillic (line 145) | fn bench_give_ranges_cyrillic(b: &mut Bencher) {
function bench_give_regex_range_latin (line 150) | fn bench_give_regex_range_latin(b: &mut Bencher) {
FILE: src/lexer/stopwords.rs
type LexerStopWord (line 12) | pub struct LexerStopWord;
method is (line 116) | pub fn is(word: &str, locale: Option<Lang>) -> bool {
method guess_lang (line 128) | pub fn guess_lang(text: &str, script: Script) -> Option<Lang> {
method lang_stopwords (line 182) | fn lang_stopwords(lang: Lang) -> &'static HashSet<&'static str> {
method script_langs (line 256) | fn script_langs(script: Script) -> &'static [Lang] {
function make (line 111) | fn make<'a>(words: &[&'a str]) -> HashSet<&'a str> {
function it_detects_stopwords (line 337) | fn it_detects_stopwords() {
function it_guesses_language (line 347) | fn it_guesses_language() {
function bench_detect_stopwords_not_found (line 395) | fn bench_detect_stopwords_not_found(b: &mut Bencher) {
function bench_detect_stopwords_found (line 400) | fn bench_detect_stopwords_found(b: &mut Bencher) {
function bench_guess_language_latin (line 405) | fn bench_guess_language_latin(b: &mut Bencher) {
function bench_guess_language_mandarin (line 415) | fn bench_guess_language_mandarin(b: &mut Bencher) {
FILE: src/lexer/token.rs
type TokenLexerBuilder (line 21) | pub struct TokenLexerBuilder;
method from (line 72) | pub fn from(mode: TokenLexerMode, text: &str) -> Result<TokenLexer<'_>...
method detect_lang (line 98) | fn detect_lang(text: &str) -> Option<Lang> {
method detect_lang_slow (line 160) | fn detect_lang_slow(safe_text: &str) -> Option<Lang> {
method detect_lang_fast (line 211) | fn detect_lang_fast(safe_text: &str) -> Option<Lang> {
type TokenLexer (line 23) | pub struct TokenLexer<'a> {
type TokenLexerMode (line 31) | pub enum TokenLexerMode {
method from_query_lang (line 276) | pub fn from_query_lang(lang: Option<QueryGenericLang>) -> TokenLexerMo...
type TokenLexerWords (line 36) | enum TokenLexerWords<'a> {
constant TEXT_LANG_TRUNCATE_OVER_CHARS (line 46) | const TEXT_LANG_TRUNCATE_OVER_CHARS: usize = 200;
constant TEXT_LANG_DETECT_PROCEED_OVER_CHARS (line 47) | const TEXT_LANG_DETECT_PROCEED_OVER_CHARS: usize = 20;
constant TEXT_LANG_DETECT_NGRAM_UNDER_CHARS (line 48) | const TEXT_LANG_DETECT_NGRAM_UNDER_CHARS: usize = 60;
function new (line 249) | fn new(mode: TokenLexerMode, text: &'a str, locale: Option<Lang>) -> Tok...
type Item (line 295) | type Item = (String, StoreTermHashed);
method next (line 302) | fn next(&mut self) -> Option<Self::Item> {
type Item (line 343) | type Item = &'a str;
method next (line 345) | fn next(&mut self) -> Option<Self::Item> {
function it_cleans_token_english (line 366) | fn it_cleans_token_english() {
function it_cleans_token_french (line 390) | fn it_cleans_token_french() {
function it_cleans_token_chinese_jieba (line 420) | fn it_cleans_token_chinese_jieba() {
function it_cleans_token_chinese_naive (line 436) | fn it_cleans_token_chinese_naive() {
function it_cleans_token_japanese_lindera_product (line 454) | fn it_cleans_token_japanese_lindera_product() {
function it_cleans_token_japanese_lindera_food (line 479) | fn it_cleans_token_japanese_lindera_food() {
function it_cleans_token_japanese_lindera_sentence (line 494) | fn it_cleans_token_japanese_lindera_sentence() {
function it_cleans_token_emojis (line 517) | fn it_cleans_token_emojis() {
function it_cleans_token_lang_hinted (line 527) | fn it_cleans_token_lang_hinted() {
function it_detects_lang_english_regular (line 553) | fn it_detects_lang_english_regular() {
function it_detects_lang_english_long (line 561) | fn it_detects_lang_english_long() {
function it_doesnt_detect_lang_english_tiny (line 575) | fn it_doesnt_detect_lang_english_tiny() {
function bench_normalize_token_french_build (line 588) | fn bench_normalize_token_french_build(b: &mut Bencher) {
function bench_normalize_token_french_exhaust (line 598) | fn bench_normalize_token_french_exhaust(b: &mut Bencher) {
function bench_clean_token_english_regular_build (line 611) | fn bench_clean_token_english_regular_build(b: &mut Bencher) {
function bench_clean_token_english_regular_exhaust (line 621) | fn bench_clean_token_english_regular_exhaust(b: &mut Bencher) {
function bench_clean_token_english_long_exhaust (line 634) | fn bench_clean_token_english_long_exhaust(b: &mut Bencher) {
function bench_clean_token_english_hinted_build (line 651) | fn bench_clean_token_english_hinted_build(b: &mut Bencher) {
function bench_clean_token_english_hinted_exhaust (line 661) | fn bench_clean_token_english_hinted_exhaust(b: &mut Bencher) {
function bench_clean_token_chinese_build (line 674) | fn bench_clean_token_chinese_build(b: &mut Bencher) {
function bench_clean_token_chinese_exhaust (line 684) | fn bench_clean_token_chinese_exhaust(b: &mut Bencher) {
function bench_clean_token_japanese_build (line 697) | fn bench_clean_token_japanese_build(b: &mut Bencher) {
function bench_clean_token_japanese_exhaust (line 707) | fn bench_clean_token_japanese_exhaust(b: &mut Bencher) {
function bench_detect_lang_english_short (line 720) | fn bench_detect_lang_english_short(b: &mut Bencher) {
function bench_detect_lang_english_regular (line 725) | fn bench_detect_lang_english_regular(b: &mut Bencher) {
function bench_detect_lang_english_long (line 730) | fn bench_detect_lang_english_long(b: &mut Bencher) {
function bench_dont_detect_lang_english_tiny (line 743) | fn bench_dont_detect_lang_english_tiny(b: &mut Bencher) {
FILE: src/main.rs
type AppArgs (line 44) | struct AppArgs {
function make_app_args (line 101) | fn make_app_args() -> AppArgs {
function ensure_states (line 122) | fn ensure_states() {
function main (line 130) | fn main() {
FILE: src/query/actions.rs
type Query (line 11) | pub enum Query<'a> {
FILE: src/query/builder.rs
type QueryBuilder (line 12) | pub struct QueryBuilder;
method search (line 17) | pub fn search<'a>(
method suggest (line 37) | pub fn suggest<'a>(
method list (line 53) | pub fn list<'a>(
method push (line 66) | pub fn push<'a>(
method pop (line 82) | pub fn pop<'a>(
method count (line 97) | pub fn count<'a>(
method flushc (line 116) | pub fn flushc(collection: &str) -> QueryBuilderResult<'_> {
method flushb (line 123) | pub fn flushb<'a>(collection: &'a str, bucket: &'a str) -> QueryBuilde...
method flusho (line 130) | pub fn flusho<'a>(
type QueryBuilderResult (line 14) | pub type QueryBuilderResult<'a> = Result<Query<'a>, ()>;
function it_builds_search_query (line 147) | fn it_builds_search_query() {
function it_builds_suggest_query (line 156) | fn it_builds_suggest_query() {
function it_builds_list_query (line 162) | fn it_builds_list_query() {
function it_builds_push_query (line 168) | fn it_builds_push_query() {
function it_builds_pop_query (line 184) | fn it_builds_pop_query() {
function it_builds_count_query (line 190) | fn it_builds_count_query() {
function it_builds_flushc_query (line 198) | fn it_builds_flushc_query() {
function it_builds_flushb_query (line 204) | fn it_builds_flushb_query() {
function it_builds_flusho_query (line 210) | fn it_builds_flusho_query() {
FILE: src/query/types.rs
type QueryGenericLang (line 10) | pub enum QueryGenericLang {
method from_value (line 28) | pub fn from_value(value: &str) -> Option<QueryGenericLang> {
type QuerySearchID (line 15) | pub type QuerySearchID<'a> = &'a str;
type QuerySearchLimit (line 16) | pub type QuerySearchLimit = u16;
type QuerySearchOffset (line 17) | pub type QuerySearchOffset = u32;
type QueryMetaData (line 19) | pub type QueryMetaData = (
type ListMetaData (line 25) | pub type ListMetaData = (Option<QuerySearchLimit>, Option<QuerySearchOff...
function it_parses_generic_lang_from_value (line 42) | fn it_parses_generic_lang_from_value() {
FILE: src/store/fst.rs
type StoreFSTPool (line 36) | pub struct StoreFSTPool;
method count (line 101) | pub fn count() -> (usize, usize) {
method acquire (line 108) | pub fn acquire<'a, T: Into<&'a str>>(collection: T, bucket: T) -> Resu...
method janitor (line 136) | pub fn janitor() {
method backup (line 145) | pub fn backup(path: &Path) -> Result<(), io::Error> {
method restore (line 161) | pub fn restore(path: &Path) -> Result<(), io::Error> {
method consolidate (line 174) | pub fn consolidate(force: bool) {
method dump_action (line 322) | fn dump_action(
method backup_item (line 380) | fn backup_item(
method restore_item (line 446) | fn restore_item(
method consolidate_item (line 517) | fn consolidate_item(store: &StoreFSTBox) -> (bool, usize, usize, usize) {
type StoreFSTBuilder (line 37) | pub struct StoreFSTBuilder;
method open (line 738) | fn open(collection_hash: StoreFSTAtom, bucket_hash: StoreFSTAtom) -> R...
method close (line 765) | fn close(collection_hash: StoreFSTAtom, bucket_hash: StoreFSTAtom) {
method path (line 777) | fn path(
method build (line 797) | fn build(pool_key: StoreFSTKey) -> Result<StoreFST, ()> {
type StoreFST (line 39) | pub struct StoreFST {
method cardinality (line 817) | pub fn cardinality(&self) -> usize {
method as_stream (line 821) | pub fn as_stream(&self) -> FSTStream<'_, AlwaysMatch> {
method lookup_begins (line 825) | pub fn lookup_begins(&self, word: &str) -> Result<FSTStream<'_, Regex>...
method lookup_typos (line 864) | pub fn lookup_typos(
method should_consolidate (line 896) | pub fn should_consolidate(&self) {
type StoreFSTPending (line 48) | pub struct StoreFSTPending {
type StoreFSTActionBuilder (line 53) | pub struct StoreFSTActionBuilder;
method access (line 928) | pub fn access(store: StoreFSTBox) -> StoreFSTAction {
method erase (line 932) | pub fn erase<'a, T: Into<&'a str>>(collection: T, bucket: Option<T>) -...
method build (line 936) | fn build(store: StoreFSTBox) -> StoreFSTAction {
type StoreFSTAction (line 55) | pub struct StoreFSTAction {
method push_word (line 1064) | pub fn push_word(&self, word: &str) -> bool {
method pop_word (line 1104) | pub fn pop_word(&self, word: &str) -> bool {
method suggest_words (line 1138) | pub fn suggest_words(
method list_words (line 1174) | pub fn list_words(&self, limit: usize, offset: usize) -> Result<Vec<St...
method count_words (line 1190) | pub fn count_words(&self) -> usize {
method word_over_limit (line 1194) | fn word_over_limit(word: &str) -> bool {
method find_words_stream (line 1204) | fn find_words_stream<A: Automaton>(
type StoreFSTKey (line 60) | pub struct StoreFSTKey {
method from_atom (line 1291) | pub fn from_atom(collection_hash: StoreFSTAtom, bucket_hash: StoreFSTA...
method from_str (line 1298) | pub fn from_str(collection_str: &str, bucket_str: &str) -> StoreFSTKey {
method fmt (line 1307) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
type StoreFSTMisc (line 65) | pub struct StoreFSTMisc;
method count_collection_buckets (line 1227) | pub fn count_collection_buckets<'a, T: Into<&'a str>>(collection: T) -...
method check_over_limits (line 1262) | fn check_over_limits(bytes_count: usize, words_count: usize) -> bool {
type StoreFSTPathMode (line 68) | enum StoreFSTPathMode {
method extension (line 91) | fn extension(&self) -> &'static str {
type StoreFSTAtom (line 74) | type StoreFSTAtom = u32;
type StoreFSTBox (line 75) | type StoreFSTBox = Arc<StoreFST>;
constant WORD_LIMIT_LENGTH (line 77) | const WORD_LIMIT_LENGTH: usize = 40;
constant ATOM_HASH_RADIX (line 78) | const ATOM_HASH_RADIX: usize = 16;
method ref_last_used (line 922) | fn ref_last_used(&self) -> &RwLock<SystemTime> {
method proceed_erase_collection (line 942) | fn proceed_erase_collection(collection_str: &str) -> Result<u32, ()> {
method proceed_erase_bucket (line 1015) | fn proceed_erase_bucket(collection_str: &str, bucket_str: &str) -> Resul...
function it_acquires_graph (line 1317) | fn it_acquires_graph() {
function it_janitors_graph (line 1322) | fn it_janitors_graph() {
function it_proceeds_primitives (line 1327) | fn it_proceeds_primitives() {
FILE: src/store/generic.rs
type StoreGeneric (line 14) | pub trait StoreGeneric {
method ref_last_used (line 15) | fn ref_last_used(&self) -> &RwLock<SystemTime>;
type StoreGenericPool (line 18) | pub trait StoreGenericPool<
method proceed_acquire_cache (line 24) | fn proceed_acquire_cache(
method proceed_acquire_open (line 46) | fn proceed_acquire_open(
method proceed_janitor (line 78) | fn proceed_janitor(
type StoreGenericBuilder (line 147) | pub trait StoreGenericBuilder<K, S> {
method build (line 148) | fn build(pool_key: K) -> Result<S, ()>;
type StoreGenericActionBuilder (line 151) | pub trait StoreGenericActionBuilder {
method proceed_erase_collection (line 152) | fn proceed_erase_collection(collection_str: &str) -> Result<u32, ()>;
method proceed_erase_bucket (line 154) | fn proceed_erase_bucket(collection_str: &str, bucket_str: &str) -> Res...
method dispatch_erase (line 156) | fn dispatch_erase<'a, T: Into<&'a str>>(
FILE: src/store/identifiers.rs
type StoreObjectIID (line 10) | pub type StoreObjectIID = u32;
type StoreObjectOID (line 11) | pub type StoreObjectOID<'a> = &'a str;
type StoreTermHashed (line 12) | pub type StoreTermHashed = u32;
type StoreTermHash (line 14) | pub struct StoreTermHash;
method from (line 33) | pub fn from(term: &str) -> StoreTermHashed {
type StoreMetaKey (line 16) | pub enum StoreMetaKey {
method as_u32 (line 25) | pub fn as_u32(&self) -> u32 {
type StoreMetaValue (line 20) | pub enum StoreMetaValue {
function it_converts_meta_key_to_u32 (line 47) | fn it_converts_meta_key_to_u32() {
function it_hashes_term (line 52) | fn it_hashes_term() {
FILE: src/store/item.rs
type StoreItemBuilder (line 7) | pub struct StoreItemBuilder;
method from_depth_1 (line 57) | pub fn from_depth_1(collection: &str) -> Result<StoreItem<'_>, StoreIt...
method from_depth_2 (line 66) | pub fn from_depth_2<'a>(
method from_depth_3 (line 83) | pub fn from_depth_3<'a>(
type StoreItem (line 10) | pub struct StoreItem<'a>(
type StoreItemPart (line 17) | pub struct StoreItemPart<'a>(&'a str);
type StoreItemError (line 22) | pub enum StoreItemError {
constant STORE_ITEM_PART_LEN_MIN (line 28) | const STORE_ITEM_PART_LEN_MIN: usize = 0;
constant STORE_ITEM_PART_LEN_MAX (line 29) | const STORE_ITEM_PART_LEN_MAX: usize = 128;
function from_str (line 32) | pub fn from_str(part: &'a str) -> Result<Self, ()> {
function as_str (line 45) | pub fn as_str(&self) -> &'a str {
function from (line 51) | fn from(part: StoreItemPart<'a>) -> Self {
function it_builds_store_item_depth_1 (line 111) | fn it_builds_store_item_depth_1() {
function it_builds_store_item_depth_2 (line 123) | fn it_builds_store_item_depth_2() {
function it_builds_store_item_depth_3 (line 143) | fn it_builds_store_item_depth_3() {
FILE: src/store/keyer.rs
type StoreKeyerBuilder (line 15) | pub struct StoreKeyerBuilder;
method meta_to_value (line 47) | pub fn meta_to_value<'a>(bucket: &'a str, meta: &'a StoreMetaKey) -> S...
method term_to_iids (line 51) | pub fn term_to_iids(bucket: &str, term_hash: StoreTermHashed) -> Store...
method oid_to_iid (line 55) | pub fn oid_to_iid<'a>(bucket: &'a str, oid: StoreObjectOID<'a>) -> Sto...
method iid_to_oid (line 59) | pub fn iid_to_oid(bucket: &str, iid: StoreObjectIID) -> StoreKeyer {
method iid_to_terms (line 63) | pub fn iid_to_terms(bucket: &str, iid: StoreObjectIID) -> StoreKeyer {
method make (line 67) | fn make<'a>(idx: StoreKeyerIdx<'a>, bucket: &'a str) -> StoreKeyer {
method build_key (line 73) | fn build_key<'a>(idx: StoreKeyerIdx<'a>, bucket: &'a str) -> StoreKeye...
method route_to_compact (line 99) | fn route_to_compact(idx: &StoreKeyerIdx) -> u32 {
type StoreKeyer (line 17) | pub struct StoreKeyer {
method as_bytes (line 111) | pub fn as_bytes(&self) -> StoreKeyerKey {
method as_prefix (line 115) | pub fn as_prefix(&self) -> StoreKeyerPrefix {
method fmt (line 139) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
type StoreKeyerHasher (line 21) | pub struct StoreKeyerHasher;
method to_compact (line 130) | pub fn to_compact(part: &str) -> u32 {
type StoreKeyerIdx (line 23) | enum StoreKeyerIdx<'a> {
type StoreKeyerKey (line 31) | pub type StoreKeyerKey = [u8; 9];
type StoreKeyerPrefix (line 32) | pub type StoreKeyerPrefix = [u8; 5];
function to_index (line 35) | pub fn to_index(&self) -> u8 {
function it_keys_meta_to_value (line 163) | fn it_keys_meta_to_value() {
function it_keys_term_to_iids (line 171) | fn it_keys_term_to_iids() {
function it_keys_oid_to_iid (line 183) | fn it_keys_oid_to_iid() {
function it_keys_iid_to_oid (line 192) | fn it_keys_iid_to_oid() {
function it_keys_iid_to_terms (line 200) | fn it_keys_iid_to_terms() {
function it_hashes_compact (line 212) | fn it_hashes_compact() {
function it_formats_key (line 218) | fn it_formats_key() {
function bench_hash_compact_short (line 241) | fn bench_hash_compact_short(b: &mut Bencher) {
function bench_hash_compact_long (line 246) | fn bench_hash_compact_long(b: &mut Bencher) {
function bench_key_meta_to_value (line 255) | fn bench_key_meta_to_value(b: &mut Bencher) {
function bench_key_term_to_iids (line 260) | fn bench_key_term_to_iids(b: &mut Bencher) {
function bench_key_oid_to_iid (line 265) | fn bench_key_oid_to_iid(b: &mut Bencher) {
function bench_key_iid_to_oid (line 272) | fn bench_key_iid_to_oid(b: &mut Bencher) {
function bench_key_iid_to_terms (line 277) | fn bench_key_iid_to_terms(b: &mut Bencher) {
FILE: src/store/kv.rs
type StoreKVPool (line 36) | pub struct StoreKVPool;
method count (line 78) | pub fn count() -> usize {
method acquire (line 82) | pub fn acquire<'a, T: Into<&'a str>>(
method janitor (line 126) | pub fn janitor() {
method backup (line 135) | pub fn backup(path: &Path) -> Result<(), io::Error> {
method restore (line 145) | pub fn restore(path: &Path) -> Result<(), io::Error> {
method flush (line 157) | pub fn flush(force: bool) {
method dump_action (line 255) | fn dump_action(
method backup_item (line 280) | fn backup_item(
method restore_item (line 337) | fn restore_item(
type StoreKVBuilder (line 37) | pub struct StoreKVBuilder;
method open (line 397) | fn open(collection_hash: StoreKVAtom) -> Result<DB, DBError> {
method close (line 410) | fn close(collection_hash: StoreKVAtom) {
method path (line 423) | fn path(collection_hash: StoreKVAtom) -> PathBuf {
method configure (line 431) | fn configure() -> DBOptions {
method build (line 470) | fn build(pool_key: StoreKVKey) -> Result<StoreKV, ()> {
type StoreKV (line 39) | pub struct StoreKV {
method get (line 489) | pub fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>, DBError> {
method put (line 493) | pub fn put(&self, key: &[u8], data: &[u8]) -> Result<(), DBError> {
method delete (line 501) | pub fn delete(&self, key: &[u8]) -> Result<(), DBError> {
method flush (line 509) | fn flush(&self) -> Result<(), DBError> {
method do_write (line 519) | fn do_write(&self, batch: WriteBatch) -> Result<(), DBError> {
type StoreKVActionBuilder (line 46) | pub struct StoreKVActionBuilder;
method access (line 546) | pub fn access(bucket: StoreItemPart, store: Option<StoreKVBox>) -> Sto...
method erase (line 550) | pub fn erase<'a, T: Into<&'a str>>(collection: T, bucket: Option<T>) -...
method build (line 554) | fn build(bucket: StoreItemPart, store: Option<StoreKVBox>) -> StoreKVA...
type StoreKVAction (line 48) | pub struct StoreKVAction<'a> {
type StoreKVKey (line 54) | pub struct StoreKVKey {
method from_atom (line 1171) | pub fn from_atom(collection_hash: StoreKVAtom) -> StoreKVKey {
method from_str (line 1175) | pub fn from_str(collection_str: &str) -> StoreKVKey {
method fmt (line 1183) | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
type StoreKVAcquireMode (line 59) | pub enum StoreKVAcquireMode {
type StoreKVAtom (line 64) | type StoreKVAtom = u32;
type StoreKVBox (line 65) | type StoreKVBox = Arc<StoreKV>;
constant ATOM_HASH_RADIX (line 67) | const ATOM_HASH_RADIX: usize = 16;
method ref_last_used (line 540) | fn ref_last_used(&self) -> &RwLock<SystemTime> {
method proceed_erase_collection (line 560) | fn proceed_erase_collection(collection_str: &str) -> Result<u32, ()> {
method proceed_erase_bucket (line 593) | fn proceed_erase_bucket(_collection: &str, _bucket: &str) -> Result<u32,...
function get_meta_to_value (line 604) | pub fn get_meta_to_value(&self, meta: StoreMetaKey) -> Result<Option<Sto...
function set_meta_to_value (line 645) | pub fn set_meta_to_value(&self, meta: StoreMetaKey, value: StoreMetaValu...
function get_term_to_iids (line 666) | pub fn get_term_to_iids(
function set_term_to_iids (line 712) | pub fn set_term_to_iids(
function delete_term_to_iids (line 736) | pub fn delete_term_to_iids(&self, term_hashed: StoreTermHashed) -> Resul...
function get_oid_to_iid (line 751) | pub fn get_oid_to_iid(&self, oid: StoreObjectOID<'a>) -> Result<Option<S...
function set_oid_to_iid (line 792) | pub fn set_oid_to_iid(&self, oid: StoreObjectOID<'a>, iid: StoreObjectII...
function delete_oid_to_iid (line 812) | pub fn delete_oid_to_iid(&self, oid: StoreObjectOID<'a>) -> Result<(), (...
function get_iid_to_oid (line 827) | pub fn get_iid_to_oid(&self, iid: StoreObjectIID) -> Result<Option<Strin...
function set_iid_to_oid (line 843) | pub fn set_iid_to_oid(&self, iid: StoreObjectIID, oid: StoreObjectOID<'a...
function delete_iid_to_oid (line 855) | pub fn delete_iid_to_oid(&self, iid: StoreObjectIID) -> Result<(), ()> {
function get_iid_to_terms (line 870) | pub fn get_iid_to_terms(
function set_iid_to_terms (line 909) | pub fn set_iid_to_terms(
function delete_iid_to_terms (line 935) | pub fn delete_iid_to_terms(&self, iid: StoreObjectIID) -> Result<(), ()> {
function batch_flush_bucket (line 947) | pub fn batch_flush_bucket(
function batch_truncate_object (line 995) | pub fn batch_truncate_object(
function batch_erase_bucket (line 1041) | pub fn batch_erase_bucket(&self) -> Result<u32, ()> {
function encode_u32 (line 1129) | fn encode_u32(decoded: u32) -> [u8; 4] {
function decode_u32 (line 1137) | fn decode_u32(encoded: &[u8]) -> Result<u32, ()> {
function encode_u32_list (line 1141) | fn encode_u32_list(decoded: &[u32]) -> Vec<u8> {
function decode_u32_list (line 1153) | fn decode_u32_list(encoded: &[u8]) -> Result<Vec<u32>, ()> {
function it_acquires_database (line 1193) | fn it_acquires_database() {
function it_janitors_database (line 1198) | fn it_janitors_database() {
function it_proceeds_primitives (line 1203) | fn it_proceeds_primitives() {
function it_proceeds_actions (line 1214) | fn it_proceeds_actions() {
function it_encodes_atom (line 1242) | fn it_encodes_atom() {
function it_decodes_atom (line 1249) | fn it_decodes_atom() {
function it_encodes_atom_list (line 1256) | fn it_encodes_atom_list() {
function it_decodes_atom_list (line 1265) | fn it_decodes_atom_list() {
function bench_encode_atom (line 1285) | fn bench_encode_atom(b: &mut Bencher) {
function bench_decode_atom (line 1290) | fn bench_decode_atom(b: &mut Bencher) {
function bench_encode_atom_list (line 1297) | fn bench_encode_atom_list(b: &mut Bencher) {
function bench_decode_atom_list (line 1304) | fn bench_decode_atom_list(b: &mut Bencher) {
FILE: src/store/operation.rs
type StoreOperationDispatch (line 18) | pub struct StoreOperationDispatch;
method dispatch (line 21) | pub fn dispatch(query: Query) -> Result<Option<String>, ()> {
FILE: src/tasker/runtime.rs
type TaskerBuilder (line 13) | pub struct TaskerBuilder;
method build (line 19) | pub fn build() -> Tasker {
type Tasker (line 14) | pub struct Tasker;
method run (line 25) | pub fn run(&self) {
method tick (line 48) | fn tick() {
constant TASKER_TICK_INTERVAL (line 16) | const TASKER_TICK_INTERVAL: Duration = Duration::from_secs(10);
FILE: src/tasker/shutdown.rs
function handler (line 27) | unsafe extern "system" fn handler(event: DWORD) -> BOOL {
type ShutdownSignal (line 34) | pub struct ShutdownSignal;
method new (line 37) | pub fn new() -> ShutdownSignal {
method at_exit (line 43) | pub fn at_exit<F: FnOnce(usize)>(&self, handler: F) {
method new (line 67) | pub fn new() -> ShutdownSignal {
method init (line 74) | fn init(mask: &mut SigSet) -> nix::Result<()> {
method at_exit (line 82) | pub fn at_exit<F: FnOnce(usize)>(&self, handler: F) {
type ShutdownSignal (line 64) | pub struct ShutdownSignal(SigSet);
method new (line 37) | pub fn new() -> ShutdownSignal {
method at_exit (line 43) | pub fn at_exit<F: FnOnce(usize)>(&self, handler: F) {
method new (line 67) | pub fn new() -> ShutdownSignal {
method init (line 74) | fn init(mask: &mut SigSet) -> nix::Result<()> {
method at_exit (line 82) | pub fn at_exit<F: FnOnce(usize)>(&self, handler: F) {
FILE: tests/integration/runner/runner.js
function connect (line 9) | function connect(channel, name) {
function main (line 43) | async function main(scenario) {
function wrapper (line 69) | function wrapper(name, scenario, timeout) {
FILE: tests/integration/scenarios/insert.js
function run (line 22) | async function run(search, ingest) {
FILE: tests/integration/scenarios/ping.js
function run (line 7) | async function run(search) {
Condensed preview — 153 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (816K chars).
[
{
"path": ".dockerignore",
"chars": 24,
"preview": "tests/*\ntarget/*\ndata/*\n"
},
{
"path": ".github/FUNDING.yml",
"chars": 70,
"preview": "# These are supported funding model platforms\n\ngithub: valeriansaliou\n"
},
{
"path": ".github/workflows/build.yml",
"chars": 2723,
"preview": "on:\n push:\n tags:\n - \"v*.*.*\"\n\nname: Build and Release\n\njobs:\n build-releases:\n runs-on: ubuntu-22.04\n\n "
},
{
"path": ".github/workflows/test.yml",
"chars": 1457,
"preview": "on: [push, pull_request]\n\nname: Test and Build\n\njobs:\n test:\n strategy:\n matrix:\n os: [ubuntu-latest]\n "
},
{
"path": ".gitignore",
"chars": 66,
"preview": "target/*\n.DS_Store\n*~\n*#\n.cargo\n\ndata/store/fst/*\ndata/store/kv/*\n"
},
{
"path": "CHANGELOG.md",
"chars": 18030,
"preview": "Sonic Changelog\n===============\n\n## 1.4.9 (2024-06-16)\n\n### Changes\n\n* Update Rust code style to conform to new `rustc` "
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 3360,
"preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, w"
},
{
"path": "CONFIGURATION.md",
"chars": 6146,
"preview": "Sonic Configuration\n===================\n\n# File: config.cfg\n\n**All available configuration options are commented below, "
},
{
"path": "CONTRIBUTING.md",
"chars": 1724,
"preview": "Sonic Contributing Guide\n========================\n\n# Get Started\n\n- First of all, fork and clone this repo;\n- Install Ru"
},
{
"path": "Cargo.toml",
"chars": 1990,
"preview": "[package]\nname = \"sonic-server\"\nversion = \"1.4.9\"\ndescription = \"Fast, lightweight and schema-less search backend.\"\nread"
},
{
"path": "Dockerfile",
"chars": 497,
"preview": "FROM rust:slim-bullseye AS build\n\nRUN apt-get update\nRUN apt-get install -y build-essential clang\n\nRUN rustup --version\n"
},
{
"path": "INNER_WORKINGS.md",
"chars": 18962,
"preview": "Sonic Inner Workings\n====================\n\nThis document was written with the goal of explaining the inner workings of S"
},
{
"path": "LICENSE.md",
"chars": 15585,
"preview": "Mozilla Public License Version 2.0\n==================================\n\n1. Definitions\n--------------\n\n1.1. \"Contributor\""
},
{
"path": "PACKAGING.md",
"chars": 1014,
"preview": "Packaging\n=========\n\nThis file contains quick reminders and notes on how to package Sonic.\n\nWe consider here the packagi"
},
{
"path": "PROTOCOL.md",
"chars": 11591,
"preview": "Sonic Protocol\n==============\n\n# ⚡️ Sonic Channel\n\n**Sonic Channel is the protocol used to perform searches and ingest i"
},
{
"path": "README.md",
"chars": 21790,
"preview": "Sonic\n=====\n\n[ UNRELEASED; urgency=medium\n\n * Initial release.\n\n -- Valerian Saliou <valerian@valeriansaliou.name> Tu"
},
{
"path": "debian/compat",
"chars": 3,
"preview": "10\n"
},
{
"path": "debian/control",
"chars": 403,
"preview": "Source: sonic\nSection: net\nPriority: ext\nMaintainer: Valerian Saliou <valerian@valeriansaliou.name>\nStandards-Version: 3"
},
{
"path": "debian/copyright",
"chars": 477,
"preview": "Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nUpstream-Name: sonic\nUpstream-Contact: Valeria"
},
{
"path": "debian/rules",
"chars": 973,
"preview": "#!/usr/bin/make -f\n\nDISTRIBUTION = $(shell lsb_release -sr)\nVERSION = 1.4.9\nPACKAGEVERSION = $(VERSION)-0~$(DISTRIBUTION"
},
{
"path": "debian/sonic.install",
"chars": 70,
"preview": "sonic/sonic usr/bin/\nsonic/sonic.cfg etc/\nsonic/store/ var/lib/sonic/\n"
},
{
"path": "debian/sonic.postinst",
"chars": 271,
"preview": "#!/bin/sh\n\nset -e\n\ncase \"$1\" in\n configure)\n adduser --system --disabled-password --disabled-login --home /var"
},
{
"path": "debian/sonic.service",
"chars": 226,
"preview": "[Unit]\nDescription=Sonic Search Index\nAfter=network.target\n\n[Service]\nType=simple\nUser=sonic\nGroup=sonic\nExecStart=/usr/"
},
{
"path": "debian/source/format",
"chars": 12,
"preview": "3.0 (quilt)\n"
},
{
"path": "scripts/build_packages.sh",
"chars": 1277,
"preview": "#!/bin/bash\n\n##\n# Sonic\n#\n# Fast, lightweight and schema-less search backend\n# Copyright: 2023, Valerian Saliou <vale"
},
{
"path": "scripts/release_binaries.sh",
"chars": 1880,
"preview": "#!/bin/bash\n\n##\n# Sonic\n#\n# Fast, lightweight and schema-less search backend\n# Copyright: 2023, Valerian Saliou <vale"
},
{
"path": "scripts/sign_binaries.sh",
"chars": 1649,
"preview": "#!/bin/bash\n\n##\n# Sonic\n#\n# Fast, lightweight and schema-less search backend\n# Copyright: 2023, Valerian Saliou <vale"
},
{
"path": "src/channel/command.rs",
"chars": 38555,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/channel/format.rs",
"chars": 1663,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/channel/handle.rs",
"chars": 9895,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/channel/listen.rs",
"chars": 2048,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/channel/macros.rs",
"chars": 1118,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/channel/message.rs",
"chars": 7510,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/channel/mod.rs",
"chars": 306,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/channel/mode.rs",
"chars": 754,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/channel/statistics.rs",
"chars": 1954,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/config/defaults.rs",
"chars": 1805,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/config/env_var.rs",
"chars": 2761,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/config/logger.rs",
"chars": 770,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/config/mod.rs",
"chars": 260,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/config/options.rs",
"chars": 3957,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/config/reader.rs",
"chars": 1560,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/count.rs",
"chars": 2902,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/flushb.rs",
"chars": 2506,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/flushc.rs",
"chars": 1248,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/flusho.rs",
"chars": 2501,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/list.rs",
"chars": 1215,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2022, Troy Kohler <troy.kohler@zalando.de>"
},
{
"path": "src/executor/macros.rs",
"chars": 1700,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/mod.rs",
"chars": 346,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/pop.rs",
"chars": 7415,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/push.rs",
"chars": 7304,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/search.rs",
"chars": 8572,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/executor/suggest.rs",
"chars": 1373,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/lexer/mod.rs",
"chars": 230,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/lexer/ranges.rs",
"chars": 5356,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/lexer/stopwords.rs",
"chars": 16841,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/lexer/token.rs",
"chars": 26510,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/main.rs",
"chars": 4526,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/query/actions.rs",
"chars": 874,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/query/builder.rs",
"chars": 6776,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/query/mod.rs",
"chars": 232,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/query/types.rs",
"chars": 1302,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/afr.rs",
"chars": 586,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/aka.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/amh.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/ara.rs",
"chars": 5993,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/aze.rs",
"chars": 2233,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/bel.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/ben.rs",
"chars": 5091,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/bul.rs",
"chars": 7595,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/cat.rs",
"chars": 9720,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2020, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/ces.rs",
"chars": 7263,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/cmn.rs",
"chars": 11123,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/dan.rs",
"chars": 1591,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/deu.rs",
"chars": 8473,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/ell.rs",
"chars": 3219,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/eng.rs",
"chars": 16981,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/epo.rs",
"chars": 2261,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/est.rs",
"chars": 489,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/fin.rs",
"chars": 12326,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/fra.rs",
"chars": 9498,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/guj.rs",
"chars": 2842,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/heb.rs",
"chars": 2508,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/hin.rs",
"chars": 2838,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/hrv.rs",
"chars": 1663,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/hun.rs",
"chars": 17123,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/hye.rs",
"chars": 711,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2022, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/ind.rs",
"chars": 11976,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/ita.rs",
"chars": 9117,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/jav.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/jpn.rs",
"chars": 1588,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/kan.rs",
"chars": 1296,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/kat.rs",
"chars": 6565,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/khm.rs",
"chars": 3024,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/kor.rs",
"chars": 7707,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/lat.rs",
"chars": 655,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/lav.rs",
"chars": 2241,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/lit.rs",
"chars": 6666,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/mal.rs",
"chars": 1929,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/mar.rs",
"chars": 1406,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/mkd.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/mod.rs",
"chars": 1180,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/mya.rs",
"chars": 4938,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/nep.rs",
"chars": 3385,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/nld.rs",
"chars": 5772,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/nob.rs",
"chars": 2291,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/ori.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/pan.rs",
"chars": 5820,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/pes.rs",
"chars": 10075,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/pol.rs",
"chars": 4261,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/por.rs",
"chars": 7664,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/ron.rs",
"chars": 5540,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/rus.rs",
"chars": 7535,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/sin.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/slk.rs",
"chars": 5403,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2020, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/slv.rs",
"chars": 5745,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/sna.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/spa.rs",
"chars": 10125,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/srp.rs",
"chars": 5091,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/swe.rs",
"chars": 5667,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/tam.rs",
"chars": 1838,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/tel.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/tgl.rs",
"chars": 2179,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2022, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/tha.rs",
"chars": 1576,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/tuk.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/tur.rs",
"chars": 7131,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/ukr.rs",
"chars": 524,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/urd.rs",
"chars": 6494,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/uzb.rs",
"chars": 283,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/vie.rs",
"chars": 9172,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/yid.rs",
"chars": 2576,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/stopwords/zul.rs",
"chars": 604,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/store/fst.rs",
"chars": 50307,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/store/generic.rs",
"chars": 5557,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/store/identifiers.rs",
"chars": 1123,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/store/item.rs",
"chars": 4815,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/store/keyer.rs",
"chars": 7835,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/store/kv.rs",
"chars": 43544,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/store/macros.rs",
"chars": 308,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/store/mod.rs",
"chars": 313,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/store/operation.rs",
"chars": 2338,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/tasker/mod.rs",
"chars": 218,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/tasker/runtime.rs",
"chars": 1292,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "src/tasker/shutdown.rs",
"chars": 2394,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Valerian Saliou <valerian@valeriansa"
},
{
"path": "tests/integration/.gitignore",
"chars": 37,
"preview": "instance/data/\n\nrunner/node_modules/\n"
},
{
"path": "tests/integration/instance/config.cfg",
"chars": 270,
"preview": "# Sonic\n# Configuration file (integration tests)\n\n[server]\n\nlog_level = \"warn\"\n\n[channel]\n\ninet = \"127.0.0.1:1491\"\nauth_"
},
{
"path": "tests/integration/runner/package.json",
"chars": 514,
"preview": "{\n \"name\": \"sonic-tests-integration\",\n \"description\": \"Sonic integration tests\",\n \"version\": \"1.0.0\",\n \"main\": \"runn"
},
{
"path": "tests/integration/runner/runner.js",
"chars": 2363,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Nikita Vilunov <nikitaoryol@gmail.co"
},
{
"path": "tests/integration/scenarios/insert.js",
"chars": 1362,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Nikita Vilunov <nikitaoryol@gmail.co"
},
{
"path": "tests/integration/scenarios/ping.js",
"chars": 299,
"preview": "// Sonic\n//\n// Fast, lightweight and schema-less search backend\n// Copyright: 2019, Nikita Vilunov <nikitaoryol@gmail.co"
},
{
"path": "tests/integration/scripts/run.sh",
"chars": 1010,
"preview": "#!/bin/bash\n\n##\n# Sonic\n# Fast, lightweight and schema-less search backend\n#\n# Copyright: 2019, Nikita Vilunov <nikit"
}
]
About this extraction
This page contains the full source code of the valeriansaliou/sonic GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 153 files (708.9 KB), approximately 225.9k tokens, and a symbol index with 500 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.