Full Code of asg017/sqlite-vec for AI

main e7ae41b76192 cached
219 files
1.0 MB
339.6k tokens
541 symbols
1 requests
Download .txt
Showing preview only (1,120K chars total). Download the full file or copy to clipboard to get everything.
Repository: asg017/sqlite-vec
Branch: main
Commit: e7ae41b76192
Files: 219
Total size: 1.0 MB

Directory structure:
gitextract_v4s81yo1/

├── .github/
│   └── workflows/
│       ├── fuzz.yaml
│       ├── release.yaml
│       ├── site.yaml
│       └── test.yaml
├── .gitignore
├── ARCHITECTURE.md
├── LICENSE-APACHE
├── LICENSE-MIT
├── Makefile
├── README.md
├── SECURITY.md
├── TODO
├── VERSION
├── benchmarks/
│   ├── README.md
│   ├── exhaustive-memory/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── bench.py
│   │   ├── gist.suite
│   │   ├── requirements.txt
│   │   └── sift.suite
│   ├── micro/
│   │   ├── .gitignore
│   │   ├── Cargo.toml
│   │   ├── benches/
│   │   │   └── my_benchmark.rs
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── profiling/
│   │   ├── build-from-npy.sql
│   │   └── query-k.sql
│   └── self-params/
│       ├── build.py
│       ├── knn.py
│       └── test.py
├── bindings/
│   ├── go/
│   │   └── ncruces/
│   │       └── go-sqlite3.patch
│   ├── python/
│   │   └── extra_init.py
│   └── rust/
│       ├── .gitignore
│       ├── Cargo.toml.tmpl
│       ├── Makefile
│       ├── build.rs
│       └── src/
│           └── lib.rs
├── examples/
│   ├── nbc-headlines/
│   │   ├── .gitignore
│   │   ├── 1_scrape.ipynb
│   │   ├── 2_build.ipynb
│   │   ├── 3_search.ipynb
│   │   ├── Makefile
│   │   └── README.md
│   ├── python-recipes/
│   │   └── openai-sample.py
│   ├── simple-bun/
│   │   ├── .gitignore
│   │   ├── demo.ts
│   │   └── package.json
│   ├── simple-c/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   └── demo.c
│   ├── simple-deno/
│   │   └── demo.ts
│   ├── simple-go-cgo/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   ├── demo.go
│   │   ├── go.mod
│   │   └── go.sum
│   ├── simple-go-ncruces/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   ├── demo.go
│   │   ├── go.mod
│   │   └── go.sum
│   ├── simple-node/
│   │   ├── .gitignore
│   │   ├── demo.mjs
│   │   └── package.json
│   ├── simple-node2/
│   │   ├── .gitignore
│   │   ├── demo.mjs
│   │   ├── package.json
│   │   └── tmp.mjs
│   ├── simple-python/
│   │   ├── .gitignore
│   │   ├── demo.py
│   │   └── requirements.txt
│   ├── simple-ruby/
│   │   ├── .gitignore
│   │   ├── Gemfile
│   │   └── demo.rb
│   ├── simple-rust/
│   │   ├── .gitignore
│   │   ├── Cargo.toml
│   │   └── demo.rs
│   ├── simple-sqlite/
│   │   └── demo.sql
│   ├── simple-wasm/
│   │   └── index.html
│   ├── sqlite3-cli/
│   │   ├── README.md
│   │   └── core_init.c
│   └── wasm/
│       ├── README.md
│       └── wasm.c
├── reference.yaml
├── scripts/
│   ├── progress.ts
│   ├── publish-release.sh
│   └── vendor.sh
├── site/
│   ├── .gitignore
│   ├── .vitepress/
│   │   ├── config.mts
│   │   └── theme/
│   │       ├── HeroImg.vue
│   │       ├── Sponsors.vue
│   │       ├── index.ts
│   │       └── style.css
│   ├── api-reference.md
│   ├── build-ref.mjs
│   ├── compiling.md
│   ├── features/
│   │   ├── knn.md
│   │   └── vec0.md
│   ├── getting-started/
│   │   ├── installation.md
│   │   └── introduction.md
│   ├── guides/
│   │   ├── arithmetic.md
│   │   ├── binary-quant.md
│   │   ├── classifiers.md
│   │   ├── hybrid-search.md
│   │   ├── matryoshka.md
│   │   ├── performance.md
│   │   ├── rag.md
│   │   ├── scalar-quant.md
│   │   └── semantic-search.md
│   ├── index.md
│   ├── package.json
│   ├── project.data.ts
│   ├── public/
│   │   └── fonts/
│   │       └── ZillaSlab-SemiBold.otf
│   ├── sqlite.tmlanguage.json
│   ├── using/
│   │   ├── android-ios.md
│   │   ├── c.md
│   │   ├── datasette.md
│   │   ├── go.md
│   │   ├── js.md
│   │   ├── python.md
│   │   ├── rqlite.md
│   │   ├── ruby.md
│   │   ├── rust.md
│   │   ├── sqlite-utils.md
│   │   └── wasm.md
│   └── versioning.md
├── sqlite-dist.toml
├── sqlite-vec.c
├── sqlite-vec.h.tmpl
├── test.sql
├── tests/
│   ├── .gitignore
│   ├── .python-version
│   ├── Cargo.toml
│   ├── __snapshots__/
│   │   ├── test-auxiliary.ambr
│   │   ├── test-general.ambr
│   │   ├── test-insert-delete.ambr
│   │   ├── test-knn-distance-constraints.ambr
│   │   ├── test-metadata.ambr
│   │   └── test-partition-keys.ambr
│   ├── afbd/
│   │   ├── .gitignore
│   │   ├── .python-version
│   │   ├── Makefile
│   │   ├── README.md
│   │   └── test-afbd.py
│   ├── build.rs
│   ├── conftest.py
│   ├── correctness/
│   │   ├── build.py
│   │   └── test-correctness.py
│   ├── fuzz/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── TODO.md
│   │   ├── corpus/
│   │   │   ├── exec/
│   │   │   │   ├── select1
│   │   │   │   └── vec_version
│   │   │   ├── json/
│   │   │   │   ├── empty
│   │   │   │   ├── valid_2d
│   │   │   │   └── valid_4d
│   │   │   ├── shadow-corrupt/
│   │   │   │   ├── target0
│   │   │   │   ├── target1
│   │   │   │   ├── target2
│   │   │   │   ├── target3
│   │   │   │   ├── target4
│   │   │   │   └── target5
│   │   │   ├── vec-mismatch/
│   │   │   │   ├── dim_mismatch_4d_2d
│   │   │   │   ├── json2d_invalid_blob
│   │   │   │   ├── json4d_invalid_blob
│   │   │   │   ├── json_1d_blob_5byte
│   │   │   │   ├── json_2d_blob_3byte
│   │   │   │   ├── json_valid_blob_invalid
│   │   │   │   ├── json_valid_empty
│   │   │   │   ├── single_f32_bad_text
│   │   │   │   ├── single_normalize_json
│   │   │   │   ├── type_mismatch_f32_bit
│   │   │   │   └── type_mismatch_f32_int8
│   │   │   ├── vec0-create/
│   │   │   │   ├── normal1
│   │   │   │   └── normal2
│   │   │   └── vec0-operations/
│   │   │       ├── ins_del_ins
│   │   │       └── insert5
│   │   ├── exec.c
│   │   ├── exec.dict
│   │   ├── json.c
│   │   ├── metadata-columns.c
│   │   ├── numpy.c
│   │   ├── numpy.dict
│   │   ├── scalar-functions.c
│   │   ├── scalar-functions.dict
│   │   ├── shadow-corrupt.c
│   │   ├── targets/
│   │   │   └── .gitignore
│   │   ├── vec-each.c
│   │   ├── vec-mismatch.c
│   │   ├── vec0-create-full.c
│   │   ├── vec0-create.c
│   │   ├── vec0-create.dict
│   │   ├── vec0-delete-completeness.c
│   │   └── vec0-operations.c
│   ├── fuzz.py
│   ├── helpers.py
│   ├── leak-fixtures/
│   │   ├── each.sql
│   │   ├── knn.sql
│   │   └── vec0-create.sql
│   ├── minimum/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   └── demo.c
│   ├── pyproject.toml
│   ├── skip.test-correctness.py
│   ├── sqlite-vec-internal.h
│   ├── test-auxiliary.py
│   ├── test-general.py
│   ├── test-insert-delete.py
│   ├── test-knn-distance-constraints.py
│   ├── test-loadable.py
│   ├── test-metadata.py
│   ├── test-partition-keys.py
│   ├── test-unit.c
│   ├── test-wasm.mjs
│   ├── unittest.rs
│   └── utils.py
└── tmp-static.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/fuzz.yaml
================================================
# Fuzz workflow: runs the fuzz targets on every push to main, on a nightly
# schedule, and on manual dispatch.
name: "Fuzz"
on:
  push:
    branches: [main]
  schedule:
    # Nightly at 2am UTC for longer fuzzing sessions
    - cron: "0 2 * * *"
  workflow_dispatch:
    inputs:
      # Read by each job's "Run fuzz targets" step, which falls back to 60
      # when the workflow was not started manually.
      duration:
        description: "Fuzz duration per target (seconds)"
        default: "60"

# The workflow token is limited to read-only access to repository contents.
permissions:
  contents: read

jobs:
  # Primary fuzzing job: builds every fuzz target with clang-18, runs each one
  # for DURATION seconds with leak detection enabled, and fails the job after
  # all targets have run if any of them exited non-zero.
  fuzz-linux:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Install LLVM 18
        run: |
          wget -qO- https://apt.llvm.org/llvm.sh | sudo bash -s -- 18
          echo "FUZZ_CC=clang-18" >> $GITHUB_ENV
      - run: ./scripts/vendor.sh
      - name: Generate sqlite-vec.h
        run: make sqlite-vec.h
      - name: Build fuzz targets
        run: make -C tests/fuzz all FUZZ_CC=$FUZZ_CC FUZZ_LDFLAGS=
      - name: Run fuzz targets
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so a manually supplied value is never parsed as shell.
          DURATION: ${{ github.event.inputs.duration || '60' }}
        run: |
          # Reject anything that is not a whole number of seconds up front.
          case "$DURATION" in
            ''|*[!0-9]*)
              echo "::error::duration must be a whole number of seconds, got '$DURATION'"
              exit 1
              ;;
          esac
          EXIT_CODE=0
          for target in tests/fuzz/targets/*; do
            [ -f "$target" ] && [ -x "$target" ] || continue
            name=$(basename "$target")
            echo "::group::Fuzzing $name ($DURATION seconds)"
            # Seed corpora and dictionaries are checked in under hyphenated
            # names (e.g. vec0-create), so map underscores in the target name
            # back to hyphens for both lookups.
            stem="${name//_/-}"
            corpus="tests/fuzz/corpus/$stem"
            mkdir -p "$corpus"
            dict="tests/fuzz/$stem.dict"
            dict_flag=""
            [ -f "$dict" ] && dict_flag="-dict=$dict"
            # Keep going after a failure so every target gets its time slice.
            if ! ASAN_OPTIONS=detect_leaks=1 "$target" $dict_flag \
              -max_total_time="$DURATION" "$corpus" 2>&1; then
              echo "::error::Fuzz target $name found a crash!"
              EXIT_CODE=1
            fi
            echo "::endgroup::"
          done
          exit $EXIT_CODE
      - name: Upload crash artifacts
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: fuzz-crashes-linux
          # Targets run from the repository root, so reproducers land there.
          path: |
            crash-*
            leak-*
            timeout-*

  # Best-effort macOS fuzzing job; failures here do not fail the workflow.
  fuzz-macos:
    runs-on: macos-14
    # Best-effort: Homebrew LLVM 18 runtime dylibs pull in
    # __ZnwmSt19__type_descriptor_t (typed allocation ABI) which
    # macOS 14's system libc++ doesn't provide, causing dyld to abort.
    # Xcode clang doesn't ship libclang_rt.fuzzer_osx.a (no libFuzzer).
    # TODO: fix macOS fuzzing (pin older compiler-rt, or static runtime).
    continue-on-error: true
    steps:
      - uses: actions/checkout@v4
      - name: Install LLVM 18
        run: brew install llvm@18
      - run: ./scripts/vendor.sh
      - name: Generate sqlite-vec.h
        run: make sqlite-vec.h
      - name: Build fuzz targets
        run: |
          LLVM=/opt/homebrew/opt/llvm@18
          make -C tests/fuzz all \
            FUZZ_CC=$LLVM/bin/clang \
            FUZZ_LDFLAGS="-Wl,-ld_classic"
      - name: Run fuzz targets
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so a manually supplied value is never parsed as shell.
          DURATION: ${{ github.event.inputs.duration || '60' }}
        run: |
          # Reject anything that is not a whole number of seconds up front.
          case "$DURATION" in
            ''|*[!0-9]*)
              echo "::error::duration must be a whole number of seconds, got '$DURATION'"
              exit 1
              ;;
          esac
          EXIT_CODE=0
          for target in tests/fuzz/targets/*; do
            [ -f "$target" ] && [ -x "$target" ] || continue
            name=$(basename "$target")
            echo "::group::Fuzzing $name ($DURATION seconds)"
            # Seed corpora and dictionaries are checked in under hyphenated
            # names (e.g. vec0-create), so map underscores in the target name
            # back to hyphens for both lookups.
            stem="${name//_/-}"
            corpus="tests/fuzz/corpus/$stem"
            mkdir -p "$corpus"
            dict="tests/fuzz/$stem.dict"
            dict_flag=""
            [ -f "$dict" ] && dict_flag="-dict=$dict"
            # Keep going after a failure so every target gets its time slice.
            if ! "$target" $dict_flag \
              -max_total_time="$DURATION" "$corpus" 2>&1; then
              echo "::error::Fuzz target $name found a crash!"
              EXIT_CODE=1
            fi
            echo "::endgroup::"
          done
          exit $EXIT_CODE
      - name: Upload crash artifacts
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: fuzz-crashes-macos
          # Targets run from the repository root, so reproducers land there.
          path: |
            crash-*
            leak-*
            timeout-*

  fuzz-windows:
    # Best-effort: libFuzzer works on Windows via LLVM but ASAN/UBSAN
    # support is less reliable. Leak detection is not available.
    runs-on: windows-2022
    continue-on-error: true
    steps:
      - uses: actions/checkout@v4
      - name: Install LLVM
        run: choco install llvm -y
      - run: bash ./scripts/vendor.sh
        shell: bash
      - name: Generate sqlite-vec.h
        shell: bash
        run: make sqlite-vec.h
      - name: Build fuzz targets
        shell: bash
        run: |
          export PATH="/c/Program Files/LLVM/bin:$PATH"
          cd tests/fuzz
          mkdir -p targets
          # Each harness is compiled directly (no Makefile); a harness that
          # does not build is skipped with a warning instead of failing.
          for src in *.c; do
            name="${src%.c}"
            target_name="${name//-/_}"
            echo "Building $target_name from $src"
            clang -fsanitize=address,fuzzer \
              -I ../../ -I ../../vendor -DSQLITE_CORE -g \
              ../../vendor/sqlite3.c ../../sqlite-vec.c \
              "$src" -o "targets/${target_name}.exe" || {
              echo "Warning: failed to build $target_name (best-effort)"
            }
          done
      - name: Run fuzz targets
        shell: bash
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so a manually supplied value is never parsed as shell.
          DURATION: ${{ github.event.inputs.duration || '60' }}
        run: |
          export PATH="/c/Program Files/LLVM/bin:$PATH"
          # Reject anything that is not a whole number of seconds up front.
          case "$DURATION" in
            ''|*[!0-9]*)
              echo "::error::duration must be a whole number of seconds, got '$DURATION'"
              exit 1
              ;;
          esac
          for target in tests/fuzz/targets/*.exe; do
            [ -f "$target" ] || continue
            name=$(basename "$target" .exe)
            echo "=== Fuzzing $name ($DURATION seconds) ==="
            # The build step renamed hyphens to underscores; undo that so the
            # checked-in seed corpus (e.g. corpus/vec0-create) is actually used.
            corpus="tests/fuzz/corpus/${name//_/-}"
            mkdir -p "$corpus"
            "$target" -max_total_time="$DURATION" "$corpus" 2>&1 || {
              echo "Warning: $name found an issue or failed"
            }
          done
      - name: Upload crash artifacts
        # The run step above never fails (best-effort), so `failure()` would
        # not fire when a target crashes; upload whatever reproducers exist.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: fuzz-crashes-windows
          if-no-files-found: ignore
          # Targets are launched from the repository root, so reproducers are
          # written there; the tests/fuzz patterns are kept for compatibility.
          path: |
            crash-*
            leak-*
            timeout-*
            tests/fuzz/crash-*
            tests/fuzz/leak-*


================================================
FILE: .github/workflows/release.yaml
================================================
# Release workflow: runs when a GitHub release is published.
name: "Release"
on:
  release:
    types: [published]
# Workflow-wide default is read-only contents; jobs that need more declare
# their own `permissions` block.
permissions:
  contents: read
jobs:
  # Builds the `loadable` and `static` make targets on ubuntu-22.04 and uploads
  # dist/ as the artifact that the `dist` job downloads into dist/linux-x86_64.
  build-linux-x86_64-extension:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - run: ./scripts/vendor.sh
      - run: make loadable static
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-linux-x86_64-extension
          path: dist/*
  # Builds the `loadable` and `static` make targets on the Intel macOS runner
  # and uploads dist/ as the artifact that `dist` places in dist/macos-x86_64.
  build-macos-x86_64-extension:
    runs-on: macos-15-intel
    steps:
      - uses: actions/checkout@v4
      - run: ./scripts/vendor.sh
      - run: make loadable static
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-macos-x86_64-extension
          path: dist/*
  # Builds the `loadable` and `static` make targets on macos-14 and uploads
  # dist/ as the artifact that `dist` places in dist/macos-aarch64.
  build-macos-aarch64-extension:
    runs-on: macos-14
    steps:
      - uses: actions/checkout@v4
      - run: ./scripts/vendor.sh
      - run: make loadable static
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-macos-aarch64-extension
          path: dist/*
  # Builds dist/vec0.dll with the MSVC compiler (cl.exe) instead of the
  # Makefile's loadable target, then uploads dist/ for the `dist` job.
  build-windows-x86_64-extension:
    runs-on: windows-2022
    # NOTE(review): no step in this job appears to write to the repository or
    # the release; `contents: write` may be unnecessary — confirm.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
      # Puts the MSVC toolchain (cl.exe) on PATH for the later steps.
      - uses: ilammy/msvc-dev-cmd@v1
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - run: ./scripts/vendor.sh
        shell: bash
      - run: make sqlite-vec.h
      - run: mkdir dist
      # NOTE(review): `/fPIC` and `-shared` are GCC/Clang-style flags; cl.exe
      # presumably ignores them with a warning and `/LD` is what produces the
      # DLL — confirm before cleaning up.
      - run: cl.exe /fPIC -shared /W4 /Ivendor/ /O2 /LD sqlite-vec.c -o dist/vec0.dll
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-windows-x86_64-extension
          path: dist/*
  # Builds the `loadable` and `static` make targets on the ARM Ubuntu runner
  # and uploads dist/ as the artifact that `dist` places in dist/linux-aarch64.
  build-linux-aarch64-extension:
    runs-on: ubuntu-22.04-arm
    steps:
      - uses: actions/checkout@v4
      - run: ./scripts/vendor.sh
      - run: make loadable static
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-linux-aarch64-extension
          path: dist/*
  # Builds the `cli` make target with the cosmocc toolchain and attaches the
  # resulting dist/sqlite3 tarball directly to the release. This job is not
  # listed in the `dist` job's `needs`.
  build-cosmopolitan:
    runs-on: macos-latest
    # Write access is required for `gh release upload` below.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
      # Download and unpack the pinned cosmocc release into $HOME/cosmo.
      - run: |
          mkdir $HOME/cosmo
          curl -L -o cosmocc-$COSMO_VERSION.zip https://github.com/jart/cosmopolitan/releases/download/$COSMO_VERSION/cosmocc-$COSMO_VERSION.zip
          unzip cosmocc-$COSMO_VERSION.zip -d $HOME/cosmo
        env:
          COSMO_VERSION: "3.5.4"
      - run: ./scripts/vendor.sh
      # OMIT_SIMD=1 is passed to make; presumably it disables the SIMD code
      # paths for this portable build — confirm against the Makefile.
      - run: make cli CC=$HOME/cosmo/bin/cosmocc AR=$HOME/cosmo/bin/cosmoar OMIT_SIMD=1
      - run: tar -czvf sqlite-vec-$(cat VERSION)-cli-cosmopolitan.tar.gz dist/sqlite3
      - run: gh release upload ${{ github.ref_name }} sqlite-vec-$(cat VERSION)-cli-cosmopolitan.tar.gz
        env:
          GH_TOKEN: ${{ github.token }}
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-cosmopolitan
          path: dist/*
  # Builds the `wasm` make target with the Emscripten SDK and uploads the
  # contents of dist/.wasm for the `dist` job.
  build-wasm32-emscripten:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: mymindstorm/setup-emsdk@v12
        with:
          version: "latest"
      - run: ./scripts/vendor.sh
      - run: make sqlite-vec.h
      - run: make wasm
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-wasm32-emscripten
          path: dist/.wasm/*
          # dist/.wasm is a dot-directory, so hidden files must be opted in.
          include-hidden-files: true
  # Cross-compiles the loadable extension once per Android target using the
  # NDK's clang wrappers and uploads the resulting .so for the `dist` job.
  build-android-extensions:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # `name` feeds the artifact name; `cc` is the compiler passed to make.
        platforms:
          - name: android-aarch64
            cc: aarch64-linux-android21-clang
          - name: android-i686
            cc: i686-linux-android21-clang
          - name: android-x86_64
            cc: x86_64-linux-android21-clang
          - name: android-armv7a
            cc: armv7a-linux-androideabi21-clang
    steps:
      - uses: actions/checkout@v4
      - run: ./scripts/vendor.sh
      - uses: actions/setup-java@v4
        with:
          distribution: "temurin"
          java-version: "17"
      - uses: android-actions/setup-android@v3
      # Install a pinned NDK and export its location for the following steps.
      - run: |
          sdkmanager --install "ndk;27.0.12077973"
          echo "ANDROID_NDK_HOME=$ANDROID_SDK_ROOT/ndk/27.0.12077973" >> $GITHUB_ENV
      # Diagnostic listing of the toolchain directories.
      - run: |
          ls $ANDROID_NDK_HOME/toolchains/llvm/prebuilt/
          ls $ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin
      # Build with the NDK toolchain first on PATH.
      - run: |
          export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH
          make CC=${{ matrix.platforms.cc }} AR=llvm-ar RANLIB=llvm-ranlib STRIP=llvm-strip loadable
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-${{ matrix.platforms.name }}-extension
          path: dist/*.so
  # Builds the `loadable` and `static` make targets once per iOS target and
  # uploads dist/ for the `dist` job. Each matrix entry picks its own runner,
  # and one failing entry does not cancel the others.
  build-ios-extensions:
    runs-on: ${{ matrix.platforms.runner }}
    strategy:
      fail-fast: false
      matrix:
        platforms:
          - name: ios-aarch64
            target: arm64-apple-ios
            sdk: iphoneos
            runner: macos-14
          # Disabled entry, kept for reference:
          # - name: ios-x86_64
          #   target: x86_64-apple-ios
          #   sdk: iphoneos
          #   runner: macos-12
          - name: iossimulator-aarch64
            target: arm64-apple-ios-simulator
            sdk: iphonesimulator
            runner: macos-14
          - name: iossimulator-x86_64
            target: x86_64-apple-ios-simulator
            sdk: iphonesimulator
            runner: macos-14
    steps:
      - uses: actions/checkout@v4
      - uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: latest-stable
      - run: ./scripts/vendor.sh
      # The compiler target and SDK sysroot both come from the matrix entry.
      - run: make CFLAGS="-target ${{ matrix.platforms.target }} -isysroot $(xcrun -sdk ${{ matrix.platforms.sdk }} --show-sdk-path) -fembed-bitcode -DNDEBUG=1" loadable static
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-${{ matrix.platforms.name }}-extension
          path: dist/*
  # Collects every platform artifact into dist/<platform>, runs sqlite-dist to
  # generate the distributable packages, then publishes them to the GitHub
  # release, npm, RubyGems and PyPI.
  dist:
    runs-on: ubuntu-latest
    needs:
      [
        build-linux-x86_64-extension,
        build-linux-aarch64-extension,
        build-macos-x86_64-extension,
        build-macos-aarch64-extension,
        build-windows-x86_64-extension,
        build-wasm32-emscripten,
        build-android-extensions,
        build-ios-extensions,
      ]
    environment:
      name: release
    permissions:
      # contents: write is needed for `gh release upload`; id-token: write is
      # needed for `npm publish --provenance`.
      contents: write
      id-token: write
    steps:
      - uses: actions/checkout@v4
      # One download per build artifact, each into its own dist/ subdirectory.
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-linux-x86_64-extension
          path: dist/linux-x86_64
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-linux-aarch64-extension
          path: dist/linux-aarch64
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-macos-x86_64-extension
          path: dist/macos-x86_64
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-macos-aarch64-extension
          path: dist/macos-aarch64
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-windows-x86_64-extension
          path: dist/windows-x86_64
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-wasm32-emscripten
          path: dist/wasm32-emscripten
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-android-aarch64-extension
          path: dist/android-aarch64
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-android-i686-extension
          path: dist/android-i686
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-android-x86_64-extension
          path: dist/android-x86_64
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-android-armv7a-extension
          path: dist/android-armv7a
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-ios-aarch64-extension
          path: dist/ios-aarch64
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-iossimulator-aarch64-extension
          path: dist/iossimulator-aarch64
      - uses: actions/download-artifact@v4
        with:
          name: sqlite-vec-iossimulator-x86_64-extension
          path: dist/iossimulator-x86_64
      - run: make sqlite-vec.h
      - uses: asg017/setup-sqlite-dist@fadb0183a6ec70c3f1942de7d232b087ff2bacd1
      - run: sqlite-dist build --set-version $(cat VERSION)
      # Attach the generated packages to the GitHub release. The tag is passed
      # through the environment and quoted so it is never parsed as shell.
      - run: |
          gh release upload "$RELEASE_TAG" \
            .sqlite-dist/amalgamation/* \
            .sqlite-dist/github_releases/* \
            .sqlite-dist/spm/* \
            .sqlite-dist/sqlpkg/* \
            .sqlite-dist/checksums.txt \
            .sqlite-dist/sqlite-dist-manifest.json \
            .sqlite-dist/install.sh
        env:
          GH_TOKEN: ${{ github.token }}
          RELEASE_TAG: ${{ github.ref_name }}
      - name: Install node
        uses: actions/setup-node@v4
        with:
          node-version: "24"
          registry-url: "https://registry.npmjs.org"
      # Publish the npm packages under a dist-tag derived from VERSION:
      # "alpha" or "beta" for pre-releases, "latest" otherwise.
      - run: |
          VERSION=$(cat VERSION)
          if echo "$VERSION" | grep -q "alpha"; then
            TAG=alpha
          elif echo "$VERSION" | grep -q "beta"; then
            TAG=beta
          else
            TAG=latest
          fi
          npm publish --provenance --access public --tag $TAG .sqlite-dist/npm/sqlite-vec-darwin-arm64.tar.gz
          npm publish --provenance --access public --tag $TAG .sqlite-dist/npm/sqlite-vec-darwin-x64.tar.gz
          npm publish --provenance --access public --tag $TAG .sqlite-dist/npm/sqlite-vec-linux-x64.tar.gz
          npm publish --provenance --access public --tag $TAG .sqlite-dist/npm/sqlite-vec-linux-arm64.tar.gz
          npm publish --provenance --access public --tag $TAG .sqlite-dist/npm/sqlite-vec-windows-x64.tar.gz
          npm publish --provenance --access public --tag $TAG .sqlite-dist/npm/sqlite-vec-wasm-demo.tar.gz
          npm publish --provenance --access public --tag $TAG .sqlite-dist/npm/sqlite-vec.tar.gz
        env:
          # NOTE(review): deliberately empty; presumably npm authenticates via
          # the OIDC id-token rather than a stored token — confirm.
          NODE_AUTH_TOKEN: ""
      - uses: ruby/setup-ruby@v1
        with:
          # Quoted so YAML keeps it a string instead of parsing it as a float.
          ruby-version: "3.2"
      - run: |
          for file in .sqlite-dist/gem/*; do
            gem push  "$file"
          done
        env:
          GEM_HOST_API_KEY: ${{ secrets.GEM_HOST_API_KEY }}
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - run: pip install twine
      - run: |
          twine upload .sqlite-dist/pip/*
          twine upload .sqlite-dist/datasette/*
          twine upload .sqlite-dist/sqlite_utils/*
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
  # Publishes the Rust bindings crate from bindings/rust to crates.io.
  upload-crate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # GitHub-hosted runners ship with rustup, so the stable toolchain is
      # selected directly instead of through the archived actions-rs/toolchain
      # action.
      - name: Install stable Rust
        run: |
          rustup toolchain install stable --profile minimal --no-self-update
          rustup default stable
      - run: ./scripts/vendor.sh
      - run: make sqlite-vec.h
      - run: make deps
        working-directory: ./bindings/rust
      # --no-verify skips the local build of the packaged crate before upload.
      - run: cargo publish --no-verify
        working-directory: ./bindings/rust
        env:
          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}

  # Disabled (`if: false`). When enabled: patches ncruces/go-sqlite3, rebuilds
  # its embedded sqlite3.wasm, copies the wasm and the sqlite-vec sources into
  # asg017/sqlite-vec-go-bindings, then commits, tags and pushes that repo.
  build-ncruces-go:
    if: false
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
      - run: make sqlite-vec.h
      # Second checkout: the upstream Go bindings, placed under go-sqlite3/.
      - uses: actions/checkout@v4
        with:
          repository: ncruces/go-sqlite3
          path: go-sqlite3
      - run: git apply ../bindings/go/ncruces/go-sqlite3.patch
        working-directory: go-sqlite3/
      # Fetches wasi-sdk and binaryen in parallel (skipped if already present),
      # normalizes their directory names, then runs the upstream download and
      # build scripts.
      - run: |
          mkdir -p tools/
          [ -d "tools/wasi-sdk" ] || curl -#L "$WASI_SDK" | tar xzC tools &
          [ -d "tools/binaryen" ] || curl -#L "$BINARYEN" | tar xzC tools &
          wait

          mv "tools/wasi-sdk"* "tools/wasi-sdk"
          mv "tools/binaryen"* "tools/binaryen"

          sqlite3/download.sh
          embed/build.sh
        env:
          WASI_SDK: "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-23/wasi-sdk-23.0-x86_64-linux.tar.gz"
          BINARYEN: "https://github.com/WebAssembly/binaryen/releases/download/version_118/binaryen-version_118-x86_64-linux.tar.gz"
        working-directory: go-sqlite3/
      # Third checkout: the bindings repository that receives the build output,
      # checked out with a personal access token so it can be pushed to.
      - uses: actions/checkout@v4
        with:
          repository: asg017/sqlite-vec-go-bindings
          path: sqlite-vec-go-bindings
          token: ${{secrets.NCRUCES_BINDINGS_REPO_PAT}}
      - run: |
          cp go-sqlite3/embed/sqlite3.wasm sqlite-vec-go-bindings/ncruces/sqlite3.wasm
          cp sqlite-vec.c sqlite-vec-go-bindings/cgo/sqlite-vec.c
          cp sqlite-vec.h sqlite-vec-go-bindings/cgo/sqlite-vec.h
      # Commit (allowing an empty commit), tag with the release ref name, and
      # push both the branch and the tag. A failed commit ends the step with
      # success via `|| exit 0`.
      - run: |
          git config user.name "Alex Garcia"
          git config user.email "alexsebastian.garcia@gmail.com"
          git add .
          git commit --allow-empty -m "AUTOMATED ${{ github.ref_name }}" || exit 0
          git tag  "${{ github.ref_name }}"
          git push origin main "${{ github.ref_name }}"
        working-directory: sqlite-vec-go-bindings
        env:
          GITHUB_TOKEN: ${{secrets.NCRUCES_BINDINGS_REPO_PAT}}


================================================
FILE: .github/workflows/site.yaml
================================================
# Site deployment workflow: runs on manual dispatch, or on a push to main that
# touches one of the listed paths.
name: Deploy Site
on:
  workflow_dispatch: {}
  push:
    branches:
      - main
    # Only pushes that change at least one of these paths trigger a deploy.
    paths:
      - "site/**"
      - ".github/**"
      - "VERSION"
      - "reference.yaml"
jobs:
  # Builds the VitePress site (output in site/.vitepress/dist) and publishes it
  # to GitHub Pages.
  deploy:
    runs-on: ubuntu-latest
    permissions:
      # Declaring any permission sets every unlisted one to "none", so the
      # read access that checkout relies on is granted explicitly.
      contents: read
      pages: write
      id-token: write
    environment:
      name: github-pages
      # Filled in from the output of the final deploy step.
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          cache: npm
          cache-dependency-path: site/package-lock.json
      - run: npm ci
        working-directory: site/
      - run: make site-build
      # Kept on a current major, in line with the other Pages actions below.
      - uses: actions/configure-pages@v5
      - uses: actions/upload-pages-artifact@v4
        with:
          path: site/.vitepress/dist
      - id: deployment
        uses: actions/deploy-pages@v4


================================================
FILE: .github/workflows/test.yaml
================================================
# Test workflow: runs on every push to main.
name: "Test"
on:
  push:
    branches:
      - main
# The workflow token is limited to read-only access to repository contents.
permissions:
  contents: read
jobs:
  # Builds the `loadable` and `static` make targets on ubuntu-22.04, runs the
  # `test-loadable` make target against them, and uploads dist/.
  build-linux-x86_64-extension:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
      - run: ./scripts/vendor.sh
      - run: make loadable static
      # Sync the uv project in tests/ before running the test target.
      - run: uv sync --directory tests
      - run: make test-loadable
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-linux-x86_64-extension
          path: dist/*
  # Builds the `loadable` and `static` make targets on the Intel macOS runner,
  # runs the `test-loadable` make target, and uploads dist/.
  build-macos-x86_64-extension:
    runs-on: macos-15-intel
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
      # This is the only build job that installs Python explicitly through uv.
      - run: uv python install 3.12
      - run: ./scripts/vendor.sh
      - run: make loadable static
      # Sync the uv project in tests/ before running the test target.
      - run: uv sync --directory tests
      - run: make test-loadable
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-macos-x86_64-extension
          path: dist/*
  # Builds the `loadable` and `static` make targets on macos-14, runs the
  # `test-loadable` make target, and uploads dist/.
  build-macos-aarch64-extension:
    runs-on: macos-14
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
      - run: ./scripts/vendor.sh
      - run: make loadable static
      # Sync the uv project in tests/ before running the test target.
      - run: uv sync --directory tests
      - run: make test-loadable
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-macos-aarch64-extension
          path: dist/*
  # Builds dist/vec0.dll with the MSVC compiler (cl.exe) instead of the
  # Makefile's loadable target, runs the `test-loadable` make target under
  # bash, and uploads dist/.
  build-windows-x86_64-extension:
    runs-on: windows-2022
    steps:
      - uses: actions/checkout@v4
      # Puts the MSVC toolchain (cl.exe) on PATH for the later steps.
      - uses: ilammy/msvc-dev-cmd@v1
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
      - run: ./scripts/vendor.sh
        shell: bash
      - run: make sqlite-vec.h
      - run: mkdir dist
      # NOTE(review): `/fPIC` and `-shared` are GCC/Clang-style flags; cl.exe
      # presumably ignores them with a warning and `/LD` is what produces the
      # DLL — confirm before cleaning up.
      - run: cl.exe /fPIC -shared /W4 /Ivendor/ /O2 /LD sqlite-vec.c -o dist/vec0.dll
      - run: uv sync --directory tests
      - run: make test-loadable
        shell: bash
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-windows-x86_64-extension
          path: dist/*
  # Builds the `loadable` and `static` make targets on the ARM Ubuntu runner,
  # runs the `test-loadable` make target, and uploads dist/.
  build-linux-aarch64-extension:
    runs-on: ubuntu-22.04-arm
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
      - run: ./scripts/vendor.sh
      - run: make loadable static
      # Sync the uv project in tests/ before running the test target.
      - run: uv sync --directory tests
      - run: make test-loadable
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-linux-aarch64-extension
          path: dist/*
  # Cross-compiles the loadable extension once per Android target using the
  # NDK's clang wrappers and uploads the resulting .so. Build only; no tests
  # are run for these targets.
  build-android-extensions:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # `name` feeds the artifact name; `cc` is the compiler passed to make.
        platforms:
          [
            { name: android-aarch64, cc: aarch64-linux-android21-clang },
            { name: android-i686, cc: i686-linux-android21-clang },
            { name: android-x86_64, cc: x86_64-linux-android21-clang },
            { name: android-armv7a, cc: armv7a-linux-androideabi21-clang },
          ]
    steps:
      - uses: actions/checkout@v4
      - run: ./scripts/vendor.sh
      - uses: actions/setup-java@v4
        with:
          java-version: "17"
          distribution: "temurin"
      - uses: android-actions/setup-android@v3
      # Install a pinned NDK and export its location for the following steps.
      - run: |
          sdkmanager --install "ndk;27.0.12077973"
          echo "ANDROID_NDK_HOME=$ANDROID_SDK_ROOT/ndk/27.0.12077973" >> $GITHUB_ENV
      # Diagnostic listing of the toolchain directories.
      - run: |
          ls $ANDROID_NDK_HOME/toolchains/llvm/prebuilt/
          ls $ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin
      # Build with the NDK toolchain first on PATH.
      - run: |
          export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH
          make CC=${{ matrix.platforms.cc }} \
             AR=llvm-ar \
             RANLIB=llvm-ranlib \
             STRIP=llvm-strip loadable
      - uses: actions/upload-artifact@v4
        with:
          # The matrix name already starts with "android-", so it is not
          # repeated here (previously produced sqlite-vec-android-android-*).
          name: sqlite-vec-${{ matrix.platforms.name }}-extension
          path: dist/*.so
  # Builds the `loadable` and `static` make targets once per iOS target and
  # uploads dist/. Each matrix entry picks its own runner, and one failing
  # entry does not cancel the others.
  build-ios-extensions:
    runs-on: ${{ matrix.platforms.runner }}
    strategy:
      fail-fast: false
      matrix:
        platforms:
          - name: ios-arm64
            target: arm64-apple-ios
            sdk: iphoneos
            runner: macos-14
          # Disabled entry, kept for reference:
          # - name: ios-x86_64
          #   target: x86_64-apple-ios
          #   sdk: iphoneos
          #   runner: macos-12
          - name: ios-simulator-arm64
            target: arm64-apple-ios-simulator
            sdk: iphonesimulator
            runner: macos-14
          - name: ios-simulator-x86_64
            target: x86_64-apple-ios-simulator
            sdk: iphonesimulator
            runner: macos-14
    steps:
      - uses: actions/checkout@v4
      - uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: latest-stable
      - run: ./scripts/vendor.sh
      # The compiler target and SDK sysroot both come from the matrix entry.
      - run: make CFLAGS="-target ${{ matrix.platforms.target }} -isysroot $(xcrun -sdk ${{ matrix.platforms.sdk }} --show-sdk-path) -fembed-bitcode -DNDEBUG=1" loadable static
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-${{ matrix.platforms.name }}-extension
          path: dist/*
  # Builds the WASM distribution with Emscripten via `make wasm` and uploads
  # the contents of dist/.wasm.
  build-wasm32-emscripten:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: mymindstorm/setup-emsdk@v12
        with:
          version: "latest"
      - run: ./scripts/vendor.sh
      - run: make sqlite-vec.h
      - run: make wasm
      # Directory listings only; useful when inspecting the job log.
      - run: ls; ls dist; ls dist/.wasm
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-wasm32-emscripten
          path: dist/.wasm/*
          # dist/.wasm is a dot-directory, so hidden files must be included.
          include-hidden-files: true
  # Compiles sqlite-vec.c with emcc and links it as an Emscripten side module
  # (vec0.so), which is uploaded as the sqlite-vec-pyodide artifact.
  build-pyodide:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: mymindstorm/setup-emsdk@v12
        with:
          version: "latest"
      - run: ./scripts/vendor.sh
      - run: make sqlite-vec.h
      # Step 1 compiles a position-independent object with the listed SQLite
      # feature defines; step 2 links it with SIDE_MODULE=1 and WASM_BIGINT=1.
      - run: |
          emcc -c ./sqlite-vec.c -o ./sqlite-vec.o -I ./vendor -fPIC -g3 \
          -DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_FDATASYNC=1 -DHAVE_USLEEP=1 -DHAVE_LOCALTIME_R=1 -DHAVE_GMTIME_R=1 -DHAVE_DECL_STRERROR_R=1 -DHAVE_STRERROR_R=1 -DHAVE_POSIX_FALLOCATE=1 -DSQLITE_ENABLE_MATH_FUNCTIONS=1 -DSQLITE_ENABLE_FTS4=1 -DSQLITE_ENABLE_FTS5=1 -DSQLITE_ENABLE_RTREE=1 -DSQLITE_ENABLE_GEOPOLY=1 -DSQLITE_OMIT_POPEN=1 -DSQLITE_THREADSAFE=0
          emcc ./sqlite-vec.o -o vec0.so -s SIDE_MODULE=1 -g3 -s WASM_BIGINT=1
      - run: ls
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-pyodide
          path: vec0.so
  # Patches a checkout of ncruces/go-sqlite3 with the sqlite-vec patch and
  # rebuilds its embedded sqlite3.wasm.
  # Currently disabled: `if: false` prevents this job from running.
  build-ncruces-go:
    if: false
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: make sqlite-vec.h
      # Second checkout: the upstream Go bindings, placed in ./go-sqlite3.
      - uses: actions/checkout@v4
        with:
          repository: ncruces/go-sqlite3
          path: go-sqlite3
      - run: git apply ../bindings/go/ncruces/go-sqlite3.patch
        working-directory: go-sqlite3/
      # Downloads wasi-sdk and binaryen in parallel (unless already present),
      # renames the extracted directories to fixed names, then runs the
      # upstream download and build scripts.
      - run: |
          mkdir -p tools/
          [ -d "tools/wasi-sdk" ] || curl -#L "$WASI_SDK" | tar xzC tools &
          [ -d "tools/binaryen" ] || curl -#L "$BINARYEN" | tar xzC tools &
          wait

          mv "tools/wasi-sdk"* "tools/wasi-sdk"
          mv "tools/binaryen"* "tools/binaryen"

          sqlite3/download.sh
          embed/build.sh
        env:
          WASI_SDK: "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-23/wasi-sdk-23.0-x86_64-linux.tar.gz"
          BINARYEN: "https://github.com/WebAssembly/binaryen/releases/download/version_118/binaryen-version_118-x86_64-linux.tar.gz"
        working-directory: go-sqlite3/
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-ncruces-go
          path: go-sqlite3/embed/sqlite3.wasm
  # Builds the sqlite3 CLI (`make cli`) with the Cosmopolitan toolchain
  # (cosmocc / cosmoar) and uploads everything in dist/.
  build-cosmopolitan:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4
      # Download the pinned cosmocc release and unpack it into $HOME/cosmo.
      - run: |
          mkdir $HOME/cosmo
          curl -L -o cosmocc-$COSMO_VERSION.zip https://github.com/jart/cosmopolitan/releases/download/$COSMO_VERSION/cosmocc-$COSMO_VERSION.zip
          unzip cosmocc-$COSMO_VERSION.zip -d $HOME/cosmo
        env:
          COSMO_VERSION: "3.5.4"
      - run: ./scripts/vendor.sh
      # OMIT_SIMD=1 makes the Makefile skip its AVX/NEON compiler flags.
      - run: make cli CC=$HOME/cosmo/bin/cosmocc AR=$HOME/cosmo/bin/cosmoar OMIT_SIMD=1
      - uses: actions/upload-artifact@v4
        with:
          name: sqlite-vec-cosmopolitan
          path: dist/*
  # Generates sqlite-vec.h, then runs `make test` inside tests/minimum.
  test-minimum:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4
      - run: make sqlite-vec.h
      - run: make test
        working-directory: tests/minimum


================================================
FILE: .gitignore
================================================
/target
.vscode
sift/
*.tar.gz
*.db
*.npy
*.bin
*.out
venv/

vendor/
dist/

*.pyc
*.db-journal

alexandria/
openai/
examples/supabase-dbpedia
examples/ann-filtering
examples/dbpedia-openai
examples/imdb
examples/sotu

sqlite-vec.h
tmp/

poetry.lock

*.jsonl

memstat.c
memstat.*


================================================
FILE: ARCHITECTURE.md
================================================
# `sqlite-vec` Architecture

Internal documentation for how `sqlite-vec` works under the hood. It is not
meant for users of the `sqlite-vec` project; consult
[the official `sqlite-vec` documentation](https://alexgarcia.xyz/sqlite-vec) for
how-to guides instead. Rather, this is for people interested in how `sqlite-vec`
works, along with some guidelines for any future contributors.

Very much a WIP.

## `vec0`

### Shadow Tables

#### `xyz_chunks`

- `chunk_id INTEGER`
- `size INTEGER`
- `validity BLOB`
- `rowids BLOB`

#### `xyz_rowids`

- `rowid INTEGER`
- `id`
- `chunk_id INTEGER`
- `chunk_offset INTEGER`

#### `xyz_vector_chunksNN`

- `rowid INTEGER`
- `vector BLOB`

#### `xyz_auxiliary`

- `rowid INTEGER`
- `valueNN [type]`

#### `xyz_metadatachunksNN`

- `rowid INTEGER`
- `data BLOB`

#### `xyz_metadatatextNN`

- `rowid INTEGER`
- `data TEXT`

### idxStr

The `vec0` idxStr is a string composed of a single "header" character and 0 or
more "blocks" of 4 characters each.

The "header" character denotes the type of query plan, as determined by the
`enum vec0_query_plan` values. The current possible values are:

| Name                       | Value | Description                                                            |
| -------------------------- | ----- | ---------------------------------------------------------------------- |
| `VEC0_QUERY_PLAN_FULLSCAN` | `'1'` | Perform a full-scan on all rows                                        |
| `VEC0_QUERY_PLAN_POINT`    | `'2'` | Perform a single-lookup point query for the provided rowid             |
| `VEC0_QUERY_PLAN_KNN`      | `'3'` | Perform a KNN-style query on the provided query vector and parameters. |

Each 4-character "block" is associated with a corresponding value in `argv[]`.
For example, the 1st block, at byte offsets `1-4` (inclusive), is associated
with `argv[1]`. The 2nd block, at byte offsets `5-8` (inclusive), is associated
with `argv[2]`, and so on. Each block describes what kind of value or filter the
given `argv[i]` value is.

#### `VEC0_IDXSTR_KIND_KNN_MATCH` (`'{'`)

`argv[i]` is the query vector of the KNN query.

The remaining 3 characters of the block are `_` fillers.

#### `VEC0_IDXSTR_KIND_KNN_K` (`'}'`)

`argv[i]` is the limit/k value of the KNN query.

The remaining 3 characters of the block are `_` fillers.

#### `VEC0_IDXSTR_KIND_KNN_ROWID_IN` (`'['`)

`argv[i]` is the optional `rowid in (...)` value, and must be handled with
[`sqlite3_vtab_in_first()` / `sqlite3_vtab_in_next()`](https://www.sqlite.org/c3ref/vtab_in_first.html).

The remaining 3 characters of the block are `_` fillers.

#### `VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT` (`']'`)

`argv[i]` is a "constraint" on a specific partition key.

The second character of the block denotes which partition key to filter on,
using `A` to denote the first partition key column, `B` for the second, etc. It
is encoded with `'A' + partition_idx` and can be decoded with `c - 'A'`.

The third character of the block denotes which operator is used in the
constraint. It will be one of the values of `enum vec0_partition_operator`, as
only a subset of operations are supported on partition keys.

The fourth character of the block is a `_` filler.

#### `VEC0_IDXSTR_KIND_POINT_ID` (`'!'`)

`argv[i]` is the value of the rowid or id to match against for the point query.

The remaining 3 characters of the block are `_` fillers.

#### `VEC0_IDXSTR_KIND_METADATA_CONSTRAINT` (`'&'`)

`argv[i]` is the value of the `WHERE` constraint for a metadata column in a KNN
query.

The second character of the block denotes which metadata column the constraint
belongs to, using `A` to denote the first metadata column, `B` for the second,
etc. It is encoded with `'A' + metadata_idx` and can be decoded with
`c - 'A'`.

The third character of the block is the constraint operator. It will be one of
`enum vec0_metadata_operator`, as only a subset of operators are supported on
metadata column KNN filters.

The fourth character of the block is a `_` filler.


================================================
FILE: LICENSE-APACHE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1.  Definitions.

    "License" shall mean the terms and conditions for use, reproduction,
    and distribution as defined by Sections 1 through 9 of this document.

    "Licensor" shall mean the copyright owner or entity authorized by
    the copyright owner that is granting the License.

    "Legal Entity" shall mean the union of the acting entity and all
    other entities that control, are controlled by, or are under common
    control with that entity. For the purposes of this definition,
    "control" means (i) the power, direct or indirect, to cause the
    direction or management of such entity, whether by contract or
    otherwise, or (ii) ownership of fifty percent (50%) or more of the
    outstanding shares, or (iii) beneficial ownership of such entity.

    "You" (or "Your") shall mean an individual or Legal Entity
    exercising permissions granted by this License.

    "Source" form shall mean the preferred form for making modifications,
    including but not limited to software source code, documentation
    source, and configuration files.

    "Object" form shall mean any form resulting from mechanical
    transformation or translation of a Source form, including but
    not limited to compiled object code, generated documentation,
    and conversions to other media types.

    "Work" shall mean the work of authorship, whether in Source or
    Object form, made available under the License, as indicated by a
    copyright notice that is included in or attached to the work
    (an example is provided in the Appendix below).

    "Derivative Works" shall mean any work, whether in Source or Object
    form, that is based on (or derived from) the Work and for which the
    editorial revisions, annotations, elaborations, or other modifications
    represent, as a whole, an original work of authorship. For the purposes
    of this License, Derivative Works shall not include works that remain
    separable from, or merely link (or bind by name) to the interfaces of,
    the Work and Derivative Works thereof.

    "Contribution" shall mean any work of authorship, including
    the original version of the Work and any modifications or additions
    to that Work or Derivative Works thereof, that is intentionally
    submitted to Licensor for inclusion in the Work by the copyright owner
    or by an individual or Legal Entity authorized to submit on behalf of
    the copyright owner. For the purposes of this definition, "submitted"
    means any form of electronic, verbal, or written communication sent
    to the Licensor or its representatives, including but not limited to
    communication on electronic mailing lists, source code control systems,
    and issue tracking systems that are managed by, or on behalf of, the
    Licensor for the purpose of discussing and improving the Work, but
    excluding communication that is conspicuously marked or otherwise
    designated in writing by the copyright owner as "Not a Contribution."

    "Contributor" shall mean Licensor and any individual or Legal Entity
    on behalf of whom a Contribution has been received by Licensor and
    subsequently incorporated within the Work.

2.  Grant of Copyright License. Subject to the terms and conditions of
    this License, each Contributor hereby grants to You a perpetual,
    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
    copyright license to reproduce, prepare Derivative Works of,
    publicly display, publicly perform, sublicense, and distribute the
    Work and such Derivative Works in Source or Object form.

3.  Grant of Patent License. Subject to the terms and conditions of
    this License, each Contributor hereby grants to You a perpetual,
    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
    (except as stated in this section) patent license to make, have made,
    use, offer to sell, sell, import, and otherwise transfer the Work,
    where such license applies only to those patent claims licensable
    by such Contributor that are necessarily infringed by their
    Contribution(s) alone or by combination of their Contribution(s)
    with the Work to which such Contribution(s) was submitted. If You
    institute patent litigation against any entity (including a
    cross-claim or counterclaim in a lawsuit) alleging that the Work
    or a Contribution incorporated within the Work constitutes direct
    or contributory patent infringement, then any patent licenses
    granted to You under this License for that Work shall terminate
    as of the date such litigation is filed.

4.  Redistribution. You may reproduce and distribute copies of the
    Work or Derivative Works thereof in any medium, with or without
    modifications, and in Source or Object form, provided that You
    meet the following conditions:

    (a) You must give any other recipients of the Work or
    Derivative Works a copy of this License; and

    (b) You must cause any modified files to carry prominent notices
    stating that You changed the files; and

    (c) You must retain, in the Source form of any Derivative Works
    that You distribute, all copyright, patent, trademark, and
    attribution notices from the Source form of the Work,
    excluding those notices that do not pertain to any part of
    the Derivative Works; and

    (d) If the Work includes a "NOTICE" text file as part of its
    distribution, then any Derivative Works that You distribute must
    include a readable copy of the attribution notices contained
    within such NOTICE file, excluding those notices that do not
    pertain to any part of the Derivative Works, in at least one
    of the following places: within a NOTICE text file distributed
    as part of the Derivative Works; within the Source form or
    documentation, if provided along with the Derivative Works; or,
    within a display generated by the Derivative Works, if and
    wherever such third-party notices normally appear. The contents
    of the NOTICE file are for informational purposes only and
    do not modify the License. You may add Your own attribution
    notices within Derivative Works that You distribute, alongside
    or as an addendum to the NOTICE text from the Work, provided
    that such additional attribution notices cannot be construed
    as modifying the License.

    You may add Your own copyright statement to Your modifications and
    may provide additional or different license terms and conditions
    for use, reproduction, or distribution of Your modifications, or
    for any such Derivative Works as a whole, provided Your use,
    reproduction, and distribution of the Work otherwise complies with
    the conditions stated in this License.

5.  Submission of Contributions. Unless You explicitly state otherwise,
    any Contribution intentionally submitted for inclusion in the Work
    by You to the Licensor shall be under the terms and conditions of
    this License, without any additional terms or conditions.
    Notwithstanding the above, nothing herein shall supersede or modify
    the terms of any separate license agreement you may have executed
    with Licensor regarding such Contributions.

6.  Trademarks. This License does not grant permission to use the trade
    names, trademarks, service marks, or product names of the Licensor,
    except as required for reasonable and customary use in describing the
    origin of the Work and reproducing the content of the NOTICE file.

7.  Disclaimer of Warranty. Unless required by applicable law or
    agreed to in writing, Licensor provides the Work (and each
    Contributor provides its Contributions) on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
    implied, including, without limitation, any warranties or conditions
    of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
    PARTICULAR PURPOSE. You are solely responsible for determining the
    appropriateness of using or redistributing the Work and assume any
    risks associated with Your exercise of permissions under this License.

8.  Limitation of Liability. In no event and under no legal theory,
    whether in tort (including negligence), contract, or otherwise,
    unless required by applicable law (such as deliberate and grossly
    negligent acts) or agreed to in writing, shall any Contributor be
    liable to You for damages, including any direct, indirect, special,
    incidental, or consequential damages of any character arising as a
    result of this License or out of the use or inability to use the
    Work (including but not limited to damages for loss of goodwill,
    work stoppage, computer failure or malfunction, or any and all
    other commercial damages or losses), even if such Contributor
    has been advised of the possibility of such damages.

9.  Accepting Warranty or Additional Liability. While redistributing
    the Work or Derivative Works thereof, You may choose to offer,
    and charge a fee for, acceptance of support, warranty, indemnity,
    or other liability obligations and/or rights consistent with this
    License. However, in accepting such obligations, You may act only
    on Your own behalf and on Your sole responsibility, not on behalf
    of any other Contributor, and only if You agree to indemnify,
    defend, and hold each Contributor harmless for any liability
    incurred by, or claims asserted against, such Contributor by reason
    of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

Copyright 2024 Alex Garcia

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


================================================
FILE: LICENSE-MIT
================================================
MIT License

Copyright (c) 2024 Alex Garcia

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: Makefile
================================================

# Build metadata: the current git commit hash, the contents of the VERSION
# file, and a timestamp produced by `date`.
COMMIT=$(shell git rev-parse HEAD)
VERSION=$(shell cat VERSION)
DATE=$(shell date +'%FT%TZ%z')

# Destination directories used by the `install` and `uninstall` targets.
INSTALL_LIB_DIR = /usr/local/lib
INSTALL_INCLUDE_DIR = /usr/local/include
INSTALL_BIN_DIR = /usr/local/bin

# Toolchain defaults.
# NOTE(review): GNU make predefines CC and AR as implicit variables, so these
# `ifndef` fallbacks may never take effect — confirm.
ifndef CC
CC=gcc
endif
ifndef AR
AR=ar
endif

# Host platform detection: Darwin via `uname -s`, Windows via the OS
# variable, and Linux as the fallback for everything else.
ifeq ($(shell uname -s),Darwin)
CONFIG_DARWIN=y
else ifeq ($(OS),Windows_NT)
CONFIG_WINDOWS=y
else
CONFIG_LINUX=y
endif

# File extension of the loadable extension on each platform.
ifdef CONFIG_DARWIN
LOADABLE_EXTENSION=dylib
endif

# On Linux, -lm is additionally appended to CFLAGS.
ifdef CONFIG_LINUX
LOADABLE_EXTENSION=so
CFLAGS += -lm
endif

ifdef CONFIG_WINDOWS
LOADABLE_EXTENSION=dll
endif

# SIMD flags are only added on macOS hosts (AVX on x86_64, NEON on arm64).
# Define OMIT_SIMD to skip them entirely.
ifndef OMIT_SIMD
	ifeq ($(shell uname -sm),Darwin x86_64)
	CFLAGS += -mavx -DSQLITE_VEC_ENABLE_AVX
	endif
	ifeq ($(shell uname -sm),Darwin arm64)
	CFLAGS += -mcpu=apple-m1 -DSQLITE_VEC_ENABLE_NEON
	endif
endif

# Define USE_BREW_SQLITE to add the Homebrew SQLite include and library
# directories to CFLAGS.
ifdef USE_BREW_SQLITE
	SQLITE_INCLUDE_PATH=-I/opt/homebrew/opt/sqlite/include
	SQLITE_LIB_PATH=-L/opt/homebrew/opt/sqlite/lib
	CFLAGS += $(SQLITE_INCLUDE_PATH) $(SQLITE_LIB_PATH)
endif

# RENAME_WHEELS_ARGS is set here but not used in the rules visible in this
# part of the Makefile.
ifdef IS_MACOS_ARM
RENAME_WHEELS_ARGS=--is-macos-arm
else
RENAME_WHEELS_ARGS=
endif

# All build outputs are written beneath $(prefix).
# NOTE(review): this is the first rule in the file, so a bare `make` appears to
# only create the directory rather than build `all` — confirm that is intended.
prefix=dist
$(prefix):
	mkdir -p $(prefix)

# Final artifacts: the loadable extension, the static archive, a copy of the
# public header, and the sqlite3 CLI binary.
TARGET_LOADABLE=$(prefix)/vec0.$(LOADABLE_EXTENSION)
TARGET_STATIC=$(prefix)/libsqlite_vec0.a
TARGET_STATIC_H=$(prefix)/sqlite-vec.h
TARGET_CLI=$(prefix)/sqlite3

# Short aliases for the artifact paths above.
loadable: $(TARGET_LOADABLE)
static: $(TARGET_STATIC)
cli: $(TARGET_CLI)

all: loadable static cli

# Hidden scratch directories for intermediate objects, archives and sources.
OBJS_DIR=$(prefix)/.objs
LIBS_DIR=$(prefix)/.libs
BUILD_DIR=$(prefix)/.build

$(OBJS_DIR): $(prefix)
	mkdir -p $@

$(LIBS_DIR): $(prefix)
	mkdir -p $@

$(BUILD_DIR): $(prefix)
	mkdir -p $@


# Loadable extension: compile sqlite-vec.c (the first prerequisite, `$<`)
# straight into a shared library at -O3 with warnings enabled.
$(TARGET_LOADABLE): sqlite-vec.c sqlite-vec.h $(prefix)
	$(CC) \
		-fPIC -shared \
		-Wall -Wextra \
		-Ivendor/ \
		-O3 \
		$(CFLAGS) \
		$< -o $@

# Static library: compile sqlite-vec.c with SQLITE_CORE and SQLITE_VEC_STATIC
# defined, then archive the single object file.
$(TARGET_STATIC): sqlite-vec.c sqlite-vec.h $(prefix) $(OBJS_DIR)
	$(CC) -Ivendor/ $(CFLAGS) -DSQLITE_CORE -DSQLITE_VEC_STATIC \
	-O3 -c  $< -o $(OBJS_DIR)/vec.o
	$(AR) rcs $@ $(OBJS_DIR)/vec.o

# Copy the generated public header next to the other artifacts.
$(TARGET_STATIC_H): sqlite-vec.h $(prefix)
	cp $< $@


# --- sqlite3 CLI with sqlite-vec linked in -----------------------------------
# Three static archives (sqlite3, shell, sqlite-vec) are built separately and
# then linked together with examples/sqlite3-cli/core_init.c.

# SQLite itself, compiled from vendor/sqlite3.c with SQLITE_EXTRA_INIT set to
# core_init.
$(OBJS_DIR)/sqlite3.o: vendor/sqlite3.c $(OBJS_DIR)
	$(CC) -c -g3 -O3 -DSQLITE_EXTRA_INIT=core_init -DSQLITE_CORE -DSQLITE_ENABLE_STMT_SCANSTATUS -DSQLITE_ENABLE_BYTECODE_VTAB -DSQLITE_ENABLE_EXPLAIN_COMMENTS -I./vendor $< -o $@

$(LIBS_DIR)/sqlite3.a: $(OBJS_DIR)/sqlite3.o $(LIBS_DIR)
	$(AR) rcs $@ $<

# Rewrite the shell source: every `/*extra-version-info*/` marker becomes the
# EXTRA_TODO macro, which is defined when shell.o is compiled below.
$(BUILD_DIR)/shell-new.c: vendor/shell.c $(BUILD_DIR)
	sed 's/\/\*extra-version-info\*\//EXTRA_TODO/g' $< > $@

$(OBJS_DIR)/shell.o: $(BUILD_DIR)/shell-new.c $(OBJS_DIR)
	$(CC) -c -g3 -O3 \
		-I./vendor \
		-DSQLITE_ENABLE_STMT_SCANSTATUS -DSQLITE_ENABLE_BYTECODE_VTAB -DSQLITE_ENABLE_EXPLAIN_COMMENTS \
		-DEXTRA_TODO="\"CUSTOMBUILD:sqlite-vec\n\"" \
		$< -o $@

$(LIBS_DIR)/shell.a: $(OBJS_DIR)/shell.o $(LIBS_DIR)
	$(AR) rcs $@ $<

# sqlite-vec compiled as a plain object (no SQLITE_CORE define at this step).
$(OBJS_DIR)/sqlite-vec.o: sqlite-vec.c $(OBJS_DIR)
	$(CC) -c -g3 -Ivendor/ -I./ $(CFLAGS) $< -o $@

$(LIBS_DIR)/sqlite-vec.a: $(OBJS_DIR)/sqlite-vec.o $(LIBS_DIR)
	$(AR) rcs $@ $<


# Final link: core_init.c plus the three archives produce $(prefix)/sqlite3.
$(TARGET_CLI): sqlite-vec.h $(LIBS_DIR)/sqlite-vec.a $(LIBS_DIR)/shell.a $(LIBS_DIR)/sqlite3.a examples/sqlite3-cli/core_init.c $(prefix)
	$(CC) -g3  \
	-Ivendor/ -I./ \
	-DSQLITE_CORE \
	-DSQLITE_VEC_STATIC \
	-DSQLITE_THREADSAFE=0 -DSQLITE_ENABLE_FTS4 \
	-DSQLITE_ENABLE_STMT_SCANSTATUS -DSQLITE_ENABLE_BYTECODE_VTAB -DSQLITE_ENABLE_EXPLAIN_COMMENTS \
	-DSQLITE_EXTRA_INIT=core_init \
	$(CFLAGS) \
	-ldl -lm \
	examples/sqlite3-cli/core_init.c $(LIBS_DIR)/shell.a $(LIBS_DIR)/sqlite3.a $(LIBS_DIR)/sqlite-vec.a -o $@


# Generate sqlite-vec.h from its template by running it through envsubst.
# The variables passed to envsubst are:
#   VERSION        contents of the VERSION file
#   DATE           modification time of the VERSION file (`date -r`)
#   SOURCE         hash of the most recent commit that touched VERSION
#   VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH
#                  dot-separated fields of VERSION; for the patch field
#                  anything after the first `-` is dropped
sqlite-vec.h: sqlite-vec.h.tmpl VERSION
	VERSION=$(shell cat VERSION) \
	DATE=$(shell date -r VERSION +'%FT%TZ%z') \
	SOURCE=$(shell git log -n 1 --pretty=format:%H -- VERSION) \
	VERSION_MAJOR=$$(echo $$VERSION | cut -d. -f1) \
	VERSION_MINOR=$$(echo $$VERSION | cut -d. -f2) \
	VERSION_PATCH=$$(echo $$VERSION | cut -d. -f3 | cut -d- -f1) \
	envsubst < $< > $@

# Remove every build output.
clean:
	rm -rf dist


# Reformat the C sources in place with clang-format and the Python loadable
# tests with black.
FORMAT_FILES=sqlite-vec.h sqlite-vec.c
format: $(FORMAT_FILES)
	clang-format -i $(FORMAT_FILES)
	black tests/test-loadable.py

# Fail when clang-format would change the C sources. The recipe uses process
# substitution, hence the target-specific switch to bash.
lint: SHELL:=/bin/bash
lint:
	diff -u <(cat $(FORMAT_FILES)) <(clang-format $(FORMAT_FILES))

progress:
	deno run --allow-read=sqlite-vec.c scripts/progress.ts


# Print an `EVIDENCE-OF:` tag made of two zero-padded five-digit numbers
# derived from the shell's RANDOM variable.
evidence-of:
	@echo "EVIDENCE-OF: V$(shell printf "%05d" $$((RANDOM % 100000)))_$(shell printf "%05d" $$((RANDOM % 100000)))"

# Run test.sql against an in-memory database using the sqlite3 found on PATH.
test:
	sqlite3 :memory: '.read test.sql'

.PHONY: version loadable static test clean gh-release evidence-of install uninstall

publish-release:
	./scripts/publish-release.sh

# Run the pytest suite against the freshly built loadable extension.
# -x stops at the first failure; -s disables output capture.
# -k test_vec0_update
test-loadable: loadable
	uv run --managed-python --project tests pytest -vv -s -x . tests/test-*.py

# Re-run the loadable tests while rewriting the stored snapshots.
test-loadable-snapshot-update: loadable
	uv run --managed-python --project tests pytest -vv tests/test-loadable.py --snapshot-update

# Re-run `make test-loadable` whenever a .c, .py or Makefile file changes.
test-loadable-watch:
	watchexec --exts c,py,Makefile --clear -- make test-loadable

# Build and immediately run the C unit tests. tests/test-unit.c, sqlite-vec.c
# and vendor/sqlite3.c are compiled into one binary with SQLITE_CORE and
# SQLITE_VEC_TEST defined.
# $(prefix) is a prerequisite so the output directory exists on a clean
# checkout; without it the compiler cannot write $(prefix)/test-unit.
test-unit: $(prefix)
	$(CC) -DSQLITE_CORE -DSQLITE_VEC_TEST tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit

# Build every fuzz target by delegating to the Makefile in tests/fuzz.
fuzz-build:
	$(MAKE) -C tests/fuzz all

# Run every executable in tests/fuzz/targets for 30 seconds each.
# For each target:
#   - the corpus directory tests/fuzz/corpus/<name> is created if missing;
#   - an optional dictionary tests/fuzz/<name with '_' replaced by '-'>.dict
#     is passed via -dict= when that file exists;
#   - a failing fuzzer does not abort the loop (`|| true`).
# The dictionary name is derived with `tr` rather than the bash-only
# `${name//_/-}` expansion, because recipes run under make's default /bin/sh,
# which rejects that syntax when sh is dash.
fuzz-quick: fuzz-build
	@echo "Running all fuzz targets for 30 seconds each..."
	@for target in tests/fuzz/targets/*; do \
	  [ -f "$$target" ] && [ -x "$$target" ] || continue; \
	  name=$$(basename $$target); \
	  echo "=== Fuzzing $$name ==="; \
	  corpus="tests/fuzz/corpus/$$name"; \
	  mkdir -p "$$corpus"; \
	  dict="tests/fuzz/$$(printf '%s' "$$name" | tr '_' '-').dict"; \
	  dict_flag=""; \
	  [ -f "$$dict" ] && dict_flag="-dict=$$dict"; \
	  "$$target" $$dict_flag \
	    -max_total_time=30 "$$corpus" 2>&1 || true; \
	done

# Run every executable in tests/fuzz/targets for 5 minutes (300 s) each.
# For each target:
#   - the corpus directory tests/fuzz/corpus/<name> is created if missing;
#   - an optional dictionary tests/fuzz/<name with '_' replaced by '-'>.dict
#     is passed via -dict= when that file exists;
#   - a failing fuzzer does not abort the loop (`|| true`).
# The dictionary name is derived with `tr` rather than the bash-only
# `${name//_/-}` expansion, because recipes run under make's default /bin/sh,
# which rejects that syntax when sh is dash.
fuzz-long: fuzz-build
	@echo "Running all fuzz targets for 5 minutes each..."
	@for target in tests/fuzz/targets/*; do \
	  [ -f "$$target" ] && [ -x "$$target" ] || continue; \
	  name=$$(basename $$target); \
	  echo "=== Fuzzing $$name ==="; \
	  corpus="tests/fuzz/corpus/$$name"; \
	  mkdir -p "$$corpus"; \
	  dict="tests/fuzz/$$(printf '%s' "$$name" | tr '_' '-').dict"; \
	  dict_flag=""; \
	  [ -f "$$dict" ] && dict_flag="-dict=$$dict"; \
	  "$$target" $$dict_flag \
	    -max_total_time=300 "$$corpus" 2>&1 || true; \
	done

# Documentation site helpers: run the `dev` / `build` npm scripts of the
# package located in site/.
site-dev:
	npm --prefix site run dev

site-build:
	npm --prefix site run build

# Install the public header plus whichever of the loadable extension, static
# archive and CLI binary have already been built under $(prefix); artifacts
# that do not exist are skipped silently. Finishes by running ldconfig.
# NOTE(review): only the CLI copy goes through `sudo`, and $(INSTALL_BIN_DIR)
# is not created with `install -d` like the other two directories — confirm
# whether that is intentional.
install:
	install -d $(INSTALL_LIB_DIR)
	install -d $(INSTALL_INCLUDE_DIR)
	install -m 644 sqlite-vec.h $(INSTALL_INCLUDE_DIR)
	@if [ -f $(TARGET_LOADABLE) ]; then \
		install -m 644 $(TARGET_LOADABLE) $(INSTALL_LIB_DIR); \
	fi
	@if [ -f $(TARGET_STATIC) ]; then \
		install -m 644 $(TARGET_STATIC) $(INSTALL_LIB_DIR); \
	fi
	@if [ -f $(TARGET_CLI) ]; then \
		sudo install -m 755 $(TARGET_CLI) $(INSTALL_BIN_DIR); \
	fi
	ldconfig

# Remove everything the `install` target may have copied, then refresh the
# linker cache. Each file is removed from the directory `install` put it in:
# libraries from $(INSTALL_LIB_DIR), the CLI binary from $(INSTALL_BIN_DIR),
# and the header from $(INSTALL_INCLUDE_DIR). `rm -f` keeps the target
# successful when an artifact was never installed.
uninstall:
	rm -f $(INSTALL_LIB_DIR)/$(notdir $(TARGET_LOADABLE))
	rm -f $(INSTALL_LIB_DIR)/$(notdir $(TARGET_STATIC))
	rm -f $(INSTALL_BIN_DIR)/$(notdir $(TARGET_CLI))
	rm -f $(INSTALL_INCLUDE_DIR)/sqlite-vec.h
	ldconfig

# ███████████████████████████████ WASM SECTION ███████████████████████████████

# Output directory for the WASM build products.
WASM_DIR=$(prefix)/.wasm

$(WASM_DIR): $(prefix)
	mkdir -p $@

# Pinned SQLite source release (version number and the year used in the
# download URL) and the paths of the files produced inside its source tree.
SQLITE_WASM_VERSION=3450300
SQLITE_WASM_YEAR=2024
SQLITE_WASM_SRCZIP=$(BUILD_DIR)/sqlite-src.zip
SQLITE_WASM_COMPILED_SQLITE3C=$(BUILD_DIR)/sqlite-src-$(SQLITE_WASM_VERSION)/sqlite3.c
SQLITE_WASM_COMPILED_MJS=$(BUILD_DIR)/sqlite-src-$(SQLITE_WASM_VERSION)/ext/wasm/jswasm/sqlite3.mjs
SQLITE_WASM_COMPILED_WASM=$(BUILD_DIR)/sqlite-src-$(SQLITE_WASM_VERSION)/ext/wasm/jswasm/sqlite3.wasm

# Files delivered in $(WASM_DIR).
TARGET_WASM_LIB=$(WASM_DIR)/libsqlite_vec.wasm.a
TARGET_WASM_MJS=$(WASM_DIR)/sqlite3.mjs
TARGET_WASM_WASM=$(WASM_DIR)/sqlite3.wasm
TARGET_WASM=$(TARGET_WASM_MJS) $(TARGET_WASM_WASM)

# Download the SQLite source archive; `touch` refreshes its timestamp.
$(SQLITE_WASM_SRCZIP): $(BUILD_DIR)
	curl -o $@ https://www.sqlite.org/$(SQLITE_WASM_YEAR)/sqlite-src-$(SQLITE_WASM_VERSION).zip
	touch $@

# Unpack a fresh source tree (any previous one is deleted first), configure
# it with --enable-all and generate sqlite3.c.
$(SQLITE_WASM_COMPILED_SQLITE3C): $(SQLITE_WASM_SRCZIP) $(BUILD_DIR)
	rm -rf $(BUILD_DIR)/sqlite-src-$(SQLITE_WASM_VERSION)/ || true
	unzip -q -o $< -d $(BUILD_DIR)
	(cd $(BUILD_DIR)/sqlite-src-$(SQLITE_WASM_VERSION)/ && ./configure --enable-all && make sqlite3.c)
	touch $@

# Compile examples/wasm/wasm.c and sqlite-vec.c with emcc and bundle both
# objects into one archive.
$(TARGET_WASM_LIB): examples/wasm/wasm.c sqlite-vec.c $(BUILD_DIR) $(WASM_DIR)
	emcc -O3  -I./ -Ivendor -DSQLITE_CORE -c examples/wasm/wasm.c -o $(BUILD_DIR)/wasm.wasm.o
	emcc -O3  -I./ -Ivendor -DSQLITE_CORE -c sqlite-vec.c -o $(BUILD_DIR)/sqlite-vec.wasm.o
	emar rcs $@ $(BUILD_DIR)/wasm.wasm.o $(BUILD_DIR)/sqlite-vec.wasm.o

# Run the SQLite ext/wasm build, passing the archive above through the
# sqlite3_wasm_extra_init.c make variable. The relative path climbs from
# ext/wasm back up to $(prefix) and then into .wasm/.
$(SQLITE_WASM_COMPILED_MJS) $(SQLITE_WASM_COMPILED_WASM): $(SQLITE_WASM_COMPILED_SQLITE3C) $(TARGET_WASM_LIB)
	(cd $(BUILD_DIR)/sqlite-src-$(SQLITE_WASM_VERSION)/ext/wasm && \
		make sqlite3_wasm_extra_init.c=../../../../.wasm/libsqlite_vec.wasm.a jswasm/sqlite3.mjs jswasm/sqlite3.wasm \
	)

# Copy the two build products out of the SQLite tree into $(WASM_DIR).
$(TARGET_WASM_MJS): $(SQLITE_WASM_COMPILED_MJS)
	cp $< $@

$(TARGET_WASM_WASM): $(SQLITE_WASM_COMPILED_WASM)
	cp $< $@

wasm: $(TARGET_WASM)

# ███████████████████████████████   END WASM   ███████████████████████████████


================================================
FILE: README.md
================================================
# `sqlite-vec`

[![](https://dcbadge.vercel.app/api/server/VCtQ8cGhUs)](https://discord.gg/Ve7WeCJFXk)

An extremely small, "fast enough" vector search SQLite extension that runs
anywhere! A successor to [`sqlite-vss`](https://github.com/asg017/sqlite-vss)

<!-- deno-fmt-ignore-start -->

> [!IMPORTANT]
> _`sqlite-vec` is pre-v1, so expect breaking changes!_

<!-- deno-fmt-ignore-end -->

- Store and query float, int8, and binary vectors in `vec0` virtual tables
- Written in pure C, no dependencies, runs anywhere SQLite runs
  (Linux/MacOS/Windows, in the browser with WASM, Raspberry Pis, etc.)
- Store non-vector data in metadata, auxiliary, or partition key columns

<p align="center">
  <a href="https://hacks.mozilla.org/2024/06/sponsoring-sqlite-vec-to-enable-more-powerful-local-ai-applications/">
  <picture>
    <source media="(prefers-color-scheme: dark)" srcset="./.github/logos/mozilla.dark.svg">
    <source media="(prefers-color-scheme: light)" srcset="./.github/logos/mozilla.svg">
    <img alt="Mozilla Builders logo" width=400>
  </picture>
  </a>
</p>

<p align="center">
<i>
<code>sqlite-vec</code> is a
<a href="https://hacks.mozilla.org/2024/06/sponsoring-sqlite-vec-to-enable-more-powerful-local-ai-applications/">Mozilla Builders project</a>,
with additional sponsorship from
<a href="https://fly.io/"><img width=14px src="./.github/logos/flyio.small.ico"/> Fly.io </a>,
<a href="https://tur.so/sqlite-vec"><img width=14px src="./.github/logos/turso.small.ico"/> Turso</a>,
<a href="https://sqlitecloud.io/"><img width=14px src="./.github/logos/sqlitecloud.small.svg"/> SQLite Cloud</a>, and
<a href="https://shinkai.com/"><img width=14px src="./.github/logos/shinkai.small.svg"/> Shinkai</a>.
See <a href="#sponsors">the Sponsors section</a> for more details.
</i>
</p>

## Installing

See [Installing `sqlite-vec`](https://alexgarcia.xyz/sqlite-vec/installation.html)
for more details.

| Language       | Install                                              | More Info                                                                             |                                                                                                                                                                                                    |
| -------------- | ---------------------------------------------------- | ------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Python         | `pip install sqlite-vec`                             | [`sqlite-vec` with Python](https://alexgarcia.xyz/sqlite-vec/python.html)             | [![PyPI](https://img.shields.io/pypi/v/sqlite-vec.svg?color=blue&logo=python&logoColor=white)](https://pypi.org/project/sqlite-vec/)                                                               |
| Node.js        | `npm install sqlite-vec`                             | [`sqlite-vec` with Node.js](https://alexgarcia.xyz/sqlite-vec/js.html)            | [![npm](https://img.shields.io/npm/v/sqlite-vec.svg?color=green&logo=nodedotjs&logoColor=white)](https://www.npmjs.com/package/sqlite-vec)                                                         |
| Ruby           | `gem install sqlite-vec`                             | [`sqlite-vec` with Ruby](https://alexgarcia.xyz/sqlite-vec/ruby.html)                 | ![Gem](https://img.shields.io/gem/v/sqlite-vec?color=red&logo=rubygems&logoColor=white)                                                                       |
| Go             | `go get -u github.com/asg017/sqlite-vec/bindings/go` | [`sqlite-vec` with Go](https://alexgarcia.xyz/sqlite-vec/go.html)                     | [![Go Reference](https://pkg.go.dev/badge/github.com/asg017/sqlite-vec-go-bindings/cgo.svg)](https://pkg.go.dev/github.com/asg017/sqlite-vec-go-bindings/cgo)                                                     |
| Rust           | `cargo add sqlite-vec`                               | [`sqlite-vec` with Rust](https://alexgarcia.xyz/sqlite-vec/rust.html)                 | [![Crates.io](https://img.shields.io/crates/v/sqlite-vec?logo=rust)](https://crates.io/crates/sqlite-vec)                                                                                          |
| Datasette      | `datasette install datasette-sqlite-vec`             | [`sqlite-vec` with Datasette](https://alexgarcia.xyz/sqlite-vec/datasette.html)       | [![Datasette](https://img.shields.io/pypi/v/datasette-sqlite-vec.svg?color=B6B6D9&label=Datasette+plugin&logoColor=white&logo=python)](https://datasette.io/plugins/datasette-sqlite-vec)          |
| rqlite         | `rqlited -extensions-path=sqlite-vec.tar.gz`         | [`sqlite-vec` with rqlite](https://alexgarcia.xyz/sqlite-vec/rqlite.html)                        | [![rqlite](https://img.shields.io/badge/rqlite-sqlite_extensions-blue)](https://rqlite.io/docs/guides/extensions/)           |
| `sqlite-utils` | `sqlite-utils install sqlite-utils-sqlite-vec`       | [`sqlite-vec` with sqlite-utils](https://alexgarcia.xyz/sqlite-vec/sqlite-utils.html) | [![sqlite-utils](https://img.shields.io/pypi/v/sqlite-utils-sqlite-vec.svg?color=B6B6D9&label=sqlite-utils+plugin&logoColor=white&logo=python)](https://datasette.io/plugins/datasette-sqlite-vec) |
| Github Release |                                                      |                                                                                       | ![GitHub tag (latest SemVer pre-release)](https://img.shields.io/github/v/tag/asg017/sqlite-vec?color=lightgrey&include_prereleases&label=Github+release&logo=github)                              |


## Sample usage

```sql
.load ./vec0

create virtual table vec_examples using vec0(
  sample_embedding float[8]
);

-- vectors can be provided as JSON or in a compact binary format
insert into vec_examples(rowid, sample_embedding)
  values
    (1, '[-0.200, 0.250, 0.341, -0.211, 0.645, 0.935, -0.316, -0.924]'),
    (2, '[0.443, -0.501, 0.355, -0.771, 0.707, -0.708, -0.185, 0.362]'),
    (3, '[0.716, -0.927, 0.134, 0.052, -0.669, 0.793, -0.634, -0.162]'),
    (4, '[-0.710, 0.330, 0.656, 0.041, -0.990, 0.726, 0.385, -0.958]');


-- KNN style query
select
  rowid,
  distance
from vec_examples
where sample_embedding match '[0.890, 0.544, 0.825, 0.961, 0.358, 0.0196, 0.521, 0.175]'
order by distance
limit 2;
/*
┌───────┬──────────────────┐
│ rowid │     distance     │
├───────┼──────────────────┤
│ 2     │ 2.38687372207642 │
│ 1     │ 2.38978505134583 │
└───────┴──────────────────┘
*/
```

## Sponsors

Development of `sqlite-vec` is supported by multiple generous sponsors! Mozilla
is the main sponsor through the new Builders project.
<p align="center">
  <a href="https://hacks.mozilla.org/2024/06/sponsoring-sqlite-vec-to-enable-more-powerful-local-ai-applications/">
  <picture>
    <source media="(prefers-color-scheme: dark)" srcset="./.github/logos/mozilla.dark.svg">
    <source media="(prefers-color-scheme: light)" srcset="./.github/logos/mozilla.svg">
    <img alt="Mozilla Builders logo" src="./.github/logos/mozilla.svg" width=400>
  </picture>
  </a>
</p>

`sqlite-vec` is also sponsored by the following companies:

<a href="https://fly.io/">
<picture>
  <source media="(prefers-color-scheme: dark)" srcset="./.github/logos/flyio.dark.svg">
  <source media="(prefers-color-scheme: light)" srcset="./.github/logos/flyio.svg">
  <img alt="Fly.io logo" src="./.github/logos/flyio.svg" width="48%">
</picture>
</a>

<a href="https://tur.so/sqlite-vec">
<picture>
  <source media="(prefers-color-scheme: dark)" srcset="./.github/logos/turso.svg">
  <source media="(prefers-color-scheme: light)" srcset="./.github/logos/turso.svg">
  <img alt="Turso logo" src="./.github/logos/turso.svg" width="48%">
</picture>
</a>

<a href="https://sqlitecloud.io/">
<picture>
  <source media="(prefers-color-scheme: dark)" srcset="./.github/logos/sqlitecloud.dark.svg">
  <source media="(prefers-color-scheme: light)" srcset="./.github/logos/sqlitecloud.svg">
  <img alt="SQLite Cloud logo" src="./.github/logos/sqlitecloud.svg" width="48%">
</picture>
</a>

<a href="https://shinkai.com">
<picture>
  <source media="(prefers-color-scheme: dark)" srcset="./.github/logos/shinkai.dark.svg">
  <source media="(prefers-color-scheme: light)" srcset="./.github/logos/shinkai.svg">

  <img alt="Shinkai logo" src="./.github/logos/shinkai.svg" width="48%">
</picture>
</a>

As well as multiple individual supporters on
[Github sponsors](https://github.com/sponsors/asg017/)!

If your company is interested in sponsoring `sqlite-vec` development, send me an
email to get more info: https://alexgarcia.xyz

## See Also

- [**`sqlite-ecosystem`**](https://github.com/asg017/sqlite-ecosystem), Maybe
  more 3rd party SQLite extensions I've developed
- [**`sqlite-rembed`**](https://github.com/asg017/sqlite-rembed), Generate text
  embeddings from remote APIs like OpenAI/Nomic/Ollama, meant for testing and
  SQL scripts
- [**`sqlite-lembed`**](https://github.com/asg017/sqlite-lembed), Generate text
  embeddings locally from embedding models in the `.gguf` format


================================================
FILE: SECURITY.md
================================================
Please report any security vulnerabilities to alexsebastian.garcia@gmail.com . Avoid using public Github issues whenever possible. I will get back to you as quickly as possible.


================================================
FILE: TODO
================================================
- [ ] add `xyz_info` shadow table with version etc.

- later
  - [ ] partition: UPDATE support
  - [ ] skip invalid validity entries in knn filter?
  - [ ] nulls in metadata
  - [ ] partition `x in (...)` handling
  - [ ] blobs/date/datetime
  - [ ] uuid/ulid perf
  - [ ] Aux columns: `NOT NULL` constraint
  - [ ] Metadata columns: `NOT NULL` constraint
  - [ ] Partition key: `NOT NULL` constraint
  - [ ] dictionary encoding?
  - [ ] properly sqlite3_vtab_nochange / sqlite3_value_nochange handling
  - [ ] perf
    - [ ] aux: cache INSERT
    - [ ] aux: LEFT JOIN on `_rowids` queries to avoid N lookup queries


================================================
FILE: VERSION
================================================
0.1.8-alpha.1

================================================
FILE: benchmarks/README.md
================================================


================================================
FILE: benchmarks/exhaustive-memory/.gitignore
================================================
data/


================================================
FILE: benchmarks/exhaustive-memory/Makefile
================================================



# Create the download directory on demand.
data/:
	mkdir -p $@

# `data/` is an order-only prerequisite (listed after `|`): it must exist
# before the recipe runs, but its timestamp is ignored. As a normal
# prerequisite, the directory's mtime changes every time a file is added to or
# removed from it (e.g. the `rm` below), which makes the dataset targets look
# out of date and triggers a fresh download on every invocation.
data/sift: | data/
	curl -o data/sift.tar.gz ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz
	tar -xvzf data/sift.tar.gz -C data/
	rm data/sift.tar.gz

data/gist: | data/
	curl -o data/gist.tar.gz ftp://ftp.irisa.fr/local/texmex/corpus/gist.tar.gz
	tar -xvzf data/gist.tar.gz -C data/
	rm data/gist.tar.gz


================================================
FILE: benchmarks/exhaustive-memory/README.md
================================================
# `sqlite-vec` In-memory benchmark comparisons

This repo contains a benchmark that compares KNN queries of `sqlite-vec` to other in-process vector search tools using **brute force linear scans only**. These include:


- [Faiss IndexFlatL2](https://faiss.ai/)
- [usearch with `exact=True`](https://github.com/unum-cloud/usearch)
- [libsql vector search with `vector_distance_cos`](https://turso.tech/vector)
- [numpy](https://numpy.org/), using [this approach](https://github.com/EthanRosenthal/nn-vs-ann)
- [duckdb with `list_cosine_similarity`](https://duckdb.org/docs/sql/functions/nested.html#list_cosine_similaritylist1-list2)
- [`sentence_transformers.util.semantic_search`](https://sbert.net/docs/package_reference/util.html#sentence_transformers.util.semantic_search)
- [hnswlib BFIndex](https://github.com/nmslib/hnswlib/blob/c1b9b79af3d10c6ee7b5d0afa1ce851ae975254c/TESTING_RECALL.md?plain=1#L8)


Again **ONLY BRUTE FORCE LINEAR SCANS ARE TESTED**. This benchmark does **not** test approximate nearest neighbors (ANN) implementations. It is deliberately narrow: it only tests KNN searches using brute force.

A few other caveats:

- Only brute-force linear scans, no ANN
- Only CPU is used. The only tool that does offer GPU is Faiss anyway.
- Only in-memory datasets are used. Many of these tools do support serializing and reading from disk (including `sqlite-vec`) and possibly `mmap`'ing, but this only tests in-memory datasets. Mostly because of numpy
- Queries are made one after the other, **not batched.** Some tools offer APIs to query multiple inputs at the same time, but this benchmark runs queries sequentially. This was done to emulate "server request"-style queries, where multiple users send queries at different times, making batching more difficult. Note that `sqlite-vec` does **not** support batched queries yet.


These tests are run in Python. Vectors are provided as an in-memory numpy array, and each test converts that numpy array into whatever makes sense for the given tool. For example, `sqlite-vec` tests will read those vectors into a SQLite table. DuckDB will read them into an Arrow array and then create a DuckDB table from that.


================================================
FILE: benchmarks/exhaustive-memory/bench.py
================================================
import numpy as np
import numpy.typing as npt
import time
import sqlite3
import pandas as pd
from dataclasses import dataclass
from rich.console import Console
from rich.table import Table
from typing import List, Optional


@dataclass
class BenchResult:
    """Timing results collected for one benchmarked tool.

    NOTE(review): despite the ``_ms`` suffixes, the ``bench_*`` functions in
    this file store raw ``time.time()`` differences (i.e. seconds) in both
    timing fields; ``duration()`` does the seconds -> milliseconds conversion
    when the results table is rendered.
    """

    # Label shown in the "Tool" column of the results table.
    tool: str
    # Elapsed time spent building/loading the index (0 when there is no build step).
    build_time_ms: float
    # One elapsed-time entry per executed query.
    query_times_ms: List[float]


def duration(seconds: float):
    """Render a duration given in seconds as whole milliseconds, e.g. ``"1500ms"``.

    The fractional part of the millisecond value is truncated, not rounded.
    """
    whole_ms = int(seconds * 1000)
    return str(whole_ms) + "ms"


def cosine_similarity(
    vec: npt.NDArray[np.float32], mat: npt.NDArray[np.float32], do_norm: bool = True
) -> npt.NDArray[np.float32]:
    """Similarity between one vector and every row of a matrix.

    Args:
        vec: query vector of length ``d``.
        mat: matrix whose rows are the candidate vectors (``n x d``).
        do_norm: when True, divide the dot products by the product of the
            vector norms (cosine similarity); when False, return the raw dot
            products.

    Returns:
        Array of ``n`` similarity scores, one per row of ``mat``.
    """
    sim = vec @ mat.T
    if do_norm:
        sim /= np.linalg.norm(vec) * np.linalg.norm(mat, axis=1)
    return sim


def topk(
    vec: npt.NDArray[np.float32],
    mat: npt.NDArray[np.float32],
    k: int = 5,
    do_norm: bool = True,
) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.float32]]:
    """Return the ``k`` rows of ``mat`` most similar to ``vec``.

    Args:
        vec: query vector.
        mat: matrix whose rows are the candidate vectors.
        k: number of neighbours to return. Values greater than or equal to
            the number of rows return every row.
        do_norm: forwarded to ``cosine_similarity``.

    Returns:
        ``(indices, similarities)``, both ordered from most to least similar;
        ``similarities[i]`` is the score of row ``indices[i]``.
    """
    sim = cosine_similarity(vec, mat, do_norm=do_norm)

    # argpartition requires kth < len(sim); when every row is requested a
    # plain full sort is both valid and all that is needed.
    if k >= sim.shape[0]:
        order = np.argsort(-sim)
        return order, sim[order]

    # Rather than sorting all similarities and taking the top K, it's faster to
    # argpartition and then just sort the top K.
    # The difference is O(N logN) vs O(N + k logk)
    candidates = np.argpartition(-sim, kth=k)[:k]
    # The argsort result indexes into `candidates`, so map it back to row
    # indices of `mat` before looking up the scores.
    best = candidates[np.argsort(-sim[candidates])]
    return best, sim[best]


def ivecs_read(fname):
    """Read an ``.ivecs``-style file into a 2-D int32 array.

    The file is parsed as a flat stream of int32 values in which every record
    is a dimension count ``d`` followed by ``d`` values; ``d`` is taken from
    the first record and the leading count column is dropped from the result.
    """
    a = np.fromfile(fname, dtype="int32")
    d = a[0]
    return a.reshape(-1, d + 1)[:, 1:].copy()


def fvecs_read(fname, sample):
    """Read an ``.fvecs``-style file into a 2-D float32 array.

    Args:
        fname: path of the file to read.
        sample: maximum number of vectors to return. ``None`` or a negative
            value means "all vectors" (callers in this file pass ``-1`` for
            that; a plain ``[:sample]`` slice would silently drop the last
            vector in that case).
    """
    # Same on-disk layout as ivecs; only the payload is reinterpreted as float32.
    vectors = ivecs_read(fname).view("float32")
    if sample is None or sample < 0:
        return vectors
    return vectors[:sample]


def bench_hnsw(base, query):
    """Build an (approximate) hnswlib index over ``base`` and run each query.

    Prints the build time, the total query time and the mean per-query time,
    and returns the list of raw ``knn_query`` results (k=5).

    The index dimensionality is taken from ``base`` instead of being fixed at
    128, so datasets other than SIFT can be used.
    """
    import hnswlib

    t0 = time.time()
    # possible options are l2, cosine or ip
    p = hnswlib.Index(space="ip", dim=base.shape[1])

    # NOTE: Use default settings from the README.
    print("buildings hnsw")
    p.init_index(max_elements=base.shape[0], ef_construction=200, M=16)
    ids = np.arange(base.shape[0])
    p.add_items(base, ids)
    p.set_ef(50)

    print("build time", time.time() - t0)

    results = []
    times = []
    t = time.time()
    for q in query:
        t0 = time.time()
        result = p.knn_query(q, k=5)
        results.append(result)
        times.append(time.time() - t0)
    print(time.time() - t)
    print("hnsw avg", np.mean(times))
    return results


def bench_hnsw_bf(base, query, k) -> BenchResult:
    """Benchmark hnswlib's brute-force index (``BFIndex``, L2 space).

    Build time covers index creation, initialisation and ``add_items``; each
    query is timed individually around ``knn_query`` plus the bookkeeping
    append of its result.
    """
    import hnswlib

    print("hnswlib-bf")
    dimensions = base.shape[1]

    build_started = time.time()
    index = hnswlib.BFIndex(space="l2", dim=dimensions)
    index.init_index(max_elements=base.shape[0])
    index.add_items(base, np.arange(base.shape[0]))
    build_time = time.time() - build_started

    neighbours = []
    query_times = []
    for q in query:
        query_started = time.time()
        neighbours.append(index.knn_query(q, k=k))
        query_times.append(time.time() - query_started)
    return BenchResult("hnswlib-bf", build_time, query_times)


def bench_numpy(base, query, k) -> BenchResult:
    """Benchmark the pure-numpy ``topk`` search; there is no build step (0)."""
    print("numpy...")
    neighbours = []
    query_times = []
    for q in query:
        started = time.time()
        neighbours.append(topk(q, base, k=k))
        query_times.append(time.time() - started)
    return BenchResult("numpy", 0, query_times)


def bench_sqlite_vec(base, query, page_size, chunk_size, k) -> BenchResult:
    """Benchmark KNN queries against a ``vec0`` virtual table.

    An in-memory database is created with the requested ``page_size``, the
    extension is loaded from ``../../dist/vec0`` and ``base`` is bulk-inserted
    in one transaction (timed as the build step). Every query must return
    exactly ``k`` rows.
    """
    dimensions = base.shape[1]
    print(f"sqlite-vec {page_size} {chunk_size}...")

    conn = sqlite3.connect(":memory:")
    conn.execute(f"PRAGMA page_size = {page_size}")
    conn.enable_load_extension(True)
    conn.load_extension("../../dist/vec0")
    conn.execute(
        f"""
          create virtual table vec_sift1m using vec0(
            chunk_size={chunk_size},
            vector float[{dimensions}]
          )
        """
    )

    build_started = time.time()
    with conn:
        conn.executemany(
            "insert into vec_sift1m(vector) values (?)",
            [[vector.tobytes()] for vector in base],
        )
    build_time = time.time() - build_started

    query_times = []
    for q in query:
        query_started = time.time()
        rows = conn.execute(
            """
              select
                rowid,
                distance
              from vec_sift1m
              where vector match ?
                and k = ?
              order by distance
            """,
            [q.tobytes(), k],
        ).fetchall()
        assert len(rows) == k
        query_times.append(time.time() - query_started)

    label = f"sqlite-vec vec0 ({page_size}|{chunk_size})"
    return BenchResult(label, build_time, query_times)


def bench_sqlite_vec_scalar(base, query, page_size, k) -> BenchResult:
    """Benchmark a full-table scan using the scalar ``vec_distance_l2`` function.

    Vectors are stored as blobs in a regular SQLite table and ranked with
    ``ORDER BY distance LIMIT k``. Every query must return exactly ``k`` rows.
    """
    print(f"sqlite-vec-scalar...")

    conn = sqlite3.connect(":memory:")
    conn.enable_load_extension(True)
    conn.load_extension("../../dist/vec0")
    conn.execute(f"PRAGMA page_size={page_size}")
    conn.execute(f"create table sift1m(vector);")

    build_started = time.time()
    with conn:
        conn.executemany(
            "insert into sift1m(vector) values (?)",
            [[vector.tobytes()] for vector in base],
        )
    build_time = time.time() - build_started

    query_times = []
    for q in query:
        query_started = time.time()
        rows = conn.execute(
            """
              select
                rowid,
                vec_distance_l2(?, vector) as distance
              from sift1m
              order by distance
              limit ?
            """,
            [q.tobytes(), k],
        ).fetchall()
        assert len(rows) == k
        query_times.append(time.time() - query_started)

    return BenchResult(f"sqlite-vec-scalar ({page_size})", build_time, query_times)

def bench_libsql(base, query, page_size, k) -> BenchResult:
    """Benchmark a brute-force scan with libsql's ``vector_distance_cos``.

    Asserts up front that a ``vector`` SQL function is registered, i.e. that
    the ``sqlite3`` module in use is actually backed by libsql.
    """
    print(f"libsql ...")
    dimensions = base.shape[1]

    conn = sqlite3.connect(":memory:")
    conn.enable_load_extension(True)
    has_vector_fn = conn.execute(
        "select 'vector' in (select name from pragma_function_list)"
    ).fetchone()[0]
    assert has_vector_fn == 1
    conn.execute(f"PRAGMA page_size={page_size}")
    conn.execute(f"create table vectors(vector f32_blob({dimensions}));")

    # TODO: only does DiskANN?
    #conn.execute("CREATE INDEX vectors_idx ON vectors (libsql_vector_idx(vector, 'metric=cosine'))")

    build_started = time.time()
    with conn:
        conn.executemany(
            "insert into vectors(vector) values (?)",
            [[vector.tobytes()] for vector in base],
        )
    build_time = time.time() - build_started

    query_times = []
    for q in query:
        query_started = time.time()
        rows = conn.execute(
            """
              select
                rowid,
                vector_distance_cos(?, vector) as distance
              FROM vectors
              order by 2
              limit ?
            """,
            [q.tobytes(), k],
        ).fetchall()
        query_times.append(time.time() - query_started)

    return BenchResult(f"libsql ({page_size})", build_time, query_times)


def register_np(db, array, name):
    """Expose an in-memory numpy array to SQLite as a static-blob virtual table.

    The array's raw data pointer, element type and shape are read from its
    ``__array_interface__`` and registered in ``temp.vec_static_blobs`` under
    ``name``; a ``vec_static_blob_entries`` virtual table called ``name`` is
    then created on top of it.

    Only little-endian float32 (``"<f4"``) two-dimensional arrays are
    accepted. NOTE(review): only a raw pointer is handed over, so the array
    presumably has to stay alive for as long as the table is used.
    """
    iface = array.__array_interface__
    data_ptr = iface["data"][0]
    nvectors, dimensions = iface["shape"]
    typestr = iface["typestr"]

    assert typestr == "<f4"

    # Let SQLite escape the identifier (printf's %w format) before it is
    # spliced into the CREATE VIRTUAL TABLE statement.
    escaped = db.execute("select printf('%w', ?)", [name]).fetchone()[0]

    db.execute(
        "insert into temp.vec_static_blobs(name, data) select ?, vec_static_blob_from_raw(?, ?, ?, ?)",
        [name, data_ptr, typestr, dimensions, nvectors],
    )

    create_stmt = f'create virtual table "{escaped}" using vec_static_blob_entries({escaped})'
    db.execute(create_stmt)

def bench_sqlite_vec_static(base, query, k) -> BenchResult:
    """Benchmark KNN queries over a numpy array registered via ``register_np``.

    The "build" step is only the registration of ``base`` as the ``base``
    virtual table. Every query must return exactly ``k`` rows.
    """
    print(f"sqlite-vec static...")

    conn = sqlite3.connect(":memory:")
    conn.enable_load_extension(True)
    conn.load_extension("../../dist/vec0")

    build_started = time.time()
    register_np(conn, base, "base")
    build_time = time.time() - build_started

    query_times = []
    for q in query:
        query_started = time.time()
        rows = conn.execute(
            """
              select
                rowid
              from base
              where vector match ?
                and k = ?
              order by distance
            """,
            [q.tobytes(), k],
        ).fetchall()
        assert len(rows) == k
        query_times.append(time.time() - query_started)

    return BenchResult(f"sqlite-vec static", build_time, query_times)

def bench_faiss(base, query, k) -> BenchResult:
    """Benchmark Faiss ``IndexFlatL2``; queries are issued one at a time."""
    import faiss

    dimensions = base.shape[1]
    print("faiss...")

    build_started = time.time()
    index = faiss.IndexFlatL2(dimensions)
    index.add(base)
    build_time = time.time() - build_started

    found_rowids = []
    query_times = []
    for q in query:
        query_started = time.time()
        # The query is wrapped into a one-row batch for `search`.
        distances, rowids = index.search(x=np.array([q]), k=k)
        found_rowids.append(rowids)
        query_times.append(time.time() - query_started)
    return BenchResult("faiss", build_time, query_times)


def bench_lancedb(base, query, k) -> BenchResult:
    """Benchmark LanceDB searches over a table created from ``base``.

    The database is opened at the relative path ``"a"``. Only the
    ``create_table`` call is counted as build time; preparing the DataFrame
    is not.
    """
    import lancedb

    print('lancedb...')
    dimensions = base.shape[1]
    conn = lancedb.connect("a")

    # Create a DataFrame where each row is a 1D array
    records = [{"vector": row.reshape(1, -1)[0]} for row in base]
    frame = pd.DataFrame(data=records, columns=["vector"])

    build_started = time.time()
    conn.create_table("t", data=frame)
    build_time = time.time() - build_started

    table = conn.open_table("t")
    query_times = []
    for q in query:
        query_started = time.time()
        hits = table.search(q).limit(k).to_arrow()
        query_times.append(time.time() - query_started)
    return BenchResult("lancedb", build_time, query_times)

def bench_duckdb(base, query, k) -> BenchResult:
    """Benchmark DuckDB's ``array_cosine_similarity`` over a fixed-size array column.

    Build time covers converting ``base`` to an Arrow table and inserting it
    into the DuckDB table ``t``.
    """
    import duckdb
    import pyarrow as pa

    print("duckdb...")
    dimensions = base.shape[1]
    conn = duckdb.connect(":memory:")
    conn.execute(f"CREATE TABLE t(vector float[{dimensions}])")

    build_started = time.time()
    # NOTE(review): the INSERT below refers to `pa_base` by name inside the SQL
    # text; DuckDB presumably resolves it from this function's local
    # variables, so this variable must keep its name.
    pa_base = pa.Table.from_arrays([pa.array(list(base))], names=['vector'])
    conn.execute(f"INSERT INTO t(vector) SELECT vector::float[{dimensions}] FROM pa_base")
    build_time = time.time() - build_started

    query_times = []
    for q in query:
        query_started = time.time()
        rows = conn.execute(
            f"""
              SELECT
                rowid,
                array_cosine_similarity(vector, ?::float[{dimensions}])
              FROM t
              ORDER BY 2 DESC
              LIMIT ?
            """, [q, k]).fetchall()
        query_times.append(time.time() - query_started)
    return BenchResult("duckdb", build_time, query_times)

def bench_sentence_transformers(base, query, k) -> BenchResult:
    """Benchmark ``sentence_transformers.util.semantic_search``.

    There is no index to build; the recorded build time is just the
    (near-zero) difference between two consecutive clock reads.
    """
    from sentence_transformers.util import semantic_search

    print("sentence-transformers")
    dimensions = base.shape[1]

    build_started = time.time()
    build_time = time.time() - build_started

    query_times = []
    for q in query:
        query_started = time.time()
        hits = semantic_search(q, base, top_k=k)
        query_times.append(time.time() - query_started)

    return BenchResult("sentence-transformers", build_time, query_times)


def bench_chroma(base, query, k):
    """Benchmark an ephemeral (in-process) Chroma collection.

    ``base`` is inserted in the batches produced by ``create_batches`` with
    stringified positional ids; that insertion is the timed build step.
    """
    import chromadb
    from chromadb.utils.batch_utils import create_batches

    client = chromadb.EphemeralClient()
    collection = client.create_collection(name="my_collection")

    build_started = time.time()
    batches = create_batches(
        api=client,
        ids=[str(x) for x in range(len(base))],
        embeddings=base.tolist(),
    )
    for batch in batches:
        collection.add(*batch)
    build_time = time.time() - build_started

    query_times = []
    for q in query:
        query_started = time.time()
        hits = collection.query(
            query_embeddings=[q.tolist()],
            n_results=k,
        )
        query_times.append(time.time() - query_started)
    #print("chroma avg", duration(np.mean(query_times)))
    return BenchResult("chroma", build_time, query_times)

def bench_usearch_npy(base, query, k) -> BenchResult:
    """Benchmark usearch's exact ``search`` directly on the numpy array.

    Uses the ``MetricKind.L2sq`` metric; no index is built, so the build
    time is reported as 0.
    """
    from usearch.index import Index, search, MetricKind

    query_times = []
    for q in query:
        started = time.time()
        # result = index.search(q, exact=True)
        hits = search(base, q, k, MetricKind.L2sq, exact=True)
        query_times.append(time.time() - started)
    return BenchResult("usearch numpy exact=True", 0, query_times)


def bench_usearch_special(base, query, k) -> BenchResult:
    """Benchmark exact searches through a usearch ``Index`` built from ``base``.

    Build time covers adding every vector (keyed by its position) to the
    index. Each query asks for ``k`` neighbours with ``exact=True``;
    previously ``k`` was not forwarded, so the library's default result
    count was used instead of the requested one.
    """
    from usearch.index import Index, search, MetricKind

    dimensions = base.shape[1]
    index = Index(ndim=dimensions)
    t = time.time()
    index.add(np.arange(len(base)), base)
    build_time = time.time() - t

    times = []
    for q in query:
        t0 = time.time()
        result = index.search(q, k, exact=True)
        times.append(time.time() - t0)
    return BenchResult("usuearch index", build_time, times)


def suite(name, base, query, k, benchmarks):
    """Run the requested benchmarks and print a table sorted by mean query time.

    Args:
        name: suite name, used in the progress message and table title.
        base: 2-D array of vectors to index.
        query: iterable of query vectors.
        k: number of neighbours requested per query.
        benchmarks: benchmark specs. Either an exact name (``faiss``,
            ``vec-static``, ``usearch``, ``hnswlib``, ``numpy``, ``duckdb``,
            ``sentence-transformers``, ``chroma``) or a dotted, parameterised
            spec: ``vec-scalar.<page_size>``, ``libsql.<page_size>``,
            ``vec-vec0.<page_size>.<chunk_size>``.

    Raises:
        Exception: for a spec that matches none of the above.
    """
    print(f"Starting benchmark suite: {name} {base.shape}, k={k}")

    # Benchmarks that are selected by exact name and take no extra parameters.
    simple_benchmarks = {
        "faiss": bench_faiss,
        "vec-static": bench_sqlite_vec_static,
        "usearch": bench_usearch_npy,
        "hnswlib": bench_hnsw_bf,
        "numpy": bench_numpy,
        "duckdb": bench_duckdb,
        "sentence-transformers": bench_sentence_transformers,
        "chroma": bench_chroma,
    }

    results = []
    for spec in benchmarks:
        runner = simple_benchmarks.get(spec)
        if runner is not None:
            outcome = runner(base, query, k=k)
        elif spec.startswith("vec-scalar"):
            # page_size is forwarded as the string taken from the spec.
            _, page_size = spec.split('.')
            outcome = bench_sqlite_vec_scalar(base, query, page_size, k=k)
        elif spec.startswith("libsql"):
            _, page_size = spec.split('.')
            outcome = bench_libsql(base, query, page_size, k=k)
        elif spec.startswith("vec-vec0"):
            _, page_size, chunk_size = spec.split('.')
            outcome = bench_sqlite_vec(base, query, int(page_size), int(chunk_size), k=k)
        else:
            raise Exception(f"unknown benchmark {spec}")
        results.append(outcome)

    # Earlier experiments, kept for reference:
    #results.append(bench_sqlite_vec(base, query, 32768, 512, k=k))
    #results.append(bench_sqlite_vec(base, query, 32768, 256, k=k))
    #results.append(bench_sqlite_vec_expo(base, query, k=k))
    # n = bench_chroma(base[:40000], query, k=k)
    # n = bench_usearch_special(base, query, k=k)
    # n = bench_sqlite_vec(base, query, 4096, 1024, k=k)
    # n = bench_sqlite_vec(base, query, 32768, 1024, k=k)
    # blessed
    ###   #for pgsz in [4096, 8192, 16384, 32768, 65536]:
    ###   #    for chunksz in [8, 32, 128, 512, 1024, 2048]:
    ###   #      results.append(bench_sqlite_vec(base, query, pgsz, chunksz, k=k))
    ###   # n = bench_sqlite_vec(base, query, 16384, 64, k=k)
    ###   # n = bench_sqlite_vec(base, query, 16384, 32, k=k)
    ###   results.append(bench_sqlite_normal(base, query, 8192, k=k))
    ###   results.append(bench_lancedb(base, query, k=k))
    ###   #h = bench_hnsw(base, query)

    report = Table(
        title=f"{name}: {base.shape[0]:,} {base.shape[1]}-dimension vectors, k={k}"
    )
    report.add_column("Tool")
    report.add_column("Build Time (ms)", justify="right")
    report.add_column("Query time (ms)", justify="right")

    # Fastest tool (lowest mean query time) first.
    ranked = sorted(results, key=lambda r: np.mean(r.query_times_ms))
    for entry in ranked:
        build_cell = duration(entry.build_time_ms)
        query_cell = duration(np.mean(entry.query_times_ms))
        report.add_row(entry.tool, build_cell, query_cell)

    Console().print(report)


import argparse


def parse_args():
    """Parse the command-line options of the benchmark script.

    Required: ``-n/--name``, ``-i/--input`` and ``-k``. Optional:
    ``-q/--query``, ``--sample`` (default -1), ``--qsample`` and ``-x`` (a
    comma-separated list of benchmark specs).

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Benchmark processing script.")

    # (flags, add_argument keyword options), in the order they appear in --help.
    option_specs = [
        # Required arguments
        (("-n", "--name"), {"required": True, "help": "Name of the benchmark."}),
        (("-i", "--input"), {"required": True, "help": "Path to input file (.npy)."}),
        (("-k",), {"type": int, "required": True, "help": "Parameter k to use in benchmark."}),
        # Optional arguments
        (("-q", "--query"), {"required": False, "help": "Path to query file (.npy)."}),
        (
            ("--sample",),
            {
                "type": int,
                "required": False,
                "help": "Number of entries in base to use. Defaults all",
                "default": -1,
            },
        ),
        (
            ("--qsample",),
            {
                "type": int,
                "required": False,
                "help": "Number of queries to use. Defaults all",
            },
        ),
        (
            ("-x",),
            {
                "help": "type of runs to make",
                "default": "faiss,vec-scalar.4096,vec-static,vec-vec0.4096.16,usearch,duckdb,hnswlib,numpy",
            },
        ),
    ]
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    return parser.parse_args()


from pathlib import Path


def cli_read_input(input, sample):
    """Load vectors from an ``.fvecs`` or ``.npy`` file.

    Args:
        input: path of the file to read.
        sample: maximum number of vectors to return; ``None`` or a negative
            value means "all vectors".

    Returns:
        A float32 array holding at most ``sample`` vectors.

    Raises:
        Exception: if the file extension is neither ``.fvecs`` nor ``.npy``.
    """
    input_path = Path(input)
    # Normalise "no limit" to None so that it can be used directly as a slice
    # bound (a literal -1 would drop the last vector).
    limit = None if sample is None or sample < 0 else sample
    if input_path.suffix == ".fvecs":
        return fvecs_read(input_path, limit)
    if input_path.suffix == ".npy":
        # .npy files start with a header describing dtype and shape, so they
        # have to be read with np.load; np.fromfile would interpret the header
        # bytes as vector data and lose the array shape.
        vectors = np.load(input_path).astype("float32", copy=False)
        return vectors[:limit]
    raise Exception("unknown filetype", input)


def cli_read_query(query, base):
    """Return the query vectors for a benchmark run.

    Args:
        query: path to a query file, or ``None`` to sample queries from
            ``base``.
        base: 2-D array of base vectors.

    Returns:
        The full contents of ``query`` when a path is given; otherwise up to
        100 distinct, randomly chosen rows of ``base``.
    """
    if query is None:
        # Sampling without replacement cannot draw more rows than exist, so
        # cap the sample size for small datasets.
        n_queries = min(100, base.shape[0])
        return base[np.random.choice(base.shape[0], n_queries, replace=False), :]
    return cli_read_input(query, -1)



@dataclass
class Config:
    """Settings of one benchmark suite, as read from a ``.suite`` file."""

    # Suite name (``@name=``).
    name: str
    # Path of the base-vector file (``@input=``).
    input: str
    # Number of neighbours to request per query (``@k=``, required).
    k: int
    # Path of the query file (``@queries=``); None when not given.
    queries: Optional[str]
    # Maximum number of queries to run (``@qsample=``); None means all.
    qsample: Optional[int]
    # Benchmark specs, one per non-directive line.
    tests: List[str]
    # Maximum number of base vectors to use (``@sample=``); None means all.
    sample: Optional[int]


def parse_config_file(path: str) -> Config:
    """Parse a ``.suite`` file into a ``Config``.

    Blank lines and lines starting with ``#`` are ignored. Lines of the form
    ``@key=value`` set one of ``name``, ``k``, ``input``, ``queries``,
    ``qsample`` or ``sample`` (a repeated key overrides the earlier value);
    every other line is a benchmark spec.

    Raises:
        Exception: for an unknown ``@`` directive, or when ``@k=`` is missing.
    """
    settings = {
        "name": None,
        "k": None,
        "input": None,
        "queries": None,
        "qsample": None,
        "sample": None,
    }
    tests = []

    # `with` closes the file even when a malformed line raises.
    with open(path, "r") as config_file:
        for line in config_file:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            if not line.startswith("@"):
                tests.append(line)
                continue
            key, separator, value = line[1:].partition("=")
            if not separator or key not in settings:
                raise Exception(f"unknown config line '{line}'")
            settings[key] = value

    if settings["k"] is None:
        raise Exception("missing required config line '@k=<number>'")

    def optional_int(text):
        return int(text) if text is not None else None

    return Config(
        settings["name"],
        settings["input"],
        int(settings["k"]),
        settings["queries"],
        # @qsample is optional: None leaves the query slice unbounded.
        optional_int(settings["qsample"]),
        tests,
        optional_int(settings["sample"]),
    )



from sys import argv
if __name__ == "__main__":
    # Usage: python bench.py <suite-file>
    # The first command-line argument is the path of a .suite config file.
    suite_config = parse_config_file(argv[1])
    print(suite_config)
    #args = parse_args()
    #print(args)
    base_vectors = cli_read_input(suite_config.input, suite_config.sample)
    all_queries = cli_read_query(suite_config.queries, base_vectors)
    query_vectors = all_queries[: suite_config.qsample]
    suite(
        suite_config.name,
        base_vectors,
        query_vectors,
        suite_config.k,
        suite_config.tests,
    )

    #main()


================================================
FILE: benchmarks/exhaustive-memory/gist.suite
================================================
@name=gist
@input=data/gist/gist_base.fvecs
@queries=data/gist/gist_query.fvecs
@sample=500000
@qsample=20
@k=20

faiss
usearch
vec-static
#duckdb
#vec-vec0.8192.1024
#vec-vec0.8192.2048
#vec-scalar.8192
#numpy


================================================
FILE: benchmarks/exhaustive-memory/requirements.txt
================================================
annotated-types==0.7.0
anyio==4.4.0
asgiref==3.8.1
attrs==23.2.0
backoff==2.2.1
bcrypt==4.2.0
build==1.2.1
cachetools==5.4.0
certifi==2024.7.4
charset-normalizer==3.3.2
chroma-hnswlib==0.7.6
chromadb==0.5.5
click==8.1.7
coloredlogs==15.0.1
decorator==5.1.1
deprecated==1.2.14
deprecation==2.1.0
dnspython==2.6.1
duckdb==1.0.0
email-validator==2.2.0
faiss-cpu==1.8.0.post1
fastapi==0.111.1
fastapi-cli==0.0.4
filelock==3.15.4
flatbuffers==24.3.25
fsspec==2024.6.1
google-auth==2.32.0
googleapis-common-protos==1.63.2
grpcio==1.65.1
h11==0.14.0
hnswlib==0.8.0
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.24.1
humanfriendly==10.0
idna==3.7
importlib-metadata==8.0.0
importlib-resources==6.4.0
jinja2==3.1.4
joblib==1.4.2
kubernetes==30.1.0
lancedb==0.10.2
markdown-it-py==3.0.0
markupsafe==2.1.5
mdurl==0.1.2
mmh3==4.1.0
monotonic==1.6
mpmath==1.3.0
networkx==3.3
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.18.1
opentelemetry-api==1.26.0
opentelemetry-exporter-otlp-proto-common==1.26.0
opentelemetry-exporter-otlp-proto-grpc==1.26.0
opentelemetry-instrumentation==0.47b0
opentelemetry-instrumentation-asgi==0.47b0
opentelemetry-instrumentation-fastapi==0.47b0
opentelemetry-proto==1.26.0
opentelemetry-sdk==1.26.0
opentelemetry-semantic-conventions==0.47b0
opentelemetry-util-http==0.47b0
orjson==3.10.6
overrides==7.7.0
packaging==24.1
pandas==2.2.2
pillow==10.4.0
posthog==3.5.0
protobuf==4.25.4
py==1.11.0
pyarrow==15.0.0
pyasn1==0.6.0
pyasn1-modules==0.4.0
pydantic==2.8.2
pydantic-core==2.20.1
pygments==2.18.0
pylance==0.14.1
pypika==0.48.9
pyproject-hooks==1.1.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.9
pytz==2024.1
pyyaml==6.0.1
ratelimiter==1.2.0.post0
regex==2024.5.15
requests==2.32.3
requests-oauthlib==2.0.0
retry==0.9.2
rich==13.7.1
rsa==4.9
safetensors==0.4.3
scikit-learn==1.5.1
scipy==1.14.0
sentence-transformers==3.0.1
setuptools==71.1.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
starlette==0.37.2
sympy==1.13.1
tenacity==8.5.0
threadpoolctl==3.5.0
tokenizers==0.19.1
torch==2.3.1
tqdm==4.66.4
transformers==4.43.1
typer==0.12.3
typing-extensions==4.12.2
tzdata==2024.1
urllib3==2.2.2
usearch==2.12.0
uvicorn==0.30.3
uvloop==0.19.0
watchfiles==0.22.0
websocket-client==1.8.0
websockets==12.0
wrapt==1.16.0
zipp==3.19.2


================================================
FILE: benchmarks/exhaustive-memory/sift.suite
================================================
@name=sift1m
@input=data/sift/sift_base.fvecs
@queries=data/sift/sift_query.fvecs
@qsample=100
@k=20

faiss
usearch
duckdb
vec-static
vec-vec0.8192.1024
vec-vec0.8192.2048
vec-scalar.8192
numpy

# #libsql.4096
# #libsql.8192
# faiss
# vec-scalar.4096
# vec-static
# vec-vec0.4096.16
# vec-vec0.8192.1024
# vec-vec0.4096.2048
# usearch
# duckdb
# hnswlib
# numpy
# chroma


================================================
FILE: benchmarks/micro/.gitignore
================================================
target/


================================================
FILE: benchmarks/micro/Cargo.toml
================================================
[package]
name = "micro"
version = "0.1.0"
edition = "2021"

[dependencies]
rusqlite = {version="0.31.0", features=["bundled"]}

[dev-dependencies]
criterion = "0.3"
rand = "0.8.5"
zerocopy = "0.7.34"

[build-dependencies]
cc = "1.0.99"

[[bench]]
name = "my_benchmark"
harness = false


================================================
FILE: benchmarks/micro/benches/my_benchmark.rs
================================================
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use micro::init_vec;
use rand::Rng;
use rusqlite::Connection;
use zerocopy::AsBytes;

/// Builds a vector of `n` random `f32` values drawn from the thread-local RNG.
fn random_vector(n: usize) -> Vec<f32> {
    let mut rng = rand::thread_rng();
    let mut values = Vec::with_capacity(n);
    for _ in 0..n {
        values.push(rng.gen::<f32>());
    }
    values
}

/// Creates an in-memory database holding `n` random `d`-dimensional vectors
/// in a `vec0` virtual table named `vec_base` (column `a`).
///
/// The `page_size` pragma is applied and then read back; the function panics
/// if the database does not report the requested value. All rows are
/// inserted inside a single transaction.
fn setup_base(page_size: usize, d: usize, n: i32) -> Connection {
    let vectors: Vec<Vec<f32>> = (0..n).map(|_| random_vector(d)).collect();

    let mut conn = Connection::open_in_memory().unwrap();

    conn.pragma_update(Some(rusqlite::DatabaseName::Main), "page_size", page_size)
        .unwrap();
    let effective_page_size = conn
        .pragma_query_value(Some(rusqlite::DatabaseName::Main), "page_size", |row| {
            Ok(row.get::<usize, usize>(0).unwrap())
        })
        .unwrap();
    assert_eq!(effective_page_size, page_size);

    let create_sql = format!("create virtual table vec_base using vec0(a float[{d}])");
    conn.execute(create_sql.as_str(), []).unwrap();

    let tx = conn.transaction().unwrap();
    for vector in vectors.iter() {
        // Each vector is bound as its raw byte representation.
        tx.execute("insert into vec_base(a) values (?)", [vector.as_bytes()])
            .unwrap();
    }
    tx.commit().unwrap();

    conn
}
/// Benchmarks point lookups (`where rowid = ?`) on a `vec0` table for several
/// SQLite page sizes. A fresh database is built with `setup_base` for every
/// page size; the KNN benchmark further down is currently commented out.
pub fn criterion_benchmark(c: &mut Criterion) {
    init_vec();

    let n = 1_000_000;
    let d = 1536;
    let k = 10;

    let page_sizes = [4096, 8192, 16384, 32768];
    for page_size in page_sizes {
        let db = setup_base(page_size, d, n);

        let mut stmt = db
            .prepare("select rowid, a from vec_base where rowid = ?")
            .unwrap();

        c.bench_function(
            format!("point page_size={page_size} n={n} dimension={d} k={k}").as_str(),
            |b| {
                let mut rng = rand::thread_rng();
                // NOTE(review): `setup_base` inserts rows without explicit
                // rowids, so they are presumably auto-assigned as 1..=n. The
                // lookup key is therefore drawn from 1..=n; with the previous
                // 0..n range a draw of 0 would match no row and make
                // `query_row(..).unwrap()` panic.
                let query: i64 = rng.gen_range(1..=n.into());

                b.iter(|| {
                    let result: (i64, Vec<u8>) = stmt
                        .query_row(rusqlite::params![query], |r| {
                            Ok((r.get(0).unwrap(), r.get(1).unwrap()))
                        })
                        .unwrap();
                    assert_eq!(result.0, query);
                });
            },
        );
        /*
        c.bench_function(
            format!("KNN page_size={page_size} n={n} dimension={d} k={k}").as_str(),
            |b| {
                let query: Vec<f32> = random_vector(d);
                let db = setup_base(page_size, d, n);

                let mut stmt = db.prepare(
                "select rowid, distance from vec_base where a match ? order by distance limit ?",
                )
                .unwrap();

                b.iter(|| {
                    let result: Vec<(i64, f64)> = stmt
                        .query_map(rusqlite::params![query.as_bytes(), k], |r| {
                            Ok((r.get(0).unwrap(), r.get(1).unwrap()))
                        })
                        .unwrap()
                        .collect::<Result<Vec<_>, _>>()
                        .unwrap();
                    assert_eq!(result.len(), 10);
                });
                stmt.finalize().unwrap()
            },
        ); */
    }
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);


================================================
FILE: benchmarks/micro/build.rs
================================================
/// Build script: compiles `../../sqlite-vec.c` with the `cc` crate into a
/// static library named `sqlite_vec0`.
fn main() {
    // The C source lives outside this package, so Cargo's default
    // "any file in the package changed" detection never sees it. Declare it
    // explicitly so that editing the C file re-runs this script.
    println!("cargo:rerun-if-changed=../../sqlite-vec.c");

    cc::Build::new()
        .file("../../sqlite-vec.c")
        .compile("sqlite_vec0");
}


================================================
FILE: benchmarks/micro/src/lib.rs
================================================
use rusqlite::ffi::sqlite3_auto_extension;

// Symbols provided by the `sqlite_vec0` static library that build.rs compiles
// from `sqlite-vec.c`.
#[link(name = "sqlite_vec0")]
extern "C" {
    // Declared here without parameters: in this crate the symbol is only used
    // for its address (see `init_vec`), never called directly from Rust.
    pub fn sqlite3_vec_init();
}

/// Registers `sqlite3_vec_init` with `sqlite3_auto_extension`.
pub fn init_vec() {
    // Erase the declared signature first; the transmute below then converts
    // the raw address into whatever function-pointer type
    // `sqlite3_auto_extension` expects.
    let entry_point = sqlite3_vec_init as *const ();

    unsafe {
        sqlite3_auto_extension(Some(std::mem::transmute(entry_point)));
    }
}


================================================
FILE: benchmarks/profiling/build-from-npy.sql
================================================
.timer on
-- Builds a `vec_items` vec0 table holding 1536-dimensional float vectors read
-- from a .npy file, with the shell timer switched on.

-- Page size under test; the commented-out lines are alternative values.
PRAGMA page_size = 32768;
--pragma page_size = 16384;
--pragma page_size = 4096;

CREATE VIRTUAL TABLE vec_items USING vec0(
  embedding float[1536]
);

-- 65s (limit 1e5), ~615MB on disk
INSERT INTO vec_items
SELECT
  rowid,
  vector
FROM vec_npy_each(vec_npy_file('examples/dbpedia-openai/data/vectors.npy'))
LIMIT 1e5;


================================================
FILE: benchmarks/profiling/query-k.sql
================================================
.timer on
-- Runs the same KNN query five times in a row against `vec_items`, using the
-- vector stored at rowid 100 as the query vector, with the shell timer on.
-- `:k` is a named parameter and has to be bound before this script is run.

-- run 1
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH (SELECT embedding FROM vec_items WHERE rowid = 100)
  AND k = :k
ORDER BY distance;

-- run 2
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH (SELECT embedding FROM vec_items WHERE rowid = 100)
  AND k = :k
ORDER BY distance;

-- run 3
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH (SELECT embedding FROM vec_items WHERE rowid = 100)
  AND k = :k
ORDER BY distance;

-- run 4
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH (SELECT embedding FROM vec_items WHERE rowid = 100)
  AND k = :k
ORDER BY distance;

-- run 5
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH (SELECT embedding FROM vec_items WHERE rowid = 100)
  AND k = :k
ORDER BY distance;


================================================
FILE: benchmarks/self-params/build.py
================================================
import sqlite3
import time


def connect(path):
    """Open the SQLite database at `path` with the vec0 extension loaded.

    Extension loading is switched on only for the duration of the two loads:
    the default entry point of `../dist/vec0`, then its
    `sqlite3_vec_fs_read_init` entry point via SQL `load_extension()`.
    Returns the open connection.
    """
    connection = sqlite3.connect(path)

    connection.enable_load_extension(True)
    connection.load_extension("../dist/vec0")
    connection.execute(
        "select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')"
    )
    connection.enable_load_extension(False)

    return connection


# Parameter grid for the build benchmark: one database file is created per
# (page_size, chunk_size, type) combination. 4096 and 8192 are currently
# left out of the page-size sweep.
page_sizes = [  # 4096, 8192,
    16384,
    32768,
]
chunk_sizes = [128, 256, 1024, 2048]
types = ["f32", "int8", "bit"]

SRC = "../examples/dbpedia-openai/data/vectors.npy"

# SQL expression applied to each source vector before it is inserted,
# keyed by the vec0 column type. Types not listed insert the vector as-is.
insert_expressions = {
    "int8": "vec_quantize_i8(vector, 'unit')",
    "bit": "vec_quantize_binary(vector)",
}

for page_size in page_sizes:
    for chunk_size in chunk_sizes:
        for t in types:
            print(f"{t} page_size={page_size}, chunk_size={chunk_size}")

            t0 = time.time()
            db = connect(f"dbs/test.{page_size}.{chunk_size}.{t}.db")
            try:
                db.execute(f"pragma page_size = {page_size}")
                # `with db` wraps the create + bulk insert in one transaction.
                with db:
                    db.execute(
                        f"""
                          create virtual table vec_items using vec0(
                            embedding {t}[1536],
                            chunk_size={chunk_size}
                          )
                        """
                    )
                    func = insert_expressions.get(t, "vector")
                    db.execute(
                        f"""
                          insert into vec_items
                          select rowid, {func}
                          from vec_npy_each(vec_npy_file(?))
                          limit 100000
                        """,
                        [SRC],
                    )
                # Measured before closing so the timing covers the same work as before.
                elapsed = time.time() - t0
            finally:
                # Release the connection instead of leaving every database
                # of the sweep open until the script exits.
                db.close()
            print(elapsed)

"""

# for 100_000

page_size=4096, chunk_size=256
3.5894200801849365
page_size=4096, chunk_size=1024
60.70046401023865
page_size=4096, chunk_size=2048
201.04426288604736
page_size=8192, chunk_size=256
7.034514904022217
page_size=8192, chunk_size=1024
9.983598947525024
page_size=8192, chunk_size=2048
12.318921089172363
page_size=16384, chunk_size=256
4.97080397605896
page_size=16384, chunk_size=1024
6.051195859909058
page_size=16384, chunk_size=2048
8.492683172225952
page_size=32768, chunk_size=256
5.906642198562622
page_size=32768, chunk_size=1024
5.876632213592529
page_size=32768, chunk_size=2048
5.420510292053223
"""


================================================
FILE: benchmarks/self-params/knn.py
================================================
import sqlite3
import time
from random import randrange
from statistics import mean


def connect(path):
    """Print `path`, open it as a SQLite database and load the vec0 extension.

    Extension loading is switched on only for the duration of the two loads:
    the default entry point of `../dist/vec0`, then its
    `sqlite3_vec_fs_read_init` entry point via SQL `load_extension()`.
    Returns the open connection.
    """
    print(path)
    connection = sqlite3.connect(path)

    connection.enable_load_extension(True)
    connection.load_extension("../dist/vec0")
    connection.execute(
        "select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')"
    )
    connection.enable_load_extension(False)

    return connection


# Parameter grid for the KNN benchmark: every (type, page_size, chunk_size)
# combination opens its own database file and times `trials` queries on it.
# 4096 and 8192 are currently left out of the page-size sweep.
page_sizes = [  # 4096, 8192,
    16384,
    32768,
]
chunk_sizes = [128, 256, 1024, 2048]
types = ["f32", "int8", "bit"]

types.reverse()

for t in types:
    # The query-vector expression depends only on the column type, so it is
    # chosen once per type rather than once per database.
    func = "embedding"
    if t == "int8":
        func = "vec_quantize_i8(embedding, 'unit')"
    if t == "bit":
        func = "vec_quantize_binary(embedding)"

    for page_size in page_sizes:
        for chunk_size in chunk_sizes:
            print(f"page_size={page_size}, chunk_size={chunk_size}")

            times = []
            trials = 20
            db = connect(f"dbs/test.{page_size}.{chunk_size}.{t}.db")

            try:
                for _ in range(trials):
                    t0 = time.time()
                    # fetchall() forces the whole result set to be produced
                    # inside the timed region; the rows themselves are unused.
                    db.execute(
                        f"""
                          select rowid
                          from vec_items
                          where embedding match (select {func} from vec_items where rowid = ?)
                            and k = 10
                          order by distance
                        """,
                        [randrange(100000)],
                    ).fetchall()

                    times.append(time.time() - t0)
            finally:
                # Release the connection instead of leaving every database
                # of the sweep open until the script exits.
                db.close()
            print(mean(times))

"""

page_size=4096, chunk_size=256
0.2635102152824402
page_size=4096, chunk_size=1024
0.2609449863433838
page_size=4096, chunk_size=2048
0.275589919090271
page_size=8192, chunk_size=256
0.18621582984924318
page_size=8192, chunk_size=1024
0.20939643383026124
page_size=8192, chunk_size=2048
0.22376316785812378
page_size=16384, chunk_size=256
0.16012665033340454
page_size=16384, chunk_size=1024
0.18346318006515502
page_size=16384, chunk_size=2048
0.18224761486053467
page_size=32768, chunk_size=256
0.14202518463134767
page_size=32768, chunk_size=1024
0.15340715646743774
page_size=32768, chunk_size=2048
0.18018823862075806
"""


================================================
FILE: benchmarks/self-params/test.py
================================================
import sqlite3
import time


def connect(path):
    """Open the SQLite database at `path` with the vec0 extension loaded.

    Extension loading is switched on only for the duration of the two loads:
    the default entry point of `../dist/vec0`, then its
    `sqlite3_vec_fs_read_init` entry point via SQL `load_extension()`.
    Returns the open connection.
    """
    connection = sqlite3.connect(path)

    connection.enable_load_extension(True)
    connection.load_extension("../dist/vec0")
    connection.execute(
        "select load_extension('../dist/vec0', 'sqlite3_vec_fs_read_init')"
    )
    connection.enable_load_extension(False)

    return connection


# Sanity check over the benchmark databases: for each (page_size, chunk_size)
# pair, print the page size the file actually uses and its number of rowids.
page_sizes = [4096, 8192, 16384, 32768]
chunk_sizes = [256, 1024, 2048]

for page_size in page_sizes:
    for chunk_size in chunk_sizes:
        print(f"page_size={page_size}, chunk_size={chunk_size}")

        # NOTE(review): build.py names its files
        # dbs/test.{page_size}.{chunk_size}.{t}.db; this pattern has no type
        # suffix -- confirm which files this check is meant to open.
        db = connect(f"dbs/test.{page_size}.{chunk_size}.db")
        try:
            print(db.execute("pragma page_size").fetchone()[0])
            print(db.execute("select count(*) from vec_items_rowids").fetchone()[0])
        finally:
            # Release the connection instead of leaving every database open
            # until the script exits.
            db.close()


================================================
FILE: bindings/go/ncruces/go-sqlite3.patch
================================================
diff --git a/embed/build.sh b/embed/build.sh
index ed2aaec..4cc0b0e 100755
--- a/embed/build.sh
+++ b/embed/build.sh
@@ -23,6 +23,7 @@ trap 'rm -f sqlite3.tmp' EXIT
 	-Wl,--initial-memory=327680 \
 	-D_HAVE_SQLITE_CONFIG_H \
 	-DSQLITE_CUSTOM_INCLUDE=sqlite_opt.h \
+  -DSQLITE_VEC_OMIT_FS=1 \
 	$(awk '{print "-Wl,--export="$0}' exports.txt)

 "$BINARYEN/wasm-ctor-eval" -g -c _initialize sqlite3.wasm -o sqlite3.tmp
diff --git a/sqlite3/main.c b/sqlite3/main.c
index c732937..7c9002a 100644
--- a/sqlite3/main.c
+++ b/sqlite3/main.c
@@ -19,6 +19,7 @@
 #include "time.c"
 #include "vfs.c"
 #include "vtab.c"
+#include "../../sqlite-vec.c"

 __attribute__((constructor)) void init() {
   sqlite3_initialize();
@@ -30,4 +31,5 @@ __attribute__((constructor)) void init() {
   sqlite3_auto_extension((void (*)(void))sqlite3_spellfix_init);
   sqlite3_auto_extension((void (*)(void))sqlite3_uint_init);
   sqlite3_auto_extension((void (*)(void))sqlite3_time_init);
+  sqlite3_auto_extension((void (*)(void))sqlite3_vec_init);
 }
\ No newline at end of file


================================================
FILE: bindings/python/extra_init.py
================================================
from typing import List
from struct import pack
from sqlite3 import Connection


def serialize_float32(vector: List[float]) -> bytes:
    """Pack a list of floats into the "raw bytes" format sqlite-vec expects.

    Each element is written as one `struct` "f" item, in the order given.
    """
    layout = f"{len(vector)}f"
    return pack(layout, *vector)


def serialize_int8(vector: List[int]) -> bytes:
    """Pack a list of integers into the "raw bytes" format sqlite-vec expects.

    Each element is written as one `struct` "b" item (a signed byte), in the
    order given.
    """
    layout = f"{len(vector)}b"
    return pack(layout, *vector)


try:
    import numpy.typing as npt

    def register_numpy(db: Connection, name: str, array: npt.NDArray):
        """Expose a numpy array to SQL as a virtual table called `name`.

        The array's memory address, element type and shape are read from its
        `__array_interface__` and stored in `temp.vec_static_blobs` through
        `vec_static_blob_from_raw()`. A `vec_static_blob_entries` virtual
        table named `name` is then created.

        Only arrays whose typestr is `"<f4"` and whose shape has exactly two
        items are handled: any other element type fails the assertion, and any
        other shape fails the tuple unpacking.

        NOTE(review): only the raw address is handed to SQLite, so the array
        presumably has to stay alive while the table is in use -- confirm
        against the extension's implementation.
        """
        data_address = array.__array_interface__["data"][0]
        row_count, column_count = array.__array_interface__["shape"]
        typestr = array.__array_interface__["typestr"]

        assert typestr == "<f4"

        # SQLite's printf('%w') doubles embedded double quotes, so the result
        # can be placed inside a double-quoted identifier below.
        escaped_rows = db.execute("select printf('%w', ?)", [name])
        escaped = escaped_rows.fetchone()[0]

        blob_arguments = [name, data_address, typestr, column_count, row_count]
        db.execute(
            """
              insert into temp.vec_static_blobs(name, data)
              select ?, vec_static_blob_from_raw(?, ?, ?, ?)
            """,
            blob_arguments,
        )

        create_statement = (
            f'create virtual table "{escaped}" using vec_static_blob_entries({escaped})'
        )
        db.execute(create_statement)

except ImportError:

    def register_numpy(db: Connection, name: str, array):
        """Fallback used when numpy cannot be imported: always raises."""
        raise Exception("numpy package is required for register_numpy")


================================================
FILE: bindings/rust/.gitignore
================================================
target/
sqlite-vec.c
sqlite-vec.h
Cargo.toml


================================================
FILE: bindings/rust/Cargo.toml.tmpl
================================================
[package]
name = "sqlite-vec"
# The VERSION placeholder is filled in by envsubst when the Makefile renders
# Cargo.toml from this template.
version = "${VERSION}"
edition = "2021"
authors = ["Alex Garcia <alexsebastian.garcia@gmail.com>"]
description = "FFI bindings to the sqlite-vec SQLite extension"
homepage = "https://github.com/asg017/sqlite-vec"
repository = "https://github.com/asg017/sqlite-vec"
keywords = ["sqlite", "sqlite-extension"]
# SPDX license expression; Cargo deprecates the older "/" separator in favour of "OR".
license = "MIT OR Apache-2.0"


[dependencies]

[build-dependencies]
cc = "1.0"

[dev-dependencies]
rusqlite = "0.31.0"


================================================
FILE: bindings/rust/Makefile
================================================
# Stages the files the Rust crate needs next to its manifest: a rendered
# Cargo.toml plus copies of the C source and headers from the repository root.

# Version string read from the repository-level VERSION file.
VERSION=$(shell cat ../../VERSION)

# Aggregate target: build every staged file.
deps: Cargo.toml sqlite-vec.c sqlite-vec.h sqlite3ext.h sqlite3.h

# Render the manifest from its template; envsubst replaces the VERSION
# placeholder with the value exported on the command line.
Cargo.toml: ../../VERSION Cargo.toml.tmpl
		VERSION=$(VERSION) envsubst < Cargo.toml.tmpl > $@

# The remaining rules copy a file from the repository into this directory.
sqlite-vec.c: ../../sqlite-vec.c
		cp $< $@

sqlite-vec.h: ../../sqlite-vec.h
		cp $< $@

sqlite3ext.h: ../../vendor/sqlite3ext.h
		cp $< $@

sqlite3.h: ../../vendor/sqlite3.h
		cp $< $@

# `deps` names no file of its own, so mark it phony.
.PHONY: deps


================================================
FILE: bindings/rust/build.rs
================================================
/// Build script: compiles the local `sqlite-vec.c` with `SQLITE_CORE` defined
/// (no value) into a static library named `sqlite_vec0`.
fn main() {
    let mut build = cc::Build::new();
    build.file("sqlite-vec.c");
    build.define("SQLITE_CORE", None);
    build.compile("sqlite_vec0");
}


================================================
FILE: bindings/rust/src/lib.rs
================================================
// Symbols provided by the `sqlite_vec0` static library that build.rs compiles
// from `sqlite-vec.c`.
#[link(name = "sqlite_vec0")]
extern "C" {
    // Declared here without parameters. The test below uses it only for its
    // address, handing it to `sqlite3_auto_extension` through a transmute.
    pub fn sqlite3_vec_init();
}

#[cfg(test)]
mod tests {
    use super::*;

    use rusqlite::{ffi::sqlite3_auto_extension, Connection};

    /// Registers `sqlite3_vec_init` as an auto extension, opens an in-memory
    /// rusqlite connection and checks that `vec_version()` returns a string
    /// starting with "v".
    #[test]
    fn test_rusqlite_auto_extension() {
        // Erase the declared signature; the transmute converts the address
        // into the function-pointer type `sqlite3_auto_extension` expects.
        let entry_point = sqlite3_vec_init as *const ();
        unsafe {
            sqlite3_auto_extension(Some(std::mem::transmute(entry_point)));
        }

        let connection = Connection::open_in_memory().unwrap();
        let version: String = connection
            .query_row("select vec_version()", [], |row| row.get(0))
            .unwrap();

        assert!(version.starts_with('v'));
    }
}


================================================
FILE: examples/nbc-headlines/.gitignore
================================================
*.dylib
*.so
*.dll
*.gguf


================================================
FILE: examples/nbc-headlines/1_scrape.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NBC News Headlines: Scraper\n",
    "\n",
    "This notebook implements a scraper for [NBC News](https://www.nbcnews.com) headlines. It uses [this sitemap](https://www.nbcnews.com/archive/articles/2024/march), which provides a list of article headlines + URLs\n",
    "for every month for the past few years. \n",
    "\n",
    "This dataset is mostly to get a simple, real-world small text dataset for testing embeddings. \n",
    "They're small pieces of text (~dozen words), have a wide range of semantic meaning, and are more \"real-world\"\n",
    "than some other embeddings datasets out there.\n",
    "\n",
    "This notebook uses [Deno](https://deno.com/), [linkedom](https://github.com/WebReflection/linkedom), and a few \n",
    "SQLite extensions to scrape the headlines for a given date range. It creates a single SQL table, `articles`, \n",
    "with a few columns like `headline` and `url`. By default it will get all article headlines from January 2024 -> present\n",
    "and save them to a database called `headlines-2024.db`. Feel free to copy+paste this code into your own custom scraper. \n",
    "\n",
    "This notebook also just scrapes the data into a SQLite database, it does NOT do any embeddings + vector search. \n",
    "For examples of those, see [`./2_build.ipynb`](./2_build.ipynb) and [`./3_search.ipynb`](./3_search.ipynb)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "import { Database, Statement } from \"jsr:@db/sqlite@0.11\";\n",
    "import { parseHTML } from \"npm:linkedom\";\n",
    "import * as d3 from \"npm:d3-time\";\n",
    "import * as sqlitePath from \"npm:sqlite-path\";\n",
    "import * as sqliteUrl from \"npm:sqlite-url\";\n",
    "import * as sqliteRegex from \"npm:sqlite-regex\";\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "const months = [\"january\", \"february\", \"march\", \"april\", \"may\", \"june\", \"july\", \"august\", \"september\", \"october\", \"november\", \"december\"]\n",
    "\n",
    "class Db {\n",
    "  db: Database;\n",
    "  #stmtInsertArticle: Statement;\n",
    "\n",
    "  constructor(path:string) {\n",
    "    this.db = new Database(path);\n",
    "    this.db.enableLoadExtension = true;\n",
    "    this.db.loadExtension(sqlitePath.getLoadablePath());\n",
    "    this.db.loadExtension(sqliteUrl.getLoadablePath());\n",
    "    this.db.loadExtension(sqliteRegex.getLoadablePath());\n",
    "    this.db.enableLoadExtension = false;\n",
    "\n",
    "    this.db.exec(`\n",
    "      CREATE TABLE IF NOT EXISTS articles(\n",
    "        id integer primary key autoincrement,\n",
    "        year integer,\n",
    "        month integer,\n",
    "        slug TEXT,\n",
    "        slug_id TEXT,\n",
    "        headline TEXT,\n",
    "        url TEXT,\n",
    "        category1 TEXT,\n",
    "        category2 TEXT\n",
    "      )\n",
    "    `);\n",
    "\n",
    "    this.#stmtInsertArticle = this.db.prepare(`\n",
    "      insert into articles(year, month, slug, slug_id, headline, url, category1, category2)\n",
    "      select\n",
    "        :year as year,\n",
    "        :month as month,\n",
    "         regex_capture(\n",
    "          '(?P<slug>.+)-(?P<id>[^-]+)$',\n",
    "          path_at(url_path(:url), -1),\n",
    "          'slug'\n",
    "        ) as slug,\n",
    "        regex_capture(\n",
    "          '(?P<slug>.+)-(?P<id>[^-]+)$',\n",
    "          path_at(url_path(:url), -1),\n",
    "          'id'\n",
    "        ) as slug_id,\n",
    "        :headline as headline,\n",
    "        :url as url,\n",
    "        path_at(url_path(:url), 0) as category1,\n",
    "        iif(\n",
    "          path_length(url_path(:url)) > 2,\n",
    "          path_at(url_path(:url), 1),\n",
    "          null\n",
    "        ) as category2\n",
    "    `);\n",
    "  }\n",
    "\n",
    "  insertArticles(year:number, month:text, articles:{url: string, year: number, month: number}[]) {\n",
    "    const tx = this.db.transaction((year, month, articles) => {\n",
    "      for(const article of articles) {\n",
    "        this.#stmtInsertArticle.run({...article, year, month})\n",
    "      }\n",
    "    });\n",
    "    tx(year, month, articles);\n",
    "  }\n",
    "}\n",
    "\n",
    "async function insertMonth(db: Db, year:number, month: text) {\n",
    "  let url = `https://www.nbcnews.com/archive/articles/${year}/${month}`;\n",
    "  while(true) {\n",
    "    const monthPage = await fetch(url).then(r=>r.text())\n",
    "    const {document:monthPageDoc} = parseHTML(monthPage);\n",
    "    const monthEntries = monthPageDoc\n",
    "      .querySelectorAll('.MonthPage a')\n",
    "      .map(a => ({headline: a.innerText, url: a.getAttribute('href')}));\n",
    "    db.insertArticles(year, months.findIndex(m => m === month)+1, monthEntries);\n",
    "    const next = monthPageDoc.querySelector('a.Pagination__next.Pagination__enable');\n",
    "    if(!next) {\n",
    "      break;\n",
    "    }\n",
    "    url = `https://www.nbcnews.com${next.getAttribute('href')}`;\n",
    "  }\n",
    "\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "async function backfill(db, start: Date, end: Date) {\n",
    "  const targets = d3.timeMonths(start, end)\n",
    "    .map(date => ({year: date.getFullYear(), monthIndex: date.getMonth()}));\n",
    "  for(const target of targets) {\n",
    "    console.log(`${target.year} ${target.monthIndex}`)\n",
    "    await insertMonth(db, target.year, months[target.monthIndex]);\n",
    "  }\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024 0\n",
      "2024 1\n",
      "2024 2\n",
      "2024 3\n",
      "2024 4\n",
      "2024 5\n",
      "2024 6\n",
      "2024 7\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\u001b[33m1\u001b[39m"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "const db = new Db(\":memory:\");\n",
    "await backfill(db, new Date('2024-01-01'), new Date())\n",
    "db.db.exec(\"vacuum into 'headlines-2024.db'\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Deno",
   "language": "typescript",
   "name": "deno"
  },
  "language_info": {
   "codemirror_mode": "typescript",
   "file_extension": ".ts",
   "mimetype": "text/x.typescript",
   "name": "typescript",
   "nbconvert_exporter": "script",
   "pygments_lexer": "typescript",
   "version": "5.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/nbc-headlines/2_build.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NBC News Headlines: Building FTS5 + `vec0` indexes\n",
    "\n",
    "Using the dataset built in [the previous `./1_scrape.ipynb` notebook](./1_scrape.ipynb), \n",
    "this notebook will enrich that dataset with a full-text search index and a semantic search index,\n",
    "using  [FTS5](https://www.sqlite.org/fts5.html), \n",
    "[`sqlite-vec`](https://github.com/asg017/sqlite-vec), and \n",
    "[`sqlite-lembed`](https://github.com/asg017/sqlite-lembed).\n",
    "\n",
    "This example will use pure SQL for everything. You can do the same exact thing in Python/JavaScript/Go/Rust/etc., or use\n",
    "your own embeddings providers like Ollama/llamafile/OpenAI/etc. The core mechanics of FTS5 and `sqlite-vec` will remain the same. \n",
    "\n",
    "We will use the [Snowflake Arctic Embed v1.5](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5) embeddings model to generate embeddings. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[no code]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    ".open tmp-artic2.db"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 1: Create a FTS5 index\n",
    "\n",
    "Creating a full-text search index is as simple as 3 SQL commands! We already have the headlines stored in the `articles` \n",
    "table under the `headline` column, so it's just a matter of initializing the FTS5 virtual table and inserting the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "0 row × 0 column\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "create virtual table fts_articles using fts5(\n",
    "  headline,\n",
    "  content='articles', content_rowid='id'\n",
    ");\n",
    "\n",
    "insert into fts_articles(rowid, headline)\n",
    "  select rowid, headline\n",
    "  from articles;\n",
    "\n",
    "insert into fts_articles(fts_articles) values('optimize');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By convention we name the FTS5 table `fts_articles`, where the `fts_` prefix says \"this virtual table is full-text search of the `articles` table\". We are only searching the `headline` column, the rest can be ignored. \n",
    "\n",
    "Here we are using the [\"external content tables\"](https://www.sqlite.org/fts5.html#external_content_tables)\n",
    "feature in FTS5 tables, which will avoid storing the headlines a 2nd time, since they already exist in the `articles` table. \n",
    "This part isn't required, but saves us a bit of storage. \n",
    "\n",
    "We also use the [`'optimize'`](https://www.sqlite.org/fts5.html#the_optimize_command) command\n",
    " to keep things tidy. This doesn't do much on such a small dataset, but is important to remember for larger tables!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "<th>\n",
       "headline\n",
       "</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Kamala Harris visits Planned Parenthood clinic\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Former Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "2 rows × 1 column\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m                                                                              \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic                                        \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFormer Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n",
       "\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "select *\n",
    "from fts_articles\n",
    "where headline match 'planned parenthood'\n",
    "limit 10;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: Create a \"semantic index\"\n",
    "\n",
    "\"Semantic index\" in this case is just a fancy way of saying \"vector store\", which we will do with a `sqlite-vec` `vec0` virtual table. \n",
    "\n",
    "Now, `sqlite-vec` just stores vectors, it doesn't generate embeddings for us. There are hundreds of different remote APIs or local inference runtimes you can use to generate embeddings,\n",
    "but here we will use [`sqlite-lembed`](https://github.com/asg017/sqlite-lembed) to keep everything local and everything in pure SQL. \n",
    "\n",
    "We will need to choose an embeddings model in the [GGUF format](https://huggingface.co/docs/hub/en/gguf),\n",
    "since `sqlite-lembed` uses [llama.cpp](https://github.com/ggerganov/llama.cpp) under the hood. \n",
    "Here we will use [`Snowflake/snowflake-arctic-embed-m-v1.5`](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5),\n",
    "where we can find a GGUF version [here](https://huggingface.co/asg017/sqlite-lembed-model-examples/tree/main/snowflake-arctic-embed-m-v1.5). \n",
    "This model is small-ish (`436MB` full-sized, `118MB` at `Q8_0` quantized), and is trained on fairly recent data so it understands\n",
    "recent events like \"COVID-19\" or \"Kamala Harris\". \n",
    "\n",
    "You can download a `.gguf` quantized version of this model with:\n",
    "\n",
    "```bash\n",
    "wget https://huggingface.co/asg017/sqlite-lembed-model-examples/resolve/main/snowflake-arctic-embed-m-v1.5/snowflake-arctic-embed-m-v1.5.d70deb40.f16.gguf\n",
    "```\n",
    "\n",
    "And we can configure `sqlite-lembed` to use this model like so:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "0 row × 0 column\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    ".load ./lembed0\n",
    ".load ../../dist/vec0\n",
    "\n",
    "insert into lembed_models(name, model) values\n",
    "  ('default', lembed_model_from_file('./snowflake-arctic-embed-m-v1.5.d70deb40.f16.gguf'));"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It's embeddings time! We can use the `lembed()` function, which takes in text and returns a vector representation of that text,\n",
    "as an embeddings BLOB that we can insert directly into a `vec0` virtual table. \n",
    "\n",
    "We'll declare this new `vec_articles` table, using the `vec_` prefix as convention. This matches the `fts_articles` table above. \n",
    "The Snowflake embedding model generates vectors with `768` dimensions, which we store as-is. \n",
    "\n",
    "Embedding and inserting into this vector store is as easy as a single `INSERT INTO` and `lembed()` call."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "0 row × 0 column\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "create virtual table vec_articles using vec0(\n",
    "  article_id integer primary key,\n",
    "  headline_embedding float[768]\n",
    ");\n",
    "\n",
    "insert into vec_articles(article_id, headline_embedding)\n",
    "select\n",
    "  rowid,\n",
    "  lembed(headline)\n",
    "from articles;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This took ~13 minutes for ~14,500 embeddings on my older 2019 Macbook, but newer computers with better CPUs will finish quicker (it took `2m20s` on my newer Mac M1 Mini). \n",
    "\n",
    "Once the `vec_articles` is ready, we can perform a KNN query like so:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "<th>\n",
       "headline\n",
       "</th>\n",
       "<th>\n",
       "distance\n",
       "</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Kamala Harris visits Planned Parenthood clinic\n",
       "</td>\n",
       "<td >\n",
       "0.492593914270401\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "After Dobbs decision, more women are managing their own abortions\n",
       "</td>\n",
       "<td >\n",
       "0.5789032578468323\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Transforming Healthcare\n",
       "</td>\n",
       "<td >\n",
       "0.5822411179542542\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "A timeline of Trump&#39;s many, many positions on abortion\n",
       "</td>\n",
       "<td >\n",
       "0.6101462841033936\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "How a network of abortion pill providers works together in the wake of new threats\n",
       "</td>\n",
       "<td >\n",
       "0.6196886897087097\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "&#39;Major hurdles&#39;: The reality check behind Biden&#39;s big abortion promise\n",
       "</td>\n",
       "<td >\n",
       "0.6198344826698303\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump&#39;s conflicting abortion stances are coming back to haunt him — and his party\n",
       "</td>\n",
       "<td >\n",
       "0.6198986768722534\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Where abortion rights could be on the ballot this fall: From the Politics Desk\n",
       "</td>\n",
       "<td >\n",
       "0.6201764345169067\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "How the Biden campaign quickly mobilized on Trump&#39;s abortion stance\n",
       "</td>\n",
       "<td >\n",
       "0.633980393409729\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Battle over abortion heats up in Arizona — and could be on the 2024 ballot\n",
       "</td>\n",
       "<td >\n",
       "0.6341449022293091\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "10 rows × 2 columns\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┐\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m                                                                          \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mdistance\u001b[0m          \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┤\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic                                    \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.492593914270401\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAfter Dobbs decision, more women are managing their own abortions                 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5789032578468323\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTransforming Healthcare                                                           \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5822411179542542\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mA timeline of Trump's many, many positions on abortion                            \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6101462841033936\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow a network of abortion pill providers works together in the wake of new threats\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6196886897087097\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m'Major hurdles': The reality check behind Biden's big abortion promise            \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6198344826698303\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump's conflicting abortion stances are coming back to haunt him — and his party \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6198986768722534\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk    \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6201764345169067\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow the Biden campaign quickly mobilized on Trump's abortion stance               \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.633980393409729\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mBattle over abortion heats up in Arizona — and could be on the 2024 ballot        \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6341449022293091\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┘\n",
       "\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "select\n",
    "  articles.headline,\n",
    "  vec_articles.distance\n",
    "from vec_articles\n",
    "left join articles on articles.rowid = vec_articles.article_id\n",
    "where headline_embedding match lembed(\"planned parenthood\")\n",
    "  and k = 10;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Slim it down with Binary Quantization\n",
    "\n",
    "The vectors in the `vec_articles` table take up a lot of space. A vector with `768` dimensions take up `786 * 4 = 3072` bytes of space each, or around `45MB` of space for these ~14,500 entries. \n",
    "\n",
    "That's a lot — the original text dataset was only `~4MB`!\n",
    "\n",
    "If you want to make the database smaller, there's a number of quantization or other methods to do so, by trading accuracy. \n",
    "Here's an example of performing [binary quantization](https://alexgarcia.xyz/sqlite-vec/guides/binary-quant.html)\n",
    "on this dataset, storing 768-dimensional bit-vectors instead of floating-point vectors, a `32x` size reduction, at the expense of accuracy. \n",
    "\n",
    "We'll keep the current SQLite database as-is, and instead make a copy into a new SQLite database file, and change the `vec_articles` table\n",
    "to store bit-vectors instead. \n",
    "\n",
    "First, we'll make a copy of the current database into a new file:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "0 row × 0 column\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vacuum into 'tmp-artic2.slim.db';"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we'll make a connection to this new file, and drop the old `vec_articles` table that contains the large `float[768]` vectors."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "0 row × 0 column\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "attach database 'tmp-artic2.slim.db' as slim;\n",
    "drop table slim.vec_articles;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we can create a new `vec0` table, storing `bit[768]` vectors instead! \n",
    "We can insert the original `float[768]` from the `main.vec_articles` table (original table),\n",
    "calling [`vec_quantize_binary()`](https://alexgarcia.xyz/sqlite-vec/api-reference.html#vec_quantize_binary) to convert the floats to bits. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "0 row × 0 column\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "create virtual table slim.vec_articles using vec0(\n",
    "  article_id integer primary key,\n",
    "  headline_embedding bit[768]\n",
    ");\n",
    "\n",
    "insert into slim.vec_articles(article_id, headline_embedding)\n",
    "select\n",
    "  article_id,\n",
    "  vec_quantize_binary(headline_embedding)\n",
    "from main.vec_articles;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then we can `VACUUM` the new `slim` database to shrink the file, delete the `DROP`'ed pages from the older `vec0` table. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "0 row × 0 column\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vacuum slim;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And there we have it! This file is `7.1MB`, a large reduction from the original `53MB` table. \n",
    "\n",
    "KNN queries are similar, only adding the `vec_quantize_binary()` function to the query vector."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "<th>\n",
       "headline\n",
       "</th>\n",
       "<th>\n",
       "distance\n",
       "</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Kamala Harris visits Planned Parenthood clinic\n",
       "</td>\n",
       "<td >\n",
       "139\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "How a network of abortion pill providers works together in the wake of new threats\n",
       "</td>\n",
       "<td >\n",
       "151\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "After Dobbs decision, more women are managing their own abortions\n",
       "</td>\n",
       "<td >\n",
       "153\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "A timeline of Trump&#39;s many, many positions on abortion\n",
       "</td>\n",
       "<td >\n",
       "156\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Two of the country’s largest transgender rights organizations will merge\n",
       "</td>\n",
       "<td >\n",
       "158\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Transforming Healthcare\n",
       "</td>\n",
       "<td >\n",
       "158\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "With Harris and Walz, Democrats put abortion rights at the top of the agenda\n",
       "</td>\n",
       "<td >\n",
       "159\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "In states with strict abortion policies, simply seeing an OB/GYN for regular care can be difficult\n",
       "</td>\n",
       "<td >\n",
       "160\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Where abortion rights could be on the ballot this fall: From the Politics Desk\n",
       "</td>\n",
       "<td >\n",
       "161\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "Map: Where medication abortion is and isn’t legal\n",
       "</td>\n",
       "<td >\n",
       "162\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "10 rows × 2 columns\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┐\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m                                                                                          \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mdistance\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┤\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic                                                    \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     139\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow a network of abortion pill providers works together in the wake of new threats                \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     151\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAfter Dobbs decision, more women are managing their own abortions                                 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     153\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mA timeline of Trump's many, many positions on abortion                                            \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     156\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTwo of the country’s largest transgender rights organizations will merge                          \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     158\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTransforming Healthcare                                                                           \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     158\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWith Harris and Walz, Democrats put abortion rights at the top of the agenda                      \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     159\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mIn states with strict abortion policies, simply seeing an OB/GYN for regular care can be difficult\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     160\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk                    \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     161\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMap: Where medication abortion is and isn’t legal                                                 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     162\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┘\n",
       "\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "select\n",
    "  slim.articles.headline,\n",
    "  slim.vec_articles.distance\n",
    "from slim.vec_articles\n",
    "left join slim.articles on slim.articles.rowid = slim.vec_articles.article_id\n",
    "where headline_embedding match vec_quantize_binary(lembed(\"planned parenthood\"))\n",
    "  and k = 10;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You'll notice the results differ slightly to the full-sized query from above. Some results are ordered differently, some are missing. \n",
    "The `distance` in this binary KNN search is hamming distance, not the default L2 distance. "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Solite",
   "language": "sql",
   "name": "solite"
  },
  "language_info": {
   "file_extension": ".sql",
   "mimetype": "text/x.sqlite",
   "name": "sql",
   "nb_converter": "script",
   "pygments_lexer": "sql",
   "version": "TODO"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/nbc-headlines/3_search.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NBC News Headlines: Exploring Hybrod FTS5 + Vector Search\n",
    "\n",
    "This notebooks explore a few different ways one could combine FTS5 and vector search results, when querying \n",
    "[FTS5](https://www.sqlite.org/fts5.html) and\n",
    "[`sqlite-vec`](https://github.com/asg017/sqlite-vec) virtual table.\n",
    "\n",
    "This dataset is a small list of headines scraped from NBC News, found in the [`./1_scrape.ipynb`](./1_scrape.ipynb) notebook.\n",
    "To see how the `fts_articles` and `vec_articles` tables were created, see the [`./3_search.ipynb`](./3_search.ipynb) notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "<th>\n",
       "vec_version()\n",
       "</th>\n",
       "<th>\n",
       "lembed_version()\n",
       "</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"text-align: left;\">\n",
       "v0.1.3-alpha.2\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "v0.0.1-alpha.8\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "1 row × 2 columns\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────\u001b[0m\u001b[0m┐\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mvec_version()\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mlembed_version()\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────\u001b[0m\u001b[0m┤\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mv0.1.3-alpha.2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mv0.0.1-alpha.8  \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────────────\u001b[0m\u001b[0m┘\n",
       "\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    ".open tmp-artic2.db\n",
    "\n",
    ".load ../../dist/vec0\n",
    ".load ./lembed0\n",
    "\n",
    "insert into lembed_models(name, model)\n",
    "  values (\n",
    "    'default',\n",
    "    lembed_model_from_file('snowflake-arctic-embed-m-v1.5.d70deb40.f16.gguf')\n",
    "  );\n",
    "\n",
    "select vec_version(), lembed_version();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Full-text Search Only\n",
    "\n",
    "A simple FTS query on the `fts_articles` virutal table can be made like so:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "<th>\n",
       "rowid\n",
       "</th>\n",
       "<th>\n",
       "headline\n",
       "</th>\n",
       "<th>\n",
       "rank\n",
       "</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td >\n",
       "4666\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Kamala Harris visits Planned Parenthood clinic\n",
       "</td>\n",
       "<td >\n",
       "-18.9139950477264\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6521\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Former Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\n",
       "</td>\n",
       "<td >\n",
       "-14.807022703838651\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "2 rows × 3 columns\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mrowid\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m                                                                              \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mrank\u001b[0m               \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4666\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic                                        \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m  -18.9139950477264\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6521\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFormer Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-14.807022703838651\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n",
       "\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    ".param set query planned parenthood\n",
    "\n",
    "select\n",
    "  rowid,\n",
    "  headline,\n",
    "  rank\n",
    "from fts_articles\n",
    "where headline match :query\n",
    "order by rank\n",
    "limit 10;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `rank` column is the negative BM25 score of the query + document. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  Vector Search Only\n",
    "\n",
    "A KNN vector search can be made on the `vec_articles` virtual table like so:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "<th>\n",
       "article_id\n",
       "</th>\n",
       "<th>\n",
       "headline\n",
       "</th>\n",
       "<th>\n",
       "distance\n",
       "</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td >\n",
       "4666\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Kamala Harris visits Planned Parenthood clinic\n",
       "</td>\n",
       "<td >\n",
       "0.492593914270401\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "13928\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "After Dobbs decision, more women are managing their own abortions\n",
       "</td>\n",
       "<td >\n",
       "0.5789032578468323\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "12636\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Transforming Healthcare\n",
       "</td>\n",
       "<td >\n",
       "0.5822411179542542\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6979\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "A timeline of Trump&#39;s many, many positions on abortion\n",
       "</td>\n",
       "<td >\n",
       "0.6101462841033936\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "7038\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "How a network of abortion pill providers works together in the wake of new threats\n",
       "</td>\n",
       "<td >\n",
       "0.6196886897087097\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6914\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "&#39;Major hurdles&#39;: The reality check behind Biden&#39;s big abortion promise\n",
       "</td>\n",
       "<td >\n",
       "0.6198344826698303\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6794\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump&#39;s conflicting abortion stances are coming back to haunt him — and his party\n",
       "</td>\n",
       "<td >\n",
       "0.6198986768722534\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "7381\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Where abortion rights could be on the ballot this fall: From the Politics Desk\n",
       "</td>\n",
       "<td >\n",
       "0.6201764345169067\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6871\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "How the Biden campaign quickly mobilized on Trump&#39;s abortion stance\n",
       "</td>\n",
       "<td >\n",
       "0.633980393409729\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "5496\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Battle over abortion heats up in Arizona — and could be on the 2024 ballot\n",
       "</td>\n",
       "<td >\n",
       "0.6341449022293091\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "10 rows × 3 columns\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┐\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1marticle_id\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m                                                                          \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mdistance\u001b[0m          \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┤\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      4666\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic                                    \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.492593914270401\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     13928\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAfter Dobbs decision, more women are managing their own abortions                 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5789032578468323\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     12636\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTransforming Healthcare                                                           \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5822411179542542\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      6979\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mA timeline of Trump's many, many positions on abortion                            \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6101462841033936\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      7038\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow a network of abortion pill providers works together in the wake of new threats\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6196886897087097\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      6914\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m'Major hurdles': The reality check behind Biden's big abortion promise            \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6198344826698303\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      6794\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump's conflicting abortion stances are coming back to haunt him — and his party \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6198986768722534\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk    \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6201764345169067\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      6871\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow the Biden campaign quickly mobilized on Trump's abortion stance               \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.633980393409729\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      5496\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mBattle over abortion heats up in Arizona — and could be on the 2024 ballot        \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6341449022293091\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┘\n",
       "\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    ".param set query planned parenthood\n",
    "\n",
    "select\n",
    "  article_id,\n",
    "  articles.headline,\n",
    "  distance\n",
    "from vec_articles\n",
    "left join articles on articles.rowid = vec_articles.article_id\n",
    "where headline_embedding match lembed(:query)\n",
    "  and k = 10;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `distance` column is the L2 distance between the query vector and the headline embedding. \n",
    "\n",
    "The rest of this notebook explore different ways of combining these FTS5 and vector search results. \n",
    "The core queries are similar, and only really different on different `JOIN` or `ORDER BY` techniques."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Combination Technique #1: Keyword-first\n",
    "\n",
    "In many search-engine cases, you may way to display keyword matches first, and supplement the rest wih with vector search results. \n",
    "This makes some intuitive sense — keyword matches are what uses expect, but you'll want to display more result if there are only a few matching documents. \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "<th>\n",
       "id\n",
       "</th>\n",
       "<th>\n",
       "headline\n",
       "</th>\n",
       "<th>\n",
       "match_type\n",
       "</th>\n",
       "<th>\n",
       "article_id\n",
       "</th>\n",
       "<th>\n",
       "rank_number\n",
       "</th>\n",
       "<th>\n",
       "score\n",
       "</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td >\n",
       "10098\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Kamala Harris says abortion bans are creating &#39;a health care crisis&#39;\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "fts\n",
       "</td>\n",
       "<td >\n",
       "10098\n",
       "</td>\n",
       "<td >\n",
       "1\n",
       "</td>\n",
       "<td >\n",
       "-10.678829270936067\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "9776\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "States with abortion bans saw birth control prescriptions fall post-Dobbs, study finds\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "fts\n",
       "</td>\n",
       "<td >\n",
       "9776\n",
       "</td>\n",
       "<td >\n",
       "2\n",
       "</td>\n",
       "<td >\n",
       "-10.016316725971112\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "2292\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Ohio GOP Senate candidates pitch federal abortion bans even after voters protected reproductive rights\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "fts\n",
       "</td>\n",
       "<td >\n",
       "2292\n",
       "</td>\n",
       "<td >\n",
       "3\n",
       "</td>\n",
       "<td >\n",
       "-9.7149595994016\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "452\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "64K women and girls became pregnant due to rape in states with abortion bans, study estimates\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "fts\n",
       "</td>\n",
       "<td >\n",
       "452\n",
       "</td>\n",
       "<td >\n",
       "4\n",
       "</td>\n",
       "<td >\n",
       "-9.163558569425538\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "9187\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Abortion bans drive away up to half of young talent, CNBC/Generation Lab youth survey finds\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "fts\n",
       "</td>\n",
       "<td >\n",
       "9187\n",
       "</td>\n",
       "<td >\n",
       "5\n",
       "</td>\n",
       "<td >\n",
       "-9.163558569425538\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6989\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump says abortion restrictions should be left to states, dodging a national ban\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "6989\n",
       "</td>\n",
       "<td >\n",
       "1\n",
       "</td>\n",
       "<td >\n",
       "0.4930749833583832\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "13928\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "After Dobbs decision, more women are managing their own abortions\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "13928\n",
       "</td>\n",
       "<td >\n",
       "2\n",
       "</td>\n",
       "<td >\n",
       "0.5120846629142761\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "11822\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Iowa now bans most abortions after about 6 weeks\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "11822\n",
       "</td>\n",
       "<td >\n",
       "3\n",
       "</td>\n",
       "<td >\n",
       "0.512569785118103\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "7381\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Where abortion rights could be on the ballot this fall: From the Politics Desk\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "7381\n",
       "</td>\n",
       "<td >\n",
       "4\n",
       "</td>\n",
       "<td >\n",
       "0.5168291926383972\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "14009\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump signals openness to banning abortion pill\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "14009\n",
       "</td>\n",
       "<td >\n",
       "5\n",
       "</td>\n",
       "<td >\n",
       "0.5288293957710266\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "4426\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Medication abortions rose in year after Dobbs decision, report finds\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "4426\n",
       "</td>\n",
       "<td >\n",
       "6\n",
       "</td>\n",
       "<td >\n",
       "0.5305097699165344\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "4328\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump signals support for a national 15-week abortion ban\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "4328\n",
       "</td>\n",
       "<td >\n",
       "7\n",
       "</td>\n",
       "<td >\n",
       "0.532848060131073\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6979\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "A timeline of Trump&#39;s many, many positions on abortion\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "6979\n",
       "</td>\n",
       "<td >\n",
       "8\n",
       "</td>\n",
       "<td >\n",
       "0.533357560634613\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "2092\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "For the first time in years, Sen. Graham hasn&#39;t introduced a national abortion ban\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "2092\n",
       "</td>\n",
       "<td >\n",
       "9\n",
       "</td>\n",
       "<td >\n",
       "0.5336830615997314\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6794\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump&#39;s conflicting abortion stances are coming back to haunt him — and his party\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "vec\n",
       "</td>\n",
       "<td >\n",
       "6794\n",
       "</td>\n",
       "<td >\n",
       "10\n",
       "</td>\n",
       "<td >\n",
       "0.5347095131874084\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "15 rows × 6 columns\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mid\u001b[0m   \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m                                                                                              \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mmatch_type\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1marticle_id\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mrank_number\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mscore\u001b[0m              \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m10098\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris says abortion bans are creating 'a health care crisis'                                  \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mfts       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     10098\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-10.678829270936067\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9776\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mStates with abortion bans saw birth control prescriptions fall post-Dobbs, study finds                \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mfts       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      9776\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-10.016316725971112\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2292\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mOhio GOP Senate candidates pitch federal abortion bans even after voters protected reproductive rights\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mfts       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      2292\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m   -9.7149595994016\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m  452\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m64K women and girls became pregnant due to rape in states with abortion bans, study estimates         \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mfts       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m       452\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.163558569425538\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9187\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAbortion bans drive away up to half of young talent, CNBC/Generation Lab youth survey finds           \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mfts       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      9187\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.163558569425538\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6989\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump says abortion restrictions should be left to states, dodging a national ban                     \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      6989\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.4930749833583832\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m13928\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAfter Dobbs decision, more women are managing their own abortions                                     \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     13928\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5120846629142761\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m11822\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mIowa now bans most abortions after about 6 weeks                                                      \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     11822\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m  0.512569785118103\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk                        \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5168291926383972\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m14009\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump signals openness to banning abortion pill                                                       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m     14009\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5288293957710266\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4426\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMedication abortions rose in year after Dobbs decision, report finds                                  \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      4426\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5305097699165344\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4328\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump signals support for a national 15-week abortion ban                                             \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      4328\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m  0.532848060131073\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6979\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mA timeline of Trump's many, many positions on abortion                                                \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      6979\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m  0.533357560634613\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2092\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFor the first time in years, Sen. Graham hasn't introduced a national abortion ban                    \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      2092\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m          9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5336830615997314\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6794\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump's conflicting abortion stances are coming back to haunt him — and his party                     \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m      6794\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m         10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5347095131874084\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n",
       "\u001b[0m\u001b[0m"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    ".param set query abortion bans\n",
    ".param set k 10\n",
    "\n",
    "\n",
    "with fts_matches as (\n",
    "  select\n",
    "    rowid as article_id,\n",
    "    row_number() over (order by rank) as rank_number,\n",
    "    rank as score\n",
    "  from fts_articles\n",
    "  where headline match :query\n",
    "  limit :k\n",
    "),\n",
    "vec_matches as (\n",
    "  select\n",
    "    article_id,\n",
    "    row_number() over (order by distance) as rank_number,\n",
    "    distance as score\n",
    "  from vec_articles\n",
    "  where\n",
    "    headline_embedding match lembed(:query)\n",
    "    and k = :k\n",
    "  order by distance\n",
    "),\n",
    "combined as (\n",
    "  select 'fts' as match_type, * from fts_matches\n",
    "  union all\n",
    "  select 'vec' as match_type, * from vec_matches\n",
    "),\n",
    "final as (\n",
    "  select\n",
    "    articles.id,\n",
    "    articles.headline,\n",
    "    combined.*\n",
    "  from combined\n",
    "  left join articles on articles.rowid = combined.article_id\n",
    ")\n",
    "select * from final;\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We do this with a verbose CTE: one step for the FTS5 query, another for the vector search, one to \"combine\" the results with a `UNION ALL`, and one last one to `LEFT JOIN` back to the base `articles` table to get the headline.\n",
    "\n",
    "Here we have 5 FTS results and 10 additional vector results. This seems pretty natural, a fallback to vector search when keywords matches lack a bit.\n",
    "\n",
    "One note: this example doesn't do any de-duplication, so you may get the same results twice. So you may want to add a `DISTINCT` or `GROUP BY` somehwere to handle that. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Combination Technique #2: Reciprocal Rank Fusion (RRF)\n",
    "\n",
    "[Reciprocal Rank Fusion](https://learn.microsoft.com/en-us/azure/search/hybrid-search-ranking) \n",
    "is another combination technique, where matches that are both FTS matches and vector matches\n",
    "are ranked higher than other. The CTE logic is a bit more involved, but can still be represented in a few steps:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table>\n",
       "<thead>\n",
       "<tr style=\"text-align: center;\">\n",
       "<th>\n",
       "id\n",
       "</th>\n",
       "<th>\n",
       "headline\n",
       "</th>\n",
       "<th>\n",
       "vec_rank\n",
       "</th>\n",
       "<th>\n",
       "fts_rank\n",
       "</th>\n",
       "<th>\n",
       "combined_rank\n",
       "</th>\n",
       "<th>\n",
       "vec_distance\n",
       "</th>\n",
       "<th>\n",
       "fts_score\n",
       "</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td >\n",
       "4328\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump signals support for a national 15-week abortion ban\n",
       "</td>\n",
       "<td >\n",
       "2\n",
       "</td>\n",
       "<td >\n",
       "3\n",
       "</td>\n",
       "<td >\n",
       "0.03200204813108039\n",
       "</td>\n",
       "<td >\n",
       "0.5334203839302063\n",
       "</td>\n",
       "<td >\n",
       "-9.841645168493953\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "5769\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Mitch McConnell shies away from supporting national abortion ban\n",
       "</td>\n",
       "<td >\n",
       "8\n",
       "</td>\n",
       "<td >\n",
       "2\n",
       "</td>\n",
       "<td >\n",
       "0.030834914611005692\n",
       "</td>\n",
       "<td >\n",
       "0.5501425266265869\n",
       "</td>\n",
       "<td >\n",
       "-10.19017787567105\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "9507\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Arizona Senate passes repeal of 1864 abortion ban\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "1\n",
       "</td>\n",
       "<td >\n",
       "0.01639344262295082\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "-10.564302831642667\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6989\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump says abortion restrictions should be left to states, dodging a national ban\n",
       "</td>\n",
       "<td >\n",
       "1\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "0.01639344262295082\n",
       "</td>\n",
       "<td >\n",
       "0.5142395496368408\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "10717\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Supreme Court rejects bid to restrict access to abortion pill\n",
       "</td>\n",
       "<td >\n",
       "3\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "0.015873015873015872\n",
       "</td>\n",
       "<td >\n",
       "0.5351248383522034\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "5981\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Arizona state House passes bill to repeal 1864 abortion ban\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "4\n",
       "</td>\n",
       "<td >\n",
       "0.015625\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "-9.841645168493953\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "14009\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump signals openness to banning abortion pill\n",
       "</td>\n",
       "<td >\n",
       "4\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "0.015625\n",
       "</td>\n",
       "<td >\n",
       "0.5364335179328918\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "6375\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Arizona Republicans again quash effort to repeal 1864 abortion ban\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "5\n",
       "</td>\n",
       "<td >\n",
       "0.015384615384615385\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "-9.841645168493953\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "7381\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Where abortion rights could be on the ballot this fall: From the Politics Desk\n",
       "</td>\n",
       "<td >\n",
       "5\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "0.015384615384615385\n",
       "</td>\n",
       "<td >\n",
       "0.5462378859519958\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "9443\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Arizona Gov. Katie Hobbs signs repeal of 1864 abortion ban\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "6\n",
       "</td>\n",
       "<td >\n",
       "0.015151515151515152\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "-9.841645168493953\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "13928\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "After Dobbs decision, more women are managing their own abortions\n",
       "</td>\n",
       "<td >\n",
       "6\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "0.015151515151515152\n",
       "</td>\n",
       "<td >\n",
       "0.5467031002044678\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "1821\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Dominican women fight child marriage, teen pregancy amid total abortion ban\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "7\n",
       "</td>\n",
       "<td >\n",
       "0.014925373134328358\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "-9.51616557526609\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "2092\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "For the first time in years, Sen. Graham hasn&#39;t introduced a national abortion ban\n",
       "</td>\n",
       "<td >\n",
       "7\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "0.014925373134328358\n",
       "</td>\n",
       "<td >\n",
       "0.5477523803710938\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "7150\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Tennessee court weighs challenge to abortion ban’s narrow medical exception\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "8\n",
       "</td>\n",
       "<td >\n",
       "0.014705882352941176\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "-9.51616557526609\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "8690\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Arizona Supreme Court pushes back enforcement date for 1864 abortion ban\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "9\n",
       "</td>\n",
       "<td >\n",
       "0.014492753623188406\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "-9.51616557526609\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "11822\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Iowa now bans most abortions after about 6 weeks\n",
       "</td>\n",
       "<td >\n",
       "9\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "0.014492753623188406\n",
       "</td>\n",
       "<td >\n",
       "0.5557170510292053\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "2646\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Trump campaign scrambles over abortion ban report as Democrats seize the moment\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "10\n",
       "</td>\n",
       "<td >\n",
       "0.014285714285714285\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "-9.211525101866211\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td >\n",
       "5538\n",
       "</td>\n",
       "<td style=\"text-align: left;\">\n",
       "Map: Where medication abortion is and isn’t legal\n",
       "</td>\n",
       "<td >\n",
       "10\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "<td >\n",
       "0.014285714285714285\n",
       "</td>\n",
       "<td >\n",
       "0.5588464140892029\n",
       "</td>\n",
       "<td >\n",
       "\n",
       "</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<div style=\"text-align: right;\">\n",
       "18 rows × 7 columns\n",
       "</div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mid\u001b[0m   \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m                                                                          \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mvec_rank\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mfts_rank\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mcombined_rank\u001b[0m       \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mvec_distance\u001b[0m      \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mfts_score\u001b[0m          \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4328\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump signals support for a national 15-week abortion ban                         \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m       2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m       3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.03200204813108039\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5334203839302063\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.841645168493953\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5769\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMitch McConnell shies away from supporting national abortion ban                  \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m       8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m       2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.030834914611005692\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5501425266265869\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -10.19017787567105\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
       "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9507\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona Senate passes repeal of 1864 abortion ban                                 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m        \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m       1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.01639344262295082\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m                  \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-10.564302831642667\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[
Download .txt
gitextract_v4s81yo1/

├── .github/
│   └── workflows/
│       ├── fuzz.yaml
│       ├── release.yaml
│       ├── site.yaml
│       └── test.yaml
├── .gitignore
├── ARCHITECTURE.md
├── LICENSE-APACHE
├── LICENSE-MIT
├── Makefile
├── README.md
├── SECURITY.md
├── TODO
├── VERSION
├── benchmarks/
│   ├── README.md
│   ├── exhaustive-memory/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── bench.py
│   │   ├── gist.suite
│   │   ├── requirements.txt
│   │   └── sift.suite
│   ├── micro/
│   │   ├── .gitignore
│   │   ├── Cargo.toml
│   │   ├── benches/
│   │   │   └── my_benchmark.rs
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── profiling/
│   │   ├── build-from-npy.sql
│   │   └── query-k.sql
│   └── self-params/
│       ├── build.py
│       ├── knn.py
│       └── test.py
├── bindings/
│   ├── go/
│   │   └── ncruces/
│   │       └── go-sqlite3.patch
│   ├── python/
│   │   └── extra_init.py
│   └── rust/
│       ├── .gitignore
│       ├── Cargo.toml.tmpl
│       ├── Makefile
│       ├── build.rs
│       └── src/
│           └── lib.rs
├── examples/
│   ├── nbc-headlines/
│   │   ├── .gitignore
│   │   ├── 1_scrape.ipynb
│   │   ├── 2_build.ipynb
│   │   ├── 3_search.ipynb
│   │   ├── Makefile
│   │   └── README.md
│   ├── python-recipes/
│   │   └── openai-sample.py
│   ├── simple-bun/
│   │   ├── .gitignore
│   │   ├── demo.ts
│   │   └── package.json
│   ├── simple-c/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   └── demo.c
│   ├── simple-deno/
│   │   └── demo.ts
│   ├── simple-go-cgo/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   ├── demo.go
│   │   ├── go.mod
│   │   └── go.sum
│   ├── simple-go-ncruces/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   ├── demo.go
│   │   ├── go.mod
│   │   └── go.sum
│   ├── simple-node/
│   │   ├── .gitignore
│   │   ├── demo.mjs
│   │   └── package.json
│   ├── simple-node2/
│   │   ├── .gitignore
│   │   ├── demo.mjs
│   │   ├── package.json
│   │   └── tmp.mjs
│   ├── simple-python/
│   │   ├── .gitignore
│   │   ├── demo.py
│   │   └── requirements.txt
│   ├── simple-ruby/
│   │   ├── .gitignore
│   │   ├── Gemfile
│   │   └── demo.rb
│   ├── simple-rust/
│   │   ├── .gitignore
│   │   ├── Cargo.toml
│   │   └── demo.rs
│   ├── simple-sqlite/
│   │   └── demo.sql
│   ├── simple-wasm/
│   │   └── index.html
│   ├── sqlite3-cli/
│   │   ├── README.md
│   │   └── core_init.c
│   └── wasm/
│       ├── README.md
│       └── wasm.c
├── reference.yaml
├── scripts/
│   ├── progress.ts
│   ├── publish-release.sh
│   └── vendor.sh
├── site/
│   ├── .gitignore
│   ├── .vitepress/
│   │   ├── config.mts
│   │   └── theme/
│   │       ├── HeroImg.vue
│   │       ├── Sponsors.vue
│   │       ├── index.ts
│   │       └── style.css
│   ├── api-reference.md
│   ├── build-ref.mjs
│   ├── compiling.md
│   ├── features/
│   │   ├── knn.md
│   │   └── vec0.md
│   ├── getting-started/
│   │   ├── installation.md
│   │   └── introduction.md
│   ├── guides/
│   │   ├── arithmetic.md
│   │   ├── binary-quant.md
│   │   ├── classifiers.md
│   │   ├── hybrid-search.md
│   │   ├── matryoshka.md
│   │   ├── performance.md
│   │   ├── rag.md
│   │   ├── scalar-quant.md
│   │   └── semantic-search.md
│   ├── index.md
│   ├── package.json
│   ├── project.data.ts
│   ├── public/
│   │   └── fonts/
│   │       └── ZillaSlab-SemiBold.otf
│   ├── sqlite.tmlanguage.json
│   ├── using/
│   │   ├── android-ios.md
│   │   ├── c.md
│   │   ├── datasette.md
│   │   ├── go.md
│   │   ├── js.md
│   │   ├── python.md
│   │   ├── rqlite.md
│   │   ├── ruby.md
│   │   ├── rust.md
│   │   ├── sqlite-utils.md
│   │   └── wasm.md
│   └── versioning.md
├── sqlite-dist.toml
├── sqlite-vec.c
├── sqlite-vec.h.tmpl
├── test.sql
├── tests/
│   ├── .gitignore
│   ├── .python-version
│   ├── Cargo.toml
│   ├── __snapshots__/
│   │   ├── test-auxiliary.ambr
│   │   ├── test-general.ambr
│   │   ├── test-insert-delete.ambr
│   │   ├── test-knn-distance-constraints.ambr
│   │   ├── test-metadata.ambr
│   │   └── test-partition-keys.ambr
│   ├── afbd/
│   │   ├── .gitignore
│   │   ├── .python-version
│   │   ├── Makefile
│   │   ├── README.md
│   │   └── test-afbd.py
│   ├── build.rs
│   ├── conftest.py
│   ├── correctness/
│   │   ├── build.py
│   │   └── test-correctness.py
│   ├── fuzz/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── TODO.md
│   │   ├── corpus/
│   │   │   ├── exec/
│   │   │   │   ├── select1
│   │   │   │   └── vec_version
│   │   │   ├── json/
│   │   │   │   ├── empty
│   │   │   │   ├── valid_2d
│   │   │   │   └── valid_4d
│   │   │   ├── shadow-corrupt/
│   │   │   │   ├── target0
│   │   │   │   ├── target1
│   │   │   │   ├── target2
│   │   │   │   ├── target3
│   │   │   │   ├── target4
│   │   │   │   └── target5
│   │   │   ├── vec-mismatch/
│   │   │   │   ├── dim_mismatch_4d_2d
│   │   │   │   ├── json2d_invalid_blob
│   │   │   │   ├── json4d_invalid_blob
│   │   │   │   ├── json_1d_blob_5byte
│   │   │   │   ├── json_2d_blob_3byte
│   │   │   │   ├── json_valid_blob_invalid
│   │   │   │   ├── json_valid_empty
│   │   │   │   ├── single_f32_bad_text
│   │   │   │   ├── single_normalize_json
│   │   │   │   ├── type_mismatch_f32_bit
│   │   │   │   └── type_mismatch_f32_int8
│   │   │   ├── vec0-create/
│   │   │   │   ├── normal1
│   │   │   │   └── normal2
│   │   │   └── vec0-operations/
│   │   │       ├── ins_del_ins
│   │   │       └── insert5
│   │   ├── exec.c
│   │   ├── exec.dict
│   │   ├── json.c
│   │   ├── metadata-columns.c
│   │   ├── numpy.c
│   │   ├── numpy.dict
│   │   ├── scalar-functions.c
│   │   ├── scalar-functions.dict
│   │   ├── shadow-corrupt.c
│   │   ├── targets/
│   │   │   └── .gitignore
│   │   ├── vec-each.c
│   │   ├── vec-mismatch.c
│   │   ├── vec0-create-full.c
│   │   ├── vec0-create.c
│   │   ├── vec0-create.dict
│   │   ├── vec0-delete-completeness.c
│   │   └── vec0-operations.c
│   ├── fuzz.py
│   ├── helpers.py
│   ├── leak-fixtures/
│   │   ├── each.sql
│   │   ├── knn.sql
│   │   └── vec0-create.sql
│   ├── minimum/
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   └── demo.c
│   ├── pyproject.toml
│   ├── skip.test-correctness.py
│   ├── sqlite-vec-internal.h
│   ├── test-auxiliary.py
│   ├── test-general.py
│   ├── test-insert-delete.py
│   ├── test-knn-distance-constraints.py
│   ├── test-loadable.py
│   ├── test-metadata.py
│   ├── test-partition-keys.py
│   ├── test-unit.c
│   ├── test-wasm.mjs
│   ├── unittest.rs
│   └── utils.py
└── tmp-static.py
Download .txt
SYMBOL INDEX (541 symbols across 59 files)

FILE: benchmarks/exhaustive-memory/bench.py
  class BenchResult (line 13) | class BenchResult:
  function duration (line 19) | def duration(seconds: float):
  function cosine_similarity (line 24) | def cosine_similarity(
  function topk (line 33) | def topk(
  function ivecs_read (line 48) | def ivecs_read(fname):
  function fvecs_read (line 54) | def fvecs_read(fname, sample):
  function bench_hnsw (line 58) | def bench_hnsw(base, query):
  function bench_hnsw_bf (line 85) | def bench_hnsw_bf(base, query, k) -> BenchResult:
  function bench_numpy (line 109) | def bench_numpy(base, query, k) -> BenchResult:
  function bench_sqlite_vec (line 121) | def bench_sqlite_vec(base, query, page_size, chunk_size, k) -> BenchResult:
  function bench_sqlite_vec_scalar (line 169) | def bench_sqlite_vec_scalar(base, query, page_size, k) -> BenchResult:
  function bench_libsql (line 208) | def bench_libsql(base, query, page_size, k) -> BenchResult:
  function register_np (line 251) | def register_np(db, array, name):
  function bench_sqlite_vec_static (line 269) | def bench_sqlite_vec_static(base, query, k) -> BenchResult:
  function bench_faiss (line 304) | def bench_faiss(base, query, k) -> BenchResult:
  function bench_lancedb (line 323) | def bench_lancedb(base, query, k) -> BenchResult:
  function bench_duckdb (line 342) | def bench_duckdb(base, query, k) -> BenchResult:
  function bench_sentence_transformers (line 370) | def bench_sentence_transformers(base, query, k) -> BenchResult:
  function bench_chroma (line 386) | def bench_chroma(base, query, k):
  function bench_usearch_npy (line 407) | def bench_usearch_npy(base, query, k) -> BenchResult:
  function bench_usearch_special (line 418) | def bench_usearch_special(base, query, k) -> BenchResult:
  function suite (line 434) | def suite(name, base, query, k, benchmarks):
  function parse_args (line 515) | def parse_args():
  function cli_read_input (line 554) | def cli_read_input(input, sample):
  function cli_read_query (line 563) | def cli_read_query(query, base):
  class Config (line 571) | class Config:
  function parse_config_file (line 580) | def parse_config_file(path:str) -> Config:

FILE: benchmarks/micro/benches/my_benchmark.rs
  function random_vector (line 7) | fn random_vector(n: usize) -> Vec<f32> {
  function setup_base (line 12) | fn setup_base(page_size: usize, d: usize, n: i32) -> Connection {
  function criterion_benchmark (line 44) | pub fn criterion_benchmark(c: &mut Criterion) {

FILE: benchmarks/micro/build.rs
  function main (line 1) | fn main() {

FILE: benchmarks/micro/src/lib.rs
  function sqlite3_vec_init (line 5) | pub fn sqlite3_vec_init();
  function init_vec (line 8) | pub fn init_vec() {

FILE: benchmarks/profiling/build-from-npy.sql
  type vec_items (line 7) | create virtual table vec_items using vec0(

FILE: benchmarks/self-params/build.py
  function connect (line 5) | def connect(path):

FILE: benchmarks/self-params/knn.py
  function connect (line 7) | def connect(path):

FILE: benchmarks/self-params/test.py
  function connect (line 5) | def connect(path):

FILE: bindings/python/extra_init.py
  function serialize_float32 (line 6) | def serialize_float32(vector: List[float]) -> bytes:
  function serialize_int8 (line 11) | def serialize_int8(vector: List[int]) -> bytes:
  function register_numpy (line 19) | def register_numpy(db: Connection, name: str, array: npt.NDArray):
  function register_numpy (line 44) | def register_numpy(db: Connection, name: str, array):

FILE: bindings/rust/build.rs
  function main (line 1) | fn main() {

FILE: bindings/rust/src/lib.rs
  function sqlite3_vec_init (line 3) | pub fn sqlite3_vec_init();
  function test_rusqlite_auto_extension (line 13) | fn test_rusqlite_auto_extension() {

FILE: examples/python-recipes/openai-sample.py
  function serialize (line 10) | def serialize(vector: List[float]) -> bytes:

FILE: examples/simple-c/demo.c
  function main (line 7) | int main(int argc, char *argv[]) {

FILE: examples/simple-go-cgo/demo.go
  function main (line 12) | func main() {

FILE: examples/simple-go-ncruces/demo.go
  function main (line 11) | func main() {

FILE: examples/simple-python/demo.py
  function serialize_f32 (line 8) | def serialize_f32(vector: List[float]) -> bytes:

FILE: examples/simple-rust/demo.rs
  function main (line 5) | fn main() -> Result<()> {

FILE: examples/simple-sqlite/demo.sql
  type vec_items (line 7) | CREATE VIRTUAL TABLE vec_items USING vec0(embedding float[4])

FILE: examples/sqlite3-cli/core_init.c
  function core_init (line 4) | int core_init(const char *dummy) {

FILE: examples/wasm/wasm.c
  function sqlite3_wasm_extra_init (line 4) | int sqlite3_wasm_extra_init(const char * unused) {

FILE: scripts/progress.ts
  function numOccuranges (line 3) | function numOccuranges(rg) {
  constant TOTAL (line 21) | const TOTAL = 246;

FILE: site/.vitepress/theme/index.ts
  method enhanceApp (line 22) | enhanceApp({ app, router, siteData }) {

FILE: site/build-ref.mjs
  constant HEADER (line 9) | const HEADER = `---
  constant REF_PATH (line 25) | const REF_PATH = resolve(
  constant EXT_PATH (line 29) | const EXT_PATH = resolve(
  function formatSingleValue (line 77) | function formatSingleValue(value) {
  function formatValue (line 99) | function formatValue(value) {
  function tableize (line 115) | function tableize(stmt, results) {
  function renderExamples (line 125) | function renderExamples(db, name, example) {

FILE: site/project.data.ts
  constant PROJECT (line 5) | const PROJECT = "sqlite-vec";
  constant VERSION (line 7) | const VERSION = readFileSync(
  method load (line 13) | load() {

FILE: sqlite-vec.c
  type u_int8_t (line 68) | typedef u_int8_t uint8_t;
  type u_int16_t (line 69) | typedef u_int16_t uint16_t;
  type u_int64_t (line 70) | typedef u_int64_t uint64_t;
  type i8 (line 76) | typedef int8_t i8;
  type u8 (line 77) | typedef uint8_t u8;
  type i16 (line 78) | typedef int16_t i16;
  type i32 (line 79) | typedef int32_t i32;
  type sqlite3_int64 (line 80) | typedef sqlite3_int64 i64;
  type u32 (line 81) | typedef uint32_t u32;
  type u64 (line 82) | typedef uint64_t u64;
  type f32 (line 83) | typedef float f32;
  type usize (line 84) | typedef size_t usize;
  type VectorElementType (line 115) | enum VectorElementType {
  function f32 (line 128) | static f32 l2_sqr_float_avx(const void *pVect1v, const void *pVect2v,
  function f32 (line 169) | static f32 l2_sqr_float_neon(const void *pVect1v, const void *pVect2v,
  function f32 (line 227) | static f32 l2_sqr_int8_neon(const void *pVect1v, const void *pVect2v,
  function i32 (line 266) | static i32 l1_int8_neon(const void *pVect1v, const void *pVect2v,
  function l1_f32_neon (line 326) | static double l1_f32_neon(const void *pVect1v, const void *pVect2v,
  function f32 (line 361) | static f32 l2_sqr_float(const void *pVect1v, const void *pVect2v,
  function f32 (line 377) | static f32 l2_sqr_int8(const void *pA, const void *pB, const void *pD) {
  function f32 (line 392) | static f32 distance_l2_sqr_float(const void *a, const void *b, const voi...
  function f32 (line 406) | static f32 distance_l2_sqr_int8(const void *a, const void *b, const void...
  function i32 (line 415) | static i32 l1_int8(const void *pA, const void *pB, const void *pD) {
  function i32 (line 430) | static i32 distance_l1_int8(const void *a, const void *b, const void *d) {
  function l1_f32 (line 439) | static double l1_f32(const void *pA, const void *pB, const void *pD) {
  function distance_l1_f32 (line 454) | static double distance_l1_f32(const void *a, const void *b, const void *...
  function f32 (line 463) | static f32 distance_cosine_float(const void *pVect1v, const void *pVect2v,
  function f32 (line 481) | static f32 distance_cosine_int8(const void *pA, const void *pB,
  function f32 (line 514) | static f32 distance_hamming_u8(u8 *a, u8 *b, size_t n) {
  function __builtin_popcountl (line 527) | static unsigned int __builtin_popcountl(unsigned int x) {
  function f32 (line 540) | static f32 distance_hamming_u64(u64 *a, u64 *b, size_t n) {
  function f32 (line 556) | static f32 distance_hamming(const void *a, const void *b, const void *d) {
  function f32 (line 566) | f32 _test_distance_l2_sqr_float(const f32 *a, const f32 *b, size_t dims) {
  function f32 (line 569) | f32 _test_distance_cosine_float(const f32 *a, const f32 *b, size_t dims) {
  function f32 (line 572) | f32 _test_distance_hamming(const u8 *a, const u8 *b, size_t dims) {
  function vector_cleanup_noop (line 599) | void vector_cleanup_noop(void *_) { UNUSED_PARAMETER(_); }
  function vtab_set_error (line 603) | void vtab_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) {
  type Array (line 610) | struct Array {
  function array_init (line 626) | int array_init(struct Array *array, size_t element_size, size_t init_cap...
  function array_append (line 641) | int array_append(struct Array *array, const void *element) {
  function array_cleanup (line 658) | void array_cleanup(struct Array *array) {
  function fvec_cleanup_noop (line 697) | void fvec_cleanup_noop(void *_) { UNUSED_PARAMETER(_); }
  function fvec_from_value (line 699) | static int fvec_from_value(sqlite3_value *value, f32 **vector,
  function bitvec_from_value (line 829) | static int bitvec_from_value(sqlite3_value *value, u8 **vector,
  function int8_vec_from_value (line 849) | static int int8_vec_from_value(sqlite3_value *value, i8 **vector,
  function vector_from_value (line 982) | int vector_from_value(sqlite3_value *value, void **vector, size_t *dimen...
  function ensure_vector_match (line 1016) | int ensure_vector_match(sqlite3_value *aValue, sqlite3_value *bValue, vo...
  function _cmp (line 1066) | int _cmp(const void *a, const void *b) { return (*(i64 *)a - *(i64 *)b); }
  type VecNpyFile (line 1068) | struct VecNpyFile {
  function vec_npy_file (line 1075) | static void vec_npy_file(sqlite3_context *context, int argc,
  function vec_f32 (line 1096) | static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **...
  function vec_bit (line 1114) | static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **...
  function vec_int8 (line 1131) | static void vec_int8(sqlite3_context *context, int argc, sqlite3_value *...
  function vec_length (line 1149) | static void vec_length(sqlite3_context *context, int argc,
  function vec_distance_cosine (line 1169) | static void vec_distance_cosine(sqlite3_context *context, int argc,
  function vec_distance_l2 (line 1211) | static void vec_distance_l2(sqlite3_context *context, int argc,
  function vec_distance_l1 (line 1252) | static void vec_distance_l1(sqlite3_context *context, int argc,
  function vec_distance_hamming (line 1293) | static void vec_distance_hamming(sqlite3_context *context, int argc,
  type VectorElementType (line 1335) | enum VectorElementType
  function vec_type (line 1347) | static void vec_type(sqlite3_context *context, int argc, sqlite3_value *...
  function vec_quantize_binary (line 1364) | static void vec_quantize_binary(sqlite3_context *context, int argc,
  function vec_quantize_int8 (line 1433) | static void vec_quantize_int8(sqlite3_context *context, int argc,
  function vec_add (line 1480) | static void vec_add(sqlite3_context *context, int argc, sqlite3_value **...
  function vec_sub (line 1537) | static void vec_sub(sqlite3_context *context, int argc, sqlite3_value **...
  function vec_slice (line 1595) | static void vec_slice(sqlite3_context *context, int argc,
  function vec_to_json (line 1711) | static void vec_to_json(sqlite3_context *context, int argc,
  function vec_normalize (line 1761) | static void vec_normalize(sqlite3_context *context, int argc,
  function _static_text_func (line 1810) | static void _static_text_func(sqlite3_context *context, int argc,
  type Vec0TokenType (line 1819) | enum Vec0TokenType {
  type Vec0Token (line 1830) | struct Vec0Token {
  function is_alpha (line 1836) | int is_alpha(char x) {
  function is_digit (line 1839) | int is_digit(char x) { return (x >= '0' && x <= '9'); }
  function is_whitespace (line 1840) | int is_whitespace(char x) {
  function vec0_token_next (line 1848) | int vec0_token_next(char *start, char *end, struct Vec0Token *out) {
  type Vec0Scanner (line 1922) | struct Vec0Scanner {
  function vec0_scanner_init (line 1928) | void vec0_scanner_init(struct Vec0Scanner *scanner, const char *source,
  function vec0_scanner_next (line 1934) | int vec0_scanner_next(struct Vec0Scanner *scanner, struct Vec0Token *out) {
  function vec0_parse_table_option (line 1942) | int vec0_parse_table_option(const char *source, int source_length,
  function vec0_parse_partition_key_definition (line 1998) | int vec0_parse_partition_key_definition(const char *source, int source_l...
  function vec0_parse_auxiliary_column_definition (line 2075) | int vec0_parse_auxiliary_column_definition(const char *source, int sourc...
  type vec0_metadata_column_kind (line 2133) | typedef enum {
  function vec0_parse_metadata_column_definition (line 2153) | int vec0_parse_metadata_column_definition(const char *source, int source...
  function vec0_parse_primary_key_definition (line 2213) | int vec0_parse_primary_key_definition(const char *source, int source_len...
  type Vec0DistanceMetrics (line 2278) | enum Vec0DistanceMetrics {
  type VectorColumnDefinition (line 2284) | struct VectorColumnDefinition {
  type Vec0PartitionColumnDefinition (line 2292) | struct Vec0PartitionColumnDefinition {
  type Vec0AuxiliaryColumnDefinition (line 2298) | struct Vec0AuxiliaryColumnDefinition {
  type Vec0MetadataColumnDefinition (line 2303) | struct Vec0MetadataColumnDefinition {
  function vector_byte_size (line 2309) | size_t vector_byte_size(enum VectorElementType element_type,
  function vector_column_byte_size (line 2322) | size_t vector_column_byte_size(struct VectorColumnDefinition column) {
  function vec0_parse_vector_column (line 2336) | int vec0_parse_vector_column(const char *source, int source_length,
  type vec_each_vtab (line 2471) | typedef struct vec_each_vtab vec_each_vtab;
  type vec_each_vtab (line 2472) | struct vec_each_vtab {
  type vec_each_cursor (line 2476) | typedef struct vec_each_cursor vec_each_cursor;
  type vec_each_cursor (line 2477) | struct vec_each_cursor {
  function vec_eachConnect (line 2486) | static int vec_eachConnect(sqlite3 *db, void *pAux, int argc,
  function vec_eachDisconnect (line 2509) | static int vec_eachDisconnect(sqlite3_vtab *pVtab) {
  function vec_eachOpen (line 2515) | static int vec_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
  function vec_eachClose (line 2526) | static int vec_eachClose(sqlite3_vtab_cursor *cur) {
  function vec_eachBestIndex (line 2535) | static int vec_eachBestIndex(sqlite3_vtab *pVTab,
  function vec_eachFilter (line 2564) | static int vec_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
  function vec_eachRowid (line 2587) | static int vec_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
  function vec_eachEof (line 2593) | static int vec_eachEof(sqlite3_vtab_cursor *cur) {
  function vec_eachNext (line 2598) | static int vec_eachNext(sqlite3_vtab_cursor *cur) {
  function vec_eachColumn (line 2604) | static int vec_eachColumn(sqlite3_vtab_cursor *cur, sqlite3_context *con...
  type NpyTokenType (line 2665) | enum NpyTokenType {
  type NpyToken (line 2678) | struct NpyToken {
  function npy_token_next (line 2684) | int npy_token_next(unsigned char *start, unsigned char *end,
  type NpyScanner (line 2761) | struct NpyScanner {
  function npy_scanner_init (line 2767) | void npy_scanner_init(struct NpyScanner *scanner, const unsigned char *s...
  function npy_scanner_next (line 2774) | int npy_scanner_next(struct NpyScanner *scanner, struct NpyToken *out) {
  function parse_npy_header (line 2783) | int parse_npy_header(sqlite3_vtab *pVTab, const unsigned char *header,
  type vec_npy_each_vtab (line 2919) | typedef struct vec_npy_each_vtab vec_npy_each_vtab;
  type vec_npy_each_vtab (line 2920) | struct vec_npy_each_vtab {
  type vec_npy_each_input_type (line 2924) | typedef enum {
  type vec_npy_each_cursor (line 2929) | typedef struct vec_npy_each_cursor vec_npy_each_cursor;
  type vec_npy_each_cursor (line 2930) | struct vec_npy_each_cursor {
  function parse_npy_file (line 2979) | int parse_npy_file(sqlite3_vtab *pVTab, FILE *file, vec_npy_each_cursor ...
  function parse_npy_buffer (line 3069) | int parse_npy_buffer(sqlite3_vtab *pVTab, const unsigned char *buffer,
  function vec_npy_eachConnect (line 3121) | static int vec_npy_eachConnect(sqlite3 *db, void *pAux, int argc,
  function vec_npy_eachDisconnect (line 3144) | static int vec_npy_eachDisconnect(sqlite3_vtab *pVtab) {
  function vec_npy_eachOpen (line 3150) | static int vec_npy_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCur...
  function vec_npy_eachClose (line 3161) | static int vec_npy_eachClose(sqlite3_vtab_cursor *cur) {
  function vec_npy_eachBestIndex (line 3180) | static int vec_npy_eachBestIndex(sqlite3_vtab *pVTab,
  function vec_npy_eachFilter (line 3209) | static int vec_npy_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
  function vec_npy_eachRowid (line 3278) | static int vec_npy_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRo...
  function vec_npy_eachEof (line 3284) | static int vec_npy_eachEof(sqlite3_vtab_cursor *cur) {
  function vec_npy_eachNext (line 3292) | static int vec_npy_eachNext(sqlite3_vtab_cursor *cur) {
  function vec_npy_eachColumnBuffer (line 3316) | static int vec_npy_eachColumnBuffer(vec_npy_each_cursor *pCur,
  function vec_npy_eachColumnFile (line 3345) | static int vec_npy_eachColumnFile(vec_npy_each_cursor *pCur,
  function vec_npy_eachColumn (line 3372) | static int vec_npy_eachColumn(sqlite3_vtab_cursor *cur,
  type vec0_vtab (line 3481) | typedef struct vec0_vtab vec0_vtab;
  type vec0_user_column_kind (line 3492) | typedef enum {
  type vec0_vtab (line 3506) | struct vec0_vtab {
  function vec0_free_resources (line 3632) | void vec0_free_resources(vec0_vtab *p) {
  function vec0_free (line 3650) | void vec0_free(vec0_vtab *p) {
  function vec0_num_defined_user_columns (line 3686) | int vec0_num_defined_user_columns(vec0_vtab *p) {
  function vec0_column_distance_idx (line 3697) | int vec0_column_distance_idx(vec0_vtab *p) {
  function vec0_column_k_idx (line 3708) | int vec0_column_k_idx(vec0_vtab *p) {
  function vec0_column_idx_is_vector (line 3717) | int vec0_column_idx_is_vector(vec0_vtab *pVtab, int column_idx) {
  function vec0_column_idx_to_vector_idx (line 3727) | int vec0_column_idx_to_vector_idx(vec0_vtab *pVtab, int column_idx) {
  function vec0_column_idx_is_partition (line 3735) | int vec0_column_idx_is_partition(vec0_vtab *pVtab, int column_idx) {
  function vec0_column_idx_to_partition_idx (line 3745) | int vec0_column_idx_to_partition_idx(vec0_vtab *pVtab, int column_idx) {
  function vec0_column_idx_is_auxiliary (line 3754) | int vec0_column_idx_is_auxiliary(vec0_vtab *pVtab, int column_idx) {
  function vec0_column_idx_to_auxiliary_idx (line 3764) | int vec0_column_idx_to_auxiliary_idx(vec0_vtab *pVtab, int column_idx) {
  function vec0_column_idx_is_metadata (line 3773) | int vec0_column_idx_is_metadata(vec0_vtab *pVtab, int column_idx) {
  function vec0_column_idx_to_metadata_idx (line 3783) | int vec0_column_idx_to_metadata_idx(vec0_vtab *pVtab, int column_idx) {
  function vec0_get_chunk_position (line 3800) | int vec0_get_chunk_position(vec0_vtab *p, i64 rowid, sqlite3_value **id,
  function vec0_get_id_value_from_rowid (line 3870) | int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid,
  function vec0_rowid_from_id (line 3876) | int vec0_rowid_from_id(vec0_vtab *p, sqlite3_value *valueId, i64 *rowid) {
  function vec0_result_id (line 3914) | int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) {
  function vec0_get_vector_data (line 3945) | int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_...
  function vec0_get_partition_value_for_rowid (line 4027) | int vec0_get_partition_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int ...
  function vec0_get_auxiliary_value_for_rowid (line 4073) | int vec0_get_auxiliary_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int ...
  function vec0_result_metadata_value_for_rowid (line 4113) | int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int me...
  function vec0_get_latest_chunk_rowid (line 4199) | int vec0_get_latest_chunk_rowid(vec0_vtab *p, i64 *chunk_rowid, sqlite3_...
  function vec0_rowids_insert_rowid (line 4270) | int vec0_rowids_insert_rowid(vec0_vtab *p, i64 rowid) {
  function vec0_rowids_insert_id (line 4332) | int vec0_rowids_insert_id(vec0_vtab *p, sqlite3_value *idValue, i64 *row...
  function vec0_metadata_chunk_size (line 4399) | int vec0_metadata_chunk_size(vec0_metadata_column_kind kind, int chunk_s...
  function vec0_rowids_update_position (line 4413) | int vec0_rowids_update_position(vec0_vtab *p, i64 rowid, i64 chunk_rowid,
  function vec0_new_chunk (line 4474) | int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i6...
  type vec0_query_fullscan_data (line 4632) | struct vec0_query_fullscan_data {
  function vec0_query_fullscan_data_clear (line 4636) | void vec0_query_fullscan_data_clear(
  type vec0_query_knn_data (line 4647) | struct vec0_query_knn_data {
  function vec0_query_knn_data_clear (line 4656) | void vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) {
  type vec0_query_point_data (line 4670) | struct vec0_query_point_data {
  function vec0_query_point_data_clear (line 4675) | void vec0_query_point_data_clear(struct vec0_query_point_data *point_dat...
  type vec0_query_plan (line 4684) | typedef enum {
  type vec0_cursor (line 4692) | typedef struct vec0_cursor vec0_cursor;
  type vec0_cursor (line 4693) | struct vec0_cursor {
  function vec0_cursor_clear (line 4702) | void vec0_cursor_clear(vec0_cursor *pCur) {
  function vec0_init (line 4721) | static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *cons...
  function vec0Create (line 5256) | static int vec0Create(sqlite3 *db, void *pAux, int argc,
  function vec0Connect (line 5261) | static int vec0Connect(sqlite3 *db, void *pAux, int argc,
  function vec0Disconnect (line 5267) | static int vec0Disconnect(sqlite3_vtab *pVtab) {
  function vec0Destroy (line 5273) | static int vec0Destroy(sqlite3_vtab *pVtab) {
  function vec0Open (line 5375) | static int vec0Open(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
  function vec0Close (line 5386) | static int vec0Close(sqlite3_vtab_cursor *cur) {
  type vec0_idxstr_kind (line 5395) | typedef enum  {
  type vec0_partition_operator (line 5418) | typedef enum  {
  type vec0_metadata_operator (line 5439) | typedef enum  {
  type vec0_distance_constraint_operator (line 5450) | typedef enum {
  function vec0BestIndex (line 5458) | static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxIn...
  function merge_sorted_lists (line 5861) | void merge_sorted_lists(f32 *a, i64 *a_rowids, i64 a_length, f32 *b,
  function u8 (line 5896) | u8 *bitmap_new(i32 n) {
  function u8 (line 5904) | u8 *bitmap_new_from(i32 n, u8 *from) {
  function bitmap_copy (line 5913) | void bitmap_copy(u8 *base, u8 *from, i32 n) {
  function bitmap_and_inplace (line 5918) | void bitmap_and_inplace(u8 *base, u8 *other, i32 n) {
  function bitmap_set (line 5925) | void bitmap_set(u8 *bitmap, i32 position, int value) {
  function bitmap_get (line 5933) | int bitmap_get(u8 *bitmap, i32 position) {
  function bitmap_clear (line 5937) | void bitmap_clear(u8 *bitmap, i32 n) {
  function bitmap_fill (line 5942) | void bitmap_fill(u8 *bitmap, i32 n) {
  function min_idx (line 5957) | int min_idx(const f32 *distances, i32 n, u8 *candidates, i32 *out, i32 k,
  function vec0_get_metadata_text_long_value (line 5989) | int vec0_get_metadata_text_long_value(
  function vec0_chunks_iter (line 6038) | int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlit...
  type Vec0MetadataIn (line 6123) | struct Vec0MetadataIn{
  type Vec0MetadataInTextEntry (line 6133) | struct Vec0MetadataInTextEntry {
  function vec0_metadata_filter_text (line 6139) | int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, cons...
  function vec0_set_metadata_filter_bitmap (line 6458) | int vec0_set_metadata_filter_bitmap(
  function vec0Filter_knn_chunks_iter (line 6624) | int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
  function vec0Filter_knn (line 7029) | int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
  function vec0Filter_fullscan (line 7317) | int vec0Filter_fullscan(vec0_vtab *p, vec0_cursor *pCur) {
  function vec0Filter_point (line 7363) | int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int argc,
  function vec0Filter (line 7418) | static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
  function vec0Rowid (line 7450) | static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
  function vec0Next (line 7472) | static int vec0Next(sqlite3_vtab_cursor *cur) {
  function vec0Eof (line 7508) | static int vec0Eof(sqlite3_vtab_cursor *cur) {
  function vec0Column_fullscan (line 7535) | static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
  function vec0Column_point (line 7610) | static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
  function vec0Column_knn (line 7694) | static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur,
  function vec0Column (line 7772) | static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context,
  function vec0Update_InsertRowidStep (line 7802) | int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue,
  function vec0Update_InsertNextAvailableStep (line 7872) | int vec0Update_InsertNextAvailableStep(
  function vec0_write_vector_to_vector_blob (line 8025) | static int
  function vec0Update_InsertWriteFinalStep (line 8063) | int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid,
  function vec0_write_metadata_value (line 8183) | int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64...
  function vec0Update_Insert (line 8342) | int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
  function vec0Update_Delete_ClearValidity (line 8575) | int vec0Update_Delete_ClearValidity(vec0_vtab *p, i64 chunk_id,
  function vec0Update_Delete_ClearRowid (line 8638) | int vec0Update_Delete_ClearRowid(vec0_vtab *p, i64 chunk_id,
  function vec0Update_Delete_ClearVectors (line 8673) | int vec0Update_Delete_ClearVectors(vec0_vtab *p, i64 chunk_id,
  function vec0Update_Delete_DeleteChunkIfEmpty (line 8721) | int vec0Update_Delete_DeleteChunkIfEmpty(vec0_vtab *p, i64 chunk_id,
  function vec0Update_Delete_DeleteRowids (line 8832) | int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) {
  function vec0Update_Delete_DeleteAux (line 8860) | int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) {
  function vec0Update_Delete_ClearMetadata (line 8888) | int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 ...
  function vec0Update_Delete (line 8965) | int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
  function vec0Update_UpdateAuxColumn (line 9045) | int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, s...
  function vec0Update_UpdateVectorColumn (line 9067) | int vec0Update_UpdateVectorColumn(vec0_vtab *p, i64 chunk_id, i64 chunk_...
  function vec0Update_Update (line 9145) | int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **arg...
  function vec0Update (line 9253) | static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
  function vec0ShadowName (line 9272) | static int vec0ShadowName(const char *zName) {
  function vec0Begin (line 9322) | static int vec0Begin(sqlite3_vtab *pVTab) {
  function vec0Sync (line 9326) | static int vec0Sync(sqlite3_vtab *pVTab) {
  function vec0Commit (line 9351) | static int vec0Commit(sqlite3_vtab *pVTab) {
  function vec0Rollback (line 9355) | static int vec0Rollback(sqlite3_vtab *pVTab) {
  type static_blob_definition (line 9392) | struct static_blob_definition {
  function vec_static_blob_from_raw (line 9398) | static void vec_static_blob_from_raw(sqlite3_context *context, int argc,
  type static_blob (line 9420) | typedef struct static_blob static_blob;
  type static_blob (line 9421) | struct static_blob {
  type vec_static_blob_data (line 9429) | typedef struct vec_static_blob_data vec_static_blob_data;
  type vec_static_blob_data (line 9430) | struct vec_static_blob_data {
  type vec_static_blobs_vtab (line 9434) | typedef struct vec_static_blobs_vtab vec_static_blobs_vtab;
  type vec_static_blobs_vtab (line 9435) | struct vec_static_blobs_vtab {
  type vec_static_blobs_cursor (line 9440) | typedef struct vec_static_blobs_cursor vec_static_blobs_cursor;
  type vec_static_blobs_cursor (line 9441) | struct vec_static_blobs_cursor {
  function vec_static_blobsConnect (line 9446) | static int vec_static_blobsConnect(sqlite3 *db, void *pAux, int argc,
  function vec_static_blobsDisconnect (line 9471) | static int vec_static_blobsDisconnect(sqlite3_vtab *pVtab) {
  function vec_static_blobsUpdate (line 9477) | static int vec_static_blobsUpdate(sqlite3_vtab *pVTab, int argc,
  function vec_static_blobsOpen (line 9515) | static int vec_static_blobsOpen(sqlite3_vtab *p,
  function vec_static_blobsClose (line 9527) | static int vec_static_blobsClose(sqlite3_vtab_cursor *cur) {
  function vec_static_blobsBestIndex (line 9533) | static int vec_static_blobsBestIndex(sqlite3_vtab *pVTab,
  function vec_static_blobsFilter (line 9543) | static int vec_static_blobsFilter(sqlite3_vtab_cursor *pVtabCursor, int ...
  function vec_static_blobsRowid (line 9556) | static int vec_static_blobsRowid(sqlite3_vtab_cursor *cur,
  function vec_static_blobsNext (line 9563) | static int vec_static_blobsNext(sqlite3_vtab_cursor *cur) {
  function vec_static_blobsEof (line 9576) | static int vec_static_blobsEof(sqlite3_vtab_cursor *cur) {
  function vec_static_blobsColumn (line 9581) | static int vec_static_blobsColumn(sqlite3_vtab_cursor *cur,
  type vec_static_blob_entries_vtab (line 9637) | typedef struct vec_static_blob_entries_vtab vec_static_blob_entries_vtab;
  type vec_static_blob_entries_vtab (line 9638) | struct vec_static_blob_entries_vtab {
  type vec_sbe_query_plan (line 9642) | typedef enum {
  type sbe_query_knn_data (line 9647) | struct sbe_query_knn_data {
  function sbe_query_knn_data_clear (line 9656) | void sbe_query_knn_data_clear(struct sbe_query_knn_data *knn_data) {
  type vec_static_blob_entries_cursor (line 9670) | typedef struct vec_static_blob_entries_cursor vec_static_blob_entries_cu...
  type vec_static_blob_entries_cursor (line 9671) | struct vec_static_blob_entries_cursor {
  function vec_static_blob_entriesConnect (line 9678) | static int vec_static_blob_entriesConnect(sqlite3 *db, void *pAux, int a...
  function vec_static_blob_entriesCreate (line 9714) | static int vec_static_blob_entriesCreate(sqlite3 *db, void *pAux, int argc,
  function vec_static_blob_entriesDisconnect (line 9720) | static int vec_static_blob_entriesDisconnect(sqlite3_vtab *pVtab) {
  function vec_static_blob_entriesOpen (line 9726) | static int vec_static_blob_entriesOpen(sqlite3_vtab *p,
  function vec_static_blob_entriesClose (line 9738) | static int vec_static_blob_entriesClose(sqlite3_vtab_cursor *cur) {
  function vec_static_blob_entriesBestIndex (line 9745) | static int vec_static_blob_entriesBestIndex(sqlite3_vtab *pVTab,
  function vec_static_blob_entriesFilter (line 9826) | static int vec_static_blob_entriesFilter(sqlite3_vtab_cursor *pVtabCursor,
  function vec_static_blob_entriesRowid (line 9919) | static int vec_static_blob_entriesRowid(sqlite3_vtab_cursor *cur,
  function vec_static_blob_entriesNext (line 9936) | static int vec_static_blob_entriesNext(sqlite3_vtab_cursor *cur) {
  function vec_static_blob_entriesEof (line 9951) | static int vec_static_blob_entriesEof(sqlite3_vtab_cursor *cur) {
  function vec_static_blob_entriesColumn (line 9966) | static int vec_static_blob_entriesColumn(sqlite3_vtab_cursor *cur,
  function SQLITE_VEC_API (line 10058) | SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
  function SQLITE_VEC_API (line 10143) | SQLITE_VEC_API int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg,
  function SQLITE_VEC_API (line 10160) | SQLITE_VEC_API int

FILE: test.sql
  type v (line 13) | create virtual table v using vec0(
  type v (line 56) | create virtual table v using vec0(
  type vec_articles (line 65) | create virtual table vec_articles using vec0(
  type movies (line 84) | create table movies(movie_id integer primary key, synopsis text)
  type vec_chunks (line 202) | create virtual table vec_chunks using vec0(
  type vec_chunks (line 243) | create virtual table vec_chunks using vec0(
  type vec_movies (line 281) | create virtual table vec_movies using vec0(
  type vec_movies (line 319) | create virtual table vec_movies using vec0(
  type vec_chunks (line 331) | create virtual table vec_chunks using vec0(
  type v (line 351) | create virtual table v using vec0(a float[1])
  type v (line 357) | create virtual table v using vec0(

FILE: tests/afbd/test-afbd.py
  function serialize_float32 (line 16) | def serialize_float32(vector: List[float]) -> bytes:
  function build_command (line 21) | def build_command(file_path, metadata_set=None):
  function tests_command (line 111) | def tests_command(file_path):
  function main (line 213) | def main():

FILE: tests/build.rs
  function main (line 1) | fn main() {

FILE: tests/conftest.py
  function db (line 6) | def db():

FILE: tests/correctness/test-correctness.py
  function cosine_similarity (line 7) | def cosine_similarity(
  function distance_l2 (line 15) | def distance_l2(
  function topk (line 21) | def topk(
  function test_all (line 96) | def test_all():

FILE: tests/fuzz.py
  function trace (line 23) | def trace(sql):
  function spread_args (line 27) | def spread_args(n):

FILE: tests/fuzz/exec.c
  function LLVMFuzzerTestOneInput (line 11) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/json.c
  function LLVMFuzzerTestOneInput (line 11) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/metadata-columns.c
  function LLVMFuzzerTestOneInput (line 10) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/numpy.c
  function LLVMFuzzerTestOneInput (line 14) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/scalar-functions.c
  function LLVMFuzzerTestOneInput (line 10) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/shadow-corrupt.c
  function LLVMFuzzerTestOneInput (line 10) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/vec-each.c
  function LLVMFuzzerTestOneInput (line 10) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/vec-mismatch.c
  function bind_valid_vector (line 31) | static void bind_valid_vector(sqlite3_stmt *stmt, int param, int mode) {
  function run_query (line 62) | static void run_query(sqlite3 *db, const char *sql,
  function LLVMFuzzerTestOneInput (line 90) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/vec0-create-full.c
  function LLVMFuzzerTestOneInput (line 10) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/vec0-create.c
  function LLVMFuzzerTestOneInput (line 11) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/vec0-delete-completeness.c
  function LLVMFuzzerTestOneInput (line 10) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/fuzz/vec0-operations.c
  function LLVMFuzzerTestOneInput (line 10) | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

FILE: tests/helpers.py
  function _f32 (line 6) | def _f32(list):
  function _i64 (line 10) | def _i64(list):
  function _int8 (line 14) | def _int8(list):
  function exec (line 18) | def exec(db, sql, parameters=[]):
  function vec0_shadow_table_contents (line 38) | def vec0_shadow_table_contents(db, v, skip_info=True):

FILE: tests/leak-fixtures/knn.sql
  type v (line 9) | create virtual table v using vec0(a float[1], chunk_size=8)

FILE: tests/minimum/demo.c
  function main (line 7) | int main(int argc, char *argv[]) {

FILE: tests/sqlite-vec-internal.h
  type Vec0TokenType (line 19) | enum Vec0TokenType {
  type Vec0Token (line 35) | struct Vec0Token {
  type Vec0Scanner (line 41) | struct Vec0Scanner {
  type Vec0Scanner (line 47) | struct Vec0Scanner
  type Vec0Scanner (line 48) | struct Vec0Scanner
  type Vec0Token (line 48) | struct Vec0Token
  type Vec0Token (line 49) | struct Vec0Token
  type VectorElementType (line 53) | enum VectorElementType {
  type Vec0DistanceMetrics (line 59) | enum Vec0DistanceMetrics {
  type VectorColumnDefinition (line 65) | struct VectorColumnDefinition {
  type VectorColumnDefinition (line 74) | struct VectorColumnDefinition

FILE: tests/test-auxiliary.py
  function test_constructor_limit (line 5) | def test_constructor_limit(db, snapshot):
  function test_normal (line 17) | def test_normal(db, snapshot):
  function test_types (line 38) | def test_types(db, snapshot):
  function test_updates (line 79) | def test_updates(db, snapshot):
  function test_deletes (line 95) | def test_deletes(db, snapshot):
  function test_knn (line 111) | def test_knn(db, snapshot):

FILE: tests/test-general.py
  function test_shadow (line 10) | def test_shadow(db, snapshot):
  function test_info (line 25) | def test_info(db, snapshot):

FILE: tests/test-insert-delete.py
  function test_insert_creates_chunks_and_vectors (line 7) | def test_insert_creates_chunks_and_vectors(db, snapshot):
  function test_insert_auto_rowid (line 34) | def test_insert_auto_rowid(db):
  function test_insert_text_primary_key (line 50) | def test_insert_text_primary_key(db, snapshot):
  function test_delete_clears_validity (line 73) | def test_delete_clears_validity(db):
  function test_insert_delete_reinsert (line 96) | def test_insert_delete_reinsert(db):
  function test_insert_validates_dimensions (line 115) | def test_insert_validates_dimensions(db):
  function test_insert_validates_type (line 133) | def test_insert_validates_type(db):
  function test_info_table_contents (line 147) | def test_info_table_contents(db, snapshot):
  function test_delete_zeroes_rowid_blob (line 156) | def test_delete_zeroes_rowid_blob(db):
  function test_delete_zeroes_vector_blob (line 174) | def test_delete_zeroes_vector_blob(db):
  function test_delete_all_rows_deletes_chunk (line 197) | def test_delete_all_rows_deletes_chunk(db):
  function test_delete_chunk_multiple_chunks (line 224) | def test_delete_chunk_multiple_chunks(db):
  function test_delete_with_metadata_columns (line 246) | def test_delete_with_metadata_columns(db):
  function test_delete_with_auxiliary_columns (line 276) | def test_delete_with_auxiliary_columns(db):
  function test_delete_with_text_primary_key (line 298) | def test_delete_with_text_primary_key(db):
  function test_delete_with_partition_keys (line 326) | def test_delete_with_partition_keys(db):
  function test_delete_int8_vectors (line 365) | def test_delete_int8_vectors(db):
  function test_delete_bit_vectors (line 389) | def test_delete_bit_vectors(db):
  function _file_db (line 411) | def _file_db(tmp_path):
  function test_delete_chunk_shrinks_pages (line 421) | def test_delete_chunk_shrinks_pages(tmp_path):
  function test_delete_one_chunk_of_two_shrinks_pages (line 452) | def test_delete_one_chunk_of_two_shrinks_pages(tmp_path):

FILE: tests/test-knn-distance-constraints.py
  function test_normal (line 5) | def test_normal(db, snapshot):
  class Row (line 42) | class Row:
    method __init__ (line 43) | def __init__(self):
    method __repr__ (line 46) | def __repr__(self) -> str:

FILE: tests/test-loadable.py
  function bitmap_full (line 23) | def bitmap_full(n: int) -> bytearray:
  function bitmap_zerod (line 28) | def bitmap_zerod(n: int) -> bytearray:
  function f32_zerod (line 33) | def f32_zerod(n: int) -> bytearray:
  function _f32 (line 40) | def _f32(list):
  function _i64 (line 44) | def _i64(list):
  function _int8 (line 48) | def _int8(list):
  function bitmap (line 52) | def bitmap(bitstring):
  function connect (line 56) | def connect(ext, path=":memory:", extra_entrypoint=None):
  function explain_query_plan (line 84) | def explain_query_plan(sql, db=db):
  function execute_all (line 88) | def execute_all(cursor, sql, args=None):
  function spread_args (line 95) | def spread_args(args):
  function register_numpy (line 127) | def register_numpy(db, name: str, array):
  function test_vec_static_blob_entries (line 149) | def test_vec_static_blob_entries():
  function test_limits (line 267) | def test_limits():
  function test_funcs (line 281) | def test_funcs():
  function test_modules (line 291) | def test_modules():
  function test_vec_version (line 298) | def test_vec_version():
  function test_vec_debug (line 303) | def test_vec_debug():
  function test_vec_bit (line 309) | def test_vec_bit():
  function test_vec_f32 (line 328) | def test_vec_f32():
  function test_vec_int8 (line 385) | def test_vec_int8():
  function npy_cosine (line 396) | def npy_cosine(a, b):
  function npy_l2 (line 400) | def npy_l2(a, b):
  function test_vec_distance_cosine (line 404) | def test_vec_distance_cosine():
  function test_vec_distance_hamming (line 427) | def test_vec_distance_hamming():
  function test_vec_distance_l1 (line 448) | def test_vec_distance_l1():
  function test_vec_distance_l2 (line 510) | def test_vec_distance_l2():
  function test_vec_length (line 533) | def test_vec_length():
  function test_vec_normalize (line 582) | def test_vec_normalize():
  function test_vec_slice (line 594) | def test_vec_slice():
  function test_vec_type (line 650) | def test_vec_type():
  function test_vec_add (line 666) | def test_vec_add():
  function test_vec_sub (line 694) | def test_vec_sub():
  function test_vec_to_json (line 722) | def test_vec_to_json():
  function test_vec_quantize_int8 (line 735) | def test_vec_quantize_int8():
  function test_vec_quantize_binary (line 742) | def test_vec_quantize_binary():
  function test_vec0 (line 750) | def test_vec0():
  function test_vec0_inserts (line 754) | def test_vec0_inserts():
  function test_vec0_insert_errors2 (line 954) | def test_vec0_insert_errors2():
  function test_vec0_drops (line 1012) | def test_vec0_drops():
  function test_vec0_delete (line 1040) | def test_vec0_delete():
  function test_vec0_delete_errors (line 1173) | def test_vec0_delete_errors():
  function test_vec0_updates (line 1234) | def test_vec0_updates():
  function test_vec0_point (line 1390) | def test_vec0_point():
  function test_vec0_text_pk (line 1424) | def test_vec0_text_pk():
  function test_vec0_best_index (line 1542) | def test_vec0_best_index():
  function authorizer_deny_on (line 1584) | def authorizer_deny_on(operation, x1, x2=None):
  function authorizer_debug (line 1593) | def authorizer_debug(op, p1, p2, p3, p4):
  function _raises (line 1602) | def _raises(message, error=sqlite3.OperationalError):
  function test_vec_each (line 1607) | def test_vec_each():
  function to_npy (line 1624) | def to_npy(arr):
  function test_vec_npy_each (line 1631) | def test_vec_npy_each():
  function test_vec_npy_each_errors (line 1664) | def test_vec_npy_each_errors():
  function test_vec_npy_each_errors_files (line 1783) | def test_vec_npy_each_errors_files():
  function test_vec0_constructor (line 1846) | def test_vec0_constructor():
  function test_vec0_create_errors (line 1926) | def test_vec0_create_errors():
  function test_vec0_knn (line 2003) | def test_vec0_knn():
  function np_distance_l2 (line 2136) | def np_distance_l2(
  function np_topk (line 2142) | def np_topk(
  function test_correctness_npy (line 2158) | def test_correctness_npy():
  function test_smoke (line 2211) | def test_smoke():
  function test_vec0_stress_small_chunks (line 2327) | def test_vec0_stress_small_chunks():
  function test_vec0_distance_metric (line 2419) | def test_vec0_distance_metric():
  function test_vec0_vacuum (line 2471) | def test_vec0_vacuum():
  function rowids_value (line 2480) | def rowids_value(buffer: bytearray) -> List[int]:
  function cosine_similarity (line 2489) | def cosine_similarity(
  function topk (line 2498) | def topk(
  function test_stress1 (line 2513) | def test_stress1():
  function test_stress (line 2542) | def test_stress():
  function test_coverage (line 2564) | def test_coverage():

FILE: tests/test-metadata.py
  function test_constructor_limit (line 8) | def test_constructor_limit(db, snapshot):
  function test_normal (line 20) | def test_normal(db, snapshot):
  function test_text_knn (line 52) | def test_text_knn(db, snapshot):
  function test_long_text_updates (line 127) | def test_long_text_updates(db, snapshot):
  function test_long_text_knn (line 139) | def test_long_text_knn(db, snapshot):
  function test_types (line 172) | def test_types(db, snapshot):
  function test_updates (line 194) | def test_updates(db, snapshot):
  function test_deletes (line 243) | def test_deletes(db, snapshot):
  function test_knn (line 268) | def test_knn(db, snapshot):
  function test_vtab_in (line 296) | def test_vtab_in(db, snapshot):
  function test_vtab_in_long_text (line 342) | def test_vtab_in_long_text(db, snapshot):
  function test_idxstr (line 373) | def test_idxstr(db, snapshot):
  function eqp (line 424) | def eqp(db, sql):
  function test_stress (line 436) | def test_stress(db, snapshot):
  function test_errors (line 576) | def test_errors(db, snapshot):
  function authorizer_deny_on (line 589) | def authorizer_deny_on(operation, x1, x2=None):

FILE: tests/test-partition-keys.py
  function test_constructor_limit (line 5) | def test_constructor_limit(db, snapshot):
  function test_normal (line 21) | def test_normal(db, snapshot):
  function test_types (line 34) | def test_types(db, snapshot):
  function test_updates (line 56) | def test_updates(db, snapshot):
  class Row (line 77) | class Row:
    method __init__ (line 78) | def __init__(self):
    method __repr__ (line 81) | def __repr__(self) -> str:

FILE: tests/test-unit.c
  function test_vec0_token_next (line 15) | void test_vec0_token_next() {
  function test_vec0_scanner (line 136) | void test_vec0_scanner() {
  function test_vec0_parse_vector_column (line 314) | void test_vec0_parse_vector_column() {
  function test_vec0_parse_partition_key_definition (line 511) | void test_vec0_parse_partition_key_definition() {
  function test_distance_l2_sqr_float (line 553) | void test_distance_l2_sqr_float() {
  function test_distance_cosine_float (line 592) | void test_distance_cosine_float() {
  function test_distance_hamming (line 623) | void test_distance_hamming() {
  function main (line 662) | int main() {

FILE: tests/test-wasm.mjs
  function main (line 1) | async function main() {

FILE: tests/unittest.rs
  function main (line 1) | fn main() {
  function _min_idx (line 6) | fn _min_idx(distances: Vec<f32>, k: i32) -> Vec<i32> {
  function _merge_sorted_lists (line 31) | fn _merge_sorted_lists(
  function min_idx (line 65) | fn min_idx(
  function merge_sorted_lists (line 75) | fn merge_sorted_lists(
  function test_basic (line 95) | fn test_basic() {
  function test_merge_sorted_lists (line 110) | fn test_merge_sorted_lists() {

FILE: tests/utils.py
  function to_npy (line 5) | def to_npy(arr):

FILE: tmp-static.py
  function register_np (line 25) | def register_np(array, name):
Condensed preview — 219 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,161K chars).
[
  {
    "path": ".github/workflows/fuzz.yaml",
    "chars": 5659,
    "preview": "name: \"Fuzz\"\non:\n  push:\n    branches: [main]\n  schedule:\n    # Nightly at 2am UTC for longer fuzzing sessions\n    - cro"
  },
  {
    "path": ".github/workflows/release.yaml",
    "chars": 13116,
    "preview": "name: \"Release\"\non:\n  release:\n    types: [published]\npermissions:\n  contents: read\njobs:\n  build-linux-x86_64-extension"
  },
  {
    "path": ".github/workflows/site.yaml",
    "chars": 823,
    "preview": "name: Deploy Site\non:\n  workflow_dispatch: {}\n  push:\n    branches:\n      - main\n    paths:\n      - \"site/**\"\n      - \"."
  },
  {
    "path": ".github/workflows/test.yaml",
    "chars": 8676,
    "preview": "name: \"Test\"\non:\n  push:\n    branches:\n      - main\npermissions:\n  contents: read\njobs:\n  build-linux-x86_64-extension:\n"
  },
  {
    "path": ".gitignore",
    "chars": 279,
    "preview": "/target\n.vscode\nsift/\n*.tar.gz\n*.db\n*.npy\n*.bin\n*.out\nvenv/\n\nvendor/\ndist/\n\n*.pyc\n*.db-journal\n\nalexandria/\nopenai/\nexam"
  },
  {
    "path": "ARCHITECTURE.md",
    "chars": 4018,
    "preview": "# `sqlite-vec` Architecture\n\nInternal documentation for how `sqlite-vec` works under-the-hood. Not meant for\nusers of th"
  },
  {
    "path": "LICENSE-APACHE",
    "chars": 10931,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "LICENSE-MIT",
    "chars": 1068,
    "preview": "MIT License\n\nCopyright (c) 2024 Alex Garcia\n\nPermission is hereby granted, free of charge, to any person obtaining a cop"
  },
  {
    "path": "Makefile",
    "chars": 8608,
    "preview": "\nCOMMIT=$(shell git rev-parse HEAD)\nVERSION=$(shell cat VERSION)\nDATE=$(shell date +'%FT%TZ%z')\n\nINSTALL_LIB_DIR = /usr/"
  },
  {
    "path": "README.md",
    "chars": 9204,
    "preview": "# `sqlite-vec`\n\n[![](https://dcbadge.vercel.app/api/server/VCtQ8cGhUs)](https://discord.gg/Ve7WeCJFXk)\n\nAn extremely sma"
  },
  {
    "path": "SECURITY.md",
    "chars": 179,
    "preview": "Please report any security vulnerabilities to alexsebastian.garcia@gmail.com . Avould using public Github issues wheneve"
  },
  {
    "path": "TODO",
    "chars": 616,
    "preview": "- [ ] add `xyz_info` shadow table with version etc.\n\n- later\n  - [ ] partition: UPDATE support\n  - [ ] skip invalid vali"
  },
  {
    "path": "VERSION",
    "chars": 13,
    "preview": "0.1.8-alpha.1"
  },
  {
    "path": "benchmarks/README.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "benchmarks/exhaustive-memory/.gitignore",
    "chars": 6,
    "preview": "data/\n"
  },
  {
    "path": "benchmarks/exhaustive-memory/Makefile",
    "chars": 329,
    "preview": "\n\n\ndata/:\n\tmkdir -p $@\n\ndata/sift: data/\n\tcurl -o data/sift.tar.gz ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz\n\tt"
  },
  {
    "path": "benchmarks/exhaustive-memory/README.md",
    "chars": 2186,
    "preview": "# `sqlite-vec` In-memory benchmark comparisions\n\nThis repo contains a benchmarks that compares KNN queries of `sqlite-ve"
  },
  {
    "path": "benchmarks/exhaustive-memory/bench.py",
    "chars": 18388,
    "preview": "import numpy as np\nimport numpy.typing as npt\nimport time\nimport sqlite3\nimport pandas as pd\nfrom dataclasses import dat"
  },
  {
    "path": "benchmarks/exhaustive-memory/gist.suite",
    "chars": 211,
    "preview": "@name=gist\n@input=data/gist/gist_base.fvecs\n@queries=data/gist/gist_query.fvecs\n@sample=500000\n@qsample=20\n@k=20\n\nfaiss\n"
  },
  {
    "path": "benchmarks/exhaustive-memory/requirements.txt",
    "chars": 2298,
    "preview": "annotated-types==0.7.0\nanyio==4.4.0\nasgiref==3.8.1\nattrs==23.2.0\nbackoff==2.2.1\nbcrypt==4.2.0\nbuild==1.2.1\ncachetools==5"
  },
  {
    "path": "benchmarks/exhaustive-memory/sift.suite",
    "chars": 371,
    "preview": "@name=sift1m\n@input=data/sift/sift_base.fvecs\n@queries=data/sift/sift_query.fvecs\n@qsample=100\n@k=20\n\nfaiss\nusearch\nduck"
  },
  {
    "path": "benchmarks/micro/.gitignore",
    "chars": 8,
    "preview": "target/\n"
  },
  {
    "path": "benchmarks/micro/Cargo.toml",
    "chars": 286,
    "preview": "[package]\nname = \"micro\"\nversion = \"0.1.0\"\nedition = \"2021\"\n\n[dependencies]\nrusqlite = {version=\"0.31.0\", features=[\"bun"
  },
  {
    "path": "benchmarks/micro/benches/my_benchmark.rs",
    "chars": 3311,
    "preview": "use criterion::{black_box, criterion_group, criterion_main, Criterion};\nuse micro::init_vec;\nuse rand::Rng;\nuse rusqlite"
  },
  {
    "path": "benchmarks/micro/build.rs",
    "chars": 104,
    "preview": "fn main() {\n    cc::Build::new()\n        .file(\"../../sqlite-vec.c\")\n        .compile(\"sqlite_vec0\");\n}\n"
  },
  {
    "path": "benchmarks/micro/src/lib.rs",
    "chars": 252,
    "preview": "use rusqlite::ffi::sqlite3_auto_extension;\n\n#[link(name = \"sqlite_vec0\")]\nextern \"C\" {\n    pub fn sqlite3_vec_init();\n}\n"
  },
  {
    "path": "benchmarks/profiling/build-from-npy.sql",
    "chars": 370,
    "preview": ".timer on\npragma page_size = 32768;\n--pragma page_size = 16384;\n--pragma page_size = 16384;\n--pragma page_size = 4096;\n\n"
  },
  {
    "path": "benchmarks/profiling/query-k.sql",
    "chars": 735,
    "preview": ".timer on\n\nselect rowid, distance\nfrom vec_items\nwhere embedding match (select embedding from vec_items where rowid = 10"
  },
  {
    "path": "benchmarks/self-params/build.py",
    "chars": 2291,
    "preview": "import sqlite3\nimport time\n\n\ndef connect(path):\n    db = sqlite3.connect(path)\n    db.enable_load_extension(True)\n    db"
  },
  {
    "path": "benchmarks/self-params/knn.py",
    "chars": 2160,
    "preview": "import sqlite3\nimport time\nfrom random import randrange\nfrom statistics import mean\n\n\ndef connect(path):\n    print(path)"
  },
  {
    "path": "benchmarks/self-params/test.py",
    "chars": 719,
    "preview": "import sqlite3\nimport time\n\n\ndef connect(path):\n    db = sqlite3.connect(path)\n    db.enable_load_extension(True)\n    db"
  },
  {
    "path": "bindings/go/ncruces/go-sqlite3.patch",
    "chars": 1053,
    "preview": "diff --git a/embed/build.sh b/embed/build.sh\nindex ed2aaec..4cc0b0e 100755\n--- a/embed/build.sh\n+++ b/embed/build.sh\n@@ "
  },
  {
    "path": "bindings/python/extra_init.py",
    "chars": 1404,
    "preview": "from typing import List\nfrom struct import pack\nfrom sqlite3 import Connection\n\n\ndef serialize_float32(vector: List[floa"
  },
  {
    "path": "bindings/rust/.gitignore",
    "chars": 45,
    "preview": "target/\nsqlite-vec.c\nsqlite-vec.h\nCargo.toml\n"
  },
  {
    "path": "bindings/rust/Cargo.toml.tmpl",
    "chars": 454,
    "preview": "[package]\nname = \"sqlite-vec\"\nversion = \"${VERSION}\"\nedition = \"2021\"\nauthors = [\"Alex Garcia <alexsebastian.garcia@gmai"
  },
  {
    "path": "bindings/rust/Makefile",
    "chars": 400,
    "preview": "VERSION=$(shell cat ../../VERSION)\n\ndeps: Cargo.toml sqlite-vec.c sqlite-vec.h sqlite3ext.h sqlite3.h\n\nCargo.toml: ../.."
  },
  {
    "path": "bindings/rust/build.rs",
    "chars": 108,
    "preview": "fn main() {\n    cc::Build::new().file(\"sqlite-vec.c\").define(\"SQLITE_CORE\", None).compile(\"sqlite_vec0\");\n}\n"
  },
  {
    "path": "bindings/rust/src/lib.rs",
    "chars": 589,
    "preview": "#[link(name = \"sqlite_vec0\")]\nextern \"C\" {\n    pub fn sqlite3_vec_init();\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n"
  },
  {
    "path": "examples/nbc-headlines/.gitignore",
    "chars": 26,
    "preview": "*.dylib\n*.so\n*.dll\n*.gguf\n"
  },
  {
    "path": "examples/nbc-headlines/1_scrape.ipynb",
    "chars": 6969,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# NBC News Headlines: Scraper\\n\",\n "
  },
  {
    "path": "examples/nbc-headlines/2_build.ipynb",
    "chars": 31822,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# NBC News Headlines: Building FTS5"
  },
  {
    "path": "examples/nbc-headlines/3_search.ipynb",
    "chars": 82169,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# NBC News Headlines: Exploring Hyb"
  },
  {
    "path": "examples/nbc-headlines/Makefile",
    "chars": 177,
    "preview": "all-MiniLM-L6-v2.e4ce9877.q8_0.gguf:\n\tcurl -L -o $@ https://huggingface.co/asg017/sqlite-lembed-model-examples/resolve/m"
  },
  {
    "path": "examples/nbc-headlines/README.md",
    "chars": 48,
    "preview": "- `headlines-2024.db`\n  - 14.5k rows\n  - 4.4MB\n\n"
  },
  {
    "path": "examples/python-recipes/openai-sample.py",
    "chars": 2496,
    "preview": "# pip install openai sqlite-vec\n\nfrom openai import OpenAI\nimport sqlite3\nimport sqlite_vec\nimport struct\nfrom typing im"
  },
  {
    "path": "examples/simple-bun/.gitignore",
    "chars": 24,
    "preview": "node_modules/\nbun.lockb\n"
  },
  {
    "path": "examples/simple-bun/demo.ts",
    "chars": 1195,
    "preview": "import { Database } from \"bun:sqlite\";\nDatabase.setCustomSQLite(\"/usr/local/opt/sqlite3/lib/libsqlite3.dylib\");\n\nconst d"
  },
  {
    "path": "examples/simple-bun/package.json",
    "chars": 123,
    "preview": "{\n  \"name\": \"simple-bun\",\n  \"module\": \"index.ts\",\n  \"type\": \"module\",\n  \"dependencies\": {\n    \"sqlite-vec\": \"latest\"\n  }"
  },
  {
    "path": "examples/simple-c/.gitignore",
    "chars": 5,
    "preview": "demo\n"
  },
  {
    "path": "examples/simple-c/Makefile",
    "chars": 131,
    "preview": "demo: demo.c\n\tgcc \\\n\t\t-O3 -DSQLITE_CORE \\\n\t\t-I../../ -I../../vendor \\\n\t\tdemo.c ../../sqlite-vec.c ../../vendor/sqlite3.c"
  },
  {
    "path": "examples/simple-c/demo.c",
    "chars": 2386,
    "preview": "#include \"sqlite3.h\"\n#include \"sqlite-vec.h\"\n#include <stdio.h>\n#include <unistd.h>\n#include <assert.h>\n\nint main(int ar"
  },
  {
    "path": "examples/simple-deno/demo.ts",
    "chars": 1239,
    "preview": "import { Database } from \"jsr:@db/sqlite@0.11\";\nimport * as sqliteVec from \"npm:sqlite-vec@0.0.1-alpha.9\";\n\nconst db = n"
  },
  {
    "path": "examples/simple-go-cgo/.gitignore",
    "chars": 5,
    "preview": "demo\n"
  },
  {
    "path": "examples/simple-go-cgo/Makefile",
    "chars": 44,
    "preview": "demo: demo.go go.mod go.sum\n\tgo build -o $@\n"
  },
  {
    "path": "examples/simple-go-cgo/demo.go",
    "chars": 1635,
    "preview": "package main\n\nimport (\n\t\"database/sql\"\n\t\"fmt\"\n\t\"log\"\n\n\tsqlite_vec \"github.com/asg017/sqlite-vec-go-bindings/cgo\"\n\t_ \"git"
  },
  {
    "path": "examples/simple-go-cgo/go.mod",
    "chars": 183,
    "preview": "module github.com/asg017/sqlite-vec/examples/go\n\ngo 1.22.5\n\nrequire github.com/mattn/go-sqlite3 v1.14.22\n\nrequire github"
  },
  {
    "path": "examples/simple-go-cgo/go.sum",
    "chars": 657,
    "preview": "github.com/asg017/sqlite-vec-go-bindings v0.0.1-alpha.36 h1:FMGkKAA7nZL8gr/dvIx1uc54J3v2gbLVa+mLqZDCvjk=\ngithub.com/asg0"
  },
  {
    "path": "examples/simple-go-ncruces/.gitignore",
    "chars": 12,
    "preview": "demo\n*.wasm\n"
  },
  {
    "path": "examples/simple-go-ncruces/Makefile",
    "chars": 66,
    "preview": "\ndemo: demo.go\n\tgo build -o $@ $<\n\nclean:\n\trm demo\n\n.PHONY: clean\n"
  },
  {
    "path": "examples/simple-go-ncruces/demo.go",
    "chars": 1764,
    "preview": "package main\n\nimport (\n\t_ \"embed\"\n\t\"log\"\n\n\tsqlite_vec \"github.com/asg017/sqlite-vec-go-bindings/ncruces\"\n\t\"github.com/nc"
  },
  {
    "path": "examples/simple-go-ncruces/go.mod",
    "chars": 323,
    "preview": "module asg017.com/ex1\n\ngo 1.22.5\n\nrequire (\n\tgithub.com/asg017/sqlite-vec-go-bindings v0.0.1-alpha.37\n\tgithub.com/ncruce"
  },
  {
    "path": "examples/simple-go-ncruces/go.sum",
    "chars": 1116,
    "preview": "github.com/asg017/sqlite-vec-go-bindings v0.0.1-alpha.37 h1:Gz6YkDCs60k5VwbBPKDfAPPeIBcuaN3qriAozAaIIZI=\ngithub.com/asg0"
  },
  {
    "path": "examples/simple-node/.gitignore",
    "chars": 32,
    "preview": "node_modules/\npackage-lock.json\n"
  },
  {
    "path": "examples/simple-node/demo.mjs",
    "chars": 1115,
    "preview": "import * as sqliteVec from \"sqlite-vec\";\nimport Database from \"better-sqlite3\";\n\nconst db = new Database(\":memory:\");\nsq"
  },
  {
    "path": "examples/simple-node/package.json",
    "chars": 302,
    "preview": "{\n  \"name\": \"node\",\n  \"version\": \"1.0.0\",\n  \"description\": \"\",\n  \"main\": \"index.js\",\n  \"scripts\": {\n    \"test\": \"echo \\\""
  },
  {
    "path": "examples/simple-node2/.gitignore",
    "chars": 31,
    "preview": "node_modules/\npackage-lock.json"
  },
  {
    "path": "examples/simple-node2/demo.mjs",
    "chars": 1465,
    "preview": "/**\n * This demo Node.js script shows how you can use sqlite-vec with\n * the new builtin node:sqlite module.\n * Note tha"
  },
  {
    "path": "examples/simple-node2/package.json",
    "chars": 168,
    "preview": "{\n  \"name\": \"simple-node2\",\n  \"version\": \"1.0.0\",\n  \"main\": \"demo.mjs\",\n  \"engines\": {\n    \"node\": \">=23.5.0\"\n  },\n  \"de"
  },
  {
    "path": "examples/simple-node2/tmp.mjs",
    "chars": 367,
    "preview": "import { DatabaseSync } from \"node:sqlite\";\nimport * as sqliteVec from \"sqlite-vec\";\n\nconst db = new DatabaseSync(\":memo"
  },
  {
    "path": "examples/simple-python/.gitignore",
    "chars": 6,
    "preview": ".venv\n"
  },
  {
    "path": "examples/simple-python/demo.py",
    "chars": 1214,
    "preview": "import sqlite3\nimport sqlite_vec\n\nfrom typing import List\nimport struct\n\n\ndef serialize_f32(vector: List[float]) -> byte"
  },
  {
    "path": "examples/simple-python/requirements.txt",
    "chars": 11,
    "preview": "sqlite-vec\n"
  },
  {
    "path": "examples/simple-ruby/.gitignore",
    "chars": 13,
    "preview": "Gemfile.lock\n"
  },
  {
    "path": "examples/simple-ruby/Gemfile",
    "chars": 99,
    "preview": "source 'https://rubygems.org'\n\nruby '>= 3.0'\n\ngem 'sqlite3', '~> 2.0', '>= 2.0.1'\ngem 'sqlite-vec'\n"
  },
  {
    "path": "examples/simple-ruby/demo.rb",
    "chars": 907,
    "preview": "require 'sqlite3'\nrequire 'sqlite_vec'\n\n\ndb = SQLite3::Database.new(':memory:')\ndb.enable_load_extension(true)\nSqliteVec"
  },
  {
    "path": "examples/simple-rust/.gitignore",
    "chars": 19,
    "preview": "target/\nCargo.lock\n"
  },
  {
    "path": "examples/simple-rust/Cargo.toml",
    "chars": 213,
    "preview": "[package]\nname = \"sqlite-vec-demo\"\nedition = \"2021\"\n\n[dependencies]\nsqlite-vec={version=\"0.0.1-alpha.7\"}\nrusqlite = {ver"
  },
  {
    "path": "examples/simple-rust/demo.rs",
    "chars": 1690,
    "preview": "use rusqlite::{ffi::sqlite3_auto_extension, Connection, Result};\nuse sqlite_vec::sqlite3_vec_init;\nuse zerocopy::AsBytes"
  },
  {
    "path": "examples/simple-sqlite/demo.sql",
    "chars": 529,
    "preview": ".load ../../dist/vec0\n.mode table\n.header on\n\nselect sqlite_version(), vec_version();\n\nCREATE VIRTUAL TABLE vec_items US"
  },
  {
    "path": "examples/simple-wasm/index.html",
    "chars": 1569,
    "preview": "<html>\n  <body>\n    <h1>sqlite-vec demo/simple-wasm</h1>\n\n    <div id=\"target\"></div>\n    <script type=\"module\">\n      i"
  },
  {
    "path": "examples/sqlite3-cli/README.md",
    "chars": 438,
    "preview": "# `sqlite-vec` statically compiled in the SQLite CLI\n\nYou can compile your own version of the `sqlite3` CLI with `sqlite"
  },
  {
    "path": "examples/sqlite3-cli/core_init.c",
    "chars": 160,
    "preview": "#include \"sqlite3.h\"\n#include \"sqlite-vec.h\"\n#include <stdio.h>\nint core_init(const char *dummy) {\n  return sqlite3_auto"
  },
  {
    "path": "examples/wasm/README.md",
    "chars": 581,
    "preview": "# `sqlite-vec` statically compiled into WASM builds\n\nYou can compile your own version of SQLite's WASM build with `sqlit"
  },
  {
    "path": "examples/wasm/wasm.c",
    "chars": 167,
    "preview": "#include \"sqlite3.h\"\n#include \"sqlite-vec.h\"\n\nint sqlite3_wasm_extra_init(const char * unused) {\n  return sqlite3_auto_e"
  },
  {
    "path": "reference.yaml",
    "chars": 12244,
    "preview": "sections:\n  constructors:\n    title: Constructors\n    desc: |\n      SQL functions that \"construct\" vectors with differen"
  },
  {
    "path": "scripts/progress.ts",
    "chars": 1003,
    "preview": "const src = Deno.readTextFileSync(\"sqlite-vec.c\");\n\nfunction numOccuranges(rg) {\n  return [...src.matchAll(rg)].length;\n"
  },
  {
    "path": "scripts/publish-release.sh",
    "chars": 614,
    "preview": "#!/bin/bash\n\nset -euo pipefail xtrace\n\nif [[ -n $(git status --porcelain | grep -v VERSION | grep -v sqlite-dist.toml) ]"
  },
  {
    "path": "scripts/vendor.sh",
    "chars": 261,
    "preview": "#!/bin/bash\nmkdir -p vendor\ncurl -o sqlite-amalgamation.zip https://www.sqlite.org/2024/sqlite-amalgamation-3450300.zip\n"
  },
  {
    "path": "site/.gitignore",
    "chars": 30,
    "preview": "node_modules\n.vitepress/cache\n"
  },
  {
    "path": "site/.vitepress/config.mts",
    "chars": 6734,
    "preview": "import { DefaultTheme, defineConfig, HeadConfig } from \"vitepress\";\nimport { readFileSync } from \"node:fs\";\nimport { dir"
  },
  {
    "path": "site/.vitepress/theme/HeroImg.vue",
    "chars": 3853,
    "preview": "<script setup lang=\"ts\"></script>\n\n<template>\n  <div\n    style=\"\n      background: var(--vp-c-default-3);\n      padding:"
  },
  {
    "path": "site/.vitepress/theme/Sponsors.vue",
    "chars": 1755,
    "preview": "<script setup lang=\"ts\">\nimport { computed } from \"vue\";\nimport { VPDocAsideSponsors } from \"vitepress/theme\";\nimport { "
  },
  {
    "path": "site/.vitepress/theme/index.ts",
    "chars": 773,
    "preview": "// https://vitepress.dev/guide/custom-theme\nimport { h } from \"vue\";\nimport type { Theme } from \"vitepress\";\nimport Defa"
  },
  {
    "path": "site/.vitepress/theme/style.css",
    "chars": 5380,
    "preview": "/*@import \"https://code.cdn.mozilla.net/fonts/zilla-slab.css\";*/\n\n@font-face {\n  font-family: \"ZillaSlab-SemiBold\";\n  sr"
  },
  {
    "path": "site/api-reference.md",
    "chars": 16087,
    "preview": "---\noutline: 2\n---\n\n# API Reference\n\nA complete reference to all the SQL scalar functions, table functions, and virtual "
  },
  {
    "path": "site/build-ref.mjs",
    "chars": 4505,
    "preview": "import Database from \"better-sqlite3\";\nimport { load } from \"js-yaml\";\nimport { fileURLToPath } from \"node:url\";\nimport "
  },
  {
    "path": "site/compiling.md",
    "chars": 2808,
    "preview": "<script setup>\nimport { data } from './project.data.ts';\n</script>\n\n# Compiling `sqlite-vec`\n\n`sqlite-vec` is is easy to"
  },
  {
    "path": "site/features/knn.md",
    "chars": 5324,
    "preview": "# KNN queries\n\nThe most common use-case for vectors in databases is for K-nearest-neighbors (KNN) queries.\nYou'll have a"
  },
  {
    "path": "site/features/vec0.md",
    "chars": 8764,
    "preview": "# `vec0` Virtual Table\n\n## Metadata in `vec0` Virtual Tables {#vec0_metadata}\n\nThere are three ways to store non-vector "
  },
  {
    "path": "site/getting-started/installation.md",
    "chars": 1693,
    "preview": "# Installing\n\nYou have several options to include `sqlite-vec` into your projects, including\nPyPi packages for Python, N"
  },
  {
    "path": "site/getting-started/introduction.md",
    "chars": 124,
    "preview": "# Introduction to `sqlite-vec`\n\n## Intro to Vector Databases\n\n## Vector Search in SQLite with `sqlite-vec`\n\n## Getting h"
  },
  {
    "path": "site/guides/arithmetic.md",
    "chars": 64,
    "preview": "# Vector Arithmetic\n\n- `vec_add()`\n- `vec_sub()`\n- `vec_mean()`\n"
  },
  {
    "path": "site/guides/binary-quant.md",
    "chars": 3678,
    "preview": "# Binary Quantization\n\n\"Quantization\" refers to a variety of methods and techniques for reducing the\nsize of vectors in "
  },
  {
    "path": "site/guides/classifiers.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "site/guides/hybrid-search.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "site/guides/matryoshka.md",
    "chars": 2173,
    "preview": "# Matryoshka (Adaptive-Length) Embeddings\n\nMatryoshka embeddings are a new class of embedding models introduced in the\nT"
  },
  {
    "path": "site/guides/performance.md",
    "chars": 64,
    "preview": "- page_size\n- memory mapping\n- in-memory index\n- chunk_size (?)\n"
  },
  {
    "path": "site/guides/rag.md",
    "chars": 64,
    "preview": "# Retrival Augmented Generation (RAG)\n\n- \"memories\"?\n- chunking\n"
  },
  {
    "path": "site/guides/scalar-quant.md",
    "chars": 1104,
    "preview": "# Scalar Quantization (SQ)\n\n\"Quantization\" refers to a variety of methods and techniques for reducing the\nsize of vector"
  },
  {
    "path": "site/guides/semantic-search.md",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "site/index.md",
    "chars": 1182,
    "preview": "---\n# https://vitepress.dev/reference/default-theme-home-page\nlayout: home\n\nhero:\n  name: \"sqlite-vec\"\n  text: \"\"\n  tagl"
  },
  {
    "path": "site/package.json",
    "chars": 389,
    "preview": "{\n  \"scripts\": {\n    \"ref\": \"node build-ref.mjs\",\n    \"dev\": \"vitepress dev\",\n    \"build\": \"vitepress build\",\n    \"previ"
  },
  {
    "path": "site/project.data.ts",
    "chars": 353,
    "preview": "import { readFileSync } from \"node:fs\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:u"
  },
  {
    "path": "site/sqlite.tmlanguage.json",
    "chars": 29784,
    "preview": "{\n  \"information_for_contributors\": [\n    \"This file has been converted from https://github.com/microsoft/vscode-mssql/b"
  },
  {
    "path": "site/using/android-ios.md",
    "chars": 996,
    "preview": "# `sqlite-vec` on Android and iOS devices\n\n`sqlite-vec` can run on mobile devices like Android and iOS. As of `v0.1.2`, "
  },
  {
    "path": "site/using/c.md",
    "chars": 300,
    "preview": "# Using `sqlite-vec` in C\n\nThe `sqlite-vec` project is a single `sqlite-vec.c` and `sqlite-vec.h` file. They can be vend"
  },
  {
    "path": "site/using/datasette.md",
    "chars": 613,
    "preview": "# Using `sqlite-vec` in Datasette\n\n[![Datasette](https://img.shields.io/pypi/v/datasette-sqlite-vec.svg?color=B6B6D9&lab"
  },
  {
    "path": "site/using/go.md",
    "chars": 3435,
    "preview": "# Using `sqlite-vec` in Go\n\n\n\nThere are two ways you can embed `sqlite-vec` into Go applications: a CGO option\nfor libra"
  },
  {
    "path": "site/using/js.md",
    "chars": 4887,
    "preview": "# Using `sqlite-vec` in Node.js, Deno, and Bun\n\n[![npm](https://img.shields.io/npm/v/sqlite-vec.svg?color=green&logo=nod"
  },
  {
    "path": "site/using/python.md",
    "chars": 4544,
    "preview": "---\ntitle: sqlite-vec in Python\n---\n\n# Using `sqlite-vec` in Python\n\n[![PyPI](https://img.shields.io/pypi/v/sqlite-vec.s"
  },
  {
    "path": "site/using/rqlite.md",
    "chars": 1669,
    "preview": "# Using `sqlite-vec` in rqlite\n\n[rqlite](https://rqlite.io/) users can use `sqlite-vec` with rqlite by loading the exten"
  },
  {
    "path": "site/using/ruby.md",
    "chars": 1057,
    "preview": "# Using `sqlite-vec` in Ruby\n\n![Gem](https://img.shields.io/gem/v/sqlite-vec?color=red&logo=rubygems&logoColor=white)\n\nR"
  },
  {
    "path": "site/using/rust.md",
    "chars": 2391,
    "preview": "# Using `sqlite-vec` in Rust\n[![Crates.io](https://img.shields.io/crates/v/sqlite-vec?logo=rust)](https://crates.io/crat"
  },
  {
    "path": "site/using/sqlite-utils.md",
    "chars": 414,
    "preview": "# Using `sqlite-vec` in `sqlite-utils`\n\n![sqlite-utils](https://img.shields.io/pypi/v/sqlite-utils-sqlite-vec.svg?color="
  },
  {
    "path": "site/using/wasm.md",
    "chars": 1354,
    "preview": "# `sqlite-vec` in the Browser with WebAssembly\n\n`sqlite-vec` can be statically compiled into [official SQLite WASM](http"
  },
  {
    "path": "site/versioning.md",
    "chars": 1158,
    "preview": "# Semantic Versioning for `sqlite-vec`\n\n`sqlite-vec` is pre-v1, so according to the rules of\n[Semantic Versioning](https"
  },
  {
    "path": "sqlite-dist.toml",
    "chars": 504,
    "preview": "[package]\nname = \"sqlite-vec\"\nlicense = \"MIT OR Apache\"\nhomepage = \"https://alexgarcia.xyz/sqlite-vec\"\nrepo = \"https://g"
  },
  {
    "path": "sqlite-vec.c",
    "chars": 319629,
    "preview": "#include \"sqlite-vec.h\"\n\n#include <assert.h>\n#include <errno.h>\n#include <float.h>\n#include <inttypes.h>\n#include <limit"
  },
  {
    "path": "sqlite-vec.h.tmpl",
    "chars": 831,
    "preview": "#ifndef SQLITE_VEC_H\n#define SQLITE_VEC_H\n\n#ifndef SQLITE_CORE\n#include \"sqlite3ext.h\"\n#else\n#include \"sqlite3.h\"\n#endif"
  },
  {
    "path": "test.sql",
    "chars": 14742,
    "preview": "\n.load dist/vec0main\n.bail on\n\n.mode qbox\n\n\n.load ./memstat\n.echo on\n\nselect name, value from sqlite_memstat where name "
  },
  {
    "path": "tests/.gitignore",
    "chars": 8,
    "preview": "target/\n"
  },
  {
    "path": "tests/.python-version",
    "chars": 5,
    "preview": "3.12\n"
  },
  {
    "path": "tests/Cargo.toml",
    "chars": 159,
    "preview": "[package]\nname = \"tests\"\nversion = \"0.1.0\"\nedition = \"2021\"\n\n[dependencies]\n\n[build-dependencies]\ncc = \"1.0\"\n\n[[bin]]\nna"
  },
  {
    "path": "tests/__snapshots__/test-auxiliary.ambr",
    "chars": 16767,
    "preview": "# serializer version: 1\n# name: test_constructor_limit[max 16 auxiliary columns]\n  dict({\n    'error': 'OperationalError"
  },
  {
    "path": "tests/__snapshots__/test-general.ambr",
    "chars": 5022,
    "preview": "# serializer version: 1\n# name: test_info\n  OrderedDict({\n    'sql': 'select key, typeof(value) from v_info order by 1',"
  },
  {
    "path": "tests/__snapshots__/test-insert-delete.ambr",
    "chars": 1078,
    "preview": "# serializer version: 1\n# name: test_info_table_contents\n  OrderedDict({\n    'sql': \"select key, value from v_info where"
  },
  {
    "path": "tests/__snapshots__/test-knn-distance-constraints.ambr",
    "chars": 5740,
    "preview": "# serializer version: 1\n# name: test_normal\n  OrderedDict({\n    'sql': 'SELECT * FROM v',\n    'rows': list([\n      Order"
  },
  {
    "path": "tests/__snapshots__/test-metadata.ambr",
    "chars": 116121,
    "preview": "# serializer version: 1\n# name: test_constructor_limit[max 16 metadata columns]\n  dict({\n    'error': 'OperationalError'"
  },
  {
    "path": "tests/__snapshots__/test-partition-keys.ambr",
    "chars": 7316,
    "preview": "# serializer version: 1\n# name: test_constructor_limit[max 4 partition keys]\n  dict({\n    'error': 'OperationalError',\n "
  },
  {
    "path": "tests/afbd/.gitignore",
    "chars": 6,
    "preview": "*.tgz\n"
  },
  {
    "path": "tests/afbd/.python-version",
    "chars": 5,
    "preview": "3.12\n"
  },
  {
    "path": "tests/afbd/Makefile",
    "chars": 421,
    "preview": "random_ints_1m.tgz:\n\tcurl -o $@ https://storage.googleapis.com/ann-filtered-benchmark/datasets/random_ints_1m.tgz\n\nrando"
  },
  {
    "path": "tests/afbd/README.md",
    "chars": 441,
    "preview": "\n# hnm\n\n```\ntar -xOzf hnm.tgz ./tests.jsonl  > tests.jsonl\nsolite q \"select group_concat(distinct key) from lines_read('"
  },
  {
    "path": "tests/afbd/test-afbd.py",
    "chars": 8375,
    "preview": "import numpy as np\nfrom tqdm import tqdm\nfrom deepdiff import DeepDiff\n\nimport tarfile\nimport json\nfrom io import BytesI"
  },
  {
    "path": "tests/build.rs",
    "chars": 368,
    "preview": "fn main() {\n    cc::Build::new()\n        .file(\"../sqlite-vec.c\")\n        .file(\"../vendor/sqlite3.c\")\n        .define(\""
  },
  {
    "path": "tests/conftest.py",
    "chars": 249,
    "preview": "import pytest\nimport sqlite3\n\n\n@pytest.fixture()\ndef db():\n    db = sqlite3.connect(\":memory:\")\n    db.row_factory = sql"
  },
  {
    "path": "tests/correctness/build.py",
    "chars": 374,
    "preview": "import numpy as np\nimport duckdb\ndb = duckdb.connect(\":memory:\")\n\nresult = db.execute(\n\"\"\"\n  select\n    -- _id,\n    -- t"
  },
  {
    "path": "tests/correctness/test-correctness.py",
    "chars": 3420,
    "preview": "import numpy as np\nimport numpy.typing as npt\nimport time\nimport tqdm\nimport pytest\n\ndef cosine_similarity(\n    vec: npt"
  },
  {
    "path": "tests/fuzz/.gitignore",
    "chars": 16,
    "preview": "*.dSYM\ntargets/\n"
  },
  {
    "path": "tests/fuzz/Makefile",
    "chars": 3085,
    "preview": "# Auto-detect clang with libFuzzer support.\n# Priority: Homebrew LLVM (macOS ARM) → Homebrew LLVM (macOS Intel) →\n#     "
  },
  {
    "path": "tests/fuzz/README.md",
    "chars": 370,
    "preview": "```\nASAN_OPTIONS=detect_leaks=1 ./targets/vec0_create \\\n  -dict=./vec0-create.dict -max_total_time=5 \\\n  ./corpus/vec0-c"
  },
  {
    "path": "tests/fuzz/TODO.md",
    "chars": 1874,
    "preview": "# Fuzz Testing TODO: Undefined Behavior Findings\n\nUBSAN findings from fuzz targets. None are crash-level bugs, but all a"
  },
  {
    "path": "tests/fuzz/corpus/exec/select1",
    "chars": 8,
    "preview": "SELECT 1"
  },
  {
    "path": "tests/fuzz/corpus/exec/vec_version",
    "chars": 20,
    "preview": "SELECT vec_version()"
  },
  {
    "path": "tests/fuzz/corpus/json/empty",
    "chars": 2,
    "preview": "[]"
  },
  {
    "path": "tests/fuzz/corpus/json/valid_2d",
    "chars": 11,
    "preview": "[0.5, -0.5]"
  },
  {
    "path": "tests/fuzz/corpus/json/valid_4d",
    "chars": 20,
    "preview": "[1.0, 2.0, 3.0, 4.0]"
  },
  {
    "path": "tests/fuzz/corpus/vec-mismatch/json_1d_blob_5byte",
    "chars": 6,
    "preview": "\fABCDE"
  },
  {
    "path": "tests/fuzz/corpus/vec-mismatch/json_2d_blob_3byte",
    "chars": 4,
    "preview": "\u0006ABC"
  },
  {
    "path": "tests/fuzz/corpus/vec0-create/normal1",
    "chars": 14,
    "preview": "aaa float[12]\n"
  },
  {
    "path": "tests/fuzz/corpus/vec0-create/normal2",
    "chars": 27,
    "preview": "aaa float[12], bbb int8[6]\n"
  },
  {
    "path": "tests/fuzz/exec.c",
    "chars": 626,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec"
  },
  {
    "path": "tests/fuzz/exec.dict",
    "chars": 326,
    "preview": "select=\"select\"\nfrom=\"from\"\ncname1=\"aaa\"\ncname1=\"bbb\"\ncname1=\"ccc\"\ntype1=\"float\"\ntype2=\"int8\"\ntype3=\"bit\"\nlparen=\"[\"\nrpa"
  },
  {
    "path": "tests/fuzz/json.c",
    "chars": 769,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec"
  },
  {
    "path": "tests/fuzz/metadata-columns.c",
    "chars": 3625,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec."
  },
  {
    "path": "tests/fuzz/numpy.c",
    "chars": 947,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec"
  },
  {
    "path": "tests/fuzz/numpy.dict",
    "chars": 79,
    "preview": "magic=\"\\x93NUMPY\"\nlparen=\"(\"\nrparen=\")\"\nlbrace=\"{\"\nrbrace=\"}\"\nsq1=\"\\\"\"\nsq2=\"'\"\n"
  },
  {
    "path": "tests/fuzz/scalar-functions.c",
    "chars": 2851,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec."
  },
  {
    "path": "tests/fuzz/scalar-functions.dict",
    "chars": 163,
    "preview": "json_vec1=\"[1.0, 2.0, 3.0, 4.0]\"\njson_vec2=\"[0.5, -0.5]\"\njson_empty=\"[]\"\njson_nan=\"[NaN]\"\njson_inf=\"[Infinity]\"\njson_lar"
  },
  {
    "path": "tests/fuzz/shadow-corrupt.c",
    "chars": 3589,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec."
  },
  {
    "path": "tests/fuzz/targets/.gitignore",
    "chars": 14,
    "preview": "*\n!.gitignore\n"
  },
  {
    "path": "tests/fuzz/vec-each.c",
    "chars": 758,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec."
  },
  {
    "path": "tests/fuzz/vec-mismatch.c",
    "chars": 6322,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec."
  },
  {
    "path": "tests/fuzz/vec0-create-full.c",
    "chars": 1284,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec."
  },
  {
    "path": "tests/fuzz/vec0-create.c",
    "chars": 875,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec"
  },
  {
    "path": "tests/fuzz/vec0-create.dict",
    "chars": 220,
    "preview": "cname1=\"aaa\"\ncname1=\"bbb\"\ncname1=\"ccc\"\ntype1=\"float\"\ntype2=\"int8\"\ntype3=\"bit\"\nlparen=\"[\"\nrparen=\"]\"\npk=\"primary key\"\ntex"
  },
  {
    "path": "tests/fuzz/vec0-delete-completeness.c",
    "chars": 3113,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec."
  },
  {
    "path": "tests/fuzz/vec0-operations.c",
    "chars": 2729,
    "preview": "#include <stdint.h>\n#include <stddef.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"sqlite-vec."
  },
  {
    "path": "tests/fuzz.py",
    "chars": 1059,
    "preview": "import sqlite3\nEXT_PATH = \"dist/vec0\"\ndb = sqlite3.connect(\":memory:\")\n\ndb.execute(\n    \"create temp table base_function"
  },
  {
    "path": "tests/helpers.py",
    "chars": 1206,
    "preview": "import sqlite3\nimport struct\nfrom collections import OrderedDict\n\n\ndef _f32(list):\n    return struct.pack(\"%sf\" % len(li"
  },
  {
    "path": "tests/leak-fixtures/each.sql",
    "chars": 220,
    "preview": ".load dist/vec0\n.mode box\n.header on\n.eqp on\n.echo on\n\nselect sqlite_version(), vec_version();\n\nselect * from vec_each('"
  },
  {
    "path": "tests/leak-fixtures/knn.sql",
    "chars": 802,
    "preview": ".load dist/vec0\n.mode box\n.header on\n.eqp on\n.echo on\n\nselect sqlite_version(), vec_version();\n\ncreate virtual table v u"
  },
  {
    "path": "tests/leak-fixtures/vec0-create.sql",
    "chars": 93,
    "preview": ".load dist/vec0\n.mode box\n.header on\n.eqp on\n.echo on\n\ncreate virtual table v using vec0(y);\n"
  },
  {
    "path": "tests/minimum/.gitignore",
    "chars": 6,
    "preview": "dist/\n"
  },
  {
    "path": "tests/minimum/Makefile",
    "chars": 1032,
    "preview": "dist/.stammp:\n\tmkdir -p dist\n\ttouch $@\n\ndist/sqlite-amalgamation-3310100/.stamp: dist/.stammp\n\trm -rf dist/sqlite-amalga"
  },
  {
    "path": "tests/minimum/demo.c",
    "chars": 756,
    "preview": "#include \"sqlite3.h\"\n#include \"sqlite-vec.h\"\n#include <stdio.h>\n#include <unistd.h>\n#include <assert.h>\n\nint main(int ar"
  },
  {
    "path": "tests/pyproject.toml",
    "chars": 182,
    "preview": "[project]\nname = \"tests\"\nversion = \"0.1.0\"\ndescription = \"Add your description here\"\nreadme = \"README.md\"\nrequires-pytho"
  },
  {
    "path": "tests/skip.test-correctness.py",
    "chars": 1157,
    "preview": "import sqlite3\nimport json\n\ndb = sqlite3.connect(\"test2.db\")\ndb.enable_load_extension(True)\ndb.load_extension(\"dist/vec0"
  },
  {
    "path": "tests/sqlite-vec-internal.h",
    "chars": 2275,
    "preview": "#ifndef SQLITE_VEC_INTERNAL_H\n#define SQLITE_VEC_INTERNAL_H\n\n#include <stdlib.h>\n#include <stddef.h>\n\nint min_idx(\n  con"
  },
  {
    "path": "tests/test-auxiliary.py",
    "chars": 4348,
    "preview": "import sqlite3\nfrom helpers import exec, vec0_shadow_table_contents\n\n\ndef test_constructor_limit(db, snapshot):\n    asse"
  },
  {
    "path": "tests/test-general.py",
    "chars": 894,
    "preview": "import sqlite3\nimport pytest\nfrom helpers import exec\n\n\n@pytest.mark.skipif(\n    sqlite3.sqlite_version_info[1] < 37,\n  "
  },
  {
    "path": "tests/test-insert-delete.py",
    "chars": 16183,
    "preview": "import sqlite3\nimport struct\nimport pytest\nfrom helpers import _f32, _i64, _int8, exec\n\n\ndef test_insert_creates_chunks_"
  },
  {
    "path": "tests/test-knn-distance-constraints.py",
    "chars": 1623,
    "preview": "import sqlite3\nfrom helpers import exec\n\n\ndef test_normal(db, snapshot):\n    db.execute(\"create virtual table v using ve"
  },
  {
    "path": "tests/test-loadable.py",
    "chars": 87298,
    "preview": "# ruff: noqa: E731\n\nimport re\nfrom typing import List\nimport sqlite3\nimport unittest\nfrom random import random\nimport st"
  },
  {
    "path": "tests/test-metadata.py",
    "chars": 20600,
    "preview": "import pytest\nimport sqlite3\nfrom collections import OrderedDict\nimport json\nfrom helpers import exec, vec0_shadow_table"
  },
  {
    "path": "tests/test-partition-keys.py",
    "chars": 2512,
    "preview": "import sqlite3\nfrom helpers import exec, vec0_shadow_table_contents\n\n\ndef test_constructor_limit(db, snapshot):\n    asse"
  },
  {
    "path": "tests/test-unit.c",
    "chars": 20702,
    "preview": "#include \"../sqlite-vec.h\"\n#include \"sqlite-vec-internal.h\"\n#include <stdio.h>\n#include <string.h>\n#include <assert.h>\n#"
  },
  {
    "path": "tests/test-wasm.mjs",
    "chars": 265,
    "preview": "async function main() {\n  const { default: init } = await import(\"../dist/.wasm/sqlite3.mjs\");\n  const sqlite3 = await i"
  },
  {
    "path": "tests/unittest.rs",
    "chars": 5462,
    "preview": "fn main() {\n    println!(\"Hello, world!\");\n    println!(\"{:?}\", _min_idx(vec![3.0, 2.0, 1.0, f32::MAX, f32::MAX, f32::MA"
  },
  {
    "path": "tests/utils.py",
    "chars": 462,
    "preview": "import numpy as np\nfrom io import BytesIO\n\n\ndef to_npy(arr):\n    buf = BytesIO()\n    np.save(buf, arr)\n    buf.seek(0)\n "
  }
]

// ... and 19 more files (download for full content)

About this extraction

This page contains the full source code of the asg017/sqlite-vec GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 219 files (1.0 MB), approximately 339.6k tokens, and a symbol index with 541 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!