Repository: darrenldl/docfd
Branch: main
Commit: 203de25de255
Files: 125
Total size: 618.6 KB
Directory structure:
gitextract_8iyswmwt/
├── .gitattributes
├── .github/
│ └── workflows/
│ └── deploy.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── Makefile
├── README.md
├── bin/
│ ├── BLAKE2B.ml
│ ├── UI.ml
│ ├── UI_base.ml
│ ├── args.ml
│ ├── clipboard.ml
│ ├── command.ml
│ ├── content_and_search_result_rendering.ml
│ ├── debug_utils.ml
│ ├── docfd.ml
│ ├── document.ml
│ ├── document.mli
│ ├── document_pipeline.ml
│ ├── document_pipeline.mli
│ ├── document_src.ml
│ ├── dune
│ ├── file_utils.ml
│ ├── filter_exp.ml
│ ├── glob.ml
│ ├── glob.mli
│ ├── lock_protected_cell.ml
│ ├── lock_protected_cell.mli
│ ├── misc_utils.ml
│ ├── params.ml
│ ├── path_opening.ml
│ ├── ping.ml
│ ├── ping.mli
│ ├── printers.ml
│ ├── proc_utils.ml
│ ├── result_syntax.ml
│ ├── script.ml
│ ├── search_mode.ml
│ ├── session.ml
│ ├── session.mli
│ ├── session_manager.ml
│ ├── session_manager.mli
│ ├── string_utils.ml
│ ├── version_string.ml
│ └── xdg_utils.ml
├── containers/
│ ├── Containerfile.demo-vhs
│ └── Containerfile.docfd
├── demo-vhs-tapes/
│ ├── repo-non-interactive.tape
│ ├── repo.tape
│ └── ui-screenshot.tape
├── demo-vhs.sh
├── docfd.opam
├── docfd.opam.locked
├── docfd.opam.template
├── dune-project
├── file-collection-tests.t/
│ ├── dune
│ └── run.t
├── lib/
│ ├── GZIP.ml
│ ├── char_map.ml
│ ├── doc_id_db.ml
│ ├── doc_id_db.mli
│ ├── docfd_lib.ml
│ ├── dune
│ ├── index.ml
│ ├── index.mli
│ ├── int_map.ml
│ ├── int_set.ml
│ ├── link.ml
│ ├── misc_utils.ml
│ ├── option_syntax.ml
│ ├── params.ml
│ ├── parser_components.ml
│ ├── search_exp.ml
│ ├── search_exp.mli
│ ├── search_phrase.ml
│ ├── search_phrase.mli
│ ├── search_result.ml
│ ├── search_result.mli
│ ├── search_result_heap.ml
│ ├── sqlite3_utils.ml
│ ├── stop_signal.ml
│ ├── stop_signal.mli
│ ├── string_map.ml
│ ├── string_set.ml
│ ├── task_pool.ml
│ ├── task_pool.mli
│ ├── tokenization.ml
│ ├── word_db.ml
│ └── word_db.mli
├── line-wrapping-tests.t/
│ ├── dune
│ ├── long-words.txt
│ ├── run.t
│ ├── sentences.txt
│ └── words.txt
├── match-type-tests.t/
│ ├── dune
│ ├── run.t
│ └── test.txt
├── misc-behavior-tests.t/
│ ├── abcd.txt
│ ├── dune
│ └── run.t
├── non-interactive-mode-return-code-tests.t/
│ ├── dune
│ └── run.t
├── open-with-tests.t/
│ ├── dune
│ └── run.t
├── printing-tests.t/
│ ├── empty.txt
│ ├── run.t
│ ├── test0.txt
│ ├── test1.txt
│ ├── test2.txt
│ ├── test3.txt
│ └── test4.txt
├── profiling/
│ ├── dune
│ └── main.ml
├── publish.sh
├── run-container.sh
├── script-tests.t/
│ ├── dune
│ └── run.t
├── search-scope-narrowing-tests.t/
│ ├── dune
│ └── run.t
├── tests/
│ ├── dune
│ ├── main.ml
│ ├── search_exp_tests.ml
│ ├── test_utils.ml
│ └── utils_tests.ml
└── update-version-string.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
*.t/run.t linguist-vendored
================================================
FILE: .github/workflows/deploy.yml
================================================
name: Deploy on release
on:
push:
tags:
- "[0-9]*"
- "test*"
branches:
- "ci-test"
jobs:
build:
strategy:
fail-fast: false
matrix:
os:
- ubuntu-22.04
- ubuntu-22.04-arm
- macos-latest
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- run: echo "GITHUB_TAG=$(git describe --always --tags)" >> $GITHUB_ENV
- if: ${{ startsWith(matrix.os, 'ubuntu') && !endsWith(matrix.os, 'arm') }}
run: echo "OS_SHORT_NAME=linux" >> $GITHUB_ENV
- if: ${{ startsWith(matrix.os, 'ubuntu') && endsWith(matrix.os, 'arm') }}
run: echo "OS_SHORT_NAME=linux-arm" >> $GITHUB_ENV
- if: ${{ startsWith(matrix.os, 'macos') }}
run: echo "OS_SHORT_NAME=macos" >> $GITHUB_ENV
- if: ${{ startsWith(matrix.os, 'windows') }}
run: echo "OS_SHORT_NAME=windows" >> $GITHUB_ENV
- name: Set up OCaml for Linux
uses: ocaml/setup-ocaml@v3
with:
ocaml-compiler: "5.2.1"
- run: opam install dune
- run: opam install . --deps-only --with-test
- name: Use commit hash as version if on ci-test branch
if: ${{ github.ref_name == 'ci-test' }}
run: |
echo "DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }}" >> $GITHUB_ENV
- name: Create build for macOS
if: ${{ env.OS_SHORT_NAME == 'macos' }}
run: |
export DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }}
opam exec -- make release-build
- name: Create static build for Linux
if: ${{ env.OS_SHORT_NAME == 'linux' }}
run: |
export DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }}
opam exec -- make release-static-build
- name: Create static build for Linux ARM
if: ${{ env.OS_SHORT_NAME == 'linux-arm' }}
run: |
export DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }}
opam exec -- make release-static-build-arm
- name: Package into tar.gz
run: |
mv release/docfd docfd
tar -cvzf docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz docfd
- name: Upload artifacts
if: ${{ github.ref_name == 'ci-test' }}
uses: actions/upload-artifact@v4
with:
name: docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz
path: docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz
- name: Release
if: ${{ github.ref_name != 'ci-test' }}
uses: softprops/action-gh-release@v2
with:
files: |
docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz
- name: Release preview
if: ${{ github.ref_name == 'ci-test' }}
uses: softprops/action-gh-release@v2
with:
tag_name: preview
name: "Preview Build"
body: "Automated preview build from commit ${{ github.sha }}. This release is updated on every push to ci-test."
prerelease: true
files: |
docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz
================================================
FILE: .gitignore
================================================
_build/
_coverage/
.merlin
*.rst~
*.install
bisect*.out
bisect*.coverage
fuzz-*-input
fuzz-*-output
fuzz-logs/
/test*.md
/test*.txt
test*.pdf
test*.docx
*.tar.gz
/release/
perf.data*
*.pdf
.cache
/*.log
*.mp4
dummy.gif
*.db
*.db-journal
================================================
FILE: CHANGELOG.md
================================================
# Changelog
## 13.0.0
- Removed fzf dependency entirely
- Switched from fzf to a built-in implementation for fuzzy selection
menus
- Replaced "sort by fzf" functionality with built-in PATH-FUZZY-RANK mode
- Renamed OPEN-SCRIPT to SCRIPTS
- Moved DELETE-SCRIPT functionality to `Ctrl`+`X` under SCRIPTS
- Added `y` key binding to copy in LINKS
- Changed key binding for rotation of key binding info grid from `?` to `<` and `>`
- Added "focus content" via `z` which hides the bottom right pane
- Added key binding info pane toggle via `?`
- Made the UX of various input fields slightly more polished
- This includes adding `Esc` for cancelling during SAVE-SCRIPT
- Added request handling debouncing to reduce pointless workload triggered during
fast typing
- Improved link extraction
- Minor UI fixes and polishes
## 12.3.2
- Fixed missing stop signal passing for `content:"..."` filter expression handling
- Previously, filter expressions with `content:"..."` were not
cancelled properly
- Added additional guards against potential freezing due to DB pool
exhaustion or DB connection issues
## 12.3.1
- Fixed key binding info grid
- Added the missing `l` key binding info
- Fixed some other key binding labels
## 12.3.0
**Note**: This update contains a feature that requires removing the existing index DB to take effect.
- Docfd script comment support improvement
- Added `;` prefix for system comments, which are not preserved after editing of command history
- `#` now denotes user comment and is preserved after editing of command history
- Ordering of `#` is also preserved during saving a session as script
- Added link opening support via LINKS mode
- This will **only** work after recreating the index DB
- `l` opens LINKS mode with the same navigation keybinds
- `Enter` to open link
- `o` to open link and remain in LINKS
- Links which are closest to the selected search result will be prioritized first
- Added key binding `xh` to clear command history quickly
## 12.2.0
- Dependencies adjustment for CI build
- Internal refactoring
- Refactored document store module into session state module to
better reflect that the data structure is capturing not just
documents and search results, but also some UI states, etc
- Moved screen split handling to the level of session state instead of
plain UI state
- This allows Docfd script to better capture the view on screen
- Added `Shift`+`Tab` for changing screen split ratio in the other
direction, and removed the "rotating" behaviour of `Tab`
## 12.1.0
- Added missing sorting based on paths when path dates are the same
- Added PDF viewer integration for Zathura on Linux
- Moved sorting handling to the level of document store command instead of just plain UI update
- This allows Docfd script to better capture the view on screen
- Fixed default cache directory location on macOS
- Changed from `~/Library/Application Support` to `~/Library/Caches`
- Added `--reverse` to fzf invocation for better UX
- Added `Ctrl`+`D` for script deletion
## 12.0.0
This contains a **breaking** DB change, you will need to remove index DB generated by Docfd version prior to 12.0.0-alpha.13
#### Highlights of changes since 11.0.1
- Moved to using a global word table to reduce index DB size and speed up search (12.0.0-alpha.13)
- This is the **breaking** DB change stated above
- Added further search speed optimizations (12.0.0)
- Added an additional document pruning stage
- Added a first word candidate pruning stage based on length of the first search word
- Searching for short words should now feel much more responsive
- Replaced filter glob with a more powerful filter language, with
autocomplete in filter field (12.0.0-alpha.1, 12.0.0-alpha.2,
12.0.0-alpha.5, 12.0.0-alpha.6, 12.0.0-alpha.10, 12.0.0-alpha.11)
- Added content view pane scrolling (12.0.0-alpha.5, 12.0.0-alpha.8)
- Controlled by `-`/`=`
- Added "save script" and "load script" functionality to make it
actually viable to reuse Docfd commands (12.0.0-alpha.8,
12.0.0-alpha.9)
- SQL query optimizations for prefix and exact search terms
(12.0.0-alpha.3)
- Key binding info grid improvements (12.0.0-alpha.4)
- Added more key bindings
- Packed columns more tightly
- Added `--paths-from -` to accept list of paths from stdin
(12.0.0-alpha.3)
- Added WSL clipboard integration (12.0.0-alpha.4)
- Added more marking key bindings (12.0.0-alpha.4)
- `mark listed` (`ml`) marks all currently listed documents
- `unmark listed` (`Ml`) unmarks all currently listed documents
- `--open-with` placeholder handling fixes (12.0.0-alpha.4)
- Using `{page_num}` and `{line_num}` crashes in 11.0.1
when there are no search results
- Added sorting to document list (12.0.0-alpha.11, 12.0.0)
- `s` for sort ascending mode and `Shift+S` for sort descending mode
- Under the sort modes, the sort by types are as follows:
- `p` sort by path
- `d` sort by path date
- `s` sort by score
- `m` sort by modification time
- `f` sort by an interactive fzf search
- Selected option will be ranked the highest
- Rest of the documents will be ranked using the ranking from fzf
- Adjusted attributes listed in document list entry (12.0.0-alpha.11)
- Added path date
- Replaced last scan time with last modified time
- Reworked the internal architecture of document store snapshots
storage and management, which makes the overall interaction
between UI and core code much more robust (12.0.0-alpha.11)
#### Changes since 12.0.0-alpha.13
- Added further search speed optimizations:
- Added an additional document pruning stage
- Added a first word candidate pruning stage based on length of the first search word
- Searching for short words should now feel much more responsive
- Fixed interaction with fzf (which is used in some selection menus) on macOS
due to different behavior of `Unix.waitpid` on macOS compared to Linux
- Document sorting fine tuning
- Fixed document sorting fallback behavior
- If there is no search expression but sorting method chosen is to
sort by score, then sorting method falls back to the option
specified by `--sort-no-score`
- Fixed macOS detection
- Updated `--open-with` to accept a list of extensions, e.g.
`--open-with ts,js:detached="... {path}"`
- Added sort by fzf functionality
- Under sort mode
- `f` sort by an interactive fzf search
- Selected option will be ranked the highest
- Rest of the documents will be ranked using the ranking from fzf
## 12.0.0-alpha.13
- Moved to using a global word table to reduce index DB size and speed up search
- This is a **breaking** DB change, you will need to remove index DB generated by older versions of Docfd
- Added missing mutexes for caches, should further reduce random crashes
- Added more path date extraction formats
- `yyyy-mmm-dd`, `yyyy-mmmm-dd`, `dd-mmm-yyyy`, `dd-mmmm-yyyy`
- `-` is an optional separator that is not a digit and not a letter
## 12.0.0-alpha.12
- Made resetting of search result selection and content view offset less aggressive
- Some changes in 12.0.0-alpha.11 caused some UI counters to reset more frequently than desired
## 12.0.0-alpha.11
- Removed disabling of drop mode key binding `d` when searching or filtering is ongoing
- Fixed content view pane offset not resetting when mouse is used to scroll search result list
- Fixed content view pane staying small while scrolling up when the search result is close to the bottom of the file
- Swapped all mutexes to Eio mutexes to hopefully remove the very random freezes that occur quite rarely
- They feel like deadlocks due to mixing Eio mutexes
(which block fiber) and stdlib mutexes (which block an entire domain)
- Added sorting to document list
- `s` for sort ascending mode and `Shift+S` for sort descending mode
- Under the sort modes, the sort by types are as follows:
- `p` sort by path
- `d` sort by path date
- `s` sort by score
- `m` sort by modification time
- Added `--sort` and `--sort-no-score`
- Latter is mainly useful for when `--files-without-match` is used
- Added `yyyymmdd` path date extraction
- Added `mod-date` to filter language
- Adjusted attributes listed in document list entry
- Added path date
- Replaced last scan time with last modified time
- Reworked `--script` into `--script` and `--start-with-script`
- `--script` is now only for non-interactive use
- `--start-with-script` is only for interactive use
- This mirrors the duals `--filter` vs `--start-with-filter` and `--search` vs `--start-with-search`
- Reworked the internal architecture of document store snapshots storage and management
- Snapshots are now centrally managed by `Document_store_manager`, along with
improvements to snapshot handling logic in general
- This makes the overall interaction between UI and core code
much more robust, and eliminates random workarounds used to
deal with UI and data synchronization, which have
been riddled with random minor bugs
## 12.0.0-alpha.10
- Added basic autocomplete to filter field
- Improved script save autocomplete to insert longest common prefix
- Fixed script save autocomplete so it no longer erases original text when no recommendations are available
## 12.0.0-alpha.9
- Disabled `Tab` handling in edit fields to reduce friction in UX
- Added `nano`-style autocomplete to save commands field with listing of existing scripts
## 12.0.0-alpha.8
- Changed `--commands-from` to `--script`
- Added "save commands as script" and "load script" functionality to streamline reusing of commands
- Improved content view pane scrolling control
- The internal counter no longer scrolls past the limit
## 12.0.0-alpha.7
- Fixed interactive use of `--commands-from`
- Added `mark listed` and `unmark listed` to template command history file help info
## 12.0.0-alpha.6
- Fixed `not` operator parsing
- Previously `not ext:txt and not ext:md` would be parsed as `not (ext:txt and not ext:md)`, which is not what is typically expected
- `not` now binds tightly, so `not ext:txt and not ext:md` is parsed as `(not ext:txt) and (not ext:md)`
## 12.0.0-alpha.5
- Added content view pane scrolling
- Controlled by `-`/`=`
- Removed extraneous marking functionality
- `mark unlisted`
- `unmark unlisted`
- Added `"..."` as a shorthand to `content:"..."` to filter expression
- For example, `content:keyword AND path-date:>2025-01-01` can be written as `"keyword" AND path-date:>2025-01-01`
- The quotation is necessary to differentiate between typos
and actual query, otherwise incorrect input like
`pathfuzzy:...` would be parsed as content queries instead
## 12.0.0-alpha.4
- Added additional marking functionality
- `mark listed` (`ml`) marks all currently listed documents
- `mark unlisted` (`mL`) marks all currently unlisted documents
- `unmark listed` (`Ml`) unmarks all currently listed documents
- `unmark unlisted` (`ML`) unmarks all currently unlisted documents
- `unmark all` is moved to key binding `Ma`
- Reworked key binding info grid to pack columns more tightly
- Added WSL clipboard integration
- Minor fix in command history file template help text
- Added `Tab` key to key binding info grid
- Added key binding info about scrolling through document list and search result list
- Minor fix for `{line_num}` placeholder handling in `--open-with`
- This should always be usable for text files but previously
Docfd crashes when `{line_num}` is specified in `--open-with`
and user opens a text file when no search has been made
- This is fixed by defaulting `{line_num}` to 1 when
there are no search results present
- Minor fix for `{page_num}` and `{search_word}` placeholders handling in `--open-with`
- This should always be usable for PDF files but previously
Docfd crashes when `{page_num}` or `{search_word}` is specified in `--open-with`
and user opens a PDF file when no search has been made
- This is fixed by defaulting `{page_num}` to 1
and `{search_word}` to empty string when
there are no search results present
## 12.0.0-alpha.3
- **Users are advised to recreate the index DB**
- Adjusted SQL indices and swapped to specialized SQL queries
for exact and prefix search terms, e.g. `'hello`, `^worl`
- Handling of these terms is now 10-20% faster depending on the document
- Fixed command history recomputation not using the reloaded version
of document store
- This issue is most noticeable when you've edited a text file after hitting `Enter` in Docfd (after which Docfd reloads the file for you),
and you hit `h` to modify the command history
- The replaying of the command history would use the old copy of the file instead of the new edited version of the text file
- Added missing SQL transaction in code path for reloading a single document
- Previously, reloading a single document was incredibly slow, which was very noticeable if you edited a text file
after hitting `Enter` in Docfd, unless the text file was very small
- Updated `--paths-from` argument handling
- Added `--paths-from -` for accepting list of paths from stdin
- Adjusted to accept comma separated list of paths, e.g. `--paths-from path-list0.txt,path-list1.txt`
- Removed builtin piping to fzf triggered by providing `?` as a file path, e.g. `docfd ?`
- The `--paths-from -` handling makes this obsolete and a lot less flexible by comparison
- Fixed interaction between search and filter
- Previously, starting a search would incorrectly cancel an ongoing filtering operation.
Now only a new filtering operation can cancel an ongoing filtering operation.
A new search still cancels an ongoing search.
- Starting a new filtering operation also still cancels any ongoing search. This is fine since the search results
are refreshed after the filtering has been completed.
- The refreshing of the search results also means that the following sequences of events are still handled correctly,
namely they still arrive at the same normal form of the document store:
- Example 1:
- (0) Filter `f_exp0` (filtering is canceled by step (2), but the updating of filter expression is never canceled)
- (1) Search `s_exp0` (search is canceled by step (2), but the updating of search expression is never canceled)
- (2) Filter `f_exp1` (refreshes search results using `s_exp0`)
- Example 2:
- (0) Search `s_exp0` (search is canceled by step (1), but updating of search expression is never canceled)
- (1) Filter `f_exp0` (this stage is canceled by step (2),
either during the filtering or during the
refreshing of search results, but the updating
of filter expression is never canceled)
- (2) Filter `f_exp1` (refreshes search results using `s_exp0`)
- Renaming query expression/language to filter expression/language in help text and documentation
- Added a separate loading indicator for filter field
- Fixed concurrency issue where an update of document store may cause the
filter field and search field in UI to be out of sync with the actual
filter expression and search expression used by the underlying document store
- Suppose we have the following sequence of events:
- (0) Document store `store0` carries filter expression
`f_exp0` and search expression `s_exp0`, which we write
as pair `(f_exp0, s_exp0)`
- (1) User initiates filter/search operation by placing `(f_exp1, s_exp1)` into the input fields.
We name the document store resulting from this filter/search operation as `store1a`,
which carries `(f_exp1, s_exp1)` when finalized.
- (2) While filter/search operation is ongoing,
user drops a set of documents from the
current document store. Since `store1a` is not
finalized yet, the current document store is still `store0`, thus the new document store encoding the result of the drop operation, `store1b`, is computed from `store0` instead of `store1a`.
In other words, both `store1a` and `store1b` share
`store0` as their parent.
Note that `store1b` carries `(f_exp0, s_exp0)` as
inherited from `store0`,
since a drop operation does not alter the filter expression or search expression.
- (3) As a drop operation immediately updates the document store
and cancels ongoing filter/search operation, step (2) canceled the computation of `store1a`, and instead places `store1b` as the current document store.
- However, this means the input fields are `(f_exp1, s_exp1)`
while the current document store `store1b` actually carries
`(f_exp0, s_exp0)`.
The fix in this update is then to add an
extra "sync from input fields" step whenever a document store
is updated. To illustrate, we continue from the above
sequence of events, where the updated version of Docfd
carries out the following step missing from previous
versions.
- (4) Update input fields to `(f_exp0, s_exp0)`
- This addresses the mismatch between the underlying document store and the UI input fields.
- In practice this is very unlikely to occur with human input, as the modes that update document store
are disabled if document store manager is carrying out any ongoing filtering or search.
However, since the UI is async, there will be gaps in timing between UI input/feedback and actual updates of values,
opening up to TOCTOU problems.
So there is always a chance that a document store update will be requested before the modes are disabled.
- Made interrupted filter/search operation to not yield a document store at all instead of yielding an empty document store
to simplify reasoning about filter/search cancellations and UI fields being in sync
## 12.0.0-alpha.2
- Added `path-date` clause to query expression
- This allows filtering based on date recognized from document path, for example, `path-date:>=2025-01-01 AND path-date:<2025-02-01`
would allow `/home/user/meeting-notes-2025-01-10.md` to pass through
- This gives a very lightweight method of attaching date information to any document
- See [relevant Wiki page](https://github.com/darrenldl/docfd/wiki/Document-filtering) for details
## 12.0.0-alpha.1
- Added a more powerful filter mode that replaces the filter glob mode and "pipe to fzf" feature
- Filter query mode uses a proper query language that supports file path globbing and file path fuzzy matching among other features
- This mode uses key binding `f`
- Removed `q` exit key binding to avoid accidental exiting
## 11.0.1
- Added better search cancellation handling, removing massive lags in some scenarios
## 11.0.0
- Minor fix for search scope narrowing logic:
- Search scope should also be set to empty if the document is not passing the file filter, not just when the search results are empty
- The old behavior can be confusing when a document passes an old file filter and thus has search results in memory,
but fail to pass a new file filter,
yet appears in later searches when file filter is reset
- It is simpler to make it so if a document is not listed for
whatever reason, search scope of that document just becomes
empty during narrowing
- Added missing commands in the list of possible commands in the command history file template
- `clear search`
- `clear filter`
- Minor breaking change, filter regex mode should have been called filter glob mode
- The key binding `fr` is changed to `fg`
- Changed UI text "File path filter" to "File path glob" to be more descriptive
## 10.2.0
- Added `--open-with` to allow customising the command used to open a file based on file extension
- Example: `--open-with pdf:detached='okular {path}'`
- Can be specified multiple times
- Added non-interactive use of `--commands-from`
- Non-interactive use can be triggered by pairing `--commands-from` with `-l`/`--files-with-match`
- Useful for advanced document management workflow
- Adjustments to search scope narrowing
- Added `narrow level: 0` for resetting the search scopes of
all documents back to full
- Narrowing now no longer drops unlisted document, so the
previous set of documents remain accessible for later
searches after resetting the search scopes
- Reworked search into multi-stage pipeline
- This improves the search speed by around 30%
- The core search procedure was reworked into an API that
generates grouped search jobs which can be easily distributed
to threads.
This gives a better workload distribution than the current
multithreading approach.
## 10.1.3
- Minor fixes
- "Reload document" now removes the document if the document is no longer accessible
- Docfd now only checks the existence of directly specified files
at launch, e.g. `file.txt` in `docfd file.txt`. This means
"reload all documents" now does not error out due to files becoming
no longer accessible.
## 10.1.2
- Minor fix for "reload all documents" when fzf was used to pick documents initially, i.e. `docfd [PATH]... ?`, or any variation where `?` appears anywhere in the path list
- Under this workflow, later "reload all" should use the same selection
instead of having the user select again in fzf, which is cumbersome
- Now Docfd correctly reuses the selection when "reload all" is requested,
if fzf was used initially to pick documents
- This does technically mean the functionality is now less flexible,
since if `docfd ?` alike is used, "reload all" no longer discovers
new files
- But the convenience from reusing the selection outweighs the flexibility
in practically all use cases from author's experience
## 10.1.1
- Minor fix for "filter files via fzf" functionality
- Previously, if instead of making a selection,
the user quits fzf (e.g. pressing `Ctrl`+`C`, `Ctrl`+`Q`),
Docfd also closes with it
- Now Docfd just discards the interaction and goes back to the main UI
## 10.1.0
- Added back index DB entry pruning
- Previously missing after swapping to SQLite DB
- Also renamed `--cache-soft-limit` to `--cache-limit` to
reflect the new pruning logic
- Fixes [issue #12](https://github.com/darrenldl/docfd/issues/12)
- Swapped to a better `doc_id` allocation strategy to minimise
`doc_id` size in DB
- Added blinking when drop mode is disabled but `d` is pressed
## 10.0.0
- Reworked document indexing into a multi-stage pipeline
- This significantly improves the indexing throughput by allowing
I/O tasks and computational tasks to run concurrently
- See [issue #11](https://github.com/darrenldl/docfd/issues/11)
- **Breaking** changes in index DB design - index DBs made by previous version
of Docfd are not compatible
- Optimized DB design, on average the index DB is roughly 60% smaller
compared to Docfd 9.0.0 index DB
- See [issue #11](https://github.com/darrenldl/docfd/issues/11)
- Added functionality to filter files via fzf
- This is grouped under filter mode. The previous filter mode
is renamed to filter regex mode.
- `f` enters filter mode
- `f` again activates filter files via fzf functionality
- `r` activates the filter regex mode, which was previously
just called the filter mode
- Fixed incomplete search results when file path filter field is updated while
search is ongoing
- Updating file path filter always cancels the current search (if there is one)
and start a new search after the filter is in place
- Previously, documents with partial search results due to cancellation
are kept
- Docfd now discards said documents, forcing the new search to complete the
search results of these documents
- Removed `--no-cache` flag
- Previously was unused completely
- It is difficult to share an in-memory SQLite DB
between threads, so discarding this flag entirely
- See [issue #11](https://github.com/darrenldl/docfd/issues/11)
- Swapped to using proper unicode segmentation for tokenisation
- This should reduce the index size for Western non-English languages
significantly
- Added screen split ratios for hiding left or right pane completely
- Minor UI/UX fixes
- Drop mode is now disabled when search is still ongoing or when either search field or filter field has an error
- Added missing update of search and filter status when undoing/redoing, or when replaying command history
- This is most noticeable when the status indicates an error, but undoing does not return it to OK
## 9.0.0
- Swapped over to using SQLite for index
- Memory usage is much slimmer/stays flat
- For the sample of 1.4GB worth of PDFs used, after indexing, 9.0.0-rc1 uses
1.9GB of memory, while 9.0.0-rc2 uses 39MB
- Search is a bit slower
- Added token length limit of 500 bytes to accommodate word table limit in index DB
- This means during indexing, if Docfd encounters a very long token,
e.g. serial number, long hex string, it will be split into chunks of
up to 500 bytes
- Added `Ctrl`+`C` exit key binding to key binding info on screen
- Updated exit keys
- To exit Docfd: `q`, `Ctrl`+`Q` or `Ctrl`+`C`
- To exit other modes: `Esc`
- Now defaults to not scanning hidden files and directories
- This behaviour is now enabled via the `--hidden` flag
- Changed to allow `--add-exts` and `--single-line-add-exts` to be specified multiple times
- Changed return code to be 1 when there are no results for `--sample` or `--search`
- Added `--no-pdftotext` and `--no-pandoc` flags
- Docfd also notes the presence of these flags in error message if there
are PDF files but no pdftotext command is available, and same with files
relying on pandoc
- Renamed `drop path` command to just `drop`
- Added drop unselected key binding, and the associated command `drop all except`
- Various key binding help info grid adjustments
## 9.0.0-rc1
- Changed default cache size from 100 to 10000
- Index after compression doesn't take up that much space, and storage is
generally cheap enough these days
- Adjusted cache eviction behaviour to be less strict on when eviction happens
and thus less expensive
- Renamed `--cache-size` to `--cache-soft-limit`
- Removed periodic GC compact call to avoid freezes when working with many
files
- Removed GC compact call during file indexing core loops to reduce overhead
- Added progress bars to initial document processing stage
- Swapped to using C backend for BLAKE2B hashing, this gives >20x speedup depending on CPU
- Swapped from JSON+GZIP to CBOR+GZIP serialization for indices
- Changed help info rotation key from `h` to `?`
- Renamed discard mode to drop mode
- Added command history editing functionality
- Added `--commands-from` command line argument
- Added `--tokens-per-search-scope-level` command line argument
- Concurrency related bug fixes
- Unlikely to encounter in normal workflows with human input speed
- https://github.com/darrenldl/docfd/commit/14fcc45b746e6156f29eb989d70700476977a3d7
- https://github.com/darrenldl/docfd/commit/bfd63d93562f8785ecad8152005aa0f823185699
- https://github.com/darrenldl/docfd/commit/4e0aa6785ce80630d0cd3cda6e316b7b15a4fb4b
- Replaced print mode with copy mode
- Replaced single file view with key binding to change screen split ratio
to remove feature discrepancies
- Added narrow mode for search scope narrowing
- Renamed `--index-chunk-token-count` to `--index-chunk-size`
- Renamed `--sample-count-per-doc` to `--samples-per-doc`
## 8.0.3
- Fixed single file view crash
## 8.0.2
- Reworked asynchronous search/filter UI code to avoid noticeable lag due to
waiting for cancellations that take too long
- Previously there was still a lockstep somewhere that would prevent UI
from progressing if previous search was still being canceled
- The current implementation allows newest requests to override older
requests entirely, and not wait for cancellations at all
- Adjusted document counter in multi-file view to be visible even when no files
are listed
## 8.0.1
- Fixed missing file path filter field update when undoing or redoing document
store updates
- Fixed case insensitive marker handling in glob command line arguments
## 8.0.0
- Removed `--markdown-headings atx` from pandoc commandline
arguments
- Removed `Alt`+`U` undo key binding
- Removed `Alt`+`E` redo key binding
- Removed `Ctrl`+`Q` exit key binding
- Added documentation for undo, redo key bindings
- Added clear mode and moved clear search field key binding
under this mode for multi-file view
- Added file path filtering functionality to multi-file view
## 7.1.0
- Added initial macOS support
- Likely to have bugs, but will need macOS users to report back
- Major speedup from letting `pdftotext` output everything in one pass and split
on Docfd side instead of asking `pdftotext` to output one page per invocation
- For very large PDFs the indexing used to take minutes but now only takes
seconds
- Page count may be inaccurate if the PDF page contains form feed character
itself (not fully sure if `pdftotext` filters the form feed character from
content), but should be rare
- Significant reduction of index file size by adding GZIP
compression to the index JSON
## 7.0.0
- Added discard mode to multi-file view
- Changed to using thin bars as pane separators, i.e. tmux style
- Added `g` and `G` key bindings for going to top and bottom of document list respectively
- Added `-l`/`--files-with-match` and `--files-without-match` for printing just paths
in non-interactive mode
- Grouped print key bindings under print mode
- Added more print key bindings
- Grouped reload key bindings under reload mode
- Added fixes to ensure Docfd does not exit until all printing is done
- Slimmed down memory usage by switching to OCaml 5.2 which enables use of `Gc.compact`
- Still no auto-compaction yet, however, will need to wait for a future
OCaml release
- Added `h` key binding to rotate key binding info grid
- Added exact, prefix and suffix search syntax from fzf
- Fixed extraneous document path print in non-interactive mode when documents have no search results
- Added "explicit spaces" token `~` to match spaces
## 6.0.1
- Fixed random UI freezes when updating search field
- This is due to a race condition in the search cancellation mechanism that
may cause UI fiber to starve and wait forever for a cancellation
acknowledgement
- This mechanism was put in place for asynchronous search since 4.0.0
- As usual with race conditions, this only manifests under some specific
timing by chance
## 6.0.0
- Fixed help message of `--max-linked-token-search-dist`
- Fixed search result printing where output gets chopped off if terminal width is too small
- Added smart additional line grabbing for search result printing
- `--search-result-print-snippet-min-size N`
- If the search result to be printed has fewer than `N` non-space tokens,
then Docfd tries to add surrounding lines to the snippet
to give better context.
- `--search-result-print-snippet-max-add-lines`
- Controls maximum number of surrounding lines that can be added in each direction.
- Added search result underlining when output is not a terminal,
e.g. redirected to file, piped to another command
- Changed `--search` to show all search results
- Added `--sample` that uses `--search` previous behavior where (by default)
only a handful of top search results are picked for each document
- Changed `--search-result-count-per-doc` to `--sample-count-per-doc`
- Added `--color` and `--underline` for controlling behavior of search result
printing, they can take one of:
- `never`
- `always`
- `auto`
- Removed blinking for `Tab` key presses
## 5.1.0
- Fixed help message of `--max-token-search-dist`
- Adjusted path display in UI to hide current working directory segment when
applicable
- Added missing blinking for `Tab` key presses
## 5.0.0
- Added file globbing support in the form of `--glob` argument
- Added single line search mode arguments
- `--single-line-exts`
- `--single-line-add-exts`
- `--single-line-glob`
- `--single-line`
- Fixed crash on empty file
- This was due to assertion failure of `max_line_num` in
`Content_and_search_result_render.content_snippet`
- Changed search result printing via `Shift+P` and `p` within TUI to not exit
after printing, allowing printing of more results
- Added blinking to key binding info grid to give better visual feedback,
especially for the new behavior of search result printing
- Changed to allow `--paths-from` to be specified multiple times
- Fixed handling of `.htm` files
- `htm` is not a valid value for pandoc's `--format` argument
- Now it is rewritten to `html` before being passed to pandoc
- Changed `--max-depth`:
- Changed default from 10 to 100
- Changed to accept 0
## 4.0.0
- Made document search asynchronous to search field input, so UI remains
smooth even if search is slow
- Added status to search bar:
- `OK` means Docfd is idling
- `...` means Docfd is searching
- `ERR` means Docfd failed to parse the search expression
- Added search cancellation. Triggered by editing or clearing search field.
- Added dynamic search distance adjustment based on notion of linked tokens
- Two tokens are linked if there is no space between them,
e.g. `-` and `>` are linked in `->`, but not in `- >`
- Replaced `word` with `token` in the following options for consistency
- `--max-word-search-dist`
- `--index-chunk-word-count`
- Replaced `word` with `token` in user-facing text
## 3.0.0
- Fixed crash from search result snippet being bigger than the content view pane
- Crash was from `Content_and_search_result_render.color_word_image_grid`
- Added key bindings
- `p`: exit and print search result to stderr
- `Shift+P`: exit and print file path to stderr
- Changed `--debug-log -` to use stderr instead of stdout
- Added non-interactive search mode where search results are printed to stdout
- `--search EXP` invokes non-interactive search mode with search expression `EXP`
- `--search-result-count-per-document` sets the number of top search results printed per document
- `--search-result-print-text-width` sets the text width to use when printing
- Added `--start-with-search` to prefill the search field in interactive mode
- Removed content requirement expression from multi-file view
- Originally designed for file filtering, but I have almost never used
it since its addition in 1.0.0
- Added word based line wrapping to following components of document list in multi-file view
- Document title
- Document path
- Document content preview
- Added word breaking in word based line wrapping logic so all of the original characters
are displayed even when the terminal width is very small or when a word/token is very long
- Added `--paths-from` to specify a file containing list of paths to (also) be scanned
- Fixed search result centering in presence of line wrapping
- Renamed `--max-fuzzy-edit` to `--max-fuzzy-edit-dist` for consistency
- Changed error messages to not be capitalized to follow Rust's and Go's
guidelines on error messages
- Added fallback rendering text so Docfd does not crash from trying
to render invalid text.
- Added pandoc integration
- Changed the logic of determining when to use stdin as document source
- Now if any paths are specified, stdin is ignored
- This change mostly came from Dune's cram test mechanism
not providing a tty to stdin, so previously Docfd would keep
trying to source from stdin even when explicit paths are provided
## 2.2.0
- Restored behaviour of skipping file extension checks for top-level
user specified files. This behaviour was likely removed during some
previous overhaul.
- This means, for instance, `docfd bin/docfd.ml` will now open the file
just fine without `--add-exts ml`
- Bumped default max word search distance from 20 to 50
- Added consideration for balanced opening closing symbols in search result ranking
- Namely symbol pairs: `()`, `[]`, `{}`
- Fixed crash from reading from stdin
- This was caused by calling `Notty_unix.Term.release` after closing the underlying
file descriptor in stdin input mode
- Added back handling of optional operator `?` in search expression
- Added test corpus to check translation of search expression to search phrases
## 2.1.0
- Added text editor integration for `jed`/`xjed`
- See [PR #3](https://github.com/darrenldl/docfd/pull/3)
by [kseistrup](https://github.com/kseistrup)
## 2.0.0
- Added "Last scan" field display to multi-file view and single file view
- Reduced screen flashing by only recreating `Notty_unix.Term.t` when needed
- Added code to recursively mkdir cache directory if needed
- Search procedure parameter tuning
- UI tuning
- Added search expression support
- Adjusted quit key bindings to be: `Esc`, `Ctrl+C`, and `Ctrl+Q`
- Added file selection support via `fzf`
## 1.9.0
- Added PDF viewer integration for:
- okular
- evince
- xreader
- atril
- mupdf
- Fixed change in terminal behavior after invoking text editor
by recreating `Notty_unix.Term.t`
- Fixed file auto-reloading to apply to all file types instead of
just text files
## 1.8.0
- Swapped to using Nottui at [a337a77](https://github.com/let-def/lwd/commit/a337a778001e6c1dbaed7e758c9e05f300abd388)
which fixes event handling, and pasting into edit field works correctly as a result
- Caching is now disabled if number of documents exceeds cache size
- Moved index cache to `XDG_CACHE_HOME/docfd`, which overall
defaults to `$HOME/.cache/docfd`
- Added cache related arguments
- `--cache-dir`
- `--cache-size`
- `--no-cache`
- Fixed search result centering in content view pane
- Changed `--debug` to `--debug-log` to support outputting debug log to a file
- Fixed file opening failure due to exhausting file descriptors
- This was caused by not bounding the number of concurrent fibers when loading files
via `Document.of_path` in `Eio.Fiber.List.filter_map`
- Added `--index-only` flag
- Fixed document rescanning in multi-file view
## 1.7.3
- Fixed crash from using mouse scrolling in multi-file view
- The mouse handler did not reset the search result selected
when selecting a different document
- This leads to out of bound access if the newly selected document
does not have enough search results
## 1.7.2
- Fixed content pane sometimes not showing all the lines
depending on terminal size and width of lines
- Made chunk size dynamic for parallel search
## 1.7.1
- Parallelization fine-tuning
## 1.7.0
- Added back parallel search
- General optimizations
- Added index file rotation
## 1.6.3
- Further underestimate space available for the purpose of line wrapping
## 1.6.2
- Fixed line wrapping
## 1.6.1
- Fixed line wrapping
## 1.6.0
- Docfd now saves stdin into a tmp file before processing
to allow opening in text editor
- Added `--add-exts` argument for additional file extensions
- Added real-time response to terminal size changes
## 1.5.3
- Updated key binding info pane of multi-file view
## 1.5.2
- Added line number into search result ranking consideration
## 1.5.1
- Tuned search procedure and search result ranking
- Made substring bidirectional matching differently weighted based
on direction
- Made reverse substring match require at least 3 characters
- Case-sensitive bonus only applies if search phrase
is not all ascii lowercase
## 1.5.0
- Made substring matching bidirectional
- Tuned search result ranking
## 1.4.0
- Moved reading of environment variables `VISUAL` and `EDITOR` to program start
- Performance tuning
- Increased cache size for search phrase automata
## 1.3.4
- Added dispatching of search to task pool at file granularity
## 1.3.3
- Performance tuning
- Switched back to using the old default max word search distance of 20
- Reduced default max fuzzy edit distance from 3 to 2 to prevent massive
slowdown on long words
## 1.3.2
- Performance tuning
- Added caching to search phrase automata construction
- Removed dispatching of search to task pool
- Adjusted search result limits
## 1.3.1
- Added more commandline argument error checking
- Adjusted help messages
- Adjusted max word search range calculation
- Renamed `max-word-search-range` to `max-word-search-dist`
## 1.3.0
- Index data structure optimizations
- Search procedure optimizations
## 1.2.2
- Fixed editor recognition for kakoune
## 1.2.1
- Fixed search results when multiple words are involved
## 1.2.0
- Removed UI components for search cancellation
- Added real time refresh of search
- Added code to open selected text file to selected search result for:
- nano
- neovim/vim/vi
- helix
- kakoune
- emacs
- micro
- Added "rescan for documents" to multi-file view
## 1.1.1
- Fixed releasing Notty terminal too early
## 1.1.0
- Added index saving and loading
- Added search cancellation
## 1.0.2
- Fixed file tree scan
## 1.0.1
- Minor UI tweaks
## 1.0.0
- Added expression language for file filtering in multi-file view
- Adjusted default file tree depth
- Added `--exts` argument for configuring file extensions recognized
- Fixed parameters passing from binary to library
## 0.9.0
- Added PDF search support via `pdftotext`
- Added UTF-8 support
## 0.8.6
- Minor wording fix
## 0.8.5
- Added check to skip re-searching if search phrase is equivalent to the previous one
## 0.8.4
- Index data structure optimization
- Code cleanup
## 0.8.3
- Optimized multi-file view reload so it does not redo the search over all documents
- Implemented a proper document store
## 0.8.2
- Fixed single file view document reloading not refreshing search results
## 0.8.1
- Replaced shared data structures with multicore safe versions
- Fixed work partitioning for parallel indexing
## 0.8.0
- Added multicore support for indexing and searching
## 0.7.4
- Fixed crashing and incorrect rendering in some cases of files with blank lines
- This is due to `Index.line_count` being incorrectly calculated
- Added auto refresh on change of file
- Change detection is based on file modification time
- Added reload file via `r` key
## 0.7.3
- Bumped the default word search range from 15 to 40
- Since spaces are also counted as words in the index,
15 doesn't actually give a lot of range
- Added minor optimization to search
## 0.7.2
- Code refactoring
## 0.7.1
- Delayed `Nottui_unix` term creation so pre TUI
printing like `--version` would work
- Added back mouse scrolling support
- Added Page Up and Page Down keys support
## 0.7.0
- Fixed indexing bug
- Added UI mode switch
- Adjusted status bar to show current file name in single file mode
- Adjusted content view to track search result
- Added content view to single file mode
## 0.6.3
- Adjusted status bar to not display index of document selected
when in single document mode
- Edited debug message a bit
## 0.6.2
- Fixed typo in error message
## 0.6.1
- Added check of whether provided files exist
## 0.6.0
- Upgraded status bar and help text/key binding info
## 0.5.9
- Changed help text to status bar + help text
## 0.5.8
- Fixed debug print of file paths
- Tuned UI text slightly
## 0.5.7
- Changed word db to do global word recording to further reduce memory footprint
## 0.5.6
- Optimized overall memory footprint
- Content index memory usage
- Switched to using content index to render content
lines instead of storing file lines again after indexing
## 0.5.5
- Fixed weighing of fuzzy matches
- Fixed bug in scoring of substring matches
## 0.5.4
- Fixed handling of search phrase with uppercase characters
- Prioritized search results that match the case
## 0.5.3
- Cleaned up code
## 0.5.2
- Cleaned up code
## 0.5.1
- Cleaned up code and debug info print a bit
## 0.5.0
- Removed tags handling
- Added stdin piping support
## 0.4.1
- Tuning content search result scoring
## 0.4.0
- Improved content search result scoring
- Added limit on content search results to consider to avoid
slowdown
- General optimizations
## 0.3.3
- Fixed crash due to not resetting content search result selection
when changing document selection
## 0.3.2
- Fixed internal line numbering, but displayed line numbering
still begins at 1
## 0.3.1
- Adjusted line number to begin at 1
## 0.3.0
- Adjusted colouring
## 0.2.9
- Fixed word position tracking in content indexing
## 0.2.8
- Fixed content indexing
## 0.2.7
- Changed to vim style highlighting for content search results
- Color adjustments in general
## 0.2.6
- Added single file UI mode
- Added support for specifying multiple files in command line
## 0.2.5
- Added limit to word search range of each step in content search
- This speeds up usual search while giving good enough results,
and prevents search from becoming very slow in large documents
## 0.2.4
- Adjusted displayed document list size
- Updated style of document list view
## 0.2.3
- Added sanitization to file view text
- Docfd now accepts file being passed as argument
## 0.2.2
- Fixed tokenization of user provided content search input
- Fixed content indexing to not include spaces
## 0.2.1
- Optimized file discovery procedure
- Added `--max-depth` option to limit scanning depth
- Added content search results view
- Adjusted tokenization procedure
## 0.2.0
- Switched to interactive TUI
- Renamed to Docfd
## 0.1.6
- Optimized parsing code slightly
## 0.1.5
- Adjusted parsing code slightly
## 0.1.4
- Adjusted `--tags` and `--ltags` output slightly
## 0.1.3
- Upgraded `--tags` and `--ltags` output to be more human readable
when output is terminal
- Changed behavior to output each tag in individual line when output
is not terminal
## 0.1.2
- Fixed output text when output is not terminal
## 0.1.1
- Fixed checking of whether output is terminal
## 0.1.0
- Flipped output positions of file path and tags
## 0.0.9
- Notefd now adds color to title and matching tags if output is terminal
- Improved fuzzy search index building
## 0.0.8
- Code cleanup
## 0.0.7
- Made file recognition more lenient
- Added support for alternative tag section syntax
- `| ... |`
- `@ ... @`
## 0.0.6
- Fixed Notefd to only handle consecutive tag sections
## 0.0.5
- Added `--tags` and `--ltags` flags
- Adjusted parsing to allow multiple tag sections
## 0.0.4
- Fixed tag extraction
## 0.0.3
- Made header extraction more robust to files with very long lines
## 0.0.2
- Added `-s` for case-insensitive substring tag match
- Renamed `-p` to `-e` for exact tag match
## 0.0.1
- Base version
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2022 Di Long Li
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: Makefile
================================================
# Source globs used by the formatting target below.
SRCFILES = lib/*.ml lib/*.mli bin/*.ml bin/*.mli profiling/*.ml tests/*.ml
# ocp-indent command line: format the files listed above in place.
OCPINDENT = ocp-indent \
--inplace \
$(SRCFILES)
# Default target: refresh the embedded version string, then build everything.
.PHONY: all
all :
python3 update-version-string.py
dune build @all
# Build the Docfd container image with podman.
.PHONY: podman-build
podman-build:
podman build --format docker -t localhost/docfd -f containers/Containerfile.docfd .
# Build the container image used for recording the demo tapes with vhs.
.PHONY: podman-build-demo-vhs
podman-build-demo-vhs:
podman build --format docker -t localhost/docfd-demo-vhs -f containers/Containerfile.demo-vhs .
# Regenerate the opam lock file from the current opam state.
.PHONY: lock
lock:
opam lock .
# Dynamically linked release binary, copied to release/docfd.
.PHONY: release-build
release-build :
python3 update-version-string.py
dune build --release bin/docfd.exe
mkdir -p release
cp -f _build/default/bin/docfd.exe release/docfd
chmod 755 release/docfd
# Statically linked release binary, copied to release/docfd.
.PHONY: release-static-build
release-static-build :
python3 update-version-string.py
OCAMLPARAM='_,ccopt=-static' dune build --release bin/docfd.exe
mkdir -p release
cp -f _build/default/bin/docfd.exe release/docfd
chmod 755 release/docfd
# Statically linked release binary for ARM (adds fPIC to ccopt).
.PHONY: release-static-build-arm
release-static-build-arm :
python3 update-version-string.py
OCAMLPARAM='_,ccopt=-static,fPIC' dune build --release bin/docfd.exe
mkdir -p release
cp -f _build/default/bin/docfd.exe release/docfd
chmod 755 release/docfd
# Run the unit tests followed by each cram test group.
.PHONY: tests
tests :
# Cleaning and rebuilding here to make sure cram tests actually use a recent binary,
# since Dune (as of 3.14.0) doesn't trigger rebuild of binary when
# invoking cram tests, even if the source code has changed.
make clean
make
OCAMLRUNPARAM=b dune exec tests/main.exe --no-buffer --force
dune build @file-collection-tests
dune build @line-wrapping-tests
dune build @misc-behavior-tests
dune build @printing-tests
dune build @match-type-tests
dune build @open-with-tests
dune build @non-interactive-mode-return-code-tests
dune build @search-scope-narrowing-tests
dune build @script-tests
# Record a demo for every tape under demo-vhs-tapes/.
.PHONY: demo-vhs
demo-vhs :
for file in demo-vhs-tapes/*; do ./demo-vhs.sh $$file; done
rm dummy.gif
# Statically linked profiling build.
.PHONY: profile
profile :
OCAMLPARAM='_,ccopt=-static' dune build --release profiling/main.exe
# Format all OCaml sources in place via ocp-indent.
.PHONY: format
format :
$(OCPINDENT)
# Remove dune build artifacts.
.PHONY : clean
clean:
dune clean
================================================
FILE: README.md
================================================
# Docfd
[Online Demo](https://demo.docfd.sh)
TUI multiline fuzzy document finder
Think interactive grep for text files, PDFs, DOCXs, etc,
but word/token based instead of regex and line based,
so you can search across lines easily.
Docfd aims to provide good UX via integration with common text editors
and PDF viewers,
so you can jump directly to a search result with a single key press.
---
Interactive use

Non-interactive use

## Features
- Multithreaded indexing and searching
- Multiline fuzzy search of multiple files
- Content view pane that shows the snippet surrounding the search result selected
- Text editor and PDF viewer integration
- Editable command history - rewrite/plan your actions in text editor
- Search scope narrowing - limit scope of next search based on current search results
- Clipboard integration
## Why Docfd might be for you
You want a standalone, offline TUI search tool that
allows you to immediately start searching without any complicated setup.
Docfd only starts processing the current directory or
specified directories/files upon start.
Hashing is used to pick out files that have not been indexed yet.
There is no need to wait for a background indexer to refresh
before you get up-to-date results.
You don't want to move everything into a central storage, and want to just keep your current folder structure
There are no strings attached with using Docfd.
Docfd does not require you to import your files into
any special storage system, so you can continue to mix and match
tools to best handle your files.
You want to script or record your search
Docfd comes with a simple scripting language,
which is already used to capture your actions in the TUI.
Finally found what you need after many steps?
Save the session as a script with `Ctrl`+`S`!
Then open it next time with `Ctrl`+`O`.
## Why Docfd might not be for you
Docfd is not all-encompassing
Docfd does not try to be a full blown document management system such as Paperless-ngx.
While there may be significant overlaps in terms of the search functionality, Docfd will fall short for almost any other kind of features, such as storage management, tagging, web interface, OCR, email ingestion.
Docfd is not a "proper" search engine
Docfd is a search engine in the sense that it uses the same
fundamental principles, i.e. inverted indices, but it lacks features
that you would expect from a "proper" search engine solution, e.g.
[Apache Lucene](https://lucene.apache.org/),
[Tantivy](https://github.com/quickwit-oss/tantivy),
[Lnx](https://github.com/lnx-search/lnx).
Here are some of the fundamental features which I think are crucial to a proper search engine, but Docfd lacks:
- You cannot customize what are indexed by Docfd
- You cannot add a new type of ranking
- Docfd lacks support for languages other than English
- Docfd does not scale very well to very large quantities of documents
- Search should still be serviceable when you reach beyond, say, 10k documents, but it will be noticeably more sluggish
Some of these shortcomings are fundamental to the goals of Docfd. For instance,
Docfd is primarily a standalone desktop TUI tool with quick startup and should not impact other desktop applications.
As such, some performance related engineering choices typical for a proper search engine
are difficult to accommodate as they require longer startup and significantly more memory usage.
Other shortcomings are due to limited time and limited return on efforts - if one is to push Docfd so much to reach the feature parity
and performance of a proper search engine, then one might as well just use an existing search engine to begin with.
If your notes are consistently very short, and you only want to do simple searches, then there are better options
If you follow note taking methodologies such as Zettelkasten, where each note consists of very few lines, then using a combination of grep and file preview tool can yield a much faster search experience by skipping out on indexing and consideration of word proximity.
Docfd does not "stream" its search results
One user feedback received was that searching felt slow when Docfd is still conducting the search as UI is not updated result by result. By comparison, fzf felt faster as results start to immediately pop into the screen.
It is fundamentally more difficult to implement this streaming behavior nicely in Docfd, as Docfd operates with snapshots in mind (e.g. allowing you to undo/redo commands), while fzf does not. More specifically, it is much easier to wait for all search results to be ready, and finalize as a snapshot before presenting onto Docfd UI.
So while possible to implement in Docfd, it is unclear if the effort is worthwhile with the additional system complexity in mind.
## Installation
Statically linked binaries for Linux and macOS are available via
[GitHub releases](https://github.com/darrenldl/docfd/releases).
Docfd is also packaged on the following platforms for Linux:
- [opam](https://ocaml.org/p/docfd/latest)
- [AUR](https://aur.archlinux.org/packages/docfd-bin) (as `docfd-bin`)
- First packaged by [@kseistrup](https://github.com/kseistrup), now maintained by Dominiquini
- Nix (as `docfd`)
- Packaged by [@chewblacka](https://github.com/chewblacka)
The only way to use Docfd on Windows right now is via WSL.
**Notes for packagers**: Outside of the OCaml toolchain for building (if you are
packaging from source), Docfd also requires the following
external tools at run time for full functionality:
- `pdftotext` from `poppler-utils` for PDF support
- `pandoc` for support of `.epub`, `.odt`, `.docx`, `.fb2`, `.ipynb`, `.html`, and `.htm` files
- `wl-clipboard` for clipboard support on Wayland
- `xclip` for clipboard support on X11
## Basic usage
The typical usage of Docfd is to either `cd` into the directory of interest
and launch `docfd` directly, or specify the paths as arguments:
```
docfd [PATH]...
```
The list of paths can contain directories.
Each directory in the list is scanned recursively for
files with the following extensions by default:
- For multiline search mode:
- `.txt`,
`.md`,
`.pdf`,
`.epub`,
`.odt`,
`.docx`,
`.fb2`,
`.ipynb`,
`.html`,
`.htm`
- For single line search mode:
- `.log`,
`.csv`,
`.tsv`
You can change the file extensions to use via
`--exts` and `--single-line-exts`,
or add onto the list of extensions via
`--add-exts` and `--single-line-add-exts`.
If the list of `PATH`s is empty,
then Docfd defaults to scanning the
current directory `.`
unless any of the following is used:
`--paths-from`, `--glob`, `--single-line-glob`.
## Documentation
See [GitHub Wiki](https://github.com/darrenldl/docfd/wiki) for
more examples/cookbook, and technical details.
## Changelog
[CHANGELOG](CHANGELOG.md)
## Limitations
- Docfd generally expects one instance per index DB
- You should pick a different cache directory (which houses
the index DB) via `--cache-dir`
if you need multiple instances
- There are safe guards to avoid corruptions even if you do run
multiple instances of Docfd, but note that the instances of Docfd
may exit unexpectedly
- That being said, running multiple instances of Docfd which are only reading
the index DB and not updating it should be fine
- File auto-reloading is not supported for PDF files,
as PDF viewers are invoked in the background via shell.
It is possible to support this properly
in the ways listed below, but requires
a lot of engineering for potentially very little gain:
- Docfd waits for PDF viewer to terminate fully
before resuming, but this
prohibits viewing multiple search results
simultaneously in different PDF viewer instances.
- Docfd manages the launched PDF viewers completely,
but these viewers are closed when Docfd terminates.
- Docfd invokes the PDF viewers via shell
so they stay open when Docfd terminates.
Docfd instead periodically checks if they are still running
via the PDF viewers' process IDs,
but this requires handling forks.
- Outside of tracking whether the PDF viewer instances
interacting with the files are still running,
Docfd also needs to set up file update handling
either via `inotify` or via checking
file modification times periodically.
## Acknowledgement
- Big thanks to [@lunacookies](https://github.com/lunacookies) and
[@jthvai](https://github.com/jthvai) for the many UI/UX discussions and
suggestions
- Demo gifs and some screenshots are made using [vhs](https://github.com/charmbracelet/vhs).
- [ripgrep-all](https://github.com/phiresky/ripgrep-all) was used as reference
for text extraction software choices
- [Marc Coquand](https://mccd.space) (author of
[Stitch](https://git.mccd.space/pub/stitch/)) for discussions and inspiration
of results narrowing functionality
- Part of the search syntax was copied from [fzf](https://github.com/junegunn/fzf)
- Command history editing workflow was inspired by Git interactive rebase workflow, e.g. `git rebase -i`
- [PDF corpora](https://github.com/pdf-association/pdf-corpora) from PDF association was used to stress test performance
================================================
FILE: bin/BLAKE2B.ml
================================================
(* BLAKE2B instantiated with a 64-byte (512-bit) digest. *)
module B = Digestif.Make_BLAKE2B (struct
let digest_size = 64
end)
(* [hash_of_file ~env ~path] streams the file at [path] through a BLAKE2B
   digest in chunks of [Params.hash_chunk_size] bytes.
   Returns [Ok hex_digest] on success, [Error msg] otherwise.  Any exception
   raised while opening or reading the file (e.g. missing file, permission
   error) is mapped to a generic [Error] message instead of propagating. *)
let hash_of_file ~env ~path =
let fs = Eio.Stdenv.fs env in
(* Mutable digest context threaded through the feed loop below. *)
let ctx = ref B.empty in
try
Eio.Path.(with_open_in (fs / path))
(fun flow ->
match
Eio.Buf_read.parse ~max_size:Params.hash_chunk_size
(fun buf ->
try
(* Feed full chunks until [take] cannot supply a complete
   chunk any more — presumably it raises [End_of_file] when
   fewer bytes remain (TODO confirm against Eio docs). *)
while true do
ctx := B.feed_string !ctx (Eio.Buf_read.take Params.hash_chunk_size buf)
done
with
| End_of_file ->
(* Drain and feed whatever partial chunk is left. *)
ctx := B.feed_string !ctx (Eio.Buf_read.take_all buf)
)
flow
with
| Ok () -> Ok (!ctx |> B.get |> B.to_hex)
| Error (`Msg msg) -> Error msg
)
with
| _ -> Error (Printf.sprintf "failed to hash file: %s" (Filename.quote path))
================================================
FILE: bin/UI.ml
================================================
open Docfd_lib
open Lwd_infix
(* UI state variables for script management and path filtering. *)
module Vars = struct
(* Text field holding the script name being typed (save/open dialogs). *)
let script_name_field = Lwd.var UI_base.empty_text_field
let script_name_field_focus_handle = Nottui.Focus.make ()
(* Text field used for fuzzy-ranking paths. *)
let path_fuzzy_rank_field = Lwd.var UI_base.empty_text_field
let path_fuzzy_rank_field_focus_handle = Nottui.Focus.make ()
(* Basenames of the script files discovered in the script directory. *)
let script_files : string Dynarray.t Lwd.var = Lwd.var (Dynarray.create ())
(* Reactive view of [script_files] filtered/ranked according to the current
   input mode and the content of [script_name_field].  The second component,
   when present, carries per-file highlight positions for rendering. *)
let usable_script_files : (string Dynarray.t * Int_set.t Dynarray.t option) Lwd.t =
let$* arr = Lwd.get script_files in
let$* input_mode = Lwd.get UI_base.Vars.input_mode in
let$ script_name_specified, _ = Lwd.get script_name_field in
match input_mode with
| Save_script -> (
(* Saving: plain prefix filter on the typed name, no highlights. *)
(Dynarray.filter
(CCString.starts_with ~prefix:script_name_specified)
arr,
None)
)
| Scripts | Delete_script_confirm (_, _) -> (
match Search_exp.parse script_name_specified with
| None -> (
(* Input is not a valid search expression: fall back to the
   same prefix filter used in save mode. *)
(Dynarray.filter
(CCString.starts_with ~prefix:script_name_specified)
arr,
None)
)
| Some exp -> (
if Search_exp.is_empty exp then (
(* Empty expression: show everything, unranked. *)
(arr, None)
) else (
(* Fuzzy-rank the files by the expression, keying on the
   file name without its extension. *)
let ranking =
Misc_utils.fuzzy_rank_assoc
(Stop_signal.make ())
~get_key:Filename.chop_extension
exp
(Dynarray.to_seq arr)
in
(* Split (path, result) pairs into parallel arrays of paths
   and highlight sets. *)
Dynarray.to_seq ranking
|> Seq.map (fun (path, search_result) ->
(path, Misc_utils.highlights_of_search_result search_result)
)
|> Seq.split
|> (fun (s0, s1) ->
(Dynarray.of_seq s0, Some (Dynarray.of_seq s1)))
)
)
)
| _ -> (
(* Any other input mode: no script files are applicable. *)
(Dynarray.create (), None)
)
end
(* Rescans the script directory (depth limit 1) and publishes the basenames
   of all files carrying the docfd script extension, resetting the script
   selection to the top of the list. *)
let refresh_script_files () =
  Lwd.set UI_base.Vars.index_of_script_selected 0;
  let found =
    File_utils.list_files_recursive_filter_by_exts
      ~max_depth:1
      ~report_progress:ignore
      ~exts:[ Params.docfd_script_ext ]
      (Seq.return (Params.script_dir ()))
  in
  let basenames =
    String_set.to_seq found
    |> Seq.map Filename.basename
    |> Dynarray.of_seq
  in
  Lwd.set Vars.script_files basenames
(* Re-parses [doc] from disk. On success the refreshed document replaces
   the old entry in the session's starting state; on parse failure the
   document's path is dropped from it instead. *)
let reload_document (doc : Document.t) =
  let pool = UI_base.task_pool () in
  let path = Document.path doc in
  let reloaded =
    match
      Document.of_path
        ~env:(UI_base.eio_env ())
        pool
        ~already_in_transaction:false
        (Document.search_mode doc)
        path
    with
    | Ok d -> Some d
    | Error _ -> None
  in
  let init_state =
    Session_manager.lock_with_view (fun view -> view.init_state)
  in
  let session_state =
    match reloaded with
    | Some d -> Session.State.add_document pool d init_state
    | None -> Session.State.drop (`Path path) init_state
  in
  Session_manager.update_starting_state session_state
(* Reloads the currently selected document, if any documents are listed. *)
let reload_document_selected
    ~(search_result_groups : Session.search_result_group array)
  : unit =
  match Array.length search_result_groups with
  | 0 -> ()
  | _ ->
    let selected = Lwd.peek UI_base.Vars.index_of_document_selected in
    let doc, _ = search_result_groups.(selected) in
    reload_document doc
(* Flips the mark on [path]: runs `Unmark if the document is currently
   marked, `Mark otherwise, recording the resulting snapshot together with
   the command that produced it. *)
let toggle_mark ~path =
  Session_manager.update_from_cur_snapshot
    (fun cur_snapshot ->
       let state = Session.Snapshot.state cur_snapshot in
       let currently_marked =
         String_set.mem path (Session.State.marked_document_paths state)
       in
       let command = if currently_marked then `Unmark path else `Mark path in
       let (command, new_state) =
         Option.get
           (Session.run_command (UI_base.task_pool ()) command state)
       in
       Session.Snapshot.make ~last_command:(Some command) new_state
    )
(* Drops documents according to [choice], first adjusting the selection so
   it stays within the shrunken list, then runs the corresponding session
   command and records the resulting snapshot. *)
let drop ~document_count (choice : [`Path of string | `All_except of string | `Marked | `Unmarked | `Listed | `Unlisted]) =
  let new_command =
    match choice with
    | `Path path ->
      (* One document disappears; clamp the current index accordingly. *)
      let n = Lwd.peek UI_base.Vars.index_of_document_selected in
      UI_base.set_document_selected ~choice_count:(document_count - 1) n;
      `Drop path
    | `All_except path ->
      (* Only one document remains. *)
      UI_base.set_document_selected ~choice_count:1 0;
      `Drop_all_except path
    | `Marked ->
      UI_base.reset_document_selected ();
      `Drop_marked
    | `Unmarked ->
      UI_base.reset_document_selected ();
      `Drop_unmarked
    | `Listed ->
      UI_base.reset_document_selected ();
      `Drop_listed
    | `Unlisted ->
      UI_base.reset_document_selected ();
      `Drop_unlisted
  in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let (command, state) =
        Session.Snapshot.state cur_snapshot
        |> Session.run_command (UI_base.task_pool ()) new_command
        |> Option.get
      in
      Session.Snapshot.make ~last_command:(Some command) state)
(* Marks either a single document by path or every listed document,
   recording the resulting snapshot with the command that produced it. *)
let mark (choice : [`Path of string | `Listed]) =
  let new_command =
    match choice with
    | `Path path -> `Mark path
    | `Listed -> `Mark_listed
  in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let (command, state) =
        Session.Snapshot.state cur_snapshot
        |> Session.run_command (UI_base.task_pool ()) new_command
        |> Option.get
      in
      Session.Snapshot.make ~last_command:(Some command) state)
(* Unmarks a single document by path, all listed documents, or all marked
   documents, recording the resulting snapshot. *)
let unmark (choice : [`Path of string | `Listed | `All]) =
  let new_command =
    match choice with
    | `Path path -> `Unmark path
    | `Listed -> `Unmark_listed
    | `All -> `Unmark_all
  in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let (command, state) =
        Session.Snapshot.state cur_snapshot
        |> Session.run_command (UI_base.task_pool ()) new_command
        |> Option.get
      in
      Session.Snapshot.make ~last_command:(Some command) state)
(* Re-sorts the document list by [sort_by], resetting the selection to the
   top since the ordering changes under it. *)
let sort (sort_by : Command.Sort_by.t) =
  UI_base.reset_document_selected ();
  let new_command = `Sort (sort_by, Command.Sort_by.default_no_score) in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let (command, state) =
        Session.Snapshot.state cur_snapshot
        |> Session.run_command (UI_base.task_pool ()) new_command
        |> Option.get
      in
      Session.Snapshot.make ~last_command:(Some command) state)
(* Narrows the search scope of the current session state to [level] and
   records the narrowed state as a new snapshot. *)
let narrow_search_scope_to_level ~level =
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let state = Session.Snapshot.state cur_snapshot in
      let narrowed =
        Session.State.narrow_search_scope_to_level ~level state
      in
      Session.Snapshot.make ~last_command:(Some (`Narrow_level level)) narrowed)
(* Push the current contents of the respective input field to the session
   manager. [commit] presumably distinguishes a finalized submission from a
   live-typing preview — confirm against Session_manager. *)
let update_filter ~commit () =
  let text, _cursor = Lwd.peek UI_base.Vars.filter_field in
  Session_manager.submit_filter_req ~commit text

let update_search ~commit () =
  let text, _cursor = Lwd.peek UI_base.Vars.search_field in
  Session_manager.submit_search_req ~commit text

let update_path_fuzzy_rank ~commit () =
  let text, _cursor = Lwd.peek Vars.path_fuzzy_rank_field in
  Session_manager.submit_path_fuzzy_rank_req ~commit text
(* Path under the script directory for a script named [base_name], with
   the docfd script extension appended. Creates the directory if needed. *)
let compute_save_script_path base_name =
  let dir = Params.script_dir () in
  File_utils.mkdir_recursive dir;
  Filename.concat dir (base_name ^ Params.docfd_script_ext)
(* Writes the current session's command history to [path], one command per
   line (snapshots without a command are skipped). Exits the program with
   an error message if the file cannot be written. *)
let save_script ~path =
  Session_manager.stop_filter_and_search_and_restore_input_fields ();
  let lines =
    Session_manager.lock_with_view (fun view ->
        Dynarray.to_seq view.snapshots
        |> Seq.filter_map (fun (snapshot : Session.Snapshot.t) ->
            Session.Snapshot.last_command snapshot
            |> Option.map Command.to_string
          )
        |> List.of_seq
      )
  in
  match CCIO.with_out path (fun oc -> CCIO.write_lines_l oc lines) with
  | () -> ()
  | exception Sys_error _ ->
    Misc_utils.exit_with_error_msg
      (Fmt.str "failed to write script %s" path)
module Top_pane = struct
module Document_list = struct
(* Renders one document entry of the document list pane as a Notty image.
   In Path_fuzzy_rank mode only the (possibly highlighted) path is shown;
   in every other mode the entry shows: title, best-result score (debug
   builds only), path, path-derived date, a short content preview, and the
   last-modified time. [selected] only affects the title's styling. *)
let render_document_entry
    ~input_mode
    ~width
    ~documents_marked
    ~(search_result_group : Session.search_result_group)
    ~path_highlights
    ~selected
  : Notty.image =
  let open Notty in
  let open Notty.Infix in
  let (doc, search_results) = search_result_group in
  (* Score of the first (best) search result; shown only with debug output. *)
  let search_result_score_image =
    if Option.is_some !Params.debug_output then (
      if Array.length search_results = 0 then
        I.empty
      else (
        let x = search_results.(0) in
        I.strf "(Best search result score: %f)" (Search_result.score x)
      )
    ) else (
      I.empty
    )
  in
  let sub_item_base_left_padding = I.string A.empty " " in
  let sub_item_width = width - I.width sub_item_base_left_padding - 2 in
  (* First [Params.preview_line_count] lines of the document (or fewer if
     the document is shorter), each wrapped line prefixed with a green
     gutter bar. *)
  let preview_image =
    let preview_left_padding_per_line =
      I.string A.(bg lightgreen) " "
      <|>
      I.string A.empty " "
    in
    let preview_line_images =
      let line_count =
        min Params.preview_line_count (Index.global_line_count ~doc_id:(Document.doc_id doc))
      in
      OSeq.(0 --^ line_count)
      |> Seq.map (fun global_line_num ->
          Index.words_of_global_line_num ~doc_id:(Document.doc_id doc) global_line_num
          |> Dynarray.to_list
          |> Content_and_search_result_rendering.Text_block_rendering.of_words ~width:sub_item_width
        )
      |> Seq.map (fun img ->
          (* Repeat the gutter for every visual row the block wrapped into. *)
          let left_padding =
            OSeq.(0 --^ I.height img)
            |> Seq.map (fun _ -> preview_left_padding_per_line)
            |> List.of_seq
            |> I.vcat
          in
          left_padding <|> img
        )
      |> List.of_seq
    in
    I.vcat preview_line_images
  in
  (* Path highlights only apply while fuzzy-ranking paths. *)
  let path_highlights =
    match input_mode with
    | UI_base.Path_fuzzy_rank -> (
        String_map.find_opt (Document.path doc) path_highlights
      )
    | _ -> (
        None
      )
  in
  let path_image =
    Document.path doc
    |> File_utils.remove_cwd_from_path
    |> Tokenization.tokenize ~drop_spaces:false
    |> List.of_seq
    |> Content_and_search_result_rendering.Text_block_rendering.of_words
      ~width:sub_item_width
      ?highlights:path_highlights
  in
  let path_image_with_prefix =
    (I.string A.(fg lightgreen) "@ ")
    <|>
    path_image
  in
  (* Date parsed out of the file path, when one is present. *)
  let path_date_image =
    (match Document.path_date doc with
     | None -> I.void 0 0
     | Some date -> (
         I.string A.(fg lightgreen) " ⤷ "
         <|>
         I.string A.empty
           (Timedesc.Date.to_rfc3339 date)
       )
    )
  in
  let last_modified_image =
    I.string A.(fg lightgreen) "Last modified: "
    <|>
    I.string A.empty
      (Timedesc.to_string ~format:Params.last_modified_format_string (Document.mod_time doc))
  in
  let marked = String_set.mem (Document.path doc) documents_marked in
  (* Title line; bold when this entry is the selected one. *)
  let title =
    let attr =
      if selected then (
        A.(fg lightblue ++ st bold)
      ) else (
        A.(fg lightblue)
      )
    in
    match Document.title doc with
    | None ->
      I.void 0 1
    | Some title -> (
        title
        |> Tokenization.tokenize ~drop_spaces:false
        |> List.of_seq
        |> Content_and_search_result_rendering.Text_block_rendering.of_words ~attr ~width
      )
  in
  match input_mode with
  | UI_base.Path_fuzzy_rank -> (
      path_image
    )
  | _ -> (
      (
        (* "> " prefix marks a marked document. *)
        (if marked then I.strf "> " else I.void 0 1)
        <|>
        title
      )
      <->
      (
        sub_item_base_left_padding
        <|>
        I.vcat
          [ search_result_score_image;
            path_image_with_prefix;
            path_date_image;
            preview_image;
            last_modified_image;
          ]
      )
    )
(* Renders the document list pane: entries are drawn top-down starting at
   the selected document (so the selection is always the first visible
   entry) until [height] is filled or documents run out. Mouse wheel moves
   the selection. *)
let main
    ~width
    ~height
    ~documents_marked
    ~(search_result_groups : Session.search_result_group array)
    ~(document_selected : int)
  : Nottui.ui Lwd.t =
  let document_count = Array.length search_result_groups in
  let$* input_mode = Lwd.get UI_base.Vars.input_mode in
  let$* (_cur_ver, snapshot) = Session_manager.cur_snapshot in
  let state = Session.Snapshot.state snapshot in
  let render_pane () =
    (* Accumulate entry images until the pane height is filled; the final
       entry may overflow [height] since the check happens before adding. *)
    let rec aux index height_filled acc =
      if index < document_count
      && height_filled < height
      then (
        let selected = Int.equal document_selected index in
        let img =
          render_document_entry
            ~input_mode
            ~width
            ~documents_marked
            ~search_result_group:search_result_groups.(index)
            ~path_highlights:(Session.State.path_highlights state)
            ~selected
        in
        aux (index + 1) (height_filled + Notty.I.height img) (img :: acc)
      ) else (
        List.rev acc
        |> List.map Nottui.Ui.atom
        |> Nottui.Ui.vcat
      )
    in
    if document_count = 0 then (
      Nottui.Ui.empty
    ) else (
      aux document_selected 0 []
    )
  in
  let$ background = UI_base.full_term_sized_background in
  Nottui.Ui.join_z background (render_pane ())
  |> Nottui.Ui.mouse_area
    (UI_base.mouse_handler
       ~f:(fun direction ->
           let offset =
             match direction with
             | `Up -> -1
             | `Down -> 1
           in
           let document_current_choice =
             Lwd.peek UI_base.Vars.index_of_document_selected
           in
           UI_base.set_document_selected
             ~choice_count:document_count
             (document_current_choice + offset);
         )
    )
end
module Right_pane = struct
(* Bottom-right pane listing the search results of the selected document. *)
module Search_result_list = struct
  (* Renders up to [height] search results starting at the selected one,
     each followed by a blank separator line. Mouse wheel moves the
     search result selection. *)
  let main
      ~height
      ~width
      ~(search_result_group : Session.search_result_group)
      ~(index_of_search_result_selected : int Lwd.var)
    : Nottui.ui Lwd.t =
    let (document, search_results) = search_result_group in
    let search_result_selected = Lwd.peek index_of_search_result_selected in
    let result_count = Array.length search_results in
    if result_count = 0 then (
      Lwd.return Nottui.Ui.empty
    ) else (
      let images =
        Misc_utils.array_sub_seq
          ~start:search_result_selected
          ~end_exc:(min result_count (search_result_selected + height))
          search_results
        |> Seq.map (Content_and_search_result_rendering.search_result
                      ~doc_id:(Document.doc_id document)
                      ~render_mode:(UI_base.render_mode_of_document document)
                      ~width
                   )
        |> List.of_seq
      in
      let pane =
        images
        |> List.map (fun img ->
            (* The empty strf adds one blank line below each result. *)
            Nottui.Ui.atom Notty.I.(img <-> strf "")
          )
        |> Nottui.Ui.vcat
      in
      let$ background = UI_base.full_term_sized_background in
      Nottui.Ui.join_z background pane
      |> Nottui.Ui.mouse_area
        (UI_base.mouse_handler
           ~f:(fun direction ->
               let n = Lwd.peek index_of_search_result_selected in
               let offset =
                 match direction with
                 | `Up -> -1
                 | `Down -> 1
               in
               UI_base.set_search_result_selected
                 ~choice_count:result_count
                 (n + offset)
             )
        )
    )
end
(* Bottom-right pane listing the links of the selected document. *)
module Link_list = struct
  (* Renders a window of links centered (roughly) on the selected one.
     Mouse wheel moves the link selection. *)
  let main
      ~height
      ~width
      ~(search_result_group : Session.search_result_group)
      ~(index_of_link_selected : int Lwd.var)
    : Nottui.ui Lwd.t =
    let (document, _search_results) = search_result_group in
    let links = Document.links document in
    let link_selected = Lwd.peek index_of_link_selected in
    let link_count = Array.length links in
    if link_count = 0 then (
      Lwd.return Nottui.Ui.empty
    ) else (
      (* Window of [height] links with the selection kept near the middle. *)
      let start = max 0 (link_selected - height / 2) in
      let end_exc = min link_count (start + height) in
      let render ~width s =
        s
        |> Tokenization.tokenize ~drop_spaces:false
        |> List.of_seq
        |> Content_and_search_result_rendering.Text_block_rendering.of_words
          ~width
      in
      let pane =
        Misc_utils.array_sub_seq ~start ~end_exc
          links
        |> Seq.map (fun link -> link.Link.link)
        |> List.of_seq
        |> Content_and_search_result_rendering.centered_list
          ~height
          ~width
          ~render
          (link_selected - start)
        |> Nottui.Ui.atom
      in
      let$ background = UI_base.full_term_sized_background in
      Nottui.Ui.join_z background pane
      |> Nottui.Ui.mouse_area
        (UI_base.mouse_handler
           ~f:(fun direction ->
               let n = Lwd.peek index_of_link_selected in
               let offset =
                 match direction with
                 | `Up -> -1
                 | `Down -> 1
               in
               UI_base.set_link_selected
                 ~choice_count:link_count
                 (n + offset)
             )
        )
    )
end
(* Right half of the top pane: the content view, optionally stacked above
   a bottom-right pane showing either the link list (Links mode) or the
   search result list (all other modes). Blank when no documents are
   listed. Note: the sub-pane constructors are partially applied here;
   [UI_base.vpane] supplies the remaining ~height argument. *)
let main
    ~width
    ~height
    ~(search_result_groups : Session.search_result_group array)
    ~(document_selected : int)
    ~show_bottom_right_pane
  : Nottui.ui Lwd.t =
  if Array.length search_result_groups = 0 then (
    let blank ~height =
      let _ = height in
      Nottui_widgets.empty_lwd
    in
    UI_base.vpane ~width ~height
      blank blank
  ) else (
    let$* input_mode = Lwd.get UI_base.Vars.input_mode in
    let$* search_result_selected = Lwd.get UI_base.Vars.index_of_search_result_selected in
    let$* link_selected = Lwd.get UI_base.Vars.index_of_link_selected in
    let search_result_group = search_result_groups.(document_selected) in
    if show_bottom_right_pane then (
      UI_base.vpane ~width ~height
        (UI_base.Content_view.main
           ~input_mode
           ~width
           ~search_result_group
           ~search_result_selected
           ~link_selected)
        (match input_mode with
         | Links -> (
             Link_list.main
               ~width
               ~search_result_group
               ~index_of_link_selected:UI_base.Vars.index_of_link_selected
           )
         | _ -> (
             Search_result_list.main
               ~width
               ~search_result_group
               ~index_of_search_result_selected:UI_base.Vars.index_of_search_result_selected
           ))
    ) else (
      (* Content view takes the full height when the bottom pane is hidden. *)
      UI_base.Content_view.main
        ~input_mode
        ~width
        ~height
        ~search_result_group
        ~search_result_selected
        ~link_selected
    )
  )
end
(* Renders [items] from index [selected] onward as a vertical text list of
   fixed size [width] x [height], optionally applying the per-item
   highlights at the matching indices. *)
let item_list
    ~width
    ~height
    ~selected
    ?highlights
    items
  : Nottui.ui Lwd.t =
  let render_entry index =
    let text = Dynarray.get items index in
    let entry_highlights =
      Option.map (fun h -> Dynarray.get h index) highlights
    in
    text
    |> Tokenization.tokenize ~drop_spaces:false
    |> List.of_seq
    |> Content_and_search_result_rendering.Text_block_rendering.of_words
      ~width
      ?highlights:entry_highlights
    |> Nottui.Ui.atom
  in
  OSeq.(selected --^ Dynarray.length items)
  |> Seq.map render_entry
  |> List.of_seq
  |> Nottui.Ui.vcat
  |> Nottui.Ui.resize ~w:width ~h:height
  |> Lwd.return
(* Top pane dispatcher. Depending on input mode it shows:
   - Save_script: the list of matching script names;
   - Scripts / Delete_script_confirm: script list on the left, a preview of
     the selected script's first [height] lines on the right;
   - otherwise: the document list on the left and the content/right pane
     on the right, split according to [screen_split]. *)
let main
    ~width
    ~height
    ~documents_marked
    ~screen_split
    ~show_bottom_right_pane
    ~(search_result_groups : Session.search_result_group array)
  : Nottui.ui Lwd.t =
  let$* input_mode = Lwd.get UI_base.Vars.input_mode in
  let$* script_selected = Lwd.get UI_base.Vars.index_of_script_selected in
  let$* usable_scripts, usable_script_highlights =
    Vars.usable_script_files
  in
  (* Currently selected script file, if the selection is in range. *)
  let file =
    if script_selected < Dynarray.length usable_scripts then (
      Some (Dynarray.get usable_scripts script_selected)
    ) else (
      None
    )
  in
  let$* selected = Lwd.get UI_base.Vars.index_of_script_selected in
  match input_mode with
  | Save_script -> (
      item_list
        ~width
        ~height
        ~selected
        usable_scripts
    )
  | Scripts | Delete_script_confirm (_, _) -> (
      (* Preview: first [height] lines of the selected script; unreadable
         files degrade to an empty preview. *)
      let lines =
        try
          match file with
          | None -> []
          | Some file -> (
              let dir = Params.script_dir () in
              CCIO.with_in (Filename.concat dir file) (fun ic ->
                  CCIO.read_lines_seq ic
                  |> OSeq.take height
                  |> List.of_seq
                )
            )
        with
        | Sys_error _ -> []
      in
      UI_base.hpane ~l_ratio:0.5 ~width ~height
        (item_list
           ~height
           ~selected
           ?highlights:usable_script_highlights
           usable_scripts)
        (fun ~width ->
           List.map (fun s -> Nottui.Ui.atom (Notty.I.strf "%s" s)) lines
           |> Nottui.Ui.vcat
           |> Nottui.Ui.resize ~w:width ~h:height
           |> Lwd.return
        )
    )
  | _ -> (
      let$* document_selected = Lwd.get UI_base.Vars.index_of_document_selected in
      (* Fraction of the width given to the left (document list) pane. *)
      let l_ratio =
        match screen_split with
        | `Even -> 0.50
        | `Focus_left -> 1.0
        | `Wide_left -> 0.618
        | `Focus_right -> 0.0
        | `Wide_right -> 1.0 -. 0.618
      in
      UI_base.hpane ~l_ratio ~width ~height
        (Document_list.main
           ~height
           ~documents_marked
           ~search_result_groups
           ~document_selected)
        (Right_pane.main
           ~height
           ~search_result_groups
           ~document_selected
           ~show_bottom_right_pane)
    )
end
module Bottom_pane = struct
(* Renders the status bar. Its content depends on [input_mode]:
   - Save_script / Scripts: an editable text field for naming/filtering
     scripts, with tab completion, up/down selection and Ctrl+X deletion;
   - Save_script_overwrite / _no_name / _edit / Delete_script_confirm:
     a one-line prompt (Save_script_overwrite also performs the save as a
     side effect when no file clash exists);
   - Path_fuzzy_rank: the fuzzy ranking text field;
   - everything else: document counts and the selected index, with a
     right-aligned key binding hint. *)
let status_bar
    ~width
    ~(search_result_groups : Session.search_result_group array)
    ~(input_mode : UI_base.input_mode)
  : Nottui.Ui.t Lwd.t =
  let open Notty.Infix in
  let input_mode_image =
    UI_base.Input_mode_map.find input_mode UI_base.Status_bar.input_mode_images
  in
  let attr = UI_base.Status_bar.attr in
  let$* usable_scripts, _usable_script_highlights = Vars.usable_script_files in
  let$* script_selected = Lwd.get UI_base.Vars.index_of_script_selected in
  let usable_script_count = Dynarray.length usable_scripts in
  (* Basename and full path of the currently selected script, if any. *)
  let selected_script_file_and_path =
    if usable_script_count > 0 then (
      let dir = Params.script_dir () in
      let file = Dynarray.get usable_scripts script_selected in
      Some (file, Filename.concat dir file)
    ) else (
      None
    )
  in
  match input_mode with
  | Save_script | Scripts -> (
      (* Shared text-field UI: "Save as" when saving, "Filter" when
         browsing scripts. *)
      let text_field = Vars.script_name_field in
      let prompt =
        match input_mode with
        | Save_script -> "Save as"
        | Scripts -> "Filter"
        | _ -> failwith "unexpected case"
      in
      (* Tab completion (Save_script only): complete to the single match's
         name sans extension, or to the longest common prefix of all
         matches. *)
      let on_tab =
        match input_mode with
        | Save_script -> (
            Some (fun (text, _) ->
                let best_fit =
                  let usable_script_count = Dynarray.length usable_scripts in
                  if usable_script_count = 0 then (
                    text
                  ) else if usable_script_count = 1 then (
                    Filename.chop_extension (Dynarray.get usable_scripts 0)
                  ) else (
                    usable_scripts
                    |> Dynarray.to_seq
                    |> String_utils.longest_common_prefix
                  )
                in
                Lwd.set text_field (best_fit, String.length best_fit)
              )
          )
        | _ -> None
      in
      let on_submit =
        match input_mode with
        | Save_script -> (
            (fun (text, _x) ->
               Lwd.set text_field UI_base.empty_text_field;
               Nottui.Focus.release Vars.script_name_field_focus_handle;
               Lwd.set UI_base.Vars.input_mode
                 (if String.length text = 0 then
                    Save_script_no_name
                  else
                    Save_script_overwrite text
                 );
            )
          )
        | Scripts -> (
            (fun (_text, _x) ->
               (* Quit the UI loop and hand the chosen script path back to
                  the caller through [UI_base.Vars.action]. *)
               Option.iter (fun (_file, path) ->
                   Lwd.set text_field UI_base.empty_text_field;
                   Nottui.Focus.release Vars.script_name_field_focus_handle;
                   Lwd.set UI_base.Vars.quit true;
                   UI_base.Vars.action := Some (Open_script path);
                   Lwd.set UI_base.Vars.input_mode Navigate;
                 ) selected_script_file_and_path;
            )
          )
        | _ -> failwith "unexpected case"
      in
      (* Up/down moves the script selection while filtering. *)
      let on_up_down =
        match input_mode with
        | Save_script -> None
        | Scripts -> (
            Some (fun up_down _ ->
                UI_base.set_script_selected
                  ~choice_count:usable_script_count
                  (script_selected +
                   (match up_down with
                    | `Up -> (-1)
                    | `Down -> 1)
                  ))
          )
        | _ -> failwith "unexpected case"
      in
      (* Ctrl+X in Scripts mode: ask for confirmation to delete the
         selected script. *)
      let on_ctrl_prefixed =
        match input_mode with
        | Scripts -> (
            Some (fun key (_text, _x) ->
                match key with
                | (`ASCII 'X', [`Ctrl]) -> (
                    Option.iter (fun (file, path) ->
                        Lwd.set UI_base.Vars.input_mode (Delete_script_confirm (file, path))
                      ) selected_script_file_and_path;
                    `Handled
                  )
                | _ -> (
                    `Unhandled
                  )
              )
          )
        | _ -> None
      in
      let$* content =
        Nottui_widgets.hbox
          [
            Lwd.return
              (Nottui.Ui.atom
                 (Notty.I.hcat
                    [
                      input_mode_image;
                      UI_base.Status_bar.element_spacer;
                      Notty.I.strf ~attr "%s: [ " prompt;
                    ]));
            UI_base.edit_field text_field
              ~focus:Vars.script_name_field_focus_handle
              ~on_cancel:(fun (_, _) ->
                  Lwd.set UI_base.Vars.input_mode Navigate
                )
              ~on_change:(fun (text, x) ->
                  Lwd.set text_field (text, x);
                )
              ~on_submit
              ?on_tab
              ?on_up_down
              ?on_ctrl_prefixed;
            Lwd.return (Nottui.Ui.atom (Notty.I.strf ~attr " ] + %s" Params.docfd_script_ext));
          ]
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | Save_script_overwrite script_name -> (
      let path = compute_save_script_path script_name in
      if Sys.file_exists path then (
        (* Name clash: prompt before overwriting. *)
        let$* content =
          Lwd.return
            (Nottui.Ui.atom
               (Notty.I.hcat
                  [
                    input_mode_image;
                    UI_base.Status_bar.element_spacer;
                    Notty.I.strf ~attr "%s already exists, overwrite?"
                      (Filename.basename path);
                  ]))
        in
        let$ bar = UI_base.Status_bar.background_bar in
        Nottui.Ui.join_z bar content
      ) else (
        (* No clash: save immediately, then offer to edit the new script. *)
        save_script ~path;
        Lwd.set UI_base.Vars.input_mode (Save_script_edit script_name);
        UI_base.Status_bar.background_bar
      )
    )
  | Save_script_no_name -> (
      let$* content =
        Lwd.return
          (Nottui.Ui.atom
             (Notty.I.hcat
                [
                  input_mode_image;
                  UI_base.Status_bar.element_spacer;
                  Notty.I.strf ~attr "No name entered, saving skipped";
                ]))
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | Save_script_edit script_name -> (
      let path = compute_save_script_path script_name in
      let$* content =
        Lwd.return
          (Nottui.Ui.atom
             (Notty.I.hcat
                [
                  input_mode_image;
                  UI_base.Status_bar.element_spacer;
                  Notty.I.strf ~attr "Do you want to edit %s to add comments etc?" (Filename.basename path);
                ]))
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | Delete_script_confirm (script, _) -> (
      let$* content =
        Lwd.return
          (Nottui.Ui.atom
             (Notty.I.hcat
                [
                  input_mode_image;
                  UI_base.Status_bar.element_spacer;
                  Notty.I.strf ~attr "Confirm deletion of %s?" script;
                ]))
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | Path_fuzzy_rank -> (
      (* Fuzzy path ranking field: updates the ranking live on every edit,
         commits on submit, and lets up/down move the document selection. *)
      let text_field = Vars.path_fuzzy_rank_field in
      let document_count = Array.length search_result_groups in
      let$* document_current_choice = Lwd.get UI_base.Vars.index_of_document_selected in
      let$* content =
        Nottui_widgets.hbox
          [
            Lwd.return
              (Nottui.Ui.atom
                 (Notty.I.hcat
                    [
                      input_mode_image;
                      UI_base.Status_bar.element_spacer;
                    ]));
            UI_base.edit_field text_field
              ~focus:Vars.path_fuzzy_rank_field_focus_handle
              ~on_cancel:(fun (_, _) ->
                  Lwd.set UI_base.Vars.input_mode Navigate
                )
              ~on_change:(fun (text, x) ->
                  Lwd.set text_field (text, x);
                  update_path_fuzzy_rank ~commit:false ();
                )
              ~on_submit:(fun (text, x) ->
                  Lwd.set text_field (text, x);
                  update_path_fuzzy_rank ~commit:true ();
                  Lwd.set UI_base.Vars.input_mode Navigate
                )
              ~on_up_down:(fun ud _ ->
                  let offset =
                    match ud with
                    | `Up -> -1
                    | `Down -> 1
                  in
                  UI_base.set_document_selected
                    ~choice_count:document_count
                    (document_current_choice + offset);
                )
            ;
          ]
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | _ -> (
      (* Default status line: listed/total document counts, the selected
         index (when non-empty), and a right-aligned key binding hint. *)
      let$* index_of_document_selected = Lwd.get UI_base.Vars.index_of_document_selected in
      let document_count = Array.length search_result_groups in
      let$* (_cur_ver, snapshot) = Session_manager.cur_snapshot in
      let content =
        let file_shown_count =
          Notty.I.strf ~attr
            "%5d/%d documents listed"
            document_count
            (Session.Snapshot.state snapshot
             |> Session.State.size)
        in
        let hint =
          Notty.I.strf ~attr "< and > to see more key binding info, ? to toggle hide"
        in
        let hint_len = Notty.I.width hint in
        (* Pad the hint to the right edge of the bar. *)
        let hint_overlay =
          Notty.I.void
            (width - hint_len) 1
          <|>
          hint
        in
        let core =
          if document_count = 0 then (
            [
              UI_base.Status_bar.element_spacer;
              file_shown_count;
            ]
          ) else (
            let index_of_selected =
              Notty.I.strf ~attr
                "Index of document selected: %d"
                index_of_document_selected
            in
            [
              file_shown_count;
              UI_base.Status_bar.element_spacer;
              index_of_selected;
            ]
          )
        in
        Notty.I.zcat
          [
            Notty.I.hcat
              (input_mode_image
               ::
               UI_base.Status_bar.element_spacer
               ::
               core);
            hint_overlay;
          ]
        |> Nottui.Ui.atom
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
(* Key binding hint grids shown beneath the status bar, one grid per input
   mode. NOTE(review): the outer list of each grid appears to hold pages
   cycled with < and > (see decr_rotation/incr_rotation in the keyboard
   handler) — confirm against UI_base.Key_binding_info. *)
module Key_binding_info = struct
  let grid_contents : UI_base.Key_binding_info.grid_contents =
    let open UI_base.Key_binding_info in
    (* Placeholder row used to keep grids the same height across modes. *)
    let empty_row =
      [
        { label = ""; msg = "" };
      ]
    in
    let navigate_grid =
      [
        [
          { label = "Enter"; msg = "open document" };
          { label = "/"; msg = "SEARCH" };
          { label = "↑/↓/j/k"; msg = "select document" };
          { label = "s"; msg = "SORT-ASC" };
          { label = "Tab"; msg = "expand right pane" };
          { label = "y"; msg = "COPY" };
          { label = "n"; msg = "NARROW" };
          { label = "Space"; msg = "toggle mark" };
          { label = "h"; msg = "command history" };
          { label = "Ctrl+S"; msg = "save session as script" };
        ];
        [
          { label = "v"; msg = "focus content" };
          { label = "f"; msg = "FILTER" };
          { label = "Shift+↑/↓/j/k"; msg = "select search result" };
          { label = "Shift+S"; msg = "SORT-DESC" };
          { label = "Shift+Tab"; msg = "expand left pane" };
          { label = "Shift+Y"; msg = "COPY-PATHS" };
          { label = "d"; msg = "DROP" };
          { label = "m"; msg = "MARK" };
          { label = ""; msg = "" };
          { label = "Ctrl+O"; msg = "SCRIPTS" };
        ];
        [
          { label = "Ctrl+C"; msg = "exit" };
          { label = "x"; msg = "CLEAR" };
          { label = "-/="; msg = "scroll content view" };
          { label = "l"; msg = "LINKS" };
          { label = ""; msg = "" };
          { label = ""; msg = "" };
          { label = "r"; msg = "RELOAD" };
          { label = "Shift+M"; msg = "UNMARK" };
          { label = ""; msg = "" };
          { label = ""; msg = "" };
        ];
      ]
    in
    let search_grid =
      [
        [
          { label = "Enter"; msg = "exit SEARCH" };
        ];
      ]
    in
    let filter_grid =
      [
        [
          { label = "Enter"; msg = "exit FILTER" };
          { label = "Tab"; msg = "autocomplete" };
        ];
      ]
    in
    let save_script_grid =
      [
        [
          { label = "Enter"; msg = "confirm answer" };
          { label = "Tab"; msg = "autocomplete" };
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let save_script_confirm_grid =
      [
        [
          { label = "y"; msg = "confirm overwrite" };
          { label = "Esc/n"; msg = "cancel" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let save_script_cancel_grid =
      [
        [
          { label = "Enter"; msg = "confirm" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let save_script_edit_grid =
      [
        [
          { label = "y"; msg = "open in editor" };
          { label = "Esc/n"; msg = "skip" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let scripts_grid =
      [
        [
          { label = "Enter"; msg = "open" };
          { label = "↑/↓"; msg = "select" };
          { label = "Ctrl+X"; msg = "delete" };
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let delete_script_confirm_grid =
      [
        [
          { label = "y"; msg = "confirm deletion" };
          { label = "Esc/n"; msg = "cancel" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let clear_grid =
      [
        [
          { label = "/"; msg = "search field" };
          { label = "f"; msg = "filter field" };
          { label = "h"; msg = "command history" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let sort_asc_grid =
      [
        [
          { label = "s"; msg = "score" };
          { label = "p"; msg = "path" };
          { label = "d"; msg = "path date" };
          { label = "m"; msg = "mod time" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let sort_desc_grid =
      [
        [
          { label = "s"; msg = "score" };
          { label = "p"; msg = "path" };
          { label = "d"; msg = "path date" };
          { label = "m"; msg = "mod time" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let drop_grid =
      [
        [
          { label = "d"; msg = "selected" };
          { label = "l"; msg = "listed" };
          { label = "m"; msg = "marked" };
        ];
        [
          { label = "Shift+D"; msg = "unselected" };
          { label = "Shift+L"; msg = "unlisted" };
          { label = "Shift+M"; msg = "unmarked" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let mark_grid =
      [
        [
          { label = "l"; msg = "listed" };
        ];
        empty_row;
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let unmark_grid =
      [
        [
          { label = "l"; msg = "listed" };
          { label = "a"; msg = "all" };
        ];
        empty_row;
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let copy_grid =
      [
        [
          { label = "y"; msg = "selected search result" };
          { label = "m"; msg = "results of marked documents" };
          { label = "l"; msg = "results of listed documents" };
        ];
        [
          { label = "a"; msg = "results of selected document" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let copy_paths_grid =
      [
        [
          { label = "y"; msg = "path of selected document" };
          { label = "m"; msg = "paths of marked documents" };
          { label = "l"; msg = "paths of listed documents" };
        ];
        [
          { label = ""; msg = "" };
          { label = "Shift+M"; msg = "paths of unmarked documents" };
          { label = "Shift+L"; msg = "paths of unlisted documents" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let narrow_grid =
      [
        [
          { label = "0-9"; msg = "narrow search scope to level N" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let reload_grid =
      [
        [
          { label = "r"; msg = "selected" };
          { label = "a"; msg = "all" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let links_grid =
      [
        [
          { label = "Enter"; msg = "open" };
          { label = "o"; msg = "open and remain in LINKS" };
          { label = "↑/↓/j/k"; msg = "select" };
          { label = "y"; msg = "copy" };
        ];
        [
          { label = "Esc"; msg = "exit" };
        ];
        empty_row;
      ]
    in
    let path_fuzzy_rank_grid =
      [
        [
          { label = "Enter"; msg = "pin to top" };
          { label = "↑/↓"; msg = "select" };
        ];
        [
          { label = "Esc"; msg = "exit" };
        ];
        empty_row;
      ]
    in
    (* Parameterized modes are keyed with dummy payloads ("") — lookup
       presumably ignores the payload; see make_grid_lookup. *)
    [
      (Navigate, navigate_grid);
      (Search, search_grid);
      (Filter, filter_grid);
      (Clear, clear_grid);
      (Sort `Asc, sort_asc_grid);
      (Sort `Desc, sort_desc_grid);
      (Drop, drop_grid);
      (Mark, mark_grid);
      (Unmark, unmark_grid);
      (Narrow, narrow_grid);
      (Copy, copy_grid);
      (Copy_paths, copy_paths_grid);
      (Reload, reload_grid);
      (Save_script, save_script_grid);
      (Save_script_overwrite "", save_script_confirm_grid);
      (Save_script_no_name, save_script_cancel_grid);
      (Save_script_edit "", save_script_edit_grid);
      (Scripts, scripts_grid);
      (Delete_script_confirm ("", ""), delete_script_confirm_grid);
      (Links, links_grid);
      (Path_fuzzy_rank, path_fuzzy_rank_grid);
    ]

  let grid_lookup = UI_base.Key_binding_info.make_grid_lookup grid_contents

  (* Renders the hint pane for [input_mode], or a zero-size image when the
     pane is hidden. *)
  let main ~input_mode ~show_key_binding_info_pane =
    if show_key_binding_info_pane then (
      UI_base.Key_binding_info.main ~grid_lookup ~input_mode
    ) else (
      Lwd.return (Nottui.Ui.atom (Notty.I.void 0 0))
    )
end
(* Grid of autocomplete suggestions, shown only in Filter mode. Choices
   are laid out in rows of equal-width cells (widest choice + 4), padded
   to at least two rows so the surrounding layout does not jump.
   NOTE(review): with very small [width], [cells_per_row] can be 0 —
   confirm CCList.chunks tolerates that, or that callers guarantee a
   minimum width. *)
let autocomplete_grid ~input_mode ~width =
  match input_mode with
  | UI_base.Filter -> (
      let$* l = Lwd.get UI_base.Vars.autocomplete_choices in
      let max_len =
        List.fold_left (fun n x ->
            max n (String.length x)
          ) 0 l
      in
      let cell_len = max_len + 4 in
      let cells_per_row = width / cell_len in
      l
      |> CCList.chunks cells_per_row
      |> (fun rows ->
          (* Pad with rows of one empty cell up to a minimum of 2 rows. *)
          let row_count = List.length rows in
          let padding =
            if row_count < 2 then (
              CCList.(0 --^ (2 - row_count))
              |> List.map (fun _ -> [ "" ])
            ) else (
              []
            )
          in
          rows @ padding
        )
      |> List.map (fun row ->
          List.map (fun s ->
              let full_background = Notty.I.void cell_len 1 in
              Notty.I.(strf "%s" s > full_background)
              |> Nottui.Ui.atom
              |> Lwd.return
            ) row
        )
      |> Nottui_widgets.grid
        ~pad:(Nottui.Gravity.make ~h:`Negative ~v:`Negative)
    )
  | _ -> Lwd.return (Nottui.Ui.atom (Notty.I.void 0 0))
(* Filter input bar wired to the shared filter field; edits trigger live
   (non-commit) updates, submission commits. *)
let filter_bar =
  UI_base.Filter_bar.main
    ~text_field:UI_base.Vars.filter_field
    ~focus_handle:UI_base.Vars.filter_field_focus_handle
    ~on_change:(update_filter ~commit:false)
    ~on_submit:(update_filter ~commit:true)

(* Search input bar; same live/commit wiring as the filter bar. *)
let search_bar ~input_mode =
  UI_base.Search_bar.main ~input_mode
    ~text_field:UI_base.Vars.search_field
    ~focus_handle:UI_base.Vars.search_field_focus_handle
    ~on_change:(update_search ~commit:false)
    ~on_submit:(update_search ~commit:true)
(* Stacks the bottom pane rows: status bar, key binding hints,
   autocomplete grid, filter bar, and search bar. *)
let main ~width ~show_key_binding_info_pane ~search_result_groups =
  let$* input_mode = Lwd.get UI_base.Vars.input_mode in
  let rows =
    [
      status_bar ~width ~search_result_groups ~input_mode;
      Key_binding_info.main ~input_mode ~show_key_binding_info_pane;
      autocomplete_grid ~input_mode ~width;
      filter_bar ~input_mode;
      search_bar ~input_mode;
    ]
  in
  Nottui_widgets.vbox rows
end
(* Top-level keyboard dispatcher for the main UI.
   Dispatches [key] according to the current input mode
   (peeked from [UI_base.Vars.input_mode]). Side effects: mutates
   selection indices and input mode via [UI_base] setters, and may
   request application exit by setting [UI_base.Vars.quit] together
   with [UI_base.Vars.action]. Returns [`Handled]/[`Unhandled] as
   expected by [Nottui.Ui.keyboard_area]. *)
let keyboard_handler
~(session_state : Session.State.t)
~(search_result_groups : Session.search_result_group array)
(key : Nottui.Ui.key)
=
(* Snapshot current counts and selection indices from reactive state. *)
let document_count =
Array.length search_result_groups
in
let document_current_choice =
Lwd.peek UI_base.Vars.index_of_document_selected
in
let search_result_group =
if document_count = 0 then
None
else
Some search_result_groups.(document_current_choice)
in
let search_result_choice_count =
match search_result_group with
| None -> 0
| Some (_doc, search_results) -> Array.length search_results
in
let link_choice_count =
match search_result_group with
| None -> 0
| Some (doc, _search_results) -> Array.length (Document.links doc)
in
let search_result_current_choice =
Lwd.peek UI_base.Vars.index_of_search_result_selected
in
let link_current_choice =
Lwd.peek UI_base.Vars.index_of_link_selected
in
match Lwd.peek UI_base.Vars.input_mode with
(* Navigate: normal browsing mode; most single-key commands live here. *)
| Navigate -> (
match key with
(* Ctrl-C / Ctrl-Q: quit with no pending action. *)
| (`ASCII 'C', [`Ctrl])
| (`ASCII 'Q', [`Ctrl]) -> (
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := None;
`Handled
)
| (`ASCII '<', []) -> (
UI_base.Key_binding_info.decr_rotation ();
`Handled
)
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
`Handled
)
(* Space: toggle mark on the currently selected document. *)
| (`ASCII ' ', []) -> (
let index = Lwd.peek UI_base.Vars.index_of_document_selected in
if index < Array.length search_result_groups then (
let doc, _ = search_result_groups.(index) in
toggle_mark ~path:(Document.path doc)
);
`Handled
)
(* Mode-entering keys. *)
| (`ASCII 'm', []) -> (
UI_base.set_input_mode Mark;
`Handled
)
| (`ASCII 'M', []) -> (
UI_base.set_input_mode Unmark;
`Handled
)
| (`ASCII 'd', []) -> (
UI_base.set_input_mode Drop;
`Handled
)
| (`ASCII 'n', []) -> (
UI_base.set_input_mode Narrow;
`Handled
)
| (`ASCII 'r', []) -> (
UI_base.set_input_mode Reload;
`Handled
)
| (`ASCII 'y', []) -> (
UI_base.set_input_mode Copy;
`Handled
)
| (`ASCII 'Y', []) -> (
UI_base.set_input_mode Copy_paths;
`Handled
)
(* Session history navigation: undo (left) / redo (right). *)
| (`Arrow `Left, [])
| (`ASCII 'u', [])
| (`ASCII 'Z', [`Ctrl]) -> (
Session_manager.shift_ver ~offset:(-1);
`Handled
)
| (`Arrow `Right, [])
| (`ASCII 'R', [`Ctrl])
| (`ASCII 'Y', [`Ctrl]) -> (
Session_manager.shift_ver ~offset:1;
`Handled
)
(* Tab / Shift-Tab: shift the screen split boundary. *)
| (`Tab, [])
| (`Tab, [`Shift]) -> (
let direction =
match key with
| (_, [`Shift]) -> `Expand_left
| (_, _) -> `Expand_right
in
Session_manager.update_from_cur_snapshot
(fun cur_snapshot ->
let state = Session.Snapshot.state cur_snapshot in
let cur = Session.State.screen_split state in
let offset =
match direction with
| `Expand_left -> 1
| `Expand_right -> -1
in
let next =
Command.screen_split_of_int
(Command.int_of_screen_split cur + offset)
in
let command = `Split_screen next in
state
|> Session.run_command
(UI_base.task_pool ())
command
|> Option.get
|> (fun (command, state) ->
Session.Snapshot.make
~last_command:(Some command)
state)
);
`Handled
)
(* '?' / 'v': toggle visibility of a pane via a session command. *)
| (`ASCII '?', [])
| (`ASCII 'v', []) -> (
let pane =
match key with
| (`ASCII '?', []) -> `Key_binding_info
| (`ASCII 'v', []) -> `Bottom_right
| _ -> failwith "unexpected case"
in
Session_manager.update_from_cur_snapshot
(fun cur_snapshot ->
let state = Session.Snapshot.state cur_snapshot in
let cur = Session.State.show_pane state pane in
let command =
if cur then (
`Hide_pane pane
) else (
`Show_pane pane
) in
state
|> Session.run_command
(UI_base.task_pool ())
command
|> Option.get
|> (fun (command, state) ->
Session.Snapshot.make
~last_command:(Some command)
state)
);
`Handled
)
(* '=' / '-': scroll the content view down / up. *)
| (`ASCII '=', []) -> (
UI_base.incr_content_view_offset ();
`Handled
)
| (`ASCII '-', []) -> (
UI_base.decr_content_view_offset ();
`Handled
)
(* Shifted movement keys: move within the selected document's
   search results. *)
| (`Page `Down, [`Shift])
| (`ASCII 'J', [])
| (`Arrow `Down, [`Shift]) -> (
UI_base.set_search_result_selected
~choice_count:search_result_choice_count
(search_result_current_choice+1);
`Handled
)
| (`Page `Up, [`Shift])
| (`ASCII 'K', [])
| (`Arrow `Up, [`Shift]) -> (
UI_base.set_search_result_selected
~choice_count:search_result_choice_count
(search_result_current_choice-1);
`Handled
)
(* Unshifted movement keys: move between documents, unless there
   is a single document, in which case move within its results. *)
| (`Page `Down, [])
| (`ASCII 'j', [])
| (`Arrow `Down, []) -> (
if document_count = 1 then (
UI_base.set_search_result_selected
~choice_count:search_result_choice_count
(search_result_current_choice+1);
`Handled
) else (
UI_base.set_document_selected
~choice_count:document_count
(document_current_choice+1);
`Handled
)
)
| (`Page `Up, [])
| (`ASCII 'k', [])
| (`Arrow `Up, []) -> (
if document_count = 1 then (
UI_base.set_search_result_selected
~choice_count:search_result_choice_count
(search_result_current_choice-1);
`Handled
) else (
UI_base.set_document_selected
~choice_count:document_count
(document_current_choice-1);
`Handled
)
)
(* 'g' / 'G': jump to first / last document. *)
| (`ASCII 'g', []) -> (
UI_base.set_document_selected
~choice_count:document_count
0;
`Handled
)
| (`ASCII 'G', []) -> (
UI_base.set_document_selected
~choice_count:document_count
(document_count - 1);
`Handled
)
(* 'f' / 'F' / '/': focus the filter, path-fuzzy-rank, or search
   field respectively and switch to the matching input mode. *)
| (`ASCII 'f', []) -> (
Nottui.Focus.request UI_base.Vars.filter_field_focus_handle;
UI_base.set_input_mode Filter;
`Handled
)
| (`ASCII 'F', []) -> (
Nottui.Focus.request Vars.path_fuzzy_rank_field_focus_handle;
Lwd.set Vars.path_fuzzy_rank_field UI_base.empty_text_field;
UI_base.set_input_mode Path_fuzzy_rank;
`Handled
)
| (`ASCII '/', []) -> (
Nottui.Focus.request UI_base.Vars.search_field_focus_handle;
UI_base.set_input_mode Search;
`Handled
)
(* 'h': quit the TUI to edit command history externally. *)
| (`ASCII 'h', []) -> (
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := Some UI_base.Edit_command_history;
`Handled
)
(* Ctrl-S / Ctrl-O: save-script / open-scripts dialogs. *)
| (`ASCII 'S', [`Ctrl]) -> (
UI_base.set_input_mode Save_script;
refresh_script_files ();
Nottui.Focus.request Vars.script_name_field_focus_handle;
`Handled
)
| (`ASCII 'O', [`Ctrl]) -> (
UI_base.set_input_mode Scripts;
refresh_script_files ();
Nottui.Focus.request Vars.script_name_field_focus_handle;
`Handled
)
| (`ASCII 'x', []) -> (
UI_base.set_input_mode Clear;
`Handled
)
(* 'l': enter Links mode and pre-select the link nearest to the
   currently selected search result. *)
| (`ASCII 'l', []) -> (
UI_base.set_input_mode Links;
if search_result_choice_count > 0 then (
let (doc, search_results) = Option.get search_result_group in
let search_result = search_results.(search_result_current_choice) in
let links = Document.links doc in
(* Midpoint of the found phrase's min/max word positions; used as
   the reference point for picking the nearest link. *)
let avg_pos =
List.fold_left (fun min_max_pos search_result ->
let { Search_result.found_word_pos; _ } = search_result in
match min_max_pos with
| None -> Some (found_word_pos, found_word_pos)
| Some (min_pos, max_pos) -> (
Some (min found_word_pos min_pos,
max found_word_pos max_pos)
)
)
None
(Search_result.found_phrase search_result)
|> (fun x ->
let (x, y) = Option.get x in
(x + y) / 2)
in
let before, exact, after = Int_map.split avg_pos (Document.link_index_of_start_pos doc) in
let index =
match exact with
| Some index -> Some index
| None -> (
match
Int_map.max_binding_opt before,
Int_map.min_binding_opt after
with
| Some (pos_x, index_x), Some (pos_y, index_y) -> (
let diff_x = Int.to_float (Int.abs (pos_x - avg_pos)) in
let diff_y = Int.to_float (Int.abs (pos_y - avg_pos)) in
(* We prefer picking y (link after search result)
over x (link before search result), as it usually feels more
intuitive to jump forward than backward.
But if distance to x is <= 50% the distance
to y, then we resort to x.
*)
if diff_x /. diff_y <= 0.5 then (
Some index_x
) else (
let link_x = links.(index_x) in
let end_inc_pos_x = link_x.Link.end_inc_pos in
if pos_x <= avg_pos && avg_pos <= end_inc_pos_x then (
Some index_x
) else (
Some index_y
)
)
)
| Some (_pos, index), None
| None, Some (_pos, index) -> Some index
| None, None -> None
)
in
match index with
| None -> ()
| Some index -> (
UI_base.set_link_selected
~choice_count:link_choice_count
index
)
);
`Handled
)
(* 's' / 'S': enter Sort mode in ascending / descending order. *)
| (`ASCII 's', []) -> (
UI_base.set_input_mode (Sort `Asc);
`Handled
)
| (`ASCII 'S', []) -> (
UI_base.set_input_mode (Sort `Desc);
`Handled
)
(* Enter: quit and open the selected document (and result, if any). *)
| (`Enter, []) -> (
Option.iter (fun (doc, search_results) ->
let search_result =
if search_result_current_choice < Array.length search_results then
Some search_results.(search_result_current_choice)
else
None
in
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action :=
Some (UI_base.Open_file_and_search_result (doc, search_result));
)
search_result_group;
`Handled
)
(* Unrecognized keys are swallowed in Navigate mode. *)
| _ -> `Handled
)
(* Sort: pick the sort key; order was chosen when entering the mode. *)
| Sort order -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII 's', []) -> (
sort (`Score, order);
true
)
| (`ASCII 'p', []) -> (
sort (`Path, order);
true
)
| (`ASCII 'd', []) -> (
sort (`Path_date, order);
true
)
| (`ASCII 'm', []) -> (
sort (`Mod_time, order);
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Clear: clear search/filter fields or the command history. *)
| Clear -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '/', []) -> (
Lwd.set UI_base.Vars.search_field UI_base.empty_text_field;
update_search ~commit:true ();
true
)
| (`ASCII 'f', []) -> (
Lwd.set UI_base.Vars.filter_field UI_base.empty_text_field;
update_filter ~commit:true ();
true
)
| (`ASCII 'h', []) -> (
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := Some UI_base.Clear_command_history;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Drop: remove documents from the current session view. *)
| Drop -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'd', []) -> (
Option.iter (fun (doc, _search_results) ->
drop ~document_count (`Path (Document.path doc))
) search_result_group;
true
)
| (`ASCII 'D', []) -> (
Option.iter (fun (doc, _search_results) ->
drop ~document_count (`All_except (Document.path doc))
) search_result_group;
true
)
| (`ASCII 'l', []) -> (
drop ~document_count `Listed;
true
)
| (`ASCII 'L', []) -> (
drop ~document_count `Unlisted;
true
)
| (`ASCII 'm', []) -> (
drop ~document_count `Marked;
true
)
| (`ASCII 'M', []) -> (
drop ~document_count `Unmarked;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Mark: mark documents in bulk. *)
| Mark -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'l', []) -> (
mark `Listed;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Unmark: unmark listed documents or all documents. *)
| Unmark -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'l', []) -> (
unmark `Listed;
true
)
| (`ASCII 'a', []) -> (
unmark `All;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Narrow: digit keys 0-9 narrow search scope to that level. *)
| Narrow -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII c, []) -> (
let code_0 = Char.code '0' in
let code_9 = Char.code '9' in
let code_c = Char.code c in
if code_0 <= code_c && code_c <= code_9 then (
let level = code_c - code_0 in
narrow_search_scope_to_level ~level;
true
) else (
false
)
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Copy: pipe rendered search result groups to the clipboard. *)
| Copy -> (
let copy_search_result_groups (s : Session.search_result_group Seq.t) =
Clipboard.pipe_to_clipboard (fun oc ->
Printers.search_result_groups
~color:false
~underline:true
oc
s
)
in
let copy_search_result_group x =
copy_search_result_groups (Seq.return x)
in
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'y', []) -> (
Option.iter (fun (doc, search_results) ->
copy_search_result_group
(doc,
(if search_result_current_choice < Array.length search_results then
[|search_results.(search_result_current_choice)|]
else
[||])
)
)
search_result_group;
true
)
| (`ASCII 'a', []) -> (
Option.iter
copy_search_result_group
search_result_group;
true
)
| (`ASCII 'm', []) -> (
let marked =
Session.State.marked_document_paths session_state
in
search_result_groups
|> Array.to_seq
|> Seq.filter (fun (doc, _) ->
String_set.mem (Document.path doc) marked)
|> copy_search_result_groups;
true
)
| (`ASCII 'l', []) -> (
search_result_groups
|> Array.to_seq
|> copy_search_result_groups;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Copy_paths: pipe document paths (various selections) to the
   clipboard. *)
| Copy_paths -> (
let copy_paths s =
Clipboard.pipe_to_clipboard (fun oc ->
Seq.iter (Printers.path_image ~color:false oc) s
)
in
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'y', []) -> (
Option.iter (fun (doc, _search_results) ->
copy_paths (Seq.return (Document.path doc))
)
search_result_group;
true
)
| (`ASCII 'm', []) -> (
String_set.inter
(Session.State.usable_document_paths session_state)
(Session.State.marked_document_paths session_state)
|> String_set.to_seq
|> copy_paths;
true
)
| (`ASCII 'M', []) -> (
String_set.diff
(Session.State.usable_document_paths session_state)
(Session.State.marked_document_paths session_state)
|> String_set.to_seq
|> copy_paths;
true
)
| (`ASCII 'l', []) -> (
Session.State.usable_document_paths session_state
|> String_set.to_seq
|> copy_paths;
true
)
| (`ASCII 'L', []) -> (
Session.State.unusable_document_paths session_state
|> copy_paths;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Reload: reload the selected document or recompute all sources. *)
| Reload -> (
let exit =
(match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'r', []) -> (
reload_document_selected ~search_result_groups;
true
)
| (`ASCII 'a', []) -> (
UI_base.reset_document_selected ();
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := Some UI_base.Recompute_document_src;
true
)
| _ -> false
);
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Links: browse, open, or copy links of the selected document. *)
| Links -> (
let doc_and_link =
if link_choice_count > 0 then (
Option.map (fun (doc, _search_results) ->
(doc, (Document.links doc).(link_current_choice))
) search_result_group
) else (
None
)
in
let set_action_to_open_link () =
Option.iter (fun (doc, link) ->
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action :=
Some (UI_base.Open_link (doc, link))
) doc_and_link
in
let exit =
(match key with
| (`Escape, []) -> true
| (`Enter, []) -> (
set_action_to_open_link ();
true
)
| (`ASCII 'o', []) -> (
set_action_to_open_link ();
false
)
| (`Page `Down, [])
| (`ASCII 'j', [])
| (`Arrow `Down, []) -> (
UI_base.set_link_selected
~choice_count:link_choice_count
(link_current_choice+1);
false
)
| (`Page `Up, [])
| (`ASCII 'k', [])
| (`Arrow `Up, []) -> (
UI_base.set_link_selected
~choice_count:link_choice_count
(link_current_choice-1);
false
)
| (`ASCII 'y', []) -> (
Option.iter (fun (_doc, link) ->
Clipboard.pipe_to_clipboard (fun oc ->
output_string oc link.Link.link
);
) doc_and_link;
true
)
| _ -> false
);
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Save_script_overwrite: confirm overwriting an existing script. *)
| Save_script_overwrite script_name -> (
(match key with
| (`Escape, [])
| (`ASCII 'n', []) -> (
UI_base.set_input_mode Navigate;
)
| (`ASCII 'y', []) -> (
let path = compute_save_script_path script_name in
save_script ~path;
UI_base.set_input_mode (Save_script_edit script_name);
)
| _ -> ()
);
`Handled
)
(* Save_script_no_name: acknowledge the "no name given" message. *)
| Save_script_no_name -> (
let exit =
(match key with
| (`Enter, []) -> true
| _ -> false
);
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Save_script_edit: optionally open the saved script in an editor. *)
| Save_script_edit script_name -> (
let exit =
(match key with
| (`Escape, [])
| (`ASCII 'n', []) -> true
| (`ASCII 'y', []) -> (
let path = compute_save_script_path script_name in
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := Some (UI_base.Edit_script path);
true
)
| _ -> false
);
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Delete_script_confirm: confirm deletion of a script file. *)
| Delete_script_confirm (_script, path) -> (
(match key with
| (`Escape, [])
| (`ASCII 'n', []) -> (
UI_base.set_input_mode Scripts;
)
| (`ASCII 'y', []) -> (
Sys.remove path;
refresh_script_files ();
UI_base.set_input_mode Scripts;
)
| _ -> ()
);
`Handled
)
(* Remaining input modes (Search, Filter, ...) are handled by the
   focused text-field widgets, not by this dispatcher. *)
| _ -> `Unhandled
(* Root UI widget. Rebuilds whenever the current session snapshot or
   the terminal size changes: clamps the selection indices against the
   fresh search result groups, then stacks the top pane (with the
   keyboard handler attached) above the bottom pane. *)
let main : Nottui.ui Lwd.t =
let$* (_, snapshot) =
Session_manager.cur_snapshot
in
let session_state =
Session.Snapshot.state snapshot
in
let search_result_groups =
Session.State.search_result_groups session_state
in
let document_count = Array.length search_result_groups in
(* Re-apply the current selections so they are clamped to the new
   choice counts after the session state changed. *)
UI_base.set_document_selected
~choice_count:document_count
(Lwd.peek UI_base.Vars.index_of_document_selected);
if document_count > 0 then (
UI_base.set_search_result_selected
~choice_count:(Array.length
(snd search_result_groups.(Lwd.peek UI_base.Vars.index_of_document_selected)))
(Lwd.peek UI_base.Vars.index_of_search_result_selected)
);
if document_count > 0 then (
UI_base.set_link_selected
~choice_count:(search_result_groups.(Lwd.peek UI_base.Vars.index_of_document_selected)
|> fst
|> Document.links
|> Array.length)
(Lwd.peek UI_base.Vars.index_of_link_selected)
);
let$* (term_width, term_height) = Lwd.get UI_base.Vars.term_width_height in
let show_key_binding_info_pane = Session.State.show_pane session_state `Key_binding_info in
(* Bottom pane is laid out first; the top pane gets whatever vertical
   space remains. *)
let$* bottom_pane =
Bottom_pane.main
~width:term_width
~search_result_groups
~show_key_binding_info_pane
in
let bottom_pane_height = Nottui.Ui.layout_height bottom_pane in
let top_pane_height = term_height - bottom_pane_height in
let screen_split = Session.State.screen_split session_state in
let show_bottom_right_pane = Session.State.show_pane session_state `Bottom_right in
let$* top_pane =
Top_pane.main
~width:term_width
~height:top_pane_height
~documents_marked:(Session.State.marked_document_paths session_state)
~screen_split
~show_bottom_right_pane
~search_result_groups
in
Nottui_widgets.vbox
[
Lwd.return (Nottui.Ui.keyboard_area
(keyboard_handler ~session_state ~search_result_groups)
top_pane);
Lwd.return bottom_pane;
]
================================================
FILE: bin/UI_base.ml
================================================
open Docfd_lib
open Lwd_infix
(* All input modes of the TUI. The current mode determines both which
   key-binding grid is shown and how keystrokes are dispatched. *)
type input_mode =
| Navigate
| Search
| Filter
| Clear
| Sort of [ `Asc | `Desc ]
| Drop
| Mark
| Unmark
| Narrow
| Copy
| Copy_paths
| Reload
| Save_script
(* Payload: name of the script about to be overwritten. *)
| Save_script_overwrite of string
| Save_script_no_name
(* Payload: name of the script just saved, offered for editing. *)
| Save_script_edit of string
| Scripts
(* Payload: (script, path) of the script pending deletion. *)
| Delete_script_confirm of string * string
| Links
| Path_fuzzy_rank
[@@deriving ord]
(* Map keyed by input mode. The comparison deliberately ignores the
   payloads of the parameterized constructors so that, e.g., all
   [Save_script_overwrite _] values share a single map entry; all other
   modes fall back to the derived [compare_input_mode]. *)
module Input_mode_map = Map.Make (struct
type t = input_mode
let compare x y =
match x, y with
| Save_script_overwrite _, Save_script_overwrite _ -> 0
| Save_script_edit _, Save_script_edit _ -> 0
| Delete_script_confirm _, Delete_script_confirm _ -> 0
| _, _ -> compare_input_mode x y
end)
(* Action to perform after the TUI exits; stored in [Vars.action]
   alongside setting [Vars.quit]. *)
type top_level_action =
| Recompute_document_src
| Open_file_and_search_result of Document.t * Search_result.t option
| Open_link of (Document.t * Link.t)
| Clear_command_history
| Edit_command_history
| Open_script of string
| Edit_script of string
(* Status indicator for the search bar. *)
type search_status = [
| `Idle
| `Searching
| `Parse_error
]
(* Status indicator for the filter bar. *)
type filter_status = [
| `Idle
| `Filtering
| `Parse_error
]
(* Empty edit-field state: (text, cursor position). *)
let empty_text_field = ("", 0)
(* Chooses how locations are rendered for a document based on its file
   format: PDFs show page numbers, Pandoc-converted formats show no
   location, everything else shows line numbers. *)
let render_mode_of_document (doc : Document.t)
: Content_and_search_result_rendering.render_mode =
match File_utils.format_of_file (Document.path doc) with
| `PDF -> `Page_num_only
| `Pandoc_supported_format -> `None
| `Text | `Other -> `Line_num_only
(* Shared mutable UI state. Lwd.var values drive reactive re-rendering;
   plain refs/atomics hold state that does not need to trigger
   re-render. *)
module Vars = struct
(* Set to true to request exiting the TUI loop. *)
let quit = Lwd.var false
let pool : Task_pool.t option Atomic.t = Atomic.make None
(* Action to perform after quitting; see [top_level_action]. *)
let action : top_level_action option ref = ref None
let eio_env : Eio_unix.Stdenv.base option ref = ref None
let hide_document_list : bool Lwd.var = Lwd.var false
let input_mode : input_mode Lwd.var = Lwd.var Navigate
let document_src : Document_src.t ref = ref (Document_src.(Files empty_file_collection))
let term : Notty_unix.Term.t option ref = ref None
let term_width_height : (int * int) Lwd.var = Lwd.var (0, 0)
(* Scroll offset of the content view, relative to the selected item. *)
let content_view_offset = Lwd.var 0
let autocomplete_choices = Lwd.var []
(* Text fields and their keyboard focus handles. *)
let filter_field = Lwd.var empty_text_field
let filter_field_focus_handle = Nottui.Focus.make ()
let search_field = Lwd.var empty_text_field
let search_field_focus_handle = Nottui.Focus.make ()
let search_ui_status : search_status Lwd.var = Lwd.var `Idle
let filter_ui_status : filter_status Lwd.var = Lwd.var `Idle
(* Current selections across the various lists. *)
let index_of_document_selected = Lwd.var 0
let index_of_search_result_selected = Lwd.var 0
let index_of_link_selected = Lwd.var 0
let index_of_script_selected = Lwd.var 0
end
(* Reset the content view scroll offset to 0. *)
let reset_content_view_offset () =
Lwd.set Vars.content_view_offset 0
(* Scroll the content view by decrementing the offset by one. *)
let decr_content_view_offset () =
  Lwd.set Vars.content_view_offset (Lwd.peek Vars.content_view_offset - 1)
(* Scroll the content view by incrementing the offset by one. *)
let incr_content_view_offset () =
  Lwd.set Vars.content_view_offset (Lwd.peek Vars.content_view_offset + 1)
(* Reset all selections (document, search result, link) to the first
   entry and reset the content view scroll. *)
let reset_document_selected () =
reset_content_view_offset ();
Lwd.set Vars.index_of_document_selected 0;
Lwd.set Vars.index_of_search_result_selected 0;
Lwd.set Vars.index_of_link_selected 0
(* Select document [n], clamped to [choice_count] via
   [Misc_utils.bound_selection]. Only acts when the clamped index
   actually differs from the current one; a change also resets the
   content view offset and the dependent search-result/link
   selections. *)
let set_document_selected ~choice_count n =
let n = Misc_utils.bound_selection ~choice_count n in
let old = Lwd.peek Vars.index_of_document_selected in
if old <> n then (
reset_content_view_offset ();
Lwd.set Vars.index_of_document_selected n;
Lwd.set Vars.index_of_search_result_selected 0;
Lwd.set Vars.index_of_link_selected 0;
)
(* Select search result [n], clamped to [choice_count].
   NOTE(review): unlike [set_document_selected], the comparison with
   the old value happens BEFORE clamping, so an out-of-range [n] that
   clamps back to the current index still resets the content view
   offset — confirm whether this asymmetry is intentional. *)
let set_search_result_selected ~choice_count n =
let old = Lwd.peek Vars.index_of_search_result_selected in
if old <> n then (
reset_content_view_offset ();
let n = Misc_utils.bound_selection ~choice_count n in
Lwd.set Vars.index_of_search_result_selected n
)
(* Select link [n], clamped to [choice_count]. Same compare-then-clamp
   ordering as [set_search_result_selected] (see the NOTE there). *)
let set_link_selected ~choice_count n =
let old = Lwd.peek Vars.index_of_link_selected in
if old <> n then (
reset_content_view_offset ();
let n = Misc_utils.bound_selection ~choice_count n in
Lwd.set Vars.index_of_link_selected n
)
(* Select script [n], clamped to [choice_count]. Does not touch the
   content view offset, unlike the other selection setters. *)
let set_script_selected ~choice_count n =
let old = Lwd.peek Vars.index_of_script_selected in
if old <> n then (
let n = Misc_utils.bound_selection ~choice_count n in
Lwd.set Vars.index_of_script_selected n
)
(* Returns the global task pool. Raises [Invalid_argument] (via
   [Option.get]) if the pool has not been installed yet. *)
let task_pool () =
Option.get (Atomic.get Vars.pool)
(* Returns the Eio environment; raises if not yet initialized. *)
let eio_env () =
Option.get !Vars.eio_env
(* Returns the Notty terminal handle; raises if not yet initialized. *)
let term () =
Option.get !Vars.term
(* Invisible background sized to the full terminal; used to force
   widgets to occupy the whole screen. *)
let full_term_sized_background =
let$ (term_width, term_height) = Lwd.get Vars.term_width_height in
Notty.I.void term_width term_height
|> Nottui.Ui.atom
(* Vertical pane separator: a column of U+2502 in white, [height]
   cells tall. *)
let vbar ~height =
  let sep = Uchar.of_int 0x2502 in
  Nottui.Ui.atom (Notty.I.uchar Notty.A.(fg white) sep 1 height)
(* Horizontal pane separator: a row of U+2015 in white, [width]
   cells wide. *)
let hbar ~width =
  let sep = Uchar.of_int 0x2015 in
  Nottui.Ui.atom (Notty.I.uchar Notty.A.(fg white) sep width 1)
(* Horizontal two-pane layout. Splits [width] between [x] (left) and
   [y] (right) according to [l_ratio], drawing a [vbar] between them.
   If either side ends up with zero width, only the other side is
   rendered (and no separator is drawn). *)
let hpane
~l_ratio
~width
~height
(x : width:int -> Nottui.ui Lwd.t)
(y : width:int -> Nottui.ui Lwd.t)
: Nottui.ui Lwd.t =
let l_width =
(* Minus 1 for pane separator bar. *)
Int.to_float width *. l_ratio
|> Float.floor
|> Int.of_float
|> (fun x ->
if x = 0 || x = width then (
x
) else (
x - 1
))
in
let r_width =
(* Minus 1 here too just to be conservative. *)
width - l_width - 1
in
(* Force each side to exactly its allotted width and full height. *)
let crop w x = Nottui.Ui.resize ~w ~h:height x in
let x () =
let$ x = x ~width:l_width in
crop l_width x
in
let y () =
let$ y = y ~width:r_width in
crop r_width y
in
if l_width = 0 then (
y ()
) else if r_width = 0 then (
x ()
) else (
let$* x = x () in
let$ y = y () in
Nottui.Ui.hcat [
x;
vbar ~height;
y;
]
)
(* Vertical two-pane layout: [x] on top (ceil(height/2) rows), [y]
   below (floor(height/2) - 1 rows), with an [hbar] separator in
   between, so the total equals [height]. *)
let vpane
~width
~height
(x : height:int -> Nottui.ui Lwd.t)
(y : height:int -> Nottui.ui Lwd.t)
: Nottui.ui Lwd.t =
let t_height =
(Misc_utils.div_round_up height 2)
in
let b_height =
(* Minus 1 for pane separator bar. *)
(height / 2) - 1
in
let$* x = x ~height:t_height in
let$ y = y ~height:b_height in
let crop h x = Nottui.Ui.resize ~w:width ~h x in
Nottui.Ui.vcat [
crop t_height x;
hbar ~width;
crop b_height y;
]
(* Integer min/max/clamp helpers re-exported from Lwd_utils,
   used by [edit_field] below. *)
let (mini, maxi, clampi) = Lwd_utils.(mini, maxi, clampi)
(* Modified from upstream Nottui source code. *)
(* Single-line text edit widget over a [(text, cursor_pos)] Lwd.var.
   Callbacks:
   - [on_change]  : called with the new (text, pos) on every edit/move;
   - [on_submit]  : called on Enter;
   - [on_cancel]  : called on Escape;
   - [on_tab]     : optional Tab hook (e.g. autocomplete);
   - [on_up_down] : optional Up/Down arrow hook (e.g. history);
   - [on_ctrl_prefixed] : optional hook for any Ctrl-modified key.
   The widget itself never mutates [state]; all updates flow through
   the callbacks. Mouse left-click moves the cursor and requests
   focus. *)
let edit_field
~focus
~on_change
~on_submit
~on_cancel
?(on_tab : ((string * int) -> unit) option)
?(on_up_down : ([ `Up | `Down ] -> (string * int) -> unit) option)
?(on_ctrl_prefixed : (Nottui.Ui.key -> (string * int) -> [ `Handled | `Unhandled ]) option)
state
=
let update _focus_h focus (text, pos) =
(* Keep the cursor within the text bounds. *)
let pos = clampi pos ~min:0 ~max:(String.length text) in
let content =
Nottui.Ui.atom @@ Notty.I.hcat @@
if Nottui.Focus.has_focus focus then (
(* Focused: render text on lightblue with the character under the
   cursor (or a trailing space) highlighted in lightred. *)
let attr = Notty.A.(bg lightblue) in
let len = String.length text in
(if pos >= len
then [Notty.I.string attr text]
else [Notty.I.string attr (String.sub text 0 pos)])
@
(if pos < String.length text then
[Notty.I.string Notty.A.(bg lightred) (String.sub text pos 1);
Notty.I.string attr (String.sub text (pos + 1) (len - pos - 1))]
else [Notty.I.string Notty.A.(bg lightred) " "]);
) else
(* Unfocused: underlined text (a single space when empty). *)
[Notty.I.string Notty.A.(st underline) (if text = "" then " " else text)]
in
let handler = function
| `Escape, [] -> (
on_cancel (text, pos);
`Handled
)
(* Printable character: insert at the cursor. *)
| `ASCII k, [] -> (
let text =
if pos < String.length text then (
String.sub text 0 pos ^ String.make 1 k ^
String.sub text pos (String.length text - pos)
) else (
text ^ String.make 1 k
)
in
on_change (text, (pos + 1));
`Handled
)
(* Backspace: delete the character before the cursor. *)
| `Backspace, _ -> (
let text =
if pos > 0 then (
if pos < String.length text then (
String.sub text 0 (pos - 1) ^
String.sub text pos (String.length text - pos)
) else if String.length text > 0 then (
String.sub text 0 (String.length text - 1)
) else text
) else text
in
let pos = maxi 0 (pos - 1) in
on_change (text, pos);
`Handled
)
| `Enter, _ -> (
on_submit (text, pos);
`Handled
)
| `Arrow `Left, [] -> (
let pos = mini (String.length text) pos in
if pos > 0 then (
on_change (text, pos - 1);
`Handled
)
else `Unhandled
)
| `Arrow `Right, [] -> (
let pos = pos + 1 in
if pos <= String.length text
then (on_change (text, pos); `Handled)
else `Unhandled
)
(* Up/Down are only handled when [on_up_down] is provided. *)
| (`Arrow (`Up as ud), [])
| (`Arrow (`Down as ud), []) -> (
match on_up_down with
| None -> `Unhandled
| Some on_up_down -> (
on_up_down ud (text, pos);
`Handled
)
)
| (`Tab, []) -> (
match on_tab with
| None -> `Unhandled
| Some on_tab -> (
on_tab (text, pos);
`Handled
)
)
(* Any Ctrl-modified key is delegated to [on_ctrl_prefixed]. *)
| (_, [`Ctrl]) as x -> (
match on_ctrl_prefixed with
| None -> `Unhandled
| Some f -> (
f x (text, pos)
)
)
| _ -> `Unhandled
in
Nottui.Ui.keyboard_area ~focus handler content
in
let state = Lwd.get state in
let node =
Lwd.map2 ~f:(update focus) (Nottui.Focus.status focus) state
in
(* Left click: move cursor to the clicked column and grab focus. *)
let mouse_grab (text, pos) ~x ~y:_ = function
| `Left ->
if x <> pos then on_change (text, x);
Nottui.Focus.request focus;
`Handled
| _ -> `Unhandled
in
Lwd.map2 state node ~f:(fun state content ->
Nottui.Ui.mouse_area (mouse_grab state) content
)
(* Generic scroll-wheel handler: invokes [f] with the scroll direction
   and reports the event as handled; every other button is left
   unhandled. The [x]/[y] coordinates are accepted but unused. *)
let mouse_handler
    ~(f : [ `Up | `Down ] -> unit)
    ~x ~y
    (button : Notty.Unescape.button)
  =
  ignore x;
  ignore y;
  match button with
  | `Scroll `Down ->
    f `Down;
    `Handled
  | `Scroll `Up ->
    f `Up;
    `Handled
  | _ -> `Unhandled
(* Renders the content snippet of the selected document, centered on
   either the selected link (in Links mode) or the selected search
   result (all other modes). Scroll wheel adjusts the content view
   offset. *)
module Content_view = struct
let main
~(input_mode : input_mode)
~height
~width
~(search_result_group : Document.t * Search_result.t array)
~(search_result_selected : int)
~(link_selected : int)
: Nottui.ui Lwd.t =
let (document, search_results) = search_result_group in
let links = Document.links document in
(* Decide what to center the snippet on; [None] when there is
   nothing selected of the relevant kind. *)
let data =
let search_result_count = Array.length search_results in
let link_count = Array.length links in
match input_mode with
| Links -> (
if link_count = 0 then (
None
) else (
Some (`Link links.(link_selected))
)
)
| _ -> (
if search_result_count = 0 then (
None
) else (
Some (`Search_result search_results.(search_result_selected))
)
)
in
(* Depend on the offset var so scrolling triggers a re-render. *)
let$* _ = Lwd.get Vars.content_view_offset in
let content =
Content_and_search_result_rendering.content_snippet
~doc_id:(Document.doc_id document)
~view_offset:Vars.content_view_offset
?data
~height
~width
()
in
let$* background = full_term_sized_background in
Nottui.Ui.join_z background (Nottui.Ui.atom content)
|> Nottui.Ui.mouse_area
(mouse_handler
~f:(fun direction ->
match direction with
| `Up -> decr_content_view_offset ()
| `Down -> incr_content_view_offset ()
)
)
|> Lwd.return
end
(* Styling and pre-rendered images for the status bar: black-on-white
   background, spacer between elements, and one bold mode label per
   input mode. *)
module Status_bar = struct
let fg_color = Notty.A.black
let bg_color = Notty.A.white
let attr = Notty.A.(bg bg_color ++ fg fg_color)
(* Full-terminal-width white background strip, one row tall. *)
let background_bar : Nottui.Ui.t Lwd.t =
let$ (term_width, _term_height) = Lwd.get Vars.term_width_height in
Notty.I.char Notty.A.(bg bg_color) ' ' term_width 1
|> Nottui.Ui.atom
let element_spacing = 4
let element_spacer =
Notty.(I.string
A.(bg bg_color ++ fg fg_color))
(String.make element_spacing ' ')
(* Map from input mode to its status-bar label image. Every label is
   overlaid on a background of the widest label's width so the bar
   does not shift when the mode changes. Payload values in the keys
   below are dummies; Input_mode_map's compare ignores them. *)
let input_mode_images =
let l =
[ (Navigate, "NAVIGATE")
; (Search, "SEARCH")
; (Filter, "FILTER")
; (Clear, "CLEAR")
; (Sort `Asc, "SORT-ASC")
; (Sort `Desc, "SORT-DESC")
; (Drop, "DROP")
; (Mark, "MARK")
; (Unmark, "UNMARK")
; (Narrow, "NARROW")
; (Copy, "COPY")
; (Copy_paths, "COPY-PATHS")
; (Reload, "RELOAD")
; (Save_script, "SAVE-SCRIPT")
; (Save_script_overwrite "", "SAVE-SCRIPT")
; (Save_script_no_name, "SAVE-SCRIPT")
; (Save_script_edit "", "SAVE-SCRIPT")
; (Scripts, "SCRIPTS")
; (Delete_script_confirm ("", ""), "DELETE-SCRIPT")
; (Links, "LINKS")
; (Path_fuzzy_rank, "PATH-FUZZY-RANK")
]
in
let max_input_mode_string_len =
List.fold_left (fun acc (_, s) ->
max acc (String.length s)
)
0
l
in
let input_mode_string_background =
Notty.I.char Notty.A.(bg bg_color) ' ' max_input_mode_string_len 1
in
List.fold_left (fun m (mode, s) ->
let s = Notty.(I.string A.(bg bg_color ++ fg fg_color ++ st bold) s) in
Input_mode_map.add mode Notty.I.(s > input_mode_string_background) m
)
Input_mode_map.empty
l
end
(* Key-binding help grid: one grid of label/message pairs per input
   mode, with horizontal "rotation" to cycle columns, and a blink
   mechanism that briefly highlights a binding's label when it is
   used. Blinking is driven by two Eio fibers communicating through
   streams; [grid_lights] maps a label to its last-blink timestamp and
   the Lwd vars of every grid cell showing that label. *)
module Key_binding_info = struct
(* Number of columns to rotate each grid row by. *)
let rotation : int Lwd.var = Lwd.var 0
let incr_rotation () =
Lwd.set rotation (Lwd.peek rotation + 1)
let decr_rotation () =
Lwd.set rotation (Lwd.peek rotation - 1)
let reset_rotation () =
Lwd.set rotation 0
type labelled_msg = {
label : string;
msg : string;
}
type labelled_msg_line = labelled_msg list
type grid_contents = (input_mode * (labelled_msg_line list)) list
type grid_lookup = Nottui.ui Lwd.t Input_mode_map.t
(* label -> (timestamp of last blink request, light vars to toggle).
   Protected by [lock]. *)
let grid_lights : (string, Mtime.t ref * bool Lwd.var list) Hashtbl.t = Hashtbl.create 100
let lock = Eio.Mutex.create ()
(* Requests to turn a label's light on / off (off requests carry the
   request time and the target off time). *)
let grid_light_on_req : string Eio.Stream.t = Eio.Stream.create 100
let grid_light_off_req : (Mtime.t * Mtime.t * string) Eio.Stream.t = Eio.Stream.create 100
(* Request a brief highlight of [label] in the grid. *)
let blink label =
Eio.Stream.add grid_light_on_req label
(* Runs forever: one fiber turns lights on and schedules the off
   event; the other sleeps until the off time and turns the light off
   — but only if no newer blink arrived in the meantime (checked via
   the stored timestamp). *)
let grid_light_fiber () =
let clock = Eio.Stdenv.mono_clock (eio_env ()) in
Eio.Fiber.both
(fun () ->
while true do
let label = Eio.Stream.take grid_light_on_req in
let ts_now = Eio.Time.Mono.now clock in
Eio.Mutex.use_rw lock ~protect:false (fun () ->
match Hashtbl.find_opt grid_lights label with
| None -> failwith "unexpected case"
| Some (ts, l) -> (
ts := ts_now;
List.iter (fun x -> Lwd.set x true) l;
Eio.Stream.add
grid_light_off_req
(ts_now, Option.get (Mtime.(add_span ts_now Params.blink_on_duration)), label);
)
)
done
)
(fun () ->
while true do
let ts_req_time, ts_target_time, label = Eio.Stream.take grid_light_off_req in
Eio.Time.Mono.sleep_until clock ts_target_time;
Eio.Mutex.use_rw lock ~protect:false (fun () ->
match Hashtbl.find_opt grid_lights label with
| None -> failwith "unexpected case"
| Some (ts_last_update, l) -> (
(* Only switch off if this off-request corresponds to the
   most recent blink of this label. *)
if Mtime.equal !ts_last_update ts_req_time then (
List.iter (fun x -> Lwd.set x false) l;
)
)
)
done
)
(* Build the per-mode grid widgets. Column widths are computed per
   grid so labels/messages in the same column align; each cell also
   registers its light var in [grid_lights]. *)
let make_grid_lookup grid_contents : grid_lookup =
(* For each grid: column index -> (max label width, max msg width),
   measured in grapheme clusters. *)
let max_label_msg_len_lookup : (input_mode * (int * int) Int_map.t) list =
grid_contents
|> List.map (fun (grid_key, grid) ->
let lookup =
List.fold_left
(fun (acc : (int * int) Int_map.t) (line : labelled_msg_line) ->
line
|> List.to_seq
|> Seq.fold_lefti
(fun (acc : (int * int) Int_map.t) col ({ label; msg } : labelled_msg) ->
let label_len =
Uuseg_string.fold_utf_8 `Grapheme_cluster (fun x _ -> x + 1) 0 label
in
let msg_len =
Uuseg_string.fold_utf_8 `Grapheme_cluster (fun x _ -> x + 1) 0 msg
in
let (max_label_len, max_msg_len) =
match Int_map.find_opt col acc with
| None -> (label_len, msg_len)
| Some (max_label_len, max_msg_len) -> (
(max max_label_len label_len,
max max_msg_len msg_len)
)
in
Int_map.add col (max_label_len, max_msg_len) acc
)
acc
)
Int_map.empty
grid
in
(grid_key, lookup)
)
in
(* One grid cell: highlighted label + message, padded to the column
   widths. Registers this cell's light var under its label. *)
let label_msg_pair grid_key col { label; msg } : Nottui.ui Lwd.t =
let (max_label_len, max_msg_len) =
List.assoc grid_key max_label_msg_len_lookup
|> Int_map.find col
in
let light_on_var = Lwd.var false in
Eio.Mutex.use_rw lock ~protect:false (fun () ->
let x =
match Hashtbl.find_opt grid_lights label with
| None -> (ref Mtime.min_stamp, [ light_on_var ])
| Some (x, l) -> (x, light_on_var :: l)
in
Hashtbl.replace grid_lights label x
);
let$ light_on = Lwd.get light_on_var in
let label_attr =
if light_on then
Notty.A.(fg black ++ bg lightyellow ++ st bold)
else
Notty.A.(fg lightyellow ++ st bold)
in
let msg_attr = Notty.A.empty in
let msg = String.capitalize_ascii msg in
let content = Notty.(I.hcat
[ I.(string label_attr label
>
(string label_attr (String.make max_label_len ' ')))
; I.string A.empty " "
; I.string msg_attr msg
]
)
in
let full_background =
Notty.I.void (max_label_len + 1 + max_msg_len + 3) 1
in
Notty.I.(content > full_background)
|> Nottui.Ui.atom
in
List.fold_left (fun m (grid_key, grid_contents) ->
let max_row_size =
List.fold_left (fun n l ->
max n (List.length l)
)
0
grid_contents
in
(* Pad shorter rows with empty cells so rotation is uniform. *)
let grid_contents =
grid_contents
|> List.map (fun l ->
let padding =
List.init (max_row_size - List.length l)
(fun _ -> { label = ""; msg = "" })
in
List.mapi (fun col x ->
label_msg_pair grid_key col x
)
(l @ padding)
)
in
let grid =
let$* rotation = Lwd.get rotation in
grid_contents
|> List.map (fun l ->
(* Normalize rotation into [0, max_row_size) even when
   negative. *)
Misc_utils.rotate_list
(((rotation mod max_row_size) + max_row_size)
mod
max_row_size
)
l
)
|> Nottui_widgets.grid
~pad:(Nottui.Gravity.make ~h:`Negative ~v:`Negative)
in
Input_mode_map.add grid_key grid m
)
Input_mode_map.empty
grid_contents
(* Look up the grid for the current input mode. *)
let main ~(grid_lookup : grid_lookup) ~(input_mode : input_mode) =
Input_mode_map.find input_mode grid_lookup
end
(* Labels shown in front of the filter and search bars. *)
let filter_bar_label_string = "Document filter"
let search_bar_label_string = "Content search"
(* Width of the widest bar label; used to pad both labels to the same
   width so the bars line up. *)
let max_label_length =
  [ filter_bar_label_string
  ; search_bar_label_string
  ]
  |> List.map String.length
  |> List.fold_left max 0
let pad_label_string s =
CCString.pad ~side:`Right ~c:' ' max_label_length s
(* Tab completion of the word ending at cursor position [pos] in
   [text] against [choices].  Returns the updated (text, cursor
   position) pair.  Side effect: publishes the matching subset of
   [choices] to [Vars.autocomplete_choices] so the UI can render
   suggestions. *)
let autocomplete ~choices (text, pos) : string * int =
  let left = String.sub text 0 pos in
  let right = String.sub text pos (String.length text - pos) in
  (* Extract the trailing run of word characters (alphanumeric, '-',
     ':') of [s], i.e. the partial word just before the cursor. *)
  let grab_input_word (s : string) =
    let rec aux acc i s =
      if i < 0 then (
        CCString.of_list acc
      ) else (
        let c = s.[i] in
        if Parser_components.is_alphanum c
        || c = '-'
        || c = ':'
        then (
          aux (c :: acc) (i - 1) s
        ) else (
          (* Hit a non-word character: recurse with i = -1 to
             terminate, keeping what was collected so far. *)
          aux acc (-1) s
        )
      )
    in
    aux [] (String.length s - 1) s
  in
  let current_input_word = grab_input_word left in
  let usable_choices =
    List.filter
      (CCString.prefix ~pre:current_input_word)
      choices
  in
  Lwd.set
    Vars.autocomplete_choices usable_choices;
  match usable_choices with
  | [] -> (text, pos)
  | _ -> (
      (* Complete up to the longest common prefix of the remaining
         candidates; this is a no-op extension when only the typed
         prefix is shared. *)
      let best_fit = usable_choices
                     |> List.to_seq
                     |> String_utils.longest_common_prefix
      in
      (* Drop the partial word from [left]; [best_fit] re-includes it
         since every usable choice starts with it. *)
      let left =
        String.sub
          left
          0
          (String.length left - String.length current_input_word)
      in
      (String.concat "" [ left; best_fit; right ],
       pos + (String.length best_fit - String.length current_input_word))
    )
(* The document filter input bar: label, status indicator and editable
   text field, with Tab completion over filter expression keywords. *)
module Filter_bar = struct
  (* Label padded so Filter_bar and Search_bar fields align. *)
  let label_string = pad_label_string filter_bar_label_string

  (* Render the label; bold iff the filter field has input focus
     (input mode [Filter]). *)
  let label ~(input_mode : input_mode) =
    let attr =
      match input_mode with
      | Filter -> Notty.A.(st bold)
      | _ -> Notty.A.empty
    in
    Notty.I.string attr label_string
    |> Nottui.Ui.atom
    |> Lwd.return

  (* Reactive status light: green OK when idle, yellow ... while
     filtering, red ERR when the filter expression failed to parse. *)
  let status =
    let$* status = Lwd.get Vars.filter_ui_status in
    (match status with
     | `Idle -> (
         Notty.I.string Notty.A.(fg lightgreen)
           " OK"
       )
     | `Filtering -> (
         Notty.I.string Notty.A.(fg lightyellow)
           " ..."
       )
     | `Parse_error -> (
         Notty.I.string Notty.A.(fg lightred)
           " ERR"
       )
    )
    |> Nottui.Ui.atom
    |> Lwd.return

  (* Filter expression keywords offered via Tab completion. *)
  let autocomplete_choices =
    [ "path-date:"
    ; "path-fuzzy:"
    ; "path-glob:"
    ; "ext:"
    ; "content:"
    ; "mod-date:"
    ]

  (* Assemble the bar.  Edits are mirrored into [text_field] and
     [on_change] fires on every keystroke; submitting fires
     [on_submit], clears autocomplete suggestions, releases focus and
     returns to Navigate input mode. *)
  let main
      ~input_mode
      ~(text_field : (string * int) Lwd.var)
      ~focus_handle
      ~on_change
      ~on_submit
    : Nottui.ui Lwd.t =
    Nottui_widgets.hbox
      [
        label ~input_mode;
        status;
        Lwd.return (Nottui.Ui.atom (Notty.I.strf ": "));
        edit_field text_field
          ~focus:focus_handle
          ~on_cancel:(fun (_text, _x) -> ())
          ~on_change:(fun (text, x) ->
              Lwd.set text_field (text, x);
              on_change ();
            )
          ~on_submit:(fun (text, x) ->
              Lwd.set text_field (text, x);
              on_submit ();
              Lwd.set Vars.autocomplete_choices [];
              Nottui.Focus.release focus_handle;
              Lwd.set Vars.input_mode Navigate
            )
          ~on_tab:(fun (text, pos) ->
              let (text, pos) =
                autocomplete ~choices:autocomplete_choices (text, pos)
              in
              Lwd.set text_field (text, pos)
            );
      ]
end
(* The content search input bar.  Mirrors [Filter_bar] but has no Tab
   completion ([on_tab] is a no-op). *)
module Search_bar = struct
  (* Label padded so Filter_bar and Search_bar fields align. *)
  let label_string = pad_label_string search_bar_label_string

  (* Render the label; bold iff the search field has input focus
     (input mode [Search]). *)
  let label ~(input_mode : input_mode) =
    let attr =
      match input_mode with
      | Search -> Notty.A.(st bold)
      | _ -> Notty.A.empty
    in
    Notty.I.string attr label_string
    |> Nottui.Ui.atom
    |> Lwd.return

  (* Reactive status light: green OK when idle, yellow ... while
     searching, red ERR when the search expression failed to parse. *)
  let status =
    let$* status = Lwd.get Vars.search_ui_status in
    (match status with
     | `Idle -> (
         Notty.I.string Notty.A.(fg lightgreen)
           " OK"
       )
     | `Searching -> (
         Notty.I.string Notty.A.(fg lightyellow)
           " ..."
       )
     | `Parse_error -> (
         Notty.I.string Notty.A.(fg lightred)
           " ERR"
       )
    )
    |> Nottui.Ui.atom
    |> Lwd.return

  (* Assemble the bar; see [Filter_bar.main] for the submit/change
     protocol. *)
  let main
      ~input_mode
      ~(text_field : (string * int) Lwd.var)
      ~focus_handle
      ~on_change
      ~on_submit
    : Nottui.ui Lwd.t =
    Nottui_widgets.hbox
      [
        label ~input_mode;
        status;
        Lwd.return (Nottui.Ui.atom (Notty.I.strf ": "));
        edit_field text_field
          ~focus:focus_handle
          ~on_cancel:(fun (_text, _x) -> ())
          ~on_change:(fun (text, x) ->
              Lwd.set text_field (text, x);
              on_change ();
            )
          ~on_submit:(fun (text, x) ->
              Lwd.set text_field (text, x);
              on_submit ();
              Lwd.set Vars.autocomplete_choices [];
              Nottui.Focus.release focus_handle;
              Lwd.set Vars.input_mode Navigate
            )
          ~on_tab:(fun (_, _) -> ());
      ]
end
(* Alias of [term] with an explicit arrow type; used inside the UI
   loop to re-query the terminal handle each iteration. *)
let term' : unit -> Notty_unix.Term.t = term
(* Run the Nottui render/event loop until the [quit] var becomes
   true.  Each iteration polls terminal size and updates
   [Vars.term_width_height] on change so reactive layouts can respond,
   then steps the renderer with a 0.05s event timeout and yields to
   Eio so background fibers make progress. *)
let ui_loop ~quit ~term root =
  let renderer = Nottui.Renderer.make () in
  let root =
    let$ root = root in
    root
    (* Escape-to-quit handler kept disabled. *)
    (* |> Nottui.Ui.event_filter (fun x ->
       match x with
       | `Key (`Escape, []) -> (
       Lwd.set quit true;
       `Handled
       )
       | _ -> `Unhandled
       ) *)
  in
  let rec loop () =
    if not (Lwd.peek quit) then (
      let (term_width, term_height) = Notty_unix.Term.size (term' ()) in
      let (prev_term_width, prev_term_height) = Lwd.peek Vars.term_width_height in
      if term_width <> prev_term_width || term_height <> prev_term_height then (
        Lwd.set Vars.term_width_height (term_width, term_height)
      );
      (* NOTE(review): a fresh Lwd observer is created on every
         iteration — presumably fine for Nottui_unix.step, but verify
         this does not accumulate watchers. *)
      Nottui_unix.step
        ~process_event:true
        ~timeout:0.05
        ~renderer
        term
        (Lwd.observe @@ root);
      Eio.Fiber.yield ();
      loop ()
    )
  in
  loop ()
(* Switch the current input mode and reset the key-binding-info grid
   rotation so the grid starts from its first column again. *)
let set_input_mode mode =
  Lwd.set Vars.input_mode mode;
  Key_binding_info.reset_rotation ()
================================================
FILE: bin/args.ml
================================================
open Cmdliner
open Docfd_lib
open Misc_utils
(* --no-pdftotext: disable the external pdftotext command; files that
   would need it are excluded. *)
let no_pdftotext_arg_name = "no-pdftotext"

let no_pdftotext_arg =
  let doc =
    Fmt.str {|Disable use of pdftotext command.
Files that require use of pdftotext are excluded.
|}
  in
  Arg.(value & flag & info [ no_pdftotext_arg_name ] ~doc)

(* --no-pandoc: disable the external pandoc command; files that would
   need it are excluded. *)
let no_pandoc_arg_name = "no-pandoc"

let no_pandoc_arg =
  let doc =
    Fmt.str {|Disable use of pandoc command.
Files that require use of pandoc are excluded.
|}
  in
  Arg.(value & flag & info [ no_pandoc_arg_name ] ~doc)

(* --hidden: also scan dotfiles/dot-directories (skipped by default). *)
let hidden_arg_name = "hidden"

let hidden_arg =
  let doc =
    Fmt.str {|Scan hidden files and directories.
By default, hidden files and directories are skipped.
A file or directory is hidden if the base name starts
with a dot, e.g. ".gitignore".
|}
  in
  Arg.(value & flag & info [ hidden_arg_name ] ~doc)

(* --max-depth N: file tree scan depth limit (also applies to ** in
   globs). *)
let max_depth_arg_name = "max-depth"

let max_depth_arg =
  let doc =
    Fmt.str
      "Scan up to N levels when exploring file trees.
This applies to directory paths provided
and ** in globs.
Note that --%s 0 results in no-op when scanning
directories, and --%s 1 means only scanning for
direct children."
      max_depth_arg_name
      max_depth_arg_name
  in
  Arg.(
    value
    & opt int Params.default_max_file_tree_scan_depth
    & info [ max_depth_arg_name ] ~doc ~docv:"N"
  )
(* --exts: comma-separated list of recognized file extensions. *)
let exts_arg_name = "exts"

let exts_arg =
  let doc =
    "File extensions to use, comma separated. Leading dots of any extension are removed."
  in
  Arg.(
    value
    & opt string Params.default_recognized_exts
    & info [ exts_arg_name ] ~doc ~docv:"EXTS"
  )

(* --single-line-exts: like --exts but these extensions get single
   line search mode.  NOTE(review): identifier says "single_file" but
   the flag and its companions say "single-line"; consider renaming to
   single_line_exts_arg_name — kept as-is since other modules may
   reference this name. *)
let single_file_exts_arg_name = Fmt.str "single-line-%s" exts_arg_name

let single_line_exts_arg =
  let doc =
    Fmt.str "Same as --%s, but use single line search mode instead.
If an extension appears in both --%s and --%s,
then single line search mode is used for that extension."
      exts_arg_name
      exts_arg_name
      single_file_exts_arg_name
  in
  Arg.(
    value
    & opt string Params.default_recognized_single_line_exts
    & info [ single_file_exts_arg_name ] ~doc ~docv:"EXTS"
  )

(* --add-exts: extra extensions on top of --exts; repeatable. *)
let add_exts_arg_name = "add-exts"

let add_exts_arg =
  let doc =
    "Additional file extensions to use, comma separated.
May be specified multiple times."
  in
  Arg.(
    value
    & opt_all string []
    & info [ add_exts_arg_name ] ~doc ~docv:"EXTS"
  )

(* --single-line-add-exts: extra single-line-mode extensions;
   repeatable. *)
let single_line_add_exts_arg_name = Fmt.str "single-line-%s" add_exts_arg_name

let single_line_add_exts_arg =
  let doc =
    Fmt.str "Same as --%s, but use single line search mode instead." add_exts_arg_name
  in
  Arg.(
    value
    & opt_all string []
    & info [ single_line_add_exts_arg_name ] ~doc ~docv:"EXTS"
  )
(* --max-fuzzy-edit-dist N: edit distance budget for fuzzy matching. *)
let max_fuzzy_edit_dist_arg_name = "max-fuzzy-edit-dist"

let max_fuzzy_edit_dist_arg =
  let doc =
    "Maximum edit distance for fuzzy matches."
  in
  Arg.(
    value
    & opt int Params.default_max_fuzzy_edit_dist
    & info [ max_fuzzy_edit_dist_arg_name ] ~doc ~docv:"N"
  )

(* --max-token-search-dist N: how far ahead to look for the next
   matching token. *)
let max_token_search_dist_arg_name = "max-token-search-dist"

let max_token_search_dist_arg =
  let doc =
    "Maximum distance to look for the next matching token in document.
If two tokens are adjacent, then they are 1 distance away from each other.
Note that contiguous spaces count as one token as well."
  in
  Arg.(
    value
    & opt int Params.default_max_token_search_dist
    & info [ max_token_search_dist_arg_name ] ~doc ~docv:"N"
  )

(* --max-linked-token-search-dist N: like the above but for tokens
   written with no space between them in the search phrase. *)
let max_linked_token_search_dist_arg_name = "max-linked-token-search-dist"

let max_linked_token_search_dist_arg =
  let doc =
    Fmt.str
      {|Similar to %s but for linked tokens.
Two tokens are linked if there is no space between them in the search phrase,
e.g. "-" and ">" are linked in "->" but not in "- >",
"and" "/" "or" are linked in "and/or" but not in "and / or".|}
      max_token_search_dist_arg_name
  in
  Arg.(
    value
    & opt int Params.default_max_linked_token_search_dist
    & info [ max_linked_token_search_dist_arg_name ] ~doc ~docv:"N"
  )

(* --tokens-per-search-scope-level N: context window per narrow-mode
   scope level. *)
let tokens_per_search_scope_level_arg_name = "tokens-per-search-scope-level"

let tokens_per_search_scope_level_arg =
  let doc =
    Fmt.str
      {|Number of tokens to use around the current search
results for each search scope level in narrow mode.|}
  in
  Arg.(
    value
    & opt int Params.default_tokens_per_search_scope_level
    & info [ tokens_per_search_scope_level_arg_name ] ~doc ~docv:"N"
  )

(* --index-chunk-size N: tokens per indexing job sent to the thread
   pool. *)
let index_chunk_size_arg_name = "index-chunk-size"

let index_chunk_size_arg =
  let doc =
    "Number of tokens to send as a job unit to the thread pool for indexing."
  in
  Arg.(
    value
    & opt int Params.default_index_chunk_size
    & info [ index_chunk_size_arg_name ] ~doc ~docv:"N"
  )
(* --cache-dir DIR: where the index DB lives; defaults under
   XDG cache home. *)
let cache_dir_arg =
  let doc =
    "Docfd cache directory, mainly for index DB."
  in
  let cache_home = Xdg_utils.cache_home in
  Arg.(
    value
    & opt string (Filename.concat cache_home "docfd")
    & info [ "cache-dir" ] ~doc ~docv:"DIR"
  )

(* --cache-limit N: index is trimmed to at most N documents at
   launch. *)
let cache_limit_arg_name = "cache-limit"

let cache_limit_arg =
  let doc =
    "Maximum number of documents to keep in index.
Docfd resets the cache to this limit at launch."
  in
  Arg.(
    value
    & opt int Params.default_cache_limit
    & info [ cache_limit_arg_name ] ~doc ~docv:"N"
  )

(* --data-dir DIR: defaults under XDG data home. *)
let data_dir_arg =
  let doc =
    "Docfd data directory."
  in
  let data_home = Xdg_utils.data_home in
  Arg.(
    value
    & opt string (Filename.concat data_home "docfd")
    & info [ "data-dir" ] ~doc ~docv:"DIR"
  )

(* --index-only: build the index then exit without entering the UI. *)
let index_only_arg =
  let doc =
    Fmt.str "Exit after indexing."
  in
  Arg.(value & flag & info [ "index-only" ] ~doc)
(* --debug-log FILE: enable debug mode; "-" logs to stderr, otherwise
   FILE is opened in append mode. *)
let debug_log_arg =
  let doc =
    Fmt.str "Specify debug log file to use and enable debug mode where
additional checks are enabled and additional info is displayed on UI.
If FILE is -, then debug log is printed to stderr instead.
Otherwise FILE is opened in append mode for log writing."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ "debug-log" ] ~doc ~docv:"FILE"
  )

(* --start-with-filter EXP: pre-populate the filter in interactive
   mode. *)
let start_with_filter_arg_name = "start-with-filter"

let start_with_filter_arg =
  let doc =
    Fmt.str "Start interactive mode with an initial filter using expression EXP."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ start_with_filter_arg_name ] ~doc ~docv:"EXP"
  )

(* --start-with-search EXP: pre-populate the search in interactive
   mode. *)
let start_with_search_arg_name = "start-with-search"

let start_with_search_arg =
  let doc =
    Fmt.str "Start interactive mode with search expression EXP."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ start_with_search_arg_name ] ~doc ~docv:"EXP"
  )
(* --sample EXP: non-interactive search showing only the top results
   per document (count set by --samples-per-doc). *)
let sample_arg_name = "sample"

let samples_per_doc_arg_name = "samples-per-doc"

let sample_arg =
  let doc =
    Fmt.str "Search with expression EXP in non-interactive mode but only
show top N results where N is controlled by --%s."
      samples_per_doc_arg_name
  in
  Arg.(
    value
    & opt (some string) None
    & info [ sample_arg_name ] ~doc ~docv:"EXP"
  )

(* --samples-per-doc N: result count cap per document for sampling. *)
let samples_per_doc_arg =
  let doc =
    Fmt.str
      "Number of search results to show per document when --%s is used
or when samples printing is triggered."
      sample_arg_name
  in
  Arg.(
    value
    & opt int Params.default_samples_per_document
    & info [ samples_per_doc_arg_name ] ~doc ~docv:"N"
  )

(* --search EXP: non-interactive search printing all results. *)
let search_arg_name = "search"

let search_arg =
  let doc =
    "Search with expression EXP in non-interactive mode and show all results."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ search_arg_name ] ~doc ~docv:"EXP"
  )

(* --filter EXP: non-interactive document filter; must be combined
   with a consumer (see [check]). *)
let filter_arg_name = "filter"

let filter_arg =
  let doc =
    Fmt.str
      "Filter with expression EXP in non-interactive mode. May be combined with --%s or --%s."
      search_arg_name
      sample_arg_name
  in
  Arg.(
    value
    & opt (some string) None
    & info [ filter_arg_name ] ~doc ~docv:"EXP"
  )

(* --sort TYPE,ORDER: document ordering when scores are available. *)
let sort_arg_name = "sort"

let sort_arg =
  let doc =
    Fmt.str
      "Sort document by: TYPE,ORDER. TYPE is one of: path, path-date, score, mod-time. ORDER is one of: asc, desc."
  in
  Arg.(
    value
    & opt string Params.default_sort_by_arg
    & info [ sort_arg_name ] ~doc ~docv:"TYPE,ORDER"
  )
(* Shared never/always/auto enum for --color and --underline. *)
let style_mode_options = [ ("never", `Never); ("always", `Always); ("auto", `Auto) ]

(* --color MODE: color mode for printed search results. *)
let color_arg =
  let doc =
    Fmt.str
      "Set color mode for search result printing, one of: %s."
      (String.concat ", " (List.map fst style_mode_options))
  in
  Arg.(
    value
    & opt (Arg.enum style_mode_options) `Auto
    & info [ "color" ] ~doc ~docv:"MODE"
  )

(* --underline MODE: underline mode for printed search results. *)
let underline_arg =
  let doc =
    Fmt.str
      "Set underline mode for search result printing, one of: %s."
      (String.concat ", " (List.map fst style_mode_options))
  in
  Arg.(
    value
    & opt (Arg.enum style_mode_options) `Auto
    & info [ "underline" ] ~doc ~docv:"MODE"
  )
(* --search-result-print-text-width N: wrap width for printed
   results. *)
let search_result_print_text_width_arg_name = "search-result-print-text-width"

let search_result_print_text_width_arg =
  let doc =
    "Text width to use when printing search results."
  in
  Arg.(
    value
    & opt int Params.default_search_result_print_text_width
    & info [ search_result_print_text_width_arg_name ] ~doc ~docv:"N"
  )

(* --search-result-print-snippet-min-size N: snippets shorter than N
   non-space tokens get surrounding lines added for context. *)
let search_result_print_snippet_min_size_arg_name = "search-result-print-snippet-min-size"

let search_result_print_snippet_min_size_arg =
  let doc =
    "If the search result to be printed has fewer than N non-space tokens,
then Docfd tries to add surrounding lines to the snippet
to give better context."
  in
  Arg.(
    value
    & opt int Params.default_search_result_print_snippet_min_size
    & info [ search_result_print_snippet_min_size_arg_name ] ~doc ~docv:"N"
  )

(* --search-result-print-snippet-max-add-lines N: cap on added
   context lines per direction. *)
let search_result_print_snippet_max_add_lines_arg_name = "search-result-print-snippet-max-add-lines"

let search_result_print_snippet_max_add_lines_arg =
  let doc =
    "This controls the maximum number of surrounding lines
Docfd can add in each direction."
  in
  Arg.(
    value
    & opt int Params.default_search_result_print_snippet_max_additional_lines_each_direction
    & info [ search_result_print_snippet_max_add_lines_arg_name ] ~doc ~docv:"N"
  )
(* --script FILE: run a Docfd script non-interactively. *)
let script_arg_name = "script"

let script_arg =
  let doc =
    Fmt.str "Read and run Docfd script FILE."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ script_arg_name ] ~doc ~docv:"FILE"
  )

(* --start-with-script FILE: run a script then stay in interactive
   mode. *)
let start_with_script_arg_name = "start-with-script"

let start_with_script_arg =
  let doc =
    Fmt.str "Read and run Docfd script FILE, then continue in interactive mode."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ start_with_script_arg_name ] ~doc ~docv:"FILE"
  )

(* --paths-from FILES: read extra paths from files (or stdin via
   "-"); repeatable. *)
let paths_from_arg_name = "paths-from"

let paths_from_arg =
  let doc =
    Fmt.str "Read list of paths from FILES,
which is a comma separated list of files,
and add to the final list of paths to be scanned.
For example, \"--%s path-list0.txt,path-list1.txt\".
If - is in FILES, then stdin is also read for
list of paths to be scanned. This is useful
for piping, e.g. \"find -name '*.txt' | docfd --%s -\""
      paths_from_arg_name
      paths_from_arg_name
  in
  Arg.(
    value
    & opt_all string []
    & info [ paths_from_arg_name ] ~doc ~docv:"FILES"
  )

(* --glob PATTERN: add files matched by a glob; matched directories
   are not recursed into.  Repeatable. *)
let glob_arg_name = "glob"

let glob_arg =
  let doc =
    "Add to the final list of paths to be scanned using glob pattern.
The pattern should pick up the files directly.
Directories picked up by the pattern are not further scanned
for files with suitable extensions."
  in
  Arg.(
    value
    & opt_all string []
    & info [ glob_arg_name ] ~doc ~docv:"PATTERN"
  )
(* --single-line-glob PATTERN: like --glob but matched files are
   searched in single line mode; overlap with --glob resolves to
   single line mode. *)
let single_line_glob_arg_name = Fmt.str "single-line-%s" glob_arg_name

let single_line_glob_arg =
  let doc =
    (* Fixed grammar in help text: was "If the file are picked up". *)
    Fmt.str
      "Same as --%s, but use single line search mode instead.
If a file is picked up by both patterns from --%s and --%s,
then single line search mode is used."
      glob_arg_name
      glob_arg_name
      single_line_glob_arg_name
  in
  Arg.(
    value
    & opt_all string []
    & info [ single_line_glob_arg_name ] ~doc ~docv:"PATTERN"
  )
(* --single-line: make single line search mode the default. *)
let single_line_arg =
  let doc =
    "Use single line search mode by default."
  in
  Arg.(
    value
    & flag
    & info [ "single-line" ] ~doc
  )

(* --open-with EXT:LAUNCH_MODE=CMD: custom opener per extension, run
   either in the terminal or detached; CMD supports {path},
   {page_num}, {line_num}, {search_word} placeholders.  Repeatable. *)
let open_with_arg_name = "open-with"

let open_with_arg =
  let doc =
    Fmt.str "Specify custom command CMD for
opening files with file extension EXT.
May be specified multiple times.
Leading dots of EXT are removed.
LAUNCH_MODE specifies how the command should be
executed:
`terminal` - for commands which Docfd
should run in the terminal and wait for completion,
e.g. text editors, pagers.
`detached` - for background
commands, such as PDF viewers or
other GUI tools.
CMD may contain the following placeholders:
{path} - file path,
{page_num} - page number (PDF only),
{line_num} - line number (not available in PDF),
{search_word} - most unique word of the page
(PDF only, useful for passing to PDF viewers as search term).
Examples: \"pdf:detached='okular --page {page_num} --find {search_word} {path}'\",
\"txt:terminal='nano +{line_num} {path}'\".
"
  in
  Arg.(
    value
    & opt_all string []
    & info [ open_with_arg_name ] ~doc ~docv:"EXT:LAUNCH_MODE=CMD"
  )
(* -l / --files-with-match: print only the paths of matching
   documents instead of the search results themselves. *)
let files_with_match_arg_name = "files-with-match"

let files_with_match_arg =
  let doc =
    (* Fixed grammar in help text: was "would have be listed". *)
    Fmt.str "If paired with
--%s or --%s,
then print the paths of documents with at least one match
instead of printing the search results.
If paired with --%s, then print paths of documents
that would have been listed in the UI
after running the commands in interactive mode."
      search_arg_name
      sample_arg_name
      script_arg_name
  in
  Arg.(
    value
    & flag
    & info [ "l"; files_with_match_arg_name ] ~doc
  )
(* --files-without-match: print only the paths of documents with no
   matches; incompatible with --script (enforced in [check]). *)
let files_without_match_arg_name = "files-without-match"

let files_without_match_arg =
  let doc =
    Fmt.str "If paired with
--%s or --%s,
then print the paths of documents with no matches
instead of printing the search results.
Cannot be paired with --%s."
      search_arg_name
      sample_arg_name
      script_arg_name
  in
  Arg.(
    value
    & flag
    & info [ files_without_match_arg_name ] ~doc
  )
(* --sort-no-score TYPE,ORDER: ordering used when no scores exist
   (e.g. with --files-without-match); "score" is not a valid TYPE. *)
let sort_no_score_arg_name = "sort-no-score"

let sort_no_score_arg =
  let doc =
    (* Bug fix: the first %s was fed [sort_no_score_arg_name], making
       the help text read "Same as --sort-no-score ..." — a reference
       to itself.  It should reference --sort. *)
    Fmt.str
      "Same as --%s but sorting TYPE cannot be score. Used for scenarios when no scores are available, e.g. --%s is used."
      sort_arg_name
      files_without_match_arg_name
  in
  Arg.(
    value
    & opt string Params.default_sort_by_no_score_arg
    & info [ sort_no_score_arg_name ] ~doc ~docv:"TYPE,ORDER"
  )
(* Positional PATH arguments; defaults to the current directory
   unless another path source flag is in use. *)
let paths_arg =
  let doc =
    Fmt.str
      "PATH can be either file or directory.
Directories are scanned for files with matching extensions.
If no paths are provided,
then Docfd defaults to scanning the current working directory
unless any of the following is used: %a.
To use piped stdin as input, the list of paths must be empty."
      Fmt.(list ~sep:comma (fun fmt s -> Fmt.pf fmt "--%s" s))
      [ paths_from_arg_name; glob_arg_name; single_line_glob_arg_name ]
  in
  Arg.(value & pos_all string [] & info [] ~doc ~docv:"PATH")
(* Validate argument values and combinations.  Every violation exits
   the process with an error message via [exit_with_error_msg]; on
   success the function simply returns unit. *)
let check
    ~max_depth
    ~max_fuzzy_edit_dist
    ~max_token_search_dist
    ~max_linked_token_search_dist
    ~tokens_per_search_scope_level
    ~index_chunk_size
    ~cache_limit
    ~start_with_filter
    ~start_with_search
    ~filter_exp
    ~sample_search_exp
    ~samples_per_doc
    ~search_exp
    ~search_result_print_text_width
    ~search_result_print_snippet_min_size
    ~search_result_print_max_add_lines
    ~start_with_script
    ~script
    ~paths_from
    ~print_files_with_match
    ~print_files_without_match
  =
  (* Numeric range checks. *)
  if max_depth < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" max_depth_arg_name)
  );
  if max_fuzzy_edit_dist < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" max_fuzzy_edit_dist_arg_name)
  );
  if max_token_search_dist < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" max_token_search_dist_arg_name)
  );
  if max_linked_token_search_dist < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" max_linked_token_search_dist_arg_name)
  );
  if tokens_per_search_scope_level < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" tokens_per_search_scope_level_arg_name)
  );
  if index_chunk_size < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" index_chunk_size_arg_name)
  );
  if cache_limit < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" cache_limit_arg_name)
  );
  if samples_per_doc < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" samples_per_doc_arg_name)
  );
  if search_result_print_text_width < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" search_result_print_text_width_arg_name)
  );
  if search_result_print_snippet_min_size < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" search_result_print_snippet_min_size_arg_name)
  );
  if search_result_print_max_add_lines < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" search_result_print_snippet_max_add_lines_arg_name)
  );
  (* --filter only makes sense alongside something that consumes the
     filtered document set. *)
  if Option.is_some filter_exp then (
    if not (
        Option.is_some sample_search_exp
        ||
        Option.is_some search_exp
        ||
        print_files_with_match
        ||
        print_files_without_match
      )
    then (
      exit_with_error_msg
        (Fmt.str "--%s must be used with at least one of: --%s, --%s, --%s, --%s"
           filter_arg_name
           search_arg_name
           sample_arg_name
           files_with_match_arg_name
           files_without_match_arg_name
        )
    )
  );
  let cannot_be_used_together x y =
    exit_with_error_msg
      (Fmt.str "--%s and --%s cannot be used together" x y)
  in
  (match print_files_with_match, print_files_without_match with
   | true, true -> (
       cannot_be_used_together
         files_with_match_arg_name files_without_match_arg_name
     )
   | true, false -> (
       (* --files-with-match needs a query/filter/script to act on. *)
       if not (
           Option.is_some filter_exp
           ||
           Option.is_some sample_search_exp
           ||
           Option.is_some search_exp
           ||
           Option.is_some script
         )
       then (
         exit_with_error_msg
           (Fmt.str "--%s cannot be used without one of: --%s, --%s, --%s, --%s"
              files_with_match_arg_name
              filter_arg_name
              sample_arg_name
              search_arg_name
              script_arg_name
           )
       )
     )
   | false, true -> (
       (* --files-without-match: same idea, but scripts are not
          allowed with it (checked further below). *)
       if not (
           Option.is_some filter_exp
           ||
           Option.is_some sample_search_exp
           ||
           Option.is_some search_exp
         )
       then (
         exit_with_error_msg
           (Fmt.str "--%s cannot be used without one of: --%s, --%s, --%s"
              files_without_match_arg_name
              filter_arg_name
              sample_arg_name
              search_arg_name
           )
       )
     )
   | false, false -> ()
  );
  (* stdin can only be consumed once, so "-" may appear at most once
     across all --paths-from occurrences. *)
  (
    let l = List.filter (fun x -> x = "-") paths_from in
    if List.length l > 1 then (
      exit_with_error_msg
        (Fmt.str "at most one \"-\" may be supplied to --%s" paths_from_arg_name)
    )
  );
  (* Parse-check expressions early for a clear startup error. *)
  (match filter_exp with
   | None -> ()
   | Some filter_exp_string -> (
       match
         Filter_exp.parse filter_exp_string
       with
       | None -> (
           exit_with_error_msg "failed to parse filter exp"
         )
       | Some _ -> ()
     )
  );
  (match sample_search_exp, search_exp with
   | None, None -> ()
   | Some _, Some _ -> (
       exit_with_error_msg
         (* Fix: prefix arg names with "--" for consistency with
            every other message in this function. *)
         (Fmt.str "--%s and --%s cannot be used together" sample_arg_name search_arg_name)
     )
   | Some search_exp_string, None
   | None, Some search_exp_string -> (
       match
         Search_exp.parse search_exp_string
       with
       | None -> (
           exit_with_error_msg "failed to parse search exp"
         )
       | Some _ -> ()
     )
  );
  (* --start-with-* conflicts with the non-interactive query args. *)
  let start_with_arg_check ~arg_name =
    if Option.is_some filter_exp then (
      cannot_be_used_together arg_name filter_arg_name
    );
    if Option.is_some sample_search_exp then (
      cannot_be_used_together arg_name sample_arg_name
    );
    if Option.is_some search_exp then (
      cannot_be_used_together arg_name search_arg_name
    );
  in
  if Option.is_some start_with_filter then (
    start_with_arg_check ~arg_name:start_with_filter_arg_name
  );
  if Option.is_some start_with_search then (
    start_with_arg_check ~arg_name:start_with_search_arg_name
  );
  if Option.is_some start_with_script then (
    start_with_arg_check ~arg_name:start_with_script_arg_name
  );
  (* Script args additionally conflict with --start-with-filter,
     --start-with-search and --files-without-match. *)
  let script_common_check ~arg_name =
    if Option.is_some filter_exp then (
      cannot_be_used_together arg_name filter_arg_name
    );
    if Option.is_some sample_search_exp then (
      cannot_be_used_together arg_name sample_arg_name
    );
    if Option.is_some search_exp then (
      cannot_be_used_together arg_name search_arg_name
    );
    if Option.is_some start_with_filter then (
      cannot_be_used_together arg_name start_with_filter_arg_name
    );
    if Option.is_some start_with_search then (
      cannot_be_used_together arg_name start_with_search_arg_name
    );
    if print_files_without_match then (
      cannot_be_used_together arg_name files_without_match_arg_name
    );
  in
  if Option.is_some script then (
    script_common_check ~arg_name:script_arg_name;
    if Option.is_some start_with_script then (
      cannot_be_used_together script_arg_name start_with_script_arg_name
    );
  );
  if Option.is_some start_with_script then (
    script_common_check ~arg_name:start_with_script_arg_name;
  )
================================================
FILE: bin/clipboard.ml
================================================
(* Feed the output written by [f] into the configured clipboard copy
   command.  No-op when no clipboard command is configured. *)
let pipe_to_clipboard (f : out_channel -> unit) : unit =
  Option.iter
    (fun (cmd, args) -> Proc_utils.pipe_to_command f cmd args)
    Params.clipboard_copy_cmd_and_args
================================================
FILE: bin/command.ml
================================================
open Docfd_lib
(* Document sort specification: a sort key plus a direction, with a
   parser/printer for the "TYPE,ORDER" textual form. *)
module Sort_by = struct
  (* Available sort keys. *)
  type typ = [
    | `Path_date
    | `Path
    | `Score
    | `Mod_time
  ]

  type t = typ * Document.Compare.order

  (* Default ordering when search scores are available. *)
  let default : t = (`Score, `Desc)

  (* Default ordering when no scores exist. *)
  let default_no_score : t = (`Path, `Asc)

  (* Print in the same "TYPE,ORDER" form accepted by [parse]. *)
  let pp formatter ((typ, order) : t) =
    Fmt.pf formatter "%s,%s"
      (match typ with
       | `Path_date -> "path-date"
       | `Path -> "path"
       | `Score -> "score"
       | `Mod_time -> "mod-time"
      )
      (match order with
       | `Asc -> "asc"
       | `Desc -> "desc"
      )

  (* Parser for "TYPE,ORDER".  [no_score] removes "score" from the
     accepted types.  "path-date" is listed before "path" so the
     longer literal is tried first; unknown words produce a
     descriptive failure. *)
  let p ~no_score : t Angstrom.t =
    let open Angstrom in
    let open Parser_components in
    skip_spaces *>
    (choice (List.filter_map
               Fun.id ([
                   Some (string "path-date" *> return `Path_date);
                   Some (string "path" *> return `Path);
                   (if no_score then
                      None
                    else
                      Some (string "score" *> return `Score));
                   Some (string "mod-time" *> return `Mod_time);
                 ]))
     <|>
     (take_while (fun c -> is_not_space c && c <> ',') >>=
      fun s -> fail (Fmt.str "unrecognized sort by type: %s" s))
    )
    >>= fun typ ->
    skip_spaces *>
    char ',' *> skip_spaces *>
    (choice [
        string "asc" *> return `Asc;
        string "desc" *> return `Desc;
      ]
     <|>
     (take_while is_not_space >>=
      fun s -> fail (Fmt.str "unrecognized sort by order: %s" s))
    )
    >>= fun order -> (
      return (typ, order)
    )

  (* Parse a full "TYPE,ORDER" string; [Error msg] on failure. *)
  let parse ~no_score s =
    match Angstrom.(parse_string ~consume:Consume.All) (p ~no_score) s with
    | Ok t -> Ok t
    | Error msg -> Error msg
end
(* How the two UI panes share the screen, from right-dominant to
   left-dominant. *)
type screen_split = [
  | `Even
  | `Focus_left
  | `Wide_left
  | `Focus_right
  | `Wide_right
]
(* Decode an int into a screen split (inverse of
   [int_of_screen_split]); out-of-range values clamp to the nearest
   extreme. *)
let screen_split_of_int (x : int) : screen_split =
  match x with
  | _ when x <= 0 -> `Focus_right
  | 1 -> `Wide_right
  | 2 -> `Even
  | 3 -> `Wide_left
  | _ -> `Focus_left
(* Encode a screen split as an int in [0, 4], ordered from
   right-dominant (0) to left-dominant (4). *)
let int_of_screen_split (x : screen_split) =
  match x with
  | `Focus_left -> 4
  | `Wide_left -> 3
  | `Even -> 2
  | `Wide_right -> 1
  | `Focus_right -> 0
(* UI panes that can be shown/hidden via commands. *)
type pane = [
  | `Bottom_right
  | `Key_binding_info
]

(* Pane name as used in the command syntax (e.g. "hide-pane: ..."). *)
let string_of_pane (x : pane) =
  match x with
  | `Bottom_right -> "bottom-right"
  | `Key_binding_info -> "key-binding-info"
(* A parsed Docfd command, as entered interactively or read from a
   script file. *)
type t = [
  | `Mark of string                (* mark documents matching a path *)
  | `Mark_listed
  | `Unmark of string
  | `Unmark_listed
  | `Unmark_all
  | `Drop of string                (* remove documents matching a path *)
  | `Drop_all_except of string
  | `Drop_marked
  | `Drop_unmarked
  | `Drop_listed
  | `Drop_unlisted
  | `Narrow_level of int           (* narrow-mode search scope level *)
  | `Sort of Sort_by.t * Sort_by.t (* with-score and no-score orderings *)
  | `Path_fuzzy_rank of string * int String_map.t option
  | `Split_screen of screen_split
  | `Hide_pane of pane
  | `Show_pane of pane
  | `Comment of string             (* lines starting with '#' *)
  | `Focus of string
  | `Search of string              (* "" means clear search *)
  | `Filter of string              (* "" means clear filter *)
]
(* Print a command in the same textual form accepted by [of_string],
   so commands round-trip through printing and parsing. *)
let pp fmt (t : t) =
  match t with
  | `Mark s -> Fmt.pf fmt "mark: %s" s
  | `Mark_listed -> Fmt.pf fmt "mark listed"
  | `Unmark s -> Fmt.pf fmt "unmark: %s" s
  | `Unmark_listed -> Fmt.pf fmt "unmark listed"
  | `Unmark_all -> Fmt.pf fmt "unmark all"
  | `Drop s -> Fmt.pf fmt "drop: %s" s
  | `Drop_all_except s -> Fmt.pf fmt "drop all except: %s" s
  | `Drop_marked -> Fmt.pf fmt "drop marked"
  | `Drop_unmarked -> Fmt.pf fmt "drop unmarked"
  | `Drop_listed -> Fmt.pf fmt "drop listed"
  | `Drop_unlisted -> Fmt.pf fmt "drop unlisted"
  | `Narrow_level x -> Fmt.pf fmt "narrow level: %d" x
  | `Sort (x, y) -> (
      Fmt.pf fmt "sort by: %a; %a"
        Sort_by.pp
        x
        Sort_by.pp
        y
    )
  | `Path_fuzzy_rank (s, _ranking) -> (
      (* The precomputed ranking is not printable; only the query
         string is shown. *)
      Fmt.pf fmt "path fuzzy rank: %s" s
    )
  | `Split_screen s -> (
      Fmt.pf fmt "split screen: %s"
        (match s with
         | `Even -> "even"
         | `Focus_left -> "focus-left"
         | `Wide_left -> "wide-left"
         | `Focus_right -> "focus-right"
         | `Wide_right -> "wide-right"
        )
    )
  | `Hide_pane pane -> (
      Fmt.pf fmt "hide-pane: %s" (string_of_pane pane)
    )
  | `Show_pane pane -> (
      Fmt.pf fmt "show-pane: %s" (string_of_pane pane)
    )
  | `Comment s -> Fmt.pf fmt "#%s" s
  | `Focus s -> Fmt.pf fmt "focus: %s" s
  | `Search s -> (
      (* Empty payload prints as the dedicated "clear" form. *)
      if String.length s = 0 then (
        Fmt.pf fmt "clear search"
      ) else (
        Fmt.pf fmt "search: %s" s
      )
    )
  | `Filter s -> (
      if String.length s = 0 then (
        Fmt.pf fmt "clear filter"
      ) else (
        Fmt.pf fmt "filter: %s" s
      )
    )
(* Render a command via [pp] into a string. *)
let to_string (t : t) =
  Fmt.to_to_string pp t
(* Angstrom parsers for the command language printed by [pp].
   Alternative order inside each [choice] matters where one literal
   is a prefix of another. *)
module Parsers = struct
  type t' = t

  open Angstrom
  open Parser_components

  (* Rest of input, with surrounding whitespace trimmed. *)
  let any_string_trimmed =
    any_string >>| String.trim

  let p : t' Angstrom.t =
    skip_spaces *>
    choice [
      string "mark" *> skip_spaces *> (
        choice [
          char ':' *> skip_spaces *>
          any_string_trimmed >>| (fun s -> (`Mark s));
          string "listed" *> skip_spaces *> return `Mark_listed;
        ]
      );
      string "unmark" *> skip_spaces *> (
        choice [
          string "listed" *> skip_spaces *> return `Unmark_listed;
          string "all" *> skip_spaces *> return `Unmark_all;
          char ':' *> skip_spaces *>
          any_string_trimmed >>| (fun s -> (`Unmark s));
        ]
      );
      string "drop" *> skip_spaces *> (
        choice [
          char ':' *> skip_spaces *>
          any_string_trimmed >>| (fun s -> (`Drop s));
          string "all" *> skip_spaces *>
          string "except" *> skip_spaces *> char ':' *> skip_spaces *>
          any_string_trimmed >>| (fun s -> (`Drop_all_except s));
          string "listed" *> skip_spaces *> return `Drop_listed;
          string "unlisted" *> skip_spaces *> return `Drop_unlisted;
          string "marked" *> skip_spaces *> return `Drop_marked;
          string "unmarked" *> skip_spaces *> return `Drop_unmarked;
        ]
      );
      string "narrow" *> skip_spaces *> (
        choice [
          (* Single digit 0-9 only. *)
          string "level" *> skip_spaces *>
          char ':' *> skip_spaces *>
          satisfy (function '0'..'9' -> true | _ -> false) <* skip_spaces >>|
          (fun c -> `Narrow_level (Char.code c - Char.code '0'));
        ]
      );
      (* "clear search"/"clear filter" map to empty-payload commands. *)
      string "clear" *> skip_spaces *> (
        choice [
          string "search" *> skip_spaces *> return (`Search "");
          string "filter" *> skip_spaces *> return (`Filter "");
        ]
      );
      (* Ranking is computed later; parse as None. *)
      string "path" *> skip_spaces *>
      string "fuzzy" *> skip_spaces *>
      string "rank" *> skip_spaces *>
      char ':' *> skip_spaces *>
      any_string_trimmed >>|
      (fun s -> `Path_fuzzy_rank (s, None));
      (* "sort by: WITH_SCORE; NO_SCORE" — two orderings separated by
         ';'. *)
      (string "sort" *> skip_spaces *>
       string "by" *> skip_spaces *>
       char ':' *> skip_spaces *>
       Sort_by.p ~no_score:false >>= fun sort_by ->
       skip_spaces *>
       char ';' *>
       skip_spaces *>
       Sort_by.p ~no_score:true >>= fun sort_by_no_score ->
       skip_spaces *>
       return (`Sort (sort_by, sort_by_no_score)));
      string "focus" *> skip_spaces *>
      char ':' *> skip_spaces *>
      any_string_trimmed >>| (fun s -> (`Focus s));
      string "split" *> skip_spaces *>
      string "screen" *> skip_spaces *>
      char ':' *> skip_spaces *> (
        choice [
          string "even" *> skip_spaces *> return (`Split_screen `Even);
          string "focus-left" *> skip_spaces *> return (`Split_screen `Focus_left);
          string "wide-left" *> skip_spaces *> return (`Split_screen `Wide_left);
          string "focus-right" *> skip_spaces *> return (`Split_screen `Focus_right);
          string "wide-right" *> skip_spaces *> return (`Split_screen `Wide_right);
        ]
      );
      string "hide-pane" *> skip_spaces *>
      char ':' *> skip_spaces *> (
        choice [
          string "bottom-right" *> skip_spaces *> return (`Hide_pane `Bottom_right);
          string "key-binding-info" *> skip_spaces *> return (`Hide_pane `Key_binding_info);
        ]
      );
      string "show-pane" *> skip_spaces *>
      char ':' *> skip_spaces *> (
        choice [
          string "bottom-right" *> skip_spaces *> return (`Show_pane `Bottom_right);
          string "key-binding-info" *> skip_spaces *> return (`Show_pane `Key_binding_info);
        ]
      );
      string "#" *> any_string >>| (fun s -> (`Comment s));
      string "search" *> skip_spaces *>
      char ':' *> skip_spaces *>
      any_string_trimmed >>| (fun s -> (`Search s));
      string "filter" *> skip_spaces *>
      char ':' *> skip_spaces *>
      any_string_trimmed >>| (fun s -> (`Filter s));
    ]
end
(* Parse a command string; [None] on any parse failure. *)
let of_string (s : string) : t option =
  Angstrom.(parse_string ~consume:Consume.All) Parsers.p s
  |> Result.to_option
================================================
FILE: bin/content_and_search_result_rendering.ml
================================================
open Docfd_lib
module I = Notty.I
module A = Notty.A

(* Whether a rendered word is part of a search match. *)
type cell_typ = [
  | `Plain
  | `Search_result
]

(* One renderable word together with its highlight classification. *)
type cell = {
  word : string;
  typ : cell_typ;
}
(* Word-wrapping of cell sequences into Notty images of a fixed
   width, tracking which rendered lines contain search-result
   words. *)
module Text_block_rendering = struct
  (* Split [img] into horizontal chunks of at most [width] columns,
     returned in reverse order. *)
  let hchunk_rev ~width (img : Notty.image) : Notty.image list =
    let open Notty in
    let rec aux acc img =
      let img_width = I.width img in
      if img_width <= width then (
        img :: acc
      ) else (
        let acc = (I.hcrop 0 (img_width - width) img) :: acc in
        aux acc (I.hcrop width 0 img)
      )
    in
    aux [] img

  (* Wrap [cells] into lines of at most [width] columns.  Search
     result words get a highlight attribute (overridden by [attr] if
     supplied) and, if [underline], a '^' underline row.  Returns the
     image and the set of line indices counted from the BOTTOM of the
     accumulated (reversed) grid that contain search-result words. *)
  let of_cells ?attr ~width ?(underline = false) (cells : cell list) : Notty.image * Int_set.t =
    let open Notty.Infix in
    assert (width > 0);
    let rendered_lines_with_search_result_words = ref Int_set.empty in
    (* Fold builds the grid with lines in reverse order and words in
       each line in reverse order; both are re-reversed at the end. *)
    let grid : Notty.image list list =
      List.fold_left
        (fun ((cur_len, acc) : int * Notty.image list list) (cell : cell) ->
           let attr =
             match attr with
             | Some attr -> attr
             | None -> (match cell.typ with
                 | `Plain -> A.empty
                 | `Search_result -> A.(fg black ++ bg lightyellow)
               )
           in
           (* Fall back to '?' placeholders if Notty rejects the
              word (e.g. unrenderable bytes). *)
           let word =
             (match I.string attr cell.word with
              | s -> s
              | exception _ -> (
                  I.string A.(fg lightred) (String.make (String.length cell.word) '?')
                ))
           in
           let word_len = I.width word in
           let word =
             match cell.typ with
             | `Plain -> word
             | `Search_result -> (
                 if underline then (
                   word
                   <->
                   (I.string A.empty (String.make word_len '^'))
                 ) else (
                   word
                 )
               )
           in
           let new_len = cur_len + word_len in
           let cur_len, acc =
             if new_len <= width then (
               (* Fits on the current line. *)
               match acc with
               | [] -> (new_len, [ [ word ] ])
               | line :: rest -> (
                   (new_len, (word :: line) :: rest)
                 )
             ) else (
               if word_len <= width then (
                 (* Start a fresh line with this word. *)
                 (word_len, [ word ] :: acc)
               ) else (
                 (* Word longer than the line: hard-split it. *)
                 let lines =
                   hchunk_rev ~width word
                   |> List.map (fun x -> [ x ])
                 in
                 (0, [] :: (lines @ acc))
               )
             )
           in
           (match cell.typ with
            | `Plain -> ()
            | `Search_result -> (
                rendered_lines_with_search_result_words :=
                  Int_set.add (List.length acc - 1) !rendered_lines_with_search_result_words
              ));
           (cur_len, acc)
        )
        (0, [])
        cells
      |> snd
      |> List.rev_map List.rev
    in
    let img =
      grid
      |> List.map I.hcat
      |> I.vcat
    in
    (img, !rendered_lines_with_search_result_words)

  (* Convenience wrapper over [of_cells]: wrap plain [words],
     highlighting the indices in [highlights]; discards the
     highlighted-line set. *)
  let of_words ?attr ~width ?underline ?(highlights = Int_set.empty) (words : string list) : Notty.image =
    of_cells
      ?attr
      ~width
      ?underline
      (List.mapi
         (fun i word ->
            if Int_set.mem i highlights then (
              { word; typ = `Search_result }
            ) else (
              { word; typ = `Plain }
            )
         )
         words)
    |> fst
end
(* A rectangular grid of words: row [i] of [data] holds the words of
   global line number [start_global_line_num + i]. *)
type word_grid = {
start_global_line_num : int;
data : cell array array;
}
(* Smallest and largest global line numbers touched by the found words
   of [search_result]. Raises [Failure "unexpected case"] when the
   found phrase is empty. *)
let start_and_end_inc_global_line_num_of_search_result
    ~doc_id
    (search_result : Search_result.t)
  : (int * int) =
  let line_num_of Search_result.{ found_word_pos; _ } =
    Index.loc_of_pos ~doc_id found_word_pos
    |> Index.Loc.line_loc
    |> Index.Line_loc.global_line_num
  in
  match Search_result.found_phrase search_result with
  | [] -> failwith "unexpected case"
  | first :: rest -> (
      let first_line = line_num_of first in
      List.fold_left
        (fun (s, e) word ->
           let n = line_num_of word in
           (min s n, max e n))
        (first_line, first_line)
        rest
    )
(* Build a [word_grid] covering the inclusive global line number range
   [start_global_line_num .. end_inc_global_line_num] of document
   [doc_id]. Each row holds the words of one line, initially [`Plain].
   Returns an empty grid when the document has no lines.
   Fix: the empty-document case is now handled *before* the range
   assertions. Previously [check] ran first, and with
   [global_line_count = 0] no [x] satisfies both [0 <= x] and
   [x <= global_line_count - 1], so the assertions always failed and
   the empty-grid branch below was unreachable. *)
let word_grid_of_index
    ~doc_id
    ~start_global_line_num
    ~end_inc_global_line_num
  : word_grid =
  let global_line_count = Index.global_line_count ~doc_id in
  if global_line_count = 0 then (
    { start_global_line_num = 0; data = [||] }
  ) else (
    (* Both range endpoints must fall inside the document. *)
    let check x =
      assert (0 <= x);
      assert (x <= global_line_count - 1);
    in
    check start_global_line_num;
    check end_inc_global_line_num;
    let data =
      OSeq.(start_global_line_num -- end_inc_global_line_num)
      |> Seq.map (fun global_line_num ->
          Index.words_of_global_line_num ~doc_id global_line_num
          |> Dynarray.to_seq
          |> Seq.map (fun word -> { word; typ = `Plain })
          |> Array.of_seq
        )
      |> Array.of_seq
    in
    { start_global_line_num; data }
  )
(* Flip the cells at the given word [positions] to [`Search_result].
   Positions whose line falls outside [grid] are ignored. *)
let mark_in_word_grid
    ~doc_id
    (grid : word_grid)
    (positions : int list)
  : unit =
  let last_line_num =
    grid.start_global_line_num + Array.length grid.data - 1
  in
  positions
  |> List.iter (fun pos ->
      let loc = Index.loc_of_pos ~doc_id pos in
      let line_num =
        Index.Line_loc.global_line_num (Index.Loc.line_loc loc)
      in
      if grid.start_global_line_num <= line_num
      && line_num <= last_line_num
      then (
        let col = Index.Loc.pos_in_line loc in
        let row = line_num - grid.start_global_line_num in
        let cell = grid.data.(row).(col) in
        grid.data.(row).(col) <- { cell with typ = `Search_result }
      ))
(* Highlight every word of [search_result]'s found phrase in [grid]. *)
let mark_search_result_in_word_grid
    ~doc_id
    (grid : word_grid)
    (search_result : Search_result.t)
  : unit =
  let positions =
    List.map
      (fun Search_result.{ found_word_pos; _ } -> found_word_pos)
      (Search_result.found_phrase search_result)
  in
  mark_in_word_grid ~doc_id grid positions
(* Highlight the contiguous word positions spanned by [link] in [grid]. *)
let mark_link_in_word_grid
    ~doc_id
    (grid : word_grid)
    (link : Link.t)
  : unit =
  let { Link.start_pos; end_inc_pos; _ } = link in
  mark_in_word_grid ~doc_id grid CCList.(start_pos -- end_inc_pos)
(* Which location label to draw to the left of each rendered line. *)
type render_mode = [
| `Page_num_only
| `Line_num_only
| `Page_and_line_num
| `None
]
(* Render [grid] to a Notty image of at most [width] columns.
   [render_mode] selects the page/line-number label drawn left of each
   line. When [height] is given, the image is cropped to a [height]-row
   window centered on the search-result words, shifted by [view_offset];
   the offset is clamped to valid bounds and written back if adjusted. *)
let render_grid
~doc_id
~(view_offset : int Lwd.var option)
~(render_mode : render_mode)
~width
?(height : int option)
?underline
(grid : word_grid)
: Notty.image =
(* Fold over grid rows, rendering each line while tracking which
   rendered (wrapped) lines contain search-result words, translated to
   whole-image line indices via the running rendered-line count. *)
let (_rendered_line_count, rendered_lines_with_search_result_words), images =
grid.data
|> Array.to_list
|> CCList.fold_map_i
(fun (rendered_line_count, rendered_lines_with_search_result_words_acc) i cells ->
let cells = Array.to_list cells in
let global_line_num = grid.start_global_line_num + i in
let line_loc = Index.line_loc_of_global_line_num ~doc_id global_line_num in
(* Index numbers are 0-based; display them 1-based. *)
let displayed_line_num = Index.Line_loc.line_num_in_page line_loc + 1 in
let displayed_page_num = Index.Line_loc.page_num line_loc + 1 in
let left_column_label =
match render_mode with
| `Page_num_only -> (
I.hcat
[ I.strf ~attr:A.(fg lightyellow) "Page %d" displayed_page_num
; I.strf ": " ]
)
| `Line_num_only -> (
I.hcat
[ I.strf ~attr:A.(fg lightyellow) "%d" displayed_line_num
; I.strf ": " ]
)
| `Page_and_line_num -> (
I.hcat
[ I.strf ~attr:A.(fg lightyellow) "Page %d, %d"
displayed_page_num
displayed_line_num
; I.strf ": " ]
)
| `None -> (
I.void 0 1
)
in
(* Remaining columns after the label; never below 1. *)
let content_width = max 1 (width - I.width left_column_label) in
let content, rendered_lines_with_search_result_words =
Text_block_rendering.of_cells ?underline ~width:content_width cells
in
((rendered_line_count + I.height content,
rendered_lines_with_search_result_words
|> Int_set.map (fun x -> x + rendered_line_count)
|> Int_set.union rendered_lines_with_search_result_words_acc
),
I.hcat [ left_column_label; content ])
)
(0, Int_set.empty)
in
let img = I.vcat images in
match height with
| None -> img
| Some height -> (
(* Focal point: midpoint of the first and last rendered lines that
   contain search-result words; 0 when there are none. *)
let focal_point_offset =
match
Int_set.min_elt_opt rendered_lines_with_search_result_words,
Int_set.max_elt_opt rendered_lines_with_search_result_words
with
| Some start_rendered_line_num, Some end_inc_rendered_line_num -> (
Misc_utils.div_round_to_closest
(start_rendered_line_num + end_inc_rendered_line_num)
2
)
| _, _ -> 0
in
let target_region_start =
max 0 (focal_point_offset - (Misc_utils.div_round_to_closest height 2))
in
let img_height = I.height img in
let target_region_end_exc =
min
img_height
(target_region_start + height)
in
let view_offset_old =
match view_offset with
| None -> 0
| Some x -> Lwd.peek x
in
(* Clamp the scroll offset: positive offsets cannot push the window
   past the image bottom, negative ones not above the top. *)
let view_offset' =
if view_offset_old >= 0 then (
min
view_offset_old
(img_height - target_region_end_exc)
) else (
let view_offset_old = Int.abs view_offset_old in
- (min
view_offset_old
(target_region_start - 0))
)
in
(* Persist the clamped offset so UI state stays consistent. *)
Option.iter (fun x ->
if view_offset_old <> view_offset' then (
Lwd.set x view_offset'
)) view_offset;
let target_region_start, target_region_end_exc =
if view_offset' >= 0 then (
(target_region_start + view_offset',
target_region_end_exc + view_offset')
) else (
(* If the offset is negative (shifting view upwards),
then make the bottom border "sticky".
In other words, if the height of view window
is smaller than the height of pane,
and the view is shifting upwards, don't bother
moving the bottom border.
This prevents the rendered content snippet view staying
small and cropping the bottom text when scrolling up
if the view started out being small (due to the search
result being close to the bottom of the file).
*)
let view_window_height = target_region_end_exc - target_region_start in
(target_region_start + view_offset',
if view_window_height < height then (
target_region_end_exc
) else (
target_region_end_exc + view_offset'
)
)
)
in
I.vcrop target_region_start (img_height - target_region_end_exc) img
)
(* Render a snippet of document [doc_id], [height] rows tall and at most
   [width] columns wide, centered on [data] (a search result or link)
   when given, otherwise anchored at the top of the document.
   [view_offset] scrolls the snippet; it may be clamped by [render_grid]. *)
let content_snippet
~doc_id
~(view_offset : int Lwd.var)
?(data : [ `Search_result of Search_result.t | `Link of Link.t ] option)
~(width : int)
~(height : int)
?underline
()
: Notty.image =
let max_end_inc_global_line_num = Index.global_line_count ~doc_id - 1 in
assert (height > 0);
(* Compute the inclusive global line range to load, extended in the
   current scroll direction by the view offset. *)
let compute_final_line_num_range
~(view_offset : int Lwd.var)
~start_global_line_num
: int * int =
let end_inc_global_line_num =
min
max_end_inc_global_line_num
(start_global_line_num + height - 1)
in
(* We grow the area in one direction
rather than shifting the area, in order
to not interfere with the focal point offset computation
in render_grid.
The number of lines to grow is an overapproximation
of the actual lines required, as a
line may wrap into multiple rendered lines
in the rendered view if it is longer than
the width of the content pane.
But we do not know how many lines (or partial segments
of lines) exactly until we
actually render the view/word grid.
*)
let view_offset' = Lwd.peek view_offset in
if view_offset' >= 0 then (
let end_inc_global_line_num =
min
max_end_inc_global_line_num
(end_inc_global_line_num + view_offset')
in
(start_global_line_num, end_inc_global_line_num)
) else (
let start_global_line_num =
max
0
(start_global_line_num - Int.abs view_offset')
in
(start_global_line_num, end_inc_global_line_num)
)
in
match data with
| None -> (
(* No focal point: show the document from the top. *)
let start_global_line_num, end_inc_global_line_num =
compute_final_line_num_range
~view_offset
~start_global_line_num:0
in
let grid =
word_grid_of_index
~doc_id
~start_global_line_num
~end_inc_global_line_num
in
render_grid
~doc_id
~view_offset:(Some view_offset)
~render_mode:`None
~width
~height
?underline
grid
)
| Some data -> (
(* Focal line: midpoint of the search result's line span, or the
   line containing the link's start position. *)
let focal_line =
match data with
| `Search_result search_result -> (
let (relevant_start_line, relevant_end_inc_line) =
start_and_end_inc_global_line_num_of_search_result ~doc_id search_result
in
let avg = (relevant_start_line + relevant_end_inc_line) / 2 in
avg
)
| `Link link -> (
let loc = Index.loc_of_pos ~doc_id link.Link.start_pos in
let line_loc = Index.Loc.line_loc loc in
Index.Line_loc.global_line_num line_loc
)
in
let start_global_line_num, end_inc_global_line_num =
compute_final_line_num_range
~view_offset
~start_global_line_num:(
max
0
(focal_line - (Misc_utils.div_round_to_closest height 2))
)
in
let grid =
word_grid_of_index
~doc_id
~start_global_line_num
~end_inc_global_line_num
in
(* Highlight the focal search result or link before rendering. *)
(match data with
| `Search_result search_result -> (
mark_search_result_in_word_grid ~doc_id grid search_result
)
| `Link link -> (
mark_link_in_word_grid ~doc_id grid link
)
);
render_grid
~doc_id
~view_offset:(Some view_offset)
~render_mode:`None
~width
~height
?underline
grid
)
(* [true] iff [s] is non-empty and its first character is not a space
   character as classified by [Parser_components.is_space]. *)
let word_is_not_space s =
  match s with
  | "" -> false
  | _ -> not (Parser_components.is_space (String.get s 0))
(* Widen the inclusive line range [start_global_line_num ..
   end_inc_global_line_num] by one line in each direction per iteration
   until the accumulated [non_space_word_count] reaches
   [!Params.search_result_print_snippet_min_size], the per-direction cap
   [!Params.search_result_print_snippet_max_additional_lines_each_direction]
   is hit, with growth in a direction stopping at the document boundary.
   Returns the (possibly) widened range. *)
let grab_additional_lines
~doc_id
~non_space_word_count
start_global_line_num
end_inc_global_line_num
: int * int =
let max_end_inc_global_line_num = Index.global_line_count ~doc_id - 1 in
let non_space_word_count_of_line n =
Index.words_of_global_line_num ~doc_id n
|> Dynarray.to_seq
|> Seq.filter word_is_not_space
|> Seq.length
in
(* [x]/[y] are the current top/bottom of the range; [i] counts
   completed growth iterations. *)
let rec aux ~non_space_word_count ~i x y =
if i < !Params.search_result_print_snippet_max_additional_lines_each_direction
&& non_space_word_count < !Params.search_result_print_snippet_min_size
then (
let x, top_add_count =
let n = x - 1 in
if n >= 0 then (
(n, non_space_word_count_of_line n)
) else (
(x, 0)
)
in
let y, bottom_add_count =
let n = y + 1 in
if n <= max_end_inc_global_line_num then (
(n, non_space_word_count_of_line n)
) else (
(y, 0)
)
in
let non_space_word_count =
non_space_word_count
+ top_add_count
+ bottom_add_count
in
aux ~non_space_word_count ~i:(i + 1) x y
) else (
(x, y)
)
in
aux ~non_space_word_count ~i:0 start_global_line_num end_inc_global_line_num
(* Render one search result of document [doc_id] as an image: the lines
   spanned by the matched phrase (widened with surrounding context when
   [fill_in_context] is set), with matched words highlighted. When debug
   output is enabled, the result's score is prepended. *)
let search_result
~doc_id
~render_mode
~width
?underline
?(fill_in_context = false)
(search_result : Search_result.t)
: Notty.image =
let open Notty in
let open Notty.Infix in
let (start_global_line_num, end_inc_global_line_num) =
start_and_end_inc_global_line_num_of_search_result ~doc_id search_result
|> (fun (x, y) ->
if fill_in_context then (
(* Seed context growth with the number of non-space tokens in the
   search phrase itself. *)
let non_space_word_count =
Search_result.search_phrase search_result
|> Search_phrase.enriched_tokens
|> List.filter_map (fun token ->
match Search_phrase.Enriched_token.data token with
| `Explicit_spaces -> None
| `String s -> (
assert (word_is_not_space s);
Some s
)
)
|> List.length
in
grab_additional_lines ~doc_id ~non_space_word_count x y
) else (
(x, y)
)
)
in
let grid =
word_grid_of_index
~doc_id
~start_global_line_num
~end_inc_global_line_num
in
mark_search_result_in_word_grid ~doc_id grid search_result;
let img =
render_grid
~doc_id
~view_offset:None
~render_mode
~width
?underline
grid
in
(* Debug builds show the ranking score above the snippet. *)
if Option.is_some !Params.debug_output then (
let score = Search_result.score search_result in
I.strf "(Score: %f)" score
<->
img
) else (
img
)
(* Render [l] as a vertical list with the [selection]-th entry prefixed
   by a "> " marker, then crop to a [height]-row window roughly centered
   on the selected entry. Each entry is followed by a blank line. *)
let centered_list
~width
~height
~(render : width:int -> 'a -> Notty.image)
(selection : int)
(l : 'a list)
: Notty.image =
(* Total rendered height of entries above the selection; used as the
   focal point when cropping below. *)
let height_rendered_before_selection = ref 0 in
let img =
l
|> List.mapi (fun i x ->
let img =
let x =
if i = selection then (
(* The marker takes two columns off the entry's width. *)
let img = render ~width:(width - 2) x in
Notty.I.(strf ~attr:A.(fg lightyellow) "> "
<|>
img
)
) else (
render ~width x
)
in
Notty.I.(x <-> strf "")
in
if i < selection then (
height_rendered_before_selection := !height_rendered_before_selection + Notty.I.height img;
);
img
)
|> Notty.I.vcat
in
let focal_point_offset = !height_rendered_before_selection in
let img_height = I.height img in
let target_region_start =
max 0 (focal_point_offset - (Misc_utils.div_round_to_closest height 2))
in
let target_region_end_exc =
min
img_height
(target_region_start + height)
in
I.vcrop target_region_start (img_height - target_region_end_exc) img
================================================
FILE: bin/debug_utils.ml
================================================
(* Run [f] on the debug output channel when debug output is enabled;
   otherwise do nothing. *)
let do_if_debug (f : out_channel -> unit) =
  Option.iter f !Params.debug_output
================================================
FILE: bin/docfd.ml
================================================
open Cmdliner
open Lwd_infix
open Docfd_lib
open Debug_utils
open Misc_utils
open File_utils
(* Validate every glob pattern in [globs], exiting with an error message
   on the first unparsable one, then expand the patterns into the set of
   matching file paths. *)
let compute_paths_from_globs ~report_progress globs =
  let validate s =
    if Option.is_none (Glob.parse s) then (
      exit_with_error_msg
        (Fmt.str "failed to parse glob pattern: \"%s\"" s)
    )
  in
  Seq.iter validate globs;
  list_files_recursive_filter_by_globs ~report_progress globs
(* User-specified constraints on which files to pick up, plus flags
   controlling use of the external pdftotext/pandoc converters. *)
type file_constraints = {
no_pdftotext : bool;
no_pandoc : bool;
(* [false] iff the user gave no paths/globs at all and we defaulted to
   scanning the current directory. *)
paths_were_specified_by_user : bool;
exts : string list;
single_line_exts : string list;
directly_specified_paths : String_set.t;
globs : String_set.t;
single_line_globs : String_set.t;
}
(* Assemble the user's file selection constraints. When no paths, path
   files, or glob patterns were supplied at all, default to scanning the
   current directory ["."]. *)
let make_file_constraints
    ~no_pdftotext
    ~no_pandoc
    ~(exts : string list)
    ~(single_line_exts : string list)
    ~(paths : string list)
    ~(paths_from_file_or_stdin : string list option)
    ~(globs : string list)
    ~(single_line_globs : string list)
  : file_constraints =
  let nothing_specified =
    paths = []
    && Option.is_none paths_from_file_or_stdin
    && globs = []
    && single_line_globs = []
  in
  if nothing_specified then (
    {
      no_pdftotext;
      no_pandoc;
      paths_were_specified_by_user = false;
      exts;
      single_line_exts;
      directly_specified_paths = String_set.of_list [ "." ];
      globs = String_set.empty;
      single_line_globs = String_set.empty;
    }
  ) else (
    let extra_paths = Option.value ~default:[] paths_from_file_or_stdin in
    {
      no_pdftotext;
      no_pandoc;
      paths_were_specified_by_user = true;
      exts;
      single_line_exts;
      directly_specified_paths = String_set.of_list (paths @ extra_paths);
      globs = String_set.of_list globs;
      single_line_globs = String_set.of_list single_line_globs;
    }
  )
(* Expand [cons] into the concrete collection of files to load, split
   into default and single-line search mode sets, showing a scanning
   spinner when [interactive]. *)
let files_satisfying_constraints
~interactive
(cons : file_constraints)
: Document_src.file_collection =
let bar =
let open Progress.Line in
list
[ const "Scanning"
; spinner ()
]
in
progress_with_reporter
~interactive
bar
(fun report_progress : Document_src.file_collection ->
(* Single-line mode applies when the file's extension is listed in
   [cons.single_line_exts]. *)
let single_line_search_mode_applies file =
List.mem (extension_of_file file) cons.single_line_exts
in
(* Scan directly specified paths, keeping recognized extensions only,
   and split the hits by search mode. *)
let single_line_search_mode_paths_by_exts, default_search_mode_paths_by_exts =
cons.directly_specified_paths
|> String_set.to_seq
|> list_files_recursive_filter_by_exts
~report_progress
~exts:(cons.exts @ cons.single_line_exts)
|> String_set.partition single_line_search_mode_applies
in
(* Expand both glob families into path sets. *)
let paths_from_single_line_globs =
cons.single_line_globs
|> String_set.to_seq
|> compute_paths_from_globs ~report_progress
in
let single_line_search_mode_paths_from_globs, default_search_mode_paths_from_globs =
cons.globs
|> String_set.to_seq
|> compute_paths_from_globs ~report_progress
|> String_set.partition single_line_search_mode_applies
in
let single_line_search_mode_files =
single_line_search_mode_paths_by_exts
|> String_set.union paths_from_single_line_globs
|> String_set.union single_line_search_mode_paths_from_globs
in
(* Single-line mode wins: remove its files from the default set. *)
let default_search_mode_files =
default_search_mode_paths_by_exts
|> String_set.union default_search_mode_paths_from_globs
|> (fun s -> String_set.diff s single_line_search_mode_files)
in
(* Debug-only sanity checks: the two sets are disjoint, and agree with
   a naively recomputed partition of all files. *)
do_if_debug (fun oc ->
Printf.fprintf oc "Checking if single line search mode files and default search mode files are disjoint\n";
if String_set.is_empty
(String_set.inter
single_line_search_mode_files
default_search_mode_files)
then (
Printf.fprintf oc "Check successful\n"
) else (
failwith "check failed"
);
let all_files =
single_line_search_mode_paths_by_exts
|> String_set.union default_search_mode_paths_by_exts
|> String_set.union paths_from_single_line_globs
|> String_set.union single_line_search_mode_paths_from_globs
|> String_set.union default_search_mode_paths_from_globs
in
let single_line_search_mode_files', default_search_mode_files' =
String_set.partition (fun s ->
single_line_search_mode_applies s
||
String_set.mem s paths_from_single_line_globs
)
all_files
in
Printf.fprintf oc "Checking if efficiently computed and naively computed results for single line search mode files are consistent\n";
if String_set.equal
single_line_search_mode_files
single_line_search_mode_files'
then (
Printf.fprintf oc "Check successful\n"
) else (
failwith "check failed"
);
Printf.fprintf oc "Checking if efficiently computed and naively computed results for default search mode files are consistent\n";
if String_set.equal
default_search_mode_files
default_search_mode_files'
then (
Printf.fprintf oc "Check successful\n"
) else (
failwith "check failed"
)
);
(* Drop PDF / pandoc-format files when the respective converter has
   been disabled on the command line. *)
let filter_for_no_pdftotext_or_no_pandoc (s : String_set.t) =
if cons.no_pdftotext || cons.no_pandoc then (
String_set.filter
(fun s ->
match File_utils.format_of_file s with
| `PDF -> not cons.no_pdftotext
| `Pandoc_supported_format -> not cons.no_pandoc
| `Text | `Other -> true
)
s
) else (
s
)
in
let default_search_mode_files =
filter_for_no_pdftotext_or_no_pandoc default_search_mode_files
in
let single_line_search_mode_files =
filter_for_no_pdftotext_or_no_pandoc single_line_search_mode_files
in
{
default_search_mode_files;
single_line_search_mode_files;
}
)
(* Build the initial session state from [document_src]: stat, hash, and
   index every file (or the stdin temp file), reusing the on-disk index
   for already-indexed documents, with progress bars when [interactive]. *)
let init_session_state_of_document_src ~env ~interactive pool (document_src : Document_src.t) =
(* Progress bar counting whole files processed. *)
let file_bar ~total_file_count =
let open Progress.Line in
list
[ brackets (elapsed ())
; bar ~width:(`Fixed 20) total_file_count
; percentage_of total_file_count
; const "ETA: " ++ eta total_file_count
]
in
(* Progress bar counting bytes processed. *)
let byte_bar ~total_byte_count =
let open Progress.Line in
list
[ brackets (elapsed ())
; bar ~width:(`Fixed 20) total_byte_count
; percentage_of total_byte_count
; bytes_per_sec
; const "ETA: " ++ eta total_byte_count
]
in
let all_documents : Document.t list list =
match document_src with
(* Stdin was already dumped to a temp file [path]; load it directly. *)
| Document_src.Stdin path -> (
match
Document.of_path
~env
pool
~already_in_transaction:false
!Params.default_search_mode
path
with
| Ok x -> [ [ x ] ]
| Error msg -> (
exit_with_error_msg msg
)
)
| Files { default_search_mode_files; single_line_search_mode_files } -> (
let print_stage_stats ~file_count ~total_byte_count =
Printf.printf "- File count: %6d\n" file_count;
Printf.printf "- MiB: %8.1f\n"
(Misc_utils.mib_of_bytes total_byte_count);
in
(* Tag each path with the search mode it should be loaded under. *)
let total_file_count, files =
Seq.append
(Seq.map (fun path -> (!Params.default_search_mode, path))
(String_set.to_seq default_search_mode_files))
(Seq.map (fun path -> (`Single_line, path))
(String_set.to_seq single_line_search_mode_files))
|> Misc_utils.length_and_list_of_seq
in
if interactive then (
Printf.printf "Collecting file stats\n";
flush stdout;
);
(* Stage 1: collect file sizes (files that cannot be stat'd are
   silently skipped from the size map). *)
let documents_total_byte_count, document_sizes =
progress_with_reporter
~interactive
(file_bar ~total_file_count)
(fun report_progress ->
List.fold_left (fun (total_size, m) (_, path) ->
let res =
match File_utils.file_size path with
| None -> (total_size, m)
| Some x -> (total_size + x, String_map.add path x m)
in
report_progress 1;
res
)
(0, String_map.empty)
files
)
in
if interactive then (
print_stage_stats
~file_count:total_file_count
~total_byte_count:documents_total_byte_count
);
if interactive then (
Printf.printf "Hashing\n";
flush stdout;
);
(* Stage 2: hash each file in parallel on the task pool; files that
   fail to hash are dropped (logged in debug mode). *)
let file_and_hash_list =
match files with
| [] -> []
| _ -> (
files
|> (fun l ->
progress_with_reporter
~interactive
(byte_bar ~total_byte_count:documents_total_byte_count)
(fun report_progress ->
Task_pool.filter_map_list pool (fun (search_mode, path) ->
do_if_debug (fun oc ->
Printf.fprintf oc "Hashing document: %s\n" (Filename.quote path);
);
let res =
match BLAKE2B.hash_of_file ~env ~path with
| Ok hash -> Some (search_mode, path, hash)
| Error msg -> (
do_if_debug (fun oc ->
Printf.fprintf oc "Error: %s\n" msg
);
None
)
in
(match String_map.find_opt path document_sizes with
| None -> ()
| Some x -> report_progress x);
res
)
l
)
)
)
in
if interactive then (
Printf.printf "Allocating document IDs\n";
flush stdout;
);
(* Stage 3: allocate document IDs for all hashes in one batch. *)
progress_with_reporter
~interactive
(file_bar ~total_file_count)
(fun report_progress ->
file_and_hash_list
|> List.to_seq
|> Seq.map (fun (_, _, doc_hash) ->
report_progress 1;
doc_hash)
|> Doc_id_db.allocate_bulk
);
(* Stage 4: split files into already-indexed and not-yet-indexed by
   querying the index database. *)
let indexed_files, unindexed_files =
let open Sqlite3_utils in
with_stmt
Index.is_indexed_sql
(fun stmt ->
List.partition (fun (_, _, doc_hash) ->
bind_names stmt [ ("@doc_hash", TEXT doc_hash) ];
step stmt;
let indexed = data_count stmt > 0 in
reset stmt;
indexed
)
file_and_hash_list
)
in
(* Touch the last-used timestamp of already-indexed documents so they
   are not evicted from the cache. *)
indexed_files
|> List.map (fun (_, _, doc_hash) ->
Doc_id_db.doc_id_of_doc_hash doc_hash
)
|> Index.refresh_last_used_batch;
(* Load one document, returning [None] (and logging in debug mode) on
   failure. *)
let load_document ~env pool search_mode ~doc_hash path =
do_if_debug (fun oc ->
Printf.fprintf oc "Loading document: %s\n" (Filename.quote path);
);
do_if_debug (fun oc ->
Printf.fprintf oc "Using %s search mode for document %s\n"
(match search_mode with
| `Single_line -> "single line"
| `Multiline -> "multiline"
)
(Filename.quote path)
);
match
Document.of_path
~env
pool
~already_in_transaction:false
search_mode
~doc_hash
path
with
| Ok x -> (
do_if_debug (fun oc ->
Printf.fprintf oc "Document %s loaded successfully\n" (Filename.quote path);
);
Some x
)
| Error msg -> (
do_if_debug (fun oc ->
Printf.fprintf oc "Error: %s\n" msg
);
None
)
in
if interactive then (
Printf.printf "Processing indexed files\n";
flush stdout;
);
(* Stage 5: load already-indexed documents. *)
let indexed_files =
indexed_files
|> List.filter_map (fun (search_mode, path, doc_hash) ->
load_document ~env pool search_mode ~doc_hash path
)
in
if interactive then (
Printf.printf "Indexing remaining files\n";
flush stdout;
);
let unindexed_file_count, unindexed_files_byte_count =
List.fold_left (fun (file_count, byte_count) (_, path, _) ->
(file_count + 1,
byte_count + Option.value ~default:0 (String_map.find_opt path document_sizes))
)
(0, 0)
unindexed_files
in
if interactive then (
print_stage_stats
~file_count:unindexed_file_count
~total_byte_count:unindexed_files_byte_count;
);
(* Stage 6: index the remaining files through the document pipeline,
   feeding it concurrently with its worker fiber. *)
let pipeline = Document_pipeline.make ~env pool in
let _, unindexed_files =
Eio.Fiber.pair
(fun () ->
Document_pipeline.run pipeline
)
(fun () ->
(match unindexed_files with
| [] -> ()
| _ -> (
progress_with_reporter
~interactive
(byte_bar ~total_byte_count:unindexed_files_byte_count)
(fun report_progress ->
unindexed_files
|> List.iter (fun (search_mode, path, doc_hash) ->
Document_pipeline.feed
pipeline
search_mode
~doc_hash
path;
(match String_map.find_opt path document_sizes with
| None -> ()
| Some x -> report_progress x
)
)
)
));
if interactive then (
Printf.printf "Finalizing index\n";
flush stdout;
);
Document_pipeline.finalize pipeline
)
in
[ indexed_files; unindexed_files ]
)
in
(* Flatten all loaded documents into the initial session state. *)
let state =
all_documents
|> List.to_seq
|> Seq.flat_map List.to_seq
|> Session.State.of_seq pool
in
(* Reclaim memory used by the loading stages before the UI starts. *)
Gc.compact ();
state
(* Parse a --sort / --sort-no-score command line argument, exiting with
   a descriptive error message (naming the offending flag) on failure. *)
let parse_sort_by_arg ~no_score (s : string) : Command.Sort_by.t =
  match Command.Sort_by.parse ~no_score s with
  | Ok t -> t
  | Error msg -> (
      (* Strip the leading ": " the parser prepends, when present. *)
      let msg =
        Option.value ~default:msg (CCString.chop_prefix ~pre:": " msg)
      in
      let arg_name =
        if no_score then Args.sort_no_score_arg_name
        else Args.sort_arg_name
      in
      exit_with_error_msg
        (Fmt.str "failed to parse --%s argument: %s" arg_name msg)
    )
let run
~(eio_env : Eio_unix.Stdenv.base)
~sw
(debug_log : string option)
(no_pdftotext : bool)
(no_pandoc : bool)
(scan_hidden : bool)
(max_depth : int)
(max_fuzzy_edit_dist : int)
(max_token_search_dist : int)
(max_linked_token_search_dist : int)
(tokens_per_search_scope_level : int)
(index_chunk_size : int)
(exts : string)
(single_line_exts : string)
(additional_exts : string list)
(single_line_additional_exts : string list)
(cache_dir : string)
(cache_limit : int)
(data_dir : string)
(index_only : bool)
(start_with_filter : string option)
(start_with_search : string option)
(filter_exp : string option)
(sample_search_exp : string option)
(samples_per_doc : int)
(search_exp : string option)
(sort_by : string)
(sort_by_no_score : string)
(print_color_mode : Params.style_mode)
(print_underline_mode : Params.style_mode)
(search_result_print_text_width : int)
(search_result_print_snippet_min_size : int)
(search_result_print_max_add_lines : int)
(start_with_script : string option)
(script : string option)
(paths_from : string list)
(globs : string list)
(single_line_globs : string list)
(single_line_search_mode_by_default : bool)
(path_open_specs : string list)
(print_files_with_match : bool)
(print_files_without_match : bool)
(paths : string list)
=
let env = eio_env in
Args.check
~max_depth
~max_fuzzy_edit_dist
~max_token_search_dist
~max_linked_token_search_dist
~tokens_per_search_scope_level
~index_chunk_size
~cache_limit
~start_with_filter
~start_with_search
~filter_exp
~sample_search_exp
~samples_per_doc
~search_exp
~search_result_print_text_width
~search_result_print_snippet_min_size
~search_result_print_max_add_lines
~start_with_script
~script
~paths_from
~print_files_with_match
~print_files_without_match;
Params.debug_output := (match debug_log with
| None -> None
| Some "-" -> Some stderr
| Some debug_log -> (
try
Some (
open_out_gen
[ Open_append; Open_creat; Open_wronly; Open_text ]
0o644
debug_log
)
with
| Sys_error _ -> (
exit_with_error_msg
(Fmt.str "failed to open debug log file %s" (Filename.quote debug_log))
)
)
);
Params.scan_hidden := scan_hidden;
Params.max_file_tree_scan_depth := max_depth;
Params.max_fuzzy_edit_dist := max_fuzzy_edit_dist;
Params.max_token_search_dist := max_token_search_dist;
Params.max_linked_token_search_dist := max_linked_token_search_dist;
Params.tokens_per_search_scope_level := tokens_per_search_scope_level;
Params.index_chunk_size := index_chunk_size;
Params.cache_limit := cache_limit;
Params.search_result_print_text_width := search_result_print_text_width;
Params.search_result_print_snippet_min_size := search_result_print_snippet_min_size;
Params.search_result_print_snippet_max_additional_lines_each_direction :=
search_result_print_max_add_lines;
Params.samples_per_document := samples_per_doc;
let sort_by = parse_sort_by_arg ~no_score:false sort_by in
let sort_by_no_score = parse_sort_by_arg ~no_score:true sort_by_no_score in
Params.cache_dir := (
mkdir_recursive cache_dir;
Some cache_dir
);
Params.data_dir := (
mkdir_recursive data_dir;
Some data_dir
);
Params.default_search_mode := (
if single_line_search_mode_by_default then (
`Single_line
) else (
`Multiline
)
);
List.iter (fun spec ->
match Path_opening.parse_spec spec with
| Error msg -> (
exit_with_error_msg (Fmt.str "failed to parse %s, %s" spec msg)
)
| Ok (exts, launch_mode, cmd) -> (
List.iter (fun ext ->
Hashtbl.replace Path_opening.specs ext (launch_mode, cmd)
)
exts
)
) path_open_specs;
let db_path = Filename.concat cache_dir Params.db_file_name in
(match Docfd_lib.init ~db_path ~document_count_limit:cache_limit with
| None -> ()
| Some msg -> exit_with_error_msg msg
);
let interactive =
Option.is_none filter_exp
&&
Option.is_none sample_search_exp
&&
Option.is_none search_exp
&&
not print_files_with_match
&&
not print_files_without_match
&&
Option.is_none script
in
if interactive then (
Printf.printf "Initializing in-memory index\n";
flush stdout;
);
Word_db.read_from_db ();
Index.State.read_from_db ();
(match Sys.getenv_opt "VISUAL", Sys.getenv_opt "EDITOR" with
| None, None -> (
exit_with_error_msg
(Fmt.str "environment variable VISUAL or EDITOR needs to be set")
)
| Some editor, _
| None, Some editor -> (
Params.text_editor := editor;
)
);
Lwd.unsafe_mutation_logger := (fun () -> ());
let recognized_exts =
compute_total_recognized_exts ~exts ~additional_exts
in
let recognized_single_line_exts =
compute_total_recognized_exts ~exts:single_line_exts ~additional_exts:single_line_additional_exts
in
(match recognized_exts, recognized_single_line_exts, globs, single_line_globs with
| [], [], [], [] -> (
exit_with_error_msg
(Fmt.str "no usable file extensions or glob patterns")
)
| _, _, _, _ -> ()
);
let paths_from_file_or_stdin =
match paths_from with
| [] -> None
| l -> (
l
|> List.to_seq
|> Seq.map (String.split_on_char ',')
|> Seq.flat_map List.to_seq
|> Seq.flat_map (fun paths_from ->
(match paths_from with
| "-" -> (
CCIO.read_lines_l stdin
)
| _ -> (
try
CCIO.with_in paths_from CCIO.read_lines_l
with
| Sys_error _ -> (
exit_with_error_msg
(Fmt.str "failed to read list of paths from %s" (Filename.quote paths_from))
)
)
)
|> List.to_seq
)
|> List.of_seq
|> Option.some
)
in
let file_constraints =
make_file_constraints
~no_pdftotext
~no_pandoc
~exts:recognized_exts
~single_line_exts:recognized_single_line_exts
~paths
~paths_from_file_or_stdin
~globs
~single_line_globs
in
let pool = Task_pool.make ~sw (Eio.Stdenv.domain_mgr env) in
Atomic.set UI_base.Vars.pool (Some pool);
String_set.iter (fun path ->
if not (Sys.file_exists path) then (
exit_with_error_msg
(Fmt.str "path %s does not exist" (Filename.quote path))
)
)
file_constraints.directly_specified_paths;
let compute_if_hide_document_list_initially_and_document_src : unit -> bool * Document_src.t =
let stdin_tmp_file = ref None in
(fun () ->
let file_collection =
files_satisfying_constraints ~interactive file_constraints
in
if file_constraints.paths_were_specified_by_user
|| stdin_is_atty ()
then (
let hide_document_list =
match Document_src.file_collection_size file_collection with
| 0 -> false
| 1 -> true
| _ -> false
in
(hide_document_list, Files file_collection)
) else (
match !stdin_tmp_file with
| None -> (
match read_in_channel_to_tmp_file stdin with
| Ok tmp_file -> (
stdin_tmp_file := Some tmp_file;
(true, Stdin tmp_file)
)
| Error msg -> (
exit_with_error_msg msg
)
)
| Some tmp_file -> (
(true, Stdin tmp_file)
)
)
)
in
let compute_document_src () =
snd (compute_if_hide_document_list_initially_and_document_src ())
in
let hide_document_list_initially, init_document_src =
compute_if_hide_document_list_initially_and_document_src ()
in
let clean_up () =
match init_document_src with
| Stdin tmp_file -> (
try
Sys.remove tmp_file
with
| Sys_error _ -> ()
)
| Files _ -> ()
in
do_if_debug (fun oc ->
Printf.fprintf oc "Scanning completed\n"
);
do_if_debug (fun oc ->
match init_document_src with
| Stdin _ -> Printf.fprintf oc "Document source: stdin\n"
| Files file_collection -> (
Printf.fprintf oc "Document source: files\n";
Document_src.seq_of_file_collection file_collection
|> Seq.iter (fun file ->
Printf.fprintf oc "File: %s\n" (Filename.quote file);
)
)
);
(match init_document_src with
| Stdin _ -> ()
| Files file_collection -> (
let pdftotext_exists = Proc_utils.command_exists "pdftotext" in
let pandoc_exists = Proc_utils.command_exists "pandoc" in
let formats = Document_src.seq_of_file_collection file_collection
|> Seq.map format_of_file
|> Seq.fold_left (fun acc x -> File_format_set.add x acc) File_format_set.empty
in
if not pdftotext_exists && File_format_set.mem `PDF formats then (
exit_with_error_msg
(Fmt.str "command pdftotext not found, use --%s to disable use of pdftotext" Args.no_pdftotext_arg_name)
);
if File_format_set.mem `Pandoc_supported_format formats then (
if not pandoc_exists then (
exit_with_error_msg
(Fmt.str "command pandoc not found, use --%s to disable use of pandoc" Args.no_pandoc_arg_name)
);
);
)
);
Lwd.set UI_base.Vars.hide_document_list hide_document_list_initially;
init_session_state_of_document_src ~env pool ~interactive init_document_src
|> (fun state ->
Session.run_command pool (`Sort (sort_by, sort_by_no_score)) state
|> Option.get
|> snd
)
|> Session_manager.update_starting_state;
if index_only then (
clean_up ();
exit 0
);
let print_oc = stdout in
let print_with_color =
match print_color_mode with
| `Never -> false
| `Always -> true
| `Auto -> Out_channel.isatty print_oc
in
let print_with_underline =
match print_underline_mode with
| `Never -> false
| `Always -> true
| `Auto -> not (Out_channel.isatty print_oc)
in
let snapshots_from_script =
match script, start_with_script with
| None, None -> None
| Some _, Some _ -> failwith "unexpected case"
| Some script, None
| None, Some script -> (
let init_state =
Session_manager.lock_with_view (fun view ->
view.init_state
)
in
match
Script.run
pool
~init_state
~path:script
with
| Error msg -> exit_with_error_msg msg
| Ok snapshots -> Some snapshots
)
in
(match snapshots_from_script with
| None -> ()
| Some snapshots -> (
Session_manager.load_snapshots snapshots;
));
if not interactive then (
let filter_exp_and_original_string =
match filter_exp with
| None -> None
| Some s ->
Some (Option.get (Filter_exp.parse s), s)
in
let print_limit_per_doc, search_exp_and_original_string =
match sample_search_exp, search_exp with
| None, None -> (None, None)
| Some _, Some _ -> failwith "unexpected case"
| Some search_exp_string, None
| None, Some search_exp_string -> (
let print_limit_per_doc =
match sample_search_exp with
| Some _ -> Some samples_per_doc
| None -> None
in
let search_exp =
Option.get (Search_exp.parse search_exp_string)
in
do_if_debug (fun oc ->
Fmt.pf
(Format.formatter_of_out_channel oc)
"Search expression: @[%a@]@." Search_exp.pp search_exp
);
(print_limit_per_doc, Some (search_exp, search_exp_string))
)
in
let session_state =
match snapshots_from_script with
| None -> (
Session_manager.lock_with_view (fun view ->
view.init_state
)
|> (fun state ->
match filter_exp_and_original_string with
| None -> state
| Some (filter_exp, filter_exp_string) -> (
Option.get
(Session.State.update_filter_exp
pool
(Stop_signal.make ())
filter_exp_string
filter_exp
state
)
)
)
|> (fun state ->
match search_exp_and_original_string with
| None -> state
| Some (search_exp, search_exp_string) -> (
Option.get
(Session.State.update_search_exp
pool
(Stop_signal.make ())
search_exp_string
search_exp
state
)
)
)
)
| Some _ -> (
Session_manager.lock_with_view (fun view ->
Dynarray.get_last view.snapshots
|> Session.Snapshot.state
)
)
in
let oc = stdout in
let no_results =
if print_files_with_match then (
let arr =
Session.State.search_result_groups session_state
in
Array.iter (fun (doc, _search_result) ->
Printers.path_image ~color:print_with_color oc (Document.path doc)
) arr;
Array.length arr = 0
) else if print_files_without_match then (
let arr =
Session.State.unusable_documents session_state
|> Array.of_seq
in
let (sort_by_typ, sort_by_order) = sort_by_no_score in
let f =
match sort_by_typ with
| `Path_date -> Document.Compare.path_date sort_by_order
| `Mod_time -> Document.Compare.mod_time sort_by_order
| `Path -> Document.Compare.path sort_by_order
| `Score -> failwith "unexpected case"
in
Array.sort f arr;
Array.iter (fun doc ->
Printers.path_image ~color:print_with_color oc (Document.path doc)
) arr;
Array.length arr = 0
) else (
let s =
Session.State.search_result_groups session_state
|> Array.to_seq
|> Seq.map (fun (doc, arr) ->
let arr =
match print_limit_per_doc with
| None -> arr
| Some n -> (
Array.sub
arr
0
(min (Array.length arr) n)
)
in
(doc, arr)
)
in
Printers.search_result_groups
~color:print_with_color
~underline:print_with_underline
oc
s;
Seq.is_empty s
)
in
clean_up ();
if no_results then (
exit 1
) else (
exit 0
)
);
UI_base.Vars.eio_env := Some env;
let root : Nottui.ui Lwd.t =
let$* (term_width, term_height) = Lwd.get UI_base.Vars.term_width_height in
if term_width <= 40 || term_height <= 20 then (
let msg = Nottui.Ui.atom (Notty.I.strf "Terminal size too small") in
let keyboard_handler (key : Nottui.Ui.key) =
match key with
| (`Escape, [])
| (`ASCII 'Q', [`Ctrl])
| (`ASCII 'C', [`Ctrl]) -> (
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := None;
`Handled
)
| _ -> `Unhandled
in
Lwd.return (Nottui.Ui.keyboard_area keyboard_handler msg)
) else (
UI.main
)
in
let get_term, close_term =
let term_and_tty_fd = ref None in
((fun () ->
match !term_and_tty_fd with
| None -> (
if stdin_is_atty () then (
let term = Notty_unix.Term.create () in
term_and_tty_fd := Some (term, None);
term
) else (
let input =
Unix.(openfile "/dev/tty" [ O_RDONLY ] 0o444)
in
let term = Notty_unix.Term.create ~input () in
term_and_tty_fd := Some (term, Some input);
term
)
)
| Some (term, _tty_fd) -> term
),
(fun () ->
match !term_and_tty_fd with
| None -> ()
| Some (term, tty_fd) -> (
Notty_unix.Term.release term;
(match tty_fd with
| None -> ()
| Some fd -> Unix.close fd);
term_and_tty_fd := None
)
)
)
in
let rec loop () =
Sys.command "clear -x" |> ignore;
let term = get_term () in
UI_base.Vars.term := Some term;
UI_base.Vars.action := None;
Lwd.set UI_base.Vars.quit false;
UI_base.ui_loop
~quit:UI_base.Vars.quit
~term
root;
match !UI_base.Vars.action with
| None -> ()
| Some action -> (
Session_manager.stop_filter_and_search_and_restore_input_fields ();
match action with
| UI_base.Recompute_document_src -> (
close_term ();
let new_starting_state =
compute_document_src ()
|> init_session_state_of_document_src ~env ~interactive pool
in
Session_manager.update_starting_state new_starting_state;
loop ()
)
| Open_file_and_search_result (doc, search_result) -> (
let doc_id = Document.doc_id doc in
let path = Document.path doc in
let old_stats = Unix.stat path in
Path_opening.main
~close_term
~path
~doc_id_and_search_result:(Option.map (fun x -> (doc_id, x)) search_result);
let new_stats = Unix.stat path in
if
Float.abs
(new_stats.st_mtime -. old_stats.st_mtime) >= Params.float_compare_margin
then (
UI.reload_document doc
);
loop ()
)
| Open_link (doc, link) -> (
Path_opening.open_link
~close_term
~doc
link;
loop ()
)
| Edit_command_history -> (
let file = Filename.temp_file "" Params.docfd_script_ext in
let init_snapshots =
Session_manager.lock_with_view (fun view ->
view.snapshots
)
in
let init_lines =
Seq.append
(
init_snapshots
|> Dynarray.to_seq
|> Seq.filter_map (fun (snapshot : Session.Snapshot.t) ->
Option.map
Command.to_string
(Session.Snapshot.last_command snapshot)
)
)
(
List.to_seq
[
"";
"; You are viewing/editing Docfd command history.";
"; If any change is made to this file, Docfd will replay the commands from the start.";
";";
"; There are two types of comments:";
"; - System comments begin with `;`, and are not preserved after editing of command history.";
"; These are for communication of error message to user during command history editing.";
"; - User comments begin with `#`, and are preserved after editing of command history.";
";";
"; If a line is not blank and is not a comment,";
"; then the line should contain exactly one command.";
"; A command cannot be written across multiple lines.";
";";
"; Starting point is v0, the state with the full set of documents.";
"; Each command adds one to the version number.";
"; Command at the top is oldest, command at bottom is the newest.";
";";
"; Note that for commands that accept text, all text after `:` is trimmed and then used in full.";
"; This means \" and ' are treated literally and are not used to delimit strings.";
";";
"; Possible commands:";
Fmt.str "; - %a" Command.pp (`Search "search phrase");
Fmt.str "; - %a" Command.pp (`Search "");
Fmt.str "; - %a" Command.pp (`Filter "path-fuzzy:\"file txt\"");
Fmt.str "; - %a" Command.pp (`Filter "");
Fmt.str "; - %a" Command.pp (`Sort (Command.Sort_by.default, Command.Sort_by.default_no_score));
Fmt.str "; - %a" Command.pp (`Path_fuzzy_rank ("readme", None));
Fmt.str "; - %a" Command.pp (`Narrow_level 1);
Fmt.str "; - %a" Command.pp (`Mark "/path/to/document");
Fmt.str "; - %a" Command.pp `Mark_listed;
Fmt.str "; - %a" Command.pp (`Unmark "/path/to/document");
Fmt.str "; - %a" Command.pp `Unmark_listed;
Fmt.str "; - %a" Command.pp `Unmark_all;
Fmt.str "; - %a" Command.pp (`Drop "/path/to/document");
Fmt.str "; - %a" Command.pp (`Drop_all_except "/path/to/document");
Fmt.str "; - %a" Command.pp `Drop_marked;
Fmt.str "; - %a" Command.pp `Drop_unmarked;
Fmt.str "; - %a" Command.pp `Drop_listed;
Fmt.str "; - %a" Command.pp `Drop_unlisted;
]
)
|> List.of_seq
in
let init_state =
Session_manager.lock_with_view (fun view ->
view.init_state
)
in
let rec aux rerun snapshots lines : [ `No_changes | `Changes_made of Session.Snapshot.t Dynarray.t ] =
CCIO.with_out file (fun oc ->
CCIO.write_lines_l oc lines;
);
let old_stats = Unix.stat file in
close_term ();
Path_opening.config_and_cmd_to_open_text_file
~path:file
~line_num:(max 1 (Dynarray.length snapshots - 1))
()
|> (fun (config, cmd) ->
Result.get_ok (Path_opening.resolve_cmd config cmd)
)
|> Sys.command
|> ignore;
let new_stats = Unix.stat file in
if
rerun
||
Float.abs
(new_stats.st_mtime -. old_stats.st_mtime) >= Params.float_compare_margin
then (
let state = ref init_state in
let snapshots = Dynarray.create () in
Dynarray.add_last
snapshots
(Session.Snapshot.make
~last_command:None
init_state);
let rerun = ref false in
let lines =
CCIO.with_in file (fun ic ->
CCIO.read_lines_l ic
|> CCList.flat_map (fun line ->
if
String_utils.line_is_blank_or_system_comment line
then (
[ line ]
) else (
match Command.of_string line with
| None -> (
rerun := true;
[
line;
"# Failed to parse the above command"
]
)
| Some command -> (
match Session.run_command pool command !state with
| None -> (
rerun := true;
[
line;
"# Failed to run the above command, check if the arguments are correct"
]
)
| Some (command, x) -> (
state := x;
let snapshot =
Session.Snapshot.make
~last_command:(Some command)
!state
in
Dynarray.add_last
snapshots
snapshot;
[ line ]
)
)
)
)
)
in
if !rerun then (
aux true snapshots lines
) else (
`Changes_made snapshots
)
) else (
`No_changes
)
in
(try
let res =
aux false init_snapshots init_lines
in
(try
Sys.remove file;
with
| Sys_error _ -> ()
);
(match res with
| `No_changes -> ()
| `Changes_made snapshots -> (
Session_manager.load_snapshots snapshots
)
);
with
| Sys_error _ -> (
exit_with_error_msg
(Fmt.str "failed to read or write temporary command history file %s" (Filename.quote file))
));
loop ()
)
| Clear_command_history -> (
let init_state =
Session_manager.lock_with_view (fun view ->
view.init_state
)
in
let snapshots = Dynarray.create () in
Dynarray.add_last
snapshots
(Session.Snapshot.make
~last_command:None
init_state);
Session_manager.load_snapshots snapshots;
loop ()
)
| Open_script path -> (
let init_state =
Session_manager.lock_with_view (fun view ->
view.init_state
)
in
match
Script.run
pool
~init_state
~path
with
| Error msg -> exit_with_error_msg msg
| Ok snapshots -> (
Session_manager.load_snapshots snapshots;
loop ()
)
)
| Edit_script path -> (
close_term ();
Path_opening.config_and_cmd_to_open_text_file
~path
()
|> (fun (config, cmd) ->
Result.get_ok (Path_opening.resolve_cmd config cmd)
)
|> Sys.command
|> ignore;
loop ()
)
)
in
Eio.Fiber.any [
(fun () ->
Eio.Domain_manager.run (Eio.Stdenv.domain_mgr env)
(fun () -> Session_manager.worker_fiber pool));
Session_manager.manager_fiber;
UI_base.Key_binding_info.grid_light_fiber;
(fun () ->
(match start_with_filter with
| None -> ()
| Some start_with_filter -> (
let start_with_filter_len = String.length start_with_filter in
Lwd.set UI_base.Vars.filter_field (start_with_filter, start_with_filter_len);
UI.update_filter ~commit:true ();
));
(match start_with_search with
| None -> ()
| Some start_with_search -> (
let start_with_search_len = String.length start_with_search in
Lwd.set UI_base.Vars.search_field (start_with_search, start_with_search_len);
UI.update_search ~commit:true ();
));
loop ();
);
];
close_term ();
clean_up ();
(match debug_log with
| Some "-" -> ()
| _ -> (
match !Params.debug_output with
| None -> ()
| Some oc -> (
close_out oc
)
)
)
(* Cmdliner command definition for the docfd executable.
   NOTE: the order of the [$ ...] argument applications below must
   match the parameter order of [run] exactly — cmdliner applies them
   positionally. *)
let cmd ~eio_env ~sw =
  let open Term in
  let open Args in
  let doc = "TUI multiline fuzzy document finder" in
  let version = Version_string.s in
  Cmd.v (Cmd.info "docfd" ~version ~doc)
    (const (run ~eio_env ~sw)
     $ debug_log_arg
     $ no_pdftotext_arg
     $ no_pandoc_arg
     $ hidden_arg
     $ max_depth_arg
     $ max_fuzzy_edit_dist_arg
     $ max_token_search_dist_arg
     $ max_linked_token_search_dist_arg
     $ tokens_per_search_scope_level_arg
     $ index_chunk_size_arg
     $ exts_arg
     $ single_line_exts_arg
     $ add_exts_arg
     $ single_line_add_exts_arg
     $ cache_dir_arg
     $ cache_limit_arg
     $ data_dir_arg
     $ index_only_arg
     $ start_with_filter_arg
     $ start_with_search_arg
     $ filter_arg
     $ sample_arg
     $ samples_per_doc_arg
     $ search_arg
     $ sort_arg
     $ sort_no_score_arg
     $ color_arg
     $ underline_arg
     $ search_result_print_text_width_arg
     $ search_result_print_snippet_min_size_arg
     $ search_result_print_snippet_max_add_lines_arg
     $ start_with_script_arg
     $ script_arg
     $ paths_from_arg
     $ glob_arg
     $ single_line_glob_arg
     $ single_line_arg
     $ open_with_arg
     $ files_with_match_arg
     $ files_without_match_arg
     $ paths_arg)
(* Program entry point. *)
let () =
  (* POSIX-only code paths are used throughout (Eio_posix, /dev/tty
     terminal handling), so bail out early on Windows. *)
  if Sys.win32 then (
    exit_with_error_msg "Windows is not supported"
  );
  (* Seed the global PRNG. *)
  Random.self_init ();
  Eio_posix.run (fun eio_env ->
      Eio.Switch.run (fun sw ->
          exit (Cmd.eval (cmd ~eio_env ~sw))
        ))
================================================
FILE: bin/document.ml
================================================
open Result_syntax
open Docfd_lib
(* In-memory representation of a loaded document.  The heavy indexed
   content lives in the SQLite index (see [of_ir2]); this record keeps
   the metadata needed for filtering, sorting and display. *)
type t = {
  search_mode : Search_mode.t;
  path : string;
  (* [path] broken into tokens; used by path-fuzzy filtering. *)
  path_parts : string list;
  (* Date extracted from the path itself, if any (see Date_extraction). *)
  path_date : Timedesc.Date.t option;
  mod_time : Timedesc.t;
  (* Sanitized first line of the document, if any. *)
  title : string option;
  doc_id : int64;
  (* BLAKE2B hash of the file content; identifies the document in the
     index independently of its path. *)
  doc_hash : string;
  word_ids : Int_set.t;
  (* [None] means the whole document is in scope for searching. *)
  search_scope : Diet.Int.t option;
  links : Link.t array;
  (* Maps a link's start position to its index within [links]. *)
  link_index_of_start_pos : int Int_map.t;
  last_scan : Timedesc.t;
}
(* Two documents are equal when their identity-determining fields
   agree: search mode, path, content hash and search scope.  Derived
   metadata (title, mod time, links, ...) is intentionally ignored. *)
let equal (a : t) (b : t) =
  a.search_mode = b.search_mode
  && String.equal a.path b.path
  && String.equal a.doc_hash b.doc_hash
  && Option.equal Diet.Int.equal a.search_scope b.search_scope
(* Tokenize [path] (whitespace tokens preserved) into the list of
   parts consumed by path-fuzzy matching. *)
let compute_path_parts (path : string) =
  List.of_seq (Tokenization.tokenize ~drop_spaces:false path)
(* Build a lookup from a link's start position to its index within
   [links].  Iteration is left-to-right, so on a position collision the
   later link wins — same as the original left fold. *)
let compute_link_index_of_start_pos links =
  let acc = ref Int_map.empty in
  Array.iteri (fun i link ->
      acc := Int_map.add link.Link.start_pos i !acc
    )
    links;
  !acc
(* Plain field accessors. *)
let search_mode (t : t) = t.search_mode
let path (t : t) = t.path
let path_parts (t : t) = t.path_parts
let path_date (t : t) = t.path_date
let mod_time (t : t) = t.mod_time
let title (t : t) = t.title
let word_ids (t : t) = t.word_ids
let doc_hash (t : t) = t.doc_hash
let doc_id (t : t) = t.doc_id
let search_scope (t : t) = t.search_scope
let last_scan (t : t) = t.last_scan
let links (t : t) = t.links
let link_index_of_start_pos (t : t) = t.link_index_of_start_pos
(* Touch [path]: set both its access and modification times to the
   current time. *)
let refresh_modification_time ~path =
  let now = Unix.time () in
  Unix.utimes path now now
(* Widen the search scope back to the entire document
   ([None] = everything in scope). *)
let reset_search_scope_to_full (t : t) : t =
  { t with search_scope = None }
(* Narrow [t]'s search scope to its intersection with [x].  A document
   with no scope yet ([None] means "everything in scope") simply
   adopts [x]. *)
let inter_search_scope (x : Diet.Int.t) (t : t) : t =
  let narrowed =
    Option.fold ~none:x ~some:(Diet.Int.inter x) t.search_scope
  in
  { t with search_scope = Some narrowed }
module Compare = struct
  type order = [
    | `Asc
    | `Desc
  ]

  (* Pick operand order according to [order]: a descending comparison
     is an ascending comparison with the operands swapped. *)
  let operands order a b =
    match order with
    | `Asc -> (a, b)
    | `Desc -> (b, a)

  let mod_time order d0 d1 =
    let (a, b) = operands order d0 d1 in
    Timedesc.compare_chrono_min (mod_time a) (mod_time b)

  let path order d0 d1 =
    let (a, b) = operands order d0 d1 in
    String.compare (path a) (path b)

  (* Compare by date extracted from the path, falling back to path
     comparison on ties.  Documents without a path date always sort
     to the back, regardless of [order]. *)
  let path_date order d0 d1 =
    let fallback () = path order d0 d1 in
    match path_date d0, path_date d1 with
    | None, None -> fallback ()
    | None, Some _ -> 1
    | Some _, None -> -1
    | Some x0, Some x1 -> (
        let (a, b) = operands order x0 x1 in
        match Timedesc.Date.compare a b with
        | 0 -> fallback ()
        | n -> n
      )
end
module Ir0 = struct
  (* First intermediate representation: path plus document identity
     (content hash, doc id) and scan time — no content read yet. *)
  type t = {
    search_mode : Search_mode.t;
    doc_id : int64;
    doc_hash : string;
    path : string;
    last_scan : Timedesc.t;
  }

  (* Hash [path] (unless [doc_hash] is already known) and record the
     document's identity together with the current scan time. *)
  let of_path ~(env : Eio_unix.Stdenv.base) search_mode ?doc_hash path : (t, string) result =
    let* doc_hash =
      match doc_hash with
      | None -> BLAKE2B.hash_of_file ~env ~path
      | Some h -> Ok h
    in
    let doc_id = Doc_id_db.doc_id_of_doc_hash doc_hash in
    Ok {
      search_mode;
      doc_id;
      doc_hash;
      path;
      last_scan = Timedesc.now ~tz_of_date_time:Params.tz ();
    }
end
module Ir1 = struct
  (* Second intermediate representation: Ir0 plus the raw textual
     content of the document, either as individual lines or as pages
     of lines (PDF). *)
  type t = {
    search_mode : Search_mode.t;
    doc_id : int64;
    doc_hash : string;
    path : string;
    (* [`Lines] for line-oriented sources (text files, pandoc output),
       [`Pages] when the source has page structure (pdftotext). *)
    data : [ `Lines of string Dynarray.t | `Pages of string list Dynarray.t ];
    last_scan : Timedesc.t;
  }

  (* Read [path] directly as text, one dynarray entry per line. *)
  let of_path_to_text ~env ~doc_id ~doc_hash search_mode last_scan path : (t, string) result =
    let fs = Eio.Stdenv.fs env in
    try
      let data =
        Eio.Path.(with_lines (fs / path))
          (fun lines ->
             `Lines (Dynarray.of_seq lines)
          )
      in
      Ok {
        search_mode;
        doc_id;
        doc_hash;
        path;
        data;
        last_scan;
      }
    with
    | Failure _
    | End_of_file
    | Eio.Buf_read.Buffer_limit_exceeded -> (
        Error (Printf.sprintf "failed to read file: %s" (Filename.quote path))
      )

  (* Extract text from a PDF via the external pdftotext command
     (writing to stdout).  Output is split into pages on form feeds,
     then each page into lines. *)
  let of_path_to_pdf ~env ~doc_id ~doc_hash search_mode last_scan path : (t, string) result =
    let proc_mgr = Eio.Stdenv.process_mgr env in
    let fs = Eio.Stdenv.fs env in
    try
      let cmd = [ "pdftotext"; path; "-" ] in
      let pages =
        match Proc_utils.run_return_stdout ~proc_mgr ~fs ~split_mode:`On_form_feed cmd with
        | None -> Seq.empty
        | Some pages -> (
            List.to_seq pages
            |> Seq.map (fun page -> String.split_on_char '\n' page)
          )
      in
      let data = `Pages (Dynarray.of_seq pages) in
      Ok {
        search_mode;
        doc_id;
        doc_hash;
        path;
        data;
        last_scan;
      }
    with
    | Failure _
    | End_of_file
    | Eio.Buf_read.Buffer_limit_exceeded -> (
        Error (Printf.sprintf "failed to read file: %s" (Filename.quote path))
      )

  (* Convert other supported formats to plain text via pandoc.  The
     pandoc source format is derived from the file extension. *)
  let of_path_to_pandoc_supported_format ~env ~doc_id ~doc_hash search_mode last_scan path : (t, string) result =
    let proc_mgr = Eio.Stdenv.process_mgr env in
    let fs = Eio.Stdenv.fs env in
    let ext = File_utils.extension_of_file path in
    let from_format = ext
                      |> String_utils.remove_leading_dots
                      |> (fun s ->
                          (* pandoc expects "html", not "htm". *)
                          match s with
                          | "htm" -> "html"
                          | _ -> s
                        )
    in
    let cmd = [ "pandoc"
              ; "--from"
              ; from_format
              ; "--to"
              ; "plain"
              ; "--wrap"
              ; "none"
              ; path
              ]
    in
    let error_msg = Fmt.str "failed to extract text from %s" (Filename.quote path) in
    match
      Proc_utils.run_return_stdout
        ~proc_mgr
        ~fs
        ~split_mode:`On_line_split
        cmd
    with
    | None -> (
        Error error_msg
      )
    | Some lines -> (
        let data = `Lines (Dynarray.of_list lines) in
        Ok {
          search_mode;
          doc_id;
          doc_hash;
          path;
          data;
          last_scan;
        }
      )

  (* Dispatch on the detected file format to the appropriate reader. *)
  let of_ir0 ~(env : Eio_unix.Stdenv.base) (ir0 : Ir0.t) : (t, string) result =
    let { Ir0.search_mode; doc_id; doc_hash; path; last_scan } = ir0 in
    match File_utils.format_of_file path with
    | `PDF -> (
        of_path_to_pdf ~env ~doc_id ~doc_hash search_mode last_scan path
      )
    | `Pandoc_supported_format -> (
        of_path_to_pandoc_supported_format ~env ~doc_id ~doc_hash search_mode last_scan path
      )
    | `Text | `Other -> (
        of_path_to_text ~env ~doc_id ~doc_hash search_mode last_scan path
      )
end
(* Best-effort extraction of a date from a file path/name.  Each
   recognizer returns the start position of its match together with
   (year, month, day); among the recognizers that match, the match
   starting furthest to the right wins.  The winning (y, m, d) is
   validated as a real calendar date at the end. *)
module Date_extraction = struct
  (* PCRE regex fragments, one capture group each.  Note [mm] accepts
     00-19; invalid months are rejected later by Timedesc. *)
  let yyyy = "(\\d{4})"
  let mm = "([01]\\d)"
  let mmm = "(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)"
  let mmmm = "(january|february|march|april|may|june|july|august|september|october|november|december)"
  let dd = "([0-3]\\d)"

  (* Month number from a month name; only the first three characters
     are inspected.  Inputs come from [mmm]/[mmmm] matches, so they
     are always at least 3 characters long. *)
  let int_of_month_string s =
    match String.lowercase_ascii (String.sub s 0 3) with
    | "jan" -> 1
    | "feb" -> 2
    | "mar" -> 3
    | "apr" -> 4
    | "may" -> 5
    | "jun" -> 6
    | "jul" -> 7
    | "aug" -> 8
    | "sep" -> 9
    | "oct" -> 10
    | "nov" -> 11
    | "dec" -> 12
    | _ -> failwith "unexpected case"

  (* e.g. "2023-01-31": numeric date with a single non-digit separator
     between components.  The leading greedy [.*] makes the regex
     prefer the right-most occurrence. *)
  let yyyy_mm_dd =
    let re =
      Fmt.str
        "(?:^|.*[^\\d])%s[^\\d]%s[^\\d]%s(?:$|[^\\d])"
        yyyy
        mm
        dd
      |> Re.Pcre.re
      |> Re.compile
    in
    fun s ->
      try
        let g = Re.exec re s in
        let start = Re.Group.start g 1 in
        let y = Re.Group.get g 1 |> int_of_string in
        let m = Re.Group.get g 2 |> int_of_string in
        let d = Re.Group.get g 3 |> int_of_string in
        Some (start, (y, m, d))
      with
      | _ -> None

  (* e.g. "2023-jan-31" ([reverse] = false) or "31-jan-2023"
     ([reverse] = true), case-insensitive, with optional single
     separators.  Group 1 is whichever of year/day comes first in the
     string; [reverse] also decides which group index holds which. *)
  let yyyy_month_dd ~month ~reverse =
    let re =
      let g1, g3 =
        if not reverse then (
          (yyyy, dd)
        ) else (
          (dd, yyyy)
        )
      in
      Fmt.str
        "(?:^|.*[^\\d])%s[^A-Za-z\\d]?%s[^A-Za-z\\d]?%s(?:$|[^\\d])"
        g1
        month
        g3
      |> Re.Pcre.re
      |> Re.no_case
      |> Re.compile
    in
    fun s ->
      try
        let g = Re.exec re s in
        let start = Re.Group.start g 1 in
        let y_group_index, d_group_index =
          if not reverse then (
            (1, 3)
          ) else (
            (3, 1)
          )
        in
        let y = Re.Group.get g y_group_index |> int_of_string in
        let m = int_of_month_string (Re.Group.get g 2) in
        let d = Re.Group.get g d_group_index |> int_of_string in
        Some (start, (y, m, d))
      with
      | _ -> None

  (* e.g. "20230131": compact numeric date with no separators. *)
  let yyyymmdd =
    let re =
      Fmt.str
        "(?:^|.*[^\\d])%s%s%s"
        yyyy
        mm
        dd
      |> Re.Pcre.re
      |> Re.compile
    in
    fun s ->
      try
        let g = Re.exec re s in
        let start = Re.Group.start g 1 in
        let y = Re.Group.get g 1 |> int_of_string in
        let m = Re.Group.get g 2 |> int_of_string in
        let d = Re.Group.get g 3 |> int_of_string in
        Some (start, (y, m, d))
      with
      | _ -> None

  (* Run all recognizers over [s], keep the match that starts furthest
     to the right, then validate the result as a calendar date
     (rejecting e.g. month 13 or Feb 30). *)
  let extract s =
    let rec aux acc l =
      match l with
      | [] -> (
          match acc with
          | None -> None
          | Some (_start_match_pos, (year, month, day)) -> (
              match Timedesc.Date.Ymd.make ~year ~month ~day with
              | Ok date -> Some date
              | Error _ -> None
            )
        )
      | f :: fs -> (
          let acc =
            match acc, f s with
            | None, x -> x
            | Some x, None -> Some x
            | Some (start_match_pos, ymd),
              Some (start_match_pos', ymd') -> (
                (* Prefer the match that starts later in the string. *)
                if start_match_pos' > start_match_pos then (
                  Some (start_match_pos', ymd')
                ) else (
                  Some (start_match_pos, ymd)
                )
              )
          in
          aux acc fs
        )
    in
    aux
      None
      [
        yyyy_mm_dd;
        yyyy_month_dd ~month:mmm ~reverse:true;
        yyyy_month_dd ~month:mmm ~reverse:false;
        yyyy_month_dd ~month:mmmm ~reverse:true;
        yyyy_month_dd ~month:mmmm ~reverse:false;
        yyyymmdd;
      ]
end
module Ir2 = struct
  (* Third intermediate representation: fully parsed index data (raw
     index, links) plus metadata, ready to be persisted by [of_ir2]. *)
  type t = {
    search_mode : Search_mode.t;
    doc_id : int64;
    doc_hash : string;
    path : string;
    path_parts : string list;
    path_date : Timedesc.Date.t option;
    mod_time : Timedesc.t;
    title : string option;
    raw : Index.Raw.t;
    links : Link.t array;
    last_scan : Timedesc.t;
  }

  (* Two-phase parse: first peel off the title, then index the content. *)
  type work_stage =
    | Title
    | Content

  (* Index a line-oriented document.  The first line (if any) is
     recorded, sanitized, as the title; it is re-consed onto the
     sequence so it remains part of the indexed content. *)
  let parse_lines pool (s : string Seq.t) : string option * Index.Raw.t =
    let rec aux (stage : work_stage) title s =
      match stage with
      | Content -> (
          let raw = Index.Raw.of_lines pool s in
          (title, raw)
        )
      | Title -> (
          match s () with
          | Seq.Nil -> aux Content title Seq.empty
          | Seq.Cons (x, xs) -> (
              aux Content (Some (Misc_utils.sanitize_string x)) (Seq.cons x xs)
            )
        )
    in
    aux Title None s

  (* Same as [parse_lines] but for page-structured input; the title is
     the first line of the first page (none if the first page is empty). *)
  let parse_pages pool (s : string list Seq.t) : string option * Index.Raw.t =
    let rec aux (stage : work_stage) title s =
      match stage with
      | Content -> (
          let raw = Index.Raw.of_pages pool s in
          (title, raw)
        )
      | Title -> (
          match s () with
          | Seq.Nil -> aux Content title Seq.empty
          | Seq.Cons (x, xs) -> (
              let title =
                match x with
                | [] -> None
                | x :: _ ->
                  Some (Misc_utils.sanitize_string x)
              in
              aux Content title (Seq.cons x xs)
            )
        )
    in
    aux Title None s

  (* Parse an Ir1 into an Ir2: derive path metadata, stat the file for
     its modification time, and build the raw index from the content. *)
  let of_ir1 pool (ir : Ir1.t) : t =
    let { Ir1.search_mode; doc_id; doc_hash; path; data; last_scan } = ir in
    let path_parts = compute_path_parts path in
    let path_date = Date_extraction.extract path in
    let stats = Unix.stat path in
    let mod_time = Timedesc.of_timestamp_float_s_exn stats.Unix.st_mtime in
    let title, raw =
      match data with
      | `Lines x -> (
          parse_lines pool (Dynarray.to_seq x)
        )
      | `Pages x -> (
          parse_pages pool (Dynarray.to_seq x)
        )
    in
    let links = Index.Raw.links raw in
    {
      search_mode;
      path;
      path_parts;
      path_date;
      mod_time;
      doc_id;
      doc_hash;
      title;
      raw;
      links;
      last_scan;
    }
end
(* Persist the accumulated word database and this document's raw index
   to [db], then build the final in-memory document record. *)
let of_ir2 db ~already_in_transaction (ir : Ir2.t) : t =
  Word_db.write_to_db db ~already_in_transaction;
  Index.write_raw_to_db db ~already_in_transaction
    ~doc_id:ir.Ir2.doc_id ir.Ir2.raw;
  {
    search_mode = ir.Ir2.search_mode;
    path = ir.Ir2.path;
    path_parts = ir.Ir2.path_parts;
    path_date = ir.Ir2.path_date;
    mod_time = ir.Ir2.mod_time;
    title = ir.Ir2.title;
    doc_id = ir.Ir2.doc_id;
    doc_hash = ir.Ir2.doc_hash;
    word_ids = Index.Raw.word_ids ir.Ir2.raw;
    (* Fresh documents start with the full document in scope. *)
    search_scope = None;
    links = ir.Ir2.links;
    link_index_of_start_pos = compute_link_index_of_start_pos ir.Ir2.links;
    last_scan = ir.Ir2.last_scan;
  }
(* Load a document from [path].  Documents are keyed by content hash:
   if the hash is already present in the on-disk index, the in-memory
   record is rebuilt straight from the index (fast path); otherwise the
   full Ir0 -> Ir1 -> Ir2 pipeline runs and the result is persisted.
   [doc_hash] may be supplied by the caller to skip re-hashing. *)
let of_path
    ~(env : Eio_unix.Stdenv.base)
    pool
    ~already_in_transaction
    search_mode
    ?doc_hash
    path
  : (t, string) result =
  let open Sqlite3_utils in
  let* doc_hash =
    match doc_hash with
    | Some x -> Ok x
    | None -> BLAKE2B.hash_of_file ~env ~path
  in
  if Index.is_indexed ~doc_hash then (
    (* Fast path: everything content-derived is already in the index;
       only path/stat metadata is recomputed. *)
    let doc_id = Doc_id_db.doc_id_of_doc_hash doc_hash in
    let title =
      if Index.global_line_count ~doc_id = 0 then
        None
      else
        Some (Index.line_of_global_line_num ~doc_id 0)
    in
    let path_parts = compute_path_parts path in
    let path_date = Date_extraction.extract path in
    let stats = Unix.stat path in
    let mod_time = Timedesc.of_timestamp_float_s_exn stats.Unix.st_mtime in
    let links = Index.links ~doc_id in
    Ok
      {
        search_mode;
        path;
        path_parts;
        path_date;
        mod_time;
        title;
        doc_id;
        doc_hash;
        word_ids = Index.word_ids ~doc_id;
        search_scope = None;
        links;
        link_index_of_start_pos = compute_link_index_of_start_pos links;
        last_scan = Timedesc.now ~tz_of_date_time:Params.tz ()
      }
  ) else (
    (* Slow path: read, parse and index the document, then write it to
       the database. *)
    let* ir0 = Ir0.of_path ~env search_mode ~doc_hash path in
    let* ir1 = Ir1.of_ir0 ~env ir0 in
    let ir2 = Ir2.of_ir1 pool ir1 in
    let res =
      with_db (fun db ->
          Ok (of_ir2 db ~already_in_transaction ir2)
        )
    in
    res
  )
(* Evaluate filter expression [exp] against document [t].  Most
   branches are pure metadata checks; [Content] runs an actual index
   search and relies on the [Result_found] exception for early exit. *)
let satisfies_filter_exp pool stop_signal ~global_first_word_candidates_lookup (exp : Filter_exp.t) (t : t) : bool =
  let open Filter_exp in
  (* Date comparison corresponding to the filter operator. *)
  let date_f (op : Filter_exp.compare_op) =
    match op with
    | Eq -> Timedesc.Date.equal
    | Le -> Timedesc.Date.le
    | Ge -> Timedesc.Date.ge
    | Lt -> Timedesc.Date.lt
    | Gt -> Timedesc.Date.gt
  in
  let rec aux exp =
    match exp with
    | Empty -> true
    | Path_date (op, date) -> (
        (* Documents without an extractable path date never match. *)
        match t.path_date with
        | None -> false
        | Some path_date -> (
            date_f op path_date date
          )
      )
    | Mod_date (op, date) -> (
        date_f op (Timedesc.date t.mod_time) date
      )
    | Path_fuzzy exp -> (
        (* Some phrase of the expression must have every one of its
           tokens matched by some part of the tokenized path. *)
        List.exists (fun phrase ->
            List.for_all (fun token ->
                List.exists (fun path_part ->
                    Search_phrase.Enriched_token.compatible_with_word token path_part
                  )
                  t.path_parts
              )
              (Search_phrase.enriched_tokens phrase)
          )
          (Search_exp.flattened exp)
      )
    | Path_glob glob -> (
        Glob.is_empty glob || Glob.match_ glob t.path
      )
    | Ext ext -> (
        File_utils.extension_of_file t.path = ext
      )
    | Content exp -> (
        (* [terminate_on_result_found:true] makes the search raise
           [Result_found] on the first hit, so a normal return means
           no match was found. *)
        try
          Index.search
            pool
            stop_signal
            ~terminate_on_result_found:true
            ~doc_id:t.doc_id
            ~doc_word_ids:(word_ids t)
            ~global_first_word_candidates_lookup
            ~within_same_line:false
            ~search_scope:None
            exp
          |> ignore;
          false
        with
        | Index.Search_job.Result_found -> true
      )
    | Binary_op (op, e1, e2) -> (
        match op with
        | And -> aux e1 && aux e2
        | Or -> aux e1 || aux e2
      )
    | Unary_op (op, e) -> (
        match op with
        | Not -> not (aux e)
      )
  in
  aux exp
================================================
FILE: bin/document.mli
================================================
open Docfd_lib
type t
(** A loaded document: a file (or stdin dump) that has been hashed,
    indexed and annotated with metadata. *)

val equal : t -> t -> bool
(** Equality over identity-determining fields (search mode, path,
    content hash, search scope); derived metadata is ignored. *)

(** Comparison functions for sorting documents. *)
module Compare : sig
  type order = [
    | `Asc
    | `Desc
  ]

  val mod_time : order -> t -> t -> int

  val path_date : order -> t -> t -> int
  (** Documents without a path date always sort to the back,
      regardless of order; ties fall back to path comparison. *)

  val path : order -> t -> t -> int
end

val search_mode : t -> Search_mode.t

val path : t -> string

val path_date : t -> Timedesc.Date.t option
(** Date extracted from the path itself, if any. *)

val mod_time : t -> Timedesc.t

val title : t -> string option
(** Sanitized first line of the document, if any. *)

val word_ids : t -> Int_set.t

val doc_hash : t -> string
(** BLAKE2B hash of the file content. *)

val doc_id : t -> int64

val search_scope : t -> Diet.Int.t option
(** [None] means the whole document is in scope. *)

val last_scan : t -> Timedesc.t

val links : t -> Link.t array

val link_index_of_start_pos : t -> int Int_map.t
(** Maps a link's start position to its index within [links]. *)

val satisfies_filter_exp :
  Task_pool.t ->
  Stop_signal.t ->
  global_first_word_candidates_lookup:Int_set.t Search_phrase.Enriched_token.Data_map.t ->
  Filter_exp.t ->
  t ->
  bool

val of_path :
  env:Eio_unix.Stdenv.base ->
  Task_pool.t ->
  already_in_transaction:bool ->
  Search_mode.t ->
  ?doc_hash:string ->
  string ->
  (t, string) result
(** Load a document from a path, reusing the on-disk index when the
    content hash is already indexed.  [?doc_hash] skips re-hashing. *)

val reset_search_scope_to_full : t -> t

val inter_search_scope : Diet.Int.t -> t -> t

(** Intermediate representations of the loading pipeline:
    Ir0 (identity) -> Ir1 (raw content) -> Ir2 (parsed index data). *)
module Ir0 : sig
  type t

  val of_path :
    env:Eio_unix.Stdenv.base ->
    Search_mode.t ->
    ?doc_hash:string ->
    string ->
    (t, string) result
end

module Ir1 : sig
  type t

  val of_ir0 :
    env:Eio_unix.Stdenv.base ->
    Ir0.t ->
    (t, string) result
end

module Ir2 : sig
  type t

  val of_ir1 : Task_pool.t -> Ir1.t -> t
end

val of_ir2 :
  Sqlite3.db ->
  already_in_transaction:bool ->
  Ir2.t ->
  t
================================================
FILE: bin/document_pipeline.ml
================================================
open Docfd_lib
open Debug_utils
(* A concurrent document loading pipeline: paths are fed in as Ir0s
   and flow through staged queues (Ir0 -> Ir1 -> Ir2 -> Document),
   each stage served by its own worker fibers.  [None] on a queue is
   the end-of-input sentinel. *)
type t = {
  env : Eio_unix.Stdenv.base;
  pool : Task_pool.t;
  ir0_queue : Document.Ir0.t option Eio.Stream.t;
  (* Released once by each ir1-of-ir0 worker on shutdown, letting
     [finalize] wait for the whole stage to drain. *)
  ir1_of_ir0_workers_batch_release : Eio.Semaphore.t;
  ir1_queue : Document.Ir1.t option Eio.Stream.t;
  ir2_of_ir1_workers_batch_release : Eio.Semaphore.t;
  ir2_queue : Document.Ir2.t option Eio.Stream.t;
  (* Filled by the single final-stage worker. *)
  documents : Document.t Dynarray.t;
  (* One-shot channel carrying the final document collection. *)
  result : Document.t Dynarray.t Eio.Stream.t;
}
(* Create a pipeline.  Stage queues are bounded at 100 entries to
   apply backpressure between stages; [result] has capacity 1 as it
   carries a single final value. *)
let make ~env pool : t =
  {
    env;
    pool;
    ir0_queue = Eio.Stream.create 100;
    ir1_of_ir0_workers_batch_release = Eio.Semaphore.make 0;
    ir1_queue = Eio.Stream.create 100;
    ir2_of_ir1_workers_batch_release = Eio.Semaphore.make 0;
    ir2_queue = Eio.Stream.create 100;
    documents = Dynarray.create ();
    result = Eio.Stream.create 1;
  }
(* Pipeline stage: convert Ir0s to Ir1s (content reading).  Stops on
   the [None] sentinel, signalling shutdown via the batch-release
   semaphore.  Failed conversions are logged in debug mode and
   dropped. *)
let ir1_of_ir0_worker (t : t) =
  let rec next () =
    match Eio.Stream.take t.ir0_queue with
    | None -> Eio.Semaphore.release t.ir1_of_ir0_workers_batch_release
    | Some ir0 -> (
        (match Document.Ir1.of_ir0 ~env:t.env ir0 with
         | Ok ir1 -> Eio.Stream.add t.ir1_queue (Some ir1)
         | Error msg -> (
             do_if_debug (fun oc ->
                 Printf.fprintf oc "Error: %s\n" msg
               )
           ));
        next ()
      )
  in
  next ()
(* Pipeline stage: convert Ir1s to Ir2s (parsing and indexing).
   Stops on the [None] sentinel, signalling shutdown via the
   batch-release semaphore. *)
let ir2_of_ir1_worker (t : t) =
  let rec next () =
    match Eio.Stream.take t.ir1_queue with
    | None -> Eio.Semaphore.release t.ir2_of_ir1_workers_batch_release
    | Some ir -> (
        Eio.Stream.add t.ir2_queue (Some (Document.Ir2.of_ir1 t.pool ir));
        next ()
      )
  in
  next ()
(* Final pipeline stage (single worker): write each Ir2 to the
   database and collect the resulting in-memory documents.  Writes are
   batched into SQLite transactions of roughly 100 documents to
   amortize commit cost: a transaction is opened whenever [counter] is
   0, committed once [counter] reaches 100, and any transaction still
   open when the [None] sentinel arrives is committed at the end. *)
let document_of_ir2_worker (t : t) =
  let open Sqlite3_utils in
  let run = ref true in
  let counter = ref 0 in
  let outstanding_transaction = ref false in
  with_db (fun db ->
      while !run do
        if !counter = 0 then (
          step_stmt ~db "BEGIN IMMEDIATE" ignore;
          outstanding_transaction := true;
        );
        (match Eio.Stream.take t.ir2_queue with
         | None -> (
             run := false
           )
         | Some ir -> (
             let doc = Document.of_ir2 db ~already_in_transaction:true ir in
             Dynarray.add_last t.documents doc;
             do_if_debug (fun oc ->
                 Printf.fprintf oc "Document %s loaded successfully\n" (Filename.quote (Document.path doc));
               );
           ));
        if !counter >= 100 then (
          step_stmt ~db "COMMIT" ignore;
          outstanding_transaction := false;
          counter := 0;
        ) else (
          incr counter;
        );
      done;
      (* Commit whatever batch was in flight when input ended. *)
      if !outstanding_transaction then (
        step_stmt ~db "COMMIT" ignore;
      )
    )
(* Queue [path] (with known content hash [doc_hash]) for loading.
   Builds the Ir0 here; a failure is logged in debug mode and the
   path is dropped. *)
let feed (t : t) search_mode ~doc_hash path =
  do_if_debug (fun oc ->
      Printf.fprintf oc "Loading document: %s\n" (Filename.quote path);
    );
  do_if_debug (fun oc ->
      let mode_name =
        match search_mode with
        | `Single_line -> "single line"
        | `Multiline -> "multiline"
      in
      Printf.fprintf oc "Using %s search mode for document %s\n"
        mode_name
        (Filename.quote path)
    );
  match Document.Ir0.of_path ~env:t.env search_mode ~doc_hash path with
  | Ok ir0 -> Eio.Stream.add t.ir0_queue (Some ir0)
  | Error msg -> (
      do_if_debug (fun oc ->
          Printf.fprintf oc "Error: %s\n" msg
        )
    )
(* Run all worker fibers of the pipeline: one ir1-of-ir0 worker and
   one ir2-of-ir1 worker per task-pool slot, plus the single database
   writer.  Blocks until every worker has terminated (i.e. after
   [finalize] has pushed the sentinels), then publishes the collected
   documents on [t.result]. *)
let run (t : t) =
  let spawn n job = List.init n (fun _ () -> job t) in
  Eio.Fiber.all
    (List.concat
       [ spawn Task_pool.size ir1_of_ir0_worker
       ; spawn Task_pool.size ir2_of_ir1_worker
       ; [ (fun () -> document_of_ir2_worker t) ]
       ]);
  Eio.Stream.add t.result t.documents
(* Shuts the pipeline down stage by stage, upstream first, then returns
   the loaded documents.

   Ordering matters: one [None] sentinel is sent per stage-1 worker, and
   the semaphore is awaited [Task_pool.size] times so that every stage-1
   worker has terminated (and thus can no longer produce ir1 values)
   BEFORE the ir1 queue is sentinel-closed.  The same handshake repeats
   for stage 2.  The final stage has exactly one worker (see [run]), so a
   single sentinel suffices for [ir2_queue].  Blocks until [run] publishes
   the result. *)
let finalize (t : t) =
  for _ = 0 to Task_pool.size - 1 do
    Eio.Stream.add t.ir0_queue None;
  done;
  for _ = 0 to Task_pool.size - 1 do
    Eio.Semaphore.acquire t.ir1_of_ir0_workers_batch_release;
  done;
  for _ = 0 to Task_pool.size - 1 do
    Eio.Stream.add t.ir1_queue None;
  done;
  for _ = 0 to Task_pool.size - 1 do
    Eio.Semaphore.acquire t.ir2_of_ir1_workers_batch_release;
  done;
  Eio.Stream.add t.ir2_queue None;
  Dynarray.to_list (Eio.Stream.take t.result)
================================================
FILE: bin/document_pipeline.mli
================================================
(** A staged, concurrent document-loading pipeline.

    Typical usage: [make], then call [feed] for each document while [run]
    executes in its own fiber, then call [finalize] to shut the stages
    down and collect the results. *)
type t

(** [make ~env pool] creates a pipeline using [pool] for CPU-bound work. *)
val make : env:Eio_unix.Stdenv.base -> Docfd_lib.Task_pool.t -> t

(** [feed t search_mode ~doc_hash path] enqueues one document for loading.
    Failures are reported on the debug channel and dropped. *)
val feed : t -> Search_mode.t -> doc_hash:string -> string -> unit

(** Runs the pipeline workers; returns once [finalize] shuts them down. *)
val run : t -> unit

(** Signals shutdown and returns every successfully loaded document. *)
val finalize : t -> Document.t list
================================================
FILE: bin/document_src.ml
================================================
(* Files selected for loading, partitioned by the search mode each file
   should be indexed with. *)
type file_collection = {
  default_search_mode_files : String_set.t;
  single_line_search_mode_files : String_set.t;
}
(* All paths in the collection, default-mode files first. *)
let seq_of_file_collection (x : file_collection) =
  let defaults = String_set.to_seq x.default_search_mode_files in
  let single_line = String_set.to_seq x.single_line_search_mode_files in
  Seq.append defaults single_line
(* Total number of files across both search-mode partitions. *)
let file_collection_size (x : file_collection) =
  let default_count = String_set.cardinal x.default_search_mode_files in
  let single_line_count = String_set.cardinal x.single_line_search_mode_files in
  default_count + single_line_count
(* The collection with no files in either partition. *)
let empty_file_collection =
  {
    default_search_mode_files = String_set.empty;
    single_line_search_mode_files = String_set.empty;
  }

(* Where documents come from: piped-in stdin (the string is presumably
   the path of the temp file holding stdin's contents — see
   [File_utils.read_in_channel_to_tmp_file]; confirm against callers),
   or a collection of files on disk. *)
type t =
  | Stdin of string
  | Files of file_collection
================================================
FILE: bin/dune
================================================
; Helper modules shared with ../lib are mirrored into bin/ at build time.
; copy# (as opposed to plain copy) inserts a line directive, so compiler
; errors in these modules point back at the original file under lib/.
(rule
 (targets int_map.ml)
 (deps ../lib/int_map.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets int_set.ml)
 (deps ../lib/int_set.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets string_map.ml)
 (deps ../lib/string_map.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets char_map.ml)
 (deps ../lib/char_map.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets string_set.ml)
 (deps ../lib/string_set.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets parser_components.ml)
 (deps ../lib/parser_components.ml)
 (action (copy# %{deps} %{targets}))
)
; The docfd executable itself.  The flags enable (almost) all warnings,
; disable a hand-picked set, promote warning 8 (non-exhaustive match) to
; an error via @8, and keep debug info (-g).
(executable
 (flags (-w "+a-4-9-29-37-40-42-44-48-50-32-30-70@8" -g))
 (name docfd)
 (public_name docfd)
 (preprocess (pps ppx_deriving.show ppx_deriving.ord))
 (libraries docfd_lib
   containers
   containers.unix
   cmdliner
   fmt
   notty
   notty.unix
   nottui
   nottui-unix
   lwd
   oseq
   eio
   eio_main
   eio_posix
   digestif.c
   digestif
   timedesc
   timedesc-tzlocal.unix-or-utc
   re
   progress
   diet
   sqlite3
 )
)
================================================
FILE: bin/file_utils.ml
================================================
open Misc_utils
open Debug_utils
(* Lower-cased extension of [s], including the leading dot
   (e.g. ".pdf"); "" when the basename has no extension. *)
let extension_of_file (s : string) =
  String.lowercase_ascii (Filename.extension s)
(* Coarse classification of a file, used to decide how to index and how
   to open it (see [format_of_file] and bin/path_opening.ml). *)
type file_format = [ `PDF | `Pandoc_supported_format | `Text | `Other ] [@@deriving ord]

module File_format_set = CCSet.Make (struct
    type t = file_format
    let compare = compare_file_format
  end)
(* Classify [s] by its (lower-cased) extension alone; the file content is
   never inspected.  Extension lists live in [Params]. *)
let format_of_file (s : string) : file_format =
  let ext = extension_of_file s in
  if ext = ".pdf" then (
    `PDF
  ) else if List.mem ext Params.pandoc_supported_exts then (
    `Pandoc_supported_format
  ) else if String_set.mem ext Params.common_text_file_exts
         || String_set.mem ext Params.common_code_file_exts
  then (
    `Text
  ) else (
    `Other
  )
(* Filesystem object kind, as reported by [typ_of_path]. *)
type typ = [
  | `File
  | `Dir
]

(* Whether the path itself was a symlink (its target's kind is reported). *)
type is_link = [
  | `Is_link
  | `Not_link
]
(* Classify [path] without following symlinks first (lstat); when the
   path is a symlink, its target is classified instead (stat).  Returns
   [None] for anything that is neither a regular file nor a directory,
   and on any stat error (broken link, permission denied, missing path). *)
let typ_of_path (path : string) : (typ * is_link) option =
  let classify (st : Unix.stats) (link_status : is_link) =
    match st.st_kind with
    | Unix.S_REG -> Some (`File, link_status)
    | Unix.S_DIR -> Some (`Dir, link_status)
    | _ -> None
  in
  try
    let st = Unix.lstat path in
    match st.st_kind with
    | Unix.S_LNK -> classify (Unix.stat path) `Is_link
    | _ -> classify st `Not_link
  with
  | _ -> None
(* Current working directory with a trailing separator appended. *)
let cwd_with_trailing_sep () = Sys.getcwd () ^ Filename.dir_sep

(* Strip the current working directory prefix from [s]; return [s]
   unchanged when it does not live under the cwd. *)
let remove_cwd_from_path (s : string) =
  let pre = cwd_with_trailing_sep () in
  if String.starts_with ~prefix:pre s then
    String.sub s (String.length pre) (String.length s - String.length pre)
  else
    s
(* Copy the entire contents of [ic] into a fresh temp file
   ("docfd-*.txt") and return the file's path.  The temp file is NOT
   removed on failure; only an error message is returned. *)
let read_in_channel_to_tmp_file (ic : in_channel) : (string, string) result =
  let file = Filename.temp_file "docfd-" ".txt" in
  try
    CCIO.with_out file (fun oc ->
        CCIO.copy_into ic oc
      );
    Ok file
  with
  | _ -> (
      (* Error message mentions stdin: callers feed stdin here. *)
      Error (Fmt.str "failed to write stdin to %s" (Filename.quote file))
    )
(* Directory entry names of [path] (no path prefix); an empty sequence on
   any read error, e.g. when [path] is not a readable directory. *)
let next_choices path : string Seq.t =
  match Sys.readdir path with
  | entries -> Array.to_seq entries
  | exception _ -> Seq.empty
(* Recursively collect files under [path] (normalized to absolute).

   - [report_progress] is invoked once per visited path.
   - Hidden entries (basename starting with '.') are skipped unless
     [Params.scan_hidden] is set.
   - Recursion stops below [max_depth]; the root is depth 0.
   - A file is kept only when [filter depth path] accepts it.
   - Symlinked files/dirs are followed ([typ_of_path] resolves links);
     NOTE(review): no cycle detection beyond the depth cap — confirm
     symlink loops are acceptable given [max_depth]. *)
let list_files_recursive
    ?(max_depth = !Params.max_file_tree_scan_depth)
    ~(report_progress : unit -> unit)
    ~(filter : int -> string -> bool)
    (path : string)
  : String_set.t =
  let acc = ref String_set.empty in
  let add x =
    acc := String_set.add x !acc
  in
  let rec aux depth path =
    report_progress ();
    let hidden =
      String.starts_with ~prefix:"." (Filename.basename path)
    in
    if ((hidden && !Params.scan_hidden) || not hidden)
    && depth <= max_depth then (
      match typ_of_path path with
      | Some (`Dir, _) -> (
          next_choices path
          |> Seq.iter (fun f ->
              aux (depth + 1) (Filename.concat path f)
            )
        )
      | Some (`File, _) -> (
          if filter depth path then (
            add path
          )
        )
      | _ -> ()
    )
  in
  aux 0 (normalize_path_to_absolute path);
  !acc
(* Collect all files matching any of [globs].

   Each glob is split on the path separator and walked component by
   component against the filesystem:
   - "" and "." leave the current directory unchanged;
   - ".." pops one component;
   - "**" switches to a full recursive scan of the current directory,
     filtered by the remainder of the glob compiled as one pattern;
   - any other component is compiled as a single-segment glob and matched
     against the directory's entries.
   [path_parts] appears to be held most-recent-component-first (components
   are consed on, ".." pops the head) and rendered by [path_of_parts] —
   NOTE(review): inferred; [path_of_parts] is defined elsewhere.
   Case sensitivity is taken from each top-level glob (a "\c" escape makes
   it case-insensitive, see Glob.parse); sub-globs inherit it explicitly.
   [parse_glob] only fails on a bug: any subpath of a valid glob is
   expected to parse. *)
let list_files_recursive_filter_by_globs
    ?max_depth
    ~(report_progress : unit -> unit)
    (globs : string Seq.t)
  : String_set.t =
  let acc = ref String_set.empty in
  let add x =
    acc := String_set.add x !acc
  in
  let parse_glob ~case_sensitive s =
    match Glob.parse ~case_sensitive s with
    | None -> (
        failwith (Fmt.str "expected subpath of a valid glob pattern to also be valid: \"%s\"" s)
      )
    | Some x -> x
  in
  let rec aux ~case_sensitive (path_parts : string list) (glob_parts : string list) =
    report_progress ();
    let path = path_of_parts path_parts in
    match typ_of_path path, glob_parts with
    | Some (`File, _), [] -> add path
    | Some (`File, _), _ -> ()
    | Some (`Dir, _), [] -> ()
    | Some (`Dir, _), x :: xs -> (
        match x with
        | "" | "." -> aux ~case_sensitive path_parts xs
        | ".." -> (
            let path_parts =
              match path_parts with
              | [] -> []
              | _ :: xs -> xs
            in
            aux ~case_sensitive path_parts xs
          )
        | "**" -> (
            (* Compile the rest of the glob (rooted at [path]) as one
               pattern and recursively scan. *)
            let glob_string = String.concat Filename.dir_sep (path :: glob_parts) in
            do_if_debug (fun oc ->
                Printf.fprintf oc "Compiling glob using pattern: %s\n" glob_string
              );
            let glob = parse_glob ~case_sensitive glob_string in
            path
            |> list_files_recursive
              ?max_depth
              ~report_progress
              ~filter:(fun _depth path ->
                  Glob.match_ glob path
                )
            |> String_set.iter (fun path ->
                do_if_debug (fun oc ->
                    Printf.fprintf oc "Glob %s matches path %s\n" glob_string path
                  );
                add path
              )
          )
        | _ -> (
            let glob = parse_glob ~case_sensitive x in
            next_choices path
            |> Seq.iter (fun f ->
                if Glob.match_ glob f then (
                  aux ~case_sensitive (f :: path_parts) xs
                )
              )
          )
      )
    | None, _ -> ()
    | exception _ -> ()
  in
  Seq.iter (fun glob ->
      let case_sensitive =
        Glob.parse glob
        |> Option.get
        |> Glob.case_sensitive
      in
      let glob_parts = CCString.split ~by:Filename.dir_sep glob in
      match glob_parts with
      | "" :: l -> (
          (* Absolute path on Unix-like systems *)
          aux ~case_sensitive [ "" ] l
        )
      | _ -> (
          aux ~case_sensitive (cwd_path_parts ()) glob_parts
        )
    ) globs;
  !acc
(* Recursively scan each root in [paths] and keep files whose extension
   is in [exts].  A root given directly (depth 0) is always kept, even
   when its extension does not match. *)
let list_files_recursive_filter_by_exts
    ?max_depth
    ~report_progress
    ~(exts : string list)
    (paths : string Seq.t)
  : String_set.t =
  let keep depth path =
    depth = 0 || List.mem (extension_of_file path) exts
  in
  Seq.fold_left
    (fun acc root ->
       root
       |> normalize_path_to_absolute
       |> list_files_recursive ?max_depth ~report_progress ~filter:keep
       |> String_set.union acc)
    String_set.empty
    paths
(* Create [dir] and any missing ancestors (like `mkdir -p`), with mode
   0o755.  Walks the path component by component: an existing directory
   is descended into; an existing non-directory terminates the program
   via [exit_with_error_msg]; a missing component ([Sys.is_directory]
   raising [Sys_error]) is created.  A leading "" component (absolute
   path) seeds the accumulator with the root separator — that is what
   the [first] flag distinguishes. *)
let mkdir_recursive (dir : string) : unit =
  let rec aux first acc parts =
    match parts with
    | [] -> ()
    | "" :: xs -> (
        if first then
          aux false Filename.dir_sep xs
        else
          (* Empty component mid-path (e.g. "a//b"): skip it. *)
          aux false "" xs
      )
    | x :: xs -> (
        let acc = Filename.concat acc x in
        match Sys.is_directory acc with
        | true -> aux false acc xs
        | false -> (
            exit_with_error_msg
              (Fmt.str "%s is not a directory" (Filename.quote acc))
          )
        | exception (Sys_error _) -> (
            (* Component does not exist yet: create it. *)
            do_if_debug (fun oc ->
                Printf.fprintf oc "Creating directory: %s\n" (Filename.quote acc)
              );
            (try
               Sys.mkdir acc 0o755
             with
             | _ -> (
                 exit_with_error_msg
                   (Fmt.str "failed to create directory: %s" (Filename.quote acc))
               )
            );
            aux false acc xs
          )
      )
  in
  aux true "" (CCString.split ~by:Filename.dir_sep dir)
(* Size of [path] in bytes (following symlinks), or [None] on any stat
   failure. *)
let file_size (path : string) : int option =
  match Unix.stat path with
  | (st : Unix.stats) -> Some st.st_size
  | exception _ -> None
================================================
FILE: bin/filter_exp.ml
================================================
open Docfd_lib
(* AST of a document filter expression (see [Parsers.p] for the concrete
   syntax: "content:", "path-glob:", "ext:", "path-date:", "mod-date:",
   "path-fuzzy:", combined with and/or/not and parentheses). *)
type t =
  | Empty
  | Path_date of compare_op * Timedesc.Date.t
  | Path_fuzzy of Search_exp.t
  | Path_glob of Glob.t
  | Ext of string
  | Mod_date of compare_op * Timedesc.Date.t
  | Content of Search_exp.t
  | Binary_op of binary_op * t * t
  | Unary_op of unary_op * t

and binary_op =
  | And
  | Or

and unary_op =
  | Not

(* Comparison operator used by the date filters. *)
and compare_op =
  | Eq
  | Le
  | Ge
  | Lt
  | Gt
(* The filter that matches everything. *)
let empty = Empty

(* True iff the expression is the trivial [Empty] filter. *)
let is_empty : t -> bool = function
  | Empty -> true
  | _ -> false
(* Structural equality of two filter expressions.

   Fix: the original match had no [Mod_date, Mod_date] case, so two equal
   "mod-date:" filters fell through to the catch-all and compared unequal.
   Every constructor of [t] is now handled explicitly. *)
let equal (e1 : t) (e2 : t) =
  let rec aux e1 e2 =
    match e1, e2 with
    | Empty, Empty -> true
    | Path_date (op1, x1), Path_date (op2, x2) ->
      op1 = op2 && Timedesc.Date.equal x1 x2
    | Path_fuzzy x, Path_fuzzy y -> Search_exp.equal x y
    | Path_glob x, Path_glob y -> Glob.equal x y
    | Ext x, Ext y -> String.equal x y
    | Mod_date (op1, x1), Mod_date (op2, x2) ->
      op1 = op2 && Timedesc.Date.equal x1 x2
    | Content x, Content y -> Search_exp.equal x y
    | Binary_op (op1, x1, y1), Binary_op (op2, x2, y2) ->
      op1 = op2 && aux x1 x2 && aux y1 y2
    | Unary_op (op1, x1), Unary_op (op2, x2) ->
      op1 = op2 && aux x1 x2
    | _, _ -> false
  in
  aux e1 e2
(* All [Content] search expressions in the tree, left to right. *)
let all_content_search_exps (t : t) : Search_exp.t list =
  (* Accumulate in reverse, then restore left-to-right order. *)
  let rec go acc node =
    match node with
    | Content e -> e :: acc
    | Binary_op (_op, l, r) -> go (go acc l) r
    | Unary_op (_op, sub) -> go acc sub
    | Empty
    | Path_date _
    | Path_fuzzy _
    | Path_glob _
    | Ext _
    | Mod_date _ -> acc
  in
  List.rev (go [] t)
(* Angstrom parser for the filter expression language.  Precedence, from
   tightest to loosest: atoms (possibly parenthesized), "not", "and",
   "or" (both binary operators are left-associative via [chainl1]). *)
module Parsers = struct
  type exp = t

  open Angstrom
  open Parser_components

  (* A run of letters/digits/'&'/'|' — used to read operator words. *)
  let alphanum_symbol_string =
    take_while1 (fun c ->
        is_letter c
        ||
        is_digit c
        ||
        (match c with
         | '&'
         | '|' -> true
         | _ -> false
        )
      )

  (* A string, optionally wrapped in single or double quotes.  Unquoted
     strings stop at whitespace and parentheses; quoted strings stop at
     the matching quote.  Backslash escapes the next character in either
     case.  With [force_quote] the quote is mandatory.  The closing quote
     may be omitted at end of input. *)
  let maybe_quoted_string ?(force_quote = false) () =
    (
      (choice
         [
           char '"';
           char '\'';
         ]
       >>= fun c -> return (Some c)
      )
      <|>
      (if force_quote then
         fail ""
       else
         return None)
    )
    >>= fun quote_char ->
    many1 (
      take_while1 (fun c ->
          match c with
          | '\\' -> false
          | c -> (
              match quote_char with
              | None -> (
                  is_not_space c
                  &&
                  (match c with
                   | '('
                   | ')' -> false
                   | _ -> true
                  )
                )
              | Some quote_char -> c <> quote_char
            )
        )
      <|>
      (char '\\' *> any_char >>| fun c -> Printf.sprintf "%c" c)
    )
    >>= fun l ->
    let s = String.concat "" l in
    (end_of_input *> return s)
    <|>
    (match quote_char with
     | None -> return s
     | Some quote_char -> char quote_char *> return s)

  (* A (maybe quoted) string that must parse as a [Search_exp]. *)
  let search_exp ?force_quote () =
    maybe_quoted_string ?force_quote ()
    >>= fun s ->
    match Search_exp.parse s with
    | None -> fail ""
    | Some x -> return x

  (* A (maybe quoted) string that must parse as a [Glob], normalized to
     an absolute pattern first (empty string stays empty). *)
  let glob =
    maybe_quoted_string ()
    >>= fun s ->
    let s = Misc_utils.normalize_filter_glob_if_not_empty s in
    match Glob.parse s with
    | None -> fail ""
    | Some x -> return x

  (* Extension, normalized to lower case with exactly one leading dot. *)
  let ext =
    maybe_quoted_string ()
    >>| fun s ->
    s
    |> String.lowercase_ascii
    |> String_utils.remove_leading_dots
    |> Fmt.str ".%s"

  (* NOTE: '=' / "<=" / ">=" are tried before '<' / '>' so that the
     two-character operators win. *)
  let compare_op =
    choice
      [
        char '=' *> skip_spaces *> return Eq;
        string "<=" *> skip_spaces *> return Le;
        string ">=" *> skip_spaces *> return Ge;
        char '<' *> skip_spaces *> return Lt;
        char '>' *> skip_spaces *> return Gt;
      ]

  (* ISO 8601 year-month-day date. *)
  let date =
    any_string
    >>= fun s ->
    match Timedesc.Date.Ymd.of_iso8601 s with
    | Ok x -> return x
    | Error _ -> fail ""

  (* "<op> <date>" read from one (maybe quoted) string, re-parsed in a
     nested Angstrom run so the whole operand can be quoted as a unit. *)
  let compare_date f =
    let p =
      compare_op >>= fun op ->
      date >>| fun date ->
      f (op, date)
    in
    maybe_quoted_string ()
    >>= fun s ->
    match Angstrom.(parse_string ~consume:Consume.All) p s with
    | Ok x -> return x
    | Error s -> fail s

  (* Case-insensitive keyword operator, e.g. "and"/"AND". *)
  let binary_op op_strings op =
    alphanum_symbol_string >>= fun s ->
    skip_spaces *>
    (
      if List.mem (String.lowercase_ascii s) op_strings then (
        return (fun x y -> Binary_op (op, x, y))
      ) else (
        fail ""
      )
    )

  let and_op = binary_op [ "and" ] And

  let or_op = binary_op [ "or" ] Or

  let unary_op op_strings op =
    alphanum_symbol_string >>= fun s ->
    skip_spaces *>
    (
      if List.mem (String.lowercase_ascii s) op_strings then (
        return (fun x -> Unary_op (op, x))
      ) else (
        fail ""
      )
    )

  let not_op = unary_op [ "not" ] Not

  (* Whole-input filter expression; empty input parses to [empty]. *)
  let p =
    skip_spaces *>
    (
      (end_of_input *> return empty)
      <|>
      fix (fun (exp : exp Angstrom.t) ->
          let base =
            choice [
              (* A bare quoted string is a content filter. *)
              (search_exp ~force_quote:true () >>|
               fun x -> Content x);
              (string "content:" *>
               search_exp () >>| fun x -> Content x);
              (string "path-date:" *>
               compare_date (fun (op, date) -> Path_date (op, date)));
              (string "path-fuzzy:" *>
               search_exp () >>| fun x -> Path_fuzzy x);
              (string "path-glob:" *>
               glob >>| fun x -> Path_glob x);
              (string "mod-date:" *>
               compare_date (fun (op, date) -> Mod_date (op, date)));
              (string "ext:" *>
               ext >>| fun x -> Ext x);
              (char '(' *> skip_spaces *> exp <* char ')');
            ]
            <* skip_spaces
          in
          let maybe_neg =
            (not_op >>= fun f ->
             skip_spaces *> base >>| f)
            <|>
            base
          in
          (* "and" binds tighter than "or". *)
          let conj = chainl1 maybe_neg and_op in
          chainl1 conj or_op
        )
    )
    <* skip_spaces
end
(* Parse a whole filter-expression string; [None] on any syntax error. *)
let parse s =
  Angstrom.(parse_string ~consume:Consume.All) Parsers.p s
  |> Result.to_option
================================================
FILE: bin/glob.ml
================================================
(* A compiled glob: the original pattern text, its case sensitivity, and
   the compiled [Re] regex derived from both. *)
type t = {
  case_sensitive : bool;
  string : string;
  re : Re.re;
}
(* Structural equality on the pattern text and flag; [re] is derived
   from these two, so it need not be compared. *)
let equal x y =
  Bool.equal x.case_sensitive y.case_sensitive
  && String.equal x.string y.string

let case_sensitive t = t.case_sensitive

let string t = t.string

(* The empty glob (pattern ""); [match_] treats it as match-all. *)
let is_empty t = String.equal t.string ""
(* Pre-pass over the raw pattern: splits it into literal chunks and the
   special escape "\c", which marks the whole glob case-insensitive.
   Any other backslash escape reduces to the escaped character itself. *)
module Parsers = struct
  open Angstrom

  type part = [
    | `Case_insensitive
    | `String of string
  ]

  let parts : part list Angstrom.t =
    many (
      (take_while1 (fun c ->
           match c with
           | '\\' -> false
           | _ -> true
         )
       >>| fun s -> `String s)
      <|>
      (char '\\' *> any_char >>= fun c ->
       if c = 'c' then return `Case_insensitive
       else return (`String (Printf.sprintf "%c" c)))
    )
end
(* Compile [s] into a glob.  An embedded "\c" escape makes the pattern
   case-insensitive unless the [?case_sensitive] argument overrides it.
   Case-insensitive matching is implemented by lower-casing both the
   pattern (here) and the subject (in [match_]).  Returns [None] when
   the pattern fails to parse or [Re.Glob] rejects it. *)
let parse ?case_sensitive (s : string) : t option =
  match Angstrom.(parse_string ~consume:Consume.All) Parsers.parts s with
  | Error _ -> None
  | Ok parts -> (
      let case_insensitive = ref false in
      (* Drop the \c markers, recording their presence. *)
      let s =
        parts
        |> List.filter_map (fun x ->
            match x with
            | `String s -> Some s
            | `Case_insensitive -> (
                case_insensitive := true;
                None
              )
          )
        |> String.concat ""
      in
      let case_sensitive =
        match case_sensitive with
        | None -> not !case_insensitive
        | Some x -> x
      in
      try
        let re =
          s
          |> (fun s -> if case_sensitive then s else String.lowercase_ascii s)
          |> Re.Glob.glob
            ~anchored:true
            ~pathname:true
            ~match_backslashes:false
            ~period:true
            ~expand_braces:false
            ~double_asterisk:true
          |> Re.compile
        in
        Some
          {
            case_sensitive;
            string = s;
          re;
          }
      with
      | _ -> None
    )
(* Whether [s] matches the glob.  The empty glob matches everything;
   case-insensitive globs compare against a lower-cased subject (the
   pattern was lower-cased at compile time, see [parse]). *)
let match_ t (s : string) =
  if is_empty t then true
  else (
    let subject = if t.case_sensitive then s else String.lowercase_ascii s in
    Re.execp t.re subject
  )
================================================
FILE: bin/glob.mli
================================================
(** A compiled glob pattern. *)
type t

(** [parse ?case_sensitive s] compiles [s]; [None] on an invalid pattern.
    [?case_sensitive] overrides any case marker embedded in the pattern. *)
val parse : ?case_sensitive:bool -> string -> t option

val equal : t -> t -> bool

(** True for the empty pattern, which matches everything. *)
val is_empty : t -> bool

val case_sensitive : t -> bool

(** The (processed) pattern text the glob was compiled from. *)
val string : t -> string

(** [match_ g s] tests whether [s] matches [g]. *)
val match_ : t -> string -> bool
================================================
FILE: bin/lock_protected_cell.ml
================================================
(* A single optional value guarded by an Eio mutex; [get] takes the
   value out (leaving the cell empty). *)
type 'a t = {
  lock : Eio.Mutex.t;
  mutable data : 'a option;
}

(* Fresh, empty cell. *)
let make () =
  {
    lock = Eio.Mutex.create ();
    data = None;
  }
(* Store [x], replacing any existing value. *)
let set (t : 'a t) (x : 'a) =
  Eio.Mutex.use_rw t.lock ~protect:false @@ fun () ->
  t.data <- Some x

(* Empty the cell, discarding any stored value. *)
let unset (t : 'a t) =
  Eio.Mutex.use_rw t.lock ~protect:false @@ fun () ->
  t.data <- None
(* Atomically take the stored value out, leaving the cell empty.
   Returns [None] when the cell was already empty. *)
let get (t : 'a t) : 'a option =
  Eio.Mutex.use_rw t.lock ~protect:false @@ fun () ->
  match t.data with
  | None -> None
  | Some _ as taken -> (
      t.data <- None;
      taken
    )
================================================
FILE: bin/lock_protected_cell.mli
================================================
(** A mutex-protected cell holding at most one value. *)
type 'a t

val make : unit -> 'a t

(** Store a value, replacing any existing one. *)
val set : 'a t -> 'a -> unit

(** Discard any stored value. *)
val unset : 'a t -> unit

(** Atomically take the value out, leaving the cell empty. *)
val get : 'a t -> 'a option
================================================
FILE: bin/misc_utils.ml
================================================
open Docfd_lib
include Docfd_lib.Misc_utils'
(* Clamp a selection index [x] into the valid range [0, choice_count - 1]
   (collapses to 0 when there are no choices). *)
let bound_selection ~choice_count (x : int) : int =
  let hi = choice_count - 1 in
  if x < 0 then 0
  else if x > hi then max 0 hi
  else x
(* Case-insensitive word frequency map: every word is lower-cased before
   being counted. *)
let frequencies_of_words_ci (s : string Seq.t) : int String_map.t =
  let bump m word =
    let key = String.lowercase_ascii word in
    let count =
      match String_map.find_opt key m with
      | None -> 0
      | Some n -> n
    in
    String_map.add key (count + 1) m
  in
  Seq.fold_left bump String_map.empty s
(* Whether each standard stream is attached to a terminal — used to
   decide between interactive and non-interactive behavior. *)
let stdin_is_atty () =
  Unix.isatty Unix.stdin

let stdout_is_atty () =
  Unix.isatty Unix.stdout

let stderr_is_atty () =
  Unix.isatty Unix.stderr
(* Merge the comma-separated [exts] string with each comma-separated
   string of [additional_exts] into one normalized extension list:
   trimmed, stripped of leading dots, lower-cased, blanks dropped, and
   each entry given exactly one leading dot. *)
let compute_total_recognized_exts ~exts ~additional_exts =
  let split_on_comma = String.split_on_char ',' in
  (exts :: additional_exts)
  |> List.concat_map split_on_comma
  |> List.filter_map (fun raw ->
      let normalized =
        raw
        |> CCString.trim
        |> String_utils.remove_leading_dots
        |> String.lowercase_ascii
      in
      if normalized = "" then None
      else Some (Printf.sprintf ".%s" normalized)
    )
(* Elements arr.(start) .. arr.(end_exc - 1) as a sequence; [end_exc] is
   clamped to the array length.  Elements are read eagerly at
   construction time (as in the original), so later array mutation does
   not affect the result. *)
let array_sub_seq : 'a. start:int -> end_exc:int -> 'a array -> 'a Seq.t =
  fun ~start ~end_exc arr ->
  let stop = min (Array.length arr) end_exc in
  let rec collect i acc =
    if i < start then acc
    else collect (i - 1) (arr.(i) :: acc)
  in
  let items = if start >= stop then [] else collect (stop - 1) [] in
  List.to_seq items
(* Rotate [l] left by [x] positions: elements [x..] followed by
   elements [..x).  An [x] past the end of the list leaves it unchanged
   (both slices are clamped to the list length). *)
let rotate_list (x : int) (l : 'a list) : 'a list =
  let arr = Array.of_list l in
  let len = Array.length arr in
  let slice lo hi_exc =
    let hi_exc = min len hi_exc in
    let rec gather i acc =
      if i < lo then acc
      else gather (i - 1) (arr.(i) :: acc)
    in
    if lo >= hi_exc then [] else gather (hi_exc - 1) []
  in
  slice x len @ slice 0 x
(* Discard every item currently buffered in the stream without blocking;
   stops as soon as a non-blocking take yields nothing. *)
let drain_eio_stream (x : 'a Eio.Stream.t) =
  let draining = ref true in
  while !draining do
    match Eio.Stream.take_nonblocking x with
    | Some _ -> ()
    | None -> draining := false
  done
(* Byte count expressed in mebibytes (1 MiB = 1024 * 1024 bytes). *)
let mib_of_bytes (x : int) =
  float_of_int x /. 1048576.0
(* Run [f] with a progress-reporting callback.  In interactive mode a
   real [Progress] bar is shown and the reporter is wrapped in a mutex
   so multiple fibers may call it concurrently; otherwise [f] receives
   a no-op reporter. *)
let progress_with_reporter ~interactive bar f =
  if interactive then (
    Progress.with_reporter
      ~config:(Progress.Config.v ~ppf:Format.std_formatter ())
      bar
      (fun report_progress ->
         (* Serialize reporter calls: Progress reporters are not
            assumed safe under concurrent use from multiple fibers. *)
         let report_progress =
           let lock = Eio.Mutex.create () in
           fun x ->
             Eio.Mutex.use_rw lock ~protect:false (fun () ->
                 report_progress x
               )
         in
         f report_progress
      )
  ) else (
    f (fun _ -> ())
  )
(* Normalize a filter glob to an absolute pattern, except that the empty
   string (the match-all glob) is passed through untouched. *)
let normalize_filter_glob_if_not_empty (s : string) =
  match s with
  | "" -> s
  | _ -> normalize_glob_to_absolute s
(* Drop the leading ": " that Angstrom prepends to its error messages;
   other messages pass through unchanged. *)
let trim_angstrom_error_msg (s : string) =
  if String.starts_with ~prefix:": " s then
    String.sub s 2 (String.length s - 2)
  else
    s
(* Map each path to its 0-based position in the ranked list. *)
let ranking_of_ranked_document_list (l : string list) : int String_map.t =
  let rec go rank acc = function
    | [] -> acc
    | path :: rest -> go (rank + 1) (String_map.add path rank acc) rest
  in
  go 0 String_map.empty l
(* Fuzzy-search [items] (keyed by [get_key]) against [exp] and return
   the matches paired with their best search result, sorted by
   relevance.  Cancellable: the fiber racing on [stop_signal] wins with
   an empty result if the signal fires first.

   For each item's key line, every flattened phrase of [exp] is matched
   by taking, for each enriched token, the set of compatible word
   positions in the tokenized line, forming the cartesian product of
   those position sets (one candidate found-phrase per combination), and
   keeping the highest-scoring resulting [Search_result].
   NOTE(review): the cartesian product is exponential in token count —
   presumably acceptable for short keys such as paths; confirm. *)
let fuzzy_rank_assoc
    (stop_signal : Stop_signal.t)
    ~(get_key : 'a -> string)
    (exp : Search_exp.t)
    (items : 'a Seq.t)
  : ('a * Search_result.t) Dynarray.t =
  (* Highest-scoring result of a sequence, if any. *)
  let pick_best_search_result (s : Search_result.t Seq.t) : Search_result.t option =
    Seq.fold_left (fun best x ->
        match best with
        | None -> Some x
        | Some best -> (
            if Search_result.score x > Search_result.score best then (
              Some x
            ) else (
              Some best
            )
          )
      )
      None
      s
  in
  let search_in_line (line : string) (exp : Search_exp.t) : Search_result.t option =
    (* Keep spaces so token positions line up with the original text. *)
    let parts = Tokenization.tokenize ~drop_spaces:false line
                |> List.of_seq
    in
    Search_exp.flattened exp
    |> List.to_seq
    |> Seq.flat_map (fun phrase ->
        Search_phrase.enriched_tokens phrase
        |> List.to_seq
        |> Seq.map (fun token ->
            (* Candidate positions in the line compatible with [token]. *)
            List.to_seq parts
            |> Seq.mapi (fun i part ->
                (i, part)
              )
            |> Seq.filter_map (fun (i, part) ->
                if
                  Search_phrase.Enriched_token.compatible_with_word token part
                then (
                  Some (i, part, String.lowercase_ascii part)
                ) else (
                  None
                )
              )
          )
        |> OSeq.cartesian_product
        |> Seq.map (fun l ->
            let found_phrase =
              List.map (fun (i, part, part_ci) ->
                  Search_result.{
                    found_word_pos = i;
                    found_word_ci = part_ci;
                    found_word = part;
                  }
                )
                l
            in
            Search_result.make phrase
              ~found_phrase
              ~found_phrase_opening_closing_symbol_match_count:0
          )
      )
    |> pick_best_search_result
  in
  Eio.Fiber.first
    (fun () ->
       (* Cancellation branch: empty result once the stop signal fires. *)
       Stop_signal.await stop_signal;
       Dynarray.create ()
    )
    (fun () ->
       items
       |> Seq.fold_left (fun acc item ->
           let line = get_key item in
           match search_in_line line exp with
           | None -> acc
           | Some best_result -> (
               (item, best_result) :: acc
             )
         )
         []
       |> List.sort (fun (_x0, r0) (_x1, r1) ->
           Search_result.compare_relevance r0 r1
         )
       |> Dynarray.of_list
    )
(* Word positions of a search result's found phrase, as a set, for
   highlighting in the UI. *)
let highlights_of_search_result (search_result : Search_result.t) =
  Search_result.found_phrase search_result
  |> List.map (fun (x : Search_result.indexed_found_word) -> x.found_word_pos)
  |> Int_set.of_list
================================================
FILE: bin/params.ml
================================================
include Docfd_lib.Params'
(* Debug log sink; [None] disables debug output. *)
let debug_output : out_channel option ref = ref None

(* Whether file-tree scanning descends into hidden (dot-prefixed) entries. *)
let scan_hidden = ref false

let default_max_file_tree_scan_depth = 100

let max_file_tree_scan_depth = ref default_max_file_tree_scan_depth

(* Lines shown in a document preview. *)
let preview_line_count = 5

let default_tokens_per_search_scope_level = 100

let tokens_per_search_scope_level = ref default_tokens_per_search_scope_level

(* Extension recognized as a docfd script. *)
let docfd_script_ext = ".docfd-script"
(* Extensions of formats converted via pandoc (lower case, with dots). *)
let pandoc_supported_exts =
  [ ".epub"
  ; ".odt"
  ; ".docx"
  ; ".fb2"
  ; ".ipynb"
  ; ".html"
  ; ".htm"
  ]
(* Extensions treated as plain text (see [File_utils.format_of_file]). *)
let common_text_file_exts =
  String_set.of_list
    [ ".txt"
    ; ".md"
    ; ".markdown"
    ; ".rst"
    ; ".adoc"
    ; ".org"
    ; ".json"
    ; ".yaml"
    ; ".yml"
    ; ".toml"
    ; ".xml"
    ; ".csv"
    ; ".tsv"
    ; ".conf"
    ; ".cfg"
    ; ".ini"
    ; ".env"
    ; ".log"
    ]
(* Extensions of source-code-like files, also treated as plain text by
   [File_utils.format_of_file].  Overlaps with [common_text_file_exts]
   are harmless: membership in either set yields `Text. *)
let common_code_file_exts =
  String_set.of_list
    [ ".sh"
    ; ".bash"
    ; ".zsh"
    ; ".fish"
    ; ".ksh"
    ; ".csh"
    ; ".tcsh"
    ; ".ps1"
    ; ".psm1"
    ; ".psd1"
    ; ".cmd"
    ; ".bat"
    ; ".py"
    ; ".pyi"
    ; ".pyw"
    ; ".ipynb"
    ; ".pxd"
    ; ".pxi"
    ; ".js"
    ; ".mjs"
    ; ".cjs"
    ; ".jsx"
    ; ".ts"
    ; ".tsx"
    ; ".vite"
    ; ".webpack"
    ; ".html"
    ; ".htm"
    ; ".xhtml"
    ; ".css"
    ; ".scss"
    ; ".sass"
    ; ".less"
    ; ".vue"
    ; ".svelte"
    ; ".svg"
    ; ".c"
    ; ".h"
    ; ".i"
    ; ".cpp"
    ; ".cc"
    ; ".cxx"
    ; ".hpp"
    ; ".hh"
    ; ".hxx"
    ; ".m"
    ; ".mm"
    ; ".java"
    ; ".kt"
    ; ".kts"
    ; ".scala"
    ; ".groovy"
    ; ".clj"
    ; ".cljs"
    ; ".cljc"
    ; ".edn"
    ; ".go"
    ; ".rs"
    ; ".zig"
    ; ".nim"
    ; ".nims"
    ; ".hs"
    ; ".lhs"
    ; ".ml"
    ; ".mli"
    ; ".re"
    ; ".rei"
    ; ".elm"
    ; ".idr"
    ; ".agda"
    ; ".lua"
    ; ".pl"
    ; ".pm"
    ; ".rb"
    ; ".tcl"
    ; ".raku"
    ; ".jl"
    ; ".r"
    ; ".cs"
    ; ".csx"
    ; ".fs"
    ; ".fsi"
    ; ".fsx"
    ; ".vb"
    ; ".asm"
    ; ".s"
    ; ".S"
    ; ".ll"
    ; ".bc"
    ; ".wat"
    ; ".wasm"
    ; ".swift"
    ; ".storyboard"
    ; ".xib"
    ; ".json"
    ; ".jsonc"
    ; ".json5"
    ; ".yaml"
    ; ".yml"
    ; ".toml"
    ; ".xml"
    ; ".ini"
    ; ".cfg"
    ; ".conf"
    ; ".env"
    ; ".properties"
    ; ".sql"
    ; ".psql"
    ; ".cql"
    ; ".gql"
    ; ".graphql"
    ; ".mk"
    ; ".make"
    ; ".cmake"
    ; ".gradle"
    ; ".gradle.kts"
    ; ".bazel"
    ; ".bzl"
    ; ".ninja"
    ; ".build"
    ; ".Dockerfile"
    ; ".dockerignore"
    ; ".compose"
    ; ".compose.yml"
    ; ".tf"
    ; ".tfvars"
    ; ".hcl"
    ; ".ansible"
    ; ".playbook"
    ; ".md"
    ; ".markdown"
    ; ".rst"
    ; ".adoc"
    ; ".org"
    ; ".tex"
    ; ".vim"
    ; ".vimrc"
    ; ".emacs"
    ; ".editorconfig"
    ; ".awk"
    ; ".sed"
    ; ".pug"
    ; ".jade"
    ; ".haml"
    ; ".qs"
    ; ".kql"
    ]
(* Default comma-separated list of recognized extensions (dot-free, as
   expected by the CLI's ext arguments). *)
let default_recognized_exts =
  ("txt" :: "md" :: "pdf" :: pandoc_supported_exts)
  |> List.map String_utils.remove_leading_dots
  |> String.concat ","
(* Default comma-separated extension list for single-line search mode. *)
let default_recognized_single_line_exts =
  [ "log"; "csv"; "tsv" ]
  |> List.map String_utils.remove_leading_dots
  |> String.concat ","
(* Search mode used when a file is not in the single-line extension set. *)
let default_search_mode : Search_mode.t ref = ref `Multiline

(* Default sort specs, as accepted by the --sort-by argument. *)
let default_sort_by_arg = "score,desc"

let default_sort_by_no_score_arg = "path,asc"

let index_file_ext = ".index"

let db_file_name = "index.db"

(* Chunk size (bytes) used when hashing file contents. *)
let hash_chunk_size = 4096

(* Editor command; set at startup (empty until then). *)
let text_editor = ref ""

let default_samples_per_document = 5

let samples_per_document = ref default_samples_per_document

(* How ANSI styling is applied to printed output. *)
type style_mode = [ `Never | `Always | `Auto ]

let default_search_result_print_text_width = 80

let search_result_print_text_width = ref default_search_result_print_text_width

let default_search_result_print_snippet_min_size = 10

let search_result_print_snippet_min_size = ref default_search_result_print_snippet_min_size

let default_search_result_print_snippet_max_additional_lines_each_direction = 2

let search_result_print_snippet_max_additional_lines_each_direction =
  ref default_search_result_print_snippet_max_additional_lines_each_direction

let default_cache_limit = 10_000

let cache_limit = ref default_cache_limit

(* Set during startup; [script_dir] assumes [data_dir] is already set. *)
let cache_dir : string option ref = ref None

let data_dir : string option ref = ref None

let script_dir () =
  Filename.concat
    (Option.get !data_dir)
    "scripts"
(* Local time zone, falling back to UTC when it cannot be determined. *)
let tz : Timedesc.Time_zone.t =
  Option.value ~default:Timedesc.Time_zone.utc
    (Timedesc.Time_zone.local ())

(* Timestamp format strings; a trailing "Z" marks times as UTC when the
   local zone is unknown (matching the [tz] fallback above). *)
let last_scan_format_string =
  "{year}-{mon:0X}-{day:0X} {hour:0X}:{min:0X}:{sec:0X}"
  ^
  (match Timedesc.Time_zone.local () with
   | None -> "Z"
   | Some _ -> "")

let last_modified_format_string =
  "{year}-{mon:0X}-{day:0X} {hour:0X}:{min:0X}"
  ^
  (match Timedesc.Time_zone.local () with
   | None -> "Z"
   | Some _ -> "")

(* UI/session timing constants. *)
let blink_on_duration : Mtime.span = Mtime.Span.(140 * ms)

let session_manager_request_debounce_interval = Mtime.Span.(200 * ms)

let session_manager_request_debounce_wait_buffer = Mtime.Span.(5 * ms)
(* Host OS, detected once at module initialization by running `uname`.
   Anything that is not Darwin is treated as Linux. *)
let os_typ : [ `Darwin | `Linux ] =
  let s = CCUnix.call_stdout "uname"
          |> String.trim
          |> String.lowercase_ascii
  in
  match s with
  | "darwin" -> `Darwin
  | _ -> `Linux
(* Clipboard command for the host environment, or [None] when no usable
   clipboard tool can be determined.  On Linux: clip.exe availability is
   used as a WSL indicator, then X11/Wayland tools are chosen based on
   XDG_SESSION_TYPE. *)
let clipboard_copy_cmd_and_args =
  match os_typ with
  | `Darwin -> Some ("pbcopy", [||])
  | `Linux -> (
      if Proc_utils.command_exists "clip.exe" then (
        Some ("clip.exe", [||])
      ) else (
        match Sys.getenv_opt "XDG_SESSION_TYPE" with
        | None -> None
        | Some s -> (
            match String.lowercase_ascii s with
            | "x11" -> Some ("xclip", [| "-sel"; "clip" |])
            | "wayland" -> Some ("wl-copy", [|"-n"|])
            | _ -> None
          )
      )
    )
================================================
FILE: bin/path_opening.ml
================================================
open Docfd_lib
open Debug_utils
(* How an opener command is launched: in the foreground terminal (e.g. a
   text editor) or as a detached background process (e.g. a PDF viewer). *)
type launch_mode = [ `Terminal | `Detached ]

(* A user-provided opener spec: extensions, launch mode, command template. *)
type spec = string list * launch_mode * string

(* Per-extension opener overrides, keyed by extension (with leading dot). *)
let specs : (string, launch_mode * string) Hashtbl.t = Hashtbl.create 128
(* Angstrom parsers for opener command templates and specs.

   A template mixes literal text with placeholders: {path}, {page_num},
   {line_num}, {search_word}; "{{" produces a literal "{".  A placeholder
   whose value is unavailable makes the whole parse fail.  A spec has the
   shape "ext1,ext2:terminal|detached=command template". *)
module Parsers = struct
  open Angstrom
  open Parser_components

  let expected_char c =
    fail (Fmt.str "expected char %c" c)

  (* The placeholder name inside {...}, resolved to its value. *)
  let inner ~path ~page_num ~line_num ~search_word : string t =
    choice [
      string "path" *> commit *> return path;
      string "page_num" *> commit
      >>= (fun _ ->
          match page_num with
          | None -> fail "page_num not available"
          | Some n -> return (Fmt.str "%d" n)
        );
      string "line_num" *> commit
      >>= (fun _ ->
          match line_num with
          | None -> fail "line_num not available"
          | Some n -> return (Fmt.str "%d" n)
        );
      string "search_word" *> commit
      >>= (fun _ ->
          match search_word with
          | None -> fail "search_word not available"
          | Some s -> return (Fmt.str "'%s'" s));
    ]
    <|>
    fail "invalid placeholder"

  (* A full template: literals, escaped braces, and placeholders. *)
  let cmd ~path ~page_num ~line_num ~search_word : string t =
    let single =
      choice [
        (string "{{" >>| fun _ -> Fmt.str "{");
        (char '{' *> commit *>
         inner ~path ~page_num ~line_num ~search_word <*
         (char '}' <|> expected_char '}'));
        (take_while1 (function '{' -> false | _ -> true));
      ]
    in
    many single
    >>| fun l -> String.concat "" l

  (* "exts:launch_mode=cmd" — the command is the rest of the input. *)
  let spec : spec t =
    sep_by (char ',')
      (take_while1 (function ':' | ',' -> false | _ -> true))
    >>= fun exts ->
    (char ':' <|> expected_char ':')*>
    (choice [
        string "terminal" *> return `Terminal;
        string "detached" *> return `Detached;
      ]
     <|>
     fail "invalid launch mode")
    >>= fun launch_mode ->
    (char '=' <|> expected_char '=') *> any_string
    >>= fun cmd ->
    return (exts, launch_mode, cmd)
end
(* Values available for substitution into an opener command template.
   [quote_path] controls whether {path} is shell-quoted (default true);
   optional fields make their placeholders unavailable (template parse
   fails if such a placeholder is used). *)
module Config = struct
  type t = {
    quote_path : bool;
    path : string;
    page_num : int option;
    line_num : int option;
    search_word : string option;
    launch_mode : launch_mode;
  }

  let make ?(quote_path = true) ~path ?page_num ?line_num ?search_word ~launch_mode () : t =
    {
      quote_path;
      path;
      page_num;
      line_num;
      search_word;
      launch_mode;
    }
end
(* Substitute [config]'s values into the command template [s].
   Errors (unknown/unavailable placeholder, malformed braces) are
   returned as messages with Angstrom's ": " prefix trimmed. *)
let resolve_cmd (config : Config.t) (s : string) : (string, string) result =
  let open Angstrom in
  let { Config.quote_path; path; page_num; line_num; search_word } = config in
  let path = if quote_path then Filename.quote path else path in
  parse_string ~consume:All (Parsers.cmd ~path ~page_num ~line_num ~search_word) s
  |> Result.map_error Misc_utils.trim_angstrom_error_msg
(* Parse and validate an opener spec string.  Each extension is
   normalized (lower case, single leading dot), and the command template
   is dry-run resolved against a dummy config appropriate for that
   extension (PDFs get page_num/search_word, others get line_num) so an
   invalid placeholder is rejected at spec-parse time rather than when
   the user opens a file. *)
let parse_spec (s : string) : (spec, string) result =
  let open Angstrom in
  match
    parse_string ~consume:All Parsers.spec s
  with
  | Error msg -> Error (Misc_utils.trim_angstrom_error_msg msg)
  | Ok (exts', launch_mode, cmd) -> (
      let rec aux acc exts =
        match exts with
        | [] -> Ok (List.rev acc, launch_mode, cmd)
        | ext :: rest -> (
            let ext = ext
                      |> String.lowercase_ascii
                      |> String_utils.remove_leading_dots
                      |> Fmt.str ".%s"
            in
            (* Dummy config used only to validate the template. *)
            let config =
              if ext = ".pdf" then (
                Config.make
                  ~path:"path"
                  ~page_num:1
                  ~search_word:"word"
                  ~launch_mode:`Detached
                  ()
              ) else (
                Config.make
                  ~path:"path"
                  ~line_num:1
                  ~launch_mode:`Terminal
                  ()
              )
            in
            match
              resolve_cmd config cmd
            with
            | Error msg -> Error msg
            | Ok _ -> aux (ext :: acc) rest
          )
      in
      aux [] exts'
    )
(* Generic "open with the desktop's default handler" template. *)
let xdg_open_cmd =
  "xdg-open {path}"

(* Pandoc-supported formats are handed to the desktop handler, detached. *)
let pandoc_supported_format_config_and_cmd ~path =
  (Config.make ~path ~launch_mode:`Detached (),
   xdg_open_cmd)

(* Last-resort opener per OS. *)
let fallback_cmd : string =
  match Params.os_typ with
  | `Linux -> xdg_open_cmd
  | `Darwin -> "open {path}"
(* Pick, from a found phrase, the word that is rarest on its own page —
   the best term to hand to a PDF viewer's search ("--find") so it jumps
   near the actual hit.  Returns (word, 0-based page number).

   A word's "frequency" is the summed count of all case-insensitive
   words on its page that contain it as a substring; the word with the
   lowest such frequency wins (ties keep the earlier word).
   Precondition: [found_phrase] is non-empty ([Option.get] would raise
   otherwise). *)
let compute_most_unique_word_and_residing_page_num ~doc_id found_phrase =
  let page_nums = found_phrase
                  |> List.map (fun word ->
                      word.Search_result.found_word_pos
                      |> (fun pos -> Index.loc_of_pos ~doc_id pos)
                      |> Index.Loc.line_loc
                      |> Index.Line_loc.page_num
                    )
                  |> List.sort_uniq Int.compare
  in
  (* Case-insensitive word frequencies, computed once per distinct page. *)
  let frequency_of_word_of_page_ci : int String_map.t Int_map.t =
    List.fold_left (fun acc page_num ->
        let m = Misc_utils.frequencies_of_words_ci
            (Index.words_of_page_num ~doc_id page_num
             |> Dynarray.to_seq)
        in
        Int_map.add page_num m acc
      )
      Int_map.empty
      page_nums
  in
  found_phrase
  |> List.map (fun word ->
      let page_num =
        Index.loc_of_pos ~doc_id word.Search_result.found_word_pos
        |> Index.Loc.line_loc
        |> Index.Line_loc.page_num
      in
      let m = Int_map.find page_num frequency_of_word_of_page_ci in
      (* Substring-based count: how common this word (or words that
         contain it) is on its page. *)
      let freq =
        String_map.fold (fun word_on_page_ci freq acc_freq ->
            if
              CCString.find ~sub:word.Search_result.found_word_ci word_on_page_ci >= 0
            then (
              acc_freq + freq
            ) else (
              acc_freq
            )
          )
          m
          0
      in
      (word, page_num, freq)
    )
  |> List.fold_left (fun best x ->
      let (_x_word, _x_page_num, x_freq) = x in
      match best with
      | None -> Some x
      | Some (_best_word, _best_page_num, best_freq) -> (
          if x_freq < best_freq then
            Some x
          else
            best
        )
    )
    None
  |> Option.get
  |> (fun (word, page_num, _freq) ->
      (word.found_word, page_num))
(* Build the config + command template for opening a PDF, trying to jump
   to the page of the search hit and pre-fill the viewer's find box.

   The page passed to viewers is 1-based ([most_unique_word_page_num]
   is 0-based, hence the +1).  On Linux the viewer is detected from the
   default desktop file for PDFs, with known CLI shapes for okular,
   evince, xreader, atril, zathura and mupdf, and flatpak-installed
   viewers invoked through `flatpak run`; anything unrecognized (and all
   of macOS) falls back to the OS default opener. *)
let pdf_config_and_cmd ~path ~doc_id_and_search_result : Config.t * string =
  let config =
    let page_num, search_word =
      match doc_id_and_search_result with
      | None -> (
          (1, "")
        )
      | Some (doc_id, search_result) -> (
          let found_phrase = Search_result.found_phrase search_result in
          let (most_unique_word, most_unique_word_page_num) =
            compute_most_unique_word_and_residing_page_num ~doc_id found_phrase
          in
          let page_num = most_unique_word_page_num + 1 in
          (page_num, most_unique_word)
        )
    in
    Config.make ~path ~page_num ~search_word ~launch_mode:`Detached ()
  in
  let cmd =
    match Params.os_typ with
    | `Linux -> (
        match Xdg_utils.default_desktop_file_path `PDF with
        | None -> fallback_cmd
        | Some viewer_desktop_file_path -> (
            let flatpak_package_name =
              let s = Filename.basename viewer_desktop_file_path in
              Option.value ~default:s
                (CCString.chop_suffix ~suf:".desktop" s)
            in
            let viewer_desktop_file_path_lowercase_ascii =
              String.lowercase_ascii viewer_desktop_file_path
            in
            let contains sub =
              CCString.find ~sub viewer_desktop_file_path_lowercase_ascii >= 0
            in
            let make_command name args =
              if contains "flatpak" then
                Fmt.str "flatpak run %s %s" flatpak_package_name args
              else
                Fmt.str "%s %s" name args
            in
            match doc_id_and_search_result with
            | None -> fallback_cmd
            | Some _ -> (
                if contains "okular" then
                  make_command "okular"
                    "--page {page_num} --find {search_word} {path}"
                else if contains "evince" then
                  make_command "evince"
                    "--page-index {page_num} --find {search_word} {path}"
                else if contains "xreader" then
                  make_command "xreader"
                    "--page-index {page_num} --find {search_word} {path}"
                else if contains "atril" then
                  make_command "atril"
                    "--page-index {page_num} --find {search_word} {path}"
                else if contains "zathura" then
                  (* Check zathura before mupdf as desktop file for
                     zathura might be `org.pwmt.zathura-pdf-mupdf.desktop`
                  *)
                  make_command "zathura"
                    "--page {page_num} --find {search_word} {path}"
                else if contains "mupdf" then
                  make_command "mupdf" "{path} {page_num}"
                else
                  fallback_cmd
              )
          )
      )
    | `Darwin -> fallback_cmd
  in
  (config, cmd)
(* Build the config + command template for opening a text file in the
   configured editor at [line_num] (1-based, default 1).  The argument
   shape for jumping to a line is chosen per editor basename; unknown
   editors just get the path. *)
let config_and_cmd_to_open_text_file ~path ?(line_num = 1) () : Config.t * string =
  let editor = !Params.text_editor in
  let config =
    Config.make ~path ~line_num ~launch_mode:`Terminal ()
  in
  let cmd =
    match Filename.basename editor with
    | "nano" | "nvim" | "vim" | "vi" | "kak" | "emacs" ->
      Fmt.str "%s +{line_num} {path}" editor
    | "hx" | "micro" ->
      Fmt.str "%s {path}:{line_num}" editor
    | "jed" | "xjed" ->
      Fmt.str "%s {path} -g {line_num}" editor
    | _ ->
      Fmt.str "%s {path}" editor
  in
  (config, cmd)
(* Config and command for opening a text document, jumping to the line
   of the first found word of [doc_id_and_search_result] when given. *)
let text_config_and_cmd ~path ~doc_id_and_search_result : Config.t * string =
  let line_num =
    Option.map (fun (doc_id, search_result) ->
        let first_word = List.hd (Search_result.found_phrase search_result) in
        let loc =
          Index.loc_of_pos ~doc_id first_word.Search_result.found_word_pos
        in
        (* Line numbers in the index are 0-based; editors expect 1-based. *)
        Index.Line_loc.line_num_in_page (Index.Loc.line_loc loc) + 1
      )
      doc_id_and_search_result
  in
  config_and_cmd_to_open_text_file
    ~path
    ?line_num
    ()
(* Open [path] with an appropriate external program, optionally jumping
   to the location indicated by [doc_id_and_search_result].  A
   user-provided spec for the file's extension (from [specs]) overrides
   the built-in per-format launch mode and command template. *)
let main ~close_term ~path ~doc_id_and_search_result =
  let ext = File_utils.extension_of_file path in
  let config, cmd =
    (match File_utils.format_of_file path with
     | `PDF -> (
         pdf_config_and_cmd ~path ~doc_id_and_search_result
       )
     | `Pandoc_supported_format -> (
         pandoc_supported_format_config_and_cmd ~path
       )
     | `Text -> (
         text_config_and_cmd ~path ~doc_id_and_search_result
       )
     | `Other -> (
         (Config.make ~path ~launch_mode:`Detached (), fallback_cmd)
       )
    )
    (* Extension-specific user spec takes precedence over the default. *)
    |> (fun (config, cmd) ->
        match Hashtbl.find_opt specs ext with
        | None -> (
            (config, cmd)
          )
        | Some (launch_mode, cmd) -> (
            ({ config with launch_mode }, cmd)
          )
      )
    (* Substitute the {path}/{line_num}/{page_num}/{search_word}
       placeholders in the command template. *)
    |> (fun (config, cmd) ->
        (config, Result.get_ok (resolve_cmd config cmd)))
  in
  match config.launch_mode with
  | `Terminal -> (
      (* NOTE(review): the lines below are garbled/truncated in this
         copy of the file — the [Fmt.str "] literal is cut off and runs
         into a debug-print fragment.  Restore from upstream; kept
         byte-identical here. *)
      let cmd =
        if Misc_utils.stdin_is_atty () then (
          cmd
        ) else (
          Fmt.str "
Printf.fprintf oc "System command: %s\n" cmd
);
Sys.command cmd |> ignore
)
| `Detached -> (
    do_if_debug (fun oc ->
        Printf.fprintf oc "System command: %s\n" cmd
      );
    (* Fire-and-forget; the spawned viewer outlives Docfd. *)
    Proc_utils.run_in_background cmd |> ignore
  )
(* Walk upward from [path], looking for a directory containing a known
   project/vault root indicator (VCS dir, Obsidian/Logseq/Tangent
   vault marker).  The walk stops early at "/home/<user>" (or
   "home/<user>" for a relative path) so the scan never escapes the
   user's home directory.  Returns [None] if nothing is found or a
   directory cannot be read. *)
let find_project_root path =
  let root_indicators =
    [ ".git"
    ; ".hg"
    ; ".svn"
    ; ".obsidian"
    ; ".logseq"
    ; ".tangent"
    ]
  in
  let rec aux arr =
    let cur = CCString.concat_seq ~sep:Filename.dir_sep (Dynarray.to_seq arr) in
    if Dynarray.length arr = 0 then (
      None
    ) else if
      Dynarray.length arr = 3
      && (Dynarray.get arr 0 = "home"
          (* An absolute path splits with a leading "" component, so
             "/home/<user>" becomes ["" ; "home" ; <user>].  The
             original check only handled the relative form and thus
             never fired for absolute paths. *)
          || (Dynarray.get arr 0 = "" && Dynarray.get arr 1 = "home"))
    then (
      Some cur
    ) else (
      let candidates =
        try
          Some (Sys.readdir cur)
        with
        | _ -> None
      in
      match candidates with
      | None -> (
          None
        )
      | Some candidates -> (
          let root_indicator_exists =
            Array.exists (fun name -> List.mem name root_indicators) candidates
          in
          if root_indicator_exists then (
            Some cur
          ) else (
            (* No indicator here: drop the last component and retry
               one level up. *)
            Dynarray.pop_last arr |> ignore;
            aux arr
          )
        )
    )
  in
  let arr = Dynarray.of_list (CCString.split ~by:Filename.dir_sep path) in
  aux arr
(* Open the target of [link] found inside [doc]: another document for
   Markdown and wiki links, or the system handler for URLs.  Empty
   links are ignored. *)
let open_link ~close_term ~doc link =
  let { Link.typ; link; _ } = link in
  let doc_path = Document.path doc in
  let doc_dir = Filename.dirname doc_path in
  let doc_ext = Filename.extension doc_path in
  (* Resolve a wiki-style link to a concrete file path:
     - "."-prefixed links are relative to the document's directory;
     - otherwise the project/vault root is searched recursively for a
       file whose name (sans extension) ends with the link text,
       preferring one that also carries the document's own extension. *)
  let resolve_wiki_link link =
    let link =
      Option.value ~default:link
        (CCString.chop_prefix ~pre:"/" link)
    in
    let link_with_ext = link ^ doc_ext in
    (* NOTE(review): if the original link is exactly "/", the chop
       above yields "" and [link.[0]] raises Invalid_argument —
       confirm whether such links can reach this point. *)
    if link.[0] = '.' then (
      Filename.concat doc_dir link_with_ext
    ) else (
      let wiki_root =
        Option.value ~default:doc_dir (find_project_root doc_dir)
      in
      let candidates = File_utils.list_files_recursive
          ~report_progress:(fun () -> ())
          ~filter:(fun _depth path ->
              let path_no_ext =
                try
                  Filename.chop_extension path
                with
                | _ -> path
              in
              CCString.suffix ~suf:link path_no_ext
            )
          wiki_root
      in
      match
        String_set.find_first_opt (fun path ->
            CCString.suffix ~suf:link_with_ext path
          ) candidates
      with
      | Some x -> x
      | None -> (
          (* Fall back to any candidate, then to a (possibly
             nonexistent) path directly under the root. *)
          match String_set.min_elt_opt candidates with
          | Some x -> x
          | None -> Filename.concat wiki_root link_with_ext
        )
    )
  in
  if String.length link > 0 then (
    match typ with
    | `Markdown -> (
        let path =
          if Filename.is_relative link then (
            Filename.concat doc_dir link
          ) else (
            (* Absolute Markdown links are resolved against the
               project root, not the filesystem root. *)
            let project_root =
              Option.value ~default:doc_dir (find_project_root doc_dir)
            in
            Filename.concat project_root link
          )
        in
        main ~close_term ~path ~doc_id_and_search_result:None
      )
    | `Wiki -> (
        let path = resolve_wiki_link link in
        main ~close_term ~path ~doc_id_and_search_result:None
      )
    | `URL -> (
        let config = Config.make ~path:link ~launch_mode:`Detached () in
        resolve_cmd config fallback_cmd
        |> Result.get_ok
        |> Proc_utils.run_in_background
        |> ignore
      )
  )
================================================
FILE: bin/ping.ml
================================================
(* A saturating wake-up signal between fibers: any number of pings may
   be queued, and a single [wait] coalesces and consumes them all. *)
type t = {
  queue : unit Eio.Stream.t;
}

let make () =
  {
    (* Effectively unbounded capacity, so [ping] never blocks. *)
    queue = Eio.Stream.create Int.max_int;
  }

let ping (t : t) =
  Eio.Stream.add t.queue ()

(* Block until at least one ping arrives, then discard any further
   queued pings so repeated pings yield a single wake-up. *)
let wait (t : t) =
  Eio.Stream.take t.queue;
  Misc_utils.drain_eio_stream t.queue

(* Discard all pending pings without blocking. *)
let clear (t : t) =
  Misc_utils.drain_eio_stream t.queue
================================================
FILE: bin/ping.mli
================================================
(** A saturating wake-up signal between fibers. *)
type t

val make : unit -> t

(** Queue one ping; never blocks. *)
val ping : t -> unit

(** Block until pinged, then drain any further queued pings. *)
val wait : t -> unit

(** Discard all pending pings without blocking. *)
val clear : t -> unit
================================================
FILE: bin/printers.ml
================================================
(* Render [img] to [oc].  With [color] set, emit ANSI escape sequences;
   otherwise use Notty's dumb (plain-text) capability. *)
let output_image ~color (oc : out_channel) (img : Notty.image) : unit =
  let open Notty in
  let cap = if color then Cap.ansi else Cap.dumb in
  let dim = (I.width img, I.height img) in
  let render_buf = Buffer.create 1024 in
  Render.to_buffer render_buf cap (0, 0) dim img;
  Buffer.output_buffer oc render_buf
(* Print an empty line (a zero-width, one-row image) to [oc]. *)
let newline_image oc =
  let blank = Notty_unix.eol (Notty.I.void 0 1) in
  output_image ~color:false oc blank
(* Print [path] in magenta on its own line. *)
let path_image ~color oc path =
  let img = Notty.I.string Notty.A.(fg magenta) path in
  output_image ~color oc (Notty_unix.eol img)
(* Print a document's path followed by each of its rendered search
   results, separated by blank lines. *)
let search_result_group ~color ~underline (oc : out_channel) ((document, results) : Session.search_result_group) =
  let doc_path = Document.path document in
  path_image ~color oc doc_path;
  results
  |> Array.iteri (fun idx search_result ->
      if idx > 0 then newline_image oc;
      Content_and_search_result_rendering.search_result
        ~doc_id:(Document.doc_id document)
        ~render_mode:(UI_base.render_mode_of_document document)
        ~width:!Params.search_result_print_text_width
        ~underline
        ~fill_in_context:true
        search_result
      |> Notty_unix.eol
      |> output_image ~color oc
    )
(* Print a sequence of search result groups, separated by blank lines. *)
let search_result_groups
    ~color
    ~underline
    (oc : out_channel)
    (s : Session.search_result_group Seq.t)
  =
  s
  |> Seq.iteri (fun idx group ->
      if idx > 0 then newline_image oc;
      search_result_group ~color ~underline oc group
    )
================================================
FILE: bin/proc_utils.ml
================================================
open Misc_utils
(* Use the shell builtin [command -v] to test whether [cmd] is on PATH.
   All output is discarded; only the exit status matters. *)
let command_exists (cmd : string) : bool =
  let probe =
    Fmt.str "command -v %s 2>/dev/null 1>/dev/null" (Filename.quote cmd)
  in
  Sys.command probe = 0
(* Launch [cmd] via the shell as a background job ("&"), discarding its
   output.  [cmd] is interpolated into the shell command line verbatim,
   so it must already be fully quoted/escaped by the caller.  Returns
   the shell's immediate exit status, not the job's. *)
let run_in_background (cmd : string) =
  Sys.command (Fmt.str "%s 2>/dev/null 1>/dev/null &" cmd)
(* Run [cmd] and return its stdout split into chunks, or [None] on any
   failure.  stderr is redirected to /dev/null.  [split_mode] selects
   the chunking:
   - [`On_line_split]: one chunk per line;
   - [`On_form_feed]: one chunk per form-feed (0x0C) separated section
     (e.g. per-page output of PDF text extractors). *)
let run_return_stdout
    ~proc_mgr
    ~fs
    ~(split_mode : [ `On_line_split | `On_form_feed ])
    (cmd : string list)
  : string list option =
  let form_feed = Char.chr 0x0C in
  Eio.Path.(with_open_out
              ~create:`Never
              (fs / "/dev/null"))
    (fun stderr ->
       let output =
         try
           let lines =
             Eio.Process.parse_out proc_mgr
               (match split_mode with
                | `On_line_split -> Eio.Buf_read.(map List.of_seq lines)
                | `On_form_feed -> (
                    (* Parser for one chunk: skip a leading form feed
                       if present, then take everything up to (but not
                       including) the next form feed. *)
                    let p =
                      let open Eio.Buf_read in
                      let open Syntax in
                      let* c = peek_char in
                      (match c with
                       | None -> return ()
                       | Some c -> (
                           if c = form_feed then (
                             skip 1
                           ) else (
                             return ()
                           )
                         ))
                      *>
                      (take_while (fun c -> c <> form_feed))
                    in
                    Eio.Buf_read.(map List.of_seq (seq p))
                  )
               )
               ~stderr cmd
           in
           Some lines
         with
         (* Best-effort: any spawn or parse failure maps to None. *)
         | _ -> None
       in
       output
    )
(* Run [command] with [args] and let [f] write the child's stdin.
   Exits the whole program with an error message if [command] is not
   found on PATH.

   Fix: use [Unix.close_process_out] instead of only closing the
   channel — it closes (and thereby flushes) the pipe AND waits on the
   child, so the process is reaped rather than lingering as a zombie.
   The child's exit status is ignored, preserving the previous
   best-effort behavior. *)
let pipe_to_command (f : out_channel -> unit) command args =
  if not (command_exists command) then (
    exit_with_error_msg
      (Fmt.str "command %s not found" command)
  );
  let oc =
    Unix.open_process_args_out
      command (Array.append [|command|] args)
  in
  f oc;
  Out_channel.flush oc;
  ignore (Unix.close_process_out oc)
================================================
FILE: bin/result_syntax.ml
================================================
(* Binding operators for the [result] monad: [let*] sequences
   computations, [let+] maps over the success value. *)
let ( let* ) x f = Result.bind x f
let ( let+ ) x f = Result.map f x
================================================
FILE: bin/script.ml
================================================
(* Execute the script at [path]: each non-blank, non-comment line is
   parsed and run as a command against the evolving session state
   (starting from [init_state]), recording a snapshot after every
   command.  The first snapshot holds [init_state] with no command.
   Returns [Error msg] if reading the file, parsing a line, or running
   a command fails. *)
let run pool ~init_state ~path
  : (Session.Snapshot.t Dynarray.t, string) result =
  let exception Error_with_msg of string in
  let snapshots = Dynarray.create () in
  try
    let lines =
      try
        CCIO.with_in path CCIO.read_lines_l
      with
      | Sys_error _ -> (
          raise (Error_with_msg (Fmt.str "failed to read script %s" (Filename.quote path)))
        )
    in
    Dynarray.add_last
      snapshots
      (Session.Snapshot.make
         ~last_command:None
         init_state);
    lines
    |> CCList.foldi (fun state i line ->
        (* Lines are reported 1-based in error messages. *)
        let line_num_in_error_msg = i + 1 in
        if String_utils.line_is_blank_or_system_comment line then (
          state
        ) else (
          match Command.of_string line with
          | None -> (
              raise (Error_with_msg
                       (Fmt.str "failed to parse command on line %d: %s"
                          line_num_in_error_msg line))
            )
          | Some command -> (
              match Session.run_command pool command state with
              | None -> (
                  raise (Error_with_msg
                           (Fmt.str "failed to run command on line %d: %s"
                              line_num_in_error_msg line))
                )
              | Some (command, state) -> (
                  (* Record the (possibly rewritten) command together
                     with the state it produced. *)
                  let snapshot =
                    Session.Snapshot.make
                      ~last_command:(Some command)
                      state
                  in
                  Dynarray.add_last snapshots snapshot;
                  state
                )
            )
        )
      ) init_state
    |> ignore;
    Ok snapshots
  with
  | Error_with_msg msg -> Error msg
================================================
FILE: bin/search_mode.ml
================================================
(* Whether a search phrase must match within a single line
   ([`Single_line]) or may span lines ([`Multiline]). *)
type t = [
  | `Single_line
  | `Multiline
]
================================================
FILE: bin/session.ml
================================================
open Docfd_lib
(* A document paired with its matching search results. *)
type search_result_group = Document.t * Search_result.t array
module State = struct
  module Sort_by = struct
    (* Primary sort key for the document listing.  [`Ranking] carries
       an explicit path -> rank map (produced by path fuzzy ranking). *)
    type typ = [
      | `Path_date
      | `Path
      | `Score
      | `Mod_time
      | `Ranking of int String_map.t
    ]
    type t = typ * Document.Compare.order
  end

  type t = {
    all_documents : Document.t String_map.t;  (* keyed by path *)
    filter_exp : Filter_exp.t;
    filter_exp_string : string;  (* raw user input for the filter *)
    documents_passing_filter : String_set.t;
    documents_marked : String_set.t;
    search_exp : Search_exp.t;
    search_exp_string : string;  (* raw user input for the search *)
    (* Cached per-document results, sorted most relevant first. *)
    search_results : Search_result.t array String_map.t;
    sort_by : Sort_by.t;
    (* Sort used when the search expression is empty, since [`Score]
       is meaningless without search results. *)
    sort_by_no_score : Sort_by.t;
    screen_split : Command.screen_split;
    show_bottom_right_pane : bool;
    show_key_binding_info_pane : bool;
    (* Paths focused via the `Focus command (head = most recent);
       pinned to the top of the listing. *)
    focus_list : string list;
    (* Per-path character positions to highlight (from fuzzy ranking). *)
    path_highlights : Int_set.t String_map.t;
  }
  (* Structural equality over the content-bearing fields only: sort
     settings, screen split, pane visibility, focus list and path
     highlights are not compared — NOTE(review): presumably deliberate
     (used to detect content changes); confirm.  Also note
     [Array.for_all2] raises Invalid_argument if the result arrays for
     a shared path differ in length. *)
  let equal (x : t) (y : t) =
    String_map.equal Document.equal x.all_documents y.all_documents
    &&
    String.equal x.filter_exp_string y.filter_exp_string
    &&
    String_set.equal x.documents_passing_filter y.documents_passing_filter
    &&
    String_set.equal x.documents_marked y.documents_marked
    &&
    String.equal x.search_exp_string y.search_exp_string
    &&
    String_map.equal
      (Array.for_all2 Search_result.equal)
      x.search_results
      y.search_results
let size (t : t) =
String_map.cardinal t.all_documents
  (* Initial state: no documents, empty filter/search, default sort
     orders, both panes shown. *)
  let empty : t =
    {
      all_documents = String_map.empty;
      filter_exp = Filter_exp.empty;
      filter_exp_string = "";
      documents_passing_filter = String_set.empty;
      documents_marked = String_set.empty;
      search_exp = Search_exp.empty;
      search_exp_string = "";
      search_results = String_map.empty;
      (* Widen Command.Sort_by's typ into this module's typ. *)
      sort_by = Command.Sort_by.default
                |> (fun (typ, order) -> ((typ :> Sort_by.typ), order));
      sort_by_no_score = Command.Sort_by.default_no_score
                         |> (fun (typ, order) -> ((typ :> Sort_by.typ), order));
      screen_split = `Even;
      show_bottom_right_pane = true;
      show_key_binding_info_pane = true;
      focus_list = [];
      path_highlights = String_map.empty;
    }
  (* Plain accessors. *)
  let filter_exp (t : t) = t.filter_exp
  let filter_exp_string (t : t) = t.filter_exp_string
  let search_exp (t : t) = t.search_exp
  let search_exp_string (t : t) = t.search_exp_string
  let path_highlights (t : t) = t.path_highlights

  let clear_path_highlights (t : t) =
    { t with path_highlights = String_map.empty }

  let screen_split (t : t) = t.screen_split

  (* Visibility of the given UI pane. *)
  let show_pane (t : t) (pane : Command.pane) =
    match pane with
    | `Bottom_right -> t.show_bottom_right_pane
    | `Key_binding_info -> t.show_key_binding_info_pane
  (* Compute search results for documents that pass the filter but have
     no cached results yet, and merge them into [t.search_results].
     Races against [stop_signal] via [Eio.Fiber.first]: if the signal
     fires first, returns [None] and partial work is discarded. *)
  let refresh_search_results pool stop_signal (t : t) : t option =
    let cancellation_notifier = Atomic.make false in
    let updates =
      Eio.Fiber.first
        (fun () ->
           Stop_signal.await stop_signal;
           Atomic.set cancellation_notifier true;
           String_map.empty)
        (fun () ->
           let global_first_word_candidates_lookup =
             Index.generate_global_first_word_candidates_lookup
               pool
               t.search_exp
           in
           (* Bit vector of doc ids that contain at least one candidate
              first word of some search phrase — only those documents
              can possibly match. *)
           let usable_doc_ids =
             let bv = CCBV.empty () in
             Search_phrase.Enriched_token.Data_map.iter
               (fun _word word_ids ->
                  Int_set.iter (fun word_id ->
                      Index.State.union_doc_ids_of_word_id_into_bv ~word_id ~into:bv
                    )
                    word_ids
               )
               global_first_word_candidates_lookup;
             bv
           in
           let documents_to_search_through =
             t.documents_passing_filter
             |> String_set.to_seq
             |> Seq.map (fun path -> (path, String_map.find path t.all_documents))
             |> Seq.filter (fun (path, doc) ->
                 (* Skip documents whose results are already cached. *)
                 Option.is_none (String_map.find_opt path t.search_results)
                 && CCBV.get usable_doc_ids (Int64.to_int @@ Document.doc_id doc)
               )
             |> List.of_seq
           in
           documents_to_search_through
           |> Task_pool.map_list pool (fun (path, doc) ->
               let within_same_line =
                 match Document.search_mode doc with
                 | `Single_line -> true
                 | `Multiline -> false
               in
               Index.make_search_job_groups
                 stop_signal
                 ~cancellation_notifier
                 ~doc_id:(Document.doc_id doc)
                 ~doc_word_ids:(Document.word_ids doc)
                 ~global_first_word_candidates_lookup
                 ~within_same_line
                 ~search_scope:(Document.search_scope doc)
                 t.search_exp
               |> Seq.map (fun x -> (path, x))
               |> List.of_seq
             )
           |> List.concat
           (* Run all job groups across the task pool. *)
           |> Task_pool.map_list pool (fun (path, search_job_group) ->
               let heap = Index.Search_job_group.run search_job_group in
               (path, heap)
             )
           (* Merge per-group result heaps back into one heap per
              document, yielding between steps to stay responsive. *)
           |> List.fold_left (fun acc (path, heap) ->
               Eio.Fiber.yield ();
               let heap =
                 String_map.find_opt path acc
                 |> Option.value ~default:Search_result_heap.empty
                 |> Search_result_heap.merge heap
               in
               String_map.add path heap acc
             )
             String_map.empty
           |> String_map.map (fun v ->
               Eio.Fiber.yield ();
               let arr =
                 Search_result_heap.to_seq v
                 |> Array.of_seq
               in
               (* Most relevant result first. *)
               Array.sort Search_result.compare_relevance arr;
               arr
             )
        )
    in
    if Atomic.get cancellation_notifier then (
      None
    ) else (
      (* Freshly computed results win on key collision (there should be
         none, since only uncached documents were searched). *)
      let search_results =
        String_map.union (fun _k v1 _v2 -> Some v1)
          updates
          t.search_results
      in
      Some { t with search_results }
    )
  (* Replace the filter expression, recompute the set of documents
     passing it, then refresh search results for the new set.  If the
     parsed expression is unchanged, only the raw string is updated.
     Returns [None] if cancelled via [stop_signal]. *)
  let update_filter_exp
      pool
      stop_signal
      filter_exp_string
      filter_exp
      (t : t)
    : t option =
    if Filter_exp.equal filter_exp t.filter_exp then (
      Some { t with filter_exp_string }
    ) else (
      let cancellation_notifier = Atomic.make false in
      let documents_passing_filter =
        Eio.Fiber.first
          (fun () ->
             Stop_signal.await stop_signal;
             Atomic.set cancellation_notifier true;
             String_set.empty
          )
          (fun () ->
             (* Accumulate first-word candidates over every content
                search sub-expression of the filter. *)
             let global_first_word_candidates_lookup =
               Filter_exp.all_content_search_exps filter_exp
               |> List.fold_left (fun acc search_exp ->
                   Index.generate_global_first_word_candidates_lookup
                     pool
                     ~acc
                     search_exp
                 )
                 Search_phrase.Enriched_token.Data_map.empty
             in
             t.all_documents
             |> String_map.to_seq
             |> Seq.map snd
             |> (fun s ->
                 if Filter_exp.is_empty filter_exp then (
                   (* An empty filter passes every document. *)
                   s
                 ) else (
                   Seq.filter (fun s ->
                       Eio.Fiber.yield ();
                       Document.satisfies_filter_exp
                         pool
                         stop_signal
                         ~global_first_word_candidates_lookup
                         filter_exp
                         s
                     ) s
                 )
               )
             |> Seq.map Document.path
             |> String_set.of_seq
          )
      in
      if Atomic.get cancellation_notifier then (
        None
      ) else (
        { t with
          filter_exp_string;
          filter_exp;
          documents_passing_filter;
        }
        |> refresh_search_results pool stop_signal
      )
    )
  (* Replace the search expression and recompute all search results
     from scratch.  If the parsed expression is unchanged, only the raw
     string is updated.  Returns [None] if cancelled via [stop_signal]. *)
  let update_search_exp pool stop_signal search_exp_string search_exp (t : t) : t option =
    if Search_exp.equal search_exp t.search_exp then (
      Some { t with search_exp_string }
    ) else (
      { t with
        search_exp;
        search_exp_string;
        search_results = String_map.empty;
      }
      |> refresh_search_results pool stop_signal
    )
  (* Register [doc] in the state: store it, test it against the current
     filter, and compute its search results synchronously.  Fresh stop
     signals that are never broadcast are used, so both steps always
     run to completion. *)
  let add_document pool (doc : Document.t) (t : t) : t =
    let within_same_line =
      match Document.search_mode doc with
      | `Single_line -> true
      | `Multiline -> false
    in
    let path = Document.path doc in
    let documents_passing_filter =
      let global_first_word_candidates_lookup =
        Filter_exp.all_content_search_exps t.filter_exp
        |> List.fold_left (fun acc search_exp ->
            Index.generate_global_first_word_candidates_lookup
              pool
              ~acc
              search_exp
          )
          Search_phrase.Enriched_token.Data_map.empty
      in
      if Document.satisfies_filter_exp pool (Stop_signal.make ()) ~global_first_word_candidates_lookup t.filter_exp doc
      then
        String_set.add path t.documents_passing_filter
      else
        t.documents_passing_filter
    in
    let search_results =
      let global_first_word_candidates_lookup =
        Index.generate_global_first_word_candidates_lookup
          pool
          t.search_exp
      in
      String_map.add
        path
        (Index.search
           pool
           (Stop_signal.make ())
           ~doc_id:(Document.doc_id doc)
           ~doc_word_ids:(Document.word_ids doc)
           ~global_first_word_candidates_lookup
           ~within_same_line
           ~search_scope:(Document.search_scope doc)
           t.search_exp
         (* Safe: the stop signal above is never broadcast, so the
            search cannot be cancelled. *)
         |> Option.get
        )
        t.search_results
    in
    { t with
      all_documents =
        String_map.add
          path
          doc
          t.all_documents;
      documents_passing_filter;
      search_results;
    }
let of_seq pool (s : Document.t Seq.t) =
Seq.fold_left (fun t doc ->
add_document pool doc t
)
empty
s
  (* Pairwise comparators over (document, results) groups, used to
     order the listing.  Most delegate to [Document.Compare]. *)
  module Compare_search_result_group = struct
    let mod_time order (d0, _s0) (d1, _s1) =
      Document.Compare.mod_time order d0 d1

    let path_date order (d0, _s0) (d1, _s1) =
      Document.Compare.path_date order d0 d1

    let path order (d0, _s0) (d1, _s1) =
      Document.Compare.path order d0 d1

    (* Compare by an explicit path -> rank map; documents absent from
       the map sort to the back regardless of [order], and ties of
       absence fall back to path order. *)
    let ranking ranking order (d0, _s0) (d1, _s1) =
      match
        String_map.find_opt (Document.path d0) ranking,
        String_map.find_opt (Document.path d1) ranking
      with
      | None, None -> Document.Compare.path order d0 d1
      | None, Some _ -> (
          (* Always shuffle document with no ranking to the back. *)
          1
        )
      | Some _, None -> (
          (* Always shuffle document with no ranking to the back. *)
          -1
        )
      | Some x0, Some x1 -> (
          match order with
          | `Asc -> Int.compare x0 x1
          | `Desc -> Int.compare x1 x0
        )

    (* Compare by each group's best (first) search result; result
       arrays must be non-empty. *)
    let score order (_d0, s0) (_d1, s1) =
      assert (Array.length s0 > 0);
      assert (Array.length s1 > 0);
      (* Search_result.compare_relevance puts the more relevant
         result to the front, so we flip the comparison here to
         obtain an ordering of "lowest score" first to match the
         usual definition of "sort by score in ascending order".
      *)
      match order with
      | `Asc -> Search_result.compare_relevance s1.(0) s0.(0)
      | `Desc -> Search_result.compare_relevance s0.(0) s1.(0)
  end
  (* The sorted listing: documents passing the filter paired with their
     search results.  With a non-empty search expression, documents
     with no (or not-yet-computed) results are omitted; otherwise every
     filtered document appears with an empty result array.  The array
     is sorted by [t.sort_by], then stably re-sorted so focused
     documents are pinned to the top. *)
  let search_result_groups
      (t : t)
    : (Document.t * Search_result.t array) array =
    let no_search_exp = Search_exp.is_empty t.search_exp in
    let arr =
      t.documents_passing_filter
      |> String_set.to_seq
      |> Seq.map (fun path ->
          (path, String_map.find path t.all_documents)
        )
      |> (fun s ->
          if no_search_exp then (
            Seq.map (fun (_path, doc) -> (doc, [||])) s
          ) else (
            Seq.filter_map (fun (path, doc) ->
                match String_map.find_opt path t.search_results with
                | None -> None
                | Some search_results -> (
                    if Array.length search_results = 0 then
                      None
                    else
                      Some (doc, search_results)
                  )
              ) s
          )
        )
      |> Array.of_seq
    in
    (* [`Score] is meaningless without a search expression; fall back
       to [t.sort_by_no_score] in that case. *)
    let rec f (sort_by_typ, sort_by_order) =
      match sort_by_typ with
      | `Path_date -> Compare_search_result_group.path_date sort_by_order
      | `Mod_time -> Compare_search_result_group.mod_time sort_by_order
      | `Path -> Compare_search_result_group.path sort_by_order
      | `Score -> (
          if no_search_exp then (
            f t.sort_by_no_score
          ) else (
            Compare_search_result_group.score sort_by_order
          )
        )
      | `Ranking ranking -> (
          Compare_search_result_group.ranking ranking sort_by_order
        )
    in
    Array.sort (f t.sort_by) arr;
    (* Pin focused documents to the top (earliest focused first);
       stable sort preserves the primary order among the rest. *)
    let focus_ranking =
      List.rev t.focus_list
      |> CCList.foldi (fun ranking i x ->
          String_map.add x i ranking) String_map.empty
    in
    Array.stable_sort (fun (d0, _) (d1, _) ->
        match
          String_map.find_opt (Document.path d0) focus_ranking,
          String_map.find_opt (Document.path d1) focus_ranking
        with
        | Some x0, Some x1 -> Int.compare x0 x1
        | Some _, None -> -1
        | None, Some _ -> 1
        | None, None -> 0
      ) arr;
    arr
let usable_document_paths (t : t) : String_set.t =
search_result_groups t
|> Array.to_seq
|> Seq.map (fun (doc, _) -> Document.path doc)
|> String_set.of_seq
  (* Paths the user has explicitly marked. *)
  let marked_document_paths (t : t) =
    t.documents_marked

  (* Paths of every tracked document, usable or not. *)
  let all_document_paths (t : t) : string Seq.t =
    t.all_documents
    |> String_map.to_seq
    |> Seq.map fst
let unusable_documents (t : t) =
let s = usable_document_paths t in
t.all_documents
|> String_map.to_seq
|> Seq.filter (fun (path, _doc) ->
not (String_set.mem path s))
|> Seq.map snd
let unusable_document_paths (t : t) =
unusable_documents t
|> Seq.map Document.path
  (* Mark a single document (no-op for unknown paths), or all currently
     usable / unusable documents. *)
  let mark
      (choice :
         [ `Path of string
         | `Usable
         | `Unusable ])
      t
    : t =
    match choice with
    | `Path path -> (
        match String_map.find_opt path t.all_documents with
        | None -> t
        | Some _ -> (
            let documents_marked =
              String_set.add path t.documents_marked
            in
            { t with documents_marked }
          )
      )
    | `Usable -> (
        let documents_marked =
          String_set.union
            t.documents_marked
            (usable_document_paths t)
        in
        { t with documents_marked }
      )
    | `Unusable -> (
        let documents_marked =
          Seq.fold_left
            (fun acc x -> String_set.add x acc)
            t.documents_marked
            (unusable_document_paths t)
        in
        { t with documents_marked }
      )
  (* Remove marks: from a single path, all usable / unusable documents,
     or everything. *)
  let unmark
      (choice :
         [ `Path of string
         | `Usable
         | `Unusable
         | `All ])
      t
    : t =
    match choice with
    | `Path path -> (
        let documents_marked =
          String_set.remove path t.documents_marked
        in
        { t with documents_marked }
      )
    | `Usable -> (
        let documents_marked =
          String_set.diff
            t.documents_marked
            (usable_document_paths t)
        in
        { t with documents_marked }
      )
    | `Unusable -> (
        let documents_marked =
          Seq.fold_left
            (fun acc x -> String_set.remove x acc)
            t.documents_marked
            (unusable_document_paths t)
        in
        { t with documents_marked }
      )
    | `All -> (
        { t with documents_marked = String_set.empty }
      )
let drop
(choice :
[ `Path of string
| `All_except of string
| `Marked
| `Unmarked
| `Usable
| `Unusable ])
(t : t)
: t =
let aux ~(keep : string -> bool) =
let keep' : 'a. string -> 'a -> bool =
fun path _ ->
keep path
in
{ all_documents = String_map.filter keep' t.all_documents;
filter_exp = t.filter_exp;
filter_exp_string = t.filter_exp_string;
documents_passing_filter = String_set.filter keep t.documents_passing_filter;
documents_marked = String_set.filter keep t.documents_marked;
search_exp = t.search_exp;
search_exp_string = t.search_exp_string;
search_results = String_map.filter keep' t.search_results;
sort_by = t.sort_by;
sort_by_no_score = t.sort_by_no_score;
screen_split = t.screen_split;
show_bottom_right_pane = t.show_bottom_right_pane;
show_key_binding_info_pane = t.show_key_binding_info_pane;
focus_list = t.focus_list;
path_highlights = t.path_highlights;
}
in
match choice with
| `Path path -> (
{ all_documents = String_map.remove path t.all_documents;
filter_exp = t.filter_exp;
filter_exp_string = t.filter_exp_string;
documents_passing_filter = String_set.remove path t.documents_passing_filter;
documents_marked = String_set.remove path t.documents_marked;
search_exp = t.search_exp;
search_exp_string = t.search_exp_string;
search_results = String_map.remove path t.search_results;
sort_by = t.sort_by;
sort_by_no_score = t.sort_by_no_score;
screen_split = t.screen_split;
show_bottom_right_pane = t.show_bottom_right_pane;
show_key_binding_info_pane = t.show_key_binding_info_pane;
focus_list = t.focus_list;
path_highlights = t.path_highlights;
}
)
| `All_except path -> (
let keep path' =
String.equal path' path
in
aux ~keep
)
| `Marked -> (
let keep path =
not (String_set.mem path t.documents_marked)
in
aux ~keep
)
| `Unmarked -> (
let keep path =
String_set.mem path t.documents_marked
in
aux ~keep
)
| `Usable | `Unusable -> (
let usable_document_paths =
usable_document_paths t
in
let document_is_usable path =
String_set.mem path usable_document_paths
in
let keep path =
match choice with
| `Usable -> not (document_is_usable path)
| `Unusable -> document_is_usable path
| _ -> failwith "unexpected case"
in
aux ~keep
)
  (* Fuzzy-rank the usable documents' paths against [exp]: switch both
     sort modes to the resulting [`Ranking] map and record per-path
     character highlights.  Returns [None] if cancelled via
     [stop_signal]. *)
  let update_path_fuzzy_ranking stop_signal exp (t : t) : t option =
    let cancellation_notifier = Atomic.make false in
    let ranking, path_highlights =
      Eio.Fiber.first
        (fun () ->
           Stop_signal.await stop_signal;
           Atomic.set cancellation_notifier true;
           (String_map.empty, String_map.empty)
        )
        (fun () ->
           (* Match against cwd-relative paths, but key the results by
              absolute path (the session's canonical form). *)
           let l =
             usable_document_paths t
             |> String_set.to_seq
             |> Seq.map File_utils.remove_cwd_from_path
             |> Misc_utils.fuzzy_rank_assoc
               (Stop_signal.make ())
               ~get_key:Fun.id
               exp
             |> Dynarray.to_list
             |> List.map (fun (path, x) ->
                 (Misc_utils.normalize_path_to_absolute path, x))
           in
           let ranking =
             List.map fst l
             |> Misc_utils.ranking_of_ranked_document_list
           in
           let highlights =
             List.fold_left (fun acc (path, search_result) ->
                 String_map.add
                   path
                   (Misc_utils.highlights_of_search_result search_result)
                   acc
               )
               String_map.empty
               l
           in
           (ranking, highlights)
        )
    in
    if Atomic.get cancellation_notifier then (
      None
    ) else (
      let sort_by = (`Ranking ranking, `Asc) in
      Some {
        t with sort_by;
        sort_by_no_score = sort_by;
        path_highlights;
      }
    )
  (* Narrow each filtered document's search scope to windows of
     [level * !Params.tokens_per_search_scope_level] tokens around its
     current search results, intersected with the existing scope.
     Level 0 resets every document to its full scope. *)
  let narrow_search_scope_to_level ~level (t : t) : t =
    let all_documents =
      if level = 0 then (
        String_map.mapi (fun _path doc ->
            Document.reset_search_scope_to_full doc
          )
          t.all_documents
      ) else (
        String_map.mapi (fun path doc ->
            let doc_id = Document.doc_id doc in
            let search_scope =
              match String_map.find_opt path t.search_results with
              | None -> Diet.Int.empty
              | Some search_results -> (
                  if String_set.mem path t.documents_passing_filter then (
                    Array.fold_left (fun scope search_result ->
                        (* Token position span of this result's found
                           phrase (min/max over its words). *)
                        let s, e =
                          List.fold_left (fun s_e Search_result.{ found_word_pos; _ } ->
                              match s_e with
                              | None -> Some (found_word_pos, found_word_pos)
                              | Some (s, e) -> (
                                  Some (min s found_word_pos, max found_word_pos e)
                                )
                            )
                            None
                            (Search_result.found_phrase search_result)
                          |> Option.get
                        in
                        (* Widen by the per-level offset, clamped to the
                           document's valid position range. *)
                        let offset = level * !Params.tokens_per_search_scope_level in
                        let s, e =
                          (max 0 (s - offset), min (Index.max_pos ~doc_id) (e + offset))
                        in
                        Diet.Int.add
                          (Diet.Int.Interval.make s e)
                          scope
                      )
                      Diet.Int.empty
                      search_results
                  ) else (
                    Diet.Int.empty
                  )
                )
            in
            Document.inter_search_scope
              search_scope
              doc
          )
          t.all_documents
      )
    in
    { t with all_documents }
end
(* Apply [command] to [st].  Returns the (possibly normalized) command
   together with the new state, or [None] when a search/filter/ranking
   expression fails to parse or the update is cancelled.  Sorting and
   fuzzy-ranking commands reset the focus list first. *)
let run_command pool (command : Command.t) (st : State.t) : (Command.t * State.t) option =
  let open State in
  let reset_focus_list st = { st with focus_list = [] } in
  match command with
  | `Mark path -> (
      Some (command, mark (`Path path) st)
    )
  | `Mark_listed -> (
      Some (command, mark `Usable st)
    )
  | `Unmark path -> (
      Some (command, unmark (`Path path) st)
    )
  | `Unmark_listed -> (
      Some (command, unmark `Usable st)
    )
  | `Unmark_all -> (
      Some (command, unmark `All st)
    )
  | `Drop s -> (
      Some (command, drop (`Path s) st)
    )
  | `Drop_all_except s -> (
      Some (command, drop (`All_except s) st)
    )
  | `Drop_marked -> (
      Some (command, drop `Marked st)
    )
  | `Drop_unmarked -> (
      Some (command, drop `Unmarked st)
    )
  | `Drop_listed -> (
      Some (command, drop `Usable st)
    )
  | `Drop_unlisted -> (
      Some (command, drop `Unusable st)
    )
  | `Narrow_level level -> (
      Some (command, narrow_search_scope_to_level ~level st)
    )
  | `Focus path -> (
      (* Most recently focused path goes to the head. *)
      let focus_list = path :: st.focus_list in
      Some (command, { st with focus_list })
    )
  | `Sort (sort_by, sort_by_no_score) -> (
      let st = reset_focus_list st in
      (* Widen Command.Sort_by.typ into State's Sort_by.typ. *)
      let sort_by =
        sort_by
        |> (fun (typ, order) -> ((typ :> Sort_by.typ), order))
      in
      let sort_by_no_score =
        sort_by_no_score
        |> (fun (typ, order) -> ((typ :> Sort_by.typ), order))
      in
      Some (command, { st with sort_by; sort_by_no_score })
    )
  | `Path_fuzzy_rank (s, ranking) -> (
      let st = reset_focus_list st in
      match Search_exp.parse s with
      | None -> None
      | Some exp -> (
          update_path_fuzzy_ranking
            (Stop_signal.make ())
            exp
            st
          |> Option.map (fun state ->
              let command = `Path_fuzzy_rank (s, ranking) in
              (command, state)
            )
        )
    )
  | `Split_screen screen_split -> (
      Some (command, { st with screen_split })
    )
  | `Hide_pane pane -> (
      let st =
        match pane with
        | `Bottom_right -> { st with show_bottom_right_pane = false }
        | `Key_binding_info -> { st with show_key_binding_info_pane = false }
      in
      Some (command, st)
    )
  | `Show_pane pane -> (
      let st =
        match pane with
        | `Bottom_right -> { st with show_bottom_right_pane = true }
        | `Key_binding_info -> { st with show_key_binding_info_pane = true }
      in
      Some (command, st)
    )
  | `Comment _ -> (
      (* Comments are recorded but change nothing. *)
      Some (command, st)
    )
  | `Search s -> (
      match Search_exp.parse s with
      | None -> None
      | Some search_exp -> (
          update_search_exp
            pool
            (Stop_signal.make ())
            s
            search_exp
            st
          |> Option.map (fun state -> (command, state))
        )
    )
  | `Filter s -> (
      match Filter_exp.parse s with
      | None -> None
      | Some exp -> (
          update_filter_exp
            pool
            (Stop_signal.make ())
            s
            exp
            st
          |> Option.map (fun state -> (command, state))
        )
    )
(* An immutable point in session history: the state after running
   [last_command] ([None] for the initial state), tagged with a
   process-unique id. *)
module Snapshot = struct
  (* Source of unique snapshot ids.  NOTE(review): a plain ref, not
     atomic — assumes snapshots are created from a single domain. *)
  let counter = ref 0

  type t = {
    last_command : Command.t option;
    state : State.t;
    committed : bool;  (* NOTE(review): commit semantics live in the
                          session manager — not visible here. *)
    id : int;
  }

  let committed t = t.committed
  let last_command t = t.last_command
  let state t = t.state
  let id t = t.id

  let equal_id x y =
    id x = id y

  let make ?(committed = true) ~last_command state : t =
    let id = !counter in
    counter := id + 1;
    { last_command; state; id; committed }

  let make_empty ?committed () =
    make ?committed ~last_command:None State.empty

  (* Replace the state, keeping command, id and committed flag. *)
  let update_state state t =
    { t with state }
end
================================================
FILE: bin/session.mli
================================================
open Docfd_lib
(** A document paired with its matching search results. *)
type search_result_group = Document.t * Search_result.t array

module State : sig
  type t

  (** Structural equality over content-bearing fields (documents,
      filter/search strings, marks, results). *)
  val equal : t -> t -> bool

  (** Number of documents tracked. *)
  val size : t -> int

  val empty : t

  (** Replace the filter expression (raw string + parsed form) and
      recompute derived data.  [None] means cancelled via the stop
      signal. *)
  val update_filter_exp :
    Task_pool.t ->
    Stop_signal.t ->
    string ->
    Filter_exp.t ->
    t ->
    t option

  (** Replace the search expression (raw string + parsed form) and
      recompute search results.  [None] means cancelled. *)
  val update_search_exp :
    Task_pool.t ->
    Stop_signal.t ->
    string ->
    Search_exp.t ->
    t ->
    t option

  val filter_exp : t -> Filter_exp.t
  val filter_exp_string : t -> string
  val search_exp : t -> Search_exp.t
  val search_exp_string : t -> string
  val path_highlights : t -> Int_set.t String_map.t
  val clear_path_highlights : t -> t
  val add_document : Task_pool.t -> Document.t -> t -> t
  val of_seq : Task_pool.t -> Document.t Seq.t -> t

  (** The sorted listing of documents with their results. *)
  val search_result_groups : t -> search_result_group array

  val usable_document_paths : t -> String_set.t
  val unusable_documents : t -> Document.t Seq.t
  val unusable_document_paths : t -> string Seq.t
  val all_document_paths : t -> string Seq.t
  val marked_document_paths : t -> String_set.t
  val mark : [ `Path of string | `Usable | `Unusable ] -> t -> t
  val unmark : [ `Path of string | `Usable | `Unusable | `All ] -> t -> t
  val drop : [ `Path of string | `All_except of string | `Marked | `Unmarked | `Usable | `Unusable ] -> t -> t
  val update_path_fuzzy_ranking : Stop_signal.t -> Search_exp.t -> t -> t option
  val narrow_search_scope_to_level : level:int -> t -> t
  val screen_split : t -> Command.screen_split
  val show_pane : t -> Command.pane -> bool
end

(** Apply a command to a state; [None] on parse failure or
    cancellation. *)
val run_command : Task_pool.t -> Command.t -> State.t -> (Command.t * State.t) option

module Snapshot : sig
  type t
  val committed : t -> bool
  val last_command : t -> Command.t option
  val state : t -> State.t
  val id : t -> int
  val equal_id : t -> t -> bool
  val make : ?committed:bool -> last_command:Command.t option -> State.t -> t
  val make_empty : ?committed:bool -> unit -> t
  val update_state : State.t -> t -> t
end
================================================
FILE: bin/session_manager.ml
================================================
open Docfd_lib
let last_request_timestamp : Mtime.t Atomic.t =
Atomic.make (Mtime_clock.now ())
let search_request : (bool * string) Lock_protected_cell.t =
Lock_protected_cell.make ()
let filter_request : (bool * string) Lock_protected_cell.t =
Lock_protected_cell.make ()
let version_shift_request : int Lock_protected_cell.t =
Lock_protected_cell.make ()
let path_fuzzy_rank_request : (bool * string) Lock_protected_cell.t =
Lock_protected_cell.make ()
let worker_ping : Ping.t = Ping.make ()
let _requester_lock = Eio.Mutex.create ()
let lock_as_requester : type a. (unit -> a) -> a =
fun f ->
Eio.Mutex.use_rw ~protect:false _requester_lock f
(* let requester_ping : Ping.t = Ping.make () *)
type egress_payload =
| Search_exp_parse_error
| Searching
| Search_cancelled
| Search_done of int * Session.Snapshot.t
| Filter_parse_error
| Filtering
| Filtering_cancelled
| Filtering_done of int * Session.Snapshot.t
| Path_fuzzy_rank_done of int * Session.Snapshot.t * bool
let egress : egress_payload Eio.Stream.t =
Eio.Stream.create 0
let egress_ack : unit Eio.Stream.t =
Eio.Stream.create 0
let stop_filter_signal = Atomic.make (Stop_signal.make ())
let stop_search_signal = Atomic.make (Stop_signal.make ())
let stop_path_fuzzy_rank_signal = Atomic.make (Stop_signal.make ())
(* Cancel in-flight filtering: install a fresh signal for future runs, then
   broadcast on the old one so the current run (if any) observes the stop. *)
let stop_filter () =
  Stop_signal.broadcast
    (Atomic.exchange stop_filter_signal (Stop_signal.make ()))

(* Same protocol as [stop_filter], for searches. *)
let stop_search () =
  Stop_signal.broadcast
    (Atomic.exchange stop_search_signal (Stop_signal.make ()))

(* Same protocol as [stop_filter], for path fuzzy ranking. *)
let stop_path_fuzzy_rank () =
  Stop_signal.broadcast
    (Atomic.exchange stop_path_fuzzy_rank_signal (Stop_signal.make ()))
(* Guards the worker's mutable session history below ([init_state],
   [snapshots], [cur_ver]). *)
let _worker_state_lock = Eio.Mutex.create ()
let lock_worker_state : type a. (unit -> a) -> a =
fun f ->
Eio.Mutex.use_rw ~protect:false _worker_state_lock f
(* State that snapshot 0 is (re)built from; see [update_starting_state]. *)
let init_state : Session.State.t ref = ref Session.State.empty
(* Full session history; always contains at least one snapshot. *)
let snapshots =
let arr = Dynarray.create () in
Dynarray.add_last arr (Session.Snapshot.make_empty ());
arr
(* Index into [snapshots] of the currently selected version. *)
let cur_ver = ref 0
(* Reactive (version, snapshot) pair consumed by the UI; only ever set from
   the manager fiber (Lwd.set is not thread-safe). *)
let cur_snapshot_var = Lwd.var (0, Session.Snapshot.make_empty ())
let cur_snapshot = Lwd.get cur_snapshot_var
(* Immutable copy of the session history handed to [lock_with_view]
   callbacks: starting state, a copy of all snapshots, and the currently
   selected version index. *)
type view = {
init_state : Session.State.t;
snapshots : Session.Snapshot.t Dynarray.t;
cur_ver : int;
}
(* Mirror the snapshot's filter and search expression strings into the UI
   input fields, with the cursor placed at the end of each string. *)
let sync_input_fields_from_snapshot
    (x : Session.Snapshot.t)
  =
  let state = Session.Snapshot.state x in
  let fill field text = Lwd.set field (text, String.length text) in
  fill UI_base.Vars.filter_field (Session.State.filter_exp_string state);
  fill UI_base.Vars.search_field (Session.State.search_exp_string state)
(* Runs [f] with the whole pipeline quiesced: new requests are blocked
   (requester lock), in-flight filter/search work is stopped, pending
   requests are cleared, and the worker state lock is held.  When
   [clean_up] is true, UI statuses and the input fields are re-synced to
   the current snapshot after [f] returns. *)
let lock_for_external_editing ~clean_up f =
(* This blocks further requests from being made. *)
lock_as_requester (fun () ->
(* We try to get worker to finish ASAP. *)
stop_filter ();
stop_search ();
(* Locking worker also locks the manager, as egress_ack forces
lock-step progression of the system.
*)
lock_worker_state (fun () ->
(* Clear any outstanding requests. *)
Lock_protected_cell.unset filter_request;
Lock_protected_cell.unset search_request;
let result = f () in
if clean_up then (
Lwd.set UI_base.Vars.search_ui_status `Idle;
Lwd.set UI_base.Vars.filter_ui_status `Idle;
let snapshot = Dynarray.get snapshots !cur_ver in
Lwd.set cur_snapshot_var (!cur_ver, snapshot);
sync_input_fields_from_snapshot snapshot;
);
result
)
)
(* Take a consistent copy of the session history while the pipeline is
   quiesced, then hand it to [f].  The snapshot dynarray is copied so [f]
   cannot observe later mutation. *)
let lock_with_view : type a. (view -> a) -> a =
  fun f ->
  lock_for_external_editing ~clean_up:false (fun () ->
    let v =
      {
        init_state = !init_state;
        snapshots = Dynarray.copy snapshots;
        cur_ver = !cur_ver;
      }
    in
    f v
  )
(* Replace the initial state (snapshot 0) and rebuild every later snapshot
   by replaying its recorded command on top of its predecessor's new state.
   A command that fails to run (returns [None]) leaves that snapshot with
   the predecessor's state.  Finally jumps to the newest version. *)
let update_starting_state (starting_state : Session.State.t) =
lock_for_external_editing ~clean_up:true (fun () ->
let pool = UI_base.task_pool () in
init_state := starting_state;
let starting_snapshot =
Session.Snapshot.make
~last_command:None
starting_state
in
Dynarray.set snapshots 0 starting_snapshot;
for i=1 to Dynarray.length snapshots - 1 do
let prev = Dynarray.get snapshots (i - 1) in
let prev_state = Session.Snapshot.state prev in
let cur = Dynarray.get snapshots i in
let state =
match Session.Snapshot.last_command cur with
| None -> prev_state
| Some command ->
Session.run_command pool command prev_state
|> Option.map snd
|> Option.value ~default:prev_state
in
Dynarray.set
snapshots
i
(Session.Snapshot.update_state state cur)
done;
cur_ver := (Dynarray.length snapshots - 1);
)
(* Replace the whole snapshot history with [snapshots'] and jump to its
   newest version.  The first snapshot's state must equal the current
   starting state (asserted), and the history must be non-empty. *)
let load_snapshots snapshots' =
assert (Dynarray.length snapshots' > 0);
lock_for_external_editing ~clean_up:true (fun () ->
assert
(Session.State.equal
(Session.Snapshot.state @@ Dynarray.get snapshots' 0)
!init_state);
Dynarray.clear snapshots;
Dynarray.append snapshots snapshots';
cur_ver := (Dynarray.length snapshots - 1);
)
(* Cancel in-flight filter/search work and restore UI statuses and input
   fields from the current snapshot.  All of the work is done by
   [lock_for_external_editing ~clean_up:true]; the body is deliberately a
   no-op. *)
let stop_filter_and_search_and_restore_input_fields () =
lock_for_external_editing ~clean_up:true (fun () ->
()
)
(* Move the current version pointer by [offset] (undo/redo step).  Moves
   that would land outside the recorded snapshot range are silently
   ignored. *)
let shift_ver ~offset =
  lock_for_external_editing ~clean_up:true (fun () ->
    let target = !cur_ver + offset in
    if target >= 0 && target < Dynarray.length snapshots then
      cur_ver := target
  )
(* Append [f current_snapshot] as a new snapshot, first discarding any redo
   history beyond the current version, then select the new snapshot. *)
let update_from_cur_snapshot f =
lock_for_external_editing ~clean_up:true (fun () ->
Dynarray.truncate snapshots (!cur_ver + 1);
let next_snapshot = f (Dynarray.get_last snapshots) in
Dynarray.add_last snapshots next_snapshot;
cur_ver := Dynarray.length snapshots - 1;
)
let manager_fiber () =
(* This fiber handles updates of Lwd.var which are not thread-safe,
and thus cannot be done by worker_fiber directly
*)
(* Publish [snapshot] as the current one; by default also reset the UI's
   document selection, since the result set may have changed. *)
let update_snapshot ?(reset_document_selected = true) ver snapshot =
if reset_document_selected then (
UI_base.reset_document_selected ();
);
Lwd.set cur_snapshot_var (ver, snapshot);
in
while true do
let payload = Eio.Stream.take egress in
(match payload with
| Search_exp_parse_error -> (
Lwd.set UI_base.Vars.search_ui_status `Parse_error
)
| Searching -> (
Lwd.set UI_base.Vars.search_ui_status `Searching
)
| Filtering -> (
Lwd.set UI_base.Vars.filter_ui_status `Filtering
)
| Search_cancelled -> (
(* No status change; the worker retries cancelled searches itself. *)
)
| Search_done (ver, snapshot) -> (
update_snapshot ver snapshot;
Lwd.set UI_base.Vars.search_ui_status `Idle
)
| Filter_parse_error -> (
Lwd.set UI_base.Vars.filter_ui_status `Parse_error
)
| Filtering_cancelled -> (
)
| Filtering_done (ver, snapshot) -> (
update_snapshot ver snapshot;
Lwd.set UI_base.Vars.filter_ui_status `Idle
)
| Path_fuzzy_rank_done (ver, snapshot, commit) -> (
(* On commit, strip path highlights from the state before publishing. *)
let snapshot =
if commit then (
let state =
Session.Snapshot.state snapshot
|> Session.State.clear_path_highlights
in
Session.Snapshot.update_state state snapshot
) else (
snapshot
)
in
(* Selection is kept; NOTE(review): presumably because ranking only
   reorders paths rather than changing the result set — confirm. *)
update_snapshot ~reset_document_selected:false ver snapshot;
)
);
(* Ack so the blocked worker resumes; this enforces the lock-step
   progression that [lock_for_external_editing] relies on. *)
Eio.Stream.add egress_ack ();
done
let worker_fiber pool =
(* This fiber runs in a background domain to allow the UI code in the main
domain to immediately continue running after key presses that trigger
searches or search cancellations.
This removes the need to make the code of session module always yield
frequently.
*)
(* Snapshot at the currently selected version. *)
let get_cur_snapshot () =
Dynarray.get snapshots !cur_ver
in
(* Append [snapshot] after the current version, discarding redo history.
   If the last snapshot satisfies the predicate (e.g. an uncommitted entry
   of the same kind), it is overwritten in place instead of growing the
   history.  Version 0 (the starting state) is never overwritten. *)
let add_snapshot
?(overwrite_if_last_snapshot_satisfies = fun _ -> false)
snapshot
=
Dynarray.truncate snapshots (!cur_ver + 1);
let last_snapshot = Dynarray.get_last snapshots in
if !cur_ver > 0 && overwrite_if_last_snapshot_satisfies last_snapshot then (
Dynarray.set snapshots !cur_ver snapshot;
) else (
Dynarray.add_last snapshots snapshot;
incr cur_ver;
);
in
(* Blocking send: waits for the manager fiber's ack, so worker and manager
   progress in lock-step. *)
let send_to_manager x =
Eio.Stream.add egress x;
Eio.Stream.take egress_ack;
in
(* A search cancelled mid-run is remembered here and retried on a later
   wake-up if no newer search request has arrived. *)
let cancelled_search_request : (bool * string) option ref = ref None in
(* Parse and run one search request; publishes status/result messages. *)
let process_search_req stop_signal ~commit (s : string) =
cancelled_search_request := None;
match Search_exp.parse s with
| None -> (
send_to_manager Search_exp_parse_error
)
| Some search_exp -> (
send_to_manager Searching;
let state =
get_cur_snapshot ()
|> Session.Snapshot.state
|> Session.State.update_search_exp
pool
stop_signal
s
search_exp
in
match state with
(* [None] is treated as cancellation via [stop_signal]. *)
| None -> (
send_to_manager Search_cancelled;
cancelled_search_request := Some (commit, s);
)
| Some state -> (
let command = Some (`Search s) in
let snapshot =
Session.Snapshot.make
~committed:commit
~last_command:command
state
in
(* Collapse an uncommitted previous search, or a re-run of the same
   search string, into a single history entry. *)
add_snapshot
~overwrite_if_last_snapshot_satisfies:(fun snapshot ->
match Session.Snapshot.last_command snapshot with
| Some (`Search s') -> (
not (Session.Snapshot.committed snapshot)
||
s' = s
)
| _ -> false
)
snapshot;
send_to_manager (Search_done (!cur_ver, snapshot))
)
)
in
(* Same structure as [process_search_req] but for filter expressions.
   Note: a cancelled filter request is NOT retried later. *)
let process_filter_req stop_signal ~commit (s : string) =
match Filter_exp.parse s with
| Some filter_exp -> (
send_to_manager Filtering;
let state =
get_cur_snapshot ()
|> Session.Snapshot.state
|> Session.State.update_filter_exp
pool
stop_signal
s
filter_exp
in
match state with
| None -> (
send_to_manager Filtering_cancelled
)
| Some state -> (
let command = Some (`Filter s) in
let snapshot =
Session.Snapshot.make
~committed:commit
~last_command:command
state
in
add_snapshot
~overwrite_if_last_snapshot_satisfies:(fun snapshot ->
match Session.Snapshot.last_command snapshot with
| Some (`Filter s') -> (
not (Session.Snapshot.committed snapshot)
||
s' = s
)
| _ -> false
)
snapshot;
send_to_manager (Filtering_done (!cur_ver, snapshot))
)
)
| None -> (
send_to_manager Filter_parse_error
)
in
(* Path fuzzy ranking: parse failures and cancellations are silently
   ignored (no egress message is sent for them). *)
let process_path_fuzzy_rank_req stop_signal ~commit (s : string) =
match Search_exp.parse s with
| None -> ()
| Some exp -> (
let state =
get_cur_snapshot ()
|> Session.Snapshot.state
|> Session.State.update_path_fuzzy_ranking
stop_signal
exp
in
match state with
| None -> (
)
| Some state -> (
let command = Some (`Path_fuzzy_rank (s, None)) in
let snapshot =
Session.Snapshot.make
~committed:commit
~last_command:command
state
in
add_snapshot
~overwrite_if_last_snapshot_satisfies:(fun snapshot ->
match Session.Snapshot.last_command snapshot with
| Some (`Path_fuzzy_rank (s', _)) -> (
not (Session.Snapshot.committed snapshot)
||
s' = s
)
| _ -> false
)
snapshot;
send_to_manager (Path_fuzzy_rank_done (!cur_ver, snapshot, commit))
)
)
in
while true do
Ping.wait worker_ping;
(* Debounce: if the last recorded request is too recent, sleep past the
   remainder of the debounce window (plus a buffer), re-ping ourselves,
   and defer processing to the next iteration. *)
let time_since_last_request =
Mtime.span
(Atomic.get last_request_timestamp)
(Mtime_clock.now ())
in
if
Mtime.Span.is_shorter
time_since_last_request
~than:Params.session_manager_request_debounce_interval
then (
let clock = Eio.Stdenv.mono_clock (UI_base.eio_env ()) in
let sleep_duration_s =
Mtime.Span.abs_diff
time_since_last_request
Params.session_manager_request_debounce_interval
|> Mtime.Span.add Params.session_manager_request_debounce_wait_buffer
|> Mtime.Span.to_float_ns
|> (fun x -> x /. 1_000_000_000.0)
in
Eio.Time.Mono.sleep clock sleep_duration_s;
Ping.ping worker_ping
) else (
(* Filter requests are handled before search requests.  NOTE(review):
   assuming [Lock_protected_cell.get] consumes the pending request —
   confirm against lock_protected_cell.mli. *)
lock_worker_state (fun () ->
(match Lock_protected_cell.get filter_request with
| None -> ()
| Some (commit, s) -> (
process_filter_req (Atomic.get stop_filter_signal) ~commit s
)
);
(* Fall back to retrying a previously cancelled search when no fresh
   search request is pending. *)
(match Lock_protected_cell.get search_request with
| None -> !cancelled_search_request
| Some (commit, s) -> Some (commit, s)
)
|> Option.iter (fun (commit, s) ->
process_search_req (Atomic.get stop_search_signal) ~commit s
);
Lock_protected_cell.get path_fuzzy_rank_request
|> Option.iter (fun (commit, s) ->
process_path_fuzzy_rank_req (Atomic.get stop_path_fuzzy_rank_signal) ~commit s
);
)
)
done
(* Request handoff to the worker: deposit (commit, expression) in the
   relevant cell, cancel superseded in-flight work, and wake the worker.
   A new filter invalidates any in-flight search, hence both stops below.
   NOTE(review): none of these submitters update [last_request_timestamp],
   which the worker's debounce reads — confirm it is updated elsewhere. *)
let submit_filter_req ~commit (s : string) =
lock_as_requester (fun () ->
stop_filter ();
stop_search ();
Lock_protected_cell.set filter_request (commit, s);
Ping.ping worker_ping
)
(* Submit a search request; cancels only the in-flight search. *)
let submit_search_req ~commit (s : string) =
lock_as_requester (fun () ->
stop_search ();
Lock_protected_cell.set search_request (commit, s);
Ping.ping worker_ping
)
(* Submit a path fuzzy ranking request; cancels only in-flight ranking. *)
let submit_path_fuzzy_rank_req ~commit (s : string) =
lock_as_requester (fun () ->
stop_path_fuzzy_rank ();
Lock_protected_cell.set path_fuzzy_rank_request (commit, s);
Ping.ping worker_ping
)
================================================
FILE: bin/session_manager.mli
================================================
(* Session manager: background fibers plus the request API used by the
   UI. *)
open Docfd_lib
(** Fiber that applies worker results (statuses, snapshots) to the UI's
    [Lwd] variables.  Runs forever; must run where [Lwd.set] is safe. *)
val manager_fiber : unit -> unit
(** Fiber that executes filter/search/path-ranking requests using the
    given task pool.  Runs forever, intended for a background domain. *)
val worker_fiber : Task_pool.t -> unit
(** Reactive (version index, snapshot) pair for the currently selected
    point in the session history. *)
val cur_snapshot : (int * Session.Snapshot.t) Lwd.t
(** Consistent copy of the session history, taken under the manager's
    locks. *)
type view = {
init_state : Session.State.t;
snapshots : Session.Snapshot.t Dynarray.t;
cur_ver : int;
}
(** Runs the callback on a consistent [view] while new requests are
    blocked and in-flight work is stopped. *)
val lock_with_view : (view -> 'a) -> 'a
(** Replaces the initial state and replays all recorded commands on top
    of it, then selects the newest version. *)
val update_starting_state : Session.State.t -> unit
(** Replaces the whole snapshot history.  The first snapshot's state must
    equal the current starting state; the history must be non-empty. *)
val load_snapshots : Session.Snapshot.t Dynarray.t -> unit
(** Moves the current version by [offset]; out-of-range moves are
    ignored. *)
val shift_ver : offset:int -> unit
(** Applies the function to the current snapshot and appends the result,
    discarding any redo history. *)
val update_from_cur_snapshot : (Session.Snapshot.t -> Session.Snapshot.t) -> unit
(** Submit an asynchronous filter request; [commit] marks the resulting
    snapshot as committed. *)
val submit_filter_req : commit:bool -> string -> unit
(** Submit an asynchronous search request; see [submit_filter_req]. *)
val submit_search_req : commit:bool -> string -> unit
(** Submit an asynchronous path fuzzy ranking request. *)
val submit_path_fuzzy_rank_req : commit:bool -> string -> unit
(** Cancels in-flight filter/search work and restores the UI input fields
    from the current snapshot. *)
val stop_filter_and_search_and_restore_input_fields : unit -> unit
================================================
FILE: bin/string_utils.ml
================================================
(* Drop every leading '.' character from [s].  Returns "" when [s] is
   empty or consists entirely of dots. *)
let remove_leading_dots (s : string) =
  let len = String.length s in
  let i = ref 0 in
  while !i < len && String.get s !i = '.' do
    incr i
  done;
  String.sub s !i (len - !i)
(* A "system comment" line starts with a semicolon. *)
let line_is_system_comment line =
  CCString.starts_with ~prefix:";" line

(* True for system comment lines and for lines that are empty after
   trimming whitespace. *)
let line_is_blank_or_system_comment line =
  if line_is_system_comment line then
    true
  else
    String.length (String.trim line) = 0
(* Longest common prefix of all strings in [seq]; "" for an empty
   sequence.  The first element seeds the candidate prefix, which is then
   shortened against each remaining element. *)
let longest_common_prefix (seq : string Seq.t) : string =
  let shrink acc s =
    let limit = min (String.length acc) (String.length s) in
    let rec matching i =
      if i < limit && acc.[i] = s.[i] then matching (i + 1) else i
    in
    String.sub acc 0 (matching 0)
  in
  match seq () with
  | Seq.Nil -> ""
  | Seq.Cons (first, rest) -> Seq.fold_left shrink first rest
================================================
FILE: bin/version_string.ml
================================================
let s = "13.0.0"
================================================
FILE: bin/xdg_utils.ml
================================================
(* Enumerate full paths of entries in each "<dir>/applications" directory
   listed in $XDG_DATA_DIRS.  Unreadable/missing directories are skipped;
   an unset variable yields the empty sequence. *)
let all_desktop_files () : string Seq.t =
match Sys.getenv_opt "XDG_DATA_DIRS" with
| None -> Seq.empty
| Some s -> (
String.split_on_char ':' s
|> List.to_seq
|> Seq.flat_map (fun dir ->
let dir = Filename.concat dir "applications" in
try
Sys.readdir dir
|> Array.to_seq
|> Seq.map (Filename.concat dir)
with
| _ -> Seq.empty
)
)
(* First desktop-file path whose basename equals [file], scanning
   $XDG_DATA_DIRS via [all_desktop_files]; [None] when nothing matches. *)
let path_of_desktop_file file =
  Seq.find
    (fun path -> String.equal file (Filename.basename path))
    (all_desktop_files ())
(* Resolve the full path of the default handler's desktop file for the
   given document type by asking `xdg-mime query default <mime>`.  Returns
   [None] when the query fails or the reported desktop file cannot be
   found on $XDG_DATA_DIRS.  The format argument to [CCUnix.call] only
   ever receives one of the fixed MIME strings below. *)
let default_desktop_file_path (typ : [ `PDF ]) =
let mime_typ =
match typ with
| `PDF -> "application/pdf"
(* | `ODT -> "application/vnd.oasis.opendocument.text"
| `DOCX -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document" *)
in
let (stdout, _, ret) = CCUnix.call "xdg-mime query default %s" mime_typ in
if ret = 0 then (
path_of_desktop_file (CCString.trim stdout)
) else (
None
)
(* Per-user data directory, computed once at module load: $XDG_DATA_HOME
   (default ~/.local/share) on Linux, ~/Library/Application Support on
   macOS.  Exits with an error when $HOME is unset. *)
let data_home =
let home_dir =
match Sys.getenv_opt "HOME" with
| None -> (
Misc_utils.exit_with_error_msg "environment variable HOME is not set";
)
| Some home -> home
in
match Params.os_typ with
| `Linux -> (
match Sys.getenv_opt "XDG_DATA_HOME" with
| None -> Filename.concat home_dir ".local/share"
| Some x -> x
)
| `Darwin -> (
Filename.concat home_dir
(Filename.concat "Library" "Application Support")
)
(* Per-user cache directory, computed once at module load: $XDG_CACHE_HOME
   (default ~/.cache) on Linux, ~/Library/Caches on macOS.  Exits with an
   error when $HOME is unset. *)
let cache_home =
let home_dir =
match Sys.getenv_opt "HOME" with
| None -> (
Misc_utils.exit_with_error_msg "environment variable HOME is not set";
)
| Some home -> home
in
match Params.os_typ with
| `Linux -> (
match Sys.getenv_opt "XDG_CACHE_HOME" with
| None -> Filename.concat home_dir ".cache"
| Some x -> x
)
| `Darwin -> (
Filename.concat home_dir
(Filename.concat "Library" "Caches")
)
================================================
FILE: containers/Containerfile.demo-vhs
================================================
FROM ghcr.io/charmbracelet/vhs
RUN apt-get update
RUN apt-get install -y poppler-utils
RUN apt-get install -y neovim
================================================
FILE: containers/Containerfile.docfd
================================================
FROM docker.io/alpine:3.22
USER root
RUN apk add linux-headers
RUN apk add poppler-utils
RUN apk add sqlite sqlite-libs sqlite-dev sqlite-static
RUN apk add sqlite-analyzer
RUN apk add make
RUN apk add clang
RUN apk add opam
RUN apk add git
RUN apk add python3
# RUN ln -s $(which opam-2.2) /usr/local/bin/opam
RUN opam --version
RUN apk add bash
RUN opam init --disable-sandboxing
RUN opam install --yes dune
RUN opam install --yes utop ocp-indent
COPY . /root/docfd
# RUN chown -R root:root /home/opam/docfd
WORKDIR /root/docfd
RUN eval $(opam env) && dune build docfd.opam
RUN opam install --yes . --deps-only --with-test
RUN echo 'eval $(opam env)' >> /root/.bash_profile
================================================
FILE: demo-vhs-tapes/repo-non-interactive.tape
================================================
Output demo-vhs-gifs/repo-non-interactive.gif
Set Padding 0
Set Framerate 10
Set Width 1366
Set Height 768
Set FontSize 15
Set TypingSpeed 100ms
Type@50ms 'docfd README.md --sample "interactive grep across lines"'
Enter
Sleep 4s
================================================
FILE: demo-vhs-tapes/repo.tape
================================================
Output demo-vhs-gifs/repo.gif
Set Padding 0
Set Framerate 10
Set Width 1366
Set Height 768
Set FontSize 15
Set TypingSpeed 100ms
Type@50ms "docfd *.md"
Sleep 1s
Enter
Sleep 1s
Type "/"
Type "fuzz search"
Enter
Sleep 1s
Type "f"
Type "path-fuzzy:readme"
Enter
Sleep 1s
Enter
Sleep 1s
Type "zz"
Sleep 1s
Type "O"
Type@50ms "Docfd opens the editor to where the search result is when we hit Enter."
Escape
Sleep 2s
Type@200ms ":q!"
Enter
Sleep 4s
================================================
FILE: demo-vhs-tapes/ui-screenshot.tape
================================================
Output dummy.gif
Set Padding 0
Set Framerate 10
Set Width 1366
Set Height 768
Set FontSize 15
Set TypingSpeed 100ms
Type@10ms "docfd *.md"
Enter
Sleep 1s
Screenshot screenshots/ui0.png
Sleep 1s
================================================
FILE: demo-vhs.sh
================================================
#!/usr/bin/env bash
# Run the docfd demo VHS container: mounts the working tree at /vhs and the
# release binary at /usr/bin/docfd, then forwards all arguments (e.g. a
# .tape file path) to the container entry point.
#
# Fix: quote "$PWD" in the -v mounts so a working directory containing
# spaces (or glob characters) does not undergo word splitting/globbing.
podman run --rm -v "$PWD:/vhs" \
  --env 'VISUAL=nvim' \
  -v "$PWD/release/docfd:/usr/bin/docfd" \
  localhost/docfd-demo-vhs \
  "$@"
================================================
FILE: docfd.opam
================================================
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "TUI multiline fuzzy document finder"
description: """
Think interactive grep for text files, PDFs, DOCXs, etc,
but word/token based instead of regex and line based,
so you can search across lines easily.
Docfd aims to provide good UX via integration with common text editors
and PDF viewers,
so you can jump directly to a search result with a single key press.
Features:
- Multithreaded indexing and searching
- Multiline fuzzy search of multiple files or a single file
- Swap between multi-file view and single file view on the fly
- Content view pane that shows the snippet surrounding the search result selected
- Text editor and PDF viewer integration
"""
maintainer: ["Darren Li"]
authors: ["Darren Li"]
license: "MIT"
tags: ["fuzzy" "document" "finder"]
homepage: "https://github.com/darrenldl/docfd"
doc: "https://github.com/darrenldl/docfd"
bug-reports: "https://github.com/darrenldl/docfd/issues"
depends: [
"ocaml" {>= "5.2"}
"dune" {>= "3.4"}
"fmt" {>= "0.9.0"}
"angstrom" {>= "0.15.0"}
"containers" {>= "3.12"}
"oseq"
"spelll"
"notty-community"
"nottui" {>= "0.4"}
"nottui-unix" {>= "0.4"}
"lwd"
"cmdliner" {>= "2.0.0"}
"eio" {>= "0.14"}
"digestif"
"eio_main" {>= "1.3"}
"containers-data"
"timedesc" {>= "3.1.0"}
"re" {>= "1.11.0"}
"ppx_deriving" {>= "5.0"}
"decompress"
"progress" {>= "0.5.0"}
"diet"
"sqlite3"
"uuseg"
"uucp"
"alcotest" {with-test}
"qcheck-alcotest" {with-test}
"qcheck" {with-test}
"odoc" {with-doc}
]
dev-repo: "git+https://github.com/darrenldl/docfd.git"
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@doc" {with-doc}
]
]
================================================
FILE: docfd.opam.locked
================================================
opam-version: "2.0"
name: "docfd"
version: "3.0.0"
synopsis: "TUI multiline fuzzy document finder"
maintainer: "Darren Li"
authors: "Darren Li"
license: "MIT"
tags: ["fuzzy" "document" "finder"]
homepage: "https://github.com/darrenldl/docfd"
doc: "https://github.com/darrenldl/docfd"
bug-reports: "https://github.com/darrenldl/docfd/issues"
depends: [
"alcotest" {= "1.8.0" & with-test}
"angstrom" {= "0.16.1"}
"astring" {= "0.8.5" & with-test}
"base-bigarray" {= "base"}
"base-bytes" {= "base"}
"base-domains" {= "base"}
"base-nnp" {= "base"}
"base-threads" {= "base"}
"base-unix" {= "base"}
"bigstringaf" {= "0.10.0"}
"checkseum" {= "0.5.2"}
"cmdliner" {= "1.3.0"}
"conf-pkg-config" {= "3"}
"conf-sqlite3" {= "1"}
"containers" {= "3.15"}
"containers-data" {= "3.15"}
"cppo" {= "1.8.0"}
"csexp" {= "1.5.2"}
"cstruct" {= "6.2.0"}
"decompress" {= "1.5.3"}
"diet" {= "0.4"}
"digestif" {= "1.2.0"}
"domain-local-await" {= "1.0.1"}
"dune" {= "3.17.1"}
"dune-configurator" {= "3.17.1"}
"eio" {= "1.2"}
"eio_linux" {= "1.2"}
"eio_main" {= "1.2"}
"eio_posix" {= "1.2"}
"either" {= "1.0.0"}
"eqaf" {= "0.10"}
"fmt" {= "0.9.0"}
"hmap" {= "0.8.1"}
"host-arch-x86_64" {= "1"}
"host-system-other" {= "1"}
"iomux" {= "0.3"}
"logs" {= "0.7.0"}
"lwd" {= "0.3"}
"lwt" {= "5.9.0"}
"lwt-dllist" {= "1.0.1"}
"mtime" {= "2.1.0"}
"nottui" {= "0.3"}
"notty" {= "0.2.3"}
"ocaml" {= "5.2.1"}
"ocaml-base-compiler" {= "5.2.1"}
"ocaml-compiler-libs" {= "v0.17.0"}
"ocaml-config" {= "3"}
"ocaml-options-vanilla" {= "1"}
"ocaml-syntax-shims" {= "1.0.0"}
"ocamlbuild" {= "0.15.0"}
"ocamlfind" {= "1.9.6"}
"ocplib-endian" {= "1.2"}
"optint" {= "0.3.0"}
"oseq" {= "0.5.1"}
"ounit2" {= "2.2.7" & with-test}
"ppx_derivers" {= "1.2.1"}
"ppx_deriving" {= "6.0.3"}
"ppxlib" {= "0.33.0"}
"progress" {= "0.4.0"}
"psq" {= "0.2.1"}
"ptime" {= "1.2.0"}
"qcheck" {= "0.23" & with-test}
"qcheck-alcotest" {= "0.23" & with-test}
"qcheck-core" {= "0.23" & with-test}
"qcheck-ounit" {= "0.23" & with-test}
"re" {= "1.12.0"}
"result" {= "1.5"}
"seq" {= "base"}
"sexplib0" {= "v0.17.0"}
"spelll" {= "0.4"}
"sqlite3" {= "5.2.0"}
"stdlib-shims" {= "0.3.0"}
"terminal" {= "0.4.0"}
"thread-table" {= "1.0.0"}
"timedesc" {= "3.1.0"}
"timedesc-tzdb" {= "3.1.0"}
"timedesc-tzlocal" {= "3.1.0"}
"topkg" {= "1.0.7"}
"uring" {= "0.9"}
"uucp" {= "16.0.0"}
"uuseg" {= "16.0.0"}
"uutf" {= "1.0.3"}
"vector" {= "1.0.0"}
]
build: [
["dune" "subst"] {dev}
["dune" "build" "-p" name "-j" jobs "@install" "@doc" {with-doc}]
]
dev-repo: "git+https://github.com/darrenldl/docfd.git"
pin-depends: [
[
"nottui.0.3"
"git+https://github.com/let-def/lwd.git#a337a778001e6c1dbaed7e758c9e05f300abd388"
]
[
"notty.0.2.3"
"git+https://github.com/ocaml-dune/notty.git#b6e1036c61521be3b1f4d585895ac598bdf4ab8d"
]
[
"ocaml-base-compiler.5.2.1"
"https://github.com/ocaml/ocaml/releases/download/5.2.1/ocaml-5.2.1.tar.gz"
]
]
description: """\
Think interactive grep for text files, PDFs, DOCXs, etc,
but word/token based instead of regex and line based,
so you can search across lines easily.
Docfd aims to provide good UX via integration with common text editors
and PDF viewers,
so you can jump directly to a search result with a single key press.
Features:
- Multithreaded indexing and searching
- Multiline fuzzy search of multiple files or a single file
- Swap between multi-file view and single file view on the fly
- Content view pane that shows the snippet surrounding the search result selected
- Text editor and PDF viewer integration"""
================================================
FILE: docfd.opam.template
================================================
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@doc" {with-doc}
]
]
================================================
FILE: dune-project
================================================
(lang dune 3.4)
(name docfd)
(generate_opam_files true)
(source
(github darrenldl/docfd))
(authors "Darren Li")
(maintainers "Darren Li")
(license MIT)
(package
(name docfd)
(synopsis "TUI multiline fuzzy document finder")
(description "
Think interactive grep for text files, PDFs, DOCXs, etc,
but word/token based instead of regex and line based,
so you can search across lines easily.
Docfd aims to provide good UX via integration with common text editors
and PDF viewers,
so you can jump directly to a search result with a single key press.
Features:
- Multithreaded indexing and searching
- Multiline fuzzy search of multiple files or a single file
- Swap between multi-file view and single file view on the fly
- Content view pane that shows the snippet surrounding the search result selected
- Text editor and PDF viewer integration
")
(documentation https://github.com/darrenldl/docfd)
(depends
(ocaml (>= "5.2"))
dune
(fmt (>= "0.9.0"))
(angstrom (>= "0.15.0"))
(containers (>= "3.12"))
oseq
spelll
notty-community
(nottui (>= "0.4"))
(nottui-unix (>= "0.4"))
lwd
(cmdliner (>= "2.0.0"))
(eio (>= "0.14"))
digestif
(eio_main (>= "1.3"))
containers-data
(timedesc (>= "3.1.0"))
(re (>= "1.11.0"))
(ppx_deriving (>= "5.0"))
decompress
(progress (>= "0.5.0"))
diet
sqlite3
uuseg
uucp
(alcotest :with-test)
(qcheck-alcotest :with-test)
(qcheck :with-test)
)
(tags
("fuzzy" "document" "finder"
))
)
================================================
FILE: file-collection-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: file-collection-tests.t/run.t
================================================
Setup:
$ touch no-ext
$ touch empty-paths.txt
$ echo "test.txt" >> paths
$ echo "test-symlink.txt" >> paths
$ echo "test0" >> paths
$ echo "test1/ijkl" >> paths
$ echo "test2/" >> paths
$ echo "test3/" >> paths
$ echo "test.log" >> single-path0.txt
$ echo "test.txt" >> single-path1.txt
$ touch test.ext0
$ touch test.log
$ touch test.md
$ touch test.txt
$ mkdir test0
$ touch test0/1234.md
$ touch test0/abcd.txt
$ mkdir test0/abcd
$ touch test0/abcd/efgh.md
$ touch test0/abcd/efgh.txt
$ mkdir test1
$ touch test1/5678.md
$ touch test1/ijkl.txt
$ mkdir test1/ijkl
$ touch test1/ijkl/mnop.md
$ touch test1/ijkl/mnop.txt
$ mkdir test2
$ touch test2/1234.md
$ mkdir test2/abcd
$ touch test2/abcd/efgh.md
$ ln -s $(pwd)/test0/abcd/efgh.txt test2/abcd/efgh.txt
$ ln -s ../test1/5678.md test2/56.md
$ ln -s ../test1/ijkl test2/ijkl
$ ln -s test2 test3
$ ln -s test.txt test-symlink.txt
$ mkdir miXeD-CaSe
$ touch miXeD-CaSe/AbCd.md
$ tree
.
|-- dune -> ../../../../default/file-collection-tests.t/dune
|-- empty-paths.txt
|-- miXeD-CaSe
| `-- AbCd.md
|-- no-ext
|-- paths
|-- single-path0.txt
|-- single-path1.txt
|-- test-symlink.txt -> test.txt
|-- test.ext0
|-- test.log
|-- test.md
|-- test.txt
|-- test0
| |-- 1234.md
| |-- abcd
| | |-- efgh.md
| | `-- efgh.txt
| `-- abcd.txt
|-- test1
| |-- 5678.md
| |-- ijkl
| | |-- mnop.md
| | `-- mnop.txt
| `-- ijkl.txt
|-- test2
| |-- 1234.md
| |-- 56.md -> ../test1/5678.md
| |-- abcd
| | |-- efgh.md
| | `-- efgh.txt -> $TESTCASE_ROOT/test0/abcd/efgh.txt
| `-- ijkl -> ../test1/ijkl
`-- test3 -> test2
7 directories, 26 files
Basic invocation for reference:
$ docfd --debug-log - --cache-dir .cache --index-only . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
--max-depth 0:
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 0 . 2>&1 | grep '^Using .* search mode' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 0 test.txt 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
--max-depth 1:
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 1 . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 1 --glob '**/*.md' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
--max-depth 2:
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 2 . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 2 --glob '**/*.md' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Default path is not picked if --paths-from is used:
$ docfd --debug-log - --cache-dir .cache --index-only --paths-from empty-paths.txt 2>&1 | grep '^Using .* search mode' | sort
Default path is not picked if --glob is used:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Default path is not picked if --single-line-glob is used:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Multiple --paths-from:
$ docfd --debug-log - --cache-dir .cache --index-only --paths-from single-path0.txt --paths-from single-path1.txt 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Empty --exts:
$ docfd --debug-log - --cache-dir .cache --index-only --exts "" . 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Empty --single-line-exts:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-exts "" . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Empty --exts and --single-line-exts:
$ docfd --debug-log - --cache-dir .cache --index-only --exts "" --single-line-exts "" .
Initializing in-memory index
error: no usable file extensions or glob patterns
[1]
--add-exts:
$ docfd --debug-log - --cache-dir .cache --index-only --add-exts ext0 . 2>&1 | grep '^Using .* search mode' | sort | grep "ext0"
Using multiline search mode for document '$TESTCASE_ROOT/test.ext0'
--single-line-add-exts:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-add-exts ext0 . 2>&1 | grep '^Using .* search mode' | sort | grep "ext0"
Using single line search mode for document '$TESTCASE_ROOT/test.ext0'
Picking via multiple --glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Picking via multiple --single-line-glob:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --single-line-glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
Picking via multiple --glob and --single-line-glob:
$ # --single-line-glob for .txt files
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
$ # --single-line-glob for .md files
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --single-line-glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
$ # --single-line-glob for .log files
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
$ # --glob for .txt files
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --single-line-glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
$ # --glob for .md files
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
$ # --glob for .log files
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --single-line-glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
--single-line-exts and --exts:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-exts md --exts md . 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode' | sort
Checking if efficiently computed and naively computed results for default search mode files are consistent
Checking if efficiently computed and naively computed results for single line search mode files are consistent
Checking if single line search mode files and default search mode files are disjoint
Using single line search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
--single-line-exts and --glob:
$ docfd --debug-log - --cache-dir .cache --index-only --exts "" --single-line-exts md --glob '**/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode' | sort
Checking if efficiently computed and naively computed results for default search mode files are consistent
Checking if efficiently computed and naively computed results for single line search mode files are consistent
Checking if single line search mode files and default search mode files are disjoint
Using single line search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
--single-line-glob and --exts:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.md' --exts md . 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode'| sort
Checking if efficiently computed and naively computed results for default search mode files are consistent
Checking if efficiently computed and naively computed results for single line search mode files are consistent
Checking if single line search mode files and default search mode files are disjoint
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
--single-line-glob and --glob:
$ docfd --debug-log - --cache-dir .cache --index-only --exts "" --single-line-glob '*.md' --glob '**/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode' | sort
Checking if efficiently computed and naively computed results for default search mode files are consistent
Checking if efficiently computed and naively computed results for single line search mode files are consistent
Checking if single line search mode files and default search mode files are disjoint
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
--exts applies to directories in FILE in --paths-from FILE:
$ docfd --debug-log - --cache-dir .cache --index-only --paths-from paths --exts md 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
--single-line-exts applies to directories in FILE in --paths-from FILE:
$ docfd --debug-log - --cache-dir .cache --index-only --paths-from paths --single-line-exts md 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Top-level symlinks:
$ docfd --debug-log - --cache-dir .cache --index-only test-symlink.txt 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
$ docfd --debug-log - --cache-dir .cache --index-only test3 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Top-level files and --single-line-exts:
$ docfd --debug-log - --cache-dir .cache --index-only test.txt --single-line-exts txt 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
Top-level files and --single-line-glob:
$ docfd --debug-log - --cache-dir .cache --index-only test.txt --single-line-glob '*.txt' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
--glob and unrecognized extensions:
$ docfd --debug-log - --cache-dir .cache --index-only --exts md --glob "*.txt" . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
--single-line-glob and unrecognized extensions:
$ docfd --debug-log - --cache-dir .cache --index-only --exts md --single-line-glob "*.txt" . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
--single-line:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line . 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
--glob and --single-line:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line --glob '**/*.txt' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Top-level files with unrecognized extensions are still picked:
$ docfd --debug-log - --cache-dir .cache --index-only --exts md test.txt . 2>&1 | grep '^Using .* search mode' | sort | grep 'test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Top-level files without extensions are still picked:
$ docfd --debug-log - --cache-dir .cache --index-only --exts md no-ext . 2>&1 | grep '^Using .* search mode' | sort | grep 'no-ext'
Using multiline search mode for document '$TESTCASE_ROOT/no-ext'
Current working directory is a symlink:
$ cd test3
$ docfd --debug-log - --cache-dir .cache --index-only . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob '**/*.txt' . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob "$(pwd)/**/*.txt" . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ cd ..
'./' in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob './*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
'..' in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test1/../*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Directories in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '.' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob '..' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test0/' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Crossing symlinks explicitly in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3/../*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3/abcd/*.txt' --glob 'test3/abcd/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3/ijkl/*.txt' --glob 'test3/ijkl/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
'**' in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '**/*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob '**/**/*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob "$(pwd)/**/*.txt" 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob "$(pwd)/**/**/*.txt" 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob "**/test[01]/*.txt" 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/test[01]/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/test[01]/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Case insensitive marker:
$ # Exact match without marker
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'miXeD-CaSe/AbCd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ # All lowercase glob
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-case/abcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob '\cmixed-case/abcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-\ccase/abcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-case/\cabcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-case/abcd.md\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ # All uppercase glob
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIXED-CASE/ABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob '\cMIXED-CASE/ABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIX\cED-CASE/ABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIXED-CASE/\cABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIXED-CASE/ABCD.MD\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ # Mixed case glob
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/aBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob '\cMixeD-CaSE/aBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD\c-CaSE/aBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/\caBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/aBcD.mD\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Double escape characters:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '\\cMixeD-CaSE/AbCd.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD\\c-CaSE/AbCd.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/AbCd.mD\\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
================================================
FILE: lib/GZIP.ml
================================================
(* Basically fully copied from examples in Decompress manual *)
(* Current Unix time truncated to 32 bits, as required for the
   MTIME field of the gzip header configuration below. *)
let time () =
  Unix.gettimeofday () |> Int32.of_float
(* Gzip-compress [s] using Decompress's streaming API.
   Follows the usage pattern from the Decompress manual. *)
let compress (s : string) : string =
  let src = De.bigstring_create De.io_buffer_size in
  let dst = De.bigstring_create De.io_buffer_size in
  let config = Gz.Higher.configuration Gz.Unix time in
  let window = De.Lz77.make_window ~bits:15 in
  let queue = De.Queue.create 1024 in
  let out = Buffer.create 4096 in
  let pos = ref 0 in
  (* Feed the next slice of [s] into [buf]; returning 0 signals
     end of input. *)
  let refill buf =
    let len = min De.io_buffer_size (String.length s - !pos) in
    Bigstringaf.blit_from_string s ~src_off:!pos buf ~dst_off:0 ~len;
    pos := !pos + len;
    len
  in
  (* Collect [len] produced bytes into the output buffer. *)
  let flush buf len =
    Buffer.add_string out (Bigstringaf.substring buf ~off:0 ~len)
  in
  Gz.Higher.compress ~w:window ~q:queue ~level:4 ~refill ~flush () config src dst;
  Buffer.contents out
(* Gzip-decompress [s]; [None] if the input is not valid gzip data. *)
let decompress (s : string) : string option =
  let src = De.bigstring_create De.io_buffer_size in
  let dst = De.bigstring_create De.io_buffer_size in
  let out = Buffer.create 0x1000 in
  let pos = ref 0 in
  (* Feed the next slice of [s] into [buf]; returning 0 signals
     end of input. *)
  let refill buf =
    let len = min De.io_buffer_size (String.length s - !pos) in
    Bigstringaf.blit_from_string s ~src_off:!pos buf ~dst_off:0 ~len;
    pos := !pos + len;
    len
  in
  (* Collect [len] produced bytes into the output buffer. *)
  let flush buf len =
    Buffer.add_string out (Bigstringaf.substring buf ~off:0 ~len)
  in
  match Gz.Higher.uncompress ~refill ~flush src dst with
  | Ok _ -> Some (Buffer.contents out)
  | Error _ -> None
================================================
FILE: lib/char_map.ml
================================================
(* Map keyed by [Char], built with containers' [CCMap.Make]. *)
include CCMap.Make (Char)
================================================
FILE: lib/doc_id_db.ml
================================================
(* Global state for the document-ID database: an in-memory cache of
   hash -> ID assignments made in the [doc_info] table, guarded by a
   mutex for concurrent use under Eio. *)
type t = {
  lock : Eio.Mutex.t;
  (* Cache of document IDs already allocated in the DB, keyed by
     document hash. *)
  doc_id_of_doc_hash : (string, int64) Hashtbl.t;
}

let t : t =
  {
    lock = Eio.Mutex.create ();
    doc_id_of_doc_hash = Hashtbl.create 10_000;
  }

(* Run [f] while holding the global lock. *)
let lock : type a. (unit -> a) -> a =
  fun f ->
  Eio.Mutex.use_rw ~protect:true t.lock f
(* Ensure every hash in [doc_hashes] has a document ID allocated in
   the [doc_info] table, then cache the resulting hash -> ID mapping
   in memory.  Hashes that already have an ID are left untouched
   thanks to [ON CONFLICT(hash) DO NOTHING], so calling this
   repeatedly with overlapping hash sequences is safe.
   Note: [doc_hashes] is traversed twice (insert pass, then read-back
   pass), so it must be a replayable sequence. *)
let allocate_bulk (doc_hashes : string Seq.t) : unit =
  let open Sqlite3_utils in
  lock (fun () ->
      with_db (fun db ->
          (* Insert each hash with the smallest free non-negative ID:
             the subquery searches for a gap adjacent to an existing
             ID, and IFNULL falls back to 0 when the table is empty. *)
          with_stmt ~db
            {|
            INSERT INTO doc_info
            (id, hash, status)
            VALUES
            (
            (SELECT
            IFNULL(
            (
            SELECT a.id - 1 AS id
            FROM doc_info a
            LEFT JOIN doc_info b ON a.id - 1 = b.id
            WHERE b.id IS NULL AND a.id - 1 >= 0
            UNION
            SELECT a.id + 1 AS id
            FROM doc_info a
            LEFT JOIN doc_info b ON a.id + 1 = b.id
            WHERE b.id IS NULL
            ORDER BY id
            LIMIT 1
            ),
            0
            )
            ),
            @doc_hash,
            'ONGOING'
            )
            ON CONFLICT(hash) DO NOTHING
            |}
            (fun stmt ->
               Seq.iter (fun doc_hash ->
                   bind_names stmt [ ("@doc_hash", TEXT doc_hash) ];
                   step stmt;
                   reset stmt;
                 )
                 doc_hashes
            );
          (* Read the IDs back and cache them.  [Hashtbl.replace]
             (rather than [add]) prevents duplicate bindings from
             piling up when the same hash passes through here more
             than once. *)
          with_stmt ~db
            {|
            SELECT id
            FROM doc_info
            WHERE hash = @doc_hash
            |}
            (fun stmt ->
               Seq.iter (fun doc_hash ->
                   bind_names stmt [ ("@doc_hash", TEXT doc_hash) ];
                   step stmt;
                   Hashtbl.replace t.doc_id_of_doc_hash doc_hash (column_int64 stmt 0);
                   reset stmt;
                 )
                 doc_hashes
            )
        )
    )
(* Return the document ID for [doc_hash], allocating one on demand. *)
let doc_id_of_doc_hash (doc_hash : string) : int64 =
  (* Fast path: the ID may already be cached. *)
  match
    lock (fun () -> Hashtbl.find_opt t.doc_id_of_doc_hash doc_hash)
  with
  | Some id -> id
  | None -> (
      (* Slow path: allocate in the DB, which also populates the
         cache, then read the cached entry. *)
      allocate_bulk (Seq.return doc_hash);
      lock (fun () -> Hashtbl.find t.doc_id_of_doc_hash doc_hash)
    )
================================================
FILE: lib/doc_id_db.mli
================================================
(** Allocate document IDs in the index DB for every hash in the
    sequence that does not already have one, and cache the
    hash -> ID mappings in memory. *)
val allocate_bulk : string Seq.t -> unit

(** Look up the document ID for a hash, allocating one on demand if
    it is not yet known. *)
val doc_id_of_doc_hash : string -> int64
================================================
FILE: lib/docfd_lib.ml
================================================
(* Public surface of the docfd library: re-export the internal
   modules under their own names.  [Params'] and [Misc_utils'] are
   primed — presumably to avoid clashing with same-named modules in
   the consuming executable; TODO confirm against bin/. *)
module Index = Index
module Doc_id_db = Doc_id_db
module Link = Link
module Search_result = Search_result
module Search_phrase = Search_phrase
module Search_exp = Search_exp
module Search_result_heap = Search_result_heap
module Word_db = Word_db
module Tokenization = Tokenization
module Params' = Params
module Task_pool = Task_pool
module Stop_signal = Stop_signal
module Parser_components = Parser_components
module Misc_utils' = Misc_utils
module Sqlite3_utils = Sqlite3_utils
(* Open the index DB at [db_path], install the schema, and prune old
   documents if the stored count reaches [document_count_limit].
   Returns [Some error_message] on failure, [None] on success. *)
let init ~db_path ~document_count_limit =
  let open Sqlite3_utils in
  let db = db_open db_path in
  let rc = Sqlite3.exec db Params.db_schema in
  let error =
    if Rc.is_success rc then (
      Params.db_path := Some db_path;
      if Index.document_count () >= document_count_limit then (
        Index.prune_old_documents ~keep_n_latest:document_count_limit
      );
      None
    ) else (
      Some (Fmt.str
              "failed to initialize index DB: %s" (Rc.to_string rc))
    )
  in
  (* [db_close] can report "busy"; retry until the handle is really
     closed. *)
  while not (db_close db) do
    Unix.sleepf 0.1
  done;
  error
================================================
FILE: lib/dune
================================================
; Build definition for the docfd core library.
(library
 ; Enable (almost) all warnings, disabling a curated set; @8 turns
 ; warning 8 (non-exhaustive match) into an error.
 (flags (-w "+a-4-9-29-37-40-42-44-48-50-32-30-70@8"))
 (name docfd_lib)
 (preprocess (pps
  ppx_deriving.show
  ppx_deriving.ord
  ppx_deriving.eq
 ))
 (libraries containers
  containers-data
  angstrom
  fmt
  spelll
  oseq
  eio
  decompress.gz
  bigstringaf
  unix
  diet
  sqlite3
  uuseg
  uucp
  timedesc
  timedesc-tzlocal.unix-or-utc
 )
)
================================================
FILE: lib/index.ml
================================================
(* Position of a line within a document: which page it is on, where
   it sits on that page, and where it sits in the document overall. *)
module Line_loc = struct
  type t = {
    page_num : int;          (* 0-based page number *)
    line_num_in_page : int;  (* 0-based line number within the page *)
    global_line_num : int;   (* 0-based line number in the whole document *)
  }
  [@@deriving eq]

  let page_num t = t.page_num

  let line_num_in_page t = t.line_num_in_page

  let global_line_num t = t.global_line_num

  (* Order by overall position in the document. *)
  let compare (x : t) (y : t) =
    Int.compare x.global_line_num y.global_line_num
end
(* Position of a single token: its line plus the token's index within
   that line. *)
module Loc = struct
  type t = {
    line_loc : Line_loc.t;
    pos_in_line : int;  (* 0-based token index within the line *)
  }
  [@@deriving eq]

  let line_loc t = t.line_loc

  let pos_in_line t = t.pos_in_line
end
(* In-memory ("raw") index of a single document, built before being
   written to the index DB.  A document's token stream is split into
   chunks which are indexed independently in the task pool and then
   merged with [union]; link extraction runs on the merged result. *)
module Raw = struct
  type t = {
    (* Word ID -> set of token positions where the word occurs. *)
    pos_s_of_word : Int_set.t Int_map.t;
    (* Token position -> full location (line + index within line). *)
    loc_of_pos : Loc.t Int_map.t;
    line_loc_of_global_line_num : Line_loc.t Int_map.t;
    (* Line -> (first, last) token position on that line, inclusive. *)
    start_end_inc_pos_of_global_line_num : (int * int) Int_map.t;
    (* Page -> (first, last) token position on that page, inclusive. *)
    start_end_inc_pos_of_page_num : (int * int) Int_map.t;
    (* Token position -> word ID. *)
    word_of_pos : int Int_map.t;
    line_count_of_page_num : int Int_map.t;
    page_count : int;
    global_line_count : int;
    (* Links found in the document; empty until [extract_links] is
       run on the fully merged index (see [of_seq]). *)
    links : Link.t array;
  }

  (* A single token paired with its absolute position and location. *)
  type multi_indexed_word = {
    pos : int;
    loc : Loc.t;
    word : string;
  }

  type chunk = multi_indexed_word array

  (* The empty index. *)
  let make () : t = {
    pos_s_of_word = Int_map.empty;
    loc_of_pos = Int_map.empty;
    line_loc_of_global_line_num = Int_map.empty;
    start_end_inc_pos_of_global_line_num = Int_map.empty;
    start_end_inc_pos_of_page_num = Int_map.empty;
    word_of_pos = Int_map.empty;
    line_count_of_page_num = Int_map.empty;
    page_count = 0;
    global_line_count = 0;
    links = [||];
  }

  (* All word IDs present in the index. *)
  let word_ids (t : t) : Int_set.t =
    Int_map.fold (fun word_id _pos_s acc ->
        Int_set.add word_id acc
      )
      t.pos_s_of_word
      Int_set.empty

  let links (t : t) = t.links

  (* Merge two chunk indices.  Position-keyed maps keep the left side
     on key clashes (chunks cover disjoint position ranges, so clashes
     are not expected); per-line/per-page ranges are widened with
     min/max; counts take the max.  [links] is deliberately dropped —
     it is recomputed on the final merged index. *)
  let union (x : t) (y : t) =
    {
      pos_s_of_word =
        Int_map.union (fun _k s0 s1 -> Some (Int_set.union s0 s1))
          x.pos_s_of_word
          y.pos_s_of_word;
      loc_of_pos =
        Int_map.union (fun _k x _ -> Some x)
          x.loc_of_pos
          y.loc_of_pos;
      line_loc_of_global_line_num =
        Int_map.union (fun _k x _ -> Some x)
          x.line_loc_of_global_line_num
          y.line_loc_of_global_line_num;
      start_end_inc_pos_of_global_line_num =
        Int_map.union (fun _k (start_x, end_inc_x) (start_y, end_inc_y) ->
            Some (min start_x start_y, max end_inc_x end_inc_y))
          x.start_end_inc_pos_of_global_line_num
          y.start_end_inc_pos_of_global_line_num;
      start_end_inc_pos_of_page_num =
        Int_map.union (fun _k (start_x, end_inc_x) (start_y, end_inc_y) ->
            Some (min start_x start_y, max end_inc_x end_inc_y))
          x.start_end_inc_pos_of_page_num
          y.start_end_inc_pos_of_page_num;
      word_of_pos =
        Int_map.union (fun _k x _ -> Some x)
          x.word_of_pos
          y.word_of_pos;
      line_count_of_page_num =
        Int_map.union (fun _k x y -> Some (max x y))
          x.line_count_of_page_num
          y.line_count_of_page_num;
      page_count = max x.page_count y.page_count;
      global_line_count = max x.global_line_count y.global_line_count;
      links = [||];
    }

  (* Tokenize lines into a flat, globally-positioned token sequence.
     A line with no tokens still yields a single empty word so the
     line is represented in the index. *)
  let words_of_lines
      (s : (Line_loc.t * string) Seq.t)
    : multi_indexed_word Seq.t =
    s
    |> Seq.flat_map (fun (line_loc, s) ->
        let seq = Tokenization.tokenize_with_pos ~drop_spaces:false s in
        if Seq.is_empty seq then (
          let empty_word = ({ Loc.line_loc; pos_in_line = 0 }, "") in
          Seq.return empty_word
        ) else (
          Seq.map (fun (pos_in_line, word) ->
              ({ Loc.line_loc; pos_in_line }, word))
            seq
        )
      )
    |> Seq.mapi (fun pos (loc, word) ->
        { pos; loc; word })

  (* Index one chunk of tokens by folding each token into the maps.
     The record pattern intentionally omits [links] (warning 9 is
     disabled in the build flags); counts are maintained as
     max(seen index) + 1. *)
  let of_chunk (arr : chunk) : t =
    Array.fold_left
      (fun
        { pos_s_of_word;
          loc_of_pos;
          line_loc_of_global_line_num;
          start_end_inc_pos_of_global_line_num;
          start_end_inc_pos_of_page_num;
          word_of_pos;
          line_count_of_page_num;
          page_count;
          global_line_count;
        }
        { pos; loc; word } ->
        (* Intern the word, obtaining its stable ID. *)
        let word_id =
          Word_db.add word
        in
        let line_loc = loc.Loc.line_loc in
        let global_line_num = line_loc.global_line_num in
        let page_num = line_loc.page_num in
        let pos_s =
          Int_map.find_opt word_id pos_s_of_word
          |> Option.value ~default:Int_set.empty
          |> Int_set.add pos
        in
        let cur_page_line_count =
          Option.value ~default:0
            (Int_map.find_opt page_num line_count_of_page_num)
        in
        { pos_s_of_word = Int_map.add word_id pos_s pos_s_of_word;
          loc_of_pos = Int_map.add pos loc loc_of_pos;
          line_loc_of_global_line_num =
            Int_map.add global_line_num line_loc line_loc_of_global_line_num;
          (* Widen the line's inclusive position range to cover [pos]. *)
          start_end_inc_pos_of_global_line_num =
            Int_map.add
              global_line_num
              (match Int_map.find_opt global_line_num start_end_inc_pos_of_global_line_num with
               | None -> (pos, pos)
               | Some (x, y) -> (min x pos, max y pos))
              start_end_inc_pos_of_global_line_num;
          (* Same widening for the page's inclusive position range. *)
          start_end_inc_pos_of_page_num =
            Int_map.add
              page_num
              (match Int_map.find_opt page_num start_end_inc_pos_of_page_num with
               | None -> (pos, pos)
               | Some (x, y) -> (min x pos, max y pos))
              start_end_inc_pos_of_page_num;
          word_of_pos = Int_map.add pos word_id word_of_pos;
          line_count_of_page_num =
            Int_map.add line_loc.page_num (max cur_page_line_count (line_loc.line_num_in_page + 1)) line_count_of_page_num;
          page_count = max page_count (line_loc.page_num + 1);
          global_line_count = max global_line_count (global_line_num + 1);
          links = [||];
        }
      )
      (make ())
      arr

  (* Split the token stream into fixed-size chunks for parallel
     indexing.  An empty document becomes a single empty-word chunk
     so downstream code always sees at least one chunk. *)
  let chunks_of_words (s : multi_indexed_word Seq.t) : chunk Seq.t =
    let empty_word =
      let line_loc =
        { Line_loc.page_num = 0; line_num_in_page = 0; global_line_num = 0 }
      in
      let loc = { Loc.line_loc; pos_in_line = 0 } in
      { pos = 0; loc; word = "" }
    in
    (if Seq.is_empty s then (
        Seq.return empty_word
      ) else (
        s
      ))
    |> OSeq.chunks !Params.index_chunk_size

  (* Scan the indexed document for URL, Markdown, and Wiki links.
     Candidate lines are found cheaply via the word index first, then
     each candidate line is re-tokenized from the index and scanned
     with a small state machine. *)
  let extract_links (t : t) : Link.t array =
    (* Turn the accumulated token buffer into one Link and reset the
       buffer.  [buf] holds (pos, word) most-recent-first, so the
       left fold re-reverses the words back into reading order. *)
    let flush_buf link_typ ~acc ~buf =
      match buf with
      | [] -> (acc, buf)
      | _ -> (
          let start_pos, end_inc_pos, strings =
            List.fold_left (fun (start, end_inc, strings) (pos, word) ->
                let start =
                  match start with
                  | None -> Some pos
                  | Some start -> Some (min pos start)
                in
                let end_inc =
                  match end_inc with
                  | None -> Some pos
                  | Some end_inc -> Some (max pos end_inc)
                in
                (start, end_inc, word :: strings)
              )
              (None, None, [])
              buf
          in
          let start_pos = Option.get start_pos in
          let end_inc_pos = Option.get end_inc_pos in
          let link = Link.{
              start_pos;
              end_inc_pos;
              typ = link_typ;
              link = String.concat "" strings;
            }
          in
          (link :: acc, [])
        )
    in
    (* Scan one line's tokens with a two-state machine:
       `Scanning looks for the start of a link, `In_link accumulates
       tokens until a terminator is seen. *)
    let process_line (line : (int * string) Dynarray.t) : Link.t list =
      assert (Dynarray.length line > 0);
      let word_at pos = Dynarray.get line pos in
      let word_string_at pos = snd @@ word_at pos in
      let word_ci_string_at pos = String.lowercase_ascii @@ word_string_at pos in
      let rec aux
          (state : [ `Scanning | `In_link of Link.typ ])
          ~(acc : Link.t list)
          ~(buf : (int * string) list)
          (cur : int)
        : Link.t list
        =
        if cur >= Dynarray.length line then (
          (* End of line: flush any link still being accumulated. *)
          match state with
          | `Scanning -> (
              acc
            )
          | `In_link link_typ -> (
              let acc, _buf = flush_buf link_typ ~acc ~buf in
              acc
            )
        ) else (
          let words_left = Dynarray.length line - cur - 1 in
          let pos, word = word_at cur in
          let word_ci = String.lowercase_ascii word in
          match state with
          | `Scanning -> (
              (* URL: scheme followed by "://" (tokenized as three
                 tokens).  Markdown: token right after "](".
                 Wiki: token right after "[[". *)
              let link_typ =
                if List.mem word_ci [ "https"; "http"; "file" ]
                && words_left >= 3
                && word_ci_string_at (cur + 1) = ":"
                && word_ci_string_at (cur + 2) = "/"
                && word_ci_string_at (cur + 3) = "/"
                then (
                  Some `URL
                ) else if cur >= 2
                       && word_ci_string_at (cur - 2) = "]"
                       && word_ci_string_at (cur - 1) = "("
                then (
                  Some `Markdown
                ) else if cur >= 2
                       && word_ci_string_at (cur - 2) = "["
                       && word_ci_string_at (cur - 1) = "["
                then (
                  Some `Wiki
                ) else (
                  None
                )
              in
              match link_typ with
              | Some link_typ -> (
                  aux (`In_link link_typ) ~acc ~buf:((pos, word) :: buf) (cur + 1)
                )
              | None -> (
                  aux `Scanning ~acc ~buf (cur + 1)
                )
            )
          | `In_link link_typ -> (
              (* A link ends at whitespace or a closing/markup token. *)
              let link_ended =
                String.length word = 0
                || Parser_components.is_space word.[0]
                || List.mem word
                     [ "]"
                     ; ")"
                     ; "|"
                     ; "\""
                     ; "<"
                     ; ">"
                     ; "{"
                     ; "}"
                     ; "^"
                     ; "\\"
                     ]
              in
              if link_ended then (
                let acc, buf = flush_buf link_typ ~acc ~buf in
                aux `Scanning ~acc ~buf (cur + 1)
              ) else (
                aux state ~acc ~buf:((pos, word) :: buf) (cur + 1)
              )
            )
        )
      in
      aux `Scanning ~acc:[] ~buf:[] 0
    in
    (* Global line numbers of lines containing any/all of the words
       in [l], computed from the word-position index. *)
    let lines_with (mode : [ `Any_of | `All_of ]) l =
      let line_nums_by_word =
        l
        |> List.to_seq
        |> Seq.filter_map Word_db.id_of_word
        |> Seq.map (fun word_id ->
            Int_map.find_opt word_id t.pos_s_of_word
            |> Option.value ~default:Int_set.empty
            |> Int_set.to_seq
            |> Seq.fold_left (fun acc pos ->
                let loc = Int_map.find pos t.loc_of_pos in
                let line_loc = Loc.line_loc loc in
                Int_set.add line_loc.global_line_num acc
              )
              Int_set.empty
          )
      in
      line_nums_by_word
      |> Seq.fold_left
        (match mode with
         | `Any_of -> (fun acc s ->
             let acc = Option.value ~default:Int_set.empty acc in
             Some (Int_set.union acc s)
           )
         | `All_of -> (fun acc s ->
             match acc with
             | None -> Some s
             | Some acc -> Some (Int_set.inter acc s)
           )
        )
        None
      |> Option.value ~default:Int_set.empty
    in
    (* Lines that could contain a URL: a scheme word plus both ":"
       and "/" somewhere on the line. *)
    let url_line_candidates =
      Int_set.inter
        (lines_with `Any_of [ "http"; "https"; "file" ])
        (lines_with `All_of [ ":"; "/" ])
    in
    let markdown_and_wiki_line_candidates =
      lines_with `All_of [ "["; "]" ]
    in
    let line_candidates =
      Int_set.union
        url_line_candidates
        markdown_and_wiki_line_candidates
      |> Int_set.to_seq
    in
    (* Rebuild each candidate line's tokens from the index and scan it. *)
    let rec aux acc line_nums =
      match line_nums () with
      | Seq.Nil -> List.rev acc
      | Seq.Cons (cur, rest) -> (
          let start, end_inc = Int_map.find cur t.start_end_inc_pos_of_global_line_num in
          let links =
            OSeq.(start -- end_inc)
            |> Seq.map (fun pos -> (pos, Int_map.find pos t.word_of_pos))
            |> Seq.map (fun (pos, word_id) -> (pos, Word_db.word_of_id word_id))
            |> Dynarray.of_seq
            |> process_line
          in
          aux (links @ acc) rest
        )
    in
    aux [] line_candidates
    |> Array.of_list

  (* Build the full index for a document: sanitize, tokenize, chunk,
     index chunks in parallel, merge, then extract links from the
     merged result. *)
  let of_seq pool (s : (Line_loc.t * string) Seq.t) : t =
    let indices =
      s
      |> Seq.map (fun (line_loc, s) -> (line_loc, Misc_utils.sanitize_string s))
      |> words_of_lines
      |> chunks_of_words
      |> List.of_seq
      |> Task_pool.map_list pool of_chunk
    in
    let res =
      List.fold_left (fun acc index ->
          union acc index
        )
        (make ())
      indices
    in
    let links = extract_links res in
    { res with links }

  (* Index a single-page document given as a sequence of lines. *)
  let of_lines pool (s : string Seq.t) : t =
    s
    |> Seq.mapi (fun global_line_num line ->
        ({ Line_loc.page_num = 0; line_num_in_page = global_line_num; global_line_num }, line)
      )
    |> of_seq pool

  (* Index a paged document.  Empty pages contribute one empty line;
     global line numbers are assigned in a second pass across all
     pages. *)
  let of_pages pool (s : string list Seq.t) : t =
    s
    |> Seq.mapi (fun page_num page ->
        (page_num, page)
      )
    |> Seq.flat_map (fun (page_num, page) ->
        match page with
        | [] -> (
            let empty_line = ({ Line_loc.page_num; line_num_in_page = 0; global_line_num = 0 }, "") in
            Seq.return empty_line
          )
        | _ -> (
            List.to_seq page
            |> Seq.mapi (fun line_num_in_page line ->
                ({ Line_loc.page_num; line_num_in_page; global_line_num = 0 }, line)
              )
          )
      )
    |> Seq.mapi (fun global_line_num ((line_loc : Line_loc.t), line) ->
        ({ line_loc with global_line_num }, line)
      )
    |> of_seq pool
end
(* In-memory word-ID -> document-ID mapping, mirroring the
   [word_id_doc_id_link] table.  Used to narrow searches to documents
   that contain a given word.  All access is mutex-guarded for
   concurrent use under Eio. *)
module State : sig
  val add_word_id_doc_id_link : word_id:int -> doc_id:int64 -> unit

  val read_from_db : unit -> unit

  val union_doc_ids_of_word_id_into_bv : word_id:int -> into:CCBV.t -> unit
end = struct
  type t = {
    lock : Eio.Mutex.t;
    (* Word ID -> bitvector of document IDs containing that word. *)
    doc_ids_of_word_id : (int, CCBV.t) Hashtbl.t;
  }

  let t : t =
    {
      lock = Eio.Mutex.create ();
      doc_ids_of_word_id = Hashtbl.create 100_000;
    }

  (* Run [f] while holding the lock. *)
  let lock : type a. (unit -> a) -> a =
    fun f ->
    Eio.Mutex.use_rw ~protect:true t.lock f

  (* Bitvector for [word_id], created empty on first use.  Callers
     must already hold the lock. *)
  let find_doc_ids_bv ~word_id =
    match Hashtbl.find_opt t.doc_ids_of_word_id word_id with
    | Some doc_ids -> doc_ids
    | None -> (
        let bv = CCBV.empty () in
        Hashtbl.replace t.doc_ids_of_word_id word_id bv;
        bv
      )

  (* Union the set of documents containing [word_id] into [into]. *)
  let union_doc_ids_of_word_id_into_bv ~word_id ~into =
    lock (fun () ->
        let bv = find_doc_ids_bv ~word_id in
        CCBV.union_into ~into bv
      )

  let add_word_id_doc_id_link ~word_id ~doc_id =
    lock (fun () ->
        let doc_ids = find_doc_ids_bv ~word_id in
        CCBV.set doc_ids (Int64.to_int doc_id)
      )

  (* Load the entire word -> document mapping from the DB. *)
  let read_from_db () : unit =
    let open Sqlite3_utils in
    lock (fun () ->
        with_db (fun db ->
            iter_stmt ~db
              {|
              SELECT word_id, doc_id
              FROM word_id_doc_id_link
              |}
              ~names:[]
              (fun data ->
                 let word_id = Data.to_int_exn data.(0) in
                 let doc_id = Data.to_int_exn data.(1) in
                 let doc_ids = find_doc_ids_bv ~word_id in
                 CCBV.set doc_ids doc_id
              )
          )
      )
end
(* Current time in whole seconds, for the [last_used] DB columns. *)
let now_int64 () =
  Timedesc.Timestamp.get_s (Timedesc.Timestamp.now ())
(* Set [last_used] to the current time for every document in
   [doc_ids], within a single transaction. *)
let refresh_last_used_batch (doc_ids : int64 list) : unit =
  let open Sqlite3_utils in
  let timestamp = now_int64 () in
  with_db (fun db ->
      step_stmt ~db "BEGIN IMMEDIATE" ignore;
      doc_ids
      |> List.iter (fun doc_id ->
          step_stmt ~db
            {|
            UPDATE doc_info
            SET last_used = @now
            WHERE id = @doc_id
            |}
            ~names:[ ("@doc_id", INT doc_id)
                   ; ("@now", INT timestamp)
                   ]
            ignore
        );
      step_stmt ~db "COMMIT" ignore;
    )
(* Number of documents currently recorded in the index DB. *)
let document_count () : int =
  let open Sqlite3_utils in
  with_db (fun db ->
      step_stmt ~db "SELECT COUNT(1) FROM doc_info"
        (fun stmt -> column_int64 stmt 0 |> Int64.to_int)
    )
(* Delete every document except the [keep_n_latest] most recently
   used, together with all rows that reference it, in a single
   transaction. *)
let prune_old_documents ~keep_n_latest : unit =
  let open Sqlite3_utils in
  with_db (fun db ->
      step_stmt ~db "BEGIN IMMEDIATE" ignore;
      step_stmt ~db "DROP TABLE IF EXISTS temp.docs_to_drop" ignore;
      step_stmt ~db "CREATE TEMP TABLE docs_to_drop (hash TEXT, id INTEGER)" ignore;
      (* LIMIT -1 OFFSET n == skip the n most recently used rows and
         take everything after them. *)
      step_stmt ~db
        {|
        INSERT INTO temp.docs_to_drop
        SELECT hash, id
        FROM doc_info
        ORDER BY last_used DESC
        LIMIT -1
        OFFSET @offset
        |}
        ~names:[("@offset", INT (Int64.of_int keep_n_latest))]
        ignore;
      (* Delete rows of [table] whose document-ID column matches a
         document scheduled for dropping. *)
      let drop_based_on_doc_id ?(id_column = "doc_id") table =
        step_stmt ~db
          (Fmt.str
             {|
             DELETE FROM %s
             WHERE EXISTS (
             SELECT 1 FROM temp.docs_to_drop WHERE %s.%s = temp.docs_to_drop.id
             )
             |}
             table
             table
             id_column
          )
          ignore
      in
      drop_based_on_doc_id ~id_column:"id" "doc_info";
      drop_based_on_doc_id "line_info";
      drop_based_on_doc_id "page_info";
      drop_based_on_doc_id "position";
      drop_based_on_doc_id "word_id_doc_id_link";
      step_stmt ~db "DROP TABLE temp.docs_to_drop" ignore;
      step_stmt ~db "COMMIT" ignore;
    )
(* Persist a [Raw.t] index for document [doc_id] into the DB.  Marks
   the document 'ONGOING' first and 'COMPLETED' last, so an
   interrupted write is detectable.  If [already_in_transaction] the
   caller owns BEGIN/COMMIT; otherwise one is opened around the bulk
   inserts here. *)
let write_raw_to_db db ~already_in_transaction ~doc_id (x : Raw.t) : unit =
  let open Sqlite3_utils in
  let now = now_int64 () in
  with_db ~db (fun db ->
      (* Document-level metadata; max_pos is the largest token
         position recorded. *)
      step_stmt ~db
        {|
        UPDATE doc_info
        SET page_count = @page_count,
        global_line_count = @global_line_count,
        max_pos = @max_pos,
        last_used = @now,
        status = 'ONGOING'
        WHERE
        id = @doc_id
        |}
        ~names:[ ("@doc_id", INT doc_id)
               ; ("@page_count", INT (Int64.of_int x.page_count))
               ; ("@global_line_count", INT (Int64.of_int x.global_line_count))
               ; ("@max_pos", INT (Int64.of_int (Int_map.max_binding x.word_of_pos |> fst)))
               ; ("@now", INT now)
               ]
        ignore;
      if not already_in_transaction then (
        step_stmt ~db "BEGIN IMMEDIATE" ignore;
      );
      (* One row per page: line count and inclusive position range. *)
      with_stmt ~db
        {|
        INSERT INTO page_info
        (doc_id, page_num, line_count, start_pos, end_inc_pos)
        VALUES
        (@doc_id, @page_num, @line_count, @start_pos, @end_inc_pos)
        ON CONFLICT(doc_id, page_num) DO NOTHING
        |}
        (fun stmt ->
           Int_map.iter (fun page_num line_count ->
               let (start_pos, end_inc_pos) =
                 Int_map.find page_num x.start_end_inc_pos_of_page_num
               in
               bind_names stmt [ ("@doc_id", INT doc_id)
                               ; ("@page_num", INT (Int64.of_int page_num))
                               ; ("@line_count", INT (Int64.of_int line_count))
                               ; ("@start_pos", INT (Int64.of_int start_pos))
                               ; ("@end_inc_pos", INT (Int64.of_int end_inc_pos))
                               ];
               step stmt;
               reset stmt;
             )
             x.line_count_of_page_num
        );
      (* One row per line: position range plus page coordinates. *)
      with_stmt ~db
        {|
        INSERT INTO line_info
        (doc_id, global_line_num, start_pos, end_inc_pos, page_num, line_num_in_page)
        VALUES
        (@doc_id, @global_line_num, @start_pos, @end_inc_pos, @page_num, @line_num_in_page)
        ON CONFLICT(doc_id, global_line_num) DO NOTHING
        |}
        (fun stmt ->
           Int_map.iter (fun line_num line_loc ->
               let (start_pos, end_inc_pos) =
                 Int_map.find line_num x.start_end_inc_pos_of_global_line_num
               in
               let page_num = line_loc.Line_loc.page_num in
               let line_num_in_page = line_loc.Line_loc.line_num_in_page in
               bind_names stmt [ ("@doc_id", INT doc_id)
                               ; ("@global_line_num", INT (Int64.of_int line_num))
                               ; ("@start_pos", INT (Int64.of_int start_pos))
                               ; ("@end_inc_pos", INT (Int64.of_int end_inc_pos))
                               ; ("@page_num", INT (Int64.of_int page_num))
                               ; ("@line_num_in_page", INT (Int64.of_int line_num_in_page))
                               ];
               step stmt;
               reset stmt;
             )
             x.line_loc_of_global_line_num;
        );
      (* One row per token position: which word sits there. *)
      with_stmt ~db
        {|
        INSERT INTO position
        (doc_id, pos, word_id)
        VALUES
        (@doc_id, @pos, @word_id)
        ON CONFLICT(doc_id, pos) DO NOTHING
        |}
        (fun stmt ->
           Int_map.iter (fun word_id pos_s ->
               Int_set.iter (fun pos ->
                   bind_names stmt
                     [ ("@doc_id", INT doc_id)
                     ; ("@pos", INT (Int64.of_int pos))
                     ; ("@word_id", INT (Int64.of_int word_id))
                     ];
                   step stmt;
                   reset stmt;
                 )
                 pos_s
             )
             x.pos_s_of_word
        );
      (* Extracted links with their covered position ranges. *)
      with_stmt ~db
        {|
        INSERT INTO link
        (doc_id, start_pos, end_inc_pos, typ, link)
        VALUES
        (@doc_id, @start_pos, @end_inc_pos, @typ, @link)
        ON CONFLICT(doc_id, start_pos, end_inc_pos) DO NOTHING
        |}
        (fun stmt ->
           Array.iter (fun link ->
               let { Link.start_pos; end_inc_pos; typ; link } = link in
               let typ = Link.string_of_typ typ in
               bind_names stmt
                 [ ("@doc_id", INT doc_id)
                 ; ("@start_pos", INT (Int64.of_int start_pos))
                 ; ("@end_inc_pos", INT (Int64.of_int end_inc_pos))
                 ; ("@typ", TEXT typ)
                 ; ("@link", TEXT link)
                 ];
               step stmt;
               reset stmt;
             )
             x.links
        );
      (* Word -> document links, mirrored into the in-memory [State]
         as they are written. *)
      with_stmt ~db
        {|
        INSERT INTO word_id_doc_id_link
        (word_id, doc_id)
        VALUES
        (@word_id, @doc_id)
        ON CONFLICT(word_id, doc_id) DO NOTHING
        |}
        (fun stmt ->
           Int_map.iter (fun word_id _pos_s ->
               State.add_word_id_doc_id_link ~word_id ~doc_id;
               bind_names stmt
                 [ ("@word_id", INT (Int64.of_int word_id))
                 ; ("@doc_id", INT doc_id)
                 ];
               step stmt;
               reset stmt;
             )
             x.pos_s_of_word
        );
      (* Mark the document fully written. *)
      step_stmt ~db
        {|
        UPDATE doc_info
        SET status = 'COMPLETED'
        WHERE id = @doc_id
        |}
        ~names:[ ("@doc_id", INT doc_id) ]
        ignore;
      if not already_in_transaction then (
        step_stmt ~db "COMMIT" ignore;
      );
    )
(* Total line count of document [doc_id], as cached in [doc_info] at
   indexing time. *)
let global_line_count ~doc_id =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT global_line_count FROM doc_info
WHERE id = @doc_id
|}
    ~names:[ ("@doc_id", INT doc_id) ]
    (fun st -> column_int st 0)
(* Page count of document [doc_id], as cached in [doc_info]. *)
let page_count ~doc_id =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT page_count FROM doc_info
WHERE id = @doc_id
|}
    ~names:[ ("@doc_id", INT doc_id) ]
    (fun st -> column_int st 0)
(* Collect the values of [m], in increasing key order, into a read-only
   vector. *)
let ccvector_of_int_map
  : 'a . 'a Int_map.t -> 'a CCVector.ro_vector =
  fun m ->
    CCVector.freeze (CCVector.of_seq (Seq.map snd (Int_map.to_seq m)))
(* Query used by [is_indexed]: a [doc_info] row for [doc_hash] counts only
   once indexing finished (status COMPLETED, set at the end of
   [write_raw_to_db]). Exported via the .mli as a plain string. *)
let is_indexed_sql =
  {|
SELECT 1
FROM doc_info
WHERE hash = @doc_hash
AND status = 'COMPLETED'
|}
(* Whether a document with hash [doc_hash] has been fully indexed.
   [data_count] is 0 when the query yields no row. *)
let is_indexed ~doc_hash =
  let open Sqlite3_utils in
  step_stmt is_indexed_sql
    ~names:[ ("@doc_hash", TEXT doc_hash) ]
    (fun st -> data_count st > 0)
(* The word stored at absolute position [pos] of document [doc_id]. *)
let word_of_pos ~doc_id pos : string =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT word.word
FROM position p
JOIN word
ON word.id = p.word_id
WHERE p.doc_id = @doc_id
AND p.pos = @pos
|}
    ~names:[ ("@doc_id", INT doc_id); ("@pos", INT (Int64.of_int pos)) ]
    (fun st -> column_text st 0)
(* ASCII-lowercased variant of [word_of_pos]. *)
let word_ci_of_pos ~doc_id pos : string =
  String.lowercase_ascii (word_of_pos ~doc_id pos)
(* Words whose positions fall within the inclusive range [start, end_inc] of
   document [doc_id], in ascending position order.

   Results are memoized in an LRU cache keyed by (doc_id, start, end_inc);
   the mutex serializes cache access across Eio fibers. NOTE: the cached
   Dynarray is returned as-is, so mutating a returned value would corrupt
   the cache. *)
let words_between_start_and_end_inc : doc_id:int64 -> int * int -> string Dynarray.t =
  let lock = Eio.Mutex.create () in
  let cache =
    CCCache.lru ~eq:(fun (x0, y0, z0) (x1, y1, z1) ->
        Int64.equal x0 x1
        && Int.equal y0 y1
        && Int.equal z0 z1
      )
      10240
  in
  fun ~doc_id (start, end_inc) ->
    Eio.Mutex.use_rw ~protect:false lock (fun () ->
        CCCache.with_cache cache (fun (doc_id, start, end_inc) ->
            let open Sqlite3_utils in
            let acc = Dynarray.create () in
            iter_stmt
              {|
SELECT word.word
FROM position p
JOIN word
ON word.id = p.word_id
WHERE p.doc_id = @doc_id
AND p.pos BETWEEN @start AND @end_inc
ORDER BY p.pos
|}
              ~names:[ ("@doc_id", INT doc_id)
                     ; ("@start", INT (Int64.of_int start))
                     ; ("@end_inc", INT (Int64.of_int end_inc))
                     ]
              (fun data ->
                 Dynarray.add_last acc (Data.to_string_exn data.(0))
              );
            acc
          )
          (doc_id, start, end_inc)
      )
(* Words of line [x] (document-wide, 0-based) of document [doc_id].
   Raises [Invalid_argument] when [x] is out of range.

   Memoized through an LRU cache keyed by (doc_id, x); the mutex serializes
   cache access across fibers. The underlying range lookup delegates to
   [words_between_start_and_end_inc] (which has its own cache). *)
let words_of_global_line_num : doc_id:int64 -> int -> string Dynarray.t =
  let lock = Eio.Mutex.create () in
  let cache =
    CCCache.lru ~eq:(fun (x0, y0) (x1, y1) ->
        Int64.equal x0 x1 && Int.equal y0 y1)
      10240
  in
  fun ~doc_id x ->
    Eio.Mutex.use_rw ~protect:false lock (fun () ->
        CCCache.with_cache cache (fun (doc_id, x) ->
            let open Sqlite3_utils in
            if x >= global_line_count ~doc_id then (
              invalid_arg "Index.words_of_global_line_num: global_line_num out of range"
            ) else (
              let start, end_inc =
                step_stmt
                  {|
SELECT start_pos, end_inc_pos
FROM line_info
WHERE doc_id = @doc_id
AND global_line_num = @x
|}
                  ~names:[ ("@doc_id", INT doc_id)
                         ; ("@x", INT (Int64.of_int x))
                         ]
                  (fun stmt ->
                     (column_int stmt 0, column_int stmt 1)
                  )
              in
              words_between_start_and_end_inc ~doc_id (start, end_inc)
            )
          )
          (doc_id, x)
      )
(* Words of page [x] (0-based) of document [doc_id].
   Raises [Invalid_argument] when [x] is outside the page range. *)
let words_of_page_num ~doc_id x : string Dynarray.t =
  let open Sqlite3_utils in
  if x >= page_count ~doc_id then
    invalid_arg "Index.words_of_page_num: page_num out of range"
  else begin
    let range =
      step_stmt
        {|
SELECT start_pos, end_inc_pos
FROM page_info
WHERE doc_id = @doc_id
AND page_num = @x
|}
        ~names:[ ("@doc_id", INT doc_id); ("@x", INT (Int64.of_int x)) ]
        (fun st -> (column_int st 0, column_int st 1))
    in
    words_between_start_and_end_inc ~doc_id range
  end
(* Reconstruct line [x] of document [doc_id] by concatenating its words
   without a separator. Raises [Invalid_argument] when out of range. *)
let line_of_global_line_num ~doc_id x =
  if x >= global_line_count ~doc_id then
    invalid_arg "Index.line_of_global_line_num: global_line_num out of range"
  else
    String.concat "" (Dynarray.to_list (words_of_global_line_num ~doc_id x))
(* Resolve a document-wide line number to its (page, line-within-page)
   location. Raises [Invalid_argument] when out of range. *)
let line_loc_of_global_line_num ~doc_id global_line_num : Line_loc.t =
  let open Sqlite3_utils in
  if global_line_num >= global_line_count ~doc_id then
    invalid_arg "Index.line_loc_of_global_line_num: global_line_num out of range"
  else begin
    let page_num, line_num_in_page =
      step_stmt
        {|
SELECT page_num, line_num_in_page
FROM line_info
WHERE doc_id = @doc_id
AND global_line_num = @global_line_num
|}
        ~names:[ ("@doc_id", INT doc_id)
               ; ("@global_line_num", INT (Int64.of_int global_line_num)) ]
        (fun st -> (column_int st 0, column_int st 1))
    in
    { Line_loc.page_num; line_num_in_page; global_line_num }
  end
(* Resolve an absolute word position to a (line location, offset-in-line)
   pair, by finding the line whose [start_pos, end_inc_pos] range contains
   [pos]. *)
let loc_of_pos ~doc_id pos : Loc.t =
  let open Sqlite3_utils in
  let pos_in_line, global_line_num =
    step_stmt
      {|
SELECT @pos - start_pos, global_line_num
FROM line_info
WHERE doc_id = @doc_id
AND @pos BETWEEN start_pos AND end_inc_pos
|}
      ~names:[ ("@doc_id", INT doc_id)
             ; ("@pos", INT (Int64.of_int pos)) ]
      (fun stmt ->
         (column_int stmt 0, column_int stmt 1)
      )
  in
  let line_loc = line_loc_of_global_line_num ~doc_id global_line_num in
  { line_loc; pos_in_line }
(* Highest word position of document [doc_id], as cached in [doc_info] at
   indexing time. *)
let max_pos ~doc_id =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT max_pos
FROM doc_info
WHERE id = @doc_id
|}
    ~names:[ ("@doc_id", INT doc_id) ]
    (fun st -> column_int st 0)
(* Number of lines on page [page] of document [doc_id].

   Bug fix: the WHERE clause previously filtered on a non-existent [page]
   column, which makes SQLite fail to prepare the statement at runtime; the
   [page_info] schema's column is [page_num] (and every other page_info
   query in this module uses [page_num]). *)
let line_count_of_page_num ~doc_id page : int =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT line_count
FROM page_info
WHERE doc_id = @doc_id
AND page_num = @page
|}
    ~names:[ ("@doc_id", INT doc_id)
           ; ("@page", INT (Int64.of_int page)) ]
    (fun stmt ->
       column_int stmt 0
    )
(* Inclusive (start_pos, end_inc_pos) word-position range covered by the
   given line. Raises [Invalid_argument] when out of range. *)
let start_end_inc_pos_of_global_line_num ~doc_id global_line_num =
  let open Sqlite3_utils in
  if global_line_num >= global_line_count ~doc_id then
    invalid_arg "Index.start_end_inc_pos_of_global_line_num: global_line_num out of range"
  else
    step_stmt
      {|
SELECT start_pos, end_inc_pos
FROM line_info
WHERE doc_id = @doc_id
AND global_line_num = @global_line_num
|}
      ~names:[ ("@doc_id", INT doc_id)
             ; ("@global_line_num", INT (Int64.of_int global_line_num)) ]
      (fun st -> (column_int st 0, column_int st 1))
(* Core search machinery. External code normally uses the top-level [search]
   wrapper further below; the job-level pieces are also re-exported (see
   index.mli) for finer-grained control. *)
module Search = struct
  module ET = Search_phrase.Enriched_token

  (* Positions (ascending per word id) of all occurrences of the given word
     ids in document [doc_id], concatenated in the order the ids are
     supplied. *)
  let positions_of_words
      ~doc_id
      (words : int Seq.t)
    : int Dynarray.t =
    let open Sqlite3_utils in
    let acc = Dynarray.create () in
    let f data =
      Dynarray.add_last acc (Data.to_int_exn data.(0))
    in
    with_stmt
      {|
SELECT
p.pos
FROM position p
WHERE doc_id = @doc_id
AND word_id = @word_id
ORDER BY p.pos
|}
      (fun stmt ->
         Seq.iter (fun word_id ->
             bind_names stmt [ ("@doc_id", INT doc_id)
                             ; ("@word_id", INT (Int64.of_int word_id))
                             ];
             Rc.check (iter stmt ~f);
             reset stmt;
           )
           words
      );
    acc

  (* Positions near [around_pos] whose indexed word is compatible with
     [token]. The scan window depends on whether the token is linked to the
     previous one (exact/prefix/suffix linked tokens must sit exactly at
     [around_pos + 1]; fuzzy linked tokens get a small window) and is
     clipped to [within] when given. *)
  let usable_positions
      ~doc_id
      ?within
      ~around_pos
      (token : Search_phrase.Enriched_token.t)
    : int Seq.t =
    let open Sqlite3_utils in
    Eio.Fiber.yield ();
    let match_typ = ET.match_typ token in
    let start, end_inc =
      let start, end_inc =
        if ET.is_linked_to_prev token then (
          match match_typ with
          | `Fuzzy ->
            (around_pos - !Params.max_linked_token_search_dist,
             around_pos + !Params.max_linked_token_search_dist)
          | `Exact | `Prefix | `Suffix ->
            (around_pos + 1,
             around_pos + 1)
        ) else (
          (around_pos - !Params.max_token_search_dist,
           around_pos + !Params.max_token_search_dist)
        )
      in
      match within with
      | None -> (start, end_inc)
      | Some (within_start_pos, within_end_inc_pos) -> (
          (max within_start_pos start, min within_end_inc_pos end_inc)
        )
    in
    let positions : int Dynarray.t =
      let acc : int Dynarray.t =
        Dynarray.create ()
      in
      (* Memoize the (potentially expensive) compatibility verdict per
         distinct indexed word within this call. *)
      let cache : (string, bool) Hashtbl.t = Hashtbl.create 100 in
      let f data =
        Eio.Fiber.yield ();
        let indexed_word = Data.to_string_exn data.(0) in
        let pos = Data.to_int_exn data.(1) in
        let compatible =
          match Hashtbl.find_opt cache indexed_word with
          | None -> (
              let compatible = ET.compatible_with_word token indexed_word in
              Hashtbl.replace cache indexed_word compatible;
              compatible
            )
          | Some compatible -> compatible
        in
        if compatible then (
          Dynarray.add_last acc pos
        )
      in
      (
        (* Pre-filter in SQL where the token kind allows it; the final
           verdict still comes from [ET.compatible_with_word] above. *)
        let extra_sql =
          match ET.data token with
          | `Explicit_spaces -> (
              {|AND (
word LIKE ' %'
OR
word LIKE char(9) || '%'
OR
word LIKE char(10) || '%'
OR
word LIKE char(13) || '%'
)
|}
            )
          | `String search_word -> (
              (* Escape for embedding in a single-quoted LIKE pattern.
                 NOTE(review): '_' (the single-char LIKE wildcard) is not
                 escaped — confirm whether exact/prefix pre-filters can
                 over-match for search words containing '_'. *)
              let search_word = search_word
                                |> CCString.replace ~sub:"'" ~by:"''"
                                |> CCString.replace ~sub:"\\" ~by:"\\\\"
                                |> CCString.replace ~sub:"%" ~by:"\\%"
              in
              match match_typ with
              | `Fuzzy | `Suffix -> ""
              | `Exact -> (
                  Fmt.str "AND word LIKE '%s' ESCAPE '\\'" search_word
                )
              | `Prefix -> (
                  Fmt.str "AND word LIKE '%s%%' ESCAPE '\\'" search_word
                )
            )
        in
        iter_stmt
          (Fmt.str
             {|
SELECT
word.word AS word,
p.pos as pos
FROM position p
JOIN word
ON p.word_id = word.id
WHERE p.doc_id = @doc_id
AND p.pos BETWEEN @start AND @end_inc
%s
|}
             extra_sql)
          ~names:[ ("@doc_id", INT doc_id)
                 ; ("@start", INT (Int64.of_int start))
                 ; ("@end_inc", INT (Int64.of_int end_inc))
                 ]
          f
      );
      acc
    in
    Dynarray.to_seq positions

  (* All placements of the token list [l] starting around [around_pos]:
     each element of the result is one list of chosen positions, one
     position per token, each chosen near the previous token's position. *)
  let search_around_pos
      ~doc_id
      ~(within : (int * int) option)
      (around_pos : int)
      (l : Search_phrase.Enriched_token.t list)
    : int list Seq.t =
    let rec aux around_pos l =
      Eio.Fiber.yield ();
      match l with
      | [] -> Seq.return []
      | token :: rest -> (
          usable_positions
            ~doc_id
            ?within
            ~around_pos
            token
          |> Seq.flat_map (fun pos ->
              aux pos rest
              |> Seq.map (fun l -> pos :: l)
            )
        )
    in
    aux around_pos l

  (* [Search_result_heap.merge] with a fiber yield, to keep long merge folds
     cooperative. *)
  let search_result_heap_merge_with_yield x y =
    Eio.Fiber.yield ();
    Search_result_heap.merge x y

  module Search_job = struct
    (* Raised out of [run] when [terminate_on_result_found] is set and a
       match exists; the exception is exported via index.mli, so it is
       presumably caught by callers — confirm at call sites. *)
    exception Result_found

    type t = {
      stop_signal : Stop_signal.t;
      terminate_on_result_found : bool;
      cancellation_notifier : bool Atomic.t;  (* set to true on cancellation *)
      doc_id : int64;
      within_same_line : bool;
      phrase : Search_phrase.t;
      start_pos : int;  (* candidate position for the phrase's first token *)
      search_limit_per_start : int;  (* result heap size cap for this job *)
    }

    (* Plain record constructor. *)
    let make
        stop_signal
        ~terminate_on_result_found
        ~cancellation_notifier
        ~doc_id
        ~within_same_line
        ~phrase
        ~start_pos
        ~search_limit_per_start
      =
      {
        stop_signal;
        terminate_on_result_found;
        cancellation_notifier;
        doc_id;
        within_same_line;
        phrase;
        start_pos;
        search_limit_per_start;
      }

    (* Search for [t.phrase] anchored at [t.start_pos] (taken as the first
       token's position), keeping at most [t.search_limit_per_start] results.
       Races the search against [t.stop_signal]: on cancellation, flags
       [t.cancellation_notifier] and returns an empty heap. *)
    let run (t : t) : Search_result_heap.t =
      match Search_phrase.enriched_tokens t.phrase with
      | [] -> Search_result_heap.empty
      | _ :: rest -> (
          let doc_id = t.doc_id in
          let within =
            if t.within_same_line then (
              (* Constrain subsequent tokens to the line containing
                 [t.start_pos]. *)
              let loc = loc_of_pos ~doc_id t.start_pos in
              Some (start_end_inc_pos_of_global_line_num ~doc_id loc.line_loc.global_line_num)
            ) else (
              None
            )
          in
          Eio.Fiber.first
            (fun () ->
               Stop_signal.await t.stop_signal;
               Atomic.set t.cancellation_notifier true;
               Search_result_heap.empty)
            (fun () ->
               search_around_pos
                 ~doc_id
                 ~within
                 t.start_pos
                 rest
               |> Seq.map (fun l -> t.start_pos :: l)
               |> Seq.map (fun (l : int list) ->
                   if t.terminate_on_result_found then (
                     raise Result_found
                   );
                   Eio.Fiber.yield ();
                   (* Bracket-matching score: for each (opening, closing)
                      index pair within the found words, count it as matched
                      when the same-kind symbols strictly between the two
                      document positions balance out. *)
                   let opening_closing_symbol_pairs =
                     List.map (fun pos -> word_of_pos ~doc_id pos) l
                     |> Misc_utils.opening_closing_symbol_pairs
                   in
                   let found_phrase_opening_closing_symbol_match_count =
                     let pos_arr : int array = Array.of_list l in
                     List.fold_left (fun total (x, y) ->
                         let pos_x = pos_arr.(x) in
                         let pos_y = pos_arr.(y) in
                         let c_x = String.get (word_of_pos ~doc_id pos_x) 0 in
                         let c_y = String.get (word_of_pos ~doc_id pos_y) 0 in
                         assert (List.exists (fun (x, y) -> c_x = x && c_y = y)
                                   Params.opening_closing_symbols);
                         if pos_x < pos_y then (
                           (* Track unclosed openers of this kind between
                              the two positions; [None] marks an
                              over-closed (unbalanced) stretch. *)
                           let outstanding_opening_symbol_count =
                             OSeq.(pos_x + 1 --^ pos_y)
                             |> Seq.fold_left (fun count pos ->
                                 match count with
                                 | Some count -> (
                                     let word = word_of_pos ~doc_id pos in
                                     if String.length word = 1 then (
                                       if String.get word 0 = c_x then (
                                         Some (count + 1)
                                       ) else if String.get word 0 = c_y then (
                                         if count = 0 then (
                                           None
                                         ) else (
                                           Some (count - 1)
                                         )
                                       ) else (
                                         Some count
                                       )
                                     ) else (
                                       Some count
                                     )
                                   )
                                 | None -> None
                               )
                               (Some 0)
                           in
                           match outstanding_opening_symbol_count with
                           | Some 0 -> total + 1
                           | _ -> total
                         ) else (
                           total
                         )
                       )
                       0
                       opening_closing_symbol_pairs
                   in
                   Search_result.make
                     t.phrase
                     ~found_phrase:(List.map
                                      (fun pos ->
                                         Search_result.{
                                           found_word_pos = pos;
                                           found_word_ci = word_ci_of_pos ~doc_id pos;
                                           found_word = word_of_pos ~doc_id pos;
                                         }) l)
                     ~found_phrase_opening_closing_symbol_match_count
                 )
               |> Seq.fold_left (fun best_results r ->
                   Eio.Fiber.yield ();
                   (* Keep the heap within budget: once over the limit,
                      evict one result via [take_exn]. *)
                   let best_results = Search_result_heap.add best_results r in
                   if Search_result_heap.size best_results <= t.search_limit_per_start then (
                     best_results
                   ) else (
                     let best_results, _ = Search_result_heap.take_exn best_results in
                     best_results
                   )
                 )
                 Search_result_heap.empty
            )
        )
  end

  module Search_job_group = struct
    (* A batch of [Search_job]s sharing everything except the start
       position. *)
    type t = {
      terminate_on_result_found : bool;
      stop_signal : Stop_signal.t;
      cancellation_notifier : bool Atomic.t;
      doc_id : int64;
      within_same_line : bool;
      phrase : Search_phrase.t;
      possible_start_pos_list : int list;
      search_limit_per_start : int;
    }

    (* One [Search_job] per start position in the group. *)
    let unpack (group : t) : Search_job.t Seq.t =
      let
        {
          stop_signal;
          terminate_on_result_found;
          cancellation_notifier;
          doc_id;
          within_same_line;
          phrase;
          possible_start_pos_list;
          search_limit_per_start;
        } = group in
      List.to_seq possible_start_pos_list
      |> Seq.map (fun start_pos ->
          Search_job.make
            stop_signal
            ~terminate_on_result_found
            ~cancellation_notifier
            ~doc_id
            ~within_same_line
            ~phrase
            ~start_pos
            ~search_limit_per_start
        )

    (* Run the group's jobs sequentially and merge their result heaps. *)
    let run (t : t) =
      unpack t
      |> Seq.map Search_job.run
      |> Seq.fold_left Search_result_heap.merge Search_result_heap.empty
  end

  (* Plan the search of [exp] over one document: for each flattened phrase,
     find candidate start positions for its first token (restricted to
     words present in the document and, optionally, to [search_scope]),
     then chunk those starts into job groups sized for the task pool. *)
  let make_search_job_groups
      stop_signal
      ?(terminate_on_result_found = false)
      ~(cancellation_notifier : bool Atomic.t)
      ~doc_id
      ~doc_word_ids
      ~global_first_word_candidates_lookup
      ~within_same_line
      ~(search_scope : Diet.Int.t option)
      (exp : Search_exp.t)
    : Search_job_group.t Seq.t =
    if Search_exp.is_empty exp then (
      Seq.empty
    ) else (
      Search_exp.flattened exp
      |> List.to_seq
      |> Seq.flat_map (fun phrase ->
          (* Word ids that could match the phrase's first token, narrowed
             to words actually occurring in this document. *)
          let first_word_candidates =
            match Search_phrase.enriched_tokens phrase with
            | [] -> failwith "unexpected case"
            | first_word :: _ -> (
                Search_phrase.Enriched_token.Data_map.find
                  (Search_phrase.Enriched_token.data first_word)
                  global_first_word_candidates_lookup
                |> Int_set.inter doc_word_ids
              )
          in
          let possible_starts =
            first_word_candidates
            |> Int_set.to_seq
            |> positions_of_words ~doc_id
            |> (fun arr ->
                match search_scope with
                | None -> arr
                | Some search_scope -> (
                    Dynarray.filter (fun x ->
                        Diet.Int.mem x search_scope
                      ) arr
                  )
              )
          in
          let possible_start_count = Dynarray.length possible_starts in
          if possible_start_count = 0 then (
            Seq.empty
          ) else (
            (* Spread the per-document result budget over the starts, with
               a floor per start (ceiling division). *)
            let search_limit_per_start =
              max
                Params.search_result_min_per_start
                (
                  (Params.default_search_result_total_per_document + possible_start_count - 1) / possible_start_count
                )
            in
            let search_chunk_size =
              max 10 (possible_start_count / Task_pool.size)
            in
            OSeq.(0 --^ possible_start_count)
            |> OSeq.chunks search_chunk_size
            |> Seq.map (fun index_arr ->
                Array.map (fun i ->
                    Dynarray.get possible_starts i
                  ) index_arr
                |> Array.to_list
              )
            |> Seq.map (fun possible_start_pos_list ->
                {
                  Search_job_group.stop_signal;
                  terminate_on_result_found;
                  cancellation_notifier;
                  doc_id;
                  within_same_line;
                  phrase;
                  possible_start_pos_list;
                  search_limit_per_start;
                }
              )
          )
        )
    )

  (* Run the planned job groups on [pool] and merge all result heaps. *)
  let search
      pool
      stop_signal
      ?terminate_on_result_found
      ~cancellation_notifier
      ~doc_id
      ~doc_word_ids
      ~global_first_word_candidates_lookup
      ~within_same_line
      ~search_scope
      (exp : Search_exp.t)
    : Search_result_heap.t =
    make_search_job_groups
      stop_signal
      ?terminate_on_result_found
      ~cancellation_notifier
      ~doc_id
      ~doc_word_ids
      ~global_first_word_candidates_lookup
      ~within_same_line
      ~search_scope
      exp
    |> List.of_seq
    |> Task_pool.map_list pool Search_job_group.run
    |> List.fold_left search_result_heap_merge_with_yield Search_result_heap.empty
end
(* Top-level entry point: run [Search.search] and return the results sorted
   by relevance, or [None] when the search was cancelled via [stop_signal]
   before completing. *)
let search
    pool
    stop_signal
    ?terminate_on_result_found
    ~doc_id
    ~doc_word_ids
    ~global_first_word_candidates_lookup
    ~within_same_line
    ~search_scope
    (exp : Search_exp.t)
  : Search_result.t array option =
  let cancellation_notifier = Atomic.make false in
  let results =
    Search.search
      pool
      stop_signal
      ?terminate_on_result_found
      ~cancellation_notifier
      ~doc_id
      ~doc_word_ids
      ~global_first_word_candidates_lookup
      ~within_same_line
      ~search_scope
      exp
    |> Search_result_heap.to_seq
    |> Array.of_seq
  in
  match Atomic.get cancellation_notifier with
  | true -> None
  | false ->
    Array.sort Search_result.compare_relevance results;
    Some results
(* Re-exports of the search internals at the top level of the module, as
   declared in index.mli. *)
module Search_job = Search.Search_job

module Search_job_group = Search.Search_job_group

let make_search_job_groups = Search.make_search_job_groups
(* Heap of (word_id, score) pairs ordered by score alone; used below to
   retain the top-scoring first-word candidates, evicting one element
   whenever a size budget is exceeded. *)
module Word_candidate_heap = CCHeap.Make_from_compare (struct
    type t = int * float

    let compare (_x0, y0) (_x1, y1) = Float.compare y0 y1
  end)
(* For each distinct first-token datum in [exp], compute the set of word ids
   from the global word DB that could match it, and record it in [acc].
   Data already present in [acc] are left untouched, so the lookup can be
   grown incrementally across successive expressions.

   For [`String] tokens, compatible words are additionally ranked by a
   similarity heuristic and capped at [min 500 (2.5 ^ word length)]
   candidates; [`Explicit_spaces] keeps every compatible word. *)
let generate_global_first_word_candidates_lookup
    pool
    ?(acc = Search_phrase.Enriched_token.Data_map.empty)
    (exp : Search_exp.t)
  : Int_set.t Search_phrase.Enriched_token.Data_map.t =
  Search_exp.flattened exp
  |> List.fold_left (fun acc phrase ->
      match Search_phrase.enriched_tokens phrase with
      | [] -> failwith "unexpected case"
      | first_word :: _rest -> (
          let data = Search_phrase.Enriched_token.data first_word in
          match Search_phrase.Enriched_token.Data_map.find_opt data acc with
          | None -> (
              (* Similarity in [0, 1]-ish: exact match scores highest, then
                 case-insensitive equality, then (ci-)substring containment
                 scaled by length ratio, else a normalized edit-distance
                 penalty. *)
              let score ~search_word ~found_word =
                let search_word_ci = String.lowercase_ascii search_word in
                let search_word_len = Int.to_float (String.length search_word) in
                let found_word_ci = String.lowercase_ascii found_word in
                let found_word_len = Int.to_float (String.length found_word) in
                if String.equal search_word found_word then (
                  1.0
                ) else if String.equal search_word_ci found_word_ci then (
                  0.9
                ) else if CCString.find ~sub:search_word found_word >= 0 then (
                  search_word_len
                  /.
                  found_word_len
                ) else if CCString.find ~sub:search_word_ci found_word_ci >= 0 then (
                  0.9
                  *.
                  (search_word_len
                   /.
                   found_word_len)
                ) else (
                  1.0
                  -.
                  (Int.to_float (Spelll.edit_distance search_word_ci found_word_ci)
                   /.
                   search_word_len
                  )
                )
              in
              (* All words in the global word DB compatible with the token. *)
              let candidates =
                Word_db.filter
                  pool
                  (Search_phrase.Enriched_token.compatible_with_word first_word)
              in
              let candidates =
                match data with
                | `Explicit_spaces -> (
                    Dynarray.fold_left (fun acc (id, _word) ->
                        Int_set.add id acc
                      )
                      Int_set.empty
                      candidates
                  )
                | `String s -> (
                    (* Longer search words admit exponentially more
                       candidates, up to a hard cap of 500. *)
                    let s_len = Int.to_float (String.length s) in
                    let max_candidate_count = min 500 (Int.of_float @@ Float.pow 2.5 s_len) in
                    Dynarray.fold_left (fun acc (id, word) ->
                        let acc = Word_candidate_heap.add acc (id, score ~search_word:s ~found_word:word) in
                        if Word_candidate_heap.size acc <= max_candidate_count then (
                          acc
                        ) else (
                          let acc, _ = Word_candidate_heap.take_exn acc in
                          acc
                        )
                      )
                      Word_candidate_heap.empty
                      candidates
                    |> Word_candidate_heap.to_seq
                    |> Seq.map (fun (id, _score) ->
                        id
                      )
                    |> Int_set.of_seq
                  )
              in
              Search_phrase.Enriched_token.Data_map.add
                data
                candidates
                acc
            )
          | Some _ -> acc
        )
    )
    acc
(* Ids of all distinct words occurring in document [doc_id], read from the
   word_id_doc_id_link table. *)
let word_ids ~doc_id =
  let open Sqlite3_utils in
  with_db (fun db ->
      fold_stmt ~db
        {|
SELECT word_id_doc_id_link.word_id
FROM word_id_doc_id_link
WHERE word_id_doc_id_link.doc_id = @doc_id
|}
        ~names:[ ("@doc_id", INT doc_id) ]
        (fun set row -> Int_set.add (Data.to_int_exn row.(0)) set)
        Int_set.empty
    )
(* All links recorded for document [doc_id], in the order SQLite returns
   them. Raises if a stored [typ] tag is not recognized by
   [Link.typ_of_string]. *)
let links ~doc_id : Link.t array =
  let open Sqlite3_utils in
  let collected = Dynarray.create () in
  with_db (fun db ->
      iter_stmt ~db
        {|
SELECT start_pos, end_inc_pos, typ, link
FROM link
WHERE link.doc_id = @doc_id
|}
        ~names:[ ("@doc_id", INT doc_id) ]
        (fun row ->
           let typ =
             Option.get (Link.typ_of_string (Data.to_string_exn row.(2)))
           in
           Dynarray.add_last collected
             { Link.start_pos = Data.to_int_exn row.(0)
             ; end_inc_pos = Data.to_int_exn row.(1)
             ; typ
             ; link = Data.to_string_exn row.(3)
             }
        );
    );
  Dynarray.to_array collected
================================================
FILE: lib/index.mli
================================================
(** Location of a line within a document. *)
module Line_loc : sig
  type t

  val page_num : t -> int

  val line_num_in_page : t -> int

  val global_line_num : t -> int

  val compare : t -> t -> int
end

(** Location of a single word position within a document. *)
module Loc : sig
  type t

  val line_loc : t -> Line_loc.t

  val pos_in_line : t -> int
end

(** ASCII-lowercased variant of [word_of_pos]. *)
val word_ci_of_pos : doc_id:int64 -> int -> string

(** Word stored at the given absolute position. *)
val word_of_pos : doc_id:int64 -> int -> string

(** Words of the given document-wide line number.
    Raises [Invalid_argument] when out of range. *)
val words_of_global_line_num : doc_id:int64 -> int -> string Dynarray.t

(** Line content reconstructed by concatenating its words.
    Raises [Invalid_argument] when out of range. *)
val line_of_global_line_num : doc_id:int64 -> int -> string

(** Page/line location of the given document-wide line number.
    Raises [Invalid_argument] when out of range. *)
val line_loc_of_global_line_num : doc_id:int64 -> int -> Line_loc.t

(** Location of the given absolute word position. *)
val loc_of_pos : doc_id:int64 -> int -> Loc.t

(** Highest word position recorded for the document. *)
val max_pos : doc_id:int64 -> int

(** Words of the given page number.
    Raises [Invalid_argument] when out of range. *)
val words_of_page_num : doc_id:int64 -> int -> string Dynarray.t

(** Line count of the given page number. *)
val line_count_of_page_num : doc_id:int64 -> int -> int

(** Candidate word ids per first-token datum of the expression's phrases;
    [acc] allows building the lookup incrementally. *)
val generate_global_first_word_candidates_lookup :
  Task_pool.t ->
  ?acc:Int_set.t Search_phrase.Enriched_token.Data_map.t ->
  Search_exp.t ->
  Int_set.t Search_phrase.Enriched_token.Data_map.t

(** Search one document; [None] when cancelled via the stop signal before
    completion, otherwise results sorted by relevance. *)
val search :
  Task_pool.t ->
  Stop_signal.t ->
  ?terminate_on_result_found : bool ->
  doc_id:int64 ->
  doc_word_ids:Int_set.t ->
  global_first_word_candidates_lookup:Int_set.t Search_phrase.Enriched_token.Data_map.t ->
  within_same_line:bool ->
  search_scope:Diet.Int.t option ->
  Search_exp.t ->
  Search_result.t array option

(** A single search job, anchored at one start position. *)
module Search_job : sig
  exception Result_found

  type t

  val run : t -> Search_result_heap.t
end

(** A batch of search jobs sharing everything but the start position. *)
module Search_job_group : sig
  type t

  val unpack : t -> Search_job.t Seq.t

  val run : t -> Search_result_heap.t
end

(** Plan a search as a sequence of job groups (see [Search_job_group]). *)
val make_search_job_groups :
  Stop_signal.t ->
  ?terminate_on_result_found : bool ->
  cancellation_notifier:bool Atomic.t ->
  doc_id:int64 ->
  doc_word_ids:Int_set.t ->
  global_first_word_candidates_lookup:Int_set.t Search_phrase.Enriched_token.Data_map.t ->
  within_same_line:bool ->
  search_scope:Diet.Int.t option ->
  Search_exp.t ->
  Search_job_group.t Seq.t

(** Total line count of the document. *)
val global_line_count : doc_id:int64 -> int

(** Page count of the document. *)
val page_count : doc_id:int64 -> int

(** SQL text used by [is_indexed]. *)
val is_indexed_sql : string

(** Whether a document with this hash has been fully indexed
    (status COMPLETED). *)
val is_indexed : doc_hash:string -> bool

val refresh_last_used_batch : int64 list -> unit

val document_count : unit -> int

(** Drop all index data of the least recently used documents beyond the
    [keep_n_latest] most recent. *)
val prune_old_documents : keep_n_latest:int -> unit

(** In-memory representation of a freshly parsed document index, produced
    from its lines or pages and written out via [write_raw_to_db]. *)
module Raw : sig
  type t

  val word_ids : t -> Int_set.t

  val of_lines : Task_pool.t -> string Seq.t -> t

  val of_pages : Task_pool.t -> string list Seq.t -> t

  val links : t -> Link.t array
end

(** Ids of all distinct words occurring in the document. *)
val word_ids : doc_id:int64 -> Int_set.t

val write_raw_to_db :
  Sqlite3.db ->
  already_in_transaction:bool ->
  doc_id:int64 ->
  Raw.t ->
  unit

(** In-memory mirror of word-id/doc-id membership. *)
module State : sig
  val union_doc_ids_of_word_id_into_bv : word_id:int -> into:CCBV.t -> unit

  val read_from_db : unit -> unit
end

(** All links recorded for the document. *)
val links : doc_id:int64 -> Link.t array
================================================
FILE: lib/int_map.ml
================================================
(* Map keyed by [int], via containers' [CCMap] (a superset of stdlib [Map]). *)
include CCMap.Make (Int)
================================================
FILE: lib/int_set.ml
================================================
(* Set of [int], via containers' [CCSet] (a superset of stdlib [Set]). *)
include CCSet.Make (Int)
================================================
FILE: lib/link.ml
================================================
(* Link syntax family; [string_of_typ] / [typ_of_string] convert to and from
   the lowercase tags stored in the DB. *)
type typ = [
  | `Markdown
  | `Wiki
  | `URL
]
(* Lowercase tag stored in the DB's [link.typ] column; inverse of
   [typ_of_string]. *)
let string_of_typ : typ -> string = function
  | `Markdown -> "markdown"
  | `Wiki -> "wiki"
  | `URL -> "url"
(* Case-insensitive inverse of [string_of_typ]; [None] on unknown input. *)
let typ_of_string (s : string) : typ option =
  List.assoc_opt (String.lowercase_ascii s)
    [ ("markdown", `Markdown); ("wiki", `Wiki); ("url", `URL) ]
(* A link occurrence: the inclusive word-position span it covers, its
   syntax kind, and the link target text. *)
type t = {
  start_pos : int;
  end_inc_pos : int;
  typ : typ;
  link : string;
}
================================================
FILE: lib/misc_utils.ml
================================================
(* Print "error: <msg>" and terminate the process with exit code 1.
   NOTE(review): the message goes to stdout, not stderr — confirm this is
   intentional, as error output conventionally goes to stderr. *)
let exit_with_error_msg (msg : string) =
  Printf.printf "error: %s\n" msg;
  exit 1
(* Build a set of the ASCII-lowercased elements of [l]. *)
let ci_string_set_of_list (l : string list) =
  String_set.of_list (List.map String.lowercase_ascii l)
(* Whether [c] occurs within the first [n] bytes of [s] (the whole string
   when it is shorter than [n]). *)
let first_n_chars_of_string_contains ~n s c =
  let prefix =
    if String.length s <= n then s
    else String.sub s 0 n
  in
  String.contains prefix c
(* Printable ASCII: space (0x20) through tilde (0x7E). *)
let char_is_usable c =
  ' ' <= c && c <= '~'
(* Replace bytes that are neither printable ASCII nor part of a valid UTF-8
   multi-byte sequence with spaces, preserving the string's byte length.

   The output buffer starts as all spaces; only bytes worth keeping are
   blitted over:
   - a valid single-byte (ASCII) decode is kept only when printable
     (0x20-0x7E), so control characters become spaces;
   - a valid multi-byte UTF-8 sequence is copied through whole;
   - a byte at an invalid decode position is skipped, leaving the space. *)
let sanitize_string s =
  let s_len = String.length s in
  let bytes = Bytes.make s_len ' ' in
  let rec aux pos =
    if pos >= s_len then
      String.of_bytes bytes
    else (
      let decode = String.get_utf_8_uchar s pos in
      if Uchar.utf_decode_is_valid decode then (
        let c = String.get_uint8 s pos in
        if c land 0b1000_0000 = 0b0000_0000 then (
          (* ASCII byte: keep only the printable range. *)
          if 0x20 <= c && c <= 0x7E then (
            Bytes.blit_string s pos bytes pos 1
          );
          aux (pos+1)
        ) else (
          (* Valid multi-byte sequence: copy it verbatim. *)
          let len = Uchar.utf_decode_length decode in
          Bytes.blit_string s pos bytes pos len;
          aux (pos+len)
        )
      ) else (
        (* Invalid byte: advance one byte, leaving the space placeholder. *)
        aux (pos+1)
      )
    )
  in
  aux 0
(* Single pass over [s] yielding its length together with its elements as a
   list (in original order). *)
let length_and_list_of_seq (s : 'a Seq.t) : int * 'a list =
  let rec go n rev_elems seq =
    match seq () with
    | Seq.Nil -> (n, List.rev rev_elems)
    | Seq.Cons (x, rest) -> go (n + 1) (x :: rev_elems) rest
  in
  go 0 [] s
(* Integer division of [x] by [y], rounded to the nearest integer
   (for non-negative operands; exact halves round up). *)
let div_round_to_closest x y =
  let half = y / 2 in
  (x + half) / y
(* Ceiling division for non-negative operands. *)
let div_round_up x y =
  (x + y - 1) / y
(* Pair up opening/closing symbol occurrences in the token list [l].

   Returns (open_index, close_index) pairs of indices into [l], pairing each
   closing symbol with the most recent unmatched opening symbol of the same
   kind — a per-symbol stack of indices kept in the [m] accumulator. Only
   single-character tokens are considered; unmatched symbols produce no
   pair. Pairs are accumulated by consing, so the most recently closed pair
   comes first. Symbol kinds come from [Params.opening_closing_symbols]. *)
let opening_closing_symbol_pairs (l : string list) : (int * int) list =
  let _, pairs =
    CCList.foldi
      (fun ((m, pairs) : (int list Char_map.t) * ((int * int) list)) i s ->
         if String.length s = 1 then (
           let c = String.get s 0 in
           match List.assoc_opt c Params.opening_closing_symbols with
           | Some _ -> (
               (* Opening symbol: push its index on that symbol's stack. *)
               let stack =
                 match Char_map.find_opt c m with
                 | None -> []
                 | Some l -> l
               in
               (Char_map.add c (i :: stack) m, pairs)
             )
           | None -> (
               match List.assoc_opt c Params.opening_closing_symbols_flipped with
               | Some corresponding_open_symbol -> (
                   (* Closing symbol: pop the matching opener, if any. *)
                   let stack =
                     match Char_map.find_opt corresponding_open_symbol m with
                     | None -> []
                     | Some l -> l
                   in
                   match stack with
                   | [] -> (m, pairs)
                   | x :: xs -> (
                       (Char_map.add corresponding_open_symbol xs m, (x, i) :: pairs)
                     )
                 )
               | None -> (m, pairs)
             )
         ) else (
           (m, pairs)
         )
      )
      (Char_map.empty, [])
      l
  in
  pairs
(* Components of the current working directory, reversed so the deepest
   component comes first — the representation consumed by [path_of_parts]. *)
let cwd_path_parts () =
  List.rev (CCString.split ~by:Filename.dir_sep (Sys.getcwd ()))
(* Re-join a reversed component list into a path. An empty list or a lone
   "" component (the filesystem root) yields the separator itself. *)
let path_of_parts parts =
  let components = List.rev parts in
  match components with
  | [] | [ "" ] -> Filename.dir_sep
  | [ single ] -> single
  | _ -> String.concat Filename.dir_sep components
(* Normalize a glob pattern to an absolute form:
   - "" and "." components are dropped; ".." pops the last kept component;
   - from the first "**" component onward, the remainder (including "**")
     is kept verbatim with no further normalization;
   - a relative glob is anchored at the current working directory. *)
let normalize_glob_to_absolute glob =
  let rec aux acc parts =
    match parts with
    | [] -> path_of_parts acc
    | x :: xs -> (
        match x with
        | "" | "." -> aux acc xs
        | ".." -> (
            let acc =
              match acc with
              | [] -> []
              | _ :: xs -> xs
            in
            aux acc xs
          )
        | "**" -> (
            (* Keep the remainder of the pattern untouched. *)
            aux (List.rev parts @ acc) []
          )
        | _ -> (
            aux (x :: acc) xs
          )
      )
  in
  let glob_parts = CCString.split ~by:Filename.dir_sep glob in
  match glob_parts with
  | "" :: l -> (
      (* Absolute path on Unix-like systems *)
      aux [ "" ] l
    )
  | _ -> (
      aux (cwd_path_parts ()) glob_parts
    )
(* Normalize a path to an absolute form: "" and "." components are dropped,
   ".." pops the last kept component, and a relative path is anchored at
   the current working directory. Purely lexical — no symlink resolution. *)
let normalize_path_to_absolute path =
  let rec aux acc path_parts =
    match path_parts with
    | [] -> path_of_parts acc
    | x :: xs -> (
        match x with
        | "" | "." -> aux acc xs
        | ".." -> (
            let acc =
              match acc with
              | [] -> []
              | _ :: xs -> xs
            in
            aux acc xs
          )
        | _ -> (
            aux (x :: acc) xs
          )
      )
  in
  let path_parts = CCString.split ~by:Filename.dir_sep path in
  match path_parts with
  | "" :: l -> (
      (* Absolute path on Unix-like systems *)
      aux [ "" ] l
    )
  | _ -> (
      aux (cwd_path_parts ()) path_parts
    )
================================================
FILE: lib/option_syntax.ml
================================================
(* Binding operators for [Option]: [let*] is monadic bind, [let+] is map. *)
let ( let* ) = Option.bind

let ( let+ ) x y = Option.map y x
================================================
FILE: lib/params.ml
================================================
(* Result budget per document, spread across start positions (with a floor
   per start) — see Index.Search.make_search_job_groups. *)
let default_search_result_total_per_document = 50

let search_result_min_per_start = 5

let max_token_size = 500

(* Max distance (in word positions) scanned around the previous token's
   position when placing the next token. *)
let default_max_token_search_dist = 50

let max_token_search_dist = ref default_max_token_search_dist

(* Tighter distance used for fuzzy tokens linked to the previous token. *)
let default_max_linked_token_search_dist = 2

let max_linked_token_search_dist = ref default_max_linked_token_search_dist

let default_index_chunk_size = 5000

let index_chunk_size = ref default_index_chunk_size

let search_word_automaton_cache_size = 200

let float_compare_margin = 0.000_001

(* Bracket pairs considered by the bracket-matching relevance scoring
   (see Misc_utils.opening_closing_symbol_pairs). *)
let opening_closing_symbols = [ ('(', ')')
                              ; ('[', ']')
                              ; ('{', '}')
                              ]

(* Closing-to-opening lookup derived from the above. *)
let opening_closing_symbols_flipped = List.map (fun (x, y) -> (y, x)) opening_closing_symbols

let default_max_fuzzy_edit_dist = 2

let max_fuzzy_edit_dist = ref default_max_fuzzy_edit_dist

(* Full SQLite schema of the index DB. All tables are WITHOUT ROWID with
   composite primary keys on (doc_id, ...); [doc_info] is keyed by document
   hash with a separate integer [id] used as the foreign doc_id elsewhere. *)
let db_schema =
  {|
CREATE TABLE IF NOT EXISTS line_info (
doc_id INTEGER,
global_line_num INTEGER,
start_pos INTEGER,
end_inc_pos INTEGER,
page_num INTEGER,
line_num_in_page INTEGER,
PRIMARY KEY (doc_id, global_line_num)
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS line_info_index_1 ON line_info (start_pos);

CREATE INDEX IF NOT EXISTS line_info_index_2 ON line_info (end_inc_pos);

CREATE TABLE IF NOT EXISTS position (
doc_id INTEGER,
pos INTEGER,
word_id INTEGER,
PRIMARY KEY (doc_id, pos)
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS position_index_1 ON position (doc_id, word_id, pos);

CREATE TABLE IF NOT EXISTS page_info (
doc_id INTEGER,
page_num INTEGER,
line_count INTEGER,
start_pos INTEGER,
end_inc_pos INTEGER,
PRIMARY KEY (doc_id, page_num)
) WITHOUT ROWID;

CREATE TABLE IF NOT EXISTS doc_info (
hash TEXT PRIMARY KEY,
id INTEGER,
page_count INTEGER,
global_line_count INTEGER,
max_pos INTEGER,
last_used INTEGER,
status TEXT
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS doc_info_index_1 ON doc_info (id);

CREATE INDEX IF NOT EXISTS doc_info_index_2 ON doc_info (last_used);

CREATE TABLE IF NOT EXISTS link (
doc_id INTEGER,
start_pos INTEGER,
end_inc_pos INTEGER,
typ TEXT,
link TEXT,
PRIMARY KEY (doc_id, start_pos, end_inc_pos)
) WITHOUT ROWID;

CREATE TABLE IF NOT EXISTS word (
id INTEGER,
word TEXT,
PRIMARY KEY (id)
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS word_index_1 ON word (word);

CREATE INDEX IF NOT EXISTS word_index_2 ON word (word COLLATE NOCASE);

CREATE TABLE IF NOT EXISTS word_id_doc_id_link (
word_id INTEGER,
doc_id INTEGER,
PRIMARY KEY (word_id, doc_id)
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS word_id_doc_id_link_index_1 ON word_id_doc_id_link (doc_id);
|}

(* Path to the index DB file; set at startup. *)
let db_path : string option ref = ref None
================================================
FILE: lib/parser_components.ml
================================================
open Angstrom
(* ASCII whitespace recognized by the query parsers. *)
let is_space c =
  c = ' ' || c = '\t' || c = '\n' || c = '\r'
(* Consume zero or more whitespace characters. *)
let skip_spaces = skip_while is_space
(* Complement of [is_space]. *)
let is_not_space c = not (is_space c)
(* Consume the remaining input (possibly empty) as a string. *)
let any_string : string t = take_while (fun _ -> true)
(* ASCII letter. *)
let is_letter c =
  ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
(* ASCII decimal digit. *)
let is_digit c =
  '0' <= c && c <= '9'
(* ASCII letter or digit. *)
let is_alphanum c =
  is_letter c || is_digit c
(* High bit set: the byte is part of a multi-byte UTF-8 sequence (lead or
   continuation), i.e. not plain ASCII. *)
let is_possibly_utf_8 c =
  Char.code c >= 0x80
(* Consume exactly one UTF-8 encoded character (1-4 bytes), dispatching on
   the lead byte's bit pattern. Fails on EOF or on a byte that is not a
   valid lead byte (e.g. a bare continuation byte). NOTE: the continuation
   bytes themselves are not validated here. *)
let utf_8_char =
  peek_char >>= fun c ->
  match c with
  | None -> fail "Eof"
  | Some c -> (
      let c = Char.code c in
      if c land 0b1000_0000 = 0b0000_0000 then (
        take 1
      ) else if c land 0b1110_0000 = 0b1100_0000 then (
        take 2
      ) else if c land 0b1111_0000 = 0b1110_0000 then (
        take 3
      ) else if c land 0b1111_1000 = 0b1111_0000 then (
        take 4
      ) else (
        fail "Invalid UTF-8"
      )
    )
(* Copied from Angstrom README. *)
(* Left-associative operator chaining: parses [e (op e)*], folding results
   leftward through the functions produced by [op]. *)
let chainl1 e op =
  let rec go acc =
    (lift2 (fun f x -> f acc x) op e >>= go) <|> return acc in
  e >>= fun init -> go init
================================================
FILE: lib/search_exp.ml
================================================
(* Marker selecting exact / prefix / suffix matching for what follows it. *)
type match_typ_marker = [ `Exact | `Prefix | `Suffix ]
[@@deriving show]

(* Raw search-expression AST produced by [Parsers.p]. *)
type exp = [
  | `Word of string
  | `Match_typ_marker of match_typ_marker
  | `Explicit_spaces
  | `List of exp list
  | `Paren of exp
  | `Binary_op of binary_op * exp * exp
  | `Optional of exp
]
[@@deriving show]

and binary_op =
  | Or
[@@deriving show]

(* A parsed search expression paired with its flattening into the concrete
   search phrases it denotes (one per alternative). *)
type t = {
  exp : exp;
  flattened : Search_phrase.t list;
}
[@@deriving show]
(* The concrete search phrases denoted by [t]. *)
let flattened (t : t) = t.flattened

(* Expression denoting no phrase at all. *)
let empty : t = {
  exp = `List [];
  flattened = [];
}

(* An expression is empty when it denotes no phrase, or only empty
   phrases. *)
let is_empty (t : t) =
  t.flattened = []
  ||
  List.for_all Search_phrase.is_empty t.flattened
(* Structural equality on the underlying expressions.

   Fix: the original omitted the `Match_typ_marker and
   `Explicit_spaces cases, so both fell through to the catch-all and
   compared unequal even to themselves — [equal] was non-reflexive for
   any expression containing ', ^, $ or ~. *)
let equal (t1 : t) (t2 : t) =
  let rec aux (e1 : exp) (e2 : exp) =
    match e1, e2 with
    | `Word x1, `Word x2 -> String.equal x1 x2
    | `Match_typ_marker x1, `Match_typ_marker x2 -> x1 = x2
    | `Explicit_spaces, `Explicit_spaces -> true
    | `List l1, `List l2 -> List.equal aux l1 l2
    | `Paren e1, `Paren e2 -> aux e1 e2
    | `Binary_op (Or, e1x, e1y), `Binary_op (Or, e2x, e2y) ->
      aux e1x e2x && aux e1y e2y
    | `Optional e1, `Optional e2 -> aux e1 e2
    | _, _ -> false
  in
  aux t1.exp t2.exp
(* Smart constructors for [exp]. *)
let as_paren x : exp = `Paren x

let as_list l : exp = `List l

let as_word s : exp = `Word s

let as_word_list (l : string list) : exp = `List (List.map as_word l)
module Parsers = struct
  open Angstrom
  open Parser_components

  (* A run of phrase text: any characters except the reserved operator
     characters below, with backslash escaping any single character.
     The concatenated text is then tokenized (spaces kept). *)
  let phrase : string list Angstrom.t =
    many1 (
      take_while1 (fun c ->
          match c with
          | '?'
          | '|'
          | '\\'
          | '('
          | ')'
          | '\''
          | '^'
          | '$'
          | '~' -> false
          | _ -> true
        )
      <|>
      (char '\\' *> any_char >>| fun c -> Printf.sprintf "%c" c)
    )
    >>| fun l ->
    String.concat "" l
    |> Tokenization.tokenize ~drop_spaces:false
    |> List.of_seq

  (* '|' denotes alternation; spaces after it are skipped. *)
  let or_op =
    char '|' *> skip_spaces *> return (fun x y -> `Binary_op (Or, x, y))

  (* Full expression grammar: sequences of optionally-'?'-prefixed
     atoms, chained left-associatively by '|'. *)
  let p : exp Angstrom.t =
    fix (fun (exp : exp Angstrom.t) : exp Angstrom.t ->
        let base =
          choice [
            (phrase >>| as_word_list);
            (char '\'' *> return (`Match_typ_marker `Exact));
            (char '^' *> return (`Match_typ_marker `Prefix));
            (char '$' *> return (`Match_typ_marker `Suffix));
            (char '~' *> return (`Explicit_spaces));
            (string "()" *> return (as_word_list []));
            (char '(' *> exp <* char ')' >>| as_paren);
          ]
        in
        let opt_base =
          choice [
            (* '?' before a bare phrase makes only its first word
               optional. *)
            (char '?' *> skip_spaces *> phrase
             >>| fun l ->
             match l with
             | [] -> failwith "unexpected case"
             | x :: xs -> (
                 as_list [ `Optional (as_word x); as_word_list xs ]
               )
            );
            (char '?' *> skip_spaces *> base >>| fun p -> `Optional p);
            base;
          ]
        in
        let opt_bases =
          many1 opt_base
          >>| fun l -> `List l
        in
        chainl1 opt_bases or_op
      )
    <* skip_spaces
end
(* Splice the elements of directly nested `List nodes into their
   parent `List; all other constructors are rebuilt recursively. *)
let flatten_nested_lists (exp : exp) : exp =
  let rec aux (exp : exp) =
    match exp with
    | `Word _
    | `Match_typ_marker _
    | `Explicit_spaces -> exp
    | `List l -> (
        let splice e =
          match aux e with
          | `List inner -> inner
          | other -> [ other ]
        in
        `List (List.concat_map splice l)
      )
    | `Paren e -> `Paren (aux e)
    | `Binary_op (op, x, y) -> `Binary_op (op, aux x, aux y)
    | `Optional e -> `Optional (aux e)
  in
  aux exp
(* Expand an expression into the concrete search phrases it denotes:
   `Binary_op (Or, ...) contributes one branch per side, `Optional
   contributes an absent and a present branch, and `List takes the
   cartesian product of its elements' expansions.  [group_id] ties
   together tokens from the same parenthesized/optional scope; a
   fresh id is allocated per scope. *)
let flatten (exp : exp) : Search_phrase.t list =
  let get_group_id =
    (* Fresh-id generator for token groups. *)
    let counter = ref 0 in
    fun () ->
      let x = !counter in
      counter := x + 1;
      x
  in
  let rec aux group_id (exp : exp) : Search_phrase.annotated_token list Seq.t =
    match exp with
    | `Match_typ_marker x -> (
        Seq.return [
          Search_phrase.{ data = `Match_typ_marker x; group_id }
        ]
      )
    | `Word s ->
      Seq.return [
        Search_phrase.{ data = `String s; group_id }
      ]
    | `Explicit_spaces ->
      Seq.return [
        Search_phrase.{ data = `Explicit_spaces; group_id }
      ]
    | `List l -> (
        l
        |> List.to_seq
        |> Seq.map (aux group_id)
        |> OSeq.cartesian_product
        |> Seq.map List.concat
      )
    | `Paren e -> (
        aux (get_group_id ()) e
      )
    | `Binary_op (Or, x, y) -> (
        Seq.append
          (aux group_id x)
          (aux group_id y)
      )
    | `Optional x -> (
        (* Absent branch ([]) plus all present branches. *)
        Seq.cons [] (aux (get_group_id ()) x)
      )
  in
  aux (get_group_id ()) exp
  |> Seq.map (fun l ->
      List.to_seq l
      |> Search_phrase.of_annotated_tokens)
  |> List.of_seq
  |> List.sort_uniq Search_phrase.compare
(* Parse a search expression.  Blank input yields [empty]; a syntax
   error yields [None].  On success the tree is normalized with
   [flatten_nested_lists] and its phrase list precomputed. *)
let parse s =
  if String.length s = 0 || String.for_all Parser_components.is_space s then (
    Some empty
  ) else (
    match Angstrom.(parse_string ~consume:Consume.All) Parsers.p s with
    | Ok exp -> (
        let exp = flatten_nested_lists exp in
        Some
          { exp;
            flattened = flatten exp;
          }
      )
    | Error _ -> None
  )
================================================
FILE: lib/search_exp.mli
================================================
(** A parsed search expression. *)
type t

val pp : Format.formatter -> t -> unit

(** The expression corresponding to blank input. *)
val empty : t

(** True when the expression denotes no non-empty search phrase. *)
val is_empty : t -> bool

(** All concrete search phrases the expression denotes. *)
val flattened : t -> Search_phrase.t list

(** Returns [None] on syntax error; blank input parses to [empty]. *)
val parse : string -> t option

val equal : t -> t -> bool
================================================
FILE: lib/search_phrase.ml
================================================
(* How a search token is matched against an indexed word. *)
type match_typ = [
  | `Fuzzy
  | `Exact
  | `Suffix
  | `Prefix
]
[@@deriving show, ord]

(* Marker characters usable in queries: ' (exact), ^ (prefix),
   $ (suffix). *)
type match_typ_marker = [ `Exact | `Prefix | `Suffix ]
[@@deriving show, ord]
(* Query character for a match-typ marker. *)
let char_of_match_typ_marker (x : match_typ_marker) =
  match x with
  | `Exact -> '\''
  | `Prefix -> '^'
  | `Suffix -> '$'

(* Same as [char_of_match_typ_marker], as a one-character string. *)
let string_of_match_typ_marker (x : match_typ_marker) =
  String.make 1 (char_of_match_typ_marker x)
(* Token as produced by search-expression flattening; [group_id] ties
   together tokens originating from the same scope. *)
type annotated_token = {
  data : [
    | `String of string
    | `Match_typ_marker of match_typ_marker
    | `Explicit_spaces
  ];
  group_id : int;
}
[@@deriving show]

(* Intermediate representation used while resolving token linkage and
   match-typ markers. *)
type ir0 = {
  data : [
    | `String of string
    | `Match_typ_marker of match_typ_marker
    | `Explicit_spaces
  ];
  is_linked_to_prev : bool;
  is_linked_to_next : bool;
  match_typ : match_typ option;  (* None until a marker pass assigns one. *)
}
module Enriched_token = struct
  (* Token payload after match-typ markers have been resolved:
     either a literal word or an explicit space. *)
  type data = [ `String of string | `Explicit_spaces ]
  [@@deriving ord]

  module Data_map = Map.Make (struct
      type t = data

      let compare = compare_data
    end)

  let pp_data formatter data =
    Fmt.pf formatter "%s"
      (match data with
       | `String s -> s
       | `Explicit_spaces -> " ")

  (* A search token enriched with linkage to its neighbours, a
     precompiled fuzzy-match automaton, and its match type. *)
  type t = {
    data : data;
    is_linked_to_prev : bool;
    is_linked_to_next : bool;
    automaton : Spelll.automaton;
    match_typ : match_typ;
  }

  let make data ~is_linked_to_prev ~is_linked_to_next automaton match_typ =
    { data; is_linked_to_prev; is_linked_to_next; automaton; match_typ }

  let pp fmt (x : t) =
    Fmt.pf fmt "%a:%b:%b:%a"
      pp_data
      x.data
      x.is_linked_to_prev
      x.is_linked_to_next
      pp_match_typ
      x.match_typ

  let data (t : t) =
    t.data

  let match_typ (t : t) =
    t.match_typ

  let automaton (t : t) =
    t.automaton

  let is_linked_to_prev (t : t) =
    t.is_linked_to_prev

  let is_linked_to_next (t : t) =
    t.is_linked_to_next

  (* Lexicographic comparison over data, linkage flags and match type.
     The automaton is derived data and deliberately not compared. *)
  let compare (x : t) (y : t) =
    match compare_data x.data y.data with
    | 0 -> (
        match Bool.compare x.is_linked_to_prev y.is_linked_to_prev with
        | 0 -> (
            match Bool.compare x.is_linked_to_next y.is_linked_to_next with
            | 0 -> (
                compare_match_typ x.match_typ y.match_typ
              )
            | n -> n
          )
        | n -> n
      )
    | n -> n

  let equal (x : t) (y : t) =
    compare x y = 0

  (* Whether this token can match [indexed_word] under the token's
     match type. *)
  let compatible_with_word (token : t) indexed_word =
    String.length indexed_word > 0
    &&
    (match data token with
     | `Explicit_spaces -> (
         Parser_components.is_space indexed_word.[0]
       )
     | `String search_word -> (
         let search_word_ci = String.lowercase_ascii search_word in
         let indexed_word_ci = String.lowercase_ascii indexed_word in
         (* All-lowercase search words opt into case-insensitive
            matching (smartcase-style). *)
         let use_ci_match = String.equal search_word search_word_ci in
         let search_word_len = String.length search_word in
         let indexed_word_len = String.length indexed_word in
         let edit_dist_based_match_min_len = !Params.max_fuzzy_edit_dist + 1 in
         (* Words starting with a non-ASCII byte are only matched
            verbatim. *)
         if Parser_components.is_possibly_utf_8 indexed_word.[0] then (
           String.equal search_word indexed_word
         ) else (
           match match_typ token with
           | `Fuzzy -> (
               (* CI equality, substring either way (with a length
                  floor for the reverse direction), or edit-distance
                  match guarded by length floors and a cheap
                  first-chars filter. *)
               String.equal search_word_ci indexed_word_ci
               || CCString.find ~sub:search_word_ci indexed_word_ci >= 0
               || (indexed_word_len >= 2
                   && CCString.find ~sub:indexed_word_ci search_word_ci >= 0)
               || (search_word_len >= edit_dist_based_match_min_len
                   && indexed_word_len >= edit_dist_based_match_min_len
                   && Misc_utils.first_n_chars_of_string_contains ~n:3 indexed_word_ci search_word_ci.[0]
                   && Spelll.match_with (automaton token) indexed_word_ci)
             )
           | `Exact -> (
               if use_ci_match then (
                 String.equal search_word_ci indexed_word_ci
               ) else (
                 String.equal search_word indexed_word
               )
             )
           | `Prefix -> (
               if use_ci_match then (
                 CCString.prefix ~pre:search_word_ci indexed_word_ci
               ) else (
                 CCString.prefix ~pre:search_word indexed_word
               )
             )
           | `Suffix -> (
               if use_ci_match then (
                 CCString.suffix ~suf:search_word_ci indexed_word_ci
               ) else (
                 CCString.suffix ~suf:search_word indexed_word
               )
             )
         )
       )
    )
end
(* A search phrase: the raw annotated tokens plus their enriched form
   used for matching. *)
type t = {
  annotated_tokens : annotated_token list;
  enriched_tokens : Enriched_token.t list;
}

let is_empty (t : t) =
  List.is_empty t.enriched_tokens

let pp fmt (t : t) =
  Fmt.pf fmt "%a"
    Fmt.(list ~sep:sp Enriched_token.pp)
    t.enriched_tokens

(* LRU cache of Spelll automatons keyed by search word, shared behind
   a mutex. *)
type cache = {
  mutex : Eio.Mutex.t;
  cache : (string, Spelll.automaton) CCCache.t;
}

let cache = {
  mutex = Eio.Mutex.create ();
  cache = CCCache.lru ~eq:String.equal Params.search_word_automaton_cache_size;
}
(* Phrases compare by their enriched tokens only; [annotated_tokens]
   is the raw input they were derived from and is ignored. *)
let compare (t1 : t) (t2 : t) =
  List.compare Enriched_token.compare t1.enriched_tokens t2.enriched_tokens

let equal (t1 : t) (t2 : t) =
  compare t1 t2 = 0

let empty : t =
  {
    annotated_tokens = [];
    enriched_tokens = [];
  }
(* First lowering pass: drop whitespace tokens and record, for each
   kept token, whether it was directly adjacent to the previous token
   (same group and no intervening space).
   NOTE(review): [token_is_space] reads [s.[0]], so `String tokens are
   assumed non-empty — presumably guaranteed by the tokenizer. *)
let ir0_s_of_annotated_tokens (tokens : annotated_token Seq.t) : ir0 list =
  let token_is_space (token : annotated_token) =
    match token.data with
    | `String s -> Parser_components.is_space (String.get s 0)
    | _ -> false
  in
  let rec aux acc (prev_token : annotated_token option) (tokens : annotated_token Seq.t) =
    match tokens () with
    | Seq.Nil -> List.rev acc
    | Seq.Cons (token, rest) -> (
        let is_linked_to_prev =
          match prev_token with
          | None -> false
          | Some prev_token -> (
              (prev_token.group_id = token.group_id)
              &&
              (not (token_is_space prev_token))
            )
        in
        if token_is_space token then (
          (* Spaces are dropped; passing None breaks the link chain. *)
          aux acc None rest
        ) else (
          let ir0 : ir0 =
            { data = token.data;
              is_linked_to_prev;
              is_linked_to_next = false;
              match_typ = None;
            }
          in
          aux (ir0 :: acc) (Some token) rest
        )
      )
  in
  aux [] None tokens
(* Derive each token's [is_linked_to_next] from its successor's
   [is_linked_to_prev]; the final token keeps its default (false). *)
let ir0_s_link_forward (ir0_s : ir0 list) : ir0 list =
  let rec aux l =
    match l with
    | [] -> []
    | [ last ] -> [ last ]
    | x :: (successor :: _ as rest) ->
      { x with is_linked_to_next = successor.is_linked_to_prev }
      :: aux rest
  in
  aux ir0_s
(* Front-to-back pass resolving ' (exact) and ^ (prefix) markers.
   A marker glued to the front of a token run is removed and the run's
   tokens receive a match type: all tokens get `Exact, except that for
   ^ the run's final token gets `Prefix instead.  [token_removed]
   clears the first run token's [is_linked_to_prev], accounting for
   the removed marker token. *)
let ir0_process_exact_prefix_match_typ_markers (ir0_s : ir0 list) : ir0 list =
  let rec aux
      (acc : ir0 list)
      token_removed
      (marker : ([ `Exact ] * [ `Exact | `Prefix ]) option)
      (ir0_s : ir0 list)
    =
    match ir0_s with
    | [] -> List.rev acc
    | x :: xs -> (
        match marker with
        | None -> (
            let default () =
              aux (x :: acc) false None xs
            in
            match x.data with
            | `String _ | `Explicit_spaces ->
              default ()
            | `Match_typ_marker m -> (
                match x.match_typ with
                | None -> (
                    (* Only a marker glued to a following token starts
                       a marked run; a lone marker is kept as-is. *)
                    if x.is_linked_to_next then (
                      match m with
                      | `Exact ->
                        aux acc true (Some (`Exact, `Exact)) xs
                      | `Prefix ->
                        aux acc true (Some (`Exact, `Prefix)) xs
                      | _ ->
                        default ()
                    ) else (
                      default ()
                    )
                  )
                | Some _ ->
                  default ()
              )
          )
        | Some (m, m_last) -> (
            let x =
              if x.is_linked_to_prev then (
                { x with
                  is_linked_to_prev = not token_removed;
                  match_typ = Some (
                      (* Interior tokens of the run get [m]; the run's
                         last token gets [m_last]. *)
                      if x.is_linked_to_next then
                        (m :> match_typ)
                      else
                        (m_last :> match_typ)
                    );
                }
              ) else (
                x
              )
            in
            let marker =
              (* Marker scope ends when the run ends. *)
              if x.is_linked_to_next then
                marker
              else
                None
            in
            aux (x :: acc) false marker xs
          )
      )
  in
  aux [] false None ir0_s
(* Back-to-front pass resolving $ (suffix) markers, mirroring the
   exact/prefix pass: a marker glued to the end of a token run is
   removed and the run's tokens get `Exact, except the run's first
   token which gets `Suffix.  The input is processed reversed, and the
   accumulator restores original order (no final List.rev). *)
let ir0_process_suffix_match_typ_markers (ir0_s : ir0 list) : ir0 list =
  let rec aux
      (acc : ir0 list)
      token_removed
      (marker : ([ `Suffix ] * [ `Exact ]) option)
      (ir0_s : ir0 list)
    =
    match ir0_s with
    | [] -> acc
    | x :: xs -> (
        match marker with
        | None -> (
            let default () =
              aux (x :: acc) false None xs
            in
            match x.data with
            | `String _ | `Explicit_spaces ->
              default ()
            | `Match_typ_marker m -> (
                match x.match_typ with
                | None -> (
                    (* Only a marker glued to a preceding token starts
                       a marked run. *)
                    if x.is_linked_to_prev then (
                      match m with
                      | `Suffix ->
                        aux acc true (Some (`Suffix, `Exact)) xs
                      | _ ->
                        default ()
                    ) else (
                      default ()
                    )
                  )
                | Some _ ->
                  default ()
              )
          )
        | Some (m_first, m) -> (
            let x =
              if x.is_linked_to_next then (
                { x with
                  is_linked_to_next = not token_removed;
                  match_typ = Some (
                      (* Interior tokens get [m]; the run's first
                         token (last visited) gets [m_first]. *)
                      if x.is_linked_to_prev then
                        (m :> match_typ)
                      else
                        (m_first :> match_typ)
                    );
                }
              ) else (
                x
              )
            in
            let marker =
              if x.is_linked_to_prev then
                marker
              else
                None
            in
            aux (x :: acc) false marker xs
          )
      )
  in
  aux [] false None (List.rev ir0_s)
(* Final lowering: leftover (unconsumed) markers become literal words,
   and each string token gets a Spelll automaton (shared through the
   LRU cache) for fuzzy matching.  Tokens default to `Fuzzy when no
   marker pass assigned a match type. *)
let enriched_tokens_of_ir0 (ir0_s : ir0 list) : Enriched_token.t list =
  List.map (fun (ir0 : ir0) ->
      let data =
        match ir0.data with
        | `String s -> `String s
        | `Match_typ_marker m ->
          `String (string_of_match_typ_marker m)
        | `Explicit_spaces -> `Explicit_spaces
      in
      let is_linked_to_prev = ir0.is_linked_to_prev in
      let is_linked_to_next = ir0.is_linked_to_next in
      let automaton =
        match data with
        | `String string -> (
            Eio.Mutex.use_rw cache.mutex ~protect:false (fun () ->
                let automaton =
                  CCCache.with_cache cache.cache
                    (Spelll.of_string ~limit:!Params.max_fuzzy_edit_dist)
                    string
                in
                automaton
              )
          )
        | `Explicit_spaces ->
          (* Dummy automaton; explicit spaces are never fuzzy-matched. *)
          Spelll.of_string ~limit:0 ""
      in
      Enriched_token.make
        data
        ~is_linked_to_prev
        ~is_linked_to_next
        automaton
        (Option.value ~default:`Fuzzy ir0.match_typ)
    ) ir0_s
(* Build a phrase by running the full lowering pipeline over the
   annotated tokens: drop spaces and compute linkage, propagate
   linkage forward, resolve '/^ then $ markers, then enrich. *)
let of_annotated_tokens
    (annotated_tokens : annotated_token Seq.t)
  =
  let enriched_tokens =
    enriched_tokens_of_ir0
      (ir0_process_suffix_match_typ_markers
         (ir0_process_exact_prefix_match_typ_markers
            (ir0_s_link_forward
               (ir0_s_of_annotated_tokens annotated_tokens))))
  in
  {
    annotated_tokens = List.of_seq annotated_tokens;
    enriched_tokens;
  }
(* Build a phrase from plain words; all tokens share group 0. *)
let of_tokens
    (tokens : string Seq.t)
  =
  of_annotated_tokens
    (Seq.map (fun s -> { data = `String s; group_id = 0 }) tokens)

(* Tokenize free text (keeping spaces) and build a phrase from it. *)
let parse phrase =
  of_tokens (Tokenization.tokenize ~drop_spaces:false phrase)
(* Tokens as supplied at construction time. *)
let annotated_tokens t =
  t.annotated_tokens

(* Processed tokens used for matching and comparison. *)
let enriched_tokens t =
  t.enriched_tokens
================================================
FILE: lib/search_phrase.mli
================================================
(** How a search token is matched against an indexed word. *)
type match_typ = [
  | `Fuzzy
  | `Exact
  | `Suffix
  | `Prefix
]
[@@deriving show, ord]

(** Marker characters usable in queries: ' (exact), ^ (prefix),
    $ (suffix). *)
type match_typ_marker = [ `Exact | `Prefix | `Suffix ]
[@@deriving show]

(** Token produced by search-expression flattening; [group_id] ties
    together tokens from the same scope. *)
type annotated_token = {
  data : [
    | `String of string
    | `Match_typ_marker of match_typ_marker
    | `Explicit_spaces
  ];
  group_id : int;
}
[@@deriving show]

(** A search token enriched with neighbour linkage, a fuzzy-match
    automaton, and its match type. *)
module Enriched_token : sig
  type data = [ `String of string | `Explicit_spaces ]
  [@@deriving ord]

  module Data_map : Map.S with type key = data

  type t

  val make :
    data ->
    is_linked_to_prev:bool ->
    is_linked_to_next:bool ->
    Spelll.automaton ->
    match_typ ->
    t

  val data : t -> data

  val equal : t -> t -> bool

  val pp : Format.formatter -> t -> unit

  val match_typ : t -> match_typ

  val is_linked_to_prev : t -> bool

  val is_linked_to_next : t -> bool

  val automaton : t -> Spelll.automaton

  (** Whether the token can match the given indexed word under the
      token's match type. *)
  val compatible_with_word : t -> string -> bool
end

(** A search phrase. *)
type t

val empty : t

val compare : t -> t -> int

val equal : t -> t -> bool

val pp : Format.formatter -> t -> unit

val of_annotated_tokens : annotated_token Seq.t -> t

(** Builds a phrase from plain words; tokens all share one group. *)
val of_tokens : string Seq.t -> t

(** Tokenizes free text (spaces kept) and builds a phrase. *)
val parse : string -> t

val is_empty : t -> bool

val annotated_tokens : t -> annotated_token list

val enriched_tokens : t -> Enriched_token.t list
================================================
FILE: lib/search_result.ml
================================================
(* A word of the indexed document that matched one search token. *)
type indexed_found_word = {
  found_word_pos : int;  (* Token position within the index. *)
  found_word_ci : string;  (* Case-insensitive (presumably lowercased) form. *)
  found_word : string;
}

(* A scored match of one search phrase against a document. *)
type t = {
  score : float;
  search_phrase : Search_phrase.t;
  found_phrase : indexed_found_word list;
}
(* Two results are equal when they are for equal search phrases and
   their found words sit at the same positions.  The score is derived
   from these at construction time, so it is not compared. *)
let equal t1 t2 =
  Search_phrase.equal t1.search_phrase t2.search_phrase
  &&
  List.equal
    (fun x1 x2 -> x1.found_word_pos = x2.found_word_pos)
    t1.found_phrase
    t2.found_phrase
module Score = struct
  module ET = Search_phrase.Enriched_token

  (* Character counts aggregated over the whole phrase, bucketed by
     match quality: exact, case-insensitive exact, substring (either
     direction), case-insensitive substring (either direction), and
     fuzzy (edit-distance). *)
  type stats = {
    total_search_char_count : float;
    total_found_char_count : float;
    exact_match_found_char_count : float;
    ci_exact_match_found_char_count : float;
    sub_match_search_word_in_found_word_char_count : float;
    sub_match_found_word_in_search_word_char_count : float;
    sub_match_search_char_count : float;
    sub_match_found_char_count : float;
    ci_sub_match_search_word_in_found_word_char_count : float;
    ci_sub_match_found_word_in_search_word_char_count : float;
    ci_sub_match_search_char_count : float;
    ci_sub_match_found_char_count : float;
    fuzzy_match_edit_distance : float;
    fuzzy_match_search_char_count : float;
    fuzzy_match_found_char_count : float;
  }

  let empty_stats = {
    total_search_char_count = 0.0;
    total_found_char_count = 0.0;
    exact_match_found_char_count = 0.0;
    ci_exact_match_found_char_count = 0.0;
    sub_match_search_word_in_found_word_char_count = 0.0;
    sub_match_found_word_in_search_word_char_count = 0.0;
    sub_match_search_char_count = 0.0;
    sub_match_found_char_count = 0.0;
    ci_sub_match_search_word_in_found_word_char_count = 0.0;
    ci_sub_match_found_word_in_search_word_char_count = 0.0;
    ci_sub_match_search_char_count = 0.0;
    ci_sub_match_found_char_count = 0.0;
    fuzzy_match_edit_distance = 0.0;
    fuzzy_match_search_char_count = 0.0;
    fuzzy_match_found_char_count = 0.0;
  }

  (* Relevance score of [found_phrase] as a match for [search_phrase].
     Preconditions (asserted): the phrase is non-empty and has the
     same length as [found_phrase]. *)
  let score
      (search_phrase : Search_phrase.t)
      ~(found_phrase : indexed_found_word list)
      ~(found_phrase_opening_closing_symbol_match_count : int)
    : float =
    assert (not (Search_phrase.is_empty search_phrase));
    assert (List.length (Search_phrase.enriched_tokens search_phrase) = List.length found_phrase);
    let found_phrase_opening_closing_symbol_match_count =
      Int.to_float found_phrase_opening_closing_symbol_match_count
    in
    let quite_close_to_zero x =
      Float.abs x < Params.float_compare_margin
    in
    let add_sub_match_search_and_found_char_count ~search_word_len ~found_word_len stats =
      { stats with
        sub_match_search_char_count =
          stats.sub_match_search_char_count +. search_word_len;
        sub_match_found_char_count =
          stats.sub_match_found_char_count +. found_word_len;
      }
    in
    let add_ci_sub_match_search_and_found_char_count ~search_word_len ~found_word_len stats =
      { stats with
        ci_sub_match_search_char_count =
          stats.ci_sub_match_search_char_count +. search_word_len;
        ci_sub_match_found_char_count =
          stats.ci_sub_match_found_char_count +. found_word_len;
      }
    in
    (* Classify each (search token, found word) pair into exactly one
       bucket, tried from strongest (exact) to weakest (fuzzy). *)
    let stats =
      List.fold_left2 (fun (stats : stats) (token : ET.t) { found_word_ci; found_word; _ } ->
          let found_word_len = Int.to_float (String.length found_word) in
          match ET.data token with
          | `Explicit_spaces -> (
              (* An explicit-space token counts as a 1-char exact match. *)
              { stats with
                total_search_char_count =
                  stats.total_search_char_count +. 1.0;
                total_found_char_count =
                  stats.total_found_char_count +. 1.0;
                exact_match_found_char_count =
                  stats.exact_match_found_char_count +. 1.0;
              }
            )
          | `String search_word -> (
              let search_word_len = Int.to_float (String.length search_word) in
              let search_word_ci = String.lowercase_ascii search_word in
              let stats =
                { stats with
                  total_search_char_count =
                    stats.total_search_char_count +. search_word_len;
                  total_found_char_count =
                    stats.total_found_char_count +. found_word_len;
                }
              in
              if String.equal search_word found_word then (
                { stats with
                  exact_match_found_char_count =
                    stats.exact_match_found_char_count +. found_word_len;
                }
              ) else if String.equal search_word_ci found_word_ci then (
                { stats with
                  ci_exact_match_found_char_count =
                    stats.ci_exact_match_found_char_count +. found_word_len;
                }
              ) else if CCString.find ~sub:search_word found_word >= 0 then (
                { stats with
                  sub_match_search_word_in_found_word_char_count =
                    stats.sub_match_search_word_in_found_word_char_count +. search_word_len;
                }
                |> add_sub_match_search_and_found_char_count
                  ~search_word_len
                  ~found_word_len
              ) else if CCString.find ~sub:found_word search_word >= 0 then (
                { stats with
                  sub_match_found_word_in_search_word_char_count =
                    stats.sub_match_found_word_in_search_word_char_count +. found_word_len;
                }
                |> add_sub_match_search_and_found_char_count
                  ~search_word_len
                  ~found_word_len
              ) else if CCString.find ~sub:search_word_ci found_word_ci >= 0 then (
                { stats with
                  ci_sub_match_search_word_in_found_word_char_count =
                    stats.ci_sub_match_search_word_in_found_word_char_count +. search_word_len;
                }
                |> add_ci_sub_match_search_and_found_char_count
                  ~search_word_len
                  ~found_word_len
              ) else if CCString.find ~sub:found_word_ci search_word_ci >= 0 then (
                { stats with
                  ci_sub_match_found_word_in_search_word_char_count =
                    stats.ci_sub_match_found_word_in_search_word_char_count +. found_word_len;
                }
                |> add_ci_sub_match_search_and_found_char_count
                  ~search_word_len
                  ~found_word_len
              ) else (
                (* Fallback: fuzzy bucket, weighted by edit distance. *)
                { stats with
                  fuzzy_match_edit_distance =
                    stats.fuzzy_match_edit_distance
                    +. Int.to_float (Spelll.edit_distance search_word_ci found_word_ci);
                  fuzzy_match_search_char_count =
                    stats.fuzzy_match_search_char_count +. search_word_len;
                  fuzzy_match_found_char_count =
                    stats.fuzzy_match_found_char_count +. found_word_len;
                }
              )
            )
        )
        empty_stats
        (Search_phrase.enriched_tokens search_phrase)
        found_phrase
    in
    let search_phrase_length =
      Search_phrase.enriched_tokens search_phrase
      |> List.length
      |> Int.to_float
    in
    (* Number of distinct index positions matched. *)
    let unique_match_count =
      found_phrase
      |> List.map (fun x -> x.found_word_pos)
      |> List.sort_uniq Int.compare
      |> List.length
      |> Int.to_float
    in
    (* Sum of gaps between consecutive matches, and how many
       consecutive pairs are out of order. *)
    let (total_distance, out_of_order_match_count, _) =
      List.fold_left
        (fun (total_dist, out_of_order_match_count, last_pos) { found_word_pos = pos; _ } ->
           match last_pos with
           | None -> (total_dist, out_of_order_match_count, Some pos)
           | Some last_pos ->
             let total_dist = total_dist +. Int.to_float (abs (pos - last_pos)) in
             let out_of_order_match_count =
               if last_pos <= pos then
                 out_of_order_match_count
               else
                 out_of_order_match_count +. 1.0
             in
             (total_dist, out_of_order_match_count, Some pos)
        )
        (0.0, 0.0, None)
        found_phrase
    in
    let average_distance =
      let gaps = unique_match_count -. 1.0 in
      assert (gaps >= 0.0);
      if quite_close_to_zero gaps then (
        0.0
      ) else (
        total_distance /. gaps
      )
    in
    (* Per-bucket scores in [0, 1]; 0 when the bucket is empty. *)
    let exact_match_score =
      if quite_close_to_zero stats.exact_match_found_char_count then (
        0.0
      ) else (
        1.0
      )
    in
    let ci_exact_match_score =
      if quite_close_to_zero stats.ci_exact_match_found_char_count then (
        0.0
      ) else (
        1.0
      )
    in
    let sub_match_score_s_in_f =
      if quite_close_to_zero stats.sub_match_found_char_count then (
        0.0
      ) else (
        stats.sub_match_search_word_in_found_word_char_count
        /.
        stats.sub_match_found_char_count
      )
    in
    let sub_match_score_f_in_s =
      if quite_close_to_zero stats.sub_match_search_char_count then (
        0.0
      ) else (
        stats.sub_match_found_word_in_search_word_char_count
        /.
        stats.sub_match_search_char_count
      )
    in
    let ci_sub_match_score_s_in_f =
      if quite_close_to_zero stats.ci_sub_match_found_char_count then (
        0.0
      ) else (
        stats.ci_sub_match_search_word_in_found_word_char_count
        /.
        stats.ci_sub_match_found_char_count
      )
    in
    let ci_sub_match_score_f_in_s =
      if quite_close_to_zero stats.ci_sub_match_search_char_count then (
        0.0
      ) else (
        stats.ci_sub_match_found_word_in_search_word_char_count
        /.
        stats.ci_sub_match_search_char_count
      )
    in
    let fuzzy_match_score =
      if quite_close_to_zero stats.fuzzy_match_search_char_count then (
        0.0
      ) else (
        1.0
        -.
        (stats.fuzzy_match_edit_distance
         /.
         stats.fuzzy_match_search_char_count)
      )
    in
    let total_char_count =
      stats.total_search_char_count +. stats.total_found_char_count
    in
    (* Phrases containing any uppercase search word get a 10% bonus on
       the case-sensitive buckets. *)
    let case_sensitive_bonus_multiplier =
      if
        List.exists
          (fun token ->
             match ET.data token with
             | `Explicit_spaces -> false
             | `String search_word -> (
                 not
                   (String.equal
                      (String.lowercase_ascii search_word)
                      search_word))
          )
          (Search_phrase.enriched_tokens search_phrase)
      then (
        1.10
      ) else (
        1.00
      )
    in
    (* Per-bucket weights: each bucket's share of the characters. *)
    let exact_match_weight =
      case_sensitive_bonus_multiplier
      *.
      (stats.exact_match_found_char_count *. 2.0) /. total_char_count
    in
    let ci_exact_match_weight =
      (stats.ci_exact_match_found_char_count *. 2.0) /. total_char_count
    in
    let f_in_s_penalty_multiplier = 0.8 in
    let sub_match_weight_s_in_f =
      case_sensitive_bonus_multiplier
      *.
      stats.sub_match_found_char_count /. stats.total_found_char_count
    in
    let sub_match_weight_f_in_s =
      case_sensitive_bonus_multiplier
      *.
      f_in_s_penalty_multiplier
      *.
      (stats.sub_match_search_char_count /. stats.total_search_char_count)
    in
    let ci_sub_match_weight_s_in_f =
      (stats.ci_sub_match_found_char_count /. stats.total_found_char_count)
    in
    let ci_sub_match_weight_f_in_s =
      f_in_s_penalty_multiplier
      *.
      (stats.ci_sub_match_search_char_count /. stats.total_search_char_count)
    in
    let fuzzy_match_weight =
      (stats.fuzzy_match_found_char_count *. 2.0) /. total_char_count
    in
    (* Adjacent matches score 1.0; wider spreads are penalized up to
       20% relative to the max token search distance. *)
    let distance_score =
      if average_distance <= 1.0 then (
        1.0
      ) else (
        1.0 -. (0.20 *. (average_distance /. (Int.to_float !Params.max_token_search_dist)))
      )
    in
    (* Final score: multiplicative bonuses/penalties applied to the
       weighted sum of per-bucket scores. *)
    (1.0 +. (0.10 *. search_phrase_length))
    *.
    (1.0 +. (0.10 *. (unique_match_count /. search_phrase_length)))
    *.
    (1.0 -. (0.10 *. (out_of_order_match_count /. search_phrase_length)))
    *.
    (1.00 +. (0.10 *. found_phrase_opening_closing_symbol_match_count))
    *.
    distance_score
    *.
    (
      (exact_match_weight *. exact_match_score)
      +.
      (ci_exact_match_weight *. ci_exact_match_score)
      +.
      (sub_match_weight_s_in_f *. sub_match_score_s_in_f)
      +.
      (sub_match_weight_f_in_s *. sub_match_score_f_in_s)
      +.
      (ci_sub_match_weight_s_in_f *. ci_sub_match_score_s_in_f)
      +.
      (ci_sub_match_weight_f_in_s *. ci_sub_match_score_f_in_s)
      +.
      (fuzzy_match_weight *. fuzzy_match_score)
    )
end
(* Construct a scored result.  Raises Invalid_argument when the
   search phrase is empty or the found phrase's length differs from
   the search phrase's token count. *)
let make search_phrase ~found_phrase ~found_phrase_opening_closing_symbol_match_count =
  let len = List.length (Search_phrase.enriched_tokens search_phrase) in
  if len = 0 then
    invalid_arg "Search_result.make search_phrase is empty"
  else if len <> List.length found_phrase then
    invalid_arg "length of found_phrase does not match length of search_phrase"
  else
    { score =
        Score.score
          search_phrase
          ~found_phrase
          ~found_phrase_opening_closing_symbol_match_count;
      search_phrase;
      found_phrase;
    }
(* Accessors. *)
let search_phrase (t : t) =
  t.search_phrase

let found_phrase (t : t) =
  t.found_phrase

let score (t : t) =
  t.score
(* Order the more relevant result to the front. *)
let compare_relevance (t1 : t) (t2 : t) =
  let delta = Float.abs (t1.score -. t2.score) in
  if delta >= Params.float_compare_margin then
    (* Clearly different scores: higher score first. *)
    Float.compare t2.score t1.score
  else
    (* Scores within the comparison margin count as a tie: the result
       appearing earlier in the document goes first. *)
    Int.compare
      (List.hd t1.found_phrase).found_word_pos
      (List.hd t2.found_phrase).found_word_pos
================================================
FILE: lib/search_result.mli
================================================
(** A word of the indexed document that matched one search token. *)
type indexed_found_word = {
  found_word_pos : int;
  found_word_ci : string;
  found_word : string;
}

(** A scored match of one search phrase against a document. *)
type t

(** Raises [Invalid_argument] if the search phrase is empty or the
    found phrase's length differs from the search phrase's. *)
val make :
  Search_phrase.t ->
  found_phrase:indexed_found_word list ->
  found_phrase_opening_closing_symbol_match_count:int ->
  t

val search_phrase : t -> Search_phrase.t

val found_phrase : t -> indexed_found_word list

val score : t -> float

val equal : t -> t -> bool

(** Orders more relevant results first; scores within the comparison
    margin are broken by earlier document position. *)
val compare_relevance : t -> t -> int
================================================
FILE: lib/search_result_heap.ml
================================================
(* Heap of search results ordered by raw score, with the
   lowest-scoring result at the root (CCHeap is a min-heap under
   [leq]). *)
include CCHeap.Make (struct
    type t = Search_result.t

    let leq x y =
      (Search_result.score x) <= (Search_result.score y)
  end)
================================================
FILE: lib/sqlite3_utils.ml
================================================
include Sqlite3
(* Pool of SQLite connections sized to the task pool.  Each slot holds
   (last-used timestamp ref, connection). *)
let db_pool =
  Eio.Pool.create
    (* This is not ideal since validate is not called until next use of the
       pool, so an idle DB connection could be held for a lot longer than
       described here. But this seems to be the best we can do.
    *)
    ~validate:(fun (last_used, _db) ->
        (* Recycle connections idle for more than 30s. *)
        Unix.time () -. !last_used <= 30.0
      )
    ~dispose:(fun (_last_used, db) ->
        (* Best-effort closing. *)
        try
          let try_count = ref 0 in
          while !try_count < 10 && not (db_close db) do
            Unix.sleepf 0.01;
            incr try_count;
          done
        with
        | _ -> ()
      )
    Task_pool.size
    (fun () ->
       (ref (Unix.time ()),
        db_open
          ~mutex:`FULL
          (CCOption.get_exn_or "Docfd_lib.Params.db_path uninitialized" !Params.db_path)
       )
    )
(* Run [f] against the given connection if provided, otherwise against
   a connection borrowed from the pool (refreshing its last-used
   timestamp). *)
let with_db : type a. ?db:db -> (db -> a) -> a =
  fun ?db f ->
  match db with
  | Some db -> f db
  | None ->
    Eio.Pool.use db_pool ~never_block:true (fun (last_used, db) ->
        last_used := Unix.time ();
        f db
      )
(* Run [f], retrying up to 50 more times at 0.1s intervals while it
   returns BUSY; the final return code (possibly still BUSY) is
   returned to the caller. *)
let retry_if_busy (f : unit -> Sqlite3.Rc.t) =
  let rec attempt remaining =
    match f () with
    | BUSY when remaining > 0 ->
      Unix.sleepf 0.1;
      attempt (remaining - 1)
    | rc -> rc
  in
  attempt 50
(* Thin wrappers over the Sqlite3 primitives: retry on BUSY, then
   raise on any error return code via Rc.check. *)
let exec db s =
  retry_if_busy (fun () -> Sqlite3.exec db s)
  |> Sqlite3.Rc.check

let prepare db s =
  Sqlite3.prepare db s

let bind_names stmt l =
  retry_if_busy (fun () -> Sqlite3.bind_names stmt l)
  |> Sqlite3.Rc.check

let reset stmt =
  retry_if_busy (fun () -> Sqlite3.reset stmt)
  |> Sqlite3.Rc.check

(* OK, DONE and ROW are all successful step outcomes. *)
let step stmt =
  match retry_if_busy (fun () -> Sqlite3.step stmt) with
  | OK | DONE | ROW -> ()
  | x -> Sqlite3.Rc.check x

let finalize stmt =
  retry_if_busy (fun () -> Sqlite3.finalize stmt)
  |> Sqlite3.Rc.check
(* Prepare [s] against [db] (or a pooled connection), optionally bind
   named parameters, run [f] on the statement, and finalize it.

   Fix: the original finalized the statement only on the success path,
   leaking the prepared statement whenever [bind_names] or [f] raised.
   The statement is now also finalized (best-effort) on the exception
   path, and the original exception is re-raised with its backtrace
   preserved; success-path behavior is unchanged. *)
let with_stmt : type a. ?db:db -> string -> ?names:((string * Sqlite3.Data.t) list) -> (Sqlite3.stmt -> a) -> a =
  fun ?db s ?names f ->
  with_db ?db (fun db ->
      let stmt = prepare db s in
      match
        Option.iter
          (fun names -> bind_names stmt names)
          names;
        f stmt
      with
      | res ->
        (* Success path: finalize normally; an error here still raises,
           as before. *)
        finalize stmt;
        res
      | exception e ->
        let bt = Printexc.get_raw_backtrace () in
        (* Best-effort cleanup; never mask the original exception. *)
        (try finalize stmt with _ -> ());
        Printexc.raise_with_backtrace e bt
    )
(* Like [with_stmt], but advances the statement once before handing it
   to [f] (e.g. so [f] can read the first result row). *)
let step_stmt : type a. ?db:db -> string -> ?names:((string * Data.t) list) -> (stmt -> a) -> a =
  fun ?db s ?names f ->
  with_stmt ?db s ?names (fun stmt ->
      step stmt;
      f stmt
    )
(* Run [f] on every result row of [s]; raises on an error return
   code. *)
let iter_stmt ?db s ?names (f : Data.t array -> unit) =
  with_stmt ?db s ?names
    (fun stmt ->
       Rc.check (Sqlite3.iter stmt ~f)
    )
(* Fold [f] over all result rows of [s], starting from [init]; raises
   on an error return code. *)
let fold_stmt : type a. ?db:db -> string -> ?names:((string * Data.t) list) -> (a -> Sqlite3.Data.t array -> a) -> a -> a =
  fun ?db s ?names f init ->
  with_stmt ?db s ?names (fun stmt ->
      let outcome = Sqlite3.fold stmt ~f ~init in
      Sqlite3.Rc.check (fst outcome);
      snd outcome
    )
================================================
FILE: lib/stop_signal.ml
================================================
(* One-shot broadcast signal: [await] blocks until [broadcast] runs. *)
type t = {
  mutable stop : bool;  (* Guarded by [mutex]. *)
  cond : Eio.Condition.t;
  mutex : Eio.Mutex.t;
}

let make () =
  {
    stop = false;
    cond = Eio.Condition.create ();
    mutex = Eio.Mutex.create ();
  }

(* Block the calling fiber until the signal has been broadcast.  The
   [stop] flag re-checked in a loop guards against missed and spurious
   wakeups. *)
let await (t : t) =
  Eio.Mutex.use_ro t.mutex (fun () ->
      while not t.stop do
        Eio.Condition.await t.cond t.mutex
      done
    )

(* Set the stop flag under the mutex, then wake all waiting fibers. *)
let broadcast (t : t) =
  Eio.Mutex.use_rw ~protect:false t.mutex
    (fun () -> t.stop <- true);
  Eio.Condition.broadcast t.cond
================================================
FILE: lib/stop_signal.mli
================================================
(** One-shot broadcast signal. *)
type t

val make : unit -> t

(** Blocks until [broadcast] has been called on the signal. *)
val await : t -> unit

(** Wakes all fibers blocked in [await]; later calls to [await] return
    immediately. *)
val broadcast : t -> unit
================================================
FILE: lib/string_map.ml
================================================
(* Maps keyed by strings (containers' CCMap over String). *)
include CCMap.Make (String)
================================================
FILE: lib/string_set.ml
================================================
(* Sets of strings (containers' CCSet over String). *)
include CCSet.Make (String)
================================================
FILE: lib/task_pool.ml
================================================
(* Shared executor pool for parallel work. *)
type t = Eio.Executor_pool.t

(* One worker domain per recommended core, minus one (presumably for
   the main domain); at least 1. *)
let size = max 1 (Domain.recommended_domain_count () - 1)

let make ~sw mgr =
  Eio.Executor_pool.create ~sw ~domain_count:size mgr

(* Submit [f] to the pool and wait for its result; submit_exn
   re-raises any exception [f] raises. *)
let run (t : t) (f : unit -> 'a) : 'a =
  Eio.Executor_pool.submit_exn t ~weight:1.0 f
(* Parallel list operations: up to [size] fibers each submit one item
   at a time to the pool.  The yield before each submission lets other
   fibers run first.  Eio.Fiber.List preserves result order. *)
let map_list : 'a 'b . t -> ('a -> 'b) -> 'a list -> 'b list =
  fun t f l ->
  Eio.Fiber.List.map ~max_fibers:size
    (fun x ->
       Eio.Fiber.yield ();
       run t (fun () -> f x))
    l

let filter_list : 'a 'b . t -> ('a -> bool) -> 'a list -> 'a list =
  fun t f l ->
  Eio.Fiber.List.filter ~max_fibers:size
    (fun x ->
       Eio.Fiber.yield ();
       run t (fun () -> f x))
    l

let filter_map_list : 'a 'b . t -> ('a -> 'b option) -> 'a list -> 'b list =
  fun t f l ->
  Eio.Fiber.List.filter_map ~max_fibers:size
    (fun x ->
       Eio.Fiber.yield ();
       run t (fun () -> f x))
    l
================================================
FILE: lib/task_pool.mli
================================================
(** Executor pool for parallel work. *)
type t

(** Number of worker domains in the pool. *)
val size : int

val make : sw:Eio.Switch.t -> _ Eio.Domain_manager.t -> t

(** Run a single job on the pool and wait for its result. *)
val run : t -> (unit -> 'a) -> 'a

(** Parallel equivalents of the List operations; result order is
    preserved. *)
val map_list : t -> ('a -> 'b) -> 'a list -> 'b list

val filter_list : t -> ('a -> bool) -> 'a list -> 'a list

val filter_map_list : t -> ('a -> 'b option) -> 'a list -> 'b list
================================================
FILE: lib/tokenization.ml
================================================
(* Split oversized tokens into chunks of at most
   [Params.max_token_size] bytes.  Each extra chunk emitted shifts the
   positions of all subsequent tokens by [offset], keeping positions
   increasing; a continuation chunk is re-queued with the original
   token's position, which the incremented offset then adjusts. *)
let chunk_tokens (s : (int * string) Seq.t) : (int * string) Seq.t =
  let rec aux offset s =
    match s () with
    | Seq.Nil -> Seq.empty
    | Seq.Cons ((pos, word), rest) -> (
        let word_len = String.length word in
        if word_len <= Params.max_token_size then (
          fun () -> Seq.Cons ((pos + offset, word), aux offset rest)
        ) else (
          let up_to_limit =
            String.sub word 0 Params.max_token_size
          in
          let rest_of_token =
            String.sub word Params.max_token_size (word_len - Params.max_token_size)
          in
          fun () ->
            Seq.Cons
              ((pos + offset, up_to_limit),
               (aux (offset + 1) (Seq.cons (pos, rest_of_token) rest)))
        )
      )
  in
  aux 0 s
(* A segment produced by word segmentation: a whitespace run or a text
   segment. *)
type token =
  | Space of string
  | Text of string
(* Split a word segment into runs of ASCII alphanumerics and
   individual (possibly multi-byte) UTF-8 characters.  Returns [] if
   the whole string cannot be parsed this way (silent best-effort). *)
let split_utf8_seg (s : string) : string list =
  let open Angstrom in
  let open Parser_components in
  let token_p =
    choice [
      take_while1 is_alphanum;
      utf_8_char;
    ]
  in
  let tokens_p = many token_p in
  match Angstrom.(parse_string ~consume:Consume.All) tokens_p s with
  | Ok l -> l
  | Error _ -> []
(* Tokenize [s] into (position, token) pairs using Unicode word
   segmentation (Uuseg `Word).  Text segments are further split by
   [split_utf8_seg]; positions index the emitted segment pieces
   BEFORE space filtering, so they are stable whether or not spaces
   are dropped.  Oversized tokens are finally chunked. *)
let tokenize_with_pos ~drop_spaces (s : string) : (int * string) Seq.t =
  let segmenter = Uuseg.create `Word in
  let s = Misc_utils.sanitize_string s in
  let s_len = String.length s in
  let acc : token Dynarray.t = Dynarray.create () in
  let buf : Uchar.t Dynarray.t = Dynarray.create () in
  let sbuf = Buffer.create 256 in
  (* Turn the buffered segment into a Space or Text entries in acc. *)
  let flush_to_acc () =
    if Dynarray.length buf > 0 then (
      Dynarray.iter (Buffer.add_utf_8_uchar sbuf) buf;
      if Uucp.White.is_white_space (Dynarray.get buf 0) then (
        Dynarray.add_last acc (Space (Buffer.contents sbuf))
      ) else (
        Buffer.contents sbuf
        |> split_utf8_seg
        |> List.iter (fun s ->
            Dynarray.add_last acc (Text s)
          )
      );
      Dynarray.clear buf;
      Buffer.clear sbuf
    )
  in
  (* Feed one event to the segmenter and drain its output. *)
  let rec add v =
    match Uuseg.add segmenter v with
    | `Uchar uc -> (
        Dynarray.add_last buf uc;
        add `Await
      )
    | `Boundary -> (
        flush_to_acc ();
        add `Await
      )
    | `Await | `End -> ()
  in
  (* Decode the input; invalid bytes are skipped one at a time. *)
  let rec aux pos =
    if pos >= s_len then (
      add `End;
      flush_to_acc ()
    ) else (
      let decode = String.get_utf_8_uchar s pos in
      if Uchar.utf_decode_is_valid decode then (
        let uchar = Uchar.utf_decode_uchar decode in
        add (`Uchar uchar);
        aux (pos + Uchar.utf_decode_length decode)
      ) else (
        aux (pos + 1)
      )
    )
  in
  aux 0;
  Dynarray.to_seq acc
  |> Seq.mapi (fun i x -> (i, x))
  |> Seq.filter_map (fun ((i, token) : int * token) ->
      match token with
      | Text s -> Some (i, s)
      | Space s ->
        if drop_spaces then
          None
        else
          Some (i, s)
    )
  |> chunk_tokens
(* Same as [tokenize_with_pos] but discarding the positions. *)
let tokenize ~drop_spaces s =
  Seq.map snd (tokenize_with_pos ~drop_spaces s)
================================================
FILE: lib/word_db.ml
================================================
(* Global bidirectional word <-> id table, guarded by [lock]. *)
type t = {
  lock : Eio.Mutex.t;
  (* Number of words interned so far; also the next fresh id. *)
  mutable size : int;
  (* How many of the first [size] ids have been persisted by
     [write_to_db]. *)
  mutable size_written_to_db : int;
  mutable word_of_id : string Int_map.t;
  id_of_word : (string, int) Hashtbl.t;
}

(* The process-wide singleton instance. *)
let t : t =
  {
    lock = Eio.Mutex.create ();
    size = 0;
    size_written_to_db = 0;
    word_of_id = Int_map.empty;
    id_of_word = Hashtbl.create 100_000;
  }
(* Run [f] while holding the table's mutex. *)
let lock : type a. (unit -> a) -> a =
  fun f -> Eio.Mutex.use_rw ~protect:true t.lock f
(* Return all (id, word) pairs whose word satisfies [f], scanning the
   table in parallel chunks on [pool].

   Fix: snapshot [t.word_of_id] AND [t.size] under a single lock
   acquisition.  Previously [t.size] was read several times outside the
   lock, so a concurrent [add] could make it exceed the map snapshot and
   [Int_map.find] below would raise [Not_found]. *)
let filter pool (f : string -> bool) : (int * string) Dynarray.t =
  let word_of_id, size =
    lock (fun () ->
        (t.word_of_id, t.size)
      )
  in
  let max_end_exc_seen = ref 0 in
  let chunk_size = !Params.index_chunk_size * 10 in
  (* Partition ids [0, size) into [chunk_size]-wide ranges. *)
  let chunk_start_end_exc_ranges =
    OSeq.(0 -- (size - 1) / chunk_size)
    |> Seq.map (fun chunk_index ->
        let start = chunk_index * chunk_size in
        let end_exc =
          min
            ((chunk_index + 1) * chunk_size)
            size
        in
        max_end_exc_seen := max !max_end_exc_seen end_exc;
        (start, end_exc)
      )
    |> List.of_seq
  in
  (* Sanity check: the ranges cover the whole snapshot. *)
  assert (!max_end_exc_seen = size);
  let batches =
    chunk_start_end_exc_ranges
    |> Task_pool.map_list pool (fun (start, end_exc) ->
        (* Ids [0, size) are all present in the snapshot, so
           [Int_map.find] cannot raise here. *)
        let acc = Dynarray.create () in
        for i = start to end_exc - 1 do
          let word = Int_map.find i word_of_id in
          if f word then (
            Dynarray.add_last acc (i, word)
          )
        done;
        acc
      )
  in
  (* Concatenate per-chunk results, preserving id order. *)
  let acc = Dynarray.create () in
  List.iter (fun batch ->
      Dynarray.append acc batch
    ) batches;
  acc
(* Intern [word]: return its existing id, or allocate the next fresh id
   and record the mapping in both directions. *)
let add (word : string) : int =
  lock (fun () ->
      match Hashtbl.find_opt t.id_of_word word with
      | Some existing_id -> existing_id
      | None ->
        let fresh_id = t.size in
        t.size <- t.size + 1;
        t.word_of_id <- Int_map.add fresh_id word t.word_of_id;
        Hashtbl.replace t.id_of_word word fresh_id;
        fresh_id
    )
(* Look up the word for id [i]; raises [Not_found] for unknown ids. *)
let word_of_id i : string =
  lock (fun () -> Int_map.find i t.word_of_id)
(* Look up the id for word [s], if it has been interned. *)
let id_of_word s : int option =
  lock (fun () -> Hashtbl.find_opt t.id_of_word s)
(* Replace the in-memory table with the contents of the [word] table in
   the on-disk database, and mark everything as already persisted. *)
let read_from_db () : unit =
  let open Sqlite3_utils in
  lock (fun () ->
      with_db (fun db ->
          (* Discard any in-memory state before reloading. *)
          t.word_of_id <- Int_map.empty;
          Hashtbl.clear t.id_of_word;
          iter_stmt ~db
            {|
        SELECT id, word
        FROM word
        ORDER by id
        |}
            ~names:[]
            (fun data ->
               let id = Data.to_int_exn data.(0) in
               let word = Data.to_string_exn data.(1) in
               t.word_of_id <- Int_map.add id word t.word_of_id;
               Hashtbl.replace t.id_of_word word id;
            )
        );
      (* [size] is derived from the loaded map; everything loaded is, by
         definition, already in the database. *)
      t.size <- Int_map.cardinal t.word_of_id;
      t.size_written_to_db <- t.size;
    )
(* Persist words interned since the last write, i.e. ids in
   [size_written_to_db, size).  Wraps the work in its own IMMEDIATE
   transaction unless the caller already opened one.

   Exits the process if the on-disk word table's row count no longer
   matches what this process last wrote — taken as evidence that another
   Docfd instance indexed concurrently. *)
let write_to_db db ~already_in_transaction : unit =
  let open Sqlite3_utils in
  lock (fun () ->
      if not already_in_transaction then (
        step_stmt ~db "BEGIN IMMEDIATE" ignore;
      );
      let word_table_size =
        step_stmt ~db
          {|
      SELECT COUNT(1) FROM word
      |}
          (fun stmt ->
             Int64.to_int (column_int64 stmt 0)
          )
      in
      (* Consistency check: row count must equal our record of what we
         have written; otherwise our id assignment may clash. *)
      if word_table_size <> t.size_written_to_db then (
        Misc_utils.exit_with_error_msg
          "unexpected change in word table, likely due to indexing from another Docfd instance";
      );
      with_stmt ~db
        {|
      INSERT INTO word
      (id, word)
      VALUES
      (@id, @word)
      ON CONFLICT(id) DO NOTHING
      |}
        (fun stmt ->
           (* Insert only the not-yet-persisted suffix of ids, reusing one
              prepared statement. *)
           for id = t.size_written_to_db to t.size-1 do
             let word = Int_map.find id t.word_of_id in
             bind_names
               stmt
               [ ("@id", INT (Int64.of_int id))
               ; ("@word", TEXT word)
               ];
             step stmt;
             reset stmt;
           done
        );
      if not already_in_transaction then (
        step_stmt ~db "COMMIT" ignore;
      );
      t.size_written_to_db <- t.size;
    )
================================================
FILE: lib/word_db.mli
================================================
(** Global word <-> id interning table. *)
type t

(** [add word] interns [word] and returns its id (existing or fresh). *)
val add : string -> int

(** [filter pool f] returns all (id, word) pairs whose word satisfies
    [f], scanned in parallel on [pool]. *)
val filter : Task_pool.t -> (string -> bool) -> (int * string) Dynarray.t

(** [word_of_id i] returns the word interned under id [i]. *)
val word_of_id : int -> string

(** [id_of_word s] returns the id of [s], if interned. *)
val id_of_word : string -> int option

(** Replace the in-memory table with the database's word table. *)
val read_from_db : unit -> unit

(** Persist words interned since the last write. *)
val write_to_db : Sqlite3.db -> already_in_transaction:bool -> unit
================================================
FILE: line-wrapping-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: line-wrapping-tests.t/long-words.txt
================================================
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
abcdefghijklmnopqrstuvwxyz
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
abcdefghijklmnopqrstuvwxyz
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
================================================
FILE: line-wrapping-tests.t/run.t
================================================
Word breaking:
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 long-words.txt --sample "01 ab" --search-result-print-text-width 80
$TESTCASE_ROOT/long-words.txt
1: 01234567890123456789012345678901234567890123456789012345678901234567890123456
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
78901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^
16: 0123456789012345678901234567890123456789012345678901234567890123456789012345
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
678901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^^
17:
18: abcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^
1: 01234567890123456789012345678901234567890123456789012345678901234567890123456
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
78901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopqrstuvwxyz
4: 01234567890123456789012345678901234567890123456789012345678901234567890123456
78901234567890123456789012345678901234567890123456789012345678901234567890123
4567890123456789012345678901234567890123456789
5: abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16: 0123456789012345678901234567890123456789012345678901234567890123456789012345
678901234567890123456789
17:
18: abcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^
3: abcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^
4: 01234567890123456789012345678901234567890123456789012345678901234567890123456
78901234567890123456789012345678901234567890123456789012345678901234567890123
4567890123456789012345678901234567890123456789
5: abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16: 0123456789012345678901234567890123456789012345678901234567890123456789012345
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
678901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^^
1: 01234567890123456789012345678901234567890123456789012345678901234567890123456
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
78901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopqrstuvwxyz
4: 01234567890123456789012345678901234567890123456789012345678901234567890123456
78901234567890123456789012345678901234567890123456789012345678901234567890123
4567890123456789012345678901234567890123456789
5: abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 long-words.txt --sample "01 ab" --search-result-print-text-width 20
$TESTCASE_ROOT/long-words.txt
1: 01234567890123456
^^^^^^^^^^^^^^^^^
78901234567890123
^^^^^^^^^^^^^^^^^
45678901234567890
^^^^^^^^^^^^^^^^^
12345678901234567
^^^^^^^^^^^^^^^^^
89012345678901234
^^^^^^^^^^^^^^^^^
567890123456789
^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopq
^^^^^^^^^^^^^^^^^
rstuvwxyz
^^^^^^^^^
16: 0123456789012345
^^^^^^^^^^^^^^^^
6789012345678901
^^^^^^^^^^^^^^^^
2345678901234567
^^^^^^^^^^^^^^^^
8901234567890123
^^^^^^^^^^^^^^^^
4567890123456789
^^^^^^^^^^^^^^^^
0123456789012345
^^^^^^^^^^^^^^^^
6789
^^^^
17:
18: abcdefghijklmnop
^^^^^^^^^^^^^^^^
qrstuvwxyz
^^^^^^^^^^
1: 01234567890123456
^^^^^^^^^^^^^^^^^
78901234567890123
^^^^^^^^^^^^^^^^^
45678901234567890
^^^^^^^^^^^^^^^^^
12345678901234567
^^^^^^^^^^^^^^^^^
89012345678901234
^^^^^^^^^^^^^^^^^
567890123456789
^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopq
rstuvwxyz
4: 01234567890123456
78901234567890123
45678901234567890
12345678901234567
89012345678901234
56789012345678901
23456789012345678
90123456789012345
67890123456789012
34567890123456789
01234567890123456
7890123456789
5: abcdefghijklmnopq
rstuvwxyzabcdefgh
ijklmnopqrstuvwxy
z
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16: 0123456789012345
6789012345678901
2345678901234567
8901234567890123
4567890123456789
0123456789012345
6789
17:
18: abcdefghijklmnop
^^^^^^^^^^^^^^^^
qrstuvwxyz
^^^^^^^^^^
3: abcdefghijklmnopq
^^^^^^^^^^^^^^^^^
rstuvwxyz
^^^^^^^^^
4: 01234567890123456
78901234567890123
45678901234567890
12345678901234567
89012345678901234
56789012345678901
23456789012345678
90123456789012345
67890123456789012
34567890123456789
01234567890123456
7890123456789
5: abcdefghijklmnopq
rstuvwxyzabcdefgh
ijklmnopqrstuvwxy
z
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16: 0123456789012345
^^^^^^^^^^^^^^^^
6789012345678901
^^^^^^^^^^^^^^^^
2345678901234567
^^^^^^^^^^^^^^^^
8901234567890123
^^^^^^^^^^^^^^^^
4567890123456789
^^^^^^^^^^^^^^^^
0123456789012345
^^^^^^^^^^^^^^^^
6789
^^^^
1: 01234567890123456
^^^^^^^^^^^^^^^^^
78901234567890123
^^^^^^^^^^^^^^^^^
45678901234567890
^^^^^^^^^^^^^^^^^
12345678901234567
^^^^^^^^^^^^^^^^^
89012345678901234
^^^^^^^^^^^^^^^^^
567890123456789
^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopq
rstuvwxyz
4: 01234567890123456
78901234567890123
45678901234567890
12345678901234567
89012345678901234
56789012345678901
23456789012345678
90123456789012345
67890123456789012
34567890123456789
01234567890123456
7890123456789
5: abcdefghijklmnopq
^^^^^^^^^^^^^^^^^
rstuvwxyzabcdefgh
^^^^^^^^^^^^^^^^^
ijklmnopqrstuvwxy
^^^^^^^^^^^^^^^^^
z
^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 words.txt --sample "01 ab" --search-result-print-text-width 5
$TESTCASE_ROOT/words.txt
1: 01
^^
23
^^
45
^^
2:
3: ab
^^
cd
^^
ef
^^
g
^
15: 0
^
1
^
2
^
3
^
4
^
5
^
16:
17: a
^
b
^
c
^
d
^
e
^
f
^
g
^
1: 01
^^
23
^^
45
^^
2:
3: ab
cd
ef
g
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15: 0
1
2
3
4
5
16:
17: a
^
b
^
c
^
d
^
e
^
f
^
g
^
3: ab
^^
cd
^^
ef
^^
g
^
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15: 0
^
1
^
2
^
3
^
4
^
5
^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 words.txt --sample "01 ab" --search-result-print-text-width 1
$TESTCASE_ROOT/words.txt
1: 0
^
1
^
2
^
3
^
4
^
5
^
2:
3: a
^
b
^
c
^
d
^
e
^
f
^
g
^
15: 0
^
1
^
2
^
3
^
4
^
5
^
16:
17: a
^
b
^
c
^
d
^
e
^
f
^
g
^
1: 0
^
1
^
2
^
3
^
4
^
5
^
2:
3: a
b
c
d
e
f
g
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15: 0
1
2
3
4
5
16:
17: a
^
b
^
c
^
d
^
e
^
f
^
g
^
3: a
^
b
^
c
^
d
^
e
^
f
^
g
^
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15: 0
^
1
^
2
^
3
^
4
^
5
^
Line wrapping and word breaking:
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "lorem" --search-result-print-text-width 80
$TESTCASE_ROOT/sentences.txt
1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
^^^^^
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "lorem" --search-result-print-text-width 20
$TESTCASE_ROOT/sentences.txt
1: Lorem ipsum
^^^^^
dolor sit amet,
consectetur
adipiscing elit,
sed do eiusmod
tempor incididunt
ut labore et
dolore magna
aliqua. Ut enim
ad minim veniam,
quis nostrud
exercitation
ullamco laboris
nisi ut aliquip
ex ea commodo
consequat. Duis
aute irure dolor
in reprehenderit
in voluptate
velit esse cillum
dolore eu fugiat
nulla pariatur.
Excepteur sint
occaecat
cupidatat non
proident, sunt in
culpa qui
officia deserunt
mollit anim id
est laborum.
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "lorem" --search-result-print-text-width 10
$TESTCASE_ROOT/sentences.txt
1:
Lorem
^^^^^
ipsum
dolor
sit
amet,
consect
etur
adipisc
ing
elit,
sed do
eiusmod
tempor
incidid
unt
ut
labore
et
dolore
magna
aliqua.
Ut
enim ad
minim
veniam,
quis
nostrud
exercit
ation
ullamco
laboris
nisi
ut
aliquip
ex ea
commodo
consequ
at
. Duis
aute
irure
dolor
in
reprehe
nderit
in
volupta
te
velit
esse
cillum
dolore
eu
fugiat
nulla
pariatu
r
.
Excepte
ur
sint
occaeca
t
cupidat
at
non
proiden
t
, sunt
in
culpa
qui
officia
deserun
t
mollit
anim
id est
laborum
.
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "laborum 0" --search-result-print-text-width 80
$TESTCASE_ROOT/sentences.txt
1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
^^^^^^^
2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345
^
1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
^^^^^^^
2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345
^^^^^^^
1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
^^^^^^^
2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345
^^^^^^^^^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "laborum 0" --search-result-print-text-width 20
$TESTCASE_ROOT/sentences.txt
1: Lorem ipsum
dolor sit amet,
consectetur
adipiscing elit,
sed do eiusmod
tempor incididunt
ut labore et
dolore magna
aliqua. Ut enim
ad minim veniam,
quis nostrud
exercitation
ullamco laboris
nisi ut aliquip
ex ea commodo
consequat. Duis
aute irure dolor
in reprehenderit
in voluptate
velit esse cillum
dolore eu fugiat
nulla pariatur.
Excepteur sint
occaecat
cupidatat non
proident, sunt in
culpa qui
officia deserunt
mollit anim id
est laborum.
^^^^^^^
2: 0 12 abcd efghi
^
jkl mnopqrst uvwx
yz 0123456
789012345
1: Lorem ipsum
dolor sit amet,
consectetur
adipiscing elit,
sed do eiusmod
tempor incididunt
ut labore et
dolore magna
aliqua. Ut enim
ad minim veniam,
quis nostrud
exercitation
ullamco laboris
nisi ut aliquip
ex ea commodo
consequat. Duis
aute irure dolor
in reprehenderit
in voluptate
velit esse cillum
dolore eu fugiat
nulla pariatur.
Excepteur sint
occaecat
cupidatat non
proident, sunt in
culpa qui
officia deserunt
mollit anim id
est laborum.
^^^^^^^
2: 0 12 abcd efghi
jkl mnopqrst uvwx
yz 0123456
^^^^^^^
789012345
1: Lorem ipsum
dolor sit amet,
consectetur
adipiscing elit,
sed do eiusmod
tempor incididunt
ut labore et
dolore magna
aliqua. Ut enim
ad minim veniam,
quis nostrud
exercitation
ullamco laboris
nisi ut aliquip
ex ea commodo
consequat. Duis
aute irure dolor
in reprehenderit
in voluptate
velit esse cillum
dolore eu fugiat
nulla pariatur.
Excepteur sint
occaecat
cupidatat non
proident, sunt in
culpa qui
officia deserunt
mollit anim id
est laborum.
^^^^^^^
2: 0 12 abcd efghi
jkl mnopqrst uvwx
yz 0123456
789012345
^^^^^^^^^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "laborum 0" --search-result-print-text-width 10
$TESTCASE_ROOT/sentences.txt
1:
Lorem
ipsum
dolor
sit
amet,
consect
etur
adipisc
ing
elit,
sed do
eiusmod
tempor
incidid
unt
ut
labore
et
dolore
magna
aliqua.
Ut
enim ad
minim
veniam,
quis
nostrud
exercit
ation
ullamco
laboris
nisi
ut
aliquip
ex ea
commodo
consequ
at
. Duis
aute
irure
dolor
in
reprehe
nderit
in
volupta
te
velit
esse
cillum
dolore
eu
fugiat
nulla
pariatu
r
.
Excepte
ur
sint
occaeca
t
cupidat
at
non
proiden
t
, sunt
in
culpa
qui
officia
deserun
t
mollit
anim
id est
laborum
^^^^^^^
.
2: 0 12
^
abcd
efghi
jkl
mnopqrs
t
uvwx
yz
0123456
7890123
45
1:
Lorem
ipsum
dolor
sit
amet,
consect
etur
adipisc
ing
elit,
sed do
eiusmod
tempor
incidid
unt
ut
labore
et
dolore
magna
aliqua.
Ut
enim ad
minim
veniam,
quis
nostrud
exercit
ation
ullamco
laboris
nisi
ut
aliquip
ex ea
commodo
consequ
at
. Duis
aute
irure
dolor
in
reprehe
nderit
in
volupta
te
velit
esse
cillum
dolore
eu
fugiat
nulla
pariatu
r
.
Excepte
ur
sint
occaeca
t
cupidat
at
non
proiden
t
, sunt
in
culpa
qui
officia
deserun
t
mollit
anim
id est
laborum
^^^^^^^
.
2: 0 12
abcd
efghi
jkl
mnopqrs
t
uvwx
yz
0123456
^^^^^^^
7890123
45
1:
Lorem
ipsum
dolor
sit
amet,
consect
etur
adipisc
ing
elit,
sed do
eiusmod
tempor
incidid
unt
ut
labore
et
dolore
magna
aliqua.
Ut
enim ad
minim
veniam,
quis
nostrud
exercit
ation
ullamco
laboris
nisi
ut
aliquip
ex ea
commodo
consequ
at
. Duis
aute
irure
dolor
in
reprehe
nderit
in
volupta
te
velit
esse
cillum
dolore
eu
fugiat
nulla
pariatu
r
.
Excepte
ur
sint
occaeca
t
cupidat
at
non
proiden
t
, sunt
in
culpa
qui
officia
deserun
t
mollit
anim
id est
laborum
^^^^^^^
.
2: 0 12
abcd
efghi
jkl
mnopqrs
t
uvwx
yz
0123456
7890123
^^^^^^^
45
^^
================================================
FILE: line-wrapping-tests.t/sentences.txt
================================================
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345
================================================
FILE: line-wrapping-tests.t/words.txt
================================================
012345
abcdefg
012345
abcdefg
================================================
FILE: match-type-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: match-type-tests.t/run.t
================================================
Exact match:
$ docfd --cache-dir .cache test.txt --sample "'abc"
[1]
$ docfd --cache-dir .cache test.txt --sample "'abcd"
$TESTCASE_ROOT/test.txt
1: abcd
^^^^
2: abcdef
3: ABCD
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
6: ABcdEF
7:
8: 'abcd
^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^
10: ^efgh
11: ^^efgh
$ docfd --cache-dir .cache test.txt --sample "\\'abcd"
$TESTCASE_ROOT/test.txt
6: ABcdEF
7:
8: 'abcd
^^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^^
10: ^efgh
11: ^^efgh
6: ABcdEF
7:
8: 'abcd
^^^^
9: 'abcd'
^
10: ^efgh
11: ^^efgh
7:
8: 'abcd
9: 'abcd'
^^^^^
10: ^efgh
11: ^^efgh
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
^
9: 'abcd'
10: ^efgh
$ docfd --cache-dir .cache test.txt --sample "'abcdef"
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
^^^^^^
3: ABCD
4: ABCDEF
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
$ docfd --cache-dir .cache test.txt --sample "''abcd"
$TESTCASE_ROOT/test.txt
6: ABcdEF
7:
8: 'abcd
^^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^^
10: ^efgh
11: ^^efgh
Exact match smart case sensitivity:
$ docfd --cache-dir .cache test.txt --sample "'ABCD"
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
$ docfd --cache-dir .cache test.txt --sample "'ABcd"
$TESTCASE_ROOT/test.txt
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
Prefix match:
$ docfd --cache-dir .cache test.txt --sample "^bcd"
[1]
$ docfd --cache-dir .cache test.txt --sample "^abcd"
$TESTCASE_ROOT/test.txt
1: abcd
^^^^
2: abcdef
3: ABCD
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
6: ABcdEF
7:
8: 'abcd
^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^
10: ^efgh
11: ^^efgh
$ docfd --cache-dir .cache test.txt --sample "^abcdef"
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
^^^^^^
3: ABCD
4: ABCDEF
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
$ docfd --cache-dir .cache test.txt --sample "^'abcd"
$TESTCASE_ROOT/test.txt
6: ABcdEF
7:
8: 'abcd
^^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^^
10: ^efgh
11: ^^efgh
Prefix match smart case sensitivity:
$ docfd --cache-dir .cache test.txt --sample "^ABCD"
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
$ docfd --cache-dir .cache test.txt --sample "^ABcd"
$TESTCASE_ROOT/test.txt
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
Suffix match:
$ docfd --cache-dir .cache test.txt --sample 'bcd$'
$TESTCASE_ROOT/test.txt
1: abcd
^^^^
2: abcdef
3: ABCD
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
6: ABcdEF
7:
8: 'abcd
^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^
10: ^efgh
11: ^^efgh
$ docfd --cache-dir .cache test.txt --sample 'abcd$$'
$TESTCASE_ROOT/test.txt
13: efgh$$
14:
15: abcd$
^^^^^
16: efgh$
17:
$ docfd --cache-dir .cache test.txt --sample 'ef$'
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
^^^^^^
3: ABCD
4: ABCDEF
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
Suffix match smart case sensitivity:
$ docfd --cache-dir .cache test.txt --sample 'ABCD$'
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
$ docfd --cache-dir .cache test.txt --sample 'EF$'
$TESTCASE_ROOT/test.txt
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
Fuzzy match explicit spaces:
$ docfd --cache-dir .cache test.txt --sample 'hel~word'
$TESTCASE_ROOT/test.txt
16: efgh$
17:
18: hello world
^^^^^^^^^^^
19: hello world
20:
17:
18: hello world
19: hello world
^^^^^^^^^^^^^
20:
21: Hello world
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
21: Hello world
22:
23: HELLO WORLD
^^^^^^^^^^^
16: efgh$
17:
18: hello world
^^^^^^
19: hello world
^^^^^
20:
21: Hello world
Exact match explicit spaces:
$ docfd --cache-dir .cache test.txt --sample "'hello~world"
$TESTCASE_ROOT/test.txt
16: efgh$
17:
18: hello world
^^^^^^^^^^^
19: hello world
20:
17:
18: hello world
19: hello world
^^^^^^^^^^^^^
20:
21: Hello world
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
21: Hello world
22:
23: HELLO WORLD
^^^^^^^^^^^
$ docfd --cache-dir .cache test.txt --sample "'Hello~world"
$TESTCASE_ROOT/test.txt
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
$ docfd --cache-dir .cache test.txt --sample "'Hello~World"
[1]
Prefix match explicit spaces:
$ docfd --cache-dir .cache test.txt --sample '^hello~wo'
$TESTCASE_ROOT/test.txt
16: efgh$
17:
18: hello world
^^^^^^^^^^^
19: hello world
20:
17:
18: hello world
19: hello world
^^^^^^^^^^^^^
20:
21: Hello world
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
21: Hello world
22:
23: HELLO WORLD
^^^^^^^^^^^
$ docfd --cache-dir .cache test.txt --sample '^ello~wo'
[1]
Suffix match explicit spaces:
$ docfd --cache-dir .cache test.txt --sample 'lo~world$'
$TESTCASE_ROOT/test.txt
16: efgh$
17:
18: hello world
^^^^^^^^^^^
19: hello world
20:
17:
18: hello world
19: hello world
^^^^^^^^^^^^^
20:
21: Hello world
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
21: Hello world
22:
23: HELLO WORLD
^^^^^^^^^^^
$ docfd --cache-dir .cache test.txt --sample 'lo~worl$'
[1]
================================================
FILE: match-type-tests.t/test.txt
================================================
abcd
abcdef
ABCD
ABCDEF
ABcd
ABcdEF
'abcd
'abcd'
^efgh
^^efgh
efgh$
efgh$$
abcd$
efgh$
hello world
hello world
Hello world
HELLO WORLD
================================================
FILE: misc-behavior-tests.t/abcd.txt
================================================
abcd
================================================
FILE: misc-behavior-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: misc-behavior-tests.t/run.t
================================================
Stdin temp file cleanup:
$ echo "abcd" | docfd --cache-dir .cache --search "a" | tail -n +2
1: abcd
^^^^
$ ls /tmp/docfd-*
ls: cannot access '/tmp/docfd-*': No such file or directory
[2]
Stdin and path both specified:
$ echo "0123" | docfd --cache-dir .cache abcd.txt --search "01" # Should not print anything since stdin should be ignored.
[1]
$ echo "0123" | docfd --cache-dir .cache abcd.txt --search "ab"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
--underline:
$ docfd --cache-dir .cache --underline never abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
1: abcd
$ docfd --cache-dir .cache --underline always abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
1: abcd
^^^^
$ docfd --cache-dir .cache --underline auto abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
1: abcd
^^^^
--color:
$ docfd --cache-dir .cache --color never abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
1: abcd
^^^^
$ # The output below is messed up after passing through Dune, I do not know why.
$ docfd --cache-dir .cache --color always abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt1: abcd ^^^^
1: abcd ^^^^
$ docfd --cache-dir .cache --color auto abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
1: abcd
^^^^
================================================
FILE: non-interactive-mode-return-code-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: non-interactive-mode-return-code-tests.t/run.t
================================================
Setup:
$ echo "0123 abcd" >> test0.txt
$ echo "0123 efgh" >> test1.txt
--sample, text all files:
$ docfd --sample "'0123" .
$TESTCASE_ROOT/test1.txt
1: 0123 efgh
^^^^
$TESTCASE_ROOT/test0.txt
1: 0123 abcd
^^^^
$ docfd --sample "'0123" . -l
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ docfd --sample "'0123" . --files-without-match
[1]
--sample, text in only one file:
$ docfd --sample "'abcd" .
$TESTCASE_ROOT/test0.txt
1: 0123 abcd
^^^^
$ docfd --sample "'abcd" . -l
$TESTCASE_ROOT/test0.txt
$ docfd --sample "'abcd" . --files-without-match
$TESTCASE_ROOT/test1.txt
--sample, text not in any file:
$ docfd --sample "'hello" .
[1]
$ docfd --sample "'hello" . -l
[1]
$ docfd --sample "'hello" . --files-without-match
$TESTCASE_ROOT/test0.txt
$TESTCASE_ROOT/test1.txt
--search, text all files:
$ docfd --search "'0123" .
$TESTCASE_ROOT/test1.txt
1: 0123 efgh
^^^^
$TESTCASE_ROOT/test0.txt
1: 0123 abcd
^^^^
$ docfd --search "'0123" . -l
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ docfd --search "'0123" . --files-without-match
[1]
--search, text in only one file:
$ docfd --search "'abcd" .
$TESTCASE_ROOT/test0.txt
1: 0123 abcd
^^^^
$ docfd --search "'abcd" . -l
$TESTCASE_ROOT/test0.txt
$ docfd --search "'abcd" . --files-without-match
$TESTCASE_ROOT/test1.txt
--search, text not in any file:
$ docfd --search "'hello" .
[1]
$ docfd --search "'hello" . -l
[1]
$ docfd --search "'hello" . --files-without-match
$TESTCASE_ROOT/test0.txt
$TESTCASE_ROOT/test1.txt
================================================
FILE: open-with-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: open-with-tests.t/run.t
================================================
Error case tests:
$ docfd --index-only --open-with pdf:term='okular {path}'
error: failed to parse pdf:term=okular {path}, invalid launch mode
[1]
$ docfd --index-only --open-with pdf='okular {path}'
error: failed to parse pdf=okular {path}, expected char :
[1]
$ docfd --index-only --open-with pdfterminal='okular {path}'
error: failed to parse pdfterminal=okular {path}, expected char :
[1]
$ docfd --index-only --open-with pdf:terminal='okular path}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:terminal='okular {path'
error: failed to parse pdf:terminal=okular {path, expected char }
[1]
$ docfd --index-only --open-with pdf:terminal='okular {abc}'
error: failed to parse pdf:terminal=okular {abc}, invalid placeholder
[1]
PDF parsing test, terminal launch mode:
$ docfd --index-only --open-with pdf:terminal='okular {path}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:terminal='okular {page_num}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:terminal='okular {line_num}'
error: failed to parse pdf:terminal=okular {line_num}, line_num not available
[1]
$ docfd --index-only --open-with pdf:terminal='okular {search_word}'
Initializing in-memory index
PDF parsing test, detached launch mode:
$ docfd --index-only --open-with pdf:detached='okular {path}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:detached='okular {page_num}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:detached='okular {line_num}'
error: failed to parse pdf:detached=okular {line_num}, line_num not available
[1]
$ docfd --index-only --open-with pdf:detached='okular {search_word}'
Initializing in-memory index
Pandoc supported extensions parsing test, terminal launch mode:
$ docfd --index-only --open-with odt:terminal='xdg-open {path}'
Initializing in-memory index
$ docfd --index-only --open-with odt:terminal='xdg-open {page_num}'
error: failed to parse odt:terminal=xdg-open {page_num}, page_num not available
[1]
$ docfd --index-only --open-with odt:terminal='xdg-open {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with odt:terminal='xdg-open {search_word}'
error: failed to parse odt:terminal=xdg-open {search_word}, search_word not available
[1]
Pandoc supported extensions parsing test, detached launch mode:
$ docfd --index-only --open-with odt:detached='xdg-open {path}'
Initializing in-memory index
$ docfd --index-only --open-with odt:detached='xdg-open {page_num}'
error: failed to parse odt:detached=xdg-open {page_num}, page_num not available
[1]
$ docfd --index-only --open-with odt:detached='xdg-open {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with odt:detached='xdg-open {search_word}'
error: failed to parse odt:detached=xdg-open {search_word}, search_word not available
[1]
Text parsing test, terminal launch mode:
$ docfd --index-only --open-with txt:terminal='nano {path}'
Initializing in-memory index
$ docfd --index-only --open-with txt:terminal='nano {page_num}'
error: failed to parse txt:terminal=nano {page_num}, page_num not available
[1]
$ docfd --index-only --open-with txt:terminal='nano {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with txt:terminal='nano {search_word}'
error: failed to parse txt:terminal=nano {search_word}, search_word not available
[1]
Text parsing test, detached launch mode:
$ docfd --index-only --open-with txt:detached='nano {path}'
Initializing in-memory index
$ docfd --index-only --open-with txt:detached='nano {page_num}'
error: failed to parse txt:detached=nano {page_num}, page_num not available
[1]
$ docfd --index-only --open-with txt:detached='nano {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with txt:detached='nano {search_word}'
error: failed to parse txt:detached=nano {search_word}, search_word not available
[1]
Unrecognized extensions parsing test, terminal launch mode:
$ docfd --index-only --open-with abc:terminal='nano {path}'
Initializing in-memory index
$ docfd --index-only --open-with abc:terminal='nano {page_num}'
error: failed to parse abc:terminal=nano {page_num}, page_num not available
[1]
$ docfd --index-only --open-with abc:terminal='nano {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with abc:terminal='nano {search_word}'
error: failed to parse abc:terminal=nano {search_word}, search_word not available
[1]
Unrecognized extensions parsing test, detached launch mode:
$ docfd --index-only --open-with abc:detached='nano {path}'
Initializing in-memory index
$ docfd --index-only --open-with abc:detached='nano {page_num}'
error: failed to parse abc:detached=nano {page_num}, page_num not available
[1]
$ docfd --index-only --open-with abc:detached='nano {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with abc:detached='nano {search_word}'
error: failed to parse abc:detached=nano {search_word}, search_word not available
[1]
================================================
FILE: printing-tests.t/empty.txt
================================================
================================================
FILE: printing-tests.t/run.t
================================================
--sample:
$ docfd --cache-dir .cache --sample abcd .
$TESTCASE_ROOT/test3.txt
1: abcd
^^^^
$TESTCASE_ROOT/test2.txt
1: hello
2:
3: abcd
^^^^
4:
5: abcdefgh
5: abcdefgh
6:
7: hello world abcd
^^^^
3: abcd
4:
5: abcdefgh
^^^^^^^^
6:
7: hello world abcd
--search:
$ docfd --cache-dir .cache --search abcd .
$TESTCASE_ROOT/test3.txt
1: abcd
^^^^
$TESTCASE_ROOT/test2.txt
1: hello
2:
3: abcd
^^^^
4:
5: abcdefgh
5: abcdefgh
6:
7: hello world abcd
^^^^
3: abcd
4:
5: abcdefgh
^^^^^^^^
6:
7: hello world abcd
-l/--files-with-match:
$ docfd --cache-dir .cache --sample abcd . -l
$TESTCASE_ROOT/test3.txt
$TESTCASE_ROOT/test2.txt
$ docfd --cache-dir .cache --sample abcd . --files-with-match
$TESTCASE_ROOT/test3.txt
$TESTCASE_ROOT/test2.txt
--files-without-match:
$ docfd --cache-dir .cache --sample abcd . --files-without-match
$TESTCASE_ROOT/empty.txt
$TESTCASE_ROOT/test0.txt
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test4.txt
================================================
FILE: printing-tests.t/test0.txt
================================================
hello
================================================
FILE: printing-tests.t/test1.txt
================================================
hello
================================================
FILE: printing-tests.t/test2.txt
================================================
hello
abcd
abcdefgh
hello world abcd
================================================
FILE: printing-tests.t/test3.txt
================================================
abcd
================================================
FILE: printing-tests.t/test4.txt
================================================
efgh
================================================
FILE: profiling/dune
================================================
; Copy string_set.ml out of the library so the profiling executable can
; compile it directly.  copy# (unlike plain copy) also inserts a line
; directive, so compiler errors point back at the original file in lib/.
(rule
(targets string_set.ml)
(deps ../lib/string_set.ml)
(action (copy# %{deps} %{targets}))
)
; Same trick for misc_utils.ml.
(rule
(targets misc_utils.ml)
(deps ../lib/misc_utils.ml)
(action (copy# %{deps} %{targets}))
)
; The profiling binary itself.  The warning set mirrors the main build
; (see bin/dune / tests/dune) with a few extra warnings disabled.
(executable
(flags (-w "+a-4-9-29-37-40-42-44-48-50-32-30-70@8"))
(name main)
(libraries docfd_lib
containers
cmdliner
fmt
notty
notty.unix
nottui
lwd
oseq
eio
eio_main
digestif.ocaml
digestif
)
)
================================================
FILE: profiling/main.ml
================================================
open Docfd_lib
(* Lorem-ipsum sample corpus used as the document to index/search in the
   benchmarks below.  Empty strings mark paragraph breaks. *)
let lines = [
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer placerat lacus non cursus";
"tincidunt. Suspendisse viverra leo ac quam tincidunt, quis euismod neque tempus. ";
"Vestibulum rutrum commodo tristique. Curabitur tristique dapibus dolor, vitae porttitor";
"est tristique quis. Sed urna ex, gravida vitae ipsum vel, fermentum viverra risus. ";
"Maecenas et nulla iaculis, bibendum libero vitae, varius est. Fusce eros enim, placerat ";
"quis magna eu, rutrum vulputate ante. Praesent non mi vel ipsum finibus lobortis. ";
"Duis posuere auctor hendrerit. Nunc sodales egestas vestibulum. Quisque suscipit maximus ";
"aliquam. Pellentesque tempor mi condimentum bibendum bibendum. Donec vitae accumsan quam, ";
"nec vulputate lectus. Nulla ligula ipsum, dictum vel augue at, semper vestibulum ex. ";
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed arcu ligula, cursus nec ";
"lacinia ut, lobortis in libero.";
"";
"Sed ultricies placerat urna, hendrerit ornare elit semper sit amet. Praesent ";
"pretium blandit velit, eu imperdiet lectus tincidunt ut. Suspendisse eget eros ";
"tellus. Nulla tristique vel libero non dapibus. Ut scelerisque sem sit amet ";
"odio mattis vestibulum. Nam vitae commodo mi. Vestibulum consequat orci at tellus porta ";
"placerat.";
"";
"In vel mi vestibulum felis accumsan congue eget efficitur tortor. Integer ";
"quam purus, malesuada vel nisl at, posuere vestibulum augue. Curabitur velit ";
"tortor, vestibulum id placerat eu, convallis at velit. Ut a lectus ";
"quis erat tincidunt aliquet. Etiam ut erat magna. Maecenas quis commodo leo, ";
"eleifend elementum ante. Nullam dapibus erat augue, a bibendum quam volutpat id. ";
"Morbi in ullamcorper arcu. Fusce venenatis lacus purus, vel pellentesque mi ";
"elementum a. Maecenas at mattis massa. Fusce ut elit tortor. Morbi rhoncus ";
"molestie orci eu malesuada. Aliquam gravida rutrum sem, vitae condimentum magna ";
"convallis pulvinar. Duis urna lacus, ultrices a ultrices pharetra, eleifend ";
"in ante. Fusce id elementum dolor. Nullam ornare nisl ac ultrices lobortis.";
"";
"Proin ullamcorper vulputate enim sed facilisis. Praesent vel mi metus. ";
"Fusce sagittis efficitur odio at condimentum. Nullam mollis lacinia consequat. ";
"Integer vel ex sit amet nunc aliquam molestie et eu nibh. Nam leo nunc, laoreet ";
"vitae iaculis sit amet, dapibus sit amet neque. Suspendisse eleifend, leo eget ";
"tempor molestie, massa enim auctor dui, quis vehicula erat urna id felis. Vivamus ";
"pharetra, sem non tempus ornare, risus tortor posuere tortor, eu pellentesque eros est ";
"ac erat. Sed sit amet tellus nisl. Phasellus magna urna, tincidunt in sem ";
"id, aliquam vulputate leo. Sed eleifend justo eu mauris egestas imperdiet. Fusce sagittis";
", turpis ac efficitur pulvinar, purus tellus gravida sem, eget accumsan nisl sapien a ";
"ante. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac ";
"turpis egestas. Nunc eget nibh orci. Cras facilisis facilisis sapien, ";
"a vehicula lorem imperdiet vel. Proin vel nulla nisi.";
"";
"Aenean sit amet risus at lectus pellentesque pellentesque at eu quam. ";
"Duis euismod porttitor ante quis lacinia. Cras sit amet vulputate nunc. ";
"Integer sollicitudin vitae sapien finibus fermentum. Donec eu tellus ";
"suscipit, dignissim turpis non, eleifend massa. Nullam quis ex nisi. ";
"Quisque dignissim quis leo eu finibus. ";
]
(* Run [f] [cycle] times and print the average CPU time per run
   (measured with [Sys.time]), labelled with [name].  The result of
   [f] is discarded. *)
let bench ~name ~cycle (f : unit -> 'a) =
  let t_before = Sys.time () in
  for _run = 1 to cycle do
    ignore (f ())
  done;
  let t_after = Sys.time () in
  let per_cycle = (t_after -. t_before) /. float_of_int cycle in
  Printf.printf "%s: time per cycle: %6fs\n" name per_cycle
(* Benchmark driver: sets up an in-memory index over [lines], then times
   Spelll automaton construction at various word lengths / edit-distance
   limits, and finally times [Index.search]. *)
let main env =
Eio.Switch.run @@ fun sw ->
(* [init] presumably sets up the document database; [None] means success.
   NOTE(review): [assert] is compiled out with -noassert - confirm this
   profiling binary is never built that way. *)
assert (Option.is_none (init ~db_path:"test.db" ~document_count_limit:1000));
let pool = Task_pool.make ~sw (Eio.Stdenv.domain_mgr env) in
(* Build the index over the sample corpus.  The raw index value itself is
   unused; building it is presumably required state for [Index.search]. *)
let _raw = Index.Raw.of_lines pool (List.to_seq lines) in
Params'.max_fuzzy_edit_dist := 3;
let search_exp = Search_exp.parse "vestibul rutru" |> Option.get in
(* Word used as the source of prefixes of length 1..20 below. *)
let s = "PellentesquePellentesque" in
(* Time Spelll automaton construction with edit-distance limit 2. *)
for len=1 to 20 do
let limit = 2 in
bench ~name:(Fmt.str "Spelll.of_string, limit: %d, len %2d:" limit len) ~cycle:10 (fun () ->
Spelll.of_string ~limit:2 (String.sub s 0 len))
done;
(* Same, with edit-distance limit 1. *)
for len=1 to 20 do
let limit = 1 in
bench ~name:(Fmt.str "Spelll.of_string, limit: %d, len %2d:" limit len) ~cycle:10 (fun () ->
Spelll.of_string ~limit:1 (String.sub s 0 len))
done;
(* Time a full search over the indexed corpus. *)
bench ~name:"Index.search" ~cycle:1000 (fun () ->
Index.search pool (Stop_signal.make ()) ~search_scope:None search_exp);
()
let () = Eio_main.run main
================================================
FILE: publish.sh
================================================
#!/usr/bin/env bash

# Publish a Docfd release:
#   1. detect the version from the newest CHANGELOG.md entry,
#   2. (re)tag the commit and push tags,
#   3. build and hash the release archive,
#   4. add an opam package directory (with url/checksum) to a local
#      opam-repository checkout.

RETAG_CONFIRM_TEXT="retag-docfd"

opam_repo="$HOME/opam-repository"

echo "Checking if $opam_repo exists"
if [ ! -d "$opam_repo" ]; then
echo "$opam_repo does not exist"
exit 1
fi

# Version = heading of the newest CHANGELOG entry ("## X.Y.Z").
ver=$(grep '## ' CHANGELOG.md \
| head -n 1 \
| sed -n 's/^## \s*\(\S*\)$/\1/p')

# Guard against an empty/garbled CHANGELOG: an empty version would
# silently produce a bogus tag and package directory.
if [[ -z "$ver" ]]; then
echo "Failed to detect version from CHANGELOG.md"
exit 1
fi

echo "Detected version for Docfd:" "$ver"

git_tag="$ver"

echo "Computed git tag for Docfd:" "$git_tag"

# read -r: do not let backslashes mangle the reply (ShellCheck SC2162).
read -rp "Are the version and git tag correct [y/n]? " ans
if [[ $ans != "y" ]]; then
echo "Publishing canceled"
exit 0
fi

echo "Checking if $git_tag exists in repo already"
if [[ $(git tag -l "$git_tag") == "" ]]; then
echo "Tagging commit"
git tag "$git_tag"
else
read -rp "Tag already exists, retag [y/n]? " ans
if [[ $ans == "y" ]]; then
# Retagging rewrites published history, so require an explicit
# confirmation phrase rather than a bare y/n.
read -rp "Type \"$RETAG_CONFIRM_TEXT\" to confirm: " ans
if [[ $ans != "$RETAG_CONFIRM_TEXT" ]]; then
echo "Publishing canceled"
exit 0
fi

echo "Removing tag"
git tag -d "$git_tag"
git push --delete origin "$git_tag"

echo "Tagging commit"
git tag "$git_tag"
fi
fi

echo "Pushing all tags"
git push --tags

archive="$git_tag".tar.gz
echo "Archiving as $archive"
rm -f "$archive"
git archive --output=./"$archive" "$git_tag"

echo "Hashing $archive"
hash_cmd=sha256sum
archive_hash=$("$hash_cmd" "$archive" | awk '{ print $1 }')
echo "Hash from $hash_cmd:" "$archive_hash"

packages=(
"docfd"
)

# Quote the expansion so package names survive word splitting (SC2068).
for package in "${packages[@]}"; do
package_dir="$opam_repo"/packages/"$package"/"$package"."$ver"
dest_opam="$package_dir"/opam

echo "Making directory $package_dir"
mkdir -p "$package_dir"

echo "Copying $package.opam over"
cp "$package.opam" "$dest_opam"

echo "Adding url section to $dest_opam"
echo "
url {
src:
\"https://github.com/darrenldl/docfd/releases/download/$git_tag/$archive\"
checksum:
\"sha256=$archive_hash\"
}
" >> "$dest_opam"
done
================================================
FILE: run-container.sh
================================================
#!/usr/bin/env bash

# Start an interactive login shell in the locally built docfd container,
# with the docfd checkout mounted and used as the working directory.
podman run \
  --rm \
  -it \
  --env VISUAL=nano \
  --workdir /home/docfd \
  -v ~/docfd:/home/docfd \
  localhost/docfd \
  /bin/bash --login
================================================
FILE: script-tests.t/dune
================================================
; Cram test suite; depends on the docfd binary so the tests rerun
; whenever the binary changes.
(cram
(deps ../bin/docfd.exe))
================================================
FILE: script-tests.t/run.t
================================================
Setup:
$ echo "abcd" > test0.txt
$ echo "efgh" > test1.txt
$ echo "hijk" > test2.txt
$ echo "0123" > test3.txt
$ echo "search: ^ab" >> 0.docfd-script
$ echo "search: 'xyz" >> 1.docfd-script
$ tree
.
|-- 0.docfd-script
|-- 1.docfd-script
|-- dune -> ../../../../default/script-tests.t/dune
|-- test0.txt
|-- test1.txt
|-- test2.txt
`-- test3.txt
0 directories, 7 files
Basic:
$ docfd -l --script 0.docfd-script .
$TESTCASE_ROOT/test0.txt
$ docfd -l --script 1.docfd-script .
[1]
================================================
FILE: search-scope-narrowing-tests.t/dune
================================================
; Cram test suite; depends on the docfd binary so the tests rerun
; whenever the binary changes.
(cram
(deps ../bin/docfd.exe))
================================================
FILE: search-scope-narrowing-tests.t/run.t
================================================
Setup:
$ echo "abcd" >> test0.txt
$ echo "efgh" >> test0.txt
$ echo "0123" >> test0.txt
$ echo "ijkl" >> test0.txt
$ echo "abcd" >> test1.txt
$ echo "efgh" >> test1.txt
$ echo "0123" >> test1.txt
$ echo "ijkl" >> test1.txt
$ tree
.
|-- dune -> ../../../../default/search-scope-narrowing-tests.t/dune
|-- test0.txt
`-- test1.txt
0 directories, 3 files
Single restriction:
$ # Case 0 for single restriction
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
$TESTCASE_ROOT/test0.txt
$ # Case 1 for single restriction
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
[1]
$ # Case 2 for single restriction
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
$TESTCASE_ROOT/test0.txt
Chained restriction:
$ # Case 0 for chained restrictions
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
[1]
$ # Case 1 for chained restrictions
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 2" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ echo "narrow level: 2" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
$TESTCASE_ROOT/test0.txt
$ # Case 2 for chained restrictions
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 2" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
$TESTCASE_ROOT/test0.txt
File path filter + restrictions:
$ # Baseline case: "clear filter" after "search" should trigger a search for each file that has not been searched through yet
$ # So both documents should appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Baseline case quoted string using single quote
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:'test0.txt'" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Baseline case quoted string using double quote
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo 'filter: path-glob:"test0.txt"' >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Since there is no "search" after "narrow", both documents should still appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # "narrow" + "search" after filtering should prevent test1.txt from appearing, even after we clear the filter
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but the order of "search" and "clear filter" is swapped
$ # test1.txt still should not appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but we also reset the search scope via "narrow level: 0"
$ # Since resetting the search scope does not refresh the search results, test1.txt should still not appear as there is not another "search"
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but we search again after resetting search scope
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Simplified version of the above case where we skip the search before "narrow level: 0"
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but the order of "clear filter" and "narrow level: 0" is swapped
$ # Both documents should still appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but the order of "clear filter" and "search" is swapped
$ # Both documents should still appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
================================================
FILE: tests/dune
================================================
; Unit test runner (Alcotest + QCheck) for docfd_lib.
; -g keeps debug info for readable backtraces on test failures.
(executable
(flags (-w "+a-4-9-29-37-40-42-44-48-50-70@8" -g))
(name main)
(libraries qcheck-core
qcheck-alcotest
alcotest
docfd_lib
eio
eio_main
)
)
================================================
FILE: tests/main.ml
================================================
open Docfd_lib
(* Test entry point: sets up the Eio environment and task pool that the
   library requires, then hands the combined Alcotest + QCheck suites to
   the Alcotest runner. *)
let () =
  Eio_main.run @@ fun env ->
  Eio.Switch.run @@ fun sw ->
  let _task_pool = Task_pool.make ~sw (Eio.Stdenv.domain_mgr env) in
  let alcotest_suites =
    [
      ("Search_exp_tests.Alco", Search_exp_tests.Alco.suite);
      ("Utils_tests.Alco", Utils_tests.Alco.suite);
    ]
  in
  (* No QCheck suites registered at the moment; this adapter keeps the
     plumbing in place for when some are added. *)
  let qcheck_suites =
    List.map
      (fun (name, suite) ->
         (name, List.map QCheck_alcotest.to_alcotest suite))
      []
  in
  Alcotest.run "docfd-lib" (alcotest_suites @ qcheck_suites)
================================================
FILE: tests/search_exp_tests.ml
================================================
open Docfd_lib
open Test_utils
module Alco = struct
(* Assert that [s] is rejected by the search-expression parser. *)
let test_invalid_exp (s : string) =
  let parse_result = Search_exp.parse s in
  Alcotest.(check bool) "true" true (Option.is_none parse_result)
(* Assert that parsing [s] yields an empty search phrase with no
   enriched tokens. *)
let test_empty_phrase (s : string) =
  let parsed = Search_phrase.parse s in
  Alcotest.(check bool) "case0" true (Search_phrase.is_empty parsed);
  let tokens = Search_phrase.enriched_tokens parsed in
  Alcotest.(check bool) "case1" true (List.is_empty tokens)
(* Assert that [s] parses successfully into an empty search expression
   whose flattened form contains only empty phrases (or none at all). *)
let test_empty_exp (s : string) =
  let parsed = Option.get (Search_exp.parse s) in
  Alcotest.(check bool) "case0" true (Search_exp.is_empty parsed);
  let phrases = Search_exp.flattened parsed in
  let all_empty =
    List.is_empty phrases
    ||
    List.for_all Search_phrase.is_empty phrases
  in
  Alcotest.(check bool) "case1" true all_empty
(* Shorthand constructors for annotated tokens, all placed in group 0. *)

(* [at s]: plain string token. *)
let at s : Search_phrase.annotated_token =
Search_phrase.{ data = `String s; group_id = 0 }

(* [atm m]: match-type marker token. *)
let atm m : Search_phrase.annotated_token =
Search_phrase.{ data = `Match_typ_marker m; group_id = 0 }

(* [ats]: explicit-spaces token. *)
let ats : Search_phrase.annotated_token =
Search_phrase.{ data = `Explicit_spaces; group_id = 0 }
(* Build an expected enriched token with string payload [string], the
   given prev/next link flags, and match type [m] (fuzzy by default).
   The Spelll automaton is a placeholder built from the empty string. *)
let et
    ?(m : Search_phrase.match_typ = `Fuzzy)
    string
    is_linked_to_prev
    is_linked_to_next
  : Search_phrase.Enriched_token.t =
  let dfa = Spelll.of_string ~limit:0 "" in
  Search_phrase.Enriched_token.make
    (`String string) ~is_linked_to_prev ~is_linked_to_next dfa m
(* Like [et] but for an explicit-spaces enriched token. *)
let ets
    ?(m : Search_phrase.match_typ = `Fuzzy)
    is_linked_to_prev
    is_linked_to_next
  =
  let dfa = Spelll.of_string ~limit:0 "" in
  Search_phrase.Enriched_token.make
    `Explicit_spaces ~is_linked_to_prev ~is_linked_to_next dfa m
(* Check that parsing [s] flattens to exactly the phrases described by [l].
   Each element of [l] pairs one expected phrase's annotated tokens with
   that phrase's expected enriched tokens.  With [~neg:true], the phrase
   list comparison (case0) is expected to FAIL instead; the enriched-token
   comparison (case1) is always checked positively. *)
let test_exp
?(neg = false)
(s : string)
(l : (Search_phrase.annotated_token list * Search_phrase.Enriched_token.t list) list)
=
(* Rebind before entering [Alcotest.(...)], where the module's [neg]
   combinator shadows this parameter. *)
let neg' = neg in
(* Expected phrases, built from the annotated-token lists. *)
let phrases =
l
|> List.map fst
|> List.map (fun l ->
List.to_seq l
|> Search_phrase.of_annotated_tokens)
in
(* Expected enriched tokens, one list per phrase. *)
let enriched_token_list_list =
List.map snd l
in
(* case0: parsed + flattened phrases match the expected ones, compared
   as sorted lists so ordering differences don't matter. *)
Alcotest.(check
(if neg' then (
neg (list search_phrase_testable)
) else (
list search_phrase_testable
)))
(Fmt.str "case0 of %S" s)
(List.sort Search_phrase.compare phrases)
(Search_exp.parse s
|> Option.get
|> Search_exp.flattened
|> List.sort Search_phrase.compare
);
(* case1: enriched tokens of the expected phrases match [l]. *)
Alcotest.(check (list (list enriched_token_testable)))
(Fmt.str "case1 of %S" s)
enriched_token_list_list
(phrases
|> List.map Search_phrase.enriched_tokens
)
(* Regression corpus for the search expression language.  Exercises, via
   the helpers above:
   - [test_empty_exp] / [test_empty_phrase]: input normalizes to empty;
   - [test_invalid_exp]: input fails to parse;
   - [test_exp]: input flattens to exactly the listed phrases, each given
     as (annotated tokens, expected enriched tokens).  The boolean pair on
     each [et]/[ets] is (is_linked_to_prev, is_linked_to_next). *)
let corpus () =
(* Whitespace-only inputs are empty. *)
test_empty_exp "";
test_empty_phrase "";
test_empty_exp " ";
test_empty_phrase " ";
test_empty_exp "\r\n";
test_empty_phrase "\r\n";
test_empty_exp "\t";
test_empty_phrase "\t";
test_empty_exp "\r\n\t";
test_empty_phrase "\r\n\t";
test_empty_exp " \r \n \t ";
test_empty_phrase " \r \n \t ";
(* Empty groups, arbitrarily nested, are still empty. *)
test_empty_exp "()";
test_empty_exp " () ";
test_empty_exp "( )";
test_empty_exp " ( ) ";
test_empty_exp " ( ) () ";
test_empty_exp " ( ( ) ) () ";
test_empty_exp " ( () ) (( )) ";
test_empty_exp " ( () ) (( () )) ";
(* Unbalanced parentheses and a bare optional marker are parse errors. *)
test_invalid_exp " ( ) ( ";
test_invalid_exp " ) ( ";
test_invalid_exp " ( ( ) ";
(* NOTE(review): the next line duplicates the previous one exactly —
   possibly meant to cover a different unbalanced input; confirm. *)
test_invalid_exp " ( ( ) ";
test_invalid_exp "?";
test_invalid_exp "? ";
(* Escaping '?' yields a literal token. *)
test_exp "\\?"
[ ([ at "?" ],
[ et "?" false false ])
];
(* Punctuation with no surrounding spaces links adjacent tokens. *)
test_exp "hello_world"
[ ([ at "hello"; at "_"; at "world" ],
[ et "hello" false true; et "_" true true; et "world" true false ])
];
(* Empty groups adjacent to a token do not affect it. *)
test_exp "(hello)"
[ ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "()hello"
[ ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "hello()"
[ ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "( ) hello"
[ ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "hello ( )"
[ ([ at "hello" ],
[ et "hello" false false ])
];
(* '?' makes the following token or group optional: flattening yields
   both the branch without it and the branch with it. *)
test_exp "?hello"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "(?hello)"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "?(hello)"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "?hello()"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "?hello ()"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "? hello"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "?hello world"
[ ([ at "world" ],
[ et "world" false false ])
; ([ at "hello"; at " "; at "world" ],
[ et "hello" false false; et "world" false false ])
];
test_exp "? hello world"
[ ([ at "world" ],
[ et "world" false false ])
; ([ at "hello"; at " "; at "world" ],
[ et "hello" false false; et "world" false false ])
];
test_exp "?(hello) world"
[ ([ at "world" ],
[ et "world" false false ] )
; ([ at "hello"; at " "; at "world" ],
[ et "hello" false false; et "world" false false ] )
];
test_exp "? (hello) world"
[ ([ at "world" ],
[ et "world" false false ] )
; ([ at "hello"; at " "; at "world" ],
[ et "hello" false false; et "world" false false ])
];
test_exp "?(hello world) abcd"
[ ([ at "abcd" ],
[ et "abcd" false false ] )
; ([ at "hello"; at " "; at "world"; at " "; at "abcd" ],
[ et "hello" false false; et "world" false false; et "abcd" false false ] )
];
test_exp "ab ?(hello world) cd"
[ ([ at "ab"; at " "; at "cd" ],
[ et "ab" false false; et "cd" false false ])
; ([ at "ab"; at " "; at "hello"; at " "; at "world"; at " "; at "cd" ],
[ et "ab" false false; et "hello" false false; et "world" false false; et "cd" false false ])
];
test_exp "ab ?hello world cd"
[ ([ at "ab"; at " "; at "world"; at " "; at "cd" ],
[ et "ab" false false; et "world" false false; et "cd" false false ])
; ([ at "ab"; at " "; at "hello"; at " "; at "world"; at " "; at "cd" ],
[ et "ab" false false; et "hello" false false; et "world" false false; et "cd" false false ])
];
(* Alternation with '|' expands to the cartesian product of branches;
   an optional branch additionally contributes the absent case. *)
test_exp "go (left | right)"
[ ([ at "go"; at " "; at "left" ],
[ et "go" false false; et "left" false false ])
; ([ at "go"; at " "; at "right" ],
[ et "go" false false; et "right" false false ])
];
test_exp "go (?up | left | right)"
[ ([ at "go" ],
[ et "go" false false ])
; ([ at "go"; at " "; at "up" ],
[ et "go" false false; et "up" false false ])
; ([ at "go"; at " "; at "left" ],
[ et "go" false false; et "left" false false ])
; ([ at "go"; at " "; at "right" ],
[ et "go" false false; et "right" false false ])];
test_exp "(left | right) (up | down)"
[ ([ at "left"; at " "; at "up" ],
[ et "left" false false; et "up" false false ])
; ([ at "left"; at " "; at "down" ],
[ et "left" false false; et "down" false false ])
; ([ at "right"; at " "; at "up" ],
[ et "right" false false; et "up" false false ])
; ([ at "right"; at " "; at "down" ],
[ et "right" false false; et "down" false false ])
];
test_exp "((a|b)(c|d)) (e | f)"
[ ([ at "a"; at " "; at "c"; at " "; at "e" ],
[ et "a" false false; et "c" false false; et "e" false false ])
; ([ at "a"; at " "; at "c"; at " "; at "f" ],
[ et "a" false false; et "c" false false; et "f" false false ])
; ([ at "a"; at " "; at "d"; at " "; at "e" ],
[ et "a" false false; et "d" false false; et "e" false false ])
; ([ at "a"; at " "; at "d"; at " "; at "f" ],
[ et "a" false false; et "d" false false; et "f" false false ])
; ([ at "b"; at " "; at "c"; at " "; at "e" ],
[ et "b" false false; et "c" false false; et "e" false false ])
; ([ at "b"; at " "; at "c"; at " "; at "f" ],
[ et "b" false false; et "c" false false; et "f" false false ])
; ([ at "b"; at " "; at "d"; at " "; at "e" ],
[ et "b" false false; et "d" false false; et "e" false false ])
; ([ at "b"; at " "; at "d"; at " "; at "f" ],
[ et "b" false false; et "d" false false; et "f" false false ])
];
test_exp "(?left | right) (up | down)"
[ ([ at "up" ],
[ et "up" false false ])
; ([ at "down" ],
[ et "down" false false ])
; ([ at "left"; at " "; at "up" ],
[ et "left" false false; et "up" false false ])
; ([ at "left"; at " "; at "down" ],
[ et "left" false false; et "down" false false ])
; ([ at "right"; at " "; at "up" ],
[ et "right" false false; et "up" false false ])
; ([ at "right"; at " "; at "down" ],
[ et "right" false false; et "down" false false ])
];
test_exp "go (left | right) or ( up | down )"
[ ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "or" false false; et "down" false false ])
];
(* Link behavior around '/': tokens are linked only when no space (or
   group boundary) separates them from the punctuation.  The ~neg cases
   pin down which linkings must NOT be produced. *)
test_exp "and/or"
[ ([ at "and"; at "/"; at "or" ],
[ et "and" false true; et "/" true true; et "or" true false ])
];
test_exp ~neg:true "and/or"
[ ([ at "and"; at " "; at "/"; at " "; at "or" ],
[ et "and" false false; et "/" false false; et "or" false false ])
];
test_exp ~neg:true "and/or"
[ ([ at "and"; at " "; at "/"; at "or" ],
[ et "and" false false; et "/" false true; et "or" true false ])
];
test_exp ~neg:true "and/or"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
test_exp "and / or"
[ ([ at "and"; at " "; at "/"; at " "; at "or" ],
[ et "and" false false; et "/" false false; et "or" false false ])
];
test_exp ~neg:true "and / or"
[ ([ at "and"; at "/"; at "or" ],
[ et "and" false true; et "/" true true; et "or" true false ])
];
test_exp ~neg:true "and / or"
[ ([ at "and"; at " "; at "/"; at "or" ],
[ et "and" false false; et "/" false true; et "or" true false ])
];
test_exp ~neg:true "and / or"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
(* Group boundaries also break links. *)
test_exp "(and)/ or"
[ ([ at "and"; at " "; at "/"; at " "; at "or" ],
[ et "and" false false; et "/" false false; et "or" false false ])
];
test_exp ~neg:true "(and)/ or"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
test_exp "and(/) or"
[ ([ at "and"; at " "; at "/"; at " "; at "or" ],
[ et "and" false false; et "/" false false; et "or" false false ])
];
test_exp ~neg:true "and(/) or"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
test_exp "and/(or)"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
test_exp ~neg:true "and/(or)"
[ ([ at "and"; at "/"; at "or" ],
[ et "and" false true; et "/" true true; et "or" true false ])
];
test_exp "go (left | right) and/or ( up | down )"
[ ([ at "go"; at " "; at "left"; at " "; at "and"; at "/"; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "and" false true; et "/" true true; et "or" true false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "and"; at "/"; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "and" false true; et "/" true true; et "or" true false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "and"; at "/"; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "and" false true; et "/" true true; et "or" true false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "and"; at "/"; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "and" false true; et "/" true true; et "or" true false; et "down" false false ])
];
test_exp "go (left | right) and / or ( up | down )"
[ ([ at "go"; at " "; at "left"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "and" false false; et "/" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "and" false false; et "/" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "and" false false; et "/" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "and" false false; et "/" false false; et "or" false false; et "down" false false ])
];
(* Optional alternation groups, including nested optionals. *)
test_exp "go ?(left | right) ( up | down )"
[ ([ at "go"; at " "; at "up" ],
[ et "go" false false; et "up" false false ])
; ([ at "go"; at " "; at "down" ],
[ et "go" false false; et "down" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "down" false false ])
];
test_exp "go ?((left | right) or) ( up | down )"
[ ([ at "go"; at " "; at "up" ],
[ et "go" false false; et "up" false false ])
; ([ at "go"; at " "; at "down" ],
[ et "go" false false; et "down" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "or" false false; et "down" false false ])
];
test_exp "go ?(?(left | right) or) ( up | down )"
[ ([ at "go"; at " "; at "up" ],
[ et "go" false false; et "up" false false ])
; ([ at "go"; at " "; at "down" ],
[ et "go" false false; et "down" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "or" false false; et "down" false false ])
];
test_exp "go ?(?(left | right) or) or ( ?up | down )"
[ ([ at "go"; at " "; at "or" ],
[ et "go" false false; et "or" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "or" ],
[ et "go" false false; et "or" false false; et "or" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "or" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "or" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "or" ],
[ et "go" false false; et "left" false false; et "or" false false; et "or" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "or" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "or" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "or" ],
[ et "go" false false; et "right" false false; et "or" false false; et "or" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "or" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "or" false false; et "or" false false; et "down" false false ])
];
(* Dash handling: adjacent dashes link, spaced ones do not. *)
test_exp "- - -"
[ ([ at "-"; at " "; at "-"; at " "; at "-" ],
[ et "-" false false; et "-" false false; et "-" false false ])
];
test_exp "-- -"
[ ([ at "-"; at "-"; at " "; at "-" ],
[ et "-" false true; et "-" true false; et "-" false false ])
];
(* Match-typ markers: ' (exact), ^ (prefix), $ (suffix); backslash
   escapes the marker into a literal token.  Some inputs below are
   tested twice against two different annotated-token spellings,
   apparently to check both spellings normalize to the same phrase. *)
test_exp "\\'abcd"
[ ([ at "'"; at "abcd" ],
[ et "'" false true; et "abcd" true false ])
];
test_exp "'abcd"
[ ([ atm `Exact; at "abcd" ],
[ et ~m:`Exact "abcd" false false ])
];
test_exp "' abcd"
[ ([ atm `Exact; at " "; at "abcd" ],
[ et "'" false false; et "abcd" false false ])
];
test_exp "' abcd"
[ ([ at "'"; at " "; at "abcd" ],
[ et "'" false false; et "abcd" false false ])
];
test_exp "\\^abcd"
[ ([ at "^"; at "abcd" ],
[ et "^" false true; et "abcd" true false ])
];
test_exp "^abcd"
[ ([ atm `Prefix; at "abcd" ],
[ et ~m:`Prefix "abcd" false false ])
];
test_exp "^ abcd"
[ ([ at "^"; at " "; at "abcd" ],
[ et "^" false false; et "abcd" false false ])
];
test_exp "^ abcd"
[ ([ atm `Prefix; at " "; at "abcd" ],
[ et "^" false false; et "abcd" false false ])
];
test_exp "abcd$"
[ ([ at "abcd"; atm `Suffix ],
[ et ~m:`Suffix "abcd" false false ])
];
test_exp "abcd $"
[ ([ at "abcd"; at " "; at "$" ],
[ et "abcd" false false; et "$" false false ])
];
test_exp "abcd $"
[ ([ at "abcd"; at " "; atm `Suffix ],
[ et "abcd" false false; et "$" false false ])
];
(* Doubled markers: the extra marker degrades to a literal token. *)
test_exp "''abcd"
[ ([ atm `Exact; atm `Exact; at "abcd" ],
[ et ~m:`Exact "'" false true; et ~m:`Exact "abcd" true false ])
];
test_exp "^^abcd"
[ ([ atm `Prefix; at "^"; at "abcd" ],
[ et ~m:`Exact "^" false true; et ~m:`Prefix "abcd" true false ])
];
test_exp "abcd$$"
[ ([ at "abcd"; at "$"; atm `Suffix ],
[ et ~m:`Suffix "abcd" false true; et ~m:`Exact "$" true false ])
];
test_exp "abcd$$"
[ ([ at "abcd"; atm `Suffix; atm `Suffix ],
[ et ~m:`Suffix "abcd" false true; et ~m:`Exact "$" true false ])
];
(* '~' denotes explicit spaces; it links its neighbours. *)
test_exp "'ab~cd"
[ ([ atm `Exact; at "ab"; ats; at "cd" ],
[ et ~m:`Exact "ab" false true
; ets ~m:`Exact true true
; et ~m:`Exact "cd" true false ])
];
test_exp "^ab~cd$"
[ ([ atm `Prefix; at "ab"; ats; at "cd"; atm `Suffix ],
[ et ~m:`Exact "ab" false true
; ets ~m:`Exact true true
; et ~m:`Exact "cd" true true
; et ~m:`Prefix "$" true false
])
];
test_exp "ab~cd$"
[ ([ at "ab"; ats; at "cd"; atm `Suffix ],
[ et ~m:`Suffix "ab" false true
; ets ~m:`Exact true true
; et ~m:`Exact "cd" true false
])
];
test_exp " ~cd$"
[ ([ ats; at "cd"; atm `Suffix ],
[ ets ~m:`Suffix false true
; et ~m:`Exact "cd" true false
])
];
(* Mixed markers within one phrase (two annotated spellings again). *)
test_exp "'^abcd efgh$$ ij$kl$"
[ ([ atm `Exact
; atm `Prefix
; at "abcd"
; at " "
; at "efgh"
; atm `Suffix
; atm `Suffix
; at " "
; at "ij"
; atm `Suffix
; at "kl"
; atm `Suffix
],
[ et ~m:`Exact "^" false true
; et ~m:`Exact "abcd" true false
; et ~m:`Suffix "efgh" false true
; et ~m:`Exact "$" true false
; et ~m:`Suffix "ij" false true
; et ~m:`Exact "$" true true
; et ~m:`Exact "kl" true false
])
];
test_exp "'^abcd efgh$$ ij$kl$"
[ ([ atm `Exact
; at "^"
; at "abcd"
; at " "
; at "efgh"
; at "$"
; atm `Suffix
; at " "
; at "ij"
; at "$"
; at "kl"
; atm `Suffix
],
[ et ~m:`Exact "^" false true
; et ~m:`Exact "abcd" true false
; et ~m:`Suffix "efgh" false true
; et ~m:`Exact "$" true false
; et ~m:`Suffix "ij" false true
; et ~m:`Exact "$" true true
; et ~m:`Exact "kl" true false
])
];
()
(* Alcotest entry point for this test module. *)
let suite = [ Alcotest.test_case "corpus" `Quick corpus ]
end
================================================
FILE: tests/test_utils.ml
================================================
open Docfd_lib
(* Alcotest testable packing [Search_phrase.Enriched_token]
   (it supplies the required [equal] and [pp]). *)
let enriched_token_testable =
  (module Search_phrase.Enriched_token
    : Alcotest.TESTABLE with type t = Search_phrase.Enriched_token.t)
(* Alcotest testable packing [Search_phrase]
   (it supplies the required [equal] and [pp]). *)
let search_phrase_testable =
  (module Search_phrase
    : Alcotest.TESTABLE with type t = Search_phrase.t)
================================================
FILE: tests/utils_tests.ml
================================================
open Docfd_lib
module Alco = struct
  (* Corpus for [Misc_utils'.normalize_path_to_absolute]: absolute
     inputs are checked directly; relative inputs are checked against
     the current working directory. *)
  let normalize_path_to_absolute_corpus () =
    let check_abs expected input =
      Alcotest.(check string)
        (Printf.sprintf "%s becomes %s" input expected)
        expected
        (Misc_utils'.normalize_path_to_absolute input)
    in
    let cwd = Sys.getcwd () in
    (* Relative-path variant: "" means the result should be cwd itself. *)
    let check_rel expected input =
      let expected =
        if String.equal expected "" then cwd
        else Filename.concat cwd expected
      in
      check_abs expected input
    in
    check_abs "/" "/..";
    check_abs "/" "/..";
    check_abs "/" "/abcd/..";
    check_abs "/" "/abcd/..";
    check_abs "/abc" "/abc/";
    check_abs "/abc" "/abc/def/..";
    check_abs "/abc" "/abc//";
    check_abs "/abc/def" "/abc//def";
    check_abs "/abc/def" "/abc/./def";
    check_abs "/abc/def" "/abc/.///def/.";
    check_abs "/def" "/abc/.//../def/.";
    check_rel "" "abc/..";
    check_rel "def" "abc/../def";
    check_rel "abc/def" "abc////.//./././/def";
    ()

  (* Corpus for [Misc_utils'.normalize_glob_to_absolute]: same shape as
     above, plus cases showing that ".." is not collapsed across the
     "**" wildcard. *)
  let normalize_glob_to_absolute_corpus () =
    let check_abs expected input =
      Alcotest.(check string)
        (Printf.sprintf "%s becomes %s" input expected)
        expected
        (Misc_utils'.normalize_glob_to_absolute input)
    in
    let cwd = Sys.getcwd () in
    (* Relative-glob variant: "" means the result should be cwd itself. *)
    let check_rel expected input =
      let expected =
        if String.equal expected "" then cwd
        else Filename.concat cwd expected
      in
      check_abs expected input
    in
    check_abs "/" "/..";
    check_abs "/" "/..";
    check_abs "/" "/abcd/..";
    check_abs "/" "/abcd/..";
    check_abs "/abc" "/abc/";
    check_abs "/abc" "/abc/def/..";
    check_abs "/abc" "/abc/*/..";
    check_abs "/abc/**/.." "/abc/**/..";
    check_abs "/abc/**/def" "/abc/**/def";
    check_abs "/**/def/*/.." "/abc/../**/def/*/..";
    check_abs "/abc/**/def" "/abc/.////**/def";
    check_abs "/abc" "/abc//";
    check_abs "/abc/def" "/abc//def";
    check_abs "/abc/def" "/abc/./def";
    check_abs "/abc/def" "/abc/.///def/.";
    check_abs "/def" "/abc/.//../def/.";
    check_rel "" "abc/..";
    check_rel "def" "abc/../def";
    check_rel "abc/def" "abc////.//./././/def";
    check_rel "abc/**/../def" "abc/**/../def";
    check_rel "abc/**/../def" "abc/*/../**/../def";
    check_rel "abc/*/def" "abc/*/*/../def";
    ()

  (* Alcotest entry point for this test module. *)
  let suite =
    [ Alcotest.test_case
        "normalize_path_to_absolute_corpus"
        `Quick
        normalize_path_to_absolute_corpus;
      Alcotest.test_case
        "normalize_glob_to_absolute_corpus"
        `Quick
        normalize_glob_to_absolute_corpus;
    ]
end
================================================
FILE: update-version-string.py
================================================
"""Generate bin/version_string.ml from the environment or CHANGELOG.md.

The version is taken from DOCFD_VERSION_OVERRIDE when set and non-empty;
otherwise it is the first "## <version>" heading in CHANGELOG.md that is
not marked as a future release.  Fails loudly instead of silently
writing a bogus version string.
"""
import os

ML_PATH = "bin/version_string.ml"


def detect_version(changelog_lines):
    """Return the version from the first release heading, or None.

    A release heading is a line starting with "## " whose text does not
    contain "future release" (case-insensitive); the version is the
    second whitespace-separated field, stripped.
    """
    for line in changelog_lines:
        if line.startswith("## ") and "future release" not in line.lower():
            return line.split(" ", 2)[1].strip()
    return None


def main():
    version = os.environ.get("DOCFD_VERSION_OVERRIDE")
    if version is None or version == "":
        with open("CHANGELOG.md") as f:
            version = detect_version(f)
    # Previously a missing heading silently produced `let s = "None"`;
    # abort instead so the build surfaces the problem.
    if not version:
        raise SystemExit("Failed to detect Docfd version from CHANGELOG.md")
    print(f"Detected version for Docfd: {version}")
    print(f"Writing to {ML_PATH}")
    with open(ML_PATH, "w") as f:
        f.write(f"let s = \"{version}\"")
        f.write("\n")


if __name__ == "__main__":
    main()