Repository: darrenldl/docfd Branch: main Commit: 203de25de255 Files: 125 Total size: 618.6 KB Directory structure: gitextract_8iyswmwt/ ├── .gitattributes ├── .github/ │ └── workflows/ │ └── deploy.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── bin/ │ ├── BLAKE2B.ml │ ├── UI.ml │ ├── UI_base.ml │ ├── args.ml │ ├── clipboard.ml │ ├── command.ml │ ├── content_and_search_result_rendering.ml │ ├── debug_utils.ml │ ├── docfd.ml │ ├── document.ml │ ├── document.mli │ ├── document_pipeline.ml │ ├── document_pipeline.mli │ ├── document_src.ml │ ├── dune │ ├── file_utils.ml │ ├── filter_exp.ml │ ├── glob.ml │ ├── glob.mli │ ├── lock_protected_cell.ml │ ├── lock_protected_cell.mli │ ├── misc_utils.ml │ ├── params.ml │ ├── path_opening.ml │ ├── ping.ml │ ├── ping.mli │ ├── printers.ml │ ├── proc_utils.ml │ ├── result_syntax.ml │ ├── script.ml │ ├── search_mode.ml │ ├── session.ml │ ├── session.mli │ ├── session_manager.ml │ ├── session_manager.mli │ ├── string_utils.ml │ ├── version_string.ml │ └── xdg_utils.ml ├── containers/ │ ├── Containerfile.demo-vhs │ └── Containerfile.docfd ├── demo-vhs-tapes/ │ ├── repo-non-interactive.tape │ ├── repo.tape │ └── ui-screenshot.tape ├── demo-vhs.sh ├── docfd.opam ├── docfd.opam.locked ├── docfd.opam.template ├── dune-project ├── file-collection-tests.t/ │ ├── dune │ └── run.t ├── lib/ │ ├── GZIP.ml │ ├── char_map.ml │ ├── doc_id_db.ml │ ├── doc_id_db.mli │ ├── docfd_lib.ml │ ├── dune │ ├── index.ml │ ├── index.mli │ ├── int_map.ml │ ├── int_set.ml │ ├── link.ml │ ├── misc_utils.ml │ ├── option_syntax.ml │ ├── params.ml │ ├── parser_components.ml │ ├── search_exp.ml │ ├── search_exp.mli │ ├── search_phrase.ml │ ├── search_phrase.mli │ ├── search_result.ml │ ├── search_result.mli │ ├── search_result_heap.ml │ ├── sqlite3_utils.ml │ ├── stop_signal.ml │ ├── stop_signal.mli │ ├── string_map.ml │ ├── string_set.ml │ ├── task_pool.ml │ ├── task_pool.mli │ ├── tokenization.ml │ ├── word_db.ml │ └── word_db.mli 
├── line-wrapping-tests.t/ │ ├── dune │ ├── long-words.txt │ ├── run.t │ ├── sentences.txt │ └── words.txt ├── match-type-tests.t/ │ ├── dune │ ├── run.t │ └── test.txt ├── misc-behavior-tests.t/ │ ├── abcd.txt │ ├── dune │ └── run.t ├── non-interactive-mode-return-code-tests.t/ │ ├── dune │ └── run.t ├── open-with-tests.t/ │ ├── dune │ └── run.t ├── printing-tests.t/ │ ├── empty.txt │ ├── run.t │ ├── test0.txt │ ├── test1.txt │ ├── test2.txt │ ├── test3.txt │ └── test4.txt ├── profiling/ │ ├── dune │ └── main.ml ├── publish.sh ├── run-container.sh ├── script-tests.t/ │ ├── dune │ └── run.t ├── search-scope-narrowing-tests.t/ │ ├── dune │ └── run.t ├── tests/ │ ├── dune │ ├── main.ml │ ├── search_exp_tests.ml │ ├── test_utils.ml │ └── utils_tests.ml └── update-version-string.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ *.t/run.t linguist-vendored ================================================ FILE: .github/workflows/deploy.yml ================================================ name: Deploy on release on: push: tags: - "[0-9]*" - "test*" branches: - "ci-test" jobs: build: strategy: fail-fast: false matrix: os: - ubuntu-22.04 - ubuntu-22.04-arm - macos-latest runs-on: ${{ matrix.os }} steps: - name: Checkout code uses: actions/checkout@v4 - run: echo "GITHUB_TAG=$(git describe --always --tags)" >> $GITHUB_ENV - if: ${{ startsWith(matrix.os, 'ubuntu') && !endsWith(matrix.os, 'arm') }} run: echo "OS_SHORT_NAME=linux" >> $GITHUB_ENV - if: ${{ startsWith(matrix.os, 'ubuntu') && endsWith(matrix.os, 'arm') }} run: echo "OS_SHORT_NAME=linux-arm" >> $GITHUB_ENV - if: ${{ startsWith(matrix.os, 'macos') }} run: echo "OS_SHORT_NAME=macos" >> $GITHUB_ENV - if: ${{ startsWith(matrix.os, 'windows') }} run: echo "OS_SHORT_NAME=windows" >> $GITHUB_ENV - name: Set up OCaml for Linux uses: 
ocaml/setup-ocaml@v3 with: ocaml-compiler: "5.2.1" - run: opam install dune - run: opam install . --deps-only --with-test - name: Use commit hash as version if on ci-test branch if: ${{ github.ref_name == 'ci-test' }} run: | echo "DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }}" >> $GITHUB_ENV - name: Create build for macOS if: ${{ env.OS_SHORT_NAME == 'macos' }} run: | export DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }} opam exec -- make release-build - name: Create static build for Linux if: ${{ env.OS_SHORT_NAME == 'linux' }} run: | export DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }} opam exec -- make release-static-build - name: Create static build for Linux ARM if: ${{ env.OS_SHORT_NAME == 'linux-arm' }} run: | export DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }} opam exec -- make release-static-build-arm - name: Package into tar.gz run: | mv release/docfd docfd tar -cvzf docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz docfd - name: Upload artifacts if: ${{ github.ref_name == 'ci-test' }} uses: actions/upload-artifact@v4 with: name: docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz path: docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz - name: Release if: ${{ github.ref_name != 'ci-test' }} uses: softprops/action-gh-release@v2 with: files: | docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz - name: Release preview if: ${{ github.ref_name == 'ci-test' }} uses: softprops/action-gh-release@v2 with: tag_name: preview name: "Preview Build" body: "Automated preview build from commit ${{ github.sha }}. This release is updated on every push to ci-test." 
prerelease: true files: | docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz ================================================ FILE: .gitignore ================================================ _build/ _coverage/ .merlin *.rst~ *.install bisect*.out bisect*.coverage fuzz-*-input fuzz-*-output fuzz-logs/ /test*.md /test*.txt test*.pdf test*.docx *.tar.gz /release/ perf.data* *.pdf .cache /*.log *.mp4 dummy.gif *.db *.db-journal ================================================ FILE: CHANGELOG.md ================================================ # Changelog ## 13.0.0 - Removed fzf dependency entirely - Switched from fzf to a built-in implementation for fuzzy selection menus - Replaced "sort by fzf" functionality with built-in PATH-FUZZY-RANK mode - Renamed OPEN-SCRIPT to SCRIPTS - Moved DELETE-SCRIPT functionality to `Ctrl`+`X` under SCRIPTS - Added `y` key binding to copy in LINKS - Changed key binding for rotation of key binding info grid from `?` to `<` and `>` - Added "focus content" via `z` which hides the bottom right pane - Added key binding info pane toggle via `?` - Made the UX of various input fields slightly more polished - This includes adding `Esc` for cancelling during SAVE-SCRIPT - Added request handling debouncing to reduce pointless workload triggered during fast typing - Improved link extraction - Minor UI fixes and polishes ## 12.3.2 - Fixed missing stop signal passing for `content:"..."` filter expression handling - Previously, filter expressions with `content:"..."` were not cancelled properly - Added additional guards against potential freezing due to DB pool exhaustion or DB connection issues ## 12.3.1 - Fixed key binding info grid - Added the missing `l` key binding info - Fixed some other key binding labels ## 12.3.0 **Note**: This update contains a feature that requires removing the existing index DB to take effect. 
- Docfd script comment support improvement - Added `;` prefix for system comments, which are not preserved after editing of command history - `#` now denotes user comment and is preserved after editing of command history - Ordering of `#` is also preserved during saving a session as script - Added link opening support via LINKS mode - This will **only** work after recreating the index DB - `l` opens LINKS mode with the same navigation keybinds - `Enter` to open link - `o` to open link and remain in LINKS - Links which are closest to the selected search result will be prioritized first - Added key binding `xh` to clear command history quickly ## 12.2.0 - Dependencies adjustment for CI build - Internal refactoring - Refactored document store module into session state module to better reflect that the data structure is capturing not just documents and search results, but also some UI states, etc - Moved screen split handling to the level of session state instead of plain UI state - This allows Docfd script to better capture the view on screen - Added `Shift`+`Tab` for changing screen split ratio in the other direction, and removed the "rotating" behaviour of `Tab` ## 12.1.0 - Added missing sorting based on paths when path dates are the same - Added PDF viewer integration for Zathura on Linux - Moved sorting handling to the level of document store command instead of just plain UI update - This allows Docfd script to better capture the view on screen - Fixed default cache directory location on macOS - Changed from `~/Library/Application Support` to `~/Library/Caches` - Added `--reverse` to fzf invocation for better UX - Added `Ctrl`+`D` for script deletion ## 12.0.0 This contains a **breaking** DB change, you will need to remove index DB generated by Docfd version prior to 12.0.0-alpha.13 #### Highlights of changes since 11.0.1 - Moved to using a global word table to reduce index DB size and speed up search (12.0.0-alpha.13) - This is the **breaking** DB change stated 
above - Added further search speed optimizations (12.0.0) - Added an additional document pruning stage - Added a first word candidate pruning stage based on length of the first search word - Searching for short words should now feel much more responsive - Replaced filter glob with a more powerful filter language, with autocomplete in filter field (12.0.0-alpha.1, 12.0.0-alpha.2, 12.0.0-alpha.5, 12.0.0-alpha.6, 12.0.0-alpha.10, 12.0.0-alpha.11) - Added content view pane scrolling (12.0.0-alpha.5, 12.0.0-alpha.8) - Controlled by `-`/`=` - Added "save script" and "load script" functionality to make it actually viable to reuse Docfd commands (12.0.0-alpha.8, 12.0.0-alpha.9) - SQL query optimizations for prefix and exact search terms (12.0.0-alpha.3) - Key binding info grid improvements (12.0.0-alpha.4) - Added more key bindings - Packed columns more tightly - Added `--paths-from -` to accept list of paths from stdin (12.0.0-alpha.3) - Added WSL clipboard integration (12.0.0-alpha.4) - Added more marking key bindings (12.0.0-alpha.4) - `mark listed` (`ml`) marks all currently listed documents - `unmark listed` (`Ml`) unmarks all currently listed documents - `--open-with` placeholder handling fixes (12.0.0-alpha.4) - Using `{page_num}` and `{line_num}` crashes in 11.0.1 when there are no search results - Added sorting to document list (12.0.0-alpha.11, 12.0.0) - `s` for sort ascending mode and `Shift+S` for sort descending mode - Under the sort modes, the sort by types are as follows: - `p` sort by path - `d` sort by path date - `s` sort by score - `m` sort by modification time - `f` sort by an interactive fzf search - Selected option will be ranked the highest - Rest of the documents will be ranked using the ranking from fzf - Adjusted attributes listed in document list entry (12.0.0-alpha.11) - Added path date - Replaced last scan time with last modified time - Reworked the internal architecture of document store snapshots storage and management, which makes the 
overall interaction between UI and core code much more robust (12.0.0-alpha.11) #### Changes since 12.0.0-alpha.13 - Added further search speed optimizations: - Added an additional document pruning stage - Added a first word candidate pruning stage based on length of the first search word - Searching for short words should now feel much more responsive - Fixed interaction with fzf (which is used in some selection menus) on macOS due to different behavior of `Unix.waitpid` on macOS compared to Linux - Document sorting fine tuning - Fixed document sorting fallback behavior - If there is no search expression but sorting method chosen is to sort by score, then sorting method falls back to the option specified by `--sort-no-score` - Fixed macOS detection - Updated `--open-with` to accept a list of extensions, e.g. `--open-with ts,js:detached="... {path}"` - Added sort by fzf functionality - Under sort mode - `f` sort by an interactive fzf search - Selected option will be ranked the highest - Rest of the documents will be ranked using the ranking from fzf ## 12.0.0-alpha.13 - Moved to using a global word table to reduce index DB size and speed up search - This is a **breaking** DB change, you will need to remove index DB generated by older versions of Docfd - Added missing mutexes for caches, should further reduce random crashes - Added more path date extraction formats - `yyyy-mmm-dd`, `yyyy-mmmm-dd`, `dd-mmm-yyyy`, `dd-mmmm-yyyy` - `-` is an optional separator that is not a digit and not a letter ## 12.0.0-alpha.12 - Made resetting of search result selection and content view offset less aggressive - Some changes in 12.0.0-alpha.11 caused some UI counters to reset more frequently than desired ## 12.0.0-alpha.11 - Removed disabling of drop mode key binding `d` when searching or filtering is ongoing - Fixed content view pane offset not resetting when mouse is used to scroll search result list - Fixed content view pane staying small while scrolling up when the search 
result is close to the bottom of the file - Swapped all mutexes to Eio mutexes to hopefully remove the very random freezes that occur quite rarely - They feel like deadlocks due to mixing Eio mutexes (which block fiber) and stdlib mutexes (which block an entire domain) - Added sorting to document list - `s` for sort ascending mode and `Shift+S` for sort descending mode - Under the sort modes, the sort by types are as follows: - `p` sort by path - `d` sort by path date - `s` sort by score - `m` sort by modification time - Added `--sort` and `--sort-no-score` - Latter is mainly useful for when `--files-without-match` is used - Added `yyyymmdd` path date extraction - Added `mod-date` to filter language - Adjusted attributes listed in document list entry - Added path date - Replaced last scan time with last modified time - Reworked `--script` into `--script` and `--start-with-script` - `--script` is now only for non-interactive use - `--start-with-script` is only for interactive use - This mirrors the duals `--filter` vs `--start-with-filter` and `--search` vs `--start-with-search` - Reworked the internal architecture of document store snapshots storage and management - Snapshots are now centrally managed by `Document_store_manager`, along with improvements to snapshot handling logic in general - This makes the overall interaction between UI and core code much more robust, and eliminates random workarounds used to deal with UI and data synchronization, which have been riddled with random minor bugs ## 12.0.0-alpha.10 - Added basic autocomplete to filter field - Improved script save autocomplete to insert longest common prefix - Fixed script save autocomplete so it no longer erases original text when no recommendations are available ## 12.0.0-alpha.9 - Disabled `Tab` handling in edit fields to reduce friction in UX - Added `nano`-style autocomplete to save commands field with listing of existing scripts ## 12.0.0-alpha.8 - Changed `--commands-from` to `--script` - Added 
"save commands as script" and "load script" functionality to streamline reusing of commands - Improved content view pane scrolling control - The internal counter no longer scrolls past the limit ## 12.0.0-alpha.7 - Fixed interactive use of `--commands-from` - Added `mark listed` and `unmark listed` to template command history file help info ## 12.0.0-alpha.6 - Fixed `not` operator parsing - Previously `not ext:txt and not ext:md` would be parsed as `not (ext:txt and not ext:md)`, which is not what is typically expected - `not` now binds tightly, so `not ext:txt and not ext:md` is parsed as `(not ext:txt) and (not ext:md)` ## 12.0.0-alpha.5 - Added content view pane scrolling - Controlled by `-`/`=` - Removed extraneous marking functionality - `mark unlisted` - `unmark unlisted` - Added `"..."` as a shorthand to `content:"..."` to filter expression - For example, `content:keyword AND path-date:>2025-01-01` can be written as `"keyword" AND path-date:>2025-01-01` - The quotation is necessary to differentiate between typos and actual query, otherwise incorrect input like `pathfuzzy:...` would be parsed as content queries instead ## 12.0.0-alpha.4 - Added additional marking functionality - `mark listed` (`ml`) marks all currently listed documents - `mark unlisted` (`mL`) marks all currently unlisted documents - `unmark listed` (`Ml`) unmarks all currently listed documents - `unmark unlisted` (`ML`) unmarks all currently unlisted documents - `unmark all` is moved to key binding `Ma` - Reworked key binding info grid to pack columns more tightly - Added WSL clipboard integration - Minor fix in command history file template help text - Added `Tab` key to key binding info grid - Added key binding info about scrolling through document list and search result list - Minor fix for `{line_num}` placeholder handling in `--open-with` - This should always be usable for text files but previously Docfd crashes when `{line_num}` is specified in `--open-with` and user opens a text file 
when no search has been made - This is fixed by defaulting `{line_num}` to 1 when there are no search results present - Minor fix for `{page_num}` and `{search_word}` placeholders handling in `--open-with` - This should always be usable for PDF files but previously Docfd crashes when `{page_num}` or `{search_word}` is specified in `--open-with` and user opens a PDF file when no search has been made - This is fixed by defaulting `{page_num}` to 1 and `{search_word}` to empty string when there are no search results present ## 12.0.0-alpha.3 - **Users are advised to recreate the index DB** - Adjusted SQL indices and swapped to specialized SQL queries for exact and prefix search terms, e.g. `'hello`, `^worl` - Handling of these terms is now 10-20% faster depending on the document - Fixed command history recomputation not using the reloaded version of document store - This issue is most noticeable when you've edited a text file after hitting `Enter` in Docfd (after which Docfd reloads the file for you), and you hit `h` to modify the command history - The replaying of the command history would use the old copy of the file instead of the new edited version of the text file - Added missing SQL transaction in code path for reloading a single document - Previously, reloading a single document was incredibly slow, which was very noticeable if you edited a text file after hitting `Enter` in Docfd, unless the text file was very small - Updated `--paths-from` argument handling - Added `--paths-from -` for accepting list of paths from stdin - Adjusted to accept comma separated list of paths, e.g. `--paths-from path-list0.txt,path-list1.txt` - Removed builtin piping to fzf triggered by providing `?` as a file path, e.g. `docfd ?` - The `--paths-from -` handling makes this obsolete and a lot less flexible by comparison - Fixed interaction between search and filter - Previously, starting a search would incorrectly cancel an ongoing filtering operation. 
Now only a new filtering operation can cancel an ongoing filtering operation. A new search still cancels an ongoing search. - Starting a new filtering operation also still cancels any ongoing search. This is fine since the search results are refreshed after the filtering has been completed. - The refreshing of the search results also means that the following sequences of events are still handled correctly, namely they still arrive at the same normal form of the document store: - Example 1: - (0) Filter `f_exp0` (filtering is canceled by step (2), but the updating of filter expression is never canceled) - (1) Search `s_exp0` (search is canceled by step (2), but the updating of search expression is never canceled) - (2) Filter `f_exp1` (refreshes search results using `s_exp0`) - Example 2: - (0) Search `s_exp0` (search is canceled by step (1), but updating of search expression is never canceled) - (1) Filter `f_exp0` (this stage is canceled by step (2), either during the filtering or during the refreshing of search results, but the updating of filter expression is never canceled) - (2) Filter `f_exp1` (refreshes search results using `s_exp0`) - Renaming query expression/language to filter expression/language in help text and documentation - Added a separate loading indicator for filter field - Fixed concurrency issue where an update of document store may cause the filter field and search field in UI to be out of sync with the actual filter expression and search expression used by the underlying document store - Suppose we have the following sequence of events: - (0) Document store `store0` carries filter expression `f_exp0` and search expression `s_exp0`, which we write as pair `(f_exp0, s_exp0)` - (1) User initiates filter/search operation by placing `(f_exp1, s_exp1)` into the input fields. We name the document store resulting from this filter/search operation as `store1a`, which carries `(f_exp1, s_exp1)` when finalized. 
- (2) While filter/search operation is ongoing, user drops a set of documents from the current document store. Since `store1a` is not finalized yet, the current document store is still `store0`, thus the new document store encoding the result of the drop operation, `store1b`, is computed from `store0` instead of `store1a`. In other words, both `store1a` and `store1b` share `store0` as their parent. Note that `store1b` carries `(f_exp0, s_exp0)` as inherited from `store0`, since a drop operation does not alter the filter expression or search expression. - (3) As a drop operation immediately updates the document store and cancels ongoing filter/search operation, step (2) canceled the computation of `store1a`, and instead places `store1b` as the current document store. - However, this means the input fields are `(f_exp1, s_exp1)` while the current document store `store1b` actually carries `(f_exp0, s_exp0)`. The fix in this update is then to add an extra "sync from input fields" step whenever a document store is updated. To illustrate, we continue from the above sequence of events, where the updated version of Docfd carries out the following step missing from previous versions. - (4) Update input fields to `(f_exp0, s_exp0)` - This addresses the mismatch between the underlying document store and the UI input fields. - In practice this is very unlikely to occur with human input, as the modes that update document store are disabled if document store manager is carrying out any ongoing filtering or search. However, since the UI is async, there will be gaps in timing between UI input/feedback and actual updates of values, opening up to TOCTOU problems. So there is always a chance that a document store update will be requested before the modes are disabled. 
- Made interrupted filter/search operation to not yield a document store at all instead of yielding an empty document store to simplify reasoning about filter/search cancellations and UI fields being in sync ## 12.0.0-alpha.2 - Added `path-date` clause to query expression - This allows filtering based on date recognized from document path, for example, `path-date:>=2025-01-01 AND path-date:<2025-02-01` would allow `/home/user/meeting-notes-2025-01-10.md` to pass through - This gives a very lightweight method of attaching date information to any document - See [relevant Wiki page](https://github.com/darrenldl/docfd/wiki/Document-filtering) for details ## 12.0.0-alpha.1 - Added a more powerful filter mode that replaces the filter glob mode and "pipe to fzf" feature - Filter query mode uses a proper query language that supports file path globbing and file path fuzzy matching among other features - This mode uses key binding `f` - Removed `q` exit key binding to avoid accidental exiting ## 11.0.1 - Added better search cancellation handling, removing massive lags in some scenarios ## 11.0.0 - Minor fix for search scope narrowing logic: - Search scope should also be set to empty if the document is not passing the file filter, not just when the search results are empty - The old behavior can be confusing when a document passes an old file filter and thus has search results in memory, but fail to pass a new file filter, yet appears in later searches when file filter is reset - It is simpler to make it so if a document is not listed for whatever reason, search scope of that document just becomes empty during narrowing - Added missing commands in the list of possible commands in the command history file template - `clear search` - `clear filter` - Minor breaking change, filter regex mode should have been called filter glob mode - The key binding `fr` is changed to `fg` - Changed UI text "File path filter" to "File path glob" to be more descriptive ## 10.2.0 - Added 
`--open-with` to allow customising the command used to open a file based on file extension - Example: `--open-with pdf:detached='okular {path}'` - Can be specified multiple times - Added non-interactive use of `--commands-from` - Non-interactive use can be triggered by pairing `--commands-from` with `-l`/`--files-with-match` - Useful for advanced document management workflow - Adjustments to search scope narrowing - Added `narrow level: 0` for resetting the search scopes of all documents back to full - Narrowing now no longer drops unlisted document, so the previous set of documents remain accessible for later searches after resetting the search scopes - Reworked search into multi-stage pipeline - This improves the search speed by around 30% - The core search procedure was reworked into an API that generates grouped search jobs which can be easily distributed to threads. This gives a better workload distribution than the current multithreading approach. ## 10.1.3 - Minor fixes - "Reload document" now removes the document if the document is no longer accessible - Docfd now only checks the existence of directly specified files at launch, e.g. `file.txt` in `docfd file.txt`. This means "reload all documents" now does not error out due to files becoming no longer accessible. ## 10.1.2 - Minor fix for "reload all documents" when fzf was used to pick documents initially, i.e. `docfd [PATH]... 
?`, or any variation where `?` appears anywhere in the path list - Under this workflow, later "reload all" should use the same selection instead of having the user select again in fzf, which is cumbersome - Now Docfd correctly reuses the selection when "reload all" is requested, if fzf was used initially to pick documents - This does technically mean the functionality is now less flexible, since if `docfd ?` alike is used, "reload all" no longer discovers new files - But the convenience from reusing the selection outweighs the flexibility in practically all use cases from author's experience ## 10.1.1 - Minor fix for "filter files via fzf" functionality - Previously, if instead of making a selection, the user quits fzf (e.g. pressing `Ctrl`+`C`, `Ctrl`+`Q`), Docfd also closes with it - Now Docfd just discards the interaction and goes back to the main UI ## 10.1.0 - Added back index DB entry pruning - Previously missing after swapping to SQLite DB - Also renamed `--cache-soft-limit` to `--cache-limit` to reflect the new pruning logic - Fixes [issue #12](https://github.com/darrenldl/docfd/issues/12) - Swapped to a better `doc_id` allocation strategy to minimise `doc_id` size in DB - Added blinking when drop mode is disabled but `d` is pressed ## 10.0.0 - Reworked document indexing into a multi-stage pipeline - This significantly improves the indexing throughput by allowing I/O tasks and computational tasks to run concurrently - See [issue #11](https://github.com/darrenldl/docfd/issues/11) - **Breaking** changes in index DB design - index DBs made by previous version of Docfd are not compatible - Optimized DB design, on average the index DB is roughly 60% smaller compared to Docfd 9.0.0 index DB - See [issue #11](https://github.com/darrenldl/docfd/issues/11) - Added functionality to filter files via fzf - This is grouped under filter mode. The previous filter mode is renamed to filter regex mode. 
- `f` enters filter mode - `f` again activates filter files via fzf functionality - `r` activates the filter regex mode, which was previously just called the filter mode - Fixed incomplete search results when file path filter field is updated while search is ongoing - Updating file path filter always cancels the current search (if there is one) and starts a new search after the filter is in place - Previously, documents with partial search results due to cancellation are kept - Docfd now discards said documents, forcing the new search to complete the search results of these documents - Removed `--no-cache` flag - Previously was unused completely - It is difficult to share an in-memory SQLite DB between threads, so discarding this flag entirely - See [issue #11](https://github.com/darrenldl/docfd/issues/11) - Swapped to using proper unicode segmentation for tokenisation - This should reduce the index size for Western non-English languages significantly - Added screen split ratios for hiding left or right pane completely - Minor UI/UX fixes - Drop mode is now disabled when search is still ongoing or when either search field or filter field has an error - Added missing update of search and filter status when undoing/redoing, or when replaying command history - This is most noticeable when the status indicates an error, but undoing does not return it to OK ## 9.0.0 - Swapped over to using SQLite for index - Memory usage is much slimmer/stays flat - For the sample of 1.4GB worth of PDFs used, after indexing, 9.0.0-rc1 uses 1.9GB of memory, while 9.0.0-rc2 uses 39MB - Search is a bit slower - Added token length limit of 500 bytes to accommodate word table limit in index DB - This means during indexing, if Docfd encounters a very long token, e.g. 
serial number, long hex string, it will be split into chunks of up to 500 bytes - Added `Ctrl`+`C` exit key binding to key binding info on screen - Updated exit keys - To exit Docfd: `q`, `Ctrl`+`Q` or `Ctrl`+`C` - To exit other modes: `Esc` - Now defaults to not scanning hidden files and directories - This behaviour is now enabled via the `--hidden` flag - Changed to allow `--add-exts` and `--single-line-add-exts` to be specified multiple times - Changed return code to be 1 when there are no results for `--sample` or `--search` - Added `--no-pdftotext` and `--no-pandoc` flags - Docfd also notes the presence of these flags in error message if there are PDF files but no pdftotext command is available, and same with files relying on pandoc - Renamed `drop path` command to just `drop` - Added drop unselected key binding, and the associated command `drop all except` - Various key binding help info grid adjustments ## 9.0.0-rc1 - Changed default cache size from 100 to 10000 - Index after compression doesn't take up that much space, and storage is generally cheap enough these days - Adjusted cache eviction behaviour to be less strict on when eviction happens and thus less expensive - Renamed `--cache-size` to `--cache-soft-limit` - Removed periodic GC compact call to avoid freezes when working with many files - Removed GC compact call during file indexing core loops to reduce overhead - Added progress bars to initial document processing stage - Swapped to using C backend for BLAKE2B hashing, this gives >20x speedup depending on CPU - Swapped from JSON+GZIP to CBOR+GZIP serialization for indices - Changed help info rotation key from `h` to `?` - Renamed discard mode to drop mode - Added command history editing functionality - Added `--commands-from` command line argument - Added `--tokens-per-search-scope-level` command line argument - Concurrency related bug fixes - Unlikely to encounter in normal workflows with human input speed - 
https://github.com/darrenldl/docfd/commit/14fcc45b746e6156f29eb989d70700476977a3d7 - https://github.com/darrenldl/docfd/commit/bfd63d93562f8785ecad8152005aa0f823185699 - https://github.com/darrenldl/docfd/commit/4e0aa6785ce80630d0cd3cda6e316b7b15a4fb4b - Replaced print mode with copy mode - Replaced single file view with key binding to change screen split ratio to remove feature discrepancies - Added narrow mode for search scope narrowing - Renamed `--index-chunk-token-count` to `--index-chunk-size` - Renamed `--sample-count-per-doc` to `--samples-per-doc` ## 8.0.3 - Fixed single file view crash ## 8.0.2 - Reworked asynchronous search/filter UI code to avoid noticeable lag due to waiting for cancellations that take too long - Previously there was still a lockstep somewhere that would prevent UI from progressing if previous search was still being canceled - The current implementation allows newest requests to override older requests entirely, and not wait for cancellations at all - Adjusted document counter in multi-file view to be visible even when no files are listed ## 8.0.1 - Fixed missing file path filter field update when undoing or redoing document store updates - Fixed case insensitive marker handling in glob command line arguments ## 8.0.0 - Removed `--markdown-headings atx` from pandoc commandline arguments - Removed `Alt`+`U` undo key binding - Removed `Alt`+`E` redo key binding - Removed `Ctrl`+`Q` exit key binding - Added documentation for undo, redo key bindings - Added clear mode and moved clear search field key binding under this mode for multi-file view - Added file path filtering functionality to multi-file view ## 7.1.0 - Added initial macOS support - Likely to have bugs, but will need macOS users to report back - Major speedup from letting `pdftotext` output everything in one pass and split on Docfd side instead of asking `pdftotext` to output one page per invocation - For very large PDFs the indexing used to take minutes but now only takes 
seconds - Page count may be inaccurate if the PDF page contains form feed character itself (not fully sure if `pdftotext` filters the form feed character from content), but should be rare - Significant reduction of index file size by adding GZIP compression to the index JSON ## 7.0.0 - Added discard mode to multi-file view - Changed to using thin bars as pane separators, i.e. tmux style - Added `g` and `G` key bindings for going to top and bottom of document list respectively - Added `-l`/`--files-with-match` and `--files-without-match` for printing just paths in non-interactive mode - Grouped print key bindings under print mode - Added more print key bindings - Grouped reload key bindings under reload mode - Added fixes to ensure Docfd does not exit until all printing is done - Slimmed down memory usage by switching to OCaml 5.2 which enables use of `Gc.compact` - Still no auto-compaction yet, however, will need to wait for a future OCaml release - Added `h` key binding to rotate key binding info grid - Added exact, prefix and suffix search syntax from fzf - Fixed extraneous document path print in non-interactive mode when documents have no search results - Added "explicit spaces" token `~` to match spaces ## 6.0.1 - Fixed random UI freezes when updating search field - This is due to a race condition in the search cancellation mechanism that may cause UI fiber to starve and wait forever for a cancellation acknowledgement - This mechanism was put in place for asynchronous search since 4.0.0 - As usual with race conditions, this only manifests under some specific timing by chance ## 6.0.0 - Fixed help message of `--max-linked-token-search-dist` - Fixed search result printing where output gets chopped off if terminal width is too small - Added smart additional line grabbing for search result printing - `--search-result-print-snippet-min-size N` - If the search result to be printed has fewer than `N` non-space tokens, then Docfd tries to add surrounding lines to the 
snippet to give better context. - `--search-result-print-snippet-max-add-lines` - Controls maximum number of surrounding lines that can be added in each direction. - Added search result underlining when output is not a terminal, e.g. redirected to file, piped to another command - Changed `--search` to show all search results - Added `--sample` that uses `--search` previous behavior where (by default) only a handful of top search results are picked for each document - Changed `--search-result-count-per-doc` to `--sample-count-per-doc` - Added `--color` and `--underline` for controlling behavior of search result printing, they can take one of: - `never` - `always` - `auto` - Removed blinking for `Tab` key presses ## 5.1.0 - Fixed help message of `--max-token-search-dist` - Adjusted path display in UI to hide current working directory segment when applicable - Added missing blinking for `Tab` key presses ## 5.0.0 - Added file globbing support in the form of `--glob` argument - Added single line search mode arguments - `--single-line-exts` - `--single-line-add-exts` - `--single-line-glob` - `--single-line` - Fixed crash on empty file - This was due to assertion failure of `max_line_num` in `Content_and_search_result_render.content_snippet` - Changed search result printing via `Shift+P` and `p` within TUI to not exit after printing, allowing printing of more results - Added blinking to key binding info grid to give better visual feedback, especially for the new behavior of search result printing - Changed to allow `--paths-from` to be specified multiple times - Fixed handling of `.htm` files - `htm` is not a valid value for pandoc's `--format` argument - Now it is rewritten to `html` before being passed to pandoc - Changed `--max-depth`: - Changed default from 10 to 100 - Changed to accept 0 ## 4.0.0 - Made document search asynchronous to search field input, so UI remains smooth even if search is slow - Added status to search bar: - `OK` means Docfd is idling - `...` 
means Docfd is searching - `ERR` means Docfd failed to parse the search expression - Added search cancellation. Triggered by editing or clearing search field. - Added dynamic search distance adjustment based on notion of linked tokens - Two tokens are linked if there is no space between them, e.g. `-` and `>` are linked in `->`, but not in `- >` - Replaced `word` with `token` in the following options for consistency - `--max-word-search-dist` - `--index-chunk-word-count` - Replaced `word` with `token` in user-facing text ## 3.0.0 - Fixed crash from search result snippet being bigger than the content view pane - Crash was from `Content_and_search_result_render.color_word_image_grid` - Added key bindings - `p`: exit and print search result to stderr - `Shift+P`: exit and print file path to stderr - Changed `--debug-log -` to use stderr instead of stdout - Added non-interactive search mode where search results are printed to stdout - `--search EXP` invokes non-interactive search mode with search expression `EXP` - `--search-result-count-per-document` sets the number of top search results printed per document - `--search-result-print-text-width` sets the text width to use when printing - Added `--start-with-search` to prefill the search field in interactive mode - Removed content requirement expression from multi-file view - Originally designed for file filtering, but I have almost never used it since its addition in 1.0.0 - Added word based line wrapping to following components of document list in multi-file view - Document title - Document path - Document content preview - Added word breaking in word based line wrapping logic so all of the original characters are displayed even when the terminal width is very small or when a word/token is very long - Added `--paths-from` to specify a file containing list of paths to (also) be scanned - Fixed search result centering in presence of line wrapping - Renamed `--max-fuzzy-edit` to `--max-fuzzy-edit-dist` for consistency - 
Changed error messages to not be capitalized to follow Rust's and Go's guidelines on error messages - Added fallback rendering text so Docfd does not crash from trying to render invalid text. - Added pandoc integration - Changed the logic of determining when to use stdin as document source - Now if any paths are specified, stdin is ignored - This change mostly came from Dune's cram test mechanism not providing a tty to stdin, so previously Docfd would keep trying to source from stdin even when explicit paths are provided ## 2.2.0 - Restored behaviour of skipping file extension checks for top-level user specified files. This behaviour was likely removed during some previous overhaul. - This means, for instance, `docfd bin/docfd.ml` will now open the file just fine without `--add-exts ml` - Bumped default max word search distance from 20 to 50 - Added consideration for balanced opening closing symbols in search result ranking - Namely symbol pairs: `()`, `[]`, `{}` - Fixed crash from reading from stdin - This was caused by calling `Notty_unix.Term.release` after closing the underlying file descriptor in stdin input mode - Added back handling of optional operator `?` in search expression - Added test corpus to check translation of search expression to search phrases ## 2.1.0 - Added text editor integration for `jed`/`xjed` - See [PR #3](https://github.com/darrenldl/docfd/pull/3) by [kseistrup](https://github.com/kseistrup) ## 2.0.0 - Added "Last scan" field display to multi-file view and single file view - Reduced screen flashing by only recreating `Notty_unix.Term.t` when needed - Added code to recursively mkdir cache directory if needed - Search procedure parameter tuning - UI tuning - Added search expression support - Adjusted quit key bindings to be: `Esc`, `Ctrl+C`, and `Ctrl+Q` - Added file selection support via `fzf` ## 1.9.0 - Added PDF viewer integration for: - okular - evince - xreader - atril - mupdf - Fixed change in terminal behavior after invoking text 
editor by recreating `Notty_unix.Term.t` - Fixed file auto-reloading to apply to all file types instead of just text files ## 1.8.0 - Swapped to using Nottui at [a337a77](https://github.com/let-def/lwd/commit/a337a778001e6c1dbaed7e758c9e05f300abd388) which fixes event handling, and pasting into edit field works correctly as a result - Caching is now disabled if number of documents exceeds cache size - Moved index cache to `XDG_CACHE_HOME/docfd`, which overall defaults to `$HOME/.cache/docfd` - Added cache related arguments - `--cache-dir` - `--cache-size` - `--no-cache` - Fixed search result centering in content view pane - Changed `--debug` to `--debug-log` to support outputting debug log to a file - Fixed file opening failure due to exhausting file descriptors - This was caused by not bounding the number of concurrent fibers when loading files via `Document.of_path` in `Eio.Fiber.List.filter_map` - Added `--index-only` flag - Fixed document rescanning in multi-file view ## 1.7.3 - Fixed crash from using mouse scrolling in multi-file view - The mouse handler did not reset the search result selected when selecting a different document - This leads to out of bound access if the newly selected document does not have enough search results ## 1.7.2 - Fixed content pane sometimes not showing all the lines depending on terminal size and width of lines - Made chunk size dynamic for parallel search ## 1.7.1 - Parallelization fine-tuning ## 1.7.0 - Added back parallel search - General optimizations - Added index file rotation ## 1.6.3 - Further underestimate space available for the purpose of line wrapping ## 1.6.2 - Fixed line wrapping ## 1.6.1 - Fixed line wrapping ## 1.6.0 - Docfd now saves stdin into a tmp file before processing to allow opening in text editor - Added `--add-exts` argument for additional file extensions - Added real-time response to terminal size changes ## 1.5.3 - Updated key binding info pane of multi-file view ## 1.5.2 - Added line number into search 
result ranking consideration ## 1.5.1 - Tuned search procedure and search result ranking - Made substring bidirectional matching differently weighted based on direction - Made reverse substring match require at least 3 characters - Case-sensitive bonus only applies if search phrase is not all ascii lowercase ## 1.5.0 - Made substring matching bidirectional - Tuned search result ranking ## 1.4.0 - Moved reading of environment variables `VISUAL` and `EDITOR` to program start - Performance tuning - Increased cache size for search phrase automata ## 1.3.4 - Added dispatching of search to task pool at file granularity ## 1.3.3 - Performance tuning - Switched back to using the old default max word search distance of 20 - Reduced default max fuzzy edit distance from 3 to 2 to prevent massive slowdown on long words ## 1.3.2 - Performance tuning - Added caching to search phrase automata construction - Removed dispatching of search to task pool - Adjusted search result limits ## 1.3.1 - Added more commandline argument error checking - Adjusted help messages - Adjusted max word search range calculation - Renamed `max-word-search-range` to `max-word-search-dist` ## 1.3.0 - Index data structure optimizations - Search procedure optimizations ## 1.2.2 - Fixed editor recognition for kakoune ## 1.2.1 - Fixed search results when multiple words are involved ## 1.2.0 - Removed UI components for search cancellation - Added real time refresh of search - Added code to open selected text file to selected search result for: - nano - neovim/vim/vi - helix - kakoune - emacs - micro - Added "rescan for documents" to multi-file view ## 1.1.1 - Fixed releasing Notty terminal too early ## 1.1.0 - Added index saving and loading - Added search cancellation ## 1.0.2 - Fixed file tree scan ## 1.0.1 - Minor UI tweaks ## 1.0.0 - Added expression language for file filtering in multi-file view - Adjusted default file tree depth - Added `--exts` argument for configuring file extensions recognized - Fixed 
parameters passing from binary to library ## 0.9.0 - Added PDF search support via `pdftotext` - Added UTF-8 support ## 0.8.6 - Minor wording fix ## 0.8.5 - Added check to skip re-searching if search phrase is equivalent to the previous one ## 0.8.4 - Index data structure optimization - Code cleanup ## 0.8.3 - Optimized multi-file view reload so it does not redo the search over all documents - Implemented a proper document store ## 0.8.2 - Fixed single file view document reloading not refreshing search results ## 0.8.1 - Replaced shared data structures with multicore safe versions - Fixed work partitioning for parallel indexing ## 0.8.0 - Added multicore support for indexing and searching ## 0.7.4 - Fixed crashing and incorrect rendering in some cases of files with blank lines - This is due to `Index.line_count` being incorrectly calculated - Added auto refresh on change of file - Change detection is based on file modification time - Added reload file via `r` key ## 0.7.3 - Bumped the default word search range from 15 to 40 - Since spaces are also counted as words in the index, 15 doesn't actually give a lot of range - Added minor optimization to search ## 0.7.2 - Code refactoring ## 0.7.1 - Delayed `Nottui_unix` term creation so pre TUI printing like `--version` would work - Added back mouse scrolling support - Added Page Up and Page Down keys support ## 0.7.0 - Fixed indexing bug - Added UI mode switch - Adjusted status bar to show current file name in single file mode - Adjusted content view to track search result - Added content view to single file mode ## 0.6.3 - Adjusted status bar to not display index of document selected when in single document mode - Edited debug message a bit ## 0.6.2 - Fixed typo in error message ## 0.6.1 - Added check of whether provided files exist ## 0.6.0 - Upgraded status bar and help text/key binding info ## 0.5.9 - Changed help text to status bar + help text ## 0.5.8 - Fixed debug print of file paths - Tuned UI text slightly ## 
0.5.7 - Changed word db to do global word recording to further reduce memory footprint ## 0.5.6 - Optimized overall memory footprint - Content index memory usage - Switched to using content index to render content lines instead of storing file lines again after indexing ## 0.5.5 - Fixed weighing of fuzzy matches - Fixed bug in scoring of substring matches ## 0.5.4 - Fixed handling of search phrase with uppercase characters - Prioritized search results that match the case ## 0.5.3 - Cleaned up code ## 0.5.2 - Cleaned up code ## 0.5.1 - Cleaned up code and debug info print a bit ## 0.5.0 - Removed tags handling - Added stdin piping support ## 0.4.1 - Tuning content search result scoring ## 0.4.0 - Improved content search result scoring - Added limit on content search results to consider to avoid slowdown - General optimizations ## 0.3.3 - Fixed crash due to not resetting content search result selection when changing document selection ## 0.3.2 - Fixed internal line numbering, but displayed line numbering still begins at 1 ## 0.3.1 - Adjusted line number to begin at 1 ## 0.3.0 - Adjusted colouring ## 0.2.9 - Fixed word position tracking in content indexing ## 0.2.8 - Fixed content indexing ## 0.2.7 - Changed to vim style highlighting for content search results - Color adjustments in general ## 0.2.6 - Added single file UI mode - Added support for specifying multiple files in command line ## 0.2.5 - Added limit to word search range of each step in content search - This speeds up usual search while giving good enough results, and prevents search from becoming very slow in large documents ## 0.2.4 - Adjusted displayed document list size - Updated style of document list view ## 0.2.3 - Added sanitization to file view text - Docfd now accepts file being passed as argument ## 0.2.2 - Fixed tokenization of user provided content search input - Fixed content indexing to not include spaces ## 0.2.1 - Optimized file discovery procedure - Added `--max-depth` option to limit 
scanning depth - Added content search results view - Adjusted tokenization procedure ## 0.2.0 - Switched to interactive TUI - Renamed to Docfd ## 0.1.6 - Optimized parsing code slightly ## 0.1.5 - Adjusted parsing code slightly ## 0.1.4 - Adjusted `--tags` and `--ltags` output slightly ## 0.1.3 - Upgraded `--tags` and `--ltags` output to be more human readable when output is terminal - Changed behavior to output each tag in individual line when output is not terminal ## 0.1.2 - Fixed output text when output is not terminal ## 0.1.1 - Fixed checking of whether output is terminal ## 0.1.0 - Flipped output positions of file path and tags ## 0.0.9 - Notefd now adds color to title and matching tags if output is terminal - Improved fuzzy search index building ## 0.0.8 - Code cleanup ## 0.0.7 - Made file recognition more lenient - Added support for alternative tag section syntax - `| ... |` - `@ ... @` ## 0.0.6 - Fixed Notefd to only handle consecutive tag sections ## 0.0.5 - Added `--tags` and `--ltags` flags - Adjusted parsing to allow multiple tag sections ## 0.0.4 - Fixed tag extraction ## 0.0.3 - Made header extraction more robust to files with very long lines ## 0.0.2 - Added `-s` for case-insensitive substring tag match - Renamed `-p` to `-e` for exact tag match ## 0.0.1 - Base version ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2022 Di Long Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial 
portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: Makefile ================================================ SRCFILES = lib/*.ml lib/*.mli bin/*.ml bin/*.mli profiling/*.ml tests/*.ml OCPINDENT = ocp-indent \ --inplace \ $(SRCFILES) .PHONY: all all : python3 update-version-string.py dune build @all .PHONY: podman-build podman-build: podman build --format docker -t localhost/docfd -f containers/Containerfile.docfd . .PHONY: podman-build-demo-vhs podman-build-demo-vhs: podman build --format docker -t localhost/docfd-demo-vhs -f containers/Containerfile.demo-vhs . .PHONY: lock lock: opam lock . 
.PHONY: release-build release-build : python3 update-version-string.py dune build --release bin/docfd.exe mkdir -p release cp -f _build/default/bin/docfd.exe release/docfd chmod 755 release/docfd .PHONY: release-static-build release-static-build : python3 update-version-string.py OCAMLPARAM='_,ccopt=-static' dune build --release bin/docfd.exe mkdir -p release cp -f _build/default/bin/docfd.exe release/docfd chmod 755 release/docfd .PHONY: release-static-build-arm release-static-build-arm : python3 update-version-string.py OCAMLPARAM='_,ccopt=-static,fPIC' dune build --release bin/docfd.exe mkdir -p release cp -f _build/default/bin/docfd.exe release/docfd chmod 755 release/docfd .PHONY: tests tests : # Cleaning and rebuilding here to make sure cram tests actually use a recent binary, # since Dune (as of 3.14.0) doesn't trigger rebuild of binary when # invoking cram tests, even if the source code has changed. make clean make OCAMLRUNPARAM=b dune exec tests/main.exe --no-buffer --force dune build @file-collection-tests dune build @line-wrapping-tests dune build @misc-behavior-tests dune build @printing-tests dune build @match-type-tests dune build @open-with-tests dune build @non-interactive-mode-return-code-tests dune build @search-scope-narrowing-tests dune build @script-tests .PHONY: demo-vhs demo-vhs : for file in demo-vhs-tapes/*; do ./demo-vhs.sh $$file; done rm dummy.gif .PHONY: profile profile : OCAMLPARAM='_,ccopt=-static' dune build --release profiling/main.exe .PHONY: format format : $(OCPINDENT) .PHONY : clean clean: dune clean ================================================ FILE: README.md ================================================ # Docfd [Online Demo](https://demo.docfd.sh) TUI multiline fuzzy document finder Think interactive grep for text files, PDFs, DOCXs, etc, but word/token based instead of regex and line based, so you can search across lines easily. 
Docfd aims to provide good UX via integration with common text editors and PDF viewers, so you can jump directly to a search result with a single key press. --- Interactive use ![](demo-vhs-gifs/repo.gif) Non-interactive use ![](demo-vhs-gifs/repo-non-interactive.gif) ## Features - Multithreaded indexing and searching - Multiline fuzzy search of multiple files - Content view pane that shows the snippet surrounding the search result selected - Text editor and PDF viewer integration - Editable command history - rewrite/plan your actions in text editor - Search scope narrowing - limit scope of next search based on current search results - Clipboard integration ## Why Docfd might be for you
You want a standalone, offline TUI search tool that allows you to immediately start searching without any complicated setup. Docfd only starts processing the current directory or specified directories/files upon start. Hashing is used to pick out files that have not been indexed yet. There is no need to wait for a background indexer to refresh before you get up-to-date results.
You don't want to move everything into central storage, and want to just keep your current folder structure There are no strings attached with using Docfd. Docfd does not require you to import your files into any special storage system, so you can continue to mix and match tools to best handle your files.
You want to script or record your search Docfd comes with a simple scripting language, which is already used to capture your actions in the TUI. Finally found what you need after many steps? Save the session as a script with `Ctrl`+`S`! Then open it next time with `Ctrl`+`O`.
## Why Docfd might not be for you
Docfd is not all-encompassing Docfd does not try to be a full blown document management system such as Paperless-ngx. While there may be significant overlaps in terms of the search functionality, Docfd will fall short for almost any other kind of features, such as storage management, tagging, web interface, OCR, email ingestion.
Docfd is not a "proper" search engine Docfd is a search engine in the sense that it uses the same fundamental principles, i.e. inverted indices, but it lacks features that you would expect from a "proper" search engine solution, e.g. [Apache Lucene](https://lucene.apache.org/), [Tantivy](https://github.com/quickwit-oss/tantivy), [Lnx](https://github.com/lnx-search/lnx). Here are some of the fundamental features which I think are crucial to a proper search engine, but Docfd lacks: - You cannot customize what is indexed by Docfd - You cannot add a new type of ranking - Docfd lacks support for languages other than English - Docfd does not scale very well to very large quantities of documents - Search should still be serviceable when you reach beyond, say, 10k documents, but it will be noticeably more sluggish Some of these shortcomings are fundamental to the goals of Docfd. For instance, Docfd is primarily a standalone desktop TUI tool with quick startup and should not impact other desktop applications. As such, some performance related engineering choices typical for a proper search engine are difficult to accommodate as they require longer startup and significantly more memory usage. Other shortcomings are due to limited time and limited return on efforts - if one is to push Docfd so much to reach the feature parity and performance of a proper search engine, then one might as well just use an existing search engine to begin with.
If your notes are consistently very short, and you only want to do simple searches, then there are better options If you follow note taking methodologies such as Zettelkasten, where each note consists of very few lines, then using a combination of grep and file preview tool can yield a much faster search experience by skipping out on indexing and consideration of word proximity.
Docfd does not "stream" its search results One user feedback received was that searching felt slow when Docfd is still conducting the search as UI is not updated result by result. By comparison, fzf felt faster as results start to immediately pop into the screen. It is fundamentally more difficult to implement this streaming behavior nicely in Docfd, as Docfd operates with snapshots in mind (e.g. allowing you to undo/redo commands), while fzf does not. More specifically, it is much easier to wait for all search results to be ready, and finalize as a snapshot before presenting onto Docfd UI. So while possible to implement in Docfd, it is unclear if the effort is worthwhile with the additional system complexity in mind.
## Installation Statically linked binaries for Linux and macOS are available via [GitHub releases](https://github.com/darrenldl/docfd/releases). Docfd is also packaged on the following platforms for Linux: - [opam](https://ocaml.org/p/docfd/latest) - [AUR](https://aur.archlinux.org/packages/docfd-bin) (as `docfd-bin`) - First packaged by [@kseistrup](https://github.com/kseistrup), now maintained by Dominiquini - Nix (as `docfd`) - Packaged by [@chewblacka](https://github.com/chewblacka) The only way to use Docfd on Windows right now is via WSL. **Notes for packagers**: Outside of the OCaml toolchain for building (if you are packaging from source), Docfd also requires the following external tools at run time for full functionality: - `pdftotext` from `poppler-utils` for PDF support - `pandoc` for support of `.epub`, `.odt`, `.docx`, `.fb2`, `.ipynb`, `.html`, and `.htm` files - `wl-clipboard` for clipboard support on Wayland - `xclip` for clipboard support on X11 ## Basic usage The typical usage of Docfd is to either `cd` into the directory of interest and launch `docfd` directly, or specify the paths as arguments: ``` docfd [PATH]... ``` The list of paths can contain directories. Each directory in the list is scanned recursively for files with the following extensions by default: - For multiline search mode: - `.txt`, `.md`, `.pdf`, `.epub`, `.odt`, `.docx`, `.fb2`, `.ipynb`, `.html`, `.htm` - For single line search mode: - `.log`, `.csv`, `.tsv` You can change the file extensions to use via `--exts` and `--single-line-exts`, or add onto the list of extensions via `--add-exts` and `--single-line-add-exts`. If the list of `PATH`s is empty, then Docfd defaults to scanning the current directory `.` unless any of the following is used: `--paths-from`, `--glob`, `--single-line-glob`. ## Documentation See [GitHub Wiki](https://github.com/darrenldl/docfd/wiki) for more examples/cookbook, and technical details. 
## Changelog [CHANGELOG](CHANGELOG.md) ## Limitations - Docfd generally expects one instance per index DB - You should pick a different cache directory (which houses the index DB) via `--cache-dir` if you need multiple instances - There are safeguards to avoid corruption even if you do run multiple instances of Docfd, but note that the instances of Docfd may exit unexpectedly - That being said, running multiple instances of Docfd which are only reading the index DB and not updating it should be fine - File auto-reloading is not supported for PDF files, as PDF viewers are invoked in the background via shell. It is possible to support this properly in the ways listed below, but requires a lot of engineering for potentially very little gain: - Docfd waits for PDF viewer to terminate fully before resuming, but this prohibits viewing multiple search results simultaneously in different PDF viewer instances. - Docfd manages the launched PDF viewers completely, but these viewers are closed when Docfd terminates. - Docfd invokes the PDF viewers via shell so they stay open when Docfd terminates. Docfd instead periodically checks if they are still running via the PDF viewers' process IDs, but this requires handling forks. - Outside of tracking whether the PDF viewer instances interacting with the files are still running, Docfd also needs to set up file update handling either via `inotify` or via checking file modification times periodically. ## Acknowledgement - Big thanks to [@lunacookies](https://github.com/lunacookies) and [@jthvai](https://github.com/jthvai) for the many UI/UX discussions and suggestions - Demo gifs and some screenshots are made using [vhs](https://github.com/charmbracelet/vhs). 
- [ripgrep-all](https://github.com/phiresky/ripgrep-all) was used as reference for text extraction software choices - [Marc Coquand](https://mccd.space) (author of [Stitch](https://git.mccd.space/pub/stitch/)) for discussions and inspiration of results narrowing functionality - Part of the search syntax was copied from [fzf](https://github.com/junegunn/fzf) - Command history editing workflow was inspired by Git interactive rebase workflow, e.g. `git rebase -i` - [PDF corpora](https://github.com/pdf-association/pdf-corpora) from PDF association was used to stress test performance ================================================ FILE: bin/BLAKE2B.ml ================================================ module B = Digestif.Make_BLAKE2B (struct let digest_size = 64 end) let hash_of_file ~env ~path = let fs = Eio.Stdenv.fs env in let ctx = ref B.empty in try Eio.Path.(with_open_in (fs / path)) (fun flow -> match Eio.Buf_read.parse ~max_size:Params.hash_chunk_size (fun buf -> try while true do ctx := B.feed_string !ctx (Eio.Buf_read.take Params.hash_chunk_size buf) done with | End_of_file -> ctx := B.feed_string !ctx (Eio.Buf_read.take_all buf) ) flow with | Ok () -> Ok (!ctx |> B.get |> B.to_hex) | Error (`Msg msg) -> Error msg ) with | _ -> Error (Printf.sprintf "failed to hash file: %s" (Filename.quote path)) ================================================ FILE: bin/UI.ml ================================================ open Docfd_lib open Lwd_infix module Vars = struct let script_name_field = Lwd.var UI_base.empty_text_field let script_name_field_focus_handle = Nottui.Focus.make () let path_fuzzy_rank_field = Lwd.var UI_base.empty_text_field let path_fuzzy_rank_field_focus_handle = Nottui.Focus.make () let script_files : string Dynarray.t Lwd.var = Lwd.var (Dynarray.create ()) let usable_script_files : (string Dynarray.t * Int_set.t Dynarray.t option) Lwd.t = let$* arr = Lwd.get script_files in let$* input_mode = Lwd.get UI_base.Vars.input_mode in let$ 
script_name_specified, _ = Lwd.get script_name_field in
    (* Reactive value: the script files applicable to the current input mode,
       paired with optional per-entry highlight sets for rendering.
       - [Save_script]: plain prefix filtering on the typed name.
       - [Scripts] / [Delete_script_confirm]: if the typed text parses as a
         non-empty search expression, fuzzy-rank the scripts and keep each
         match's highlight positions; otherwise fall back to prefix filtering
         (or the full, unranked list when the expression is empty).
       - Any other input mode: no scripts are shown. *)
    match input_mode with
    | Save_script -> (
        (Dynarray.filter (CCString.starts_with ~prefix:script_name_specified) arr, None)
      )
    | Scripts | Delete_script_confirm (_, _) -> (
        match Search_exp.parse script_name_specified with
        | None -> (
            (* Unparsable input: degrade to simple prefix filtering. *)
            (Dynarray.filter (CCString.starts_with ~prefix:script_name_specified) arr, None)
          )
        | Some exp -> (
            if Search_exp.is_empty exp then (
              (arr, None)
            ) else (
              (* Rank by fuzzy match against the file name without its
                 extension; retain the search-result highlights. *)
              let ranking =
                Misc_utils.fuzzy_rank_assoc
                  (Stop_signal.make ())
                  ~get_key:Filename.chop_extension
                  exp
                  (Dynarray.to_seq arr)
              in
              Dynarray.to_seq ranking
              |> Seq.map (fun (path, search_result) ->
                  (path, Misc_utils.highlights_of_search_result search_result)
                )
              |> Seq.split
              |> (fun (s0, s1) -> (Dynarray.of_seq s0, Some (Dynarray.of_seq s1)))
            )
          )
      )
    | _ -> (
        (Dynarray.create (), None)
      )
end

(* Rescan the script directory (depth 1, matching the Docfd script file
   extension) and reset the script selection to the first entry. *)
let refresh_script_files () =
  Lwd.set UI_base.Vars.index_of_script_selected 0;
  File_utils.list_files_recursive_filter_by_exts
    ~max_depth:1
    ~report_progress:(fun () -> ())
    ~exts:[ Params.docfd_script_ext ]
    (Seq.return (Params.script_dir ()))
  |> String_set.to_seq
  |> Seq.map Filename.basename
  |> Dynarray.of_seq
  |> Lwd.set Vars.script_files

(* Re-read [doc] from disk and fold the outcome into the session's starting
   state: on success the fresh copy replaces the old document; on failure
   the stale document is dropped from the session. *)
let reload_document (doc : Document.t) =
  let pool = UI_base.task_pool () in
  let path = Document.path doc in
  let doc =
    match
      Document.of_path
        ~env:(UI_base.eio_env ())
        pool
        ~already_in_transaction:false
        (Document.search_mode doc)
        path
    with
    | Ok doc -> Some doc
    | Error _ -> (
        (* Load failure is deliberately swallowed here; handled below by
           dropping the document from the session. *)
        None
      )
  in
  let session_state =
    Session_manager.lock_with_view (fun view ->
        view.init_state
      )
    |> (fun state ->
        match doc with
        | Some doc -> (
            Session.State.add_document pool doc state
          )
        | None -> (
            Session.State.drop (`Path path) state
          )
      )
  in
  Session_manager.update_starting_state session_state

(* Reload the currently selected document, if any document is listed. *)
let reload_document_selected
    ~(search_result_groups : Session.search_result_group array)
  : unit =
  if Array.length search_result_groups > 0 then (
    let index = Lwd.peek UI_base.Vars.index_of_document_selected in
    let doc, _search_results = search_result_groups.(index)
in reload_document doc;
  )

(* Flip the mark status of [path]: issue an [`Unmark] command if it is
   currently marked, [`Mark] otherwise, and record the command in the
   current session snapshot. *)
let toggle_mark ~path =
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let state = Session.Snapshot.state cur_snapshot in
      let new_command =
        if String_set.mem path (Session.State.marked_document_paths state) then (
          `Unmark path
        ) else (
          `Mark path
        )
      in
      state
      |> Session.run_command (UI_base.task_pool ()) new_command
      |> Option.get
      |> (fun (new_command, state) ->
          Session.Snapshot.make ~last_command:(Some new_command) state)
    )

(* Drop documents from the session according to [choice], first adjusting
   the document selection so it stays valid for the shrunken list.
   [document_count] is the current number of listed documents. *)
let drop
    ~document_count
    (choice :
       [`Path of string | `All_except of string | `Marked | `Unmarked | `Listed | `Unlisted]) =
  let new_command =
    match choice with
    | `Path path -> (
        (* One document disappears; clamp the selection to the new count. *)
        let n = Lwd.peek UI_base.Vars.index_of_document_selected in
        UI_base.set_document_selected ~choice_count:(document_count - 1) n;
        `Drop path
      )
    | `All_except path -> (
        (* Exactly one document will remain. *)
        UI_base.set_document_selected ~choice_count:1 0;
        `Drop_all_except path
      )
    | `Marked -> (
        UI_base.reset_document_selected ();
        `Drop_marked
      )
    | `Unmarked -> (
        UI_base.reset_document_selected ();
        `Drop_unmarked
      )
    | `Listed -> (
        UI_base.reset_document_selected ();
        `Drop_listed
      )
    | `Unlisted -> (
        UI_base.reset_document_selected ();
        `Drop_unlisted
      )
  in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      Session.Snapshot.state cur_snapshot
      |> Session.run_command (UI_base.task_pool ()) new_command
      |> Option.get
      |> (fun (new_command, state) ->
          Session.Snapshot.make ~last_command:(Some new_command) state)
    )

(* Mark a single document by path, or all listed documents. *)
let mark (choice : [`Path of string | `Listed]) =
  let new_command =
    match choice with
    | `Path path -> `Mark path
    | `Listed -> `Mark_listed
  in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      Session.Snapshot.state cur_snapshot
      |> Session.run_command (UI_base.task_pool ()) new_command
      |> Option.get
      |> (fun (new_command, state) ->
          Session.Snapshot.make ~last_command:(Some new_command) state)
    )

(* Unmark a single document by path, all listed documents, or all
   documents. *)
let unmark (choice : [`Path of string | `Listed | `All]) =
  let new_command =
    match choice with
    | `Path path -> `Unmark path
    | `Listed -> `Unmark_listed
    | `All ->
      `Unmark_all
  in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      Session.Snapshot.state cur_snapshot
      |> Session.run_command (UI_base.task_pool ()) new_command
      |> Option.get
      |> (fun (new_command, state) ->
          Session.Snapshot.make ~last_command:(Some new_command) state)
    )

(* Re-sort the document list per [sort_by] and reset the selection to the
   top of the list. *)
let sort (sort_by : Command.Sort_by.t) =
  UI_base.reset_document_selected ();
  let new_command = `Sort (sort_by, Command.Sort_by.default_no_score) in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      Session.Snapshot.state cur_snapshot
      |> Session.run_command (UI_base.task_pool ()) new_command
      |> Option.get
      |> (fun (new_command, state) ->
          Session.Snapshot.make ~last_command:(Some new_command) state)
    )

(* Narrow the search scope to [level]; the new snapshot is constructed
   directly rather than going through [Session.run_command]. *)
let narrow_search_scope_to_level ~level =
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      Session.Snapshot.make
        ~last_command:(Some (`Narrow_level level))
        (Session.State.narrow_search_scope_to_level
           ~level
           (Session.Snapshot.state cur_snapshot))
    )

(* Push the current filter field text to the session manager; [commit]
   distinguishes a submitted filter from live-preview updates. *)
let update_filter ~commit () =
  let s = fst @@ Lwd.peek UI_base.Vars.filter_field in
  Session_manager.submit_filter_req ~commit s

(* Same as [update_filter], but for the search field. *)
let update_search ~commit () =
  let s = fst @@ Lwd.peek UI_base.Vars.search_field in
  Session_manager.submit_search_req ~commit s

(* Same as [update_filter], but for the path fuzzy-ranking field. *)
let update_path_fuzzy_rank ~commit () =
  let s = fst @@ Lwd.peek Vars.path_fuzzy_rank_field in
  Session_manager.submit_path_fuzzy_rank_req ~commit s

(* Full save path for a script named [base_name], creating the script
   directory first if needed. *)
let compute_save_script_path base_name =
  let dir = Params.script_dir () in
  File_utils.mkdir_recursive dir;
  Filename.concat dir (Fmt.str "%s%s" base_name Params.docfd_script_ext)

(* Write the session's command history to [path], one serialized command
   per line (snapshots without a command are skipped); exits with an error
   message if the file cannot be written. *)
let save_script ~path =
  Session_manager.stop_filter_and_search_and_restore_input_fields ();
  let lines =
    Session_manager.lock_with_view (fun view ->
        view.snapshots
        |> Dynarray.to_seq
        |> Seq.filter_map (fun (snapshot : Session.Snapshot.t) ->
            Option.map Command.to_string (Session.Snapshot.last_command snapshot)
          )
        |> List.of_seq
      )
  in
  try
    CCIO.with_out path (fun oc ->
        CCIO.write_lines_l oc lines;
      )
  with
  | Sys_error _ -> (
      Misc_utils.exit_with_error_msg
        (Fmt.str
"failed to write script %s" path) ) module Top_pane = struct module Document_list = struct let render_document_entry ~input_mode ~width ~documents_marked ~(search_result_group : Session.search_result_group) ~path_highlights ~selected : Notty.image = let open Notty in let open Notty.Infix in let (doc, search_results) = search_result_group in let search_result_score_image = if Option.is_some !Params.debug_output then ( if Array.length search_results = 0 then I.empty else ( let x = search_results.(0) in I.strf "(Best search result score: %f)" (Search_result.score x) ) ) else ( I.empty ) in let sub_item_base_left_padding = I.string A.empty " " in let sub_item_width = width - I.width sub_item_base_left_padding - 2 in let preview_image = let preview_left_padding_per_line = I.string A.(bg lightgreen) " " <|> I.string A.empty " " in let preview_line_images = let line_count = min Params.preview_line_count (Index.global_line_count ~doc_id:(Document.doc_id doc)) in OSeq.(0 --^ line_count) |> Seq.map (fun global_line_num -> Index.words_of_global_line_num ~doc_id:(Document.doc_id doc) global_line_num |> Dynarray.to_list |> Content_and_search_result_rendering.Text_block_rendering.of_words ~width:sub_item_width ) |> Seq.map (fun img -> let left_padding = OSeq.(0 --^ I.height img) |> Seq.map (fun _ -> preview_left_padding_per_line) |> List.of_seq |> I.vcat in left_padding <|> img ) |> List.of_seq in I.vcat preview_line_images in let path_highlights = match input_mode with | UI_base.Path_fuzzy_rank -> ( String_map.find_opt (Document.path doc) path_highlights ) | _ -> ( None ) in let path_image = Document.path doc |> File_utils.remove_cwd_from_path |> Tokenization.tokenize ~drop_spaces:false |> List.of_seq |> Content_and_search_result_rendering.Text_block_rendering.of_words ~width:sub_item_width ?highlights:path_highlights in let path_image_with_prefix = (I.string A.(fg lightgreen) "@ ") <|> path_image in let path_date_image = (match Document.path_date doc with | None -> I.void 0 0 
| Some date -> ( I.string A.(fg lightgreen) " ⤷ " <|> I.string A.empty (Timedesc.Date.to_rfc3339 date) ) ) in let last_modified_image = I.string A.(fg lightgreen) "Last modified: " <|> I.string A.empty (Timedesc.to_string ~format:Params.last_modified_format_string (Document.mod_time doc)) in let marked = String_set.mem (Document.path doc) documents_marked in let title = let attr = if selected then ( A.(fg lightblue ++ st bold) ) else ( A.(fg lightblue) ) in match Document.title doc with | None -> I.void 0 1 | Some title -> ( title |> Tokenization.tokenize ~drop_spaces:false |> List.of_seq |> Content_and_search_result_rendering.Text_block_rendering.of_words ~attr ~width ) in match input_mode with | UI_base.Path_fuzzy_rank -> ( path_image ) | _ -> ( ( (if marked then I.strf "> " else I.void 0 1) <|> title ) <-> ( sub_item_base_left_padding <|> I.vcat [ search_result_score_image; path_image_with_prefix; path_date_image; preview_image; last_modified_image; ] ) ) let main ~width ~height ~documents_marked ~(search_result_groups : Session.search_result_group array) ~(document_selected : int) : Nottui.ui Lwd.t = let document_count = Array.length search_result_groups in let$* input_mode = Lwd.get UI_base.Vars.input_mode in let$* (_cur_ver, snapshot) = Session_manager.cur_snapshot in let state = Session.Snapshot.state snapshot in let render_pane () = let rec aux index height_filled acc = if index < document_count && height_filled < height then ( let selected = Int.equal document_selected index in let img = render_document_entry ~input_mode ~width ~documents_marked ~search_result_group:search_result_groups.(index) ~path_highlights:(Session.State.path_highlights state) ~selected in aux (index + 1) (height_filled + Notty.I.height img) (img :: acc) ) else ( List.rev acc |> List.map Nottui.Ui.atom |> Nottui.Ui.vcat ) in if document_count = 0 then ( Nottui.Ui.empty ) else ( aux document_selected 0 [] ) in let$ background = UI_base.full_term_sized_background in Nottui.Ui.join_z 
background (render_pane ()) |> Nottui.Ui.mouse_area (UI_base.mouse_handler ~f:(fun direction -> let offset = match direction with | `Up -> -1 | `Down -> 1 in let document_current_choice = Lwd.peek UI_base.Vars.index_of_document_selected in UI_base.set_document_selected ~choice_count:document_count (document_current_choice + offset); ) ) end module Right_pane = struct module Search_result_list = struct let main ~height ~width ~(search_result_group : Session.search_result_group) ~(index_of_search_result_selected : int Lwd.var) : Nottui.ui Lwd.t = let (document, search_results) = search_result_group in let search_result_selected = Lwd.peek index_of_search_result_selected in let result_count = Array.length search_results in if result_count = 0 then ( Lwd.return Nottui.Ui.empty ) else ( let images = Misc_utils.array_sub_seq ~start:search_result_selected ~end_exc:(min result_count (search_result_selected + height)) search_results |> Seq.map (Content_and_search_result_rendering.search_result ~doc_id:(Document.doc_id document) ~render_mode:(UI_base.render_mode_of_document document) ~width ) |> List.of_seq in let pane = images |> List.map (fun img -> Nottui.Ui.atom Notty.I.(img <-> strf "") ) |> Nottui.Ui.vcat in let$ background = UI_base.full_term_sized_background in Nottui.Ui.join_z background pane |> Nottui.Ui.mouse_area (UI_base.mouse_handler ~f:(fun direction -> let n = Lwd.peek index_of_search_result_selected in let offset = match direction with | `Up -> -1 | `Down -> 1 in UI_base.set_search_result_selected ~choice_count:result_count (n + offset) ) ) ) end module Link_list = struct let main ~height ~width ~(search_result_group : Session.search_result_group) ~(index_of_link_selected : int Lwd.var) : Nottui.ui Lwd.t = let (document, _search_results) = search_result_group in let links = Document.links document in let link_selected = Lwd.peek index_of_link_selected in let link_count = Array.length links in if link_count = 0 then ( Lwd.return Nottui.Ui.empty ) else ( let 
start = max 0 (link_selected - height / 2) in let end_exc = min link_count (start + height) in let render ~width s = s |> Tokenization.tokenize ~drop_spaces:false |> List.of_seq |> Content_and_search_result_rendering.Text_block_rendering.of_words ~width in let pane = Misc_utils.array_sub_seq ~start ~end_exc links |> Seq.map (fun link -> link.Link.link) |> List.of_seq |> Content_and_search_result_rendering.centered_list ~height ~width ~render (link_selected - start) |> Nottui.Ui.atom in let$ background = UI_base.full_term_sized_background in Nottui.Ui.join_z background pane |> Nottui.Ui.mouse_area (UI_base.mouse_handler ~f:(fun direction -> let n = Lwd.peek index_of_link_selected in let offset = match direction with | `Up -> -1 | `Down -> 1 in UI_base.set_link_selected ~choice_count:link_count (n + offset) ) ) ) end let main ~width ~height ~(search_result_groups : Session.search_result_group array) ~(document_selected : int) ~show_bottom_right_pane : Nottui.ui Lwd.t = if Array.length search_result_groups = 0 then ( let blank ~height = let _ = height in Nottui_widgets.empty_lwd in UI_base.vpane ~width ~height blank blank ) else ( let$* input_mode = Lwd.get UI_base.Vars.input_mode in let$* search_result_selected = Lwd.get UI_base.Vars.index_of_search_result_selected in let$* link_selected = Lwd.get UI_base.Vars.index_of_link_selected in let search_result_group = search_result_groups.(document_selected) in if show_bottom_right_pane then ( UI_base.vpane ~width ~height (UI_base.Content_view.main ~input_mode ~width ~search_result_group ~search_result_selected ~link_selected) (match input_mode with | Links -> ( Link_list.main ~width ~search_result_group ~index_of_link_selected:UI_base.Vars.index_of_link_selected ) | _ -> ( Search_result_list.main ~width ~search_result_group ~index_of_search_result_selected:UI_base.Vars.index_of_search_result_selected )) ) else ( UI_base.Content_view.main ~input_mode ~width ~height ~search_result_group ~search_result_selected ~link_selected 
) ) end let item_list ~width ~height ~selected ?highlights items : Nottui.ui Lwd.t = let arr = Dynarray.create () in Seq.iter (fun i -> Dynarray.add_last arr (Dynarray.get items i) ) OSeq.(selected --^ Dynarray.length items); Dynarray.to_seq arr |> Seq.mapi (fun i s -> let highlights = Option.map (fun highlights -> Dynarray.get highlights (selected + i) ) highlights in s |> Tokenization.tokenize ~drop_spaces:false |> List.of_seq |> Content_and_search_result_rendering.Text_block_rendering.of_words ~width ?highlights |> Nottui.Ui.atom ) |> List.of_seq |> Nottui.Ui.vcat |> Nottui.Ui.resize ~w:width ~h:height |> Lwd.return let main ~width ~height ~documents_marked ~screen_split ~show_bottom_right_pane ~(search_result_groups : Session.search_result_group array) : Nottui.ui Lwd.t = let$* input_mode = Lwd.get UI_base.Vars.input_mode in let$* script_selected = Lwd.get UI_base.Vars.index_of_script_selected in let$* usable_scripts, usable_script_highlights = Vars.usable_script_files in let file = if script_selected < Dynarray.length usable_scripts then ( Some (Dynarray.get usable_scripts script_selected) ) else ( None ) in let$* selected = Lwd.get UI_base.Vars.index_of_script_selected in match input_mode with | Save_script -> ( item_list ~width ~height ~selected usable_scripts ) | Scripts | Delete_script_confirm (_, _) -> ( let lines = try match file with | None -> [] | Some file -> ( let dir = Params.script_dir () in CCIO.with_in (Filename.concat dir file) (fun ic -> CCIO.read_lines_seq ic |> OSeq.take height |> List.of_seq ) ) with | Sys_error _ -> [] in UI_base.hpane ~l_ratio:0.5 ~width ~height (item_list ~height ~selected ?highlights:usable_script_highlights usable_scripts) (fun ~width -> List.map (fun s -> Nottui.Ui.atom (Notty.I.strf "%s" s)) lines |> Nottui.Ui.vcat |> Nottui.Ui.resize ~w:width ~h:height |> Lwd.return ) ) | _ -> ( let$* document_selected = Lwd.get UI_base.Vars.index_of_document_selected in let l_ratio = match screen_split with | `Even -> 0.50 | 
`Focus_left -> 1.0 | `Wide_left -> 0.618 | `Focus_right -> 0.0 | `Wide_right -> 1.0 -. 0.618 in UI_base.hpane ~l_ratio ~width ~height (Document_list.main ~height ~documents_marked ~search_result_groups ~document_selected) (Right_pane.main ~height ~search_result_groups ~document_selected ~show_bottom_right_pane) ) end module Bottom_pane = struct let status_bar ~width ~(search_result_groups : Session.search_result_group array) ~(input_mode : UI_base.input_mode) : Nottui.Ui.t Lwd.t = let open Notty.Infix in let input_mode_image = UI_base.Input_mode_map.find input_mode UI_base.Status_bar.input_mode_images in let attr = UI_base.Status_bar.attr in let$* usable_scripts, _usable_script_highlights = Vars.usable_script_files in let$* script_selected = Lwd.get UI_base.Vars.index_of_script_selected in let usable_script_count = Dynarray.length usable_scripts in let selected_script_file_and_path = if usable_script_count > 0 then ( let dir = Params.script_dir () in let file = Dynarray.get usable_scripts script_selected in Some (file, Filename.concat dir file) ) else ( None ) in match input_mode with | Save_script | Scripts -> ( let text_field = Vars.script_name_field in let prompt = match input_mode with | Save_script -> "Save as" | Scripts -> "Filter" | _ -> failwith "unexpected case" in let on_tab = match input_mode with | Save_script -> ( Some (fun (text, _) -> let best_fit = let usable_script_count = Dynarray.length usable_scripts in if usable_script_count = 0 then ( text ) else if usable_script_count = 1 then ( Filename.chop_extension (Dynarray.get usable_scripts 0) ) else ( usable_scripts |> Dynarray.to_seq |> String_utils.longest_common_prefix ) in Lwd.set text_field (best_fit, String.length best_fit) ) ) | _ -> None in let on_submit = match input_mode with | Save_script -> ( (fun (text, _x) -> Lwd.set text_field UI_base.empty_text_field; Nottui.Focus.release Vars.script_name_field_focus_handle; Lwd.set UI_base.Vars.input_mode (if String.length text = 0 then 
Save_script_no_name else Save_script_overwrite text ); ) ) | Scripts -> ( (fun (_text, _x) -> Option.iter (fun (_file, path) -> Lwd.set text_field UI_base.empty_text_field; Nottui.Focus.release Vars.script_name_field_focus_handle; Lwd.set UI_base.Vars.quit true; UI_base.Vars.action := Some (Open_script path); Lwd.set UI_base.Vars.input_mode Navigate; ) selected_script_file_and_path; ) ) | _ -> failwith "unexpected case" in let on_up_down = match input_mode with | Save_script -> None | Scripts -> ( Some (fun up_down _ -> UI_base.set_script_selected ~choice_count:usable_script_count (script_selected + (match up_down with | `Up -> (-1) | `Down -> 1) )) ) | _ -> failwith "unexpected case" in let on_ctrl_prefixed = match input_mode with | Scripts -> ( Some (fun key (_text, _x) -> match key with | (`ASCII 'X', [`Ctrl]) -> ( Option.iter (fun (file, path) -> Lwd.set UI_base.Vars.input_mode (Delete_script_confirm (file, path)) ) selected_script_file_and_path; `Handled ) | _ -> ( `Unhandled ) ) ) | _ -> None in let$* content = Nottui_widgets.hbox [ Lwd.return (Nottui.Ui.atom (Notty.I.hcat [ input_mode_image; UI_base.Status_bar.element_spacer; Notty.I.strf ~attr "%s: [ " prompt; ])); UI_base.edit_field text_field ~focus:Vars.script_name_field_focus_handle ~on_cancel:(fun (_, _) -> Lwd.set UI_base.Vars.input_mode Navigate ) ~on_change:(fun (text, x) -> Lwd.set text_field (text, x); ) ~on_submit ?on_tab ?on_up_down ?on_ctrl_prefixed; Lwd.return (Nottui.Ui.atom (Notty.I.strf ~attr " ] + %s" Params.docfd_script_ext)); ] in let$ bar = UI_base.Status_bar.background_bar in Nottui.Ui.join_z bar content ) | Save_script_overwrite script_name -> ( let path = compute_save_script_path script_name in if Sys.file_exists path then ( let$* content = Lwd.return (Nottui.Ui.atom (Notty.I.hcat [ input_mode_image; UI_base.Status_bar.element_spacer; Notty.I.strf ~attr "%s already exists, overwrite?" 
(Filename.basename path); ])) in let$ bar = UI_base.Status_bar.background_bar in Nottui.Ui.join_z bar content ) else ( save_script ~path; Lwd.set UI_base.Vars.input_mode (Save_script_edit script_name); UI_base.Status_bar.background_bar ) ) | Save_script_no_name -> ( let$* content = Lwd.return (Nottui.Ui.atom (Notty.I.hcat [ input_mode_image; UI_base.Status_bar.element_spacer; Notty.I.strf ~attr "No name entered, saving skipped"; ])) in let$ bar = UI_base.Status_bar.background_bar in Nottui.Ui.join_z bar content ) | Save_script_edit script_name -> ( let path = compute_save_script_path script_name in let$* content = Lwd.return (Nottui.Ui.atom (Notty.I.hcat [ input_mode_image; UI_base.Status_bar.element_spacer; Notty.I.strf ~attr "Do you want to edit %s to add comments etc?" (Filename.basename path); ])) in let$ bar = UI_base.Status_bar.background_bar in Nottui.Ui.join_z bar content ) | Delete_script_confirm (script, _) -> ( let$* content = Lwd.return (Nottui.Ui.atom (Notty.I.hcat [ input_mode_image; UI_base.Status_bar.element_spacer; Notty.I.strf ~attr "Confirm deletion of %s?" 
script; ])) in let$ bar = UI_base.Status_bar.background_bar in Nottui.Ui.join_z bar content ) | Path_fuzzy_rank -> ( let text_field = Vars.path_fuzzy_rank_field in let document_count = Array.length search_result_groups in let$* document_current_choice = Lwd.get UI_base.Vars.index_of_document_selected in let$* content = Nottui_widgets.hbox [ Lwd.return (Nottui.Ui.atom (Notty.I.hcat [ input_mode_image; UI_base.Status_bar.element_spacer; ])); UI_base.edit_field text_field ~focus:Vars.path_fuzzy_rank_field_focus_handle ~on_cancel:(fun (_, _) -> Lwd.set UI_base.Vars.input_mode Navigate ) ~on_change:(fun (text, x) -> Lwd.set text_field (text, x); update_path_fuzzy_rank ~commit:false (); ) ~on_submit:(fun (text, x) -> Lwd.set text_field (text, x); update_path_fuzzy_rank ~commit:true (); Lwd.set UI_base.Vars.input_mode Navigate ) ~on_up_down:(fun ud _ -> let offset = match ud with | `Up -> -1 | `Down -> 1 in UI_base.set_document_selected ~choice_count:document_count (document_current_choice + offset); ) ; ] in let$ bar = UI_base.Status_bar.background_bar in Nottui.Ui.join_z bar content ) | _ -> ( let$* index_of_document_selected = Lwd.get UI_base.Vars.index_of_document_selected in let document_count = Array.length search_result_groups in let$* (_cur_ver, snapshot) = Session_manager.cur_snapshot in let content = let file_shown_count = Notty.I.strf ~attr "%5d/%d documents listed" document_count (Session.Snapshot.state snapshot |> Session.State.size) in let hint = Notty.I.strf ~attr "< and > to see more key binding info, ? 
to toggle hide" in let hint_len = Notty.I.width hint in let hint_overlay = Notty.I.void (width - hint_len) 1 <|> hint in let core = if document_count = 0 then ( [ UI_base.Status_bar.element_spacer; file_shown_count; ] ) else ( let index_of_selected = Notty.I.strf ~attr "Index of document selected: %d" index_of_document_selected in [ file_shown_count; UI_base.Status_bar.element_spacer; index_of_selected; ] ) in Notty.I.zcat [ Notty.I.hcat (input_mode_image :: UI_base.Status_bar.element_spacer :: core); hint_overlay; ] |> Nottui.Ui.atom in let$ bar = UI_base.Status_bar.background_bar in Nottui.Ui.join_z bar content ) module Key_binding_info = struct let grid_contents : UI_base.Key_binding_info.grid_contents = let open UI_base.Key_binding_info in let empty_row = [ { label = ""; msg = "" }; ] in let navigate_grid = [ [ { label = "Enter"; msg = "open document" }; { label = "/"; msg = "SEARCH" }; { label = "↑/↓/j/k"; msg = "select document" }; { label = "s"; msg = "SORT-ASC" }; { label = "Tab"; msg = "expand right pane" }; { label = "y"; msg = "COPY" }; { label = "n"; msg = "NARROW" }; { label = "Space"; msg = "toggle mark" }; { label = "h"; msg = "command history" }; { label = "Ctrl+S"; msg = "save session as script" }; ]; [ { label = "v"; msg = "focus content" }; { label = "f"; msg = "FILTER" }; { label = "Shift+↑/↓/j/k"; msg = "select search result" }; { label = "Shift+S"; msg = "SORT-DESC" }; { label = "Shift+Tab"; msg = "expand left pane" }; { label = "Shift+Y"; msg = "COPY-PATHS" }; { label = "d"; msg = "DROP" }; { label = "m"; msg = "MARK" }; { label = ""; msg = "" }; { label = "Ctrl+O"; msg = "SCRIPTS" }; ]; [ { label = "Ctrl+C"; msg = "exit" }; { label = "x"; msg = "CLEAR" }; { label = "-/="; msg = "scroll content view" }; { label = "l"; msg = "LINKS" }; { label = ""; msg = "" }; { label = ""; msg = "" }; { label = "r"; msg = "RELOAD" }; { label = "Shift+M"; msg = "UNMARK" }; { label = ""; msg = "" }; { label = ""; msg = "" }; ]; ] in let search_grid = [ [ { 
label = "Enter"; msg = "exit SEARCH" }; ]; ] in let filter_grid = [ [ { label = "Enter"; msg = "exit FILTER" }; { label = "Tab"; msg = "autocomplete" }; ]; ] in let save_script_grid = [ [ { label = "Enter"; msg = "confirm answer" }; { label = "Tab"; msg = "autocomplete" }; { label = "Esc"; msg = "cancel" }; ]; empty_row; empty_row; ] in let save_script_confirm_grid = [ [ { label = "y"; msg = "confirm overwrite" }; { label = "Esc/n"; msg = "cancel" }; ]; empty_row; empty_row; ] in let save_script_cancel_grid = [ [ { label = "Enter"; msg = "confirm" }; ]; empty_row; empty_row; ] in let save_script_edit_grid = [ [ { label = "y"; msg = "open in editor" }; { label = "Esc/n"; msg = "skip" }; ]; empty_row; empty_row; ] in let scripts_grid = [ [ { label = "Enter"; msg = "open" }; { label = "↑/↓"; msg = "select" }; { label = "Ctrl+X"; msg = "delete" }; { label = "Esc"; msg = "cancel" }; ]; empty_row; empty_row; ] in let delete_script_confirm_grid = [ [ { label = "y"; msg = "confirm deletion" }; { label = "Esc/n"; msg = "cancel" }; ]; empty_row; empty_row; ] in let clear_grid = [ [ { label = "/"; msg = "search field" }; { label = "f"; msg = "filter field" }; { label = "h"; msg = "command history" }; ]; [ { label = "Esc"; msg = "cancel" }; ]; empty_row; ] in let sort_asc_grid = [ [ { label = "s"; msg = "score" }; { label = "p"; msg = "path" }; { label = "d"; msg = "path date" }; { label = "m"; msg = "mod time" }; ]; [ { label = "Esc"; msg = "cancel" }; ]; empty_row; ] in let sort_desc_grid = [ [ { label = "s"; msg = "score" }; { label = "p"; msg = "path" }; { label = "d"; msg = "path date" }; { label = "m"; msg = "mod time" }; ]; [ { label = "Esc"; msg = "cancel" }; ]; empty_row; ] in let drop_grid = [ [ { label = "d"; msg = "selected" }; { label = "l"; msg = "listed" }; { label = "m"; msg = "marked" }; ]; [ { label = "Shift+D"; msg = "unselected" }; { label = "Shift+L"; msg = "unlisted" }; { label = "Shift+M"; msg = "unmarked" }; ]; [ { label = "Esc"; msg = "cancel" }; ]; ] 
in let mark_grid = [ [ { label = "l"; msg = "listed" }; ]; empty_row; [ { label = "Esc"; msg = "cancel" }; ]; ] in let unmark_grid = [ [ { label = "l"; msg = "listed" }; { label = "a"; msg = "all" }; ]; empty_row; [ { label = "Esc"; msg = "cancel" }; ]; ] in let copy_grid = [ [ { label = "y"; msg = "selected search result" }; { label = "m"; msg = "results of marked documents" }; { label = "l"; msg = "results of listed documents" }; ]; [ { label = "a"; msg = "results of selected document" }; ]; [ { label = "Esc"; msg = "cancel" }; ]; ] in let copy_paths_grid = [ [ { label = "y"; msg = "path of selected document" }; { label = "m"; msg = "paths of marked documents" }; { label = "l"; msg = "paths of listed documents" }; ]; [ { label = ""; msg = "" }; { label = "Shift+M"; msg = "paths of unmarked documents" }; { label = "Shift+L"; msg = "paths of unlisted documents" }; ]; [ { label = "Esc"; msg = "cancel" }; ]; ] in let narrow_grid = [ [ { label = "0-9"; msg = "narrow search scope to level N" }; ]; [ { label = "Esc"; msg = "cancel" }; ]; empty_row; ] in let reload_grid = [ [ { label = "r"; msg = "selected" }; { label = "a"; msg = "all" }; ]; [ { label = "Esc"; msg = "cancel" }; ]; empty_row; ] in let links_grid = [ [ { label = "Enter"; msg = "open" }; { label = "o"; msg = "open and remain in LINKS" }; { label = "↑/↓/j/k"; msg = "select" }; { label = "y"; msg = "copy" }; ]; [ { label = "Esc"; msg = "exit" }; ]; empty_row; ] in let path_fuzzy_rank_grid = [ [ { label = "Enter"; msg = "pin to top" }; { label = "↑/↓"; msg = "select" }; ]; [ { label = "Esc"; msg = "exit" }; ]; empty_row; ] in [ (Navigate, navigate_grid); (Search, search_grid); (Filter, filter_grid); (Clear, clear_grid); (Sort `Asc, sort_asc_grid); (Sort `Desc, sort_desc_grid); (Drop, drop_grid); (Mark, mark_grid); (Unmark, unmark_grid); (Narrow, narrow_grid); (Copy, copy_grid); (Copy_paths, copy_paths_grid); (Reload, reload_grid); (Save_script, save_script_grid); (Save_script_overwrite "", 
save_script_confirm_grid); (Save_script_no_name, save_script_cancel_grid); (Save_script_edit "", save_script_edit_grid); (Scripts, scripts_grid); (Delete_script_confirm ("", ""), delete_script_confirm_grid); (Links, links_grid); (Path_fuzzy_rank, path_fuzzy_rank_grid); ] let grid_lookup = UI_base.Key_binding_info.make_grid_lookup grid_contents let main ~input_mode ~show_key_binding_info_pane = if show_key_binding_info_pane then ( UI_base.Key_binding_info.main ~grid_lookup ~input_mode ) else ( Lwd.return (Nottui.Ui.atom (Notty.I.void 0 0)) ) end let autocomplete_grid ~input_mode ~width = match input_mode with | UI_base.Filter -> ( let$* l = Lwd.get UI_base.Vars.autocomplete_choices in let max_len = List.fold_left (fun n x -> max n (String.length x) ) 0 l in let cell_len = max_len + 4 in let cells_per_row = width / cell_len in l |> CCList.chunks cells_per_row |> (fun rows -> let row_count = List.length rows in let padding = if row_count < 2 then ( CCList.(0 --^ (2 - row_count)) |> List.map (fun _ -> [ "" ]) ) else ( [] ) in rows @ padding ) |> List.map (fun row -> List.map (fun s -> let full_background = Notty.I.void cell_len 1 in Notty.I.(strf "%s" s full_background) |> Nottui.Ui.atom |> Lwd.return ) row ) |> Nottui_widgets.grid ~pad:(Nottui.Gravity.make ~h:`Negative ~v:`Negative) ) | _ -> Lwd.return (Nottui.Ui.atom (Notty.I.void 0 0)) let filter_bar = UI_base.Filter_bar.main ~text_field:UI_base.Vars.filter_field ~focus_handle:UI_base.Vars.filter_field_focus_handle ~on_change:(update_filter ~commit:false) ~on_submit:(update_filter ~commit:true) let search_bar ~input_mode = UI_base.Search_bar.main ~input_mode ~text_field:UI_base.Vars.search_field ~focus_handle:UI_base.Vars.search_field_focus_handle ~on_change:(update_search ~commit:false) ~on_submit:(update_search ~commit:true) let main ~width ~show_key_binding_info_pane ~search_result_groups = let$* input_mode = Lwd.get UI_base.Vars.input_mode in Nottui_widgets.vbox [ status_bar ~width ~search_result_groups 
(* Remaining entries of [Bottom_pane.main]'s vbox (continued from the
   previous line); [end] closes the [Bottom_pane] module. *)
~input_mode; Key_binding_info.main ~input_mode ~show_key_binding_info_pane; autocomplete_grid ~input_mode ~width; filter_bar ~input_mode; search_bar ~input_mode; ] end
(* Top-level keyboard handler.  Computes the current document / search
   result / link selection indices and counts up front, then dispatches on
   the current input mode.  Returns [`Handled] for keys it consumes.
   Navigate mode first: Ctrl-C/Ctrl-Q quit, '<'/'>' rotate the key binding
   info columns, Space toggles the mark on the selected document, and
   single letters switch to the corresponding input mode. *)
let keyboard_handler ~(session_state : Session.State.t) ~(search_result_groups : Session.search_result_group array) (key : Nottui.Ui.key) = let document_count = Array.length search_result_groups in let document_current_choice = Lwd.peek UI_base.Vars.index_of_document_selected in let search_result_group = if document_count = 0 then None else Some search_result_groups.(document_current_choice) in let search_result_choice_count = match search_result_group with | None -> 0 | Some (_doc, search_results) -> Array.length search_results in let link_choice_count = match search_result_group with | None -> 0 | Some (doc, _search_results) -> Array.length (Document.links doc) in let search_result_current_choice = Lwd.peek UI_base.Vars.index_of_search_result_selected in let link_current_choice = Lwd.peek UI_base.Vars.index_of_link_selected in match Lwd.peek UI_base.Vars.input_mode with | Navigate -> ( match key with | (`ASCII 'C', [`Ctrl]) | (`ASCII 'Q', [`Ctrl]) -> ( Lwd.set UI_base.Vars.quit true; UI_base.Vars.action := None; `Handled ) | (`ASCII '<', []) -> ( UI_base.Key_binding_info.decr_rotation (); `Handled ) | (`ASCII '>', []) -> ( UI_base.Key_binding_info.incr_rotation (); `Handled ) | (`ASCII ' ', []) -> ( let index = Lwd.peek UI_base.Vars.index_of_document_selected in if index < Array.length search_result_groups then ( let doc, _ = search_result_groups.(index) in toggle_mark ~path:(Document.path doc) ); `Handled ) | (`ASCII 'm', []) -> ( UI_base.set_input_mode Mark; `Handled ) | (`ASCII 'M', []) -> ( UI_base.set_input_mode Unmark; `Handled ) | (`ASCII 'd', []) -> ( UI_base.set_input_mode Drop; `Handled ) | (`ASCII 'n', []) -> ( UI_base.set_input_mode Narrow; `Handled ) | (`ASCII 'r', []) -> ( UI_base.set_input_mode Reload; `Handled ) | (`ASCII 'y', []) -> ( UI_base.set_input_mode Copy; `Handled ) |
(* Navigate (cont.): 'Y' enters Copy_paths mode; Left / 'u' / Ctrl-Z and
   Right / Ctrl-R / Ctrl-Y move backward/forward through session history;
   Tab / Shift-Tab adjust the screen split via a session command; '?' and
   'v' toggle pane visibility; '='/'-' nudge the content view offset;
   Shift-PgDn/'J'/Shift-Down advance the search result selection. *)
(`ASCII 'Y', []) -> ( UI_base.set_input_mode Copy_paths; `Handled ) | (`Arrow `Left, []) | (`ASCII 'u', []) | (`ASCII 'Z', [`Ctrl]) -> ( Session_manager.shift_ver ~offset:(-1); `Handled ) | (`Arrow `Right, []) | (`ASCII 'R', [`Ctrl]) | (`ASCII 'Y', [`Ctrl]) -> ( Session_manager.shift_ver ~offset:1; `Handled ) | (`Tab, []) | (`Tab, [`Shift]) -> ( let direction = match key with | (_, [`Shift]) -> `Expand_left | (_, _) -> `Expand_right in Session_manager.update_from_cur_snapshot (fun cur_snapshot -> let state = Session.Snapshot.state cur_snapshot in let cur = Session.State.screen_split state in let offset = match direction with | `Expand_left -> 1 | `Expand_right -> -1 in let next = Command.screen_split_of_int (Command.int_of_screen_split cur + offset) in let command = `Split_screen next in state |> Session.run_command (UI_base.task_pool ()) command |> Option.get |> (fun (command, state) -> Session.Snapshot.make ~last_command:(Some command) state) ); `Handled ) | (`ASCII '?', []) | (`ASCII 'v', []) -> ( let pane = match key with | (`ASCII '?', []) -> `Key_binding_info | (`ASCII 'v', []) -> `Bottom_right | _ -> failwith "unexpected case" in Session_manager.update_from_cur_snapshot (fun cur_snapshot -> let state = Session.Snapshot.state cur_snapshot in let cur = Session.State.show_pane state pane in let command = if cur then ( `Hide_pane pane ) else ( `Show_pane pane ) in state |> Session.run_command (UI_base.task_pool ()) command |> Option.get |> (fun (command, state) -> Session.Snapshot.make ~last_command:(Some command) state) ); `Handled ) | (`ASCII '=', []) -> ( UI_base.incr_content_view_offset (); `Handled ) | (`ASCII '-', []) -> ( UI_base.decr_content_view_offset (); `Handled ) | (`Page `Down, [`Shift]) | (`ASCII 'J', []) | (`Arrow `Down, [`Shift]) -> ( UI_base.set_search_result_selected ~choice_count:search_result_choice_count (search_result_current_choice+1); `Handled ) | (`Page `Up, [`Shift]) | (`ASCII 'K', []) | (`Arrow `Up, [`Shift]) -> (
(* Navigate (cont.): unshifted j/k move the search result selection when a
   single document is shown, otherwise the document selection; g/G jump to
   first/last document; f/F///'h' enter Filter / Path_fuzzy_rank / Search
   mode or open the command history editor; Ctrl-S / Ctrl-O enter the
   save-script / scripts states; 'x' enters Clear mode; 'l' enters Links
   mode. *)
UI_base.set_search_result_selected ~choice_count:search_result_choice_count (search_result_current_choice-1); `Handled ) | (`Page `Down, []) | (`ASCII 'j', []) | (`Arrow `Down, []) -> ( if document_count = 1 then ( UI_base.set_search_result_selected ~choice_count:search_result_choice_count (search_result_current_choice+1); `Handled ) else ( UI_base.set_document_selected ~choice_count:document_count (document_current_choice+1); `Handled ) ) | (`Page `Up, []) | (`ASCII 'k', []) | (`Arrow `Up, []) -> ( if document_count = 1 then ( UI_base.set_search_result_selected ~choice_count:search_result_choice_count (search_result_current_choice-1); `Handled ) else ( UI_base.set_document_selected ~choice_count:document_count (document_current_choice-1); `Handled ) ) | (`ASCII 'g', []) -> ( UI_base.set_document_selected ~choice_count:document_count 0; `Handled ) | (`ASCII 'G', []) -> ( UI_base.set_document_selected ~choice_count:document_count (document_count - 1); `Handled ) | (`ASCII 'f', []) -> ( Nottui.Focus.request UI_base.Vars.filter_field_focus_handle; UI_base.set_input_mode Filter; `Handled ) | (`ASCII 'F', []) -> ( Nottui.Focus.request Vars.path_fuzzy_rank_field_focus_handle; Lwd.set Vars.path_fuzzy_rank_field UI_base.empty_text_field; UI_base.set_input_mode Path_fuzzy_rank; `Handled ) | (`ASCII '/', []) -> ( Nottui.Focus.request UI_base.Vars.search_field_focus_handle; UI_base.set_input_mode Search; `Handled ) | (`ASCII 'h', []) -> ( Lwd.set UI_base.Vars.quit true; UI_base.Vars.action := Some UI_base.Edit_command_history; `Handled ) | (`ASCII 'S', [`Ctrl]) -> ( UI_base.set_input_mode Save_script; refresh_script_files (); Nottui.Focus.request Vars.script_name_field_focus_handle; `Handled ) | (`ASCII 'O', [`Ctrl]) -> ( UI_base.set_input_mode Scripts; refresh_script_files (); Nottui.Focus.request Vars.script_name_field_focus_handle; `Handled ) | (`ASCII 'x', []) -> ( UI_base.set_input_mode Clear; `Handled ) | (`ASCII 'l', []) -> ( UI_base.set_input_mode Links; if
(* On entering Links mode: pick an initial link near the selected search
   result.  Take the midpoint of the matched phrase's min/max word
   positions, split the document's link-start-position map around it, then
   prefer the nearest link after the midpoint unless the link before is
   much closer (<= 50% of the distance) or actually contains the
   midpoint. *)
search_result_choice_count > 0 then ( let (doc, search_results) = Option.get search_result_group in let search_result = search_results.(search_result_current_choice) in let links = Document.links doc in let avg_pos = List.fold_left (fun min_max_pos search_result -> let { Search_result.found_word_pos; _ } = search_result in match min_max_pos with | None -> Some (found_word_pos, found_word_pos) | Some (min_pos, max_pos) -> ( Some (min found_word_pos min_pos, max found_word_pos max_pos) ) ) None (Search_result.found_phrase search_result) |> (fun x -> let (x, y) = Option.get x in (x + y) / 2) in let before, exact, after = Int_map.split avg_pos (Document.link_index_of_start_pos doc) in let index = match exact with | Some index -> Some index | None -> ( match Int_map.max_binding_opt before, Int_map.min_binding_opt after with | Some (pos_x, index_x), Some (pos_y, index_y) -> ( let diff_x = Int.to_float (Int.abs (pos_x - avg_pos)) in let diff_y = Int.to_float (Int.abs (pos_y - avg_pos)) in (* We prefer picking y (link after search result) over x (link before search result), as it usually feels more intuitive to jump forward than backward. But if distance to x is <= 50% the distance to y, then we resort to x. *) if diff_x /.
diff_y <= 0.5 then ( Some index_x ) else ( let link_x = links.(index_x) in let end_inc_pos_x = link_x.Link.end_inc_pos in if pos_x <= avg_pos && avg_pos <= end_inc_pos_x then ( Some index_x ) else ( Some index_y ) ) ) | Some (_pos, index), None | None, Some (_pos, index) -> Some index | None, None -> None ) in match index with | None -> () | Some index -> ( UI_base.set_link_selected ~choice_count:link_choice_count index ) ); `Handled ) | (`ASCII 's', []) -> ( UI_base.set_input_mode (Sort `Asc); `Handled ) | (`ASCII 'S', []) -> ( UI_base.set_input_mode (Sort `Desc); `Handled ) | (`Enter, []) -> ( Option.iter (fun (doc, search_results) -> let search_result = if search_result_current_choice < Array.length search_results then Some search_results.(search_result_current_choice) else None in Lwd.set UI_base.Vars.quit true; UI_base.Vars.action := Some (UI_base.Open_file_and_search_result (doc, search_result)); ) search_result_group; `Handled ) | _ -> `Handled )
(* Sort mode: a key picks the sort criterion under the pending order;
   Clear mode: '/' clears search, 'f' clears filter, 'h' clears command
   history; Drop mode begins (keep/drop document subsets). *)
| Sort order -> ( let exit = match key with | (`Escape, []) -> true | (`ASCII 's', []) -> ( sort (`Score, order); true ) | (`ASCII 'p', []) -> ( sort (`Path, order); true ) | (`ASCII 'd', []) -> ( sort (`Path_date, order); true ) | (`ASCII 'm', []) -> ( sort (`Mod_time, order); true ) | _ -> false in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Clear -> ( let exit = match key with | (`Escape, []) -> true | (`ASCII '/', []) -> ( Lwd.set UI_base.Vars.search_field UI_base.empty_text_field; update_search ~commit:true (); true ) | (`ASCII 'f', []) -> ( Lwd.set UI_base.Vars.filter_field UI_base.empty_text_field; update_filter ~commit:true (); true ) | (`ASCII 'h', []) -> ( Lwd.set UI_base.Vars.quit true; UI_base.Vars.action := Some UI_base.Clear_command_history; true ) | _ -> false in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Drop -> ( let exit = match key with | (`Escape, []) -> true | (`ASCII '>', []) -> ( UI_base.Key_binding_info.incr_rotation (); false ) | (`ASCII 'd', []) -> (
(* Drop mode (cont.): drop selected / all-but-selected / listed / unlisted
   / marked / unmarked documents.  Mark and Unmark modes mark/unmark all
   listed documents ('a' unmarks everything).  Narrow mode: a digit picks
   the search-scope narrowing level.  Copy mode helpers pipe rendered
   search results to the system clipboard. *)
Option.iter (fun (doc, _search_results) -> drop ~document_count (`Path (Document.path doc)) ) search_result_group; true ) | (`ASCII 'D', []) -> ( Option.iter (fun (doc, _search_results) -> drop ~document_count (`All_except (Document.path doc)) ) search_result_group; true ) | (`ASCII 'l', []) -> ( drop ~document_count `Listed; true ) | (`ASCII 'L', []) -> ( drop ~document_count `Unlisted; true ) | (`ASCII 'm', []) -> ( drop ~document_count `Marked; true ) | (`ASCII 'M', []) -> ( drop ~document_count `Unmarked; true ) | _ -> false in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Mark -> ( let exit = match key with | (`Escape, []) -> true | (`ASCII '>', []) -> ( UI_base.Key_binding_info.incr_rotation (); false ) | (`ASCII 'l', []) -> ( mark `Listed; true ) | _ -> false in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Unmark -> ( let exit = match key with | (`Escape, []) -> true | (`ASCII '>', []) -> ( UI_base.Key_binding_info.incr_rotation (); false ) | (`ASCII 'l', []) -> ( unmark `Listed; true ) | (`ASCII 'a', []) -> ( unmark `All; true ) | _ -> false in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Narrow -> ( let exit = match key with | (`Escape, []) -> true | (`ASCII '>', []) -> ( UI_base.Key_binding_info.incr_rotation (); false ) | (`ASCII c, []) -> ( let code_0 = Char.code '0' in let code_9 = Char.code '9' in let code_c = Char.code c in if code_0 <= code_c && code_c <= code_9 then ( let level = code_c - code_0 in narrow_search_scope_to_level ~level; true ) else ( false ) ) | _ -> false in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Copy -> ( let copy_search_result_groups (s : Session.search_result_group Seq.t) = Clipboard.pipe_to_clipboard (fun oc -> Printers.search_result_groups ~color:false ~underline:true oc s ) in let copy_search_result_group x = copy_search_result_groups (Seq.return x) in let exit = match key with | (`Escape, []) -> true | (`ASCII '>', []) -> (
(* Copy mode (cont.): 'y' copies the selected result, 'a' the selected
   document's results, 'm' all marked documents' results, 'l' all listed
   results.  Copy_paths mode copies document path sets (selected / marked /
   unmarked / listed / unlisted).  Reload mode begins. *)
UI_base.Key_binding_info.incr_rotation (); false ) | (`ASCII 'y', []) -> ( Option.iter (fun (doc, search_results) -> copy_search_result_group (doc, (if search_result_current_choice < Array.length search_results then [|search_results.(search_result_current_choice)|] else [||]) ) ) search_result_group; true ) | (`ASCII 'a', []) -> ( Option.iter copy_search_result_group search_result_group; true ) | (`ASCII 'm', []) -> ( let marked = Session.State.marked_document_paths session_state in search_result_groups |> Array.to_seq |> Seq.filter (fun (doc, _) -> String_set.mem (Document.path doc) marked) |> copy_search_result_groups; true ) | (`ASCII 'l', []) -> ( search_result_groups |> Array.to_seq |> copy_search_result_groups; true ) | _ -> false in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Copy_paths -> ( let copy_paths s = Clipboard.pipe_to_clipboard (fun oc -> Seq.iter (Printers.path_image ~color:false oc) s ) in let exit = match key with | (`Escape, []) -> true | (`ASCII '>', []) -> ( UI_base.Key_binding_info.incr_rotation (); false ) | (`ASCII 'y', []) -> ( Option.iter (fun (doc, _search_results) -> copy_paths (Seq.return (Document.path doc)) ) search_result_group; true ) | (`ASCII 'm', []) -> ( String_set.inter (Session.State.usable_document_paths session_state) (Session.State.marked_document_paths session_state) |> String_set.to_seq |> copy_paths; true ) | (`ASCII 'M', []) -> ( String_set.diff (Session.State.usable_document_paths session_state) (Session.State.marked_document_paths session_state) |> String_set.to_seq |> copy_paths; true ) | (`ASCII 'l', []) -> ( Session.State.usable_document_paths session_state |> String_set.to_seq |> copy_paths; true ) | (`ASCII 'L', []) -> ( Session.State.unusable_document_paths session_state |> copy_paths; true ) | _ -> false in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Reload -> ( let exit = (match key with | (`Escape, []) -> true | (`ASCII '>', []) -> (
(* Reload mode (cont.): 'r' reloads the selected document, 'a' quits the
   UI with a request to recompute the whole document source.  Links mode:
   Enter opens the selected link and exits, 'o' opens without exiting, j/k
   move the link selection, 'y' copies the link target.  Then the
   save-script confirmation states. *)
UI_base.Key_binding_info.incr_rotation (); false ) | (`ASCII 'r', []) -> ( reload_document_selected ~search_result_groups; true ) | (`ASCII 'a', []) -> ( UI_base.reset_document_selected (); Lwd.set UI_base.Vars.quit true; UI_base.Vars.action := Some UI_base.Recompute_document_src; true ) | _ -> false ); in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Links -> ( let doc_and_link = if link_choice_count > 0 then ( Option.map (fun (doc, _search_results) -> (doc, (Document.links doc).(link_current_choice)) ) search_result_group ) else ( None ) in let set_action_to_open_link () = Option.iter (fun (doc, link) -> Lwd.set UI_base.Vars.quit true; UI_base.Vars.action := Some (UI_base.Open_link (doc, link)) ) doc_and_link in let exit = (match key with | (`Escape, []) -> true | (`Enter, []) -> ( set_action_to_open_link (); true ) | (`ASCII 'o', []) -> ( set_action_to_open_link (); false ) | (`Page `Down, []) | (`ASCII 'j', []) | (`Arrow `Down, []) -> ( UI_base.set_link_selected ~choice_count:link_choice_count (link_current_choice+1); false ) | (`Page `Up, []) | (`ASCII 'k', []) | (`Arrow `Up, []) -> ( UI_base.set_link_selected ~choice_count:link_choice_count (link_current_choice-1); false ) | (`ASCII 'y', []) -> ( Option.iter (fun (_doc, link) -> Clipboard.pipe_to_clipboard (fun oc -> output_string oc link.Link.link ); ) doc_and_link; true ) | _ -> false ); in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Save_script_overwrite script_name -> ( (match key with | (`Escape, []) | (`ASCII 'n', []) -> ( UI_base.set_input_mode Navigate; ) | (`ASCII 'y', []) -> ( let path = compute_save_script_path script_name in save_script ~path; UI_base.set_input_mode (Save_script_edit script_name); ) | _ -> () ); `Handled ) | Save_script_no_name -> ( let exit = (match key with | (`Enter, []) -> true | _ -> false ); in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Save_script_edit script_name -> ( let exit = (match key with | (`Escape, []) |
(* Save_script_edit (cont.): 'y' quits the UI with a request to edit the
   just-saved script.  Delete_script_confirm: 'y' removes the script file
   and returns to the Scripts list.  All other modes fall through as
   [`Unhandled] (their fields handle their own keys). *)
(`ASCII 'n', []) -> true | (`ASCII 'y', []) -> ( let path = compute_save_script_path script_name in Lwd.set UI_base.Vars.quit true; UI_base.Vars.action := Some (UI_base.Edit_script path); true ) | _ -> false ); in if exit then ( UI_base.set_input_mode Navigate; ); `Handled ) | Delete_script_confirm (_script, path) -> ( (match key with | (`Escape, []) | (`ASCII 'n', []) -> ( UI_base.set_input_mode Scripts; ) | (`ASCII 'y', []) -> ( Sys.remove path; refresh_script_files (); UI_base.set_input_mode Scripts; ) | _ -> () ); `Handled ) | _ -> `Unhandled
(* Root UI: re-clamp the document / search result / link selections
   against the current session snapshot, size the panes from the terminal
   dimensions, and stack the keyboard-handling top pane above the bottom
   pane (continues on the next chunk line). *)
let main : Nottui.ui Lwd.t = let$* (_, snapshot) = Session_manager.cur_snapshot in let session_state = Session.Snapshot.state snapshot in let search_result_groups = Session.State.search_result_groups session_state in let document_count = Array.length search_result_groups in UI_base.set_document_selected ~choice_count:document_count (Lwd.peek UI_base.Vars.index_of_document_selected); if document_count > 0 then ( UI_base.set_search_result_selected ~choice_count:(Array.length (snd search_result_groups.(Lwd.peek UI_base.Vars.index_of_document_selected))) (Lwd.peek UI_base.Vars.index_of_search_result_selected) ); if document_count > 0 then ( UI_base.set_link_selected ~choice_count:(search_result_groups.(Lwd.peek UI_base.Vars.index_of_document_selected) |> fst |> Document.links |> Array.length) (Lwd.peek UI_base.Vars.index_of_link_selected) ); let$* (term_width, term_height) = Lwd.get UI_base.Vars.term_width_height in let show_key_binding_info_pane = Session.State.show_pane session_state `Key_binding_info in let$* bottom_pane = Bottom_pane.main ~width:term_width ~search_result_groups ~show_key_binding_info_pane in let bottom_pane_height = Nottui.Ui.layout_height bottom_pane in let top_pane_height = term_height - bottom_pane_height in let screen_split = Session.State.screen_split session_state in let show_bottom_right_pane = Session.State.show_pane session_state `Bottom_right in let$* top_pane = Top_pane.main ~width:term_width
(* Tail of UI.ml [main] (continued): the top pane wrapped in a keyboard
   area stacked above the bottom pane. *)
~height:top_pane_height ~documents_marked:(Session.State.marked_document_paths session_state) ~screen_split ~show_bottom_right_pane ~search_result_groups in Nottui_widgets.vbox [ Lwd.return (Nottui.Ui.keyboard_area (keyboard_handler ~session_state ~search_result_groups) top_pane); Lwd.return bottom_pane; ] ================================================ FILE: bin/UI_base.ml ================================================ open Docfd_lib open Lwd_infix
(* All input modes of the TUI; several carry a script name and/or path as
   payload.  [@@deriving ord] provides [compare_input_mode]. *)
type input_mode = | Navigate | Search | Filter | Clear | Sort of [ `Asc | `Desc ] | Drop | Mark | Unmark | Narrow | Copy | Copy_paths | Reload | Save_script | Save_script_overwrite of string | Save_script_no_name | Save_script_edit of string | Scripts | Delete_script_confirm of string * string | Links | Path_fuzzy_rank [@@deriving ord]
(* Map keyed by input mode, with constructors that carry payloads compared
   equal regardless of payload — so each such family of modes shares one
   key binding grid / status image. *)
module Input_mode_map = Map.Make (struct type t = input_mode let compare x y = match x, y with | Save_script_overwrite _, Save_script_overwrite _ -> 0 | Save_script_edit _, Save_script_edit _ -> 0 | Delete_script_confirm _, Delete_script_confirm _ -> 0 | _, _ -> compare_input_mode x y end)
(* Action for the surrounding main loop to execute after the UI quits. *)
type top_level_action = | Recompute_document_src | Open_file_and_search_result of Document.t * Search_result.t option | Open_link of (Document.t * Link.t) | Clear_command_history | Edit_command_history | Open_script of string | Edit_script of string
(* Status indicators shown next to the search / filter bars. *)
type search_status = [ | `Idle | `Searching | `Parse_error ] type filter_status = [ | `Idle | `Filtering | `Parse_error ]
(* A text field is (contents, cursor position). *)
let empty_text_field = ("", 0)
(* How to annotate rendered content snippets, based on file format. *)
let render_mode_of_document (doc : Document.t) : Content_and_search_result_rendering.render_mode = match File_utils.format_of_file (Document.path doc) with | `PDF -> `Page_num_only | `Pandoc_supported_format -> `None | `Text | `Other -> `Line_num_only
(* Global mutable UI state, shared across modules (continues on the next
   chunk line). *)
module Vars = struct let quit = Lwd.var false let pool : Task_pool.t option Atomic.t = Atomic.make None let action : top_level_action option ref = ref None let eio_env : Eio_unix.Stdenv.base option ref = ref None let hide_document_list : bool Lwd.var =
Lwd.var false

  (* Current input mode; drives key dispatch and the key binding grids. *)
  let input_mode : input_mode Lwd.var = Lwd.var Navigate

  let document_src : Document_src.t ref =
    ref (Document_src.(Files empty_file_collection))

  let term : Notty_unix.Term.t option ref = ref None

  let term_width_height : (int * int) Lwd.var = Lwd.var (0, 0)

  (* Scroll offset within the content preview pane. *)
  let content_view_offset = Lwd.var 0

  let autocomplete_choices = Lwd.var []

  let filter_field = Lwd.var empty_text_field

  let filter_field_focus_handle = Nottui.Focus.make ()

  let search_field = Lwd.var empty_text_field

  let search_field_focus_handle = Nottui.Focus.make ()

  let search_ui_status : search_status Lwd.var = Lwd.var `Idle

  let filter_ui_status : filter_status Lwd.var = Lwd.var `Idle

  (* Current selections in the document / search result / link / script
     lists. *)
  let index_of_document_selected = Lwd.var 0

  let index_of_search_result_selected = Lwd.var 0

  let index_of_link_selected = Lwd.var 0

  let index_of_script_selected = Lwd.var 0
end

let reset_content_view_offset () =
  Lwd.set Vars.content_view_offset 0

let decr_content_view_offset () =
  let x = Lwd.peek Vars.content_view_offset in
  Lwd.set Vars.content_view_offset (x - 1)

let incr_content_view_offset () =
  let x = Lwd.peek Vars.content_view_offset in
  Lwd.set Vars.content_view_offset (x + 1)

(* Reset all selections (and the content view offset) to the start. *)
let reset_document_selected () =
  reset_content_view_offset ();
  Lwd.set Vars.index_of_document_selected 0;
  Lwd.set Vars.index_of_search_result_selected 0;
  Lwd.set Vars.index_of_link_selected 0

(* Select document [n] (clamped to [choice_count]); a real change resets
   the content view offset and the dependent selections. *)
let set_document_selected ~choice_count n =
  let n = Misc_utils.bound_selection ~choice_count n in
  let old = Lwd.peek Vars.index_of_document_selected in
  if old <> n then (
    reset_content_view_offset ();
    Lwd.set Vars.index_of_document_selected n;
    Lwd.set Vars.index_of_search_result_selected 0;
    Lwd.set Vars.index_of_link_selected 0;
  )

(* Select search result [n].  Fix: clamp [n] BEFORE comparing with the
   current selection (as [set_document_selected] already does).
   Previously the raw [n] was compared first, so e.g. pressing "next" on
   the last result (where the clamped value equals the current one)
   spuriously reset the content view offset and re-set the Lwd var. *)
let set_search_result_selected ~choice_count n =
  let n = Misc_utils.bound_selection ~choice_count n in
  let old = Lwd.peek Vars.index_of_search_result_selected in
  if old <> n then (
    reset_content_view_offset ();
    Lwd.set Vars.index_of_search_result_selected n
  )

(* Select link [n]; same clamp-before-compare fix as above. *)
let set_link_selected ~choice_count n =
  let n = Misc_utils.bound_selection ~choice_count n in
  let old = Lwd.peek Vars.index_of_link_selected in
  if old <> n then (
    reset_content_view_offset ();
    Lwd.set Vars.index_of_link_selected n
  )

(* Select script [n]; no content view offset involved here, but the same
   clamp-before-compare ordering avoids a redundant [Lwd.set]. *)
let set_script_selected ~choice_count n =
  let n = Misc_utils.bound_selection ~choice_count n in
  let old = Lwd.peek Vars.index_of_script_selected in
  if old <> n then (
    Lwd.set Vars.index_of_script_selected n
  )

(* Accessors for globals that are guaranteed to be initialized by the time
   the UI runs; [Option.get] intentionally fails fast otherwise. *)
let task_pool () = Option.get (Atomic.get Vars.pool)

let eio_env () = Option.get !Vars.eio_env

let term () = Option.get !Vars.term

(* Transparent image covering the whole terminal. *)
let full_term_sized_background =
  let$ (term_width, term_height) = Lwd.get Vars.term_width_height in
  Notty.I.void term_width term_height
  |> Nottui.Ui.atom

(* Vertical separator bar (U+2502), one column wide. *)
let vbar ~height =
  let uc = Uchar.of_int 0x2502 in
  Notty.I.uchar Notty.A.(fg white) uc 1 height
  |> Nottui.Ui.atom

(* Horizontal separator bar (U+2015), one row tall. *)
let hbar ~width =
  let uc = Uchar.of_int 0x2015 in
  Notty.I.uchar Notty.A.(fg white) uc width 1
  |> Nottui.Ui.atom

(* Horizontal two-pane split: [l_ratio] of [width] (floored) goes to the
   left pane, minus one column for the separator; a ratio that collapses
   either side to zero renders only the other pane with no separator. *)
let hpane
    ~l_ratio
    ~width
    ~height
    (x : width:int -> Nottui.ui Lwd.t)
    (y : width:int -> Nottui.ui Lwd.t)
  : Nottui.ui Lwd.t =
  let l_width =
    (* Minus 1 for pane separator bar. *)
    Int.to_float width *. l_ratio
    |> Float.floor
    |> Int.of_float
    |> (fun x -> if x = 0 || x = width then ( x ) else ( x - 1 ))
  in
  let r_width =
    (* Minus 1 here too just to be conservative. *)
    width - l_width - 1
  in
  let crop w x = Nottui.Ui.resize ~w ~h:height x in
  let x () =
    let$ x = x ~width:l_width in
    crop l_width x
  in
  let y () =
    let$ y = y ~width:r_width in
    crop r_width y
  in
  if l_width = 0 then (
    y ()
  ) else if r_width = 0 then (
    x ()
  ) else (
    let$* x = x () in
    let$ y = y () in
    Nottui.Ui.hcat [ x; vbar ~height; y; ]
  )

(* Vertical two-pane split: top half rounds up, bottom half loses one row
   for the separator bar (continues on the next chunk line). *)
let vpane
    ~width
    ~height
    (x : height:int -> Nottui.ui Lwd.t)
    (y : height:int -> Nottui.ui Lwd.t)
  : Nottui.ui Lwd.t =
  let t_height = (Misc_utils.div_round_up height 2) in
  let b_height = (* Minus 1 for pane separator bar.
*) (height / 2) - 1 in let$* x = x ~height:t_height in let$ y = y ~height:b_height in let crop h x = Nottui.Ui.resize ~w:width ~h x in Nottui.Ui.vcat [ crop t_height x; hbar ~width; crop b_height y; ] let (mini, maxi, clampi) = Lwd_utils.(mini, maxi, clampi)
(* Single-line text editor widget (modified from upstream Nottui).  While
   focused, renders the buffer highlighted with a cursor cell; otherwise
   underlined.  Keys are dispatched to the callbacks: [on_change] receives
   the new (text, cursor) on every edit, [on_submit] on Enter, [on_cancel]
   on Escape; the optional hooks handle Tab, Up/Down and any Ctrl-prefixed
   key. *)
(* Modified from upstream Nottui source code. *) let edit_field ~focus ~on_change ~on_submit ~on_cancel ?(on_tab : ((string * int) -> unit) option) ?(on_up_down : ([ `Up | `Down ] -> (string * int) -> unit) option) ?(on_ctrl_prefixed : (Nottui.Ui.key -> (string * int) -> [ `Handled | `Unhandled ]) option) state = let update _focus_h focus (text, pos) = let pos = clampi pos ~min:0 ~max:(String.length text) in let content = Nottui.Ui.atom @@ Notty.I.hcat @@ if Nottui.Focus.has_focus focus then ( let attr = Notty.A.(bg lightblue) in let len = String.length text in (if pos >= len then [Notty.I.string attr text] else [Notty.I.string attr (String.sub text 0 pos)]) @ (if pos < String.length text then [Notty.I.string Notty.A.(bg lightred) (String.sub text pos 1); Notty.I.string attr (String.sub text (pos + 1) (len - pos - 1))] else [Notty.I.string Notty.A.(bg lightred) " "]); ) else [Notty.I.string Notty.A.(st underline) (if text = "" then " " else text)] in let handler = function | `Escape, [] -> ( on_cancel (text, pos); `Handled ) | `ASCII k, [] -> ( let text = if pos < String.length text then ( String.sub text 0 pos ^ String.make 1 k ^ String.sub text pos (String.length text - pos) ) else ( text ^ String.make 1 k ) in on_change (text, (pos + 1)); `Handled ) | `Backspace, _ -> ( let text = if pos > 0 then ( if pos < String.length text then ( String.sub text 0 (pos - 1) ^ String.sub text pos (String.length text - pos) ) else if String.length text > 0 then ( String.sub text 0 (String.length text - 1) ) else text ) else text in let pos = maxi 0 (pos - 1) in on_change (text, pos); `Handled ) | `Enter, _ -> ( on_submit (text, pos); `Handled ) | `Arrow `Left, [] -> ( let pos = mini (String.length text) pos in if pos > 0
then ( on_change (text, pos - 1); `Handled ) else `Unhandled ) | `Arrow `Right, [] -> ( let pos = pos + 1 in if pos <= String.length text then (on_change (text, pos); `Handled) else `Unhandled ) | (`Arrow (`Up as ud), []) | (`Arrow (`Down as ud), []) -> ( match on_up_down with | None -> `Unhandled | Some on_up_down -> ( on_up_down ud (text, pos); `Handled ) ) | (`Tab, []) -> ( match on_tab with | None -> `Unhandled | Some on_tab -> ( on_tab (text, pos); `Handled ) ) | (_, [`Ctrl]) as x -> ( match on_ctrl_prefixed with | None -> `Unhandled | Some f -> ( f x (text, pos) ) ) | _ -> `Unhandled in Nottui.Ui.keyboard_area ~focus handler content in let state = Lwd.get state in let node = Lwd.map2 ~f:(update focus) (Nottui.Focus.status focus) state in let mouse_grab (text, pos) ~x ~y:_ = function | `Left -> if x <> pos then on_change (text, x); Nottui.Focus.request focus; `Handled | _ -> `Unhandled in Lwd.map2 state node ~f:(fun state content -> Nottui.Ui.mouse_area (mouse_grab state) content )
(* Map scroll-wheel events to [f `Up] / [f `Down]; everything else is left
   unhandled. *)
let mouse_handler ~(f : [ `Up | `Down ] -> unit) ~x ~y (button : Notty.Unescape.button) = let _ = x in let _ = y in match button with | `Scroll `Down -> ( f `Down; `Handled ) | `Scroll `Up -> ( f `Up; `Handled ) | _ -> `Unhandled
(* Content preview pane: renders the snippet around the selected search
   result, or around the selected link while in Links mode (continues on
   the next chunk line). *)
module Content_view = struct let main ~(input_mode : input_mode) ~height ~width ~(search_result_group : Document.t * Search_result.t array) ~(search_result_selected : int) ~(link_selected : int) : Nottui.ui Lwd.t = let (document, search_results) = search_result_group in let links = Document.links document in let data = let search_result_count = Array.length search_results in let link_count = Array.length links in match input_mode with | Links -> ( if link_count = 0 then ( None ) else ( Some (`Link links.(link_selected)) ) ) | _ -> ( if search_result_count = 0 then ( None ) else ( Some (`Search_result search_results.(search_result_selected)) ) ) in let$* _ = Lwd.get Vars.content_view_offset in let content = Content_and_search_result_rendering.content_snippet
(* Tail of [Content_view.main] (continued): render the snippet over a
   full-terminal background; the scroll wheel adjusts the content view
   offset. *)
~doc_id:(Document.doc_id document) ~view_offset:Vars.content_view_offset ?data ~height ~width () in let$* background = full_term_sized_background in Nottui.Ui.join_z background (Nottui.Ui.atom content) |> Nottui.Ui.mouse_area (mouse_handler ~f:(fun direction -> match direction with | `Up -> decr_content_view_offset () | `Down -> incr_content_view_offset () ) ) |> Lwd.return end
(* Status bar styling plus one bold label image per input mode, all padded
   to the width of the longest label so the bar does not shift when modes
   change.  NOTE(review): [Notty.I.(s input_mode_string_background)] looks
   like an infix operator (likely [</>]) was lost in extraction — confirm
   against the upstream file. *)
module Status_bar = struct let fg_color = Notty.A.black let bg_color = Notty.A.white let attr = Notty.A.(bg bg_color ++ fg fg_color) let background_bar : Nottui.Ui.t Lwd.t = let$ (term_width, _term_height) = Lwd.get Vars.term_width_height in Notty.I.char Notty.A.(bg bg_color) ' ' term_width 1 |> Nottui.Ui.atom let element_spacing = 4 let element_spacer = Notty.(I.string A.(bg bg_color ++ fg fg_color)) (String.make element_spacing ' ') let input_mode_images = let l = [ (Navigate, "NAVIGATE") ; (Search, "SEARCH") ; (Filter, "FILTER") ; (Clear, "CLEAR") ; (Sort `Asc, "SORT-ASC") ; (Sort `Desc, "SORT-DESC") ; (Drop, "DROP") ; (Mark, "MARK") ; (Unmark, "UNMARK") ; (Narrow, "NARROW") ; (Copy, "COPY") ; (Copy_paths, "COPY-PATHS") ; (Reload, "RELOAD") ; (Save_script, "SAVE-SCRIPT") ; (Save_script_overwrite "", "SAVE-SCRIPT") ; (Save_script_no_name, "SAVE-SCRIPT") ; (Save_script_edit "", "SAVE-SCRIPT") ; (Scripts, "SCRIPTS") ; (Delete_script_confirm ("", ""), "DELETE-SCRIPT") ; (Links, "LINKS") ; (Path_fuzzy_rank, "PATH-FUZZY-RANK") ] in let max_input_mode_string_len = List.fold_left (fun acc (_, s) -> max acc (String.length s) ) 0 l in let input_mode_string_background = Notty.I.char Notty.A.(bg bg_color) ' ' max_input_mode_string_len 1 in List.fold_left (fun m (mode, s) -> let s = Notty.(I.string A.(bg bg_color ++ fg fg_color ++ st bold) s) in Input_mode_map.add mode Notty.I.(s input_mode_string_background) m ) Input_mode_map.empty l end
(* Key binding info pane.  [rotation] cycles which column of each grid row
   is displayed first ('<' / '>' in Navigate mode). *)
module Key_binding_info = struct let rotation : int Lwd.var = Lwd.var 0 let incr_rotation () = Lwd.set rotation (Lwd.peek rotation + 1) let decr_rotation () =
Lwd.set rotation (Lwd.peek rotation - 1) let reset_rotation () = Lwd.set rotation 0 type labelled_msg = { label : string; msg : string; } type labelled_msg_line = labelled_msg list type grid_contents = (input_mode * (labelled_msg_line list)) list type grid_lookup = Nottui.ui Lwd.t Input_mode_map.t
(* Blink support: each key label maps to the timestamp of its last blink
   request and the [bool Lwd.var]s lighting its grid cells.
   [grid_light_fiber] runs two loops concurrently: one switches lights on
   when [blink] queues a request, the other switches them off after
   [Params.blink_on_duration] — but only if no newer blink for the same
   label arrived in the meantime (timestamp comparison). *)
let grid_lights : (string, Mtime.t ref * bool Lwd.var list) Hashtbl.t = Hashtbl.create 100 let lock = Eio.Mutex.create () let grid_light_on_req : string Eio.Stream.t = Eio.Stream.create 100 let grid_light_off_req : (Mtime.t * Mtime.t * string) Eio.Stream.t = Eio.Stream.create 100 let blink label = Eio.Stream.add grid_light_on_req label let grid_light_fiber () = let clock = Eio.Stdenv.mono_clock (eio_env ()) in Eio.Fiber.both (fun () -> while true do let label = Eio.Stream.take grid_light_on_req in let ts_now = Eio.Time.Mono.now clock in Eio.Mutex.use_rw lock ~protect:false (fun () -> match Hashtbl.find_opt grid_lights label with | None -> failwith "unexpected case" | Some (ts, l) -> ( ts := ts_now; List.iter (fun x -> Lwd.set x true) l; Eio.Stream.add grid_light_off_req (ts_now, Option.get (Mtime.(add_span ts_now Params.blink_on_duration)), label); ) ) done ) (fun () -> while true do let ts_req_time, ts_target_time, label = Eio.Stream.take grid_light_off_req in Eio.Time.Mono.sleep_until clock ts_target_time; Eio.Mutex.use_rw lock ~protect:false (fun () -> match Hashtbl.find_opt grid_lights label with | None -> failwith "unexpected case" | Some (ts_last_update, l) -> ( if Mtime.equal !ts_last_update ts_req_time then ( List.iter (fun x -> Lwd.set x false) l; ) ) ) done )
(* Build the rendered key binding grid for every input mode.  First pass:
   per-column maximum label/message widths, measured in grapheme clusters
   (not bytes) so multi-byte labels align correctly. *)
let make_grid_lookup grid_contents : grid_lookup = let max_label_msg_len_lookup : (input_mode * (int * int) Int_map.t) list = grid_contents |> List.map (fun (grid_key, grid) -> let lookup = List.fold_left (fun (acc : (int * int) Int_map.t) (line : labelled_msg_line) -> line |> List.to_seq |> Seq.fold_lefti (fun (acc : (int * int) Int_map.t) col ({ label; msg } : labelled_msg) -> let label_len =
(* [label_msg_pair]: render one grid cell.  Registers a fresh light var
   under the cell's label so [blink] can highlight it; the label is drawn
   inverted while the light is on.  NOTE(review): the two
   [Notty.I.(... ...)] juxtapositions below look like a lost [</>]
   operator from extraction — confirm against the upstream file. *)
Uuseg_string.fold_utf_8 `Grapheme_cluster (fun x _ -> x + 1) 0 label in let msg_len = Uuseg_string.fold_utf_8 `Grapheme_cluster (fun x _ -> x + 1) 0 msg in let (max_label_len, max_msg_len) = match Int_map.find_opt col acc with | None -> (label_len, msg_len) | Some (max_label_len, max_msg_len) -> ( (max max_label_len label_len, max max_msg_len msg_len) ) in Int_map.add col (max_label_len, max_msg_len) acc ) acc ) Int_map.empty grid in (grid_key, lookup) ) in let label_msg_pair grid_key col { label; msg } : Nottui.ui Lwd.t = let (max_label_len, max_msg_len) = List.assoc grid_key max_label_msg_len_lookup |> Int_map.find col in let light_on_var = Lwd.var false in Eio.Mutex.use_rw lock ~protect:false (fun () -> let x = match Hashtbl.find_opt grid_lights label with | None -> (ref Mtime.min_stamp, [ light_on_var ]) | Some (x, l) -> (x, light_on_var :: l) in Hashtbl.replace grid_lights label x ); let$ light_on = Lwd.get light_on_var in let label_attr = if light_on then Notty.A.(fg black ++ bg lightyellow ++ st bold) else Notty.A.(fg lightyellow ++ st bold) in let msg_attr = Notty.A.empty in let msg = String.capitalize_ascii msg in let content = Notty.(I.hcat [ I.(string label_attr label (string label_attr (String.make max_label_len ' '))) ; I.string A.empty " " ; I.string msg_attr msg ] ) in let full_background = Notty.I.void (max_label_len + 1 + max_msg_len + 3) 1 in Notty.I.(content full_background) |> Nottui.Ui.atom in List.fold_left (fun m (grid_key, grid_contents) -> let max_row_size = List.fold_left (fun n l -> max n (List.length l) ) 0 grid_contents in let grid_contents = grid_contents |> List.map (fun l -> let padding = List.init (max_row_size - List.length l) (fun _ -> { label = ""; msg = "" }) in List.mapi (fun col x -> label_msg_pair grid_key col x ) (l @ padding) ) in let grid = let$* rotation = Lwd.get rotation in grid_contents |> List.map (fun l -> Misc_utils.rotate_list (((rotation mod max_row_size) + max_row_size) mod max_row_size ) l ) |>
(* Rotate each padded row by the (normalized) rotation and lay the cells
   out as a grid; store one grid per input mode. *)
Nottui_widgets.grid ~pad:(Nottui.Gravity.make ~h:`Negative ~v:`Negative) in Input_mode_map.add grid_key grid m ) Input_mode_map.empty grid_contents let main ~(grid_lookup : grid_lookup) ~(input_mode : input_mode) = Input_mode_map.find input_mode grid_lookup end
(* Shared label layout for the filter / search bars: both labels are padded
   to the longest one so the input fields line up. *)
let filter_bar_label_string = "Document filter" let search_bar_label_string = "Content search" let max_label_length = List.fold_left (fun acc s -> max acc (String.length s) ) 0 [ filter_bar_label_string ; search_bar_label_string ] let pad_label_string s = CCString.pad ~side:`Right ~c:' ' max_label_length s
(* Prefix autocompletion for the filter field: grab the word (alphanumeric
   plus '-' and ':') immediately left of the cursor, keep the [choices]
   with that prefix, publish them to [Vars.autocomplete_choices] for the
   autocomplete grid, and splice in the longest common prefix of the
   matches, advancing the cursor accordingly. *)
let autocomplete ~choices (text, pos) : string * int = let left = String.sub text 0 pos in let right = String.sub text pos (String.length text - pos) in let grab_input_word (s : string) = let rec aux acc i s = if i < 0 then ( CCString.of_list acc ) else ( let c = s.[i] in if Parser_components.is_alphanum c || c = '-' || c = ':' then ( aux (c :: acc) (i - 1) s ) else ( aux acc (-1) s ) ) in aux [] (String.length s - 1) s in let current_input_word = grab_input_word left in let usable_choices = List.filter (CCString.prefix ~pre:current_input_word) choices in Lwd.set Vars.autocomplete_choices usable_choices; match usable_choices with | [] -> (text, pos) | _ -> ( let best_fit = usable_choices |> List.to_seq |> String_utils.longest_common_prefix in let left = String.sub left 0 (String.length left - String.length current_input_word) in (String.concat "" [ left; best_fit; right ], pos + (String.length best_fit - String.length current_input_word)) )
(* Document filter input bar: bolded label while in Filter mode, a
   three-state status indicator, and the editable field (continues on the
   next chunk line). *)
module Filter_bar = struct let label_string = pad_label_string filter_bar_label_string let label ~(input_mode : input_mode) = let attr = match input_mode with | Filter -> Notty.A.(st bold) | _ -> Notty.A.empty in Notty.I.string attr label_string |> Nottui.Ui.atom |> Lwd.return let status = let$* status = Lwd.get Vars.filter_ui_status in (match status with | `Idle -> ( Notty.I.string Notty.A.(fg lightgreen) " OK" ) | `Filtering -> ( Notty.I.string Notty.A.(fg
lightyellow) " ..." ) | `Parse_error -> ( Notty.I.string Notty.A.(fg lightred) " ERR" ) ) |> Nottui.Ui.atom |> Lwd.return let autocomplete_choices = [ "path-date:" ; "path-fuzzy:" ; "path-glob:" ; "ext:" ; "content:" ; "mod-date:" ] let main ~input_mode ~(text_field : (string * int) Lwd.var) ~focus_handle ~on_change ~on_submit : Nottui.ui Lwd.t = Nottui_widgets.hbox [ label ~input_mode; status; Lwd.return (Nottui.Ui.atom (Notty.I.strf ": ")); edit_field text_field ~focus:focus_handle ~on_cancel:(fun (_text, _x) -> ()) ~on_change:(fun (text, x) -> Lwd.set text_field (text, x); on_change (); ) ~on_submit:(fun (text, x) -> Lwd.set text_field (text, x); on_submit (); Lwd.set Vars.autocomplete_choices []; Nottui.Focus.release focus_handle; Lwd.set Vars.input_mode Navigate ) ~on_tab:(fun (text, pos) -> let (text, pos) = autocomplete ~choices:autocomplete_choices (text, pos) in Lwd.set text_field (text, pos) ); ] end module Search_bar = struct let label_string = pad_label_string search_bar_label_string let label ~(input_mode : input_mode) = let attr = match input_mode with | Search -> Notty.A.(st bold) | _ -> Notty.A.empty in Notty.I.string attr label_string |> Nottui.Ui.atom |> Lwd.return let status = let$* status = Lwd.get Vars.search_ui_status in (match status with | `Idle -> ( Notty.I.string Notty.A.(fg lightgreen) " OK" ) | `Searching -> ( Notty.I.string Notty.A.(fg lightyellow) " ..." 
) | `Parse_error -> ( Notty.I.string Notty.A.(fg lightred) " ERR" ) ) |> Nottui.Ui.atom |> Lwd.return let main ~input_mode ~(text_field : (string * int) Lwd.var) ~focus_handle ~on_change ~on_submit : Nottui.ui Lwd.t = Nottui_widgets.hbox [ label ~input_mode; status; Lwd.return (Nottui.Ui.atom (Notty.I.strf ": ")); edit_field text_field ~focus:focus_handle ~on_cancel:(fun (_text, _x) -> ()) ~on_change:(fun (text, x) -> Lwd.set text_field (text, x); on_change (); ) ~on_submit:(fun (text, x) -> Lwd.set text_field (text, x); on_submit (); Lwd.set Vars.autocomplete_choices []; Nottui.Focus.release focus_handle; Lwd.set Vars.input_mode Navigate ) ~on_tab:(fun (_, _) -> ()); ] end let term' : unit -> Notty_unix.Term.t = term let ui_loop ~quit ~term root = let renderer = Nottui.Renderer.make () in let root = let$ root = root in root (* |> Nottui.Ui.event_filter (fun x -> match x with | `Key (`Escape, []) -> ( Lwd.set quit true; `Handled ) | _ -> `Unhandled ) *) in let rec loop () = if not (Lwd.peek quit) then ( let (term_width, term_height) = Notty_unix.Term.size (term' ()) in let (prev_term_width, prev_term_height) = Lwd.peek Vars.term_width_height in if term_width <> prev_term_width || term_height <> prev_term_height then ( Lwd.set Vars.term_width_height (term_width, term_height) ); Nottui_unix.step ~process_event:true ~timeout:0.05 ~renderer term (Lwd.observe @@ root); Eio.Fiber.yield (); loop () ) in loop () let set_input_mode mode = Lwd.set Vars.input_mode mode; Key_binding_info.reset_rotation () ================================================ FILE: bin/args.ml ================================================ open Cmdliner open Docfd_lib open Misc_utils let no_pdftotext_arg_name = "no-pdftotext" let no_pdftotext_arg = let doc = Fmt.str {|Disable use of pdftotext command. Files that require use of pdftotext are excluded. 
|} in Arg.(value & flag & info [ no_pdftotext_arg_name ] ~doc) let no_pandoc_arg_name = "no-pandoc" let no_pandoc_arg = let doc = Fmt.str {|Disable use of pandoc command. Files that require use of pandoc are excluded. |} in Arg.(value & flag & info [ no_pandoc_arg_name ] ~doc) let hidden_arg_name = "hidden" let hidden_arg = let doc = Fmt.str {|Scan hidden files and directories. By default, hidden files and directories are skipped. A file or directory is hidden if the base name starts with a dot, e.g. ".gitignore". |} in Arg.(value & flag & info [ hidden_arg_name ] ~doc) let max_depth_arg_name = "max-depth" let max_depth_arg = let doc = Fmt.str "Scan up to N levels when exploring file trees. This applies to directory paths provided and ** in globs. Note that --%s 0 results in no-op when scanning directories, and --%s 1 means only scanning for direct children." max_depth_arg_name max_depth_arg_name in Arg.( value & opt int Params.default_max_file_tree_scan_depth & info [ max_depth_arg_name ] ~doc ~docv:"N" ) let exts_arg_name = "exts" let exts_arg = let doc = "File extensions to use, comma separated. Leading dots of any extension are removed." in Arg.( value & opt string Params.default_recognized_exts & info [ exts_arg_name ] ~doc ~docv:"EXTS" ) let single_file_exts_arg_name = Fmt.str "single-line-%s" exts_arg_name let single_line_exts_arg = let doc = Fmt.str "Same as --%s, but use single line search mode instead. If an extension appears in both --%s and --%s, then single line search mode is used for that extension." exts_arg_name exts_arg_name single_file_exts_arg_name in Arg.( value & opt string Params.default_recognized_single_line_exts & info [ single_file_exts_arg_name ] ~doc ~docv:"EXTS" ) let add_exts_arg_name = "add-exts" let add_exts_arg = let doc = "Additional file extensions to use, comma separated. May be specified multiple times." 
in Arg.( value & opt_all string [] & info [ add_exts_arg_name ] ~doc ~docv:"EXTS" ) let single_line_add_exts_arg_name = Fmt.str "single-line-%s" add_exts_arg_name let single_line_add_exts_arg = let doc = Fmt.str "Same as --%s, but use single line search mode instead." add_exts_arg_name in Arg.( value & opt_all string [] & info [ single_line_add_exts_arg_name ] ~doc ~docv:"EXTS" ) let max_fuzzy_edit_dist_arg_name = "max-fuzzy-edit-dist" let max_fuzzy_edit_dist_arg = let doc = "Maximum edit distance for fuzzy matches." in Arg.( value & opt int Params.default_max_fuzzy_edit_dist & info [ max_fuzzy_edit_dist_arg_name ] ~doc ~docv:"N" ) let max_token_search_dist_arg_name = "max-token-search-dist" let max_token_search_dist_arg = let doc = "Maximum distance to look for the next matching token in document. If two tokens are adjacent, then they are 1 distance away from each other. Note that contiguous spaces count as one token as well." in Arg.( value & opt int Params.default_max_token_search_dist & info [ max_token_search_dist_arg_name ] ~doc ~docv:"N" ) let max_linked_token_search_dist_arg_name = "max-linked-token-search-dist" let max_linked_token_search_dist_arg = let doc = Fmt.str {|Similar to %s but for linked tokens. Two tokens are linked if there is no space between them in the search phrase, e.g. 
"-" and ">" are linked in "->" but not in "- >", "and" "/" "or" are linked in "and/or" but not in "and / or".|} max_token_search_dist_arg_name in Arg.( value & opt int Params.default_max_linked_token_search_dist & info [ max_linked_token_search_dist_arg_name ] ~doc ~docv:"N" ) let tokens_per_search_scope_level_arg_name = "tokens-per-search-scope-level" let tokens_per_search_scope_level_arg = let doc = Fmt.str {|Number of tokens to use around the current search results for each search scope level in narrow mode.|} in Arg.( value & opt int Params.default_tokens_per_search_scope_level & info [ tokens_per_search_scope_level_arg_name ] ~doc ~docv:"N" ) let index_chunk_size_arg_name = "index-chunk-size" let index_chunk_size_arg = let doc = "Number of tokens to send as a job unit to the thread pool for indexing." in Arg.( value & opt int Params.default_index_chunk_size & info [ index_chunk_size_arg_name ] ~doc ~docv:"N" ) let cache_dir_arg = let doc = "Docfd cache directory, mainly for index DB." in let cache_home = Xdg_utils.cache_home in Arg.( value & opt string (Filename.concat cache_home "docfd") & info [ "cache-dir" ] ~doc ~docv:"DIR" ) let cache_limit_arg_name = "cache-limit" let cache_limit_arg = let doc = "Maximum number of documents to keep in index. Docfd resets the cache to this limit at launch." in Arg.( value & opt int Params.default_cache_limit & info [ cache_limit_arg_name ] ~doc ~docv:"N" ) let data_dir_arg = let doc = "Docfd data directory." in let data_home = Xdg_utils.data_home in Arg.( value & opt string (Filename.concat data_home "docfd") & info [ "data-dir" ] ~doc ~docv:"DIR" ) let index_only_arg = let doc = Fmt.str "Exit after indexing." in Arg.(value & flag & info [ "index-only" ] ~doc) let debug_log_arg = let doc = Fmt.str "Specify debug log file to use and enable debug mode where additional checks are enabled and additional info is displayed on UI. If FILE is -, then debug log is printed to stderr instead. 
Otherwise FILE is opened in append mode for log writing." in Arg.( value & opt (some string) None & info [ "debug-log" ] ~doc ~docv:"FILE" ) let start_with_filter_arg_name = "start-with-filter" let start_with_filter_arg = let doc = Fmt.str "Start interactive mode with an initial filter using expression EXP." in Arg.( value & opt (some string) None & info [ start_with_filter_arg_name ] ~doc ~docv:"EXP" ) let start_with_search_arg_name = "start-with-search" let start_with_search_arg = let doc = Fmt.str "Start interactive mode with search expression EXP." in Arg.( value & opt (some string) None & info [ start_with_search_arg_name ] ~doc ~docv:"EXP" ) let sample_arg_name = "sample" let samples_per_doc_arg_name = "samples-per-doc" let sample_arg = let doc = Fmt.str "Search with expression EXP in non-interactive mode but only show top N results where N is controlled by --%s." samples_per_doc_arg_name in Arg.( value & opt (some string) None & info [ sample_arg_name ] ~doc ~docv:"EXP" ) let samples_per_doc_arg = let doc = Fmt.str "Number of search results to show per document when --%s is used or when samples printing is triggered." sample_arg_name in Arg.( value & opt int Params.default_samples_per_document & info [ samples_per_doc_arg_name ] ~doc ~docv:"N" ) let search_arg_name = "search" let search_arg = let doc = "Search with expression EXP in non-interactive mode and show all results." in Arg.( value & opt (some string) None & info [ search_arg_name ] ~doc ~docv:"EXP" ) let filter_arg_name = "filter" let filter_arg = let doc = Fmt.str "Filter with expression EXP in non-interactive mode. May be combined with --%s or --%s." search_arg_name sample_arg_name in Arg.( value & opt (some string) None & info [ filter_arg_name ] ~doc ~docv:"EXP" ) let sort_arg_name = "sort" let sort_arg = let doc = Fmt.str "Sort document by: TYPE,ORDER. TYPE is one of: path, path-date, score, mod-time. ORDER is one of: asc, desc." 
in Arg.( value & opt string Params.default_sort_by_arg & info [ sort_arg_name ] ~doc ~docv:"TYPE,ORDER" ) let style_mode_options = [ ("never", `Never); ("always", `Always); ("auto", `Auto) ] let color_arg = let doc = Fmt.str "Set color mode for search result printing, one of: %s." (String.concat ", " (List.map fst style_mode_options)) in Arg.( value & opt (Arg.enum style_mode_options) `Auto & info [ "color" ] ~doc ~docv:"MODE" ) let underline_arg = let doc = Fmt.str "Set underline mode for search result printing, one of: %s." (String.concat ", " (List.map fst style_mode_options)) in Arg.( value & opt (Arg.enum style_mode_options) `Auto & info [ "underline" ] ~doc ~docv:"MODE" ) let search_result_print_text_width_arg_name = "search-result-print-text-width" let search_result_print_text_width_arg = let doc = "Text width to use when printing search results." in Arg.( value & opt int Params.default_search_result_print_text_width & info [ search_result_print_text_width_arg_name ] ~doc ~docv:"N" ) let search_result_print_snippet_min_size_arg_name = "search-result-print-snippet-min-size" let search_result_print_snippet_min_size_arg = let doc = "If the search result to be printed has fewer than N non-space tokens, then Docfd tries to add surrounding lines to the snippet to give better context." in Arg.( value & opt int Params.default_search_result_print_snippet_min_size & info [ search_result_print_snippet_min_size_arg_name ] ~doc ~docv:"N" ) let search_result_print_snippet_max_add_lines_arg_name = "search-result-print-snippet-max-add-lines" let search_result_print_snippet_max_add_lines_arg = let doc = "This controls the maximum number of surrounding lines Docfd can add in each direction." in Arg.( value & opt int Params.default_search_result_print_snippet_max_additional_lines_each_direction & info [ search_result_print_snippet_max_add_lines_arg_name ] ~doc ~docv:"N" ) let script_arg_name = "script" let script_arg = let doc = Fmt.str "Read and run Docfd script FILE." 
in Arg.( value & opt (some string) None & info [ script_arg_name ] ~doc ~docv:"FILE" ) let start_with_script_arg_name = "start-with-script" let start_with_script_arg = let doc = Fmt.str "Read and run Docfd script FILE, then continue in interactive mode." in Arg.( value & opt (some string) None & info [ start_with_script_arg_name ] ~doc ~docv:"FILE" ) let paths_from_arg_name = "paths-from" let paths_from_arg = let doc = Fmt.str "Read list of paths from FILES, which is a comma separated list of files, and add to the final list of paths to be scanned. For example, \"--%s path-list0.txt,path-list1.txt\". If - is in FILES, then stdin is also read for list of paths to be scanned. This is useful for piping, e.g. \"find -name '*.txt' | docfd --%s -\"" paths_from_arg_name paths_from_arg_name in Arg.( value & opt_all string [] & info [ paths_from_arg_name ] ~doc ~docv:"FILES" ) let glob_arg_name = "glob" let glob_arg = let doc = "Add to the final list of paths to be scanned using glob pattern. The pattern should pick up the files directly. Directories picked up by the pattern are not further scanned for files with suitable extensions." in Arg.( value & opt_all string [] & info [ glob_arg_name ] ~doc ~docv:"PATTERN" ) let single_line_glob_arg_name = Fmt.str "single-line-%s" glob_arg_name let single_line_glob_arg = let doc = Fmt.str "Same as --%s, but use single line search mode instead. If the file are picked up by both patterns from --%s and --%s, then single line search mode is used." glob_arg_name glob_arg_name single_line_glob_arg_name in Arg.( value & opt_all string [] & info [ single_line_glob_arg_name ] ~doc ~docv:"PATTERN" ) let single_line_arg = let doc = "Use single line search mode by default." in Arg.( value & flag & info [ "single-line" ] ~doc ) let open_with_arg_name = "open-with" let open_with_arg = let doc = Fmt.str "Specify custom command CMD for opening files with file extension EXT. May be specified multiple times. Leading dots of EXT are removed. 
LAUNCH_MODE specifies how the command should be executed: `terminal` - for commands which Docfd should run in the terminal and wait for completion, e.g. text editors, pagers. `detached` - for background commands, such as PDF viewers or other GUI tools. CMD may contain the following placeholders: {path} - file path, {page_num} - page number (PDF only), {line_num} - line number (not available in PDF), {search_word} - most unique word of the page (PDF only, useful for passing to PDF viewers as search term). Examples: \"pdf:detached='okular --page {page_num} --find {search_word} {path}'\", \"txt:terminal='nano +{line_num} {path}'\". "
  in
  Arg.(
    value
    & opt_all string []
    & info [ open_with_arg_name ] ~doc ~docv:"EXT:LAUNCH_MODE=CMD"
  )

let files_with_match_arg_name = "files-with-match"

(* Flag: print only paths of documents with at least one match.
   Fix: help text read "would have be listed"; corrected to
   "would have been listed". *)
let files_with_match_arg =
  let doc =
    Fmt.str
      "If paired with --%s or --%s, then print the paths of documents with at least one match instead of printing the search results. If paired with --%s, then print paths of documents that would have been listed in the UI after running the commands in interactive mode."
      search_arg_name
      sample_arg_name
      script_arg_name
  in
  Arg.(
    value
    & flag
    & info [ "l"; files_with_match_arg_name ] ~doc
  )

let files_without_match_arg_name = "files-without-match"

(* Flag: print only paths of documents with no matches. *)
let files_without_match_arg =
  let doc =
    Fmt.str
      "If paired with --%s or --%s, then print the paths of documents with no matches instead of printing the search results. Cannot be paired with --%s."
      search_arg_name
      sample_arg_name
      script_arg_name
  in
  Arg.(
    value
    & flag
    & info [ files_without_match_arg_name ] ~doc
  )

let sort_no_score_arg_name = "sort-no-score"

(* Score-less variant of --sort.
   Fix: the first %s placeholder was previously filled with
   [sort_no_score_arg_name] itself, producing the self-referential help
   text "Same as --sort-no-score but ...".  It must reference --sort. *)
let sort_no_score_arg =
  let doc =
    Fmt.str
      "Same as --%s but sorting TYPE cannot be score. Used for scenarios when no scores are available, e.g. --%s is used."
      sort_arg_name
      files_without_match_arg_name
  in
  Arg.(
    value
    & opt string Params.default_sort_by_no_score_arg
    & info [ sort_no_score_arg_name ] ~doc ~docv:"TYPE,ORDER"
  )

(* Positional PATH arguments (files or directories to scan). *)
let paths_arg =
  let doc =
    Fmt.str
      "PATH can be either file or directory. Directories are scanned for files with matching extensions. If no paths are provided, then Docfd defaults to scanning the current working directory unless any of the following is used: %a. To use piped stdin as input, the list of paths must be empty."
      Fmt.(list ~sep:comma (fun fmt s -> Fmt.pf fmt "--%s" s))
      [ paths_from_arg_name; glob_arg_name; single_line_glob_arg_name ]
  in
  Arg.(value & pos_all string [] & info [] ~doc ~docv:"PATH")

(* Validates the combination of CLI arguments, exiting via
   [exit_with_error_msg] on the first violation found. *)
let check
    ~max_depth
    ~max_fuzzy_edit_dist
    ~max_token_search_dist
    ~max_linked_token_search_dist
    ~tokens_per_search_scope_level
    ~index_chunk_size
    ~cache_limit
    ~start_with_filter
    ~start_with_search
    ~filter_exp
    ~sample_search_exp
    ~samples_per_doc
    ~search_exp
    ~search_result_print_text_width
    ~search_result_print_snippet_min_size
    ~search_result_print_max_add_lines
    ~start_with_script
    ~script
    ~paths_from
    ~print_files_with_match
    ~print_files_without_match
  =
  (* Numeric range checks first; each exits immediately on failure. *)
  if max_depth < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" max_depth_arg_name)
  );
  if max_fuzzy_edit_dist < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" max_fuzzy_edit_dist_arg_name)
  );
  if max_token_search_dist < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" max_token_search_dist_arg_name)
  );
  if max_linked_token_search_dist < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" max_linked_token_search_dist_arg_name)
  );
  if tokens_per_search_scope_level < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" tokens_per_search_scope_level_arg_name)
  );
  if index_chunk_size < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" index_chunk_size_arg_name)
  );
  if cache_limit < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1"
cache_limit_arg_name) ); if samples_per_doc < 1 then ( exit_with_error_msg (Fmt.str "invalid %s: cannot be < 1" samples_per_doc_arg_name) ); if search_result_print_text_width < 1 then ( exit_with_error_msg (Fmt.str "invalid %s: cannot be < 1" search_result_print_text_width_arg_name) ); if search_result_print_snippet_min_size < 0 then ( exit_with_error_msg (Fmt.str "invalid %s: cannot be < 0" search_result_print_snippet_min_size_arg_name) ); if search_result_print_max_add_lines < 0 then ( exit_with_error_msg (Fmt.str "invalid %s: cannot be < 0" search_result_print_snippet_max_add_lines_arg_name) ); if Option.is_some filter_exp then ( if not ( Option.is_some sample_search_exp || Option.is_some search_exp || print_files_with_match || print_files_without_match ) then ( exit_with_error_msg (Fmt.str "--%s must be used with at least one of: --%s, --%s, --%s, --%s" filter_arg_name search_arg_name sample_arg_name files_with_match_arg_name files_without_match_arg_name ) ) ); let cannot_be_used_together x y = exit_with_error_msg (Fmt.str "--%s and --%s cannot be used together" x y) in (match print_files_with_match, print_files_without_match with | true, true -> ( cannot_be_used_together files_with_match_arg_name files_without_match_arg_name ) | true, false -> ( if not ( Option.is_some filter_exp || Option.is_some sample_search_exp || Option.is_some search_exp || Option.is_some script ) then ( exit_with_error_msg (Fmt.str "--%s cannot be used without one of: --%s, --%s, --%s, --%s" files_with_match_arg_name filter_arg_name sample_arg_name search_arg_name script_arg_name ) ) ) | false, true -> ( if not ( Option.is_some filter_exp || Option.is_some sample_search_exp || Option.is_some search_exp ) then ( exit_with_error_msg (Fmt.str "--%s cannot be used without one of: --%s, --%s, --%s" files_without_match_arg_name filter_arg_name sample_arg_name search_arg_name ) ) ) | false, false -> () ); ( let l = List.filter (fun x -> x = "-") paths_from in if List.length l > 1 then ( 
exit_with_error_msg (Fmt.str "at most one \"-\" may be supplied to --%s" paths_from_arg_name) ) ); (match filter_exp with | None -> () | Some filter_exp_string -> ( match Filter_exp.parse filter_exp_string with | None -> ( exit_with_error_msg "failed to parse filter exp" ) | Some _ -> () ) ); (match sample_search_exp, search_exp with | None, None -> () | Some _, Some _ -> ( exit_with_error_msg (Fmt.str "%s and %s cannot be used together" sample_arg_name search_arg_name) ) | Some search_exp_string, None | None, Some search_exp_string -> ( match Search_exp.parse search_exp_string with | None -> ( exit_with_error_msg "failed to parse search exp" ) | Some _ -> () ) ); let start_with_arg_check ~arg_name = if Option.is_some filter_exp then ( cannot_be_used_together arg_name filter_arg_name ); if Option.is_some sample_search_exp then ( cannot_be_used_together arg_name sample_arg_name ); if Option.is_some search_exp then ( cannot_be_used_together arg_name search_arg_name ); in if Option.is_some start_with_filter then ( start_with_arg_check ~arg_name:start_with_filter_arg_name ); if Option.is_some start_with_search then ( start_with_arg_check ~arg_name:start_with_search_arg_name ); if Option.is_some start_with_script then ( start_with_arg_check ~arg_name:start_with_script_arg_name ); let script_common_check ~arg_name = if Option.is_some filter_exp then ( cannot_be_used_together arg_name filter_arg_name ); if Option.is_some sample_search_exp then ( cannot_be_used_together arg_name sample_arg_name ); if Option.is_some search_exp then ( cannot_be_used_together arg_name search_arg_name ); if Option.is_some start_with_filter then ( cannot_be_used_together arg_name start_with_filter_arg_name ); if Option.is_some start_with_search then ( cannot_be_used_together arg_name start_with_search_arg_name ); if print_files_without_match then ( cannot_be_used_together arg_name files_without_match_arg_name ); in if Option.is_some script then ( script_common_check ~arg_name:script_arg_name; 
(* Final mutual-exclusion checks: --script cannot be combined with
   --start-with-script, and --start-with-script is subject to the same
   incompatibility checks as --script. *)
if Option.is_some start_with_script then (
  cannot_be_used_together script_arg_name start_with_script_arg_name
);
);
if Option.is_some start_with_script then (
  script_common_check ~arg_name:start_with_script_arg_name;
)
================================================ FILE: bin/clipboard.ml ================================================
(* Pipes whatever [f] writes to the out_channel into the configured
   system clipboard command.  A silent no-op when no clipboard copy
   command is available (see Params). *)
let pipe_to_clipboard (f : out_channel -> unit) : unit =
  match Params.clipboard_copy_cmd_and_args with
  | None -> ()
  | Some (cmd, args) -> (
      Proc_utils.pipe_to_command f cmd args
    )
================================================ FILE: bin/command.ml ================================================
open Docfd_lib

(* Sort criterion used by the "sort by" command: a sort key paired with
   an ordering direction. *)
module Sort_by = struct
  type typ = [
    | `Path_date
    | `Path
    | `Score
    | `Mod_time
  ]

  type t = typ * Document.Compare.order

  (* Default when search scores are available. *)
  let default : t = (`Score, `Desc)

  (* Default when no scores are available. *)
  let default_no_score : t = (`Path, `Asc)

  (* Prints in the same "TYPE,ORDER" syntax accepted by [p]/[parse]. *)
  let pp formatter ((typ, order) : t) =
    Fmt.pf formatter "%s,%s"
      (match typ with
       | `Path_date -> "path-date"
       | `Path -> "path"
       | `Score -> "score"
       | `Mod_time -> "mod-time"
      )
      (match order with
       | `Asc -> "asc"
       | `Desc -> "desc"
      )

  (* Angstrom parser for "TYPE,ORDER".  When [no_score] is true, the
     "score" key is excluded from the alternatives.  NOTE: "path-date"
     must be tried before "path" because the latter is a prefix of the
     former; the ordering of the [choice] list is semantic. *)
  let p ~no_score : t Angstrom.t =
    let open Angstrom in
    let open Parser_components in
    skip_spaces *>
    (choice
       (List.filter_map Fun.id
          ([ Some (string "path-date" *> return `Path_date);
             Some (string "path" *> return `Path);
             (if no_score then None else Some (string "score" *> return `Score));
             Some (string "mod-time" *> return `Mod_time);
           ]))
     <|> (take_while (fun c -> is_not_space c && c <> ',') >>= fun s ->
          fail (Fmt.str "unrecognized sort by type: %s" s))
    )
    >>= fun typ ->
    skip_spaces *> char ',' *> skip_spaces *>
    (choice [
        string "asc" *> return `Asc;
        string "desc" *> return `Desc;
      ]
     <|> (take_while is_not_space >>= fun s ->
          fail (Fmt.str "unrecognized sort by order: %s" s))
    )
    >>= fun order -> (
      return (typ, order)
    )

  (* Parses a complete "TYPE,ORDER" string; the whole input must be
     consumed for the parse to succeed. *)
  let parse ~no_score s =
    match Angstrom.(parse_string ~consume:Consume.All) (p ~no_score) s with
    | Ok t -> Ok t
    | Error msg -> Error msg
end

(* How the screen is split between the two UI panes. *)
type screen_split = [
  | `Even
  | `Focus_left
  | `Wide_left
  |
`Focus_right
  | `Wide_right
]

(* Decodes a split level integer; out-of-range values clamp to the
   nearest extreme.  Left inverse of [int_of_screen_split] on 0..4. *)
let screen_split_of_int (x : int) : screen_split =
  if x <= 0 then `Focus_right
  else if x = 1 then `Wide_right
  else if x = 2 then `Even
  else if x = 3 then `Wide_left
  else `Focus_left

(* Encodes a split as an integer in 0..4, ordered from right-focused
   to left-focused. *)
let int_of_screen_split (x : screen_split) =
  match x with
  | `Focus_right -> 0
  | `Wide_right -> 1
  | `Even -> 2
  | `Wide_left -> 3
  | `Focus_left -> 4

(* UI panes that can be shown/hidden via commands. *)
type pane = [
  | `Bottom_right
  | `Key_binding_info
]

let string_of_pane (x : pane) =
  match x with
  | `Bottom_right -> "bottom-right"
  | `Key_binding_info -> "key-binding-info"

(* A single Docfd command, as entered interactively or read from a
   script file. *)
type t = [
  | `Mark of string
  | `Mark_listed
  | `Unmark of string
  | `Unmark_listed
  | `Unmark_all
  | `Drop of string
  | `Drop_all_except of string
  | `Drop_marked
  | `Drop_unmarked
  | `Drop_listed
  | `Drop_unlisted
  | `Narrow_level of int
  | `Sort of Sort_by.t * Sort_by.t
  | `Path_fuzzy_rank of string * int String_map.t option
  | `Split_screen of screen_split
  | `Hide_pane of pane
  | `Show_pane of pane
  | `Comment of string
  | `Focus of string
  | `Search of string
  | `Filter of string
]

(* Pretty-prints a command back into (roughly) the syntax accepted by
   [of_string]. *)
let pp fmt (t : t) =
  match t with
  | `Mark s -> Fmt.pf fmt "mark: %s" s
  | `Mark_listed -> Fmt.pf fmt "mark listed"
  | `Unmark s -> Fmt.pf fmt "unmark: %s" s
  | `Unmark_listed -> Fmt.pf fmt "unmark listed"
  | `Unmark_all -> Fmt.pf fmt "unmark all"
  | `Drop s -> Fmt.pf fmt "drop: %s" s
  | `Drop_all_except s -> Fmt.pf fmt "drop all except: %s" s
  | `Drop_marked -> Fmt.pf fmt "drop marked"
  | `Drop_unmarked -> Fmt.pf fmt "drop unmarked"
  | `Drop_listed -> Fmt.pf fmt "drop listed"
  | `Drop_unlisted -> Fmt.pf fmt "drop unlisted"
  | `Narrow_level x -> Fmt.pf fmt "narrow level: %d" x
  | `Sort (x, y) -> (
      Fmt.pf fmt "sort by: %a; %a" Sort_by.pp x Sort_by.pp y
    )
  | `Path_fuzzy_rank (s, _ranking) -> (
      (* The precomputed ranking is not printable; only the query is shown. *)
      Fmt.pf fmt "path fuzzy rank: %s" s
    )
  | `Split_screen s -> (
      Fmt.pf fmt "split screen: %s"
        (match s with
         | `Even -> "even"
         | `Focus_left -> "focus-left"
         | `Wide_left -> "wide-left"
         | `Focus_right -> "focus-right"
         | `Wide_right -> "wide-right"
        )
    )
  | `Hide_pane pane -> (
      Fmt.pf fmt "hide-pane: %s"
(string_of_pane pane)
    )
  | `Show_pane pane -> (
      Fmt.pf fmt "show-pane: %s"
        (string_of_pane pane)
    )
  | `Comment s -> Fmt.pf fmt "#%s" s
  | `Focus s -> Fmt.pf fmt "focus: %s" s
  | `Search s -> (
      (* An empty search expression denotes "clear the search". *)
      if String.length s = 0 then (
        Fmt.pf fmt "clear search"
      ) else (
        Fmt.pf fmt "search: %s" s
      )
    )
  | `Filter s -> (
      (* An empty filter expression denotes "clear the filter". *)
      if String.length s = 0 then (
        Fmt.pf fmt "clear filter"
      ) else (
        Fmt.pf fmt "filter: %s" s
      )
    )

let to_string (t : t) = Fmt.str "%a" pp t

(* Angstrom parsers for the command language.  Each alternative in the
   top-level [choice] consumes a keyword (and possibly a ":"-separated
   argument) and yields a [t].  The ordering of alternatives is
   semantic: more specific keywords (e.g. "hide-pane", "clear") appear
   before clauses that could otherwise consume their prefix. *)
module Parsers = struct
  type t' = t

  open Angstrom
  open Parser_components

  (* Rest of the input with surrounding whitespace removed. *)
  let any_string_trimmed = any_string >>| String.trim

  let p : t' Angstrom.t =
    skip_spaces *>
    choice [
      string "mark" *> skip_spaces *>
      (
        choice [
          char ':' *> skip_spaces *> any_string_trimmed >>| (fun s -> (`Mark s));
          string "listed" *> skip_spaces *> return `Mark_listed;
        ]
      );
      string "unmark" *> skip_spaces *>
      (
        choice [
          string "listed" *> skip_spaces *> return `Unmark_listed;
          string "all" *> skip_spaces *> return `Unmark_all;
          char ':' *> skip_spaces *> any_string_trimmed >>| (fun s -> (`Unmark s));
        ]
      );
      string "drop" *> skip_spaces *>
      (
        choice [
          char ':' *> skip_spaces *> any_string_trimmed >>| (fun s -> (`Drop s));
          string "all" *> skip_spaces *> string "except" *> skip_spaces *>
          char ':' *> skip_spaces *> any_string_trimmed >>| (fun s -> (`Drop_all_except s));
          string "listed" *> skip_spaces *> return `Drop_listed;
          string "unlisted" *> skip_spaces *> return `Drop_unlisted;
          string "marked" *> skip_spaces *> return `Drop_marked;
          string "unmarked" *> skip_spaces *> return `Drop_unmarked;
        ]
      );
      string "narrow" *> skip_spaces *>
      (
        choice [
          (* Narrow level is a single decimal digit '0'..'9'. *)
          string "level" *> skip_spaces *> char ':' *> skip_spaces *>
          satisfy (function '0'..'9' -> true | _ -> false)
          <* skip_spaces
          >>| (fun c -> `Narrow_level (Char.code c - Char.code '0'));
        ]
      );
      string "clear" *> skip_spaces *>
      (
        choice [
          string "search" *> skip_spaces *> return (`Search "");
          string "filter" *> skip_spaces *> return (`Filter "");
        ]
      );
      (* The in-memory fuzzy ranking is filled in later; parse yields None. *)
      string "path" *> skip_spaces *> string "fuzzy" *> skip_spaces *> string "rank" *>
      skip_spaces *> char ':' *> skip_spaces *> any_string_trimmed
      >>| (fun s -> `Path_fuzzy_rank (s, None));
      (* "sort by: SCORED; SCORELESS" — two criteria separated by ';'. *)
      (string "sort" *> skip_spaces *> string "by" *> skip_spaces *>
       char ':' *> skip_spaces *> Sort_by.p ~no_score:false >>= fun sort_by ->
       skip_spaces *> char ';' *> skip_spaces *>
       Sort_by.p ~no_score:true >>= fun sort_by_no_score ->
       skip_spaces *> return (`Sort (sort_by, sort_by_no_score)));
      string "focus" *> skip_spaces *> char ':' *> skip_spaces *>
      any_string_trimmed >>| (fun s -> (`Focus s));
      string "split" *> skip_spaces *> string "screen" *> skip_spaces *>
      char ':' *> skip_spaces *>
      (
        choice [
          string "even" *> skip_spaces *> return (`Split_screen `Even);
          string "focus-left" *> skip_spaces *> return (`Split_screen `Focus_left);
          string "wide-left" *> skip_spaces *> return (`Split_screen `Wide_left);
          string "focus-right" *> skip_spaces *> return (`Split_screen `Focus_right);
          string "wide-right" *> skip_spaces *> return (`Split_screen `Wide_right);
        ]
      );
      string "hide-pane" *> skip_spaces *> char ':' *> skip_spaces *>
      (
        choice [
          string "bottom-right" *> skip_spaces *> return (`Hide_pane `Bottom_right);
          string "key-binding-info" *> skip_spaces *> return (`Hide_pane `Key_binding_info);
        ]
      );
      string "show-pane" *> skip_spaces *> char ':' *> skip_spaces *>
      (
        choice [
          string "bottom-right" *> skip_spaces *> return (`Show_pane `Bottom_right);
          string "key-binding-info" *> skip_spaces *> return (`Show_pane `Key_binding_info);
        ]
      );
      (* Comment lines: everything after '#' is kept verbatim. *)
      string "#" *> any_string >>| (fun s -> (`Comment s));
      string "search" *> skip_spaces *> char ':' *> skip_spaces *>
      any_string_trimmed >>| (fun s -> (`Search s));
      string "filter" *> skip_spaces *> char ':' *> skip_spaces *>
      any_string_trimmed >>| (fun s -> (`Filter s));
    ]
end

(* Parses one command line; the whole input must be consumed.
   Returns [None] on any parse failure. *)
let of_string (s : string) : t option =
  match Angstrom.(parse_string ~consume:Consume.All) Parsers.p s with
  | Ok t -> Some t
  | Error _ -> None
================================================ FILE: bin/content_and_search_result_rendering.ml
================================================ open Docfd_lib module I = Notty.I module A = Notty.A type cell_typ = [ | `Plain | `Search_result ] type cell = { word : string; typ : cell_typ; } module Text_block_rendering = struct let hchunk_rev ~width (img : Notty.image) : Notty.image list = let open Notty in let rec aux acc img = let img_width = I.width img in if img_width <= width then ( img :: acc ) else ( let acc = (I.hcrop 0 (img_width - width) img) :: acc in aux acc (I.hcrop width 0 img) ) in aux [] img let of_cells ?attr ~width ?(underline = false) (cells : cell list) : Notty.image * Int_set.t = let open Notty.Infix in assert (width > 0); let rendered_lines_with_search_result_words = ref Int_set.empty in let grid : Notty.image list list = List.fold_left (fun ((cur_len, acc) : int * Notty.image list list) (cell : cell) -> let attr = match attr with | Some attr -> attr | None -> (match cell.typ with | `Plain -> A.empty | `Search_result -> A.(fg black ++ bg lightyellow) ) in let word = (match I.string attr cell.word with | s -> s | exception _ -> ( I.string A.(fg lightred) (String.make (String.length cell.word) '?') )) in let word_len = I.width word in let word = match cell.typ with | `Plain -> word | `Search_result -> ( if underline then ( word <-> (I.string A.empty (String.make word_len '^')) ) else ( word ) ) in let new_len = cur_len + word_len in let cur_len, acc = if new_len <= width then ( match acc with | [] -> (new_len, [ [ word ] ]) | line :: rest -> ( (new_len, (word :: line) :: rest) ) ) else ( if word_len <= width then ( (word_len, [ word ] :: acc) ) else ( let lines = hchunk_rev ~width word |> List.map (fun x -> [ x ]) in (0, [] :: (lines @ acc)) ) ) in (match cell.typ with | `Plain -> () | `Search_result -> ( rendered_lines_with_search_result_words := Int_set.add (List.length acc - 1) !rendered_lines_with_search_result_words )); (cur_len, acc) ) (0, []) cells |> snd |> List.rev_map List.rev in let img = grid |> List.map I.hcat |> I.vcat in (img, 
!rendered_lines_with_search_result_words) let of_words ?attr ~width ?underline ?(highlights = Int_set.empty) (words : string list) : Notty.image = of_cells ?attr ~width ?underline (List.mapi (fun i word -> if Int_set.mem i highlights then ( { word; typ = `Search_result } ) else ( { word; typ = `Plain } ) ) words) |> fst end type word_grid = { start_global_line_num : int; data : cell array array; } let start_and_end_inc_global_line_num_of_search_result ~doc_id (search_result : Search_result.t) : (int * int) = match Search_result.found_phrase search_result with | [] -> failwith "unexpected case" | l -> ( List.fold_left (fun s_e Search_result.{ found_word_pos; _ } -> let loc = Index.loc_of_pos ~doc_id found_word_pos in let line_loc = Index.Loc.line_loc loc in let global_line_num = Index.Line_loc.global_line_num line_loc in match s_e with | None -> ( Some (global_line_num, global_line_num) ) | Some (s, e) -> ( Some (min s global_line_num, max global_line_num e) ) ) None l |> Option.get ) let word_grid_of_index ~doc_id ~start_global_line_num ~end_inc_global_line_num : word_grid = let global_line_count = Index.global_line_count ~doc_id in let check x = assert (0 <= x); assert (x <= global_line_count - 1); in check start_global_line_num; check end_inc_global_line_num; if global_line_count = 0 then ( { start_global_line_num = 0; data = [||] } ) else ( let data = OSeq.(start_global_line_num -- end_inc_global_line_num) |> Seq.map (fun global_line_num -> let data = Index.words_of_global_line_num ~doc_id global_line_num |> Dynarray.to_seq |> Seq.map (fun word -> { word; typ = `Plain }) |> Array.of_seq in data ) |> Array.of_seq in { start_global_line_num; data } ) let mark_in_word_grid ~doc_id (grid : word_grid) (positions : int list) : unit = let grid_end_inc_global_line_num = grid.start_global_line_num + Array.length grid.data - 1 in List.iter (fun pos -> let loc = Index.loc_of_pos ~doc_id pos in let line_loc = Index.Loc.line_loc loc in let global_line_num = 
Index.Line_loc.global_line_num line_loc in if grid.start_global_line_num <= global_line_num && global_line_num <= grid_end_inc_global_line_num then ( let pos_in_line = Index.Loc.pos_in_line loc in let row = global_line_num - grid.start_global_line_num in let cell = grid.data.(row).(pos_in_line) in grid.data.(row).(pos_in_line) <- { cell with typ = `Search_result } ) ) positions let mark_search_result_in_word_grid ~doc_id (grid : word_grid) (search_result : Search_result.t) : unit = Search_result.found_phrase search_result |> List.map (fun Search_result.{ found_word_pos; _ } -> found_word_pos ) |> mark_in_word_grid ~doc_id grid let mark_link_in_word_grid ~doc_id (grid : word_grid) (link : Link.t) : unit = let { Link.start_pos; end_inc_pos; _ } = link in CCList.(start_pos -- end_inc_pos) |> mark_in_word_grid ~doc_id grid type render_mode = [ | `Page_num_only | `Line_num_only | `Page_and_line_num | `None ] let render_grid ~doc_id ~(view_offset : int Lwd.var option) ~(render_mode : render_mode) ~width ?(height : int option) ?underline (grid : word_grid) : Notty.image = let (_rendered_line_count, rendered_lines_with_search_result_words), images = grid.data |> Array.to_list |> CCList.fold_map_i (fun (rendered_line_count, rendered_lines_with_search_result_words_acc) i cells -> let cells = Array.to_list cells in let global_line_num = grid.start_global_line_num + i in let line_loc = Index.line_loc_of_global_line_num ~doc_id global_line_num in let displayed_line_num = Index.Line_loc.line_num_in_page line_loc + 1 in let displayed_page_num = Index.Line_loc.page_num line_loc + 1 in let left_column_label = match render_mode with | `Page_num_only -> ( I.hcat [ I.strf ~attr:A.(fg lightyellow) "Page %d" displayed_page_num ; I.strf ": " ] ) | `Line_num_only -> ( I.hcat [ I.strf ~attr:A.(fg lightyellow) "%d" displayed_line_num ; I.strf ": " ] ) | `Page_and_line_num -> ( I.hcat [ I.strf ~attr:A.(fg lightyellow) "Page %d, %d" displayed_page_num displayed_line_num ; I.strf ": " ] ) | 
`None -> ( I.void 0 1 ) in let content_width = max 1 (width - I.width left_column_label) in let content, rendered_lines_with_search_result_words = Text_block_rendering.of_cells ?underline ~width:content_width cells in ((rendered_line_count + I.height content, rendered_lines_with_search_result_words |> Int_set.map (fun x -> x + rendered_line_count) |> Int_set.union rendered_lines_with_search_result_words_acc ), I.hcat [ left_column_label; content ]) ) (0, Int_set.empty) in let img = I.vcat images in match height with | None -> img | Some height -> ( let focal_point_offset = match Int_set.min_elt_opt rendered_lines_with_search_result_words, Int_set.max_elt_opt rendered_lines_with_search_result_words with | Some start_rendered_line_num, Some end_inc_rendered_line_num -> ( Misc_utils.div_round_to_closest (start_rendered_line_num + end_inc_rendered_line_num) 2 ) | _, _ -> 0 in let target_region_start = max 0 (focal_point_offset - (Misc_utils.div_round_to_closest height 2)) in let img_height = I.height img in let target_region_end_exc = min img_height (target_region_start + height) in let view_offset_old = match view_offset with | None -> 0 | Some x -> Lwd.peek x in let view_offset' = if view_offset_old >= 0 then ( min view_offset_old (img_height - target_region_end_exc) ) else ( let view_offset_old = Int.abs view_offset_old in - (min view_offset_old (target_region_start - 0)) ) in Option.iter (fun x -> if view_offset_old <> view_offset' then ( Lwd.set x view_offset' )) view_offset; let target_region_start, target_region_end_exc = if view_offset' >= 0 then ( (target_region_start + view_offset', target_region_end_exc + view_offset') ) else ( (* If the offset is negative (shifting view upwards), then make the bottom border "sticky". In other words, if the height of view window is smaller than the height of pane, and the view is shifting upwards, don't bother moving the bottom border. 
This prevents the rendered content snippet view staying small and cropping the bottom text when scrolling up if the view started out being small (due to the search result being close to the bottom of the file). *) let view_window_height = target_region_end_exc - target_region_start in (target_region_start + view_offset', if view_window_height < height then ( target_region_end_exc ) else ( target_region_end_exc + view_offset' ) ) ) in I.vcrop target_region_start (img_height - target_region_end_exc) img ) let content_snippet ~doc_id ~(view_offset : int Lwd.var) ?(data : [ `Search_result of Search_result.t | `Link of Link.t ] option) ~(width : int) ~(height : int) ?underline () : Notty.image = let max_end_inc_global_line_num = Index.global_line_count ~doc_id - 1 in assert (height > 0); let compute_final_line_num_range ~(view_offset : int Lwd.var) ~start_global_line_num : int * int = let end_inc_global_line_num = min max_end_inc_global_line_num (start_global_line_num + height - 1) in (* We grow the area in one direction rather than shifting the area, in order to not interfere with the focal point offset computation in render_grid. The number of lines to grow is an overapproximation of the actual lines required, as a line may wrap into multiple rendered lines in the rendered view if it is longer than the width of the content pane. But we do not know how many lines (or partial segments of lines) exactly until we actually render the view/word grid. 
*) let view_offset' = Lwd.peek view_offset in if view_offset' >= 0 then ( let end_inc_global_line_num = min max_end_inc_global_line_num (end_inc_global_line_num + view_offset') in (start_global_line_num, end_inc_global_line_num) ) else ( let start_global_line_num = max 0 (start_global_line_num - Int.abs view_offset') in (start_global_line_num, end_inc_global_line_num) ) in match data with | None -> ( let start_global_line_num, end_inc_global_line_num = compute_final_line_num_range ~view_offset ~start_global_line_num:0 in let grid = word_grid_of_index ~doc_id ~start_global_line_num ~end_inc_global_line_num in render_grid ~doc_id ~view_offset:(Some view_offset) ~render_mode:`None ~width ~height ?underline grid ) | Some data -> ( let focal_line = match data with | `Search_result search_result -> ( let (relevant_start_line, relevant_end_inc_line) = start_and_end_inc_global_line_num_of_search_result ~doc_id search_result in let avg = (relevant_start_line + relevant_end_inc_line) / 2 in avg ) | `Link link -> ( let loc = Index.loc_of_pos ~doc_id link.Link.start_pos in let line_loc = Index.Loc.line_loc loc in Index.Line_loc.global_line_num line_loc ) in let start_global_line_num, end_inc_global_line_num = compute_final_line_num_range ~view_offset ~start_global_line_num:( max 0 (focal_line - (Misc_utils.div_round_to_closest height 2)) ) in let grid = word_grid_of_index ~doc_id ~start_global_line_num ~end_inc_global_line_num in (match data with | `Search_result search_result -> ( mark_search_result_in_word_grid ~doc_id grid search_result ) | `Link link -> ( mark_link_in_word_grid ~doc_id grid link ) ); render_grid ~doc_id ~view_offset:(Some view_offset) ~render_mode:`None ~width ~height ?underline grid ) let word_is_not_space s = String.length s > 0 && not (Parser_components.is_space s.[0]) let grab_additional_lines ~doc_id ~non_space_word_count start_global_line_num end_inc_global_line_num : int * int = let max_end_inc_global_line_num = Index.global_line_count ~doc_id - 1 
in let non_space_word_count_of_line n = Index.words_of_global_line_num ~doc_id n |> Dynarray.to_seq |> Seq.filter word_is_not_space |> Seq.length in let rec aux ~non_space_word_count ~i x y = if i < !Params.search_result_print_snippet_max_additional_lines_each_direction && non_space_word_count < !Params.search_result_print_snippet_min_size then ( let x, top_add_count = let n = x - 1 in if n >= 0 then ( (n, non_space_word_count_of_line n) ) else ( (x, 0) ) in let y, bottom_add_count = let n = y + 1 in if n <= max_end_inc_global_line_num then ( (n, non_space_word_count_of_line n) ) else ( (y, 0) ) in let non_space_word_count = non_space_word_count + top_add_count + bottom_add_count in aux ~non_space_word_count ~i:(i + 1) x y ) else ( (x, y) ) in aux ~non_space_word_count ~i:0 start_global_line_num end_inc_global_line_num let search_result ~doc_id ~render_mode ~width ?underline ?(fill_in_context = false) (search_result : Search_result.t) : Notty.image = let open Notty in let open Notty.Infix in let (start_global_line_num, end_inc_global_line_num) = start_and_end_inc_global_line_num_of_search_result ~doc_id search_result |> (fun (x, y) -> if fill_in_context then ( let non_space_word_count = Search_result.search_phrase search_result |> Search_phrase.enriched_tokens |> List.filter_map (fun token -> match Search_phrase.Enriched_token.data token with | `Explicit_spaces -> None | `String s -> ( assert (word_is_not_space s); Some s ) ) |> List.length in grab_additional_lines ~doc_id ~non_space_word_count x y ) else ( (x, y) ) ) in let grid = word_grid_of_index ~doc_id ~start_global_line_num ~end_inc_global_line_num in mark_search_result_in_word_grid ~doc_id grid search_result; let img = render_grid ~doc_id ~view_offset:None ~render_mode ~width ?underline grid in if Option.is_some !Params.debug_output then ( let score = Search_result.score search_result in I.strf "(Score: %f)" score <-> img ) else ( img ) let centered_list ~width ~height ~(render : width:int -> 'a -> 
Notty.image) (selection : int) (l : 'a list) : Notty.image = let height_rendered_before_selection = ref 0 in let img = l |> List.mapi (fun i x -> let img = let x = if i = selection then ( let img = render ~width:(width - 2) x in Notty.I.(strf ~attr:A.(fg lightyellow) "> " <|> img ) ) else ( render ~width x ) in Notty.I.(x <-> strf "") in if i < selection then ( height_rendered_before_selection := !height_rendered_before_selection + Notty.I.height img; ); img ) |> Notty.I.vcat in let focal_point_offset = !height_rendered_before_selection in let img_height = I.height img in let target_region_start = max 0 (focal_point_offset - (Misc_utils.div_round_to_closest height 2)) in let target_region_end_exc = min img_height (target_region_start + height) in I.vcrop target_region_start (img_height - target_region_end_exc) img ================================================ FILE: bin/debug_utils.ml ================================================ let do_if_debug (f : out_channel -> unit) = match !Params.debug_output with | None -> () | Some oc -> ( f oc ) ================================================ FILE: bin/docfd.ml ================================================ open Cmdliner open Lwd_infix open Docfd_lib open Debug_utils open Misc_utils open File_utils let compute_paths_from_globs ~report_progress globs = Seq.iter (fun s -> match Glob.parse s with | Some _ -> () | None -> ( exit_with_error_msg (Fmt.str "failed to parse glob pattern: \"%s\"" s) ) ) globs; list_files_recursive_filter_by_globs ~report_progress globs type file_constraints = { no_pdftotext : bool; no_pandoc : bool; paths_were_specified_by_user : bool; exts : string list; single_line_exts : string list; directly_specified_paths : String_set.t; globs : String_set.t; single_line_globs : String_set.t; } let make_file_constraints ~no_pdftotext ~no_pandoc ~(exts : string list) ~(single_line_exts : string list) ~(paths : string list) ~(paths_from_file_or_stdin : string list option) ~(globs : string list) 
~(single_line_globs : string list) : file_constraints = match paths, paths_from_file_or_stdin, globs, single_line_globs with | [], None, [], [] -> ( { no_pdftotext; no_pandoc; paths_were_specified_by_user = false; exts; single_line_exts; directly_specified_paths = String_set.of_list [ "." ]; globs = String_set.empty; single_line_globs = String_set.empty; } ) | _, _, _, _ -> ( let paths_from_file_or_stdin = Option.value ~default:[] paths_from_file_or_stdin in let directly_specified_paths = String_set.of_list (paths @ paths_from_file_or_stdin) in let globs = String_set.of_list globs in let single_line_globs = String_set.of_list single_line_globs in { no_pdftotext; no_pandoc; paths_were_specified_by_user = true; exts; single_line_exts; directly_specified_paths; globs; single_line_globs; } ) let files_satisfying_constraints ~interactive (cons : file_constraints) : Document_src.file_collection = let bar = let open Progress.Line in list [ const "Scanning" ; spinner () ] in progress_with_reporter ~interactive bar (fun report_progress : Document_src.file_collection -> let single_line_search_mode_applies file = List.mem (extension_of_file file) cons.single_line_exts in let single_line_search_mode_paths_by_exts, default_search_mode_paths_by_exts = cons.directly_specified_paths |> String_set.to_seq |> list_files_recursive_filter_by_exts ~report_progress ~exts:(cons.exts @ cons.single_line_exts) |> String_set.partition single_line_search_mode_applies in let paths_from_single_line_globs = cons.single_line_globs |> String_set.to_seq |> compute_paths_from_globs ~report_progress in let single_line_search_mode_paths_from_globs, default_search_mode_paths_from_globs = cons.globs |> String_set.to_seq |> compute_paths_from_globs ~report_progress |> String_set.partition single_line_search_mode_applies in let single_line_search_mode_files = single_line_search_mode_paths_by_exts |> String_set.union paths_from_single_line_globs |> String_set.union single_line_search_mode_paths_from_globs 
in let default_search_mode_files = default_search_mode_paths_by_exts |> String_set.union default_search_mode_paths_from_globs |> (fun s -> String_set.diff s single_line_search_mode_files) in do_if_debug (fun oc -> Printf.fprintf oc "Checking if single line search mode files and default search mode files are disjoint\n"; if String_set.is_empty (String_set.inter single_line_search_mode_files default_search_mode_files) then ( Printf.fprintf oc "Check successful\n" ) else ( failwith "check failed" ); let all_files = single_line_search_mode_paths_by_exts |> String_set.union default_search_mode_paths_by_exts |> String_set.union paths_from_single_line_globs |> String_set.union single_line_search_mode_paths_from_globs |> String_set.union default_search_mode_paths_from_globs in let single_line_search_mode_files', default_search_mode_files' = String_set.partition (fun s -> single_line_search_mode_applies s || String_set.mem s paths_from_single_line_globs ) all_files in Printf.fprintf oc "Checking if efficiently computed and naively computed results for single line search mode files are consistent\n"; if String_set.equal single_line_search_mode_files single_line_search_mode_files' then ( Printf.fprintf oc "Check successful\n" ) else ( failwith "check failed" ); Printf.fprintf oc "Checking if efficiently computed and naively computed results for default search mode files are consistent\n"; if String_set.equal default_search_mode_files default_search_mode_files' then ( Printf.fprintf oc "Check successful\n" ) else ( failwith "check failed" ) ); let filter_for_no_pdftotext_or_no_pandoc (s : String_set.t) = if cons.no_pdftotext || cons.no_pandoc then ( String_set.filter (fun s -> match File_utils.format_of_file s with | `PDF -> not cons.no_pdftotext | `Pandoc_supported_format -> not cons.no_pandoc | `Text | `Other -> true ) s ) else ( s ) in let default_search_mode_files = filter_for_no_pdftotext_or_no_pandoc default_search_mode_files in let single_line_search_mode_files = 
filter_for_no_pdftotext_or_no_pandoc single_line_search_mode_files in { default_search_mode_files; single_line_search_mode_files; } ) let init_session_state_of_document_src ~env ~interactive pool (document_src : Document_src.t) = let file_bar ~total_file_count = let open Progress.Line in list [ brackets (elapsed ()) ; bar ~width:(`Fixed 20) total_file_count ; percentage_of total_file_count ; const "ETA: " ++ eta total_file_count ] in let byte_bar ~total_byte_count = let open Progress.Line in list [ brackets (elapsed ()) ; bar ~width:(`Fixed 20) total_byte_count ; percentage_of total_byte_count ; bytes_per_sec ; const "ETA: " ++ eta total_byte_count ] in let all_documents : Document.t list list = match document_src with | Document_src.Stdin path -> ( match Document.of_path ~env pool ~already_in_transaction:false !Params.default_search_mode path with | Ok x -> [ [ x ] ] | Error msg -> ( exit_with_error_msg msg ) ) | Files { default_search_mode_files; single_line_search_mode_files } -> ( let print_stage_stats ~file_count ~total_byte_count = Printf.printf "- File count: %6d\n" file_count; Printf.printf "- MiB: %8.1f\n" (Misc_utils.mib_of_bytes total_byte_count); in let total_file_count, files = Seq.append (Seq.map (fun path -> (!Params.default_search_mode, path)) (String_set.to_seq default_search_mode_files)) (Seq.map (fun path -> (`Single_line, path)) (String_set.to_seq single_line_search_mode_files)) |> Misc_utils.length_and_list_of_seq in if interactive then ( Printf.printf "Collecting file stats\n"; flush stdout; ); let documents_total_byte_count, document_sizes = progress_with_reporter ~interactive (file_bar ~total_file_count) (fun report_progress -> List.fold_left (fun (total_size, m) (_, path) -> let res = match File_utils.file_size path with | None -> (total_size, m) | Some x -> (total_size + x, String_map.add path x m) in report_progress 1; res ) (0, String_map.empty) files ) in if interactive then ( print_stage_stats ~file_count:total_file_count 
~total_byte_count:documents_total_byte_count ); if interactive then ( Printf.printf "Hashing\n"; flush stdout; ); let file_and_hash_list = match files with | [] -> [] | _ -> ( files |> (fun l -> progress_with_reporter ~interactive (byte_bar ~total_byte_count:documents_total_byte_count) (fun report_progress -> Task_pool.filter_map_list pool (fun (search_mode, path) -> do_if_debug (fun oc -> Printf.fprintf oc "Hashing document: %s\n" (Filename.quote path); ); let res = match BLAKE2B.hash_of_file ~env ~path with | Ok hash -> Some (search_mode, path, hash) | Error msg -> ( do_if_debug (fun oc -> Printf.fprintf oc "Error: %s\n" msg ); None ) in (match String_map.find_opt path document_sizes with | None -> () | Some x -> report_progress x); res ) l ) ) ) in if interactive then ( Printf.printf "Allocating document IDs\n"; flush stdout; ); progress_with_reporter ~interactive (file_bar ~total_file_count) (fun report_progress -> file_and_hash_list |> List.to_seq |> Seq.map (fun (_, _, doc_hash) -> report_progress 1; doc_hash) |> Doc_id_db.allocate_bulk ); let indexed_files, unindexed_files = let open Sqlite3_utils in with_stmt Index.is_indexed_sql (fun stmt -> List.partition (fun (_, _, doc_hash) -> bind_names stmt [ ("@doc_hash", TEXT doc_hash) ]; step stmt; let indexed = data_count stmt > 0 in reset stmt; indexed ) file_and_hash_list ) in indexed_files |> List.map (fun (_, _, doc_hash) -> Doc_id_db.doc_id_of_doc_hash doc_hash ) |> Index.refresh_last_used_batch; let load_document ~env pool search_mode ~doc_hash path = do_if_debug (fun oc -> Printf.fprintf oc "Loading document: %s\n" (Filename.quote path); ); do_if_debug (fun oc -> Printf.fprintf oc "Using %s search mode for document %s\n" (match search_mode with | `Single_line -> "single line" | `Multiline -> "multiline" ) (Filename.quote path) ); match Document.of_path ~env pool ~already_in_transaction:false search_mode ~doc_hash path with | Ok x -> ( do_if_debug (fun oc -> Printf.fprintf oc "Document %s loaded 
successfully\n" (Filename.quote path); ); Some x ) | Error msg -> ( do_if_debug (fun oc -> Printf.fprintf oc "Error: %s\n" msg ); None ) in if interactive then ( Printf.printf "Processing indexed files\n"; flush stdout; ); let indexed_files = indexed_files |> List.filter_map (fun (search_mode, path, doc_hash) -> load_document ~env pool search_mode ~doc_hash path ) in if interactive then ( Printf.printf "Indexing remaining files\n"; flush stdout; ); let unindexed_file_count, unindexed_files_byte_count = List.fold_left (fun (file_count, byte_count) (_, path, _) -> (file_count + 1, byte_count + Option.value ~default:0 (String_map.find_opt path document_sizes)) ) (0, 0) unindexed_files in if interactive then ( print_stage_stats ~file_count:unindexed_file_count ~total_byte_count:unindexed_files_byte_count; ); let pipeline = Document_pipeline.make ~env pool in let _, unindexed_files = Eio.Fiber.pair (fun () -> Document_pipeline.run pipeline ) (fun () -> (match unindexed_files with | [] -> () | _ -> ( progress_with_reporter ~interactive (byte_bar ~total_byte_count:unindexed_files_byte_count) (fun report_progress -> unindexed_files |> List.iter (fun (search_mode, path, doc_hash) -> Document_pipeline.feed pipeline search_mode ~doc_hash path; (match String_map.find_opt path document_sizes with | None -> () | Some x -> report_progress x ) ) ) )); if interactive then ( Printf.printf "Finalizing index\n"; flush stdout; ); Document_pipeline.finalize pipeline ) in [ indexed_files; unindexed_files ] ) in let state = all_documents |> List.to_seq |> Seq.flat_map List.to_seq |> Session.State.of_seq pool in Gc.compact (); state let parse_sort_by_arg ~no_score (s : string) : Command.Sort_by.t = match Command.Sort_by.parse ~no_score s with | Ok t -> t | Error msg -> ( let msg = CCString.chop_prefix ~pre:": " msg |> Option.value ~default:msg in exit_with_error_msg (Fmt.str "failed to parse --%s argument: %s" (if no_score then ( Args.sort_no_score_arg_name ) else ( Args.sort_arg_name )) 
msg ) ) let run ~(eio_env : Eio_unix.Stdenv.base) ~sw (debug_log : string option) (no_pdftotext : bool) (no_pandoc : bool) (scan_hidden : bool) (max_depth : int) (max_fuzzy_edit_dist : int) (max_token_search_dist : int) (max_linked_token_search_dist : int) (tokens_per_search_scope_level : int) (index_chunk_size : int) (exts : string) (single_line_exts : string) (additional_exts : string list) (single_line_additional_exts : string list) (cache_dir : string) (cache_limit : int) (data_dir : string) (index_only : bool) (start_with_filter : string option) (start_with_search : string option) (filter_exp : string option) (sample_search_exp : string option) (samples_per_doc : int) (search_exp : string option) (sort_by : string) (sort_by_no_score : string) (print_color_mode : Params.style_mode) (print_underline_mode : Params.style_mode) (search_result_print_text_width : int) (search_result_print_snippet_min_size : int) (search_result_print_max_add_lines : int) (start_with_script : string option) (script : string option) (paths_from : string list) (globs : string list) (single_line_globs : string list) (single_line_search_mode_by_default : bool) (path_open_specs : string list) (print_files_with_match : bool) (print_files_without_match : bool) (paths : string list) = let env = eio_env in Args.check ~max_depth ~max_fuzzy_edit_dist ~max_token_search_dist ~max_linked_token_search_dist ~tokens_per_search_scope_level ~index_chunk_size ~cache_limit ~start_with_filter ~start_with_search ~filter_exp ~sample_search_exp ~samples_per_doc ~search_exp ~search_result_print_text_width ~search_result_print_snippet_min_size ~search_result_print_max_add_lines ~start_with_script ~script ~paths_from ~print_files_with_match ~print_files_without_match; Params.debug_output := (match debug_log with | None -> None | Some "-" -> Some stderr | Some debug_log -> ( try Some ( open_out_gen [ Open_append; Open_creat; Open_wronly; Open_text ] 0o644 debug_log ) with | Sys_error _ -> ( exit_with_error_msg 
(Fmt.str "failed to open debug log file %s" (Filename.quote debug_log)) ) ) ); Params.scan_hidden := scan_hidden; Params.max_file_tree_scan_depth := max_depth; Params.max_fuzzy_edit_dist := max_fuzzy_edit_dist; Params.max_token_search_dist := max_token_search_dist; Params.max_linked_token_search_dist := max_linked_token_search_dist; Params.tokens_per_search_scope_level := tokens_per_search_scope_level; Params.index_chunk_size := index_chunk_size; Params.cache_limit := cache_limit; Params.search_result_print_text_width := search_result_print_text_width; Params.search_result_print_snippet_min_size := search_result_print_snippet_min_size; Params.search_result_print_snippet_max_additional_lines_each_direction := search_result_print_max_add_lines; Params.samples_per_document := samples_per_doc; let sort_by = parse_sort_by_arg ~no_score:false sort_by in let sort_by_no_score = parse_sort_by_arg ~no_score:true sort_by_no_score in Params.cache_dir := ( mkdir_recursive cache_dir; Some cache_dir ); Params.data_dir := ( mkdir_recursive data_dir; Some data_dir ); Params.default_search_mode := ( if single_line_search_mode_by_default then ( `Single_line ) else ( `Multiline ) ); List.iter (fun spec -> match Path_opening.parse_spec spec with | Error msg -> ( exit_with_error_msg (Fmt.str "failed to parse %s, %s" spec msg) ) | Ok (exts, launch_mode, cmd) -> ( List.iter (fun ext -> Hashtbl.replace Path_opening.specs ext (launch_mode, cmd) ) exts ) ) path_open_specs; let db_path = Filename.concat cache_dir Params.db_file_name in (match Docfd_lib.init ~db_path ~document_count_limit:cache_limit with | None -> () | Some msg -> exit_with_error_msg msg ); let interactive = Option.is_none filter_exp && Option.is_none sample_search_exp && Option.is_none search_exp && not print_files_with_match && not print_files_without_match && Option.is_none script in if interactive then ( Printf.printf "Initializing in-memory index\n"; flush stdout; ); Word_db.read_from_db (); Index.State.read_from_db (); 
(match Sys.getenv_opt "VISUAL", Sys.getenv_opt "EDITOR" with | None, None -> ( exit_with_error_msg (Fmt.str "environment variable VISUAL or EDITOR needs to be set") ) | Some editor, _ | None, Some editor -> ( Params.text_editor := editor; ) ); Lwd.unsafe_mutation_logger := (fun () -> ()); let recognized_exts = compute_total_recognized_exts ~exts ~additional_exts in let recognized_single_line_exts = compute_total_recognized_exts ~exts:single_line_exts ~additional_exts:single_line_additional_exts in (match recognized_exts, recognized_single_line_exts, globs, single_line_globs with | [], [], [], [] -> ( exit_with_error_msg (Fmt.str "no usable file extensions or glob patterns") ) | _, _, _, _ -> () ); let paths_from_file_or_stdin = match paths_from with | [] -> None | l -> ( l |> List.to_seq |> Seq.map (String.split_on_char ',') |> Seq.flat_map List.to_seq |> Seq.flat_map (fun paths_from -> (match paths_from with | "-" -> ( CCIO.read_lines_l stdin ) | _ -> ( try CCIO.with_in paths_from CCIO.read_lines_l with | Sys_error _ -> ( exit_with_error_msg (Fmt.str "failed to read list of paths from %s" (Filename.quote paths_from)) ) ) ) |> List.to_seq ) |> List.of_seq |> Option.some ) in let file_constraints = make_file_constraints ~no_pdftotext ~no_pandoc ~exts:recognized_exts ~single_line_exts:recognized_single_line_exts ~paths ~paths_from_file_or_stdin ~globs ~single_line_globs in let pool = Task_pool.make ~sw (Eio.Stdenv.domain_mgr env) in Atomic.set UI_base.Vars.pool (Some pool); String_set.iter (fun path -> if not (Sys.file_exists path) then ( exit_with_error_msg (Fmt.str "path %s does not exist" (Filename.quote path)) ) ) file_constraints.directly_specified_paths; let compute_if_hide_document_list_initially_and_document_src : unit -> bool * Document_src.t = let stdin_tmp_file = ref None in (fun () -> let file_collection = files_satisfying_constraints ~interactive file_constraints in if file_constraints.paths_were_specified_by_user || stdin_is_atty () then ( let 
hide_document_list = match Document_src.file_collection_size file_collection with | 0 -> false | 1 -> true | _ -> false in (hide_document_list, Files file_collection) ) else ( match !stdin_tmp_file with | None -> ( match read_in_channel_to_tmp_file stdin with | Ok tmp_file -> ( stdin_tmp_file := Some tmp_file; (true, Stdin tmp_file) ) | Error msg -> ( exit_with_error_msg msg ) ) | Some tmp_file -> ( (true, Stdin tmp_file) ) ) ) in let compute_document_src () = snd (compute_if_hide_document_list_initially_and_document_src ()) in let hide_document_list_initially, init_document_src = compute_if_hide_document_list_initially_and_document_src () in let clean_up () = match init_document_src with | Stdin tmp_file -> ( try Sys.remove tmp_file with | Sys_error _ -> () ) | Files _ -> () in do_if_debug (fun oc -> Printf.fprintf oc "Scanning completed\n" ); do_if_debug (fun oc -> match init_document_src with | Stdin _ -> Printf.fprintf oc "Document source: stdin\n" | Files file_collection -> ( Printf.fprintf oc "Document source: files\n"; Document_src.seq_of_file_collection file_collection |> Seq.iter (fun file -> Printf.fprintf oc "File: %s\n" (Filename.quote file); ) ) ); (match init_document_src with | Stdin _ -> () | Files file_collection -> ( let pdftotext_exists = Proc_utils.command_exists "pdftotext" in let pandoc_exists = Proc_utils.command_exists "pandoc" in let formats = Document_src.seq_of_file_collection file_collection |> Seq.map format_of_file |> Seq.fold_left (fun acc x -> File_format_set.add x acc) File_format_set.empty in if not pdftotext_exists && File_format_set.mem `PDF formats then ( exit_with_error_msg (Fmt.str "command pdftotext not found, use --%s to disable use of pdftotext" Args.no_pdftotext_arg_name) ); if File_format_set.mem `Pandoc_supported_format formats then ( if not pandoc_exists then ( exit_with_error_msg (Fmt.str "command pandoc not found, use --%s to disable use of pandoc" Args.no_pandoc_arg_name) ); ); ) ); Lwd.set 
UI_base.Vars.hide_document_list hide_document_list_initially; init_session_state_of_document_src ~env pool ~interactive init_document_src |> (fun state -> Session.run_command pool (`Sort (sort_by, sort_by_no_score)) state |> Option.get |> snd ) |> Session_manager.update_starting_state; if index_only then ( clean_up (); exit 0 ); let print_oc = stdout in let print_with_color = match print_color_mode with | `Never -> false | `Always -> true | `Auto -> Out_channel.isatty print_oc in let print_with_underline = match print_underline_mode with | `Never -> false | `Always -> true | `Auto -> not (Out_channel.isatty print_oc) in let snapshots_from_script = match script, start_with_script with | None, None -> None | Some _, Some _ -> failwith "unexpected case" | Some script, None | None, Some script -> ( let init_state = Session_manager.lock_with_view (fun view -> view.init_state ) in match Script.run pool ~init_state ~path:script with | Error msg -> exit_with_error_msg msg | Ok snapshots -> Some snapshots ) in (match snapshots_from_script with | None -> () | Some snapshots -> ( Session_manager.load_snapshots snapshots; )); if not interactive then ( let filter_exp_and_original_string = match filter_exp with | None -> None | Some s -> Some (Option.get (Filter_exp.parse s), s) in let print_limit_per_doc, search_exp_and_original_string = match sample_search_exp, search_exp with | None, None -> (None, None) | Some _, Some _ -> failwith "unexpected case" | Some search_exp_string, None | None, Some search_exp_string -> ( let print_limit_per_doc = match sample_search_exp with | Some _ -> Some samples_per_doc | None -> None in let search_exp = Option.get (Search_exp.parse search_exp_string) in do_if_debug (fun oc -> Fmt.pf (Format.formatter_of_out_channel oc) "Search expression: @[%a@]@." 
Search_exp.pp search_exp ); (print_limit_per_doc, Some (search_exp, search_exp_string)) ) in let session_state = match snapshots_from_script with | None -> ( Session_manager.lock_with_view (fun view -> view.init_state ) |> (fun state -> match filter_exp_and_original_string with | None -> state | Some (filter_exp, filter_exp_string) -> ( Option.get (Session.State.update_filter_exp pool (Stop_signal.make ()) filter_exp_string filter_exp state ) ) ) |> (fun state -> match search_exp_and_original_string with | None -> state | Some (search_exp, search_exp_string) -> ( Option.get (Session.State.update_search_exp pool (Stop_signal.make ()) search_exp_string search_exp state ) ) ) ) | Some _ -> ( Session_manager.lock_with_view (fun view -> Dynarray.get_last view.snapshots |> Session.Snapshot.state ) ) in let oc = stdout in let no_results = if print_files_with_match then ( let arr = Session.State.search_result_groups session_state in Array.iter (fun (doc, _search_result) -> Printers.path_image ~color:print_with_color oc (Document.path doc) ) arr; Array.length arr = 0 ) else if print_files_without_match then ( let arr = Session.State.unusable_documents session_state |> Array.of_seq in let (sort_by_typ, sort_by_order) = sort_by_no_score in let f = match sort_by_typ with | `Path_date -> Document.Compare.path_date sort_by_order | `Mod_time -> Document.Compare.mod_time sort_by_order | `Path -> Document.Compare.path sort_by_order | `Score -> failwith "unexpected case" in Array.sort f arr; Array.iter (fun doc -> Printers.path_image ~color:print_with_color oc (Document.path doc) ) arr; Array.length arr = 0 ) else ( let s = Session.State.search_result_groups session_state |> Array.to_seq |> Seq.map (fun (doc, arr) -> let arr = match print_limit_per_doc with | None -> arr | Some n -> ( Array.sub arr 0 (min (Array.length arr) n) ) in (doc, arr) ) in Printers.search_result_groups ~color:print_with_color ~underline:print_with_underline oc s; Seq.is_empty s ) in clean_up (); if 
no_results then ( exit 1 ) else ( exit 0 ) ); UI_base.Vars.eio_env := Some env; let root : Nottui.ui Lwd.t = let$* (term_width, term_height) = Lwd.get UI_base.Vars.term_width_height in if term_width <= 40 || term_height <= 20 then ( let msg = Nottui.Ui.atom (Notty.I.strf "Terminal size too small") in let keyboard_handler (key : Nottui.Ui.key) = match key with | (`Escape, []) | (`ASCII 'Q', [`Ctrl]) | (`ASCII 'C', [`Ctrl]) -> ( Lwd.set UI_base.Vars.quit true; UI_base.Vars.action := None; `Handled ) | _ -> `Unhandled in Lwd.return (Nottui.Ui.keyboard_area keyboard_handler msg) ) else ( UI.main ) in let get_term, close_term = let term_and_tty_fd = ref None in ((fun () -> match !term_and_tty_fd with | None -> ( if stdin_is_atty () then ( let term = Notty_unix.Term.create () in term_and_tty_fd := Some (term, None); term ) else ( let input = Unix.(openfile "/dev/tty" [ O_RDONLY ] 0o444) in let term = Notty_unix.Term.create ~input () in term_and_tty_fd := Some (term, Some input); term ) ) | Some (term, _tty_fd) -> term ), (fun () -> match !term_and_tty_fd with | None -> () | Some (term, tty_fd) -> ( Notty_unix.Term.release term; (match tty_fd with | None -> () | Some fd -> Unix.close fd); term_and_tty_fd := None ) ) ) in let rec loop () = Sys.command "clear -x" |> ignore; let term = get_term () in UI_base.Vars.term := Some term; UI_base.Vars.action := None; Lwd.set UI_base.Vars.quit false; UI_base.ui_loop ~quit:UI_base.Vars.quit ~term root; match !UI_base.Vars.action with | None -> () | Some action -> ( Session_manager.stop_filter_and_search_and_restore_input_fields (); match action with | UI_base.Recompute_document_src -> ( close_term (); let new_starting_state = compute_document_src () |> init_session_state_of_document_src ~env ~interactive pool in Session_manager.update_starting_state new_starting_state; loop () ) | Open_file_and_search_result (doc, search_result) -> ( let doc_id = Document.doc_id doc in let path = Document.path doc in let old_stats = Unix.stat path in 
Path_opening.main ~close_term ~path ~doc_id_and_search_result:(Option.map (fun x -> (doc_id, x)) search_result); let new_stats = Unix.stat path in if Float.abs (new_stats.st_mtime -. old_stats.st_mtime) >= Params.float_compare_margin then ( UI.reload_document doc ); loop () ) | Open_link (doc, link) -> ( Path_opening.open_link ~close_term ~doc link; loop () ) | Edit_command_history -> ( let file = Filename.temp_file "" Params.docfd_script_ext in let init_snapshots = Session_manager.lock_with_view (fun view -> view.snapshots ) in let init_lines = Seq.append ( init_snapshots |> Dynarray.to_seq |> Seq.filter_map (fun (snapshot : Session.Snapshot.t) -> Option.map Command.to_string (Session.Snapshot.last_command snapshot) ) ) ( List.to_seq [ ""; "; You are viewing/editing Docfd command history."; "; If any change is made to this file, Docfd will replay the commands from the start."; ";"; "; There are two types of comments:"; "; - System comments begin with `;`, and are not preserved after editing of command history."; "; These are for communication of error message to user during command history editing."; "; - User comments begin with `#`, and are preserved after editing of command history."; ";"; "; If a line is not blank and is not a comment,"; "; then the line should contain exactly one command."; "; A command cannot be written across multiple lines."; ";"; "; Starting point is v0, the state with the full set of documents."; "; Each command adds one to the version number."; "; Command at the top is oldest, command at bottom is the newest."; ";"; "; Note that for commands that accept text, all text after `:` is trimmed and then used in full."; "; This means \" and ' are treated literally and are not used to delimit strings."; ";"; "; Possible commands:"; Fmt.str "; - %a" Command.pp (`Search "search phrase"); Fmt.str "; - %a" Command.pp (`Search ""); Fmt.str "; - %a" Command.pp (`Filter "path-fuzzy:\"file txt\""); Fmt.str "; - %a" Command.pp (`Filter ""); Fmt.str "; - 
%a" Command.pp (`Sort (Command.Sort_by.default, Command.Sort_by.default_no_score)); Fmt.str "; - %a" Command.pp (`Path_fuzzy_rank ("readme", None)); Fmt.str "; - %a" Command.pp (`Narrow_level 1); Fmt.str "; - %a" Command.pp (`Mark "/path/to/document"); Fmt.str "; - %a" Command.pp `Mark_listed; Fmt.str "; - %a" Command.pp (`Unmark "/path/to/document"); Fmt.str "; - %a" Command.pp `Unmark_listed; Fmt.str "; - %a" Command.pp `Unmark_all; Fmt.str "; - %a" Command.pp (`Drop "/path/to/document"); Fmt.str "; - %a" Command.pp (`Drop_all_except "/path/to/document"); Fmt.str "; - %a" Command.pp `Drop_marked; Fmt.str "; - %a" Command.pp `Drop_unmarked; Fmt.str "; - %a" Command.pp `Drop_listed; Fmt.str "; - %a" Command.pp `Drop_unlisted; ] ) |> List.of_seq in let init_state = Session_manager.lock_with_view (fun view -> view.init_state ) in let rec aux rerun snapshots lines : [ `No_changes | `Changes_made of Session.Snapshot.t Dynarray.t ] = CCIO.with_out file (fun oc -> CCIO.write_lines_l oc lines; ); let old_stats = Unix.stat file in close_term (); Path_opening.config_and_cmd_to_open_text_file ~path:file ~line_num:(max 1 (Dynarray.length snapshots - 1)) () |> (fun (config, cmd) -> Result.get_ok (Path_opening.resolve_cmd config cmd) ) |> Sys.command |> ignore; let new_stats = Unix.stat file in if rerun || Float.abs (new_stats.st_mtime -. 
old_stats.st_mtime) >= Params.float_compare_margin then ( let state = ref init_state in let snapshots = Dynarray.create () in Dynarray.add_last snapshots (Session.Snapshot.make ~last_command:None init_state); let rerun = ref false in let lines = CCIO.with_in file (fun ic -> CCIO.read_lines_l ic |> CCList.flat_map (fun line -> if String_utils.line_is_blank_or_system_comment line then ( [ line ] ) else ( match Command.of_string line with | None -> ( rerun := true; [ line; "# Failed to parse the above command" ] ) | Some command -> ( match Session.run_command pool command !state with | None -> ( rerun := true; [ line; "# Failed to run the above command, check if the arguments are correct" ] ) | Some (command, x) -> ( state := x; let snapshot = Session.Snapshot.make ~last_command:(Some command) !state in Dynarray.add_last snapshots snapshot; [ line ] ) ) ) ) ) in if !rerun then ( aux true snapshots lines ) else ( `Changes_made snapshots ) ) else ( `No_changes ) in (try let res = aux false init_snapshots init_lines in (try Sys.remove file; with | Sys_error _ -> () ); (match res with | `No_changes -> () | `Changes_made snapshots -> ( Session_manager.load_snapshots snapshots ) ); with | Sys_error _ -> ( exit_with_error_msg (Fmt.str "failed to read or write temporary command history file %s" (Filename.quote file)) )); loop () ) | Clear_command_history -> ( let init_state = Session_manager.lock_with_view (fun view -> view.init_state ) in let snapshots = Dynarray.create () in Dynarray.add_last snapshots (Session.Snapshot.make ~last_command:None init_state); Session_manager.load_snapshots snapshots; loop () ) | Open_script path -> ( let init_state = Session_manager.lock_with_view (fun view -> view.init_state ) in match Script.run pool ~init_state ~path with | Error msg -> exit_with_error_msg msg | Ok snapshots -> ( Session_manager.load_snapshots snapshots; loop () ) ) | Edit_script path -> ( close_term (); Path_opening.config_and_cmd_to_open_text_file ~path () |> (fun 
(config, cmd) -> Result.get_ok (Path_opening.resolve_cmd config cmd) ) |> Sys.command |> ignore; loop () ) ) in Eio.Fiber.any [ (fun () -> Eio.Domain_manager.run (Eio.Stdenv.domain_mgr env) (fun () -> Session_manager.worker_fiber pool)); Session_manager.manager_fiber; UI_base.Key_binding_info.grid_light_fiber; (fun () -> (match start_with_filter with | None -> () | Some start_with_filter -> ( let start_with_filter_len = String.length start_with_filter in Lwd.set UI_base.Vars.filter_field (start_with_filter, start_with_filter_len); UI.update_filter ~commit:true (); )); (match start_with_search with | None -> () | Some start_with_search -> ( let start_with_search_len = String.length start_with_search in Lwd.set UI_base.Vars.search_field (start_with_search, start_with_search_len); UI.update_search ~commit:true (); )); loop (); ); ]; close_term (); clean_up (); (match debug_log with | Some "-" -> () | _ -> ( match !Params.debug_output with | None -> () | Some oc -> ( close_out oc ) ) ) let cmd ~eio_env ~sw = let open Term in let open Args in let doc = "TUI multiline fuzzy document finder" in let version = Version_string.s in Cmd.v (Cmd.info "docfd" ~version ~doc) (const (run ~eio_env ~sw) $ debug_log_arg $ no_pdftotext_arg $ no_pandoc_arg $ hidden_arg $ max_depth_arg $ max_fuzzy_edit_dist_arg $ max_token_search_dist_arg $ max_linked_token_search_dist_arg $ tokens_per_search_scope_level_arg $ index_chunk_size_arg $ exts_arg $ single_line_exts_arg $ add_exts_arg $ single_line_add_exts_arg $ cache_dir_arg $ cache_limit_arg $ data_dir_arg $ index_only_arg $ start_with_filter_arg $ start_with_search_arg $ filter_arg $ sample_arg $ samples_per_doc_arg $ search_arg $ sort_arg $ sort_no_score_arg $ color_arg $ underline_arg $ search_result_print_text_width_arg $ search_result_print_snippet_min_size_arg $ search_result_print_snippet_max_add_lines_arg $ start_with_script_arg $ script_arg $ paths_from_arg $ glob_arg $ single_line_glob_arg $ single_line_arg $ open_with_arg $ 
files_with_match_arg $ files_without_match_arg $ paths_arg) let () = if Sys.win32 then ( exit_with_error_msg "Windows is not supported" ); Random.self_init (); Eio_posix.run (fun eio_env -> Eio.Switch.run (fun sw -> exit (Cmd.eval (cmd ~eio_env ~sw)) )) ================================================ FILE: bin/document.ml ================================================ open Result_syntax open Docfd_lib type t = { search_mode : Search_mode.t; path : string; path_parts : string list; path_date : Timedesc.Date.t option; mod_time : Timedesc.t; title : string option; doc_id : int64; doc_hash : string; word_ids : Int_set.t; search_scope : Diet.Int.t option; links : Link.t array; link_index_of_start_pos : int Int_map.t; last_scan : Timedesc.t; } let equal (x : t) (y : t) = x.search_mode = y.search_mode && String.equal x.path y.path && String.equal x.doc_hash y.doc_hash && Option.equal Diet.Int.equal x.search_scope y.search_scope let compute_path_parts (path : string) = let path_parts = Tokenization.tokenize ~drop_spaces:false path |> List.of_seq in (path_parts) let compute_link_index_of_start_pos links = CCArray.foldi (fun acc i link -> Int_map.add link.Link.start_pos i acc ) Int_map.empty links let search_mode (t : t) = t.search_mode let path (t : t) = t.path let path_parts (t : t) = t.path_parts let path_date (t : t) = t.path_date let mod_time (t : t) = t.mod_time let title (t : t) = t.title let word_ids (t : t) = t.word_ids let doc_hash (t : t) = t.doc_hash let doc_id (t : t) = t.doc_id let search_scope (t : t) = t.search_scope let last_scan (t : t) = t.last_scan let links (t : t) = t.links let link_index_of_start_pos (t : t) = t.link_index_of_start_pos let refresh_modification_time ~path = let time = Unix.time () in Unix.utimes path time time let reset_search_scope_to_full (t : t) : t = { t with search_scope = None } let inter_search_scope (x : Diet.Int.t) (t : t) : t = let search_scope = match t.search_scope with | None -> x | Some y -> Diet.Int.inter x y in { t 
with search_scope = Some search_scope } module Compare = struct type order = [ | `Asc | `Desc ] let mod_time order d0 d1 = match order with | `Asc -> Timedesc.compare_chrono_min (mod_time d0) (mod_time d1) | `Desc -> Timedesc.compare_chrono_min (mod_time d1) (mod_time d0) let path order d0 d1 = match order with | `Asc -> String.compare (path d0) (path d1) | `Desc -> String.compare (path d1) (path d0) let path_date order d0 d1 = let fallback () = path order d0 d1 in match path_date d0, path_date d1 with | None, None -> fallback () | None, Some _ -> ( (* Always shuffle document with no path date to the back. *) 1 ) | Some _, None -> ( (* Always shuffle document with no path date to the back. *) -1 ) | Some x0, Some x1 -> ( match order with | `Asc -> ( match Timedesc.Date.compare x0 x1 with | 0 -> fallback () | n -> n ) | `Desc -> ( match Timedesc.Date.compare x1 x0 with | 0 -> fallback () | n -> n ) ) end module Ir0 = struct type t = { search_mode : Search_mode.t; doc_id : int64; doc_hash : string; path : string; last_scan : Timedesc.t; } let of_path ~(env : Eio_unix.Stdenv.base) search_mode ?doc_hash path : (t, string) result = let* doc_hash = match doc_hash with | Some x -> Ok x | None -> BLAKE2B.hash_of_file ~env ~path in let doc_id = Doc_id_db.doc_id_of_doc_hash doc_hash in Ok { search_mode; doc_id; doc_hash; path; last_scan = Timedesc.now ~tz_of_date_time:Params.tz (); } end module Ir1 = struct type t = { search_mode : Search_mode.t; doc_id : int64; doc_hash : string; path : string; data : [ `Lines of string Dynarray.t | `Pages of string list Dynarray.t ]; last_scan : Timedesc.t; } let of_path_to_text ~env ~doc_id ~doc_hash search_mode last_scan path : (t, string) result = let fs = Eio.Stdenv.fs env in try let data = Eio.Path.(with_lines (fs / path)) (fun lines -> `Lines (Dynarray.of_seq lines) ) in Ok { search_mode; doc_id; doc_hash; path; data; last_scan; } with | Failure _ | End_of_file | Eio.Buf_read.Buffer_limit_exceeded -> ( Error (Printf.sprintf "failed 
to read file: %s" (Filename.quote path)) ) let of_path_to_pdf ~env ~doc_id ~doc_hash search_mode last_scan path : (t, string) result = let proc_mgr = Eio.Stdenv.process_mgr env in let fs = Eio.Stdenv.fs env in try let cmd = [ "pdftotext"; path; "-" ] in let pages = match Proc_utils.run_return_stdout ~proc_mgr ~fs ~split_mode:`On_form_feed cmd with | None -> Seq.empty | Some pages -> ( List.to_seq pages |> Seq.map (fun page -> String.split_on_char '\n' page) ) in let data = `Pages (Dynarray.of_seq pages) in Ok { search_mode; doc_id; doc_hash; path; data; last_scan; } with | Failure _ | End_of_file | Eio.Buf_read.Buffer_limit_exceeded -> ( Error (Printf.sprintf "failed to read file: %s" (Filename.quote path)) ) let of_path_to_pandoc_supported_format ~env ~doc_id ~doc_hash search_mode last_scan path : (t, string) result = let proc_mgr = Eio.Stdenv.process_mgr env in let fs = Eio.Stdenv.fs env in let ext = File_utils.extension_of_file path in let from_format = ext |> String_utils.remove_leading_dots |> (fun s -> match s with | "htm" -> "html" | _ -> s ) in let cmd = [ "pandoc" ; "--from" ; from_format ; "--to" ; "plain" ; "--wrap" ; "none" ; path ] in let error_msg = Fmt.str "failed to extract text from %s" (Filename.quote path) in match Proc_utils.run_return_stdout ~proc_mgr ~fs ~split_mode:`On_line_split cmd with | None -> ( Error error_msg ) | Some lines -> ( let data = `Lines (Dynarray.of_list lines) in Ok { search_mode; doc_id; doc_hash; path; data; last_scan; } ) let of_ir0 ~(env : Eio_unix.Stdenv.base) (ir0 : Ir0.t) : (t, string) result = let { Ir0.search_mode; doc_id; doc_hash; path; last_scan } = ir0 in match File_utils.format_of_file path with | `PDF -> ( of_path_to_pdf ~env ~doc_id ~doc_hash search_mode last_scan path ) | `Pandoc_supported_format -> ( of_path_to_pandoc_supported_format ~env ~doc_id ~doc_hash search_mode last_scan path ) | `Text | `Other -> ( of_path_to_text ~env ~doc_id ~doc_hash search_mode last_scan path ) end module Date_extraction = 
struct let yyyy = "(\\d{4})" let mm = "([01]\\d)" let mmm = "(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)" let mmmm = "(january|february|march|april|may|june|july|august|september|october|november|december)" let dd = "([0-3]\\d)" let int_of_month_string s = match String.lowercase_ascii (String.sub s 0 3) with | "jan" -> 1 | "feb" -> 2 | "mar" -> 3 | "apr" -> 4 | "may" -> 5 | "jun" -> 6 | "jul" -> 7 | "aug" -> 8 | "sep" -> 9 | "oct" -> 10 | "nov" -> 11 | "dec" -> 12 | _ -> failwith "unexpected case" let yyyy_mm_dd = let re = Fmt.str "(?:^|.*[^\\d])%s[^\\d]%s[^\\d]%s(?:$|[^\\d])" yyyy mm dd |> Re.Pcre.re |> Re.compile in fun s -> try let g = Re.exec re s in let start = Re.Group.start g 1 in let y = Re.Group.get g 1 |> int_of_string in let m = Re.Group.get g 2 |> int_of_string in let d = Re.Group.get g 3 |> int_of_string in Some (start, (y, m, d)) with | _ -> None let yyyy_month_dd ~month ~reverse = let re = let g1, g3 = if not reverse then ( (yyyy, dd) ) else ( (dd, yyyy) ) in Fmt.str "(?:^|.*[^\\d])%s[^A-Za-z\\d]?%s[^A-Za-z\\d]?%s(?:$|[^\\d])" g1 month g3 |> Re.Pcre.re |> Re.no_case |> Re.compile in fun s -> try let g = Re.exec re s in let start = Re.Group.start g 1 in let y_group_index, d_group_index = if not reverse then ( (1, 3) ) else ( (3, 1) ) in let y = Re.Group.get g y_group_index |> int_of_string in let m = int_of_month_string (Re.Group.get g 2) in let d = Re.Group.get g d_group_index |> int_of_string in Some (start, (y, m, d)) with | _ -> None let yyyymmdd = let re = Fmt.str "(?:^|.*[^\\d])%s%s%s" yyyy mm dd |> Re.Pcre.re |> Re.compile in fun s -> try let g = Re.exec re s in let start = Re.Group.start g 1 in let y = Re.Group.get g 1 |> int_of_string in let m = Re.Group.get g 2 |> int_of_string in let d = Re.Group.get g 3 |> int_of_string in Some (start, (y, m, d)) with | _ -> None let extract s = let rec aux acc l = match l with | [] -> ( match acc with | None -> None | Some (_start_match_pos, (year, month, day)) -> ( match Timedesc.Date.Ymd.make 
~year ~month ~day with | Ok date -> Some date | Error _ -> None ) ) | f :: fs -> ( let acc = match acc, f s with | None, x -> x | Some x, None -> Some x | Some (start_match_pos, ymd), Some (start_match_pos', ymd') -> ( if start_match_pos' > start_match_pos then ( Some (start_match_pos', ymd') ) else ( Some (start_match_pos, ymd) ) ) in aux acc fs ) in aux None [ yyyy_mm_dd; yyyy_month_dd ~month:mmm ~reverse:true; yyyy_month_dd ~month:mmm ~reverse:false; yyyy_month_dd ~month:mmmm ~reverse:true; yyyy_month_dd ~month:mmmm ~reverse:false; yyyymmdd; ] end module Ir2 = struct type t = { search_mode : Search_mode.t; doc_id : int64; doc_hash : string; path : string; path_parts : string list; path_date : Timedesc.Date.t option; mod_time : Timedesc.t; title : string option; raw : Index.Raw.t; links : Link.t array; last_scan : Timedesc.t; } type work_stage = | Title | Content let parse_lines pool (s : string Seq.t) : string option * Index.Raw.t = let rec aux (stage : work_stage) title s = match stage with | Content -> ( let raw = Index.Raw.of_lines pool s in (title, raw) ) | Title -> ( match s () with | Seq.Nil -> aux Content title Seq.empty | Seq.Cons (x, xs) -> ( aux Content (Some (Misc_utils.sanitize_string x)) (Seq.cons x xs) ) ) in aux Title None s let parse_pages pool (s : string list Seq.t) : string option * Index.Raw.t = let rec aux (stage : work_stage) title s = match stage with | Content -> ( let raw = Index.Raw.of_pages pool s in (title, raw) ) | Title -> ( match s () with | Seq.Nil -> aux Content title Seq.empty | Seq.Cons (x, xs) -> ( let title = match x with | [] -> None | x :: _ -> Some (Misc_utils.sanitize_string x) in aux Content title (Seq.cons x xs) ) ) in aux Title None s let of_ir1 pool (ir : Ir1.t) : t = let { Ir1.search_mode; doc_id; doc_hash; path; data; last_scan } = ir in let path_parts = compute_path_parts path in let path_date = Date_extraction.extract path in let stats = Unix.stat path in let mod_time = Timedesc.of_timestamp_float_s_exn 
stats.Unix.st_mtime in let title, raw = match data with | `Lines x -> ( parse_lines pool (Dynarray.to_seq x) ) | `Pages x -> ( parse_pages pool (Dynarray.to_seq x) ) in let links = Index.Raw.links raw in { search_mode; path; path_parts; path_date; mod_time; doc_id; doc_hash; title; raw; links; last_scan; } end
(* Persist a fully parsed document (Ir2) into the SQLite index and build the
   in-memory handle: writes the word database and the raw index rows for
   [doc_id], then derives the start-position -> link-index lookup map. *)
let of_ir2 db ~already_in_transaction (ir : Ir2.t) : t = let { Ir2.search_mode; path; path_parts; path_date; mod_time; title; doc_id; doc_hash; raw; links; last_scan; } = ir in Word_db.write_to_db db ~already_in_transaction; Index.write_raw_to_db db ~already_in_transaction ~doc_id raw; let link_index_of_start_pos = compute_link_index_of_start_pos links in { search_mode; path; path_parts; path_date; mod_time; title; doc_id; doc_hash; word_ids = Index.Raw.word_ids raw; search_scope = None; links; link_index_of_start_pos; last_scan; }
(* Load a document from [path].  When the file's content hash (computed here
   via BLAKE2B unless supplied by the caller) is already present in the index,
   the handle is rebuilt straight from the on-disk index — the title being
   line 0 of the indexed text, if any.  Otherwise the full
   Ir0 -> Ir1 -> Ir2 parsing pipeline runs and the result is persisted via
   [of_ir2] inside [with_db]. *)
let of_path ~(env : Eio_unix.Stdenv.base) pool ~already_in_transaction search_mode ?doc_hash path : (t, string) result = let open Sqlite3_utils in let* doc_hash = match doc_hash with | Some x -> Ok x | None -> BLAKE2B.hash_of_file ~env ~path in if Index.is_indexed ~doc_hash then ( let doc_id = Doc_id_db.doc_id_of_doc_hash doc_hash in let title = if Index.global_line_count ~doc_id = 0 then None else Some (Index.line_of_global_line_num ~doc_id 0) in let path_parts = compute_path_parts path in let path_date = Date_extraction.extract path in let stats = Unix.stat path in let mod_time = Timedesc.of_timestamp_float_s_exn stats.Unix.st_mtime in let links = Index.links ~doc_id in Ok { search_mode; path; path_parts; path_date; mod_time; title; doc_id; doc_hash; word_ids = Index.word_ids ~doc_id; search_scope = None; links; link_index_of_start_pos = compute_link_index_of_start_pos links; last_scan = Timedesc.now ~tz_of_date_time:Params.tz () } ) else ( let* ir0 = Ir0.of_path ~env search_mode ~doc_hash path in let* ir1 = Ir1.of_ir0 ~env ir0 in let ir2 = Ir2.of_ir1 pool ir1 in let res = with_db (fun db -> Ok (of_ir2 db ~already_in_transaction ir2) ) in res )
(* Evaluate a filter expression against a single document.
   - Date atoms compare against the path-derived date or the modification
     time via [date_f].
   - [Path_fuzzy] requires, for some phrase of the expression, that every
     enriched token be compatible with at least one path part.
   - [Content] runs an index search with [~terminate_on_result_found:true];
     the search raises [Index.Search_job.Result_found] on the first hit,
     which is caught here and turned into an early-exit [true]. *)
let satisfies_filter_exp pool stop_signal ~global_first_word_candidates_lookup (exp : Filter_exp.t) (t : t) : bool = let open Filter_exp in let date_f (op : Filter_exp.compare_op) = match op with | Eq -> Timedesc.Date.equal | Le -> Timedesc.Date.le | Ge -> Timedesc.Date.ge | Lt -> Timedesc.Date.lt | Gt -> Timedesc.Date.gt in let rec aux exp = match exp with | Empty -> true | Path_date (op, date) -> ( match t.path_date with | None -> false | Some path_date -> ( date_f op path_date date ) ) | Mod_date (op, date) -> ( date_f op (Timedesc.date t.mod_time) date ) | Path_fuzzy exp -> ( List.exists (fun phrase -> List.for_all (fun token -> List.exists (fun path_part -> Search_phrase.Enriched_token.compatible_with_word token path_part ) t.path_parts ) (Search_phrase.enriched_tokens phrase) ) (Search_exp.flattened exp) ) | Path_glob glob -> ( Glob.is_empty glob || Glob.match_ glob t.path ) | Ext ext -> ( File_utils.extension_of_file t.path = ext ) | Content exp -> ( try Index.search pool stop_signal ~terminate_on_result_found:true ~doc_id:t.doc_id ~doc_word_ids:(word_ids t) ~global_first_word_candidates_lookup ~within_same_line:false ~search_scope:None exp |> ignore; false with | Index.Search_job.Result_found -> true ) | Binary_op (op, e1, e2) -> ( match op with | And -> aux e1 && aux e2 | Or -> aux e1 || aux e2 ) | Unary_op (op, e) -> ( match op with | Not -> not (aux e) ) in aux exp ================================================ FILE: bin/document.mli ================================================ open Docfd_lib type t val equal : t -> t -> bool
(* Orderings over documents (by modification time, path-derived date, or
   path), used when sorting document listings. *)
module Compare : sig type order = [ | `Asc | `Desc ] val mod_time : order -> t -> t -> int val path_date : order -> t -> t -> int val path : order -> t -> t -> int end val search_mode : t -> Search_mode.t val path : t -> string val path_date : t -> Timedesc.Date.t option val mod_time : t -> Timedesc.t val title : t -> string option val word_ids : t -> Int_set.t val doc_hash : t -> string val doc_id : t -> int64
val search_scope : t -> Diet.Int.t option val last_scan : t -> Timedesc.t val links : t -> Link.t array val link_index_of_start_pos : t -> int Int_map.t
(* Filter-expression evaluation over a single document; see document.ml for
   the semantics of each filter atom. *)
val satisfies_filter_exp : Task_pool.t -> Stop_signal.t -> global_first_word_candidates_lookup:Int_set.t Search_phrase.Enriched_token.Data_map.t -> Filter_exp.t -> t -> bool val of_path : env:Eio_unix.Stdenv.base -> Task_pool.t -> already_in_transaction:bool -> Search_mode.t -> ?doc_hash:string -> string -> (t, string) result val reset_search_scope_to_full : t -> t val inter_search_scope : Diet.Int.t -> t -> t
(* Intermediate representations of the document-loading pipeline:
   Ir0 = path + content hash, Ir1 = raw text (lines or pages),
   Ir2 = parsed index data ready to be persisted. *)
module Ir0 : sig type t val of_path : env:Eio_unix.Stdenv.base -> Search_mode.t -> ?doc_hash:string -> string -> (t, string) result end module Ir1 : sig type t val of_ir0 : env:Eio_unix.Stdenv.base -> Ir0.t -> (t, string) result end module Ir2 : sig type t val of_ir1 : Task_pool.t -> Ir1.t -> t end val of_ir2 : Sqlite3.db -> already_in_transaction:bool -> Ir2.t -> t ================================================ FILE: bin/document_pipeline.ml ================================================ open Docfd_lib open Debug_utils
(* Three-stage concurrent document-loading pipeline.
   Stages communicate through bounded [Eio.Stream] queues; [None] on a queue
   is the end-of-stream sentinel.  Each [*_batch_release] semaphore is
   released once per worker on shutdown so the finalizer can wait for an
   entire stage to drain before closing the next queue. *)
type t = { env : Eio_unix.Stdenv.base; pool : Task_pool.t; ir0_queue : Document.Ir0.t option Eio.Stream.t; ir1_of_ir0_workers_batch_release : Eio.Semaphore.t; ir1_queue : Document.Ir1.t option Eio.Stream.t; ir2_of_ir1_workers_batch_release : Eio.Semaphore.t; ir2_queue : Document.Ir2.t option Eio.Stream.t; documents : Document.t Dynarray.t; result : Document.t Dynarray.t Eio.Stream.t; }
(* Inter-stage queues are bounded at capacity 100 so a fast stage cannot run
   arbitrarily far ahead of a slow one; [result] has capacity 1 and carries
   the final document array exactly once. *)
let make ~env pool : t = { env; pool; ir0_queue = Eio.Stream.create 100; ir1_of_ir0_workers_batch_release = Eio.Semaphore.make 0; ir1_queue = Eio.Stream.create 100; ir2_of_ir1_workers_batch_release = Eio.Semaphore.make 0; ir2_queue = Eio.Stream.create 100; documents = Dynarray.create (); result = Eio.Stream.create 1; }
(* Stage-1 worker: turns Ir0 (path + hash) into Ir1 (raw text).  On the
   [None] sentinel it releases the batch semaphore and stops.  A document
   that fails to read is logged in debug mode and dropped. *)
let ir1_of_ir0_worker (t : t) = let run = ref true in while !run do match Eio.Stream.take t.ir0_queue with | None -> ( Eio.Semaphore.release t.ir1_of_ir0_workers_batch_release; run := false )
| Some ir0 -> ( match Document.Ir1.of_ir0 ~env:t.env ir0 with | Error msg -> ( do_if_debug (fun oc -> Printf.fprintf oc "Error: %s\n" msg ) ) | Ok ir1 -> ( Eio.Stream.add t.ir1_queue (Some ir1) ) ) done
(* Stage-2 worker: parses Ir1 raw text into Ir2 index data on the task
   pool, with the same [None]-sentinel shutdown protocol as stage 1. *)
let ir2_of_ir1_worker (t : t) = let run = ref true in while !run do match Eio.Stream.take t.ir1_queue with | None -> ( Eio.Semaphore.release t.ir2_of_ir1_workers_batch_release; run := false ) | Some ir -> ( Eio.Stream.add t.ir2_queue (Some (Document.Ir2.of_ir1 t.pool ir)) ) done
(* Stage-3 worker (single instance): persists Ir2 values into SQLite,
   batching writes into transactions of roughly 100 documents
   ("BEGIN IMMEDIATE" ... "COMMIT") and collecting the resulting
   [Document.t] handles into [t.documents].  Any transaction still open
   when the sentinel arrives is committed before returning. *)
let document_of_ir2_worker (t : t) = let open Sqlite3_utils in let run = ref true in let counter = ref 0 in let outstanding_transaction = ref false in with_db (fun db -> while !run do if !counter = 0 then ( step_stmt ~db "BEGIN IMMEDIATE" ignore; outstanding_transaction := true; ); (match Eio.Stream.take t.ir2_queue with | None -> ( run := false ) | Some ir -> ( let doc = Document.of_ir2 db ~already_in_transaction:true ir in Dynarray.add_last t.documents doc; do_if_debug (fun oc -> Printf.fprintf oc "Document %s loaded successfully\n" (Filename.quote (Document.path doc)); ); )); if !counter >= 100 then ( step_stmt ~db "COMMIT" ignore; outstanding_transaction := false; counter := 0; ) else ( incr counter; ); done; if !outstanding_transaction then ( step_stmt ~db "COMMIT" ignore; ) )
(* Enqueue one document into the pipeline.  [doc_hash] is supplied by the
   caller, so [Ir0.of_path] does not re-hash the file; a failure to build
   the Ir0 is logged in debug mode and the document is dropped. *)
let feed (t : t) search_mode ~doc_hash path = do_if_debug (fun oc -> Printf.fprintf oc "Loading document: %s\n" (Filename.quote path); ); do_if_debug (fun oc -> Printf.fprintf oc "Using %s search mode for document %s\n" (match search_mode with | `Single_line -> "single line" | `Multiline -> "multiline" ) (Filename.quote path) ); match Document.Ir0.of_path ~env:t.env search_mode ~doc_hash path with | Error msg -> ( do_if_debug (fun oc -> Printf.fprintf oc "Error: %s\n" msg ) ) | Ok ir0 -> ( Eio.Stream.add t.ir0_queue (Some ir0) )
(* Run one stage-1 and one stage-2 worker per task-pool slot, plus the single
   DB-writer fiber, all concurrently. *)
let run (t : t) = Eio.Fiber.all (List.concat [ CCList.(0 --^ Task_pool.size) |> List.map (fun _ -> (fun () -> ir1_of_ir0_worker t)) ; CCList.(0 --^ Task_pool.size) |> List.map (fun _ ->
(fun () -> ir2_of_ir1_worker t)) ; [ fun () -> document_of_ir2_worker t ] ] ); Eio.Stream.add t.result t.documents let finalize (t : t) = for _ = 0 to Task_pool.size - 1 do Eio.Stream.add t.ir0_queue None; done; for _ = 0 to Task_pool.size - 1 do Eio.Semaphore.acquire t.ir1_of_ir0_workers_batch_release; done; for _ = 0 to Task_pool.size - 1 do Eio.Stream.add t.ir1_queue None; done; for _ = 0 to Task_pool.size - 1 do Eio.Semaphore.acquire t.ir2_of_ir1_workers_batch_release; done; Eio.Stream.add t.ir2_queue None; Dynarray.to_list (Eio.Stream.take t.result) ================================================ FILE: bin/document_pipeline.mli ================================================ type t val make : env:Eio_unix.Stdenv.base -> Docfd_lib.Task_pool.t -> t val feed : t -> Search_mode.t -> doc_hash:string -> string -> unit val run : t -> unit val finalize : t -> Document.t list ================================================ FILE: bin/document_src.ml ================================================ type file_collection = { default_search_mode_files : String_set.t; single_line_search_mode_files : String_set.t; } let seq_of_file_collection (x : file_collection) = Seq.append (String_set.to_seq x.default_search_mode_files) (String_set.to_seq x.single_line_search_mode_files) let file_collection_size (x : file_collection) = String_set.cardinal x.default_search_mode_files + String_set.cardinal x.single_line_search_mode_files let empty_file_collection = { default_search_mode_files = String_set.empty; single_line_search_mode_files = String_set.empty; } type t = | Stdin of string | Files of file_collection ================================================ FILE: bin/dune ================================================ (rule (targets int_map.ml) (deps ../lib/int_map.ml) (action (copy# %{deps} %{targets})) ) (rule (targets int_set.ml) (deps ../lib/int_set.ml) (action (copy# %{deps} %{targets})) ) (rule (targets string_map.ml) (deps ../lib/string_map.ml) (action (copy# %{deps} 
%{targets})) ) (rule (targets char_map.ml) (deps ../lib/char_map.ml) (action (copy# %{deps} %{targets})) ) (rule (targets string_set.ml) (deps ../lib/string_set.ml) (action (copy# %{deps} %{targets})) ) (rule (targets parser_components.ml) (deps ../lib/parser_components.ml) (action (copy# %{deps} %{targets})) ) (executable (flags (-w "+a-4-9-29-37-40-42-44-48-50-32-30-70@8" -g)) (name docfd) (public_name docfd) (preprocess (pps ppx_deriving.show ppx_deriving.ord)) (libraries docfd_lib containers containers.unix cmdliner fmt notty notty.unix nottui nottui-unix lwd oseq eio eio_main eio_posix digestif.c digestif timedesc timedesc-tzlocal.unix-or-utc re progress diet sqlite3 ) ) ================================================ FILE: bin/file_utils.ml ================================================ open Misc_utils open Debug_utils let extension_of_file (s : string) = Filename.extension s |> String.lowercase_ascii type file_format = [ `PDF | `Pandoc_supported_format | `Text | `Other ] [@@deriving ord] module File_format_set = CCSet.Make (struct type t = file_format let compare = compare_file_format end) let format_of_file (s : string) : file_format = let ext = extension_of_file s in if ext = ".pdf" then ( `PDF ) else if List.mem ext Params.pandoc_supported_exts then ( `Pandoc_supported_format ) else if String_set.mem ext Params.common_text_file_exts || String_set.mem ext Params.common_code_file_exts then ( `Text ) else ( `Other ) type typ = [ | `File | `Dir ] type is_link = [ | `Is_link | `Not_link ] let typ_of_path (path : string) : (typ * is_link) option = try let stat = Unix.lstat path in match stat.st_kind with | S_REG -> Some (`File, `Not_link) | S_DIR -> Some (`Dir, `Not_link) | S_LNK -> ( let stat = Unix.stat path in match stat.st_kind with | S_REG -> Some (`File, `Is_link) | S_DIR -> Some (`Dir, `Is_link) | _ -> None ) | _ -> None with | _ -> None let cwd_with_trailing_sep () = Sys.getcwd () ^ Filename.dir_sep let remove_cwd_from_path (s : string) = let pre = 
cwd_with_trailing_sep () in match CCString.chop_prefix ~pre s with | None -> s | Some s -> s let read_in_channel_to_tmp_file (ic : in_channel) : (string, string) result = let file = Filename.temp_file "docfd-" ".txt" in try CCIO.with_out file (fun oc -> CCIO.copy_into ic oc ); Ok file with | _ -> ( Error (Fmt.str "failed to write stdin to %s" (Filename.quote file)) ) let next_choices path : string Seq.t = try Sys.readdir path |> Array.to_seq with | _ -> Seq.empty let list_files_recursive ?(max_depth = !Params.max_file_tree_scan_depth) ~(report_progress : unit -> unit) ~(filter : int -> string -> bool) (path : string) : String_set.t = let acc = ref String_set.empty in let add x = acc := String_set.add x !acc in let rec aux depth path = report_progress (); let hidden = String.starts_with ~prefix:"." (Filename.basename path) in if ((hidden && !Params.scan_hidden) || not hidden) && depth <= max_depth then ( match typ_of_path path with | Some (`Dir, _) -> ( next_choices path |> Seq.iter (fun f -> aux (depth + 1) (Filename.concat path f) ) ) | Some (`File, _) -> ( if filter depth path then ( add path ) ) | _ -> () ) in aux 0 (normalize_path_to_absolute path); !acc let list_files_recursive_filter_by_globs ?max_depth ~(report_progress : unit -> unit) (globs : string Seq.t) : String_set.t = let acc = ref String_set.empty in let add x = acc := String_set.add x !acc in let parse_glob ~case_sensitive s = match Glob.parse ~case_sensitive s with | None -> ( failwith (Fmt.str "expected subpath of a valid glob pattern to also be valid: \"%s\"" s) ) | Some x -> x in let rec aux ~case_sensitive (path_parts : string list) (glob_parts : string list) = report_progress (); let path = path_of_parts path_parts in match typ_of_path path, glob_parts with | Some (`File, _), [] -> add path | Some (`File, _), _ -> () | Some (`Dir, _), [] -> () | Some (`Dir, _), x :: xs -> ( match x with | "" | "." -> aux ~case_sensitive path_parts xs | ".." 
-> ( let path_parts = match path_parts with | [] -> [] | _ :: xs -> xs in aux ~case_sensitive path_parts xs ) | "**" -> ( let glob_string = String.concat Filename.dir_sep (path :: glob_parts) in do_if_debug (fun oc -> Printf.fprintf oc "Compiling glob using pattern: %s\n" glob_string ); let glob = parse_glob ~case_sensitive glob_string in path |> list_files_recursive ?max_depth ~report_progress ~filter:(fun _depth path -> Glob.match_ glob path ) |> String_set.iter (fun path -> do_if_debug (fun oc -> Printf.fprintf oc "Glob %s matches path %s\n" glob_string path ); add path ) ) | _ -> ( let glob = parse_glob ~case_sensitive x in next_choices path |> Seq.iter (fun f -> if Glob.match_ glob f then ( aux ~case_sensitive (f :: path_parts) xs ) ) ) ) | None, _ -> () | exception _ -> () in Seq.iter (fun glob -> let case_sensitive = Glob.parse glob |> Option.get |> Glob.case_sensitive in let glob_parts = CCString.split ~by:Filename.dir_sep glob in match glob_parts with | "" :: l -> ( (* Absolute path on Unix-like systems *) aux ~case_sensitive [ "" ] l ) | _ -> ( aux ~case_sensitive (cwd_path_parts ()) glob_parts ) ) globs; !acc let list_files_recursive_filter_by_exts ?max_depth ~report_progress ~(exts : string list) (paths : string Seq.t) : String_set.t = let filter depth path = let ext = extension_of_file path in depth = 0 || List.mem ext exts in paths |> Seq.map normalize_path_to_absolute |> Seq.map (list_files_recursive ?max_depth ~report_progress ~filter) |> Seq.fold_left String_set.union String_set.empty let mkdir_recursive (dir : string) : unit = let rec aux first acc parts = match parts with | [] -> () | "" :: xs -> ( if first then aux false Filename.dir_sep xs else aux false "" xs ) | x :: xs -> ( let acc = Filename.concat acc x in match Sys.is_directory acc with | true -> aux false acc xs | false -> ( exit_with_error_msg (Fmt.str "%s is not a directory" (Filename.quote acc)) ) | exception (Sys_error _) -> ( do_if_debug (fun oc -> Printf.fprintf oc "Creating 
directory: %s\n" (Filename.quote acc) ); (try Sys.mkdir acc 0o755 with | _ -> ( exit_with_error_msg (Fmt.str "failed to create directory: %s" (Filename.quote acc)) ) ); aux false acc xs ) ) in aux true "" (CCString.split ~by:Filename.dir_sep dir) let file_size (path : string) : int option = try let st = Unix.stat path in Some st.st_size with | _ -> None ================================================ FILE: bin/filter_exp.ml ================================================ open Docfd_lib type t = | Empty | Path_date of compare_op * Timedesc.Date.t | Path_fuzzy of Search_exp.t | Path_glob of Glob.t | Ext of string | Mod_date of compare_op * Timedesc.Date.t | Content of Search_exp.t | Binary_op of binary_op * t * t | Unary_op of unary_op * t and binary_op = | And | Or and unary_op = | Not and compare_op = | Eq | Le | Ge | Lt | Gt let empty = Empty let is_empty (e : t) = match e with | Empty -> true | _ -> false let equal (e1 : t) (e2 : t) = let rec aux e1 e2 = match e1, e2 with | Empty, Empty -> true | Path_date (op1, x1), Path_date (op2, x2) -> op1 = op2 && Timedesc.Date.equal x1 x2 | Path_fuzzy x, Path_fuzzy y -> Search_exp.equal x y | Path_glob x, Path_glob y -> Glob.equal x y | Ext x, Ext y -> String.equal x y | Content x, Content y -> Search_exp.equal x y | Binary_op (op1, x1, y1), Binary_op (op2, x2, y2) -> op1 = op2 && aux x1 x2 && aux y1 y2 | Unary_op (op1, x1), Unary_op (op2, x2) -> op1 = op2 && aux x1 x2 | _, _ -> false in aux e1 e2 let all_content_search_exps (t : t) : Search_exp.t list = let rec aux t = match t with | Empty | Path_date _ | Path_fuzzy _ | Path_glob _ | Ext _ | Mod_date _ -> [] | Content e -> [e] | Binary_op (_op, e1, e2) -> aux e1 @ aux e2 | Unary_op (_op, e) -> aux e in aux t module Parsers = struct type exp = t open Angstrom open Parser_components let alphanum_symbol_string = take_while1 (fun c -> is_letter c || is_digit c || (match c with | '&' | '|' -> true | _ -> false ) ) let maybe_quoted_string ?(force_quote = false) () = ( (choice 
[ char '"'; char '\''; ] >>= fun c -> return (Some c) ) <|> (if force_quote then fail "" else return None) ) >>= fun quote_char -> many1 ( take_while1 (fun c -> match c with | '\\' -> false | c -> ( match quote_char with | None -> ( is_not_space c && (match c with | '(' | ')' -> false | _ -> true ) ) | Some quote_char -> c <> quote_char ) ) <|> (char '\\' *> any_char >>| fun c -> Printf.sprintf "%c" c) ) >>= fun l -> let s = String.concat "" l in (end_of_input *> return s) <|> (match quote_char with | None -> return s | Some quote_char -> char quote_char *> return s) let search_exp ?force_quote () = maybe_quoted_string ?force_quote () >>= fun s -> match Search_exp.parse s with | None -> fail "" | Some x -> return x let glob = maybe_quoted_string () >>= fun s -> let s = Misc_utils.normalize_filter_glob_if_not_empty s in match Glob.parse s with | None -> fail "" | Some x -> return x let ext = maybe_quoted_string () >>| fun s -> s |> String.lowercase_ascii |> String_utils.remove_leading_dots |> Fmt.str ".%s" let compare_op = choice [ char '=' *> skip_spaces *> return Eq; string "<=" *> skip_spaces *> return Le; string ">=" *> skip_spaces *> return Ge; char '<' *> skip_spaces *> return Lt; char '>' *> skip_spaces *> return Gt; ] let date = any_string >>= fun s -> match Timedesc.Date.Ymd.of_iso8601 s with | Ok x -> return x | Error _ -> fail "" let compare_date f = let p = compare_op >>= fun op -> date >>| fun date -> f (op, date) in maybe_quoted_string () >>= fun s -> match Angstrom.(parse_string ~consume:Consume.All) p s with | Ok x -> return x | Error s -> fail s let binary_op op_strings op = alphanum_symbol_string >>= fun s -> skip_spaces *> ( if List.mem (String.lowercase_ascii s) op_strings then ( return (fun x y -> Binary_op (op, x, y)) ) else ( fail "" ) ) let and_op = binary_op [ "and" ] And let or_op = binary_op [ "or" ] Or let unary_op op_strings op = alphanum_symbol_string >>= fun s -> skip_spaces *> ( if List.mem (String.lowercase_ascii s) op_strings then ( 
return (fun x -> Unary_op (op, x)) ) else ( fail "" ) ) let not_op = unary_op [ "not" ] Not let p = skip_spaces *> ( (end_of_input *> return empty) <|> fix (fun (exp : exp Angstrom.t) -> let base = choice [ (search_exp ~force_quote:true () >>| fun x -> Content x); (string "content:" *> search_exp () >>| fun x -> Content x); (string "path-date:" *> compare_date (fun (op, date) -> Path_date (op, date))); (string "path-fuzzy:" *> search_exp () >>| fun x -> Path_fuzzy x); (string "path-glob:" *> glob >>| fun x -> Path_glob x); (string "mod-date:" *> compare_date (fun (op, date) -> Mod_date (op, date))); (string "ext:" *> ext >>| fun x -> Ext x); (char '(' *> skip_spaces *> exp <* char ')'); ] <* skip_spaces in let maybe_neg = (not_op >>= fun f -> skip_spaces *> base >>| f) <|> base in let conj = chainl1 maybe_neg and_op in chainl1 conj or_op ) ) <* skip_spaces end let parse s = match Angstrom.(parse_string ~consume:Consume.All) Parsers.p s with | Ok e -> Some e | Error _ -> None ================================================ FILE: bin/glob.ml ================================================ type t = { case_sensitive : bool; string : string; re : Re.re; } let equal x y = x.case_sensitive = y.case_sensitive && String.equal x.string y.string let case_sensitive t = t.case_sensitive let string t = t.string let is_empty t = String.length t.string = 0 module Parsers = struct open Angstrom type part = [ | `Case_insensitive | `String of string ] let parts : part list Angstrom.t = many ( (take_while1 (fun c -> match c with | '\\' -> false | _ -> true ) >>| fun s -> `String s) <|> (char '\\' *> any_char >>= fun c -> if c = 'c' then return `Case_insensitive else return (`String (Printf.sprintf "%c" c))) ) end let parse ?case_sensitive (s : string) : t option = match Angstrom.(parse_string ~consume:Consume.All) Parsers.parts s with | Error _ -> None | Ok parts -> ( let case_insensitive = ref false in let s = parts |> List.filter_map (fun x -> match x with | `String s -> Some s | 
(* --- tail of bin/glob.ml: end of [parse] (cut at chunk start) --- *)
`Case_insensitive -> (
  case_insensitive := true;
  None
)
)
|> String.concat ""
in
(* An explicit [?case_sensitive] argument overrides the [\c]
   case-insensitivity marker found in the pattern (see [Parsers.parts]). *)
let case_sensitive =
  match case_sensitive with
  | None -> not !case_insensitive
  | Some x -> x
in
try
  let re =
    s
    |> (fun s -> if case_sensitive then s else String.lowercase_ascii s)
    |> Re.Glob.glob
      ~anchored:true
      ~pathname:true
      ~match_backslashes:false
      ~period:true
      ~expand_braces:false
      ~double_asterisk:true
    |> Re.compile
  in
  Some { case_sensitive; string = s; re; }
with
| _ -> None
)

(* An empty glob matches everything; otherwise match [s] against the
   compiled regex, lowercasing it first for case-insensitive globs. *)
let match_ t (s : string) =
  is_empty t
  || Re.execp t.re (if t.case_sensitive then s else String.lowercase_ascii s)

================================================ FILE: bin/glob.mli ================================================

type t

val parse : ?case_sensitive:bool -> string -> t option

val equal : t -> t -> bool

val is_empty : t -> bool

val case_sensitive : t -> bool

val string : t -> string

val match_ : t -> string -> bool

================================================ FILE: bin/lock_protected_cell.ml ================================================

(* A single-value cell guarded by an [Eio.Mutex].
   [get] takes AND clears the value, so each stored value is observed
   at most once. *)
type 'a t = {
  lock : Eio.Mutex.t;
  mutable data : 'a option;
}

let make () = {
  lock = Eio.Mutex.create ();
  data = None;
}

(* Overwrites any value already present. *)
let set (t : 'a t) (x : 'a) =
  Eio.Mutex.use_rw t.lock ~protect:false (fun () ->
    t.data <- Some x
  )

let unset (t : 'a t) =
  Eio.Mutex.use_rw t.lock ~protect:false (fun () ->
    t.data <- None
  )

(* Atomically take the current value (if any) and empty the cell. *)
let get (t : 'a t) : 'a option =
  Eio.Mutex.use_rw t.lock ~protect:false (fun () ->
    let x = t.data in
    t.data <- None;
    x
  )

================================================ FILE: bin/lock_protected_cell.mli ================================================

type 'a t

val make : unit -> 'a t

val set : 'a t -> 'a -> unit

val unset : 'a t -> unit

val get : 'a t -> 'a option

================================================ FILE: bin/misc_utils.ml ================================================

open Docfd_lib

include Docfd_lib.Misc_utils'

(* Clamp selection index [x] into the valid range [0, choice_count - 1]. *)
let bound_selection ~choice_count (x : int) : int =
  max 0 (min (choice_count - 1) x)

let frequencies_of_words_ci (s :
string Seq.t) : int String_map.t = Seq.fold_left (fun m word -> let word = String.lowercase_ascii word in let count = Option.value ~default:0 (String_map.find_opt word m) in String_map.add word (count + 1) m ) String_map.empty s let stdin_is_atty () = Unix.isatty Unix.stdin let stdout_is_atty () = Unix.isatty Unix.stdout let stderr_is_atty () = Unix.isatty Unix.stderr let compute_total_recognized_exts ~exts ~additional_exts = let split_on_comma = String.split_on_char ',' in (split_on_comma exts) :: (List.map split_on_comma additional_exts) |> List.to_seq |> Seq.flat_map List.to_seq |> Seq.map (fun s -> s |> CCString.trim |> String_utils.remove_leading_dots |> String.lowercase_ascii ) |> Seq.filter (fun s -> s <> "") |> Seq.map (fun s -> Printf.sprintf ".%s" s) |> List.of_seq let array_sub_seq : 'a. start:int -> end_exc:int -> 'a array -> 'a Seq.t = fun ~start ~end_exc arr -> let count = Array.length arr in let end_exc = min count end_exc in let rec aux start = if start < end_exc then ( Seq.cons arr.(start) (aux (start + 1)) ) else ( Seq.empty ) in aux start let rotate_list (x : int) (l : 'a list) : 'a list = let arr = Array.of_list l in let len = Array.length arr in Seq.append (array_sub_seq ~start:x ~end_exc:len arr) (array_sub_seq ~start:0 ~end_exc:x arr) |> List.of_seq let drain_eio_stream (x : 'a Eio.Stream.t) = let rec aux () = match Eio.Stream.take_nonblocking x with | None -> () | Some _ -> aux () in aux () let mib_of_bytes (x : int) = (Int.to_float x) /. (1024.0 *. 
1024.0) let progress_with_reporter ~interactive bar f = if interactive then ( Progress.with_reporter ~config:(Progress.Config.v ~ppf:Format.std_formatter ()) bar (fun report_progress -> let report_progress = let lock = Eio.Mutex.create () in fun x -> Eio.Mutex.use_rw lock ~protect:false (fun () -> report_progress x ) in f report_progress ) ) else ( f (fun _ -> ()) ) let normalize_filter_glob_if_not_empty (s : string) = if String.length s = 0 then ( s ) else ( normalize_glob_to_absolute s ) let trim_angstrom_error_msg (s : string) = CCString.chop_prefix ~pre:": " s |> Option.value ~default:s let ranking_of_ranked_document_list (l : string list) : int String_map.t = CCList.foldi (fun acc i path -> String_map.add path i acc ) String_map.empty l let fuzzy_rank_assoc (stop_signal : Stop_signal.t) ~(get_key : 'a -> string) (exp : Search_exp.t) (items : 'a Seq.t) : ('a * Search_result.t) Dynarray.t = let pick_best_search_result (s : Search_result.t Seq.t) : Search_result.t option = Seq.fold_left (fun best x -> match best with | None -> Some x | Some best -> ( if Search_result.score x > Search_result.score best then ( Some x ) else ( Some best ) ) ) None s in let search_in_line (line : string) (exp : Search_exp.t) : Search_result.t option = let parts = Tokenization.tokenize ~drop_spaces:false line |> List.of_seq in Search_exp.flattened exp |> List.to_seq |> Seq.flat_map (fun phrase -> Search_phrase.enriched_tokens phrase |> List.to_seq |> Seq.map (fun token -> List.to_seq parts |> Seq.mapi (fun i part -> (i, part) ) |> Seq.filter_map (fun (i, part) -> if Search_phrase.Enriched_token.compatible_with_word token part then ( Some (i, part, String.lowercase_ascii part) ) else ( None ) ) ) |> OSeq.cartesian_product |> Seq.map (fun l -> let found_phrase = List.map (fun (i, part, part_ci) -> Search_result.{ found_word_pos = i; found_word_ci = part_ci; found_word = part; } ) l in Search_result.make phrase ~found_phrase ~found_phrase_opening_closing_symbol_match_count:0 ) ) |> 
pick_best_search_result in Eio.Fiber.first (fun () -> Stop_signal.await stop_signal; Dynarray.create () ) (fun () -> items |> Seq.fold_left (fun acc item -> let line = get_key item in match search_in_line line exp with | None -> acc | Some best_result -> ( (item, best_result) :: acc ) ) [] |> List.sort (fun (_x0, r0) (_x1, r1) -> Search_result.compare_relevance r0 r1 ) |> Dynarray.of_list ) let highlights_of_search_result (search_result : Search_result.t) = List.fold_left (fun acc (x : Search_result.indexed_found_word) -> Int_set.add x.found_word_pos acc ) Int_set.empty (Search_result.found_phrase search_result) ================================================ FILE: bin/params.ml ================================================ include Docfd_lib.Params' let debug_output : out_channel option ref = ref None let scan_hidden = ref false let default_max_file_tree_scan_depth = 100 let max_file_tree_scan_depth = ref default_max_file_tree_scan_depth let preview_line_count = 5 let default_tokens_per_search_scope_level = 100 let tokens_per_search_scope_level = ref default_tokens_per_search_scope_level let docfd_script_ext = ".docfd-script" let pandoc_supported_exts = [ ".epub" ; ".odt" ; ".docx" ; ".fb2" ; ".ipynb" ; ".html" ; ".htm" ] let common_text_file_exts = String_set.of_list [ ".txt" ; ".md" ; ".markdown" ; ".rst" ; ".adoc" ; ".org" ; ".json" ; ".yaml" ; ".yml" ; ".toml" ; ".xml" ; ".csv" ; ".tsv" ; ".conf" ; ".cfg" ; ".ini" ; ".env" ; ".log" ] let common_code_file_exts = String_set.of_list [ ".sh" ; ".bash" ; ".zsh" ; ".fish" ; ".ksh" ; ".csh" ; ".tcsh" ; ".ps1" ; ".psm1" ; ".psd1" ; ".cmd" ; ".bat" ; ".py" ; ".pyi" ; ".pyw" ; ".ipynb" ; ".pxd" ; ".pxi" ; ".js" ; ".mjs" ; ".cjs" ; ".jsx" ; ".ts" ; ".tsx" ; ".vite" ; ".webpack" ; ".html" ; ".htm" ; ".xhtml" ; ".css" ; ".scss" ; ".sass" ; ".less" ; ".vue" ; ".svelte" ; ".svg" ; ".c" ; ".h" ; ".i" ; ".cpp" ; ".cc" ; ".cxx" ; ".hpp" ; ".hh" ; ".hxx" ; ".m" ; ".mm" ; ".java" ; ".kt" ; ".kts" ; ".scala" ; ".groovy" ; 
".clj" ; ".cljs" ; ".cljc" ; ".edn" ; ".go" ; ".rs" ; ".zig" ; ".nim" ; ".nims" ; ".hs" ; ".lhs" ; ".ml" ; ".mli" ; ".re" ; ".rei" ; ".elm" ; ".idr" ; ".agda" ; ".lua" ; ".pl" ; ".pm" ; ".rb" ; ".tcl" ; ".raku" ; ".jl" ; ".r" ; ".cs" ; ".csx" ; ".fs" ; ".fsi" ; ".fsx" ; ".vb" ; ".asm" ; ".s" ; ".S" ; ".ll" ; ".bc" ; ".wat" ; ".wasm" ; ".swift" ; ".storyboard" ; ".xib" ; ".json" ; ".jsonc" ; ".json5" ; ".yaml" ; ".yml" ; ".toml" ; ".xml" ; ".ini" ; ".cfg" ; ".conf" ; ".env" ; ".properties" ; ".sql" ; ".psql" ; ".cql" ; ".gql" ; ".graphql" ; ".mk" ; ".make" ; ".cmake" ; ".gradle" ; ".gradle.kts" ; ".bazel" ; ".bzl" ; ".ninja" ; ".build" ; ".Dockerfile" ; ".dockerignore" ; ".compose" ; ".compose.yml" ; ".tf" ; ".tfvars" ; ".hcl" ; ".ansible" ; ".playbook" ; ".md" ; ".markdown" ; ".rst" ; ".adoc" ; ".org" ; ".tex" ; ".vim" ; ".vimrc" ; ".emacs" ; ".editorconfig" ; ".awk" ; ".sed" ; ".pug" ; ".jade" ; ".haml" ; ".qs" ; ".kql" ] let default_recognized_exts = ([ "txt"; "md"; "pdf" ] @ pandoc_supported_exts ) |> List.map String_utils.remove_leading_dots |> String.concat "," let default_recognized_single_line_exts = [ "log"; "csv"; "tsv" ] |> List.map String_utils.remove_leading_dots |> String.concat "," let default_search_mode : Search_mode.t ref = ref `Multiline let default_sort_by_arg = "score,desc" let default_sort_by_no_score_arg = "path,asc" let index_file_ext = ".index" let db_file_name = "index.db" let hash_chunk_size = 4096 let text_editor = ref "" let default_samples_per_document = 5 let samples_per_document = ref default_samples_per_document type style_mode = [ `Never | `Always | `Auto ] let default_search_result_print_text_width = 80 let search_result_print_text_width = ref default_search_result_print_text_width let default_search_result_print_snippet_min_size = 10 let search_result_print_snippet_min_size = ref default_search_result_print_snippet_min_size let default_search_result_print_snippet_max_additional_lines_each_direction = 2 let 
search_result_print_snippet_max_additional_lines_each_direction = ref default_search_result_print_snippet_max_additional_lines_each_direction let default_cache_limit = 10_000 let cache_limit = ref default_cache_limit let cache_dir : string option ref = ref None let data_dir : string option ref = ref None let script_dir () = Filename.concat (Option.get !data_dir) "scripts" let tz : Timedesc.Time_zone.t = Option.value ~default:Timedesc.Time_zone.utc (Timedesc.Time_zone.local ()) let last_scan_format_string = "{year}-{mon:0X}-{day:0X} {hour:0X}:{min:0X}:{sec:0X}" ^ (match Timedesc.Time_zone.local () with | None -> "Z" | Some _ -> "") let last_modified_format_string = "{year}-{mon:0X}-{day:0X} {hour:0X}:{min:0X}" ^ (match Timedesc.Time_zone.local () with | None -> "Z" | Some _ -> "") let blink_on_duration : Mtime.span = Mtime.Span.(140 * ms) let session_manager_request_debounce_interval = Mtime.Span.(200 * ms) let session_manager_request_debounce_wait_buffer = Mtime.Span.(5 * ms) let os_typ : [ `Darwin | `Linux ] = let s = CCUnix.call_stdout "uname" |> String.trim |> String.lowercase_ascii in match s with | "darwin" -> `Darwin | _ -> `Linux let clipboard_copy_cmd_and_args = match os_typ with | `Darwin -> Some ("pbcopy", [||]) | `Linux -> ( if Proc_utils.command_exists "clip.exe" then ( Some ("clip.exe", [||]) ) else ( match Sys.getenv_opt "XDG_SESSION_TYPE" with | None -> None | Some s -> ( match String.lowercase_ascii s with | "x11" -> Some ("xclip", [| "-sel"; "clip" |]) | "wayland" -> Some ("wl-copy", [|"-n"|]) | _ -> None ) ) ) ================================================ FILE: bin/path_opening.ml ================================================ open Docfd_lib open Debug_utils type launch_mode = [ `Terminal | `Detached ] type spec = string list * launch_mode * string let specs : (string, launch_mode * string) Hashtbl.t = Hashtbl.create 128 module Parsers = struct open Angstrom open Parser_components let expected_char c = fail (Fmt.str "expected char %c" c) let 
inner ~path ~page_num ~line_num ~search_word : string t = choice [ string "path" *> commit *> return path; string "page_num" *> commit >>= (fun _ -> match page_num with | None -> fail "page_num not available" | Some n -> return (Fmt.str "%d" n) ); string "line_num" *> commit >>= (fun _ -> match line_num with | None -> fail "line_num not available" | Some n -> return (Fmt.str "%d" n) ); string "search_word" *> commit >>= (fun _ -> match search_word with | None -> fail "search_word not available" | Some s -> return (Fmt.str "'%s'" s)); ] <|> fail "invalid placeholder" let cmd ~path ~page_num ~line_num ~search_word : string t = let single = choice [ (string "{{" >>| fun _ -> Fmt.str "{"); (char '{' *> commit *> inner ~path ~page_num ~line_num ~search_word <* (char '}' <|> expected_char '}')); (take_while1 (function '{' -> false | _ -> true)); ] in many single >>| fun l -> String.concat "" l let spec : spec t = sep_by (char ',') (take_while1 (function ':' | ',' -> false | _ -> true)) >>= fun exts -> (char ':' <|> expected_char ':')*> (choice [ string "terminal" *> return `Terminal; string "detached" *> return `Detached; ] <|> fail "invalid launch mode") >>= fun launch_mode -> (char '=' <|> expected_char '=') *> any_string >>= fun cmd -> return (exts, launch_mode, cmd) end module Config = struct type t = { quote_path : bool; path : string; page_num : int option; line_num : int option; search_word : string option; launch_mode : launch_mode; } let make ?(quote_path = true) ~path ?page_num ?line_num ?search_word ~launch_mode () : t = { quote_path; path; page_num; line_num; search_word; launch_mode; } end let resolve_cmd (config : Config.t) (s : string) : (string, string) result = let open Angstrom in let { Config.quote_path; path; page_num; line_num; search_word } = config in let path = if quote_path then Filename.quote path else path in match parse_string ~consume:All (Parsers.cmd ~path ~page_num ~line_num ~search_word) s with | Error msg -> Error 
(Misc_utils.trim_angstrom_error_msg msg) | Ok s -> Ok s let parse_spec (s : string) : (spec, string) result = let open Angstrom in match parse_string ~consume:All Parsers.spec s with | Error msg -> Error (Misc_utils.trim_angstrom_error_msg msg) | Ok (exts', launch_mode, cmd) -> ( let rec aux acc exts = match exts with | [] -> Ok (List.rev acc, launch_mode, cmd) | ext :: rest -> ( let ext = ext |> String.lowercase_ascii |> String_utils.remove_leading_dots |> Fmt.str ".%s" in let config = if ext = ".pdf" then ( Config.make ~path:"path" ~page_num:1 ~search_word:"word" ~launch_mode:`Detached () ) else ( Config.make ~path:"path" ~line_num:1 ~launch_mode:`Terminal () ) in match resolve_cmd config cmd with | Error msg -> Error msg | Ok _ -> aux (ext :: acc) rest ) in aux [] exts' ) let xdg_open_cmd = "xdg-open {path}" let pandoc_supported_format_config_and_cmd ~path = (Config.make ~path ~launch_mode:`Detached (), xdg_open_cmd) let fallback_cmd : string = match Params.os_typ with | `Linux -> xdg_open_cmd | `Darwin -> "open {path}" let compute_most_unique_word_and_residing_page_num ~doc_id found_phrase = let page_nums = found_phrase |> List.map (fun word -> word.Search_result.found_word_pos |> (fun pos -> Index.loc_of_pos ~doc_id pos) |> Index.Loc.line_loc |> Index.Line_loc.page_num ) |> List.sort_uniq Int.compare in let frequency_of_word_of_page_ci : int String_map.t Int_map.t = List.fold_left (fun acc page_num -> let m = Misc_utils.frequencies_of_words_ci (Index.words_of_page_num ~doc_id page_num |> Dynarray.to_seq) in Int_map.add page_num m acc ) Int_map.empty page_nums in found_phrase |> List.map (fun word -> let page_num = Index.loc_of_pos ~doc_id word.Search_result.found_word_pos |> Index.Loc.line_loc |> Index.Line_loc.page_num in let m = Int_map.find page_num frequency_of_word_of_page_ci in let freq = String_map.fold (fun word_on_page_ci freq acc_freq -> if CCString.find ~sub:word.Search_result.found_word_ci word_on_page_ci >= 0 then ( acc_freq + freq ) else ( 
acc_freq ) ) m 0 in (word, page_num, freq) ) |> List.fold_left (fun best x -> let (_x_word, _x_page_num, x_freq) = x in match best with | None -> Some x | Some (_best_word, _best_page_num, best_freq) -> ( if x_freq < best_freq then Some x else best ) ) None |> Option.get |> (fun (word, page_num, _freq) -> (word.found_word, page_num)) let pdf_config_and_cmd ~path ~doc_id_and_search_result : Config.t * string = let config = let page_num, search_word = match doc_id_and_search_result with | None -> ( (1, "") ) | Some (doc_id, search_result) -> ( let found_phrase = Search_result.found_phrase search_result in let (most_unique_word, most_unique_word_page_num) = compute_most_unique_word_and_residing_page_num ~doc_id found_phrase in let page_num = most_unique_word_page_num + 1 in (page_num, most_unique_word) ) in Config.make ~path ~page_num ~search_word ~launch_mode:`Detached () in let cmd = match Params.os_typ with | `Linux -> ( match Xdg_utils.default_desktop_file_path `PDF with | None -> fallback_cmd | Some viewer_desktop_file_path -> ( let flatpak_package_name = let s = Filename.basename viewer_desktop_file_path in Option.value ~default:s (CCString.chop_suffix ~suf:".desktop" s) in let viewer_desktop_file_path_lowercase_ascii = String.lowercase_ascii viewer_desktop_file_path in let contains sub = CCString.find ~sub viewer_desktop_file_path_lowercase_ascii >= 0 in let make_command name args = if contains "flatpak" then Fmt.str "flatpak run %s %s" flatpak_package_name args else Fmt.str "%s %s" name args in match doc_id_and_search_result with | None -> fallback_cmd | Some _ -> ( if contains "okular" then make_command "okular" "--page {page_num} --find {search_word} {path}" else if contains "evince" then make_command "evince" "--page-index {page_num} --find {search_word} {path}" else if contains "xreader" then make_command "xreader" "--page-index {page_num} --find {search_word} {path}" else if contains "atril" then make_command "atril" "--page-index {page_num} --find 
{search_word} {path}" else if contains "zathura" then (* Check zathura before mupdf as desktop file for zathura might be `org.pwmt.zathura-pdf-mupdf.desktop` *) make_command "zathura" "--page {page_num} --find {search_word} {path}" else if contains "mupdf" then make_command "mupdf" "{path} {page_num}" else fallback_cmd ) ) ) | `Darwin -> fallback_cmd in (config, cmd) let config_and_cmd_to_open_text_file ~path ?(line_num = 1) () : Config.t * string = let editor = !Params.text_editor in let config = Config.make ~path ~line_num ~launch_mode:`Terminal () in let cmd = match Filename.basename editor with | "nano" -> Fmt.str "%s +{line_num} {path}" editor | "nvim" | "vim" | "vi" -> Fmt.str "%s +{line_num} {path}" editor | "kak" -> Fmt.str "%s +{line_num} {path}" editor | "hx" -> Fmt.str "%s {path}:{line_num}" editor | "emacs" -> Fmt.str "%s +{line_num} {path}" editor | "micro" -> Fmt.str "%s {path}:{line_num}" editor | "jed" | "xjed" -> Fmt.str "%s {path} -g {line_num}" editor | _ -> Fmt.str "%s {path}" editor in (config, cmd) let text_config_and_cmd ~path ~doc_id_and_search_result : Config.t * string = let line_num = match doc_id_and_search_result with | None -> None | Some (doc_id, search_result) -> ( let first_word = List.hd @@ Search_result.found_phrase search_result in let first_word_loc = Index.loc_of_pos ~doc_id first_word.Search_result.found_word_pos in first_word_loc |> Index.Loc.line_loc |> Index.Line_loc.line_num_in_page |> (fun x -> x + 1) |> Option.some ) in config_and_cmd_to_open_text_file ~path ?line_num () let main ~close_term ~path ~doc_id_and_search_result = let ext = File_utils.extension_of_file path in let config, cmd = (match File_utils.format_of_file path with | `PDF -> ( pdf_config_and_cmd ~path ~doc_id_and_search_result ) | `Pandoc_supported_format -> ( pandoc_supported_format_config_and_cmd ~path ) | `Text -> ( text_config_and_cmd ~path ~doc_id_and_search_result ) | `Other -> ( (Config.make ~path ~launch_mode:`Detached (), fallback_cmd) ) ) |> 
(* --- interior of bin/path_opening.ml: tail of [main] (cut at chunk start) --- *)
(* Apply any user-provided spec override registered for this extension,
   then substitute the {path}/{page_num}/{line_num}/{search_word}
   placeholders into the final command string. *)
(fun (config, cmd) ->
   match Hashtbl.find_opt specs ext with
   | None -> (
       (config, cmd)
     )
   | Some (launch_mode, cmd) -> (
       ({ config with launch_mode }, cmd)
     )
)
|> (fun (config, cmd) ->
    (config, Result.get_ok (resolve_cmd config cmd)))
in
match config.launch_mode with
| `Terminal -> (
    let cmd =
      if Misc_utils.stdin_is_atty () then (
        cmd
      ) else (
        (* Docfd's own stdin is not a terminal (e.g. piped input), so
           re-attach the spawned program's stdin to the controlling
           terminal; a terminal editor would otherwise read from the
           exhausted pipe.
           FIXME(review): this string literal was truncated in the
           archive dump; "%s </dev/tty" is a reconstruction mirroring
           the intact `Detached branch below — confirm against upstream
           bin/path_opening.ml. *)
        Fmt.str "%s </dev/tty" cmd
      )
    in
    do_if_debug (fun oc ->
      Printf.fprintf oc "System command: %s\n" cmd
    );
    Sys.command cmd |> ignore
  )
| `Detached -> (
    do_if_debug (fun oc ->
      Printf.fprintf oc "System command: %s\n" cmd
    );
    Proc_utils.run_in_background cmd |> ignore
  )

(* Walk upward from [path] looking for a project root: a directory
   containing a VCS or notes-app marker (.git, .hg, .svn, .obsidian,
   .logseq, .tangent). Returns [None] if nothing is found. *)
let find_project_root path =
  let rec aux arr =
    let cur =
      CCString.concat_seq ~sep:Filename.dir_sep (Dynarray.to_seq arr)
    in
    if Dynarray.length arr = 0 then (
      None
    ) else if Dynarray.length arr = 3 && Dynarray.get arr 0 = "home" then (
      (* Heuristic stop at a user's home directory.
         NOTE(review): for an absolute path the first split component
         is "", not "home", so this branch only fires for paths without
         a leading separator — confirm intent with callers. *)
      Some cur
    ) else (
      let candidates =
        try Some (Sys.readdir cur) with
        | _ -> None
      in
      match candidates with
      | None -> (
          (* Unreadable directory: give up rather than walk past it. *)
          None
        )
      | Some candidates -> (
          let root_indicator_exists =
            Array.exists (fun name ->
              List.mem name
                [ ".git"
                ; ".hg"
                ; ".svn"
                ; ".obsidian"
                ; ".logseq"
                ; ".tangent"
                ]
            ) candidates
          in
          if root_indicator_exists then (
            Some cur
          ) else (
            (* No marker here: drop the last path component and retry. *)
            Dynarray.pop_last arr |> ignore;
            aux arr
          )
        )
    )
  in
  let arr = Dynarray.of_list (CCString.split ~by:Filename.dir_sep path) in
  aux arr

(* Open the target of [link] found inside document [doc]. *)
let open_link ~close_term ~doc link =
  let { Link.typ; link; _ } = link in
  let doc_path = Document.path doc in
  let doc_dir = Filename.dirname doc_path in
  let doc_ext = Filename.extension doc_path in
  (* Resolve a wiki-style link to a concrete file path, preferring an
     exact filename match under the project root. *)
  let resolve_wiki_link link =
    let link = Option.value ~default:link (CCString.chop_prefix ~pre:"/" link) in
    let link_with_ext = link ^ doc_ext in
    if link.[0] = '.'
then ( Filename.concat doc_dir link_with_ext ) else ( let wiki_root = Option.value ~default:doc_dir (find_project_root doc_dir) in let candidates = File_utils.list_files_recursive ~report_progress:(fun () -> ()) ~filter:(fun _depth path -> let path_no_ext = try Filename.chop_extension path with | _ -> path in CCString.suffix ~suf:link path_no_ext ) wiki_root in match String_set.find_first_opt (fun path -> CCString.suffix ~suf:link_with_ext path ) candidates with | Some x -> x | None -> ( match String_set.min_elt_opt candidates with | Some x -> x | None -> Filename.concat wiki_root link_with_ext ) ) in if String.length link > 0 then ( match typ with | `Markdown -> ( let path = if Filename.is_relative link then ( Filename.concat doc_dir link ) else ( let project_root = Option.value ~default:doc_dir (find_project_root doc_dir) in Filename.concat project_root link ) in main ~close_term ~path ~doc_id_and_search_result:None ) | `Wiki -> ( let path = resolve_wiki_link link in main ~close_term ~path ~doc_id_and_search_result:None ) | `URL -> ( let config = Config.make ~path:link ~launch_mode:`Detached () in resolve_cmd config fallback_cmd |> Result.get_ok |> Proc_utils.run_in_background |> ignore ) ) ================================================ FILE: bin/ping.ml ================================================ type t = { queue : unit Eio.Stream.t; } let make () = { queue = Eio.Stream.create Int.max_int; } let ping (t : t) = Eio.Stream.add t.queue () let wait (t : t) = Eio.Stream.take t.queue; Misc_utils.drain_eio_stream t.queue let clear (t : t) = Misc_utils.drain_eio_stream t.queue ================================================ FILE: bin/ping.mli ================================================ type t val make : unit -> t val ping : t -> unit val wait : t -> unit val clear : t -> unit ================================================ FILE: bin/printers.ml ================================================ let output_image ~color (oc : out_channel) (img : Notty.image) : 
unit = let open Notty in let buf = Buffer.create 1024 in (* Choose the terminal capability set: ANSI escapes when color is wanted, plain text otherwise. *) let cap = if color then Cap.ansi else Cap.dumb in Render.to_buffer buf cap (0, 0) (I.width img, I.height img) img; Buffer.output_buffer oc buf
(* One blank line, rendered as a 0x1 void image. *)
let newline_image oc = Notty_unix.eol (Notty.I.void 0 1) |> output_image ~color:false oc
(* File-path header line, printed in magenta (grep-style output). *)
let path_image ~color oc path = Notty.I.string Notty.A.(fg magenta) path |> Notty_unix.eol |> output_image ~color oc
(* Prints one document's path followed by each of its search results,
   separating consecutive results with a blank line. *)
let search_result_group ~color ~underline (oc : out_channel) ((document, results) : Session.search_result_group) = let path = Document.path document in path_image ~color oc path; Array.iteri (fun i search_result -> if i > 0 then ( newline_image oc ); let img = Content_and_search_result_rendering.search_result ~doc_id:(Document.doc_id document) ~render_mode:(UI_base.render_mode_of_document document) ~width:!Params.search_result_print_text_width ~underline ~fill_in_context:true search_result in Notty_unix.eol img |> output_image ~color oc; ) results
(* Prints every group in [s], separating consecutive groups with a blank line. *)
let search_result_groups ~color ~underline (oc : out_channel) (s : Session.search_result_group Seq.t) = Seq.iteri (fun i x -> if i > 0 then ( newline_image oc; ); search_result_group ~color ~underline oc x ) s
================================================ FILE: bin/proc_utils.ml ================================================
open Misc_utils
(* True iff [cmd] resolves on PATH, via the POSIX shell builtin `command -v`.
   [Filename.quote] guards against shell metacharacters in [cmd]. *)
let command_exists (cmd : string) : bool = Sys.command (Fmt.str "command -v %s 2>/dev/null 1>/dev/null" (Filename.quote cmd)) = 0
(* Fire-and-forget: the trailing `&` makes the shell background the job, so this
   returns immediately. NOTE(review): [cmd] is interpolated unquoted — callers
   must supply an already-quoted command line. *)
let run_in_background (cmd : string) = Sys.command (Fmt.str "%s 2>/dev/null 1>/dev/null &" cmd)
(* Runs [cmd] and returns its stdout split on newlines or on form-feed (0x0C)
   separators; [None] on any failure. stderr is discarded into /dev/null. *)
let run_return_stdout ~proc_mgr ~fs ~(split_mode : [ `On_line_split | `On_form_feed ]) (cmd : string list) : string list option = let form_feed = Char.chr 0x0C in Eio.Path.(with_open_out ~create:`Never (fs / "/dev/null")) (fun stderr -> let output = try let lines = Eio.Process.parse_out proc_mgr (match split_mode with | `On_line_split -> Eio.Buf_read.(map List.of_seq lines) | `On_form_feed -> ( (* Parser for one form-feed-delimited chunk: skip a leading form feed
   if present, then take everything up to the next one. *) let p = let open Eio.Buf_read in let open Syntax in let* c =
peek_char in
                (match c with
                 | None -> return ()
                 | Some c -> (
                     (* Consume the separator itself so it is not included
                        in the following chunk. *)
                     if c = form_feed then (
                       skip 1
                     ) else (
                       return ()
                     )
                   ))
                *> (take_while (fun c -> c <> form_feed))
              in
              Eio.Buf_read.(map List.of_seq (seq p))
            )
          )
          ~stderr cmd
        in
        Some lines
      with
      | _ -> None
    in
    output
  )

(* Pipes output produced by [f] into the stdin of [command args...].
   Exits the whole program if [command] is not found on PATH. *)
let pipe_to_command (f : out_channel -> unit) command args =
  if not (command_exists command) then (
    exit_with_error_msg (Fmt.str "command %s not found" command)
  );
  let oc = Unix.open_process_args_out command (Array.append [|command|] args) in
  f oc;
  Out_channel.flush oc;
  (* Fix: reap the child instead of only closing the channel.
     [Out_channel.close] alone never waits on the spawned process, leaving a
     zombie until the parent exits; [Unix.close_process_out] closes the channel
     and performs the waitpid.  The exit status is deliberately ignored to keep
     the original best-effort behavior. *)
  ignore (Unix.close_process_out oc)
================================================ FILE: bin/result_syntax.ml ================================================
(* Binding operators for the [result] monad. *)
let ( let* ) = Result.bind
let ( let+ ) x y = Result.map y x
================================================ FILE: bin/script.ml ================================================
(* Replays the command script at [path] on top of [init_state], recording a
   snapshot after every successful command.  Returns [Error msg] on the first
   unreadable file, unparsable line, or failing command. *)
let run pool ~init_state ~path : (Session.Snapshot.t Dynarray.t, string) result =
  let exception Error_with_msg of string in
  let snapshots = Dynarray.create () in
  try
    let lines =
      try CCIO.with_in path CCIO.read_lines_l with
      | Sys_error _ -> (
          raise (Error_with_msg
                   (Fmt.str "failed to read script %s" (Filename.quote path)))
        )
    in
    (* Snapshot 0 is always the starting state, with no command attached. *)
    Dynarray.add_last snapshots
      (Session.Snapshot.make ~last_command:None init_state);
    lines
    |> CCList.foldi (fun state i line ->
        (* Script lines are reported 1-indexed in error messages. *)
        let line_num_in_error_msg = i + 1 in
        if String_utils.line_is_blank_or_system_comment line then (
          state
        ) else (
          match Command.of_string line with
          | None -> (
              raise (Error_with_msg
                       (Fmt.str "failed to parse command on line %d: %s"
                          line_num_in_error_msg line))
            )
          | Some command -> (
              match Session.run_command pool command state with
              | None -> (
                  raise (Error_with_msg
                           (Fmt.str "failed to run command on line %d: %s"
                              line_num_in_error_msg line))
                )
              | Some (command, state) -> (
                  let snapshot =
                    Session.Snapshot.make ~last_command:(Some command) state
                  in
                  Dynarray.add_last snapshots snapshot;
                  state
                )
            )
        )
      ) init_state
    |> ignore;
    Ok snapshots
  with
  | Error_with_msg msg -> Error msg
================================================ FILE: bin/search_mode.ml ================================================
(* How a document is searched: phrase matches constrained to a single line,
   or allowed to span lines. *)
type t = [
  | `Single_line
  | `Multiline
]
================================================ FILE: bin/session.ml ================================================
open Docfd_lib

type search_result_group = Document.t * Search_result.t array

module State = struct
  module Sort_by = struct
    (* [`Ranking m]: [m] maps document path to its rank (lower sorts earlier). *)
    type typ = [
      | `Path_date
      | `Path
      | `Score
      | `Mod_time
      | `Ranking of int String_map.t
    ]

    type t = typ * Document.Compare.order
  end

  type t = {
    all_documents : Document.t String_map.t;
    filter_exp : Filter_exp.t;
    filter_exp_string : string;
    documents_passing_filter : String_set.t;
    documents_marked : String_set.t;
    search_exp : Search_exp.t;
    search_exp_string : string;
    search_results : Search_result.t array String_map.t;
    sort_by : Sort_by.t;
    sort_by_no_score : Sort_by.t;
    screen_split : Command.screen_split;
    show_bottom_right_pane : bool;
    show_key_binding_info_pane : bool;
    focus_list : string list;
    path_highlights : Int_set.t String_map.t;
  }

  (* Structural equality over the fields that determine search output.
     Display-only fields (sort_by, screen_split, pane visibility, focus_list,
     path_highlights) are deliberately not compared.

     Fix: [Array.for_all2] raises [Invalid_argument] when the two arrays
     differ in length, so comparing two states whose result arrays for the
     same path have different sizes used to raise instead of returning
     [false].  Guard with an explicit length check first. *)
  let equal (x : t) (y : t) =
    String_map.equal Document.equal x.all_documents y.all_documents
    && String.equal x.filter_exp_string y.filter_exp_string
    && String_set.equal x.documents_passing_filter y.documents_passing_filter
    && String_set.equal x.documents_marked y.documents_marked
    && String.equal x.search_exp_string y.search_exp_string
    && String_map.equal
      (fun xs ys ->
         Array.length xs = Array.length ys
         && Array.for_all2 Search_result.equal xs ys)
      x.search_results
      y.search_results

  (* Number of documents tracked by this state. *)
  let size (t : t) = String_map.cardinal t.all_documents

  let empty : t = {
    all_documents = String_map.empty;
    filter_exp = Filter_exp.empty;
    filter_exp_string = "";
    documents_passing_filter = String_set.empty;
    documents_marked = String_set.empty;
    search_exp = Search_exp.empty;
    search_exp_string = "";
    search_results = String_map.empty;
    (* Widen the command-level sort type into the session-level one
       (which additionally admits `Ranking). *)
    sort_by =
      Command.Sort_by.default
      |> (fun (typ, order) -> ((typ :> Sort_by.typ), order));
    sort_by_no_score =
      Command.Sort_by.default_no_score
      |> (fun (typ, order) -> ((typ :> Sort_by.typ),
order)); screen_split = `Even; show_bottom_right_pane = true; show_key_binding_info_pane = true; focus_list = []; path_highlights = String_map.empty; } let filter_exp (t : t) = t.filter_exp let filter_exp_string (t : t) = t.filter_exp_string let search_exp (t : t) = t.search_exp let search_exp_string (t : t) = t.search_exp_string let path_highlights (t : t) = t.path_highlights let clear_path_highlights (t : t) = { t with path_highlights = String_map.empty } let screen_split (t : t) = t.screen_split let show_pane (t : t) (pane : Command.pane) = match pane with | `Bottom_right -> t.show_bottom_right_pane | `Key_binding_info -> t.show_key_binding_info_pane let refresh_search_results pool stop_signal (t : t) : t option = let cancellation_notifier = Atomic.make false in let updates = Eio.Fiber.first (fun () -> Stop_signal.await stop_signal; Atomic.set cancellation_notifier true; String_map.empty) (fun () -> let global_first_word_candidates_lookup = Index.generate_global_first_word_candidates_lookup pool t.search_exp in let usable_doc_ids = let bv = CCBV.empty () in Search_phrase.Enriched_token.Data_map.iter (fun _word word_ids -> Int_set.iter (fun word_id -> Index.State.union_doc_ids_of_word_id_into_bv ~word_id ~into:bv ) word_ids ) global_first_word_candidates_lookup; bv in let documents_to_search_through = t.documents_passing_filter |> String_set.to_seq |> Seq.map (fun path -> (path, String_map.find path t.all_documents)) |> Seq.filter (fun (path, doc) -> Option.is_none (String_map.find_opt path t.search_results) && CCBV.get usable_doc_ids (Int64.to_int @@ Document.doc_id doc) ) |> List.of_seq in documents_to_search_through |> Task_pool.map_list pool (fun (path, doc) -> let within_same_line = match Document.search_mode doc with | `Single_line -> true | `Multiline -> false in Index.make_search_job_groups stop_signal ~cancellation_notifier ~doc_id:(Document.doc_id doc) ~doc_word_ids:(Document.word_ids doc) ~global_first_word_candidates_lookup ~within_same_line 
~search_scope:(Document.search_scope doc) t.search_exp |> Seq.map (fun x -> (path, x)) |> List.of_seq ) |> List.concat |> Task_pool.map_list pool (fun (path, search_job_group) -> let heap = Index.Search_job_group.run search_job_group in (path, heap) ) |> List.fold_left (fun acc (path, heap) -> Eio.Fiber.yield (); let heap = String_map.find_opt path acc |> Option.value ~default:Search_result_heap.empty |> Search_result_heap.merge heap in String_map.add path heap acc ) String_map.empty |> String_map.map (fun v -> Eio.Fiber.yield (); let arr = Search_result_heap.to_seq v |> Array.of_seq in Array.sort Search_result.compare_relevance arr; arr ) ) in if Atomic.get cancellation_notifier then ( None ) else ( let search_results = String_map.union (fun _k v1 _v2 -> Some v1) updates t.search_results in Some { t with search_results } ) let update_filter_exp pool stop_signal filter_exp_string filter_exp (t : t) : t option = if Filter_exp.equal filter_exp t.filter_exp then ( Some { t with filter_exp_string } ) else ( let cancellation_notifier = Atomic.make false in let documents_passing_filter = Eio.Fiber.first (fun () -> Stop_signal.await stop_signal; Atomic.set cancellation_notifier true; String_set.empty ) (fun () -> let global_first_word_candidates_lookup = Filter_exp.all_content_search_exps filter_exp |> List.fold_left (fun acc search_exp -> Index.generate_global_first_word_candidates_lookup pool ~acc search_exp ) Search_phrase.Enriched_token.Data_map.empty in t.all_documents |> String_map.to_seq |> Seq.map snd |> (fun s -> if Filter_exp.is_empty filter_exp then ( s ) else ( Seq.filter (fun s -> Eio.Fiber.yield (); Document.satisfies_filter_exp pool stop_signal ~global_first_word_candidates_lookup filter_exp s ) s ) ) |> Seq.map Document.path |> String_set.of_seq ) in if Atomic.get cancellation_notifier then ( None ) else ( { t with filter_exp_string; filter_exp; documents_passing_filter; } |> refresh_search_results pool stop_signal ) ) let update_search_exp pool 
stop_signal search_exp_string search_exp (t : t) : t option = if Search_exp.equal search_exp t.search_exp then ( Some { t with search_exp_string } ) else ( { t with search_exp; search_exp_string; search_results = String_map.empty; } |> refresh_search_results pool stop_signal ) let add_document pool (doc : Document.t) (t : t) : t = let within_same_line = match Document.search_mode doc with | `Single_line -> true | `Multiline -> false in let path = Document.path doc in let documents_passing_filter = let global_first_word_candidates_lookup = Filter_exp.all_content_search_exps t.filter_exp |> List.fold_left (fun acc search_exp -> Index.generate_global_first_word_candidates_lookup pool ~acc search_exp ) Search_phrase.Enriched_token.Data_map.empty in if Document.satisfies_filter_exp pool (Stop_signal.make ()) ~global_first_word_candidates_lookup t.filter_exp doc then String_set.add path t.documents_passing_filter else t.documents_passing_filter in let search_results = let global_first_word_candidates_lookup = Index.generate_global_first_word_candidates_lookup pool t.search_exp in String_map.add path (Index.search pool (Stop_signal.make ()) ~doc_id:(Document.doc_id doc) ~doc_word_ids:(Document.word_ids doc) ~global_first_word_candidates_lookup ~within_same_line ~search_scope:(Document.search_scope doc) t.search_exp |> Option.get ) t.search_results in { t with all_documents = String_map.add path doc t.all_documents; documents_passing_filter; search_results; } let of_seq pool (s : Document.t Seq.t) = Seq.fold_left (fun t doc -> add_document pool doc t ) empty s module Compare_search_result_group = struct let mod_time order (d0, _s0) (d1, _s1) = Document.Compare.mod_time order d0 d1 let path_date order (d0, _s0) (d1, _s1) = Document.Compare.path_date order d0 d1 let path order (d0, _s0) (d1, _s1) = Document.Compare.path order d0 d1 let ranking ranking order (d0, _s0) (d1, _s1) = match String_map.find_opt (Document.path d0) ranking, String_map.find_opt (Document.path d1) 
ranking with | None, None -> Document.Compare.path order d0 d1 | None, Some _ -> ( (* Always shuffle document with no ranking to the back. *) 1 ) | Some _, None -> ( (* Always shuffle document with no ranking to the back. *) -1 ) | Some x0, Some x1 -> ( match order with | `Asc -> Int.compare x0 x1 | `Desc -> Int.compare x1 x0 ) let score order (_d0, s0) (_d1, s1) = assert (Array.length s0 > 0); assert (Array.length s1 > 0); (* Search_result.compare_relevance puts the more relevant result to the front, so we flip the comparison here to obtain an ordering of "lowest score" first to match the usual definition of "sort by score in ascending order". *) match order with | `Asc -> Search_result.compare_relevance s1.(0) s0.(0) | `Desc -> Search_result.compare_relevance s0.(0) s1.(0) end let search_result_groups (t : t) : (Document.t * Search_result.t array) array = let no_search_exp = Search_exp.is_empty t.search_exp in let arr = t.documents_passing_filter |> String_set.to_seq |> Seq.map (fun path -> (path, String_map.find path t.all_documents) ) |> (fun s -> if no_search_exp then ( Seq.map (fun (_path, doc) -> (doc, [||])) s ) else ( Seq.filter_map (fun (path, doc) -> match String_map.find_opt path t.search_results with | None -> None | Some search_results -> ( if Array.length search_results = 0 then None else Some (doc, search_results) ) ) s ) ) |> Array.of_seq in let rec f (sort_by_typ, sort_by_order) = match sort_by_typ with | `Path_date -> Compare_search_result_group.path_date sort_by_order | `Mod_time -> Compare_search_result_group.mod_time sort_by_order | `Path -> Compare_search_result_group.path sort_by_order | `Score -> ( if no_search_exp then ( f t.sort_by_no_score ) else ( Compare_search_result_group.score sort_by_order ) ) | `Ranking ranking -> ( Compare_search_result_group.ranking ranking sort_by_order ) in Array.sort (f t.sort_by) arr; let focus_ranking = List.rev t.focus_list |> CCList.foldi (fun ranking i x -> String_map.add x i ranking) String_map.empty in 
Array.stable_sort (fun (d0, _) (d1, _) -> match String_map.find_opt (Document.path d0) focus_ranking, String_map.find_opt (Document.path d1) focus_ranking with | Some x0, Some x1 -> Int.compare x0 x1 | Some _, None -> -1 | None, Some _ -> 1 | None, None -> 0 ) arr; arr let usable_document_paths (t : t) : String_set.t = search_result_groups t |> Array.to_seq |> Seq.map (fun (doc, _) -> Document.path doc) |> String_set.of_seq let marked_document_paths (t : t) = t.documents_marked let all_document_paths (t : t) : string Seq.t = t.all_documents |> String_map.to_seq |> Seq.map fst let unusable_documents (t : t) = let s = usable_document_paths t in t.all_documents |> String_map.to_seq |> Seq.filter (fun (path, _doc) -> not (String_set.mem path s)) |> Seq.map snd let unusable_document_paths (t : t) = unusable_documents t |> Seq.map Document.path let mark (choice : [ `Path of string | `Usable | `Unusable ]) t : t = match choice with | `Path path -> ( match String_map.find_opt path t.all_documents with | None -> t | Some _ -> ( let documents_marked = String_set.add path t.documents_marked in { t with documents_marked } ) ) | `Usable -> ( let documents_marked = String_set.union t.documents_marked (usable_document_paths t) in { t with documents_marked } ) | `Unusable -> ( let documents_marked = Seq.fold_left (fun acc x -> String_set.add x acc) t.documents_marked (unusable_document_paths t) in { t with documents_marked } ) let unmark (choice : [ `Path of string | `Usable | `Unusable | `All ]) t : t = match choice with | `Path path -> ( let documents_marked = String_set.remove path t.documents_marked in { t with documents_marked } ) | `Usable -> ( let documents_marked = String_set.diff t.documents_marked (usable_document_paths t) in { t with documents_marked } ) | `Unusable -> ( let documents_marked = Seq.fold_left (fun acc x -> String_set.remove x acc) t.documents_marked (unusable_document_paths t) in { t with documents_marked } ) | `All -> ( { t with documents_marked = 
String_set.empty } ) let drop (choice : [ `Path of string | `All_except of string | `Marked | `Unmarked | `Usable | `Unusable ]) (t : t) : t = let aux ~(keep : string -> bool) = let keep' : 'a. string -> 'a -> bool = fun path _ -> keep path in { all_documents = String_map.filter keep' t.all_documents; filter_exp = t.filter_exp; filter_exp_string = t.filter_exp_string; documents_passing_filter = String_set.filter keep t.documents_passing_filter; documents_marked = String_set.filter keep t.documents_marked; search_exp = t.search_exp; search_exp_string = t.search_exp_string; search_results = String_map.filter keep' t.search_results; sort_by = t.sort_by; sort_by_no_score = t.sort_by_no_score; screen_split = t.screen_split; show_bottom_right_pane = t.show_bottom_right_pane; show_key_binding_info_pane = t.show_key_binding_info_pane; focus_list = t.focus_list; path_highlights = t.path_highlights; } in match choice with | `Path path -> ( { all_documents = String_map.remove path t.all_documents; filter_exp = t.filter_exp; filter_exp_string = t.filter_exp_string; documents_passing_filter = String_set.remove path t.documents_passing_filter; documents_marked = String_set.remove path t.documents_marked; search_exp = t.search_exp; search_exp_string = t.search_exp_string; search_results = String_map.remove path t.search_results; sort_by = t.sort_by; sort_by_no_score = t.sort_by_no_score; screen_split = t.screen_split; show_bottom_right_pane = t.show_bottom_right_pane; show_key_binding_info_pane = t.show_key_binding_info_pane; focus_list = t.focus_list; path_highlights = t.path_highlights; } ) | `All_except path -> ( let keep path' = String.equal path' path in aux ~keep ) | `Marked -> ( let keep path = not (String_set.mem path t.documents_marked) in aux ~keep ) | `Unmarked -> ( let keep path = String_set.mem path t.documents_marked in aux ~keep ) | `Usable | `Unusable -> ( let usable_document_paths = usable_document_paths t in let document_is_usable path = String_set.mem path 
usable_document_paths in let keep path = match choice with | `Usable -> not (document_is_usable path) | `Unusable -> document_is_usable path | _ -> failwith "unexpected case" in aux ~keep ) let update_path_fuzzy_ranking stop_signal exp (t : t) : t option = let cancellation_notifier = Atomic.make false in let ranking, path_highlights = Eio.Fiber.first (fun () -> Stop_signal.await stop_signal; Atomic.set cancellation_notifier true; (String_map.empty, String_map.empty) ) (fun () -> let l = usable_document_paths t |> String_set.to_seq |> Seq.map File_utils.remove_cwd_from_path |> Misc_utils.fuzzy_rank_assoc (Stop_signal.make ()) ~get_key:Fun.id exp |> Dynarray.to_list |> List.map (fun (path, x) -> (Misc_utils.normalize_path_to_absolute path, x)) in let ranking = List.map fst l |> Misc_utils.ranking_of_ranked_document_list in let highlights = List.fold_left (fun acc (path, search_result) -> String_map.add path (Misc_utils.highlights_of_search_result search_result) acc ) String_map.empty l in (ranking, highlights) ) in if Atomic.get cancellation_notifier then ( None ) else ( let sort_by = (`Ranking ranking, `Asc) in Some { t with sort_by; sort_by_no_score = sort_by; path_highlights; } ) let narrow_search_scope_to_level ~level (t : t) : t = let all_documents = if level = 0 then ( String_map.mapi (fun _path doc -> Document.reset_search_scope_to_full doc ) t.all_documents ) else ( String_map.mapi (fun path doc -> let doc_id = Document.doc_id doc in let search_scope = match String_map.find_opt path t.search_results with | None -> Diet.Int.empty | Some search_results -> ( if String_set.mem path t.documents_passing_filter then ( Array.fold_left (fun scope search_result -> let s, e = List.fold_left (fun s_e Search_result.{ found_word_pos; _ } -> match s_e with | None -> Some (found_word_pos, found_word_pos) | Some (s, e) -> ( Some (min s found_word_pos, max found_word_pos e) ) ) None (Search_result.found_phrase search_result) |> Option.get in let offset = level * 
!Params.tokens_per_search_scope_level in let s, e = (max 0 (s - offset), min (Index.max_pos ~doc_id) (e + offset)) in Diet.Int.add (Diet.Int.Interval.make s e) scope ) Diet.Int.empty search_results ) else ( Diet.Int.empty ) ) in Document.inter_search_scope search_scope doc ) t.all_documents ) in { t with all_documents } end let run_command pool (command : Command.t) (st : State.t) : (Command.t * State.t) option = let open State in let reset_focus_list st = { st with focus_list = [] } in match command with | `Mark path -> ( Some (command, mark (`Path path) st) ) | `Mark_listed -> ( Some (command, mark `Usable st) ) | `Unmark path -> ( Some (command, unmark (`Path path) st) ) | `Unmark_listed -> ( Some (command, unmark `Usable st) ) | `Unmark_all -> ( Some (command, unmark `All st) ) | `Drop s -> ( Some (command, drop (`Path s) st) ) | `Drop_all_except s -> ( Some (command, drop (`All_except s) st) ) | `Drop_marked -> ( Some (command, drop `Marked st) ) | `Drop_unmarked -> ( Some (command, drop `Unmarked st) ) | `Drop_listed -> ( Some (command, drop `Usable st) ) | `Drop_unlisted -> ( Some (command, drop `Unusable st) ) | `Narrow_level level -> ( Some (command, narrow_search_scope_to_level ~level st) ) | `Focus path -> ( let focus_list = path :: st.focus_list in Some (command, { st with focus_list }) ) | `Sort (sort_by, sort_by_no_score) -> ( let st = reset_focus_list st in let sort_by = sort_by |> (fun (typ, order) -> ((typ :> Sort_by.typ), order)) in let sort_by_no_score = sort_by_no_score |> (fun (typ, order) -> ((typ :> Sort_by.typ), order)) in Some (command, { st with sort_by; sort_by_no_score }) ) | `Path_fuzzy_rank (s, ranking) -> ( let st = reset_focus_list st in match Search_exp.parse s with | None -> None | Some exp -> ( update_path_fuzzy_ranking (Stop_signal.make ()) exp st |> Option.map (fun state -> let command = `Path_fuzzy_rank (s, ranking) in (command, state) ) ) ) | `Split_screen screen_split -> ( Some (command, { st with screen_split }) ) | 
`Hide_pane pane -> ( let st = match pane with | `Bottom_right -> { st with show_bottom_right_pane = false } | `Key_binding_info -> { st with show_key_binding_info_pane = false } in Some (command, st) ) | `Show_pane pane -> ( let st = match pane with | `Bottom_right -> { st with show_bottom_right_pane = true } | `Key_binding_info -> { st with show_key_binding_info_pane = true } in Some (command, st) ) | `Comment _ -> ( Some (command, st) ) | `Search s -> ( match Search_exp.parse s with | None -> None | Some search_exp -> ( update_search_exp pool (Stop_signal.make ()) s search_exp st |> Option.map (fun state -> (command, state)) ) ) | `Filter s -> ( match Filter_exp.parse s with | None -> None | Some exp -> ( update_filter_exp pool (Stop_signal.make ()) s exp st |> Option.map (fun state -> (command, state)) ) ) module Snapshot = struct let counter = ref 0 type t = { last_command : Command.t option; state : State.t; committed : bool; id : int; } let committed t = t.committed let last_command t = t.last_command let state t = t.state let id t = t.id let equal_id x y = id x = id y let make ?(committed = true) ~last_command state : t = let id = !counter in counter := id + 1; { last_command; state; id; committed } let make_empty ?committed () = make ?committed ~last_command:None State.empty let update_state state t = { t with state } end ================================================ FILE: bin/session.mli ================================================ open Docfd_lib type search_result_group = Document.t * Search_result.t array module State : sig type t val equal : t -> t -> bool val size : t -> int val empty : t val update_filter_exp : Task_pool.t -> Stop_signal.t -> string -> Filter_exp.t -> t -> t option val update_search_exp : Task_pool.t -> Stop_signal.t -> string -> Search_exp.t -> t -> t option val filter_exp : t -> Filter_exp.t val filter_exp_string : t -> string val search_exp : t -> Search_exp.t val search_exp_string : t -> string val path_highlights : t -> 
Int_set.t String_map.t val clear_path_highlights : t -> t val add_document : Task_pool.t -> Document.t -> t -> t val of_seq : Task_pool.t -> Document.t Seq.t -> t (* Documents passing the filter, paired with their (relevance-sorted) results,
   in display order. *) val search_result_groups : t -> search_result_group array val usable_document_paths : t -> String_set.t val unusable_documents : t -> Document.t Seq.t val unusable_document_paths : t -> string Seq.t val all_document_paths : t -> string Seq.t val marked_document_paths : t -> String_set.t val mark : [ `Path of string | `Usable | `Unusable ] -> t -> t val unmark : [ `Path of string | `Usable | `Unusable | `All ] -> t -> t (* Removes documents from the session entirely (as opposed to mark/unmark,
   which only toggle the marked set). *) val drop : [ `Path of string | `All_except of string | `Marked | `Unmarked | `Usable | `Unusable ] -> t -> t (* Returns [None] when cancelled via the stop signal. *) val update_path_fuzzy_ranking : Stop_signal.t -> Search_exp.t -> t -> t option (* Level 0 resets every document's search scope to full content. *) val narrow_search_scope_to_level : level:int -> t -> t val screen_split : t -> Command.screen_split val show_pane : t -> Command.pane -> bool end
(* Applies [command] to the state; [None] on parse/run failure or cancellation.
   The returned command may differ from the input (e.g. normalized form). *)
val run_command : Task_pool.t -> Command.t -> State.t -> (Command.t * State.t) option
(* An immutable point in session history: a state plus the command that
   produced it, tagged with a unique id. *)
module Snapshot : sig type t val committed : t -> bool val last_command : t -> Command.t option val state : t -> State.t val id : t -> int val equal_id : t -> t -> bool val make : ?committed:bool -> last_command:Command.t option -> State.t -> t val make_empty : ?committed:bool -> unit -> t val update_state : State.t -> t -> t end
================================================ FILE: bin/session_manager.ml ================================================
open Docfd_lib
(* Timestamp of the most recent UI request; used for debouncing.  TODO confirm
   against the reader of this value, which is outside this chunk. *)
let last_request_timestamp : Mtime.t Atomic.t = Atomic.make (Mtime_clock.now ())
(* One-slot mailboxes from the UI (requester) to the worker fiber; a newer
   request overwrites an unserviced older one.  The [bool] is presumably a
   commit flag (see the worker's [~commit] parameter below). *)
let search_request : (bool * string) Lock_protected_cell.t = Lock_protected_cell.make () let filter_request : (bool * string) Lock_protected_cell.t = Lock_protected_cell.make () let version_shift_request : int Lock_protected_cell.t = Lock_protected_cell.make () let path_fuzzy_rank_request : (bool * string) Lock_protected_cell.t = Lock_protected_cell.make ()
(* Wakes the worker fiber after a request has been deposited. *)
let worker_ping : Ping.t = Ping.make ()
let _requester_lock = Eio.Mutex.create
()
(* Runs [f] while holding the requester lock: serializes request submission
   from UI code. *)
let lock_as_requester : type a. (unit -> a) -> a = fun f -> Eio.Mutex.use_rw ~protect:false _requester_lock f (* let requester_ping : Ping.t = Ping.make () *)
(* Messages from the worker fiber to the manager fiber.  The [int] carried by
   the *_done variants is the snapshot version the payload belongs to; the
   [bool] in [Path_fuzzy_rank_done] indicates whether the ranking is committed. *)
type egress_payload = | Search_exp_parse_error | Searching | Search_cancelled | Search_done of int * Session.Snapshot.t | Filter_parse_error | Filtering | Filtering_cancelled | Filtering_done of int * Session.Snapshot.t | Path_fuzzy_rank_done of int * Session.Snapshot.t * bool
(* Capacity-0 streams: every [add] rendezvouses with a matching [take], which
   forces the worker and manager fibers to advance in lock-step. *)
let egress : egress_payload Eio.Stream.t = Eio.Stream.create 0 let egress_ack : unit Eio.Stream.t = Eio.Stream.create 0
(* One cancellation signal per long-running operation kind. *)
let stop_filter_signal = Atomic.make (Stop_signal.make ()) let stop_search_signal = Atomic.make (Stop_signal.make ()) let stop_path_fuzzy_rank_signal = Atomic.make (Stop_signal.make ())
(* Cancellation protocol: atomically swap in a fresh signal, then broadcast the
   old one so any fiber waiting on it wakes up and aborts; late arrivals wait on
   the new (un-broadcast) signal. *)
let stop_filter () = let x = Atomic.exchange stop_filter_signal (Stop_signal.make ()) in Stop_signal.broadcast x let stop_search () = let x = Atomic.exchange stop_search_signal (Stop_signal.make ()) in Stop_signal.broadcast x let stop_path_fuzzy_rank () = let x = Atomic.exchange stop_path_fuzzy_rank_signal (Stop_signal.make ()) in Stop_signal.broadcast x
let _worker_state_lock = Eio.Mutex.create ()
(* Runs [f] while holding the worker-state lock. *)
let lock_worker_state : type a.
(unit -> a) -> a = fun f -> Eio.Mutex.use_rw ~protect:false _worker_state_lock f let init_state : Session.State.t ref = ref Session.State.empty let snapshots = let arr = Dynarray.create () in Dynarray.add_last arr (Session.Snapshot.make_empty ()); arr let cur_ver = ref 0 let cur_snapshot_var = Lwd.var (0, Session.Snapshot.make_empty ()) let cur_snapshot = Lwd.get cur_snapshot_var type view = { init_state : Session.State.t; snapshots : Session.Snapshot.t Dynarray.t; cur_ver : int; } let sync_input_fields_from_snapshot (x : Session.Snapshot.t) = let state = Session.Snapshot.state x in Session.State.filter_exp_string state |> (fun s -> Lwd.set UI_base.Vars.filter_field (s, String.length s)); Session.State.search_exp_string state |> (fun s -> Lwd.set UI_base.Vars.search_field (s, String.length s)) let lock_for_external_editing ~clean_up f = (* This blocks further requests from being made. *) lock_as_requester (fun () -> (* We try to get worker to finish ASAP. *) stop_filter (); stop_search (); (* Locking worker also locks the manager, as egress_ack forces lock-step progression of the system. *) lock_worker_state (fun () -> (* Clear any outstanding requests. *) Lock_protected_cell.unset filter_request; Lock_protected_cell.unset search_request; let result = f () in if clean_up then ( Lwd.set UI_base.Vars.search_ui_status `Idle; Lwd.set UI_base.Vars.filter_ui_status `Idle; let snapshot = Dynarray.get snapshots !cur_ver in Lwd.set cur_snapshot_var (!cur_ver, snapshot); sync_input_fields_from_snapshot snapshot; ); result ) ) let lock_with_view : type a. 
(view -> a) -> a = fun f -> lock_for_external_editing ~clean_up:false (fun () -> f { init_state = !init_state; snapshots = Dynarray.copy snapshots; cur_ver = !cur_ver; } ) let update_starting_state (starting_state : Session.State.t) = lock_for_external_editing ~clean_up:true (fun () -> let pool = UI_base.task_pool () in init_state := starting_state; let starting_snapshot = Session.Snapshot.make ~last_command:None starting_state in Dynarray.set snapshots 0 starting_snapshot; for i=1 to Dynarray.length snapshots - 1 do let prev = Dynarray.get snapshots (i - 1) in let prev_state = Session.Snapshot.state prev in let cur = Dynarray.get snapshots i in let state = match Session.Snapshot.last_command cur with | None -> prev_state | Some command -> Session.run_command pool command prev_state |> Option.map snd |> Option.value ~default:prev_state in Dynarray.set snapshots i (Session.Snapshot.update_state state cur) done; cur_ver := (Dynarray.length snapshots - 1); ) let load_snapshots snapshots' = assert (Dynarray.length snapshots' > 0); lock_for_external_editing ~clean_up:true (fun () -> assert (Session.State.equal (Session.Snapshot.state @@ Dynarray.get snapshots' 0) !init_state); Dynarray.clear snapshots; Dynarray.append snapshots snapshots'; cur_ver := (Dynarray.length snapshots - 1); ) let stop_filter_and_search_and_restore_input_fields () = lock_for_external_editing ~clean_up:true (fun () -> () ) let shift_ver ~offset = lock_for_external_editing ~clean_up:true (fun () -> let new_ver = !cur_ver + offset in if 0 <= new_ver && new_ver < Dynarray.length snapshots then ( cur_ver := new_ver; ) ) let update_from_cur_snapshot f = lock_for_external_editing ~clean_up:true (fun () -> Dynarray.truncate snapshots (!cur_ver + 1); let next_snapshot = f (Dynarray.get_last snapshots) in Dynarray.add_last snapshots next_snapshot; cur_ver := Dynarray.length snapshots - 1; ) let manager_fiber () = (* This fiber handles updates of Lwd.var which are not thread-safe, and thus cannot be done 
by worker_fiber directly *) let update_snapshot ?(reset_document_selected = true) ver snapshot = if reset_document_selected then ( UI_base.reset_document_selected (); ); Lwd.set cur_snapshot_var (ver, snapshot); in while true do let payload = Eio.Stream.take egress in (match payload with | Search_exp_parse_error -> ( Lwd.set UI_base.Vars.search_ui_status `Parse_error ) | Searching -> ( Lwd.set UI_base.Vars.search_ui_status `Searching ) | Filtering -> ( Lwd.set UI_base.Vars.filter_ui_status `Filtering ) | Search_cancelled -> ( ) | Search_done (ver, snapshot) -> ( update_snapshot ver snapshot; Lwd.set UI_base.Vars.search_ui_status `Idle ) | Filter_parse_error -> ( Lwd.set UI_base.Vars.filter_ui_status `Parse_error ) | Filtering_cancelled -> ( ) | Filtering_done (ver, snapshot) -> ( update_snapshot ver snapshot; Lwd.set UI_base.Vars.filter_ui_status `Idle ) | Path_fuzzy_rank_done (ver, snapshot, commit) -> ( let snapshot = if commit then ( let state = Session.Snapshot.state snapshot |> Session.State.clear_path_highlights in Session.Snapshot.update_state state snapshot ) else ( snapshot ) in update_snapshot ~reset_document_selected:false ver snapshot; ) ); Eio.Stream.add egress_ack (); done let worker_fiber pool = (* This fiber runs in a background domain to allow the UI code in the main domain to immediately continue running after key presses that trigger searches or search cancellations. This removes the need to make the code of session module always yield frequently. 
*)
  (* Snapshot pointed to by the current version. *)
  let get_cur_snapshot () = Dynarray.get snapshots !cur_ver in
  (* Append [snapshot] after the current version, dropping any newer
     snapshots.  If the last remaining snapshot satisfies the given
     predicate (and we are not at version 0), it is overwritten in place
     instead of a new version being created. *)
  let add_snapshot ?(overwrite_if_last_snapshot_satisfies = fun _ -> false) snapshot =
    Dynarray.truncate snapshots (!cur_ver + 1);
    let last_snapshot = Dynarray.get_last snapshots in
    if !cur_ver > 0 && overwrite_if_last_snapshot_satisfies last_snapshot then (
      Dynarray.set snapshots !cur_ver snapshot;
    ) else (
      Dynarray.add_last snapshots snapshot;
      incr cur_ver;
    );
  in
  (* Send a message to manager_fiber and block until it acknowledges. *)
  let send_to_manager x =
    Eio.Stream.add egress x;
    Eio.Stream.take egress_ack;
  in
  (* Last search request that was cancelled mid-run, kept so it can be
     retried on the next wake up.  The [bool] is the commit flag. *)
  let cancelled_search_request : (bool * string) option ref = ref None in
  (* Parse and run search request [s].  On cancellation the request is
     stashed in [cancelled_search_request] for a later retry. *)
  let process_search_req stop_signal ~commit (s : string) =
    cancelled_search_request := None;
    match Search_exp.parse s with
    | None -> (
        send_to_manager Search_exp_parse_error
      )
    | Some search_exp -> (
        send_to_manager Searching;
        let state =
          get_cur_snapshot ()
          |> Session.Snapshot.state
          |> Session.State.update_search_exp pool stop_signal s search_exp
        in
        match state with
        | None -> (
            send_to_manager Search_cancelled;
            cancelled_search_request := Some (commit, s);
          )
        | Some state -> (
            let command = Some (`Search s) in
            let snapshot =
              Session.Snapshot.make ~committed:commit ~last_command:command state
            in
            (* Overwrite the previous snapshot when it was an uncommitted
               search or a search for the same string, so incremental
               typing does not flood the history. *)
            add_snapshot
              ~overwrite_if_last_snapshot_satisfies:(fun snapshot ->
                  match Session.Snapshot.last_command snapshot with
                  | Some (`Search s') -> (
                      not (Session.Snapshot.committed snapshot)
                      || s' = s
                    )
                  | _ -> false
                )
              snapshot;
            send_to_manager (Search_done (!cur_ver, snapshot))
          )
      )
  in
  (* Parse and run filter request [s]. *)
  let process_filter_req stop_signal ~commit (s : string) =
    match Filter_exp.parse s with
    | Some filter_exp -> (
        send_to_manager Filtering;
        let state =
          get_cur_snapshot ()
          |> Session.Snapshot.state
          |> Session.State.update_filter_exp pool stop_signal s filter_exp
        in
        match state with
        | None -> (
            send_to_manager Filtering_cancelled
          )
        | Some state -> (
            let command = Some (`Filter s) in
            let snapshot =
              Session.Snapshot.make ~committed:commit ~last_command:command state
            in
            add_snapshot
              ~overwrite_if_last_snapshot_satisfies:(fun snapshot ->
match Session.Snapshot.last_command snapshot with
                  | Some (`Filter s') -> (
                      not (Session.Snapshot.committed snapshot)
                      || s' = s
                    )
                  | _ -> false
                )
              snapshot;
            send_to_manager (Filtering_done (!cur_ver, snapshot))
          )
      )
    | None -> (
        send_to_manager Filter_parse_error
      )
  in
  (* Parse and run path fuzzy ranking request [s].  Parse failures and
     cancellations are dropped silently (no message to the manager). *)
  let process_path_fuzzy_rank_req stop_signal ~commit (s : string) =
    match Search_exp.parse s with
    | None -> ()
    | Some exp -> (
        let state =
          get_cur_snapshot ()
          |> Session.Snapshot.state
          |> Session.State.update_path_fuzzy_ranking stop_signal exp
        in
        match state with
        | None -> ( )
        | Some state -> (
            let command = Some (`Path_fuzzy_rank (s, None)) in
            let snapshot =
              Session.Snapshot.make ~committed:commit ~last_command:command state
            in
            add_snapshot
              ~overwrite_if_last_snapshot_satisfies:(fun snapshot ->
                  match Session.Snapshot.last_command snapshot with
                  | Some (`Path_fuzzy_rank (s', _)) -> (
                      not (Session.Snapshot.committed snapshot)
                      || s' = s
                    )
                  | _ -> false
                )
              snapshot;
            send_to_manager (Path_fuzzy_rank_done (!cur_ver, snapshot, commit))
          )
      )
  in
  (* Main worker loop: woken up by pings from the submit_* functions.
     Requests are debounced — if the last request arrived too recently,
     sleep until the debounce interval (plus a buffer) has elapsed and
     re-ping instead of processing immediately. *)
  while true do
    Ping.wait worker_ping;
    let time_since_last_request =
      Mtime.span (Atomic.get last_request_timestamp) (Mtime_clock.now ())
    in
    if Mtime.Span.is_shorter
        time_since_last_request
        ~than:Params.session_manager_request_debounce_interval
    then (
      let clock = Eio.Stdenv.mono_clock (UI_base.eio_env ()) in
      (* Remaining debounce time plus wait buffer, converted from
         nanoseconds to seconds for Eio's sleep. *)
      let sleep_duration_s =
        Mtime.Span.abs_diff
          time_since_last_request
          Params.session_manager_request_debounce_interval
        |> Mtime.Span.add Params.session_manager_request_debounce_wait_buffer
        |> Mtime.Span.to_float_ns
        |> (fun x -> x /.
1_000_000_000.0)
      in
      Eio.Time.Mono.sleep clock sleep_duration_s;
      Ping.ping worker_ping
    ) else (
      lock_worker_state (fun () ->
          (* Handle a pending filter request first, then search, then
             path fuzzy rank.  A previously cancelled search is retried
             when no new search request is pending. *)
          (match Lock_protected_cell.get filter_request with
           | None -> ()
           | Some (commit, s) -> (
               process_filter_req (Atomic.get stop_filter_signal) ~commit s
             )
          );
          (match Lock_protected_cell.get search_request with
           | None -> !cancelled_search_request
           | Some (commit, s) -> Some (commit, s)
          )
          |> Option.iter (fun (commit, s) ->
              process_search_req (Atomic.get stop_search_signal) ~commit s
            );
          Lock_protected_cell.get path_fuzzy_rank_request
          |> Option.iter (fun (commit, s) ->
              process_path_fuzzy_rank_req
                (Atomic.get stop_path_fuzzy_rank_signal)
                ~commit
                s
            );
        )
    )
  done

(* Queue a filter request for the worker.  Any in-flight filter and
   search are stopped first. *)
let submit_filter_req ~commit (s : string) =
  lock_as_requester (fun () ->
      stop_filter ();
      stop_search ();
      Lock_protected_cell.set filter_request (commit, s);
      Ping.ping worker_ping
    )

(* Queue a search request for the worker.  Any in-flight search is
   stopped first. *)
let submit_search_req ~commit (s : string) =
  lock_as_requester (fun () ->
      stop_search ();
      Lock_protected_cell.set search_request (commit, s);
      Ping.ping worker_ping
    )

(* Queue a path fuzzy ranking request for the worker.  Any in-flight
   ranking is stopped first. *)
let submit_path_fuzzy_rank_req ~commit (s : string) =
  lock_as_requester (fun () ->
      stop_path_fuzzy_rank ();
      Lock_protected_cell.set path_fuzzy_rank_request (commit, s);
      Ping.ping worker_ping
    )

================================================ FILE: bin/session_manager.mli ================================================

open Docfd_lib

val manager_fiber : unit -> unit

val worker_fiber : Task_pool.t -> unit

val cur_snapshot : (int * Session.Snapshot.t) Lwd.t

(* Read-only copy of the session manager's internal state, handed to
   [lock_with_view] callbacks. *)
type view = {
  init_state : Session.State.t;
  snapshots : Session.Snapshot.t Dynarray.t;
  cur_ver : int;
}

val lock_with_view : (view -> 'a) -> 'a

val update_starting_state : Session.State.t -> unit

val load_snapshots : Session.Snapshot.t Dynarray.t -> unit

val shift_ver : offset:int -> unit

val update_from_cur_snapshot : (Session.Snapshot.t -> Session.Snapshot.t) -> unit

val submit_filter_req : commit:bool -> string -> unit

val submit_search_req : commit:bool -> string -> unit

val submit_path_fuzzy_rank_req :
commit:bool -> string -> unit

val stop_filter_and_search_and_restore_input_fields : unit -> unit

================================================ FILE: bin/string_utils.ml ================================================

(* Strip all leading '.' characters from [s].  Returns "" when [s] is
   empty or consists entirely of dots. *)
let remove_leading_dots (s : string) =
  let str_len = String.length s in
  if str_len = 0 then (
    ""
  ) else (
    let rec aux pos =
      if pos < str_len then (
        if String.get s pos = '.' then
          aux (pos + 1)
        else (
          String.sub s pos (str_len - pos)
        )
      ) else (
        ""
      )
    in
    aux 0
  )

(* A system comment line is one starting with ';'. *)
let line_is_system_comment line =
  CCString.starts_with ~prefix:";" line

let line_is_blank_or_system_comment line =
  line_is_system_comment line
  || String.length (String.trim line) = 0

(* Longest common prefix of all strings in [seq].  Returns "" for the
   empty sequence. *)
let longest_common_prefix (seq : string Seq.t) : string =
  let prefix = ref "" in
  Seq.iteri (fun i s ->
      if i = 0 then (
        prefix := s
      ) else (
        (* Count how many leading characters of [s] match the current
           prefix, then shorten the prefix to that length. *)
        let match_len = ref 0 in
        let prefix_len = String.length !prefix in
        String.iteri (fun i c ->
            if !match_len = i && i < prefix_len && !prefix.[i] = c then (
              incr match_len
            )
          ) s;
        prefix := String.sub !prefix 0 (min !match_len prefix_len)
      )
    ) seq;
  !prefix

================================================ FILE: bin/version_string.ml ================================================

let s = "13.0.0"

================================================ FILE: bin/xdg_utils.ml ================================================

(* All files under the "applications" subdirectory of each directory
   listed in XDG_DATA_DIRS.  Unreadable directories are skipped. *)
let all_desktop_files () : string Seq.t =
  match Sys.getenv_opt "XDG_DATA_DIRS" with
  | None -> Seq.empty
  | Some s -> (
      String.split_on_char ':' s
      |> List.to_seq
      |> Seq.flat_map (fun dir ->
          let dir = Filename.concat dir "applications" in
          try
            Sys.readdir dir
            |> Array.to_seq
            |> Seq.map (Filename.concat dir)
          with
          | _ -> Seq.empty
        )
    )

(* Full path of the first desktop file whose basename equals [file],
   if any. *)
let path_of_desktop_file file =
  let rec aux paths =
    match paths () with
    | Seq.Nil -> None
    | Seq.Cons (path, rest) -> (
        if String.equal file (Filename.basename path) then (
          Some path
        ) else (
          aux rest
        )
      )
  in
  aux (all_desktop_files ())

(* Path of the default desktop file registered for the MIME type of
   [typ], queried via xdg-mime. *)
let default_desktop_file_path (typ : [ `PDF ]) =
  let mime_typ =
    match typ with
    | `PDF -> "application/pdf"
    (* |
`ODT -> "application/vnd.oasis.opendocument.text"
       | `DOCX -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document" *)
  in
  let (stdout, _, ret) = CCUnix.call "xdg-mime query default %s" mime_typ in
  if ret = 0 then (
    path_of_desktop_file (CCString.trim stdout)
  ) else (
    None
  )

(* XDG data home directory, following platform conventions.  Exits with
   an error message when HOME is not set. *)
let data_home =
  let home_dir =
    match Sys.getenv_opt "HOME" with
    | None -> (
        Misc_utils.exit_with_error_msg
          "environment variable HOME is not set";
      )
    | Some home -> home
  in
  match Params.os_typ with
  | `Linux -> (
      match Sys.getenv_opt "XDG_DATA_HOME" with
      | None -> Filename.concat home_dir ".local/share"
      | Some x -> x
    )
  | `Darwin -> (
      Filename.concat home_dir (Filename.concat "Library" "Application Support")
    )

(* XDG cache home directory, following platform conventions.  Exits
   with an error message when HOME is not set. *)
let cache_home =
  let home_dir =
    match Sys.getenv_opt "HOME" with
    | None -> (
        Misc_utils.exit_with_error_msg
          "environment variable HOME is not set";
      )
    | Some home -> home
  in
  match Params.os_typ with
  | `Linux -> (
      match Sys.getenv_opt "XDG_CACHE_HOME" with
      | None -> Filename.concat home_dir ".cache"
      | Some x -> x
    )
  | `Darwin -> (
      Filename.concat home_dir (Filename.concat "Library" "Caches")
    )

================================================ FILE: containers/Containerfile.demo-vhs ================================================

FROM ghcr.io/charmbracelet/vhs
RUN apt-get update
RUN apt-get install -y poppler-utils
RUN apt-get install -y neovim

================================================ FILE: containers/Containerfile.docfd ================================================

FROM docker.io/alpine:3.22
USER root
RUN apk add linux-headers
RUN apk add poppler-utils
RUN apk add sqlite sqlite-libs sqlite-dev sqlite-static
RUN apk add sqlite-analyzer
RUN apk add make
RUN apk add clang
RUN apk add opam
RUN apk add git
RUN apk add python3
# RUN ln -s $(which opam-2.2) /usr/local/bin/opam
RUN opam --version
RUN apk add bash
RUN opam init --disable-sandboxing
RUN opam install --yes dune
RUN opam install --yes utop ocp-indent
COPY .
/root/docfd # RUN chown -R root:root /home/opam/docfd WORKDIR /root/docfd RUN eval $(opam env) && dune build docfd.opam RUN opam install --yes . --deps-only --with-test RUN echo 'eval $(opam env)' >> /root/.bash_profile ================================================ FILE: demo-vhs-tapes/repo-non-interactive.tape ================================================ Output demo-vhs-gifs/repo-non-interactive.gif Set Padding 0 Set Framerate 10 Set Width 1366 Set Height 768 Set FontSize 15 Set TypingSpeed 100ms Type@50ms 'docfd README.md --sample "interactive grep across lines"' Enter Sleep 4s ================================================ FILE: demo-vhs-tapes/repo.tape ================================================ Output demo-vhs-gifs/repo.gif Set Padding 0 Set Framerate 10 Set Width 1366 Set Height 768 Set FontSize 15 Set TypingSpeed 100ms Type@50ms "docfd *.md" Sleep 1s Enter Sleep 1s Type "/" Type "fuzz search" Enter Sleep 1s Type "f" Type "path-fuzzy:readme" Enter Sleep 1s Enter Sleep 1s Type "zz" Sleep 1s Type "O" Type@50ms "Docfd opens the editor to where the search result is when we hit Enter." Escape Sleep 2s Type@200ms ":q!" 
Enter Sleep 4s ================================================ FILE: demo-vhs-tapes/ui-screenshot.tape ================================================ Output dummy.gif Set Padding 0 Set Framerate 10 Set Width 1366 Set Height 768 Set FontSize 15 Set TypingSpeed 100ms Type@10ms "docfd *.md" Enter Sleep 1s Screenshot screenshots/ui0.png Sleep 1s ================================================ FILE: demo-vhs.sh ================================================ #!/usr/bin/env bash podman run --rm -v $PWD:/vhs \ --env 'VISUAL=nvim' \ -v $PWD/release/docfd:/usr/bin/docfd \ localhost/docfd-demo-vhs \ "$@" ================================================ FILE: docfd.opam ================================================ # This file is generated by dune, edit dune-project instead opam-version: "2.0" synopsis: "TUI multiline fuzzy document finder" description: """ Think interactive grep for text files, PDFs, DOCXs, etc, but word/token based instead of regex and line based, so you can search across lines easily. Docfd aims to provide good UX via integration with common text editors and PDF viewers, so you can jump directly to a search result with a single key press. 
Features: - Multithreaded indexing and searching - Multiline fuzzy search of multiple files or a single file - Swap between multi-file view and single file view on the fly - Content view pane that shows the snippet surrounding the search result selected - Text editor and PDF viewer integration """ maintainer: ["Darren Li"] authors: ["Darren Li"] license: "MIT" tags: ["fuzzy" "document" "finder"] homepage: "https://github.com/darrenldl/docfd" doc: "https://github.com/darrenldl/docfd" bug-reports: "https://github.com/darrenldl/docfd/issues" depends: [ "ocaml" {>= "5.2"} "dune" {>= "3.4"} "fmt" {>= "0.9.0"} "angstrom" {>= "0.15.0"} "containers" {>= "3.12"} "oseq" "spelll" "notty-community" "nottui" {>= "0.4"} "nottui-unix" {>= "0.4"} "lwd" "cmdliner" {>= "2.0.0"} "eio" {>= "0.14"} "digestif" "eio_main" {>= "1.3"} "containers-data" "timedesc" {>= "3.1.0"} "re" {>= "1.11.0"} "ppx_deriving" {>= "5.0"} "decompress" "progress" {>= "0.5.0"} "diet" "sqlite3" "uuseg" "uucp" "alcotest" {with-test} "qcheck-alcotest" {with-test} "qcheck" {with-test} "odoc" {with-doc} ] dev-repo: "git+https://github.com/darrenldl/docfd.git" build: [ ["dune" "subst"] {dev} [ "dune" "build" "-p" name "-j" jobs "@install" "@doc" {with-doc} ] ] ================================================ FILE: docfd.opam.locked ================================================ opam-version: "2.0" name: "docfd" version: "3.0.0" synopsis: "TUI multiline fuzzy document finder" maintainer: "Darren Li" authors: "Darren Li" license: "MIT" tags: ["fuzzy" "document" "finder"] homepage: "https://github.com/darrenldl/docfd" doc: "https://github.com/darrenldl/docfd" bug-reports: "https://github.com/darrenldl/docfd/issues" depends: [ "alcotest" {= "1.8.0" & with-test} "angstrom" {= "0.16.1"} "astring" {= "0.8.5" & with-test} "base-bigarray" {= "base"} "base-bytes" {= "base"} "base-domains" {= "base"} "base-nnp" {= "base"} "base-threads" {= "base"} "base-unix" {= "base"} "bigstringaf" {= "0.10.0"} "checkseum" {= "0.5.2"} 
"cmdliner" {= "1.3.0"} "conf-pkg-config" {= "3"} "conf-sqlite3" {= "1"} "containers" {= "3.15"} "containers-data" {= "3.15"} "cppo" {= "1.8.0"} "csexp" {= "1.5.2"} "cstruct" {= "6.2.0"} "decompress" {= "1.5.3"} "diet" {= "0.4"} "digestif" {= "1.2.0"} "domain-local-await" {= "1.0.1"} "dune" {= "3.17.1"} "dune-configurator" {= "3.17.1"} "eio" {= "1.2"} "eio_linux" {= "1.2"} "eio_main" {= "1.2"} "eio_posix" {= "1.2"} "either" {= "1.0.0"} "eqaf" {= "0.10"} "fmt" {= "0.9.0"} "hmap" {= "0.8.1"} "host-arch-x86_64" {= "1"} "host-system-other" {= "1"} "iomux" {= "0.3"} "logs" {= "0.7.0"} "lwd" {= "0.3"} "lwt" {= "5.9.0"} "lwt-dllist" {= "1.0.1"} "mtime" {= "2.1.0"} "nottui" {= "0.3"} "notty" {= "0.2.3"} "ocaml" {= "5.2.1"} "ocaml-base-compiler" {= "5.2.1"} "ocaml-compiler-libs" {= "v0.17.0"} "ocaml-config" {= "3"} "ocaml-options-vanilla" {= "1"} "ocaml-syntax-shims" {= "1.0.0"} "ocamlbuild" {= "0.15.0"} "ocamlfind" {= "1.9.6"} "ocplib-endian" {= "1.2"} "optint" {= "0.3.0"} "oseq" {= "0.5.1"} "ounit2" {= "2.2.7" & with-test} "ppx_derivers" {= "1.2.1"} "ppx_deriving" {= "6.0.3"} "ppxlib" {= "0.33.0"} "progress" {= "0.4.0"} "psq" {= "0.2.1"} "ptime" {= "1.2.0"} "qcheck" {= "0.23" & with-test} "qcheck-alcotest" {= "0.23" & with-test} "qcheck-core" {= "0.23" & with-test} "qcheck-ounit" {= "0.23" & with-test} "re" {= "1.12.0"} "result" {= "1.5"} "seq" {= "base"} "sexplib0" {= "v0.17.0"} "spelll" {= "0.4"} "sqlite3" {= "5.2.0"} "stdlib-shims" {= "0.3.0"} "terminal" {= "0.4.0"} "thread-table" {= "1.0.0"} "timedesc" {= "3.1.0"} "timedesc-tzdb" {= "3.1.0"} "timedesc-tzlocal" {= "3.1.0"} "topkg" {= "1.0.7"} "uring" {= "0.9"} "uucp" {= "16.0.0"} "uuseg" {= "16.0.0"} "uutf" {= "1.0.3"} "vector" {= "1.0.0"} ] build: [ ["dune" "subst"] {dev} ["dune" "build" "-p" name "-j" jobs "@install" "@doc" {with-doc}] ] dev-repo: "git+https://github.com/darrenldl/docfd.git" pin-depends: [ [ "nottui.0.3" "git+https://github.com/let-def/lwd.git#a337a778001e6c1dbaed7e758c9e05f300abd388" ] [ 
"notty.0.2.3" "git+https://github.com/ocaml-dune/notty.git#b6e1036c61521be3b1f4d585895ac598bdf4ab8d" ] [ "ocaml-base-compiler.5.2.1" "https://github.com/ocaml/ocaml/releases/download/5.2.1/ocaml-5.2.1.tar.gz" ] ] description: """\ Think interactive grep for text files, PDFs, DOCXs, etc, but word/token based instead of regex and line based, so you can search across lines easily. Docfd aims to provide good UX via integration with common text editors and PDF viewers, so you can jump directly to a search result with a single key press. Features: - Multithreaded indexing and searching - Multiline fuzzy search of multiple files or a single file - Swap between multi-file view and single file view on the fly - Content view pane that shows the snippet surrounding the search result selected - Text editor and PDF viewer integration""" ================================================ FILE: docfd.opam.template ================================================ build: [ ["dune" "subst"] {dev} [ "dune" "build" "-p" name "-j" jobs "@install" "@doc" {with-doc} ] ] ================================================ FILE: dune-project ================================================ (lang dune 3.4) (name docfd) (generate_opam_files true) (source (github darrenldl/docfd)) (authors "Darren Li") (maintainers "Darren Li") (license MIT) (package (name docfd) (synopsis "TUI multiline fuzzy document finder") (description " Think interactive grep for text files, PDFs, DOCXs, etc, but word/token based instead of regex and line based, so you can search across lines easily. Docfd aims to provide good UX via integration with common text editors and PDF viewers, so you can jump directly to a search result with a single key press. 
Features: - Multithreaded indexing and searching - Multiline fuzzy search of multiple files or a single file - Swap between multi-file view and single file view on the fly - Content view pane that shows the snippet surrounding the search result selected - Text editor and PDF viewer integration ") (documentation https://github.com/darrenldl/docfd) (depends (ocaml (>= "5.2")) dune (fmt (>= "0.9.0")) (angstrom (>= "0.15.0")) (containers (>= "3.12")) oseq spelll notty-community (nottui (>= "0.4")) (nottui-unix (>= "0.4")) lwd (cmdliner (>= "2.0.0")) (eio (>= "0.14")) digestif (eio_main (>= "1.3")) containers-data (timedesc (>= "3.1.0")) (re (>= "1.11.0")) (ppx_deriving (>= "5.0")) decompress (progress (>= "0.5.0")) diet sqlite3 uuseg uucp (alcotest :with-test) (qcheck-alcotest :with-test) (qcheck :with-test) ) (tags ("fuzzy" "document" "finder" )) ) ================================================ FILE: file-collection-tests.t/dune ================================================ (cram (deps ../bin/docfd.exe)) ================================================ FILE: file-collection-tests.t/run.t ================================================ Setup: $ touch no-ext $ touch empty-paths.txt $ echo "test.txt" >> paths $ echo "test-symlink.txt" >> paths $ echo "test0" >> paths $ echo "test1/ijkl" >> paths $ echo "test2/" >> paths $ echo "test3/" >> paths $ echo "test.log" >> single-path0.txt $ echo "test.txt" >> single-path1.txt $ touch test.ext0 $ touch test.log $ touch test.md $ touch test.txt $ mkdir test0 $ touch test0/1234.md $ touch test0/abcd.txt $ mkdir test0/abcd $ touch test0/abcd/efgh.md $ touch test0/abcd/efgh.txt $ mkdir test1 $ touch test1/5678.md $ touch test1/ijkl.txt $ mkdir test1/ijkl $ touch test1/ijkl/mnop.md $ touch test1/ijkl/mnop.txt $ mkdir test2 $ touch test2/1234.md $ mkdir test2/abcd $ touch test2/abcd/efgh.md $ ln -s $(pwd)/test0/abcd/efgh.txt test2/abcd/efgh.txt $ ln -s ../test1/5678.md test2/56.md $ ln -s ../test1/ijkl test2/ijkl $ ln -s test2 
test3 $ ln -s test.txt test-symlink.txt $ mkdir miXeD-CaSe $ touch miXeD-CaSe/AbCd.md $ tree . |-- dune -> ../../../../default/file-collection-tests.t/dune |-- empty-paths.txt |-- miXeD-CaSe | `-- AbCd.md |-- no-ext |-- paths |-- single-path0.txt |-- single-path1.txt |-- test-symlink.txt -> test.txt |-- test.ext0 |-- test.log |-- test.md |-- test.txt |-- test0 | |-- 1234.md | |-- abcd | | |-- efgh.md | | `-- efgh.txt | `-- abcd.txt |-- test1 | |-- 5678.md | |-- ijkl | | |-- mnop.md | | `-- mnop.txt | `-- ijkl.txt |-- test2 | |-- 1234.md | |-- 56.md -> ../test1/5678.md | |-- abcd | | |-- efgh.md | | `-- efgh.txt -> $TESTCASE_ROOT/test0/abcd/efgh.txt | `-- ijkl -> ../test1/ijkl `-- test3 -> test2 7 directories, 26 files Basic invocation for reference: $ docfd --debug-log - --cache-dir .cache --index-only . 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt' Using multiline search mode for 
document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' --max-depth 0: $ docfd --debug-log - --cache-dir .cache --index-only --max-depth 0 . 2>&1 | grep '^Using .* search mode' | sort $ docfd --debug-log - --cache-dir .cache --index-only --max-depth 0 test.txt 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test.txt' --max-depth 1: $ docfd --debug-log - --cache-dir .cache --index-only --max-depth 1 . 
2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' $ docfd --debug-log - --cache-dir .cache --index-only --max-depth 1 --glob '**/*.md' 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test.md' --max-depth 2: $ docfd --debug-log - --cache-dir .cache --index-only --max-depth 2 . 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md' Using single line search mode for document 
'$TESTCASE_ROOT/test.log' $ docfd --debug-log - --cache-dir .cache --index-only --max-depth 2 --glob '**/*.md' 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md' Default path is not picked if --paths-from is used: $ docfd --debug-log - --cache-dir .cache --index-only --paths-from empty-paths.txt 2>&1 | grep '^Using .* search mode' | sort Default path is not picked if --glob is used: $ docfd --debug-log - --cache-dir .cache --index-only --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using single line search mode for document '$TESTCASE_ROOT/test.log' Default path is not picked if --single-line-glob is used: $ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using single line search mode for document '$TESTCASE_ROOT/test.log' Multiple --paths-from: $ docfd --debug-log - --cache-dir .cache --index-only --paths-from single-path0.txt --paths-from single-path1.txt 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Empty --exts: $ docfd --debug-log - --cache-dir .cache --index-only --exts "" . 2>&1 | grep '^Using .* search mode' | sort Using single line search mode for document '$TESTCASE_ROOT/test.log' Empty --single-line-exts: $ docfd --debug-log - --cache-dir .cache --index-only --single-line-exts "" . 
2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Using multiline 
search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' Empty --exts and --single-line-exts: $ docfd --debug-log - --cache-dir .cache --index-only --exts "" --single-line-exts "" . Initializing in-memory index error: no usable file extensions or glob patterns [1] --add-exts: $ docfd --debug-log - --cache-dir .cache --index-only --add-exts ext0 . 2>&1 | grep '^Using .* search mode' | sort | grep "ext0" Using multiline search mode for document '$TESTCASE_ROOT/test.ext0' --single-line-add-exts: $ docfd --debug-log - --cache-dir .cache --index-only --single-line-add-exts ext0 . 2>&1 | grep '^Using .* search mode' | sort | grep "ext0" Using single line search mode for document '$TESTCASE_ROOT/test.ext0' Picking via multiple --glob: $ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Picking via multiple --single-line-glob: $ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --single-line-glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt' Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Using single line 
search mode for document '$TESTCASE_ROOT/test.md' Using single line search mode for document '$TESTCASE_ROOT/test.txt' Picking via multiple --glob and --single-line-glob: $ # --single-line-glob for .txt files $ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt' Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Using single line search mode for document '$TESTCASE_ROOT/test.txt' $ # --single-line-glob for .md files $ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --single-line-glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Using single line search mode for document '$TESTCASE_ROOT/test.md' $ # --single-line-glob for .log files $ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline 
search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' $ # --glob for .txt files $ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --single-line-glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Using single line search mode for document '$TESTCASE_ROOT/test.md' $ # --glob for .md files $ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt' Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Using single line search mode for document '$TESTCASE_ROOT/test.txt' $ # --glob for .log files $ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --single-line-glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt' 
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt' Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Using single line search mode for document '$TESTCASE_ROOT/test.md' Using single line search mode for document '$TESTCASE_ROOT/test.txt' --single-line-exts and --exts: $ docfd --debug-log - --cache-dir .cache --index-only --single-line-exts md --exts md . 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode' | sort Checking if efficiently computed and naively computed results for default search mode files are consistent Checking if efficiently computed and naively computed results for single line search mode files are consistent Checking if single line search mode files and default search mode files are disjoint Using single line search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using single line search mode for document '$TESTCASE_ROOT/test.md' Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test1/5678.md' Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test2/56.md' Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test3/56.md' Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' --single-line-exts and --glob: $ docfd --debug-log - --cache-dir .cache 
--index-only --exts "" --single-line-exts md --glob '**/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode' | sort Checking if efficiently computed and naively computed results for default search mode files are consistent Checking if efficiently computed and naively computed results for single line search mode files are consistent Checking if single line search mode files and default search mode files are disjoint Using single line search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using single line search mode for document '$TESTCASE_ROOT/test.md' Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test1/5678.md' Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test2/56.md' Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test3/56.md' Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' --single-line-glob and --exts: $ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.md' --exts md . 
2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode'| sort Checking if efficiently computed and naively computed results for default search mode files are consistent Checking if efficiently computed and naively computed results for single line search mode files are consistent Checking if single line search mode files and default search mode files are disjoint Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test.log' Using single line search mode for document '$TESTCASE_ROOT/test.md' --single-line-glob and --glob: $ docfd --debug-log - --cache-dir .cache --index-only --exts "" --single-line-glob '*.md' --glob '**/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode' | sort Checking if efficiently computed and naively computed results for default search mode files are consistent Checking if efficiently computed and naively computed results for single line search mode files are consistent Checking if single line search mode files and default search mode files are disjoint 
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test.md' --exts applies to directories in FILE in --paths-from FILE: $ docfd --debug-log - --cache-dir .cache --index-only --paths-from paths --exts md 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document 
'$TESTCASE_ROOT/test3/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' --single-line-exts apply to directories in FILE in --paths-from FILE: $ docfd --debug-log - --cache-dir .cache --index-only --paths-from paths --single-line-exts md 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test2/56.md' Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test3/56.md' Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Top-level symlinks: $ docfd --debug-log - --cache-dir .cache --index-only 
test-symlink.txt 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' $ docfd --debug-log - --cache-dir .cache --index-only test3 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' Top-level files and --single-line-exts: $ docfd --debug-log - --cache-dir .cache --index-only test.txt --single-line-exts txt 2>&1 | grep '^Using .* search mode' | sort Using single line search mode for document '$TESTCASE_ROOT/test.txt' Top-level files and --single-line-glob: $ docfd --debug-log - --cache-dir .cache --index-only test.txt --single-line-glob '*.txt' 2>&1 | grep '^Using .* search mode' | sort Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt' Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using single line search mode for document '$TESTCASE_ROOT/test.txt' --glob and unrecognized extensions: $ docfd --debug-log - --cache-dir .cache --index-only --exts md --glob "*.txt" . 
2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test.log' --single-line-glob and unrecognized extensions: $ docfd --debug-log - --cache-dir .cache --index-only --exts md --single-line-glob "*.txt" . 
2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using multiline search mode for document '$TESTCASE_ROOT/test.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt' Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Using single line search mode for document '$TESTCASE_ROOT/test.txt' --single-line: $ docfd --debug-log - --cache-dir .cache --index-only --single-line . 
2>&1 | grep '^Using .* search mode' | sort Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using single line search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt' Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using single line search mode for document '$TESTCASE_ROOT/test.log' Using single line search mode for document '$TESTCASE_ROOT/test.md' Using single line search mode for document '$TESTCASE_ROOT/test.txt' Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt' Using single line search mode for document '$TESTCASE_ROOT/test1/5678.md' Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt' Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test2/56.md' Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md' Using single line search mode for document '$TESTCASE_ROOT/test3/56.md' Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using single line search mode for document 
'$TESTCASE_ROOT/test3/abcd/efgh.txt' Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' --glob and --single-line: $ docfd --debug-log - --cache-dir .cache --index-only --single-line --glob '**/*.txt' 2>&1 | grep '^Using .* search mode' | sort Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt' Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt' Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using single line search mode for document '$TESTCASE_ROOT/test.txt' Using single line search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt' Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt' Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' Top-level files with unrecognized extensions are still picked: $ docfd --debug-log - --cache-dir .cache --index-only --exts md test.txt . 2>&1 | grep '^Using .* search mode' | sort | grep 'test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Top-level files without extensions are still picked: $ docfd --debug-log - --cache-dir .cache --index-only --exts md no-ext . 2>&1 | grep '^Using .* search mode' | sort | grep 'no-ext' Using multiline search mode for document '$TESTCASE_ROOT/no-ext' Current working directory is symlink: $ cd test3 $ docfd --debug-log - --cache-dir .cache --index-only . 
2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' $ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' . 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' $ docfd --debug-log - --cache-dir .cache --index-only --glob '**/*.txt' . 2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' $ docfd --debug-log - --cache-dir .cache --index-only --glob "$(pwd)/**/*.txt" . 
2>&1 | grep '^Using .* search mode' | sort Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' $ cd .. './' in glob: $ docfd --debug-log - --cache-dir .cache --index-only --glob './*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' '..' in glob: $ docfd --debug-log - --cache-dir .cache --index-only --glob 'test1/../*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Directories in glob: $ docfd --debug-log - --cache-dir .cache --index-only --glob '.' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort $ docfd --debug-log - --cache-dir .cache --index-only --glob '..' 
2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort $ docfd --debug-log - --cache-dir .cache --index-only --glob 'test0/' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort $ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Crossing symlinks explicitly in glob: $ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3/../*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3/abcd/*.txt' --glob 'test3/abcd/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3/ijkl/*.txt' --glob 'test3/ijkl/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' '**' in glob: $ docfd --debug-log - --cache-dir .cache --index-only --glob '**/*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test.txt Glob 
$TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' $ docfd --debug-log - --cache-dir .cache --index-only --glob '**/**/*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path 
$TESTCASE_ROOT/test-symlink.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' $ docfd --debug-log - --cache-dir .cache --index-only --glob "$(pwd)/**/*.txt" 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt Glob 
$TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' $ docfd --debug-log - --cache-dir .cache --index-only --glob "$(pwd)/**/**/*.txt" 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Glob $TESTCASE_ROOT/**/**/*.txt matches path 
$TESTCASE_ROOT/empty-paths.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt' Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt' Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt' Using multiline search mode for document '$TESTCASE_ROOT/test.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt' Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt' $ docfd --debug-log - --cache-dir .cache --index-only 
--glob "**/test[01]/*.txt" 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Glob $TESTCASE_ROOT/**/test[01]/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt Glob $TESTCASE_ROOT/**/test[01]/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt' Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt' Case insensitive marker: $ # Exact match without marker $ docfd --debug-log - --cache-dir .cache --index-only --glob 'miXeD-CaSe/AbCd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ # All lowercase glob $ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-case/abcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort $ docfd --debug-log - --cache-dir .cache --index-only --glob '\cmixed-case/abcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-\ccase/abcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-case/\cabcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-case/abcd.md\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ # All uppercase glob $ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIXED-CASE/ABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort $ docfd --debug-log - --cache-dir .cache --index-only --glob '\cMIXED-CASE/ABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | 
sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIX\cED-CASE/ABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIXED-CASE/\cABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIXED-CASE/ABCD.MD\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ # Mixed case glob $ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/aBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort $ docfd --debug-log - --cache-dir .cache --index-only --glob '\cMixeD-CaSE/aBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD\c-CaSE/aBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/\caBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' $ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/aBcD.mD\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md' Double escape characters: $ docfd --debug-log - --cache-dir .cache --index-only --glob '\\cMixeD-CaSE/AbCd.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort $ docfd --debug-log - --cache-dir .cache --index-only --glob 
'MixeD\\c-CaSE/AbCd.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort $ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/AbCd.mD\\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort ================================================ FILE: lib/GZIP.ml ================================================ (* Basically fully copied from examples in Decompress manual *) let time () = Int32.of_float (Unix.gettimeofday ()) let compress (s : string) : string = let i = De.bigstring_create De.io_buffer_size in let o = De.bigstring_create De.io_buffer_size in let config = Gz.Higher.configuration Gz.Unix time in let w = De.Lz77.make_window ~bits:15 in let q = De.Queue.create 1024 in let res = Buffer.create 4096 in let cur = ref 0 in let refill buf = let len = min (String.length s - !cur) De.io_buffer_size in Bigstringaf.blit_from_string s ~src_off:!cur buf ~dst_off:0 ~len; cur := !cur + len; len in let flush buf len = let str = Bigstringaf.substring buf ~off:0 ~len in Buffer.add_string res str in Gz.Higher.compress ~w ~q ~level:4 ~refill ~flush () config i o; Buffer.contents res let decompress (s : string) : string option = let i = De.bigstring_create De.io_buffer_size in let o = De.bigstring_create De.io_buffer_size in let r = Buffer.create 0x1000 in let cur = ref 0 in let refill buf = let len = min (String.length s - !cur) De.io_buffer_size in Bigstringaf.blit_from_string s ~src_off:!cur buf ~dst_off:0 ~len; cur := !cur + len; len in let flush buf len = let str = Bigstringaf.substring buf ~off:0 ~len in Buffer.add_string r str in match Gz.Higher.uncompress ~refill ~flush i o with | Ok _ -> Some (Buffer.contents r) | Error _ -> None ================================================ FILE: lib/char_map.ml ================================================ include CCMap.Make (Char) ================================================ FILE: lib/doc_id_db.ml ================================================ type t = { lock : Eio.Mutex.t; 
doc_id_of_doc_hash : (string, int64) Hashtbl.t; } let t : t = { lock = Eio.Mutex.create (); doc_id_of_doc_hash = Hashtbl.create 10_000; } let lock : type a. (unit -> a) -> a = fun f -> Eio.Mutex.use_rw ~protect:true t.lock f let allocate_bulk (doc_hashes : string Seq.t) : unit = let open Sqlite3_utils in lock (fun () -> with_db (fun db -> with_stmt ~db {| INSERT INTO doc_info (id, hash, status) VALUES ( (SELECT IFNULL( ( SELECT a.id - 1 AS id FROM doc_info a LEFT JOIN doc_info b ON a.id - 1 = b.id WHERE b.id IS NULL AND a.id - 1 >= 0 UNION SELECT a.id + 1 AS id FROM doc_info a LEFT JOIN doc_info b ON a.id + 1 = b.id WHERE b.id IS NULL ORDER BY id LIMIT 1 ), 0 ) ), @doc_hash, 'ONGOING' ) ON CONFLICT(hash) DO NOTHING |} (fun stmt -> Seq.iter (fun doc_hash -> bind_names stmt [ ("@doc_hash", TEXT doc_hash) ]; step stmt; reset stmt; ) doc_hashes ); with_stmt ~db {| SELECT id FROM doc_info WHERE hash = @doc_hash |} (fun stmt -> Seq.iter (fun doc_hash -> bind_names stmt [ ("@doc_hash", TEXT doc_hash) ]; step stmt; Hashtbl.add t.doc_id_of_doc_hash doc_hash (column_int64 stmt 0); reset stmt; ) doc_hashes ) ) ) let doc_id_of_doc_hash (doc_hash : string) : int64 = let test = lock (fun () -> Hashtbl.find_opt t.doc_id_of_doc_hash doc_hash ) in match test with | Some id -> id | None -> ( allocate_bulk (Seq.return doc_hash); lock (fun () -> Hashtbl.find t.doc_id_of_doc_hash doc_hash ) ) ================================================ FILE: lib/doc_id_db.mli ================================================ val allocate_bulk : string Seq.t -> unit val doc_id_of_doc_hash : string -> int64 ================================================ FILE: lib/docfd_lib.ml ================================================ module Index = Index module Doc_id_db = Doc_id_db module Link = Link module Search_result = Search_result module Search_phrase = Search_phrase module Search_exp = Search_exp module Search_result_heap = Search_result_heap module Word_db = Word_db module Tokenization = 
Tokenization module Params' = Params module Task_pool = Task_pool module Stop_signal = Stop_signal module Parser_components = Parser_components module Misc_utils' = Misc_utils module Sqlite3_utils = Sqlite3_utils let init ~db_path ~document_count_limit = let open Sqlite3_utils in let db = db_open db_path in let db_res = Sqlite3.exec db Params.db_schema in let res = if not (Rc.is_success db_res) then ( Some (Fmt.str "failed to initialize index DB: %s" (Rc.to_string db_res)) ) else ( Params.db_path := Some db_path; if Index.document_count () >= document_count_limit then ( Index.prune_old_documents ~keep_n_latest:document_count_limit ); None ) in while not (db_close db) do Unix.sleepf 0.1 done; res ================================================ FILE: lib/dune ================================================ (library (flags (-w "+a-4-9-29-37-40-42-44-48-50-32-30-70@8")) (name docfd_lib) (preprocess (pps ppx_deriving.show ppx_deriving.ord ppx_deriving.eq )) (libraries containers containers-data angstrom fmt spelll oseq eio decompress.gz bigstringaf unix diet sqlite3 uuseg uucp timedesc timedesc-tzlocal.unix-or-utc ) ) ================================================ FILE: lib/index.ml ================================================ module Line_loc = struct type t = { page_num : int; line_num_in_page : int; global_line_num : int; } [@@deriving eq] let page_num t = t.page_num let line_num_in_page t = t.line_num_in_page let global_line_num t = t.global_line_num let compare (x : t) (y : t) = Int.compare x.global_line_num y.global_line_num end module Loc = struct type t = { line_loc : Line_loc.t; pos_in_line : int; } [@@deriving eq] let line_loc t = t.line_loc let pos_in_line t = t.pos_in_line end module Raw = struct type t = { pos_s_of_word : Int_set.t Int_map.t; loc_of_pos : Loc.t Int_map.t; line_loc_of_global_line_num : Line_loc.t Int_map.t; start_end_inc_pos_of_global_line_num : (int * int) Int_map.t; start_end_inc_pos_of_page_num : (int * int) Int_map.t; word_of_pos 
: int Int_map.t; line_count_of_page_num : int Int_map.t; page_count : int; global_line_count : int; links : Link.t array; } type multi_indexed_word = { pos : int; loc : Loc.t; word : string; } type chunk = multi_indexed_word array let make () : t = { pos_s_of_word = Int_map.empty; loc_of_pos = Int_map.empty; line_loc_of_global_line_num = Int_map.empty; start_end_inc_pos_of_global_line_num = Int_map.empty; start_end_inc_pos_of_page_num = Int_map.empty; word_of_pos = Int_map.empty; line_count_of_page_num = Int_map.empty; page_count = 0; global_line_count = 0; links = [||]; } let word_ids (t : t) : Int_set.t = Int_map.fold (fun word_id _pos_s acc -> Int_set.add word_id acc ) t.pos_s_of_word Int_set.empty let links (t : t) = t.links let union (x : t) (y : t) = { pos_s_of_word = Int_map.union (fun _k s0 s1 -> Some (Int_set.union s0 s1)) x.pos_s_of_word y.pos_s_of_word; loc_of_pos = Int_map.union (fun _k x _ -> Some x) x.loc_of_pos y.loc_of_pos; line_loc_of_global_line_num = Int_map.union (fun _k x _ -> Some x) x.line_loc_of_global_line_num y.line_loc_of_global_line_num; start_end_inc_pos_of_global_line_num = Int_map.union (fun _k (start_x, end_inc_x) (start_y, end_inc_y) -> Some (min start_x start_y, max end_inc_x end_inc_y)) x.start_end_inc_pos_of_global_line_num y.start_end_inc_pos_of_global_line_num; start_end_inc_pos_of_page_num = Int_map.union (fun _k (start_x, end_inc_x) (start_y, end_inc_y) -> Some (min start_x start_y, max end_inc_x end_inc_y)) x.start_end_inc_pos_of_page_num y.start_end_inc_pos_of_page_num; word_of_pos = Int_map.union (fun _k x _ -> Some x) x.word_of_pos y.word_of_pos; line_count_of_page_num = Int_map.union (fun _k x y -> Some (max x y)) x.line_count_of_page_num y.line_count_of_page_num; page_count = max x.page_count y.page_count; global_line_count = max x.global_line_count y.global_line_count; links = [||]; } let words_of_lines (s : (Line_loc.t * string) Seq.t) : multi_indexed_word Seq.t = s |> Seq.flat_map (fun (line_loc, s) -> let seq = 
Tokenization.tokenize_with_pos ~drop_spaces:false s in if Seq.is_empty seq then ( let empty_word = ({ Loc.line_loc; pos_in_line = 0 }, "") in Seq.return empty_word ) else ( Seq.map (fun (pos_in_line, word) -> ({ Loc.line_loc; pos_in_line }, word)) seq ) ) |> Seq.mapi (fun pos (loc, word) -> { pos; loc; word }) let of_chunk (arr : chunk) : t = Array.fold_left (fun { pos_s_of_word; loc_of_pos; line_loc_of_global_line_num; start_end_inc_pos_of_global_line_num; start_end_inc_pos_of_page_num; word_of_pos; line_count_of_page_num; page_count; global_line_count; } { pos; loc; word } -> let word_id = Word_db.add word in let line_loc = loc.Loc.line_loc in let global_line_num = line_loc.global_line_num in let page_num = line_loc.page_num in let pos_s = Int_map.find_opt word_id pos_s_of_word |> Option.value ~default:Int_set.empty |> Int_set.add pos in let cur_page_line_count = Option.value ~default:0 (Int_map.find_opt page_num line_count_of_page_num) in { pos_s_of_word = Int_map.add word_id pos_s pos_s_of_word; loc_of_pos = Int_map.add pos loc loc_of_pos; line_loc_of_global_line_num = Int_map.add global_line_num line_loc line_loc_of_global_line_num; start_end_inc_pos_of_global_line_num = Int_map.add global_line_num (match Int_map.find_opt global_line_num start_end_inc_pos_of_global_line_num with | None -> (pos, pos) | Some (x, y) -> (min x pos, max y pos)) start_end_inc_pos_of_global_line_num; start_end_inc_pos_of_page_num = Int_map.add page_num (match Int_map.find_opt page_num start_end_inc_pos_of_page_num with | None -> (pos, pos) | Some (x, y) -> (min x pos, max y pos)) start_end_inc_pos_of_page_num; word_of_pos = Int_map.add pos word_id word_of_pos; line_count_of_page_num = Int_map.add line_loc.page_num (max cur_page_line_count (line_loc.line_num_in_page + 1)) line_count_of_page_num; page_count = max page_count (line_loc.page_num + 1); global_line_count = max global_line_count (global_line_num + 1); links = [||]; } ) (make ()) arr let chunks_of_words (s : 
multi_indexed_word Seq.t) : chunk Seq.t = let empty_word = let line_loc = { Line_loc.page_num = 0; line_num_in_page = 0; global_line_num = 0 } in let loc = { Loc.line_loc; pos_in_line = 0 } in { pos = 0; loc; word = "" } in (if Seq.is_empty s then ( Seq.return empty_word ) else ( s )) |> OSeq.chunks !Params.index_chunk_size let extract_links (t : t) : Link.t array = let flush_buf link_typ ~acc ~buf = match buf with | [] -> (acc, buf) | _ -> ( let start_pos, end_inc_pos, strings = List.fold_left (fun (start, end_inc, strings) (pos, word) -> let start = match start with | None -> Some pos | Some start -> Some (min pos start) in let end_inc = match end_inc with | None -> Some pos | Some end_inc -> Some (max pos end_inc) in (start, end_inc, word :: strings) ) (None, None, []) buf in let start_pos = Option.get start_pos in let end_inc_pos = Option.get end_inc_pos in let link = Link.{ start_pos; end_inc_pos; typ = link_typ; link = String.concat "" strings; } in (link :: acc, []) ) in let process_line (line : (int * string) Dynarray.t) : Link.t list = assert (Dynarray.length line > 0); let word_at pos = Dynarray.get line pos in let word_string_at pos = snd @@ word_at pos in let word_ci_string_at pos = String.lowercase_ascii @@ word_string_at pos in let rec aux (state : [ `Scanning | `In_link of Link.typ ]) ~(acc : Link.t list) ~(buf : (int * string) list) (cur : int) : Link.t list = if cur >= Dynarray.length line then ( match state with | `Scanning -> ( acc ) | `In_link link_typ -> ( let acc, _buf = flush_buf link_typ ~acc ~buf in acc ) ) else ( let words_left = Dynarray.length line - cur - 1 in let pos, word = word_at cur in let word_ci = String.lowercase_ascii word in match state with | `Scanning -> ( let link_typ = if List.mem word_ci [ "https"; "http"; "file" ] && words_left >= 3 && word_ci_string_at (cur + 1) = ":" && word_ci_string_at (cur + 2) = "/" && word_ci_string_at (cur + 3) = "/" then ( Some `URL ) else if cur >= 2 && word_ci_string_at (cur - 2) = "]" && 
word_ci_string_at (cur - 1) = "(" then ( Some `Markdown ) else if cur >= 2 && word_ci_string_at (cur - 2) = "[" && word_ci_string_at (cur - 1) = "[" then ( Some `Wiki ) else ( None ) in match link_typ with | Some link_typ -> ( aux (`In_link link_typ) ~acc ~buf:((pos, word) :: buf) (cur + 1) ) | None -> ( aux `Scanning ~acc ~buf (cur + 1) ) ) | `In_link link_typ -> ( let link_ended = String.length word = 0 || Parser_components.is_space word.[0] || List.mem word [ "]" ; ")" ; "|" ; "\"" ; "<" ; ">" ; "{" ; "}" ; "^" ; "\\" ] in if link_ended then ( let acc, buf = flush_buf link_typ ~acc ~buf in aux `Scanning ~acc ~buf (cur + 1) ) else ( aux state ~acc ~buf:((pos, word) :: buf) (cur + 1) ) ) ) in aux `Scanning ~acc:[] ~buf:[] 0 in let lines_with (mode : [ `Any_of | `All_of ]) l = let line_nums_by_word = l |> List.to_seq |> Seq.filter_map Word_db.id_of_word |> Seq.map (fun word_id -> Int_map.find_opt word_id t.pos_s_of_word |> Option.value ~default:Int_set.empty |> Int_set.to_seq |> Seq.fold_left (fun acc pos -> let loc = Int_map.find pos t.loc_of_pos in let line_loc = Loc.line_loc loc in Int_set.add line_loc.global_line_num acc ) Int_set.empty ) in line_nums_by_word |> Seq.fold_left (match mode with | `Any_of -> (fun acc s -> let acc = Option.value ~default:Int_set.empty acc in Some (Int_set.union acc s) ) | `All_of -> (fun acc s -> match acc with | None -> Some s | Some acc -> Some (Int_set.inter acc s) ) ) None |> Option.value ~default:Int_set.empty in let url_line_candidates = Int_set.inter (lines_with `Any_of [ "http"; "https"; "file" ]) (lines_with `All_of [ ":"; "/" ]) in let markdown_and_wiki_line_candidates = lines_with `All_of [ "["; "]" ] in let line_candidates = Int_set.union url_line_candidates markdown_and_wiki_line_candidates |> Int_set.to_seq in let rec aux acc line_nums = match line_nums () with | Seq.Nil -> List.rev acc | Seq.Cons (cur, rest) -> ( let start, end_inc = Int_map.find cur t.start_end_inc_pos_of_global_line_num in let links = OSeq.(start 
-- end_inc) |> Seq.map (fun pos -> (pos, Int_map.find pos t.word_of_pos)) |> Seq.map (fun (pos, word_id) -> (pos, Word_db.word_of_id word_id)) |> Dynarray.of_seq |> process_line in aux (links @ acc) rest ) in aux [] line_candidates |> Array.of_list let of_seq pool (s : (Line_loc.t * string) Seq.t) : t = let indices = s |> Seq.map (fun (line_loc, s) -> (line_loc, Misc_utils.sanitize_string s)) |> words_of_lines |> chunks_of_words |> List.of_seq |> Task_pool.map_list pool of_chunk in let res = List.fold_left (fun acc index -> union acc index ) (make ()) indices in let links = extract_links res in { res with links } let of_lines pool (s : string Seq.t) : t = s |> Seq.mapi (fun global_line_num line -> ({ Line_loc.page_num = 0; line_num_in_page = global_line_num; global_line_num }, line) ) |> of_seq pool let of_pages pool (s : string list Seq.t) : t = s |> Seq.mapi (fun page_num page -> (page_num, page) ) |> Seq.flat_map (fun (page_num, page) -> match page with | [] -> ( let empty_line = ({ Line_loc.page_num; line_num_in_page = 0; global_line_num = 0 }, "") in Seq.return empty_line ) | _ -> ( List.to_seq page |> Seq.mapi (fun line_num_in_page line -> ({ Line_loc.page_num; line_num_in_page; global_line_num = 0 }, line) ) ) ) |> Seq.mapi (fun global_line_num ((line_loc : Line_loc.t), line) -> ({ line_loc with global_line_num }, line) ) |> of_seq pool end module State : sig val add_word_id_doc_id_link : word_id:int -> doc_id:int64 -> unit val read_from_db : unit -> unit val union_doc_ids_of_word_id_into_bv : word_id:int -> into:CCBV.t -> unit end = struct type t = { lock : Eio.Mutex.t; doc_ids_of_word_id : (int, CCBV.t) Hashtbl.t; } let t : t = { lock = Eio.Mutex.create (); doc_ids_of_word_id = Hashtbl.create 100_000; } let lock : type a. 
(unit -> a) -> a = fun f -> Eio.Mutex.use_rw ~protect:true t.lock f let find_doc_ids_bv ~word_id = match Hashtbl.find_opt t.doc_ids_of_word_id word_id with | Some doc_ids -> doc_ids | None -> ( let bv = CCBV.empty () in Hashtbl.replace t.doc_ids_of_word_id word_id bv; bv ) let union_doc_ids_of_word_id_into_bv ~word_id ~into = lock (fun () -> let bv = find_doc_ids_bv ~word_id in CCBV.union_into ~into bv ) let add_word_id_doc_id_link ~word_id ~doc_id = lock (fun () -> let doc_ids = find_doc_ids_bv ~word_id in CCBV.set doc_ids (Int64.to_int doc_id) ) let read_from_db () : unit = let open Sqlite3_utils in lock (fun () -> with_db (fun db -> iter_stmt ~db {| SELECT word_id, doc_id FROM word_id_doc_id_link |} ~names:[] (fun data -> let word_id = Data.to_int_exn data.(0) in let doc_id = Data.to_int_exn data.(1) in let doc_ids = find_doc_ids_bv ~word_id in CCBV.set doc_ids doc_id ) ) ) end let now_int64 () = Timedesc.Timestamp.now () |> Timedesc.Timestamp.get_s let refresh_last_used_batch (doc_ids : int64 list) : unit = let open Sqlite3_utils in let now = now_int64 () in with_db (fun db -> step_stmt ~db "BEGIN IMMEDIATE" ignore; List.iter (fun doc_id -> step_stmt ~db {| UPDATE doc_info SET last_used = @now WHERE id = @doc_id |} ~names:[ ("@doc_id", INT doc_id) ; ("@now", INT now) ] ignore; ) doc_ids; step_stmt ~db "COMMIT" ignore; ) let document_count () : int = let open Sqlite3_utils in with_db (fun db -> step_stmt ~db "SELECT COUNT(1) FROM doc_info" (fun stmt -> Int64.to_int (column_int64 stmt 0) ) ) let prune_old_documents ~keep_n_latest : unit = let open Sqlite3_utils in with_db (fun db -> step_stmt ~db "BEGIN IMMEDIATE" ignore; step_stmt ~db "DROP TABLE IF EXISTS temp.docs_to_drop" ignore; step_stmt ~db "CREATE TEMP TABLE docs_to_drop (hash TEXT, id INTEGER)" ignore; step_stmt ~db {| INSERT INTO temp.docs_to_drop SELECT hash, id FROM doc_info ORDER BY last_used DESC LIMIT -1 OFFSET @offset |} ~names:[("@offset", INT (Int64.of_int keep_n_latest))] ignore; let 
drop_based_on_doc_id ?(id_column = "doc_id") table = step_stmt ~db (Fmt.str {| DELETE FROM %s WHERE EXISTS ( SELECT 1 FROM temp.docs_to_drop WHERE %s.%s = temp.docs_to_drop.id ) |} table table id_column ) ignore in drop_based_on_doc_id ~id_column:"id" "doc_info"; drop_based_on_doc_id "line_info"; drop_based_on_doc_id "page_info"; drop_based_on_doc_id "position"; drop_based_on_doc_id "word_id_doc_id_link"; step_stmt ~db "DROP TABLE temp.docs_to_drop" ignore; step_stmt ~db "COMMIT" ignore; ) let write_raw_to_db db ~already_in_transaction ~doc_id (x : Raw.t) : unit = let open Sqlite3_utils in let now = now_int64 () in with_db ~db (fun db -> step_stmt ~db {| UPDATE doc_info SET page_count = @page_count, global_line_count = @global_line_count, max_pos = @max_pos, last_used = @now, status = 'ONGOING' WHERE id = @doc_id |} ~names:[ ("@doc_id", INT doc_id) ; ("@page_count", INT (Int64.of_int x.page_count)) ; ("@global_line_count", INT (Int64.of_int x.global_line_count)) ; ("@max_pos", INT (Int64.of_int (Int_map.max_binding x.word_of_pos |> fst))) ; ("@now", INT now) ] ignore; if not already_in_transaction then ( step_stmt ~db "BEGIN IMMEDIATE" ignore; ); with_stmt ~db {| INSERT INTO page_info (doc_id, page_num, line_count, start_pos, end_inc_pos) VALUES (@doc_id, @page_num, @line_count, @start_pos, @end_inc_pos) ON CONFLICT(doc_id, page_num) DO NOTHING |} (fun stmt -> Int_map.iter (fun page_num line_count -> let (start_pos, end_inc_pos) = Int_map.find page_num x.start_end_inc_pos_of_page_num in bind_names stmt [ ("@doc_id", INT doc_id) ; ("@page_num", INT (Int64.of_int page_num)) ; ("@line_count", INT (Int64.of_int line_count)) ; ("@start_pos", INT (Int64.of_int start_pos)) ; ("@end_inc_pos", INT (Int64.of_int end_inc_pos)) ]; step stmt; reset stmt; ) x.line_count_of_page_num ); with_stmt ~db {| INSERT INTO line_info (doc_id, global_line_num, start_pos, end_inc_pos, page_num, line_num_in_page) VALUES (@doc_id, @global_line_num, @start_pos, @end_inc_pos, @page_num, 
@line_num_in_page) ON CONFLICT(doc_id, global_line_num) DO NOTHING |} (fun stmt -> Int_map.iter (fun line_num line_loc -> let (start_pos, end_inc_pos) = Int_map.find line_num x.start_end_inc_pos_of_global_line_num in let page_num = line_loc.Line_loc.page_num in let line_num_in_page = line_loc.Line_loc.line_num_in_page in bind_names stmt [ ("@doc_id", INT doc_id) ; ("@global_line_num", INT (Int64.of_int line_num)) ; ("@start_pos", INT (Int64.of_int start_pos)) ; ("@end_inc_pos", INT (Int64.of_int end_inc_pos)) ; ("@page_num", INT (Int64.of_int page_num)) ; ("@line_num_in_page", INT (Int64.of_int line_num_in_page)) ]; step stmt; reset stmt; ) x.line_loc_of_global_line_num; ); with_stmt ~db {| INSERT INTO position (doc_id, pos, word_id) VALUES (@doc_id, @pos, @word_id) ON CONFLICT(doc_id, pos) DO NOTHING |} (fun stmt -> Int_map.iter (fun word_id pos_s -> Int_set.iter (fun pos -> bind_names stmt [ ("@doc_id", INT doc_id) ; ("@pos", INT (Int64.of_int pos)) ; ("@word_id", INT (Int64.of_int word_id)) ]; step stmt; reset stmt; ) pos_s ) x.pos_s_of_word ); with_stmt ~db {| INSERT INTO link (doc_id, start_pos, end_inc_pos, typ, link) VALUES (@doc_id, @start_pos, @end_inc_pos, @typ, @link) ON CONFLICT(doc_id, start_pos, end_inc_pos) DO NOTHING |} (fun stmt -> Array.iter (fun link -> let { Link.start_pos; end_inc_pos; typ; link } = link in let typ = Link.string_of_typ typ in bind_names stmt [ ("@doc_id", INT doc_id) ; ("@start_pos", INT (Int64.of_int start_pos)) ; ("@end_inc_pos", INT (Int64.of_int end_inc_pos)) ; ("@typ", TEXT typ) ; ("@link", TEXT link) ]; step stmt; reset stmt; ) x.links ); with_stmt ~db {| INSERT INTO word_id_doc_id_link (word_id, doc_id) VALUES (@word_id, @doc_id) ON CONFLICT(word_id, doc_id) DO NOTHING |} (fun stmt -> Int_map.iter (fun word_id _pos_s -> State.add_word_id_doc_id_link ~word_id ~doc_id; bind_names stmt [ ("@word_id", INT (Int64.of_int word_id)) ; ("@doc_id", INT doc_id) ]; step stmt; reset stmt; ) x.pos_s_of_word ); step_stmt ~db {| UPDATE 
doc_info SET status = 'COMPLETED' WHERE id = @doc_id |} ~names:[ ("@doc_id", INT doc_id) ] ignore; if not already_in_transaction then ( step_stmt ~db "COMMIT" ignore; ); ) let global_line_count = let open Sqlite3_utils in fun ~doc_id -> step_stmt {| SELECT global_line_count FROM doc_info WHERE id = @doc_id |} ~names:[ ("@doc_id", INT doc_id) ] (fun stmt -> column_int stmt 0 ) let page_count ~doc_id = let open Sqlite3_utils in step_stmt {| SELECT page_count FROM doc_info WHERE id = @doc_id |} ~names:[("@doc_id", INT doc_id)] (fun stmt -> column_int stmt 0 ) let ccvector_of_int_map : 'a . 'a Int_map.t -> 'a CCVector.ro_vector = fun m -> Int_map.to_seq m |> Seq.map snd |> CCVector.of_seq |> CCVector.freeze let is_indexed_sql = {| SELECT 1 FROM doc_info WHERE hash = @doc_hash AND status = 'COMPLETED' |} let is_indexed ~doc_hash = let open Sqlite3_utils in step_stmt is_indexed_sql ~names:[ ("@doc_hash", TEXT doc_hash) ] (fun stmt -> data_count stmt > 0 ) let word_of_pos ~doc_id pos : string = let open Sqlite3_utils in step_stmt {| SELECT word.word FROM position p JOIN word ON word.id = p.word_id WHERE p.doc_id = @doc_id AND p.pos = @pos |} ~names:[ ("@doc_id", INT doc_id) ; ("@pos", INT (Int64.of_int pos)) ] (fun stmt -> column_text stmt 0 ) let word_ci_of_pos ~doc_id pos : string = word_of_pos ~doc_id pos |> String.lowercase_ascii let words_between_start_and_end_inc : doc_id:int64 -> int * int -> string Dynarray.t = let lock = Eio.Mutex.create () in let cache = CCCache.lru ~eq:(fun (x0, y0, z0) (x1, y1, z1) -> Int64.equal x0 x1 && Int.equal y0 y1 && Int.equal z0 z1 ) 10240 in fun ~doc_id (start, end_inc) -> Eio.Mutex.use_rw ~protect:false lock (fun () -> CCCache.with_cache cache (fun (doc_id, start, end_inc) -> let open Sqlite3_utils in let acc = Dynarray.create () in iter_stmt {| SELECT word.word FROM position p JOIN word ON word.id = p.word_id WHERE p.doc_id = @doc_id AND p.pos BETWEEN @start AND @end_inc ORDER BY p.pos |} ~names:[ ("@doc_id", INT doc_id) ; 
("@start", INT (Int64.of_int start)) ; ("@end_inc", INT (Int64.of_int end_inc)) ] (fun data -> Dynarray.add_last acc (Data.to_string_exn data.(0)) ); acc ) (doc_id, start, end_inc) ) let words_of_global_line_num : doc_id:int64 -> int -> string Dynarray.t = let lock = Eio.Mutex.create () in let cache = CCCache.lru ~eq:(fun (x0, y0) (x1, y1) -> Int64.equal x0 x1 && Int.equal y0 y1) 10240 in fun ~doc_id x -> Eio.Mutex.use_rw ~protect:false lock (fun () -> CCCache.with_cache cache (fun (doc_id, x) -> let open Sqlite3_utils in if x >= global_line_count ~doc_id then ( invalid_arg "Index.words_of_global_line_num: global_line_num out of range" ) else ( let start, end_inc = step_stmt {| SELECT start_pos, end_inc_pos FROM line_info WHERE doc_id = @doc_id AND global_line_num = @x |} ~names:[ ("@doc_id", INT doc_id) ; ("@x", INT (Int64.of_int x)) ] (fun stmt -> (column_int stmt 0, column_int stmt 1) ) in words_between_start_and_end_inc ~doc_id (start, end_inc) ) ) (doc_id, x) ) let words_of_page_num ~doc_id x : string Dynarray.t = let open Sqlite3_utils in if x >= page_count ~doc_id then ( invalid_arg "Index.words_of_page_num: page_num out of range" ) else ( let start, end_inc = step_stmt {| SELECT start_pos, end_inc_pos FROM page_info WHERE doc_id = @doc_id AND page_num = @x |} ~names:[ ("@doc_id", INT doc_id) ; ("@x", INT (Int64.of_int x)) ] (fun stmt -> (column_int stmt 0, column_int stmt 1) ) in words_between_start_and_end_inc ~doc_id (start, end_inc) ) let line_of_global_line_num ~doc_id x = if x >= global_line_count ~doc_id then ( invalid_arg "Index.line_of_global_line_num: global_line_num out of range" ) else ( words_of_global_line_num ~doc_id x |> Dynarray.to_list |> String.concat "" ) let line_loc_of_global_line_num ~doc_id global_line_num : Line_loc.t = let open Sqlite3_utils in if global_line_num >= global_line_count ~doc_id then ( invalid_arg "Index.line_loc_of_global_line_num: global_line_num out of range" ) else ( let page_num, line_num_in_page = step_stmt {| 
SELECT page_num, line_num_in_page FROM line_info WHERE doc_id = @doc_id AND global_line_num = @global_line_num |} ~names:[ ("@doc_id", INT doc_id) ; ("@global_line_num", INT (Int64.of_int global_line_num)) ] (fun stmt -> (column_int stmt 0, column_int stmt 1) ) in { Line_loc.page_num; line_num_in_page; global_line_num } ) let loc_of_pos ~doc_id pos : Loc.t = let open Sqlite3_utils in let pos_in_line, global_line_num = step_stmt {| SELECT @pos - start_pos, global_line_num FROM line_info WHERE doc_id = @doc_id AND @pos BETWEEN start_pos AND end_inc_pos |} ~names:[ ("@doc_id", INT doc_id) ; ("@pos", INT (Int64.of_int pos)) ] (fun stmt -> (column_int stmt 0, column_int stmt 1) ) in let line_loc = line_loc_of_global_line_num ~doc_id global_line_num in { line_loc; pos_in_line } let max_pos ~doc_id = let open Sqlite3_utils in step_stmt {| SELECT max_pos FROM doc_info WHERE id = @doc_id |} ~names:[ ("@doc_id", INT doc_id) ] (fun stmt -> column_int stmt 0 ) let line_count_of_page_num ~doc_id page : int = let open Sqlite3_utils in step_stmt {| SELECT line_count FROM page_info WHERE doc_id = @doc_id AND page = @page |} ~names:[ ("@doc_id", INT doc_id) ; ("@page", INT (Int64.of_int page)) ] (fun stmt -> column_int stmt 0 ) let start_end_inc_pos_of_global_line_num ~doc_id global_line_num = let open Sqlite3_utils in if global_line_num >= global_line_count ~doc_id then ( invalid_arg "Index.start_end_inc_pos_of_global_line_num: global_line_num out of range" ) else ( step_stmt {| SELECT start_pos, end_inc_pos FROM line_info WHERE doc_id = @doc_id AND global_line_num = @global_line_num |} ~names:[ ("@doc_id", INT doc_id) ; ("@global_line_num", INT (Int64.of_int global_line_num)) ] (fun stmt -> (column_int stmt 0, column_int stmt 1) ) ) module Search = struct module ET = Search_phrase.Enriched_token let positions_of_words ~doc_id (words : int Seq.t) : int Dynarray.t = let open Sqlite3_utils in let acc = Dynarray.create () in let f data = Dynarray.add_last acc (Data.to_int_exn 
data.(0)) in with_stmt {| SELECT p.pos FROM position p WHERE doc_id = @doc_id AND word_id = @word_id ORDER BY p.pos |} (fun stmt -> Seq.iter (fun word_id -> bind_names stmt [ ("@doc_id", INT doc_id) ; ("@word_id", INT (Int64.of_int word_id)) ]; Rc.check (iter stmt ~f); reset stmt; ) words ); acc let usable_positions ~doc_id ?within ~around_pos (token : Search_phrase.Enriched_token.t) : int Seq.t = let open Sqlite3_utils in Eio.Fiber.yield (); let match_typ = ET.match_typ token in let start, end_inc = let start, end_inc = if ET.is_linked_to_prev token then ( match match_typ with | `Fuzzy -> (around_pos - !Params.max_linked_token_search_dist, around_pos + !Params.max_linked_token_search_dist) | `Exact | `Prefix | `Suffix -> (around_pos + 1, around_pos + 1) ) else ( (around_pos - !Params.max_token_search_dist, around_pos + !Params.max_token_search_dist) ) in match within with | None -> (start, end_inc) | Some (within_start_pos, within_end_inc_pos) -> ( (max within_start_pos start, min within_end_inc_pos end_inc) ) in let positions : int Dynarray.t = let acc : int Dynarray.t = Dynarray.create () in let cache : (string, bool) Hashtbl.t = Hashtbl.create 100 in let f data = Eio.Fiber.yield (); let indexed_word = Data.to_string_exn data.(0) in let pos = Data.to_int_exn data.(1) in let compatible = match Hashtbl.find_opt cache indexed_word with | None -> ( let compatible = ET.compatible_with_word token indexed_word in Hashtbl.replace cache indexed_word compatible; compatible ) | Some compatible -> compatible in if compatible then ( Dynarray.add_last acc pos ) in ( let extra_sql = match ET.data token with | `Explicit_spaces -> ( {|AND ( word LIKE ' %' OR word LIKE char(9) || '%' OR word LIKE char(10) || '%' OR word LIKE char(13) || '%' ) |} ) | `String search_word -> ( let search_word = search_word |> CCString.replace ~sub:"'" ~by:"''" |> CCString.replace ~sub:"\\" ~by:"\\\\" |> CCString.replace ~sub:"%" ~by:"\\%" in match match_typ with | `Fuzzy | `Suffix -> "" | `Exact -> 
(* NOTE(review): continuation of [usable_positions] — this is the `Exact
   branch of the LIKE-clause construction started on the previous line.
   FIXME: the escaping performed above covers '\'' (SQL quoting), '\\'
   and '%', but not '_', which is also a SQL LIKE wildcard; an underscore
   in the search word will match any single character under `Exact /
   `Prefix matching. *)
( Fmt.str "AND word LIKE '%s' ESCAPE '\\'" search_word )
| `Prefix -> (
    Fmt.str "AND word LIKE '%s%%' ESCAPE '\\'" search_word
  )
)
in
(* Scan every indexed word whose position falls in [start, end_inc] for
   this document; [f] (defined above) records the positions whose word is
   compatible with the current token. *)
iter_stmt
  (Fmt.str
     {| SELECT word.word AS word, p.pos as pos FROM position p JOIN word ON p.word_id = word.id WHERE p.doc_id = @doc_id AND p.pos BETWEEN @start AND @end_inc %s |}
     extra_sql)
  ~names:[ ("@doc_id", INT doc_id)
         ; ("@start", INT (Int64.of_int start))
         ; ("@end_inc", INT (Int64.of_int end_inc)) ]
  f
);
acc
in
Dynarray.to_seq positions

(* Lazily enumerate every list of positions matching the token sequence
   [l], where the first token must sit at a position usable relative to
   [around_pos] and each later token relative to its predecessor.
   [within] optionally restricts all positions to an inclusive range
   (e.g. a single line). *)
let search_around_pos
    ~doc_id
    ~(within : (int * int) option)
    (around_pos : int)
    (l : Search_phrase.Enriched_token.t list)
  : int list Seq.t =
  let rec aux around_pos l =
    (* Keep the traversal cooperative under Eio. *)
    Eio.Fiber.yield ();
    match l with
    | [] -> Seq.return []
    | token :: rest -> (
        usable_positions ~doc_id ?within ~around_pos token
        |> Seq.flat_map (fun pos ->
            aux pos rest
            |> Seq.map (fun l -> pos :: l)
          )
      )
  in
  aux around_pos l

(* [Search_result_heap.merge] preceded by a scheduler yield, so that long
   fold chains over many heaps do not starve other fibers. *)
let search_result_heap_merge_with_yield x y =
  Eio.Fiber.yield ();
  Search_result_heap.merge x y

(* One search task: explore matches of [phrase] in document [doc_id],
   anchored at [start_pos], keeping at most [search_limit_per_start]
   best results. *)
module Search_job = struct
  (* Raised to bail out as soon as any match is produced when
     [terminate_on_result_found] is set (non-interactive "does it match
     at all" mode). *)
  exception Result_found

  type t = {
    stop_signal : Stop_signal.t;
    terminate_on_result_found : bool;
    (* Set to [true] if the job is cancelled via [stop_signal], so the
       caller knows the result set is incomplete. *)
    cancellation_notifier : bool Atomic.t;
    doc_id : int64;
    within_same_line : bool;
    phrase : Search_phrase.t;
    start_pos : int;
    search_limit_per_start : int;
  }

  let make
      stop_signal
      ~terminate_on_result_found
      ~cancellation_notifier
      ~doc_id
      ~within_same_line
      ~phrase
      ~start_pos
      ~search_limit_per_start
    =
    { stop_signal;
      terminate_on_result_found;
      cancellation_notifier;
      doc_id;
      within_same_line;
      phrase;
      start_pos;
      search_limit_per_start;
    }

  (* Execute the job, racing the actual search against cancellation. *)
  let run (t : t) : Search_result_heap.t =
    match Search_phrase.enriched_tokens t.phrase with
    | [] -> Search_result_heap.empty
    | _ :: rest -> (
        let doc_id = t.doc_id in
        (* When restricted to a single line, bound every position to the
           start/end positions of the line containing [start_pos]. *)
        let within =
          if t.within_same_line then (
            let loc = loc_of_pos ~doc_id t.start_pos in
            Some (start_end_inc_pos_of_global_line_num ~doc_id loc.line_loc.global_line_num)
          ) else (
            None
          )
        in
        Eio.Fiber.first
          (fun () ->
             (* Cancellation arm: mark the notifier and return nothing. *)
             Stop_signal.await t.stop_signal;
             Atomic.set t.cancellation_notifier true;
Search_result_heap.empty) (fun () -> search_around_pos ~doc_id ~within t.start_pos rest |> Seq.map (fun l -> t.start_pos :: l) |> Seq.map (fun (l : int list) -> if t.terminate_on_result_found then ( raise Result_found ); Eio.Fiber.yield (); let opening_closing_symbol_pairs = List.map (fun pos -> word_of_pos ~doc_id pos) l |> Misc_utils.opening_closing_symbol_pairs in let found_phrase_opening_closing_symbol_match_count = let pos_arr : int array = Array.of_list l in List.fold_left (fun total (x, y) -> let pos_x = pos_arr.(x) in let pos_y = pos_arr.(y) in let c_x = String.get (word_of_pos ~doc_id pos_x) 0 in let c_y = String.get (word_of_pos ~doc_id pos_y) 0 in assert (List.exists (fun (x, y) -> c_x = x && c_y = y) Params.opening_closing_symbols); if pos_x < pos_y then ( let outstanding_opening_symbol_count = OSeq.(pos_x + 1 --^ pos_y) |> Seq.fold_left (fun count pos -> match count with | Some count -> ( let word = word_of_pos ~doc_id pos in if String.length word = 1 then ( if String.get word 0 = c_x then ( Some (count + 1) ) else if String.get word 0 = c_y then ( if count = 0 then ( None ) else ( Some (count - 1) ) ) else ( Some count ) ) else ( Some count ) ) | None -> None ) (Some 0) in match outstanding_opening_symbol_count with | Some 0 -> total + 1 | _ -> total ) else ( total ) ) 0 opening_closing_symbol_pairs in Search_result.make t.phrase ~found_phrase:(List.map (fun pos -> Search_result.{ found_word_pos = pos; found_word_ci = word_ci_of_pos ~doc_id pos; found_word = word_of_pos ~doc_id pos; }) l) ~found_phrase_opening_closing_symbol_match_count ) |> Seq.fold_left (fun best_results r -> Eio.Fiber.yield (); let best_results = Search_result_heap.add best_results r in if Search_result_heap.size best_results <= t.search_limit_per_start then ( best_results ) else ( let best_results, _ = Search_result_heap.take_exn best_results in best_results ) ) Search_result_heap.empty ) ) end module Search_job_group = struct type t = { terminate_on_result_found : bool; 
stop_signal : Stop_signal.t; cancellation_notifier : bool Atomic.t; doc_id : int64; within_same_line : bool; phrase : Search_phrase.t; possible_start_pos_list : int list; search_limit_per_start : int; } let unpack (group : t) : Search_job.t Seq.t = let { stop_signal; terminate_on_result_found; cancellation_notifier; doc_id; within_same_line; phrase; possible_start_pos_list; search_limit_per_start; } = group in List.to_seq possible_start_pos_list |> Seq.map (fun start_pos -> Search_job.make stop_signal ~terminate_on_result_found ~cancellation_notifier ~doc_id ~within_same_line ~phrase ~start_pos ~search_limit_per_start ) let run (t : t) = unpack t |> Seq.map Search_job.run |> Seq.fold_left Search_result_heap.merge Search_result_heap.empty end let make_search_job_groups stop_signal ?(terminate_on_result_found = false) ~(cancellation_notifier : bool Atomic.t) ~doc_id ~doc_word_ids ~global_first_word_candidates_lookup ~within_same_line ~(search_scope : Diet.Int.t option) (exp : Search_exp.t) : Search_job_group.t Seq.t = if Search_exp.is_empty exp then ( Seq.empty ) else ( Search_exp.flattened exp |> List.to_seq |> Seq.flat_map (fun phrase -> let first_word_candidates = match Search_phrase.enriched_tokens phrase with | [] -> failwith "unexpected case" | first_word :: _ -> ( Search_phrase.Enriched_token.Data_map.find (Search_phrase.Enriched_token.data first_word) global_first_word_candidates_lookup |> Int_set.inter doc_word_ids ) in let possible_starts = first_word_candidates |> Int_set.to_seq |> positions_of_words ~doc_id |> (fun arr -> match search_scope with | None -> arr | Some search_scope -> ( Dynarray.filter (fun x -> Diet.Int.mem x search_scope ) arr ) ) in let possible_start_count = Dynarray.length possible_starts in if possible_start_count = 0 then ( Seq.empty ) else ( let search_limit_per_start = max Params.search_result_min_per_start ( (Params.default_search_result_total_per_document + possible_start_count - 1) / possible_start_count ) in let 
search_chunk_size = max 10 (possible_start_count / Task_pool.size) in OSeq.(0 --^ possible_start_count) |> OSeq.chunks search_chunk_size |> Seq.map (fun index_arr -> Array.map (fun i -> Dynarray.get possible_starts i ) index_arr |> Array.to_list ) |> Seq.map (fun possible_start_pos_list -> { Search_job_group.stop_signal; terminate_on_result_found; cancellation_notifier; doc_id; within_same_line; phrase; possible_start_pos_list; search_limit_per_start; } ) ) ) ) let search pool stop_signal ?terminate_on_result_found ~cancellation_notifier ~doc_id ~doc_word_ids ~global_first_word_candidates_lookup ~within_same_line ~search_scope (exp : Search_exp.t) : Search_result_heap.t = make_search_job_groups stop_signal ?terminate_on_result_found ~cancellation_notifier ~doc_id ~doc_word_ids ~global_first_word_candidates_lookup ~within_same_line ~search_scope exp |> List.of_seq |> Task_pool.map_list pool Search_job_group.run |> List.fold_left search_result_heap_merge_with_yield Search_result_heap.empty end let search pool stop_signal ?terminate_on_result_found ~doc_id ~doc_word_ids ~global_first_word_candidates_lookup ~within_same_line ~search_scope (exp : Search_exp.t) : Search_result.t array option = let cancellation_notifier = Atomic.make false in let arr = Search.search pool stop_signal ?terminate_on_result_found ~cancellation_notifier ~doc_id ~doc_word_ids ~global_first_word_candidates_lookup ~within_same_line ~search_scope exp |> Search_result_heap.to_seq |> Array.of_seq in if Atomic.get cancellation_notifier then ( None ) else ( Array.sort Search_result.compare_relevance arr; Some arr ) module Search_job = Search.Search_job module Search_job_group = Search.Search_job_group let make_search_job_groups = Search.make_search_job_groups module Word_candidate_heap = CCHeap.Make_from_compare (struct type t = int * float let compare (_x0, y0) (_x1, y1) = Float.compare y0 y1 end) let generate_global_first_word_candidates_lookup pool ?(acc = 
Search_phrase.Enriched_token.Data_map.empty) (exp : Search_exp.t) : Int_set.t Search_phrase.Enriched_token.Data_map.t = Search_exp.flattened exp |> List.fold_left (fun acc phrase -> match Search_phrase.enriched_tokens phrase with | [] -> failwith "unexpected case" | first_word :: _rest -> ( let data = Search_phrase.Enriched_token.data first_word in match Search_phrase.Enriched_token.Data_map.find_opt data acc with | None -> ( let score ~search_word ~found_word = let search_word_ci = String.lowercase_ascii search_word in let search_word_len = Int.to_float (String.length search_word) in let found_word_ci = String.lowercase_ascii found_word in let found_word_len = Int.to_float (String.length found_word) in if String.equal search_word found_word then ( 1.0 ) else if String.equal search_word_ci found_word_ci then ( 0.9 ) else if CCString.find ~sub:search_word found_word >= 0 then ( search_word_len /. found_word_len ) else if CCString.find ~sub:search_word_ci found_word_ci >= 0 then ( 0.9 *. (search_word_len /. found_word_len) ) else ( 1.0 -. (Int.to_float (Spelll.edit_distance search_word_ci found_word_ci) /. 
search_word_len ) ) in let candidates = Word_db.filter pool (Search_phrase.Enriched_token.compatible_with_word first_word) in let candidates = match data with | `Explicit_spaces -> ( Dynarray.fold_left (fun acc (id, _word) -> Int_set.add id acc ) Int_set.empty candidates ) | `String s -> ( let s_len = Int.to_float (String.length s) in let max_candidate_count = min 500 (Int.of_float @@ Float.pow 2.5 s_len) in Dynarray.fold_left (fun acc (id, word) -> let acc = Word_candidate_heap.add acc (id, score ~search_word:s ~found_word:word) in if Word_candidate_heap.size acc <= max_candidate_count then ( acc ) else ( let acc, _ = Word_candidate_heap.take_exn acc in acc ) ) Word_candidate_heap.empty candidates |> Word_candidate_heap.to_seq |> Seq.map (fun (id, _score) -> id ) |> Int_set.of_seq ) in Search_phrase.Enriched_token.Data_map.add data candidates acc ) | Some _ -> acc ) ) acc let word_ids ~doc_id = let open Sqlite3_utils in with_db (fun db -> fold_stmt ~db {| SELECT word_id_doc_id_link.word_id FROM word_id_doc_id_link WHERE word_id_doc_id_link.doc_id = @doc_id |} ~names:[ ("@doc_id", INT doc_id) ] (fun acc data -> Int_set.add (Data.to_int_exn data.(0)) acc ) Int_set.empty ) let links ~doc_id : Link.t array = let open Sqlite3_utils in let acc = Dynarray.create () in with_db (fun db -> iter_stmt ~db {| SELECT start_pos, end_inc_pos, typ, link FROM link WHERE link.doc_id = @doc_id |} ~names:[ ("@doc_id", INT doc_id) ] (fun data -> let start_pos = Data.to_int_exn data.(0) in let end_inc_pos = Data.to_int_exn data.(1) in let typ = Data.to_string_exn data.(2) |> Link.typ_of_string |> Option.get in let link = Data.to_string_exn data.(3) in Dynarray.add_last acc { Link.start_pos; end_inc_pos; typ; link } ); ); Dynarray.to_array acc ================================================ FILE: lib/index.mli ================================================ module Line_loc : sig type t val page_num : t -> int val line_num_in_page : t -> int val global_line_num : t -> int val compare : 
t -> t -> int end module Loc : sig type t val line_loc : t -> Line_loc.t val pos_in_line : t -> int end val word_ci_of_pos : doc_id:int64 -> int -> string val word_of_pos : doc_id:int64 -> int -> string val words_of_global_line_num : doc_id:int64 -> int -> string Dynarray.t val line_of_global_line_num : doc_id:int64 -> int -> string val line_loc_of_global_line_num : doc_id:int64 -> int -> Line_loc.t val loc_of_pos : doc_id:int64 -> int -> Loc.t val max_pos : doc_id:int64 -> int val words_of_page_num : doc_id:int64 -> int -> string Dynarray.t val line_count_of_page_num : doc_id:int64 -> int -> int val generate_global_first_word_candidates_lookup : Task_pool.t -> ?acc:Int_set.t Search_phrase.Enriched_token.Data_map.t -> Search_exp.t -> Int_set.t Search_phrase.Enriched_token.Data_map.t val search : Task_pool.t -> Stop_signal.t -> ?terminate_on_result_found : bool -> doc_id:int64 -> doc_word_ids:Int_set.t -> global_first_word_candidates_lookup:Int_set.t Search_phrase.Enriched_token.Data_map.t -> within_same_line:bool -> search_scope:Diet.Int.t option -> Search_exp.t -> Search_result.t array option module Search_job : sig exception Result_found type t val run : t -> Search_result_heap.t end module Search_job_group : sig type t val unpack : t -> Search_job.t Seq.t val run : t -> Search_result_heap.t end val make_search_job_groups : Stop_signal.t -> ?terminate_on_result_found : bool -> cancellation_notifier:bool Atomic.t -> doc_id:int64 -> doc_word_ids:Int_set.t -> global_first_word_candidates_lookup:Int_set.t Search_phrase.Enriched_token.Data_map.t -> within_same_line:bool -> search_scope:Diet.Int.t option -> Search_exp.t -> Search_job_group.t Seq.t val global_line_count : doc_id:int64 -> int val page_count : doc_id:int64 -> int val is_indexed_sql : string val is_indexed : doc_hash:string -> bool val refresh_last_used_batch : int64 list -> unit val document_count : unit -> int val prune_old_documents : keep_n_latest:int -> unit module Raw : sig type t val word_ids : t -> 
Int_set.t

  val of_lines : Task_pool.t -> string Seq.t -> t

  val of_pages : Task_pool.t -> string list Seq.t -> t

  val links : t -> Link.t array
end

val word_ids : doc_id:int64 -> Int_set.t

val write_raw_to_db : Sqlite3.db -> already_in_transaction:bool -> doc_id:int64 -> Raw.t -> unit

module State : sig
  val union_doc_ids_of_word_id_into_bv : word_id:int -> into:CCBV.t -> unit

  val read_from_db : unit -> unit
end

val links : doc_id:int64 -> Link.t array
================================================ FILE: lib/int_map.ml ================================================
(* Integer-keyed map, via containers. *)
include CCMap.Make (Int)
================================================ FILE: lib/int_set.ml ================================================
(* Integer set, via containers. *)
include CCSet.Make (Int)
================================================ FILE: lib/link.ml ================================================
(* A hyperlink discovered in an indexed document, together with the
   word-position span it covers. *)
type typ = [
  | `Markdown
  | `Wiki
  | `URL
]

let string_of_typ (typ : typ) =
  match typ with
  | `Markdown -> "markdown"
  | `Wiki -> "wiki"
  | `URL -> "url"

(* Inverse of [string_of_typ]; case-insensitive, [None] on unknown input. *)
let typ_of_string (s : string) : typ option =
  match String.lowercase_ascii s with
  | "markdown" -> Some `Markdown
  | "wiki" -> Some `Wiki
  | "url" -> Some `URL
  | _ -> None

type t = {
  start_pos : int;     (* word position where the link starts *)
  end_inc_pos : int;   (* inclusive end position *)
  typ : typ;
  link : string;       (* link target text *)
}
================================================ FILE: lib/misc_utils.ml ================================================
(* Print an error message and terminate with exit status 1.
   NOTE(review): the message goes to stdout rather than stderr — this may
   be deliberate (the repo's cram tests capture stdout); confirm before
   changing. *)
let exit_with_error_msg (msg : string) =
  Printf.printf "error: %s\n" msg;
  exit 1

(* Build a case-insensitive string set by lowercasing every element. *)
let ci_string_set_of_list (l : string list) =
  l
  |> List.map String.lowercase_ascii
  |> String_set.of_list

(* [true] iff [c] occurs among the first [n] characters of [s]. *)
let first_n_chars_of_string_contains ~n s c =
  let s_len = String.length s in
  let s =
    if s_len <= n then s else String.sub s 0 n
  in
  String.contains s c

(* Printable-ASCII check. *)
let char_is_usable c =
  let code = Char.code c in
  (0x20 <= code && code <= 0x7E)

(* Replace unusable single bytes with spaces while copying valid
   multi-byte UTF-8 sequences through unchanged; invalid bytes are
   skipped (left as the ' ' the buffer was initialized with). *)
let sanitize_string s =
  let s_len = String.length s in
  let bytes = Bytes.make s_len ' ' in
  let rec aux pos =
    if pos >= s_len then
      String.of_bytes bytes
    else (
      let decode = String.get_utf_8_uchar s pos in
      if
Uchar.utf_decode_is_valid decode then ( let c = String.get_uint8 s pos in if c land 0b1000_0000 = 0b0000_0000 then ( if 0x20 <= c && c <= 0x7E then ( Bytes.blit_string s pos bytes pos 1 ); aux (pos+1) ) else ( let len = Uchar.utf_decode_length decode in Bytes.blit_string s pos bytes pos len; aux (pos+len) ) ) else ( aux (pos+1) ) ) in aux 0 let length_and_list_of_seq (s : 'a Seq.t) : int * 'a list = let len, acc = Seq.fold_left (fun (len, acc) x -> (len + 1, x :: acc) ) (0, []) s in (len, List.rev acc) let div_round_to_closest x y = (x + (y / 2)) / y let div_round_up x y = (x + (y - 1)) / y let opening_closing_symbol_pairs (l : string list) : (int * int) list = let _, pairs = CCList.foldi (fun ((m, pairs) : (int list Char_map.t) * ((int * int) list)) i s -> if String.length s = 1 then ( let c = String.get s 0 in match List.assoc_opt c Params.opening_closing_symbols with | Some _ -> ( let stack = match Char_map.find_opt c m with | None -> [] | Some l -> l in (Char_map.add c (i :: stack) m, pairs) ) | None -> ( match List.assoc_opt c Params.opening_closing_symbols_flipped with | Some corresponding_open_symbol -> ( let stack = match Char_map.find_opt corresponding_open_symbol m with | None -> [] | Some l -> l in match stack with | [] -> (m, pairs) | x :: xs -> ( (Char_map.add corresponding_open_symbol xs m, (x, i) :: pairs) ) ) | None -> (m, pairs) ) ) else ( (m, pairs) ) ) (Char_map.empty, []) l in pairs let cwd_path_parts () = Sys.getcwd () |> CCString.split ~by:Filename.dir_sep |> List.rev let path_of_parts parts = match List.rev parts with | [] | [ "" ] -> Filename.dir_sep | [ x ] -> x | l -> String.concat Filename.dir_sep l let normalize_glob_to_absolute glob = let rec aux acc parts = match parts with | [] -> path_of_parts acc | x :: xs -> ( match x with | "" | "." -> aux acc xs | ".." 
(* NOTE(review): continuation of [normalize_glob_to_absolute] — this is
   the ".." branch of the part-by-part normalization loop: drop one
   accumulated component (staying at root if already empty). *)
-> (
    let acc =
      match acc with
      | [] -> []
      | _ :: xs -> xs
    in
    aux acc xs
  )
| "**" -> (
    (* A recursive-glob component ends normalization: keep the remainder
       of the glob verbatim. *)
    aux (List.rev parts @ acc) []
  )
| _ -> (
    aux (x :: acc) xs
  )
)
in
let glob_parts = CCString.split ~by:Filename.dir_sep glob in
match glob_parts with
| "" :: l -> (
    (* Absolute path on Unix-like systems *)
    aux [ "" ] l
  )
| _ -> (
    aux (cwd_path_parts ()) glob_parts
  )

(* Resolve [path] to an absolute path, eliminating "", "." and ".."
   components; relative paths are resolved against the current working
   directory. Purely textual — no symlink resolution. *)
let normalize_path_to_absolute path =
  let rec aux acc path_parts =
    match path_parts with
    | [] -> path_of_parts acc
    | x :: xs -> (
        match x with
        | "" | "." -> aux acc xs
        | ".." -> (
            let acc =
              match acc with
              | [] -> []
              | _ :: xs -> xs
            in
            aux acc xs
          )
        | _ -> (
            aux (x :: acc) xs
          )
      )
  in
  let path_parts = CCString.split ~by:Filename.dir_sep path in
  match path_parts with
  | "" :: l -> (
      (* Absolute path on Unix-like systems *)
      aux [ "" ] l
    )
  | _ -> (
      aux (cwd_path_parts ()) path_parts
    )
================================================ FILE: lib/option_syntax.ml ================================================
(* Binding operators for the option monad. *)
let ( let* ) = Option.bind

let ( let+ ) x y = Option.map y x
================================================ FILE: lib/params.ml ================================================
(* Global tunables and the SQLite schema. *)

(* Cap on total results kept per document (spread across start points). *)
let default_search_result_total_per_document = 50

(* Floor on results kept per start point. *)
let search_result_min_per_start = 5

let max_token_size = 500

(* Max distance (in word positions) between consecutive search tokens. *)
let default_max_token_search_dist = 50

let max_token_search_dist = ref default_max_token_search_dist

(* Tighter distance bound for tokens linked (written adjacently). *)
let default_max_linked_token_search_dist = 2

let max_linked_token_search_dist = ref default_max_linked_token_search_dist

let default_index_chunk_size = 5000

let index_chunk_size = ref default_index_chunk_size

(* LRU size for compiled Spelll automatons keyed by search word. *)
let search_word_automaton_cache_size = 200

let float_compare_margin = 0.000_001

(* Bracket pairs recognized for the opening/closing-symbol match bonus. *)
let opening_closing_symbols = [ ('(', ')') ; ('[', ']') ; ('{', '}') ]

let opening_closing_symbols_flipped =
  List.map (fun (x, y) -> (y, x)) opening_closing_symbols

(* Edit-distance limit for fuzzy matching. *)
let default_max_fuzzy_edit_dist = 2

let max_fuzzy_edit_dist = ref default_max_fuzzy_edit_dist

let db_schema = {| CREATE TABLE IF NOT EXISTS line_info ( doc_id INTEGER, global_line_num INTEGER,
start_pos INTEGER, end_inc_pos INTEGER, page_num INTEGER, line_num_in_page INTEGER, PRIMARY KEY (doc_id, global_line_num) ) WITHOUT ROWID; CREATE INDEX IF NOT EXISTS line_info_index_1 ON line_info (start_pos); CREATE INDEX IF NOT EXISTS line_info_index_2 ON line_info (end_inc_pos); CREATE TABLE IF NOT EXISTS position ( doc_id INTEGER, pos INTEGER, word_id INTEGER, PRIMARY KEY (doc_id, pos) ) WITHOUT ROWID; CREATE INDEX IF NOT EXISTS position_index_1 ON position (doc_id, word_id, pos); CREATE TABLE IF NOT EXISTS page_info ( doc_id INTEGER, page_num INTEGER, line_count INTEGER, start_pos INTEGER, end_inc_pos INTEGER, PRIMARY KEY (doc_id, page_num) ) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS doc_info ( hash TEXT PRIMARY KEY, id INTEGER, page_count INTEGER, global_line_count INTEGER, max_pos INTEGER, last_used INTEGER, status TEXT ) WITHOUT ROWID; CREATE INDEX IF NOT EXISTS doc_info_index_1 ON doc_info (id); CREATE INDEX IF NOT EXISTS doc_info_index_2 ON doc_info (last_used); CREATE TABLE IF NOT EXISTS link ( doc_id INTEGER, start_pos INTEGER, end_inc_pos INTEGER, typ TEXT, link TEXT, PRIMARY KEY (doc_id, start_pos, end_inc_pos) ) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS word ( id INTEGER, word TEXT, PRIMARY KEY (id) ) WITHOUT ROWID; CREATE INDEX IF NOT EXISTS word_index_1 ON word (word); CREATE INDEX IF NOT EXISTS word_index_2 ON word (word COLLATE NOCASE); CREATE TABLE IF NOT EXISTS word_id_doc_id_link ( word_id INTEGER, doc_id INTEGER, PRIMARY KEY (word_id, doc_id) ) WITHOUT ROWID; CREATE INDEX IF NOT EXISTS word_id_doc_id_link_index_1 ON word_id_doc_id_link (doc_id); |} let db_path : string option ref = ref None ================================================ FILE: lib/parser_components.ml ================================================ open Angstrom let is_space c = match c with | ' ' | '\t' | '\n' | '\r' -> true | _ -> false let skip_spaces = skip_while is_space let is_not_space c = not (is_space c) let any_string : string t = take_while (fun _ -> true) let 
is_letter c =
  match c with
  | 'A'..'Z' | 'a'..'z' -> true
  | _ -> false

let is_digit c =
  match c with
  | '0'..'9' -> true
  | _ -> false

let is_alphanum c = is_letter c || is_digit c

(* [true] iff [c] is a lead or continuation byte of a multi-byte UTF-8
   sequence (high bit set). *)
let is_possibly_utf_8 c =
  let c = Char.code c in
  c land 0b1000_0000 <> 0b0000_0000

(* Consume exactly one UTF-8 encoded character (1-4 bytes), sized by the
   lead byte; fails on EOF or an invalid lead byte. *)
let utf_8_char =
  peek_char >>= fun c ->
  match c with
  | None -> fail "Eof"
  | Some c -> (
      let c = Char.code c in
      if c land 0b1000_0000 = 0b0000_0000 then (
        take 1
      ) else if c land 0b1110_0000 = 0b1100_0000 then (
        take 2
      ) else if c land 0b1111_0000 = 0b1110_0000 then (
        take 3
      ) else if c land 0b1111_1000 = 0b1111_0000 then (
        take 4
      ) else (
        fail "Invalid UTF-8"
      )
    )

(* Copied from Angstrom README. *)
let chainl1 e op =
  let rec go acc =
    (lift2 (fun f x -> f acc x) op e >>= go) <|> return acc
  in
  e >>= fun init -> go init
================================================ FILE: lib/search_exp.ml ================================================
type match_typ_marker = [ `Exact | `Prefix | `Suffix ]
[@@deriving show]

(* Abstract syntax of a search expression as typed by the user. *)
type exp = [
  | `Word of string
  | `Match_typ_marker of match_typ_marker
  | `Explicit_spaces
  | `List of exp list
  | `Paren of exp
  | `Binary_op of binary_op * exp * exp
  | `Optional of exp
]
[@@deriving show]

and binary_op =
  | Or
[@@deriving show]

(* A parsed expression together with its flattening into the concrete
   search phrases it denotes. *)
type t = {
  exp : exp;
  flattened : Search_phrase.t list;
}
[@@deriving show]

let flattened (t : t) = t.flattened

let empty : t = {
  exp = `List [];
  flattened = [];
}

let is_empty (t : t) =
  t.flattened = []
  || List.for_all Search_phrase.is_empty t.flattened

(* Structural equality of the underlying expressions.
   Fix: `Match_typ_marker and `Explicit_spaces previously fell through to
   the catch-all case, so identical expressions containing ' ^ $ markers
   or ~ compared unequal. *)
let equal (t1 : t) (t2 : t) =
  let rec aux (e1 : exp) (e2 : exp) =
    match e1, e2 with
    | `Word x1, `Word x2 -> String.equal x1 x2
    | `Match_typ_marker x1, `Match_typ_marker x2 -> x1 = x2
    | `Explicit_spaces, `Explicit_spaces -> true
    | `List l1, `List l2 -> List.equal aux l1 l2
    | `Paren e1, `Paren e2 -> aux e1 e2
    | `Binary_op (Or, e1x, e1y), `Binary_op (Or, e2x, e2y) ->
      aux e1x e2x && aux e1y e2y
    | `Optional e1, `Optional e2 -> aux e1 e2
    | _, _ -> false
  in
  aux t1.exp t2.exp

(* Smart constructors used by the parser below. *)
let as_paren x : exp = `Paren x

let as_list l : exp = `List l

let as_word s : exp = `Word s

let as_word_list (l : string list) : exp = `List (List.map
as_word l) module Parsers = struct open Angstrom open Parser_components let phrase : string list Angstrom.t = many1 ( take_while1 (fun c -> match c with | '?' | '|' | '\\' | '(' | ')' | '\'' | '^' | '$' | '~' -> false | _ -> true ) <|> (char '\\' *> any_char >>| fun c -> Printf.sprintf "%c" c) ) >>| fun l -> String.concat "" l |> Tokenization.tokenize ~drop_spaces:false |> List.of_seq let or_op = char '|' *> skip_spaces *> return (fun x y -> `Binary_op (Or, x, y)) let p : exp Angstrom.t = fix (fun (exp : exp Angstrom.t) : exp Angstrom.t -> let base = choice [ (phrase >>| as_word_list); (char '\'' *> return (`Match_typ_marker `Exact)); (char '^' *> return (`Match_typ_marker `Prefix)); (char '$' *> return (`Match_typ_marker `Suffix)); (char '~' *> return (`Explicit_spaces)); (string "()" *> return (as_word_list [])); (char '(' *> exp <* char ')' >>| as_paren); ] in let opt_base = choice [ (char '?' *> skip_spaces *> phrase >>| fun l -> match l with | [] -> failwith "unexpected case" | x :: xs -> ( as_list [ `Optional (as_word x); as_word_list xs ] ) ); (char '?' 
*> skip_spaces *> base >>| fun p -> `Optional p); base; ] in let opt_bases = many1 opt_base >>| fun l -> `List l in chainl1 opt_bases or_op ) <* skip_spaces end let flatten_nested_lists (exp : exp) : exp = let rec aux (exp : exp) = match exp with | `Word _ | `Match_typ_marker _ | `Explicit_spaces -> exp | `List l -> ( `List (CCList.flat_map (fun e -> match aux e with | `List l -> l | x -> [ x ] ) l) ) | `Paren e -> `Paren (aux e) | `Binary_op (op, x, y) -> `Binary_op (op, aux x, aux y) | `Optional e -> `Optional (aux e) in aux exp let flatten (exp : exp) : Search_phrase.t list = let get_group_id = let counter = ref 0 in fun () -> let x = !counter in counter := x + 1; x in let rec aux group_id (exp : exp) : Search_phrase.annotated_token list Seq.t = match exp with | `Match_typ_marker x -> ( Seq.return [ Search_phrase.{ data = `Match_typ_marker x; group_id } ] ) | `Word s -> Seq.return [ Search_phrase.{ data = `String s; group_id } ] | `Explicit_spaces -> Seq.return [ Search_phrase.{ data = `Explicit_spaces; group_id } ] | `List l -> ( l |> List.to_seq |> Seq.map (aux group_id) |> OSeq.cartesian_product |> Seq.map List.concat ) | `Paren e -> ( aux (get_group_id ()) e ) | `Binary_op (Or, x, y) -> ( Seq.append (aux group_id x) (aux group_id y) ) | `Optional x -> ( Seq.cons [] (aux (get_group_id ()) x) ) in aux (get_group_id ()) exp |> Seq.map (fun l -> List.to_seq l |> Search_phrase.of_annotated_tokens) |> List.of_seq |> List.sort_uniq Search_phrase.compare let parse s = if String.length s = 0 || String.for_all Parser_components.is_space s then ( Some empty ) else ( match Angstrom.(parse_string ~consume:Consume.All) Parsers.p s with | Ok exp -> ( let exp = flatten_nested_lists exp in Some { exp; flattened = flatten exp; } ) | Error _ -> None ) ================================================ FILE: lib/search_exp.mli ================================================ type t val pp : Format.formatter -> t -> unit val empty : t val is_empty : t -> bool val flattened : t -> 
Search_phrase.t list val parse : string -> t option val equal : t -> t -> bool ================================================ FILE: lib/search_phrase.ml ================================================ type match_typ = [ | `Fuzzy | `Exact | `Suffix | `Prefix ] [@@deriving show, ord] type match_typ_marker = [ `Exact | `Prefix | `Suffix ] [@@deriving show, ord] let char_of_match_typ_marker (x : match_typ_marker) = match x with | `Exact -> '\'' | `Prefix -> '^' | `Suffix -> '$' let string_of_match_typ_marker (x : match_typ_marker) = match x with | `Exact -> "\'" | `Prefix -> "^" | `Suffix -> "$" type annotated_token = { data : [ | `String of string | `Match_typ_marker of match_typ_marker | `Explicit_spaces ]; group_id : int; } [@@deriving show] type ir0 = { data : [ | `String of string | `Match_typ_marker of match_typ_marker | `Explicit_spaces ]; is_linked_to_prev : bool; is_linked_to_next : bool; match_typ : match_typ option; } module Enriched_token = struct type data = [ `String of string | `Explicit_spaces ] [@@deriving ord] module Data_map = Map.Make (struct type t = data let compare = compare_data end) let pp_data formatter data = Fmt.pf formatter "%s" (match data with | `String s -> s | `Explicit_spaces -> " ") type t = { data : data; is_linked_to_prev : bool; is_linked_to_next : bool; automaton : Spelll.automaton; match_typ : match_typ; } let make data ~is_linked_to_prev ~is_linked_to_next automaton match_typ = { data; is_linked_to_prev; is_linked_to_next; automaton; match_typ } let pp fmt (x : t) = Fmt.pf fmt "%a:%b:%b:%a" pp_data x.data x.is_linked_to_prev x.is_linked_to_next pp_match_typ x.match_typ let data (t : t) = t.data let match_typ (t : t) = t.match_typ let automaton (t : t) = t.automaton let is_linked_to_prev (t : t) = t.is_linked_to_prev let is_linked_to_next (t : t) = t.is_linked_to_next let compare (x : t) (y : t) = match compare_data x.data y.data with | 0 -> ( match Bool.compare x.is_linked_to_prev y.is_linked_to_prev with | 0 -> ( match 
Bool.compare x.is_linked_to_next y.is_linked_to_next with | 0 -> ( compare_match_typ x.match_typ y.match_typ ) | n -> n ) | n -> n ) | n -> n let equal (x : t) (y : t) = compare x y = 0 let compatible_with_word (token : t) indexed_word = String.length indexed_word > 0 && (match data token with | `Explicit_spaces -> ( Parser_components.is_space indexed_word.[0] ) | `String search_word -> ( let search_word_ci = String.lowercase_ascii search_word in let indexed_word_ci = String.lowercase_ascii indexed_word in let use_ci_match = String.equal search_word search_word_ci in let search_word_len = String.length search_word in let indexed_word_len = String.length indexed_word in let edit_dist_based_match_min_len = !Params.max_fuzzy_edit_dist + 1 in if Parser_components.is_possibly_utf_8 indexed_word.[0] then ( String.equal search_word indexed_word ) else ( match match_typ token with | `Fuzzy -> ( String.equal search_word_ci indexed_word_ci || CCString.find ~sub:search_word_ci indexed_word_ci >= 0 || (indexed_word_len >= 2 && CCString.find ~sub:indexed_word_ci search_word_ci >= 0) || (search_word_len >= edit_dist_based_match_min_len && indexed_word_len >= edit_dist_based_match_min_len && Misc_utils.first_n_chars_of_string_contains ~n:3 indexed_word_ci search_word_ci.[0] && Spelll.match_with (automaton token) indexed_word_ci) ) | `Exact -> ( if use_ci_match then ( String.equal search_word_ci indexed_word_ci ) else ( String.equal search_word indexed_word ) ) | `Prefix -> ( if use_ci_match then ( CCString.prefix ~pre:search_word_ci indexed_word_ci ) else ( CCString.prefix ~pre:search_word indexed_word ) ) | `Suffix -> ( if use_ci_match then ( CCString.suffix ~suf:search_word_ci indexed_word_ci ) else ( CCString.suffix ~suf:search_word indexed_word ) ) ) ) ) end type t = { annotated_tokens : annotated_token list; enriched_tokens : Enriched_token.t list; } let is_empty (t : t) = List.is_empty t.enriched_tokens let pp fmt (t : t) = Fmt.pf fmt "%a" Fmt.(list ~sep:sp 
Enriched_token.pp) t.enriched_tokens type cache = { mutex : Eio.Mutex.t; cache : (string, Spelll.automaton) CCCache.t; } let cache = { mutex = Eio.Mutex.create (); cache = CCCache.lru ~eq:String.equal Params.search_word_automaton_cache_size; } let compare (t1 : t) (t2 : t) = List.compare Enriched_token.compare t1.enriched_tokens t2.enriched_tokens let equal (t1 : t) (t2 : t) = compare t1 t2 = 0 let empty : t = { annotated_tokens = []; enriched_tokens = []; } let ir0_s_of_annotated_tokens (tokens : annotated_token Seq.t) : ir0 list = let token_is_space (token : annotated_token) = match token.data with | `String s -> Parser_components.is_space (String.get s 0) | _ -> false in let rec aux acc (prev_token : annotated_token option) (tokens : annotated_token Seq.t) = match tokens () with | Seq.Nil -> List.rev acc | Seq.Cons (token, rest) -> ( let is_linked_to_prev = match prev_token with | None -> false | Some prev_token -> ( (prev_token.group_id = token.group_id) && (not (token_is_space prev_token)) ) in if token_is_space token then ( aux acc None rest ) else ( let ir0 : ir0 = { data = token.data; is_linked_to_prev; is_linked_to_next = false; match_typ = None; } in aux (ir0 :: acc) (Some token) rest ) ) in aux [] None tokens let ir0_s_link_forward (ir0_s : ir0 list) : ir0 list = List.rev ir0_s |> List.fold_left (fun (acc, next) x -> match next with | None -> (x :: acc, Some x) | Some next -> ( let x = { x with is_linked_to_next = next.is_linked_to_prev } in (x :: acc, Some x) ) ) ([], None) |> fst let ir0_process_exact_prefix_match_typ_markers (ir0_s : ir0 list) : ir0 list = let rec aux (acc : ir0 list) token_removed (marker : ([ `Exact ] * [ `Exact | `Prefix ]) option) (ir0_s : ir0 list) = match ir0_s with | [] -> List.rev acc | x :: xs -> ( match marker with | None -> ( let default () = aux (x :: acc) false None xs in match x.data with | `String _ | `Explicit_spaces -> default () | `Match_typ_marker m -> ( match x.match_typ with | None -> ( if x.is_linked_to_next then 
( match m with | `Exact -> aux acc true (Some (`Exact, `Exact)) xs | `Prefix -> aux acc true (Some (`Exact, `Prefix)) xs | _ -> default () ) else ( default () ) ) | Some _ -> default () ) ) | Some (m, m_last) -> ( let x = if x.is_linked_to_prev then ( { x with is_linked_to_prev = not token_removed; match_typ = Some ( if x.is_linked_to_next then (m :> match_typ) else (m_last :> match_typ) ); } ) else ( x ) in let marker = if x.is_linked_to_next then marker else None in aux (x :: acc) false marker xs ) ) in aux [] false None ir0_s let ir0_process_suffix_match_typ_markers (ir0_s : ir0 list) : ir0 list = let rec aux (acc : ir0 list) token_removed (marker : ([ `Suffix ] * [ `Exact ]) option) (ir0_s : ir0 list) = match ir0_s with | [] -> acc | x :: xs -> ( match marker with | None -> ( let default () = aux (x :: acc) false None xs in match x.data with | `String _ | `Explicit_spaces -> default () | `Match_typ_marker m -> ( match x.match_typ with | None -> ( if x.is_linked_to_prev then ( match m with | `Suffix -> aux acc true (Some (`Suffix, `Exact)) xs | _ -> default () ) else ( default () ) ) | Some _ -> default () ) ) | Some (m_first, m) -> ( let x = if x.is_linked_to_next then ( { x with is_linked_to_next = not token_removed; match_typ = Some ( if x.is_linked_to_prev then (m :> match_typ) else (m_first :> match_typ) ); } ) else ( x ) in let marker = if x.is_linked_to_prev then marker else None in aux (x :: acc) false marker xs ) ) in aux [] false None (List.rev ir0_s) let enriched_tokens_of_ir0 (ir0_s : ir0 list) : Enriched_token.t list = List.map (fun (ir0 : ir0) -> let data = match ir0.data with | `String s -> `String s | `Match_typ_marker m -> `String (string_of_match_typ_marker m) | `Explicit_spaces -> `Explicit_spaces in let is_linked_to_prev = ir0.is_linked_to_prev in let is_linked_to_next = ir0.is_linked_to_next in let automaton = match data with | `String string -> ( Eio.Mutex.use_rw cache.mutex ~protect:false (fun () -> let automaton = CCCache.with_cache 
cache.cache (Spelll.of_string ~limit:!Params.max_fuzzy_edit_dist) string in automaton ) ) | `Explicit_spaces -> Spelll.of_string ~limit:0 "" in Enriched_token.make data ~is_linked_to_prev ~is_linked_to_next automaton (Option.value ~default:`Fuzzy ir0.match_typ) ) ir0_s let of_annotated_tokens (annotated_tokens : annotated_token Seq.t) = let enriched_tokens = annotated_tokens |> ir0_s_of_annotated_tokens |> ir0_s_link_forward |> ir0_process_exact_prefix_match_typ_markers |> ir0_process_suffix_match_typ_markers |> enriched_tokens_of_ir0 in { annotated_tokens = List.of_seq annotated_tokens; enriched_tokens; } let of_tokens (tokens : string Seq.t) = tokens |> Seq.map (fun s -> { data = `String s; group_id = 0 }) |> of_annotated_tokens let parse phrase = phrase |> Tokenization.tokenize ~drop_spaces:false |> of_tokens let annotated_tokens t = t.annotated_tokens let enriched_tokens t = t.enriched_tokens ================================================ FILE: lib/search_phrase.mli ================================================ type match_typ = [ | `Fuzzy | `Exact | `Suffix | `Prefix ] [@@deriving show, ord] type match_typ_marker = [ `Exact | `Prefix | `Suffix ] [@@deriving show] type annotated_token = { data : [ | `String of string | `Match_typ_marker of match_typ_marker | `Explicit_spaces ]; group_id : int; } [@@deriving show] module Enriched_token : sig type data = [ `String of string | `Explicit_spaces ] [@@deriving ord] module Data_map : Map.S with type key = data type t val make : data -> is_linked_to_prev:bool -> is_linked_to_next:bool -> Spelll.automaton -> match_typ -> t val data : t -> data val equal : t -> t -> bool val pp : Format.formatter -> t -> unit val match_typ : t -> match_typ val is_linked_to_prev : t -> bool val is_linked_to_next : t -> bool val automaton : t -> Spelll.automaton val compatible_with_word : t -> string -> bool end type t val empty : t val compare : t -> t -> int val equal : t -> t -> bool val pp : Format.formatter -> t -> unit val 
of_annotated_tokens : annotated_token Seq.t -> t

val of_tokens : string Seq.t -> t

val parse : string -> t

val is_empty : t -> bool

val annotated_tokens : t -> annotated_token list

val enriched_tokens : t -> Enriched_token.t list
================================================
FILE: lib/search_result.ml
================================================
(* A search result: the search phrase together with the concrete words
   matched in the indexed document, plus a relevance score. *)
type indexed_found_word = {
  found_word_pos : int;
  (* Token position of the found word within the document. *)
  found_word_ci : string;
  (* Lowercased form of the found word, for case-insensitive comparison. *)
  found_word : string;
  (* Original form of the found word. *)
}

type t = {
  score : float;
  search_phrase : Search_phrase.t;
  found_phrase : indexed_found_word list;
}

(* Two results are equal iff they carry the same search phrase and their
   found words sit at the same positions.  Only [found_word_pos] is
   compared, not the word text itself. *)
let equal t1 t2 =
  Search_phrase.equal t1.search_phrase t2.search_phrase
  &&
  List.length t1.found_phrase = List.length t2.found_phrase
  &&
  List.for_all2 (fun x1 x2 ->
      x1.found_word_pos = x2.found_word_pos)
    t1.found_phrase
    t2.found_phrase

module Score = struct
  module ET = Search_phrase.Enriched_token

  (* Character-count statistics accumulated over every
     (search token, found word) pair; each counter feeds a weight or a
     sub-score in the final scoring formula below. *)
  type stats = {
    total_search_char_count : float;
    total_found_char_count : float;
    exact_match_found_char_count : float;
    ci_exact_match_found_char_count : float;
    sub_match_search_word_in_found_word_char_count : float;
    sub_match_found_word_in_search_word_char_count : float;
    sub_match_search_char_count : float;
    sub_match_found_char_count : float;
    ci_sub_match_search_word_in_found_word_char_count : float;
    ci_sub_match_found_word_in_search_word_char_count : float;
    ci_sub_match_search_char_count : float;
    ci_sub_match_found_char_count : float;
    fuzzy_match_edit_distance : float;
    fuzzy_match_search_char_count : float;
    fuzzy_match_found_char_count : float;
  }

  (* All counters start at zero; used as the fold seed in [score]. *)
  let empty_stats = {
    total_search_char_count = 0.0;
    total_found_char_count = 0.0;
    exact_match_found_char_count = 0.0;
    ci_exact_match_found_char_count = 0.0;
    sub_match_search_word_in_found_word_char_count = 0.0;
    sub_match_found_word_in_search_word_char_count = 0.0;
    sub_match_search_char_count = 0.0;
    sub_match_found_char_count = 0.0;
    ci_sub_match_search_word_in_found_word_char_count = 0.0;
    ci_sub_match_found_word_in_search_word_char_count = 0.0;
    ci_sub_match_search_char_count = 0.0;
ci_sub_match_found_char_count = 0.0; fuzzy_match_edit_distance = 0.0; fuzzy_match_search_char_count = 0.0; fuzzy_match_found_char_count = 0.0; } let score (search_phrase : Search_phrase.t) ~(found_phrase : indexed_found_word list) ~(found_phrase_opening_closing_symbol_match_count : int) : float = assert (not (Search_phrase.is_empty search_phrase)); assert (List.length (Search_phrase.enriched_tokens search_phrase) = List.length found_phrase); let found_phrase_opening_closing_symbol_match_count = Int.to_float found_phrase_opening_closing_symbol_match_count in let quite_close_to_zero x = Float.abs x < Params.float_compare_margin in let add_sub_match_search_and_found_char_count ~search_word_len ~found_word_len stats = { stats with sub_match_search_char_count = stats.sub_match_search_char_count +. search_word_len; sub_match_found_char_count = stats.sub_match_found_char_count +. found_word_len; } in let add_ci_sub_match_search_and_found_char_count ~search_word_len ~found_word_len stats = { stats with ci_sub_match_search_char_count = stats.ci_sub_match_search_char_count +. search_word_len; ci_sub_match_found_char_count = stats.ci_sub_match_found_char_count +. found_word_len; } in let stats = List.fold_left2 (fun (stats : stats) (token : ET.t) { found_word_ci; found_word; _ } -> let found_word_len = Int.to_float (String.length found_word) in match ET.data token with | `Explicit_spaces -> ( { stats with total_search_char_count = stats.total_search_char_count +. 1.0; total_found_char_count = stats.total_found_char_count +. 1.0; exact_match_found_char_count = stats.exact_match_found_char_count +. 1.0; } ) | `String search_word -> ( let search_word_len = Int.to_float (String.length search_word) in let search_word_ci = String.lowercase_ascii search_word in let stats = { stats with total_search_char_count = stats.total_search_char_count +. search_word_len; total_found_char_count = stats.total_found_char_count +. 
found_word_len; } in if String.equal search_word found_word then ( { stats with exact_match_found_char_count = stats.exact_match_found_char_count +. found_word_len; } ) else if String.equal search_word_ci found_word_ci then ( { stats with ci_exact_match_found_char_count = stats.ci_exact_match_found_char_count +. found_word_len; } ) else if CCString.find ~sub:search_word found_word >= 0 then ( { stats with sub_match_search_word_in_found_word_char_count = stats.sub_match_search_word_in_found_word_char_count +. search_word_len; } |> add_sub_match_search_and_found_char_count ~search_word_len ~found_word_len ) else if CCString.find ~sub:found_word search_word >= 0 then ( { stats with sub_match_found_word_in_search_word_char_count = stats.sub_match_found_word_in_search_word_char_count +. found_word_len; } |> add_sub_match_search_and_found_char_count ~search_word_len ~found_word_len ) else if CCString.find ~sub:search_word_ci found_word_ci >= 0 then ( { stats with ci_sub_match_search_word_in_found_word_char_count = stats.ci_sub_match_search_word_in_found_word_char_count +. search_word_len; } |> add_ci_sub_match_search_and_found_char_count ~search_word_len ~found_word_len ) else if CCString.find ~sub:found_word_ci search_word_ci >= 0 then ( { stats with ci_sub_match_found_word_in_search_word_char_count = stats.ci_sub_match_found_word_in_search_word_char_count +. found_word_len; } |> add_ci_sub_match_search_and_found_char_count ~search_word_len ~found_word_len ) else ( { stats with fuzzy_match_edit_distance = stats.fuzzy_match_edit_distance +. Int.to_float (Spelll.edit_distance search_word_ci found_word_ci); fuzzy_match_search_char_count = stats.fuzzy_match_search_char_count +. search_word_len; fuzzy_match_found_char_count = stats.fuzzy_match_found_char_count +. 
found_word_len; } ) ) ) empty_stats (Search_phrase.enriched_tokens search_phrase) found_phrase in let search_phrase_length = Search_phrase.enriched_tokens search_phrase |> List.length |> Int.to_float in let unique_match_count = found_phrase |> List.map (fun x -> x.found_word_pos) |> List.sort_uniq Int.compare |> List.length |> Int.to_float in let (total_distance, out_of_order_match_count, _) = List.fold_left (fun (total_dist, out_of_order_match_count, last_pos) { found_word_pos = pos; _ } -> match last_pos with | None -> (total_dist, out_of_order_match_count, Some pos) | Some last_pos -> let total_dist = total_dist +. Int.to_float (abs (pos - last_pos)) in let out_of_order_match_count = if last_pos <= pos then out_of_order_match_count else out_of_order_match_count +. 1.0 in (total_dist, out_of_order_match_count, Some pos) ) (0.0, 0.0, None) found_phrase in let average_distance = let gaps = unique_match_count -. 1.0 in assert (gaps >= 0.0); if quite_close_to_zero gaps then ( 0.0 ) else ( total_distance /. gaps ) in let exact_match_score = if quite_close_to_zero stats.exact_match_found_char_count then ( 0.0 ) else ( 1.0 ) in let ci_exact_match_score = if quite_close_to_zero stats.ci_exact_match_found_char_count then ( 0.0 ) else ( 1.0 ) in let sub_match_score_s_in_f = if quite_close_to_zero stats.sub_match_found_char_count then ( 0.0 ) else ( stats.sub_match_search_word_in_found_word_char_count /. stats.sub_match_found_char_count ) in let sub_match_score_f_in_s = if quite_close_to_zero stats.sub_match_search_char_count then ( 0.0 ) else ( stats.sub_match_found_word_in_search_word_char_count /. stats.sub_match_search_char_count ) in let ci_sub_match_score_s_in_f = if quite_close_to_zero stats.ci_sub_match_found_char_count then ( 0.0 ) else ( stats.ci_sub_match_search_word_in_found_word_char_count /. 
stats.ci_sub_match_found_char_count ) in let ci_sub_match_score_f_in_s = if quite_close_to_zero stats.ci_sub_match_search_char_count then ( 0.0 ) else ( stats.ci_sub_match_found_word_in_search_word_char_count /. stats.ci_sub_match_search_char_count ) in let fuzzy_match_score = if quite_close_to_zero stats.fuzzy_match_search_char_count then ( 0.0 ) else ( 1.0 -. (stats.fuzzy_match_edit_distance /. stats.fuzzy_match_search_char_count) ) in let total_char_count = stats.total_search_char_count +. stats.total_found_char_count in let case_sensitive_bonus_multiplier = if List.exists (fun token -> match ET.data token with | `Explicit_spaces -> false | `String search_word -> ( not (String.equal (String.lowercase_ascii search_word) search_word)) ) (Search_phrase.enriched_tokens search_phrase) then ( 1.10 ) else ( 1.00 ) in let exact_match_weight = case_sensitive_bonus_multiplier *. (stats.exact_match_found_char_count *. 2.0) /. total_char_count in let ci_exact_match_weight = (stats.ci_exact_match_found_char_count *. 2.0) /. total_char_count in let f_in_s_penalty_multiplier = 0.8 in let sub_match_weight_s_in_f = case_sensitive_bonus_multiplier *. stats.sub_match_found_char_count /. stats.total_found_char_count in let sub_match_weight_f_in_s = case_sensitive_bonus_multiplier *. f_in_s_penalty_multiplier *. (stats.sub_match_search_char_count /. stats.total_search_char_count) in let ci_sub_match_weight_s_in_f = (stats.ci_sub_match_found_char_count /. stats.total_found_char_count) in let ci_sub_match_weight_f_in_s = f_in_s_penalty_multiplier *. (stats.ci_sub_match_search_char_count /. stats.total_search_char_count) in let fuzzy_match_weight = (stats.fuzzy_match_found_char_count *. 2.0) /. total_char_count in let distance_score = if average_distance <= 1.0 then ( 1.0 ) else ( 1.0 -. (0.20 *. (average_distance /. (Int.to_float !Params.max_token_search_dist))) ) in (1.0 +. (0.10 *. search_phrase_length)) *. (1.0 +. (0.10 *. (unique_match_count /. search_phrase_length))) *. (1.0 -. 
(0.10 *. (out_of_order_match_count /. search_phrase_length))) *. (1.00 +. (0.10 *. found_phrase_opening_closing_symbol_match_count)) *. distance_score *. ( (exact_match_weight *. exact_match_score) +. (ci_exact_match_weight *. ci_exact_match_score) +. (sub_match_weight_s_in_f *. sub_match_score_s_in_f) +. (sub_match_weight_f_in_s *. sub_match_score_f_in_s) +. (ci_sub_match_weight_s_in_f *. ci_sub_match_score_s_in_f) +. (ci_sub_match_weight_f_in_s *. ci_sub_match_score_f_in_s) +. (fuzzy_match_weight *. fuzzy_match_score) ) end let make search_phrase ~found_phrase ~found_phrase_opening_closing_symbol_match_count = let len = List.length (Search_phrase.enriched_tokens search_phrase) in if len = 0 then ( invalid_arg "Search_result.make search_phrase is empty" ) else if len <> List.length found_phrase then ( invalid_arg "length of found_phrase does not match length of search_phrase" ) else ( let score = Score.score search_phrase ~found_phrase ~found_phrase_opening_closing_symbol_match_count in { score; search_phrase; found_phrase } ) let search_phrase (t : t) = t.search_phrase let found_phrase (t : t) = t.found_phrase let score (t : t) = t.score let compare_relevance (t1 : t) (t2 : t) = (* Order the more relevant result to the front. *) if Float.abs (t1.score -. t2.score) < Params.float_compare_margin then ( (* If scores are within the comparison margin, then order result that appears earlier to the front. *) let t1_found_phrase_start_pos = (List.hd t1.found_phrase).found_word_pos in let t2_found_phrase_start_pos = (List.hd t2.found_phrase).found_word_pos in Int.compare t1_found_phrase_start_pos t2_found_phrase_start_pos ) else ( (* Otherwise just order result with higher score to the front. 
*)
    Float.compare t2.score t1.score
  )
================================================
FILE: lib/search_result.mli
================================================
type indexed_found_word = {
  found_word_pos : int;
  found_word_ci : string;
  found_word : string;
}

type t

val make :
  Search_phrase.t ->
  found_phrase:indexed_found_word list ->
  found_phrase_opening_closing_symbol_match_count:int ->
  t

val search_phrase : t -> Search_phrase.t

val found_phrase : t -> indexed_found_word list

val score : t -> float

val equal : t -> t -> bool

val compare_relevance : t -> t -> int
================================================
FILE: lib/search_result_heap.ml
================================================
(* Heap of search results keyed on [Search_result.score];
   [leq] compares scores, so CCHeap orders lower-scored results first. *)
include CCHeap.Make (struct
    type t = Search_result.t

    let leq x y =
      (Search_result.score x) <= (Search_result.score y)
  end)
================================================
FILE: lib/sqlite3_utils.ml
================================================
include Sqlite3

(* Pool of SQLite connections, one slot per task-pool domain.
   Connections idle for over 30s are discarded on next validation. *)
let db_pool =
  Eio.Pool.create
    (* This is not ideal since validate is not called until next use of
       the pool, so an idle DB connection could be held for a lot longer
       than described here. But this seems to be the best we can do. *)
    ~validate:(fun (last_used, _db) ->
        Unix.time () -. !last_used <= 30.0
      )
    ~dispose:(fun (_last_used, db) ->
        (* Best-effort closing: retry [db_close] up to 10 times, then
           give up silently. *)
        try
          let try_count = ref 0 in
          while !try_count < 10 && not (db_close db) do
            Unix.sleepf 0.01;
            incr try_count;
          done
        with
        | _ -> ()
      )
    Task_pool.size
    (fun () ->
       (ref (Unix.time ()),
        db_open
          ~mutex:`FULL
          (CCOption.get_exn_or
             "Docfd_lib.Params.db_path uninitialized"
             !Params.db_path)
       )
    )

let with_db : type a.
?db:db -> (db -> a) -> a = fun ?db f -> match db with | None -> ( Eio.Pool.use db_pool ~never_block:true (fun (last_used, db) -> last_used := Unix.time (); f db ) ) | Some db -> ( f db ) let retry_if_busy (f : unit -> Sqlite3.Rc.t) = let rec aux tries_left = let r = f () in if tries_left > 0 then ( match r with | BUSY -> ( Unix.sleepf 0.1; aux (tries_left - 1) ) | _ -> r ) else ( r ) in aux 50 let exec db s = retry_if_busy (fun () -> Sqlite3.exec db s) |> Sqlite3.Rc.check let prepare db s = Sqlite3.prepare db s let bind_names stmt l = retry_if_busy (fun () -> Sqlite3.bind_names stmt l) |> Sqlite3.Rc.check let reset stmt = retry_if_busy (fun () -> Sqlite3.reset stmt) |> Sqlite3.Rc.check let step stmt = match retry_if_busy (fun () -> Sqlite3.step stmt) with | OK | DONE | ROW -> () | x -> Sqlite3.Rc.check x let finalize stmt = retry_if_busy (fun () -> Sqlite3.finalize stmt) |> Sqlite3.Rc.check let with_stmt : type a. ?db:db -> string -> ?names:((string * Sqlite3.Data.t) list) -> (Sqlite3.stmt -> a) -> a = fun ?db s ?names f -> with_db ?db (fun db -> let stmt = prepare db s in Option.iter (fun names -> bind_names stmt names) names; let res = f stmt in finalize stmt; res ) let step_stmt : type a. ?db:db -> string -> ?names:((string * Data.t) list) -> (stmt -> a) -> a = fun ?db s ?names f -> with_stmt ?db s ?names (fun stmt -> step stmt; f stmt ) let iter_stmt ?db s ?names (f : Data.t array -> unit) = with_stmt ?db s ?names (fun stmt -> Rc.check (Sqlite3.iter stmt ~f) ) let fold_stmt : type a. 
  ?db:db ->
  string ->
  ?names:((string * Data.t) list) ->
  (a -> Sqlite3.Data.t array -> a) ->
  a ->
  a =
  fun ?db s ?names f init ->
  (* Fold [f] over all result rows of statement [s]. *)
  with_stmt ?db s ?names (fun stmt ->
      let rc, res = Sqlite3.fold stmt ~f ~init in
      Sqlite3.Rc.check rc;
      res
    )
================================================
FILE: lib/stop_signal.ml
================================================
(* One-shot stop flag: fibers block in [await] until some fiber calls
   [broadcast].  The flag never resets. *)
type t = {
  mutable stop : bool;
  cond : Eio.Condition.t;
  mutex : Eio.Mutex.t;
}

let make () = {
  stop = false;
  cond = Eio.Condition.create ();
  mutex = Eio.Mutex.create ();
}

(* Block until [broadcast] has been called.  The [while] guard re-checks
   [stop] after each wakeup, so spurious wakeups are harmless. *)
let await (t : t) =
  Eio.Mutex.use_ro t.mutex (fun () ->
      while not t.stop do
        Eio.Condition.await t.cond t.mutex
      done
    )

(* Set the flag under the mutex, then wake all waiters. *)
let broadcast (t : t) =
  Eio.Mutex.use_rw ~protect:false t.mutex (fun () ->
      t.stop <- true);
  Eio.Condition.broadcast t.cond
================================================
FILE: lib/stop_signal.mli
================================================
type t

val make : unit -> t

val await : t -> unit

val broadcast : t -> unit
================================================
FILE: lib/string_map.ml
================================================
include CCMap.Make (String)
================================================
FILE: lib/string_set.ml
================================================
include CCSet.Make (String)
================================================
FILE: lib/task_pool.ml
================================================
(* Thin wrapper around [Eio.Executor_pool] providing parallel list
   operations bounded by the pool size. *)
type t = Eio.Executor_pool.t

(* All recommended domains minus one (leaving one for the main domain),
   but never below 1. *)
let size = max 1 (Domain.recommended_domain_count () - 1)

let make ~sw mgr =
  Eio.Executor_pool.create
    ~sw
    ~domain_count:size
    mgr

let run (t : t) (f : unit -> 'a) : 'a =
  Eio.Executor_pool.submit_exn t ~weight:1.0 f

(* Parallel [List.map]; each fiber yields once before submitting so
   other fibers get a chance to run. *)
let map_list : 'a 'b . t -> ('a -> 'b) -> 'a list -> 'b list =
  fun t f l ->
  Eio.Fiber.List.map ~max_fibers:size
    (fun x -> Eio.Fiber.yield (); run t (fun () -> f x))
    l

let filter_list : 'a 'b .
  t -> ('a -> bool) -> 'a list -> 'a list =
  fun t f l ->
  (* Parallel [List.filter] over the task pool. *)
  Eio.Fiber.List.filter ~max_fibers:size
    (fun x -> Eio.Fiber.yield (); run t (fun () -> f x))
    l

(* Parallel [List.filter_map] over the task pool. *)
let filter_map_list : 'a 'b . t -> ('a -> 'b option) -> 'a list -> 'b list =
  fun t f l ->
  Eio.Fiber.List.filter_map ~max_fibers:size
    (fun x -> Eio.Fiber.yield (); run t (fun () -> f x))
    l
================================================
FILE: lib/task_pool.mli
================================================
type t

val size : int

val make : sw:Eio.Switch.t -> _ Eio.Domain_manager.t -> t

val run : t -> (unit -> 'a) -> 'a

val map_list : t -> ('a -> 'b) -> 'a list -> 'b list

val filter_list : t -> ('a -> bool) -> 'a list -> 'a list

val filter_map_list : t -> ('a -> 'b option) -> 'a list -> 'b list
================================================
FILE: lib/tokenization.ml
================================================
(* Split any (pos, word) whose word exceeds [Params.max_token_size]
   into max-sized chunks; [offset] shifts the positions of everything
   after an inserted split so indices stay consecutive. *)
let chunk_tokens (s : (int * string) Seq.t) : (int * string) Seq.t =
  let rec aux offset s =
    match s () with
    | Seq.Nil -> Seq.empty
    | Seq.Cons ((pos, word), rest) -> (
        let word_len = String.length word in
        if word_len <= Params.max_token_size then (
          fun () -> Seq.Cons ((pos + offset, word), aux offset rest)
        ) else (
          let up_to_limit = String.sub word 0 Params.max_token_size in
          let rest_of_token =
            String.sub word Params.max_token_size
              (word_len - Params.max_token_size)
          in
          (* Emit the first chunk, requeue the remainder at the same
             original [pos] with the offset bumped by one. *)
          fun () ->
            Seq.Cons ((pos + offset, up_to_limit),
                      (aux (offset + 1) (Seq.cons (pos, rest_of_token) rest)))
        )
      )
  in
  aux 0 s

type token =
  | Space of string
  | Text of string

(* Split one Uuseg word segment into alphanumeric runs and individual
   UTF-8 characters; yields [] if the segment fails to parse. *)
let split_utf8_seg (s : string) : string list =
  let open Angstrom in
  let open Parser_components in
  let token_p =
    choice [
      take_while1 is_alphanum;
      utf_8_char;
    ]
  in
  let tokens_p = many token_p in
  match Angstrom.(parse_string ~consume:Consume.All) tokens_p s with
  | Ok l -> l
  | Error _ -> []

(* Tokenize [s] into (index, token) pairs using Unicode word
   segmentation (Uuseg `Word), optionally dropping whitespace tokens,
   then chunking oversized tokens via [chunk_tokens]. *)
let tokenize_with_pos ~drop_spaces (s : string) : (int * string) Seq.t =
  let segmenter = Uuseg.create `Word in
  let s = Misc_utils.sanitize_string s in
  let s_len = String.length s in
  let acc :
    token Dynarray.t = Dynarray.create ()
  in
  let buf : Uchar.t Dynarray.t = Dynarray.create () in
  let sbuf = Buffer.create 256 in
  (* Flush the uchars pending in [buf]: a segment whose first uchar is
     whitespace becomes one [Space] token, anything else is further
     split by [split_utf8_seg] into [Text] tokens. *)
  let flush_to_acc () =
    if Dynarray.length buf > 0 then (
      Dynarray.iter (Buffer.add_utf_8_uchar sbuf) buf;
      if Uucp.White.is_white_space (Dynarray.get buf 0) then (
        Dynarray.add_last acc (Space (Buffer.contents sbuf))
      ) else (
        Buffer.contents sbuf
        |> split_utf8_seg
        |> List.iter (fun s ->
            Dynarray.add_last acc (Text s)
          )
      );
      Dynarray.clear buf;
      Buffer.clear sbuf
    )
  in
  (* Feed one event to the segmenter, flushing on each `Boundary. *)
  let rec add v =
    match Uuseg.add segmenter v with
    | `Uchar uc -> (
        Dynarray.add_last buf uc;
        add `Await
      )
    | `Boundary -> (
        flush_to_acc ();
        add `Await
      )
    | `Await
    | `End -> ()
  in
  (* Walk the string uchar by uchar; bytes that are not valid UTF-8 are
     skipped one at a time. *)
  let rec aux pos =
    if pos >= s_len then (
      add `End;
      flush_to_acc ()
    ) else (
      let decode = String.get_utf_8_uchar s pos in
      if Uchar.utf_decode_is_valid decode then (
        let uchar = Uchar.utf_decode_uchar decode in
        add (`Uchar uchar);
        aux (pos + Uchar.utf_decode_length decode)
      ) else (
        aux (pos + 1)
      )
    )
  in
  aux 0;
  Dynarray.to_seq acc
  |> Seq.mapi (fun i x -> (i, x))
  |> Seq.filter_map (fun ((i, token) : int * token) ->
      match token with
      | Text s -> Some (i, s)
      | Space s ->
        if drop_spaces then None else Some (i, s)
    )
  |> chunk_tokens

let tokenize ~drop_spaces s =
  tokenize_with_pos ~drop_spaces s
  |> Seq.map snd
================================================
FILE: lib/word_db.ml
================================================
(* Global in-memory word <-> id table, mirrored to the SQLite word
   table; all access goes through [lock]. *)
type t = {
  lock : Eio.Mutex.t;
  mutable size : int;
  (* Number of words currently in the in-memory table. *)
  mutable size_written_to_db : int;
  (* Number of words already persisted to the DB; ids below this are on
     disk, ids from here to [size - 1] are pending. *)
  mutable word_of_id : string Int_map.t;
  id_of_word : (string, int) Hashtbl.t;
}

let t : t = {
  lock = Eio.Mutex.create ();
  size = 0;
  size_written_to_db = 0;
  word_of_id = Int_map.empty;
  id_of_word = Hashtbl.create 100_000;
}

let lock : type a.
(unit -> a) -> a = fun f -> Eio.Mutex.use_rw ~protect:true t.lock f let filter pool (f : string -> bool) : (int * string) Dynarray.t = let word_of_id = lock (fun () -> t.word_of_id ) in let max_end_exc_seen = ref 0 in let chunk_size = !Params.index_chunk_size * 10 in let chunk_start_end_exc_ranges = OSeq.(0 -- (t.size - 1) / chunk_size) |> Seq.map (fun chunk_index -> let start = chunk_index * chunk_size in let end_exc = min ((chunk_index + 1) * chunk_size) t.size in max_end_exc_seen := max !max_end_exc_seen end_exc; (start, end_exc) ) |> List.of_seq in assert (!max_end_exc_seen = t.size); let batches = chunk_start_end_exc_ranges |> Task_pool.map_list pool (fun (start, end_exc) -> let acc = Dynarray.create () in for i=start to end_exc-1 do let word = Int_map.find i word_of_id in if f word then ( Dynarray.add_last acc (i, word) ) done; acc ) in let acc = Dynarray.create () in List.iter (fun batch -> Dynarray.append acc batch ) batches; acc let add (word : string) : int = lock (fun () -> match Hashtbl.find_opt t.id_of_word word with | Some id -> id | None -> ( let id = t.size in t.size <- t.size + 1; t.word_of_id <- Int_map.add id word t.word_of_id; Hashtbl.replace t.id_of_word word id; id ) ) let word_of_id i : string = lock (fun () -> Int_map.find i t.word_of_id ) let id_of_word s : int option = lock (fun () -> Hashtbl.find_opt t.id_of_word s ) let read_from_db () : unit = let open Sqlite3_utils in lock (fun () -> with_db (fun db -> t.word_of_id <- Int_map.empty; Hashtbl.clear t.id_of_word; iter_stmt ~db {| SELECT id, word FROM word ORDER by id |} ~names:[] (fun data -> let id = Data.to_int_exn data.(0) in let word = Data.to_string_exn data.(1) in t.word_of_id <- Int_map.add id word t.word_of_id; Hashtbl.replace t.id_of_word word id; ) ); t.size <- Int_map.cardinal t.word_of_id; t.size_written_to_db <- t.size; ) let write_to_db db ~already_in_transaction : unit = let open Sqlite3_utils in lock (fun () -> if not already_in_transaction then ( step_stmt ~db "BEGIN 
IMMEDIATE" ignore; ); let word_table_size = step_stmt ~db {| SELECT COUNT(1) FROM word |} (fun stmt -> Int64.to_int (column_int64 stmt 0) ) in if word_table_size <> t.size_written_to_db then ( Misc_utils.exit_with_error_msg "unexpected change in word table, likely due to indexing from another Docfd instance"; ); with_stmt ~db {| INSERT INTO word (id, word) VALUES (@id, @word) ON CONFLICT(id) DO NOTHING |} (fun stmt -> for id = t.size_written_to_db to t.size-1 do let word = Int_map.find id t.word_of_id in bind_names stmt [ ("@id", INT (Int64.of_int id)) ; ("@word", TEXT word) ]; step stmt; reset stmt; done ); if not already_in_transaction then ( step_stmt ~db "COMMIT" ignore; ); t.size_written_to_db <- t.size; ) ================================================ FILE: lib/word_db.mli ================================================ type t val add : string -> int val filter : Task_pool.t -> (string -> bool) -> (int * string) Dynarray.t val word_of_id : int -> string val id_of_word : string -> int option val read_from_db : unit -> unit val write_to_db : Sqlite3.db -> already_in_transaction:bool -> unit ================================================ FILE: line-wrapping-tests.t/dune ================================================ (cram (deps ../bin/docfd.exe)) ================================================ FILE: line-wrapping-tests.t/long-words.txt ================================================ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 abcdefghijklmnopqrstuvwxyz 01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 abcdefghijklmnopqrstuvwxyz 
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz ================================================ FILE: line-wrapping-tests.t/run.t ================================================ Word breaking: $ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 long-words.txt --sample "01 ab" --search-result-print-text-width 80 $TESTCASE_ROOT/long-words.txt 1: 01234567890123456789012345678901234567890123456789012345678901234567890123456 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 78901234567890123456789 ^^^^^^^^^^^^^^^^^^^^^^^ 2: 3: abcdefghijklmnopqrstuvwxyz ^^^^^^^^^^^^^^^^^^^^^^^^^^ 16: 0123456789012345678901234567890123456789012345678901234567890123456789012345 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 678901234567890123456789 ^^^^^^^^^^^^^^^^^^^^^^^^ 17: 18: abcdefghijklmnopqrstuvwxyz ^^^^^^^^^^^^^^^^^^^^^^^^^^ 1: 01234567890123456789012345678901234567890123456789012345678901234567890123456 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 78901234567890123456789 ^^^^^^^^^^^^^^^^^^^^^^^ 2: 3: abcdefghijklmnopqrstuvwxyz 4: 01234567890123456789012345678901234567890123456789012345678901234567890123456 78901234567890123456789012345678901234567890123456789012345678901234567890123 4567890123456789012345678901234567890123456789 5: abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 0123456789012345678901234567890123456789012345678901234567890123456789012345 678901234567890123456789 17: 18: abcdefghijklmnopqrstuvwxyz ^^^^^^^^^^^^^^^^^^^^^^^^^^ 3: abcdefghijklmnopqrstuvwxyz ^^^^^^^^^^^^^^^^^^^^^^^^^^ 4: 01234567890123456789012345678901234567890123456789012345678901234567890123456 
78901234567890123456789012345678901234567890123456789012345678901234567890123 4567890123456789012345678901234567890123456789 5: abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 0123456789012345678901234567890123456789012345678901234567890123456789012345 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 678901234567890123456789 ^^^^^^^^^^^^^^^^^^^^^^^^ 1: 01234567890123456789012345678901234567890123456789012345678901234567890123456 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 78901234567890123456789 ^^^^^^^^^^^^^^^^^^^^^^^ 2: 3: abcdefghijklmnopqrstuvwxyz 4: 01234567890123456789012345678901234567890123456789012345678901234567890123456 78901234567890123456789012345678901234567890123456789012345678901234567890123 4567890123456789012345678901234567890123456789 5: abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ $ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 long-words.txt --sample "01 ab" --search-result-print-text-width 20 $TESTCASE_ROOT/long-words.txt 1: 01234567890123456 ^^^^^^^^^^^^^^^^^ 78901234567890123 ^^^^^^^^^^^^^^^^^ 45678901234567890 ^^^^^^^^^^^^^^^^^ 12345678901234567 ^^^^^^^^^^^^^^^^^ 89012345678901234 ^^^^^^^^^^^^^^^^^ 567890123456789 ^^^^^^^^^^^^^^^ 2: 3: abcdefghijklmnopq ^^^^^^^^^^^^^^^^^ rstuvwxyz ^^^^^^^^^ 16: 0123456789012345 ^^^^^^^^^^^^^^^^ 6789012345678901 ^^^^^^^^^^^^^^^^ 2345678901234567 ^^^^^^^^^^^^^^^^ 8901234567890123 ^^^^^^^^^^^^^^^^ 4567890123456789 ^^^^^^^^^^^^^^^^ 0123456789012345 ^^^^^^^^^^^^^^^^ 6789 ^^^^ 17: 18: abcdefghijklmnop ^^^^^^^^^^^^^^^^ qrstuvwxyz ^^^^^^^^^^ 1: 01234567890123456 ^^^^^^^^^^^^^^^^^ 78901234567890123 ^^^^^^^^^^^^^^^^^ 45678901234567890 ^^^^^^^^^^^^^^^^^ 12345678901234567 ^^^^^^^^^^^^^^^^^ 89012345678901234 ^^^^^^^^^^^^^^^^^ 567890123456789 ^^^^^^^^^^^^^^^ 2: 3: abcdefghijklmnopq rstuvwxyz 4: 01234567890123456 78901234567890123 
45678901234567890 12345678901234567 89012345678901234 56789012345678901 23456789012345678 90123456789012345 67890123456789012 34567890123456789 01234567890123456 7890123456789 5: abcdefghijklmnopq rstuvwxyzabcdefgh ijklmnopqrstuvwxy z 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 0123456789012345 6789012345678901 2345678901234567 8901234567890123 4567890123456789 0123456789012345 6789 17: 18: abcdefghijklmnop ^^^^^^^^^^^^^^^^ qrstuvwxyz ^^^^^^^^^^ 3: abcdefghijklmnopq ^^^^^^^^^^^^^^^^^ rstuvwxyz ^^^^^^^^^ 4: 01234567890123456 78901234567890123 45678901234567890 12345678901234567 89012345678901234 56789012345678901 23456789012345678 90123456789012345 67890123456789012 34567890123456789 01234567890123456 7890123456789 5: abcdefghijklmnopq rstuvwxyzabcdefgh ijklmnopqrstuvwxy z 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 0123456789012345 ^^^^^^^^^^^^^^^^ 6789012345678901 ^^^^^^^^^^^^^^^^ 2345678901234567 ^^^^^^^^^^^^^^^^ 8901234567890123 ^^^^^^^^^^^^^^^^ 4567890123456789 ^^^^^^^^^^^^^^^^ 0123456789012345 ^^^^^^^^^^^^^^^^ 6789 ^^^^ 1: 01234567890123456 ^^^^^^^^^^^^^^^^^ 78901234567890123 ^^^^^^^^^^^^^^^^^ 45678901234567890 ^^^^^^^^^^^^^^^^^ 12345678901234567 ^^^^^^^^^^^^^^^^^ 89012345678901234 ^^^^^^^^^^^^^^^^^ 567890123456789 ^^^^^^^^^^^^^^^ 2: 3: abcdefghijklmnopq rstuvwxyz 4: 01234567890123456 78901234567890123 45678901234567890 12345678901234567 89012345678901234 56789012345678901 23456789012345678 90123456789012345 67890123456789012 34567890123456789 01234567890123456 7890123456789 5: abcdefghijklmnopq ^^^^^^^^^^^^^^^^^ rstuvwxyzabcdefgh ^^^^^^^^^^^^^^^^^ ijklmnopqrstuvwxy ^^^^^^^^^^^^^^^^^ z ^ $ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 words.txt --sample "01 ab" --search-result-print-text-width 5 $TESTCASE_ROOT/words.txt 1: 01 ^^ 23 ^^ 45 ^^ 2: 3: ab ^^ cd ^^ ef ^^ g ^ 15: 0 ^ 1 ^ 2 ^ 3 ^ 4 ^ 5 ^ 16: 17: a ^ b ^ c ^ d ^ e ^ f ^ g ^ 1: 01 ^^ 23 ^^ 45 ^^ 2: 3: ab cd ef g 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 0 1 2 3 4 5 16: 17: a ^ b ^ c ^ d ^ e ^ 
f ^ g ^ 3: ab ^^ cd ^^ ef ^^ g ^ 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 0 ^ 1 ^ 2 ^ 3 ^ 4 ^ 5 ^ $ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 words.txt --sample "01 ab" --search-result-print-text-width 1 $TESTCASE_ROOT/words.txt 1: 0 ^ 1 ^ 2 ^ 3 ^ 4 ^ 5 ^ 2: 3: a ^ b ^ c ^ d ^ e ^ f ^ g ^ 15: 0 ^ 1 ^ 2 ^ 3 ^ 4 ^ 5 ^ 16: 17: a ^ b ^ c ^ d ^ e ^ f ^ g ^ 1: 0 ^ 1 ^ 2 ^ 3 ^ 4 ^ 5 ^ 2: 3: a b c d e f g 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 0 1 2 3 4 5 16: 17: a ^ b ^ c ^ d ^ e ^ f ^ g ^ 3: a ^ b ^ c ^ d ^ e ^ f ^ g ^ 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 0 ^ 1 ^ 2 ^ 3 ^ 4 ^ 5 ^ Line wrapping and word breaking: $ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "lorem" --search-result-print-text-width 80 $TESTCASE_ROOT/sentences.txt 1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod ^^^^^ tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. $ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "lorem" --search-result-print-text-width 20 $TESTCASE_ROOT/sentences.txt 1: Lorem ipsum ^^^^^ dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "lorem" --search-result-print-text-width 10 $TESTCASE_ROOT/sentences.txt 1: Lorem ^^^^^ ipsum dolor sit amet, consect etur adipisc ing elit, sed do eiusmod tempor incidid unt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercit ation ullamco laboris nisi ut aliquip ex ea commodo consequ at . Duis aute irure dolor in reprehe nderit in volupta te velit esse cillum dolore eu fugiat nulla pariatu r . Excepte ur sint occaeca t cupidat at non proiden t , sunt in culpa qui officia deserun t mollit anim id est laborum . $ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "laborum 0" --search-result-print-text-width 80 $TESTCASE_ROOT/sentences.txt 1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ^^^^^^^ 2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345 ^ 1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ^^^^^^^ 2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345 ^^^^^^^ 1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ^^^^^^^ 2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345 ^^^^^^^^^ $ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "laborum 0" --search-result-print-text-width 20 $TESTCASE_ROOT/sentences.txt 1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ^^^^^^^ 2: 0 12 abcd efghi ^ jkl mnopqrst uvwx yz 0123456 789012345 1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ^^^^^^^ 2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 ^^^^^^^ 789012345 1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ^^^^^^^ 2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345 ^^^^^^^^^ $ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "laborum 0" --search-result-print-text-width 10 $TESTCASE_ROOT/sentences.txt 1: Lorem ipsum dolor sit amet, consect etur adipisc ing elit, sed do eiusmod tempor incidid unt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercit ation ullamco laboris nisi ut aliquip ex ea commodo consequ at . Duis aute irure dolor in reprehe nderit in volupta te velit esse cillum dolore eu fugiat nulla pariatu r . Excepte ur sint occaeca t cupidat at non proiden t , sunt in culpa qui officia deserun t mollit anim id est laborum ^^^^^^^ . 2: 0 12 ^ abcd efghi jkl mnopqrs t uvwx yz 0123456 7890123 45 1: Lorem ipsum dolor sit amet, consect etur adipisc ing elit, sed do eiusmod tempor incidid unt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercit ation ullamco laboris nisi ut aliquip ex ea commodo consequ at . Duis aute irure dolor in reprehe nderit in volupta te velit esse cillum dolore eu fugiat nulla pariatu r . Excepte ur sint occaeca t cupidat at non proiden t , sunt in culpa qui officia deserun t mollit anim id est laborum ^^^^^^^ . 2: 0 12 abcd efghi jkl mnopqrs t uvwx yz 0123456 ^^^^^^^ 7890123 45 1: Lorem ipsum dolor sit amet, consect etur adipisc ing elit, sed do eiusmod tempor incidid unt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercit ation ullamco laboris nisi ut aliquip ex ea commodo consequ at . Duis aute irure dolor in reprehe nderit in volupta te velit esse cillum dolore eu fugiat nulla pariatu r . Excepte ur sint occaeca t cupidat at non proiden t , sunt in culpa qui officia deserun t mollit anim id est laborum ^^^^^^^ . 
2: 0 12 abcd efghi jkl mnopqrs t uvwx yz 0123456 7890123 ^^^^^^^ 45 ^^ ================================================ FILE: line-wrapping-tests.t/sentences.txt ================================================ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345 ================================================ FILE: line-wrapping-tests.t/words.txt ================================================ 012345 abcdefg 012345 abcdefg ================================================ FILE: match-type-tests.t/dune ================================================ (cram (deps ../bin/docfd.exe)) ================================================ FILE: match-type-tests.t/run.t ================================================ Exact match: $ docfd --cache-dir .cache test.txt --sample "'abc" [1] $ docfd --cache-dir .cache test.txt --sample "'abcd" $TESTCASE_ROOT/test.txt 1: abcd ^^^^ 2: abcdef 3: ABCD 1: abcd 2: abcdef 3: ABCD ^^^^ 4: ABCDEF 5: ABcd 3: ABCD 4: ABCDEF 5: ABcd ^^^^ 6: ABcdEF 7: 6: ABcdEF 7: 8: 'abcd ^^^^ 9: 'abcd' 10: ^efgh 7: 8: 'abcd 9: 'abcd' ^^^^ 10: ^efgh 11: ^^efgh $ docfd --cache-dir .cache test.txt --sample "\\'abcd" $TESTCASE_ROOT/test.txt 6: ABcdEF 7: 8: 'abcd ^^^^^ 9: 'abcd' 10: ^efgh 7: 8: 'abcd 9: 'abcd' ^^^^^ 10: ^efgh 11: ^^efgh 6: ABcdEF 7: 8: 'abcd ^^^^ 9: 'abcd' ^ 10: ^efgh 11: ^^efgh 7: 8: 'abcd 9: 'abcd' ^^^^^ 10: ^efgh 11: ^^efgh 4: ABCDEF 5: ABcd 6: ABcdEF ^^^^^^ 7: 8: 'abcd ^ 9: 'abcd' 10: ^efgh $ docfd --cache-dir .cache test.txt --sample "'abcdef" $TESTCASE_ROOT/test.txt 1: abcd 2: abcdef ^^^^^^ 3: ABCD 4: 
ABCDEF 2: abcdef 3: ABCD 4: ABCDEF ^^^^^^ 5: ABcd 6: ABcdEF 4: ABCDEF 5: ABcd 6: ABcdEF ^^^^^^ 7: 8: 'abcd $ docfd --cache-dir .cache test.txt --sample "''abcd" $TESTCASE_ROOT/test.txt 6: ABcdEF 7: 8: 'abcd ^^^^^ 9: 'abcd' 10: ^efgh 7: 8: 'abcd 9: 'abcd' ^^^^^ 10: ^efgh 11: ^^efgh Exact match smart case sensitivity: $ docfd --cache-dir .cache test.txt --sample "'ABCD" $TESTCASE_ROOT/test.txt 1: abcd 2: abcdef 3: ABCD ^^^^ 4: ABCDEF 5: ABcd $ docfd --cache-dir .cache test.txt --sample "'ABcd" $TESTCASE_ROOT/test.txt 3: ABCD 4: ABCDEF 5: ABcd ^^^^ 6: ABcdEF 7: Prefix match: $ docfd --cache-dir .cache test.txt --sample "^bcd" [1] $ docfd --cache-dir .cache test.txt --sample "^abcd" $TESTCASE_ROOT/test.txt 1: abcd ^^^^ 2: abcdef 3: ABCD 1: abcd 2: abcdef 3: ABCD ^^^^ 4: ABCDEF 5: ABcd 3: ABCD 4: ABCDEF 5: ABcd ^^^^ 6: ABcdEF 7: 6: ABcdEF 7: 8: 'abcd ^^^^ 9: 'abcd' 10: ^efgh 7: 8: 'abcd 9: 'abcd' ^^^^ 10: ^efgh 11: ^^efgh $ docfd --cache-dir .cache test.txt --sample "^abcdef" $TESTCASE_ROOT/test.txt 1: abcd 2: abcdef ^^^^^^ 3: ABCD 4: ABCDEF 2: abcdef 3: ABCD 4: ABCDEF ^^^^^^ 5: ABcd 6: ABcdEF 4: ABCDEF 5: ABcd 6: ABcdEF ^^^^^^ 7: 8: 'abcd $ docfd --cache-dir .cache test.txt --sample "^'abcd" $TESTCASE_ROOT/test.txt 6: ABcdEF 7: 8: 'abcd ^^^^^ 9: 'abcd' 10: ^efgh 7: 8: 'abcd 9: 'abcd' ^^^^^ 10: ^efgh 11: ^^efgh Prefix match smart case sensitivity: $ docfd --cache-dir .cache test.txt --sample "^ABCD" $TESTCASE_ROOT/test.txt 1: abcd 2: abcdef 3: ABCD ^^^^ 4: ABCDEF 5: ABcd 2: abcdef 3: ABCD 4: ABCDEF ^^^^^^ 5: ABcd 6: ABcdEF $ docfd --cache-dir .cache test.txt --sample "^ABcd" $TESTCASE_ROOT/test.txt 3: ABCD 4: ABCDEF 5: ABcd ^^^^ 6: ABcdEF 7: 4: ABCDEF 5: ABcd 6: ABcdEF ^^^^^^ 7: 8: 'abcd Suffix match: $ docfd --cache-dir .cache test.txt --sample 'bcd$' $TESTCASE_ROOT/test.txt 1: abcd ^^^^ 2: abcdef 3: ABCD 1: abcd 2: abcdef 3: ABCD ^^^^ 4: ABCDEF 5: ABcd 3: ABCD 4: ABCDEF 5: ABcd ^^^^ 6: ABcdEF 7: 6: ABcdEF 7: 8: 'abcd ^^^^ 9: 'abcd' 10: ^efgh 7: 8: 'abcd 9: 'abcd' ^^^^ 
10: ^efgh 11: ^^efgh $ docfd --cache-dir .cache test.txt --sample 'abcd$$' $TESTCASE_ROOT/test.txt 13: efgh$$ 14: 15: abcd$ ^^^^^ 16: efgh$ 17: $ docfd --cache-dir .cache test.txt --sample 'ef$' $TESTCASE_ROOT/test.txt 1: abcd 2: abcdef ^^^^^^ 3: ABCD 4: ABCDEF 2: abcdef 3: ABCD 4: ABCDEF ^^^^^^ 5: ABcd 6: ABcdEF 4: ABCDEF 5: ABcd 6: ABcdEF ^^^^^^ 7: 8: 'abcd Suffix match smart case sensitivity: $ docfd --cache-dir .cache test.txt --sample 'ABCD$' $TESTCASE_ROOT/test.txt 1: abcd 2: abcdef 3: ABCD ^^^^ 4: ABCDEF 5: ABcd $ docfd --cache-dir .cache test.txt --sample 'EF$' $TESTCASE_ROOT/test.txt 2: abcdef 3: ABCD 4: ABCDEF ^^^^^^ 5: ABcd 6: ABcdEF 4: ABCDEF 5: ABcd 6: ABcdEF ^^^^^^ 7: 8: 'abcd Fuzzy match explicit spaces: $ docfd --cache-dir .cache test.txt --sample 'hel~word' $TESTCASE_ROOT/test.txt 16: efgh$ 17: 18: hello world ^^^^^^^^^^^ 19: hello world 20: 17: 18: hello world 19: hello world ^^^^^^^^^^^^^ 20: 21: Hello world 19: hello world 20: 21: Hello world ^^^^^^^^^^^ 22: 23: HELLO WORLD 21: Hello world 22: 23: HELLO WORLD ^^^^^^^^^^^ 16: efgh$ 17: 18: hello world ^^^^^^ 19: hello world ^^^^^ 20: 21: Hello world Exact match explicit spaces: $ docfd --cache-dir .cache test.txt --sample "'hello~world" $TESTCASE_ROOT/test.txt 16: efgh$ 17: 18: hello world ^^^^^^^^^^^ 19: hello world 20: 17: 18: hello world 19: hello world ^^^^^^^^^^^^^ 20: 21: Hello world 19: hello world 20: 21: Hello world ^^^^^^^^^^^ 22: 23: HELLO WORLD 21: Hello world 22: 23: HELLO WORLD ^^^^^^^^^^^ $ docfd --cache-dir .cache test.txt --sample "'Hello~world" $TESTCASE_ROOT/test.txt 19: hello world 20: 21: Hello world ^^^^^^^^^^^ 22: 23: HELLO WORLD $ docfd --cache-dir .cache test.txt --sample "'Hello~World" [1] Prefix match explicit spaces: $ docfd --cache-dir .cache test.txt --sample '^hello~wo' $TESTCASE_ROOT/test.txt 16: efgh$ 17: 18: hello world ^^^^^^^^^^^ 19: hello world 20: 17: 18: hello world 19: hello world ^^^^^^^^^^^^^ 20: 21: Hello world 19: hello world 20: 21: Hello world 
^^^^^^^^^^^ 22: 23: HELLO WORLD 21: Hello world 22: 23: HELLO WORLD ^^^^^^^^^^^ $ docfd --cache-dir .cache test.txt --sample '^ello~wo' [1] Suffix match explicit spaces: $ docfd --cache-dir .cache test.txt --sample 'lo~world$' $TESTCASE_ROOT/test.txt 16: efgh$ 17: 18: hello world ^^^^^^^^^^^ 19: hello world 20: 17: 18: hello world 19: hello world ^^^^^^^^^^^^^ 20: 21: Hello world 19: hello world 20: 21: Hello world ^^^^^^^^^^^ 22: 23: HELLO WORLD 21: Hello world 22: 23: HELLO WORLD ^^^^^^^^^^^ $ docfd --cache-dir .cache test.txt --sample 'lo~worl$' [1] ================================================ FILE: match-type-tests.t/test.txt ================================================ abcd abcdef ABCD ABCDEF ABcd ABcdEF 'abcd 'abcd' ^efgh ^^efgh efgh$ efgh$$ abcd$ efgh$ hello world hello world Hello world HELLO WORLD ================================================ FILE: misc-behavior-tests.t/abcd.txt ================================================ abcd ================================================ FILE: misc-behavior-tests.t/dune ================================================ (cram (deps ../bin/docfd.exe)) ================================================ FILE: misc-behavior-tests.t/run.t ================================================ Stdin temp file cleanup: $ echo "abcd" | docfd --cache-dir .cache --search "a" | tail -n +2 1: abcd ^^^^ $ ls /tmp/docfd-* ls: cannot access '/tmp/docfd-*': No such file or directory [2] Stdin and path both specified: $ echo "0123" | docfd --cache-dir .cache abcd.txt --search "01" # Should not print anything since stdin should be ignored. 
[1] $ echo "0123" | docfd --cache-dir .cache abcd.txt --search "ab" $TESTCASE_ROOT/abcd.txt 1: abcd ^^^^ --underline: $ docfd --cache-dir .cache --underline never abcd.txt --search "ab|cd" $TESTCASE_ROOT/abcd.txt 1: abcd 1: abcd $ docfd --cache-dir .cache --underline always abcd.txt --search "ab|cd" $TESTCASE_ROOT/abcd.txt 1: abcd ^^^^ 1: abcd ^^^^ $ docfd --cache-dir .cache --underline auto abcd.txt --search "ab|cd" $TESTCASE_ROOT/abcd.txt 1: abcd ^^^^ 1: abcd ^^^^ --color: $ docfd --cache-dir .cache --color never abcd.txt --search "ab|cd" $TESTCASE_ROOT/abcd.txt 1: abcd ^^^^ 1: abcd ^^^^ $ # The output below is messed up after passing through Dune, I do not know why. $ docfd --cache-dir .cache --color always abcd.txt --search "ab|cd" $TESTCASE_ROOT/abcd.txt1: abcd ^^^^ 1: abcd ^^^^ $ docfd --cache-dir .cache --color auto abcd.txt --search "ab|cd" $TESTCASE_ROOT/abcd.txt 1: abcd ^^^^ 1: abcd ^^^^ ================================================ FILE: non-interactive-mode-return-code-tests.t/dune ================================================ (cram (deps ../bin/docfd.exe)) ================================================ FILE: non-interactive-mode-return-code-tests.t/run.t ================================================ Setup: $ echo "0123 abcd" >> test0.txt $ echo "0123 efgh" >> test1.txt --sample, text all files: $ docfd --sample "'0123" . $TESTCASE_ROOT/test1.txt 1: 0123 efgh ^^^^ $TESTCASE_ROOT/test0.txt 1: 0123 abcd ^^^^ $ docfd --sample "'0123" . -l $TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt $ docfd --sample "'0123" . --files-without-match [1] --sample, text in only one file: $ docfd --sample "'abcd" . $TESTCASE_ROOT/test0.txt 1: 0123 abcd ^^^^ $ docfd --sample "'abcd" . -l $TESTCASE_ROOT/test0.txt $ docfd --sample "'abcd" . --files-without-match $TESTCASE_ROOT/test1.txt --sample, text not in any file: $ docfd --sample "'hello" . [1] $ docfd --sample "'hello" . -l [1] $ docfd --sample "'hello" . 
--files-without-match $TESTCASE_ROOT/test0.txt $TESTCASE_ROOT/test1.txt --search, text all files: $ docfd --search "'0123" . $TESTCASE_ROOT/test1.txt 1: 0123 efgh ^^^^ $TESTCASE_ROOT/test0.txt 1: 0123 abcd ^^^^ $ docfd --search "'0123" . -l $TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt $ docfd --search "'0123" . --files-without-match [1] --search, text in only one file: $ docfd --search "'abcd" . $TESTCASE_ROOT/test0.txt 1: 0123 abcd ^^^^ $ docfd --search "'abcd" . -l $TESTCASE_ROOT/test0.txt $ docfd --search "'abcd" . --files-without-match $TESTCASE_ROOT/test1.txt --search, text not in any file: $ docfd --search "'hello" . [1] $ docfd --search "'hello" . -l [1] $ docfd --search "'hello" . --files-without-match $TESTCASE_ROOT/test0.txt $TESTCASE_ROOT/test1.txt ================================================ FILE: open-with-tests.t/dune ================================================ (cram (deps ../bin/docfd.exe)) ================================================ FILE: open-with-tests.t/run.t ================================================ Error case tests: $ docfd --index-only --open-with pdf:term='okular {path}' error: failed to parse pdf:term=okular {path}, invalid launch mode [1] $ docfd --index-only --open-with pdf='okular {path}' error: failed to parse pdf=okular {path}, expected char : [1] $ docfd --index-only --open-with pdfterminal='okular {path}' error: failed to parse pdfterminal=okular {path}, expected char : [1] $ docfd --index-only --open-with pdf:terminal='okular path}' Initializing in-memory index $ docfd --index-only --open-with pdf:terminal='okular {path' error: failed to parse pdf:terminal=okular {path, expected char } [1] $ docfd --index-only --open-with pdf:terminal='okular {abc}' error: failed to parse pdf:terminal=okular {abc}, invalid placeholder [1] PDF parsing test, terminal launch mode: $ docfd --index-only --open-with pdf:terminal='okular {path}' Initializing in-memory index $ docfd --index-only --open-with pdf:terminal='okular 
{page_num}' Initializing in-memory index $ docfd --index-only --open-with pdf:terminal='okular {line_num}' error: failed to parse pdf:terminal=okular {line_num}, line_num not available [1] $ docfd --index-only --open-with pdf:terminal='okular {search_word}' Initializing in-memory index PDF parsing test, detached launch mode: $ docfd --index-only --open-with pdf:detached='okular {path}' Initializing in-memory index $ docfd --index-only --open-with pdf:detached='okular {page_num}' Initializing in-memory index $ docfd --index-only --open-with pdf:detached='okular {line_num}' error: failed to parse pdf:detached=okular {line_num}, line_num not available [1] $ docfd --index-only --open-with pdf:detached='okular {search_word}' Initializing in-memory index Pandoc supported extensions parsing test, terminal launch mode: $ docfd --index-only --open-with odt:terminal='xdg-open {path}' Initializing in-memory index $ docfd --index-only --open-with odt:terminal='xdg-open {page_num}' error: failed to parse odt:terminal=xdg-open {page_num}, page_num not available [1] $ docfd --index-only --open-with odt:terminal='xdg-open {line_num}' Initializing in-memory index $ docfd --index-only --open-with odt:terminal='xdg-open {search_word}' error: failed to parse odt:terminal=xdg-open {search_word}, search_word not available [1] Pandoc supported extensions parsing test, detached launch mode: $ docfd --index-only --open-with odt:detached='xdg-open {path}' Initializing in-memory index $ docfd --index-only --open-with odt:detached='xdg-open {page_num}' error: failed to parse odt:detached=xdg-open {page_num}, page_num not available [1] $ docfd --index-only --open-with odt:detached='xdg-open {line_num}' Initializing in-memory index $ docfd --index-only --open-with odt:detached='xdg-open {search_word}' error: failed to parse odt:detached=xdg-open {search_word}, search_word not available [1] Text parsing test, terminal launch mode: $ docfd --index-only --open-with txt:terminal='nano {path}' 
Initializing in-memory index $ docfd --index-only --open-with txt:terminal='nano {page_num}' error: failed to parse txt:terminal=nano {page_num}, page_num not available [1] $ docfd --index-only --open-with txt:terminal='nano {line_num}' Initializing in-memory index $ docfd --index-only --open-with txt:terminal='nano {search_word}' error: failed to parse txt:terminal=nano {search_word}, search_word not available [1] Text parsing test, detached launch mode: $ docfd --index-only --open-with txt:detached='nano {path}' Initializing in-memory index $ docfd --index-only --open-with txt:detached='nano {page_num}' error: failed to parse txt:detached=nano {page_num}, page_num not available [1] $ docfd --index-only --open-with txt:detached='nano {line_num}' Initializing in-memory index $ docfd --index-only --open-with txt:detached='nano {search_word}' error: failed to parse txt:detached=nano {search_word}, search_word not available [1] Unrecognized extensions parsing test, terminal launch mode: $ docfd --index-only --open-with abc:terminal='nano {path}' Initializing in-memory index $ docfd --index-only --open-with abc:terminal='nano {page_num}' error: failed to parse abc:terminal=nano {page_num}, page_num not available [1] $ docfd --index-only --open-with abc:terminal='nano {line_num}' Initializing in-memory index $ docfd --index-only --open-with abc:terminal='nano {search_word}' error: failed to parse abc:terminal=nano {search_word}, search_word not available [1] Unrecognized extensions parsing test, detached launch mode: $ docfd --index-only --open-with abc:detached='nano {path}' Initializing in-memory index $ docfd --index-only --open-with abc:detached='nano {page_num}' error: failed to parse abc:detached=nano {page_num}, page_num not available [1] $ docfd --index-only --open-with abc:detached='nano {line_num}' Initializing in-memory index $ docfd --index-only --open-with abc:detached='nano {search_word}' error: failed to parse abc:detached=nano {search_word}, search_word 
not available [1] ================================================ FILE: printing-tests.t/empty.txt ================================================ ================================================ FILE: printing-tests.t/run.t ================================================ --sample: $ docfd --cache-dir .cache --sample abcd . $TESTCASE_ROOT/test3.txt 1: abcd ^^^^ $TESTCASE_ROOT/test2.txt 1: hello 2: 3: abcd ^^^^ 4: 5: abcdefgh 5: abcdefgh 6: 7: hello world abcd ^^^^ 3: abcd 4: 5: abcdefgh ^^^^^^^^ 6: 7: hello world abcd --search: $ docfd --cache-dir .cache --search abcd . $TESTCASE_ROOT/test3.txt 1: abcd ^^^^ $TESTCASE_ROOT/test2.txt 1: hello 2: 3: abcd ^^^^ 4: 5: abcdefgh 5: abcdefgh 6: 7: hello world abcd ^^^^ 3: abcd 4: 5: abcdefgh ^^^^^^^^ 6: 7: hello world abcd -l/--files-with-match: $ docfd --cache-dir .cache --sample abcd . -l $TESTCASE_ROOT/test3.txt $TESTCASE_ROOT/test2.txt $ docfd --cache-dir .cache --sample abcd . --files-with-match $TESTCASE_ROOT/test3.txt $TESTCASE_ROOT/test2.txt --files-without-match: $ docfd --cache-dir .cache --sample abcd . 
--files-without-match $TESTCASE_ROOT/empty.txt $TESTCASE_ROOT/test0.txt $TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test4.txt ================================================ FILE: printing-tests.t/test0.txt ================================================ hello ================================================ FILE: printing-tests.t/test1.txt ================================================ hello ================================================ FILE: printing-tests.t/test2.txt ================================================ hello abcd abcdefgh hello world abcd ================================================ FILE: printing-tests.t/test3.txt ================================================ abcd ================================================ FILE: printing-tests.t/test4.txt ================================================ efgh ================================================ FILE: profiling/dune ================================================ (rule (targets string_set.ml) (deps ../lib/string_set.ml) (action (copy# %{deps} %{targets})) ) (rule (targets misc_utils.ml) (deps ../lib/misc_utils.ml) (action (copy# %{deps} %{targets})) ) (executable (flags (-w "+a-4-9-29-37-40-42-44-48-50-32-30-70@8")) (name main) (libraries docfd_lib containers cmdliner fmt notty notty.unix nottui lwd oseq eio eio_main digestif.ocaml digestif ) ) ================================================ FILE: profiling/main.ml ================================================ open Docfd_lib let lines = [ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer placerat lacus non cursus"; "tincidunt. Suspendisse viverra leo ac quam tincidunt, quis euismod neque tempus. "; "Vestibulum rutrum commodo tristique. Curabitur tristique dapibus dolor, vitae porttitor"; "est tristique quis. Sed urna ex, gravida vitae ipsum vel, fermentum viverra risus. "; "Maecenas et nulla iaculis, bibendum libero vitae, varius est. Fusce eros enim, placerat "; "quis magna eu, rutrum vulputate ante. 
Praesent non mi vel ipsum finibus lobortis. "; "Duis posuere auctor hendrerit. Nunc sodales egestas vestibulum. Quisque suscipit maximus "; "aliquam. Pellentesque tempor mi condimentum bibendum bibendum. Donec vitae accumsan quam, "; "nec vulputate lectus. Nulla ligula ipsum, dictum vel augue at, semper vestibulum ex. "; "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed arcu ligula, cursus nec "; "lacinia ut, lobortis in libero."; ""; "Sed ultricies placerat urna, hendrerit ornare elit semper sit amet. Praesent "; "pretium blandit velit, eu imperdiet lectus tincidunt ut. Suspendisse eget eros "; "tellus. Nulla tristique vel libero non dapibus. Ut scelerisque sem sit amet "; "odio mattis vestibulum. Nam vitae commodo mi. Vestibulum consequat orci at tellus porta "; "placerat."; ""; "In vel mi vestibulum felis accumsan congue eget efficitur tortor. Integer "; "quam purus, malesuada vel nisl at, posuere vestibulum augue. Curabitur velit "; "tortor, vestibulum id placerat eu, convallis at velit. Ut a lectus "; "quis erat tincidunt aliquet. Etiam ut erat magna. Maecenas quis commodo leo, "; "eleifend elementum ante. Nullam dapibus erat augue, a bibendum quam volutpat id. "; "Morbi in ullamcorper arcu. Fusce venenatis lacus purus, vel pellentesque mi "; "elementum a. Maecenas at mattis massa. Fusce ut elit tortor. Morbi rhoncus "; "molestie orci eu malesuada. Aliquam gravida rutrum sem, vitae condimentum magna "; "convallis pulvinar. Duis urna lacus, ultrices a ultrices pharetra, eleifend "; "in ante. Fusce id elementum dolor. Nullam ornare nisl ac ultrices lobortis."; ""; "Proin ullamcorper vulputate enim sed facilisis. Praesent vel mi metus. "; "Fusce sagittis efficitur odio at condimentum. Nullam mollis lacinia consequat. "; "Integer vel ex sit amet nunc aliquam molestie et eu nibh. Nam leo nunc, laoreet "; "vitae iaculis sit amet, dapibus sit amet neque. 
Suspendisse eleifend, leo eget "; "tempor molestie, massa enim auctor dui, quis vehicula erat urna id felis. Vivamus "; "pharetra, sem non tempus ornare, risus tortor posuere tortor, eu pellentesque eros est "; "ac erat. Sed sit amet tellus nisl. Phasellus magna urna, tincidunt in sem "; "id, aliquam vulputate leo. Sed eleifend justo eu mauris egestas imperdiet. Fusce sagittis"; ", turpis ac efficitur pulvinar, purus tellus gravida sem, eget accumsan nisl sapien a "; "ante. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac "; "turpis egestas. Nunc eget nibh orci. Cras facilisis facilisis sapien, "; "a vehicula lorem imperdiet vel. Proin vel nulla nisi."; ""; "Aenean sit amet risus at lectus pellentesque pellentesque at eu quam. "; "Duis euismod porttitor ante quis lacinia. Cras sit amet vulputate nunc. "; "Integer sollicitudin vitae sapien finibus fermentum. Donec eu tellus "; "suscipit, dignissim turpis non, eleifend massa. Nullam quis ex nisi. "; "Quisque dignissim quis leo eu finibus. "; ] let bench ~name ~cycle (f : unit -> 'a) = let start_time = Sys.time () in for _=0 to cycle-1 do f () |> ignore done; let end_time = Sys.time () in Printf.printf "%s: time per cycle: %6fs\n" name ((end_time -. start_time) /. 
(float_of_int cycle)) let main env = Eio.Switch.run @@ fun sw -> assert (Option.is_none (init ~db_path:"test.db" ~document_count_limit:1000)); let pool = Task_pool.make ~sw (Eio.Stdenv.domain_mgr env) in let _raw = Index.Raw.of_lines pool (List.to_seq lines) in Params'.max_fuzzy_edit_dist := 3; let search_exp = Search_exp.parse "vestibul rutru" |> Option.get in let s = "PellentesquePellentesque" in for len=1 to 20 do let limit = 2 in bench ~name:(Fmt.str "Spelll.of_string, limit: %d, len %2d:" limit len) ~cycle:10 (fun () -> Spelll.of_string ~limit:2 (String.sub s 0 len)) done; for len=1 to 20 do let limit = 1 in bench ~name:(Fmt.str "Spelll.of_string, limit: %d, len %2d:" limit len) ~cycle:10 (fun () -> Spelll.of_string ~limit:1 (String.sub s 0 len)) done; bench ~name:"Index.search" ~cycle:1000 (fun () -> Index.search pool (Stop_signal.make ()) ~search_scope:None search_exp); () let () = Eio_main.run main ================================================ FILE: publish.sh ================================================ #!/usr/bin/env bash RETAG_CONFIRM_TEXT="retag-docfd" opam_repo="$HOME/opam-repository" echo "Checking if $opam_repo exists" if [ ! -d "$opam_repo" ]; then echo "$opam_repo does not exist" exit 1 fi ver=$(cat CHANGELOG.md \ | grep '## ' \ | head -n 1 \ | sed -n 's/^## \s*\(\S*\)$/\1/p') echo "Detected version for Docfd:" $ver git_tag="$ver" echo "Computed git tag for Docfd:" $git_tag read -p "Are the version and git tag correct [y/n]? " ans if [[ $ans != "y" ]]; then echo "Publishing canceled" exit 0 fi echo "Checking if $git_tag exists in repo already" if [[ $(git tag -l "$git_tag") == "" ]]; then echo "Tagging commit" git tag "$git_tag" else read -p "Tag already exists, retag [y/n]? 
" ans if [[ $ans == "y" ]]; then read -p "Type \"$RETAG_CONFIRM_TEXT\" to confirm: " ans if [[ $ans != "$RETAG_CONFIRM_TEXT" ]]; then echo "Publishing canceled" exit 0 fi echo "Removing tag" git tag -d "$git_tag" git push --delete origin "$git_tag" echo "Tagging commit" git tag "$git_tag" fi fi echo "Pushing all tags" git push --tags archive="$git_tag".tar.gz echo "Archiving as $archive" rm -f "$archive" git archive --output=./"$archive" "$git_tag" echo "Hashing $archive" hash_cmd=sha256sum archive_hash=$("$hash_cmd" "$archive" | awk '{ print $1 }') echo "Hash from $hash_cmd:" $archive_hash packages=( "docfd" ) for package in ${packages[@]}; do package_dir="$opam_repo"/packages/"$package"/"$package"."$ver" dest_opam="$package_dir"/opam echo "Making directory $package_dir" mkdir -p "$package_dir" echo "Copying $package.opam over" cp "$package.opam" "$dest_opam" echo "Adding url section to $dest_opam" echo " url { src: \"https://github.com/darrenldl/docfd/releases/download/$git_tag/$archive\" checksum: \"sha256=$archive_hash\" } " >> "$dest_opam" done ================================================ FILE: run-container.sh ================================================ #!/usr/bin/env bash podman run -it \ -v ~/docfd:/home/docfd \ --workdir /home/docfd \ --env VISUAL=nano \ --rm \ localhost/docfd \ /bin/bash --login ================================================ FILE: script-tests.t/dune ================================================ (cram (deps ../bin/docfd.exe)) ================================================ FILE: script-tests.t/run.t ================================================ Setup: $ echo "abcd" > test0.txt $ echo "efgh" > test1.txt $ echo "hijk" > test2.txt $ echo "0123" > test3.txt $ echo "search: ^ab" >> 0.docfd-script $ echo "search: 'xyz" >> 1.docfd-script $ tree . 
|-- 0.docfd-script |-- 1.docfd-script |-- dune -> ../../../../default/script-tests.t/dune |-- test0.txt |-- test1.txt |-- test2.txt `-- test3.txt 0 directories, 7 files Basic: $ docfd -l --script 0.docfd-script . $TESTCASE_ROOT/test0.txt $ docfd -l --script 1.docfd-script . [1] ================================================ FILE: search-scope-narrowing-tests.t/dune ================================================ (cram (deps ../bin/docfd.exe)) ================================================ FILE: search-scope-narrowing-tests.t/run.t ================================================ Setup: $ echo "abcd" >> test0.txt $ echo "efgh" >> test0.txt $ echo "0123" >> test0.txt $ echo "ijkl" >> test0.txt $ echo "abcd" >> test1.txt $ echo "efgh" >> test1.txt $ echo "0123" >> test1.txt $ echo "ijkl" >> test1.txt $ tree . |-- dune -> ../../../../default/search-scope-narrowing-tests.t/dune |-- test0.txt `-- test1.txt 0 directories, 3 files Single restriction: $ # Case 0 for single restriction $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l $TESTCASE_ROOT/test0.txt $ # Case 1 for single restriction $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "search: '0123" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l [1] $ # Case 2 for single restriction $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "narrow level: 0" >> test.docfd-script $ echo "search: '0123" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l $TESTCASE_ROOT/test0.txt Chained restriction: $ # Case 0 for chained restrictions $ echo "" > test.docfd-script $ echo 
"search: 'abcd" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "search: '0123" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l [1] $ # Case 1 for chained restrictions $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "narrow level: 2" >> test.docfd-script $ echo "search: '0123" >> test.docfd-script $ echo "narrow level: 2" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l $TESTCASE_ROOT/test0.txt $ # Case 2 for chained restrictions $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "narrow level: 2" >> test.docfd-script $ echo "search: '0123" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l $TESTCASE_ROOT/test0.txt File path filter + restrictions: $ # Baseline case: "clear filter" after "search" should trigger a search for each file that has not been searched through yet $ # So both documents should appear $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:test0.txt" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . $TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt $ # Baseline case quoted string using single quote $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:'test0.txt'" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . 
$TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt $ # Baseline case quoted string using double quote $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo 'filter: path-glob:"test0.txt"' >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . $TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt $ # Since there is no "search" after "narrow", both documents should still appear $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:test0.txt" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . $TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt $ # "narrow" + "search" after filtering should prevent test1.txt from appearing, even after we clear the filter $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:test0.txt" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . $TESTCASE_ROOT/test0.txt $ # Similar to the above case, but the order of "search" and "clear filter" is swapped $ # test1.txt still should not appear $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:test0.txt" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . 
$TESTCASE_ROOT/test0.txt $ # Similar to the above case, but we also reset the search scope via "narrow level: 0" $ # Since resetting the search scope does not refresh the search results, test1.txt should still not appear as there is not another "search" $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:test0.txt" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ echo "narrow level: 0" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . $TESTCASE_ROOT/test0.txt $ # Similar to the above case, but we search again after resetting search scope $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:test0.txt" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ echo "narrow level: 0" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . $TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt $ # Simplified version of the above case where we skip the search before "narrow level: 0" $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:test0.txt" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ echo "narrow level: 0" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . 
$TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt $ # Similar to the above case, but the order of "clear filter" and "narrow level: 0" is swapped $ # Both documents should still appear $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:test0.txt" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "narrow level: 0" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . $TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt $ # Similar to the above case, but the order of "clear filter" and "search" is swapped $ # Both documents should still appear $ echo "" > test.docfd-script $ echo "search: 'abcd" >> test.docfd-script $ echo "filter: path-glob:test0.txt" >> test.docfd-script $ echo "narrow level: 1" >> test.docfd-script $ echo "narrow level: 0" >> test.docfd-script $ echo "search: 'efgh" >> test.docfd-script $ echo "clear filter" >> test.docfd-script $ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l . 
$TESTCASE_ROOT/test1.txt $TESTCASE_ROOT/test0.txt ================================================ FILE: tests/dune ================================================ (executable (flags (-w "+a-4-9-29-37-40-42-44-48-50-70@8" -g)) (name main) (libraries qcheck-core qcheck-alcotest alcotest docfd_lib eio eio_main ) ) ================================================ FILE: tests/main.ml ================================================ (* Test runner: sets up the Eio environment and a task pool, then hands every suite to Alcotest. *) open Docfd_lib let () = Eio_main.run (fun env -> Eio.Switch.run (fun sw -> (* The pool handle is unused here; Task_pool.make is presumably called for its side effect of setting up worker state needed by docfd_lib — confirm in lib/task_pool.ml. *) let _task_pool = Task_pool.make ~sw (Eio.Stdenv.domain_mgr env) in let alco_suites = [ ("Search_exp_tests.Alco", Search_exp_tests.Alco.suite); ("Utils_tests.Alco", Utils_tests.Alco.suite); ] in (* QCheck suites (currently none) are adapted into Alcotest test cases so both kinds run under one Alcotest invocation. *) let qc_suites = [ ] |> List.map (fun (name, suite) -> (name, List.map QCheck_alcotest.to_alcotest suite)) in let suites = alco_suites @ qc_suites in Alcotest.run "docfd-lib" suites ) ) ================================================ FILE: tests/search_exp_tests.ml ================================================ (* Helpers below assert parsing outcomes and build expected annotated tokens for Search_exp/Search_phrase tests. *) open Docfd_lib open Test_utils module Alco = struct (* Asserts that [s] fails to parse as a search expression. *) let test_invalid_exp (s : string) = Alcotest.(check bool) "true" true (Option.is_none (Search_exp.parse s)) (* Asserts that [s] parses to an empty phrase that yields no enriched tokens. *) let test_empty_phrase (s : string) = let phrase = Search_phrase.parse s in Alcotest.(check bool) "case0" true (Search_phrase.is_empty phrase); Alcotest.(check bool) "case1" true (List.is_empty (Search_phrase.enriched_tokens phrase)) (* Asserts that [s] parses to an empty expression whose flattened phrases, if any, are all empty. *) let test_empty_exp (s : string) = let exp = Search_exp.parse s |> Option.get in Alcotest.(check bool) "case0" true (Search_exp.is_empty exp); let flattened = Search_exp.flattened exp in Alcotest.(check bool) "case1" true (List.is_empty flattened || List.for_all Search_phrase.is_empty flattened) (* [at s]: annotated string token in group 0. *) let at s : Search_phrase.annotated_token = Search_phrase.{ data = `String s; group_id = 0 } (* [atm m]: annotated match-type marker token in group 0. *) let atm m : Search_phrase.annotated_token = Search_phrase.{ data = `Match_typ_marker m; group_id = 0 } (* [ats]: annotated explicit-spaces token in group 0. *) let ats : Search_phrase.annotated_token = Search_phrase.{ data = `Explicit_spaces; group_id = 0 }
let et ?(m : Search_phrase.match_typ = `Fuzzy) string is_linked_to_prev is_linked_to_next : Search_phrase.Enriched_token.t = let automaton = Spelll.of_string ~limit:0 "" in Search_phrase.Enriched_token.make (`String string) ~is_linked_to_prev ~is_linked_to_next automaton m let ets ?(m : Search_phrase.match_typ = `Fuzzy) is_linked_to_prev is_linked_to_next = let automaton = Spelll.of_string ~limit:0 "" in Search_phrase.Enriched_token.make `Explicit_spaces ~is_linked_to_prev ~is_linked_to_next automaton m let test_exp ?(neg = false) (s : string) (l : (Search_phrase.annotated_token list * Search_phrase.Enriched_token.t list) list) = let neg' = neg in let phrases = l |> List.map fst |> List.map (fun l -> List.to_seq l |> Search_phrase.of_annotated_tokens) in let enriched_token_list_list = List.map snd l in Alcotest.(check (if neg' then ( neg (list search_phrase_testable) ) else ( list search_phrase_testable ))) (Fmt.str "case0 of %S" s) (List.sort Search_phrase.compare phrases) (Search_exp.parse s |> Option.get |> Search_exp.flattened |> List.sort Search_phrase.compare ); Alcotest.(check (list (list enriched_token_testable))) (Fmt.str "case1 of %S" s) enriched_token_list_list (phrases |> List.map Search_phrase.enriched_tokens ) let corpus () = test_empty_exp ""; test_empty_phrase ""; test_empty_exp " "; test_empty_phrase " "; test_empty_exp "\r\n"; test_empty_phrase "\r\n"; test_empty_exp "\t"; test_empty_phrase "\t"; test_empty_exp "\r\n\t"; test_empty_phrase "\r\n\t"; test_empty_exp " \r \n \t "; test_empty_phrase " \r \n \t "; test_empty_exp "()"; test_empty_exp " () "; test_empty_exp "( )"; test_empty_exp " ( ) "; test_empty_exp " ( ) () "; test_empty_exp " ( ( ) ) () "; test_empty_exp " ( () ) (( )) "; test_empty_exp " ( () ) (( () )) "; test_invalid_exp " ( ) ( "; test_invalid_exp " ) ( "; test_invalid_exp " ( ( ) "; test_invalid_exp " ( ( ) "; test_invalid_exp "?"; test_invalid_exp "? "; test_exp "\\?" [ ([ at "?" ], [ et "?" 
false false ]) ]; test_exp "hello_world" [ ([ at "hello"; at "_"; at "world" ], [ et "hello" false true; et "_" true true; et "world" true false ]) ]; test_exp "(hello)" [ ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "()hello" [ ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "hello()" [ ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "( ) hello" [ ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "hello ( )" [ ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "?hello" [ ([], []) ; ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "(?hello)" [ ([], []) ; ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "?(hello)" [ ([], []) ; ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "?hello()" [ ([], []) ; ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "?hello ()" [ ([], []) ; ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "? hello" [ ([], []) ; ([ at "hello" ], [ et "hello" false false ]) ]; test_exp "?hello world" [ ([ at "world" ], [ et "world" false false ]) ; ([ at "hello"; at " "; at "world" ], [ et "hello" false false; et "world" false false ]) ]; test_exp "? hello world" [ ([ at "world" ], [ et "world" false false ]) ; ([ at "hello"; at " "; at "world" ], [ et "hello" false false; et "world" false false ]) ]; test_exp "?(hello) world" [ ([ at "world" ], [ et "world" false false ] ) ; ([ at "hello"; at " "; at "world" ], [ et "hello" false false; et "world" false false ] ) ]; test_exp "? 
(hello) world" [ ([ at "world" ], [ et "world" false false ] ) ; ([ at "hello"; at " "; at "world" ], [ et "hello" false false; et "world" false false ]) ]; test_exp "?(hello world) abcd" [ ([ at "abcd" ], [ et "abcd" false false ] ) ; ([ at "hello"; at " "; at "world"; at " "; at "abcd" ], [ et "hello" false false; et "world" false false; et "abcd" false false ] ) ]; test_exp "ab ?(hello world) cd" [ ([ at "ab"; at " "; at "cd" ], [ et "ab" false false; et "cd" false false ]) ; ([ at "ab"; at " "; at "hello"; at " "; at "world"; at " "; at "cd" ], [ et "ab" false false; et "hello" false false; et "world" false false; et "cd" false false ]) ]; test_exp "ab ?hello world cd" [ ([ at "ab"; at " "; at "world"; at " "; at "cd" ], [ et "ab" false false; et "world" false false; et "cd" false false ]) ; ([ at "ab"; at " "; at "hello"; at " "; at "world"; at " "; at "cd" ], [ et "ab" false false; et "hello" false false; et "world" false false; et "cd" false false ]) ]; test_exp "go (left | right)" [ ([ at "go"; at " "; at "left" ], [ et "go" false false; et "left" false false ]) ; ([ at "go"; at " "; at "right" ], [ et "go" false false; et "right" false false ]) ]; test_exp "go (?up | left | right)" [ ([ at "go" ], [ et "go" false false ]) ; ([ at "go"; at " "; at "up" ], [ et "go" false false; et "up" false false ]) ; ([ at "go"; at " "; at "left" ], [ et "go" false false; et "left" false false ]) ; ([ at "go"; at " "; at "right" ], [ et "go" false false; et "right" false false ])]; test_exp "(left | right) (up | down)" [ ([ at "left"; at " "; at "up" ], [ et "left" false false; et "up" false false ]) ; ([ at "left"; at " "; at "down" ], [ et "left" false false; et "down" false false ]) ; ([ at "right"; at " "; at "up" ], [ et "right" false false; et "up" false false ]) ; ([ at "right"; at " "; at "down" ], [ et "right" false false; et "down" false false ]) ]; test_exp "((a|b)(c|d)) (e | f)" [ ([ at "a"; at " "; at "c"; at " "; at "e" ], [ et "a" false false; et "c" false 
false; et "e" false false ]) ; ([ at "a"; at " "; at "c"; at " "; at "f" ], [ et "a" false false; et "c" false false; et "f" false false ]) ; ([ at "a"; at " "; at "d"; at " "; at "e" ], [ et "a" false false; et "d" false false; et "e" false false ]) ; ([ at "a"; at " "; at "d"; at " "; at "f" ], [ et "a" false false; et "d" false false; et "f" false false ]) ; ([ at "b"; at " "; at "c"; at " "; at "e" ], [ et "b" false false; et "c" false false; et "e" false false ]) ; ([ at "b"; at " "; at "c"; at " "; at "f" ], [ et "b" false false; et "c" false false; et "f" false false ]) ; ([ at "b"; at " "; at "d"; at " "; at "e" ], [ et "b" false false; et "d" false false; et "e" false false ]) ; ([ at "b"; at " "; at "d"; at " "; at "f" ], [ et "b" false false; et "d" false false; et "f" false false ]) ]; test_exp "(?left | right) (up | down)" [ ([ at "up" ], [ et "up" false false ]) ; ([ at "down" ], [ et "down" false false ]) ; ([ at "left"; at " "; at "up" ], [ et "left" false false; et "up" false false ]) ; ([ at "left"; at " "; at "down" ], [ et "left" false false; et "down" false false ]) ; ([ at "right"; at " "; at "up" ], [ et "right" false false; et "up" false false ]) ; ([ at "right"; at " "; at "down" ], [ et "right" false false; et "down" false false ]) ]; test_exp "go (left | right) or ( up | down )" [ ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "left" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "left" false false; et "or" false false; et "down" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "right" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "right" false false; et "or" false false; et "down" false false ]) ]; test_exp "and/or" [ 
([ at "and"; at "/"; at "or" ], [ et "and" false true; et "/" true true; et "or" true false ]) ]; test_exp ~neg:true "and/or" [ ([ at "and"; at " "; at "/"; at " "; at "or" ], [ et "and" false false; et "/" false false; et "or" false false ]) ]; test_exp ~neg:true "and/or" [ ([ at "and"; at " "; at "/"; at "or" ], [ et "and" false false; et "/" false true; et "or" true false ]) ]; test_exp ~neg:true "and/or" [ ([ at "and"; at "/"; at " "; at "or" ], [ et "and" false true; et "/" true false; et "or" false false ]) ]; test_exp "and / or" [ ([ at "and"; at " "; at "/"; at " "; at "or" ], [ et "and" false false; et "/" false false; et "or" false false ]) ]; test_exp ~neg:true "and / or" [ ([ at "and"; at "/"; at "or" ], [ et "and" false true; et "/" true true; et "or" true false ]) ]; test_exp ~neg:true "and / or" [ ([ at "and"; at " "; at "/"; at "or" ], [ et "and" false false; et "/" false true; et "or" true false ]) ]; test_exp ~neg:true "and / or" [ ([ at "and"; at "/"; at " "; at "or" ], [ et "and" false true; et "/" true false; et "or" false false ]) ]; test_exp "(and)/ or" [ ([ at "and"; at " "; at "/"; at " "; at "or" ], [ et "and" false false; et "/" false false; et "or" false false ]) ]; test_exp ~neg:true "(and)/ or" [ ([ at "and"; at "/"; at " "; at "or" ], [ et "and" false true; et "/" true false; et "or" false false ]) ]; test_exp "and(/) or" [ ([ at "and"; at " "; at "/"; at " "; at "or" ], [ et "and" false false; et "/" false false; et "or" false false ]) ]; test_exp ~neg:true "and(/) or" [ ([ at "and"; at "/"; at " "; at "or" ], [ et "and" false true; et "/" true false; et "or" false false ]) ]; test_exp "and/(or)" [ ([ at "and"; at "/"; at " "; at "or" ], [ et "and" false true; et "/" true false; et "or" false false ]) ]; test_exp ~neg:true "and/(or)" [ ([ at "and"; at "/"; at "or" ], [ et "and" false true; et "/" true true; et "or" true false ]) ]; test_exp "go (left | right) and/or ( up | down )" [ ([ at "go"; at " "; at "left"; at " "; at "and"; at 
"/"; at "or"; at " "; at "up" ], [ et "go" false false; et "left" false false; et "and" false true; et "/" true true; et "or" true false; et "up" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "and"; at "/"; at "or"; at " "; at "down" ], [ et "go" false false; et "left" false false; et "and" false true; et "/" true true; et "or" true false; et "down" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "and"; at "/"; at "or"; at " "; at "up" ], [ et "go" false false; et "right" false false; et "and" false true; et "/" true true; et "or" true false; et "up" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "and"; at "/"; at "or"; at " "; at "down" ], [ et "go" false false; et "right" false false; et "and" false true; et "/" true true; et "or" true false; et "down" false false ]) ]; test_exp "go (left | right) and / or ( up | down )" [ ([ at "go"; at " "; at "left"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "left" false false; et "and" false false; et "/" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "left" false false; et "and" false false; et "/" false false; et "or" false false; et "down" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "right" false false; et "and" false false; et "/" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "right" false false; et "and" false false; et "/" false false; et "or" false false; et "down" false false ]) ]; test_exp "go ?(left | right) ( up | down )" [ ([ at "go"; at " "; at "up" ], [ et "go" false false; et "up" false false ]) ; ([ at "go"; at " "; at "down" ], [ et "go" 
false false; et "down" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "up" ], [ et "go" false false; et "left" false false; et "up" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "down" ], [ et "go" false false; et "left" false false; et "down" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "up" ], [ et "go" false false; et "right" false false; et "up" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "down" ], [ et "go" false false; et "right" false false; et "down" false false ]) ]; test_exp "go ?((left | right) or) ( up | down )" [ ([ at "go"; at " "; at "up" ], [ et "go" false false; et "up" false false ]) ; ([ at "go"; at " "; at "down" ], [ et "go" false false; et "down" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "left" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "left" false false; et "or" false false; et "down" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "right" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "right" false false; et "or" false false; et "down" false false ]) ]; test_exp "go ?(?(left | right) or) ( up | down )" [ ([ at "go"; at " "; at "up" ], [ et "go" false false; et "up" false false ]) ; ([ at "go"; at " "; at "down" ], [ et "go" false false; et "down" false false ]) ; ([ at "go"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "or" false false; et "down" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "left" false false; et "or" false false; et "up" false 
false ]) ; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "left" false false; et "or" false false; et "down" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "right" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "right" false false; et "or" false false; et "down" false false ]) ]; test_exp "go ?(?(left | right) or) or ( ?up | down )" [ ([ at "go"; at " "; at "or" ], [ et "go" false false; et "or" false false ]) ; ([ at "go"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "or" false false; et "down" false false ]) ; ([ at "go"; at " "; at "or"; at " "; at "or" ], [ et "go" false false; et "or" false false; et "or" false false ]) ; ([ at "go"; at " "; at "or"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "or" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "or"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "or" false false; et "or" false false; et "down" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "or" ], [ et "go" false false; et "left" false false; et "or" false false; et "or" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "left" false false; et "or" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "left" false false; et "or" false false; et "or" false false; et "down" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "or" ], [ et "go" false false; et "right" false false; et "or" false false; 
et "or" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "or"; at " "; at "up" ], [ et "go" false false; et "right" false false; et "or" false false; et "or" false false; et "up" false false ]) ; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "or"; at " "; at "down" ], [ et "go" false false; et "right" false false; et "or" false false; et "or" false false; et "down" false false ]) ]; test_exp "- - -" [ ([ at "-"; at " "; at "-"; at " "; at "-" ], [ et "-" false false; et "-" false false; et "-" false false ]) ]; test_exp "-- -" [ ([ at "-"; at "-"; at " "; at "-" ], [ et "-" false true; et "-" true false; et "-" false false ]) ]; test_exp "\\'abcd" [ ([ at "'"; at "abcd" ], [ et "'" false true; et "abcd" true false ]) ]; test_exp "'abcd" [ ([ atm `Exact; at "abcd" ], [ et ~m:`Exact "abcd" false false ]) ]; test_exp "' abcd" [ ([ atm `Exact; at " "; at "abcd" ], [ et "'" false false; et "abcd" false false ]) ]; test_exp "' abcd" [ ([ at "'"; at " "; at "abcd" ], [ et "'" false false; et "abcd" false false ]) ]; test_exp "\\^abcd" [ ([ at "^"; at "abcd" ], [ et "^" false true; et "abcd" true false ]) ]; test_exp "^abcd" [ ([ atm `Prefix; at "abcd" ], [ et ~m:`Prefix "abcd" false false ]) ]; test_exp "^ abcd" [ ([ at "^"; at " "; at "abcd" ], [ et "^" false false; et "abcd" false false ]) ]; test_exp "^ abcd" [ ([ atm `Prefix; at " "; at "abcd" ], [ et "^" false false; et "abcd" false false ]) ]; test_exp "abcd$" [ ([ at "abcd"; atm `Suffix ], [ et ~m:`Suffix "abcd" false false ]) ]; test_exp "abcd $" [ ([ at "abcd"; at " "; at "$" ], [ et "abcd" false false; et "$" false false ]) ]; test_exp "abcd $" [ ([ at "abcd"; at " "; atm `Suffix ], [ et "abcd" false false; et "$" false false ]) ]; test_exp "''abcd" [ ([ atm `Exact; atm `Exact; at "abcd" ], [ et ~m:`Exact "'" false true; et ~m:`Exact "abcd" true false ]) ]; test_exp "^^abcd" [ ([ atm `Prefix; at "^"; at "abcd" ], [ et ~m:`Exact "^" false true; et ~m:`Prefix "abcd" true 
false ]) ]; test_exp "abcd$$" [ ([ at "abcd"; at "$"; atm `Suffix ], [ et ~m:`Suffix "abcd" false true; et ~m:`Exact "$" true false ]) ]; test_exp "abcd$$" [ ([ at "abcd"; atm `Suffix; atm `Suffix ], [ et ~m:`Suffix "abcd" false true; et ~m:`Exact "$" true false ]) ]; test_exp "'ab~cd" [ ([ atm `Exact; at "ab"; ats; at "cd" ], [ et ~m:`Exact "ab" false true ; ets ~m:`Exact true true ; et ~m:`Exact "cd" true false ]) ]; test_exp "^ab~cd$" [ ([ atm `Prefix; at "ab"; ats; at "cd"; atm `Suffix ], [ et ~m:`Exact "ab" false true ; ets ~m:`Exact true true ; et ~m:`Exact "cd" true true ; et ~m:`Prefix "$" true false ]) ]; test_exp "ab~cd$" [ ([ at "ab"; ats; at "cd"; atm `Suffix ], [ et ~m:`Suffix "ab" false true ; ets ~m:`Exact true true ; et ~m:`Exact "cd" true false ]) ]; test_exp " ~cd$" [ ([ ats; at "cd"; atm `Suffix ], [ ets ~m:`Suffix false true ; et ~m:`Exact "cd" true false ]) ]; test_exp "'^abcd efgh$$ ij$kl$" [ ([ atm `Exact ; atm `Prefix ; at "abcd" ; at " " ; at "efgh" ; atm `Suffix ; atm `Suffix ; at " " ; at "ij" ; atm `Suffix ; at "kl" ; atm `Suffix ], [ et ~m:`Exact "^" false true ; et ~m:`Exact "abcd" true false ; et ~m:`Suffix "efgh" false true ; et ~m:`Exact "$" true false ; et ~m:`Suffix "ij" false true ; et ~m:`Exact "$" true true ; et ~m:`Exact "kl" true false ]) ]; test_exp "'^abcd efgh$$ ij$kl$" [ ([ atm `Exact ; at "^" ; at "abcd" ; at " " ; at "efgh" ; at "$" ; atm `Suffix ; at " " ; at "ij" ; at "$" ; at "kl" ; atm `Suffix ], [ et ~m:`Exact "^" false true ; et ~m:`Exact "abcd" true false ; et ~m:`Suffix "efgh" false true ; et ~m:`Exact "$" true false ; et ~m:`Suffix "ij" false true ; et ~m:`Exact "$" true true ; et ~m:`Exact "kl" true false ]) ]; () let suite = [ Alcotest.test_case "corpus" `Quick corpus; ] end ================================================ FILE: tests/test_utils.ml ================================================ open Docfd_lib let enriched_token_testable : (module Alcotest.TESTABLE with type t = 
Search_phrase.Enriched_token.t) = (module Search_phrase.Enriched_token) let search_phrase_testable : (module Alcotest.TESTABLE with type t = Search_phrase.t) = (module Search_phrase) ================================================ FILE: tests/utils_tests.ml ================================================ open Docfd_lib module Alco = struct let normalize_path_to_absolute_corpus () = let test expected input = Alcotest.(check string) (Printf.sprintf "%s becomes %s" input expected) expected (Misc_utils'.normalize_path_to_absolute input) in let cwd = Sys.getcwd () in let test' expected input = test (if expected = "" then cwd else Filename.concat cwd expected) input in test "/" "/.."; test "/" "/.."; test "/" "/abcd/.."; test "/" "/abcd/.."; test "/abc" "/abc/"; test "/abc" "/abc/def/.."; test "/abc" "/abc//"; test "/abc/def" "/abc//def"; test "/abc/def" "/abc/./def"; test "/abc/def" "/abc/.///def/."; test "/def" "/abc/.//../def/."; test' "" "abc/.."; test' "def" "abc/../def"; test' "abc/def" "abc////.//./././/def"; () let normalize_glob_to_absolute_corpus () = let test expected input = Alcotest.(check string) (Printf.sprintf "%s becomes %s" input expected) expected (Misc_utils'.normalize_glob_to_absolute input) in let cwd = Sys.getcwd () in let test' expected input = test (if expected = "" then cwd else Filename.concat cwd expected) input in test "/" "/.."; test "/" "/.."; test "/" "/abcd/.."; test "/" "/abcd/.."; test "/abc" "/abc/"; test "/abc" "/abc/def/.."; test "/abc" "/abc/*/.."; test "/abc/**/.." "/abc/**/.."; test "/abc/**/def" "/abc/**/def"; test "/**/def/*/.." 
"/abc/../**/def/*/.."; test "/abc/**/def" "/abc/.////**/def"; test "/abc" "/abc//"; test "/abc/def" "/abc//def"; test "/abc/def" "/abc/./def"; test "/abc/def" "/abc/.///def/."; test "/def" "/abc/.//../def/."; test' "" "abc/.."; test' "def" "abc/../def"; test' "abc/def" "abc////.//./././/def"; test' "abc/**/../def" "abc/**/../def"; test' "abc/**/../def" "abc/*/../**/../def"; test' "abc/*/def" "abc/*/*/../def"; () let suite = [ Alcotest.test_case "normalize_path_to_absolute_corpus" `Quick normalize_path_to_absolute_corpus; Alcotest.test_case "normalize_glob_to_absolute_corpus" `Quick normalize_glob_to_absolute_corpus; ] end ================================================ FILE: update-version-string.py ================================================ import os import re ml_path = "bin/version_string.ml" version = os.environ.get('DOCFD_VERSION_OVERRIDE') if version is None or version == "": with open("CHANGELOG.md") as f: for line in f: if line.startswith("## ") and not ("future release" in line.lower()): version = line.split(" ")[1].strip() break print(f"Detected version for Docfd: {version}") print(f"Writing to {ml_path}") with open(ml_path, "w") as f: f.write(f"let s = \"{version}\"") f.write("\n")