Repository: darrenldl/docfd
Branch: main
Commit: 203de25de255
Files: 125
Total size: 618.6 KB
Directory structure:
gitextract_8iyswmwt/
├── .gitattributes
├── .github/
│ └── workflows/
│ └── deploy.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── Makefile
├── README.md
├── bin/
│ ├── BLAKE2B.ml
│ ├── UI.ml
│ ├── UI_base.ml
│ ├── args.ml
│ ├── clipboard.ml
│ ├── command.ml
│ ├── content_and_search_result_rendering.ml
│ ├── debug_utils.ml
│ ├── docfd.ml
│ ├── document.ml
│ ├── document.mli
│ ├── document_pipeline.ml
│ ├── document_pipeline.mli
│ ├── document_src.ml
│ ├── dune
│ ├── file_utils.ml
│ ├── filter_exp.ml
│ ├── glob.ml
│ ├── glob.mli
│ ├── lock_protected_cell.ml
│ ├── lock_protected_cell.mli
│ ├── misc_utils.ml
│ ├── params.ml
│ ├── path_opening.ml
│ ├── ping.ml
│ ├── ping.mli
│ ├── printers.ml
│ ├── proc_utils.ml
│ ├── result_syntax.ml
│ ├── script.ml
│ ├── search_mode.ml
│ ├── session.ml
│ ├── session.mli
│ ├── session_manager.ml
│ ├── session_manager.mli
│ ├── string_utils.ml
│ ├── version_string.ml
│ └── xdg_utils.ml
├── containers/
│ ├── Containerfile.demo-vhs
│ └── Containerfile.docfd
├── demo-vhs-tapes/
│ ├── repo-non-interactive.tape
│ ├── repo.tape
│ └── ui-screenshot.tape
├── demo-vhs.sh
├── docfd.opam
├── docfd.opam.locked
├── docfd.opam.template
├── dune-project
├── file-collection-tests.t/
│ ├── dune
│ └── run.t
├── lib/
│ ├── GZIP.ml
│ ├── char_map.ml
│ ├── doc_id_db.ml
│ ├── doc_id_db.mli
│ ├── docfd_lib.ml
│ ├── dune
│ ├── index.ml
│ ├── index.mli
│ ├── int_map.ml
│ ├── int_set.ml
│ ├── link.ml
│ ├── misc_utils.ml
│ ├── option_syntax.ml
│ ├── params.ml
│ ├── parser_components.ml
│ ├── search_exp.ml
│ ├── search_exp.mli
│ ├── search_phrase.ml
│ ├── search_phrase.mli
│ ├── search_result.ml
│ ├── search_result.mli
│ ├── search_result_heap.ml
│ ├── sqlite3_utils.ml
│ ├── stop_signal.ml
│ ├── stop_signal.mli
│ ├── string_map.ml
│ ├── string_set.ml
│ ├── task_pool.ml
│ ├── task_pool.mli
│ ├── tokenization.ml
│ ├── word_db.ml
│ └── word_db.mli
├── line-wrapping-tests.t/
│ ├── dune
│ ├── long-words.txt
│ ├── run.t
│ ├── sentences.txt
│ └── words.txt
├── match-type-tests.t/
│ ├── dune
│ ├── run.t
│ └── test.txt
├── misc-behavior-tests.t/
│ ├── abcd.txt
│ ├── dune
│ └── run.t
├── non-interactive-mode-return-code-tests.t/
│ ├── dune
│ └── run.t
├── open-with-tests.t/
│ ├── dune
│ └── run.t
├── printing-tests.t/
│ ├── empty.txt
│ ├── run.t
│ ├── test0.txt
│ ├── test1.txt
│ ├── test2.txt
│ ├── test3.txt
│ └── test4.txt
├── profiling/
│ ├── dune
│ └── main.ml
├── publish.sh
├── run-container.sh
├── script-tests.t/
│ ├── dune
│ └── run.t
├── search-scope-narrowing-tests.t/
│ ├── dune
│ └── run.t
├── tests/
│ ├── dune
│ ├── main.ml
│ ├── search_exp_tests.ml
│ ├── test_utils.ml
│ └── utils_tests.ml
└── update-version-string.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
*.t/run.t linguist-vendored
================================================
FILE: .github/workflows/deploy.yml
================================================
name: Deploy on release
on:
push:
tags:
- "[0-9]*"
- "test*"
branches:
- "ci-test"
jobs:
build:
strategy:
fail-fast: false
matrix:
os:
- ubuntu-22.04
- ubuntu-22.04-arm
- macos-latest
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- run: echo "GITHUB_TAG=$(git describe --always --tags)" >> $GITHUB_ENV
- if: ${{ startsWith(matrix.os, 'ubuntu') && !endsWith(matrix.os, 'arm') }}
run: echo "OS_SHORT_NAME=linux" >> $GITHUB_ENV
- if: ${{ startsWith(matrix.os, 'ubuntu') && endsWith(matrix.os, 'arm') }}
run: echo "OS_SHORT_NAME=linux-arm" >> $GITHUB_ENV
- if: ${{ startsWith(matrix.os, 'macos') }}
run: echo "OS_SHORT_NAME=macos" >> $GITHUB_ENV
- if: ${{ startsWith(matrix.os, 'windows') }}
run: echo "OS_SHORT_NAME=windows" >> $GITHUB_ENV
- name: Set up OCaml for Linux
uses: ocaml/setup-ocaml@v3
with:
ocaml-compiler: "5.2.1"
- run: opam install dune
- run: opam install . --deps-only --with-test
- name: Use commit hash as version if on ci-test branch
if: ${{ github.ref_name == 'ci-test' }}
run: |
echo "DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }}" >> $GITHUB_ENV
- name: Create build for macOS
if: ${{ env.OS_SHORT_NAME == 'macos' }}
run: |
export DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }}
opam exec -- make release-build
- name: Create static build for Linux
if: ${{ env.OS_SHORT_NAME == 'linux' }}
run: |
export DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }}
opam exec -- make release-static-build
- name: Create static build for Linux ARM
if: ${{ env.OS_SHORT_NAME == 'linux-arm' }}
run: |
export DOCFD_VERSION_OVERRIDE=${{ env.GITHUB_TAG }}
opam exec -- make release-static-build-arm
- name: Package into tar.gz
run: |
mv release/docfd docfd
tar -cvzf docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz docfd
- name: Upload artifacts
if: ${{ github.ref_name == 'ci-test' }}
uses: actions/upload-artifact@v4
with:
name: docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz
path: docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz
- name: Release
if: ${{ github.ref_name != 'ci-test' }}
uses: softprops/action-gh-release@v2
with:
files: |
docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz
- name: Release preview
if: ${{ github.ref_name == 'ci-test' }}
uses: softprops/action-gh-release@v2
with:
tag_name: preview
name: "Preview Build"
body: "Automated preview build from commit ${{ github.sha }}. This release is updated on every push to ci-test."
prerelease: true
files: |
docfd-${{ env.GITHUB_TAG }}-${{ env.OS_SHORT_NAME }}.tar.gz
================================================
FILE: .gitignore
================================================
_build/
_coverage/
.merlin
*.rst~
*.install
bisect*.out
bisect*.coverage
fuzz-*-input
fuzz-*-output
fuzz-logs/
/test*.md
/test*.txt
test*.pdf
test*.docx
*.tar.gz
/release/
perf.data*
*.pdf
.cache
/*.log
*.mp4
dummy.gif
*.db
*.db-journal
================================================
FILE: CHANGELOG.md
================================================
# Changelog
## 13.0.0
- Removed fzf dependency entirely
- Switched from fzf to a built-in implementation for fuzzy selection
menus
- Replaced "sort by fzf" functionality with built-in PATH-FUZZY-RANK mode
- Renamed OPEN-SCRIPT to SCRIPTS
- Moved DELETE-SCRIPT functionality to `Ctrl`+`X` under SCRIPTS
- Added `y` key binding to copy in LINKS
- Changed key binding for rotation of key binding info grid from `?` to `<` and `>`
- Added "focus content" via `z` which hides the bottom right pane
- Added key binding info pane toggle via `?`
- Made the UX of various input fields slightly more polished
- This includes adding `Esc` for cancelling during SAVE-SCRIPT
- Added request handling debouncing to reduce pointless workload triggered during
fast typing
- Improved link extraction
- Minor UI fixes and polishes
## 12.3.2
- Fixed missing stop signal passing for `content:"..."` filter expression handling
- Previously, filter expressions with `content:"..."` were not
cancelled properly
- Added additional guards against potential freezing due to DB pool
exhaustion or DB connection issues
## 12.3.1
- Fixed key binding info grid
- Added the missing `l` key binding info
- Fixed some other key binding labels
## 12.3.0
**Note**: This update contains a feature that requires removing the existing index DB to take effect.
- Docfd script comment support improvement
- Added `;` prefix for system comments, which are not preserved after editing of command history
- `#` now denotes user comment and is preserved after editing of command history
- Ordering of `#` is also preserved during saving a session as script
- Added link opening support via LINKS mode
- This will **only** work after recreating the index DB
- `l` opens LINKS mode with the same navigation keybinds
- `Enter` to open link
- `o` to open link and remain in LINKS
- Links which are closest to the selected search result will be prioritized first
- Added key binding `xh` to clear command history quickly
## 12.2.0
- Dependencies adjustment for CI build
- Internal refactoring
- Refactored document store module into session state module to
better reflect that the data structure is capturing not just
documents and search results, but also some UI states, etc
- Moved screen split handling to the level of session state instead of
plain UI state
- This allows Docfd script to better capture the view on screen
- Added `Shift`+`Tab` for changing screen split ratio in the other
direction, and removed the "rotating" behaviour of `Tab`
## 12.1.0
- Added missing sorting based on paths when path dates are the same
- Added PDF viewer integration for Zathura on Linux
- Moved sorting handling to the level of document store command instead of just plain UI update
- This allows Docfd script to better capture the view on screen
- Fixed default cache directory location on macOS
- Changed from `~/Library/Application Support` to `~/Library/Caches`
- Added `--reverse` to fzf invocation for better UX
- Added `Ctrl`+`D` for script deletion
## 12.0.0
This contains a **breaking** DB change, you will need to remove index DB generated by Docfd version prior to 12.0.0-alpha.13
#### Highlights of changes since 11.0.1
- Moved to using a global word table to reduce index DB size and speed up search (12.0.0-alpha.13)
- This is the **breaking** DB change stated above
- Added further search speed optimizations (12.0.0)
- Added an additional document pruning stage
- Added a first word candidate pruning stage based on length of the first search word
- Searching for short words should now feel much more responsive
- Replaced filter glob with a more powerful filter language, with
autocomplete in filter field (12.0.0-alpha.1, 12.0.0-alpha.2,
12.0.0-alpha.5, 12.0.0-alpha.6, 12.0.0-alpha.10, 12.0.0-alpha.11)
- Added content view pane scrolling (12.0.0-alpha.5, 12.0.0-alpha.8)
- Controlled by `-`/`=`
- Added "save script" and "load script" functionality to make it
actually viable to reuse Docfd commands (12.0.0-alpha.8,
12.0.0-alpha.9)
- SQL query optimizations for prefix and exact search terms
(12.0.0-alpha.3)
- Key binding info grid improvements (12.0.0-alpha.4)
- Added more key bindings
- Packed columns more tightly
- Added `--paths-from -` to accept list of paths from stdin
(12.0.0-alpha.3)
- Added WSL clipboard integration (12.0.0-alpha.4)
- Added more marking key bindings (12.0.0-alpha.4)
- `mark listed` (`ml`) marks all currently listed documents
- `unmark listed` (`Ml`) unmarks all currently listed documents
- `--open-with` placeholder handling fixes (12.0.0-alpha.4)
- Using `{page_num}` and `{line_num}` crashes in 11.0.1
when there are no search results
- Added sorting to document list (12.0.0-alpha.11, 12.0.0)
- `s` for sort ascending mode and `Shift+S` for sort descending mode
- Under the sort modes, the sort by types are as follows:
- `p` sort by path
- `d` sort by path date
- `s` sort by score
- `m` sort by modification time
- `f` sort by an interactive fzf search
- Selected option will be ranked the highest
- Rest of the documents will be ranked using the ranking from fzf
- Adjusted attributes listed in document list entry (12.0.0-alpha.11)
- Added path date
- Replaced last scan time with last modified time
- Reworked the internal architecture of document store snapshots
storage and management, which makes the overall interaction
between UI and core code much more robust (12.0.0-alpha.11)
#### Changes since 12.0.0-alpha.13
- Added further search speed optimizations:
- Added an additional document pruning stage
- Added a first word candidate pruning stage based on length of the first search word
- Searching for short words should now feel much more responsive
- Fixed interaction with fzf (which is used in some selection menus) on macOS
due to different behavior of `Unix.waitpid` on macOS compared to Linux
- Document sorting fine tuning
- Fixed document sorting fallback behavior
- If there is no search expression but sorting method chosen is to
sort by score, then sorting method falls back to the option
specified by `--sort-no-score`
- Fixed macOS detection
- Updated `--open-with` to accept a list of extensions, e.g.
`--open-with ts,js:detached="... {path}"`
- Added sort by fzf functionality
- Under sort mode
- `f` sort by an interactive fzf search
- Selected option will be ranked the highest
- Rest of the documents will be ranked using the ranking from fzf
## 12.0.0-alpha.13
- Moved to using a global word table to reduce index DB size and speed up search
- This is a **breaking** DB change, you will need to remove index DB generated by older versions of Docfd
- Added missing mutexes for caches, should further reduce random crashes
- Added more path date extraction formats
- `yyyy-mmm-dd`, `yyyy-mmmm-dd`, `dd-mmm-yyyy`, `dd-mmmm-yyyy`
- `-` is an optional separator that is not a digit and not a letter
## 12.0.0-alpha.12
- Made resetting of search result selection and content view offset less aggressive
- Some changes in 12.0.0-alpha.11 caused some UI counters to reset more frequently than desired
## 12.0.0-alpha.11
- Removed disabling of drop mode key binding `d` when searching or filtering is ongoing
- Fixed content view pane offset not resetting when mouse is used to scroll search result list
- Fixed content view pane staying small while scrolling up when the search result is close to the bottom of the file
- Swapped all mutexes to Eio mutexes to hopefully remove the very random freezes that occur quite rarely
- They feel like deadlocks due to mixing Eio mutexes
(which block fiber) and stdlib mutexes (which block an entire domain)
- Added sorting to document list
- `s` for sort ascending mode and `Shift+S` for sort descending mode
- Under the sort modes, the sort by types are as follows:
- `p` sort by path
- `d` sort by path date
- `s` sort by score
- `m` sort by modification time
- Added `--sort` and `--sort-no-score`
- Latter is mainly useful for when `--files-without-match` is used
- Added `yyyymmdd` path date extraction
- Added `mod-date` to filter language
- Adjusted attributes listed in document list entry
- Added path date
- Replaced last scan time with last modified time
- Reworked `--script` into `--script` and `--start-with-script`
- `--script` is now only for non-interactive use
- `--start-with-script` is only for interactive use
- This mirrors the duals `--filter` vs `--start-with-filter` and `--search` vs `--start-with-search`
- Reworked the internal architecture of document store snapshots storage and management
- Snapshots are now centrally managed by `Document_store_manager`, along with
improvements to snapshot handling logic in general
- This makes the overall interaction between UI and core code
much more robust, and eliminates random workarounds used to
deal with UI and data synchronization, which have
been riddled with random minor bugs
## 12.0.0-alpha.10
- Added basic autocomplete to filter field
- Improved script save autocomplete to insert longest common prefix
- Fixed script save autocomplete so it no longer erases original text when no recommendations are available
## 12.0.0-alpha.9
- Disabled `Tab` handling in edit fields to reduce friction in UX
- Added `nano`-style autocomplete to save commands field with listing of existing scripts
## 12.0.0-alpha.8
- Changed `--commands-from` to `--script`
- Added "save commands as script" and "load script" functionality to streamline reusing of commands
- Improved content view pane scrolling control
- The internal counter no longer scrolls past the limit
## 12.0.0-alpha.7
- Fixed interactive use of `--commands-from`
- Added `mark listed` and `unmark listed` to template command history file help info
## 12.0.0-alpha.6
- Fixed `not` operator parsing
- Previously `not ext:txt and not ext:md` would be parsed as `not (ext:txt and not ext:md)`, which is not what is typically expected
- `not` now binds tightly, so `not ext:txt and not ext:md` is parsed as `(not ext:txt) and (not ext:md)`
## 12.0.0-alpha.5
- Added content view pane scrolling
- Controlled by `-`/`=`
- Removed extraneous marking functionality
- `mark unlisted`
- `unmark unlisted`
- Added `"..."` as a shorthand to `content:"..."` to filter expression
- For example, `content:keyword AND path-date:>2025-01-01` can be written as `"keyword" AND path-date:>2025-01-01`
- The quotation is necessary to differentiate between typos
and actual query, otherwise incorrect input like
`pathfuzzy:...` would be parsed as content queries instead
## 12.0.0-alpha.4
- Added additional marking functionality
- `mark listed` (`ml`) marks all currently listed documents
- `mark unlisted` (`mL`) marks all currently unlisted documents
- `unmark listed` (`Ml`) unmarks all currently listed documents
- `unmark unlisted` (`ML`) unmarks all currently unlisted documents
- `unmark all` is moved to key binding `Ma`
- Reworked key binding info grid to pack columns more tightly
- Added WSL clipboard integration
- Minor fix in command history file template help text
- Added `Tab` key to key binding info grid
- Added key binding info about scrolling through document list and search result list
- Minor fix for `{line_num}` placeholder handling in `--open-with`
- This should always be usable for text files but previously
Docfd crashes when `{line_num}` is specified in `--open-with`
and user opens a text file when no search has been made
- This is fixed by defaulting `{line_num}` to 1 when
there are no search results present
- Minor fix for `{page_num}` and `{search_word}` placeholders handling in `--open-with`
- This should always be usable for PDF files but previously
Docfd crashes when `{page_num}` or `{search_word}` is specified in `--open-with`
and user opens a PDF file when no search has been made
- This is fixed by defaulting `{page_num}` to 1
and `{search_word}` to empty string when
there are no search results present
## 12.0.0-alpha.3
- **Users are advised to recreate the index DB**
- Adjusted SQL indices and swapped to specialized SQL queries
for exact and prefix search terms, e.g. `'hello`, `^worl`
- Handling of these terms is now 10-20% faster depending on the document
- Fixed command history recomputation not using the reloaded version
of document store
- This issue is most noticeable when you've edited a text file after hitting `Enter` in Docfd (after which Docfd reloads the file for you),
and you hit `h` to modify the command history
- The replaying of the command history would use the old copy of the file instead of the new edited version of the text file
- Added missing SQL transaction in code path for reloading a single document
- Previously, reloading a single document was incredibly slow, which was very noticeable if you edited a text file
after hitting `Enter` in Docfd, unless the text file was very small
- Updated `--paths-from` argument handling
- Added `--paths-from -` for accepting list of paths from stdin
- Adjusted to accept comma separated list of paths, e.g. `--paths-from path-list0.txt,path-list1.txt`
- Removed builtin piping to fzf triggered by providing `?` as a file path, e.g. `docfd ?`
- The `--paths-from -` handling makes this obsolete and a lot less flexible by comparison
- Fixed interaction between search and filter
- Previously, starting a search would incorrectly cancel an ongoing filtering operation.
Now only a new filtering operation can cancel an ongoing filtering operation.
A new search still cancels an ongoing search.
- Starting a new filtering operation also still cancels any ongoing search. This is fine since the search results
are refreshed after the filtering has been completed.
- The refreshing of the search results also means that the following sequences of events are still handled correctly,
namely they still arrive at the same normal form of the document store:
- Example 1:
- (0) Filter `f_exp0` (filtering is canceled by step (2), but the updating of filter expression is never canceled)
- (1) Search `s_exp0` (search is canceled by step (2), but the updating of search expression is never canceled)
- (2) Filter `f_exp1` (refreshes search results using `s_exp0`)
- Example 2:
- (0) Search `s_exp0` (search is canceled by step (1), but updating of search expression is never canceled)
- (1) Filter `f_exp0` (this stage is canceled by step (2),
either during the filtering or during the
refreshing of search results, but the updating
of filter expression is never canceled)
- (2) Filter `f_exp1` (refreshes search results using `s_exp0`)
- Renaming query expression/language to filter expression/language in help text and documentation
- Added a separate loading indicator for filter field
- Fixed concurrency issue where an update of document store may cause the
filter field and search field in UI to be out of sync with the actual
filter expression and search expression used by the underlying document store
- Suppose we have the following sequence of events:
- (0) Document store `store0` carries filter expression
`f_exp0` and search expression `s_exp0`, which we write
as pair `(f_exp0, s_exp0)`
- (1) User initiates filter/search operation by placing `(f_exp1, s_exp1)` into the input fields.
We name the document store resulting from this filter/search operation as `store1a`,
which carries `(f_exp1, s_exp1)` when finalized.
- (2) While filter/search operation is ongoing,
user drops a set of documents from the
current document store. Since `store1a` is not
finalized yet, the current document store is still `store0`, thus the new document store encoding the result of the drop operation, `store1b`, is computed from `store0` instead of `store1a`.
In other words, both `store1a` and `store1b` share
`store0` as their parent.
Note that `store1b` carries `(f_exp0, s_exp0)` as
inherited from `store0`,
since a drop operation does not alter the filter expression or search expression.
- (3) As a drop operation immediately updates the document store
and cancels ongoing filter/search operation, step (2) canceled the computation of `store1a`, and instead places `store1b` as the current document store.
- However, this means the input fields are `(f_exp1, s_exp1)`
while the current document store `store1b` actually carries
`(f_exp0, s_exp0)`.
The fix in this update is then to add an
extra "sync from input fields" step whenever a document store
is updated. To illustrate, we continue from the above
sequence of events, where the updated version of Docfd
carries out the following step missing from previous
versions.
- (4) Update input fields to `(f_exp0, s_exp0)`
- This addresses the mismatch between the underlying document store and the UI input fields.
- In practice this is very unlikely to occur with human input, as the modes that update document store
are disabled if document store manager is carrying out any ongoing filtering or search.
However, since the UI is async, there will be gaps in timing between UI input/feedback and actual updates of values,
opening up to TOCTOU problems.
So there is always a chance that a document store update will be requested before the modes are disabled.
- Made interrupted filter/search operation to not yield a document store at all instead of yielding an empty document store
to simplify reasoning about filter/search cancellations and UI fields being in sync
## 12.0.0-alpha.2
- Added `path-date` clause to query expression
- This allows filtering based on date recognized from document path, for example, `path-date:>=2025-01-01 AND path-date:<2025-02-01`
would allow `/home/user/meeting-notes-2025-01-10.md` to pass through
- This gives a very lightweight method of attaching date information to any document
- See [relevant Wiki page](https://github.com/darrenldl/docfd/wiki/Document-filtering) for details
## 12.0.0-alpha.1
- Added a more powerful filter mode that replaces the filter glob mode and "pipe to fzf" feature
- Filter query mode uses a proper query language that supports file path globbing and file path fuzzy matching among other features
- This mode uses key binding `f`
- Removed `q` exit key binding to avoid accidental exiting
## 11.0.1
- Added better search cancellation handling, removing massive lags in some scenarios
## 11.0.0
- Minor fix for search scope narrowing logic:
- Search scope should also be set to empty if the document is not passing the file filter, not just when the search results are empty
- The old behavior can be confusing when a document passes an old file filter and thus has search results in memory,
but fail to pass a new file filter,
yet appears in later searches when file filter is reset
- It is simpler to make it so if a document is not listed for
whatever reason, search scope of that document just becomes
empty during narrowing
- Added missing commands in the list of possible commands in the command history file template
- `clear search`
- `clear filter`
- Minor breaking change, filter regex mode should have been called filter glob mode
- The key binding `fr` is changed to `fg`
- Changed UI text "File path filter" to "File path glob" to be more descriptive
## 10.2.0
- Added `--open-with` to allow customising the command used to open a file based on file extension
- Example: `--open-with pdf:detached='okular {path}'`
- Can be specified multiple times
- Added non-interactive use of `--commands-from`
- Non-interactive use can be triggered by pairing `--commands-from` with `-l`/`--files-with-match`
- Useful for advanced document management workflow
- Adjustments to search scope narrowing
- Added `narrow level: 0` for resetting the search scopes of
all documents back to full
- Narrowing now no longer drops unlisted document, so the
previous set of documents remain accessible for later
searches after resetting the search scopes
- Reworked search into multi-stage pipeline
- This improves the search speed by around 30%
- The core search procedure was reworked into an API that
generates grouped search jobs which can be easily distributed
to threads.
This gives a better workload distribution than the current
multithreading approach.
## 10.1.3
- Minor fixes
- "Reload document" now removes the document if the document is no longer accessible
- Docfd now only checks the existence of directly specified files
at launch, e.g. `file.txt` in `docfd file.txt`. This means
"reload all documents" now does not error out due to files becoming
no longer accessible.
## 10.1.2
- Minor fix for "reload all documents" when fzf was used to pick documents initially, i.e. `docfd [PATH]... ?`, or any variation where `?` appears anywhere in the path list
- Under this workflow, later "reload all" should use the same selection
instead of having the user select again in fzf, which is cumbersome
- Now Docfd correctly reuses the selection when "reload all" is requested,
if fzf was used initially to pick documents
- This does technically mean the functionality is now less flexible,
since if `docfd ?` alike is used, "reload all" no longer discovers
new files
- But the convenience from reusing the selection outweighs the flexibility
in practically all use cases from author's experience
## 10.1.1
- Minor fix for "filter files via fzf" functionality
- Previously, if instead of making a selection,
the user quits fzf (e.g. pressing `Ctrl`+`C`, `Ctrl`+`Q`),
Docfd also closes with it
- Now Docfd just discards the interaction and goes back to the main UI
## 10.1.0
- Added back index DB entry pruning
- Previously missing after swapping to SQLite DB
- Also renamed `--cache-soft-limit` to `--cache-limit` to
reflect the new pruning logic
- Fixes [issue #12](https://github.com/darrenldl/docfd/issues/12)
- Swapped to a better `doc_id` allocation strategy to minimise
`doc_id` size in DB
- Added blinking when drop mode is disabled but `d` is pressed
## 10.0.0
- Reworked document indexing into a multi-stage pipeline
- This significantly improves the indexing throughput by allowing
I/O tasks and computational tasks to run concurrently
- See [issue #11](https://github.com/darrenldl/docfd/issues/11)
- **Breaking** changes in index DB design - index DBs made by previous version
of Docfd are not compatible
- Optimized DB design, on average the index DB is roughly 60% smaller
compared to Docfd 9.0.0 index DB
- See [issue #11](https://github.com/darrenldl/docfd/issues/11)
- Added functionality to filter files via fzf
- This is grouped under filter mode. The previous filter mode
is renamed to filter regex mode.
- `f` enters filter mode
- `f` again activates filter files via fzf functionality
- `r` activates the filter regex mode, which was previously
just called the filter mode
- Fixed incomplete search results when file path filter field is updated while
search is ongoing
- Updating file path filter always cancels the current search (if there is one)
and start a new search after the filter is in place
- Previously, documents with partial search results due to cancellation
are kept
- Docfd now discards said documents, forcing the new search to complete the
search results of these documents
- Removed `--no-cache` flag
- Previously was unused completely
- It is difficult to share an in-memory SQLite DB
between threads, so discarding this flag entirely
- See [issue #11](https://github.com/darrenldl/docfd/issues/11)
- Swapped to using proper unicode segmentation for tokenisation
- This should reduce the index size for Western non-English languages
significantly
- Added screen split ratios for hiding left or right pane completely
- Minor UI/UX fixes
- Drop mode is now disabled when search is still ongoing or when either search field or filter field has an error
- Added missing update of search and filter status when undoing/redoing, or when replaying command history
- This is most noticeable when the status indicates an error, but undoing does not return it to OK
## 9.0.0
- Swapped over to using SQLite for index
- Memory usage is much slimmer/stays flat
- For the sample of 1.4GB worth of PDFs used, after indexing, 9.0.0-rc1 uses
1.9GB of memory, while 9.0.0-rc2 uses 39MB
- Search is a bit slower
- Added token length limit of 500 bytes to accommodate word table limit in index DB
- This means during indexing, if Docfd encounters a very long token,
e.g. serial number, long hex string, it will be split into chunks of
up to 500 bytes
- Added `Ctrl`+`C` exit key binding to key binding info on screen
- Updated exit keys
- To exit Docfd: `q`, `Ctrl`+`Q` or `Ctrl`+`C`
- To exit other modes: `Esc`
- Now defaults to not scanning hidden files and directories
- This behaviour is now enabled via the `--hidden` flag
- Changed to allow `--add-exts` and `--single-line-add-exts` to be specified multiple times
- Changed return code to be 1 when there are no results for `--sample` or `--search`
- Added `--no-pdftotext` and `--no-pandoc` flags
- Docfd also notes the presence of these flags in error message if there
are PDF files but no pdftotext command is available, and same with files
relying on pandoc
- Renamed `drop path` command to just `drop`
- Added drop unselected key binding, and the associated command `drop all except`
- Various key binding help info grid adjustments
## 9.0.0-rc1
- Changed default cache size from 100 to 10000
- Index after compression doesn't take up that much space, and storage is
generally cheap enough these days
- Adjusted cache eviction behaviour to be less strict on when eviction happens
and thus less expensive
- Renamed `--cache-size` to `--cache-soft-limit`
- Removed periodic GC compact call to avoid freezes when working with many
files
- Removed GC compact call during file indexing core loops to reduce overhead
- Added progress bars to initial document processing stage
- Swapped to using C backend for BLAKE2B hashing, this gives >20x speedup depending on CPU
- Swapped from JSON+GZIP to CBOR+GZIP serialization for indices
- Changed help info rotation key from `h` to `?`
- Renamed discard mode to drop mode
- Added command history editing functionality
- Added `--commands-from` command line argument
- Added `--tokens-per-search-scope-level` command line argument
- Concurrency related bug fixes
- Unlikely to encounter in normal workflows with human input speed
- https://github.com/darrenldl/docfd/commit/14fcc45b746e6156f29eb989d70700476977a3d7
- https://github.com/darrenldl/docfd/commit/bfd63d93562f8785ecad8152005aa0f823185699
- https://github.com/darrenldl/docfd/commit/4e0aa6785ce80630d0cd3cda6e316b7b15a4fb4b
- Replaced print mode with copy mode
- Replaced single file view with key binding to change screen split ratio
to remove feature discrepancies
- Added narrow mode for search scope narrowing
- Renamed `--index-chunk-token-count` to `--index-chunk-size`
- Renamed `--sample-count-per-doc` to `--samples-per-doc`
## 8.0.3
- Fixed single file view crash
## 8.0.2
- Reworked asynchronous search/filter UI code to avoid noticeable lag due to
waiting for cancellations that take too long
- Previously there was still a lockstep somewhere that would prevent UI
from progressing if previous search was still being canceled
- The current implementation allows newest requests to override older
requests entirely, and not wait for cancellations at all
- Adjusted document counter in multi-file view to be visible even when no files
are listed
## 8.0.1
- Fixed missing file path filter field update when undoing or redoing document
store updates
- Fixed case insensitive marker handling in glob command line arguments
## 8.0.0
- Removed `--markdown-headings atx` from pandoc commandline
arguments
- Removed `Alt`+`U` undo key binding
- Removed `Alt`+`E` redo key binding
- Removed `Ctrl`+`Q` exit key binding
- Added documentation for undo, redo key bindings
- Added clear mode and moved clear search field key binding
under this mode for multi-file view
- Added file path filtering functionality to multi-file view
## 7.1.0
- Added initial macOS support
- Likely to have bugs, but will need macOS users to report back
- Major speedup from letting `pdftotext` output everything in one pass and split
on Docfd side instead of asking `pdftotext` to output one page per invocation
- For very large PDFs the indexing used to take minutes but now only takes
seconds
- Page count may be inaccurate if the PDF page contains form feed character
itself (not fully sure if `pdftotext` filters the form feed character from
content), but should be rare
- Significant reduction of index file size by adding GZIP
compression to the index JSON
## 7.0.0
- Added discard mode to multi-file view
- Changed to using thin bars as pane separators, i.e. tmux style
- Added `g` and `G` key bindings for going to top and bottom of document list respectively
- Added `-l`/`--files-with-match` and `--files-without-match` for printing just paths
in non-interactive mode
- Grouped print key bindings under print mode
- Added more print key bindings
- Grouped reload key bindings under reload mode
- Added fixes to ensure Docfd does not exit until all printing is done
- Slimmed down memory usage by switching to OCaml 5.2 which enables use of `Gc.compact`
- Still no auto-compaction yet, however, will need to wait for a future
OCaml release
- Added `h` key binding to rotate key binding info grid
- Added exact, prefix and suffix search syntax from fzf
- Fixed extraneous document path print in non-interactive mode when documents have no search results
- Added "explicit spaces" token `~` to match spaces
## 6.0.1
- Fixed random UI freezes when updating search field
- This is due to a race condition in the search cancellation mechanism that
may cause UI fiber to starve and wait forever for a cancellation
acknowledgement
- This mechanism was put in place for asynchronous search since 4.0.0
- As usual with race conditions, this only manifests under some specific
timing by chance
## 6.0.0
- Fixed help message of `--max-linked-token-search-dist`
- Fixed search result printing where output gets chopped off if terminal width is too small
- Added smart additional line grabbing for search result printing
- `--search-result-print-snippet-min-size N`
- If the search result to be printed has fewer than `N` non-space tokens,
then Docfd tries to add surrounding lines to the snippet
to give better context.
- `--search-result-print-snippet-max-add-lines`
- Controls maximum number of surrounding lines that can be added in each direction.
- Added search result underlining when output is not a terminal,
e.g. redirected to file, piped to another command
- Changed `--search` to show all search results
- Added `--sample` that uses `--search` previous behavior where (by default)
only a handful of top search results are picked for each document
- Changed `--search-result-count-per-doc` to `--sample-count-per-doc`
- Added `--color` and `--underline` for controlling behavior of search result
printing, they can take one of:
- `never`
- `always`
- `auto`
- Removed blinking for `Tab` key presses
## 5.1.0
- Fixed help message of `--max-token-search-dist`
- Adjusted path display in UI to hide current working directory segment when
applicable
- Added missing blinking for `Tab` key presses
## 5.0.0
- Added file globbing support in the form of `--glob` argument
- Added single line search mode arguments
- `--single-line-exts`
- `--single-line-add-exts`
- `--single-line-glob`
- `--single-line`
- Fixed crash on empty file
- This was due to assertion failure of `max_line_num` in
`Content_and_search_result_render.content_snippet`
- Changed search result printing via `Shift+P` and `p` within TUI to not exit
after printing, allowing printing of more results
- Added blinking to key binding info grid to give better visual feedback,
especially for the new behavior of search result printing
- Changed to allow `--paths-from` to be specified multiple times
- Fixed handling of `.htm` files
- `htm` is not a valid value for pandoc's `--format` argument
- Now it is rewritten to `html` before being passed to pandoc
- Changed `--max-depth`:
- Changed default from 10 to 100
- Changed to accept 0
## 4.0.0
- Made document search asynchronous to search field input, so UI remains
smooth even if search is slow
- Added status to search bar:
- `OK` means Docfd is idling
- `...` means Docfd is searching
- `ERR` means Docfd failed to parse the search expression
- Added search cancellation. Triggered by editing or clearing search field.
- Added dynamic search distance adjustment based on notion of linked tokens
- Two tokens are linked if there is no space between them,
e.g. `-` and `>` are linked in `->`, but not in `- >`
- Replaced `word` with `token` in the following options for consistency
- `--max-word-search-dist`
- `--index-chunk-word-count`
- Replaced `word` with `token` in user-facing text
## 3.0.0
- Fixed crash from search result snippet being bigger than the content view pane
- Crash was from `Content_and_search_result_render.color_word_image_grid`
- Added key bindings
- `p`: exit and print search result to stderr
- `Shift+P`: exit and print file path to stderr
- Changed `--debug-log -` to use stderr instead of stdout
- Added non-interactive search mode where search results are printed to stdout
- `--search EXP` invokes non-interactive search mode with search expression `EXP`
- `--search-result-count-per-document` sets the number of top search results printed per document
- `--search-result-print-text-width` sets the text width to use when printing
- Added `--start-with-search` to prefill the search field in interactive mode
- Removed content requirement expression from multi-file view
- Originally designed for file filtering, but I have almost never used
it since its addition in 1.0.0
- Added word based line wrapping to following components of document list in multi-file view
- Document title
- Document path
- Document content preview
- Added word breaking in word based line wrapping logic so all of the original characters
are displayed even when the terminal width is very small or when a word/token is very long
- Added `--paths-from` to specify a file containing list of paths to (also) be scanned
- Fixed search result centering in presence of line wrapping
- Renamed `--max-fuzzy-edit` to `--max-fuzzy-edit-dist` for consistency
- Changed error messages to not be capitalized to follow Rust's and Go's
guidelines on error messages
- Added fallback rendering text so Docfd does not crash from trying
to render invalid text.
- Added pandoc integration
- Changed the logic of determining when to use stdin as document source
- Now if any paths are specified, stdin is ignored
- This change mostly came from Dune's cram test mechanism
not providing a tty to stdin, so previously Docfd would keep
trying to source from stdin even when explicit paths are provided
## 2.2.0
- Restored behaviour of skipping file extension checks for top-level
user specified files. This behaviour was likely removed during some
previous overhaul.
- This means, for instance, `docfd bin/docfd.ml` will now open the file
just fine without `--add-exts ml`
- Bumped default max word search distance from 20 to 50
- Added consideration for balanced opening closing symbols in search result ranking
- Namely symbol pairs: `()`, `[]`, `{}`
- Fixed crash from reading from stdin
- This was caused by calling `Notty_unix.Term.release` after closing the underlying
file descriptor in stdin input mode
- Added back handling of optional operator `?` in search expression
- Added test corpus to check translation of search expression to search phrases
## 2.1.0
- Added text editor integration for `jed`/`xjed`
- See [PR #3](https://github.com/darrenldl/docfd/pull/3)
by [kseistrup](https://github.com/kseistrup)
## 2.0.0
- Added "Last scan" field display to multi-file view and single file view
- Reduced screen flashing by only recreating `Notty_unix.Term.t` when needed
- Added code to recursively mkdir cache directory if needed
- Search procedure parameter tuning
- UI tuning
- Added search expression support
- Adjusted quit key bindings to be: `Esc`, `Ctrl+C`, and `Ctrl+Q`
- Added file selection support via `fzf`
## 1.9.0
- Added PDF viewer integration for:
- okular
- evince
- xreader
- atril
- mupdf
- Fixed change in terminal behavior after invoking text editor
by recreating `Notty_unix.Term.t`
- Fixed file auto-reloading to apply to all file types instead of
just text files
## 1.8.0
- Swapped to using Nottui at [a337a77](https://github.com/let-def/lwd/commit/a337a778001e6c1dbaed7e758c9e05f300abd388)
which fixes event handling, and pasting into edit field works correctly as a result
- Caching is now disabled if number of documents exceeds cache size
- Moved index cache to `XDG_CACHE_HOME/docfd`, which overall
defaults to `$HOME/.cache/docfd`
- Added cache related arguments
- `--cache-dir`
- `--cache-size`
- `--no-cache`
- Fixed search result centering in content view pane
- Changed `--debug` to `--debug-log` to support outputting debug log to a file
- Fixed file opening failure due to exhausting file descriptors
- This was caused by not bounding the number of concurrent fibers when loading files
via `Document.of_path` in `Eio.Fiber.List.filter_map`
- Added `--index-only` flag
- Fixed document rescanning in multi-file view
## 1.7.3
- Fixed crash from using mouse scrolling in multi-file view
- The mouse handler did not reset the search result selected
when selecting a different document
- This leads to out of bound access if the newly selected document
does not have enough search results
## 1.7.2
- Fixed content pane sometimes not showing all the lines
depending on terminal size and width of lines
- Made chunk size dynamic for parallel search
## 1.7.1
- Parallelization fine-tuning
## 1.7.0
- Added back parallel search
- General optimizations
- Added index file rotation
## 1.6.3
- Further underestimate space available for the purpose of line wrapping
## 1.6.2
- Fixed line wrapping
## 1.6.1
- Fixed line wrapping
## 1.6.0
- Docfd now saves stdin into a tmp file before processing
to allow opening in text editor
- Added `--add-exts` argument for additional file extensions
- Added real-time response to terminal size changes
## 1.5.3
- Updated key binding info pane of multi-file view
## 1.5.2
- Added line number into search result ranking consideration
## 1.5.1
- Tuned search procedure and search result ranking
- Made substring bidirectional matching differently weighted based
on direction
- Made reverse substring match require at least 3 characters
- Case-sensitive bonus only applies if search phrase
is not all ascii lowercase
## 1.5.0
- Made substring matching bidirectional
- Tuned search result ranking
## 1.4.0
- Moved reading of environment variables `VISUAL` and `EDITOR` to program start
- Performance tuning
- Increased cache size for search phrase automata
## 1.3.4
- Added dispatching of search to task pool at file granularity
## 1.3.3
- Performance tuning
- Switched back to using the old default max word search distance of 20
- Reduced default max fuzzy edit distance from 3 to 2 to prevent massive
slowdown on long words
## 1.3.2
- Performance tuning
- Added caching to search phrase automata construction
- Removed dispatching of search to task pool
- Adjusted search result limits
## 1.3.1
- Added more commandline argument error checking
- Adjusted help messages
- Adjusted max word search range calculation
- Renamed `max-word-search-range` to `max-word-search-dist`
## 1.3.0
- Index data structure optimizations
- Search procedure optimizations
## 1.2.2
- Fixed editor recognition for kakoune
## 1.2.1
- Fixed search results when multiple words are involved
## 1.2.0
- Removed UI components for search cancellation
- Added real time refresh of search
- Added code to open selected text file to selected search result for:
- nano
- neovim/vim/vi
- helix
- kakoune
- emacs
- micro
- Added "rescan for documents" to multi-file view
## 1.1.1
- Fixed releasing Notty terminal too early
## 1.1.0
- Added index saving and loading
- Added search cancellation
## 1.0.2
- Fixed file tree scan
## 1.0.1
- Minor UI tweaks
## 1.0.0
- Added expression language for file filtering in multi-file view
- Adjusted default file tree depth
- Added `--exts` argument for configuring file extensions recognized
- Fixed parameters passing from binary to library
## 0.9.0
- Added PDF search support via `pdftotext`
- Added UTF-8 support
## 0.8.6
- Minor wording fix
## 0.8.5
- Added check to skip re-searching if search phrase is equivalent to the previous one
## 0.8.4
- Index data structure optimization
- Code cleanup
## 0.8.3
- Optimized multi-file view reload so it does not redo the search over all documents
- Implemented a proper document store
## 0.8.2
- Fixed single file view document reloading not refreshing search results
## 0.8.1
- Replaced shared data structures with multicore safe versions
- Fixed work partitioning for parallel indexing
## 0.8.0
- Added multicore support for indexing and searching
## 0.7.4
- Fixed crashing and incorrect rendering in some cases of files with blank lines
- This is due to `Index.line_count` being incorrectly calculated
- Added auto refresh on change of file
- Change detection is based on file modification time
- Added reload file via `r` key
## 0.7.3
- Bumped the default word search range from 15 to 40
- Since spaces are also counted as words in the index,
15 doesn't actually give a lot of range
- Added minor optimization to search
## 0.7.2
- Code refactoring
## 0.7.1
- Delayed `Nottui_unix` term creation so pre TUI
printing like `--version` would work
- Added back mouse scrolling support
- Added Page Up and Page Down keys support
## 0.7.0
- Fixed indexing bug
- Added UI mode switch
- Adjusted status bar to show current file name in single file mode
- Adjusted content view to track search result
- Added content view to single file mode
## 0.6.3
- Adjusted status bar to not display index of document selected
when in single document mode
- Edited debug message a bit
## 0.6.2
- Fixed typo in error message
## 0.6.1
- Added check of whether provided files exist
## 0.6.0
- Upgraded status bar and help text/key binding info
## 0.5.9
- Changed help text to status bar + help text
## 0.5.8
- Fixed debug print of file paths
- Tuned UI text slightly
## 0.5.7
- Changed word db to do global word recording to further reduce memory footprint
## 0.5.6
- Optimized overall memory footprint
- Content index memory usage
- Switched to using content index to render content
lines instead of storing file lines again after indexing
## 0.5.5
- Fixed weighing of fuzzy matches
- Fixed bug in scoring of substring matches
## 0.5.4
- Fixed handling of search phrase with uppercase characters
- Prioritized search results that match the case
## 0.5.3
- Cleaned up code
## 0.5.2
- Cleaned up code
## 0.5.1
- Cleaned up code and debug info print a bit
## 0.5.0
- Removed tags handling
- Added stdin piping support
## 0.4.1
- Tuning content search result scoring
## 0.4.0
- Improved content search result scoring
- Added limit on content search results to consider to avoid
slowdown
- General optimizations
## 0.3.3
- Fixed crash due to not resetting content search result selection
when changing document selection
## 0.3.2
- Fixed internal line numbering, but displayed line numbering
still begins at 1
## 0.3.1
- Adjusted line number to begin at 1
## 0.3.0
- Adjusted colouring
## 0.2.9
- Fixed word position tracking in content indexing
## 0.2.8
- Fixed content indexing
## 0.2.7
- Changed to vim style highlighting for content search results
- Color adjustments in general
## 0.2.6
- Added single file UI mode
- Added support for specifying multiple files in command line
## 0.2.5
- Added limit to word search range of each step in content search
- This speeds up usual search while giving good enough results,
and prevents search from becoming very slow in large documents
## 0.2.4
- Adjusted displayed document list size
- Updated style of document list view
## 0.2.3
- Added sanitization to file view text
- Docfd now accepts file being passed as argument
## 0.2.2
- Fixed tokenization of user provided content search input
- Fixed content indexing to not include spaces
## 0.2.1
- Optimized file discovery procedure
- Added `--max-depth` option to limit scanning depth
- Added content search results view
- Adjusted tokenization procedure
## 0.2.0
- Switched to interactive TUI
- Renamed to Docfd
## 0.1.6
- Optimized parsing code slightly
## 0.1.5
- Adjusted parsing code slightly
## 0.1.4
- Adjusted `--tags` and `--ltags` output slightly
## 0.1.3
- Upgraded `--tags` and `--ltags` output to be more human readable
when output is terminal
- Changed behavior to output each tag in individual line when output
is not terminal
## 0.1.2
- Fixed output text when output is not terminal
## 0.1.1
- Fixed checking of whether output is terminal
## 0.1.0
- Flipped output positions of file path and tags
## 0.0.9
- Notefd now adds color to title and matching tags if output is terminal
- Improved fuzzy search index building
## 0.0.8
- Code cleanup
## 0.0.7
- Made file recognition more lenient
- Added support for alternative tag section syntax
- `| ... |`
- `@ ... @`
## 0.0.6
- Fixed Notefd to only handle consecutive tag sections
## 0.0.5
- Added `--tags` and `--ltags` flags
- Adjusted parsing to allow multiple tag sections
## 0.0.4
- Fixed tag extraction
## 0.0.3
- Made header extraction more robust to files with very long lines
## 0.0.2
- Added `-s` for case-insensitive substring tag match
- Renamed `-p` to `-e` for exact tag match
## 0.0.1
- Base version
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2022 Di Long Li
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: Makefile
================================================
# Source globs used by the formatting target below.
SRCFILES = lib/*.ml lib/*.mli bin/*.ml bin/*.mli profiling/*.ml tests/*.ml
# ocp-indent command line: format the files listed above in place.
OCPINDENT = ocp-indent \
--inplace \
$(SRCFILES)
# Default target: refresh the embedded version string, then build everything.
.PHONY: all
all :
python3 update-version-string.py
dune build @all
# Build the Docfd container image with podman.
.PHONY: podman-build
podman-build:
podman build --format docker -t localhost/docfd -f containers/Containerfile.docfd .
# Build the container image used for recording the demo tapes with vhs.
.PHONY: podman-build-demo-vhs
podman-build-demo-vhs:
podman build --format docker -t localhost/docfd-demo-vhs -f containers/Containerfile.demo-vhs .
# Regenerate the opam lock file from the current opam state.
.PHONY: lock
lock:
opam lock .
# Dynamically linked release binary, copied to release/docfd.
.PHONY: release-build
release-build :
python3 update-version-string.py
dune build --release bin/docfd.exe
mkdir -p release
cp -f _build/default/bin/docfd.exe release/docfd
chmod 755 release/docfd
# Statically linked release binary, copied to release/docfd.
.PHONY: release-static-build
release-static-build :
python3 update-version-string.py
OCAMLPARAM='_,ccopt=-static' dune build --release bin/docfd.exe
mkdir -p release
cp -f _build/default/bin/docfd.exe release/docfd
chmod 755 release/docfd
# Statically linked release binary for ARM (adds fPIC to ccopt).
.PHONY: release-static-build-arm
release-static-build-arm :
python3 update-version-string.py
OCAMLPARAM='_,ccopt=-static,fPIC' dune build --release bin/docfd.exe
mkdir -p release
cp -f _build/default/bin/docfd.exe release/docfd
chmod 755 release/docfd
# Run the unit tests followed by each cram test group.
.PHONY: tests
tests :
# Cleaning and rebuilding here to make sure cram tests actually use a recent binary,
# since Dune (as of 3.14.0) doesn't trigger rebuild of binary when
# invoking cram tests, even if the source code has changed.
make clean
make
OCAMLRUNPARAM=b dune exec tests/main.exe --no-buffer --force
dune build @file-collection-tests
dune build @line-wrapping-tests
dune build @misc-behavior-tests
dune build @printing-tests
dune build @match-type-tests
dune build @open-with-tests
dune build @non-interactive-mode-return-code-tests
dune build @search-scope-narrowing-tests
dune build @script-tests
# Record a demo for every tape under demo-vhs-tapes/.
.PHONY: demo-vhs
demo-vhs :
for file in demo-vhs-tapes/*; do ./demo-vhs.sh $$file; done
rm dummy.gif
# Statically linked profiling build.
.PHONY: profile
profile :
OCAMLPARAM='_,ccopt=-static' dune build --release profiling/main.exe
# Format all OCaml sources in place via ocp-indent.
.PHONY: format
format :
$(OCPINDENT)
# Remove dune build artifacts.
.PHONY : clean
clean:
dune clean
================================================
FILE: README.md
================================================
# Docfd
[Online Demo](https://demo.docfd.sh)
TUI multiline fuzzy document finder
Think interactive grep for text files, PDFs, DOCXs, etc,
but word/token based instead of regex and line based,
so you can search across lines easily.
Docfd aims to provide good UX via integration with common text editors
and PDF viewers,
so you can jump directly to a search result with a single key press.
---
Interactive use

Non-interactive use

## Features
- Multithreaded indexing and searching
- Multiline fuzzy search of multiple files
- Content view pane that shows the snippet surrounding the search result selected
- Text editor and PDF viewer integration
- Editable command history - rewrite/plan your actions in text editor
- Search scope narrowing - limit scope of next search based on current search results
- Clipboard integration
## Why Docfd might be for you
You want a standalone, offline TUI search tool that
allows you to immediately start searching without any complicated setup.
Docfd only starts processing the current directory or
specified directories/files upon start.
Hashing is used to pick out files that have not been indexed yet.
There is no need to wait for a background indexer to refresh
before you get up-to-date results.
You don't want to move everything into a central storage, and want to just keep your current folder structure
There are no strings attached with using Docfd.
Docfd does not require you to import your files into
any special storage system, so you can continue to mix and match
tools to best handle your files.
You want to script or record your search
Docfd comes with a simple scripting language,
which is already used to capture your actions in the TUI.
Finally found what you need after many steps?
Save the session as a script with `Ctrl`+`S`!
Then open it next time with `Ctrl`+`O`.
## Why Docfd might not be for you
Docfd is not all-encompassing
Docfd does not try to be a full blown document management system such as Paperless-ngx.
While there may be significant overlaps in terms of the search functionality, Docfd will fall short for almost any other kind of features, such as storage management, tagging, web interface, OCR, email ingestion.
Docfd is not a "proper" search engine
Docfd is a search engine in the sense that it uses the same
fundamental principles, i.e. inverted indices, but it lacks features
that you would expect from a "proper" search engine solution, e.g.
[Apache Lucene](https://lucene.apache.org/),
[Tantivy](https://github.com/quickwit-oss/tantivy),
[Lnx](https://github.com/lnx-search/lnx).
Here are some of the fundamental features which I think are crucial to a proper search engine, but Docfd lacks:
- You cannot customize what are indexed by Docfd
- You cannot add a new type of ranking
- Docfd lacks support for languages other than English
- Docfd does not scale very well to very large quantities of documents
- Search should still be serviceable when you reach beyond, say, 10k documents, but it will be noticeably more sluggish
Some of these shortcomings are fundamental to the goals of Docfd. For instance,
Docfd is primarily a standalone desktop TUI tool with quick startup and should not impact other desktop applications.
As such, some performance related engineering choices typical for a proper search engine
are difficult to accommodate as they require longer startup and significantly more memory usage.
Other shortcomings are due to limited time and limited return on efforts - if one is to push Docfd so much to reach the feature parity
and performance of a proper search engine, then one might as well just use an existing search engine to begin with.
If your notes are consistently very short, and you only want to do simple searches, then there are better options
If you follow note taking methodologies such as Zettelkasten, where each note consists of very few lines, then using a combination of grep and file preview tool can yield a much faster search experience by skipping out on indexing and consideration of word proximity.
Docfd does not "stream" its search results
One user feedback received was that searching felt slow when Docfd is still conducting the search as UI is not updated result by result. By comparison, fzf felt faster as results start to immediately pop into the screen.
It is fundamentally more difficult to implement this streaming behavior nicely in Docfd, as Docfd operates with snapshots in mind (e.g. allowing you to undo/redo commands), while fzf does not. More specifically, it is much easier to wait for all search results to be ready, and finalize as a snapshot before presenting onto Docfd UI.
So while possible to implement in Docfd, it is unclear if the effort is worthwhile with the additional system complexity in mind.
## Installation
Statically linked binaries for Linux and macOS are available via
[GitHub releases](https://github.com/darrenldl/docfd/releases).
Docfd is also packaged on the following platforms for Linux:
- [opam](https://ocaml.org/p/docfd/latest)
- [AUR](https://aur.archlinux.org/packages/docfd-bin) (as `docfd-bin`)
- First packaged by [@kseistrup](https://github.com/kseistrup), now maintained by Dominiquini
- Nix (as `docfd`)
- Packaged by [@chewblacka](https://github.com/chewblacka)
The only way to use Docfd on Windows right now is via WSL.
**Notes for packagers**: Outside of the OCaml toolchain for building (if you are
packaging from source), Docfd also requires the following
external tools at run time for full functionality:
- `pdftotext` from `poppler-utils` for PDF support
- `pandoc` for support of `.epub`, `.odt`, `.docx`, `.fb2`, `.ipynb`, `.html`, and `.htm` files
- `wl-clipboard` for clipboard support on Wayland
- `xclip` for clipboard support on X11
## Basic usage
The typical usage of Docfd is to either `cd` into the directory of interest
and launch `docfd` directly, or specify the paths as arguments:
```
docfd [PATH]...
```
The list of paths can contain directories.
Each directory in the list is scanned recursively for
files with the following extensions by default:
- For multiline search mode:
- `.txt`,
`.md`,
`.pdf`,
`.epub`,
`.odt`,
`.docx`,
`.fb2`,
`.ipynb`,
`.html`,
`.htm`
- For single line search mode:
- `.log`,
`.csv`,
`.tsv`
You can change the file extensions to use via
`--exts` and `--single-line-exts`,
or add onto the list of extensions via
`--add-exts` and `--single-line-add-exts`.
If the list of `PATH`s is empty,
then Docfd defaults to scanning the
current directory `.`
unless any of the following is used:
`--paths-from`, `--glob`, `--single-line-glob`.
## Documentation
See [GitHub Wiki](https://github.com/darrenldl/docfd/wiki) for
more examples/cookbook, and technical details.
## Changelog
[CHANGELOG](CHANGELOG.md)
## Limitations
- Docfd generally expects one instance per index DB
- You should pick a different cache directory (which houses
the index DB) via `--cache-dir`
if you need multiple instances
- There are safe guards to avoid corruptions even if you do run
multiple instances of Docfd, but note that the instances of Docfd
may exit unexpectedly
- That being said, running multiple instances of Docfd which are only reading
the index DB and not updating it should be fine
- File auto-reloading is not supported for PDF files,
as PDF viewers are invoked in the background via shell.
It is possible to support this properly
in the ways listed below, but requires
a lot of engineering for potentially very little gain:
- Docfd waits for PDF viewer to terminate fully
before resuming, but this
prohibits viewing multiple search results
simultaneously in different PDF viewer instances.
- Docfd manages the launched PDF viewers completely,
but these viewers are closed when Docfd terminates.
- Docfd invokes the PDF viewers via shell
so they stay open when Docfd terminates.
Docfd instead periodically checks if they are still running
via the PDF viewers' process IDs,
but this requires handling forks.
- Outside of tracking whether the PDF viewer instances
interacting with the files are still running,
Docfd also needs to set up file update handling
either via `inotify` or via checking
file modification times periodically.
## Acknowledgement
- Big thanks to [@lunacookies](https://github.com/lunacookies) and
[@jthvai](https://github.com/jthvai) for the many UI/UX discussions and
suggestions
- Demo gifs and some screenshots are made using [vhs](https://github.com/charmbracelet/vhs).
- [ripgrep-all](https://github.com/phiresky/ripgrep-all) was used as reference
for text extraction software choices
- [Marc Coquand](https://mccd.space) (author of
[Stitch](https://git.mccd.space/pub/stitch/)) for discussions and inspiration
of results narrowing functionality
- Part of the search syntax was copied from [fzf](https://github.com/junegunn/fzf)
- Command history editing workflow was inspired by Git interactive rebase workflow, e.g. `git rebase -i`
- [PDF corpora](https://github.com/pdf-association/pdf-corpora) from PDF association was used to stress test performance
================================================
FILE: bin/BLAKE2B.ml
================================================
(* BLAKE2B instantiated with a 64-byte (512-bit) digest. *)
module B = Digestif.Make_BLAKE2B (struct
let digest_size = 64
end)
(* [hash_of_file ~env ~path] streams the file at [path] through a BLAKE2B
   digest in chunks of [Params.hash_chunk_size] bytes.
   Returns [Ok hex_digest] on success, [Error msg] otherwise.  Any exception
   raised while opening or reading the file (e.g. missing file, permission
   error) is mapped to a generic [Error] message instead of propagating. *)
let hash_of_file ~env ~path =
let fs = Eio.Stdenv.fs env in
(* Mutable digest context threaded through the feed loop below. *)
let ctx = ref B.empty in
try
Eio.Path.(with_open_in (fs / path))
(fun flow ->
match
Eio.Buf_read.parse ~max_size:Params.hash_chunk_size
(fun buf ->
try
(* Feed full chunks until [take] cannot supply a complete
   chunk any more — presumably it raises [End_of_file] when
   fewer bytes remain (TODO confirm against Eio docs). *)
while true do
ctx := B.feed_string !ctx (Eio.Buf_read.take Params.hash_chunk_size buf)
done
with
| End_of_file ->
(* Drain and feed whatever partial chunk is left. *)
ctx := B.feed_string !ctx (Eio.Buf_read.take_all buf)
)
flow
with
| Ok () -> Ok (!ctx |> B.get |> B.to_hex)
| Error (`Msg msg) -> Error msg
)
with
| _ -> Error (Printf.sprintf "failed to hash file: %s" (Filename.quote path))
================================================
FILE: bin/UI.ml
================================================
open Docfd_lib
open Lwd_infix
(* UI state variables for script management and path filtering. *)
module Vars = struct
(* Text field holding the script name being typed (save/open dialogs). *)
let script_name_field = Lwd.var UI_base.empty_text_field
let script_name_field_focus_handle = Nottui.Focus.make ()
(* Text field used for fuzzy-ranking paths. *)
let path_fuzzy_rank_field = Lwd.var UI_base.empty_text_field
let path_fuzzy_rank_field_focus_handle = Nottui.Focus.make ()
(* Basenames of the script files discovered in the script directory. *)
let script_files : string Dynarray.t Lwd.var = Lwd.var (Dynarray.create ())
(* Reactive view of [script_files] filtered/ranked according to the current
   input mode and the content of [script_name_field].  The second component,
   when present, carries per-file highlight positions for rendering. *)
let usable_script_files : (string Dynarray.t * Int_set.t Dynarray.t option) Lwd.t =
let$* arr = Lwd.get script_files in
let$* input_mode = Lwd.get UI_base.Vars.input_mode in
let$ script_name_specified, _ = Lwd.get script_name_field in
match input_mode with
| Save_script -> (
(* Saving: plain prefix filter on the typed name, no highlights. *)
(Dynarray.filter
(CCString.starts_with ~prefix:script_name_specified)
arr,
None)
)
| Scripts | Delete_script_confirm (_, _) -> (
match Search_exp.parse script_name_specified with
| None -> (
(* Input is not a valid search expression: fall back to the
   same prefix filter used in save mode. *)
(Dynarray.filter
(CCString.starts_with ~prefix:script_name_specified)
arr,
None)
)
| Some exp -> (
if Search_exp.is_empty exp then (
(* Empty expression: show everything, unranked. *)
(arr, None)
) else (
(* Fuzzy-rank the files by the expression, keying on the
   file name without its extension. *)
let ranking =
Misc_utils.fuzzy_rank_assoc
(Stop_signal.make ())
~get_key:Filename.chop_extension
exp
(Dynarray.to_seq arr)
in
(* Split (path, result) pairs into parallel arrays of paths
   and highlight sets. *)
Dynarray.to_seq ranking
|> Seq.map (fun (path, search_result) ->
(path, Misc_utils.highlights_of_search_result search_result)
)
|> Seq.split
|> (fun (s0, s1) ->
(Dynarray.of_seq s0, Some (Dynarray.of_seq s1)))
)
)
)
| _ -> (
(* Any other input mode: no script files are applicable. *)
(Dynarray.create (), None)
)
end
(* Rescans the script directory (depth limit 1) and publishes the basenames
   of all files carrying the docfd script extension, resetting the script
   selection to the top of the list. *)
let refresh_script_files () =
  Lwd.set UI_base.Vars.index_of_script_selected 0;
  let found =
    File_utils.list_files_recursive_filter_by_exts
      ~max_depth:1
      ~report_progress:ignore
      ~exts:[ Params.docfd_script_ext ]
      (Seq.return (Params.script_dir ()))
  in
  let basenames =
    String_set.to_seq found
    |> Seq.map Filename.basename
    |> Dynarray.of_seq
  in
  Lwd.set Vars.script_files basenames
(* Re-parses [doc] from disk. On success the refreshed document replaces
   the old entry in the session's starting state; on parse failure the
   document's path is dropped from it instead. *)
let reload_document (doc : Document.t) =
  let pool = UI_base.task_pool () in
  let path = Document.path doc in
  let reloaded =
    match
      Document.of_path
        ~env:(UI_base.eio_env ())
        pool
        ~already_in_transaction:false
        (Document.search_mode doc)
        path
    with
    | Ok d -> Some d
    | Error _ -> None
  in
  let init_state =
    Session_manager.lock_with_view (fun view -> view.init_state)
  in
  let session_state =
    match reloaded with
    | Some d -> Session.State.add_document pool d init_state
    | None -> Session.State.drop (`Path path) init_state
  in
  Session_manager.update_starting_state session_state
(* Reloads the currently selected document, if any documents are listed. *)
let reload_document_selected
    ~(search_result_groups : Session.search_result_group array)
  : unit =
  match Array.length search_result_groups with
  | 0 -> ()
  | _ ->
    let selected = Lwd.peek UI_base.Vars.index_of_document_selected in
    let doc, _ = search_result_groups.(selected) in
    reload_document doc
(* Flips the mark on [path]: runs `Unmark if the document is currently
   marked, `Mark otherwise, recording the resulting snapshot together with
   the command that produced it. *)
let toggle_mark ~path =
  Session_manager.update_from_cur_snapshot
    (fun cur_snapshot ->
       let state = Session.Snapshot.state cur_snapshot in
       let currently_marked =
         String_set.mem path (Session.State.marked_document_paths state)
       in
       let command = if currently_marked then `Unmark path else `Mark path in
       let (command, new_state) =
         Option.get
           (Session.run_command (UI_base.task_pool ()) command state)
       in
       Session.Snapshot.make ~last_command:(Some command) new_state
    )
(* Drops documents according to [choice], first adjusting the selection so
   it stays within the shrunken list, then runs the corresponding session
   command and records the resulting snapshot. *)
let drop ~document_count (choice : [`Path of string | `All_except of string | `Marked | `Unmarked | `Listed | `Unlisted]) =
  let new_command =
    match choice with
    | `Path path ->
      (* One document disappears; clamp the current index accordingly. *)
      let n = Lwd.peek UI_base.Vars.index_of_document_selected in
      UI_base.set_document_selected ~choice_count:(document_count - 1) n;
      `Drop path
    | `All_except path ->
      (* Only one document remains. *)
      UI_base.set_document_selected ~choice_count:1 0;
      `Drop_all_except path
    | `Marked ->
      UI_base.reset_document_selected ();
      `Drop_marked
    | `Unmarked ->
      UI_base.reset_document_selected ();
      `Drop_unmarked
    | `Listed ->
      UI_base.reset_document_selected ();
      `Drop_listed
    | `Unlisted ->
      UI_base.reset_document_selected ();
      `Drop_unlisted
  in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let (command, state) =
        Session.Snapshot.state cur_snapshot
        |> Session.run_command (UI_base.task_pool ()) new_command
        |> Option.get
      in
      Session.Snapshot.make ~last_command:(Some command) state)
(* Marks either a single document by path or every listed document,
   recording the resulting snapshot with the command that produced it. *)
let mark (choice : [`Path of string | `Listed]) =
  let new_command =
    match choice with
    | `Path path -> `Mark path
    | `Listed -> `Mark_listed
  in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let (command, state) =
        Session.Snapshot.state cur_snapshot
        |> Session.run_command (UI_base.task_pool ()) new_command
        |> Option.get
      in
      Session.Snapshot.make ~last_command:(Some command) state)
(* Unmarks a single document by path, all listed documents, or all marked
   documents, recording the resulting snapshot. *)
let unmark (choice : [`Path of string | `Listed | `All]) =
  let new_command =
    match choice with
    | `Path path -> `Unmark path
    | `Listed -> `Unmark_listed
    | `All -> `Unmark_all
  in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let (command, state) =
        Session.Snapshot.state cur_snapshot
        |> Session.run_command (UI_base.task_pool ()) new_command
        |> Option.get
      in
      Session.Snapshot.make ~last_command:(Some command) state)
(* Re-sorts the document list by [sort_by], resetting the selection to the
   top since the ordering changes under it. *)
let sort (sort_by : Command.Sort_by.t) =
  UI_base.reset_document_selected ();
  let new_command = `Sort (sort_by, Command.Sort_by.default_no_score) in
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let (command, state) =
        Session.Snapshot.state cur_snapshot
        |> Session.run_command (UI_base.task_pool ()) new_command
        |> Option.get
      in
      Session.Snapshot.make ~last_command:(Some command) state)
(* Narrows the search scope of the current session state to [level] and
   records the narrowed state as a new snapshot. *)
let narrow_search_scope_to_level ~level =
  Session_manager.update_from_cur_snapshot (fun cur_snapshot ->
      let state = Session.Snapshot.state cur_snapshot in
      let narrowed =
        Session.State.narrow_search_scope_to_level ~level state
      in
      Session.Snapshot.make ~last_command:(Some (`Narrow_level level)) narrowed)
(* Push the current contents of the respective input field to the session
   manager. [commit] presumably distinguishes a finalized submission from a
   live-typing preview — confirm against Session_manager. *)
let update_filter ~commit () =
  let text, _cursor = Lwd.peek UI_base.Vars.filter_field in
  Session_manager.submit_filter_req ~commit text

let update_search ~commit () =
  let text, _cursor = Lwd.peek UI_base.Vars.search_field in
  Session_manager.submit_search_req ~commit text

let update_path_fuzzy_rank ~commit () =
  let text, _cursor = Lwd.peek Vars.path_fuzzy_rank_field in
  Session_manager.submit_path_fuzzy_rank_req ~commit text
(* Path under the script directory for a script named [base_name], with
   the docfd script extension appended. Creates the directory if needed. *)
let compute_save_script_path base_name =
  let dir = Params.script_dir () in
  File_utils.mkdir_recursive dir;
  Filename.concat dir (base_name ^ Params.docfd_script_ext)
(* Writes the current session's command history to [path], one command per
   line (snapshots without a command are skipped). Exits the program with
   an error message if the file cannot be written. *)
let save_script ~path =
  Session_manager.stop_filter_and_search_and_restore_input_fields ();
  let lines =
    Session_manager.lock_with_view (fun view ->
        Dynarray.to_seq view.snapshots
        |> Seq.filter_map (fun (snapshot : Session.Snapshot.t) ->
            Session.Snapshot.last_command snapshot
            |> Option.map Command.to_string
          )
        |> List.of_seq
      )
  in
  match CCIO.with_out path (fun oc -> CCIO.write_lines_l oc lines) with
  | () -> ()
  | exception Sys_error _ ->
    Misc_utils.exit_with_error_msg
      (Fmt.str "failed to write script %s" path)
module Top_pane = struct
module Document_list = struct
(* Renders one document entry of the document list pane as a Notty image.
   In Path_fuzzy_rank mode only the (possibly highlighted) path is shown;
   in every other mode the entry shows: title, best-result score (debug
   builds only), path, path-derived date, a short content preview, and the
   last-modified time. [selected] only affects the title's styling. *)
let render_document_entry
    ~input_mode
    ~width
    ~documents_marked
    ~(search_result_group : Session.search_result_group)
    ~path_highlights
    ~selected
  : Notty.image =
  let open Notty in
  let open Notty.Infix in
  let (doc, search_results) = search_result_group in
  (* Score of the first (best) search result; shown only with debug output. *)
  let search_result_score_image =
    if Option.is_some !Params.debug_output then (
      if Array.length search_results = 0 then
        I.empty
      else (
        let x = search_results.(0) in
        I.strf "(Best search result score: %f)" (Search_result.score x)
      )
    ) else (
      I.empty
    )
  in
  let sub_item_base_left_padding = I.string A.empty " " in
  let sub_item_width = width - I.width sub_item_base_left_padding - 2 in
  (* First [Params.preview_line_count] lines of the document (or fewer if
     the document is shorter), each wrapped line prefixed with a green
     gutter bar. *)
  let preview_image =
    let preview_left_padding_per_line =
      I.string A.(bg lightgreen) " "
      <|>
      I.string A.empty " "
    in
    let preview_line_images =
      let line_count =
        min Params.preview_line_count (Index.global_line_count ~doc_id:(Document.doc_id doc))
      in
      OSeq.(0 --^ line_count)
      |> Seq.map (fun global_line_num ->
          Index.words_of_global_line_num ~doc_id:(Document.doc_id doc) global_line_num
          |> Dynarray.to_list
          |> Content_and_search_result_rendering.Text_block_rendering.of_words ~width:sub_item_width
        )
      |> Seq.map (fun img ->
          (* Repeat the gutter for every visual row the block wrapped into. *)
          let left_padding =
            OSeq.(0 --^ I.height img)
            |> Seq.map (fun _ -> preview_left_padding_per_line)
            |> List.of_seq
            |> I.vcat
          in
          left_padding <|> img
        )
      |> List.of_seq
    in
    I.vcat preview_line_images
  in
  (* Path highlights only apply while fuzzy-ranking paths. *)
  let path_highlights =
    match input_mode with
    | UI_base.Path_fuzzy_rank -> (
        String_map.find_opt (Document.path doc) path_highlights
      )
    | _ -> (
        None
      )
  in
  let path_image =
    Document.path doc
    |> File_utils.remove_cwd_from_path
    |> Tokenization.tokenize ~drop_spaces:false
    |> List.of_seq
    |> Content_and_search_result_rendering.Text_block_rendering.of_words
      ~width:sub_item_width
      ?highlights:path_highlights
  in
  let path_image_with_prefix =
    (I.string A.(fg lightgreen) "@ ")
    <|>
    path_image
  in
  (* Date parsed out of the file path, when one is present. *)
  let path_date_image =
    (match Document.path_date doc with
     | None -> I.void 0 0
     | Some date -> (
         I.string A.(fg lightgreen) " ⤷ "
         <|>
         I.string A.empty
           (Timedesc.Date.to_rfc3339 date)
       )
    )
  in
  let last_modified_image =
    I.string A.(fg lightgreen) "Last modified: "
    <|>
    I.string A.empty
      (Timedesc.to_string ~format:Params.last_modified_format_string (Document.mod_time doc))
  in
  let marked = String_set.mem (Document.path doc) documents_marked in
  (* Title line; bold when this entry is the selected one. *)
  let title =
    let attr =
      if selected then (
        A.(fg lightblue ++ st bold)
      ) else (
        A.(fg lightblue)
      )
    in
    match Document.title doc with
    | None ->
      I.void 0 1
    | Some title -> (
        title
        |> Tokenization.tokenize ~drop_spaces:false
        |> List.of_seq
        |> Content_and_search_result_rendering.Text_block_rendering.of_words ~attr ~width
      )
  in
  match input_mode with
  | UI_base.Path_fuzzy_rank -> (
      path_image
    )
  | _ -> (
      (
        (* "> " prefix marks a marked document. *)
        (if marked then I.strf "> " else I.void 0 1)
        <|>
        title
      )
      <->
      (
        sub_item_base_left_padding
        <|>
        I.vcat
          [ search_result_score_image;
            path_image_with_prefix;
            path_date_image;
            preview_image;
            last_modified_image;
          ]
      )
    )
(* Renders the document list pane: entries are drawn top-down starting at
   the selected document (so the selection is always the first visible
   entry) until [height] is filled or documents run out. Mouse wheel moves
   the selection. *)
let main
    ~width
    ~height
    ~documents_marked
    ~(search_result_groups : Session.search_result_group array)
    ~(document_selected : int)
  : Nottui.ui Lwd.t =
  let document_count = Array.length search_result_groups in
  let$* input_mode = Lwd.get UI_base.Vars.input_mode in
  let$* (_cur_ver, snapshot) = Session_manager.cur_snapshot in
  let state = Session.Snapshot.state snapshot in
  let render_pane () =
    (* Accumulate entry images until the pane height is filled; the final
       entry may overflow [height] since the check happens before adding. *)
    let rec aux index height_filled acc =
      if index < document_count
      && height_filled < height
      then (
        let selected = Int.equal document_selected index in
        let img =
          render_document_entry
            ~input_mode
            ~width
            ~documents_marked
            ~search_result_group:search_result_groups.(index)
            ~path_highlights:(Session.State.path_highlights state)
            ~selected
        in
        aux (index + 1) (height_filled + Notty.I.height img) (img :: acc)
      ) else (
        List.rev acc
        |> List.map Nottui.Ui.atom
        |> Nottui.Ui.vcat
      )
    in
    if document_count = 0 then (
      Nottui.Ui.empty
    ) else (
      aux document_selected 0 []
    )
  in
  let$ background = UI_base.full_term_sized_background in
  Nottui.Ui.join_z background (render_pane ())
  |> Nottui.Ui.mouse_area
    (UI_base.mouse_handler
       ~f:(fun direction ->
           let offset =
             match direction with
             | `Up -> -1
             | `Down -> 1
           in
           let document_current_choice =
             Lwd.peek UI_base.Vars.index_of_document_selected
           in
           UI_base.set_document_selected
             ~choice_count:document_count
             (document_current_choice + offset);
         )
    )
end
module Right_pane = struct
(* Bottom-right pane listing the search results of the selected document. *)
module Search_result_list = struct
  (* Renders up to [height] search results starting at the selected one,
     each followed by a blank separator line. Mouse wheel moves the
     search result selection. *)
  let main
      ~height
      ~width
      ~(search_result_group : Session.search_result_group)
      ~(index_of_search_result_selected : int Lwd.var)
    : Nottui.ui Lwd.t =
    let (document, search_results) = search_result_group in
    let search_result_selected = Lwd.peek index_of_search_result_selected in
    let result_count = Array.length search_results in
    if result_count = 0 then (
      Lwd.return Nottui.Ui.empty
    ) else (
      let images =
        Misc_utils.array_sub_seq
          ~start:search_result_selected
          ~end_exc:(min result_count (search_result_selected + height))
          search_results
        |> Seq.map (Content_and_search_result_rendering.search_result
                      ~doc_id:(Document.doc_id document)
                      ~render_mode:(UI_base.render_mode_of_document document)
                      ~width
                   )
        |> List.of_seq
      in
      let pane =
        images
        |> List.map (fun img ->
            (* The empty strf adds one blank line below each result. *)
            Nottui.Ui.atom Notty.I.(img <-> strf "")
          )
        |> Nottui.Ui.vcat
      in
      let$ background = UI_base.full_term_sized_background in
      Nottui.Ui.join_z background pane
      |> Nottui.Ui.mouse_area
        (UI_base.mouse_handler
           ~f:(fun direction ->
               let n = Lwd.peek index_of_search_result_selected in
               let offset =
                 match direction with
                 | `Up -> -1
                 | `Down -> 1
               in
               UI_base.set_search_result_selected
                 ~choice_count:result_count
                 (n + offset)
             )
        )
    )
end
(* Bottom-right pane listing the links of the selected document. *)
module Link_list = struct
  (* Renders a window of links centered (roughly) on the selected one.
     Mouse wheel moves the link selection. *)
  let main
      ~height
      ~width
      ~(search_result_group : Session.search_result_group)
      ~(index_of_link_selected : int Lwd.var)
    : Nottui.ui Lwd.t =
    let (document, _search_results) = search_result_group in
    let links = Document.links document in
    let link_selected = Lwd.peek index_of_link_selected in
    let link_count = Array.length links in
    if link_count = 0 then (
      Lwd.return Nottui.Ui.empty
    ) else (
      (* Window of [height] links with the selection kept near the middle. *)
      let start = max 0 (link_selected - height / 2) in
      let end_exc = min link_count (start + height) in
      let render ~width s =
        s
        |> Tokenization.tokenize ~drop_spaces:false
        |> List.of_seq
        |> Content_and_search_result_rendering.Text_block_rendering.of_words
          ~width
      in
      let pane =
        Misc_utils.array_sub_seq ~start ~end_exc
          links
        |> Seq.map (fun link -> link.Link.link)
        |> List.of_seq
        |> Content_and_search_result_rendering.centered_list
          ~height
          ~width
          ~render
          (link_selected - start)
        |> Nottui.Ui.atom
      in
      let$ background = UI_base.full_term_sized_background in
      Nottui.Ui.join_z background pane
      |> Nottui.Ui.mouse_area
        (UI_base.mouse_handler
           ~f:(fun direction ->
               let n = Lwd.peek index_of_link_selected in
               let offset =
                 match direction with
                 | `Up -> -1
                 | `Down -> 1
               in
               UI_base.set_link_selected
                 ~choice_count:link_count
                 (n + offset)
             )
        )
    )
end
(* Right half of the top pane: the content view, optionally stacked above
   a bottom-right pane showing either the link list (Links mode) or the
   search result list (all other modes). Blank when no documents are
   listed. Note: the sub-pane constructors are partially applied here;
   [UI_base.vpane] supplies the remaining ~height argument. *)
let main
    ~width
    ~height
    ~(search_result_groups : Session.search_result_group array)
    ~(document_selected : int)
    ~show_bottom_right_pane
  : Nottui.ui Lwd.t =
  if Array.length search_result_groups = 0 then (
    let blank ~height =
      let _ = height in
      Nottui_widgets.empty_lwd
    in
    UI_base.vpane ~width ~height
      blank blank
  ) else (
    let$* input_mode = Lwd.get UI_base.Vars.input_mode in
    let$* search_result_selected = Lwd.get UI_base.Vars.index_of_search_result_selected in
    let$* link_selected = Lwd.get UI_base.Vars.index_of_link_selected in
    let search_result_group = search_result_groups.(document_selected) in
    if show_bottom_right_pane then (
      UI_base.vpane ~width ~height
        (UI_base.Content_view.main
           ~input_mode
           ~width
           ~search_result_group
           ~search_result_selected
           ~link_selected)
        (match input_mode with
         | Links -> (
             Link_list.main
               ~width
               ~search_result_group
               ~index_of_link_selected:UI_base.Vars.index_of_link_selected
           )
         | _ -> (
             Search_result_list.main
               ~width
               ~search_result_group
               ~index_of_search_result_selected:UI_base.Vars.index_of_search_result_selected
           ))
    ) else (
      (* Content view takes the full height when the bottom pane is hidden. *)
      UI_base.Content_view.main
        ~input_mode
        ~width
        ~height
        ~search_result_group
        ~search_result_selected
        ~link_selected
    )
  )
end
(* Renders [items] from index [selected] onward as a vertical text list of
   fixed size [width] x [height], optionally applying the per-item
   highlights at the matching indices. *)
let item_list
    ~width
    ~height
    ~selected
    ?highlights
    items
  : Nottui.ui Lwd.t =
  let render_entry index =
    let text = Dynarray.get items index in
    let entry_highlights =
      Option.map (fun h -> Dynarray.get h index) highlights
    in
    text
    |> Tokenization.tokenize ~drop_spaces:false
    |> List.of_seq
    |> Content_and_search_result_rendering.Text_block_rendering.of_words
      ~width
      ?highlights:entry_highlights
    |> Nottui.Ui.atom
  in
  OSeq.(selected --^ Dynarray.length items)
  |> Seq.map render_entry
  |> List.of_seq
  |> Nottui.Ui.vcat
  |> Nottui.Ui.resize ~w:width ~h:height
  |> Lwd.return
(* Top pane dispatcher. Depending on input mode it shows:
   - Save_script: the list of matching script names;
   - Scripts / Delete_script_confirm: script list on the left, a preview of
     the selected script's first [height] lines on the right;
   - otherwise: the document list on the left and the content/right pane
     on the right, split according to [screen_split]. *)
let main
    ~width
    ~height
    ~documents_marked
    ~screen_split
    ~show_bottom_right_pane
    ~(search_result_groups : Session.search_result_group array)
  : Nottui.ui Lwd.t =
  let$* input_mode = Lwd.get UI_base.Vars.input_mode in
  let$* script_selected = Lwd.get UI_base.Vars.index_of_script_selected in
  let$* usable_scripts, usable_script_highlights =
    Vars.usable_script_files
  in
  (* Currently selected script file, if the selection is in range. *)
  let file =
    if script_selected < Dynarray.length usable_scripts then (
      Some (Dynarray.get usable_scripts script_selected)
    ) else (
      None
    )
  in
  let$* selected = Lwd.get UI_base.Vars.index_of_script_selected in
  match input_mode with
  | Save_script -> (
      item_list
        ~width
        ~height
        ~selected
        usable_scripts
    )
  | Scripts | Delete_script_confirm (_, _) -> (
      (* Preview: first [height] lines of the selected script; unreadable
         files degrade to an empty preview. *)
      let lines =
        try
          match file with
          | None -> []
          | Some file -> (
              let dir = Params.script_dir () in
              CCIO.with_in (Filename.concat dir file) (fun ic ->
                  CCIO.read_lines_seq ic
                  |> OSeq.take height
                  |> List.of_seq
                )
            )
        with
        | Sys_error _ -> []
      in
      UI_base.hpane ~l_ratio:0.5 ~width ~height
        (item_list
           ~height
           ~selected
           ?highlights:usable_script_highlights
           usable_scripts)
        (fun ~width ->
           List.map (fun s -> Nottui.Ui.atom (Notty.I.strf "%s" s)) lines
           |> Nottui.Ui.vcat
           |> Nottui.Ui.resize ~w:width ~h:height
           |> Lwd.return
        )
    )
  | _ -> (
      let$* document_selected = Lwd.get UI_base.Vars.index_of_document_selected in
      (* Fraction of the width given to the left (document list) pane. *)
      let l_ratio =
        match screen_split with
        | `Even -> 0.50
        | `Focus_left -> 1.0
        | `Wide_left -> 0.618
        | `Focus_right -> 0.0
        | `Wide_right -> 1.0 -. 0.618
      in
      UI_base.hpane ~l_ratio ~width ~height
        (Document_list.main
           ~height
           ~documents_marked
           ~search_result_groups
           ~document_selected)
        (Right_pane.main
           ~height
           ~search_result_groups
           ~document_selected
           ~show_bottom_right_pane)
    )
end
module Bottom_pane = struct
(* Renders the status bar. Its content depends on [input_mode]:
   - Save_script / Scripts: an editable text field for naming/filtering
     scripts, with tab completion, up/down selection and Ctrl+X deletion;
   - Save_script_overwrite / _no_name / _edit / Delete_script_confirm:
     a one-line prompt (Save_script_overwrite also performs the save as a
     side effect when no file clash exists);
   - Path_fuzzy_rank: the fuzzy ranking text field;
   - everything else: document counts and the selected index, with a
     right-aligned key binding hint. *)
let status_bar
    ~width
    ~(search_result_groups : Session.search_result_group array)
    ~(input_mode : UI_base.input_mode)
  : Nottui.Ui.t Lwd.t =
  let open Notty.Infix in
  let input_mode_image =
    UI_base.Input_mode_map.find input_mode UI_base.Status_bar.input_mode_images
  in
  let attr = UI_base.Status_bar.attr in
  let$* usable_scripts, _usable_script_highlights = Vars.usable_script_files in
  let$* script_selected = Lwd.get UI_base.Vars.index_of_script_selected in
  let usable_script_count = Dynarray.length usable_scripts in
  (* Basename and full path of the currently selected script, if any. *)
  let selected_script_file_and_path =
    if usable_script_count > 0 then (
      let dir = Params.script_dir () in
      let file = Dynarray.get usable_scripts script_selected in
      Some (file, Filename.concat dir file)
    ) else (
      None
    )
  in
  match input_mode with
  | Save_script | Scripts -> (
      (* Shared text-field UI: "Save as" when saving, "Filter" when
         browsing scripts. *)
      let text_field = Vars.script_name_field in
      let prompt =
        match input_mode with
        | Save_script -> "Save as"
        | Scripts -> "Filter"
        | _ -> failwith "unexpected case"
      in
      (* Tab completion (Save_script only): complete to the single match's
         name sans extension, or to the longest common prefix of all
         matches. *)
      let on_tab =
        match input_mode with
        | Save_script -> (
            Some (fun (text, _) ->
                let best_fit =
                  let usable_script_count = Dynarray.length usable_scripts in
                  if usable_script_count = 0 then (
                    text
                  ) else if usable_script_count = 1 then (
                    Filename.chop_extension (Dynarray.get usable_scripts 0)
                  ) else (
                    usable_scripts
                    |> Dynarray.to_seq
                    |> String_utils.longest_common_prefix
                  )
                in
                Lwd.set text_field (best_fit, String.length best_fit)
              )
          )
        | _ -> None
      in
      let on_submit =
        match input_mode with
        | Save_script -> (
            (fun (text, _x) ->
               Lwd.set text_field UI_base.empty_text_field;
               Nottui.Focus.release Vars.script_name_field_focus_handle;
               Lwd.set UI_base.Vars.input_mode
                 (if String.length text = 0 then
                    Save_script_no_name
                  else
                    Save_script_overwrite text
                 );
            )
          )
        | Scripts -> (
            (fun (_text, _x) ->
               (* Quit the UI loop and hand the chosen script path back to
                  the caller through [UI_base.Vars.action]. *)
               Option.iter (fun (_file, path) ->
                   Lwd.set text_field UI_base.empty_text_field;
                   Nottui.Focus.release Vars.script_name_field_focus_handle;
                   Lwd.set UI_base.Vars.quit true;
                   UI_base.Vars.action := Some (Open_script path);
                   Lwd.set UI_base.Vars.input_mode Navigate;
                 ) selected_script_file_and_path;
            )
          )
        | _ -> failwith "unexpected case"
      in
      (* Up/down moves the script selection while filtering. *)
      let on_up_down =
        match input_mode with
        | Save_script -> None
        | Scripts -> (
            Some (fun up_down _ ->
                UI_base.set_script_selected
                  ~choice_count:usable_script_count
                  (script_selected +
                   (match up_down with
                    | `Up -> (-1)
                    | `Down -> 1)
                  ))
          )
        | _ -> failwith "unexpected case"
      in
      (* Ctrl+X in Scripts mode: ask for confirmation to delete the
         selected script. *)
      let on_ctrl_prefixed =
        match input_mode with
        | Scripts -> (
            Some (fun key (_text, _x) ->
                match key with
                | (`ASCII 'X', [`Ctrl]) -> (
                    Option.iter (fun (file, path) ->
                        Lwd.set UI_base.Vars.input_mode (Delete_script_confirm (file, path))
                      ) selected_script_file_and_path;
                    `Handled
                  )
                | _ -> (
                    `Unhandled
                  )
              )
          )
        | _ -> None
      in
      let$* content =
        Nottui_widgets.hbox
          [
            Lwd.return
              (Nottui.Ui.atom
                 (Notty.I.hcat
                    [
                      input_mode_image;
                      UI_base.Status_bar.element_spacer;
                      Notty.I.strf ~attr "%s: [ " prompt;
                    ]));
            UI_base.edit_field text_field
              ~focus:Vars.script_name_field_focus_handle
              ~on_cancel:(fun (_, _) ->
                  Lwd.set UI_base.Vars.input_mode Navigate
                )
              ~on_change:(fun (text, x) ->
                  Lwd.set text_field (text, x);
                )
              ~on_submit
              ?on_tab
              ?on_up_down
              ?on_ctrl_prefixed;
            Lwd.return (Nottui.Ui.atom (Notty.I.strf ~attr " ] + %s" Params.docfd_script_ext));
          ]
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | Save_script_overwrite script_name -> (
      let path = compute_save_script_path script_name in
      if Sys.file_exists path then (
        (* Name clash: prompt before overwriting. *)
        let$* content =
          Lwd.return
            (Nottui.Ui.atom
               (Notty.I.hcat
                  [
                    input_mode_image;
                    UI_base.Status_bar.element_spacer;
                    Notty.I.strf ~attr "%s already exists, overwrite?"
                      (Filename.basename path);
                  ]))
        in
        let$ bar = UI_base.Status_bar.background_bar in
        Nottui.Ui.join_z bar content
      ) else (
        (* No clash: save immediately, then offer to edit the new script. *)
        save_script ~path;
        Lwd.set UI_base.Vars.input_mode (Save_script_edit script_name);
        UI_base.Status_bar.background_bar
      )
    )
  | Save_script_no_name -> (
      let$* content =
        Lwd.return
          (Nottui.Ui.atom
             (Notty.I.hcat
                [
                  input_mode_image;
                  UI_base.Status_bar.element_spacer;
                  Notty.I.strf ~attr "No name entered, saving skipped";
                ]))
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | Save_script_edit script_name -> (
      let path = compute_save_script_path script_name in
      let$* content =
        Lwd.return
          (Nottui.Ui.atom
             (Notty.I.hcat
                [
                  input_mode_image;
                  UI_base.Status_bar.element_spacer;
                  Notty.I.strf ~attr "Do you want to edit %s to add comments etc?" (Filename.basename path);
                ]))
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | Delete_script_confirm (script, _) -> (
      let$* content =
        Lwd.return
          (Nottui.Ui.atom
             (Notty.I.hcat
                [
                  input_mode_image;
                  UI_base.Status_bar.element_spacer;
                  Notty.I.strf ~attr "Confirm deletion of %s?" script;
                ]))
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | Path_fuzzy_rank -> (
      (* Fuzzy path ranking field: updates the ranking live on every edit,
         commits on submit, and lets up/down move the document selection. *)
      let text_field = Vars.path_fuzzy_rank_field in
      let document_count = Array.length search_result_groups in
      let$* document_current_choice = Lwd.get UI_base.Vars.index_of_document_selected in
      let$* content =
        Nottui_widgets.hbox
          [
            Lwd.return
              (Nottui.Ui.atom
                 (Notty.I.hcat
                    [
                      input_mode_image;
                      UI_base.Status_bar.element_spacer;
                    ]));
            UI_base.edit_field text_field
              ~focus:Vars.path_fuzzy_rank_field_focus_handle
              ~on_cancel:(fun (_, _) ->
                  Lwd.set UI_base.Vars.input_mode Navigate
                )
              ~on_change:(fun (text, x) ->
                  Lwd.set text_field (text, x);
                  update_path_fuzzy_rank ~commit:false ();
                )
              ~on_submit:(fun (text, x) ->
                  Lwd.set text_field (text, x);
                  update_path_fuzzy_rank ~commit:true ();
                  Lwd.set UI_base.Vars.input_mode Navigate
                )
              ~on_up_down:(fun ud _ ->
                  let offset =
                    match ud with
                    | `Up -> -1
                    | `Down -> 1
                  in
                  UI_base.set_document_selected
                    ~choice_count:document_count
                    (document_current_choice + offset);
                )
            ;
          ]
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
  | _ -> (
      (* Default status line: listed/total document counts, the selected
         index (when non-empty), and a right-aligned key binding hint. *)
      let$* index_of_document_selected = Lwd.get UI_base.Vars.index_of_document_selected in
      let document_count = Array.length search_result_groups in
      let$* (_cur_ver, snapshot) = Session_manager.cur_snapshot in
      let content =
        let file_shown_count =
          Notty.I.strf ~attr
            "%5d/%d documents listed"
            document_count
            (Session.Snapshot.state snapshot
             |> Session.State.size)
        in
        let hint =
          Notty.I.strf ~attr "< and > to see more key binding info, ? to toggle hide"
        in
        let hint_len = Notty.I.width hint in
        (* Pad the hint to the right edge of the bar. *)
        let hint_overlay =
          Notty.I.void
            (width - hint_len) 1
          <|>
          hint
        in
        let core =
          if document_count = 0 then (
            [
              UI_base.Status_bar.element_spacer;
              file_shown_count;
            ]
          ) else (
            let index_of_selected =
              Notty.I.strf ~attr
                "Index of document selected: %d"
                index_of_document_selected
            in
            [
              file_shown_count;
              UI_base.Status_bar.element_spacer;
              index_of_selected;
            ]
          )
        in
        Notty.I.zcat
          [
            Notty.I.hcat
              (input_mode_image
               ::
               UI_base.Status_bar.element_spacer
               ::
               core);
            hint_overlay;
          ]
        |> Nottui.Ui.atom
      in
      let$ bar = UI_base.Status_bar.background_bar in
      Nottui.Ui.join_z bar content
    )
(* Key binding hint grids shown beneath the status bar, one grid per input
   mode. NOTE(review): the outer list of each grid appears to hold pages
   cycled with < and > (see decr_rotation/incr_rotation in the keyboard
   handler) — confirm against UI_base.Key_binding_info. *)
module Key_binding_info = struct
  let grid_contents : UI_base.Key_binding_info.grid_contents =
    let open UI_base.Key_binding_info in
    (* Placeholder row used to keep grids the same height across modes. *)
    let empty_row =
      [
        { label = ""; msg = "" };
      ]
    in
    let navigate_grid =
      [
        [
          { label = "Enter"; msg = "open document" };
          { label = "/"; msg = "SEARCH" };
          { label = "↑/↓/j/k"; msg = "select document" };
          { label = "s"; msg = "SORT-ASC" };
          { label = "Tab"; msg = "expand right pane" };
          { label = "y"; msg = "COPY" };
          { label = "n"; msg = "NARROW" };
          { label = "Space"; msg = "toggle mark" };
          { label = "h"; msg = "command history" };
          { label = "Ctrl+S"; msg = "save session as script" };
        ];
        [
          { label = "v"; msg = "focus content" };
          { label = "f"; msg = "FILTER" };
          { label = "Shift+↑/↓/j/k"; msg = "select search result" };
          { label = "Shift+S"; msg = "SORT-DESC" };
          { label = "Shift+Tab"; msg = "expand left pane" };
          { label = "Shift+Y"; msg = "COPY-PATHS" };
          { label = "d"; msg = "DROP" };
          { label = "m"; msg = "MARK" };
          { label = ""; msg = "" };
          { label = "Ctrl+O"; msg = "SCRIPTS" };
        ];
        [
          { label = "Ctrl+C"; msg = "exit" };
          { label = "x"; msg = "CLEAR" };
          { label = "-/="; msg = "scroll content view" };
          { label = "l"; msg = "LINKS" };
          { label = ""; msg = "" };
          { label = ""; msg = "" };
          { label = "r"; msg = "RELOAD" };
          { label = "Shift+M"; msg = "UNMARK" };
          { label = ""; msg = "" };
          { label = ""; msg = "" };
        ];
      ]
    in
    let search_grid =
      [
        [
          { label = "Enter"; msg = "exit SEARCH" };
        ];
      ]
    in
    let filter_grid =
      [
        [
          { label = "Enter"; msg = "exit FILTER" };
          { label = "Tab"; msg = "autocomplete" };
        ];
      ]
    in
    let save_script_grid =
      [
        [
          { label = "Enter"; msg = "confirm answer" };
          { label = "Tab"; msg = "autocomplete" };
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let save_script_confirm_grid =
      [
        [
          { label = "y"; msg = "confirm overwrite" };
          { label = "Esc/n"; msg = "cancel" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let save_script_cancel_grid =
      [
        [
          { label = "Enter"; msg = "confirm" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let save_script_edit_grid =
      [
        [
          { label = "y"; msg = "open in editor" };
          { label = "Esc/n"; msg = "skip" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let scripts_grid =
      [
        [
          { label = "Enter"; msg = "open" };
          { label = "↑/↓"; msg = "select" };
          { label = "Ctrl+X"; msg = "delete" };
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let delete_script_confirm_grid =
      [
        [
          { label = "y"; msg = "confirm deletion" };
          { label = "Esc/n"; msg = "cancel" };
        ];
        empty_row;
        empty_row;
      ]
    in
    let clear_grid =
      [
        [
          { label = "/"; msg = "search field" };
          { label = "f"; msg = "filter field" };
          { label = "h"; msg = "command history" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let sort_asc_grid =
      [
        [
          { label = "s"; msg = "score" };
          { label = "p"; msg = "path" };
          { label = "d"; msg = "path date" };
          { label = "m"; msg = "mod time" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let sort_desc_grid =
      [
        [
          { label = "s"; msg = "score" };
          { label = "p"; msg = "path" };
          { label = "d"; msg = "path date" };
          { label = "m"; msg = "mod time" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let drop_grid =
      [
        [
          { label = "d"; msg = "selected" };
          { label = "l"; msg = "listed" };
          { label = "m"; msg = "marked" };
        ];
        [
          { label = "Shift+D"; msg = "unselected" };
          { label = "Shift+L"; msg = "unlisted" };
          { label = "Shift+M"; msg = "unmarked" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let mark_grid =
      [
        [
          { label = "l"; msg = "listed" };
        ];
        empty_row;
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let unmark_grid =
      [
        [
          { label = "l"; msg = "listed" };
          { label = "a"; msg = "all" };
        ];
        empty_row;
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let copy_grid =
      [
        [
          { label = "y"; msg = "selected search result" };
          { label = "m"; msg = "results of marked documents" };
          { label = "l"; msg = "results of listed documents" };
        ];
        [
          { label = "a"; msg = "results of selected document" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let copy_paths_grid =
      [
        [
          { label = "y"; msg = "path of selected document" };
          { label = "m"; msg = "paths of marked documents" };
          { label = "l"; msg = "paths of listed documents" };
        ];
        [
          { label = ""; msg = "" };
          { label = "Shift+M"; msg = "paths of unmarked documents" };
          { label = "Shift+L"; msg = "paths of unlisted documents" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
      ]
    in
    let narrow_grid =
      [
        [
          { label = "0-9"; msg = "narrow search scope to level N" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let reload_grid =
      [
        [
          { label = "r"; msg = "selected" };
          { label = "a"; msg = "all" };
        ];
        [
          { label = "Esc"; msg = "cancel" };
        ];
        empty_row;
      ]
    in
    let links_grid =
      [
        [
          { label = "Enter"; msg = "open" };
          { label = "o"; msg = "open and remain in LINKS" };
          { label = "↑/↓/j/k"; msg = "select" };
          { label = "y"; msg = "copy" };
        ];
        [
          { label = "Esc"; msg = "exit" };
        ];
        empty_row;
      ]
    in
    let path_fuzzy_rank_grid =
      [
        [
          { label = "Enter"; msg = "pin to top" };
          { label = "↑/↓"; msg = "select" };
        ];
        [
          { label = "Esc"; msg = "exit" };
        ];
        empty_row;
      ]
    in
    (* Parameterized modes are keyed with dummy payloads ("") — lookup
       presumably ignores the payload; see make_grid_lookup. *)
    [
      (Navigate, navigate_grid);
      (Search, search_grid);
      (Filter, filter_grid);
      (Clear, clear_grid);
      (Sort `Asc, sort_asc_grid);
      (Sort `Desc, sort_desc_grid);
      (Drop, drop_grid);
      (Mark, mark_grid);
      (Unmark, unmark_grid);
      (Narrow, narrow_grid);
      (Copy, copy_grid);
      (Copy_paths, copy_paths_grid);
      (Reload, reload_grid);
      (Save_script, save_script_grid);
      (Save_script_overwrite "", save_script_confirm_grid);
      (Save_script_no_name, save_script_cancel_grid);
      (Save_script_edit "", save_script_edit_grid);
      (Scripts, scripts_grid);
      (Delete_script_confirm ("", ""), delete_script_confirm_grid);
      (Links, links_grid);
      (Path_fuzzy_rank, path_fuzzy_rank_grid);
    ]

  let grid_lookup = UI_base.Key_binding_info.make_grid_lookup grid_contents

  (* Renders the hint pane for [input_mode], or a zero-size image when the
     pane is hidden. *)
  let main ~input_mode ~show_key_binding_info_pane =
    if show_key_binding_info_pane then (
      UI_base.Key_binding_info.main ~grid_lookup ~input_mode
    ) else (
      Lwd.return (Nottui.Ui.atom (Notty.I.void 0 0))
    )
end
(* Grid of autocomplete suggestions, shown only in Filter mode. Choices
   are laid out in rows of equal-width cells (widest choice + 4), padded
   to at least two rows so the surrounding layout does not jump.
   NOTE(review): with very small [width], [cells_per_row] can be 0 —
   confirm CCList.chunks tolerates that, or that callers guarantee a
   minimum width. *)
let autocomplete_grid ~input_mode ~width =
  match input_mode with
  | UI_base.Filter -> (
      let$* l = Lwd.get UI_base.Vars.autocomplete_choices in
      let max_len =
        List.fold_left (fun n x ->
            max n (String.length x)
          ) 0 l
      in
      let cell_len = max_len + 4 in
      let cells_per_row = width / cell_len in
      l
      |> CCList.chunks cells_per_row
      |> (fun rows ->
          (* Pad with rows of one empty cell up to a minimum of 2 rows. *)
          let row_count = List.length rows in
          let padding =
            if row_count < 2 then (
              CCList.(0 --^ (2 - row_count))
              |> List.map (fun _ -> [ "" ])
            ) else (
              []
            )
          in
          rows @ padding
        )
      |> List.map (fun row ->
          List.map (fun s ->
              let full_background = Notty.I.void cell_len 1 in
              Notty.I.(strf "%s" s > full_background)
              |> Nottui.Ui.atom
              |> Lwd.return
            ) row
        )
      |> Nottui_widgets.grid
        ~pad:(Nottui.Gravity.make ~h:`Negative ~v:`Negative)
    )
  | _ -> Lwd.return (Nottui.Ui.atom (Notty.I.void 0 0))
(* Filter input bar wired to the shared filter field; edits trigger live
   (non-commit) updates, submission commits. *)
let filter_bar =
  UI_base.Filter_bar.main
    ~text_field:UI_base.Vars.filter_field
    ~focus_handle:UI_base.Vars.filter_field_focus_handle
    ~on_change:(update_filter ~commit:false)
    ~on_submit:(update_filter ~commit:true)

(* Search input bar; same live/commit wiring as the filter bar. *)
let search_bar ~input_mode =
  UI_base.Search_bar.main ~input_mode
    ~text_field:UI_base.Vars.search_field
    ~focus_handle:UI_base.Vars.search_field_focus_handle
    ~on_change:(update_search ~commit:false)
    ~on_submit:(update_search ~commit:true)
(* Stacks the bottom pane rows: status bar, key binding hints,
   autocomplete grid, filter bar, and search bar. *)
let main ~width ~show_key_binding_info_pane ~search_result_groups =
  let$* input_mode = Lwd.get UI_base.Vars.input_mode in
  let rows =
    [
      status_bar ~width ~search_result_groups ~input_mode;
      Key_binding_info.main ~input_mode ~show_key_binding_info_pane;
      autocomplete_grid ~input_mode ~width;
      filter_bar ~input_mode;
      search_bar ~input_mode;
    ]
  in
  Nottui_widgets.vbox rows
end
(* Top-level keyboard dispatcher for the main UI.
   Dispatches [key] according to the current input mode
   (peeked from [UI_base.Vars.input_mode]). Side effects: mutates
   selection indices and input mode via [UI_base] setters, and may
   request application exit by setting [UI_base.Vars.quit] together
   with [UI_base.Vars.action]. Returns [`Handled]/[`Unhandled] as
   expected by [Nottui.Ui.keyboard_area]. *)
let keyboard_handler
~(session_state : Session.State.t)
~(search_result_groups : Session.search_result_group array)
(key : Nottui.Ui.key)
=
(* Snapshot current counts and selection indices from reactive state. *)
let document_count =
Array.length search_result_groups
in
let document_current_choice =
Lwd.peek UI_base.Vars.index_of_document_selected
in
let search_result_group =
if document_count = 0 then
None
else
Some search_result_groups.(document_current_choice)
in
let search_result_choice_count =
match search_result_group with
| None -> 0
| Some (_doc, search_results) -> Array.length search_results
in
let link_choice_count =
match search_result_group with
| None -> 0
| Some (doc, _search_results) -> Array.length (Document.links doc)
in
let search_result_current_choice =
Lwd.peek UI_base.Vars.index_of_search_result_selected
in
let link_current_choice =
Lwd.peek UI_base.Vars.index_of_link_selected
in
match Lwd.peek UI_base.Vars.input_mode with
(* Navigate: normal browsing mode; most single-key commands live here. *)
| Navigate -> (
match key with
(* Ctrl-C / Ctrl-Q: quit with no pending action. *)
| (`ASCII 'C', [`Ctrl])
| (`ASCII 'Q', [`Ctrl]) -> (
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := None;
`Handled
)
| (`ASCII '<', []) -> (
UI_base.Key_binding_info.decr_rotation ();
`Handled
)
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
`Handled
)
(* Space: toggle mark on the currently selected document. *)
| (`ASCII ' ', []) -> (
let index = Lwd.peek UI_base.Vars.index_of_document_selected in
if index < Array.length search_result_groups then (
let doc, _ = search_result_groups.(index) in
toggle_mark ~path:(Document.path doc)
);
`Handled
)
(* Mode-entering keys. *)
| (`ASCII 'm', []) -> (
UI_base.set_input_mode Mark;
`Handled
)
| (`ASCII 'M', []) -> (
UI_base.set_input_mode Unmark;
`Handled
)
| (`ASCII 'd', []) -> (
UI_base.set_input_mode Drop;
`Handled
)
| (`ASCII 'n', []) -> (
UI_base.set_input_mode Narrow;
`Handled
)
| (`ASCII 'r', []) -> (
UI_base.set_input_mode Reload;
`Handled
)
| (`ASCII 'y', []) -> (
UI_base.set_input_mode Copy;
`Handled
)
| (`ASCII 'Y', []) -> (
UI_base.set_input_mode Copy_paths;
`Handled
)
(* Session history navigation: undo (left) / redo (right). *)
| (`Arrow `Left, [])
| (`ASCII 'u', [])
| (`ASCII 'Z', [`Ctrl]) -> (
Session_manager.shift_ver ~offset:(-1);
`Handled
)
| (`Arrow `Right, [])
| (`ASCII 'R', [`Ctrl])
| (`ASCII 'Y', [`Ctrl]) -> (
Session_manager.shift_ver ~offset:1;
`Handled
)
(* Tab / Shift-Tab: shift the screen split boundary. *)
| (`Tab, [])
| (`Tab, [`Shift]) -> (
let direction =
match key with
| (_, [`Shift]) -> `Expand_left
| (_, _) -> `Expand_right
in
Session_manager.update_from_cur_snapshot
(fun cur_snapshot ->
let state = Session.Snapshot.state cur_snapshot in
let cur = Session.State.screen_split state in
let offset =
match direction with
| `Expand_left -> 1
| `Expand_right -> -1
in
let next =
Command.screen_split_of_int
(Command.int_of_screen_split cur + offset)
in
let command = `Split_screen next in
state
|> Session.run_command
(UI_base.task_pool ())
command
|> Option.get
|> (fun (command, state) ->
Session.Snapshot.make
~last_command:(Some command)
state)
);
`Handled
)
(* '?' / 'v': toggle visibility of a pane via a session command. *)
| (`ASCII '?', [])
| (`ASCII 'v', []) -> (
let pane =
match key with
| (`ASCII '?', []) -> `Key_binding_info
| (`ASCII 'v', []) -> `Bottom_right
| _ -> failwith "unexpected case"
in
Session_manager.update_from_cur_snapshot
(fun cur_snapshot ->
let state = Session.Snapshot.state cur_snapshot in
let cur = Session.State.show_pane state pane in
let command =
if cur then (
`Hide_pane pane
) else (
`Show_pane pane
) in
state
|> Session.run_command
(UI_base.task_pool ())
command
|> Option.get
|> (fun (command, state) ->
Session.Snapshot.make
~last_command:(Some command)
state)
);
`Handled
)
(* '=' / '-': scroll the content view down / up. *)
| (`ASCII '=', []) -> (
UI_base.incr_content_view_offset ();
`Handled
)
| (`ASCII '-', []) -> (
UI_base.decr_content_view_offset ();
`Handled
)
(* Shifted movement keys: move within the selected document's
   search results. *)
| (`Page `Down, [`Shift])
| (`ASCII 'J', [])
| (`Arrow `Down, [`Shift]) -> (
UI_base.set_search_result_selected
~choice_count:search_result_choice_count
(search_result_current_choice+1);
`Handled
)
| (`Page `Up, [`Shift])
| (`ASCII 'K', [])
| (`Arrow `Up, [`Shift]) -> (
UI_base.set_search_result_selected
~choice_count:search_result_choice_count
(search_result_current_choice-1);
`Handled
)
(* Unshifted movement keys: move between documents, unless there
   is a single document, in which case move within its results. *)
| (`Page `Down, [])
| (`ASCII 'j', [])
| (`Arrow `Down, []) -> (
if document_count = 1 then (
UI_base.set_search_result_selected
~choice_count:search_result_choice_count
(search_result_current_choice+1);
`Handled
) else (
UI_base.set_document_selected
~choice_count:document_count
(document_current_choice+1);
`Handled
)
)
| (`Page `Up, [])
| (`ASCII 'k', [])
| (`Arrow `Up, []) -> (
if document_count = 1 then (
UI_base.set_search_result_selected
~choice_count:search_result_choice_count
(search_result_current_choice-1);
`Handled
) else (
UI_base.set_document_selected
~choice_count:document_count
(document_current_choice-1);
`Handled
)
)
(* 'g' / 'G': jump to first / last document. *)
| (`ASCII 'g', []) -> (
UI_base.set_document_selected
~choice_count:document_count
0;
`Handled
)
| (`ASCII 'G', []) -> (
UI_base.set_document_selected
~choice_count:document_count
(document_count - 1);
`Handled
)
(* 'f' / 'F' / '/': focus the filter, path-fuzzy-rank, or search
   field respectively and switch to the matching input mode. *)
| (`ASCII 'f', []) -> (
Nottui.Focus.request UI_base.Vars.filter_field_focus_handle;
UI_base.set_input_mode Filter;
`Handled
)
| (`ASCII 'F', []) -> (
Nottui.Focus.request Vars.path_fuzzy_rank_field_focus_handle;
Lwd.set Vars.path_fuzzy_rank_field UI_base.empty_text_field;
UI_base.set_input_mode Path_fuzzy_rank;
`Handled
)
| (`ASCII '/', []) -> (
Nottui.Focus.request UI_base.Vars.search_field_focus_handle;
UI_base.set_input_mode Search;
`Handled
)
(* 'h': quit the TUI to edit command history externally. *)
| (`ASCII 'h', []) -> (
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := Some UI_base.Edit_command_history;
`Handled
)
(* Ctrl-S / Ctrl-O: save-script / open-scripts dialogs. *)
| (`ASCII 'S', [`Ctrl]) -> (
UI_base.set_input_mode Save_script;
refresh_script_files ();
Nottui.Focus.request Vars.script_name_field_focus_handle;
`Handled
)
| (`ASCII 'O', [`Ctrl]) -> (
UI_base.set_input_mode Scripts;
refresh_script_files ();
Nottui.Focus.request Vars.script_name_field_focus_handle;
`Handled
)
| (`ASCII 'x', []) -> (
UI_base.set_input_mode Clear;
`Handled
)
(* 'l': enter Links mode and pre-select the link nearest to the
   currently selected search result. *)
| (`ASCII 'l', []) -> (
UI_base.set_input_mode Links;
if search_result_choice_count > 0 then (
let (doc, search_results) = Option.get search_result_group in
let search_result = search_results.(search_result_current_choice) in
let links = Document.links doc in
(* Midpoint of the found phrase's min/max word positions; used as
   the reference point for picking the nearest link. *)
let avg_pos =
List.fold_left (fun min_max_pos search_result ->
let { Search_result.found_word_pos; _ } = search_result in
match min_max_pos with
| None -> Some (found_word_pos, found_word_pos)
| Some (min_pos, max_pos) -> (
Some (min found_word_pos min_pos,
max found_word_pos max_pos)
)
)
None
(Search_result.found_phrase search_result)
|> (fun x ->
let (x, y) = Option.get x in
(x + y) / 2)
in
let before, exact, after = Int_map.split avg_pos (Document.link_index_of_start_pos doc) in
let index =
match exact with
| Some index -> Some index
| None -> (
match
Int_map.max_binding_opt before,
Int_map.min_binding_opt after
with
| Some (pos_x, index_x), Some (pos_y, index_y) -> (
let diff_x = Int.to_float (Int.abs (pos_x - avg_pos)) in
let diff_y = Int.to_float (Int.abs (pos_y - avg_pos)) in
(* We prefer picking y (link after search result)
over x (link before search result), as it usually feels more
intuitive to jump forward than backward.
But if distance to x is <= 50% the distance
to y, then we resort to x.
*)
if diff_x /. diff_y <= 0.5 then (
Some index_x
) else (
let link_x = links.(index_x) in
let end_inc_pos_x = link_x.Link.end_inc_pos in
if pos_x <= avg_pos && avg_pos <= end_inc_pos_x then (
Some index_x
) else (
Some index_y
)
)
)
| Some (_pos, index), None
| None, Some (_pos, index) -> Some index
| None, None -> None
)
in
match index with
| None -> ()
| Some index -> (
UI_base.set_link_selected
~choice_count:link_choice_count
index
)
);
`Handled
)
(* 's' / 'S': enter Sort mode in ascending / descending order. *)
| (`ASCII 's', []) -> (
UI_base.set_input_mode (Sort `Asc);
`Handled
)
| (`ASCII 'S', []) -> (
UI_base.set_input_mode (Sort `Desc);
`Handled
)
(* Enter: quit and open the selected document (and result, if any). *)
| (`Enter, []) -> (
Option.iter (fun (doc, search_results) ->
let search_result =
if search_result_current_choice < Array.length search_results then
Some search_results.(search_result_current_choice)
else
None
in
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action :=
Some (UI_base.Open_file_and_search_result (doc, search_result));
)
search_result_group;
`Handled
)
(* Unrecognized keys are swallowed in Navigate mode. *)
| _ -> `Handled
)
(* Sort: pick the sort key; order was chosen when entering the mode. *)
| Sort order -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII 's', []) -> (
sort (`Score, order);
true
)
| (`ASCII 'p', []) -> (
sort (`Path, order);
true
)
| (`ASCII 'd', []) -> (
sort (`Path_date, order);
true
)
| (`ASCII 'm', []) -> (
sort (`Mod_time, order);
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Clear: clear search/filter fields or the command history. *)
| Clear -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '/', []) -> (
Lwd.set UI_base.Vars.search_field UI_base.empty_text_field;
update_search ~commit:true ();
true
)
| (`ASCII 'f', []) -> (
Lwd.set UI_base.Vars.filter_field UI_base.empty_text_field;
update_filter ~commit:true ();
true
)
| (`ASCII 'h', []) -> (
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := Some UI_base.Clear_command_history;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Drop: remove documents from the current session view. *)
| Drop -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'd', []) -> (
Option.iter (fun (doc, _search_results) ->
drop ~document_count (`Path (Document.path doc))
) search_result_group;
true
)
| (`ASCII 'D', []) -> (
Option.iter (fun (doc, _search_results) ->
drop ~document_count (`All_except (Document.path doc))
) search_result_group;
true
)
| (`ASCII 'l', []) -> (
drop ~document_count `Listed;
true
)
| (`ASCII 'L', []) -> (
drop ~document_count `Unlisted;
true
)
| (`ASCII 'm', []) -> (
drop ~document_count `Marked;
true
)
| (`ASCII 'M', []) -> (
drop ~document_count `Unmarked;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Mark: mark documents in bulk. *)
| Mark -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'l', []) -> (
mark `Listed;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Unmark: unmark listed documents or all documents. *)
| Unmark -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'l', []) -> (
unmark `Listed;
true
)
| (`ASCII 'a', []) -> (
unmark `All;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Narrow: digit keys 0-9 narrow search scope to that level. *)
| Narrow -> (
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII c, []) -> (
let code_0 = Char.code '0' in
let code_9 = Char.code '9' in
let code_c = Char.code c in
if code_0 <= code_c && code_c <= code_9 then (
let level = code_c - code_0 in
narrow_search_scope_to_level ~level;
true
) else (
false
)
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Copy: pipe rendered search result groups to the clipboard. *)
| Copy -> (
let copy_search_result_groups (s : Session.search_result_group Seq.t) =
Clipboard.pipe_to_clipboard (fun oc ->
Printers.search_result_groups
~color:false
~underline:true
oc
s
)
in
let copy_search_result_group x =
copy_search_result_groups (Seq.return x)
in
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'y', []) -> (
Option.iter (fun (doc, search_results) ->
copy_search_result_group
(doc,
(if search_result_current_choice < Array.length search_results then
[|search_results.(search_result_current_choice)|]
else
[||])
)
)
search_result_group;
true
)
| (`ASCII 'a', []) -> (
Option.iter
copy_search_result_group
search_result_group;
true
)
| (`ASCII 'm', []) -> (
let marked =
Session.State.marked_document_paths session_state
in
search_result_groups
|> Array.to_seq
|> Seq.filter (fun (doc, _) ->
String_set.mem (Document.path doc) marked)
|> copy_search_result_groups;
true
)
| (`ASCII 'l', []) -> (
search_result_groups
|> Array.to_seq
|> copy_search_result_groups;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Copy_paths: pipe document paths (various selections) to the
   clipboard. *)
| Copy_paths -> (
let copy_paths s =
Clipboard.pipe_to_clipboard (fun oc ->
Seq.iter (Printers.path_image ~color:false oc) s
)
in
let exit =
match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'y', []) -> (
Option.iter (fun (doc, _search_results) ->
copy_paths (Seq.return (Document.path doc))
)
search_result_group;
true
)
| (`ASCII 'm', []) -> (
String_set.inter
(Session.State.usable_document_paths session_state)
(Session.State.marked_document_paths session_state)
|> String_set.to_seq
|> copy_paths;
true
)
| (`ASCII 'M', []) -> (
String_set.diff
(Session.State.usable_document_paths session_state)
(Session.State.marked_document_paths session_state)
|> String_set.to_seq
|> copy_paths;
true
)
| (`ASCII 'l', []) -> (
Session.State.usable_document_paths session_state
|> String_set.to_seq
|> copy_paths;
true
)
| (`ASCII 'L', []) -> (
Session.State.unusable_document_paths session_state
|> copy_paths;
true
)
| _ -> false
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Reload: reload the selected document or recompute all sources. *)
| Reload -> (
let exit =
(match key with
| (`Escape, []) -> true
| (`ASCII '>', []) -> (
UI_base.Key_binding_info.incr_rotation ();
false
)
| (`ASCII 'r', []) -> (
reload_document_selected ~search_result_groups;
true
)
| (`ASCII 'a', []) -> (
UI_base.reset_document_selected ();
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := Some UI_base.Recompute_document_src;
true
)
| _ -> false
);
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Links: browse, open, or copy links of the selected document. *)
| Links -> (
let doc_and_link =
if link_choice_count > 0 then (
Option.map (fun (doc, _search_results) ->
(doc, (Document.links doc).(link_current_choice))
) search_result_group
) else (
None
)
in
let set_action_to_open_link () =
Option.iter (fun (doc, link) ->
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action :=
Some (UI_base.Open_link (doc, link))
) doc_and_link
in
let exit =
(match key with
| (`Escape, []) -> true
| (`Enter, []) -> (
set_action_to_open_link ();
true
)
| (`ASCII 'o', []) -> (
set_action_to_open_link ();
false
)
| (`Page `Down, [])
| (`ASCII 'j', [])
| (`Arrow `Down, []) -> (
UI_base.set_link_selected
~choice_count:link_choice_count
(link_current_choice+1);
false
)
| (`Page `Up, [])
| (`ASCII 'k', [])
| (`Arrow `Up, []) -> (
UI_base.set_link_selected
~choice_count:link_choice_count
(link_current_choice-1);
false
)
| (`ASCII 'y', []) -> (
Option.iter (fun (_doc, link) ->
Clipboard.pipe_to_clipboard (fun oc ->
output_string oc link.Link.link
);
) doc_and_link;
true
)
| _ -> false
);
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Save_script_overwrite: confirm overwriting an existing script. *)
| Save_script_overwrite script_name -> (
(match key with
| (`Escape, [])
| (`ASCII 'n', []) -> (
UI_base.set_input_mode Navigate;
)
| (`ASCII 'y', []) -> (
let path = compute_save_script_path script_name in
save_script ~path;
UI_base.set_input_mode (Save_script_edit script_name);
)
| _ -> ()
);
`Handled
)
(* Save_script_no_name: acknowledge the "no name given" message. *)
| Save_script_no_name -> (
let exit =
(match key with
| (`Enter, []) -> true
| _ -> false
);
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Save_script_edit: optionally open the saved script in an editor. *)
| Save_script_edit script_name -> (
let exit =
(match key with
| (`Escape, [])
| (`ASCII 'n', []) -> true
| (`ASCII 'y', []) -> (
let path = compute_save_script_path script_name in
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := Some (UI_base.Edit_script path);
true
)
| _ -> false
);
in
if exit then (
UI_base.set_input_mode Navigate;
);
`Handled
)
(* Delete_script_confirm: confirm deletion of a script file. *)
| Delete_script_confirm (_script, path) -> (
(match key with
| (`Escape, [])
| (`ASCII 'n', []) -> (
UI_base.set_input_mode Scripts;
)
| (`ASCII 'y', []) -> (
Sys.remove path;
refresh_script_files ();
UI_base.set_input_mode Scripts;
)
| _ -> ()
);
`Handled
)
(* Remaining input modes (Search, Filter, ...) are handled by the
   focused text-field widgets, not by this dispatcher. *)
| _ -> `Unhandled
(* Root UI widget. Rebuilds whenever the current session snapshot or
   the terminal size changes: clamps the selection indices against the
   fresh search result groups, then stacks the top pane (with the
   keyboard handler attached) above the bottom pane. *)
let main : Nottui.ui Lwd.t =
let$* (_, snapshot) =
Session_manager.cur_snapshot
in
let session_state =
Session.Snapshot.state snapshot
in
let search_result_groups =
Session.State.search_result_groups session_state
in
let document_count = Array.length search_result_groups in
(* Re-apply the current selections so they are clamped to the new
   choice counts after the session state changed. *)
UI_base.set_document_selected
~choice_count:document_count
(Lwd.peek UI_base.Vars.index_of_document_selected);
if document_count > 0 then (
UI_base.set_search_result_selected
~choice_count:(Array.length
(snd search_result_groups.(Lwd.peek UI_base.Vars.index_of_document_selected)))
(Lwd.peek UI_base.Vars.index_of_search_result_selected)
);
if document_count > 0 then (
UI_base.set_link_selected
~choice_count:(search_result_groups.(Lwd.peek UI_base.Vars.index_of_document_selected)
|> fst
|> Document.links
|> Array.length)
(Lwd.peek UI_base.Vars.index_of_link_selected)
);
let$* (term_width, term_height) = Lwd.get UI_base.Vars.term_width_height in
let show_key_binding_info_pane = Session.State.show_pane session_state `Key_binding_info in
(* Bottom pane is laid out first; the top pane gets whatever vertical
   space remains. *)
let$* bottom_pane =
Bottom_pane.main
~width:term_width
~search_result_groups
~show_key_binding_info_pane
in
let bottom_pane_height = Nottui.Ui.layout_height bottom_pane in
let top_pane_height = term_height - bottom_pane_height in
let screen_split = Session.State.screen_split session_state in
let show_bottom_right_pane = Session.State.show_pane session_state `Bottom_right in
let$* top_pane =
Top_pane.main
~width:term_width
~height:top_pane_height
~documents_marked:(Session.State.marked_document_paths session_state)
~screen_split
~show_bottom_right_pane
~search_result_groups
in
Nottui_widgets.vbox
[
Lwd.return (Nottui.Ui.keyboard_area
(keyboard_handler ~session_state ~search_result_groups)
top_pane);
Lwd.return bottom_pane;
]
================================================
FILE: bin/UI_base.ml
================================================
open Docfd_lib
open Lwd_infix
(* All input modes of the TUI. The current mode determines both which
   key-binding grid is shown and how keystrokes are dispatched. *)
type input_mode =
| Navigate
| Search
| Filter
| Clear
| Sort of [ `Asc | `Desc ]
| Drop
| Mark
| Unmark
| Narrow
| Copy
| Copy_paths
| Reload
| Save_script
(* Payload: name of the script about to be overwritten. *)
| Save_script_overwrite of string
| Save_script_no_name
(* Payload: name of the script just saved, offered for editing. *)
| Save_script_edit of string
| Scripts
(* Payload: (script, path) of the script pending deletion. *)
| Delete_script_confirm of string * string
| Links
| Path_fuzzy_rank
[@@deriving ord]
(* Map keyed by input mode. The comparison deliberately ignores the
   payloads of the parameterized constructors so that, e.g., all
   [Save_script_overwrite _] values share a single map entry; all other
   modes fall back to the derived [compare_input_mode]. *)
module Input_mode_map = Map.Make (struct
type t = input_mode
let compare x y =
match x, y with
| Save_script_overwrite _, Save_script_overwrite _ -> 0
| Save_script_edit _, Save_script_edit _ -> 0
| Delete_script_confirm _, Delete_script_confirm _ -> 0
| _, _ -> compare_input_mode x y
end)
(* Action to perform after the TUI exits; stored in [Vars.action]
   alongside setting [Vars.quit]. *)
type top_level_action =
| Recompute_document_src
| Open_file_and_search_result of Document.t * Search_result.t option
| Open_link of (Document.t * Link.t)
| Clear_command_history
| Edit_command_history
| Open_script of string
| Edit_script of string
(* Status indicator for the search bar. *)
type search_status = [
| `Idle
| `Searching
| `Parse_error
]
(* Status indicator for the filter bar. *)
type filter_status = [
| `Idle
| `Filtering
| `Parse_error
]
(* Empty edit-field state: (text, cursor position). *)
let empty_text_field = ("", 0)
(* Chooses how locations are rendered for a document based on its file
   format: PDFs show page numbers, Pandoc-converted formats show no
   location, everything else shows line numbers. *)
let render_mode_of_document (doc : Document.t)
: Content_and_search_result_rendering.render_mode =
match File_utils.format_of_file (Document.path doc) with
| `PDF -> `Page_num_only
| `Pandoc_supported_format -> `None
| `Text | `Other -> `Line_num_only
(* Shared mutable UI state. Lwd.var values drive reactive re-rendering;
   plain refs/atomics hold state that does not need to trigger
   re-render. *)
module Vars = struct
(* Set to true to request exiting the TUI loop. *)
let quit = Lwd.var false
let pool : Task_pool.t option Atomic.t = Atomic.make None
(* Action to perform after quitting; see [top_level_action]. *)
let action : top_level_action option ref = ref None
let eio_env : Eio_unix.Stdenv.base option ref = ref None
let hide_document_list : bool Lwd.var = Lwd.var false
let input_mode : input_mode Lwd.var = Lwd.var Navigate
let document_src : Document_src.t ref = ref (Document_src.(Files empty_file_collection))
let term : Notty_unix.Term.t option ref = ref None
let term_width_height : (int * int) Lwd.var = Lwd.var (0, 0)
(* Scroll offset of the content view, relative to the selected item. *)
let content_view_offset = Lwd.var 0
let autocomplete_choices = Lwd.var []
(* Text fields and their keyboard focus handles. *)
let filter_field = Lwd.var empty_text_field
let filter_field_focus_handle = Nottui.Focus.make ()
let search_field = Lwd.var empty_text_field
let search_field_focus_handle = Nottui.Focus.make ()
let search_ui_status : search_status Lwd.var = Lwd.var `Idle
let filter_ui_status : filter_status Lwd.var = Lwd.var `Idle
(* Current selections across the various lists. *)
let index_of_document_selected = Lwd.var 0
let index_of_search_result_selected = Lwd.var 0
let index_of_link_selected = Lwd.var 0
let index_of_script_selected = Lwd.var 0
end
(* Reset the content view scroll offset to 0. *)
let reset_content_view_offset () =
Lwd.set Vars.content_view_offset 0
(* Scroll the content view by decrementing the offset by one. *)
let decr_content_view_offset () =
  Lwd.set Vars.content_view_offset (Lwd.peek Vars.content_view_offset - 1)
(* Scroll the content view by incrementing the offset by one. *)
let incr_content_view_offset () =
  Lwd.set Vars.content_view_offset (Lwd.peek Vars.content_view_offset + 1)
(* Reset all selections (document, search result, link) to the first
   entry and reset the content view scroll. *)
let reset_document_selected () =
reset_content_view_offset ();
Lwd.set Vars.index_of_document_selected 0;
Lwd.set Vars.index_of_search_result_selected 0;
Lwd.set Vars.index_of_link_selected 0
(* Select document [n], clamped to [choice_count] via
   [Misc_utils.bound_selection]. Only acts when the clamped index
   actually differs from the current one; a change also resets the
   content view offset and the dependent search-result/link
   selections. *)
let set_document_selected ~choice_count n =
let n = Misc_utils.bound_selection ~choice_count n in
let old = Lwd.peek Vars.index_of_document_selected in
if old <> n then (
reset_content_view_offset ();
Lwd.set Vars.index_of_document_selected n;
Lwd.set Vars.index_of_search_result_selected 0;
Lwd.set Vars.index_of_link_selected 0;
)
(* Select search result [n], clamped to [choice_count].
   NOTE(review): unlike [set_document_selected], the comparison with
   the old value happens BEFORE clamping, so an out-of-range [n] that
   clamps back to the current index still resets the content view
   offset — confirm whether this asymmetry is intentional. *)
let set_search_result_selected ~choice_count n =
let old = Lwd.peek Vars.index_of_search_result_selected in
if old <> n then (
reset_content_view_offset ();
let n = Misc_utils.bound_selection ~choice_count n in
Lwd.set Vars.index_of_search_result_selected n
)
(* Select link [n], clamped to [choice_count]. Same compare-then-clamp
   ordering as [set_search_result_selected] (see the NOTE there). *)
let set_link_selected ~choice_count n =
let old = Lwd.peek Vars.index_of_link_selected in
if old <> n then (
reset_content_view_offset ();
let n = Misc_utils.bound_selection ~choice_count n in
Lwd.set Vars.index_of_link_selected n
)
(* Select script [n], clamped to [choice_count]. Does not touch the
   content view offset, unlike the other selection setters. *)
let set_script_selected ~choice_count n =
let old = Lwd.peek Vars.index_of_script_selected in
if old <> n then (
let n = Misc_utils.bound_selection ~choice_count n in
Lwd.set Vars.index_of_script_selected n
)
(* Returns the global task pool. Raises [Invalid_argument] (via
   [Option.get]) if the pool has not been installed yet. *)
let task_pool () =
Option.get (Atomic.get Vars.pool)
(* Returns the Eio environment; raises if not yet initialized. *)
let eio_env () =
Option.get !Vars.eio_env
(* Returns the Notty terminal handle; raises if not yet initialized. *)
let term () =
Option.get !Vars.term
(* Invisible background sized to the full terminal; used to force
   widgets to occupy the whole screen. *)
let full_term_sized_background =
let$ (term_width, term_height) = Lwd.get Vars.term_width_height in
Notty.I.void term_width term_height
|> Nottui.Ui.atom
(* Vertical pane separator: a column of U+2502 in white, [height]
   cells tall. *)
let vbar ~height =
  let sep = Uchar.of_int 0x2502 in
  Nottui.Ui.atom (Notty.I.uchar Notty.A.(fg white) sep 1 height)
(* Horizontal pane separator: a row of U+2015 in white, [width]
   cells wide. *)
let hbar ~width =
  let sep = Uchar.of_int 0x2015 in
  Nottui.Ui.atom (Notty.I.uchar Notty.A.(fg white) sep width 1)
(* Horizontal two-pane layout. Splits [width] between [x] (left) and
   [y] (right) according to [l_ratio], drawing a [vbar] between them.
   If either side ends up with zero width, only the other side is
   rendered (and no separator is drawn). *)
let hpane
~l_ratio
~width
~height
(x : width:int -> Nottui.ui Lwd.t)
(y : width:int -> Nottui.ui Lwd.t)
: Nottui.ui Lwd.t =
let l_width =
(* Minus 1 for pane separator bar. *)
Int.to_float width *. l_ratio
|> Float.floor
|> Int.of_float
|> (fun x ->
if x = 0 || x = width then (
x
) else (
x - 1
))
in
let r_width =
(* Minus 1 here too just to be conservative. *)
width - l_width - 1
in
(* Force each side to exactly its allotted width and full height. *)
let crop w x = Nottui.Ui.resize ~w ~h:height x in
let x () =
let$ x = x ~width:l_width in
crop l_width x
in
let y () =
let$ y = y ~width:r_width in
crop r_width y
in
if l_width = 0 then (
y ()
) else if r_width = 0 then (
x ()
) else (
let$* x = x () in
let$ y = y () in
Nottui.Ui.hcat [
x;
vbar ~height;
y;
]
)
(* Vertical two-pane layout: [x] on top (ceil(height/2) rows), [y]
   below (floor(height/2) - 1 rows), with an [hbar] separator in
   between, so the total equals [height]. *)
let vpane
~width
~height
(x : height:int -> Nottui.ui Lwd.t)
(y : height:int -> Nottui.ui Lwd.t)
: Nottui.ui Lwd.t =
let t_height =
(Misc_utils.div_round_up height 2)
in
let b_height =
(* Minus 1 for pane separator bar. *)
(height / 2) - 1
in
let$* x = x ~height:t_height in
let$ y = y ~height:b_height in
let crop h x = Nottui.Ui.resize ~w:width ~h x in
Nottui.Ui.vcat [
crop t_height x;
hbar ~width;
crop b_height y;
]
(* Integer min/max/clamp helpers re-exported from Lwd_utils,
   used by [edit_field] below. *)
let (mini, maxi, clampi) = Lwd_utils.(mini, maxi, clampi)
(* Modified from upstream Nottui source code. *)
(* Single-line text edit widget over a [(text, cursor_pos)] Lwd.var.
   Callbacks:
   - [on_change]  : called with the new (text, pos) on every edit/move;
   - [on_submit]  : called on Enter;
   - [on_cancel]  : called on Escape;
   - [on_tab]     : optional Tab hook (e.g. autocomplete);
   - [on_up_down] : optional Up/Down arrow hook (e.g. history);
   - [on_ctrl_prefixed] : optional hook for any Ctrl-modified key.
   The widget itself never mutates [state]; all updates flow through
   the callbacks. Mouse left-click moves the cursor and requests
   focus. *)
let edit_field
~focus
~on_change
~on_submit
~on_cancel
?(on_tab : ((string * int) -> unit) option)
?(on_up_down : ([ `Up | `Down ] -> (string * int) -> unit) option)
?(on_ctrl_prefixed : (Nottui.Ui.key -> (string * int) -> [ `Handled | `Unhandled ]) option)
state
=
let update _focus_h focus (text, pos) =
(* Keep the cursor within the text bounds. *)
let pos = clampi pos ~min:0 ~max:(String.length text) in
let content =
Nottui.Ui.atom @@ Notty.I.hcat @@
if Nottui.Focus.has_focus focus then (
(* Focused: render text on lightblue with the character under the
   cursor (or a trailing space) highlighted in lightred. *)
let attr = Notty.A.(bg lightblue) in
let len = String.length text in
(if pos >= len
then [Notty.I.string attr text]
else [Notty.I.string attr (String.sub text 0 pos)])
@
(if pos < String.length text then
[Notty.I.string Notty.A.(bg lightred) (String.sub text pos 1);
Notty.I.string attr (String.sub text (pos + 1) (len - pos - 1))]
else [Notty.I.string Notty.A.(bg lightred) " "]);
) else
(* Unfocused: underlined text (a single space when empty). *)
[Notty.I.string Notty.A.(st underline) (if text = "" then " " else text)]
in
let handler = function
| `Escape, [] -> (
on_cancel (text, pos);
`Handled
)
(* Printable character: insert at the cursor. *)
| `ASCII k, [] -> (
let text =
if pos < String.length text then (
String.sub text 0 pos ^ String.make 1 k ^
String.sub text pos (String.length text - pos)
) else (
text ^ String.make 1 k
)
in
on_change (text, (pos + 1));
`Handled
)
(* Backspace: delete the character before the cursor. *)
| `Backspace, _ -> (
let text =
if pos > 0 then (
if pos < String.length text then (
String.sub text 0 (pos - 1) ^
String.sub text pos (String.length text - pos)
) else if String.length text > 0 then (
String.sub text 0 (String.length text - 1)
) else text
) else text
in
let pos = maxi 0 (pos - 1) in
on_change (text, pos);
`Handled
)
| `Enter, _ -> (
on_submit (text, pos);
`Handled
)
| `Arrow `Left, [] -> (
let pos = mini (String.length text) pos in
if pos > 0 then (
on_change (text, pos - 1);
`Handled
)
else `Unhandled
)
| `Arrow `Right, [] -> (
let pos = pos + 1 in
if pos <= String.length text
then (on_change (text, pos); `Handled)
else `Unhandled
)
(* Up/Down are only handled when [on_up_down] is provided. *)
| (`Arrow (`Up as ud), [])
| (`Arrow (`Down as ud), []) -> (
match on_up_down with
| None -> `Unhandled
| Some on_up_down -> (
on_up_down ud (text, pos);
`Handled
)
)
| (`Tab, []) -> (
match on_tab with
| None -> `Unhandled
| Some on_tab -> (
on_tab (text, pos);
`Handled
)
)
(* Any Ctrl-modified key is delegated to [on_ctrl_prefixed]. *)
| (_, [`Ctrl]) as x -> (
match on_ctrl_prefixed with
| None -> `Unhandled
| Some f -> (
f x (text, pos)
)
)
| _ -> `Unhandled
in
Nottui.Ui.keyboard_area ~focus handler content
in
let state = Lwd.get state in
let node =
Lwd.map2 ~f:(update focus) (Nottui.Focus.status focus) state
in
(* Left click: move cursor to the clicked column and grab focus. *)
let mouse_grab (text, pos) ~x ~y:_ = function
| `Left ->
if x <> pos then on_change (text, x);
Nottui.Focus.request focus;
`Handled
| _ -> `Unhandled
in
Lwd.map2 state node ~f:(fun state content ->
Nottui.Ui.mouse_area (mouse_grab state) content
)
(* Generic scroll-wheel handler: invokes [f] with the scroll direction
   and reports the event as handled; every other button is left
   unhandled. The [x]/[y] coordinates are accepted but unused. *)
let mouse_handler
    ~(f : [ `Up | `Down ] -> unit)
    ~x ~y
    (button : Notty.Unescape.button)
  =
  ignore x;
  ignore y;
  match button with
  | `Scroll `Down ->
    f `Down;
    `Handled
  | `Scroll `Up ->
    f `Up;
    `Handled
  | _ -> `Unhandled
(* Renders the content snippet of the selected document, centered on
   either the selected link (in Links mode) or the selected search
   result (all other modes). Scroll wheel adjusts the content view
   offset. *)
module Content_view = struct
let main
~(input_mode : input_mode)
~height
~width
~(search_result_group : Document.t * Search_result.t array)
~(search_result_selected : int)
~(link_selected : int)
: Nottui.ui Lwd.t =
let (document, search_results) = search_result_group in
let links = Document.links document in
(* Decide what to center the snippet on; [None] when there is
   nothing selected of the relevant kind. *)
let data =
let search_result_count = Array.length search_results in
let link_count = Array.length links in
match input_mode with
| Links -> (
if link_count = 0 then (
None
) else (
Some (`Link links.(link_selected))
)
)
| _ -> (
if search_result_count = 0 then (
None
) else (
Some (`Search_result search_results.(search_result_selected))
)
)
in
(* Depend on the offset var so scrolling triggers a re-render. *)
let$* _ = Lwd.get Vars.content_view_offset in
let content =
Content_and_search_result_rendering.content_snippet
~doc_id:(Document.doc_id document)
~view_offset:Vars.content_view_offset
?data
~height
~width
()
in
let$* background = full_term_sized_background in
Nottui.Ui.join_z background (Nottui.Ui.atom content)
|> Nottui.Ui.mouse_area
(mouse_handler
~f:(fun direction ->
match direction with
| `Up -> decr_content_view_offset ()
| `Down -> incr_content_view_offset ()
)
)
|> Lwd.return
end
(* Styling and pre-rendered images for the status bar: black-on-white
   background, spacer between elements, and one bold mode label per
   input mode. *)
module Status_bar = struct
let fg_color = Notty.A.black
let bg_color = Notty.A.white
let attr = Notty.A.(bg bg_color ++ fg fg_color)
(* Full-terminal-width white background strip, one row tall. *)
let background_bar : Nottui.Ui.t Lwd.t =
let$ (term_width, _term_height) = Lwd.get Vars.term_width_height in
Notty.I.char Notty.A.(bg bg_color) ' ' term_width 1
|> Nottui.Ui.atom
let element_spacing = 4
let element_spacer =
Notty.(I.string
A.(bg bg_color ++ fg fg_color))
(String.make element_spacing ' ')
(* Map from input mode to its status-bar label image. Every label is
   overlaid on a background of the widest label's width so the bar
   does not shift when the mode changes. Payload values in the keys
   below are dummies; Input_mode_map's compare ignores them. *)
let input_mode_images =
let l =
[ (Navigate, "NAVIGATE")
; (Search, "SEARCH")
; (Filter, "FILTER")
; (Clear, "CLEAR")
; (Sort `Asc, "SORT-ASC")
; (Sort `Desc, "SORT-DESC")
; (Drop, "DROP")
; (Mark, "MARK")
; (Unmark, "UNMARK")
; (Narrow, "NARROW")
; (Copy, "COPY")
; (Copy_paths, "COPY-PATHS")
; (Reload, "RELOAD")
; (Save_script, "SAVE-SCRIPT")
; (Save_script_overwrite "", "SAVE-SCRIPT")
; (Save_script_no_name, "SAVE-SCRIPT")
; (Save_script_edit "", "SAVE-SCRIPT")
; (Scripts, "SCRIPTS")
; (Delete_script_confirm ("", ""), "DELETE-SCRIPT")
; (Links, "LINKS")
; (Path_fuzzy_rank, "PATH-FUZZY-RANK")
]
in
let max_input_mode_string_len =
List.fold_left (fun acc (_, s) ->
max acc (String.length s)
)
0
l
in
let input_mode_string_background =
Notty.I.char Notty.A.(bg bg_color) ' ' max_input_mode_string_len 1
in
List.fold_left (fun m (mode, s) ->
let s = Notty.(I.string A.(bg bg_color ++ fg fg_color ++ st bold) s) in
Input_mode_map.add mode Notty.I.(s > input_mode_string_background) m
)
Input_mode_map.empty
l
end
(* Key-binding help grid: one grid of label/message pairs per input
   mode, with horizontal "rotation" to cycle columns, and a blink
   mechanism that briefly highlights a binding's label when it is
   used. Blinking is driven by two Eio fibers communicating through
   streams; [grid_lights] maps a label to its last-blink timestamp and
   the Lwd vars of every grid cell showing that label. *)
module Key_binding_info = struct
(* Number of columns to rotate each grid row by. *)
let rotation : int Lwd.var = Lwd.var 0
let incr_rotation () =
Lwd.set rotation (Lwd.peek rotation + 1)
let decr_rotation () =
Lwd.set rotation (Lwd.peek rotation - 1)
let reset_rotation () =
Lwd.set rotation 0
type labelled_msg = {
label : string;
msg : string;
}
type labelled_msg_line = labelled_msg list
type grid_contents = (input_mode * (labelled_msg_line list)) list
type grid_lookup = Nottui.ui Lwd.t Input_mode_map.t
(* label -> (timestamp of last blink request, light vars to toggle).
   Protected by [lock]. *)
let grid_lights : (string, Mtime.t ref * bool Lwd.var list) Hashtbl.t = Hashtbl.create 100
let lock = Eio.Mutex.create ()
(* Requests to turn a label's light on / off (off requests carry the
   request time and the target off time). *)
let grid_light_on_req : string Eio.Stream.t = Eio.Stream.create 100
let grid_light_off_req : (Mtime.t * Mtime.t * string) Eio.Stream.t = Eio.Stream.create 100
(* Request a brief highlight of [label] in the grid. *)
let blink label =
Eio.Stream.add grid_light_on_req label
(* Runs forever: one fiber turns lights on and schedules the off
   event; the other sleeps until the off time and turns the light off
   — but only if no newer blink arrived in the meantime (checked via
   the stored timestamp). *)
let grid_light_fiber () =
let clock = Eio.Stdenv.mono_clock (eio_env ()) in
Eio.Fiber.both
(fun () ->
while true do
let label = Eio.Stream.take grid_light_on_req in
let ts_now = Eio.Time.Mono.now clock in
Eio.Mutex.use_rw lock ~protect:false (fun () ->
match Hashtbl.find_opt grid_lights label with
| None -> failwith "unexpected case"
| Some (ts, l) -> (
ts := ts_now;
List.iter (fun x -> Lwd.set x true) l;
Eio.Stream.add
grid_light_off_req
(ts_now, Option.get (Mtime.(add_span ts_now Params.blink_on_duration)), label);
)
)
done
)
(fun () ->
while true do
let ts_req_time, ts_target_time, label = Eio.Stream.take grid_light_off_req in
Eio.Time.Mono.sleep_until clock ts_target_time;
Eio.Mutex.use_rw lock ~protect:false (fun () ->
match Hashtbl.find_opt grid_lights label with
| None -> failwith "unexpected case"
| Some (ts_last_update, l) -> (
(* Only switch off if this off-request corresponds to the
   most recent blink of this label. *)
if Mtime.equal !ts_last_update ts_req_time then (
List.iter (fun x -> Lwd.set x false) l;
)
)
)
done
)
(* Build the per-mode grid widgets. Column widths are computed per
   grid so labels/messages in the same column align; each cell also
   registers its light var in [grid_lights]. *)
let make_grid_lookup grid_contents : grid_lookup =
(* For each grid: column index -> (max label width, max msg width),
   measured in grapheme clusters. *)
let max_label_msg_len_lookup : (input_mode * (int * int) Int_map.t) list =
grid_contents
|> List.map (fun (grid_key, grid) ->
let lookup =
List.fold_left
(fun (acc : (int * int) Int_map.t) (line : labelled_msg_line) ->
line
|> List.to_seq
|> Seq.fold_lefti
(fun (acc : (int * int) Int_map.t) col ({ label; msg } : labelled_msg) ->
let label_len =
Uuseg_string.fold_utf_8 `Grapheme_cluster (fun x _ -> x + 1) 0 label
in
let msg_len =
Uuseg_string.fold_utf_8 `Grapheme_cluster (fun x _ -> x + 1) 0 msg
in
let (max_label_len, max_msg_len) =
match Int_map.find_opt col acc with
| None -> (label_len, msg_len)
| Some (max_label_len, max_msg_len) -> (
(max max_label_len label_len,
max max_msg_len msg_len)
)
in
Int_map.add col (max_label_len, max_msg_len) acc
)
acc
)
Int_map.empty
grid
in
(grid_key, lookup)
)
in
(* One grid cell: highlighted label + message, padded to the column
   widths. Registers this cell's light var under its label. *)
let label_msg_pair grid_key col { label; msg } : Nottui.ui Lwd.t =
let (max_label_len, max_msg_len) =
List.assoc grid_key max_label_msg_len_lookup
|> Int_map.find col
in
let light_on_var = Lwd.var false in
Eio.Mutex.use_rw lock ~protect:false (fun () ->
let x =
match Hashtbl.find_opt grid_lights label with
| None -> (ref Mtime.min_stamp, [ light_on_var ])
| Some (x, l) -> (x, light_on_var :: l)
in
Hashtbl.replace grid_lights label x
);
let$ light_on = Lwd.get light_on_var in
let label_attr =
if light_on then
Notty.A.(fg black ++ bg lightyellow ++ st bold)
else
Notty.A.(fg lightyellow ++ st bold)
in
let msg_attr = Notty.A.empty in
let msg = String.capitalize_ascii msg in
let content = Notty.(I.hcat
[ I.(string label_attr label
>
(string label_attr (String.make max_label_len ' ')))
; I.string A.empty " "
; I.string msg_attr msg
]
)
in
let full_background =
Notty.I.void (max_label_len + 1 + max_msg_len + 3) 1
in
Notty.I.(content > full_background)
|> Nottui.Ui.atom
in
List.fold_left (fun m (grid_key, grid_contents) ->
let max_row_size =
List.fold_left (fun n l ->
max n (List.length l)
)
0
grid_contents
in
(* Pad shorter rows with empty cells so rotation is uniform. *)
let grid_contents =
grid_contents
|> List.map (fun l ->
let padding =
List.init (max_row_size - List.length l)
(fun _ -> { label = ""; msg = "" })
in
List.mapi (fun col x ->
label_msg_pair grid_key col x
)
(l @ padding)
)
in
let grid =
let$* rotation = Lwd.get rotation in
grid_contents
|> List.map (fun l ->
(* Normalize rotation into [0, max_row_size) even when
   negative. *)
Misc_utils.rotate_list
(((rotation mod max_row_size) + max_row_size)
mod
max_row_size
)
l
)
|> Nottui_widgets.grid
~pad:(Nottui.Gravity.make ~h:`Negative ~v:`Negative)
in
Input_mode_map.add grid_key grid m
)
Input_mode_map.empty
grid_contents
(* Look up the grid for the current input mode. *)
let main ~(grid_lookup : grid_lookup) ~(input_mode : input_mode) =
Input_mode_map.find input_mode grid_lookup
end
(* Labels shown in front of the filter and search bars. *)
let filter_bar_label_string = "Document filter"
let search_bar_label_string = "Content search"
(* Width of the widest bar label; used to pad both labels to the same
   width so the bars line up. *)
let max_label_length =
  [ filter_bar_label_string
  ; search_bar_label_string
  ]
  |> List.map String.length
  |> List.fold_left max 0
let pad_label_string s =
CCString.pad ~side:`Right ~c:' ' max_label_length s
(* Tab completion of the word ending at cursor position [pos] in
   [text] against [choices].  Returns the updated (text, cursor
   position) pair.  Side effect: publishes the matching subset of
   [choices] to [Vars.autocomplete_choices] so the UI can render
   suggestions. *)
let autocomplete ~choices (text, pos) : string * int =
  let left = String.sub text 0 pos in
  let right = String.sub text pos (String.length text - pos) in
  (* Extract the trailing run of word characters (alphanumeric, '-',
     ':') of [s], i.e. the partial word just before the cursor. *)
  let grab_input_word (s : string) =
    let rec aux acc i s =
      if i < 0 then (
        CCString.of_list acc
      ) else (
        let c = s.[i] in
        if Parser_components.is_alphanum c
        || c = '-'
        || c = ':'
        then (
          aux (c :: acc) (i - 1) s
        ) else (
          (* Hit a non-word character: recurse with i = -1 to
             terminate, keeping what was collected so far. *)
          aux acc (-1) s
        )
      )
    in
    aux [] (String.length s - 1) s
  in
  let current_input_word = grab_input_word left in
  let usable_choices =
    List.filter
      (CCString.prefix ~pre:current_input_word)
      choices
  in
  Lwd.set
    Vars.autocomplete_choices usable_choices;
  match usable_choices with
  | [] -> (text, pos)
  | _ -> (
      (* Complete up to the longest common prefix of the remaining
         candidates; this is a no-op extension when only the typed
         prefix is shared. *)
      let best_fit = usable_choices
                     |> List.to_seq
                     |> String_utils.longest_common_prefix
      in
      (* Drop the partial word from [left]; [best_fit] re-includes it
         since every usable choice starts with it. *)
      let left =
        String.sub
          left
          0
          (String.length left - String.length current_input_word)
      in
      (String.concat "" [ left; best_fit; right ],
       pos + (String.length best_fit - String.length current_input_word))
    )
(* The document filter input bar: label, status indicator and editable
   text field, with Tab completion over filter expression keywords. *)
module Filter_bar = struct
  (* Label padded so Filter_bar and Search_bar fields align. *)
  let label_string = pad_label_string filter_bar_label_string

  (* Render the label; bold iff the filter field has input focus
     (input mode [Filter]). *)
  let label ~(input_mode : input_mode) =
    let attr =
      match input_mode with
      | Filter -> Notty.A.(st bold)
      | _ -> Notty.A.empty
    in
    Notty.I.string attr label_string
    |> Nottui.Ui.atom
    |> Lwd.return

  (* Reactive status light: green OK when idle, yellow ... while
     filtering, red ERR when the filter expression failed to parse. *)
  let status =
    let$* status = Lwd.get Vars.filter_ui_status in
    (match status with
     | `Idle -> (
         Notty.I.string Notty.A.(fg lightgreen)
           " OK"
       )
     | `Filtering -> (
         Notty.I.string Notty.A.(fg lightyellow)
           " ..."
       )
     | `Parse_error -> (
         Notty.I.string Notty.A.(fg lightred)
           " ERR"
       )
    )
    |> Nottui.Ui.atom
    |> Lwd.return

  (* Filter expression keywords offered via Tab completion. *)
  let autocomplete_choices =
    [ "path-date:"
    ; "path-fuzzy:"
    ; "path-glob:"
    ; "ext:"
    ; "content:"
    ; "mod-date:"
    ]

  (* Assemble the bar.  Edits are mirrored into [text_field] and
     [on_change] fires on every keystroke; submitting fires
     [on_submit], clears autocomplete suggestions, releases focus and
     returns to Navigate input mode. *)
  let main
      ~input_mode
      ~(text_field : (string * int) Lwd.var)
      ~focus_handle
      ~on_change
      ~on_submit
    : Nottui.ui Lwd.t =
    Nottui_widgets.hbox
      [
        label ~input_mode;
        status;
        Lwd.return (Nottui.Ui.atom (Notty.I.strf ": "));
        edit_field text_field
          ~focus:focus_handle
          ~on_cancel:(fun (_text, _x) -> ())
          ~on_change:(fun (text, x) ->
              Lwd.set text_field (text, x);
              on_change ();
            )
          ~on_submit:(fun (text, x) ->
              Lwd.set text_field (text, x);
              on_submit ();
              Lwd.set Vars.autocomplete_choices [];
              Nottui.Focus.release focus_handle;
              Lwd.set Vars.input_mode Navigate
            )
          ~on_tab:(fun (text, pos) ->
              let (text, pos) =
                autocomplete ~choices:autocomplete_choices (text, pos)
              in
              Lwd.set text_field (text, pos)
            );
      ]
end
(* The content search input bar.  Mirrors [Filter_bar] but has no Tab
   completion ([on_tab] is a no-op). *)
module Search_bar = struct
  (* Label padded so Filter_bar and Search_bar fields align. *)
  let label_string = pad_label_string search_bar_label_string

  (* Render the label; bold iff the search field has input focus
     (input mode [Search]). *)
  let label ~(input_mode : input_mode) =
    let attr =
      match input_mode with
      | Search -> Notty.A.(st bold)
      | _ -> Notty.A.empty
    in
    Notty.I.string attr label_string
    |> Nottui.Ui.atom
    |> Lwd.return

  (* Reactive status light: green OK when idle, yellow ... while
     searching, red ERR when the search expression failed to parse. *)
  let status =
    let$* status = Lwd.get Vars.search_ui_status in
    (match status with
     | `Idle -> (
         Notty.I.string Notty.A.(fg lightgreen)
           " OK"
       )
     | `Searching -> (
         Notty.I.string Notty.A.(fg lightyellow)
           " ..."
       )
     | `Parse_error -> (
         Notty.I.string Notty.A.(fg lightred)
           " ERR"
       )
    )
    |> Nottui.Ui.atom
    |> Lwd.return

  (* Assemble the bar; see [Filter_bar.main] for the submit/change
     protocol. *)
  let main
      ~input_mode
      ~(text_field : (string * int) Lwd.var)
      ~focus_handle
      ~on_change
      ~on_submit
    : Nottui.ui Lwd.t =
    Nottui_widgets.hbox
      [
        label ~input_mode;
        status;
        Lwd.return (Nottui.Ui.atom (Notty.I.strf ": "));
        edit_field text_field
          ~focus:focus_handle
          ~on_cancel:(fun (_text, _x) -> ())
          ~on_change:(fun (text, x) ->
              Lwd.set text_field (text, x);
              on_change ();
            )
          ~on_submit:(fun (text, x) ->
              Lwd.set text_field (text, x);
              on_submit ();
              Lwd.set Vars.autocomplete_choices [];
              Nottui.Focus.release focus_handle;
              Lwd.set Vars.input_mode Navigate
            )
          ~on_tab:(fun (_, _) -> ());
      ]
end
(* Alias of [term] with an explicit arrow type; used inside the UI
   loop to re-query the terminal handle each iteration. *)
let term' : unit -> Notty_unix.Term.t = term
(* Run the Nottui render/event loop until the [quit] var becomes
   true.  Each iteration polls terminal size and updates
   [Vars.term_width_height] on change so reactive layouts can respond,
   then steps the renderer with a 0.05s event timeout and yields to
   Eio so background fibers make progress. *)
let ui_loop ~quit ~term root =
  let renderer = Nottui.Renderer.make () in
  let root =
    let$ root = root in
    root
    (* Escape-to-quit handler kept disabled. *)
    (* |> Nottui.Ui.event_filter (fun x ->
       match x with
       | `Key (`Escape, []) -> (
       Lwd.set quit true;
       `Handled
       )
       | _ -> `Unhandled
       ) *)
  in
  let rec loop () =
    if not (Lwd.peek quit) then (
      let (term_width, term_height) = Notty_unix.Term.size (term' ()) in
      let (prev_term_width, prev_term_height) = Lwd.peek Vars.term_width_height in
      if term_width <> prev_term_width || term_height <> prev_term_height then (
        Lwd.set Vars.term_width_height (term_width, term_height)
      );
      (* NOTE(review): a fresh Lwd observer is created on every
         iteration — presumably fine for Nottui_unix.step, but verify
         this does not accumulate watchers. *)
      Nottui_unix.step
        ~process_event:true
        ~timeout:0.05
        ~renderer
        term
        (Lwd.observe @@ root);
      Eio.Fiber.yield ();
      loop ()
    )
  in
  loop ()
(* Switch the current input mode and reset the key-binding-info grid
   rotation so the grid starts from its first column again. *)
let set_input_mode mode =
  Lwd.set Vars.input_mode mode;
  Key_binding_info.reset_rotation ()
================================================
FILE: bin/args.ml
================================================
open Cmdliner
open Docfd_lib
open Misc_utils
(* --no-pdftotext: disable the external pdftotext command; files that
   would need it are excluded. *)
let no_pdftotext_arg_name = "no-pdftotext"

let no_pdftotext_arg =
  let doc =
    Fmt.str {|Disable use of pdftotext command.
Files that require use of pdftotext are excluded.
|}
  in
  Arg.(value & flag & info [ no_pdftotext_arg_name ] ~doc)

(* --no-pandoc: disable the external pandoc command; files that would
   need it are excluded. *)
let no_pandoc_arg_name = "no-pandoc"

let no_pandoc_arg =
  let doc =
    Fmt.str {|Disable use of pandoc command.
Files that require use of pandoc are excluded.
|}
  in
  Arg.(value & flag & info [ no_pandoc_arg_name ] ~doc)

(* --hidden: also scan dotfiles/dot-directories (skipped by default). *)
let hidden_arg_name = "hidden"

let hidden_arg =
  let doc =
    Fmt.str {|Scan hidden files and directories.
By default, hidden files and directories are skipped.
A file or directory is hidden if the base name starts
with a dot, e.g. ".gitignore".
|}
  in
  Arg.(value & flag & info [ hidden_arg_name ] ~doc)

(* --max-depth N: file tree scan depth limit (also applies to ** in
   globs). *)
let max_depth_arg_name = "max-depth"

let max_depth_arg =
  let doc =
    Fmt.str
      "Scan up to N levels when exploring file trees.
This applies to directory paths provided
and ** in globs.
Note that --%s 0 results in no-op when scanning
directories, and --%s 1 means only scanning for
direct children."
      max_depth_arg_name
      max_depth_arg_name
  in
  Arg.(
    value
    & opt int Params.default_max_file_tree_scan_depth
    & info [ max_depth_arg_name ] ~doc ~docv:"N"
  )
(* --exts: comma-separated list of recognized file extensions. *)
let exts_arg_name = "exts"

let exts_arg =
  let doc =
    "File extensions to use, comma separated. Leading dots of any extension are removed."
  in
  Arg.(
    value
    & opt string Params.default_recognized_exts
    & info [ exts_arg_name ] ~doc ~docv:"EXTS"
  )

(* --single-line-exts: like --exts but these extensions get single
   line search mode.  NOTE(review): identifier says "single_file" but
   the flag and its companions say "single-line"; consider renaming to
   single_line_exts_arg_name — kept as-is since other modules may
   reference this name. *)
let single_file_exts_arg_name = Fmt.str "single-line-%s" exts_arg_name

let single_line_exts_arg =
  let doc =
    Fmt.str "Same as --%s, but use single line search mode instead.
If an extension appears in both --%s and --%s,
then single line search mode is used for that extension."
      exts_arg_name
      exts_arg_name
      single_file_exts_arg_name
  in
  Arg.(
    value
    & opt string Params.default_recognized_single_line_exts
    & info [ single_file_exts_arg_name ] ~doc ~docv:"EXTS"
  )

(* --add-exts: extra extensions on top of --exts; repeatable. *)
let add_exts_arg_name = "add-exts"

let add_exts_arg =
  let doc =
    "Additional file extensions to use, comma separated.
May be specified multiple times."
  in
  Arg.(
    value
    & opt_all string []
    & info [ add_exts_arg_name ] ~doc ~docv:"EXTS"
  )

(* --single-line-add-exts: extra single-line-mode extensions;
   repeatable. *)
let single_line_add_exts_arg_name = Fmt.str "single-line-%s" add_exts_arg_name

let single_line_add_exts_arg =
  let doc =
    Fmt.str "Same as --%s, but use single line search mode instead." add_exts_arg_name
  in
  Arg.(
    value
    & opt_all string []
    & info [ single_line_add_exts_arg_name ] ~doc ~docv:"EXTS"
  )
(* --max-fuzzy-edit-dist N: edit distance budget for fuzzy matching. *)
let max_fuzzy_edit_dist_arg_name = "max-fuzzy-edit-dist"

let max_fuzzy_edit_dist_arg =
  let doc =
    "Maximum edit distance for fuzzy matches."
  in
  Arg.(
    value
    & opt int Params.default_max_fuzzy_edit_dist
    & info [ max_fuzzy_edit_dist_arg_name ] ~doc ~docv:"N"
  )

(* --max-token-search-dist N: how far ahead to look for the next
   matching token. *)
let max_token_search_dist_arg_name = "max-token-search-dist"

let max_token_search_dist_arg =
  let doc =
    "Maximum distance to look for the next matching token in document.
If two tokens are adjacent, then they are 1 distance away from each other.
Note that contiguous spaces count as one token as well."
  in
  Arg.(
    value
    & opt int Params.default_max_token_search_dist
    & info [ max_token_search_dist_arg_name ] ~doc ~docv:"N"
  )

(* --max-linked-token-search-dist N: like the above but for tokens
   written with no space between them in the search phrase. *)
let max_linked_token_search_dist_arg_name = "max-linked-token-search-dist"

let max_linked_token_search_dist_arg =
  let doc =
    Fmt.str
      {|Similar to %s but for linked tokens.
Two tokens are linked if there is no space between them in the search phrase,
e.g. "-" and ">" are linked in "->" but not in "- >",
"and" "/" "or" are linked in "and/or" but not in "and / or".|}
      max_token_search_dist_arg_name
  in
  Arg.(
    value
    & opt int Params.default_max_linked_token_search_dist
    & info [ max_linked_token_search_dist_arg_name ] ~doc ~docv:"N"
  )

(* --tokens-per-search-scope-level N: context window per narrow-mode
   scope level. *)
let tokens_per_search_scope_level_arg_name = "tokens-per-search-scope-level"

let tokens_per_search_scope_level_arg =
  let doc =
    Fmt.str
      {|Number of tokens to use around the current search
results for each search scope level in narrow mode.|}
  in
  Arg.(
    value
    & opt int Params.default_tokens_per_search_scope_level
    & info [ tokens_per_search_scope_level_arg_name ] ~doc ~docv:"N"
  )

(* --index-chunk-size N: tokens per indexing job sent to the thread
   pool. *)
let index_chunk_size_arg_name = "index-chunk-size"

let index_chunk_size_arg =
  let doc =
    "Number of tokens to send as a job unit to the thread pool for indexing."
  in
  Arg.(
    value
    & opt int Params.default_index_chunk_size
    & info [ index_chunk_size_arg_name ] ~doc ~docv:"N"
  )
(* --cache-dir DIR: where the index DB lives; defaults under
   XDG cache home. *)
let cache_dir_arg =
  let doc =
    "Docfd cache directory, mainly for index DB."
  in
  let cache_home = Xdg_utils.cache_home in
  Arg.(
    value
    & opt string (Filename.concat cache_home "docfd")
    & info [ "cache-dir" ] ~doc ~docv:"DIR"
  )

(* --cache-limit N: index is trimmed to at most N documents at
   launch. *)
let cache_limit_arg_name = "cache-limit"

let cache_limit_arg =
  let doc =
    "Maximum number of documents to keep in index.
Docfd resets the cache to this limit at launch."
  in
  Arg.(
    value
    & opt int Params.default_cache_limit
    & info [ cache_limit_arg_name ] ~doc ~docv:"N"
  )

(* --data-dir DIR: defaults under XDG data home. *)
let data_dir_arg =
  let doc =
    "Docfd data directory."
  in
  let data_home = Xdg_utils.data_home in
  Arg.(
    value
    & opt string (Filename.concat data_home "docfd")
    & info [ "data-dir" ] ~doc ~docv:"DIR"
  )

(* --index-only: build the index then exit without entering the UI. *)
let index_only_arg =
  let doc =
    Fmt.str "Exit after indexing."
  in
  Arg.(value & flag & info [ "index-only" ] ~doc)
(* --debug-log FILE: enable debug mode; "-" logs to stderr, otherwise
   FILE is opened in append mode. *)
let debug_log_arg =
  let doc =
    Fmt.str "Specify debug log file to use and enable debug mode where
additional checks are enabled and additional info is displayed on UI.
If FILE is -, then debug log is printed to stderr instead.
Otherwise FILE is opened in append mode for log writing."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ "debug-log" ] ~doc ~docv:"FILE"
  )

(* --start-with-filter EXP: pre-populate the filter in interactive
   mode. *)
let start_with_filter_arg_name = "start-with-filter"

let start_with_filter_arg =
  let doc =
    Fmt.str "Start interactive mode with an initial filter using expression EXP."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ start_with_filter_arg_name ] ~doc ~docv:"EXP"
  )

(* --start-with-search EXP: pre-populate the search in interactive
   mode. *)
let start_with_search_arg_name = "start-with-search"

let start_with_search_arg =
  let doc =
    Fmt.str "Start interactive mode with search expression EXP."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ start_with_search_arg_name ] ~doc ~docv:"EXP"
  )
(* --sample EXP: non-interactive search showing only the top results
   per document (count set by --samples-per-doc). *)
let sample_arg_name = "sample"

let samples_per_doc_arg_name = "samples-per-doc"

let sample_arg =
  let doc =
    Fmt.str "Search with expression EXP in non-interactive mode but only
show top N results where N is controlled by --%s."
      samples_per_doc_arg_name
  in
  Arg.(
    value
    & opt (some string) None
    & info [ sample_arg_name ] ~doc ~docv:"EXP"
  )

(* --samples-per-doc N: result count cap per document for sampling. *)
let samples_per_doc_arg =
  let doc =
    Fmt.str
      "Number of search results to show per document when --%s is used
or when samples printing is triggered."
      sample_arg_name
  in
  Arg.(
    value
    & opt int Params.default_samples_per_document
    & info [ samples_per_doc_arg_name ] ~doc ~docv:"N"
  )

(* --search EXP: non-interactive search printing all results. *)
let search_arg_name = "search"

let search_arg =
  let doc =
    "Search with expression EXP in non-interactive mode and show all results."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ search_arg_name ] ~doc ~docv:"EXP"
  )

(* --filter EXP: non-interactive document filter; must be combined
   with a consumer (see [check]). *)
let filter_arg_name = "filter"

let filter_arg =
  let doc =
    Fmt.str
      "Filter with expression EXP in non-interactive mode. May be combined with --%s or --%s."
      search_arg_name
      sample_arg_name
  in
  Arg.(
    value
    & opt (some string) None
    & info [ filter_arg_name ] ~doc ~docv:"EXP"
  )

(* --sort TYPE,ORDER: document ordering when scores are available. *)
let sort_arg_name = "sort"

let sort_arg =
  let doc =
    Fmt.str
      "Sort document by: TYPE,ORDER. TYPE is one of: path, path-date, score, mod-time. ORDER is one of: asc, desc."
  in
  Arg.(
    value
    & opt string Params.default_sort_by_arg
    & info [ sort_arg_name ] ~doc ~docv:"TYPE,ORDER"
  )
(* Shared never/always/auto enum for --color and --underline. *)
let style_mode_options = [ ("never", `Never); ("always", `Always); ("auto", `Auto) ]

(* --color MODE: color mode for printed search results. *)
let color_arg =
  let doc =
    Fmt.str
      "Set color mode for search result printing, one of: %s."
      (String.concat ", " (List.map fst style_mode_options))
  in
  Arg.(
    value
    & opt (Arg.enum style_mode_options) `Auto
    & info [ "color" ] ~doc ~docv:"MODE"
  )

(* --underline MODE: underline mode for printed search results. *)
let underline_arg =
  let doc =
    Fmt.str
      "Set underline mode for search result printing, one of: %s."
      (String.concat ", " (List.map fst style_mode_options))
  in
  Arg.(
    value
    & opt (Arg.enum style_mode_options) `Auto
    & info [ "underline" ] ~doc ~docv:"MODE"
  )
(* --search-result-print-text-width N: wrap width for printed
   results. *)
let search_result_print_text_width_arg_name = "search-result-print-text-width"

let search_result_print_text_width_arg =
  let doc =
    "Text width to use when printing search results."
  in
  Arg.(
    value
    & opt int Params.default_search_result_print_text_width
    & info [ search_result_print_text_width_arg_name ] ~doc ~docv:"N"
  )

(* --search-result-print-snippet-min-size N: snippets shorter than N
   non-space tokens get surrounding lines added for context. *)
let search_result_print_snippet_min_size_arg_name = "search-result-print-snippet-min-size"

let search_result_print_snippet_min_size_arg =
  let doc =
    "If the search result to be printed has fewer than N non-space tokens,
then Docfd tries to add surrounding lines to the snippet
to give better context."
  in
  Arg.(
    value
    & opt int Params.default_search_result_print_snippet_min_size
    & info [ search_result_print_snippet_min_size_arg_name ] ~doc ~docv:"N"
  )

(* --search-result-print-snippet-max-add-lines N: cap on added
   context lines per direction. *)
let search_result_print_snippet_max_add_lines_arg_name = "search-result-print-snippet-max-add-lines"

let search_result_print_snippet_max_add_lines_arg =
  let doc =
    "This controls the maximum number of surrounding lines
Docfd can add in each direction."
  in
  Arg.(
    value
    & opt int Params.default_search_result_print_snippet_max_additional_lines_each_direction
    & info [ search_result_print_snippet_max_add_lines_arg_name ] ~doc ~docv:"N"
  )
(* --script FILE: run a Docfd script non-interactively. *)
let script_arg_name = "script"

let script_arg =
  let doc =
    Fmt.str "Read and run Docfd script FILE."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ script_arg_name ] ~doc ~docv:"FILE"
  )

(* --start-with-script FILE: run a script then stay in interactive
   mode. *)
let start_with_script_arg_name = "start-with-script"

let start_with_script_arg =
  let doc =
    Fmt.str "Read and run Docfd script FILE, then continue in interactive mode."
  in
  Arg.(
    value
    & opt (some string) None
    & info [ start_with_script_arg_name ] ~doc ~docv:"FILE"
  )

(* --paths-from FILES: read extra paths from files (or stdin via
   "-"); repeatable. *)
let paths_from_arg_name = "paths-from"

let paths_from_arg =
  let doc =
    Fmt.str "Read list of paths from FILES,
which is a comma separated list of files,
and add to the final list of paths to be scanned.
For example, \"--%s path-list0.txt,path-list1.txt\".
If - is in FILES, then stdin is also read for
list of paths to be scanned. This is useful
for piping, e.g. \"find -name '*.txt' | docfd --%s -\""
      paths_from_arg_name
      paths_from_arg_name
  in
  Arg.(
    value
    & opt_all string []
    & info [ paths_from_arg_name ] ~doc ~docv:"FILES"
  )

(* --glob PATTERN: add files matched by a glob; matched directories
   are not recursed into.  Repeatable. *)
let glob_arg_name = "glob"

let glob_arg =
  let doc =
    "Add to the final list of paths to be scanned using glob pattern.
The pattern should pick up the files directly.
Directories picked up by the pattern are not further scanned
for files with suitable extensions."
  in
  Arg.(
    value
    & opt_all string []
    & info [ glob_arg_name ] ~doc ~docv:"PATTERN"
  )
(* --single-line-glob PATTERN: like --glob but matched files are
   searched in single line mode; overlap with --glob resolves to
   single line mode. *)
let single_line_glob_arg_name = Fmt.str "single-line-%s" glob_arg_name

let single_line_glob_arg =
  let doc =
    (* Fixed grammar in help text: was "If the file are picked up". *)
    Fmt.str
      "Same as --%s, but use single line search mode instead.
If a file is picked up by both patterns from --%s and --%s,
then single line search mode is used."
      glob_arg_name
      glob_arg_name
      single_line_glob_arg_name
  in
  Arg.(
    value
    & opt_all string []
    & info [ single_line_glob_arg_name ] ~doc ~docv:"PATTERN"
  )
(* --single-line: make single line search mode the default. *)
let single_line_arg =
  let doc =
    "Use single line search mode by default."
  in
  Arg.(
    value
    & flag
    & info [ "single-line" ] ~doc
  )

(* --open-with EXT:LAUNCH_MODE=CMD: custom opener per extension, run
   either in the terminal or detached; CMD supports {path},
   {page_num}, {line_num}, {search_word} placeholders.  Repeatable. *)
let open_with_arg_name = "open-with"

let open_with_arg =
  let doc =
    Fmt.str "Specify custom command CMD for
opening files with file extension EXT.
May be specified multiple times.
Leading dots of EXT are removed.
LAUNCH_MODE specifies how the command should be
executed:
`terminal` - for commands which Docfd
should run in the terminal and wait for completion,
e.g. text editors, pagers.
`detached` - for background
commands, such as PDF viewers or
other GUI tools.
CMD may contain the following placeholders:
{path} - file path,
{page_num} - page number (PDF only),
{line_num} - line number (not available in PDF),
{search_word} - most unique word of the page
(PDF only, useful for passing to PDF viewers as search term).
Examples: \"pdf:detached='okular --page {page_num} --find {search_word} {path}'\",
\"txt:terminal='nano +{line_num} {path}'\".
"
  in
  Arg.(
    value
    & opt_all string []
    & info [ open_with_arg_name ] ~doc ~docv:"EXT:LAUNCH_MODE=CMD"
  )
(* -l / --files-with-match: print only the paths of matching
   documents instead of the search results themselves. *)
let files_with_match_arg_name = "files-with-match"

let files_with_match_arg =
  let doc =
    (* Fixed grammar in help text: was "would have be listed". *)
    Fmt.str "If paired with
--%s or --%s,
then print the paths of documents with at least one match
instead of printing the search results.
If paired with --%s, then print paths of documents
that would have been listed in the UI
after running the commands in interactive mode."
      search_arg_name
      sample_arg_name
      script_arg_name
  in
  Arg.(
    value
    & flag
    & info [ "l"; files_with_match_arg_name ] ~doc
  )
(* --files-without-match: print only the paths of documents with no
   matches; incompatible with --script (enforced in [check]). *)
let files_without_match_arg_name = "files-without-match"

let files_without_match_arg =
  let doc =
    Fmt.str "If paired with
--%s or --%s,
then print the paths of documents with no matches
instead of printing the search results.
Cannot be paired with --%s."
      search_arg_name
      sample_arg_name
      script_arg_name
  in
  Arg.(
    value
    & flag
    & info [ files_without_match_arg_name ] ~doc
  )
(* --sort-no-score TYPE,ORDER: ordering used when no scores exist
   (e.g. with --files-without-match); "score" is not a valid TYPE. *)
let sort_no_score_arg_name = "sort-no-score"

let sort_no_score_arg =
  let doc =
    (* Bug fix: the first %s was fed [sort_no_score_arg_name], making
       the help text read "Same as --sort-no-score ..." — a reference
       to itself.  It should reference --sort. *)
    Fmt.str
      "Same as --%s but sorting TYPE cannot be score. Used for scenarios when no scores are available, e.g. --%s is used."
      sort_arg_name
      files_without_match_arg_name
  in
  Arg.(
    value
    & opt string Params.default_sort_by_no_score_arg
    & info [ sort_no_score_arg_name ] ~doc ~docv:"TYPE,ORDER"
  )
(* Positional PATH arguments; defaults to the current directory
   unless another path source flag is in use. *)
let paths_arg =
  let doc =
    Fmt.str
      "PATH can be either file or directory.
Directories are scanned for files with matching extensions.
If no paths are provided,
then Docfd defaults to scanning the current working directory
unless any of the following is used: %a.
To use piped stdin as input, the list of paths must be empty."
      Fmt.(list ~sep:comma (fun fmt s -> Fmt.pf fmt "--%s" s))
      [ paths_from_arg_name; glob_arg_name; single_line_glob_arg_name ]
  in
  Arg.(value & pos_all string [] & info [] ~doc ~docv:"PATH")
(* Validate argument values and combinations.  Every violation exits
   the process with an error message via [exit_with_error_msg]; on
   success the function simply returns unit. *)
let check
    ~max_depth
    ~max_fuzzy_edit_dist
    ~max_token_search_dist
    ~max_linked_token_search_dist
    ~tokens_per_search_scope_level
    ~index_chunk_size
    ~cache_limit
    ~start_with_filter
    ~start_with_search
    ~filter_exp
    ~sample_search_exp
    ~samples_per_doc
    ~search_exp
    ~search_result_print_text_width
    ~search_result_print_snippet_min_size
    ~search_result_print_max_add_lines
    ~start_with_script
    ~script
    ~paths_from
    ~print_files_with_match
    ~print_files_without_match
  =
  (* Numeric range checks. *)
  if max_depth < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" max_depth_arg_name)
  );
  if max_fuzzy_edit_dist < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" max_fuzzy_edit_dist_arg_name)
  );
  if max_token_search_dist < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" max_token_search_dist_arg_name)
  );
  if max_linked_token_search_dist < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" max_linked_token_search_dist_arg_name)
  );
  if tokens_per_search_scope_level < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" tokens_per_search_scope_level_arg_name)
  );
  if index_chunk_size < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" index_chunk_size_arg_name)
  );
  if cache_limit < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" cache_limit_arg_name)
  );
  if samples_per_doc < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" samples_per_doc_arg_name)
  );
  if search_result_print_text_width < 1 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 1" search_result_print_text_width_arg_name)
  );
  if search_result_print_snippet_min_size < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" search_result_print_snippet_min_size_arg_name)
  );
  if search_result_print_max_add_lines < 0 then (
    exit_with_error_msg
      (Fmt.str "invalid %s: cannot be < 0" search_result_print_snippet_max_add_lines_arg_name)
  );
  (* --filter only makes sense alongside something that consumes the
     filtered document set. *)
  if Option.is_some filter_exp then (
    if not (
        Option.is_some sample_search_exp
        ||
        Option.is_some search_exp
        ||
        print_files_with_match
        ||
        print_files_without_match
      )
    then (
      exit_with_error_msg
        (Fmt.str "--%s must be used with at least one of: --%s, --%s, --%s, --%s"
           filter_arg_name
           search_arg_name
           sample_arg_name
           files_with_match_arg_name
           files_without_match_arg_name
        )
    )
  );
  let cannot_be_used_together x y =
    exit_with_error_msg
      (Fmt.str "--%s and --%s cannot be used together" x y)
  in
  (match print_files_with_match, print_files_without_match with
   | true, true -> (
       cannot_be_used_together
         files_with_match_arg_name files_without_match_arg_name
     )
   | true, false -> (
       (* --files-with-match needs a query/filter/script to act on. *)
       if not (
           Option.is_some filter_exp
           ||
           Option.is_some sample_search_exp
           ||
           Option.is_some search_exp
           ||
           Option.is_some script
         )
       then (
         exit_with_error_msg
           (Fmt.str "--%s cannot be used without one of: --%s, --%s, --%s, --%s"
              files_with_match_arg_name
              filter_arg_name
              sample_arg_name
              search_arg_name
              script_arg_name
           )
       )
     )
   | false, true -> (
       (* --files-without-match: same idea, but scripts are not
          allowed with it (checked further below). *)
       if not (
           Option.is_some filter_exp
           ||
           Option.is_some sample_search_exp
           ||
           Option.is_some search_exp
         )
       then (
         exit_with_error_msg
           (Fmt.str "--%s cannot be used without one of: --%s, --%s, --%s"
              files_without_match_arg_name
              filter_arg_name
              sample_arg_name
              search_arg_name
           )
       )
     )
   | false, false -> ()
  );
  (* stdin can only be consumed once, so "-" may appear at most once
     across all --paths-from occurrences. *)
  (
    let l = List.filter (fun x -> x = "-") paths_from in
    if List.length l > 1 then (
      exit_with_error_msg
        (Fmt.str "at most one \"-\" may be supplied to --%s" paths_from_arg_name)
    )
  );
  (* Parse-check expressions early for a clear startup error. *)
  (match filter_exp with
   | None -> ()
   | Some filter_exp_string -> (
       match
         Filter_exp.parse filter_exp_string
       with
       | None -> (
           exit_with_error_msg "failed to parse filter exp"
         )
       | Some _ -> ()
     )
  );
  (match sample_search_exp, search_exp with
   | None, None -> ()
   | Some _, Some _ -> (
       exit_with_error_msg
         (* Fix: prefix arg names with "--" for consistency with
            every other message in this function. *)
         (Fmt.str "--%s and --%s cannot be used together" sample_arg_name search_arg_name)
     )
   | Some search_exp_string, None
   | None, Some search_exp_string -> (
       match
         Search_exp.parse search_exp_string
       with
       | None -> (
           exit_with_error_msg "failed to parse search exp"
         )
       | Some _ -> ()
     )
  );
  (* --start-with-* conflicts with the non-interactive query args. *)
  let start_with_arg_check ~arg_name =
    if Option.is_some filter_exp then (
      cannot_be_used_together arg_name filter_arg_name
    );
    if Option.is_some sample_search_exp then (
      cannot_be_used_together arg_name sample_arg_name
    );
    if Option.is_some search_exp then (
      cannot_be_used_together arg_name search_arg_name
    );
  in
  if Option.is_some start_with_filter then (
    start_with_arg_check ~arg_name:start_with_filter_arg_name
  );
  if Option.is_some start_with_search then (
    start_with_arg_check ~arg_name:start_with_search_arg_name
  );
  if Option.is_some start_with_script then (
    start_with_arg_check ~arg_name:start_with_script_arg_name
  );
  (* Script args additionally conflict with --start-with-filter,
     --start-with-search and --files-without-match. *)
  let script_common_check ~arg_name =
    if Option.is_some filter_exp then (
      cannot_be_used_together arg_name filter_arg_name
    );
    if Option.is_some sample_search_exp then (
      cannot_be_used_together arg_name sample_arg_name
    );
    if Option.is_some search_exp then (
      cannot_be_used_together arg_name search_arg_name
    );
    if Option.is_some start_with_filter then (
      cannot_be_used_together arg_name start_with_filter_arg_name
    );
    if Option.is_some start_with_search then (
      cannot_be_used_together arg_name start_with_search_arg_name
    );
    if print_files_without_match then (
      cannot_be_used_together arg_name files_without_match_arg_name
    );
  in
  if Option.is_some script then (
    script_common_check ~arg_name:script_arg_name;
    if Option.is_some start_with_script then (
      cannot_be_used_together script_arg_name start_with_script_arg_name
    );
  );
  if Option.is_some start_with_script then (
    script_common_check ~arg_name:start_with_script_arg_name;
  )
================================================
FILE: bin/clipboard.ml
================================================
(* Feed the output written by [f] into the configured clipboard copy
   command.  No-op when no clipboard command is configured. *)
let pipe_to_clipboard (f : out_channel -> unit) : unit =
  Option.iter
    (fun (cmd, args) -> Proc_utils.pipe_to_command f cmd args)
    Params.clipboard_copy_cmd_and_args
================================================
FILE: bin/command.ml
================================================
open Docfd_lib
(* Document sort specification: a sort key plus a direction, with a
   parser/printer for the "TYPE,ORDER" textual form. *)
module Sort_by = struct
  (* Available sort keys. *)
  type typ = [
    | `Path_date
    | `Path
    | `Score
    | `Mod_time
  ]

  type t = typ * Document.Compare.order

  (* Default ordering when search scores are available. *)
  let default : t = (`Score, `Desc)

  (* Default ordering when no scores exist. *)
  let default_no_score : t = (`Path, `Asc)

  (* Print in the same "TYPE,ORDER" form accepted by [parse]. *)
  let pp formatter ((typ, order) : t) =
    Fmt.pf formatter "%s,%s"
      (match typ with
       | `Path_date -> "path-date"
       | `Path -> "path"
       | `Score -> "score"
       | `Mod_time -> "mod-time"
      )
      (match order with
       | `Asc -> "asc"
       | `Desc -> "desc"
      )

  (* Parser for "TYPE,ORDER".  [no_score] removes "score" from the
     accepted types.  "path-date" is listed before "path" so the
     longer literal is tried first; unknown words produce a
     descriptive failure. *)
  let p ~no_score : t Angstrom.t =
    let open Angstrom in
    let open Parser_components in
    skip_spaces *>
    (choice (List.filter_map
               Fun.id ([
                   Some (string "path-date" *> return `Path_date);
                   Some (string "path" *> return `Path);
                   (if no_score then
                      None
                    else
                      Some (string "score" *> return `Score));
                   Some (string "mod-time" *> return `Mod_time);
                 ]))
     <|>
     (take_while (fun c -> is_not_space c && c <> ',') >>=
      fun s -> fail (Fmt.str "unrecognized sort by type: %s" s))
    )
    >>= fun typ ->
    skip_spaces *>
    char ',' *> skip_spaces *>
    (choice [
        string "asc" *> return `Asc;
        string "desc" *> return `Desc;
      ]
     <|>
     (take_while is_not_space >>=
      fun s -> fail (Fmt.str "unrecognized sort by order: %s" s))
    )
    >>= fun order -> (
      return (typ, order)
    )

  (* Parse a full "TYPE,ORDER" string; [Error msg] on failure. *)
  let parse ~no_score s =
    match Angstrom.(parse_string ~consume:Consume.All) (p ~no_score) s with
    | Ok t -> Ok t
    | Error msg -> Error msg
end
(* How the two UI panes share the screen, from right-dominant to
   left-dominant. *)
type screen_split = [
  | `Even
  | `Focus_left
  | `Wide_left
  | `Focus_right
  | `Wide_right
]
(* Decode an int into a screen split (inverse of
   [int_of_screen_split]); out-of-range values clamp to the nearest
   extreme. *)
let screen_split_of_int (x : int) : screen_split =
  match x with
  | _ when x <= 0 -> `Focus_right
  | 1 -> `Wide_right
  | 2 -> `Even
  | 3 -> `Wide_left
  | _ -> `Focus_left
(* Encode a screen split as an int in [0, 4], ordered from
   right-dominant (0) to left-dominant (4). *)
let int_of_screen_split (x : screen_split) =
  match x with
  | `Focus_left -> 4
  | `Wide_left -> 3
  | `Even -> 2
  | `Wide_right -> 1
  | `Focus_right -> 0
(* UI panes that can be shown/hidden via commands. *)
type pane = [
  | `Bottom_right
  | `Key_binding_info
]

(* Pane name as used in the command syntax (e.g. "hide-pane: ..."). *)
let string_of_pane (x : pane) =
  match x with
  | `Bottom_right -> "bottom-right"
  | `Key_binding_info -> "key-binding-info"
(* A parsed Docfd command, as entered interactively or read from a
   script file. *)
type t = [
  | `Mark of string                (* mark documents matching a path *)
  | `Mark_listed
  | `Unmark of string
  | `Unmark_listed
  | `Unmark_all
  | `Drop of string                (* remove documents matching a path *)
  | `Drop_all_except of string
  | `Drop_marked
  | `Drop_unmarked
  | `Drop_listed
  | `Drop_unlisted
  | `Narrow_level of int           (* narrow-mode search scope level *)
  | `Sort of Sort_by.t * Sort_by.t (* with-score and no-score orderings *)
  | `Path_fuzzy_rank of string * int String_map.t option
  | `Split_screen of screen_split
  | `Hide_pane of pane
  | `Show_pane of pane
  | `Comment of string             (* lines starting with '#' *)
  | `Focus of string
  | `Search of string              (* "" means clear search *)
  | `Filter of string              (* "" means clear filter *)
]
(* Print a command in the same textual form accepted by [of_string],
   so commands round-trip through printing and parsing. *)
let pp fmt (t : t) =
  match t with
  | `Mark s -> Fmt.pf fmt "mark: %s" s
  | `Mark_listed -> Fmt.pf fmt "mark listed"
  | `Unmark s -> Fmt.pf fmt "unmark: %s" s
  | `Unmark_listed -> Fmt.pf fmt "unmark listed"
  | `Unmark_all -> Fmt.pf fmt "unmark all"
  | `Drop s -> Fmt.pf fmt "drop: %s" s
  | `Drop_all_except s -> Fmt.pf fmt "drop all except: %s" s
  | `Drop_marked -> Fmt.pf fmt "drop marked"
  | `Drop_unmarked -> Fmt.pf fmt "drop unmarked"
  | `Drop_listed -> Fmt.pf fmt "drop listed"
  | `Drop_unlisted -> Fmt.pf fmt "drop unlisted"
  | `Narrow_level x -> Fmt.pf fmt "narrow level: %d" x
  | `Sort (x, y) -> (
      Fmt.pf fmt "sort by: %a; %a"
        Sort_by.pp
        x
        Sort_by.pp
        y
    )
  | `Path_fuzzy_rank (s, _ranking) -> (
      (* The precomputed ranking is not printable; only the query
         string is shown. *)
      Fmt.pf fmt "path fuzzy rank: %s" s
    )
  | `Split_screen s -> (
      Fmt.pf fmt "split screen: %s"
        (match s with
         | `Even -> "even"
         | `Focus_left -> "focus-left"
         | `Wide_left -> "wide-left"
         | `Focus_right -> "focus-right"
         | `Wide_right -> "wide-right"
        )
    )
  | `Hide_pane pane -> (
      Fmt.pf fmt "hide-pane: %s" (string_of_pane pane)
    )
  | `Show_pane pane -> (
      Fmt.pf fmt "show-pane: %s" (string_of_pane pane)
    )
  | `Comment s -> Fmt.pf fmt "#%s" s
  | `Focus s -> Fmt.pf fmt "focus: %s" s
  | `Search s -> (
      (* Empty payload prints as the dedicated "clear" form. *)
      if String.length s = 0 then (
        Fmt.pf fmt "clear search"
      ) else (
        Fmt.pf fmt "search: %s" s
      )
    )
  | `Filter s -> (
      if String.length s = 0 then (
        Fmt.pf fmt "clear filter"
      ) else (
        Fmt.pf fmt "filter: %s" s
      )
    )
(* Render a command via [pp] into a string. *)
let to_string (t : t) =
  Fmt.to_to_string pp t
(* Angstrom parsers for the command language printed by [pp].
   Alternative order inside each [choice] matters where one literal
   is a prefix of another. *)
module Parsers = struct
  type t' = t

  open Angstrom
  open Parser_components

  (* Rest of input, with surrounding whitespace trimmed. *)
  let any_string_trimmed =
    any_string >>| String.trim

  let p : t' Angstrom.t =
    skip_spaces *>
    choice [
      string "mark" *> skip_spaces *> (
        choice [
          char ':' *> skip_spaces *>
          any_string_trimmed >>| (fun s -> (`Mark s));
          string "listed" *> skip_spaces *> return `Mark_listed;
        ]
      );
      string "unmark" *> skip_spaces *> (
        choice [
          string "listed" *> skip_spaces *> return `Unmark_listed;
          string "all" *> skip_spaces *> return `Unmark_all;
          char ':' *> skip_spaces *>
          any_string_trimmed >>| (fun s -> (`Unmark s));
        ]
      );
      string "drop" *> skip_spaces *> (
        choice [
          char ':' *> skip_spaces *>
          any_string_trimmed >>| (fun s -> (`Drop s));
          string "all" *> skip_spaces *>
          string "except" *> skip_spaces *> char ':' *> skip_spaces *>
          any_string_trimmed >>| (fun s -> (`Drop_all_except s));
          string "listed" *> skip_spaces *> return `Drop_listed;
          string "unlisted" *> skip_spaces *> return `Drop_unlisted;
          string "marked" *> skip_spaces *> return `Drop_marked;
          string "unmarked" *> skip_spaces *> return `Drop_unmarked;
        ]
      );
      string "narrow" *> skip_spaces *> (
        choice [
          (* Single digit 0-9 only. *)
          string "level" *> skip_spaces *>
          char ':' *> skip_spaces *>
          satisfy (function '0'..'9' -> true | _ -> false) <* skip_spaces >>|
          (fun c -> `Narrow_level (Char.code c - Char.code '0'));
        ]
      );
      (* "clear search"/"clear filter" map to empty-payload commands. *)
      string "clear" *> skip_spaces *> (
        choice [
          string "search" *> skip_spaces *> return (`Search "");
          string "filter" *> skip_spaces *> return (`Filter "");
        ]
      );
      (* Ranking is computed later; parse as None. *)
      string "path" *> skip_spaces *>
      string "fuzzy" *> skip_spaces *>
      string "rank" *> skip_spaces *>
      char ':' *> skip_spaces *>
      any_string_trimmed >>|
      (fun s -> `Path_fuzzy_rank (s, None));
      (* "sort by: WITH_SCORE; NO_SCORE" — two orderings separated by
         ';'. *)
      (string "sort" *> skip_spaces *>
       string "by" *> skip_spaces *>
       char ':' *> skip_spaces *>
       Sort_by.p ~no_score:false >>= fun sort_by ->
       skip_spaces *>
       char ';' *>
       skip_spaces *>
       Sort_by.p ~no_score:true >>= fun sort_by_no_score ->
       skip_spaces *>
       return (`Sort (sort_by, sort_by_no_score)));
      string "focus" *> skip_spaces *>
      char ':' *> skip_spaces *>
      any_string_trimmed >>| (fun s -> (`Focus s));
      string "split" *> skip_spaces *>
      string "screen" *> skip_spaces *>
      char ':' *> skip_spaces *> (
        choice [
          string "even" *> skip_spaces *> return (`Split_screen `Even);
          string "focus-left" *> skip_spaces *> return (`Split_screen `Focus_left);
          string "wide-left" *> skip_spaces *> return (`Split_screen `Wide_left);
          string "focus-right" *> skip_spaces *> return (`Split_screen `Focus_right);
          string "wide-right" *> skip_spaces *> return (`Split_screen `Wide_right);
        ]
      );
      string "hide-pane" *> skip_spaces *>
      char ':' *> skip_spaces *> (
        choice [
          string "bottom-right" *> skip_spaces *> return (`Hide_pane `Bottom_right);
          string "key-binding-info" *> skip_spaces *> return (`Hide_pane `Key_binding_info);
        ]
      );
      string "show-pane" *> skip_spaces *>
      char ':' *> skip_spaces *> (
        choice [
          string "bottom-right" *> skip_spaces *> return (`Show_pane `Bottom_right);
          string "key-binding-info" *> skip_spaces *> return (`Show_pane `Key_binding_info);
        ]
      );
      string "#" *> any_string >>| (fun s -> (`Comment s));
      string "search" *> skip_spaces *>
      char ':' *> skip_spaces *>
      any_string_trimmed >>| (fun s -> (`Search s));
      string "filter" *> skip_spaces *>
      char ':' *> skip_spaces *>
      any_string_trimmed >>| (fun s -> (`Filter s));
    ]
end
(* Parse a command string; [None] on any parse failure. *)
let of_string (s : string) : t option =
  Angstrom.(parse_string ~consume:Consume.All) Parsers.p s
  |> Result.to_option
================================================
FILE: bin/content_and_search_result_rendering.ml
================================================
open Docfd_lib
module I = Notty.I
module A = Notty.A

(* Whether a rendered word is part of a search match. *)
type cell_typ = [
  | `Plain
  | `Search_result
]

(* One renderable word together with its highlight classification. *)
type cell = {
  word : string;
  typ : cell_typ;
}
(* Word-wrapping of cell sequences into Notty images of a fixed
   width, tracking which rendered lines contain search-result
   words. *)
module Text_block_rendering = struct
  (* Split [img] into horizontal chunks of at most [width] columns,
     returned in reverse order. *)
  let hchunk_rev ~width (img : Notty.image) : Notty.image list =
    let open Notty in
    let rec aux acc img =
      let img_width = I.width img in
      if img_width <= width then (
        img :: acc
      ) else (
        let acc = (I.hcrop 0 (img_width - width) img) :: acc in
        aux acc (I.hcrop width 0 img)
      )
    in
    aux [] img

  (* Wrap [cells] into lines of at most [width] columns.  Search
     result words get a highlight attribute (overridden by [attr] if
     supplied) and, if [underline], a '^' underline row.  Returns the
     image and the set of line indices counted from the BOTTOM of the
     accumulated (reversed) grid that contain search-result words. *)
  let of_cells ?attr ~width ?(underline = false) (cells : cell list) : Notty.image * Int_set.t =
    let open Notty.Infix in
    assert (width > 0);
    let rendered_lines_with_search_result_words = ref Int_set.empty in
    (* Fold builds the grid with lines in reverse order and words in
       each line in reverse order; both are re-reversed at the end. *)
    let grid : Notty.image list list =
      List.fold_left
        (fun ((cur_len, acc) : int * Notty.image list list) (cell : cell) ->
           let attr =
             match attr with
             | Some attr -> attr
             | None -> (match cell.typ with
                 | `Plain -> A.empty
                 | `Search_result -> A.(fg black ++ bg lightyellow)
               )
           in
           (* Fall back to '?' placeholders if Notty rejects the
              word (e.g. unrenderable bytes). *)
           let word =
             (match I.string attr cell.word with
              | s -> s
              | exception _ -> (
                  I.string A.(fg lightred) (String.make (String.length cell.word) '?')
                ))
           in
           let word_len = I.width word in
           let word =
             match cell.typ with
             | `Plain -> word
             | `Search_result -> (
                 if underline then (
                   word
                   <->
                   (I.string A.empty (String.make word_len '^'))
                 ) else (
                   word
                 )
               )
           in
           let new_len = cur_len + word_len in
           let cur_len, acc =
             if new_len <= width then (
               (* Fits on the current line. *)
               match acc with
               | [] -> (new_len, [ [ word ] ])
               | line :: rest -> (
                   (new_len, (word :: line) :: rest)
                 )
             ) else (
               if word_len <= width then (
                 (* Start a fresh line with this word. *)
                 (word_len, [ word ] :: acc)
               ) else (
                 (* Word longer than the line: hard-split it. *)
                 let lines =
                   hchunk_rev ~width word
                   |> List.map (fun x -> [ x ])
                 in
                 (0, [] :: (lines @ acc))
               )
             )
           in
           (match cell.typ with
            | `Plain -> ()
            | `Search_result -> (
                rendered_lines_with_search_result_words :=
                  Int_set.add (List.length acc - 1) !rendered_lines_with_search_result_words
              ));
           (cur_len, acc)
        )
        (0, [])
        cells
      |> snd
      |> List.rev_map List.rev
    in
    let img =
      grid
      |> List.map I.hcat
      |> I.vcat
    in
    (img, !rendered_lines_with_search_result_words)

  (* Convenience wrapper over [of_cells]: wrap plain [words],
     highlighting the indices in [highlights]; discards the
     highlighted-line set. *)
  let of_words ?attr ~width ?underline ?(highlights = Int_set.empty) (words : string list) : Notty.image =
    of_cells
      ?attr
      ~width
      ?underline
      (List.mapi
         (fun i word ->
            if Int_set.mem i highlights then (
              { word; typ = `Search_result }
            ) else (
              { word; typ = `Plain }
            )
         )
         words)
    |> fst
end
(* A rectangular grid of words: row [i] of [data] holds the words of
   global line number [start_global_line_num + i]. *)
type word_grid = {
start_global_line_num : int;
data : cell array array;
}
(* Smallest and largest global line numbers touched by the found words
   of [search_result]. Raises [Failure "unexpected case"] when the
   found phrase is empty. *)
let start_and_end_inc_global_line_num_of_search_result
    ~doc_id
    (search_result : Search_result.t)
  : (int * int) =
  let line_num_of Search_result.{ found_word_pos; _ } =
    Index.loc_of_pos ~doc_id found_word_pos
    |> Index.Loc.line_loc
    |> Index.Line_loc.global_line_num
  in
  match Search_result.found_phrase search_result with
  | [] -> failwith "unexpected case"
  | first :: rest -> (
      let first_line = line_num_of first in
      List.fold_left
        (fun (s, e) word ->
           let n = line_num_of word in
           (min s n, max e n))
        (first_line, first_line)
        rest
    )
(* Build a [word_grid] covering the inclusive global line number range
   [start_global_line_num .. end_inc_global_line_num] of document
   [doc_id]. Each row holds the words of one line, initially [`Plain].
   Returns an empty grid when the document has no lines.
   Fix: the empty-document case is now handled *before* the range
   assertions. Previously [check] ran first, and with
   [global_line_count = 0] no [x] satisfies both [0 <= x] and
   [x <= global_line_count - 1], so the assertions always failed and
   the empty-grid branch below was unreachable. *)
let word_grid_of_index
    ~doc_id
    ~start_global_line_num
    ~end_inc_global_line_num
  : word_grid =
  let global_line_count = Index.global_line_count ~doc_id in
  if global_line_count = 0 then (
    { start_global_line_num = 0; data = [||] }
  ) else (
    (* Both range endpoints must fall inside the document. *)
    let check x =
      assert (0 <= x);
      assert (x <= global_line_count - 1);
    in
    check start_global_line_num;
    check end_inc_global_line_num;
    let data =
      OSeq.(start_global_line_num -- end_inc_global_line_num)
      |> Seq.map (fun global_line_num ->
          Index.words_of_global_line_num ~doc_id global_line_num
          |> Dynarray.to_seq
          |> Seq.map (fun word -> { word; typ = `Plain })
          |> Array.of_seq
        )
      |> Array.of_seq
    in
    { start_global_line_num; data }
  )
(* Flip the cells at the given word [positions] to [`Search_result].
   Positions whose line falls outside [grid] are ignored. *)
let mark_in_word_grid
    ~doc_id
    (grid : word_grid)
    (positions : int list)
  : unit =
  let last_line_num =
    grid.start_global_line_num + Array.length grid.data - 1
  in
  positions
  |> List.iter (fun pos ->
      let loc = Index.loc_of_pos ~doc_id pos in
      let line_num =
        Index.Line_loc.global_line_num (Index.Loc.line_loc loc)
      in
      if grid.start_global_line_num <= line_num
      && line_num <= last_line_num
      then (
        let col = Index.Loc.pos_in_line loc in
        let row = line_num - grid.start_global_line_num in
        let cell = grid.data.(row).(col) in
        grid.data.(row).(col) <- { cell with typ = `Search_result }
      ))
(* Highlight every word of [search_result]'s found phrase in [grid]. *)
let mark_search_result_in_word_grid
    ~doc_id
    (grid : word_grid)
    (search_result : Search_result.t)
  : unit =
  let positions =
    List.map
      (fun Search_result.{ found_word_pos; _ } -> found_word_pos)
      (Search_result.found_phrase search_result)
  in
  mark_in_word_grid ~doc_id grid positions
(* Highlight the contiguous word positions spanned by [link] in [grid]. *)
let mark_link_in_word_grid
    ~doc_id
    (grid : word_grid)
    (link : Link.t)
  : unit =
  let { Link.start_pos; end_inc_pos; _ } = link in
  mark_in_word_grid ~doc_id grid CCList.(start_pos -- end_inc_pos)
(* Which location label to draw to the left of each rendered line. *)
type render_mode = [
| `Page_num_only
| `Line_num_only
| `Page_and_line_num
| `None
]
(* Render [grid] to a Notty image of at most [width] columns.
   [render_mode] selects the page/line-number label drawn left of each
   line. When [height] is given, the image is cropped to a [height]-row
   window centered on the search-result words, shifted by [view_offset];
   the offset is clamped to valid bounds and written back if adjusted. *)
let render_grid
~doc_id
~(view_offset : int Lwd.var option)
~(render_mode : render_mode)
~width
?(height : int option)
?underline
(grid : word_grid)
: Notty.image =
(* Fold over grid rows, rendering each line while tracking which
   rendered (wrapped) lines contain search-result words, translated to
   whole-image line indices via the running rendered-line count. *)
let (_rendered_line_count, rendered_lines_with_search_result_words), images =
grid.data
|> Array.to_list
|> CCList.fold_map_i
(fun (rendered_line_count, rendered_lines_with_search_result_words_acc) i cells ->
let cells = Array.to_list cells in
let global_line_num = grid.start_global_line_num + i in
let line_loc = Index.line_loc_of_global_line_num ~doc_id global_line_num in
(* Index numbers are 0-based; display them 1-based. *)
let displayed_line_num = Index.Line_loc.line_num_in_page line_loc + 1 in
let displayed_page_num = Index.Line_loc.page_num line_loc + 1 in
let left_column_label =
match render_mode with
| `Page_num_only -> (
I.hcat
[ I.strf ~attr:A.(fg lightyellow) "Page %d" displayed_page_num
; I.strf ": " ]
)
| `Line_num_only -> (
I.hcat
[ I.strf ~attr:A.(fg lightyellow) "%d" displayed_line_num
; I.strf ": " ]
)
| `Page_and_line_num -> (
I.hcat
[ I.strf ~attr:A.(fg lightyellow) "Page %d, %d"
displayed_page_num
displayed_line_num
; I.strf ": " ]
)
| `None -> (
I.void 0 1
)
in
(* Remaining columns after the label; never below 1. *)
let content_width = max 1 (width - I.width left_column_label) in
let content, rendered_lines_with_search_result_words =
Text_block_rendering.of_cells ?underline ~width:content_width cells
in
((rendered_line_count + I.height content,
rendered_lines_with_search_result_words
|> Int_set.map (fun x -> x + rendered_line_count)
|> Int_set.union rendered_lines_with_search_result_words_acc
),
I.hcat [ left_column_label; content ])
)
(0, Int_set.empty)
in
let img = I.vcat images in
match height with
| None -> img
| Some height -> (
(* Focal point: midpoint of the first and last rendered lines that
   contain search-result words; 0 when there are none. *)
let focal_point_offset =
match
Int_set.min_elt_opt rendered_lines_with_search_result_words,
Int_set.max_elt_opt rendered_lines_with_search_result_words
with
| Some start_rendered_line_num, Some end_inc_rendered_line_num -> (
Misc_utils.div_round_to_closest
(start_rendered_line_num + end_inc_rendered_line_num)
2
)
| _, _ -> 0
in
let target_region_start =
max 0 (focal_point_offset - (Misc_utils.div_round_to_closest height 2))
in
let img_height = I.height img in
let target_region_end_exc =
min
img_height
(target_region_start + height)
in
let view_offset_old =
match view_offset with
| None -> 0
| Some x -> Lwd.peek x
in
(* Clamp the scroll offset: positive offsets cannot push the window
   past the image bottom, negative ones not above the top. *)
let view_offset' =
if view_offset_old >= 0 then (
min
view_offset_old
(img_height - target_region_end_exc)
) else (
let view_offset_old = Int.abs view_offset_old in
- (min
view_offset_old
(target_region_start - 0))
)
in
(* Persist the clamped offset so UI state stays consistent. *)
Option.iter (fun x ->
if view_offset_old <> view_offset' then (
Lwd.set x view_offset'
)) view_offset;
let target_region_start, target_region_end_exc =
if view_offset' >= 0 then (
(target_region_start + view_offset',
target_region_end_exc + view_offset')
) else (
(* If the offset is negative (shifting view upwards),
then make the bottom border "sticky".
In other words, if the height of view window
is smaller than the height of pane,
and the view is shifting upwards, don't bother
moving the bottom border.
This prevents the rendered content snippet view staying
small and cropping the bottom text when scrolling up
if the view started out being small (due to the search
result being close to the bottom of the file).
*)
let view_window_height = target_region_end_exc - target_region_start in
(target_region_start + view_offset',
if view_window_height < height then (
target_region_end_exc
) else (
target_region_end_exc + view_offset'
)
)
)
in
I.vcrop target_region_start (img_height - target_region_end_exc) img
)
(* Render a snippet of document [doc_id], [height] rows tall and at most
   [width] columns wide, centered on [data] (a search result or link)
   when given, otherwise anchored at the top of the document.
   [view_offset] scrolls the snippet; it may be clamped by [render_grid]. *)
let content_snippet
~doc_id
~(view_offset : int Lwd.var)
?(data : [ `Search_result of Search_result.t | `Link of Link.t ] option)
~(width : int)
~(height : int)
?underline
()
: Notty.image =
let max_end_inc_global_line_num = Index.global_line_count ~doc_id - 1 in
assert (height > 0);
(* Compute the inclusive global line range to load, extended in the
   current scroll direction by the view offset. *)
let compute_final_line_num_range
~(view_offset : int Lwd.var)
~start_global_line_num
: int * int =
let end_inc_global_line_num =
min
max_end_inc_global_line_num
(start_global_line_num + height - 1)
in
(* We grow the area in one direction
rather than shifting the area, in order
to not interfere with the focal point offset computation
in render_grid.
The number of lines to grow is an overapproximation
of the actual lines required, as a
line may wrap into multiple rendered lines
in the rendered view if it is longer than
the width of the content pane.
But we do not know how many lines (or partial segments
of lines) exactly until we
actually render the view/word grid.
*)
let view_offset' = Lwd.peek view_offset in
if view_offset' >= 0 then (
let end_inc_global_line_num =
min
max_end_inc_global_line_num
(end_inc_global_line_num + view_offset')
in
(start_global_line_num, end_inc_global_line_num)
) else (
let start_global_line_num =
max
0
(start_global_line_num - Int.abs view_offset')
in
(start_global_line_num, end_inc_global_line_num)
)
in
match data with
| None -> (
(* No focal point: show the document from the top. *)
let start_global_line_num, end_inc_global_line_num =
compute_final_line_num_range
~view_offset
~start_global_line_num:0
in
let grid =
word_grid_of_index
~doc_id
~start_global_line_num
~end_inc_global_line_num
in
render_grid
~doc_id
~view_offset:(Some view_offset)
~render_mode:`None
~width
~height
?underline
grid
)
| Some data -> (
(* Focal line: midpoint of the search result's line span, or the
   line containing the link's start position. *)
let focal_line =
match data with
| `Search_result search_result -> (
let (relevant_start_line, relevant_end_inc_line) =
start_and_end_inc_global_line_num_of_search_result ~doc_id search_result
in
let avg = (relevant_start_line + relevant_end_inc_line) / 2 in
avg
)
| `Link link -> (
let loc = Index.loc_of_pos ~doc_id link.Link.start_pos in
let line_loc = Index.Loc.line_loc loc in
Index.Line_loc.global_line_num line_loc
)
in
let start_global_line_num, end_inc_global_line_num =
compute_final_line_num_range
~view_offset
~start_global_line_num:(
max
0
(focal_line - (Misc_utils.div_round_to_closest height 2))
)
in
let grid =
word_grid_of_index
~doc_id
~start_global_line_num
~end_inc_global_line_num
in
(* Highlight the focal search result or link before rendering. *)
(match data with
| `Search_result search_result -> (
mark_search_result_in_word_grid ~doc_id grid search_result
)
| `Link link -> (
mark_link_in_word_grid ~doc_id grid link
)
);
render_grid
~doc_id
~view_offset:(Some view_offset)
~render_mode:`None
~width
~height
?underline
grid
)
(* [true] iff [s] is non-empty and its first character is not a space
   character as classified by [Parser_components.is_space]. *)
let word_is_not_space s =
  match s with
  | "" -> false
  | _ -> not (Parser_components.is_space (String.get s 0))
(* Widen the inclusive line range [start_global_line_num ..
   end_inc_global_line_num] by one line in each direction per iteration
   until the accumulated [non_space_word_count] reaches
   [!Params.search_result_print_snippet_min_size], the per-direction cap
   [!Params.search_result_print_snippet_max_additional_lines_each_direction]
   is hit, with growth in a direction stopping at the document boundary.
   Returns the (possibly) widened range. *)
let grab_additional_lines
~doc_id
~non_space_word_count
start_global_line_num
end_inc_global_line_num
: int * int =
let max_end_inc_global_line_num = Index.global_line_count ~doc_id - 1 in
let non_space_word_count_of_line n =
Index.words_of_global_line_num ~doc_id n
|> Dynarray.to_seq
|> Seq.filter word_is_not_space
|> Seq.length
in
(* [x]/[y] are the current top/bottom of the range; [i] counts
   completed growth iterations. *)
let rec aux ~non_space_word_count ~i x y =
if i < !Params.search_result_print_snippet_max_additional_lines_each_direction
&& non_space_word_count < !Params.search_result_print_snippet_min_size
then (
let x, top_add_count =
let n = x - 1 in
if n >= 0 then (
(n, non_space_word_count_of_line n)
) else (
(x, 0)
)
in
let y, bottom_add_count =
let n = y + 1 in
if n <= max_end_inc_global_line_num then (
(n, non_space_word_count_of_line n)
) else (
(y, 0)
)
in
let non_space_word_count =
non_space_word_count
+ top_add_count
+ bottom_add_count
in
aux ~non_space_word_count ~i:(i + 1) x y
) else (
(x, y)
)
in
aux ~non_space_word_count ~i:0 start_global_line_num end_inc_global_line_num
(* Render one search result of document [doc_id] as an image: the lines
   spanned by the matched phrase (widened with surrounding context when
   [fill_in_context] is set), with matched words highlighted. When debug
   output is enabled, the result's score is prepended. *)
let search_result
~doc_id
~render_mode
~width
?underline
?(fill_in_context = false)
(search_result : Search_result.t)
: Notty.image =
let open Notty in
let open Notty.Infix in
let (start_global_line_num, end_inc_global_line_num) =
start_and_end_inc_global_line_num_of_search_result ~doc_id search_result
|> (fun (x, y) ->
if fill_in_context then (
(* Seed context growth with the number of non-space tokens in the
   search phrase itself. *)
let non_space_word_count =
Search_result.search_phrase search_result
|> Search_phrase.enriched_tokens
|> List.filter_map (fun token ->
match Search_phrase.Enriched_token.data token with
| `Explicit_spaces -> None
| `String s -> (
assert (word_is_not_space s);
Some s
)
)
|> List.length
in
grab_additional_lines ~doc_id ~non_space_word_count x y
) else (
(x, y)
)
)
in
let grid =
word_grid_of_index
~doc_id
~start_global_line_num
~end_inc_global_line_num
in
mark_search_result_in_word_grid ~doc_id grid search_result;
let img =
render_grid
~doc_id
~view_offset:None
~render_mode
~width
?underline
grid
in
(* Debug builds show the ranking score above the snippet. *)
if Option.is_some !Params.debug_output then (
let score = Search_result.score search_result in
I.strf "(Score: %f)" score
<->
img
) else (
img
)
(* Render [l] as a vertical list with the [selection]-th entry prefixed
   by a "> " marker, then crop to a [height]-row window roughly centered
   on the selected entry. Each entry is followed by a blank line. *)
let centered_list
~width
~height
~(render : width:int -> 'a -> Notty.image)
(selection : int)
(l : 'a list)
: Notty.image =
(* Total rendered height of entries above the selection; used as the
   focal point when cropping below. *)
let height_rendered_before_selection = ref 0 in
let img =
l
|> List.mapi (fun i x ->
let img =
let x =
if i = selection then (
(* The marker takes two columns off the entry's width. *)
let img = render ~width:(width - 2) x in
Notty.I.(strf ~attr:A.(fg lightyellow) "> "
<|>
img
)
) else (
render ~width x
)
in
Notty.I.(x <-> strf "")
in
if i < selection then (
height_rendered_before_selection := !height_rendered_before_selection + Notty.I.height img;
);
img
)
|> Notty.I.vcat
in
let focal_point_offset = !height_rendered_before_selection in
let img_height = I.height img in
let target_region_start =
max 0 (focal_point_offset - (Misc_utils.div_round_to_closest height 2))
in
let target_region_end_exc =
min
img_height
(target_region_start + height)
in
I.vcrop target_region_start (img_height - target_region_end_exc) img
================================================
FILE: bin/debug_utils.ml
================================================
(* Run [f] on the debug output channel when debug output is enabled;
   otherwise do nothing. *)
let do_if_debug (f : out_channel -> unit) =
  Option.iter f !Params.debug_output
================================================
FILE: bin/docfd.ml
================================================
open Cmdliner
open Lwd_infix
open Docfd_lib
open Debug_utils
open Misc_utils
open File_utils
(* Validate every glob pattern in [globs], exiting with an error message
   on the first unparsable one, then expand the patterns into the set of
   matching file paths. *)
let compute_paths_from_globs ~report_progress globs =
  let validate s =
    if Option.is_none (Glob.parse s) then (
      exit_with_error_msg
        (Fmt.str "failed to parse glob pattern: \"%s\"" s)
    )
  in
  Seq.iter validate globs;
  list_files_recursive_filter_by_globs ~report_progress globs
(* User-specified constraints on which files to pick up, plus flags
   controlling use of the external pdftotext/pandoc converters. *)
type file_constraints = {
no_pdftotext : bool;
no_pandoc : bool;
(* [false] iff the user gave no paths/globs at all and we defaulted to
   scanning the current directory. *)
paths_were_specified_by_user : bool;
exts : string list;
single_line_exts : string list;
directly_specified_paths : String_set.t;
globs : String_set.t;
single_line_globs : String_set.t;
}
(* Assemble the user's file selection constraints. When no paths, path
   files, or glob patterns were supplied at all, default to scanning the
   current directory ["."]. *)
let make_file_constraints
    ~no_pdftotext
    ~no_pandoc
    ~(exts : string list)
    ~(single_line_exts : string list)
    ~(paths : string list)
    ~(paths_from_file_or_stdin : string list option)
    ~(globs : string list)
    ~(single_line_globs : string list)
  : file_constraints =
  let nothing_specified =
    paths = []
    && Option.is_none paths_from_file_or_stdin
    && globs = []
    && single_line_globs = []
  in
  if nothing_specified then (
    {
      no_pdftotext;
      no_pandoc;
      paths_were_specified_by_user = false;
      exts;
      single_line_exts;
      directly_specified_paths = String_set.of_list [ "." ];
      globs = String_set.empty;
      single_line_globs = String_set.empty;
    }
  ) else (
    let extra_paths = Option.value ~default:[] paths_from_file_or_stdin in
    {
      no_pdftotext;
      no_pandoc;
      paths_were_specified_by_user = true;
      exts;
      single_line_exts;
      directly_specified_paths = String_set.of_list (paths @ extra_paths);
      globs = String_set.of_list globs;
      single_line_globs = String_set.of_list single_line_globs;
    }
  )
(* Expand [cons] into the concrete collection of files to load, split
   into default and single-line search mode sets, showing a scanning
   spinner when [interactive]. *)
let files_satisfying_constraints
~interactive
(cons : file_constraints)
: Document_src.file_collection =
let bar =
let open Progress.Line in
list
[ const "Scanning"
; spinner ()
]
in
progress_with_reporter
~interactive
bar
(fun report_progress : Document_src.file_collection ->
(* Single-line mode applies when the file's extension is listed in
   [cons.single_line_exts]. *)
let single_line_search_mode_applies file =
List.mem (extension_of_file file) cons.single_line_exts
in
(* Scan directly specified paths, keeping recognized extensions only,
   and split the hits by search mode. *)
let single_line_search_mode_paths_by_exts, default_search_mode_paths_by_exts =
cons.directly_specified_paths
|> String_set.to_seq
|> list_files_recursive_filter_by_exts
~report_progress
~exts:(cons.exts @ cons.single_line_exts)
|> String_set.partition single_line_search_mode_applies
in
(* Expand both glob families into path sets. *)
let paths_from_single_line_globs =
cons.single_line_globs
|> String_set.to_seq
|> compute_paths_from_globs ~report_progress
in
let single_line_search_mode_paths_from_globs, default_search_mode_paths_from_globs =
cons.globs
|> String_set.to_seq
|> compute_paths_from_globs ~report_progress
|> String_set.partition single_line_search_mode_applies
in
let single_line_search_mode_files =
single_line_search_mode_paths_by_exts
|> String_set.union paths_from_single_line_globs
|> String_set.union single_line_search_mode_paths_from_globs
in
(* Single-line mode wins: remove its files from the default set. *)
let default_search_mode_files =
default_search_mode_paths_by_exts
|> String_set.union default_search_mode_paths_from_globs
|> (fun s -> String_set.diff s single_line_search_mode_files)
in
(* Debug-only sanity checks: the two sets are disjoint, and agree with
   a naively recomputed partition of all files. *)
do_if_debug (fun oc ->
Printf.fprintf oc "Checking if single line search mode files and default search mode files are disjoint\n";
if String_set.is_empty
(String_set.inter
single_line_search_mode_files
default_search_mode_files)
then (
Printf.fprintf oc "Check successful\n"
) else (
failwith "check failed"
);
let all_files =
single_line_search_mode_paths_by_exts
|> String_set.union default_search_mode_paths_by_exts
|> String_set.union paths_from_single_line_globs
|> String_set.union single_line_search_mode_paths_from_globs
|> String_set.union default_search_mode_paths_from_globs
in
let single_line_search_mode_files', default_search_mode_files' =
String_set.partition (fun s ->
single_line_search_mode_applies s
||
String_set.mem s paths_from_single_line_globs
)
all_files
in
Printf.fprintf oc "Checking if efficiently computed and naively computed results for single line search mode files are consistent\n";
if String_set.equal
single_line_search_mode_files
single_line_search_mode_files'
then (
Printf.fprintf oc "Check successful\n"
) else (
failwith "check failed"
);
Printf.fprintf oc "Checking if efficiently computed and naively computed results for default search mode files are consistent\n";
if String_set.equal
default_search_mode_files
default_search_mode_files'
then (
Printf.fprintf oc "Check successful\n"
) else (
failwith "check failed"
)
);
(* Drop PDF / pandoc-format files when the respective converter has
   been disabled on the command line. *)
let filter_for_no_pdftotext_or_no_pandoc (s : String_set.t) =
if cons.no_pdftotext || cons.no_pandoc then (
String_set.filter
(fun s ->
match File_utils.format_of_file s with
| `PDF -> not cons.no_pdftotext
| `Pandoc_supported_format -> not cons.no_pandoc
| `Text | `Other -> true
)
s
) else (
s
)
in
let default_search_mode_files =
filter_for_no_pdftotext_or_no_pandoc default_search_mode_files
in
let single_line_search_mode_files =
filter_for_no_pdftotext_or_no_pandoc single_line_search_mode_files
in
{
default_search_mode_files;
single_line_search_mode_files;
}
)
(* Build the initial session state from [document_src]: stat, hash, and
   index every file (or the stdin temp file), reusing the on-disk index
   for already-indexed documents, with progress bars when [interactive]. *)
let init_session_state_of_document_src ~env ~interactive pool (document_src : Document_src.t) =
(* Progress bar counting whole files processed. *)
let file_bar ~total_file_count =
let open Progress.Line in
list
[ brackets (elapsed ())
; bar ~width:(`Fixed 20) total_file_count
; percentage_of total_file_count
; const "ETA: " ++ eta total_file_count
]
in
(* Progress bar counting bytes processed. *)
let byte_bar ~total_byte_count =
let open Progress.Line in
list
[ brackets (elapsed ())
; bar ~width:(`Fixed 20) total_byte_count
; percentage_of total_byte_count
; bytes_per_sec
; const "ETA: " ++ eta total_byte_count
]
in
let all_documents : Document.t list list =
match document_src with
(* Stdin was already dumped to a temp file [path]; load it directly. *)
| Document_src.Stdin path -> (
match
Document.of_path
~env
pool
~already_in_transaction:false
!Params.default_search_mode
path
with
| Ok x -> [ [ x ] ]
| Error msg -> (
exit_with_error_msg msg
)
)
| Files { default_search_mode_files; single_line_search_mode_files } -> (
let print_stage_stats ~file_count ~total_byte_count =
Printf.printf "- File count: %6d\n" file_count;
Printf.printf "- MiB: %8.1f\n"
(Misc_utils.mib_of_bytes total_byte_count);
in
(* Tag each path with the search mode it should be loaded under. *)
let total_file_count, files =
Seq.append
(Seq.map (fun path -> (!Params.default_search_mode, path))
(String_set.to_seq default_search_mode_files))
(Seq.map (fun path -> (`Single_line, path))
(String_set.to_seq single_line_search_mode_files))
|> Misc_utils.length_and_list_of_seq
in
if interactive then (
Printf.printf "Collecting file stats\n";
flush stdout;
);
(* Stage 1: collect file sizes (files that cannot be stat'd are
   silently skipped from the size map). *)
let documents_total_byte_count, document_sizes =
progress_with_reporter
~interactive
(file_bar ~total_file_count)
(fun report_progress ->
List.fold_left (fun (total_size, m) (_, path) ->
let res =
match File_utils.file_size path with
| None -> (total_size, m)
| Some x -> (total_size + x, String_map.add path x m)
in
report_progress 1;
res
)
(0, String_map.empty)
files
)
in
if interactive then (
print_stage_stats
~file_count:total_file_count
~total_byte_count:documents_total_byte_count
);
if interactive then (
Printf.printf "Hashing\n";
flush stdout;
);
(* Stage 2: hash each file in parallel on the task pool; files that
   fail to hash are dropped (logged in debug mode). *)
let file_and_hash_list =
match files with
| [] -> []
| _ -> (
files
|> (fun l ->
progress_with_reporter
~interactive
(byte_bar ~total_byte_count:documents_total_byte_count)
(fun report_progress ->
Task_pool.filter_map_list pool (fun (search_mode, path) ->
do_if_debug (fun oc ->
Printf.fprintf oc "Hashing document: %s\n" (Filename.quote path);
);
let res =
match BLAKE2B.hash_of_file ~env ~path with
| Ok hash -> Some (search_mode, path, hash)
| Error msg -> (
do_if_debug (fun oc ->
Printf.fprintf oc "Error: %s\n" msg
);
None
)
in
(match String_map.find_opt path document_sizes with
| None -> ()
| Some x -> report_progress x);
res
)
l
)
)
)
in
if interactive then (
Printf.printf "Allocating document IDs\n";
flush stdout;
);
(* Stage 3: allocate document IDs for all hashes in one batch. *)
progress_with_reporter
~interactive
(file_bar ~total_file_count)
(fun report_progress ->
file_and_hash_list
|> List.to_seq
|> Seq.map (fun (_, _, doc_hash) ->
report_progress 1;
doc_hash)
|> Doc_id_db.allocate_bulk
);
(* Stage 4: split files into already-indexed and not-yet-indexed by
   querying the index database. *)
let indexed_files, unindexed_files =
let open Sqlite3_utils in
with_stmt
Index.is_indexed_sql
(fun stmt ->
List.partition (fun (_, _, doc_hash) ->
bind_names stmt [ ("@doc_hash", TEXT doc_hash) ];
step stmt;
let indexed = data_count stmt > 0 in
reset stmt;
indexed
)
file_and_hash_list
)
in
(* Touch the last-used timestamp of already-indexed documents so they
   are not evicted from the cache. *)
indexed_files
|> List.map (fun (_, _, doc_hash) ->
Doc_id_db.doc_id_of_doc_hash doc_hash
)
|> Index.refresh_last_used_batch;
(* Load one document, returning [None] (and logging in debug mode) on
   failure. *)
let load_document ~env pool search_mode ~doc_hash path =
do_if_debug (fun oc ->
Printf.fprintf oc "Loading document: %s\n" (Filename.quote path);
);
do_if_debug (fun oc ->
Printf.fprintf oc "Using %s search mode for document %s\n"
(match search_mode with
| `Single_line -> "single line"
| `Multiline -> "multiline"
)
(Filename.quote path)
);
match
Document.of_path
~env
pool
~already_in_transaction:false
search_mode
~doc_hash
path
with
| Ok x -> (
do_if_debug (fun oc ->
Printf.fprintf oc "Document %s loaded successfully\n" (Filename.quote path);
);
Some x
)
| Error msg -> (
do_if_debug (fun oc ->
Printf.fprintf oc "Error: %s\n" msg
);
None
)
in
if interactive then (
Printf.printf "Processing indexed files\n";
flush stdout;
);
(* Stage 5: load already-indexed documents. *)
let indexed_files =
indexed_files
|> List.filter_map (fun (search_mode, path, doc_hash) ->
load_document ~env pool search_mode ~doc_hash path
)
in
if interactive then (
Printf.printf "Indexing remaining files\n";
flush stdout;
);
let unindexed_file_count, unindexed_files_byte_count =
List.fold_left (fun (file_count, byte_count) (_, path, _) ->
(file_count + 1,
byte_count + Option.value ~default:0 (String_map.find_opt path document_sizes))
)
(0, 0)
unindexed_files
in
if interactive then (
print_stage_stats
~file_count:unindexed_file_count
~total_byte_count:unindexed_files_byte_count;
);
(* Stage 6: index the remaining files through the document pipeline,
   feeding it concurrently with its worker fiber. *)
let pipeline = Document_pipeline.make ~env pool in
let _, unindexed_files =
Eio.Fiber.pair
(fun () ->
Document_pipeline.run pipeline
)
(fun () ->
(match unindexed_files with
| [] -> ()
| _ -> (
progress_with_reporter
~interactive
(byte_bar ~total_byte_count:unindexed_files_byte_count)
(fun report_progress ->
unindexed_files
|> List.iter (fun (search_mode, path, doc_hash) ->
Document_pipeline.feed
pipeline
search_mode
~doc_hash
path;
(match String_map.find_opt path document_sizes with
| None -> ()
| Some x -> report_progress x
)
)
)
));
if interactive then (
Printf.printf "Finalizing index\n";
flush stdout;
);
Document_pipeline.finalize pipeline
)
in
[ indexed_files; unindexed_files ]
)
in
(* Flatten all loaded documents into the initial session state. *)
let state =
all_documents
|> List.to_seq
|> Seq.flat_map List.to_seq
|> Session.State.of_seq pool
in
(* Reclaim memory used by the loading stages before the UI starts. *)
Gc.compact ();
state
(* Parse a --sort / --sort-no-score command line argument, exiting with
   a descriptive error message (naming the offending flag) on failure. *)
let parse_sort_by_arg ~no_score (s : string) : Command.Sort_by.t =
  match Command.Sort_by.parse ~no_score s with
  | Ok t -> t
  | Error msg -> (
      (* Strip the leading ": " the parser prepends, when present. *)
      let msg =
        Option.value ~default:msg (CCString.chop_prefix ~pre:": " msg)
      in
      let arg_name =
        if no_score then Args.sort_no_score_arg_name
        else Args.sort_arg_name
      in
      exit_with_error_msg
        (Fmt.str "failed to parse --%s argument: %s" arg_name msg)
    )
let run
~(eio_env : Eio_unix.Stdenv.base)
~sw
(debug_log : string option)
(no_pdftotext : bool)
(no_pandoc : bool)
(scan_hidden : bool)
(max_depth : int)
(max_fuzzy_edit_dist : int)
(max_token_search_dist : int)
(max_linked_token_search_dist : int)
(tokens_per_search_scope_level : int)
(index_chunk_size : int)
(exts : string)
(single_line_exts : string)
(additional_exts : string list)
(single_line_additional_exts : string list)
(cache_dir : string)
(cache_limit : int)
(data_dir : string)
(index_only : bool)
(start_with_filter : string option)
(start_with_search : string option)
(filter_exp : string option)
(sample_search_exp : string option)
(samples_per_doc : int)
(search_exp : string option)
(sort_by : string)
(sort_by_no_score : string)
(print_color_mode : Params.style_mode)
(print_underline_mode : Params.style_mode)
(search_result_print_text_width : int)
(search_result_print_snippet_min_size : int)
(search_result_print_max_add_lines : int)
(start_with_script : string option)
(script : string option)
(paths_from : string list)
(globs : string list)
(single_line_globs : string list)
(single_line_search_mode_by_default : bool)
(path_open_specs : string list)
(print_files_with_match : bool)
(print_files_without_match : bool)
(paths : string list)
=
let env = eio_env in
Args.check
~max_depth
~max_fuzzy_edit_dist
~max_token_search_dist
~max_linked_token_search_dist
~tokens_per_search_scope_level
~index_chunk_size
~cache_limit
~start_with_filter
~start_with_search
~filter_exp
~sample_search_exp
~samples_per_doc
~search_exp
~search_result_print_text_width
~search_result_print_snippet_min_size
~search_result_print_max_add_lines
~start_with_script
~script
~paths_from
~print_files_with_match
~print_files_without_match;
Params.debug_output := (match debug_log with
| None -> None
| Some "-" -> Some stderr
| Some debug_log -> (
try
Some (
open_out_gen
[ Open_append; Open_creat; Open_wronly; Open_text ]
0o644
debug_log
)
with
| Sys_error _ -> (
exit_with_error_msg
(Fmt.str "failed to open debug log file %s" (Filename.quote debug_log))
)
)
);
Params.scan_hidden := scan_hidden;
Params.max_file_tree_scan_depth := max_depth;
Params.max_fuzzy_edit_dist := max_fuzzy_edit_dist;
Params.max_token_search_dist := max_token_search_dist;
Params.max_linked_token_search_dist := max_linked_token_search_dist;
Params.tokens_per_search_scope_level := tokens_per_search_scope_level;
Params.index_chunk_size := index_chunk_size;
Params.cache_limit := cache_limit;
Params.search_result_print_text_width := search_result_print_text_width;
Params.search_result_print_snippet_min_size := search_result_print_snippet_min_size;
Params.search_result_print_snippet_max_additional_lines_each_direction :=
search_result_print_max_add_lines;
Params.samples_per_document := samples_per_doc;
let sort_by = parse_sort_by_arg ~no_score:false sort_by in
let sort_by_no_score = parse_sort_by_arg ~no_score:true sort_by_no_score in
Params.cache_dir := (
mkdir_recursive cache_dir;
Some cache_dir
);
Params.data_dir := (
mkdir_recursive data_dir;
Some data_dir
);
Params.default_search_mode := (
if single_line_search_mode_by_default then (
`Single_line
) else (
`Multiline
)
);
List.iter (fun spec ->
match Path_opening.parse_spec spec with
| Error msg -> (
exit_with_error_msg (Fmt.str "failed to parse %s, %s" spec msg)
)
| Ok (exts, launch_mode, cmd) -> (
List.iter (fun ext ->
Hashtbl.replace Path_opening.specs ext (launch_mode, cmd)
)
exts
)
) path_open_specs;
let db_path = Filename.concat cache_dir Params.db_file_name in
(match Docfd_lib.init ~db_path ~document_count_limit:cache_limit with
| None -> ()
| Some msg -> exit_with_error_msg msg
);
let interactive =
Option.is_none filter_exp
&&
Option.is_none sample_search_exp
&&
Option.is_none search_exp
&&
not print_files_with_match
&&
not print_files_without_match
&&
Option.is_none script
in
if interactive then (
Printf.printf "Initializing in-memory index\n";
flush stdout;
);
Word_db.read_from_db ();
Index.State.read_from_db ();
(match Sys.getenv_opt "VISUAL", Sys.getenv_opt "EDITOR" with
| None, None -> (
exit_with_error_msg
(Fmt.str "environment variable VISUAL or EDITOR needs to be set")
)
| Some editor, _
| None, Some editor -> (
Params.text_editor := editor;
)
);
Lwd.unsafe_mutation_logger := (fun () -> ());
let recognized_exts =
compute_total_recognized_exts ~exts ~additional_exts
in
let recognized_single_line_exts =
compute_total_recognized_exts ~exts:single_line_exts ~additional_exts:single_line_additional_exts
in
(match recognized_exts, recognized_single_line_exts, globs, single_line_globs with
| [], [], [], [] -> (
exit_with_error_msg
(Fmt.str "no usable file extensions or glob patterns")
)
| _, _, _, _ -> ()
);
let paths_from_file_or_stdin =
match paths_from with
| [] -> None
| l -> (
l
|> List.to_seq
|> Seq.map (String.split_on_char ',')
|> Seq.flat_map List.to_seq
|> Seq.flat_map (fun paths_from ->
(match paths_from with
| "-" -> (
CCIO.read_lines_l stdin
)
| _ -> (
try
CCIO.with_in paths_from CCIO.read_lines_l
with
| Sys_error _ -> (
exit_with_error_msg
(Fmt.str "failed to read list of paths from %s" (Filename.quote paths_from))
)
)
)
|> List.to_seq
)
|> List.of_seq
|> Option.some
)
in
let file_constraints =
make_file_constraints
~no_pdftotext
~no_pandoc
~exts:recognized_exts
~single_line_exts:recognized_single_line_exts
~paths
~paths_from_file_or_stdin
~globs
~single_line_globs
in
let pool = Task_pool.make ~sw (Eio.Stdenv.domain_mgr env) in
Atomic.set UI_base.Vars.pool (Some pool);
String_set.iter (fun path ->
if not (Sys.file_exists path) then (
exit_with_error_msg
(Fmt.str "path %s does not exist" (Filename.quote path))
)
)
file_constraints.directly_specified_paths;
let compute_if_hide_document_list_initially_and_document_src : unit -> bool * Document_src.t =
let stdin_tmp_file = ref None in
(fun () ->
let file_collection =
files_satisfying_constraints ~interactive file_constraints
in
if file_constraints.paths_were_specified_by_user
|| stdin_is_atty ()
then (
let hide_document_list =
match Document_src.file_collection_size file_collection with
| 0 -> false
| 1 -> true
| _ -> false
in
(hide_document_list, Files file_collection)
) else (
match !stdin_tmp_file with
| None -> (
match read_in_channel_to_tmp_file stdin with
| Ok tmp_file -> (
stdin_tmp_file := Some tmp_file;
(true, Stdin tmp_file)
)
| Error msg -> (
exit_with_error_msg msg
)
)
| Some tmp_file -> (
(true, Stdin tmp_file)
)
)
)
in
let compute_document_src () =
snd (compute_if_hide_document_list_initially_and_document_src ())
in
let hide_document_list_initially, init_document_src =
compute_if_hide_document_list_initially_and_document_src ()
in
let clean_up () =
match init_document_src with
| Stdin tmp_file -> (
try
Sys.remove tmp_file
with
| Sys_error _ -> ()
)
| Files _ -> ()
in
do_if_debug (fun oc ->
Printf.fprintf oc "Scanning completed\n"
);
do_if_debug (fun oc ->
match init_document_src with
| Stdin _ -> Printf.fprintf oc "Document source: stdin\n"
| Files file_collection -> (
Printf.fprintf oc "Document source: files\n";
Document_src.seq_of_file_collection file_collection
|> Seq.iter (fun file ->
Printf.fprintf oc "File: %s\n" (Filename.quote file);
)
)
);
(match init_document_src with
| Stdin _ -> ()
| Files file_collection -> (
let pdftotext_exists = Proc_utils.command_exists "pdftotext" in
let pandoc_exists = Proc_utils.command_exists "pandoc" in
let formats = Document_src.seq_of_file_collection file_collection
|> Seq.map format_of_file
|> Seq.fold_left (fun acc x -> File_format_set.add x acc) File_format_set.empty
in
if not pdftotext_exists && File_format_set.mem `PDF formats then (
exit_with_error_msg
(Fmt.str "command pdftotext not found, use --%s to disable use of pdftotext" Args.no_pdftotext_arg_name)
);
if File_format_set.mem `Pandoc_supported_format formats then (
if not pandoc_exists then (
exit_with_error_msg
(Fmt.str "command pandoc not found, use --%s to disable use of pandoc" Args.no_pandoc_arg_name)
);
);
)
);
Lwd.set UI_base.Vars.hide_document_list hide_document_list_initially;
init_session_state_of_document_src ~env pool ~interactive init_document_src
|> (fun state ->
Session.run_command pool (`Sort (sort_by, sort_by_no_score)) state
|> Option.get
|> snd
)
|> Session_manager.update_starting_state;
if index_only then (
clean_up ();
exit 0
);
let print_oc = stdout in
let print_with_color =
match print_color_mode with
| `Never -> false
| `Always -> true
| `Auto -> Out_channel.isatty print_oc
in
let print_with_underline =
match print_underline_mode with
| `Never -> false
| `Always -> true
| `Auto -> not (Out_channel.isatty print_oc)
in
let snapshots_from_script =
match script, start_with_script with
| None, None -> None
| Some _, Some _ -> failwith "unexpected case"
| Some script, None
| None, Some script -> (
let init_state =
Session_manager.lock_with_view (fun view ->
view.init_state
)
in
match
Script.run
pool
~init_state
~path:script
with
| Error msg -> exit_with_error_msg msg
| Ok snapshots -> Some snapshots
)
in
(match snapshots_from_script with
| None -> ()
| Some snapshots -> (
Session_manager.load_snapshots snapshots;
));
if not interactive then (
let filter_exp_and_original_string =
match filter_exp with
| None -> None
| Some s ->
Some (Option.get (Filter_exp.parse s), s)
in
let print_limit_per_doc, search_exp_and_original_string =
match sample_search_exp, search_exp with
| None, None -> (None, None)
| Some _, Some _ -> failwith "unexpected case"
| Some search_exp_string, None
| None, Some search_exp_string -> (
let print_limit_per_doc =
match sample_search_exp with
| Some _ -> Some samples_per_doc
| None -> None
in
let search_exp =
Option.get (Search_exp.parse search_exp_string)
in
do_if_debug (fun oc ->
Fmt.pf
(Format.formatter_of_out_channel oc)
"Search expression: @[%a@]@." Search_exp.pp search_exp
);
(print_limit_per_doc, Some (search_exp, search_exp_string))
)
in
let session_state =
match snapshots_from_script with
| None -> (
Session_manager.lock_with_view (fun view ->
view.init_state
)
|> (fun state ->
match filter_exp_and_original_string with
| None -> state
| Some (filter_exp, filter_exp_string) -> (
Option.get
(Session.State.update_filter_exp
pool
(Stop_signal.make ())
filter_exp_string
filter_exp
state
)
)
)
|> (fun state ->
match search_exp_and_original_string with
| None -> state
| Some (search_exp, search_exp_string) -> (
Option.get
(Session.State.update_search_exp
pool
(Stop_signal.make ())
search_exp_string
search_exp
state
)
)
)
)
| Some _ -> (
Session_manager.lock_with_view (fun view ->
Dynarray.get_last view.snapshots
|> Session.Snapshot.state
)
)
in
let oc = stdout in
let no_results =
if print_files_with_match then (
let arr =
Session.State.search_result_groups session_state
in
Array.iter (fun (doc, _search_result) ->
Printers.path_image ~color:print_with_color oc (Document.path doc)
) arr;
Array.length arr = 0
) else if print_files_without_match then (
let arr =
Session.State.unusable_documents session_state
|> Array.of_seq
in
let (sort_by_typ, sort_by_order) = sort_by_no_score in
let f =
match sort_by_typ with
| `Path_date -> Document.Compare.path_date sort_by_order
| `Mod_time -> Document.Compare.mod_time sort_by_order
| `Path -> Document.Compare.path sort_by_order
| `Score -> failwith "unexpected case"
in
Array.sort f arr;
Array.iter (fun doc ->
Printers.path_image ~color:print_with_color oc (Document.path doc)
) arr;
Array.length arr = 0
) else (
let s =
Session.State.search_result_groups session_state
|> Array.to_seq
|> Seq.map (fun (doc, arr) ->
let arr =
match print_limit_per_doc with
| None -> arr
| Some n -> (
Array.sub
arr
0
(min (Array.length arr) n)
)
in
(doc, arr)
)
in
Printers.search_result_groups
~color:print_with_color
~underline:print_with_underline
oc
s;
Seq.is_empty s
)
in
clean_up ();
if no_results then (
exit 1
) else (
exit 0
)
);
UI_base.Vars.eio_env := Some env;
let root : Nottui.ui Lwd.t =
let$* (term_width, term_height) = Lwd.get UI_base.Vars.term_width_height in
if term_width <= 40 || term_height <= 20 then (
let msg = Nottui.Ui.atom (Notty.I.strf "Terminal size too small") in
let keyboard_handler (key : Nottui.Ui.key) =
match key with
| (`Escape, [])
| (`ASCII 'Q', [`Ctrl])
| (`ASCII 'C', [`Ctrl]) -> (
Lwd.set UI_base.Vars.quit true;
UI_base.Vars.action := None;
`Handled
)
| _ -> `Unhandled
in
Lwd.return (Nottui.Ui.keyboard_area keyboard_handler msg)
) else (
UI.main
)
in
let get_term, close_term =
let term_and_tty_fd = ref None in
((fun () ->
match !term_and_tty_fd with
| None -> (
if stdin_is_atty () then (
let term = Notty_unix.Term.create () in
term_and_tty_fd := Some (term, None);
term
) else (
let input =
Unix.(openfile "/dev/tty" [ O_RDONLY ] 0o444)
in
let term = Notty_unix.Term.create ~input () in
term_and_tty_fd := Some (term, Some input);
term
)
)
| Some (term, _tty_fd) -> term
),
(fun () ->
match !term_and_tty_fd with
| None -> ()
| Some (term, tty_fd) -> (
Notty_unix.Term.release term;
(match tty_fd with
| None -> ()
| Some fd -> Unix.close fd);
term_and_tty_fd := None
)
)
)
in
let rec loop () =
Sys.command "clear -x" |> ignore;
let term = get_term () in
UI_base.Vars.term := Some term;
UI_base.Vars.action := None;
Lwd.set UI_base.Vars.quit false;
UI_base.ui_loop
~quit:UI_base.Vars.quit
~term
root;
match !UI_base.Vars.action with
| None -> ()
| Some action -> (
Session_manager.stop_filter_and_search_and_restore_input_fields ();
match action with
| UI_base.Recompute_document_src -> (
close_term ();
let new_starting_state =
compute_document_src ()
|> init_session_state_of_document_src ~env ~interactive pool
in
Session_manager.update_starting_state new_starting_state;
loop ()
)
| Open_file_and_search_result (doc, search_result) -> (
let doc_id = Document.doc_id doc in
let path = Document.path doc in
let old_stats = Unix.stat path in
Path_opening.main
~close_term
~path
~doc_id_and_search_result:(Option.map (fun x -> (doc_id, x)) search_result);
let new_stats = Unix.stat path in
if
Float.abs
(new_stats.st_mtime -. old_stats.st_mtime) >= Params.float_compare_margin
then (
UI.reload_document doc
);
loop ()
)
| Open_link (doc, link) -> (
Path_opening.open_link
~close_term
~doc
link;
loop ()
)
| Edit_command_history -> (
let file = Filename.temp_file "" Params.docfd_script_ext in
let init_snapshots =
Session_manager.lock_with_view (fun view ->
view.snapshots
)
in
let init_lines =
Seq.append
(
init_snapshots
|> Dynarray.to_seq
|> Seq.filter_map (fun (snapshot : Session.Snapshot.t) ->
Option.map
Command.to_string
(Session.Snapshot.last_command snapshot)
)
)
(
List.to_seq
[
"";
"; You are viewing/editing Docfd command history.";
"; If any change is made to this file, Docfd will replay the commands from the start.";
";";
"; There are two types of comments:";
"; - System comments begin with `;`, and are not preserved after editing of command history.";
"; These are for communication of error message to user during command history editing.";
"; - User comments begin with `#`, and are preserved after editing of command history.";
";";
"; If a line is not blank and is not a comment,";
"; then the line should contain exactly one command.";
"; A command cannot be written across multiple lines.";
";";
"; Starting point is v0, the state with the full set of documents.";
"; Each command adds one to the version number.";
"; Command at the top is oldest, command at bottom is the newest.";
";";
"; Note that for commands that accept text, all text after `:` is trimmed and then used in full.";
"; This means \" and ' are treated literally and are not used to delimit strings.";
";";
"; Possible commands:";
Fmt.str "; - %a" Command.pp (`Search "search phrase");
Fmt.str "; - %a" Command.pp (`Search "");
Fmt.str "; - %a" Command.pp (`Filter "path-fuzzy:\"file txt\"");
Fmt.str "; - %a" Command.pp (`Filter "");
Fmt.str "; - %a" Command.pp (`Sort (Command.Sort_by.default, Command.Sort_by.default_no_score));
Fmt.str "; - %a" Command.pp (`Path_fuzzy_rank ("readme", None));
Fmt.str "; - %a" Command.pp (`Narrow_level 1);
Fmt.str "; - %a" Command.pp (`Mark "/path/to/document");
Fmt.str "; - %a" Command.pp `Mark_listed;
Fmt.str "; - %a" Command.pp (`Unmark "/path/to/document");
Fmt.str "; - %a" Command.pp `Unmark_listed;
Fmt.str "; - %a" Command.pp `Unmark_all;
Fmt.str "; - %a" Command.pp (`Drop "/path/to/document");
Fmt.str "; - %a" Command.pp (`Drop_all_except "/path/to/document");
Fmt.str "; - %a" Command.pp `Drop_marked;
Fmt.str "; - %a" Command.pp `Drop_unmarked;
Fmt.str "; - %a" Command.pp `Drop_listed;
Fmt.str "; - %a" Command.pp `Drop_unlisted;
]
)
|> List.of_seq
in
let init_state =
Session_manager.lock_with_view (fun view ->
view.init_state
)
in
let rec aux rerun snapshots lines : [ `No_changes | `Changes_made of Session.Snapshot.t Dynarray.t ] =
CCIO.with_out file (fun oc ->
CCIO.write_lines_l oc lines;
);
let old_stats = Unix.stat file in
close_term ();
Path_opening.config_and_cmd_to_open_text_file
~path:file
~line_num:(max 1 (Dynarray.length snapshots - 1))
()
|> (fun (config, cmd) ->
Result.get_ok (Path_opening.resolve_cmd config cmd)
)
|> Sys.command
|> ignore;
let new_stats = Unix.stat file in
if
rerun
||
Float.abs
(new_stats.st_mtime -. old_stats.st_mtime) >= Params.float_compare_margin
then (
let state = ref init_state in
let snapshots = Dynarray.create () in
Dynarray.add_last
snapshots
(Session.Snapshot.make
~last_command:None
init_state);
let rerun = ref false in
let lines =
CCIO.with_in file (fun ic ->
CCIO.read_lines_l ic
|> CCList.flat_map (fun line ->
if
String_utils.line_is_blank_or_system_comment line
then (
[ line ]
) else (
match Command.of_string line with
| None -> (
rerun := true;
[
line;
"# Failed to parse the above command"
]
)
| Some command -> (
match Session.run_command pool command !state with
| None -> (
rerun := true;
[
line;
"# Failed to run the above command, check if the arguments are correct"
]
)
| Some (command, x) -> (
state := x;
let snapshot =
Session.Snapshot.make
~last_command:(Some command)
!state
in
Dynarray.add_last
snapshots
snapshot;
[ line ]
)
)
)
)
)
in
if !rerun then (
aux true snapshots lines
) else (
`Changes_made snapshots
)
) else (
`No_changes
)
in
(try
let res =
aux false init_snapshots init_lines
in
(try
Sys.remove file;
with
| Sys_error _ -> ()
);
(match res with
| `No_changes -> ()
| `Changes_made snapshots -> (
Session_manager.load_snapshots snapshots
)
);
with
| Sys_error _ -> (
exit_with_error_msg
(Fmt.str "failed to read or write temporary command history file %s" (Filename.quote file))
));
loop ()
)
| Clear_command_history -> (
let init_state =
Session_manager.lock_with_view (fun view ->
view.init_state
)
in
let snapshots = Dynarray.create () in
Dynarray.add_last
snapshots
(Session.Snapshot.make
~last_command:None
init_state);
Session_manager.load_snapshots snapshots;
loop ()
)
| Open_script path -> (
let init_state =
Session_manager.lock_with_view (fun view ->
view.init_state
)
in
match
Script.run
pool
~init_state
~path
with
| Error msg -> exit_with_error_msg msg
| Ok snapshots -> (
Session_manager.load_snapshots snapshots;
loop ()
)
)
| Edit_script path -> (
close_term ();
Path_opening.config_and_cmd_to_open_text_file
~path
()
|> (fun (config, cmd) ->
Result.get_ok (Path_opening.resolve_cmd config cmd)
)
|> Sys.command
|> ignore;
loop ()
)
)
in
Eio.Fiber.any [
(fun () ->
Eio.Domain_manager.run (Eio.Stdenv.domain_mgr env)
(fun () -> Session_manager.worker_fiber pool));
Session_manager.manager_fiber;
UI_base.Key_binding_info.grid_light_fiber;
(fun () ->
(match start_with_filter with
| None -> ()
| Some start_with_filter -> (
let start_with_filter_len = String.length start_with_filter in
Lwd.set UI_base.Vars.filter_field (start_with_filter, start_with_filter_len);
UI.update_filter ~commit:true ();
));
(match start_with_search with
| None -> ()
| Some start_with_search -> (
let start_with_search_len = String.length start_with_search in
Lwd.set UI_base.Vars.search_field (start_with_search, start_with_search_len);
UI.update_search ~commit:true ();
));
loop ();
);
];
close_term ();
clean_up ();
(match debug_log with
| Some "-" -> ()
| _ -> (
match !Params.debug_output with
| None -> ()
| Some oc -> (
close_out oc
)
)
)
(* Cmdliner command definition for the docfd executable.
   NOTE: the order of the [$ ...] argument applications below must
   match the parameter order of [run] exactly — cmdliner applies them
   positionally. *)
let cmd ~eio_env ~sw =
  let open Term in
  let open Args in
  let doc = "TUI multiline fuzzy document finder" in
  let version = Version_string.s in
  Cmd.v (Cmd.info "docfd" ~version ~doc)
    (const (run ~eio_env ~sw)
     $ debug_log_arg
     $ no_pdftotext_arg
     $ no_pandoc_arg
     $ hidden_arg
     $ max_depth_arg
     $ max_fuzzy_edit_dist_arg
     $ max_token_search_dist_arg
     $ max_linked_token_search_dist_arg
     $ tokens_per_search_scope_level_arg
     $ index_chunk_size_arg
     $ exts_arg
     $ single_line_exts_arg
     $ add_exts_arg
     $ single_line_add_exts_arg
     $ cache_dir_arg
     $ cache_limit_arg
     $ data_dir_arg
     $ index_only_arg
     $ start_with_filter_arg
     $ start_with_search_arg
     $ filter_arg
     $ sample_arg
     $ samples_per_doc_arg
     $ search_arg
     $ sort_arg
     $ sort_no_score_arg
     $ color_arg
     $ underline_arg
     $ search_result_print_text_width_arg
     $ search_result_print_snippet_min_size_arg
     $ search_result_print_snippet_max_add_lines_arg
     $ start_with_script_arg
     $ script_arg
     $ paths_from_arg
     $ glob_arg
     $ single_line_glob_arg
     $ single_line_arg
     $ open_with_arg
     $ files_with_match_arg
     $ files_without_match_arg
     $ paths_arg)
(* Program entry point. *)
let () =
  (* POSIX-only code paths are used throughout (Eio_posix, /dev/tty
     terminal handling), so bail out early on Windows. *)
  if Sys.win32 then (
    exit_with_error_msg "Windows is not supported"
  );
  (* Seed the global PRNG. *)
  Random.self_init ();
  Eio_posix.run (fun eio_env ->
      Eio.Switch.run (fun sw ->
          exit (Cmd.eval (cmd ~eio_env ~sw))
        ))
================================================
FILE: bin/document.ml
================================================
open Result_syntax
open Docfd_lib
(* In-memory representation of a loaded document.  The heavy indexed
   content lives in the SQLite index (see [of_ir2]); this record keeps
   the metadata needed for filtering, sorting and display. *)
type t = {
  search_mode : Search_mode.t;
  path : string;
  (* [path] broken into tokens; used by path-fuzzy filtering. *)
  path_parts : string list;
  (* Date extracted from the path itself, if any (see Date_extraction). *)
  path_date : Timedesc.Date.t option;
  mod_time : Timedesc.t;
  (* Sanitized first line of the document, if any. *)
  title : string option;
  doc_id : int64;
  (* BLAKE2B hash of the file content; identifies the document in the
     index independently of its path. *)
  doc_hash : string;
  word_ids : Int_set.t;
  (* [None] means the whole document is in scope for searching. *)
  search_scope : Diet.Int.t option;
  links : Link.t array;
  (* Maps a link's start position to its index within [links]. *)
  link_index_of_start_pos : int Int_map.t;
  last_scan : Timedesc.t;
}
(* Two documents are equal when their identity-determining fields
   agree: search mode, path, content hash and search scope.  Derived
   metadata (title, mod time, links, ...) is intentionally ignored. *)
let equal (a : t) (b : t) =
  a.search_mode = b.search_mode
  && String.equal a.path b.path
  && String.equal a.doc_hash b.doc_hash
  && Option.equal Diet.Int.equal a.search_scope b.search_scope
(* Tokenize [path] (whitespace tokens preserved) into the list of
   parts consumed by path-fuzzy matching. *)
let compute_path_parts (path : string) =
  List.of_seq (Tokenization.tokenize ~drop_spaces:false path)
(* Build a lookup from a link's start position to its index within
   [links].  Iteration is left-to-right, so on a position collision the
   later link wins — same as the original left fold. *)
let compute_link_index_of_start_pos links =
  let acc = ref Int_map.empty in
  Array.iteri (fun i link ->
      acc := Int_map.add link.Link.start_pos i !acc
    )
    links;
  !acc
(* Plain field accessors. *)
let search_mode (t : t) = t.search_mode
let path (t : t) = t.path
let path_parts (t : t) = t.path_parts
let path_date (t : t) = t.path_date
let mod_time (t : t) = t.mod_time
let title (t : t) = t.title
let word_ids (t : t) = t.word_ids
let doc_hash (t : t) = t.doc_hash
let doc_id (t : t) = t.doc_id
let search_scope (t : t) = t.search_scope
let last_scan (t : t) = t.last_scan
let links (t : t) = t.links
let link_index_of_start_pos (t : t) = t.link_index_of_start_pos
(* Touch [path]: set both its access and modification times to the
   current time. *)
let refresh_modification_time ~path =
  let now = Unix.time () in
  Unix.utimes path now now
(* Widen the search scope back to the entire document
   ([None] = everything in scope). *)
let reset_search_scope_to_full (t : t) : t =
  { t with search_scope = None }
(* Narrow [t]'s search scope to its intersection with [x].  A document
   with no scope yet ([None] means "everything in scope") simply
   adopts [x]. *)
let inter_search_scope (x : Diet.Int.t) (t : t) : t =
  let narrowed =
    Option.fold ~none:x ~some:(Diet.Int.inter x) t.search_scope
  in
  { t with search_scope = Some narrowed }
module Compare = struct
  type order = [
    | `Asc
    | `Desc
  ]

  (* Pick operand order according to [order]: a descending comparison
     is an ascending comparison with the operands swapped. *)
  let operands order a b =
    match order with
    | `Asc -> (a, b)
    | `Desc -> (b, a)

  let mod_time order d0 d1 =
    let (a, b) = operands order d0 d1 in
    Timedesc.compare_chrono_min (mod_time a) (mod_time b)

  let path order d0 d1 =
    let (a, b) = operands order d0 d1 in
    String.compare (path a) (path b)

  (* Compare by date extracted from the path, falling back to path
     comparison on ties.  Documents without a path date always sort
     to the back, regardless of [order]. *)
  let path_date order d0 d1 =
    let fallback () = path order d0 d1 in
    match path_date d0, path_date d1 with
    | None, None -> fallback ()
    | None, Some _ -> 1
    | Some _, None -> -1
    | Some x0, Some x1 -> (
        let (a, b) = operands order x0 x1 in
        match Timedesc.Date.compare a b with
        | 0 -> fallback ()
        | n -> n
      )
end
module Ir0 = struct
  (* First intermediate representation: path plus document identity
     (content hash, doc id) and scan time — no content read yet. *)
  type t = {
    search_mode : Search_mode.t;
    doc_id : int64;
    doc_hash : string;
    path : string;
    last_scan : Timedesc.t;
  }

  (* Hash [path] (unless [doc_hash] is already known) and record the
     document's identity together with the current scan time. *)
  let of_path ~(env : Eio_unix.Stdenv.base) search_mode ?doc_hash path : (t, string) result =
    let* doc_hash =
      match doc_hash with
      | None -> BLAKE2B.hash_of_file ~env ~path
      | Some h -> Ok h
    in
    let doc_id = Doc_id_db.doc_id_of_doc_hash doc_hash in
    Ok {
      search_mode;
      doc_id;
      doc_hash;
      path;
      last_scan = Timedesc.now ~tz_of_date_time:Params.tz ();
    }
end
module Ir1 = struct
  (* Second intermediate representation: Ir0 plus the raw textual
     content of the document, either as individual lines or as pages
     of lines (PDF). *)
  type t = {
    search_mode : Search_mode.t;
    doc_id : int64;
    doc_hash : string;
    path : string;
    (* [`Lines] for line-oriented sources (text files, pandoc output),
       [`Pages] when the source has page structure (pdftotext). *)
    data : [ `Lines of string Dynarray.t | `Pages of string list Dynarray.t ];
    last_scan : Timedesc.t;
  }

  (* Read [path] directly as text, one dynarray entry per line. *)
  let of_path_to_text ~env ~doc_id ~doc_hash search_mode last_scan path : (t, string) result =
    let fs = Eio.Stdenv.fs env in
    try
      let data =
        Eio.Path.(with_lines (fs / path))
          (fun lines ->
             `Lines (Dynarray.of_seq lines)
          )
      in
      Ok {
        search_mode;
        doc_id;
        doc_hash;
        path;
        data;
        last_scan;
      }
    with
    | Failure _
    | End_of_file
    | Eio.Buf_read.Buffer_limit_exceeded -> (
        Error (Printf.sprintf "failed to read file: %s" (Filename.quote path))
      )

  (* Extract text from a PDF via the external pdftotext command
     (writing to stdout).  Output is split into pages on form feeds,
     then each page into lines. *)
  let of_path_to_pdf ~env ~doc_id ~doc_hash search_mode last_scan path : (t, string) result =
    let proc_mgr = Eio.Stdenv.process_mgr env in
    let fs = Eio.Stdenv.fs env in
    try
      let cmd = [ "pdftotext"; path; "-" ] in
      let pages =
        match Proc_utils.run_return_stdout ~proc_mgr ~fs ~split_mode:`On_form_feed cmd with
        | None -> Seq.empty
        | Some pages -> (
            List.to_seq pages
            |> Seq.map (fun page -> String.split_on_char '\n' page)
          )
      in
      let data = `Pages (Dynarray.of_seq pages) in
      Ok {
        search_mode;
        doc_id;
        doc_hash;
        path;
        data;
        last_scan;
      }
    with
    | Failure _
    | End_of_file
    | Eio.Buf_read.Buffer_limit_exceeded -> (
        Error (Printf.sprintf "failed to read file: %s" (Filename.quote path))
      )

  (* Convert other supported formats to plain text via pandoc.  The
     pandoc source format is derived from the file extension. *)
  let of_path_to_pandoc_supported_format ~env ~doc_id ~doc_hash search_mode last_scan path : (t, string) result =
    let proc_mgr = Eio.Stdenv.process_mgr env in
    let fs = Eio.Stdenv.fs env in
    let ext = File_utils.extension_of_file path in
    let from_format = ext
                      |> String_utils.remove_leading_dots
                      |> (fun s ->
                          (* pandoc expects "html", not "htm". *)
                          match s with
                          | "htm" -> "html"
                          | _ -> s
                        )
    in
    let cmd = [ "pandoc"
              ; "--from"
              ; from_format
              ; "--to"
              ; "plain"
              ; "--wrap"
              ; "none"
              ; path
              ]
    in
    let error_msg = Fmt.str "failed to extract text from %s" (Filename.quote path) in
    match
      Proc_utils.run_return_stdout
        ~proc_mgr
        ~fs
        ~split_mode:`On_line_split
        cmd
    with
    | None -> (
        Error error_msg
      )
    | Some lines -> (
        let data = `Lines (Dynarray.of_list lines) in
        Ok {
          search_mode;
          doc_id;
          doc_hash;
          path;
          data;
          last_scan;
        }
      )

  (* Dispatch on the detected file format to the appropriate reader. *)
  let of_ir0 ~(env : Eio_unix.Stdenv.base) (ir0 : Ir0.t) : (t, string) result =
    let { Ir0.search_mode; doc_id; doc_hash; path; last_scan } = ir0 in
    match File_utils.format_of_file path with
    | `PDF -> (
        of_path_to_pdf ~env ~doc_id ~doc_hash search_mode last_scan path
      )
    | `Pandoc_supported_format -> (
        of_path_to_pandoc_supported_format ~env ~doc_id ~doc_hash search_mode last_scan path
      )
    | `Text | `Other -> (
        of_path_to_text ~env ~doc_id ~doc_hash search_mode last_scan path
      )
end
(* Best-effort extraction of a date from a file path/name.  Each
   recognizer returns the start position of its match together with
   (year, month, day); among the recognizers that match, the match
   starting furthest to the right wins.  The winning (y, m, d) is
   validated as a real calendar date at the end. *)
module Date_extraction = struct
  (* PCRE regex fragments, one capture group each.  Note [mm] accepts
     00-19; invalid months are rejected later by Timedesc. *)
  let yyyy = "(\\d{4})"
  let mm = "([01]\\d)"
  let mmm = "(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)"
  let mmmm = "(january|february|march|april|may|june|july|august|september|october|november|december)"
  let dd = "([0-3]\\d)"

  (* Month number from a month name; only the first three characters
     are inspected.  Inputs come from [mmm]/[mmmm] matches, so they
     are always at least 3 characters long. *)
  let int_of_month_string s =
    match String.lowercase_ascii (String.sub s 0 3) with
    | "jan" -> 1
    | "feb" -> 2
    | "mar" -> 3
    | "apr" -> 4
    | "may" -> 5
    | "jun" -> 6
    | "jul" -> 7
    | "aug" -> 8
    | "sep" -> 9
    | "oct" -> 10
    | "nov" -> 11
    | "dec" -> 12
    | _ -> failwith "unexpected case"

  (* e.g. "2023-01-31": numeric date with a single non-digit separator
     between components.  The leading greedy [.*] makes the regex
     prefer the right-most occurrence. *)
  let yyyy_mm_dd =
    let re =
      Fmt.str
        "(?:^|.*[^\\d])%s[^\\d]%s[^\\d]%s(?:$|[^\\d])"
        yyyy
        mm
        dd
      |> Re.Pcre.re
      |> Re.compile
    in
    fun s ->
      try
        let g = Re.exec re s in
        let start = Re.Group.start g 1 in
        let y = Re.Group.get g 1 |> int_of_string in
        let m = Re.Group.get g 2 |> int_of_string in
        let d = Re.Group.get g 3 |> int_of_string in
        Some (start, (y, m, d))
      with
      | _ -> None

  (* e.g. "2023-jan-31" ([reverse] = false) or "31-jan-2023"
     ([reverse] = true), case-insensitive, with optional single
     separators.  Group 1 is whichever of year/day comes first in the
     string; [reverse] also decides which group index holds which. *)
  let yyyy_month_dd ~month ~reverse =
    let re =
      let g1, g3 =
        if not reverse then (
          (yyyy, dd)
        ) else (
          (dd, yyyy)
        )
      in
      Fmt.str
        "(?:^|.*[^\\d])%s[^A-Za-z\\d]?%s[^A-Za-z\\d]?%s(?:$|[^\\d])"
        g1
        month
        g3
      |> Re.Pcre.re
      |> Re.no_case
      |> Re.compile
    in
    fun s ->
      try
        let g = Re.exec re s in
        let start = Re.Group.start g 1 in
        let y_group_index, d_group_index =
          if not reverse then (
            (1, 3)
          ) else (
            (3, 1)
          )
        in
        let y = Re.Group.get g y_group_index |> int_of_string in
        let m = int_of_month_string (Re.Group.get g 2) in
        let d = Re.Group.get g d_group_index |> int_of_string in
        Some (start, (y, m, d))
      with
      | _ -> None

  (* e.g. "20230131": compact numeric date with no separators. *)
  let yyyymmdd =
    let re =
      Fmt.str
        "(?:^|.*[^\\d])%s%s%s"
        yyyy
        mm
        dd
      |> Re.Pcre.re
      |> Re.compile
    in
    fun s ->
      try
        let g = Re.exec re s in
        let start = Re.Group.start g 1 in
        let y = Re.Group.get g 1 |> int_of_string in
        let m = Re.Group.get g 2 |> int_of_string in
        let d = Re.Group.get g 3 |> int_of_string in
        Some (start, (y, m, d))
      with
      | _ -> None

  (* Run all recognizers over [s], keep the match that starts furthest
     to the right, then validate the result as a calendar date
     (rejecting e.g. month 13 or Feb 30). *)
  let extract s =
    let rec aux acc l =
      match l with
      | [] -> (
          match acc with
          | None -> None
          | Some (_start_match_pos, (year, month, day)) -> (
              match Timedesc.Date.Ymd.make ~year ~month ~day with
              | Ok date -> Some date
              | Error _ -> None
            )
        )
      | f :: fs -> (
          let acc =
            match acc, f s with
            | None, x -> x
            | Some x, None -> Some x
            | Some (start_match_pos, ymd),
              Some (start_match_pos', ymd') -> (
                (* Prefer the match that starts later in the string. *)
                if start_match_pos' > start_match_pos then (
                  Some (start_match_pos', ymd')
                ) else (
                  Some (start_match_pos, ymd)
                )
              )
          in
          aux acc fs
        )
    in
    aux
      None
      [
        yyyy_mm_dd;
        yyyy_month_dd ~month:mmm ~reverse:true;
        yyyy_month_dd ~month:mmm ~reverse:false;
        yyyy_month_dd ~month:mmmm ~reverse:true;
        yyyy_month_dd ~month:mmmm ~reverse:false;
        yyyymmdd;
      ]
end
module Ir2 = struct
  (* Third intermediate representation: fully parsed index data (raw
     index, links) plus metadata, ready to be persisted by [of_ir2]. *)
  type t = {
    search_mode : Search_mode.t;
    doc_id : int64;
    doc_hash : string;
    path : string;
    path_parts : string list;
    path_date : Timedesc.Date.t option;
    mod_time : Timedesc.t;
    title : string option;
    raw : Index.Raw.t;
    links : Link.t array;
    last_scan : Timedesc.t;
  }

  (* Two-phase parse: first peel off the title, then index the content. *)
  type work_stage =
    | Title
    | Content

  (* Index a line-oriented document.  The first line (if any) is
     recorded, sanitized, as the title; it is re-consed onto the
     sequence so it remains part of the indexed content. *)
  let parse_lines pool (s : string Seq.t) : string option * Index.Raw.t =
    let rec aux (stage : work_stage) title s =
      match stage with
      | Content -> (
          let raw = Index.Raw.of_lines pool s in
          (title, raw)
        )
      | Title -> (
          match s () with
          | Seq.Nil -> aux Content title Seq.empty
          | Seq.Cons (x, xs) -> (
              aux Content (Some (Misc_utils.sanitize_string x)) (Seq.cons x xs)
            )
        )
    in
    aux Title None s

  (* Same as [parse_lines] but for page-structured input; the title is
     the first line of the first page (none if the first page is empty). *)
  let parse_pages pool (s : string list Seq.t) : string option * Index.Raw.t =
    let rec aux (stage : work_stage) title s =
      match stage with
      | Content -> (
          let raw = Index.Raw.of_pages pool s in
          (title, raw)
        )
      | Title -> (
          match s () with
          | Seq.Nil -> aux Content title Seq.empty
          | Seq.Cons (x, xs) -> (
              let title =
                match x with
                | [] -> None
                | x :: _ ->
                  Some (Misc_utils.sanitize_string x)
              in
              aux Content title (Seq.cons x xs)
            )
        )
    in
    aux Title None s

  (* Parse an Ir1 into an Ir2: derive path metadata, stat the file for
     its modification time, and build the raw index from the content. *)
  let of_ir1 pool (ir : Ir1.t) : t =
    let { Ir1.search_mode; doc_id; doc_hash; path; data; last_scan } = ir in
    let path_parts = compute_path_parts path in
    let path_date = Date_extraction.extract path in
    let stats = Unix.stat path in
    let mod_time = Timedesc.of_timestamp_float_s_exn stats.Unix.st_mtime in
    let title, raw =
      match data with
      | `Lines x -> (
          parse_lines pool (Dynarray.to_seq x)
        )
      | `Pages x -> (
          parse_pages pool (Dynarray.to_seq x)
        )
    in
    let links = Index.Raw.links raw in
    {
      search_mode;
      path;
      path_parts;
      path_date;
      mod_time;
      doc_id;
      doc_hash;
      title;
      raw;
      links;
      last_scan;
    }
end
(* Persist the accumulated word database and this document's raw index
   to [db], then build the final in-memory document record. *)
let of_ir2 db ~already_in_transaction (ir : Ir2.t) : t =
  Word_db.write_to_db db ~already_in_transaction;
  Index.write_raw_to_db db ~already_in_transaction
    ~doc_id:ir.Ir2.doc_id ir.Ir2.raw;
  {
    search_mode = ir.Ir2.search_mode;
    path = ir.Ir2.path;
    path_parts = ir.Ir2.path_parts;
    path_date = ir.Ir2.path_date;
    mod_time = ir.Ir2.mod_time;
    title = ir.Ir2.title;
    doc_id = ir.Ir2.doc_id;
    doc_hash = ir.Ir2.doc_hash;
    word_ids = Index.Raw.word_ids ir.Ir2.raw;
    (* Fresh documents start with the full document in scope. *)
    search_scope = None;
    links = ir.Ir2.links;
    link_index_of_start_pos = compute_link_index_of_start_pos ir.Ir2.links;
    last_scan = ir.Ir2.last_scan;
  }
(* Load a document from [path].  Documents are keyed by content hash:
   if the hash is already present in the on-disk index, the in-memory
   record is rebuilt straight from the index (fast path); otherwise the
   full Ir0 -> Ir1 -> Ir2 pipeline runs and the result is persisted.
   [doc_hash] may be supplied by the caller to skip re-hashing. *)
let of_path
    ~(env : Eio_unix.Stdenv.base)
    pool
    ~already_in_transaction
    search_mode
    ?doc_hash
    path
  : (t, string) result =
  let open Sqlite3_utils in
  let* doc_hash =
    match doc_hash with
    | Some x -> Ok x
    | None -> BLAKE2B.hash_of_file ~env ~path
  in
  if Index.is_indexed ~doc_hash then (
    (* Fast path: everything content-derived is already in the index;
       only path/stat metadata is recomputed. *)
    let doc_id = Doc_id_db.doc_id_of_doc_hash doc_hash in
    let title =
      if Index.global_line_count ~doc_id = 0 then
        None
      else
        Some (Index.line_of_global_line_num ~doc_id 0)
    in
    let path_parts = compute_path_parts path in
    let path_date = Date_extraction.extract path in
    let stats = Unix.stat path in
    let mod_time = Timedesc.of_timestamp_float_s_exn stats.Unix.st_mtime in
    let links = Index.links ~doc_id in
    Ok
      {
        search_mode;
        path;
        path_parts;
        path_date;
        mod_time;
        title;
        doc_id;
        doc_hash;
        word_ids = Index.word_ids ~doc_id;
        search_scope = None;
        links;
        link_index_of_start_pos = compute_link_index_of_start_pos links;
        last_scan = Timedesc.now ~tz_of_date_time:Params.tz ()
      }
  ) else (
    (* Slow path: read, parse and index the document, then write it to
       the database. *)
    let* ir0 = Ir0.of_path ~env search_mode ~doc_hash path in
    let* ir1 = Ir1.of_ir0 ~env ir0 in
    let ir2 = Ir2.of_ir1 pool ir1 in
    let res =
      with_db (fun db ->
          Ok (of_ir2 db ~already_in_transaction ir2)
        )
    in
    res
  )
(* Evaluate filter expression [exp] against document [t].  Most
   branches are pure metadata checks; [Content] runs an actual index
   search and relies on the [Result_found] exception for early exit. *)
let satisfies_filter_exp pool stop_signal ~global_first_word_candidates_lookup (exp : Filter_exp.t) (t : t) : bool =
  let open Filter_exp in
  (* Date comparison corresponding to the filter operator. *)
  let date_f (op : Filter_exp.compare_op) =
    match op with
    | Eq -> Timedesc.Date.equal
    | Le -> Timedesc.Date.le
    | Ge -> Timedesc.Date.ge
    | Lt -> Timedesc.Date.lt
    | Gt -> Timedesc.Date.gt
  in
  let rec aux exp =
    match exp with
    | Empty -> true
    | Path_date (op, date) -> (
        (* Documents without an extractable path date never match. *)
        match t.path_date with
        | None -> false
        | Some path_date -> (
            date_f op path_date date
          )
      )
    | Mod_date (op, date) -> (
        date_f op (Timedesc.date t.mod_time) date
      )
    | Path_fuzzy exp -> (
        (* Some phrase of the expression must have every one of its
           tokens matched by some part of the tokenized path. *)
        List.exists (fun phrase ->
            List.for_all (fun token ->
                List.exists (fun path_part ->
                    Search_phrase.Enriched_token.compatible_with_word token path_part
                  )
                  t.path_parts
              )
              (Search_phrase.enriched_tokens phrase)
          )
          (Search_exp.flattened exp)
      )
    | Path_glob glob -> (
        Glob.is_empty glob || Glob.match_ glob t.path
      )
    | Ext ext -> (
        File_utils.extension_of_file t.path = ext
      )
    | Content exp -> (
        (* [terminate_on_result_found:true] makes the search raise
           [Result_found] on the first hit, so a normal return means
           no match was found. *)
        try
          Index.search
            pool
            stop_signal
            ~terminate_on_result_found:true
            ~doc_id:t.doc_id
            ~doc_word_ids:(word_ids t)
            ~global_first_word_candidates_lookup
            ~within_same_line:false
            ~search_scope:None
            exp
          |> ignore;
          false
        with
        | Index.Search_job.Result_found -> true
      )
    | Binary_op (op, e1, e2) -> (
        match op with
        | And -> aux e1 && aux e2
        | Or -> aux e1 || aux e2
      )
    | Unary_op (op, e) -> (
        match op with
        | Not -> not (aux e)
      )
  in
  aux exp
================================================
FILE: bin/document.mli
================================================
open Docfd_lib
type t
(** A loaded document: a file (or stdin dump) that has been hashed,
    indexed and annotated with metadata. *)

val equal : t -> t -> bool
(** Equality over identity-determining fields (search mode, path,
    content hash, search scope); derived metadata is ignored. *)

(** Comparison functions for sorting documents. *)
module Compare : sig
  type order = [
    | `Asc
    | `Desc
  ]

  val mod_time : order -> t -> t -> int

  val path_date : order -> t -> t -> int
  (** Documents without a path date always sort to the back,
      regardless of order; ties fall back to path comparison. *)

  val path : order -> t -> t -> int
end

val search_mode : t -> Search_mode.t

val path : t -> string

val path_date : t -> Timedesc.Date.t option
(** Date extracted from the path itself, if any. *)

val mod_time : t -> Timedesc.t

val title : t -> string option
(** Sanitized first line of the document, if any. *)

val word_ids : t -> Int_set.t

val doc_hash : t -> string
(** BLAKE2B hash of the file content. *)

val doc_id : t -> int64

val search_scope : t -> Diet.Int.t option
(** [None] means the whole document is in scope. *)

val last_scan : t -> Timedesc.t

val links : t -> Link.t array

val link_index_of_start_pos : t -> int Int_map.t
(** Maps a link's start position to its index within [links]. *)

val satisfies_filter_exp :
  Task_pool.t ->
  Stop_signal.t ->
  global_first_word_candidates_lookup:Int_set.t Search_phrase.Enriched_token.Data_map.t ->
  Filter_exp.t ->
  t ->
  bool

val of_path :
  env:Eio_unix.Stdenv.base ->
  Task_pool.t ->
  already_in_transaction:bool ->
  Search_mode.t ->
  ?doc_hash:string ->
  string ->
  (t, string) result
(** Load a document from a path, reusing the on-disk index when the
    content hash is already indexed.  [?doc_hash] skips re-hashing. *)

val reset_search_scope_to_full : t -> t

val inter_search_scope : Diet.Int.t -> t -> t

(** Intermediate representations of the loading pipeline:
    Ir0 (identity) -> Ir1 (raw content) -> Ir2 (parsed index data). *)
module Ir0 : sig
  type t

  val of_path :
    env:Eio_unix.Stdenv.base ->
    Search_mode.t ->
    ?doc_hash:string ->
    string ->
    (t, string) result
end

module Ir1 : sig
  type t

  val of_ir0 :
    env:Eio_unix.Stdenv.base ->
    Ir0.t ->
    (t, string) result
end

module Ir2 : sig
  type t

  val of_ir1 : Task_pool.t -> Ir1.t -> t
end

val of_ir2 :
  Sqlite3.db ->
  already_in_transaction:bool ->
  Ir2.t ->
  t
================================================
FILE: bin/document_pipeline.ml
================================================
open Docfd_lib
open Debug_utils
(* A concurrent document loading pipeline: paths are fed in as Ir0s
   and flow through staged queues (Ir0 -> Ir1 -> Ir2 -> Document),
   each stage served by its own worker fibers.  [None] on a queue is
   the end-of-input sentinel. *)
type t = {
  env : Eio_unix.Stdenv.base;
  pool : Task_pool.t;
  ir0_queue : Document.Ir0.t option Eio.Stream.t;
  (* Released once by each ir1-of-ir0 worker on shutdown, letting
     [finalize] wait for the whole stage to drain. *)
  ir1_of_ir0_workers_batch_release : Eio.Semaphore.t;
  ir1_queue : Document.Ir1.t option Eio.Stream.t;
  ir2_of_ir1_workers_batch_release : Eio.Semaphore.t;
  ir2_queue : Document.Ir2.t option Eio.Stream.t;
  (* Filled by the single final-stage worker. *)
  documents : Document.t Dynarray.t;
  (* One-shot channel carrying the final document collection. *)
  result : Document.t Dynarray.t Eio.Stream.t;
}
(* Create a pipeline.  Stage queues are bounded at 100 entries to
   apply backpressure between stages; [result] has capacity 1 as it
   carries a single final value. *)
let make ~env pool : t =
  {
    env;
    pool;
    ir0_queue = Eio.Stream.create 100;
    ir1_of_ir0_workers_batch_release = Eio.Semaphore.make 0;
    ir1_queue = Eio.Stream.create 100;
    ir2_of_ir1_workers_batch_release = Eio.Semaphore.make 0;
    ir2_queue = Eio.Stream.create 100;
    documents = Dynarray.create ();
    result = Eio.Stream.create 1;
  }
(* Pipeline stage: convert Ir0s to Ir1s (content reading).  Stops on
   the [None] sentinel, signalling shutdown via the batch-release
   semaphore.  Failed conversions are logged in debug mode and
   dropped. *)
let ir1_of_ir0_worker (t : t) =
  let rec next () =
    match Eio.Stream.take t.ir0_queue with
    | None -> Eio.Semaphore.release t.ir1_of_ir0_workers_batch_release
    | Some ir0 -> (
        (match Document.Ir1.of_ir0 ~env:t.env ir0 with
         | Ok ir1 -> Eio.Stream.add t.ir1_queue (Some ir1)
         | Error msg -> (
             do_if_debug (fun oc ->
                 Printf.fprintf oc "Error: %s\n" msg
               )
           ));
        next ()
      )
  in
  next ()
(* Pipeline stage: convert Ir1s to Ir2s (parsing and indexing).
   Stops on the [None] sentinel, signalling shutdown via the
   batch-release semaphore. *)
let ir2_of_ir1_worker (t : t) =
  let rec next () =
    match Eio.Stream.take t.ir1_queue with
    | None -> Eio.Semaphore.release t.ir2_of_ir1_workers_batch_release
    | Some ir -> (
        Eio.Stream.add t.ir2_queue (Some (Document.Ir2.of_ir1 t.pool ir));
        next ()
      )
  in
  next ()
(* Final pipeline stage (single worker): write each Ir2 to the
   database and collect the resulting in-memory documents.  Writes are
   batched into SQLite transactions of roughly 100 documents to
   amortize commit cost: a transaction is opened whenever [counter] is
   0, committed once [counter] reaches 100, and any transaction still
   open when the [None] sentinel arrives is committed at the end. *)
let document_of_ir2_worker (t : t) =
  let open Sqlite3_utils in
  let run = ref true in
  let counter = ref 0 in
  let outstanding_transaction = ref false in
  with_db (fun db ->
      while !run do
        if !counter = 0 then (
          step_stmt ~db "BEGIN IMMEDIATE" ignore;
          outstanding_transaction := true;
        );
        (match Eio.Stream.take t.ir2_queue with
         | None -> (
             run := false
           )
         | Some ir -> (
             let doc = Document.of_ir2 db ~already_in_transaction:true ir in
             Dynarray.add_last t.documents doc;
             do_if_debug (fun oc ->
                 Printf.fprintf oc "Document %s loaded successfully\n" (Filename.quote (Document.path doc));
               );
           ));
        if !counter >= 100 then (
          step_stmt ~db "COMMIT" ignore;
          outstanding_transaction := false;
          counter := 0;
        ) else (
          incr counter;
        );
      done;
      (* Commit whatever batch was in flight when input ended. *)
      if !outstanding_transaction then (
        step_stmt ~db "COMMIT" ignore;
      )
    )
(* Queue [path] (with known content hash [doc_hash]) for loading.
   Builds the Ir0 here; a failure is logged in debug mode and the
   path is dropped. *)
let feed (t : t) search_mode ~doc_hash path =
  do_if_debug (fun oc ->
      Printf.fprintf oc "Loading document: %s\n" (Filename.quote path);
    );
  do_if_debug (fun oc ->
      let mode_name =
        match search_mode with
        | `Single_line -> "single line"
        | `Multiline -> "multiline"
      in
      Printf.fprintf oc "Using %s search mode for document %s\n"
        mode_name
        (Filename.quote path)
    );
  match Document.Ir0.of_path ~env:t.env search_mode ~doc_hash path with
  | Ok ir0 -> Eio.Stream.add t.ir0_queue (Some ir0)
  | Error msg -> (
      do_if_debug (fun oc ->
          Printf.fprintf oc "Error: %s\n" msg
        )
    )
(* Run all worker fibers of the pipeline: one ir1-of-ir0 worker and
   one ir2-of-ir1 worker per task-pool slot, plus the single database
   writer.  Blocks until every worker has terminated (i.e. after
   [finalize] has pushed the sentinels), then publishes the collected
   documents on [t.result]. *)
let run (t : t) =
  let spawn n job = List.init n (fun _ () -> job t) in
  Eio.Fiber.all
    (List.concat
       [ spawn Task_pool.size ir1_of_ir0_worker
       ; spawn Task_pool.size ir2_of_ir1_worker
       ; [ (fun () -> document_of_ir2_worker t) ]
       ]);
  Eio.Stream.add t.result t.documents
(* Shuts the pipeline down stage by stage, upstream first, then returns
   the loaded documents.

   Ordering matters: one [None] sentinel is sent per stage-1 worker, and
   the semaphore is awaited [Task_pool.size] times so that every stage-1
   worker has terminated (and thus can no longer produce ir1 values)
   BEFORE the ir1 queue is sentinel-closed.  The same handshake repeats
   for stage 2.  The final stage has exactly one worker (see [run]), so a
   single sentinel suffices for [ir2_queue].  Blocks until [run] publishes
   the result. *)
let finalize (t : t) =
  for _ = 0 to Task_pool.size - 1 do
    Eio.Stream.add t.ir0_queue None;
  done;
  for _ = 0 to Task_pool.size - 1 do
    Eio.Semaphore.acquire t.ir1_of_ir0_workers_batch_release;
  done;
  for _ = 0 to Task_pool.size - 1 do
    Eio.Stream.add t.ir1_queue None;
  done;
  for _ = 0 to Task_pool.size - 1 do
    Eio.Semaphore.acquire t.ir2_of_ir1_workers_batch_release;
  done;
  Eio.Stream.add t.ir2_queue None;
  Dynarray.to_list (Eio.Stream.take t.result)
================================================
FILE: bin/document_pipeline.mli
================================================
(** A staged, concurrent document-loading pipeline.

    Typical usage: [make], then call [feed] for each document while [run]
    executes in its own fiber, then call [finalize] to shut the stages
    down and collect the results. *)
type t

(** [make ~env pool] creates a pipeline using [pool] for CPU-bound work. *)
val make : env:Eio_unix.Stdenv.base -> Docfd_lib.Task_pool.t -> t

(** [feed t search_mode ~doc_hash path] enqueues one document for loading.
    Failures are reported on the debug channel and dropped. *)
val feed : t -> Search_mode.t -> doc_hash:string -> string -> unit

(** Runs the pipeline workers; returns once [finalize] shuts them down. *)
val run : t -> unit

(** Signals shutdown and returns every successfully loaded document. *)
val finalize : t -> Document.t list
================================================
FILE: bin/document_src.ml
================================================
(* Files selected for loading, partitioned by the search mode each file
   should be indexed with. *)
type file_collection = {
  default_search_mode_files : String_set.t;
  single_line_search_mode_files : String_set.t;
}
(* All paths in the collection, default-mode files first. *)
let seq_of_file_collection (x : file_collection) =
  let defaults = String_set.to_seq x.default_search_mode_files in
  let single_line = String_set.to_seq x.single_line_search_mode_files in
  Seq.append defaults single_line
(* Total number of files across both search-mode partitions. *)
let file_collection_size (x : file_collection) =
  let default_count = String_set.cardinal x.default_search_mode_files in
  let single_line_count = String_set.cardinal x.single_line_search_mode_files in
  default_count + single_line_count
(* The collection with no files in either partition. *)
let empty_file_collection =
  {
    default_search_mode_files = String_set.empty;
    single_line_search_mode_files = String_set.empty;
  }

(* Where documents come from: piped-in stdin (the string is presumably
   the path of the temp file holding stdin's contents — see
   [File_utils.read_in_channel_to_tmp_file]; confirm against callers),
   or a collection of files on disk. *)
type t =
  | Stdin of string
  | Files of file_collection
================================================
FILE: bin/dune
================================================
; Helper modules shared with ../lib are mirrored into bin/ at build time.
; copy# (as opposed to plain copy) inserts a line directive, so compiler
; errors in these modules point back at the original file under lib/.
(rule
 (targets int_map.ml)
 (deps ../lib/int_map.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets int_set.ml)
 (deps ../lib/int_set.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets string_map.ml)
 (deps ../lib/string_map.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets char_map.ml)
 (deps ../lib/char_map.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets string_set.ml)
 (deps ../lib/string_set.ml)
 (action (copy# %{deps} %{targets}))
)
(rule
 (targets parser_components.ml)
 (deps ../lib/parser_components.ml)
 (action (copy# %{deps} %{targets}))
)
; The docfd executable itself.  The flags enable (almost) all warnings,
; disable a hand-picked set, promote warning 8 (non-exhaustive match) to
; an error via @8, and keep debug info (-g).
(executable
 (flags (-w "+a-4-9-29-37-40-42-44-48-50-32-30-70@8" -g))
 (name docfd)
 (public_name docfd)
 (preprocess (pps ppx_deriving.show ppx_deriving.ord))
 (libraries docfd_lib
   containers
   containers.unix
   cmdliner
   fmt
   notty
   notty.unix
   nottui
   nottui-unix
   lwd
   oseq
   eio
   eio_main
   eio_posix
   digestif.c
   digestif
   timedesc
   timedesc-tzlocal.unix-or-utc
   re
   progress
   diet
   sqlite3
 )
)
================================================
FILE: bin/file_utils.ml
================================================
open Misc_utils
open Debug_utils
(* Lower-cased extension of [s], including the leading dot
   (e.g. ".pdf"); "" when the basename has no extension. *)
let extension_of_file (s : string) =
  String.lowercase_ascii (Filename.extension s)
(* Coarse classification of a file, used to decide how to index and how
   to open it (see [format_of_file] and bin/path_opening.ml). *)
type file_format = [ `PDF | `Pandoc_supported_format | `Text | `Other ] [@@deriving ord]

module File_format_set = CCSet.Make (struct
    type t = file_format
    let compare = compare_file_format
  end)
(* Classify [s] by its (lower-cased) extension alone; the file content is
   never inspected.  Extension lists live in [Params]. *)
let format_of_file (s : string) : file_format =
  let ext = extension_of_file s in
  if ext = ".pdf" then (
    `PDF
  ) else if List.mem ext Params.pandoc_supported_exts then (
    `Pandoc_supported_format
  ) else if String_set.mem ext Params.common_text_file_exts
         || String_set.mem ext Params.common_code_file_exts
  then (
    `Text
  ) else (
    `Other
  )
(* Filesystem object kind, as reported by [typ_of_path]. *)
type typ = [
  | `File
  | `Dir
]

(* Whether the path itself was a symlink (its target's kind is reported). *)
type is_link = [
  | `Is_link
  | `Not_link
]
(* Classify [path] without following symlinks first (lstat); when the
   path is a symlink, its target is classified instead (stat).  Returns
   [None] for anything that is neither a regular file nor a directory,
   and on any stat error (broken link, permission denied, missing path). *)
let typ_of_path (path : string) : (typ * is_link) option =
  let classify (st : Unix.stats) (link_status : is_link) =
    match st.st_kind with
    | Unix.S_REG -> Some (`File, link_status)
    | Unix.S_DIR -> Some (`Dir, link_status)
    | _ -> None
  in
  try
    let st = Unix.lstat path in
    match st.st_kind with
    | Unix.S_LNK -> classify (Unix.stat path) `Is_link
    | _ -> classify st `Not_link
  with
  | _ -> None
(* Current working directory with a trailing separator appended. *)
let cwd_with_trailing_sep () = Sys.getcwd () ^ Filename.dir_sep

(* Strip the current working directory prefix from [s]; return [s]
   unchanged when it does not live under the cwd. *)
let remove_cwd_from_path (s : string) =
  let pre = cwd_with_trailing_sep () in
  if String.starts_with ~prefix:pre s then
    String.sub s (String.length pre) (String.length s - String.length pre)
  else
    s
(* Copy the entire contents of [ic] into a fresh temp file
   ("docfd-*.txt") and return the file's path.  The temp file is NOT
   removed on failure; only an error message is returned. *)
let read_in_channel_to_tmp_file (ic : in_channel) : (string, string) result =
  let file = Filename.temp_file "docfd-" ".txt" in
  try
    CCIO.with_out file (fun oc ->
        CCIO.copy_into ic oc
      );
    Ok file
  with
  | _ -> (
      (* Error message mentions stdin: callers feed stdin here. *)
      Error (Fmt.str "failed to write stdin to %s" (Filename.quote file))
    )
(* Directory entry names of [path] (no path prefix); an empty sequence on
   any read error, e.g. when [path] is not a readable directory. *)
let next_choices path : string Seq.t =
  match Sys.readdir path with
  | entries -> Array.to_seq entries
  | exception _ -> Seq.empty
(* Recursively collect files under [path] (normalized to absolute).

   - [report_progress] is invoked once per visited path.
   - Hidden entries (basename starting with '.') are skipped unless
     [Params.scan_hidden] is set.
   - Recursion stops below [max_depth]; the root is depth 0.
   - A file is kept only when [filter depth path] accepts it.
   - Symlinked files/dirs are followed ([typ_of_path] resolves links);
     NOTE(review): no cycle detection beyond the depth cap — confirm
     symlink loops are acceptable given [max_depth]. *)
let list_files_recursive
    ?(max_depth = !Params.max_file_tree_scan_depth)
    ~(report_progress : unit -> unit)
    ~(filter : int -> string -> bool)
    (path : string)
  : String_set.t =
  let acc = ref String_set.empty in
  let add x =
    acc := String_set.add x !acc
  in
  let rec aux depth path =
    report_progress ();
    let hidden =
      String.starts_with ~prefix:"." (Filename.basename path)
    in
    if ((hidden && !Params.scan_hidden) || not hidden)
    && depth <= max_depth then (
      match typ_of_path path with
      | Some (`Dir, _) -> (
          next_choices path
          |> Seq.iter (fun f ->
              aux (depth + 1) (Filename.concat path f)
            )
        )
      | Some (`File, _) -> (
          if filter depth path then (
            add path
          )
        )
      | _ -> ()
    )
  in
  aux 0 (normalize_path_to_absolute path);
  !acc
(* Collect all files matching any of [globs].

   Each glob is split on the path separator and walked component by
   component against the filesystem:
   - "" and "." leave the current directory unchanged;
   - ".." pops one component;
   - "**" switches to a full recursive scan of the current directory,
     filtered by the remainder of the glob compiled as one pattern;
   - any other component is compiled as a single-segment glob and matched
     against the directory's entries.
   [path_parts] appears to be held most-recent-component-first (components
   are consed on, ".." pops the head) and rendered by [path_of_parts] —
   NOTE(review): inferred; [path_of_parts] is defined elsewhere.
   Case sensitivity is taken from each top-level glob (a "\c" escape makes
   it case-insensitive, see Glob.parse); sub-globs inherit it explicitly.
   [parse_glob] only fails on a bug: any subpath of a valid glob is
   expected to parse. *)
let list_files_recursive_filter_by_globs
    ?max_depth
    ~(report_progress : unit -> unit)
    (globs : string Seq.t)
  : String_set.t =
  let acc = ref String_set.empty in
  let add x =
    acc := String_set.add x !acc
  in
  let parse_glob ~case_sensitive s =
    match Glob.parse ~case_sensitive s with
    | None -> (
        failwith (Fmt.str "expected subpath of a valid glob pattern to also be valid: \"%s\"" s)
      )
    | Some x -> x
  in
  let rec aux ~case_sensitive (path_parts : string list) (glob_parts : string list) =
    report_progress ();
    let path = path_of_parts path_parts in
    match typ_of_path path, glob_parts with
    | Some (`File, _), [] -> add path
    | Some (`File, _), _ -> ()
    | Some (`Dir, _), [] -> ()
    | Some (`Dir, _), x :: xs -> (
        match x with
        | "" | "." -> aux ~case_sensitive path_parts xs
        | ".." -> (
            let path_parts =
              match path_parts with
              | [] -> []
              | _ :: xs -> xs
            in
            aux ~case_sensitive path_parts xs
          )
        | "**" -> (
            (* Compile the rest of the glob (rooted at [path]) as one
               pattern and recursively scan. *)
            let glob_string = String.concat Filename.dir_sep (path :: glob_parts) in
            do_if_debug (fun oc ->
                Printf.fprintf oc "Compiling glob using pattern: %s\n" glob_string
              );
            let glob = parse_glob ~case_sensitive glob_string in
            path
            |> list_files_recursive
              ?max_depth
              ~report_progress
              ~filter:(fun _depth path ->
                  Glob.match_ glob path
                )
            |> String_set.iter (fun path ->
                do_if_debug (fun oc ->
                    Printf.fprintf oc "Glob %s matches path %s\n" glob_string path
                  );
                add path
              )
          )
        | _ -> (
            let glob = parse_glob ~case_sensitive x in
            next_choices path
            |> Seq.iter (fun f ->
                if Glob.match_ glob f then (
                  aux ~case_sensitive (f :: path_parts) xs
                )
              )
          )
      )
    | None, _ -> ()
    | exception _ -> ()
  in
  Seq.iter (fun glob ->
      let case_sensitive =
        Glob.parse glob
        |> Option.get
        |> Glob.case_sensitive
      in
      let glob_parts = CCString.split ~by:Filename.dir_sep glob in
      match glob_parts with
      | "" :: l -> (
          (* Absolute path on Unix-like systems *)
          aux ~case_sensitive [ "" ] l
        )
      | _ -> (
          aux ~case_sensitive (cwd_path_parts ()) glob_parts
        )
    ) globs;
  !acc
(* Recursively scan each root in [paths] and keep files whose extension
   is in [exts].  A root given directly (depth 0) is always kept, even
   when its extension does not match. *)
let list_files_recursive_filter_by_exts
    ?max_depth
    ~report_progress
    ~(exts : string list)
    (paths : string Seq.t)
  : String_set.t =
  let keep depth path =
    depth = 0 || List.mem (extension_of_file path) exts
  in
  Seq.fold_left
    (fun acc root ->
       root
       |> normalize_path_to_absolute
       |> list_files_recursive ?max_depth ~report_progress ~filter:keep
       |> String_set.union acc)
    String_set.empty
    paths
(* Create [dir] and any missing ancestors (like `mkdir -p`), with mode
   0o755.  Walks the path component by component: an existing directory
   is descended into; an existing non-directory terminates the program
   via [exit_with_error_msg]; a missing component ([Sys.is_directory]
   raising [Sys_error]) is created.  A leading "" component (absolute
   path) seeds the accumulator with the root separator — that is what
   the [first] flag distinguishes. *)
let mkdir_recursive (dir : string) : unit =
  let rec aux first acc parts =
    match parts with
    | [] -> ()
    | "" :: xs -> (
        if first then
          aux false Filename.dir_sep xs
        else
          (* Empty component mid-path (e.g. "a//b"): skip it. *)
          aux false "" xs
      )
    | x :: xs -> (
        let acc = Filename.concat acc x in
        match Sys.is_directory acc with
        | true -> aux false acc xs
        | false -> (
            exit_with_error_msg
              (Fmt.str "%s is not a directory" (Filename.quote acc))
          )
        | exception (Sys_error _) -> (
            (* Component does not exist yet: create it. *)
            do_if_debug (fun oc ->
                Printf.fprintf oc "Creating directory: %s\n" (Filename.quote acc)
              );
            (try
               Sys.mkdir acc 0o755
             with
             | _ -> (
                 exit_with_error_msg
                   (Fmt.str "failed to create directory: %s" (Filename.quote acc))
               )
            );
            aux false acc xs
          )
      )
  in
  aux true "" (CCString.split ~by:Filename.dir_sep dir)
(* Size of [path] in bytes (following symlinks), or [None] on any stat
   failure. *)
let file_size (path : string) : int option =
  match Unix.stat path with
  | (st : Unix.stats) -> Some st.st_size
  | exception _ -> None
================================================
FILE: bin/filter_exp.ml
================================================
open Docfd_lib
(* AST of a document filter expression (see [Parsers.p] for the concrete
   syntax: "content:", "path-glob:", "ext:", "path-date:", "mod-date:",
   "path-fuzzy:", combined with and/or/not and parentheses). *)
type t =
  | Empty
  | Path_date of compare_op * Timedesc.Date.t
  | Path_fuzzy of Search_exp.t
  | Path_glob of Glob.t
  | Ext of string
  | Mod_date of compare_op * Timedesc.Date.t
  | Content of Search_exp.t
  | Binary_op of binary_op * t * t
  | Unary_op of unary_op * t

and binary_op =
  | And
  | Or

and unary_op =
  | Not

(* Comparison operator used by the date filters. *)
and compare_op =
  | Eq
  | Le
  | Ge
  | Lt
  | Gt
(* The filter that matches everything. *)
let empty = Empty

(* True iff the expression is the trivial [Empty] filter. *)
let is_empty : t -> bool = function
  | Empty -> true
  | _ -> false
(* Structural equality of two filter expressions.

   Fix: the original match had no [Mod_date, Mod_date] case, so two equal
   "mod-date:" filters fell through to the catch-all and compared unequal.
   Every constructor of [t] is now handled explicitly. *)
let equal (e1 : t) (e2 : t) =
  let rec aux e1 e2 =
    match e1, e2 with
    | Empty, Empty -> true
    | Path_date (op1, x1), Path_date (op2, x2) ->
      op1 = op2 && Timedesc.Date.equal x1 x2
    | Path_fuzzy x, Path_fuzzy y -> Search_exp.equal x y
    | Path_glob x, Path_glob y -> Glob.equal x y
    | Ext x, Ext y -> String.equal x y
    | Mod_date (op1, x1), Mod_date (op2, x2) ->
      op1 = op2 && Timedesc.Date.equal x1 x2
    | Content x, Content y -> Search_exp.equal x y
    | Binary_op (op1, x1, y1), Binary_op (op2, x2, y2) ->
      op1 = op2 && aux x1 x2 && aux y1 y2
    | Unary_op (op1, x1), Unary_op (op2, x2) ->
      op1 = op2 && aux x1 x2
    | _, _ -> false
  in
  aux e1 e2
(* All [Content] search expressions in the tree, left to right. *)
let all_content_search_exps (t : t) : Search_exp.t list =
  (* Accumulate in reverse, then restore left-to-right order. *)
  let rec go acc node =
    match node with
    | Content e -> e :: acc
    | Binary_op (_op, l, r) -> go (go acc l) r
    | Unary_op (_op, sub) -> go acc sub
    | Empty
    | Path_date _
    | Path_fuzzy _
    | Path_glob _
    | Ext _
    | Mod_date _ -> acc
  in
  List.rev (go [] t)
(* Angstrom parser for the filter expression language.  Precedence, from
   tightest to loosest: atoms (possibly parenthesized), "not", "and",
   "or" (both binary operators are left-associative via [chainl1]). *)
module Parsers = struct
  type exp = t

  open Angstrom
  open Parser_components

  (* A run of letters/digits/'&'/'|' — used to read operator words. *)
  let alphanum_symbol_string =
    take_while1 (fun c ->
        is_letter c
        ||
        is_digit c
        ||
        (match c with
         | '&'
         | '|' -> true
         | _ -> false
        )
      )

  (* A string, optionally wrapped in single or double quotes.  Unquoted
     strings stop at whitespace and parentheses; quoted strings stop at
     the matching quote.  Backslash escapes the next character in either
     case.  With [force_quote] the quote is mandatory.  The closing quote
     may be omitted at end of input. *)
  let maybe_quoted_string ?(force_quote = false) () =
    (
      (choice
         [
           char '"';
           char '\'';
         ]
       >>= fun c -> return (Some c)
      )
      <|>
      (if force_quote then
         fail ""
       else
         return None)
    )
    >>= fun quote_char ->
    many1 (
      take_while1 (fun c ->
          match c with
          | '\\' -> false
          | c -> (
              match quote_char with
              | None -> (
                  is_not_space c
                  &&
                  (match c with
                   | '('
                   | ')' -> false
                   | _ -> true
                  )
                )
              | Some quote_char -> c <> quote_char
            )
        )
      <|>
      (char '\\' *> any_char >>| fun c -> Printf.sprintf "%c" c)
    )
    >>= fun l ->
    let s = String.concat "" l in
    (end_of_input *> return s)
    <|>
    (match quote_char with
     | None -> return s
     | Some quote_char -> char quote_char *> return s)

  (* A (maybe quoted) string that must parse as a [Search_exp]. *)
  let search_exp ?force_quote () =
    maybe_quoted_string ?force_quote ()
    >>= fun s ->
    match Search_exp.parse s with
    | None -> fail ""
    | Some x -> return x

  (* A (maybe quoted) string that must parse as a [Glob], normalized to
     an absolute pattern first (empty string stays empty). *)
  let glob =
    maybe_quoted_string ()
    >>= fun s ->
    let s = Misc_utils.normalize_filter_glob_if_not_empty s in
    match Glob.parse s with
    | None -> fail ""
    | Some x -> return x

  (* Extension, normalized to lower case with exactly one leading dot. *)
  let ext =
    maybe_quoted_string ()
    >>| fun s ->
    s
    |> String.lowercase_ascii
    |> String_utils.remove_leading_dots
    |> Fmt.str ".%s"

  (* NOTE: '=' / "<=" / ">=" are tried before '<' / '>' so that the
     two-character operators win. *)
  let compare_op =
    choice
      [
        char '=' *> skip_spaces *> return Eq;
        string "<=" *> skip_spaces *> return Le;
        string ">=" *> skip_spaces *> return Ge;
        char '<' *> skip_spaces *> return Lt;
        char '>' *> skip_spaces *> return Gt;
      ]

  (* ISO 8601 year-month-day date. *)
  let date =
    any_string
    >>= fun s ->
    match Timedesc.Date.Ymd.of_iso8601 s with
    | Ok x -> return x
    | Error _ -> fail ""

  (* "<op> <date>" read from one (maybe quoted) string, re-parsed in a
     nested Angstrom run so the whole operand can be quoted as a unit. *)
  let compare_date f =
    let p =
      compare_op >>= fun op ->
      date >>| fun date ->
      f (op, date)
    in
    maybe_quoted_string ()
    >>= fun s ->
    match Angstrom.(parse_string ~consume:Consume.All) p s with
    | Ok x -> return x
    | Error s -> fail s

  (* Case-insensitive keyword operator, e.g. "and"/"AND". *)
  let binary_op op_strings op =
    alphanum_symbol_string >>= fun s ->
    skip_spaces *>
    (
      if List.mem (String.lowercase_ascii s) op_strings then (
        return (fun x y -> Binary_op (op, x, y))
      ) else (
        fail ""
      )
    )

  let and_op = binary_op [ "and" ] And

  let or_op = binary_op [ "or" ] Or

  let unary_op op_strings op =
    alphanum_symbol_string >>= fun s ->
    skip_spaces *>
    (
      if List.mem (String.lowercase_ascii s) op_strings then (
        return (fun x -> Unary_op (op, x))
      ) else (
        fail ""
      )
    )

  let not_op = unary_op [ "not" ] Not

  (* Whole-input filter expression; empty input parses to [empty]. *)
  let p =
    skip_spaces *>
    (
      (end_of_input *> return empty)
      <|>
      fix (fun (exp : exp Angstrom.t) ->
          let base =
            choice [
              (* A bare quoted string is a content filter. *)
              (search_exp ~force_quote:true () >>|
               fun x -> Content x);
              (string "content:" *>
               search_exp () >>| fun x -> Content x);
              (string "path-date:" *>
               compare_date (fun (op, date) -> Path_date (op, date)));
              (string "path-fuzzy:" *>
               search_exp () >>| fun x -> Path_fuzzy x);
              (string "path-glob:" *>
               glob >>| fun x -> Path_glob x);
              (string "mod-date:" *>
               compare_date (fun (op, date) -> Mod_date (op, date)));
              (string "ext:" *>
               ext >>| fun x -> Ext x);
              (char '(' *> skip_spaces *> exp <* char ')');
            ]
            <* skip_spaces
          in
          let maybe_neg =
            (not_op >>= fun f ->
             skip_spaces *> base >>| f)
            <|>
            base
          in
          (* "and" binds tighter than "or". *)
          let conj = chainl1 maybe_neg and_op in
          chainl1 conj or_op
        )
    )
    <* skip_spaces
end
(* Parse a whole filter-expression string; [None] on any syntax error. *)
let parse s =
  Angstrom.(parse_string ~consume:Consume.All) Parsers.p s
  |> Result.to_option
================================================
FILE: bin/glob.ml
================================================
(* A compiled glob: the original pattern text, its case sensitivity, and
   the compiled [Re] regex derived from both. *)
type t = {
  case_sensitive : bool;
  string : string;
  re : Re.re;
}
(* Structural equality on the pattern text and flag; [re] is derived
   from these two, so it need not be compared. *)
let equal x y =
  Bool.equal x.case_sensitive y.case_sensitive
  && String.equal x.string y.string

let case_sensitive t = t.case_sensitive

let string t = t.string

(* The empty glob (pattern ""); [match_] treats it as match-all. *)
let is_empty t = String.equal t.string ""
(* Pre-pass over the raw pattern: splits it into literal chunks and the
   special escape "\c", which marks the whole glob case-insensitive.
   Any other backslash escape reduces to the escaped character itself. *)
module Parsers = struct
  open Angstrom

  type part = [
    | `Case_insensitive
    | `String of string
  ]

  let parts : part list Angstrom.t =
    many (
      (take_while1 (fun c ->
           match c with
           | '\\' -> false
           | _ -> true
         )
       >>| fun s -> `String s)
      <|>
      (char '\\' *> any_char >>= fun c ->
       if c = 'c' then return `Case_insensitive
       else return (`String (Printf.sprintf "%c" c)))
    )
end
(* Compile [s] into a glob.  An embedded "\c" escape makes the pattern
   case-insensitive unless the [?case_sensitive] argument overrides it.
   Case-insensitive matching is implemented by lower-casing both the
   pattern (here) and the subject (in [match_]).  Returns [None] when
   the pattern fails to parse or [Re.Glob] rejects it. *)
let parse ?case_sensitive (s : string) : t option =
  match Angstrom.(parse_string ~consume:Consume.All) Parsers.parts s with
  | Error _ -> None
  | Ok parts -> (
      let case_insensitive = ref false in
      (* Drop the \c markers, recording their presence. *)
      let s =
        parts
        |> List.filter_map (fun x ->
            match x with
            | `String s -> Some s
            | `Case_insensitive -> (
                case_insensitive := true;
                None
              )
          )
        |> String.concat ""
      in
      let case_sensitive =
        match case_sensitive with
        | None -> not !case_insensitive
        | Some x -> x
      in
      try
        let re =
          s
          |> (fun s -> if case_sensitive then s else String.lowercase_ascii s)
          |> Re.Glob.glob
            ~anchored:true
            ~pathname:true
            ~match_backslashes:false
            ~period:true
            ~expand_braces:false
            ~double_asterisk:true
          |> Re.compile
        in
        Some
          {
            case_sensitive;
            string = s;
          re;
          }
      with
      | _ -> None
    )
(* Whether [s] matches the glob.  The empty glob matches everything;
   case-insensitive globs compare against a lower-cased subject (the
   pattern was lower-cased at compile time, see [parse]). *)
let match_ t (s : string) =
  if is_empty t then true
  else (
    let subject = if t.case_sensitive then s else String.lowercase_ascii s in
    Re.execp t.re subject
  )
================================================
FILE: bin/glob.mli
================================================
(** A compiled glob pattern. *)
type t

(** [parse ?case_sensitive s] compiles [s]; [None] on an invalid pattern.
    [?case_sensitive] overrides any case marker embedded in the pattern. *)
val parse : ?case_sensitive:bool -> string -> t option

val equal : t -> t -> bool

(** True for the empty pattern, which matches everything. *)
val is_empty : t -> bool

val case_sensitive : t -> bool

(** The (processed) pattern text the glob was compiled from. *)
val string : t -> string

(** [match_ g s] tests whether [s] matches [g]. *)
val match_ : t -> string -> bool
================================================
FILE: bin/lock_protected_cell.ml
================================================
(* A single optional value guarded by an Eio mutex; [get] takes the
   value out (leaving the cell empty). *)
type 'a t = {
  lock : Eio.Mutex.t;
  mutable data : 'a option;
}

(* Fresh, empty cell. *)
let make () =
  {
    lock = Eio.Mutex.create ();
    data = None;
  }
(* Store [x], replacing any existing value. *)
let set (t : 'a t) (x : 'a) =
  Eio.Mutex.use_rw t.lock ~protect:false @@ fun () ->
  t.data <- Some x

(* Empty the cell, discarding any stored value. *)
let unset (t : 'a t) =
  Eio.Mutex.use_rw t.lock ~protect:false @@ fun () ->
  t.data <- None
(* Atomically take the stored value out, leaving the cell empty.
   Returns [None] when the cell was already empty. *)
let get (t : 'a t) : 'a option =
  Eio.Mutex.use_rw t.lock ~protect:false @@ fun () ->
  match t.data with
  | None -> None
  | Some _ as taken -> (
      t.data <- None;
      taken
    )
================================================
FILE: bin/lock_protected_cell.mli
================================================
(** A mutex-protected cell holding at most one value. *)
type 'a t

val make : unit -> 'a t

(** Store a value, replacing any existing one. *)
val set : 'a t -> 'a -> unit

(** Discard any stored value. *)
val unset : 'a t -> unit

(** Atomically take the value out, leaving the cell empty. *)
val get : 'a t -> 'a option
================================================
FILE: bin/misc_utils.ml
================================================
open Docfd_lib
include Docfd_lib.Misc_utils'
(* Clamp a selection index [x] into the valid range [0, choice_count - 1]
   (collapses to 0 when there are no choices). *)
let bound_selection ~choice_count (x : int) : int =
  let hi = choice_count - 1 in
  if x < 0 then 0
  else if x > hi then max 0 hi
  else x
(* Case-insensitive word frequency map: every word is lower-cased before
   being counted. *)
let frequencies_of_words_ci (s : string Seq.t) : int String_map.t =
  let bump m word =
    let key = String.lowercase_ascii word in
    let count =
      match String_map.find_opt key m with
      | None -> 0
      | Some n -> n
    in
    String_map.add key (count + 1) m
  in
  Seq.fold_left bump String_map.empty s
(* Whether each standard stream is attached to a terminal — used to
   decide between interactive and non-interactive behavior. *)
let stdin_is_atty () =
  Unix.isatty Unix.stdin

let stdout_is_atty () =
  Unix.isatty Unix.stdout

let stderr_is_atty () =
  Unix.isatty Unix.stderr
(* Merge the comma-separated [exts] string with each comma-separated
   string of [additional_exts] into one normalized extension list:
   trimmed, stripped of leading dots, lower-cased, blanks dropped, and
   each entry given exactly one leading dot. *)
let compute_total_recognized_exts ~exts ~additional_exts =
  let split_on_comma = String.split_on_char ',' in
  (exts :: additional_exts)
  |> List.concat_map split_on_comma
  |> List.filter_map (fun raw ->
      let normalized =
        raw
        |> CCString.trim
        |> String_utils.remove_leading_dots
        |> String.lowercase_ascii
      in
      if normalized = "" then None
      else Some (Printf.sprintf ".%s" normalized)
    )
(* Elements arr.(start) .. arr.(end_exc - 1) as a sequence; [end_exc] is
   clamped to the array length.  Elements are read eagerly at
   construction time (as in the original), so later array mutation does
   not affect the result. *)
let array_sub_seq : 'a. start:int -> end_exc:int -> 'a array -> 'a Seq.t =
  fun ~start ~end_exc arr ->
  let stop = min (Array.length arr) end_exc in
  let rec collect i acc =
    if i < start then acc
    else collect (i - 1) (arr.(i) :: acc)
  in
  let items = if start >= stop then [] else collect (stop - 1) [] in
  List.to_seq items
(* Rotate [l] left by [x] positions: elements [x..] followed by
   elements [..x).  An [x] past the end of the list leaves it unchanged
   (both slices are clamped to the list length). *)
let rotate_list (x : int) (l : 'a list) : 'a list =
  let arr = Array.of_list l in
  let len = Array.length arr in
  let slice lo hi_exc =
    let hi_exc = min len hi_exc in
    let rec gather i acc =
      if i < lo then acc
      else gather (i - 1) (arr.(i) :: acc)
    in
    if lo >= hi_exc then [] else gather (hi_exc - 1) []
  in
  slice x len @ slice 0 x
(* Discard every item currently buffered in the stream without blocking;
   stops as soon as a non-blocking take yields nothing. *)
let drain_eio_stream (x : 'a Eio.Stream.t) =
  let draining = ref true in
  while !draining do
    match Eio.Stream.take_nonblocking x with
    | Some _ -> ()
    | None -> draining := false
  done
(* Byte count expressed in mebibytes (1 MiB = 1024 * 1024 bytes). *)
let mib_of_bytes (x : int) =
  float_of_int x /. 1048576.0
(* Run [f] with a progress-reporting callback.  In interactive mode a
   real [Progress] bar is shown and the reporter is wrapped in a mutex
   so multiple fibers may call it concurrently; otherwise [f] receives
   a no-op reporter. *)
let progress_with_reporter ~interactive bar f =
  if interactive then (
    Progress.with_reporter
      ~config:(Progress.Config.v ~ppf:Format.std_formatter ())
      bar
      (fun report_progress ->
         (* Serialize reporter calls: Progress reporters are not
            assumed safe under concurrent use from multiple fibers. *)
         let report_progress =
           let lock = Eio.Mutex.create () in
           fun x ->
             Eio.Mutex.use_rw lock ~protect:false (fun () ->
                 report_progress x
               )
         in
         f report_progress
      )
  ) else (
    f (fun _ -> ())
  )
(* Normalize a filter glob to an absolute pattern, except that the empty
   string (the match-all glob) is passed through untouched. *)
let normalize_filter_glob_if_not_empty (s : string) =
  match s with
  | "" -> s
  | _ -> normalize_glob_to_absolute s
(* Drop the leading ": " that Angstrom prepends to its error messages;
   other messages pass through unchanged. *)
let trim_angstrom_error_msg (s : string) =
  if String.starts_with ~prefix:": " s then
    String.sub s 2 (String.length s - 2)
  else
    s
(* Map each path to its 0-based position in the ranked list. *)
let ranking_of_ranked_document_list (l : string list) : int String_map.t =
  let rec go rank acc = function
    | [] -> acc
    | path :: rest -> go (rank + 1) (String_map.add path rank acc) rest
  in
  go 0 String_map.empty l
(* Fuzzy-search [items] (keyed by [get_key]) against [exp] and return
   the matches paired with their best search result, sorted by
   relevance.  Cancellable: the fiber racing on [stop_signal] wins with
   an empty result if the signal fires first.

   For each item's key line, every flattened phrase of [exp] is matched
   by taking, for each enriched token, the set of compatible word
   positions in the tokenized line, forming the cartesian product of
   those position sets (one candidate found-phrase per combination), and
   keeping the highest-scoring resulting [Search_result].
   NOTE(review): the cartesian product is exponential in token count —
   presumably acceptable for short keys such as paths; confirm. *)
let fuzzy_rank_assoc
    (stop_signal : Stop_signal.t)
    ~(get_key : 'a -> string)
    (exp : Search_exp.t)
    (items : 'a Seq.t)
  : ('a * Search_result.t) Dynarray.t =
  (* Highest-scoring result of a sequence, if any. *)
  let pick_best_search_result (s : Search_result.t Seq.t) : Search_result.t option =
    Seq.fold_left (fun best x ->
        match best with
        | None -> Some x
        | Some best -> (
            if Search_result.score x > Search_result.score best then (
              Some x
            ) else (
              Some best
            )
          )
      )
      None
      s
  in
  let search_in_line (line : string) (exp : Search_exp.t) : Search_result.t option =
    (* Keep spaces so token positions line up with the original text. *)
    let parts = Tokenization.tokenize ~drop_spaces:false line
                |> List.of_seq
    in
    Search_exp.flattened exp
    |> List.to_seq
    |> Seq.flat_map (fun phrase ->
        Search_phrase.enriched_tokens phrase
        |> List.to_seq
        |> Seq.map (fun token ->
            (* Candidate positions in the line compatible with [token]. *)
            List.to_seq parts
            |> Seq.mapi (fun i part ->
                (i, part)
              )
            |> Seq.filter_map (fun (i, part) ->
                if
                  Search_phrase.Enriched_token.compatible_with_word token part
                then (
                  Some (i, part, String.lowercase_ascii part)
                ) else (
                  None
                )
              )
          )
        |> OSeq.cartesian_product
        |> Seq.map (fun l ->
            let found_phrase =
              List.map (fun (i, part, part_ci) ->
                  Search_result.{
                    found_word_pos = i;
                    found_word_ci = part_ci;
                    found_word = part;
                  }
                )
                l
            in
            Search_result.make phrase
              ~found_phrase
              ~found_phrase_opening_closing_symbol_match_count:0
          )
      )
    |> pick_best_search_result
  in
  Eio.Fiber.first
    (fun () ->
       (* Cancellation branch: empty result once the stop signal fires. *)
       Stop_signal.await stop_signal;
       Dynarray.create ()
    )
    (fun () ->
       items
       |> Seq.fold_left (fun acc item ->
           let line = get_key item in
           match search_in_line line exp with
           | None -> acc
           | Some best_result -> (
               (item, best_result) :: acc
             )
         )
         []
       |> List.sort (fun (_x0, r0) (_x1, r1) ->
           Search_result.compare_relevance r0 r1
         )
       |> Dynarray.of_list
    )
(* Word positions of a search result's found phrase, as a set, for
   highlighting in the UI. *)
let highlights_of_search_result (search_result : Search_result.t) =
  Search_result.found_phrase search_result
  |> List.map (fun (x : Search_result.indexed_found_word) -> x.found_word_pos)
  |> Int_set.of_list
================================================
FILE: bin/params.ml
================================================
include Docfd_lib.Params'
(* Debug log sink; [None] disables debug output. *)
let debug_output : out_channel option ref = ref None

(* Whether file-tree scanning descends into hidden (dot-prefixed) entries. *)
let scan_hidden = ref false

let default_max_file_tree_scan_depth = 100

let max_file_tree_scan_depth = ref default_max_file_tree_scan_depth

(* Lines shown in a document preview. *)
let preview_line_count = 5

let default_tokens_per_search_scope_level = 100

let tokens_per_search_scope_level = ref default_tokens_per_search_scope_level

(* Extension recognized as a docfd script. *)
let docfd_script_ext = ".docfd-script"
(* Extensions of formats converted via pandoc (lower case, with dots). *)
let pandoc_supported_exts =
  [ ".epub"
  ; ".odt"
  ; ".docx"
  ; ".fb2"
  ; ".ipynb"
  ; ".html"
  ; ".htm"
  ]
(* Extensions treated as plain text (see [File_utils.format_of_file]). *)
let common_text_file_exts =
  String_set.of_list
    [ ".txt"
    ; ".md"
    ; ".markdown"
    ; ".rst"
    ; ".adoc"
    ; ".org"
    ; ".json"
    ; ".yaml"
    ; ".yml"
    ; ".toml"
    ; ".xml"
    ; ".csv"
    ; ".tsv"
    ; ".conf"
    ; ".cfg"
    ; ".ini"
    ; ".env"
    ; ".log"
    ]
(* Extensions of source-code-like files, also treated as plain text by
   [File_utils.format_of_file].  Overlaps with [common_text_file_exts]
   are harmless: membership in either set yields `Text. *)
let common_code_file_exts =
  String_set.of_list
    [ ".sh"
    ; ".bash"
    ; ".zsh"
    ; ".fish"
    ; ".ksh"
    ; ".csh"
    ; ".tcsh"
    ; ".ps1"
    ; ".psm1"
    ; ".psd1"
    ; ".cmd"
    ; ".bat"
    ; ".py"
    ; ".pyi"
    ; ".pyw"
    ; ".ipynb"
    ; ".pxd"
    ; ".pxi"
    ; ".js"
    ; ".mjs"
    ; ".cjs"
    ; ".jsx"
    ; ".ts"
    ; ".tsx"
    ; ".vite"
    ; ".webpack"
    ; ".html"
    ; ".htm"
    ; ".xhtml"
    ; ".css"
    ; ".scss"
    ; ".sass"
    ; ".less"
    ; ".vue"
    ; ".svelte"
    ; ".svg"
    ; ".c"
    ; ".h"
    ; ".i"
    ; ".cpp"
    ; ".cc"
    ; ".cxx"
    ; ".hpp"
    ; ".hh"
    ; ".hxx"
    ; ".m"
    ; ".mm"
    ; ".java"
    ; ".kt"
    ; ".kts"
    ; ".scala"
    ; ".groovy"
    ; ".clj"
    ; ".cljs"
    ; ".cljc"
    ; ".edn"
    ; ".go"
    ; ".rs"
    ; ".zig"
    ; ".nim"
    ; ".nims"
    ; ".hs"
    ; ".lhs"
    ; ".ml"
    ; ".mli"
    ; ".re"
    ; ".rei"
    ; ".elm"
    ; ".idr"
    ; ".agda"
    ; ".lua"
    ; ".pl"
    ; ".pm"
    ; ".rb"
    ; ".tcl"
    ; ".raku"
    ; ".jl"
    ; ".r"
    ; ".cs"
    ; ".csx"
    ; ".fs"
    ; ".fsi"
    ; ".fsx"
    ; ".vb"
    ; ".asm"
    ; ".s"
    ; ".S"
    ; ".ll"
    ; ".bc"
    ; ".wat"
    ; ".wasm"
    ; ".swift"
    ; ".storyboard"
    ; ".xib"
    ; ".json"
    ; ".jsonc"
    ; ".json5"
    ; ".yaml"
    ; ".yml"
    ; ".toml"
    ; ".xml"
    ; ".ini"
    ; ".cfg"
    ; ".conf"
    ; ".env"
    ; ".properties"
    ; ".sql"
    ; ".psql"
    ; ".cql"
    ; ".gql"
    ; ".graphql"
    ; ".mk"
    ; ".make"
    ; ".cmake"
    ; ".gradle"
    ; ".gradle.kts"
    ; ".bazel"
    ; ".bzl"
    ; ".ninja"
    ; ".build"
    ; ".Dockerfile"
    ; ".dockerignore"
    ; ".compose"
    ; ".compose.yml"
    ; ".tf"
    ; ".tfvars"
    ; ".hcl"
    ; ".ansible"
    ; ".playbook"
    ; ".md"
    ; ".markdown"
    ; ".rst"
    ; ".adoc"
    ; ".org"
    ; ".tex"
    ; ".vim"
    ; ".vimrc"
    ; ".emacs"
    ; ".editorconfig"
    ; ".awk"
    ; ".sed"
    ; ".pug"
    ; ".jade"
    ; ".haml"
    ; ".qs"
    ; ".kql"
    ]
(* Default comma-separated list of recognized extensions (dot-free, as
   expected by the CLI's ext arguments). *)
let default_recognized_exts =
  ("txt" :: "md" :: "pdf" :: pandoc_supported_exts)
  |> List.map String_utils.remove_leading_dots
  |> String.concat ","
(* Default comma-separated extension list for single-line search mode. *)
let default_recognized_single_line_exts =
  [ "log"; "csv"; "tsv" ]
  |> List.map String_utils.remove_leading_dots
  |> String.concat ","
(* Search mode used when a file is not in the single-line extension set. *)
let default_search_mode : Search_mode.t ref = ref `Multiline

(* Default sort specs, as accepted by the --sort-by argument. *)
let default_sort_by_arg = "score,desc"

let default_sort_by_no_score_arg = "path,asc"

let index_file_ext = ".index"

let db_file_name = "index.db"

(* Chunk size (bytes) used when hashing file contents. *)
let hash_chunk_size = 4096

(* Editor command; set at startup (empty until then). *)
let text_editor = ref ""

let default_samples_per_document = 5

let samples_per_document = ref default_samples_per_document

(* How ANSI styling is applied to printed output. *)
type style_mode = [ `Never | `Always | `Auto ]

let default_search_result_print_text_width = 80

let search_result_print_text_width = ref default_search_result_print_text_width

let default_search_result_print_snippet_min_size = 10

let search_result_print_snippet_min_size = ref default_search_result_print_snippet_min_size

let default_search_result_print_snippet_max_additional_lines_each_direction = 2

let search_result_print_snippet_max_additional_lines_each_direction =
  ref default_search_result_print_snippet_max_additional_lines_each_direction

let default_cache_limit = 10_000

let cache_limit = ref default_cache_limit

(* Set during startup; [script_dir] assumes [data_dir] is already set. *)
let cache_dir : string option ref = ref None

let data_dir : string option ref = ref None

let script_dir () =
  Filename.concat
    (Option.get !data_dir)
    "scripts"
(* Local time zone, falling back to UTC when it cannot be determined. *)
let tz : Timedesc.Time_zone.t =
  Option.value ~default:Timedesc.Time_zone.utc
    (Timedesc.Time_zone.local ())

(* Timestamp format strings; a trailing "Z" marks times as UTC when the
   local zone is unknown (matching the [tz] fallback above). *)
let last_scan_format_string =
  "{year}-{mon:0X}-{day:0X} {hour:0X}:{min:0X}:{sec:0X}"
  ^
  (match Timedesc.Time_zone.local () with
   | None -> "Z"
   | Some _ -> "")

let last_modified_format_string =
  "{year}-{mon:0X}-{day:0X} {hour:0X}:{min:0X}"
  ^
  (match Timedesc.Time_zone.local () with
   | None -> "Z"
   | Some _ -> "")

(* UI/session timing constants. *)
let blink_on_duration : Mtime.span = Mtime.Span.(140 * ms)

let session_manager_request_debounce_interval = Mtime.Span.(200 * ms)

let session_manager_request_debounce_wait_buffer = Mtime.Span.(5 * ms)
(* Host OS, detected once at module initialization by running `uname`.
   Anything that is not Darwin is treated as Linux. *)
let os_typ : [ `Darwin | `Linux ] =
  let s = CCUnix.call_stdout "uname"
          |> String.trim
          |> String.lowercase_ascii
  in
  match s with
  | "darwin" -> `Darwin
  | _ -> `Linux
(* Clipboard command for the host environment, or [None] when no usable
   clipboard tool can be determined.  On Linux: clip.exe availability is
   used as a WSL indicator, then X11/Wayland tools are chosen based on
   XDG_SESSION_TYPE. *)
let clipboard_copy_cmd_and_args =
  match os_typ with
  | `Darwin -> Some ("pbcopy", [||])
  | `Linux -> (
      if Proc_utils.command_exists "clip.exe" then (
        Some ("clip.exe", [||])
      ) else (
        match Sys.getenv_opt "XDG_SESSION_TYPE" with
        | None -> None
        | Some s -> (
            match String.lowercase_ascii s with
            | "x11" -> Some ("xclip", [| "-sel"; "clip" |])
            | "wayland" -> Some ("wl-copy", [|"-n"|])
            | _ -> None
          )
      )
    )
================================================
FILE: bin/path_opening.ml
================================================
open Docfd_lib
open Debug_utils
(* How an opener command is launched: in the foreground terminal (e.g. a
   text editor) or as a detached background process (e.g. a PDF viewer). *)
type launch_mode = [ `Terminal | `Detached ]

(* A user-provided opener spec: extensions, launch mode, command template. *)
type spec = string list * launch_mode * string

(* Per-extension opener overrides, keyed by extension (with leading dot). *)
let specs : (string, launch_mode * string) Hashtbl.t = Hashtbl.create 128
(* Angstrom parsers for opener command templates and specs.

   A template mixes literal text with placeholders: {path}, {page_num},
   {line_num}, {search_word}; "{{" produces a literal "{".  A placeholder
   whose value is unavailable makes the whole parse fail.  A spec has the
   shape "ext1,ext2:terminal|detached=command template". *)
module Parsers = struct
  open Angstrom
  open Parser_components

  let expected_char c =
    fail (Fmt.str "expected char %c" c)

  (* The placeholder name inside {...}, resolved to its value. *)
  let inner ~path ~page_num ~line_num ~search_word : string t =
    choice [
      string "path" *> commit *> return path;
      string "page_num" *> commit
      >>= (fun _ ->
          match page_num with
          | None -> fail "page_num not available"
          | Some n -> return (Fmt.str "%d" n)
        );
      string "line_num" *> commit
      >>= (fun _ ->
          match line_num with
          | None -> fail "line_num not available"
          | Some n -> return (Fmt.str "%d" n)
        );
      string "search_word" *> commit
      >>= (fun _ ->
          match search_word with
          | None -> fail "search_word not available"
          | Some s -> return (Fmt.str "'%s'" s));
    ]
    <|>
    fail "invalid placeholder"

  (* A full template: literals, escaped braces, and placeholders. *)
  let cmd ~path ~page_num ~line_num ~search_word : string t =
    let single =
      choice [
        (string "{{" >>| fun _ -> Fmt.str "{");
        (char '{' *> commit *>
         inner ~path ~page_num ~line_num ~search_word <*
         (char '}' <|> expected_char '}'));
        (take_while1 (function '{' -> false | _ -> true));
      ]
    in
    many single
    >>| fun l -> String.concat "" l

  (* "exts:launch_mode=cmd" — the command is the rest of the input. *)
  let spec : spec t =
    sep_by (char ',')
      (take_while1 (function ':' | ',' -> false | _ -> true))
    >>= fun exts ->
    (char ':' <|> expected_char ':')*>
    (choice [
        string "terminal" *> return `Terminal;
        string "detached" *> return `Detached;
      ]
     <|>
     fail "invalid launch mode")
    >>= fun launch_mode ->
    (char '=' <|> expected_char '=') *> any_string
    >>= fun cmd ->
    return (exts, launch_mode, cmd)
end
(* Values available for substitution into an opener command template.
   [quote_path] controls whether {path} is shell-quoted (default true);
   optional fields make their placeholders unavailable (template parse
   fails if such a placeholder is used). *)
module Config = struct
  type t = {
    quote_path : bool;
    path : string;
    page_num : int option;
    line_num : int option;
    search_word : string option;
    launch_mode : launch_mode;
  }

  let make ?(quote_path = true) ~path ?page_num ?line_num ?search_word ~launch_mode () : t =
    {
      quote_path;
      path;
      page_num;
      line_num;
      search_word;
      launch_mode;
    }
end
(* Substitute [config]'s values into the command template [s].
   Errors (unknown/unavailable placeholder, malformed braces) are
   returned as messages with Angstrom's ": " prefix trimmed. *)
let resolve_cmd (config : Config.t) (s : string) : (string, string) result =
  let open Angstrom in
  let { Config.quote_path; path; page_num; line_num; search_word } = config in
  let path = if quote_path then Filename.quote path else path in
  parse_string ~consume:All (Parsers.cmd ~path ~page_num ~line_num ~search_word) s
  |> Result.map_error Misc_utils.trim_angstrom_error_msg
(* Parse and validate an opener spec string.  Each extension is
   normalized (lower case, single leading dot), and the command template
   is dry-run resolved against a dummy config appropriate for that
   extension (PDFs get page_num/search_word, others get line_num) so an
   invalid placeholder is rejected at spec-parse time rather than when
   the user opens a file. *)
let parse_spec (s : string) : (spec, string) result =
  let open Angstrom in
  match
    parse_string ~consume:All Parsers.spec s
  with
  | Error msg -> Error (Misc_utils.trim_angstrom_error_msg msg)
  | Ok (exts', launch_mode, cmd) -> (
      let rec aux acc exts =
        match exts with
        | [] -> Ok (List.rev acc, launch_mode, cmd)
        | ext :: rest -> (
            let ext = ext
                      |> String.lowercase_ascii
                      |> String_utils.remove_leading_dots
                      |> Fmt.str ".%s"
            in
            (* Dummy config used only to validate the template. *)
            let config =
              if ext = ".pdf" then (
                Config.make
                  ~path:"path"
                  ~page_num:1
                  ~search_word:"word"
                  ~launch_mode:`Detached
                  ()
              ) else (
                Config.make
                  ~path:"path"
                  ~line_num:1
                  ~launch_mode:`Terminal
                  ()
              )
            in
            match
              resolve_cmd config cmd
            with
            | Error msg -> Error msg
            | Ok _ -> aux (ext :: acc) rest
          )
      in
      aux [] exts'
    )
(* Generic "open with the desktop's default handler" template. *)
let xdg_open_cmd =
  "xdg-open {path}"

(* Pandoc-supported formats are handed to the desktop handler, detached. *)
let pandoc_supported_format_config_and_cmd ~path =
  (Config.make ~path ~launch_mode:`Detached (),
   xdg_open_cmd)

(* Last-resort opener per OS. *)
let fallback_cmd : string =
  match Params.os_typ with
  | `Linux -> xdg_open_cmd
  | `Darwin -> "open {path}"
(* Pick, from a found phrase, the word that is rarest on its own page —
   the best term to hand to a PDF viewer's search ("--find") so it jumps
   near the actual hit.  Returns (word, 0-based page number).

   A word's "frequency" is the summed count of all case-insensitive
   words on its page that contain it as a substring; the word with the
   lowest such frequency wins (ties keep the earlier word).
   Precondition: [found_phrase] is non-empty ([Option.get] would raise
   otherwise). *)
let compute_most_unique_word_and_residing_page_num ~doc_id found_phrase =
  let page_nums = found_phrase
                  |> List.map (fun word ->
                      word.Search_result.found_word_pos
                      |> (fun pos -> Index.loc_of_pos ~doc_id pos)
                      |> Index.Loc.line_loc
                      |> Index.Line_loc.page_num
                    )
                  |> List.sort_uniq Int.compare
  in
  (* Case-insensitive word frequencies, computed once per distinct page. *)
  let frequency_of_word_of_page_ci : int String_map.t Int_map.t =
    List.fold_left (fun acc page_num ->
        let m = Misc_utils.frequencies_of_words_ci
            (Index.words_of_page_num ~doc_id page_num
             |> Dynarray.to_seq)
        in
        Int_map.add page_num m acc
      )
      Int_map.empty
      page_nums
  in
  found_phrase
  |> List.map (fun word ->
      let page_num =
        Index.loc_of_pos ~doc_id word.Search_result.found_word_pos
        |> Index.Loc.line_loc
        |> Index.Line_loc.page_num
      in
      let m = Int_map.find page_num frequency_of_word_of_page_ci in
      (* Substring-based count: how common this word (or words that
         contain it) is on its page. *)
      let freq =
        String_map.fold (fun word_on_page_ci freq acc_freq ->
            if
              CCString.find ~sub:word.Search_result.found_word_ci word_on_page_ci >= 0
            then (
              acc_freq + freq
            ) else (
              acc_freq
            )
          )
          m
          0
      in
      (word, page_num, freq)
    )
  |> List.fold_left (fun best x ->
      let (_x_word, _x_page_num, x_freq) = x in
      match best with
      | None -> Some x
      | Some (_best_word, _best_page_num, best_freq) -> (
          if x_freq < best_freq then
            Some x
          else
            best
        )
    )
    None
  |> Option.get
  |> (fun (word, page_num, _freq) ->
      (word.found_word, page_num))
(* Build the config + command template for opening a PDF, trying to jump
   to the page of the search hit and pre-fill the viewer's find box.

   The page passed to viewers is 1-based ([most_unique_word_page_num]
   is 0-based, hence the +1).  On Linux the viewer is detected from the
   default desktop file for PDFs, with known CLI shapes for okular,
   evince, xreader, atril, zathura and mupdf, and flatpak-installed
   viewers invoked through `flatpak run`; anything unrecognized (and all
   of macOS) falls back to the OS default opener. *)
let pdf_config_and_cmd ~path ~doc_id_and_search_result : Config.t * string =
  let config =
    let page_num, search_word =
      match doc_id_and_search_result with
      | None -> (
          (1, "")
        )
      | Some (doc_id, search_result) -> (
          let found_phrase = Search_result.found_phrase search_result in
          let (most_unique_word, most_unique_word_page_num) =
            compute_most_unique_word_and_residing_page_num ~doc_id found_phrase
          in
          let page_num = most_unique_word_page_num + 1 in
          (page_num, most_unique_word)
        )
    in
    Config.make ~path ~page_num ~search_word ~launch_mode:`Detached ()
  in
  let cmd =
    match Params.os_typ with
    | `Linux -> (
        match Xdg_utils.default_desktop_file_path `PDF with
        | None -> fallback_cmd
        | Some viewer_desktop_file_path -> (
            let flatpak_package_name =
              let s = Filename.basename viewer_desktop_file_path in
              Option.value ~default:s
                (CCString.chop_suffix ~suf:".desktop" s)
            in
            let viewer_desktop_file_path_lowercase_ascii =
              String.lowercase_ascii viewer_desktop_file_path
            in
            let contains sub =
              CCString.find ~sub viewer_desktop_file_path_lowercase_ascii >= 0
            in
            let make_command name args =
              if contains "flatpak" then
                Fmt.str "flatpak run %s %s" flatpak_package_name args
              else
                Fmt.str "%s %s" name args
            in
            match doc_id_and_search_result with
            | None -> fallback_cmd
            | Some _ -> (
                if contains "okular" then
                  make_command "okular"
                    "--page {page_num} --find {search_word} {path}"
                else if contains "evince" then
                  make_command "evince"
                    "--page-index {page_num} --find {search_word} {path}"
                else if contains "xreader" then
                  make_command "xreader"
                    "--page-index {page_num} --find {search_word} {path}"
                else if contains "atril" then
                  make_command "atril"
                    "--page-index {page_num} --find {search_word} {path}"
                else if contains "zathura" then
                  (* Check zathura before mupdf as desktop file for
                     zathura might be `org.pwmt.zathura-pdf-mupdf.desktop`
                  *)
                  make_command "zathura"
                    "--page {page_num} --find {search_word} {path}"
                else if contains "mupdf" then
                  make_command "mupdf" "{path} {page_num}"
                else
                  fallback_cmd
              )
          )
      )
    | `Darwin -> fallback_cmd
  in
  (config, cmd)
(* Build the config + command template for opening a text file in the
   configured editor at [line_num] (1-based, default 1).  The argument
   shape for jumping to a line is chosen per editor basename; unknown
   editors just get the path. *)
let config_and_cmd_to_open_text_file ~path ?(line_num = 1) () : Config.t * string =
  let editor = !Params.text_editor in
  let config =
    Config.make ~path ~line_num ~launch_mode:`Terminal ()
  in
  let cmd =
    match Filename.basename editor with
    | "nano" | "nvim" | "vim" | "vi" | "kak" | "emacs" ->
      Fmt.str "%s +{line_num} {path}" editor
    | "hx" | "micro" ->
      Fmt.str "%s {path}:{line_num}" editor
    | "jed" | "xjed" ->
      Fmt.str "%s {path} -g {line_num}" editor
    | _ ->
      Fmt.str "%s {path}" editor
  in
  (config, cmd)
(* Config and command for opening a text document, jumping to the line
   of the first found word of [doc_id_and_search_result] when given. *)
let text_config_and_cmd ~path ~doc_id_and_search_result : Config.t * string =
  let line_num =
    Option.map (fun (doc_id, search_result) ->
        let first_word = List.hd (Search_result.found_phrase search_result) in
        let loc =
          Index.loc_of_pos ~doc_id first_word.Search_result.found_word_pos
        in
        (* Line numbers in the index are 0-based; editors expect 1-based. *)
        Index.Line_loc.line_num_in_page (Index.Loc.line_loc loc) + 1
      )
      doc_id_and_search_result
  in
  config_and_cmd_to_open_text_file
    ~path
    ?line_num
    ()
(* Open [path] with an appropriate external program, optionally jumping
   to the location indicated by [doc_id_and_search_result].  A
   user-provided spec for the file's extension (from [specs]) overrides
   the built-in per-format launch mode and command template. *)
let main ~close_term ~path ~doc_id_and_search_result =
  let ext = File_utils.extension_of_file path in
  let config, cmd =
    (match File_utils.format_of_file path with
     | `PDF -> (
         pdf_config_and_cmd ~path ~doc_id_and_search_result
       )
     | `Pandoc_supported_format -> (
         pandoc_supported_format_config_and_cmd ~path
       )
     | `Text -> (
         text_config_and_cmd ~path ~doc_id_and_search_result
       )
     | `Other -> (
         (Config.make ~path ~launch_mode:`Detached (), fallback_cmd)
       )
    )
    (* Extension-specific user spec takes precedence over the default. *)
    |> (fun (config, cmd) ->
        match Hashtbl.find_opt specs ext with
        | None -> (
            (config, cmd)
          )
        | Some (launch_mode, cmd) -> (
            ({ config with launch_mode }, cmd)
          )
      )
    (* Substitute the {path}/{line_num}/{page_num}/{search_word}
       placeholders in the command template. *)
    |> (fun (config, cmd) ->
        (config, Result.get_ok (resolve_cmd config cmd)))
  in
  match config.launch_mode with
  | `Terminal -> (
      (* NOTE(review): the lines below are garbled/truncated in this
         copy of the file — the [Fmt.str "] literal is cut off and runs
         into a debug-print fragment.  Restore from upstream; kept
         byte-identical here. *)
      let cmd =
        if Misc_utils.stdin_is_atty () then (
          cmd
        ) else (
          Fmt.str "
Printf.fprintf oc "System command: %s\n" cmd
);
Sys.command cmd |> ignore
)
| `Detached -> (
    do_if_debug (fun oc ->
        Printf.fprintf oc "System command: %s\n" cmd
      );
    (* Fire-and-forget; the spawned viewer outlives Docfd. *)
    Proc_utils.run_in_background cmd |> ignore
  )
(* Walk upward from [path], looking for a directory containing a known
   project/vault root indicator (VCS dir, Obsidian/Logseq/Tangent
   vault marker).  The walk stops early at "/home/<user>" (or
   "home/<user>" for a relative path) so the scan never escapes the
   user's home directory.  Returns [None] if nothing is found or a
   directory cannot be read. *)
let find_project_root path =
  let root_indicators =
    [ ".git"
    ; ".hg"
    ; ".svn"
    ; ".obsidian"
    ; ".logseq"
    ; ".tangent"
    ]
  in
  let rec aux arr =
    let cur = CCString.concat_seq ~sep:Filename.dir_sep (Dynarray.to_seq arr) in
    if Dynarray.length arr = 0 then (
      None
    ) else if
      Dynarray.length arr = 3
      && (Dynarray.get arr 0 = "home"
          (* An absolute path splits with a leading "" component, so
             "/home/<user>" becomes ["" ; "home" ; <user>].  The
             original check only handled the relative form and thus
             never fired for absolute paths. *)
          || (Dynarray.get arr 0 = "" && Dynarray.get arr 1 = "home"))
    then (
      Some cur
    ) else (
      let candidates =
        try
          Some (Sys.readdir cur)
        with
        | _ -> None
      in
      match candidates with
      | None -> (
          None
        )
      | Some candidates -> (
          let root_indicator_exists =
            Array.exists (fun name -> List.mem name root_indicators) candidates
          in
          if root_indicator_exists then (
            Some cur
          ) else (
            (* No indicator here: drop the last component and retry
               one level up. *)
            Dynarray.pop_last arr |> ignore;
            aux arr
          )
        )
    )
  in
  let arr = Dynarray.of_list (CCString.split ~by:Filename.dir_sep path) in
  aux arr
(* Open the target of [link] found inside [doc]: another document for
   Markdown and wiki links, or the system handler for URLs.  Empty
   links are ignored. *)
let open_link ~close_term ~doc link =
  let { Link.typ; link; _ } = link in
  let doc_path = Document.path doc in
  let doc_dir = Filename.dirname doc_path in
  let doc_ext = Filename.extension doc_path in
  (* Resolve a wiki-style link to a concrete file path:
     - "."-prefixed links are relative to the document's directory;
     - otherwise the project/vault root is searched recursively for a
       file whose name (sans extension) ends with the link text,
       preferring one that also carries the document's own extension. *)
  let resolve_wiki_link link =
    let link =
      Option.value ~default:link
        (CCString.chop_prefix ~pre:"/" link)
    in
    let link_with_ext = link ^ doc_ext in
    (* NOTE(review): if the original link is exactly "/", the chop
       above yields "" and [link.[0]] raises Invalid_argument —
       confirm whether such links can reach this point. *)
    if link.[0] = '.' then (
      Filename.concat doc_dir link_with_ext
    ) else (
      let wiki_root =
        Option.value ~default:doc_dir (find_project_root doc_dir)
      in
      let candidates = File_utils.list_files_recursive
          ~report_progress:(fun () -> ())
          ~filter:(fun _depth path ->
              let path_no_ext =
                try
                  Filename.chop_extension path
                with
                | _ -> path
              in
              CCString.suffix ~suf:link path_no_ext
            )
          wiki_root
      in
      match
        String_set.find_first_opt (fun path ->
            CCString.suffix ~suf:link_with_ext path
          ) candidates
      with
      | Some x -> x
      | None -> (
          (* Fall back to any candidate, then to a (possibly
             nonexistent) path directly under the root. *)
          match String_set.min_elt_opt candidates with
          | Some x -> x
          | None -> Filename.concat wiki_root link_with_ext
        )
    )
  in
  if String.length link > 0 then (
    match typ with
    | `Markdown -> (
        let path =
          if Filename.is_relative link then (
            Filename.concat doc_dir link
          ) else (
            (* Absolute Markdown links are resolved against the
               project root, not the filesystem root. *)
            let project_root =
              Option.value ~default:doc_dir (find_project_root doc_dir)
            in
            Filename.concat project_root link
          )
        in
        main ~close_term ~path ~doc_id_and_search_result:None
      )
    | `Wiki -> (
        let path = resolve_wiki_link link in
        main ~close_term ~path ~doc_id_and_search_result:None
      )
    | `URL -> (
        let config = Config.make ~path:link ~launch_mode:`Detached () in
        resolve_cmd config fallback_cmd
        |> Result.get_ok
        |> Proc_utils.run_in_background
        |> ignore
      )
  )
================================================
FILE: bin/ping.ml
================================================
(* A saturating wake-up signal between fibers: any number of pings may
   be queued, and a single [wait] coalesces and consumes them all. *)
type t = {
  queue : unit Eio.Stream.t;
}

let make () =
  {
    (* Effectively unbounded capacity, so [ping] never blocks. *)
    queue = Eio.Stream.create Int.max_int;
  }

let ping (t : t) =
  Eio.Stream.add t.queue ()

(* Block until at least one ping arrives, then discard any further
   queued pings so repeated pings yield a single wake-up. *)
let wait (t : t) =
  Eio.Stream.take t.queue;
  Misc_utils.drain_eio_stream t.queue

(* Discard all pending pings without blocking. *)
let clear (t : t) =
  Misc_utils.drain_eio_stream t.queue
================================================
FILE: bin/ping.mli
================================================
(** A saturating wake-up signal between fibers. *)
type t

val make : unit -> t

(** Queue one ping; never blocks. *)
val ping : t -> unit

(** Block until pinged, then drain any further queued pings. *)
val wait : t -> unit

(** Discard all pending pings without blocking. *)
val clear : t -> unit
================================================
FILE: bin/printers.ml
================================================
(* Render [img] to [oc].  With [color] set, emit ANSI escape sequences;
   otherwise use Notty's dumb (plain-text) capability. *)
let output_image ~color (oc : out_channel) (img : Notty.image) : unit =
  let open Notty in
  let cap = if color then Cap.ansi else Cap.dumb in
  let dim = (I.width img, I.height img) in
  let render_buf = Buffer.create 1024 in
  Render.to_buffer render_buf cap (0, 0) dim img;
  Buffer.output_buffer oc render_buf
(* Print an empty line (a zero-width, one-row image) to [oc]. *)
let newline_image oc =
  let blank = Notty_unix.eol (Notty.I.void 0 1) in
  output_image ~color:false oc blank
(* Print [path] in magenta on its own line. *)
let path_image ~color oc path =
  let img = Notty.I.string Notty.A.(fg magenta) path in
  output_image ~color oc (Notty_unix.eol img)
(* Print a document's path followed by each of its rendered search
   results, separated by blank lines. *)
let search_result_group ~color ~underline (oc : out_channel) ((document, results) : Session.search_result_group) =
  let doc_path = Document.path document in
  path_image ~color oc doc_path;
  results
  |> Array.iteri (fun idx search_result ->
      if idx > 0 then newline_image oc;
      Content_and_search_result_rendering.search_result
        ~doc_id:(Document.doc_id document)
        ~render_mode:(UI_base.render_mode_of_document document)
        ~width:!Params.search_result_print_text_width
        ~underline
        ~fill_in_context:true
        search_result
      |> Notty_unix.eol
      |> output_image ~color oc
    )
(* Print a sequence of search result groups, separated by blank lines. *)
let search_result_groups
    ~color
    ~underline
    (oc : out_channel)
    (s : Session.search_result_group Seq.t)
  =
  s
  |> Seq.iteri (fun idx group ->
      if idx > 0 then newline_image oc;
      search_result_group ~color ~underline oc group
    )
================================================
FILE: bin/proc_utils.ml
================================================
open Misc_utils
(* Use the shell builtin [command -v] to test whether [cmd] is on PATH.
   All output is discarded; only the exit status matters. *)
let command_exists (cmd : string) : bool =
  let probe =
    Fmt.str "command -v %s 2>/dev/null 1>/dev/null" (Filename.quote cmd)
  in
  Sys.command probe = 0
(* Launch [cmd] via the shell as a background job ("&"), discarding its
   output.  [cmd] is interpolated into the shell command line verbatim,
   so it must already be fully quoted/escaped by the caller.  Returns
   the shell's immediate exit status, not the job's. *)
let run_in_background (cmd : string) =
  Sys.command (Fmt.str "%s 2>/dev/null 1>/dev/null &" cmd)
(* Run [cmd] and return its stdout split into chunks, or [None] on any
   failure.  stderr is redirected to /dev/null.  [split_mode] selects
   the chunking:
   - [`On_line_split]: one chunk per line;
   - [`On_form_feed]: one chunk per form-feed (0x0C) separated section
     (e.g. per-page output of PDF text extractors). *)
let run_return_stdout
    ~proc_mgr
    ~fs
    ~(split_mode : [ `On_line_split | `On_form_feed ])
    (cmd : string list)
  : string list option =
  let form_feed = Char.chr 0x0C in
  Eio.Path.(with_open_out
              ~create:`Never
              (fs / "/dev/null"))
    (fun stderr ->
       let output =
         try
           let lines =
             Eio.Process.parse_out proc_mgr
               (match split_mode with
                | `On_line_split -> Eio.Buf_read.(map List.of_seq lines)
                | `On_form_feed -> (
                    (* Parser for one chunk: skip a leading form feed
                       if present, then take everything up to (but not
                       including) the next form feed. *)
                    let p =
                      let open Eio.Buf_read in
                      let open Syntax in
                      let* c = peek_char in
                      (match c with
                       | None -> return ()
                       | Some c -> (
                           if c = form_feed then (
                             skip 1
                           ) else (
                             return ()
                           )
                         ))
                      *>
                      (take_while (fun c -> c <> form_feed))
                    in
                    Eio.Buf_read.(map List.of_seq (seq p))
                  )
               )
               ~stderr cmd
           in
           Some lines
         with
         (* Best-effort: any spawn or parse failure maps to None. *)
         | _ -> None
       in
       output
    )
(* Run [command] with [args] and let [f] write the child's stdin.
   Exits the whole program with an error message if [command] is not
   found on PATH.

   Fix: use [Unix.close_process_out] instead of only closing the
   channel — it closes (and thereby flushes) the pipe AND waits on the
   child, so the process is reaped rather than lingering as a zombie.
   The child's exit status is ignored, preserving the previous
   best-effort behavior. *)
let pipe_to_command (f : out_channel -> unit) command args =
  if not (command_exists command) then (
    exit_with_error_msg
      (Fmt.str "command %s not found" command)
  );
  let oc =
    Unix.open_process_args_out
      command (Array.append [|command|] args)
  in
  f oc;
  Out_channel.flush oc;
  ignore (Unix.close_process_out oc)
================================================
FILE: bin/result_syntax.ml
================================================
(* Binding operators for the [result] monad: [let*] sequences
   computations, [let+] maps over the success value. *)
let ( let* ) x f = Result.bind x f
let ( let+ ) x f = Result.map f x
================================================
FILE: bin/script.ml
================================================
(* Execute the script at [path]: each non-blank, non-comment line is
   parsed and run as a command against the evolving session state
   (starting from [init_state]), recording a snapshot after every
   command.  The first snapshot holds [init_state] with no command.
   Returns [Error msg] if reading the file, parsing a line, or running
   a command fails. *)
let run pool ~init_state ~path
  : (Session.Snapshot.t Dynarray.t, string) result =
  let exception Error_with_msg of string in
  let snapshots = Dynarray.create () in
  try
    let lines =
      try
        CCIO.with_in path CCIO.read_lines_l
      with
      | Sys_error _ -> (
          raise (Error_with_msg (Fmt.str "failed to read script %s" (Filename.quote path)))
        )
    in
    Dynarray.add_last
      snapshots
      (Session.Snapshot.make
         ~last_command:None
         init_state);
    lines
    |> CCList.foldi (fun state i line ->
        (* Lines are reported 1-based in error messages. *)
        let line_num_in_error_msg = i + 1 in
        if String_utils.line_is_blank_or_system_comment line then (
          state
        ) else (
          match Command.of_string line with
          | None -> (
              raise (Error_with_msg
                       (Fmt.str "failed to parse command on line %d: %s"
                          line_num_in_error_msg line))
            )
          | Some command -> (
              match Session.run_command pool command state with
              | None -> (
                  raise (Error_with_msg
                           (Fmt.str "failed to run command on line %d: %s"
                              line_num_in_error_msg line))
                )
              | Some (command, state) -> (
                  (* Record the (possibly rewritten) command together
                     with the state it produced. *)
                  let snapshot =
                    Session.Snapshot.make
                      ~last_command:(Some command)
                      state
                  in
                  Dynarray.add_last snapshots snapshot;
                  state
                )
            )
        )
      ) init_state
    |> ignore;
    Ok snapshots
  with
  | Error_with_msg msg -> Error msg
================================================
FILE: bin/search_mode.ml
================================================
(* Whether a search phrase must match within a single line
   ([`Single_line]) or may span lines ([`Multiline]). *)
type t = [
  | `Single_line
  | `Multiline
]
================================================
FILE: bin/session.ml
================================================
open Docfd_lib
(* A document paired with its matching search results. *)
type search_result_group = Document.t * Search_result.t array
module State = struct
  module Sort_by = struct
    (* Primary sort key for the document listing.  [`Ranking] carries
       an explicit path -> rank map (produced by path fuzzy ranking). *)
    type typ = [
      | `Path_date
      | `Path
      | `Score
      | `Mod_time
      | `Ranking of int String_map.t
    ]
    type t = typ * Document.Compare.order
  end

  type t = {
    all_documents : Document.t String_map.t;  (* keyed by path *)
    filter_exp : Filter_exp.t;
    filter_exp_string : string;  (* raw user input for the filter *)
    documents_passing_filter : String_set.t;
    documents_marked : String_set.t;
    search_exp : Search_exp.t;
    search_exp_string : string;  (* raw user input for the search *)
    (* Cached per-document results, sorted most relevant first. *)
    search_results : Search_result.t array String_map.t;
    sort_by : Sort_by.t;
    (* Sort used when the search expression is empty, since [`Score]
       is meaningless without search results. *)
    sort_by_no_score : Sort_by.t;
    screen_split : Command.screen_split;
    show_bottom_right_pane : bool;
    show_key_binding_info_pane : bool;
    (* Paths focused via the `Focus command (head = most recent);
       pinned to the top of the listing. *)
    focus_list : string list;
    (* Per-path character positions to highlight (from fuzzy ranking). *)
    path_highlights : Int_set.t String_map.t;
  }
  (* Structural equality over the content-bearing fields only: sort
     settings, screen split, pane visibility, focus list and path
     highlights are not compared — NOTE(review): presumably deliberate
     (used to detect content changes); confirm.  Also note
     [Array.for_all2] raises Invalid_argument if the result arrays for
     a shared path differ in length. *)
  let equal (x : t) (y : t) =
    String_map.equal Document.equal x.all_documents y.all_documents
    &&
    String.equal x.filter_exp_string y.filter_exp_string
    &&
    String_set.equal x.documents_passing_filter y.documents_passing_filter
    &&
    String_set.equal x.documents_marked y.documents_marked
    &&
    String.equal x.search_exp_string y.search_exp_string
    &&
    String_map.equal
      (Array.for_all2 Search_result.equal)
      x.search_results
      y.search_results
let size (t : t) =
String_map.cardinal t.all_documents
  (* Initial state: no documents, empty filter/search, default sort
     orders, both panes shown. *)
  let empty : t =
    {
      all_documents = String_map.empty;
      filter_exp = Filter_exp.empty;
      filter_exp_string = "";
      documents_passing_filter = String_set.empty;
      documents_marked = String_set.empty;
      search_exp = Search_exp.empty;
      search_exp_string = "";
      search_results = String_map.empty;
      (* Widen Command.Sort_by's typ into this module's typ. *)
      sort_by = Command.Sort_by.default
                |> (fun (typ, order) -> ((typ :> Sort_by.typ), order));
      sort_by_no_score = Command.Sort_by.default_no_score
                         |> (fun (typ, order) -> ((typ :> Sort_by.typ), order));
      screen_split = `Even;
      show_bottom_right_pane = true;
      show_key_binding_info_pane = true;
      focus_list = [];
      path_highlights = String_map.empty;
    }
  (* Plain accessors. *)
  let filter_exp (t : t) = t.filter_exp
  let filter_exp_string (t : t) = t.filter_exp_string
  let search_exp (t : t) = t.search_exp
  let search_exp_string (t : t) = t.search_exp_string
  let path_highlights (t : t) = t.path_highlights

  let clear_path_highlights (t : t) =
    { t with path_highlights = String_map.empty }

  let screen_split (t : t) = t.screen_split

  (* Visibility of the given UI pane. *)
  let show_pane (t : t) (pane : Command.pane) =
    match pane with
    | `Bottom_right -> t.show_bottom_right_pane
    | `Key_binding_info -> t.show_key_binding_info_pane
  (* Compute search results for documents that pass the filter but have
     no cached results yet, and merge them into [t.search_results].
     Races against [stop_signal] via [Eio.Fiber.first]: if the signal
     fires first, returns [None] and partial work is discarded. *)
  let refresh_search_results pool stop_signal (t : t) : t option =
    let cancellation_notifier = Atomic.make false in
    let updates =
      Eio.Fiber.first
        (fun () ->
           Stop_signal.await stop_signal;
           Atomic.set cancellation_notifier true;
           String_map.empty)
        (fun () ->
           let global_first_word_candidates_lookup =
             Index.generate_global_first_word_candidates_lookup
               pool
               t.search_exp
           in
           (* Bit vector of doc ids that contain at least one candidate
              first word of some search phrase — only those documents
              can possibly match. *)
           let usable_doc_ids =
             let bv = CCBV.empty () in
             Search_phrase.Enriched_token.Data_map.iter
               (fun _word word_ids ->
                  Int_set.iter (fun word_id ->
                      Index.State.union_doc_ids_of_word_id_into_bv ~word_id ~into:bv
                    )
                    word_ids
               )
               global_first_word_candidates_lookup;
             bv
           in
           let documents_to_search_through =
             t.documents_passing_filter
             |> String_set.to_seq
             |> Seq.map (fun path -> (path, String_map.find path t.all_documents))
             |> Seq.filter (fun (path, doc) ->
                 (* Skip documents whose results are already cached. *)
                 Option.is_none (String_map.find_opt path t.search_results)
                 && CCBV.get usable_doc_ids (Int64.to_int @@ Document.doc_id doc)
               )
             |> List.of_seq
           in
           documents_to_search_through
           |> Task_pool.map_list pool (fun (path, doc) ->
               let within_same_line =
                 match Document.search_mode doc with
                 | `Single_line -> true
                 | `Multiline -> false
               in
               Index.make_search_job_groups
                 stop_signal
                 ~cancellation_notifier
                 ~doc_id:(Document.doc_id doc)
                 ~doc_word_ids:(Document.word_ids doc)
                 ~global_first_word_candidates_lookup
                 ~within_same_line
                 ~search_scope:(Document.search_scope doc)
                 t.search_exp
               |> Seq.map (fun x -> (path, x))
               |> List.of_seq
             )
           |> List.concat
           (* Run all job groups across the task pool. *)
           |> Task_pool.map_list pool (fun (path, search_job_group) ->
               let heap = Index.Search_job_group.run search_job_group in
               (path, heap)
             )
           (* Merge per-group result heaps back into one heap per
              document, yielding between steps to stay responsive. *)
           |> List.fold_left (fun acc (path, heap) ->
               Eio.Fiber.yield ();
               let heap =
                 String_map.find_opt path acc
                 |> Option.value ~default:Search_result_heap.empty
                 |> Search_result_heap.merge heap
               in
               String_map.add path heap acc
             )
             String_map.empty
           |> String_map.map (fun v ->
               Eio.Fiber.yield ();
               let arr =
                 Search_result_heap.to_seq v
                 |> Array.of_seq
               in
               (* Most relevant result first. *)
               Array.sort Search_result.compare_relevance arr;
               arr
             )
        )
    in
    if Atomic.get cancellation_notifier then (
      None
    ) else (
      (* Freshly computed results win on key collision (there should be
         none, since only uncached documents were searched). *)
      let search_results =
        String_map.union (fun _k v1 _v2 -> Some v1)
          updates
          t.search_results
      in
      Some { t with search_results }
    )
  (* Replace the filter expression, recompute the set of documents
     passing it, then refresh search results for the new set.  If the
     parsed expression is unchanged, only the raw string is updated.
     Returns [None] if cancelled via [stop_signal]. *)
  let update_filter_exp
      pool
      stop_signal
      filter_exp_string
      filter_exp
      (t : t)
    : t option =
    if Filter_exp.equal filter_exp t.filter_exp then (
      Some { t with filter_exp_string }
    ) else (
      let cancellation_notifier = Atomic.make false in
      let documents_passing_filter =
        Eio.Fiber.first
          (fun () ->
             Stop_signal.await stop_signal;
             Atomic.set cancellation_notifier true;
             String_set.empty
          )
          (fun () ->
             (* Accumulate first-word candidates over every content
                search sub-expression of the filter. *)
             let global_first_word_candidates_lookup =
               Filter_exp.all_content_search_exps filter_exp
               |> List.fold_left (fun acc search_exp ->
                   Index.generate_global_first_word_candidates_lookup
                     pool
                     ~acc
                     search_exp
                 )
                 Search_phrase.Enriched_token.Data_map.empty
             in
             t.all_documents
             |> String_map.to_seq
             |> Seq.map snd
             |> (fun s ->
                 if Filter_exp.is_empty filter_exp then (
                   (* An empty filter passes every document. *)
                   s
                 ) else (
                   Seq.filter (fun s ->
                       Eio.Fiber.yield ();
                       Document.satisfies_filter_exp
                         pool
                         stop_signal
                         ~global_first_word_candidates_lookup
                         filter_exp
                         s
                     ) s
                 )
               )
             |> Seq.map Document.path
             |> String_set.of_seq
          )
      in
      if Atomic.get cancellation_notifier then (
        None
      ) else (
        { t with
          filter_exp_string;
          filter_exp;
          documents_passing_filter;
        }
        |> refresh_search_results pool stop_signal
      )
    )
  (* Replace the search expression and recompute all search results
     from scratch.  If the parsed expression is unchanged, only the raw
     string is updated.  Returns [None] if cancelled via [stop_signal]. *)
  let update_search_exp pool stop_signal search_exp_string search_exp (t : t) : t option =
    if Search_exp.equal search_exp t.search_exp then (
      Some { t with search_exp_string }
    ) else (
      { t with
        search_exp;
        search_exp_string;
        search_results = String_map.empty;
      }
      |> refresh_search_results pool stop_signal
    )
  (* Register [doc] in the state: store it, test it against the current
     filter, and compute its search results synchronously.  Fresh stop
     signals that are never broadcast are used, so both steps always
     run to completion. *)
  let add_document pool (doc : Document.t) (t : t) : t =
    let within_same_line =
      match Document.search_mode doc with
      | `Single_line -> true
      | `Multiline -> false
    in
    let path = Document.path doc in
    let documents_passing_filter =
      let global_first_word_candidates_lookup =
        Filter_exp.all_content_search_exps t.filter_exp
        |> List.fold_left (fun acc search_exp ->
            Index.generate_global_first_word_candidates_lookup
              pool
              ~acc
              search_exp
          )
          Search_phrase.Enriched_token.Data_map.empty
      in
      if Document.satisfies_filter_exp pool (Stop_signal.make ()) ~global_first_word_candidates_lookup t.filter_exp doc
      then
        String_set.add path t.documents_passing_filter
      else
        t.documents_passing_filter
    in
    let search_results =
      let global_first_word_candidates_lookup =
        Index.generate_global_first_word_candidates_lookup
          pool
          t.search_exp
      in
      String_map.add
        path
        (Index.search
           pool
           (Stop_signal.make ())
           ~doc_id:(Document.doc_id doc)
           ~doc_word_ids:(Document.word_ids doc)
           ~global_first_word_candidates_lookup
           ~within_same_line
           ~search_scope:(Document.search_scope doc)
           t.search_exp
         (* Safe: the stop signal above is never broadcast, so the
            search cannot be cancelled. *)
         |> Option.get
        )
        t.search_results
    in
    { t with
      all_documents =
        String_map.add
          path
          doc
          t.all_documents;
      documents_passing_filter;
      search_results;
    }
let of_seq pool (s : Document.t Seq.t) =
Seq.fold_left (fun t doc ->
add_document pool doc t
)
empty
s
  (* Pairwise comparators over (document, results) groups, used to
     order the listing.  Most delegate to [Document.Compare]. *)
  module Compare_search_result_group = struct
    let mod_time order (d0, _s0) (d1, _s1) =
      Document.Compare.mod_time order d0 d1

    let path_date order (d0, _s0) (d1, _s1) =
      Document.Compare.path_date order d0 d1

    let path order (d0, _s0) (d1, _s1) =
      Document.Compare.path order d0 d1

    (* Compare by an explicit path -> rank map; documents absent from
       the map sort to the back regardless of [order], and ties of
       absence fall back to path order. *)
    let ranking ranking order (d0, _s0) (d1, _s1) =
      match
        String_map.find_opt (Document.path d0) ranking,
        String_map.find_opt (Document.path d1) ranking
      with
      | None, None -> Document.Compare.path order d0 d1
      | None, Some _ -> (
          (* Always shuffle document with no ranking to the back. *)
          1
        )
      | Some _, None -> (
          (* Always shuffle document with no ranking to the back. *)
          -1
        )
      | Some x0, Some x1 -> (
          match order with
          | `Asc -> Int.compare x0 x1
          | `Desc -> Int.compare x1 x0
        )

    (* Compare by each group's best (first) search result; result
       arrays must be non-empty. *)
    let score order (_d0, s0) (_d1, s1) =
      assert (Array.length s0 > 0);
      assert (Array.length s1 > 0);
      (* Search_result.compare_relevance puts the more relevant
         result to the front, so we flip the comparison here to
         obtain an ordering of "lowest score" first to match the
         usual definition of "sort by score in ascending order".
      *)
      match order with
      | `Asc -> Search_result.compare_relevance s1.(0) s0.(0)
      | `Desc -> Search_result.compare_relevance s0.(0) s1.(0)
  end
  (* The sorted listing: documents passing the filter paired with their
     search results.  With a non-empty search expression, documents
     with no (or not-yet-computed) results are omitted; otherwise every
     filtered document appears with an empty result array.  The array
     is sorted by [t.sort_by], then stably re-sorted so focused
     documents are pinned to the top. *)
  let search_result_groups
      (t : t)
    : (Document.t * Search_result.t array) array =
    let no_search_exp = Search_exp.is_empty t.search_exp in
    let arr =
      t.documents_passing_filter
      |> String_set.to_seq
      |> Seq.map (fun path ->
          (path, String_map.find path t.all_documents)
        )
      |> (fun s ->
          if no_search_exp then (
            Seq.map (fun (_path, doc) -> (doc, [||])) s
          ) else (
            Seq.filter_map (fun (path, doc) ->
                match String_map.find_opt path t.search_results with
                | None -> None
                | Some search_results -> (
                    if Array.length search_results = 0 then
                      None
                    else
                      Some (doc, search_results)
                  )
              ) s
          )
        )
      |> Array.of_seq
    in
    (* [`Score] is meaningless without a search expression; fall back
       to [t.sort_by_no_score] in that case. *)
    let rec f (sort_by_typ, sort_by_order) =
      match sort_by_typ with
      | `Path_date -> Compare_search_result_group.path_date sort_by_order
      | `Mod_time -> Compare_search_result_group.mod_time sort_by_order
      | `Path -> Compare_search_result_group.path sort_by_order
      | `Score -> (
          if no_search_exp then (
            f t.sort_by_no_score
          ) else (
            Compare_search_result_group.score sort_by_order
          )
        )
      | `Ranking ranking -> (
          Compare_search_result_group.ranking ranking sort_by_order
        )
    in
    Array.sort (f t.sort_by) arr;
    (* Pin focused documents to the top (earliest focused first);
       stable sort preserves the primary order among the rest. *)
    let focus_ranking =
      List.rev t.focus_list
      |> CCList.foldi (fun ranking i x ->
          String_map.add x i ranking) String_map.empty
    in
    Array.stable_sort (fun (d0, _) (d1, _) ->
        match
          String_map.find_opt (Document.path d0) focus_ranking,
          String_map.find_opt (Document.path d1) focus_ranking
        with
        | Some x0, Some x1 -> Int.compare x0 x1
        | Some _, None -> -1
        | None, Some _ -> 1
        | None, None -> 0
      ) arr;
    arr
let usable_document_paths (t : t) : String_set.t =
search_result_groups t
|> Array.to_seq
|> Seq.map (fun (doc, _) -> Document.path doc)
|> String_set.of_seq
  (* Paths the user has explicitly marked. *)
  let marked_document_paths (t : t) =
    t.documents_marked

  (* Paths of every tracked document, usable or not. *)
  let all_document_paths (t : t) : string Seq.t =
    t.all_documents
    |> String_map.to_seq
    |> Seq.map fst
let unusable_documents (t : t) =
let s = usable_document_paths t in
t.all_documents
|> String_map.to_seq
|> Seq.filter (fun (path, _doc) ->
not (String_set.mem path s))
|> Seq.map snd
let unusable_document_paths (t : t) =
unusable_documents t
|> Seq.map Document.path
  (* Mark a single document (no-op for unknown paths), or all currently
     usable / unusable documents. *)
  let mark
      (choice :
         [ `Path of string
         | `Usable
         | `Unusable ])
      t
    : t =
    match choice with
    | `Path path -> (
        match String_map.find_opt path t.all_documents with
        | None -> t
        | Some _ -> (
            let documents_marked =
              String_set.add path t.documents_marked
            in
            { t with documents_marked }
          )
      )
    | `Usable -> (
        let documents_marked =
          String_set.union
            t.documents_marked
            (usable_document_paths t)
        in
        { t with documents_marked }
      )
    | `Unusable -> (
        let documents_marked =
          Seq.fold_left
            (fun acc x -> String_set.add x acc)
            t.documents_marked
            (unusable_document_paths t)
        in
        { t with documents_marked }
      )
  (* Remove marks: from a single path, all usable / unusable documents,
     or everything. *)
  let unmark
      (choice :
         [ `Path of string
         | `Usable
         | `Unusable
         | `All ])
      t
    : t =
    match choice with
    | `Path path -> (
        let documents_marked =
          String_set.remove path t.documents_marked
        in
        { t with documents_marked }
      )
    | `Usable -> (
        let documents_marked =
          String_set.diff
            t.documents_marked
            (usable_document_paths t)
        in
        { t with documents_marked }
      )
    | `Unusable -> (
        let documents_marked =
          Seq.fold_left
            (fun acc x -> String_set.remove x acc)
            t.documents_marked
            (unusable_document_paths t)
        in
        { t with documents_marked }
      )
    | `All -> (
        { t with documents_marked = String_set.empty }
      )
let drop
(choice :
[ `Path of string
| `All_except of string
| `Marked
| `Unmarked
| `Usable
| `Unusable ])
(t : t)
: t =
let aux ~(keep : string -> bool) =
let keep' : 'a. string -> 'a -> bool =
fun path _ ->
keep path
in
{ all_documents = String_map.filter keep' t.all_documents;
filter_exp = t.filter_exp;
filter_exp_string = t.filter_exp_string;
documents_passing_filter = String_set.filter keep t.documents_passing_filter;
documents_marked = String_set.filter keep t.documents_marked;
search_exp = t.search_exp;
search_exp_string = t.search_exp_string;
search_results = String_map.filter keep' t.search_results;
sort_by = t.sort_by;
sort_by_no_score = t.sort_by_no_score;
screen_split = t.screen_split;
show_bottom_right_pane = t.show_bottom_right_pane;
show_key_binding_info_pane = t.show_key_binding_info_pane;
focus_list = t.focus_list;
path_highlights = t.path_highlights;
}
in
match choice with
| `Path path -> (
{ all_documents = String_map.remove path t.all_documents;
filter_exp = t.filter_exp;
filter_exp_string = t.filter_exp_string;
documents_passing_filter = String_set.remove path t.documents_passing_filter;
documents_marked = String_set.remove path t.documents_marked;
search_exp = t.search_exp;
search_exp_string = t.search_exp_string;
search_results = String_map.remove path t.search_results;
sort_by = t.sort_by;
sort_by_no_score = t.sort_by_no_score;
screen_split = t.screen_split;
show_bottom_right_pane = t.show_bottom_right_pane;
show_key_binding_info_pane = t.show_key_binding_info_pane;
focus_list = t.focus_list;
path_highlights = t.path_highlights;
}
)
| `All_except path -> (
let keep path' =
String.equal path' path
in
aux ~keep
)
| `Marked -> (
let keep path =
not (String_set.mem path t.documents_marked)
in
aux ~keep
)
| `Unmarked -> (
let keep path =
String_set.mem path t.documents_marked
in
aux ~keep
)
| `Usable | `Unusable -> (
let usable_document_paths =
usable_document_paths t
in
let document_is_usable path =
String_set.mem path usable_document_paths
in
let keep path =
match choice with
| `Usable -> not (document_is_usable path)
| `Unusable -> document_is_usable path
| _ -> failwith "unexpected case"
in
aux ~keep
)
  (* Fuzzy-rank the usable documents' paths against [exp]: switch both
     sort modes to the resulting [`Ranking] map and record per-path
     character highlights.  Returns [None] if cancelled via
     [stop_signal]. *)
  let update_path_fuzzy_ranking stop_signal exp (t : t) : t option =
    let cancellation_notifier = Atomic.make false in
    let ranking, path_highlights =
      Eio.Fiber.first
        (fun () ->
           Stop_signal.await stop_signal;
           Atomic.set cancellation_notifier true;
           (String_map.empty, String_map.empty)
        )
        (fun () ->
           (* Match against cwd-relative paths, but key the results by
              absolute path (the session's canonical form). *)
           let l =
             usable_document_paths t
             |> String_set.to_seq
             |> Seq.map File_utils.remove_cwd_from_path
             |> Misc_utils.fuzzy_rank_assoc
               (Stop_signal.make ())
               ~get_key:Fun.id
               exp
             |> Dynarray.to_list
             |> List.map (fun (path, x) ->
                 (Misc_utils.normalize_path_to_absolute path, x))
           in
           let ranking =
             List.map fst l
             |> Misc_utils.ranking_of_ranked_document_list
           in
           let highlights =
             List.fold_left (fun acc (path, search_result) ->
                 String_map.add
                   path
                   (Misc_utils.highlights_of_search_result search_result)
                   acc
               )
               String_map.empty
               l
           in
           (ranking, highlights)
        )
    in
    if Atomic.get cancellation_notifier then (
      None
    ) else (
      let sort_by = (`Ranking ranking, `Asc) in
      Some {
        t with sort_by;
        sort_by_no_score = sort_by;
        path_highlights;
      }
    )
  (* Narrow each filtered document's search scope to windows of
     [level * !Params.tokens_per_search_scope_level] tokens around its
     current search results, intersected with the existing scope.
     Level 0 resets every document to its full scope. *)
  let narrow_search_scope_to_level ~level (t : t) : t =
    let all_documents =
      if level = 0 then (
        String_map.mapi (fun _path doc ->
            Document.reset_search_scope_to_full doc
          )
          t.all_documents
      ) else (
        String_map.mapi (fun path doc ->
            let doc_id = Document.doc_id doc in
            let search_scope =
              match String_map.find_opt path t.search_results with
              | None -> Diet.Int.empty
              | Some search_results -> (
                  if String_set.mem path t.documents_passing_filter then (
                    Array.fold_left (fun scope search_result ->
                        (* Token position span of this result's found
                           phrase (min/max over its words). *)
                        let s, e =
                          List.fold_left (fun s_e Search_result.{ found_word_pos; _ } ->
                              match s_e with
                              | None -> Some (found_word_pos, found_word_pos)
                              | Some (s, e) -> (
                                  Some (min s found_word_pos, max found_word_pos e)
                                )
                            )
                            None
                            (Search_result.found_phrase search_result)
                          |> Option.get
                        in
                        (* Widen by the per-level offset, clamped to the
                           document's valid position range. *)
                        let offset = level * !Params.tokens_per_search_scope_level in
                        let s, e =
                          (max 0 (s - offset), min (Index.max_pos ~doc_id) (e + offset))
                        in
                        Diet.Int.add
                          (Diet.Int.Interval.make s e)
                          scope
                      )
                      Diet.Int.empty
                      search_results
                  ) else (
                    Diet.Int.empty
                  )
                )
            in
            Document.inter_search_scope
              search_scope
              doc
          )
          t.all_documents
      )
    in
    { t with all_documents }
end
(* Apply [command] to [st].  Returns the (possibly normalized) command
   together with the new state, or [None] when a search/filter/ranking
   expression fails to parse or the update is cancelled.  Sorting and
   fuzzy-ranking commands reset the focus list first. *)
let run_command pool (command : Command.t) (st : State.t) : (Command.t * State.t) option =
  let open State in
  let reset_focus_list st = { st with focus_list = [] } in
  match command with
  | `Mark path -> (
      Some (command, mark (`Path path) st)
    )
  | `Mark_listed -> (
      Some (command, mark `Usable st)
    )
  | `Unmark path -> (
      Some (command, unmark (`Path path) st)
    )
  | `Unmark_listed -> (
      Some (command, unmark `Usable st)
    )
  | `Unmark_all -> (
      Some (command, unmark `All st)
    )
  | `Drop s -> (
      Some (command, drop (`Path s) st)
    )
  | `Drop_all_except s -> (
      Some (command, drop (`All_except s) st)
    )
  | `Drop_marked -> (
      Some (command, drop `Marked st)
    )
  | `Drop_unmarked -> (
      Some (command, drop `Unmarked st)
    )
  | `Drop_listed -> (
      Some (command, drop `Usable st)
    )
  | `Drop_unlisted -> (
      Some (command, drop `Unusable st)
    )
  | `Narrow_level level -> (
      Some (command, narrow_search_scope_to_level ~level st)
    )
  | `Focus path -> (
      (* Most recently focused path goes to the head. *)
      let focus_list = path :: st.focus_list in
      Some (command, { st with focus_list })
    )
  | `Sort (sort_by, sort_by_no_score) -> (
      let st = reset_focus_list st in
      (* Widen Command.Sort_by.typ into State's Sort_by.typ. *)
      let sort_by =
        sort_by
        |> (fun (typ, order) -> ((typ :> Sort_by.typ), order))
      in
      let sort_by_no_score =
        sort_by_no_score
        |> (fun (typ, order) -> ((typ :> Sort_by.typ), order))
      in
      Some (command, { st with sort_by; sort_by_no_score })
    )
  | `Path_fuzzy_rank (s, ranking) -> (
      let st = reset_focus_list st in
      match Search_exp.parse s with
      | None -> None
      | Some exp -> (
          update_path_fuzzy_ranking
            (Stop_signal.make ())
            exp
            st
          |> Option.map (fun state ->
              let command = `Path_fuzzy_rank (s, ranking) in
              (command, state)
            )
        )
    )
  | `Split_screen screen_split -> (
      Some (command, { st with screen_split })
    )
  | `Hide_pane pane -> (
      let st =
        match pane with
        | `Bottom_right -> { st with show_bottom_right_pane = false }
        | `Key_binding_info -> { st with show_key_binding_info_pane = false }
      in
      Some (command, st)
    )
  | `Show_pane pane -> (
      let st =
        match pane with
        | `Bottom_right -> { st with show_bottom_right_pane = true }
        | `Key_binding_info -> { st with show_key_binding_info_pane = true }
      in
      Some (command, st)
    )
  | `Comment _ -> (
      (* Comments are recorded but change nothing. *)
      Some (command, st)
    )
  | `Search s -> (
      match Search_exp.parse s with
      | None -> None
      | Some search_exp -> (
          update_search_exp
            pool
            (Stop_signal.make ())
            s
            search_exp
            st
          |> Option.map (fun state -> (command, state))
        )
    )
  | `Filter s -> (
      match Filter_exp.parse s with
      | None -> None
      | Some exp -> (
          update_filter_exp
            pool
            (Stop_signal.make ())
            s
            exp
            st
          |> Option.map (fun state -> (command, state))
        )
    )
(* An immutable point in session history: the state after running
   [last_command] ([None] for the initial state), tagged with a
   process-unique id. *)
module Snapshot = struct
  (* Source of unique snapshot ids.  NOTE(review): a plain ref, not
     atomic — assumes snapshots are created from a single domain. *)
  let counter = ref 0

  type t = {
    last_command : Command.t option;
    state : State.t;
    committed : bool;  (* NOTE(review): commit semantics live in the
                          session manager — not visible here. *)
    id : int;
  }

  let committed t = t.committed
  let last_command t = t.last_command
  let state t = t.state
  let id t = t.id

  let equal_id x y =
    id x = id y

  let make ?(committed = true) ~last_command state : t =
    let id = !counter in
    counter := id + 1;
    { last_command; state; id; committed }

  let make_empty ?committed () =
    make ?committed ~last_command:None State.empty

  (* Replace the state, keeping command, id and committed flag. *)
  let update_state state t =
    { t with state }
end
================================================
FILE: bin/session.mli
================================================
open Docfd_lib
(** A document paired with its matching search results. *)
type search_result_group = Document.t * Search_result.t array

module State : sig
  type t

  (** Structural equality over content-bearing fields (documents,
      filter/search strings, marks, results). *)
  val equal : t -> t -> bool

  (** Number of documents tracked. *)
  val size : t -> int

  val empty : t

  (** Replace the filter expression (raw string + parsed form) and
      recompute derived data.  [None] means cancelled via the stop
      signal. *)
  val update_filter_exp :
    Task_pool.t ->
    Stop_signal.t ->
    string ->
    Filter_exp.t ->
    t ->
    t option

  (** Replace the search expression (raw string + parsed form) and
      recompute search results.  [None] means cancelled. *)
  val update_search_exp :
    Task_pool.t ->
    Stop_signal.t ->
    string ->
    Search_exp.t ->
    t ->
    t option

  val filter_exp : t -> Filter_exp.t
  val filter_exp_string : t -> string
  val search_exp : t -> Search_exp.t
  val search_exp_string : t -> string
  val path_highlights : t -> Int_set.t String_map.t
  val clear_path_highlights : t -> t
  val add_document : Task_pool.t -> Document.t -> t -> t
  val of_seq : Task_pool.t -> Document.t Seq.t -> t

  (** The sorted listing of documents with their results. *)
  val search_result_groups : t -> search_result_group array

  val usable_document_paths : t -> String_set.t
  val unusable_documents : t -> Document.t Seq.t
  val unusable_document_paths : t -> string Seq.t
  val all_document_paths : t -> string Seq.t
  val marked_document_paths : t -> String_set.t
  val mark : [ `Path of string | `Usable | `Unusable ] -> t -> t
  val unmark : [ `Path of string | `Usable | `Unusable | `All ] -> t -> t
  val drop : [ `Path of string | `All_except of string | `Marked | `Unmarked | `Usable | `Unusable ] -> t -> t
  val update_path_fuzzy_ranking : Stop_signal.t -> Search_exp.t -> t -> t option
  val narrow_search_scope_to_level : level:int -> t -> t
  val screen_split : t -> Command.screen_split
  val show_pane : t -> Command.pane -> bool
end

(** Apply a command to a state; [None] on parse failure or
    cancellation. *)
val run_command : Task_pool.t -> Command.t -> State.t -> (Command.t * State.t) option

module Snapshot : sig
  type t
  val committed : t -> bool
  val last_command : t -> Command.t option
  val state : t -> State.t
  val id : t -> int
  val equal_id : t -> t -> bool
  val make : ?committed:bool -> last_command:Command.t option -> State.t -> t
  val make_empty : ?committed:bool -> unit -> t
  val update_state : State.t -> t -> t
end
================================================
FILE: bin/session_manager.ml
================================================
open Docfd_lib
let last_request_timestamp : Mtime.t Atomic.t =
Atomic.make (Mtime_clock.now ())
let search_request : (bool * string) Lock_protected_cell.t =
Lock_protected_cell.make ()
let filter_request : (bool * string) Lock_protected_cell.t =
Lock_protected_cell.make ()
let version_shift_request : int Lock_protected_cell.t =
Lock_protected_cell.make ()
let path_fuzzy_rank_request : (bool * string) Lock_protected_cell.t =
Lock_protected_cell.make ()
let worker_ping : Ping.t = Ping.make ()
let _requester_lock = Eio.Mutex.create ()
let lock_as_requester : type a. (unit -> a) -> a =
fun f ->
Eio.Mutex.use_rw ~protect:false _requester_lock f
(* let requester_ping : Ping.t = Ping.make () *)
type egress_payload =
| Search_exp_parse_error
| Searching
| Search_cancelled
| Search_done of int * Session.Snapshot.t
| Filter_parse_error
| Filtering
| Filtering_cancelled
| Filtering_done of int * Session.Snapshot.t
| Path_fuzzy_rank_done of int * Session.Snapshot.t * bool
let egress : egress_payload Eio.Stream.t =
Eio.Stream.create 0
let egress_ack : unit Eio.Stream.t =
Eio.Stream.create 0
let stop_filter_signal = Atomic.make (Stop_signal.make ())
let stop_search_signal = Atomic.make (Stop_signal.make ())
let stop_path_fuzzy_rank_signal = Atomic.make (Stop_signal.make ())
(* Cancel in-flight filtering: install a fresh signal for future runs, then
   broadcast on the old one so the current run (if any) observes the stop. *)
let stop_filter () =
  Stop_signal.broadcast
    (Atomic.exchange stop_filter_signal (Stop_signal.make ()))

(* Same protocol as [stop_filter], for searches. *)
let stop_search () =
  Stop_signal.broadcast
    (Atomic.exchange stop_search_signal (Stop_signal.make ()))

(* Same protocol as [stop_filter], for path fuzzy ranking. *)
let stop_path_fuzzy_rank () =
  Stop_signal.broadcast
    (Atomic.exchange stop_path_fuzzy_rank_signal (Stop_signal.make ()))
(* Guards the worker's mutable session history below ([init_state],
   [snapshots], [cur_ver]). *)
let _worker_state_lock = Eio.Mutex.create ()
let lock_worker_state : type a. (unit -> a) -> a =
fun f ->
Eio.Mutex.use_rw ~protect:false _worker_state_lock f
(* State that snapshot 0 is (re)built from; see [update_starting_state]. *)
let init_state : Session.State.t ref = ref Session.State.empty
(* Full session history; always contains at least one snapshot. *)
let snapshots =
let arr = Dynarray.create () in
Dynarray.add_last arr (Session.Snapshot.make_empty ());
arr
(* Index into [snapshots] of the currently selected version. *)
let cur_ver = ref 0
(* Reactive (version, snapshot) pair consumed by the UI; only ever set from
   the manager fiber (Lwd.set is not thread-safe). *)
let cur_snapshot_var = Lwd.var (0, Session.Snapshot.make_empty ())
let cur_snapshot = Lwd.get cur_snapshot_var
(* Immutable copy of the session history handed to [lock_with_view]
   callbacks: starting state, a copy of all snapshots, and the currently
   selected version index. *)
type view = {
init_state : Session.State.t;
snapshots : Session.Snapshot.t Dynarray.t;
cur_ver : int;
}
(* Mirror the snapshot's filter and search expression strings into the UI
   input fields, with the cursor placed at the end of each string. *)
let sync_input_fields_from_snapshot
    (x : Session.Snapshot.t)
  =
  let state = Session.Snapshot.state x in
  let fill field text = Lwd.set field (text, String.length text) in
  fill UI_base.Vars.filter_field (Session.State.filter_exp_string state);
  fill UI_base.Vars.search_field (Session.State.search_exp_string state)
(* Runs [f] with the whole pipeline quiesced: new requests are blocked
   (requester lock), in-flight filter/search work is stopped, pending
   requests are cleared, and the worker state lock is held.  When
   [clean_up] is true, UI statuses and the input fields are re-synced to
   the current snapshot after [f] returns. *)
let lock_for_external_editing ~clean_up f =
(* This blocks further requests from being made. *)
lock_as_requester (fun () ->
(* We try to get worker to finish ASAP. *)
stop_filter ();
stop_search ();
(* Locking worker also locks the manager, as egress_ack forces
lock-step progression of the system.
*)
lock_worker_state (fun () ->
(* Clear any outstanding requests. *)
Lock_protected_cell.unset filter_request;
Lock_protected_cell.unset search_request;
let result = f () in
if clean_up then (
Lwd.set UI_base.Vars.search_ui_status `Idle;
Lwd.set UI_base.Vars.filter_ui_status `Idle;
let snapshot = Dynarray.get snapshots !cur_ver in
Lwd.set cur_snapshot_var (!cur_ver, snapshot);
sync_input_fields_from_snapshot snapshot;
);
result
)
)
(* Take a consistent copy of the session history while the pipeline is
   quiesced, then hand it to [f].  The snapshot dynarray is copied so [f]
   cannot observe later mutation. *)
let lock_with_view : type a. (view -> a) -> a =
  fun f ->
  lock_for_external_editing ~clean_up:false (fun () ->
    let v =
      {
        init_state = !init_state;
        snapshots = Dynarray.copy snapshots;
        cur_ver = !cur_ver;
      }
    in
    f v
  )
(* Replace the initial state (snapshot 0) and rebuild every later snapshot
   by replaying its recorded command on top of its predecessor's new state.
   A command that fails to run (returns [None]) leaves that snapshot with
   the predecessor's state.  Finally jumps to the newest version. *)
let update_starting_state (starting_state : Session.State.t) =
lock_for_external_editing ~clean_up:true (fun () ->
let pool = UI_base.task_pool () in
init_state := starting_state;
let starting_snapshot =
Session.Snapshot.make
~last_command:None
starting_state
in
Dynarray.set snapshots 0 starting_snapshot;
for i=1 to Dynarray.length snapshots - 1 do
let prev = Dynarray.get snapshots (i - 1) in
let prev_state = Session.Snapshot.state prev in
let cur = Dynarray.get snapshots i in
let state =
match Session.Snapshot.last_command cur with
| None -> prev_state
| Some command ->
Session.run_command pool command prev_state
|> Option.map snd
|> Option.value ~default:prev_state
in
Dynarray.set
snapshots
i
(Session.Snapshot.update_state state cur)
done;
cur_ver := (Dynarray.length snapshots - 1);
)
(* Replace the whole snapshot history with [snapshots'] and jump to its
   newest version.  The first snapshot's state must equal the current
   starting state (asserted), and the history must be non-empty. *)
let load_snapshots snapshots' =
assert (Dynarray.length snapshots' > 0);
lock_for_external_editing ~clean_up:true (fun () ->
assert
(Session.State.equal
(Session.Snapshot.state @@ Dynarray.get snapshots' 0)
!init_state);
Dynarray.clear snapshots;
Dynarray.append snapshots snapshots';
cur_ver := (Dynarray.length snapshots - 1);
)
(* Cancel in-flight filter/search work and restore UI statuses and input
   fields from the current snapshot.  All of the work is done by
   [lock_for_external_editing ~clean_up:true]; the body is deliberately a
   no-op. *)
let stop_filter_and_search_and_restore_input_fields () =
lock_for_external_editing ~clean_up:true (fun () ->
()
)
(* Move the current version pointer by [offset] (undo/redo step).  Moves
   that would land outside the recorded snapshot range are silently
   ignored. *)
let shift_ver ~offset =
  lock_for_external_editing ~clean_up:true (fun () ->
    let target = !cur_ver + offset in
    if target >= 0 && target < Dynarray.length snapshots then
      cur_ver := target
  )
(* Append [f current_snapshot] as a new snapshot, first discarding any redo
   history beyond the current version, then select the new snapshot. *)
let update_from_cur_snapshot f =
lock_for_external_editing ~clean_up:true (fun () ->
Dynarray.truncate snapshots (!cur_ver + 1);
let next_snapshot = f (Dynarray.get_last snapshots) in
Dynarray.add_last snapshots next_snapshot;
cur_ver := Dynarray.length snapshots - 1;
)
let manager_fiber () =
(* This fiber handles updates of Lwd.var which are not thread-safe,
and thus cannot be done by worker_fiber directly
*)
(* Publish [snapshot] as the current one; by default also reset the UI's
   document selection, since the result set may have changed. *)
let update_snapshot ?(reset_document_selected = true) ver snapshot =
if reset_document_selected then (
UI_base.reset_document_selected ();
);
Lwd.set cur_snapshot_var (ver, snapshot);
in
while true do
let payload = Eio.Stream.take egress in
(match payload with
| Search_exp_parse_error -> (
Lwd.set UI_base.Vars.search_ui_status `Parse_error
)
| Searching -> (
Lwd.set UI_base.Vars.search_ui_status `Searching
)
| Filtering -> (
Lwd.set UI_base.Vars.filter_ui_status `Filtering
)
| Search_cancelled -> (
(* No status change; the worker retries cancelled searches itself. *)
)
| Search_done (ver, snapshot) -> (
update_snapshot ver snapshot;
Lwd.set UI_base.Vars.search_ui_status `Idle
)
| Filter_parse_error -> (
Lwd.set UI_base.Vars.filter_ui_status `Parse_error
)
| Filtering_cancelled -> (
)
| Filtering_done (ver, snapshot) -> (
update_snapshot ver snapshot;
Lwd.set UI_base.Vars.filter_ui_status `Idle
)
| Path_fuzzy_rank_done (ver, snapshot, commit) -> (
(* On commit, strip path highlights from the state before publishing. *)
let snapshot =
if commit then (
let state =
Session.Snapshot.state snapshot
|> Session.State.clear_path_highlights
in
Session.Snapshot.update_state state snapshot
) else (
snapshot
)
in
(* Selection is kept; NOTE(review): presumably because ranking only
   reorders paths rather than changing the result set — confirm. *)
update_snapshot ~reset_document_selected:false ver snapshot;
)
);
(* Ack so the blocked worker resumes; this enforces the lock-step
   progression that [lock_for_external_editing] relies on. *)
Eio.Stream.add egress_ack ();
done
let worker_fiber pool =
(* This fiber runs in a background domain to allow the UI code in the main
domain to immediately continue running after key presses that trigger
searches or search cancellations.
This removes the need to make the code of session module always yield
frequently.
*)
(* Snapshot at the currently selected version. *)
let get_cur_snapshot () =
Dynarray.get snapshots !cur_ver
in
(* Append [snapshot] after the current version, discarding redo history.
   If the last snapshot satisfies the predicate (e.g. an uncommitted entry
   of the same kind), it is overwritten in place instead of growing the
   history.  Version 0 (the starting state) is never overwritten. *)
let add_snapshot
?(overwrite_if_last_snapshot_satisfies = fun _ -> false)
snapshot
=
Dynarray.truncate snapshots (!cur_ver + 1);
let last_snapshot = Dynarray.get_last snapshots in
if !cur_ver > 0 && overwrite_if_last_snapshot_satisfies last_snapshot then (
Dynarray.set snapshots !cur_ver snapshot;
) else (
Dynarray.add_last snapshots snapshot;
incr cur_ver;
);
in
(* Blocking send: waits for the manager fiber's ack, so worker and manager
   progress in lock-step. *)
let send_to_manager x =
Eio.Stream.add egress x;
Eio.Stream.take egress_ack;
in
(* A search cancelled mid-run is remembered here and retried on a later
   wake-up if no newer search request has arrived. *)
let cancelled_search_request : (bool * string) option ref = ref None in
(* Parse and run one search request; publishes status/result messages. *)
let process_search_req stop_signal ~commit (s : string) =
cancelled_search_request := None;
match Search_exp.parse s with
| None -> (
send_to_manager Search_exp_parse_error
)
| Some search_exp -> (
send_to_manager Searching;
let state =
get_cur_snapshot ()
|> Session.Snapshot.state
|> Session.State.update_search_exp
pool
stop_signal
s
search_exp
in
match state with
(* [None] is treated as cancellation via [stop_signal]. *)
| None -> (
send_to_manager Search_cancelled;
cancelled_search_request := Some (commit, s);
)
| Some state -> (
let command = Some (`Search s) in
let snapshot =
Session.Snapshot.make
~committed:commit
~last_command:command
state
in
(* Collapse an uncommitted previous search, or a re-run of the same
   search string, into a single history entry. *)
add_snapshot
~overwrite_if_last_snapshot_satisfies:(fun snapshot ->
match Session.Snapshot.last_command snapshot with
| Some (`Search s') -> (
not (Session.Snapshot.committed snapshot)
||
s' = s
)
| _ -> false
)
snapshot;
send_to_manager (Search_done (!cur_ver, snapshot))
)
)
in
(* Same structure as [process_search_req] but for filter expressions.
   Note: a cancelled filter request is NOT retried later. *)
let process_filter_req stop_signal ~commit (s : string) =
match Filter_exp.parse s with
| Some filter_exp -> (
send_to_manager Filtering;
let state =
get_cur_snapshot ()
|> Session.Snapshot.state
|> Session.State.update_filter_exp
pool
stop_signal
s
filter_exp
in
match state with
| None -> (
send_to_manager Filtering_cancelled
)
| Some state -> (
let command = Some (`Filter s) in
let snapshot =
Session.Snapshot.make
~committed:commit
~last_command:command
state
in
add_snapshot
~overwrite_if_last_snapshot_satisfies:(fun snapshot ->
match Session.Snapshot.last_command snapshot with
| Some (`Filter s') -> (
not (Session.Snapshot.committed snapshot)
||
s' = s
)
| _ -> false
)
snapshot;
send_to_manager (Filtering_done (!cur_ver, snapshot))
)
)
| None -> (
send_to_manager Filter_parse_error
)
in
(* Path fuzzy ranking: parse failures and cancellations are silently
   ignored (no egress message is sent for them). *)
let process_path_fuzzy_rank_req stop_signal ~commit (s : string) =
match Search_exp.parse s with
| None -> ()
| Some exp -> (
let state =
get_cur_snapshot ()
|> Session.Snapshot.state
|> Session.State.update_path_fuzzy_ranking
stop_signal
exp
in
match state with
| None -> (
)
| Some state -> (
let command = Some (`Path_fuzzy_rank (s, None)) in
let snapshot =
Session.Snapshot.make
~committed:commit
~last_command:command
state
in
add_snapshot
~overwrite_if_last_snapshot_satisfies:(fun snapshot ->
match Session.Snapshot.last_command snapshot with
| Some (`Path_fuzzy_rank (s', _)) -> (
not (Session.Snapshot.committed snapshot)
||
s' = s
)
| _ -> false
)
snapshot;
send_to_manager (Path_fuzzy_rank_done (!cur_ver, snapshot, commit))
)
)
in
while true do
Ping.wait worker_ping;
(* Debounce: if the last recorded request is too recent, sleep past the
   remainder of the debounce window (plus a buffer), re-ping ourselves,
   and defer processing to the next iteration. *)
let time_since_last_request =
Mtime.span
(Atomic.get last_request_timestamp)
(Mtime_clock.now ())
in
if
Mtime.Span.is_shorter
time_since_last_request
~than:Params.session_manager_request_debounce_interval
then (
let clock = Eio.Stdenv.mono_clock (UI_base.eio_env ()) in
let sleep_duration_s =
Mtime.Span.abs_diff
time_since_last_request
Params.session_manager_request_debounce_interval
|> Mtime.Span.add Params.session_manager_request_debounce_wait_buffer
|> Mtime.Span.to_float_ns
|> (fun x -> x /. 1_000_000_000.0)
in
Eio.Time.Mono.sleep clock sleep_duration_s;
Ping.ping worker_ping
) else (
(* Filter requests are handled before search requests.  NOTE(review):
   assuming [Lock_protected_cell.get] consumes the pending request —
   confirm against lock_protected_cell.mli. *)
lock_worker_state (fun () ->
(match Lock_protected_cell.get filter_request with
| None -> ()
| Some (commit, s) -> (
process_filter_req (Atomic.get stop_filter_signal) ~commit s
)
);
(* Fall back to retrying a previously cancelled search when no fresh
   search request is pending. *)
(match Lock_protected_cell.get search_request with
| None -> !cancelled_search_request
| Some (commit, s) -> Some (commit, s)
)
|> Option.iter (fun (commit, s) ->
process_search_req (Atomic.get stop_search_signal) ~commit s
);
Lock_protected_cell.get path_fuzzy_rank_request
|> Option.iter (fun (commit, s) ->
process_path_fuzzy_rank_req (Atomic.get stop_path_fuzzy_rank_signal) ~commit s
);
)
)
done
(* Request handoff to the worker: deposit (commit, expression) in the
   relevant cell, cancel superseded in-flight work, and wake the worker.
   A new filter invalidates any in-flight search, hence both stops below.
   NOTE(review): none of these submitters update [last_request_timestamp],
   which the worker's debounce reads — confirm it is updated elsewhere. *)
let submit_filter_req ~commit (s : string) =
lock_as_requester (fun () ->
stop_filter ();
stop_search ();
Lock_protected_cell.set filter_request (commit, s);
Ping.ping worker_ping
)
(* Submit a search request; cancels only the in-flight search. *)
let submit_search_req ~commit (s : string) =
lock_as_requester (fun () ->
stop_search ();
Lock_protected_cell.set search_request (commit, s);
Ping.ping worker_ping
)
(* Submit a path fuzzy ranking request; cancels only in-flight ranking. *)
let submit_path_fuzzy_rank_req ~commit (s : string) =
lock_as_requester (fun () ->
stop_path_fuzzy_rank ();
Lock_protected_cell.set path_fuzzy_rank_request (commit, s);
Ping.ping worker_ping
)
================================================
FILE: bin/session_manager.mli
================================================
(* Session manager: background fibers plus the request API used by the
   UI. *)
open Docfd_lib
(** Fiber that applies worker results (statuses, snapshots) to the UI's
    [Lwd] variables.  Runs forever; must run where [Lwd.set] is safe. *)
val manager_fiber : unit -> unit
(** Fiber that executes filter/search/path-ranking requests using the
    given task pool.  Runs forever, intended for a background domain. *)
val worker_fiber : Task_pool.t -> unit
(** Reactive (version index, snapshot) pair for the currently selected
    point in the session history. *)
val cur_snapshot : (int * Session.Snapshot.t) Lwd.t
(** Consistent copy of the session history, taken under the manager's
    locks. *)
type view = {
init_state : Session.State.t;
snapshots : Session.Snapshot.t Dynarray.t;
cur_ver : int;
}
(** Runs the callback on a consistent [view] while new requests are
    blocked and in-flight work is stopped. *)
val lock_with_view : (view -> 'a) -> 'a
(** Replaces the initial state and replays all recorded commands on top
    of it, then selects the newest version. *)
val update_starting_state : Session.State.t -> unit
(** Replaces the whole snapshot history.  The first snapshot's state must
    equal the current starting state; the history must be non-empty. *)
val load_snapshots : Session.Snapshot.t Dynarray.t -> unit
(** Moves the current version by [offset]; out-of-range moves are
    ignored. *)
val shift_ver : offset:int -> unit
(** Applies the function to the current snapshot and appends the result,
    discarding any redo history. *)
val update_from_cur_snapshot : (Session.Snapshot.t -> Session.Snapshot.t) -> unit
(** Submit an asynchronous filter request; [commit] marks the resulting
    snapshot as committed. *)
val submit_filter_req : commit:bool -> string -> unit
(** Submit an asynchronous search request; see [submit_filter_req]. *)
val submit_search_req : commit:bool -> string -> unit
(** Submit an asynchronous path fuzzy ranking request. *)
val submit_path_fuzzy_rank_req : commit:bool -> string -> unit
(** Cancels in-flight filter/search work and restores the UI input fields
    from the current snapshot. *)
val stop_filter_and_search_and_restore_input_fields : unit -> unit
================================================
FILE: bin/string_utils.ml
================================================
(* Drop every leading '.' character from [s].  Returns "" when [s] is
   empty or consists entirely of dots. *)
let remove_leading_dots (s : string) =
  let len = String.length s in
  let i = ref 0 in
  while !i < len && String.get s !i = '.' do
    incr i
  done;
  String.sub s !i (len - !i)
(* A "system comment" line starts with a semicolon. *)
let line_is_system_comment line =
  CCString.starts_with ~prefix:";" line

(* True for system comment lines and for lines that are empty after
   trimming whitespace. *)
let line_is_blank_or_system_comment line =
  if line_is_system_comment line then
    true
  else
    String.length (String.trim line) = 0
(* Longest common prefix of all strings in [seq]; "" for an empty
   sequence.  The first element seeds the candidate prefix, which is then
   shortened against each remaining element. *)
let longest_common_prefix (seq : string Seq.t) : string =
  let shrink acc s =
    let limit = min (String.length acc) (String.length s) in
    let rec matching i =
      if i < limit && acc.[i] = s.[i] then matching (i + 1) else i
    in
    String.sub acc 0 (matching 0)
  in
  match seq () with
  | Seq.Nil -> ""
  | Seq.Cons (first, rest) -> Seq.fold_left shrink first rest
================================================
FILE: bin/version_string.ml
================================================
let s = "13.0.0"
================================================
FILE: bin/xdg_utils.ml
================================================
(* Enumerate full paths of entries in each "<dir>/applications" directory
   listed in $XDG_DATA_DIRS.  Unreadable/missing directories are skipped;
   an unset variable yields the empty sequence. *)
let all_desktop_files () : string Seq.t =
match Sys.getenv_opt "XDG_DATA_DIRS" with
| None -> Seq.empty
| Some s -> (
String.split_on_char ':' s
|> List.to_seq
|> Seq.flat_map (fun dir ->
let dir = Filename.concat dir "applications" in
try
Sys.readdir dir
|> Array.to_seq
|> Seq.map (Filename.concat dir)
with
| _ -> Seq.empty
)
)
(* First desktop-file path whose basename equals [file], scanning
   $XDG_DATA_DIRS via [all_desktop_files]; [None] when nothing matches. *)
let path_of_desktop_file file =
  Seq.find
    (fun path -> String.equal file (Filename.basename path))
    (all_desktop_files ())
(* Resolve the full path of the default handler's desktop file for the
   given document type by asking `xdg-mime query default <mime>`.  Returns
   [None] when the query fails or the reported desktop file cannot be
   found on $XDG_DATA_DIRS.  The format argument to [CCUnix.call] only
   ever receives one of the fixed MIME strings below. *)
let default_desktop_file_path (typ : [ `PDF ]) =
let mime_typ =
match typ with
| `PDF -> "application/pdf"
(* | `ODT -> "application/vnd.oasis.opendocument.text"
| `DOCX -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document" *)
in
let (stdout, _, ret) = CCUnix.call "xdg-mime query default %s" mime_typ in
if ret = 0 then (
path_of_desktop_file (CCString.trim stdout)
) else (
None
)
(* Per-user data directory, computed once at module load: $XDG_DATA_HOME
   (default ~/.local/share) on Linux, ~/Library/Application Support on
   macOS.  Exits with an error when $HOME is unset. *)
let data_home =
let home_dir =
match Sys.getenv_opt "HOME" with
| None -> (
Misc_utils.exit_with_error_msg "environment variable HOME is not set";
)
| Some home -> home
in
match Params.os_typ with
| `Linux -> (
match Sys.getenv_opt "XDG_DATA_HOME" with
| None -> Filename.concat home_dir ".local/share"
| Some x -> x
)
| `Darwin -> (
Filename.concat home_dir
(Filename.concat "Library" "Application Support")
)
(* Per-user cache directory, computed once at module load: $XDG_CACHE_HOME
   (default ~/.cache) on Linux, ~/Library/Caches on macOS.  Exits with an
   error when $HOME is unset. *)
let cache_home =
let home_dir =
match Sys.getenv_opt "HOME" with
| None -> (
Misc_utils.exit_with_error_msg "environment variable HOME is not set";
)
| Some home -> home
in
match Params.os_typ with
| `Linux -> (
match Sys.getenv_opt "XDG_CACHE_HOME" with
| None -> Filename.concat home_dir ".cache"
| Some x -> x
)
| `Darwin -> (
Filename.concat home_dir
(Filename.concat "Library" "Caches")
)
================================================
FILE: containers/Containerfile.demo-vhs
================================================
FROM ghcr.io/charmbracelet/vhs
RUN apt-get update
RUN apt-get install -y poppler-utils
RUN apt-get install -y neovim
================================================
FILE: containers/Containerfile.docfd
================================================
FROM docker.io/alpine:3.22
USER root
RUN apk add linux-headers
RUN apk add poppler-utils
RUN apk add sqlite sqlite-libs sqlite-dev sqlite-static
RUN apk add sqlite-analyzer
RUN apk add make
RUN apk add clang
RUN apk add opam
RUN apk add git
RUN apk add python3
# RUN ln -s $(which opam-2.2) /usr/local/bin/opam
RUN opam --version
RUN apk add bash
RUN opam init --disable-sandboxing
RUN opam install --yes dune
RUN opam install --yes utop ocp-indent
COPY . /root/docfd
# RUN chown -R root:root /home/opam/docfd
WORKDIR /root/docfd
RUN eval $(opam env) && dune build docfd.opam
RUN opam install --yes . --deps-only --with-test
RUN echo 'eval $(opam env)' >> /root/.bash_profile
================================================
FILE: demo-vhs-tapes/repo-non-interactive.tape
================================================
Output demo-vhs-gifs/repo-non-interactive.gif
Set Padding 0
Set Framerate 10
Set Width 1366
Set Height 768
Set FontSize 15
Set TypingSpeed 100ms
Type@50ms 'docfd README.md --sample "interactive grep across lines"'
Enter
Sleep 4s
================================================
FILE: demo-vhs-tapes/repo.tape
================================================
Output demo-vhs-gifs/repo.gif
Set Padding 0
Set Framerate 10
Set Width 1366
Set Height 768
Set FontSize 15
Set TypingSpeed 100ms
Type@50ms "docfd *.md"
Sleep 1s
Enter
Sleep 1s
Type "/"
Type "fuzz search"
Enter
Sleep 1s
Type "f"
Type "path-fuzzy:readme"
Enter
Sleep 1s
Enter
Sleep 1s
Type "zz"
Sleep 1s
Type "O"
Type@50ms "Docfd opens the editor to where the search result is when we hit Enter."
Escape
Sleep 2s
Type@200ms ":q!"
Enter
Sleep 4s
================================================
FILE: demo-vhs-tapes/ui-screenshot.tape
================================================
Output dummy.gif
Set Padding 0
Set Framerate 10
Set Width 1366
Set Height 768
Set FontSize 15
Set TypingSpeed 100ms
Type@10ms "docfd *.md"
Enter
Sleep 1s
Screenshot screenshots/ui0.png
Sleep 1s
================================================
FILE: demo-vhs.sh
================================================
#!/usr/bin/env bash
# Run the docfd demo VHS container: mounts the working tree at /vhs and the
# release binary at /usr/bin/docfd, then forwards all arguments (e.g. a
# .tape file path) to the container entry point.
#
# Fix: quote "$PWD" in the -v mounts so a working directory containing
# spaces (or glob characters) does not undergo word splitting/globbing.
podman run --rm -v "$PWD:/vhs" \
  --env 'VISUAL=nvim' \
  -v "$PWD/release/docfd:/usr/bin/docfd" \
  localhost/docfd-demo-vhs \
  "$@"
================================================
FILE: docfd.opam
================================================
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "TUI multiline fuzzy document finder"
description: """
Think interactive grep for text files, PDFs, DOCXs, etc,
but word/token based instead of regex and line based,
so you can search across lines easily.
Docfd aims to provide good UX via integration with common text editors
and PDF viewers,
so you can jump directly to a search result with a single key press.
Features:
- Multithreaded indexing and searching
- Multiline fuzzy search of multiple files or a single file
- Swap between multi-file view and single file view on the fly
- Content view pane that shows the snippet surrounding the search result selected
- Text editor and PDF viewer integration
"""
maintainer: ["Darren Li"]
authors: ["Darren Li"]
license: "MIT"
tags: ["fuzzy" "document" "finder"]
homepage: "https://github.com/darrenldl/docfd"
doc: "https://github.com/darrenldl/docfd"
bug-reports: "https://github.com/darrenldl/docfd/issues"
depends: [
"ocaml" {>= "5.2"}
"dune" {>= "3.4"}
"fmt" {>= "0.9.0"}
"angstrom" {>= "0.15.0"}
"containers" {>= "3.12"}
"oseq"
"spelll"
"notty-community"
"nottui" {>= "0.4"}
"nottui-unix" {>= "0.4"}
"lwd"
"cmdliner" {>= "2.0.0"}
"eio" {>= "0.14"}
"digestif"
"eio_main" {>= "1.3"}
"containers-data"
"timedesc" {>= "3.1.0"}
"re" {>= "1.11.0"}
"ppx_deriving" {>= "5.0"}
"decompress"
"progress" {>= "0.5.0"}
"diet"
"sqlite3"
"uuseg"
"uucp"
"alcotest" {with-test}
"qcheck-alcotest" {with-test}
"qcheck" {with-test}
"odoc" {with-doc}
]
dev-repo: "git+https://github.com/darrenldl/docfd.git"
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@doc" {with-doc}
]
]
================================================
FILE: docfd.opam.locked
================================================
opam-version: "2.0"
name: "docfd"
version: "3.0.0"
synopsis: "TUI multiline fuzzy document finder"
maintainer: "Darren Li"
authors: "Darren Li"
license: "MIT"
tags: ["fuzzy" "document" "finder"]
homepage: "https://github.com/darrenldl/docfd"
doc: "https://github.com/darrenldl/docfd"
bug-reports: "https://github.com/darrenldl/docfd/issues"
depends: [
"alcotest" {= "1.8.0" & with-test}
"angstrom" {= "0.16.1"}
"astring" {= "0.8.5" & with-test}
"base-bigarray" {= "base"}
"base-bytes" {= "base"}
"base-domains" {= "base"}
"base-nnp" {= "base"}
"base-threads" {= "base"}
"base-unix" {= "base"}
"bigstringaf" {= "0.10.0"}
"checkseum" {= "0.5.2"}
"cmdliner" {= "1.3.0"}
"conf-pkg-config" {= "3"}
"conf-sqlite3" {= "1"}
"containers" {= "3.15"}
"containers-data" {= "3.15"}
"cppo" {= "1.8.0"}
"csexp" {= "1.5.2"}
"cstruct" {= "6.2.0"}
"decompress" {= "1.5.3"}
"diet" {= "0.4"}
"digestif" {= "1.2.0"}
"domain-local-await" {= "1.0.1"}
"dune" {= "3.17.1"}
"dune-configurator" {= "3.17.1"}
"eio" {= "1.2"}
"eio_linux" {= "1.2"}
"eio_main" {= "1.2"}
"eio_posix" {= "1.2"}
"either" {= "1.0.0"}
"eqaf" {= "0.10"}
"fmt" {= "0.9.0"}
"hmap" {= "0.8.1"}
"host-arch-x86_64" {= "1"}
"host-system-other" {= "1"}
"iomux" {= "0.3"}
"logs" {= "0.7.0"}
"lwd" {= "0.3"}
"lwt" {= "5.9.0"}
"lwt-dllist" {= "1.0.1"}
"mtime" {= "2.1.0"}
"nottui" {= "0.3"}
"notty" {= "0.2.3"}
"ocaml" {= "5.2.1"}
"ocaml-base-compiler" {= "5.2.1"}
"ocaml-compiler-libs" {= "v0.17.0"}
"ocaml-config" {= "3"}
"ocaml-options-vanilla" {= "1"}
"ocaml-syntax-shims" {= "1.0.0"}
"ocamlbuild" {= "0.15.0"}
"ocamlfind" {= "1.9.6"}
"ocplib-endian" {= "1.2"}
"optint" {= "0.3.0"}
"oseq" {= "0.5.1"}
"ounit2" {= "2.2.7" & with-test}
"ppx_derivers" {= "1.2.1"}
"ppx_deriving" {= "6.0.3"}
"ppxlib" {= "0.33.0"}
"progress" {= "0.4.0"}
"psq" {= "0.2.1"}
"ptime" {= "1.2.0"}
"qcheck" {= "0.23" & with-test}
"qcheck-alcotest" {= "0.23" & with-test}
"qcheck-core" {= "0.23" & with-test}
"qcheck-ounit" {= "0.23" & with-test}
"re" {= "1.12.0"}
"result" {= "1.5"}
"seq" {= "base"}
"sexplib0" {= "v0.17.0"}
"spelll" {= "0.4"}
"sqlite3" {= "5.2.0"}
"stdlib-shims" {= "0.3.0"}
"terminal" {= "0.4.0"}
"thread-table" {= "1.0.0"}
"timedesc" {= "3.1.0"}
"timedesc-tzdb" {= "3.1.0"}
"timedesc-tzlocal" {= "3.1.0"}
"topkg" {= "1.0.7"}
"uring" {= "0.9"}
"uucp" {= "16.0.0"}
"uuseg" {= "16.0.0"}
"uutf" {= "1.0.3"}
"vector" {= "1.0.0"}
]
build: [
["dune" "subst"] {dev}
["dune" "build" "-p" name "-j" jobs "@install" "@doc" {with-doc}]
]
dev-repo: "git+https://github.com/darrenldl/docfd.git"
pin-depends: [
[
"nottui.0.3"
"git+https://github.com/let-def/lwd.git#a337a778001e6c1dbaed7e758c9e05f300abd388"
]
[
"notty.0.2.3"
"git+https://github.com/ocaml-dune/notty.git#b6e1036c61521be3b1f4d585895ac598bdf4ab8d"
]
[
"ocaml-base-compiler.5.2.1"
"https://github.com/ocaml/ocaml/releases/download/5.2.1/ocaml-5.2.1.tar.gz"
]
]
description: """\
Think interactive grep for text files, PDFs, DOCXs, etc,
but word/token based instead of regex and line based,
so you can search across lines easily.
Docfd aims to provide good UX via integration with common text editors
and PDF viewers,
so you can jump directly to a search result with a single key press.
Features:
- Multithreaded indexing and searching
- Multiline fuzzy search of multiple files or a single file
- Swap between multi-file view and single file view on the fly
- Content view pane that shows the snippet surrounding the search result selected
- Text editor and PDF viewer integration"""
================================================
FILE: docfd.opam.template
================================================
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@doc" {with-doc}
]
]
================================================
FILE: dune-project
================================================
(lang dune 3.4)
(name docfd)
(generate_opam_files true)
(source
(github darrenldl/docfd))
(authors "Darren Li")
(maintainers "Darren Li")
(license MIT)
(package
(name docfd)
(synopsis "TUI multiline fuzzy document finder")
(description "
Think interactive grep for text files, PDFs, DOCXs, etc,
but word/token based instead of regex and line based,
so you can search across lines easily.
Docfd aims to provide good UX via integration with common text editors
and PDF viewers,
so you can jump directly to a search result with a single key press.
Features:
- Multithreaded indexing and searching
- Multiline fuzzy search of multiple files or a single file
- Swap between multi-file view and single file view on the fly
- Content view pane that shows the snippet surrounding the search result selected
- Text editor and PDF viewer integration
")
(documentation https://github.com/darrenldl/docfd)
(depends
(ocaml (>= "5.2"))
dune
(fmt (>= "0.9.0"))
(angstrom (>= "0.15.0"))
(containers (>= "3.12"))
oseq
spelll
notty-community
(nottui (>= "0.4"))
(nottui-unix (>= "0.4"))
lwd
(cmdliner (>= "2.0.0"))
(eio (>= "0.14"))
digestif
(eio_main (>= "1.3"))
containers-data
(timedesc (>= "3.1.0"))
(re (>= "1.11.0"))
(ppx_deriving (>= "5.0"))
decompress
(progress (>= "0.5.0"))
diet
sqlite3
uuseg
uucp
(alcotest :with-test)
(qcheck-alcotest :with-test)
(qcheck :with-test)
)
(tags
("fuzzy" "document" "finder"
))
)
================================================
FILE: file-collection-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: file-collection-tests.t/run.t
================================================
Setup:
$ touch no-ext
$ touch empty-paths.txt
$ echo "test.txt" >> paths
$ echo "test-symlink.txt" >> paths
$ echo "test0" >> paths
$ echo "test1/ijkl" >> paths
$ echo "test2/" >> paths
$ echo "test3/" >> paths
$ echo "test.log" >> single-path0.txt
$ echo "test.txt" >> single-path1.txt
$ touch test.ext0
$ touch test.log
$ touch test.md
$ touch test.txt
$ mkdir test0
$ touch test0/1234.md
$ touch test0/abcd.txt
$ mkdir test0/abcd
$ touch test0/abcd/efgh.md
$ touch test0/abcd/efgh.txt
$ mkdir test1
$ touch test1/5678.md
$ touch test1/ijkl.txt
$ mkdir test1/ijkl
$ touch test1/ijkl/mnop.md
$ touch test1/ijkl/mnop.txt
$ mkdir test2
$ touch test2/1234.md
$ mkdir test2/abcd
$ touch test2/abcd/efgh.md
$ ln -s $(pwd)/test0/abcd/efgh.txt test2/abcd/efgh.txt
$ ln -s ../test1/5678.md test2/56.md
$ ln -s ../test1/ijkl test2/ijkl
$ ln -s test2 test3
$ ln -s test.txt test-symlink.txt
$ mkdir miXeD-CaSe
$ touch miXeD-CaSe/AbCd.md
$ tree
.
|-- dune -> ../../../../default/file-collection-tests.t/dune
|-- empty-paths.txt
|-- miXeD-CaSe
| `-- AbCd.md
|-- no-ext
|-- paths
|-- single-path0.txt
|-- single-path1.txt
|-- test-symlink.txt -> test.txt
|-- test.ext0
|-- test.log
|-- test.md
|-- test.txt
|-- test0
| |-- 1234.md
| |-- abcd
| | |-- efgh.md
| | `-- efgh.txt
| `-- abcd.txt
|-- test1
| |-- 5678.md
| |-- ijkl
| | |-- mnop.md
| | `-- mnop.txt
| `-- ijkl.txt
|-- test2
| |-- 1234.md
| |-- 56.md -> ../test1/5678.md
| |-- abcd
| | |-- efgh.md
| | `-- efgh.txt -> $TESTCASE_ROOT/test0/abcd/efgh.txt
| `-- ijkl -> ../test1/ijkl
`-- test3 -> test2
7 directories, 26 files
Basic invocation for reference:
$ docfd --debug-log - --cache-dir .cache --index-only . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
--max-depth 0:
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 0 . 2>&1 | grep '^Using .* search mode' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 0 test.txt 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
--max-depth 1:
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 1 . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 1 --glob '**/*.md' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
--max-depth 2:
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 2 . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
$ docfd --debug-log - --cache-dir .cache --index-only --max-depth 2 --glob '**/*.md' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Default path is not picked if --paths-from is used:
$ docfd --debug-log - --cache-dir .cache --index-only --paths-from empty-paths.txt 2>&1 | grep '^Using .* search mode' | sort
Default path is not picked if --glob is used:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Default path is not picked if --single-line-glob is used:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Multiple --paths-from:
$ docfd --debug-log - --cache-dir .cache --index-only --paths-from single-path0.txt --paths-from single-path1.txt 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Empty --exts:
$ docfd --debug-log - --cache-dir .cache --index-only --exts "" . 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Empty --single-line-exts:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-exts "" . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Empty --exts and --single-line-exts:
$ docfd --debug-log - --cache-dir .cache --index-only --exts "" --single-line-exts "" .
Initializing in-memory index
error: no usable file extensions or glob patterns
[1]
--add-exts:
$ docfd --debug-log - --cache-dir .cache --index-only --add-exts ext0 . 2>&1 | grep '^Using .* search mode' | sort | grep "ext0"
Using multiline search mode for document '$TESTCASE_ROOT/test.ext0'
--single-line-add-exts:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-add-exts ext0 . 2>&1 | grep '^Using .* search mode' | sort | grep "ext0"
Using single line search mode for document '$TESTCASE_ROOT/test.ext0'
Picking via multiple --glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Picking via multiple --single-line-glob:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --single-line-glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
Picking via multiple --glob and --single-line-glob:
$ # --single-line-glob for .txt files
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
$ # --single-line-glob for .md files
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --single-line-glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
$ # --single-line-glob for .log files
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
$ # --glob for .txt files
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' --single-line-glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
$ # --glob for .md files
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --glob '*.md' --single-line-glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
$ # --glob for .log files
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.txt' --single-line-glob '*.md' --glob '*.log' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
--single-line-exts and --exts:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-exts md --exts md . 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode' | sort
Checking if efficiently computed and naively computed results for default search mode files are consistent
Checking if efficiently computed and naively computed results for single line search mode files are consistent
Checking if single line search mode files and default search mode files are disjoint
Using single line search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
--single-line-exts and --glob:
$ docfd --debug-log - --cache-dir .cache --index-only --exts "" --single-line-exts md --glob '**/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode' | sort
Checking if efficiently computed and naively computed results for default search mode files are consistent
Checking if efficiently computed and naively computed results for single line search mode files are consistent
Checking if single line search mode files and default search mode files are disjoint
Using single line search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
--single-line-glob and --exts:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line-glob '*.md' --exts md . 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode'| sort
Checking if efficiently computed and naively computed results for default search mode files are consistent
Checking if efficiently computed and naively computed results for single line search mode files are consistent
Checking if single line search mode files and default search mode files are disjoint
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
--single-line-glob and --glob:
$ docfd --debug-log - --cache-dir .cache --index-only --exts "" --single-line-glob '*.md' --glob '**/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Checking.*search mode' | sort
Checking if efficiently computed and naively computed results for default search mode files are consistent
Checking if efficiently computed and naively computed results for single line search mode files are consistent
Checking if single line search mode files and default search mode files are disjoint
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
--exts applies to directories in FILE in --paths-from FILE:
$ docfd --debug-log - --cache-dir .cache --index-only --paths-from paths --exts md 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
--single-line-exts applies to directories in FILE in --paths-from FILE:
$ docfd --debug-log - --cache-dir .cache --index-only --paths-from paths --single-line-exts md 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Top-level symlinks:
$ docfd --debug-log - --cache-dir .cache --index-only test-symlink.txt 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
$ docfd --debug-log - --cache-dir .cache --index-only test3 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Top-level files and --single-line-exts:
$ docfd --debug-log - --cache-dir .cache --index-only test.txt --single-line-exts txt 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
Top-level files and --single-line-glob:
$ docfd --debug-log - --cache-dir .cache --index-only test.txt --single-line-glob '*.txt' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
--glob and unrecognized extensions:
$ docfd --debug-log - --cache-dir .cache --index-only --exts md --glob "*.txt" . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
--single-line-glob and unrecognized extensions:
$ docfd --debug-log - --cache-dir .cache --index-only --exts md --single-line-glob "*.txt" . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using multiline search mode for document '$TESTCASE_ROOT/test.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
--single-line:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line . 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.log'
Using single line search mode for document '$TESTCASE_ROOT/test.md'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test1/5678.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test3/1234.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/56.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
--glob and --single-line:
$ docfd --debug-log - --cache-dir .cache --index-only --single-line --glob '**/*.txt' 2>&1 | grep '^Using .* search mode' | sort
Using single line search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using single line search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using single line search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using single line search mode for document '$TESTCASE_ROOT/test.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using single line search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using single line search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using single line search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using single line search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
Top-level files with unrecognized extensions are still picked:
$ docfd --debug-log - --cache-dir .cache --index-only --exts md test.txt . 2>&1 | grep '^Using .* search mode' | sort | grep 'test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Top-level files without extensions are still picked:
$ docfd --debug-log - --cache-dir .cache --index-only --exts md no-ext . 2>&1 | grep '^Using .* search mode' | sort | grep 'no-ext'
Using multiline search mode for document '$TESTCASE_ROOT/no-ext'
Current working directory is a symlink:
$ cd test3
$ docfd --debug-log - --cache-dir .cache --index-only . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob '*.txt' . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob '**/*.txt' . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob "$(pwd)/**/*.txt" . 2>&1 | grep '^Using .* search mode' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test2/1234.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/56.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ cd ..
'./' in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob './*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
'..' in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test1/../*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Directories in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '.' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob '..' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test0/' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Crossing symlinks explicitly in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3/../*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3/abcd/*.txt' --glob 'test3/abcd/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'test3/ijkl/*.txt' --glob 'test3/ijkl/*.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.md'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
'**' in glob:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '**/*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob '**/**/*.txt' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob "$(pwd)/**/*.txt" 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob "$(pwd)/**/**/*.txt" 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/empty-paths.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path0.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/single-path1.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test-symlink.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test0/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test1/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test2/ijkl/mnop.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/abcd/efgh.txt
Glob $TESTCASE_ROOT/**/**/*.txt matches path $TESTCASE_ROOT/test3/ijkl/mnop.txt
Using multiline search mode for document '$TESTCASE_ROOT/empty-paths.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path0.txt'
Using multiline search mode for document '$TESTCASE_ROOT/single-path1.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test-symlink.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test2/ijkl/mnop.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/abcd/efgh.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test3/ijkl/mnop.txt'
$ docfd --debug-log - --cache-dir .cache --index-only --glob "**/test[01]/*.txt" 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Glob $TESTCASE_ROOT/**/test[01]/*.txt matches path $TESTCASE_ROOT/test0/abcd.txt
Glob $TESTCASE_ROOT/**/test[01]/*.txt matches path $TESTCASE_ROOT/test1/ijkl.txt
Using multiline search mode for document '$TESTCASE_ROOT/test0/abcd.txt'
Using multiline search mode for document '$TESTCASE_ROOT/test1/ijkl.txt'
Case insensitive marker:
$ # Exact match without marker
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'miXeD-CaSe/AbCd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ # All lowercase glob
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-case/abcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob '\cmixed-case/abcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-\ccase/abcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-case/\cabcd.md' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'mixed-case/abcd.md\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ # All uppercase glob
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIXED-CASE/ABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob '\cMIXED-CASE/ABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIX\cED-CASE/ABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIXED-CASE/\cABCD.MD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MIXED-CASE/ABCD.MD\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ # Mixed case glob
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/aBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob '\cMixeD-CaSE/aBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD\c-CaSE/aBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/\caBcD.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/aBcD.mD\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
Using multiline search mode for document '$TESTCASE_ROOT/miXeD-CaSe/AbCd.md'
Double escape characters:
$ docfd --debug-log - --cache-dir .cache --index-only --glob '\\cMixeD-CaSE/AbCd.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD\\c-CaSE/AbCd.mD' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
$ docfd --debug-log - --cache-dir .cache --index-only --glob 'MixeD-CaSE/AbCd.mD\\c' 2>&1 | grep -e '^Using .* search mode' -e '^Glob' | sort
================================================
FILE: lib/GZIP.ml
================================================
(* Basically fully copied from examples in Decompress manual *)
(* Current Unix time truncated to 32 bits, as required for the
   MTIME field of the gzip header configuration below. *)
let time () =
  Unix.gettimeofday () |> Int32.of_float
(* Gzip-compress [s] using Decompress's streaming API.
   Follows the usage pattern from the Decompress manual. *)
let compress (s : string) : string =
  let src = De.bigstring_create De.io_buffer_size in
  let dst = De.bigstring_create De.io_buffer_size in
  let config = Gz.Higher.configuration Gz.Unix time in
  let window = De.Lz77.make_window ~bits:15 in
  let queue = De.Queue.create 1024 in
  let out = Buffer.create 4096 in
  let pos = ref 0 in
  (* Feed the next slice of [s] into [buf]; returning 0 signals
     end of input. *)
  let refill buf =
    let len = min De.io_buffer_size (String.length s - !pos) in
    Bigstringaf.blit_from_string s ~src_off:!pos buf ~dst_off:0 ~len;
    pos := !pos + len;
    len
  in
  (* Collect [len] produced bytes into the output buffer. *)
  let flush buf len =
    Buffer.add_string out (Bigstringaf.substring buf ~off:0 ~len)
  in
  Gz.Higher.compress ~w:window ~q:queue ~level:4 ~refill ~flush () config src dst;
  Buffer.contents out
(* Gzip-decompress [s]; [None] if the input is not valid gzip data. *)
let decompress (s : string) : string option =
  let src = De.bigstring_create De.io_buffer_size in
  let dst = De.bigstring_create De.io_buffer_size in
  let out = Buffer.create 0x1000 in
  let pos = ref 0 in
  (* Feed the next slice of [s] into [buf]; returning 0 signals
     end of input. *)
  let refill buf =
    let len = min De.io_buffer_size (String.length s - !pos) in
    Bigstringaf.blit_from_string s ~src_off:!pos buf ~dst_off:0 ~len;
    pos := !pos + len;
    len
  in
  (* Collect [len] produced bytes into the output buffer. *)
  let flush buf len =
    Buffer.add_string out (Bigstringaf.substring buf ~off:0 ~len)
  in
  match Gz.Higher.uncompress ~refill ~flush src dst with
  | Ok _ -> Some (Buffer.contents out)
  | Error _ -> None
================================================
FILE: lib/char_map.ml
================================================
(* Map keyed by [Char], built with containers' [CCMap.Make]. *)
include CCMap.Make (Char)
================================================
FILE: lib/doc_id_db.ml
================================================
(* Global state for the document-ID database: an in-memory cache of
   hash -> ID assignments made in the [doc_info] table, guarded by a
   mutex for concurrent use under Eio. *)
type t = {
  lock : Eio.Mutex.t;
  (* Cache of document IDs already allocated in the DB, keyed by
     document hash. *)
  doc_id_of_doc_hash : (string, int64) Hashtbl.t;
}

let t : t =
  {
    lock = Eio.Mutex.create ();
    doc_id_of_doc_hash = Hashtbl.create 10_000;
  }

(* Run [f] while holding the global lock. *)
let lock : type a. (unit -> a) -> a =
  fun f ->
  Eio.Mutex.use_rw ~protect:true t.lock f
(* Ensure every hash in [doc_hashes] has a document ID allocated in
   the [doc_info] table, then cache the resulting hash -> ID mapping
   in memory.  Hashes that already have an ID are left untouched
   thanks to [ON CONFLICT(hash) DO NOTHING], so calling this
   repeatedly with overlapping hash sequences is safe.
   Note: [doc_hashes] is traversed twice (insert pass, then read-back
   pass), so it must be a replayable sequence. *)
let allocate_bulk (doc_hashes : string Seq.t) : unit =
  let open Sqlite3_utils in
  lock (fun () ->
      with_db (fun db ->
          (* Insert each hash with the smallest free non-negative ID:
             the subquery searches for a gap adjacent to an existing
             ID, and IFNULL falls back to 0 when the table is empty. *)
          with_stmt ~db
            {|
            INSERT INTO doc_info
            (id, hash, status)
            VALUES
            (
            (SELECT
            IFNULL(
            (
            SELECT a.id - 1 AS id
            FROM doc_info a
            LEFT JOIN doc_info b ON a.id - 1 = b.id
            WHERE b.id IS NULL AND a.id - 1 >= 0
            UNION
            SELECT a.id + 1 AS id
            FROM doc_info a
            LEFT JOIN doc_info b ON a.id + 1 = b.id
            WHERE b.id IS NULL
            ORDER BY id
            LIMIT 1
            ),
            0
            )
            ),
            @doc_hash,
            'ONGOING'
            )
            ON CONFLICT(hash) DO NOTHING
            |}
            (fun stmt ->
               Seq.iter (fun doc_hash ->
                   bind_names stmt [ ("@doc_hash", TEXT doc_hash) ];
                   step stmt;
                   reset stmt;
                 )
                 doc_hashes
            );
          (* Read the IDs back and cache them.  [Hashtbl.replace]
             (rather than [add]) prevents duplicate bindings from
             piling up when the same hash passes through here more
             than once. *)
          with_stmt ~db
            {|
            SELECT id
            FROM doc_info
            WHERE hash = @doc_hash
            |}
            (fun stmt ->
               Seq.iter (fun doc_hash ->
                   bind_names stmt [ ("@doc_hash", TEXT doc_hash) ];
                   step stmt;
                   Hashtbl.replace t.doc_id_of_doc_hash doc_hash (column_int64 stmt 0);
                   reset stmt;
                 )
                 doc_hashes
            )
        )
    )
(* Return the document ID for [doc_hash], allocating one on demand. *)
let doc_id_of_doc_hash (doc_hash : string) : int64 =
  (* Fast path: the ID may already be cached. *)
  match
    lock (fun () -> Hashtbl.find_opt t.doc_id_of_doc_hash doc_hash)
  with
  | Some id -> id
  | None -> (
      (* Slow path: allocate in the DB, which also populates the
         cache, then read the cached entry. *)
      allocate_bulk (Seq.return doc_hash);
      lock (fun () -> Hashtbl.find t.doc_id_of_doc_hash doc_hash)
    )
================================================
FILE: lib/doc_id_db.mli
================================================
(** Allocate document IDs in the index DB for every hash in the
    sequence that does not already have one, and cache the
    hash -> ID mappings in memory. *)
val allocate_bulk : string Seq.t -> unit

(** Look up the document ID for a hash, allocating one on demand if
    it is not yet known. *)
val doc_id_of_doc_hash : string -> int64
================================================
FILE: lib/docfd_lib.ml
================================================
(* Public surface of the docfd library: re-export the internal
   modules under their own names.  [Params'] and [Misc_utils'] are
   primed — presumably to avoid clashing with same-named modules in
   the consuming executable; TODO confirm against bin/. *)
module Index = Index
module Doc_id_db = Doc_id_db
module Link = Link
module Search_result = Search_result
module Search_phrase = Search_phrase
module Search_exp = Search_exp
module Search_result_heap = Search_result_heap
module Word_db = Word_db
module Tokenization = Tokenization
module Params' = Params
module Task_pool = Task_pool
module Stop_signal = Stop_signal
module Parser_components = Parser_components
module Misc_utils' = Misc_utils
module Sqlite3_utils = Sqlite3_utils
(* Open the index DB at [db_path], install the schema, and prune old
   documents if the stored count reaches [document_count_limit].
   Returns [Some error_message] on failure, [None] on success. *)
let init ~db_path ~document_count_limit =
  let open Sqlite3_utils in
  let db = db_open db_path in
  let rc = Sqlite3.exec db Params.db_schema in
  let error =
    if Rc.is_success rc then (
      Params.db_path := Some db_path;
      if Index.document_count () >= document_count_limit then (
        Index.prune_old_documents ~keep_n_latest:document_count_limit
      );
      None
    ) else (
      Some (Fmt.str
              "failed to initialize index DB: %s" (Rc.to_string rc))
    )
  in
  (* [db_close] can report "busy"; retry until the handle is really
     closed. *)
  while not (db_close db) do
    Unix.sleepf 0.1
  done;
  error
================================================
FILE: lib/dune
================================================
; Build definition for the docfd core library.
(library
 ; Enable (almost) all warnings, disabling a curated set; @8 turns
 ; warning 8 (non-exhaustive match) into an error.
 (flags (-w "+a-4-9-29-37-40-42-44-48-50-32-30-70@8"))
 (name docfd_lib)
 (preprocess (pps
  ppx_deriving.show
  ppx_deriving.ord
  ppx_deriving.eq
 ))
 (libraries containers
  containers-data
  angstrom
  fmt
  spelll
  oseq
  eio
  decompress.gz
  bigstringaf
  unix
  diet
  sqlite3
  uuseg
  uucp
  timedesc
  timedesc-tzlocal.unix-or-utc
 )
)
================================================
FILE: lib/index.ml
================================================
(* Position of a line within a document: which page it is on, where
   it sits on that page, and where it sits in the document overall. *)
module Line_loc = struct
  type t = {
    page_num : int;          (* 0-based page number *)
    line_num_in_page : int;  (* 0-based line number within the page *)
    global_line_num : int;   (* 0-based line number in the whole document *)
  }
  [@@deriving eq]

  let page_num t = t.page_num

  let line_num_in_page t = t.line_num_in_page

  let global_line_num t = t.global_line_num

  (* Order by overall position in the document. *)
  let compare (x : t) (y : t) =
    Int.compare x.global_line_num y.global_line_num
end
(* Position of a single token: its line plus the token's index within
   that line. *)
module Loc = struct
  type t = {
    line_loc : Line_loc.t;
    pos_in_line : int;  (* 0-based token index within the line *)
  }
  [@@deriving eq]

  let line_loc t = t.line_loc

  let pos_in_line t = t.pos_in_line
end
(* In-memory ("raw") index of a single document, built before being
   written to the index DB.  A document's token stream is split into
   chunks which are indexed independently in the task pool and then
   merged with [union]; link extraction runs on the merged result. *)
module Raw = struct
  type t = {
    (* Word ID -> set of token positions where the word occurs. *)
    pos_s_of_word : Int_set.t Int_map.t;
    (* Token position -> full location (line + index within line). *)
    loc_of_pos : Loc.t Int_map.t;
    line_loc_of_global_line_num : Line_loc.t Int_map.t;
    (* Line -> (first, last) token position on that line, inclusive. *)
    start_end_inc_pos_of_global_line_num : (int * int) Int_map.t;
    (* Page -> (first, last) token position on that page, inclusive. *)
    start_end_inc_pos_of_page_num : (int * int) Int_map.t;
    (* Token position -> word ID. *)
    word_of_pos : int Int_map.t;
    line_count_of_page_num : int Int_map.t;
    page_count : int;
    global_line_count : int;
    (* Links found in the document; empty until [extract_links] is
       run on the fully merged index (see [of_seq]). *)
    links : Link.t array;
  }

  (* A single token paired with its absolute position and location. *)
  type multi_indexed_word = {
    pos : int;
    loc : Loc.t;
    word : string;
  }

  type chunk = multi_indexed_word array

  (* The empty index. *)
  let make () : t = {
    pos_s_of_word = Int_map.empty;
    loc_of_pos = Int_map.empty;
    line_loc_of_global_line_num = Int_map.empty;
    start_end_inc_pos_of_global_line_num = Int_map.empty;
    start_end_inc_pos_of_page_num = Int_map.empty;
    word_of_pos = Int_map.empty;
    line_count_of_page_num = Int_map.empty;
    page_count = 0;
    global_line_count = 0;
    links = [||];
  }

  (* All word IDs present in the index. *)
  let word_ids (t : t) : Int_set.t =
    Int_map.fold (fun word_id _pos_s acc ->
        Int_set.add word_id acc
      )
      t.pos_s_of_word
      Int_set.empty

  let links (t : t) = t.links

  (* Merge two chunk indices.  Position-keyed maps keep the left side
     on key clashes (chunks cover disjoint position ranges, so clashes
     are not expected); per-line/per-page ranges are widened with
     min/max; counts take the max.  [links] is deliberately dropped —
     it is recomputed on the final merged index. *)
  let union (x : t) (y : t) =
    {
      pos_s_of_word =
        Int_map.union (fun _k s0 s1 -> Some (Int_set.union s0 s1))
          x.pos_s_of_word
          y.pos_s_of_word;
      loc_of_pos =
        Int_map.union (fun _k x _ -> Some x)
          x.loc_of_pos
          y.loc_of_pos;
      line_loc_of_global_line_num =
        Int_map.union (fun _k x _ -> Some x)
          x.line_loc_of_global_line_num
          y.line_loc_of_global_line_num;
      start_end_inc_pos_of_global_line_num =
        Int_map.union (fun _k (start_x, end_inc_x) (start_y, end_inc_y) ->
            Some (min start_x start_y, max end_inc_x end_inc_y))
          x.start_end_inc_pos_of_global_line_num
          y.start_end_inc_pos_of_global_line_num;
      start_end_inc_pos_of_page_num =
        Int_map.union (fun _k (start_x, end_inc_x) (start_y, end_inc_y) ->
            Some (min start_x start_y, max end_inc_x end_inc_y))
          x.start_end_inc_pos_of_page_num
          y.start_end_inc_pos_of_page_num;
      word_of_pos =
        Int_map.union (fun _k x _ -> Some x)
          x.word_of_pos
          y.word_of_pos;
      line_count_of_page_num =
        Int_map.union (fun _k x y -> Some (max x y))
          x.line_count_of_page_num
          y.line_count_of_page_num;
      page_count = max x.page_count y.page_count;
      global_line_count = max x.global_line_count y.global_line_count;
      links = [||];
    }

  (* Tokenize lines into a flat, globally-positioned token sequence.
     A line with no tokens still yields a single empty word so the
     line is represented in the index. *)
  let words_of_lines
      (s : (Line_loc.t * string) Seq.t)
    : multi_indexed_word Seq.t =
    s
    |> Seq.flat_map (fun (line_loc, s) ->
        let seq = Tokenization.tokenize_with_pos ~drop_spaces:false s in
        if Seq.is_empty seq then (
          let empty_word = ({ Loc.line_loc; pos_in_line = 0 }, "") in
          Seq.return empty_word
        ) else (
          Seq.map (fun (pos_in_line, word) ->
              ({ Loc.line_loc; pos_in_line }, word))
            seq
        )
      )
    |> Seq.mapi (fun pos (loc, word) ->
        { pos; loc; word })

  (* Index one chunk of tokens by folding each token into the maps.
     The record pattern intentionally omits [links] (warning 9 is
     disabled in the build flags); counts are maintained as
     max(seen index) + 1. *)
  let of_chunk (arr : chunk) : t =
    Array.fold_left
      (fun
        { pos_s_of_word;
          loc_of_pos;
          line_loc_of_global_line_num;
          start_end_inc_pos_of_global_line_num;
          start_end_inc_pos_of_page_num;
          word_of_pos;
          line_count_of_page_num;
          page_count;
          global_line_count;
        }
        { pos; loc; word } ->
        (* Intern the word, obtaining its stable ID. *)
        let word_id =
          Word_db.add word
        in
        let line_loc = loc.Loc.line_loc in
        let global_line_num = line_loc.global_line_num in
        let page_num = line_loc.page_num in
        let pos_s =
          Int_map.find_opt word_id pos_s_of_word
          |> Option.value ~default:Int_set.empty
          |> Int_set.add pos
        in
        let cur_page_line_count =
          Option.value ~default:0
            (Int_map.find_opt page_num line_count_of_page_num)
        in
        { pos_s_of_word = Int_map.add word_id pos_s pos_s_of_word;
          loc_of_pos = Int_map.add pos loc loc_of_pos;
          line_loc_of_global_line_num =
            Int_map.add global_line_num line_loc line_loc_of_global_line_num;
          (* Widen the line's inclusive position range to cover [pos]. *)
          start_end_inc_pos_of_global_line_num =
            Int_map.add
              global_line_num
              (match Int_map.find_opt global_line_num start_end_inc_pos_of_global_line_num with
               | None -> (pos, pos)
               | Some (x, y) -> (min x pos, max y pos))
              start_end_inc_pos_of_global_line_num;
          (* Same widening for the page's inclusive position range. *)
          start_end_inc_pos_of_page_num =
            Int_map.add
              page_num
              (match Int_map.find_opt page_num start_end_inc_pos_of_page_num with
               | None -> (pos, pos)
               | Some (x, y) -> (min x pos, max y pos))
              start_end_inc_pos_of_page_num;
          word_of_pos = Int_map.add pos word_id word_of_pos;
          line_count_of_page_num =
            Int_map.add line_loc.page_num (max cur_page_line_count (line_loc.line_num_in_page + 1)) line_count_of_page_num;
          page_count = max page_count (line_loc.page_num + 1);
          global_line_count = max global_line_count (global_line_num + 1);
          links = [||];
        }
      )
      (make ())
      arr

  (* Split the token stream into fixed-size chunks for parallel
     indexing.  An empty document becomes a single empty-word chunk
     so downstream code always sees at least one chunk. *)
  let chunks_of_words (s : multi_indexed_word Seq.t) : chunk Seq.t =
    let empty_word =
      let line_loc =
        { Line_loc.page_num = 0; line_num_in_page = 0; global_line_num = 0 }
      in
      let loc = { Loc.line_loc; pos_in_line = 0 } in
      { pos = 0; loc; word = "" }
    in
    (if Seq.is_empty s then (
        Seq.return empty_word
      ) else (
        s
      ))
    |> OSeq.chunks !Params.index_chunk_size

  (* Scan the indexed document for URL, Markdown, and Wiki links.
     Candidate lines are found cheaply via the word index first, then
     each candidate line is re-tokenized from the index and scanned
     with a small state machine. *)
  let extract_links (t : t) : Link.t array =
    (* Turn the accumulated token buffer into one Link and reset the
       buffer.  [buf] holds (pos, word) most-recent-first, so the
       left fold re-reverses the words back into reading order. *)
    let flush_buf link_typ ~acc ~buf =
      match buf with
      | [] -> (acc, buf)
      | _ -> (
          let start_pos, end_inc_pos, strings =
            List.fold_left (fun (start, end_inc, strings) (pos, word) ->
                let start =
                  match start with
                  | None -> Some pos
                  | Some start -> Some (min pos start)
                in
                let end_inc =
                  match end_inc with
                  | None -> Some pos
                  | Some end_inc -> Some (max pos end_inc)
                in
                (start, end_inc, word :: strings)
              )
              (None, None, [])
              buf
          in
          let start_pos = Option.get start_pos in
          let end_inc_pos = Option.get end_inc_pos in
          let link = Link.{
              start_pos;
              end_inc_pos;
              typ = link_typ;
              link = String.concat "" strings;
            }
          in
          (link :: acc, [])
        )
    in
    (* Scan one line's tokens with a two-state machine:
       `Scanning looks for the start of a link, `In_link accumulates
       tokens until a terminator is seen. *)
    let process_line (line : (int * string) Dynarray.t) : Link.t list =
      assert (Dynarray.length line > 0);
      let word_at pos = Dynarray.get line pos in
      let word_string_at pos = snd @@ word_at pos in
      let word_ci_string_at pos = String.lowercase_ascii @@ word_string_at pos in
      let rec aux
          (state : [ `Scanning | `In_link of Link.typ ])
          ~(acc : Link.t list)
          ~(buf : (int * string) list)
          (cur : int)
        : Link.t list
        =
        if cur >= Dynarray.length line then (
          (* End of line: flush any link still being accumulated. *)
          match state with
          | `Scanning -> (
              acc
            )
          | `In_link link_typ -> (
              let acc, _buf = flush_buf link_typ ~acc ~buf in
              acc
            )
        ) else (
          let words_left = Dynarray.length line - cur - 1 in
          let pos, word = word_at cur in
          let word_ci = String.lowercase_ascii word in
          match state with
          | `Scanning -> (
              (* URL: scheme followed by "://" (tokenized as three
                 tokens).  Markdown: token right after "](".
                 Wiki: token right after "[[". *)
              let link_typ =
                if List.mem word_ci [ "https"; "http"; "file" ]
                && words_left >= 3
                && word_ci_string_at (cur + 1) = ":"
                && word_ci_string_at (cur + 2) = "/"
                && word_ci_string_at (cur + 3) = "/"
                then (
                  Some `URL
                ) else if cur >= 2
                       && word_ci_string_at (cur - 2) = "]"
                       && word_ci_string_at (cur - 1) = "("
                then (
                  Some `Markdown
                ) else if cur >= 2
                       && word_ci_string_at (cur - 2) = "["
                       && word_ci_string_at (cur - 1) = "["
                then (
                  Some `Wiki
                ) else (
                  None
                )
              in
              match link_typ with
              | Some link_typ -> (
                  aux (`In_link link_typ) ~acc ~buf:((pos, word) :: buf) (cur + 1)
                )
              | None -> (
                  aux `Scanning ~acc ~buf (cur + 1)
                )
            )
          | `In_link link_typ -> (
              (* A link ends at whitespace or a closing/markup token. *)
              let link_ended =
                String.length word = 0
                || Parser_components.is_space word.[0]
                || List.mem word
                     [ "]"
                     ; ")"
                     ; "|"
                     ; "\""
                     ; "<"
                     ; ">"
                     ; "{"
                     ; "}"
                     ; "^"
                     ; "\\"
                     ]
              in
              if link_ended then (
                let acc, buf = flush_buf link_typ ~acc ~buf in
                aux `Scanning ~acc ~buf (cur + 1)
              ) else (
                aux state ~acc ~buf:((pos, word) :: buf) (cur + 1)
              )
            )
        )
      in
      aux `Scanning ~acc:[] ~buf:[] 0
    in
    (* Global line numbers of lines containing any/all of the words
       in [l], computed from the word-position index. *)
    let lines_with (mode : [ `Any_of | `All_of ]) l =
      let line_nums_by_word =
        l
        |> List.to_seq
        |> Seq.filter_map Word_db.id_of_word
        |> Seq.map (fun word_id ->
            Int_map.find_opt word_id t.pos_s_of_word
            |> Option.value ~default:Int_set.empty
            |> Int_set.to_seq
            |> Seq.fold_left (fun acc pos ->
                let loc = Int_map.find pos t.loc_of_pos in
                let line_loc = Loc.line_loc loc in
                Int_set.add line_loc.global_line_num acc
              )
              Int_set.empty
          )
      in
      line_nums_by_word
      |> Seq.fold_left
        (match mode with
         | `Any_of -> (fun acc s ->
             let acc = Option.value ~default:Int_set.empty acc in
             Some (Int_set.union acc s)
           )
         | `All_of -> (fun acc s ->
             match acc with
             | None -> Some s
             | Some acc -> Some (Int_set.inter acc s)
           )
        )
        None
      |> Option.value ~default:Int_set.empty
    in
    (* Lines that could contain a URL: a scheme word plus both ":"
       and "/" somewhere on the line. *)
    let url_line_candidates =
      Int_set.inter
        (lines_with `Any_of [ "http"; "https"; "file" ])
        (lines_with `All_of [ ":"; "/" ])
    in
    let markdown_and_wiki_line_candidates =
      lines_with `All_of [ "["; "]" ]
    in
    let line_candidates =
      Int_set.union
        url_line_candidates
        markdown_and_wiki_line_candidates
      |> Int_set.to_seq
    in
    (* Rebuild each candidate line's tokens from the index and scan it. *)
    let rec aux acc line_nums =
      match line_nums () with
      | Seq.Nil -> List.rev acc
      | Seq.Cons (cur, rest) -> (
          let start, end_inc = Int_map.find cur t.start_end_inc_pos_of_global_line_num in
          let links =
            OSeq.(start -- end_inc)
            |> Seq.map (fun pos -> (pos, Int_map.find pos t.word_of_pos))
            |> Seq.map (fun (pos, word_id) -> (pos, Word_db.word_of_id word_id))
            |> Dynarray.of_seq
            |> process_line
          in
          aux (links @ acc) rest
        )
    in
    aux [] line_candidates
    |> Array.of_list

  (* Build the full index for a document: sanitize, tokenize, chunk,
     index chunks in parallel, merge, then extract links from the
     merged result. *)
  let of_seq pool (s : (Line_loc.t * string) Seq.t) : t =
    let indices =
      s
      |> Seq.map (fun (line_loc, s) -> (line_loc, Misc_utils.sanitize_string s))
      |> words_of_lines
      |> chunks_of_words
      |> List.of_seq
      |> Task_pool.map_list pool of_chunk
    in
    let res =
      List.fold_left (fun acc index ->
          union acc index
        )
        (make ())
      indices
    in
    let links = extract_links res in
    { res with links }

  (* Index a single-page document given as a sequence of lines. *)
  let of_lines pool (s : string Seq.t) : t =
    s
    |> Seq.mapi (fun global_line_num line ->
        ({ Line_loc.page_num = 0; line_num_in_page = global_line_num; global_line_num }, line)
      )
    |> of_seq pool

  (* Index a paged document.  Empty pages contribute one empty line;
     global line numbers are assigned in a second pass across all
     pages. *)
  let of_pages pool (s : string list Seq.t) : t =
    s
    |> Seq.mapi (fun page_num page ->
        (page_num, page)
      )
    |> Seq.flat_map (fun (page_num, page) ->
        match page with
        | [] -> (
            let empty_line = ({ Line_loc.page_num; line_num_in_page = 0; global_line_num = 0 }, "") in
            Seq.return empty_line
          )
        | _ -> (
            List.to_seq page
            |> Seq.mapi (fun line_num_in_page line ->
                ({ Line_loc.page_num; line_num_in_page; global_line_num = 0 }, line)
              )
          )
      )
    |> Seq.mapi (fun global_line_num ((line_loc : Line_loc.t), line) ->
        ({ line_loc with global_line_num }, line)
      )
    |> of_seq pool
end
(* In-memory word-ID -> document-ID mapping, mirroring the
   [word_id_doc_id_link] table.  Used to narrow searches to documents
   that contain a given word.  All access is mutex-guarded for
   concurrent use under Eio. *)
module State : sig
  val add_word_id_doc_id_link : word_id:int -> doc_id:int64 -> unit

  val read_from_db : unit -> unit

  val union_doc_ids_of_word_id_into_bv : word_id:int -> into:CCBV.t -> unit
end = struct
  type t = {
    lock : Eio.Mutex.t;
    (* Word ID -> bitvector of document IDs containing that word. *)
    doc_ids_of_word_id : (int, CCBV.t) Hashtbl.t;
  }

  let t : t =
    {
      lock = Eio.Mutex.create ();
      doc_ids_of_word_id = Hashtbl.create 100_000;
    }

  (* Run [f] while holding the lock. *)
  let lock : type a. (unit -> a) -> a =
    fun f ->
    Eio.Mutex.use_rw ~protect:true t.lock f

  (* Bitvector for [word_id], created empty on first use.  Callers
     must already hold the lock. *)
  let find_doc_ids_bv ~word_id =
    match Hashtbl.find_opt t.doc_ids_of_word_id word_id with
    | Some doc_ids -> doc_ids
    | None -> (
        let bv = CCBV.empty () in
        Hashtbl.replace t.doc_ids_of_word_id word_id bv;
        bv
      )

  (* Union the set of documents containing [word_id] into [into]. *)
  let union_doc_ids_of_word_id_into_bv ~word_id ~into =
    lock (fun () ->
        let bv = find_doc_ids_bv ~word_id in
        CCBV.union_into ~into bv
      )

  let add_word_id_doc_id_link ~word_id ~doc_id =
    lock (fun () ->
        let doc_ids = find_doc_ids_bv ~word_id in
        CCBV.set doc_ids (Int64.to_int doc_id)
      )

  (* Load the entire word -> document mapping from the DB. *)
  let read_from_db () : unit =
    let open Sqlite3_utils in
    lock (fun () ->
        with_db (fun db ->
            iter_stmt ~db
              {|
              SELECT word_id, doc_id
              FROM word_id_doc_id_link
              |}
              ~names:[]
              (fun data ->
                 let word_id = Data.to_int_exn data.(0) in
                 let doc_id = Data.to_int_exn data.(1) in
                 let doc_ids = find_doc_ids_bv ~word_id in
                 CCBV.set doc_ids doc_id
              )
          )
      )
end
(* Current time in whole seconds, for the [last_used] DB columns. *)
let now_int64 () =
  Timedesc.Timestamp.get_s (Timedesc.Timestamp.now ())
(* Set [last_used] to the current time for every document in
   [doc_ids], within a single transaction. *)
let refresh_last_used_batch (doc_ids : int64 list) : unit =
  let open Sqlite3_utils in
  let timestamp = now_int64 () in
  with_db (fun db ->
      step_stmt ~db "BEGIN IMMEDIATE" ignore;
      doc_ids
      |> List.iter (fun doc_id ->
          step_stmt ~db
            {|
            UPDATE doc_info
            SET last_used = @now
            WHERE id = @doc_id
            |}
            ~names:[ ("@doc_id", INT doc_id)
                   ; ("@now", INT timestamp)
                   ]
            ignore
        );
      step_stmt ~db "COMMIT" ignore;
    )
(* Number of documents currently recorded in the index DB. *)
let document_count () : int =
  let open Sqlite3_utils in
  with_db (fun db ->
      step_stmt ~db "SELECT COUNT(1) FROM doc_info"
        (fun stmt -> column_int64 stmt 0 |> Int64.to_int)
    )
(* Delete every document except the [keep_n_latest] most recently
   used, together with all rows that reference it, in a single
   transaction. *)
let prune_old_documents ~keep_n_latest : unit =
  let open Sqlite3_utils in
  with_db (fun db ->
      step_stmt ~db "BEGIN IMMEDIATE" ignore;
      step_stmt ~db "DROP TABLE IF EXISTS temp.docs_to_drop" ignore;
      step_stmt ~db "CREATE TEMP TABLE docs_to_drop (hash TEXT, id INTEGER)" ignore;
      (* LIMIT -1 OFFSET n == skip the n most recently used rows and
         take everything after them. *)
      step_stmt ~db
        {|
        INSERT INTO temp.docs_to_drop
        SELECT hash, id
        FROM doc_info
        ORDER BY last_used DESC
        LIMIT -1
        OFFSET @offset
        |}
        ~names:[("@offset", INT (Int64.of_int keep_n_latest))]
        ignore;
      (* Delete rows of [table] whose document-ID column matches a
         document scheduled for dropping. *)
      let drop_based_on_doc_id ?(id_column = "doc_id") table =
        step_stmt ~db
          (Fmt.str
             {|
             DELETE FROM %s
             WHERE EXISTS (
             SELECT 1 FROM temp.docs_to_drop WHERE %s.%s = temp.docs_to_drop.id
             )
             |}
             table
             table
             id_column
          )
          ignore
      in
      drop_based_on_doc_id ~id_column:"id" "doc_info";
      drop_based_on_doc_id "line_info";
      drop_based_on_doc_id "page_info";
      drop_based_on_doc_id "position";
      drop_based_on_doc_id "word_id_doc_id_link";
      step_stmt ~db "DROP TABLE temp.docs_to_drop" ignore;
      step_stmt ~db "COMMIT" ignore;
    )
(* Persist a [Raw.t] index for document [doc_id] into the DB.  Marks
   the document 'ONGOING' first and 'COMPLETED' last, so an
   interrupted write is detectable.  If [already_in_transaction] the
   caller owns BEGIN/COMMIT; otherwise one is opened around the bulk
   inserts here. *)
let write_raw_to_db db ~already_in_transaction ~doc_id (x : Raw.t) : unit =
  let open Sqlite3_utils in
  let now = now_int64 () in
  with_db ~db (fun db ->
      (* Document-level metadata; max_pos is the largest token
         position recorded. *)
      step_stmt ~db
        {|
        UPDATE doc_info
        SET page_count = @page_count,
        global_line_count = @global_line_count,
        max_pos = @max_pos,
        last_used = @now,
        status = 'ONGOING'
        WHERE
        id = @doc_id
        |}
        ~names:[ ("@doc_id", INT doc_id)
               ; ("@page_count", INT (Int64.of_int x.page_count))
               ; ("@global_line_count", INT (Int64.of_int x.global_line_count))
               ; ("@max_pos", INT (Int64.of_int (Int_map.max_binding x.word_of_pos |> fst)))
               ; ("@now", INT now)
               ]
        ignore;
      if not already_in_transaction then (
        step_stmt ~db "BEGIN IMMEDIATE" ignore;
      );
      (* One row per page: line count and inclusive position range. *)
      with_stmt ~db
        {|
        INSERT INTO page_info
        (doc_id, page_num, line_count, start_pos, end_inc_pos)
        VALUES
        (@doc_id, @page_num, @line_count, @start_pos, @end_inc_pos)
        ON CONFLICT(doc_id, page_num) DO NOTHING
        |}
        (fun stmt ->
           Int_map.iter (fun page_num line_count ->
               let (start_pos, end_inc_pos) =
                 Int_map.find page_num x.start_end_inc_pos_of_page_num
               in
               bind_names stmt [ ("@doc_id", INT doc_id)
                               ; ("@page_num", INT (Int64.of_int page_num))
                               ; ("@line_count", INT (Int64.of_int line_count))
                               ; ("@start_pos", INT (Int64.of_int start_pos))
                               ; ("@end_inc_pos", INT (Int64.of_int end_inc_pos))
                               ];
               step stmt;
               reset stmt;
             )
             x.line_count_of_page_num
        );
      (* One row per line: position range plus page coordinates. *)
      with_stmt ~db
        {|
        INSERT INTO line_info
        (doc_id, global_line_num, start_pos, end_inc_pos, page_num, line_num_in_page)
        VALUES
        (@doc_id, @global_line_num, @start_pos, @end_inc_pos, @page_num, @line_num_in_page)
        ON CONFLICT(doc_id, global_line_num) DO NOTHING
        |}
        (fun stmt ->
           Int_map.iter (fun line_num line_loc ->
               let (start_pos, end_inc_pos) =
                 Int_map.find line_num x.start_end_inc_pos_of_global_line_num
               in
               let page_num = line_loc.Line_loc.page_num in
               let line_num_in_page = line_loc.Line_loc.line_num_in_page in
               bind_names stmt [ ("@doc_id", INT doc_id)
                               ; ("@global_line_num", INT (Int64.of_int line_num))
                               ; ("@start_pos", INT (Int64.of_int start_pos))
                               ; ("@end_inc_pos", INT (Int64.of_int end_inc_pos))
                               ; ("@page_num", INT (Int64.of_int page_num))
                               ; ("@line_num_in_page", INT (Int64.of_int line_num_in_page))
                               ];
               step stmt;
               reset stmt;
             )
             x.line_loc_of_global_line_num;
        );
      (* One row per token position: which word sits there. *)
      with_stmt ~db
        {|
        INSERT INTO position
        (doc_id, pos, word_id)
        VALUES
        (@doc_id, @pos, @word_id)
        ON CONFLICT(doc_id, pos) DO NOTHING
        |}
        (fun stmt ->
           Int_map.iter (fun word_id pos_s ->
               Int_set.iter (fun pos ->
                   bind_names stmt
                     [ ("@doc_id", INT doc_id)
                     ; ("@pos", INT (Int64.of_int pos))
                     ; ("@word_id", INT (Int64.of_int word_id))
                     ];
                   step stmt;
                   reset stmt;
                 )
                 pos_s
             )
             x.pos_s_of_word
        );
      (* Extracted links with their covered position ranges. *)
      with_stmt ~db
        {|
        INSERT INTO link
        (doc_id, start_pos, end_inc_pos, typ, link)
        VALUES
        (@doc_id, @start_pos, @end_inc_pos, @typ, @link)
        ON CONFLICT(doc_id, start_pos, end_inc_pos) DO NOTHING
        |}
        (fun stmt ->
           Array.iter (fun link ->
               let { Link.start_pos; end_inc_pos; typ; link } = link in
               let typ = Link.string_of_typ typ in
               bind_names stmt
                 [ ("@doc_id", INT doc_id)
                 ; ("@start_pos", INT (Int64.of_int start_pos))
                 ; ("@end_inc_pos", INT (Int64.of_int end_inc_pos))
                 ; ("@typ", TEXT typ)
                 ; ("@link", TEXT link)
                 ];
               step stmt;
               reset stmt;
             )
             x.links
        );
      (* Word -> document links, mirrored into the in-memory [State]
         as they are written. *)
      with_stmt ~db
        {|
        INSERT INTO word_id_doc_id_link
        (word_id, doc_id)
        VALUES
        (@word_id, @doc_id)
        ON CONFLICT(word_id, doc_id) DO NOTHING
        |}
        (fun stmt ->
           Int_map.iter (fun word_id _pos_s ->
               State.add_word_id_doc_id_link ~word_id ~doc_id;
               bind_names stmt
                 [ ("@word_id", INT (Int64.of_int word_id))
                 ; ("@doc_id", INT doc_id)
                 ];
               step stmt;
               reset stmt;
             )
             x.pos_s_of_word
        );
      (* Mark the document fully written. *)
      step_stmt ~db
        {|
        UPDATE doc_info
        SET status = 'COMPLETED'
        WHERE id = @doc_id
        |}
        ~names:[ ("@doc_id", INT doc_id) ]
        ignore;
      if not already_in_transaction then (
        step_stmt ~db "COMMIT" ignore;
      );
    )
(* Total line count of document [doc_id], as cached in [doc_info] at
   indexing time. *)
let global_line_count ~doc_id =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT global_line_count FROM doc_info
WHERE id = @doc_id
|}
    ~names:[ ("@doc_id", INT doc_id) ]
    (fun st -> column_int st 0)
(* Page count of document [doc_id], as cached in [doc_info]. *)
let page_count ~doc_id =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT page_count FROM doc_info
WHERE id = @doc_id
|}
    ~names:[ ("@doc_id", INT doc_id) ]
    (fun st -> column_int st 0)
(* Collect the values of [m], in increasing key order, into a read-only
   vector. *)
let ccvector_of_int_map
  : 'a . 'a Int_map.t -> 'a CCVector.ro_vector =
  fun m ->
    CCVector.freeze (CCVector.of_seq (Seq.map snd (Int_map.to_seq m)))
(* Query used by [is_indexed]: a [doc_info] row for [doc_hash] counts only
   once indexing finished (status COMPLETED, set at the end of
   [write_raw_to_db]). Exported via the .mli as a plain string. *)
let is_indexed_sql =
  {|
SELECT 1
FROM doc_info
WHERE hash = @doc_hash
AND status = 'COMPLETED'
|}
(* Whether a document with hash [doc_hash] has been fully indexed.
   [data_count] is 0 when the query yields no row. *)
let is_indexed ~doc_hash =
  let open Sqlite3_utils in
  step_stmt is_indexed_sql
    ~names:[ ("@doc_hash", TEXT doc_hash) ]
    (fun st -> data_count st > 0)
(* The word stored at absolute position [pos] of document [doc_id]. *)
let word_of_pos ~doc_id pos : string =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT word.word
FROM position p
JOIN word
ON word.id = p.word_id
WHERE p.doc_id = @doc_id
AND p.pos = @pos
|}
    ~names:[ ("@doc_id", INT doc_id); ("@pos", INT (Int64.of_int pos)) ]
    (fun st -> column_text st 0)
(* ASCII-lowercased variant of [word_of_pos]. *)
let word_ci_of_pos ~doc_id pos : string =
  String.lowercase_ascii (word_of_pos ~doc_id pos)
(* Words whose positions fall within the inclusive range [start, end_inc] of
   document [doc_id], in ascending position order.

   Results are memoized in an LRU cache keyed by (doc_id, start, end_inc);
   the mutex serializes cache access across Eio fibers. NOTE: the cached
   Dynarray is returned as-is, so mutating a returned value would corrupt
   the cache. *)
let words_between_start_and_end_inc : doc_id:int64 -> int * int -> string Dynarray.t =
  let lock = Eio.Mutex.create () in
  let cache =
    CCCache.lru ~eq:(fun (x0, y0, z0) (x1, y1, z1) ->
        Int64.equal x0 x1
        && Int.equal y0 y1
        && Int.equal z0 z1
      )
      10240
  in
  fun ~doc_id (start, end_inc) ->
    Eio.Mutex.use_rw ~protect:false lock (fun () ->
        CCCache.with_cache cache (fun (doc_id, start, end_inc) ->
            let open Sqlite3_utils in
            let acc = Dynarray.create () in
            iter_stmt
              {|
SELECT word.word
FROM position p
JOIN word
ON word.id = p.word_id
WHERE p.doc_id = @doc_id
AND p.pos BETWEEN @start AND @end_inc
ORDER BY p.pos
|}
              ~names:[ ("@doc_id", INT doc_id)
                     ; ("@start", INT (Int64.of_int start))
                     ; ("@end_inc", INT (Int64.of_int end_inc))
                     ]
              (fun data ->
                 Dynarray.add_last acc (Data.to_string_exn data.(0))
              );
            acc
          )
          (doc_id, start, end_inc)
      )
(* Words of line [x] (document-wide, 0-based) of document [doc_id].
   Raises [Invalid_argument] when [x] is out of range.

   Memoized through an LRU cache keyed by (doc_id, x); the mutex serializes
   cache access across fibers. The underlying range lookup delegates to
   [words_between_start_and_end_inc] (which has its own cache). *)
let words_of_global_line_num : doc_id:int64 -> int -> string Dynarray.t =
  let lock = Eio.Mutex.create () in
  let cache =
    CCCache.lru ~eq:(fun (x0, y0) (x1, y1) ->
        Int64.equal x0 x1 && Int.equal y0 y1)
      10240
  in
  fun ~doc_id x ->
    Eio.Mutex.use_rw ~protect:false lock (fun () ->
        CCCache.with_cache cache (fun (doc_id, x) ->
            let open Sqlite3_utils in
            if x >= global_line_count ~doc_id then (
              invalid_arg "Index.words_of_global_line_num: global_line_num out of range"
            ) else (
              let start, end_inc =
                step_stmt
                  {|
SELECT start_pos, end_inc_pos
FROM line_info
WHERE doc_id = @doc_id
AND global_line_num = @x
|}
                  ~names:[ ("@doc_id", INT doc_id)
                         ; ("@x", INT (Int64.of_int x))
                         ]
                  (fun stmt ->
                     (column_int stmt 0, column_int stmt 1)
                  )
              in
              words_between_start_and_end_inc ~doc_id (start, end_inc)
            )
          )
          (doc_id, x)
      )
(* Words of page [x] (0-based) of document [doc_id].
   Raises [Invalid_argument] when [x] is outside the page range. *)
let words_of_page_num ~doc_id x : string Dynarray.t =
  let open Sqlite3_utils in
  if x >= page_count ~doc_id then
    invalid_arg "Index.words_of_page_num: page_num out of range"
  else begin
    let range =
      step_stmt
        {|
SELECT start_pos, end_inc_pos
FROM page_info
WHERE doc_id = @doc_id
AND page_num = @x
|}
        ~names:[ ("@doc_id", INT doc_id); ("@x", INT (Int64.of_int x)) ]
        (fun st -> (column_int st 0, column_int st 1))
    in
    words_between_start_and_end_inc ~doc_id range
  end
(* Reconstruct line [x] of document [doc_id] by concatenating its words
   without a separator. Raises [Invalid_argument] when out of range. *)
let line_of_global_line_num ~doc_id x =
  if x >= global_line_count ~doc_id then
    invalid_arg "Index.line_of_global_line_num: global_line_num out of range"
  else
    String.concat "" (Dynarray.to_list (words_of_global_line_num ~doc_id x))
(* Resolve a document-wide line number to its (page, line-within-page)
   location. Raises [Invalid_argument] when out of range. *)
let line_loc_of_global_line_num ~doc_id global_line_num : Line_loc.t =
  let open Sqlite3_utils in
  if global_line_num >= global_line_count ~doc_id then
    invalid_arg "Index.line_loc_of_global_line_num: global_line_num out of range"
  else begin
    let page_num, line_num_in_page =
      step_stmt
        {|
SELECT page_num, line_num_in_page
FROM line_info
WHERE doc_id = @doc_id
AND global_line_num = @global_line_num
|}
        ~names:[ ("@doc_id", INT doc_id)
               ; ("@global_line_num", INT (Int64.of_int global_line_num)) ]
        (fun st -> (column_int st 0, column_int st 1))
    in
    { Line_loc.page_num; line_num_in_page; global_line_num }
  end
(* Resolve an absolute word position to a (line location, offset-in-line)
   pair, by finding the line whose [start_pos, end_inc_pos] range contains
   [pos]. *)
let loc_of_pos ~doc_id pos : Loc.t =
  let open Sqlite3_utils in
  let pos_in_line, global_line_num =
    step_stmt
      {|
SELECT @pos - start_pos, global_line_num
FROM line_info
WHERE doc_id = @doc_id
AND @pos BETWEEN start_pos AND end_inc_pos
|}
      ~names:[ ("@doc_id", INT doc_id)
             ; ("@pos", INT (Int64.of_int pos)) ]
      (fun stmt ->
         (column_int stmt 0, column_int stmt 1)
      )
  in
  let line_loc = line_loc_of_global_line_num ~doc_id global_line_num in
  { line_loc; pos_in_line }
(* Highest word position of document [doc_id], as cached in [doc_info] at
   indexing time. *)
let max_pos ~doc_id =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT max_pos
FROM doc_info
WHERE id = @doc_id
|}
    ~names:[ ("@doc_id", INT doc_id) ]
    (fun st -> column_int st 0)
(* Number of lines on page [page] of document [doc_id].

   Bug fix: the WHERE clause previously filtered on a non-existent [page]
   column, which makes SQLite fail to prepare the statement at runtime; the
   [page_info] schema's column is [page_num] (and every other page_info
   query in this module uses [page_num]). *)
let line_count_of_page_num ~doc_id page : int =
  let open Sqlite3_utils in
  step_stmt
    {|
SELECT line_count
FROM page_info
WHERE doc_id = @doc_id
AND page_num = @page
|}
    ~names:[ ("@doc_id", INT doc_id)
           ; ("@page", INT (Int64.of_int page)) ]
    (fun stmt ->
       column_int stmt 0
    )
(* Inclusive (start_pos, end_inc_pos) word-position range covered by the
   given line. Raises [Invalid_argument] when out of range. *)
let start_end_inc_pos_of_global_line_num ~doc_id global_line_num =
  let open Sqlite3_utils in
  if global_line_num >= global_line_count ~doc_id then
    invalid_arg "Index.start_end_inc_pos_of_global_line_num: global_line_num out of range"
  else
    step_stmt
      {|
SELECT start_pos, end_inc_pos
FROM line_info
WHERE doc_id = @doc_id
AND global_line_num = @global_line_num
|}
      ~names:[ ("@doc_id", INT doc_id)
             ; ("@global_line_num", INT (Int64.of_int global_line_num)) ]
      (fun st -> (column_int st 0, column_int st 1))
(* Core search machinery. External code normally uses the top-level [search]
   wrapper further below; the job-level pieces are also re-exported (see
   index.mli) for finer-grained control. *)
module Search = struct
  module ET = Search_phrase.Enriched_token

  (* Positions (ascending per word id) of all occurrences of the given word
     ids in document [doc_id], concatenated in the order the ids are
     supplied. *)
  let positions_of_words
      ~doc_id
      (words : int Seq.t)
    : int Dynarray.t =
    let open Sqlite3_utils in
    let acc = Dynarray.create () in
    let f data =
      Dynarray.add_last acc (Data.to_int_exn data.(0))
    in
    with_stmt
      {|
SELECT
p.pos
FROM position p
WHERE doc_id = @doc_id
AND word_id = @word_id
ORDER BY p.pos
|}
      (fun stmt ->
         Seq.iter (fun word_id ->
             bind_names stmt [ ("@doc_id", INT doc_id)
                             ; ("@word_id", INT (Int64.of_int word_id))
                             ];
             Rc.check (iter stmt ~f);
             reset stmt;
           )
           words
      );
    acc

  (* Positions near [around_pos] whose indexed word is compatible with
     [token]. The scan window depends on whether the token is linked to the
     previous one (exact/prefix/suffix linked tokens must sit exactly at
     [around_pos + 1]; fuzzy linked tokens get a small window) and is
     clipped to [within] when given. *)
  let usable_positions
      ~doc_id
      ?within
      ~around_pos
      (token : Search_phrase.Enriched_token.t)
    : int Seq.t =
    let open Sqlite3_utils in
    Eio.Fiber.yield ();
    let match_typ = ET.match_typ token in
    let start, end_inc =
      let start, end_inc =
        if ET.is_linked_to_prev token then (
          match match_typ with
          | `Fuzzy ->
            (around_pos - !Params.max_linked_token_search_dist,
             around_pos + !Params.max_linked_token_search_dist)
          | `Exact | `Prefix | `Suffix ->
            (around_pos + 1,
             around_pos + 1)
        ) else (
          (around_pos - !Params.max_token_search_dist,
           around_pos + !Params.max_token_search_dist)
        )
      in
      match within with
      | None -> (start, end_inc)
      | Some (within_start_pos, within_end_inc_pos) -> (
          (max within_start_pos start, min within_end_inc_pos end_inc)
        )
    in
    let positions : int Dynarray.t =
      let acc : int Dynarray.t =
        Dynarray.create ()
      in
      (* Memoize the (potentially expensive) compatibility verdict per
         distinct indexed word within this call. *)
      let cache : (string, bool) Hashtbl.t = Hashtbl.create 100 in
      let f data =
        Eio.Fiber.yield ();
        let indexed_word = Data.to_string_exn data.(0) in
        let pos = Data.to_int_exn data.(1) in
        let compatible =
          match Hashtbl.find_opt cache indexed_word with
          | None -> (
              let compatible = ET.compatible_with_word token indexed_word in
              Hashtbl.replace cache indexed_word compatible;
              compatible
            )
          | Some compatible -> compatible
        in
        if compatible then (
          Dynarray.add_last acc pos
        )
      in
      (
        (* Pre-filter in SQL where the token kind allows it; the final
           verdict still comes from [ET.compatible_with_word] above. *)
        let extra_sql =
          match ET.data token with
          | `Explicit_spaces -> (
              {|AND (
word LIKE ' %'
OR
word LIKE char(9) || '%'
OR
word LIKE char(10) || '%'
OR
word LIKE char(13) || '%'
)
|}
            )
          | `String search_word -> (
              (* Escape for embedding in a single-quoted LIKE pattern.
                 NOTE(review): '_' (the single-char LIKE wildcard) is not
                 escaped — confirm whether exact/prefix pre-filters can
                 over-match for search words containing '_'. *)
              let search_word = search_word
                                |> CCString.replace ~sub:"'" ~by:"''"
                                |> CCString.replace ~sub:"\\" ~by:"\\\\"
                                |> CCString.replace ~sub:"%" ~by:"\\%"
              in
              match match_typ with
              | `Fuzzy | `Suffix -> ""
              | `Exact -> (
                  Fmt.str "AND word LIKE '%s' ESCAPE '\\'" search_word
                )
              | `Prefix -> (
                  Fmt.str "AND word LIKE '%s%%' ESCAPE '\\'" search_word
                )
            )
        in
        iter_stmt
          (Fmt.str
             {|
SELECT
word.word AS word,
p.pos as pos
FROM position p
JOIN word
ON p.word_id = word.id
WHERE p.doc_id = @doc_id
AND p.pos BETWEEN @start AND @end_inc
%s
|}
             extra_sql)
          ~names:[ ("@doc_id", INT doc_id)
                 ; ("@start", INT (Int64.of_int start))
                 ; ("@end_inc", INT (Int64.of_int end_inc))
                 ]
          f
      );
      acc
    in
    Dynarray.to_seq positions

  (* All placements of the token list [l] starting around [around_pos]:
     each element of the result is one list of chosen positions, one
     position per token, each chosen near the previous token's position. *)
  let search_around_pos
      ~doc_id
      ~(within : (int * int) option)
      (around_pos : int)
      (l : Search_phrase.Enriched_token.t list)
    : int list Seq.t =
    let rec aux around_pos l =
      Eio.Fiber.yield ();
      match l with
      | [] -> Seq.return []
      | token :: rest -> (
          usable_positions
            ~doc_id
            ?within
            ~around_pos
            token
          |> Seq.flat_map (fun pos ->
              aux pos rest
              |> Seq.map (fun l -> pos :: l)
            )
        )
    in
    aux around_pos l

  (* [Search_result_heap.merge] with a fiber yield, to keep long merge folds
     cooperative. *)
  let search_result_heap_merge_with_yield x y =
    Eio.Fiber.yield ();
    Search_result_heap.merge x y

  module Search_job = struct
    (* Raised out of [run] when [terminate_on_result_found] is set and a
       match exists; the exception is exported via index.mli, so it is
       presumably caught by callers — confirm at call sites. *)
    exception Result_found

    type t = {
      stop_signal : Stop_signal.t;
      terminate_on_result_found : bool;
      cancellation_notifier : bool Atomic.t;  (* set to true on cancellation *)
      doc_id : int64;
      within_same_line : bool;
      phrase : Search_phrase.t;
      start_pos : int;  (* candidate position for the phrase's first token *)
      search_limit_per_start : int;  (* result heap size cap for this job *)
    }

    (* Plain record constructor. *)
    let make
        stop_signal
        ~terminate_on_result_found
        ~cancellation_notifier
        ~doc_id
        ~within_same_line
        ~phrase
        ~start_pos
        ~search_limit_per_start
      =
      {
        stop_signal;
        terminate_on_result_found;
        cancellation_notifier;
        doc_id;
        within_same_line;
        phrase;
        start_pos;
        search_limit_per_start;
      }

    (* Search for [t.phrase] anchored at [t.start_pos] (taken as the first
       token's position), keeping at most [t.search_limit_per_start] results.
       Races the search against [t.stop_signal]: on cancellation, flags
       [t.cancellation_notifier] and returns an empty heap. *)
    let run (t : t) : Search_result_heap.t =
      match Search_phrase.enriched_tokens t.phrase with
      | [] -> Search_result_heap.empty
      | _ :: rest -> (
          let doc_id = t.doc_id in
          let within =
            if t.within_same_line then (
              (* Constrain subsequent tokens to the line containing
                 [t.start_pos]. *)
              let loc = loc_of_pos ~doc_id t.start_pos in
              Some (start_end_inc_pos_of_global_line_num ~doc_id loc.line_loc.global_line_num)
            ) else (
              None
            )
          in
          Eio.Fiber.first
            (fun () ->
               Stop_signal.await t.stop_signal;
               Atomic.set t.cancellation_notifier true;
               Search_result_heap.empty)
            (fun () ->
               search_around_pos
                 ~doc_id
                 ~within
                 t.start_pos
                 rest
               |> Seq.map (fun l -> t.start_pos :: l)
               |> Seq.map (fun (l : int list) ->
                   if t.terminate_on_result_found then (
                     raise Result_found
                   );
                   Eio.Fiber.yield ();
                   (* Bracket-matching score: for each (opening, closing)
                      index pair within the found words, count it as matched
                      when the same-kind symbols strictly between the two
                      document positions balance out. *)
                   let opening_closing_symbol_pairs =
                     List.map (fun pos -> word_of_pos ~doc_id pos) l
                     |> Misc_utils.opening_closing_symbol_pairs
                   in
                   let found_phrase_opening_closing_symbol_match_count =
                     let pos_arr : int array = Array.of_list l in
                     List.fold_left (fun total (x, y) ->
                         let pos_x = pos_arr.(x) in
                         let pos_y = pos_arr.(y) in
                         let c_x = String.get (word_of_pos ~doc_id pos_x) 0 in
                         let c_y = String.get (word_of_pos ~doc_id pos_y) 0 in
                         assert (List.exists (fun (x, y) -> c_x = x && c_y = y)
                                   Params.opening_closing_symbols);
                         if pos_x < pos_y then (
                           (* Track unclosed openers of this kind between
                              the two positions; [None] marks an
                              over-closed (unbalanced) stretch. *)
                           let outstanding_opening_symbol_count =
                             OSeq.(pos_x + 1 --^ pos_y)
                             |> Seq.fold_left (fun count pos ->
                                 match count with
                                 | Some count -> (
                                     let word = word_of_pos ~doc_id pos in
                                     if String.length word = 1 then (
                                       if String.get word 0 = c_x then (
                                         Some (count + 1)
                                       ) else if String.get word 0 = c_y then (
                                         if count = 0 then (
                                           None
                                         ) else (
                                           Some (count - 1)
                                         )
                                       ) else (
                                         Some count
                                       )
                                     ) else (
                                       Some count
                                     )
                                   )
                                 | None -> None
                               )
                               (Some 0)
                           in
                           match outstanding_opening_symbol_count with
                           | Some 0 -> total + 1
                           | _ -> total
                         ) else (
                           total
                         )
                       )
                       0
                       opening_closing_symbol_pairs
                   in
                   Search_result.make
                     t.phrase
                     ~found_phrase:(List.map
                                      (fun pos ->
                                         Search_result.{
                                           found_word_pos = pos;
                                           found_word_ci = word_ci_of_pos ~doc_id pos;
                                           found_word = word_of_pos ~doc_id pos;
                                         }) l)
                     ~found_phrase_opening_closing_symbol_match_count
                 )
               |> Seq.fold_left (fun best_results r ->
                   Eio.Fiber.yield ();
                   (* Keep the heap within budget: once over the limit,
                      evict one result via [take_exn]. *)
                   let best_results = Search_result_heap.add best_results r in
                   if Search_result_heap.size best_results <= t.search_limit_per_start then (
                     best_results
                   ) else (
                     let best_results, _ = Search_result_heap.take_exn best_results in
                     best_results
                   )
                 )
                 Search_result_heap.empty
            )
        )
  end

  module Search_job_group = struct
    (* A batch of [Search_job]s sharing everything except the start
       position. *)
    type t = {
      terminate_on_result_found : bool;
      stop_signal : Stop_signal.t;
      cancellation_notifier : bool Atomic.t;
      doc_id : int64;
      within_same_line : bool;
      phrase : Search_phrase.t;
      possible_start_pos_list : int list;
      search_limit_per_start : int;
    }

    (* One [Search_job] per start position in the group. *)
    let unpack (group : t) : Search_job.t Seq.t =
      let
        {
          stop_signal;
          terminate_on_result_found;
          cancellation_notifier;
          doc_id;
          within_same_line;
          phrase;
          possible_start_pos_list;
          search_limit_per_start;
        } = group in
      List.to_seq possible_start_pos_list
      |> Seq.map (fun start_pos ->
          Search_job.make
            stop_signal
            ~terminate_on_result_found
            ~cancellation_notifier
            ~doc_id
            ~within_same_line
            ~phrase
            ~start_pos
            ~search_limit_per_start
        )

    (* Run the group's jobs sequentially and merge their result heaps. *)
    let run (t : t) =
      unpack t
      |> Seq.map Search_job.run
      |> Seq.fold_left Search_result_heap.merge Search_result_heap.empty
  end

  (* Plan the search of [exp] over one document: for each flattened phrase,
     find candidate start positions for its first token (restricted to
     words present in the document and, optionally, to [search_scope]),
     then chunk those starts into job groups sized for the task pool. *)
  let make_search_job_groups
      stop_signal
      ?(terminate_on_result_found = false)
      ~(cancellation_notifier : bool Atomic.t)
      ~doc_id
      ~doc_word_ids
      ~global_first_word_candidates_lookup
      ~within_same_line
      ~(search_scope : Diet.Int.t option)
      (exp : Search_exp.t)
    : Search_job_group.t Seq.t =
    if Search_exp.is_empty exp then (
      Seq.empty
    ) else (
      Search_exp.flattened exp
      |> List.to_seq
      |> Seq.flat_map (fun phrase ->
          (* Word ids that could match the phrase's first token, narrowed
             to words actually occurring in this document. *)
          let first_word_candidates =
            match Search_phrase.enriched_tokens phrase with
            | [] -> failwith "unexpected case"
            | first_word :: _ -> (
                Search_phrase.Enriched_token.Data_map.find
                  (Search_phrase.Enriched_token.data first_word)
                  global_first_word_candidates_lookup
                |> Int_set.inter doc_word_ids
              )
          in
          let possible_starts =
            first_word_candidates
            |> Int_set.to_seq
            |> positions_of_words ~doc_id
            |> (fun arr ->
                match search_scope with
                | None -> arr
                | Some search_scope -> (
                    Dynarray.filter (fun x ->
                        Diet.Int.mem x search_scope
                      ) arr
                  )
              )
          in
          let possible_start_count = Dynarray.length possible_starts in
          if possible_start_count = 0 then (
            Seq.empty
          ) else (
            (* Spread the per-document result budget over the starts, with
               a floor per start (ceiling division). *)
            let search_limit_per_start =
              max
                Params.search_result_min_per_start
                (
                  (Params.default_search_result_total_per_document + possible_start_count - 1) / possible_start_count
                )
            in
            let search_chunk_size =
              max 10 (possible_start_count / Task_pool.size)
            in
            OSeq.(0 --^ possible_start_count)
            |> OSeq.chunks search_chunk_size
            |> Seq.map (fun index_arr ->
                Array.map (fun i ->
                    Dynarray.get possible_starts i
                  ) index_arr
                |> Array.to_list
              )
            |> Seq.map (fun possible_start_pos_list ->
                {
                  Search_job_group.stop_signal;
                  terminate_on_result_found;
                  cancellation_notifier;
                  doc_id;
                  within_same_line;
                  phrase;
                  possible_start_pos_list;
                  search_limit_per_start;
                }
              )
          )
        )
    )

  (* Run the planned job groups on [pool] and merge all result heaps. *)
  let search
      pool
      stop_signal
      ?terminate_on_result_found
      ~cancellation_notifier
      ~doc_id
      ~doc_word_ids
      ~global_first_word_candidates_lookup
      ~within_same_line
      ~search_scope
      (exp : Search_exp.t)
    : Search_result_heap.t =
    make_search_job_groups
      stop_signal
      ?terminate_on_result_found
      ~cancellation_notifier
      ~doc_id
      ~doc_word_ids
      ~global_first_word_candidates_lookup
      ~within_same_line
      ~search_scope
      exp
    |> List.of_seq
    |> Task_pool.map_list pool Search_job_group.run
    |> List.fold_left search_result_heap_merge_with_yield Search_result_heap.empty
end
(* Top-level entry point: run [Search.search] and return the results sorted
   by relevance, or [None] when the search was cancelled via [stop_signal]
   before completing. *)
let search
    pool
    stop_signal
    ?terminate_on_result_found
    ~doc_id
    ~doc_word_ids
    ~global_first_word_candidates_lookup
    ~within_same_line
    ~search_scope
    (exp : Search_exp.t)
  : Search_result.t array option =
  let cancellation_notifier = Atomic.make false in
  let results =
    Search.search
      pool
      stop_signal
      ?terminate_on_result_found
      ~cancellation_notifier
      ~doc_id
      ~doc_word_ids
      ~global_first_word_candidates_lookup
      ~within_same_line
      ~search_scope
      exp
    |> Search_result_heap.to_seq
    |> Array.of_seq
  in
  match Atomic.get cancellation_notifier with
  | true -> None
  | false ->
    Array.sort Search_result.compare_relevance results;
    Some results
(* Re-exports of the search internals at the top level of the module, as
   declared in index.mli. *)
module Search_job = Search.Search_job

module Search_job_group = Search.Search_job_group

let make_search_job_groups = Search.make_search_job_groups
(* Heap of (word_id, score) pairs ordered by score alone; used below to
   retain the top-scoring first-word candidates, evicting one element
   whenever a size budget is exceeded. *)
module Word_candidate_heap = CCHeap.Make_from_compare (struct
    type t = int * float

    let compare (_x0, y0) (_x1, y1) = Float.compare y0 y1
  end)
(* For each distinct first-token datum in [exp], compute the set of word ids
   from the global word DB that could match it, and record it in [acc].
   Data already present in [acc] are left untouched, so the lookup can be
   grown incrementally across successive expressions.

   For [`String] tokens, compatible words are additionally ranked by a
   similarity heuristic and capped at [min 500 (2.5 ^ word length)]
   candidates; [`Explicit_spaces] keeps every compatible word. *)
let generate_global_first_word_candidates_lookup
    pool
    ?(acc = Search_phrase.Enriched_token.Data_map.empty)
    (exp : Search_exp.t)
  : Int_set.t Search_phrase.Enriched_token.Data_map.t =
  Search_exp.flattened exp
  |> List.fold_left (fun acc phrase ->
      match Search_phrase.enriched_tokens phrase with
      | [] -> failwith "unexpected case"
      | first_word :: _rest -> (
          let data = Search_phrase.Enriched_token.data first_word in
          match Search_phrase.Enriched_token.Data_map.find_opt data acc with
          | None -> (
              (* Similarity in [0, 1]-ish: exact match scores highest, then
                 case-insensitive equality, then (ci-)substring containment
                 scaled by length ratio, else a normalized edit-distance
                 penalty. *)
              let score ~search_word ~found_word =
                let search_word_ci = String.lowercase_ascii search_word in
                let search_word_len = Int.to_float (String.length search_word) in
                let found_word_ci = String.lowercase_ascii found_word in
                let found_word_len = Int.to_float (String.length found_word) in
                if String.equal search_word found_word then (
                  1.0
                ) else if String.equal search_word_ci found_word_ci then (
                  0.9
                ) else if CCString.find ~sub:search_word found_word >= 0 then (
                  search_word_len
                  /.
                  found_word_len
                ) else if CCString.find ~sub:search_word_ci found_word_ci >= 0 then (
                  0.9
                  *.
                  (search_word_len
                   /.
                   found_word_len)
                ) else (
                  1.0
                  -.
                  (Int.to_float (Spelll.edit_distance search_word_ci found_word_ci)
                   /.
                   search_word_len
                  )
                )
              in
              (* All words in the global word DB compatible with the token. *)
              let candidates =
                Word_db.filter
                  pool
                  (Search_phrase.Enriched_token.compatible_with_word first_word)
              in
              let candidates =
                match data with
                | `Explicit_spaces -> (
                    Dynarray.fold_left (fun acc (id, _word) ->
                        Int_set.add id acc
                      )
                      Int_set.empty
                      candidates
                  )
                | `String s -> (
                    (* Longer search words admit exponentially more
                       candidates, up to a hard cap of 500. *)
                    let s_len = Int.to_float (String.length s) in
                    let max_candidate_count = min 500 (Int.of_float @@ Float.pow 2.5 s_len) in
                    Dynarray.fold_left (fun acc (id, word) ->
                        let acc = Word_candidate_heap.add acc (id, score ~search_word:s ~found_word:word) in
                        if Word_candidate_heap.size acc <= max_candidate_count then (
                          acc
                        ) else (
                          let acc, _ = Word_candidate_heap.take_exn acc in
                          acc
                        )
                      )
                      Word_candidate_heap.empty
                      candidates
                    |> Word_candidate_heap.to_seq
                    |> Seq.map (fun (id, _score) ->
                        id
                      )
                    |> Int_set.of_seq
                  )
              in
              Search_phrase.Enriched_token.Data_map.add
                data
                candidates
                acc
            )
          | Some _ -> acc
        )
    )
    acc
(* Ids of all distinct words occurring in document [doc_id], read from the
   word_id_doc_id_link table. *)
let word_ids ~doc_id =
  let open Sqlite3_utils in
  with_db (fun db ->
      fold_stmt ~db
        {|
SELECT word_id_doc_id_link.word_id
FROM word_id_doc_id_link
WHERE word_id_doc_id_link.doc_id = @doc_id
|}
        ~names:[ ("@doc_id", INT doc_id) ]
        (fun set row -> Int_set.add (Data.to_int_exn row.(0)) set)
        Int_set.empty
    )
(* All links recorded for document [doc_id], in the order SQLite returns
   them. Raises if a stored [typ] tag is not recognized by
   [Link.typ_of_string]. *)
let links ~doc_id : Link.t array =
  let open Sqlite3_utils in
  let collected = Dynarray.create () in
  with_db (fun db ->
      iter_stmt ~db
        {|
SELECT start_pos, end_inc_pos, typ, link
FROM link
WHERE link.doc_id = @doc_id
|}
        ~names:[ ("@doc_id", INT doc_id) ]
        (fun row ->
           let typ =
             Option.get (Link.typ_of_string (Data.to_string_exn row.(2)))
           in
           Dynarray.add_last collected
             { Link.start_pos = Data.to_int_exn row.(0)
             ; end_inc_pos = Data.to_int_exn row.(1)
             ; typ
             ; link = Data.to_string_exn row.(3)
             }
        );
    );
  Dynarray.to_array collected
================================================
FILE: lib/index.mli
================================================
(** Location of a line within a document. *)
module Line_loc : sig
  type t

  val page_num : t -> int

  val line_num_in_page : t -> int

  val global_line_num : t -> int

  val compare : t -> t -> int
end

(** Location of a single word position within a document. *)
module Loc : sig
  type t

  val line_loc : t -> Line_loc.t

  val pos_in_line : t -> int
end

(** ASCII-lowercased variant of [word_of_pos]. *)
val word_ci_of_pos : doc_id:int64 -> int -> string

(** Word stored at the given absolute position. *)
val word_of_pos : doc_id:int64 -> int -> string

(** Words of the given document-wide line number.
    Raises [Invalid_argument] when out of range. *)
val words_of_global_line_num : doc_id:int64 -> int -> string Dynarray.t

(** Line content reconstructed by concatenating its words.
    Raises [Invalid_argument] when out of range. *)
val line_of_global_line_num : doc_id:int64 -> int -> string

(** Page/line location of the given document-wide line number.
    Raises [Invalid_argument] when out of range. *)
val line_loc_of_global_line_num : doc_id:int64 -> int -> Line_loc.t

(** Location of the given absolute word position. *)
val loc_of_pos : doc_id:int64 -> int -> Loc.t

(** Highest word position recorded for the document. *)
val max_pos : doc_id:int64 -> int

(** Words of the given page number.
    Raises [Invalid_argument] when out of range. *)
val words_of_page_num : doc_id:int64 -> int -> string Dynarray.t

(** Line count of the given page number. *)
val line_count_of_page_num : doc_id:int64 -> int -> int

(** Candidate word ids per first-token datum of the expression's phrases;
    [acc] allows building the lookup incrementally. *)
val generate_global_first_word_candidates_lookup :
  Task_pool.t ->
  ?acc:Int_set.t Search_phrase.Enriched_token.Data_map.t ->
  Search_exp.t ->
  Int_set.t Search_phrase.Enriched_token.Data_map.t

(** Search one document; [None] when cancelled via the stop signal before
    completion, otherwise results sorted by relevance. *)
val search :
  Task_pool.t ->
  Stop_signal.t ->
  ?terminate_on_result_found : bool ->
  doc_id:int64 ->
  doc_word_ids:Int_set.t ->
  global_first_word_candidates_lookup:Int_set.t Search_phrase.Enriched_token.Data_map.t ->
  within_same_line:bool ->
  search_scope:Diet.Int.t option ->
  Search_exp.t ->
  Search_result.t array option

(** A single search job, anchored at one start position. *)
module Search_job : sig
  exception Result_found

  type t

  val run : t -> Search_result_heap.t
end

(** A batch of search jobs sharing everything but the start position. *)
module Search_job_group : sig
  type t

  val unpack : t -> Search_job.t Seq.t

  val run : t -> Search_result_heap.t
end

(** Plan a search as a sequence of job groups (see [Search_job_group]). *)
val make_search_job_groups :
  Stop_signal.t ->
  ?terminate_on_result_found : bool ->
  cancellation_notifier:bool Atomic.t ->
  doc_id:int64 ->
  doc_word_ids:Int_set.t ->
  global_first_word_candidates_lookup:Int_set.t Search_phrase.Enriched_token.Data_map.t ->
  within_same_line:bool ->
  search_scope:Diet.Int.t option ->
  Search_exp.t ->
  Search_job_group.t Seq.t

(** Total line count of the document. *)
val global_line_count : doc_id:int64 -> int

(** Page count of the document. *)
val page_count : doc_id:int64 -> int

(** SQL text used by [is_indexed]. *)
val is_indexed_sql : string

(** Whether a document with this hash has been fully indexed
    (status COMPLETED). *)
val is_indexed : doc_hash:string -> bool

val refresh_last_used_batch : int64 list -> unit

val document_count : unit -> int

(** Drop all index data of the least recently used documents beyond the
    [keep_n_latest] most recent. *)
val prune_old_documents : keep_n_latest:int -> unit

(** In-memory representation of a freshly parsed document index, produced
    from its lines or pages and written out via [write_raw_to_db]. *)
module Raw : sig
  type t

  val word_ids : t -> Int_set.t

  val of_lines : Task_pool.t -> string Seq.t -> t

  val of_pages : Task_pool.t -> string list Seq.t -> t

  val links : t -> Link.t array
end

(** Ids of all distinct words occurring in the document. *)
val word_ids : doc_id:int64 -> Int_set.t

val write_raw_to_db :
  Sqlite3.db ->
  already_in_transaction:bool ->
  doc_id:int64 ->
  Raw.t ->
  unit

(** In-memory mirror of word-id/doc-id membership. *)
module State : sig
  val union_doc_ids_of_word_id_into_bv : word_id:int -> into:CCBV.t -> unit

  val read_from_db : unit -> unit
end

(** All links recorded for the document. *)
val links : doc_id:int64 -> Link.t array
================================================
FILE: lib/int_map.ml
================================================
(* Map keyed by [int], via containers' [CCMap] (a superset of stdlib [Map]). *)
include CCMap.Make (Int)
================================================
FILE: lib/int_set.ml
================================================
(* Set of [int], via containers' [CCSet] (a superset of stdlib [Set]). *)
include CCSet.Make (Int)
================================================
FILE: lib/link.ml
================================================
(* Link syntax family; [string_of_typ] / [typ_of_string] convert to and from
   the lowercase tags stored in the DB. *)
type typ = [
  | `Markdown
  | `Wiki
  | `URL
]
(* Lowercase tag stored in the DB's [link.typ] column; inverse of
   [typ_of_string]. *)
let string_of_typ : typ -> string = function
  | `Markdown -> "markdown"
  | `Wiki -> "wiki"
  | `URL -> "url"
(* Case-insensitive inverse of [string_of_typ]; [None] on unknown input. *)
let typ_of_string (s : string) : typ option =
  List.assoc_opt (String.lowercase_ascii s)
    [ ("markdown", `Markdown); ("wiki", `Wiki); ("url", `URL) ]
(* A link occurrence: the inclusive word-position span it covers, its
   syntax kind, and the link target text. *)
type t = {
  start_pos : int;
  end_inc_pos : int;
  typ : typ;
  link : string;
}
================================================
FILE: lib/misc_utils.ml
================================================
(* Print "error: <msg>" and terminate the process with exit code 1.
   NOTE(review): the message goes to stdout, not stderr — confirm this is
   intentional, as error output conventionally goes to stderr. *)
let exit_with_error_msg (msg : string) =
  Printf.printf "error: %s\n" msg;
  exit 1
(* Build a set of the ASCII-lowercased elements of [l]. *)
let ci_string_set_of_list (l : string list) =
  String_set.of_list (List.map String.lowercase_ascii l)
(* Whether [c] occurs within the first [n] bytes of [s] (the whole string
   when it is shorter than [n]). *)
let first_n_chars_of_string_contains ~n s c =
  let prefix =
    if String.length s <= n then s
    else String.sub s 0 n
  in
  String.contains prefix c
(* Printable ASCII: space (0x20) through tilde (0x7E). *)
let char_is_usable c =
  ' ' <= c && c <= '~'
(* Replace bytes that are neither printable ASCII nor part of a valid UTF-8
   multi-byte sequence with spaces, preserving the string's byte length.

   The output buffer starts as all spaces; only bytes worth keeping are
   blitted over:
   - a valid single-byte (ASCII) decode is kept only when printable
     (0x20-0x7E), so control characters become spaces;
   - a valid multi-byte UTF-8 sequence is copied through whole;
   - a byte at an invalid decode position is skipped, leaving the space. *)
let sanitize_string s =
  let s_len = String.length s in
  let bytes = Bytes.make s_len ' ' in
  let rec aux pos =
    if pos >= s_len then
      String.of_bytes bytes
    else (
      let decode = String.get_utf_8_uchar s pos in
      if Uchar.utf_decode_is_valid decode then (
        let c = String.get_uint8 s pos in
        if c land 0b1000_0000 = 0b0000_0000 then (
          (* ASCII byte: keep only the printable range. *)
          if 0x20 <= c && c <= 0x7E then (
            Bytes.blit_string s pos bytes pos 1
          );
          aux (pos+1)
        ) else (
          (* Valid multi-byte sequence: copy it verbatim. *)
          let len = Uchar.utf_decode_length decode in
          Bytes.blit_string s pos bytes pos len;
          aux (pos+len)
        )
      ) else (
        (* Invalid byte: advance one byte, leaving the space placeholder. *)
        aux (pos+1)
      )
    )
  in
  aux 0
(* Single pass over [s] yielding its length together with its elements as a
   list (in original order). *)
let length_and_list_of_seq (s : 'a Seq.t) : int * 'a list =
  let rec go n rev_elems seq =
    match seq () with
    | Seq.Nil -> (n, List.rev rev_elems)
    | Seq.Cons (x, rest) -> go (n + 1) (x :: rev_elems) rest
  in
  go 0 [] s
(* Integer division of [x] by [y], rounded to the nearest integer
   (for non-negative operands; exact halves round up). *)
let div_round_to_closest x y =
  let half = y / 2 in
  (x + half) / y
(* Ceiling division for non-negative operands. *)
let div_round_up x y =
  (x + y - 1) / y
(* Pair up opening/closing symbol occurrences in the token list [l].

   Returns (open_index, close_index) pairs of indices into [l], pairing each
   closing symbol with the most recent unmatched opening symbol of the same
   kind — a per-symbol stack of indices kept in the [m] accumulator. Only
   single-character tokens are considered; unmatched symbols produce no
   pair. Pairs are accumulated by consing, so the most recently closed pair
   comes first. Symbol kinds come from [Params.opening_closing_symbols]. *)
let opening_closing_symbol_pairs (l : string list) : (int * int) list =
  let _, pairs =
    CCList.foldi
      (fun ((m, pairs) : (int list Char_map.t) * ((int * int) list)) i s ->
         if String.length s = 1 then (
           let c = String.get s 0 in
           match List.assoc_opt c Params.opening_closing_symbols with
           | Some _ -> (
               (* Opening symbol: push its index on that symbol's stack. *)
               let stack =
                 match Char_map.find_opt c m with
                 | None -> []
                 | Some l -> l
               in
               (Char_map.add c (i :: stack) m, pairs)
             )
           | None -> (
               match List.assoc_opt c Params.opening_closing_symbols_flipped with
               | Some corresponding_open_symbol -> (
                   (* Closing symbol: pop the matching opener, if any. *)
                   let stack =
                     match Char_map.find_opt corresponding_open_symbol m with
                     | None -> []
                     | Some l -> l
                   in
                   match stack with
                   | [] -> (m, pairs)
                   | x :: xs -> (
                       (Char_map.add corresponding_open_symbol xs m, (x, i) :: pairs)
                     )
                 )
               | None -> (m, pairs)
             )
         ) else (
           (m, pairs)
         )
      )
      (Char_map.empty, [])
      l
  in
  pairs
(* Components of the current working directory, reversed so the deepest
   component comes first — the representation consumed by [path_of_parts]. *)
let cwd_path_parts () =
  List.rev (CCString.split ~by:Filename.dir_sep (Sys.getcwd ()))
(* Re-join a reversed component list into a path. An empty list or a lone
   "" component (the filesystem root) yields the separator itself. *)
let path_of_parts parts =
  let components = List.rev parts in
  match components with
  | [] | [ "" ] -> Filename.dir_sep
  | [ single ] -> single
  | _ -> String.concat Filename.dir_sep components
(* Normalize a glob pattern to an absolute form:
   - "" and "." components are dropped; ".." pops the last kept component;
   - from the first "**" component onward, the remainder (including "**")
     is kept verbatim with no further normalization;
   - a relative glob is anchored at the current working directory. *)
let normalize_glob_to_absolute glob =
  let rec aux acc parts =
    match parts with
    | [] -> path_of_parts acc
    | x :: xs -> (
        match x with
        | "" | "." -> aux acc xs
        | ".." -> (
            let acc =
              match acc with
              | [] -> []
              | _ :: xs -> xs
            in
            aux acc xs
          )
        | "**" -> (
            (* Keep the remainder of the pattern untouched. *)
            aux (List.rev parts @ acc) []
          )
        | _ -> (
            aux (x :: acc) xs
          )
      )
  in
  let glob_parts = CCString.split ~by:Filename.dir_sep glob in
  match glob_parts with
  | "" :: l -> (
      (* Absolute path on Unix-like systems *)
      aux [ "" ] l
    )
  | _ -> (
      aux (cwd_path_parts ()) glob_parts
    )
(* Normalize a path to an absolute form: "" and "." components are dropped,
   ".." pops the last kept component, and a relative path is anchored at
   the current working directory. Purely lexical — no symlink resolution. *)
let normalize_path_to_absolute path =
  let rec aux acc path_parts =
    match path_parts with
    | [] -> path_of_parts acc
    | x :: xs -> (
        match x with
        | "" | "." -> aux acc xs
        | ".." -> (
            let acc =
              match acc with
              | [] -> []
              | _ :: xs -> xs
            in
            aux acc xs
          )
        | _ -> (
            aux (x :: acc) xs
          )
      )
  in
  let path_parts = CCString.split ~by:Filename.dir_sep path in
  match path_parts with
  | "" :: l -> (
      (* Absolute path on Unix-like systems *)
      aux [ "" ] l
    )
  | _ -> (
      aux (cwd_path_parts ()) path_parts
    )
================================================
FILE: lib/option_syntax.ml
================================================
(* Binding operators for [Option]: [let*] is monadic bind, [let+] is map. *)
let ( let* ) = Option.bind

let ( let+ ) x y = Option.map y x
================================================
FILE: lib/params.ml
================================================
(* Result budget per document, spread across start positions (with a floor
   per start) — see Index.Search.make_search_job_groups. *)
let default_search_result_total_per_document = 50

let search_result_min_per_start = 5

let max_token_size = 500

(* Max distance (in word positions) scanned around the previous token's
   position when placing the next token. *)
let default_max_token_search_dist = 50

let max_token_search_dist = ref default_max_token_search_dist

(* Tighter distance used for fuzzy tokens linked to the previous token. *)
let default_max_linked_token_search_dist = 2

let max_linked_token_search_dist = ref default_max_linked_token_search_dist

let default_index_chunk_size = 5000

let index_chunk_size = ref default_index_chunk_size

let search_word_automaton_cache_size = 200

let float_compare_margin = 0.000_001

(* Bracket pairs considered by the bracket-matching relevance scoring
   (see Misc_utils.opening_closing_symbol_pairs). *)
let opening_closing_symbols = [ ('(', ')')
                              ; ('[', ']')
                              ; ('{', '}')
                              ]

(* Closing-to-opening lookup derived from the above. *)
let opening_closing_symbols_flipped = List.map (fun (x, y) -> (y, x)) opening_closing_symbols

let default_max_fuzzy_edit_dist = 2

let max_fuzzy_edit_dist = ref default_max_fuzzy_edit_dist

(* Full SQLite schema of the index DB. All tables are WITHOUT ROWID with
   composite primary keys on (doc_id, ...); [doc_info] is keyed by document
   hash with a separate integer [id] used as the foreign doc_id elsewhere. *)
let db_schema =
  {|
CREATE TABLE IF NOT EXISTS line_info (
doc_id INTEGER,
global_line_num INTEGER,
start_pos INTEGER,
end_inc_pos INTEGER,
page_num INTEGER,
line_num_in_page INTEGER,
PRIMARY KEY (doc_id, global_line_num)
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS line_info_index_1 ON line_info (start_pos);

CREATE INDEX IF NOT EXISTS line_info_index_2 ON line_info (end_inc_pos);

CREATE TABLE IF NOT EXISTS position (
doc_id INTEGER,
pos INTEGER,
word_id INTEGER,
PRIMARY KEY (doc_id, pos)
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS position_index_1 ON position (doc_id, word_id, pos);

CREATE TABLE IF NOT EXISTS page_info (
doc_id INTEGER,
page_num INTEGER,
line_count INTEGER,
start_pos INTEGER,
end_inc_pos INTEGER,
PRIMARY KEY (doc_id, page_num)
) WITHOUT ROWID;

CREATE TABLE IF NOT EXISTS doc_info (
hash TEXT PRIMARY KEY,
id INTEGER,
page_count INTEGER,
global_line_count INTEGER,
max_pos INTEGER,
last_used INTEGER,
status TEXT
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS doc_info_index_1 ON doc_info (id);

CREATE INDEX IF NOT EXISTS doc_info_index_2 ON doc_info (last_used);

CREATE TABLE IF NOT EXISTS link (
doc_id INTEGER,
start_pos INTEGER,
end_inc_pos INTEGER,
typ TEXT,
link TEXT,
PRIMARY KEY (doc_id, start_pos, end_inc_pos)
) WITHOUT ROWID;

CREATE TABLE IF NOT EXISTS word (
id INTEGER,
word TEXT,
PRIMARY KEY (id)
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS word_index_1 ON word (word);

CREATE INDEX IF NOT EXISTS word_index_2 ON word (word COLLATE NOCASE);

CREATE TABLE IF NOT EXISTS word_id_doc_id_link (
word_id INTEGER,
doc_id INTEGER,
PRIMARY KEY (word_id, doc_id)
) WITHOUT ROWID;

CREATE INDEX IF NOT EXISTS word_id_doc_id_link_index_1 ON word_id_doc_id_link (doc_id);
|}

(* Path to the index DB file; set at startup. *)
let db_path : string option ref = ref None
================================================
FILE: lib/parser_components.ml
================================================
open Angstrom
(* ASCII whitespace recognized by the query parsers. *)
let is_space c =
  c = ' ' || c = '\t' || c = '\n' || c = '\r'
(* Consume zero or more whitespace characters. *)
let skip_spaces = skip_while is_space
(* Complement of [is_space]. *)
let is_not_space c = not (is_space c)
(* Consume the remaining input (possibly empty) as a string. *)
let any_string : string t = take_while (fun _ -> true)
(* ASCII letter. *)
let is_letter c =
  ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
(* ASCII decimal digit. *)
let is_digit c =
  '0' <= c && c <= '9'
(* ASCII letter or digit. *)
let is_alphanum c =
  is_letter c || is_digit c
(* High bit set: the byte is part of a multi-byte UTF-8 sequence (lead or
   continuation), i.e. not plain ASCII. *)
let is_possibly_utf_8 c =
  Char.code c >= 0x80
(* Consume exactly one UTF-8 encoded character (1-4 bytes), dispatching on
   the lead byte's bit pattern. Fails on EOF or on a byte that is not a
   valid lead byte (e.g. a bare continuation byte). NOTE: the continuation
   bytes themselves are not validated here. *)
let utf_8_char =
  peek_char >>= fun c ->
  match c with
  | None -> fail "Eof"
  | Some c -> (
      let c = Char.code c in
      if c land 0b1000_0000 = 0b0000_0000 then (
        take 1
      ) else if c land 0b1110_0000 = 0b1100_0000 then (
        take 2
      ) else if c land 0b1111_0000 = 0b1110_0000 then (
        take 3
      ) else if c land 0b1111_1000 = 0b1111_0000 then (
        take 4
      ) else (
        fail "Invalid UTF-8"
      )
    )
(* Copied from Angstrom README. *)
(* Left-associative operator chaining: parses [e (op e)*], folding results
   leftward through the functions produced by [op]. *)
let chainl1 e op =
  let rec go acc =
    (lift2 (fun f x -> f acc x) op e >>= go) <|> return acc in
  e >>= fun init -> go init
================================================
FILE: lib/search_exp.ml
================================================
(* Marker selecting exact / prefix / suffix matching for what follows it. *)
type match_typ_marker = [ `Exact | `Prefix | `Suffix ]
[@@deriving show]

(* Raw search-expression AST produced by [Parsers.p]. *)
type exp = [
  | `Word of string
  | `Match_typ_marker of match_typ_marker
  | `Explicit_spaces
  | `List of exp list
  | `Paren of exp
  | `Binary_op of binary_op * exp * exp
  | `Optional of exp
]
[@@deriving show]

and binary_op =
  | Or
[@@deriving show]

(* A parsed search expression paired with its flattening into the concrete
   search phrases it denotes (one per alternative). *)
type t = {
  exp : exp;
  flattened : Search_phrase.t list;
}
[@@deriving show]
(* The concrete search phrases denoted by [t]. *)
let flattened (t : t) = t.flattened

(* Expression denoting no phrase at all. *)
let empty : t = {
  exp = `List [];
  flattened = [];
}

(* An expression is empty when it denotes no phrase, or only empty
   phrases. *)
let is_empty (t : t) =
  t.flattened = []
  ||
  List.for_all Search_phrase.is_empty t.flattened
(* Structural equality on the underlying expressions.

   Fix: the original omitted the `Match_typ_marker and
   `Explicit_spaces cases, so both fell through to the catch-all and
   compared unequal even to themselves — [equal] was non-reflexive for
   any expression containing ', ^, $ or ~. *)
let equal (t1 : t) (t2 : t) =
  let rec aux (e1 : exp) (e2 : exp) =
    match e1, e2 with
    | `Word x1, `Word x2 -> String.equal x1 x2
    | `Match_typ_marker x1, `Match_typ_marker x2 -> x1 = x2
    | `Explicit_spaces, `Explicit_spaces -> true
    | `List l1, `List l2 -> List.equal aux l1 l2
    | `Paren e1, `Paren e2 -> aux e1 e2
    | `Binary_op (Or, e1x, e1y), `Binary_op (Or, e2x, e2y) ->
      aux e1x e2x && aux e1y e2y
    | `Optional e1, `Optional e2 -> aux e1 e2
    | _, _ -> false
  in
  aux t1.exp t2.exp
(* Smart constructors for [exp]. *)
let as_paren x : exp = `Paren x

let as_list l : exp = `List l

let as_word s : exp = `Word s

let as_word_list (l : string list) : exp = `List (List.map as_word l)
module Parsers = struct
  open Angstrom
  open Parser_components

  (* A run of phrase text: any characters except the reserved operator
     characters below, with backslash escaping any single character.
     The concatenated text is then tokenized (spaces kept). *)
  let phrase : string list Angstrom.t =
    many1 (
      take_while1 (fun c ->
          match c with
          | '?'
          | '|'
          | '\\'
          | '('
          | ')'
          | '\''
          | '^'
          | '$'
          | '~' -> false
          | _ -> true
        )
      <|>
      (char '\\' *> any_char >>| fun c -> Printf.sprintf "%c" c)
    )
    >>| fun l ->
    String.concat "" l
    |> Tokenization.tokenize ~drop_spaces:false
    |> List.of_seq

  (* '|' denotes alternation; spaces after it are skipped. *)
  let or_op =
    char '|' *> skip_spaces *> return (fun x y -> `Binary_op (Or, x, y))

  (* Full expression grammar: sequences of optionally-'?'-prefixed
     atoms, chained left-associatively by '|'. *)
  let p : exp Angstrom.t =
    fix (fun (exp : exp Angstrom.t) : exp Angstrom.t ->
        let base =
          choice [
            (phrase >>| as_word_list);
            (char '\'' *> return (`Match_typ_marker `Exact));
            (char '^' *> return (`Match_typ_marker `Prefix));
            (char '$' *> return (`Match_typ_marker `Suffix));
            (char '~' *> return (`Explicit_spaces));
            (string "()" *> return (as_word_list []));
            (char '(' *> exp <* char ')' >>| as_paren);
          ]
        in
        let opt_base =
          choice [
            (* '?' before a bare phrase makes only its first word
               optional. *)
            (char '?' *> skip_spaces *> phrase
             >>| fun l ->
             match l with
             | [] -> failwith "unexpected case"
             | x :: xs -> (
                 as_list [ `Optional (as_word x); as_word_list xs ]
               )
            );
            (char '?' *> skip_spaces *> base >>| fun p -> `Optional p);
            base;
          ]
        in
        let opt_bases =
          many1 opt_base
          >>| fun l -> `List l
        in
        chainl1 opt_bases or_op
      )
    <* skip_spaces
end
(* Splice the elements of directly nested `List nodes into their
   parent `List; all other constructors are rebuilt recursively. *)
let flatten_nested_lists (exp : exp) : exp =
  let rec aux (exp : exp) =
    match exp with
    | `Word _
    | `Match_typ_marker _
    | `Explicit_spaces -> exp
    | `List l -> (
        let splice e =
          match aux e with
          | `List inner -> inner
          | other -> [ other ]
        in
        `List (List.concat_map splice l)
      )
    | `Paren e -> `Paren (aux e)
    | `Binary_op (op, x, y) -> `Binary_op (op, aux x, aux y)
    | `Optional e -> `Optional (aux e)
  in
  aux exp
(* Expand an expression into the concrete search phrases it denotes:
   `Binary_op (Or, ...) contributes one branch per side, `Optional
   contributes an absent and a present branch, and `List takes the
   cartesian product of its elements' expansions.  [group_id] ties
   together tokens from the same parenthesized/optional scope; a
   fresh id is allocated per scope. *)
let flatten (exp : exp) : Search_phrase.t list =
  let get_group_id =
    (* Fresh-id generator for token groups. *)
    let counter = ref 0 in
    fun () ->
      let x = !counter in
      counter := x + 1;
      x
  in
  let rec aux group_id (exp : exp) : Search_phrase.annotated_token list Seq.t =
    match exp with
    | `Match_typ_marker x -> (
        Seq.return [
          Search_phrase.{ data = `Match_typ_marker x; group_id }
        ]
      )
    | `Word s ->
      Seq.return [
        Search_phrase.{ data = `String s; group_id }
      ]
    | `Explicit_spaces ->
      Seq.return [
        Search_phrase.{ data = `Explicit_spaces; group_id }
      ]
    | `List l -> (
        l
        |> List.to_seq
        |> Seq.map (aux group_id)
        |> OSeq.cartesian_product
        |> Seq.map List.concat
      )
    | `Paren e -> (
        aux (get_group_id ()) e
      )
    | `Binary_op (Or, x, y) -> (
        Seq.append
          (aux group_id x)
          (aux group_id y)
      )
    | `Optional x -> (
        (* Absent branch ([]) plus all present branches. *)
        Seq.cons [] (aux (get_group_id ()) x)
      )
  in
  aux (get_group_id ()) exp
  |> Seq.map (fun l ->
      List.to_seq l
      |> Search_phrase.of_annotated_tokens)
  |> List.of_seq
  |> List.sort_uniq Search_phrase.compare
(* Parse a search expression.  Blank input yields [empty]; a syntax
   error yields [None].  On success the tree is normalized with
   [flatten_nested_lists] and its phrase list precomputed. *)
let parse s =
  if String.length s = 0 || String.for_all Parser_components.is_space s then (
    Some empty
  ) else (
    match Angstrom.(parse_string ~consume:Consume.All) Parsers.p s with
    | Ok exp -> (
        let exp = flatten_nested_lists exp in
        Some
          { exp;
            flattened = flatten exp;
          }
      )
    | Error _ -> None
  )
================================================
FILE: lib/search_exp.mli
================================================
(** A parsed search expression. *)
type t

val pp : Format.formatter -> t -> unit

(** The expression corresponding to blank input. *)
val empty : t

(** True when the expression denotes no non-empty search phrase. *)
val is_empty : t -> bool

(** All concrete search phrases the expression denotes. *)
val flattened : t -> Search_phrase.t list

(** Returns [None] on syntax error; blank input parses to [empty]. *)
val parse : string -> t option

val equal : t -> t -> bool
================================================
FILE: lib/search_phrase.ml
================================================
(* How a search token is matched against an indexed word. *)
type match_typ = [
  | `Fuzzy
  | `Exact
  | `Suffix
  | `Prefix
]
[@@deriving show, ord]

(* Marker characters usable in queries: ' (exact), ^ (prefix),
   $ (suffix). *)
type match_typ_marker = [ `Exact | `Prefix | `Suffix ]
[@@deriving show, ord]
(* Query character for a match-typ marker. *)
let char_of_match_typ_marker (x : match_typ_marker) =
  match x with
  | `Exact -> '\''
  | `Prefix -> '^'
  | `Suffix -> '$'

(* Same as [char_of_match_typ_marker], as a one-character string. *)
let string_of_match_typ_marker (x : match_typ_marker) =
  String.make 1 (char_of_match_typ_marker x)
(* Token as produced by search-expression flattening; [group_id] ties
   together tokens originating from the same scope. *)
type annotated_token = {
  data : [
    | `String of string
    | `Match_typ_marker of match_typ_marker
    | `Explicit_spaces
  ];
  group_id : int;
}
[@@deriving show]

(* Intermediate representation used while resolving token linkage and
   match-typ markers. *)
type ir0 = {
  data : [
    | `String of string
    | `Match_typ_marker of match_typ_marker
    | `Explicit_spaces
  ];
  is_linked_to_prev : bool;
  is_linked_to_next : bool;
  match_typ : match_typ option;  (* None until a marker pass assigns one. *)
}
module Enriched_token = struct
  (* Token payload after match-typ markers have been resolved:
     either a literal word or an explicit space. *)
  type data = [ `String of string | `Explicit_spaces ]
  [@@deriving ord]

  module Data_map = Map.Make (struct
      type t = data

      let compare = compare_data
    end)

  let pp_data formatter data =
    Fmt.pf formatter "%s"
      (match data with
       | `String s -> s
       | `Explicit_spaces -> " ")

  (* A search token enriched with linkage to its neighbours, a
     precompiled fuzzy-match automaton, and its match type. *)
  type t = {
    data : data;
    is_linked_to_prev : bool;
    is_linked_to_next : bool;
    automaton : Spelll.automaton;
    match_typ : match_typ;
  }

  let make data ~is_linked_to_prev ~is_linked_to_next automaton match_typ =
    { data; is_linked_to_prev; is_linked_to_next; automaton; match_typ }

  let pp fmt (x : t) =
    Fmt.pf fmt "%a:%b:%b:%a"
      pp_data
      x.data
      x.is_linked_to_prev
      x.is_linked_to_next
      pp_match_typ
      x.match_typ

  let data (t : t) =
    t.data

  let match_typ (t : t) =
    t.match_typ

  let automaton (t : t) =
    t.automaton

  let is_linked_to_prev (t : t) =
    t.is_linked_to_prev

  let is_linked_to_next (t : t) =
    t.is_linked_to_next

  (* Lexicographic comparison over data, linkage flags and match type.
     The automaton is derived data and deliberately not compared. *)
  let compare (x : t) (y : t) =
    match compare_data x.data y.data with
    | 0 -> (
        match Bool.compare x.is_linked_to_prev y.is_linked_to_prev with
        | 0 -> (
            match Bool.compare x.is_linked_to_next y.is_linked_to_next with
            | 0 -> (
                compare_match_typ x.match_typ y.match_typ
              )
            | n -> n
          )
        | n -> n
      )
    | n -> n

  let equal (x : t) (y : t) =
    compare x y = 0

  (* Whether this token can match [indexed_word] under the token's
     match type. *)
  let compatible_with_word (token : t) indexed_word =
    String.length indexed_word > 0
    &&
    (match data token with
     | `Explicit_spaces -> (
         Parser_components.is_space indexed_word.[0]
       )
     | `String search_word -> (
         let search_word_ci = String.lowercase_ascii search_word in
         let indexed_word_ci = String.lowercase_ascii indexed_word in
         (* All-lowercase search words opt into case-insensitive
            matching (smartcase-style). *)
         let use_ci_match = String.equal search_word search_word_ci in
         let search_word_len = String.length search_word in
         let indexed_word_len = String.length indexed_word in
         let edit_dist_based_match_min_len = !Params.max_fuzzy_edit_dist + 1 in
         (* Words starting with a non-ASCII byte are only matched
            verbatim. *)
         if Parser_components.is_possibly_utf_8 indexed_word.[0] then (
           String.equal search_word indexed_word
         ) else (
           match match_typ token with
           | `Fuzzy -> (
               (* CI equality, substring either way (with a length
                  floor for the reverse direction), or edit-distance
                  match guarded by length floors and a cheap
                  first-chars filter. *)
               String.equal search_word_ci indexed_word_ci
               || CCString.find ~sub:search_word_ci indexed_word_ci >= 0
               || (indexed_word_len >= 2
                   && CCString.find ~sub:indexed_word_ci search_word_ci >= 0)
               || (search_word_len >= edit_dist_based_match_min_len
                   && indexed_word_len >= edit_dist_based_match_min_len
                   && Misc_utils.first_n_chars_of_string_contains ~n:3 indexed_word_ci search_word_ci.[0]
                   && Spelll.match_with (automaton token) indexed_word_ci)
             )
           | `Exact -> (
               if use_ci_match then (
                 String.equal search_word_ci indexed_word_ci
               ) else (
                 String.equal search_word indexed_word
               )
             )
           | `Prefix -> (
               if use_ci_match then (
                 CCString.prefix ~pre:search_word_ci indexed_word_ci
               ) else (
                 CCString.prefix ~pre:search_word indexed_word
               )
             )
           | `Suffix -> (
               if use_ci_match then (
                 CCString.suffix ~suf:search_word_ci indexed_word_ci
               ) else (
                 CCString.suffix ~suf:search_word indexed_word
               )
             )
         )
       )
    )
end
(* A search phrase: the raw annotated tokens plus their enriched form
   used for matching. *)
type t = {
  annotated_tokens : annotated_token list;
  enriched_tokens : Enriched_token.t list;
}

let is_empty (t : t) =
  List.is_empty t.enriched_tokens

let pp fmt (t : t) =
  Fmt.pf fmt "%a"
    Fmt.(list ~sep:sp Enriched_token.pp)
    t.enriched_tokens

(* LRU cache of Spelll automatons keyed by search word, shared behind
   a mutex. *)
type cache = {
  mutex : Eio.Mutex.t;
  cache : (string, Spelll.automaton) CCCache.t;
}

let cache = {
  mutex = Eio.Mutex.create ();
  cache = CCCache.lru ~eq:String.equal Params.search_word_automaton_cache_size;
}
(* Phrases compare by their enriched tokens only; [annotated_tokens]
   is the raw input they were derived from and is ignored. *)
let compare (t1 : t) (t2 : t) =
  List.compare Enriched_token.compare t1.enriched_tokens t2.enriched_tokens

let equal (t1 : t) (t2 : t) =
  compare t1 t2 = 0

let empty : t =
  {
    annotated_tokens = [];
    enriched_tokens = [];
  }
(* First lowering pass: drop whitespace tokens and record, for each
   kept token, whether it was directly adjacent to the previous token
   (same group and no intervening space).
   NOTE(review): [token_is_space] reads [s.[0]], so `String tokens are
   assumed non-empty — presumably guaranteed by the tokenizer. *)
let ir0_s_of_annotated_tokens (tokens : annotated_token Seq.t) : ir0 list =
  let token_is_space (token : annotated_token) =
    match token.data with
    | `String s -> Parser_components.is_space (String.get s 0)
    | _ -> false
  in
  let rec aux acc (prev_token : annotated_token option) (tokens : annotated_token Seq.t) =
    match tokens () with
    | Seq.Nil -> List.rev acc
    | Seq.Cons (token, rest) -> (
        let is_linked_to_prev =
          match prev_token with
          | None -> false
          | Some prev_token -> (
              (prev_token.group_id = token.group_id)
              &&
              (not (token_is_space prev_token))
            )
        in
        if token_is_space token then (
          (* Spaces are dropped; passing None breaks the link chain. *)
          aux acc None rest
        ) else (
          let ir0 : ir0 =
            { data = token.data;
              is_linked_to_prev;
              is_linked_to_next = false;
              match_typ = None;
            }
          in
          aux (ir0 :: acc) (Some token) rest
        )
      )
  in
  aux [] None tokens
(* Derive each token's [is_linked_to_next] from its successor's
   [is_linked_to_prev]; the final token keeps its default (false). *)
let ir0_s_link_forward (ir0_s : ir0 list) : ir0 list =
  let rec aux l =
    match l with
    | [] -> []
    | [ last ] -> [ last ]
    | x :: (successor :: _ as rest) ->
      { x with is_linked_to_next = successor.is_linked_to_prev }
      :: aux rest
  in
  aux ir0_s
(* Front-to-back pass resolving ' (exact) and ^ (prefix) markers.
   A marker glued to the front of a token run is removed and the run's
   tokens receive a match type: all tokens get `Exact, except that for
   ^ the run's final token gets `Prefix instead.  [token_removed]
   clears the first run token's [is_linked_to_prev], accounting for
   the removed marker token. *)
let ir0_process_exact_prefix_match_typ_markers (ir0_s : ir0 list) : ir0 list =
  let rec aux
      (acc : ir0 list)
      token_removed
      (marker : ([ `Exact ] * [ `Exact | `Prefix ]) option)
      (ir0_s : ir0 list)
    =
    match ir0_s with
    | [] -> List.rev acc
    | x :: xs -> (
        match marker with
        | None -> (
            let default () =
              aux (x :: acc) false None xs
            in
            match x.data with
            | `String _ | `Explicit_spaces ->
              default ()
            | `Match_typ_marker m -> (
                match x.match_typ with
                | None -> (
                    (* Only a marker glued to a following token starts
                       a marked run; a lone marker is kept as-is. *)
                    if x.is_linked_to_next then (
                      match m with
                      | `Exact ->
                        aux acc true (Some (`Exact, `Exact)) xs
                      | `Prefix ->
                        aux acc true (Some (`Exact, `Prefix)) xs
                      | _ ->
                        default ()
                    ) else (
                      default ()
                    )
                  )
                | Some _ ->
                  default ()
              )
          )
        | Some (m, m_last) -> (
            let x =
              if x.is_linked_to_prev then (
                { x with
                  is_linked_to_prev = not token_removed;
                  match_typ = Some (
                      (* Interior tokens of the run get [m]; the run's
                         last token gets [m_last]. *)
                      if x.is_linked_to_next then
                        (m :> match_typ)
                      else
                        (m_last :> match_typ)
                    );
                }
              ) else (
                x
              )
            in
            let marker =
              (* Marker scope ends when the run ends. *)
              if x.is_linked_to_next then
                marker
              else
                None
            in
            aux (x :: acc) false marker xs
          )
      )
  in
  aux [] false None ir0_s
(* Back-to-front pass resolving $ (suffix) markers, mirroring the
   exact/prefix pass: a marker glued to the end of a token run is
   removed and the run's tokens get `Exact, except the run's first
   token which gets `Suffix.  The input is processed reversed, and the
   accumulator restores original order (no final List.rev). *)
let ir0_process_suffix_match_typ_markers (ir0_s : ir0 list) : ir0 list =
  let rec aux
      (acc : ir0 list)
      token_removed
      (marker : ([ `Suffix ] * [ `Exact ]) option)
      (ir0_s : ir0 list)
    =
    match ir0_s with
    | [] -> acc
    | x :: xs -> (
        match marker with
        | None -> (
            let default () =
              aux (x :: acc) false None xs
            in
            match x.data with
            | `String _ | `Explicit_spaces ->
              default ()
            | `Match_typ_marker m -> (
                match x.match_typ with
                | None -> (
                    (* Only a marker glued to a preceding token starts
                       a marked run. *)
                    if x.is_linked_to_prev then (
                      match m with
                      | `Suffix ->
                        aux acc true (Some (`Suffix, `Exact)) xs
                      | _ ->
                        default ()
                    ) else (
                      default ()
                    )
                  )
                | Some _ ->
                  default ()
              )
          )
        | Some (m_first, m) -> (
            let x =
              if x.is_linked_to_next then (
                { x with
                  is_linked_to_next = not token_removed;
                  match_typ = Some (
                      (* Interior tokens get [m]; the run's first
                         token (last visited) gets [m_first]. *)
                      if x.is_linked_to_prev then
                        (m :> match_typ)
                      else
                        (m_first :> match_typ)
                    );
                }
              ) else (
                x
              )
            in
            let marker =
              if x.is_linked_to_prev then
                marker
              else
                None
            in
            aux (x :: acc) false marker xs
          )
      )
  in
  aux [] false None (List.rev ir0_s)
(* Final lowering: leftover (unconsumed) markers become literal words,
   and each string token gets a Spelll automaton (shared through the
   LRU cache) for fuzzy matching.  Tokens default to `Fuzzy when no
   marker pass assigned a match type. *)
let enriched_tokens_of_ir0 (ir0_s : ir0 list) : Enriched_token.t list =
  List.map (fun (ir0 : ir0) ->
      let data =
        match ir0.data with
        | `String s -> `String s
        | `Match_typ_marker m ->
          `String (string_of_match_typ_marker m)
        | `Explicit_spaces -> `Explicit_spaces
      in
      let is_linked_to_prev = ir0.is_linked_to_prev in
      let is_linked_to_next = ir0.is_linked_to_next in
      let automaton =
        match data with
        | `String string -> (
            Eio.Mutex.use_rw cache.mutex ~protect:false (fun () ->
                let automaton =
                  CCCache.with_cache cache.cache
                    (Spelll.of_string ~limit:!Params.max_fuzzy_edit_dist)
                    string
                in
                automaton
              )
          )
        | `Explicit_spaces ->
          (* Dummy automaton; explicit spaces are never fuzzy-matched. *)
          Spelll.of_string ~limit:0 ""
      in
      Enriched_token.make
        data
        ~is_linked_to_prev
        ~is_linked_to_next
        automaton
        (Option.value ~default:`Fuzzy ir0.match_typ)
    ) ir0_s
(* Build a phrase by running the full lowering pipeline over the
   annotated tokens: drop spaces and compute linkage, propagate
   linkage forward, resolve '/^ then $ markers, then enrich. *)
let of_annotated_tokens
    (annotated_tokens : annotated_token Seq.t)
  =
  let enriched_tokens =
    enriched_tokens_of_ir0
      (ir0_process_suffix_match_typ_markers
         (ir0_process_exact_prefix_match_typ_markers
            (ir0_s_link_forward
               (ir0_s_of_annotated_tokens annotated_tokens))))
  in
  {
    annotated_tokens = List.of_seq annotated_tokens;
    enriched_tokens;
  }
(* Build a phrase from plain words; all tokens share group 0. *)
let of_tokens
    (tokens : string Seq.t)
  =
  of_annotated_tokens
    (Seq.map (fun s -> { data = `String s; group_id = 0 }) tokens)

(* Tokenize free text (keeping spaces) and build a phrase from it. *)
let parse phrase =
  of_tokens (Tokenization.tokenize ~drop_spaces:false phrase)
(* Tokens as supplied at construction time. *)
let annotated_tokens t =
  t.annotated_tokens

(* Processed tokens used for matching and comparison. *)
let enriched_tokens t =
  t.enriched_tokens
================================================
FILE: lib/search_phrase.mli
================================================
(** How a search token is matched against an indexed word. *)
type match_typ = [
  | `Fuzzy
  | `Exact
  | `Suffix
  | `Prefix
]
[@@deriving show, ord]

(** Marker characters usable in queries: ' (exact), ^ (prefix),
    $ (suffix). *)
type match_typ_marker = [ `Exact | `Prefix | `Suffix ]
[@@deriving show]

(** Token produced by search-expression flattening; [group_id] ties
    together tokens from the same scope. *)
type annotated_token = {
  data : [
    | `String of string
    | `Match_typ_marker of match_typ_marker
    | `Explicit_spaces
  ];
  group_id : int;
}
[@@deriving show]

(** A search token enriched with neighbour linkage, a fuzzy-match
    automaton, and its match type. *)
module Enriched_token : sig
  type data = [ `String of string | `Explicit_spaces ]
  [@@deriving ord]

  module Data_map : Map.S with type key = data

  type t

  val make :
    data ->
    is_linked_to_prev:bool ->
    is_linked_to_next:bool ->
    Spelll.automaton ->
    match_typ ->
    t

  val data : t -> data

  val equal : t -> t -> bool

  val pp : Format.formatter -> t -> unit

  val match_typ : t -> match_typ

  val is_linked_to_prev : t -> bool

  val is_linked_to_next : t -> bool

  val automaton : t -> Spelll.automaton

  (** Whether the token can match the given indexed word under the
      token's match type. *)
  val compatible_with_word : t -> string -> bool
end

(** A search phrase. *)
type t

val empty : t

val compare : t -> t -> int

val equal : t -> t -> bool

val pp : Format.formatter -> t -> unit

val of_annotated_tokens : annotated_token Seq.t -> t

(** Builds a phrase from plain words; tokens all share one group. *)
val of_tokens : string Seq.t -> t

(** Tokenizes free text (spaces kept) and builds a phrase. *)
val parse : string -> t

val is_empty : t -> bool

val annotated_tokens : t -> annotated_token list

val enriched_tokens : t -> Enriched_token.t list
================================================
FILE: lib/search_result.ml
================================================
(* A word of the indexed document that matched one search token. *)
type indexed_found_word = {
  found_word_pos : int;  (* Token position within the index. *)
  found_word_ci : string;  (* Case-insensitive (presumably lowercased) form. *)
  found_word : string;
}

(* A scored match of one search phrase against a document. *)
type t = {
  score : float;
  search_phrase : Search_phrase.t;
  found_phrase : indexed_found_word list;
}
(* Two results are equal when they are for equal search phrases and
   their found words sit at the same positions.  The score is derived
   from these at construction time, so it is not compared. *)
let equal t1 t2 =
  Search_phrase.equal t1.search_phrase t2.search_phrase
  &&
  List.equal
    (fun x1 x2 -> x1.found_word_pos = x2.found_word_pos)
    t1.found_phrase
    t2.found_phrase
module Score = struct
  module ET = Search_phrase.Enriched_token

  (* Character counts aggregated over the whole phrase, bucketed by
     match quality: exact, case-insensitive exact, substring (either
     direction), case-insensitive substring (either direction), and
     fuzzy (edit-distance). *)
  type stats = {
    total_search_char_count : float;
    total_found_char_count : float;
    exact_match_found_char_count : float;
    ci_exact_match_found_char_count : float;
    sub_match_search_word_in_found_word_char_count : float;
    sub_match_found_word_in_search_word_char_count : float;
    sub_match_search_char_count : float;
    sub_match_found_char_count : float;
    ci_sub_match_search_word_in_found_word_char_count : float;
    ci_sub_match_found_word_in_search_word_char_count : float;
    ci_sub_match_search_char_count : float;
    ci_sub_match_found_char_count : float;
    fuzzy_match_edit_distance : float;
    fuzzy_match_search_char_count : float;
    fuzzy_match_found_char_count : float;
  }

  let empty_stats = {
    total_search_char_count = 0.0;
    total_found_char_count = 0.0;
    exact_match_found_char_count = 0.0;
    ci_exact_match_found_char_count = 0.0;
    sub_match_search_word_in_found_word_char_count = 0.0;
    sub_match_found_word_in_search_word_char_count = 0.0;
    sub_match_search_char_count = 0.0;
    sub_match_found_char_count = 0.0;
    ci_sub_match_search_word_in_found_word_char_count = 0.0;
    ci_sub_match_found_word_in_search_word_char_count = 0.0;
    ci_sub_match_search_char_count = 0.0;
    ci_sub_match_found_char_count = 0.0;
    fuzzy_match_edit_distance = 0.0;
    fuzzy_match_search_char_count = 0.0;
    fuzzy_match_found_char_count = 0.0;
  }

  (* Relevance score of [found_phrase] as a match for [search_phrase].
     Preconditions (asserted): the phrase is non-empty and has the
     same length as [found_phrase]. *)
  let score
      (search_phrase : Search_phrase.t)
      ~(found_phrase : indexed_found_word list)
      ~(found_phrase_opening_closing_symbol_match_count : int)
    : float =
    assert (not (Search_phrase.is_empty search_phrase));
    assert (List.length (Search_phrase.enriched_tokens search_phrase) = List.length found_phrase);
    let found_phrase_opening_closing_symbol_match_count =
      Int.to_float found_phrase_opening_closing_symbol_match_count
    in
    let quite_close_to_zero x =
      Float.abs x < Params.float_compare_margin
    in
    let add_sub_match_search_and_found_char_count ~search_word_len ~found_word_len stats =
      { stats with
        sub_match_search_char_count =
          stats.sub_match_search_char_count +. search_word_len;
        sub_match_found_char_count =
          stats.sub_match_found_char_count +. found_word_len;
      }
    in
    let add_ci_sub_match_search_and_found_char_count ~search_word_len ~found_word_len stats =
      { stats with
        ci_sub_match_search_char_count =
          stats.ci_sub_match_search_char_count +. search_word_len;
        ci_sub_match_found_char_count =
          stats.ci_sub_match_found_char_count +. found_word_len;
      }
    in
    (* Classify each (search token, found word) pair into exactly one
       bucket, tried from strongest (exact) to weakest (fuzzy). *)
    let stats =
      List.fold_left2 (fun (stats : stats) (token : ET.t) { found_word_ci; found_word; _ } ->
          let found_word_len = Int.to_float (String.length found_word) in
          match ET.data token with
          | `Explicit_spaces -> (
              (* An explicit-space token counts as a 1-char exact match. *)
              { stats with
                total_search_char_count =
                  stats.total_search_char_count +. 1.0;
                total_found_char_count =
                  stats.total_found_char_count +. 1.0;
                exact_match_found_char_count =
                  stats.exact_match_found_char_count +. 1.0;
              }
            )
          | `String search_word -> (
              let search_word_len = Int.to_float (String.length search_word) in
              let search_word_ci = String.lowercase_ascii search_word in
              let stats =
                { stats with
                  total_search_char_count =
                    stats.total_search_char_count +. search_word_len;
                  total_found_char_count =
                    stats.total_found_char_count +. found_word_len;
                }
              in
              if String.equal search_word found_word then (
                { stats with
                  exact_match_found_char_count =
                    stats.exact_match_found_char_count +. found_word_len;
                }
              ) else if String.equal search_word_ci found_word_ci then (
                { stats with
                  ci_exact_match_found_char_count =
                    stats.ci_exact_match_found_char_count +. found_word_len;
                }
              ) else if CCString.find ~sub:search_word found_word >= 0 then (
                { stats with
                  sub_match_search_word_in_found_word_char_count =
                    stats.sub_match_search_word_in_found_word_char_count +. search_word_len;
                }
                |> add_sub_match_search_and_found_char_count
                  ~search_word_len
                  ~found_word_len
              ) else if CCString.find ~sub:found_word search_word >= 0 then (
                { stats with
                  sub_match_found_word_in_search_word_char_count =
                    stats.sub_match_found_word_in_search_word_char_count +. found_word_len;
                }
                |> add_sub_match_search_and_found_char_count
                  ~search_word_len
                  ~found_word_len
              ) else if CCString.find ~sub:search_word_ci found_word_ci >= 0 then (
                { stats with
                  ci_sub_match_search_word_in_found_word_char_count =
                    stats.ci_sub_match_search_word_in_found_word_char_count +. search_word_len;
                }
                |> add_ci_sub_match_search_and_found_char_count
                  ~search_word_len
                  ~found_word_len
              ) else if CCString.find ~sub:found_word_ci search_word_ci >= 0 then (
                { stats with
                  ci_sub_match_found_word_in_search_word_char_count =
                    stats.ci_sub_match_found_word_in_search_word_char_count +. found_word_len;
                }
                |> add_ci_sub_match_search_and_found_char_count
                  ~search_word_len
                  ~found_word_len
              ) else (
                (* Fallback: fuzzy bucket, weighted by edit distance. *)
                { stats with
                  fuzzy_match_edit_distance =
                    stats.fuzzy_match_edit_distance
                    +. Int.to_float (Spelll.edit_distance search_word_ci found_word_ci);
                  fuzzy_match_search_char_count =
                    stats.fuzzy_match_search_char_count +. search_word_len;
                  fuzzy_match_found_char_count =
                    stats.fuzzy_match_found_char_count +. found_word_len;
                }
              )
            )
        )
        empty_stats
        (Search_phrase.enriched_tokens search_phrase)
        found_phrase
    in
    let search_phrase_length =
      Search_phrase.enriched_tokens search_phrase
      |> List.length
      |> Int.to_float
    in
    (* Number of distinct index positions matched. *)
    let unique_match_count =
      found_phrase
      |> List.map (fun x -> x.found_word_pos)
      |> List.sort_uniq Int.compare
      |> List.length
      |> Int.to_float
    in
    (* Sum of gaps between consecutive matches, and how many
       consecutive pairs are out of order. *)
    let (total_distance, out_of_order_match_count, _) =
      List.fold_left
        (fun (total_dist, out_of_order_match_count, last_pos) { found_word_pos = pos; _ } ->
           match last_pos with
           | None -> (total_dist, out_of_order_match_count, Some pos)
           | Some last_pos ->
             let total_dist = total_dist +. Int.to_float (abs (pos - last_pos)) in
             let out_of_order_match_count =
               if last_pos <= pos then
                 out_of_order_match_count
               else
                 out_of_order_match_count +. 1.0
             in
             (total_dist, out_of_order_match_count, Some pos)
        )
        (0.0, 0.0, None)
        found_phrase
    in
    let average_distance =
      let gaps = unique_match_count -. 1.0 in
      assert (gaps >= 0.0);
      if quite_close_to_zero gaps then (
        0.0
      ) else (
        total_distance /. gaps
      )
    in
    (* Per-bucket scores in [0, 1]; 0 when the bucket is empty. *)
    let exact_match_score =
      if quite_close_to_zero stats.exact_match_found_char_count then (
        0.0
      ) else (
        1.0
      )
    in
    let ci_exact_match_score =
      if quite_close_to_zero stats.ci_exact_match_found_char_count then (
        0.0
      ) else (
        1.0
      )
    in
    let sub_match_score_s_in_f =
      if quite_close_to_zero stats.sub_match_found_char_count then (
        0.0
      ) else (
        stats.sub_match_search_word_in_found_word_char_count
        /.
        stats.sub_match_found_char_count
      )
    in
    let sub_match_score_f_in_s =
      if quite_close_to_zero stats.sub_match_search_char_count then (
        0.0
      ) else (
        stats.sub_match_found_word_in_search_word_char_count
        /.
        stats.sub_match_search_char_count
      )
    in
    let ci_sub_match_score_s_in_f =
      if quite_close_to_zero stats.ci_sub_match_found_char_count then (
        0.0
      ) else (
        stats.ci_sub_match_search_word_in_found_word_char_count
        /.
        stats.ci_sub_match_found_char_count
      )
    in
    let ci_sub_match_score_f_in_s =
      if quite_close_to_zero stats.ci_sub_match_search_char_count then (
        0.0
      ) else (
        stats.ci_sub_match_found_word_in_search_word_char_count
        /.
        stats.ci_sub_match_search_char_count
      )
    in
    let fuzzy_match_score =
      if quite_close_to_zero stats.fuzzy_match_search_char_count then (
        0.0
      ) else (
        1.0
        -.
        (stats.fuzzy_match_edit_distance
         /.
         stats.fuzzy_match_search_char_count)
      )
    in
    let total_char_count =
      stats.total_search_char_count +. stats.total_found_char_count
    in
    (* Phrases containing any uppercase search word get a 10% bonus on
       the case-sensitive buckets. *)
    let case_sensitive_bonus_multiplier =
      if
        List.exists
          (fun token ->
             match ET.data token with
             | `Explicit_spaces -> false
             | `String search_word -> (
                 not
                   (String.equal
                      (String.lowercase_ascii search_word)
                      search_word))
          )
          (Search_phrase.enriched_tokens search_phrase)
      then (
        1.10
      ) else (
        1.00
      )
    in
    (* Per-bucket weights: each bucket's share of the characters. *)
    let exact_match_weight =
      case_sensitive_bonus_multiplier
      *.
      (stats.exact_match_found_char_count *. 2.0) /. total_char_count
    in
    let ci_exact_match_weight =
      (stats.ci_exact_match_found_char_count *. 2.0) /. total_char_count
    in
    let f_in_s_penalty_multiplier = 0.8 in
    let sub_match_weight_s_in_f =
      case_sensitive_bonus_multiplier
      *.
      stats.sub_match_found_char_count /. stats.total_found_char_count
    in
    let sub_match_weight_f_in_s =
      case_sensitive_bonus_multiplier
      *.
      f_in_s_penalty_multiplier
      *.
      (stats.sub_match_search_char_count /. stats.total_search_char_count)
    in
    let ci_sub_match_weight_s_in_f =
      (stats.ci_sub_match_found_char_count /. stats.total_found_char_count)
    in
    let ci_sub_match_weight_f_in_s =
      f_in_s_penalty_multiplier
      *.
      (stats.ci_sub_match_search_char_count /. stats.total_search_char_count)
    in
    let fuzzy_match_weight =
      (stats.fuzzy_match_found_char_count *. 2.0) /. total_char_count
    in
    (* Adjacent matches score 1.0; wider spreads are penalized up to
       20% relative to the max token search distance. *)
    let distance_score =
      if average_distance <= 1.0 then (
        1.0
      ) else (
        1.0 -. (0.20 *. (average_distance /. (Int.to_float !Params.max_token_search_dist)))
      )
    in
    (* Final score: multiplicative bonuses/penalties applied to the
       weighted sum of per-bucket scores. *)
    (1.0 +. (0.10 *. search_phrase_length))
    *.
    (1.0 +. (0.10 *. (unique_match_count /. search_phrase_length)))
    *.
    (1.0 -. (0.10 *. (out_of_order_match_count /. search_phrase_length)))
    *.
    (1.00 +. (0.10 *. found_phrase_opening_closing_symbol_match_count))
    *.
    distance_score
    *.
    (
      (exact_match_weight *. exact_match_score)
      +.
      (ci_exact_match_weight *. ci_exact_match_score)
      +.
      (sub_match_weight_s_in_f *. sub_match_score_s_in_f)
      +.
      (sub_match_weight_f_in_s *. sub_match_score_f_in_s)
      +.
      (ci_sub_match_weight_s_in_f *. ci_sub_match_score_s_in_f)
      +.
      (ci_sub_match_weight_f_in_s *. ci_sub_match_score_f_in_s)
      +.
      (fuzzy_match_weight *. fuzzy_match_score)
    )
end
(* Construct a scored result.  Raises Invalid_argument when the
   search phrase is empty or the found phrase's length differs from
   the search phrase's token count. *)
let make search_phrase ~found_phrase ~found_phrase_opening_closing_symbol_match_count =
  let len = List.length (Search_phrase.enriched_tokens search_phrase) in
  if len = 0 then
    invalid_arg "Search_result.make search_phrase is empty"
  else if len <> List.length found_phrase then
    invalid_arg "length of found_phrase does not match length of search_phrase"
  else
    { score =
        Score.score
          search_phrase
          ~found_phrase
          ~found_phrase_opening_closing_symbol_match_count;
      search_phrase;
      found_phrase;
    }
(* Accessors. *)
let search_phrase (t : t) =
  t.search_phrase

let found_phrase (t : t) =
  t.found_phrase

let score (t : t) =
  t.score
(* Order the more relevant result to the front. *)
let compare_relevance (t1 : t) (t2 : t) =
  let delta = Float.abs (t1.score -. t2.score) in
  if delta >= Params.float_compare_margin then
    (* Clearly different scores: higher score first. *)
    Float.compare t2.score t1.score
  else
    (* Scores within the comparison margin count as a tie: the result
       appearing earlier in the document goes first. *)
    Int.compare
      (List.hd t1.found_phrase).found_word_pos
      (List.hd t2.found_phrase).found_word_pos
================================================
FILE: lib/search_result.mli
================================================
(** A word of the indexed document that matched one search token. *)
type indexed_found_word = {
  found_word_pos : int;
  found_word_ci : string;
  found_word : string;
}

(** A scored match of one search phrase against a document. *)
type t

(** Raises [Invalid_argument] if the search phrase is empty or the
    found phrase's length differs from the search phrase's. *)
val make :
  Search_phrase.t ->
  found_phrase:indexed_found_word list ->
  found_phrase_opening_closing_symbol_match_count:int ->
  t

val search_phrase : t -> Search_phrase.t

val found_phrase : t -> indexed_found_word list

val score : t -> float

val equal : t -> t -> bool

(** Orders more relevant results first; scores within the comparison
    margin are broken by earlier document position. *)
val compare_relevance : t -> t -> int
================================================
FILE: lib/search_result_heap.ml
================================================
(* Heap of search results ordered by raw score, with the
   lowest-scoring result at the root (CCHeap is a min-heap under
   [leq]). *)
include CCHeap.Make (struct
    type t = Search_result.t

    let leq x y =
      (Search_result.score x) <= (Search_result.score y)
  end)
================================================
FILE: lib/sqlite3_utils.ml
================================================
include Sqlite3
(* Pool of SQLite connections sized to the task pool.  Each slot holds
   (last-used timestamp ref, connection). *)
let db_pool =
  Eio.Pool.create
    (* This is not ideal since validate is not called until next use of the
       pool, so an idle DB connection could be held for a lot longer than
       described here. But this seems to be the best we can do.
    *)
    ~validate:(fun (last_used, _db) ->
        (* Recycle connections idle for more than 30s. *)
        Unix.time () -. !last_used <= 30.0
      )
    ~dispose:(fun (_last_used, db) ->
        (* Best-effort closing. *)
        try
          let try_count = ref 0 in
          while !try_count < 10 && not (db_close db) do
            Unix.sleepf 0.01;
            incr try_count;
          done
        with
        | _ -> ()
      )
    Task_pool.size
    (fun () ->
       (ref (Unix.time ()),
        db_open
          ~mutex:`FULL
          (CCOption.get_exn_or "Docfd_lib.Params.db_path uninitialized" !Params.db_path)
       )
    )
(* Run [f] against the given connection if provided, otherwise against
   a connection borrowed from the pool (refreshing its last-used
   timestamp). *)
let with_db : type a. ?db:db -> (db -> a) -> a =
  fun ?db f ->
  match db with
  | Some db -> f db
  | None ->
    Eio.Pool.use db_pool ~never_block:true (fun (last_used, db) ->
        last_used := Unix.time ();
        f db
      )
(* Run [f], retrying up to 50 more times at 0.1s intervals while it
   returns BUSY; the final return code (possibly still BUSY) is
   returned to the caller. *)
let retry_if_busy (f : unit -> Sqlite3.Rc.t) =
  let rec attempt remaining =
    match f () with
    | BUSY when remaining > 0 ->
      Unix.sleepf 0.1;
      attempt (remaining - 1)
    | rc -> rc
  in
  attempt 50
(* Thin wrappers over the Sqlite3 primitives: retry on BUSY, then
   raise on any error return code via Rc.check. *)
let exec db s =
  retry_if_busy (fun () -> Sqlite3.exec db s)
  |> Sqlite3.Rc.check

let prepare db s =
  Sqlite3.prepare db s

let bind_names stmt l =
  retry_if_busy (fun () -> Sqlite3.bind_names stmt l)
  |> Sqlite3.Rc.check

let reset stmt =
  retry_if_busy (fun () -> Sqlite3.reset stmt)
  |> Sqlite3.Rc.check

(* OK, DONE and ROW are all successful step outcomes. *)
let step stmt =
  match retry_if_busy (fun () -> Sqlite3.step stmt) with
  | OK | DONE | ROW -> ()
  | x -> Sqlite3.Rc.check x

let finalize stmt =
  retry_if_busy (fun () -> Sqlite3.finalize stmt)
  |> Sqlite3.Rc.check
(* Prepare [s] against [db] (or a pooled connection), optionally bind
   named parameters, run [f] on the statement, and finalize it.

   Fix: the original finalized the statement only on the success path,
   leaking the prepared statement whenever [bind_names] or [f] raised.
   The statement is now also finalized (best-effort) on the exception
   path, and the original exception is re-raised with its backtrace
   preserved; success-path behavior is unchanged. *)
let with_stmt : type a. ?db:db -> string -> ?names:((string * Sqlite3.Data.t) list) -> (Sqlite3.stmt -> a) -> a =
  fun ?db s ?names f ->
  with_db ?db (fun db ->
      let stmt = prepare db s in
      match
        Option.iter
          (fun names -> bind_names stmt names)
          names;
        f stmt
      with
      | res ->
        (* Success path: finalize normally; an error here still raises,
           as before. *)
        finalize stmt;
        res
      | exception e ->
        let bt = Printexc.get_raw_backtrace () in
        (* Best-effort cleanup; never mask the original exception. *)
        (try finalize stmt with _ -> ());
        Printexc.raise_with_backtrace e bt
    )
(* Like [with_stmt], but advances the statement once before handing it
   to [f] (e.g. so [f] can read the first result row). *)
let step_stmt : type a. ?db:db -> string -> ?names:((string * Data.t) list) -> (stmt -> a) -> a =
  fun ?db s ?names f ->
  with_stmt ?db s ?names (fun stmt ->
      step stmt;
      f stmt
    )
(* Run [f] on every result row of [s]; raises on an error return
   code. *)
let iter_stmt ?db s ?names (f : Data.t array -> unit) =
  with_stmt ?db s ?names
    (fun stmt ->
       Rc.check (Sqlite3.iter stmt ~f)
    )
(* Fold [f] over all result rows of [s], starting from [init]; raises
   on an error return code. *)
let fold_stmt : type a. ?db:db -> string -> ?names:((string * Data.t) list) -> (a -> Sqlite3.Data.t array -> a) -> a -> a =
  fun ?db s ?names f init ->
  with_stmt ?db s ?names (fun stmt ->
      let outcome = Sqlite3.fold stmt ~f ~init in
      Sqlite3.Rc.check (fst outcome);
      snd outcome
    )
================================================
FILE: lib/stop_signal.ml
================================================
(* One-shot broadcast signal: [await] blocks until [broadcast] runs. *)
type t = {
  mutable stop : bool;  (* Guarded by [mutex]. *)
  cond : Eio.Condition.t;
  mutex : Eio.Mutex.t;
}

let make () =
  {
    stop = false;
    cond = Eio.Condition.create ();
    mutex = Eio.Mutex.create ();
  }

(* Block the calling fiber until the signal has been broadcast.  The
   [stop] flag re-checked in a loop guards against missed and spurious
   wakeups. *)
let await (t : t) =
  Eio.Mutex.use_ro t.mutex (fun () ->
      while not t.stop do
        Eio.Condition.await t.cond t.mutex
      done
    )

(* Set the stop flag under the mutex, then wake all waiting fibers. *)
let broadcast (t : t) =
  Eio.Mutex.use_rw ~protect:false t.mutex
    (fun () -> t.stop <- true);
  Eio.Condition.broadcast t.cond
================================================
FILE: lib/stop_signal.mli
================================================
(** One-shot broadcast signal. *)
type t

val make : unit -> t

(** Blocks until [broadcast] has been called on the signal. *)
val await : t -> unit

(** Wakes all fibers blocked in [await]; later calls to [await] return
    immediately. *)
val broadcast : t -> unit
================================================
FILE: lib/string_map.ml
================================================
(* Maps keyed by strings (containers' CCMap over String). *)
include CCMap.Make (String)
================================================
FILE: lib/string_set.ml
================================================
(* Sets of strings (containers' CCSet over String). *)
include CCSet.Make (String)
================================================
FILE: lib/task_pool.ml
================================================
(* Shared executor pool for parallel work. *)
type t = Eio.Executor_pool.t

(* One worker domain per recommended core, minus one (presumably for
   the main domain); at least 1. *)
let size = max 1 (Domain.recommended_domain_count () - 1)

let make ~sw mgr =
  Eio.Executor_pool.create ~sw ~domain_count:size mgr

(* Submit [f] to the pool and wait for its result; submit_exn
   re-raises any exception [f] raises. *)
let run (t : t) (f : unit -> 'a) : 'a =
  Eio.Executor_pool.submit_exn t ~weight:1.0 f
(* Parallel list operations: up to [size] fibers each submit one item
   at a time to the pool.  The yield before each submission lets other
   fibers run first.  Eio.Fiber.List preserves result order. *)
let map_list : 'a 'b . t -> ('a -> 'b) -> 'a list -> 'b list =
  fun t f l ->
  Eio.Fiber.List.map ~max_fibers:size
    (fun x ->
       Eio.Fiber.yield ();
       run t (fun () -> f x))
    l

let filter_list : 'a 'b . t -> ('a -> bool) -> 'a list -> 'a list =
  fun t f l ->
  Eio.Fiber.List.filter ~max_fibers:size
    (fun x ->
       Eio.Fiber.yield ();
       run t (fun () -> f x))
    l

let filter_map_list : 'a 'b . t -> ('a -> 'b option) -> 'a list -> 'b list =
  fun t f l ->
  Eio.Fiber.List.filter_map ~max_fibers:size
    (fun x ->
       Eio.Fiber.yield ();
       run t (fun () -> f x))
    l
================================================
FILE: lib/task_pool.mli
================================================
(** Executor pool for parallel work. *)
type t

(** Number of worker domains in the pool. *)
val size : int

val make : sw:Eio.Switch.t -> _ Eio.Domain_manager.t -> t

(** Run a single job on the pool and wait for its result. *)
val run : t -> (unit -> 'a) -> 'a

(** Parallel equivalents of the List operations; result order is
    preserved. *)
val map_list : t -> ('a -> 'b) -> 'a list -> 'b list

val filter_list : t -> ('a -> bool) -> 'a list -> 'a list

val filter_map_list : t -> ('a -> 'b option) -> 'a list -> 'b list
================================================
FILE: lib/tokenization.ml
================================================
(* Split oversized tokens into chunks of at most
   [Params.max_token_size] bytes.  Each extra chunk emitted shifts the
   positions of all subsequent tokens by [offset], keeping positions
   increasing; a continuation chunk is re-queued with the original
   token's position, which the incremented offset then adjusts. *)
let chunk_tokens (s : (int * string) Seq.t) : (int * string) Seq.t =
  let rec aux offset s =
    match s () with
    | Seq.Nil -> Seq.empty
    | Seq.Cons ((pos, word), rest) -> (
        let word_len = String.length word in
        if word_len <= Params.max_token_size then (
          fun () -> Seq.Cons ((pos + offset, word), aux offset rest)
        ) else (
          let up_to_limit =
            String.sub word 0 Params.max_token_size
          in
          let rest_of_token =
            String.sub word Params.max_token_size (word_len - Params.max_token_size)
          in
          fun () ->
            Seq.Cons
              ((pos + offset, up_to_limit),
               (aux (offset + 1) (Seq.cons (pos, rest_of_token) rest)))
        )
      )
  in
  aux 0 s
(* A segment produced by word segmentation: a whitespace run or a text
   segment. *)
type token =
  | Space of string
  | Text of string
(* Split a word segment into runs of ASCII alphanumerics and
   individual (possibly multi-byte) UTF-8 characters.  Returns [] if
   the whole string cannot be parsed this way (silent best-effort). *)
let split_utf8_seg (s : string) : string list =
  let open Angstrom in
  let open Parser_components in
  let token_p =
    choice [
      take_while1 is_alphanum;
      utf_8_char;
    ]
  in
  let tokens_p = many token_p in
  match Angstrom.(parse_string ~consume:Consume.All) tokens_p s with
  | Ok l -> l
  | Error _ -> []
(* Tokenize [s] into (position, token) pairs using Unicode word
   segmentation (Uuseg `Word).  Text segments are further split by
   [split_utf8_seg]; positions index the emitted segment pieces
   BEFORE space filtering, so they are stable whether or not spaces
   are dropped.  Oversized tokens are finally chunked. *)
let tokenize_with_pos ~drop_spaces (s : string) : (int * string) Seq.t =
  let segmenter = Uuseg.create `Word in
  let s = Misc_utils.sanitize_string s in
  let s_len = String.length s in
  let acc : token Dynarray.t = Dynarray.create () in
  let buf : Uchar.t Dynarray.t = Dynarray.create () in
  let sbuf = Buffer.create 256 in
  (* Turn the buffered segment into a Space or Text entries in acc. *)
  let flush_to_acc () =
    if Dynarray.length buf > 0 then (
      Dynarray.iter (Buffer.add_utf_8_uchar sbuf) buf;
      if Uucp.White.is_white_space (Dynarray.get buf 0) then (
        Dynarray.add_last acc (Space (Buffer.contents sbuf))
      ) else (
        Buffer.contents sbuf
        |> split_utf8_seg
        |> List.iter (fun s ->
            Dynarray.add_last acc (Text s)
          )
      );
      Dynarray.clear buf;
      Buffer.clear sbuf
    )
  in
  (* Feed one event to the segmenter and drain its output. *)
  let rec add v =
    match Uuseg.add segmenter v with
    | `Uchar uc -> (
        Dynarray.add_last buf uc;
        add `Await
      )
    | `Boundary -> (
        flush_to_acc ();
        add `Await
      )
    | `Await | `End -> ()
  in
  (* Decode the input; invalid bytes are skipped one at a time. *)
  let rec aux pos =
    if pos >= s_len then (
      add `End;
      flush_to_acc ()
    ) else (
      let decode = String.get_utf_8_uchar s pos in
      if Uchar.utf_decode_is_valid decode then (
        let uchar = Uchar.utf_decode_uchar decode in
        add (`Uchar uchar);
        aux (pos + Uchar.utf_decode_length decode)
      ) else (
        aux (pos + 1)
      )
    )
  in
  aux 0;
  Dynarray.to_seq acc
  |> Seq.mapi (fun i x -> (i, x))
  |> Seq.filter_map (fun ((i, token) : int * token) ->
      match token with
      | Text s -> Some (i, s)
      | Space s ->
        if drop_spaces then
          None
        else
          Some (i, s)
    )
  |> chunk_tokens
(* Same as [tokenize_with_pos] but discarding the positions. *)
let tokenize ~drop_spaces s =
  Seq.map snd (tokenize_with_pos ~drop_spaces s)
================================================
FILE: lib/word_db.ml
================================================
(* Global bidirectional word <-> id table, guarded by [lock]. *)
type t = {
  lock : Eio.Mutex.t;
  (* Number of words interned so far; also the next fresh id. *)
  mutable size : int;
  (* How many of the first [size] ids have been persisted by
     [write_to_db]. *)
  mutable size_written_to_db : int;
  mutable word_of_id : string Int_map.t;
  id_of_word : (string, int) Hashtbl.t;
}

(* The process-wide singleton instance. *)
let t : t =
  {
    lock = Eio.Mutex.create ();
    size = 0;
    size_written_to_db = 0;
    word_of_id = Int_map.empty;
    id_of_word = Hashtbl.create 100_000;
  }
(* Run [f] while holding the table's mutex. *)
let lock : type a. (unit -> a) -> a =
  fun f -> Eio.Mutex.use_rw ~protect:true t.lock f
(* Return all (id, word) pairs whose word satisfies [f], scanning the
   table in parallel chunks on [pool].

   Fix: snapshot [t.word_of_id] AND [t.size] under a single lock
   acquisition.  Previously [t.size] was read several times outside the
   lock, so a concurrent [add] could make it exceed the map snapshot and
   [Int_map.find] below would raise [Not_found]. *)
let filter pool (f : string -> bool) : (int * string) Dynarray.t =
  let word_of_id, size =
    lock (fun () ->
        (t.word_of_id, t.size)
      )
  in
  let max_end_exc_seen = ref 0 in
  let chunk_size = !Params.index_chunk_size * 10 in
  (* Partition ids [0, size) into [chunk_size]-wide ranges. *)
  let chunk_start_end_exc_ranges =
    OSeq.(0 -- (size - 1) / chunk_size)
    |> Seq.map (fun chunk_index ->
        let start = chunk_index * chunk_size in
        let end_exc =
          min
            ((chunk_index + 1) * chunk_size)
            size
        in
        max_end_exc_seen := max !max_end_exc_seen end_exc;
        (start, end_exc)
      )
    |> List.of_seq
  in
  (* Sanity check: the ranges cover the whole snapshot. *)
  assert (!max_end_exc_seen = size);
  let batches =
    chunk_start_end_exc_ranges
    |> Task_pool.map_list pool (fun (start, end_exc) ->
        (* Ids [0, size) are all present in the snapshot, so
           [Int_map.find] cannot raise here. *)
        let acc = Dynarray.create () in
        for i = start to end_exc - 1 do
          let word = Int_map.find i word_of_id in
          if f word then (
            Dynarray.add_last acc (i, word)
          )
        done;
        acc
      )
  in
  (* Concatenate per-chunk results, preserving id order. *)
  let acc = Dynarray.create () in
  List.iter (fun batch ->
      Dynarray.append acc batch
    ) batches;
  acc
(* Intern [word]: return its existing id, or allocate the next fresh id
   and record the mapping in both directions. *)
let add (word : string) : int =
  lock (fun () ->
      match Hashtbl.find_opt t.id_of_word word with
      | Some existing_id -> existing_id
      | None ->
        let fresh_id = t.size in
        t.size <- t.size + 1;
        t.word_of_id <- Int_map.add fresh_id word t.word_of_id;
        Hashtbl.replace t.id_of_word word fresh_id;
        fresh_id
    )
(* Look up the word for id [i]; raises [Not_found] for unknown ids. *)
let word_of_id i : string =
  lock (fun () -> Int_map.find i t.word_of_id)
(* Look up the id for word [s], if it has been interned. *)
let id_of_word s : int option =
  lock (fun () -> Hashtbl.find_opt t.id_of_word s)
(* Replace the in-memory table with the contents of the [word] table in
   the on-disk database, and mark everything as already persisted. *)
let read_from_db () : unit =
  let open Sqlite3_utils in
  lock (fun () ->
      with_db (fun db ->
          (* Discard any in-memory state before reloading. *)
          t.word_of_id <- Int_map.empty;
          Hashtbl.clear t.id_of_word;
          iter_stmt ~db
            {|
        SELECT id, word
        FROM word
        ORDER by id
        |}
            ~names:[]
            (fun data ->
               let id = Data.to_int_exn data.(0) in
               let word = Data.to_string_exn data.(1) in
               t.word_of_id <- Int_map.add id word t.word_of_id;
               Hashtbl.replace t.id_of_word word id;
            )
        );
      (* [size] is derived from the loaded map; everything loaded is, by
         definition, already in the database. *)
      t.size <- Int_map.cardinal t.word_of_id;
      t.size_written_to_db <- t.size;
    )
(* Persist words interned since the last write, i.e. ids in
   [size_written_to_db, size).  Wraps the work in its own IMMEDIATE
   transaction unless the caller already opened one.

   Exits the process if the on-disk word table's row count no longer
   matches what this process last wrote — taken as evidence that another
   Docfd instance indexed concurrently. *)
let write_to_db db ~already_in_transaction : unit =
  let open Sqlite3_utils in
  lock (fun () ->
      if not already_in_transaction then (
        step_stmt ~db "BEGIN IMMEDIATE" ignore;
      );
      let word_table_size =
        step_stmt ~db
          {|
      SELECT COUNT(1) FROM word
      |}
          (fun stmt ->
             Int64.to_int (column_int64 stmt 0)
          )
      in
      (* Consistency check: row count must equal our record of what we
         have written; otherwise our id assignment may clash. *)
      if word_table_size <> t.size_written_to_db then (
        Misc_utils.exit_with_error_msg
          "unexpected change in word table, likely due to indexing from another Docfd instance";
      );
      with_stmt ~db
        {|
      INSERT INTO word
      (id, word)
      VALUES
      (@id, @word)
      ON CONFLICT(id) DO NOTHING
      |}
        (fun stmt ->
           (* Insert only the not-yet-persisted suffix of ids, reusing one
              prepared statement. *)
           for id = t.size_written_to_db to t.size-1 do
             let word = Int_map.find id t.word_of_id in
             bind_names
               stmt
               [ ("@id", INT (Int64.of_int id))
               ; ("@word", TEXT word)
               ];
             step stmt;
             reset stmt;
           done
        );
      if not already_in_transaction then (
        step_stmt ~db "COMMIT" ignore;
      );
      t.size_written_to_db <- t.size;
    )
================================================
FILE: lib/word_db.mli
================================================
(** Global word <-> id interning table. *)
type t

(** [add word] interns [word] and returns its id (existing or fresh). *)
val add : string -> int

(** [filter pool f] returns all (id, word) pairs whose word satisfies
    [f], scanned in parallel on [pool]. *)
val filter : Task_pool.t -> (string -> bool) -> (int * string) Dynarray.t

(** [word_of_id i] returns the word interned under id [i]. *)
val word_of_id : int -> string

(** [id_of_word s] returns the id of [s], if interned. *)
val id_of_word : string -> int option

(** Replace the in-memory table with the database's word table. *)
val read_from_db : unit -> unit

(** Persist words interned since the last write. *)
val write_to_db : Sqlite3.db -> already_in_transaction:bool -> unit
================================================
FILE: line-wrapping-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: line-wrapping-tests.t/long-words.txt
================================================
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
abcdefghijklmnopqrstuvwxyz
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
abcdefghijklmnopqrstuvwxyz
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
================================================
FILE: line-wrapping-tests.t/run.t
================================================
Word breaking:
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 long-words.txt --sample "01 ab" --search-result-print-text-width 80
$TESTCASE_ROOT/long-words.txt
1: 01234567890123456789012345678901234567890123456789012345678901234567890123456
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
78901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^
16: 0123456789012345678901234567890123456789012345678901234567890123456789012345
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
678901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^^
17:
18: abcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^
1: 01234567890123456789012345678901234567890123456789012345678901234567890123456
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
78901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopqrstuvwxyz
4: 01234567890123456789012345678901234567890123456789012345678901234567890123456
78901234567890123456789012345678901234567890123456789012345678901234567890123
4567890123456789012345678901234567890123456789
5: abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16: 0123456789012345678901234567890123456789012345678901234567890123456789012345
678901234567890123456789
17:
18: abcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^
3: abcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^
4: 01234567890123456789012345678901234567890123456789012345678901234567890123456
78901234567890123456789012345678901234567890123456789012345678901234567890123
4567890123456789012345678901234567890123456789
5: abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16: 0123456789012345678901234567890123456789012345678901234567890123456789012345
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
678901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^^
1: 01234567890123456789012345678901234567890123456789012345678901234567890123456
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
78901234567890123456789
^^^^^^^^^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopqrstuvwxyz
4: 01234567890123456789012345678901234567890123456789012345678901234567890123456
78901234567890123456789012345678901234567890123456789012345678901234567890123
4567890123456789012345678901234567890123456789
5: abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 long-words.txt --sample "01 ab" --search-result-print-text-width 20
$TESTCASE_ROOT/long-words.txt
1: 01234567890123456
^^^^^^^^^^^^^^^^^
78901234567890123
^^^^^^^^^^^^^^^^^
45678901234567890
^^^^^^^^^^^^^^^^^
12345678901234567
^^^^^^^^^^^^^^^^^
89012345678901234
^^^^^^^^^^^^^^^^^
567890123456789
^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopq
^^^^^^^^^^^^^^^^^
rstuvwxyz
^^^^^^^^^
16: 0123456789012345
^^^^^^^^^^^^^^^^
6789012345678901
^^^^^^^^^^^^^^^^
2345678901234567
^^^^^^^^^^^^^^^^
8901234567890123
^^^^^^^^^^^^^^^^
4567890123456789
^^^^^^^^^^^^^^^^
0123456789012345
^^^^^^^^^^^^^^^^
6789
^^^^
17:
18: abcdefghijklmnop
^^^^^^^^^^^^^^^^
qrstuvwxyz
^^^^^^^^^^
1: 01234567890123456
^^^^^^^^^^^^^^^^^
78901234567890123
^^^^^^^^^^^^^^^^^
45678901234567890
^^^^^^^^^^^^^^^^^
12345678901234567
^^^^^^^^^^^^^^^^^
89012345678901234
^^^^^^^^^^^^^^^^^
567890123456789
^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopq
rstuvwxyz
4: 01234567890123456
78901234567890123
45678901234567890
12345678901234567
89012345678901234
56789012345678901
23456789012345678
90123456789012345
67890123456789012
34567890123456789
01234567890123456
7890123456789
5: abcdefghijklmnopq
rstuvwxyzabcdefgh
ijklmnopqrstuvwxy
z
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16: 0123456789012345
6789012345678901
2345678901234567
8901234567890123
4567890123456789
0123456789012345
6789
17:
18: abcdefghijklmnop
^^^^^^^^^^^^^^^^
qrstuvwxyz
^^^^^^^^^^
3: abcdefghijklmnopq
^^^^^^^^^^^^^^^^^
rstuvwxyz
^^^^^^^^^
4: 01234567890123456
78901234567890123
45678901234567890
12345678901234567
89012345678901234
56789012345678901
23456789012345678
90123456789012345
67890123456789012
34567890123456789
01234567890123456
7890123456789
5: abcdefghijklmnopq
rstuvwxyzabcdefgh
ijklmnopqrstuvwxy
z
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16: 0123456789012345
^^^^^^^^^^^^^^^^
6789012345678901
^^^^^^^^^^^^^^^^
2345678901234567
^^^^^^^^^^^^^^^^
8901234567890123
^^^^^^^^^^^^^^^^
4567890123456789
^^^^^^^^^^^^^^^^
0123456789012345
^^^^^^^^^^^^^^^^
6789
^^^^
1: 01234567890123456
^^^^^^^^^^^^^^^^^
78901234567890123
^^^^^^^^^^^^^^^^^
45678901234567890
^^^^^^^^^^^^^^^^^
12345678901234567
^^^^^^^^^^^^^^^^^
89012345678901234
^^^^^^^^^^^^^^^^^
567890123456789
^^^^^^^^^^^^^^^
2:
3: abcdefghijklmnopq
rstuvwxyz
4: 01234567890123456
78901234567890123
45678901234567890
12345678901234567
89012345678901234
56789012345678901
23456789012345678
90123456789012345
67890123456789012
34567890123456789
01234567890123456
7890123456789
5: abcdefghijklmnopq
^^^^^^^^^^^^^^^^^
rstuvwxyzabcdefgh
^^^^^^^^^^^^^^^^^
ijklmnopqrstuvwxy
^^^^^^^^^^^^^^^^^
z
^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 words.txt --sample "01 ab" --search-result-print-text-width 5
$TESTCASE_ROOT/words.txt
1: 01
^^
23
^^
45
^^
2:
3: ab
^^
cd
^^
ef
^^
g
^
15: 0
^
1
^
2
^
3
^
4
^
5
^
16:
17: a
^
b
^
c
^
d
^
e
^
f
^
g
^
1: 01
^^
23
^^
45
^^
2:
3: ab
cd
ef
g
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15: 0
1
2
3
4
5
16:
17: a
^
b
^
c
^
d
^
e
^
f
^
g
^
3: ab
^^
cd
^^
ef
^^
g
^
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15: 0
^
1
^
2
^
3
^
4
^
5
^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 words.txt --sample "01 ab" --search-result-print-text-width 1
$TESTCASE_ROOT/words.txt
1: 0
^
1
^
2
^
3
^
4
^
5
^
2:
3: a
^
b
^
c
^
d
^
e
^
f
^
g
^
15: 0
^
1
^
2
^
3
^
4
^
5
^
16:
17: a
^
b
^
c
^
d
^
e
^
f
^
g
^
1: 0
^
1
^
2
^
3
^
4
^
5
^
2:
3: a
b
c
d
e
f
g
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15: 0
1
2
3
4
5
16:
17: a
^
b
^
c
^
d
^
e
^
f
^
g
^
3: a
^
b
^
c
^
d
^
e
^
f
^
g
^
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15: 0
^
1
^
2
^
3
^
4
^
5
^
Line wrapping and word breaking:
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "lorem" --search-result-print-text-width 80
$TESTCASE_ROOT/sentences.txt
1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
^^^^^
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "lorem" --search-result-print-text-width 20
$TESTCASE_ROOT/sentences.txt
1: Lorem ipsum
^^^^^
dolor sit amet,
consectetur
adipiscing elit,
sed do eiusmod
tempor incididunt
ut labore et
dolore magna
aliqua. Ut enim
ad minim veniam,
quis nostrud
exercitation
ullamco laboris
nisi ut aliquip
ex ea commodo
consequat. Duis
aute irure dolor
in reprehenderit
in voluptate
velit esse cillum
dolore eu fugiat
nulla pariatur.
Excepteur sint
occaecat
cupidatat non
proident, sunt in
culpa qui
officia deserunt
mollit anim id
est laborum.
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "lorem" --search-result-print-text-width 10
$TESTCASE_ROOT/sentences.txt
1:
Lorem
^^^^^
ipsum
dolor
sit
amet,
consect
etur
adipisc
ing
elit,
sed do
eiusmod
tempor
incidid
unt
ut
labore
et
dolore
magna
aliqua.
Ut
enim ad
minim
veniam,
quis
nostrud
exercit
ation
ullamco
laboris
nisi
ut
aliquip
ex ea
commodo
consequ
at
. Duis
aute
irure
dolor
in
reprehe
nderit
in
volupta
te
velit
esse
cillum
dolore
eu
fugiat
nulla
pariatu
r
.
Excepte
ur
sint
occaeca
t
cupidat
at
non
proiden
t
, sunt
in
culpa
qui
officia
deserun
t
mollit
anim
id est
laborum
.
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "laborum 0" --search-result-print-text-width 80
$TESTCASE_ROOT/sentences.txt
1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
^^^^^^^
2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345
^
1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
^^^^^^^
2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345
^^^^^^^
1: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
^^^^^^^
2: 0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345
^^^^^^^^^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "laborum 0" --search-result-print-text-width 20
$TESTCASE_ROOT/sentences.txt
1: Lorem ipsum
dolor sit amet,
consectetur
adipiscing elit,
sed do eiusmod
tempor incididunt
ut labore et
dolore magna
aliqua. Ut enim
ad minim veniam,
quis nostrud
exercitation
ullamco laboris
nisi ut aliquip
ex ea commodo
consequat. Duis
aute irure dolor
in reprehenderit
in voluptate
velit esse cillum
dolore eu fugiat
nulla pariatur.
Excepteur sint
occaecat
cupidatat non
proident, sunt in
culpa qui
officia deserunt
mollit anim id
est laborum.
^^^^^^^
2: 0 12 abcd efghi
^
jkl mnopqrst uvwx
yz 0123456
789012345
1: Lorem ipsum
dolor sit amet,
consectetur
adipiscing elit,
sed do eiusmod
tempor incididunt
ut labore et
dolore magna
aliqua. Ut enim
ad minim veniam,
quis nostrud
exercitation
ullamco laboris
nisi ut aliquip
ex ea commodo
consequat. Duis
aute irure dolor
in reprehenderit
in voluptate
velit esse cillum
dolore eu fugiat
nulla pariatur.
Excepteur sint
occaecat
cupidatat non
proident, sunt in
culpa qui
officia deserunt
mollit anim id
est laborum.
^^^^^^^
2: 0 12 abcd efghi
jkl mnopqrst uvwx
yz 0123456
^^^^^^^
789012345
1: Lorem ipsum
dolor sit amet,
consectetur
adipiscing elit,
sed do eiusmod
tempor incididunt
ut labore et
dolore magna
aliqua. Ut enim
ad minim veniam,
quis nostrud
exercitation
ullamco laboris
nisi ut aliquip
ex ea commodo
consequat. Duis
aute irure dolor
in reprehenderit
in voluptate
velit esse cillum
dolore eu fugiat
nulla pariatur.
Excepteur sint
occaecat
cupidatat non
proident, sunt in
culpa qui
officia deserunt
mollit anim id
est laborum.
^^^^^^^
2: 0 12 abcd efghi
jkl mnopqrst uvwx
yz 0123456
789012345
^^^^^^^^^
$ docfd --cache-dir .cache --search-result-print-snippet-min-size 0 sentences.txt --sample "laborum 0" --search-result-print-text-width 10
$TESTCASE_ROOT/sentences.txt
1:
Lorem
ipsum
dolor
sit
amet,
consect
etur
adipisc
ing
elit,
sed do
eiusmod
tempor
incidid
unt
ut
labore
et
dolore
magna
aliqua.
Ut
enim ad
minim
veniam,
quis
nostrud
exercit
ation
ullamco
laboris
nisi
ut
aliquip
ex ea
commodo
consequ
at
. Duis
aute
irure
dolor
in
reprehe
nderit
in
volupta
te
velit
esse
cillum
dolore
eu
fugiat
nulla
pariatu
r
.
Excepte
ur
sint
occaeca
t
cupidat
at
non
proiden
t
, sunt
in
culpa
qui
officia
deserun
t
mollit
anim
id est
laborum
^^^^^^^
.
2: 0 12
^
abcd
efghi
jkl
mnopqrs
t
uvwx
yz
0123456
7890123
45
1:
Lorem
ipsum
dolor
sit
amet,
consect
etur
adipisc
ing
elit,
sed do
eiusmod
tempor
incidid
unt
ut
labore
et
dolore
magna
aliqua.
Ut
enim ad
minim
veniam,
quis
nostrud
exercit
ation
ullamco
laboris
nisi
ut
aliquip
ex ea
commodo
consequ
at
. Duis
aute
irure
dolor
in
reprehe
nderit
in
volupta
te
velit
esse
cillum
dolore
eu
fugiat
nulla
pariatu
r
.
Excepte
ur
sint
occaeca
t
cupidat
at
non
proiden
t
, sunt
in
culpa
qui
officia
deserun
t
mollit
anim
id est
laborum
^^^^^^^
.
2: 0 12
abcd
efghi
jkl
mnopqrs
t
uvwx
yz
0123456
^^^^^^^
7890123
45
1:
Lorem
ipsum
dolor
sit
amet,
consect
etur
adipisc
ing
elit,
sed do
eiusmod
tempor
incidid
unt
ut
labore
et
dolore
magna
aliqua.
Ut
enim ad
minim
veniam,
quis
nostrud
exercit
ation
ullamco
laboris
nisi
ut
aliquip
ex ea
commodo
consequ
at
. Duis
aute
irure
dolor
in
reprehe
nderit
in
volupta
te
velit
esse
cillum
dolore
eu
fugiat
nulla
pariatu
r
.
Excepte
ur
sint
occaeca
t
cupidat
at
non
proiden
t
, sunt
in
culpa
qui
officia
deserun
t
mollit
anim
id est
laborum
^^^^^^^
.
2: 0 12
abcd
efghi
jkl
mnopqrs
t
uvwx
yz
0123456
7890123
^^^^^^^
45
^^
================================================
FILE: line-wrapping-tests.t/sentences.txt
================================================
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
0 12 abcd efghi jkl mnopqrst uvwx yz 0123456 789012345
================================================
FILE: line-wrapping-tests.t/words.txt
================================================
012345
abcdefg
012345
abcdefg
================================================
FILE: match-type-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: match-type-tests.t/run.t
================================================
Exact match:
$ docfd --cache-dir .cache test.txt --sample "'abc"
[1]
$ docfd --cache-dir .cache test.txt --sample "'abcd"
$TESTCASE_ROOT/test.txt
1: abcd
^^^^
2: abcdef
3: ABCD
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
6: ABcdEF
7:
8: 'abcd
^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^
10: ^efgh
11: ^^efgh
$ docfd --cache-dir .cache test.txt --sample "\\'abcd"
$TESTCASE_ROOT/test.txt
6: ABcdEF
7:
8: 'abcd
^^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^^
10: ^efgh
11: ^^efgh
6: ABcdEF
7:
8: 'abcd
^^^^
9: 'abcd'
^
10: ^efgh
11: ^^efgh
7:
8: 'abcd
9: 'abcd'
^^^^^
10: ^efgh
11: ^^efgh
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
^
9: 'abcd'
10: ^efgh
$ docfd --cache-dir .cache test.txt --sample "'abcdef"
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
^^^^^^
3: ABCD
4: ABCDEF
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
$ docfd --cache-dir .cache test.txt --sample "''abcd"
$TESTCASE_ROOT/test.txt
6: ABcdEF
7:
8: 'abcd
^^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^^
10: ^efgh
11: ^^efgh
Exact match smart case sensitivity:
$ docfd --cache-dir .cache test.txt --sample "'ABCD"
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
$ docfd --cache-dir .cache test.txt --sample "'ABcd"
$TESTCASE_ROOT/test.txt
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
Prefix match:
$ docfd --cache-dir .cache test.txt --sample "^bcd"
[1]
$ docfd --cache-dir .cache test.txt --sample "^abcd"
$TESTCASE_ROOT/test.txt
1: abcd
^^^^
2: abcdef
3: ABCD
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
6: ABcdEF
7:
8: 'abcd
^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^
10: ^efgh
11: ^^efgh
$ docfd --cache-dir .cache test.txt --sample "^abcdef"
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
^^^^^^
3: ABCD
4: ABCDEF
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
$ docfd --cache-dir .cache test.txt --sample "^'abcd"
$TESTCASE_ROOT/test.txt
6: ABcdEF
7:
8: 'abcd
^^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^^
10: ^efgh
11: ^^efgh
Prefix match smart case sensitivity:
$ docfd --cache-dir .cache test.txt --sample "^ABCD"
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
$ docfd --cache-dir .cache test.txt --sample "^ABcd"
$TESTCASE_ROOT/test.txt
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
Suffix match:
$ docfd --cache-dir .cache test.txt --sample 'bcd$'
$TESTCASE_ROOT/test.txt
1: abcd
^^^^
2: abcdef
3: ABCD
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
3: ABCD
4: ABCDEF
5: ABcd
^^^^
6: ABcdEF
7:
6: ABcdEF
7:
8: 'abcd
^^^^
9: 'abcd'
10: ^efgh
7:
8: 'abcd
9: 'abcd'
^^^^
10: ^efgh
11: ^^efgh
$ docfd --cache-dir .cache test.txt --sample 'abcd$$'
$TESTCASE_ROOT/test.txt
13: efgh$$
14:
15: abcd$
^^^^^
16: efgh$
17:
$ docfd --cache-dir .cache test.txt --sample 'ef$'
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
^^^^^^
3: ABCD
4: ABCDEF
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
Suffix match smart case sensitivity:
$ docfd --cache-dir .cache test.txt --sample 'ABCD$'
$TESTCASE_ROOT/test.txt
1: abcd
2: abcdef
3: ABCD
^^^^
4: ABCDEF
5: ABcd
$ docfd --cache-dir .cache test.txt --sample 'EF$'
$TESTCASE_ROOT/test.txt
2: abcdef
3: ABCD
4: ABCDEF
^^^^^^
5: ABcd
6: ABcdEF
4: ABCDEF
5: ABcd
6: ABcdEF
^^^^^^
7:
8: 'abcd
Fuzzy match explicit spaces:
$ docfd --cache-dir .cache test.txt --sample 'hel~word'
$TESTCASE_ROOT/test.txt
16: efgh$
17:
18: hello world
^^^^^^^^^^^
19: hello world
20:
17:
18: hello world
19: hello world
^^^^^^^^^^^^^
20:
21: Hello world
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
21: Hello world
22:
23: HELLO WORLD
^^^^^^^^^^^
16: efgh$
17:
18: hello world
^^^^^^
19: hello world
^^^^^
20:
21: Hello world
Exact match explicit spaces:
$ docfd --cache-dir .cache test.txt --sample "'hello~world"
$TESTCASE_ROOT/test.txt
16: efgh$
17:
18: hello world
^^^^^^^^^^^
19: hello world
20:
17:
18: hello world
19: hello world
^^^^^^^^^^^^^
20:
21: Hello world
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
21: Hello world
22:
23: HELLO WORLD
^^^^^^^^^^^
$ docfd --cache-dir .cache test.txt --sample "'Hello~world"
$TESTCASE_ROOT/test.txt
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
$ docfd --cache-dir .cache test.txt --sample "'Hello~World"
[1]
Prefix match explicit spaces:
$ docfd --cache-dir .cache test.txt --sample '^hello~wo'
$TESTCASE_ROOT/test.txt
16: efgh$
17:
18: hello world
^^^^^^^^^^^
19: hello world
20:
17:
18: hello world
19: hello world
^^^^^^^^^^^^^
20:
21: Hello world
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
21: Hello world
22:
23: HELLO WORLD
^^^^^^^^^^^
$ docfd --cache-dir .cache test.txt --sample '^ello~wo'
[1]
Suffix match explicit spaces:
$ docfd --cache-dir .cache test.txt --sample 'lo~world$'
$TESTCASE_ROOT/test.txt
16: efgh$
17:
18: hello world
^^^^^^^^^^^
19: hello world
20:
17:
18: hello world
19: hello world
^^^^^^^^^^^^^
20:
21: Hello world
19: hello world
20:
21: Hello world
^^^^^^^^^^^
22:
23: HELLO WORLD
21: Hello world
22:
23: HELLO WORLD
^^^^^^^^^^^
$ docfd --cache-dir .cache test.txt --sample 'lo~worl$'
[1]
================================================
FILE: match-type-tests.t/test.txt
================================================
abcd
abcdef
ABCD
ABCDEF
ABcd
ABcdEF
'abcd
'abcd'
^efgh
^^efgh
efgh$
efgh$$
abcd$
efgh$
hello world
hello world
Hello world
HELLO WORLD
================================================
FILE: misc-behavior-tests.t/abcd.txt
================================================
abcd
================================================
FILE: misc-behavior-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: misc-behavior-tests.t/run.t
================================================
Stdin temp file cleanup:
$ echo "abcd" | docfd --cache-dir .cache --search "a" | tail -n +2
1: abcd
^^^^
$ ls /tmp/docfd-*
ls: cannot access '/tmp/docfd-*': No such file or directory
[2]
Stdin and path both specified:
$ echo "0123" | docfd --cache-dir .cache abcd.txt --search "01" # Should not print anything since stdin should be ignored.
[1]
$ echo "0123" | docfd --cache-dir .cache abcd.txt --search "ab"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
--underline:
$ docfd --cache-dir .cache --underline never abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
1: abcd
$ docfd --cache-dir .cache --underline always abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
1: abcd
^^^^
$ docfd --cache-dir .cache --underline auto abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
1: abcd
^^^^
--color:
$ docfd --cache-dir .cache --color never abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
1: abcd
^^^^
$ # The output below is messed up after passing through Dune, I do not know why.
$ docfd --cache-dir .cache --color always abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt1: abcd ^^^^
1: abcd ^^^^
$ docfd --cache-dir .cache --color auto abcd.txt --search "ab|cd"
$TESTCASE_ROOT/abcd.txt
1: abcd
^^^^
1: abcd
^^^^
================================================
FILE: non-interactive-mode-return-code-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: non-interactive-mode-return-code-tests.t/run.t
================================================
Setup:
$ echo "0123 abcd" >> test0.txt
$ echo "0123 efgh" >> test1.txt
--sample, text all files:
$ docfd --sample "'0123" .
$TESTCASE_ROOT/test1.txt
1: 0123 efgh
^^^^
$TESTCASE_ROOT/test0.txt
1: 0123 abcd
^^^^
$ docfd --sample "'0123" . -l
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ docfd --sample "'0123" . --files-without-match
[1]
--sample, text in only one file:
$ docfd --sample "'abcd" .
$TESTCASE_ROOT/test0.txt
1: 0123 abcd
^^^^
$ docfd --sample "'abcd" . -l
$TESTCASE_ROOT/test0.txt
$ docfd --sample "'abcd" . --files-without-match
$TESTCASE_ROOT/test1.txt
--sample, text not in any file:
$ docfd --sample "'hello" .
[1]
$ docfd --sample "'hello" . -l
[1]
$ docfd --sample "'hello" . --files-without-match
$TESTCASE_ROOT/test0.txt
$TESTCASE_ROOT/test1.txt
--search, text all files:
$ docfd --search "'0123" .
$TESTCASE_ROOT/test1.txt
1: 0123 efgh
^^^^
$TESTCASE_ROOT/test0.txt
1: 0123 abcd
^^^^
$ docfd --search "'0123" . -l
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ docfd --search "'0123" . --files-without-match
[1]
--search, text in only one file:
$ docfd --search "'abcd" .
$TESTCASE_ROOT/test0.txt
1: 0123 abcd
^^^^
$ docfd --search "'abcd" . -l
$TESTCASE_ROOT/test0.txt
$ docfd --search "'abcd" . --files-without-match
$TESTCASE_ROOT/test1.txt
--search, text not in any file:
$ docfd --search "'hello" .
[1]
$ docfd --search "'hello" . -l
[1]
$ docfd --search "'hello" . --files-without-match
$TESTCASE_ROOT/test0.txt
$TESTCASE_ROOT/test1.txt
================================================
FILE: open-with-tests.t/dune
================================================
(cram
(deps ../bin/docfd.exe))
================================================
FILE: open-with-tests.t/run.t
================================================
Error case tests:
$ docfd --index-only --open-with pdf:term='okular {path}'
error: failed to parse pdf:term=okular {path}, invalid launch mode
[1]
$ docfd --index-only --open-with pdf='okular {path}'
error: failed to parse pdf=okular {path}, expected char :
[1]
$ docfd --index-only --open-with pdfterminal='okular {path}'
error: failed to parse pdfterminal=okular {path}, expected char :
[1]
$ docfd --index-only --open-with pdf:terminal='okular path}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:terminal='okular {path'
error: failed to parse pdf:terminal=okular {path, expected char }
[1]
$ docfd --index-only --open-with pdf:terminal='okular {abc}'
error: failed to parse pdf:terminal=okular {abc}, invalid placeholder
[1]
PDF parsing test, terminal launch mode:
$ docfd --index-only --open-with pdf:terminal='okular {path}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:terminal='okular {page_num}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:terminal='okular {line_num}'
error: failed to parse pdf:terminal=okular {line_num}, line_num not available
[1]
$ docfd --index-only --open-with pdf:terminal='okular {search_word}'
Initializing in-memory index
PDF parsing test, detached launch mode:
$ docfd --index-only --open-with pdf:detached='okular {path}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:detached='okular {page_num}'
Initializing in-memory index
$ docfd --index-only --open-with pdf:detached='okular {line_num}'
error: failed to parse pdf:detached=okular {line_num}, line_num not available
[1]
$ docfd --index-only --open-with pdf:detached='okular {search_word}'
Initializing in-memory index
Pandoc supported extensions parsing test, terminal launch mode:
$ docfd --index-only --open-with odt:terminal='xdg-open {path}'
Initializing in-memory index
$ docfd --index-only --open-with odt:terminal='xdg-open {page_num}'
error: failed to parse odt:terminal=xdg-open {page_num}, page_num not available
[1]
$ docfd --index-only --open-with odt:terminal='xdg-open {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with odt:terminal='xdg-open {search_word}'
error: failed to parse odt:terminal=xdg-open {search_word}, search_word not available
[1]
Pandoc supported extensions parsing test, detached launch mode:
$ docfd --index-only --open-with odt:detached='xdg-open {path}'
Initializing in-memory index
$ docfd --index-only --open-with odt:detached='xdg-open {page_num}'
error: failed to parse odt:detached=xdg-open {page_num}, page_num not available
[1]
$ docfd --index-only --open-with odt:detached='xdg-open {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with odt:detached='xdg-open {search_word}'
error: failed to parse odt:detached=xdg-open {search_word}, search_word not available
[1]
Text parsing test, terminal launch mode:
$ docfd --index-only --open-with txt:terminal='nano {path}'
Initializing in-memory index
$ docfd --index-only --open-with txt:terminal='nano {page_num}'
error: failed to parse txt:terminal=nano {page_num}, page_num not available
[1]
$ docfd --index-only --open-with txt:terminal='nano {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with txt:terminal='nano {search_word}'
error: failed to parse txt:terminal=nano {search_word}, search_word not available
[1]
Text parsing test, detached launch mode:
$ docfd --index-only --open-with txt:detached='nano {path}'
Initializing in-memory index
$ docfd --index-only --open-with txt:detached='nano {page_num}'
error: failed to parse txt:detached=nano {page_num}, page_num not available
[1]
$ docfd --index-only --open-with txt:detached='nano {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with txt:detached='nano {search_word}'
error: failed to parse txt:detached=nano {search_word}, search_word not available
[1]
Unrecognized extensions parsing test, terminal launch mode:
$ docfd --index-only --open-with abc:terminal='nano {path}'
Initializing in-memory index
$ docfd --index-only --open-with abc:terminal='nano {page_num}'
error: failed to parse abc:terminal=nano {page_num}, page_num not available
[1]
$ docfd --index-only --open-with abc:terminal='nano {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with abc:terminal='nano {search_word}'
error: failed to parse abc:terminal=nano {search_word}, search_word not available
[1]
Unrecognized extensions parsing test, detached launch mode:
$ docfd --index-only --open-with abc:detached='nano {path}'
Initializing in-memory index
$ docfd --index-only --open-with abc:detached='nano {page_num}'
error: failed to parse abc:detached=nano {page_num}, page_num not available
[1]
$ docfd --index-only --open-with abc:detached='nano {line_num}'
Initializing in-memory index
$ docfd --index-only --open-with abc:detached='nano {search_word}'
error: failed to parse abc:detached=nano {search_word}, search_word not available
[1]
================================================
FILE: printing-tests.t/empty.txt
================================================
================================================
FILE: printing-tests.t/run.t
================================================
--sample:
$ docfd --cache-dir .cache --sample abcd .
$TESTCASE_ROOT/test3.txt
1: abcd
^^^^
$TESTCASE_ROOT/test2.txt
1: hello
2:
3: abcd
^^^^
4:
5: abcdefgh
5: abcdefgh
6:
7: hello world abcd
^^^^
3: abcd
4:
5: abcdefgh
^^^^^^^^
6:
7: hello world abcd
--search:
$ docfd --cache-dir .cache --search abcd .
$TESTCASE_ROOT/test3.txt
1: abcd
^^^^
$TESTCASE_ROOT/test2.txt
1: hello
2:
3: abcd
^^^^
4:
5: abcdefgh
5: abcdefgh
6:
7: hello world abcd
^^^^
3: abcd
4:
5: abcdefgh
^^^^^^^^
6:
7: hello world abcd
-l/--files-with-match:
$ docfd --cache-dir .cache --sample abcd . -l
$TESTCASE_ROOT/test3.txt
$TESTCASE_ROOT/test2.txt
$ docfd --cache-dir .cache --sample abcd . --files-with-match
$TESTCASE_ROOT/test3.txt
$TESTCASE_ROOT/test2.txt
--files-without-match:
$ docfd --cache-dir .cache --sample abcd . --files-without-match
$TESTCASE_ROOT/empty.txt
$TESTCASE_ROOT/test0.txt
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test4.txt
================================================
FILE: printing-tests.t/test0.txt
================================================
hello
================================================
FILE: printing-tests.t/test1.txt
================================================
hello
================================================
FILE: printing-tests.t/test2.txt
================================================
hello
abcd
abcdefgh
hello world abcd
================================================
FILE: printing-tests.t/test3.txt
================================================
abcd
================================================
FILE: printing-tests.t/test4.txt
================================================
efgh
================================================
FILE: profiling/dune
================================================
; Copy string_set.ml out of the library so the profiling executable can
; compile it directly.  copy# (unlike plain copy) also inserts a line
; directive, so compiler errors point back at the original file in lib/.
(rule
(targets string_set.ml)
(deps ../lib/string_set.ml)
(action (copy# %{deps} %{targets}))
)
; Same trick for misc_utils.ml.
(rule
(targets misc_utils.ml)
(deps ../lib/misc_utils.ml)
(action (copy# %{deps} %{targets}))
)
; The profiling binary itself.  The warning set mirrors the main build
; (see bin/dune / tests/dune) with a few extra warnings disabled.
(executable
(flags (-w "+a-4-9-29-37-40-42-44-48-50-32-30-70@8"))
(name main)
(libraries docfd_lib
containers
cmdliner
fmt
notty
notty.unix
nottui
lwd
oseq
eio
eio_main
digestif.ocaml
digestif
)
)
================================================
FILE: profiling/main.ml
================================================
open Docfd_lib
(* Lorem-ipsum sample corpus used as the document to index/search in the
   benchmarks below.  Empty strings mark paragraph breaks. *)
let lines = [
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer placerat lacus non cursus";
"tincidunt. Suspendisse viverra leo ac quam tincidunt, quis euismod neque tempus. ";
"Vestibulum rutrum commodo tristique. Curabitur tristique dapibus dolor, vitae porttitor";
"est tristique quis. Sed urna ex, gravida vitae ipsum vel, fermentum viverra risus. ";
"Maecenas et nulla iaculis, bibendum libero vitae, varius est. Fusce eros enim, placerat ";
"quis magna eu, rutrum vulputate ante. Praesent non mi vel ipsum finibus lobortis. ";
"Duis posuere auctor hendrerit. Nunc sodales egestas vestibulum. Quisque suscipit maximus ";
"aliquam. Pellentesque tempor mi condimentum bibendum bibendum. Donec vitae accumsan quam, ";
"nec vulputate lectus. Nulla ligula ipsum, dictum vel augue at, semper vestibulum ex. ";
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed arcu ligula, cursus nec ";
"lacinia ut, lobortis in libero.";
"";
"Sed ultricies placerat urna, hendrerit ornare elit semper sit amet. Praesent ";
"pretium blandit velit, eu imperdiet lectus tincidunt ut. Suspendisse eget eros ";
"tellus. Nulla tristique vel libero non dapibus. Ut scelerisque sem sit amet ";
"odio mattis vestibulum. Nam vitae commodo mi. Vestibulum consequat orci at tellus porta ";
"placerat.";
"";
"In vel mi vestibulum felis accumsan congue eget efficitur tortor. Integer ";
"quam purus, malesuada vel nisl at, posuere vestibulum augue. Curabitur velit ";
"tortor, vestibulum id placerat eu, convallis at velit. Ut a lectus ";
"quis erat tincidunt aliquet. Etiam ut erat magna. Maecenas quis commodo leo, ";
"eleifend elementum ante. Nullam dapibus erat augue, a bibendum quam volutpat id. ";
"Morbi in ullamcorper arcu. Fusce venenatis lacus purus, vel pellentesque mi ";
"elementum a. Maecenas at mattis massa. Fusce ut elit tortor. Morbi rhoncus ";
"molestie orci eu malesuada. Aliquam gravida rutrum sem, vitae condimentum magna ";
"convallis pulvinar. Duis urna lacus, ultrices a ultrices pharetra, eleifend ";
"in ante. Fusce id elementum dolor. Nullam ornare nisl ac ultrices lobortis.";
"";
"Proin ullamcorper vulputate enim sed facilisis. Praesent vel mi metus. ";
"Fusce sagittis efficitur odio at condimentum. Nullam mollis lacinia consequat. ";
"Integer vel ex sit amet nunc aliquam molestie et eu nibh. Nam leo nunc, laoreet ";
"vitae iaculis sit amet, dapibus sit amet neque. Suspendisse eleifend, leo eget ";
"tempor molestie, massa enim auctor dui, quis vehicula erat urna id felis. Vivamus ";
"pharetra, sem non tempus ornare, risus tortor posuere tortor, eu pellentesque eros est ";
"ac erat. Sed sit amet tellus nisl. Phasellus magna urna, tincidunt in sem ";
"id, aliquam vulputate leo. Sed eleifend justo eu mauris egestas imperdiet. Fusce sagittis";
", turpis ac efficitur pulvinar, purus tellus gravida sem, eget accumsan nisl sapien a ";
"ante. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac ";
"turpis egestas. Nunc eget nibh orci. Cras facilisis facilisis sapien, ";
"a vehicula lorem imperdiet vel. Proin vel nulla nisi.";
"";
"Aenean sit amet risus at lectus pellentesque pellentesque at eu quam. ";
"Duis euismod porttitor ante quis lacinia. Cras sit amet vulputate nunc. ";
"Integer sollicitudin vitae sapien finibus fermentum. Donec eu tellus ";
"suscipit, dignissim turpis non, eleifend massa. Nullam quis ex nisi. ";
"Quisque dignissim quis leo eu finibus. ";
]
(* Run [f] [cycle] times and print the average CPU time per run
   (measured with [Sys.time]), labelled with [name].  The result of
   [f] is discarded. *)
let bench ~name ~cycle (f : unit -> 'a) =
  let t_before = Sys.time () in
  for _run = 1 to cycle do
    ignore (f ())
  done;
  let t_after = Sys.time () in
  let per_cycle = (t_after -. t_before) /. float_of_int cycle in
  Printf.printf "%s: time per cycle: %6fs\n" name per_cycle
(* Benchmark driver: sets up an in-memory index over [lines], then times
   Spelll automaton construction at various word lengths / edit-distance
   limits, and finally times [Index.search]. *)
let main env =
Eio.Switch.run @@ fun sw ->
(* [init] presumably sets up the document database; [None] means success.
   NOTE(review): [assert] is compiled out with -noassert - confirm this
   profiling binary is never built that way. *)
assert (Option.is_none (init ~db_path:"test.db" ~document_count_limit:1000));
let pool = Task_pool.make ~sw (Eio.Stdenv.domain_mgr env) in
(* Build the index over the sample corpus.  The raw index value itself is
   unused; building it is presumably required state for [Index.search]. *)
let _raw = Index.Raw.of_lines pool (List.to_seq lines) in
Params'.max_fuzzy_edit_dist := 3;
let search_exp = Search_exp.parse "vestibul rutru" |> Option.get in
(* Word used as the source of prefixes of length 1..20 below. *)
let s = "PellentesquePellentesque" in
(* Time Spelll automaton construction with edit-distance limit 2. *)
for len=1 to 20 do
let limit = 2 in
bench ~name:(Fmt.str "Spelll.of_string, limit: %d, len %2d:" limit len) ~cycle:10 (fun () ->
Spelll.of_string ~limit:2 (String.sub s 0 len))
done;
(* Same, with edit-distance limit 1. *)
for len=1 to 20 do
let limit = 1 in
bench ~name:(Fmt.str "Spelll.of_string, limit: %d, len %2d:" limit len) ~cycle:10 (fun () ->
Spelll.of_string ~limit:1 (String.sub s 0 len))
done;
(* Time a full search over the indexed corpus. *)
bench ~name:"Index.search" ~cycle:1000 (fun () ->
Index.search pool (Stop_signal.make ()) ~search_scope:None search_exp);
()
let () = Eio_main.run main
================================================
FILE: publish.sh
================================================
#!/usr/bin/env bash

# Publish a Docfd release:
#   1. detect the version from the newest CHANGELOG.md entry,
#   2. (re)tag the commit and push tags,
#   3. build and hash the release archive,
#   4. add an opam package directory (with url/checksum) to a local
#      opam-repository checkout.

RETAG_CONFIRM_TEXT="retag-docfd"

opam_repo="$HOME/opam-repository"

echo "Checking if $opam_repo exists"
if [ ! -d "$opam_repo" ]; then
echo "$opam_repo does not exist"
exit 1
fi

# Version = heading of the newest CHANGELOG entry ("## X.Y.Z").
ver=$(grep '## ' CHANGELOG.md \
| head -n 1 \
| sed -n 's/^## \s*\(\S*\)$/\1/p')

# Guard against an empty/garbled CHANGELOG: an empty version would
# silently produce a bogus tag and package directory.
if [[ -z "$ver" ]]; then
echo "Failed to detect version from CHANGELOG.md"
exit 1
fi

echo "Detected version for Docfd:" "$ver"

git_tag="$ver"

echo "Computed git tag for Docfd:" "$git_tag"

# read -r: do not let backslashes mangle the reply (ShellCheck SC2162).
read -rp "Are the version and git tag correct [y/n]? " ans
if [[ $ans != "y" ]]; then
echo "Publishing canceled"
exit 0
fi

echo "Checking if $git_tag exists in repo already"
if [[ $(git tag -l "$git_tag") == "" ]]; then
echo "Tagging commit"
git tag "$git_tag"
else
read -rp "Tag already exists, retag [y/n]? " ans
if [[ $ans == "y" ]]; then
# Retagging rewrites published history, so require an explicit
# confirmation phrase rather than a bare y/n.
read -rp "Type \"$RETAG_CONFIRM_TEXT\" to confirm: " ans
if [[ $ans != "$RETAG_CONFIRM_TEXT" ]]; then
echo "Publishing canceled"
exit 0
fi

echo "Removing tag"
git tag -d "$git_tag"
git push --delete origin "$git_tag"

echo "Tagging commit"
git tag "$git_tag"
fi
fi

echo "Pushing all tags"
git push --tags

archive="$git_tag".tar.gz
echo "Archiving as $archive"
rm -f "$archive"
git archive --output=./"$archive" "$git_tag"

echo "Hashing $archive"
hash_cmd=sha256sum
archive_hash=$("$hash_cmd" "$archive" | awk '{ print $1 }')
echo "Hash from $hash_cmd:" "$archive_hash"

packages=(
"docfd"
)

# Quote the expansion so package names survive word splitting (SC2068).
for package in "${packages[@]}"; do
package_dir="$opam_repo"/packages/"$package"/"$package"."$ver"
dest_opam="$package_dir"/opam

echo "Making directory $package_dir"
mkdir -p "$package_dir"

echo "Copying $package.opam over"
cp "$package.opam" "$dest_opam"

echo "Adding url section to $dest_opam"
echo "
url {
src:
\"https://github.com/darrenldl/docfd/releases/download/$git_tag/$archive\"
checksum:
\"sha256=$archive_hash\"
}
" >> "$dest_opam"
done
================================================
FILE: run-container.sh
================================================
#!/usr/bin/env bash

# Start an interactive login shell in the locally built docfd container,
# with the docfd checkout mounted and used as the working directory.
podman run \
  --rm \
  -it \
  --env VISUAL=nano \
  --workdir /home/docfd \
  -v ~/docfd:/home/docfd \
  localhost/docfd \
  /bin/bash --login
================================================
FILE: script-tests.t/dune
================================================
; Cram test suite; depends on the docfd binary so the tests rerun
; whenever the binary changes.
(cram
(deps ../bin/docfd.exe))
================================================
FILE: script-tests.t/run.t
================================================
Setup:
$ echo "abcd" > test0.txt
$ echo "efgh" > test1.txt
$ echo "hijk" > test2.txt
$ echo "0123" > test3.txt
$ echo "search: ^ab" >> 0.docfd-script
$ echo "search: 'xyz" >> 1.docfd-script
$ tree
.
|-- 0.docfd-script
|-- 1.docfd-script
|-- dune -> ../../../../default/script-tests.t/dune
|-- test0.txt
|-- test1.txt
|-- test2.txt
`-- test3.txt
0 directories, 7 files
Basic:
$ docfd -l --script 0.docfd-script .
$TESTCASE_ROOT/test0.txt
$ docfd -l --script 1.docfd-script .
[1]
================================================
FILE: search-scope-narrowing-tests.t/dune
================================================
; Cram test suite; depends on the docfd binary so the tests rerun
; whenever the binary changes.
(cram
(deps ../bin/docfd.exe))
================================================
FILE: search-scope-narrowing-tests.t/run.t
================================================
Setup:
$ echo "abcd" >> test0.txt
$ echo "efgh" >> test0.txt
$ echo "0123" >> test0.txt
$ echo "ijkl" >> test0.txt
$ echo "abcd" >> test1.txt
$ echo "efgh" >> test1.txt
$ echo "0123" >> test1.txt
$ echo "ijkl" >> test1.txt
$ tree
.
|-- dune -> ../../../../default/search-scope-narrowing-tests.t/dune
|-- test0.txt
`-- test1.txt
0 directories, 3 files
Single restriction:
$ # Case 0 for single restriction
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
$TESTCASE_ROOT/test0.txt
$ # Case 1 for single restriction
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
[1]
$ # Case 2 for single restriction
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
$TESTCASE_ROOT/test0.txt
Chained restriction:
$ # Case 0 for chained restrictions
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
[1]
$ # Case 1 for chained restrictions
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 2" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ echo "narrow level: 2" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
$TESTCASE_ROOT/test0.txt
$ # Case 2 for chained restrictions
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "narrow level: 2" >> test.docfd-script
$ echo "search: '0123" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script test0.txt -l
$TESTCASE_ROOT/test0.txt
File path filter + restrictions:
$ # Baseline case: "clear filter" after "search" should trigger a search for each file that has not been searched through yet
$ # So both documents should appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Baseline case quoted string using single quote
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:'test0.txt'" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Baseline case quoted string using double quote
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo 'filter: path-glob:"test0.txt"' >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Since there is no "search" after "narrow", both documents should still appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # "narrow" + "search" after filtering should prevent test1.txt from appearing, even after we clear the filter
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but the order of "search" and "clear filter" is swapped
$ # test1.txt still should not appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but we also reset the search scope via "narrow level: 0"
$ # Since resetting the search scope does not refresh the search results, test1.txt should still not appear as there is not another "search"
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but we search again after resetting search scope
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Simplified version of the above case where we skip the search before "narrow level: 0"
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but the order of "clear filter" and "narrow level: 0" is swapped
$ # Both documents should still appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
$ # Similar to the above case, but the order of "clear filter" and "search" is swapped
$ # Both documents should still appear
$ echo "" > test.docfd-script
$ echo "search: 'abcd" >> test.docfd-script
$ echo "filter: path-glob:test0.txt" >> test.docfd-script
$ echo "narrow level: 1" >> test.docfd-script
$ echo "narrow level: 0" >> test.docfd-script
$ echo "search: 'efgh" >> test.docfd-script
$ echo "clear filter" >> test.docfd-script
$ docfd --tokens-per-search-scope-level 1 --script test.docfd-script -l .
$TESTCASE_ROOT/test1.txt
$TESTCASE_ROOT/test0.txt
================================================
FILE: tests/dune
================================================
; Unit test runner (Alcotest + QCheck) for docfd_lib.
; -g keeps debug info for readable backtraces on test failures.
(executable
(flags (-w "+a-4-9-29-37-40-42-44-48-50-70@8" -g))
(name main)
(libraries qcheck-core
qcheck-alcotest
alcotest
docfd_lib
eio
eio_main
)
)
================================================
FILE: tests/main.ml
================================================
open Docfd_lib
(* Test entry point: sets up the Eio environment and task pool that the
   library requires, then hands the combined Alcotest + QCheck suites to
   the Alcotest runner. *)
let () =
  Eio_main.run @@ fun env ->
  Eio.Switch.run @@ fun sw ->
  let _task_pool = Task_pool.make ~sw (Eio.Stdenv.domain_mgr env) in
  let alcotest_suites =
    [
      ("Search_exp_tests.Alco", Search_exp_tests.Alco.suite);
      ("Utils_tests.Alco", Utils_tests.Alco.suite);
    ]
  in
  (* No QCheck suites registered at the moment; this adapter keeps the
     plumbing in place for when some are added. *)
  let qcheck_suites =
    List.map
      (fun (name, suite) ->
         (name, List.map QCheck_alcotest.to_alcotest suite))
      []
  in
  Alcotest.run "docfd-lib" (alcotest_suites @ qcheck_suites)
================================================
FILE: tests/search_exp_tests.ml
================================================
open Docfd_lib
open Test_utils
module Alco = struct
(* Assert that [s] is rejected by the search-expression parser. *)
let test_invalid_exp (s : string) =
  let parse_result = Search_exp.parse s in
  Alcotest.(check bool) "true" true (Option.is_none parse_result)
(* Assert that parsing [s] yields an empty search phrase with no
   enriched tokens. *)
let test_empty_phrase (s : string) =
  let parsed = Search_phrase.parse s in
  Alcotest.(check bool) "case0" true (Search_phrase.is_empty parsed);
  let tokens = Search_phrase.enriched_tokens parsed in
  Alcotest.(check bool) "case1" true (List.is_empty tokens)
(* Assert that [s] parses successfully into an empty search expression
   whose flattened form contains only empty phrases (or none at all). *)
let test_empty_exp (s : string) =
  let parsed = Option.get (Search_exp.parse s) in
  Alcotest.(check bool) "case0" true (Search_exp.is_empty parsed);
  let phrases = Search_exp.flattened parsed in
  let all_empty =
    List.is_empty phrases
    ||
    List.for_all Search_phrase.is_empty phrases
  in
  Alcotest.(check bool) "case1" true all_empty
(* Shorthand constructors for annotated tokens, all placed in group 0. *)

(* [at s]: plain string token. *)
let at s : Search_phrase.annotated_token =
Search_phrase.{ data = `String s; group_id = 0 }

(* [atm m]: match-type marker token. *)
let atm m : Search_phrase.annotated_token =
Search_phrase.{ data = `Match_typ_marker m; group_id = 0 }

(* [ats]: explicit-spaces token. *)
let ats : Search_phrase.annotated_token =
Search_phrase.{ data = `Explicit_spaces; group_id = 0 }
(* Build an expected enriched token with string payload [string], the
   given prev/next link flags, and match type [m] (fuzzy by default).
   The Spelll automaton is a placeholder built from the empty string. *)
let et
    ?(m : Search_phrase.match_typ = `Fuzzy)
    string
    is_linked_to_prev
    is_linked_to_next
  : Search_phrase.Enriched_token.t =
  let dfa = Spelll.of_string ~limit:0 "" in
  Search_phrase.Enriched_token.make
    (`String string) ~is_linked_to_prev ~is_linked_to_next dfa m
(* Like [et] but for an explicit-spaces enriched token. *)
let ets
    ?(m : Search_phrase.match_typ = `Fuzzy)
    is_linked_to_prev
    is_linked_to_next
  =
  let dfa = Spelll.of_string ~limit:0 "" in
  Search_phrase.Enriched_token.make
    `Explicit_spaces ~is_linked_to_prev ~is_linked_to_next dfa m
(* Check that parsing [s] flattens to exactly the phrases described by [l].
   Each element of [l] pairs one expected phrase's annotated tokens with
   that phrase's expected enriched tokens.  With [~neg:true], the phrase
   list comparison (case0) is expected to FAIL instead; the enriched-token
   comparison (case1) is always checked positively. *)
let test_exp
?(neg = false)
(s : string)
(l : (Search_phrase.annotated_token list * Search_phrase.Enriched_token.t list) list)
=
(* Rebind before entering [Alcotest.(...)], where the module's [neg]
   combinator shadows this parameter. *)
let neg' = neg in
(* Expected phrases, built from the annotated-token lists. *)
let phrases =
l
|> List.map fst
|> List.map (fun l ->
List.to_seq l
|> Search_phrase.of_annotated_tokens)
in
(* Expected enriched tokens, one list per phrase. *)
let enriched_token_list_list =
List.map snd l
in
(* case0: parsed + flattened phrases match the expected ones, compared
   as sorted lists so ordering differences don't matter. *)
Alcotest.(check
(if neg' then (
neg (list search_phrase_testable)
) else (
list search_phrase_testable
)))
(Fmt.str "case0 of %S" s)
(List.sort Search_phrase.compare phrases)
(Search_exp.parse s
|> Option.get
|> Search_exp.flattened
|> List.sort Search_phrase.compare
);
(* case1: enriched tokens of the expected phrases match [l]. *)
Alcotest.(check (list (list enriched_token_testable)))
(Fmt.str "case1 of %S" s)
enriched_token_list_list
(phrases
|> List.map Search_phrase.enriched_tokens
)
(* Regression corpus for the search expression language.  Exercises, via
   the helpers above:
   - [test_empty_exp] / [test_empty_phrase]: input normalizes to empty;
   - [test_invalid_exp]: input fails to parse;
   - [test_exp]: input flattens to exactly the listed phrases, each given
     as (annotated tokens, expected enriched tokens).  The boolean pair on
     each [et]/[ets] is (is_linked_to_prev, is_linked_to_next). *)
let corpus () =
(* Whitespace-only inputs are empty. *)
test_empty_exp "";
test_empty_phrase "";
test_empty_exp " ";
test_empty_phrase " ";
test_empty_exp "\r\n";
test_empty_phrase "\r\n";
test_empty_exp "\t";
test_empty_phrase "\t";
test_empty_exp "\r\n\t";
test_empty_phrase "\r\n\t";
test_empty_exp " \r \n \t ";
test_empty_phrase " \r \n \t ";
(* Empty groups, arbitrarily nested, are still empty. *)
test_empty_exp "()";
test_empty_exp " () ";
test_empty_exp "( )";
test_empty_exp " ( ) ";
test_empty_exp " ( ) () ";
test_empty_exp " ( ( ) ) () ";
test_empty_exp " ( () ) (( )) ";
test_empty_exp " ( () ) (( () )) ";
(* Unbalanced parentheses and a bare optional marker are parse errors. *)
test_invalid_exp " ( ) ( ";
test_invalid_exp " ) ( ";
test_invalid_exp " ( ( ) ";
(* NOTE(review): the next line duplicates the previous one exactly —
   possibly meant to cover a different unbalanced input; confirm. *)
test_invalid_exp " ( ( ) ";
test_invalid_exp "?";
test_invalid_exp "? ";
(* Escaping '?' yields a literal token. *)
test_exp "\\?"
[ ([ at "?" ],
[ et "?" false false ])
];
(* Punctuation with no surrounding spaces links adjacent tokens. *)
test_exp "hello_world"
[ ([ at "hello"; at "_"; at "world" ],
[ et "hello" false true; et "_" true true; et "world" true false ])
];
(* Empty groups adjacent to a token do not affect it. *)
test_exp "(hello)"
[ ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "()hello"
[ ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "hello()"
[ ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "( ) hello"
[ ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "hello ( )"
[ ([ at "hello" ],
[ et "hello" false false ])
];
(* '?' makes the following token or group optional: flattening yields
   both the branch without it and the branch with it. *)
test_exp "?hello"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "(?hello)"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "?(hello)"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "?hello()"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "?hello ()"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "? hello"
[ ([], [])
; ([ at "hello" ],
[ et "hello" false false ])
];
test_exp "?hello world"
[ ([ at "world" ],
[ et "world" false false ])
; ([ at "hello"; at " "; at "world" ],
[ et "hello" false false; et "world" false false ])
];
test_exp "? hello world"
[ ([ at "world" ],
[ et "world" false false ])
; ([ at "hello"; at " "; at "world" ],
[ et "hello" false false; et "world" false false ])
];
test_exp "?(hello) world"
[ ([ at "world" ],
[ et "world" false false ] )
; ([ at "hello"; at " "; at "world" ],
[ et "hello" false false; et "world" false false ] )
];
test_exp "? (hello) world"
[ ([ at "world" ],
[ et "world" false false ] )
; ([ at "hello"; at " "; at "world" ],
[ et "hello" false false; et "world" false false ])
];
test_exp "?(hello world) abcd"
[ ([ at "abcd" ],
[ et "abcd" false false ] )
; ([ at "hello"; at " "; at "world"; at " "; at "abcd" ],
[ et "hello" false false; et "world" false false; et "abcd" false false ] )
];
test_exp "ab ?(hello world) cd"
[ ([ at "ab"; at " "; at "cd" ],
[ et "ab" false false; et "cd" false false ])
; ([ at "ab"; at " "; at "hello"; at " "; at "world"; at " "; at "cd" ],
[ et "ab" false false; et "hello" false false; et "world" false false; et "cd" false false ])
];
test_exp "ab ?hello world cd"
[ ([ at "ab"; at " "; at "world"; at " "; at "cd" ],
[ et "ab" false false; et "world" false false; et "cd" false false ])
; ([ at "ab"; at " "; at "hello"; at " "; at "world"; at " "; at "cd" ],
[ et "ab" false false; et "hello" false false; et "world" false false; et "cd" false false ])
];
(* Alternation with '|' expands to the cartesian product of branches;
   an optional branch additionally contributes the absent case. *)
test_exp "go (left | right)"
[ ([ at "go"; at " "; at "left" ],
[ et "go" false false; et "left" false false ])
; ([ at "go"; at " "; at "right" ],
[ et "go" false false; et "right" false false ])
];
test_exp "go (?up | left | right)"
[ ([ at "go" ],
[ et "go" false false ])
; ([ at "go"; at " "; at "up" ],
[ et "go" false false; et "up" false false ])
; ([ at "go"; at " "; at "left" ],
[ et "go" false false; et "left" false false ])
; ([ at "go"; at " "; at "right" ],
[ et "go" false false; et "right" false false ])];
test_exp "(left | right) (up | down)"
[ ([ at "left"; at " "; at "up" ],
[ et "left" false false; et "up" false false ])
; ([ at "left"; at " "; at "down" ],
[ et "left" false false; et "down" false false ])
; ([ at "right"; at " "; at "up" ],
[ et "right" false false; et "up" false false ])
; ([ at "right"; at " "; at "down" ],
[ et "right" false false; et "down" false false ])
];
test_exp "((a|b)(c|d)) (e | f)"
[ ([ at "a"; at " "; at "c"; at " "; at "e" ],
[ et "a" false false; et "c" false false; et "e" false false ])
; ([ at "a"; at " "; at "c"; at " "; at "f" ],
[ et "a" false false; et "c" false false; et "f" false false ])
; ([ at "a"; at " "; at "d"; at " "; at "e" ],
[ et "a" false false; et "d" false false; et "e" false false ])
; ([ at "a"; at " "; at "d"; at " "; at "f" ],
[ et "a" false false; et "d" false false; et "f" false false ])
; ([ at "b"; at " "; at "c"; at " "; at "e" ],
[ et "b" false false; et "c" false false; et "e" false false ])
; ([ at "b"; at " "; at "c"; at " "; at "f" ],
[ et "b" false false; et "c" false false; et "f" false false ])
; ([ at "b"; at " "; at "d"; at " "; at "e" ],
[ et "b" false false; et "d" false false; et "e" false false ])
; ([ at "b"; at " "; at "d"; at " "; at "f" ],
[ et "b" false false; et "d" false false; et "f" false false ])
];
test_exp "(?left | right) (up | down)"
[ ([ at "up" ],
[ et "up" false false ])
; ([ at "down" ],
[ et "down" false false ])
; ([ at "left"; at " "; at "up" ],
[ et "left" false false; et "up" false false ])
; ([ at "left"; at " "; at "down" ],
[ et "left" false false; et "down" false false ])
; ([ at "right"; at " "; at "up" ],
[ et "right" false false; et "up" false false ])
; ([ at "right"; at " "; at "down" ],
[ et "right" false false; et "down" false false ])
];
test_exp "go (left | right) or ( up | down )"
[ ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "or" false false; et "down" false false ])
];
(* Link behavior around '/': tokens are linked only when no space (or
   group boundary) separates them from the punctuation.  The ~neg cases
   pin down which linkings must NOT be produced. *)
test_exp "and/or"
[ ([ at "and"; at "/"; at "or" ],
[ et "and" false true; et "/" true true; et "or" true false ])
];
test_exp ~neg:true "and/or"
[ ([ at "and"; at " "; at "/"; at " "; at "or" ],
[ et "and" false false; et "/" false false; et "or" false false ])
];
test_exp ~neg:true "and/or"
[ ([ at "and"; at " "; at "/"; at "or" ],
[ et "and" false false; et "/" false true; et "or" true false ])
];
test_exp ~neg:true "and/or"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
test_exp "and / or"
[ ([ at "and"; at " "; at "/"; at " "; at "or" ],
[ et "and" false false; et "/" false false; et "or" false false ])
];
test_exp ~neg:true "and / or"
[ ([ at "and"; at "/"; at "or" ],
[ et "and" false true; et "/" true true; et "or" true false ])
];
test_exp ~neg:true "and / or"
[ ([ at "and"; at " "; at "/"; at "or" ],
[ et "and" false false; et "/" false true; et "or" true false ])
];
test_exp ~neg:true "and / or"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
(* Group boundaries also break links. *)
test_exp "(and)/ or"
[ ([ at "and"; at " "; at "/"; at " "; at "or" ],
[ et "and" false false; et "/" false false; et "or" false false ])
];
test_exp ~neg:true "(and)/ or"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
test_exp "and(/) or"
[ ([ at "and"; at " "; at "/"; at " "; at "or" ],
[ et "and" false false; et "/" false false; et "or" false false ])
];
test_exp ~neg:true "and(/) or"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
test_exp "and/(or)"
[ ([ at "and"; at "/"; at " "; at "or" ],
[ et "and" false true; et "/" true false; et "or" false false ])
];
test_exp ~neg:true "and/(or)"
[ ([ at "and"; at "/"; at "or" ],
[ et "and" false true; et "/" true true; et "or" true false ])
];
test_exp "go (left | right) and/or ( up | down )"
[ ([ at "go"; at " "; at "left"; at " "; at "and"; at "/"; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "and" false true; et "/" true true; et "or" true false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "and"; at "/"; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "and" false true; et "/" true true; et "or" true false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "and"; at "/"; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "and" false true; et "/" true true; et "or" true false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "and"; at "/"; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "and" false true; et "/" true true; et "or" true false; et "down" false false ])
];
test_exp "go (left | right) and / or ( up | down )"
[ ([ at "go"; at " "; at "left"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "and" false false; et "/" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "and" false false; et "/" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "and" false false; et "/" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "and"; at " "; at "/"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "and" false false; et "/" false false; et "or" false false; et "down" false false ])
];
(* Optional alternation groups, including nested optionals. *)
test_exp "go ?(left | right) ( up | down )"
[ ([ at "go"; at " "; at "up" ],
[ et "go" false false; et "up" false false ])
; ([ at "go"; at " "; at "down" ],
[ et "go" false false; et "down" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "down" false false ])
];
test_exp "go ?((left | right) or) ( up | down )"
[ ([ at "go"; at " "; at "up" ],
[ et "go" false false; et "up" false false ])
; ([ at "go"; at " "; at "down" ],
[ et "go" false false; et "down" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "or" false false; et "down" false false ])
];
test_exp "go ?(?(left | right) or) ( up | down )"
[ ([ at "go"; at " "; at "up" ],
[ et "go" false false; et "up" false false ])
; ([ at "go"; at " "; at "down" ],
[ et "go" false false; et "down" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "or" false false; et "down" false false ])
];
test_exp "go ?(?(left | right) or) or ( ?up | down )"
[ ([ at "go"; at " "; at "or" ],
[ et "go" false false; et "or" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "or" ],
[ et "go" false false; et "or" false false; et "or" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "or" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "or"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "or" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "or" ],
[ et "go" false false; et "left" false false; et "or" false false; et "or" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "left" false false; et "or" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "left"; at " "; at "or"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "left" false false; et "or" false false; et "or" false false; et "down" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "or" ],
[ et "go" false false; et "right" false false; et "or" false false; et "or" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "or"; at " "; at "up" ],
[ et "go" false false; et "right" false false; et "or" false false; et "or" false false; et "up" false false ])
; ([ at "go"; at " "; at "right"; at " "; at "or"; at " "; at "or"; at " "; at "down" ],
[ et "go" false false; et "right" false false; et "or" false false; et "or" false false; et "down" false false ])
];
(* Dash handling: adjacent dashes link, spaced ones do not. *)
test_exp "- - -"
[ ([ at "-"; at " "; at "-"; at " "; at "-" ],
[ et "-" false false; et "-" false false; et "-" false false ])
];
test_exp "-- -"
[ ([ at "-"; at "-"; at " "; at "-" ],
[ et "-" false true; et "-" true false; et "-" false false ])
];
(* Match-typ markers: ' (exact), ^ (prefix), $ (suffix); backslash
   escapes the marker into a literal token.  Some inputs below are
   tested twice against two different annotated-token spellings,
   apparently to check both spellings normalize to the same phrase. *)
test_exp "\\'abcd"
[ ([ at "'"; at "abcd" ],
[ et "'" false true; et "abcd" true false ])
];
test_exp "'abcd"
[ ([ atm `Exact; at "abcd" ],
[ et ~m:`Exact "abcd" false false ])
];
test_exp "' abcd"
[ ([ atm `Exact; at " "; at "abcd" ],
[ et "'" false false; et "abcd" false false ])
];
test_exp "' abcd"
[ ([ at "'"; at " "; at "abcd" ],
[ et "'" false false; et "abcd" false false ])
];
test_exp "\\^abcd"
[ ([ at "^"; at "abcd" ],
[ et "^" false true; et "abcd" true false ])
];
test_exp "^abcd"
[ ([ atm `Prefix; at "abcd" ],
[ et ~m:`Prefix "abcd" false false ])
];
test_exp "^ abcd"
[ ([ at "^"; at " "; at "abcd" ],
[ et "^" false false; et "abcd" false false ])
];
test_exp "^ abcd"
[ ([ atm `Prefix; at " "; at "abcd" ],
[ et "^" false false; et "abcd" false false ])
];
test_exp "abcd$"
[ ([ at "abcd"; atm `Suffix ],
[ et ~m:`Suffix "abcd" false false ])
];
test_exp "abcd $"
[ ([ at "abcd"; at " "; at "$" ],
[ et "abcd" false false; et "$" false false ])
];
test_exp "abcd $"
[ ([ at "abcd"; at " "; atm `Suffix ],
[ et "abcd" false false; et "$" false false ])
];
(* Doubled markers: the extra marker degrades to a literal token. *)
test_exp "''abcd"
[ ([ atm `Exact; atm `Exact; at "abcd" ],
[ et ~m:`Exact "'" false true; et ~m:`Exact "abcd" true false ])
];
test_exp "^^abcd"
[ ([ atm `Prefix; at "^"; at "abcd" ],
[ et ~m:`Exact "^" false true; et ~m:`Prefix "abcd" true false ])
];
test_exp "abcd$$"
[ ([ at "abcd"; at "$"; atm `Suffix ],
[ et ~m:`Suffix "abcd" false true; et ~m:`Exact "$" true false ])
];
test_exp "abcd$$"
[ ([ at "abcd"; atm `Suffix; atm `Suffix ],
[ et ~m:`Suffix "abcd" false true; et ~m:`Exact "$" true false ])
];
(* '~' denotes explicit spaces; it links its neighbours. *)
test_exp "'ab~cd"
[ ([ atm `Exact; at "ab"; ats; at "cd" ],
[ et ~m:`Exact "ab" false true
; ets ~m:`Exact true true
; et ~m:`Exact "cd" true false ])
];
test_exp "^ab~cd$"
[ ([ atm `Prefix; at "ab"; ats; at "cd"; atm `Suffix ],
[ et ~m:`Exact "ab" false true
; ets ~m:`Exact true true
; et ~m:`Exact "cd" true true
; et ~m:`Prefix "$" true false
])
];
test_exp "ab~cd$"
[ ([ at "ab"; ats; at "cd"; atm `Suffix ],
[ et ~m:`Suffix "ab" false true
; ets ~m:`Exact true true
; et ~m:`Exact "cd" true false
])
];
test_exp " ~cd$"
[ ([ ats; at "cd"; atm `Suffix ],
[ ets ~m:`Suffix false true
; et ~m:`Exact "cd" true false
])
];
(* Mixed markers within one phrase (two annotated spellings again). *)
test_exp "'^abcd efgh$$ ij$kl$"
[ ([ atm `Exact
; atm `Prefix
; at "abcd"
; at " "
; at "efgh"
; atm `Suffix
; atm `Suffix
; at " "
; at "ij"
; atm `Suffix
; at "kl"
; atm `Suffix
],
[ et ~m:`Exact "^" false true
; et ~m:`Exact "abcd" true false
; et ~m:`Suffix "efgh" false true
; et ~m:`Exact "$" true false
; et ~m:`Suffix "ij" false true
; et ~m:`Exact "$" true true
; et ~m:`Exact "kl" true false
])
];
test_exp "'^abcd efgh$$ ij$kl$"
[ ([ atm `Exact
; at "^"
; at "abcd"
; at " "
; at "efgh"
; at "$"
; atm `Suffix
; at " "
; at "ij"
; at "$"
; at "kl"
; atm `Suffix
],
[ et ~m:`Exact "^" false true
; et ~m:`Exact "abcd" true false
; et ~m:`Suffix "efgh" false true
; et ~m:`Exact "$" true false
; et ~m:`Suffix "ij" false true
; et ~m:`Exact "$" true true
; et ~m:`Exact "kl" true false
])
];
()
(* Alcotest entry point for this test module. *)
let suite = [ Alcotest.test_case "corpus" `Quick corpus ]
end
================================================
FILE: tests/test_utils.ml
================================================
open Docfd_lib
(* Alcotest testable packing [Search_phrase.Enriched_token]
   (it supplies the required [equal] and [pp]). *)
let enriched_token_testable =
  (module Search_phrase.Enriched_token
    : Alcotest.TESTABLE with type t = Search_phrase.Enriched_token.t)
(* Alcotest testable packing [Search_phrase]
   (it supplies the required [equal] and [pp]). *)
let search_phrase_testable =
  (module Search_phrase
    : Alcotest.TESTABLE with type t = Search_phrase.t)
================================================
FILE: tests/utils_tests.ml
================================================
open Docfd_lib
module Alco = struct
  (* Corpus for [Misc_utils'.normalize_path_to_absolute]: absolute
     inputs are checked directly; relative inputs are checked against
     the current working directory. *)
  let normalize_path_to_absolute_corpus () =
    let check_abs expected input =
      Alcotest.(check string)
        (Printf.sprintf "%s becomes %s" input expected)
        expected
        (Misc_utils'.normalize_path_to_absolute input)
    in
    let cwd = Sys.getcwd () in
    (* Relative-path variant: "" means the result should be cwd itself. *)
    let check_rel expected input =
      let expected =
        if String.equal expected "" then cwd
        else Filename.concat cwd expected
      in
      check_abs expected input
    in
    check_abs "/" "/..";
    check_abs "/" "/..";
    check_abs "/" "/abcd/..";
    check_abs "/" "/abcd/..";
    check_abs "/abc" "/abc/";
    check_abs "/abc" "/abc/def/..";
    check_abs "/abc" "/abc//";
    check_abs "/abc/def" "/abc//def";
    check_abs "/abc/def" "/abc/./def";
    check_abs "/abc/def" "/abc/.///def/.";
    check_abs "/def" "/abc/.//../def/.";
    check_rel "" "abc/..";
    check_rel "def" "abc/../def";
    check_rel "abc/def" "abc////.//./././/def";
    ()

  (* Corpus for [Misc_utils'.normalize_glob_to_absolute]: same shape as
     above, plus cases showing that ".." is not collapsed across the
     "**" wildcard. *)
  let normalize_glob_to_absolute_corpus () =
    let check_abs expected input =
      Alcotest.(check string)
        (Printf.sprintf "%s becomes %s" input expected)
        expected
        (Misc_utils'.normalize_glob_to_absolute input)
    in
    let cwd = Sys.getcwd () in
    (* Relative-glob variant: "" means the result should be cwd itself. *)
    let check_rel expected input =
      let expected =
        if String.equal expected "" then cwd
        else Filename.concat cwd expected
      in
      check_abs expected input
    in
    check_abs "/" "/..";
    check_abs "/" "/..";
    check_abs "/" "/abcd/..";
    check_abs "/" "/abcd/..";
    check_abs "/abc" "/abc/";
    check_abs "/abc" "/abc/def/..";
    check_abs "/abc" "/abc/*/..";
    check_abs "/abc/**/.." "/abc/**/..";
    check_abs "/abc/**/def" "/abc/**/def";
    check_abs "/**/def/*/.." "/abc/../**/def/*/..";
    check_abs "/abc/**/def" "/abc/.////**/def";
    check_abs "/abc" "/abc//";
    check_abs "/abc/def" "/abc//def";
    check_abs "/abc/def" "/abc/./def";
    check_abs "/abc/def" "/abc/.///def/.";
    check_abs "/def" "/abc/.//../def/.";
    check_rel "" "abc/..";
    check_rel "def" "abc/../def";
    check_rel "abc/def" "abc////.//./././/def";
    check_rel "abc/**/../def" "abc/**/../def";
    check_rel "abc/**/../def" "abc/*/../**/../def";
    check_rel "abc/*/def" "abc/*/*/../def";
    ()

  (* Alcotest entry point for this test module. *)
  let suite =
    [ Alcotest.test_case
        "normalize_path_to_absolute_corpus"
        `Quick
        normalize_path_to_absolute_corpus;
      Alcotest.test_case
        "normalize_glob_to_absolute_corpus"
        `Quick
        normalize_glob_to_absolute_corpus;
    ]
end
================================================
FILE: update-version-string.py
================================================
"""Generate bin/version_string.ml from the environment or CHANGELOG.md.

The version is taken from DOCFD_VERSION_OVERRIDE when set and non-empty;
otherwise it is the first "## <version>" heading in CHANGELOG.md that is
not marked as a future release.  Fails loudly instead of silently
writing a bogus version string.
"""
import os

ML_PATH = "bin/version_string.ml"


def detect_version(changelog_lines):
    """Return the version from the first release heading, or None.

    A release heading is a line starting with "## " whose text does not
    contain "future release" (case-insensitive); the version is the
    second whitespace-separated field, stripped.
    """
    for line in changelog_lines:
        if line.startswith("## ") and "future release" not in line.lower():
            return line.split(" ", 2)[1].strip()
    return None


def main():
    version = os.environ.get("DOCFD_VERSION_OVERRIDE")
    if version is None or version == "":
        with open("CHANGELOG.md") as f:
            version = detect_version(f)
    # Previously a missing heading silently produced `let s = "None"`;
    # abort instead so the build surfaces the problem.
    if not version:
        raise SystemExit("Failed to detect Docfd version from CHANGELOG.md")
    print(f"Detected version for Docfd: {version}")
    print(f"Writing to {ML_PATH}")
    with open(ML_PATH, "w") as f:
        f.write(f"let s = \"{version}\"")
        f.write("\n")


if __name__ == "__main__":
    main()