Repository: heywhy/ex_elasticlunr
Branch: master
Commit: 0fcb8dede8ac
Files: 61
Total size: 136.6 KB
Directory structure:
gitextract_q80yq73w/
├── .credo.exs
├── .formatter.exs
├── .github/
│ ├── FUNDING.yml
│ ├── pull_request_template.md
│ └── workflows/
│ ├── dialyzer.yml
│ └── test.yml
├── .gitignore
├── .vscode/
│ └── extensions.json
├── BACKERS.md
├── LICENSE
├── README.md
├── coveralls.json
├── docs.livemd
├── lib/
│ └── elasticlunr/
│ ├── application.ex
│ ├── core/
│ │ ├── document_store.ex
│ │ ├── field.ex
│ │ ├── index.ex
│ │ └── token.ex
│ ├── db.ex
│ ├── deserializer.ex
│ ├── dsl/
│ │ ├── query/
│ │ │ ├── bool_query.ex
│ │ │ ├── match_all_query.ex
│ │ │ ├── match_query.ex
│ │ │ ├── not_query.ex
│ │ │ └── terms_query.ex
│ │ ├── query.ex
│ │ └── query_repository.ex
│ ├── manager/
│ │ └── index_manager.ex
│ ├── pipeline/
│ │ ├── stemmer.ex
│ │ ├── stop_word_filter.ex
│ │ └── trimmer.ex
│ ├── pipeline.ex
│ ├── protocol_implementations.ex
│ ├── serializer.ex
│ ├── storage/
│ │ ├── blackhole.ex
│ │ ├── disk.ex
│ │ └── provider.ex
│ ├── storage.ex
│ ├── tokenizer.ex
│ ├── utils/
│ │ └── process.ex
│ └── utlis.ex
├── mix.exs
└── test/
├── core/
│ ├── document_store_test.exs
│ ├── field_test.exs
│ └── index_test.exs
├── deserializer_test.exs
├── dsl_test.exs
├── manager/
│ └── index_manager_test.exs
├── pipeline/
│ ├── stemmer_test.exs
│ ├── stop_word_filter_test.exs
│ └── trimmer_test.exs
├── pipeline_test.exs
├── serializer_test.exs
├── storage/
│ └── disk_test.exs
├── storage_test.exs
├── support/
│ └── fixture/
│ ├── fixture.ex
│ ├── stemmer_fixture.json
│ ├── users.data
│ └── users.index
├── test_helper.exs
└── tokenizer_test.exs
================================================
FILE CONTENTS
================================================
================================================
FILE: .credo.exs
================================================
# This file contains the configuration for Credo and you are probably reading
# this after creating it with `mix credo.gen.config`.
#
# If you find anything wrong or unclear in this file, please report an
# issue on GitHub: https://github.com/rrrene/credo/issues
#
%{
#
# You can have as many configs as you like in the `configs:` field.
configs: [
%{
#
# Run any config using `mix credo -C <name>`. If no config name is given
# "default" is used.
#
name: "default",
#
# These are the files included in the analysis:
files: %{
#
# You can give explicit globs or simply directories.
# In the latter case `**/*.{ex,exs}` will be used.
#
included: [
"lib/",
"src/",
"test/"
],
excluded: [~r"/_build/", ~r"/deps/", ~r"/node_modules/"]
},
#
# Load and configure plugins here:
#
plugins: [],
#
# If you create your own checks, you must specify the source files for
# them here, so they can be loaded by Credo before running the analysis.
#
requires: [],
#
# If you want to enforce a style guide and need a more traditional linting
# experience, you can change `strict` to `true` below:
#
strict: false,
#
# To modify the timeout for parsing files, change this value:
#
parse_timeout: 5000,
#
# If you want to use uncolored output by default, you can change `color`
# to `false` below:
#
color: true,
#
# You can customize the parameters of any check by adding a second element
# to the tuple.
#
# To disable a check put `false` as second element:
#
# {Credo.Check.Design.DuplicatedCode, false}
#
checks: [
#
## Consistency Checks
#
{Credo.Check.Consistency.ExceptionNames, []},
{Credo.Check.Consistency.LineEndings, []},
{Credo.Check.Consistency.ParameterPatternMatching, []},
{Credo.Check.Consistency.SpaceAroundOperators, []},
{Credo.Check.Consistency.SpaceInParentheses, []},
{Credo.Check.Consistency.TabsOrSpaces, []},
#
## Design Checks
#
# You can customize the priority of any check
# Priority values are: `low, normal, high, higher`
#
{Credo.Check.Design.AliasUsage,
[priority: :low, if_nested_deeper_than: 2, if_called_more_often_than: 0]},
# You can also customize the exit_status of each check.
# If you don't want TODO comments to cause `mix credo` to fail, just
# set this value to 0 (zero).
#
{Credo.Check.Design.TagTODO, [exit_status: 2]},
{Credo.Check.Design.TagFIXME, []},
#
## Readability Checks
#
{Credo.Check.Readability.AliasOrder, []},
{Credo.Check.Readability.FunctionNames, []},
{Credo.Check.Readability.LargeNumbers, []},
{Credo.Check.Readability.MaxLineLength, [priority: :low, max_length: 120]},
{Credo.Check.Readability.ModuleAttributeNames, []},
{Credo.Check.Readability.ModuleDoc, false},
{Credo.Check.Readability.ModuleNames, []},
{Credo.Check.Readability.ParenthesesInCondition, []},
{Credo.Check.Readability.ParenthesesOnZeroArityDefs, []},
{Credo.Check.Readability.PredicateFunctionNames, []},
{Credo.Check.Readability.PreferImplicitTry, []},
{Credo.Check.Readability.RedundantBlankLines, []},
{Credo.Check.Readability.Semicolons, []},
{Credo.Check.Readability.SpaceAfterCommas, []},
{Credo.Check.Readability.StringSigils, []},
{Credo.Check.Readability.TrailingBlankLine, []},
{Credo.Check.Readability.TrailingWhiteSpace, []},
{Credo.Check.Readability.UnnecessaryAliasExpansion, []},
{Credo.Check.Readability.VariableNames, []},
#
## Refactoring Opportunities
#
{Credo.Check.Refactor.CondStatements, []},
{Credo.Check.Refactor.CyclomaticComplexity, []},
{Credo.Check.Refactor.FunctionArity, []},
{Credo.Check.Refactor.LongQuoteBlocks, []},
# {Credo.Check.Refactor.MapInto, []},
{Credo.Check.Refactor.MatchInCondition, []},
{Credo.Check.Refactor.NegatedConditionsInUnless, []},
{Credo.Check.Refactor.NegatedConditionsWithElse, []},
{Credo.Check.Refactor.Nesting, []},
{Credo.Check.Refactor.UnlessWithElse, []},
{Credo.Check.Refactor.WithClauses, []},
#
## Warnings
#
{Credo.Check.Warning.ApplicationConfigInModuleAttribute, []},
{Credo.Check.Warning.BoolOperationOnSameValues, []},
{Credo.Check.Warning.ExpensiveEmptyEnumCheck, []},
{Credo.Check.Warning.IExPry, []},
{Credo.Check.Warning.IoInspect, []},
# {Credo.Check.Warning.LazyLogging, []},
{Credo.Check.Warning.MixEnv, false},
{Credo.Check.Warning.OperationOnSameValues, []},
{Credo.Check.Warning.OperationWithConstantResult, []},
{Credo.Check.Warning.RaiseInsideRescue, []},
{Credo.Check.Warning.UnusedEnumOperation, []},
{Credo.Check.Warning.UnusedFileOperation, []},
{Credo.Check.Warning.UnusedKeywordOperation, []},
{Credo.Check.Warning.UnusedListOperation, []},
{Credo.Check.Warning.UnusedPathOperation, []},
{Credo.Check.Warning.UnusedRegexOperation, []},
{Credo.Check.Warning.UnusedStringOperation, []},
{Credo.Check.Warning.UnusedTupleOperation, []},
{Credo.Check.Warning.UnsafeExec, []},
#
# Checks scheduled for next check update (opt-in for now, just replace `false` with `[]`)
#
# Controversial and experimental checks (opt-in, just replace `false` with `[]`)
#
{Credo.Check.Consistency.MultiAliasImportRequireUse, false},
{Credo.Check.Consistency.UnusedVariableNames, false},
{Credo.Check.Design.DuplicatedCode, false},
{Credo.Check.Readability.AliasAs, false},
{Credo.Check.Readability.BlockPipe, false},
{Credo.Check.Readability.ImplTrue, false},
{Credo.Check.Readability.MultiAlias, false},
{Credo.Check.Readability.SeparateAliasRequire, false},
{Credo.Check.Readability.SinglePipe, false},
{Credo.Check.Readability.Specs, false},
{Credo.Check.Readability.StrictModuleLayout, false},
{Credo.Check.Readability.WithCustomTaggedTuple, false},
{Credo.Check.Refactor.ABCSize, false},
{Credo.Check.Refactor.AppendSingleItem, false},
{Credo.Check.Refactor.DoubleBooleanNegation, false},
{Credo.Check.Refactor.ModuleDependencies, false},
{Credo.Check.Refactor.NegatedIsNil, false},
{Credo.Check.Refactor.PipeChainStart, false},
{Credo.Check.Refactor.VariableRebinding, false},
{Credo.Check.Warning.LeakyEnvironment, false},
{Credo.Check.Warning.MapGetUnsafePass, false},
{Credo.Check.Warning.UnsafeToAtom, false}
#
# Custom checks can be created using `mix credo.gen.check`.
#
]
}
]
}
================================================
FILE: .formatter.exs
================================================
# Used by "mix format"
[
inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
]
================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: atandarash
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
================================================
FILE: .github/pull_request_template.md
================================================
## Overview
_Write a short description of what your PR does_
## Related Issues
_List all related issues. Add linking prefixes (closes, fixes, resolves, etc.)_
## TODO
_Write down what steps need to be done for this PR._
- [ ] Update PR to include task link
- [ ] Add error handling
- [ ] Add Loom video demo
- [ ] GitHub Actions are all passing
- [ ] Ensure your PR has been reviewed and you have also implemented all feedback requested
- [ ] Update PR label to the right stage
## Testing
### How to test:
_Write down steps needed, if any, to test your PR locally in case the preview links do not work_
1. Create an index
2. Add these documents
...
### What to test:
_Write down a checklist for others to copy and tick when testing your PR_
- [ ] Searching for "me" or other variations works
================================================
FILE: .github/workflows/dialyzer.yml
================================================
name: Dialyzer
on: push
jobs:
analyze:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: erlef/setup-beam@v1
id: beam
with:
otp-version: "24"
elixir-version: "1.12"
- name: Restore Cached Dependencies
uses: actions/cache@v2
id: mix-cache
with:
path: |
deps
_build
key: ${{ runner.os }}-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-${{ hashFiles('mix.lock') }}
- name: Restore PLT cache
uses: actions/cache@v2
id: plt-cache
with:
key: |
${{ runner.os }}-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-plt
restore-keys: |
${{ runner.os }}-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-plt
path: |
priv/plts
- name: Install Dependencies
if: steps.mix-cache.outputs.cache-hit != 'true'
run: |
mix deps.get
- name: Create PLTs
if: steps.plt-cache.outputs.cache-hit != 'true'
run: mix dialyzer --plt
- name: Run dialyzer
run: mix dialyzer
================================================
FILE: .github/workflows/test.yml
================================================
name: Test
on: push
env:
MIX_ENV: test
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: erlef/setup-beam@v1
id: beam
with:
otp-version: "24"
elixir-version: "1.12"
- name: Restore Cached Dependencies
uses: actions/cache@v2
id: mix-cache
with:
path: |
deps
_build
key: ${{ runner.os }}-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-${{ hashFiles('mix.lock') }}
- name: Install Dependencies
if: steps.mix-cache.outputs.cache-hit != 'true'
run: |
mix deps.get
- name: Check Codebase Standard
run: |
mix format --check-formatted
mix credo
- name: Run Tests
run: |
mix coveralls.json
- name: Upload Coverage Reports
uses: codecov/codecov-action@v2
with:
directory: ./cover
fail_ci_if_error: true
================================================
FILE: .gitignore
================================================
# The directory Mix will write compiled artifacts to.
/_build/
# If you run "mix test --cover", coverage assets end up here.
/cover/
# The directory Mix downloads your dependencies sources to.
/deps/
# Where third-party dependencies like ExDoc output generated docs.
/doc/
# Ignore .fetch files in case you like to edit your project deps locally.
/.fetch
# If the VM crashes, it generates a dump, let's ignore it too.
erl_crash.dump
# Also ignore archive artifacts (built via "mix archive.build").
*.ez
# Ignore package tarball (built via "mix hex.build").
elasticlunr-*.tar
# Temporary files for e.g. tests
/tmp
# Generated files for Dialyxir
/priv/plts/*.plt
/priv/plts/*.plt.hash
================================================
FILE: .vscode/extensions.json
================================================
{
"recommendations": [
"pantajoe.vscode-elixir-credo",
"jakebecker.elixir-ls",
"pgourlain.erlang"
]
}
================================================
FILE: BACKERS.md
================================================
<h1 align="center">Sponsors & Backers</h1>
Elasticlunr is an MIT-licensed open source project. It's an independent project with its ongoing development made possible entirely thanks to the support by these awesome [backers](https://github.com/heywhy/ex_elasticlunr/blob/dev/BACKERS.md). If you'd like to join them, please consider:
- [Become a backer or sponsor on Patreon](https://www.patreon.com/atandarash).
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2021 Atanda Rasheed
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Elasticlunr
[](https://github.com/heywhy/ex_elasticlunr/actions) [](https://github.com/heywhy/ex_elasticlunr/actions) [](https://codecov.io/gh/heywhy/ex_elasticlunr)
Elasticlunr is a small, full-text search library for use in the Elixir environment. It indexes JSON documents and provides a friendly search interface to retrieve documents.
## Why
The library is built for web applications that do not require the deployment complexities of popular search engines while taking advantage of the Beam capabilities.
Imagine how much is gained when the search functionality of your application resides in the same environment (Beam VM) as your business logic; search resolves faster, the number of services (Elasticsearch, Solr, and so on) to monitor reduces.
## Installation
The library can be installed by adding `elasticlunr` to your list of dependencies in mix.exs:
```elixir
def deps do
[
{:elasticlunr, "~> 0.6"}
]
end
```
Documentation can be found at [hexdocs.pm](https://hexdocs.pm/elasticlunr). See blog post [Introduction to Elasticlunr](https://atandarash.me/blog/introduction-to-elasticlunr) and [Livebook](#livebook) for examples.
## Features
1. Query-Time Boosting, you don't need to set up boosting weight in the index building procedure, Query-Time Boosting makes it more flexible so you could try different boosting schemes
2. More Rational Scoring Mechanism, Elasticlunr uses a similar scoring mechanism as Elasticsearch, and also this scoring mechanism is used by Lucene
3. Field-Search, you can choose which field to index and which field to search
4. Boolean Model, you can set which field to search and the boolean model for each query token, such as "OR" and "AND"
5. Combined Boolean Model, TF/IDF Model, and the Vector Space Model make the results ranking more reliable.
## Token Expansion
Sometimes users want to expand a query token to increase RECALL. For example, user query token is "micro", and assume "microwave" and "microscope" are in the index, if the user chooses to expand the query token "micro" to increase RECALL, both "microwave" and "microscope" will be returned and search in the index. The query results from expanded tokens are penalized because they are not the same as the query token.
## Livebook
The repository includes a livebook file that you can run. You can click the button below to run it using [livebook.dev](https://livebook.dev)!
[](https://livebook.dev/run?url=https%3A%2F%2Fgithub.com%2Fheywhy%2Fex_elasticlunr%2Fblob%2Fmaster%2Fdocs.livemd)
## Storage
Elasticlunr allows you to write your indexes to whatever storage provider you want. You don't need to access the `Elasticlunr.Storage` module directly, it is used by the `Elasticlunr.IndexManager`. See available providers below:
* [Blackhole](https://github.com/heywhy/ex_elasticlunr/blob/master/lib/elasticlunr/storage/blackhole.ex)
* [Disk](https://github.com/heywhy/ex_elasticlunr/blob/master/lib/elasticlunr/storage/disk.ex)
* [S3](https://github.com/heywhy/ex_elasticlunr_s3)
To configure what provider to use:
```elixir
config :elasticlunr,
storage: Elasticlunr.Storage.S3
```
Note that all indexes in storage are preloaded on application startup. To see the available provider configuration, you should reference its module.
## License
Elasticlunr is released under the MIT License - see the [LICENSE](https://github.com/heywhy/ex_elasticlunr/blob/master/LICENSE) file.
================================================
FILE: coveralls.json
================================================
{
"coverage_options": {
"treat_no_relevant_lines_as_covered": true,
"minimum_coverage": 90
}
}
================================================
FILE: docs.livemd
================================================
# Elasticlunr
## Description
Elasticlunr is a small, full-text search library for use in the Elixir environment. It indexes JSON documents and provides a friendly search interface to retrieve documents.
The library is built for web applications that do not require the deployment complexities of popular search engines while taking advantage of the Beam capabilities.
Imagine how much is gained when the search functionality of your application resides in the same environment (Beam VM) as your business logic; search resolves faster, the number of services (Elasticsearch, Solr, and so on) to monitor reduces.
## Getting Started
```elixir
Mix.install([
{:kino, "~> 0.4"},
{:elasticlunr, "~> 0.6"}
])
```
## What's an Index?
An index is a collection of structured data that is referred to when looking for results that are relevant to a specific query.
In RDBMS, a table can be likened to an index, meaning that you can store, update, delete and search documents in an index. But the difference here is that an index has a pipeline that every JSON document passes through before it becomes searchable.
```elixir
alias Elasticlunr.{Index, Pipeline}
# the library comes with a default set of pipeline functions
pipeline = Pipeline.new(Pipeline.default_runners())
index = Index.new(pipeline: pipeline)
```
The above code block creates a new index with a pipeline of default functions that work with the English language.
The new index does not define the expected structure of the JSON documents to be indexed. To fix this, let's assume we are building an index of blog posts, and each post consists of the `author`, `content`, `category`, and `title` attributes.
```elixir
index =
index
|> Index.add_field("title")
|> Index.add_field("author")
|> Index.add_field("content")
|> Index.add_field("category")
```
## Indexing Documents
Following our example or use-case above, to make the blog posts searchable we need to add them to the index so that they can be analyzed and transformed appropriately.
```elixir
documents = [
%{
"id" => 1,
"author" => "Mark Ericksen",
"title" => "Saving and Restoring LiveView State using the Browser",
"category" => "elixir liveview browser",
"content" =>
"There are multiple ways to save and restore state for your LiveView processes. You can use an external cache like Redis, your database, or even the browser itself. Sometimes there are situations where you either can’t or don’t want to store the state on the server. In situations like that, you do have the option of storing the state in the user’s browser. This post explains how you use the browser to store state and how your LiveView process can get it back later. We’ll go through the code so you can add something similar to your own project. We cover what data to store, how to do it securely, and restoring the state on demand."
},
%{
"id" => 2,
"author" => "Mika Kalathil",
"title" => "Creating Reusable Ecto Code",
"category" => "elixir ecto sql",
"content" =>
"Creating a highly reusable Ecto API is one of the ways we can create long-term sustainable code for ourselves, while growing it with our application to allow for infinite combination possibilites and high code reusability. If we write our Ecto code correctly, we can not only have a very well defined split between query definition and combination/execution using our context but also have the ability to re-use the queries we design individually, together with others to create larger complex queries."
},
%{
"id" => 3,
"author" => "Mark Ericksen",
"title" => "ThinkingElixir 079: Collaborative Music in LiveView with Nathan Willson",
"category" => "elixir podcast liveview",
"content" =>
"In episode 79 of Thinking Elixir, we talk with Nathan Willson about GEMS, his collaborative music generator written in LiveView. He explains how it’s built, the JS sound library integrations, what could be done by Phoenix and what is done in the browser. Nathan shares how he deployed it globally to 10 regions using Fly.io. We go over some of the challenges he overcame creating an audio focused web application. It’s a fun open-source project that pushes the boundaries of what we think LiveView apps can do!"
},
%{
"id" => 4,
"title" => "ThinkingElixir 078: Logflare with Chase Granberry",
"author" => "Mark Ericksen",
"category" => "elixir podcast logging logflare",
"content" =>
"In episode 78 of Thinking Elixir, we talk with Chase Granberry about Logflare. We learn why Chase started the company, what Logflare does, how it’s built on Elixir, about their custom Elixir logger, where the data is stored, how it’s queried, and more! We talk about dealing with the constant stream of log data, how Logflare is collecting and displaying metrics, and talk more about Supabase acquiring the company!"
}
]
index = Index.add_documents(index, documents)
```
## Search Index
The search results are a list of maps, and each map contains specific keys: `matched`, `positions`, `ref`, and `score`. See the definitions below:
* **matched:** this field tells the number of attributes where the given query matches
* **score:** the value shows how well the document ranks compared to other documents
* **ref:** this is the document id
* **positions:** this is a map that shows the positions of the matching words in the document
```elixir
search_query = Kino.Input.text("Search", default: "elixir")
```
```elixir
search_query = Kino.Input.read(search_query)
results = Index.search(index, search_query)
```
**NB:** Don't forget to fiddle with the search input.
## Nested Document Attributes
As seen in the earlier example, all documents indexed were without nested attributes. But imagine a situation where your data source returns documents with nested attributes, and you want to search by these attributes - it's possible with Elasticlunr by specifying the top-level attribute.
Let's say our data source returns a list of users with their address which is an object and you want to index this information so that you can query them.
```elixir
# the library comes with a default set of pipeline functions
pipeline = Pipeline.new(Pipeline.default_runners())
users_index =
Index.new(pipeline: pipeline)
|> Index.add_field("name")
|> Index.add_field("address")
|> Index.add_field("education")
```
Automatically, Elasticlunr will flatten the nested attributes to the level that when using the advanced query DSL you can use dot notation to filter the search results. Now, let's add a few user objects to the index:
```elixir
documents = [
%{
"id" => 1,
"name" => "rose mary",
"education" => "BSc.",
"address" => %{
"line1" => "Brooklyn Street",
"line2" => "4181",
"city" => "Portland",
"state" => "Oregon",
"country" => "USA"
}
},
%{
"id" => 2,
"name" => "jason richard",
"education" => "Msc.",
"address" => %{
"line1" => "Crown Street",
"line2" => "2057",
"city" => "St Malo",
"state" => "Quebec",
"country" => "CA"
}
},
%{
"id" => 3,
"name" => "peters book",
"education" => "BSc.",
"address" => %{
"line1" => "Murry Street",
"line2" => "2285",
"city" => "Norfolk",
"state" => "Virginia",
"country" => "USA"
}
},
%{
"id" => 4,
"name" => "jason mount",
"education" => "Highschool",
"address" => %{
"line1" => "Aspen Court",
"line2" => "2057",
"city" => "Boston",
"state" => "Massachusetts",
"country" => "USA"
}
}
]
users_index = Index.add_documents(users_index, documents)
```
```elixir
search_query = Kino.Input.text("Search users", default: "jason murry")
```
```elixir
search_query = Kino.Input.read(search_query)
Index.search(users_index, search_query)
```
## Index Manager
The manager includes different CRUD functions to help you manage your index after mutating the state. First of all, let's get indexes to manage by the manager:
```elixir
alias Elasticlunr.IndexManager
IndexManager.loaded_indices()
```
As seen above the list is empty. Now let's add an index:
```elixir
IndexManager.save(users_index)
IndexManager.loaded_indices()
|> Enum.any?(&(&1 == users_index.name))
|> IO.inspect(label: :users_index_exists)
IndexManager.loaded_indices()
```
The manager now has the `users_index` in memory for access.
## Query DSL
Like every other search engine, you can make more advanced search queries depending on your
requirements, and I'm pleased to tell you that Elasticlunr has not left out such capabilities.
So, in the proceeding parts of this docs, I will be highlighting the available query types
provided by the library and how you can use them.
It's important to note that Elasticlunr tries to replicate popular Query DSL (Domain Specific Language)
with the same behavior as Elasticsearch, which means the learning curve reduces if you have
experience using the search engine. For Elasticlunr, there are the `bool`, `match`, `match_all`,
`not`, and `terms` query types you can use to retrieve insights about an index.
## Bool
The `bool` query is used with a combination of queries to retrieve documents matching the boolean
combinations of clauses. Consider these clauses to be everything that comes after the `SELECT`
statement in relational databases.
<!-- livebook:{"break_markdown":true} -->
The `bool` query is built using one or more clauses to achieve desired results, and each clause
has its type, see below:
Clause | Description
---|---
`must` | The clause must appear in the matching documents, and this affects the document's score.
`must_not` | The clause must not appear in the matching document. Scoring is ignored because the clause is executed in the filter context.
`filter` | Like `must`, the clause must appear in the matching documents but scoring is ignored for the query.
`should` | The clause should appear in the matching document.
It's important to note that only scores from the `must` and `should` clauses contribute to the
final score of the matching document.
```elixir
Index.search(index, %{
"query" => %{
"bool" => %{
"must" => %{
"terms" => %{"content" => "use"}
},
"should" => %{
"terms" => %{"category" => "elixir"}
},
"filter" => %{
"match" => %{
"id" => 3
}
},
"must_not" => %{
"match" => %{
"author" => "mika"
}
},
"minimum_should_match" => 1
}
}
})
```
You can use the minimum_should_match parameter to specify the number or percentage of should
clauses returned documents must match.
If the bool query includes at least one should clause and no must or filter clauses, the default
value is 1. Otherwise, the default value is 0.
## Match
The `match` query is the standard query used for full-text search, including support for fuzzy
matching. The provided text is analyzed before matching it against documents.
```elixir
Index.search(index, %{
"query" => %{
"match" => %{
"content" => %{
"query" => "liveview browser"
}
}
}
})
```
A `match` query accepts one or more top-level fields you wish to search, in the example above,
it's the `content` field. Note that when you have more than one top-level fields, the `match`
query is rewritten to a `bool` query internally by the library. Now, let's see what parameters
are accepted by the `match` query below:
Parameter | Description
---|---
`query` | String you wish to find in the provided field.
`expand` | Increase token recall, see [token expansion](https://github.com/heywhy/ex_elasticlunr#token-expansion).
`fuzziness` | Maximum edit distance allowed for matching.
`boost` | Floating point number used to decrease or increase the relevance scores of a query. Defaults to 1.0.
`operator` | The boolean operator used to interpret the `query` value. Available values for the `operator` option are `or` and `and`. Defaults to `or`.
`minimum_should_match` | Minimum number of clauses that a document must match for it to be returned.
## Match All
The most simple query, which matches all documents, gives them a score of 1.0 each.
Parameter | Description
---|---
`boost` | Floating point number used to decrease or increase the relevance scores of a query. Defaults to 1.0.
```elixir
Index.search(index, %{
"query" => %{
"match_all" => %{}
}
})
```
## Not
The `not` query inverts the result of the nested query giving the matched documents a score of
1.0 each.
```elixir
Index.search(index, %{
"query" => %{
"not" => %{
"match" => %{
"content" => %{
"query" => "elixir"
}
}
}
}
})
```
## Terms
The query return documents that contain the exact terms in a given field. The `terms` query should
be used to find documents based on a precise value such as a price, a product ID, or a username.
```elixir
Index.search(index, %{
"query" => %{
"terms" => %{
"content" => %{
"value" => "think"
}
}
}
})
```
A `terms` query accepts one or more top-level fields you wish to search, in the example above,
it's the `content` field. Note that when you have more than one top-level fields, the `terms`
query is rewritten to a `bool` query internally by the library. Now, let's see what parameters
are accepted by the `terms` query below:
Parameter | Description
---|---
`value` | A term you wish to find in the provided field. The term must match exactly the field value to return a document.
`boost` | Floating point number used to decrease or increase the relevance scores of a query. Defaults to 1.0.
================================================
FILE: lib/elasticlunr/application.ex
================================================
defmodule Elasticlunr.Application do
  # OTP application entry point. Boots the index registry and the dynamic
  # supervisor used to run index processes, then preloads any indexes that
  # exist in the configured storage provider.
  # See https://hexdocs.pm/elixir/Application.html
  @moduledoc false
  use Application

  alias Elasticlunr.IndexManager

  @impl true
  def start(_type, _args) do
    children = [
      {Registry, name: Elasticlunr.IndexRegistry, keys: :unique},
      {DynamicSupervisor, name: Elasticlunr.IndexSupervisor, strategy: :one_for_one}
      # Starts a worker by calling: Elasticlunr.Worker.start_link(arg)
      # {Elasticlunr.Worker, arg}
    ]

    # See https://hexdocs.pm/elixir/Supervisor.html
    # for other strategies and supported options
    supervisor_opts = [strategy: :one_for_one, name: Elasticlunr.Supervisor]

    # `with` returns any non-matching value (i.e. an error tuple) untouched,
    # exactly like the previous case/2 fall-through clause did.
    with {:ok, _pid} = result <- Supervisor.start_link(children, supervisor_opts) do
      # Persisted indexes are loaded into memory once the tree is up.
      :ok = IndexManager.preload()
      result
    end
  end
end
================================================
FILE: lib/elasticlunr/core/document_store.ex
================================================
defmodule Elasticlunr.DocumentStore do
  @moduledoc """
  In-memory store of indexed documents and their per-field length statistics.

  When `save` is `false` the store only tracks document refs (bodies are kept
  as `nil`), which still supports existence checks and counting without
  retaining document contents.
  """

  alias Elasticlunr.{Field, Index}

  defstruct save: true, documents: %{}, document_info: %{}, length: 0

  @type t :: %__MODULE__{
          save: boolean(),
          documents: map(),
          document_info: map(),
          # number of documents currently stored; 0 for an empty store
          length: non_neg_integer()
        }

  @doc "Creates an empty store. Pass `false` to avoid retaining document bodies."
  @spec new(boolean()) :: t()
  def new(save \\ true) do
    struct!(%__MODULE__{}, %{save: save})
  end

  @doc """
  Adds (or replaces) a document under `ref`.

  `length` only grows when `ref` was not present before; re-adding an existing
  ref overwrites its stored body.
  """
  @spec add(t(), Field.document_ref(), map()) :: t()
  def add(%__MODULE__{documents: documents, length: length, save: save} = store, ref, document) do
    length =
      case exists?(store, ref) do
        true ->
          length

        false ->
          length + 1
      end

    documents =
      case save do
        true ->
          Map.put(documents, ref, document)

        false ->
          # Track the ref without retaining the body.
          Map.put(documents, ref, nil)
      end

    %{store | length: length, documents: documents}
  end

  @doc "Returns the stored document for `ref`, or `nil` when unknown (or not saved)."
  @spec get(t(), Field.document_ref()) :: map() | nil
  def get(%__MODULE__{documents: documents}, ref), do: Map.get(documents, ref)

  @doc "Removes `ref` and its field-length info; a no-op for unknown refs."
  @spec remove(t(), Field.document_ref()) :: t()
  def remove(
        %__MODULE__{document_info: document_info, documents: documents, length: length} = store,
        ref
      ) do
    case exists?(store, ref) do
      true ->
        length = length - 1
        documents = Map.delete(documents, ref)
        document_info = Map.delete(document_info, ref)
        %{store | document_info: document_info, documents: documents, length: length}

      false ->
        store
    end
  end

  @doc "Checks whether a document with `ref` is tracked by the store."
  @spec exists?(t(), Field.document_ref()) :: boolean()
  def exists?(%__MODULE__{documents: documents}, ref), do: Map.has_key?(documents, ref)

  @doc """
  Records the token count of `field` for document `ref`.

  Ignored when `ref` is not in the store.
  """
  @spec add_field_length(t(), Field.document_ref(), Index.document_field(), pos_integer()) :: t()
  def add_field_length(%__MODULE__{document_info: document_info} = store, ref, field, length) do
    case exists?(store, ref) do
      false ->
        store

      true ->
        info =
          document_info
          |> Map.get(ref, %{})
          |> Map.put(field, length)

        document_info = Map.put(document_info, ref, info)
        %{store | document_info: document_info}
    end
  end

  @doc "Alias of `add_field_length/4` (a put semantics update)."
  @spec update_field_length(t(), Field.document_ref(), Index.document_field(), pos_integer()) ::
          t()
  def update_field_length(%__MODULE__{} = store, ref, field, length),
    do: add_field_length(store, ref, field, length)

  @doc "Returns the recorded length of `field` for `ref`, or `nil` when unknown."
  @spec get_field_length(t(), Field.document_ref(), Index.document_field()) ::
          pos_integer() | nil
  def get_field_length(%__MODULE__{document_info: document_info} = store, ref, field) do
    case exists?(store, ref) do
      false ->
        nil

      true ->
        document_info
        |> Map.get(ref, %{})
        |> Map.get(field)
    end
  end

  @doc """
  Discards all contents and returns a fresh store.

  Note: the `save` flag of the new store comes from the argument (default
  `true`), not from the store being reset.
  """
  @spec reset(t(), boolean()) :: t()
  def reset(%__MODULE__{}, save \\ true), do: new(save)
end
================================================
FILE: lib/elasticlunr/core/field.ex
================================================
defmodule Elasticlunr.Field do
  @moduledoc """
  A single indexed field: an inverted index over the tokens produced by the
  field's pipeline, backed by rows in an `Elasticlunr.DB` (ETS) table.

  Rows are keyed by tagged tuples:

    * `{:field_ids, name, id}` — document ids present in the field
    * `{{:field_term, name, term, id}, attrs}` — per-document term stats
      (`:total` count and `:positions`)
    * `{{:field_tf, name, term, id}, tf}` — term frequency
      (`:math.sqrt/1` of the occurrence count)
    * `{{:field_idf, name, term}, idf}` — inverse document frequency
    * `{{:field_flnorm, name}, flnorm}` — field-length normalization factor
  """

  alias Elasticlunr.{DB, Pipeline, Token, Utils}

  @fields ~w[db name pipeline query_pipeline store store_positions]a
  @enforce_keys @fields
  defstruct @fields

  @type flnorm :: integer() | float()
  @type t :: %__MODULE__{
          db: DB.t(),
          name: String.t(),
          pipeline: Pipeline.t() | nil,
          query_pipeline: Pipeline.t() | nil,
          store: boolean(),
          store_positions: boolean()
        }
  @type document_ref :: atom() | binary()
  @type document :: %{id: document_ref(), content: binary()}
  @type token_info :: %{
          term: term,
          tf: map(),
          idf: map(),
          flnorm: flnorm(),
          documents: map()
        }

  @doc "Builds a field; `:store_documents` maps onto the `store` struct key."
  @spec new(keyword) :: t()
  def new(opts) do
    attrs = [
      db: Keyword.get(opts, :db),
      name: Keyword.get(opts, :name),
      pipeline: Keyword.get(opts, :pipeline),
      store: Keyword.get(opts, :store_documents, false),
      query_pipeline: Keyword.get(opts, :query_pipeline),
      store_positions: Keyword.get(opts, :store_positions, false)
    ]

    struct!(__MODULE__, attrs)
  end

  @doc "Returns the document ids present in this field."
  @spec documents(t()) :: list(document_ref())
  def documents(%__MODULE__{db: db, name: name}) do
    # NOTE(review): returns a plain list when empty but a Stream otherwise;
    # callers must treat the result as an Enumerable, not a list.
    case DB.match_object(db, {{:field_ids, name, :_}}) do
      [] ->
        []

      ids ->
        Stream.map(ids, fn {{:field_ids, _, id}} -> id end)
    end
  end

  @doc "Per-document `{id, tf}` entries for `term`, or `nil` when absent."
  @spec term_frequency(t(), binary()) :: map()
  def term_frequency(%__MODULE__{} = field, term) do
    tf_lookup(field, term)
  end

  @doc "Checks whether `term` has an idf entry in this field."
  @spec has_token(t(), binary()) :: boolean()
  def has_token(%__MODULE__{} = field, term) do
    DB.member?(field.db, {:field_idf, field.name, term})
  end

  @doc "Aggregated stats (tf count, idf, norm, matching docs) for `term`, or `nil`."
  @spec get_token(t(), binary()) :: token_info() | nil
  def get_token(%__MODULE__{} = field, term) do
    case idf_lookup(field, term) do
      nil ->
        nil

      _ ->
        flnorm = flnorm_lookup(field)
        to_field_token(field, term, flnorm)
    end
  end

  @doc "Replaces the pipeline used to analyze query strings for this field."
  @spec set_query_pipeline(t(), module()) :: t()
  def set_query_pipeline(%__MODULE__{} = field, pipeline) do
    %{field | query_pipeline: pipeline}
  end

  @doc """
  Indexes `documents` (ids already present are skipped) and then recalculates
  idf/flnorm statistics for the whole field.
  """
  @spec add(t(), list(document())) :: t()
  def add(%__MODULE__{pipeline: pipeline} = field, documents) do
    Enum.each(documents, fn %{id: id, content: content} ->
      unless DB.member?(field.db, {:field_ids, field.name, id}) do
        tokens = Pipeline.run(pipeline, content)
        add_id(field, id)
        update_field_stats(field, id, tokens)
      end
    end)

    recalculate_idf(field)
  end

  @doc "Counts the document ids indexed in this field."
  @spec length(t(), atom()) :: pos_integer()
  def length(%__MODULE__{db: db, name: name}, :ids) do
    fun = [{{{:field_ids, name, :_}}, [], [true]}]
    DB.select_count(db, fun)
  end

  @doc "Counts rows for `term` in the `:term`, `:tf`, or `:idf` tables."
  @spec length(t(), atom(), String.t()) :: pos_integer()
  def length(%__MODULE__{db: db, name: name}, :term, term) do
    fun = [
      {{{:field_term, name, term, :_}, :_}, [], [true]}
    ]

    DB.select_count(db, fun)
  end

  def length(%__MODULE__{db: db, name: name}, :tf, term) do
    fun = [
      {{{:field_tf, name, term, :_}, :_}, [], [true]}
    ]

    DB.select_count(db, fun)
  end

  def length(%__MODULE__{db: db, name: name}, :idf, term) do
    # At most 1: the idf key has no id component.
    fun = [
      {{{:field_idf, name, term}, :_}, [], [true]}
    ]

    DB.select_count(db, fun)
  end

  @doc "Re-indexes `documents` by removing their ids first and adding them again."
  @spec update(t(), list(document())) :: t()
  def update(%__MODULE__{} = field, documents) do
    document_ids = Enum.map(documents, & &1.id)

    field
    |> remove(document_ids)
    |> add(documents)
  end

  @doc "Deletes all rows for the given document ids and recalculates statistics."
  @spec remove(t(), list(document_ref())) :: t()
  def remove(%__MODULE__{db: db, name: name} = field, document_ids) do
    Enum.each(document_ids, fn id ->
      true = DB.match_delete(db, {{:field_term, name, :_, id}, :_})
      true = DB.match_delete(db, {{:field_tf, name, :_, id}, :_})
      # NOTE(review): this wipes every :field_idf row of the field on each
      # iteration (the pattern has no id component); recalculate_idf/1
      # rebuilds them afterwards, but the repeated wipe looks redundant.
      true = DB.match_delete(db, {{:field_idf, name, :_}, :_})
      true = DB.delete(db, {:field_ids, name, id})
    end)

    recalculate_idf(field)
  end

  @doc """
  Runs `content` through the query pipeline when `is_query: true` is given and
  a query pipeline exists; otherwise through the regular pipeline.
  """
  @spec analyze(t(), any(), keyword) :: list(Token.t())
  def analyze(%__MODULE__{pipeline: pipeline, query_pipeline: query_pipeline}, content, options) do
    case Keyword.get(options, :is_query, false) && not is_nil(query_pipeline) do
      true ->
        Pipeline.run(query_pipeline, content)

      false ->
        Pipeline.run(pipeline, content)
    end
  end

  @doc """
  Finds documents matching the given `:terms` (tokens or regexes), honoring
  `:fuzziness`, `:minimum_should_match`, and an optional `:docs` allow-list.
  Returns a map of document id to the list of matched term details.
  """
  @spec terms(t(), keyword()) :: any()
  def terms(%__MODULE__{} = field, query) do
    fuzz = Keyword.get(query, :fuzziness, 0)
    msm = Keyword.get(query, :minimum_should_match, 1)
    terms = terms_lookup(field)

    matching_docs =
      Stream.map(query[:terms], fn
        %Regex{} = re -> re
        val -> to_token(val)
      end)
      |> Enum.reduce(%{}, fn
        %Regex{} = re, matching_docs ->
          # Regex terms scan every stored term for a match.
          matched_terms = Stream.filter(terms, &Regex.match?(re, elem(&1, 0)))

          Enum.reduce(matched_terms, matching_docs, fn {term, _, _}, matching_docs ->
            ids = matching_ids(field, term)
            filter_ids(field, ids, term, matching_docs, query)
          end)

        %Token{token: term}, matching_docs ->
          # Exact lookup applies only when no fuzziness was requested.
          matching_docs =
            case fuzz == 0 && length(field, :term, term) > 0 do
              true ->
                ids = matching_ids(field, term)
                filter_ids(field, ids, term, matching_docs, query)

              false ->
                matching_docs
            end

          # Fuzzy pass is a no-op when fuzz == 0.
          match_with_fuzz(field, term, fuzz, query, matching_docs)
      end)

    if msm <= 1 do
      matching_docs
    else
      # Keep only documents that matched at least `msm` of the query terms.
      matching_docs
      |> Stream.filter(fn {_key, content} ->
        Enum.count(content) >= msm
      end)
      |> Enum.into(%{})
    end
  end

  @doc "Streams aggregated token stats for every distinct term in the field."
  @spec tokens(Elasticlunr.Field.t()) :: Enumerable.t()
  def tokens(%__MODULE__{} = field) do
    flnorm = flnorm_lookup(field)

    unique_terms_lookup(field)
    |> Stream.map(fn {term, _, _} ->
      to_field_token(field, term, flnorm)
    end)
  end

  # Writes per-token stats for document `id`: accumulates positions (when the
  # tokenizer recorded them) and the occurrence total, and stores the term
  # frequency as sqrt(total).
  defp update_field_stats(%{db: db, name: name} = field, id, tokens) do
    Enum.each(tokens, fn token ->
      %Token{token: term} = token
      term_attrs = term_lookup(field, term, id)

      term_attrs =
        case Token.get_position(token) do
          nil ->
            term_attrs

          position ->
            %{term_attrs | positions: term_attrs.positions ++ [position]}
        end

      term_attrs = %{term_attrs | total: term_attrs.total + 1}
      true = DB.insert(db, {{:field_term, name, term, id}, term_attrs})
      true = DB.insert(db, {{:field_tf, name, term, id}, :math.sqrt(term_attrs.total)})
    end)
  end

  # Registers a document id for this field.
  defp add_id(%{db: db, name: name}, id) do
    true = DB.insert(db, {{:field_ids, name, id}})
  end

  # Streams the ids of documents containing `term`.
  defp matched_documents_for_term(%{db: db, name: name}, term) do
    db
    |> DB.match_object({{:field_term, name, term, :_}, :_})
    |> Stream.map(fn {{:field_term, _, _, id}, _} -> id end)
  end

  # Per-document attrs for `term`; defaults to zeroed attrs when absent.
  defp term_lookup(%{db: db, name: name}, term, id) do
    case DB.match_object(db, {{:field_term, name, term, id}, :_}) do
      [] ->
        %{total: 0, positions: []}

      [{_, attrs}] ->
        attrs
    end
  end

  # All {term, id, attrs} rows of this field.
  defp terms_lookup(%{db: db, name: name}) do
    db
    |> DB.match_object({{:field_term, name, :_, :_}, :_})
    |> Stream.map(&termify/1)
  end

  # {term, id, attrs} rows for a specific term.
  defp terms_lookup(%{db: db, name: name}, term) do
    db
    |> DB.match_object({{:field_term, name, term, :_}, :_})
    |> Stream.map(&termify/1)
  end

  defp termify({{:field_term, _, term, id}, attrs}), do: {term, id, attrs}

  # Streams {id, tf} pairs for `term`; nil when the term is unknown.
  defp tf_lookup(%{db: db, name: name}, term) do
    case DB.match_object(db, {{:field_tf, name, term, :_}, :_}) do
      [] ->
        nil

      terms ->
        terms
        |> Stream.map(fn {{:field_tf, _, _, id}, count} ->
          {id, count}
        end)
    end
  end

  # {id, tf} for `term` in a single document; nil when absent.
  defp tf_lookup(%{db: db, name: name}, term, id) do
    case DB.match_object(db, {{:field_tf, name, term, id}, :_}) do
      [] ->
        nil

      [{{:field_tf, _, _, id}, count}] ->
        {id, count}
    end
  end

  # Stored idf value for `term`; nil when absent.
  defp idf_lookup(%{db: db, name: name}, term) do
    case DB.match_object(db, {{:field_idf, name, term}, :_}) do
      [] ->
        nil

      [{{:field_idf, _, _}, value}] ->
        value
    end
  end

  # Field-length norm; defaults to 1 before any recalculation has run.
  defp flnorm_lookup(%{db: db, name: name}) do
    case DB.lookup(db, {:field_flnorm, name}) do
      [] ->
        1

      [{{:field_flnorm, _}, value}] ->
        value
    end
  end

  # One {term, id, attrs} row per distinct term.
  defp unique_terms_lookup(field) do
    terms_lookup(field)
    |> Stream.uniq_by(&elem(&1, 0))
  end

  # Recomputes idf for every distinct term and the field-length norm.
  # idf uses add-one smoothing on the per-term document count.
  defp recalculate_idf(field) do
    terms = unique_terms_lookup(field)
    terms_length = Enum.count(terms)
    ids_length = length(field, :ids)

    flnorm =
      case terms_length > 0 do
        true ->
          1 / :math.sqrt(terms_length)

        false ->
          0
      end

    :ok =
      terms
      |> Task.async_stream(fn {term, _id, _attrs} ->
        count = length(field, :term, term) + 1
        value = 1 + :math.log10(ids_length / count)
        true = DB.insert(field.db, {{:field_idf, field.name, term}, value})
      end)
      |> Stream.run()

    true = DB.insert(field.db, {{:field_flnorm, field.name}, flnorm})
    field
  end

  # Restricts `ids` to the query's :docs allow-list (when given) and folds the
  # survivors into the accumulator of matched documents.
  defp filter_ids(field, ids, term, matching_docs, query) do
    docs = Keyword.get(query, :docs)

    case docs do
      docs when is_list(docs) ->
        Stream.filter(ids, &(&1 in docs))

      _ ->
        ids
    end
    |> get_matching_docs(field, term, matching_docs)
  end

  # Appends the match details of `term` to each document's list of matches.
  defp get_matching_docs(docs, field, term, matching_docs) do
    docs
    |> Enum.reduce(matching_docs, fn id, matching_docs ->
      matched =
        matching_docs
        |> Map.get(id, [])
        |> Kernel.++([extract_matched(field, term, id)])

      Map.put(matching_docs, id, matched)
    end)
  end

  # Adds documents whose stored terms are within `fuzz` Levenshtein distance
  # of `term`.
  defp match_with_fuzz(field, term, fuzz, query, matching_docs) when fuzz > 0 do
    field
    |> unique_terms_lookup()
    |> Enum.reduce(matching_docs, fn {key, _id, _attr}, matching_docs ->
      if Utils.levenshtein_distance(key, term) <= fuzz do
        # NOTE(review): ids are looked up for the original `term`, not the
        # fuzzy-matched `key`, while filter_ids/5 extracts stats for `key`.
        # This looks like a bug (should probably be matching_ids(field, key))
        # — confirm against the fuzziness tests.
        ids = matching_ids(field, term)
        filter_ids(field, ids, key, matching_docs, query)
      else
        matching_docs
      end
    end)
  end

  defp match_with_fuzz(_field, _term, _fuzz, _query, matching_docs), do: matching_docs

  # Streams the document ids that contain `term`.
  defp matching_ids(field, term) do
    terms_lookup(field, term)
    |> Stream.map(&elem(&1, 1))
  end

  # Stub: document content retrieval is not implemented; always nil.
  defp get_content(_field, _id) do
    nil
  end

  # Full match details of `term` inside document `id` (tf, idf, norm, positions).
  defp extract_matched(field, term, id) do
    attrs = term_lookup(field, term, id)
    positions = Map.get(attrs, :positions)
    {^id, tf} = tf_lookup(field, term, id)

    %{
      tf: tf,
      ref: id,
      positions: positions,
      norm: flnorm_lookup(field),
      idf: idf_lookup(field, term),
      content: get_content(field, id)
    }
  end

  defp to_token(%Token{} = token), do: token
  defp to_token(token), do: Token.new(token)

  # Aggregate view of a term across the whole field.
  defp to_field_token(field, term, flnorm) do
    %{
      term: term,
      norm: flnorm,
      tf: length(field, :tf, term),
      idf: idf_lookup(field, term),
      documents: matched_documents_for_term(field, term)
    }
  end
end
================================================
FILE: lib/elasticlunr/core/index.ex
================================================
defmodule Elasticlunr.Index.IdPipeline do
  @moduledoc false

  alias Elasticlunr.{Pipeline, Token}

  @behaviour Pipeline

  # Identity stage used for the ref (id) field: tokens pass through
  # untouched so document ids are indexed verbatim.
  @impl true
  def call(%Token{} = token) do
    token
  end
end
defmodule Elasticlunr.Index do
  @moduledoc """
  An index: a named set of `Elasticlunr.Field`s sharing one ETS-backed
  `Elasticlunr.DB` table, plus search entry points that compile user queries
  into the `Elasticlunr.Dsl` query structs.
  """

  alias Elasticlunr.{DB, Field, Pipeline}
  alias Elasticlunr.Index.IdPipeline
  alias Elasticlunr.Dsl.{Query, QueryRepository}
  alias Uniq.UUID

  @fields ~w[db fields name ref pipeline documents_size store_positions store_documents]a
  @enforce_keys @fields
  defstruct @fields

  @type document_field :: atom() | binary()

  @type t :: %__MODULE__{
          db: DB.t(),
          fields: map(),
          documents_size: integer(),
          ref: Field.document_ref(),
          pipeline: Pipeline.t(),
          name: atom() | binary(),
          store_positions: boolean(),
          store_documents: boolean()
        }

  @type search_query :: binary() | map()
  @type search_result :: any()

  @doc """
  Creates an index. The `:ref` field (default `"id"`) is always present and
  uses the identity pipeline so ids are indexed verbatim.
  """
  @spec new(keyword()) :: t()
  def new(opts \\ []) do
    ref = Keyword.get(opts, :ref, "id")
    pipeline = Keyword.get_lazy(opts, :pipeline, &Pipeline.new/0)
    name = Keyword.get_lazy(opts, :name, &UUID.uuid4/0)
    db_name = String.to_atom("elasticlunr_#{name}")
    db = DB.init(db_name, ~w[ordered_set public]a)
    id_field = Field.new(db: db, name: ref, pipeline: Pipeline.new([IdPipeline]))
    fields = Map.put(%{}, to_string(ref), id_field)

    attrs = %{
      db: db,
      documents_size: 0,
      ref: ref,
      fields: fields,
      pipeline: pipeline,
      name: name,
      store_documents: Keyword.get(opts, :store_documents, true),
      store_positions: Keyword.get(opts, :store_positions, true)
    }

    struct!(__MODULE__, attrs)
  end

  @doc """
  Adds a field to the index; the index's pipeline/store settings are used as
  defaults unless overridden in `opts`.
  """
  @spec add_field(t(), document_field(), keyword()) :: t()
  def add_field(
        %__MODULE__{
          db: db,
          fields: fields,
          pipeline: pipeline,
          store_positions: store_positions,
          store_documents: store_documents
        } = index,
        field,
        opts \\ []
      )
      when is_binary(field) do
    opts =
      opts
      |> Keyword.put(:db, db)
      |> Keyword.put(:name, field)
      |> Keyword.put_new(:pipeline, pipeline)
      |> Keyword.put_new(:store_documents, store_documents)
      |> Keyword.put_new(:store_positions, store_positions)

    %{index | fields: Map.put(fields, field, Field.new(opts))}
  end

  @doc "Replaces an existing field; raises when `name` is not in the index."
  @spec update_field(t(), document_field(), Field.t()) :: t()
  def update_field(%__MODULE__{fields: fields} = index, name, %Field{} = field) do
    if not Map.has_key?(fields, name) do
      raise "Unknown field #{name} in index"
    end

    update_documents_size(%{index | fields: Map.put(fields, name, field)})
  end

  @doc "Lists the names of all fields (including the ref field)."
  @spec get_fields(t()) :: list(Field.document_ref() | document_field())
  def get_fields(%__MODULE__{fields: fields}), do: Map.keys(fields)

  @doc "Returns the `Field` struct for `field`, or `nil` when unknown."
  @spec get_field(t(), document_field()) :: Field.t()
  def get_field(%__MODULE__{fields: fields}, field) do
    Map.get(fields, field)
  end

  @doc "Toggles the `store` flag on every field of the index."
  @spec save_document(t(), boolean()) :: t()
  def save_document(%__MODULE__{fields: fields} = index, save) do
    fields =
      fields
      |> Enum.map(fn {key, field} -> {key, %{field | store: save}} end)
      |> Enum.into(%{})

    %{index | fields: fields}
  end

  @doc "Indexes the given documents across all matching fields."
  @spec add_documents(t(), list(map())) :: t()
  def add_documents(%__MODULE__{fields: fields, ref: ref} = index, documents) do
    :ok = persist(fields, ref, documents, &Field.add/2)
    update_documents_size(index)
  end

  @doc "Re-indexes the given documents (remove + add) across all matching fields."
  @spec update_documents(t(), list(map())) :: t()
  def update_documents(%__MODULE__{ref: ref, fields: fields} = index, documents) do
    :ok = persist(fields, ref, documents, &Field.update/2)
    update_documents_size(index)
  end

  @doc "Removes the given document ids from every field."
  @spec remove_documents(t(), list(Field.document_ref())) :: t()
  def remove_documents(%__MODULE__{fields: fields} = index, document_ids) do
    Enum.each(fields, fn {_, field} ->
      Field.remove(field, document_ids)
    end)

    update_documents_size(index)
  end

  @doc "Runs `content` through the pipeline of `field` (see `Field.analyze/3`)."
  @spec analyze(t(), document_field(), any(), keyword()) :: Enumerable.t()
  def analyze(%__MODULE__{fields: fields}, field, content, options) do
    fields
    |> Map.get(field)
    |> Field.analyze(content, options)
  end

  @doc "Term lookup on the field named by `query[:field]` (see `Field.terms/2`)."
  @spec terms(t(), keyword()) :: Enumerable.t()
  def terms(%__MODULE__{fields: fields}, query) do
    field = Keyword.get(query, :field)

    fields
    |> Map.get(field)
    |> Field.terms(query)
  end

  @doc "All document ids known to the index (taken from the ref field)."
  @spec all(t()) :: list(Field.document_ref())
  def all(%__MODULE__{ref: ref, fields: fields}) do
    fields
    |> Map.get(ref)
    |> Field.documents()
  end

  @doc "Recomputes `documents_size` as the largest id count across fields."
  @spec update_documents_size(t()) :: t()
  def update_documents_size(%__MODULE__{fields: fields} = index) do
    size =
      Enum.reduce(fields, 0, fn {_, field}, acc ->
        size = Field.length(field, :ids)

        if size > acc do
          size
        else
          acc
        end
      end)

    %{index | documents_size: size}
  end

  @doc """
  Searches the index.

  Accepts a raw query string (optionally with per-field boosts via
  `%{"fields" => ...}` options), a field => content map, or a full
  Elasticsearch-style `%{"query" => ...}` map.
  """
  @spec search(t(), search_query(), map() | nil) :: list(search_result())
  def search(index, query, opts \\ nil)
  def search(%__MODULE__{}, nil, _opts), do: []

  def search(%__MODULE__{ref: ref} = index, query, nil) when is_binary(query) do
    # Match the query string against every field except the ref field.
    fields = get_fields(index)

    matches =
      fields
      |> Enum.reject(&(&1 == ref))
      |> Enum.map(fn field ->
        %{"match" => %{field => query}}
      end)

    elasticsearch(index, %{
      "query" => %{
        "bool" => %{
          "should" => matches
        }
      }
    })
  end

  def search(%__MODULE__{ref: ref} = index, query, %{"fields" => fields}) when is_binary(query) do
    # `fields` maps a field name to its options, e.g. %{"title" => %{"boost" => 2}}.
    # Only non-ref fields with a positive boost take part in the search.
    # (Bug fix: previously the map itself was filtered, which enumerates
    # {key, value} tuples and therefore never matched any field.)
    matches =
      fields
      |> Map.keys()
      |> Enum.filter(fn field ->
        with true <- field != ref,
             %{"boost" => boost} <- Map.get(fields, field) do
          boost > 0
        else
          _ -> false
        end
      end)
      |> Enum.map(fn field ->
        %{"boost" => boost} = Map.get(fields, field)
        match = %{field => query}
        %{"match" => match, "boost" => boost}
      end)

    elasticsearch(index, %{
      "query" => %{
        "bool" => %{
          "should" => matches
        }
      }
    })
  end

  def search(%__MODULE__{} = index, %{"query" => _} = query, _opts),
    do: elasticsearch(index, query)

  def search(%__MODULE__{} = index, query, nil) when is_map(query),
    do: search(index, query, %{"operator" => "OR"})

  def search(%__MODULE__{} = index, %{} = query, options) do
    # NOTE(review): the operator is read from the "bool" option here, while
    # the delegating clause above defaults "operator" — confirm which key the
    # public API intends.
    matches =
      query
      |> Enum.map(fn {field, content} ->
        expand = Map.get(options, "expand", false)

        operator =
          options
          |> Map.get("bool", "or")
          |> String.downcase()

        %{
          "expand" => expand,
          "match" => %{"operator" => operator, field => content}
        }
      end)

    elasticsearch(index, %{
      "query" => %{
        "bool" => %{
          "should" => matches
        }
      }
    })
  end

  # Parses the root query object, scores it, and sorts results by descending score.
  defp elasticsearch(index, %{"query" => root}) do
    {key, value} = Query.split_root(root)
    query = QueryRepository.parse(key, value, root)

    query
    |> QueryRepository.score(index)
    |> Enum.sort(fn a, b -> a.score > b.score end)
  end

  defp elasticsearch(_index, _query) do
    raise "Root object must have a query element"
  end

  # Flattens nested maps into dot-separated keys, e.g. %{"a" => %{"b" => 1}}
  # becomes %{"a.b" => 1}.
  defp flatten_document(document, prefix \\ "") do
    Enum.reduce(document, %{}, fn
      {key, value}, transformed when is_map(value) ->
        mapped = flatten_document(value, "#{prefix}#{key}.")
        Map.merge(transformed, mapped)

      {key, value}, transformed ->
        Map.put(transformed, "#{prefix}#{key}", value)
    end)
  end

  # Flattens each document and applies `persist_fn` per field, concurrently.
  defp persist(fields, ref, documents, persist_fn) do
    Task.async_stream(documents, fn document ->
      document = flatten_document(document)
      save(fields, ref, document, persist_fn)
    end)
    |> Stream.run()
  end

  # Feeds a document's value for each field it contains into `callback`.
  defp save(fields, ref, document, callback) do
    Enum.each(fields, fn {attribute, field} ->
      if document[attribute] do
        data = [
          %{id: document[ref], content: document[attribute]}
        ]

        callback.(field, data)
      end
    end)
  end
end
================================================
FILE: lib/elasticlunr/core/token.ex
================================================
defmodule Elasticlunr.Token do
  @moduledoc """
  A single unit of text produced by tokenization, carrying arbitrary
  metadata (e.g. `:start`/`:end` positions) attached by pipeline stages.
  """

  defstruct ~w[token metadata]a

  @type t :: %__MODULE__{
          token: binary(),
          metadata: map()
        }

  @doc "Wraps a token string with optional metadata."
  @spec new(binary(), map()) :: t()
  def new(token, metadata \\ %{}) do
    %__MODULE__{token: token, metadata: metadata}
  end

  @doc "Applies `opts` on top of the token, keeping existing values as defaults."
  @spec update(t(), keyword()) :: t()
  def update(%__MODULE__{token: current, metadata: meta} = token, opts) do
    merged = Keyword.merge([token: current, metadata: meta], opts)
    struct!(token, merged)
  end

  @doc "Returns the `{start, end}` position pair, or `nil` when not recorded."
  @spec get_position(t()) :: {integer(), integer()} | nil
  def get_position(%__MODULE__{metadata: %{start: from, end: until}}), do: {from, until}
  def get_position(%__MODULE__{metadata: %{}}), do: nil
end
================================================
FILE: lib/elasticlunr/db.ex
================================================
defmodule Elasticlunr.DB do
  @moduledoc """
  Thin wrapper around a named ETS table, with optional DETS persistence via
  `from/2` (load) and `to/2` (dump).
  """

  defstruct [:name, :options]

  @type t :: %__MODULE__{
          name: atom(),
          options: list(atom())
        }

  @doc """
  Creates (or reuses) the named ETS table. `:compressed` and `:named_table`
  are always applied in addition to `opts`.
  """
  @spec init(atom(), list()) :: t()
  def init(name, opts \\ []) when is_atom(name) do
    default = ~w[compressed named_table]a
    options = Enum.uniq(default ++ opts)

    # :ets.new/2 raises for an existing named table, so guard on existence.
    unless Enum.member?(:ets.all(), name) do
      :ets.new(name, options)
    end

    struct!(__MODULE__, name: name, options: options)
  end

  @doc "Deletes all objects stored under `key`. Always returns `true`."
  @spec delete(t(), term()) :: boolean()
  def delete(%__MODULE__{name: name}, key), do: :ets.delete(name, key)

  @doc "Drops the whole table when it exists; returns `true` either way."
  @spec destroy(t()) :: boolean()
  def destroy(%__MODULE__{name: name}) do
    if Enum.member?(:ets.all(), name) do
      :ets.delete(name)
    else
      true
    end
  end

  @doc "Inserts an object (or list of objects) into the table."
  @spec insert(t(), term()) :: boolean()
  def insert(%__MODULE__{name: name}, data), do: :ets.insert(name, data)

  @doc "Returns all objects stored under `key`."
  @spec lookup(t(), term()) :: list(term())
  def lookup(%__MODULE__{name: name}, key), do: :ets.lookup(name, key)

  @doc "Checks whether any object exists under `key`."
  @spec member?(t(), term()) :: boolean()
  def member?(%__MODULE__{name: name}, key), do: :ets.member(name, key)

  @doc "Deletes every object matching the ETS `pattern`."
  @spec match_delete(t(), term()) :: boolean()
  def match_delete(%__MODULE__{name: name}, pattern), do: :ets.match_delete(name, pattern)

  @doc "Returns every object matching the ETS `pattern`."
  @spec match_object(t(), term()) :: list(term())
  def match_object(%__MODULE__{name: name}, pattern), do: :ets.match_object(name, pattern)

  @doc "Counts objects satisfying the given match spec (may be 0)."
  @spec select_count(t(), term()) :: non_neg_integer()
  def select_count(%__MODULE__{name: name}, spec), do: :ets.select_count(name, spec)

  @doc """
  Loads the table contents from the DETS `file`.

  Returns `{:ok, db}` on success. When the file is missing or loading fails,
  the non-matching `with` value (`false` or `{:error, reason}`) is returned
  unchanged.
  """
  @spec from(t(), keyword()) :: {:ok, t()} | false | {:error, term()}
  def from(%__MODULE__{name: name} = db, file: file) do
    with true <- File.exists?(file),
         {:ok, ^name} <- :dets.open_file(name, file: file),
         true <- :ets.from_dets(name, name) do
      {:ok, db}
    end
  end

  @doc "Dumps the table into the DETS `file` and closes the DETS handle."
  @spec to(t(), keyword()) :: :ok
  def to(%__MODULE__{name: name}, file: file) do
    unless Enum.member?(:dets.all(), name) do
      :dets.open_file(name, ram_file: true, file: file)
    end

    with ^name <- :ets.to_dets(name, name) do
      :dets.close(name)
    end
  end
end
================================================
FILE: lib/elasticlunr/deserializer.ex
================================================
defprotocol Elasticlunr.Deserializer do
  @moduledoc """
  Protocol for turning a serialized enumerable of index data back into an
  `Elasticlunr.Index.t()`.
  """

  @doc "Rebuilds an index from serialized `data`."
  @spec deserialize(Enum.t()) :: Elasticlunr.Index.t()
  def deserialize(data)
end
defmodule Elasticlunr.Deserializer.Parser do
  @moduledoc """
  Parses the line-oriented serialization format back into an `Index`.

  Each line looks like `command#key:value|key:value|...`. The commands seen
  here are `settings` (creates the index), `db` (ignored), and `field`
  (adds a field to the index); unknown commands are skipped.
  """

  alias Elasticlunr.{Index, Pipeline}

  @doc "Folds the serialized lines into an `Index`."
  @spec process(Enum.t()) :: Index.t()
  def process(data) do
    Enum.reduce(data, nil, fn line, acc ->
      [command | opts] =
        String.trim(line)
        |> String.split("#")

      # The accumulator is either {index, extra} (extra holds the pipeline
      # callback cache) or a bare index; normalize after each step.
      case parse(command, acc, opts) do
        {%Index{}, _extra} = acc ->
          acc

        %Index{} = index ->
          index
      end
    end)
    |> case do
      {%Index{} = index, _} ->
        index

      result ->
        result
    end
  end

  defp parse(command, acc, [opts]), do: parse(command, acc, opts)

  # The `settings` line creates the index and builds a cache mapping a
  # stringified position ("0", "1", ...) to the pipeline callback module,
  # so later `field` lines can reference callbacks by index.
  defp parse("settings", nil, opts) do
    opts = to_options(opts)

    {_, pipeline_map} =
      opts[:pipeline]
      |> String.split(",")
      |> Enum.reduce({0, %{}}, fn callback, {index, map} ->
        {index + 1, Map.put(map, to_string(index), String.to_atom(callback))}
      end)

    opts = Keyword.replace(opts, :pipeline, parse_pipeline(opts[:pipeline]))
    {Index.new(opts), %{pipeline: pipeline_map}}
  end

  defp parse("db", acc, _), do: acc

  # A `field` line adds a field; its pipeline entries are resolved through
  # the callback cache built by the `settings` line.
  defp parse("field", {index, extra}, opts) do
    opts = to_options(opts)

    opts =
      Enum.map(opts, fn
        {:pipeline, value} ->
          {:pipeline, parse_pipeline(value, extra[:pipeline])}

        option ->
          option
      end)

    index = Index.add_field(index, opts[:name], opts)
    {index, extra}
  end

  defp parse(_, acc, _), do: acc

  # Turns a comma-separated callback list into a Pipeline. Cache misses fall
  # back to String.to_atom/1.
  # NOTE(review): String.to_atom/1 on serialized input creates atoms at
  # runtime — confirm the input is trusted (atoms are never garbage
  # collected).
  defp parse_pipeline(option, cache \\ %{}) do
    callbacks =
      option
      |> String.split(",")
      |> Enum.map(fn callback ->
        Map.get_lazy(cache, callback, fn -> String.to_atom(callback) end)
      end)

    Pipeline.new(callbacks)
  end

  # Parses "key:value|key:value" into a keyword list, converting "true"/"false".
  defp to_options(options) when is_binary(options) do
    String.split(options, "|")
    |> Enum.reduce([], fn option, acc ->
      [key | values] = String.split(option, ":")
      [value] = values
      Keyword.put(acc, String.to_atom(key), parse_value(value))
    end)
  end

  defp parse_value("true"), do: true
  defp parse_value("false"), do: false
  defp parse_value(val), do: val
end
================================================
FILE: lib/elasticlunr/dsl/query/bool_query.ex
================================================
defmodule Elasticlunr.Dsl.BoolQuery do
  @moduledoc """
  Compound query combining `should`, `must`, `must_not`, and `filter`
  clauses. Scoring first applies filter/must clauses to narrow the candidate
  set, then accumulates `should` scores, keeping documents that matched at
  least `minimum_should_match` should-clauses with a positive score.
  """

  use Elasticlunr.Dsl.Query

  alias Elasticlunr.Index
  alias Elasticlunr.Dsl.{NotQuery, Query, QueryRepository}

  defstruct ~w[rewritten should must must_not filter minimum_should_match]a

  @type clause :: struct() | list(struct())
  @type t :: %__MODULE__{
          filter: clause(),
          should: clause(),
          must: nil | struct(),
          must_not: nil | struct(),
          rewritten: boolean(),
          minimum_should_match: integer()
        }

  @doc "Builds a bool query; see `extract_minimum_should_match/1` for the default msm."
  @spec new(keyword) :: t()
  def new(opts) do
    attrs = %{
      should: Keyword.get(opts, :should, []),
      must: Keyword.get(opts, :must),
      must_not: Keyword.get(opts, :must_not),
      filter: Keyword.get(opts, :filter),
      rewritten: Keyword.get(opts, :rewritten, false),
      minimum_should_match: extract_minimum_should_match(opts)
    }

    struct!(__MODULE__, attrs)
  end

  # Rewrites all sub-queries against the index. `must_not` is converted into
  # a NotQuery and prepended to the filters.
  @impl true
  def rewrite(
        %__MODULE__{
          filter: filter,
          must: must,
          must_not: must_not,
          should: should,
          minimum_should_match: minimum_should_match
        },
        %Index{} = index
      ) do
    should =
      should
      |> Kernel.||([])
      |> Enum.map(&QueryRepository.rewrite(&1, index))

    must =
      case must do
        nil ->
          nil

        mod when is_struct(mod) ->
          QueryRepository.rewrite(mod, index)
      end

    filters = filter || []

    filters =
      case must_not do
        nil ->
          filters

        must_not when is_struct(must_not) ->
          query =
            must_not
            |> QueryRepository.rewrite(index)
            |> NotQuery.new()

          [query] ++ filters
      end
      |> Enum.map(&QueryRepository.rewrite(&1, index))

    opts = [
      must: must,
      should: should,
      filter: filters,
      rewritten: true,
      minimum_should_match: minimum_should_match
    ]

    new(opts)
  end

  # Un-rewritten queries are rewritten first, then scored.
  @impl true
  def score(%__MODULE__{rewritten: false} = query, %Index{} = index, options) do
    query
    |> rewrite(index)
    |> score(index, options)
  end

  def score(
        %__MODULE__{
          must: must,
          filter: filter,
          should: should,
          minimum_should_match: minimum_should_match
        },
        %Index{} = index,
        _options
      ) do
    # Filter + must narrow the candidate refs; `false` means "no restriction".
    filter_results = filter_result(filter, index)
    filter_results = filter_must(must, filter_results, index)

    {docs, filtered} =
      case filter_results do
        false ->
          # No filtering applied: start empty and let should-clauses add docs.
          {%{}, nil}

        value ->
          # Seed docs from the filtered set; `filtered` is the allow-list of
          # refs passed to should-clauses below.
          Enum.reduce(value, {%{}, []}, fn %{ref: ref, score: score}, {docs, filtered} ->
            filtered = [ref] ++ filtered

            doc = %{
              ref: ref,
              matched: 0,
              positions: %{},
              score: score || 0
            }

            docs = Map.put(docs, ref, doc)
            {docs, filtered}
          end)
      end

    {docs, _filtered} =
      should
      |> Enum.reduce({docs, filtered}, fn query, {docs, filtered} ->
        opts =
          case filtered do
            nil ->
              []

            filtered ->
              [filtered: filtered]
          end

        results = QueryRepository.score(query, index, opts)

        # Merge each should-result into the accumulated docs: scores add up,
        # `matched` counts how many should-clauses hit, positions accumulate
        # per field.
        docs =
          results
          |> Enum.reduce(docs, fn doc, docs ->
            ob =
              Map.get(docs, doc.ref, %{
                ref: doc.ref,
                score: 0,
                matched: 0,
                positions: %{}
              })

            %{matched: matched, score: score, positions: positions} = ob

            # credo:disable-for-lines:3
            positions =
              Map.get(doc, :positions, %{})
              |> Enum.reduce(positions, fn {field, tokens}, positions ->
                p = Map.get(positions, field, [])
                p = Enum.reduce(tokens, p, &(&2 ++ [&1]))
                Map.put(positions, field, p)
              end)

            doc_score = Map.get(doc, :score, 0)
            ob = %{ob | positions: positions, matched: matched + 1, score: score + doc_score}
            Map.put(docs, doc.ref, ob)
          end)

        {docs, filtered}
      end)

    docs
    |> Stream.map(&elem(&1, 1))
    |> Stream.filter(fn doc -> doc.matched >= minimum_should_match && doc.score > 0 end)
  end

  # Chains filter queries: each filter only sees refs kept by the previous one.
  # `false` means "no filter applied".
  defp filter_result(nil, _index), do: false
  defp filter_result([], _index), do: false

  defp filter_result(filter, index) do
    filter
    |> Enum.reduce(false, fn query, acc ->
      q =
        case acc do
          false ->
            []

          val ->
            [filtered: Enum.map(val, & &1.ref)]
        end

      QueryRepository.filter(query, index, q)
    end)
  end

  # Scores the must query, restricted to refs surviving the filters (if any).
  defp filter_must(nil, filter_results, _index), do: filter_results

  defp filter_must(must_query, filter_results, index) when is_struct(must_query) do
    q =
      case filter_results do
        false ->
          []

        results ->
          [filtered: Enum.map(results, & &1.ref)]
      end

    QueryRepository.score(must_query, index, q)
  end

  # Parses the "bool" DSL map into a BoolQuery via the query repository.
  @impl true
  def parse(options, _query_options, repo) do
    default_mapper = fn query ->
      case Query.split_root(query) do
        {key, value} ->
          repo.parse(key, value, query)

        _ ->
          repo.parse("match_all", [])
      end
    end

    []
    |> patch_options(:should, options, default_mapper)
    |> patch_options(:filter, options, default_mapper)
    |> patch_options(:must, options, repo)
    |> patch_options(:must_not, options, repo)
    |> patch_options(:minimum_should_match, options)
    |> __MODULE__.new()
  end

  # Each patch_options clause copies one DSL key into the keyword options,
  # normalizing single values into lists where clauses accept lists.
  defp patch_options(opts, :should, options, mapper) do
    case Map.get(options, "should") do
      nil ->
        opts

      should when is_list(should) ->
        should =
          should
          |> Enum.map(mapper)

        Keyword.put(opts, :should, should)

      should ->
        Keyword.put(opts, :should, [mapper.(should)])
    end
  end

  defp patch_options(opts, :filter, options, mapper) do
    case Map.get(options, "filter") do
      nil ->
        opts

      filter when is_list(filter) ->
        filter = Enum.map(filter, mapper)
        Keyword.put(opts, :filter, filter)

      filter ->
        Keyword.put(opts, :filter, [mapper.(filter)])
    end
  end

  defp patch_options(opts, :must, options, repo) do
    case Map.get(options, "must") do
      nil ->
        opts

      must when is_map(must) ->
        {key, options} = Query.split_root(must)
        must = repo.parse(key, options, must)
        Keyword.put(opts, :must, must)
    end
  end

  defp patch_options(opts, :must_not, options, repo) do
    case Map.get(options, "must_not") do
      nil ->
        opts

      must_not ->
        {key, options} = Query.split_root(must_not)
        q = repo.parse(key, options, must_not)
        Keyword.put(opts, :must_not, q)
    end
  end

  # Accepts "minimum_should_match" only when it does not exceed the number of
  # should-clauses; otherwise it is silently dropped.
  # NOTE(review): when the option is an integer but no :should key was parsed,
  # Keyword.get(opts, :should) is nil and Enum.count(nil) raises — confirm
  # whether a bool query with msm but no should clause is a supported input.
  defp patch_options(opts, :minimum_should_match, options) do
    options
    |> Map.get("minimum_should_match")
    |> case do
      nil ->
        opts

      value when is_integer(value) ->
        value <= Keyword.get(opts, :should) |> Enum.count()
    end
    |> case do
      true ->
        minimum_should_match = Map.get(options, "minimum_should_match")
        Keyword.put(opts, :minimum_should_match, minimum_should_match)

      _ ->
        opts
    end
  end

  # Defaults msm to 1 when there are should clauses but no must/filter
  # clauses (matching Elasticsearch behavior); otherwise 0.
  defp extract_minimum_should_match(opts) do
    default_value =
      case not is_empty_clause?(opts[:should]) and
             (is_empty_clause?(opts[:must]) or is_empty_clause?(opts[:filter])) do
        true -> 1
        false -> 0
      end

    Keyword.get(opts, :minimum_should_match, default_value)
  end

  defp is_empty_clause?(nil), do: true
  defp is_empty_clause?(list) when is_list(list), do: Enum.empty?(list)
  defp is_empty_clause?(%{}), do: false
end
================================================
FILE: lib/elasticlunr/dsl/query/match_all_query.ex
================================================
defmodule Elasticlunr.Dsl.MatchAllQuery do
  @moduledoc """
  Query that matches every document in the index, scoring each one
  `1.0 * boost`.
  """

  use Elasticlunr.Dsl.Query

  alias Elasticlunr.Index

  defstruct ~w[boost]a

  # The DSL documents `boost` as a float (default 1.0), so allow any number
  # rather than just integers.
  @type t :: %__MODULE__{boost: number()}

  @doc "Creates a match-all query with the given boost (defaults to 1)."
  @spec new(number()) :: t()
  def new(boost \\ 1), do: struct!(__MODULE__, boost: boost)

  @impl true
  def parse(options, _query_options, _repo) do
    options
    |> Map.get("boost", 1)
    |> __MODULE__.new()
  end

  @impl true
  def score(%__MODULE__{boost: boost}, %Index{} = index, _options) do
    # Every known document ref receives the same boosted base score.
    doc_ids = Index.all(index)
    Stream.map(doc_ids, &%{ref: &1, score: 1.0 * boost})
  end
end
================================================
FILE: lib/elasticlunr/dsl/query/match_query.ex
================================================
defmodule Elasticlunr.Dsl.MatchQuery do
use Elasticlunr.Dsl.Query
alias Elasticlunr.{Index}
alias Elasticlunr.Dsl.{MatchAllQuery, Query, QueryRepository, TermsQuery}
defstruct ~w[expand field query boost fuzziness minimum_should_match operator]a
@type t :: %__MODULE__{
expand: boolean(),
boost: integer(),
field: Index.document_field(),
query: any(),
fuzziness: integer(),
operator: binary(),
minimum_should_match: pos_integer()
}
@spec new(keyword) :: t()
def new(opts) do
  # Every struct key has an explicit default; values present in `opts`
  # (including explicit nils) override them.
  struct!(__MODULE__,
    expand: Keyword.get(opts, :expand, false),
    field: Keyword.get(opts, :field, ""),
    query: Keyword.get(opts, :query, ""),
    boost: Keyword.get(opts, :boost, 1),
    fuzziness: Keyword.get(opts, :fuzziness, 0),
    operator: Keyword.get(opts, :operator, "or"),
    minimum_should_match: Keyword.get(opts, :minimum_should_match, 1)
  )
end
@impl true
# Rewrites a match query into a concrete query: the query text is analyzed
# into tokens and the result dispatched on the token count.
def rewrite(
      %__MODULE__{
        boost: boost,
        field: field,
        query: query,
        expand: expand,
        operator: operator,
        fuzziness: fuzziness,
        minimum_should_match: minimum_should_match
      },
      %Index{} = index
    ) do
  # Analyze with the field's query pipeline (is_query: true).
  tokens = Index.analyze(index, field, query, is_query: true)
  tokens_length = length(tokens)

  cond do
    tokens_length > 1 ->
      # NOTE(review): with operator "and", an msm of 0 is raised to require
      # every token to match — confirm 0 is the sentinel callers use, since
      # new/1 defaults minimum_should_match to 1.
      minimum_should_match =
        case operator == "and" && minimum_should_match == 0 do
          true ->
            tokens_length

          false ->
            minimum_should_match
        end

      TermsQuery.new(
        field: field,
        expand: expand,
        terms: tokens,
        fuzziness: fuzziness,
        boost: boost,
        minimum_should_match: minimum_should_match
      )

    tokens_length == 1 ->
      TermsQuery.new(
        field: field,
        expand: expand,
        terms: tokens,
        fuzziness: fuzziness,
        boost: boost
      )

    true ->
      # No tokens produced (e.g. empty or fully-filtered query): match all.
      MatchAllQuery.new()
  end
end
@impl true
def score(%__MODULE__{} = module, %Index{} = index, options) do
module
|> rewrite(index)
|> QueryRepository.score(index, options)
end
@impl true
def parse(options, _query_options, repo) do
cond do
Enum.empty?(options) ->
repo.parse("match_all", %{})
Enum.count(options) > 1 ->
minimum_should_match = Enum.count(options)
should =
Enum.map(options, fn {field, content} ->
%{"match" => %{field => content}}
end)
repo.parse("bool", %{
"should" => should,
"minimum_should_match" => minimum_should_match
})
true ->
{field, params} = Query.split_root(options)
opts = to_match_params(params)
new(
field: field,
query: Keyword.get(opts, :query),
expand: Keyword.get(opts, :expand),
operator: Keyword.get(opts, :operator),
fuzziness: Keyword.get(opts, :fuzziness),
minimum_should_match: Keyword.get(opts, :minimum_should_match)
)
end
end
defp to_match_params(params) when is_map(params) do
query = Map.get(params, "query")
fuzziness = Map.get(params, "fuzziness", 0)
operator = Map.get(params, "operator", "or")
expand = Map.get(params, "expand", false)
minimum_should_match = Map.get(params, "minimum_should_match", default_min_match(params))
[
query: query,
expand: expand,
operator: operator,
fuzziness: fuzziness,
minimum_should_match: minimum_should_match
]
end
defp to_match_params(params), do: to_match_params(%{"query" => params})
defp default_min_match(params) do
case Map.get(params, "operator") == "and" do
true ->
0
false ->
1
end
end
end
================================================
FILE: lib/elasticlunr/dsl/query/not_query.ex
================================================
defmodule Elasticlunr.Dsl.NotQuery do
  # Negation query: matches every document the wrapped query does NOT
  # match, assigning each a constant score of 1.
  use Elasticlunr.Dsl.Query

  alias Elasticlunr.Index
  alias Elasticlunr.Dsl.{Query, QueryRepository}

  defstruct ~w[inner_query]a

  @type t :: %__MODULE__{inner_query: struct()}

  @spec new(struct()) :: t()
  def new(inner_query), do: %__MODULE__{inner_query: inner_query}

  @impl true
  def parse(options, _query_options, _repo) do
    {key, value} = Query.split_root(options)

    key
    |> QueryRepository.parse(value, options)
    |> new()
  end

  @impl true
  def score(%__MODULE__{inner_query: inner_query}, %Index{} = index, options) do
    excluded =
      inner_query
      |> QueryRepository.score(index, options)
      |> Enum.map(& &1.ref)

    index
    |> Index.all()
    |> Stream.reject(fn ref -> ref in excluded end)
    |> Stream.map(fn ref -> %{ref: ref, score: 1} end)
  end
end
================================================
FILE: lib/elasticlunr/dsl/query/terms_query.ex
================================================
defmodule Elasticlunr.Dsl.TermsQuery do
# Matches documents containing one or more exact terms in a field,
# scoring them by tf-idf. With `expand: true`, each term is turned into
# a prefix pattern so "foo" also matches "foobar".
use Elasticlunr.Dsl.Query
alias Elasticlunr.Dsl.Query
alias Elasticlunr.{Index, Token}
defstruct ~w[minimum_should_match expand field terms boost fuzziness]a
@type t :: %__MODULE__{
minimum_should_match: pos_integer(),
expand: boolean(),
field: Index.document_field(),
terms: list(Token.t()),
boost: integer(),
fuzziness: integer()
}
# Reserved option keys (strings) that parse/3 must not treat as fields.
@options ~w[boost expand fuzziness minimum_should_match]
@spec new(keyword()) :: t()
def new(opts) do
attrs = %{
minimum_should_match: Keyword.get(opts, :minimum_should_match, 1),
expand: Keyword.get(opts, :expand, false),
field: Keyword.get(opts, :field, ""),
terms: Keyword.get(opts, :terms, []),
boost: Keyword.get(opts, :boost, 1),
fuzziness: Keyword.get(opts, :fuzziness, 0)
}
struct!(__MODULE__, attrs)
end
@impl true
def score(
%__MODULE__{
boost: boost,
field: field,
expand: expand,
terms: terms,
fuzziness: fuzziness,
minimum_should_match: minimum_should_match
},
%Index{} = index,
options \\ []
) do
# When expanding, each term becomes an anchored prefix regex.
# NOTE(review): the token text is interpolated unescaped, so terms
# containing regex metacharacters change the match — confirm intended.
terms =
case expand do
true ->
Enum.map(terms, fn
%Token{token: token} ->
Regex.compile!("^#{token}.*")
token ->
Regex.compile!("^#{token}.*")
end)
false ->
terms
end
query = [
field: field,
terms: terms,
fuzziness: fuzziness,
minimum_should_match: minimum_should_match
]
# A `:filtered` list of document ids restricts the lookup to those docs.
query =
case Keyword.get(options, :filtered) do
nil ->
query
filtered when is_list(filtered) ->
Keyword.put(query, :docs, filtered)
end
# NOTE(review): `docs` appears to be a map of document id -> list of
# per-term stats (each with tf/idf/norm/positions) — confirm against
# Index.terms/2.
docs = Index.terms(index, query)
# Reducer over [score, doc] pairs keeping the pair with the higher score.
pick_highest_score = fn a, b ->
if(hd(a) > hd(b), do: a, else: b)
end
# Iterating a map yields {key, value} tuples; elem(&1, 0) extracts ids.
Stream.map(docs, &elem(&1, 0))
|> Enum.reduce([], fn id, matched ->
# For each document keep the best-scoring term entry; the score is
# the classic tf * idf^2 * field-length norm.
[score, doc] =
Map.get(docs, id)
|> Stream.map(fn doc ->
[doc.tf * :math.pow(doc.idf, 2) * doc.norm, doc]
end)
|> Enum.reduce([0, nil], pick_highest_score)
# `doc` here is the winning entry bound by the destructure above.
ob = %{
ref: id,
field: field,
score: score * boost,
positions: Map.put(%{}, field, doc.positions)
}
matched ++ [ob]
end)
end
# Parses the raw query map:
#   * empty map      -> match_all
#   * several fields -> bool query with one terms clause per field
#   * single field   -> a TermsQuery struct for that field
@impl true
def parse(options, _query_options, repo) do
cond do
Enum.empty?(options) ->
repo.parse("match_all", %{})
Enum.count(options) > 1 ->
should =
options
|> Enum.reject(fn {key, _field} -> key in @options end)
|> Enum.map(fn {field, terms} ->
%{"terms" => %{field => terms}}
end)
repo.parse("bool", %{"should" => should})
true ->
{field, params} = Query.split_root(options)
terms = get_terms(params)
opts = to_terms_params(params)
__MODULE__.new([field: field, terms: terms] ++ opts)
end
end
# Long form: %{"value" => term_or_terms}; bare values are wrapped.
defp get_terms(params) when is_map(params) do
params
|> Map.get("value")
|> to_list()
end
defp get_terms(value), do: to_list(value)
# Collects only the option keys actually present, so new/1 defaults
# still apply for the rest.
defp to_terms_params(params) when is_map(params) do
[]
|> update_options(params, :minimum_should_match)
|> update_options(params, :fuzziness)
|> update_options(params, :expand)
|> update_options(params, :boost)
end
defp to_terms_params(params), do: to_terms_params(%{"value" => params})
defp update_options(opts, params, key) do
case Map.get(params, to_string(key)) do
nil ->
opts
value ->
Keyword.put(opts, key, value)
end
end
defp to_list(value) when is_list(value), do: value
defp to_list(value), do: [value]
end
================================================
FILE: lib/elasticlunr/dsl/query.ex
================================================
defmodule Elasticlunr.Dsl.Query do
# Behaviour shared by all DSL query structs (bool, match, terms, ...).
# `use`-ing this module adopts the behaviour and, at compile time,
# injects default implementations of filter/3 and rewrite/2 when the
# using module has not defined them.
alias Elasticlunr.{Field, Index, Dsl.QueryRepository}
@type score_results ::
list(%{
score: integer(),
ref: Field.document_ref()
})
@callback filter(module :: struct(), index :: Index.t(), options :: keyword()) :: list()
@callback score(module :: struct(), index :: Index.t(), options :: keyword()) ::
score_results() | %Stream{}
@callback rewrite(module :: struct(), index :: Index.t()) :: struct()
@callback parse(options :: map(), query_options :: map(), repo :: module()) ::
struct()
# Splits a single-entry map into its {key, value} pair; tuples and any
# other shape pass through unchanged. Raises (MatchError) when the map
# has more or fewer than one key.
@spec split_root(map() | tuple()) :: {atom(), any()} | any()
def split_root(root) when is_map(root) do
[root_key] = Map.keys(root)
value = Map.get(root, root_key)
{root_key, value}
end
def split_root({_, _} = root), do: root
def split_root(root), do: root
defmacro __using__(_) do
quote location: :keep do
@before_compile Elasticlunr.Dsl.Query
@behaviour Elasticlunr.Dsl.Query
end
end
# Injected just before the using module compiles: fills in any callbacks
# the module left undefined.
defmacro __before_compile__(_) do
mod = __CALLER__.module
quote bind_quoted: [mod: mod] do
if not Module.defines?(mod, {:filter, 3}) do
# Default filter: keep only positively scored results.
@impl true
def filter(query, index, options) do
query
|> QueryRepository.score(index, options)
|> Enum.filter(&(&1.score > 0))
end
end
if not Module.defines?(mod, {:rewrite, 2}) do
# Default rewrite: the query is already in its final form.
@impl true
def rewrite(query, _index), do: query
end
end
end
end
================================================
FILE: lib/elasticlunr/dsl/query_repository.ex
================================================
defmodule Elasticlunr.Dsl.QueryRepository do
  # Central lookup and dispatch for DSL queries: maps a query name to
  # its implementing module and forwards parse/score/filter/rewrite
  # calls to the struct's module.
  alias Elasticlunr.Index
  alias Elasticlunr.Dsl.{BoolQuery, MatchAllQuery, MatchQuery, NotQuery, TermsQuery}

  @modules %{
    "not" => NotQuery,
    "bool" => BoolQuery,
    "match" => MatchQuery,
    "terms" => TermsQuery,
    "match_all" => MatchAllQuery
  }

  # Resolves a query name to its module; raises on unknown names.
  def get(element) do
    case Map.fetch(@modules, element) do
      {:ok, module} -> module
      :error -> raise "Unknown query type #{element}"
    end
  end

  @spec parse(binary(), map(), map(), module()) :: struct()
  def parse(module, options, query_options \\ %{}, repo \\ __MODULE__) do
    get(module).parse(options, query_options, repo)
  end

  @spec score(struct(), Index.t(), keyword()) :: list()
  def score(%module{} = query, index, options \\ []) do
    module.score(query, index, options)
  end

  @spec filter(struct(), Index.t(), keyword()) :: list()
  def filter(%module{} = query, index, options \\ []) do
    module.filter(query, index, options)
  end

  @spec rewrite(struct(), Index.t()) :: struct()
  def rewrite(%module{} = query, index) do
    module.rewrite(query, index)
  end
end
================================================
FILE: lib/elasticlunr/manager/index_manager.ex
================================================
defmodule Elasticlunr.IndexManager do
  # GenServer holding one index in memory. Instances run under
  # IndexSupervisor and are registered by index name in IndexRegistry.
  use GenServer

  alias Elasticlunr.{Index, IndexRegistry, IndexSupervisor, Storage}
  alias Elasticlunr.Utils.Process

  # Boots a manager process for every index found in storage.
  @spec preload() :: :ok
  def preload do
    Enum.each(Storage.all(), &start/1)
  end

  # Fetches the in-memory index by name, or :not_running when no manager
  # process exists for it.
  @spec get(binary()) :: Index.t() | :not_running
  def get(name) do
    if loaded?(name) do
      GenServer.call(via(name), :get)
    else
      :not_running
    end
  end

  # Starts a manager for the index and persists it through storage.
  @spec save(Index.t()) :: {:ok, Index.t()} | {:error, any()}
  def save(%Index{} = index) do
    with {:ok, _pid} <- start(index),
         :ok <- Storage.write(index) do
      {:ok, index}
    end
  end

  # Replaces the in-memory state of a running index and persists it.
  @spec update(Index.t()) :: Index.t() | :not_running
  def update(%Index{name: name} = index) do
    with true <- loaded?(name),
         updated <- GenServer.call(via(name), {:update, index}),
         :ok <- Storage.write(updated) do
      updated
    else
      false -> :not_running
      err -> err
    end
  end

  # Deletes the index from storage and stops its manager process.
  @spec remove(Index.t()) :: :ok | :not_running
  def remove(%Index{name: name}) do
    with [{pid, _value}] <- Registry.lookup(IndexRegistry, name),
         :ok <- Storage.delete(name),
         :ok <- DynamicSupervisor.terminate_child(IndexSupervisor, pid) do
      :ok
    else
      _ -> :not_running
    end
  end

  @spec loaded?(binary()) :: boolean()
  def loaded?(name), do: name in loaded_indices()

  @spec loaded_indices :: [binary()]
  def loaded_indices do
    Process.active_processes(IndexSupervisor, IndexRegistry, __MODULE__)
  end

  @spec init(Index.t()) :: {:ok, Index.t()}
  def init(%Index{} = index), do: {:ok, index}

  @spec start_link(Index.t()) :: :ignore | {:error, any} | {:ok, pid}
  def start_link(%Index{name: name} = index) do
    GenServer.start_link(__MODULE__, index, name: via(name), hibernate_after: 5_000)
  end

  @spec child_spec(Index.t()) :: map()
  def child_spec(%Index{name: id} = index) do
    %{
      id: {__MODULE__, id},
      start: {__MODULE__, :start_link, [index]},
      restart: :transient
    }
  end

  @spec via(binary()) :: {:via, Registry, {IndexRegistry, atom()}}
  def via(name), do: {:via, Registry, {IndexRegistry, name}}

  def handle_call(:get, _from, index), do: {:reply, index, index}

  def handle_call({:update, new_index}, _from, _state), do: {:reply, new_index, new_index}

  defp start(index) do
    DynamicSupervisor.start_child(IndexSupervisor, {__MODULE__, index})
  end
end
================================================
FILE: lib/elasticlunr/pipeline/stemmer.ex
================================================
defmodule Elasticlunr.Pipeline.Stemmer do
  # Pipeline stage that reduces each token to its stem via the Stemmer
  # library.
  alias Elasticlunr.Token

  @behaviour Elasticlunr.Pipeline

  @impl true
  def call(%Token{token: word} = token) do
    stemmed = Stemmer.stem(word)
    Token.update(token, token: stemmed)
  end
end
================================================
FILE: lib/elasticlunr/pipeline/stop_word_filter.ex
================================================
defmodule Elasticlunr.Pipeline.StopWordFilter do
# Pipeline stage that drops common English stop words. Returning nil
# from call/1 removes the token from the stream (see Pipeline.run/2).
alias Elasticlunr.Token
@behaviour Elasticlunr.Pipeline
# The default English stop-word list (lowercase; tokens are downcased
# during tokenization, so membership is case-insensitive in practice).
@default_stop_words [
"a",
"able",
"about",
"across",
"after",
"all",
"almost",
"also",
"am",
"among",
"an",
"and",
"any",
"are",
"as",
"at",
"be",
"because",
"been",
"but",
"by",
"can",
"cannot",
"could",
"dear",
"did",
"do",
"does",
"either",
"else",
"ever",
"every",
"for",
"from",
"get",
"got",
"had",
"has",
"have",
"he",
"her",
"hers",
"him",
"his",
"how",
"however",
"i",
"if",
"in",
"into",
"is",
"it",
"its",
"just",
"least",
"let",
"like",
"likely",
"may",
"me",
"might",
"most",
"must",
"my",
"neither",
"no",
"nor",
"not",
"of",
"off",
"often",
"on",
"only",
"or",
"other",
"our",
"own",
"rather",
"said",
"say",
"says",
"she",
"should",
"since",
"so",
"some",
"than",
"that",
"the",
"their",
"them",
"then",
"there",
"these",
"they",
"this",
"tis",
"to",
"too",
"twas",
"us",
"wants",
"was",
"we",
"were",
"what",
"when",
"where",
"which",
"while",
"who",
"whom",
"why",
"will",
"with",
"would",
"yet",
"you",
"your"
]
# Drop stop words; pass every other token through unchanged. The `in`
# guard expands the list at compile time.
@impl true
def call(%Token{token: token}) when token in @default_stop_words, do: nil
def call(token), do: token
end
================================================
FILE: lib/elasticlunr/pipeline/trimmer.ex
================================================
defmodule Elasticlunr.Pipeline.Trimmer do
  # Pipeline stage that strips leading and trailing non-word characters
  # (e.g. punctuation around a word) from each token.
  alias Elasticlunr.Token

  @behaviour Elasticlunr.Pipeline

  @impl true
  def call(%Token{token: value} = token) do
    # Trim the front first, then the back.
    left_trimmed = Regex.replace(~r/^\W+/, value, "")
    trimmed = Regex.replace(~r/\W+$/, left_trimmed, "")
    Token.update(token, token: trimmed)
  end
end
================================================
FILE: lib/elasticlunr/pipeline.ex
================================================
defmodule Elasticlunr.Pipeline do
  # A pipeline is an ordered list of callbacks — modules implementing
  # the call/1 behaviour below, or bare 1-arity functions — that each
  # transform a stream of tokens in turn.
  alias Elasticlunr.{Token, Tokenizer}
  alias Elasticlunr.Pipeline.{Stemmer, StopWordFilter, Trimmer}

  defstruct callback: []

  @type t :: %__MODULE__{
          callback: list(module() | function())
        }

  # A runner receives a token and returns a transformed token, a list of
  # tokens (fan-out), or nil to drop the token entirely.
  @callback call(Token.t()) :: Token.t() | list(Token.t()) | nil

  @doc "Creates a pipeline from an ordered list of callbacks."
  @spec new(list(module())) :: t()
  def new(callbacks \\ []) do
    struct!(__MODULE__, callback: callbacks)
  end

  @doc "Prepends a callback, dropping any later duplicate occurrence."
  @spec add(t(), module()) :: t()
  def add(%__MODULE__{callback: callback} = pipeline, module) do
    callback = Enum.uniq([module | callback])
    %{pipeline | callback: callback}
  end

  @doc "The default token runners, applied in order."
  @spec default_runners() :: list(module())
  def default_runners, do: [Trimmer, StopWordFilter, Stemmer]

  @doc "Runs every callback over the tokens; raw input is tokenized first."
  @spec run(Elasticlunr.Pipeline.t(), list(Token.t())) :: list(Token.t())
  def run(%__MODULE__{} = pipeline, tokens) when not is_list(tokens) do
    run(pipeline, Tokenizer.tokenize(tokens))
  end

  def run(%__MODULE__{callback: []}, tokens), do: tokens

  def run(%__MODULE__{callback: callback}, tokens) do
    Enum.reduce(callback, tokens, fn module, acc -> execute_runner(acc, module) end)
  end

  @doc "Inserts a callback before another; appends it when the anchor is absent."
  @spec insert_before(t(), module(), module()) :: t()
  def insert_before(%__MODULE__{} = pipeline, module, before_module) do
    insert_relative(pipeline, module, before_module, 0)
  end

  @doc "Inserts a callback after another; appends it when the anchor is absent."
  @spec insert_after(t(), module(), module()) :: t()
  def insert_after(%__MODULE__{} = pipeline, module, after_module) do
    insert_relative(pipeline, module, after_module, 1)
  end

  @doc "Removes a callback from the pipeline."
  @spec remove(t(), module()) :: t()
  def remove(%__MODULE__{callback: callback} = pipeline, module) do
    %{pipeline | callback: Enum.reject(callback, &(&1 == module))}
  end

  # Shared implementation of insert_before/insert_after: `offset` is 0 to
  # insert before the anchor, 1 to insert after it. Falls back to add/2
  # (prepend) when the anchor is not in the pipeline.
  defp insert_relative(%__MODULE__{callback: callback} = pipeline, module, anchor, offset) do
    case Enum.find_index(callback, &(&1 == anchor)) do
      nil ->
        add(pipeline, module)

      index ->
        callback =
          callback
          |> List.insert_at(index + offset, module)
          |> Enum.uniq()

        %{pipeline | callback: callback}
    end
  end

  # Applies one runner to every token. A runner may return a single
  # token, a list of tokens, or nil; nil results are dropped.
  # (Renamed from the misspelled `excute_runner`; private, so no caller
  # outside this module is affected.)
  defp execute_runner(tokens, module) do
    Enum.flat_map(tokens, fn token ->
      module
      |> execute(token)
      |> List.wrap()
      |> Enum.reject(&is_nil/1)
    end)
  end

  defp execute(callback, token) when is_function(callback), do: callback.(token)
  defp execute(module, token), do: module.call(token)
end
================================================
FILE: lib/elasticlunr/protocol_implementations.ex
================================================
defimpl Elasticlunr.Serializer, for: Elasticlunr.Pipeline do
  alias Elasticlunr.Pipeline

  # Serializes a pipeline as a comma-separated list of its callbacks,
  # substituting each callback with its cached value when one exists in
  # the `:pipeline` option map.
  def serialize(%Pipeline{callback: callback}, opts) do
    cache = Keyword.get(opts, :pipeline, %{})

    callback
    |> Enum.map(fn runner -> Map.get(cache, runner, runner) end)
    |> Enum.join(",")
  end
end
defimpl Elasticlunr.Serializer, for: Elasticlunr.Field do
alias Elasticlunr.{Field, Serializer}
# Serializes a field to a single "field#..." settings line. The field's
# name and the pipeline cache arrive via `opts` (set by the Index
# serializer), so the pipeline renders as compact cached indices.
def serialize(
%Field{
pipeline: pipeline,
store: store_documents,
store_positions: store_positions
},
opts
) do
name = Keyword.get(opts, :name)
pipeline = Serializer.serialize(pipeline, opts)
"field#name:#{name}|pipeline:#{pipeline}|store_documents:#{store_documents}|store_positions:#{store_positions}"
end
end
defimpl Elasticlunr.Serializer, for: Elasticlunr.DB do
  alias Elasticlunr.DB

  # Serializes a DB definition to a "db#..." settings line, rendering
  # its options as a comma-separated list.
  def serialize(%DB{name: name, options: options}, _opts) do
    rendered_options = Enum.map_join(options, ",", &to_string/1)
    "db#name:#{name}|options:#{rendered_options}"
  end
end
defimpl Elasticlunr.Serializer, for: Elasticlunr.Index do
alias Elasticlunr.{Index, Serializer}
# Serializes an index into a flat stream of settings lines: one
# "settings#..." line, one "db#..." line, then one "field#..." line per
# field.
def serialize(%Index{db: db, fields: fields, name: name, pipeline: pipeline, ref: ref}, _opts) do
pipeline_opt = Serializer.serialize(pipeline)
db_settings = Serializer.serialize(db)
# Assign each pipeline callback a positional index; fields serialize
# callbacks via this map so lines stay compact.
{_, pipeline_map} =
Enum.reduce(pipeline.callback, {0, %{}}, fn callback, {index, map} ->
{index + 1, Map.put(map, callback, index)}
end)
settings = "settings#name:#{name}|ref:#{ref}|pipeline:#{pipeline_opt}"
fields_settings =
Stream.map(fields, fn {name, field} ->
Serializer.serialize(field, name: name, pipeline: pipeline_map)
end)
# Flatten the mix of binaries, lists, and streams into one line stream.
[settings, db_settings, fields_settings]
|> Stream.flat_map(fn
list when is_list(list) -> list
value when is_binary(value) -> [value]
value -> value
end)
end
end
defimpl Jason.Encoder, for: Tuple do
  # Encodes a two-element position tuple as a JSON array [start, end].
  def encode({start_pos, end_pos}, opts) do
    Jason.Encode.list([start_pos, end_pos], opts)
  end
end
defimpl Elasticlunr.Deserializer, for: Stream do
  alias Elasticlunr.Deserializer.Parser

  # Delegates deserialization of an in-memory stream to the parser.
  def deserialize(data), do: Parser.process(data)
end
defimpl Elasticlunr.Deserializer, for: File.Stream do
  alias Elasticlunr.Deserializer.Parser

  # Delegates deserialization of a file stream to the parser.
  def deserialize(data), do: Parser.process(data)
end
================================================
FILE: lib/elasticlunr/serializer.ex
================================================
defprotocol Elasticlunr.Serializer do
# Protocol for turning a struct (index, field, db, pipeline, ...) into
# its serialized representation: a binary, or a stream/function of
# lines for composite structures.
@spec serialize(struct(), keyword()) :: binary() | function()
def serialize(index, opts \\ [])
end
================================================
FILE: lib/elasticlunr/storage/blackhole.ex
================================================
defmodule Elasticlunr.Storage.Blackhole do
@moduledoc """
As the name implies, nothing is written nowhere.
"""
use Elasticlunr.Storage
# No persisted indexes exist, so there is nothing to load.
@impl true
def load_all, do: []
# Accept and discard every write.
@impl true
def write(_index), do: :ok
# Reads always fail: nothing was ever stored.
@impl true
def read(_name), do: {:error, "can't read index from blackhole"}
# Deleting a never-stored index trivially succeeds.
@impl true
def delete(_name), do: :ok
end
================================================
FILE: lib/elasticlunr/storage/disk.ex
================================================
defmodule Elasticlunr.Storage.Disk do
  @moduledoc """
  This storage provider writes data to the local disk of the running application.
  ```elixir
  config :elasticlunr,
  storage: Elasticlunr.Storage.Disk
  config :elasticlunr, Elasticlunr.Storage.Disk,
  directory: "/path/to/project/storage"
  ```
  """
  use Elasticlunr.Storage

  alias Elasticlunr.{DB, Deserializer, Index, Serializer}

  require Logger

  @data_file_ext "data"
  @index_file_ext "index"
  @extensions [@data_file_ext, @index_file_ext]

  # Persists an index: serialized index settings go to "<name>.index"
  # and the DB contents to "<name>.data", both under the configured
  # directory (defaults to the current directory).
  @impl true
  def write(%Index{db: db, name: name} = index) do
    directory = config(:directory, ".")
    data = Serializer.serialize(index)

    with %{data: data_file, index: index_file} <- filenames(directory, name),
         :ok <- DB.to(db, file: data_file) do
      write_serialized_index_to_file(index_file, data)
    end
  end

  # Loads an index by name: deserializes the ".index" file, restores the
  # DB contents from the ".data" file, and refreshes document counts.
  # When the data file cannot be restored, the bare index is returned.
  @impl true
  def read(name) do
    directory = config(:directory, ".")
    %{data: data_file, index: index_file} = filenames(directory, name)

    index =
      File.stream!(index_file, ~w[compressed]a)
      |> Deserializer.deserialize()

    with %Index{db: db} <- index,
         {:ok, db} <- DB.from(db, file: data_file) do
      Index.update_documents_size(%{index | db: db})
    else
      # NOTE(review): assumes DB.from/2 yields `false` when the data
      # file cannot be restored — confirm against Elasticlunr.DB.
      false ->
        # Fixed message: previously read "unable to data for index".
        Logger.info("[elasticlunr] unable to load data for index #{index.name}")
        index
    end
  end

  # Streams every index persisted in the configured directory.
  @impl true
  def load_all do
    files()
    |> Stream.filter(&String.ends_with?(&1, @index_file_ext))
    |> Stream.map(fn file ->
      name = without_ext(file, @index_file_ext)
      read(name)
    end)
  end

  # Removes both files belonging to an index; the data file is removed
  # only after the index file deletion succeeds.
  @impl true
  def delete(name) do
    directory = config(:directory, ".")
    %{data: data_file, index: index_file} = filenames(directory, name)

    with :ok <- File.rm(index_file) do
      File.rm(data_file)
    end
  end

  # Lists the absolute paths of all storage files (".data"/".index")
  # found in the configured directory.
  @spec files() :: list(binary())
  def files do
    directory = config(:directory, ".")
    extensions = Enum.map_join(@extensions, ",", & &1)
    match = Path.join(directory, "*.{#{extensions}}")

    Path.wildcard(match)
    |> Enum.map(&Path.expand/1)
  end

  # Writes each serialized line into the (compressed) index file,
  # appending a newline per entry.
  @spec write_serialized_index_to_file(binary(), Enum.t()) :: :ok
  def write_serialized_index_to_file(path, data) do
    data
    |> Stream.into(File.stream!(path, ~w[compressed]a), &"#{&1}\n")
    |> Stream.run()
  end

  # The data path is converted to a charlist, presumably because it is
  # handed to an Erlang-side API via DB.to/DB.from — confirm.
  defp filenames(directory, name) do
    %{
      index: Path.join(directory, "#{name}.#{@index_file_ext}"),
      data: Path.join(directory, "#{name}.#{@data_file_ext}") |> String.to_charlist()
    }
  end

  defp without_ext(file, ext), do: Path.basename(file, ".#{ext}")
end
================================================
FILE: lib/elasticlunr/storage/provider.ex
================================================
defmodule Elasticlunr.Storage.Provider do
@moduledoc false
alias Elasticlunr.Index
# Behaviour every storage backend (Disk, Blackhole, ...) implements.
# Streams/enumerates every persisted index.
@callback load_all() :: Enum.t()
# Loads one index by name.
@callback read(name :: binary()) :: Index.t() | {:error, any()}
# Deletes a persisted index by name.
@callback delete(name :: binary()) :: :ok | {:error, any()}
# Persists an index.
@callback write(index :: Index.t()) :: :ok | {:error, any()}
end
================================================
FILE: lib/elasticlunr/storage.ex
================================================
defmodule Elasticlunr.Storage do
@moduledoc """
This is the storage interface that's used by the index manager.
```elixir
config :elasticlunr,
storage: Elasticlunr.Storage.Blackhole # this is the default provider
```
"""
alias Elasticlunr.Index
alias Elasticlunr.Storage.Blackhole
# Streams every index persisted by the configured provider.
@spec all() :: Enum.t()
def all do
provider().load_all()
end
# Persists an index via the configured provider.
@spec write(Index.t()) :: :ok | {:error, any()}
def write(%Index{} = index) do
provider().write(index)
end
# Loads one index by name via the configured provider.
@spec read(binary()) :: Index.t() | {:error, any()}
def read(index_name) do
provider().read(index_name)
end
# Deletes a persisted index by name via the configured provider.
@spec delete(binary()) :: :ok | {:error, any()}
def delete(index_name) do
provider().delete(index_name)
end
# The provider is resolved at call time from application config,
# defaulting to the no-op Blackhole backend.
defp provider, do: Application.get_env(:elasticlunr, :storage, Blackhole)
# Adopts the provider behaviour and injects `config/2` / `config_all/0`
# helpers that read the using module's application environment.
defmacro __using__(_) do
quote location: :keep do
@behaviour Elasticlunr.Storage.Provider
defp config(key, default \\ nil) do
Keyword.get(config_all(), key, default)
end
defp config_all, do: Application.get_env(:elasticlunr, __MODULE__, [])
end
end
end
================================================
FILE: lib/elasticlunr/tokenizer.ex
================================================
defmodule Elasticlunr.Tokenizer do
# Splits a string (or number) into lowercase, position-annotated tokens
# using a separator regex (whitespace and hyphens by default).
alias Elasticlunr.Token
@default_separator ~r/[\s\-]+/
@spec tokenize(binary() | number(), Regex.t()) :: list(Token.t())
def tokenize(str, separator \\ @default_separator)
def tokenize(str, separator) when is_binary(str), do: split(str, separator)
def tokenize(num, separator) when is_number(num) do
num
|> to_string()
|> split(separator)
end
defp split(str, separator) do
slice_end = 0
slice_start = 0
str_length = String.length(str)
str
|> String.downcase()
|> run_split(separator, slice_start, slice_end, str_length, [])
end
# Walks the string one character at a time, tracking the current slice
# [slice_start, slice_end). When the cursor hits a separator character
# (or the end of the string) and the slice is non-empty, the slice is
# emitted as a token and scanning resumes after the separator.
# NOTE(review): String.at/2 is linear in the index, making this pass
# quadratic for long strings — confirm acceptable for expected inputs.
defp run_split(str, separator, slice_start, slice_end, str_length, tokens)
when slice_end <= str_length do
char = String.at(str, slice_end)
slice_length = slice_end - slice_start
with true <- match_string?(char, separator) || slice_end == str_length,
{:s, true} <- {:s, slice_length > 0} do
token =
str
|> String.slice(slice_start, slice_length)
|> to_token(slice_start, slice_length)
tokens = tokens ++ [token]
slice_start = slice_end + 1
run_split(str, separator, slice_start, slice_end + 1, str_length, tokens)
else
# Separator hit with an empty slice: skip it and restart the slice.
{:s, false} ->
index = slice_end + 1
run_split(str, separator, index, index, str_length, tokens)
# Regular character: extend the current slice.
false ->
run_split(str, separator, slice_start, slice_end + 1, str_length, tokens)
end
end
# Cursor moved past the end of the string: all tokens collected.
defp run_split(_str, _separator, _slice_start, _slice_end, _str_length, tokens) do
tokens
end
# nil means the cursor is at end-of-string; that case is handled by the
# `slice_end == str_length` check above.
defp match_string?(nil, _separator), do: false
defp match_string?(char, separator) do
String.match?(char, separator)
end
# NOTE(review): the second argument bound to `end_index` is actually the
# slice LENGTH at every call site above, so the :end metadata holds the
# token's length rather than its end position — confirm intended.
defp to_token(str, start_index, end_index) do
Token.new(str, %{
end: end_index,
start: start_index
})
end
end
================================================
FILE: lib/elasticlunr/utils/process.ex
================================================
defmodule Elasticlunr.Utils.Process do
  # Helpers for inspecting dynamically supervised worker processes and
  # mapping them back to their registry keys.

  # True when a supervisor child entry is a worker pid for `mod`.
  @spec child_pid?(tuple, atom) :: boolean
  def child_pid?({:undefined, pid, :worker, [mod]}, mod) when is_pid(pid), do: true
  def child_pid?(_child, _module), do: false

  # Returns the registry keys under which a worker pid is registered.
  @spec id_from_pid(tuple, atom, atom) :: [atom | binary]
  def id_from_pid({:undefined, pid, :worker, [mod]}, registry, mod),
    do: Registry.keys(registry, pid)

  # Lists the registry keys of every worker of `module` currently
  # running under `supervisor`.
  @spec active_processes(atom, atom, atom) :: [any()]
  def active_processes(supervisor, registry, module) do
    for child <- DynamicSupervisor.which_children(supervisor),
        child_pid?(child, module),
        id <- id_from_pid(child, registry, module) do
      id
    end
  end
end
================================================
FILE: lib/elasticlunr/utlis.ex
================================================
defmodule Elasticlunr.Utils do
  @doc """
  Computes the Levenshtein (edit) distance between two strings,
  case-insensitively: the minimum number of single-character insertions,
  deletions, and substitutions needed to turn `a` into `b`.
  """
  @spec levenshtein_distance(binary, binary) :: integer()
  def levenshtein_distance(a, b) do
    ta = String.downcase(a) |> to_charlist |> List.to_tuple()
    tb = String.downcase(b) |> to_charlist |> List.to_tuple()
    m = tuple_size(ta)
    n = tuple_size(tb)

    cond do
      # Guard the empty cases explicitly: the DP loops below iterate
      # `0..(m - 1)` / `0..(n - 1)`, and in Elixir `0..-1` is a
      # DECREASING range, so an empty input previously crashed calling
      # elem/2 on an empty tuple.
      m == 0 ->
        n

      n == 0 ->
        m

      true ->
        # costs[{i, j}] = distance between the first i chars of `a` and
        # the first j chars of `b`; seed the first row and column.
        costs = Enum.reduce(0..m, %{}, fn i, acc -> Map.put(acc, {i, 0}, i) end)
        costs = Enum.reduce(0..n, costs, fn j, acc -> Map.put(acc, {0, j}, j) end)

        Enum.reduce(0..(n - 1), costs, fn j, acc ->
          Enum.reduce(0..(m - 1), acc, fn i, map ->
            # credo:disable-for-lines:2
            d =
              if elem(ta, i) == elem(tb, j) do
                map[{i, j}]
              else
                # deletion
                Enum.min([
                  map[{i, j + 1}] + 1,
                  # insertion
                  map[{i + 1, j}] + 1,
                  # substitution
                  map[{i, j}] + 1
                ])
              end

            Map.put(map, {i + 1, j + 1}, d)
          end)
        end)
        |> Map.get({m, n})
    end
  end
end
================================================
FILE: mix.exs
================================================
defmodule Elasticlunr.MixProject do
# Build configuration for the elasticlunr Hex package.
use Mix.Project
@source_url "https://github.com/heywhy/ex_elasticlunr"
def project do
[
app: :elasticlunr,
version: "0.6.4",
elixir: "~> 1.11",
elixirc_paths: elixirc_paths(Mix.env()),
start_permanent: Mix.env() == :prod,
description: description(),
package: package(),
aliases: aliases(),
deps: deps(),
source_url: @source_url,
# Coverage
test_coverage: [tool: ExCoveralls],
preferred_cli_env: [
coveralls: :test,
"coveralls.detail": :test,
"coveralls.post": :test,
"coveralls.html": :test,
"coveralls.json": :test
],
# Dialyxir
dialyzer: [
plt_file: {:no_warn, "priv/plts/dialyzer.plt"}
],
# Docs
name: "Elasticlunr",
homepage_url: "https://hexdocs.pm/elasticlunr",
docs: [
main: "readme",
extras: ["README.md", "LICENSE"]
]
]
end
# Run "mix help compile.app" to learn about applications.
def application do
[
extra_applications: [:logger, :crypto],
mod: {Elasticlunr.Application, []}
]
end
# Specifies which paths to compile per environment.
# Test builds additionally compile fixtures under test/support.
defp elixirc_paths(:test), do: ["lib", "test/support"]
defp elixirc_paths(_), do: ["lib"]
# Run "mix help deps" to learn about dependencies.
defp deps do
[
{:credo, "~> 1.5", only: [:dev, :test], runtime: false},
{:dialyxir, "~> 1.1", only: :dev, runtime: false},
{:ex_doc, "~> 0.25", only: :dev, runtime: false},
{:excoveralls, "~> 0.14", only: :test},
{:faker, "~> 0.16", only: :test},
{:jason, "~> 1.3"},
{:mox, "~> 1.0", only: :test},
{:stemmer, "~> 1.0"},
{:uniq, "~> 0.4"}
]
end
# `mix test` also runs the formatter and credo before the test suite.
defp aliases do
[
test: ~w[format credo test]
]
end
defp description do
"Elasticlunr is a lightweight full-text search engine. It's a port of Elasticlunr.js with more improvements."
end
defp package do
[
files: ["lib", "mix.exs", "README.md"],
maintainers: ["Atanda Rasheed"],
licenses: ["MIT License"],
links: %{
"GitHub" => @source_url,
"Docs" => "https://hexdocs.pm/elasticlunr"
}
]
end
end
================================================
FILE: test/core/document_store_test.exs
================================================
defmodule Elasticlunr.DocumentStoreTest do
# Unit tests for Elasticlunr.DocumentStore: construction, add/get/remove
# of documents, per-field length bookkeeping, and reset.
use ExUnit.Case
alias Elasticlunr.DocumentStore
describe "creating a new document store" do
test "defaults save attribute to true" do
assert %DocumentStore{documents: %{}, document_info: %{}, length: 0, save: true} =
DocumentStore.new()
end
test "without saving documents" do
assert %DocumentStore{documents: %{}, document_info: %{}, length: 0, save: false} =
DocumentStore.new(false)
end
end
describe "adding document to document store" do
test "adds a new document and save document" do
document = %{id: 10}
document_store = DocumentStore.new()
assert %DocumentStore{documents: %{10 => ^document}} =
DocumentStore.add(document_store, 10, document)
end
test "saves document and update length" do
document_store = DocumentStore.new()
assert document_store = DocumentStore.add(document_store, 10, %{id: 10})
assert %DocumentStore{length: 1} = document_store
assert %DocumentStore{length: 2} = DocumentStore.add(document_store, 1, %{id: 1})
end
test "updates document data and does not update length" do
document_store = DocumentStore.new()
assert document_store = DocumentStore.add(document_store, 10, %{id: 10})
assert %DocumentStore{length: 1, documents: %{10 => %{id: 10}}} = document_store
assert %DocumentStore{length: 1, documents: %{10 => %{id: 1}}} =
DocumentStore.add(document_store, 10, %{id: 1})
end
test "checks if document exists" do
document_store = DocumentStore.new()
assert document_store = DocumentStore.add(document_store, 10, %{id: 10})
assert DocumentStore.exists?(document_store, 10)
refute DocumentStore.exists?(document_store, 100)
end
end
describe "retrieving document from document store" do
test "returns document" do
document = %{id: 10}
document_store =
DocumentStore.new()
|> DocumentStore.add(10, document)
assert ^document = DocumentStore.get(document_store, 10)
end
test "returns nil for non-existing document" do
document_store = DocumentStore.new()
assert is_nil(DocumentStore.get(document_store, 10))
end
test "returns nil for non-persitent store" do
document = %{id: 10}
document_store =
DocumentStore.new(false)
|> DocumentStore.add(10, document)
refute ^document = DocumentStore.get(document_store, 10)
end
end
describe "removing document from document store" do
test "removes document" do
document = %{id: 10}
document_store =
DocumentStore.new()
|> DocumentStore.add(10, document)
assert %DocumentStore{length: 1, documents: %{10 => %{id: 10}}} = document_store
assert %DocumentStore{length: 0, documents: %{}} = DocumentStore.remove(document_store, 10)
end
end
describe "adding field length of document field" do
test "adds field length" do
document = %{id: 10}
document_store =
DocumentStore.new()
|> DocumentStore.add(10, document)
assert %DocumentStore{
length: 1,
documents: %{10 => %{id: 10}},
document_info: %{10 => %{name: 20}}
} = DocumentStore.add_field_length(document_store, 10, :name, 20)
end
test "updates field length" do
document = %{id: 10}
document_store =
DocumentStore.new()
|> DocumentStore.add(10, document)
assert %DocumentStore{document_info: %{10 => %{name: 20}}} =
DocumentStore.add_field_length(document_store, 10, :name, 20)
assert %DocumentStore{document_info: %{10 => %{name: 36}}} =
DocumentStore.update_field_length(document_store, 10, :name, 36)
end
end
describe "retrieving document field length" do
test "returns nil" do
document = %{id: 10}
document_store =
DocumentStore.new()
|> DocumentStore.add(10, document)
assert is_nil(DocumentStore.get_field_length(document_store, 10, :name))
end
test "returns field length" do
document = %{id: 10}
document_store =
DocumentStore.new()
|> DocumentStore.add(10, document)
|> DocumentStore.add_field_length(10, :name, 20)
assert 20 = DocumentStore.get_field_length(document_store, 10, :name)
end
end
describe "reset document store" do
test "clears store attributes" do
document = %{id: 10}
assert document_store =
DocumentStore.new()
|> DocumentStore.add(10, document)
|> DocumentStore.add_field_length(10, :name, 20)
assert %DocumentStore{} = document_store
assert %DocumentStore{documents: %{}, document_info: %{}, length: 0, save: true} =
DocumentStore.reset(document_store)
end
end
end
================================================
FILE: test/core/field_test.exs
================================================
defmodule Elasticlunr.FieldTest do
  use ExUnit.Case

  alias Elasticlunr.{DB, Field, Pipeline, Token}

  # Each test gets a fresh field backed by its own ETS table, pre-populated
  # with one document: %{id: 1, content: "hello world"}. The table is
  # destroyed on exit so tests stay isolated.
  setup context do
    opts = [
      pipeline: Pipeline.new(),
      db: DB.init(:field_test, ~w[public]a)
    ]

    field =
      Field.new(opts)
      |> Field.add([%{id: 1, content: "hello world"}])

    :ok = on_exit(fn -> true = DB.destroy(field.db) end)

    Map.put(context, :field, field)
  end

  # tokens/1 returns a lazy Stream of token entries carrying tf and the
  # document ids that contain each token.
  test "tokens/1", %{field: field} do
    tokens = Field.tokens(field)

    assert %Stream{} = tokens
    refute Enum.empty?(tokens)
    assert [%{tf: 1, documents: documents} | _] = Enum.to_list(tokens)
    assert [1] = Enum.to_list(documents)
  end

  test "documents/1", %{field: field} do
    assert documents = Field.documents(field)
    assert [1] = Enum.to_list(documents)
  end

  # term_frequency/2 yields {document_id, frequency} pairs, and is falsy for
  # a term that was never indexed.
  test "term_frequency/2", %{field: field} do
    assert tf = Field.term_frequency(field, "hello")
    assert [{1, 1.0}] = Enum.to_list(tf)
    refute Field.term_frequency(field, "missing")
  end

  test "has_token/2", %{field: field} do
    assert Field.has_token(field, "hello")
    refute Field.has_token(field, "missing")
  end

  test "get_token/2", %{field: field} do
    assert %{term: "hello", tf: 1} = Field.get_token(field, "hello")
    refute Field.get_token(field, "missing")
  end

  # A query pipeline is unset by default and can be attached after creation.
  test "set_query_pipeline/2", %{field: field} do
    pipeline = Pipeline.new()

    assert %Field{query_pipeline: nil} = field
    assert %Field{query_pipeline: ^pipeline} = Field.set_query_pipeline(field, pipeline)
  end

  test "add/2", %{field: field} do
    assert Enum.count(Field.documents(field)) == 1
    assert field = Field.add(field, [%{id: 10, content: "testing"}])
    assert Enum.count(Field.documents(field)) == 2
    assert Field.has_token(field, "testing")
  end

  # length/2,3 exposes counts per dimension: document ids, idf entries,
  # term entries, and tf entries.
  test "length/2", %{field: field} do
    assert Field.length(field, :ids) == 1
    assert Field.length(field, :idf, "hello") == 1
    assert Field.length(field, :term, "world") == 1
    assert Field.length(field, :tf, "world") == 1
  end

  # update/2 replaces the content of an existing document without changing
  # the document count.
  test "update/2", %{field: field} do
    assert field = Field.update(field, [%{id: 1, content: "worse"}])
    assert Field.has_token(field, "worse")
    assert Enum.count(Field.documents(field)) == 1
  end

  test "remove/2", %{field: field} do
    assert field = Field.remove(field, [1])
    # NOTE(review): "worse" is only indexed by the update/2 test, which runs
    # on its own setup — this refute is vacuously true even before remove/2.
    # Consider refuting a token from the setup document (e.g. "hello").
    refute Field.has_token(field, "worse")
    assert Enum.empty?(Field.documents(field))
  end

  # analyze/3 runs the indexing pipeline, unless is_query: true AND a query
  # pipeline is set — then the query pipeline is used instead.
  test "analyze/3", %{field: field} do
    assert [%Token{token: "coming"}] = Field.analyze(field, "coming", [])
    assert [%Token{token: "coming"}] = Field.analyze(field, "coming", is_query: true)

    assert [%Token{token: "foo"}] =
             field
             |> Field.set_query_pipeline(Pipeline.new([fn _ -> Token.new("foo") end]))
             |> Field.analyze("coming", is_query: true)
  end

  # terms/2 matches exact strings, regexes, and fuzzy terms; a miss yields
  # an empty result.
  test "terms/3", %{field: field} do
    assert %{1 => _} = Field.terms(field, terms: ["hello"])
    assert %{1 => _} = Field.terms(field, terms: [~r/hello/])
    assert %{1 => _} = Field.terms(field, terms: ["hello"], fuzziness: 2)
    assert Enum.empty?(Field.terms(field, terms: ["missing"]))
  end
end
================================================
FILE: test/core/index_test.exs
================================================
defmodule Elasticlunr.IndexTest do
  use ExUnit.Case

  alias Elasticlunr.{Field, Index, Pipeline, Token}
  alias Faker.Address.En, as: Address

  describe "creating an index" do
    # Index.new/0 generates a random binary name; name and ref can be
    # overridden via options. The ref defaults to "id".
    test "creates a new instance" do
      assert %Index{name: name} = Index.new()
      assert is_binary(name)
      assert %Index{name: :test_index, ref: "id", fields: %{}} = Index.new(name: :test_index)

      assert %Index{name: :test_index, ref: "name", fields: %{}} =
               Index.new(name: :test_index, ref: "name")
    end

    # Adding the first non-ref field also materializes the "id" ref field.
    test "creates a new instance and populate fields" do
      assert %Index{fields: %{"id" => %Field{}, "name" => %Field{}}} =
               Index.add_field(Index.new(), "name")
    end
  end

  describe "modifying an index" do
    test "adds new fields" do
      index = Index.new()

      assert %Index{fields: %{}} = index
      assert index = Index.add_field(index, "name")
      assert %Index{fields: %{"name" => %Field{}}} = index

      assert %Index{fields: %{"name" => %Field{}, "bio" => %Field{}}} =
               Index.add_field(index, "bio")
    end

    # save_document/2 toggles the per-field `store` flag for all fields.
    test "save document" do
      index = Index.add_field(Index.new(), "name")

      assert %Index{fields: %{"name" => %Field{store: true}}} = index
      assert %Index{fields: %{"name" => %Field{store: false}}} = Index.save_document(index, false)
    end

    test "updates a field" do
      index = Index.new()

      assert %Index{fields: %{}} = index
      assert index = Index.add_field(index, "name")
      assert field = Index.get_field(index, "name")
      assert %Field{query_pipeline: nil} = field

      pipeline = Pipeline.new()

      assert %Field{query_pipeline: ^pipeline} =
               index
               |> Index.update_field("name", %{field | query_pipeline: pipeline})
               |> Index.get_field("name")
    end

    # update_field/3 raises for a field name never added to the index.
    test "fails to update missing field" do
      index = Index.new()

      assert %Index{fields: %{}} = index

      assert_raise RuntimeError, "Unknown field address in index", fn ->
        Index.update_field(index, "address", Field.new([]))
      end
    end
  end

  describe "fiddling with an index" do
    test "adds document" do
      index =
        Index.new()
        |> Index.add_field("bio")

      assert index =
               Index.add_documents(index, [
                 %{
                   "id" => 10,
                   "bio" => Faker.Lorem.paragraph()
                 }
               ])

      assert %Index{documents_size: 1} = index

      assert %Index{documents_size: 2} =
               Index.add_documents(index, [
                 %{
                   "id" => 29,
                   "bio" => Faker.Lorem.paragraph()
                 }
               ])
    end

    # Skipped: nested-attribute flattening ("address" -> "address.city")
    # is not currently exercised.
    @tag :skip
    test "adds documents and flatten nested attributes" do
      index =
        Index.new()
        |> Index.add_field("name")
        |> Index.add_field("address")

      document = %{
        "id" => 20,
        "name" => "nelson",
        "address" => %{
          "city" => Address.city(),
          "country" => Address.country_code(),
          "line1" => Address.street_address(),
          "line2" => Address.secondary_address(),
          "state" => Address.state()
        }
      }

      index = Index.add_documents(index, [document])

      query = %{
        "bool" => %{
          "should" => %{
            "match" => %{"address.city" => get_in(document, ~w[address city])}
          }
        }
      }

      assert %Index{fields: %{"address.city" => %Field{}}, documents_size: 1} = index
      refute Index.search(index, %{"query" => query}) |> Enum.empty?()
    end

    @tag :skip
    test "removes documents with nested attributes" do
      index =
        Index.new()
        |> Index.add_field("name")
        |> Index.add_field("address")

      document = %{
        "id" => 20,
        "name" => "nelson",
        "address" => %{
          "city" => Address.city(),
          "country" => Address.country_code(),
          "line1" => Address.street_address(),
          "line2" => Address.secondary_address(),
          "state" => Address.state()
        }
      }

      index = Index.add_documents(index, [document])

      assert %Index{fields: %{"address.city" => %Field{}}, documents_size: 1} = index

      assert %Index{fields: %{"address.city" => %Field{}}, documents_size: 0} =
               Index.remove_documents(index, [20])
    end

    # A document may leave some indexed fields empty; only non-empty fields
    # contribute tokens.
    test "allows addition of document with empty field" do
      index =
        Index.new()
        |> Index.add_field("bio")
        |> Index.add_field("title")

      assert index = Index.add_documents(index, [%{"id" => 10, "bio" => "", "title" => "test"}])

      assert term_frequency =
               index
               |> Index.get_field("title")
               |> Field.term_frequency("test")

      assert index
             |> Index.get_field("title")
             |> Field.length(:tf, "test")
             |> Kernel.==(1)

      assert term_frequency
             |> Enum.find(&(elem(&1, 0) == 10))
             |> Kernel.==({10, 1})
    end

    @tag :skip
    test "fails when adding duplicate document" do
      index = Index.add_field(Index.new(), "bio")

      document = %{
        "id" => 10,
        "bio" => Faker.Lorem.paragraph()
      }

      assert index = Index.add_documents(index, [document])

      assert_raise RuntimeError, "Document id 10 already exists in the index", fn ->
        Index.add_documents(index, [document])
      end
    end

    # Removing a document also drops tokens unique to it ("a") while keeping
    # tokens still referenced by other documents ("another").
    test "removes document" do
      index =
        Index.new()
        |> Index.add_field("id")
        |> Index.add_field("bio")

      document = %{
        "id" => 10,
        "bio" => "this is a test"
      }

      document_2 = %{
        "id" => 30,
        "bio" => "this is another test"
      }

      assert index = Index.add_documents(index, [document_2, document])
      assert %Index{documents_size: 2} = index
      assert index = Index.remove_documents(index, [10])
      assert %Index{documents_size: 1} = index
      assert field = Index.get_field(index, "bio")
      refute Field.has_token(field, "a")
      assert Field.has_token(field, "another")
      assert is_nil(Field.get_token(field, "a"))
      assert %{idf: idf} = Field.get_token(field, "another")
      assert idf > 0

      %{documents: documents} = Field.get_token(field, "another")

      assert [30] = Enum.to_list(documents)
    end

    test "does not remove unknown document" do
      index = Index.add_field(Index.new(), "bio")

      document = %{
        "id" => 10,
        "bio" => Faker.Lorem.paragraph()
      }

      assert index = Index.add_documents(index, [document])
      assert %Index{documents_size: 1} = index
      assert %Index{documents_size: 1} = Index.remove_documents(index, [11])
    end

    test "update existing document" do
      index = Index.add_field(Index.new(), "bio")

      document = %{
        "id" => 10,
        "bio" => Faker.Lorem.paragraph()
      }

      index = Index.add_documents(index, [document])

      assert %Index{documents_size: 1} = index

      updated_document = %{document | "bio" => Faker.Lorem.paragraph()}

      assert %Index{documents_size: 1} = Index.update_documents(index, [updated_document])
    end

    # After an update, searches hit the new content and miss the old one.
    test "search for a document" do
      index = Index.add_field(Index.new(), "bio")

      document = %{
        "id" => 10,
        "bio" => "foo"
      }

      index = Index.add_documents(index, [document])

      assert Index.search(index, "foo") |> Enum.count() == 1

      updated_document = %{document | "bio" => "bar"}
      index = Index.update_documents(index, [updated_document])

      assert Index.search(index, "bar") |> Enum.count() == 1
      assert Index.search(index, "foo") |> Enum.empty?()
    end

    # A field-level query pipeline can expand a query token ("foo") into
    # several search terms, independently of the indexing pipeline.
    test "allows the use of multiple, different pipelines for searching and indexing" do
      index = Index.add_field(Index.new(), "info")

      callback = fn %Token{token: token} ->
        tokens = [token]

        case token == "foo" do
          false ->
            tokens

          true ->
            ~w[bar baz barry] ++ tokens
        end
      end

      query_pipeline = Pipeline.new([callback])

      field =
        index
        |> Index.get_field("info")
        |> Field.set_query_pipeline(query_pipeline)

      index = Index.update_field(index, "info", field)

      index =
        index
        |> Index.add_documents([
          %{"id" => "a", "info" => "Barry had a beer with Fred in the bar"},
          %{"id" => "b", "info" => "the bar is empty"}
        ])

      results =
        Index.search(index, %{
          "query" => %{
            "match" => %{"info" => "foo"}
          }
        })

      assert Enum.count(results) == 2
      assert [%{score: score_1}, %{score: score_2}] = results
      # Document "a" matches more expanded terms, so it must outrank "b".
      assert score_2 < score_1

      results =
        Index.search(index, %{
          "query" => %{
            "match" => %{"info" => "fred"}
          }
        })

      assert Enum.count(results) == 1
    end
  end
end
================================================
FILE: test/deserializer_test.exs
================================================
defmodule Elasticlunr.DeserializerTest do
  use ExUnit.Case

  alias Elasticlunr.{Deserializer, Index}

  # The deserializer consumes one serialized entity per line (settings, db,
  # fields) and rebuilds an %Index{}.
  test "deserialize index" do
    data = [
      "settings#name:index|ref:id|pipeline:",
      "db#name:elasticlunr_index|options:compressed,named_table,set,public",
      "field#name:id|pipeline:Elixir.Elasticlunr.Index.IdPipeline|store_documents:false|store_positions:false"
    ]

    index =
      to_stream(data)
      |> Deserializer.deserialize()

    assert %Index{name: "index"} = index
  end

  # Wraps the list in a Stream so the deserializer receives lazily-produced
  # lines, as it would when streaming from a file.
  #
  # The previous implementation iterated a counter and fetched each element
  # with Enum.at/2, walking the list from the head on every access (O(n^2)
  # overall). Stream.map/2 over the list is equivalent and linear.
  defp to_stream(data) do
    Stream.map(data, & &1)
  end
end
================================================
FILE: test/dsl_test.exs
================================================
defmodule Elasticlunr.DslTest do
  use ExUnit.Case

  alias Elasticlunr.{Index, Pipeline, Token}
  alias Elasticlunr.Dsl.{BoolQuery, MatchAllQuery, MatchQuery, NotQuery, TermsQuery}
  alias Elasticlunr.Dsl.QueryRepository

  # Builds an index of five documents over a single "content" field, using a
  # pipeline that passes tokens through and tokenizes raw strings itself.
  setup context do
    callback = fn
      %Token{} = token ->
        token

      str ->
        # BUGFIX: the original piped the result of String.split/2 (a list)
        # into String.downcase/1, which only accepts binaries and would raise
        # FunctionClauseError. Downcase the string before splitting.
        str
        |> String.downcase()
        |> String.split(" ")
        |> Enum.map(&Token.new(&1))
    end

    pipeline = Pipeline.new([callback])

    index =
      Index.new()
      |> Index.add_field("content", pipeline: pipeline)
      |> Index.add_documents([
        %{"id" => 1, "content" => "The quick fox jumped over the lazy dog"},
        %{
          "id" => 2,
          "content" =>
            "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas viverra enim non purus rutrum porta ut non urna. Nullam eu ante eget nisi laoreet pretium. Curabitur varius velit vel viverra facilisis. Pellentesque et condimentum mauris. Quisque faucibus varius interdum. Fusce cursus pretium tempus. Ut gravida tortor et mi dignissim sagittis. Aliquam ullamcorper dignissim arcu sollicitudin fermentum. Nunc elementum tortor ex, sit amet posuere lectus accumsan quis. Vivamus sit amet eros blandit, sagittis quam at, vulputate felis. Ut faucibus pretium feugiat. Fusce diam felis, euismod ac tellus id, blandit venenatis dolor. Nullam porttitor suscipit diam, a feugiat dui pharetra at."
        },
        %{"id" => 3, "content" => "Lorem dog"},
        %{
          "id" => 4,
          "content" => "livebook is elixir's own jupyter. it's a very impressive impression."
        },
        %{
          "id" => 5,
          "content" =>
            "there are lots of contributors to the elixir project and many cool projects using elixir, ex. livebook, elixir_nx and so on"
        }
      ])

    Map.put(context, :index, index)
  end

  describe "match_all" do
    test "parses correctly" do
      assert %MatchAllQuery{boost: 2.5} = QueryRepository.parse("match_all", %{"boost" => 2.5})
    end

    # match_all hits every document with a uniform score of 1.
    test "correctly operates match_all query", %{index: index} do
      query = MatchAllQuery.new()

      assert result = MatchAllQuery.score(query, index, [])
      assert Enum.count(result) == 5

      for %{score: score} <- result do
        assert score == 1
      end
    end
  end

  describe "terms" do
    # An empty terms payload degrades to match_all; multiple fields become a
    # bool query of should clauses.
    test "parses correctly" do
      assert %MatchAllQuery{} = QueryRepository.parse("terms", %{})

      assert %TermsQuery{field: "name", terms: ["nelson"]} =
               QueryRepository.parse("terms", %{"name" => "nelson"})

      assert %TermsQuery{field: "name", terms: ["kim"]} =
               QueryRepository.parse("terms", %{
                 "name" => %{"value" => "kim"}
               })

      assert %BoolQuery{
               should: [
                 %TermsQuery{field: "country", terms: ["us"], boost: 1},
                 %TermsQuery{field: "name", terms: ["john"], boost: 1}
               ]
             } = QueryRepository.parse("terms", %{"name" => "john", "country" => "us"})
    end

    test "performs base functionality", %{index: index} do
      query =
        TermsQuery.new(
          field: "content",
          terms: ["fox"]
        )

      assert result = TermsQuery.score(query, index, [])
      assert Enum.count(result) == 1
      assert [%{ref: 1}] = result
    end

    # A boost of 2 must exactly double the unboosted score.
    test "boost", %{index: index} do
      non_boost_query =
        TermsQuery.new(
          field: "content",
          terms: ["fox"]
        )

      boost_query =
        TermsQuery.new(
          field: "content",
          terms: ["fox"],
          boost: 2
        )

      assert boost_result = TermsQuery.score(boost_query, index, [])
      assert non_boost_result = TermsQuery.score(non_boost_query, index, [])
      assert Enum.count(boost_result) == Enum.count(non_boost_result)
      assert [%{score: score_1}] = boost_result
      assert [%{score: score_2}] = non_boost_result
      assert score_1 == score_2 * 2
    end
  end

  describe "bool" do
    test "parses correctly" do
      assert %BoolQuery{must: %TermsQuery{field: "country", terms: ["us"]}} =
               QueryRepository.parse("bool", %{"must" => %{"terms" => %{"country" => "us"}}})

      assert %BoolQuery{
               minimum_should_match: 0,
               must: %TermsQuery{field: "country", terms: ["us"]},
               must_not: %TermsQuery{field: "gender", terms: ["male"]},
               filter: [%MatchQuery{field: "balance", query: 1000}],
               should: [%MatchQuery{field: "account_type", query: "savings"}]
             } =
               QueryRepository.parse("bool", %{
                 "must" => %{"terms" => %{"country" => "us"}},
                 "must_not" => %{"terms" => %{"gender" => "male"}},
                 "filter" => %{"match" => %{"balance" => 1000}},
                 "should" => %{"match" => %{"account_type" => "savings"}}
               })
    end

    # Only document 3 contains both "lorem" (must) and "dog" (should).
    test "filters via must functionality", %{index: index} do
      query =
        BoolQuery.new(
          must: TermsQuery.new(field: "content", terms: ["lorem"]),
          should: [
            TermsQuery.new(field: "content", terms: ["dog"])
          ]
        )

      assert BoolQuery.score(query, index, []) |> Enum.count() == 1
    end

    test "filters via must_not functionality", %{index: index} do
      query =
        BoolQuery.new(
          must: TermsQuery.new(field: "content", terms: ["lorem"]),
          must_not: TermsQuery.new(field: "content", terms: ["ipsum"]),
          should: [
            TermsQuery.new(field: "content", terms: ["dog"])
          ]
        )

      refute BoolQuery.score(query, index, [])
             |> Enum.empty?()
    end

    test "check if document has positions before trying to access it", %{index: index} do
      assert Index.search(index, "me") |> Enum.empty?()
    end
  end

  describe "match" do
    test "parses correctly" do
      assert %MatchAllQuery{boost: 1} = QueryRepository.parse("match", %{})

      assert %MatchQuery{field: "country", query: "us"} =
               QueryRepository.parse("match", %{"country" => "us"})

      assert %MatchQuery{field: "country", query: "us", operator: "and"} =
               QueryRepository.parse("match", %{
                 "country" => %{"query" => "us", "operator" => "and"}
               })

      assert %BoolQuery{
               should: [
                 %MatchQuery{field: "city", query: "arizona"},
                 %MatchQuery{field: "country", query: "us"}
               ]
             } =
               QueryRepository.parse("match", %{
                 "city" => "arizona",
                 "country" => "us"
               })
    end

    test "performs base functionality", %{index: index} do
      query = MatchQuery.new(field: "content", query: "brown fox")

      assert results = MatchQuery.score(query, index, [])
      assert Enum.count(results) == 1
      assert [%{ref: 1}] = results
    end

    test "honours minimum_should_match", %{index: index} do
      query = MatchQuery.new(field: "content", query: "brown fox quick", minimum_should_match: 2)

      assert results = MatchQuery.score(query, index, [])
      assert Enum.count(results) == 1
      assert [%{ref: 1}] = results
    end

    # With operator "and", every query term must match the document.
    test "honours and operator", %{index: index} do
      query =
        MatchQuery.new(
          field: "content",
          query: "fox quick",
          operator: "and"
        )

      assert results = MatchQuery.score(query, index, [])
      assert Enum.count(results) == 1
      assert [%{ref: 1}] = results
    end
  end

  describe "not" do
    test "parses correctly" do
      assert %NotQuery{inner_query: %BoolQuery{}} = QueryRepository.parse("not", %{"bool" => %{}})

      assert %NotQuery{inner_query: %BoolQuery{}} =
               QueryRepository.parse("not", %{
                 "bool" => %{
                   "should" => [
                     %{"match" => %{"name" => "john"}}
                   ]
                 }
               })
    end

    # NotQuery returns the complement: the documents matching neither
    # "quick" nor "lorem" (ids 4 and 5).
    test "applies inner query", %{index: index} do
      query =
        NotQuery.new(
          BoolQuery.new(
            should: [
              MatchQuery.new(field: "content", query: "quick"),
              MatchQuery.new(field: "content", query: "lorem")
            ]
          )
        )

      assert results = NotQuery.score(query, index, [])
      assert Enum.count(results) == 2
    end
  end
end
================================================
FILE: test/manager/index_manager_test.exs
================================================
defmodule Elasticlunr.IndexManagerTest do
  use ExUnit.Case

  alias Elasticlunr.{Index, IndexManager}

  describe "working with index manager" do
    # save/1 registers a fresh index and echoes it back.
    test "saves an index" do
      idx = Index.new()

      assert {:ok, ^idx} = IndexManager.save(idx)
    end

    # Registering the same index twice surfaces the underlying
    # already-started error.
    test "fails when saving duplicate index" do
      idx = Index.new()

      assert {:ok, ^idx} = IndexManager.save(idx)
      assert {:error, {:already_started, _}} = IndexManager.save(idx)
    end

    # update/1 on a registered index returns the index itself.
    test "updates existing index" do
      idx = Index.new()

      assert {:ok, ^idx} = IndexManager.save(idx)
      assert IndexManager.update(idx) == idx
    end

    test "fails update action for non-existent index" do
      assert IndexManager.update(Index.new()) == :not_running
    end

    # remove/1 unregisters the index; lookups afterwards report it as
    # not running.
    test "removes an index" do
      idx = Index.new()

      assert {:ok, ^idx} = IndexManager.save(idx)
      assert :ok = IndexManager.remove(idx)
      assert IndexManager.get(idx.name) == :not_running
    end

    test "fails to remove a non-existent index" do
      assert IndexManager.remove(Index.new()) == :not_running
    end

    # get/1 resolves a registered index by name.
    test "return a running instance" do
      idx = Index.new()
      {:ok, _} = IndexManager.save(idx)

      assert IndexManager.get(idx.name) == idx
      assert IndexManager.get("unknown-index") == :not_running
    end
  end
end
================================================
FILE: test/pipeline/stemmer_test.exs
================================================
defmodule Elasticlunr.Pipeline.StemmerTest do
  use ExUnit.Case

  alias Elasticlunr.Token
  alias Elasticlunr.{Pipeline, Pipeline.Stemmer}

  import Elasticlunr.Test.Fixture

  describe "running stemmer against tokens" do
    # Every word in the fixture must stem to its expected form.
    test "works as expected" do
      for {word, stemmed_word} <- stemmer_fixture() do
        assert Stemmer.call(Token.new(word)) == Token.new(stemmed_word)
      end
    end

    # The stemmer ships as part of the default pipeline configuration.
    test "is a default runner for default pipeline" do
      assert Enum.member?(Pipeline.default_runners(), Stemmer)
    end
  end
end
================================================
FILE: test/pipeline/stop_word_filter_test.exs
================================================
defmodule Elasticlunr.Pipeline.StopWordFilterTest do
  @moduledoc false
  use ExUnit.Case

  alias Elasticlunr.{Pipeline, Token}
  alias Elasticlunr.Pipeline.StopWordFilter

  describe "running stop_word_filter against tokens" do
    # The stop-word filter ships as part of the default pipeline.
    test "is a default runner for default pipeline" do
      assert Enum.member?(Pipeline.default_runners(), StopWordFilter)
    end

    # Each stop word must be dropped (call/1 returns nil), leaving nothing.
    test "removes stop words" do
      surviving =
        ~w[the and but than when]
        |> Enum.map(&Token.new/1)
        |> Enum.filter(fn token -> not is_nil(StopWordFilter.call(token)) end)

      assert Enum.empty?(surviving)
    end
  end
end
================================================
FILE: test/pipeline/trimmer_test.exs
================================================
# BUGFIX: module was misspelled "TimmerTest"; renamed to match the Trimmer
# runner under test.
defmodule Elasticlunr.Pipeline.TrimmerTest do
  @moduledoc false
  use ExUnit.Case

  alias Elasticlunr.{Pipeline, Token}
  alias Elasticlunr.Pipeline.Trimmer

  describe "running trimmer against tokens" do
    # The trimmer ships as part of the default pipeline configuration.
    test "is a default runner for default pipeline" do
      assert Pipeline.default_runners()
             |> Enum.any?(fn
               Trimmer -> true
               _ -> false
             end)
    end

    test "passes through latin characters" do
      # BUGFIX: the original asserted on Token.new/1 alone and never invoked
      # the trimmer, so it tested nothing about Trimmer behavior.
      assert %Token{token: "hello"} = Token.new("hello") |> Trimmer.call()
    end

    # Leading/trailing punctuation is stripped; inner punctuation (it's)
    # survives.
    test "removes leading and trailing punctuation" do
      assert %Token{token: "hello"} = Token.new("hello.") |> Trimmer.call()
      assert %Token{token: "it's"} = Token.new("it's") |> Trimmer.call()
      assert %Token{token: "james"} = Token.new("james'") |> Trimmer.call()
      assert %Token{token: "stop"} = Token.new("stop!'") |> Trimmer.call()
      assert %Token{token: "first"} = Token.new("first'") |> Trimmer.call()
      assert %Token{token: ""} = Token.new("") |> Trimmer.call()
      assert %Token{token: "tag"} = Token.new("[tag]") |> Trimmer.call()
      assert %Token{token: "tag"} = Token.new("[[[tag]]]") |> Trimmer.call()
      assert %Token{token: "hello"} = Token.new("[[!@#@!hello]]]}}}") |> Trimmer.call()
      assert %Token{token: "hello"} = Token.new("~!@@@hello***()()()]]") |> Trimmer.call()
    end
  end
end
================================================
FILE: test/pipeline_test.exs
================================================
defmodule Elasticlunr.PipelineTest do
  use ExUnit.Case

  alias Elasticlunr.{Pipeline, Token, Tokenizer}
  alias Elasticlunr.Pipeline.{Stemmer, StopWordFilter, Trimmer}

  describe "creating pipeline" do
    # add/2 appends a runner module to the callback queue.
    test "adds a runner to the queue" do
      assert p = Pipeline.new([])
      assert %Pipeline{callback: []} = p
      assert %Pipeline{callback: [Trimmer]} = Pipeline.add(p, Trimmer)
    end

    # Adding the same runner twice keeps a single entry.
    test "ignores duplicate runner in the queue" do
      p = Pipeline.new([])

      assert %Pipeline{callback: []} = p
      assert %Pipeline{callback: [Trimmer]} = Pipeline.add(p, Trimmer)
      assert %Pipeline{callback: [Trimmer]} = Pipeline.add(p, Trimmer)
    end
  end

  describe "updating pipeline" do
    test "removes runner from queue" do
      p = Pipeline.new([Stemmer, Trimmer])

      assert %Pipeline{callback: [Stemmer, Trimmer]} = p
      assert %Pipeline{callback: [Stemmer]} = Pipeline.remove(p, Trimmer)
    end

    # insert_before/3 and insert_after/3 position a runner relative to an
    # existing one.
    test "inserts runner at position" do
      p = Pipeline.new([Stemmer, Trimmer])

      assert %Pipeline{callback: [Stemmer, Trimmer]} = p
      assert p = Pipeline.insert_before(p, StopWordFilter, Trimmer)
      assert %Pipeline{callback: [Stemmer, StopWordFilter, Trimmer]} = p
      assert p = Pipeline.remove(p, Stemmer)
      assert %Pipeline{callback: [StopWordFilter, Trimmer]} = p

      assert %Pipeline{callback: [StopWordFilter, Stemmer, Trimmer]} =
               Pipeline.insert_after(p, Stemmer, StopWordFilter)
    end
  end

  describe "running pipeline" do
    # Full default pipeline stems and trims tokenized input.
    test "executes runners in the queue" do
      stream =
        Pipeline.default_runners()
        |> Pipeline.new()
        |> Pipeline.run(Tokenizer.tokenize("consignment worlds"))

      assert [
               %Token{token: "consign"},
               %Token{token: "world"}
             ] = stream
    end

    # An identity function runner leaves tokens untouched.
    test "runs a custom function" do
      tokens = Tokenizer.tokenize("consignment worlds")

      assert ^tokens = Pipeline.run(Pipeline.new([& &1]), tokens)
    end
  end
end
================================================
FILE: test/serializer_test.exs
================================================
defmodule Elasticlunr.SerializerTest do
  use ExUnit.Case

  alias Elasticlunr.{Index, Serializer}

  # Serialization emits one line per entity: settings, the backing db
  # configuration, then each field. These fixture strings must stay
  # byte-exact for the comparison to hold.
  test "serialize index without documents" do
    index = Index.new(name: "index")

    structure = [
      "settings#name:index|ref:id|pipeline:",
      "db#name:elasticlunr_index|options:compressed,named_table,ordered_set,public",
      "field#name:id|pipeline:Elixir.Elasticlunr.Index.IdPipeline|store_documents:false|store_positions:false"
    ]

    data = Serializer.serialize(index) |> Enum.into([])

    assert structure == data
  end

  # With documents present, added fields appear before the implicit "id"
  # ref field in the output.
  test "serialize index with documents" do
    index =
      Index.new(name: "index")
      |> Index.add_field("body")
      |> Index.add_documents([%{"id" => 1, "body" => "hello world"}])

    structure = [
      "settings#name:index|ref:id|pipeline:",
      "db#name:elasticlunr_index|options:compressed,named_table,ordered_set,public",
      "field#name:body|pipeline:|store_documents:true|store_positions:true",
      "field#name:id|pipeline:Elixir.Elasticlunr.Index.IdPipeline|store_documents:false|store_positions:false"
    ]

    data = Serializer.serialize(index) |> Enum.into([])

    assert structure == data
  end
end
================================================
FILE: test/storage/disk_test.exs
================================================
defmodule Elasticlunr.Storage.DiskTest do
  use ExUnit.Case

  alias Elasticlunr.Index
  alias Elasticlunr.Pipeline
  alias Elasticlunr.Storage.Disk

  @otp_app :elasticlunr

  # Point the Disk adapter at a throwaway ./storage directory for every test,
  # removing written index files and the env entry afterwards.
  setup do
    storage_path = Path.join(__DIR__, "../../storage")

    Application.put_env(@otp_app, Disk, directory: storage_path)

    on_exit(fn ->
      Enum.each(Disk.files(), &File.rm!/1)
      Application.delete_env(@otp_app, Disk)
    end)
  end

  # Temporarily redirects the Disk adapter to the checked-in fixture
  # directory (contains users.index); restores the previous env on exit.
  defp fixture_storage(_context) do
    opts = Application.get_env(@otp_app, Disk)
    storage_path = Path.join(__DIR__, "../support/fixture")

    Application.put_env(@otp_app, Disk, directory: storage_path)

    on_exit(fn ->
      Application.put_env(@otp_app, Disk, opts)
    end)
  end

  describe "serializing an index" do
    # write/1 persists the index as "<name>.index" with non-empty content.
    test "writes to disk" do
      index = Index.new()
      options = Application.get_env(@otp_app, Disk)
      file = Path.join(options[:directory], "#{index.name}.index")

      assert :ok = Disk.write(index)
      assert File.exists?(file)
      assert {:ok, %File.Stat{size: size}} = File.stat(file)
      assert size > 0
    end
  end

  describe "unserializing an index" do
    # Round-trip: an index written to disk reads back structurally equal.
    test "reads from disk" do
      pipeline = Pipeline.new(Pipeline.default_runners())

      document = %{
        "id" => Faker.UUID.v4(),
        "last_name" => Faker.Person.last_name(),
        "first_name" => Faker.Person.first_name()
      }

      index =
        Index.new(pipeline: pipeline)
        |> Index.add_field("first_name")
        |> Index.add_field("last_name")
        |> Index.add_documents([document])

      :ok = Disk.write(index)

      assert index == Disk.read(index.name)
    end
  end

  describe "getting all serialized indexes" do
    setup [:fixture_storage]

    # load_all/0 lazily deserializes every *.index file in the directory;
    # the fixture index is searchable after loading.
    test "loads and deserialize indexes" do
      assert [%Index{name: "users"} = index] =
               Disk.load_all()
               |> Enum.to_list()

      assert [_] = Index.search(index, "rose")
    end
  end

  describe "deleting index from storage" do
    test "works successfully" do
      index = Index.new()
      options = Application.get_env(@otp_app, Disk)
      file = Path.join(options[:directory], "#{index.name}.index")

      :ok = Disk.write(index)

      assert :ok = Disk.delete(index.name)
      refute File.exists?(file)
    end

    # Deleting a file that does not exist surfaces the posix error.
    test "fails for missing index" do
      assert {:error, :enoent} = Disk.delete("missing")
    end
  end
end
================================================
FILE: test/storage_test.exs
================================================
defmodule Elasticlunr.StorageTest do
  use ExUnit.Case

  alias Elasticlunr.{Index, Storage}
  alias Elasticlunr.Storage.{Blackhole, Mock}

  import Mox

  # Route the configured storage adapter to a Mox mock (falling back to the
  # Blackhole no-op adapter for un-expected calls); undone after each test.
  setup do
    Mox.stub_with(Mock, Blackhole)
    Application.put_env(:elasticlunr, :storage, Mock)

    on_exit(fn ->
      Application.delete_env(:elasticlunr, :storage)
    end)
  end

  # Storage.all/0 delegates to the adapter's load_all/0.
  test "preload/0" do
    index = Index.new()

    expect(Mock, :load_all, fn -> [index] end)

    assert [^index] = Storage.all()
  end

  # write/1 passes the index through to the adapter and propagates its
  # success or error result unchanged.
  test "write/1" do
    index = Index.new()

    expect(Mock, :write, 2, fn
      ^index -> :ok
      %{name: nil} -> {:error, "invalid index"}
    end)

    assert :ok = Storage.write(index)
    assert {:error, "invalid index"} = Storage.write(Index.new(name: nil))
  end

  test "read/1" do
    expect(Mock, :read, 2, fn
      "missing" -> {:error, "missing index"}
      name -> Index.new(name: name)
    end)

    assert {:error, "missing index"} = Storage.read("missing")
    assert %Index{name: "users"} = Storage.read("users")
  end

  test "delete/1" do
    expect(Mock, :delete, 2, fn
      "unknown-index" -> :error
      _ -> :ok
    end)

    assert :error = Storage.delete("unknown-index")
    assert :ok = Storage.delete("users")
  end
end
================================================
FILE: test/support/fixture/fixture.ex
================================================
defmodule Elasticlunr.Test.Fixture do
  @moduledoc false

  @doc """
  Loads the stemmer fixture: a map of words to their expected stemmed forms,
  decoded from `stemmer_fixture.json` next to this file.

  Raises (`File.Error` / `Jason.DecodeError`) if the fixture is missing or
  malformed. The previous `with` had only happy-path clauses and no `else`,
  so a failure leaked an `{:error, _}` tuple to callers, contradicting the
  `map()` spec — a broken fixture should fail loudly in tests instead.
  """
  @spec stemmer_fixture() :: map()
  def stemmer_fixture do
    __DIR__
    |> Path.join("./stemmer_fixture.json")
    |> File.read!()
    |> Jason.decode!()
  end
end
================================================
FILE: test/support/fixture/stemmer_fixture.json
================================================
{
"consign": "consign",
"consigned": "consign",
"consigning": "consign",
"consignment": "consign",
"consist": "consist",
"consisted": "consist",
"consistency": "consist",
"consistent": "consist",
"consistently": "consist",
"consisting": "consist",
"consists": "consist",
"consolation": "consol",
"consolations": "consol",
"consolatory": "consolatori",
"console": "consol",
"consoled": "consol",
"consoles": "consol",
"consolidate": "consolid",
"consolidated": "consolid",
"consolidating": "consolid",
"consoling": "consol",
"consols": "consol",
"consonant": "conson",
"consort": "consort",
"consorted": "consort",
"consorting": "consort",
"conspicuous": "conspicu",
"conspicuously": "conspicu",
"conspiracy": "conspiraci",
"conspirator": "conspir",
"conspirators": "conspir",
"conspire": "conspir",
"conspired": "conspir",
"conspiring": "conspir",
"constable": "constabl",
"constables": "constabl",
"constance": "constanc",
"constancy": "constanc",
"constant": "constant",
"knack": "knack",
"knackeries": "knackeri",
"knacks": "knack",
"knag": "knag",
"knave": "knave",
"knaves": "knave",
"knavish": "knavish",
"kneaded": "knead",
"kneading": "knead",
"knee": "knee",
"kneel": "kneel",
"kneeled": "kneel",
"kneeling": "kneel",
"kneels": "kneel",
"knees": "knee",
"knell": "knell",
"knelt": "knelt",
"knew": "knew",
"knick": "knick",
"knif": "knif",
"knife": "knife",
"knight": "knight",
"knights": "knight",
"knit": "knit",
"knits": "knit",
"knitted": "knit",
"knitting": "knit",
"knives": "knive",
"knob": "knob",
"knobs": "knob",
"knock": "knock",
"knocked": "knock",
"knocker": "knocker",
"knockers": "knocker",
"knocking": "knock",
"knocks": "knock",
"knopp": "knopp",
"knot": "knot",
"knots": "knot",
"lay": "lay",
"try": "tri"
}
================================================
FILE: test/test_helper.exs
================================================
# Boot the ExUnit test runner.
ExUnit.start()
# Faker supplies randomized fixture data (names, addresses, lorem text)
# used throughout the test suite.
Faker.start()
# Define a Mox mock implementing the storage provider behaviour; stubbed
# and configured per-test in Elasticlunr.StorageTest.
Mox.defmock(Elasticlunr.Storage.Mock, for: Elasticlunr.Storage.Provider)
================================================
FILE: test/tokenizer_test.exs
================================================
defmodule Elasticlunr.TokenizerTest do
  use ExUnit.Case

  alias Elasticlunr.{Token, Tokenizer}

  describe "tokenizing string" do
    # NOTE(review): judging by these fixtures, the `end` metadata is the
    # token's LENGTH ("the" -> 3, "came" -> 4), not an end offset — confirm
    # against Tokenizer before relying on it elsewhere.
    test "splits to list of tokens" do
      str = "the man came home"

      tokenized_str = [
        Token.new("the", %{start: 0, end: 3}),
        Token.new("man", %{start: 4, end: 3}),
        Token.new("came", %{start: 8, end: 4}),
        Token.new("home", %{start: 13, end: 4})
      ]

      assert ^tokenized_str = Tokenizer.tokenize(str)
    end

    test "downcase tokens" do
      assert ~w[foo bar] =
               Tokenizer.tokenize("FOO BAR")
               |> Enum.map(& &1.token)
    end

    # The default separator treats runs of whitespace and hyphens alike.
    test "removes whitespace and hyphens" do
      assert ~w[foo bar] =
               Tokenizer.tokenize(" FOO    BAR ")
               |> Enum.map(& &1.token)

      assert ~w[take the new york san francisco flight] =
               Tokenizer.tokenize("take the New York-San Francisco flight")
               |> Enum.map(& &1.token)

      assert ~w[solve for a b] =
               Tokenizer.tokenize("Solve for A - B")
               |> Enum.map(& &1.token)
    end

    # A custom separator regex fully replaces the default splitting rule.
    test "with custom separator" do
      assert ~w[hello world i love] =
               Tokenizer.tokenize("hello/world/I/love", ~r/\/+/)
               |> Enum.map(& &1.token)

      assert ~w[hello world i love] =
               Tokenizer.tokenize("hello\\world\\I\\love", ~r/[\\]+/)
               |> Enum.map(& &1.token)

      assert ~w[hello world apple pie] =
               Tokenizer.tokenize("hello/world/%%%apple%pie", ~r/[\/\%]+/)
               |> Enum.map(& &1.token)
    end
  end
end
gitextract_q80yq73w/
├── .credo.exs
├── .formatter.exs
├── .github/
│ ├── FUNDING.yml
│ ├── pull_request_template.md
│ └── workflows/
│ ├── dialyzer.yml
│ └── test.yml
├── .gitignore
├── .vscode/
│ └── extensions.json
├── BACKERS.md
├── LICENSE
├── README.md
├── coveralls.json
├── docs.livemd
├── lib/
│ └── elasticlunr/
│ ├── application.ex
│ ├── core/
│ │ ├── document_store.ex
│ │ ├── field.ex
│ │ ├── index.ex
│ │ └── token.ex
│ ├── db.ex
│ ├── deserializer.ex
│ ├── dsl/
│ │ ├── query/
│ │ │ ├── bool_query.ex
│ │ │ ├── match_all_query.ex
│ │ │ ├── match_query.ex
│ │ │ ├── not_query.ex
│ │ │ └── terms_query.ex
│ │ ├── query.ex
│ │ └── query_repository.ex
│ ├── manager/
│ │ └── index_manager.ex
│ ├── pipeline/
│ │ ├── stemmer.ex
│ │ ├── stop_word_filter.ex
│ │ └── trimmer.ex
│ ├── pipeline.ex
│ ├── protocol_implementations.ex
│ ├── serializer.ex
│ ├── storage/
│ │ ├── blackhole.ex
│ │ ├── disk.ex
│ │ └── provider.ex
│ ├── storage.ex
│ ├── tokenizer.ex
│ ├── utils/
│ │ └── process.ex
│ └── utlis.ex
├── mix.exs
└── test/
├── core/
│ ├── document_store_test.exs
│ ├── field_test.exs
│ └── index_test.exs
├── deserializer_test.exs
├── dsl_test.exs
├── manager/
│ └── index_manager_test.exs
├── pipeline/
│ ├── stemmer_test.exs
│ ├── stop_word_filter_test.exs
│ └── trimmer_test.exs
├── pipeline_test.exs
├── serializer_test.exs
├── storage/
│ └── disk_test.exs
├── storage_test.exs
├── support/
│ └── fixture/
│ ├── fixture.ex
│ ├── stemmer_fixture.json
│ ├── users.data
│ └── users.index
├── test_helper.exs
└── tokenizer_test.exs
SYMBOL INDEX (247 symbols across 42 files)
FILE: lib/elasticlunr/application.ex
class Elasticlunr.Application (line 1) | defmodule Elasticlunr.Application
method start (line 11) | def start(_type, _args) do
FILE: lib/elasticlunr/core/document_store.ex
class Elasticlunr.DocumentStore (line 1) | defmodule Elasticlunr.DocumentStore
method new (line 14) | def new(save \\ true) do
method add (line 19) | def add(%__MODULE__{documents: documents, length: length, save: save} ...
method get (line 42) | def get(%__MODULE__{documents: documents}, ref), do: Map.get(documents...
method remove (line 45) | def remove(
method exists? (line 63) | def exists?(%__MODULE__{documents: documents}, ref), do: Map.has_key?(...
method add_field_length (line 66) | def add_field_length(%__MODULE__{document_info: document_info} = store...
method update_field_length (line 84) | def update_field_length(%__MODULE__{} = store, ref, field, length),
method get_field_length (line 88) | def get_field_length(%__MODULE__{document_info: document_info} = store...
method reset (line 101) | def reset(%__MODULE__{}, save \\ true), do: new(save)
FILE: lib/elasticlunr/core/field.ex
class Elasticlunr.Field (line 1) | defmodule Elasticlunr.Field
method new (line 31) | def new(opts) do
method documents (line 45) | def documents(%__MODULE__{db: db, name: name}) do
method term_frequency (line 56) | def term_frequency(%__MODULE__{} = field, term) do
method has_token (line 61) | def has_token(%__MODULE__{} = field, term) do
method get_token (line 66) | def get_token(%__MODULE__{} = field, term) do
method set_query_pipeline (line 78) | def set_query_pipeline(%__MODULE__{} = field, pipeline) do
method add (line 83) | def add(%__MODULE__{pipeline: pipeline} = field, documents) do
method length (line 97) | def length(%__MODULE__{db: db, name: name}, :ids) do
method length (line 103) | def length(%__MODULE__{db: db, name: name}, :term, term) do
method length (line 111) | def length(%__MODULE__{db: db, name: name}, :tf, term) do
method length (line 119) | def length(%__MODULE__{db: db, name: name}, :idf, term) do
method update (line 128) | def update(%__MODULE__{} = field, documents) do
method remove (line 137) | def remove(%__MODULE__{db: db, name: name} = field, document_ids) do
method analyze (line 149) | def analyze(%__MODULE__{pipeline: pipeline, query_pipeline: query_pipe...
method terms (line 160) | def terms(%__MODULE__{} = field, query) do
method tokens (line 208) | def tokens(%__MODULE__{} = field) do
method update_field_stats (line 217) | defp update_field_stats(%{db: db, name: name} = field, id, tokens) do
method add_id (line 239) | defp add_id(%{db: db, name: name}, id) do
method matched_documents_for_term (line 243) | defp matched_documents_for_term(%{db: db, name: name}, term) do
method term_lookup (line 249) | defp term_lookup(%{db: db, name: name}, term, id) do
method terms_lookup (line 259) | defp terms_lookup(%{db: db, name: name}) do
method terms_lookup (line 265) | defp terms_lookup(%{db: db, name: name}, term) do
method termify (line 271) | defp termify({{:field_term, _, term, id}, attrs}), do: {term, id, attrs}
method tf_lookup (line 273) | defp tf_lookup(%{db: db, name: name}, term) do
method tf_lookup (line 286) | defp tf_lookup(%{db: db, name: name}, term, id) do
method idf_lookup (line 296) | defp idf_lookup(%{db: db, name: name}, term) do
method flnorm_lookup (line 306) | defp flnorm_lookup(%{db: db, name: name}) do
method unique_terms_lookup (line 316) | defp unique_terms_lookup(field) do
method recalculate_idf (line 321) | defp recalculate_idf(field) do
method filter_ids (line 351) | defp filter_ids(field, ids, term, matching_docs, query) do
method get_matching_docs (line 364) | defp get_matching_docs(docs, field, term, matching_docs) do
method match_with_fuzz (line 389) | defp match_with_fuzz(_field, _term, _fuzz, _query, matching_docs), do:...
method matching_ids (line 391) | defp matching_ids(field, term) do
method get_content (line 396) | defp get_content(_field, _id) do
method extract_matched (line 400) | defp extract_matched(field, term, id) do
method to_token (line 415) | defp to_token(%Token{} = token), do: token
method to_token (line 416) | defp to_token(token), do: Token.new(token)
method to_field_token (line 418) | defp to_field_token(field, term, flnorm) do
FILE: lib/elasticlunr/core/index.ex
class Elasticlunr.Index (line 12) | defmodule Elasticlunr.Index
class Elasticlunr.Index.IdPipeline (line 1) | defmodule Elasticlunr.Index.IdPipeline
method call (line 9) | def call(%Token{} = token), do: token
method new (line 39) | def new(opts \\ []) do
method update_field (line 89) | def update_field(%__MODULE__{fields: fields} = index, name, %Field{} =...
method get_fields (line 98) | def get_fields(%__MODULE__{fields: fields}), do: Map.keys(fields)
method get_field (line 101) | def get_field(%__MODULE__{fields: fields}, field) do
method save_document (line 106) | def save_document(%__MODULE__{fields: fields} = index, save) do
method add_documents (line 116) | def add_documents(%__MODULE__{fields: fields, ref: ref} = index, docum...
method update_documents (line 123) | def update_documents(%__MODULE__{ref: ref, fields: fields} = index, do...
method remove_documents (line 130) | def remove_documents(%__MODULE__{fields: fields} = index, document_ids...
method analyze (line 139) | def analyze(%__MODULE__{fields: fields}, field, content, options) do
method terms (line 146) | def terms(%__MODULE__{fields: fields}, query) do
method all (line 155) | def all(%__MODULE__{ref: ref, fields: fields}) do
method update_documents_size (line 162) | def update_documents_size(%__MODULE__{fields: fields} = index) do
method search (line 178) | def search(index, query, opts \\ nil)
method search (line 179) | def search(%__MODULE__{}, nil, _opts), do: []
method search (line 226) | def search(%__MODULE__{} = index, %{"query" => _} = query, _opts),
method search (line 232) | def search(%__MODULE__{} = index, %{} = query, options) do
method elasticsearch (line 258) | defp elasticsearch(index, %{"query" => root}) do
method elasticsearch (line 268) | defp elasticsearch(_index, _query) do
method flatten_document (line 272) | defp flatten_document(document, prefix \\ "") do
method persist (line 283) | defp persist(fields, ref, documents, persist_fn) do
method save (line 291) | defp save(fields, ref, document, callback) do
FILE: lib/elasticlunr/core/token.ex
class Elasticlunr.Token (line 1) | defmodule Elasticlunr.Token
method new (line 10) | def new(token, metadata \\ %{}) do
method update (line 15) | def update(%__MODULE__{token: str, metadata: metadata} = token, opts) do
method get_position (line 25) | def get_position(%__MODULE__{metadata: %{start: start, end: end_1}}), ...
method get_position (line 26) | def get_position(%__MODULE__{metadata: %{}}), do: nil
FILE: lib/elasticlunr/db.ex
class Elasticlunr.DB (line 1) | defmodule Elasticlunr.DB
method delete (line 22) | def delete(%__MODULE__{name: name}, pattern), do: :ets.delete(name, pa...
method destroy (line 25) | def destroy(%__MODULE__{name: name}) do
method insert (line 34) | def insert(%__MODULE__{name: name}, data), do: :ets.insert(name, data)
method lookup (line 37) | def lookup(%__MODULE__{name: name}, key), do: :ets.lookup(name, key)
method member? (line 40) | def member?(%__MODULE__{name: name}, key), do: :ets.member(name, key)
method match_delete (line 43) | def match_delete(%__MODULE__{name: name}, pattern), do: :ets.match_del...
method match_object (line 46) | def match_object(%__MODULE__{name: name}, spec), do: :ets.match_object...
method select_count (line 49) | def select_count(%__MODULE__{name: name}, spec), do: :ets.select_count...
method from (line 52) | def from(%__MODULE__{name: name} = db, file: file) do
method to (line 61) | def to(%__MODULE__{name: name}, file: file) do
FILE: lib/elasticlunr/deserializer.ex
class Elasticlunr.Deserializer.Parser (line 6) | defmodule Elasticlunr.Deserializer.Parser
method process (line 10) | def process(data) do
method parse (line 33) | defp parse(command, acc, [opts]), do: parse(command, acc, opts)
method parse (line 35) | defp parse("settings", nil, opts) do
method parse (line 50) | defp parse("db", acc, _), do: acc
method parse (line 52) | defp parse("field", {index, extra}, opts) do
method parse (line 68) | defp parse(_, acc, _), do: acc
method parse_pipeline (line 70) | defp parse_pipeline(option, cache \\ %{}) do
method parse_value (line 90) | defp parse_value("true"), do: true
method parse_value (line 91) | defp parse_value("false"), do: false
method parse_value (line 92) | defp parse_value(val), do: val
FILE: lib/elasticlunr/dsl/query.ex
class Elasticlunr.Dsl.Query (line 1) | defmodule Elasticlunr.Dsl.Query
method split_root (line 25) | def split_root({_, _} = root), do: root
method split_root (line 26) | def split_root(root), do: root
FILE: lib/elasticlunr/dsl/query/bool_query.ex
class Elasticlunr.Dsl.BoolQuery (line 1) | defmodule Elasticlunr.Dsl.BoolQuery
method new (line 21) | def new(opts) do
method rewrite (line 35) | def rewrite(
method score (line 88) | def score(%__MODULE__{rewritten: false} = query, %Index{} = index, opt...
method score (line 94) | def score(
method filter_result (line 180) | defp filter_result(nil, _index), do: false
method filter_result (line 181) | defp filter_result([], _index), do: false
method filter_result (line 183) | defp filter_result(filter, index) do
method filter_must (line 199) | defp filter_must(nil, filter_results, _index), do: filter_results
method parse (line 215) | def parse(options, _query_options, repo) do
method patch_options (line 235) | defp patch_options(opts, :should, options, mapper) do
method patch_options (line 252) | defp patch_options(opts, :filter, options, mapper) do
method patch_options (line 266) | defp patch_options(opts, :must, options, repo) do
method patch_options (line 279) | defp patch_options(opts, :must_not, options, repo) do
method patch_options (line 293) | defp patch_options(opts, :minimum_should_match, options) do
method extract_minimum_should_match (line 313) | defp extract_minimum_should_match(opts) do
method is_empty_clause? (line 324) | defp is_empty_clause?(nil), do: true
method is_empty_clause? (line 326) | defp is_empty_clause?(%{}), do: false
FILE: lib/elasticlunr/dsl/query/match_all_query.ex
class Elasticlunr.Dsl.MatchAllQuery (line 1) | defmodule Elasticlunr.Dsl.MatchAllQuery
method new (line 9) | def new(boost \\ 1), do: struct!(__MODULE__, boost: boost)
method parse (line 12) | def parse(options, _query_options, _repo) do
method score (line 19) | def score(%__MODULE__{boost: boost}, %Index{} = index, _options) do
FILE: lib/elasticlunr/dsl/query/match_query.ex
class Elasticlunr.Dsl.MatchQuery (line 1) | defmodule Elasticlunr.Dsl.MatchQuery
method new (line 20) | def new(opts) do
method rewrite (line 35) | def rewrite(
method score (line 86) | def score(%__MODULE__{} = module, %Index{} = index, options) do
method parse (line 93) | def parse(options, _query_options, repo) do
method to_match_params (line 144) | defp to_match_params(params), do: to_match_params(%{"query" => params})
method default_min_match (line 146) | defp default_min_match(params) do
FILE: lib/elasticlunr/dsl/query/not_query.ex
class Elasticlunr.Dsl.NotQuery (line 1) | defmodule Elasticlunr.Dsl.NotQuery
method new (line 11) | def new(inner_query), do: %__MODULE__{inner_query: inner_query}
method parse (line 14) | def parse(options, _query_options, _repo) do
method score (line 23) | def score(%__MODULE__{inner_query: inner_query}, %Index{} = index, opt...
FILE: lib/elasticlunr/dsl/query/terms_query.ex
class Elasticlunr.Dsl.TermsQuery (line 1) | defmodule Elasticlunr.Dsl.TermsQuery
method new (line 21) | def new(opts) do
method score (line 35) | def score(
method parse (line 105) | def parse(options, _query_options, repo) do
method get_terms (line 135) | defp get_terms(value), do: to_list(value)
method to_terms_params (line 145) | defp to_terms_params(params), do: to_terms_params(%{"value" => params})
method update_options (line 147) | defp update_options(opts, params, key) do
method to_list (line 158) | defp to_list(value), do: [value]
FILE: lib/elasticlunr/dsl/query_repository.ex
class Elasticlunr.Dsl.QueryRepository (line 1) | defmodule Elasticlunr.Dsl.QueryRepository
method get (line 5) | def get("not"), do: NotQuery
method get (line 6) | def get("bool"), do: BoolQuery
method get (line 7) | def get("match"), do: MatchQuery
method get (line 8) | def get("terms"), do: TermsQuery
method get (line 9) | def get("match_all"), do: MatchAllQuery
method get (line 10) | def get(element), do: raise("Unknown query type #{element}")
method parse (line 13) | def parse(module, options, query_options \\ %{}, repo \\ __MODULE__) do
FILE: lib/elasticlunr/manager/index_manager.ex
class Elasticlunr.IndexManager (line 1) | defmodule Elasticlunr.IndexManager
method preload (line 8) | def preload do
method get (line 15) | def get(name) do
method save (line 23) | def save(%Index{} = index) do
method update (line 31) | def update(%Index{name: name} = index) do
method remove (line 46) | def remove(%Index{name: name}) do
method loaded? (line 58) | def loaded?(name) do
method loaded_indices (line 70) | def loaded_indices do
method init (line 75) | def init(%Index{} = index) do
method start_link (line 80) | def start_link(%Index{name: name} = index) do
method child_spec (line 85) | def child_spec(%Index{name: id} = index) do
method via (line 94) | def via(name) do
method handle_call (line 98) | def handle_call(:get, _from, index) do
method handle_call (line 102) | def handle_call({:update, index}, _from, _state) do
method start (line 106) | defp start(index) do
FILE: lib/elasticlunr/pipeline.ex
class Elasticlunr.Pipeline (line 1) | defmodule Elasticlunr.Pipeline
method new (line 14) | def new(callbacks \\ []) do
method add (line 19) | def add(%__MODULE__{callback: callback} = pipeline, module) do
method default_runners (line 25) | def default_runners, do: [Trimmer, StopWordFilter, Stemmer]
method run (line 33) | def run(%__MODULE__{callback: []}, tokens), do: tokens
method run (line 35) | def run(%__MODULE__{callback: callback}, tokens) do
method insert_before (line 43) | def insert_before(%__MODULE__{callback: callback} = pipeline, module, ...
method insert_after (line 59) | def insert_after(%__MODULE__{callback: callback} = pipeline, module, b...
method remove (line 75) | def remove(%__MODULE__{callback: callback} = pipeline, module) do
method excute_runner (line 80) | defp excute_runner(tokens, module) do
method execute (line 100) | defp execute(module, token), do: module.call(token)
FILE: lib/elasticlunr/pipeline/stemmer.ex
class Elasticlunr.Pipeline.Stemmer (line 1) | defmodule Elasticlunr.Pipeline.Stemmer
method call (line 7) | def call(%Token{token: str} = token) do
FILE: lib/elasticlunr/pipeline/stop_word_filter.ex
class Elasticlunr.Pipeline.StopWordFilter (line 1) | defmodule Elasticlunr.Pipeline.StopWordFilter
method call (line 130) | def call(token), do: token
FILE: lib/elasticlunr/pipeline/trimmer.ex
class Elasticlunr.Pipeline.Trimmer (line 1) | defmodule Elasticlunr.Pipeline.Trimmer
method call (line 7) | def call(%Token{token: str} = token) do
FILE: lib/elasticlunr/storage.ex
class Elasticlunr.Storage (line 1) | defmodule Elasticlunr.Storage
method all (line 13) | def all do
method write (line 18) | def write(%Index{} = index) do
method read (line 23) | def read(index_name) do
method delete (line 28) | def delete(index_name) do
method provider (line 32) | defp provider, do: Application.get_env(:elasticlunr, :storage, Blackhole)
FILE: lib/elasticlunr/storage/blackhole.ex
class Elasticlunr.Storage.Blackhole (line 1) | defmodule Elasticlunr.Storage.Blackhole
method load_all (line 8) | def load_all, do: []
method write (line 11) | def write(_index), do: :ok
method read (line 14) | def read(_name), do: {:error, "can't read index from blackhole"}
method delete (line 17) | def delete(_name), do: :ok
FILE: lib/elasticlunr/storage/disk.ex
class Elasticlunr.Storage.Disk (line 1) | defmodule Elasticlunr.Storage.Disk
method write (line 23) | def write(%Index{db: db, name: name} = index) do
method read (line 34) | def read(name) do
method load_all (line 53) | def load_all do
method delete (line 63) | def delete(name) do
method files (line 73) | def files do
method write_serialized_index_to_file (line 83) | def write_serialized_index_to_file(path, data) do
method filenames (line 89) | defp filenames(directory, name) do
method without_ext (line 96) | defp without_ext(file, ext), do: Path.basename(file, ".#{ext}")
FILE: lib/elasticlunr/storage/provider.ex
class Elasticlunr.Storage.Provider (line 1) | defmodule Elasticlunr.Storage.Provider
FILE: lib/elasticlunr/tokenizer.ex
class Elasticlunr.Tokenizer (line 1) | defmodule Elasticlunr.Tokenizer
method tokenize (line 7) | def tokenize(str, separator \\ @default_separator)
method split (line 16) | defp split(str, separator) do
method run_split (line 51) | defp run_split(_str, _separator, _slice_start, _slice_end, _str_length...
method match_string? (line 55) | defp match_string?(nil, _separator), do: false
method match_string? (line 57) | defp match_string?(char, separator) do
method to_token (line 61) | defp to_token(str, start_index, end_index) do
FILE: lib/elasticlunr/utils/process.ex
class Elasticlunr.Utils.Process (line 1) | defmodule Elasticlunr.Utils.Process
method child_pid? (line 4) | def child_pid?(_child, _module), do: false
method id_from_pid (line 7) | def id_from_pid({:undefined, pid, :worker, [mod]}, registry, mod),
method active_processes (line 11) | def active_processes(supervisor, registry, module) do
FILE: lib/elasticlunr/utlis.ex
class Elasticlunr.Utils (line 1) | defmodule Elasticlunr.Utils
method levenshtein_distance (line 3) | def levenshtein_distance(a, b) do
FILE: mix.exs
class Elasticlunr.MixProject (line 1) | defmodule Elasticlunr.MixProject
method project (line 6) | def project do
method application (line 45) | def application do
method elixirc_paths (line 53) | defp elixirc_paths(:test), do: ["lib", "test/support"]
method elixirc_paths (line 54) | defp elixirc_paths(_), do: ["lib"]
method deps (line 57) | defp deps do
method aliases (line 71) | defp aliases do
method description (line 77) | defp description do
method package (line 81) | defp package do
FILE: test/core/document_store_test.exs
class Elasticlunr.DocumentStoreTest (line 1) | defmodule Elasticlunr.DocumentStoreTest
FILE: test/core/field_test.exs
class Elasticlunr.FieldTest (line 1) | defmodule Elasticlunr.FieldTest
FILE: test/core/index_test.exs
class Elasticlunr.IndexTest (line 1) | defmodule Elasticlunr.IndexTest
FILE: test/deserializer_test.exs
class Elasticlunr.DeserializerTest (line 1) | defmodule Elasticlunr.DeserializerTest
method to_stream (line 20) | defp to_stream(data) do
FILE: test/dsl_test.exs
class Elasticlunr.DslTest (line 1) | defmodule Elasticlunr.DslTest
FILE: test/manager/index_manager_test.exs
class Elasticlunr.IndexManagerTest (line 1) | defmodule Elasticlunr.IndexManagerTest
FILE: test/pipeline/stemmer_test.exs
class Elasticlunr.Pipeline.StemmerTest (line 1) | defmodule Elasticlunr.Pipeline.StemmerTest
FILE: test/pipeline/stop_word_filter_test.exs
class Elasticlunr.Pipeline.StopWordFilterTest (line 1) | defmodule Elasticlunr.Pipeline.StopWordFilterTest
FILE: test/pipeline/trimmer_test.exs
class Elasticlunr.Pipeline.TimmerTest (line 1) | defmodule Elasticlunr.Pipeline.TimmerTest
FILE: test/pipeline_test.exs
class Elasticlunr.PipelineTest (line 1) | defmodule Elasticlunr.PipelineTest
FILE: test/serializer_test.exs
class Elasticlunr.SerializerTest (line 1) | defmodule Elasticlunr.SerializerTest
FILE: test/storage/disk_test.exs
class Elasticlunr.Storage.DiskTest (line 1) | defmodule Elasticlunr.Storage.DiskTest
method fixture_storage (line 22) | defp fixture_storage(_context) do
FILE: test/storage_test.exs
class Elasticlunr.StorageTest (line 1) | defmodule Elasticlunr.StorageTest
FILE: test/support/fixture/fixture.ex
class Elasticlunr.Test.Fixture (line 1) | defmodule Elasticlunr.Test.Fixture
method stemmer_fixture (line 5) | def stemmer_fixture do
FILE: test/tokenizer_test.exs
class Elasticlunr.TokenizerTest (line 1) | defmodule Elasticlunr.TokenizerTest
Condensed preview — 61 files, each showing its path, character count, and a content snippet. Download the .json file or copy it to obtain the full structured content (150K chars).
[
{
"path": ".credo.exs",
"chars": 7229,
"preview": "# This file contains the configuration for Credo and you are probably reading\n# this after creating it with `mix credo.g"
},
{
"path": ".formatter.exs",
"chars": 97,
"preview": "# Used by \"mix format\"\n[\n inputs: [\"{mix,.formatter}.exs\", \"{config,lib,test}/**/*.{ex,exs}\"]\n]\n"
},
{
"path": ".github/FUNDING.yml",
"chars": 773,
"preview": "# These are supported funding model platforms\n\ngithub: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [u"
},
{
"path": ".github/pull_request_template.md",
"chars": 808,
"preview": "## Overview\n\n_Write a short description of what your PR does_\n\n## Related Issues\n\n_List all related issues. Add linking "
},
{
"path": ".github/workflows/dialyzer.yml",
"chars": 1251,
"preview": "name: Dialyzer\n\non: push\n\njobs:\n analyze:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v2\n "
},
{
"path": ".github/workflows/test.yml",
"chars": 1077,
"preview": "name: Test\n\non: push\n\nenv:\n MIX_ENV: test\n GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n\njobs:\n test:\n runs-on: ubunt"
},
{
"path": ".gitignore",
"chars": 693,
"preview": "# The directory Mix will write compiled artifacts to.\n/_build/\n\n# If you run \"mix test --cover\", coverage assets end up "
},
{
"path": ".vscode/extensions.json",
"chars": 133,
"preview": "{\n \"recommendations\": [\n \"pantajoe.vscode-elixir-credo\",\n \"jakebecker.elixir-ls\",\n \"pgourlain.er"
},
{
"path": "BACKERS.md",
"chars": 416,
"preview": "<h1 align=\"center\">Sponsors & Backers</h1>\n\nElasticlunr is an MIT-licensed open source project. It's an independent "
},
{
"path": "LICENSE",
"chars": 1070,
"preview": "MIT License\n\nCopyright (c) 2021 Atanda Rasheed\n\nPermission is hereby granted, free of charge, to any person obtaining a "
},
{
"path": "README.md",
"chars": 3787,
"preview": "# Elasticlunr\n\n[,\n "
},
{
"path": "lib/elasticlunr/db.ex",
"chars": 2079,
"preview": "defmodule Elasticlunr.DB do\n defstruct [:name, :options]\n\n @type t :: %__MODULE__{\n name: atom(),\n o"
},
{
"path": "lib/elasticlunr/deserializer.ex",
"chars": 2198,
"preview": "defprotocol Elasticlunr.Deserializer do\n @spec deserialize(Enum.t()) :: Elasticlunr.Index.t()\n def deserialize(data)\ne"
},
{
"path": "lib/elasticlunr/dsl/query/bool_query.ex",
"chars": 7848,
"preview": "defmodule Elasticlunr.Dsl.BoolQuery do\n use Elasticlunr.Dsl.Query\n\n alias Elasticlunr.Index\n alias Elasticlunr.Dsl.{N"
},
{
"path": "lib/elasticlunr/dsl/query/match_all_query.ex",
"chars": 540,
"preview": "defmodule Elasticlunr.Dsl.MatchAllQuery do\n use Elasticlunr.Dsl.Query\n\n alias Elasticlunr.Index\n\n defstruct ~w[boost]"
},
{
"path": "lib/elasticlunr/dsl/query/match_query.ex",
"chars": 3923,
"preview": "defmodule Elasticlunr.Dsl.MatchQuery do\n use Elasticlunr.Dsl.Query\n\n alias Elasticlunr.{Index}\n alias Elasticlunr.Dsl"
},
{
"path": "lib/elasticlunr/dsl/query/not_query.ex",
"chars": 852,
"preview": "defmodule Elasticlunr.Dsl.NotQuery do\n use Elasticlunr.Dsl.Query\n\n alias Elasticlunr.Index\n alias Elasticlunr.Dsl.{Qu"
},
{
"path": "lib/elasticlunr/dsl/query/terms_query.ex",
"chars": 3813,
"preview": "defmodule Elasticlunr.Dsl.TermsQuery do\n use Elasticlunr.Dsl.Query\n\n alias Elasticlunr.Dsl.Query\n alias Elasticlunr.{"
},
{
"path": "lib/elasticlunr/dsl/query.ex",
"chars": 1539,
"preview": "defmodule Elasticlunr.Dsl.Query do\n alias Elasticlunr.{Field, Index, Dsl.QueryRepository}\n\n @type score_results ::\n "
},
{
"path": "lib/elasticlunr/dsl/query_repository.ex",
"chars": 1126,
"preview": "defmodule Elasticlunr.Dsl.QueryRepository do\n alias Elasticlunr.Index\n alias Elasticlunr.Dsl.{BoolQuery, MatchAllQuery"
},
{
"path": "lib/elasticlunr/manager/index_manager.ex",
"chars": 2562,
"preview": "defmodule Elasticlunr.IndexManager do\n use GenServer\n\n alias Elasticlunr.{Index, IndexRegistry, IndexSupervisor, Stora"
},
{
"path": "lib/elasticlunr/pipeline/stemmer.ex",
"chars": 219,
"preview": "defmodule Elasticlunr.Pipeline.Stemmer do\n alias Elasticlunr.Token\n\n @behaviour Elasticlunr.Pipeline\n\n @impl true\n d"
},
{
"path": "lib/elasticlunr/pipeline/stop_word_filter.ex",
"chars": 1646,
"preview": "defmodule Elasticlunr.Pipeline.StopWordFilter do\n alias Elasticlunr.Token\n\n @behaviour Elasticlunr.Pipeline\n\n @defaul"
},
{
"path": "lib/elasticlunr/pipeline/trimmer.ex",
"chars": 292,
"preview": "defmodule Elasticlunr.Pipeline.Trimmer do\n alias Elasticlunr.Token\n\n @behaviour Elasticlunr.Pipeline\n\n @impl true\n d"
},
{
"path": "lib/elasticlunr/pipeline.ex",
"chars": 2727,
"preview": "defmodule Elasticlunr.Pipeline do\n alias Elasticlunr.{Token, Tokenizer}\n alias Elasticlunr.Pipeline.{Stemmer, StopWord"
},
{
"path": "lib/elasticlunr/protocol_implementations.ex",
"chars": 2340,
"preview": "defimpl Elasticlunr.Serializer, for: Elasticlunr.Pipeline do\n alias Elasticlunr.Pipeline\n\n def serialize(%Pipeline{cal"
},
{
"path": "lib/elasticlunr/serializer.ex",
"chars": 141,
"preview": "defprotocol Elasticlunr.Serializer do\n @spec serialize(struct(), keyword()) :: binary() | function()\n def serialize(in"
},
{
"path": "lib/elasticlunr/storage/blackhole.ex",
"chars": 351,
"preview": "defmodule Elasticlunr.Storage.Blackhole do\n @moduledoc \"\"\"\n As the name implies, nothing is written nowhere.\n \"\"\"\n u"
},
{
"path": "lib/elasticlunr/storage/disk.ex",
"chars": 2571,
"preview": "defmodule Elasticlunr.Storage.Disk do\n @moduledoc \"\"\"\n This storage provider writes data to the local disk of the runn"
},
{
"path": "lib/elasticlunr/storage/provider.ex",
"chars": 319,
"preview": "defmodule Elasticlunr.Storage.Provider do\n @moduledoc false\n\n alias Elasticlunr.Index\n\n @callback load_all() :: Enum."
},
{
"path": "lib/elasticlunr/storage.ex",
"chars": 1106,
"preview": "defmodule Elasticlunr.Storage do\n @moduledoc \"\"\"\n This is the storage interface that's used by the index manager.\n ``"
},
{
"path": "lib/elasticlunr/tokenizer.ex",
"chars": 1828,
"preview": "defmodule Elasticlunr.Tokenizer do\n alias Elasticlunr.Token\n\n @default_separator ~r/[\\s\\-]+/\n\n @spec tokenize(binary("
},
{
"path": "lib/elasticlunr/utils/process.ex",
"chars": 655,
"preview": "defmodule Elasticlunr.Utils.Process do\n @spec child_pid?(tuple, atom) :: boolean\n def child_pid?({:undefined, pid, :wo"
},
{
"path": "lib/elasticlunr/utlis.ex",
"chars": 1002,
"preview": "defmodule Elasticlunr.Utils do\n @spec levenshtein_distance(binary, binary) :: integer()\n def levenshtein_distance(a, b"
},
{
"path": "mix.exs",
"chars": 2267,
"preview": "defmodule Elasticlunr.MixProject do\n use Mix.Project\n\n @source_url \"https://github.com/heywhy/ex_elasticlunr\"\n\n def p"
},
{
"path": "test/core/document_store_test.exs",
"chars": 4946,
"preview": "defmodule Elasticlunr.DocumentStoreTest do\n use ExUnit.Case\n\n alias Elasticlunr.DocumentStore\n\n describe \"creating a "
},
{
"path": "test/core/field_test.exs",
"chars": 3111,
"preview": "defmodule Elasticlunr.FieldTest do\n use ExUnit.Case\n\n alias Elasticlunr.{DB, Field, Pipeline, Token}\n\n setup context "
},
{
"path": "test/core/index_test.exs",
"chars": 9011,
"preview": "defmodule Elasticlunr.IndexTest do\n use ExUnit.Case\n\n alias Elasticlunr.{Field, Index, Pipeline, Token}\n alias Faker."
},
{
"path": "test/deserializer_test.exs",
"chars": 651,
"preview": "defmodule Elasticlunr.DeserializerTest do\n use ExUnit.Case\n\n alias Elasticlunr.{Deserializer, Index}\n\n test \"deserial"
},
{
"path": "test/dsl_test.exs",
"chars": 8448,
"preview": "defmodule Elasticlunr.DslTest do\n use ExUnit.Case\n\n alias Elasticlunr.{Index, Pipeline, Token}\n alias Elasticlunr.Dsl"
},
{
"path": "test/manager/index_manager_test.exs",
"chars": 1406,
"preview": "defmodule Elasticlunr.IndexManagerTest do\n use ExUnit.Case\n\n alias Elasticlunr.{Index, IndexManager}\n\n describe \"work"
},
{
"path": "test/pipeline/stemmer_test.exs",
"chars": 656,
"preview": "defmodule Elasticlunr.Pipeline.StemmerTest do\n use ExUnit.Case\n\n alias Elasticlunr.Token\n alias Elasticlunr.{Pipeline"
},
{
"path": "test/pipeline/stop_word_filter_test.exs",
"chars": 691,
"preview": "defmodule Elasticlunr.Pipeline.StopWordFilterTest do\n @moduledoc false\n use ExUnit.Case\n\n alias Elasticlunr.{Pipeline"
},
{
"path": "test/pipeline/trimmer_test.exs",
"chars": 1369,
"preview": "defmodule Elasticlunr.Pipeline.TimmerTest do\n @moduledoc false\n use ExUnit.Case\n\n alias Elasticlunr.{Pipeline, Token}"
},
{
"path": "test/pipeline_test.exs",
"chars": 2161,
"preview": "defmodule Elasticlunr.PipelineTest do\n use ExUnit.Case\n\n alias Elasticlunr.{Pipeline, Token, Tokenizer}\n alias Elasti"
},
{
"path": "test/serializer_test.exs",
"chars": 1174,
"preview": "defmodule Elasticlunr.SerializerTest do\n use ExUnit.Case\n\n alias Elasticlunr.{Index, Serializer}\n\n test \"serialize in"
},
{
"path": "test/storage/disk_test.exs",
"chars": 2401,
"preview": "defmodule Elasticlunr.Storage.DiskTest do\n use ExUnit.Case\n\n alias Elasticlunr.Index\n alias Elasticlunr.Pipeline\n al"
},
{
"path": "test/storage_test.exs",
"chars": 1234,
"preview": "defmodule Elasticlunr.StorageTest do\n use ExUnit.Case\n\n alias Elasticlunr.{Index, Storage}\n alias Elasticlunr.Storage"
},
{
"path": "test/support/fixture/fixture.ex",
"chars": 301,
"preview": "defmodule Elasticlunr.Test.Fixture do\n @moduledoc false\n\n @spec stemmer_fixture() :: map()\n def stemmer_fixture do\n "
},
{
"path": "test/support/fixture/stemmer_fixture.json",
"chars": 1894,
"preview": "{\n \"consign\": \"consign\",\n \"consigned\": \"consign\",\n \"consigning\": \"consign\",\n \"consignment\": \"consign\",\n \"consist\": "
},
{
"path": "test/test_helper.exs",
"chars": 103,
"preview": "ExUnit.start()\nFaker.start()\n\nMox.defmock(Elasticlunr.Storage.Mock, for: Elasticlunr.Storage.Provider)\n"
},
{
"path": "test/tokenizer_test.exs",
"chars": 1613,
"preview": "defmodule Elasticlunr.TokenizerTest do\n use ExUnit.Case\n\n alias Elasticlunr.{Token, Tokenizer}\n\n describe \"tokenizing"
}
]
// ... and 2 more files (download for full content)
About this extraction
This page contains the full source code of the heywhy/ex_elasticlunr GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 61 files (136.6 KB), approximately 38.4k tokens, and a symbol index with 247 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.