Repository: heywhy/ex_elasticlunr Branch: master Commit: 0fcb8dede8ac Files: 61 Total size: 136.6 KB Directory structure: gitextract_q80yq73w/ ├── .credo.exs ├── .formatter.exs ├── .github/ │ ├── FUNDING.yml │ ├── pull_request_template.md │ └── workflows/ │ ├── dialyzer.yml │ └── test.yml ├── .gitignore ├── .vscode/ │ └── extensions.json ├── BACKERS.md ├── LICENSE ├── README.md ├── coveralls.json ├── docs.livemd ├── lib/ │ └── elasticlunr/ │ ├── application.ex │ ├── core/ │ │ ├── document_store.ex │ │ ├── field.ex │ │ ├── index.ex │ │ └── token.ex │ ├── db.ex │ ├── deserializer.ex │ ├── dsl/ │ │ ├── query/ │ │ │ ├── bool_query.ex │ │ │ ├── match_all_query.ex │ │ │ ├── match_query.ex │ │ │ ├── not_query.ex │ │ │ └── terms_query.ex │ │ ├── query.ex │ │ └── query_repository.ex │ ├── manager/ │ │ └── index_manager.ex │ ├── pipeline/ │ │ ├── stemmer.ex │ │ ├── stop_word_filter.ex │ │ └── trimmer.ex │ ├── pipeline.ex │ ├── protocol_implementations.ex │ ├── serializer.ex │ ├── storage/ │ │ ├── blackhole.ex │ │ ├── disk.ex │ │ └── provider.ex │ ├── storage.ex │ ├── tokenizer.ex │ ├── utils/ │ │ └── process.ex │ └── utlis.ex ├── mix.exs └── test/ ├── core/ │ ├── document_store_test.exs │ ├── field_test.exs │ └── index_test.exs ├── deserializer_test.exs ├── dsl_test.exs ├── manager/ │ └── index_manager_test.exs ├── pipeline/ │ ├── stemmer_test.exs │ ├── stop_word_filter_test.exs │ └── trimmer_test.exs ├── pipeline_test.exs ├── serializer_test.exs ├── storage/ │ └── disk_test.exs ├── storage_test.exs ├── support/ │ └── fixture/ │ ├── fixture.ex │ ├── stemmer_fixture.json │ ├── users.data │ └── users.index ├── test_helper.exs └── tokenizer_test.exs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .credo.exs ================================================ # This file contains the configuration for Credo and you are probably reading # this after creating it with `mix credo.gen.config`. # # If you find anything wrong or unclear in this file, please report an # issue on GitHub: https://github.com/rrrene/credo/issues # %{ # # You can have as many configs as you like in the `configs:` field. configs: [ %{ # # Run any config using `mix credo -C `. If no config name is given # "default" is used. # name: "default", # # These are the files included in the analysis: files: %{ # # You can give explicit globs or simply directories. # In the latter case `**/*.{ex,exs}` will be used. # included: [ "lib/", "src/", "test/" ], excluded: [~r"/_build/", ~r"/deps/", ~r"/node_modules/"] }, # # Load and configure plugins here: # plugins: [], # # If you create your own checks, you must specify the source files for # them here, so they can be loaded by Credo before running the analysis. # requires: [], # # If you want to enforce a style guide and need a more traditional linting # experience, you can change `strict` to `true` below: # strict: false, # # To modify the timeout for parsing files, change this value: # parse_timeout: 5000, # # If you want to use uncolored output by default, you can change `color` # to `false` below: # color: true, # # You can customize the parameters of any check by adding a second element # to the tuple. 
# # To disable a check put `false` as second element: # # {Credo.Check.Design.DuplicatedCode, false} # checks: [ # ## Consistency Checks # {Credo.Check.Consistency.ExceptionNames, []}, {Credo.Check.Consistency.LineEndings, []}, {Credo.Check.Consistency.ParameterPatternMatching, []}, {Credo.Check.Consistency.SpaceAroundOperators, []}, {Credo.Check.Consistency.SpaceInParentheses, []}, {Credo.Check.Consistency.TabsOrSpaces, []}, # ## Design Checks # # You can customize the priority of any check # Priority values are: `low, normal, high, higher` # {Credo.Check.Design.AliasUsage, [priority: :low, if_nested_deeper_than: 2, if_called_more_often_than: 0]}, # You can also customize the exit_status of each check. # If you don't want TODO comments to cause `mix credo` to fail, just # set this value to 0 (zero). # {Credo.Check.Design.TagTODO, [exit_status: 2]}, {Credo.Check.Design.TagFIXME, []}, # ## Readability Checks # {Credo.Check.Readability.AliasOrder, []}, {Credo.Check.Readability.FunctionNames, []}, {Credo.Check.Readability.LargeNumbers, []}, {Credo.Check.Readability.MaxLineLength, [priority: :low, max_length: 120]}, {Credo.Check.Readability.ModuleAttributeNames, []}, {Credo.Check.Readability.ModuleDoc, false}, {Credo.Check.Readability.ModuleNames, []}, {Credo.Check.Readability.ParenthesesInCondition, []}, {Credo.Check.Readability.ParenthesesOnZeroArityDefs, []}, {Credo.Check.Readability.PredicateFunctionNames, []}, {Credo.Check.Readability.PreferImplicitTry, []}, {Credo.Check.Readability.RedundantBlankLines, []}, {Credo.Check.Readability.Semicolons, []}, {Credo.Check.Readability.SpaceAfterCommas, []}, {Credo.Check.Readability.StringSigils, []}, {Credo.Check.Readability.TrailingBlankLine, []}, {Credo.Check.Readability.TrailingWhiteSpace, []}, {Credo.Check.Readability.UnnecessaryAliasExpansion, []}, {Credo.Check.Readability.VariableNames, []}, # ## Refactoring Opportunities # {Credo.Check.Refactor.CondStatements, []}, {Credo.Check.Refactor.CyclomaticComplexity, []}, {Credo.Check.Refactor.FunctionArity, []}, {Credo.Check.Refactor.LongQuoteBlocks, []}, # {Credo.Check.Refactor.MapInto, []}, {Credo.Check.Refactor.MatchInCondition, []}, {Credo.Check.Refactor.NegatedConditionsInUnless, []}, {Credo.Check.Refactor.NegatedConditionsWithElse, []}, {Credo.Check.Refactor.Nesting, []}, {Credo.Check.Refactor.UnlessWithElse, []}, {Credo.Check.Refactor.WithClauses, []}, # ## Warnings # {Credo.Check.Warning.ApplicationConfigInModuleAttribute, []}, {Credo.Check.Warning.BoolOperationOnSameValues, []}, {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, {Credo.Check.Warning.IExPry, []}, {Credo.Check.Warning.IoInspect, []}, # {Credo.Check.Warning.LazyLogging, []}, {Credo.Check.Warning.MixEnv, false}, {Credo.Check.Warning.OperationOnSameValues, []}, {Credo.Check.Warning.OperationWithConstantResult, []}, {Credo.Check.Warning.RaiseInsideRescue, []}, {Credo.Check.Warning.UnusedEnumOperation, []}, {Credo.Check.Warning.UnusedFileOperation, []}, {Credo.Check.Warning.UnusedKeywordOperation, []}, {Credo.Check.Warning.UnusedListOperation, []}, {Credo.Check.Warning.UnusedPathOperation, []}, {Credo.Check.Warning.UnusedRegexOperation, []}, {Credo.Check.Warning.UnusedStringOperation, []}, {Credo.Check.Warning.UnusedTupleOperation, []}, {Credo.Check.Warning.UnsafeExec, []}, # # Checks scheduled for next check update (opt-in for now, just replace `false` with `[]`) # # Controversial and experimental checks (opt-in, just replace `false` with `[]`) # {Credo.Check.Consistency.MultiAliasImportRequireUse, false}, 
{Credo.Check.Consistency.UnusedVariableNames, false}, {Credo.Check.Design.DuplicatedCode, false}, {Credo.Check.Readability.AliasAs, false}, {Credo.Check.Readability.BlockPipe, false}, {Credo.Check.Readability.ImplTrue, false}, {Credo.Check.Readability.MultiAlias, false}, {Credo.Check.Readability.SeparateAliasRequire, false}, {Credo.Check.Readability.SinglePipe, false}, {Credo.Check.Readability.Specs, false}, {Credo.Check.Readability.StrictModuleLayout, false}, {Credo.Check.Readability.WithCustomTaggedTuple, false}, {Credo.Check.Refactor.ABCSize, false}, {Credo.Check.Refactor.AppendSingleItem, false}, {Credo.Check.Refactor.DoubleBooleanNegation, false}, {Credo.Check.Refactor.ModuleDependencies, false}, {Credo.Check.Refactor.NegatedIsNil, false}, {Credo.Check.Refactor.PipeChainStart, false}, {Credo.Check.Refactor.VariableRebinding, false}, {Credo.Check.Warning.LeakyEnvironment, false}, {Credo.Check.Warning.MapGetUnsafePass, false}, {Credo.Check.Warning.UnsafeToAtom, false} # # Custom checks can be created using `mix credo.gen.check`. # ] } ] } ================================================ FILE: .formatter.exs ================================================ # Used by "mix format" [ inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] ] ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] patreon: atandarash open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username otechie: # Replace with a single Otechie username lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] ================================================ FILE: .github/pull_request_template.md ================================================ ## Overview _Write a short description of what your PR does_ ## Related Issues _List all related issues. Add linking prefixes (closes, fixes, resolves, etc.)_ ## TODO _Write down what steps need to be done for this PR._ - [ ] Update PR to include task link - [ ] Add error handling - [ ] Add Loom video demo - [ ] GitHub Actions are all passing - [ ] Ensure your PR has been reviewed and you have also implemented all feedback requested - [ ] Update PR label to the right stage ## Testing ### How to test: _Write down steps needed, if any, to test your PR locally in case the preview links do not work_ 1. Create an index 2. Add these documents ... 
### What to test: _Write down a checklist for others to copy and tick when testing your PR_ - [ ] Searching for "me" or other variations works ================================================ FILE: .github/workflows/dialyzer.yml ================================================ name: Dialyzer on: push jobs: analyze: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: erlef/setup-beam@v1 id: beam with: otp-version: "24" elixir-version: "1.12" - name: Restore Cached Dependencies uses: actions/cache@v2 id: mix-cache with: path: | deps _build key: ${{ runner.os }}-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-${{ hashFiles('mix.lock') }} - name: Restore PLT cache uses: actions/cache@v2 id: plt-cache with: key: | ${{ runner.os }}-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-plt restore-keys: | ${{ runner.os }}-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-plt path: | priv/plts - name: Install Dependencies if: steps.mix-cache.outputs.cache-hit != 'true' run: | mix deps.get - name: Create PLTs if: steps.plt-cache.outputs.cache-hit != 'true' run: mix dialyzer --plt - name: Run dialyzer run: mix dialyzer ================================================ FILE: .github/workflows/test.yml ================================================ name: Test on: push env: MIX_ENV: test GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} jobs: test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: erlef/setup-beam@v1 id: beam with: otp-version: "24" elixir-version: "1.12" - name: Restore Cached Dependencies uses: actions/cache@v2 id: mix-cache with: path: | deps _build key: ${{ runner.os }}-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-${{ hashFiles('mix.lock') }} - name: Install Dependencies if: steps.mix-cache.outputs.cache-hit != 'true' run: | mix deps.get - name: Check Codebase Standard run: | mix format --check-formatted mix credo - name: Run Tests run: | mix coveralls.json - name: Upload Coverage Reports uses: codecov/codecov-action@v2 with: directory: ./cover fail_ci_if_error: true ================================================ FILE: .gitignore ================================================ # The directory Mix will write compiled artifacts to. /_build/ # If you run "mix test --cover", coverage assets end up here. /cover/ # The directory Mix downloads your dependencies sources to. /deps/ # Where third-party dependencies like ExDoc output generated docs. /doc/ # Ignore .fetch files in case you like to edit your project deps locally. /.fetch # If the VM crashes, it generates a dump, let's ignore it too. erl_crash.dump # Also ignore archive artifacts (built via "mix archive.build"). *.ez # Ignore package tarball (built via "mix hex.build"). elasticlunr-*.tar # Temporary files for e.g. tests /tmp # Generated files for Dialyxir /priv/plts/*.plt /priv/plts/*.plt.hash ================================================ FILE: .vscode/extensions.json ================================================ { "recommendations": [ "pantajoe.vscode-elixir-credo", "jakebecker.elixir-ls", "pgourlain.erlang" ] } ================================================ FILE: BACKERS.md ================================================

# Sponsors & Backers

Elasticlunr is an MIT-licensed open source project. It's an independent project whose ongoing development is made possible entirely thanks to the support of these awesome [backers](https://github.com/heywhy/ex_elasticlunr/blob/dev/BACKERS.md). If you'd like to join them, please consider:

- [Become a backer or sponsor on Patreon](https://www.patreon.com/atandarash)

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2021 Atanda Rasheed

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

================================================
FILE: README.md
================================================
# Elasticlunr

[![Dialyzer](https://github.com/heywhy/ex_elasticlunr/actions/workflows/dialyzer.yml/badge.svg?branch=master)](https://github.com/heywhy/ex_elasticlunr/actions) [![Test](https://github.com/heywhy/ex_elasticlunr/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/heywhy/ex_elasticlunr/actions) [![Codecov](https://codecov.io/gh/heywhy/ex_elasticlunr/branch/master/graph/badge.svg?token=ZDA9GUTAFJ)](https://codecov.io/gh/heywhy/ex_elasticlunr)

Elasticlunr is a small, full-text search library for use in the Elixir environment. It indexes JSON documents and provides a friendly search interface to retrieve documents.

## Why

The library is built for web applications that do not require the deployment complexities of popular search engines while taking advantage of the capabilities of the BEAM. Imagine how much is gained when the search functionality of your application resides in the same environment (the BEAM VM) as your business logic: search resolves faster, and the number of services (Elasticsearch, Solr, and so on) to monitor shrinks.

## Installation

The library can be installed by adding `elasticlunr` to your list of dependencies in `mix.exs`:

```elixir
def deps do
  [
    {:elasticlunr, "~> 0.6"}
  ]
end
```

Documentation can be found at [hexdocs.pm](https://hexdocs.pm/elasticlunr). See the blog post [Introduction to Elasticlunr](https://atandarash.me/blog/introduction-to-elasticlunr) and the [Livebook](#livebook) for examples.

## Features

1. Query-time boosting: you don't need to set up boosting weights during index building; query-time boosting is more flexible, letting you try different boosting schemes.
2. A more rational scoring mechanism: Elasticlunr uses a scoring mechanism similar to Elasticsearch's, which is also the mechanism used by Lucene.
3. Field search: you can choose which fields to index and which fields to search.
4. Boolean model: you can set which field to search and the boolean model for each query token, such as "OR" and "AND".
5. The combined Boolean Model, TF/IDF Model, and Vector Space Model make the result ranking more reliable.

## Token Expansion

Sometimes users want to expand a query token to increase recall. For example, suppose the query token is "micro" and both "microwave" and "microscope" are in the index. If the user chooses to expand the query token "micro" to increase recall, both "microwave" and "microscope" will be searched for in the index and returned. Query results from expanded tokens are penalized because they are not the same as the original query token.

## Livebook

The repository includes a Livebook file that you can run. You can click the button below to run it using [livebook.dev](https://livebook.dev)!

[![Run in Livebook](https://livebook.dev/badge/v1/blue.svg)](https://livebook.dev/run?url=https%3A%2F%2Fgithub.com%2Fheywhy%2Fex_elasticlunr%2Fblob%2Fmaster%2Fdocs.livemd)

## Storage

Elasticlunr allows you to write your indexes to whatever storage provider you want. You don't need to access the `Elasticlunr.Storage` module directly; it is used by the `Elasticlunr.IndexManager`. See the available providers below:

* [Blackhole](https://github.com/heywhy/ex_elasticlunr/blob/master/lib/elasticlunr/storage/blackhole.ex)
* [Disk](https://github.com/heywhy/ex_elasticlunr/blob/master/lib/elasticlunr/storage/disk.ex)
* [S3](https://github.com/heywhy/ex_elasticlunr_s3)

To configure which provider to use:

```elixir
config :elasticlunr, storage: Elasticlunr.Storage.S3
```

Note that all indexes in storage are preloaded on application startup. To see a provider's available configuration options, refer to its module documentation.

## License

Elasticlunr is released under the MIT License - see the [LICENSE](https://github.com/heywhy/ex_elasticlunr/blob/master/LICENSE) file.

================================================
FILE: coveralls.json
================================================
{
  "coverage_options": {
    "treat_no_relevant_lines_as_covered": true,
    "minimum_coverage": 90
  }
}

================================================
FILE: docs.livemd
================================================
# Elasticlunr

## Description

Elasticlunr is a small, full-text search library for use in the Elixir environment. It indexes JSON documents and provides a friendly search interface to retrieve documents. The library is built for web applications that do not require the deployment complexities of popular search engines while taking advantage of the capabilities of the BEAM. Imagine how much is gained when the search functionality of your application resides in the same environment (the BEAM VM) as your business logic: search resolves faster, and the number of services (Elasticsearch, Solr, and so on) to monitor shrinks.

## Getting Started

```elixir
Mix.install([
  {:kino, "~> 0.4"},
  {:elasticlunr, "~> 0.6"}
])
```

## What's an Index?

An index is a collection of structured data that is referred to when looking for results relevant to a specific query. In an RDBMS, a table can be likened to an index, meaning that you can store, update, delete, and search documents in an index. The difference here is that an index has a pipeline that every JSON document passes through before it becomes searchable.
```elixir
alias Elasticlunr.{Index, Pipeline}

# the library comes with a default set of pipeline functions
pipeline = Pipeline.new(Pipeline.default_runners())

index = Index.new(pipeline: pipeline)
```

The above code block creates a new index with a pipeline of default functions that work with the English language. The new index does not define the expected structure of the JSON documents to be indexed. To fix this, let's assume we are building an index of blog posts, and each post consists of the `author`, `content`, `category`, and `title` attributes.

```elixir
index =
  index
  |> Index.add_field("title")
  |> Index.add_field("author")
  |> Index.add_field("content")
  |> Index.add_field("category")
```

## Indexing Documents

Following our example or use-case above, to make the blog posts searchable we need to add them to the index so that they can be analyzed and transformed appropriately.

```elixir
documents = [
  %{
    "id" => 1,
    "author" => "Mark Ericksen",
    "title" => "Saving and Restoring LiveView State using the Browser",
    "category" => "elixir liveview browser",
    "content" => "There are multiple ways to save and restore state for your LiveView processes. You can use an external cache like Redis, your database, or even the browser itself. Sometimes there are situations where you either can’t or don’t want to store the state on the server. In situations like that, you do have the option of storing the state in the user’s browser. This post explains how you use the browser to store state and how your LiveView process can get it back later. We’ll go through the code so you can add something similar to your own project. We cover what data to store, how to do it securely, and restoring the state on demand."
  },
  %{
    "id" => 2,
    "author" => "Mika Kalathil",
    "title" => "Creating Reusable Ecto Code",
    "category" => "elixir ecto sql",
    "content" => "Creating a highly reusable Ecto API is one of the ways we can create long-term sustainable code for ourselves, while growing it with our application to allow for infinite combination possibilites and high code reusability. If we write our Ecto code correctly, we can not only have a very well defined split between query definition and combination/execution using our context but also have the ability to re-use the queries we design individually, together with others to create larger complex queries."
  },
  %{
    "id" => 3,
    "author" => "Mark Ericksen",
    "title" => "ThinkingElixir 079: Collaborative Music in LiveView with Nathan Willson",
    "category" => "elixir podcast liveview",
    "content" => "In episode 79 of Thinking Elixir, we talk with Nathan Willson about GEMS, his collaborative music generator written in LiveView. He explains how it’s built, the JS sound library integrations, what could be done by Phoenix and what is done in the browser. Nathan shares how he deployed it globally to 10 regions using Fly.io. We go over some of the challenges he overcame creating an audio focused web application. It’s a fun open-source project that pushes the boundaries of what we think LiveView apps can do!"
  },
  %{
    "id" => 4,
    "title" => "ThinkingElixir 078: Logflare with Chase Granberry",
    "author" => "Mark Ericksen",
    "category" => "elixir podcast logging logflare",
    "content" => "In episode 78 of Thinking Elixir, we talk with Chase Granberry about Logflare. We learn why Chase started the company, what Logflare does, how it’s built on Elixir, about their custom Elixir logger, where the data is stored, how it’s queried, and more!
We talk about dealing with the constant stream of log data, how Logflare is collecting and displaying metrics, and talk more about Supabase acquiring the company!"
  }
]

index = Index.add_documents(index, documents)
```

## Search Index

The search result is a list of maps, and each map contains the keys `matched`, `positions`, `ref`, and `score`. See the definitions below:

* **matched:** the number of attributes in which the given query matched
* **score:** how well the document ranks compared to the other matching documents
* **ref:** the document id
* **positions:** a map giving the positions of the matching words in the document

```elixir
search_query = Kino.Input.text("Search", default: "elixir")
```

```elixir
search_query = Kino.Input.read(search_query)

results = Index.search(index, search_query)
```

**NB:** Don't forget to fiddle with the search input.

## Nested Document Attributes

As seen in the earlier example, all the documents indexed were without nested attributes. But imagine a situation where your data source returns documents with nested attributes and you want to search by these attributes - this is possible with Elasticlunr by specifying the top-level attribute. Let's say our data source returns a list of users, each with an address that is a nested object, and you want to index this information so that you can query it.

```elixir
# the library comes with a default set of pipeline functions
pipeline = Pipeline.new(Pipeline.default_runners())

users_index =
  Index.new(pipeline: pipeline)
  |> Index.add_field("name")
  |> Index.add_field("address")
  |> Index.add_field("education")
```

Elasticlunr automatically flattens the nested attributes so that, when using the advanced query DSL, you can use dot notation to filter the search results (see the dot-notation sketch just before the Query DSL section). Now, let's add a few user objects to the index:

```elixir
documents = [
  %{
    "id" => 1,
    "name" => "rose mary",
    "education" => "BSc.",
    "address" => %{
      "line1" => "Brooklyn Street",
      "line2" => "4181",
      "city" => "Portland",
      "state" => "Oregon",
      "country" => "USA"
    }
  },
  %{
    "id" => 2,
    "name" => "jason richard",
    "education" => "Msc.",
    "address" => %{
      "line1" => "Crown Street",
      "line2" => "2057",
      "city" => "St Malo",
      "state" => "Quebec",
      "country" => "CA"
    }
  },
  %{
    "id" => 3,
    "name" => "peters book",
    "education" => "BSc.",
    "address" => %{
      "line1" => "Murry Street",
      "line2" => "2285",
      "city" => "Norfolk",
      "state" => "Virginia",
      "country" => "USA"
    }
  },
  %{
    "id" => 4,
    "name" => "jason mount",
    "education" => "Highschool",
    "address" => %{
      "line1" => "Aspen Court",
      "line2" => "2057",
      "city" => "Boston",
      "state" => "Massachusetts",
      "country" => "USA"
    }
  }
]

users_index = Index.add_documents(users_index, documents)
```

```elixir
search_query = Kino.Input.text("Search users", default: "jason murry")
```

```elixir
search_query = Kino.Input.read(search_query)

Index.search(users_index, search_query)
```

## Index Manager

The manager provides CRUD functions to help you manage your indexes after mutating their state. First of all, let's see which indexes are currently managed:

```elixir
alias Elasticlunr.IndexManager

IndexManager.loaded_indices()
```

As seen above, the list is empty. Now let's add an index:

```elixir
IndexManager.save(users_index)

IndexManager.loaded_indices()
|> Enum.any?(&(&1 == users_index.name))
|> IO.inspect(label: :users_index_exists)

IndexManager.loaded_indices()
```

The manager now has the `users_index` in memory for access.
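Before diving into the Query DSL below, here is the dot-notation sketch promised in the Nested Document Attributes section. It is a minimal, hedged example against the `users_index` built above; the key `address.city` is an assumption about the exact flattened attribute name produced by the flattening step:

```elixir
Index.search(users_index, %{
  "query" => %{
    "match" => %{
      # hypothetical flattened key derived from the nested "address" map
      "address.city" => %{"query" => "portland"}
    }
  }
})
```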
## Query DSL

Like every other search engine, you can make more advanced search queries depending on your requirements, and I'm pleased to tell you that Elasticlunr has not left out such capabilities. So, in the following parts of this document, I will highlight the query types provided by the library and how you can use them. It's important to note that Elasticlunr tries to replicate the popular Query DSL (Domain-Specific Language) of Elasticsearch with the same behavior, which means the learning curve is gentler if you have experience using that search engine. Elasticlunr provides the `bool`, `match`, `match_all`, `not`, and `terms` query types for retrieving insights from an index.

## Bool

The `bool` query combines several queries to retrieve documents matching the boolean combination of clauses. Consider these clauses to be everything that comes after the `WHERE` keyword in a relational database query. The `bool` query is built using one or more clauses to achieve the desired results, and each clause has its type, see below:

Clause | Description
---|---
`must` | The clause must appear in the matching documents, and this affects the document's score.
`must_not` | The clause must not appear in the matching document. Scoring is ignored because the clause is executed in the filter context.
`filter` | Like `must`, the clause must appear in the matching documents, but scoring is ignored for the query.
`should` | The clause should appear in the matching document.

It's important to note that only scores from the `must` and `should` clauses contribute to the final score of the matching document.

```elixir
Index.search(index, %{
  "query" => %{
    "bool" => %{
      "must" => %{
        "terms" => %{"content" => "use"}
      },
      "should" => %{
        "terms" => %{"category" => "elixir"}
      },
      "filter" => %{
        "match" => %{
          "id" => 3
        }
      },
      "must_not" => %{
        "match" => %{
          "author" => "mika"
        }
      },
      "minimum_should_match" => 1
    }
  }
})
```

You can use the `minimum_should_match` parameter to specify the number of `should` clauses that returned documents must match. If the `bool` query includes at least one `should` clause and no `must` or `filter` clauses, the default value is 1. Otherwise, the default value is 0.

## Match

The `match` query is the standard query used for full-text search, including support for fuzzy matching. The provided text is analyzed before it is matched against documents.

```elixir
Index.search(index, %{
  "query" => %{
    "match" => %{
      "content" => %{
        "query" => "liveview browser"
      }
    }
  }
})
```

A `match` query accepts one or more top-level fields you wish to search; in the example above, it's the `content` field. Note that when you have more than one top-level field, the `match` query is rewritten to a `bool` query internally by the library. Now, let's see which parameters the `match` query accepts:

Parameter | Description
---|---
`query` | String you wish to find in the provided field.
`expand` | Increases token recall; see [token expansion](https://github.com/heywhy/ex_elasticlunr#token-expansion).
`fuzziness` | Maximum edit distance allowed for matching.
`boost` | Floating point number used to decrease or increase the relevance scores of a query. Defaults to 1.0.
`operator` | The boolean operator used to interpret the `query` value. Available values for the `operator` option are `or` and `and`. Defaults to `or`.
`minimum_should_match` | Minimum number of clauses that a document must match for it to be returned.
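To see several of these parameters together, here is a minimal sketch against the blog-post `index` built earlier. The parameter names come from the table above; the specific values (an edit distance of 1, the `and` operator, the misspelled token) are illustrative only:

```elixir
Index.search(index, %{
  "query" => %{
    "match" => %{
      "content" => %{
        "query" => "browsr liveview",
        # tolerate single-character typos such as "browsr"
        "fuzziness" => 1,
        # require all analyzed tokens to match the document
        "operator" => "and"
      }
    }
  }
})
```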
## Match All

The simplest query: it matches all documents and gives each a score of 1.0.

Parameter | Description
---|---
`boost` | Floating point number used to decrease or increase the relevance scores of a query. Defaults to 1.0.

```elixir
Index.search(index, %{
  "query" => %{
    "match_all" => %{}
  }
})
```

## Not

The `not` query inverts the result of the nested query, giving each matched document a score of 1.0.

```elixir
Index.search(index, %{
  "query" => %{
    "not" => %{
      "match" => %{
        "content" => %{
          "query" => "elixir"
        }
      }
    }
  }
})
```

## Terms

The `terms` query returns documents that contain the exact terms in a given field. It should be used to find documents based on a precise value such as a price, a product ID, or a username.

```elixir
Index.search(index, %{
  "query" => %{
    "terms" => %{
      "content" => %{
        "value" => "think"
      }
    }
  }
})
```

A `terms` query accepts one or more top-level fields you wish to search; in the example above, it's the `content` field. Note that when you have more than one top-level field, the `terms` query is rewritten to a `bool` query internally by the library. Now, let's see which parameters the `terms` query accepts:

Parameter | Description
---|---
`value` | A term you wish to find in the provided field. The term must match the field value exactly for a document to be returned.
`boost` | Floating point number used to decrease or increase the relevance scores of a query. Defaults to 1.0.

================================================
FILE: lib/elasticlunr/application.ex
================================================
defmodule Elasticlunr.Application do
  # See https://hexdocs.pm/elixir/Application.html
  # for more information on OTP Applications
  @moduledoc false

  use Application

  alias Elasticlunr.IndexManager

  @impl true
  def start(_type, _args) do
    children = [
      {Registry, name: Elasticlunr.IndexRegistry, keys: :unique},
      {DynamicSupervisor, name: Elasticlunr.IndexSupervisor, strategy: :one_for_one}
      # Starts a worker by calling: Elasticlunr.Worker.start_link(arg)
      # {Elasticlunr.Worker, arg}
    ]

    # See https://hexdocs.pm/elixir/Supervisor.html
    # for other strategies and supported options
    opts = [strategy: :one_for_one, name: Elasticlunr.Supervisor]

    case Supervisor.start_link(children, opts) do
      {:ok, _} = result ->
        :ok = IndexManager.preload()
        result

      err ->
        err
    end
  end
end

================================================
FILE: lib/elasticlunr/core/document_store.ex
================================================
defmodule Elasticlunr.DocumentStore do alias Elasticlunr.{Field, Index} defstruct save: true, documents: %{}, document_info: %{}, length: 0 @type t :: %__MODULE__{ save: boolean(), documents: map(), document_info: map(), length: pos_integer() } @spec new(boolean()) :: t() def new(save \\ true) do struct!(%__MODULE__{}, %{save: save}) end @spec add(t(), Field.document_ref(), map()) :: t() def add(%__MODULE__{documents: documents, length: length, save: save} = store, ref, document) do length = case exists?(store, ref) do true -> length false -> length + 1 end documents = case save do true -> Map.put(documents, ref, document) false -> Map.put(documents, ref, nil) end %{store | length: length, documents: documents} end @spec get(t(), Field.document_ref()) :: map() | nil def get(%__MODULE__{documents: documents}, ref), do: Map.get(documents, ref) @spec remove(t(), Field.document_ref()) :: map() | nil def remove( %__MODULE__{document_info: document_info, documents: documents, length: length} = store, ref ) do case exists?(store, ref) do true -> length
= length - 1 documents = Map.delete(documents, ref) document_info = Map.delete(document_info, ref) %{store | document_info: document_info, documents: documents, length: length} false -> store end end @spec exists?(t(), Field.document_ref()) :: boolean() def exists?(%__MODULE__{documents: documents}, ref), do: Map.has_key?(documents, ref) @spec add_field_length(t(), Field.document_ref(), Index.document_field(), pos_integer()) :: t() def add_field_length(%__MODULE__{document_info: document_info} = store, ref, field, length) do case exists?(store, ref) do false -> store true -> info = document_info |> Map.get(ref, %{}) |> Map.put(field, length) document_info = Map.put(document_info, ref, info) %{store | document_info: document_info} end end @spec update_field_length(t(), Field.document_ref(), Index.document_field(), pos_integer()) :: t() def update_field_length(%__MODULE__{} = store, ref, field, length), do: add_field_length(store, ref, field, length) @spec get_field_length(t(), Field.document_ref(), Index.document_field()) :: pos_integer() def get_field_length(%__MODULE__{document_info: document_info} = store, ref, field) do case exists?(store, ref) do false -> nil true -> document_info |> Map.get(ref, %{}) |> Map.get(field) end end @spec reset(t(), boolean()) :: t() def reset(%__MODULE__{}, save \\ true), do: new(save) end ================================================ FILE: lib/elasticlunr/core/field.ex ================================================ defmodule Elasticlunr.Field do alias Elasticlunr.{DB, Pipeline, Token, Utils} @fields ~w[db name pipeline query_pipeline store store_positions]a @enforce_keys @fields defstruct @fields @type flnorm :: integer() | float() @type t :: %__MODULE__{ db: DB.t(), name: String.t(), pipeline: Pipeline.t() | nil, query_pipeline: Pipeline.t() | nil, store: boolean(), store_positions: boolean() } @type document_ref :: atom() | binary() @type document :: %{id: document_ref(), content: binary()} @type token_info :: %{ term: term, tf: map(), idf: map(), flnorm: flnorm(), documents: map() } @spec new(keyword) :: t() def new(opts) do attrs = [ db: Keyword.get(opts, :db), name: Keyword.get(opts, :name), pipeline: Keyword.get(opts, :pipeline), store: Keyword.get(opts, :store_documents, false), query_pipeline: Keyword.get(opts, :query_pipeline), store_positions: Keyword.get(opts, :store_positions, false) ] struct!(__MODULE__, attrs) end @spec documents(t()) :: list(document_ref()) def documents(%__MODULE__{db: db, name: name}) do case DB.match_object(db, {{:field_ids, name, :_}}) do [] -> [] ids -> Stream.map(ids, fn {{:field_ids, _, id}} -> id end) end end @spec term_frequency(t(), binary()) :: map() def term_frequency(%__MODULE__{} = field, term) do tf_lookup(field, term) end @spec has_token(t(), binary()) :: boolean() def has_token(%__MODULE__{} = field, term) do DB.member?(field.db, {:field_idf, field.name, term}) end @spec get_token(t(), binary()) :: token_info() | nil def get_token(%__MODULE__{} = field, term) do case idf_lookup(field, term) do nil -> nil _ -> flnorm = flnorm_lookup(field) to_field_token(field, term, flnorm) end end @spec set_query_pipeline(t(), module()) :: t() def set_query_pipeline(%__MODULE__{} = field, pipeline) do %{field | query_pipeline: pipeline} end @spec add(t(), list(document())) :: t() def add(%__MODULE__{pipeline: pipeline} = field, documents) do Enum.each(documents, fn %{id: id, content: content} -> unless DB.member?(field.db, {:field_ids, field.name, id}) do tokens = Pipeline.run(pipeline, content) add_id(field, id) 
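# update_field_stats/3 records per-term frequency and position data for the new document; recalculate_idf/1 afterwards refreshes the field's IDF values and length norm.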
update_field_stats(field, id, tokens) end end) recalculate_idf(field) end @spec length(t(), atom()) :: pos_integer() def length(%__MODULE__{db: db, name: name}, :ids) do fun = [{{{:field_ids, name, :_}}, [], [true]}] DB.select_count(db, fun) end @spec length(t(), atom(), String.t()) :: pos_integer() def length(%__MODULE__{db: db, name: name}, :term, term) do fun = [ {{{:field_term, name, term, :_}, :_}, [], [true]} ] DB.select_count(db, fun) end def length(%__MODULE__{db: db, name: name}, :tf, term) do fun = [ {{{:field_tf, name, term, :_}, :_}, [], [true]} ] DB.select_count(db, fun) end def length(%__MODULE__{db: db, name: name}, :idf, term) do fun = [ {{{:field_idf, name, term}, :_}, [], [true]} ] DB.select_count(db, fun) end @spec update(t(), list(document())) :: t() def update(%__MODULE__{} = field, documents) do document_ids = Enum.map(documents, & &1.id) field |> remove(document_ids) |> add(documents) end @spec remove(t(), list(document_ref())) :: t() def remove(%__MODULE__{db: db, name: name} = field, document_ids) do Enum.each(document_ids, fn id -> true = DB.match_delete(db, {{:field_term, name, :_, id}, :_}) true = DB.match_delete(db, {{:field_tf, name, :_, id}, :_}) true = DB.match_delete(db, {{:field_idf, name, :_}, :_}) true = DB.delete(db, {:field_ids, name, id}) end) recalculate_idf(field) end @spec analyze(t(), any(), keyword) :: list(Token.t()) def analyze(%__MODULE__{pipeline: pipeline, query_pipeline: query_pipeline}, content, options) do case Keyword.get(options, :is_query, false) && not is_nil(query_pipeline) do true -> Pipeline.run(query_pipeline, content) false -> Pipeline.run(pipeline, content) end end @spec terms(t(), keyword()) :: any() def terms(%__MODULE__{} = field, query) do fuzz = Keyword.get(query, :fuzziness, 0) msm = Keyword.get(query, :minimum_should_match, 1) terms = terms_lookup(field) matching_docs = Stream.map(query[:terms], fn %Regex{} = re -> re val -> to_token(val) end) |> Enum.reduce(%{}, fn %Regex{} = re, matching_docs -> matched_terms = Stream.filter(terms, &Regex.match?(re, elem(&1, 0))) Enum.reduce(matched_terms, matching_docs, fn {term, _, _}, matching_docs -> ids = matching_ids(field, term) filter_ids(field, ids, term, matching_docs, query) end) %Token{token: term}, matching_docs -> matching_docs = case fuzz == 0 && length(field, :term, term) > 0 do true -> ids = matching_ids(field, term) filter_ids(field, ids, term, matching_docs, query) false -> matching_docs end match_with_fuzz(field, term, fuzz, query, matching_docs) end) if msm <= 1 do matching_docs else matching_docs |> Stream.filter(fn {_key, content} -> Enum.count(content) >= msm end) |> Enum.into(%{}) end end @spec tokens(Elasticlunr.Field.t()) :: Enumerable.t() def tokens(%__MODULE__{} = field) do flnorm = flnorm_lookup(field) unique_terms_lookup(field) |> Stream.map(fn {term, _, _} -> to_field_token(field, term, flnorm) end) end defp update_field_stats(%{db: db, name: name} = field, id, tokens) do Enum.each(tokens, fn token -> %Token{token: term} = token term_attrs = term_lookup(field, term, id) term_attrs = case Token.get_position(token) do nil -> term_attrs position -> %{term_attrs | positions: term_attrs.positions ++ [position]} end term_attrs = %{term_attrs | total: term_attrs.total + 1} true = DB.insert(db, {{:field_term, name, term, id}, term_attrs}) true = DB.insert(db, {{:field_tf, name, term, id}, :math.sqrt(term_attrs.total)}) end) end defp add_id(%{db: db, name: name}, id) do true = DB.insert(db, {{:field_ids, name, id}}) end defp matched_documents_for_term(%{db: db, 
name: name}, term) do db |> DB.match_object({{:field_term, name, term, :_}, :_}) |> Stream.map(fn {{:field_term, _, _, id}, _} -> id end) end defp term_lookup(%{db: db, name: name}, term, id) do case DB.match_object(db, {{:field_term, name, term, id}, :_}) do [] -> %{total: 0, positions: []} [{_, attrs}] -> attrs end end defp terms_lookup(%{db: db, name: name}) do db |> DB.match_object({{:field_term, name, :_, :_}, :_}) |> Stream.map(&termify/1) end defp terms_lookup(%{db: db, name: name}, term) do db |> DB.match_object({{:field_term, name, term, :_}, :_}) |> Stream.map(&termify/1) end defp termify({{:field_term, _, term, id}, attrs}), do: {term, id, attrs} defp tf_lookup(%{db: db, name: name}, term) do case DB.match_object(db, {{:field_tf, name, term, :_}, :_}) do [] -> nil terms -> terms |> Stream.map(fn {{:field_tf, _, _, id}, count} -> {id, count} end) end end defp tf_lookup(%{db: db, name: name}, term, id) do case DB.match_object(db, {{:field_tf, name, term, id}, :_}) do [] -> nil [{{:field_tf, _, _, id}, count}] -> {id, count} end end defp idf_lookup(%{db: db, name: name}, term) do case DB.match_object(db, {{:field_idf, name, term}, :_}) do [] -> nil [{{:field_idf, _, _}, value}] -> value end end defp flnorm_lookup(%{db: db, name: name}) do case DB.lookup(db, {:field_flnorm, name}) do [] -> 1 [{{:field_flnorm, _}, value}] -> value end end defp unique_terms_lookup(field) do terms_lookup(field) |> Stream.uniq_by(&elem(&1, 0)) end defp recalculate_idf(field) do terms = unique_terms_lookup(field) terms_length = Enum.count(terms) ids_length = length(field, :ids) flnorm = case terms_length > 0 do true -> 1 / :math.sqrt(terms_length) false -> 0 end :ok = terms |> Task.async_stream(fn {term, _id, _attrs} -> count = length(field, :term, term) + 1 value = 1 + :math.log10(ids_length / count) true = DB.insert(field.db, {{:field_idf, field.name, term}, value}) end) |> Stream.run() true = DB.insert(field.db, {{:field_flnorm, field.name}, flnorm}) field end defp filter_ids(field, ids, term, matching_docs, query) do docs = Keyword.get(query, :docs) case docs do docs when is_list(docs) -> Stream.filter(ids, &(&1 in docs)) _ -> ids end |> get_matching_docs(field, term, matching_docs) end defp get_matching_docs(docs, field, term, matching_docs) do docs |> Enum.reduce(matching_docs, fn id, matching_docs -> matched = matching_docs |> Map.get(id, []) |> Kernel.++([extract_matched(field, term, id)]) Map.put(matching_docs, id, matched) end) end defp match_with_fuzz(field, term, fuzz, query, matching_docs) when fuzz > 0 do field |> unique_terms_lookup() |> Enum.reduce(matching_docs, fn {key, _id, _attr}, matching_docs -> if Utils.levenshtein_distance(key, term) <= fuzz do ids = matching_ids(field, term) filter_ids(field, ids, key, matching_docs, query) else matching_docs end end) end defp match_with_fuzz(_field, _term, _fuzz, _query, matching_docs), do: matching_docs defp matching_ids(field, term) do terms_lookup(field, term) |> Stream.map(&elem(&1, 1)) end defp get_content(_field, _id) do nil end defp extract_matched(field, term, id) do attrs = term_lookup(field, term, id) positions = Map.get(attrs, :positions) {^id, tf} = tf_lookup(field, term, id) %{ tf: tf, ref: id, positions: positions, norm: flnorm_lookup(field), idf: idf_lookup(field, term), content: get_content(field, id) } end defp to_token(%Token{} = token), do: token defp to_token(token), do: Token.new(token) defp to_field_token(field, term, flnorm) do %{ term: term, norm: flnorm, tf: length(field, :tf, term), idf: idf_lookup(field, term), 
documents: matched_documents_for_term(field, term) } end end ================================================ FILE: lib/elasticlunr/core/index.ex ================================================ defmodule Elasticlunr.Index.IdPipeline do @moduledoc false alias Elasticlunr.{Pipeline, Token} @behaviour Pipeline @impl true def call(%Token{} = token), do: token end defmodule Elasticlunr.Index do alias Elasticlunr.{DB, Field, Pipeline} alias Elasticlunr.Index.IdPipeline alias Elasticlunr.Dsl.{Query, QueryRepository} alias Uniq.UUID @fields ~w[db fields name ref pipeline documents_size store_positions store_documents]a @enforce_keys @fields defstruct @fields @type document_field :: atom() | binary() @type t :: %__MODULE__{ db: DB.t(), fields: map(), documents_size: integer(), ref: Field.document_ref(), pipeline: Pipeline.t(), name: atom() | binary(), store_positions: boolean(), store_documents: boolean() } @type search_query :: binary() | map() @type search_result :: any() @spec new(keyword()) :: t() def new(opts \\ []) do ref = Keyword.get(opts, :ref, "id") pipeline = Keyword.get_lazy(opts, :pipeline, &Pipeline.new/0) name = Keyword.get_lazy(opts, :name, &UUID.uuid4/0) db_name = String.to_atom("elasticlunr_#{name}") db = DB.init(db_name, ~w[ordered_set public]a) id_field = Field.new(db: db, name: ref, pipeline: Pipeline.new([IdPipeline])) fields = Map.put(%{}, to_string(ref), id_field) attrs = %{ db: db, documents_size: 0, ref: ref, fields: fields, pipeline: pipeline, name: name, store_documents: Keyword.get(opts, :store_documents, true), store_positions: Keyword.get(opts, :store_positions, true) } struct!(__MODULE__, attrs) end @spec add_field(t(), document_field(), keyword()) :: t() def add_field( %__MODULE__{ db: db, fields: fields, pipeline: pipeline, store_positions: store_positions, store_documents: store_documents } = index, field, opts \\ [] ) when is_binary(field) do opts = opts |> Keyword.put(:db, db) |> Keyword.put(:name, field) |> Keyword.put_new(:pipeline, pipeline) |> Keyword.put_new(:store_documents, store_documents) |> Keyword.put_new(:store_positions, store_positions) %{index | fields: Map.put(fields, field, Field.new(opts))} end @spec update_field(t(), document_field(), Field.t()) :: t() def update_field(%__MODULE__{fields: fields} = index, name, %Field{} = field) do if not Map.has_key?(fields, name) do raise "Unknown field #{name} in index" end update_documents_size(%{index | fields: Map.put(fields, name, field)}) end @spec get_fields(t()) :: list(Field.document_ref() | document_field()) def get_fields(%__MODULE__{fields: fields}), do: Map.keys(fields) @spec get_field(t(), document_field()) :: Field.t() def get_field(%__MODULE__{fields: fields}, field) do Map.get(fields, field) end @spec save_document(t(), boolean()) :: t() def save_document(%__MODULE__{fields: fields} = index, save) do fields = fields |> Enum.map(fn {key, field} -> {key, %{field | store: save}} end) |> Enum.into(%{}) %{index | fields: fields} end @spec add_documents(t(), list(map())) :: t() def add_documents(%__MODULE__{fields: fields, ref: ref} = index, documents) do :ok = persist(fields, ref, documents, &Field.add/2) update_documents_size(index) end @spec update_documents(t(), list(map())) :: t() def update_documents(%__MODULE__{ref: ref, fields: fields} = index, documents) do :ok = persist(fields, ref, documents, &Field.update/2) update_documents_size(index) end @spec remove_documents(t(), list(Field.document_ref())) :: t() def remove_documents(%__MODULE__{fields: fields} = index, document_ids) do 
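# Remove the given documents from every field, then recompute the cached documents_size.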
Enum.each(fields, fn {_, field} -> Field.remove(field, document_ids) end) update_documents_size(index) end @spec analyze(t(), document_field(), any(), keyword()) :: Enumerable.t() def analyze(%__MODULE__{fields: fields}, field, content, options) do fields |> Map.get(field) |> Field.analyze(content, options) end @spec terms(t(), keyword()) :: Enumerable.t() def terms(%__MODULE__{fields: fields}, query) do field = Keyword.get(query, :field) fields |> Map.get(field) |> Field.terms(query) end @spec all(t()) :: list(Field.document_ref()) def all(%__MODULE__{ref: ref, fields: fields}) do fields |> Map.get(ref) |> Field.documents() end @spec update_documents_size(t()) :: t() def update_documents_size(%__MODULE__{fields: fields} = index) do size = Enum.reduce(fields, 0, fn {_, field}, acc -> size = Field.length(field, :ids) if size > acc do size else acc end end) %{index | documents_size: size} end @spec search(t(), search_query(), map() | nil) :: list(search_result()) def search(index, query, opts \\ nil) def search(%__MODULE__{}, nil, _opts), do: [] def search(%__MODULE__{ref: ref} = index, query, nil) when is_binary(query) do fields = get_fields(index) matches = fields |> Enum.reject(&(&1 == ref)) |> Enum.map(fn field -> %{"match" => %{field => query}} end) elasticsearch(index, %{ "query" => %{ "bool" => %{ "should" => matches } } }) end def search(%__MODULE__{ref: ref} = index, query, %{"fields" => fields}) when is_binary(query) do matches = fields |> Enum.filter(fn field -> with true <- field != ref, true <- Map.has_key?(fields, field), %{"boost" => boost} <- Map.get(fields, field) do boost > 0 end end) |> Enum.map(fn field -> %{"boost" => boost} = Map.get(fields, field) match = %{field => query} %{"match" => match, "boost" => boost} end) elasticsearch(index, %{ "query" => %{ "bool" => %{ "should" => matches } } }) end def search(%__MODULE__{} = index, %{"query" => _} = query, _opts), do: elasticsearch(index, query) def search(%__MODULE__{} = index, query, nil) when is_map(query), do: search(index, query, %{"operator" => "OR"}) def search(%__MODULE__{} = index, %{} = query, options) do matches = query |> Enum.map(fn {field, content} -> expand = Map.get(options, "expand", false) operator = options |> Map.get("bool", "or") |> String.downcase() %{ "expand" => expand, "match" => %{"operator" => operator, field => content} } end) elasticsearch(index, %{ "query" => %{ "bool" => %{ "should" => matches } } }) end defp elasticsearch(index, %{"query" => root}) do {key, value} = Query.split_root(root) query = QueryRepository.parse(key, value, root) query |> QueryRepository.score(index) |> Enum.sort(fn a, b -> a.score > b.score end) end defp elasticsearch(_index, _query) do raise "Root object must have a query element" end defp flatten_document(document, prefix \\ "") do Enum.reduce(document, %{}, fn {key, value}, transformed when is_map(value) -> mapped = flatten_document(value, "#{prefix}#{key}.") Map.merge(transformed, mapped) {key, value}, transformed -> Map.put(transformed, "#{prefix}#{key}", value) end) end defp persist(fields, ref, documents, persist_fn) do Task.async_stream(documents, fn document -> document = flatten_document(document) save(fields, ref, document, persist_fn) end) |> Stream.run() end defp save(fields, ref, document, callback) do Enum.each(fields, fn {attribute, field} -> if document[attribute] do data = [ %{id: document[ref], content: document[attribute]} ] callback.(field, data) end end) end end ================================================ FILE: lib/elasticlunr/core/token.ex 
================================================ defmodule Elasticlunr.Token do defstruct ~w[token metadata]a @type t :: %__MODULE__{ token: binary(), metadata: map() } @spec new(binary(), map()) :: t() def new(token, metadata \\ %{}) do struct!(__MODULE__, token: token, metadata: metadata) end @spec update(t(), keyword()) :: t() def update(%__MODULE__{token: str, metadata: metadata} = token, opts) do opts = opts |> Keyword.put_new(:token, str) |> Keyword.put_new(:metadata, metadata) struct!(token, opts) end @spec get_position(t()) :: {integer(), integer()} | nil def get_position(%__MODULE__{metadata: %{start: start, end: end_1}}), do: {start, end_1} def get_position(%__MODULE__{metadata: %{}}), do: nil end ================================================ FILE: lib/elasticlunr/db.ex ================================================ defmodule Elasticlunr.DB do defstruct [:name, :options] @type t :: %__MODULE__{ name: atom(), options: list(atom()) } @spec init(atom(), list()) :: t() def init(name, opts \\ []) when is_atom(name) do default = ~w[compressed named_table]a options = Enum.uniq(default ++ opts) unless Enum.member?(:ets.all(), name) do :ets.new(name, options) end struct!(__MODULE__, name: name, options: options) end @spec delete(t(), term()) :: boolean() def delete(%__MODULE__{name: name}, pattern), do: :ets.delete(name, pattern) @spec destroy(t()) :: boolean() def destroy(%__MODULE__{name: name}) do if Enum.member?(:ets.all(), name) do :ets.delete(name) else true end end @spec insert(t(), term()) :: boolean() def insert(%__MODULE__{name: name}, data), do: :ets.insert(name, data) @spec lookup(t(), term()) :: list(term()) def lookup(%__MODULE__{name: name}, key), do: :ets.lookup(name, key) @spec member?(t(), term()) :: boolean() def member?(%__MODULE__{name: name}, key), do: :ets.member(name, key) @spec match_delete(t(), term()) :: boolean() def match_delete(%__MODULE__{name: name}, pattern), do: :ets.match_delete(name, pattern) @spec match_object(t(), term()) :: list(term()) def match_object(%__MODULE__{name: name}, spec), do: :ets.match_object(name, spec) @spec select_count(t(), term()) :: pos_integer() def select_count(%__MODULE__{name: name}, spec), do: :ets.select_count(name, spec) @spec from(t(), keyword()) :: {:ok, t()} def from(%__MODULE__{name: name} = db, file: file) do with true <- File.exists?(file), {:ok, ^name} <- :dets.open_file(name, file: file), true <- :ets.from_dets(name, name) do {:ok, db} end end @spec to(t(), keyword()) :: :ok def to(%__MODULE__{name: name}, file: file) do unless Enum.member?(:dets.all(), name) do :dets.open_file(name, ram_file: true, file: file) end with ^name <- :ets.to_dets(name, name) do :dets.close(name) end end end ================================================ FILE: lib/elasticlunr/deserializer.ex ================================================ defprotocol Elasticlunr.Deserializer do @spec deserialize(Enum.t()) :: Elasticlunr.Index.t() def deserialize(data) end defmodule Elasticlunr.Deserializer.Parser do alias Elasticlunr.{Index, Pipeline} @spec process(Enum.t()) :: Index.t() def process(data) do Enum.reduce(data, nil, fn line, acc -> [command | opts] = String.trim(line) |> String.split("#") case parse(command, acc, opts) do {%Index{}, _extra} = acc -> acc %Index{} = index -> index end end) |> case do {%Index{} = index, _} -> index result -> result end end defp parse(command, acc, [opts]), do: parse(command, acc, opts) defp parse("settings", nil, opts) do opts = to_options(opts) {_, pipeline_map} = opts[:pipeline] |> String.split(",") 
|> Enum.reduce({0, %{}}, fn callback, {index, map} -> {index + 1, Map.put(map, to_string(index), String.to_atom(callback))} end) opts = Keyword.replace(opts, :pipeline, parse_pipeline(opts[:pipeline])) {Index.new(opts), %{pipeline: pipeline_map}} end defp parse("db", acc, _), do: acc defp parse("field", {index, extra}, opts) do opts = to_options(opts) opts = Enum.map(opts, fn {:pipeline, value} -> {:pipeline, parse_pipeline(value, extra[:pipeline])} option -> option end) index = Index.add_field(index, opts[:name], opts) {index, extra} end defp parse(_, acc, _), do: acc defp parse_pipeline(option, cache \\ %{}) do callbacks = option |> String.split(",") |> Enum.map(fn callback -> Map.get_lazy(cache, callback, fn -> String.to_atom(callback) end) end) Pipeline.new(callbacks) end defp to_options(options) when is_binary(options) do String.split(options, "|") |> Enum.reduce([], fn option, acc -> [key | values] = String.split(option, ":") [value] = values Keyword.put(acc, String.to_atom(key), parse_value(value)) end) end defp parse_value("true"), do: true defp parse_value("false"), do: false defp parse_value(val), do: val end ================================================ FILE: lib/elasticlunr/dsl/query/bool_query.ex ================================================ defmodule Elasticlunr.Dsl.BoolQuery do use Elasticlunr.Dsl.Query alias Elasticlunr.Index alias Elasticlunr.Dsl.{NotQuery, Query, QueryRepository} defstruct ~w[rewritten should must must_not filter minimum_should_match]a @type clause :: struct() | list(struct()) @type t :: %__MODULE__{ filter: clause(), should: clause(), must: nil | struct(), must_not: nil | struct(), rewritten: boolean(), minimum_should_match: integer() } @spec new(keyword) :: t() def new(opts) do attrs = %{ should: Keyword.get(opts, :should, []), must: Keyword.get(opts, :must), must_not: Keyword.get(opts, :must_not), filter: Keyword.get(opts, :filter), rewritten: Keyword.get(opts, :rewritten, false), minimum_should_match: extract_minimum_should_match(opts) } struct!(__MODULE__, attrs) end @impl true def rewrite( %__MODULE__{ filter: filter, must: must, must_not: must_not, should: should, minimum_should_match: minimum_should_match }, %Index{} = index ) do should = should |> Kernel.||([]) |> Enum.map(&QueryRepository.rewrite(&1, index)) must = case must do nil -> nil mod when is_struct(mod) -> QueryRepository.rewrite(mod, index) end filters = filter || [] filters = case must_not do nil -> filters must_not when is_struct(must_not) -> query = must_not |> QueryRepository.rewrite(index) |> NotQuery.new() [query] ++ filters end |> Enum.map(&QueryRepository.rewrite(&1, index)) opts = [ must: must, should: should, filter: filters, rewritten: true, minimum_should_match: minimum_should_match ] new(opts) end @impl true def score(%__MODULE__{rewritten: false} = query, %Index{} = index, options) do query |> rewrite(index) |> score(index, options) end def score( %__MODULE__{ must: must, filter: filter, should: should, minimum_should_match: minimum_should_match }, %Index{} = index, _options ) do filter_results = filter_result(filter, index) filter_results = filter_must(must, filter_results, index) {docs, filtered} = case filter_results do false -> {%{}, nil} value -> Enum.reduce(value, {%{}, []}, fn %{ref: ref, score: score}, {docs, filtered} -> filtered = [ref] ++ filtered doc = %{ ref: ref, matched: 0, positions: %{}, score: score || 0 } docs = Map.put(docs, ref, doc) {docs, filtered} end) end {docs, _filtered} = should |> Enum.reduce({docs, filtered}, fn query, {docs, filtered} 
-> opts = case filtered do nil -> [] filtered -> [filtered: filtered] end results = QueryRepository.score(query, index, opts) docs = results |> Enum.reduce(docs, fn doc, docs -> ob = Map.get(docs, doc.ref, %{ ref: doc.ref, score: 0, matched: 0, positions: %{} }) %{matched: matched, score: score, positions: positions} = ob # credo:disable-for-lines:3 positions = Map.get(doc, :positions, %{}) |> Enum.reduce(positions, fn {field, tokens}, positions -> p = Map.get(positions, field, []) p = Enum.reduce(tokens, p, &(&2 ++ [&1])) Map.put(positions, field, p) end) doc_score = Map.get(doc, :score, 0) ob = %{ob | positions: positions, matched: matched + 1, score: score + doc_score} Map.put(docs, doc.ref, ob) end) {docs, filtered} end) docs |> Stream.map(&elem(&1, 1)) |> Stream.filter(fn doc -> doc.matched >= minimum_should_match && doc.score > 0 end) end defp filter_result(nil, _index), do: false defp filter_result([], _index), do: false defp filter_result(filter, index) do filter |> Enum.reduce(false, fn query, acc -> q = case acc do false -> [] val -> [filtered: Enum.map(val, & &1.ref)] end QueryRepository.filter(query, index, q) end) end defp filter_must(nil, filter_results, _index), do: filter_results defp filter_must(must_query, filter_results, index) when is_struct(must_query) do q = case filter_results do false -> [] results -> [filtered: Enum.map(results, & &1.ref)] end QueryRepository.score(must_query, index, q) end @impl true def parse(options, _query_options, repo) do default_mapper = fn query -> case Query.split_root(query) do {key, value} -> repo.parse(key, value, query) _ -> repo.parse("match_all", []) end end [] |> patch_options(:should, options, default_mapper) |> patch_options(:filter, options, default_mapper) |> patch_options(:must, options, repo) |> patch_options(:must_not, options, repo) |> patch_options(:minimum_should_match, options) |> __MODULE__.new() end defp patch_options(opts, :should, options, mapper) do case Map.get(options, "should") do nil -> opts should when is_list(should) -> should = should |> Enum.map(mapper) Keyword.put(opts, :should, should) should -> Keyword.put(opts, :should, [mapper.(should)]) end end defp patch_options(opts, :filter, options, mapper) do case Map.get(options, "filter") do nil -> opts filter when is_list(filter) -> filter = Enum.map(filter, mapper) Keyword.put(opts, :filter, filter) filter -> Keyword.put(opts, :filter, [mapper.(filter)]) end end defp patch_options(opts, :must, options, repo) do case Map.get(options, "must") do nil -> opts must when is_map(must) -> {key, options} = Query.split_root(must) must = repo.parse(key, options, must) Keyword.put(opts, :must, must) end end defp patch_options(opts, :must_not, options, repo) do case Map.get(options, "must_not") do nil -> opts must_not -> {key, options} = Query.split_root(must_not) q = repo.parse(key, options, must_not) Keyword.put(opts, :must_not, q) end end defp patch_options(opts, :minimum_should_match, options) do options |> Map.get("minimum_should_match") |> case do nil -> opts value when is_integer(value) -> value <= Keyword.get(opts, :should) |> Enum.count() end |> case do true -> minimum_should_match = Map.get(options, "minimum_should_match") Keyword.put(opts, :minimum_should_match, minimum_should_match) _ -> opts end end defp extract_minimum_should_match(opts) do default_value = case not is_empty_clause?(opts[:should]) and (is_empty_clause?(opts[:must]) or is_empty_clause?(opts[:filter])) do true -> 1 false -> 0 end Keyword.get(opts, :minimum_should_match, default_value) end defp 
is_empty_clause?(nil), do: true defp is_empty_clause?(list) when is_list(list), do: Enum.empty?(list) defp is_empty_clause?(%{}), do: false end ================================================ FILE: lib/elasticlunr/dsl/query/match_all_query.ex ================================================ defmodule Elasticlunr.Dsl.MatchAllQuery do use Elasticlunr.Dsl.Query alias Elasticlunr.Index defstruct ~w[boost]a @type t :: %__MODULE__{boost: integer()} def new(boost \\ 1), do: struct!(__MODULE__, boost: boost) @impl true def parse(options, _query_options, _repo) do options |> Map.get("boost", 1) |> __MODULE__.new() end @impl true def score(%__MODULE__{boost: boost}, %Index{} = index, _options) do doc_ids = Index.all(index) Stream.map(doc_ids, &%{ref: &1, score: 1.0 * boost}) end end ================================================ FILE: lib/elasticlunr/dsl/query/match_query.ex ================================================ defmodule Elasticlunr.Dsl.MatchQuery do use Elasticlunr.Dsl.Query alias Elasticlunr.{Index} alias Elasticlunr.Dsl.{MatchAllQuery, Query, QueryRepository, TermsQuery} defstruct ~w[expand field query boost fuzziness minimum_should_match operator]a @type t :: %__MODULE__{ expand: boolean(), boost: integer(), field: Index.document_field(), query: any(), fuzziness: integer(), operator: binary(), minimum_should_match: pos_integer() } @spec new(keyword) :: t() def new(opts) do attrs = %{ expand: Keyword.get(opts, :expand, false), field: Keyword.get(opts, :field, ""), query: Keyword.get(opts, :query, ""), boost: Keyword.get(opts, :boost, 1), fuzziness: Keyword.get(opts, :fuzziness, 0), operator: Keyword.get(opts, :operator, "or"), minimum_should_match: Keyword.get(opts, :minimum_should_match, 1) } struct!(__MODULE__, attrs) end @impl true def rewrite( %__MODULE__{ boost: boost, field: field, query: query, expand: expand, operator: operator, fuzziness: fuzziness, minimum_should_match: minimum_should_match }, %Index{} = index ) do tokens = Index.analyze(index, field, query, is_query: true) tokens_length = length(tokens) cond do tokens_length > 1 -> minimum_should_match = case operator == "and" && minimum_should_match == 0 do true -> tokens_length false -> minimum_should_match end TermsQuery.new( field: field, expand: expand, terms: tokens, fuzziness: fuzziness, boost: boost, minimum_should_match: minimum_should_match ) tokens_length == 1 -> TermsQuery.new( field: field, expand: expand, terms: tokens, fuzziness: fuzziness, boost: boost ) true -> MatchAllQuery.new() end end @impl true def score(%__MODULE__{} = module, %Index{} = index, options) do module |> rewrite(index) |> QueryRepository.score(index, options) end @impl true def parse(options, _query_options, repo) do cond do Enum.empty?(options) -> repo.parse("match_all", %{}) Enum.count(options) > 1 -> minimum_should_match = Enum.count(options) should = Enum.map(options, fn {field, content} -> %{"match" => %{field => content}} end) repo.parse("bool", %{ "should" => should, "minimum_should_match" => minimum_should_match }) true -> {field, params} = Query.split_root(options) opts = to_match_params(params) new( field: field, query: Keyword.get(opts, :query), expand: Keyword.get(opts, :expand), operator: Keyword.get(opts, :operator), fuzziness: Keyword.get(opts, :fuzziness), minimum_should_match: Keyword.get(opts, :minimum_should_match) ) end end defp to_match_params(params) when is_map(params) do query = Map.get(params, "query") fuzziness = Map.get(params, "fuzziness", 0) operator = Map.get(params, "operator", "or") expand = 
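# to_match_params/1 normalizes the JSON-style params map into keyword options.
# Note the interplay with default_min_match/1 below: with operator "and" the
# default minimum_should_match is 0, and rewrite/2 later swaps that 0 for the
# full token count, so every analyzed term has to match.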
Map.get(params, "expand", false) minimum_should_match = Map.get(params, "minimum_should_match", default_min_match(params)) [ query: query, expand: expand, operator: operator, fuzziness: fuzziness, minimum_should_match: minimum_should_match ] end defp to_match_params(params), do: to_match_params(%{"query" => params}) defp default_min_match(params) do case Map.get(params, "operator") == "and" do true -> 0 false -> 1 end end end ================================================ FILE: lib/elasticlunr/dsl/query/not_query.ex ================================================ defmodule Elasticlunr.Dsl.NotQuery do use Elasticlunr.Dsl.Query alias Elasticlunr.Index alias Elasticlunr.Dsl.{Query, QueryRepository} defstruct ~w[inner_query]a @type t :: %__MODULE__{inner_query: struct()} @spec new(struct()) :: t() def new(inner_query), do: %__MODULE__{inner_query: inner_query} @impl true def parse(options, _query_options, _repo) do {key, value} = Query.split_root(options) key |> QueryRepository.parse(value, options) |> new() end @impl true def score(%__MODULE__{inner_query: inner_query}, %Index{} = index, options) do query_all = Index.all(index) query_score = QueryRepository.score(inner_query, index, options) matched_ids = Enum.map(query_score, & &1.ref) query_all |> Stream.reject(&(&1 in matched_ids)) |> Stream.map(&%{ref: &1, score: 1}) end end ================================================ FILE: lib/elasticlunr/dsl/query/terms_query.ex ================================================ defmodule Elasticlunr.Dsl.TermsQuery do use Elasticlunr.Dsl.Query alias Elasticlunr.Dsl.Query alias Elasticlunr.{Index, Token} defstruct ~w[minimum_should_match expand field terms boost fuzziness]a @type t :: %__MODULE__{ minimum_should_match: pos_integer(), expand: boolean(), field: Index.document_field(), terms: list(Token.t()), boost: integer(), fuzziness: integer() } @options ~w[boost expand fuzziness minimum_should_match] @spec new(keyword()) :: t() def new(opts) do attrs = %{ minimum_should_match: Keyword.get(opts, :minimum_should_match, 1), expand: Keyword.get(opts, :expand, false), field: Keyword.get(opts, :field, ""), terms: Keyword.get(opts, :terms, []), boost: Keyword.get(opts, :boost, 1), fuzziness: Keyword.get(opts, :fuzziness, 0) } struct!(__MODULE__, attrs) end @impl true def score( %__MODULE__{ boost: boost, field: field, expand: expand, terms: terms, fuzziness: fuzziness, minimum_should_match: minimum_should_match }, %Index{} = index, options \\ [] ) do terms = case expand do true -> Enum.map(terms, fn %Token{token: token} -> Regex.compile!("^#{token}.*") token -> Regex.compile!("^#{token}.*") end) false -> terms end query = [ field: field, terms: terms, fuzziness: fuzziness, minimum_should_match: minimum_should_match ] query = case Keyword.get(options, :filtered) do nil -> query filtered when is_list(filtered) -> Keyword.put(query, :docs, filtered) end docs = Index.terms(index, query) pick_highest_score = fn a, b -> if(hd(a) > hd(b), do: a, else: b) end Stream.map(docs, &elem(&1, 0)) |> Enum.reduce([], fn id, matched -> [score, doc] = Map.get(docs, id) |> Stream.map(fn doc -> [doc.tf * :math.pow(doc.idf, 2) * doc.norm, doc] end) |> Enum.reduce([0, nil], pick_highest_score) ob = %{ ref: id, field: field, score: score * boost, positions: Map.put(%{}, field, doc.positions) } matched ++ [ob] end) end @impl true def parse(options, _query_options, repo) do cond do Enum.empty?(options) -> repo.parse("match_all", %{}) Enum.count(options) > 1 -> should = options |> Enum.reject(fn {key, _field} -> key in @options 
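# The keys rejected above (boost, expand, fuzziness, minimum_should_match) are
# reserved options rather than fields; every remaining key/value pair is
# treated as a field-to-terms mapping and becomes its own terms clause under a
# bool/should query.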
end) |> Enum.map(fn {field, terms} -> %{"terms" => %{field => terms}} end) repo.parse("bool", %{"should" => should}) true -> {field, params} = Query.split_root(options) terms = get_terms(params) opts = to_terms_params(params) __MODULE__.new([field: field, terms: terms] ++ opts) end end defp get_terms(params) when is_map(params) do params |> Map.get("value") |> to_list() end defp get_terms(value), do: to_list(value) defp to_terms_params(params) when is_map(params) do [] |> update_options(params, :minimum_should_match) |> update_options(params, :fuzziness) |> update_options(params, :expand) |> update_options(params, :boost) end defp to_terms_params(params), do: to_terms_params(%{"value" => params}) defp update_options(opts, params, key) do case Map.get(params, to_string(key)) do nil -> opts value -> Keyword.put(opts, key, value) end end defp to_list(value) when is_list(value), do: value defp to_list(value), do: [value] end ================================================ FILE: lib/elasticlunr/dsl/query.ex ================================================ defmodule Elasticlunr.Dsl.Query do alias Elasticlunr.{Field, Index, Dsl.QueryRepository} @type score_results :: list(%{ score: integer(), ref: Field.document_ref() }) @callback filter(module :: struct(), index :: Index.t(), options :: keyword()) :: list() @callback score(module :: struct(), index :: Index.t(), options :: keyword()) :: score_results() | %Stream{} @callback rewrite(module :: struct(), index :: Index.t()) :: struct() @callback parse(options :: map(), query_options :: map(), repo :: module()) :: struct() @spec split_root(map() | tuple()) :: {atom(), any()} | any() def split_root(root) when is_map(root) do [root_key] = Map.keys(root) value = Map.get(root, root_key) {root_key, value} end def split_root({_, _} = root), do: root def split_root(root), do: root defmacro __using__(_) do quote location: :keep do @before_compile Elasticlunr.Dsl.Query @behaviour Elasticlunr.Dsl.Query end end defmacro __before_compile__(_) do mod = __CALLER__.module quote bind_quoted: [mod: mod] do if not Module.defines?(mod, {:filter, 3}) do @impl true def filter(query, index, options) do query |> QueryRepository.score(index, options) |> Enum.filter(&(&1.score > 0)) end end if not Module.defines?(mod, {:rewrite, 2}) do @impl true def rewrite(query, _index), do: query end end end end ================================================ FILE: lib/elasticlunr/dsl/query_repository.ex ================================================ defmodule Elasticlunr.Dsl.QueryRepository do alias Elasticlunr.Index alias Elasticlunr.Dsl.{BoolQuery, MatchAllQuery, MatchQuery, NotQuery, TermsQuery} def get("not"), do: NotQuery def get("bool"), do: BoolQuery def get("match"), do: MatchQuery def get("terms"), do: TermsQuery def get("match_all"), do: MatchAllQuery def get(element), do: raise("Unknown query type #{element}") @spec parse(binary(), map(), map(), module()) :: struct() def parse(module, options, query_options \\ %{}, repo \\ __MODULE__) do module = get(module) module.parse(options, query_options, repo) end @spec score(struct(), Index.t(), keyword()) :: list() def score(query, index, options \\ []) when is_struct(query) do query.__struct__.score(query, index, options) end @spec filter(struct(), Index.t(), keyword()) :: list() def filter(query, index, options \\ []) when is_struct(query) do query.__struct__.filter(query, index, options) end @spec rewrite(struct(), Index.t()) :: struct() def rewrite(query, index) when is_struct(query) do query.__struct__.rewrite(query, index) 
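# Dispatch here is dynamic: every DSL query is a struct, so score/3, filter/3
# and rewrite/2 route to the concrete module stored in __struct__.
#
# A minimal usage sketch (hypothetical index):
#
#   index = Elasticlunr.Index.new() |> Elasticlunr.Index.add_field("bio")
#   query = Elasticlunr.Dsl.QueryRepository.parse("match", %{"bio" => "hello"})
#   Elasticlunr.Dsl.QueryRepository.score(query, index)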
end end ================================================ FILE: lib/elasticlunr/manager/index_manager.ex ================================================ defmodule Elasticlunr.IndexManager do use GenServer alias Elasticlunr.{Index, IndexRegistry, IndexSupervisor, Storage} alias Elasticlunr.Utils.Process @spec preload() :: :ok def preload do Storage.all() |> Stream.each(&start/1) |> Stream.run() end @spec get(binary()) :: Index.t() | :not_running def get(name) do case loaded?(name) do true -> name |> via |> GenServer.call(:get) false -> :not_running end end @spec save(Index.t()) :: {:ok, Index.t()} | {:error, any()} def save(%Index{} = index) do with {:ok, _} <- start(index), :ok <- Storage.write(index) do {:ok, index} end end @spec update(Index.t()) :: Index.t() | :not_running def update(%Index{name: name} = index) do with true <- loaded?(name), index <- name |> via |> GenServer.call({:update, index}), :ok <- Storage.write(index) do index else false -> :not_running err -> err end end @spec remove(Index.t()) :: :ok | :not_running def remove(%Index{name: name}) do with [{pid, _}] <- Registry.lookup(IndexRegistry, name), :ok <- Storage.delete(name), :ok <- DynamicSupervisor.terminate_child(IndexSupervisor, pid) do :ok else _ -> :not_running end end @spec loaded?(binary()) :: boolean() def loaded?(name) do loaded_indices() |> Enum.any?(fn ^name -> true _ -> false end) end @spec loaded_indices :: [binary()] def loaded_indices do Process.active_processes(IndexSupervisor, IndexRegistry, __MODULE__) end @spec init(Index.t()) :: {:ok, Index.t()} def init(%Index{} = index) do {:ok, index} end @spec start_link(Index.t()) :: :ignore | {:error, any} | {:ok, pid} def start_link(%Index{name: name} = index) do GenServer.start_link(__MODULE__, index, name: via(name), hibernate_after: 5_000) end @spec child_spec(Index.t()) :: map() def child_spec(%Index{name: id} = index) do %{ id: {__MODULE__, id}, start: {__MODULE__, :start_link, [index]}, restart: :transient } end @spec via(binary()) :: {:via, Registry, {IndexRegistry, atom()}} def via(name) do {:via, Registry, {IndexRegistry, name}} end def handle_call(:get, _from, index) do {:reply, index, index} end def handle_call({:update, index}, _from, _state) do {:reply, index, index} end defp start(index) do DynamicSupervisor.start_child(IndexSupervisor, {__MODULE__, index}) end end ================================================ FILE: lib/elasticlunr/pipeline/stemmer.ex ================================================ defmodule Elasticlunr.Pipeline.Stemmer do alias Elasticlunr.Token @behaviour Elasticlunr.Pipeline @impl true def call(%Token{token: str} = token) do Token.update(token, token: Stemmer.stem(str)) end end ================================================ FILE: lib/elasticlunr/pipeline/stop_word_filter.ex ================================================ defmodule Elasticlunr.Pipeline.StopWordFilter do alias Elasticlunr.Token @behaviour Elasticlunr.Pipeline @default_stop_words [ "a", "able", "about", "across", "after", "all", "almost", "also", "am", "among", "an", "and", "any", "are", "as", "at", "be", "because", "been", "but", "by", "can", "cannot", "could", "dear", "did", "do", "does", "either", "else", "ever", "every", "for", "from", "get", "got", "had", "has", "have", "he", "her", "hers", "him", "his", "how", "however", "i", "if", "in", "into", "is", "it", "its", "just", "least", "let", "like", "likely", "may", "me", "might", "most", "must", "my", "neither", "no", "nor", "not", "of", "off", "often", "on", "only", "or", "other", "our", "own", 
"rather", "said", "say", "says", "she", "should", "since", "so", "some", "than", "that", "the", "their", "them", "then", "there", "these", "they", "this", "tis", "to", "too", "twas", "us", "wants", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with", "would", "yet", "you", "your" ] @impl true def call(%Token{token: token}) when token in @default_stop_words, do: nil def call(token), do: token end ================================================ FILE: lib/elasticlunr/pipeline/trimmer.ex ================================================ defmodule Elasticlunr.Pipeline.Trimmer do alias Elasticlunr.Token @behaviour Elasticlunr.Pipeline @impl true def call(%Token{token: str} = token) do str = Regex.replace(~r/^\W+/, str, "") str = Regex.replace(~r/\W+$/, str, "") Token.update(token, token: str) end end ================================================ FILE: lib/elasticlunr/pipeline.ex ================================================ defmodule Elasticlunr.Pipeline do alias Elasticlunr.{Token, Tokenizer} alias Elasticlunr.Pipeline.{Stemmer, StopWordFilter, Trimmer} defstruct callback: [] @type t :: %__MODULE__{ callback: list(module() | function()) } @callback call(Token.t()) :: Token.t() | list(Token.t()) | nil @spec new(list(module())) :: struct def new(callbacks \\ []) do struct!(__MODULE__, callback: callbacks) end @spec add(t(), module()) :: t() def add(%__MODULE__{callback: callback} = pipeline, module) do callback = Enum.uniq([module] ++ callback) %{pipeline | callback: callback} end @spec default_runners() :: list(module()) def default_runners, do: [Trimmer, StopWordFilter, Stemmer] @spec run(Elasticlunr.Pipeline.t(), list(Token.t())) :: list(Token.t()) def run(%__MODULE__{} = pipeline, tokens) when not is_list(tokens) do tokens = Tokenizer.tokenize(tokens) run(pipeline, tokens) end def run(%__MODULE__{callback: []}, tokens), do: tokens def run(%__MODULE__{callback: callback}, tokens) do callback |> Enum.reduce(tokens, fn module, acc -> excute_runner(acc, module) end) end @spec insert_before(t(), module(), module()) :: t() def insert_before(%__MODULE__{callback: callback} = pipeline, module, before_module) do case Enum.find_index(callback, &(&1 == before_module)) do nil -> add(pipeline, module) index -> callback = callback |> List.insert_at(index, module) |> Enum.uniq() %{pipeline | callback: callback} end end @spec insert_after(t(), module(), module()) :: t() def insert_after(%__MODULE__{callback: callback} = pipeline, module, before_module) do case Enum.find_index(callback, &(&1 == before_module)) do nil -> add(pipeline, module) index -> callback = callback |> List.insert_at(index + 1, module) |> Enum.uniq() %{pipeline | callback: callback} end end @spec remove(t(), module()) :: t() def remove(%__MODULE__{callback: callback} = pipeline, module) do callback = Enum.reject(callback, &(&1 == module)) %{pipeline | callback: callback} end defp excute_runner(tokens, module) do Enum.reduce(tokens, [], fn token, state -> output = execute(module, token) output = case is_list(output) do true -> output false -> [output] end output = Enum.filter(output, &(not is_nil(&1))) state ++ output end) end defp execute(callback, token) when is_function(callback), do: callback.(token) defp execute(module, token), do: module.call(token) end ================================================ FILE: lib/elasticlunr/protocol_implementations.ex ================================================ defimpl Elasticlunr.Serializer, for: Elasticlunr.Pipeline do alias 
Elasticlunr.Pipeline def serialize(%Pipeline{callback: callback}, opts) do cache = Keyword.get(opts, :pipeline, %{}) Enum.map_join(callback, ",", &Map.get(cache, &1, &1)) end end defimpl Elasticlunr.Serializer, for: Elasticlunr.Field do alias Elasticlunr.{Field, Serializer} def serialize( %Field{ pipeline: pipeline, store: store_documents, store_positions: store_positions }, opts ) do name = Keyword.get(opts, :name) pipeline = Serializer.serialize(pipeline, opts) "field#name:#{name}|pipeline:#{pipeline}|store_documents:#{store_documents}|store_positions:#{store_positions}" end end defimpl Elasticlunr.Serializer, for: Elasticlunr.DB do alias Elasticlunr.DB def serialize(%DB{name: name, options: options}, _opts) do options = Enum.map_join(options, ",", &to_string(&1)) "db#name:#{name}|options:#{options}" end end defimpl Elasticlunr.Serializer, for: Elasticlunr.Index do alias Elasticlunr.{Index, Serializer} def serialize(%Index{db: db, fields: fields, name: name, pipeline: pipeline, ref: ref}, _opts) do pipeline_opt = Serializer.serialize(pipeline) db_settings = Serializer.serialize(db) {_, pipeline_map} = Enum.reduce(pipeline.callback, {0, %{}}, fn callback, {index, map} -> {index + 1, Map.put(map, callback, index)} end) settings = "settings#name:#{name}|ref:#{ref}|pipeline:#{pipeline_opt}" fields_settings = Stream.map(fields, fn {name, field} -> Serializer.serialize(field, name: name, pipeline: pipeline_map) end) [settings, db_settings, fields_settings] |> Stream.flat_map(fn list when is_list(list) -> list value when is_binary(value) -> [value] value -> value end) end end defimpl Jason.Encoder, for: Tuple do def encode({start_pos, end_pos}, opts) do [start_pos, end_pos] |> Jason.Encode.list(opts) end end defimpl Elasticlunr.Deserializer, for: Stream do alias Elasticlunr.Deserializer.Parser def deserialize(data) do Parser.process(data) end end defimpl Elasticlunr.Deserializer, for: File.Stream do alias Elasticlunr.Deserializer.Parser def deserialize(data) do Parser.process(data) end end ================================================ FILE: lib/elasticlunr/serializer.ex ================================================ defprotocol Elasticlunr.Serializer do @spec serialize(struct(), keyword()) :: binary() | function() def serialize(index, opts \\ []) end ================================================ FILE: lib/elasticlunr/storage/blackhole.ex ================================================ defmodule Elasticlunr.Storage.Blackhole do @moduledoc """ As the name implies, nothing is written nowhere. """ use Elasticlunr.Storage @impl true def load_all, do: [] @impl true def write(_index), do: :ok @impl true def read(_name), do: {:error, "can't read index from blackhole"} @impl true def delete(_name), do: :ok end ================================================ FILE: lib/elasticlunr/storage/disk.ex ================================================ defmodule Elasticlunr.Storage.Disk do @moduledoc """ This storage provider writes data to the local disk of the running application. 
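Each index is persisted as a pair of files under the configured directory: `<name>.index` holds the serialized index structure and `<name>.data` holds the DB contents. A minimal round-trip sketch, assuming the provider has been configured as shown below:

```elixir
index = Elasticlunr.Index.new(name: "posts")
:ok = Elasticlunr.Storage.Disk.write(index)
%Elasticlunr.Index{name: "posts"} = Elasticlunr.Storage.Disk.read("posts")
```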
```elixir config :elasticlunr, storage: Elasticlunr.Storage.Disk config :elasticlunr, Elasticlunr.Storage.Disk, directory: "/path/to/project/storage" ``` """ use Elasticlunr.Storage alias Elasticlunr.{DB, Deserializer, Index, Serializer} require Logger @data_file_ext "data" @index_file_ext "index" @extensions [@data_file_ext, @index_file_ext] @impl true def write(%Index{db: db, name: name} = index) do directory = config(:directory, ".") data = Serializer.serialize(index) with %{data: data_file, index: index_file} <- filenames(directory, name), :ok <- DB.to(db, file: data_file) do write_serialized_index_to_file(index_file, data) end end @impl true def read(name) do directory = config(:directory, ".") %{data: data_file, index: index_file} = filenames(directory, name) index = File.stream!(index_file, ~w[compressed]a) |> Deserializer.deserialize() with %Index{db: db} <- index, {:ok, db} <- DB.from(db, file: data_file) do Index.update_documents_size(%{index | db: db}) else false -> Logger.info("[elasticlunr] unable to load data for index #{index.name}") index end end @impl true def load_all do files() |> Stream.filter(&String.ends_with?(&1, @index_file_ext)) |> Stream.map(fn file -> name = without_ext(file, @index_file_ext) read(name) end) end @impl true def delete(name) do directory = config(:directory, ".") %{data: data_file, index: index_file} = filenames(directory, name) with :ok <- File.rm(index_file) do File.rm(data_file) end end @spec files() :: list(binary()) def files do directory = config(:directory, ".") extensions = Enum.map_join(@extensions, ",", & &1) match = Path.join(directory, "*.{#{extensions}}") Path.wildcard(match) |> Enum.map(&Path.expand/1) end @spec write_serialized_index_to_file(binary(), Enum.t()) :: :ok def write_serialized_index_to_file(path, data) do data |> Stream.into(File.stream!(path, ~w[compressed]a), &"#{&1}\n") |> Stream.run() end defp filenames(directory, name) do %{ index: Path.join(directory, "#{name}.#{@index_file_ext}"), data: Path.join(directory, "#{name}.#{@data_file_ext}") |> String.to_charlist() } end defp without_ext(file, ext), do: Path.basename(file, ".#{ext}") end ================================================ FILE: lib/elasticlunr/storage/provider.ex ================================================ defmodule Elasticlunr.Storage.Provider do @moduledoc false alias Elasticlunr.Index @callback load_all() :: Enum.t() @callback read(name :: binary()) :: Index.t() | {:error, any()} @callback delete(name :: binary()) :: :ok | {:error, any()} @callback write(index :: Index.t()) :: :ok | {:error, any()} end ================================================ FILE: lib/elasticlunr/storage.ex ================================================ defmodule Elasticlunr.Storage do @moduledoc """ This is the storage interface that's used by the index manager.
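Every call resolves the provider from the application environment at call time and delegates to it, so providers can be swapped without restarting, e.g. in tests. A small sketch:

```elixir
Application.put_env(:elasticlunr, :storage, Elasticlunr.Storage.Disk)
:ok = Elasticlunr.Storage.write(Elasticlunr.Index.new(name: "posts"))
```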
```elixir config :elasticlunr, storage: Elasticlunr.Storage.Blackhole # this is the default provider ``` """ alias Elasticlunr.Index alias Elasticlunr.Storage.Blackhole @spec all() :: Enum.t() def all do provider().load_all() end @spec write(Index.t()) :: :ok | {:error, any()} def write(%Index{} = index) do provider().write(index) end @spec read(binary()) :: Index.t() | {:error, any()} def read(index_name) do provider().read(index_name) end @spec delete(binary()) :: :ok | {:error, any()} def delete(index_name) do provider().delete(index_name) end defp provider, do: Application.get_env(:elasticlunr, :storage, Blackhole) defmacro __using__(_) do quote location: :keep do @behaviour Elasticlunr.Storage.Provider defp config(key, default \\ nil) do Keyword.get(config_all(), key, default) end defp config_all, do: Application.get_env(:elasticlunr, __MODULE__, []) end end end ================================================ FILE: lib/elasticlunr/tokenizer.ex ================================================ defmodule Elasticlunr.Tokenizer do alias Elasticlunr.Token @default_separator ~r/[\s\-]+/ @spec tokenize(binary() | number(), Regex.t()) :: list(Token.t()) def tokenize(str, separator \\ @default_separator) def tokenize(str, separator) when is_binary(str), do: split(str, separator) def tokenize(num, separator) when is_number(num) do num |> to_string() |> split(separator) end defp split(str, separator) do slice_end = 0 slice_start = 0 str_length = String.length(str) str |> String.downcase() |> run_split(separator, slice_start, slice_end, str_length, []) end defp run_split(str, separator, slice_start, slice_end, str_length, tokens) when slice_end <= str_length do char = String.at(str, slice_end) slice_length = slice_end - slice_start with true <- match_string?(char, separator) || slice_end == str_length, {:s, true} <- {:s, slice_length > 0} do token = str |> String.slice(slice_start, slice_length) |> to_token(slice_start, slice_length) tokens = tokens ++ [token] slice_start = slice_end + 1 run_split(str, separator, slice_start, slice_end + 1, str_length, tokens) else {:s, false} -> index = slice_end + 1 run_split(str, separator, index, index, str_length, tokens) false -> run_split(str, separator, slice_start, slice_end + 1, str_length, tokens) end end defp run_split(_str, _separator, _slice_start, _slice_end, _str_length, tokens) do tokens end defp match_string?(nil, _separator), do: false defp match_string?(char, separator) do String.match?(char, separator) end defp to_token(str, start_index, end_index) do Token.new(str, %{ end: end_index, start: start_index }) end end ================================================ FILE: lib/elasticlunr/utils/process.ex ================================================ defmodule Elasticlunr.Utils.Process do @spec child_pid?(tuple, atom) :: boolean def child_pid?({:undefined, pid, :worker, [mod]}, mod) when is_pid(pid), do: true def child_pid?(_child, _module), do: false @spec id_from_pid(tuple, atom, atom) :: [atom | binary] def id_from_pid({:undefined, pid, :worker, [mod]}, registry, mod), do: Registry.keys(registry, pid) @spec active_processes(atom, atom, atom) :: [any()] def active_processes(supervisor, registry, module) do supervisor |> DynamicSupervisor.which_children() |> Enum.filter(&child_pid?(&1, module)) |> Enum.flat_map(&id_from_pid(&1, registry, module)) end end ================================================ FILE: lib/elasticlunr/utlis.ex ================================================ defmodule Elasticlunr.Utils do @spec 
levenshtein_distance(binary, binary) :: integer() def levenshtein_distance(a, b) do ta = String.downcase(a) |> to_charlist |> List.to_tuple() tb = String.downcase(b) |> to_charlist |> List.to_tuple() m = tuple_size(ta) n = tuple_size(tb) costs = Enum.reduce(0..m, %{}, fn i, acc -> Map.put(acc, {i, 0}, i) end) costs = Enum.reduce(0..n, costs, fn j, acc -> Map.put(acc, {0, j}, j) end) Enum.reduce(0..(n - 1), costs, fn j, acc -> Enum.reduce(0..(m - 1), acc, fn i, map -> # credo:disable-for-lines:2 d = if elem(ta, i) == elem(tb, j) do map[{i, j}] else # deletion Enum.min([ map[{i, j + 1}] + 1, # insertion map[{i + 1, j}] + 1, # substitution map[{i, j}] + 1 ]) end Map.put(map, {i + 1, j + 1}, d) end) end) |> Map.get({m, n}) end end ================================================ FILE: mix.exs ================================================ defmodule Elasticlunr.MixProject do use Mix.Project @source_url "https://github.com/heywhy/ex_elasticlunr" def project do [ app: :elasticlunr, version: "0.6.4", elixir: "~> 1.11", elixirc_paths: elixirc_paths(Mix.env()), start_permanent: Mix.env() == :prod, description: description(), package: package(), aliases: aliases(), deps: deps(), source_url: @source_url, # Coverage test_coverage: [tool: ExCoveralls], preferred_cli_env: [ coveralls: :test, "coveralls.detail": :test, "coveralls.post": :test, "coveralls.html": :test, "coveralls.json": :test ], # Dialyxir dialyzer: [ plt_file: {:no_warn, "priv/plts/dialyzer.plt"} ], # Docs name: "Elasticlunr", homepage_url: "https://hexdocs.pm/elasticlunr", docs: [ main: "readme", extras: ["README.md", "LICENSE"] ] ] end # Run "mix help compile.app" to learn about applications. def application do [ extra_applications: [:logger, :crypto], mod: {Elasticlunr.Application, []} ] end # Specifies which paths to compile per environment. defp elixirc_paths(:test), do: ["lib", "test/support"] defp elixirc_paths(_), do: ["lib"] # Run "mix help deps" to learn about dependencies. defp deps do [ {:credo, "~> 1.5", only: [:dev, :test], runtime: false}, {:dialyxir, "~> 1.1", only: :dev, runtime: false}, {:ex_doc, "~> 0.25", only: :dev, runtime: false}, {:excoveralls, "~> 0.14", only: :test}, {:faker, "~> 0.16", only: :test}, {:jason, "~> 1.3"}, {:mox, "~> 1.0", only: :test}, {:stemmer, "~> 1.0"}, {:uniq, "~> 0.4"} ] end defp aliases do [ test: ~w[format credo test] ] end defp description do "Elasticlunr is a lightweight full-text search engine. It's a port of Elasticlunr.js with more improvements." 
end defp package do [ files: ["lib", "mix.exs", "README.md"], maintainers: ["Atanda Rasheed"], licenses: ["MIT License"], links: %{ "GitHub" => @source_url, "Docs" => "https://hexdocs.pm/elasticlunr" } ] end end ================================================ FILE: test/core/document_store_test.exs ================================================ defmodule Elasticlunr.DocumentStoreTest do use ExUnit.Case alias Elasticlunr.DocumentStore describe "creating a new document store" do test "defaults save attribute to true" do assert %DocumentStore{documents: %{}, document_info: %{}, length: 0, save: true} = DocumentStore.new() end test "without saving documents" do assert %DocumentStore{documents: %{}, document_info: %{}, length: 0, save: false} = DocumentStore.new(false) end end describe "adding document to document store" do test "adds a new document and saves document" do document = %{id: 10} document_store = DocumentStore.new() assert %DocumentStore{documents: %{10 => ^document}} = DocumentStore.add(document_store, 10, document) end test "saves document and updates length" do document_store = DocumentStore.new() assert document_store = DocumentStore.add(document_store, 10, %{id: 10}) assert %DocumentStore{length: 1} = document_store assert %DocumentStore{length: 2} = DocumentStore.add(document_store, 1, %{id: 1}) end test "updates document data and does not update length" do document_store = DocumentStore.new() assert document_store = DocumentStore.add(document_store, 10, %{id: 10}) assert %DocumentStore{length: 1, documents: %{10 => %{id: 10}}} = document_store assert %DocumentStore{length: 1, documents: %{10 => %{id: 1}}} = DocumentStore.add(document_store, 10, %{id: 1}) end test "checks if document exists" do document_store = DocumentStore.new() assert document_store = DocumentStore.add(document_store, 10, %{id: 10}) assert DocumentStore.exists?(document_store, 10) refute DocumentStore.exists?(document_store, 100) end end describe "retrieving document from document store" do test "returns document" do document = %{id: 10} document_store = DocumentStore.new() |> DocumentStore.add(10, document) assert ^document = DocumentStore.get(document_store, 10) end test "returns nil for non-existing document" do document_store = DocumentStore.new() assert is_nil(DocumentStore.get(document_store, 10)) end test "returns nil for non-persistent store" do document = %{id: 10} document_store = DocumentStore.new(false) |> DocumentStore.add(10, document) refute ^document = DocumentStore.get(document_store, 10) end end describe "removing document from document store" do test "removes document" do document = %{id: 10} document_store = DocumentStore.new() |> DocumentStore.add(10, document) assert %DocumentStore{length: 1, documents: %{10 => %{id: 10}}} = document_store assert %DocumentStore{length: 0, documents: %{}} = DocumentStore.remove(document_store, 10) end end describe "adding field length of document field" do test "adds field length" do document = %{id: 10} document_store = DocumentStore.new() |> DocumentStore.add(10, document) assert %DocumentStore{ length: 1, documents: %{10 => %{id: 10}}, document_info: %{10 => %{name: 20}} } = DocumentStore.add_field_length(document_store, 10, :name, 20) end test "updates field length" do document = %{id: 10} document_store = DocumentStore.new() |> DocumentStore.add(10, document) assert %DocumentStore{document_info: %{10 => %{name: 20}}} = DocumentStore.add_field_length(document_store, 10, :name, 20) assert %DocumentStore{document_info: %{10 => %{name: 36}}} =
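# add_field_length/4 records the initial token count for a document field,
# while update_field_length/4 overwrites a previously recorded value.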
DocumentStore.update_field_length(document_store, 10, :name, 36) end end describe "retrieving document field length" do test "returns nil" do document = %{id: 10} document_store = DocumentStore.new() |> DocumentStore.add(10, document) assert is_nil(DocumentStore.get_field_length(document_store, 10, :name)) end test "returns field length" do document = %{id: 10} document_store = DocumentStore.new() |> DocumentStore.add(10, document) |> DocumentStore.add_field_length(10, :name, 20) assert 20 = DocumentStore.get_field_length(document_store, 10, :name) end end describe "reset document store" do test "clears store attributes" do document = %{id: 10} assert document_store = DocumentStore.new() |> DocumentStore.add(10, document) |> DocumentStore.add_field_length(10, :name, 20) assert %DocumentStore{} = document_store assert %DocumentStore{documents: %{}, document_info: %{}, length: 0, save: true} = DocumentStore.reset(document_store) end end end ================================================ FILE: test/core/field_test.exs ================================================ defmodule Elasticlunr.FieldTest do use ExUnit.Case alias Elasticlunr.{DB, Field, Pipeline, Token} setup context do opts = [ pipeline: Pipeline.new(), db: DB.init(:field_test, ~w[public]a) ] field = Field.new(opts) |> Field.add([%{id: 1, content: "hello world"}]) :ok = on_exit(fn -> true = DB.destroy(field.db) end) Map.put(context, :field, field) end test "tokens/1", %{field: field} do tokens = Field.tokens(field) assert %Stream{} = tokens refute Enum.empty?(tokens) assert [%{tf: 1, documents: documents} | _] = Enum.to_list(tokens) assert [1] = Enum.to_list(documents) end test "documents/1", %{field: field} do assert documents = Field.documents(field) assert [1] = Enum.to_list(documents) end test "term_frequency/2", %{field: field} do assert tf = Field.term_frequency(field, "hello") assert [{1, 1.0}] = Enum.to_list(tf) refute Field.term_frequency(field, "missing") end test "has_token/2", %{field: field} do assert Field.has_token(field, "hello") refute Field.has_token(field, "missing") end test "get_token/2", %{field: field} do assert %{term: "hello", tf: 1} = Field.get_token(field, "hello") refute Field.get_token(field, "missing") end test "set_query_pipeline/2", %{field: field} do pipeline = Pipeline.new() assert %Field{query_pipeline: nil} = field assert %Field{query_pipeline: ^pipeline} = Field.set_query_pipeline(field, pipeline) end test "add/2", %{field: field} do assert Enum.count(Field.documents(field)) == 1 assert field = Field.add(field, [%{id: 10, content: "testing"}]) assert Enum.count(Field.documents(field)) == 2 assert Field.has_token(field, "testing") end test "length/2", %{field: field} do assert Field.length(field, :ids) == 1 assert Field.length(field, :idf, "hello") == 1 assert Field.length(field, :term, "world") == 1 assert Field.length(field, :tf, "world") == 1 end test "update/2", %{field: field} do assert field = Field.update(field, [%{id: 1, content: "worse"}]) assert Field.has_token(field, "worse") assert Enum.count(Field.documents(field)) == 1 end test "remove/2", %{field: field} do assert field = Field.remove(field, [1]) refute Field.has_token(field, "worse") assert Enum.empty?(Field.documents(field)) end test "analyze/3", %{field: field} do assert [%Token{token: "coming"}] = Field.analyze(field, "coming", []) assert [%Token{token: "coming"}] = Field.analyze(field, "coming", is_query: true) assert [%Token{token: "foo"}] = field |> Field.set_query_pipeline(Pipeline.new([fn _ -> Token.new("foo") end])) |> 
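# With a query pipeline configured, is_query: true routes analysis through the
# query pipeline rather than the field's indexing pipeline, so the stub
# callback above rewrites every token to "foo".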
Field.analyze("coming", is_query: true) end test "terms/3", %{field: field} do assert %{1 => _} = Field.terms(field, terms: ["hello"]) assert %{1 => _} = Field.terms(field, terms: [~r/hello/]) assert %{1 => _} = Field.terms(field, terms: ["hello"], fuzziness: 2) assert Enum.empty?(Field.terms(field, terms: ["missing"])) end end ================================================ FILE: test/core/index_test.exs ================================================ defmodule Elasticlunr.IndexTest do use ExUnit.Case alias Elasticlunr.{Field, Index, Pipeline, Token} alias Faker.Address.En, as: Address describe "creating an index" do test "creates a new instance" do assert %Index{name: name} = Index.new() assert is_binary(name) assert %Index{name: :test_index, ref: "id", fields: %{}} = Index.new(name: :test_index) assert %Index{name: :test_index, ref: "name", fields: %{}} = Index.new(name: :test_index, ref: "name") end test "creates a new instance and populate fields" do assert %Index{fields: %{"id" => %Field{}, "name" => %Field{}}} = Index.add_field(Index.new(), "name") end end describe "modifying an index" do test "adds new fields" do index = Index.new() assert %Index{fields: %{}} = index assert index = Index.add_field(index, "name") assert %Index{fields: %{"name" => %Field{}}} = index assert %Index{fields: %{"name" => %Field{}, "bio" => %Field{}}} = Index.add_field(index, "bio") end test "save document" do index = Index.add_field(Index.new(), "name") assert %Index{fields: %{"name" => %Field{store: true}}} = index assert %Index{fields: %{"name" => %Field{store: false}}} = Index.save_document(index, false) end test "updates a field" do index = Index.new() assert %Index{fields: %{}} = index assert index = Index.add_field(index, "name") assert field = Index.get_field(index, "name") assert %Field{query_pipeline: nil} = field pipeline = Pipeline.new() assert %Field{query_pipeline: ^pipeline} = index |> Index.update_field("name", %{field | query_pipeline: pipeline}) |> Index.get_field("name") end test "fails to update missing field" do index = Index.new() assert %Index{fields: %{}} = index assert_raise RuntimeError, "Unknown field address in index", fn -> Index.update_field(index, "address", Field.new([])) end end end describe "fiddling with an index" do test "adds document" do index = Index.new() |> Index.add_field("bio") assert index = Index.add_documents(index, [ %{ "id" => 10, "bio" => Faker.Lorem.paragraph() } ]) assert %Index{documents_size: 1} = index assert %Index{documents_size: 2} = Index.add_documents(index, [ %{ "id" => 29, "bio" => Faker.Lorem.paragraph() } ]) end @tag :skip test "adds documents and flatten nested attributes" do index = Index.new() |> Index.add_field("name") |> Index.add_field("address") document = %{ "id" => 20, "name" => "nelson", "address" => %{ "city" => Address.city(), "country" => Address.country_code(), "line1" => Address.street_address(), "line2" => Address.secondary_address(), "state" => Address.state() } } index = Index.add_documents(index, [document]) query = %{ "bool" => %{ "should" => %{ "match" => %{"address.city" => get_in(document, ~w[address city])} } } } assert %Index{fields: %{"address.city" => %Field{}}, documents_size: 1} = index refute Index.search(index, %{"query" => query}) |> Enum.empty?() end @tag :skip test "removes documents with nested attributes" do index = Index.new() |> Index.add_field("name") |> Index.add_field("address") document = %{ "id" => 20, "name" => "nelson", "address" => %{ "city" => Address.city(), "country" => Address.country_code(), 
"line1" => Address.street_address(), "line2" => Address.secondary_address(), "state" => Address.state() } } index = Index.add_documents(index, [document]) assert %Index{fields: %{"address.city" => %Field{}}, documents_size: 1} = index assert %Index{fields: %{"address.city" => %Field{}}, documents_size: 0} = Index.remove_documents(index, [20]) end test "allows addition of document with empty field" do index = Index.new() |> Index.add_field("bio") |> Index.add_field("title") assert index = Index.add_documents(index, [%{"id" => 10, "bio" => "", "title" => "test"}]) assert term_frequency = index |> Index.get_field("title") |> Field.term_frequency("test") assert index |> Index.get_field("title") |> Field.length(:tf, "test") |> Kernel.==(1) assert term_frequency |> Enum.find(&(elem(&1, 0) == 10)) |> Kernel.==({10, 1}) end @tag :skip test "fails when adding duplicate document" do index = Index.add_field(Index.new(), "bio") document = %{ "id" => 10, "bio" => Faker.Lorem.paragraph() } assert index = Index.add_documents(index, [document]) assert_raise RuntimeError, "Document id 10 already exists in the index", fn -> Index.add_documents(index, [document]) end end test "removes document" do index = Index.new() |> Index.add_field("id") |> Index.add_field("bio") document = %{ "id" => 10, "bio" => "this is a test" } document_2 = %{ "id" => 30, "bio" => "this is another test" } assert index = Index.add_documents(index, [document_2, document]) assert %Index{documents_size: 2} = index assert index = Index.remove_documents(index, [10]) assert %Index{documents_size: 1} = index assert field = Index.get_field(index, "bio") refute Field.has_token(field, "a") assert Field.has_token(field, "another") assert is_nil(Field.get_token(field, "a")) assert %{idf: idf} = Field.get_token(field, "another") assert idf > 0 %{documents: documents} = Field.get_token(field, "another") assert [30] = Enum.to_list(documents) end test "does not remove unknown document" do index = Index.add_field(Index.new(), "bio") document = %{ "id" => 10, "bio" => Faker.Lorem.paragraph() } assert index = Index.add_documents(index, [document]) assert %Index{documents_size: 1} = index assert %Index{documents_size: 1} = Index.remove_documents(index, [11]) end test "update existing document" do index = Index.add_field(Index.new(), "bio") document = %{ "id" => 10, "bio" => Faker.Lorem.paragraph() } index = Index.add_documents(index, [document]) assert %Index{documents_size: 1} = index updated_document = %{document | "bio" => Faker.Lorem.paragraph()} assert %Index{documents_size: 1} = Index.update_documents(index, [updated_document]) end test "search for a document" do index = Index.add_field(Index.new(), "bio") document = %{ "id" => 10, "bio" => "foo" } index = Index.add_documents(index, [document]) assert Index.search(index, "foo") |> Enum.count() == 1 updated_document = %{document | "bio" => "bar"} index = Index.update_documents(index, [updated_document]) assert Index.search(index, "bar") |> Enum.count() == 1 assert Index.search(index, "foo") |> Enum.empty?() end test "allows the use of multiple, different pipelines for searching and indexing" do index = Index.add_field(Index.new(), "info") callback = fn %Token{token: token} -> tokens = [token] case token == "foo" do false -> tokens true -> ~w[bar baz barry] ++ tokens end end query_pipeline = Pipeline.new([callback]) field = index |> Index.get_field("info") |> Field.set_query_pipeline(query_pipeline) index = Index.update_field(index, "info", field) index = index |> Index.add_documents([ %{"id" => 
"a", "info" => "Barry had a beer with Fred in the bar"}, %{"id" => "b", "info" => "the bar is empty"} ]) results = Index.search(index, %{ "query" => %{ "match" => %{"info" => "foo"} } }) assert Enum.count(results) == 2 assert [%{score: score_1}, %{score: score_2}] = results assert score_2 < score_1 results = Index.search(index, %{ "query" => %{ "match" => %{"info" => "fred"} } }) assert Enum.count(results) == 1 end end end ================================================ FILE: test/deserializer_test.exs ================================================ defmodule Elasticlunr.DeserializerTest do use ExUnit.Case alias Elasticlunr.{Deserializer, Index} test "deserialize index" do data = [ "settings#name:index|ref:id|pipeline:", "db#name:elasticlunr_index|options:compressed,named_table,set,public", "field#name:id|pipeline:Elixir.Elasticlunr.Index.IdPipeline|store_documents:false|store_positions:false" ] index = to_stream(data) |> Deserializer.deserialize() assert %Index{name: "index"} = index end defp to_stream(data) do Stream.iterate(0, &(&1 + 1)) |> Stream.map(&Enum.at(data, &1)) |> Stream.take(Enum.count(data)) end end ================================================ FILE: test/dsl_test.exs ================================================ defmodule Elasticlunr.DslTest do use ExUnit.Case alias Elasticlunr.{Index, Pipeline, Token} alias Elasticlunr.Dsl.{BoolQuery, MatchAllQuery, MatchQuery, NotQuery, TermsQuery} alias Elasticlunr.Dsl.QueryRepository setup context do callback = fn %Token{} = token -> token str -> str |> String.split(" ") |> String.downcase() |> Enum.map(&Token.new(&1)) end pipeline = Pipeline.new([callback]) index = Index.new() |> Index.add_field("content", pipeline: pipeline) |> Index.add_documents([ %{"id" => 1, "content" => "The quick fox jumped over the lazy dog"}, %{ "id" => 2, "content" => "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas viverra enim non purus rutrum porta ut non urna. Nullam eu ante eget nisi laoreet pretium. Curabitur varius velit vel viverra facilisis. Pellentesque et condimentum mauris. Quisque faucibus varius interdum. Fusce cursus pretium tempus. Ut gravida tortor et mi dignissim sagittis. Aliquam ullamcorper dignissim arcu sollicitudin fermentum. Nunc elementum tortor ex, sit amet posuere lectus accumsan quis. Vivamus sit amet eros blandit, sagittis quam at, vulputate felis. Ut faucibus pretium feugiat. Fusce diam felis, euismod ac tellus id, blandit venenatis dolor. Nullam porttitor suscipit diam, a feugiat dui pharetra at." }, %{"id" => 3, "content" => "Lorem dog"}, %{ "id" => 4, "content" => "livebook is elixir's own jupyter. it's a very impressive impression." }, %{ "id" => 5, "content" => "there are lots of contributors to the elixir project and many cool projects using elixir, ex. 
livebook, elixir_nx and so on" } ]) Map.put(context, :index, index) end describe "match_all" do test "parses correctly" do assert %MatchAllQuery{boost: 2.5} = QueryRepository.parse("match_all", %{"boost" => 2.5}) end test "correctly operates match_all query", %{index: index} do query = MatchAllQuery.new() assert result = MatchAllQuery.score(query, index, []) assert Enum.count(result) == 5 for %{score: score} <- result do assert score == 1 end end end describe "terms" do test "parses correctly" do assert %MatchAllQuery{} = QueryRepository.parse("terms", %{}) assert %TermsQuery{field: "name", terms: ["nelson"]} = QueryRepository.parse("terms", %{"name" => "nelson"}) assert %TermsQuery{field: "name", terms: ["kim"]} = QueryRepository.parse("terms", %{ "name" => %{"value" => "kim"} }) assert %BoolQuery{ should: [ %TermsQuery{field: "country", terms: ["us"], boost: 1}, %TermsQuery{field: "name", terms: ["john"], boost: 1} ] } = QueryRepository.parse("terms", %{"name" => "john", "country" => "us"}) end test "performs base functionality", %{index: index} do query = TermsQuery.new( field: "content", terms: ["fox"] ) assert result = TermsQuery.score(query, index, []) assert Enum.count(result) == 1 assert [%{ref: 1}] = result end test "boost", %{index: index} do non_boost_query = TermsQuery.new( field: "content", terms: ["fox"] ) boost_query = TermsQuery.new( field: "content", terms: ["fox"], boost: 2 ) assert boost_result = TermsQuery.score(boost_query, index, []) assert non_boost_result = TermsQuery.score(non_boost_query, index, []) assert Enum.count(boost_result) == Enum.count(non_boost_result) assert [%{score: score_1}] = boost_result assert [%{score: score_2}] = non_boost_result assert score_1 == score_2 * 2 end end describe "bool" do test "parses correctly" do assert %BoolQuery{must: %TermsQuery{field: "country", terms: ["us"]}} = QueryRepository.parse("bool", %{"must" => %{"terms" => %{"country" => "us"}}}) assert %BoolQuery{ minimum_should_match: 0, must: %TermsQuery{field: "country", terms: ["us"]}, must_not: %TermsQuery{field: "gender", terms: ["male"]}, filter: [%MatchQuery{field: "balance", query: 1000}], should: [%MatchQuery{field: "account_type", query: "savings"}] } = QueryRepository.parse("bool", %{ "must" => %{"terms" => %{"country" => "us"}}, "must_not" => %{"terms" => %{"gender" => "male"}}, "filter" => %{"match" => %{"balance" => 1000}}, "should" => %{"match" => %{"account_type" => "savings"}} }) end test "filters via must functionality", %{index: index} do query = BoolQuery.new( must: TermsQuery.new(field: "content", terms: ["lorem"]), should: [ TermsQuery.new(field: "content", terms: ["dog"]) ] ) assert BoolQuery.score(query, index, []) |> Enum.count() == 1 end test "filters via must_not functionality", %{index: index} do query = BoolQuery.new( must: TermsQuery.new(field: "content", terms: ["lorem"]), must_not: TermsQuery.new(field: "content", terms: ["ipsum"]), should: [ TermsQuery.new(field: "content", terms: ["dog"]) ] ) refute BoolQuery.score(query, index, []) |> Enum.empty?() end test "checks if document has positions before trying to access it", %{index: index} do assert Index.search(index, "me") |> Enum.empty?() end end describe "match" do test "parses correctly" do assert %MatchAllQuery{boost: 1} = QueryRepository.parse("match", %{}) assert %MatchQuery{field: "country", query: "us"} = QueryRepository.parse("match", %{"country" => "us"}) assert %MatchQuery{field: "country", query: "us", operator: "and"} = QueryRepository.parse("match", %{ "country" => %{"query" => "us",
"operator" => "and"} }) assert %BoolQuery{ should: [ %MatchQuery{field: "city", query: "arizona"}, %MatchQuery{field: "country", query: "us"} ] } = QueryRepository.parse("match", %{ "city" => "arizona", "country" => "us" }) end test "performs base functionality", %{index: index} do query = MatchQuery.new(field: "content", query: "brown fox") assert results = MatchQuery.score(query, index, []) assert Enum.count(results) == 1 assert [%{ref: 1}] = results end test "honours minimum_should_match", %{index: index} do query = MatchQuery.new(field: "content", query: "brown fox quick", minimum_should_match: 2) assert results = MatchQuery.score(query, index, []) assert Enum.count(results) == 1 assert [%{ref: 1}] = results end test "honours and operator", %{index: index} do query = MatchQuery.new( field: "content", query: "fox quick", operator: "and" ) assert results = MatchQuery.score(query, index, []) assert Enum.count(results) == 1 assert [%{ref: 1}] = results end end describe "not" do test "parses correctly" do assert %NotQuery{inner_query: %BoolQuery{}} = QueryRepository.parse("not", %{"bool" => %{}}) assert %NotQuery{inner_query: %BoolQuery{}} = QueryRepository.parse("not", %{ "bool" => %{ "should" => [ %{"match" => %{"name" => "john"}} ] } }) end test "applies inner query", %{index: index} do query = NotQuery.new( BoolQuery.new( should: [ MatchQuery.new(field: "content", query: "quick"), MatchQuery.new(field: "content", query: "lorem") ] ) ) assert results = NotQuery.score(query, index, []) assert Enum.count(results) == 2 end end end ================================================ FILE: test/manager/index_manager_test.exs ================================================ defmodule Elasticlunr.IndexManagerTest do use ExUnit.Case alias Elasticlunr.{Index, IndexManager} describe "working with index manager" do test "saves an index" do index = Index.new() assert {:ok, ^index} = IndexManager.save(index) end test "fails when saving duplicate index" do index = Index.new() assert {:ok, ^index} = IndexManager.save(index) assert {:error, {:already_started, _}} = IndexManager.save(index) end test "updates existing index" do index = Index.new() assert {:ok, ^index} = IndexManager.save(index) assert ^index = IndexManager.update(index) end test "fails update action for non-existent index" do index = Index.new() assert :not_running = IndexManager.update(index) end test "removes an index" do index = Index.new() assert {:ok, ^index} = IndexManager.save(index) assert :ok = IndexManager.remove(index) assert :not_running = IndexManager.get(index.name) end test "fails to remove a non-existent index" do index = Index.new() assert :not_running = IndexManager.remove(index) end test "return a running instance" do index = Index.new() {:ok, _} = IndexManager.save(index) assert ^index = IndexManager.get(index.name) assert :not_running = IndexManager.get("unknown-index") end end end ================================================ FILE: test/pipeline/stemmer_test.exs ================================================ defmodule Elasticlunr.Pipeline.StemmerTest do use ExUnit.Case alias Elasticlunr.Token alias Elasticlunr.{Pipeline, Pipeline.Stemmer} import Elasticlunr.Test.Fixture describe "running stemmer against tokens" do test "works as expected" do stemmer_fixture() |> Enum.each(fn {word, stemmed_word} -> token = Token.new(word) assert Stemmer.call(token) == Token.new(stemmed_word) end) end test "is a default runner for default pipeline" do assert Pipeline.default_runners() |> Enum.any?(fn Stemmer -> true _ -> false end) 
    end
  end
end

================================================
FILE: test/pipeline/stop_word_filter_test.exs
================================================
defmodule Elasticlunr.Pipeline.StopWordFilterTest do
  @moduledoc false
  use ExUnit.Case

  alias Elasticlunr.{Pipeline, Token}
  alias Elasticlunr.Pipeline.StopWordFilter

  describe "running stop_word_filter against tokens" do
    test "is a default runner for default pipeline" do
      assert Pipeline.default_runners()
             |> Enum.any?(fn
               StopWordFilter -> true
               _ -> false
             end)
    end

    test "removes stop words" do
      stop_words = ~w[the and but than when]

      # StopWordFilter.call/1 returns nil for stop words, so none of these survive.
      assert [] =
               stop_words
               |> Enum.map(&Token.new/1)
               |> Enum.reject(&is_nil(StopWordFilter.call(&1)))
    end
  end
end

================================================
FILE: test/pipeline/trimmer_test.exs
================================================
defmodule Elasticlunr.Pipeline.TrimmerTest do
  @moduledoc false
  use ExUnit.Case

  alias Elasticlunr.{Pipeline, Token}
  alias Elasticlunr.Pipeline.Trimmer

  describe "running trimmer against tokens" do
    test "is a default runner for default pipeline" do
      assert Pipeline.default_runners()
             |> Enum.any?(fn
               Trimmer -> true
               _ -> false
             end)
    end

    test "passes through latin characters" do
      assert %Token{token: "hello"} = Token.new("hello") |> Trimmer.call()
    end

    test "removes leading and trailing punctuation" do
      assert %Token{token: "hello"} = Token.new("hello.") |> Trimmer.call()
      assert %Token{token: "it's"} = Token.new("it's") |> Trimmer.call()
      assert %Token{token: "james"} = Token.new("james'") |> Trimmer.call()
      assert %Token{token: "stop"} = Token.new("stop!'") |> Trimmer.call()
      assert %Token{token: "first"} = Token.new("first'") |> Trimmer.call()
      assert %Token{token: ""} = Token.new("") |> Trimmer.call()
      assert %Token{token: "tag"} = Token.new("[tag]") |> Trimmer.call()
      assert %Token{token: "tag"} = Token.new("[[[tag]]]") |> Trimmer.call()
      assert %Token{token: "hello"} = Token.new("[[!@#@!hello]]]}}}") |> Trimmer.call()
      assert %Token{token: "hello"} = Token.new("~!@@@hello***()()()]]") |> Trimmer.call()
    end
  end
end

================================================
FILE: test/pipeline_test.exs
================================================
defmodule Elasticlunr.PipelineTest do
  use ExUnit.Case

  alias Elasticlunr.{Pipeline, Token, Tokenizer}
  alias Elasticlunr.Pipeline.{Stemmer, StopWordFilter, Trimmer}

  describe "creating pipeline" do
    test "adds a runner to the queue" do
      assert pipeline = Pipeline.new([])
      assert %Pipeline{callback: []} = pipeline
      assert %Pipeline{callback: [Trimmer]} = Pipeline.add(pipeline, Trimmer)
    end

    test "ignores duplicate runner in the queue" do
      pipeline = Pipeline.new([])

      assert %Pipeline{callback: []} = pipeline

      # Adding the same runner twice must not duplicate it in the queue.
      pipeline = Pipeline.add(pipeline, Trimmer)

      assert %Pipeline{callback: [Trimmer]} = pipeline
      assert %Pipeline{callback: [Trimmer]} = Pipeline.add(pipeline, Trimmer)
    end
  end

  describe "updating pipeline" do
    test "removes runner from queue" do
      pipeline = Pipeline.new([Stemmer, Trimmer])

      assert %Pipeline{callback: [Stemmer, Trimmer]} = pipeline
      assert %Pipeline{callback: [Stemmer]} = Pipeline.remove(pipeline, Trimmer)
    end

    test "inserts runner at position" do
      pipeline = Pipeline.new([Stemmer, Trimmer])

      assert %Pipeline{callback: [Stemmer, Trimmer]} = pipeline
      assert pipeline = Pipeline.insert_before(pipeline, StopWordFilter, Trimmer)
      assert %Pipeline{callback: [Stemmer, StopWordFilter, Trimmer]} = pipeline
      assert pipeline = Pipeline.remove(pipeline, Stemmer)
      assert %Pipeline{callback: [StopWordFilter, Trimmer]} = pipeline

      assert %Pipeline{callback: [StopWordFilter, Stemmer, Trimmer]} =
               Pipeline.insert_after(pipeline, Stemmer, StopWordFilter)
    end
  end

  describe "running pipeline" do
    test "executes runners in the queue" do
      pipeline = Pipeline.new(Pipeline.default_runners())
      tokens = Tokenizer.tokenize("consignment worlds")

      assert [
               %Token{token: "consign"},
               %Token{token: "world"}
             ] = Pipeline.run(pipeline, tokens)
    end

    test "runs a custom function" do
      pipeline = Pipeline.new([& &1])
      tokens = Tokenizer.tokenize("consignment worlds")

      assert ^tokens = Pipeline.run(pipeline, tokens)
    end
  end
end

================================================
FILE: test/serializer_test.exs
================================================
defmodule Elasticlunr.SerializerTest do
  use ExUnit.Case

  alias Elasticlunr.{Index, Serializer}

  test "serialize index without documents" do
    index = Index.new(name: "index")

    structure = [
      "settings#name:index|ref:id|pipeline:",
      "db#name:elasticlunr_index|options:compressed,named_table,ordered_set,public",
      "field#name:id|pipeline:Elixir.Elasticlunr.Index.IdPipeline|store_documents:false|store_positions:false"
    ]

    data = Serializer.serialize(index) |> Enum.into([])

    assert structure == data
  end

  test "serialize index with documents" do
    index =
      Index.new(name: "index")
      |> Index.add_field("body")
      |> Index.add_documents([%{"id" => 1, "body" => "hello world"}])

    structure = [
      "settings#name:index|ref:id|pipeline:",
      "db#name:elasticlunr_index|options:compressed,named_table,ordered_set,public",
      "field#name:body|pipeline:|store_documents:true|store_positions:true",
      "field#name:id|pipeline:Elixir.Elasticlunr.Index.IdPipeline|store_documents:false|store_positions:false"
    ]

    data = Serializer.serialize(index) |> Enum.into([])

    assert structure == data
  end
end

================================================
FILE: test/storage/disk_test.exs
================================================
defmodule Elasticlunr.Storage.DiskTest do
  use ExUnit.Case

  alias Elasticlunr.Index
  alias Elasticlunr.Pipeline
  alias Elasticlunr.Storage.Disk

  @otp_app :elasticlunr

  setup do
    storage_path = Path.join(__DIR__, "../../storage")

    Application.put_env(@otp_app, Disk, directory: storage_path)

    on_exit(fn ->
      Enum.each(Disk.files(), &File.rm!/1)
      Application.delete_env(@otp_app, Disk)
    end)
  end

  defp fixture_storage(_context) do
    opts = Application.get_env(@otp_app, Disk)
    # Point the Disk adapter at the bundled fixture directory for this test only.
    storage_path = Path.join(__DIR__, "../support/fixture")

    Application.put_env(@otp_app, Disk, directory: storage_path)

    on_exit(fn -> Application.put_env(@otp_app, Disk, opts) end)
  end

  describe "serializing an index" do
    test "writes to disk" do
      index = Index.new()
      options = Application.get_env(@otp_app, Disk)
      file = Path.join(options[:directory], "#{index.name}.index")

      assert :ok = Disk.write(index)
      assert File.exists?(file)
      assert {:ok, %File.Stat{size: size}} = File.stat(file)
      assert size > 0
    end
  end

  describe "unserializing an index" do
    test "reads from disk" do
      pipeline = Pipeline.new(Pipeline.default_runners())

      document = %{
        "id" => Faker.UUID.v4(),
        "last_name" => Faker.Person.last_name(),
        "first_name" => Faker.Person.first_name()
      }

      index =
        Index.new(pipeline: pipeline)
        |> Index.add_field("first_name")
        |> Index.add_field("last_name")
        |> Index.add_documents([document])

      :ok = Disk.write(index)

      assert index == Disk.read(index.name)
    end
  end

  describe "getting all serialized indexes" do
    setup [:fixture_storage]

    test "loads and deserializes indexes" do
      assert [%Index{name: "users"} = index] = Disk.load_all() |> Enum.to_list()
      assert [_] = Index.search(index, "rose")
    end
  end

  describe "deleting index from storage" do
    test "works successfully" do
      index = Index.new()
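      # Writing the index first ensures delete/1 has a file to remove.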
      options = Application.get_env(@otp_app, Disk)
      file = Path.join(options[:directory], "#{index.name}.index")

      :ok = Disk.write(index)

      assert :ok = Disk.delete(index.name)
      refute File.exists?(file)
    end

    test "fails for missing index" do
      assert {:error, :enoent} = Disk.delete("missing")
    end
  end
end

================================================
FILE: test/storage_test.exs
================================================
defmodule Elasticlunr.StorageTest do
  use ExUnit.Case

  alias Elasticlunr.{Index, Storage}
  alias Elasticlunr.Storage.{Blackhole, Mock}

  import Mox

  setup do
    # Route storage calls through the Mox mock, falling back to the Blackhole adapter.
    Mox.stub_with(Mock, Blackhole)
    Application.put_env(:elasticlunr, :storage, Mock)

    on_exit(fn -> Application.delete_env(:elasticlunr, :storage) end)
  end

  test "all/0" do
    index = Index.new()

    expect(Mock, :load_all, fn -> [index] end)

    assert [^index] = Storage.all()
  end

  test "write/1" do
    index = Index.new()

    expect(Mock, :write, 2, fn
      ^index -> :ok
      %{name: nil} -> {:error, "invalid index"}
    end)

    assert :ok = Storage.write(index)
    assert {:error, "invalid index"} = Storage.write(Index.new(name: nil))
  end

  test "read/1" do
    expect(Mock, :read, 2, fn
      "missing" -> {:error, "missing index"}
      name -> Index.new(name: name)
    end)

    assert {:error, "missing index"} = Storage.read("missing")
    assert %Index{name: "users"} = Storage.read("users")
  end

  test "delete/1" do
    expect(Mock, :delete, 2, fn
      "unknown-index" -> :error
      _ -> :ok
    end)

    assert :error = Storage.delete("unknown-index")
    assert :ok = Storage.delete("users")
  end
end

================================================
FILE: test/support/fixture/fixture.ex
================================================
defmodule Elasticlunr.Test.Fixture do
  @moduledoc false

  @spec stemmer_fixture() :: map()
  def stemmer_fixture do
    with path <- Path.join(__DIR__, "./stemmer_fixture.json"),
         {:ok, content} <- File.read(path),
         {:ok, map} <- Jason.decode(content) do
      map
    end
  end
end

================================================
FILE: test/support/fixture/stemmer_fixture.json
================================================
{
  "consign": "consign",
  "consigned": "consign",
  "consigning": "consign",
  "consignment": "consign",
  "consist": "consist",
  "consisted": "consist",
  "consistency": "consist",
  "consistent": "consist",
  "consistently": "consist",
  "consisting": "consist",
  "consists": "consist",
  "consolation": "consol",
  "consolations": "consol",
  "consolatory": "consolatori",
  "console": "consol",
  "consoled": "consol",
  "consoles": "consol",
  "consolidate": "consolid",
  "consolidated": "consolid",
  "consolidating": "consolid",
  "consoling": "consol",
  "consols": "consol",
  "consonant": "conson",
  "consort": "consort",
  "consorted": "consort",
  "consorting": "consort",
  "conspicuous": "conspicu",
  "conspicuously": "conspicu",
  "conspiracy": "conspiraci",
  "conspirator": "conspir",
  "conspirators": "conspir",
  "conspire": "conspir",
  "conspired": "conspir",
  "conspiring": "conspir",
  "constable": "constabl",
  "constables": "constabl",
  "constance": "constanc",
  "constancy": "constanc",
  "constant": "constant",
  "knack": "knack",
  "knackeries": "knackeri",
  "knacks": "knack",
  "knag": "knag",
  "knave": "knave",
  "knaves": "knave",
  "knavish": "knavish",
  "kneaded": "knead",
  "kneading": "knead",
  "knee": "knee",
  "kneel": "kneel",
  "kneeled": "kneel",
  "kneeling": "kneel",
  "kneels": "kneel",
  "knees": "knee",
  "knell": "knell",
  "knelt": "knelt",
  "knew": "knew",
  "knick": "knick",
  "knif": "knif",
  "knife": "knife",
  "knight": "knight",
  "knights": "knight",
  "knit": "knit",
  "knits": "knit",
  "knitted": "knit",
  "knitting": "knit",
  "knives": "knive",
  "knob": "knob",
"knob", "knobs": "knob", "knock": "knock", "knocked": "knock", "knocker": "knocker", "knockers": "knocker", "knocking": "knock", "knocks": "knock", "knopp": "knopp", "knot": "knot", "knots": "knot", "lay": "lay", "try": "tri" } ================================================ FILE: test/test_helper.exs ================================================ ExUnit.start() Faker.start() Mox.defmock(Elasticlunr.Storage.Mock, for: Elasticlunr.Storage.Provider) ================================================ FILE: test/tokenizer_test.exs ================================================ defmodule Elasticlunr.TokenizerTest do use ExUnit.Case alias Elasticlunr.{Token, Tokenizer} describe "tokenizing string" do test "splits to list of tokens" do str = "the man came home" tokenized_str = [ Token.new("the", %{start: 0, end: 3}), Token.new("man", %{start: 4, end: 3}), Token.new("came", %{start: 8, end: 4}), Token.new("home", %{start: 13, end: 4}) ] assert ^tokenized_str = Tokenizer.tokenize(str) end test "downcase tokens" do assert ~w[foo bar] = Tokenizer.tokenize("FOO BAR") |> Enum.map(& &1.token) end test "removes whitespace and hyphens" do assert ~w[foo bar] = Tokenizer.tokenize(" FOO BAR ") |> Enum.map(& &1.token) assert ~w[take the new york san francisco flight] = Tokenizer.tokenize("take the New York-San Francisco flight") |> Enum.map(& &1.token) assert ~w[solve for a b] = Tokenizer.tokenize("Solve for A - B") |> Enum.map(& &1.token) end test "with custom separator" do assert ~w[hello world i love] = Tokenizer.tokenize("hello/world/I/love", ~r/\/+/) |> Enum.map(& &1.token) assert ~w[hello world i love] = Tokenizer.tokenize("hello\\world\\I\\love", ~r/[\\]+/) |> Enum.map(& &1.token) assert ~w[hello world apple pie] = Tokenizer.tokenize("hello/world/%%%apple%pie", ~r/[\/\%]+/) |> Enum.map(& &1.token) end end end