Repository: ankane/searchkick Branch: master Commit: 1009d03107a2 Files: 114 Total size: 425.2 KB Directory structure: gitextract_mcchxu51/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── config.yml │ │ └── feature_request.md │ ├── pull_request_template.md │ └── workflows/ │ └── build.yml ├── .gitignore ├── CHANGELOG.md ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── benchmark/ │ ├── Gemfile │ ├── index.rb │ ├── relation.rb │ └── search.rb ├── examples/ │ ├── Gemfile │ ├── hybrid.rb │ └── semantic.rb ├── gemfiles/ │ ├── activerecord72.gemfile │ ├── activerecord80.gemfile │ ├── mongoid8.gemfile │ ├── mongoid9.gemfile │ ├── opensearch2.gemfile │ └── opensearch3.gemfile ├── lib/ │ ├── searchkick/ │ │ ├── bulk_reindex_job.rb │ │ ├── controller_runtime.rb │ │ ├── hash_wrapper.rb │ │ ├── index.rb │ │ ├── index_cache.rb │ │ ├── index_options.rb │ │ ├── indexer.rb │ │ ├── log_subscriber.rb │ │ ├── middleware.rb │ │ ├── model.rb │ │ ├── multi_search.rb │ │ ├── process_batch_job.rb │ │ ├── process_queue_job.rb │ │ ├── query.rb │ │ ├── railtie.rb │ │ ├── record_data.rb │ │ ├── record_indexer.rb │ │ ├── reindex_queue.rb │ │ ├── reindex_v2_job.rb │ │ ├── relation.rb │ │ ├── relation_indexer.rb │ │ ├── reranking.rb │ │ ├── results.rb │ │ ├── script.rb │ │ ├── version.rb │ │ └── where.rb │ ├── searchkick.rb │ └── tasks/ │ └── searchkick.rake ├── searchkick.gemspec └── test/ ├── aggs_test.rb ├── boost_test.rb ├── callbacks_test.rb ├── conversions_test.rb ├── default_scope_test.rb ├── exclude_test.rb ├── geo_shape_test.rb ├── highlight_test.rb ├── hybrid_test.rb ├── index_cache_test.rb ├── index_options_test.rb ├── index_test.rb ├── inheritance_test.rb ├── knn_test.rb ├── language_test.rb ├── load_test.rb ├── log_subscriber_test.rb ├── marshal_test.rb ├── match_test.rb ├── misspellings_test.rb ├── models/ │ ├── animal.rb │ ├── artist.rb │ ├── band.rb │ ├── product.rb │ ├── region.rb │ ├── sku.rb │ ├── song.rb │ ├── speaker.rb │ └── store.rb ├── 
multi_indices_test.rb ├── multi_search_test.rb ├── multi_tenancy_test.rb ├── notifications_test.rb ├── order_test.rb ├── pagination_test.rb ├── parameters_test.rb ├── partial_match_test.rb ├── partial_reindex_test.rb ├── query_test.rb ├── reindex_test.rb ├── reindex_v2_job_test.rb ├── relation_test.rb ├── results_test.rb ├── routing_test.rb ├── scroll_test.rb ├── search_synonyms_test.rb ├── search_test.rb ├── select_test.rb ├── should_index_test.rb ├── similar_test.rb ├── suggest_test.rb ├── support/ │ ├── activerecord.rb │ ├── apartment.rb │ ├── helpers.rb │ ├── kaminari.yml │ ├── mongoid.rb │ └── redis.rb ├── synonyms_test.rb ├── test_helper.rb ├── unscope_test.rb └── where_test.rb ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: bug report assignees: '' --- **First** Search existing issues to see if it’s been reported and make sure you’re on the latest version. **Describe the bug** A clear and concise description of the bug. 
**To reproduce** Use this code to reproduce when possible: ```ruby require "bundler/inline" gemfile do source "https://rubygems.org" gem "activerecord", require: "active_record" gem "activejob", require: "active_job" gem "sqlite3" gem "searchkick", git: "https://github.com/ankane/searchkick.git" # uncomment one # gem "elasticsearch" # gem "opensearch-ruby" end puts "Searchkick version: #{Searchkick::VERSION}" puts "Server version: #{Searchkick.server_version}" ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:" ActiveJob::Base.queue_adapter = :inline ActiveRecord::Schema.define do create_table :products do |t| t.string :name end end class Product < ActiveRecord::Base searchkick end Product.reindex Product.create!(name: "Test") Product.search_index.refresh p Product.search("test", fields: [:name]).response ``` **Additional context** Add any other context. ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: Help url: https://stackoverflow.com/questions/tagged/searchkick about: Ask and answer questions here ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: feature request assignees: '' --- **First** Search existing issues to see if it’s been discussed. **Is your feature request related to a problem? Please describe.** A clear and concise description of the problem. **Describe the solution you'd like** A clear and concise description of your idea. **Additional context** Add any other context. ================================================ FILE: .github/pull_request_template.md ================================================ Thanks for contributing. You’re awesome! 
A few things to keep in mind: - Keep changes to a minimum - Follow the existing style - Add one or more tests if possible Finally, replace all this with a description of the changes. ================================================ FILE: .github/workflows/build.yml ================================================ name: build on: [push, pull_request] jobs: build: strategy: fail-fast: false matrix: include: - ruby: "4.0" gemfile: Gemfile elasticsearch: 9 - ruby: 3.3 gemfile: gemfiles/activerecord80.gemfile elasticsearch: 9.0.0 - ruby: 3.2 gemfile: gemfiles/activerecord72.gemfile elasticsearch: 8 - ruby: 3.4 gemfile: gemfiles/opensearch3.gemfile opensearch: 3 - ruby: 3.3 gemfile: gemfiles/opensearch2.gemfile opensearch: 2 - ruby: 3.4 gemfile: gemfiles/mongoid9.gemfile elasticsearch: 9 mongodb: true - ruby: 3.2 gemfile: gemfiles/mongoid8.gemfile # TODO fix plugin installation for earlier versions elasticsearch: 8.5.0 mongodb: true runs-on: ubuntu-latest env: BUNDLE_GEMFILE: ${{ matrix.gemfile }} steps: - uses: actions/checkout@v6 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} bundler-cache: true - run: bundle update - uses: actions/cache@v5 if: ${{ matrix.elasticsearch }} with: path: ~/elasticsearch key: ${{ runner.os }}-elasticsearch-${{ matrix.elasticsearch }} - uses: ankane/setup-elasticsearch@v1 if: ${{ matrix.elasticsearch }} with: elasticsearch-version: ${{ matrix.elasticsearch }} plugins: | analysis-kuromoji analysis-smartcn analysis-stempel analysis-ukrainian - uses: actions/cache@v5 if: ${{ matrix.opensearch }} with: path: ~/opensearch key: ${{ runner.os }}-opensearch-${{ matrix.opensearch }} - uses: ankane/setup-opensearch@v1 if: ${{ matrix.opensearch }} with: opensearch-version: ${{ matrix.opensearch }} plugins: | analysis-kuromoji analysis-smartcn analysis-stempel analysis-ukrainian - uses: ankane/setup-mongodb@v1 if: ${{ matrix.mongodb }} - run: | sudo apt-get update sudo apt-get install redis-server sudo systemctl start redis-server - 
run: bundle exec rake test ================================================ FILE: .gitignore ================================================ *.gem *.rbc .bundle .config .yardoc *.lock InstalledFiles _yardoc coverage doc/ lib/bundler/man pkg rdoc spec/reports test/tmp test/version_tmp tmp *.log .DS_Store .ruby-* .idea/ *.sqlite3 ================================================ FILE: CHANGELOG.md ================================================ ## 6.1.1 (unreleased) - Fixed smart aggs behavior with `_and` ## 6.1.0 (2026-02-18) - Added `per` method - Fixed error with `aggs` method and non-hash arguments - Fixed smart aggs behavior when multiple `where` calls ## 6.0.3 (2026-01-06) - Fixed `inspect` method for `Relation` ## 6.0.2 (2025-10-24) - Fixed `as_json` method for `HashWrapper` ## 6.0.1 (2025-10-24) - Fixed `to_json` method for `HashWrapper` ## 6.0.0 (2025-10-19) - Added new query builder API (similar to Active Record) - Added `conversions_v2` option - Added `job_options` option - Added `parent_job` option - Added `opaque_id` option - Added `callback_options` option - Added `ignore_missing` option for partial reindex - Added support for `exists: false` - Added `quantization` to `knn` option for Elasticsearch - Changed async reindex to use ranges for numeric primary keys with Active Record - Fixed error with `case_sensitive` option and synonyms - Removed default quantization for `knn` option for Elasticsearch 8.14+ - Removed `results` method (use `to_a` instead) - Removed `execute` option and method (no longer needed) - Removed `options` method (use individual methods instead) - Removed dependency on Hashie - Deprecated `conversions` option in favor of `conversions_v2` - Dropped support for Elasticsearch 7 and OpenSearch 1 - Dropped support for Active Record < 7.2 - Dropped support for Redis < 6.2 ## 5.5.2 (2025-05-20) - Fixed `scope` option for partial reindex ## 5.5.1 (2025-04-24) - Added support for `elasticsearch` 9 gem ## 5.5.0 (2025-04-03) - Added `m` and 
`ef_construction` to `knn` index option - Added `ef_search` to `knn` search option - Fixed exact cosine distance for OpenSearch 2.19+ - Dropped support for Ruby < 3.2 and Active Record < 7.1 - Dropped support for Mongoid < 8 ## 5.4.0 (2024-09-04) - Added `knn` option - Added `rrf` method - Added experimental support for scripting to `where` option - Added warning for `exists` with non-`true` values - Added warning for full reindex and `:queue` mode - Fixed `per_page` method when paginating beyond `max_result_window` - Dropped support for Ruby < 3.1 ## 5.3.1 (2023-11-28) - Fixed error with misspellings below and failed queries ## 5.3.0 (2023-07-02) - Fixed error with `cutoff_frequency` - Dropped support for Ruby < 3 and Active Record < 6.1 - Dropped support for Mongoid < 7 ## 5.2.4 (2023-05-11) - Fixed error with non-string routing and `:async` mode ## 5.2.3 (2023-04-12) - Fixed error with missing records and multiple models ## 5.2.2 (2023-04-01) - Fixed `total_docs` method - Fixed deprecation warning with Active Support 7.1 ## 5.2.1 (2023-02-21) - Added support for `redis-client` gem ## 5.2.0 (2023-02-08) - Added model name to warning about missing records - Fixed unnecessary data loading when reindexing relations with `:async` and `:queue` modes ## 5.1.2 (2023-01-29) - Fixed error with missing point in time ## 5.1.1 (2022-12-05) - Added support for strings for `offset` and `per_page` ## 5.1.0 (2022-10-12) - Added support for fractional search timeout - Fixed search timeout with `elasticsearch` 8+ and `opensearch-ruby` gems - Fixed search timeout not applying to `multi_search` ## 5.0.5 (2022-10-09) - Added `model` method to `Searchkick::Relation` - Fixed deprecation warning with `redis` gem - Fixed `respond_to?` method on relation loading relation - Fixed `Relation loaded` error for non-mutating methods on relation ## 5.0.4 (2022-06-16) - Added `max_result_window` option - Improved error message for unsupported versions of Elasticsearch ## 5.0.3 (2022-03-13) - 
Fixed context for index name for inherited models ## 5.0.2 (2022-03-03) - Fixed index name for inherited models ## 5.0.1 (2022-02-27) - Prefer `mode: :async` over `async: true` for full reindex - Fixed instance method overriding with concerns ## 5.0.0 (2022-02-21) - Searches now use lazy loading (similar to Active Record) - Added `unscope` option to better support working with default scopes - Added support for `:async` and `:queue` modes for `reindex` on relation - Added basic protection from unfiltered parameters to `where` option - Added `models` option to `similar` method - Changed async full reindex to fetch ids instead of using ranges for numeric primary keys with Active Record - Changed `searchkick_index_options` to return symbol keys (instead of mix of strings and symbols) - Changed non-anchored regular expressions to match expected results (previously warned) - Changed record reindex to return `true` to match model and relation reindex - Updated async reindex job to call `search_import` for nested associations - Fixed removing records when `should_index?` is `false` when `reindex` called on relation - Fixed issue with `merge_mappings` for fields that use `searchkick` options - Raise error when `search` called on relations - Raise `ArgumentError` (instead of warning) for invalid regular expression modifiers - Raise `ArgumentError` instead of `RuntimeError` for unknown operators - Removed mapping of `id` to `_id` with `order` option (not supported in Elasticsearch 8) - Removed `wordnet` option (no longer worked) - Removed dependency on `elasticsearch` gem (can use `elasticsearch` or `opensearch-ruby`) - Dropped support for Elasticsearch 6 - Dropped support for Ruby < 2.6 and Active Record < 5.2 - Dropped support for NoBrainer and Cequel - Dropped support for `faraday_middleware-aws-signers-v4` (use `faraday_middleware-aws-sigv4` instead) ## 4.6.3 (2021-11-19) - Added support for reloadable synonyms for OpenSearch - Added experimental support for 
`opensearch-ruby` gem - Removed `elasticsearch-xpack` dependency for reloadable synonyms ## 4.6.2 (2021-11-15) - Added support for beginless ranges to `where` option - Fixed `like` and `ilike` with `+` character - Fixed warning about accessing system indices when no model or index specified ## 4.6.1 (2021-09-25) - Added `ilike` operator for Elasticsearch 7.10+ - Fixed missing methods with `multi_search` ## 4.6.0 (2021-08-22) - Added support for case-insensitive regular expressions with Elasticsearch 7.10+ - Added support for `OPENSEARCH_URL` - Fixed error with `debug` option ## 4.5.2 (2021-08-05) - Fixed error with reindex queue - Fixed error with `model_name` method with multiple models - Fixed error with `debug` option with elasticsearch-ruby 7.14 ## 4.5.1 (2021-08-03) - Improved performance of reindex queue ## 4.5.0 (2021-06-07) - Added experimental support for OpenSearch - Added support for synonyms in Japanese ## 4.4.4 (2021-03-12) - Fixed `too_long_frame_exception` with `scroll` method - Fixed multi-word emoji tokenization ## 4.4.3 (2021-02-25) - Added support for Hunspell - Fixed warning about accessing system indices ## 4.4.2 (2020-11-23) - Added `missing_records` method to results - Fixed issue with `like` and special characters ## 4.4.1 (2020-06-24) - Added `stem_exclusion` and `stemmer_override` options - Added `with_score` method to search results - Improved error message for `reload_synonyms` with non-OSS version of Elasticsearch - Improved output for reindex rake task ## 4.4.0 (2020-06-17) - Added support for reloadable, multi-word, search time synonyms - Fixed another deprecation warning in Ruby 2.7 ## 4.3.1 (2020-05-13) - Fixed error with `exclude` in certain cases for Elasticsearch 7.7 ## 4.3.0 (2020-02-19) - Fixed `like` queries with `"` character - Better error when invalid parameters passed to `where` ## 4.2.1 (2020-01-27) - Fixed deprecation warnings with Elasticsearch - Fixed deprecation warnings in Ruby 2.7 ## 4.2.0 (2019-12-18) - Added 
safety check for multiple `Model.reindex` - Added `deep_paging` option - Added request parameters to search notifications and curl representation - Removed curl from search notifications to prevent confusion ## 4.1.1 (2019-11-19) - Added `chinese2` and `korean2` languages - Improved performance of async full reindex - Fixed `searchkick:reindex:all` rake task for Rails 6 ## 4.1.0 (2019-08-01) - Added `like` operator - Added `exists` operator - Added warnings for certain regular expressions - Fixed anchored regular expressions ## 4.0.2 (2019-06-04) - Added block form of `scroll` - Added `clear_scroll` method - Fixed custom mappings ## 4.0.1 (2019-05-30) - Added support for scroll API - Made type optional for custom mapping for Elasticsearch 6 - Fixed error when suggestions empty - Fixed `models` option with inheritance ## 4.0.0 (2019-04-11) - Added support for Elasticsearch 7 - Added `models` option Breaking changes - Removed support for Elasticsearch 5 - Removed support for multi-word synonyms (they no longer work with shingles) - Removed support for Active Record < 5 ## 3.1.3 (2019-04-11) - Added support for endless ranges - Added support for routing to `similar` method - Added `prefix` to `where` - Fixed error with elasticsearch-ruby 6.3 - Fixed error with some language stemmers and Elasticsearch 6.5 - Fixed issue with misspellings below and body block ## 3.1.2 (2018-09-27) - Improved performance of indices boost - Fixed deletes with routing and `async` callbacks - Fixed deletes with routing and `queue` callbacks - Fixed deprecation warnings - Fixed field misspellings for older partial match format ## 3.1.1 (2018-08-09) - Added per-field misspellings - Added `case_sensitive` option - Added `stem` option - Added `total_entries` option - Fixed `exclude` option with match all - Fixed `with_highlights` method ## 3.1.0 (2018-05-12) - Added `:inline` as alias for `true` for `callbacks` and `mode` options - Friendlier error message for bad mapping with partial matches - 
Warn when records in search index do not exist in database - Easier merging for `merge_mapping` - Fixed `with_hit` and `with_highlights` when records in search index do not exist in database - Fixed error with highlights and match all ## 3.0.3 (2018-04-22) - Added support for pagination with `body` option - Added `boost_by_recency` option - Fixed "Model Search Data" output for `debug` option - Fixed `reindex_status` error - Fixed error with optional operators in Ruby regexp - Fixed deprecation warnings for Elasticsearch 6.2+ ## 3.0.2 (2018-03-26) - Added support for Korean and Vietnamese - Fixed `Unsupported argument type: Symbol` for async partial reindex - Fixed infinite recursion with multi search and misspellings below - Do not raise an error when `id` is indexed ## 3.0.1 (2018-03-14) - Added `scope` option for partial reindex - Added support for Japanese, Polish, and Ukrainian ## 3.0.0 (2018-03-03) - Added support for Chinese - No longer requires fields to query for Elasticsearch 6 - Results can be marshaled by default (unless using `highlight` option) Breaking changes - Removed support for Elasticsearch 2 - Removed support for Active Record < 4.2 and Mongoid < 5 - Types are no longer used - The `_all` field is disabled by default in Elasticsearch 5 - Conversions are not stemmed by default - An `ArgumentError` is raised instead of a warning when options are incompatible with the `body` option - Removed `log` option from `boost_by` - Removed `Model.enable_search_callbacks`, `Model.disable_search_callbacks`, and `Model.search_callbacks?` - Removed `reindex_async` method, as `reindex` now defaults to callbacks mode specified on the model - Removed `async` option from `record.reindex` - Removed `search_hit` method - use `with_hit` instead - Removed `each_with_hit` - use `with_hit.each` instead - Removed `with_details` - use `with_highlights` instead - Bumped default `limit` to 10,000 ## 2.5.0 (2018-02-15) - Try requests 3 times before raising error - Better 
exception when trying to access results for failed multi-search query - More efficient aggregations with `where` clauses - Added support for `faraday_middleware-aws-sigv4` - Added `credentials` option to `aws_credentials` - Added `modifier` option to `boost_by` - Added `scope_results` option - Added `factor` option to `boost_by_distance` ## 2.4.0 (2017-11-14) - Fixed `similar` for Elasticsearch 6 - Added `inheritance` option - Added `_type` option - Fixed `Must specify fields to search` error when searching `*` ## 2.3.2 (2017-09-08) - Added `_all` and `default_fields` options - Added global `index_prefix` option - Added `wait` option to async reindex - Added `model_includes` option - Added `missing` option for `boost_by` - Raise error for `reindex_status` when Redis not configured - Warn when incompatible options used with `body` option - Fixed bug where `routing` and `type` options were silently ignored with `body` option - Fixed `reindex(async: true)` for non-numeric primary keys in Postgres ## 2.3.1 (2017-07-06) - Added support for `reindex(async: true)` for non-numeric primary keys - Added `conversions_term` option - Added support for passing fields to `suggest` option - Fixed `page_view_entries` for Kaminari ## 2.3.0 (2017-05-06) - Fixed analyzer on dynamically mapped fields - Fixed error with `similar` method and `_all` field - Throw error when fields are needed - Added `queue_name` option - No longer require synonyms to be lowercase ## 2.2.1 (2017-04-16) - Added `avg`, `cardinality`, `max`, `min`, and `sum` aggregations - Added `load: {dumpable: true}` option - Added `index_suffix` option - Accept string for `exclude` option ## 2.2.0 (2017-03-19) - Fixed bug with text values longer than 256 characters and `_all` field - see [#850](https://github.com/ankane/searchkick/issues/850) - Fixed issue with `_all` field in `searchable` - Fixed `exclude` option with `word_start` ## 2.1.1 (2017-01-17) - Fixed duplicate notifications - Added support for `connection_pool` 
- Added `exclude` option ## 2.1.0 (2017-01-15) - Background reindexing and queues are officially supported - Log updates and deletes ## 2.0.4 (2017-01-15) - Added support for queuing updates [experimental] - Added `refresh_interval` option to `reindex` - Prefer `search_index` over `searchkick_index` ## 2.0.3 (2017-01-12) - Added `async` option to `reindex` [experimental] - Added `misspellings?` method to results ## 2.0.2 (2017-01-08) - Added `retain` option to `reindex` - Added support for attributes in highlight tags - Fixed potentially silent errors in reindex job - Improved syntax for `boost_by_distance` ## 2.0.1 (2016-12-30) - Added `search_hit` and `search_highlights` methods to models - Improved reindex performance ## 2.0.0 (2016-12-28) - Added support for `reindex` on associations Breaking changes - Removed support for Elasticsearch 1 as it reaches [end of life](https://www.elastic.co/support/eol) - Removed facets, legacy options, and legacy methods - Invalid options now throw an `ArgumentError` - The `query` and `json` options have been removed in favor of `body` - The `include` option has been removed in favor of `includes` - The `personalize` option has been removed in favor of `boost_where` - The `partial` option has been removed in favor of `operator` - Renamed `select_v2` to `select` (legacy `select` no longer available) - The `_all` field is disabled if `searchable` option is used (for performance) - The `partial_reindex(:method_name)` method has been replaced with `reindex(:method_name)` - The `unsearchable` and `only_analyzed` options have been removed in favor of `searchable` and `filterable` - `load: false` no longer returns an array in Elasticsearch 2 ## 1.5.1 (2016-12-28) - Added `client_options` - Added `refresh` option to `reindex` method - Improved syntax for partial reindex ## 1.5.0 (2016-12-23) - Added support for geo shape indexing and queries - Added `_and`, `_or`, `_not` to `where` option ## 1.4.2 (2016-12-21) - Added support for 
directional synonyms - Easier AWS setup - Fixed `total_docs` method for ES 5+ - Fixed exception on update errors ## 1.4.1 (2016-12-11) - Added `partial_reindex` method - Added `debug` option to `search` method - Added `profile` option ## 1.4.0 (2016-10-26) - Official support for Elasticsearch 5 - Boost exact matches for partial matching - Added `searchkick_debug` method - Added `geo_polygon` filter ## 1.3.6 (2016-10-08) - Fixed `Job adapter not found` error ## 1.3.5 (2016-09-27) - Added support for Elasticsearch 5.0 beta - Added `request_params` option - Added `filterable` option ## 1.3.4 (2016-08-23) - Added `resume` option to reindex - Added search timeout to payload ## 1.3.3 (2016-08-02) - Fix for namespaced models (broken in 1.3.2) ## 1.3.2 (2016-08-01) - Added `body_options` option - Added `date_histogram` aggregation - Added `indices_boost` option - Added support for multiple conversions ## 1.3.1 (2016-07-10) - Fixed error with Ruby 2.0 - Fixed error with indexing large fields ## 1.3.0 (2016-05-04) - Added support for Elasticsearch 5.0 alpha - Added support for phrase matches - Added support for procs for `index_prefix` option ## 1.2.1 (2016-02-15) - Added `multi_search` method - Added support for routing for Elasticsearch 2 - Added support for `search_document_id` and `search_document_type` in models - Fixed error with instrumentation for searching multiple models - Fixed instrumentation for bulk updates ## 1.2.0 (2016-02-03) - Fixed deprecation warnings with `alias_method_chain` - Added `analyzed_only` option for large text fields - Added `encoder` option to highlight - Fixed issue in `similar` method with `per_page` option - Added basic support for multiple models ## 1.1.2 (2015-12-18) - Added bulk updates with `callbacks` method - Added `bulk_delete` method - Added `search_timeout` option - Fixed bug with new location format for `boost_by_distance` ## 1.1.1 (2015-12-14) - Added support for `{lat: lat, lon: lon}` as preferred format for locations ## 1.1.0 
(2015-12-08) - Added `below` option to misspellings to improve performance - Fixed synonyms for `word_*` partial matches - Added `searchable` option - Added `similarity` option - Added `match` option - Added `word` option - Added highlighted fields to `load: false` ## 1.0.3 (2015-11-27) - Added support for Elasticsearch 2.1 ## 1.0.2 (2015-11-15) - Throw `Searchkick::ImportError` for errors when importing records - Errors now inherit from `Searchkick::Error` - Added `order` option to aggregations - Added `mapping` method ## 1.0.1 (2015-11-05) - Added aggregations method to get raw response - Use `execute: false` for lazy loading - Return nil when no aggs - Added emoji search ## 1.0.0 (2015-10-30) - Added support for Elasticsearch 2.0 - Added support for aggregations - Added ability to use misspellings for partial matches - Added `fragment_size` option for highlight - Added `took` method to results Breaking changes - Raise `Searchkick::DangerousOperation` error when calling reindex with scope - Enabled misspellings by default for partial matches - Enabled transpositions by default for misspellings ## 0.9.1 (2015-08-31) - `and` now matches `&` - Added `transpositions` option to misspellings - Added `boost_mode` and `log` options to `boost_by` - Added `prefix_length` option to `misspellings` - Added ability to set env ## 0.9.0 (2015-06-07) - Much better performance for where queries if no facets - Added basic support for regex - Added support for routing - Made `Searchkick.disable_callbacks` thread-safe ## 0.8.7 (2015-02-14) - Fixed Mongoid import ## 0.8.6 (2015-02-10) - Added support for NoBrainer - Added `stem_conversions: false` option - Added support for multiple `boost_where` values on the same field - Added support for array of values for `boost_where` - Fixed suggestions with partial match boost - Fixed redefining existing instance methods in models ## 0.8.5 (2014-11-11) - Added support for Elasticsearch 1.4 - Added `unsearchable` option - Added `select: true` 
option - Added `body` option ## 0.8.4 (2014-11-05) - Added `boost_by_distance` - More flexible highlight options - Better `env` logic ## 0.8.3 (2014-09-20) - Added support for Active Job - Added `timeout` setting - Fixed import with no records ## 0.8.2 (2014-08-18) - Added `async` to `callbacks` option - Added `wordnet` option - Added `edit_distance` option to eventually replace `distance` option - Catch misspelling of `misspellings` option - Improved logging ## 0.8.1 (2014-08-16) - Added `search_method_name` option - Fixed `order` for array of hashes - Added support for Mongoid 2 ## 0.8.0 (2014-07-12) - Added support for Elasticsearch 1.2 ## 0.7.9 (2014-06-30) - Added `tokens` method - Added `json` option - Added exact matches - Added `prev_page` for Kaminari pagination - Added `import` option to reindex ## 0.7.8 (2014-06-22) - Added `boost_by` and `boost_where` options - Added ability to boost fields - `name^10` - Added `select` option for `load: false` ## 0.7.7 (2014-06-10) - Added support for automatic failover - Fixed `operator` option (and default) for partial matches ## 0.7.6 (2014-05-20) - Added `stats` option to facets - Added `padding` option ## 0.7.5 (2014-05-13) - Do not throw errors when index becomes out of sync with database - Added custom exception types - Fixed `offset` and `offset_value` ## 0.7.4 (2014-05-06) - Fixed reindex with inheritance ## 0.7.3 (2014-04-30) - Fixed multi-index searches - Fixed suggestions for partial matches - Added `offset` and `length` for improved pagination ## 0.7.2 (2014-04-24) - Added smart facets - Added more fields to `load: false` result - Fixed logging for multi-index searches - Added `first_page?` and `last_page?` for improved Kaminari support ## 0.7.1 (2014-04-12) - Fixed huge issue w/ zero-downtime reindexing on 0.90 ## 0.7.0 (2014-04-10) - Added support for Elasticsearch 1.1 - Dropped support for Elasticsearch below 0.90.4 (unfortunate side effect of above) ## 0.6.3 (2014-04-08) - Removed patron since no 
support for Windows - Added error if `searchkick` is called multiple times ## 0.6.2 (2014-04-05) - Added logging - Fixed index_name option - Added ability to use proc as the index name ## 0.6.1 (2014-03-24) - Fixed huge issue w/ zero-downtime reindexing on 0.90 and elasticsearch-ruby 1.0 - Restore load: false behavior - Restore total_entries method ## 0.6.0 (2014-03-22) - Moved to elasticsearch-ruby - Added support for modifying the query and viewing the response - Added support for page_entries_info method ## 0.5.3 (2014-02-24) - Fixed bug w/ word_* queries ## 0.5.2 (2014-02-12) - Use after_commit hook for Active Record to prevent data inconsistencies ## 0.5.1 (2014-02-12) - Replaced stop words with common terms query - Added language option - Fixed bug with empty array in where clause - Fixed bug with MongoDB integer _id - Fixed reindex bug when callbacks disabled ## 0.5.0 (2014-01-20) - Better control over partial matches - Added merge_mappings option - Added batch_size option - Fixed bug with nil where clauses ## 0.4.2 (2013-12-29) - Added `should_index?` method to control which records are indexed - Added ability to temporarily disable callbacks - Added custom mappings ## 0.4.1 (2013-12-19) - Fixed issue w/ inheritance mapping ## 0.4.0 (2013-12-11) - Added support for Mongoid 4 - Added support for multiple locations ## 0.3.5 (2013-12-08) - Added facet ranges - Added all operator ## 0.3.4 (2013-11-22) - Added highlighting - Added :distance option to misspellings - Fixed issue w/ BigDecimal serialization ## 0.3.3 (2013-11-04) - Better error messages - Added where: {field: nil} queries ## 0.3.2 (2013-11-02) - Added support for single table inheritance - Removed Tire::Model::Search ## 0.3.1 (2013-11-02) - Added index_prefix option - Fixed ES issue with incorrect facet counts - Added option to turn off special characters ## 0.3.0 (2013-11-02) - Fixed reversed coordinates - Added bounded by a box queries - Expanded `or` queries ## 0.2.8 (2013-09-30) - Added option 
to disable callbacks - Fixed bug with facets with Elasticsearch 0.90.5 ## 0.2.7 (2013-09-23) - Added limit to facet - Improved similar items ## 0.2.6 (2013-09-10) - Added option to disable misspellings ## 0.2.5 (2013-08-30) - Added geospatial searches - Create alias before importing document if no alias exists - Fixed exception when :per_page option is a string - Check `RAILS_ENV` if `RACK_ENV` is not set ## 0.2.4 (2013-08-20) - Use `to_hash` instead of `as_json` for default `search_data` method - Works for Mongoid 1.3 - Use one shard in test environment for consistent scores ## 0.2.3 (2013-08-16) - Set up Travis - Clean old indices before reindex - Search for `*` returns all results - Fixed pagination - Added `similar` method ## 0.2.2 (2013-08-11) - Clean old indices after reindex - More expansions for fuzzy queries ## 0.2.1 (2013-08-11) - Added Rails logger - Only fetch ids when `load: true` ## 0.2.0 (2013-08-10) - Added autocomplete - Added “Did you mean” suggestions - Added personalized searches ## 0.1.4 (2013-08-03) - Bug fix ## 0.1.3 (2013-08-03) - Changed edit distance to one for misspellings - Raise errors when indexing fails - Fixed pagination - Fixed :include option ## 0.1.2 (2013-07-30) - Use conversions by default ## 0.1.1 (2013-07-29) - Renamed `_source` to `search_data` - Renamed `searchkick_import` to `search_import` ## 0.1.0 (2013-07-28) - Added `_source` method - Added `index_name` option ## 0.0.2 (2013-07-17) - Added `conversions` option ## 0.0.1 (2013-07-14) - First release ================================================ FILE: Gemfile ================================================ source "https://rubygems.org" gemspec gem "rake" gem "minitest" gem "sqlite3", platform: :ruby gem "sqlite3-ffi", platform: :jruby gem "activerecord", "~> 8.1.0" gem "actionpack", "~> 8.1.0" gem "activejob", "~> 8.1.0", require: "active_job" gem "elasticsearch", "~> 9" gem "redis-client" gem "connection_pool" gem "kaminari" gem "gemoji-parser" gem "parallel_tests"
gem "typhoeus", platform: :mri gem "cgi" # for elasticsearch ================================================ FILE: LICENSE.txt ================================================ Copyright (c) 2013-2026 Andrew Kane MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Searchkick :rocket: Intelligent search made easy **Searchkick learns what your users are looking for.** As more people search, it gets smarter and the results get better. It’s friendly for developers - and magical for your users. 
Searchkick handles: - stemming - `tomatoes` matches `tomato` - special characters - `jalapeno` matches `jalapeño` - extra whitespace - `dishwasher` matches `dish washer` - misspellings - `zuchini` matches `zucchini` - custom synonyms - `pop` matches `soda` Plus: - query like SQL - no need to learn a new query language - reindex without downtime - easily personalize results for each user - autocomplete - “Did you mean” suggestions - supports many languages - works with Active Record and Mongoid Check out [Searchjoy](https://github.com/ankane/searchjoy) for analytics and [Autosuggest](https://github.com/ankane/autosuggest) for query suggestions :tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource) [![Build Status](https://github.com/ankane/searchkick/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/searchkick/actions) ## Contents - [Getting Started](#getting-started) - [Querying](#querying) - [Indexing](#indexing) - [Intelligent Search](#intelligent-search) - [Instant Search / Autocomplete](#instant-search--autocomplete) - [Aggregations](#aggregations) - [Testing](#testing) - [Deployment](#deployment) - [Performance](#performance) - [Advanced Search](#advanced) - [Reference](#reference) - [Contributing](#contributing) Searchkick 6.0 was recently released! See [how to upgrade](#upgrading) ## Getting Started Install [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) or [OpenSearch](https://opensearch.org/downloads.html). For Homebrew, use: ```sh brew install opensearch brew services start opensearch ``` Add these lines to your application’s Gemfile: ```ruby gem "searchkick" gem "elasticsearch" # select one gem "opensearch-ruby" # select one ``` The latest version works with Elasticsearch 8 and 9 and OpenSearch 2 and 3. For Elasticsearch 7 and OpenSearch 1, use version 5.5.2 and [this readme](https://github.com/ankane/searchkick/blob/v5.5.2/README.md). Add `searchkick` to models you want to search. 
```ruby class Product < ApplicationRecord searchkick end ``` Add data to the search index. ```ruby Product.reindex ``` And to query, use: ```ruby products = Product.search("apples") products.each do |product| puts product.name end ``` Searchkick supports the complete [Elasticsearch Search API](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html) and [OpenSearch Search API](https://opensearch.org/docs/latest/opensearch/rest-api/search/). As your search becomes more advanced, we recommend you use the [search server DSL](#advanced) for maximum flexibility. ## Querying Query like SQL ```ruby Product.search("apples").where(in_stock: true).limit(10).offset(50) ``` Search specific fields ```ruby fields(:name, :brand) ``` Where ```ruby where(store_id: 1, expires_at: Time.now..) ``` [These types of filters are supported](#filtering) Order ```ruby order(_score: :desc) # most relevant first - default ``` [All of these sort options are supported](https://www.elastic.co/guide/en/elasticsearch/reference/current/sort-search-results.html) Limit / offset ```ruby limit(20).offset(40) ``` Select ```ruby select(:name) ``` [These source filtering options are supported](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-fields.html#source-filtering) ### Results Searches return a `Searchkick::Relation` object. This responds like an array to most methods. ```ruby results = Product.search("milk") results.size results.any? results.each { |result| ... } ``` By default, ids are fetched from the search server and records are fetched from your database. 
To fetch everything from the search server, use: ```ruby Product.search("apples").load(false) ``` Get total results ```ruby results.total_count ``` Get the time the search took (in milliseconds) ```ruby results.took ``` Get the full response from the search server ```ruby results.response ``` **Note:** By default, Elasticsearch and OpenSearch [limit paging](#deep-paging) to the first 10,000 results for performance. This applies to the total count as well. ### Filtering Equal ```ruby where(store_id: 1) ``` Not equal ```ruby where.not(store_id: 2) ``` Greater than (`gt`), less than (`lt`), greater than or equal (`gte`), less than or equal (`lte`) ```ruby where(expires_at: {gt: Time.now}) ``` Range ```ruby where(orders_count: 1..10) ``` In ```ruby where(aisle_id: [25, 30]) ``` Not in ```ruby where.not(aisle_id: [25, 30]) ``` Contains all ```ruby where(user_ids: {all: [1, 3]}) ``` Like ```ruby where(category: {like: "%frozen%"}) ``` Case-insensitive like ```ruby where(category: {ilike: "%frozen%"}) ``` Regular expression ```ruby where(category: /frozen .+/) ``` Prefix ```ruby where(category: {prefix: "frozen"}) ``` Exists ```ruby where(store_id: {exists: true}) ``` Combine filters with OR ```ruby where(_or: [{in_stock: true}, {backordered: true}]) ``` ### Boosting Boost important fields ```ruby fields("title^10", "description") ``` Boost by the value of a field (field must be numeric) ```ruby boost_by(:orders_count) # give popular documents a little boost boost_by(orders_count: {factor: 10}) # default factor is 1 ``` Boost matching documents ```ruby boost_where(user_id: 1) boost_where(user_id: {value: 1, factor: 100}) # default factor is 1000 boost_where(user_id: [{value: 1, factor: 100}, {value: 2, factor: 200}]) ``` Boost by recency ```ruby boost_by_recency(created_at: {scale: "7d", decay: 0.5}) ``` You can also boost by: - [Conversions](#intelligent-search) - [Distance](#boost-by-distance) ### Get Everything Use a `*` for the query. 
```ruby Product.search("*") ``` ### Pagination Plays nicely with kaminari and will_paginate. ```ruby # controller @products = Product.search("milk").page(params[:page]).per_page(20) ``` View with kaminari ```erb <%= paginate @products %> ``` View with will_paginate ```erb <%= will_paginate @products %> ``` ### Partial Matches By default, results must match all words in the query. ```ruby Product.search("fresh honey") # fresh AND honey ``` To change this, use: ```ruby Product.search("fresh honey").operator("or") # fresh OR honey ``` By default, results must match the entire word - `back` will not match `backpack`. You can change this behavior with: ```ruby class Product < ApplicationRecord searchkick word_start: [:name] end ``` And to search (after you reindex): ```ruby Product.search("back").fields(:name).match(:word_start) ``` Available options are: Option | Matches | Example --- | --- | --- `:word` | entire word | `apple` matches `apple` `:word_start` | start of word | `app` matches `apple` `:word_middle` | any part of word | `ppl` matches `apple` `:word_end` | end of word | `ple` matches `apple` `:text_start` | start of text | `gre` matches `green apple`, `app` does not match `:text_middle` | any part of text | `een app` matches `green apple` `:text_end` | end of text | `ple` matches `green apple`, `een` does not match The default is `:word`. The most matches will happen with `:word_middle`. To specify different matching for different fields, use: ```ruby Product.search(query).fields({name: :word_start}, {brand: :word_middle}) ``` ### Exact Matches To match a field exactly (case-sensitive), use: ```ruby Product.search(query).fields({name: :exact}) ``` ### Phrase Matches To only match the exact order, use: ```ruby Product.search("fresh honey").match(:phrase) ``` ### Stemming and Language Searchkick stems words by default for better matching. `apple` and `apples` both stem to `appl`, so searches for either term will have the same matches. 
Searchkick defaults to English for stemming. To change this, use: ```ruby class Product < ApplicationRecord searchkick language: "german" end ``` See the [list of languages](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-stemmer-tokenfilter.html#analysis-stemmer-tokenfilter-configure-parms). A few languages require plugins: - `chinese` - [analysis-ik plugin](https://github.com/medcl/elasticsearch-analysis-ik) - `chinese2` - [analysis-smartcn plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-smartcn.html) - `japanese` - [analysis-kuromoji plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji.html) - `korean` - [analysis-openkoreantext plugin](https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext) - `korean2` - [analysis-nori plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-nori.html) - `polish` - [analysis-stempel plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-stempel.html) - `ukrainian` - [analysis-ukrainian plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-ukrainian.html) - `vietnamese` - [analysis-vietnamese plugin](https://github.com/duydo/elasticsearch-analysis-vietnamese) You can also use a Hunspell dictionary for stemming. ```ruby class Product < ApplicationRecord searchkick stemmer: {type: "hunspell", locale: "en_US"} end ``` Disable stemming with: ```ruby class Image < ApplicationRecord searchkick stem: false end ``` Exclude certain words from stemming with: ```ruby class Image < ApplicationRecord searchkick stem_exclusion: ["apples"] end ``` Or change how words are stemmed: ```ruby class Image < ApplicationRecord searchkick stemmer_override: ["apples => other"] end ``` ### Synonyms ```ruby class Product < ApplicationRecord searchkick search_synonyms: [["pop", "soda"], ["burger", "hamburger"]] end ``` Call `Product.reindex` after changing synonyms. 
Synonyms are applied at search time before stemming, and can be a single word or multiple words. For directional synonyms, use: ```ruby search_synonyms: ["lightbulb => halogenlamp"] ``` ### Dynamic Synonyms The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms without a full reindex. We recommend placing synonyms in a file on the search server (in the `config` directory). This allows you to reload synonyms without reindexing. ```txt pop, soda burger, hamburger ``` Then use: ```ruby class Product < ApplicationRecord searchkick search_synonyms: "synonyms.txt" end ``` And reload with: ```ruby Product.search_index.reload_synonyms ``` ### Misspellings By default, Searchkick handles misspelled queries by returning results with an [edit distance](https://en.wikipedia.org/wiki/Levenshtein_distance) of one. You can change this with: ```ruby Product.search("zucini").misspellings(edit_distance: 2) # zucchini ``` To prevent poor precision and improve performance for correctly spelled queries (which should be a majority for most applications), Searchkick can first perform a search without misspellings, and if there are too few results, perform another with them. ```ruby Product.search("zuchini").misspellings(below: 5) ``` If there are fewer than 5 results, a 2nd search is performed with misspellings enabled. The result of this query is returned. Turn off misspellings with: ```ruby Product.search("zuchini").misspellings(false) # no zucchini ``` Specify which fields can include misspellings with: ```ruby Product.search("zucini").fields(:name, :color).misspellings(fields: [:name]) ``` > When doing this, you must also specify fields to search ### Bad Matches If a user searches `butter`, they may also get results for `peanut butter`. 
To prevent this, use: ```ruby Product.search("butter").exclude("peanut butter") ``` You can map queries and terms to exclude with: ```ruby exclude_queries = { "butter" => ["peanut butter"], "cream" => ["ice cream", "whipped cream"] } Product.search(query).exclude(exclude_queries[query]) ``` You can demote results by boosting by a factor less than one: ```ruby Product.search("butter").boost_where(category: {value: "pantry", factor: 0.5}) ``` ### Emoji Search :ice_cream::cake: and get `ice cream cake`! Add this line to your application’s Gemfile: ```ruby gem "gemoji-parser" ``` And use: ```ruby Product.search("🍨🍰").emoji ``` ## Indexing Control what data is indexed with the `search_data` method. Call `Product.reindex` after changing this method. ```ruby class Product < ApplicationRecord belongs_to :department def search_data { name: name, department_name: department.name, on_sale: sale_price.present? } end end ``` Searchkick uses `find_in_batches` to import documents. To eager load associations, use the `search_import` scope. ```ruby class Product < ApplicationRecord scope :search_import, -> { includes(:department) } end ``` By default, all records are indexed. To control which records are indexed, use the `should_index?` method. ```ruby class Product < ApplicationRecord def should_index? active # only index active records end end ``` If a reindex is interrupted, you can resume it with: ```ruby Product.reindex(resume: true) ``` For large data sets, try [parallel reindexing](#parallel-reindexing). ### To Reindex, or Not to Reindex #### Reindex - when you install or upgrade searchkick - change the `search_data` method - change the `searchkick` method #### No need to reindex - app starts ### Strategies There are four strategies for keeping the index synced with your database. 1. Inline (default) Anytime a record is inserted, updated, or deleted 2. 
Asynchronous Use background jobs for better performance ```ruby class Product < ApplicationRecord searchkick callbacks: :async end ``` Jobs are added to a queue named `searchkick`. 3. Queuing Push ids of records that need to be updated to a queue and reindex in the background in batches. This is more performant than the asynchronous method, which updates records individually. See [how to set up](#queuing). 4. Manual Turn off automatic syncing ```ruby class Product < ApplicationRecord searchkick callbacks: false end ``` And reindex a record or relation manually. ```ruby product.reindex # or store.products.reindex(mode: :async) ``` You can also do bulk updates. ```ruby Searchkick.callbacks(:bulk) do Product.find_each(&:update_fields) end ``` Or temporarily skip updates. ```ruby Searchkick.callbacks(false) do Product.find_each(&:update_fields) end ``` Or override the model’s strategy. ```ruby product.reindex(mode: :async) # :inline or :queue ``` ### Associations Data is **not** automatically synced when an association is updated. If this is desired, add a callback to reindex: ```ruby class Image < ApplicationRecord belongs_to :product after_commit :reindex_product def reindex_product product.reindex end end ``` ### Default Scopes If you have a default scope that filters records, use the `should_index?` method to exclude them from indexing: ```ruby class Product < ApplicationRecord default_scope { where(deleted_at: nil) } def should_index? deleted_at.nil? end end ``` If you want to index and search filtered records, set: ```ruby class Product < ApplicationRecord searchkick unscope: true end ``` ## Intelligent Search The best starting point to improve your search **by far** is to track searches and conversions. [Searchjoy](https://github.com/ankane/searchjoy) makes it easy. ```ruby Product.search("apple").track(user_id: current_user.id) ``` [See the docs](https://github.com/ankane/searchjoy) for how to install and use. Focus on top searches with a low conversion rate.
Searchkick can then use the conversion data to learn what users are looking for. If a user searches for “ice cream” and adds Ben & Jerry’s Chunky Monkey to the cart (our conversion metric at Instacart), that item gets a little more weight for similar searches. This can make a huge difference on the quality of your search. Add conversion data with: ```ruby class Product < ApplicationRecord has_many :conversions, class_name: "Searchjoy::Conversion", as: :convertable has_many :searches, class_name: "Searchjoy::Search", through: :conversions searchkick conversions_v2: [:conversions] # name of field def search_data { name: name, conversions: searches.group(:query).distinct.count(:user_id) # {"ice cream" => 234, "chocolate" => 67, "cream" => 2} } end end ``` Reindex and set up a cron job to add new conversions daily. For zero downtime deployment, temporarily set `conversions_v2(false)` in your search calls until the data is reindexed. ### Performant Conversions A performant way to do conversions is to cache them to prevent N+1 queries. For Postgres, create a migration with: ```ruby add_column :products, :search_conversions, :jsonb ``` For MySQL, use `:json`, and for others, use `:text` with a [JSON serializer](https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html). Next, update your model. Create a separate method for conversion data so you can use [partial reindexing](#partial-reindexing). ```ruby class Product < ApplicationRecord searchkick conversions_v2: [:conversions] def search_data { name: name, category: category }.merge(conversions_data) end def conversions_data { conversions: search_conversions || {} } end end ``` Deploy and reindex your data. For zero downtime deployment, temporarily set `conversions_v2(false)` in your search calls until the data is reindexed. ```ruby Product.reindex ``` Then, create a job to update the conversions column and reindex records with new conversions. 
Here’s one you can use for Searchjoy: ```ruby class UpdateConversionsJob < ApplicationJob def perform(class_name, since: nil, update: true, reindex: true) model = Searchkick.load_model(class_name) # get records that have a recent conversion recently_converted_ids = Searchjoy::Conversion.where(convertable_type: class_name, created_at: since..) .order(:convertable_id).distinct.pluck(:convertable_id) # split into batches recently_converted_ids.in_groups_of(1000, false) do |ids| if update # fetch conversions conversions = Searchjoy::Conversion.where(convertable_id: ids, convertable_type: class_name) .joins(:search).where.not(searchjoy_searches: {user_id: nil}) .group(:convertable_id, :query).distinct.count(:user_id) # group by record conversions_by_record = {} conversions.each do |(id, query), count| (conversions_by_record[id] ||= {})[query] = count end # update conversions column model.transaction do conversions_by_record.each do |id, conversions| model.where(id: id).update_all(search_conversions: conversions) end end end if reindex # reindex conversions data model.where(id: ids).reindex(:conversions_data, ignore_missing: true) end end end end ``` Run the job: ```ruby UpdateConversionsJob.perform_now("Product") ``` And set it up to run daily. ```ruby UpdateConversionsJob.perform_later("Product", since: 1.day.ago) ``` ## Personalized Results Order results differently for each user. For example, show a user’s previously purchased products before other results. ```ruby class Product < ApplicationRecord def search_data { name: name, orderer_ids: orders.pluck(:user_id) # boost this product for these users } end end ``` Reindex and search with: ```ruby Product.search("milk").boost_where(orderer_ids: current_user.id) ``` ## Instant Search / Autocomplete Autocomplete predicts what a user will type, making the search experience faster and easier. 
![Autocomplete](https://gist.githubusercontent.com/ankane/b6988db2802aca68a589b31e41b44195/raw/40febe948427e5bc53ec4e5dc248822855fef76f/autocomplete.png) **Note:** To autocomplete on search terms rather than results, check out [Autosuggest](https://github.com/ankane/autosuggest). **Note 2:** If you only have a few thousand records, don’t use Searchkick for autocomplete. It’s *much* faster to load all records into JavaScript and autocomplete there (eliminates network requests). First, specify which fields use this feature. This is necessary since autocomplete can increase the index size significantly, but don’t worry - this gives you blazing fast queries. ```ruby class Movie < ApplicationRecord searchkick word_start: [:title, :director] end ``` Reindex and search with: ```ruby Movie.search("jurassic pa").fields(:title).match(:word_start) ``` Use a front-end library like [typeahead.js](https://twitter.github.io/typeahead.js/) to show the results. #### Here’s how to make it work with Rails First, add a route and controller action. ```ruby class MoviesController < ApplicationController def autocomplete render json: Movie.search(params[:query]).fields("title^5", "director") .match(:word_start).limit(10).load(false).misspellings(below: 5).map(&:title) end end ``` **Note:** Use `load(false)` and `misspellings(below: n)` (or `misspellings(false)`) for best performance. Then add the search box and JavaScript code to a view. 
```html ``` ## Suggestions ![Suggest](https://gist.githubusercontent.com/ankane/b6988db2802aca68a589b31e41b44195/raw/40febe948427e5bc53ec4e5dc248822855fef76f/recursion.png) ```ruby class Product < ApplicationRecord searchkick suggest: [:name] # fields to generate suggestions end ``` Reindex and search with: ```ruby products = Product.search("peantu butta").suggest products.suggestions # ["peanut butter"] ``` ## Aggregations [Aggregations](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html) provide aggregated search data. ![Aggregations](https://gist.githubusercontent.com/ankane/b6988db2802aca68a589b31e41b44195/raw/40febe948427e5bc53ec4e5dc248822855fef76f/facets.png) ```ruby products = Product.search("chuck taylor").aggs(:product_type, :gender, :brand) products.aggs ``` By default, `where` conditions apply to aggregations. ```ruby Product.search("wingtips").where(color: "brandy").aggs(:size) # aggregations for brandy wingtips are returned ``` Change this with: ```ruby Product.search("wingtips").where(color: "brandy").aggs(:size).smart_aggs(false) # aggregations for all wingtips are returned ``` Set `where` conditions for each aggregation separately with: ```ruby Product.search("wingtips").aggs(size: {where: {color: "brandy"}}) ``` Limit ```ruby Product.search("apples").aggs(store_id: {limit: 10}) ``` Order ```ruby Product.search("wingtips").aggs(color: {order: {"_key" => "asc"}}) # alphabetically ``` [All of these options are supported](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-order) Ranges ```ruby price_ranges = [{to: 20}, {from: 20, to: 50}, {from: 50}] Product.search("*").aggs(price: {ranges: price_ranges}) ``` Minimum document count ```ruby Product.search("apples").aggs(store_id: {min_doc_count: 2}) ``` Script support ```ruby Product.search("*").aggs(color: {script: {source: "'Color: ' + _value"}}) ``` Date 
histogram ```ruby Product.search("pear").aggs(products_per_year: {date_histogram: {field: :created_at, interval: :year}}) ``` For other aggregation types, including sub-aggregations, use `body_options`: ```ruby Product.search("orange").body_options(aggs: {price: {histogram: {field: :price, interval: 10}}}) ``` ## Highlight Specify which fields to index with highlighting. ```ruby class Band < ApplicationRecord searchkick highlight: [:name] end ``` Highlight the search query in the results. ```ruby bands = Band.search("cinema").highlight ``` View the highlighted fields with: ```ruby bands.with_highlights.each do |band, highlights| highlights[:name] # "Two Door <em>Cinema</em> Club" end ``` To change the tag, use: ```ruby Band.search("cinema").highlight(tag: "<strong>") ``` To highlight and search different fields, use: ```ruby Band.search("cinema").fields(:name).highlight(fields: [:description]) ``` By default, the entire field is highlighted. To get small snippets instead, use: ```ruby bands = Band.search("cinema").highlight(fragment_size: 20) bands.with_highlights(multiple: true).each do |band, highlights| highlights[:name].join(" and ") end ``` Additional options can be specified for each field: ```ruby Band.search("cinema").fields(:name).highlight(fields: {name: {fragment_size: 200}}) ``` You can find available highlight options in the [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html) or [OpenSearch](https://opensearch.org/docs/latest/search-plugins/searching-data/highlight/) reference.
## Similar Items Find similar items ```ruby product = Product.first product.similar.fields(:name).where(size: "12 oz") ``` ## Geospatial Searches ```ruby class Restaurant < ApplicationRecord searchkick locations: [:location] def search_data attributes.merge(location: {lat: latitude, lon: longitude}) end end ``` Reindex and search with: ```ruby Restaurant.search("pizza").where(location: {near: {lat: 37, lon: -114}, within: "100mi"}) # or 160km ``` Bounded by a box ```ruby Restaurant.search("sushi").where(location: {top_left: {lat: 38, lon: -123}, bottom_right: {lat: 37, lon: -122}}) ``` **Note:** `top_right` and `bottom_left` also work Bounded by a polygon ```ruby Restaurant.search("dessert").where(location: {geo_polygon: {points: [{lat: 38, lon: -123}, {lat: 39, lon: -123}, {lat: 37, lon: -122}]}}) ``` ### Boost By Distance Boost results by distance - closer results are boosted more ```ruby Restaurant.search("noodles").boost_by_distance(location: {origin: {lat: 37, lon: -122}}) ``` Also supports [additional options](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html#function-decay) ```ruby Restaurant.search("wings").boost_by_distance(location: {origin: {lat: 37, lon: -122}, function: "linear", scale: "30mi", decay: 0.5}) ``` ### Geo Shapes You can also index and search geo shapes. ```ruby class Restaurant < ApplicationRecord searchkick geo_shape: [:bounds] def search_data attributes.merge( bounds: { type: "envelope", coordinates: [{lat: 4, lon: 1}, {lat: 2, lon: 3}] } ) end end ``` See the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-shape.html) for details.
Find shapes intersecting with the query shape ```ruby Restaurant.search("soup").where(bounds: {geo_shape: {type: "polygon", coordinates: [[{lat: 38, lon: -123}, ...]]}}) ``` Falling entirely within the query shape ```ruby Restaurant.search("salad").where(bounds: {geo_shape: {type: "circle", relation: "within", coordinates: {lat: 38, lon: -123}, radius: "1km"}}) ``` Not touching the query shape ```ruby Restaurant.search("burger").where(bounds: {geo_shape: {type: "envelope", relation: "disjoint", coordinates: [{lat: 38, lon: -123}, {lat: 37, lon: -122}]}}) ``` ## Inheritance Searchkick supports single table inheritance. ```ruby class Dog < Animal end ``` In your parent model, set: ```ruby class Animal < ApplicationRecord searchkick inheritance: true end ``` The parent and child model can both reindex. ```ruby Animal.reindex Dog.reindex # equivalent, all animals reindexed ``` And to search, use: ```ruby Animal.search("*") # all animals Dog.search("*") # just dogs Animal.search("*").type(Cat, Dog) # just cats and dogs ``` **Notes:** 1. The `suggest` option retrieves suggestions from the parent at the moment. ```ruby Dog.search("airbudd").suggest # suggestions for all animals ``` 2. This relies on a `type` field that is automatically added to the indexed document. Be wary of defining your own `type` field in `search_data`, as it will take precedence. ## Debugging Queries To help with debugging queries, you can use: ```ruby Product.search("soap").debug ``` This prints useful info to `stdout`. 
See how the search server scores your queries with: ```ruby Product.search("soap").explain.response ``` See how the search server tokenizes your queries with: ```ruby Product.search_index.tokens("Dish Washer Soap", analyzer: "searchkick_index") # ["dish", "dishwash", "washer", "washersoap", "soap"] Product.search_index.tokens("dishwasher soap", analyzer: "searchkick_search") # ["dishwashersoap"] - no match Product.search_index.tokens("dishwasher soap", analyzer: "searchkick_search2") # ["dishwash", "soap"] - match!! ``` Partial matches ```ruby Product.search_index.tokens("San Diego", analyzer: "searchkick_word_start_index") # ["s", "sa", "san", "d", "di", "die", "dieg", "diego"] Product.search_index.tokens("dieg", analyzer: "searchkick_word_search") # ["dieg"] - match!! ``` See the [complete list of analyzers](lib/searchkick/index_options.rb#L36). ## Testing As you iterate on your search, it’s a good idea to add tests. For performance, only enable Searchkick callbacks for the tests that need it. 
### Rails Add to your `test/test_helper.rb`: ```ruby module ActiveSupport class TestCase parallelize_setup do |worker| Searchkick.index_suffix = worker # reindex models for parallel tests Product.reindex end end end # reindex models for non-parallel tests Product.reindex # and disable callbacks Searchkick.disable_callbacks ``` And use: ```ruby class ProductTest < ActiveSupport::TestCase setup do Searchkick.enable_callbacks end teardown do Searchkick.disable_callbacks end test "search" do Product.create!(name: "Apple") Product.search_index.refresh assert_equal ["Apple"], Product.search("apple").map(&:name) end end ``` ### Minitest Add to your `test/test_helper.rb`: ```ruby # reindex models Product.reindex # and disable callbacks Searchkick.disable_callbacks ``` And use: ```ruby class ProductTest < Minitest::Test def setup Searchkick.enable_callbacks end def teardown Searchkick.disable_callbacks end def test_search Product.create!(name: "Apple") Product.search_index.refresh assert_equal ["Apple"], Product.search("apple").map(&:name) end end ``` ### RSpec Add to your `spec/spec_helper.rb`: ```ruby RSpec.configure do |config| config.before(:suite) do # reindex models Product.reindex # and disable callbacks Searchkick.disable_callbacks end config.around(:each, search: true) do |example| Searchkick.callbacks(nil) do example.run end end end ``` And use: ```ruby describe Product, search: true do it "searches" do Product.create!(name: "Apple") Product.search_index.refresh assert_equal ["Apple"], Product.search("apple").map(&:name) end end ``` ### Factory Bot Define a trait for each model: ```ruby FactoryBot.define do factory :product do trait :reindex do after(:create) do |product, _| product.reindex(refresh: true) end end end end ``` And use: ```ruby FactoryBot.create(:product, :reindex) ``` ### GitHub Actions Check out [setup-elasticsearch](https://github.com/ankane/setup-elasticsearch) for an easy way to install Elasticsearch: ```yml - uses: ankane/setup-elasticsearch@v1 
``` And [setup-opensearch](https://github.com/ankane/setup-opensearch) for an easy way to install OpenSearch: ```yml - uses: ankane/setup-opensearch@v1 ``` ## Deployment For the search server, Searchkick uses `ENV["ELASTICSEARCH_URL"]` for Elasticsearch and `ENV["OPENSEARCH_URL"]` for OpenSearch. This defaults to `http://localhost:9200`. - [Elastic Cloud](#elastic-cloud) - [Amazon OpenSearch Service](#amazon-opensearch-service) - [Heroku](#heroku) - [Self-Hosted and Other](#self-hosted-and-other) ### Elastic Cloud Create an initializer `config/initializers/elasticsearch.rb` with: ```ruby ENV["ELASTICSEARCH_URL"] = "https://user:password@host:port" ``` Then deploy and reindex: ```sh rake searchkick:reindex:all ``` ### Amazon OpenSearch Service Create an initializer `config/initializers/opensearch.rb` with: ```ruby ENV["OPENSEARCH_URL"] = "https://es-domain-1234.us-east-1.es.amazonaws.com:443" ``` To use signed requests, include in your Gemfile: ```ruby gem "faraday_middleware-aws-sigv4" ``` and add to your initializer: ```ruby Searchkick.aws_credentials = { access_key_id: ENV["AWS_ACCESS_KEY_ID"], secret_access_key: ENV["AWS_SECRET_ACCESS_KEY"], region: "us-east-1" } ``` Then deploy and reindex: ```sh rake searchkick:reindex:all ``` ### Heroku Choose an add-on: [Bonsai](https://elements.heroku.com/addons/bonsai), [SearchBox](https://elements.heroku.com/addons/searchbox), or [Elastic Cloud](https://elements.heroku.com/addons/foundelasticsearch). 
For Elasticsearch on Bonsai: ```sh heroku addons:create bonsai heroku config:set ELASTICSEARCH_URL=`heroku config:get BONSAI_URL` ``` For OpenSearch on Bonsai: ```sh heroku addons:create bonsai --engine=opensearch heroku config:set OPENSEARCH_URL=`heroku config:get BONSAI_URL` ``` For SearchBox: ```sh heroku addons:create searchbox:starter heroku config:set ELASTICSEARCH_URL=`heroku config:get SEARCHBOX_URL` ``` For Elastic Cloud (previously Found): ```sh heroku addons:create foundelasticsearch heroku addons:open foundelasticsearch ``` Visit the Shield page and reset your password. You’ll need to add the username and password to your url. Get the existing url with: ```sh heroku config:get FOUNDELASTICSEARCH_URL ``` And add `elastic:password@` right after `https://` and add port `9243` at the end: ```sh heroku config:set ELASTICSEARCH_URL=https://elastic:password@12345.us-east-1.aws.found.io:9243 ``` Then deploy and reindex: ```sh heroku run rake searchkick:reindex:all ``` ### Self-Hosted and Other Create an initializer with: ```ruby ENV["ELASTICSEARCH_URL"] = "https://user:password@host:port" # or ENV["OPENSEARCH_URL"] = "https://user:password@host:port" ``` Then deploy and reindex: ```sh rake searchkick:reindex:all ``` ### Data Protection We recommend encrypting data at rest and in transit (even inside your own network). This is especially important if you send [personal data](https://en.wikipedia.org/wiki/Personally_identifiable_information) of your users to the search server. Bonsai, Elastic Cloud, and Amazon OpenSearch Service all support encryption at rest and HTTPS. 
### Automatic Failover Create an initializer with multiple hosts: ```ruby ENV["ELASTICSEARCH_URL"] = "https://user:password@host1,https://user:password@host2" # or ENV["OPENSEARCH_URL"] = "https://user:password@host1,https://user:password@host2" ``` ### Client Options Create an initializer with: ```ruby Searchkick.client_options[:reload_connections] = true ``` See the docs for [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/advanced-config.html) or [OpenSearch](https://rubydoc.info/gems/opensearch-transport#configuration) for a complete list of options. ### Lograge Add the following to `config/environments/production.rb`: ```ruby config.lograge.custom_options = lambda do |event| options = {} options[:search] = event.payload[:searchkick_runtime] if event.payload[:searchkick_runtime].to_f > 0 options end ``` See [Production Rails](https://github.com/ankane/production_rails) for other good practices. ## Performance ### Persistent HTTP Connections Significantly increase performance with persistent HTTP connections. Add [Typhoeus](https://github.com/typhoeus/typhoeus) to your Gemfile and it’ll automatically be used. ```ruby gem "typhoeus" ``` To reduce log noise, create an initializer with: ```ruby Ethon.logger = Logger.new(nil) ``` ### Searchable Fields By default, all string fields are searchable (can be used in `fields` option). Speed up indexing and reduce index size by only making some fields searchable. ```ruby class Product < ApplicationRecord searchkick searchable: [:name] end ``` ### Filterable Fields By default, all string fields are filterable (can be used in `where` option). Speed up indexing and reduce index size by only making some fields filterable. ```ruby class Product < ApplicationRecord searchkick filterable: [:brand] end ``` **Note:** Non-string fields are always filterable and should not be passed to this option. ### Parallel Reindexing For large data sets, you can use background jobs to parallelize reindexing.
```ruby Product.reindex(mode: :async) # {index_name: "products_production_20250111210018065"} ``` Once the jobs complete, promote the new index with: ```ruby Product.search_index.promote(index_name) ``` You can optionally track the status with Redis: ```ruby Searchkick.redis = Redis.new ``` And use: ```ruby Searchkick.reindex_status(index_name) ``` You can also have Searchkick wait for reindexing to complete: ```ruby Product.reindex(mode: :async, wait: true) ``` You can use your background job framework to control concurrency. For Solid Queue, create an initializer with: ```ruby module SearchkickBulkReindexConcurrency extend ActiveSupport::Concern included do limits_concurrency to: 3, key: "" end end Rails.application.config.after_initialize do Searchkick::BulkReindexJob.include(SearchkickBulkReindexConcurrency) end ``` This will allow only 3 jobs to run at once. ### Refresh Interval You can specify a longer refresh interval while reindexing to increase performance. ```ruby Product.reindex(mode: :async, refresh_interval: "30s") ``` **Note:** This only makes a noticeable difference with parallel reindexing. When promoting, have it restored to the value in your mapping (defaults to `1s`). ```ruby Product.search_index.promote(index_name, update_refresh_interval: true) ``` ### Queuing Push ids of records needing reindexing to a queue and reindex in bulk for better performance. First, set up Redis in an initializer. We recommend using [connection_pool](https://github.com/mperham/connection_pool). ```ruby Searchkick.redis = ConnectionPool.new { Redis.new } ``` And ask your models to queue updates. ```ruby class Product < ApplicationRecord searchkick callbacks: :queue end ``` Then, set up a background job to run.
```ruby Searchkick::ProcessQueueJob.perform_later(class_name: "Product") ``` You can check the queue length with: ```ruby Product.search_index.reindex_queue.length ``` For more tips, check out [Keeping Elasticsearch in Sync](https://www.elastic.co/blog/found-keeping-elasticsearch-in-sync). ### Routing Searchkick supports [routing](https://www.elastic.co/blog/customizing-your-document-routing), which can significantly speed up searches. ```ruby class Business < ApplicationRecord searchkick routing: true def search_routing city_id end end ``` Reindex and search with: ```ruby Business.search("ice cream").routing(params[:city_id]) ``` ### Partial Reindexing Reindex a subset of attributes to reduce time spent generating search data and cut down on network traffic. ```ruby class Product < ApplicationRecord def search_data { name: name, category: category }.merge(prices_data) end def prices_data { price: price, sale_price: sale_price } end end ``` And use: ```ruby Product.reindex(:prices_data) ``` Ignore errors for missing documents with: ```ruby Product.reindex(:prices_data, ignore_missing: true) ``` ## Advanced Searchkick makes it easy to use the Elasticsearch or OpenSearch DSL on its own. ### Advanced Mapping Create a custom mapping: ```ruby class Product < ApplicationRecord searchkick mappings: { properties: { name: {type: "keyword"} } } end ``` **Note:** If you use a custom mapping, you'll need to use [custom searching](#advanced-search) as well. 
To keep the mappings and settings generated by Searchkick, use: ```ruby class Product < ApplicationRecord searchkick merge_mappings: true, mappings: {...} end ``` ### Advanced Search And use the `body` option to search: ```ruby products = Product.search.body(query: {match: {name: "milk"}}) ``` View the response with: ```ruby products.response ``` To modify the query generated by Searchkick, use: ```ruby products = Product.search("milk").body_options(min_score: 1) ``` or ```ruby products = Product.search("apples") do |body| body[:min_score] = 1 end ``` ### Client To access the `Elasticsearch::Client` or `OpenSearch::Client` directly, use: ```ruby Searchkick.client ``` ## Multi Search To batch search requests for performance, use: ```ruby products = Product.search("snacks") coupons = Coupon.search("snacks") Searchkick.multi_search([products, coupons]) ``` Then use `products` and `coupons` as typical results. **Note:** Errors are not raised as with single requests. Use the `error` method on each query to check for errors. ## Multiple Models Search across multiple models with: ```ruby Searchkick.search("milk").models(Product, Category) ``` Boost specific models with: ```ruby indices_boost(Category => 2, Product => 1) ``` ## Multi-Tenancy Check out [this great post](https://www.tiagoamaro.com.br/2014/12/11/multi-tenancy-with-searchkick/) on the [Apartment](https://github.com/influitive/apartment) gem. Follow a similar pattern if you use another gem. ## Scroll API Searchkick also supports the [scroll API](https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html#scroll-search-results). Scrolling is not intended for real time user requests, but rather for processing large amounts of data. ```ruby Product.search("*").scroll("1m") do |batch| # process batch ... end ``` You can also scroll batches manually. ```ruby products = Product.search("*").scroll("1m") while products.any? # process batch ... 
products = products.scroll end products.clear_scroll ``` ## Deep Paging By default, Elasticsearch and OpenSearch limit paging to the first 10,000 results. [Here’s why](https://www.elastic.co/guide/en/elasticsearch/guide/current/pagination.html). We don’t recommend changing this, but if you really need all results, you can use: ```ruby class Product < ApplicationRecord searchkick deep_paging: true end ``` If you just need an accurate total count, you can instead use: ```ruby Product.search("pears").body_options(track_total_hits: true) ``` ## Nested Data To query nested data, use dot notation. ```ruby Product.search("san").fields("store.city").where("store.zip_code" => 12345) ``` ## Nearest Neighbor Search *Available for Elasticsearch 8.6+ and OpenSearch 2.4+* ```ruby class Product < ApplicationRecord searchkick knn: {embedding: {dimensions: 3, distance: "cosine"}} end ``` Also supports `euclidean` and `inner_product` Reindex and search with: ```ruby Product.search.knn(field: :embedding, vector: [1, 2, 3]).limit(10) ``` ### HNSW Options Nearest neighbor search uses [HNSW](https://en.wikipedia.org/wiki/Hierarchical_navigable_small_world) for indexing. 
Specify `m` and `ef_construction` ```ruby class Product < ApplicationRecord searchkick knn: {embedding: {dimensions: 3, distance: "cosine", m: 16, ef_construction: 100}} end ``` Specify `ef_search` ```ruby Product.search.knn(field: :embedding, vector: [1, 2, 3], ef_search: 40).limit(10) ``` ## Semantic Search First, add [nearest neighbor search](#nearest-neighbor-search) to your model ```ruby class Product < ApplicationRecord searchkick knn: {embedding: {dimensions: 768, distance: "cosine"}} end ``` Generate an embedding for each record (you can use an external service or a library like [Informers](https://github.com/ankane/informers)) ```ruby embed = Informers.pipeline("embedding", "Snowflake/snowflake-arctic-embed-m-v1.5") embed_options = {model_output: "sentence_embedding", pooling: "none"} # specific to embedding model Product.find_each do |product| embedding = embed.(product.name, **embed_options) product.update!(embedding: embedding) end ``` For search, generate an embedding for the query (the query prefix is specific to the [embedding model](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5)) ```ruby query_prefix = "Represent this sentence for searching relevant passages: " query_embedding = embed.(query_prefix + query, **embed_options) ``` And perform nearest neighbor search ```ruby Product.search.knn(field: :embedding, vector: query_embedding).limit(20) ``` See a [full example](examples/semantic.rb) ## Hybrid Search Perform keyword search and semantic search in parallel ```ruby keyword_search = Product.search(query).limit(20) semantic_search = Product.search.knn(field: :embedding, vector: query_embedding).limit(20) Searchkick.multi_search([keyword_search, semantic_search]) ``` To combine the results, use Reciprocal Rank Fusion (RRF) ```ruby Searchkick::Reranking.rrf(keyword_search, semantic_search).first(5) ``` Or a reranking model ```ruby rerank = Informers.pipeline("reranking", "mixedbread-ai/mxbai-rerank-xsmall-v1") results = 
(keyword_search.to_a + semantic_search.to_a).uniq rerank.(query, results.map(&:name)).first(5).map { |v| results[v[:doc_id]] } ``` See a [full example](examples/hybrid.rb) ## Reference Reindex one record ```ruby product = Product.find(1) product.reindex ``` Reindex multiple records ```ruby Product.where(store_id: 1).reindex ``` Reindex associations ```ruby store.products.reindex ``` Remove old indices ```ruby Product.search_index.clean_indices ``` Use custom settings ```ruby class Product < ApplicationRecord searchkick settings: {number_of_shards: 3} end ``` Use a different index name ```ruby class Product < ApplicationRecord searchkick index_name: "products_v2" end ``` Use a dynamic index name ```ruby class Product < ApplicationRecord searchkick index_name: -> { "#{name.tableize}-#{I18n.locale}" } end ``` Prefix the index name ```ruby class Product < ApplicationRecord searchkick index_prefix: "datakick" end ``` For all models ```ruby Searchkick.index_prefix = "datakick" ``` Use a different term for boosting by conversions ```ruby Product.search("banana").conversions_v2(term: "organic banana") ``` Define multiple conversion fields ```ruby class Product < ApplicationRecord has_many :searches, class_name: "Searchjoy::Search" searchkick conversions_v2: ["unique_conversions", "total_conversions"] def search_data { name: name, unique_conversions: searches.group(:query).distinct.count(:user_id), total_conversions: searches.group(:query).count } end end ``` And specify which to use ```ruby Product.search("banana") # boost by both fields (default) Product.search("banana").conversions_v2("total_conversions") # only boost by total_conversions Product.search("banana").conversions_v2(false) # no conversion boosting ``` Change timeout ```ruby Searchkick.timeout = 15 # defaults to 10 ``` Set a lower timeout for searches ```ruby Searchkick.search_timeout = 3 ``` Change the search method name ```ruby Searchkick.search_method_name = :lookup ``` Change the queue name ```ruby 
Searchkick.queue_name = :search_reindex # defaults to :searchkick ``` Change the queue name or priority for a model ```ruby class Product < ApplicationRecord searchkick job_options: {queue: "critical", priority: 10} end ``` Change the queue name or priority for a specific call ```ruby Product.reindex(mode: :async, job_options: {queue: "critical", priority: 10}) ``` Change the parent job ```ruby Searchkick.parent_job = "ApplicationJob" # defaults to "ActiveJob::Base" ``` Eager load associations ```ruby Product.search("milk").includes(:brand, :stores) ``` Eager load different associations by model ```ruby Searchkick.search("*").models(Product, Store).model_includes(Product => [:store], Store => [:product]) ``` Run additional scopes on results ```ruby Product.search("milk").scope_results(->(r) { r.with_attached_images }) ``` Set opaque id for slow logs ```ruby Product.search("milk").opaque_id("some-id") # or Searchkick.multi_search(searches, opaque_id: "some-id") ``` Specify default fields to search ```ruby class Product < ApplicationRecord searchkick default_fields: [:name] end ``` Turn off special characters ```ruby class Product < ApplicationRecord # A will not match Ä searchkick special_characters: false end ``` Turn on stemming for conversions ```ruby class Product < ApplicationRecord searchkick stem_conversions: true end ``` Make search case-sensitive ```ruby class Product < ApplicationRecord searchkick case_sensitive: true end ``` **Note:** If misspellings are enabled (default), results with a single character case difference will match. Turn off misspellings if this is not desired. 
Change import batch size ```ruby class Product < ApplicationRecord searchkick batch_size: 200 # defaults to 1000 end ``` Create index without importing ```ruby Product.reindex(import: false) ``` Use a different id ```ruby class Product < ApplicationRecord def search_document_id custom_id end end ``` Add [request parameters](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#search-search-api-query-params) like `search_type` ```ruby Product.search("carrots").request_params(search_type: "dfs_query_then_fetch") ``` Set options across all models ```ruby Searchkick.model_options = { batch_size: 200 } ``` Reindex conditionally ```ruby class Product < ApplicationRecord searchkick callback_options: {if: :search_data_changed?} def search_data_changed? previous_changes.include?("name") end end ``` Reindex all models - Rails only ```sh rake searchkick:reindex:all ``` Turn on misspellings after a certain number of characters ```ruby Product.search("api").misspellings(prefix_length: 2) # api, apt, no ahi ``` BigDecimal values are indexed as floats by default so they can be used for boosting. Convert them to strings to keep full precision. ```ruby class Product < ApplicationRecord def search_data { units: units.to_s("F") } end end ``` ## Gotchas ### Consistency Elasticsearch and OpenSearch are eventually consistent, meaning it can take up to a second for a change to reflect in search. You can use the `refresh` method to have it show up immediately. ```ruby product.save! Product.search_index.refresh ``` ### Inconsistent Scores Due to the distributed nature of Elasticsearch and OpenSearch, you can get incorrect results when the number of documents in the index is low. You can [read more about it here](https://www.elastic.co/blog/understanding-query-then-fetch-vs-dfs-query-then-fetch). 
To fix this, do: ```ruby class Product < ApplicationRecord searchkick settings: {number_of_shards: 1} end ``` For convenience, this is set by default in the test environment. ## Upgrading ### 6.0 Searchkick 6 brings a new query builder API: ```ruby Product.search("apples").where(in_stock: true).limit(10).offset(50) ``` All existing options can be used as methods, or you can continue to use the existing API. This release also significantly improves the performance of searches when using conversions. To upgrade conversions without downtime, add `conversions_v2` to your model and an additional field to `search_data`: ```ruby class Product < ApplicationRecord searchkick conversions: [:conversions], conversions_v2: [:conversions_v2] def search_data conversions = searches.group(:query).distinct.count(:user_id) { conversions: conversions, conversions_v2: conversions } end end ``` Reindex, then remove `conversions`: ```ruby class Product < ApplicationRecord searchkick conversions_v2: [:conversions_v2] def search_data { conversions_v2: searches.group(:query).distinct.count(:user_id) } end end ``` Other improvements include the option to ignore errors for missing documents with partial reindexing and more customization for background jobs. Check out the [changelog](https://github.com/ankane/searchkick/blob/master/CHANGELOG.md) for the full list of changes. ## History View the [changelog](https://github.com/ankane/searchkick/blob/master/CHANGELOG.md) ## Thanks Thanks to Karel Minarik for [Elasticsearch Ruby](https://github.com/elasticsearch/elasticsearch-ruby) and [Tire](https://github.com/karmi/retire), Jaroslav Kalistsuk for [zero downtime reindexing](https://gist.github.com/jarosan/3124884), and Alex Leschenko for [Elasticsearch autocomplete](https://github.com/leschenko/elasticsearch_autocomplete). ## Contributing Everyone is encouraged to help improve this project. 
Here are a few ways you can help: - [Report bugs](https://github.com/ankane/searchkick/issues) - Fix bugs and [submit pull requests](https://github.com/ankane/searchkick/pulls) - Write, clarify, or fix documentation - Suggest or add new features To get started with development: ```sh git clone https://github.com/ankane/searchkick.git cd searchkick bundle install bundle exec rake test ``` Feel free to open an issue to get feedback on your idea before spending too much time on it. ================================================ FILE: Rakefile ================================================ require "bundler/gem_tasks" require "rake/testtask" Rake::TestTask.new do |t| t.pattern = "test/**/*_test.rb" end task default: :test # to test in parallel, uncomment and run: # rake parallel:test # require "parallel_tests/tasks" ================================================ FILE: benchmark/Gemfile ================================================ source "https://rubygems.org" gemspec path: "../" gem "sqlite3" gem "pg" gem "activerecord", "~> 8.0.0" gem "activejob" gem "elasticsearch" # gem "opensearch-ruby" gem "redis" gem "sidekiq" # performance gem "typhoeus" gem "oj" gem "json" # profiling gem "ruby-prof" gem "allocation_stats" gem "get_process_mem" gem "memory_profiler" # gem "allocation_tracer" gem "benchmark-ips" ================================================ FILE: benchmark/index.rb ================================================ require "bundler/setup" Bundler.require(:default) require "active_record" require "active_job" require "benchmark" require "active_support/notifications" ActiveSupport::Notifications.subscribe "request.searchkick" do |*args| event = ActiveSupport::Notifications::Event.new(*args) # puts "Import: #{event.duration.round}ms" end # ActiveJob::Base.queue_adapter = :sidekiq class SearchSerializer def dump(object) JSON.generate(object) end end # Elasticsearch::API.settings[:serializer] = SearchSerializer.new # OpenSearch::API.settings[:serializer] = 
SearchSerializer.new Searchkick.redis = Redis.new ActiveRecord.default_timezone = :utc ActiveRecord::Base.time_zone_aware_attributes = true ActiveRecord::Base.establish_connection adapter: "sqlite3", database: "/tmp/searchkick" # ActiveRecord::Base.establish_connection "postgresql://localhost/searchkick_bench" # ActiveRecord::Base.logger = Logger.new(STDOUT) ActiveJob::Base.logger = nil class Product < ActiveRecord::Base searchkick batch_size: 1000 def search_data { name: name, color: color, store_id: store_id } end end if ENV["SETUP"] total_docs = 100000 ActiveRecord::Schema.define do create_table :products, force: :cascade do |t| t.string :name t.string :color t.integer :store_id end end records = [] total_docs.times do |i| records << { name: "Product #{i}", color: ["red", "blue"].sample, store_id: rand(10) } end Product.insert_all(records) puts "Imported" end result = nil report = nil stats = nil Product.searchkick_index.delete rescue nil GC.start GC.disable start_mem = GetProcessMem.new.mb time = Benchmark.realtime do # result = RubyProf::Profile.profile do # report = MemoryProfiler.report do # stats = AllocationStats.trace do reindex = Product.reindex #(async: true) # p reindex # end # 60.times do |i| # if reindex.is_a?(Hash) # docs = Searchkick::Index.new(reindex[:index_name]).total_docs # else # docs = Product.searchkick_index.total_docs # end # puts "#{i}: #{docs}" # if docs == total_docs # break # end # p Searchkick.reindex_status(reindex[:index_name]) if reindex.is_a?(Hash) # sleep(1) # # Product.searchkick_index.refresh # end end puts "Time: #{time.round(1)}s" if result printer = RubyProf::GraphPrinter.new(result) printer.print(STDOUT, min_percent: 5) end if report puts report.pretty_print end if stats puts result.allocations(alias_paths: true).group_by(:sourcefile, :class).to_text end ================================================ FILE: benchmark/relation.rb ================================================ require "bundler/setup" 
Bundler.require(:default) require "active_record" class Product < ActiveRecord::Base searchkick end Product.all # initial Active Record allocations stats = AllocationStats.trace do Product.search("apples").where(store_id: 1).where(in_stock: true).order(:name).limit(10).offset(50) end puts stats.allocations(alias_paths: true).to_text ================================================ FILE: benchmark/search.rb ================================================ require "bundler/setup" Bundler.require(:default) require "active_record" require "benchmark/ips" ActiveRecord.default_timezone = :utc ActiveRecord::Base.time_zone_aware_attributes = true ActiveRecord::Base.establish_connection adapter: "sqlite3", database: "/tmp/searchkick" class Product < ActiveRecord::Base searchkick batch_size: 1000 def search_data { name: name, color: color, store_id: store_id } end end if ENV["SETUP"] total_docs = 1000000 ActiveRecord::Schema.define do create_table :products, force: :cascade do |t| t.string :name t.string :color t.integer :store_id end end records = [] total_docs.times do |i| records << { name: "Product #{i}", color: ["red", "blue"].sample, store_id: rand(10) } end Product.insert_all(records) puts "Imported" Product.reindex puts "Reindexed" end query = Product.search("product", fields: [:name], where: {color: "red", store_id: 5}, limit: 10000, load: false) pp query.body.as_json puts Benchmark.ips do |x| x.report { query.dup.load } end ================================================ FILE: examples/Gemfile ================================================ source "https://rubygems.org" gemspec path: ".." 
gem "activerecord" gem "elasticsearch" gem "informers" gem "opensearch-ruby" gem "sqlite3" ================================================ FILE: examples/hybrid.rb ================================================ require "bundler/setup" require "active_record" require "elasticsearch" # or "opensearch-ruby" require "informers" require "searchkick" ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:" ActiveRecord::Schema.verbose = false ActiveRecord::Schema.define do create_table :products do |t| t.string :name t.json :embedding end end class Product < ActiveRecord::Base searchkick knn: {embedding: {dimensions: 768, distance: "cosine"}} end Product.reindex Product.create!(name: "Breakfast cereal") Product.create!(name: "Ice cream") Product.create!(name: "Eggs") embed = Informers.pipeline("embedding", "Snowflake/snowflake-arctic-embed-m-v1.5") embed_options = {model_output: "sentence_embedding", pooling: "none"} # specific to embedding model Product.find_each do |product| embedding = embed.(product.name, **embed_options) product.update!(embedding: embedding) end Product.search_index.refresh query = "breakfast" keyword_search = Product.search(query, limit: 20) # the query prefix is specific to the embedding model (https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5) query_prefix = "Represent this sentence for searching relevant passages: " query_embedding = embed.(query_prefix + query, **embed_options) semantic_search = Product.search(knn: {field: :embedding, vector: query_embedding}, limit: 20) Searchkick.multi_search([keyword_search, semantic_search]) # to combine the results, use Reciprocal Rank Fusion (RRF) p Searchkick::Reranking.rrf(keyword_search, semantic_search).first(5).map { |v| v[:result].name } # or a reranking model rerank = Informers.pipeline("reranking", "mixedbread-ai/mxbai-rerank-xsmall-v1") results = (keyword_search.to_a + semantic_search.to_a).uniq p rerank.(query, results.map(&:name)).first(5).map { |v| 
results[v[:doc_id]] }.map(&:name) ================================================ FILE: examples/semantic.rb ================================================ require "bundler/setup" require "active_record" require "elasticsearch" # or "opensearch-ruby" require "informers" require "searchkick" ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:" ActiveRecord::Schema.verbose = false ActiveRecord::Schema.define do create_table :products do |t| t.string :name t.json :embedding end end class Product < ActiveRecord::Base searchkick knn: {embedding: {dimensions: 768, distance: "cosine"}} end Product.reindex Product.create!(name: "Cereal") Product.create!(name: "Ice cream") Product.create!(name: "Eggs") embed = Informers.pipeline("embedding", "Snowflake/snowflake-arctic-embed-m-v1.5") embed_options = {model_output: "sentence_embedding", pooling: "none"} # specific to embedding model Product.find_each do |product| embedding = embed.(product.name, **embed_options) product.update!(embedding: embedding) end Product.search_index.refresh query = "breakfast" # the query prefix is specific to the embedding model (https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5) query_prefix = "Represent this sentence for searching relevant passages: " query_embedding = embed.(query_prefix + query, **embed_options) pp Product.search(knn: {field: :embedding, vector: query_embedding}, limit: 20).map(&:name) ================================================ FILE: gemfiles/activerecord72.gemfile ================================================ source "https://rubygems.org" gemspec path: ".." 
gem "rake" gem "minitest" gem "sqlite3" gem "activerecord", "~> 7.2.0" gem "actionpack", "~> 7.2.0" gem "activejob", "~> 7.2.0", require: "active_job" gem "elasticsearch", "~> 8" gem "redis-client" gem "connection_pool" gem "kaminari" gem "gemoji-parser" ================================================ FILE: gemfiles/activerecord80.gemfile ================================================ source "https://rubygems.org" gemspec path: ".." gem "rake" gem "minitest" gem "sqlite3" gem "activerecord", "~> 8.0.0" gem "actionpack", "~> 8.0.0" gem "activejob", "~> 8.0.0", require: "active_job" gem "elasticsearch", "~> 9" gem "redis-client" gem "connection_pool" gem "kaminari" gem "gemoji-parser" ================================================ FILE: gemfiles/mongoid8.gemfile ================================================ source "https://rubygems.org" gemspec path: ".." gem "rake" gem "minitest" gem "mongoid", "~> 8" gem "activejob", require: "active_job" gem "redis" gem "elasticsearch", "~> 8" gem "actionpack" gem "kaminari" gem "gemoji-parser" gem "ostruct" # for mongoid ================================================ FILE: gemfiles/mongoid9.gemfile ================================================ source "https://rubygems.org" gemspec path: ".." gem "rake" gem "minitest" gem "mongoid", "~> 9" gem "activejob", require: "active_job" gem "redis" gem "elasticsearch", "~> 9" gem "actionpack" gem "kaminari" gem "gemoji-parser" gem "ostruct" # for mongoid ================================================ FILE: gemfiles/opensearch2.gemfile ================================================ source "https://rubygems.org" gemspec path: ".." 
gem "rake"
gem "minitest"
gem "sqlite3"
gem "activerecord", "~> 7.2.0"
gem "actionpack", "~> 7.2.0"
gem "activejob", "~> 7.2.0", require: "active_job"
gem "opensearch-ruby", "~> 2"
gem "redis-client"
gem "connection_pool"
gem "kaminari"
gem "gemoji-parser"
gem "parallel_tests"
gem "typhoeus"
================================================ FILE: gemfiles/opensearch3.gemfile ================================================
source "https://rubygems.org"

gemspec path: ".."

gem "rake"
gem "minitest"
gem "sqlite3"
gem "activerecord", "~> 8.0.0"
gem "actionpack", "~> 8.0.0"
gem "activejob", "~> 8.0.0", require: "active_job"
gem "opensearch-ruby", "~> 3"
gem "redis-client"
gem "connection_pool"
gem "kaminari"
gem "gemoji-parser"
gem "parallel_tests"
gem "typhoeus"
================================================ FILE: lib/searchkick/bulk_reindex_job.rb ================================================
module Searchkick
  # Job that reindexes one batch of records for a single model.
  # The parent class comes from Searchkick.parent_job and the queue from Searchkick.queue_name.
  class BulkReindexJob < Searchkick.parent_job.constantize
    queue_as { Searchkick.queue_name }

    # class_name     - model name, resolved with Searchkick.load_model
    # record_ids     - ids to load; when nil, the range min_id..max_id is used instead
    # index_name     - passed to model.searchkick_index(name:) to select the index
    # method_name    - forwarded unchanged to RecordIndexer#reindex
    # batch_id       - when present, RelationIndexer#batch_completed is called after indexing
    # ignore_missing - forwarded unchanged to RecordIndexer#reindex
    def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil, ignore_missing: nil)
      model = Searchkick.load_model(class_name)
      index = model.searchkick_index(name: index_name)

      # fall back to the id range when no explicit ids were given
      record_ids ||= min_id..max_id
      relation = Searchkick.scope(model)
      relation = Searchkick.load_records(relation, record_ids)
      # use search_import only when the relation responds to it
      relation = relation.search_import if relation.respond_to?(:search_import)

      RecordIndexer.new(index).reindex(relation, mode: :inline, method_name: method_name, ignore_missing: ignore_missing, full: false)
      RelationIndexer.new(index).batch_completed(batch_id) if batch_id
    end
  end
end
================================================ FILE: lib/searchkick/controller_runtime.rb ================================================
# based on https://gist.github.com/mnutt/566725
module Searchkick
  # Controller concern that copies the runtime tracked by Searchkick::LogSubscriber
  # into the request payload (:searchkick_runtime) and the process_action log messages.
  module ControllerRuntime
    extend ActiveSupport::Concern

    protected

    attr_internal :searchkick_runtime

    def process_action(action, *args)
      # We also need to reset the runtime before each action
      # because of queries in middleware or in cases we are streaming
      # and it won't be cleaned up by the method below.
      Searchkick::LogSubscriber.reset_runtime
      super
    end

    # Stores the runtime collected before and during `super` in searchkick_runtime,
    # then returns super's runtime minus the part collected during `super`.
    # The order of the reset_runtime calls matters: each call returns the value
    # that is then treated as the time accumulated since the previous call.
    def cleanup_view_runtime
      searchkick_rt_before_render = Searchkick::LogSubscriber.reset_runtime
      runtime = super
      searchkick_rt_after_render = Searchkick::LogSubscriber.reset_runtime
      self.searchkick_runtime = searchkick_rt_before_render + searchkick_rt_after_render
      runtime - searchkick_rt_after_render
    end

    # Sets payload[:searchkick_runtime] to the stored runtime (0 when unset)
    # plus whatever reset_runtime returns at this point.
    def append_info_to_payload(payload)
      super
      payload[:searchkick_runtime] = (searchkick_runtime || 0) + Searchkick::LogSubscriber.reset_runtime
    end

    module ClassMethods
      # Appends "Searchkick: <runtime>ms" to super's messages when the payload runtime is above zero.
      def log_process_action(payload)
        messages = super
        runtime = payload[:searchkick_runtime]
        messages << ("Searchkick: %.1fms" % runtime.to_f) if runtime.to_f > 0
        messages
      end
    end
  end
end
================================================ FILE: lib/searchkick/hash_wrapper.rb ================================================
module Searchkick
  # Wraps a hash so its entries can be read with [] or as methods.
  # Lookups convert the requested name with to_s first.
  class HashWrapper
    def initialize(attributes)
      @attributes = attributes
    end

    def [](name)
      @attributes[name.to_s]
    end

    # returns the wrapped hash itself (not a copy)
    def to_h
      @attributes
    end

    def as_json(...)
      @attributes.as_json(...)
    end

    def to_json(...)
      @attributes.to_json(...)
    end

    # existing keys act as reader methods; any other name goes to super
    def method_missing(name, ...)
      if @attributes.key?(name.to_s)
        self[name]
      else
        super
      end
    end

    def respond_to_missing?(name, ...)
      @attributes.key?(name.to_s) || super
    end

    def inspect
      # keys whose first character is "_" are left out
      attributes = @attributes.reject { |k, v| k[0] == "_" }.map { |k, v| "#{k}: #{v.inspect}" }
      # NOTE(review): this moves the LAST entry to the front, so it relies on id being last - confirm with caller
      attributes.unshift(attributes.pop) # move id to start
      "#<#{self.class.name} #{attributes.join(", ")}>"
    end
  end
end
================================================ FILE: lib/searchkick/index.rb ================================================
module Searchkick
  class Index
    attr_reader :name, :options

    def initialize(name, options = {})
      @name = name
      @options = options
      @klass_document_type = {} # cache
    end

    def index_options
      IndexOptions.new(self).index_options
    end

    def create(body = {})
      client.indices.create index: name, body: body
    end

    # When name is an alias, deletes the indices returned by get_alias for it;
    # otherwise deletes the index called name.
    def delete
      if alias_exists?
        # can't call delete directly on aliases in ES 6
        indices = client.indices.get_alias(name: name).keys
        client.indices.delete index: indices
      else
        client.indices.delete index: name
      end
    end

    def exists?
      client.indices.exists index: name
    end

    def refresh
      client.indices.refresh index: name
    end

    def alias_exists?
# Returns the names of all timestamped indices that belong to this index
# name (pattern: "<name>_" followed by 14 to 17 digits).
#
# unaliased: when true, only indices that currently have no alias are kept.
#
# A "not found" error from the client is treated as "no indices"; any other
# error is re-raised.
def all_indices(unaliased: false)
  candidates =
    begin
      if client.indices.respond_to?(:get_alias)
        client.indices.get_alias(index: "#{name}*")
      else
        client.indices.get_aliases
      end
    rescue => e
      raise e unless Searchkick.not_found_error?(e)
      {}
    end

  if unaliased
    candidates = candidates.select { |_index_name, info| info.empty? || info["aliases"].empty? }
  end

  matching = candidates.select do |index_name, _info|
    index_name =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/
  end
  matching.keys
end
# Entry point for reindexing an array of records, a relation, or a model.
#
# note: this is designed to be used internally
# so it does not check object matches index class
#
# object         - Array of records, or anything responding to searchkick_klass
# method_name    - optional method name passed through to the importer
# ignore_missing - passed through to the record/import paths only
# full           - when true, a relation is fully reindexed instead of imported
# options        - remaining keywords; which ones are accepted depends on the
#                  path taken below
#
# Returns whatever reindex_records / full_reindex return, or true for the
# import-only path.
# Raises Error when object cannot be reindexed and ArgumentError for
# unsupported keywords on the import-only path.
def reindex(object, method_name: nil, ignore_missing: nil, full: false, **options)
  # index-level job options act as defaults; per-call job options win
  if @options[:job_options]
    options[:job_options] = (@options[:job_options] || {}).merge(options[:job_options] || {})
  end

  if object.is_a?(Array)
    # note: purposefully skip full
    return reindex_records(object, method_name: method_name, ignore_missing: ignore_missing, **options)
  end

  if !object.respond_to?(:searchkick_klass)
    raise Error, "Cannot reindex object"
  end

  scoped = Searchkick.relation?(object)
  # call searchkick_klass for inheritance
  relation = scoped ? object.all : Searchkick.scope(object.searchkick_klass).all

  # refresh defaults to true only when reindexing an unscoped model;
  # it is removed from options so it never reaches full_reindex
  refresh = options.fetch(:refresh, !scoped)
  options.delete(:refresh)

  if method_name || (scoped && !full)
    # consume the keywords this path understands; anything left is an error
    mode = options.delete(:mode) || :inline
    scope = options.delete(:scope)
    job_options = options.delete(:job_options)
    raise ArgumentError, "unsupported keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?

    # import only
    import_scope(relation, method_name: method_name, mode: mode, scope: scope, ignore_missing: ignore_missing, job_options: job_options)
    self.refresh if refresh
    true
  else
    # translate the deprecated async option into mode/wait
    async = options.delete(:async)
    if async
      if async.is_a?(Hash) && async[:wait]
        Searchkick.warn "async option is deprecated - use mode: :async, wait: true instead"
        options[:wait] = true unless options.key?(:wait)
      else
        Searchkick.warn "async option is deprecated - use mode: :async instead"
      end
      options[:mode] ||= :async
    end

    # note: ignore_missing is not forwarded on this path
    full_reindex(relation, **options)
  end
end
# Builds (or resumes) a timestamped index, imports the relation into it and
# points the alias at it.
#
# https://gist.github.com/jarosan/3124884
# https://www.elastic.co/blog/changing-mapping-with-zero-downtime/
#
# relation         - records to import; must respond to searchkick_index_options
# import           - when false, the index is created/promoted without importing
# resume           - reuse the last existing index (by sorted name) instead of creating one
# retain           - keep old indices instead of cleaning them up
# mode             - import mode (defaults to :inline); :queue is rejected
# refresh_interval - overrides the refresh interval of the new index during import
# scope            - passed through to the import
# wait             - only valid with mode: :async; block until all batches finish
# job_options      - passed through to the import
#
# Returns {index_name: ...} in :async mode, true otherwise.
# Raises ArgumentError for invalid mode/wait combinations, Error when there
# is no index to resume, and UnsupportedVersionError for the transport
# errors matched in the rescue clause.
def full_reindex(relation, import: true, resume: false, retain: false, mode: nil, refresh_interval: nil, scope: nil, wait: nil, job_options: nil)
  raise ArgumentError, "wait only available in :async mode" if !wait.nil? && mode != :async
  raise ArgumentError, "Full reindex does not support :queue mode - use :async mode instead" if mode == :queue

  if resume
    index_name = all_indices.sort.last
    raise Error, "No index to resume" unless index_name
    index = Index.new(index_name, @options)
  else
    clean_indices unless retain

    index_options = relation.searchkick_index_options
    index_options.deep_merge!(settings: {index: {refresh_interval: refresh_interval}}) if refresh_interval
    index = create_index(index_options: index_options)
  end

  import_options = {
    mode: (mode || :inline),
    full: true,
    resume: resume,
    scope: scope,
    job_options: job_options
  }

  # remembered so check_uuid can verify the same index is still there before promotion
  uuid = index.uuid

  # check if alias exists
  alias_exists = alias_exists?
  if alias_exists
    # alias already serves traffic: fill the new index first, then swap
    import_before_promotion(index, relation, **import_options) if import

    # get existing indices to remove
    unless mode == :async
      check_uuid(uuid, index.uuid)
      promote(index.name, update_refresh_interval: !refresh_interval.nil?)
      clean_indices unless retain
    end
  else
    # no alias yet: remove anything stored under the plain name, then promote right away
    delete if exists?
    promote(index.name, update_refresh_interval: !refresh_interval.nil?)

    # import after promotion
    index.import_scope(relation, **import_options) if import
  end

  if mode == :async
    if wait
      puts "Created index: #{index.name}"
      puts "Jobs queued. Waiting..."
      # poll every 3 seconds until the reindex status reports completion
      loop do
        sleep 3
        status = Searchkick.reindex_status(index.name)
        break if status[:completed]
        puts "Batches left: #{status[:batches_left]}"
      end
      # already promoted if alias didn't exist
      if alias_exists
        puts "Jobs complete. Promoting..."
        check_uuid(uuid, index.uuid)
        promote(index.name, update_refresh_interval: !refresh_interval.nil?)
      end
      clean_indices unless retain
      puts "SUCCESS!"
    end

    {index_name: index.name}
  else
    index.refresh
    true
  end
rescue => e
  # these two server messages are reported as an unsupported server version
  if Searchkick.transport_error?(e) && (e.message.include?("No handler for type [text]") || e.message.include?("class java.util.ArrayList cannot be cast to class java.util.Map"))
    raise UnsupportedVersionError
  end

  raise e
end
module Searchkick
  # Small bounded cache keyed by name. All access is serialized through a
  # mutex. When the cache is full, it is emptied entirely before the next
  # entry is stored.
  class IndexCache
    def initialize(max_size: 20)
      @data = {}
      @mutex = Mutex.new
      @max_size = max_size
    end

    # Returns the cached value for +name+, computing and storing it from the
    # given block on a miss.
    #
    # probably a better pattern for this
    # but keep it simple
    def fetch(name)
      # the block runs while the mutex is held
      @mutex.synchronize do
        @data.fetch(name) do
          @data.clear unless @data.size < @max_size
          @data[name] = yield
        end
      end
    end

    # Drops every cached entry.
    def clear
      @mutex.synchronize { @data.clear }
    end
  end
end
["searchkick_stemmer"] : []) }, default_analyzer => { type: "custom", # character filters -> tokenizer -> token filters # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html char_filter: ["ampersand"], tokenizer: "standard", # synonym should come last, after stemming and shingle # shingle must come before searchkick_stemmer filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"] }, searchkick_search: { type: "custom", char_filter: ["ampersand"], tokenizer: "standard", filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"] }, searchkick_search2: { type: "custom", char_filter: ["ampersand"], tokenizer: "standard", filter: ["lowercase", "asciifolding", "searchkick_stemmer"] }, # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb searchkick_autocomplete_search: { type: "custom", tokenizer: "keyword", filter: ["lowercase", "asciifolding"] }, searchkick_word_search: { type: "custom", tokenizer: "standard", filter: ["lowercase", "asciifolding"] }, searchkick_suggest_index: { type: "custom", tokenizer: "standard", filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"] }, searchkick_text_start_index: { type: "custom", tokenizer: "keyword", filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"] }, searchkick_text_middle_index: { type: "custom", tokenizer: "keyword", filter: ["lowercase", "asciifolding", "searchkick_ngram"] }, searchkick_text_end_index: { type: "custom", tokenizer: "keyword", filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"] }, searchkick_word_start_index: { type: "custom", tokenizer: "standard", filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"] }, searchkick_word_middle_index: { type: "custom", tokenizer: "standard", filter: ["lowercase", "asciifolding", "searchkick_ngram"] }, searchkick_word_end_index: { type: "custom", tokenizer: 
"standard", filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"] } }, filter: { searchkick_index_shingle: { type: "shingle", token_separator: "" }, # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7 searchkick_search_shingle: { type: "shingle", token_separator: "", output_unigrams: false, output_unigrams_if_no_shingles: true }, searchkick_suggest_shingle: { type: "shingle", max_shingle_size: 5 }, searchkick_edge_ngram: { type: "edge_ngram", min_gram: 1, max_gram: 50 }, searchkick_ngram: { type: "ngram", min_gram: 1, max_gram: 50 }, searchkick_stemmer: { # use stemmer if language is lowercase, snowball otherwise type: language == language.to_s.downcase ? "stemmer" : "snowball", language: language || "English" } }, char_filter: { # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html # &_to_and ampersand: { type: "mapping", mappings: ["&=> and "] } } } } raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language update_language(settings, language) update_stemming(settings) if Searchkick.env == "test" settings[:number_of_shards] = 1 settings[:number_of_replicas] = 0 end if options[:similarity] settings[:similarity] = {default: {type: options[:similarity]}} end settings[:index] = { max_ngram_diff: 49, max_shingle_diff: 4 } if options[:knn] unless Searchkick.knn_support? if Searchkick.opensearch? raise Error, "knn requires OpenSearch 2.4+" else raise Error, "knn requires Elasticsearch 8.6+" end end if Searchkick.opensearch? && options[:knn].any? { |_, v| !v[:distance].nil? } # only enable if doing approximate search settings[:index][:knn] = true end end add_synonyms(settings) add_search_synonyms(settings) if options[:special_characters] == false settings[:analysis][:analyzer].each_value do |analyzer_settings| analyzer_settings[:filter].reject! 
# Swaps the default index analyzer and the two search analyzers for
# language-specific ones when +language+ is one of the specially handled
# values. Any other language leaves +settings+ untouched.
def update_language(settings, language)
  if ["japanese", "japanese2"].include?(language)
    kuromoji = {
      type: "custom",
      tokenizer: "kuromoji_tokenizer",
      filter: [
        "kuromoji_baseform",
        "kuromoji_part_of_speech",
        "cjk_width",
        "ja_stop",
        "searchkick_stemmer",
        "lowercase"
      ]
    }
    # each analyzer gets its own copy of the definition
    settings[:analysis][:analyzer].merge!(
      default_analyzer => kuromoji.deep_dup,
      searchkick_search: kuromoji.deep_dup,
      searchkick_search2: kuromoji.deep_dup
    )
    settings[:analysis][:filter][:searchkick_stemmer] = {
      type: "kuromoji_stemmer"
    }
  else
    # [type for index + search analyzers, type for search2 analyzer]
    primary_type, search2_type =
      case language
      when "chinese" then ["ik_smart", "ik_max_word"]
      when "chinese2", "smartcn" then ["smartcn", "smartcn"]
      when "korean" then ["openkoreantext-analyzer", "openkoreantext-analyzer"]
      when "korean2" then ["nori", "nori"]
      when "vietnamese" then ["vi_analyzer", "vi_analyzer"]
      when "polish", "ukrainian" then [language, language]
      end

    if primary_type
      settings[:analysis][:analyzer].merge!(
        default_analyzer => {type: primary_type},
        searchkick_search: {type: primary_type},
        searchkick_search2: {type: search2_type}
      )
    end
  end
end
# Builds the default mappings hash (properties, routing and the dynamic
# string template) from the model's searchkick options.
#
# Returns a hash with :properties, :_routing and :dynamic_templates.
# Raises ArgumentError when the same field is used for both conversions
# versions, or when knn options are invalid.
def generate_mappings
  mapping = {}

  keyword_mapping = {type: "keyword"}
  keyword_mapping[:ignore_above] = options[:ignore_above] || 30000

  # conversions
  Array(options[:conversions]).each do |conversions_field|
    mapping[conversions_field] = {
      type: "nested",
      properties: {
        query: {type: default_type, analyzer: "searchkick_keyword"},
        count: {type: "integer"}
      }
    }
  end

  Array(options[:conversions_v2]).each do |conversions_field|
    mapping[conversions_field] = {
      type: "rank_features"
    }
  end

  if (Array(options[:conversions_v2]).map(&:to_s) & Array(options[:conversions]).map(&:to_s)).any?
    raise ArgumentError, "Must have separate conversions fields"
  end

  mapping_options =
    [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
      .to_h { |type| [type, (options[type] || []).map(&:to_s)] }

  word = options[:word] != false && (!options[:match] || options[:match] == :word)

  mapping_options[:searchable].delete("_all")

  analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}

  mapping_options.values.flatten.uniq.each do |field|
    fields = {}

    if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
      fields[field] = {type: default_type, index: false}
    else
      fields[field] = keyword_mapping
    end

    if !options[:searchable] || mapping_options[:searchable].include?(field)
      if word
        # copy the shared template: term_vector is set per field below, and
        # writing it into the shared hash would leak it into every other
        # field and into the dynamic template
        fields[:analyzed] = analyzed_field_options.dup

        if mapping_options[:highlight].include?(field)
          fields[:analyzed][:term_vector] = "with_positions_offsets"
        end
      end

      mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
        if options[:match] == type || f.include?(field)
          fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
        end
      end
    end

    mapping[field] = fields[field].merge(fields: fields.except(field))
  end

  (options[:locations] || []).map(&:to_s).each do |field|
    mapping[field] = {
      type: "geo_point"
    }
  end

  # an array of field names is normalized (in place) to {field => {}}
  options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
  (options[:geo_shape] || {}).each do |field, shape_options|
    mapping[field] = shape_options.merge(type: "geo_shape")
  end

  (options[:knn] || []).each do |field, knn_options|
    distance = knn_options[:distance]
    quantization = knn_options[:quantization]

    if Searchkick.opensearch?
      if distance.nil?
        # avoid server crash if method not specified
        raise ArgumentError, "Must specify a distance for OpenSearch"
      end

      vector_options = {
        type: "knn_vector",
        dimension: knn_options[:dimensions]
      }

      if !distance.nil?
        space_type =
          case distance
          when "cosine"
            "cosinesimil"
          when "euclidean"
            "l2"
          when "inner_product"
            "innerproduct"
          else
            raise ArgumentError, "Unknown distance: #{distance}"
          end

        if !quantization.nil?
          raise ArgumentError, "Quantization not supported yet for OpenSearch"
        end

        vector_options[:method] = {
          name: "hnsw",
          space_type: space_type,
          engine: "lucene",
          parameters: knn_options.slice(:m, :ef_construction)
        }
      end

      mapping[field.to_s] = vector_options
    else
      vector_options = {
        type: "dense_vector",
        dims: knn_options[:dimensions],
        index: !distance.nil?
      }

      if !distance.nil?
        vector_options[:similarity] =
          case distance
          when "cosine"
            "cosine"
          when "euclidean"
            "l2_norm"
          when "inner_product"
            "max_inner_product"
          else
            raise ArgumentError, "Unknown distance: #{distance}"
          end

        type =
          case quantization
          when "int8", "int4", "bbq"
            "#{quantization}_hnsw"
          when nil
            "hnsw"
          else
            raise ArgumentError, "Unknown quantization: #{quantization}"
          end

        vector_index_options = knn_options.slice(:m, :ef_construction)
        vector_options[:index_options] = {type: type}.merge(vector_index_options)
      end

      mapping[field.to_s] = vector_options
    end
  end

  if options[:inheritance]
    mapping[:type] = keyword_mapping
  end

  routing = {}
  if options[:routing]
    routing = {required: true}
    unless options[:routing] == true
      routing[:path] = options[:routing].to_s
    end
  end

  dynamic_fields = {
    # analyzed field must be the default field for include_in_all
    # https://www.elastic.co/guide/reference/mapping/multi-field-type/
    # however, we can include the not_analyzed field in _all
    # and the _all index analyzer will take care of it
    "{name}" => keyword_mapping
  }

  if options.key?(:filterable)
    dynamic_fields["{name}"] = {type: default_type, index: false}
  end

  unless options[:searchable]
    if options[:match] && options[:match] != :word
      dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
    end

    if word
      dynamic_fields[:analyzed] = analyzed_field_options
    end
  end

  # https://www.elastic.co/guide/reference/mapping/multi-field-type/
  multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))

  mappings = {
    properties: mapping,
    _routing: routing,
    # https://gist.github.com/kimchy/2898285
    dynamic_templates: [
      {
        string_template: {
          match: "*",
          match_mapping_type: "string",
          mapping: multi_field
        }
      }
    ]
  }

  mappings
end
# Adds the search-time synonym graph filter to +settings+ when the
# :search_synonyms option is set.
#
# The option may be a path (String), a list of synonym groups, or a callable
# returning either. Nothing is added for an empty list.
#
# Raises Error when a targeted search analyzer has no filter chain (e.g. a
# language analyzer without a custom filter list).
def add_search_synonyms(settings)
  search_synonyms = options[:search_synonyms] || []
  search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
  if search_synonyms.is_a?(String) || search_synonyms.any?
    if search_synonyms.is_a?(String)
      synonym_graph = {
        type: "synonym_graph",
        synonyms_path: search_synonyms,
        updateable: true
      }
    else
      synonym_graph = {
        type: "synonym_graph",
        # TODO confirm this is correct
        synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
      }
    end
    settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph

    # the language option may be a callable (it is resolved the same way when
    # the settings are generated), so resolve it before comparing
    language = options[:language]
    language = language.call if language.respond_to?(:call)

    if ["japanese", "japanese2"].include?(language)
      [:searchkick_search, :searchkick_search2].each do |analyzer|
        settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
      end
    else
      [:searchkick_search2, :searchkick_word_search].each do |analyzer|
        unless settings[:analysis][:analyzer][analyzer].key?(:filter)
          raise Error, "Search synonyms are not supported yet for language"
        end

        settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
      end
    end
  end
end
# thread-local (technically fiber-local) indexer
# used to aggregate bulk callbacks across models
module Searchkick
  class Indexer
    attr_reader :queued_items

    def initialize
      @queued_items = []
    end

    # Appends +items+ to the queue and sends the queue immediately unless
    # bulk callbacks are active.
    def queue(items)
      @queued_items.concat(items)
      return if Searchkick.callbacks_value == :bulk

      perform
    end

    # Sends everything queued so far in one bulk request. The queue is
    # emptied before the request is made.
    #
    # Raises ImportError for the first failed item that is not an ignorable
    # missing document. Always returns nil otherwise.
    def perform
      batch, @queued_items = @queued_items, []
      return if batch.empty?

      response = Searchkick.client.bulk(body: batch)
      raise_first_error(response["items"], batch) if response["errors"]

      # maybe return response in future
      nil
    end

    private

    # Walks the per-item results in request order and raises on the first
    # one that carries a non-ignorable error.
    # note: delete does not set error when item not found
    def raise_first_error(response_items, batch)
      response_items.each_with_index do |entry, position|
        result = entry["index"] || entry["delete"] || entry["update"]
        error = result["error"]
        next unless error
        next if ignore_missing?(batch[position], error)

        raise ImportError, "#{error} on item with id '#{result["_id"]}'"
      end
    end

    def ignore_missing?(item, error)
      return false unless error["type"] == "document_missing_exception"

      item.instance_variable_defined?(:@ignore_missing)
    end
  end
end
"/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}" end def request(event) self.class.runtime += event.duration return unless logger.debug? payload = event.payload name = "#{payload[:name]} (#{event.duration.round(1)}ms)" debug " #{color(name, YELLOW, bold: true)} #{payload.except(:name).to_json}" end def multi_search(event) self.class.runtime += event.duration return unless logger.debug? payload = event.payload name = "#{payload[:name]} (#{event.duration.round(1)}ms)" debug " #{color(name, YELLOW, bold: true)} _msearch #{payload[:body]}" end end end ================================================ FILE: lib/searchkick/middleware.rb ================================================ require "faraday" module Searchkick class Middleware < Faraday::Middleware def call(env) path = env[:url].path.to_s if path.end_with?("/_search") env[:request][:timeout] = Searchkick.search_timeout elsif path.end_with?("/_msearch") # assume no concurrent searches for timeout for now searches = env[:request_body].count("\n") / 2 # do not allow timeout to exceed Searchkick.timeout timeout = [Searchkick.search_timeout * searches, Searchkick.timeout].min env[:request][:timeout] = timeout end @app.call(env) end end end ================================================ FILE: lib/searchkick/model.rb ================================================ module Searchkick module Model def searchkick(**options) options = Searchkick.model_options.deep_merge(options) if options[:conversions] Searchkick.warn("The `conversions` option is deprecated in favor of `conversions_v2`, which provides much better search performance. 
Upgrade to `conversions_v2` or rename `conversions` to `conversions_v1`")
      end

      # conversions_v1 is accepted as an explicit name for the legacy option
      # (it is stored under :conversions, after the deprecation check above)
      if options.key?(:conversions_v1)
        options[:conversions] = options.delete(:conversions_v1)
      end

      # reject any option that is not in the list of supported keywords
      unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :callback_options, :case_sensitive, :conversions, :conversions_v2, :deep_paging, :default_fields, :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :job_options, :knn, :language, :locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity, :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end, :text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
      raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?

      # searchkick_index is defined further down, so its presence means searchkick already ran
      raise "Only call searchkick once per model" if respond_to?(:searchkick_index)

      Searchkick.models << self

      # lazily computed document type (lambda is stored, not called, here)
      options[:_type] ||= -> { searchkick_index.klass_document_type(self, true) }
      options[:class_name] = model_name.name

      # callbacks default to :inline when the option is absent
      callbacks = options.key?(:callbacks) ? options[:callbacks] : :inline
      unless [:inline, true, false, :async, :queue].include?(callbacks)
        raise ArgumentError, "Invalid value for callbacks"
      end

      # prepend the global callbacks check to any user-supplied :if conditions
      callback_options = (options[:callback_options] || {}).dup
      callback_options[:if] = [-> { Searchkick.callbacks?(default: callbacks) }, callback_options[:if]].compact.flatten(1)

      base = self

      # instance methods live in an anonymous module and are only defined
      # when the model does not already define a method of the same name
      mod = Module.new
      include(mod)
      mod.module_eval do
        # Reindexes this single record through the model's index.
        def reindex(method_name = nil, mode: nil, refresh: false, ignore_missing: nil, job_options: nil)
          self.class.searchkick_index.reindex([self], method_name: method_name, mode: mode, refresh: refresh, ignore_missing: ignore_missing, job_options: job_options, single: true)
        end unless base.method_defined?(:reindex)

        # Delegates to the index to find records similar to this one.
        def similar(**options)
          self.class.searchkick_index.similar_record(self, **options)
        end unless base.method_defined?(:similar)

        # Default document body: the record's hash form without id/_id/_type.
        def search_data
          data = respond_to?(:to_hash) ? to_hash : serializable_hash
          data.delete("id")
          data.delete("_id")
          data.delete("_type")
          data
        end unless base.method_defined?(:search_data)

        # Default: every record is indexed.
        def should_index?
          true
        end unless base.method_defined?(:should_index?)
      end

      class_eval do
        cattr_reader :searchkick_options, :searchkick_klass, instance_reader: false

        class_variable_set :@@searchkick_options, options.dup
        class_variable_set :@@searchkick_klass, self
        class_variable_set :@@searchkick_index_cache, Searchkick::IndexCache.new

        class << self
          # Entry point for searching; refuses to run on a relation.
          def searchkick_search(term = "*", **options, &block)
            if Searchkick.relation?(self)
              raise Searchkick::Error, "search must be called on model, not relation"
            end

            Searchkick.search(term, model: self, **options, &block)
          end
          alias_method Searchkick.search_method_name, :searchkick_search if Searchkick.search_method_name

          # Returns the (cached) Searchkick::Index for the given or default index name.
          # A callable index name is resolved on every call before the cache lookup.
          def searchkick_index(name: nil)
            index_name = name || searchkick_klass.searchkick_index_name
            index_name = index_name.call if index_name.respond_to?(:call)
            index_cache = class_variable_get(:@@searchkick_index_cache)
            index_cache.fetch(index_name) { Searchkick::Index.new(index_name, searchkick_options) }
          end
          alias_method :search_index, :searchkick_index unless method_defined?(:search_index)

          def searchkick_reindex(method_name = nil, **options)
            searchkick_index.reindex(self, method_name: method_name, **options)
          end
          alias_method :reindex, :searchkick_reindex unless method_defined?(:reindex)

          def searchkick_index_options
            searchkick_index.index_options
          end

          # Memoized index name: explicit :index_name wins; a callable
          # :index_prefix yields a lambda; otherwise the name is built from
          # prefix, plural model name, env and suffix joined by "_".
          def searchkick_index_name
            @searchkick_index_name ||= begin
              options = class_variable_get(:@@searchkick_options)
              if options[:index_name]
                options[:index_name]
              elsif options[:index_prefix].respond_to?(:call)
                -> { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
              else
                [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
              end
            end
          end
        end

        # always add callbacks, even when callbacks is false
        # so Model.callbacks block can be used
        if respond_to?(:after_commit)
          after_commit :reindex, **callback_options
        elsif respond_to?(:after_save)
          after_save :reindex, **callback_options
          after_destroy :reindex, **callback_options
        end
      end
    end
  end
end

================================================
FILE: lib/searchkick/multi_search.rb
================================================
module Searchkick
  # Runs several prepared queries in a single msearch request.
  class MultiSearch
    attr_reader :queries

    def initialize(queries, opaque_id: nil)
      @queries = queries
      @opaque_id = opaque_id
    end

    # Executes the queries; does nothing (returns nil) when there are none.
    def perform
      if queries.any?
        perform_search(queries)
      end
    end

    private

    # Sends the queries, hands each response to its query, and re-runs
    # (at most once) the queries that ask for a misspellings retry.
    def perform_search(search_queries, perform_retry: true)
      # msearch body alternates header (params without body) and body per query
      params = {
        body: search_queries.flat_map { |q| [q.params.except(:body), q.body] }
      }
      params[:opaque_id] = @opaque_id if @opaque_id

      responses = client.msearch(params)["responses"]

      retry_queries = []
      search_queries.each_with_index do |query, i|
        if perform_retry && query.retry_misspellings?(responses[i])
          query.send(:prepare) # okay, since we don't want to expose this method outside Searchkick
          retry_queries << query
        else
          query.handle_response(responses[i])
        end
      end

      if retry_queries.any?
        perform_search(retry_queries, perform_retry: false)
      end

      search_queries
    end

    def client
      Searchkick.client
    end
  end
end

================================================
FILE: lib/searchkick/process_batch_job.rb
================================================
module Searchkick
  class ProcessBatchJob < Searchkick.parent_job.constantize
    queue_as { Searchkick.queue_name }

    def perform(class_name:, record_ids:, index_name: nil)
      model = Searchkick.load_model(class_name)
      index = model.searchkick_index(name: index_name)

      items = record_ids.map do |r|
        parts = r.split(/(? 
e
          handle_error(e)
        end
        handle_response(response)
      end
    end

    # Wraps a raw search response in a Results object and stores it in @execute.
    # With the :debug option it first prints versions, options, sample record
    # data, mapping, settings, the query body and the raw response.
    def handle_response(response)
      # everything Results needs to interpret the response
      opts = {
        page: @page,
        per_page: @per_page,
        padding: @padding,
        load: @load,
        includes: options[:includes],
        model_includes: options[:model_includes],
        json: !@json.nil?,
        match_suffix: @match_suffix,
        highlight: options[:highlight],
        highlighted_fields: @highlighted_fields || [],
        misspellings: @misspellings,
        term: term,
        scope_results: options[:scope_results],
        total_entries: options[:total_entries],
        index_mapping: @index_mapping,
        suggest: options[:suggest],
        scroll: options[:scroll],
        opaque_id: options[:opaque_id]
      }

      if options[:debug]
        server = Searchkick.opensearch? ? "OpenSearch" : "Elasticsearch"
        puts "Searchkick #{Searchkick::VERSION}"
        puts "#{server} #{Searchkick.server_version}"
        puts

        puts "Model Options"
        pp searchkick_options
        puts

        puts "Search Options"
        pp options
        puts

        if searchkick_index
          puts "Record Data"
          begin
            # index data of up to three records; errors are printed, not raised
            pp klass.limit(3).map { |r| RecordData.new(searchkick_index, r).index_data }
          rescue => e
            puts "#{e.class.name}: #{e.message}"
          end
          puts

          puts "Mapping"
          puts JSON.pretty_generate(searchkick_index.mapping)
          puts

          puts "Settings"
          puts JSON.pretty_generate(searchkick_index.settings)
          puts
        end

        puts "Query"
        puts JSON.pretty_generate(params[:body])
        puts

        puts "Results"
        puts JSON.pretty_generate(response.to_h)
      end

      # set execute for multi search
      @execute = Results.new(searchkick_klass, response, opts)
    end

    # True when a misspellings :below threshold is set, the response has no
    # error, and its total count is under that threshold.
    def retry_misspellings?(response)
      @misspellings_below && response["error"].nil? && Results.new(searchkick_klass, response).total_count < @misspellings_below
    end

    private

    # Translates client errors into Searchkick errors based on the status code
    # and message text; anything unrecognized is re-raised unchanged.
    def handle_error(e)
      # status code is read from characters 1..3 of the message
      # NOTE(review): presumably messages look like "[404] ..." — confirm against the client
      status_code = e.message[1..3].to_i
      if status_code == 404
        if e.message.include?("No search context found for id")
          raise MissingIndexError, "No search context found for id"
        else
          raise MissingIndexError, "Index missing - run #{reindex_command}"
        end
      elsif status_code == 500 && (
        e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") ||
        e.message.include?("No query registered for [multi_match]") ||
        e.message.include?("[match] query does not support [cutoff_frequency]") ||
        e.message.include?("No query registered for [function_score]")
      )

        raise UnsupportedVersionError
      elsif status_code == 400
        if (
          e.message.include?("bool query does not support [filter]") ||
          e.message.include?("[bool] filter does not support [filter]")
        )

          raise UnsupportedVersionError
        elsif e.message.match?(/analyzer \[searchkick_.+\] not found/)
          raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
        else
          raise InvalidQueryError, e.message
        end
      else
        raise e
      end
    end

    # Text of the reindex call suggested in error messages.
    def reindex_command
      searchkick_klass ? "#{searchkick_klass.name}.reindex" : "reindex"
    end

    # Runs the search through the client inside a "search.searchkick" notification.
    def execute_search
      name = searchkick_klass ? "#{searchkick_klass.name} Search" : "Search"
      event = {
        name: name,
        query: params
      }
      ActiveSupport::Notifications.instrument("search.searchkick", event) do
        Searchkick.client.search(params)
      end
    end

    # Builds the request body and pagination state from the search options
    # and stores them in instance variables (@body, @page, @per_page, ...).
    def prepare
      boost_fields, fields = set_fields

      operator = options[:operator] || "and"

      # pagination
      page = [options[:page].to_i, 1].max
      # maybe use index.max_result_window in the future
      default_limit = searchkick_options[:deep_paging] ? 
1_000_000_000 : 10_000
      per_page = (options[:limit] || options[:per_page] || default_limit).to_i
      padding = [options[:padding].to_i, 0].max
      # an explicit :offset wins over the page-derived offset
      offset = (options[:offset] || (page - 1) * per_page + padding).to_i
      scroll = options[:scroll]
      opaque_id = options[:opaque_id]

      # clamp offset and page size so offset + per_page never exceeds the window;
      # the unclamped per_page is kept for reporting pagination
      max_result_window = searchkick_options[:max_result_window]
      original_per_page = per_page
      if max_result_window
        offset = max_result_window if offset > max_result_window
        per_page = max_result_window - offset if offset + per_page > max_result_window
      end

      # model and eager loading
      load = options[:load].nil? ? true : options[:load]

      all = term == "*"

      @json = options[:body]
      if @json
        # a raw body cannot be combined with options that would build one
        ignored_options = options.keys & [:aggs, :boost, :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :exclude, :explain, :fields, :highlight, :indices_boost, :match, :misspellings, :operator, :order, :profile, :select, :smart_aggs, :suggest, :where]
        raise ArgumentError, "Options incompatible with body option: #{ignored_options.join(", ")}" if ignored_options.any?
        payload = @json
      else
        must_not = []
        should = []

        if options[:similar]
          # similar: true uses the term itself as the "like" input
          like = options[:similar] == true ? term : options[:similar]
          query = {
            more_like_this: {
              like: like,
              min_doc_freq: 1,
              min_term_freq: 1,
              analyzer: "searchkick_search2"
            }
          }
          # wildcard field patterns ("*.suffix") are rejected here
          if fields.all? { |f| f.start_with?("*.") }
            raise ArgumentError, "Must specify fields to search"
          end
          if fields != ["_all"]
            query[:more_like_this][:fields] = fields
          end
        elsif all && !options[:exclude]
          query = {
            match_all: {}
          }
        else
          queries = []

          # misspellings default to enabled
          misspellings =
            if options.key?(:misspellings)
              options[:misspellings]
            else
              true
            end

          # first pass with :below runs without misspellings; @misspellings_below
          # being set marks that the threshold has already been recorded
          if misspellings.is_a?(Hash) && misspellings[:below] && !@misspellings_below
            @misspellings_below = misspellings[:below].to_i
            misspellings = false
          end

          if misspellings != false
            edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1
            transpositions =
              if misspellings.is_a?(Hash) && misspellings.key?(:transpositions)
                {fuzzy_transpositions: misspellings[:transpositions]}
              else
                {fuzzy_transpositions: true}
              end
            prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
            default_max_expansions = @misspellings_below ? 20 : 3
            max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
            misspellings_fields = misspellings.is_a?(Hash) && misspellings.key?(:fields) && misspellings[:fields].map(&:to_s)

            if misspellings_fields
              missing_fields = misspellings_fields - fields.map { |f| base_field(f) }
              if missing_fields.any?
                raise ArgumentError, "All fields in per-field misspellings must also be specified in fields option"
              end
            end

            @misspellings = true
          else
            @misspellings = false
          end

          # build the per-field queries
          fields.each do |field|
            queries_to_add = []
            qs = []

            factor = boost_fields[field] || 1
            shared_options = {
              query: term,
              boost: 10 * factor
            }

            # ".phrase" fields are searched with match_phrase on the analyzed field
            match_type =
              if field.end_with?(".phrase")
                field =
                  if field == "_all.phrase"
                    "_all"
                  else
                    field.sub(/\.phrase\z/, ".analyzed")
                  end

                :match_phrase
              else
                :match
              end

            shared_options[:operator] = operator if match_type == :match

            exclude_analyzer = nil
            exclude_field = field

            # misspellings apply to this field unless a per-field list excludes it
            field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))

            if field == "_all" || field.end_with?(".analyzed")
              qs << shared_options.merge(analyzer: "searchkick_search")

              # searchkick_search and searchkick_search2 are the same for some languages
              unless %w(japanese japanese2 korean polish ukrainian vietnamese).include?(searchkick_options[:language])
                qs << shared_options.merge(analyzer: "searchkick_search2")
              end
              exclude_analyzer = "searchkick_search2"
            elsif field.end_with?(".exact")
              # exact matches query the base field with the keyword analyzer
              f = field.split(".")[0..-2].join(".")
              queries_to_add << {match: {f => shared_options.merge(analyzer: "keyword")}}
              exclude_field = f
              exclude_analyzer = "keyword"
            else
              analyzer = field.match?(/\.word_(start|middle|end)\z/) ? "searchkick_word_search" : "searchkick_autocomplete_search"
              qs << shared_options.merge(analyzer: analyzer)
              exclude_analyzer = analyzer
            end

            # add a fuzzy copy of each query, with the lower (unmultiplied) boost
            if field_misspellings != false && match_type == :match
              qs.concat(qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) })
            end

            # wildcard field patterns need multi_match
            if field.start_with?("*.")
              q2 = qs.map { |q| {multi_match: q.merge(fields: [field], type: match_type == :match_phrase ? "phrase" : "best_fields")} }
            else
              q2 = qs.map { |q| {match_type => {field => q}} }
            end

            # boost exact matches more
            if field =~ /\.word_(start|middle|end)\z/ && searchkick_options[:word] != false
              queries_to_add << {
                bool: {
                  must: {
                    bool: {
                      should: q2
                    }
                  },
                  should: {match_type => {field.sub(/\.word_(start|middle|end)\z/, ".analyzed") => qs.first}}
                }
              }
            else
              queries_to_add.concat(q2)
            end

            queries << queries_to_add

            if options[:exclude]
              must_not.concat(set_exclude(exclude_field, exclude_analyzer))
            end
          end

          # all + exclude option
          if all
            query = {
              match_all: {}
            }

            should = []
          else
            # higher score for matching more fields
            payload = {
              bool: {
                should: queries.map { |qs| {dis_max: {queries: qs}} }
              }
            }

            should.concat(set_conversions)
            should.concat(set_conversions_v2)
          end

          # NOTE(review): in the `all` branch above payload has not been assigned
          # on this path, so this overwrites the match_all query with nil — confirm intended
          query = payload
        end

        payload = {}

        # type when inheritance
        where = ensure_permitted(options[:where] || {}).dup
        if searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
          where[:type] = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v, true) }
        end

        # when any model is a subclass of its searchkick class,
        # restrict results per index (and per type for the subclasses)
        models = Array(options[:models])
        if models.any? { |m| m != m.searchkick_klass }
          index_type_or =
            models.map do |m|
              v = {_index: m.searchkick_index.name}
              v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
              v
            end

          where[:or] = Array(where[:or]) + [index_type_or]
        end

        # start everything as efficient filters
        # move to post_filters as aggs demand
        filters = where_filters(where)
        post_filters = []

        # aggregations
        set_aggregations(payload, filters, post_filters) if options[:aggs]

        # post filters
        set_post_filters(payload, post_filters) if post_filters.any?
custom_filters = []
        multiply_filters = []

        # collect function_score functions from the boost options
        set_boost_by(multiply_filters, custom_filters)
        set_boost_where(custom_filters)
        set_boost_by_distance(custom_filters) if options[:boost_by_distance]
        set_boost_by_recency(custom_filters) if options[:boost_by_recency]

        payload[:query] = build_query(query, filters, should, must_not, custom_filters, multiply_filters)

        payload[:explain] = options[:explain] if options[:explain]
        payload[:profile] = options[:profile] if options[:profile]

        # order
        set_order(payload) if options[:order]

        # indices_boost
        set_boost_by_indices(payload)

        # suggestions
        set_suggestions(payload, options[:suggest]) if options[:suggest]

        # highlight
        set_highlights(payload, fields) if options[:highlight]

        # timeout shortly after client times out
        payload[:timeout] ||= "#{((Searchkick.search_timeout + 1) * 1000).round}ms"

        # An empty array will cause only the _id and _type for each hit to be returned
        # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
        if options[:select]
          if options[:select] == []
            # intuitively [] makes sense to return no fields, but ES by default returns all fields
            payload[:_source] = false
          else
            payload[:_source] = options[:select]
          end
        elsif load
          # _source is turned off when results are loaded (load is truthy)
          payload[:_source] = false
        end
      end

      # knn
      set_knn(payload, options[:knn], per_page, offset) if options[:knn]

      # pagination
      # with a raw :body, size/from are only set when a pagination option was given
      pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
      if !options[:body] || pagination_options
        payload[:size] = per_page
        payload[:from] = offset if offset > 0
      end

      # type
      if !searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
        @type = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v) }
      end

      # routing
      @routing = options[:routing] if options[:routing]

      if track_total_hits?
        payload[:track_total_hits] = true
      end

      # merge more body options
      payload = payload.deep_merge(options[:body_options]) if options[:body_options]

      # run block
      options[:block].call(payload) if options[:block]

      # scroll optimization when iterating over all docs
      # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
      if options[:scroll] && payload[:query] == {match_all: {}}
        payload[:sort] ||= ["_doc"]
      end

      # per_page is stored unclamped (before the max_result_window adjustment)
      @body = payload
      @page = page
      @per_page = original_per_page
      @padding = padding
      @load = load
      @scroll = scroll
      @opaque_id = opaque_id
    end

    # Resolves the fields to search and their boosts.
    # Returns [boost_fields, fields]: a hash of field name => boost factor
    # and an array of suffixed field names.
    def set_fields
      boost_fields = {}
      fields = options[:fields] || searchkick_options[:default_fields] || searchkick_options[:searchable]
      all = searchkick_options.key?(:_all) ? searchkick_options[:_all] : false
      default_match = options[:match] || searchkick_options[:match] || :word
      fields =
        if fields
          fields.map do |value|
            # each entry is either a name or a {name => match_type} hash;
            # "name^2" carries a boost after the caret
            k, v = value.is_a?(Hash) ? value.to_a.first : [value, default_match]
            k2, boost = k.to_s.split("^", 2)
            field = "#{k2}.#{v == :word ? 'analyzed' : v}"
            boost_fields[field] = boost.to_f if boost
            field
          end
        elsif all && default_match == :word
          ["_all"]
        elsif all && default_match == :phrase
          ["_all.phrase"]
        elsif term != "*" && default_match == :exact
          raise ArgumentError, "Must specify fields to search"
        else
          # no explicit fields: fall back to a wildcard over the match suffix
          [default_match == :word ? "*.analyzed" : "*.#{default_match}"]
        end
      [boost_fields, fields]
    end

    # Wraps the base query in a bool query (when filters, exclusions or
    # should clauses exist) and then in function_score queries for the
    # additive ("sum") and multiplicative ("multiply") boost functions.
    def build_query(query, filters, should, must_not, custom_filters, multiply_filters)
      if filters.any? || must_not.any? || should.any?
        bool = {}
        bool[:must] = query if query
        bool[:filter] = filters if filters.any?      # where
        bool[:must_not] = must_not if must_not.any?  # exclude
        bool[:should] = should if should.any?        # conversions
        query = {bool: bool}
      end

      if custom_filters.any?
        query = {
          function_score: {
            functions: custom_filters,
            query: query,
            score_mode: "sum"
          }
        }
      end

      if multiply_filters.any?
query = {
          function_score: {
            functions: multiply_filters,
            query: query,
            score_mode: "multiply"
          }
        }
      end

      query
    end

    # Builds one nested function_score clause per conversions field;
    # returns [] when there are no fields or conversions: false was passed.
    def set_conversions
      conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
      if conversions_fields.present? && options[:conversions] != false
        conversions_fields.map do |conversions_field|
          {
            nested: {
              path: conversions_field,
              score_mode: "sum",
              query: {
                function_score: {
                  boost_mode: "replace",
                  query: {
                    match: {
                      "#{conversions_field}.query" => options[:conversions_term] || term
                    }
                  },
                  field_value_factor: {
                    field: "#{conversions_field}.count"
                  }
                }
              }
            }
          }
        end
      else
        []
      end
    end

    # Builds one rank_feature clause per conversions_v2 field; returns []
    # when the feature is not configured, disabled, or shadowed by v1 conversions.
    def set_conversions_v2
      conversions_v2 = options[:conversions_v2]
      return [] if conversions_v2.nil? && !searchkick_options[:conversions_v2]
      return [] if conversions_v2 == false

      # disable if searchkick_options[:conversions] to make it easy to upgrade without downtime
      return [] if conversions_v2.nil? && searchkick_options[:conversions]

      # a non-hash value is shorthand for the :field setting
      unless conversions_v2.is_a?(Hash)
        conversions_v2 = {field: conversions_v2}
      end

      conversions_fields =
        case conversions_v2[:field]
        when true, nil
          Array(searchkick_options[:conversions_v2]).map(&:to_s)
        else
          [conversions_v2[:field].to_s]
        end

      conversions_term = (conversions_v2[:term] || options[:conversions_term] || term).to_s

      unless searchkick_options[:case_sensitive]
        conversions_term = conversions_term.downcase
      end

      # the term becomes part of a field path below, so dots are replaced
      conversions_term = conversions_term.gsub(".", "*")

      conversions_fields.map do |conversions_field|
        {
          rank_feature: {
            field: "#{conversions_field}.#{conversions_term}",
            linear: {},
            boost: conversions_v2[:factor] || 1
          }
        }
      end
    end

    # Builds one phrase multi_match clause per excluded phrase
    # (used by the caller as must_not clauses).
    def set_exclude(field, analyzer)
      Array(options[:exclude]).map do |phrase|
        {
          multi_match: {
            fields: [field],
            query: phrase,
            analyzer: analyzer,
            type: "phrase"
          }
        }
      end
    end

    # Appends a decay function per boost_by_distance field to custom_filters.
    # Raises ArgumentError when an entry has no :origin.
    def set_boost_by_distance(custom_filters)
      boost_by_distance = options[:boost_by_distance] || {}

      # legacy format
      if boost_by_distance[:field]
        boost_by_distance = {boost_by_distance[:field] => boost_by_distance.except(:field)}
      end

      boost_by_distance.each do |field, attributes|
        attributes = {function: :gauss, scale: "5mi"}.merge(attributes)
        unless attributes[:origin]
          raise ArgumentError, "boost_by_distance requires :origin"
        end

        # :factor and :function configure the wrapper, not the decay function itself
        function_params = attributes.except(:factor, :function)
        function_params[:origin] = location_value(function_params[:origin])
        custom_filters << {
          weight: attributes[:factor] || 1,
          attributes[:function] => {
            field => function_params
          }
        }
      end
    end

    # Appends a decay function per boost_by_recency field to custom_filters;
    # the origin defaults to the current time.
    def set_boost_by_recency(custom_filters)
      options[:boost_by_recency].each do |field, attributes|
        attributes = {function: :gauss, origin: Time.now}.merge(attributes)

        custom_filters << {
          weight: attributes[:factor] || 1,
          attributes[:function] => {
            field => attributes.except(:factor, :function)
          }
        }
      end
    end

    # Splits boost_by entries into additive boosts (custom_filters, with the
    # "ln2p" modifier) and boosts with boost_mode "multiply" (multiply_filters).
    def set_boost_by(multiply_filters, custom_filters)
      boost_by = options[:boost_by] || {}
      if boost_by.is_a?(Array)
        boost_by = boost_by.to_h { |f| [f, {factor: 1}] }
      elsif boost_by.is_a?(Hash)
        # values are duplicated first because :boost_mode is deleted while partitioning
        multiply_by, boost_by = boost_by.transform_values(&:dup).partition { |_, v| v.delete(:boost_mode) == "multiply" }.map(&:to_h)
      end
      boost_by[options[:boost]] = {factor: 1} if options[:boost]

      custom_filters.concat boost_filters(boost_by, modifier: "ln2p")
      multiply_filters.concat boost_filters(multiply_by || {})
    end

    # Appends a weighted filter per boost_where entry; plain values get factor 1000.
    def set_boost_where(custom_filters)
      boost_where = options[:boost_where] || {}
      boost_where.each do |field, value|
        if value.is_a?(Array) && value.first.is_a?(Hash)
          value.each do |value_factor|
            custom_filters << custom_filter(field, value_factor[:value], value_factor[:factor])
          end
        elsif value.is_a?(Hash)
          custom_filters << custom_filter(field, value[:value], value[:factor])
        else
          factor = 1000
          custom_filters << custom_filter(field, value, factor)
        end
      end
    end

    # Sets payload[:indices_boost] from the :indices_boost option;
    # keys that respond to searchkick_index are replaced by their index name.
    def set_boost_by_indices(payload)
      return unless options[:indices_boost]

      indices_boost = options[:indices_boost].map do |key, boost|
        index = key.respond_to?(:searchkick_index) ? 
key.searchkick_index.name : key {index => boost} end payload[:indices_boost] = indices_boost end def set_suggestions(payload, suggest) suggest_fields = nil if suggest.is_a?(Array) suggest_fields = suggest else suggest_fields = (searchkick_options[:suggest] || []).map(&:to_s) # intersection if options[:fields] suggest_fields &= options[:fields].map { |v| (v.is_a?(Hash) ? v.keys.first : v).to_s.split("^", 2).first } end end if suggest_fields.any? payload[:suggest] = {text: term} suggest_fields.each do |field| payload[:suggest][field] = { phrase: { field: "#{field}.suggest" } } end else raise ArgumentError, "Must pass fields to suggest option" end end def set_highlights(payload, fields) payload[:highlight] = { fields: fields.to_h { |f| [f, {}] }, fragment_size: 0 } if options[:highlight].is_a?(Hash) if (tag = options[:highlight][:tag]) payload[:highlight][:pre_tags] = [tag] payload[:highlight][:post_tags] = [tag.to_s.gsub(/\A<(\w+).+/, "")] end if (fragment_size = options[:highlight][:fragment_size]) payload[:highlight][:fragment_size] = fragment_size end if (encoder = options[:highlight][:encoder]) payload[:highlight][:encoder] = encoder end highlight_fields = options[:highlight][:fields] if highlight_fields payload[:highlight][:fields] = {} highlight_fields.each do |name, opts| payload[:highlight][:fields]["#{name}.#{@match_suffix}"] = opts || {} end end end @highlighted_fields = payload[:highlight][:fields].keys end def set_aggregations(payload, filters, post_filters) aggs = options[:aggs] payload[:aggs] = {} aggs = aggs.to_h { |f| [f, {}] } if aggs.is_a?(Array) # convert to more advanced syntax aggs.each do |field, agg_options| size = agg_options[:limit] ? 
agg_options[:limit] : 1_000 shared_agg_options = agg_options.except(:limit, :field, :ranges, :date_ranges, :where) if agg_options[:ranges] payload[:aggs][field] = { range: { field: agg_options[:field] || field, ranges: agg_options[:ranges] }.merge(shared_agg_options) } elsif agg_options[:date_ranges] payload[:aggs][field] = { date_range: { field: agg_options[:field] || field, ranges: agg_options[:date_ranges] }.merge(shared_agg_options) } elsif (histogram = agg_options[:date_histogram]) payload[:aggs][field] = { date_histogram: histogram }.merge(shared_agg_options) elsif (metric = @@metric_aggs.find { |k| agg_options.has_key?(k) }) payload[:aggs][field] = { metric => { field: agg_options[metric][:field] || field } }.merge(shared_agg_options) else payload[:aggs][field] = { terms: { field: agg_options[:field] || field, size: size }.merge(shared_agg_options) } end agg_where = ensure_permitted(agg_options[:where] || {}) if options[:smart_aggs] != false && options[:where] where = ensure_permitted(options[:where]) where_without_field = where.reject { |k| k == field } # where_without_field = where_without_field(where, field.to_s) if where_without_field.any? if agg_where.any? agg_where = where.merge(agg_where) # agg_where = combine_agg_where(agg_where, where_without_field) else agg_where = where_without_field end end end agg_filters = where_filters(agg_where) # only do one level comparison for simplicity filters.select! do |filter| if agg_filters.include?(filter) true else post_filters << filter false end end if agg_filters.any? payload[:aggs][field] = { filter: { bool: { must: agg_filters } }, aggs: { field => payload[:aggs][field] } } end end end def where_without_field(where, field) result = {} where.each do |f, v| case f when :_and r = v.map { |v2| where_without_field(v2, field) }.reject(&:empty?) result[f] = r unless r.empty? when :_or r = v.map { |v2| where_without_field(v2, field) } result[f] = r unless r.any?(&:empty?) 
when :or r = v.map { |v2| v2.map { |v3| where_without_field(v3, field) }.reject { |v2| v2.any?(&:empty?) } } result[f] = r unless r.empty? when :_not r = where_without_field(v, field) result[f] = r unless r.empty? when :_script result[f] = v else if f.to_s != field result[f] = v end end end result end def combine_agg_where(agg_where, where) result = agg_where.dup field_keys = result.except(:_and, :_or, :or, :_not, :_script).transform_keys(&:to_s) where.each do |f, v| case f when :_and, :_or, :or, :_not, :_script if result.key?(f) # combine with _and if needed result[:_and] ||= [] result[:_and] += [{f => v}] else result[f] = v end else result[f] = v unless field_keys.include?(f.to_s) end end result end def set_knn(payload, knn, per_page, offset) if term != "*" raise ArgumentError, "Use Searchkick.multi_search for hybrid search" end field = knn[:field] field_options = searchkick_options.dig(:knn, field.to_sym) || searchkick_options.dig(:knn, field.to_s) || {} vector = knn[:vector] distance = knn[:distance] || field_options[:distance] exact = knn[:exact] exact = field_options[:distance].nil? || distance != field_options[:distance] if exact.nil? k = per_page + offset ef_search = knn[:ef_search] filter = payload.delete(:query) if distance.nil? raise ArgumentError, "distance required" elsif !exact && distance != field_options[:distance] raise ArgumentError, "distance must match searchkick options for approximate search" end if Searchkick.opensearch? 
if exact
          # OpenSearch exact search: score every candidate with the knn_score script
          # https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/#spaces
          space_type =
            case distance
            when "cosine"
              "cosinesimil"
            when "euclidean"
              "l2"
            when "taxicab"
              "l1"
            when "inner_product"
              "innerproduct"
            when "chebyshev"
              "linf"
            else
              raise ArgumentError, "Unknown distance: #{distance}"
            end

          payload[:query] = {
            script_score: {
              query: {
                bool: {
                  # only documents that have the vector field are scored
                  must: [filter, {exists: {field: field}}]
                }
              },
              script: {
                source: "knn_score",
                lang: "knn",
                params: {
                  field: field,
                  query_value: vector,
                  space_type: space_type
                }
              },
              # cosine scores are halved on servers below 2.19.0
              boost: distance == "cosine" && Searchkick.server_below?("2.19.0") ? 0.5 : 1.0
            }
          }
        else
          if ef_search && Searchkick.server_below?("2.16.0")
            raise Error, "ef_search requires OpenSearch 2.16+"
          end

          # OpenSearch approximate search
          payload[:query] = {
            knn: {
              field.to_sym => {
                vector: vector,
                k: k,
                filter: filter
              }.merge(ef_search ? {method_parameters: {ef_search: ef_search}} : {})
            }
          }
        end
      else
        if exact
          # prevent incorrect distances/results with Elasticsearch 9.0.0-rc1
          if !Searchkick.server_below?("9.0.0") && field_options[:distance] == "cosine" && distance != "cosine"
            raise ArgumentError, "distance must match searchkick options"
          end

          # Elasticsearch exact search: score with a vector function script
          # https://github.com/elastic/elasticsearch/blob/main/docs/reference/vectors/vector-functions.asciidoc
          source =
            case distance
            when "cosine"
              "(cosineSimilarity(params.query_vector, params.field) + 1.0) * 0.5"
            when "euclidean"
              "double l2 = l2norm(params.query_vector, params.field); 1 / (1 + l2 * l2)"
            when "taxicab"
              "1 / (1 + l1norm(params.query_vector, params.field))"
            when "inner_product"
              "double dot = dotProduct(params.query_vector, params.field); dot > 0 ? dot + 1 : 1 / (1 - dot)"
            else
              raise ArgumentError, "Unknown distance: #{distance}"
            end

          payload[:query] = {
            script_score: {
              query: {
                bool: {
                  must: [filter, {exists: {field: field}}]
                }
              },
              script: {
                source: source,
                params: {
                  field: field,
                  query_vector: vector
                }
              }
            }
          }
        else
          # Elasticsearch approximate search uses the top-level knn section
          payload[:knn] = {
            field: field,
            query_vector: vector,
            k: k,
            filter: filter
          }.merge(ef_search ? {num_candidates: ef_search} : {})
        end
      end
    end

    # Stores the given filters as the payload's post_filter.
    def set_post_filters(payload, post_filters)
      payload[:post_filter] = {
        bool: {
          filter: post_filters
        }
      }
    end

    # Sets payload[:sort]; a non-enumerable value is treated as a field sorted ascending.
    def set_order(payload)
      value = options[:order]
      payload[:sort] = value.is_a?(Enumerable) ? value : {value => :asc}
    end

    # provides *very* basic protection from unfiltered parameters
    # this is not meant to be comprehensive and may be expanded in the future
    def ensure_permitted(obj)
      obj.to_h
    end

    # Converts a where hash into an array of filter clauses.
    # Keys :or, :_or, :_not, :_and and :_script are logical operators;
    # any other key is a field whose value is a scalar, Array, Range or
    # a Hash of operators.
    def where_filters(where)
      filters = []
      (where || {}).each do |field, value|
        field = :_id if field.to_s == "id"

        # update smart aggs when adding new symbol
        if field == :or
          # value is a list of or-clauses; each clause is a list of alternatives
          value.each do |or_clause|
            filters << {bool: {should: or_clause.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
          end
        elsif field == :_or
          filters << {bool: {should: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
        elsif field == :_not
          filters << {bool: {must_not: where_filters(value)}}
        elsif field == :_and
          filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
        elsif field == :_script
          unless value.is_a?(Script)
            raise TypeError, "expected Searchkick::Script"
          end

          filters << {script: {script: {source: value.source, lang: value.lang, params: value.params}}}
        else
          # expand ranges
          if value.is_a?(Range)
            value = expand_range(value)
          end

          value = {in: value} if value.is_a?(Array)

          if value.is_a?(Hash)
            value.each do |op, op_value|
              case op
              when :within, :bottom_right, :bottom_left
                # do nothing
                # (these are read by the :near, :top_left and :top_right branches)
              when :near
                filters << {
                  geo_distance: {
                    field => location_value(op_value),
                    distance: value[:within] || "50mi"
                  }
                }
              when :geo_polygon
                filters << {
                  geo_polygon: {
                    field => op_value
                  }
                }
              when :geo_shape
                shape = op_value.except(:relation)
                shape[:coordinates] = coordinate_array(shape[:coordinates]) if shape[:coordinates]
                filters << {
                  geo_shape: {
                    field => {
                      relation: op_value[:relation] || "intersects",
                      shape: shape
                    }
                  }
                }
              when :top_left
                filters << {
                  geo_bounding_box: {
                    field => {
                      top_left: 
location_value(op_value), bottom_right: location_value(value[:bottom_right]) } } } when :top_right filters << { geo_bounding_box: { field => { top_right: location_value(op_value), bottom_left: location_value(value[:bottom_left]) } } } when :like, :ilike # based on Postgres # https://www.postgresql.org/docs/current/functions-matching.html # % matches zero or more characters # _ matches one character # \ is escape character # escape Lucene reserved characters # https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators reserved = %w(\\ . ? + * | { } [ ] ( ) ") regex = op_value.dup reserved.each do |v| regex.gsub!(v, "\\\\" + v) end regex = regex.gsub(/(? {value: regex, flags: "NONE", case_insensitive: true}}} else filters << {regexp: {field => {value: regex, flags: "NONE"}}} end when :prefix filters << {prefix: {field => {value: op_value}}} when :regexp # support for regexp queries without using a regexp ruby object filters << {regexp: {field => {value: op_value}}} when :not, :_not # not equal filters << {bool: {must_not: term_filters(field, op_value)}} when :all op_value.each do |val| filters << term_filters(field, val) end when :in filters << term_filters(field, op_value) when :exists case op_value when true filters << {exists: {field: field}} when false filters << {bool: {must_not: {exists: {field: field}}}} else raise ArgumentError, "Passing a value other than true or false to exists is not supported" end else range_query = case op when :gt {gt: op_value} when :gte {gte: op_value} when :lt {lt: op_value} when :lte {lte: op_value} else raise ArgumentError, "Unknown where operator: #{op.inspect}" end # issue 132 if (existing = filters.find { |f| f[:range] && f[:range][field] }) existing[:range][field].merge!(range_query) else filters << {range: {field => range_query}} end end end else filters << term_filters(field, value) end end end filters end def term_filters(field, value) if value.is_a?(Array) # in query if 
value.any?(&:nil?)
          # array contains nil: OR the nil filter with a filter on the non-nil values
          {bool: {should: [term_filters(field, nil), term_filters(field, value.compact)]}}
        else
          {terms: {field => value}}
        end
      elsif value.nil?
        # nil is expressed as "the field does not exist"
        {bool: {must_not: {exists: {field: field}}}}
      elsif value.is_a?(Regexp)
        source = value.source

        # TODO handle other regexp options
        # TODO handle other anchor characters, like ^, $, \Z
        # strip a leading \A anchor, otherwise allow any prefix
        if source.start_with?("\\A")
          source = source[2..-1]
        else
          source = ".*#{source}"
        end

        # strip a trailing \z anchor, otherwise allow any suffix
        if source.end_with?("\\z")
          source = source[0..-3]
        else
          source = "#{source}.*"
        end

        {regexp: {field => {value: source, flags: "NONE", case_insensitive: value.casefold?}}}
      else
        # TODO add this for other values
        if value.as_json.is_a?(Enumerable)
          # query will fail, but this is better
          # same message as Active Record
          raise TypeError, "can't cast #{value.class.name}"
        end

        {term: {field => {value: value}}}
      end
    end

    # Pairs the where-style filters for `field => value` with `factor`
    # as the weight.
    def custom_filter(field, value, factor)
      {
        filter: where_filters(field => value),
        weight: factor
      }
    end

    # Builds a field_value_factor entry for `field`.
    # When `missing` is given it is stored (as a float) under :missing;
    # otherwise an exists filter on the field is added instead.
    def boost_filter(field, factor: 1, modifier: nil, missing: nil)
      script_score = {
        field_value_factor: {
          field: field,
          factor: factor.to_f,
          modifier: modifier
        }
      }

      if missing
        script_score[:field_value_factor][:missing] = missing.to_f
      else
        script_score[:filter] = {
          exists: {
            field: field
          }
        }
      end

      script_score
    end

    # Maps each field/options pair of `boost_by` through boost_filter;
    # the per-field options hash is splatted as keyword arguments.
    def boost_filters(boost_by, modifier: nil)
      boost_by.map do |field, value|
        boost_filter(field, modifier: modifier, **value)
      end
    end

    # Recursively descend through nesting of arrays until we reach either a lat/lon object or an array of numbers,
    # eventually returning the same structure with all values transformed to [lon, lat].
    #
    def coordinate_array(value)
      if value.is_a?(Hash)
        [value[:lon], value[:lat]]
      elsif value.is_a?(Array) and !value[0].is_a?(Numeric)
        value.map { |a| coordinate_array(a) }
      else
        value
      end
    end

    # Arrays are converted to floats and reversed; any other value is
    # returned unchanged.
    # NOTE(review): presumably turns [lat, lon] into [lon, lat] - confirm
    def location_value(value)
      if value.is_a?(Array)
        value.map(&:to_f).reverse
      else
        value
      end
    end

    # Converts a Range into a hash of range operators:
    # :gte for the begin (when present) and :lt / :lte for the end,
    # depending on whether the range excludes its end.
    # An infinite end is left out.
    def expand_range(range)
      expanded = {}
      expanded[:gte] = range.begin if range.begin

      if range.end && !(range.end.respond_to?(:infinite?) && range.end.infinite?)
        expanded[range.exclude_end? ? :lt : :lte] = range.end
      end

      expanded
    end

    # Removes a trailing sub-field suffix (.analyzed, .word_start, ...)
    # from a field name.
    def base_field(k)
      k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
    end

    # Truthy when either the deep_paging model option or the
    # track_total_hits body option is set.
    def track_total_hits?
      searchkick_options[:deep_paging] || body_options[:track_total_hits]
    end

    # The :body_options search option, or an empty hash when not given.
    def body_options
      options[:body_options] || {}
    end
  end
end

================================================
FILE: lib/searchkick/railtie.rb
================================================
module Searchkick
  # Registers the Searchkick rake tasks with Rails.
  class Railtie < Rails::Railtie
    rake_tasks do
      load "tasks/searchkick.rake"
    end
  end
end

================================================
FILE: lib/searchkick/record_data.rb
================================================
module Searchkick
  # Builds the bulk API payloads (index / update / delete) for a single
  # record in a given index.
  class RecordData
    # keys that count as an explicit type in the search data
    TYPE_KEYS = ["type", :type]

    attr_reader :index, :record

    def initialize(index, record)
      @index = index
      @record = record
    end

    # Returns {index: ...} with the record metadata plus the full search
    # data under :data.
    def index_data
      data = record_data
      data[:data] = search_data
      {index: data}
    end

    # Returns {update: ...} where :data holds a partial document built
    # from calling `method_name` on the record.
    def update_data(method_name)
      data = record_data
      data[:data] = {doc: search_data(method_name)}
      {update: data}
    end

    # Returns {delete: ...} with the record metadata only.
    def delete_data
      {delete: record_data}
    end

    # custom id can be useful for load: false
    # Uses search_document_id when the record defines it, else the id.
    # Numeric ids are returned as is, everything else as a string.
    def search_id
      id = record.respond_to?(:search_document_id) ? record.search_document_id : record.id
      id.is_a?(Numeric) ? id : id.to_s
    end

    def document_type(ignore_type = false)
      index.klass_document_type(record.class, ignore_type)
    end

    # Metadata shared by all actions: index name, document id and, when
    # the record responds to search_routing, the routing value.
    def record_data
      data = {
        _index: index.name,
        _id: search_id
      }
      data[:routing] = record.search_routing if record.respond_to?(:search_routing)
      data
    end

    private

    # Calls `method_name` (default: search_data) on the record and
    # post-processes the returned hash in place.
    # A non-nil method_name means this is a partial reindex.
    def search_data(method_name = nil)
      partial_reindex = !method_name.nil?

      source = record.send(method_name || :search_data)

      # conversions
      index.conversions_fields.each do |conversions_field|
        if source[conversions_field]
          # turn the query => count pairs into an array of {query:, count:} hashes
          source[conversions_field] = source[conversions_field].map { |k, v| {query: k, count: v} }
        end
      end

      index.conversions_v2_fields.each do |conversions_field|
        # use the field name as given when the source has that key,
        # otherwise fall back to its symbol form
        key = source.key?(conversions_field) ?
conversions_field : conversions_field.to_sym
        # on a partial reindex, only rewrite the field when the source has a value for it
        if !partial_reindex || source[key]
          if index.options[:case_sensitive]
            # stringify keys, replace "." with "*", and sum the counts of keys that collide
            source[key] = (source[key] || {}).reduce(Hash.new(0)) do |memo, (k, v)|
              memo[k.to_s.gsub(".", "*")] += v
              memo
            end
          else
            # same as above, but keys are downcased first
            source[key] = (source[key] || {}).reduce(Hash.new(0)) do |memo, (k, v)|
              memo[k.to_s.downcase.gsub(".", "*")] += v
              memo
            end
          end
        end
      end

      # hack to prevent generator field doesn't exist error
      if !partial_reindex
        index.suggest_fields.each do |field|
          # add an explicit nil when the field is absent under both string and symbol keys
          if !source.key?(field) && !source.key?(field.to_sym)
            source[field] = nil
          end
        end
      end

      # locations
      index.locations_fields.each do |field|
        if source[field]
          if !source[field].is_a?(Hash) && (source[field].first.is_a?(Array) || source[field].first.is_a?(Hash))
            # multiple locations
            source[field] = source[field].map { |a| location_value(a) }
          else
            source[field] = location_value(source[field])
          end
        end
      end

      if index.options[:inheritance]
        # only set the type when the source does not already provide one
        if !TYPE_KEYS.any? { |tk| source.key?(tk) }
          source[:type] = document_type(true)
        end
      end

      cast_big_decimal(source)

      source
    end

    # Arrays are converted to floats and reversed, hashes are reduced to
    # float :lat / :lon values, anything else is returned unchanged.
    def location_value(value)
      if value.is_a?(Array)
        value.map(&:to_f).reverse
      elsif value.is_a?(Hash)
        {lat: value[:lat].to_f, lon: value[:lon].to_f}
      else
        value
      end
    end

    # change all BigDecimal values to floats due to
    # https://github.com/rails/rails/issues/6033
    # possible loss of precision :/
    #
    # Hashes are updated in place (and returned); other enumerables are
    # mapped into a new array; any other object is returned unchanged.
    def cast_big_decimal(obj)
      case obj
      when BigDecimal
        obj.to_f
      when Hash
        obj.each do |k, v|
          # performance
          if v.is_a?(BigDecimal)
            obj[k] = v.to_f
          elsif v.is_a?(Enumerable)
            obj[k] = cast_big_decimal(v)
          end
        end
      when Enumerable
        obj.map do |v|
          cast_big_decimal(v)
        end
      else
        obj
      end
    end
  end
end

================================================
FILE: lib/searchkick/record_indexer.rb
================================================
module Searchkick
  # Reindexes a list of records into one index, either inline, through
  # Active Job, or through the reindex queue.
  class RecordIndexer
    attr_reader :index

    def initialize(index)
      @index = index
    end

    # Reindexes `records` according to `mode`
    # (:async, :queue, true / :inline).
    # Raises ArgumentError for any other mode.
    def reindex(records, mode:, method_name:, ignore_missing:, full: false, single: false, job_options: nil)
      # prevents exists?
# check if records is a relation
      records = records.to_a
      return if records.empty?

      case mode
      when :async
        unless defined?(ActiveJob)
          raise Error, "Active Job not found"
        end

        job_options ||= {}

        # only add if set for backwards compatibility
        extra_options = {}
        if ignore_missing
          extra_options[:ignore_missing] = ignore_missing
        end

        # we could likely combine ReindexV2Job, BulkReindexJob, and ProcessBatchJob
        # but keep them separate for now
        if single
          record = records.first

          # always pass routing in case record is deleted
          # before the async job runs
          if record.respond_to?(:search_routing)
            routing = record.search_routing
          end

          # one job for the single record, identified by class name and id
          Searchkick::ReindexV2Job.set(**job_options).perform_later(
            record.class.name,
            record.id.to_s,
            method_name ? method_name.to_s : nil,
            routing: routing,
            index_name: index.name,
            **extra_options
          )
        else
          # one job for the whole list, identified by stringified ids
          Searchkick::BulkReindexJob.set(**job_options).perform_later(
            class_name: records.first.class.searchkick_options[:class_name],
            record_ids: records.map { |r| r.id.to_s },
            index_name: index.name,
            method_name: method_name ? method_name.to_s : nil,
            **extra_options
          )
        end
      when :queue
        if method_name
          raise Error, "Partial reindex not supported with queue option"
        end

        index.reindex_queue.push_records(records)
      when true, :inline
        # records that should not be indexed are passed as deletions,
        # except when `full` is set
        index_records, other_records = records.partition { |r| index_record?(r) }
        import_inline(index_records, !full ? other_records : [], method_name: method_name, ignore_missing: ignore_missing, single: single)
      else
        raise ArgumentError, "Invalid value for mode"
      end

      # return true like model and relation reindex for now
      true
    end

    # Loads the records for `items` (hashes with :id and :routing),
    # indexes those that should be indexed and deletes the rest.
    def reindex_items(klass, items, method_name:, ignore_missing:, single: false)
      # id => routing lookup, also the source of the requested ids
      routing = items.to_h { |r| [r[:id], r[:routing]] }
      record_ids = routing.keys

      relation = Searchkick.load_records(klass, record_ids)
      # call search_import even for single records for nested associations
      relation = relation.search_import if relation.respond_to?(:search_import)
      records = relation.select(&:should_index?)
# determine which records to delete
      # (requested ids that were not loaded or should not be indexed)
      delete_ids = record_ids - records.map { |r| r.id.to_s }
      delete_records = delete_ids.map do |id|
        construct_record(klass, id, routing[id])
      end

      import_inline(records, delete_records, method_name: method_name, ignore_missing: ignore_missing, single: single)
    end

    private

    # A record is indexed only when it is persisted, not destroyed, and
    # its should_index? returns true.
    def index_record?(record)
      record.persisted? && !record.destroyed? && record.should_index?
    end

    # import in single request with retries
    # Uses bulk_update when a method_name is given (partial reindex),
    # bulk_index otherwise, followed by bulk_delete for the deletions.
    def import_inline(index_records, delete_records, method_name:, ignore_missing:, single:)
      return if index_records.empty? && delete_records.empty?

      maybe_bulk(index_records, delete_records, method_name, single) do
        if index_records.any?
          if method_name
            index.bulk_update(index_records, method_name, ignore_missing: ignore_missing)
          else
            index.bulk_index(index_records)
          end
        end

        if delete_records.any?
          index.bulk_delete(delete_records)
        end
      end
    end

    # Yields directly when callbacks are already in :bulk mode;
    # otherwise wraps the block in a bulk callbacks scope with retries
    # and an event message describing the operation.
    def maybe_bulk(index_records, delete_records, method_name, single)
      if Searchkick.callbacks_value == :bulk
        yield
      else
        # set action and data
        action =
          if single && index_records.empty?
            "Remove"
          elsif method_name
            "Update"
          else
            single ? "Store" : "Import"
          end
        record = index_records.first || delete_records.first
        name = record.class.searchkick_klass.name
        # fills in the event payload: name plus either the record id
        # (single) or the total number of records
        message = lambda do |event|
          event[:name] = "#{name} #{action}"
          if single
            event[:id] = index.search_id(record)
          else
            event[:count] = index_records.size + delete_records.size
          end
        end

        with_retries do
          Searchkick.callbacks(:bulk, message: message) do
            yield
          end
        end
      end
    end

    # Builds an unsaved stand-in record carrying only the id (and, when
    # given, a search_routing singleton method) so it can be deleted
    # from the index.
    def construct_record(klass, id, routing)
      record = klass.new
      record.id = id
      if routing
        record.define_singleton_method(:search_routing) do
          routing
        end
      end
      record
    end

    # Runs the block, retrying once on Faraday::ClientError before
    # re-raising.
    def with_retries
      retries = 0

      begin
        yield
      rescue Faraday::ClientError => e
        if retries < 1
          retries += 1
          retry
        end
        raise e
      end
    end
  end
end

================================================
FILE: lib/searchkick/reindex_queue.rb
================================================
module Searchkick
  # Redis list of record ids waiting to be reindexed, one list per name.
  class ReindexQueue
    attr_reader :name

    # Raises Error when Searchkick.redis is not configured.
    def initialize(name)
      @name = name

      raise Error, "Searchkick.redis not set" unless Searchkick.redis
    end

    # supports single and multiple ids
    def push(record_ids)
      Searchkick.with_redis { |r| r.call("LPUSH", redis_key, record_ids) }
    end

    # Pushes one entry per record: the escaped id, followed by "|" and
    # the escaped routing when the record has one.
    def push_records(records)
      record_ids =
        records.map do |record|
          # always pass routing in case record is deleted
          # before the queue job runs
          if record.respond_to?(:search_routing)
            routing = record.search_routing
          end

          # escape pipe with double pipe
          value = escape(record.id.to_s)
          value = "#{value}|#{escape(routing)}" if routing
          value
        end

      push(record_ids)
    end

    # TODO use reliable queuing
    # Pops up to `limit` entries from the other end of the list than
    # push writes to; to_a normalizes the reply to an array.
    def reserve(limit: 1000)
      Searchkick.with_redis { |r| r.call("RPOP", redis_key, limit) }.to_a
    end

    def clear
      Searchkick.with_redis { |r| r.call("DEL", redis_key) }
    end

    def length
      Searchkick.with_redis { |r| r.call("LLEN", redis_key) }
    end

    private

    def redis_key
      "searchkick:reindex_queue:#{name}"
    end

    # Doubles every pipe so it cannot be confused with the id|routing
    # separator.
    def escape(value)
      value.to_s.gsub("|", "||")
    end
  end
end

================================================
FILE: lib/searchkick/reindex_v2_job.rb
================================================
module
Searchkick
  # Background job that reindexes (or removes) a single record,
  # identified by class name and id.
  class ReindexV2Job < Searchkick.parent_job.constantize
    queue_as { Searchkick.queue_name }

    # Loads the model and index, then delegates to
    # RecordIndexer#reindex_items with a single id/routing item.
    def perform(class_name, id, method_name = nil, routing: nil, index_name: nil, ignore_missing: nil)
      model = Searchkick.load_model(class_name, allow_child: true)
      index = model.searchkick_index(name: index_name)

      # use should_index? to decide whether to index (not default scope)
      # just like saving inline
      # could use Searchkick.scope() in future
      # but keep for now for backwards compatibility
      model = model.unscoped if model.respond_to?(:unscoped)

      items = [{id: id, routing: routing}]
      RecordIndexer.new(index).reindex_items(model, items, method_name: method_name, ignore_missing: ignore_missing, single: true)
    end
  end
end

================================================
FILE: lib/searchkick/relation.rb
================================================
module Searchkick
  # Chainable search relation: collects search options and executes the
  # query on first use.
  class Relation
    # sentinel used to tell "no argument passed" apart from nil / false
    NO_DEFAULT_VALUE = Object.new

    # note: modifying body directly is not supported
    # and has no impact on query after being executed
    # TODO freeze body object?
    delegate :params, to: :query
    delegate_missing_to :private_execute

    attr_reader :model
    alias_method :klass, :model

    def initialize(model, term = "*", **options)
      @model = model
      @term = term
      @options = options

      # generate query to validate options
      query if options.any?
    end

    # same as Active Record
    # Shows at most 10 entries; an 11th is replaced by "...".
    def inspect
      entries = private_execute.first(11).map!(&:inspect)
      entries[10] = "..." if entries.size == 11
      "#<#{self.class.name} [#{entries.join(', ')}]>"
    end

    # Without arguments, returns the aggregations of the executed
    # search; with arguments, returns a copy with the aggs added.
    def aggs(*args, **kwargs)
      if args.empty? && kwargs.empty?
private_execute.aggs else clone.aggs!(*args, **kwargs) end end def aggs!(*args, **kwargs) check_loaded aggs = {} args.flatten.each do |arg| if arg.is_a?(Hash) aggs.merge!(arg) else aggs[arg] = {} end end aggs.merge!(kwargs) merge_option(:aggs, aggs) self end def body(value = NO_DEFAULT_VALUE) if value == NO_DEFAULT_VALUE query.body else clone.body!(value) end end def body!(value) check_loaded @options[:body] = value self end def body_options(value) clone.body_options!(value) end def body_options!(value) check_loaded merge_option(:body_options, value) self end def boost(value) clone.boost!(value) end def boost!(value) check_loaded @options[:boost] = value self end def boost_by(value) clone.boost_by!(value) end def boost_by!(value) check_loaded if value.is_a?(Array) value = value.to_h { |f| [f, {factor: 1}] } elsif !value.is_a?(Hash) value = {value => {factor: 1}} end merge_option(:boost_by, value) self end def boost_by_distance(value) clone.boost_by_distance!(value) end def boost_by_distance!(value) check_loaded # legacy format value = {value[:field] => value.except(:field)} if value[:field] merge_option(:boost_by_distance, value) self end def boost_by_recency(value) clone.boost_by_recency!(value) end def boost_by_recency!(value) check_loaded merge_option(:boost_by_recency, value) self end def boost_where(value) clone.boost_where!(value) end def boost_where!(value) check_loaded # TODO merge duplicate fields merge_option(:boost_where, value) self end def conversions(value) clone.conversions!(value) end def conversions!(value) check_loaded @options[:conversions] = value self end def conversions_v1(value) clone.conversions_v1!(value) end def conversions_v1!(value) check_loaded @options[:conversions_v1] = value self end def conversions_v2(value) clone.conversions_v2!(value) end def conversions_v2!(value) check_loaded @options[:conversions_v2] = value self end def conversions_term(value) clone.conversions_term!(value) end def conversions_term!(value) check_loaded 
@options[:conversions_term] = value self end def debug(value = true) clone.debug!(value) end def debug!(value = true) check_loaded @options[:debug] = value self end def emoji(value = true) clone.emoji!(value) end def emoji!(value = true) check_loaded @options[:emoji] = value self end def exclude(*values) clone.exclude!(*values) end def exclude!(*values) check_loaded concat_option(:exclude, values.flatten) self end def explain(value = true) clone.explain!(value) end def explain!(value = true) check_loaded @options[:explain] = value self end def fields(*values) clone.fields!(*values) end def fields!(*values) check_loaded concat_option(:fields, values.flatten) self end def highlight(value) clone.highlight!(value) end def highlight!(value) check_loaded @options[:highlight] = value self end def includes(*values) clone.includes!(*values) end def includes!(*values) check_loaded concat_option(:includes, values.flatten) self end def index_name(*values) clone.index_name!(*values) end def index_name!(*values) check_loaded values = values.flatten if values.all? 
{ |v| v.respond_to?(:searchkick_index) } models!(*values) else concat_option(:index_name, values) self end end def indices_boost(value) clone.indices_boost!(value) end def indices_boost!(value) check_loaded merge_option(:indices_boost, value) self end def knn(value) clone.knn!(value) end def knn!(value) check_loaded @options[:knn] = value self end def limit(value) clone.limit!(value) end def limit!(value) check_loaded @options[:limit] = value self end def load(value = NO_DEFAULT_VALUE) if value == NO_DEFAULT_VALUE private_execute self else clone.load!(value) end end def load!(value) check_loaded @options[:load] = value self end def match(value) clone.match!(value) end def match!(value) check_loaded @options[:match] = value self end def misspellings(value) clone.misspellings!(value) end def misspellings!(value) check_loaded @options[:misspellings] = value self end def models(*values) clone.models!(*values) end def models!(*values) check_loaded concat_option(:models, values.flatten) self end def model_includes(*values) clone.model_includes!(*values) end def model_includes!(*values) check_loaded concat_option(:model_includes, values.flatten) self end def offset(value = NO_DEFAULT_VALUE) if value == NO_DEFAULT_VALUE private_execute.offset else clone.offset!(value) end end def offset!(value) check_loaded @options[:offset] = value self end def opaque_id(value) clone.opaque_id!(value) end def opaque_id!(value) check_loaded @options[:opaque_id] = value self end def operator(value) clone.operator!(value) end def operator!(value) check_loaded @options[:operator] = value self end def order(*values) clone.order!(*values) end def order!(*values) check_loaded concat_option(:order, values.flatten) self end def padding(value = NO_DEFAULT_VALUE) if value == NO_DEFAULT_VALUE private_execute.padding else clone.padding!(value) end end def padding!(value) check_loaded @options[:padding] = value self end def page(value) clone.page!(value) end def page!(value) check_loaded 
@options[:page] = value self end def per_page(value = NO_DEFAULT_VALUE) if value == NO_DEFAULT_VALUE private_execute.per_page else clone.per_page!(value) end end def per(value) per_page(value) end def per_page!(value) check_loaded # TODO set limit? @options[:per_page] = value self end def profile(value = true) clone.profile!(value) end def profile!(value = true) check_loaded @options[:profile] = value self end def request_params(value) clone.request_params!(value) end def request_params!(value) check_loaded merge_option(:request_params, value) self end def routing(value) clone.routing!(value) end def routing!(value) check_loaded @options[:routing] = value self end def scope_results(value) clone.scope_results!(value) end def scope_results!(value) check_loaded @options[:scope_results] = value self end def scroll(value = NO_DEFAULT_VALUE, &block) if value == NO_DEFAULT_VALUE private_execute.scroll(&block) elsif block_given? clone.scroll!(value).scroll(&block) else clone.scroll!(value) end end def scroll!(value) check_loaded @options[:scroll] = value self end def select(*values, &block) if block_given? 
private_execute.select(*values, &block) else clone.select!(*values) end end def select!(*values) check_loaded concat_option(:select, values.flatten) self end def similar(value = true) clone.similar!(value) end def similar!(value = true) check_loaded @options[:similar] = value self end def smart_aggs(value) clone.smart_aggs!(value) end def smart_aggs!(value) check_loaded @options[:smart_aggs] = value self end def suggest(value = true) clone.suggest!(value) end def suggest!(value = true) check_loaded @options[:suggest] = value self end def total_entries(value = NO_DEFAULT_VALUE) if value == NO_DEFAULT_VALUE private_execute.total_entries else clone.total_entries!(value) end end def total_entries!(value) check_loaded @options[:total_entries] = value self end def track(value = true) clone.track!(value) end def track!(value = true) check_loaded @options[:track] = value self end def type(*values) clone.type!(*values) end def type!(*values) check_loaded concat_option(:type, values.flatten) self end def where(value = NO_DEFAULT_VALUE) if value == NO_DEFAULT_VALUE Where.new(self) else clone.where!(value) end end def where!(value) check_loaded value = ensure_permitted(value) if @options[:where] # keep simple when possible for smart aggs if !@options[:where].keys.intersect?(value.keys) merge_option(:where, value) elsif @options[:where][:_and].is_a?(Array) merge_option(:where, {_and: @options[:where][:_and] + [value]}) else @options[:where] = {_and: [@options[:where], value]} end else @options[:where] = value end self end def first(value = NO_DEFAULT_VALUE) result = if loaded? private_execute else limit = value == NO_DEFAULT_VALUE ? 1 : value previous_limit = (@options[:limit] || @options[:per_page])&.to_i if previous_limit && previous_limit < limit limit = previous_limit end limit(limit).load end if value == NO_DEFAULT_VALUE result.first else result.first(value) end end def pluck(*keys) if !loaded? 
&& @options[:load] == false select(*keys).send(:private_execute).pluck(*keys) else private_execute.pluck(*keys) end end def reorder(*values) clone.reorder!(*values) end def reorder!(*values) check_loaded @options[:order] = values self end def reselect(*values) clone.reselect!(*values) end def reselect!(*values) check_loaded @options[:select] = values self end def rewhere(value) clone.rewhere!(value) end def rewhere!(value) check_loaded @options[:where] = ensure_permitted(value) self end def only(*keys) Relation.new(@model, @term, **@options.slice(*keys)) end def except(*keys) Relation.new(@model, @term, **@options.except(*keys)) end def loaded? !@execute.nil? end undef_method :respond_to_missing? def respond_to_missing?(...) Results.new(nil, nil, nil).respond_to?(...) || super end # TODO uncomment in 7.0 # def to_json(...) # private_execute.to_a.to_json(...) # end # TODO uncomment in 7.0 # def as_json(...) # private_execute.to_a.as_json(...) # end def to_yaml private_execute.to_a.to_yaml end private def private_execute @execute ||= query.execute end def query @query ||= Query.new(@model, @term, **@options) end def check_loaded raise Error, "Relation loaded" if loaded? 
# reset query since options will change @query = nil end # provides *very* basic protection from unfiltered parameters # this is not meant to be comprehensive and may be expanded in the future def ensure_permitted(obj) obj.to_h end def initialize_copy(other) super # shallow dup and avoid updating values in-place @options = @options.dup @execute = nil end def concat_option(key, value) if @options[key] @options[key] += value else @options[key] = value.to_ary end end def merge_option(key, value) if @options[key] @options[key] = @options[key].merge(value) else @options[key] = value.to_hash end end end end ================================================ FILE: lib/searchkick/relation_indexer.rb ================================================ module Searchkick class RelationIndexer attr_reader :index def initialize(index) @index = index end def reindex(relation, mode:, method_name: nil, ignore_missing: nil, full: false, resume: false, scope: nil, job_options: nil) # apply scopes if scope relation = relation.send(scope) elsif relation.respond_to?(:search_import) relation = relation.search_import end # remove unneeded loading for async and queue if mode == :async || mode == :queue if relation.respond_to?(:primary_key) relation = relation.except(:includes, :preload) unless mode == :queue && relation.klass.method_defined?(:search_routing) relation = relation.except(:select).select(relation.primary_key) end elsif relation.respond_to?(:only) unless mode == :queue && relation.klass.method_defined?(:search_routing) relation = relation.only(:_id) end end end if mode == :async && full return full_reindex_async(relation, job_options: job_options) end relation = resume_relation(relation) if resume reindex_options = { mode: mode, method_name: method_name, full: full, ignore_missing: ignore_missing, job_options: job_options } record_indexer = RecordIndexer.new(index) in_batches(relation) do |items| record_indexer.reindex(items, **reindex_options) end end def batches_left 
Searchkick.with_redis { |r| r.call("SCARD", batches_key) }
    end

    # Removes `batch_id` from the Redis set of outstanding batch ids.
    def batch_completed(batch_id)
      Searchkick.with_redis { |r| r.call("SREM", batches_key, [batch_id]) }
    end

    private

    # Restricts the relation to primary keys greater than the number of
    # documents already in the index.
    # Raises Error when the relation has no primary_key (Mongoid).
    def resume_relation(relation)
      if relation.respond_to?(:primary_key)
        # use total docs instead of max id since there's not a great way
        # to get the max _id without scripting since it's a string
        where = relation.arel_table[relation.primary_key].gt(index.total_docs)
        relation = relation.where(where)
      else
        raise Error, "Resume not supported for Mongoid"
      end
    end

    # Yields the relation's records in batches of batch_size, clearing
    # the model's current scope while each batch is processed and
    # restoring it afterwards.
    def in_batches(relation)
      if relation.respond_to?(:find_in_batches)
        klass = relation.klass
        # remove order to prevent possible warnings
        relation.except(:order).find_in_batches(batch_size: batch_size) do |batch|
          # prevent scope from affecting search_data as well as inline jobs
          # Active Record runs relation calls in scoping block
          # https://github.com/rails/rails/blob/main/activerecord/lib/active_record/relation/delegation.rb
          # note: we could probably just call klass.current_scope = nil
          # anywhere in reindex method (after initial all call),
          # but this is more cautious
          previous_scope = klass.current_scope(true)
          if previous_scope
            begin
              klass.current_scope = nil
              yield batch
            ensure
              # restore the scope even if the block raises
              klass.current_scope = previous_scope
            end
          else
            yield batch
          end
        end
      else
        klass = relation.klass
        each_batch(relation, batch_size: batch_size) do |batch|
          # prevent scope from affecting search_data as well as inline jobs
          # note: Model.with_scope doesn't always restore scope, so use custom logic
          previous_scope = Mongoid::Threaded.current_scope(klass)
          if previous_scope
            begin
              Mongoid::Threaded.set_current_scope(nil, klass)
              yield batch
            ensure
              # restore the scope even if the block raises
              Mongoid::Threaded.set_current_scope(previous_scope, klass)
            end
          else
            yield batch
          end
        end
      end
    end

    # Iterates the relation and yields arrays of `batch_size` items,
    # plus a final smaller array for any remainder.
    def each_batch(relation, batch_size:)
      # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
      # use cursor for Mongoid
      items = []
      relation.all.each do |item|
        items << item
        if items.length == batch_size
          yield items
          items = []
        end
      end
      yield
items if items.any?
    end

    # Batch size from the index options, defaulting to 1000 (memoized).
    def batch_size
      @batch_size ||= index.options[:batch_size] || 1000
    end

    # Enqueues one BulkReindexJob per batch.
    # Numeric primary keys are split into id ranges without loading
    # records; anything else falls back to iterating in batches and
    # passing explicit record ids.
    def full_reindex_async(relation, job_options: nil)
      batch_id = 1
      class_name = relation.searchkick_options[:class_name]

      # false: no usable minimum id (no primary key, or the query failed)
      # nil: the relation is empty
      starting_id = false
      if relation.respond_to?(:primary_key)
        primary_key = relation.primary_key

        starting_id =
          begin
            relation.minimum(primary_key)
          rescue ActiveRecord::StatementInvalid
            false
          end
      end

      if starting_id.nil?
        # no records, do nothing
      elsif starting_id.is_a?(Numeric)
        max_id = relation.maximum(primary_key)
        # number of batch_size-wide id windows needed to cover starting_id..max_id
        batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil

        batches_count.times do |i|
          min_id = starting_id + (i * batch_size)
          batch_job(class_name, batch_id, job_options, min_id: min_id, max_id: min_id + batch_size - 1)
          batch_id += 1
        end
      else
        in_batches(relation) do |items|
          # ids are passed as is when they are Integers, otherwise as strings
          batch_job(class_name, batch_id, job_options, record_ids: items.map(&:id).map { |v| v.instance_of?(Integer) ? v : v.to_s })
          batch_id += 1
        end
      end
    end

    # Records `batch_id` in the Redis set of outstanding batches, then
    # enqueues the job for that batch with the given extra options.
    def batch_job(class_name, batch_id, job_options, **options)
      job_options ||= {}
      # TODO expire Redis key
      Searchkick.with_redis { |r| r.call("SADD", batches_key, [batch_id]) }
      Searchkick::BulkReindexJob.set(**job_options).perform_later(
        class_name: class_name,
        index_name: index.name,
        batch_id: batch_id,
        **options
      )
    end

    def batches_key
      "searchkick:reindex:#{index.name}:batches"
    end
  end
end

================================================
FILE: lib/searchkick/reranking.rb
================================================
module Searchkick
  module Reranking
    # Combines several rankings into one list of {result:, score:}
    # hashes sorted by descending score. Each ranking contributes
    # 1 / (k + rank) for a result it contains (rank is 1-based) and
    # 0 for a result it does not contain.
    def self.rrf(first_ranking, *rankings, k: 60)
      rankings.unshift(first_ranking)
      rankings.map!(&:to_ary)

      ranks = []
      results = []
      rankings.each do |ranking|
        # value => 1-based position within this ranking
        ranks << ranking.map.with_index.to_h { |v, i| [v, i + 1] }
        results.concat(ranking)
      end

      results =
        results.uniq.map do |result|
          score =
            ranks.sum do |rank|
              r = rank[result]
              r ?
1.0 / (k + r) : 0.0
            end

          {result: result, score: score}
        end

      # highest score first
      results.sort_by { |v| -v[:score] }
    end
  end
end

================================================
FILE: lib/searchkick/results.rb
================================================
module Searchkick
  # Wraps a search response and exposes the matching records along with
  # response metadata (aggregations, suggestions, pagination, ...).
  class Results
    include Enumerable
    extend Forwardable

    attr_reader :response

    def_delegators :results, :each, :any?, :empty?, :size, :length, :slice, :[], :to_ary

    def initialize(klass, response, options = {})
      @klass = klass
      @response = response
      @options = options
    end

    # Yields each entry of build_hits; returns an enumerator when no
    # block is given.
    def with_hit
      return enum_for(:with_hit) unless block_given?

      build_hits.each do |result|
        yield result
      end
    end

    # Second element of with_hit_and_missing_records (memoized).
    def missing_records
      @missing_records ||= with_hit_and_missing_records[1]
    end

    # Unique suggestion texts from the response, ordered by descending
    # score. Returns [] when suggestions were requested but the response
    # has none, and raises when they were not requested.
    def suggestions
      if response["suggest"]
        response["suggest"].values.flat_map { |v| v.first["options"] }.sort_by { |o| -o["score"] }.map { |o| o["text"] }.uniq
      elsif options[:suggest]
        []
      else
        raise "Pass `suggest: true` to the search method for suggestions"
      end
    end

    # Raw aggregations from the response.
    def aggregations
      response["aggregations"]
    end

    # Aggregations with nested same-named buckets flattened into their
    # parent hash (memoized). Returns nil when the response has no
    # aggregations.
    def aggs
      @aggs ||= begin
        if aggregations
          # dup is shallow: the per-field hashes are modified in place
          aggregations.dup.each do |field, filtered_agg|
            buckets = filtered_agg[field]
            # move the buckets one level above into the field hash
            if buckets
              filtered_agg.delete(field)
              filtered_agg.merge!(buckets)
            end
          end
        end
      end
    end

    def took
      response["took"]
    end

    def error
      response["error"]
    end

    # Model name of the searched class, or a generic 'Result' name when
    # no class was given.
    def model_name
      if klass.nil?
        ActiveModel::Name.new(self.class, nil, 'Result')
      else
        klass.model_name
      end
    end

    # Human-readable name of the entries.
    def entry_name(options = {})
      if options.empty?
        # backward compatibility
        model_name.human.downcase
      else
        default = options[:count] == 1 ?
model_name.human : model_name.human.pluralize model_name.human(options.reverse_merge(default: default)) end end def total_count if options[:total_entries] options[:total_entries] elsif response["hits"]["total"].is_a?(Hash) response["hits"]["total"]["value"] else response["hits"]["total"] end end alias_method :total_entries, :total_count def current_page options[:page] end def per_page options[:per_page] end alias_method :limit_value, :per_page def padding options[:padding] end def total_pages (total_count / per_page.to_f).ceil end alias_method :num_pages, :total_pages def offset_value (current_page - 1) * per_page + padding end alias_method :offset, :offset_value def previous_page current_page > 1 ? (current_page - 1) : nil end alias_method :prev_page, :previous_page def next_page current_page < total_pages ? (current_page + 1) : nil end def first_page? previous_page.nil? end def last_page? next_page.nil? end def out_of_range? current_page > total_pages end def hits if error raise Error, "Query error - use the error method to view it" else @response["hits"]["hits"] end end def highlights(multiple: false) hits.map do |hit| hit_highlights(hit, multiple: multiple) end end def with_highlights(multiple: false) return enum_for(:with_highlights, multiple: multiple) unless block_given? with_hit.each do |result, hit| yield result, hit_highlights(hit, multiple: multiple) end end def with_score return enum_for(:with_score) unless block_given? with_hit.each do |result, hit| yield result, hit["_score"] end end def misspellings? @options[:misspellings] end def scroll_id @response["_scroll_id"] end def scroll raise Error, "Pass `scroll` option to the search method for scrolling" unless scroll_id if block_given? records = self while records.any? 
# Builds (once) and memoizes a two-element array: `[results, missing_records]`.
#
# `results` is an array of `[result, hit]` pairs in the same order as `hits`.
# With the :load option each `result` is a record fetched through
# `results_query`; without it each `result` is a HashWrapper built from the hit.
# `missing_records` lists hits for which no record was found (only populated
# in the :load branch); each entry is `{id:, model:}`.
#
# Raises Error when a hit's index cannot be mapped to any model.
def with_hit_and_missing_records
  @with_hit_and_missing_records ||= begin
    missing_records = []

    if options[:load]
      grouped_hits = hits.group_by { |hit, _| hit["_index"] }

      # determine models
      index_models = {}
      grouped_hits.each do |index, _|
        models =
          if @klass
            [@klass]
          else
            # drop the last underscore-separated segment of the index name and
            # look the remainder up in :index_mapping
            # NOTE(review): presumably the dropped segment is a per-reindex suffix - confirm
            index_alias = index.split("_")[0..-2].join("_")
            Array((options[:index_mapping] || {})[index_alias])
          end
        raise Error, "Unknown model for index: #{index}. Pass the `models` option to the search method." unless models.any?
        index_models[index] = models
      end

      # fetch results
      # one query per (index, model); records are keyed by stringified id so
      # they can be matched against hit["_id"] below
      results = {}
      grouped_hits.each do |index, index_hits|
        results[index] = {}
        index_models[index].each do |model|
          results[index].merge!(results_query(model, index_hits).to_a.index_by { |r| r.id.to_s })
        end
      end

      # sort
      # walk the hits (not the fetched records) so the hit ordering is kept
      results =
        hits.map do |hit|
          result = results[hit["_index"]][hit["_id"].to_s]
          # skipped when :load is a Hash with a truthy :dumpable
          # NOTE(review): presumably because singleton methods prevent marshalling - confirm
          if result && !(options[:load].is_a?(Hash) && options[:load][:dumpable])
            if (hit["highlight"] || options[:highlight]) && !result.respond_to?(:search_highlights)
              highlights = hit_highlights(hit)
              result.define_singleton_method(:search_highlights) do
                highlights
              end
            end
          end
          [result, hit]
        end.select do |result, hit|
          # hits without a record are dropped from the results and recorded
          # in missing_records instead
          unless result
            models = index_models[hit["_index"]]
            missing_records << {
              id: hit["_id"],
              # may be multiple models for inheritance with child models
              # not ideal to return different types
              # but this situation shouldn't be common
              model: models.size == 1 ? models.first : models
            }
          end
          result
        end
    else
      results =
        hits.map do |hit|
          # flatten "_source" (or else "fields") into the top level of the hit
          result =
            if hit["_source"]
              hit.except("_source").merge(hit["_source"])
            elsif hit["fields"]
              hit.except("fields").merge(hit["fields"])
            else
              hit
            end

          if hit["highlight"] || options[:highlight]
            # first highlight fragment per field, keyed by the field name with
            # its analyzer suffix removed (see base_field)
            highlight = hit["highlight"].to_a.to_h { |k, v| [base_field(k), v.first] }
            options[:highlighted_fields].map { |k| base_field(k) }.each do |k|
              # falls back to the plain field value when there is no fragment;
              # an existing "highlighted_<field>" key is left alone
              result["highlighted_#{k}"] ||= (highlight[k] || result[k])
            end
          end

          result["id"] ||= result["_id"] # needed for legacy reasons
          [HashWrapper.new(result), hit]
        end
    end

    [results, missing_records]
  end
end
# Loads the records behind a group of hits for one model.
#
# records - the model to load from; it is passed through Searchkick.scope
# hits    - raw hit hashes; each hit's "_id" is used as a record id
#
# Applies :includes, the :model_includes entry for this model, and the
# :scope_results callable (when given) before loading.
#
# Returns the result of Searchkick.load_records for the prepared relation.
def results_query(records, hits)
  # keep a handle on the model itself: Searchkick.scope may return an
  # unscoped relation, while :model_includes is keyed by the model, so the
  # lookup below must not use the scoped object
  model = records
  records = Searchkick.scope(model)

  ids = hits.map { |hit| hit["_id"] }
  if options[:includes] || options[:model_includes]
    included_relations = []
    combine_includes(included_relations, options[:includes])
    combine_includes(included_relations, options[:model_includes][model]) if options[:model_includes]

    records = records.includes(included_relations)
  end

  if options[:scope_results]
    records = options[:scope_results].call(records)
  end

  Searchkick.load_records(records, ids)
end
module Searchkick
  # Plain holder for a script: its source text, its language, and its params.
  class Script
    # script source as given to the constructor
    attr_reader :source
    # script language ("painless" unless overridden)
    attr_reader :lang
    # script parameters (empty hash unless overridden)
    attr_reader :params

    def initialize(source, lang: "painless", params: {})
      @source, @lang, @params = source, lang, params
    end
  end
end
# errors

# Root of the library's error hierarchy; every class below descends from it.
class Error < StandardError
end

class MissingIndexError < Error
end

# Always reports the same message, whatever was passed to the constructor.
class UnsupportedVersionError < Error
  def message = "This version of Searchkick requires Elasticsearch 8+ or OpenSearch 2+"
end

class InvalidQueryError < Error
end

class DangerousOperation < Error
end

class ImportError < Error
end
# Builds a Relation for a search, optionally tied to a single model.
#
# term    - query term (defaults to "*")
# model   - a single model to search, or nil
# options - remaining search options, handed to the Relation
# block   - optional block, handed to the Relation as the :block option
#
# Raises ArgumentError when a single model is combined with a different
# model through :models or :index_name.
def self.search(term = "*", model: nil, **options, &block)
  search_options = options.dup

  # convert index_name into models if possible
  # this should allow for easier upgrade
  index_names = search_options[:index_name]
  if index_names && !search_options[:models] && Array(index_names).all? { |v| v.respond_to?(:searchkick_index) }
    search_options[:models] = search_options.delete(:index_name)
  end

  # make Searchkick.search(models: [Product]) and Product.search equivalent
  klass = model
  unless klass
    candidates = Array(search_options[:models])
    if candidates.size == 1
      klass = candidates.first
      search_options.delete(:models)
    end
  end

  if klass
    mismatch = search_options[:models] && Array(search_options[:models]) != [klass]
    mismatch ||= Array(search_options[:index_name]).any? { |v| v.respond_to?(:searchkick_index) && v != klass }
    raise ArgumentError, "Use Searchkick.search to search multiple models" if mismatch
  end

  search_options[:block] = block if block

  Relation.new(klass, term, **search_options)
end
# Sets the callbacks value, either permanently or for the duration of a block.
#
# Without a block the value is simply assigned. With a block the value is
# assigned, the block is run, and the previous value is restored afterwards
# (also when the block raises). If the value is still :bulk once the block
# has finished and the indexer has queued items, they are performed inside a
# "request.searchkick" notification.
#
# message is private
def self.callbacks(value = nil, message: nil)
  return self.callbacks_value = value unless block_given?

  saved_value = callbacks_value
  begin
    self.callbacks_value = value
    block_result = yield

    flush_needed = callbacks_value == :bulk && indexer.queued_items.any?
    if flush_needed
      event = {}
      if message
        message.call(event)
      else
        event[:name] = "Bulk"
        event[:count] = indexer.queued_items.size
      end

      ActiveSupport::Notifications.instrument("request.searchkick", event) { indexer.perform }
    end

    block_result
  ensure
    self.callbacks_value = saved_value
  end
end
# Resolves a class name to a searchkick model.
#
# class_name  - name to resolve via `safe_constantize`
# allow_child - when true, any class responding to `searchkick_klass` is
#               accepted; otherwise the class must be listed in
#               Searchkick.models
#
# Raises Error when the class cannot be found or is not a searchkick model.
#
# public (for reindexing conversions)
def self.load_model(class_name, allow_child: false)
  model = class_name.safe_constantize
  raise Error, "Could not find class: #{class_name}" unless model

  searchable = allow_child ? model.respond_to?(:searchkick_klass) : Searchkick.models.include?(model)
  raise Error, "#{class_name} is not a searchkick model" unless searchable

  model
end
# Tells whether `e` is a NotFound transport error from any client library
# that is currently loaded (Elastic::Transport, Elasticsearch::Transport or
# OpenSearch). Libraries that are not loaded are never referenced.
#
# private
def self.not_found_error?(e)
  if defined?(Elastic::Transport)
    return true if e.is_a?(Elastic::Transport::Transport::Errors::NotFound)
  end

  if defined?(Elasticsearch::Transport)
    return true if e.is_a?(Elasticsearch::Transport::Transport::Errors::NotFound)
  end

  defined?(OpenSearch) && e.is_a?(OpenSearch::Transport::Transport::Errors::NotFound)
end
require_relative "lib/searchkick/version"

# Gem metadata; the version is read from Searchkick::VERSION.
Gem::Specification.new do |gem|
  # identity
  gem.name = "searchkick"
  gem.version = Searchkick::VERSION
  gem.summary = "Intelligent search made easy with Rails and Elasticsearch or OpenSearch"
  gem.homepage = "https://github.com/ankane/searchkick"
  gem.license = "MIT"

  # maintainer
  gem.author = "Andrew Kane"
  gem.email = "andrew@ankane.org"

  # packaged files: top-level markdown/text files plus everything under lib
  gem.files = Dir["*.{md,txt}", "{lib}/**/*"]
  gem.require_path = "lib"

  # runtime requirements
  gem.required_ruby_version = ">= 3.2"
  gem.add_dependency "activemodel", ">= 7.2"
end
# An aggregation-level `where` restricts the buckets, whether the aggs are
# passed as a search option or chained on a relation (keyword or hash form).
def test_where
  in_stock_only = {"store_id" => {1 => 1}}

  assert_aggs in_stock_only, aggs: {store_id: {where: {in_stock: true}}}
  assert_aggs in_stock_only, Product.search("Product").aggs(store_id: {where: {in_stock: true}})
  assert_aggs in_stock_only, Product.search("Product").aggs({store_id: {where: {in_stock: true}}})
  assert_aggs in_stock_only, aggs: {store_id: {where: {_not: {in_stock: false}}}}
end
# Range aggregation over price: three buckets with the expected bounds and
# document counts.
def test_ranges
  ranges = [{to: 10}, {from: 10, to: 20}, {from: 20}]
  buckets = Product.search("Product", aggs: {price: {ranges: ranges}}).aggs["price"]["buckets"]
  low, mid, high = buckets

  assert_equal 3, buckets.size
  assert_equal 10.0, low["to"]
  assert_equal 20.0, high["from"]
  assert_equal 1, low["doc_count"]
  assert_equal 0, mid["doc_count"]
  assert_equal 2, high["doc_count"]
end
# Each `where` form is asserted twice. With default options the expected
# store_id buckets reflect the search-level filter; with `smart_aggs: false`
# the expected buckets are the unfiltered counts for "Product"
# ({1 => 1, 2 => 2}).
def test_smart_aggs
  # plain condition
  assert_aggs ({"store_id" => {1 => 1}}), where: {in_stock: true}, aggs: [:store_id]
  assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {in_stock: true}, aggs: [:store_id], smart_aggs: false

  # _not
  assert_aggs ({"store_id" => {2 => 2}}), where: {_not: {in_stock: true}}, aggs: [:store_id]
  assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_not: {in_stock: true}}, aggs: [:store_id], smart_aggs: false

  # _and
  assert_aggs ({"store_id" => {1 => 1}}), where: {_and: [{in_stock: true}]}, aggs: [:store_id]
  assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_and: [{in_stock: true}]}, aggs: [:store_id], smart_aggs: false

  # _or
  assert_aggs ({"store_id" => {1 => 1}}), where: {_or: [{in_stock: true}]}, aggs: [:store_id]
  assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_or: [{in_stock: true}]}, aggs: [:store_id], smart_aggs: false

  # or (nested array form)
  assert_aggs ({"store_id" => {1 => 1}}), where: {or: [[{in_stock: true}]]}, aggs: [:store_id]
  assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {or: [[{in_stock: true}]]}, aggs: [:store_id], smart_aggs: false

  # _script
  assert_aggs ({"store_id" => {1 => 1}}), where: {_script: Searchkick.script("doc['in_stock'].value")}, aggs: [:store_id]
  assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_script: Searchkick.script("doc['in_stock'].value")}, aggs: [:store_id], smart_aggs: false
end
# Combines a search-level `where` with an aggregation-level `where`.
# Every scenario is asserted twice: once with default options and once with
# `smart_aggs: false`. In the scenarios marked "TODO change" both runs
# currently expect the same buckets.
def test_smart_aggs_agg_where
  # plain conditions: by default the search-level filter also narrows the agg
  assert_aggs ({"store_id" => {2 => 1}}), where: {color: "red"}, aggs: {store_id: {where: {in_stock: false}}}
  assert_aggs ({"store_id" => {2 => 2}}), where: {color: "red"}, aggs: {store_id: {where: {in_stock: false}}}, smart_aggs: false

  assert_aggs ({"store_id" => {}}), where: {color: "blue"}, aggs: {store_id: {where: {in_stock: false}}}
  assert_aggs ({"store_id" => {2 => 2}}), where: {color: "blue"}, aggs: {store_id: {where: {in_stock: false}}}, smart_aggs: false

  # TODO change
  assert_aggs ({"store_id" => {2 => 2}}), where: {_not: {color: "red"}}, aggs: {store_id: {where: {_not: {in_stock: true}}}}
  assert_aggs ({"store_id" => {2 => 2}}), where: {_not: {color: "red"}}, aggs: {store_id: {where: {_not: {in_stock: true}}}}, smart_aggs: false

  # TODO change
  assert_aggs ({"store_id" => {2 => 2}}), where: {_and: [{color: "red"}]}, aggs: {store_id: {where: {_and: [{in_stock: false}]}}}
  assert_aggs ({"store_id" => {2 => 2}}), where: {_and: [{color: "red"}]}, aggs: {store_id: {where: {_and: [{in_stock: false}]}}}, smart_aggs: false

  # TODO change
  assert_aggs ({"store_id" => {2 => 2}}), where: {_or: [{color: "red"}]}, aggs: {store_id: {where: {_or: [{in_stock: false}]}}}
  assert_aggs ({"store_id" => {2 => 2}}), where: {_or: [{color: "red"}]}, aggs: {store_id: {where: {_or: [{in_stock: false}]}}}, smart_aggs: false

  # TODO change
  assert_aggs ({"store_id" => {2 => 2}}), where: {or: [[{color: "red"}]]}, aggs: {store_id: {where: {or: [[{in_stock: false}]]}}}
  assert_aggs ({"store_id" => {2 => 2}}), where: {or: [[{color: "red"}]]}, aggs: {store_id: {where: {or: [[{in_stock: false}]]}}}, smart_aggs: false

  # TODO change
  assert_aggs ({"store_id" => {2 => 2}}), where: {_script: Searchkick.script("doc['color'].value == 'red'")}, aggs: {store_id: {where: {_script: Searchkick.script("!doc['in_stock'].value")}}}
  assert_aggs ({"store_id" => {2 => 2}}), where: {_script: Searchkick.script("doc['color'].value == 'red'")}, aggs: {store_id: {where: {_script: Searchkick.script("!doc['in_stock'].value")}}}, smart_aggs: false
end
# Reduces a relation's aggregations to `{field => {bucket key => doc count}}`.
def agg_buckets(relation)
  relation.aggs.to_h do |field, agg|
    counts = agg["buckets"].to_h do |bucket|
      [bucket["key"], bucket["doc_count"]]
    end
    [field, counts]
  end
end
# for issue #855
# A boosted field must still match with, without, or before the apostrophe.
def test_fields_apostrophes
  product_name = "Valentine's Day Special"
  store_names [product_name]

  ["Valentines", "Valentine's", "Valentine"].each do |term|
    assert_search term, [product_name], fields: ["name^5"]
  end
end
# Newer articles are expected to rank ahead of older ones.
def test_boost_by_recency
  articles = [
    {name: "Article 1", created_at: 2.days.ago},
    {name: "Article 2", created_at: 1.day.ago},
    {name: "Article 3", created_at: Time.now}
  ]
  store articles

  recency = {created_at: {scale: "7d", decay: 0.5}}
  newest_first = ["Article 3", "Article 2", "Article 1"]
  assert_order "article", newest_first, boost_by_recency: recency
end
"san", ["San Francisco", "San Antonio", "San Marino"], boost_by_distance: {location: {origin: [37, -122], scale: "1000mi"}} end def test_boost_by_distance_v2_hash store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}, {name: "San Marino", latitude: 43.9333, longitude: 12.4667} ] assert_order "san", ["San Francisco", "San Antonio", "San Marino"], boost_by_distance: {location: {origin: {lat: 37, lon: -122}, scale: "1000mi"}} end def test_boost_by_distance_v2_factor store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167, found_rate: 0.1}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000, found_rate: 0.99}, {name: "San Marino", latitude: 43.9333, longitude: 12.4667, found_rate: 0.2} ] assert_order "san", ["San Antonio", "San Francisco", "San Marino"], boost_by: {found_rate: {factor: 100}}, boost_by_distance: {location: {origin: [37, -122], scale: "1000mi"}} assert_order "san", ["San Francisco", "San Antonio", "San Marino"], boost_by: {found_rate: {factor: 100}}, boost_by_distance: {location: {origin: [37, -122], scale: "1000mi", factor: 100}} end def test_boost_by_indices setup_animal store_names ["Rex"], Animal store_names ["Rexx"], Product assert_order "Rex", ["Rexx", "Rex"], {models: [Animal, Product], indices_boost: {Animal => 1, Product => 200}, fields: [:name]}, Searchkick end end ================================================ FILE: test/callbacks_test.rb ================================================ require_relative "test_helper" class CallbacksTest < Minitest::Test def test_false Searchkick.callbacks(false) do store_names ["Product A", "Product B"] end assert_search "product", [] end def test_bulk Searchkick.callbacks(:bulk) do store_names ["Product A", "Product B"] end Product.searchkick_index.refresh assert_search "product", ["Product A", "Product B"] end def test_async assert_enqueued_jobs 2 do Searchkick.callbacks(:async) do store_names 
["Product A", "Product B"] end end end def test_queue # TODO figure out which earlier test leaves records in index Product.reindex reindex_queue = Product.searchkick_index.reindex_queue reindex_queue.clear Searchkick.callbacks(:queue) do store_names ["Product A", "Product B"] end Product.searchkick_index.refresh assert_search "product", [], load: false, conversions: false assert_equal 2, reindex_queue.length perform_enqueued_jobs do Searchkick::ProcessQueueJob.perform_now(class_name: "Product") end Product.searchkick_index.refresh assert_search "product", ["Product A", "Product B"], load: false assert_equal 0, reindex_queue.length Searchkick.callbacks(:queue) do Product.where(name: "Product B").destroy_all Product.create!(name: "Product C") end Product.searchkick_index.refresh assert_search "product", ["Product A", "Product B"], load: false assert_equal 2, reindex_queue.length perform_enqueued_jobs do Searchkick::ProcessQueueJob.perform_now(class_name: "Product") end Product.searchkick_index.refresh assert_search "product", ["Product A", "Product C"], load: false assert_equal 0, reindex_queue.length # ensure no error with empty queue Searchkick::ProcessQueueJob.perform_now(class_name: "Product") end def test_record_async with_options({callbacks: :async}, Song) do assert_enqueued_jobs 1 do Song.create!(name: "Product A") end assert_enqueued_jobs 1 do Song.first.reindex end end end def test_relation_async with_options({callbacks: :async}, Song) do assert_enqueued_jobs 0 do Song.all.reindex end end end def test_disable_callbacks # make sure callbacks default to on assert Searchkick.callbacks? store_names ["Product A"] Searchkick.disable_callbacks assert !Searchkick.callbacks? 
store_names ["Product B"] assert_search "product", ["Product A"] Searchkick.enable_callbacks Product.reindex assert_search "product", ["Product A", "Product B"] end end ================================================ FILE: test/conversions_test.rb ================================================ require_relative "test_helper" class ConversionsTest < Minitest::Test def setup super setup_speaker end def test_v1 store [ {name: "Tomato A", conversions: {"tomato" => 1}}, {name: "Tomato B", conversions: {"tomato" => 2}}, {name: "Tomato C", conversions: {"tomato" => 3}} ] assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"] assert_order "TOMATO", ["Tomato C", "Tomato B", "Tomato A"] assert_equal_scores "tomato", conversions_v1: false end def test_v1_case store [ {name: "Tomato A", conversions: {"tomato" => 1, "TOMATO" => 1, "tOmAtO" => 1}}, {name: "Tomato B", conversions: {"tomato" => 2}} ] assert_order "tomato", ["Tomato A", "Tomato B"] end def test_v1_case_sensitive with_options(case_sensitive: true) do store [ {name: "Tomato A", conversions: {"Tomato" => 1, "TOMATO" => 1, "tOmAtO" => 1}}, {name: "Tomato B", conversions: {"Tomato" => 2}} ] assert_order "Tomato", ["Tomato B", "Tomato A"] end ensure Product.reindex end def test_v1_term store [ {name: "Tomato A", conversions: {"tomato" => 1, "soup" => 3}}, {name: "Tomato B", conversions: {"tomato" => 2, "soup" => 2}}, {name: "Tomato C", conversions: {"tomato" => 3, "soup" => 1}} ] assert_order "tomato", ["Tomato A", "Tomato B", "Tomato C"], conversions_term: "soup" end def test_v1_weight Product.reindex store [ {name: "Product Boost", orders_count: 20}, {name: "Product Conversions", conversions: {"product" => 10}} ] assert_order "product", ["Product Conversions", "Product Boost"], boost: "orders_count" end def test_v1_multiple_conversions store [ {name: "Speaker A", conversions_a: {"speaker" => 1}, conversions_b: {"speaker" => 6}}, {name: "Speaker B", conversions_a: {"speaker" => 2}, conversions_b: {"speaker" => 
5}},
      {name: "Speaker C", conversions_a: {"speaker" => 3}, conversions_b: {"speaker" => 4}}
    ], Speaker

    # scores are expected to be equal when conversions are off, left at the
    # default, or when both fields are used together (in either order)
    assert_equal_scores "speaker", {conversions_v1: false}, Speaker
    assert_equal_scores "speaker", {}, Speaker
    assert_equal_scores "speaker", {conversions_v1: ["conversions_a", "conversions_b"]}, Speaker
    assert_equal_scores "speaker", {conversions_v1: ["conversions_b", "conversions_a"]}, Speaker
    # a single field gives the order implied by that field's counts
    assert_order "speaker", ["Speaker C", "Speaker B", "Speaker A"], {conversions_v1: "conversions_a"}, Speaker
    assert_order "speaker", ["Speaker A", "Speaker B", "Speaker C"], {conversions_v1: "conversions_b"}, Speaker
  end

  # A named conversions field combined with conversions_term: the order
  # follows the counts stored under the given term.
  def test_v1_multiple_conversions_with_boost_term
    store [
      {name: "Speaker A", conversions_a: {"speaker" => 4, "speaker_1" => 1}},
      {name: "Speaker B", conversions_a: {"speaker" => 3, "speaker_1" => 2}},
      {name: "Speaker C", conversions_a: {"speaker" => 2, "speaker_1" => 3}},
      {name: "Speaker D", conversions_a: {"speaker" => 1, "speaker_1" => 4}}
    ], Speaker

    assert_order "speaker", ["Speaker A", "Speaker B", "Speaker C", "Speaker D"], {conversions_v1: "conversions_a"}, Speaker
    assert_order "speaker", ["Speaker D", "Speaker C", "Speaker B", "Speaker A"], {conversions_v1: "conversions_a", conversions_term: "speaker_1"}, Speaker
  end

  # v2 equivalent of test_v1: conversions_v2 must be requested explicitly.
  def test_v2
    store [
      {name: "Tomato A", conversions_v2: {"tomato" => 1}},
      {name: "Tomato B", conversions_v2: {"tomato" => 2}},
      {name: "Tomato C", conversions_v2: {"tomato" => 3}}
    ]
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: true
    assert_order "TOMATO", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: true
    assert_equal_scores "tomato", conversions_v2: false
  end

  # v2 equivalent of test_v1_case.
  def test_v2_case
    store [
      {name: "Tomato A", conversions_v2: {"tomato" => 1, "TOMATO" => 1, "tOmAtO" => 1}},
      {name: "Tomato B", conversions_v2: {"tomato" => 2}}
    ]
    assert_order "tomato", ["Tomato A", "Tomato B"], conversions_v2: true
  end

  # v2 equivalent of test_v1_case_sensitive; reindexes Product afterwards.
  def test_v2_case_sensitive
    with_options(case_sensitive: true) do
      store [
        {name: "Tomato A", conversions_v2: {"Tomato" => 1, "TOMATO" => 1, "tOmAtO" => 1}},
        {name: "Tomato B", conversions_v2: {"Tomato" => 2}}
      ]
      assert_order "Tomato", ["Tomato B", "Tomato A"], conversions_v2: true
    end
  ensure
    Product.reindex
  end

  # The term can be given inside the conversions_v2 hash or through the
  # separate conversions_term option.
  def test_v2_term
    store [
      {name: "Tomato A", conversions_v2: {"tomato" => 1, "soup" => 3}},
      {name: "Tomato B", conversions_v2: {"tomato" => 2, "soup" => 2}},
      {name: "Tomato C", conversions_v2: {"tomato" => 3, "soup" => 1}}
    ]
    assert_order "tomato", ["Tomato A", "Tomato B", "Tomato C"], conversions_v2: {term: "soup"}
    assert_order "tomato", ["Tomato A", "Tomato B", "Tomato C"], conversions_v2: true, conversions_term: "soup"
  end

  # v2 equivalent of test_v1_weight.
  def test_v2_weight
    Product.reindex
    store [
      {name: "Product Boost", orders_count: 20},
      {name: "Product Conversions", conversions_v2: {"product" => 10}}
    ]
    assert_order "product", ["Product Conversions", "Product Boost"], conversions_v2: true, boost: "orders_count"
  end

  # Conversion keys containing a space.
  def test_v2_space
    store [
      {name: "Tomato A", conversions_v2: {"tomato juice" => 1}},
      {name: "Tomato B", conversions_v2: {"tomato juice" => 2}},
      {name: "Tomato C", conversions_v2: {"tomato juice" => 3}}
    ]
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: {term: "tomato juice"}
  end

  # Conversion keys containing a dot.
  def test_v2_dot
    store [
      {name: "Tomato A", conversions_v2: {"tomato.juice" => 1}},
      {name: "Tomato B", conversions_v2: {"tomato.juice" => 2}},
      {name: "Tomato C", conversions_v2: {"tomato.juice" => 3}}
    ]
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: {term: "tomato.juice"}
  end

  # Conversion keys containing non-ASCII characters.
  def test_v2_unicode
    store [
      {name: "Tomato A", conversions_v2: {"喰らう" => 1}},
      {name: "Tomato B", conversions_v2: {"喰らう" => 2}},
      {name: "Tomato C", conversions_v2: {"喰らう" => 3}}
    ]
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: {term: "喰らう"}
  end

  # With identical data in both formats, a search using only v2 is expected
  # to score the same as one with v2 switched off (v1 left at its default).
  def test_v2_score
    store [
      {name: "Tomato A", conversions: {"tomato" => 1}, conversions_v2: {"tomato" => 1}},
      {name: "Tomato B", conversions: {"tomato" => 2}, conversions_v2: {"tomato" => 2}},
      {name: "Tomato C", conversions: {"tomato" => 3}, conversions_v2: {"tomato" => 3}}
    ]
    scores = Product.search("tomato", conversions_v2: false, load: false).map(&:_score)
    scores_v2 = Product.search("tomato", conversions_v1: false, conversions_v2: true, load: false).map(&:_score)
    assert_equal scores, scores_v2
  end

  # factor: 3 is expected to raise each score by twice the conversion count
  # relative to the default (differences of 6, 4 and 2 for the three hits).
  def test_v2_factor
    store [
      {name: "Tomato A", conversions: {"tomato" => 1}, conversions_v2: {"tomato" => 1}},
      {name: "Tomato B", conversions: {"tomato" => 2}, conversions_v2: {"tomato" => 2}},
      {name: "Tomato C", conversions: {"tomato" => 3}, conversions_v2: {"tomato" => 3}}
    ]
    scores = Product.search("tomato", conversions_v1: false, conversions_v2: true, load: false).map(&:_score)
    scores2 = Product.search("tomato", conversions_v1: false, conversions_v2: {factor: 3}, load: false).map(&:_score)
    diffs = scores.zip(scores2).map { |a, b| b - a }
    assert_in_delta 6, diffs[0]
    assert_in_delta 4, diffs[1]
    assert_in_delta 2, diffs[2]
  end

  # Multi-word keys must not contribute to a query for just one of the
  # words: all scores are expected to be equal.
  def test_v2_no_tokenization
    store [
      {name: "Tomato A"},
      {name: "Tomato B", conversions_v2: {"tomato juice" => 2}},
      {name: "Tomato C", conversions_v2: {"tomato vine" => 3}}
    ]
    assert_equal_scores "tomato", conversions_v2: true
  end

  # 66000 keys are accepted; adding 1000 more is expected to raise ImportError.
  def test_v2_max_conversions
    conversions = 66000.times.to_h { |i| ["term#{i}", 1] }
    store [{name: "Tomato A", conversions_v2: conversions}]

    conversions.merge!(1000.times.to_h { |i| ["term#{conversions.size + i}", 1] })
    assert_raises(Searchkick::ImportError) do
      store [{name: "Tomato B", conversions_v2: conversions}]
    end
  end

  # A 32766-character key is accepted; 32767 is expected to raise ImportError.
  def test_v2_max_length
    store [{name: "Tomato A", conversions_v2: {"a"*32766 => 1}}]

    assert_raises(Searchkick::ImportError) do
      store [{name: "Tomato B", conversions_v2: {"a"*32767 => 1}}]
    end
  end

  # A zero count is rejected at import time.
  def test_v2_zero
    error =
      assert_raises(Searchkick::ImportError) do
        store [{name: "Tomato A", conversions_v2: {"tomato" => 0}}]
      end
    assert_match "must be a positive normal float", error.message
  end

  # Ordering by conversions is expected to still hold after a partial
  # reindex using the :search_name method.
  def test_v2_partial_reindex
    store [
      {name: "Tomato A", conversions_v2: {"tomato" => 1}},
      {name: "Tomato B", conversions_v2: {"tomato" => 2}},
      {name: "Tomato C", conversions_v2: {"tomato" => 3}}
    ]
    Product.reindex(:search_name, refresh: true)
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: true
  end
end

================================================
FILE: test/default_scope_test.rb
================================================
require_relative "test_helper"

# Tests run against the Band model (see default_model below).
class DefaultScopeTest < Minitest::Test
  def setup
    setup_model(Band)
  end

  # After a full reindex only the active record is expected in the index.
  def test_reindex
    store [
      {name: "Test", active: true},
      {name: "Test 2", active: false}
    ], reindex: false

    Band.reindex

    assert_search "*", ["Test"], {load: false}
  end

  # search works on the model but raises when called on a relation.
  def test_search
    Band.reindex
    Band.search("*") # test works

    error = assert_raises(Searchkick::Error) do
      Band.all.search("*")
    end
    assert_equal "search must be called on model, not relation", error.message
  end

  # Model used by the shared store/assert helpers in this class.
  def default_model
    Band
  end
end

================================================
FILE: test/exclude_test.rb
================================================
require_relative "test_helper"

# Covers the `exclude` search option.
class ExcludeTest < Minitest::Test
  # A record matching the excluded phrase is dropped from the results.
  def test_butter
    store_names ["Butter Tub", "Peanut Butter Tub"]
    assert_search "butter", ["Butter Tub"], exclude: ["peanut butter"]
  end

  # Same as test_butter with match: :word_start.
  def test_butter_word_start
    store_names ["Butter Tub", "Peanut Butter Tub"]
    assert_search "butter", ["Butter Tub"], exclude: ["peanut butter"], match: :word_start
  end

  # With an exact-match field, "butter" alone is expected to match nothing.
  def test_butter_exact
    store_names ["Butter Tub", "Peanut Butter Tub"]
    assert_search "butter", [], exclude: ["peanut butter"], fields: [{name: :exact}]
  end

  # Exact field: the full excluded name is dropped, the queried name kept.
  def test_same_exact
    store_names ["Butter Tub", "Peanut Butter Tub"]
    assert_search "Butter Tub", ["Butter Tub"], exclude: ["Peanut Butter Tub"], fields: [{name: :exact}]
  end

  # Excluding a longer word that shares the queried prefix.
  def test_egg_word_start
    store_names ["eggs", "eggplant"]
    assert_search "egg", ["eggs"], exclude: ["eggplant"], match: :word_start
  end

  # exclude also accepts a single string instead of an array.
  def test_string
    store_names ["Butter Tub", "Peanut Butter Tub"]
    assert_search "butter", ["Butter Tub"], exclude: "peanut butter"
  end

  # exclude is applied to a match-all ("*") query as well.
  def test_match_all
    store_names ["Butter"]
    assert_search "*", [],
exclude: "butter" end def test_match_all_fields store_names ["Butter"] assert_search "*", [], fields: [:name], exclude: "butter" assert_search "*", ["Butter"], fields: [:color], exclude: "butter" end end ================================================ FILE: test/geo_shape_test.rb ================================================ require_relative "test_helper" class GeoShapeTest < Minitest::Test def setup setup_region store [ { name: "Region A", text: "The witch had a cat", territory: { type: "polygon", coordinates: [[[30, 40], [35, 45], [40, 40], [40, 30], [30, 30], [30, 40]]] } }, { name: "Region B", text: "and a very tall hat", territory: { type: "polygon", coordinates: [[[50, 60], [55, 65], [60, 60], [60, 50], [50, 50], [50, 60]]] } }, { name: "Region C", text: "and long ginger hair which she wore in a plait", territory: { type: "polygon", coordinates: [[[10, 20], [15, 25], [20, 20], [20, 10], [10, 10], [10, 20]]] } } ] end def test_envelope assert_search "*", ["Region A"], { where: { territory: { geo_shape: { type: "envelope", coordinates: [[28, 42], [32, 38]] } } } } end def test_polygon assert_search "*", ["Region A"], { where: { territory: { geo_shape: { type: "polygon", coordinates: [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]] } } } } end def test_multipolygon assert_search "*", ["Region A", "Region B"], { where: { territory: { geo_shape: { type: "multipolygon", coordinates: [ [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]], [[[58, 62], [62, 62], [62, 58], [58, 58], [58, 62]]] ] } } } } end def test_disjoint assert_search "*", ["Region B", "Region C"], { where: { territory: { geo_shape: { type: "envelope", relation: "disjoint", coordinates: [[28, 42], [32, 38]] } } } } end def test_within assert_search "*", ["Region A"], { where: { territory: { geo_shape: { type: "envelope", relation: "within", coordinates: [[20, 50], [50, 20]] } } } } end def test_search_match assert_search "witch", ["Region A"], { where: { territory: { geo_shape: { type: 
"envelope", coordinates: [[28, 42], [32, 38]] } } } } end def test_search_no_match assert_search "ginger hair", [], { where: { territory: { geo_shape: { type: "envelope", coordinates: [[28, 42], [32, 38]] } } } } end def test_latlon assert_search "*", ["Region A"], { where: { territory: { geo_shape: { type: "envelope", coordinates: [{lat: 42, lon: 28}, {lat: 38, lon: 32}] } } } } end def default_model Region end end ================================================ FILE: test/highlight_test.rb ================================================ require_relative "test_helper" class HighlightTest < Minitest::Test def test_basic store_names ["Two Door Cinema Club"] assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: true).highlights.first[:name] end def test_with_highlights store_names ["Two Door Cinema Club"] assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: true).with_highlights.first.last[:name] end def test_tag store_names ["Two Door Cinema Club"] assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: {tag: ""}).highlights.first[:name] assert_equal "Two Door Cinema Club", Product.search("cinema").highlight(tag: "").highlights.first[:name] end def test_tag_class store_names ["Two Door Cinema Club"] assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: {tag: ""}).highlights.first[:name] end def test_very_long store_names [("Two Door Cinema Club " * 100).strip] assert_equal ("Two Door Cinema Club " * 100).strip, Product.search("cinema", highlight: true).highlights.first[:name] end def test_multiple_fields store [{name: "Two Door Cinema Club", color: "Cinema Orange"}] highlights = Product.search("cinema", fields: [:name, :color], highlight: true).highlights.first assert_equal "Two Door Cinema Club", highlights[:name] assert_equal "Cinema Orange", highlights[:color] end def test_fields store [{name: "Two Door Cinema Club", color: "Cinema Orange"}] highlights = Product.search("cinema", fields: 
[:name, :color], highlight: {fields: [:name]}).highlights.first assert_equal "Two Door Cinema Club", highlights[:name] assert_nil highlights[:color] end def test_field_options store_names ["Two Door Cinema Club are a Northern Irish indie rock band"] fragment_size = ENV["MATCH"] == "word_start" ? 26 : 21 assert_equal "Two Door Cinema Club are", Product.search("cinema", highlight: {fields: {name: {fragment_size: fragment_size}}}).highlights.first[:name] end def test_multiple_words store_names ["Hello World Hello"] assert_equal "Hello World Hello", Product.search("hello", highlight: true).highlights.first[:name] end def test_encoder store_names ["Hello"] assert_equal "<b>Hello</b>", Product.search("hello", highlight: {encoder: "html"}, misspellings: false).highlights.first[:name] end def test_word_middle store_names ["Two Door Cinema Club"] assert_equal "Two Door Cinema Club", Product.search("ine", match: :word_middle, highlight: true).highlights.first[:name] end def test_body skip if ENV["MATCH"] == "word_start" store_names ["Two Door Cinema Club"] body = { query: { match: { "name.analyzed" => "cinema" } }, highlight: { pre_tags: [""], post_tags: [""], fields: { "name.analyzed" => {} } } } assert_equal "Two Door Cinema Club", Product.search(body: body).highlights.first[:"name.analyzed"] end def test_multiple_highlights store_names ["Two Door Cinema Club Some Other Words And Much More Doors Cinema Club"] highlights = Product.search("cinema", highlight: {fragment_size: 20}).highlights(multiple: true).first[:name] assert highlights.is_a?(Array) assert_equal highlights.count, 2 refute_equal highlights.first, highlights.last highlights.each do |highlight| assert highlight.include?("Cinema") end end def test_search_highlights_method store_names ["Two Door Cinema Club"] assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: true).first.search_highlights[:name] end def test_match_all store_names ["Two Door Cinema Club"] assert_nil Product.search("*", 
highlight: true).highlights.first[:name] end def test_match_all_load_false store_names ["Two Door Cinema Club"] assert_nil Product.search("*", highlight: true, load: false).highlights.first[:name] end def test_match_all_search_highlights store_names ["Two Door Cinema Club"] assert_nil Product.search("*", highlight: true).first.search_highlights[:name] end end ================================================ FILE: test/hybrid_test.rb ================================================ require_relative "test_helper" class HybridTest < Minitest::Test def setup skip unless Searchkick.knn_support? super end def test_search error = assert_raises(ArgumentError) do Product.search("product", knn: {field: :embedding, vector: [1, 2, 3]}) end assert_equal "Use Searchkick.multi_search for hybrid search", error.message end def test_multi_search store [ {name: "The dog is barking", embedding: [1, 2, 0]}, {name: "The cat is purring", embedding: [1, 0, 0]}, {name: "The bear is growling", embedding: [1, 2, 3]} ] keyword_search = Product.search("growling bear") semantic_search = Product.search(knn: {field: :embedding, vector: [1, 2, 3]}) Searchkick.multi_search([keyword_search, semantic_search]) results = Searchkick::Reranking.rrf(keyword_search, semantic_search) expected = ["The bear is growling", "The dog is barking", "The cat is purring"] assert_equal expected, results.map { |v| v[:result].name } assert_in_delta 0.03279, results[0][:score] assert_in_delta 0.01612, results[1][:score] assert_in_delta 0.01587, results[2][:score] end end ================================================ FILE: test/index_cache_test.rb ================================================ require_relative "test_helper" class IndexCacheTest < Minitest::Test def setup Product.class_variable_get(:@@searchkick_index_cache).clear end def test_default object_id = Product.searchkick_index.object_id 3.times do assert_equal object_id, Product.searchkick_index.object_id end end def test_max_size starting_ids = 
object_ids(20)
    # 20 distinct index names are served from the cache on a second pass
    assert_equal starting_ids, object_ids(20)

    # requesting one more name is expected to change the objects returned
    # for the original 20
    Product.searchkick_index(name: "other")
    refute_equal starting_ids, object_ids(20)
  end

  # Three threads requesting the same names are expected to get the same objects.
  def test_thread_safe
    object_ids = with_threads { object_ids(20) }
    assert_equal object_ids[0], object_ids[1]
    assert_equal object_ids[0], object_ids[2]
  end

  # object ids can differ since threads progress at different speeds
  # test to make sure doesn't crash
  def test_thread_safe_max_size
    with_threads { object_ids(1000) }
  end

  private

  # Returns the object_id of the Index obtained for each of `count`
  # generated names ("index0", "index1", ...).
  def object_ids(count)
    count.times.map { |i| Product.searchkick_index(name: "index#{i}").object_id }
  end

  # Runs the given block in three threads and returns each thread's value.
  # Thread.report_on_exception is switched on for the duration and restored
  # afterwards.
  def with_threads
    previous = Thread.report_on_exception
    begin
      Thread.report_on_exception = true
      3.times.map { Thread.new { yield } }.map(&:join).map(&:value)
    ensure
      Thread.report_on_exception = previous
    end
  end
end

================================================
FILE: test/index_options_test.rb
================================================
require_relative "test_helper"

# Covers index-level options, applied to the Song model through
# with_options (see default_model below).
class IndexOptionsTest < Minitest::Test
  def setup
    Song.destroy_all
  end

  # With case_sensitive: true, "test" does not match "Test".
  def test_case_sensitive
    with_options({case_sensitive: true}) do
      store_names ["Test", "test"]
      assert_search "test", ["test"], {misspellings: false}
    end
  end

  # With stem: false, "milks" does not match "milk".
  def test_no_stemming
    with_options({stem: false}) do
      store_names ["milk", "milks"]
      assert_search "milks", ["milks"], {misspellings: false}
    end
  end

  # Baseline for test_stem_exclusion: with default options both words
  # produce the token "anim" and so match each other.
  def test_no_stem_exclusion
    with_options({}) do
      store_names ["animals", "anime"]
      assert_search "animals", ["animals", "anime"], {misspellings: false}
      assert_search "anime", ["animals", "anime"], {misspellings: false}
      assert_equal ["anim"], Song.searchkick_index.tokens("anime", analyzer: "searchkick_index")
      assert_equal ["anim"], Song.searchkick_index.tokens("anime", analyzer: "searchkick_search2")
    end
  end

  # stem_exclusion keeps "anime" unstemmed in both analyzers.
  def test_stem_exclusion
    with_options({stem_exclusion: ["anime"]}) do
      store_names ["animals", "anime"]
      assert_search "animals", ["animals"], {misspellings: false}
      assert_search "anime", ["anime"], {misspellings: false}
      assert_equal ["anime"], Song.searchkick_index.tokens("anime", analyzer: "searchkick_index")
      assert_equal ["anime"], Song.searchkick_index.tokens("anime", analyzer: "searchkick_search2")
    end
  end

  # Baseline for test_stemmer_override: with default options both words
  # produce the token "anim".
  def test_no_stemmer_override
    with_options({}) do
      store_names ["animals", "animations"]
      assert_search "animals", ["animals", "animations"], {misspellings: false}
      assert_search "animations", ["animals", "animations"], {misspellings: false}
      assert_equal ["anim"], Song.searchkick_index.tokens("animations", analyzer: "searchkick_index")
      assert_equal ["anim"], Song.searchkick_index.tokens("animations", analyzer: "searchkick_search2")
    end
  end

  # stemmer_override maps "animations" to the custom token "animat".
  def test_stemmer_override
    with_options({stemmer_override: ["animations => animat"]}) do
      store_names ["animals", "animations"]
      assert_search "animals", ["animals"], {misspellings: false}
      assert_search "animations", ["animations"], {misspellings: false}
      assert_equal ["animat"], Song.searchkick_index.tokens("animations", analyzer: "searchkick_index")
      assert_equal ["animat"], Song.searchkick_index.tokens("animations", analyzer: "searchkick_search2")
    end
  end

  # With special_characters: false, "jalapeno" does not match "jalapeño".
  def test_special_characters
    with_options({special_characters: false}) do
      store_names ["jalapeño"]
      assert_search "jalapeno", [], {misspellings: false}
    end
  end

  # index_name given as a string.
  def test_index_name
    with_options({index_name: "songs_v2"}) do
      assert_equal "songs_v2", Song.searchkick_index.name
    end
  end

  # index_name given as a callable.
  def test_index_name_callable
    with_options({index_name: -> { "songs_v2" }}) do
      assert_equal "songs_v2", Song.searchkick_index.name
    end
  end

  # index_prefix given as a string.
  def test_index_prefix
    with_options({index_prefix: "hello"}) do
      assert_equal "hello_songs_test", Song.searchkick_index.name
    end
  end

  # index_prefix given as a callable.
  def test_index_prefix_callable
    with_options({index_prefix: -> { "hello" }}) do
      assert_equal "hello_songs_test", Song.searchkick_index.name
    end
  end

  # Model used by the shared store/assert helpers in this class.
  def default_model
    Song
  end
end

================================================
FILE: test/index_test.rb
================================================
require_relative "test_helper"

# Covers Searchkick::Index operations, mostly through Product.searchkick_index.
class IndexTest < Minitest::Test
  def setup
    super
setup_region end def test_tokens assert_equal ["dollar", "dollartre", "tree"], Product.searchkick_index.tokens("Dollar Tree", analyzer: "searchkick_index") end def test_tokens_analyzer assert_equal ["dollar", "tree"], Product.searchkick_index.tokens("Dollar Tree", analyzer: "searchkick_search2") end def test_total_docs store_names ["Product A"] assert_equal 1, Product.searchkick_index.total_docs end def test_clean_indices suffix = Searchkick.index_suffix ? "_#{Searchkick.index_suffix}" : "" old_index = Searchkick::Index.new("products_test#{suffix}_20130801000000000") different_index = Searchkick::Index.new("items_test#{suffix}_20130801000000000") old_index.delete if old_index.exists? different_index.delete if different_index.exists? # create indexes old_index.create different_index.create Product.searchkick_index.clean_indices assert Product.searchkick_index.exists? assert different_index.exists? assert !old_index.exists? end def test_clean_indices_old_format suffix = Searchkick.index_suffix ? "_#{Searchkick.index_suffix}" : "" old_index = Searchkick::Index.new("products_test#{suffix}_20130801000000") old_index.create Product.searchkick_index.clean_indices assert !old_index.exists? 
end def test_retain Product.reindex assert_equal 1, Product.searchkick_index.all_indices.size Product.reindex(retain: true) assert_equal 2, Product.searchkick_index.all_indices.size end def test_mappings store_names ["Dollar Tree"], Store assert_equal ["Dollar Tree"], Store.search(body: {query: {match: {name: "dollar"}}}).map(&:name) mapping = Store.searchkick_index.mapping assert_kind_of Hash, mapping assert_equal "text", mapping.values.first["mappings"]["properties"]["name"]["type"] end def test_settings assert_kind_of Hash, Store.searchkick_index.settings end def test_remove_blank_id store_names ["Product A"] Product.searchkick_index.remove(Product.new) assert_search "product", ["Product A"] ensure Product.reindex end # keep simple for now, but maybe return client response in future def test_store_response product = Searchkick.callbacks(false) { Product.create!(name: "Product A") } assert_nil Product.searchkick_index.store(product) end # keep simple for now, but maybe return client response in future def test_bulk_index_response product = Searchkick.callbacks(false) { Product.create!(name: "Product A") } assert_nil Product.searchkick_index.bulk_index([product]) end # TODO move def test_filterable store [{name: "Product A", alt_description: "Hello"}] error = assert_raises(Searchkick::InvalidQueryError) do assert_search "*", [], where: {alt_description: "Hello"} end assert_match "Cannot search on field [alt_description] since it is not indexed", error.message end def test_filterable_non_string store [{name: "Product A", store_id: 1}] assert_search "*", ["Product A"], where: {store_id: 1} end def test_large_value large_value = 1000.times.map { "hello" }.join(" ") store [{name: "Product A", text: large_value}], Region assert_search "product", ["Product A"], {}, Region assert_search "hello", ["Product A"], {fields: [:name, :text]}, Region assert_search "hello", ["Product A"], {}, Region assert_search "*", ["Product A"], {where: {text: large_value}}, Region end def 
test_very_large_value # terms must be < 32 KB with Elasticsearch 8.10.3+ # https://github.com/elastic/elasticsearch/pull/99818 large_value = 5400.times.map { "hello" }.join(" ") store [{name: "Product A", text: large_value}], Region assert_search "product", ["Product A"], {}, Region assert_search "hello", ["Product A"], {fields: [:name, :text]}, Region assert_search "hello", ["Product A"], {}, Region # keyword not indexed assert_search "*", [], {where: {text: large_value}}, Region end def test_bulk_import_raises_error valid_dog = Product.create(name: "2016-01-02") invalid_dog = Product.create(name: "Ol' One-Leg") mapping = { properties: { name: {type: "date"} } } index = Searchkick::Index.new "dogs", mappings: mapping, _type: "dog" index.delete if index.exists? index.create_index index.store valid_dog assert_raises(Searchkick::ImportError) do index.bulk_index [valid_dog, invalid_dog] end end end ================================================ FILE: test/inheritance_test.rb ================================================ require_relative "test_helper" class InheritanceTest < Minitest::Test def setup super setup_animal end def test_child_reindex store_names ["Max"], Cat assert Dog.reindex assert_equal 1, Animal.search("*").size end def test_child_index_name assert_equal "animals_test#{ENV["TEST_ENV_NUMBER"]}", Dog.searchkick_index.name end def test_child_search store_names ["Bear"], Dog store_names ["Bear"], Cat assert_equal 1, Dog.search("bear").size end def test_parent_search store_names ["Bear"], Dog store_names ["Bear"], Cat assert_equal 2, Animal.search("bear").size end def test_force_one_type store_names ["Green Bear"], Dog store_names ["Blue Bear"], Cat assert_equal ["Blue Bear"], Animal.search("bear", type: [Cat]).map(&:name) end def test_force_multiple_types store_names ["Green Bear"], Dog store_names ["Blue Bear"], Cat store_names ["Red Bear"], Animal assert_equal ["Green Bear", "Blue Bear"], Animal.search("bear", type: [Dog, Cat]).map(&:name) end def 
test_child_autocomplete
    # text_start search through Cat only returns the Cat record
    store_names ["Max"], Cat
    store_names ["Mark"], Dog
    assert_equal ["Max"], Cat.search("ma", fields: [:name], match: :text_start).map(&:name)
  end

  # text_start search through Animal with a prefix that only one record has.
  def test_parent_autocomplete
    store_names ["Max"], Cat
    store_names ["Bear"], Dog
    assert_equal ["Bear"], Animal.search("bea", fields: [:name], match: :text_start).map(&:name).sort
  end

  # def test_child_suggest
  #   store_names ["Shark"], Cat
  #   store_names ["Sharp"], Dog
  #   assert_equal ["shark"], Cat.search("shar", fields: [:name], suggest: true).suggestions
  # end

  # Suggestions for a search made through Animal.
  def test_parent_suggest
    store_names ["Shark"], Cat
    store_names ["Tiger"], Dog
    assert_equal ["tiger"], Animal.search("tige", fields: [:name], suggest: true).suggestions.sort
  end

  # Reindexing through Animal keeps the records of both Cat and Dog.
  def test_reindex
    store_names ["Bear A"], Cat
    store_names ["Bear B"], Dog
    Animal.reindex
    assert_equal 2, Animal.search("bear").size
  end

  # Searchkick.search with models: [Cat, Dog] returns both records.
  def test_child_models_option
    store_names ["Bear A"], Cat
    store_names ["Bear B"], Dog
    Animal.reindex
    # note: the models option is less efficient than Animal.search("bear", type: [Cat, Dog])
    # since it requires two database calls instead of one to Animal
    assert_equal 2, Searchkick.search("bear", models: [Cat, Dog]).size
  end

  # A record deleted from the database without callbacks is still in the
  # index: a warning is expected on stderr and the record is listed in
  # missing_records.
  def test_missing_records
    store_names ["Bear A"], Cat
    store_names ["Bear B"], Dog
    Animal.reindex
    record = Animal.find_by(name: "Bear A")
    record.delete
    assert_output nil, /\[searchkick\] WARNING: Records in search index do not exist in database: Cat\/Dog \d+/ do
      result = Searchkick.search("bear", models: [Cat, Dog])
      assert_equal ["Bear B"], result.map(&:name)
      assert_equal [record.id.to_s], result.missing_records.map { |v| v[:id] }
      assert_equal [[Cat, Dog]], result.missing_records.map { |v| v[:model].sort_by(&:model_name) }
    end
    assert_empty Product.search("bear", load: false).missing_records
  ensure
    # bring the index back in line with the database for later tests
    Animal.reindex
  end

  # Cat together with Product: one record from each is expected, in
  # results, hits and pagination.
  def test_inherited_and_non_inherited_models
    store_names ["Bear A"], Cat
    store_names ["Bear B"], Dog
    store_names ["Bear C"]
    Animal.reindex
    assert_equal 2, Searchkick.search("bear", models: [Cat, Product]).size
    assert_equal 2, Searchkick.search("bear", models: [Cat, Product]).hits.size
    assert_equal 2, Searchkick.search("bear", models: [Cat, Product], per_page: 1).total_pages
  end

  # TODO move somewhere better
  # Searching several indices through either models: or index_name:.
  def test_multiple_indices
    store_names ["Product A"]
    store_names ["Product B"], Animal
    assert_search "product", ["Product A", "Product B"], {models: [Product, Animal], conversions: false}, Searchkick
    assert_search "product", ["Product A", "Product B"], {index_name: [Product, Animal], conversions: false}, Searchkick
  end

  # index_name accepts model classes.
  def test_index_name_model
    store_names ["Product A"]
    assert_equal ["Product A"], Searchkick.search("product", index_name: [Product]).map(&:name)
  end

  # A plain index name string is expected to raise "Unknown model" once
  # results are loaded.
  def test_index_name_string
    store_names ["Product A"]
    error = assert_raises Searchkick::Error do
      Searchkick.search("product", index_name: [Product.searchkick_index.name]).map(&:name)
    end
    assert_includes error.message, "Unknown model"
  end

  # similar defaults to the record's own class; models: widens or changes
  # which classes are considered.
  def test_similar
    store_names ["Dog", "Other dog"], Dog
    store_names ["Not dog"], Cat

    dog = Dog.find_by!(name: "Dog")
    assert_equal ["Other dog"], dog.similar(fields: [:name]).map(&:name)
    assert_equal ["Not dog", "Other dog"], dog.similar(fields: [:name], models: [Animal]).map(&:name).sort
    assert_equal ["Not dog"], dog.similar(fields: [:name], models: [Cat]).map(&:name).sort
  end
end

================================================
FILE: test/knn_test.rb
================================================
require_relative "test_helper"

# Covers the `knn` search option; skipped when the server has no knn support.
class KnnTest < Minitest::Test
  def setup
    skip unless Searchkick.knn_support?
    super

    # prevent null_pointer_exception with OpenSearch 3
    Product.reindex if Searchkick.opensearch? && !Searchkick.server_below?("3.0.0")
  end

  # Records with an embedding are returned nearest first; the record
  # without one is left out. Scores are expected near 1 for the identical
  # vector and near 0 for the opposite one.
  def test_basic
    store [{name: "A", embedding: [1, 2, 3]}, {name: "B", embedding: [-1, -2, -3]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3]}

    scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3]}).hits.map { |v| v["_score"] }
    assert_in_delta 1, scores[0]
    assert_in_delta 0, scores[1]
  end

  # Same as test_basic with exact: true.
  def test_basic_exact
    store [{name: "A", embedding: [1, 2, 3]}, {name: "B", embedding: [-1, -2, -3]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], exact: true}

    scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3], exact: true}).hits.map { |v| v["_score"] }
    assert_in_delta 1, scores[0]
    assert_in_delta 0, scores[1]
  end

  # knn combined with a where filter on store_id.
  def test_where
    store [
      {name: "A", store_id: 1, embedding: [1, 2, 3]},
      {name: "B", store_id: 2, embedding: [1, 2, 3]},
      {name: "C", store_id: 1, embedding: [-1, -2, -3]},
      {name: "D", store_id: 1}
    ]
    assert_order "*", ["A", "C"], knn: {field: :embedding, vector: [1, 2, 3]}, where: {store_id: 1}
  end

  # Same as test_where with exact: true.
  def test_where_exact
    store [
      {name: "A", store_id: 1, embedding: [1, 2, 3]},
      {name: "B", store_id: 2, embedding: [1, 2, 3]},
      {name: "C", store_id: 1, embedding: [-1, -2, -3]},
      {name: "D", store_id: 1}
    ]
    assert_order "*", ["A", "C"], knn: {field: :embedding, vector: [1, 2, 3], exact: true}, where: {store_id: 1}
  end

  # limit/offset select the second and third nearest records.
  def test_pagination
    store [
      {name: "A", embedding: [1, 2, 3]},
      {name: "B", embedding: [1, 2, 0]},
      {name: "C", embedding: [-1, -2, 0]},
      {name: "D", embedding: [-1, -2, -3]},
      {name: "E"}
    ]
    assert_order "*", ["B", "C"], knn: {field: :embedding, vector: [1, 2, 3]}, limit: 2, offset: 1
  end

  # Same as test_pagination with exact: true.
  def test_pagination_exact
    store [
      {name: "A", embedding: [1, 2, 3]},
      {name: "B", embedding: [1, 2, 0]},
      {name: "C", embedding: [-1, -2, 0]},
      {name: "D", embedding: [-1, -2, -3]},
      {name: "E"}
    ]
    assert_order "*", ["B", "C"], knn: {field: :embedding, vector: [1, 2, 3], exact: true}, limit: 2, offset: 1
  end

  def test_euclidean
    store [{name: "A",
embedding3: [1, 2, 3]}, {name: "B", embedding3: [1, 5, 7]}, {name: "C"}] assert_order "*", ["A", "B"], knn: {field: :embedding3, vector: [1, 2, 3]} scores = Product.search(knn: {field: :embedding3, vector: [1, 2, 3]}).hits.map { |v| v["_score"] } assert_in_delta 1.0 / (1 + 0), scores[0] assert_in_delta 1.0 / (1 + 5**2), scores[1] end def test_euclidean_exact store [{name: "A", embedding2: [1, 2, 3]}, {name: "B", embedding2: [1, 5, 7]}, {name: "C"}] assert_order "*", ["A", "B"], knn: {field: :embedding2, vector: [1, 2, 3], distance: "euclidean"} scores = Product.search(knn: {field: :embedding2, vector: [1, 2, 3], distance: "euclidean"}).hits.map { |v| v["_score"] } assert_in_delta 1.0 / (1 + 0), scores[0] assert_in_delta 1.0 / (1 + 5**2), scores[1] end def test_taxicab_exact store [{name: "A", embedding2: [1, 2, 3]}, {name: "B", embedding2: [1, 5, 7]}, {name: "C"}] assert_order "*", ["A", "B"], knn: {field: :embedding2, vector: [1, 2, 3], distance: "taxicab"} scores = Product.search(knn: {field: :embedding2, vector: [1, 2, 3], distance: "taxicab"}).hits.map { |v| v["_score"] } assert_in_delta 1.0 / (1 + 0), scores[0] assert_in_delta 1.0 / (1 + 7), scores[1] end def test_chebyshev_exact skip unless Searchkick.opensearch? 
store [{name: "A", embedding: [1, 2, 3]}, {name: "B", embedding: [1, 5, 7]}, {name: "C"}] assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], distance: "chebyshev"} scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3], distance: "chebyshev"}).hits.map { |v| v["_score"] } assert_in_delta 1.0 / (1 + 0), scores[0] assert_in_delta 1.0 / (1 + 4), scores[1] end def test_inner_product store [{name: "A", embedding2: [-1, -2, -3]}, {name: "B", embedding2: [1, 5, 7]}, {name: "C"}] assert_order "*", ["B", "A"], knn: {field: :embedding2, vector: [1, 2, 3], distance: "inner_product"} scores = Product.search(knn: {field: :embedding2, vector: [1, 2, 3], distance: "inner_product"}).hits.map { |v| v["_score"] } # d > 0: d + 1 # else: 1 / (1 - d) assert_in_delta 1 + 32, scores[0], (!Searchkick.opensearch? ? 0.5 : 0.001) assert_in_delta 1.0 / (1 + 14), scores[1] end def test_inner_product_exact store [{name: "A", embedding3: [-1, -2, -3]}, {name: "B", embedding3: [1, 5, 7]}, {name: "C"}] assert_order "*", ["B", "A"], knn: {field: :embedding3, vector: [1, 2, 3], distance: "inner_product"} scores = Product.search(knn: {field: :embedding3, vector: [1, 2, 3], distance: "inner_product"}).hits.map { |v| v["_score"] } assert_in_delta 1 + 32, scores[0] assert_in_delta 1.0 / (1 + 14), scores[1] end def test_unindexed skip if Searchkick.opensearch? 
store [{name: "A", embedding4: [1, 2, 3]}, {name: "B", embedding4: [-1, -2, -3]}, {name: "C"}] assert_order "*", ["A", "B"], knn: {field: :embedding4, vector: [1, 2, 3], distance: "cosine"} scores = Product.search(knn: {field: :embedding4, vector: [1, 2, 3], distance: "cosine"}).hits.map { |v| v["_score"] } assert_in_delta 1, scores[0] assert_in_delta 0, scores[1] error = assert_raises(ArgumentError) do Product.search(knn: {field: :embedding4, vector: [1, 2, 3]}) end assert_match "distance required", error.message error = assert_raises(ArgumentError) do Product.search(knn: {field: :embedding4, vector: [1, 2, 3], exact: false}) end assert_match "distance required", error.message error = assert_raises(ArgumentError) do Product.search(knn: {field: :embedding, vector: [1, 2, 3], distance: "euclidean", exact: false}) end assert_equal "distance must match searchkick options for approximate search", error.message if !Searchkick.server_below?("9.0.0") error = assert_raises(ArgumentError) do Product.search(knn: {field: :embedding, vector: [1, 2, 3], distance: "euclidean"}) end assert_equal "distance must match searchkick options", error.message end end def test_explain store [{name: "A", embedding: [1, 2, 3], embedding2: [1, 2, 3], embedding3: [1, 2, 3], embedding4: [1, 2, 3]}] assert_approx true, :embedding, "cosine" if Searchkick.opensearch? || Searchkick.server_below?("9.0.0") assert_approx false, :embedding, "euclidean" assert_approx false, :embedding, "inner_product" assert_approx false, :embedding, "taxicab" end if Searchkick.opensearch? assert_approx false, :embedding, "chebyshev" end assert_approx false, :embedding3, "cosine" assert_approx true, :embedding3, "euclidean" assert_approx false, :embedding3, "inner_product" unless Searchkick.opensearch? 
assert_approx false, :embedding4, "cosine" assert_approx false, :embedding4, "euclidean" assert_approx false, :embedding4, "inner_product" end assert_approx false, :embedding2, "cosine" assert_approx false, :embedding2, "euclidean" assert_approx true, :embedding2, "inner_product" assert_approx false, :embedding, "cosine", exact: true assert_approx true, :embedding, "cosine", exact: false error = assert_raises(ArgumentError) do assert_approx true, :embedding, "euclidean", exact: false end assert_equal "distance must match searchkick options for approximate search", error.message end def test_ef_search skip if Searchkick.opensearch? && Searchkick.server_below?("2.16.0") store [{name: "A", embedding: [1, 2, 3]}, {name: "B", embedding: [-1, -2, -3]}, {name: "C"}] assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], ef_search: 20}, limit: 10 end private def assert_approx(approx, field, distance, **knn_options) response = Product.search(knn: {field: field, vector: [1, 2, 3], distance: distance, **knn_options}, explain: true).response.to_s if approx if Searchkick.opensearch? assert_match "within top", response else assert_match "within top k documents", response end else if Searchkick.opensearch? assert_match "knn_score", response else assert_match "params.query_vector", response end end end end ================================================ FILE: test/language_test.rb ================================================ require_relative "test_helper" class LanguageTest < Minitest::Test def setup skip "Requires plugin" unless ci? || ENV["TEST_LANGUAGE"] Song.destroy_all end def test_chinese skip if ci? 
# requires https://github.com/medcl/elasticsearch-analysis-ik with_options({language: "chinese"}) do store_names ["中华人民共和国国歌"] assert_language_search "中华人民共和国", ["中华人民共和国国歌"] assert_language_search "国歌", ["中华人民共和国国歌"] assert_language_search "人", [] end end def test_chinese2 # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-smartcn.html with_options({language: "chinese2"}) do store_names ["中华人民共和国国歌"] assert_language_search "中华人民共和国", ["中华人民共和国国歌"] # assert_language_search "国歌", ["中华人民共和国国歌"] assert_language_search "人", [] end end def test_japanese # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html with_options({language: "japanese"}) do store_names ["JR新宿駅の近くにビールを飲みに行こうか"] assert_language_search "飲む", ["JR新宿駅の近くにビールを飲みに行こうか"] assert_language_search "jr", ["JR新宿駅の近くにビールを飲みに行こうか"] assert_language_search "新", [] end end def test_japanese_search_synonyms # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html with_options({language: "japanese", search_synonyms: [["飲む", "喰らう"]]}) do store_names ["JR新宿駅の近くにビールを飲みに行こうか"] assert_language_search "喰らう", ["JR新宿駅の近くにビールを飲みに行こうか"] assert_language_search "新", [] end end def test_korean skip if ci? # requires https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext with_options({language: "korean"}) do store_names ["한국어를 처리하는 예시입니닼ㅋㅋ"] assert_language_search "처리", ["한국어를 처리하는 예시입니닼ㅋㅋ"] assert_language_search "한국어", ["한국어를 처리하는 예시입니닼ㅋㅋ"] assert_language_search "를", [] end end def test_korean2 skip if ci? 
# requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-nori.html with_options({language: "korean2"}) do store_names ["한국어를 처리하는 예시입니닼ㅋㅋ"] assert_language_search "처리", ["한국어를 처리하는 예시입니닼ㅋㅋ"] assert_language_search "한국어", ["한국어를 처리하는 예시입니닼ㅋㅋ"] assert_language_search "를", [] end end def test_polish # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-stempel.html with_options({language: "polish"}) do store_names ["polski"] assert_language_search "polskimi", ["polski"] end end def test_ukrainian # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-ukrainian.html with_options({language: "ukrainian"}) do store_names ["ресторани"] assert_language_search "ресторан", ["ресторани"] end end def test_vietnamese skip if ci? # requires https://github.com/duydo/elasticsearch-analysis-vietnamese with_options({language: "vietnamese"}) do store_names ["công nghệ thông tin Việt Nam"] assert_language_search "công nghệ thông tin", ["công nghệ thông tin Việt Nam"] assert_language_search "công", [] end end def test_stemmer_hunspell skip if ci? with_options({stemmer: {type: "hunspell", locale: "en_US"}}) do store_names ["the foxes jumping quickly"] assert_language_search "fox", ["the foxes jumping quickly"] end end def test_stemmer_unknown_type error = assert_raises(ArgumentError) do with_options({stemmer: {type: "bad"}}) do end end assert_equal "Unknown stemmer: bad", error.message end def test_stemmer_language skip if ci? 
error = assert_raises(ArgumentError) do with_options({stemmer: {type: "hunspell", locale: "en_US"}, language: "english"}) do end end assert_equal "Can't pass both language and stemmer", error.message end def assert_language_search(term, expected) assert_search term, expected, {misspellings: false} end def default_model Song end end ================================================ FILE: test/load_test.rb ================================================ require_relative "test_helper" class LoadTest < Minitest::Test def test_default store_names ["Product A"] product = Product.search("product").first assert_kind_of Product, product if mongoid? assert_match "# "Product A"}), product.as_json(only: ["name"]) assert_equal ({"name" => "Product A"}), product.as_json(only: [:name]) refute product.as_json(except: ["name"]).key?("name") refute product.as_json(except: [:name]).key?("name") assert_empty product.as_json(only: ["missing"]) if mongoid? product.as_json(methods: [:missing]) else assert_raises(NoMethodError) do product.as_json(methods: [:missing]) end end end def test_false store_names ["Product A"] product = Product.search("product", load: false).first assert_kind_of Searchkick::HashWrapper, product assert_match "# "Product A"}), product.as_json(only: ["name"]) # same behavior as Hashie::Mash assert_empty product.as_json(only: [:name]) refute product.as_json(except: ["name"]).key?("name") # same behavior as Hashie::Mash assert product.as_json(except: [:name]).key?("name") assert_empty product.as_json(only: ["missing"]) # same behavior as Hashie::Mash product.as_json(methods: [:missing]) end def test_false_methods store_names ["Product A"] assert_equal "Product A", Product.search("product", load: false).first.name end def test_false_with_includes store_names ["Product A"] assert_kind_of Searchkick::HashWrapper, Product.search("product", load: false, includes: [:store]).first end def test_false_nested_object aisle = {"id" => 1, "name" => "Frozen"} store [{name: "Product 
A", aisle: aisle}] assert_equal aisle, Product.search("product", load: false).first.aisle.to_hash end end ================================================ FILE: test/log_subscriber_test.rb ================================================ require_relative "test_helper" class LogSubscriberTest < Minitest::Test def test_create output = capture_logs do Product.create!(name: "Product A") end assert_match "Product Store", output end def test_update product = Product.create!(name: "Product A") output = capture_logs do product.reindex(:search_name) end assert_match "Product Update", output end def test_destroy product = Product.create!(name: "Product A") output = capture_logs do product.destroy end assert_match "Product Remove", output end def test_bulk output = capture_logs do Searchkick.callbacks(:bulk) do Product.create!(name: "Product A") end end assert_match "Bulk", output refute_match "Product Store", output end def test_reindex create_products output = capture_logs do Product.reindex end assert_match "Product Import", output assert_match '"count":3', output end def test_reindex_relation products = create_products output = capture_logs do Product.where.not(id: products.last.id).reindex end assert_match "Product Import", output assert_match '"count":2', output end def test_search # prevent warnings Product.searchkick_index.refresh output = capture_logs do Product.search("product").to_a end assert_match "Product Search", output end def test_multi_search # prevent warnings Product.searchkick_index.refresh output = capture_logs do Searchkick.multi_search([Product.search("product")]) end assert_match "Multi Search", output end private def create_products Searchkick.callbacks(false) do 3.times.map do Product.create!(name: "Product A") end end end def capture_logs previous_logger = ActiveSupport::LogSubscriber.logger io = StringIO.new begin ActiveSupport::LogSubscriber.logger = ActiveSupport::Logger.new(io) yield io.rewind output = io.read previous_logger.debug(output) if 
previous_logger puts output if ENV["LOG_SUBSCRIBER"] output ensure ActiveSupport::LogSubscriber.logger = previous_logger end end end ================================================ FILE: test/marshal_test.rb ================================================ require_relative "test_helper" class MarshalTest < Minitest::Test def test_marshal store_names ["Product A"] assert Marshal.dump(Product.search("*").to_a) end def test_marshal_highlights store_names ["Product A"] assert Marshal.dump(Product.search("product", highlight: true, load: {dumpable: true}).to_a) end end ================================================ FILE: test/match_test.rb ================================================ require_relative "test_helper" class MatchTest < Minitest::Test # exact def test_match store_names ["Whole Milk", "Fat Free Milk", "Milk"] assert_search "milk", ["Milk", "Whole Milk", "Fat Free Milk"] end def test_case store_names ["Whole Milk", "Fat Free Milk", "Milk"] assert_search "MILK", ["Milk", "Whole Milk", "Fat Free Milk"] end def test_cheese_space_in_index store_names ["Pepper Jack Cheese Skewers"] assert_search "pepperjack cheese skewers", ["Pepper Jack Cheese Skewers"] end # def test_cheese_space_in_query # store_names ["Pepperjack Cheese Skewers"] # assert_search "pepper jack cheese skewers", ["Pepperjack Cheese Skewers"] # end def test_middle_token store_names ["Dish Washer Amazing Organic Soap"] assert_search "dish soap", ["Dish Washer Amazing Organic Soap"] end def test_middle_token_wine store_names ["Beringer Wine Founders Estate Chardonnay"] assert_search "beringer chardonnay", ["Beringer Wine Founders Estate Chardonnay"] end def test_percent store_names ["1% Milk", "Whole Milk"] assert_search "1%", ["1% Milk"] end # ascii def test_jalapenos store_names ["Jalapeño"] assert_search "jalapeno", ["Jalapeño"] end def test_swedish store_names ["ÅÄÖ"] assert_search "aao", ["ÅÄÖ"] end # stemming def test_stemming store_names ["Whole Milk", "Fat Free Milk", "Milk"] 
assert_search "milks", ["Milk", "Whole Milk", "Fat Free Milk"] assert_search "milks", ["Milk", "Whole Milk", "Fat Free Milk"], misspellings: false end def test_stemming_tokens assert_equal ["milk"], Product.searchkick_index.tokens("milks", analyzer: "searchkick_search") assert_equal ["milk"], Product.searchkick_index.tokens("milks", analyzer: "searchkick_search2") end # fuzzy def test_misspelling_sriracha store_names ["Sriracha"] assert_search "siracha", ["Sriracha"] end def test_misspelling_multiple store_names ["Greek Yogurt", "Green Onions"] assert_search "greed", ["Greek Yogurt", "Green Onions"] end def test_short_word store_names ["Finn"] assert_search "fin", ["Finn"] end def test_edit_distance_two store_names ["Bingo"] assert_search "bin", [] assert_search "bingooo", [] assert_search "mango", [] end def test_edit_distance_one store_names ["Bingo"] assert_search "bing", ["Bingo"] assert_search "bingoo", ["Bingo"] assert_search "ringo", ["Bingo"] end def test_edit_distance_long_word store_names ["thisisareallylongword"] assert_search "thisisareallylongwor", ["thisisareallylongword"] # missing letter assert_search "thisisareelylongword", [] # edit distance = 2 end def test_misspelling_tabasco store_names ["Tabasco"] assert_search "tobasco", ["Tabasco"] end def test_misspelling_zucchini store_names ["Zucchini"] assert_search "zuchini", ["Zucchini"] end def test_misspelling_ziploc store_names ["Ziploc"] assert_search "zip lock", ["Ziploc"] end def test_misspelling_zucchini_transposition store_names ["zucchini"] assert_search "zuccihni", ["zucchini"] # need to specify field # as transposition option isn't supported for multi_match queries # until Elasticsearch 6.1 assert_search "zuccihni", [], misspellings: {transpositions: false}, fields: [:name] end def test_misspelling_lasagna store_names ["lasagna"] assert_search "lasanga", ["lasagna"], misspellings: {transpositions: true} assert_search "lasgana", ["lasagna"], misspellings: {transpositions: true} assert_search 
"lasaang", [], misspellings: {transpositions: true} # triple transposition, shouldn't work assert_search "lsagana", [], misspellings: {transpositions: true} # triple transposition, shouldn't work end def test_misspelling_lasagna_pasta store_names ["lasagna pasta"] assert_search "lasanga", ["lasagna pasta"], misspellings: {transpositions: true} assert_search "lasanga pasta", ["lasagna pasta"], misspellings: {transpositions: true} assert_search "lasanga pasat", ["lasagna pasta"], misspellings: {transpositions: true} # both words misspelled with a transposition should still work end def test_misspellings_word_start store_names ["Sriracha"] assert_search "siracha", ["Sriracha"], fields: [{name: :word_start}] end # spaces def test_spaces_in_field store_names ["Red Bull"] assert_search "redbull", ["Red Bull"], misspellings: false end def test_spaces_in_query store_names ["Dishwasher"] assert_search "dish washer", ["Dishwasher"], misspellings: false end def test_spaces_three_words store_names ["Dish Washer Soap", "Dish Washer"] assert_search "dish washer soap", ["Dish Washer Soap"] end def test_spaces_stemming store_names ["Almond Milk"] assert_search "almondmilks", ["Almond Milk"] end # other def test_all store_names ["Product A", "Product B"] assert_search "*", ["Product A", "Product B"] end def test_no_arguments store_names [] assert_equal [], Product.search.to_a end def test_no_term store_names ["Product A"] assert_equal ["Product A"], Product.search(where: {name: "Product A"}).map(&:name) end def test_to_be_or_not_to_be store_names ["to be or not to be"] assert_search "to be", ["to be or not to be"] end def test_apostrophe store_names ["Ben and Jerry's"] assert_search "ben and jerrys", ["Ben and Jerry's"] end def test_apostrophe_search store_names ["Ben and Jerrys"] assert_search "ben and jerry's", ["Ben and Jerrys"] end def test_ampersand_index store_names ["Ben & Jerry's"] assert_search "ben and jerrys", ["Ben & Jerry's"] end def test_ampersand_search store_names 
["Ben and Jerry's"] assert_search "ben & jerrys", ["Ben and Jerry's"] end def test_phrase store_names ["Fresh Honey", "Honey Fresh"] assert_search "fresh honey", ["Fresh Honey"], match: :phrase end def test_phrase_again store_names ["Social entrepreneurs don't have it easy raising capital"] assert_search "social entrepreneurs don't have it easy raising capital", ["Social entrepreneurs don't have it easy raising capital"], match: :phrase end def test_phrase_order store_names ["Wheat Bread", "Whole Wheat Bread"] assert_order "wheat bread", ["Wheat Bread", "Whole Wheat Bread"], match: :phrase, fields: [:name] end def test_dynamic_fields setup_speaker store_names ["Red Bull"], Speaker assert_search "redbull", ["Red Bull"], {fields: [:name]}, Speaker end def test_unsearchable skip store [ {name: "Unsearchable", description: "Almond"} ] assert_search "almond", [] end def test_unsearchable_where store [ {name: "Unsearchable", description: "Almond"} ] assert_search "*", ["Unsearchable"], where: {description: "Almond"} end def test_emoji store_names ["Banana"] assert_search "🍌", ["Banana"], emoji: true end def test_emoji_multiple store_names ["Ice Cream Cake"] assert_search "🍨🍰", ["Ice Cream Cake"], emoji: true assert_search "🍨🍰", ["Ice Cream Cake"], emoji: true, misspellings: false end # operator def test_operator store_names ["fresh", "honey"] assert_search "fresh honey", ["fresh", "honey"], {operator: "or"} assert_search "fresh honey", [], {operator: "and"} assert_search "fresh honey", ["fresh", "honey"], {operator: :or} assert_search "fresh honey", ["fresh", "honey"], {operator: :or, body_options: {track_total_hits: true}} assert_search "fresh honey", [], {operator: :or, fields: [:name], match: :phrase, body_options: {track_total_hits: true}} end def test_operator_scoring store_names ["Big Red Circle", "Big Green Circle", "Small Orange Circle"] assert_order "big red circle", ["Big Red Circle", "Big Green Circle", "Small Orange Circle"], operator: "or" end # fields def 
test_fields_operator store [ {name: "red", color: "red"}, {name: "blue", color: "blue"}, {name: "cyan", color: "blue green"}, {name: "magenta", color: "red blue"}, {name: "green", color: "green"} ] assert_search "red blue", ["red", "blue", "cyan", "magenta"], operator: "or", fields: ["color"] end def test_fields store [ {name: "red", color: "light blue"}, {name: "blue", color: "red fish"} ] assert_search "blue", ["red"], fields: ["color"] end def test_non_existent_field store_names ["Milk"] assert_search "milk", [], fields: ["not_here"] end def test_fields_both_match # have same score due to dismax store [ {name: "Blue A", color: "red"}, {name: "Blue B", color: "light blue"} ] assert_first "blue", "Blue B", fields: [:name, :color] end end ================================================ FILE: test/misspellings_test.rb ================================================ require_relative "test_helper" class MisspellingsTest < Minitest::Test def test_false store_names ["abc", "abd", "aee"] assert_search "abc", ["abc"], misspellings: false end def test_distance store_names ["abbb", "aabb"] assert_search "aaaa", ["aabb"], misspellings: {distance: 2} end def test_prefix_length store_names ["ap", "api", "apt", "any", "nap", "ah", "ahi"] assert_search "ap", ["ap", "api", "apt"], misspellings: {prefix_length: 2} assert_search "api", ["ap", "api", "apt"], misspellings: {prefix_length: 2} end def test_prefix_length_operator store_names ["ap", "api", "apt", "any", "nap", "ah", "aha"] assert_search "ap ah", ["ap", "ah", "api", "apt", "aha"], operator: "or", misspellings: {prefix_length: 2} assert_search "api ahi", ["ap", "api", "apt", "ah", "aha"], operator: "or", misspellings: {prefix_length: 2} end def test_fields_operator store [ {name: "red", color: "red"}, {name: "blue", color: "blue"}, {name: "cyan", color: "blue green"}, {name: "magenta", color: "red blue"}, {name: "green", color: "green"} ] assert_search "red blue", ["red", "blue", "cyan", "magenta"], operator: "or", 
fields: ["color"], misspellings: false end def test_below_unmet store_names ["abc", "abd", "aee"] assert_search "abc", ["abc", "abd"], misspellings: {below: 2} end def test_below_unmet_result store_names ["abc", "abd", "aee"] assert Product.search("abc", misspellings: {below: 2}).misspellings? end def test_below_met store_names ["abc", "abd", "aee"] assert_search "abc", ["abc"], misspellings: {below: 1} end def test_below_met_result store_names ["abc", "abd", "aee"] assert !Product.search("abc", misspellings: {below: 1}).misspellings? end def test_field_correct_spelling_still_works store [{name: "Sriracha", color: "blue"}] assert_misspellings "Sriracha", ["Sriracha"], {fields: [:name, :color]} assert_misspellings "blue", ["Sriracha"], {fields: [:name, :color]} end def test_field_enabled store [{name: "Sriracha", color: "blue"}] assert_misspellings "siracha", ["Sriracha"], {fields: [:name]} assert_misspellings "clue", ["Sriracha"], {fields: [:color]} end def test_field_disabled store [{name: "Sriracha", color: "blue"}] assert_misspellings "siracha", [], {fields: [:color]} assert_misspellings "clue", [], {fields: [:name]} end def test_field_with_transpositions store [{name: "Sriracha", color: "blue"}] assert_misspellings "lbue", [], {transpositions: false, fields: [:color]} end def test_field_with_edit_distance store [{name: "Sriracha", color: "blue"}] assert_misspellings "crue", ["Sriracha"], {edit_distance: 2, fields: [:color]} end def test_field_multiple store [ {name: "Greek Yogurt", color: "white"}, {name: "Green Onions", color: "yellow"} ] assert_misspellings "greed", ["Greek Yogurt", "Green Onions"], {fields: [:name, :color]} assert_misspellings "mellow", ["Green Onions"], {fields: [:name, :color]} end def test_field_requires_explicit_search_fields store_names ["Sriracha"] assert_raises(ArgumentError) do assert_search "siracha", ["Sriracha"], {misspellings: {fields: [:name]}} end end def test_field_word_start store_names ["Sriracha"] assert_search "siracha", 
["Sriracha"], fields: [{name: :word_middle}], misspellings: {fields: [:name]} end private def assert_misspellings(term, expected, misspellings = {}, model = default_model) options = { fields: [:name, :color], misspellings: misspellings } assert_search(term, expected, options, model) end end ================================================ FILE: test/models/animal.rb ================================================ class Animal searchkick \ inheritance: true, text_start: [:name], suggest: [:name] end ================================================ FILE: test/models/artist.rb ================================================ class Artist searchkick unscope: true def should_index? should_index end end ================================================ FILE: test/models/band.rb ================================================ class Band searchkick end ================================================ FILE: test/models/product.rb ================================================ class Product searchkick \ synonyms: [ ["clorox", "bleach"], ["burger", "hamburger"], ["bandaid", "bandages"], ["UPPERCASE", "lowercase"], "lightbulb => led,lightbulb", "lightbulb => halogenlamp" ], suggest: [:name, :color], conversions_v1: [:conversions], conversions_v2: [:conversions_v2], locations: [:location, :multiple_locations], text_start: [:name], text_middle: [:name], text_end: [:name], word_start: [:name], word_middle: [:name], word_end: [:name], highlight: [:name], filterable: [:name, :color, :description], similarity: "BM25", match: ENV["MATCH"] ? ENV["MATCH"].to_sym : nil, knn: Searchkick.knn_support? ? { embedding: {dimensions: 3, distance: "cosine", m: 16, ef_construction: 100}, embedding2: {dimensions: 3, distance: "inner_product"}, embedding3: {dimensions: 3, distance: "euclidean"} }.merge(Searchkick.opensearch? ? 
{} : {embedding4: {dimensions: 3}}) : nil attr_accessor :conversions, :conversions_v2, :user_ids, :aisle, :details class << self attr_accessor :dynamic_data end def search_data return self.class.dynamic_data.call if self.class.dynamic_data serializable_hash.except("id", "_id").merge( conversions: conversions, conversions_v2: conversions_v2, user_ids: user_ids, location: {lat: latitude, lon: longitude}, multiple_locations: [{lat: latitude, lon: longitude}, {lat: 0, lon: 0}], aisle: aisle, details: details ) end def should_index? name != "DO NOT INDEX" end def search_name { name: name } end end ================================================ FILE: test/models/region.rb ================================================ class Region searchkick \ geo_shape: [:territory] attr_accessor :territory def search_data { name: name, text: text, territory: territory } end end ================================================ FILE: test/models/sku.rb ================================================ class Sku searchkick callbacks: :async end ================================================ FILE: test/models/song.rb ================================================ class Song searchkick def search_routing name end end ================================================ FILE: test/models/speaker.rb ================================================ class Speaker searchkick \ conversions_v1: ["conversions_a", "conversions_b"], search_synonyms: [ ["clorox", "bleach"], ["burger", "hamburger"], ["bandaids", "bandages"], ["UPPERCASE", "lowercase"], "led => led,lightbulb", "halogen lamp => lightbulb", ["United States of America", "USA"] ], word_start: [:name] attr_accessor :conversions_a, :conversions_b, :aisle def search_data serializable_hash.except("id", "_id").merge( conversions_a: conversions_a, conversions_b: conversions_b, aisle: aisle ) end end ================================================ FILE: test/models/store.rb ================================================ class Store mappings 
= { properties: { name: {type: "text"} } } searchkick \ routing: true, merge_mappings: true, mappings: mappings def search_document_id id end def search_routing name end end ================================================ FILE: test/multi_indices_test.rb ================================================ require_relative "test_helper" class MultiIndicesTest < Minitest::Test def setup super setup_speaker end def test_basic store_names ["Product A"] store_names ["Product B"], Speaker assert_search_multi "product", ["Product A", "Product B"] end def test_index_name store_names ["Product A"] assert_equal ["Product A"], Product.search("product", index_name: Product.searchkick_index.name).map(&:name) assert_equal ["Product A"], Product.search("product", index_name: Product).map(&:name) Speaker.searchkick_index.refresh assert_equal [], Product.search("product", index_name: Speaker.searchkick_index.name, conversions: false).map(&:name) end def test_models_and_index_name store_names ["Product A"] store_names ["Product B"], Speaker assert_equal ["Product A"], Searchkick.search("product", models: [Product, Store], index_name: Product.searchkick_index.name).map(&:name) error = assert_raises(Searchkick::Error) do Searchkick.search("product", models: [Product, Store], index_name: Speaker.searchkick_index.name).map(&:name) end assert_includes error.message, "Unknown model" # legacy assert_equal ["Product A"], Searchkick.search("product", index_name: [Product, Store]).map(&:name) end def test_model_with_another_model error = assert_raises(ArgumentError) do Product.search(models: [Store]) end assert_includes error.message, "Use Searchkick.search" end def test_model_with_another_model_in_index_name error = assert_raises(ArgumentError) do # legacy protection Product.search(index_name: [Store, "another"]) end assert_includes error.message, "Use Searchkick.search" end def test_no_models_or_index_name store_names ["Product A"] error = assert_raises(Searchkick::Error) do 
Searchkick.search("product").to_a end assert_includes error.message, "Unknown model" end def test_no_models_or_index_name_load_false store_names ["Product A"] Searchkick.search("product", load: false).to_a end private def assert_search_multi(term, expected, options = {}) options[:models] = [Product, Speaker] options[:fields] = [:name] assert_search(term, expected, options, Searchkick) end end ================================================ FILE: test/multi_search_test.rb ================================================ require_relative "test_helper" class MultiSearchTest < Minitest::Test def test_basic store_names ["Product A"] store_names ["Store A"], Store products = Product.search("*") stores = Store.search("*") Searchkick.multi_search([products, stores]) assert_equal ["Product A"], products.map(&:name) assert_equal ["Store A"], stores.map(&:name) end def test_methods result = Product.search("*") query = Product.search("*") assert_empty(result.methods - query.methods) end def test_error store_names ["Product A"] products = Product.search("*") stores = Store.search("*", order: [:bad_field]) Searchkick.multi_search([products, stores]) assert !products.error assert stores.error end def test_misspellings_below_unmet store_names ["abc", "abd", "aee"] products = Product.search("abc", misspellings: {below: 5}) Searchkick.multi_search([products]) assert_equal ["abc", "abd"], products.map(&:name) end def test_misspellings_below_error products = Product.search("abc", order: [:bad_field], misspellings: {below: 1}) Searchkick.multi_search([products]) assert products.error end def test_query_error products = Product.search("*", order: {bad_field: :asc}) Searchkick.multi_search([products]) assert products.error error = assert_raises(Searchkick::Error) { products.to_a } assert_equal error.message, "Query error - use the error method to view it" end end ================================================ FILE: test/multi_tenancy_test.rb 
================================================ require_relative "test_helper" class MultiTenancyTest < Minitest::Test def setup skip unless defined?(Apartment) end def test_basic Apartment::Tenant.switch!("tenant1") store_names ["Product A"] Apartment::Tenant.switch!("tenant2") store_names ["Product B"] Apartment::Tenant.switch!("tenant1") assert_search "product", ["Product A"], {load: false} Apartment::Tenant.switch!("tenant2") assert_search "product", ["Product B"], {load: false} end def teardown Apartment::Tenant.reset if defined?(Apartment) end def default_model Tenant end end ================================================ FILE: test/notifications_test.rb ================================================ require_relative "test_helper" class NotificationsTest < Minitest::Test def test_search Product.searchkick_index.refresh notifications = capture_notifications do Product.search("product").to_a end assert_equal 1, notifications.size assert_equal "search.searchkick", notifications.last[:name] end private def capture_notifications notifications = [] callback = lambda do |name, started, finished, unique_id, payload| notifications << {name: name, payload: payload} end ActiveSupport::Notifications.subscribed(callback, /searchkick/) do yield end notifications end end ================================================ FILE: test/order_test.rb ================================================ require_relative "test_helper" class OrderTest < Minitest::Test def test_hash store_names ["Product A", "Product B", "Product C", "Product D"] assert_order "product", ["Product D", "Product C", "Product B", "Product A"], order: {name: :desc} assert_order_relation ["Product D", "Product C", "Product B", "Product A"], Product.search("product").order(name: :desc) end def test_string store_names ["Product A", "Product B", "Product C", "Product D"] assert_order "product", ["Product A", "Product B", "Product C", "Product D"], order: "name" assert_order_relation ["Product A", "Product B", 
"Product C", "Product D"], Product.search("product").order("name") end def test_multiple store [ {name: "Product A", color: "blue", store_id: 1}, {name: "Product B", color: "red", store_id: 3}, {name: "Product C", color: "red", store_id: 2} ] assert_order "product", ["Product A", "Product B", "Product C"], order: {color: :asc, store_id: :desc} assert_order_relation ["Product A", "Product B", "Product C"], Product.search("product").order(color: :asc, store_id: :desc) assert_order_relation ["Product A", "Product B", "Product C"], Product.search("product").order(:color, store_id: :desc) assert_order_relation ["Product A", "Product B", "Product C"], Product.search("product").order(color: :asc).order(store_id: :desc) assert_order_relation ["Product B", "Product C", "Product A"], Product.search("product").order(color: :asc).reorder(store_id: :desc) end def test_unmapped_type Product.searchkick_index.refresh assert_order "product", [], order: {not_mapped: {unmapped_type: "long"}} assert_order_relation [], Product.search("product").order(not_mapped: {unmapped_type: "long"}) end def test_array store [{name: "San Francisco", latitude: 37.7833, longitude: -122.4167}] assert_order "francisco", ["San Francisco"], order: [{_geo_distance: {location: "0,0"}}] assert_order_relation ["San Francisco"], Product.search("francisco").order([{_geo_distance: {location: "0,0"}}]) end def test_script store_names ["Red", "Green", "Blue"] order = {_script: {type: "number", script: {source: "doc['name'].value.length() * -1"}}} assert_order "*", ["Green", "Blue", "Red"], order: order assert_order_relation ["Green", "Blue", "Red"], Product.search("*").order(order) end end ================================================ FILE: test/pagination_test.rb ================================================ require_relative "test_helper" class PaginationTest < Minitest::Test def test_limit store_names ["Product A", "Product B", "Product C", "Product D"] assert_order "product", ["Product A", "Product B"], 
order: {name: :asc}, limit: 2 assert_order_relation ["Product A", "Product B"], Product.search("product").order(name: :asc).limit(2) end def test_no_limit names = 20.times.map { |i| "Product #{i}" } store_names names assert_search "product", names end def test_offset store_names ["Product A", "Product B", "Product C", "Product D"] assert_order "product", ["Product C", "Product D"], order: {name: :asc}, offset: 2, limit: 100 assert_order_relation ["Product C", "Product D"], Product.search("product").order(name: :asc).offset(2).limit(100) end def test_pagination store_names ["Product A", "Product B", "Product C", "Product D", "Product E", "Product F"] products = Product.search("product", order: {name: :asc}, page: 2, per_page: 2, padding: 1) assert_equal ["Product D", "Product E"], products.map(&:name) assert_equal "product", products.entry_name assert_equal 2, products.current_page assert_equal 1, products.padding assert_equal 2, products.per_page assert_equal 2, products.size assert_equal 2, products.length assert_equal 3, products.total_pages assert_equal 6, products.total_count assert_equal 6, products.total_entries assert_equal 2, products.limit_value assert_equal 3, products.offset_value assert_equal 3, products.offset assert_equal 3, products.next_page assert_equal 1, products.previous_page assert_equal 1, products.prev_page assert !products.first_page? assert !products.last_page? assert !products.empty? assert !products.out_of_range? assert products.any? 
end def test_relation store_names ["Product A", "Product B", "Product C", "Product D", "Product E", "Product F"] products = Product.search("product", padding: 1).order(name: :asc).page(2).per_page(2) assert_equal ["Product D", "Product E"], products.map(&:name) assert_equal "product", products.entry_name assert_equal 2, products.current_page assert_equal 1, products.padding assert_equal 2, products.per_page assert_equal 2, products.size assert_equal 2, products.length assert_equal 3, products.total_pages assert_equal 6, products.total_count assert_equal 6, products.total_entries assert_equal 2, products.limit_value assert_equal 3, products.offset_value assert_equal 3, products.offset assert_equal 3, products.next_page assert_equal 1, products.previous_page assert_equal 1, products.prev_page assert !products.first_page? assert !products.last_page? assert !products.empty? assert !products.out_of_range? assert products.any? end def test_per store_names ["Product A", "Product B", "Product C"] assert_order_relation ["Product A", "Product B"], Product.search("product").order(name: :asc).per(2) end def test_body store_names ["Product A", "Product B", "Product C", "Product D", "Product E", "Product F"] products = Product.search("product", body: {query: {match_all: {}}, sort: [{name: "asc"}]}, page: 2, per_page: 2, padding: 1) assert_equal ["Product D", "Product E"], products.map(&:name) assert_equal "product", products.entry_name assert_equal 2, products.current_page assert_equal 1, products.padding assert_equal 2, products.per_page assert_equal 2, products.size assert_equal 2, products.length assert_equal 3, products.total_pages assert_equal 6, products.total_count assert_equal 6, products.total_entries assert_equal 2, products.limit_value assert_equal 3, products.offset_value assert_equal 3, products.offset assert_equal 3, products.next_page assert_equal 1, products.previous_page assert_equal 1, products.prev_page assert !products.first_page? assert !products.last_page? 
assert !products.empty? assert !products.out_of_range? assert products.any? end def test_nil_page store_names ["Product A", "Product B", "Product C", "Product D", "Product E"] products = Product.search("product", order: {name: :asc}, page: nil, per_page: 2) assert_equal ["Product A", "Product B"], products.map(&:name) assert_equal 1, products.current_page assert products.first_page? end def test_strings store_names ["Product A", "Product B", "Product C", "Product D", "Product E", "Product F"] products = Product.search("product", order: {name: :asc}, page: "2", per_page: "2", padding: "1") assert_equal ["Product D", "Product E"], products.map(&:name) products = Product.search("product", order: {name: :asc}, limit: "2", offset: "3") assert_equal ["Product D", "Product E"], products.map(&:name) end def test_total_entries products = Product.search("product", total_entries: 4) assert_equal 4, products.total_entries end def test_kaminari require "action_view" I18n.load_path = Dir["test/support/kaminari.yml"] I18n.backend.load_translations view = ActionView::Base.new(ActionView::LookupContext.new([]), [], nil) store_names ["Product A"] assert_equal "Displaying 1 product", view.page_entries_info(Product.search("product")) store_names ["Product B"] assert_equal "Displaying all 2 products", view.page_entries_info(Product.search("product")) store_names ["Product C"] assert_equal "Displaying products 1 - 2 of 3 in total", view.page_entries_info(Product.search("product").per_page(2)) end def test_deep_paging with_options({deep_paging: true}, Song) do assert_empty Song.search("*", offset: 10000, limit: 1).to_a end end def test_no_deep_paging Song.reindex error = assert_raises(Searchkick::InvalidQueryError) do Song.search("*", offset: 10000, limit: 1).to_a end assert_match "Result window is too large", error.message end def test_max_result_window Song.delete_all with_options({max_result_window: 10000}, Song) do relation = Song.search("*", offset: 10000, limit: 1) assert_empty 
relation.to_a assert_equal 1, relation.per_page assert_equal 0, relation.total_pages end end def test_search_after store_names ["Product A", "Product B", "Product C", "Product D"] # ensure different created_at store_names ["Product B"] options = {order: {name: :asc, created_at: :asc}, per_page: 2} products = Product.search("product", **options) assert_equal ["Product A", "Product B"], products.map(&:name) search_after = products.hits.last["sort"] products = Product.search("product", body_options: {search_after: search_after}, **options) assert_equal ["Product B", "Product C"], products.map(&:name) search_after = products.hits.last["sort"] products = Product.search("product", body_options: {search_after: search_after}, **options) assert_equal ["Product D"], products.map(&:name) end def test_pit skip unless pit_supported? store_names ["Product A", "Product B", "Product D", "Product E", "Product G"] pit_id = if Searchkick.opensearch? path = "#{CGI.escape(Product.searchkick_index.name)}/_search/point_in_time" Searchkick.client.transport.perform_request("POST", path, {keep_alive: "5s"}).body["pit_id"] else Searchkick.client.open_point_in_time(index: Product.searchkick_index.name, keep_alive: "5s")["id"] end store_names ["Product C", "Product F"] options = { order: {name: :asc}, per_page: 2, body_options: {pit: {id: pit_id}}, index_name: "" } products = Product.search("product", **options) assert_equal ["Product A", "Product B"], products.map(&:name) products = Product.search("product", page: 2, **options) assert_equal ["Product D", "Product E"], products.map(&:name) products = Product.search("product", page: 3, **options) assert_equal ["Product G"], products.map(&:name) products = Product.search("product", page: 4, **options) assert_empty products.map(&:name) if Searchkick.opensearch? 
Searchkick.client.transport.perform_request("DELETE", "_search/point_in_time", {}, {pit_id: pit_id}) else Searchkick.client.close_point_in_time(body: {id: pit_id}) end error = assert_raises do Product.search("product", **options).load end assert_match "No search context found for id", error.message end private def pit_supported? Searchkick.opensearch? ? !Searchkick.server_below?("2.4.0") : true end end ================================================ FILE: test/parameters_test.rb ================================================ require_relative "test_helper" class ParametersTest < Minitest::Test def setup require "action_controller" super end def test_options params = ActionController::Parameters.new({store_id: 1}) assert_raises(ActionController::UnfilteredParameters) do Product.search("*", **params) end end def test_where params = ActionController::Parameters.new({store_id: 1}) assert_raises(ActionController::UnfilteredParameters) do Product.search("*", where: params) end end def test_where_relation params = ActionController::Parameters.new({store_id: 1}) assert_raises(ActionController::UnfilteredParameters) do Product.search("*").where(params) end end def test_rewhere_relation params = ActionController::Parameters.new({store_id: 1}) assert_raises(ActionController::UnfilteredParameters) do Product.search("*").where(params) end end def test_where_permitted store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}] params = ActionController::Parameters.new({store_id: 1}) assert_search "product", ["Product A"], where: params.permit(:store_id) end def test_where_permitted_relation store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}] params = ActionController::Parameters.new({store_id: 1}) assert_search_relation ["Product A"], Product.search("product").where(params.permit(:store_id)) end def test_rewhere_permitted_relation store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}] params = 
ActionController::Parameters.new({store_id: 1}) assert_search_relation ["Product A"], Product.search("product").rewhere(params.permit(:store_id)) end def test_where_value store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}] params = ActionController::Parameters.new({store_id: 1}) assert_search "product", ["Product A"], where: {store_id: params[:store_id]} end def test_where_value_relation store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}] params = ActionController::Parameters.new({store_id: 1}) assert_search_relation ["Product A"], Product.search("product").where(store_id: params[:store_id]) end def test_rewhere_value_relation store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}] params = ActionController::Parameters.new({store_id: 1}) assert_search_relation ["Product A"], Product.search("product").where(store_id: params[:store_id]) end def test_where_hash params = ActionController::Parameters.new({store_id: {value: 10, boost: 2}}) error = assert_raises(TypeError) do assert_search "product", [], where: {store_id: params[:store_id]} end assert_equal error.message, "can't cast ActionController::Parameters" end # TODO raise error without to_a def test_where_hash_relation params = ActionController::Parameters.new({store_id: {value: 10, boost: 2}}) error = assert_raises(TypeError) do Product.search("product").where(store_id: params[:store_id]).to_a end assert_equal error.message, "can't cast ActionController::Parameters" end # TODO raise error without to_a def test_rewhere_hash_relation params = ActionController::Parameters.new({store_id: {value: 10, boost: 2}}) error = assert_raises(TypeError) do Product.search("product").rewhere(store_id: params[:store_id]).to_a end assert_equal error.message, "can't cast ActionController::Parameters" end def test_aggs_where params = ActionController::Parameters.new({store_id: 1}) assert_raises(ActionController::UnfilteredParameters) do Product.search("*", aggs: {size: 
{where: params}}) end end def test_aggs_where_smart_aggs_false params = ActionController::Parameters.new({store_id: 1}) assert_raises(ActionController::UnfilteredParameters) do Product.search("*", aggs: {size: {where: params}}, smart_aggs: false) end end end ================================================ FILE: test/partial_match_test.rb ================================================ require_relative "test_helper" class PartialMatchTest < Minitest::Test def test_autocomplete store_names ["Hummus"] assert_search "hum", ["Hummus"], match: :text_start end def test_autocomplete_two_words store_names ["Organic Hummus"] assert_search "hum", [], match: :text_start end def test_autocomplete_fields store_names ["Hummus"] assert_search "hum", ["Hummus"], match: :text_start, fields: [:name] end def test_text_start store_names ["Where in the World is Carmen San Diego"] assert_search "where in the world is", ["Where in the World is Carmen San Diego"], fields: [{name: :text_start}] assert_search "in the world", [], fields: [{name: :text_start}] end def test_text_middle store_names ["Where in the World is Carmen San Diego"] assert_search "where in the world is", ["Where in the World is Carmen San Diego"], fields: [{name: :text_middle}] assert_search "n the wor", ["Where in the World is Carmen San Diego"], fields: [{name: :text_middle}] assert_search "men san diego", ["Where in the World is Carmen San Diego"], fields: [{name: :text_middle}] assert_search "world carmen", [], fields: [{name: :text_middle}] end def test_text_end store_names ["Where in the World is Carmen San Diego"] assert_search "men san diego", ["Where in the World is Carmen San Diego"], fields: [{name: :text_end}] assert_search "carmen san", [], fields: [{name: :text_end}] end def test_word_start store_names ["Where in the World is Carmen San Diego"] assert_search "car san wor", ["Where in the World is Carmen San Diego"], fields: [{name: :word_start}] end def test_word_middle store_names ["Where in the World is 
Carmen San Diego"] assert_search "orl", ["Where in the World is Carmen San Diego"], fields: [{name: :word_middle}] end def test_word_end store_names ["Where in the World is Carmen San Diego"] assert_search "rld men ego", ["Where in the World is Carmen San Diego"], fields: [{name: :word_end}] end def test_word_start_multiple_words store_names ["Dark Grey", "Dark Blue"] assert_search "dark grey", ["Dark Grey"], fields: [{name: :word_start}] end def test_word_start_exact store_names ["Back Scratcher", "Backpack"] assert_order "back", ["Back Scratcher", "Backpack"], fields: [{name: :word_start}] end def test_word_start_exact_martin store_names ["Martina", "Martin"] assert_order "martin", ["Martin", "Martina"], fields: [{name: :word_start}] end # TODO find a better place def test_exact store_names ["hi@example.org"] assert_search "hi@example.org", ["hi@example.org"], fields: [{name: :exact}] end def test_exact_case store_names ["Hello"] assert_search "hello", [], fields: [{name: :exact}] assert_search "Hello", ["Hello"], fields: [{name: :exact}] end end ================================================ FILE: test/partial_reindex_test.rb ================================================ require_relative "test_helper" class PartialReindexTest < Minitest::Test def test_record_inline store [{name: "Hi", color: "Blue"}] product = Product.first Searchkick.callbacks(false) do product.update!(name: "Bye", color: "Red") end product.reindex(:search_name, refresh: true) # name updated, but not color assert_search "bye", ["Bye"], fields: [:name], load: false assert_search "blue", ["Bye"], fields: [:color], load: false end def test_record_async store [{name: "Hi", color: "Blue"}] product = Product.first Searchkick.callbacks(false) do product.update!(name: "Bye", color: "Red") end perform_enqueued_jobs do product.reindex(:search_name, mode: :async) end Product.searchkick_index.refresh # name updated, but not color assert_search "bye", ["Bye"], fields: [:name], load: false assert_search 
"blue", ["Bye"], fields: [:color], load: false end def test_record_queue product = Product.create!(name: "Hi") error = assert_raises(Searchkick::Error) do product.reindex(:search_name, mode: :queue) end assert_equal "Partial reindex not supported with queue option", error.message end def test_record_missing_inline store [{name: "Hi", color: "Blue"}] product = Product.first Product.searchkick_index.remove(product) error = assert_raises(Searchkick::ImportError) do product.reindex(:search_name) end assert_match "document missing", error.message end def test_record_ignore_missing_inline store [{name: "Hi", color: "Blue"}] product = Product.first Product.searchkick_index.remove(product) product.reindex(:search_name, ignore_missing: true) Searchkick.callbacks(:bulk) do product.reindex(:search_name, ignore_missing: true) end end def test_record_missing_async store [{name: "Hi", color: "Blue"}] product = Product.first Product.searchkick_index.remove(product) perform_enqueued_jobs do error = assert_raises(Searchkick::ImportError) do product.reindex(:search_name, mode: :async) end assert_match "document missing", error.message end end def test_record_ignore_missing_async store [{name: "Hi", color: "Blue"}] product = Product.first Product.searchkick_index.remove(product) perform_enqueued_jobs do product.reindex(:search_name, mode: :async, ignore_missing: true) end end def test_relation_inline store [{name: "Hi", color: "Blue"}] product = Product.first Searchkick.callbacks(false) do product.update!(name: "Bye", color: "Red") end Product.reindex(:search_name) # name updated, but not color assert_search "bye", ["Bye"], fields: [:name], load: false assert_search "blue", ["Bye"], fields: [:color], load: false # scope Product.reindex(:search_name, scope: :all) end def test_relation_async store [{name: "Hi", color: "Blue"}] product = Product.first Searchkick.callbacks(false) do product.update!(name: "Bye", color: "Red") end perform_enqueued_jobs do Product.reindex(:search_name, 
mode: :async) end # name updated, but not color assert_search "bye", ["Bye"], fields: [:name], load: false assert_search "blue", ["Bye"], fields: [:color], load: false end def test_relation_queue Product.create!(name: "Hi") error = assert_raises(Searchkick::Error) do Product.reindex(:search_name, mode: :queue) end assert_equal "Partial reindex not supported with queue option", error.message end def test_relation_missing_inline store [{name: "Hi", color: "Blue"}] product = Product.first Product.searchkick_index.remove(product) error = assert_raises(Searchkick::ImportError) do Product.reindex(:search_name) end assert_match "document missing", error.message end def test_relation_ignore_missing_inline store [{name: "Hi", color: "Blue"}] product = Product.first Product.searchkick_index.remove(product) Product.where(id: product.id).reindex(:search_name, ignore_missing: true) end def test_relation_missing_async store [{name: "Hi", color: "Blue"}] product = Product.first Product.searchkick_index.remove(product) perform_enqueued_jobs do error = assert_raises(Searchkick::ImportError) do Product.reindex(:search_name, mode: :async) end assert_match "document missing", error.message end end def test_relation_ignore_missing_async store [{name: "Hi", color: "Blue"}] product = Product.first Product.searchkick_index.remove(product) perform_enqueued_jobs do Product.where(id: product.id).reindex(:search_name, mode: :async, ignore_missing: true) end end end ================================================ FILE: test/query_test.rb ================================================ require_relative "test_helper" class QueryTest < Minitest::Test def test_basic store_names ["Milk", "Apple"] query = Product.search("milk", body: {query: {match_all: {}}}) assert_equal ["Apple", "Milk"], query.map(&:name).sort end def test_with_uneffective_min_score store_names ["Milk", "Milk2"] assert_search "milk", ["Milk", "Milk2"], body_options: {min_score: 0.0001} end def test_default_timeout assert_equal 
"6000ms", Product.search("*").body[:timeout] end def test_timeout_override assert_equal "1s", Product.search("*", body_options: {timeout: "1s"}).body[:timeout] end def test_request_params assert_equal "dfs_query_then_fetch", Product.search("*", request_params: {search_type: "dfs_query_then_fetch"}).params[:search_type] end def test_opaque_id store_names ["Milk"] set_search_slow_log(0) Product.search("*", opaque_id: "search").load Product.search("*").opaque_id("search_relation").load Product.search("*", scroll: "5s", opaque_id: "scroll").scroll { } Searchkick.multi_search([Product.search("*")], opaque_id: "multi_search") ensure set_search_slow_log(-1) end def test_debug store_names ["Milk"] out, _ = capture_io do assert_search "milk", ["Milk"], debug: true end refute_includes out, "Error" end def test_big_decimal store [ {name: "Product", latitude: 80.0} ] assert_search "product", ["Product"], where: {latitude: {gt: 79}} end # body_options def test_body_options_should_merge_into_body query = Product.search("*", body_options: {min_score: 1.0}) assert_equal 1.0, query.body[:min_score] end # nested def test_nested_search setup_speaker store [{name: "Product A", aisle: {"id" => 1, "name" => "Frozen"}}], Speaker assert_search "frozen", ["Product A"], {fields: ["aisle.name"]}, Speaker end # other tests def test_includes skip unless activerecord? store_names ["Product A"] assert Product.search("product", includes: [:store]).first.association(:store).loaded? assert Product.search("product").includes(:store).first.association(:store).loaded? end def test_model_includes skip unless activerecord? store_names ["Product A"] store_names ["Store A"], Store associations = {Product => [:store], Store => [:products]} result = Searchkick.search("*", models: [Product, Store], model_includes: associations) assert_equal 2, result.length result.group_by(&:class).each_pair do |model, records| assert records.first.association(associations[model].first).loaded? 
end end def test_scope_results skip unless activerecord? store_names ["Product A", "Product B"] assert_warns "Records in search index do not exist in database" do assert_search "product", ["Product A"], scope_results: ->(r) { r.where(name: "Product A") } end end def test_scope_results_relation skip unless activerecord? store_names ["Product A", "Product B"] assert_warns "Records in search index do not exist in database" do assert_search_relation ["Product A"], Product.search("product").scope_results(->(r) { r.where(name: "Product A") }) end end private def set_search_slow_log(value) settings = { "index.search.slowlog.threshold.query.warn" => value } Product.searchkick_index.update_settings(settings) end end ================================================ FILE: test/reindex_test.rb ================================================ require_relative "test_helper" class ReindexTest < Minitest::Test def test_record_inline store_names ["Product A", "Product B"], reindex: false product = Product.find_by!(name: "Product A") assert_equal true, product.reindex(refresh: true) assert_search "product", ["Product A"] end def test_record_destroyed store_names ["Product A", "Product B"] product = Product.find_by!(name: "Product A") product.destroy Product.searchkick_index.refresh assert_equal true, product.reindex end def test_record_async store_names ["Product A", "Product B"], reindex: false product = Product.find_by!(name: "Product A") perform_enqueued_jobs do assert_equal true, product.reindex(mode: :async) end Product.searchkick_index.refresh assert_search "product", ["Product A"] end def test_record_async_job_options product = Product.create!(name: "Product A") assert_enqueued_jobs(1, queue: "test") do assert_equal true, product.reindex(mode: :async, job_options: {queue: "test"}) end end def test_record_queue reindex_queue = Product.searchkick_index.reindex_queue reindex_queue.clear store_names ["Product A", "Product B"], reindex: false product = Product.find_by!(name: 
"Product A") assert_equal true, product.reindex(mode: :queue) Product.searchkick_index.refresh assert_search "product", [] perform_enqueued_jobs do Searchkick::ProcessQueueJob.perform_now(class_name: "Product") end Product.searchkick_index.refresh assert_search "product", ["Product A"] end def test_process_queue_job_options product = Product.create!(name: "Product A") product.reindex(mode: :queue) assert_enqueued_jobs(1, queue: "test") do Searchkick::ProcessQueueJob.perform_now(class_name: "Product", job_options: {queue: "test"}) end end def test_record_index store_names ["Product A", "Product B"], reindex: false product = Product.find_by!(name: "Product A") assert_equal true, Product.searchkick_index.reindex([product], refresh: true) assert_search "product", ["Product A"] end def test_relation_inline store_names ["Product A"] store_names ["Product B", "Product C"], reindex: false Product.where(name: "Product B").reindex(refresh: true) assert_search "product", ["Product A", "Product B"] end def test_relation_associations store_names ["Product A"] store = Store.create!(name: "Test") Product.create!(name: "Product B", store_id: store.id) assert_equal true, store.products.reindex(refresh: true) assert_search "product", ["Product A", "Product B"] end def test_relation_scoping store_names ["Product A", "Product B"] Product.dynamic_data = lambda do { name: "Count #{Product.count}" } end Product.where(name: "Product A").reindex(refresh: true) assert_search "count", ["Count 2"], load: false ensure Product.dynamic_data = nil end def test_relation_scoping_restored # TODO add test for Mongoid skip unless activerecord? 
assert_nil Product.current_scope
    Product.where(name: "Product A").scoping do
      scope = Product.current_scope
      refute_nil scope
      Product.all.reindex(refresh: true)
      # note: should be reset even if we don't do it
      assert_equal scope, Product.current_scope
    end
    assert_nil Product.current_scope
  end

  # A record renamed to "DO NOT INDEX" (with callbacks off) drops out of the
  # search results once the relation containing it is reindexed.
  def test_relation_should_index
    store_names ["Product A", "Product B"]
    Searchkick.callbacks(false) do
      Product.find_by(name: "Product B").update!(name: "DO NOT INDEX")
    end
    assert_equal true, Product.where(name: "DO NOT INDEX").reindex
    Product.searchkick_index.refresh
    assert_search "product", ["Product A"]
  end

  # Async relation reindex adds only the selected records once jobs have run.
  def test_relation_async
    store_names ["Product A"]
    store_names ["Product B", "Product C"], reindex: false
    perform_enqueued_jobs do
      Product.where(name: "Product B").reindex(mode: :async)
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product B"]
  end

  # Async variant of test_relation_should_index.
  def test_relation_async_should_index
    store_names ["Product A", "Product B"]
    Searchkick.callbacks(false) do
      Product.find_by(name: "Product B").update!(name: "DO NOT INDEX")
    end
    perform_enqueued_jobs do
      assert_equal true, Product.where(name: "DO NOT INDEX").reindex(mode: :async)
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A"]
  end

  # After an async relation reindex the store is found when searching with its routing value.
  def test_relation_async_routing
    store_names ["Store A"], Store, reindex: false
    perform_enqueued_jobs do
      Store.where(name: "Store A").reindex(mode: :async)
    end
    Store.searchkick_index.refresh
    assert_search "*", ["Store A"], {routing: "Store A"}, Store
  end

  # job_options passed to an async relation reindex are applied to the enqueued job.
  def test_relation_async_job_options
    store_names ["Store A"], Store, reindex: false
    assert_enqueued_jobs(1, queue: "test") do
      Store.where(name: "Store A").reindex(mode: :async, job_options: {queue: "test"})
    end
  end

  # Queue mode on a relation indexes nothing until ProcessQueueJob is run.
  def test_relation_queue
    reindex_queue = Product.searchkick_index.reindex_queue
    reindex_queue.clear
    store_names ["Product A"]
    store_names ["Product B", "Product C"], reindex: false
    Product.where(name: "Product B").reindex(mode: :queue)
    Product.searchkick_index.refresh
    assert_search "product", ["Product A"]
    perform_enqueued_jobs do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Product")
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product B"]
  end

  # Same as test_relation_queue, but queues every record via Product.all.
  def test_relation_queue_all
    reindex_queue = Product.searchkick_index.reindex_queue
    reindex_queue.clear
    store_names ["Product A"]
    store_names ["Product B", "Product C"], reindex: false
    Product.all.reindex(mode: :queue)
    Product.searchkick_index.refresh
    assert_search "product", ["Product A"]
    perform_enqueued_jobs do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Product")
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product B", "Product C"]
  end

  # Queue mode for Store; the final search passes the routing value.
  def test_relation_queue_routing
    reindex_queue = Store.searchkick_index.reindex_queue
    reindex_queue.clear
    store_names ["Store A"], Store, reindex: false
    Store.where(name: "Store A").reindex(mode: :queue)
    Store.searchkick_index.refresh
    assert_search "*", [], {}, Store
    perform_enqueued_jobs do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Store")
    end
    Store.searchkick_index.refresh
    assert_search "*", ["Store A"], {routing: "Store A"}, Store
  end

  # Index#reindex accepts a relation.
  def test_relation_index
    store_names ["Product A"]
    store_names ["Product B", "Product C"], reindex: false
    Product.searchkick_index.reindex(Product.where(name: "Product B"), refresh: true)
    assert_search "product", ["Product A", "Product B"]
  end

  # A full async reindex fills a separate index (reindex[:index_name]); the
  # documents are searchable through Product only after that index is promoted.
  def test_full_async
    store_names ["Product A"], reindex: false
    reindex = nil
    perform_enqueued_jobs do
      reindex = Product.reindex(mode: :async)
      assert_search "product", [], conversions: false
    end
    index = Searchkick::Index.new(reindex[:index_name])
    index.refresh
    assert_equal 1, index.total_docs
    reindex_status = Searchkick.reindex_status(reindex[:name])
    assert_equal true, reindex_status[:completed]
    assert_equal 0, reindex_status[:batches_left]
    Product.searchkick_index.promote(reindex[:index_name])
    assert_search "product", ["Product A"]
  end

  def test_full_async_should_index
    store_names ["Product A", "Product B", "DO NOT
INDEX"], reindex: false reindex = nil perform_enqueued_jobs do reindex = Product.reindex(mode: :async) end index = Searchkick::Index.new(reindex[:index_name]) index.refresh assert_equal 2, index.total_docs index.delete end def test_full_async_wait store_names ["Product A"], reindex: false perform_enqueued_jobs do capture_io do Product.reindex(mode: :async, wait: true) end end assert_search "product", ["Product A"] end def test_full_async_job_options store_names ["Product A"], reindex: false assert_enqueued_jobs(1, queue: "test") do Product.reindex(mode: :async, job_options: {queue: "test"}) end end def test_full_async_non_integer_pk Sku.create(id: SecureRandom.hex, name: "Test") reindex = nil perform_enqueued_jobs do reindex = Sku.reindex(mode: :async) assert_search "sku", [], conversions: false end index = Searchkick::Index.new(reindex[:index_name]) index.refresh assert_equal 1, index.total_docs index.delete ensure Sku.destroy_all end def test_full_queue error = assert_raises(ArgumentError) do Product.reindex(mode: :queue) end assert_equal "Full reindex does not support :queue mode - use :async mode instead", error.message end def test_full_refresh_interval reindex = Product.reindex(refresh_interval: "30s", mode: :async, import: false) index = Searchkick::Index.new(reindex[:index_name]) assert_nil Product.searchkick_index.refresh_interval assert_equal "30s", index.refresh_interval Product.searchkick_index.promote(index.name, update_refresh_interval: true) assert_equal "1s", index.refresh_interval assert_equal "1s", Product.searchkick_index.refresh_interval end def test_full_resume Product.searchkick_index.clean_indices if mongoid? 
error = assert_raises(Searchkick::Error) do
        Product.reindex(resume: true)
      end
      assert_equal "Resume not supported for Mongoid", error.message
    else
      assert Product.reindex(resume: true)
    end
  end

  # Smoke test: a full reindex accepts refresh: true.
  def test_full_refresh
    Product.reindex(refresh: true)
  end

  # Partial reindex (:search_name) in async mode leaves the existing document searchable.
  def test_full_partial_async
    store_names ["Product A"]
    Product.reindex(:search_name, mode: :async)
    assert_search "product", ["Product A"]
  end

  # Passing wait without :async mode raises an ArgumentError.
  def test_wait_not_async
    error = assert_raises(ArgumentError) do
      Product.reindex(wait: false)
    end
    assert_equal "wait only available in :async mode", error.message
  end

  # Index#reindex rejects an arbitrary object.
  def test_object_index
    error = assert_raises(Searchkick::Error) do
      Product.searchkick_index.reindex(Object.new)
    end
    assert_equal "Cannot reindex object", error.message
  end

  # Records created inside a rolled-back transaction must not be searchable.
  def test_transaction
    skip unless activerecord?
    Product.transaction do
      store_names ["Product A"]
      raise ActiveRecord::Rollback
    end
    assert_search "*", []
  end

  def test_both_paths
    Product.searchkick_index.delete if Product.searchkick_index.exists?
    Product.reindex
    Product.reindex # run twice for both index paths
  end
end

================================================
FILE: test/reindex_v2_job_test.rb
================================================
require_relative "test_helper"

class ReindexV2JobTest < Minitest::Test
  # Running the job for a record created with callbacks disabled adds it to the index.
  def test_create
    product = Searchkick.callbacks(false) { Product.create!(name: "Boom") }
    Product.searchkick_index.refresh
    assert_search "*", []
    Searchkick::ReindexV2Job.perform_now("Product", product.id.to_s)
    Product.searchkick_index.refresh
    assert_search "*", ["Boom"]
  end

  # Running the job for a record destroyed with callbacks disabled removes it from the index.
  def test_destroy
    product = Searchkick.callbacks(false) { Product.create!(name: "Boom") }
    Product.reindex
    assert_search "*", ["Boom"]
    Searchkick.callbacks(false) { product.destroy }
    Searchkick::ReindexV2Job.perform_now("Product", product.id.to_s)
    Product.searchkick_index.refresh
    assert_search "*", []
  end
end

================================================
FILE: test/relation_test.rb
================================================
require_relative "test_helper"

class
RelationTest < Minitest::Test
  # A relation is not loaded until it is used (count); clone, dup and chained
  # copies start unloaded, and mutating a loaded relation raises.
  def test_loaded
    Product.searchkick_index.refresh
    products = Product.search("*")
    refute products.loaded?
    assert_equal 0, products.count
    assert products.loaded?
    refute products.clone.loaded?
    refute products.dup.loaded?
    refute products.limit(2).loaded?
    error = assert_raises(Searchkick::Error) do
      products.limit!(2)
    end
    assert_equal "Relation loaded", error.message
  end

  # limit! changes the receiver itself.
  def test_mutating
    store_names ["Product A", "Product B"]
    products = Product.search("*").order(:name)
    products.limit!(1)
    assert_equal ["Product A"], products.map(&:name)
  end

  # limit leaves the receiver untouched.
  def test_non_mutating
    store_names ["Product A", "Product B"]
    products = Product.search("*").order(:name)
    products.limit(1)
    assert_equal ["Product A", "Product B"], products.map(&:name)
  end

  # load can be chained repeatedly and the result stays loaded.
  def test_load
    products = Product.search("*")
    refute products.loaded?
    assert products.load.loaded?
    assert products.load.load.loaded?
  end

  # limit(10) applies to the returned relation only; the receiver keeps limit_value 10000.
  def test_clone
    products = Product.search("*")
    assert_equal 10, products.limit(10).limit_value
    assert_equal 10000, products.limit_value
  end

  def test_only
    assert_equal 10, Product.search("*").limit(10).only(:limit).limit_value
  end

  def test_except
    assert_equal 10000, Product.search("*").limit(10).except(:limit).limit_value
  end

  # first returns a record, first(n) an Array no larger than the relation's limit.
  def test_first
    store_names ["Product A", "Product B"]
    products = Product.search("product")
    assert_kind_of Product, products.first
    assert_kind_of Array, products.first(1)
    assert_equal 1, products.limit(1).first(2).size
  end

  def test_first_loaded
    store_names ["Product A", "Product B"]
    products = Product.search("product").load
    assert_kind_of Product, products.first
  end

  # TODO call pluck or select on Active Record query
  # currently uses pluck from Active Support enumerable
  def test_pluck
    store_names ["Product A", "Product B"]
    assert_equal ["Product A", "Product B"], Product.search("product").pluck(:name).sort
    assert_equal ["Product A", "Product B"], Product.search("product").load(false).pluck(:name).sort
  end

  def test_model
    assert_equal Product,
Product.search("product").model
    assert_nil Searchkick.search("product").model
  end

  # klass is Product for a model search and nil for Searchkick.search.
  def test_klass
    assert_equal Product, Product.search("product").klass
    assert_nil Searchkick.search("product").klass
  end

  # respond_to? is true for :page, :response and :size, false for an unknown
  # name, and asking does not load the relation.
  def test_respond_to
    relation = Product.search("product")
    assert relation.respond_to?(:page)
    assert relation.respond_to?(:response)
    assert relation.respond_to?(:size)
    refute relation.respond_to?(:hello)
    refute relation.loaded?
  end

  def test_inspect
    store_names ["Product A"]
    # NOTE(review): the next line looks truncated in this dump - the text between
    # the opening "# and :word_start}] is missing, so the rest of this file and the
    # start of the following one cannot be seen here. Verify against the original
    # test files before relying on it.
    assert_match "# :word_start}]
  end

  def test_multiple_models
    skip # flaky test
    store_names ["Great White Shark", "Hammerhead Shark", "Tiger Shark"]
    assert_equal "how big is a tiger shark", Searchkick.search("How Big is a Tigre Shar", suggest: [:name], fields: [:name]).suggestions.first
  end

  # suggest: true without fields raises ArgumentError when searching via Searchkick.search.
  def test_multiple_models_no_fields
    store_names ["Great White Shark", "Hammerhead Shark", "Tiger Shark"]
    assert_raises(ArgumentError) { Searchkick.search("How Big is a Tigre Shar", suggest: true) }
  end

  # A "*" query yields no suggestions.
  def test_star
    assert_equal [], Product.search("*", suggest: true).suggestions
  end

  protected

  # Asserts that the first suggestion for +term+ equals +expected+ (nil means none).
  def assert_suggest(term, expected, options = {})
    result = Product.search(term, suggest: true, **options).suggestions.first
    if expected.nil?
assert_nil result
    else
      assert_equal expected, result
    end
  end

  # any order
  def assert_suggest_all(term, expected, options = {})
    assert_equal expected.sort, Product.search(term, suggest: true, **options).suggestions.sort
  end
end

================================================
FILE: test/support/activerecord.rb
================================================
require "active_record"

# for debugging
ActiveRecord::Base.logger = $logger

# rails does this in activerecord/lib/active_record/railtie.rb
ActiveRecord.default_timezone = :utc
ActiveRecord::Base.time_zone_aware_attributes = true

# migrations
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"

# the Apartment support file is only loaded when the Apartment constant is defined
require_relative "apartment" if defined?(Apartment)

ActiveRecord::Migration.verbose = ENV["VERBOSE"]

# schema for the test models defined after this block
ActiveRecord::Schema.define do
  create_table :products do |t|
    t.string :name
    t.integer :store_id
    t.boolean :in_stock
    t.boolean :backordered
    t.integer :orders_count
    t.decimal :found_rate
    t.integer :price
    t.string :color
    t.decimal :latitude, precision: 10, scale: 7
    t.decimal :longitude, precision: 10, scale: 7
    t.text :description
    t.text :alt_description
    t.text :embedding
    t.text :embedding2
    t.text :embedding3
    t.text :embedding4
    t.timestamps null: true
  end

  create_table :stores do |t|
    t.string :name
  end

  create_table :regions do |t|
    t.string :name
    t.text :text
  end

  create_table :speakers do |t|
    t.string :name
  end

  create_table :animals do |t|
    t.string :name
    t.string :type
  end

  create_table :skus, id: :uuid do |t|
    t.string :name
  end

  create_table :songs do |t|
    t.string :name
  end

  create_table :bands do |t|
    t.string :name
    t.boolean :active
  end

  create_table :artists do |t|
    t.string :name
    t.boolean :active
    t.boolean :should_index
  end
end

class Product < ActiveRecord::Base
  belongs_to :store

  # the embedding columns are text columns (see schema above) serialized as JSON
  serialize :embedding, coder: JSON
  serialize :embedding2, coder: JSON
  serialize :embedding3, coder: JSON
  serialize :embedding4, coder: JSON
end

class Store < ActiveRecord::Base
  has_many :products
end

class Region <
ActiveRecord::Base end class Speaker < ActiveRecord::Base end class Animal < ActiveRecord::Base end class Dog < Animal end class Cat < Animal end class Sku < ActiveRecord::Base end class Song < ActiveRecord::Base end class Band < ActiveRecord::Base default_scope { where(active: true).order(:name) } end class Artist < ActiveRecord::Base default_scope { where(active: true).order(:name) } end ================================================ FILE: test/support/apartment.rb ================================================ module Rails def self.env ENV["RACK_ENV"] end end tenants = ["tenant1", "tenant2"] Apartment.configure do |config| config.tenant_names = tenants config.database_schema_file = false config.excluded_models = ["Product", "Store", "Region", "Speaker", "Animal", "Dog", "Cat", "Sku", "Song", "Band"] end class Tenant < ActiveRecord::Base searchkick index_prefix: -> { Apartment::Tenant.current } end tenants.each do |tenant| begin Apartment::Tenant.create(tenant) rescue Apartment::TenantExists # do nothing end Apartment::Tenant.switch!(tenant) ActiveRecord::Schema.define do create_table :tenants, force: true do |t| t.string :name t.timestamps null: true end end Tenant.reindex end Apartment::Tenant.reset ================================================ FILE: test/support/helpers.rb ================================================ class Minitest::Test include ActiveJob::TestHelper def setup [Product, Store].each do |model| setup_model(model) end end protected def setup_animal setup_model(Animal) end def setup_region setup_model(Region) end def setup_speaker setup_model(Speaker) end def setup_model(model) # reindex once ($setup_model ||= {})[model] ||= (model.reindex || true) # clear every time Searchkick.callbacks(:bulk) do model.destroy_all end end def store(documents, model = default_model, reindex: true) if reindex with_callbacks(:bulk) do with_transaction(model) do model.create!(documents.shuffle) end end model.searchkick_index.refresh else 
Searchkick.callbacks(false) do with_transaction(model) do model.create!(documents.shuffle) end end # prevent warnings model.searchkick_index.refresh end end def store_names(names, model = default_model, reindex: true) store names.map { |name| {name: name} }, model, reindex: reindex end # no order def assert_search(term, expected, options = {}, model = default_model) assert_equal expected.sort, model.search(term, **options).map(&:name).sort assert_equal expected.sort, build_relation(model, term, **options).map(&:name).sort end def assert_search_relation(expected, relation) assert_equal expected.sort, relation.map(&:name).sort end def assert_order(term, expected, options = {}, model = default_model) assert_equal expected, model.search(term, **options).map(&:name) assert_equal expected, build_relation(model, term, **options).map(&:name) end def assert_order_relation(expected, relation) assert_equal expected, relation.map(&:name) end def assert_equal_scores(term, options = {}, model = default_model) assert_equal 1, model.search(term, **options).hits.map { |a| a["_score"] }.uniq.size end def assert_first(term, expected, options = {}, model = default_model) assert_equal expected, model.search(term, **options).map(&:name).first end def assert_warns(message) _, stderr = capture_io do yield end assert_match "[searchkick] WARNING: #{message}", stderr end def build_relation(model, term, **options) relation = model.search(term) options.each do |k, v| relation = relation.public_send(k, v) end relation end def with_options(options, model = default_model) previous_options = model.searchkick_options.dup begin model.instance_variable_set(:@searchkick_index_name, nil) model.searchkick_options.merge!(options) model.reindex yield ensure model.instance_variable_set(:@searchkick_index_name, nil) model.searchkick_options.clear model.searchkick_options.merge!(previous_options) end end def with_callbacks(value, &block) if Searchkick.callbacks?(default: nil).nil? 
Searchkick.callbacks(value, &block)
    else
      yield
    end
  end

  # Wraps the block in model.transaction when the model responds to it and
  # Mongoid is not in use; otherwise just yields.
  def with_transaction(model, &block)
    if model.respond_to?(:transaction) && !mongoid?
      model.transaction(&block)
    else
      yield
    end
  end

  # Truthy when the ActiveRecord constant is defined.
  def activerecord?
    defined?(ActiveRecord)
  end

  # Truthy when the Mongoid constant is defined.
  def mongoid?
    defined?(Mongoid)
  end

  # Model used by the assertion helpers when none is passed; test classes may override it.
  def default_model
    Product
  end

  def ci?
    ENV["CI"]
  end

  # for Active Job helpers
  def tagged_logger
  end
end

================================================
FILE: test/support/kaminari.yml
================================================
en:
  views:
    pagination:
      first: "« First"
      last: "Last »"
      previous: "‹ Prev"
      next: "Next ›"
      truncate: "…"
  helpers:
    page_entries_info:
      entry:
        zero: "entries"
        one: "entry"
        other: "entries"
      one_page:
        display_entries:
          zero: "No %{entry_name} found"
          one: "Displaying 1 %{entry_name}"
          other: "Displaying all %{count} %{entry_name}"
      more_pages:
        display_entries: "Displaying %{entry_name} %{first} - %{last} of %{total} in total"

================================================
FILE: test/support/mongoid.rb
================================================
Mongoid.logger = $logger
Mongo::Logger.logger = $logger if defined?(Mongo::Logger)

Mongoid.configure do |config|
  config.connect_to "searchkick_test", server_selection_timeout: 1
end

# Mongoid versions of the test models
class Product
  include Mongoid::Document
  include Mongoid::Timestamps

  field :name
  field :store_id, type: Integer
  field :in_stock, type: Boolean
  field :backordered, type: Boolean
  field :orders_count, type: Integer
  field :found_rate, type: BigDecimal
  field :price, type: Integer
  field :color
  field :latitude, type: BigDecimal
  field :longitude, type: BigDecimal
  field :description
  field :alt_description
  field :embedding, type: Array
  field :embedding2, type: Array
  field :embedding3, type: Array
  field :embedding4, type: Array
end

class Store
  include Mongoid::Document
  has_many :products

  field :name
end

class Region
  include Mongoid::Document

  field :name
  field :text
end

class Speaker
  include Mongoid::Document

  field :name
end

class Animal
  include Mongoid::Document

  field :name
end

class Dog < Animal
end

class Cat < Animal
end

class Sku
  include Mongoid::Document

  field :name
end

class Song
  include Mongoid::Document

  field :name
end

# Band and Artist hide inactive documents and sort by name through their default scope.
class Band
  include Mongoid::Document

  field :name
  field :active, type: Mongoid::Boolean

  default_scope -> { where(active: true).order(name: 1) }
end

class Artist
  include Mongoid::Document

  field :name
  field :active, type: Mongoid::Boolean
  field :should_index, type: Mongoid::Boolean

  default_scope -> { where(active: true).order(name: 1) }
end

================================================
FILE: test/support/redis.rb
================================================
options = {}
# the :logger option is only passed when RedisClient is not defined
options[:logger] = $logger if !defined?(RedisClient)

# Pick the client from whichever library is loaded: a RedisClient pool when
# Redis is not defined, a ConnectionPool of Redis clients when ConnectionPool
# is available, otherwise a single Redis client.
Searchkick.redis =
  if !defined?(Redis)
    RedisClient.config.new_pool
  elsif defined?(ConnectionPool)
    ConnectionPool.new { Redis.new(**options) }
  else
    Redis.new(**options)
  end

# Logs every command (or pipelined batch of commands) to $logger before
# passing it on via super.
module RedisInstrumentation
  def call(command, redis_config)
    $logger.info "[redis] #{command.inspect}"
    super
  end

  def call_pipelined(commands, redis_config)
    $logger.info "[redis] #{commands.inspect}"
    super
  end
end
RedisClient.register(RedisInstrumentation) if defined?(RedisClient)

================================================
FILE: test/synonyms_test.rb
================================================
require_relative "test_helper"

# NOTE(review): the synonym definitions these tests rely on are configured
# outside this chunk (presumably on the Product model) - confirm there.
class SynonymsTest < Minitest::Test
  def test_bleach
    store_names ["Clorox Bleach", "Kroger Bleach"]
    assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"]
  end

  def test_burger_buns
    store_names ["Hamburger Buns"]
    assert_search "burger buns", ["Hamburger Buns"]
  end

  def test_bandaids
    store_names ["Band-Aid", "Kroger 12-Pack Bandages"]
    assert_search "bandaids", ["Band-Aid", "Kroger 12-Pack Bandages"]
  end

  def test_reverse
    store_names ["Hamburger"]
    assert_search "burger", ["Hamburger"]
  end

  def test_stemmed
    store_names ["Burger"]
    assert_search "hamburgers", ["Burger"]
  end

  def test_word_start
    store_names ["Clorox Bleach", "Kroger Bleach"]
    assert_search "clorox", ["Clorox Bleach", "Kroger Bleach"],
fields: [{name: :word_start}]
  end

  # "led" finds "Lightbulb" as well as "Led", but "Lightbulb" does not find "Led".
  def test_directional
    store_names ["Lightbulb", "Green Onions", "Led"]
    assert_search "led", ["Lightbulb", "Led"]
    assert_search "Lightbulb", ["Lightbulb"]
    assert_search "Halogen Lamp", ["Lightbulb"]
    assert_search "onions", ["Green Onions"]
  end

  def test_case
    store_names ["Uppercase"]
    assert_search "lowercase", ["Uppercase"]
  end
end

================================================
FILE: test/test_helper.rb
================================================
require "bundler/setup"
Bundler.require(:default)
require "minitest/autorun"
require "active_support/notifications"

ENV["RACK_ENV"] = "test"

# for reloadable synonyms
if ENV["CI"]
  ENV["ES_PATH"] ||= File.join(ENV["HOME"], Searchkick.opensearch? ? "opensearch" : "elasticsearch", Searchkick.server_version)
end

# logs go to STDOUT only when VERBOSE is set
$logger = ActiveSupport::Logger.new(ENV["VERBOSE"] ? STDOUT : nil)

if ENV["LOG_TRANSPORT"]
  transport_logger = ActiveSupport::Logger.new(STDOUT)
  # some clients wrap the transport in another object that itself responds to #transport
  if Searchkick.client.transport.respond_to?(:transport)
    Searchkick.client.transport.transport.logger = transport_logger
  else
    Searchkick.client.transport.logger = transport_logger
  end
end

Searchkick.search_timeout = 5
Searchkick.index_suffix = ENV["TEST_ENV_NUMBER"] # for parallel tests

puts "Running against #{Searchkick.opensearch? ? "OpenSearch" : "Elasticsearch"} #{Searchkick.server_version}"

I18n.config.enforce_available_locales = true

ActiveJob::Base.logger = $logger
ActiveJob::Base.queue_adapter = :test

ActiveSupport::LogSubscriber.logger = ActiveSupport::Logger.new(STDOUT) if ENV["VERBOSE"]

# load the model layer: Mongoid when it is defined, Active Record otherwise
if defined?(Mongoid)
  require_relative "support/mongoid"
else
  require_relative "support/activerecord"
end

require_relative "support/redis"

# models
Dir["#{__dir__}/models/*"].each do |file|
  require file
end

require_relative "support/helpers"

================================================
FILE: test/unscope_test.rb
================================================
require_relative "test_helper"

class UnscopeTest < Minitest::Test
  # Overrides the shared setup: reindexes Artist once per process (memoized in
  # @@once) and removes all artists, including those hidden by the default scope.
  def setup
    @@once ||= Artist.reindex
    Artist.unscoped.destroy_all
  end

  # A full reindex includes the inactive "Test 2" record but not "Test 3".
  def test_reindex
    create_records

    Artist.reindex
    assert_search "*", ["Test", "Test 2"]
    assert_search "*", ["Test", "Test 2"], {load: false}
  end

  # Async reindex of the unscoped relation gives the same result.
  def test_relation_async
    create_records

    perform_enqueued_jobs do
      Artist.unscoped.reindex(mode: :async)
    end

    Artist.searchkick_index.refresh
    assert_search "*", ["Test", "Test 2"]
  end

  # Three artists: active + indexable, inactive + indexable, inactive + not indexable.
  def create_records
    store [
      {name: "Test", active: true, should_index: true},
      {name: "Test 2", active: false, should_index: true},
      {name: "Test 3", active: false, should_index: false}
    ], reindex: false
  end

  def default_model
    Artist
  end
end

================================================
FILE: test/where_test.rb
================================================
require_relative "test_helper"

class WhereTest < Minitest::Test
  # Exercises the where option: equality, arrays, ranges, comparison
  # operators, boolean combinators and nil handling.
  def test_where
    now = Time.now
    store [
      {name: "Product A", store_id: 1, in_stock: true, backordered: true, created_at: now, orders_count: 4, user_ids: [1, 2, 3]},
      {name: "Product B", store_id: 2, in_stock: true, backordered: false, created_at: now - 1, orders_count: 3, user_ids: [1]},
      {name: "Product C", store_id: 3, in_stock: false, backordered: true, created_at: now - 2, orders_count: 2, user_ids: [1, 3]},
      {name: "Product D", store_id: 4, in_stock: false, backordered: false,
created_at: now - 3, orders_count: 1}
    ]
    assert_search "product", ["Product A", "Product B"], where: {in_stock: true}

    # arrays
    assert_search "product", ["Product A"], where: {user_ids: 2}
    assert_search "product", ["Product A", "Product C"], where: {user_ids: [2, 3]}

    # date
    assert_search "product", ["Product A"], where: {created_at: {gt: now - 1}}
    assert_search "product", ["Product A", "Product B"], where: {created_at: {gte: now - 1}}
    assert_search "product", ["Product D"], where: {created_at: {lt: now - 2}}
    assert_search "product", ["Product C", "Product D"], where: {created_at: {lte: now - 2}}

    # integer
    assert_search "product", ["Product A"], where: {store_id: {lt: 2}}
    assert_search "product", ["Product A", "Product B"], where: {store_id: {lte: 2}}
    assert_search "product", ["Product D"], where: {store_id: {gt: 3}}
    assert_search "product", ["Product C", "Product D"], where: {store_id: {gte: 3}}

    # range
    assert_search "product", ["Product A", "Product B"], where: {store_id: 1..2}
    assert_search "product", ["Product A"], where: {store_id: 1...2}
    assert_search "product", ["Product A", "Product B"], where: {store_id: [1, 2]}
    assert_search "product", ["Product B", "Product C", "Product D"], where: {store_id: {not: 1}}
    assert_search "product", ["Product B", "Product C", "Product D"], where: {store_id: {_not: 1}}
    assert_search "product", ["Product C", "Product D"], where: {store_id: {not: [1, 2]}}
    assert_search "product", ["Product C", "Product D"], where: {store_id: {_not: [1, 2]}}
    assert_search "product", ["Product A"], where: {user_ids: {lte: 2, gte: 2}}
    # infinite, endless and beginless ranges
    assert_search "product", ["Product A", "Product B", "Product C", "Product D"], where: {store_id: -Float::INFINITY..Float::INFINITY}
    assert_search "product", ["Product C", "Product D"], where: {store_id: 3..Float::INFINITY}
    assert_search "product", ["Product A", "Product B"], where: {store_id: -Float::INFINITY..2}
    assert_search "product", ["Product C", "Product D"], where: {store_id: 3..}
    assert_search "product", ["Product A", "Product B"], where: {store_id: ..2}
    assert_search "product", ["Product A", "Product B"], where: {store_id: ...3}

    # or
    assert_search "product", ["Product A", "Product B", "Product C"], where: {or: [[{in_stock: true}, {store_id: 3}]]}
    assert_search "product", ["Product A", "Product B", "Product C"], where: {or: [[{orders_count: [2, 4]}, {store_id: [1, 2]}]]}
    assert_search "product", ["Product A", "Product D"], where: {or: [[{orders_count: 1}, {created_at: {gte: now - 1}, backordered: true}]]}

    # _or
    assert_search "product", ["Product A", "Product B", "Product C"], where: {_or: [{in_stock: true}, {store_id: 3}]}
    assert_search "product", ["Product A", "Product B", "Product C"], where: {_or: [{orders_count: [2, 4]}, {store_id: [1, 2]}]}
    assert_search "product", ["Product A", "Product D"], where: {_or: [{orders_count: 1}, {created_at: {gte: now - 1}, backordered: true}]}

    # _and
    assert_search "product", ["Product A"], where: {_and: [{in_stock: true}, {backordered: true}]}

    # _not
    assert_search "product", ["Product B", "Product C"], where: {_not: {_or: [{orders_count: 1}, {created_at: {gte: now - 1}, backordered: true}]}}

    # all
    assert_search "product", ["Product A", "Product C"], where: {user_ids: {all: [1, 3]}}
    assert_search "product", [], where: {user_ids: {all: [1, 2, 3, 4]}}

    # any / nested terms
    assert_search "product", ["Product B", "Product C"], where: {user_ids: {not: [2], in: [1, 3]}}
    assert_search "product", ["Product B", "Product C"], where: {user_ids: {_not: [2], in: [1, 3]}}

    # not
    assert_search "product", ["Product D"], where: {user_ids: nil}
    assert_search "product", ["Product A", "Product B", "Product C"], where: {user_ids: {not: nil}}
    assert_search "product", ["Product A", "Product B", "Product C"], where: {user_ids: {_not: nil}}
    assert_search "product", ["Product A", "Product C", "Product D"], where: {user_ids: [3, nil]}
    assert_search "product", ["Product B"], where: {user_ids: {not: [3, nil]}}
    assert_search "product", ["Product B"], where: {user_ids:
{_not: [3, nil]}}
  end

  # The same filters through the chained relation API (where, where.not, rewhere).
  def test_relation
    now = Time.now
    store [
      {name: "Product A", store_id: 1, in_stock: true, backordered: true, created_at: now, orders_count: 4, user_ids: [1, 2, 3]},
      {name: "Product B", store_id: 2, in_stock: true, backordered: false, created_at: now - 1, orders_count: 3, user_ids: [1]},
      {name: "Product C", store_id: 3, in_stock: false, backordered: true, created_at: now - 2, orders_count: 2, user_ids: [1, 3]},
      {name: "Product D", store_id: 4, in_stock: false, backordered: false, created_at: now - 3, orders_count: 1}
    ]
    assert_search_relation ["Product A", "Product B"], Product.search("product").where(in_stock: true)

    # multiple where
    assert_search_relation ["Product A"], Product.search("product").where(in_stock: true).where(backordered: true)
    assert_search_relation ["Product A"], Product.search("product").where.not(store_id: 2).where.not(store_id: 3).where.not(store_id: 4)
    assert_search_relation [], Product.search("product").where(in_stock: true).where(in_stock: false)
    assert_search_relation [], Product.search("product").where(in_stock: true).where("in_stock" => false)

    # rewhere
    assert_search_relation ["Product A", "Product C"], Product.search("product").where(in_stock: true).rewhere(backordered: true)

    # not
    assert_search_relation ["Product C", "Product D"], Product.search("product").where.not(in_stock: true)
    assert_search_relation ["Product C"], Product.search("product").where.not(in_stock: true).where(backordered: true)
    assert_search_relation ["Product A", "Product C"], Product.search("product").where.not(store_id: [2, 4])

    # compound
    assert_search_relation ["Product B", "Product C"], Product.search("product").where(_or: [{in_stock: true}, {backordered: true}]).where(_or: [{store_id: 2}, {orders_count: 2}])
  end

  # Operator keys given as strings ("lt") are rejected as unknown operators.
  def test_string_operators
    error = assert_raises(ArgumentError) do
      assert_search "product", [], where: {store_id: {"lt" => 2}}
    end
    assert_includes error.message, "Unknown where operator"
  end

  def test_unknown_operator
    error =
assert_raises(ArgumentError) do assert_search "product", [], where: {store_id: {contains: "%2%"}} end assert_includes error.message, "Unknown where operator" end def test_regexp store_names ["Product A"] assert_search "*", ["Product A"], where: {name: /\APro.+\z/} end def test_alternate_regexp store_names ["Product A", "Item B"] assert_search "*", ["Product A"], where: {name: {regexp: "Pro.+"}} end def test_special_regexp store_names ["Product ", "Item "] assert_search "*", ["Product "], where: {name: /\APro.+<.+\z/} end def test_regexp_not_anchored store_names ["abcde"] assert_search "*", ["abcde"], where: {name: /abcd/} assert_search "*", ["abcde"], where: {name: /bcde/} assert_search "*", ["abcde"], where: {name: /abcde/} assert_search "*", ["abcde"], where: {name: /.*bcd.*/} end def test_regexp_anchored store_names ["abcde"] assert_search "*", ["abcde"], where: {name: /\Aabcde\z/} assert_search "*", ["abcde"], where: {name: /\Aabc/} assert_search "*", ["abcde"], where: {name: /cde\z/} assert_search "*", [], where: {name: /\Abcd/} assert_search "*", [], where: {name: /bcd\z/} end def test_regexp_case store_names ["abcde"] assert_search "*", [], where: {name: /\AABCDE\z/} assert_search "*", ["abcde"], where: {name: /\AABCDE\z/i} end def test_prefix store_names ["Product A", "Product B", "Item C"] assert_search "*", ["Product A", "Product B"], where: {name: {prefix: "Pro"}} end def test_exists store [ {name: "Product A", user_ids: [1, 2]}, {name: "Product B"} ] assert_search "product", ["Product A"], where: {user_ids: {exists: true}} assert_search "product", ["Product B"], where: {user_ids: {exists: false}} error = assert_raises(ArgumentError) do assert_search "product", ["Product A"], where: {user_ids: {exists: nil}} end assert_equal "Passing a value other than true or false to exists is not supported", error.message end def test_like store_names ["Product ABC", "Product DEF"] assert_search "product", ["Product ABC"], where: {name: {like: "%ABC%"}} assert_search 
"product", ["Product ABC"], where: {name: {like: "%ABC"}} assert_search "product", [], where: {name: {like: "ABC"}} assert_search "product", [], where: {name: {like: "ABC%"}} assert_search "product", [], where: {name: {like: "ABC%"}} assert_search "product", ["Product ABC"], where: {name: {like: "Product_ABC"}} end def test_like_escape store_names ["Product 100%", "Product 1000"] assert_search "product", ["Product 100%"], where: {name: {like: "% 100\\%"}} end def test_like_special_characters store_names [ "Product ABC", "Product.ABC", "Product?ABC", "Product+ABC", "Product*ABC", "Product|ABC", "Product{ABC}", "Product[ABC]", "Product(ABC)", "Product\"ABC\"", "Product\\ABC" ] assert_search "*", ["Product.ABC"], where: {name: {like: "Product.A%"}} assert_search "*", ["Product?ABC"], where: {name: {like: "Product?A%"}} assert_search "*", ["Product+ABC"], where: {name: {like: "Product+A%"}} assert_search "*", ["Product*ABC"], where: {name: {like: "Product*A%"}} assert_search "*", ["Product|ABC"], where: {name: {like: "Product|A%"}} assert_search "*", ["Product{ABC}"], where: {name: {like: "%{ABC}"}} assert_search "*", ["Product[ABC]"], where: {name: {like: "%[ABC]"}} assert_search "*", ["Product(ABC)"], where: {name: {like: "%(ABC)"}} assert_search "*", ["Product\"ABC\""], where: {name: {like: "%\"ABC\""}} assert_search "*", ["Product\\ABC"], where: {name: {like: "Product\\A%"}} end def test_like_optional_operators store_names ["Product A&B", "Product B", "Product <3", "Product @Home"] assert_search "product", ["Product A&B"], where: {name: {like: "%A&B"}} assert_search "product", ["Product <3"], where: {name: {like: "%<%"}} assert_search "product", ["Product @Home"], where: {name: {like: "%@Home%"}} end def test_ilike store_names ["Product ABC", "Product DEF"] assert_search "product", ["Product ABC"], where: {name: {ilike: "%abc%"}} assert_search "product", ["Product ABC"], where: {name: {ilike: "%abc"}} assert_search "product", [], where: {name: {ilike: "abc"}} 
assert_search "product", [], where: {name: {ilike: "abc%"}} assert_search "product", [], where: {name: {ilike: "abc%"}} assert_search "product", ["Product ABC"], where: {name: {ilike: "Product_abc"}} end def test_ilike_escape store_names ["Product 100%", "Product B"] assert_search "product", ["Product 100%"], where: {name: {ilike: "% 100\\%"}} end def test_ilike_special_characters store_names ["Product ABC\"", "Product B"] assert_search "product", ["Product ABC\""], where: {name: {ilike: "%abc\""}} end def test_ilike_optional_operators store_names ["Product A&B", "Product B", "Product <3", "Product @Home"] assert_search "product", ["Product A&B"], where: {name: {ilike: "%a&b"}} assert_search "product", ["Product <3"], where: {name: {ilike: "%<%"}} assert_search "product", ["Product @Home"], where: {name: {ilike: "%@home%"}} end def test_script store [ {name: "Product A", store_id: 1}, {name: "Product B", store_id: 10} ] assert_search "product", ["Product A"], where: {_script: Searchkick.script("doc['store_id'].value < 10")} assert_search "product", ["Product A"], where: {_script: Searchkick.script("doc['store_id'].value < 10", lang: "expression")} assert_search "product", ["Product A"], where: {_script: Searchkick.script("doc['store_id'].value < params['value']", params: {value: 10})} end def test_script_string error = assert_raises(TypeError) do assert_search "product", ["Product A"], where: {_script: "doc['store_id'].value < 10"} end assert_equal "expected Searchkick::Script", error.message end def test_string store [ {name: "Product A", color: "RED"} ] assert_search "product", ["Product A"], where: {color: "RED"} end def test_nil store [ {name: "Product A"}, {name: "Product B", color: "red"} ] assert_search "product", ["Product A"], where: {color: nil} end def test_id store_names ["Product A"] product = Product.first assert_search "product", ["Product A"], where: {id: product.id.to_s} end def test_empty store_names ["Product A"] assert_search "product", 
["Product A"], where: {} end def test_empty_array store_names ["Product A"] assert_search "product", [], where: {store_id: []} end # https://discuss.elastic.co/t/numeric-range-quey-or-filter-in-an-array-field-possible-or-not/14053 # https://gist.github.com/jprante/7099463 def test_range_array store [ {name: "Product A", user_ids: [11, 23, 13, 16, 17, 23]}, {name: "Product B", user_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9]}, {name: "Product C", user_ids: [101, 230, 150, 200]} ] assert_search "product", ["Product A"], where: {user_ids: {gt: 10, lt: 24}} end def test_range_array_again store [ {name: "Product A", user_ids: [19, 32, 42]}, {name: "Product B", user_ids: [13, 40, 52]} ] assert_search "product", ["Product A"], where: {user_ids: {gt: 26, lt: 36}} end def test_near store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000} ] assert_search "san", ["San Francisco"], where: {location: {near: [37.5, -122.5]}} end def test_near_hash store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000} ] assert_search "san", ["San Francisco"], where: {location: {near: {lat: 37.5, lon: -122.5}}} end def test_near_within store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}, {name: "San Marino", latitude: 43.9333, longitude: 12.4667} ] assert_search "san", ["San Francisco", "San Antonio"], where: {location: {near: [37, -122], within: "2000mi"}} end def test_near_within_hash store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}, {name: "San Marino", latitude: 43.9333, longitude: 12.4667} ] assert_search "san", ["San Francisco", "San Antonio"], where: {location: {near: {lat: 37, lon: -122}, within: "2000mi"}} end def test_geo_polygon store [ {name: "San Francisco", latitude: 
37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}, {name: "San Marino", latitude: 43.9333, longitude: 12.4667} ] polygon = [ {lat: 42.185695, lon: -125.496146}, {lat: 42.185695, lon: -94.125535}, {lat: 27.122789, lon: -94.125535}, {lat: 27.12278, lon: -125.496146} ] assert_search "san", ["San Francisco", "San Antonio"], where: {location: {geo_polygon: {points: polygon}}} polygon << polygon.first assert_search "san", ["San Francisco", "San Antonio"], where: {location: {geo_shape: {type: "polygon", coordinates: [polygon]}}} end def test_top_left_bottom_right store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000} ] assert_search "san", ["San Francisco"], where: {location: {top_left: [38, -123], bottom_right: [37, -122]}} end def test_top_left_bottom_right_hash store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000} ] assert_search "san", ["San Francisco"], where: {location: {top_left: {lat: 38, lon: -123}, bottom_right: {lat: 37, lon: -122}}} end def test_top_right_bottom_left store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000} ] assert_search "san", ["San Francisco"], where: {location: {top_right: [38, -122], bottom_left: [37, -123]}} end def test_top_right_bottom_left_hash store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000} ] assert_search "san", ["San Francisco"], where: {location: {top_right: {lat: 38, lon: -122}, bottom_left: {lat: 37, lon: -123}}} end def test_multiple_locations store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000} ] assert_search "san", ["San Francisco"], where: {multiple_locations: {near: [37.5, 
-122.5]}} end def test_multiple_locations_with_term_filter store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000} ] assert_search "san", [], where: {multiple_locations: {near: [37.5, -122.5]}, name: "San Antonio"} assert_search "san", ["San Francisco"], where: {multiple_locations: {near: [37.5, -122.5]}, name: "San Francisco"} end def test_multiple_locations_hash store [ {name: "San Francisco", latitude: 37.7833, longitude: -122.4167}, {name: "San Antonio", latitude: 29.4167, longitude: -98.5000} ] assert_search "san", ["San Francisco"], where: {multiple_locations: {near: {lat: 37.5, lon: -122.5}}} end def test_nested store [ {name: "Product A", details: {year: 2016}} ] assert_search "product", ["Product A"], where: {"details.year" => 2016} end end