Repository: ankane/searchkick
Branch: master
Commit: 1009d03107a2
Files: 114
Total size: 425.2 KB
Directory structure:
gitextract_mcchxu51/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ └── feature_request.md
│ ├── pull_request_template.md
│ └── workflows/
│ └── build.yml
├── .gitignore
├── CHANGELOG.md
├── Gemfile
├── LICENSE.txt
├── README.md
├── Rakefile
├── benchmark/
│ ├── Gemfile
│ ├── index.rb
│ ├── relation.rb
│ └── search.rb
├── examples/
│ ├── Gemfile
│ ├── hybrid.rb
│ └── semantic.rb
├── gemfiles/
│ ├── activerecord72.gemfile
│ ├── activerecord80.gemfile
│ ├── mongoid8.gemfile
│ ├── mongoid9.gemfile
│ ├── opensearch2.gemfile
│ └── opensearch3.gemfile
├── lib/
│ ├── searchkick/
│ │ ├── bulk_reindex_job.rb
│ │ ├── controller_runtime.rb
│ │ ├── hash_wrapper.rb
│ │ ├── index.rb
│ │ ├── index_cache.rb
│ │ ├── index_options.rb
│ │ ├── indexer.rb
│ │ ├── log_subscriber.rb
│ │ ├── middleware.rb
│ │ ├── model.rb
│ │ ├── multi_search.rb
│ │ ├── process_batch_job.rb
│ │ ├── process_queue_job.rb
│ │ ├── query.rb
│ │ ├── railtie.rb
│ │ ├── record_data.rb
│ │ ├── record_indexer.rb
│ │ ├── reindex_queue.rb
│ │ ├── reindex_v2_job.rb
│ │ ├── relation.rb
│ │ ├── relation_indexer.rb
│ │ ├── reranking.rb
│ │ ├── results.rb
│ │ ├── script.rb
│ │ ├── version.rb
│ │ └── where.rb
│ ├── searchkick.rb
│ └── tasks/
│ └── searchkick.rake
├── searchkick.gemspec
└── test/
├── aggs_test.rb
├── boost_test.rb
├── callbacks_test.rb
├── conversions_test.rb
├── default_scope_test.rb
├── exclude_test.rb
├── geo_shape_test.rb
├── highlight_test.rb
├── hybrid_test.rb
├── index_cache_test.rb
├── index_options_test.rb
├── index_test.rb
├── inheritance_test.rb
├── knn_test.rb
├── language_test.rb
├── load_test.rb
├── log_subscriber_test.rb
├── marshal_test.rb
├── match_test.rb
├── misspellings_test.rb
├── models/
│ ├── animal.rb
│ ├── artist.rb
│ ├── band.rb
│ ├── product.rb
│ ├── region.rb
│ ├── sku.rb
│ ├── song.rb
│ ├── speaker.rb
│ └── store.rb
├── multi_indices_test.rb
├── multi_search_test.rb
├── multi_tenancy_test.rb
├── notifications_test.rb
├── order_test.rb
├── pagination_test.rb
├── parameters_test.rb
├── partial_match_test.rb
├── partial_reindex_test.rb
├── query_test.rb
├── reindex_test.rb
├── reindex_v2_job_test.rb
├── relation_test.rb
├── results_test.rb
├── routing_test.rb
├── scroll_test.rb
├── search_synonyms_test.rb
├── search_test.rb
├── select_test.rb
├── should_index_test.rb
├── similar_test.rb
├── suggest_test.rb
├── support/
│ ├── activerecord.rb
│ ├── apartment.rb
│ ├── helpers.rb
│ ├── kaminari.yml
│ ├── mongoid.rb
│ └── redis.rb
├── synonyms_test.rb
├── test_helper.rb
├── unscope_test.rb
└── where_test.rb
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug report
assignees: ''
---
**First**
Search existing issues to see if it’s been reported and make sure you’re on the latest version.
**Describe the bug**
A clear and concise description of the bug.
**To reproduce**
Use this code to reproduce when possible:
```ruby
# Self-contained reproduction script: declares its gems inline, defines a
# single Product model, indexes one record, and prints a search response.
require "bundler/inline"
# Inline gem declarations, so no separate Gemfile is needed.
gemfile do
source "https://rubygems.org"
gem "activerecord", require: "active_record"
gem "activejob", require: "active_job"
gem "sqlite3"
# Searchkick is taken from the GitHub repository rather than a released gem.
gem "searchkick", git: "https://github.com/ankane/searchkick.git"
# uncomment one
# gem "elasticsearch"
# gem "opensearch-ruby"
end
# Print the library and server versions so they appear in the report.
puts "Searchkick version: #{Searchkick::VERSION}"
puts "Server version: #{Searchkick.server_version}"
# In-memory SQLite database and the inline Active Job adapter.
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
ActiveJob::Base.queue_adapter = :inline
# Minimal schema: a products table with a single string column.
ActiveRecord::Schema.define do
create_table :products do |t|
t.string :name
end
end
# Model under test, with Searchkick enabled via the `searchkick` macro.
class Product < ActiveRecord::Base
searchkick
end
# Reindex, create one record, and refresh the index before searching.
Product.reindex
Product.create!(name: "Test")
Product.search_index.refresh
# Print the response of a search for "test" restricted to the name field.
p Product.search("test", fields: [:name]).response
```
**Additional context**
Add any other context.
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
- name: Help
url: https://stackoverflow.com/questions/tagged/searchkick
about: Ask and answer questions here
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: feature request
assignees: ''
---
**First**
Search existing issues to see if it’s been discussed.
**Is your feature request related to a problem? Please describe.**
A clear and concise description of the problem.
**Describe the solution you'd like**
A clear and concise description of your idea.
**Additional context**
Add any other context.
================================================
FILE: .github/pull_request_template.md
================================================
Thanks for contributing. You’re awesome! A few things to keep in mind:
- Keep changes to a minimum
- Follow the existing style
- Add one or more tests if possible
Finally, replace all this with a description of the changes.
================================================
FILE: .github/workflows/build.yml
================================================
name: build
on: [push, pull_request]
jobs:
build:
strategy:
fail-fast: false
matrix:
include:
- ruby: "4.0"
gemfile: Gemfile
elasticsearch: 9
- ruby: 3.3
gemfile: gemfiles/activerecord80.gemfile
elasticsearch: 9.0.0
- ruby: 3.2
gemfile: gemfiles/activerecord72.gemfile
elasticsearch: 8
- ruby: 3.4
gemfile: gemfiles/opensearch3.gemfile
opensearch: 3
- ruby: 3.3
gemfile: gemfiles/opensearch2.gemfile
opensearch: 2
- ruby: 3.4
gemfile: gemfiles/mongoid9.gemfile
elasticsearch: 9
mongodb: true
- ruby: 3.2
gemfile: gemfiles/mongoid8.gemfile
# TODO fix plugin installation for earlier versions
elasticsearch: 8.5.0
mongodb: true
runs-on: ubuntu-latest
env:
BUNDLE_GEMFILE: ${{ matrix.gemfile }}
steps:
- uses: actions/checkout@v6
- uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby }}
bundler-cache: true
- run: bundle update
- uses: actions/cache@v5
if: ${{ matrix.elasticsearch }}
with:
path: ~/elasticsearch
key: ${{ runner.os }}-elasticsearch-${{ matrix.elasticsearch }}
- uses: ankane/setup-elasticsearch@v1
if: ${{ matrix.elasticsearch }}
with:
elasticsearch-version: ${{ matrix.elasticsearch }}
plugins: |
analysis-kuromoji
analysis-smartcn
analysis-stempel
analysis-ukrainian
- uses: actions/cache@v5
if: ${{ matrix.opensearch }}
with:
path: ~/opensearch
key: ${{ runner.os }}-opensearch-${{ matrix.opensearch }}
- uses: ankane/setup-opensearch@v1
if: ${{ matrix.opensearch }}
with:
opensearch-version: ${{ matrix.opensearch }}
plugins: |
analysis-kuromoji
analysis-smartcn
analysis-stempel
analysis-ukrainian
- uses: ankane/setup-mongodb@v1
if: ${{ matrix.mongodb }}
- run: |
sudo apt-get update
sudo apt-get install redis-server
sudo systemctl start redis-server
- run: bundle exec rake test
================================================
FILE: .gitignore
================================================
*.gem
*.rbc
.bundle
.config
.yardoc
*.lock
InstalledFiles
_yardoc
coverage
doc/
lib/bundler/man
pkg
rdoc
spec/reports
test/tmp
test/version_tmp
tmp
*.log
.DS_Store
.ruby-*
.idea/
*.sqlite3
================================================
FILE: CHANGELOG.md
================================================
## 6.1.1 (unreleased)
- Fixed smart aggs behavior with `_and`
## 6.1.0 (2026-02-18)
- Added `per` method
- Fixed error with `aggs` method and non-hash arguments
- Fixed smart aggs behavior when multiple `where` calls
## 6.0.3 (2026-01-06)
- Fixed `inspect` method for `Relation`
## 6.0.2 (2025-10-24)
- Fixed `as_json` method for `HashWrapper`
## 6.0.1 (2025-10-24)
- Fixed `to_json` method for `HashWrapper`
## 6.0.0 (2025-10-19)
- Added new query builder API (similar to Active Record)
- Added `conversions_v2` option
- Added `job_options` option
- Added `parent_job` option
- Added `opaque_id` option
- Added `callback_options` option
- Added `ignore_missing` option for partial reindex
- Added support for `exists: false`
- Added `quantization` to `knn` option for Elasticsearch
- Changed async reindex to use ranges for numeric primary keys with Active Record
- Fixed error with `case_sensitive` option and synonyms
- Removed default quantization for `knn` option for Elasticsearch 8.14+
- Removed `results` method (use `to_a` instead)
- Removed `execute` option and method (no longer needed)
- Removed `options` method (use individual methods instead)
- Removed dependency on Hashie
- Deprecated `conversions` option in favor of `conversions_v2`
- Dropped support for Elasticsearch 7 and OpenSearch 1
- Dropped support for Active Record < 7.2
- Dropped support for Redis < 6.2
## 5.5.2 (2025-05-20)
- Fixed `scope` option for partial reindex
## 5.5.1 (2025-04-24)
- Added support for `elasticsearch` 9 gem
## 5.5.0 (2025-04-03)
- Added `m` and `ef_construction` to `knn` index option
- Added `ef_search` to `knn` search option
- Fixed exact cosine distance for OpenSearch 2.19+
- Dropped support for Ruby < 3.2 and Active Record < 7.1
- Dropped support for Mongoid < 8
## 5.4.0 (2024-09-04)
- Added `knn` option
- Added `rrf` method
- Added experimental support for scripting to `where` option
- Added warning for `exists` with non-`true` values
- Added warning for full reindex and `:queue` mode
- Fixed `per_page` method when paginating beyond `max_result_window`
- Dropped support for Ruby < 3.1
## 5.3.1 (2023-11-28)
- Fixed error with misspellings below and failed queries
## 5.3.0 (2023-07-02)
- Fixed error with `cutoff_frequency`
- Dropped support for Ruby < 3 and Active Record < 6.1
- Dropped support for Mongoid < 7
## 5.2.4 (2023-05-11)
- Fixed error with non-string routing and `:async` mode
## 5.2.3 (2023-04-12)
- Fixed error with missing records and multiple models
## 5.2.2 (2023-04-01)
- Fixed `total_docs` method
- Fixed deprecation warning with Active Support 7.1
## 5.2.1 (2023-02-21)
- Added support for `redis-client` gem
## 5.2.0 (2023-02-08)
- Added model name to warning about missing records
- Fixed unnecessary data loading when reindexing relations with `:async` and `:queue` modes
## 5.1.2 (2023-01-29)
- Fixed error with missing point in time
## 5.1.1 (2022-12-05)
- Added support for strings for `offset` and `per_page`
## 5.1.0 (2022-10-12)
- Added support for fractional search timeout
- Fixed search timeout with `elasticsearch` 8+ and `opensearch-ruby` gems
- Fixed search timeout not applying to `multi_search`
## 5.0.5 (2022-10-09)
- Added `model` method to `Searchkick::Relation`
- Fixed deprecation warning with `redis` gem
- Fixed `respond_to?` method on relation loading relation
- Fixed `Relation loaded` error for non-mutating methods on relation
## 5.0.4 (2022-06-16)
- Added `max_result_window` option
- Improved error message for unsupported versions of Elasticsearch
## 5.0.3 (2022-03-13)
- Fixed context for index name for inherited models
## 5.0.2 (2022-03-03)
- Fixed index name for inherited models
## 5.0.1 (2022-02-27)
- Prefer `mode: :async` over `async: true` for full reindex
- Fixed instance method overriding with concerns
## 5.0.0 (2022-02-21)
- Searches now use lazy loading (similar to Active Record)
- Added `unscope` option to better support working with default scopes
- Added support for `:async` and `:queue` modes for `reindex` on relation
- Added basic protection from unfiltered parameters to `where` option
- Added `models` option to `similar` method
- Changed async full reindex to fetch ids instead of using ranges for numeric primary keys with Active Record
- Changed `searchkick_index_options` to return symbol keys (instead of mix of strings and symbols)
- Changed non-anchored regular expressions to match expected results (previously warned)
- Changed record reindex to return `true` to match model and relation reindex
- Updated async reindex job to call `search_import` for nested associations
- Fixed removing records when `should_index?` is `false` when `reindex` called on relation
- Fixed issue with `merge_mappings` for fields that use `searchkick` options
- Raise error when `search` called on relations
- Raise `ArgumentError` (instead of warning) for invalid regular expression modifiers
- Raise `ArgumentError` instead of `RuntimeError` for unknown operators
- Removed mapping of `id` to `_id` with `order` option (not supported in Elasticsearch 8)
- Removed `wordnet` option (no longer worked)
- Removed dependency on `elasticsearch` gem (can use `elasticsearch` or `opensearch-ruby`)
- Dropped support for Elasticsearch 6
- Dropped support for Ruby < 2.6 and Active Record < 5.2
- Dropped support for NoBrainer and Cequel
- Dropped support for `faraday_middleware-aws-signers-v4` (use `faraday_middleware-aws-sigv4` instead)
## 4.6.3 (2021-11-19)
- Added support for reloadable synonyms for OpenSearch
- Added experimental support for `opensearch-ruby` gem
- Removed `elasticsearch-xpack` dependency for reloadable synonyms
## 4.6.2 (2021-11-15)
- Added support for beginless ranges to `where` option
- Fixed `like` and `ilike` with `+` character
- Fixed warning about accessing system indices when no model or index specified
## 4.6.1 (2021-09-25)
- Added `ilike` operator for Elasticsearch 7.10+
- Fixed missing methods with `multi_search`
## 4.6.0 (2021-08-22)
- Added support for case-insensitive regular expressions with Elasticsearch 7.10+
- Added support for `OPENSEARCH_URL`
- Fixed error with `debug` option
## 4.5.2 (2021-08-05)
- Fixed error with reindex queue
- Fixed error with `model_name` method with multiple models
- Fixed error with `debug` option with elasticsearch-ruby 7.14
## 4.5.1 (2021-08-03)
- Improved performance of reindex queue
## 4.5.0 (2021-06-07)
- Added experimental support for OpenSearch
- Added support for synonyms in Japanese
## 4.4.4 (2021-03-12)
- Fixed `too_long_frame_exception` with `scroll` method
- Fixed multi-word emoji tokenization
## 4.4.3 (2021-02-25)
- Added support for Hunspell
- Fixed warning about accessing system indices
## 4.4.2 (2020-11-23)
- Added `missing_records` method to results
- Fixed issue with `like` and special characters
## 4.4.1 (2020-06-24)
- Added `stem_exclusion` and `stemmer_override` options
- Added `with_score` method to search results
- Improved error message for `reload_synonyms` with non-OSS version of Elasticsearch
- Improved output for reindex rake task
## 4.4.0 (2020-06-17)
- Added support for reloadable, multi-word, search time synonyms
- Fixed another deprecation warning in Ruby 2.7
## 4.3.1 (2020-05-13)
- Fixed error with `exclude` in certain cases for Elasticsearch 7.7
## 4.3.0 (2020-02-19)
- Fixed `like` queries with `"` character
- Better error when invalid parameters passed to `where`
## 4.2.1 (2020-01-27)
- Fixed deprecation warnings with Elasticsearch
- Fixed deprecation warnings in Ruby 2.7
## 4.2.0 (2019-12-18)
- Added safety check for multiple `Model.reindex`
- Added `deep_paging` option
- Added request parameters to search notifications and curl representation
- Removed curl from search notifications to prevent confusion
## 4.1.1 (2019-11-19)
- Added `chinese2` and `korean2` languages
- Improved performance of async full reindex
- Fixed `searchkick:reindex:all` rake task for Rails 6
## 4.1.0 (2019-08-01)
- Added `like` operator
- Added `exists` operator
- Added warnings for certain regular expressions
- Fixed anchored regular expressions
## 4.0.2 (2019-06-04)
- Added block form of `scroll`
- Added `clear_scroll` method
- Fixed custom mappings
## 4.0.1 (2019-05-30)
- Added support for scroll API
- Made type optional for custom mapping for Elasticsearch 6
- Fixed error when suggestions empty
- Fixed `models` option with inheritance
## 4.0.0 (2019-04-11)
- Added support for Elasticsearch 7
- Added `models` option
Breaking changes
- Removed support for Elasticsearch 5
- Removed support for multi-word synonyms (they no longer work with shingles)
- Removed support for Active Record < 5
## 3.1.3 (2019-04-11)
- Added support for endless ranges
- Added support for routing to `similar` method
- Added `prefix` to `where`
- Fixed error with elasticsearch-ruby 6.3
- Fixed error with some language stemmers and Elasticsearch 6.5
- Fixed issue with misspellings below and body block
## 3.1.2 (2018-09-27)
- Improved performance of indices boost
- Fixed deletes with routing and `async` callbacks
- Fixed deletes with routing and `queue` callbacks
- Fixed deprecation warnings
- Fixed field misspellings for older partial match format
## 3.1.1 (2018-08-09)
- Added per-field misspellings
- Added `case_sensitive` option
- Added `stem` option
- Added `total_entries` option
- Fixed `exclude` option with match all
- Fixed `with_highlights` method
## 3.1.0 (2018-05-12)
- Added `:inline` as alias for `true` for `callbacks` and `mode` options
- Friendlier error message for bad mapping with partial matches
- Warn when records in search index do not exist in database
- Easier merging for `merge_mapping`
- Fixed `with_hit` and `with_highlights` when records in search index do not exist in database
- Fixed error with highlights and match all
## 3.0.3 (2018-04-22)
- Added support for pagination with `body` option
- Added `boost_by_recency` option
- Fixed "Model Search Data" output for `debug` option
- Fixed `reindex_status` error
- Fixed error with optional operators in Ruby regexp
- Fixed deprecation warnings for Elasticsearch 6.2+
## 3.0.2 (2018-03-26)
- Added support for Korean and Vietnamese
- Fixed `Unsupported argument type: Symbol` for async partial reindex
- Fixed infinite recursion with multi search and misspellings below
- Do not raise an error when `id` is indexed
## 3.0.1 (2018-03-14)
- Added `scope` option for partial reindex
- Added support for Japanese, Polish, and Ukrainian
## 3.0.0 (2018-03-03)
- Added support for Chinese
- No longer requires fields to query for Elasticsearch 6
- Results can be marshaled by default (unless using `highlight` option)
Breaking changes
- Removed support for Elasticsearch 2
- Removed support for Active Record < 4.2 and Mongoid < 5
- Types are no longer used
- The `_all` field is disabled by default in Elasticsearch 5
- Conversions are not stemmed by default
- An `ArgumentError` is raised instead of a warning when options are incompatible with the `body` option
- Removed `log` option from `boost_by`
- Removed `Model.enable_search_callbacks`, `Model.disable_search_callbacks`, and `Model.search_callbacks?`
- Removed `reindex_async` method, as `reindex` now defaults to callbacks mode specified on the model
- Removed `async` option from `record.reindex`
- Removed `search_hit` method - use `with_hit` instead
- Removed `each_with_hit` - use `with_hit.each` instead
- Removed `with_details` - use `with_highlights` instead
- Bumped default `limit` to 10,000
## 2.5.0 (2018-02-15)
- Try requests 3 times before raising error
- Better exception when trying to access results for failed multi-search query
- More efficient aggregations with `where` clauses
- Added support for `faraday_middleware-aws-sigv4`
- Added `credentials` option to `aws_credentials`
- Added `modifier` option to `boost_by`
- Added `scope_results` option
- Added `factor` option to `boost_by_distance`
## 2.4.0 (2017-11-14)
- Fixed `similar` for Elasticsearch 6
- Added `inheritance` option
- Added `_type` option
- Fixed `Must specify fields to search` error when searching `*`
## 2.3.2 (2017-09-08)
- Added `_all` and `default_fields` options
- Added global `index_prefix` option
- Added `wait` option to async reindex
- Added `model_includes` option
- Added `missing` option for `boost_by`
- Raise error for `reindex_status` when Redis not configured
- Warn when incompatible options used with `body` option
- Fixed bug where `routing` and `type` options were silently ignored with `body` option
- Fixed `reindex(async: true)` for non-numeric primary keys in Postgres
## 2.3.1 (2017-07-06)
- Added support for `reindex(async: true)` for non-numeric primary keys
- Added `conversions_term` option
- Added support for passing fields to `suggest` option
- Fixed `page_view_entries` for Kaminari
## 2.3.0 (2017-05-06)
- Fixed analyzer on dynamically mapped fields
- Fixed error with `similar` method and `_all` field
- Throw error when fields are needed
- Added `queue_name` option
- No longer require synonyms to be lowercase
## 2.2.1 (2017-04-16)
- Added `avg`, `cardinality`, `max`, `min`, and `sum` aggregations
- Added `load: {dumpable: true}` option
- Added `index_suffix` option
- Accept string for `exclude` option
## 2.2.0 (2017-03-19)
- Fixed bug with text values longer than 256 characters and `_all` field - see [#850](https://github.com/ankane/searchkick/issues/850)
- Fixed issue with `_all` field in `searchable`
- Fixed `exclude` option with `word_start`
## 2.1.1 (2017-01-17)
- Fixed duplicate notifications
- Added support for `connection_pool`
- Added `exclude` option
## 2.1.0 (2017-01-15)
- Background reindexing and queues are officially supported
- Log updates and deletes
## 2.0.4 (2017-01-15)
- Added support for queuing updates [experimental]
- Added `refresh_interval` option to `reindex`
- Prefer `search_index` over `searchkick_index`
## 2.0.3 (2017-01-12)
- Added `async` option to `reindex` [experimental]
- Added `misspellings?` method to results
## 2.0.2 (2017-01-08)
- Added `retain` option to `reindex`
- Added support for attributes in highlight tags
- Fixed potentially silent errors in reindex job
- Improved syntax for `boost_by_distance`
## 2.0.1 (2016-12-30)
- Added `search_hit` and `search_highlights` methods to models
- Improved reindex performance
## 2.0.0 (2016-12-28)
- Added support for `reindex` on associations
Breaking changes
- Removed support for Elasticsearch 1 as it reaches [end of life](https://www.elastic.co/support/eol)
- Removed facets, legacy options, and legacy methods
- Invalid options now throw an `ArgumentError`
- The `query` and `json` options have been removed in favor of `body`
- The `include` option has been removed in favor of `includes`
- The `personalize` option has been removed in favor of `boost_where`
- The `partial` option has been removed in favor of `operator`
- Renamed `select_v2` to `select` (legacy `select` no longer available)
- The `_all` field is disabled if `searchable` option is used (for performance)
- The `partial_reindex(:method_name)` method has been replaced with `reindex(:method_name)`
- The `unsearchable` and `only_analyzed` options have been removed in favor of `searchable` and `filterable`
- `load: false` no longer returns an array in Elasticsearch 2
## 1.5.1 (2016-12-28)
- Added `client_options`
- Added `refresh` option to `reindex` method
- Improved syntax for partial reindex
## 1.5.0 (2016-12-23)
- Added support for geo shape indexing and queries
- Added `_and`, `_or`, `_not` to `where` option
## 1.4.2 (2016-12-21)
- Added support for directional synonyms
- Easier AWS setup
- Fixed `total_docs` method for ES 5+
- Fixed exception on update errors
## 1.4.1 (2016-12-11)
- Added `partial_reindex` method
- Added `debug` option to `search` method
- Added `profile` option
## 1.4.0 (2016-10-26)
- Official support for Elasticsearch 5
- Boost exact matches for partial matching
- Added `searchkick_debug` method
- Added `geo_polygon` filter
## 1.3.6 (2016-10-08)
- Fixed `Job adapter not found` error
## 1.3.5 (2016-09-27)
- Added support for Elasticsearch 5.0 beta
- Added `request_params` option
- Added `filterable` option
## 1.3.4 (2016-08-23)
- Added `resume` option to reindex
- Added search timeout to payload
## 1.3.3 (2016-08-02)
- Fix for namespaced models (broken in 1.3.2)
## 1.3.2 (2016-08-01)
- Added `body_options` option
- Added `date_histogram` aggregation
- Added `indices_boost` option
- Added support for multiple conversions
## 1.3.1 (2016-07-10)
- Fixed error with Ruby 2.0
- Fixed error with indexing large fields
## 1.3.0 (2016-05-04)
- Added support for Elasticsearch 5.0 alpha
- Added support for phrase matches
- Added support for procs for `index_prefix` option
## 1.2.1 (2016-02-15)
- Added `multi_search` method
- Added support for routing for Elasticsearch 2
- Added support for `search_document_id` and `search_document_type` in models
- Fixed error with instrumentation for searching multiple models
- Fixed instrumentation for bulk updates
## 1.2.0 (2016-02-03)
- Fixed deprecation warnings with `alias_method_chain`
- Added `analyzed_only` option for large text fields
- Added `encoder` option to highlight
- Fixed issue in `similar` method with `per_page` option
- Added basic support for multiple models
## 1.1.2 (2015-12-18)
- Added bulk updates with `callbacks` method
- Added `bulk_delete` method
- Added `search_timeout` option
- Fixed bug with new location format for `boost_by_distance`
## 1.1.1 (2015-12-14)
- Added support for `{lat: lat, lon: lon}` as preferred format for locations
## 1.1.0 (2015-12-08)
- Added `below` option to misspellings to improve performance
- Fixed synonyms for `word_*` partial matches
- Added `searchable` option
- Added `similarity` option
- Added `match` option
- Added `word` option
- Added highlighted fields to `load: false`
## 1.0.3 (2015-11-27)
- Added support for Elasticsearch 2.1
## 1.0.2 (2015-11-15)
- Throw `Searchkick::ImportError` for errors when importing records
- Errors now inherit from `Searchkick::Error`
- Added `order` option to aggregations
- Added `mapping` method
## 1.0.1 (2015-11-05)
- Added aggregations method to get raw response
- Use `execute: false` for lazy loading
- Return nil when no aggs
- Added emoji search
## 1.0.0 (2015-10-30)
- Added support for Elasticsearch 2.0
- Added support for aggregations
- Added ability to use misspellings for partial matches
- Added `fragment_size` option for highlight
- Added `took` method to results
Breaking changes
- Raise `Searchkick::DangerousOperation` error when calling reindex with scope
- Enabled misspellings by default for partial matches
- Enabled transpositions by default for misspellings
## 0.9.1 (2015-08-31)
- `and` now matches `&`
- Added `transpositions` option to misspellings
- Added `boost_mode` and `log` options to `boost_by`
- Added `prefix_length` option to `misspellings`
- Added ability to set env
## 0.9.0 (2015-06-07)
- Much better performance for where queries if no facets
- Added basic support for regex
- Added support for routing
- Made `Searchkick.disable_callbacks` thread-safe
## 0.8.7 (2015-02-14)
- Fixed Mongoid import
## 0.8.6 (2015-02-10)
- Added support for NoBrainer
- Added `stem_conversions: false` option
- Added support for multiple `boost_where` values on the same field
- Added support for array of values for `boost_where`
- Fixed suggestions with partial match boost
- Fixed redefining existing instance methods in models
## 0.8.5 (2014-11-11)
- Added support for Elasticsearch 1.4
- Added `unsearchable` option
- Added `select: true` option
- Added `body` option
## 0.8.4 (2014-11-05)
- Added `boost_by_distance`
- More flexible highlight options
- Better `env` logic
## 0.8.3 (2014-09-20)
- Added support for Active Job
- Added `timeout` setting
- Fixed import with no records
## 0.8.2 (2014-08-18)
- Added `async` to `callbacks` option
- Added `wordnet` option
- Added `edit_distance` option to eventually replace `distance` option
- Catch misspelling of `misspellings` option
- Improved logging
## 0.8.1 (2014-08-16)
- Added `search_method_name` option
- Fixed `order` for array of hashes
- Added support for Mongoid 2
## 0.8.0 (2014-07-12)
- Added support for Elasticsearch 1.2
## 0.7.9 (2014-06-30)
- Added `tokens` method
- Added `json` option
- Added exact matches
- Added `prev_page` for Kaminari pagination
- Added `import` option to reindex
## 0.7.8 (2014-06-22)
- Added `boost_by` and `boost_where` options
- Added ability to boost fields - `name^10`
- Added `select` option for `load: false`
## 0.7.7 (2014-06-10)
- Added support for automatic failover
- Fixed `operator` option (and default) for partial matches
## 0.7.6 (2014-05-20)
- Added `stats` option to facets
- Added `padding` option
## 0.7.5 (2014-05-13)
- Do not throw errors when index becomes out of sync with database
- Added custom exception types
- Fixed `offset` and `offset_value`
## 0.7.4 (2014-05-06)
- Fixed reindex with inheritance
## 0.7.3 (2014-04-30)
- Fixed multi-index searches
- Fixed suggestions for partial matches
- Added `offset` and `length` for improved pagination
## 0.7.2 (2014-04-24)
- Added smart facets
- Added more fields to `load: false` result
- Fixed logging for multi-index searches
- Added `first_page?` and `last_page?` for improved Kaminari support
## 0.7.1 (2014-04-12)
- Fixed huge issue w/ zero-downtime reindexing on 0.90
## 0.7.0 (2014-04-10)
- Added support for Elasticsearch 1.1
- Dropped support for Elasticsearch below 0.90.4 (unfortunate side effect of above)
## 0.6.3 (2014-04-08)
- Removed patron since no support for Windows
- Added error if `searchkick` is called multiple times
## 0.6.2 (2014-04-05)
- Added logging
- Fixed index_name option
- Added ability to use proc as the index name
## 0.6.1 (2014-03-24)
- Fixed huge issue w/ zero-downtime reindexing on 0.90 and elasticsearch-ruby 1.0
- Restore load: false behavior
- Restore total_entries method
## 0.6.0 (2014-03-22)
- Moved to elasticsearch-ruby
- Added support for modifying the query and viewing the response
- Added support for page_entries_info method
## 0.5.3 (2014-02-24)
- Fixed bug w/ word_* queries
## 0.5.2 (2014-02-12)
- Use after_commit hook for Active Record to prevent data inconsistencies
## 0.5.1 (2014-02-12)
- Replaced stop words with common terms query
- Added language option
- Fixed bug with empty array in where clause
- Fixed bug with MongoDB integer _id
- Fixed reindex bug when callbacks disabled
## 0.5.0 (2014-01-20)
- Better control over partial matches
- Added merge_mappings option
- Added batch_size option
- Fixed bug with nil where clauses
## 0.4.2 (2013-12-29)
- Added `should_index?` method to control which records are indexed
- Added ability to temporarily disable callbacks
- Added custom mappings
## 0.4.1 (2013-12-19)
- Fixed issue w/ inheritance mapping
## 0.4.0 (2013-12-11)
- Added support for Mongoid 4
- Added support for multiple locations
## 0.3.5 (2013-12-08)
- Added facet ranges
- Added all operator
## 0.3.4 (2013-11-22)
- Added highlighting
- Added :distance option to misspellings
- Fixed issue w/ BigDecimal serialization
## 0.3.3 (2013-11-04)
- Better error messages
- Added where: {field: nil} queries
## 0.3.2 (2013-11-02)
- Added support for single table inheritance
- Removed Tire::Model::Search
## 0.3.1 (2013-11-02)
- Added index_prefix option
- Fixed ES issue with incorrect facet counts
- Added option to turn off special characters
## 0.3.0 (2013-11-02)
- Fixed reversed coordinates
- Added bounded by a box queries
- Expanded `or` queries
## 0.2.8 (2013-09-30)
- Added option to disable callbacks
- Fixed bug with facets with Elasticsearch 0.90.5
## 0.2.7 (2013-09-23)
- Added limit to facet
- Improved similar items
## 0.2.6 (2013-09-10)
- Added option to disable misspellings
## 0.2.5 (2013-08-30)
- Added geospatial searches
- Create alias before importing document if no alias exists
- Fixed exception when :per_page option is a string
- Check `RAILS_ENV` if `RACK_ENV` is not set
## 0.2.4 (2013-08-20)
- Use `to_hash` instead of `as_json` for default `search_data` method
- Works for Mongoid 1.3
- Use one shard in test environment for consistent scores
## 0.2.3 (2013-08-16)
- Setup Travis
- Clean old indices before reindex
- Search for `*` returns all results
- Fixed pagination
- Added `similar` method
## 0.2.2 (2013-08-11)
- Clean old indices after reindex
- More expansions for fuzzy queries
## 0.2.1 (2013-08-11)
- Added Rails logger
- Only fetch ids when `load: true`
## 0.2.0 (2013-08-10)
- Added autocomplete
- Added “Did you mean” suggestions
- Added personalized searches
## 0.1.4 (2013-08-03)
- Bug fix
## 0.1.3 (2013-08-03)
- Changed edit distance to one for misspellings
- Raise errors when indexing fails
- Fixed pagination
- Fixed :include option
## 0.1.2 (2013-07-30)
- Use conversions by default
## 0.1.1 (2013-07-29)
- Renamed `_source` to `search_data`
- Renamed `searchkick_import` to `search_import`
## 0.1.0 (2013-07-28)
- Added `_source` method
- Added `index_name` option
## 0.0.2 (2013-07-17)
- Added `conversions` option
## 0.0.1 (2013-07-14)
- First release
================================================
FILE: Gemfile
================================================
source "https://rubygems.org"
gemspec
gem "rake"
gem "minitest"
gem "sqlite3", platform: :ruby
gem "sqlite3-ffi", platform: :jruby
gem "activerecord", "~> 8.1.0"
gem "actionpack", "~> 8.1.0"
gem "activejob", "~> 8.1.0", require: "active_job"
gem "elasticsearch", "~> 9"
gem "redis-client"
gem "connection_pool"
gem "kaminari"
gem "gemoji-parser"
gem "parallel_tests"
gem "typhoeus", platform: :mri
gem "cgi" # for elasticsearch
================================================
FILE: LICENSE.txt
================================================
Copyright (c) 2013-2026 Andrew Kane
MIT License
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: README.md
================================================
# Searchkick
:rocket: Intelligent search made easy
**Searchkick learns what your users are looking for.** As more people search, it gets smarter and the results get better. It’s friendly for developers - and magical for your users.
Searchkick handles:
- stemming - `tomatoes` matches `tomato`
- special characters - `jalapeno` matches `jalapeño`
- extra whitespace - `dishwasher` matches `dish washer`
- misspellings - `zuchini` matches `zucchini`
- custom synonyms - `pop` matches `soda`
Plus:
- query like SQL - no need to learn a new query language
- reindex without downtime
- easily personalize results for each user
- autocomplete
- “Did you mean” suggestions
- supports many languages
- works with Active Record and Mongoid
Check out [Searchjoy](https://github.com/ankane/searchjoy) for analytics and [Autosuggest](https://github.com/ankane/autosuggest) for query suggestions
:tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
[![Build Status](https://github.com/ankane/searchkick/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/searchkick/actions)
## Contents
- [Getting Started](#getting-started)
- [Querying](#querying)
- [Indexing](#indexing)
- [Intelligent Search](#intelligent-search)
- [Instant Search / Autocomplete](#instant-search--autocomplete)
- [Aggregations](#aggregations)
- [Testing](#testing)
- [Deployment](#deployment)
- [Performance](#performance)
- [Advanced Search](#advanced)
- [Reference](#reference)
- [Contributing](#contributing)
Searchkick 6.0 was recently released! See [how to upgrade](#upgrading)
## Getting Started
Install [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) or [OpenSearch](https://opensearch.org/downloads.html). For Homebrew, use:
```sh
brew install opensearch
brew services start opensearch
```
Add these lines to your application’s Gemfile:
```ruby
gem "searchkick"
gem "elasticsearch" # select one
gem "opensearch-ruby" # select one
```
The latest version works with Elasticsearch 8 and 9 and OpenSearch 2 and 3. For Elasticsearch 7 and OpenSearch 1, use version 5.5.2 and [this readme](https://github.com/ankane/searchkick/blob/v5.5.2/README.md).
Add `searchkick` to models you want to search.
```ruby
class Product < ApplicationRecord
searchkick
end
```
Add data to the search index.
```ruby
Product.reindex
```
And to query, use:
```ruby
products = Product.search("apples")
products.each do |product|
puts product.name
end
```
Searchkick supports the complete [Elasticsearch Search API](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html) and [OpenSearch Search API](https://opensearch.org/docs/latest/opensearch/rest-api/search/). As your search becomes more advanced, we recommend you use the [search server DSL](#advanced) for maximum flexibility.
## Querying
Query like SQL
```ruby
Product.search("apples").where(in_stock: true).limit(10).offset(50)
```
Search specific fields
```ruby
fields(:name, :brand)
```
Where
```ruby
where(store_id: 1, expires_at: Time.now..)
```
[These types of filters are supported](#filtering)
Order
```ruby
order(_score: :desc) # most relevant first - default
```
[All of these sort options are supported](https://www.elastic.co/guide/en/elasticsearch/reference/current/sort-search-results.html)
Limit / offset
```ruby
limit(20).offset(40)
```
Select
```ruby
select(:name)
```
[These source filtering options are supported](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-fields.html#source-filtering)
### Results
Searches return a `Searchkick::Relation` object. This responds like an array to most methods.
```ruby
results = Product.search("milk")
results.size
results.any?
results.each { |result| ... }
```
By default, ids are fetched from the search server and records are fetched from your database. To fetch everything from the search server, use:
```ruby
Product.search("apples").load(false)
```
Get total results
```ruby
results.total_count
```
Get the time the search took (in milliseconds)
```ruby
results.took
```
Get the full response from the search server
```ruby
results.response
```
**Note:** By default, Elasticsearch and OpenSearch [limit paging](#deep-paging) to the first 10,000 results for performance. This applies to the total count as well.
### Filtering
Equal
```ruby
where(store_id: 1)
```
Not equal
```ruby
where.not(store_id: 2)
```
Greater than (`gt`), less than (`lt`), greater than or equal (`gte`), less than or equal (`lte`)
```ruby
where(expires_at: {gt: Time.now})
```
Range
```ruby
where(orders_count: 1..10)
```
In
```ruby
where(aisle_id: [25, 30])
```
Not in
```ruby
where.not(aisle_id: [25, 30])
```
Contains all
```ruby
where(user_ids: {all: [1, 3]})
```
Like
```ruby
where(category: {like: "%frozen%"})
```
Case-insensitive like
```ruby
where(category: {ilike: "%frozen%"})
```
Regular expression
```ruby
where(category: /frozen .+/)
```
Prefix
```ruby
where(category: {prefix: "frozen"})
```
Exists
```ruby
where(store_id: {exists: true})
```
Combine filters with OR
```ruby
where(_or: [{in_stock: true}, {backordered: true}])
```
### Boosting
Boost important fields
```ruby
fields("title^10", "description")
```
Boost by the value of a field (field must be numeric)
```ruby
boost_by(:orders_count) # give popular documents a little boost
boost_by(orders_count: {factor: 10}) # default factor is 1
```
Boost matching documents
```ruby
boost_where(user_id: 1)
boost_where(user_id: {value: 1, factor: 100}) # default factor is 1000
boost_where(user_id: [{value: 1, factor: 100}, {value: 2, factor: 200}])
```
Boost by recency
```ruby
boost_by_recency(created_at: {scale: "7d", decay: 0.5})
```
You can also boost by:
- [Conversions](#intelligent-search)
- [Distance](#boost-by-distance)
### Get Everything
Use a `*` for the query.
```ruby
Product.search("*")
```
### Pagination
Plays nicely with kaminari and will_paginate.
```ruby
# controller
@products = Product.search("milk").page(params[:page]).per_page(20)
```
View with kaminari
```erb
<%= paginate @products %>
```
View with will_paginate
```erb
<%= will_paginate @products %>
```
### Partial Matches
By default, results must match all words in the query.
```ruby
Product.search("fresh honey") # fresh AND honey
```
To change this, use:
```ruby
Product.search("fresh honey").operator("or") # fresh OR honey
```
By default, results must match the entire word - `back` will not match `backpack`. You can change this behavior with:
```ruby
class Product < ApplicationRecord
searchkick word_start: [:name]
end
```
And to search (after you reindex):
```ruby
Product.search("back").fields(:name).match(:word_start)
```
Available options are:
Option | Matches | Example
--- | --- | ---
`:word` | entire word | `apple` matches `apple`
`:word_start` | start of word | `app` matches `apple`
`:word_middle` | any part of word | `ppl` matches `apple`
`:word_end` | end of word | `ple` matches `apple`
`:text_start` | start of text | `gre` matches `green apple`, `app` does not match
`:text_middle` | any part of text | `een app` matches `green apple`
`:text_end` | end of text | `ple` matches `green apple`, `een` does not match
The default is `:word`. The most matches will happen with `:word_middle`.
To specify different matching for different fields, use:
```ruby
Product.search(query).fields({name: :word_start}, {brand: :word_middle})
```
### Exact Matches
To match a field exactly (case-sensitive), use:
```ruby
Product.search(query).fields({name: :exact})
```
### Phrase Matches
To only match the exact order, use:
```ruby
Product.search("fresh honey").match(:phrase)
```
### Stemming and Language
Searchkick stems words by default for better matching. `apple` and `apples` both stem to `appl`, so searches for either term will have the same matches.
Searchkick defaults to English for stemming. To change this, use:
```ruby
class Product < ApplicationRecord
searchkick language: "german"
end
```
See the [list of languages](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-stemmer-tokenfilter.html#analysis-stemmer-tokenfilter-configure-parms). A few languages require plugins:
- `chinese` - [analysis-ik plugin](https://github.com/medcl/elasticsearch-analysis-ik)
- `chinese2` - [analysis-smartcn plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-smartcn.html)
- `japanese` - [analysis-kuromoji plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji.html)
- `korean` - [analysis-openkoreantext plugin](https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext)
- `korean2` - [analysis-nori plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-nori.html)
- `polish` - [analysis-stempel plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-stempel.html)
- `ukrainian` - [analysis-ukrainian plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-ukrainian.html)
- `vietnamese` - [analysis-vietnamese plugin](https://github.com/duydo/elasticsearch-analysis-vietnamese)
You can also use a Hunspell dictionary for stemming.
```ruby
class Product < ApplicationRecord
searchkick stemmer: {type: "hunspell", locale: "en_US"}
end
```
Disable stemming with:
```ruby
class Image < ApplicationRecord
searchkick stem: false
end
```
Exclude certain words from stemming with:
```ruby
class Image < ApplicationRecord
searchkick stem_exclusion: ["apples"]
end
```
Or change how words are stemmed:
```ruby
class Image < ApplicationRecord
searchkick stemmer_override: ["apples => other"]
end
```
### Synonyms
```ruby
class Product < ApplicationRecord
searchkick search_synonyms: [["pop", "soda"], ["burger", "hamburger"]]
end
```
Call `Product.reindex` after changing synonyms. Synonyms are applied at search time before stemming, and can be a single word or multiple words.
For directional synonyms, use:
```ruby
search_synonyms: ["lightbulb => halogenlamp"]
```
### Dynamic Synonyms
The above approach works well when your synonym list is static, but in practice, this is often not the case. When you analyze search conversions, you often want to add new synonyms without a full reindex. We recommend placing synonyms in a file on the search server (in the `config` directory). This allows you to reload synonyms without reindexing.
```txt
pop, soda
burger, hamburger
```
Then use:
```ruby
class Product < ApplicationRecord
searchkick search_synonyms: "synonyms.txt"
end
```
And reload with:
```ruby
Product.search_index.reload_synonyms
```
### Misspellings
By default, Searchkick handles misspelled queries by returning results with an [edit distance](https://en.wikipedia.org/wiki/Levenshtein_distance) of one.
You can change this with:
```ruby
Product.search("zucini").misspellings(edit_distance: 2) # zucchini
```
To prevent poor precision and improve performance for correctly spelled queries (which should be a majority for most applications), Searchkick can first perform a search without misspellings, and if there are too few results, perform another with them.
```ruby
Product.search("zuchini").misspellings(below: 5)
```
If there are fewer than 5 results, a 2nd search is performed with misspellings enabled. The result of this query is returned.
Turn off misspellings with:
```ruby
Product.search("zuchini").misspellings(false) # no zucchini
```
Specify which fields can include misspellings with:
```ruby
Product.search("zucini").fields(:name, :color).misspellings(fields: [:name])
```
> When doing this, you must also specify fields to search
### Bad Matches
If a user searches `butter`, they may also get results for `peanut butter`. To prevent this, use:
```ruby
Product.search("butter").exclude("peanut butter")
```
You can map queries and terms to exclude with:
```ruby
exclude_queries = {
"butter" => ["peanut butter"],
"cream" => ["ice cream", "whipped cream"]
}
Product.search(query).exclude(exclude_queries[query])
```
You can demote results by boosting by a factor less than one:
```ruby
Product.search("butter").boost_where(category: {value: "pantry", factor: 0.5})
```
### Emoji
Search :ice_cream::cake: and get `ice cream cake`!
Add this line to your application’s Gemfile:
```ruby
gem "gemoji-parser"
```
And use:
```ruby
Product.search("🍨🍰").emoji
```
## Indexing
Control what data is indexed with the `search_data` method. Call `Product.reindex` after changing this method.
```ruby
class Product < ApplicationRecord
belongs_to :department
def search_data
{
name: name,
department_name: department.name,
on_sale: sale_price.present?
}
end
end
```
Searchkick uses `find_in_batches` to import documents. To eager load associations, use the `search_import` scope.
```ruby
class Product < ApplicationRecord
scope :search_import, -> { includes(:department) }
end
```
By default, all records are indexed. To control which records are indexed, use the `should_index?` method.
```ruby
class Product < ApplicationRecord
def should_index?
active # only index active records
end
end
```
If a reindex is interrupted, you can resume it with:
```ruby
Product.reindex(resume: true)
```
For large data sets, try [parallel reindexing](#parallel-reindexing).
### To Reindex, or Not to Reindex
#### Reindex
- when you install or upgrade searchkick
- change the `search_data` method
- change the `searchkick` method
#### No need to reindex
- app starts
### Strategies
There are four strategies for keeping the index synced with your database.
1. Inline (default)
Anytime a record is inserted, updated, or deleted
2. Asynchronous
Use background jobs for better performance
```ruby
class Product < ApplicationRecord
searchkick callbacks: :async
end
```
Jobs are added to a queue named `searchkick`.
3. Queuing
Push ids of records that need to be updated to a queue and reindex in the background in batches. This is more performant than the asynchronous method, which updates records individually. See [how to set up](#queuing).
4. Manual
Turn off automatic syncing
```ruby
class Product < ApplicationRecord
searchkick callbacks: false
end
```
And reindex a record or relation manually.
```ruby
product.reindex
# or
store.products.reindex(mode: :async)
```
You can also do bulk updates.
```ruby
Searchkick.callbacks(:bulk) do
Product.find_each(&:update_fields)
end
```
Or temporarily skip updates.
```ruby
Searchkick.callbacks(false) do
Product.find_each(&:update_fields)
end
```
Or override the model’s strategy.
```ruby
product.reindex(mode: :async) # :inline or :queue
```
### Associations
Data is **not** automatically synced when an association is updated. If this is desired, add a callback to reindex:
```ruby
class Image < ApplicationRecord
belongs_to :product
after_commit :reindex_product
def reindex_product
product.reindex
end
end
```
### Default Scopes
If you have a default scope that filters records, use the `should_index?` method to exclude them from indexing:
```ruby
class Product < ApplicationRecord
default_scope { where(deleted_at: nil) }
def should_index?
deleted_at.nil?
end
end
```
If you want to index and search filtered records, set:
```ruby
class Product < ApplicationRecord
searchkick unscope: true
end
```
## Intelligent Search
The best starting point to improve your search **by far** is to track searches and conversions. [Searchjoy](https://github.com/ankane/searchjoy) makes it easy.
```ruby
Product.search("apple").track(user_id: current_user.id)
```
[See the docs](https://github.com/ankane/searchjoy) for how to install and use. Focus on top searches with a low conversion rate.
Searchkick can then use the conversion data to learn what users are looking for. If a user searches for “ice cream” and adds Ben & Jerry’s Chunky Monkey to the cart (our conversion metric at Instacart), that item gets a little more weight for similar searches. This can make a huge difference on the quality of your search.
Add conversion data with:
```ruby
class Product < ApplicationRecord
has_many :conversions, class_name: "Searchjoy::Conversion", as: :convertable
has_many :searches, class_name: "Searchjoy::Search", through: :conversions
searchkick conversions_v2: [:conversions] # name of field
def search_data
{
name: name,
conversions: searches.group(:query).distinct.count(:user_id)
# {"ice cream" => 234, "chocolate" => 67, "cream" => 2}
}
end
end
```
Reindex and set up a cron job to add new conversions daily. For zero downtime deployment, temporarily set `conversions_v2(false)` in your search calls until the data is reindexed.
### Performant Conversions
A performant way to do conversions is to cache them to prevent N+1 queries. For Postgres, create a migration with:
```ruby
add_column :products, :search_conversions, :jsonb
```
For MySQL, use `:json`, and for others, use `:text` with a [JSON serializer](https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html).
Next, update your model. Create a separate method for conversion data so you can use [partial reindexing](#partial-reindexing).
```ruby
class Product < ApplicationRecord
searchkick conversions_v2: [:conversions]
def search_data
{
name: name,
category: category
}.merge(conversions_data)
end
def conversions_data
{
conversions: search_conversions || {}
}
end
end
```
Deploy and reindex your data. For zero downtime deployment, temporarily set `conversions_v2(false)` in your search calls until the data is reindexed.
```ruby
Product.reindex
```
Then, create a job to update the conversions column and reindex records with new conversions. Here’s one you can use for Searchjoy:
```ruby
class UpdateConversionsJob < ApplicationJob
def perform(class_name, since: nil, update: true, reindex: true)
model = Searchkick.load_model(class_name)
# get records that have a recent conversion
recently_converted_ids =
Searchjoy::Conversion.where(convertable_type: class_name, created_at: since..)
.order(:convertable_id).distinct.pluck(:convertable_id)
# split into batches
recently_converted_ids.in_groups_of(1000, false) do |ids|
if update
# fetch conversions
conversions =
Searchjoy::Conversion.where(convertable_id: ids, convertable_type: class_name)
.joins(:search).where.not(searchjoy_searches: {user_id: nil})
.group(:convertable_id, :query).distinct.count(:user_id)
# group by record
conversions_by_record = {}
conversions.each do |(id, query), count|
(conversions_by_record[id] ||= {})[query] = count
end
# update conversions column
model.transaction do
conversions_by_record.each do |id, conversions|
model.where(id: id).update_all(search_conversions: conversions)
end
end
end
if reindex
# reindex conversions data
model.where(id: ids).reindex(:conversions_data, ignore_missing: true)
end
end
end
end
```
Run the job:
```ruby
UpdateConversionsJob.perform_now("Product")
```
And set it up to run daily.
```ruby
UpdateConversionsJob.perform_later("Product", since: 1.day.ago)
```
## Personalized Results
Order results differently for each user. For example, show a user’s previously purchased products before other results.
```ruby
class Product < ApplicationRecord
def search_data
{
name: name,
orderer_ids: orders.pluck(:user_id) # boost this product for these users
}
end
end
```
Reindex and search with:
```ruby
Product.search("milk").boost_where(orderer_ids: current_user.id)
```
## Instant Search / Autocomplete
Autocomplete predicts what a user will type, making the search experience faster and easier.

**Note:** To autocomplete on search terms rather than results, check out [Autosuggest](https://github.com/ankane/autosuggest).
**Note 2:** If you only have a few thousand records, don’t use Searchkick for autocomplete. It’s *much* faster to load all records into JavaScript and autocomplete there (eliminates network requests).
First, specify which fields use this feature. This is necessary since autocomplete can increase the index size significantly, but don’t worry - this gives you blazing fast queries.
```ruby
class Movie < ApplicationRecord
searchkick word_start: [:title, :director]
end
```
Reindex and search with:
```ruby
Movie.search("jurassic pa").fields(:title).match(:word_start)
```
Use a front-end library like [typeahead.js](https://twitter.github.io/typeahead.js/) to show the results.
#### Here’s how to make it work with Rails
First, add a route and controller action.
```ruby
class MoviesController < ApplicationController
def autocomplete
render json: Movie.search(params[:query]).fields("title^5", "director")
.match(:word_start).limit(10).load(false).misspellings(below: 5).map(&:title)
end
end
```
**Note:** Use `load(false)` and `misspellings(below: n)` (or `misspellings(false)`) for best performance.
Then add the search box and JavaScript code to a view.
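```html
<input type="text" id="query" name="query" />

<script src="jquery.js"></script>
<script src="typeahead.bundle.js"></script>
<script>
  var movies = new Bloodhound({
    datumTokenizer: Bloodhound.tokenizers.whitespace,
    queryTokenizer: Bloodhound.tokenizers.whitespace,
    remote: {
      url: '/movies/autocomplete?query=%QUERY',
      wildcard: '%QUERY'
    }
  });
  $('#query').typeahead(null, {
    source: movies
  });
</script>
```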
## Suggestions

```ruby
class Product < ApplicationRecord
searchkick suggest: [:name] # fields to generate suggestions
end
```
Reindex and search with:
```ruby
products = Product.search("peantu butta").suggest
products.suggestions # ["peanut butter"]
```
## Aggregations
[Aggregations](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html) provide aggregated search data.

```ruby
products = Product.search("chuck taylor").aggs(:product_type, :gender, :brand)
products.aggs
```
By default, `where` conditions apply to aggregations.
```ruby
Product.search("wingtips").where(color: "brandy").aggs(:size)
# aggregations for brandy wingtips are returned
```
Change this with:
```ruby
Product.search("wingtips").where(color: "brandy").aggs(:size).smart_aggs(false)
# aggregations for all wingtips are returned
```
Set `where` conditions for each aggregation separately with:
```ruby
Product.search("wingtips").aggs(size: {where: {color: "brandy"}})
```
Limit
```ruby
Product.search("apples").aggs(store_id: {limit: 10})
```
Order
```ruby
Product.search("wingtips").aggs(color: {order: {"_key" => "asc"}}) # alphabetically
```
[All of these options are supported](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-order)
Ranges
```ruby
price_ranges = [{to: 20}, {from: 20, to: 50}, {from: 50}]
Product.search("*").aggs(price: {ranges: price_ranges})
```
Minimum document count
```ruby
Product.search("apples").aggs(store_id: {min_doc_count: 2})
```
Script support
```ruby
Product.search("*").aggs(color: {script: {source: "'Color: ' + _value"}})
```
Date histogram
```ruby
Product.search("pear").aggs(products_per_year: {date_histogram: {field: :created_at, interval: :year}})
```
For other aggregation types, including sub-aggregations, use `body_options`:
```ruby
Product.search("orange").body_options(aggs: {price: {histogram: {field: :price, interval: 10}}})
```
## Highlight
Specify which fields to index with highlighting.
```ruby
class Band < ApplicationRecord
searchkick highlight: [:name]
end
```
Highlight the search query in the results.
```ruby
bands = Band.search("cinema").highlight
```
View the highlighted fields with:
```ruby
bands.with_highlights.each do |band, highlights|
highlights[:name] # "Two Door <em>Cinema</em> Club"
end
```
To change the tag, use:
```ruby
Band.search("cinema").highlight(tag: "<strong>")
```
To highlight and search different fields, use:
```ruby
Band.search("cinema").fields(:name).highlight(fields: [:description])
```
By default, the entire field is highlighted. To get small snippets instead, use:
```ruby
bands = Band.search("cinema").highlight(fragment_size: 20)
bands.with_highlights(multiple: true).each do |band, highlights|
highlights[:name].join(" and ")
end
```
Additional options can be specified for each field:
```ruby
Band.search("cinema").fields(:name).highlight(fields: {name: {fragment_size: 200}})
```
You can find available highlight options in the [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html) or [OpenSearch](https://opensearch.org/docs/latest/search-plugins/searching-data/highlight/) reference.
## Similar Items
Find similar items
```ruby
product = Product.first
product.similar.fields(:name).where(size: "12 oz")
```
## Geospatial Searches
```ruby
class Restaurant < ApplicationRecord
searchkick locations: [:location]
def search_data
attributes.merge(location: {lat: latitude, lon: longitude})
end
end
```
Reindex and search with:
```ruby
Restaurant.search("pizza").where(location: {near: {lat: 37, lon: -114}, within: "100mi"}) # or 160km
```
Bounded by a box
```ruby
Restaurant.search("sushi").where(location: {top_left: {lat: 38, lon: -123}, bottom_right: {lat: 37, lon: -122}})
```
**Note:** `top_right` and `bottom_left` also work
Bounded by a polygon
```ruby
Restaurant.search("dessert").where(location: {geo_polygon: {points: [{lat: 38, lon: -123}, {lat: 39, lon: -123}, {lat: 37, lon: 122}]}})
```
### Boost By Distance
Boost results by distance - closer results are boosted more
```ruby
Restaurant.search("noodles").boost_by_distance(location: {origin: {lat: 37, lon: -122}})
```
Also supports [additional options](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html#function-decay)
```ruby
Restaurant.search("wings").boost_by_distance(location: {origin: {lat: 37, lon: -122}, function: "linear", scale: "30mi", decay: 0.5})
```
### Geo Shapes
You can also index and search geo shapes.
```ruby
class Restaurant < ApplicationRecord
searchkick geo_shape: [:bounds]
def search_data
attributes.merge(
bounds: {
type: "envelope",
coordinates: [{lat: 4, lon: 1}, {lat: 2, lon: 3}]
}
)
end
end
```
See the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-shape.html) for details.
Find shapes intersecting with the query shape
```ruby
Restaurant.search("soup").where(bounds: {geo_shape: {type: "polygon", coordinates: [[{lat: 38, lon: -123}, ...]]}})
```
Falling entirely within the query shape
```ruby
Restaurant.search("salad").where(bounds: {geo_shape: {type: "circle", relation: "within", coordinates: {lat: 38, lon: -123}, radius: "1km"}})
```
Not touching the query shape
```ruby
Restaurant.search("burger").where(bounds: {geo_shape: {type: "envelope", relation: "disjoint", coordinates: [{lat: 38, lon: -123}, {lat: 37, lon: -122}]}})
```
## Inheritance
Searchkick supports single table inheritance.
```ruby
class Dog < Animal
end
```
In your parent model, set:
```ruby
class Animal < ApplicationRecord
searchkick inheritance: true
end
```
The parent and child model can both reindex.
```ruby
Animal.reindex
Dog.reindex # equivalent, all animals reindexed
```
And to search, use:
```ruby
Animal.search("*") # all animals
Dog.search("*") # just dogs
Animal.search("*").type(Cat, Dog) # just cats and dogs
```
**Notes:**
1. The `suggest` option retrieves suggestions from the parent at the moment.
```ruby
Dog.search("airbudd").suggest # suggestions for all animals
```
2. This relies on a `type` field that is automatically added to the indexed document. Be wary of defining your own `type` field in `search_data`, as it will take precedence.
## Debugging Queries
To help with debugging queries, you can use:
```ruby
Product.search("soap").debug
```
This prints useful info to `stdout`.
See how the search server scores your queries with:
```ruby
Product.search("soap").explain.response
```
See how the search server tokenizes your queries with:
```ruby
Product.search_index.tokens("Dish Washer Soap", analyzer: "searchkick_index")
# ["dish", "dishwash", "washer", "washersoap", "soap"]
Product.search_index.tokens("dishwasher soap", analyzer: "searchkick_search")
# ["dishwashersoap"] - no match
Product.search_index.tokens("dishwasher soap", analyzer: "searchkick_search2")
# ["dishwash", "soap"] - match!!
```
Partial matches
```ruby
Product.search_index.tokens("San Diego", analyzer: "searchkick_word_start_index")
# ["s", "sa", "san", "d", "di", "die", "dieg", "diego"]
Product.search_index.tokens("dieg", analyzer: "searchkick_word_search")
# ["dieg"] - match!!
```
See the [complete list of analyzers](lib/searchkick/index_options.rb#L36).
## Testing
As you iterate on your search, it’s a good idea to add tests.
For performance, only enable Searchkick callbacks for the tests that need them.
### Rails
Add to your `test/test_helper.rb`:
```ruby
module ActiveSupport
class TestCase
parallelize_setup do |worker|
Searchkick.index_suffix = worker
# reindex models for parallel tests
Product.reindex
end
end
end
# reindex models for non-parallel tests
Product.reindex
# and disable callbacks
Searchkick.disable_callbacks
```
And use:
```ruby
class ProductTest < ActiveSupport::TestCase
setup do
Searchkick.enable_callbacks
end
teardown do
Searchkick.disable_callbacks
end
test "search" do
Product.create!(name: "Apple")
Product.search_index.refresh
assert_equal ["Apple"], Product.search("apple").map(&:name)
end
end
```
### Minitest
Add to your `test/test_helper.rb`:
```ruby
# reindex models
Product.reindex
# and disable callbacks
Searchkick.disable_callbacks
```
And use:
```ruby
class ProductTest < Minitest::Test
def setup
Searchkick.enable_callbacks
end
def teardown
Searchkick.disable_callbacks
end
def test_search
Product.create!(name: "Apple")
Product.search_index.refresh
assert_equal ["Apple"], Product.search("apple").map(&:name)
end
end
```
### RSpec
Add to your `spec/spec_helper.rb`:
```ruby
RSpec.configure do |config|
config.before(:suite) do
# reindex models
Product.reindex
# and disable callbacks
Searchkick.disable_callbacks
end
config.around(:each, search: true) do |example|
Searchkick.callbacks(nil) do
example.run
end
end
end
```
And use:
```ruby
describe Product, search: true do
it "searches" do
Product.create!(name: "Apple")
Product.search_index.refresh
assert_equal ["Apple"], Product.search("apple").map(&:name)
end
end
```
### Factory Bot
Define a trait for each model:
```ruby
FactoryBot.define do
factory :product do
trait :reindex do
after(:create) do |product, _|
product.reindex(refresh: true)
end
end
end
end
```
And use:
```ruby
FactoryBot.create(:product, :reindex)
```
### GitHub Actions
Check out [setup-elasticsearch](https://github.com/ankane/setup-elasticsearch) for an easy way to install Elasticsearch:
```yml
- uses: ankane/setup-elasticsearch@v1
```
And [setup-opensearch](https://github.com/ankane/setup-opensearch) for an easy way to install OpenSearch:
```yml
- uses: ankane/setup-opensearch@v1
```
## Deployment
For the search server, Searchkick uses `ENV["ELASTICSEARCH_URL"]` for Elasticsearch and `ENV["OPENSEARCH_URL"]` for OpenSearch. This defaults to `http://localhost:9200`.
- [Elastic Cloud](#elastic-cloud)
- [Amazon OpenSearch Service](#amazon-opensearch-service)
- [Heroku](#heroku)
- [Self-Hosted and Other](#self-hosted-and-other)
### Elastic Cloud
Create an initializer `config/initializers/elasticsearch.rb` with:
```ruby
ENV["ELASTICSEARCH_URL"] = "https://user:password@host:port"
```
Then deploy and reindex:
```sh
rake searchkick:reindex:all
```
### Amazon OpenSearch Service
Create an initializer `config/initializers/opensearch.rb` with:
```ruby
ENV["OPENSEARCH_URL"] = "https://es-domain-1234.us-east-1.es.amazonaws.com:443"
```
To use signed requests, include in your Gemfile:
```ruby
gem "faraday_middleware-aws-sigv4"
```
and add to your initializer:
```ruby
Searchkick.aws_credentials = {
access_key_id: ENV["AWS_ACCESS_KEY_ID"],
secret_access_key: ENV["AWS_SECRET_ACCESS_KEY"],
region: "us-east-1"
}
```
Then deploy and reindex:
```sh
rake searchkick:reindex:all
```
### Heroku
Choose an add-on: [Bonsai](https://elements.heroku.com/addons/bonsai), [SearchBox](https://elements.heroku.com/addons/searchbox), or [Elastic Cloud](https://elements.heroku.com/addons/foundelasticsearch).
For Elasticsearch on Bonsai:
```sh
heroku addons:create bonsai
heroku config:set ELASTICSEARCH_URL=`heroku config:get BONSAI_URL`
```
For OpenSearch on Bonsai:
```sh
heroku addons:create bonsai --engine=opensearch
heroku config:set OPENSEARCH_URL=`heroku config:get BONSAI_URL`
```
For SearchBox:
```sh
heroku addons:create searchbox:starter
heroku config:set ELASTICSEARCH_URL=`heroku config:get SEARCHBOX_URL`
```
For Elastic Cloud (previously Found):
```sh
heroku addons:create foundelasticsearch
heroku addons:open foundelasticsearch
```
Visit the Shield page and reset your password. You’ll need to add the username and password to your URL. Get the existing URL with:
```sh
heroku config:get FOUNDELASTICSEARCH_URL
```
And add `elastic:password@` right after `https://` and add port `9243` at the end:
```sh
heroku config:set ELASTICSEARCH_URL=https://elastic:password@12345.us-east-1.aws.found.io:9243
```
Then deploy and reindex:
```sh
heroku run rake searchkick:reindex:all
```
### Self-Hosted and Other
Create an initializer with:
```ruby
ENV["ELASTICSEARCH_URL"] = "https://user:password@host:port"
# or
ENV["OPENSEARCH_URL"] = "https://user:password@host:port"
```
Then deploy and reindex:
```sh
rake searchkick:reindex:all
```
### Data Protection
We recommend encrypting data at rest and in transit (even inside your own network). This is especially important if you send [personal data](https://en.wikipedia.org/wiki/Personally_identifiable_information) of your users to the search server.
Bonsai, Elastic Cloud, and Amazon OpenSearch Service all support encryption at rest and HTTPS.
### Automatic Failover
Create an initializer with multiple hosts:
```ruby
ENV["ELASTICSEARCH_URL"] = "https://user:password@host1,https://user:password@host2"
# or
ENV["OPENSEARCH_URL"] = "https://user:password@host1,https://user:password@host2"
```
### Client Options
Create an initializer with:
```ruby
Searchkick.client_options[:reload_connections] = true
```
See the docs for [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/advanced-config.html) or [OpenSearch](https://rubydoc.info/gems/opensearch-transport#configuration) for a complete list of options.
### Lograge
Add the following to `config/environments/production.rb`:
```ruby
config.lograge.custom_options = lambda do |event|
options = {}
options[:search] = event.payload[:searchkick_runtime] if event.payload[:searchkick_runtime].to_f > 0
options
end
```
See [Production Rails](https://github.com/ankane/production_rails) for other good practices.
## Performance
### Persistent HTTP Connections
Significantly increase performance with persistent HTTP connections. Add [Typhoeus](https://github.com/typhoeus/typhoeus) to your Gemfile and it’ll automatically be used.
```ruby
gem "typhoeus"
```
To reduce log noise, create an initializer with:
```ruby
Ethon.logger = Logger.new(nil)
```
### Searchable Fields
By default, all string fields are searchable (can be used in `fields` option). Speed up indexing and reduce index size by only making some fields searchable.
```ruby
class Product < ApplicationRecord
searchkick searchable: [:name]
end
```
### Filterable Fields
By default, all string fields are filterable (can be used in `where` option). Speed up indexing and reduce index size by only making some fields filterable.
```ruby
class Product < ApplicationRecord
searchkick filterable: [:brand]
end
```
**Note:** Non-string fields are always filterable and should not be passed to this option.
### Parallel Reindexing
For large data sets, you can use background jobs to parallelize reindexing.
```ruby
Product.reindex(mode: :async)
# {index_name: "products_production_20250111210018065"}
```
Once the jobs complete, promote the new index with:
```ruby
Product.search_index.promote(index_name)
```
You can optionally track the status with Redis:
```ruby
Searchkick.redis = Redis.new
```
And use:
```ruby
Searchkick.reindex_status(index_name)
```
You can also have Searchkick wait for reindexing to complete:
```ruby
Product.reindex(mode: :async, wait: true)
```
You can use your background job framework to control concurrency. For Solid Queue, create an initializer with:
```ruby
module SearchkickBulkReindexConcurrency
extend ActiveSupport::Concern
included do
limits_concurrency to: 3, key: ""
end
end
Rails.application.config.after_initialize do
Searchkick::BulkReindexJob.include(SearchkickBulkReindexConcurrency)
end
```
This will allow only 3 jobs to run at once.
### Refresh Interval
You can specify a longer refresh interval while reindexing to increase performance.
```ruby
Product.reindex(mode: :async, refresh_interval: "30s")
```
**Note:** This only makes a noticeable difference with parallel reindexing.
When promoting, have it restored to the value in your mapping (defaults to `1s`).
```ruby
Product.search_index.promote(index_name, update_refresh_interval: true)
```
### Queuing
Push ids of records needing reindexing to a queue and reindex in bulk for better performance. First, set up Redis in an initializer. We recommend using [connection_pool](https://github.com/mperham/connection_pool).
```ruby
Searchkick.redis = ConnectionPool.new { Redis.new }
```
And ask your models to queue updates.
```ruby
class Product < ApplicationRecord
searchkick callbacks: :queue
end
```
Then, set up a background job to run.
```ruby
Searchkick::ProcessQueueJob.perform_later(class_name: "Product")
```
You can check the queue length with:
```ruby
Product.search_index.reindex_queue.length
```
For more tips, check out [Keeping Elasticsearch in Sync](https://www.elastic.co/blog/found-keeping-elasticsearch-in-sync).
### Routing
Searchkick supports [routing](https://www.elastic.co/blog/customizing-your-document-routing), which can significantly speed up searches.
```ruby
class Business < ApplicationRecord
searchkick routing: true
def search_routing
city_id
end
end
```
Reindex and search with:
```ruby
Business.search("ice cream").routing(params[:city_id])
```
### Partial Reindexing
Reindex a subset of attributes to reduce time spent generating search data and cut down on network traffic.
```ruby
class Product < ApplicationRecord
def search_data
{
name: name,
category: category
}.merge(prices_data)
end
def prices_data
{
price: price,
sale_price: sale_price
}
end
end
```
And use:
```ruby
Product.reindex(:prices_data)
```
Ignore errors for missing documents with:
```ruby
Product.reindex(:prices_data, ignore_missing: true)
```
## Advanced
Searchkick makes it easy to use the Elasticsearch or OpenSearch DSL on its own.
### Advanced Mapping
Create a custom mapping:
```ruby
class Product < ApplicationRecord
searchkick mappings: {
properties: {
name: {type: "keyword"}
}
}
end
```
**Note:** If you use a custom mapping, you'll need to use [custom searching](#advanced-search) as well.
To keep the mappings and settings generated by Searchkick, use:
```ruby
class Product < ApplicationRecord
searchkick merge_mappings: true, mappings: {...}
end
```
### Advanced Search
Use the `body` option to search:
```ruby
products = Product.search.body(query: {match: {name: "milk"}})
```
View the response with:
```ruby
products.response
```
To modify the query generated by Searchkick, use:
```ruby
products = Product.search("milk").body_options(min_score: 1)
```
or
```ruby
products =
Product.search("apples") do |body|
body[:min_score] = 1
end
```
### Client
To access the `Elasticsearch::Client` or `OpenSearch::Client` directly, use:
```ruby
Searchkick.client
```
## Multi Search
To batch search requests for performance, use:
```ruby
products = Product.search("snacks")
coupons = Coupon.search("snacks")
Searchkick.multi_search([products, coupons])
```
Then use `products` and `coupons` as typical results.
**Note:** Errors are not raised as with single requests. Use the `error` method on each query to check for errors.
## Multiple Models
Search across multiple models with:
```ruby
Searchkick.search("milk").models(Product, Category)
```
Boost specific models with:
```ruby
indices_boost(Category => 2, Product => 1)
```
## Multi-Tenancy
Check out [this great post](https://www.tiagoamaro.com.br/2014/12/11/multi-tenancy-with-searchkick/) on the [Apartment](https://github.com/influitive/apartment) gem. Follow a similar pattern if you use another gem.
## Scroll API
Searchkick also supports the [scroll API](https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html#scroll-search-results). Scrolling is not intended for real time user requests, but rather for processing large amounts of data.
```ruby
Product.search("*").scroll("1m") do |batch|
# process batch ...
end
```
You can also scroll batches manually.
```ruby
products = Product.search("*").scroll("1m")
while products.any?
# process batch ...
products = products.scroll
end
products.clear_scroll
```
## Deep Paging
By default, Elasticsearch and OpenSearch limit paging to the first 10,000 results. [Here’s why](https://www.elastic.co/guide/en/elasticsearch/guide/current/pagination.html). We don’t recommend changing this, but if you really need all results, you can use:
```ruby
class Product < ApplicationRecord
searchkick deep_paging: true
end
```
If you just need an accurate total count, you can instead use:
```ruby
Product.search("pears").body_options(track_total_hits: true)
```
## Nested Data
To query nested data, use dot notation.
```ruby
Product.search("san").fields("store.city").where("store.zip_code" => 12345)
```
## Nearest Neighbor Search
*Available for Elasticsearch 8.6+ and OpenSearch 2.4+*
```ruby
class Product < ApplicationRecord
searchkick knn: {embedding: {dimensions: 3, distance: "cosine"}}
end
```
Also supports `euclidean` and `inner_product`
Reindex and search with:
```ruby
Product.search.knn(field: :embedding, vector: [1, 2, 3]).limit(10)
```
### HNSW Options
Nearest neighbor search uses [HNSW](https://en.wikipedia.org/wiki/Hierarchical_navigable_small_world) for indexing.
Specify `m` and `ef_construction`
```ruby
class Product < ApplicationRecord
searchkick knn: {embedding: {dimensions: 3, distance: "cosine", m: 16, ef_construction: 100}}
end
```
Specify `ef_search`
```ruby
Product.search.knn(field: :embedding, vector: [1, 2, 3], ef_search: 40).limit(10)
```
## Semantic Search
First, add [nearest neighbor search](#nearest-neighbor-search) to your model
```ruby
class Product < ApplicationRecord
searchkick knn: {embedding: {dimensions: 768, distance: "cosine"}}
end
```
Generate an embedding for each record (you can use an external service or a library like [Informers](https://github.com/ankane/informers))
```ruby
embed = Informers.pipeline("embedding", "Snowflake/snowflake-arctic-embed-m-v1.5")
embed_options = {model_output: "sentence_embedding", pooling: "none"} # specific to embedding model
Product.find_each do |product|
embedding = embed.(product.name, **embed_options)
product.update!(embedding: embedding)
end
```
For search, generate an embedding for the query (the query prefix is specific to the [embedding model](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5))
```ruby
query_prefix = "Represent this sentence for searching relevant passages: "
query_embedding = embed.(query_prefix + query, **embed_options)
```
And perform nearest neighbor search
```ruby
Product.search.knn(field: :embedding, vector: query_embedding).limit(20)
```
See a [full example](examples/semantic.rb)
## Hybrid Search
Perform keyword search and semantic search in parallel
```ruby
keyword_search = Product.search(query).limit(20)
semantic_search = Product.search.knn(field: :embedding, vector: query_embedding).limit(20)
Searchkick.multi_search([keyword_search, semantic_search])
```
To combine the results, use Reciprocal Rank Fusion (RRF)
```ruby
Searchkick::Reranking.rrf(keyword_search, semantic_search).first(5)
```
Or a reranking model
```ruby
rerank = Informers.pipeline("reranking", "mixedbread-ai/mxbai-rerank-xsmall-v1")
results = (keyword_search.to_a + semantic_search.to_a).uniq
rerank.(query, results.map(&:name)).first(5).map { |v| results[v[:doc_id]] }
```
See a [full example](examples/hybrid.rb)
## Reference
Reindex one record
```ruby
product = Product.find(1)
product.reindex
```
Reindex multiple records
```ruby
Product.where(store_id: 1).reindex
```
Reindex associations
```ruby
store.products.reindex
```
Remove old indices
```ruby
Product.search_index.clean_indices
```
Use custom settings
```ruby
class Product < ApplicationRecord
searchkick settings: {number_of_shards: 3}
end
```
Use a different index name
```ruby
class Product < ApplicationRecord
searchkick index_name: "products_v2"
end
```
Use a dynamic index name
```ruby
class Product < ApplicationRecord
searchkick index_name: -> { "#{name.tableize}-#{I18n.locale}" }
end
```
Prefix the index name
```ruby
class Product < ApplicationRecord
searchkick index_prefix: "datakick"
end
```
For all models
```ruby
Searchkick.index_prefix = "datakick"
```
Use a different term for boosting by conversions
```ruby
Product.search("banana").conversions_v2(term: "organic banana")
```
Define multiple conversion fields
```ruby
class Product < ApplicationRecord
has_many :searches, class_name: "Searchjoy::Search"
searchkick conversions_v2: ["unique_conversions", "total_conversions"]
def search_data
{
name: name,
unique_conversions: searches.group(:query).distinct.count(:user_id),
total_conversions: searches.group(:query).count
}
end
end
```
And specify which to use
```ruby
Product.search("banana") # boost by both fields (default)
Product.search("banana").conversions_v2("total_conversions") # only boost by total_conversions
Product.search("banana").conversions_v2(false) # no conversion boosting
```
Change timeout
```ruby
Searchkick.timeout = 15 # defaults to 10
```
Set a lower timeout for searches
```ruby
Searchkick.search_timeout = 3
```
Change the search method name
```ruby
Searchkick.search_method_name = :lookup
```
Change the queue name
```ruby
Searchkick.queue_name = :search_reindex # defaults to :searchkick
```
Change the queue name or priority for a model
```ruby
class Product < ApplicationRecord
searchkick job_options: {queue: "critical", priority: 10}
end
```
Change the queue name or priority for a specific call
```ruby
Product.reindex(mode: :async, job_options: {queue: "critical", priority: 10})
```
Change the parent job
```ruby
Searchkick.parent_job = "ApplicationJob" # defaults to "ActiveJob::Base"
```
Eager load associations
```ruby
Product.search("milk").includes(:brand, :stores)
```
Eager load different associations by model
```ruby
Searchkick.search("*").models(Product, Store).model_includes(Product => [:store], Store => [:product])
```
Run additional scopes on results
```ruby
Product.search("milk").scope_results(->(r) { r.with_attached_images })
```
Set opaque id for slow logs
```ruby
Product.search("milk").opaque_id("some-id")
# or
Searchkick.multi_search(searches, opaque_id: "some-id")
```
Specify default fields to search
```ruby
class Product < ApplicationRecord
searchkick default_fields: [:name]
end
```
Turn off special characters
```ruby
class Product < ApplicationRecord
# A will not match Ä
searchkick special_characters: false
end
```
Turn on stemming for conversions
```ruby
class Product < ApplicationRecord
searchkick stem_conversions: true
end
```
Make search case-sensitive
```ruby
class Product < ApplicationRecord
searchkick case_sensitive: true
end
```
**Note:** If misspellings are enabled (default), results with a single character case difference will match. Turn off misspellings if this is not desired.
Change import batch size
```ruby
class Product < ApplicationRecord
searchkick batch_size: 200 # defaults to 1000
end
```
Create index without importing
```ruby
Product.reindex(import: false)
```
Use a different id
```ruby
class Product < ApplicationRecord
def search_document_id
custom_id
end
end
```
Add [request parameters](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#search-search-api-query-params) like `search_type`
```ruby
Product.search("carrots").request_params(search_type: "dfs_query_then_fetch")
```
Set options across all models
```ruby
Searchkick.model_options = {
batch_size: 200
}
```
Reindex conditionally
```ruby
class Product < ApplicationRecord
searchkick callback_options: {if: :search_data_changed?}
def search_data_changed?
previous_changes.include?("name")
end
end
```
Reindex all models - Rails only
```sh
rake searchkick:reindex:all
```
Turn on misspellings after a certain number of characters
```ruby
Product.search("api").misspellings(prefix_length: 2) # api, apt, no ahi
```
BigDecimal values are indexed as floats by default so they can be used for boosting. Convert them to strings to keep full precision.
```ruby
class Product < ApplicationRecord
def search_data
{
units: units.to_s("F")
}
end
end
```
## Gotchas
### Consistency
Elasticsearch and OpenSearch are eventually consistent, meaning it can take up to a second for a change to reflect in search. You can use the `refresh` method to have it show up immediately.
```ruby
product.save!
Product.search_index.refresh
```
### Inconsistent Scores
Due to the distributed nature of Elasticsearch and OpenSearch, you can get incorrect results when the number of documents in the index is low. You can [read more about it here](https://www.elastic.co/blog/understanding-query-then-fetch-vs-dfs-query-then-fetch). To fix this, do:
```ruby
class Product < ApplicationRecord
searchkick settings: {number_of_shards: 1}
end
```
For convenience, this is set by default in the test environment.
## Upgrading
### 6.0
Searchkick 6 brings a new query builder API:
```ruby
Product.search("apples").where(in_stock: true).limit(10).offset(50)
```
All existing options can be used as methods, or you can continue to use the existing API.
This release also significantly improves the performance of searches when using conversions. To upgrade conversions without downtime, add `conversions_v2` to your model and an additional field to `search_data`:
```ruby
class Product < ApplicationRecord
searchkick conversions: [:conversions], conversions_v2: [:conversions_v2]
def search_data
conversions = searches.group(:query).distinct.count(:user_id)
{
conversions: conversions,
conversions_v2: conversions
}
end
end
```
Reindex, then remove `conversions`:
```ruby
class Product < ApplicationRecord
searchkick conversions_v2: [:conversions_v2]
def search_data
{
conversions_v2: searches.group(:query).distinct.count(:user_id)
}
end
end
```
Other improvements include the option to ignore errors for missing documents with partial reindexing and more customization for background jobs. Check out the [changelog](https://github.com/ankane/searchkick/blob/master/CHANGELOG.md) for the full list of changes.
## History
View the [changelog](https://github.com/ankane/searchkick/blob/master/CHANGELOG.md)
## Thanks
Thanks to Karel Minarik for [Elasticsearch Ruby](https://github.com/elasticsearch/elasticsearch-ruby) and [Tire](https://github.com/karmi/retire), Jaroslav Kalistsuk for [zero downtime reindexing](https://gist.github.com/jarosan/3124884), and Alex Leschenko for [Elasticsearch autocomplete](https://github.com/leschenko/elasticsearch_autocomplete).
## Contributing
Everyone is encouraged to help improve this project. Here are a few ways you can help:
- [Report bugs](https://github.com/ankane/searchkick/issues)
- Fix bugs and [submit pull requests](https://github.com/ankane/searchkick/pulls)
- Write, clarify, or fix documentation
- Suggest or add new features
To get started with development:
```sh
git clone https://github.com/ankane/searchkick.git
cd searchkick
bundle install
bundle exec rake test
```
Feel free to open an issue to get feedback on your idea before spending too much time on it.
================================================
FILE: Rakefile
================================================
require "bundler/gem_tasks"
require "rake/testtask"
# Test task: picks up every *_test.rb file under test/ (including subdirectories).
Rake::TestTask.new { |test_task| test_task.pattern = "test/**/*_test.rb" }

# Running `rake` with no arguments runs the test task.
task(default: :test)
# to test in parallel, uncomment and run:
# rake parallel:test
# require "parallel_tests/tasks"
================================================
FILE: benchmark/Gemfile
================================================
source "https://rubygems.org"
gemspec path: "../"
gem "sqlite3"
gem "pg"
gem "activerecord", "~> 8.0.0"
gem "activejob"
gem "elasticsearch"
# gem "opensearch-ruby"
gem "redis"
gem "sidekiq"
# performance
gem "typhoeus"
gem "oj"
gem "json"
# profiling
gem "ruby-prof"
gem "allocation_stats"
gem "get_process_mem"
gem "memory_profiler"
# gem "allocation_tracer"
gem "benchmark-ips"
================================================
FILE: benchmark/index.rb
================================================
require "bundler/setup"
Bundler.require(:default)
require "active_record"
require "active_job"
require "benchmark"
require "active_support/notifications"
# Debug hook for "request.searchkick" notifications: wraps each notification
# in an Event. Uncomment the puts below to print the duration of every request.
ActiveSupport::Notifications.subscribe "request.searchkick" do |*args|
event = ActiveSupport::Notifications::Event.new(*args)
# puts "Import: #{event.duration.round}ms"
end
# ActiveJob::Base.queue_adapter = :sidekiq
# Serializer exposing a single `dump` method backed by the JSON library.
# It is only put to use when one of the commented-out
# `API.settings[:serializer]` assignments in this script is enabled.
class SearchSerializer
  # Encodes +object+ as a JSON string.
  def dump(object)
    ::JSON.generate(object)
  end
end
# Elasticsearch::API.settings[:serializer] = SearchSerializer.new
# OpenSearch::API.settings[:serializer] = SearchSerializer.new
# Redis connection handed to Searchkick (the README uses it for reindex status tracking and queued callbacks).
Searchkick.redis = Redis.new
# Active Record setup: UTC timestamps, time-zone-aware attributes, and a SQLite database file under /tmp.
ActiveRecord.default_timezone = :utc
ActiveRecord::Base.time_zone_aware_attributes = true
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: "/tmp/searchkick"
# Alternative: benchmark against PostgreSQL instead of SQLite.
# ActiveRecord::Base.establish_connection "postgresql://localhost/searchkick_bench"
# ActiveRecord::Base.logger = Logger.new(STDOUT)
# Silence Active Job logging for the benchmark run.
ActiveJob::Base.logger = nil
# Benchmark model: searchable product imported in batches of 1000.
class Product < ActiveRecord::Base
  searchkick batch_size: 1000

  # Attributes sent to the search index for this record.
  def search_data
    document = {}
    document[:name] = name
    document[:color] = color
    document[:store_id] = store_id
    document
  end
end
# One-time seeding, enabled by setting the SETUP environment variable:
# recreates the products table and bulk-inserts the generated rows.
if ENV["SETUP"]
  total_docs = 100_000

  ActiveRecord::Schema.define do
    create_table :products, force: :cascade do |t|
      t.string :name
      t.string :color
      t.integer :store_id
    end
  end

  # Build all rows in memory first so they can be written with a single insert_all.
  records = Array.new(total_docs) do |i|
    {
      name: "Product #{i}",
      color: ["red", "blue"].sample,
      store_id: rand(10)
    }
  end

  Product.insert_all(records)
  puts "Imported"
end
# Slots for the optional profilers; each stays nil unless the matching
# wrapper inside the timed block below is uncommented.
result = nil
report = nil
stats = nil
# Best-effort removal of the existing index; any error raised here is ignored
# (presumably to tolerate a missing index on the first run).
Product.searchkick_index.delete rescue nil
# Collect garbage once, then disable GC for the rest of the script.
GC.start
GC.disable
# Memory baseline in MB; recorded but not reported anywhere in this script.
start_mem = GetProcessMem.new.mb
# Wall-clock time of a full reindex. To profile, uncomment exactly one of the
# profiler wrappers together with the matching `# end` line.
time =
Benchmark.realtime do
# result = RubyProf::Profile.profile do
# report = MemoryProfiler.report do
# stats = AllocationStats.trace do
reindex = Product.reindex # (mode: :async)
# p reindex
# end
# Optional polling loop for async reindexing: waits up to 60 seconds for the
# document count to reach total_docs.
# 60.times do |i|
# if reindex.is_a?(Hash)
# docs = Searchkick::Index.new(reindex[:index_name]).total_docs
# else
# docs = Product.searchkick_index.total_docs
# end
# puts "#{i}: #{docs}"
# if docs == total_docs
# break
# end
# p Searchkick.reindex_status(reindex[:index_name]) if reindex.is_a?(Hash)
# sleep(1)
# # Product.searchkick_index.refresh
# end
end
# Report the wall-clock time of the benchmarked section.
puts "Time: #{time.round(1)}s"

# Each report below is printed only when its profiler was enabled in the
# timed block; otherwise the corresponding variable is still nil.

# ruby-prof call graph, limited to entries taking at least 5% of the time.
if result
  printer = RubyProf::GraphPrinter.new(result)
  printer.print(STDOUT, min_percent: 5)
end

# memory_profiler report.
if report
  puts report.pretty_print
end

# allocation_stats summary grouped by source file and class. The trace is held
# in `stats`; `result` belongs to ruby-prof and is nil when only allocation
# tracing is enabled, so it must not be used here.
if stats
  puts stats.allocations(alias_paths: true).group_by(:sourcefile, :class).to_text
end
================================================
FILE: benchmark/relation.rb
================================================
require "bundler/setup"
Bundler.require(:default)
require "active_record"
# Bare searchable model used to measure query-builder allocations.
class Product < ActiveRecord::Base
  searchkick
end

Product.all # initial Active Record allocations

# Trace the allocations made while chaining the query builder calls.
stats = AllocationStats.trace do
  Product
    .search("apples")
    .where(store_id: 1)
    .where(in_stock: true)
    .order(:name)
    .limit(10)
    .offset(50)
end

allocations = stats.allocations(alias_paths: true)
puts allocations.to_text
================================================
FILE: benchmark/search.rb
================================================
require "bundler/setup"
Bundler.require(:default)
require "active_record"
require "benchmark/ips"
# Active Record setup: UTC timestamps, time-zone-aware attributes, and a SQLite database file under /tmp.
ActiveRecord.default_timezone = :utc
ActiveRecord::Base.time_zone_aware_attributes = true
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: "/tmp/searchkick"
# Benchmark model: searchable product imported in batches of 1000.
class Product < ActiveRecord::Base
  searchkick batch_size: 1000

  # Attributes sent to the search index for this record.
  def search_data
    document = {}
    document[:name] = name
    document[:color] = color
    document[:store_id] = store_id
    document
  end
end
# One-time seeding, enabled by setting the SETUP environment variable:
# recreates the products table, bulk-inserts the generated rows, and reindexes.
if ENV["SETUP"]
  total_docs = 1_000_000

  ActiveRecord::Schema.define do
    create_table :products, force: :cascade do |t|
      t.string :name
      t.string :color
      t.integer :store_id
    end
  end

  # Build all rows in memory first so they can be written with a single insert_all.
  records = Array.new(total_docs) do |i|
    {
      name: "Product #{i}",
      color: ["red", "blue"].sample,
      store_id: rand(10)
    }
  end

  Product.insert_all(records)
  puts "Imported"

  Product.reindex
  puts "Reindexed"
end
# Build the filtered search once and print its request body for reference.
query = Product.search(
  "product",
  fields: [:name],
  where: {color: "red", store_id: 5},
  limit: 10000,
  load: false
)
pp query.body.as_json
puts

# Each iteration loads a fresh copy of the query.
Benchmark.ips { |bench| bench.report { query.dup.load } }
================================================
FILE: examples/Gemfile
================================================
source "https://rubygems.org"
gemspec path: ".."
gem "activerecord"
gem "elasticsearch"
gem "informers"
gem "opensearch-ruby"
gem "sqlite3"
================================================
FILE: examples/hybrid.rb
================================================
require "bundler/setup"
require "active_record"
require "elasticsearch" # or "opensearch-ruby"
require "informers"
require "searchkick"
# Hybrid search walkthrough: keyword search plus nearest neighbor search,
# combined with Reciprocal Rank Fusion or a reranking model.

# In-memory SQLite database with a products table (name + JSON embedding column).
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
ActiveRecord::Schema.verbose = false
ActiveRecord::Schema.define do
create_table :products do |t|
t.string :name
t.json :embedding
end
end
# Searchable model with a 768-dimension embedding field compared by cosine distance.
class Product < ActiveRecord::Base
searchkick knn: {embedding: {dimensions: 768, distance: "cosine"}}
end
# Reindex before any records exist; the records created and updated below are
# presumably indexed through the model callbacks.
Product.reindex
Product.create!(name: "Breakfast cereal")
Product.create!(name: "Ice cream")
Product.create!(name: "Eggs")
# Generate and store an embedding for each product name.
embed = Informers.pipeline("embedding", "Snowflake/snowflake-arctic-embed-m-v1.5")
embed_options = {model_output: "sentence_embedding", pooling: "none"} # specific to embedding model
Product.find_each do |product|
embedding = embed.(product.name, **embed_options)
product.update!(embedding: embedding)
end
# Refresh so the changes above show up in search immediately.
Product.search_index.refresh
query = "breakfast"
keyword_search = Product.search(query, limit: 20)
# the query prefix is specific to the embedding model (https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5)
query_prefix = "Represent this sentence for searching relevant passages: "
query_embedding = embed.(query_prefix + query, **embed_options)
semantic_search = Product.search(knn: {field: :embedding, vector: query_embedding}, limit: 20)
# Batch both searches into a single multi search call.
Searchkick.multi_search([keyword_search, semantic_search])
# to combine the results, use Reciprocal Rank Fusion (RRF)
p Searchkick::Reranking.rrf(keyword_search, semantic_search).first(5).map { |v| v[:result].name }
# or a reranking model
rerank = Informers.pipeline("reranking", "mixedbread-ai/mxbai-rerank-xsmall-v1")
# Merge both result lists, dropping duplicates, then map the reranker's doc_id values back to records.
results = (keyword_search.to_a + semantic_search.to_a).uniq
p rerank.(query, results.map(&:name)).first(5).map { |v| results[v[:doc_id]] }.map(&:name)
================================================
FILE: examples/semantic.rb
================================================
require "bundler/setup"
require "active_record"
require "elasticsearch" # or "opensearch-ruby"
require "informers"
require "searchkick"
# Semantic search walkthrough: embed product names, then run a nearest
# neighbor search with an embedded query.

# In-memory SQLite database with a products table (name + JSON embedding column).
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
ActiveRecord::Schema.verbose = false
ActiveRecord::Schema.define do
create_table :products do |t|
t.string :name
t.json :embedding
end
end
# Searchable model with a 768-dimension embedding field compared by cosine distance.
class Product < ActiveRecord::Base
searchkick knn: {embedding: {dimensions: 768, distance: "cosine"}}
end
# Reindex before any records exist; the records created and updated below are
# presumably indexed through the model callbacks.
Product.reindex
Product.create!(name: "Cereal")
Product.create!(name: "Ice cream")
Product.create!(name: "Eggs")
# Generate and store an embedding for each product name.
embed = Informers.pipeline("embedding", "Snowflake/snowflake-arctic-embed-m-v1.5")
embed_options = {model_output: "sentence_embedding", pooling: "none"} # specific to embedding model
Product.find_each do |product|
embedding = embed.(product.name, **embed_options)
product.update!(embedding: embedding)
end
# Refresh so the changes above show up in search immediately.
Product.search_index.refresh
query = "breakfast"
# the query prefix is specific to the embedding model (https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5)
query_prefix = "Represent this sentence for searching relevant passages: "
query_embedding = embed.(query_prefix + query, **embed_options)
# Nearest neighbor search on the embedding field; prints the matching product names.
pp Product.search(knn: {field: :embedding, vector: query_embedding}, limit: 20).map(&:name)
================================================
FILE: gemfiles/activerecord72.gemfile
================================================
source "https://rubygems.org"
gemspec path: ".."
gem "rake"
gem "minitest"
gem "sqlite3"
gem "activerecord", "~> 7.2.0"
gem "actionpack", "~> 7.2.0"
gem "activejob", "~> 7.2.0", require: "active_job"
gem "elasticsearch", "~> 8"
gem "redis-client"
gem "connection_pool"
gem "kaminari"
gem "gemoji-parser"
================================================
FILE: gemfiles/activerecord80.gemfile
================================================
source "https://rubygems.org"
gemspec path: ".."
gem "rake"
gem "minitest"
gem "sqlite3"
gem "activerecord", "~> 8.0.0"
gem "actionpack", "~> 8.0.0"
gem "activejob", "~> 8.0.0", require: "active_job"
gem "elasticsearch", "~> 9"
gem "redis-client"
gem "connection_pool"
gem "kaminari"
gem "gemoji-parser"
================================================
FILE: gemfiles/mongoid8.gemfile
================================================
source "https://rubygems.org"
gemspec path: ".."
gem "rake"
gem "minitest"
gem "mongoid", "~> 8"
gem "activejob", require: "active_job"
gem "redis"
gem "elasticsearch", "~> 8"
gem "actionpack"
gem "kaminari"
gem "gemoji-parser"
gem "ostruct" # for mongoid
================================================
FILE: gemfiles/mongoid9.gemfile
================================================
source "https://rubygems.org"
gemspec path: ".."
gem "rake"
gem "minitest"
gem "mongoid", "~> 9"
gem "activejob", require: "active_job"
gem "redis"
gem "elasticsearch", "~> 9"
gem "actionpack"
gem "kaminari"
gem "gemoji-parser"
gem "ostruct" # for mongoid
================================================
FILE: gemfiles/opensearch2.gemfile
================================================
source "https://rubygems.org"
gemspec path: ".."
gem "rake"
gem "minitest"
gem "sqlite3"
gem "activerecord", "~> 7.2.0"
gem "actionpack", "~> 7.2.0"
gem "activejob", "~> 7.2.0", require: "active_job"
gem "opensearch-ruby", "~> 2"
gem "redis-client"
gem "connection_pool"
gem "kaminari"
gem "gemoji-parser"
gem "parallel_tests"
gem "typhoeus"
================================================
FILE: gemfiles/opensearch3.gemfile
================================================
source "https://rubygems.org"
gemspec path: ".."
gem "rake"
gem "minitest"
gem "sqlite3"
gem "activerecord", "~> 8.0.0"
gem "actionpack", "~> 8.0.0"
gem "activejob", "~> 8.0.0", require: "active_job"
gem "opensearch-ruby", "~> 3"
gem "redis-client"
gem "connection_pool"
gem "kaminari"
gem "gemoji-parser"
gem "parallel_tests"
gem "typhoeus"
================================================
FILE: lib/searchkick/bulk_reindex_job.rb
================================================
module Searchkick
  # Background job that loads a set of records for a model and reindexes them inline.
  class BulkReindexJob < Searchkick.parent_job.constantize
    queue_as { Searchkick.queue_name }

    # class_name     - model name, resolved through Searchkick.load_model
    # record_ids     - ids to load; when nil, the range min_id..max_id is used instead
    # index_name     - passed to the model's searchkick_index
    # method_name    - forwarded to RecordIndexer#reindex
    # batch_id       - when given, the batch is reported as completed afterwards
    # ignore_missing - forwarded to RecordIndexer#reindex
    def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil, ignore_missing: nil)
      model = Searchkick.load_model(class_name)
      index = model.searchkick_index(name: index_name)
      ids = record_ids || (min_id..max_id)

      records = Searchkick.load_records(Searchkick.scope(model), ids)
      records = records.search_import if records.respond_to?(:search_import)

      RecordIndexer.new(index).reindex(
        records,
        mode: :inline,
        method_name: method_name,
        ignore_missing: ignore_missing,
        full: false
      )
      RelationIndexer.new(index).batch_completed(batch_id) if batch_id
    end
  end
end
================================================
FILE: lib/searchkick/controller_runtime.rb
================================================
# based on https://gist.github.com/mnutt/566725
module Searchkick
  # Controller concern that accumulates the runtime reported by
  # Searchkick::LogSubscriber and adds it to the request log line.
  module ControllerRuntime
    extend ActiveSupport::Concern

    protected

    attr_internal :searchkick_runtime

    def process_action(action, *args)
      # We also need to reset the runtime before each action
      # because of queries in middleware or in cases we are streaming
      # and it won't be cleaned up by the method below.
      Searchkick::LogSubscriber.reset_runtime
      super
    end

    # Splits the Searchkick runtime into the part measured before rendering and the
    # part measured during rendering; the latter is subtracted from the view runtime.
    def cleanup_view_runtime
      before_render = Searchkick::LogSubscriber.reset_runtime
      view_runtime = super
      during_render = Searchkick::LogSubscriber.reset_runtime
      self.searchkick_runtime = before_render + during_render
      view_runtime - during_render
    end

    # Stores the total Searchkick runtime in the payload under :searchkick_runtime.
    def append_info_to_payload(payload)
      super
      recorded = searchkick_runtime || 0
      payload[:searchkick_runtime] = recorded + Searchkick::LogSubscriber.reset_runtime
    end

    module ClassMethods
      # Appends a "Searchkick: N.Nms" entry when the recorded runtime is positive.
      def log_process_action(payload)
        messages = super
        elapsed = payload[:searchkick_runtime].to_f
        messages << format("Searchkick: %.1fms", elapsed) if elapsed > 0
        messages
      end
    end
  end
end
================================================
FILE: lib/searchkick/hash_wrapper.rb
================================================
module Searchkick
  # Read-only wrapper around a Hash with string keys that exposes
  # each key as a reader method (wrapper.name == wrapper["name"]).
  class HashWrapper
    # attributes - the Hash to wrap (keys are looked up as strings)
    def initialize(attributes)
      @attributes = attributes
    end

    # Looks up an attribute; +name+ may be a String or Symbol.
    def [](name)
      @attributes[name.to_s]
    end

    # Returns the wrapped Hash itself (not a copy).
    def to_h
      @attributes
    end

    def as_json(...)
      @attributes.as_json(...)
    end

    def to_json(...)
      @attributes.to_json(...)
    end

    # Exposes attributes as reader methods; unknown names fall through to
    # the default behavior (NoMethodError).
    def method_missing(name, ...)
      if @attributes.key?(name.to_s)
        self[name]
      else
        super
      end
    end

    def respond_to_missing?(name, ...)
      @attributes.key?(name.to_s) || super
    end

    # Lists attributes whose key does not start with an underscore, with id first.
    def inspect
      visible = @attributes.reject { |k, _v| k[0] == "_" }
      # move id to start, wherever it sits in the hash
      # (previously the last attribute was moved, whether or not it was the id)
      id_pairs, other_pairs = visible.partition { |k, _v| k.to_s == "id" }
      attributes = (id_pairs + other_pairs).map { |k, v| "#{k}: #{v.inspect}" }
      "#<#{self.class.name} #{attributes.join(", ")}>"
    end
  end
end
================================================
FILE: lib/searchkick/index.rb
================================================
module Searchkick
class Index
attr_reader :name, :options
# name    - name used for every request made through this object
# options - Searchkick options for the index (defaults to an empty Hash)
def initialize(name, options = {})
  @name, @options = name, options
  @klass_document_type = {} # cache
end
# Returns the settings/mappings produced by IndexOptions for this index.
def index_options
  builder = IndexOptions.new(self)
  builder.index_options
end
# Creates the index called +name+, sending +body+ as the request body.
def create(body = {})
  client.indices.create(index: name, body: body)
end
# Deletes the index. When +name+ is an alias, the indices behind it are deleted.
def delete
  target =
    if alias_exists?
      # can't call delete directly on aliases in ES 6
      client.indices.get_alias(name: name).keys
    else
      name
    end
  client.indices.delete(index: target)
end
# Asks the server whether an index called +name+ exists.
def exists?
  client.indices.exists(index: name)
end
# Issues a refresh request for the index.
def refresh
  client.indices.refresh(index: name)
end
# Asks the server whether an alias called +name+ exists.
def alias_exists?
  client.indices.exists_alias(name: name)
end
# Returns the index mapping.
# call to_h for consistent results between elasticsearch gem 7 and 8
# could do for all API calls, but just do for ones where return value is focus for now
def mapping
  response = client.indices.get_mapping(index: name)
  response.to_h
end
# Returns the index settings.
# call to_h for consistent results between elasticsearch gem 7 and 8
def settings
  response = client.indices.get_settings(index: name)
  response.to_h
end
# Reads "refresh_interval" from the index-level settings returned by the server.
def refresh_interval
  current = index_settings
  current["refresh_interval"]
end
# Sends +settings+ as the body of a put_settings request for this index.
def update_settings(settings)
  client.indices.put_settings(index: name, body: settings)
end
# Runs the analyze API on +text+ and returns the token strings.
# +options+ is merged into the request body (e.g. to pick an analyzer).
def tokens(text, options = {})
  request = {text: text}.merge(options)
  response = client.indices.analyze(body: request, index: name)
  response["tokens"].map { |token| token["token"] }
end
# Returns the total hit count of a match_all search (no hits fetched).
def total_docs
  count_query = {
    query: {match_all: {}},
    size: 0,
    track_total_hits: true
  }
  response = client.search(index: name, body: count_query)
  Results.new(nil, response).total_count
end
# Points the alias +name+ at +new_name+, removing it from every index that
# currently carries it, in a single update_aliases request.
#
# update_refresh_interval - when true, first sets the refresh interval of the new
#                           index to the configured value (default "1s")
def promote(new_name, update_refresh_interval: false)
  if update_refresh_interval
    new_index = Index.new(new_name, @options)
    configured = options[:settings] || {}
    interval = (configured[:index] && configured[:index][:refresh_interval]) || "1s"
    new_index.update_settings(index: {refresh_interval: interval})
  end

  current_names =
    begin
      client.indices.get_alias(name: name).keys
    rescue => e
      # a missing alias simply means there is nothing to remove
      raise e unless Searchkick.not_found_error?(e)
      []
    end

  removals = current_names.map { |old_name| {remove: {index: old_name, alias: name}} }
  actions = removals + [{add: {index: new_name, alias: name}}]
  client.indices.update_aliases(body: {actions: actions})
end
alias_method :swap, :promote
# Fetches the stored "_source" of +record+ from the index.
def retrieve(record)
  data = RecordData.new(self, record).record_data
  # remove underscore
  request = data.transform_keys { |key| key.to_s.delete_prefix("_").to_sym }
  response = client.get(request)
  response["_source"]
end
# Returns the names of indices matching "<name>_<14 to 17 digits>".
#
# unaliased - when true, only indices without aliases are returned
def all_indices(unaliased: false)
  indices =
    begin
      api = client.indices
      api.respond_to?(:get_alias) ? api.get_alias(index: "#{name}*") : api.get_aliases
    rescue => e
      raise e unless Searchkick.not_found_error?(e)
      {}
    end

  if unaliased
    indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? }
  end

  timestamped = /\A#{Regexp.escape(name)}_\d{14,17}\z/
  indices.select { |k, _v| k =~ timestamped }.keys
end
# remove old indices that start w/ index_name
# Only unaliased indices are deleted; their names are returned.
def clean_indices
  all_indices(unaliased: true).each do |index_name|
    Index.new(index_name).delete
  end
end
# Queues an index operation for +record+, wrapped in a "Store" notification.
def store(record)
  notify(record, "Store") { queue_index([record]) }
end
# Queues a delete operation for +record+, wrapped in a "Remove" notification.
def remove(record)
  notify(record, "Remove") { queue_delete([record]) }
end
# Queues a partial update of +record+ using the data returned by +method_name+,
# wrapped in an "Update" notification.
def update_record(record, method_name)
  notify(record, "Update") do
    # queue_update declares ignore_missing as a keyword; pass it explicitly so the
    # call does not fail with "missing keyword: :ignore_missing"
    queue_update([record], method_name, ignore_missing: nil)
  end
end
# Queues delete operations for +records+ under one "Delete" notification.
# Does nothing for an empty collection.
def bulk_delete(records)
  return if records.empty?

  notify_bulk(records, "Delete") { queue_delete(records) }
end
# Queues index operations for +records+ under one "Import" notification.
# Does nothing for an empty collection. Also available as #import.
def bulk_index(records)
  return if records.empty?

  notify_bulk(records, "Import") { queue_index(records) }
end
alias_method :import, :bulk_index
# Queues partial updates for +records+ under one "Update" notification.
# Does nothing for an empty collection.
#
# method_name    - name of the method providing the update data
# ignore_missing - forwarded to queue_update
def bulk_update(records, method_name, ignore_missing: nil)
  return if records.empty?

  notify_bulk(records, "Update") do
    queue_update(records, method_name, ignore_missing: ignore_missing)
  end
end
# Returns the search id RecordData computes for +record+ in this index.
def search_id(record)
  data = RecordData.new(self, record)
  data.search_id
end
# Returns the document type RecordData computes for +record+ in this index.
def document_type(record)
  data = RecordData.new(self, record)
  data.document_type
end
# Searches for documents similar to +record+.
# Defaults: per_page is 10; models is [record.class] unless a :model option was passed.
def similar_record(record, **options)
  options[:per_page] = 10 unless options[:per_page]
  options[:similar] = [RecordData.new(self, record).record_data]
  unless options.key?(:model)
    options[:models] ||= [record.class]
  end
  Searchkick.search("*", **options)
end
# Asks the server to reload the search analyzers of this index.
# On Elasticsearch, a "not allowed" response is re-raised as Searchkick's Error.
def reload_synonyms
  if Searchkick.opensearch?
    return client.transport.perform_request("POST", "_plugins/_refresh_search_analyzers/#{CGI.escape(name)}")
  end

  begin
    client.transport.perform_request("GET", "#{CGI.escape(name)}/_reload_search_analyzers")
  rescue => e
    raise Error, "Requires non-OSS version of Elasticsearch" if Searchkick.not_allowed_error?(e)
    raise e
  end
end
# queue
# Returns a new ReindexQueue for this index name.
def reindex_queue
  queue_name = name
  ReindexQueue.new(queue_name)
end
# reindex
# note: this is designed to be used internally
# so it does not check object matches index class
#
# Dispatches to one of three paths:
# - an Array of records is reindexed through reindex_records
# - a method_name, or a scoped relation without full: true, imports into the current index
# - anything else performs a full reindex into a new index
#
# Raises Error when +object+ is not an Array and does not respond to searchkick_klass.
def reindex(object, method_name: nil, ignore_missing: nil, full: false, **options)
  index_job_options = @options[:job_options]
  if index_job_options
    options[:job_options] = index_job_options.merge(options[:job_options] || {})
  end

  # note: purposefully skip full
  return reindex_records(object, method_name: method_name, ignore_missing: ignore_missing, **options) if object.is_a?(Array)

  raise Error, "Cannot reindex object" unless object.respond_to?(:searchkick_klass)

  scoped = Searchkick.relation?(object)
  # call searchkick_klass for inheritance
  relation = scoped ? object.all : Searchkick.scope(object.searchkick_klass).all

  refresh = options.key?(:refresh) ? options.delete(:refresh) : !scoped

  unless method_name || (scoped && !full)
    async = options.delete(:async)
    if async
      if async.is_a?(Hash) && async[:wait]
        Searchkick.warn "async option is deprecated - use mode: :async, wait: true instead"
        options[:wait] = true unless options.key?(:wait)
      else
        Searchkick.warn "async option is deprecated - use mode: :async instead"
      end
      options[:mode] ||= :async
    end
    return full_reindex(relation, **options)
  end

  mode = options.delete(:mode) || :inline
  scope = options.delete(:scope)
  job_options = options.delete(:job_options)
  raise ArgumentError, "unsupported keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?

  # import only
  import_scope(relation, method_name: method_name, mode: mode, scope: scope, ignore_missing: ignore_missing, job_options: job_options)
  self.refresh if refresh
  true
end
# Creates a new index named "<name>_<timestamp>" (timestamp with milliseconds)
# and returns the Index object for it.
#
# index_options - body for the create request; defaults to this index's index_options
def create_index(index_options: nil)
  index_options ||= self.index_options
  timestamp = Time.now.strftime('%Y%m%d%H%M%S%L')
  Index.new("#{name}_#{timestamp}", @options).tap do |index|
    index.create(index_options)
  end
end
# Hands +relation+ and +options+ to this index's RelationIndexer.
def import_scope(relation, **options)
  indexer = relation_indexer
  indexer.reindex(relation, **options)
end
# Returns the number of batches left as reported by this index's RelationIndexer.
def batches_left
  indexer = relation_indexer
  indexer.batches_left
end
# private
# Returns the document type for +klass+, memoized per (klass, ignore_type) pair.
# Uses the :_type searchkick option (called when callable) unless +ignore_type+
# is set; otherwise falls back to the underscored model name.
def klass_document_type(klass, ignore_type = false)
  @klass_document_type[[klass, ignore_type]] ||= begin
    custom_type = klass.searchkick_klass.searchkick_options[:_type] unless ignore_type
    if custom_type
      custom_type.respond_to?(:call) ? custom_type.call : custom_type
    else
      klass.model_name.to_s.underscore
    end
  end
end
# private
# The :conversions option as a list containing every field both as String and as Symbol.
def conversions_fields
  @conversions_fields ||= Array(options[:conversions]).then do |fields|
    fields.map(&:to_s) + fields.map(&:to_sym)
  end
end
# private
# The :conversions_v2 option as a memoized list of Strings.
def conversions_v2_fields
  return @conversions_v2_fields if @conversions_v2_fields

  @conversions_v2_fields = Array(options[:conversions_v2]).map(&:to_s)
end
# private
# The :suggest option as a memoized list of Strings.
def suggest_fields
  return @suggest_fields if @suggest_fields

  @suggest_fields = Array(options[:suggest]).map(&:to_s)
end
# private
# The :locations option as a list containing every field both as String and as Symbol.
def locations_fields
  @locations_fields ||= Array(options[:locations]).then do |fields|
    fields.map(&:to_s) + fields.map(&:to_sym)
  end
end
# private
# Reads "uuid" from the index-level settings returned by the server.
def uuid
  current = index_settings
  current["uuid"]
end
protected
# The client shared through Searchkick.client.
def client
  Searchkick.client
end
# Queues the index payload of each record on the current indexer.
def queue_index(records)
  indexer = Searchkick.indexer
  items = records.map { |record| RecordData.new(self, record).index_data }
  indexer.queue(items)
end
# Queues the delete payload of each record on the current indexer.
# Records with a blank id are skipped.
def queue_delete(records)
  indexer = Searchkick.indexer
  with_id = records.reject { |record| record.id.blank? }
  items = with_id.map { |record| RecordData.new(self, record).delete_data }
  indexer.queue(items)
end
# Queues the partial-update payload of each record on the current indexer.
#
# method_name    - passed to RecordData#update_data
# ignore_missing - when truthy, each item is tagged with an @ignore_missing instance
#                  variable (Indexer checks for it when a document is missing).
#                  Defaults to nil so callers that have no preference can omit it.
def queue_update(records, method_name, ignore_missing: nil)
  items = records.map { |r| RecordData.new(self, r).update_data(method_name) }
  items.each { |i| i.instance_variable_set(:@ignore_missing, true) } if ignore_missing
  Searchkick.indexer.queue(items)
end
# Memoized RelationIndexer bound to this index.
def relation_indexer
  return @relation_indexer if @relation_indexer

  @relation_indexer = RelationIndexer.new(self)
end
# The "settings" => "index" section of the first entry in the settings response.
def index_settings
  first_entry = settings.values.first
  first_entry["settings"]["index"]
end
# Imports +relation+ into +index+ (used by full_reindex before the alias is promoted).
def import_before_promotion(index, relation, **import_options)
  target = index
  target.import_scope(relation, **import_options)
end
# Reindexes an explicit list of records through RecordIndexer.
#
# mode    - defaults to the current callbacks value, then the index's :callbacks
#           option, then :inline; :bulk is treated as :inline
# refresh - when truthy, the index is refreshed afterwards
# Returns whatever RecordIndexer#reindex returns.
def reindex_records(object, mode: nil, refresh: false, **options)
  effective_mode = mode || Searchkick.callbacks_value || @options[:callbacks] || :inline
  effective_mode = :inline if effective_mode == :bulk

  indexer = RecordIndexer.new(self)
  indexer.reindex(object, mode: effective_mode, full: false, **options).tap do
    self.refresh if refresh
  end
end
# https://gist.github.com/jarosan/3124884
# https://www.elastic.co/blog/changing-mapping-with-zero-downtime/
#
# Imports +relation+ into a separate timestamped index and points the alias +name+ at it.
#
# import           - when false, the import step is skipped
# resume           - reuse the newest existing timestamped index instead of creating one
# retain           - keep old unaliased indices instead of cleaning them up
# mode             - import mode (:inline when nil); :queue is rejected
# refresh_interval - merged into the new index settings; restored by promote afterwards
# scope            - forwarded to the import
# wait             - only valid with mode: :async; blocks until the queued batches finish
# job_options      - forwarded to the import
#
# Returns {index_name: ...} in :async mode, true otherwise.
# Raises ArgumentError for invalid option combinations, Error when there is nothing to
# resume or the uuid safety check fails, and UnsupportedVersionError for the two
# transport error messages matched in the rescue clause.
def full_reindex(relation, import: true, resume: false, retain: false, mode: nil, refresh_interval: nil, scope: nil, wait: nil, job_options: nil)
raise ArgumentError, "wait only available in :async mode" if !wait.nil? && mode != :async
raise ArgumentError, "Full reindex does not support :queue mode - use :async mode instead" if mode == :queue
if resume
# all_indices names end in a timestamp, so the last one after sorting is the newest
index_name = all_indices.sort.last
raise Error, "No index to resume" unless index_name
index = Index.new(index_name, @options)
else
clean_indices unless retain
index_options = relation.searchkick_index_options
index_options.deep_merge!(settings: {index: {refresh_interval: refresh_interval}}) if refresh_interval
index = create_index(index_options: index_options)
end
import_options = {
mode: (mode || :inline),
full: true,
resume: resume,
scope: scope,
job_options: job_options
}
# remembered so check_uuid can detect the index being replaced during the import
uuid = index.uuid
# check if alias exists
alias_exists = alias_exists?
if alias_exists
import_before_promotion(index, relation, **import_options) if import
# get existing indices to remove
# in :async mode promotion is deferred (see the wait branch below)
unless mode == :async
check_uuid(uuid, index.uuid)
promote(index.name, update_refresh_interval: !refresh_interval.nil?)
clean_indices unless retain
end
else
# no alias yet: drop a plain index occupying the name, then promote first
delete if exists?
promote(index.name, update_refresh_interval: !refresh_interval.nil?)
# import after promotion
index.import_scope(relation, **import_options) if import
end
if mode == :async
if wait
puts "Created index: #{index.name}"
puts "Jobs queued. Waiting..."
# poll the reindex status every 3 seconds until it reports completion
loop do
sleep 3
status = Searchkick.reindex_status(index.name)
break if status[:completed]
puts "Batches left: #{status[:batches_left]}"
end
# already promoted if alias didn't exist
if alias_exists
puts "Jobs complete. Promoting..."
check_uuid(uuid, index.uuid)
promote(index.name, update_refresh_interval: !refresh_interval.nil?)
end
clean_indices unless retain
puts "SUCCESS!"
end
{index_name: index.name}
else
index.refresh
true
end
rescue => e
# these two server messages are reported as an unsupported version; everything else is re-raised
if Searchkick.transport_error?(e) && (e.message.include?("No handler for type [text]") || e.message.include?("class java.util.ArrayList cannot be cast to class java.util.Map"))
raise UnsupportedVersionError
end
raise e
end
# safety check
# still a chance for race condition since its called before promotion
# ideal is for user to disable automatic index creation
# https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#index-creation
#
# Raises Error when the two uuids differ; returns nil otherwise.
def check_uuid(old_uuid, new_uuid)
  return unless old_uuid != new_uuid

  raise Error, "Safety check failed - only run one Model.reindex per model at a time"
end
# Runs the block inside a "request.searchkick" notification carrying the event
# name and the record's search id. In :bulk callbacks mode the block runs
# without instrumentation.
def notify(record, name)
  return yield if Searchkick.callbacks_value == :bulk

  klass = record && record.class.searchkick_klass
  name = "#{klass.name} #{name}" if klass
  event = {name: name, id: search_id(record)}
  ActiveSupport::Notifications.instrument("request.searchkick", event) { yield }
end
# Runs the block inside a "request.searchkick" notification carrying the event
# name (prefixed with the class name of the first record) and the record count.
# In :bulk callbacks mode the block runs without instrumentation.
def notify_bulk(records, name)
  return yield if Searchkick.callbacks_value == :bulk

  event = {
    name: "#{records.first.class.searchkick_klass.name} #{name}",
    count: records.size
  }
  ActiveSupport::Notifications.instrument("request.searchkick", event) { yield }
end
end
end
================================================
FILE: lib/searchkick/index_cache.rb
================================================
module Searchkick
  # Small mutex-guarded cache. When it already holds max_size entries,
  # the whole cache is emptied before a new entry is stored.
  class IndexCache
    # max_size - number of entries at which the cache is emptied before a new insert
    def initialize(max_size: 20)
      @max_size = max_size
      @mutex = Mutex.new
      @data = {}
    end

    # probably a better pattern for this
    # but keep it simple
    #
    # Returns the cached value for +name+, or stores and returns the block's value.
    def fetch(name)
      # thread-safe in MRI without mutex
      # due to how context switching works
      @mutex.synchronize do
        next @data[name] if @data.key?(name)

        @data.clear if @data.size >= @max_size
        @data[name] = yield
      end
    end

    # Empties the cache.
    def clear
      @mutex.synchronize { @data.clear }
    end
  end
end
================================================
FILE: lib/searchkick/index_options.rb
================================================
module Searchkick
class IndexOptions
attr_reader :options
# index - object whose #options (the Searchkick options of the index) are used
#         to build settings and mappings
def initialize(index)
@options = index.options
end
# Returns {settings:, mappings:} for index creation.
# When :mappings is given without :merge_mappings, the custom settings and mappings
# are used as-is; otherwise they are deep-merged over the generated defaults.
def index_options
  # mortal symbols are garbage collected in Ruby 2.2+
  custom_settings = (options[:settings] || {}).deep_symbolize_keys
  custom_mappings = (options[:mappings] || {}).deep_symbolize_keys

  custom_only = options[:mappings] && !options[:merge_mappings]
  settings, mappings =
    if custom_only
      [custom_settings, custom_mappings]
    else
      [
        generate_settings.deep_symbolize_keys.deep_merge(custom_settings),
        generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
      ]
    end

  set_deep_paging(settings) if options[:deep_paging] || options[:max_result_window]

  {settings: settings, mappings: mappings}
end
# Builds the default index settings: analyzers, token filters and char filters,
# adjusted for the :language, :stemmer, :stem, :similarity, :knn, :synonyms,
# :search_synonyms, :special_characters and :case_sensitive options.
#
# Raises ArgumentError when both :language and :stemmer are given, and Error when
# :knn is requested but Searchkick.knn_support? is false.
def generate_settings
  language = options[:language]
  language = language.call if language.respond_to?(:call)

  settings = {
    analysis: {
      analyzer: {
        searchkick_keyword: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
        },
        default_analyzer => {
          type: "custom",
          # character filters -> tokenizer -> token filters
          # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
          char_filter: ["ampersand"],
          tokenizer: "standard",
          # synonym should come last, after stemming and shingle
          # shingle must come before searchkick_stemmer
          filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
        },
        searchkick_search: {
          type: "custom",
          char_filter: ["ampersand"],
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
        },
        searchkick_search2: {
          type: "custom",
          char_filter: ["ampersand"],
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
        },
        # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
        searchkick_autocomplete_search: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase", "asciifolding"]
        },
        searchkick_word_search: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding"]
        },
        searchkick_suggest_index: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
        },
        searchkick_text_start_index: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
        },
        searchkick_text_middle_index: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase", "asciifolding", "searchkick_ngram"]
        },
        searchkick_text_end_index: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
        },
        searchkick_word_start_index: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
        },
        searchkick_word_middle_index: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_ngram"]
        },
        searchkick_word_end_index: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
        }
      },
      filter: {
        searchkick_index_shingle: {
          type: "shingle",
          token_separator: ""
        },
        # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
        searchkick_search_shingle: {
          type: "shingle",
          token_separator: "",
          output_unigrams: false,
          output_unigrams_if_no_shingles: true
        },
        searchkick_suggest_shingle: {
          type: "shingle",
          max_shingle_size: 5
        },
        searchkick_edge_ngram: {
          type: "edge_ngram",
          min_gram: 1,
          max_gram: 50
        },
        searchkick_ngram: {
          type: "ngram",
          min_gram: 1,
          max_gram: 50
        },
        searchkick_stemmer: {
          # use stemmer if language is lowercase, snowball otherwise
          type: language == language.to_s.downcase ? "stemmer" : "snowball",
          language: language || "English"
        }
      },
      char_filter: {
        # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
        # &_to_and
        ampersand: {
          type: "mapping",
          mappings: ["&=> and "]
        }
      }
    }
  }

  raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
  update_language(settings, language)
  update_stemming(settings)

  if Searchkick.env == "test"
    settings[:number_of_shards] = 1
    settings[:number_of_replicas] = 0
  end

  if options[:similarity]
    settings[:similarity] = {default: {type: options[:similarity]}}
  end

  settings[:index] = {
    max_ngram_diff: 49,
    max_shingle_diff: 4
  }

  if options[:knn]
    unless Searchkick.knn_support?
      if Searchkick.opensearch?
        raise Error, "knn requires OpenSearch 2.4+"
      else
        raise Error, "knn requires Elasticsearch 8.6+"
      end
    end

    if Searchkick.opensearch? && options[:knn].any? { |_, v| !v[:distance].nil? }
      # only enable if doing approximate search
      settings[:index][:knn] = true
    end
  end

  add_synonyms(settings)
  add_search_synonyms(settings)

  # Analyzers replaced by update_language (e.g. {type: "ik_smart"}) carry no
  # :filter list, so both loops below must tolerate a missing filter.
  if options[:special_characters] == false
    settings[:analysis][:analyzer].each_value do |analyzer_settings|
      analyzer_settings[:filter]&.delete("asciifolding")
    end
  end

  if options[:case_sensitive]
    settings[:analysis][:analyzer].each_value do |analyzer_settings|
      analyzer_settings[:filter]&.delete("lowercase")
    end
  end

  settings
end
# Replaces the index analyzer and the two search analyzers in +settings+ with the
# analyzer(s) matching +language+. Languages not listed here leave +settings+ untouched.
def update_language(settings, language)
  # installs plain {type: ...} analyzers for the index analyzer and both search analyzers
  use_analyzers = lambda do |indexing, search, search2|
    settings[:analysis][:analyzer].merge!(
      default_analyzer => {type: indexing},
      searchkick_search: {type: search},
      searchkick_search2: {type: search2}
    )
  end

  case language
  when "chinese"
    use_analyzers.call("ik_smart", "ik_smart", "ik_max_word")
  when "chinese2", "smartcn"
    use_analyzers.call("smartcn", "smartcn", "smartcn")
  when "japanese", "japanese2"
    analyzer = {
      type: "custom",
      tokenizer: "kuromoji_tokenizer",
      filter: [
        "kuromoji_baseform",
        "kuromoji_part_of_speech",
        "cjk_width",
        "ja_stop",
        "searchkick_stemmer",
        "lowercase"
      ]
    }
    # each analyzer gets its own copy
    settings[:analysis][:analyzer].merge!(
      default_analyzer => analyzer.deep_dup,
      searchkick_search: analyzer.deep_dup,
      searchkick_search2: analyzer.deep_dup
    )
    settings[:analysis][:filter][:searchkick_stemmer] = {
      type: "kuromoji_stemmer"
    }
  when "korean"
    use_analyzers.call("openkoreantext-analyzer", "openkoreantext-analyzer", "openkoreantext-analyzer")
  when "korean2"
    use_analyzers.call("nori", "nori", "nori")
  when "vietnamese"
    use_analyzers.call("vi_analyzer", "vi_analyzer", "vi_analyzer")
  when "polish", "ukrainian"
    # the analyzer type is the language name itself
    use_analyzers.call(language, language, language)
  end
end
# Applies the :stemmer, :stem, :stemmer_override and :stem_exclusion options to +settings+.
# Raises ArgumentError for a :stemmer whose type is not "hunspell".
def update_stemming(settings)
  analysis = settings[:analysis]

  # inserts +filter_name+ right before "searchkick_stemmer" in every analyzer that uses it
  insert_before_stemmer = lambda do |filter_name|
    analysis[:analyzer].each do |_, analyzer|
      filters = analyzer[:filter]
      position = filters.index("searchkick_stemmer") if filters
      filters.insert(position, filter_name) if position
    end
  end

  stemmer = options[:stemmer]
  if stemmer
    # could also support snowball and stemmer
    case stemmer[:type]
    when "hunspell"
      # supports all token filter options
      analysis[:filter][:searchkick_stemmer] = stemmer
    else
      raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
    end
  end

  stem = options[:stem]
  # language analyzer used
  stem = false if analysis[:analyzer][default_analyzer][:type] != "custom"

  if stem == false
    analysis[:filter].delete(:searchkick_stemmer)
    analysis[:analyzer].each do |_, analyzer|
      analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
    end
  end

  override = options[:stemmer_override]
  if override
    source_key = override.is_a?(String) ? :rules_path : :rules
    analysis[:filter][:searchkick_stemmer_override] = {type: "stemmer_override", source_key => override}
    insert_before_stemmer.call("searchkick_stemmer_override")
  end

  if options[:stem_exclusion]
    analysis[:filter][:searchkick_stem_exclusion] = {
      type: "keyword_marker",
      keywords: options[:stem_exclusion]
    }
    insert_before_stemmer.call("searchkick_stem_exclusion")
  end
end
# Builds the default mappings: explicit properties for every field named in the
# options (conversions, match types, highlight, searchable, filterable, locations,
# geo_shape, knn, inheritance), the routing configuration, and a dynamic template
# applied to all other string fields.
#
# Raises ArgumentError when :conversions and :conversions_v2 share a field, or when
# a :knn entry has an unknown/missing distance or an unsupported quantization.
def generate_mappings
  mapping = {}

  keyword_mapping = {type: "keyword"}
  keyword_mapping[:ignore_above] = options[:ignore_above] || 30000

  # conversions
  Array(options[:conversions]).each do |conversions_field|
    mapping[conversions_field] = {
      type: "nested",
      properties: {
        query: {type: default_type, analyzer: "searchkick_keyword"},
        count: {type: "integer"}
      }
    }
  end

  Array(options[:conversions_v2]).each do |conversions_field|
    mapping[conversions_field] = {
      type: "rank_features"
    }
  end

  if (Array(options[:conversions_v2]).map(&:to_s) & Array(options[:conversions]).map(&:to_s)).any?
    raise ArgumentError, "Must have separate conversions fields"
  end

  mapping_options =
    [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
      .to_h { |type| [type, (options[type] || []).map(&:to_s)] }

  word = options[:word] != false && (!options[:match] || options[:match] == :word)

  mapping_options[:searchable].delete("_all")

  analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}

  mapping_options.values.flatten.uniq.each do |field|
    fields = {}

    if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
      fields[field] = {type: default_type, index: false}
    else
      fields[field] = keyword_mapping
    end

    if !options[:searchable] || mapping_options[:searchable].include?(field)
      if word
        # copy per field: term_vector below must only apply to highlight fields,
        # not to every field (and the dynamic template) sharing the same hash
        fields[:analyzed] = analyzed_field_options.dup

        if mapping_options[:highlight].include?(field)
          fields[:analyzed][:term_vector] = "with_positions_offsets"
        end
      end

      mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
        if options[:match] == type || f.include?(field)
          fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
        end
      end
    end

    mapping[field] = fields[field].merge(fields: fields.except(field))
  end

  (options[:locations] || []).map(&:to_s).each do |field|
    mapping[field] = {
      type: "geo_point"
    }
  end

  # an Array of field names is normalized to {field => {}}
  options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
  (options[:geo_shape] || {}).each do |field, shape_options|
    mapping[field] = shape_options.merge(type: "geo_shape")
  end

  (options[:knn] || []).each do |field, knn_options|
    distance = knn_options[:distance]
    quantization = knn_options[:quantization]

    if Searchkick.opensearch?
      if distance.nil?
        # avoid server crash if method not specified
        raise ArgumentError, "Must specify a distance for OpenSearch"
      end

      vector_options = {
        type: "knn_vector",
        dimension: knn_options[:dimensions]
      }

      space_type =
        case distance
        when "cosine"
          "cosinesimil"
        when "euclidean"
          "l2"
        when "inner_product"
          "innerproduct"
        else
          raise ArgumentError, "Unknown distance: #{distance}"
        end

      if !quantization.nil?
        raise ArgumentError, "Quantization not supported yet for OpenSearch"
      end

      vector_options[:method] = {
        name: "hnsw",
        space_type: space_type,
        engine: "lucene",
        parameters: knn_options.slice(:m, :ef_construction)
      }

      mapping[field.to_s] = vector_options
    else
      vector_options = {
        type: "dense_vector",
        dims: knn_options[:dimensions],
        index: !distance.nil?
      }

      if !distance.nil?
        vector_options[:similarity] =
          case distance
          when "cosine"
            "cosine"
          when "euclidean"
            "l2_norm"
          when "inner_product"
            "max_inner_product"
          else
            raise ArgumentError, "Unknown distance: #{distance}"
          end

        type =
          case quantization
          when "int8", "int4", "bbq"
            "#{quantization}_hnsw"
          when nil
            "hnsw"
          else
            raise ArgumentError, "Unknown quantization: #{quantization}"
          end

        vector_index_options = knn_options.slice(:m, :ef_construction)
        vector_options[:index_options] = {type: type}.merge(vector_index_options)
      end

      mapping[field.to_s] = vector_options
    end
  end

  if options[:inheritance]
    mapping[:type] = keyword_mapping
  end

  routing = {}
  if options[:routing]
    routing = {required: true}
    unless options[:routing] == true
      routing[:path] = options[:routing].to_s
    end
  end

  dynamic_fields = {
    # analyzed field must be the default field for include_in_all
    # https://www.elastic.co/guide/reference/mapping/multi-field-type/
    # however, we can include the not_analyzed field in _all
    # and the _all index analyzer will take care of it
    "{name}" => keyword_mapping
  }

  if options.key?(:filterable)
    dynamic_fields["{name}"] = {type: default_type, index: false}
  end

  unless options[:searchable]
    if options[:match] && options[:match] != :word
      dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
    end

    if word
      dynamic_fields[:analyzed] = analyzed_field_options
    end
  end

  # https://www.elastic.co/guide/reference/mapping/multi-field-type/
  multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))

  mappings = {
    properties: mapping,
    _routing: routing,
    # https://gist.github.com/kimchy/2898285
    dynamic_templates: [
      {
        string_template: {
          match: "*",
          match_mapping_type: "string",
          mapping: multi_field
        }
      }
    ]
  }

  mappings
end
# Adds a "searchkick_synonym" token filter built from the :synonyms option
# (called first when callable) and inserts it at position 2 of the index
# analyzer and the word_start/word_middle/word_end index analyzers.
# Does nothing when there are no synonyms.
def add_synonyms(settings)
  synonyms = options[:synonyms] || []
  synonyms = synonyms.call if synonyms.respond_to?(:call)
  return unless synonyms.any?

  rules = synonyms.select { |s| s.size > 1 }.map do |s|
    # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
    rule = s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s
    rule.downcase
  end
  settings[:analysis][:filter][:searchkick_synonym] = {type: "synonym", synonyms: rules}

  # choosing a place for the synonym filter when stemming is not easy
  # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
  # TODO use a snowball stemmer on synonyms when creating the token filter

  # https://discuss.elastic.co/t/synonym-multi-words-search/10964
  # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
  # - Only apply the synonym expansion at index time
  # - Don't have the synonym filter applied search
  # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
  analyzers = settings[:analysis][:analyzer]
  analyzers[default_analyzer][:filter].insert(2, "searchkick_synonym")

  %w(word_start word_middle word_end).each do |type|
    analyzers["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
  end
end
# Adds a "searchkick_synonym_graph" token filter built from the :search_synonyms
# option (called first when callable) and inserts it into the search analyzers.
# A String is treated as a synonyms file path (updateable); otherwise the entries
# are used inline. Does nothing when there are no search synonyms.
#
# Raises Error when a target search analyzer has no :filter list.
def add_search_synonyms(settings)
  search_synonyms = options[:search_synonyms] || []
  search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)

  from_path = search_synonyms.is_a?(String)
  return unless from_path || search_synonyms.any?

  synonym_graph =
    if from_path
      {
        type: "synonym_graph",
        synonyms_path: search_synonyms,
        updateable: true
      }
    else
      # TODO confirm this is correct
      rules = search_synonyms.select { |s| s.size > 1 }.map { |s| (s.is_a?(Array) ? s.join(",") : s).downcase }
      {
        type: "synonym_graph",
        synonyms: rules
      }
    end
  settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph

  analyzers = settings[:analysis][:analyzer]
  if ["japanese", "japanese2"].include?(options[:language])
    [:searchkick_search, :searchkick_search2].each do |analyzer|
      analyzers[analyzer][:filter].insert(4, "searchkick_synonym_graph")
    end
  else
    [:searchkick_search2, :searchkick_word_search].each do |analyzer|
      unless analyzers[analyzer].key?(:filter)
        raise Error, "Search synonyms are not supported yet for language"
      end

      analyzers[analyzer][:filter].insert(2, "searchkick_synonym_graph")
    end
  end
end
# Sets index.max_result_window (the :max_result_window option, or 1_000_000_000)
# unless +settings+ already defines it in nested or dotted form.
def set_deep_paging(settings)
  return if settings.dig(:index, :max_result_window) || settings[:"index.max_result_window"]

  settings[:index] ||= {}
  settings[:index][:max_result_window] = options[:max_result_window] || 1_000_000_000
end
# The :_type option, resolved by calling it when it is callable. Memoized.
def index_type
  @index_type ||= begin
    configured = options[:_type]
    configured.respond_to?(:call) ? configured.call : configured
  end
end
# Field type used for analyzed (full-text) fields.
def default_type = "text"
# Name of the analyzer used as the index-time analyzer for analyzed fields.
def default_analyzer = :searchkick_index
end
end
================================================
FILE: lib/searchkick/indexer.rb
================================================
# thread-local (technically fiber-local) indexer
# used to aggregate bulk callbacks across models
module Searchkick
  # Collects bulk API items and sends them in a single bulk request.
  class Indexer
    attr_reader :queued_items

    def initialize
      @queued_items = []
    end

    # Appends +items+ to the queue and sends it right away,
    # unless callbacks are in :bulk mode.
    def queue(items)
      @queued_items.concat(items)
      return if Searchkick.callbacks_value == :bulk

      perform
    end

    # Sends all queued items in one bulk request and empties the queue.
    # Raises ImportError for the first failed item that is not an ignorable
    # missing-document error. Returns nil.
    def perform
      items, @queued_items = @queued_items, []
      return if items.empty?

      response = Searchkick.client.bulk(body: items)
      return unless response["errors"]

      # note: delete does not set error when item not found
      failure = nil
      response["items"].each_with_index do |entry, i|
        result = entry["index"] || entry["delete"] || entry["update"]
        next unless result["error"] && !ignore_missing?(items[i], result["error"])

        failure = result
        break
      end
      raise ImportError, "#{failure["error"]} on item with id '#{failure["_id"]}'" if failure

      # maybe return response in future
      nil
    end

    private

    # True when the error is a missing document and the queued item was tagged
    # with an @ignore_missing instance variable.
    def ignore_missing?(item, error)
      return false unless error["type"] == "document_missing_exception"

      item.instance_variable_defined?(:@ignore_missing)
    end
  end
end
================================================
FILE: lib/searchkick/log_subscriber.rb
================================================
# based on https://gist.github.com/mnutt/566725
module Searchkick
  # Adds each event's duration to a per-thread runtime counter and,
  # when the logger is at debug level, writes a line describing the request.
  class LogSubscriber < ActiveSupport::LogSubscriber
    class << self
      def runtime=(value)
        Thread.current[:searchkick_runtime] = value
      end

      def runtime
        Thread.current[:searchkick_runtime] ||= 0
      end

      # Returns the accumulated runtime and sets the counter back to zero.
      def reset_runtime
        accumulated = runtime
        self.runtime = 0
        accumulated
      end
    end

    def search(event)
      self.class.runtime += event.duration
      return unless logger.debug?

      query = event.payload[:query]
      index = query[:index]
      index = index.join(",") if index.is_a?(Array)
      type = query[:type]

      # everything except these keys is rendered as a URL query string
      params = query.except(:index, :type, :body, :opaque_id).map do |key, value|
        "#{CGI.escape(key.to_s)}=#{CGI.escape(value.to_s)}"
      end

      type_segment = type ? "/#{type.join(',')}" : ''
      query_string = params.any? ? '?' + params.join('&') : nil

      debug " #{colored_name(event)} #{index}#{type_segment}/_search#{query_string} #{query[:body].to_json}"
    end

    def request(event)
      self.class.runtime += event.duration
      return unless logger.debug?

      debug " #{colored_name(event)} #{event.payload.except(:name).to_json}"
    end

    def multi_search(event)
      self.class.runtime += event.duration
      return unless logger.debug?

      debug " #{colored_name(event)} _msearch #{event.payload[:body]}"
    end

    private

    # Event name followed by its duration in milliseconds, colored for the log
    def colored_name(event)
      color("#{event.payload[:name]} (#{event.duration.round(1)}ms)", YELLOW, bold: true)
    end
  end
end
================================================
FILE: lib/searchkick/middleware.rb
================================================
require "faraday"
module Searchkick
  # Faraday middleware that sets the request timeout for _search and _msearch requests.
  class Middleware < Faraday::Middleware
    def call(env)
      request_path = env[:url].path.to_s

      if request_path.end_with?("/_search")
        env[:request][:timeout] = Searchkick.search_timeout
      elsif request_path.end_with?("/_msearch")
        # assume no concurrent searches for timeout for now
        # (two lines of the request body are counted as one search)
        search_count = env[:request_body].count("\n") / 2

        # do not allow timeout to exceed Searchkick.timeout
        scaled_timeout = Searchkick.search_timeout * search_count
        env[:request][:timeout] = [scaled_timeout, Searchkick.timeout].min
      end

      @app.call(env)
    end
  end
end
================================================
FILE: lib/searchkick/model.rb
================================================
module Searchkick
# Provides the `searchkick` class-level macro that turns a model into a searchable one.
module Model
# Configures search for the calling model.
#
# options are merged on top of Searchkick.model_options; any key outside the
# list below raises ArgumentError, as does an unsupported :callbacks value.
# Calling it a second time on the same model raises an error.
def searchkick(**options)
options = Searchkick.model_options.deep_merge(options)
if options[:conversions]
Searchkick.warn("The `conversions` option is deprecated in favor of `conversions_v2`, which provides much better search performance. Upgrade to `conversions_v2` or rename `conversions` to `conversions_v1`")
end
# conversions_v1 is stored under the :conversions key (checked after the warning above, so it does not warn)
if options.key?(:conversions_v1)
options[:conversions] = options.delete(:conversions_v1)
end
unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :callback_options, :case_sensitive, :conversions, :conversions_v2, :deep_paging, :default_fields,
:filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :job_options, :knn, :language,
:locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
:special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
:text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
raise "Only call searchkick once per model" if respond_to?(:searchkick_index)
Searchkick.models << self
# default document type is resolved lazily through the index
options[:_type] ||= -> { searchkick_index.klass_document_type(self, true) }
options[:class_name] = model_name.name
# an explicit callbacks: false must be kept, so check for the key rather than the value
callbacks = options.key?(:callbacks) ? options[:callbacks] : :inline
unless [:inline, true, false, :async, :queue].include?(callbacks)
raise ArgumentError, "Invalid value for callbacks"
end
# prepend the Searchkick.callbacks? check to any user-supplied :if conditions
callback_options = (options[:callback_options] || {}).dup
callback_options[:if] = [-> { Searchkick.callbacks?(default: callbacks) }, callback_options[:if]].compact.flatten(1)
base = self
# instance methods live in an anonymous module; each one is only defined
# when the model does not already define a method with that name
mod = Module.new
include(mod)
mod.module_eval do
def reindex(method_name = nil, mode: nil, refresh: false, ignore_missing: nil, job_options: nil)
self.class.searchkick_index.reindex([self], method_name: method_name, mode: mode, refresh: refresh, ignore_missing: ignore_missing, job_options: job_options, single: true)
end unless base.method_defined?(:reindex)
def similar(**options)
self.class.searchkick_index.similar_record(self, **options)
end unless base.method_defined?(:similar)
# default document body: the record's hash form without id, _id and _type
def search_data
data = respond_to?(:to_hash) ? to_hash : serializable_hash
data.delete("id")
data.delete("_id")
data.delete("_type")
data
end unless base.method_defined?(:search_data)
def should_index?
true
end unless base.method_defined?(:should_index?)
end
class_eval do
cattr_reader :searchkick_options, :searchkick_klass, instance_reader: false
class_variable_set :@@searchkick_options, options.dup
class_variable_set :@@searchkick_klass, self
class_variable_set :@@searchkick_index_cache, Searchkick::IndexCache.new
class << self
# Runs a search scoped to this model; raises when called on a relation.
def searchkick_search(term = "*", **options, &block)
if Searchkick.relation?(self)
raise Searchkick::Error, "search must be called on model, not relation"
end
Searchkick.search(term, model: self, **options, &block)
end
alias_method Searchkick.search_method_name, :searchkick_search if Searchkick.search_method_name
# Returns the Searchkick::Index for the given name (or the model's own index name),
# fetched through the per-model index cache.
def searchkick_index(name: nil)
index_name = name || searchkick_klass.searchkick_index_name
index_name = index_name.call if index_name.respond_to?(:call)
index_cache = class_variable_get(:@@searchkick_index_cache)
index_cache.fetch(index_name) { Searchkick::Index.new(index_name, searchkick_options) }
end
alias_method :search_index, :searchkick_index unless method_defined?(:search_index)
def searchkick_reindex(method_name = nil, **options)
searchkick_index.reindex(self, method_name: method_name, **options)
end
alias_method :reindex, :searchkick_reindex unless method_defined?(:reindex)
def searchkick_index_options
searchkick_index.index_options
end
# Memoized index name: the :index_name option when given, otherwise
# prefix, pluralized model name, Searchkick.env and Searchkick.index_suffix joined with "_"
# (nil parts dropped). A callable :index_prefix yields a lambda so the name is computed on use.
def searchkick_index_name
@searchkick_index_name ||= begin
options = class_variable_get(:@@searchkick_options)
if options[:index_name]
options[:index_name]
elsif options[:index_prefix].respond_to?(:call)
-> { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
else
[options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
end
end
end
end
# always add callbacks, even when callbacks is false
# so Model.callbacks block can be used
if respond_to?(:after_commit)
after_commit :reindex, **callback_options
elsif respond_to?(:after_save)
after_save :reindex, **callback_options
after_destroy :reindex, **callback_options
end
end
end
end
end
================================================
FILE: lib/searchkick/multi_search.rb
================================================
module Searchkick
  # Executes a list of queries with one msearch request and hands each query its response.
  class MultiSearch
    attr_reader :queries

    def initialize(queries, opaque_id: nil)
      @queries = queries
      @opaque_id = opaque_id
    end

    # Returns the queries after their responses were handled, or nil when there are none.
    def perform
      return unless queries.any?

      perform_search(queries)
    end

    private

    # Sends the queries; those asking for a misspellings retry are re-prepared
    # and sent once more, with retries disabled for that second round.
    def perform_search(search_queries, perform_retry: true)
      request = {
        body: search_queries.flat_map { |q| [q.params.except(:body), q.body] }
      }
      request[:opaque_id] = @opaque_id if @opaque_id

      responses = client.msearch(request)["responses"]

      pending_retry = []
      search_queries.each_with_index do |query, position|
        response = responses[position]
        if perform_retry && query.retry_misspellings?(response)
          query.send(:prepare) # okay, since we don't want to expose this method outside Searchkick
          pending_retry << query
        else
          query.handle_response(response)
        end
      end

      perform_search(pending_retry, perform_retry: false) if pending_retry.any?

      search_queries
    end

    def client
      Searchkick.client
    end
  end
end
================================================
FILE: lib/searchkick/process_batch_job.rb
================================================
module Searchkick
class ProcessBatchJob < Searchkick.parent_job.constantize
queue_as { Searchkick.queue_name }
def perform(class_name:, record_ids:, index_name: nil)
model = Searchkick.load_model(class_name)
index = model.searchkick_index(name: index_name)
items =
record_ids.map do |r|
parts = r.split(/(? e
handle_error(e)
end
handle_response(response)
end
end
# Wraps the raw response in a Results object (stored in @execute and returned).
# With options[:debug] set, first prints versions, model and search options,
# sample record data, mapping and settings (when an index is available),
# the query body and the raw response.
def handle_response(response)
  result_options = {
    page: @page,
    per_page: @per_page,
    padding: @padding,
    load: @load,
    includes: options[:includes],
    model_includes: options[:model_includes],
    json: !@json.nil?,
    match_suffix: @match_suffix,
    highlight: options[:highlight],
    highlighted_fields: @highlighted_fields || [],
    misspellings: @misspellings,
    term: term,
    scope_results: options[:scope_results],
    total_entries: options[:total_entries],
    index_mapping: @index_mapping,
    suggest: options[:suggest],
    scroll: options[:scroll],
    opaque_id: options[:opaque_id]
  }

  if options[:debug]
    backend = Searchkick.opensearch? ? "OpenSearch" : "Elasticsearch"
    puts "Searchkick #{Searchkick::VERSION}"
    puts "#{backend} #{Searchkick.server_version}"
    puts

    puts "Model Options"
    pp searchkick_options
    puts

    puts "Search Options"
    pp options
    puts

    if searchkick_index
      puts "Record Data"
      begin
        pp klass.limit(3).map { |record| RecordData.new(searchkick_index, record).index_data }
      rescue => error
        # report the failure and keep printing the remaining sections
        puts "#{error.class.name}: #{error.message}"
      end
      puts

      puts "Mapping"
      puts JSON.pretty_generate(searchkick_index.mapping)
      puts

      puts "Settings"
      puts JSON.pretty_generate(searchkick_index.settings)
      puts
    end

    puts "Query"
    puts JSON.pretty_generate(params[:body])
    puts

    puts "Results"
    puts JSON.pretty_generate(response.to_h)
  end

  # set execute for multi search
  @execute = Results.new(searchkick_klass, response, result_options)
end
# Truthy when a misspellings threshold is set, the response has no error
# and its total count is below that threshold.
def retry_misspellings?(response)
  threshold = @misspellings_below
  return threshold unless threshold
  return false unless response["error"].nil?

  total = Results.new(searchkick_klass, response).total_count
  total < threshold
end
private
# Translates a client error into a Searchkick error based on the status code
# read from characters 1..3 of the message, and on known message fragments.
# Anything unrecognized is re-raised unchanged.
def handle_error(e)
  message = e.message

  case message[1..3].to_i
  when 404
    if message.include?("No search context found for id")
      raise MissingIndexError, "No search context found for id"
    end

    raise MissingIndexError, "Index missing - run #{reindex_command}"
  when 500
    unsupported_markers = [
      "IllegalArgumentException[minimumSimilarity >= 1]",
      "No query registered for [multi_match]",
      "[match] query does not support [cutoff_frequency]",
      "No query registered for [function_score]"
    ]
    raise UnsupportedVersionError if unsupported_markers.any? { |marker| message.include?(marker) }
  when 400
    unsupported_markers = [
      "bool query does not support [filter]",
      "[bool] filter does not support [filter]"
    ]
    raise UnsupportedVersionError if unsupported_markers.any? { |marker| message.include?(marker) }

    if message.match?(/analyzer \[searchkick_.+\] not found/)
      raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
    end

    raise InvalidQueryError, message
  end

  raise e
end
# Command suggested in error messages: "<Model>.reindex" when a model is known, else "reindex".
def reindex_command
  model = searchkick_klass
  return "reindex" unless model

  "#{model.name}.reindex"
end
# Runs the search through Searchkick.client inside a "search.searchkick" notification
# whose payload carries a display name and the request params.
def execute_search
  model = searchkick_klass
  label = model ? "#{model.name} Search" : "Search"
  event = {name: label, query: params}

  ActiveSupport::Notifications.instrument("search.searchkick", event) do
    Searchkick.client.search(params)
  end
end
# Builds the request body for this query from term and options and stores it,
# together with pagination and loading settings, in instance variables
# (@body, @page, @per_page, @padding, @load, @scroll, @opaque_id, plus @json,
# @misspellings, @misspellings_below, @type and @routing where applicable).
#
# With options[:body] the given body is used as is and query-building options
# are rejected with ArgumentError. Otherwise the query is assembled from
# similar / match-all / per-field match queries, then filters, aggregations,
# boosts, ordering, suggestions, highlighting and source selection are added.
#
# Raises ArgumentError for incompatible or incomplete options.
def prepare
  boost_fields, fields = set_fields

  operator = options[:operator] || "and"

  # pagination
  page = [options[:page].to_i, 1].max
  # maybe use index.max_result_window in the future
  default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
  per_page = (options[:limit] || options[:per_page] || default_limit).to_i
  padding = [options[:padding].to_i, 0].max
  offset = (options[:offset] || (page - 1) * per_page + padding).to_i
  scroll = options[:scroll]
  opaque_id = options[:opaque_id]

  # clamp the requested window to max_result_window;
  # the unclamped per_page is still what gets stored in @per_page
  max_result_window = searchkick_options[:max_result_window]
  original_per_page = per_page
  if max_result_window
    offset = max_result_window if offset > max_result_window
    per_page = max_result_window - offset if offset + per_page > max_result_window
  end

  # model and eager loading
  load = options[:load].nil? ? true : options[:load]

  all = term == "*"

  @json = options[:body]
  if @json
    ignored_options = options.keys & [:aggs, :boost,
      :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :exclude, :explain,
      :fields, :highlight, :indices_boost, :match, :misspellings, :operator, :order,
      :profile, :select, :smart_aggs, :suggest, :where]
    raise ArgumentError, "Options incompatible with body option: #{ignored_options.join(", ")}" if ignored_options.any?
    payload = @json
  else
    must_not = []
    should = []

    if options[:similar]
      like = options[:similar] == true ? term : options[:similar]
      query = {
        more_like_this: {
          like: like,
          min_doc_freq: 1,
          min_term_freq: 1,
          analyzer: "searchkick_search2"
        }
      }
      if fields.all? { |f| f.start_with?("*.") }
        raise ArgumentError, "Must specify fields to search"
      end
      if fields != ["_all"]
        query[:more_like_this][:fields] = fields
      end
    elsif all && !options[:exclude]
      query = {
        match_all: {}
      }
    else
      queries = []

      misspellings =
        if options.key?(:misspellings)
          options[:misspellings]
        else
          true
        end

      # with misspellings: {below: n} the first pass runs without misspellings;
      # @misspellings_below is already set on the retry pass, so this is skipped then
      if misspellings.is_a?(Hash) && misspellings[:below] && !@misspellings_below
        @misspellings_below = misspellings[:below].to_i
        misspellings = false
      end

      if misspellings != false
        edit_distance = (misspellings.is_a?(Hash) && (misspellings[:edit_distance] || misspellings[:distance])) || 1
        transpositions =
          if misspellings.is_a?(Hash) && misspellings.key?(:transpositions)
            {fuzzy_transpositions: misspellings[:transpositions]}
          else
            {fuzzy_transpositions: true}
          end
        prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
        default_max_expansions = @misspellings_below ? 20 : 3
        max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
        misspellings_fields = misspellings.is_a?(Hash) && misspellings.key?(:fields) && misspellings[:fields].map(&:to_s)

        if misspellings_fields
          missing_fields = misspellings_fields - fields.map { |f| base_field(f) }
          if missing_fields.any?
            raise ArgumentError, "All fields in per-field misspellings must also be specified in fields option"
          end
        end

        @misspellings = true
      else
        @misspellings = false
      end

      fields.each do |field|
        queries_to_add = []
        qs = []

        factor = boost_fields[field] || 1
        shared_options = {
          query: term,
          boost: 10 * factor
        }

        match_type =
          if field.end_with?(".phrase")
            field =
              if field == "_all.phrase"
                "_all"
              else
                field.sub(/\.phrase\z/, ".analyzed")
              end

            :match_phrase
          else
            :match
          end

        shared_options[:operator] = operator if match_type == :match

        exclude_analyzer = nil
        exclude_field = field

        field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))

        if field == "_all" || field.end_with?(".analyzed")
          qs << shared_options.merge(analyzer: "searchkick_search")

          # searchkick_search and searchkick_search2 are the same for some languages
          unless %w(japanese japanese2 korean polish ukrainian vietnamese).include?(searchkick_options[:language])
            qs << shared_options.merge(analyzer: "searchkick_search2")
          end
          exclude_analyzer = "searchkick_search2"
        elsif field.end_with?(".exact")
          f = field.split(".")[0..-2].join(".")
          queries_to_add << {match: {f => shared_options.merge(analyzer: "keyword")}}
          exclude_field = f
          exclude_analyzer = "keyword"
        else
          analyzer = field.match?(/\.word_(start|middle|end)\z/) ? "searchkick_word_search" : "searchkick_autocomplete_search"
          qs << shared_options.merge(analyzer: analyzer)
          exclude_analyzer = analyzer
        end

        # add a fuzzy variant of every query, boosted lower than the exact one
        if field_misspellings != false && match_type == :match
          qs.concat(qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) })
        end

        if field.start_with?("*.")
          q2 = qs.map { |q| {multi_match: q.merge(fields: [field], type: match_type == :match_phrase ? "phrase" : "best_fields")} }
        else
          q2 = qs.map { |q| {match_type => {field => q}} }
        end

        # boost exact matches more
        if field =~ /\.word_(start|middle|end)\z/ && searchkick_options[:word] != false
          queries_to_add << {
            bool: {
              must: {
                bool: {
                  should: q2
                }
              },
              should: {match_type => {field.sub(/\.word_(start|middle|end)\z/, ".analyzed") => qs.first}}
            }
          }
        else
          queries_to_add.concat(q2)
        end

        queries << queries_to_add

        if options[:exclude]
          must_not.concat(set_exclude(exclude_field, exclude_analyzer))
        end
      end

      if all
        # all + exclude option: match everything, the exclusions live in must_not.
        # The query is assigned directly so match_all is not overwritten afterwards.
        query = {
          match_all: {}
        }

        should = []
      else
        # higher score for matching more fields
        query = {
          bool: {
            should: queries.map { |qs| {dis_max: {queries: qs}} }
          }
        }

        should.concat(set_conversions)
        should.concat(set_conversions_v2)
      end
    end

    payload = {}

    # type when inheritance
    where = ensure_permitted(options[:where] || {}).dup
    if searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
      where[:type] = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v, true) }
    end

    models = Array(options[:models])
    if models.any? { |m| m != m.searchkick_klass }
      index_type_or =
        models.map do |m|
          v = {_index: m.searchkick_index.name}
          v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
          v
        end

      where[:or] = Array(where[:or]) + [index_type_or]
    end

    # start everything as efficient filters
    # move to post_filters as aggs demand
    filters = where_filters(where)
    post_filters = []

    # aggregations
    set_aggregations(payload, filters, post_filters) if options[:aggs]

    # post filters
    set_post_filters(payload, post_filters) if post_filters.any?

    custom_filters = []
    multiply_filters = []

    set_boost_by(multiply_filters, custom_filters)
    set_boost_where(custom_filters)
    set_boost_by_distance(custom_filters) if options[:boost_by_distance]
    set_boost_by_recency(custom_filters) if options[:boost_by_recency]

    payload[:query] = build_query(query, filters, should, must_not, custom_filters, multiply_filters)

    payload[:explain] = options[:explain] if options[:explain]
    payload[:profile] = options[:profile] if options[:profile]

    # order
    set_order(payload) if options[:order]

    # indices_boost
    set_boost_by_indices(payload)

    # suggestions
    set_suggestions(payload, options[:suggest]) if options[:suggest]

    # highlight
    set_highlights(payload, fields) if options[:highlight]

    # timeout shortly after client times out
    payload[:timeout] ||= "#{((Searchkick.search_timeout + 1) * 1000).round}ms"

    # An empty array will cause only the _id and _type for each hit to be returned
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
    if options[:select]
      if options[:select] == []
        # intuitively [] makes sense to return no fields, but ES by default returns all fields
        payload[:_source] = false
      else
        payload[:_source] = options[:select]
      end
    elsif load
      payload[:_source] = false
    end
  end

  # knn
  set_knn(payload, options[:knn], per_page, offset) if options[:knn]

  # pagination
  pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
  if !options[:body] || pagination_options
    payload[:size] = per_page
    payload[:from] = offset if offset > 0
  end

  # type
  if !searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
    @type = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v) }
  end

  # routing
  @routing = options[:routing] if options[:routing]

  if track_total_hits?
    payload[:track_total_hits] = true
  end

  # merge more body options
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]

  # run block
  options[:block].call(payload) if options[:block]

  # scroll optimization when iterating over all docs
  # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
  if options[:scroll] && payload[:query] == {match_all: {}}
    payload[:sort] ||= ["_doc"]
  end

  @body = payload
  @page = page
  @per_page = original_per_page
  @padding = padding
  @load = load
  @scroll = scroll
  @opaque_id = opaque_id
end
# Resolves the fields to search.
# Returns [boost_fields, fields]: fields are "name.suffix" strings, boost_fields maps
# a field to its numeric boost when it was given as "name^boost".
# Raises ArgumentError for an exact match on a real term without explicit fields.
def set_fields
  boost_fields = {}
  requested = options[:fields] || searchkick_options[:default_fields] || searchkick_options[:searchable]
  all = searchkick_options.fetch(:_all, false)
  default_match = options[:match] || searchkick_options[:match] || :word

  fields =
    if requested
      requested.map do |entry|
        # a Hash entry is {name => match type}; anything else uses the default match
        name, match = entry.is_a?(Hash) ? entry.to_a.first : [entry, default_match]
        base, boost = name.to_s.split("^", 2)
        suffix = match == :word ? 'analyzed' : match
        "#{base}.#{suffix}".tap { |field| boost_fields[field] = boost.to_f if boost }
      end
    elsif all && default_match == :word
      ["_all"]
    elsif all && default_match == :phrase
      ["_all.phrase"]
    elsif term != "*" && default_match == :exact
      raise ArgumentError, "Must specify fields to search"
    elsif default_match == :word
      ["*.analyzed"]
    else
      ["*.#{default_match}"]
    end

  [boost_fields, fields]
end
# Combines the base query with filters, exclusions and extra should clauses in a bool query
# (only when at least one of them is present), then wraps the result in function_score
# queries: first the summed custom_filters, then the multiplied multiply_filters.
def build_query(query, filters, should, must_not, custom_filters, multiply_filters)
  if filters.any? || must_not.any? || should.any?
    clauses = {
      must: query || nil,
      filter: (filters if filters.any?), # where
      must_not: (must_not if must_not.any?), # exclude
      should: (should if should.any?) # conversions
    }
    query = {bool: clauses.compact}
  end

  [[custom_filters, "sum"], [multiply_filters, "multiply"]].each do |functions, score_mode|
    next unless functions.any?

    query = {
      function_score: {
        functions: functions,
        query: query,
        score_mode: score_mode
      }
    }
  end

  query
end
# Builds one nested function_score clause per conversions field, scoring documents
# by the stored count for the searched term. Returns [] when no conversions field
# is configured or when options[:conversions] is false.
def set_conversions
  conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
  return [] if conversions_fields.empty? || options[:conversions] == false

  conversions_fields.map do |conversions_field|
    term_match = {
      match: {
        "#{conversions_field}.query" => options[:conversions_term] || term
      }
    }
    scoring = {
      function_score: {
        boost_mode: "replace",
        query: term_match,
        field_value_factor: {
          field: "#{conversions_field}.count"
        }
      }
    }

    {nested: {path: conversions_field, score_mode: "sum", query: scoring}}
  end
end
# Builds one rank_feature clause per conversions_v2 field for the searched term.
# Returns [] when conversions_v2 is disabled for the search, not configured on the model,
# or left to the model default while the model still configures :conversions.
def set_conversions_v2
  setting = options[:conversions_v2]
  return [] if setting == false

  if setting.nil?
    return [] unless searchkick_options[:conversions_v2]
    # disable if searchkick_options[:conversions] to make it easy to upgrade without downtime
    return [] if searchkick_options[:conversions]
  end

  setting = {field: setting} unless setting.is_a?(Hash)

  requested_field = setting[:field]
  conversions_fields =
    if [true, nil].include?(requested_field)
      Array(searchkick_options[:conversions_v2]).map(&:to_s)
    else
      [requested_field.to_s]
    end

  conversions_term = (setting[:term] || options[:conversions_term] || term).to_s
  conversions_term = conversions_term.downcase unless searchkick_options[:case_sensitive]
  # dots are swapped for "*" before the term becomes part of the feature field path
  conversions_term = conversions_term.tr(".", "*")

  conversions_fields.map do |conversions_field|
    {
      rank_feature: {
        field: "#{conversions_field}.#{conversions_term}",
        linear: {},
        boost: setting[:factor] || 1
      }
    }
  end
end
# Builds one phrase multi_match clause per excluded phrase for the given field and analyzer.
def set_exclude(field, analyzer)
  excluded_phrases = Array(options[:exclude])
  excluded_phrases.map do |excluded|
    clause = {fields: [field], query: excluded, analyzer: analyzer, type: "phrase"}
    {multi_match: clause}
  end
end
# Appends one decay function per boost_by_distance field to custom_filters.
# Defaults: gauss function, "5mi" scale, weight 1. Raises ArgumentError without :origin.
def set_boost_by_distance(custom_filters)
  settings = options[:boost_by_distance] || {}

  # legacy format
  legacy_field = settings[:field]
  settings = {legacy_field => settings.except(:field)} if legacy_field

  settings.each do |field, attributes|
    attributes = {function: :gauss, scale: "5mi"}.merge(attributes)
    raise ArgumentError, "boost_by_distance requires :origin" unless attributes[:origin]

    decay = attributes.except(:factor, :function)
    decay[:origin] = location_value(decay[:origin])

    custom_filters << {
      weight: attributes[:factor] || 1,
      attributes[:function] => {field => decay}
    }
  end
end
# Appends one decay function per boost_by_recency field to custom_filters.
# Defaults: gauss function, the current time as origin, weight 1.
def set_boost_by_recency(custom_filters)
  options[:boost_by_recency].each do |field, attributes|
    settings = {function: :gauss, origin: Time.now}.merge(attributes)
    decay = settings.except(:factor, :function)

    custom_filters << {
      weight: settings[:factor] || 1,
      settings[:function] => {field => decay}
    }
  end
end
# Turns boost_by (and the single-field boost option) into scoring functions.
# Entries whose :boost_mode is "multiply" go to multiply_filters; all others
# go to custom_filters with the "ln2p" modifier.
def set_boost_by(multiply_filters, custom_filters)
  additive = options[:boost_by] || {}
  multiplicative = {}

  case additive
  when Array
    additive = additive.to_h { |field| [field, {factor: 1}] }
  when Hash
    # work on copies of the per-field settings: delete(:boost_mode) must not touch the caller's options
    multiplicative, additive =
      additive.transform_values(&:dup)
              .partition { |_, settings| settings.delete(:boost_mode) == "multiply" }
              .map(&:to_h)
  end

  additive[options[:boost]] = {factor: 1} if options[:boost]

  custom_filters.concat(boost_filters(additive, modifier: "ln2p"))
  multiply_filters.concat(boost_filters(multiplicative))
end
# Appends a custom filter per boost_where condition.
# A value may be a list of {value:, factor:} hashes, a single such hash,
# or a plain value, which gets a factor of 1000.
def set_boost_where(custom_filters)
  conditions = options[:boost_where] || {}

  conditions.each do |field, value|
    if value.is_a?(Array) && value.first.is_a?(Hash)
      value.each do |entry|
        custom_filters << custom_filter(field, entry[:value], entry[:factor])
      end
    elsif value.is_a?(Hash)
      custom_filters << custom_filter(field, value[:value], value[:factor])
    else
      custom_filters << custom_filter(field, value, 1000)
    end
  end
end
# Adds indices_boost to the payload as a list of {index name => boost} pairs.
# Keys responding to searchkick_index are replaced by that index's name.
def set_boost_by_indices(payload)
  boosts = options[:indices_boost]
  return unless boosts

  payload[:indices_boost] = boosts.map do |key, boost|
    index_name = key.respond_to?(:searchkick_index) ? key.searchkick_index.name : key
    {index_name => boost}
  end
end
# Adds phrase suggesters for the term to the payload.
# An Array passed as suggest names the fields directly; otherwise the model's suggest
# fields are used, limited to the searched fields when options[:fields] is given.
# Raises ArgumentError when no field remains.
def set_suggestions(payload, suggest)
  suggest_fields =
    if suggest.is_a?(Array)
      suggest
    else
      configured = (searchkick_options[:suggest] || []).map(&:to_s)
      searched = options[:fields]
      if searched
        # intersection
        configured & searched.map { |v| (v.is_a?(Hash) ? v.keys.first : v).to_s.split("^", 2).first }
      else
        configured
      end
    end

  raise ArgumentError, "Must pass fields to suggest option" unless suggest_fields.any?

  payload[:suggest] = {text: term}
  suggest_fields.each do |field|
    payload[:suggest][field] = {
      phrase: {
        field: "#{field}.suggest"
      }
    }
  end
end
# Adds the highlight section to the payload and records the highlighted
# field names in @highlighted_fields.
#
# payload - request body hash, modified in place
# fields  - field names highlighted by default, each with empty options
#
# When options[:highlight] is a Hash, its :tag, :fragment_size, :encoder and
# :fields entries customize the section; names given in :fields are suffixed
# with @match_suffix.
def set_highlights(payload, fields)
  payload[:highlight] = {
    fields: fields.to_h { |f| [f, {}] },
    fragment_size: 0
  }

  if options[:highlight].is_a?(Hash)
    if (tag = options[:highlight][:tag])
      payload[:highlight][:pre_tags] = [tag]
      # closing tag is derived from the opening tag's name, dropping any attributes:
      # <span class="hit"> => </span>
      payload[:highlight][:post_tags] = [tag.to_s.gsub(/\A<(\w+).+/, "</\\1>")]
    end

    if (fragment_size = options[:highlight][:fragment_size])
      payload[:highlight][:fragment_size] = fragment_size
    end

    if (encoder = options[:highlight][:encoder])
      payload[:highlight][:encoder] = encoder
    end

    highlight_fields = options[:highlight][:fields]
    if highlight_fields
      # explicit fields replace the defaults entirely
      payload[:highlight][:fields] = {}

      highlight_fields.each do |name, opts|
        payload[:highlight][:fields]["#{name}.#{@match_suffix}"] = opts || {}
      end
    end
  end

  @highlighted_fields = payload[:highlight][:fields].keys
end
# Builds payload[:aggs] from options[:aggs] and splits the search filters between
# the main query and the post filter.
#
# payload      - request body hash; :aggs is (re)assigned here
# filters      - filters for the main query; modified in place, entries an aggregation
#                does not share are removed from it
# post_filters - receives the filters removed from `filters`
def set_aggregations(payload, filters, post_filters)
aggs = options[:aggs]
payload[:aggs] = {}
aggs = aggs.to_h { |f| [f, {}] } if aggs.is_a?(Array) # convert to more advanced syntax
aggs.each do |field, agg_options|
size = agg_options[:limit] ? agg_options[:limit] : 1_000
# remaining options are passed through to the aggregation body
shared_agg_options = agg_options.except(:limit, :field, :ranges, :date_ranges, :where)
# pick the aggregation kind: ranges, date ranges, date histogram, a metric, or terms by default
if agg_options[:ranges]
payload[:aggs][field] = {
range: {
field: agg_options[:field] || field,
ranges: agg_options[:ranges]
}.merge(shared_agg_options)
}
elsif agg_options[:date_ranges]
payload[:aggs][field] = {
date_range: {
field: agg_options[:field] || field,
ranges: agg_options[:date_ranges]
}.merge(shared_agg_options)
}
elsif (histogram = agg_options[:date_histogram])
payload[:aggs][field] = {
date_histogram: histogram
}.merge(shared_agg_options)
elsif (metric = @@metric_aggs.find { |k| agg_options.has_key?(k) })
# NOTE(review): shared_agg_options still contains the metric key, so the merge below
# replaces the hash built here with the caller's own metric options - confirm this is intended
payload[:aggs][field] = {
metric => {
field: agg_options[metric][:field] || field
}
}.merge(shared_agg_options)
else
payload[:aggs][field] = {
terms: {
field: agg_options[:field] || field,
size: size
}.merge(shared_agg_options)
}
end
agg_where = ensure_permitted(agg_options[:where] || {})
# smart aggs: unless disabled, the search's where conditions also apply to the aggregation
if options[:smart_aggs] != false && options[:where]
where = ensure_permitted(options[:where])
where_without_field = where.reject { |k| k == field }
# where_without_field = where_without_field(where, field.to_s)
if where_without_field.any?
if agg_where.any?
# the full where hash is merged here (the aggregation's own conditions win on shared keys)
agg_where = where.merge(agg_where)
# agg_where = combine_agg_where(agg_where, where_without_field)
else
agg_where = where_without_field
end
end
end
agg_filters = where_filters(agg_where)
# only do one level comparison for simplicity
# filters this aggregation does not share move to post_filters
filters.select! do |filter|
if agg_filters.include?(filter)
true
else
post_filters << filter
false
end
end
# wrap the aggregation in a filter aggregation carrying its own conditions
if agg_filters.any?
payload[:aggs][field] = {
filter: {
bool: {
must: agg_filters
}
},
aggs: {
field => payload[:aggs][field]
}
}
end
end
end
# Returns a copy of the where conditions with every condition on `field` removed,
# descending into :_and, :_or, :or and :_not.
#
# where - Hash of conditions
# field - field name as a String (keys are compared through to_s)
#
# A conjunction simply loses its emptied members. A disjunction with an emptied
# alternative no longer restricts anything, so it is dropped as a whole; this
# applies to :_or and to each clause of :or alike.
def where_without_field(where, field)
  result = {}
  where.each do |f, v|
    case f
    when :_and
      r = v.map { |v2| where_without_field(v2, field) }.reject(&:empty?)
      result[f] = r unless r.empty?
    when :_or
      r = v.map { |v2| where_without_field(v2, field) }
      result[f] = r unless r.any?(&:empty?)
    when :or
      # v is a list of or-clauses, each a list of alternative condition hashes;
      # the reject must run on the clauses, not on the alternatives inside one
      r = v.map { |clause| clause.map { |alternative| where_without_field(alternative, field) } }
           .reject { |clause| clause.any?(&:empty?) }
      result[f] = r unless r.empty?
    when :_not
      r = where_without_field(v, field)
      result[f] = r unless r.empty?
    when :_script
      result[f] = v
    else
      if f.to_s != field
        result[f] = v
      end
    end
  end
  result
end
# Merges the search's where conditions into an aggregation's own conditions.
# Field conditions already present in agg_where win (names compared as strings);
# an operator key present on both sides is kept in agg_where and the search's
# version is appended to :_and. agg_where itself is not modified.
def combine_agg_where(agg_where, where)
  operators = [:_and, :_or, :or, :_not, :_script]
  # field names taken by the aggregation, fixed before merging starts
  claimed = agg_where.keys.reject { |key| operators.include?(key) }.map(&:to_s)

  where.each_with_object(agg_where.dup) do |(key, value), combined|
    if operators.include?(key)
      if combined.key?(key)
        # combine with _and if needed
        combined[:_and] = (combined[:_and] || []) + [{key => value}]
      else
        combined[key] = value
      end
    elsif !claimed.include?(key.to_s)
      combined[key] = value
    end
  end
end
# Replaces the payload's query with a nearest-neighbor search on a vector field.
# The previous query becomes the filter of the vector search.
#
# payload  - request body hash, modified in place
# knn      - Hash with :field and :vector, optionally :distance, :exact and :ef_search
# per_page - page size; per_page + offset is used as k
# offset   - result offset
#
# The search is exact (script scoring) when requested or when the distance differs
# from the one configured for the field, approximate otherwise.
# Raises ArgumentError for a non-"*" term, a missing, mismatched or unknown distance.
def set_knn(payload, knn, per_page, offset)
  raise ArgumentError, "Use Searchkick.multi_search for hybrid search" if term != "*"

  field = knn[:field]
  field_options = searchkick_options.dig(:knn, field.to_sym) || searchkick_options.dig(:knn, field.to_s) || {}
  indexed_distance = field_options[:distance]
  vector = knn[:vector]
  distance = knn[:distance] || indexed_distance
  exact = knn[:exact]
  exact = indexed_distance.nil? || distance != indexed_distance if exact.nil?
  k = per_page + offset
  ef_search = knn[:ef_search]
  filter = payload.delete(:query)

  raise ArgumentError, "distance required" if distance.nil?
  if !exact && distance != indexed_distance
    raise ArgumentError, "distance must match searchkick options for approximate search"
  end

  if Searchkick.opensearch?
    if exact
      # https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/#spaces
      space_types = {
        "cosine" => "cosinesimil",
        "euclidean" => "l2",
        "taxicab" => "l1",
        "inner_product" => "innerproduct",
        "chebyshev" => "linf"
      }
      space_type = space_types.fetch(distance) { raise ArgumentError, "Unknown distance: #{distance}" }

      scoring_script = {
        source: "knn_score",
        lang: "knn",
        params: {
          field: field,
          query_value: vector,
          space_type: space_type
        }
      }

      payload[:query] = {
        script_score: {
          query: {
            bool: {
              must: [filter, {exists: {field: field}}]
            }
          },
          script: scoring_script,
          boost: distance == "cosine" && Searchkick.server_below?("2.19.0") ? 0.5 : 1.0
        }
      }
    else
      if ef_search && Searchkick.server_below?("2.16.0")
        raise Error, "ef_search requires OpenSearch 2.16+"
      end

      approximate = {vector: vector, k: k, filter: filter}
      approximate[:method_parameters] = {ef_search: ef_search} if ef_search

      payload[:query] = {
        knn: {
          field.to_sym => approximate
        }
      }
    end
  elsif exact
    # prevent incorrect distances/results with Elasticsearch 9.0.0-rc1
    if !Searchkick.server_below?("9.0.0") && indexed_distance == "cosine" && distance != "cosine"
      raise ArgumentError, "distance must match searchkick options"
    end

    # https://github.com/elastic/elasticsearch/blob/main/docs/reference/vectors/vector-functions.asciidoc
    sources = {
      "cosine" => "(cosineSimilarity(params.query_vector, params.field) + 1.0) * 0.5",
      "euclidean" => "double l2 = l2norm(params.query_vector, params.field); 1 / (1 + l2 * l2)",
      "taxicab" => "1 / (1 + l1norm(params.query_vector, params.field))",
      "inner_product" => "double dot = dotProduct(params.query_vector, params.field); dot > 0 ? dot + 1 : 1 / (1 - dot)"
    }
    source = sources.fetch(distance) { raise ArgumentError, "Unknown distance: #{distance}" }

    payload[:query] = {
      script_score: {
        query: {
          bool: {
            must: [filter, {exists: {field: field}}]
          }
        },
        script: {
          source: source,
          params: {
            field: field,
            query_vector: vector
          }
        }
      }
    }
  else
    approximate = {field: field, query_vector: vector, k: k, filter: filter}
    approximate[:num_candidates] = ef_search if ef_search

    payload[:knn] = approximate
  end
end
# Stores +post_filters+ on +payload+ as a bool/filter clause under :post_filter.
def set_post_filters(payload, post_filters)
  bool_clause = {filter: post_filters}
  payload[:post_filter] = {bool: bool_clause}
end
# Copies the :order option into payload[:sort].
# Enumerable values are used as-is; anything else is sorted ascending.
def set_order(payload)
  order = options[:order]
  payload[:sort] =
    if order.is_a?(Enumerable)
      order
    else
      {order => :asc}
    end
end
# *very* basic guard against unfiltered parameters: coerces +obj+ with to_h
# not meant to be comprehensive and may be expanded in the future
# NOTE(review): presumably relies on to_h raising for unpermitted params - confirm
def ensure_permitted(obj)
  permitted = obj.to_h
  permitted
end
# Translates a +where+ hash into an array of filter clauses.
#
# Keys are field names ("id" is mapped to :_id) or one of the special keys
# :or, :_or, :_and, :_not and :_script. A value is matched as a term unless
# it is a Range (expanded to gte/lt/lte), an Array (treated as :in) or a
# Hash of operators.
#
# Returns an Array of filter hashes (empty when +where+ is nil or empty).
# Raises ArgumentError for an unknown operator or a non-boolean :exists
# value, and TypeError when :_script is not a Script.
def where_filters(where)
  filters = []
  (where || {}).each do |field, value|
    field = :_id if field.to_s == "id"

    # update smart aggs when adding new symbol
    if field == :or
      value.each do |or_clause|
        filters << {bool: {should: or_clause.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
      end
    elsif field == :_or
      filters << {bool: {should: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
    elsif field == :_not
      filters << {bool: {must_not: where_filters(value)}}
    elsif field == :_and
      filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
    elsif field == :_script
      unless value.is_a?(Script)
        raise TypeError, "expected Searchkick::Script"
      end

      filters << {script: {script: {source: value.source, lang: value.lang, params: value.params}}}
    else
      # expand ranges
      if value.is_a?(Range)
        value = expand_range(value)
      end

      value = {in: value} if value.is_a?(Array)

      if value.is_a?(Hash)
        value.each do |op, op_value|
          case op
          when :within, :bottom_right, :bottom_left
            # do nothing (consumed by :near, :top_left and :top_right below)
          when :near
            filters << {
              geo_distance: {
                field => location_value(op_value),
                distance: value[:within] || "50mi"
              }
            }
          when :geo_polygon
            filters << {
              geo_polygon: {
                field => op_value
              }
            }
          when :geo_shape
            shape = op_value.except(:relation)
            shape[:coordinates] = coordinate_array(shape[:coordinates]) if shape[:coordinates]
            filters << {
              geo_shape: {
                field => {
                  relation: op_value[:relation] || "intersects",
                  shape: shape
                }
              }
            }
          when :top_left
            filters << {
              geo_bounding_box: {
                field => {
                  top_left: location_value(op_value),
                  bottom_right: location_value(value[:bottom_right])
                }
              }
            }
          when :top_right
            filters << {
              geo_bounding_box: {
                field => {
                  top_right: location_value(op_value),
                  bottom_left: location_value(value[:bottom_left])
                }
              }
            }
          when :like, :ilike
            # based on Postgres
            # https://www.postgresql.org/docs/current/functions-matching.html
            # % matches zero or more characters
            # _ matches one character
            # \ is escape character

            # escape Lucene reserved characters
            # https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators
            reserved = %w(\\ . ? + * | { } [ ] ( ) ")
            regex = op_value.dup
            reserved.each do |v|
              regex.gsub!(v, "\\\\" + v)
            end

            # translate unescaped wildcards, then unwrap the escaped ones
            # (a user-supplied backslash was doubled by the step above)
            regex = regex.gsub(/(?<!\\)%/, ".*").gsub(/(?<!\\)_/, ".").gsub("\\%", "%").gsub("\\_", "_")

            if op == :ilike
              filters << {regexp: {field => {value: regex, flags: "NONE", case_insensitive: true}}}
            else
              filters << {regexp: {field => {value: regex, flags: "NONE"}}}
            end
          when :prefix
            filters << {prefix: {field => {value: op_value}}}
          when :regexp # support for regexp queries without using a regexp ruby object
            filters << {regexp: {field => {value: op_value}}}
          when :not, :_not # not equal
            filters << {bool: {must_not: term_filters(field, op_value)}}
          when :all
            op_value.each do |val|
              filters << term_filters(field, val)
            end
          when :in
            filters << term_filters(field, op_value)
          when :exists
            case op_value
            when true
              filters << {exists: {field: field}}
            when false
              filters << {bool: {must_not: {exists: {field: field}}}}
            else
              raise ArgumentError, "Passing a value other than true or false to exists is not supported"
            end
          else
            range_query =
              case op
              when :gt
                {gt: op_value}
              when :gte
                {gte: op_value}
              when :lt
                {lt: op_value}
              when :lte
                {lte: op_value}
              else
                raise ArgumentError, "Unknown where operator: #{op.inspect}"
              end

            # issue 132
            # merge into an existing range filter on the same field
            if (existing = filters.find { |f| f[:range] && f[:range][field] })
              existing[:range][field].merge!(range_query)
            else
              filters << {range: {field => range_query}}
            end
          end
        end
      else
        filters << term_filters(field, value)
      end
    end
  end
  filters
end
# Builds a single filter matching +field+ against +value+.
#
# Arrays become a terms query (nil members add a "field missing" alternative),
# nil becomes a "field missing" filter, a Regexp becomes a regexp query and
# any other value becomes a term query.
# Raises TypeError when a scalar's as_json form is an Enumerable.
def term_filters(field, value)
  if value.is_a?(Array) # in query
    return {terms: {field => value}} if value.none?(&:nil?)

    return {bool: {should: [term_filters(field, nil), term_filters(field, value.compact)]}}
  end

  return {bool: {must_not: {exists: {field: field}}}} if value.nil?

  unless value.is_a?(Regexp)
    # TODO add this for other values
    # query will fail, but this is better
    # same message as Active Record
    raise TypeError, "can't cast #{value.class.name}" if value.as_json.is_a?(Enumerable)

    return {term: {field => {value: value}}}
  end

  # TODO handle other regexp options
  # TODO handle other anchor characters, like ^, $, \Z
  pattern = value.source
  # an unanchored side is padded with .*
  pattern = pattern.start_with?("\\A") ? pattern[2..-1] : ".*#{pattern}"
  pattern = pattern.end_with?("\\z") ? pattern[0..-3] : "#{pattern}.*"

  {regexp: {field => {value: pattern, flags: "NONE", case_insensitive: value.casefold?}}}
end
# Pairs the where-filters for a single field/value with a weight of +factor+.
def custom_filter(field, value, factor)
  conditions = {field => value}
  {filter: where_filters(conditions), weight: factor}
end
# Builds a field_value_factor scoring function for +field+.
#
# With +missing+ set, it is used (as a float) for documents lacking the field;
# otherwise the function is restricted to documents where the field exists.
def boost_filter(field, factor: 1, modifier: nil, missing: nil)
  value_factor = {field: field, factor: factor.to_f, modifier: modifier}

  if missing
    value_factor[:missing] = missing.to_f
    return {field_value_factor: value_factor}
  end

  {field_value_factor: value_factor, filter: {exists: {field: field}}}
end
# Builds one boost_filter per field in +boost_by+ (field => options hash).
# Per-field options are splatted after +modifier+, so they take precedence.
def boost_filters(boost_by, modifier: nil)
  boost_by.map { |field, settings| boost_filter(field, modifier: modifier, **settings) }
end
# Walks nested arrays until it reaches a lat/lon hash or an array whose first
# element is a number, returning the same structure with every lat/lon hash
# replaced by [lon, lat]. Anything else is returned untouched.
def coordinate_array(value)
  return [value[:lon], value[:lat]] if value.is_a?(Hash)

  nested = value.is_a?(Array) && !value[0].is_a?(Numeric)
  nested ? value.map { |item| coordinate_array(item) } : value
end
# Arrays are converted to floats and reversed; other values pass through.
def location_value(value)
  return value unless value.is_a?(Array)

  value.map { |coordinate| coordinate.to_f }.reverse
end
# Converts a Range into a hash of range operators.
#
# The lower bound becomes :gte and the upper bound becomes :lt (exclusive
# range) or :lte. A missing bound is omitted, and so is an infinite one:
# +Infinity/-Infinity on the upper side (as before) and now also -Infinity on
# the lower side, which would otherwise be emitted as a literal -Infinity
# bound instead of leaving the range open-ended.
def expand_range(range)
  lower = range.begin
  upper = range.end
  expanded = {}

  if lower && !(lower.respond_to?(:infinite?) && lower.infinite? == -1)
    expanded[:gte] = lower
  end

  if upper && !(upper.respond_to?(:infinite?) && upper.infinite?)
    expanded[range.exclude_end? ? :lt : :lte] = upper
  end

  expanded
end
# Strips a trailing analyzer sub-field suffix (e.g. ".word_start") from +k+.
def base_field(k)
  suffix = /\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/
  k.sub(suffix, "")
end
# Truthy when :deep_paging is set in searchkick_options or
# :track_total_hits is set in body_options.
def track_total_hits?
  deep_paging = searchkick_options[:deep_paging]
  deep_paging || body_options[:track_total_hits]
end
# The :body_options option, or an empty hash when it is not set.
def body_options
  custom = options[:body_options]
  custom ? custom : {}
end
end
end
================================================
FILE: lib/searchkick/railtie.rb
================================================
module Searchkick
  # Registers Searchkick's rake tasks through the Rails::Railtie hook.
  class Railtie < Rails::Railtie
    rake_tasks { load "tasks/searchkick.rake" }
  end
end
================================================
FILE: lib/searchkick/record_data.rb
================================================
module Searchkick
  # Builds the bulk action payloads (index / update / delete) for one record
  # against one index.
  class RecordData
    TYPE_KEYS = ["type", :type]

    attr_reader :index, :record

    def initialize(index, record)
      @index = index
      @record = record
    end

    # {index: ...} action carrying the full search document.
    def index_data
      {index: record_data.merge(data: search_data)}
    end

    # {update: ...} action carrying a partial document built by +method_name+.
    def update_data(method_name)
      {update: record_data.merge(data: {doc: search_data(method_name)})}
    end

    def delete_data
      {delete: record_data}
    end

    # custom id can be useful for load: false
    # numeric ids are kept as-is, everything else is stringified
    def search_id
      id =
        if record.respond_to?(:search_document_id)
          record.search_document_id
        else
          record.id
        end
      return id if id.is_a?(Numeric)

      id.to_s
    end

    def document_type(ignore_type = false)
      index.klass_document_type(record.class, ignore_type)
    end

    # Action metadata: index name, document id and (when the record defines
    # search_routing) the routing value.
    def record_data
      meta = {_index: index.name, _id: search_id}
      meta[:routing] = record.search_routing if record.respond_to?(:search_routing)
      meta
    end

    private

    # Calls +method_name+ (default :search_data) on the record and normalizes
    # the resulting hash in place: conversions, suggest fields, locations,
    # inheritance type and BigDecimal values.
    def search_data(method_name = nil)
      partial_reindex = !method_name.nil?
      source = record.send(method_name || :search_data)

      # conversions
      index.conversions_fields.each do |field|
        counts = source[field]
        next unless counts

        source[field] = counts.map { |query, count| {query: query, count: count} }
      end

      index.conversions_v2_fields.each do |field|
        key = source.key?(field) ? field : field.to_sym
        next if partial_reindex && !source[key]

        case_sensitive = index.options[:case_sensitive]
        source[key] =
          (source[key] || {}).each_with_object(Hash.new(0)) do |(term, count), totals|
            term = term.to_s
            term = term.downcase unless case_sensitive
            totals[term.gsub(".", "*")] += count
          end
      end

      # hack to prevent generator field doesn't exist error
      unless partial_reindex
        index.suggest_fields.each do |field|
          next if source.key?(field) || source.key?(field.to_sym)

          source[field] = nil
        end
      end

      # locations
      index.locations_fields.each do |field|
        location = source[field]
        next unless location

        # multiple locations
        many = !location.is_a?(Hash) && (location.first.is_a?(Array) || location.first.is_a?(Hash))
        source[field] = many ? location.map { |point| location_value(point) } : location_value(location)
      end

      if index.options[:inheritance] && TYPE_KEYS.none? { |tk| source.key?(tk) }
        source[:type] = document_type(true)
      end

      cast_big_decimal(source)

      source
    end

    # Arrays are converted to floats and reversed, hashes are reduced to float
    # :lat/:lon, anything else passes through.
    def location_value(value)
      return value.map(&:to_f).reverse if value.is_a?(Array)
      return {lat: value[:lat].to_f, lon: value[:lon].to_f} if value.is_a?(Hash)

      value
    end

    # change all BigDecimal values to floats due to
    # https://github.com/rails/rails/issues/6033
    # possible loss of precision :/
    # hashes are updated in place; other enumerables are mapped to a new array
    def cast_big_decimal(obj)
      case obj
      when BigDecimal
        obj.to_f
      when Hash
        obj.each do |key, val|
          obj[key] = cast_big_decimal(val) if val.is_a?(BigDecimal) || val.is_a?(Enumerable)
        end
      when Enumerable
        obj.map { |val| cast_big_decimal(val) }
      else
        obj
      end
    end
  end
end
================================================
FILE: lib/searchkick/record_indexer.rb
================================================
module Searchkick
  # Reindexes a batch of already-loaded records inline, through Active Job or
  # through the reindex queue.
  class RecordIndexer
    attr_reader :index

    def initialize(index)
      @index = index
    end

    # Reindexes +records+ according to +mode+ (:async, :queue, :inline or true).
    # Returns nil when there are no records and true otherwise; raises
    # ArgumentError for any other mode.
    def reindex(records, mode:, method_name:, ignore_missing:, full: false, single: false, job_options: nil)
      # prevents exists? check if records is a relation
      records = records.to_a
      return if records.empty?

      case mode
      when :async
        enqueue_async(records, method_name: method_name, ignore_missing: ignore_missing, single: single, job_options: job_options)
      when :queue
        raise Error, "Partial reindex not supported with queue option" if method_name

        index.reindex_queue.push_records(records)
      when true, :inline
        to_index, to_delete = records.partition { |r| index_record?(r) }
        # a full reindex never issues deletes
        to_delete = [] if full
        import_inline(to_index, to_delete, method_name: method_name, ignore_missing: ignore_missing, single: single)
      else
        raise ArgumentError, "Invalid value for mode"
      end

      # return true like model and relation reindex for now
      true
    end

    # Loads the records named by +items+ (hashes with :id and :routing),
    # indexes those that should be indexed and deletes the rest.
    def reindex_items(klass, items, method_name:, ignore_missing:, single: false)
      routing_by_id = items.to_h { |item| [item[:id], item[:routing]] }
      ids = routing_by_id.keys

      scope = Searchkick.load_records(klass, ids)
      # call search_import even for single records for nested associations
      scope = scope.search_import if scope.respond_to?(:search_import)
      indexable = scope.select(&:should_index?)

      # determine which records to delete
      found_ids = indexable.map { |r| r.id.to_s }
      stale = (ids - found_ids).map { |id| construct_record(klass, id, routing_by_id[id]) }

      import_inline(indexable, stale, method_name: method_name, ignore_missing: ignore_missing, single: single)
    end

    private

    # Enqueues a ReindexV2Job (single record) or a BulkReindexJob.
    def enqueue_async(records, method_name:, ignore_missing:, single:, job_options:)
      raise Error, "Active Job not found" unless defined?(ActiveJob)

      job_options ||= {}

      # only add if set for backwards compatibility
      extra_options = ignore_missing ? {ignore_missing: ignore_missing} : {}
      job_method = (method_name.to_s if method_name)

      # we could likely combine ReindexV2Job, BulkReindexJob, and ProcessBatchJob
      # but keep them separate for now
      if single
        record = records.first

        # always pass routing in case record is deleted
        # before the async job runs
        routing = record.search_routing if record.respond_to?(:search_routing)

        Searchkick::ReindexV2Job.set(**job_options).perform_later(
          record.class.name,
          record.id.to_s,
          job_method,
          routing: routing,
          index_name: index.name,
          **extra_options
        )
      else
        Searchkick::BulkReindexJob.set(**job_options).perform_later(
          class_name: records.first.class.searchkick_options[:class_name],
          record_ids: records.map { |r| r.id.to_s },
          index_name: index.name,
          method_name: job_method,
          **extra_options
        )
      end
    end

    def index_record?(record)
      record.persisted? && !record.destroyed? && record.should_index?
    end

    # import in single request with retries
    def import_inline(index_records, delete_records, method_name:, ignore_missing:, single:)
      return if index_records.empty? && delete_records.empty?

      maybe_bulk(index_records, delete_records, method_name, single) do
        if index_records.any?
          if method_name
            index.bulk_update(index_records, method_name, ignore_missing: ignore_missing)
          else
            index.bulk_index(index_records)
          end
        end

        index.bulk_delete(delete_records) if delete_records.any?
      end
    end

    # Yields directly when bulk callbacks are already active; otherwise wraps
    # the block in a bulk callback (with a notification message) and retries.
    def maybe_bulk(index_records, delete_records, method_name, single)
      return yield if Searchkick.callbacks_value == :bulk

      # set action and data
      action =
        if single && index_records.empty?
          "Remove"
        elsif method_name
          "Update"
        elsif single
          "Store"
        else
          "Import"
        end
      record = index_records.first || delete_records.first
      name = record.class.searchkick_klass.name
      message = ->(event) do
        event[:name] = "#{name} #{action}"
        if single
          event[:id] = index.search_id(record)
        else
          event[:count] = index_records.size + delete_records.size
        end
      end

      with_retries do
        Searchkick.callbacks(:bulk, message: message) do
          yield
        end
      end
    end

    # Builds an unsaved placeholder with the given id (and routing, if any)
    # for use in a delete action.
    def construct_record(klass, id, routing)
      klass.new.tap do |record|
        record.id = id
        record.define_singleton_method(:search_routing) { routing } if routing
      end
    end

    # Runs the block, retrying once on Faraday::ClientError.
    def with_retries
      attempts = 0

      begin
        yield
      rescue Faraday::ClientError => error
        raise error unless attempts < 1

        attempts += 1
        retry
      end
    end
  end
end
================================================
FILE: lib/searchkick/reindex_queue.rb
================================================
module Searchkick
  # Redis list of record ids waiting to be reindexed, one list per +name+.
  class ReindexQueue
    attr_reader :name

    # Raises Error when Searchkick.redis is not configured.
    def initialize(name)
      @name = name

      raise Error, "Searchkick.redis not set" unless Searchkick.redis
    end

    # supports single and multiple ids
    def push(record_ids)
      Searchkick.with_redis { |redis| redis.call("LPUSH", redis_key, record_ids) }
    end

    # Pushes "id" or "id|routing" entries for +records+.
    def push_records(records)
      entries =
        records.map do |record|
          # always pass routing in case record is deleted
          # before the queue job runs
          routing = record.search_routing if record.respond_to?(:search_routing)

          # escape pipe with double pipe
          id_part = escape(record.id.to_s)
          routing ? "#{id_part}|#{escape(routing)}" : id_part
        end

      push(entries)
    end

    # TODO use reliable queuing
    def reserve(limit: 1000)
      popped = Searchkick.with_redis { |redis| redis.call("RPOP", redis_key, limit) }
      popped.to_a
    end

    def clear
      Searchkick.with_redis { |redis| redis.call("DEL", redis_key) }
    end

    def length
      Searchkick.with_redis { |redis| redis.call("LLEN", redis_key) }
    end

    private

    def redis_key
      "searchkick:reindex_queue:#{name}"
    end

    def escape(value)
      value.to_s.gsub("|", "||")
    end
  end
end
================================================
FILE: lib/searchkick/reindex_v2_job.rb
================================================
module Searchkick
  # Job that reindexes (or removes) a single record by class name and id.
  class ReindexV2Job < Searchkick.parent_job.constantize
    queue_as { Searchkick.queue_name }

    def perform(class_name, id, method_name = nil, routing: nil, index_name: nil, ignore_missing: nil)
      klass = Searchkick.load_model(class_name, allow_child: true)
      target_index = klass.searchkick_index(name: index_name)

      # use should_index? to decide whether to index (not default scope)
      # just like saving inline
      # could use Searchkick.scope() in future
      # but keep for now for backwards compatibility
      klass = klass.unscoped if klass.respond_to?(:unscoped)

      RecordIndexer.new(target_index).reindex_items(
        klass,
        [{id: id, routing: routing}],
        method_name: method_name,
        ignore_missing: ignore_missing,
        single: true
      )
    end
  end
end
================================================
FILE: lib/searchkick/relation.rb
================================================
module Searchkick
  # Chainable search relation. Each non-bang builder clones the relation and
  # applies the matching bang method to the clone; bang methods modify the
  # receiver and raise once the relation has been executed. Unknown methods
  # are delegated to the executed results.
  class Relation
    NO_DEFAULT_VALUE = Object.new

    # note: modifying body directly is not supported
    # and has no impact on query after being executed
    # TODO freeze body object?
    delegate :params, to: :query
    delegate_missing_to :private_execute

    attr_reader :model
    alias_method :klass, :model

    def initialize(model, term = "*", **options)
      @model = model
      @term = term
      @options = options

      # generate query to validate options
      query unless options.empty?
    end

    # same as Active Record
    def inspect
      shown = private_execute.first(11).map(&:inspect)
      shown[10] = "..." if shown.size == 11
      "#<#{self.class.name} [#{shown.join(', ')}]>"
    end

    # Without arguments returns the aggregations of the executed search;
    # otherwise returns a copy with the given aggregations added.
    def aggs(*args, **kwargs)
      return private_execute.aggs if args.empty? && kwargs.empty?

      clone.aggs!(*args, **kwargs)
    end

    def aggs!(*args, **kwargs)
      check_loaded
      requested =
        args.flatten.each_with_object({}) do |arg, memo|
          if arg.is_a?(Hash)
            memo.merge!(arg)
          else
            memo[arg] = {}
          end
        end
      requested.merge!(kwargs)
      merge_option(:aggs, requested)
      self
    end

    def body(value = NO_DEFAULT_VALUE)
      value == NO_DEFAULT_VALUE ? query.body : clone.body!(value)
    end

    def body!(value) = assign_option(:body, value)

    def body_options(value) = clone.body_options!(value)
    def body_options!(value) = combine_option(:body_options, value)

    def boost(value) = clone.boost!(value)
    def boost!(value) = assign_option(:boost, value)

    def boost_by(value) = clone.boost_by!(value)

    # Accepts a field, an array of fields or a field => options hash.
    def boost_by!(value)
      check_loaded
      normalized =
        if value.is_a?(Array)
          value.to_h { |f| [f, {factor: 1}] }
        elsif value.is_a?(Hash)
          value
        else
          {value => {factor: 1}}
        end
      merge_option(:boost_by, normalized)
      self
    end

    def boost_by_distance(value) = clone.boost_by_distance!(value)

    def boost_by_distance!(value)
      check_loaded
      # legacy format
      value = {value[:field] => value.except(:field)} if value[:field]
      merge_option(:boost_by_distance, value)
      self
    end

    def boost_by_recency(value) = clone.boost_by_recency!(value)
    def boost_by_recency!(value) = combine_option(:boost_by_recency, value)

    def boost_where(value) = clone.boost_where!(value)

    # TODO merge duplicate fields
    def boost_where!(value) = combine_option(:boost_where, value)

    def conversions(value) = clone.conversions!(value)
    def conversions!(value) = assign_option(:conversions, value)

    def conversions_v1(value) = clone.conversions_v1!(value)
    def conversions_v1!(value) = assign_option(:conversions_v1, value)

    def conversions_v2(value) = clone.conversions_v2!(value)
    def conversions_v2!(value) = assign_option(:conversions_v2, value)

    def conversions_term(value) = clone.conversions_term!(value)
    def conversions_term!(value) = assign_option(:conversions_term, value)

    def debug(value = true) = clone.debug!(value)
    def debug!(value = true) = assign_option(:debug, value)

    def emoji(value = true) = clone.emoji!(value)
    def emoji!(value = true) = assign_option(:emoji, value)

    def exclude(*values) = clone.exclude!(*values)
    def exclude!(*values) = append_option(:exclude, values)

    def explain(value = true) = clone.explain!(value)
    def explain!(value = true) = assign_option(:explain, value)

    def fields(*values) = clone.fields!(*values)
    def fields!(*values) = append_option(:fields, values)

    def highlight(value) = clone.highlight!(value)
    def highlight!(value) = assign_option(:highlight, value)

    def includes(*values) = clone.includes!(*values)
    def includes!(*values) = append_option(:includes, values)

    def index_name(*values) = clone.index_name!(*values)

    # When every value responds to searchkick_index they are treated as
    # models; otherwise they are appended to :index_name.
    def index_name!(*values)
      check_loaded
      values = values.flatten
      if values.all? { |v| v.respond_to?(:searchkick_index) }
        models!(*values)
      else
        concat_option(:index_name, values)
        self
      end
    end

    def indices_boost(value) = clone.indices_boost!(value)
    def indices_boost!(value) = combine_option(:indices_boost, value)

    def knn(value) = clone.knn!(value)
    def knn!(value) = assign_option(:knn, value)

    def limit(value) = clone.limit!(value)
    def limit!(value) = assign_option(:limit, value)

    # Without an argument executes the search and returns self;
    # otherwise returns a copy with the :load option set.
    def load(value = NO_DEFAULT_VALUE)
      return clone.load!(value) unless value == NO_DEFAULT_VALUE

      private_execute
      self
    end

    def load!(value) = assign_option(:load, value)

    def match(value) = clone.match!(value)
    def match!(value) = assign_option(:match, value)

    def misspellings(value) = clone.misspellings!(value)
    def misspellings!(value) = assign_option(:misspellings, value)

    def models(*values) = clone.models!(*values)
    def models!(*values) = append_option(:models, values)

    def model_includes(*values) = clone.model_includes!(*values)
    def model_includes!(*values) = append_option(:model_includes, values)

    def offset(value = NO_DEFAULT_VALUE)
      value == NO_DEFAULT_VALUE ? private_execute.offset : clone.offset!(value)
    end

    def offset!(value) = assign_option(:offset, value)

    def opaque_id(value) = clone.opaque_id!(value)
    def opaque_id!(value) = assign_option(:opaque_id, value)

    def operator(value) = clone.operator!(value)
    def operator!(value) = assign_option(:operator, value)

    def order(*values) = clone.order!(*values)
    def order!(*values) = append_option(:order, values)

    def padding(value = NO_DEFAULT_VALUE)
      value == NO_DEFAULT_VALUE ? private_execute.padding : clone.padding!(value)
    end

    def padding!(value) = assign_option(:padding, value)

    def page(value) = clone.page!(value)
    def page!(value) = assign_option(:page, value)

    def per_page(value = NO_DEFAULT_VALUE)
      value == NO_DEFAULT_VALUE ? private_execute.per_page : clone.per_page!(value)
    end

    def per(value) = per_page(value)

    # TODO set limit?
    def per_page!(value) = assign_option(:per_page, value)

    def profile(value = true) = clone.profile!(value)
    def profile!(value = true) = assign_option(:profile, value)

    def request_params(value) = clone.request_params!(value)
    def request_params!(value) = combine_option(:request_params, value)

    def routing(value) = clone.routing!(value)
    def routing!(value) = assign_option(:routing, value)

    def scope_results(value) = clone.scope_results!(value)
    def scope_results!(value) = assign_option(:scope_results, value)

    # Without a value scrolls the executed results; with a value returns a
    # copy with :scroll set, scrolling it right away when a block is given.
    def scroll(value = NO_DEFAULT_VALUE, &block)
      return private_execute.scroll(&block) if value == NO_DEFAULT_VALUE

      scrolling = clone.scroll!(value)
      block_given? ? scrolling.scroll(&block) : scrolling
    end

    def scroll!(value) = assign_option(:scroll, value)

    # With a block filters the executed results; otherwise adds to :select.
    def select(*values, &block)
      return private_execute.select(*values, &block) if block_given?

      clone.select!(*values)
    end

    def select!(*values) = append_option(:select, values)

    def similar(value = true) = clone.similar!(value)
    def similar!(value = true) = assign_option(:similar, value)

    def smart_aggs(value) = clone.smart_aggs!(value)
    def smart_aggs!(value) = assign_option(:smart_aggs, value)

    def suggest(value = true) = clone.suggest!(value)
    def suggest!(value = true) = assign_option(:suggest, value)

    def total_entries(value = NO_DEFAULT_VALUE)
      value == NO_DEFAULT_VALUE ? private_execute.total_entries : clone.total_entries!(value)
    end

    def total_entries!(value) = assign_option(:total_entries, value)

    def track(value = true) = clone.track!(value)
    def track!(value = true) = assign_option(:track, value)

    def type(*values) = clone.type!(*values)
    def type!(*values) = append_option(:type, values)

    # Without an argument returns a Where builder for this relation.
    def where(value = NO_DEFAULT_VALUE)
      value == NO_DEFAULT_VALUE ? Where.new(self) : clone.where!(value)
    end

    # Merges +value+ into the existing conditions when no keys overlap;
    # overlapping keys are combined under :_and instead.
    def where!(value)
      check_loaded
      value = ensure_permitted(value)
      existing = @options[:where]
      if !existing
        @options[:where] = value
      elsif !existing.keys.intersect?(value.keys)
        # keep simple when possible for smart aggs
        merge_option(:where, value)
      elsif existing[:_and].is_a?(Array)
        merge_option(:where, {_and: existing[:_and] + [value]})
      else
        @options[:where] = {_and: [existing, value]}
      end
      self
    end

    # First result (or first +value+ results). When not yet loaded, runs a
    # copy limited to what is needed, never exceeding an existing limit.
    def first(value = NO_DEFAULT_VALUE)
      single = value == NO_DEFAULT_VALUE
      result =
        if loaded?
          private_execute
        else
          wanted = single ? 1 : value
          existing = (@options[:limit] || @options[:per_page])&.to_i
          wanted = existing if existing && existing < wanted
          limit(wanted).load
        end
      single ? result.first : result.first(value)
    end

    def pluck(*keys)
      source =
        if !loaded? && @options[:load] == false
          select(*keys).send(:private_execute)
        else
          private_execute
        end
      source.pluck(*keys)
    end

    def reorder(*values) = clone.reorder!(*values)
    def reorder!(*values) = assign_option(:order, values)

    def reselect(*values) = clone.reselect!(*values)
    def reselect!(*values) = assign_option(:select, values)

    def rewhere(value) = clone.rewhere!(value)

    def rewhere!(value)
      check_loaded
      @options[:where] = ensure_permitted(value)
      self
    end

    def only(*keys) = Relation.new(@model, @term, **@options.slice(*keys))

    def except(*keys) = Relation.new(@model, @term, **@options.except(*keys))

    def loaded?
      !@execute.nil?
    end

    undef_method :respond_to_missing?

    def respond_to_missing?(...)
      Results.new(nil, nil, nil).respond_to?(...) || super
    end

    # TODO uncomment in 7.0
    # def to_json(...)
    #   private_execute.to_a.to_json(...)
    # end

    # TODO uncomment in 7.0
    # def as_json(...)
    #   private_execute.to_a.as_json(...)
    # end

    def to_yaml
      private_execute.to_a.to_yaml
    end

    private

    def private_execute
      @execute ||= query.execute
    end

    def query
      @query ||= Query.new(@model, @term, **@options)
    end

    def check_loaded
      raise Error, "Relation loaded" if loaded?

      # reset query since options will change
      @query = nil
    end

    # provides *very* basic protection from unfiltered parameters
    # this is not meant to be comprehensive and may be expanded in the future
    def ensure_permitted(obj)
      obj.to_h
    end

    def initialize_copy(other)
      super
      # shallow dup and avoid updating values in-place
      @options = @options.dup
      @execute = nil
    end

    # Replaces option +key+ with +value+ and returns self.
    def assign_option(key, value)
      check_loaded
      @options[key] = value
      self
    end

    # Appends the flattened +values+ to option +key+ and returns self.
    def append_option(key, values)
      check_loaded
      concat_option(key, values.flatten)
      self
    end

    # Merges +value+ into option +key+ and returns self.
    def combine_option(key, value)
      check_loaded
      merge_option(key, value)
      self
    end

    def concat_option(key, value)
      current = @options[key]
      @options[key] = current ? current + value : value.to_ary
    end

    def merge_option(key, value)
      current = @options[key]
      @options[key] = current ? current.merge(value) : value.to_hash
    end
  end
end
================================================
FILE: lib/searchkick/relation_indexer.rb
================================================
module Searchkick
  # Reindexes a whole relation in batches, either through RecordIndexer or,
  # for full async reindexes, by enqueueing one BulkReindexJob per batch.
  class RelationIndexer
    attr_reader :index

    def initialize(index)
      @index = index
    end

    def reindex(relation, mode:, method_name: nil, ignore_missing: nil, full: false, resume: false, scope: nil, job_options: nil)
      # apply scopes
      if scope
        relation = relation.send(scope)
      elsif relation.respond_to?(:search_import)
        relation = relation.search_import
      end

      # remove unneeded loading for async and queue
      if mode == :async || mode == :queue
        if relation.respond_to?(:primary_key)
          relation = relation.except(:includes, :preload)
          relation = relation.except(:select).select(relation.primary_key) unless keep_columns?(relation, mode)
        elsif relation.respond_to?(:only)
          relation = relation.only(:_id) unless keep_columns?(relation, mode)
        end
      end

      return full_reindex_async(relation, job_options: job_options) if mode == :async && full

      relation = resume_relation(relation) if resume

      record_indexer = RecordIndexer.new(index)
      in_batches(relation) do |items|
        record_indexer.reindex(
          items,
          mode: mode,
          method_name: method_name,
          full: full,
          ignore_missing: ignore_missing,
          job_options: job_options
        )
      end
    end

    def batches_left
      Searchkick.with_redis { |redis| redis.call("SCARD", batches_key) }
    end

    def batch_completed(batch_id)
      Searchkick.with_redis { |redis| redis.call("SREM", batches_key, [batch_id]) }
    end

    private

    # Queue mode keeps the loaded columns when the model defines search_routing.
    def keep_columns?(relation, mode)
      mode == :queue && relation.klass.method_defined?(:search_routing)
    end

    def resume_relation(relation)
      raise Error, "Resume not supported for Mongoid" unless relation.respond_to?(:primary_key)

      # use total docs instead of max id since there's not a great way
      # to get the max _id without scripting since it's a string
      remaining = relation.arel_table[relation.primary_key].gt(index.total_docs)
      relation.where(remaining)
    end

    # Yields the relation's records in batches with the model's current scope
    # suspended for the duration of each yield.
    def in_batches(relation)
      klass = relation.klass

      if relation.respond_to?(:find_in_batches)
        # remove order to prevent possible warnings
        relation.except(:order).find_in_batches(batch_size: batch_size) do |batch|
          without_current_scope(klass) { yield batch }
        end
      else
        each_batch(relation, batch_size: batch_size) do |batch|
          without_mongoid_scope(klass) { yield batch }
        end
      end
    end

    # prevent scope from affecting search_data as well as inline jobs
    # Active Record runs relation calls in scoping block
    # https://github.com/rails/rails/blob/main/activerecord/lib/active_record/relation/delegation.rb
    # note: we could probably just call klass.current_scope = nil
    # anywhere in reindex method (after initial all call),
    # but this is more cautious
    def without_current_scope(klass)
      saved = klass.current_scope(true)
      return yield unless saved

      begin
        klass.current_scope = nil
        yield
      ensure
        klass.current_scope = saved
      end
    end

    # prevent scope from affecting search_data as well as inline jobs
    # note: Model.with_scope doesn't always restore scope, so use custom logic
    def without_mongoid_scope(klass)
      saved = Mongoid::Threaded.current_scope(klass)
      return yield unless saved

      begin
        Mongoid::Threaded.set_current_scope(nil, klass)
        yield
      ensure
        Mongoid::Threaded.set_current_scope(saved, klass)
      end
    end

    def each_batch(relation, batch_size:)
      # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
      # use cursor for Mongoid
      buffer = []
      relation.all.each do |item|
        buffer << item
        next unless buffer.length == batch_size

        yield buffer
        buffer = []
      end
      yield buffer if buffer.any?
    end

    def batch_size
      @batch_size ||= index.options[:batch_size] || 1000
    end

    # Enqueues one job per batch: id ranges when the minimum primary key is
    # numeric, explicit record ids otherwise.
    def full_reindex_async(relation, job_options: nil)
      batch_id = 1
      class_name = relation.searchkick_options[:class_name]

      starting_id = false
      if relation.respond_to?(:primary_key)
        primary_key = relation.primary_key

        starting_id =
          begin
            relation.minimum(primary_key)
          rescue ActiveRecord::StatementInvalid
            false
          end
      end

      case starting_id
      when nil
        # no records, do nothing
      when Numeric
        max_id = relation.maximum(primary_key)
        span = max_id - starting_id + 1
        batches_count = (span / batch_size.to_f).ceil

        batches_count.times do |i|
          min_id = starting_id + (i * batch_size)
          batch_job(class_name, batch_id, job_options, min_id: min_id, max_id: min_id + batch_size - 1)
          batch_id += 1
        end
      else
        in_batches(relation) do |items|
          ids = items.map(&:id).map { |v| v.instance_of?(Integer) ? v : v.to_s }
          batch_job(class_name, batch_id, job_options, record_ids: ids)
          batch_id += 1
        end
      end
    end

    # Records +batch_id+ as pending in Redis and enqueues its BulkReindexJob.
    def batch_job(class_name, batch_id, job_options, **options)
      job_options ||= {}
      # TODO expire Redis key
      Searchkick.with_redis { |redis| redis.call("SADD", batches_key, [batch_id]) }
      Searchkick::BulkReindexJob.set(**job_options).perform_later(
        class_name: class_name,
        index_name: index.name,
        batch_id: batch_id,
        **options
      )
    end

    def batches_key
      "searchkick:reindex:#{index.name}:batches"
    end
  end
end
================================================
FILE: lib/searchkick/reranking.rb
================================================
module Searchkick
  module Reranking
    # Reciprocal rank fusion of one or more rankings.
    #
    # Each result scores 1 / (k + rank) per ranking it appears in (rank is
    # 1-based; a result repeated within one ranking uses its last position).
    # Returns an array of {result:, score:} hashes sorted by descending score.
    def self.rrf(first_ranking, *rankings, k: 60)
      lists = [first_ranking, *rankings].map(&:to_ary)

      positions =
        lists.map do |list|
          lookup = {}
          list.each_with_index { |item, i| lookup[item] = i + 1 }
          lookup
        end

      candidates = lists.flat_map { |list| list }.uniq

      scored =
        candidates.map do |item|
          score = positions.sum { |lookup| (pos = lookup[item]) ? 1.0 / (k + pos) : 0.0 }
          {result: item, score: score}
        end

      scored.sort_by { |v| -v[:score] }
    end
  end
end
================================================
FILE: lib/searchkick/results.rb
================================================
module Searchkick
class Results
include Enumerable
extend Forwardable
attr_reader :response
def_delegators :results, :each, :any?, :empty?, :size, :length, :slice, :[], :to_ary
# Stores the model class, the raw search response and the search options.
def initialize(klass, response, options = {})
  @klass, @response, @options = klass, response, options
end
def with_hit
return enum_for(:with_hit) unless block_given?
build_hits.each do |result|
yield result
end
end
def missing_records
@missing_records ||= with_hit_and_missing_records[1]
end
def suggestions
if response["suggest"]
response["suggest"].values.flat_map { |v| v.first["options"] }.sort_by { |o| -o["score"] }.map { |o| o["text"] }.uniq
elsif options[:suggest]
[]
else
raise "Pass `suggest: true` to the search method for suggestions"
end
end
def aggregations
response["aggregations"]
end
def aggs
@aggs ||= begin
if aggregations
aggregations.dup.each do |field, filtered_agg|
buckets = filtered_agg[field]
# move the buckets one level above into the field hash
if buckets
filtered_agg.delete(field)
filtered_agg.merge!(buckets)
end
end
end
end
end
def took
response["took"]
end
def error
response["error"]
end
def model_name
if klass.nil?
ActiveModel::Name.new(self.class, nil, 'Result')
else
klass.model_name
end
end
def entry_name(options = {})
if options.empty?
# backward compatibility
model_name.human.downcase
else
default = options[:count] == 1 ? model_name.human : model_name.human.pluralize
model_name.human(options.reverse_merge(default: default))
end
end
def total_count
if options[:total_entries]
options[:total_entries]
elsif response["hits"]["total"].is_a?(Hash)
response["hits"]["total"]["value"]
else
response["hits"]["total"]
end
end
alias_method :total_entries, :total_count
def current_page
options[:page]
end
def per_page
options[:per_page]
end
alias_method :limit_value, :per_page
def padding
options[:padding]
end
def total_pages
(total_count / per_page.to_f).ceil
end
alias_method :num_pages, :total_pages
def offset_value
(current_page - 1) * per_page + padding
end
alias_method :offset, :offset_value
def previous_page
current_page > 1 ? (current_page - 1) : nil
end
alias_method :prev_page, :previous_page
def next_page
current_page < total_pages ? (current_page + 1) : nil
end
def first_page?
previous_page.nil?
end
def last_page?
next_page.nil?
end
def out_of_range?
current_page > total_pages
end
def hits
if error
raise Error, "Query error - use the error method to view it"
else
@response["hits"]["hits"]
end
end
def highlights(multiple: false)
hits.map do |hit|
hit_highlights(hit, multiple: multiple)
end
end
def with_highlights(multiple: false)
return enum_for(:with_highlights, multiple: multiple) unless block_given?
with_hit.each do |result, hit|
yield result, hit_highlights(hit, multiple: multiple)
end
end
def with_score
return enum_for(:with_score) unless block_given?
with_hit.each do |result, hit|
yield result, hit["_score"]
end
end
def misspellings?
@options[:misspellings]
end
def scroll_id
@response["_scroll_id"]
end
def scroll
raise Error, "Pass `scroll` option to the search method for scrolling" unless scroll_id
if block_given?
records = self
while records.any?
yield records
records = records.scroll
end
records.clear_scroll
else
begin
# TODO Active Support notifications for this scroll call
params = {
scroll: options[:scroll],
body: {scroll_id: scroll_id}
}
params[:opaque_id] = options[:opaque_id] if options[:opaque_id]
Results.new(@klass, Searchkick.client.scroll(params), @options)
rescue => e
if Searchkick.not_found_error?(e) && e.message =~ /search_context_missing_exception/i
raise Error, "Scroll id has expired"
else
raise e
end
end
end
end
def clear_scroll
begin
# try to clear scroll
# not required as scroll will expire
# but there is a cost to open scrolls
Searchkick.client.clear_scroll(scroll_id: scroll_id)
rescue => e
raise e unless Searchkick.transport_error?(e)
end
end
private
attr_reader :klass, :options
def results
@results ||= with_hit.map(&:first)
end
def with_hit_and_missing_records
@with_hit_and_missing_records ||= begin
missing_records = []
if options[:load]
grouped_hits = hits.group_by { |hit, _| hit["_index"] }
# determine models
index_models = {}
grouped_hits.each do |index, _|
models =
if @klass
[@klass]
else
index_alias = index.split("_")[0..-2].join("_")
Array((options[:index_mapping] || {})[index_alias])
end
raise Error, "Unknown model for index: #{index}. Pass the `models` option to the search method." unless models.any?
index_models[index] = models
end
# fetch results
results = {}
grouped_hits.each do |index, index_hits|
results[index] = {}
index_models[index].each do |model|
results[index].merge!(results_query(model, index_hits).to_a.index_by { |r| r.id.to_s })
end
end
# sort
results =
hits.map do |hit|
result = results[hit["_index"]][hit["_id"].to_s]
if result && !(options[:load].is_a?(Hash) && options[:load][:dumpable])
if (hit["highlight"] || options[:highlight]) && !result.respond_to?(:search_highlights)
highlights = hit_highlights(hit)
result.define_singleton_method(:search_highlights) do
highlights
end
end
end
[result, hit]
end.select do |result, hit|
unless result
models = index_models[hit["_index"]]
missing_records << {
id: hit["_id"],
# may be multiple models for inheritance with child models
# not ideal to return different types
# but this situation shouldn't be common
model: models.size == 1 ? models.first : models
}
end
result
end
else
results =
hits.map do |hit|
result =
if hit["_source"]
hit.except("_source").merge(hit["_source"])
elsif hit["fields"]
hit.except("fields").merge(hit["fields"])
else
hit
end
if hit["highlight"] || options[:highlight]
highlight = hit["highlight"].to_a.to_h { |k, v| [base_field(k), v.first] }
options[:highlighted_fields].map { |k| base_field(k) }.each do |k|
result["highlighted_#{k}"] ||= (highlight[k] || result[k])
end
end
result["id"] ||= result["_id"] # needed for legacy reasons
[HashWrapper.new(result), hit]
end
end
[results, missing_records]
end
end
def build_hits
@build_hits ||= begin
if missing_records.any?
Searchkick.warn("Records in search index do not exist in database: #{missing_records.map { |v| "#{Array(v[:model]).map(&:model_name).sort.join("/")} #{v[:id]}" }.join(", ")}")
end
with_hit_and_missing_records[0]
end
end
def results_query(records, hits)
records = Searchkick.scope(records)
ids = hits.map { |hit| hit["_id"] }
if options[:includes] || options[:model_includes]
included_relations = []
combine_includes(included_relations, options[:includes])
combine_includes(included_relations, options[:model_includes][records]) if options[:model_includes]
records = records.includes(included_relations)
end
if options[:scope_results]
records = options[:scope_results].call(records)
end
Searchkick.load_records(records, ids)
end
def combine_includes(result, inc)
if inc
if inc.is_a?(Array)
result.concat(inc)
else
result << inc
end
end
end
def base_field(k)
k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
end
def hit_highlights(hit, multiple: false)
if hit["highlight"]
hit["highlight"].to_h { |k, v| [(options[:json] ? k : k.sub(/\.#{@options[:match_suffix]}\z/, "")).to_sym, multiple ? v : v.first] }
else
{}
end
end
end
end
================================================
FILE: lib/searchkick/script.rb
================================================
module Searchkick
  # Value object holding a script's source, language and parameters.
  class Script
    attr_reader :source, :lang, :params

    # source - script source
    # lang   - script language (defaults to "painless")
    # params - script parameters (defaults to an empty hash)
    def initialize(source, lang: "painless", params: {})
      @source, @lang, @params = source, lang, params
    end
  end
end
================================================
FILE: lib/searchkick/version.rb
================================================
module Searchkick
# Gem version string.
VERSION = "6.1.0"
end
================================================
FILE: lib/searchkick/where.rb
================================================
module Searchkick
  # Small chain object wrapping a relation so that a negated condition can be
  # added through `not`.
  class Where
    # relation - object responding to `where`
    def initialize(relation)
      @relation = relation
    end

    # Adds `value` to the wrapped relation as a `_not` condition and returns
    # whatever the relation's `where` returns.
    def not(value)
      relation = @relation
      relation.where(_not: value)
    end
  end
end
================================================
FILE: lib/searchkick.rb
================================================
# dependencies
require "active_support"
require "active_support/core_ext/hash/deep_merge"
require "active_support/core_ext/module/attr_internal"
require "active_support/core_ext/module/delegation"
require "active_support/deprecation"
require "active_support/log_subscriber"
require "active_support/notifications"
# stdlib
require "forwardable"
# modules
require_relative "searchkick/controller_runtime"
require_relative "searchkick/index"
require_relative "searchkick/index_cache"
require_relative "searchkick/index_options"
require_relative "searchkick/indexer"
require_relative "searchkick/hash_wrapper"
require_relative "searchkick/log_subscriber"
require_relative "searchkick/model"
require_relative "searchkick/multi_search"
require_relative "searchkick/query"
require_relative "searchkick/reindex_queue"
require_relative "searchkick/record_data"
require_relative "searchkick/record_indexer"
require_relative "searchkick/relation"
require_relative "searchkick/relation_indexer"
require_relative "searchkick/reranking"
require_relative "searchkick/results"
require_relative "searchkick/script"
require_relative "searchkick/version"
require_relative "searchkick/where"
# integrations
require_relative "searchkick/railtie" if defined?(Rails)
module Searchkick
  # requires faraday
  autoload :Middleware, "searchkick/middleware"

  # background jobs
  autoload :BulkReindexJob, "searchkick/bulk_reindex_job"
  autoload :ProcessBatchJob, "searchkick/process_batch_job"
  autoload :ProcessQueueJob, "searchkick/process_queue_job"
  autoload :ReindexV2Job, "searchkick/reindex_v2_job"

  # errors
  class Error < StandardError; end
  class MissingIndexError < Error; end

  class UnsupportedVersionError < Error
    def message
      "This version of Searchkick requires Elasticsearch 8+ or OpenSearch 2+"
    end
  end

  class InvalidQueryError < Error; end
  class DangerousOperation < Error; end
  class ImportError < Error; end

  # module-level configuration
  class << self
    attr_accessor :search_method_name, :timeout, :models, :client_options, :redis,
      :index_prefix, :index_suffix, :queue_name, :model_options, :client_type, :parent_job
    attr_writer :client, :env, :search_timeout
    attr_reader :aws_credentials
  end

  # defaults
  self.search_method_name = :search
  self.timeout = 10
  self.models = []
  self.client_options = {}
  self.queue_name = :searchkick
  self.model_options = {}
  self.parent_job = "ActiveJob::Base"

  # Memoized search client. Uses `client_type` when set, otherwise picks the
  # client library that is loaded, and raises when both or neither are found.
  def self.client
    @client ||= begin
      configured_type = self.client_type
      resolved_type =
        if configured_type
          configured_type
        elsif defined?(OpenSearch::Client) && defined?(Elasticsearch::Client)
          raise Error, "Multiple clients found - set Searchkick.client_type = :elasticsearch or :opensearch"
        elsif defined?(OpenSearch::Client)
          :opensearch
        elsif defined?(Elasticsearch::Client)
          :elasticsearch
        else
          raise Error, "No client found - install the `elasticsearch` or `opensearch-ruby` gem"
        end

      if resolved_type == :opensearch
        client_class = OpenSearch::Client
        url = ENV["OPENSEARCH_URL"]
      else
        raise Error, "The `elasticsearch` gem must be 8+" if Elasticsearch::VERSION.to_i < 8

        client_class = Elasticsearch::Client
        url = ENV["ELASTICSEARCH_URL"]
      end

      defaults = {
        url: url,
        transport_options: {request: {timeout: timeout}},
        retry_on_failure: 2
      }
      client_class.new(defaults.deep_merge(client_options)) do |f|
        f.use Searchkick::Middleware
        f.request :aws_sigv4, signer_middleware_aws_params if aws_credentials
      end
    end
  end

  # Environment name: explicit value, then RAILS_ENV, then RACK_ENV, then "development".
  def self.env
    @env ||= ENV["RAILS_ENV"] || ENV["RACK_ENV"] || "development"
  end

  # Search timeout, falling back to the general timeout when unset.
  def self.search_timeout
    return @search_timeout if defined?(@search_timeout) && @search_timeout

    timeout
  end

  # private
  def self.server_info
    @server_info ||= client.info
  end

  def self.server_version
    @server_version ||= server_info["version"]["number"]
  end

  def self.opensearch?
    return @opensearch if defined?(@opensearch)

    @opensearch = server_info["version"]["distribution"] == "opensearch"
  end

  # Compares the server version with `version`, ignoring anything after the
  # first "-" on both sides.
  def self.server_below?(version)
    current = Gem::Version.new(server_version.split("-")[0])
    target = Gem::Version.new(version.split("-")[0])
    current < target
  end

  # private
  def self.knn_support?
    minimum_version = opensearch? ? "2.4.0" : "8.6.0"
    !server_below?(minimum_version)
  end

  # Builds a Relation for `term`. A single model may be given through
  # `model:`, `models:` or (as model classes) `index_name:`; combining a model
  # with other models raises ArgumentError.
  def self.search(term = "*", model: nil, **options, &block)
    options = options.dup
    klass = model

    # convert index_name into models if possible
    # this should allow for easier upgrade
    index_names_are_models =
      options[:index_name] && !options[:models] &&
      Array(options[:index_name]).all? { |v| v.respond_to?(:searchkick_index) }
    options[:models] = options.delete(:index_name) if index_names_are_models

    # make Searchkick.search(models: [Product]) and Product.search equivalent
    unless klass
      candidates = Array(options[:models])
      if candidates.size == 1
        klass = candidates.first
        options.delete(:models)
      end
    end

    if klass
      other_models = options[:models] && Array(options[:models]) != [klass]
      other_indices = Array(options[:index_name]).any? { |v| v.respond_to?(:searchkick_index) && v != klass }
      raise ArgumentError, "Use Searchkick.search to search multiple models" if other_models || other_indices
    end

    options = options.merge(block: block) if block
    Relation.new(klass, term, **options)
  end

  # Runs several searches in one request, instrumented as
  # "multi_search.searchkick". Returns nil without doing anything when
  # `queries` is empty.
  def self.multi_search(queries, opaque_id: nil)
    return if queries.empty?

    queries = queries.map { |q| q.send(:query) }
    lines = queries.flat_map { |q| [q.params.except(:body).to_json, q.body.to_json] }
    event = {
      name: "Multi Search",
      body: lines.map { |v| "#{v}\n" }.join
    }
    ActiveSupport::Notifications.instrument("multi_search.searchkick", event) do
      MultiSearch.new(queries, opaque_id: opaque_id).perform
    end
  end

  # script
  # experimental
  def self.script(source, **options)
    Script.new(source, **options)
  end

  # callbacks
  def self.enable_callbacks
    self.callbacks_value = nil
  end

  def self.disable_callbacks
    self.callbacks_value = false
  end

  # True unless callbacks are disabled; `default` applies when no value is set.
  def self.callbacks?(default: true)
    current = callbacks_value
    current.nil? ? default : current != false
  end

  # Sets the callbacks mode. With a block, the mode applies only for the
  # duration of the block; in :bulk mode any queued items are then sent in one
  # instrumented request before the previous mode is restored.
  #
  # message is private
  def self.callbacks(value = nil, message: nil)
    return (self.callbacks_value = value) unless block_given?

    previous_value = callbacks_value
    begin
      self.callbacks_value = value
      result = yield

      if callbacks_value == :bulk && indexer.queued_items.any?
        event = {}
        if message
          message.call(event)
        else
          event[:name] = "Bulk"
          event[:count] = indexer.queued_items.size
        end
        ActiveSupport::Notifications.instrument("request.searchkick", event) do
          indexer.perform
        end
      end

      result
    ensure
      self.callbacks_value = previous_value
    end
  end

  # Stores the credentials and drops the memoized client so it is rebuilt.
  def self.aws_credentials=(creds)
    require "faraday_middleware/aws_sigv4"
    @aws_credentials = creds
    @client = nil # reset client
  end

  def self.reindex_status(index_name)
    raise Error, "Redis not configured" unless redis

    batches_left = Index.new(index_name).batches_left
    {completed: batches_left == 0, batches_left: batches_left}
  end

  # Yields a Redis connection when Redis is configured (checking one out via
  # `with` when the configured object supports it); otherwise does nothing.
  def self.with_redis
    return unless redis
    return yield(redis) unless redis.respond_to?(:with)

    redis.with { |r| yield r }
  end

  def self.warn(message)
    super("[searchkick] WARNING: #{message}")
  end

  # private
  def self.load_records(relation, ids)
    loaded =
      if relation.respond_to?(:primary_key)
        primary_key = relation.primary_key
        raise Error, "Need primary key to load records" unless primary_key

        relation.where(primary_key => ids)
      elsif relation.respond_to?(:queryable)
        relation.queryable.for_ids(ids)
      end
    raise Error, "Not sure how to load records" unless loaded

    loaded
  end

  # public (for reindexing conversions)
  def self.load_model(class_name, allow_child: false)
    model = class_name.safe_constantize
    raise Error, "Could not find class: #{class_name}" unless model

    searchkick_model =
      if allow_child
        model.respond_to?(:searchkick_klass)
      else
        Searchkick.models.include?(model)
      end
    raise Error, "#{class_name} is not a searchkick model" unless searchkick_model

    model
  end

  # private
  def self.indexer
    Thread.current[:searchkick_indexer] ||= Indexer.new
  end

  # private
  def self.callbacks_value
    Thread.current[:searchkick_callbacks_enabled]
  end

  # private
  def self.callbacks_value=(value)
    Thread.current[:searchkick_callbacks_enabled] = value
  end

  # private
  def self.signer_middleware_aws_params
    defaults = {service: "es", region: "us-east-1"}
    defaults.merge(aws_credentials)
  end

  # private
  # methods are forwarded to base class
  # this check to see if scope exists on that class
  # it's a bit tricky, but this seems to work
  def self.relation?(klass)
    return !klass.current_scope.nil? if klass.respond_to?(:current_scope)

    klass.is_a?(Mongoid::Criteria) || !Mongoid::Threaded.current_scope(klass).nil?
  end

  # private
  def self.scope(model)
    # safety check to make sure used properly in code
    raise Error, "Cannot scope relation" if relation?(model)

    model.searchkick_options[:unscope] ? model.unscoped : model
  end

  # private
  def self.not_found_error?(e)
    (defined?(Elastic::Transport) &&
      e.is_a?(Elastic::Transport::Transport::Errors::NotFound)) ||
      (defined?(Elasticsearch::Transport) &&
        e.is_a?(Elasticsearch::Transport::Transport::Errors::NotFound)) ||
      (defined?(OpenSearch) &&
        e.is_a?(OpenSearch::Transport::Transport::Errors::NotFound))
  end

  # private
  def self.transport_error?(e)
    (defined?(Elastic::Transport) &&
      e.is_a?(Elastic::Transport::Transport::Error)) ||
      (defined?(Elasticsearch::Transport) &&
        e.is_a?(Elasticsearch::Transport::Transport::Error)) ||
      (defined?(OpenSearch) &&
        e.is_a?(OpenSearch::Transport::Transport::Error))
  end

  # private
  def self.not_allowed_error?(e)
    (defined?(Elastic::Transport) &&
      e.is_a?(Elastic::Transport::Transport::Errors::MethodNotAllowed)) ||
      (defined?(Elasticsearch::Transport) &&
        e.is_a?(Elasticsearch::Transport::Transport::Errors::MethodNotAllowed)) ||
      (defined?(OpenSearch) &&
        e.is_a?(OpenSearch::Transport::Transport::Errors::MethodNotAllowed))
  end
end
# Integration hooks, run when each framework component is loaded.

# Active Record classes gain the Searchkick::Model class methods.
ActiveSupport.on_load(:active_record) do
extend Searchkick::Model
end
# Mongoid documents get the same methods through their class-methods module.
ActiveSupport.on_load(:mongoid) do
Mongoid::Document::ClassMethods.include Searchkick::Model
end
# Controllers include the Searchkick runtime module.
ActiveSupport.on_load(:action_controller) do
include Searchkick::ControllerRuntime
end
# Subscribe the log subscriber to the :searchkick notification namespace.
Searchkick::LogSubscriber.attach_to :searchkick
================================================
FILE: lib/tasks/searchkick.rake
================================================
namespace :searchkick do
  # Reindexes the single model named by the CLASS environment variable.
  desc "reindex a model (specify CLASS)"
  task reindex: :environment do
    class_name = ENV["CLASS"]
    abort "USAGE: rake searchkick:reindex CLASS=Product" unless class_name

    begin
      model = Searchkick.load_model(class_name)
    rescue Searchkick::Error => e
      abort e.message
    end

    puts "Reindexing #{model.name}..."
    model.reindex
    puts "Reindex successful"
  end

  namespace :reindex do
    # Reindexes every model registered in Searchkick.models.
    desc "reindex all models"
    task all: :environment do
      # eager load models to populate Searchkick.models
      zeitwerk = Rails.respond_to?(:autoloaders) && Rails.autoloaders.zeitwerk_enabled?
      if zeitwerk
        # fix for https://github.com/rails/rails/issues/37006
        Zeitwerk::Loader.eager_load_all
      else
        Rails.application.eager_load!
      end

      Searchkick.models.each do |searchable|
        puts "Reindexing #{searchable.name}..."
        searchable.reindex
      end

      puts "Reindex complete"
    end
  end
end
================================================
FILE: searchkick.gemspec
================================================
require_relative "lib/searchkick/version"
# Gem metadata for searchkick.
Gem::Specification.new do |spec|
spec.name = "searchkick"
# version comes from lib/searchkick/version.rb (required above)
spec.version = Searchkick::VERSION
spec.summary = "Intelligent search made easy with Rails and Elasticsearch or OpenSearch"
spec.homepage = "https://github.com/ankane/searchkick"
spec.license = "MIT"
spec.author = "Andrew Kane"
spec.email = "andrew@ankane.org"
# package top-level markdown/text files plus everything under lib/
spec.files = Dir["*.{md,txt}", "{lib}/**/*"]
spec.require_path = "lib"
spec.required_ruby_version = ">= 3.2"
# the only runtime dependency declared here
spec.add_dependency "activemodel", ">= 7.2"
end
================================================
FILE: test/aggs_test.rb
================================================
require_relative "test_helper"
class AggsTest < Minitest::Test
def setup
super
store [
{name: "Product Show", latitude: 37.7833, longitude: 12.4167, store_id: 1, in_stock: true, color: "blue", price: 21, created_at: 2.days.ago},
{name: "Product Hide", latitude: 29.4167, longitude: -98.5000, store_id: 2, in_stock: false, color: "green", price: 25, created_at: 2.days.from_now},
{name: "Product B", latitude: 43.9333, longitude: -122.4667, store_id: 2, in_stock: false, color: "red", price: 5, created_at: Time.now},
{name: "Foo", latitude: 43.9333, longitude: 12.4667, store_id: 3, in_stock: false, color: "yellow", price: 15, created_at: Time.now}
]
end
def test_single
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), Product.search("Product").aggs(:store_id)
end
def test_multiple
expected = {"store_id" => {1 => 1, 2 => 2}, "color" => {"blue" => 1, "green" => 1, "red" => 1}}
assert_aggs expected, aggs: [:store_id, :color]
assert_aggs expected, Product.search("Product").aggs(:store_id, :color)
assert_aggs expected, Product.search("Product").aggs([:store_id, :color])
end
def test_multiple_where
expected = {"store_id" => {1 => 1}, "color" => {"blue" => 1, "green" => 1, "red" => 1}}
assert_aggs expected, aggs: {color: {}, store_id: {where: {in_stock: true}}}
assert_aggs expected, Product.search("Product").aggs(:color, store_id: {where: {in_stock: true}})
end
def test_none
assert_nil Product.search("*").aggs
end
def test_where
assert_aggs ({"store_id" => {1 => 1}}), aggs: {store_id: {where: {in_stock: true}}}
assert_aggs ({"store_id" => {1 => 1}}), Product.search("Product").aggs(store_id: {where: {in_stock: true}})
assert_aggs ({"store_id" => {1 => 1}}), Product.search("Product").aggs({store_id: {where: {in_stock: true}}})
assert_aggs ({"store_id" => {1 => 1}}), aggs: {store_id: {where: {_not: {in_stock: false}}}}
end
def test_field
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), aggs: {store_id: {}}
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), aggs: {store_id: {field: "store_id"}}
assert_aggs ({"store_id_new" => {1 => 1, 2 => 2}}), aggs: {store_id_new: {field: "store_id"}}
end
def test_min_doc_count
assert_aggs ({"store_id" => {2 => 2}}), aggs: {store_id: {min_doc_count: 2}}
end
def test_script
expected = {"color" => {"Color: blue" => 1, "Color: green" => 1, "Color: red" => 1}}
assert_aggs expected, aggs: {color: {script: {source: "'Color: ' + _value"}}}
end
def test_order
agg = Product.search("Product", aggs: {color: {order: {_key: "desc"}}}).aggs["color"]
assert_equal ["red", "green", "blue"], agg["buckets"].map { |b| b["key"] }
end
def test_limit
agg = Product.search("Product", aggs: {store_id: {limit: 1}}).aggs["store_id"]
assert_equal 1, agg["buckets"].size
# assert_equal 3, agg["doc_count"]
assert_equal(1, agg["sum_other_doc_count"])
end
def test_ranges
price_ranges = [{to: 10}, {from: 10, to: 20}, {from: 20}]
agg = Product.search("Product", aggs: {price: {ranges: price_ranges}}).aggs["price"]
assert_equal 3, agg["buckets"].size
assert_equal 10.0, agg["buckets"][0]["to"]
assert_equal 20.0, agg["buckets"][2]["from"]
assert_equal 1, agg["buckets"][0]["doc_count"]
assert_equal 0, agg["buckets"][1]["doc_count"]
assert_equal 2, agg["buckets"][2]["doc_count"]
end
def test_date_ranges
ranges = [{to: 1.day.ago}, {from: 1.day.ago, to: 1.day.from_now}, {from: 1.day.from_now}]
agg = Product.search("Product", aggs: {created_at: {date_ranges: ranges}}).aggs["created_at"]
assert_equal 1, agg["buckets"][0]["doc_count"]
assert_equal 1, agg["buckets"][1]["doc_count"]
assert_equal 1, agg["buckets"][2]["doc_count"]
end
def test_group_by_date
store [{name: "Old Product", created_at: 3.years.ago}]
aggs = {products_per_year: {date_histogram: {field: :created_at, calendar_interval: :year}}}
products = Product.search("Product", where: {created_at: {lt: Time.now}}, aggs: aggs)
assert_equal 4, products.aggs["products_per_year"]["buckets"].size
end
def test_time_zone
start_time = Time.at(1529366400)
store [
{name: "Opera House Pass", created_at: start_time},
{name: "London Eye Pass", created_at: start_time + 16.hours},
{name: "London Tube Pass", created_at: start_time + 16.hours}
]
london_aggs = {products_per_day: {date_histogram: {field: :created_at, calendar_interval: :day, time_zone: "+01:00"}}}
expected = [
{"key_as_string" => "2018-06-19T00:00:00.000+01:00", "key" => 1529362800000, "doc_count" => 3}
]
assert_equal expected, Product.search("Pass", aggs: london_aggs).aggs["products_per_day"]["buckets"]
sydney_aggs = {products_per_day: {date_histogram: {field: :created_at, calendar_interval: :day, time_zone: "+10:00"}}}
expected = [
{"key_as_string" => "2018-06-19T00:00:00.000+10:00", "key" => 1529330400000, "doc_count" => 1},
{"key_as_string" => "2018-06-20T00:00:00.000+10:00", "key" => 1529416800000, "doc_count" => 2}
]
assert_equal expected, Product.search("Pass", aggs: sydney_aggs).aggs["products_per_day"]["buckets"]
end
def test_avg
products = Product.search("*", aggs: {avg_price: {avg: {field: :price}}})
assert_equal 16.5, products.aggs["avg_price"]["value"]
end
def test_cardinality
products = Product.search("*", aggs: {total_stores: {cardinality: {field: :store_id}}})
assert_equal 3, products.aggs["total_stores"]["value"]
end
def test_min_max
products = Product.search("*", aggs: {min_price: {min: {field: :price}}, max_price: {max: {field: :price}}})
assert_equal 5, products.aggs["min_price"]["value"]
assert_equal 25, products.aggs["max_price"]["value"]
end
def test_sum
products = Product.search("*", aggs: {sum_price: {sum: {field: :price}}})
assert_equal 66, products.aggs["sum_price"]["value"]
end
def test_body_options
expected = {"price" => {0.0 => 1, 10.0 => 0, 20.0 => 2}}
assert_aggs expected, body_options: {aggs: {price: {histogram: {field: :price, interval: 10}}}}
end
def test_smart_aggs
assert_aggs ({"store_id" => {1 => 1}}), where: {in_stock: true}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {in_stock: true}, aggs: [:store_id], smart_aggs: false
assert_aggs ({"store_id" => {2 => 2}}), where: {_not: {in_stock: true}}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_not: {in_stock: true}}, aggs: [:store_id], smart_aggs: false
assert_aggs ({"store_id" => {1 => 1}}), where: {_and: [{in_stock: true}]}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_and: [{in_stock: true}]}, aggs: [:store_id], smart_aggs: false
assert_aggs ({"store_id" => {1 => 1}}), where: {_or: [{in_stock: true}]}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_or: [{in_stock: true}]}, aggs: [:store_id], smart_aggs: false
assert_aggs ({"store_id" => {1 => 1}}), where: {or: [[{in_stock: true}]]}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {or: [[{in_stock: true}]]}, aggs: [:store_id], smart_aggs: false
assert_aggs ({"store_id" => {1 => 1}}), where: {_script: Searchkick.script("doc['in_stock'].value")}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_script: Searchkick.script("doc['in_stock'].value")}, aggs: [:store_id], smart_aggs: false
end
def test_smart_aggs_overlap
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {store_id: 2}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {store_id: 2}, aggs: [:store_id], smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {store_id: 2}, aggs: ["store_id"]
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {"store_id" => 2}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {store_id: {not: 2}}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {store_id: {not: 2}}, aggs: [:store_id], smart_aggs: false
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {store_id: {gt: 2}}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {store_id: {gt: 2}}, aggs: [:store_id], smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {1 => 1}}), where: {_not: {store_id: 2}}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_not: {store_id: 2}}, aggs: [:store_id], smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {_and: [{store_id: 2}]}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_and: [{store_id: 2}]}, aggs: [:store_id], smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {}}), where: {_and: [{store_id: 2}, {in_stock: true}]}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_and: [{store_id: 2}, {in_stock: true}]}, aggs: [:store_id], smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {_or: [{store_id: 2}]}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_or: [{store_id: 2}]}, aggs: [:store_id], smart_aggs: false
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_or: [{store_id: 2}, {in_stock: true}]}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {_or: [{store_id: 2}, {in_stock: true}]}, aggs: [:store_id], smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {or: [[{store_id: 2}]]}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {or: [[{store_id: 2}]]}, aggs: [:store_id], smart_aggs: false
assert_aggs ({"store_id" => {1 => 1, 2 => 1}}), where: {store_id: 2, price: {gt: 5}}, aggs: [:store_id]
assert_aggs ({"store_id" => {1 => 1, 2 => 2}}), where: {store_id: 2, price: {gt: 5}}, aggs: [:store_id], smart_aggs: false
end
def test_smart_aggs_agg_where
assert_aggs ({"store_id" => {2 => 1}}), where: {color: "red"}, aggs: {store_id: {where: {in_stock: false}}}
assert_aggs ({"store_id" => {2 => 2}}), where: {color: "red"}, aggs: {store_id: {where: {in_stock: false}}}, smart_aggs: false
assert_aggs ({"store_id" => {}}), where: {color: "blue"}, aggs: {store_id: {where: {in_stock: false}}}
assert_aggs ({"store_id" => {2 => 2}}), where: {color: "blue"}, aggs: {store_id: {where: {in_stock: false}}}, smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {_not: {color: "red"}}, aggs: {store_id: {where: {_not: {in_stock: true}}}}
assert_aggs ({"store_id" => {2 => 2}}), where: {_not: {color: "red"}}, aggs: {store_id: {where: {_not: {in_stock: true}}}}, smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {_and: [{color: "red"}]}, aggs: {store_id: {where: {_and: [{in_stock: false}]}}}
assert_aggs ({"store_id" => {2 => 2}}), where: {_and: [{color: "red"}]}, aggs: {store_id: {where: {_and: [{in_stock: false}]}}}, smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {_or: [{color: "red"}]}, aggs: {store_id: {where: {_or: [{in_stock: false}]}}}
assert_aggs ({"store_id" => {2 => 2}}), where: {_or: [{color: "red"}]}, aggs: {store_id: {where: {_or: [{in_stock: false}]}}}, smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {or: [[{color: "red"}]]}, aggs: {store_id: {where: {or: [[{in_stock: false}]]}}}
assert_aggs ({"store_id" => {2 => 2}}), where: {or: [[{color: "red"}]]}, aggs: {store_id: {where: {or: [[{in_stock: false}]]}}}, smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {2 => 2}}), where: {_script: Searchkick.script("doc['color'].value == 'red'")}, aggs: {store_id: {where: {_script: Searchkick.script("!doc['in_stock'].value")}}}
assert_aggs ({"store_id" => {2 => 2}}), where: {_script: Searchkick.script("doc['color'].value == 'red'")}, aggs: {store_id: {where: {_script: Searchkick.script("!doc['in_stock'].value")}}}, smart_aggs: false
end
# only basic conditions are overridden (the rest are additive)
def test_smart_aggs_agg_where_overlap
assert_aggs ({"store_id" => {}}), where: {color: "red"}, aggs: {store_id: {where: {in_stock: false, color: "blue"}}}
assert_aggs ({"store_id" => {}}), where: {color: "red"}, aggs: {store_id: {where: {in_stock: false, color: "blue"}}}, smart_aggs: false
assert_aggs ({"store_id" => {2 => 1}}), where: {color: "blue"}, aggs: {store_id: {where: {in_stock: false, color: "red"}}}
assert_aggs ({"store_id" => {2 => 1}}), where: {color: "blue"}, aggs: {store_id: {where: {in_stock: false, color: "red"}}}, smart_aggs: false
# TODO change
assert_aggs ({"store_id" => {}}), where: {color: "blue"}, aggs: {store_id: {where: {in_stock: false, "color" => "red"}}}
# TODO change
assert_aggs ({"store_id" => {}}), where: {"color" => "blue"}, aggs: {store_id: {where: {in_stock: false, color: "red"}}}
assert_aggs ({"store_id" => {}}), where: {_and: [{color: "blue"}]}, aggs: {store_id: {where: {in_stock: false, color: "red"}}}
assert_aggs ({"store_id" => {2 => 1}}), where: {_and: [{color: "blue"}]}, aggs: {store_id: {where: {in_stock: false, color: "red"}}}, smart_aggs: false
end
def test_smart_aggs_relation
  # every assertion starts from its own freshly built search
  products = -> { Product.search("Product") }
  one_each = {"store_id" => {1 => 1, 2 => 1}}

  # TODO change
  assert_aggs({"store_id" => {1 => 1}}, products.call.where.not(store_id: 2).aggs(:store_id))
  assert_aggs({"store_id" => {1 => 1, 2 => 2}}, products.call.where.not(store_id: 2).aggs(:store_id).smart_aggs(false))

  # chained, combined, and _and-wrapped conditions
  assert_aggs(one_each, products.call.where(store_id: 2).where(price: {gt: 5}).aggs(:store_id))
  assert_aggs(one_each, products.call.where(store_id: 2, price: {gt: 5}).aggs(:store_id))
  assert_aggs(one_each, products.call.where(_and: [{price: {gt: 5}}]).where(store_id: 2).aggs(:store_id))

  assert_aggs({"store_id" => {2 => 2}}, products.call.where(color: "red").aggs(store_id: {where: {in_stock: false}}).smart_aggs(false))
end
protected
# Asserts the aggregation buckets produced for +options+ equal +expected+.
# +options+ is either an already-built Searchkick::Relation, or a hash of
# search options that is checked through both Product.search and build_relation.
def assert_aggs(expected, options)
  unless options.is_a?(Searchkick::Relation)
    assert_equal expected, agg_buckets(Product.search("Product", **options))
    return assert_equal(expected, agg_buckets(build_relation(Product, "Product", **options)))
  end

  assert_equal expected, agg_buckets(options)
end
# Reduces relation.aggs to {agg name => {bucket key => doc_count}}.
def agg_buckets(relation)
  relation.aggs.each_with_object({}) do |(field, agg), summary|
    summary[field] = agg["buckets"].each_with_object({}) do |bucket, counts|
      counts[bucket["key"]] = bucket["doc_count"]
    end
  end
end
end
================================================
FILE: test/boost_test.rb
================================================
require_relative "test_helper"
# Ordering tests for the boost-related search options.
class BoostTest < Minitest::Test
  # global boost

  def test_boost
    store tomatoes_with_orders
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], boost: "orders_count"
  end

  def test_boost_zero
    store [{name: "Zero Boost", orders_count: 0}]
    assert_order "zero", ["Zero Boost"], boost: "orders_count"
  end

  # fields

  def test_fields
    store red_white_products
    assert_order "red", ["Red", "White"], fields: ["name^10", "color"]
  end

  def test_fields_decimal
    store red_white_products
    assert_order "red", ["Red", "White"], fields: ["name^10.5", "color"]
  end

  def test_fields_word_start
    store red_white_products
    assert_order "red", ["Red", "White"], fields: [{"name^10" => :word_start}, "color"]
  end

  # for issue #855
  def test_fields_apostrophes
    store_names ["Valentine's Day Special"]
    ["Valentines", "Valentine's", "Valentine"].each do |term|
      assert_search term, ["Valentine's Day Special"], fields: ["name^5"]
    end
  end

  def test_boost_by
    store tomatoes_with_orders
    descending = ["Tomato C", "Tomato B", "Tomato A"]
    assert_order "tomato", descending, boost_by: [:orders_count]
    assert_order "tomato", descending, boost_by: {orders_count: {factor: 10}}
  end

  def test_boost_by_missing
    store [
      {name: "Tomato A"},
      {name: "Tomato B", orders_count: 10}
    ]
    assert_order "tomato", ["Tomato A", "Tomato B"], boost_by: {orders_count: {missing: 100}}
  end

  def test_boost_by_boost_mode_multiply
    store [
      {name: "Tomato A", found_rate: 0.9},
      {name: "Tomato B"},
      {name: "Tomato C", found_rate: 0.5}
    ]
    assert_order "tomato", ["Tomato B", "Tomato A", "Tomato C"], boost_by: {found_rate: {boost_mode: "multiply"}}
  end

  def test_boost_where
    store [
      {name: "Tomato A"},
      {name: "Tomato B", user_ids: [1, 2]},
      {name: "Tomato C", user_ids: [3]}
    ]

    # each of these conditions is expected to put Tomato B first
    [
      2,
      1..2,
      [1, 4],
      {value: 2, factor: 10},
      {value: [1, 4], factor: 10}
    ].each do |condition|
      assert_first "tomato", "Tomato B", boost_where: {user_ids: condition}
    end

    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], boost_where: {user_ids: [{value: 1, factor: 10}, {value: 3, factor: 20}]}
  end

  def test_boost_where_negative_boost
    store [
      {name: "Tomato A"},
      {name: "Tomato B", user_ids: [2]},
      {name: "Tomato C", user_ids: [2]}
    ]
    assert_first "tomato", "Tomato A", boost_where: {user_ids: {value: 2, factor: 0.5}}
  end

  def test_boost_by_recency
    store dated_articles
    assert_order "article", ["Article 3", "Article 2", "Article 1"], boost_by_recency: {created_at: {scale: "7d", decay: 0.5}}
  end

  def test_boost_by_recency_origin
    store dated_articles
    assert_order "article", ["Article 1", "Article 2", "Article 3"], boost_by_recency: {created_at: {origin: 2.days.ago, scale: "7d", decay: 0.5}}
  end

  def test_boost_by_distance
    store san_cities
    assert_order "san", ["San Francisco", "San Antonio", "San Marino"], boost_by_distance: {field: :location, origin: [37, -122], scale: "1000mi"}
  end

  def test_boost_by_distance_hash
    store san_cities
    assert_order "san", ["San Francisco", "San Antonio", "San Marino"], boost_by_distance: {field: :location, origin: {lat: 37, lon: -122}, scale: "1000mi"}
  end

  def test_boost_by_distance_v2
    store san_cities
    assert_order "san", ["San Francisco", "San Antonio", "San Marino"], boost_by_distance: {location: {origin: [37, -122], scale: "1000mi"}}
  end

  def test_boost_by_distance_v2_hash
    store san_cities
    assert_order "san", ["San Francisco", "San Antonio", "San Marino"], boost_by_distance: {location: {origin: {lat: 37, lon: -122}, scale: "1000mi"}}
  end

  def test_boost_by_distance_v2_factor
    store [
      {name: "San Francisco", latitude: 37.7833, longitude: -122.4167, found_rate: 0.1},
      {name: "San Antonio", latitude: 29.4167, longitude: -98.5000, found_rate: 0.99},
      {name: "San Marino", latitude: 43.9333, longitude: 12.4667, found_rate: 0.2}
    ]

    assert_order "san", ["San Antonio", "San Francisco", "San Marino"], boost_by: {found_rate: {factor: 100}}, boost_by_distance: {location: {origin: [37, -122], scale: "1000mi"}}
    assert_order "san", ["San Francisco", "San Antonio", "San Marino"], boost_by: {found_rate: {factor: 100}}, boost_by_distance: {location: {origin: [37, -122], scale: "1000mi", factor: 100}}
  end

  def test_boost_by_indices
    setup_animal

    store_names ["Rex"], Animal
    store_names ["Rexx"], Product

    assert_order "Rex", ["Rexx", "Rex"], {models: [Animal, Product], indices_boost: {Animal => 1, Product => 200}, fields: [:name]}, Searchkick
  end

  private

  # Three tomatoes with no, 10, and 100 orders (fresh array on every call).
  def tomatoes_with_orders
    [
      {name: "Tomato A"},
      {name: "Tomato B", orders_count: 10},
      {name: "Tomato C", orders_count: 100}
    ]
  end

  # Two products whose name and color mention each other's value.
  def red_white_products
    [
      {name: "Red", color: "White"},
      {name: "White", color: "Red Red Red"}
    ]
  end

  # Articles created two days ago, one day ago, and now.
  def dated_articles
    [
      {name: "Article 1", created_at: 2.days.ago},
      {name: "Article 2", created_at: 1.day.ago},
      {name: "Article 3", created_at: Time.now}
    ]
  end

  # Three "San ..." cities with their coordinates.
  def san_cities
    [
      {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
      {name: "San Antonio", latitude: 29.4167, longitude: -98.5000},
      {name: "San Marino", latitude: 43.9333, longitude: 12.4667}
    ]
  end
end
================================================
FILE: test/callbacks_test.rb
================================================
require_relative "test_helper"
# Exercises the Searchkick callback modes (false, :bulk, :async, :queue) and
# the global enable/disable switches.
class CallbacksTest < Minitest::Test
  def test_false
    Searchkick.callbacks(false) do
      store_names ["Product A", "Product B"]
    end
    assert_search "product", []
  end

  def test_bulk
    Searchkick.callbacks(:bulk) do
      store_names ["Product A", "Product B"]
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product B"]
  end

  def test_async
    assert_enqueued_jobs 2 do
      Searchkick.callbacks(:async) do
        store_names ["Product A", "Product B"]
      end
    end
  end

  def test_queue
    # TODO figure out which earlier test leaves records in index
    Product.reindex

    reindex_queue = Product.searchkick_index.reindex_queue
    reindex_queue.clear

    # records created in :queue mode are queued, not indexed
    Searchkick.callbacks(:queue) do
      store_names ["Product A", "Product B"]
    end
    Product.searchkick_index.refresh
    assert_search "product", [], load: false, conversions: false
    assert_equal 2, reindex_queue.length

    perform_enqueued_jobs do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Product")
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product B"], load: false
    assert_equal 0, reindex_queue.length

    # a destroy and a create are queued the same way
    Searchkick.callbacks(:queue) do
      Product.where(name: "Product B").destroy_all
      Product.create!(name: "Product C")
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product B"], load: false
    assert_equal 2, reindex_queue.length

    perform_enqueued_jobs do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Product")
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product C"], load: false
    assert_equal 0, reindex_queue.length

    # ensure no error with empty queue
    Searchkick::ProcessQueueJob.perform_now(class_name: "Product")
  end

  def test_record_async
    with_options({callbacks: :async}, Song) do
      assert_enqueued_jobs 1 do
        Song.create!(name: "Product A")
      end

      assert_enqueued_jobs 1 do
        Song.first.reindex
      end
    end
  end

  def test_relation_async
    with_options({callbacks: :async}, Song) do
      assert_enqueued_jobs 0 do
        Song.all.reindex
      end
    end
  end

  def test_disable_callbacks
    # make sure callbacks default to on
    assert Searchkick.callbacks?

    store_names ["Product A"]

    Searchkick.disable_callbacks
    begin
      refute Searchkick.callbacks?

      store_names ["Product B"]
      assert_search "product", ["Product A"]
    ensure
      # disable_callbacks flips a global switch; re-enable even when an
      # assertion above fails so later tests still run with callbacks on
      Searchkick.enable_callbacks
    end

    Product.reindex

    assert_search "product", ["Product A", "Product B"]
  end
end
================================================
FILE: test/conversions_test.rb
================================================
require_relative "test_helper"
# Tests for ranking by the conversions (v1) and conversions_v2 fields.
class ConversionsTest < Minitest::Test
  def setup
    super
    setup_speaker
  end

  def test_v1
    store ranked_tomatoes(:conversions)
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"]
    assert_order "TOMATO", ["Tomato C", "Tomato B", "Tomato A"]
    assert_equal_scores "tomato", conversions_v1: false
  end

  def test_v1_case
    store cased_tomatoes(:conversions, "tomato")
    assert_order "tomato", ["Tomato A", "Tomato B"]
  end

  def test_v1_case_sensitive
    with_options(case_sensitive: true) do
      store cased_tomatoes(:conversions, "Tomato")
      assert_order "Tomato", ["Tomato B", "Tomato A"]
    end
  ensure
    Product.reindex
  end

  def test_v1_term
    store soup_tomatoes(:conversions)
    assert_order "tomato", ["Tomato A", "Tomato B", "Tomato C"], conversions_term: "soup"
  end

  def test_v1_weight
    Product.reindex
    store [
      {name: "Product Boost", orders_count: 20},
      {name: "Product Conversions", conversions: {"product" => 10}}
    ]
    assert_order "product", ["Product Conversions", "Product Boost"], boost: "orders_count"
  end

  def test_v1_multiple_conversions
    store [
      {name: "Speaker A", conversions_a: {"speaker" => 1}, conversions_b: {"speaker" => 6}},
      {name: "Speaker B", conversions_a: {"speaker" => 2}, conversions_b: {"speaker" => 5}},
      {name: "Speaker C", conversions_a: {"speaker" => 3}, conversions_b: {"speaker" => 4}}
    ], Speaker

    assert_equal_scores "speaker", {conversions_v1: false}, Speaker
    assert_equal_scores "speaker", {}, Speaker
    assert_equal_scores "speaker", {conversions_v1: ["conversions_a", "conversions_b"]}, Speaker
    assert_equal_scores "speaker", {conversions_v1: ["conversions_b", "conversions_a"]}, Speaker
    assert_order "speaker", ["Speaker C", "Speaker B", "Speaker A"], {conversions_v1: "conversions_a"}, Speaker
    assert_order "speaker", ["Speaker A", "Speaker B", "Speaker C"], {conversions_v1: "conversions_b"}, Speaker
  end

  def test_v1_multiple_conversions_with_boost_term
    store [
      {name: "Speaker A", conversions_a: {"speaker" => 4, "speaker_1" => 1}},
      {name: "Speaker B", conversions_a: {"speaker" => 3, "speaker_1" => 2}},
      {name: "Speaker C", conversions_a: {"speaker" => 2, "speaker_1" => 3}},
      {name: "Speaker D", conversions_a: {"speaker" => 1, "speaker_1" => 4}}
    ], Speaker

    assert_order "speaker", ["Speaker A", "Speaker B", "Speaker C", "Speaker D"], {conversions_v1: "conversions_a"}, Speaker
    assert_order "speaker", ["Speaker D", "Speaker C", "Speaker B", "Speaker A"], {conversions_v1: "conversions_a", conversions_term: "speaker_1"}, Speaker
  end

  def test_v2
    store ranked_tomatoes(:conversions_v2)
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: true
    assert_order "TOMATO", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: true
    assert_equal_scores "tomato", conversions_v2: false
  end

  def test_v2_case
    store cased_tomatoes(:conversions_v2, "tomato")
    assert_order "tomato", ["Tomato A", "Tomato B"], conversions_v2: true
  end

  def test_v2_case_sensitive
    with_options(case_sensitive: true) do
      store cased_tomatoes(:conversions_v2, "Tomato")
      assert_order "Tomato", ["Tomato B", "Tomato A"], conversions_v2: true
    end
  ensure
    Product.reindex
  end

  def test_v2_term
    store soup_tomatoes(:conversions_v2)
    assert_order "tomato", ["Tomato A", "Tomato B", "Tomato C"], conversions_v2: {term: "soup"}
    assert_order "tomato", ["Tomato A", "Tomato B", "Tomato C"], conversions_v2: true, conversions_term: "soup"
  end

  def test_v2_weight
    Product.reindex
    store [
      {name: "Product Boost", orders_count: 20},
      {name: "Product Conversions", conversions_v2: {"product" => 10}}
    ]
    assert_order "product", ["Product Conversions", "Product Boost"], conversions_v2: true, boost: "orders_count"
  end

  def test_v2_space
    store ranked_tomatoes(:conversions_v2, "tomato juice")
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: {term: "tomato juice"}
  end

  def test_v2_dot
    store ranked_tomatoes(:conversions_v2, "tomato.juice")
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: {term: "tomato.juice"}
  end

  def test_v2_unicode
    store ranked_tomatoes(:conversions_v2, "喰らう")
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: {term: "喰らう"}
  end

  def test_v2_score
    store dual_tomatoes

    v1_scores = Product.search("tomato", conversions_v2: false, load: false).map(&:_score)
    v2_scores = Product.search("tomato", conversions_v1: false, conversions_v2: true, load: false).map(&:_score)
    assert_equal v1_scores, v2_scores
  end

  def test_v2_factor
    store dual_tomatoes

    baseline = Product.search("tomato", conversions_v1: false, conversions_v2: true, load: false).map(&:_score)
    boosted = Product.search("tomato", conversions_v1: false, conversions_v2: {factor: 3}, load: false).map(&:_score)
    deltas = baseline.zip(boosted).map { |before, after| after - before }

    [6, 4, 2].zip(deltas).each do |expected, delta|
      assert_in_delta expected, delta
    end
  end

  def test_v2_no_tokenization
    store [
      {name: "Tomato A"},
      {name: "Tomato B", conversions_v2: {"tomato juice" => 2}},
      {name: "Tomato C", conversions_v2: {"tomato vine" => 3}}
    ]
    assert_equal_scores "tomato", conversions_v2: true
  end

  def test_v2_max_conversions
    conversions = (0...66000).to_h { |i| ["term#{i}", 1] }
    store [{name: "Tomato A", conversions_v2: conversions}]

    # grow the same hash by another 1000 terms and expect the import to fail
    offset = conversions.size
    1000.times { |i| conversions["term#{offset + i}"] = 1 }
    assert_raises(Searchkick::ImportError) do
      store [{name: "Tomato B", conversions_v2: conversions}]
    end
  end

  def test_v2_max_length
    store [{name: "Tomato A", conversions_v2: {"a"*32766 => 1}}]

    assert_raises(Searchkick::ImportError) do
      store [{name: "Tomato B", conversions_v2: {"a"*32767 => 1}}]
    end
  end

  def test_v2_zero
    error = assert_raises(Searchkick::ImportError) do
      store [{name: "Tomato A", conversions_v2: {"tomato" => 0}}]
    end
    assert_match "must be a positive normal float", error.message
  end

  def test_v2_partial_reindex
    store ranked_tomatoes(:conversions_v2)
    Product.reindex(:search_name, refresh: true)
    assert_order "tomato", ["Tomato C", "Tomato B", "Tomato A"], conversions_v2: true
  end

  private

  # Tomato A/B/C carrying 1/2/3 conversions for +term+ under +field+.
  def ranked_tomatoes(field, term = "tomato")
    [
      {name: "Tomato A", field => {term => 1}},
      {name: "Tomato B", field => {term => 2}},
      {name: "Tomato C", field => {term => 3}}
    ]
  end

  # Tomato A has one conversion for each of three casings, Tomato B two for +term+ only.
  def cased_tomatoes(field, term)
    [
      {name: "Tomato A", field => {term => 1, "TOMATO" => 1, "tOmAtO" => 1}},
      {name: "Tomato B", field => {term => 2}}
    ]
  end

  # "tomato" counts ascend A to C while "soup" counts descend.
  def soup_tomatoes(field)
    [
      {name: "Tomato A", field => {"tomato" => 1, "soup" => 3}},
      {name: "Tomato B", field => {"tomato" => 2, "soup" => 2}},
      {name: "Tomato C", field => {"tomato" => 3, "soup" => 1}}
    ]
  end

  # The same counts stored in both conversions and conversions_v2.
  def dual_tomatoes
    [
      {name: "Tomato A", conversions: {"tomato" => 1}, conversions_v2: {"tomato" => 1}},
      {name: "Tomato B", conversions: {"tomato" => 2}, conversions_v2: {"tomato" => 2}},
      {name: "Tomato C", conversions: {"tomato" => 3}, conversions_v2: {"tomato" => 3}}
    ]
  end
end
================================================
FILE: test/default_scope_test.rb
================================================
require_relative "test_helper"
# Tests reindexing and searching on Band.
class DefaultScopeTest < Minitest::Test
  def setup
    setup_model(Band)
  end

  def test_reindex
    bands = [
      {name: "Test", active: true},
      {name: "Test 2", active: false}
    ]
    store bands, reindex: false

    Band.reindex
    assert_search "*", ["Test"], {load: false}
  end

  def test_search
    Band.reindex
    Band.search("*") # test works

    error = assert_raises(Searchkick::Error) { Band.all.search("*") }
    assert_equal "search must be called on model, not relation", error.message
  end

  def default_model
    Band
  end
end
================================================
FILE: test/exclude_test.rb
================================================
require_relative "test_helper"
# Tests for the :exclude search option.
class ExcludeTest < Minitest::Test
  def test_butter
    store_butter_tubs
    assert_search "butter", ["Butter Tub"], exclude: ["peanut butter"]
  end

  def test_butter_word_start
    store_butter_tubs
    assert_search "butter", ["Butter Tub"], exclude: ["peanut butter"], match: :word_start
  end

  def test_butter_exact
    store_butter_tubs
    assert_search "butter", [], exclude: ["peanut butter"], fields: [{name: :exact}]
  end

  def test_same_exact
    store_butter_tubs
    assert_search "Butter Tub", ["Butter Tub"], exclude: ["Peanut Butter Tub"], fields: [{name: :exact}]
  end

  def test_egg_word_start
    store_names ["eggs", "eggplant"]
    assert_search "egg", ["eggs"], exclude: ["eggplant"], match: :word_start
  end

  def test_string
    store_butter_tubs
    assert_search "butter", ["Butter Tub"], exclude: "peanut butter"
  end

  def test_match_all
    store_names ["Butter"]
    assert_search "*", [], exclude: "butter"
  end

  def test_match_all_fields
    store_names ["Butter"]
    assert_search "*", [], fields: [:name], exclude: "butter"
    assert_search "*", ["Butter"], fields: [:color], exclude: "butter"
  end

  private

  # Stores the two butter products shared by most tests here.
  def store_butter_tubs
    store_names ["Butter Tub", "Peanut Butter Tub"]
  end
end
================================================
FILE: test/geo_shape_test.rb
================================================
require_relative "test_helper"
# Tests geo_shape filtering on the Region model's territory field.
class GeoShapeTest < Minitest::Test
  def setup
    setup_region
    store [
      {
        name: "Region A",
        text: "The witch had a cat",
        territory: {
          type: "polygon",
          coordinates: [[[30, 40], [35, 45], [40, 40], [40, 30], [30, 30], [30, 40]]]
        }
      },
      {
        name: "Region B",
        text: "and a very tall hat",
        territory: {
          type: "polygon",
          coordinates: [[[50, 60], [55, 65], [60, 60], [60, 50], [50, 50], [50, 60]]]
        }
      },
      {
        name: "Region C",
        text: "and long ginger hair which she wore in a plait",
        territory: {
          type: "polygon",
          coordinates: [[[10, 20], [15, 25], [20, 20], [20, 10], [10, 10], [10, 20]]]
        }
      }
    ]
  end

  def test_envelope
    shape = {type: "envelope", coordinates: [[28, 42], [32, 38]]}
    assert_territory "*", ["Region A"], shape
  end

  def test_polygon
    shape = {
      type: "polygon",
      coordinates: [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]]
    }
    assert_territory "*", ["Region A"], shape
  end

  def test_multipolygon
    shape = {
      type: "multipolygon",
      coordinates: [
        [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]],
        [[[58, 62], [62, 62], [62, 58], [58, 58], [58, 62]]]
      ]
    }
    assert_territory "*", ["Region A", "Region B"], shape
  end

  def test_disjoint
    shape = {type: "envelope", relation: "disjoint", coordinates: [[28, 42], [32, 38]]}
    assert_territory "*", ["Region B", "Region C"], shape
  end

  def test_within
    shape = {type: "envelope", relation: "within", coordinates: [[20, 50], [50, 20]]}
    assert_territory "*", ["Region A"], shape
  end

  def test_search_match
    shape = {type: "envelope", coordinates: [[28, 42], [32, 38]]}
    assert_territory "witch", ["Region A"], shape
  end

  def test_search_no_match
    shape = {type: "envelope", coordinates: [[28, 42], [32, 38]]}
    assert_territory "ginger hair", [], shape
  end

  def test_latlon
    shape = {type: "envelope", coordinates: [{lat: 42, lon: 28}, {lat: 38, lon: 32}]}
    assert_territory "*", ["Region A"], shape
  end

  def default_model
    Region
  end

  private

  # Runs assert_search with +geo_shape+ applied as a where filter on territory.
  def assert_territory(term, expected, geo_shape)
    assert_search term, expected, {where: {territory: {geo_shape: geo_shape}}}
  end
end
================================================
FILE: test/highlight_test.rb
================================================
require_relative "test_helper"
# Tests for the :highlight search option and the highlight accessors on results.
#
# NOTE(review): none of the expected strings (nor the `tag:` / `pre_tags` /
# `post_tags` values) in this class contain any highlight markup even though
# highlighting is enabled; the markup may have been stripped from this copy of
# the file. Confirm against the upstream test before relying on these values.
class HighlightTest < Minitest::Test
  def test_basic
    store_names ["Two Door Cinema Club"]
    assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: true).highlights.first[:name]
  end

  def test_with_highlights
    store_names ["Two Door Cinema Club"]
    assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: true).with_highlights.first.last[:name]
  end

  def test_tag
    store_names ["Two Door Cinema Club"]
    assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: {tag: ""}).highlights.first[:name]
    assert_equal "Two Door Cinema Club", Product.search("cinema").highlight(tag: "").highlights.first[:name]
  end

  def test_tag_class
    store_names ["Two Door Cinema Club"]
    assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: {tag: ""}).highlights.first[:name]
  end

  def test_very_long
    store_names [("Two Door Cinema Club " * 100).strip]
    expected = ("Two Door Cinema Club " * 100).strip
    assert_equal expected, Product.search("cinema", highlight: true).highlights.first[:name]
  end

  def test_multiple_fields
    store [{name: "Two Door Cinema Club", color: "Cinema Orange"}]
    highlights = Product.search("cinema", fields: [:name, :color], highlight: true).highlights.first
    assert_equal "Two Door Cinema Club", highlights[:name]
    assert_equal "Cinema Orange", highlights[:color]
  end

  def test_fields
    store [{name: "Two Door Cinema Club", color: "Cinema Orange"}]
    highlights = Product.search("cinema", fields: [:name, :color], highlight: {fields: [:name]}).highlights.first
    assert_equal "Two Door Cinema Club", highlights[:name]
    assert_nil highlights[:color]
  end

  def test_field_options
    store_names ["Two Door Cinema Club are a Northern Irish indie rock band"]
    # the fragment size depends on the MATCH environment setting
    fragment_size = ENV["MATCH"] == "word_start" ? 26 : 21
    assert_equal "Two Door Cinema Club are", Product.search("cinema", highlight: {fields: {name: {fragment_size: fragment_size}}}).highlights.first[:name]
  end

  def test_multiple_words
    store_names ["Hello World Hello"]
    assert_equal "Hello World Hello", Product.search("hello", highlight: true).highlights.first[:name]
  end

  def test_encoder
    store_names ["Hello"]
    assert_equal "<b>Hello</b>", Product.search("hello", highlight: {encoder: "html"}, misspellings: false).highlights.first[:name]
  end

  def test_word_middle
    store_names ["Two Door Cinema Club"]
    assert_equal "Two Door Cinema Club", Product.search("ine", match: :word_middle, highlight: true).highlights.first[:name]
  end

  def test_body
    skip if ENV["MATCH"] == "word_start"
    store_names ["Two Door Cinema Club"]
    body = {
      query: {
        match: {
          "name.analyzed" => "cinema"
        }
      },
      highlight: {
        pre_tags: [""],
        post_tags: [""],
        fields: {
          "name.analyzed" => {}
        }
      }
    }
    assert_equal "Two Door Cinema Club", Product.search(body: body).highlights.first[:"name.analyzed"]
  end

  def test_multiple_highlights
    store_names ["Two Door Cinema Club Some Other Words And Much More Doors Cinema Club"]
    highlights = Product.search("cinema", highlight: {fragment_size: 20}).highlights(multiple: true).first[:name]
    assert_kind_of Array, highlights
    # expected value goes first so a failure reports expected/actual correctly
    assert_equal 2, highlights.count
    refute_equal highlights.first, highlights.last
    highlights.each do |highlight|
      assert_includes highlight, "Cinema"
    end
  end

  def test_search_highlights_method
    store_names ["Two Door Cinema Club"]
    assert_equal "Two Door Cinema Club", Product.search("cinema", highlight: true).first.search_highlights[:name]
  end

  def test_match_all
    store_names ["Two Door Cinema Club"]
    assert_nil Product.search("*", highlight: true).highlights.first[:name]
  end

  def test_match_all_load_false
    store_names ["Two Door Cinema Club"]
    assert_nil Product.search("*", highlight: true, load: false).highlights.first[:name]
  end

  def test_match_all_search_highlights
    store_names ["Two Door Cinema Club"]
    assert_nil Product.search("*", highlight: true).first.search_highlights[:name]
  end
end
================================================
FILE: test/hybrid_test.rb
================================================
require_relative "test_helper"
# Tests combining a keyword search and a knn search through multi_search.
class HybridTest < Minitest::Test
  def setup
    skip unless Searchkick.knn_support?
    super
  end

  def test_search
    error = assert_raises(ArgumentError) do
      Product.search("product", knn: {field: :embedding, vector: [1, 2, 3]})
    end
    assert_equal "Use Searchkick.multi_search for hybrid search", error.message
  end

  def test_multi_search
    store [
      {name: "The dog is barking", embedding: [1, 2, 0]},
      {name: "The cat is purring", embedding: [1, 0, 0]},
      {name: "The bear is growling", embedding: [1, 2, 3]}
    ]

    keyword_search = Product.search("growling bear")
    semantic_search = Product.search(knn: {field: :embedding, vector: [1, 2, 3]})
    Searchkick.multi_search([keyword_search, semantic_search])

    ranked = Searchkick::Reranking.rrf(keyword_search, semantic_search)
    names = ranked.map { |entry| entry[:result].name }
    assert_equal ["The bear is growling", "The dog is barking", "The cat is purring"], names

    # scores are checked position by position
    [0.03279, 0.01612, 0.01587].each_with_index do |score, position|
      assert_in_delta score, ranked[position][:score]
    end
  end
end
================================================
FILE: test/index_cache_test.rb
================================================
require_relative "test_helper"
# Tests that Product.searchkick_index reuses index objects across calls.
class IndexCacheTest < Minitest::Test
  def setup
    Product.class_variable_get(:@@searchkick_index_cache).clear
  end

  def test_default
    first_id = Product.searchkick_index.object_id
    3.times { assert_equal first_id, Product.searchkick_index.object_id }
  end

  def test_max_size
    initial_ids = object_ids(20)
    assert_equal initial_ids, object_ids(20)

    # asking for one more index name changes the ids returned afterwards
    Product.searchkick_index(name: "other")
    refute_equal initial_ids, object_ids(20)
  end

  def test_thread_safe
    ids_per_thread = with_threads { object_ids(20) }
    assert_equal ids_per_thread[0], ids_per_thread[1]
    assert_equal ids_per_thread[0], ids_per_thread[2]
  end

  # object ids can differ since threads progress at different speeds
  # test to make sure doesn't crash
  def test_thread_safe_max_size
    with_threads { object_ids(1000) }
  end

  private

  # object_ids of the index objects returned for names "index0".."index<count-1>"
  def object_ids(count)
    Array.new(count) { |i| Product.searchkick_index(name: "index#{i}").object_id }
  end

  # Runs the given block in three threads and returns each thread's value,
  # restoring Thread.report_on_exception afterwards.
  def with_threads
    previous = Thread.report_on_exception
    begin
      Thread.report_on_exception = true
      threads = Array.new(3) { Thread.new { yield } }
      threads.each(&:join)
      threads.map(&:value)
    ensure
      Thread.report_on_exception = previous
    end
  end
end
================================================
FILE: test/index_options_test.rb
================================================
require_relative "test_helper"
# Tests index-level options applied through with_options on Song.
class IndexOptionsTest < Minitest::Test
  def setup
    Song.destroy_all
  end

  def test_case_sensitive
    with_options({case_sensitive: true}) do
      store_names ["Test", "test"]
      assert_search "test", ["test"], {misspellings: false}
    end
  end

  def test_no_stemming
    with_options({stem: false}) do
      store_names ["milk", "milks"]
      assert_search "milks", ["milks"], {misspellings: false}
    end
  end

  def test_no_stem_exclusion
    with_options({}) do
      store_names ["animals", "anime"]
      assert_search "animals", ["animals", "anime"], {misspellings: false}
      assert_search "anime", ["animals", "anime"], {misspellings: false}
      assert_song_tokens ["anim"], "anime"
    end
  end

  def test_stem_exclusion
    with_options({stem_exclusion: ["anime"]}) do
      store_names ["animals", "anime"]
      assert_search "animals", ["animals"], {misspellings: false}
      assert_search "anime", ["anime"], {misspellings: false}
      assert_song_tokens ["anime"], "anime"
    end
  end

  def test_no_stemmer_override
    with_options({}) do
      store_names ["animals", "animations"]
      assert_search "animals", ["animals", "animations"], {misspellings: false}
      assert_search "animations", ["animals", "animations"], {misspellings: false}
      assert_song_tokens ["anim"], "animations"
    end
  end

  def test_stemmer_override
    with_options({stemmer_override: ["animations => animat"]}) do
      store_names ["animals", "animations"]
      assert_search "animals", ["animals"], {misspellings: false}
      assert_search "animations", ["animations"], {misspellings: false}
      assert_song_tokens ["animat"], "animations"
    end
  end

  def test_special_characters
    with_options({special_characters: false}) do
      store_names ["jalapeño"]
      assert_search "jalapeno", [], {misspellings: false}
    end
  end

  def test_index_name
    with_options({index_name: "songs_v2"}) do
      assert_equal "songs_v2", Song.searchkick_index.name
    end
  end

  def test_index_name_callable
    with_options({index_name: -> { "songs_v2" }}) do
      assert_equal "songs_v2", Song.searchkick_index.name
    end
  end

  def test_index_prefix
    with_options({index_prefix: "hello"}) do
      assert_equal "hello_songs_test", Song.searchkick_index.name
    end
  end

  def test_index_prefix_callable
    with_options({index_prefix: -> { "hello" }}) do
      assert_equal "hello_songs_test", Song.searchkick_index.name
    end
  end

  def default_model
    Song
  end

  private

  # Asserts +text+ yields +expected+ tokens under the searchkick_index analyzer
  # and then under the searchkick_search2 analyzer.
  def assert_song_tokens(expected, text)
    ["searchkick_index", "searchkick_search2"].each do |analyzer|
      assert_equal expected, Song.searchkick_index.tokens(text, analyzer: analyzer)
    end
  end
end
================================================
FILE: test/index_test.rb
================================================
require_relative "test_helper"
class IndexTest < Minitest::Test
# Runs the inherited setup first, then calls setup_region.
def setup
super
setup_region
end
# Tokens produced for "Dollar Tree" by the searchkick_index analyzer.
def test_tokens
  index = Product.searchkick_index
  tokens = index.tokens("Dollar Tree", analyzer: "searchkick_index")
  assert_equal ["dollar", "dollartre", "tree"], tokens
end
# Tokens produced for "Dollar Tree" by the searchkick_search2 analyzer.
def test_tokens_analyzer
  index = Product.searchkick_index
  tokens = index.tokens("Dollar Tree", analyzer: "searchkick_search2")
  assert_equal ["dollar", "tree"], tokens
end
# After storing a single product, total_docs is expected to be 1.
def test_total_docs
  store_names(["Product A"])
  index = Product.searchkick_index
  assert_equal(1, index.total_docs)
end
def test_clean_indices
suffix = Searchkick.index_suffix ? "_#{Searchkick.index_suffix}" : ""
old_index = Searchkick::Index.new("products_test#{suffix}_20130801000000000")
different_index = Searchkick::Index.new("items_test#{suffix}_20130801000000000")
old_index.delete if old_index.exists?
different_index.delete if different_index.exists?
# create indexes
old_index.create
different_index.create
Product.searchkick_index.clean_indices
assert Product.searchkick_index.exists?
assert different_index.exists?
assert !old_index.exists?
end
def test_clean_indices_old_format
suffix = Searchkick.index_suffix ? "_#{Searchkick.index_suffix}" : ""
old_index = Searchkick::Index.new("products_test#{suffix}_20130801000000")
old_index.create
Product.searchkick_index.clean_indices
assert !old_index.exists?
end
def test_retain
Product.reindex
assert_equal 1, Product.searchkick_index.all_indices.size
Product.reindex(retain: true)
assert_equal 2, Product.searchkick_index.all_indices.size
end
def test_mappings
store_names ["Dollar Tree"], Store
assert_equal ["Dollar Tree"], Store.search(body: {query: {match: {name: "dollar"}}}).map(&:name)
mapping = Store.searchkick_index.mapping
assert_kind_of Hash, mapping
assert_equal "text", mapping.values.first["mappings"]["properties"]["name"]["type"]
end
def test_settings
assert_kind_of Hash, Store.searchkick_index.settings
end
def test_remove_blank_id
store_names ["Product A"]
Product.searchkick_index.remove(Product.new)
assert_search "product", ["Product A"]
ensure
Product.reindex
end
# keep simple for now, but maybe return client response in future
def test_store_response
product = Searchkick.callbacks(false) { Product.create!(name: "Product A") }
assert_nil Product.searchkick_index.store(product)
end
# keep simple for now, but maybe return client response in future
def test_bulk_index_response
product = Searchkick.callbacks(false) { Product.create!(name: "Product A") }
assert_nil Product.searchkick_index.bulk_index([product])
end
# TODO move
def test_filterable
store [{name: "Product A", alt_description: "Hello"}]
error = assert_raises(Searchkick::InvalidQueryError) do
assert_search "*", [], where: {alt_description: "Hello"}
end
assert_match "Cannot search on field [alt_description] since it is not indexed", error.message
end
def test_filterable_non_string
store [{name: "Product A", store_id: 1}]
assert_search "*", ["Product A"], where: {store_id: 1}
end
def test_large_value
large_value = 1000.times.map { "hello" }.join(" ")
store [{name: "Product A", text: large_value}], Region
assert_search "product", ["Product A"], {}, Region
assert_search "hello", ["Product A"], {fields: [:name, :text]}, Region
assert_search "hello", ["Product A"], {}, Region
assert_search "*", ["Product A"], {where: {text: large_value}}, Region
end
def test_very_large_value
# terms must be < 32 KB with Elasticsearch 8.10.3+
# https://github.com/elastic/elasticsearch/pull/99818
large_value = 5400.times.map { "hello" }.join(" ")
store [{name: "Product A", text: large_value}], Region
assert_search "product", ["Product A"], {}, Region
assert_search "hello", ["Product A"], {fields: [:name, :text]}, Region
assert_search "hello", ["Product A"], {}, Region
# keyword not indexed
assert_search "*", [], {where: {text: large_value}}, Region
end
def test_bulk_import_raises_error
valid_dog = Product.create(name: "2016-01-02")
invalid_dog = Product.create(name: "Ol' One-Leg")
mapping = {
properties: {
name: {type: "date"}
}
}
index = Searchkick::Index.new "dogs", mappings: mapping, _type: "dog"
index.delete if index.exists?
index.create_index
index.store valid_dog
assert_raises(Searchkick::ImportError) do
index.bulk_index [valid_dog, invalid_dog]
end
end
end
================================================
FILE: test/inheritance_test.rb
================================================
require_relative "test_helper"
# Tests for models that share one index through inheritance
# (Animal with its Dog and Cat subclasses) and for multi-model searches.
class InheritanceTest < Minitest::Test
  def setup
    super
    setup_animal
  end

  def test_child_reindex
    store_names ["Max"], Cat
    assert Dog.reindex
    assert_equal 1, Animal.search("*").size
  end

  def test_child_index_name
    assert_equal "animals_test#{ENV["TEST_ENV_NUMBER"]}", Dog.searchkick_index.name
  end

  def test_child_search
    store_names ["Bear"], Dog
    store_names ["Bear"], Cat
    assert_equal 1, Dog.search("bear").size
  end

  def test_parent_search
    store_names ["Bear"], Dog
    store_names ["Bear"], Cat
    assert_equal 2, Animal.search("bear").size
  end

  def test_force_one_type
    store_names ["Green Bear"], Dog
    store_names ["Blue Bear"], Cat
    assert_equal ["Blue Bear"], Animal.search("bear", type: [Cat]).map(&:name)
  end

  # Only the type filter is under test here, so the names are sorted before
  # comparing (as other tests in this class do) rather than relying on the
  # order in which equally relevant hits come back.
  def test_force_multiple_types
    store_names ["Green Bear"], Dog
    store_names ["Blue Bear"], Cat
    store_names ["Red Bear"], Animal
    assert_equal ["Blue Bear", "Green Bear"], Animal.search("bear", type: [Dog, Cat]).map(&:name).sort
  end

  def test_child_autocomplete
    store_names ["Max"], Cat
    store_names ["Mark"], Dog
    assert_equal ["Max"], Cat.search("ma", fields: [:name], match: :text_start).map(&:name)
  end

  def test_parent_autocomplete
    store_names ["Max"], Cat
    store_names ["Bear"], Dog
    assert_equal ["Bear"], Animal.search("bea", fields: [:name], match: :text_start).map(&:name).sort
  end

  # def test_child_suggest
  #   store_names ["Shark"], Cat
  #   store_names ["Sharp"], Dog
  #   assert_equal ["shark"], Cat.search("shar", fields: [:name], suggest: true).suggestions
  # end

  def test_parent_suggest
    store_names ["Shark"], Cat
    store_names ["Tiger"], Dog
    assert_equal ["tiger"], Animal.search("tige", fields: [:name], suggest: true).suggestions.sort
  end

  def test_reindex
    store_names ["Bear A"], Cat
    store_names ["Bear B"], Dog
    Animal.reindex
    assert_equal 2, Animal.search("bear").size
  end

  def test_child_models_option
    store_names ["Bear A"], Cat
    store_names ["Bear B"], Dog
    Animal.reindex
    # note: the models option is less efficient than Animal.search("bear", type: [Cat, Dog])
    # since it requires two database calls instead of one to Animal
    assert_equal 2, Searchkick.search("bear", models: [Cat, Dog]).size
  end

  # A record deleted from the database without callbacks stays in the index;
  # searching must warn on stderr and report it through missing_records.
  def test_missing_records
    store_names ["Bear A"], Cat
    store_names ["Bear B"], Dog
    Animal.reindex
    record = Animal.find_by(name: "Bear A")
    record.delete
    assert_output nil, /\[searchkick\] WARNING: Records in search index do not exist in database: Cat\/Dog \d+/ do
      result = Searchkick.search("bear", models: [Cat, Dog])
      assert_equal ["Bear B"], result.map(&:name)
      assert_equal [record.id.to_s], result.missing_records.map { |v| v[:id] }
      assert_equal [[Cat, Dog]], result.missing_records.map { |v| v[:model].sort_by(&:model_name) }
    end
    assert_empty Product.search("bear", load: false).missing_records
  ensure
    Animal.reindex
  end

  def test_inherited_and_non_inherited_models
    store_names ["Bear A"], Cat
    store_names ["Bear B"], Dog
    store_names ["Bear C"]
    Animal.reindex
    assert_equal 2, Searchkick.search("bear", models: [Cat, Product]).size
    assert_equal 2, Searchkick.search("bear", models: [Cat, Product]).hits.size
    assert_equal 2, Searchkick.search("bear", models: [Cat, Product], per_page: 1).total_pages
  end

  # TODO move somewhere better

  def test_multiple_indices
    store_names ["Product A"]
    store_names ["Product B"], Animal
    assert_search "product", ["Product A", "Product B"], {models: [Product, Animal], conversions: false}, Searchkick
    assert_search "product", ["Product A", "Product B"], {index_name: [Product, Animal], conversions: false}, Searchkick
  end

  def test_index_name_model
    store_names ["Product A"]
    assert_equal ["Product A"], Searchkick.search("product", index_name: [Product]).map(&:name)
  end

  # Plain index-name strings are rejected; index_name entries must be models.
  def test_index_name_string
    store_names ["Product A"]
    error = assert_raises Searchkick::Error do
      Searchkick.search("product", index_name: [Product.searchkick_index.name]).map(&:name)
    end
    assert_includes error.message, "Unknown model"
  end

  def test_similar
    store_names ["Dog", "Other dog"], Dog
    store_names ["Not dog"], Cat

    dog = Dog.find_by!(name: "Dog")
    assert_equal ["Other dog"], dog.similar(fields: [:name]).map(&:name)
    assert_equal ["Not dog", "Other dog"], dog.similar(fields: [:name], models: [Animal]).map(&:name).sort
    assert_equal ["Not dog"], dog.similar(fields: [:name], models: [Cat]).map(&:name).sort
  end
end
================================================
FILE: test/knn_test.rb
================================================
require_relative "test_helper"
# Tests for nearest-neighbor (knn) search over the Product embedding fields,
# covering approximate and exact search and the supported distance functions.
class KnnTest < Minitest::Test
  def setup
    skip unless Searchkick.knn_support?
    super

    # prevent null_pointer_exception with OpenSearch 3
    Product.reindex if Searchkick.opensearch? && !Searchkick.server_below?("3.0.0")
  end

  def test_basic
    store [{name: "A", embedding: [1, 2, 3]}, {name: "B", embedding: [-1, -2, -3]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3]}

    scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3]}).hits.map { |hit| hit["_score"] }
    assert_in_delta 1, scores[0]
    assert_in_delta 0, scores[1]
  end

  def test_basic_exact
    store [{name: "A", embedding: [1, 2, 3]}, {name: "B", embedding: [-1, -2, -3]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], exact: true}

    scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3], exact: true}).hits.map { |hit| hit["_score"] }
    assert_in_delta 1, scores[0]
    assert_in_delta 0, scores[1]
  end

  def test_where
    store [
      {name: "A", store_id: 1, embedding: [1, 2, 3]},
      {name: "B", store_id: 2, embedding: [1, 2, 3]},
      {name: "C", store_id: 1, embedding: [-1, -2, -3]},
      {name: "D", store_id: 1}
    ]
    assert_order "*", ["A", "C"], knn: {field: :embedding, vector: [1, 2, 3]}, where: {store_id: 1}
  end

  def test_where_exact
    store [
      {name: "A", store_id: 1, embedding: [1, 2, 3]},
      {name: "B", store_id: 2, embedding: [1, 2, 3]},
      {name: "C", store_id: 1, embedding: [-1, -2, -3]},
      {name: "D", store_id: 1}
    ]
    assert_order "*", ["A", "C"], knn: {field: :embedding, vector: [1, 2, 3], exact: true}, where: {store_id: 1}
  end

  def test_pagination
    store [
      {name: "A", embedding: [1, 2, 3]},
      {name: "B", embedding: [1, 2, 0]},
      {name: "C", embedding: [-1, -2, 0]},
      {name: "D", embedding: [-1, -2, -3]},
      {name: "E"}
    ]
    assert_order "*", ["B", "C"], knn: {field: :embedding, vector: [1, 2, 3]}, limit: 2, offset: 1
  end

  def test_pagination_exact
    store [
      {name: "A", embedding: [1, 2, 3]},
      {name: "B", embedding: [1, 2, 0]},
      {name: "C", embedding: [-1, -2, 0]},
      {name: "D", embedding: [-1, -2, -3]},
      {name: "E"}
    ]
    assert_order "*", ["B", "C"], knn: {field: :embedding, vector: [1, 2, 3], exact: true}, limit: 2, offset: 1
  end

  def test_euclidean
    store [{name: "A", embedding3: [1, 2, 3]}, {name: "B", embedding3: [1, 5, 7]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding3, vector: [1, 2, 3]}

    scores = Product.search(knn: {field: :embedding3, vector: [1, 2, 3]}).hits.map { |hit| hit["_score"] }
    assert_in_delta 1.0 / (1 + 0), scores[0]
    assert_in_delta 1.0 / (1 + 5**2), scores[1]
  end

  def test_euclidean_exact
    store [{name: "A", embedding2: [1, 2, 3]}, {name: "B", embedding2: [1, 5, 7]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding2, vector: [1, 2, 3], distance: "euclidean"}

    scores = Product.search(knn: {field: :embedding2, vector: [1, 2, 3], distance: "euclidean"}).hits.map { |hit| hit["_score"] }
    assert_in_delta 1.0 / (1 + 0), scores[0]
    assert_in_delta 1.0 / (1 + 5**2), scores[1]
  end

  def test_taxicab_exact
    store [{name: "A", embedding2: [1, 2, 3]}, {name: "B", embedding2: [1, 5, 7]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding2, vector: [1, 2, 3], distance: "taxicab"}

    scores = Product.search(knn: {field: :embedding2, vector: [1, 2, 3], distance: "taxicab"}).hits.map { |hit| hit["_score"] }
    assert_in_delta 1.0 / (1 + 0), scores[0]
    assert_in_delta 1.0 / (1 + 7), scores[1]
  end

  def test_chebyshev_exact
    skip unless Searchkick.opensearch?

    store [{name: "A", embedding: [1, 2, 3]}, {name: "B", embedding: [1, 5, 7]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], distance: "chebyshev"}

    scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3], distance: "chebyshev"}).hits.map { |hit| hit["_score"] }
    assert_in_delta 1.0 / (1 + 0), scores[0]
    assert_in_delta 1.0 / (1 + 4), scores[1]
  end

  def test_inner_product
    store [{name: "A", embedding2: [-1, -2, -3]}, {name: "B", embedding2: [1, 5, 7]}, {name: "C"}]
    assert_order "*", ["B", "A"], knn: {field: :embedding2, vector: [1, 2, 3], distance: "inner_product"}

    scores = Product.search(knn: {field: :embedding2, vector: [1, 2, 3], distance: "inner_product"}).hits.map { |hit| hit["_score"] }
    # d > 0: d + 1
    # else: 1 / (1 - d)
    # a looser tolerance is used when not running against OpenSearch
    tolerance = Searchkick.opensearch? ? 0.001 : 0.5
    assert_in_delta 1 + 32, scores[0], tolerance
    assert_in_delta 1.0 / (1 + 14), scores[1]
  end

  def test_inner_product_exact
    store [{name: "A", embedding3: [-1, -2, -3]}, {name: "B", embedding3: [1, 5, 7]}, {name: "C"}]
    assert_order "*", ["B", "A"], knn: {field: :embedding3, vector: [1, 2, 3], distance: "inner_product"}

    scores = Product.search(knn: {field: :embedding3, vector: [1, 2, 3], distance: "inner_product"}).hits.map { |hit| hit["_score"] }
    assert_in_delta 1 + 32, scores[0]
    assert_in_delta 1.0 / (1 + 14), scores[1]
  end

  def test_unindexed
    skip if Searchkick.opensearch?

    store [{name: "A", embedding4: [1, 2, 3]}, {name: "B", embedding4: [-1, -2, -3]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding4, vector: [1, 2, 3], distance: "cosine"}

    scores = Product.search(knn: {field: :embedding4, vector: [1, 2, 3], distance: "cosine"}).hits.map { |hit| hit["_score"] }
    assert_in_delta 1, scores[0]
    assert_in_delta 0, scores[1]

    # distance is mandatory for embedding4, with or without exact: false
    error = assert_raises(ArgumentError) do
      Product.search(knn: {field: :embedding4, vector: [1, 2, 3]})
    end
    assert_match "distance required", error.message

    error = assert_raises(ArgumentError) do
      Product.search(knn: {field: :embedding4, vector: [1, 2, 3], exact: false})
    end
    assert_match "distance required", error.message

    error = assert_raises(ArgumentError) do
      Product.search(knn: {field: :embedding, vector: [1, 2, 3], distance: "euclidean", exact: false})
    end
    assert_equal "distance must match searchkick options for approximate search", error.message

    unless Searchkick.server_below?("9.0.0")
      error = assert_raises(ArgumentError) do
        Product.search(knn: {field: :embedding, vector: [1, 2, 3], distance: "euclidean"})
      end
      assert_equal "distance must match searchkick options", error.message
    end
  end

  # Checks, via the explain output, whether each field/distance combination
  # is served by approximate or exact (script-based) search.
  def test_explain
    store [{name: "A", embedding: [1, 2, 3], embedding2: [1, 2, 3], embedding3: [1, 2, 3], embedding4: [1, 2, 3]}]

    assert_approx true, :embedding, "cosine"
    if Searchkick.opensearch? || Searchkick.server_below?("9.0.0")
      assert_approx false, :embedding, "euclidean"
      assert_approx false, :embedding, "inner_product"
      assert_approx false, :embedding, "taxicab"
    end
    if Searchkick.opensearch?
      assert_approx false, :embedding, "chebyshev"
    end

    assert_approx false, :embedding3, "cosine"
    assert_approx true, :embedding3, "euclidean"
    assert_approx false, :embedding3, "inner_product"

    unless Searchkick.opensearch?
      assert_approx false, :embedding4, "cosine"
      assert_approx false, :embedding4, "euclidean"
      assert_approx false, :embedding4, "inner_product"
    end

    assert_approx false, :embedding2, "cosine"
    assert_approx false, :embedding2, "euclidean"
    assert_approx true, :embedding2, "inner_product"

    assert_approx false, :embedding, "cosine", exact: true
    assert_approx true, :embedding, "cosine", exact: false

    error = assert_raises(ArgumentError) do
      assert_approx true, :embedding, "euclidean", exact: false
    end
    assert_equal "distance must match searchkick options for approximate search", error.message
  end

  def test_ef_search
    skip if Searchkick.opensearch? && Searchkick.server_below?("2.16.0")

    store [{name: "A", embedding: [1, 2, 3]}, {name: "B", embedding: [-1, -2, -3]}, {name: "C"}]
    assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], ef_search: 20}, limit: 10
  end

  private

  # Runs an explained knn search against Product and asserts that the raw
  # response text contains the marker expected for approximate (approx = true)
  # or exact (approx = false) search on the current server flavor.
  def assert_approx(approx, field, distance, **knn_options)
    knn = {field: field, vector: [1, 2, 3], distance: distance, **knn_options}
    response = Product.search(knn: knn, explain: true).response.to_s

    marker =
      if approx
        Searchkick.opensearch? ? "within top" : "within top k documents"
      else
        Searchkick.opensearch? ? "knn_score" : "params.query_vector"
      end
    assert_match marker, response
  end
end
================================================
FILE: test/language_test.rb
================================================
require_relative "test_helper"
# Tests for the language and stemmer options. Most cases need an analysis
# plugin on the server, so the whole class is skipped unless running on CI
# or TEST_LANGUAGE is set.
class LanguageTest < Minitest::Test
  def setup
    skip "Requires plugin" unless ci? || ENV["TEST_LANGUAGE"]

    Song.destroy_all
  end

  def test_chinese
    skip if ci?

    # requires https://github.com/medcl/elasticsearch-analysis-ik
    title = "中华人民共和国国歌"
    with_options({language: "chinese"}) do
      store_names [title]
      assert_language_search "中华人民共和国", [title]
      assert_language_search "国歌", [title]
      assert_language_search "人", []
    end
  end

  def test_chinese2
    # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-smartcn.html
    title = "中华人民共和国国歌"
    with_options({language: "chinese2"}) do
      store_names [title]
      assert_language_search "中华人民共和国", [title]
      # assert_language_search "国歌", ["中华人民共和国国歌"]
      assert_language_search "人", []
    end
  end

  def test_japanese
    # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html
    sentence = "JR新宿駅の近くにビールを飲みに行こうか"
    with_options({language: "japanese"}) do
      store_names [sentence]
      assert_language_search "飲む", [sentence]
      assert_language_search "jr", [sentence]
      assert_language_search "新", []
    end
  end

  def test_japanese_search_synonyms
    # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-kuromoji.html
    sentence = "JR新宿駅の近くにビールを飲みに行こうか"
    with_options({language: "japanese", search_synonyms: [["飲む", "喰らう"]]}) do
      store_names [sentence]
      assert_language_search "喰らう", [sentence]
      assert_language_search "新", []
    end
  end

  def test_korean
    skip if ci?

    # requires https://github.com/open-korean-text/elasticsearch-analysis-openkoreantext
    sentence = "한국어를 처리하는 예시입니닼ㅋㅋ"
    with_options({language: "korean"}) do
      store_names [sentence]
      assert_language_search "처리", [sentence]
      assert_language_search "한국어", [sentence]
      assert_language_search "를", []
    end
  end

  def test_korean2
    skip if ci?

    # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-nori.html
    sentence = "한국어를 처리하는 예시입니닼ㅋㅋ"
    with_options({language: "korean2"}) do
      store_names [sentence]
      assert_language_search "처리", [sentence]
      assert_language_search "한국어", [sentence]
      assert_language_search "를", []
    end
  end

  def test_polish
    # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-stempel.html
    with_options({language: "polish"}) do
      store_names ["polski"]
      assert_language_search "polskimi", ["polski"]
    end
  end

  def test_ukrainian
    # requires https://www.elastic.co/guide/en/elasticsearch/plugins/7.4/analysis-ukrainian.html
    with_options({language: "ukrainian"}) do
      store_names ["ресторани"]
      assert_language_search "ресторан", ["ресторани"]
    end
  end

  def test_vietnamese
    skip if ci?

    # requires https://github.com/duydo/elasticsearch-analysis-vietnamese
    phrase = "công nghệ thông tin Việt Nam"
    with_options({language: "vietnamese"}) do
      store_names [phrase]
      assert_language_search "công nghệ thông tin", [phrase]
      assert_language_search "công", []
    end
  end

  def test_stemmer_hunspell
    skip if ci?

    phrase = "the foxes jumping quickly"
    with_options({stemmer: {type: "hunspell", locale: "en_US"}}) do
      store_names [phrase]
      assert_language_search "fox", [phrase]
    end
  end

  # An unrecognized stemmer type is rejected while the options are applied,
  # so the block body is intentionally empty.
  def test_stemmer_unknown_type
    error = assert_raises(ArgumentError) do
      with_options({stemmer: {type: "bad"}}) do
      end
    end
    assert_equal "Unknown stemmer: bad", error.message
  end

  # language and stemmer are mutually exclusive options.
  def test_stemmer_language
    skip if ci?

    error = assert_raises(ArgumentError) do
      with_options({stemmer: {type: "hunspell", locale: "en_US"}, language: "english"}) do
      end
    end
    assert_equal "Can't pass both language and stemmer", error.message
  end

  # assert_search with misspellings disabled, so only analyzer behavior matters.
  def assert_language_search(term, expected)
    assert_search term, expected, {misspellings: false}
  end

  def default_model
    Song
  end
end
================================================
FILE: test/load_test.rb
================================================
require_relative "test_helper"
# Tests for the `load` search option: results are model records by default
# and Searchkick::HashWrapper objects with load: false.
class LoadTest < Minitest::Test
  def test_default
    store_names ["Product A"]
    product = Product.search("product").first
    assert_kind_of Product, product
    # NOTE(review): in this copy of the file the text between the "#<"
    # literal and the first as_json assertion was lost (it looks like the
    # "#<...>" string was stripped as markup), leaving an unterminated string
    # and an unclosed `if`. The inspect checks below are a minimal
    # reconstruction that restores valid syntax; further assertions may have
    # been dropped — confirm against the upstream file.
    if mongoid?
      assert_match "#<Product _id: ", product.inspect
    else
      assert_match "#<Product id: ", product.inspect
    end
    assert_equal ({"name" => "Product A"}), product.as_json(only: ["name"])
    assert_equal ({"name" => "Product A"}), product.as_json(only: [:name])
    refute product.as_json(except: ["name"]).key?("name")
    refute product.as_json(except: [:name]).key?("name")
    assert_empty product.as_json(only: ["missing"])
    if mongoid?
      product.as_json(methods: [:missing])
    else
      assert_raises(NoMethodError) do
        product.as_json(methods: [:missing])
      end
    end
  end

  def test_false
    store_names ["Product A"]
    product = Product.search("product", load: false).first
    assert_kind_of Searchkick::HashWrapper, product
    # NOTE(review): same extraction damage as in test_default — the original
    # "#<...>" literal and anything up to the first as_json assertion were
    # lost. Minimal reconstruction; confirm against the upstream file.
    assert_match "#<Searchkick::HashWrapper", product.inspect
    assert_equal ({"name" => "Product A"}), product.as_json(only: ["name"])
    # same behavior as Hashie::Mash
    assert_empty product.as_json(only: [:name])
    refute product.as_json(except: ["name"]).key?("name")
    # same behavior as Hashie::Mash
    assert product.as_json(except: [:name]).key?("name")
    assert_empty product.as_json(only: ["missing"])
    # same behavior as Hashie::Mash
    product.as_json(methods: [:missing])
  end

  def test_false_methods
    store_names ["Product A"]
    assert_equal "Product A", Product.search("product", load: false).first.name
  end

  # includes is accepted together with load: false and still yields wrappers.
  def test_false_with_includes
    store_names ["Product A"]
    assert_kind_of Searchkick::HashWrapper, Product.search("product", load: false, includes: [:store]).first
  end

  def test_false_nested_object
    aisle = {"id" => 1, "name" => "Frozen"}
    store [{name: "Product A", aisle: aisle}]
    assert_equal aisle, Product.search("product", load: false).first.aisle.to_hash
  end
end
================================================
FILE: test/log_subscriber_test.rb
================================================
require_relative "test_helper"
# Tests that Searchkick operations emit the expected log lines through
# ActiveSupport::LogSubscriber.
class LogSubscriberTest < Minitest::Test
  def test_create
    logged = capture_logs { Product.create!(name: "Product A") }
    assert_match "Product Store", logged
  end

  def test_update
    product = Product.create!(name: "Product A")
    logged = capture_logs { product.reindex(:search_name) }
    assert_match "Product Update", logged
  end

  def test_destroy
    product = Product.create!(name: "Product A")
    logged = capture_logs { product.destroy }
    assert_match "Product Remove", logged
  end

  # In bulk mode a single "Bulk" entry is logged instead of per-record stores.
  def test_bulk
    logged = capture_logs do
      Searchkick.callbacks(:bulk) do
        Product.create!(name: "Product A")
      end
    end
    assert_match "Bulk", logged
    refute_match "Product Store", logged
  end

  def test_reindex
    create_products
    logged = capture_logs { Product.reindex }
    assert_match "Product Import", logged
    assert_match '"count":3', logged
  end

  def test_reindex_relation
    products = create_products
    logged = capture_logs do
      Product.where.not(id: products.last.id).reindex
    end
    assert_match "Product Import", logged
    assert_match '"count":2', logged
  end

  def test_search
    # prevent warnings
    Product.searchkick_index.refresh

    logged = capture_logs { Product.search("product").to_a }
    assert_match "Product Search", logged
  end

  def test_multi_search
    # prevent warnings
    Product.searchkick_index.refresh

    logged = capture_logs do
      Searchkick.multi_search([Product.search("product")])
    end
    assert_match "Multi Search", logged
  end

  private

  # Creates three products with Searchkick callbacks disabled and returns them.
  def create_products
    Searchkick.callbacks(false) do
      Array.new(3) { Product.create!(name: "Product A") }
    end
  end

  # Swaps the LogSubscriber logger for one writing to an in-memory buffer,
  # yields, and returns everything logged meanwhile. The captured text is also
  # forwarded to the previous logger (if any) and printed when LOG_SUBSCRIBER
  # is set. The previous logger is always restored.
  def capture_logs
    original_logger = ActiveSupport::LogSubscriber.logger
    buffer = StringIO.new
    begin
      ActiveSupport::LogSubscriber.logger = ActiveSupport::Logger.new(buffer)
      yield
      buffer.rewind
      captured = buffer.read
      original_logger.debug(captured) if original_logger
      puts captured if ENV["LOG_SUBSCRIBER"]
      captured
    ensure
      ActiveSupport::LogSubscriber.logger = original_logger
    end
  end
end
================================================
FILE: test/marshal_test.rb
================================================
require_relative "test_helper"
# Tests that loaded search results can be serialized with Marshal.
class MarshalTest < Minitest::Test
  def test_marshal
    store_names ["Product A"]
    records = Product.search("*").to_a
    assert Marshal.dump(records)
  end

  # Highlighted results are loaded with dumpable: true before dumping.
  def test_marshal_highlights
    store_names ["Product A"]
    records = Product.search("product", highlight: true, load: {dumpable: true}).to_a
    assert Marshal.dump(records)
  end
end
================================================
FILE: test/match_test.rb
================================================
require_relative "test_helper"
class MatchTest < Minitest::Test
# exact
def test_match
store_names ["Whole Milk", "Fat Free Milk", "Milk"]
assert_search "milk", ["Milk", "Whole Milk", "Fat Free Milk"]
end
def test_case
store_names ["Whole Milk", "Fat Free Milk", "Milk"]
assert_search "MILK", ["Milk", "Whole Milk", "Fat Free Milk"]
end
def test_cheese_space_in_index
store_names ["Pepper Jack Cheese Skewers"]
assert_search "pepperjack cheese skewers", ["Pepper Jack Cheese Skewers"]
end
# def test_cheese_space_in_query
# store_names ["Pepperjack Cheese Skewers"]
# assert_search "pepper jack cheese skewers", ["Pepperjack Cheese Skewers"]
# end
def test_middle_token
store_names ["Dish Washer Amazing Organic Soap"]
assert_search "dish soap", ["Dish Washer Amazing Organic Soap"]
end
def test_middle_token_wine
store_names ["Beringer Wine Founders Estate Chardonnay"]
assert_search "beringer chardonnay", ["Beringer Wine Founders Estate Chardonnay"]
end
def test_percent
store_names ["1% Milk", "Whole Milk"]
assert_search "1%", ["1% Milk"]
end
# ascii
def test_jalapenos
store_names ["Jalapeño"]
assert_search "jalapeno", ["Jalapeño"]
end
def test_swedish
store_names ["ÅÄÖ"]
assert_search "aao", ["ÅÄÖ"]
end
# stemming
def test_stemming
store_names ["Whole Milk", "Fat Free Milk", "Milk"]
assert_search "milks", ["Milk", "Whole Milk", "Fat Free Milk"]
assert_search "milks", ["Milk", "Whole Milk", "Fat Free Milk"], misspellings: false
end
def test_stemming_tokens
assert_equal ["milk"], Product.searchkick_index.tokens("milks", analyzer: "searchkick_search")
assert_equal ["milk"], Product.searchkick_index.tokens("milks", analyzer: "searchkick_search2")
end
# fuzzy
def test_misspelling_sriracha
store_names ["Sriracha"]
assert_search "siracha", ["Sriracha"]
end
def test_misspelling_multiple
store_names ["Greek Yogurt", "Green Onions"]
assert_search "greed", ["Greek Yogurt", "Green Onions"]
end
def test_short_word
store_names ["Finn"]
assert_search "fin", ["Finn"]
end
def test_edit_distance_two
store_names ["Bingo"]
assert_search "bin", []
assert_search "bingooo", []
assert_search "mango", []
end
def test_edit_distance_one
store_names ["Bingo"]
assert_search "bing", ["Bingo"]
assert_search "bingoo", ["Bingo"]
assert_search "ringo", ["Bingo"]
end
def test_edit_distance_long_word
store_names ["thisisareallylongword"]
assert_search "thisisareallylongwor", ["thisisareallylongword"] # missing letter
assert_search "thisisareelylongword", [] # edit distance = 2
end
def test_misspelling_tabasco
store_names ["Tabasco"]
assert_search "tobasco", ["Tabasco"]
end
def test_misspelling_zucchini
store_names ["Zucchini"]
assert_search "zuchini", ["Zucchini"]
end
def test_misspelling_ziploc
store_names ["Ziploc"]
assert_search "zip lock", ["Ziploc"]
end
def test_misspelling_zucchini_transposition
store_names ["zucchini"]
assert_search "zuccihni", ["zucchini"]
# need to specify field
# as transposition option isn't supported for multi_match queries
# until Elasticsearch 6.1
assert_search "zuccihni", [], misspellings: {transpositions: false}, fields: [:name]
end
def test_misspelling_lasagna
store_names ["lasagna"]
assert_search "lasanga", ["lasagna"], misspellings: {transpositions: true}
assert_search "lasgana", ["lasagna"], misspellings: {transpositions: true}
assert_search "lasaang", [], misspellings: {transpositions: true} # triple transposition, shouldn't work
assert_search "lsagana", [], misspellings: {transpositions: true} # triple transposition, shouldn't work
end
def test_misspelling_lasagna_pasta
store_names ["lasagna pasta"]
assert_search "lasanga", ["lasagna pasta"], misspellings: {transpositions: true}
assert_search "lasanga pasta", ["lasagna pasta"], misspellings: {transpositions: true}
assert_search "lasanga pasat", ["lasagna pasta"], misspellings: {transpositions: true} # both words misspelled with a transposition should still work
end
def test_misspellings_word_start
store_names ["Sriracha"]
assert_search "siracha", ["Sriracha"], fields: [{name: :word_start}]
end
# spaces
def test_spaces_in_field
store_names ["Red Bull"]
assert_search "redbull", ["Red Bull"], misspellings: false
end
def test_spaces_in_query
store_names ["Dishwasher"]
assert_search "dish washer", ["Dishwasher"], misspellings: false
end
def test_spaces_three_words
store_names ["Dish Washer Soap", "Dish Washer"]
assert_search "dish washer soap", ["Dish Washer Soap"]
end
def test_spaces_stemming
store_names ["Almond Milk"]
assert_search "almondmilks", ["Almond Milk"]
end
# other
def test_all
store_names ["Product A", "Product B"]
assert_search "*", ["Product A", "Product B"]
end
def test_no_arguments
store_names []
assert_equal [], Product.search.to_a
end
def test_no_term
store_names ["Product A"]
assert_equal ["Product A"], Product.search(where: {name: "Product A"}).map(&:name)
end
def test_to_be_or_not_to_be
store_names ["to be or not to be"]
assert_search "to be", ["to be or not to be"]
end
def test_apostrophe
store_names ["Ben and Jerry's"]
assert_search "ben and jerrys", ["Ben and Jerry's"]
end
def test_apostrophe_search
store_names ["Ben and Jerrys"]
assert_search "ben and jerry's", ["Ben and Jerrys"]
end
def test_ampersand_index
store_names ["Ben & Jerry's"]
assert_search "ben and jerrys", ["Ben & Jerry's"]
end
def test_ampersand_search
store_names ["Ben and Jerry's"]
assert_search "ben & jerrys", ["Ben and Jerry's"]
end
def test_phrase
store_names ["Fresh Honey", "Honey Fresh"]
assert_search "fresh honey", ["Fresh Honey"], match: :phrase
end
def test_phrase_again
store_names ["Social entrepreneurs don't have it easy raising capital"]
assert_search "social entrepreneurs don't have it easy raising capital", ["Social entrepreneurs don't have it easy raising capital"], match: :phrase
end
def test_phrase_order
store_names ["Wheat Bread", "Whole Wheat Bread"]
assert_order "wheat bread", ["Wheat Bread", "Whole Wheat Bread"], match: :phrase, fields: [:name]
end
def test_dynamic_fields
setup_speaker
store_names ["Red Bull"], Speaker
assert_search "redbull", ["Red Bull"], {fields: [:name]}, Speaker
end
def test_unsearchable
skip
store [
{name: "Unsearchable", description: "Almond"}
]
assert_search "almond", []
end
def test_unsearchable_where
store [
{name: "Unsearchable", description: "Almond"}
]
assert_search "*", ["Unsearchable"], where: {description: "Almond"}
end
def test_emoji
store_names ["Banana"]
assert_search "🍌", ["Banana"], emoji: true
end
def test_emoji_multiple
store_names ["Ice Cream Cake"]
assert_search "🍨🍰", ["Ice Cream Cake"], emoji: true
assert_search "🍨🍰", ["Ice Cream Cake"], emoji: true, misspellings: false
end
# operator
def test_operator
store_names ["fresh", "honey"]
assert_search "fresh honey", ["fresh", "honey"], {operator: "or"}
assert_search "fresh honey", [], {operator: "and"}
assert_search "fresh honey", ["fresh", "honey"], {operator: :or}
assert_search "fresh honey", ["fresh", "honey"], {operator: :or, body_options: {track_total_hits: true}}
assert_search "fresh honey", [], {operator: :or, fields: [:name], match: :phrase, body_options: {track_total_hits: true}}
end
def test_operator_scoring
store_names ["Big Red Circle", "Big Green Circle", "Small Orange Circle"]
assert_order "big red circle", ["Big Red Circle", "Big Green Circle", "Small Orange Circle"], operator: "or"
end
# fields
def test_fields_operator
store [
{name: "red", color: "red"},
{name: "blue", color: "blue"},
{name: "cyan", color: "blue green"},
{name: "magenta", color: "red blue"},
{name: "green", color: "green"}
]
assert_search "red blue", ["red", "blue", "cyan", "magenta"], operator: "or", fields: ["color"]
end
def test_fields
store [
{name: "red", color: "light blue"},
{name: "blue", color: "red fish"}
]
assert_search "blue", ["red"], fields: ["color"]
end
def test_non_existent_field
store_names ["Milk"]
assert_search "milk", [], fields: ["not_here"]
end
# Searches name and color together and expects "Blue B" to come first.
def test_fields_both_match
  # have same score due to dismax
  records = [
    {name: "Blue A", color: "red"},
    {name: "Blue B", color: "light blue"}
  ]
  store(records)

  assert_first("blue", "Blue B", fields: [:name, :color])
end
end
================================================
FILE: test/misspellings_test.rb
================================================
require_relative "test_helper"
# Tests for the misspellings search option: turning it off, the distance,
# prefix_length and below settings, and per-field misspellings.
class MisspellingsTest < Minitest::Test
  def test_false
    store_names(%w[abc abd aee])
    assert_search("abc", ["abc"], misspellings: false)
  end

  def test_distance
    store_names(%w[abbb aabb])
    assert_search("aaaa", ["aabb"], misspellings: {distance: 2})
  end

  def test_prefix_length
    store_names(%w[ap api apt any nap ah ahi])
    assert_search("ap", %w[ap api apt], misspellings: {prefix_length: 2})
    assert_search("api", %w[ap api apt], misspellings: {prefix_length: 2})
  end

  def test_prefix_length_operator
    store_names(%w[ap api apt any nap ah aha])
    assert_search("ap ah", %w[ap ah api apt aha], operator: "or", misspellings: {prefix_length: 2})
    assert_search("api ahi", %w[ap api apt ah aha], operator: "or", misspellings: {prefix_length: 2})
  end

  def test_fields_operator
    records = [
      ["red", "red"],
      ["blue", "blue"],
      ["cyan", "blue green"],
      ["magenta", "red blue"],
      ["green", "green"]
    ].map { |name, color| {name: name, color: color} }
    store(records)

    assert_search(
      "red blue",
      %w[red blue cyan magenta],
      operator: "or",
      fields: ["color"],
      misspellings: false
    )
  end

  def test_below_unmet
    store_names(%w[abc abd aee])
    assert_search("abc", %w[abc abd], misspellings: {below: 2})
  end

  def test_below_unmet_result
    store_names(%w[abc abd aee])
    results = Product.search("abc", misspellings: {below: 2})
    assert results.misspellings?
  end

  def test_below_met
    store_names(%w[abc abd aee])
    assert_search("abc", ["abc"], misspellings: {below: 1})
  end

  def test_below_met_result
    store_names(%w[abc abd aee])
    results = Product.search("abc", misspellings: {below: 1})
    assert !results.misspellings?
  end

  def test_field_correct_spelling_still_works
    store([{name: "Sriracha", color: "blue"}])
    assert_misspellings("Sriracha", ["Sriracha"], {fields: [:name, :color]})
    assert_misspellings("blue", ["Sriracha"], {fields: [:name, :color]})
  end

  def test_field_enabled
    store([{name: "Sriracha", color: "blue"}])
    assert_misspellings("siracha", ["Sriracha"], {fields: [:name]})
    assert_misspellings("clue", ["Sriracha"], {fields: [:color]})
  end

  def test_field_disabled
    store([{name: "Sriracha", color: "blue"}])
    assert_misspellings("siracha", [], {fields: [:color]})
    assert_misspellings("clue", [], {fields: [:name]})
  end

  def test_field_with_transpositions
    store([{name: "Sriracha", color: "blue"}])
    assert_misspellings("lbue", [], {transpositions: false, fields: [:color]})
  end

  def test_field_with_edit_distance
    store([{name: "Sriracha", color: "blue"}])
    assert_misspellings("crue", ["Sriracha"], {edit_distance: 2, fields: [:color]})
  end

  def test_field_multiple
    records = [
      {name: "Greek Yogurt", color: "white"},
      {name: "Green Onions", color: "yellow"}
    ]
    store(records)

    assert_misspellings("greed", ["Greek Yogurt", "Green Onions"], {fields: [:name, :color]})
    assert_misspellings("mellow", ["Green Onions"], {fields: [:name, :color]})
  end

  # Per-field misspellings without an explicit fields option must raise ArgumentError.
  def test_field_requires_explicit_search_fields
    store_names(["Sriracha"])
    assert_raises(ArgumentError) do
      assert_search("siracha", ["Sriracha"], {misspellings: {fields: [:name]}})
    end
  end

  def test_field_word_start
    store_names(["Sriracha"])
    assert_search(
      "siracha",
      ["Sriracha"],
      fields: [{name: :word_middle}],
      misspellings: {fields: [:name]}
    )
  end

  private

  # Runs assert_search over the name and color fields, passing the given hash
  # as the misspellings option.
  def assert_misspellings(term, expected, misspellings = {}, model = default_model)
    search_options = {fields: [:name, :color], misspellings: misspellings}
    assert_search(term, expected, search_options, model)
  end
end
================================================
FILE: test/models/animal.rb
================================================
# Animal test model: searchkick is configured with inheritance enabled and
# with :name listed for both text_start and suggest.
class Animal
  searchkick(
    inheritance: true,
    text_start: [:name],
    suggest: [:name]
  )
end
================================================
FILE: test/models/artist.rb
================================================
# Artist test model: searchkick is configured with unscope: true.
class Artist
  searchkick(unscope: true)

  # Delegates the indexing decision to the record's should_index value.
  def should_index?
    should_index
  end
end
================================================
FILE: test/models/band.rb
================================================
# Band test model: calls searchkick with no options.
class Band
searchkick
end
================================================
FILE: test/models/product.rb
================================================
# Product test model: the main searchkick configuration used by the test suite.
class Product
  # kNN fields are declared only when Searchkick reports kNN support;
  # embedding4 is added only when not running against OpenSearch.
  knn_fields =
    if Searchkick.knn_support?
      embeddings = {
        embedding: {dimensions: 3, distance: "cosine", m: 16, ef_construction: 100},
        embedding2: {dimensions: 3, distance: "inner_product"},
        embedding3: {dimensions: 3, distance: "euclidean"}
      }
      Searchkick.opensearch? ? embeddings : embeddings.merge(embedding4: {dimensions: 3})
    end

  searchkick(
    synonyms: [
      ["clorox", "bleach"],
      ["burger", "hamburger"],
      ["bandaid", "bandages"],
      ["UPPERCASE", "lowercase"],
      "lightbulb => led,lightbulb",
      "lightbulb => halogenlamp"
    ],
    suggest: [:name, :color],
    conversions_v1: [:conversions],
    conversions_v2: [:conversions_v2],
    locations: [:location, :multiple_locations],
    text_start: [:name],
    text_middle: [:name],
    text_end: [:name],
    word_start: [:name],
    word_middle: [:name],
    word_end: [:name],
    highlight: [:name],
    filterable: [:name, :color, :description],
    similarity: "BM25",
    # the MATCH environment variable, when set, selects the match option
    match: ENV["MATCH"]&.to_sym,
    knn: knn_fields
  )

  attr_accessor :conversions, :conversions_v2, :user_ids, :aisle, :details

  class << self
    attr_accessor :dynamic_data
  end

  # Returns the document to index. When the class-level dynamic_data callable
  # is set, its result is used instead of the record's own attributes.
  def search_data
    override = self.class.dynamic_data
    return override.call if override

    extras = {
      conversions: conversions,
      conversions_v2: conversions_v2,
      user_ids: user_ids,
      location: {lat: latitude, lon: longitude},
      multiple_locations: [{lat: latitude, lon: longitude}, {lat: 0, lon: 0}],
      aisle: aisle,
      details: details
    }
    serializable_hash.except("id", "_id").merge(extras)
  end

  # Records named "DO NOT INDEX" are reported as not indexable.
  def should_index?
    name != "DO NOT INDEX"
  end

  # Name-only data, used by the partial reindex tests via reindex(:search_name).
  def search_name
    {name: name}
  end
end
================================================
FILE: test/models/region.rb
================================================
# Region test model: declares :territory as a geo_shape field.
class Region
  searchkick(geo_shape: [:territory])

  attr_accessor :territory

  # Document to index: name, text and the territory value.
  def search_data
    {name: name, text: text, territory: territory}
  end
end
================================================
FILE: test/models/sku.rb
================================================
# Sku test model: searchkick is configured with callbacks: :async.
class Sku
  searchkick(callbacks: :async)
end
================================================
FILE: test/models/song.rb
================================================
# Song test model: default searchkick options, with the record name as routing value.
class Song
  searchkick

  # Routing value for this record.
  def search_routing
    name
  end
end
================================================
FILE: test/models/speaker.rb
================================================
# Speaker test model: two conversions_v1 fields, search-time synonyms and
# word_start on :name.
class Speaker
  searchkick(
    conversions_v1: ["conversions_a", "conversions_b"],
    search_synonyms: [
      ["clorox", "bleach"],
      ["burger", "hamburger"],
      ["bandaids", "bandages"],
      ["UPPERCASE", "lowercase"],
      "led => led,lightbulb",
      "halogen lamp => lightbulb",
      ["United States of America", "USA"]
    ],
    word_start: [:name]
  )

  attr_accessor :conversions_a, :conversions_b, :aisle

  # Document to index: the serialized attributes without the id keys, plus the
  # two conversions values and the aisle.
  def search_data
    extras = {
      conversions_a: conversions_a,
      conversions_b: conversions_b,
      aisle: aisle
    }
    serializable_hash.except("id", "_id").merge(extras)
  end
end
================================================
FILE: test/models/store.rb
================================================
# Store test model: routing enabled, with a custom name mapping passed
# alongside merge_mappings: true.
class Store
  searchkick(
    routing: true,
    merge_mappings: true,
    mappings: {
      properties: {
        name: {type: "text"}
      }
    }
  )

  # Document id used in the index.
  def search_document_id
    id
  end

  # Routing value for this record.
  def search_routing
    name
  end
end
================================================
FILE: test/multi_indices_test.rb
================================================
require_relative "test_helper"
# Tests for searching across several models/indices through Searchkick.search
# and for the index_name / models options.
class MultiIndicesTest < Minitest::Test
  def setup
    super
    setup_speaker
  end

  def test_basic
    store_names(["Product A"])
    store_names(["Product B"], Speaker)
    assert_search_multi("product", ["Product A", "Product B"])
  end

  def test_index_name
    store_names(["Product A"])

    by_index_name = Product.search("product", index_name: Product.searchkick_index.name)
    assert_equal ["Product A"], by_index_name.map(&:name)

    by_model = Product.search("product", index_name: Product)
    assert_equal ["Product A"], by_model.map(&:name)

    Speaker.searchkick_index.refresh
    other_index = Product.search("product", index_name: Speaker.searchkick_index.name, conversions: false)
    assert_equal [], other_index.map(&:name)
  end

  def test_models_and_index_name
    store_names(["Product A"])
    store_names(["Product B"], Speaker)

    matching = Searchkick.search("product", models: [Product, Store], index_name: Product.searchkick_index.name)
    assert_equal ["Product A"], matching.map(&:name)

    error = assert_raises(Searchkick::Error) do
      Searchkick.search("product", models: [Product, Store], index_name: Speaker.searchkick_index.name).map(&:name)
    end
    assert_includes error.message, "Unknown model"

    # legacy
    legacy = Searchkick.search("product", index_name: [Product, Store])
    assert_equal ["Product A"], legacy.map(&:name)
  end

  def test_model_with_another_model
    error = assert_raises(ArgumentError) { Product.search(models: [Store]) }
    assert_includes error.message, "Use Searchkick.search"
  end

  def test_model_with_another_model_in_index_name
    error = assert_raises(ArgumentError) do
      # legacy protection
      Product.search(index_name: [Store, "another"])
    end
    assert_includes error.message, "Use Searchkick.search"
  end

  def test_no_models_or_index_name
    store_names(["Product A"])

    error = assert_raises(Searchkick::Error) { Searchkick.search("product").to_a }
    assert_includes error.message, "Unknown model"
  end

  def test_no_models_or_index_name_load_false
    store_names(["Product A"])
    Searchkick.search("product", load: false).to_a
  end

  private

  # Runs assert_search through Searchkick over Product and Speaker on the name
  # field. The models and fields keys are written into the given options hash.
  def assert_search_multi(term, expected, options = {})
    options.merge!(models: [Product, Speaker], fields: [:name])
    assert_search(term, expected, options, Searchkick)
  end
end
================================================
FILE: test/multi_search_test.rb
================================================
require_relative "test_helper"
# Tests for Searchkick.multi_search: running several searches at once and the
# per-search error reporting.
class MultiSearchTest < Minitest::Test
  def test_basic
    store_names ["Product A"]
    store_names ["Store A"], Store
    products = Product.search("*")
    stores = Store.search("*")
    Searchkick.multi_search([products, stores])
    assert_equal ["Product A"], products.map(&:name)
    assert_equal ["Store A"], stores.map(&:name)
  end

  # NOTE(review): both objects come from the same Product.search call, so this
  # only checks that two such objects expose the same methods - confirm intent.
  def test_methods
    result = Product.search("*")
    query = Product.search("*")
    assert_empty(result.methods - query.methods)
  end

  # A failing search in the batch sets error on that search only.
  def test_error
    store_names ["Product A"]
    products = Product.search("*")
    stores = Store.search("*", order: [:bad_field])
    Searchkick.multi_search([products, stores])
    assert !products.error
    assert stores.error
  end

  def test_misspellings_below_unmet
    store_names ["abc", "abd", "aee"]
    products = Product.search("abc", misspellings: {below: 5})
    Searchkick.multi_search([products])
    assert_equal ["abc", "abd"], products.map(&:name)
  end

  def test_misspellings_below_error
    products = Product.search("abc", order: [:bad_field], misspellings: {below: 1})
    Searchkick.multi_search([products])
    assert products.error
  end

  # After a failed multi_search, reading the results raises Searchkick::Error.
  def test_query_error
    products = Product.search("*", order: {bad_field: :asc})
    Searchkick.multi_search([products])
    assert products.error
    error = assert_raises(Searchkick::Error) { products.to_a }
    # expected value first, so a failure reports expected/actual the right way round
    assert_equal "Query error - use the error method to view it", error.message
  end
end
================================================
FILE: test/multi_tenancy_test.rb
================================================
require_relative "test_helper"
# Multi-tenant search test. Every test is skipped unless the Apartment
# constant is defined.
class MultiTenancyTest < Minitest::Test
  def setup
    apartment_loaded = defined?(Apartment)
    skip unless apartment_loaded
  end

  # Records stored under one tenant must only be found while that tenant is active.
  def test_basic
    Apartment::Tenant.switch!("tenant1")
    store_names(["Product A"])
    Apartment::Tenant.switch!("tenant2")
    store_names(["Product B"])

    Apartment::Tenant.switch!("tenant1")
    assert_search("product", ["Product A"], {load: false})
    Apartment::Tenant.switch!("tenant2")
    assert_search("product", ["Product B"], {load: false})
  end

  def teardown
    return unless defined?(Apartment)

    Apartment::Tenant.reset
  end

  # The tests in this class run against the Tenant model.
  def default_model
    Tenant
  end
end
================================================
FILE: test/notifications_test.rb
================================================
require_relative "test_helper"
# Checks that running a search emits exactly one "search.searchkick" notification.
class NotificationsTest < Minitest::Test
  def test_search
    Product.searchkick_index.refresh

    events = capture_notifications { Product.search("product").to_a }

    assert_equal 1, events.size
    assert_equal "search.searchkick", events.last[:name]
  end

  private

  # Runs the given block while subscribed to notifications matching /searchkick/
  # and returns the collected events as {name:, payload:} hashes.
  def capture_notifications(&block)
    captured = []
    recorder = lambda do |name, _started, _finished, _unique_id, payload|
      captured << {name: name, payload: payload}
    end
    ActiveSupport::Notifications.subscribed(recorder, /searchkick/, &block)
    captured
  end
end
================================================
FILE: test/order_test.rb
================================================
require_relative "test_helper"
# Tests for the order option and the relation order/reorder methods, each
# checked through both the options API and the relation API.
class OrderTest < Minitest::Test
  def test_hash
    store_names(["Product A", "Product B", "Product C", "Product D"])

    assert_order("product", ["Product D", "Product C", "Product B", "Product A"], order: {name: :desc})

    relation = Product.search("product").order(name: :desc)
    assert_order_relation(["Product D", "Product C", "Product B", "Product A"], relation)
  end

  def test_string
    store_names(["Product A", "Product B", "Product C", "Product D"])

    assert_order("product", ["Product A", "Product B", "Product C", "Product D"], order: "name")

    relation = Product.search("product").order("name")
    assert_order_relation(["Product A", "Product B", "Product C", "Product D"], relation)
  end

  def test_multiple
    records = [
      {name: "Product A", color: "blue", store_id: 1},
      {name: "Product B", color: "red", store_id: 3},
      {name: "Product C", color: "red", store_id: 2}
    ]
    store(records)

    assert_order("product", ["Product A", "Product B", "Product C"], order: {color: :asc, store_id: :desc})

    single_call = Product.search("product").order(color: :asc, store_id: :desc)
    assert_order_relation(["Product A", "Product B", "Product C"], single_call)

    mixed_args = Product.search("product").order(:color, store_id: :desc)
    assert_order_relation(["Product A", "Product B", "Product C"], mixed_args)

    chained = Product.search("product").order(color: :asc).order(store_id: :desc)
    assert_order_relation(["Product A", "Product B", "Product C"], chained)

    # reorder is expected to replace the earlier color ordering
    reordered = Product.search("product").order(color: :asc).reorder(store_id: :desc)
    assert_order_relation(["Product B", "Product C", "Product A"], reordered)
  end

  def test_unmapped_type
    Product.searchkick_index.refresh

    assert_order("product", [], order: {not_mapped: {unmapped_type: "long"}})

    relation = Product.search("product").order(not_mapped: {unmapped_type: "long"})
    assert_order_relation([], relation)
  end

  def test_array
    store([{name: "San Francisco", latitude: 37.7833, longitude: -122.4167}])

    assert_order("francisco", ["San Francisco"], order: [{_geo_distance: {location: "0,0"}}])

    relation = Product.search("francisco").order([{_geo_distance: {location: "0,0"}}])
    assert_order_relation(["San Francisco"], relation)
  end

  # Sorts by a script computing the negated name length.
  def test_script
    store_names(["Red", "Green", "Blue"])

    script_order = {_script: {type: "number", script: {source: "doc['name'].value.length() * -1"}}}
    assert_order("*", ["Green", "Blue", "Red"], order: script_order)
    assert_order_relation(["Green", "Blue", "Red"], Product.search("*").order(script_order))
  end
end
================================================
FILE: test/pagination_test.rb
================================================
require_relative "test_helper"
# Tests for limit/offset, page-based pagination and its helper methods,
# Kaminari's page_entries_info, deep paging / max_result_window, search_after
# and point-in-time searches.
class PaginationTest < Minitest::Test
  def test_limit
    store_names ["Product A", "Product B", "Product C", "Product D"]
    assert_order "product", ["Product A", "Product B"], order: {name: :asc}, limit: 2
    assert_order_relation ["Product A", "Product B"], Product.search("product").order(name: :asc).limit(2)
  end

  def test_no_limit
    names = 20.times.map { |i| "Product #{i}" }
    store_names names
    assert_search "product", names
  end

  def test_offset
    store_names ["Product A", "Product B", "Product C", "Product D"]
    assert_order "product", ["Product C", "Product D"], order: {name: :asc}, offset: 2, limit: 100
    assert_order_relation ["Product C", "Product D"], Product.search("product").order(name: :asc).offset(2).limit(100)
  end

  # Pagination given through search options.
  def test_pagination
    store_names ["Product A", "Product B", "Product C", "Product D", "Product E", "Product F"]
    products = Product.search("product", order: {name: :asc}, page: 2, per_page: 2, padding: 1)
    assert_padded_second_page(products)
  end

  # Same pagination, built through the relation methods.
  def test_relation
    store_names ["Product A", "Product B", "Product C", "Product D", "Product E", "Product F"]
    products = Product.search("product", padding: 1).order(name: :asc).page(2).per_page(2)
    assert_padded_second_page(products)
  end

  def test_per
    store_names ["Product A", "Product B", "Product C"]
    assert_order_relation ["Product A", "Product B"], Product.search("product").order(name: :asc).per(2)
  end

  # Same pagination, combined with a custom query body.
  def test_body
    store_names ["Product A", "Product B", "Product C", "Product D", "Product E", "Product F"]
    products = Product.search("product", body: {query: {match_all: {}}, sort: [{name: "asc"}]}, page: 2, per_page: 2, padding: 1)
    assert_padded_second_page(products)
  end

  def test_nil_page
    store_names ["Product A", "Product B", "Product C", "Product D", "Product E"]
    products = Product.search("product", order: {name: :asc}, page: nil, per_page: 2)
    assert_equal ["Product A", "Product B"], products.map(&:name)
    assert_equal 1, products.current_page
    assert products.first_page?
  end

  # Pagination values given as strings should behave like their integer forms.
  def test_strings
    store_names ["Product A", "Product B", "Product C", "Product D", "Product E", "Product F"]
    products = Product.search("product", order: {name: :asc}, page: "2", per_page: "2", padding: "1")
    assert_equal ["Product D", "Product E"], products.map(&:name)
    products = Product.search("product", order: {name: :asc}, limit: "2", offset: "3")
    assert_equal ["Product D", "Product E"], products.map(&:name)
  end

  def test_total_entries
    products = Product.search("product", total_entries: 4)
    assert_equal 4, products.total_entries
  end

  def test_kaminari
    # loaded lazily: only this test needs Action View
    require "action_view"
    I18n.load_path = Dir["test/support/kaminari.yml"]
    I18n.backend.load_translations
    view = ActionView::Base.new(ActionView::LookupContext.new([]), [], nil)
    store_names ["Product A"]
    assert_equal "Displaying 1 product", view.page_entries_info(Product.search("product"))
    store_names ["Product B"]
    assert_equal "Displaying all 2 products", view.page_entries_info(Product.search("product"))
    store_names ["Product C"]
    assert_equal "Displaying products 1 - 2 of 3 in total", view.page_entries_info(Product.search("product").per_page(2))
  end

  def test_deep_paging
    with_options({deep_paging: true}, Song) do
      assert_empty Song.search("*", offset: 10000, limit: 1).to_a
    end
  end

  def test_no_deep_paging
    Song.reindex
    error = assert_raises(Searchkick::InvalidQueryError) do
      Song.search("*", offset: 10000, limit: 1).to_a
    end
    assert_match "Result window is too large", error.message
  end

  def test_max_result_window
    Song.delete_all
    with_options({max_result_window: 10000}, Song) do
      relation = Song.search("*", offset: 10000, limit: 1)
      assert_empty relation.to_a
      assert_equal 1, relation.per_page
      assert_equal 0, relation.total_pages
    end
  end

  # Pages through results by feeding the last hit's sort values back in as search_after.
  def test_search_after
    store_names ["Product A", "Product B", "Product C", "Product D"]
    # ensure different created_at
    store_names ["Product B"]
    options = {order: {name: :asc, created_at: :asc}, per_page: 2}
    products = Product.search("product", **options)
    assert_equal ["Product A", "Product B"], products.map(&:name)
    search_after = products.hits.last["sort"]
    products = Product.search("product", body_options: {search_after: search_after}, **options)
    assert_equal ["Product B", "Product C"], products.map(&:name)
    search_after = products.hits.last["sort"]
    products = Product.search("product", body_options: {search_after: search_after}, **options)
    assert_equal ["Product D"], products.map(&:name)
  end

  # Opens a point in time, stores more records afterwards, and checks that paging
  # with the pit id only returns the earlier records; after closing it, the
  # search is expected to fail.
  def test_pit
    skip unless pit_supported?
    store_names ["Product A", "Product B", "Product D", "Product E", "Product G"]
    pit_id =
      if Searchkick.opensearch?
        path = "#{CGI.escape(Product.searchkick_index.name)}/_search/point_in_time"
        Searchkick.client.transport.perform_request("POST", path, {keep_alive: "5s"}).body["pit_id"]
      else
        Searchkick.client.open_point_in_time(index: Product.searchkick_index.name, keep_alive: "5s")["id"]
      end
    store_names ["Product C", "Product F"]
    options = {
      order: {name: :asc},
      per_page: 2,
      body_options: {pit: {id: pit_id}},
      index_name: ""
    }
    products = Product.search("product", **options)
    assert_equal ["Product A", "Product B"], products.map(&:name)
    products = Product.search("product", page: 2, **options)
    assert_equal ["Product D", "Product E"], products.map(&:name)
    products = Product.search("product", page: 3, **options)
    assert_equal ["Product G"], products.map(&:name)
    products = Product.search("product", page: 4, **options)
    assert_empty products.map(&:name)
    if Searchkick.opensearch?
      Searchkick.client.transport.perform_request("DELETE", "_search/point_in_time", {}, {pit_id: pit_id})
    else
      Searchkick.client.close_point_in_time(body: {id: pit_id})
    end
    error = assert_raises do
      Product.search("product", **options).load
    end
    assert_match "No search context found for id", error.message
  end

  private

  # Shared expectations for page 2 (2 per page, padding 1) over the six stored
  # products "Product A".."Product F" ordered by name.
  def assert_padded_second_page(products)
    assert_equal ["Product D", "Product E"], products.map(&:name)
    assert_equal "product", products.entry_name
    assert_equal 2, products.current_page
    assert_equal 1, products.padding
    assert_equal 2, products.per_page
    assert_equal 2, products.size
    assert_equal 2, products.length
    assert_equal 3, products.total_pages
    assert_equal 6, products.total_count
    assert_equal 6, products.total_entries
    assert_equal 2, products.limit_value
    assert_equal 3, products.offset_value
    assert_equal 3, products.offset
    assert_equal 3, products.next_page
    assert_equal 1, products.previous_page
    assert_equal 1, products.prev_page
    assert !products.first_page?
    assert !products.last_page?
    assert !products.empty?
    assert !products.out_of_range?
    assert products.any?
  end

  # On OpenSearch the test requires server 2.4.0 or later; otherwise it always runs.
  def pit_supported?
    Searchkick.opensearch? ? !Searchkick.server_below?("2.4.0") : true
  end
end
================================================
FILE: test/parameters_test.rb
================================================
require_relative "test_helper"
# Tests how ActionController::Parameters are handled when passed as search
# options, where conditions (options API, where and rewhere on relations) and
# aggregation filters.
class ParametersTest < Minitest::Test
  def setup
    # loaded lazily: only this test class needs Action Controller
    require "action_controller"
    super
  end

  def test_options
    params = ActionController::Parameters.new({store_id: 1})
    assert_raises(ActionController::UnfilteredParameters) do
      Product.search("*", **params)
    end
  end

  def test_where
    params = ActionController::Parameters.new({store_id: 1})
    assert_raises(ActionController::UnfilteredParameters) do
      Product.search("*", where: params)
    end
  end

  def test_where_relation
    params = ActionController::Parameters.new({store_id: 1})
    assert_raises(ActionController::UnfilteredParameters) do
      Product.search("*").where(params)
    end
  end

  def test_rewhere_relation
    params = ActionController::Parameters.new({store_id: 1})
    assert_raises(ActionController::UnfilteredParameters) do
      # must call rewhere: with where this only repeated test_where_relation
      Product.search("*").rewhere(params)
    end
  end

  def test_where_permitted
    store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}]
    params = ActionController::Parameters.new({store_id: 1})
    assert_search "product", ["Product A"], where: params.permit(:store_id)
  end

  def test_where_permitted_relation
    store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}]
    params = ActionController::Parameters.new({store_id: 1})
    assert_search_relation ["Product A"], Product.search("product").where(params.permit(:store_id))
  end

  def test_rewhere_permitted_relation
    store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}]
    params = ActionController::Parameters.new({store_id: 1})
    assert_search_relation ["Product A"], Product.search("product").rewhere(params.permit(:store_id))
  end

  def test_where_value
    store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}]
    params = ActionController::Parameters.new({store_id: 1})
    assert_search "product", ["Product A"], where: {store_id: params[:store_id]}
  end

  def test_where_value_relation
    store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}]
    params = ActionController::Parameters.new({store_id: 1})
    assert_search_relation ["Product A"], Product.search("product").where(store_id: params[:store_id])
  end

  def test_rewhere_value_relation
    store [{name: "Product A", store_id: 1}, {name: "Product B", store_id: 2}]
    params = ActionController::Parameters.new({store_id: 1})
    # must call rewhere: with where this only repeated test_where_value_relation
    assert_search_relation ["Product A"], Product.search("product").rewhere(store_id: params[:store_id])
  end

  # A nested Parameters object used as a where value is expected to raise TypeError.
  def test_where_hash
    params = ActionController::Parameters.new({store_id: {value: 10, boost: 2}})
    error = assert_raises(TypeError) do
      assert_search "product", [], where: {store_id: params[:store_id]}
    end
    assert_equal "can't cast ActionController::Parameters", error.message
  end

  # TODO raise error without to_a
  def test_where_hash_relation
    params = ActionController::Parameters.new({store_id: {value: 10, boost: 2}})
    error = assert_raises(TypeError) do
      Product.search("product").where(store_id: params[:store_id]).to_a
    end
    assert_equal "can't cast ActionController::Parameters", error.message
  end

  # TODO raise error without to_a
  def test_rewhere_hash_relation
    params = ActionController::Parameters.new({store_id: {value: 10, boost: 2}})
    error = assert_raises(TypeError) do
      Product.search("product").rewhere(store_id: params[:store_id]).to_a
    end
    assert_equal "can't cast ActionController::Parameters", error.message
  end

  def test_aggs_where
    params = ActionController::Parameters.new({store_id: 1})
    assert_raises(ActionController::UnfilteredParameters) do
      Product.search("*", aggs: {size: {where: params}})
    end
  end

  def test_aggs_where_smart_aggs_false
    params = ActionController::Parameters.new({store_id: 1})
    assert_raises(ActionController::UnfilteredParameters) do
      Product.search("*", aggs: {size: {where: params}}, smart_aggs: false)
    end
  end
end
================================================
FILE: test/partial_match_test.rb
================================================
require_relative "test_helper"
# Tests for the partial-match field types (text_start/middle/end,
# word_start/middle/end) and the exact field type.
class PartialMatchTest < Minitest::Test
  def test_autocomplete
    store_names(["Hummus"])
    assert_search("hum", ["Hummus"], match: :text_start)
  end

  def test_autocomplete_two_words
    store_names(["Organic Hummus"])
    assert_search("hum", [], match: :text_start)
  end

  def test_autocomplete_fields
    store_names(["Hummus"])
    assert_search("hum", ["Hummus"], match: :text_start, fields: [:name])
  end

  def test_text_start
    store_names([carmen])
    assert_search("where in the world is", [carmen], fields: [{name: :text_start}])
    assert_search("in the world", [], fields: [{name: :text_start}])
  end

  def test_text_middle
    store_names([carmen])
    assert_search("where in the world is", [carmen], fields: [{name: :text_middle}])
    assert_search("n the wor", [carmen], fields: [{name: :text_middle}])
    assert_search("men san diego", [carmen], fields: [{name: :text_middle}])
    assert_search("world carmen", [], fields: [{name: :text_middle}])
  end

  def test_text_end
    store_names([carmen])
    assert_search("men san diego", [carmen], fields: [{name: :text_end}])
    assert_search("carmen san", [], fields: [{name: :text_end}])
  end

  def test_word_start
    store_names([carmen])
    assert_search("car san wor", [carmen], fields: [{name: :word_start}])
  end

  def test_word_middle
    store_names([carmen])
    assert_search("orl", [carmen], fields: [{name: :word_middle}])
  end

  def test_word_end
    store_names([carmen])
    assert_search("rld men ego", [carmen], fields: [{name: :word_end}])
  end

  def test_word_start_multiple_words
    store_names(["Dark Grey", "Dark Blue"])
    assert_search("dark grey", ["Dark Grey"], fields: [{name: :word_start}])
  end

  def test_word_start_exact
    store_names(["Back Scratcher", "Backpack"])
    assert_order("back", ["Back Scratcher", "Backpack"], fields: [{name: :word_start}])
  end

  def test_word_start_exact_martin
    store_names(["Martina", "Martin"])
    assert_order("martin", ["Martin", "Martina"], fields: [{name: :word_start}])
  end

  # TODO find a better place
  def test_exact
    store_names(["hi@example.org"])
    assert_search("hi@example.org", ["hi@example.org"], fields: [{name: :exact}])
  end

  # The exact field type is expected to be case-sensitive.
  def test_exact_case
    store_names(["Hello"])
    assert_search("hello", [], fields: [{name: :exact}])
    assert_search("Hello", ["Hello"], fields: [{name: :exact}])
  end

  private

  # Returns a fresh copy of the long product name shared by the text_* and word_* tests.
  def carmen
    "Where in the World is Carmen San Diego"
  end
end
================================================
FILE: test/partial_reindex_test.rb
================================================
require_relative "test_helper"
class PartialReindexTest < Minitest::Test
# Inline partial reindex of one record: only the :search_name data is sent,
# so the index gets the new name but keeps the old color.
def test_record_inline
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Searchkick.callbacks(false) { record.update!(name: "Bye", color: "Red") }

  record.reindex(:search_name, refresh: true)

  # name updated, but not color
  assert_search("bye", ["Bye"], fields: [:name], load: false)
  assert_search("blue", ["Bye"], fields: [:color], load: false)
end
# Async partial reindex of one record, with the enqueued job run immediately:
# the index gets the new name but keeps the old color.
def test_record_async
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Searchkick.callbacks(false) { record.update!(name: "Bye", color: "Red") }

  perform_enqueued_jobs { record.reindex(:search_name, mode: :async) }
  Product.searchkick_index.refresh

  # name updated, but not color
  assert_search("bye", ["Bye"], fields: [:name], load: false)
  assert_search("blue", ["Bye"], fields: [:color], load: false)
end
# A partial reindex of a record in queue mode must raise Searchkick::Error.
def test_record_queue
  record = Product.create!(name: "Hi")

  error = assert_raises(Searchkick::Error) { record.reindex(:search_name, mode: :queue) }
  assert_equal "Partial reindex not supported with queue option", error.message
end
# An inline partial reindex of a record whose document was removed from the
# index must raise Searchkick::ImportError.
def test_record_missing_inline
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Product.searchkick_index.remove(record)

  error = assert_raises(Searchkick::ImportError) { record.reindex(:search_name) }
  assert_match "document missing", error.message
end
# With ignore_missing: true, a partial reindex of a removed document must not
# raise, either directly or inside a bulk callbacks block.
def test_record_ignore_missing_inline
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Product.searchkick_index.remove(record)

  record.reindex(:search_name, ignore_missing: true)
  Searchkick.callbacks(:bulk) { record.reindex(:search_name, ignore_missing: true) }
end
# An async partial reindex of a removed document must raise
# Searchkick::ImportError when the enqueued job runs.
def test_record_missing_async
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Product.searchkick_index.remove(record)

  perform_enqueued_jobs do
    error = assert_raises(Searchkick::ImportError) { record.reindex(:search_name, mode: :async) }
    assert_match "document missing", error.message
  end
end
# With ignore_missing: true, an async partial reindex of a removed document
# must not raise.
def test_record_ignore_missing_async
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Product.searchkick_index.remove(record)

  perform_enqueued_jobs { record.reindex(:search_name, mode: :async, ignore_missing: true) }
end
# Partial reindex (:search_name) of the whole model, run inline.
def test_relation_inline
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  # change the row while callbacks are off so the index keeps the old values
  Searchkick.callbacks(false) { record.update!(name: "Bye", color: "Red") }

  Product.reindex(:search_name)

  # the name was re-indexed, the color was not
  assert_search("bye", ["Bye"], fields: [:name], load: false)
  assert_search("blue", ["Bye"], fields: [:color], load: false)

  # the scope option is accepted as well
  Product.reindex(:search_name, scope: :all)
end
# Partial reindex (:search_name) of the whole model through background jobs.
def test_relation_async
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  # change the row while callbacks are off so the index keeps the old values
  Searchkick.callbacks(false) { record.update!(name: "Bye", color: "Red") }

  perform_enqueued_jobs { Product.reindex(:search_name, mode: :async) }

  # the name was re-indexed, the color was not
  assert_search("bye", ["Bye"], fields: [:name], load: false)
  assert_search("blue", ["Bye"], fields: [:color], load: false)
end
# A partial reindex of the model in :queue mode must raise Searchkick::Error.
def test_relation_queue
  Product.create!(name: "Hi")
  raised = assert_raises(Searchkick::Error) { Product.reindex(:search_name, mode: :queue) }
  assert_equal("Partial reindex not supported with queue option", raised.message)
end
# Inline partial reindex of the model when a document is missing from the
# index is expected to raise an import error.
def test_relation_missing_inline
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Product.searchkick_index.remove(record)

  raised = assert_raises(Searchkick::ImportError) { Product.reindex(:search_name) }
  assert_match("document missing", raised.message)
end
# Inline partial reindex of a relation with ignore_missing: true must not
# raise when the document is absent from the index.
def test_relation_ignore_missing_inline
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Product.searchkick_index.remove(record)

  relation = Product.where(id: record.id)
  relation.reindex(:search_name, ignore_missing: true)
end
# Async partial reindex of the model with a missing document: the error is
# expected to surface while the enqueued jobs are performed.
def test_relation_missing_async
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Product.searchkick_index.remove(record)

  perform_enqueued_jobs do
    raised = assert_raises(Searchkick::ImportError) { Product.reindex(:search_name, mode: :async) }
    assert_match("document missing", raised.message)
  end
end
# Async partial reindex of a relation with ignore_missing: true must complete
# without raising when the document is absent from the index.
def test_relation_ignore_missing_async
  store([{name: "Hi", color: "Blue"}])

  record = Product.first
  Product.searchkick_index.remove(record)

  perform_enqueued_jobs do
    relation = Product.where(id: record.id)
    relation.reindex(:search_name, mode: :async, ignore_missing: true)
  end
end
end
================================================
FILE: test/query_test.rb
================================================
require_relative "test_helper"
# Tests for query-level search options: raw body, timeouts, request params,
# opaque ids, debug output, includes, and scope_results.
class QueryTest < Minitest::Test
  # A custom body replaces the generated query, so every document matches.
  def test_basic
    store_names(["Milk", "Apple"])
    results = Product.search("milk", body: {query: {match_all: {}}})
    assert_equal(["Apple", "Milk"], results.map(&:name).sort)
  end

  # A tiny min_score must not filter anything out.
  def test_with_uneffective_min_score
    store_names(["Milk", "Milk2"])
    assert_search("milk", ["Milk", "Milk2"], body_options: {min_score: 0.0001})
  end

  def test_default_timeout
    body = Product.search("*").body
    assert_equal("6000ms", body[:timeout])
  end

  def test_timeout_override
    body = Product.search("*", body_options: {timeout: "1s"}).body
    assert_equal("1s", body[:timeout])
  end

  def test_request_params
    params = Product.search("*", request_params: {search_type: "dfs_query_then_fetch"}).params
    assert_equal("dfs_query_then_fetch", params[:search_type])
  end

  # Exercises opaque_id on every search entry point; there are no assertions,
  # the calls only need to succeed while the slow log threshold is 0.
  def test_opaque_id
    store_names(["Milk"])
    set_search_slow_log(0)

    Product.search("*", opaque_id: "search").load
    Product.search("*").opaque_id("search_relation").load
    Product.search("*", scroll: "5s", opaque_id: "scroll").scroll { }
    Searchkick.multi_search([Product.search("*")], opaque_id: "multi_search")
  ensure
    # restore the threshold that was changed above
    set_search_slow_log(-1)
  end

  # debug: true prints to stdout; the output must not mention an error.
  def test_debug
    store_names(["Milk"])
    stdout, _stderr = capture_io { assert_search("milk", ["Milk"], debug: true) }
    refute_includes(stdout, "Error")
  end

  def test_big_decimal
    store([{name: "Product", latitude: 80.0}])
    assert_search("product", ["Product"], where: {latitude: {gt: 79}})
  end

  # body_options

  def test_body_options_should_merge_into_body
    body = Product.search("*", body_options: {min_score: 1.0}).body
    assert_equal(1.0, body[:min_score])
  end

  # nested

  def test_nested_search
    setup_speaker
    store([{name: "Product A", aisle: {"id" => 1, "name" => "Frozen"}}], Speaker)
    assert_search("frozen", ["Product A"], {fields: ["aisle.name"]}, Speaker)
  end

  # other tests

  # includes may be given as an option or chained on the relation.
  def test_includes
    skip unless activerecord?

    store_names(["Product A"])
    assert(Product.search("product", includes: [:store]).first.association(:store).loaded?)
    assert(Product.search("product").includes(:store).first.association(:store).loaded?)
  end

  # model_includes maps each model to the associations to load for it.
  def test_model_includes
    skip unless activerecord?

    store_names(["Product A"])
    store_names(["Store A"], Store)

    includes_by_model = {Product => [:store], Store => [:products]}
    results = Searchkick.search("*", models: [Product, Store], model_includes: includes_by_model)
    assert_equal(2, results.length)

    results.group_by(&:class).each_pair do |model, records|
      association_name = includes_by_model[model].first
      assert(records.first.association(association_name).loaded?)
    end
  end

  # Records filtered out by scope_results trigger a warning.
  def test_scope_results
    skip unless activerecord?

    store_names(["Product A", "Product B"])
    only_a = ->(relation) { relation.where(name: "Product A") }
    assert_warns("Records in search index do not exist in database") do
      assert_search("product", ["Product A"], scope_results: only_a)
    end
  end

  def test_scope_results_relation
    skip unless activerecord?

    store_names(["Product A", "Product B"])
    only_a = ->(relation) { relation.where(name: "Product A") }
    assert_warns("Records in search index do not exist in database") do
      assert_search_relation(["Product A"], Product.search("product").scope_results(only_a))
    end
  end

  private

  # Updates the query slow log warn threshold on the Product index.
  def set_search_slow_log(value)
    Product.searchkick_index.update_settings({"index.search.slowlog.threshold.query.warn" => value})
  end
end
================================================
FILE: test/reindex_test.rb
================================================
require_relative "test_helper"
# Tests for reindexing single records, relations, and whole models in
# inline, :async, and :queue modes.
class ReindexTest < Minitest::Test
  # Reindexing one record inline makes only that record searchable.
  def test_record_inline
    store_names ["Product A", "Product B"], reindex: false

    product = Product.find_by!(name: "Product A")
    assert_equal true, product.reindex(refresh: true)
    assert_search "product", ["Product A"]
  end

  # Reindexing a destroyed record still returns true.
  def test_record_destroyed
    store_names ["Product A", "Product B"]

    product = Product.find_by!(name: "Product A")
    product.destroy
    Product.searchkick_index.refresh
    assert_equal true, product.reindex
  end

  def test_record_async
    store_names ["Product A", "Product B"], reindex: false

    product = Product.find_by!(name: "Product A")
    perform_enqueued_jobs do
      assert_equal true, product.reindex(mode: :async)
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A"]
  end

  # job_options are applied to the enqueued job (here: the queue name).
  def test_record_async_job_options
    product = Product.create!(name: "Product A")
    assert_enqueued_jobs(1, queue: "test") do
      assert_equal true, product.reindex(mode: :async, job_options: {queue: "test"})
    end
  end

  # In :queue mode nothing is indexed until ProcessQueueJob runs.
  def test_record_queue
    reindex_queue = Product.searchkick_index.reindex_queue
    reindex_queue.clear

    store_names ["Product A", "Product B"], reindex: false

    product = Product.find_by!(name: "Product A")
    assert_equal true, product.reindex(mode: :queue)
    Product.searchkick_index.refresh
    assert_search "product", []

    perform_enqueued_jobs do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Product")
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A"]
  end

  def test_process_queue_job_options
    product = Product.create!(name: "Product A")
    product.reindex(mode: :queue)

    assert_enqueued_jobs(1, queue: "test") do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Product", job_options: {queue: "test"})
    end
  end

  # Index#reindex accepts an array of records.
  def test_record_index
    store_names ["Product A", "Product B"], reindex: false

    product = Product.find_by!(name: "Product A")
    assert_equal true, Product.searchkick_index.reindex([product], refresh: true)
    assert_search "product", ["Product A"]
  end

  def test_relation_inline
    store_names ["Product A"]
    store_names ["Product B", "Product C"], reindex: false
    Product.where(name: "Product B").reindex(refresh: true)
    assert_search "product", ["Product A", "Product B"]
  end

  def test_relation_associations
    store_names ["Product A"]
    store = Store.create!(name: "Test")
    Product.create!(name: "Product B", store_id: store.id)
    assert_equal true, store.products.reindex(refresh: true)
    assert_search "product", ["Product A", "Product B"]
  end

  # Search data is built outside the relation's scope: Product.count inside
  # dynamic_data sees both products even though only "Product A" is reindexed.
  def test_relation_scoping
    store_names ["Product A", "Product B"]
    Product.dynamic_data = lambda do
      {
        name: "Count #{Product.count}"
      }
    end
    Product.where(name: "Product A").reindex(refresh: true)
    assert_search "count", ["Count 2"], load: false
  ensure
    Product.dynamic_data = nil
  end

  def test_relation_scoping_restored
    # TODO add test for Mongoid
    skip unless activerecord?

    assert_nil Product.current_scope
    Product.where(name: "Product A").scoping do
      scope = Product.current_scope
      refute_nil scope

      Product.all.reindex(refresh: true)

      # note: should be reset even if we don't do it
      assert_equal scope, Product.current_scope
    end
    assert_nil Product.current_scope
  end

  # A record renamed to "DO NOT INDEX" drops out of the index when reindexed.
  def test_relation_should_index
    store_names ["Product A", "Product B"]
    Searchkick.callbacks(false) do
      Product.find_by(name: "Product B").update!(name: "DO NOT INDEX")
    end
    assert_equal true, Product.where(name: "DO NOT INDEX").reindex
    Product.searchkick_index.refresh
    assert_search "product", ["Product A"]
  end

  def test_relation_async
    store_names ["Product A"]
    store_names ["Product B", "Product C"], reindex: false
    perform_enqueued_jobs do
      Product.where(name: "Product B").reindex(mode: :async)
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product B"]
  end

  def test_relation_async_should_index
    store_names ["Product A", "Product B"]
    Searchkick.callbacks(false) do
      Product.find_by(name: "Product B").update!(name: "DO NOT INDEX")
    end
    perform_enqueued_jobs do
      assert_equal true, Product.where(name: "DO NOT INDEX").reindex(mode: :async)
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A"]
  end

  def test_relation_async_routing
    store_names ["Store A"], Store, reindex: false
    perform_enqueued_jobs do
      Store.where(name: "Store A").reindex(mode: :async)
    end
    Store.searchkick_index.refresh
    assert_search "*", ["Store A"], {routing: "Store A"}, Store
  end

  def test_relation_async_job_options
    store_names ["Store A"], Store, reindex: false
    assert_enqueued_jobs(1, queue: "test") do
      Store.where(name: "Store A").reindex(mode: :async, job_options: {queue: "test"})
    end
  end

  def test_relation_queue
    reindex_queue = Product.searchkick_index.reindex_queue
    reindex_queue.clear

    store_names ["Product A"]
    store_names ["Product B", "Product C"], reindex: false

    Product.where(name: "Product B").reindex(mode: :queue)
    Product.searchkick_index.refresh
    # still only the document indexed by the first store_names call
    assert_search "product", ["Product A"]

    perform_enqueued_jobs do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Product")
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product B"]
  end

  def test_relation_queue_all
    reindex_queue = Product.searchkick_index.reindex_queue
    reindex_queue.clear

    store_names ["Product A"]
    store_names ["Product B", "Product C"], reindex: false

    Product.all.reindex(mode: :queue)
    Product.searchkick_index.refresh
    assert_search "product", ["Product A"]

    perform_enqueued_jobs do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Product")
    end
    Product.searchkick_index.refresh
    assert_search "product", ["Product A", "Product B", "Product C"]
  end

  def test_relation_queue_routing
    reindex_queue = Store.searchkick_index.reindex_queue
    reindex_queue.clear

    store_names ["Store A"], Store, reindex: false

    Store.where(name: "Store A").reindex(mode: :queue)
    Store.searchkick_index.refresh
    assert_search "*", [], {}, Store

    perform_enqueued_jobs do
      Searchkick::ProcessQueueJob.perform_now(class_name: "Store")
    end
    Store.searchkick_index.refresh
    assert_search "*", ["Store A"], {routing: "Store A"}, Store
  end

  # Index#reindex accepts a relation.
  def test_relation_index
    store_names ["Product A"]
    store_names ["Product B", "Product C"], reindex: false
    Product.searchkick_index.reindex(Product.where(name: "Product B"), refresh: true)
    assert_search "product", ["Product A", "Product B"]
  end

  # A full async reindex builds a new index that is not searched until it is
  # promoted.
  def test_full_async
    store_names ["Product A"], reindex: false
    reindex = nil
    perform_enqueued_jobs do
      reindex = Product.reindex(mode: :async)
      # the live index is still empty at this point
      assert_search "product", [], conversions: false
    end

    index = Searchkick::Index.new(reindex[:index_name])
    index.refresh
    assert_equal 1, index.total_docs

    reindex_status = Searchkick.reindex_status(reindex[:name])
    assert_equal true, reindex_status[:completed]
    assert_equal 0, reindex_status[:batches_left]

    Product.searchkick_index.promote(reindex[:index_name])
    assert_search "product", ["Product A"]
  end

  def test_full_async_should_index
    store_names ["Product A", "Product B", "DO NOT INDEX"], reindex: false
    reindex = nil
    perform_enqueued_jobs do
      reindex = Product.reindex(mode: :async)
    end

    index = Searchkick::Index.new(reindex[:index_name])
    index.refresh
    # the "DO NOT INDEX" record is excluded
    assert_equal 2, index.total_docs
    index.delete
  end

  def test_full_async_wait
    store_names ["Product A"], reindex: false
    perform_enqueued_jobs do
      # wait: true writes to stdout/stderr, which is captured and discarded here
      capture_io do
        Product.reindex(mode: :async, wait: true)
      end
    end
    assert_search "product", ["Product A"]
  end

  def test_full_async_job_options
    store_names ["Product A"], reindex: false
    assert_enqueued_jobs(1, queue: "test") do
      Product.reindex(mode: :async, job_options: {queue: "test"})
    end
  end

  def test_full_async_non_integer_pk
    # create! (not create) so a failed insert raises here instead of showing
    # up later as a confusing document-count mismatch
    Sku.create!(id: SecureRandom.hex, name: "Test")
    reindex = nil
    perform_enqueued_jobs do
      reindex = Sku.reindex(mode: :async)
      assert_search "sku", [], conversions: false
    end

    index = Searchkick::Index.new(reindex[:index_name])
    index.refresh
    assert_equal 1, index.total_docs
    index.delete
  ensure
    Sku.destroy_all
  end

  def test_full_queue
    error = assert_raises(ArgumentError) do
      Product.reindex(mode: :queue)
    end
    assert_equal "Full reindex does not support :queue mode - use :async mode instead", error.message
  end

  # refresh_interval applies to the new index and is reset to "1s" when the
  # index is promoted with update_refresh_interval: true.
  def test_full_refresh_interval
    reindex = Product.reindex(refresh_interval: "30s", mode: :async, import: false)
    index = Searchkick::Index.new(reindex[:index_name])
    assert_nil Product.searchkick_index.refresh_interval
    assert_equal "30s", index.refresh_interval

    Product.searchkick_index.promote(index.name, update_refresh_interval: true)
    assert_equal "1s", index.refresh_interval
    assert_equal "1s", Product.searchkick_index.refresh_interval
  end

  def test_full_resume
    Product.searchkick_index.clean_indices

    if mongoid?
      error = assert_raises(Searchkick::Error) do
        Product.reindex(resume: true)
      end
      assert_equal "Resume not supported for Mongoid", error.message
    else
      assert Product.reindex(resume: true)
    end
  end

  def test_full_refresh
    Product.reindex(refresh: true)
  end

  def test_full_partial_async
    store_names ["Product A"]
    Product.reindex(:search_name, mode: :async)
    assert_search "product", ["Product A"]
  end

  def test_wait_not_async
    error = assert_raises(ArgumentError) do
      Product.reindex(wait: false)
    end
    assert_equal "wait only available in :async mode", error.message
  end

  def test_object_index
    error = assert_raises(Searchkick::Error) do
      Product.searchkick_index.reindex(Object.new)
    end
    assert_equal "Cannot reindex object", error.message
  end

  # Records created in a rolled-back transaction must not be indexed.
  def test_transaction
    skip unless activerecord?

    Product.transaction do
      store_names ["Product A"]
      raise ActiveRecord::Rollback
    end
    assert_search "*", []
  end

  def test_both_paths
    Product.searchkick_index.delete if Product.searchkick_index.exists?
    Product.reindex
    Product.reindex # run twice for both index paths
  end
end
================================================
FILE: test/reindex_v2_job_test.rb
================================================
require_relative "test_helper"
# Tests Searchkick::ReindexV2Job, called with a class name and a string id.
class ReindexV2JobTest < Minitest::Test
  # A record created with callbacks off is indexed once the job runs.
  def test_create
    record = Searchkick.callbacks(false) do
      Product.create!(name: "Boom")
    end
    Product.searchkick_index.refresh
    assert_search("*", [])

    Searchkick::ReindexV2Job.perform_now("Product", record.id.to_s)
    Product.searchkick_index.refresh
    assert_search("*", ["Boom"])
  end

  # A record destroyed with callbacks off is removed once the job runs.
  def test_destroy
    record = Searchkick.callbacks(false) do
      Product.create!(name: "Boom")
    end
    Product.reindex
    assert_search("*", ["Boom"])

    Searchkick.callbacks(false) do
      record.destroy
    end
    Searchkick::ReindexV2Job.perform_now("Product", record.id.to_s)
    Product.searchkick_index.refresh
    assert_search("*", [])
  end
end
================================================
FILE: test/relation_test.rb
================================================
require_relative "test_helper"
class RelationTest < Minitest::Test
# A relation loads on first use; copies and chained relations start unloaded,
# and bang methods are rejected once the relation is loaded.
def test_loaded
  Product.searchkick_index.refresh
  relation = Product.search("*")

  refute(relation.loaded?)
  assert_equal(0, relation.count)
  assert(relation.loaded?)

  refute(relation.clone.loaded?)
  refute(relation.dup.loaded?)
  refute(relation.limit(2).loaded?)

  raised = assert_raises(Searchkick::Error) { relation.limit!(2) }
  assert_equal("Relation loaded", raised.message)
end
# limit! changes the relation in place.
def test_mutating
  store_names(["Product A", "Product B"])
  relation = Product.search("*").order(:name)
  relation.limit!(1)
  assert_equal(["Product A"], relation.map(&:name))
end
# limit (without a bang) leaves the receiver untouched.
def test_non_mutating
  store_names(["Product A", "Product B"])
  relation = Product.search("*").order(:name)
  relation.limit(1) # result intentionally discarded
  assert_equal(["Product A", "Product B"], relation.map(&:name))
end
# load returns a loaded relation and may be called repeatedly.
def test_load
  relation = Product.search("*")
  refute(relation.loaded?)
  assert(relation.load.loaded?)
  assert(relation.load.load.loaded?)
end
# Chaining limit must not leak into the original relation.
def test_clone
  relation = Product.search("*")
  limited = relation.limit(10)
  assert_equal(10, limited.limit_value)
  assert_equal(10000, relation.limit_value)
end
# only(:limit) keeps the limit value.
def test_only
  relation = Product.search("*").limit(10).only(:limit)
  assert_equal(10, relation.limit_value)
end
# except(:limit) drops the limit, so limit_value is 10000 again.
def test_except
  relation = Product.search("*").limit(10).except(:limit)
  assert_equal(10000, relation.limit_value)
end
# first returns a record, first(n) returns an array capped by the limit.
def test_first
  store_names(["Product A", "Product B"])
  relation = Product.search("product")
  assert_kind_of(Product, relation.first)
  assert_kind_of(Array, relation.first(1))
  assert_equal(1, relation.limit(1).first(2).size)
end
# first also works on a relation that has already been loaded.
def test_first_loaded
  store_names(["Product A", "Product B"])
  loaded = Product.search("product").load
  assert_kind_of(Product, loaded.first)
end
# TODO call pluck or select on Active Record query
# currently uses pluck from Active Support enumerable
def test_pluck
  store_names(["Product A", "Product B"])
  expected = ["Product A", "Product B"]
  assert_equal(expected, Product.search("product").pluck(:name).sort)
  assert_equal(expected, Product.search("product").load(false).pluck(:name).sort)
end
# model is the searched class, or nil for a global Searchkick.search.
def test_model
  assert_equal(Product, Product.search("product").model)
  assert_nil(Searchkick.search("product").model)
end
# klass is the searched class, or nil for a global Searchkick.search.
def test_klass
  assert_equal(Product, Product.search("product").klass)
  assert_nil(Searchkick.search("product").klass)
end
# respond_to? reports relation and result methods without loading the relation.
def test_respond_to
  relation = Product.search("product")
  %i[page response size].each do |method_name|
    assert(relation.respond_to?(method_name))
  end
  refute(relation.respond_to?(:hello))
  refute(relation.loaded?)
end
def test_inspect
store_names ["Product A"]
assert_match "# :word_start}]
end
# Currently skipped; the statements after `skip` do not run.
def test_multiple_models
  skip # flaky test

  store_names(["Great White Shark", "Hammerhead Shark", "Tiger Shark"])
  suggestions = Searchkick.search("How Big is a Tigre Shar", suggest: [:name], fields: [:name]).suggestions
  assert_equal("how big is a tiger shark", suggestions.first)
end
# suggest: true on a global search without fields must raise ArgumentError.
def test_multiple_models_no_fields
  store_names(["Great White Shark", "Hammerhead Shark", "Tiger Shark"])
  assert_raises(ArgumentError) do
    Searchkick.search("How Big is a Tigre Shar", suggest: true)
  end
end
# A "*" query yields no suggestions.
def test_star
  suggestions = Product.search("*", suggest: true).suggestions
  assert_equal([], suggestions)
end
protected
# Asserts that the first suggestion for `term` equals `expected`
# (or that there is none when `expected` is nil).
def assert_suggest(term, expected, options = {})
  suggestion = Product.search(term, suggest: true, **options).suggestions.first
  return assert_nil(suggestion) if expected.nil?

  assert_equal(expected, suggestion)
end
# Asserts the full set of suggestions for `term`, in any order.
def assert_suggest_all(term, expected, options = {})
  suggestions = Product.search(term, suggest: true, **options).suggestions
  assert_equal(expected.sort, suggestions.sort)
end
end
================================================
FILE: test/support/activerecord.rb
================================================
require "active_record"
# Active Record bootstrap for the test suite.
# for debugging: send Active Record logging to the shared test logger
ActiveRecord::Base.logger = $logger
# rails does this in activerecord/lib/active_record/railtie.rb
ActiveRecord.default_timezone = :utc
ActiveRecord::Base.time_zone_aware_attributes = true
# migrations: connect to an in-memory SQLite database
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
# load the Apartment setup only when the Apartment constant is defined
require_relative "apartment" if defined?(Apartment)
# migration output is controlled by the VERBOSE environment variable
ActiveRecord::Migration.verbose = ENV["VERBOSE"]
# Creates the tables used by the Active Record test models.
ActiveRecord::Schema.define do
# main model used by most tests
create_table :products do |t|
t.string :name
t.integer :store_id
t.boolean :in_stock
t.boolean :backordered
t.integer :orders_count
t.decimal :found_rate
t.integer :price
t.string :color
t.decimal :latitude, precision: 10, scale: 7
t.decimal :longitude, precision: 10, scale: 7
t.text :description
t.text :alt_description
# text columns; the Product model serializes these as JSON
t.text :embedding
t.text :embedding2
t.text :embedding3
t.text :embedding4
t.timestamps null: true
end
create_table :stores do |t|
t.string :name
end
create_table :regions do |t|
t.string :name
t.text :text
end
create_table :speakers do |t|
t.string :name
end
# `type` column is used by the Dog and Cat subclasses of Animal
create_table :animals do |t|
t.string :name
t.string :type
end
# non-integer (uuid) primary key
create_table :skus, id: :uuid do |t|
t.string :name
end
create_table :songs do |t|
t.string :name
end
create_table :bands do |t|
t.string :name
t.boolean :active
end
create_table :artists do |t|
t.string :name
t.boolean :active
t.boolean :should_index
end
end
# Active Record Product model: belongs to a store and stores its four
# embedding columns as JSON-serialized text.
class Product < ActiveRecord::Base
belongs_to :store
serialize :embedding, coder: JSON
serialize :embedding2, coder: JSON
serialize :embedding3, coder: JSON
serialize :embedding4, coder: JSON
end
# Active Record Store model; a store has many products.
class Store < ActiveRecord::Base
has_many :products
end
# Active Record Region model (regions table: name, text).
class Region < ActiveRecord::Base
end
# Active Record Speaker model (speakers table: name).
class Speaker < ActiveRecord::Base
end
# Active Record Animal model; base class for Dog and Cat
# (animals table: name, type).
class Animal < ActiveRecord::Base
end
# Animal subclass used by inheritance tests.
class Dog < Animal
end
# Animal subclass used by inheritance tests.
class Cat < Animal
end
# Active Record Sku model; its table is created with `id: :uuid`, so the
# primary key is not an integer.
class Sku < ActiveRecord::Base
end
# Active Record Song model (songs table: name).
class Song < ActiveRecord::Base
end
# Active Record Band model with a default scope: active bands ordered by name.
class Band < ActiveRecord::Base
default_scope { where(active: true).order(:name) }
end
# Active Record Artist model with a default scope: active artists ordered by
# name.
class Artist < ActiveRecord::Base
default_scope { where(active: true).order(:name) }
end
================================================
FILE: test/support/apartment.rb
================================================
# Minimal Rails stand-in for the Apartment setup: exposes only Rails.env,
# read from the RACK_ENV environment variable.
module Rails
  class << self
    # @return [String, nil] the value of ENV["RACK_ENV"]
    def env
      ENV["RACK_ENV"]
    end
  end
end
# Apartment (multi-tenant) setup: configures two tenants, defines a Tenant
# model whose index prefix is the current tenant, then creates the tenants
# table and reindexes Tenant inside each tenant.
tenants = ["tenant1", "tenant2"]
Apartment.configure do |config|
config.tenant_names = tenants
config.database_schema_file = false
# NOTE(review): "Artist" is defined in support/activerecord.rb but is not
# listed here - confirm whether that is intentional
config.excluded_models = ["Product", "Store", "Region", "Speaker", "Animal", "Dog", "Cat", "Sku", "Song", "Band"]
end
class Tenant < ActiveRecord::Base
# the prefix is a lambda, so it is read from the current tenant when called
searchkick index_prefix: -> { Apartment::Tenant.current }
end
tenants.each do |tenant|
begin
Apartment::Tenant.create(tenant)
rescue Apartment::TenantExists
# do nothing
end
Apartment::Tenant.switch!(tenant)
# force: true recreates the tenants table inside this tenant
ActiveRecord::Schema.define do
create_table :tenants, force: true do |t|
t.string :name
t.timestamps null: true
end
end
Tenant.reindex
end
# leave tenant context once setup is done
Apartment::Tenant.reset
================================================
FILE: test/support/helpers.rb
================================================
# Shared helpers for the whole suite, added by reopening Minitest::Test.
class Minitest::Test
  include ActiveJob::TestHelper

  # Prepares the Product and Store models before every test.
  def setup
    [Product, Store].each { |model| setup_model(model) }
  end

  protected

  def setup_animal
    setup_model(Animal)
  end

  def setup_region
    setup_model(Region)
  end

  def setup_speaker
    setup_model(Speaker)
  end

  # Reindexes the model the first time it is seen and deletes all of its
  # records (under bulk callbacks) on every call.
  def setup_model(model)
    # reindex once
    $setup_model ||= {}
    $setup_model[model] ||= (model.reindex || true)

    # clear every time
    Searchkick.callbacks(:bulk) { model.destroy_all }
  end

  # Creates `documents` (in random order) for `model`. With reindex: true the
  # records are created under bulk callbacks unless a callbacks mode is
  # already set; with reindex: false callbacks are disabled.
  def store(documents, model = default_model, reindex: true)
    if reindex
      with_callbacks(:bulk) do
        with_transaction(model) { model.create!(documents.shuffle) }
      end
    else
      Searchkick.callbacks(false) do
        with_transaction(model) { model.create!(documents.shuffle) }
      end
    end

    # refresh in both cases (with callbacks off this prevents warnings)
    model.searchkick_index.refresh
  end

  # Like #store, but builds the documents from a list of names.
  def store_names(names, model = default_model, reindex: true)
    documents = names.map { |name| {name: name} }
    store(documents, model, reindex: reindex)
  end

  # no order
  # Checks the result names via both search options and chained relation
  # methods.
  def assert_search(term, expected, options = {}, model = default_model)
    sorted = expected.sort
    assert_equal(sorted, model.search(term, **options).map(&:name).sort)
    assert_equal(sorted, build_relation(model, term, **options).map(&:name).sort)
  end

  def assert_search_relation(expected, relation)
    assert_equal(expected.sort, relation.map(&:name).sort)
  end

  # Same as #assert_search, but the order of the names matters.
  def assert_order(term, expected, options = {}, model = default_model)
    assert_equal(expected, model.search(term, **options).map(&:name))
    assert_equal(expected, build_relation(model, term, **options).map(&:name))
  end

  def assert_order_relation(expected, relation)
    assert_equal(expected, relation.map(&:name))
  end

  # Asserts that every hit has the same score.
  def assert_equal_scores(term, options = {}, model = default_model)
    scores = model.search(term, **options).hits.map { |hit| hit["_score"] }
    assert_equal(1, scores.uniq.size)
  end

  def assert_first(term, expected, options = {}, model = default_model)
    names = model.search(term, **options).map(&:name)
    assert_equal(expected, names.first)
  end

  # Asserts that the block writes the given Searchkick warning to stderr.
  def assert_warns(message)
    _, captured_stderr = capture_io { yield }
    assert_match("[searchkick] WARNING: #{message}", captured_stderr)
  end

  # Builds the search by calling each option as a chained relation method.
  def build_relation(model, term, **options)
    options.reduce(model.search(term)) do |relation, (name, value)|
      relation.public_send(name, value)
    end
  end

  # Runs the block with `options` merged into the model's searchkick options
  # (after a reindex), then restores the previous options.
  def with_options(options, model = default_model)
    saved_options = model.searchkick_options.dup
    begin
      model.instance_variable_set(:@searchkick_index_name, nil)
      model.searchkick_options.merge!(options)
      model.reindex
      yield
    ensure
      model.instance_variable_set(:@searchkick_index_name, nil)
      model.searchkick_options.clear
      model.searchkick_options.merge!(saved_options)
    end
  end

  # Applies the callbacks mode only when none is currently set.
  def with_callbacks(value, &block)
    return yield unless Searchkick.callbacks?(default: nil).nil?

    Searchkick.callbacks(value, &block)
  end

  # Wraps the block in a transaction when the model supports one and Mongoid
  # is not in use.
  def with_transaction(model, &block)
    return yield unless model.respond_to?(:transaction) && !mongoid?

    model.transaction(&block)
  end

  def activerecord?
    defined?(ActiveRecord)
  end

  def mongoid?
    defined?(Mongoid)
  end

  def default_model
    Product
  end

  def ci?
    ENV["CI"]
  end

  # for Active Job helpers
  def tagged_logger
    nil
  end
end
================================================
FILE: test/support/kaminari.yml
================================================
# Kaminari pagination strings for the "en" locale (used by the test suite).
en:
  views:
    pagination:
      first: "« First"
      last: "Last »"
      previous: "‹ Prev"
      next: "Next ›"
      truncate: "…"
  helpers:
    page_entries_info:
      # pluralized model name
      entry:
        zero: "entries"
        one: "entry"
        other: "entries"
      # all results fit on one page
      one_page:
        display_entries:
          zero: "No %{entry_name} found"
          one: "Displaying 1 %{entry_name}"
          other: "Displaying all %{count} %{entry_name}"
      # results span several pages
      more_pages:
        display_entries: "Displaying %{entry_name} %{first} - %{last} of %{total} in total"
================================================
FILE: test/support/mongoid.rb
================================================
# Mongoid bootstrap for the test suite: send Mongoid (and Mongo driver, when
# available) logging to the shared test logger.
Mongoid.logger = $logger
Mongo::Logger.logger = $logger if defined?(Mongo::Logger)
# connect to the searchkick_test database with a server selection timeout of 1
Mongoid.configure do |config|
config.connect_to "searchkick_test", server_selection_timeout: 1
end
# Mongoid Product model; field names match the columns of the products table
# in support/activerecord.rb (timestamps come from Mongoid::Timestamps).
class Product
include Mongoid::Document
include Mongoid::Timestamps
field :name
field :store_id, type: Integer
field :in_stock, type: Boolean
field :backordered, type: Boolean
field :orders_count, type: Integer
field :found_rate, type: BigDecimal
field :price, type: Integer
field :color
field :latitude, type: BigDecimal
field :longitude, type: BigDecimal
field :description
field :alt_description
# embeddings are stored as arrays
field :embedding, type: Array
field :embedding2, type: Array
field :embedding3, type: Array
field :embedding4, type: Array
end
# Mongoid Store model; a store has many products.
class Store
include Mongoid::Document
has_many :products
field :name
end
# Mongoid Region model (fields: name, text).
class Region
include Mongoid::Document
field :name
field :text
end
# Mongoid Speaker model (field: name).
class Speaker
include Mongoid::Document
field :name
end
# Mongoid Animal model; base class for Dog and Cat.
class Animal
include Mongoid::Document
field :name
end
# Animal subclass used by inheritance tests.
class Dog < Animal
end
# Animal subclass used by inheritance tests.
class Cat < Animal
end
# Mongoid Sku model (field: name).
class Sku
include Mongoid::Document
field :name
end
# Mongoid Song model (field: name).
class Song
include Mongoid::Document
field :name
end
# Mongoid Band model with a default scope: active bands ordered by name
# ascending.
class Band
include Mongoid::Document
field :name
field :active, type: Mongoid::Boolean
default_scope -> { where(active: true).order(name: 1) }
end
# Mongoid Artist model with a default scope: active artists ordered by name
# ascending.
class Artist
include Mongoid::Document
field :name
field :active, type: Mongoid::Boolean
field :should_index, type: Mongoid::Boolean
default_scope -> { where(active: true).order(name: 1) }
end
================================================
FILE: test/support/redis.rb
================================================
# Pick the Redis client for Searchkick from whichever gems are loaded.
# The logger option is only passed when redis-client is not available.
options = defined?(RedisClient) ? {} : {logger: $logger}

Searchkick.redis =
  if defined?(Redis)
    if defined?(ConnectionPool)
      # share Redis connections through a pool
      ConnectionPool.new { Redis.new(**options) }
    else
      Redis.new(**options)
    end
  else
    # no redis gem: use a redis-client pool
    RedisClient.config.new_pool
  end
# Logs every Redis command (single or pipelined) to $logger before passing
# it on to the next `call`/`call_pipelined` implementation.
module RedisInstrumentation
  def call(command, redis_config)
    $logger.info("[redis] #{command.inspect}")
    super(command, redis_config)
  end

  def call_pipelined(commands, redis_config)
    $logger.info("[redis] #{commands.inspect}")
    super(commands, redis_config)
  end
end
# register the logging module with redis-client only when that gem is loaded
RedisClient.register(RedisInstrumentation) if defined?(RedisClient)
================================================
FILE: test/synonyms_test.rb
================================================
require_relative "test_helper"
# Search tests that rely on the synonyms configured for the Product model
# (the synonym list itself is defined outside this file).
class SynonymsTest < Minitest::Test
  def test_bleach
    store_names(["Clorox Bleach", "Kroger Bleach"])
    assert_search("clorox", ["Clorox Bleach", "Kroger Bleach"])
  end

  def test_burger_buns
    store_names(["Hamburger Buns"])
    assert_search("burger buns", ["Hamburger Buns"])
  end

  def test_bandaids
    store_names(["Band-Aid", "Kroger 12-Pack Bandages"])
    assert_search("bandaids", ["Band-Aid", "Kroger 12-Pack Bandages"])
  end

  def test_reverse
    store_names(["Hamburger"])
    assert_search("burger", ["Hamburger"])
  end

  def test_stemmed
    store_names(["Burger"])
    assert_search("hamburgers", ["Burger"])
  end

  # Synonyms also apply when matching with word_start.
  def test_word_start
    store_names(["Clorox Bleach", "Kroger Bleach"])
    assert_search("clorox", ["Clorox Bleach", "Kroger Bleach"], fields: [{name: :word_start}])
  end

  def test_directional
    store_names(["Lightbulb", "Green Onions", "Led"])
    assert_search("led", ["Lightbulb", "Led"])
    assert_search("Lightbulb", ["Lightbulb"])
    assert_search("Halogen Lamp", ["Lightbulb"])
    assert_search("onions", ["Green Onions"])
  end

  def test_case
    store_names(["Uppercase"])
    assert_search("lowercase", ["Uppercase"])
  end
end
================================================
FILE: test/test_helper.rb
================================================
# Test suite bootstrap: loads gems, configures logging and Searchkick, then
# loads the ORM support file, Redis setup, models, and shared helpers.
require "bundler/setup"
Bundler.require(:default)
require "minitest/autorun"
require "active_support/notifications"
ENV["RACK_ENV"] = "test"
# for reloadable synonyms
if ENV["CI"]
# default ES_PATH to $HOME/<opensearch|elasticsearch>/<server version>
ENV["ES_PATH"] ||= File.join(ENV["HOME"], Searchkick.opensearch? ? "opensearch" : "elasticsearch", Searchkick.server_version)
end
# shared logger: writes to STDOUT only when VERBOSE is set
$logger = ActiveSupport::Logger.new(ENV["VERBOSE"] ? STDOUT : nil)
if ENV["LOG_TRANSPORT"]
transport_logger = ActiveSupport::Logger.new(STDOUT)
# the logger lives one level deeper when the transport wraps another transport
if Searchkick.client.transport.respond_to?(:transport)
Searchkick.client.transport.transport.logger = transport_logger
else
Searchkick.client.transport.logger = transport_logger
end
end
Searchkick.search_timeout = 5
Searchkick.index_suffix = ENV["TEST_ENV_NUMBER"] # for parallel tests
puts "Running against #{Searchkick.opensearch? ? "OpenSearch" : "Elasticsearch"} #{Searchkick.server_version}"
I18n.config.enforce_available_locales = true
# Active Job: shared logger and the in-memory test adapter
ActiveJob::Base.logger = $logger
ActiveJob::Base.queue_adapter = :test
ActiveSupport::LogSubscriber.logger = ActiveSupport::Logger.new(STDOUT) if ENV["VERBOSE"]
# pick the ORM support file: Mongoid when it is loaded, Active Record otherwise
if defined?(Mongoid)
require_relative "support/mongoid"
else
require_relative "support/activerecord"
end
require_relative "support/redis"
# models
Dir["#{__dir__}/models/*"].each do |file|
require file
end
require_relative "support/helpers"
================================================
FILE: test/unscope_test.rb
================================================
require_relative "test_helper"
# Reindex tests against Artist, the default model for this test case.
class UnscopeTest < Minitest::Test
  # Replaces the shared setup: reindex Artist once, then remove every artist
  # (ignoring the default scope) before each test.
  def setup
    @@once ||= Artist.reindex
    Artist.unscoped.destroy_all
  end

  # A full reindex picks up "Test 2" even though it is not active.
  def test_reindex
    create_records

    Artist.reindex
    expected = ["Test", "Test 2"]
    assert_search("*", expected)
    assert_search("*", expected, {load: false})
  end

  def test_relation_async
    create_records

    perform_enqueued_jobs { Artist.unscoped.reindex(mode: :async) }

    Artist.searchkick_index.refresh
    assert_search("*", ["Test", "Test 2"])
  end

  # Creates three artists without indexing them.
  def create_records
    records = [
      {name: "Test", active: true, should_index: true},
      {name: "Test 2", active: false, should_index: true},
      {name: "Test 3", active: false, should_index: false}
    ]
    store(records, reindex: false)
  end

  def default_model
    Artist
  end
end
================================================
FILE: test/where_test.rb
================================================
require_relative "test_helper"
class WhereTest < Minitest::Test
# Runs the `where` option through booleans, array membership, date and integer
# comparisons, ranges, or/_or/_and/_not combinators, `all`, `in` and nil values.
def test_where
  base_time = Time.now
  store [
    {name: "Product A", store_id: 1, in_stock: true, backordered: true, created_at: base_time, orders_count: 4, user_ids: [1, 2, 3]},
    {name: "Product B", store_id: 2, in_stock: true, backordered: false, created_at: base_time - 1, orders_count: 3, user_ids: [1]},
    {name: "Product C", store_id: 3, in_stock: false, backordered: true, created_at: base_time - 2, orders_count: 2, user_ids: [1, 3]},
    {name: "Product D", store_id: 4, in_stock: false, backordered: false, created_at: base_time - 3, orders_count: 1}
  ]

  # every case searches for "product" and only varies the filter
  check = ->(expected, filter) { assert_search "product", expected, where: filter }

  check.(["Product A", "Product B"], {in_stock: true})

  # arrays
  check.(["Product A"], {user_ids: 2})
  check.(["Product A", "Product C"], {user_ids: [2, 3]})

  # date
  check.(["Product A"], {created_at: {gt: base_time - 1}})
  check.(["Product A", "Product B"], {created_at: {gte: base_time - 1}})
  check.(["Product D"], {created_at: {lt: base_time - 2}})
  check.(["Product C", "Product D"], {created_at: {lte: base_time - 2}})

  # integer
  check.(["Product A"], {store_id: {lt: 2}})
  check.(["Product A", "Product B"], {store_id: {lte: 2}})
  check.(["Product D"], {store_id: {gt: 3}})
  check.(["Product C", "Product D"], {store_id: {gte: 3}})

  # range
  check.(["Product A", "Product B"], {store_id: 1..2})
  check.(["Product A"], {store_id: 1...2})
  check.(["Product A", "Product B"], {store_id: [1, 2]})
  check.(["Product B", "Product C", "Product D"], {store_id: {not: 1}})
  check.(["Product B", "Product C", "Product D"], {store_id: {_not: 1}})
  check.(["Product C", "Product D"], {store_id: {not: [1, 2]}})
  check.(["Product C", "Product D"], {store_id: {_not: [1, 2]}})
  check.(["Product A"], {user_ids: {lte: 2, gte: 2}})
  check.(["Product A", "Product B", "Product C", "Product D"], {store_id: -Float::INFINITY..Float::INFINITY})
  check.(["Product C", "Product D"], {store_id: 3..Float::INFINITY})
  check.(["Product A", "Product B"], {store_id: -Float::INFINITY..2})
  check.(["Product C", "Product D"], {store_id: 3..})
  check.(["Product A", "Product B"], {store_id: ..2})
  check.(["Product A", "Product B"], {store_id: ...3})

  # or
  check.(["Product A", "Product B", "Product C"], {or: [[{in_stock: true}, {store_id: 3}]]})
  check.(["Product A", "Product B", "Product C"], {or: [[{orders_count: [2, 4]}, {store_id: [1, 2]}]]})
  check.(["Product A", "Product D"], {or: [[{orders_count: 1}, {created_at: {gte: base_time - 1}, backordered: true}]]})

  # _or
  check.(["Product A", "Product B", "Product C"], {_or: [{in_stock: true}, {store_id: 3}]})
  check.(["Product A", "Product B", "Product C"], {_or: [{orders_count: [2, 4]}, {store_id: [1, 2]}]})
  check.(["Product A", "Product D"], {_or: [{orders_count: 1}, {created_at: {gte: base_time - 1}, backordered: true}]})

  # _and
  check.(["Product A"], {_and: [{in_stock: true}, {backordered: true}]})

  # _not
  check.(["Product B", "Product C"], {_not: {_or: [{orders_count: 1}, {created_at: {gte: base_time - 1}, backordered: true}]}})

  # all
  check.(["Product A", "Product C"], {user_ids: {all: [1, 3]}})
  check.([], {user_ids: {all: [1, 2, 3, 4]}})

  # any / nested terms
  check.(["Product B", "Product C"], {user_ids: {not: [2], in: [1, 3]}})
  check.(["Product B", "Product C"], {user_ids: {_not: [2], in: [1, 3]}})

  # not
  check.(["Product D"], {user_ids: nil})
  check.(["Product A", "Product B", "Product C"], {user_ids: {not: nil}})
  check.(["Product A", "Product B", "Product C"], {user_ids: {_not: nil}})
  check.(["Product A", "Product C", "Product D"], {user_ids: [3, nil]})
  check.(["Product B"], {user_ids: {not: [3, nil]}})
  check.(["Product B"], {user_ids: {_not: [3, nil]}})
end
# Same filters expressed through the chainable relation API:
# where, where.not, rewhere and stacked compound clauses.
def test_relation
  base_time = Time.now
  store [
    {name: "Product A", store_id: 1, in_stock: true, backordered: true, created_at: base_time, orders_count: 4, user_ids: [1, 2, 3]},
    {name: "Product B", store_id: 2, in_stock: true, backordered: false, created_at: base_time - 1, orders_count: 3, user_ids: [1]},
    {name: "Product C", store_id: 3, in_stock: false, backordered: true, created_at: base_time - 2, orders_count: 2, user_ids: [1, 3]},
    {name: "Product D", store_id: 4, in_stock: false, backordered: false, created_at: base_time - 3, orders_count: 1}
  ]

  # a new relation is built for every assertion
  products = -> { Product.search("product") }

  assert_search_relation ["Product A", "Product B"], products.call.where(in_stock: true)

  # multiple where
  assert_search_relation ["Product A"], products.call.where(in_stock: true).where(backordered: true)
  assert_search_relation ["Product A"], products.call.where.not(store_id: 2).where.not(store_id: 3).where.not(store_id: 4)
  assert_search_relation [], products.call.where(in_stock: true).where(in_stock: false)
  assert_search_relation [], products.call.where(in_stock: true).where("in_stock" => false)

  # rewhere
  assert_search_relation ["Product A", "Product C"], products.call.where(in_stock: true).rewhere(backordered: true)

  # not
  assert_search_relation ["Product C", "Product D"], products.call.where.not(in_stock: true)
  assert_search_relation ["Product C"], products.call.where.not(in_stock: true).where(backordered: true)
  assert_search_relation ["Product A", "Product C"], products.call.where.not(store_id: [2, 4])

  # compound
  assert_search_relation ["Product B", "Product C"], products.call.where(_or: [{in_stock: true}, {backordered: true}]).where(_or: [{store_id: 2}, {orders_count: 2}])
end
# An operator given as a String key ("lt") must raise ArgumentError
# with an "Unknown where operator" message.
def test_string_operators
  filter = {store_id: {"lt" => 2}}
  raised = assert_raises(ArgumentError) { assert_search "product", [], where: filter }
  assert_includes raised.message, "Unknown where operator"
end
# An unsupported operator (:contains) must raise ArgumentError
# with an "Unknown where operator" message.
def test_unknown_operator
  filter = {store_id: {contains: "%2%"}}
  raised = assert_raises(ArgumentError) { assert_search "product", [], where: filter }
  assert_includes raised.message, "Unknown where operator"
end
# A Regexp value in `where` filters on the field; the fully anchored
# pattern is expected to select the single stored name.
def test_regexp
  store_names ["Product A"]
  anchored = /\APro.+\z/
  assert_search "*", ["Product A"], where: {name: anchored}
end
# The `regexp:` operator takes the pattern as a String instead of a Regexp.
def test_alternate_regexp
  store_names ["Product A", "Item B"]
  filter = {name: {regexp: "Pro.+"}}
  assert_search "*", ["Product A"], where: filter
end
# A Regexp containing a literal `<` must select the name that contains `<`.
# The fixture names need a `<` followed by at least one character, otherwise
# the pattern below cannot match the expected record at all.
# NOTE(review): presumably this guards against `<` being interpreted as a
# regexp operator by the search server - confirm against the regexp filter.
def test_special_regexp
  store_names ["Product <ABC>", "Item <XYZ>"]
  assert_search "*", ["Product <ABC>"], where: {name: /\APro.+<.+\z/}
end
# Unanchored patterns are expected to match anywhere inside the stored value.
def test_regexp_not_anchored
  store_names ["abcde"]
  [/abcd/, /bcde/, /abcde/, /.*bcd.*/].each do |pattern|
    assert_search "*", ["abcde"], where: {name: pattern}
  end
end
# \A and \z anchors pin the match to the start / end of the value:
# anchored patterns that start or end mid-string expect no results.
def test_regexp_anchored
  store_names ["abcde"]
  cases = [
    [/\Aabcde\z/, ["abcde"]],
    [/\Aabc/, ["abcde"]],
    [/cde\z/, ["abcde"]],
    [/\Abcd/, []],
    [/bcd\z/, []]
  ]
  cases.each do |pattern, expected|
    assert_search "*", expected, where: {name: pattern}
  end
end
# Matching is case-sensitive unless the Regexp carries the `i` flag.
def test_regexp_case
  store_names ["abcde"]
  case_sensitive = /\AABCDE\z/
  case_insensitive = /\AABCDE\z/i
  assert_search "*", [], where: {name: case_sensitive}
  assert_search "*", ["abcde"], where: {name: case_insensitive}
end
# The `prefix:` operator selects names that start with the given string.
def test_prefix
  names = ["Product A", "Product B", "Item C"]
  store_names names
  assert_search "*", ["Product A", "Product B"], where: {name: {prefix: "Pro"}}
end
# `exists: true/false` selects records with / without the field;
# any other value (nil here) must raise ArgumentError with a fixed message.
def test_exists
  store [
    {name: "Product A", user_ids: [1, 2]},
    {name: "Product B"}
  ]

  {true => ["Product A"], false => ["Product B"]}.each do |flag, expected|
    assert_search "product", expected, where: {user_ids: {exists: flag}}
  end

  raised = assert_raises(ArgumentError) do
    assert_search "product", ["Product A"], where: {user_ids: {exists: nil}}
  end
  assert_equal "Passing a value other than true or false to exists is not supported", raised.message
end
# `like` is matched against the whole field value: `%` stands for any run of
# characters and `_` for exactly one, with no implicit wildcard at either end
# (so "ABC" and "ABC%" select nothing here).
def test_like
  store_names ["Product ABC", "Product DEF"]
  assert_search "product", ["Product ABC"], where: {name: {like: "%ABC%"}}
  assert_search "product", ["Product ABC"], where: {name: {like: "%ABC"}}
  assert_search "product", [], where: {name: {like: "ABC"}}
  assert_search "product", [], where: {name: {like: "ABC%"}}
  # positive trailing-wildcard case; takes the place of a verbatim duplicate
  # of the "ABC%" assertion above
  assert_search "product", ["Product ABC"], where: {name: {like: "Product A%"}}
  assert_search "product", ["Product ABC"], where: {name: {like: "Product_ABC"}}
end
# A backslash-escaped `%` in a like pattern stands for a literal percent sign.
def test_like_escape
  store_names ["Product 100%", "Product 1000"]
  literal_percent = "% 100\\%"
  assert_search "product", ["Product 100%"], where: {name: {like: literal_percent}}
end
# Characters such as . ? + * | { } [ ] ( ) " and \ inside a like pattern are
# expected to match themselves: each pattern selects only the name that
# contains that literal character.
def test_like_special_characters
  store_names [
    "Product ABC", "Product.ABC", "Product?ABC", "Product+ABC", "Product*ABC", "Product|ABC",
    "Product{ABC}", "Product[ABC]", "Product(ABC)", "Product\"ABC\"", "Product\\ABC"
  ]

  # [expected name, like pattern]
  cases = [
    ["Product.ABC", "Product.A%"],
    ["Product?ABC", "Product?A%"],
    ["Product+ABC", "Product+A%"],
    ["Product*ABC", "Product*A%"],
    ["Product|ABC", "Product|A%"],
    ["Product{ABC}", "%{ABC}"],
    ["Product[ABC]", "%[ABC]"],
    ["Product(ABC)", "%(ABC)"],
    ["Product\"ABC\"", "%\"ABC\""],
    ["Product\\ABC", "Product\\A%"]
  ]
  cases.each do |expected_name, pattern|
    assert_search "*", [expected_name], where: {name: {like: pattern}}
  end
end
# `&`, `<` and `@` inside a like pattern are expected to match literally.
def test_like_optional_operators
  store_names ["Product A&B", "Product B", "Product <3", "Product @Home"]
  cases = [
    ["Product A&B", "%A&B"],
    ["Product <3", "%<%"],
    ["Product @Home", "%@Home%"]
  ]
  cases.each do |expected_name, pattern|
    assert_search "product", [expected_name], where: {name: {like: pattern}}
  end
end
# `ilike` is the case-insensitive form of `like`: lower-case patterns select
# "Product ABC", and there is still no implicit wildcard at either end
# (so "abc" and "abc%" select nothing here).
def test_ilike
  store_names ["Product ABC", "Product DEF"]
  assert_search "product", ["Product ABC"], where: {name: {ilike: "%abc%"}}
  assert_search "product", ["Product ABC"], where: {name: {ilike: "%abc"}}
  assert_search "product", [], where: {name: {ilike: "abc"}}
  assert_search "product", [], where: {name: {ilike: "abc%"}}
  # positive trailing-wildcard case; takes the place of a verbatim duplicate
  # of the "abc%" assertion above
  assert_search "product", ["Product ABC"], where: {name: {ilike: "product a%"}}
  assert_search "product", ["Product ABC"], where: {name: {ilike: "Product_abc"}}
end
# A backslash-escaped `%` in an ilike pattern stands for a literal percent sign.
def test_ilike_escape
  store_names ["Product 100%", "Product B"]
  literal_percent = "% 100\\%"
  assert_search "product", ["Product 100%"], where: {name: {ilike: literal_percent}}
end
# A double quote inside an ilike pattern is expected to match literally.
def test_ilike_special_characters
  store_names ["Product ABC\"", "Product B"]
  quoted_suffix = "%abc\""
  assert_search "product", ["Product ABC\""], where: {name: {ilike: quoted_suffix}}
end
# `&`, `<` and `@` inside an ilike pattern are expected to match literally,
# ignoring letter case.
def test_ilike_optional_operators
  store_names ["Product A&B", "Product B", "Product <3", "Product @Home"]
  cases = [
    ["Product A&B", "%a&b"],
    ["Product <3", "%<%"],
    ["Product @Home", "%@home%"]
  ]
  cases.each do |expected_name, pattern|
    assert_search "product", [expected_name], where: {name: {ilike: pattern}}
  end
end
# `_script` accepts a Searchkick.script object: plain, with an explicit
# `lang`, and with `params`. Each variant is expected to select "Product A".
def test_script
  store [
    {name: "Product A", store_id: 1},
    {name: "Product B", store_id: 10}
  ]

  plain = Searchkick.script("doc['store_id'].value < 10")
  assert_search "product", ["Product A"], where: {_script: plain}

  expression = Searchkick.script("doc['store_id'].value < 10", lang: "expression")
  assert_search "product", ["Product A"], where: {_script: expression}

  parameterized = Searchkick.script("doc['store_id'].value < params['value']", params: {value: 10})
  assert_search "product", ["Product A"], where: {_script: parameterized}
end
# Passing a bare String as `_script` must raise TypeError
# with the message "expected Searchkick::Script".
def test_script_string
  raw_source = "doc['store_id'].value < 10"
  raised = assert_raises(TypeError) do
    assert_search "product", ["Product A"], where: {_script: raw_source}
  end
  assert_equal "expected Searchkick::Script", raised.message
end
# A String value filters on the exact stored value (upper-case "RED" here).
def test_string
  red_product = {name: "Product A", color: "RED"}
  store [red_product]
  assert_search "product", ["Product A"], where: {color: "RED"}
end
# A nil value selects the record that has no value for the field.
def test_nil
  products = [
    {name: "Product A"},
    {name: "Product B", color: "red"}
  ]
  store products
  assert_search "product", ["Product A"], where: {color: nil}
end
# Filtering on `id` with the record id converted to a String.
def test_id
  store_names ["Product A"]
  id_string = Product.first.id.to_s
  assert_search "product", ["Product A"], where: {id: id_string}
end
# An empty `where` hash applies no filtering.
def test_empty
  store_names ["Product A"]
  no_filter = {}
  assert_search "product", ["Product A"], where: no_filter
end
# An empty array as the filter value is expected to select nothing.
def test_empty_array
  store_names ["Product A"]
  no_ids = []
  assert_search "product", [], where: {store_id: no_ids}
end
# https://discuss.elastic.co/t/numeric-range-quey-or-filter-in-an-array-field-possible-or-not/14053
# https://gist.github.com/jprante/7099463
# Range filter on an array field: only Product A is expected for gt 10 / lt 24.
def test_range_array
  products = [
    {name: "Product A", user_ids: [11, 23, 13, 16, 17, 23]},
    {name: "Product B", user_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {name: "Product C", user_ids: [101, 230, 150, 200]}
  ]
  store products
  bounds = {gt: 10, lt: 24}
  assert_search "product", ["Product A"], where: {user_ids: bounds}
end
# Second range-on-array case: Product A holds 32, the only stored value
# strictly between 26 and 36.
def test_range_array_again
  products = [
    {name: "Product A", user_ids: [19, 32, 42]},
    {name: "Product B", user_ids: [13, 40, 52]}
  ]
  store products
  bounds = {gt: 26, lt: 36}
  assert_search "product", ["Product A"], where: {user_ids: bounds}
end
# `near` with a [lat, lon] array; only San Francisco is expected.
def test_near
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}
  ]
  store cities
  origin = [37.5, -122.5]
  assert_search "san", ["San Francisco"], where: {location: {near: origin}}
end
# `near` with a {lat:, lon:} hash; only San Francisco is expected.
def test_near_hash
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}
  ]
  store cities
  origin = {lat: 37.5, lon: -122.5}
  assert_search "san", ["San Francisco"], where: {location: {near: origin}}
end
# `near` (array form) combined with `within: "2000mi"`: both US cities are
# expected, San Marino is not.
def test_near_within
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000},
    {name: "San Marino", latitude: 43.9333, longitude: 12.4667}
  ]
  store cities
  area = {near: [37, -122], within: "2000mi"}
  assert_search "san", ["San Francisco", "San Antonio"], where: {location: area}
end
# `near` (hash form) combined with `within: "2000mi"`: both US cities are
# expected, San Marino is not.
def test_near_within_hash
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000},
    {name: "San Marino", latitude: 43.9333, longitude: 12.4667}
  ]
  store cities
  area = {near: {lat: 37, lon: -122}, within: "2000mi"}
  assert_search "san", ["San Francisco", "San Antonio"], where: {location: area}
end
# The same four-corner area is queried twice: first through `geo_polygon`
# with the open list of points, then through `geo_shape` after the first
# point has been appended again to close the ring.
def test_geo_polygon
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000},
    {name: "San Marino", latitude: 43.9333, longitude: 12.4667}
  ]
  store cities

  corners = [
    {lat: 42.185695, lon: -125.496146},
    {lat: 42.185695, lon: -94.125535},
    {lat: 27.122789, lon: -94.125535},
    {lat: 27.12278, lon: -125.496146}
  ]
  inside = ["San Francisco", "San Antonio"]
  assert_search "san", inside, where: {location: {geo_polygon: {points: corners}}}

  # same array, mutated in place: the first corner is repeated at the end
  corners << corners.first
  assert_search "san", ["San Francisco", "San Antonio"], where: {location: {geo_shape: {type: "polygon", coordinates: [corners]}}}
end
# Bounding box given as top_left / bottom_right [lat, lon] arrays.
def test_top_left_bottom_right
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}
  ]
  store cities
  box = {top_left: [38, -123], bottom_right: [37, -122]}
  assert_search "san", ["San Francisco"], where: {location: box}
end
# Bounding box given as top_left / bottom_right {lat:, lon:} hashes.
def test_top_left_bottom_right_hash
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}
  ]
  store cities
  box = {top_left: {lat: 38, lon: -123}, bottom_right: {lat: 37, lon: -122}}
  assert_search "san", ["San Francisco"], where: {location: box}
end
# Bounding box given as top_right / bottom_left [lat, lon] arrays.
def test_top_right_bottom_left
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}
  ]
  store cities
  box = {top_right: [38, -122], bottom_left: [37, -123]}
  assert_search "san", ["San Francisco"], where: {location: box}
end
# Bounding box given as top_right / bottom_left {lat:, lon:} hashes.
def test_top_right_bottom_left_hash
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}
  ]
  store cities
  box = {top_right: {lat: 38, lon: -122}, bottom_left: {lat: 37, lon: -123}}
  assert_search "san", ["San Francisco"], where: {location: box}
end
# `near` (array form) applied to the `multiple_locations` field.
def test_multiple_locations
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}
  ]
  store cities
  origin = [37.5, -122.5]
  assert_search "san", ["San Francisco"], where: {multiple_locations: {near: origin}}
end
# `near` on `multiple_locations` combined with a `name` filter: the far-away
# city name yields nothing, the nearby one yields itself.
def test_multiple_locations_with_term_filter
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}
  ]
  store cities

  # [name filter, expected results]
  cases = [
    ["San Antonio", []],
    ["San Francisco", ["San Francisco"]]
  ]
  cases.each do |city_name, expected|
    assert_search "san", expected, where: {multiple_locations: {near: [37.5, -122.5]}, name: city_name}
  end
end
# `near` (hash form) applied to the `multiple_locations` field.
def test_multiple_locations_hash
  cities = [
    {name: "San Francisco", latitude: 37.7833, longitude: -122.4167},
    {name: "San Antonio", latitude: 29.4167, longitude: -98.5000}
  ]
  store cities
  origin = {lat: 37.5, lon: -122.5}
  assert_search "san", ["San Francisco"], where: {multiple_locations: {near: origin}}
end
# A nested attribute is addressed with a dotted String key ("details.year").
def test_nested
  product = {name: "Product A", details: {year: 2016}}
  store [product]
  assert_search "product", ["Product A"], where: {"details.year" => 2016}
end
end