Showing preview only (958K chars total). Download the full file or copy to clipboard to get everything.
Repository: olivernn/lunr.js
Branch: master
Commit: aa5a878f62a6
Files: 77
Total size: 924.1 KB
Directory structure:
gitextract_xvibk5q7/
├── .eslintrc.json
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── CNAME
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── VERSION
├── build/
│ ├── bower.json.template
│ ├── component.json.template
│ ├── jsdoc.conf.json
│ ├── package.json.template
│ ├── release.sh
│ ├── wrapper_end
│ └── wrapper_start
├── index.html
├── lib/
│ ├── builder.js
│ ├── field_ref.js
│ ├── idf.js
│ ├── index.js
│ ├── lunr.js
│ ├── match_data.js
│ ├── pipeline.js
│ ├── query.js
│ ├── query_lexer.js
│ ├── query_parse_error.js
│ ├── query_parser.js
│ ├── set.js
│ ├── stemmer.js
│ ├── stop_word_filter.js
│ ├── token.js
│ ├── token_set.js
│ ├── token_set_builder.js
│ ├── tokenizer.js
│ ├── trimmer.js
│ ├── utils.js
│ └── vector.js
├── lunr.js
├── notes
├── package.json
├── perf/
│ ├── builder_perf.js
│ ├── perf_helper.js
│ ├── pipeline_perf.js
│ ├── query_parser_perf.js
│ ├── search_perf.js
│ ├── stemmer_perf.js
│ ├── token_set_perf.js
│ ├── tokenizer_perf.js
│ └── vector_perf.js
├── styles.css
└── test/
├── builder_test.js
├── env/
│ ├── chai.js
│ ├── index.mustache
│ ├── mocha.css
│ └── mocha.js
├── field_ref_test.js
├── fixtures/
│ └── stemming_vocab.json
├── index.html
├── match_data_test.js
├── pipeline_test.js
├── query_lexer_test.js
├── query_parser_test.js
├── query_test.js
├── search_test.js
├── serialization_test.js
├── set_test.js
├── stemmer_test.js
├── stop_word_filter_test.js
├── test_helper.js
├── token_set_test.js
├── token_test.js
├── tokenizer_test.js
├── trimmer_test.js
├── utils_test.js
└── vector_test.js
================================================
FILE CONTENTS
================================================
================================================
FILE: .eslintrc.json
================================================
{
"env": {
"browser": true,
"node": true
},
"globals": {
"lunr": true
},
"extends": "eslint:recommended",
"plugins": [
"spellcheck"
],
"rules": {
"spellcheck/spell-checker": [1,
{
"lang": "en_GB",
"skipWords": [
"lunr", "val", "param", "idx", "utils", "namespace", "eslint", "latin",
"str", "len", "sqrt", "wildcard", "concat", "metadata", "fn", "params",
"lexeme", "lex", "pos", "typedef", "wildcards", "lexemes", "fns", "stemmer",
"attrs", "tf", "idf", "lookups", "whitelist", "whitelisted", "tokenizer",
"whitespace", "automata", "i", "obj", "anymore", "lexer", "var", "refs",
"serializable", "tis", "twas", "int", "args", "unshift", "plugins", "upsert",
"upserting", "readonly", "baz", "tokenization", "lunrjs", "com", "olivernn",
"github", "js"
]
}
],
"no-constant-condition": [
"error",
{ "checkLoops": false }
],
"no-redeclare": "off",
"dot-location": [
"error",
"property"
],
"no-alert": "error",
"no-caller": "error",
"no-eval": "error",
"no-implied-eval": "error",
"no-extend-native": "error",
"no-implicit-globals": "error",
"no-multi-spaces": "error",
"array-bracket-spacing": "error",
"block-spacing": "error",
"brace-style": [
"error",
"1tbs",
{ "allowSingleLine": true }
],
"camelcase": "error",
"comma-dangle": "error",
"comma-spacing": "error",
"comma-style": "error",
"computed-property-spacing": "error",
"func-style": "error",
"indent": [
"error",
2,
{ "VariableDeclarator": 2, "SwitchCase": 1 }
],
"key-spacing": "error",
"keyword-spacing": "error",
"linebreak-style": "error",
"new-cap": "error",
"no-trailing-spaces": "error",
"no-whitespace-before-property": "error",
"semi": ["error", "never"],
"space-before-function-paren": ["error", "always"],
"space-in-parens": "error",
"space-infix-ops": "error"
}
}
================================================
FILE: .gitignore
================================================
/node_modules
docs/
test/env/file_list.json
================================================
FILE: .travis.yml
================================================
language: node_js
node_js:
- "node"
- "6"
- "5"
- "4"
script: "make"
addons:
artifacts:
branch: master
paths:
- ./docs
target_paths: /docs
================================================
FILE: CHANGELOG.md
================================================
# Changelog
## 2.3.9
* Fix bug [#469](https://github.com/olivernn/lunr.js/issues/469) where a union with a complete set returned a non-complete set. Thanks [Bertrand Le Roy](https://github.com/bleroy) for reporting and fixing.
## 2.3.8
* Fix bug [#422](https://github.com/olivernn/lunr.js/issues/422) where a pipeline function that returned null was not skipping the token as described in the documentation. Thanks [Stephen Cleary](https://github.com/StephenCleary) and [Rob Hoelz](https://github.com/hoelzro) for reporting and investigating.
## 2.3.7
* Fix bug [#417](https://github.com/olivernn/lunr.js/issues/417) where leading white space would cause token position metadata to be reported incorrectly. Thanks [Rob Hoelz](https://github.com/hoelzro) for the fix.
## 2.3.6
* Fix bug [#390](https://github.com/olivernn/lunr.js/issues/390) with fuzzy matching that meant deletions at the end of a word would not match. Thanks [Luca Ongaro](https://github.com/lucaong) for reporting.
## 2.3.5
* Fix bug [#375](https://github.com/olivernn/lunr.js/issues/375) with fuzzy matching that meant insertions at the end of a word would not match. Thanks [Luca Ongaro](https://github.com/lucaong) for reporting and to [Rob Hoelz](https://github.com/hoelzro) for providing a fix.
* Switch to using `Array.isArray` when checking for results from pipeline functions to support `vm.runInContext`, [#381](https://github.com/olivernn/lunr.js/pull/381) thanks [Rob Hoelz](https://github.com/hoelzro).
## 2.3.4
* Ensure that [inverted index is prototype-less](https://github.com/olivernn/lunr.js/pull/378) after serialization, thanks [Rob Hoelz](https://github.com/hoelzro).
## 2.3.3
* Fix bugs [#270](https://github.com/olivernn/lunr.js/issues/270) and [#368](https://github.com/olivernn/lunr.js/issues/368), some wildcard searches over long tokens could be extremely slow, potentially pinning the current thread indefinitely. Thanks [Kyle Spearrin](https://github.com/kspearrin) and [Mohamed Eltuhamy](https://github.com/meltuhamy) for reporting.
## 2.3.2
* Fix bug [#369](https://github.com/olivernn/lunr.js/issues/369) in parsing queries that include either a boost or edit distance modifier followed by a presence modifier on a subsequent term. Thanks [mtdjr](https://github.com/mtdjr) for reporting.
## 2.3.1
* Add workaround for inconsistent browser behaviour [#279](https://github.com/olivernn/lunr.js/issues/279), thanks [Luca Ongaro](https://github.com/lucaong).
* Fix bug in intersect/union of `lunr.Set` [#360](https://github.com/olivernn/lunr.js/issues/360), thanks [Brandon Bethke](https://github.com/brandon-bethke-neudesic) for reporting.
## 2.3.0
* Add support for build time field and document boosts.
* Add support for indexing nested document fields using field extractors.
* Prevent usage of problematic characters in field names, thanks [Stephane Mankowski](https://github.com/miraks31).
* Fix bug when using an array of tokens in a single query term, thanks [Michael Manukyan](https://github.com/mike1808).
## 2.2.1
* Fix bug [#344](https://github.com/olivernn/lunr.js/issues/344) in logic for required terms in multiple fields, thanks [Stephane Mankowski](https://github.com/miraks31).
* Upgrade mocha and fix some test snafus.
## 2.2.0
* Add support for queries with term presence, e.g. required terms and prohibited terms.
* Add support for using the output of `lunr.tokenizer` directly with `lunr.Query#term`.
* Add field name metadata to tokens in build and search pipelines.
* Fix documentation for `lunr.Index` constructor, thanks [Michael Manukyan](https://github.com/mike1808).
## 2.1.6
* Improve pipeline performance for large fields [#329](https://github.com/olivernn/lunr.js/pull/329), thanks [andymcm](https://github.com/andymcm).
## 2.1.5
* Fix bug [#320](https://github.com/olivernn/lunr.js/issues/320) which caused result metadata to be nested under search term instead of field name. Thanks [Jonny Gerig Meyer](https://github.com/jgerigmeyer) for reporting and fixing.
## 2.1.4
* Cache inverse document calculation during build to improve build performance.
* Introduce new method for combining term metadata at search time.
* Improve performance of searches with duplicate search terms.
* Tweaks to build process.
## 2.1.3
* Remove private tag from `lunr.Builder#build`, it should be public, thanks [Sean Tan](https://github.com/seantanly).
## 2.1.2
* Fix bug [#282](https://github.com/olivernn/lunr.js/issues/282) which caused metadata stored in the index to be mutated during search, thanks [Andrew Aldridge](https://github.com/i80and).
## 2.1.1
* Fix bug [#280](https://github.com/olivernn/lunr.js/issues/280) in builder where an object with prototype was being used as a Map, thanks [Pete Bacon Darwin](https://github.com/petebacondarwin).
## 2.1.0
* Improve handling of term boosts across multiple fields [#263](https://github.com/olivernn/lunr.js/issues/263)
* Enable escaping of special characters when performing a search [#271](https://github.com/olivernn/lunr.js/issues/271)
* Add ability to programmatically include leading and trailing wildcards when performing a query.
## 2.0.4
* Fix bug in IDF calculation that meant the weight for common words was not correctly calculated.
## 2.0.3
* Fix bug [#256](https://github.com/olivernn/lunr.js/issues/256) where duplicate query terms could cause a 'duplicate index' error when building the query vector. Thanks [Bjorn Svensson](https://github.com/bsvensson), [Jason Feng](https://github.com/IYCI), and [et1421](https://github.com/et1421) for reporting and confirming the issue.
## 2.0.2
* Fix bug [#255](https://github.com/olivernn/lunr.js/issues/255) where search queries used a different separator than the tokeniser causing some terms to not be searchable. Thanks [Wes Cossick](https://github.com/WesCossick) for reporting.
* Reduce precision of term scores stored in document vectors to reduce the size of serialised indexes by ~15%, thanks [Qvatra](https://github.com/Qvatra) for the idea.
## 2.0.1
* Fix regression [#254](https://github.com/olivernn/lunr.js/issues/254) where documents containing terms that match properties from Object.prototype cause errors during indexing. Thanks [VonFry](https://github.com/VonFry) for reporting.
## 2.0.0
* Indexes are now immutable, this allows for more space efficient indexes, more advanced searching and better performance.
* Text processing can now attach metadata to tokens that enter the index, this opens up the possibility of highlighting search terms in results.
* More advanced searching including search time field boosts, search by field, fuzzy matching and leading and trailing wildcards.
## 1.0.0
* Deprecate incorrectly spelled lunr.tokenizer.seperator.
* No other changes, but bumping to 1.0.0 because it's overdue, and the interfaces are pretty stable now. It also paves the way for 2.0.0...
## 0.7.2
* Fix bug when loading a serialised tokeniser [#226](https://github.com/olivernn/lunr.js/issues/226), thanks [Alex Turpin](https://github.com/alexturpin) for reporting the issue.
* Learn how to spell separator [#223](https://github.com/olivernn/lunr.js/pull/223), thanks [peterennis](https://github.com/peterennis) for helping me learn to spell.
## 0.7.1
* Correctly set the license using the @license doc tag [#217](https://github.com/olivernn/lunr.js/issues/217), thanks [Carlos Araya](https://github.com/caraya).
## 0.7.0
* Make tokenizer a property of the index, allowing for different indexes to use different tokenizers [#205](https://github.com/olivernn/lunr.js/pull/205) and [#21](https://github.com/olivernn/lunr.js/issues/21).
* Fix bug that prevented very large documents from being indexed [#203](https://github.com/olivernn/lunr.js/pull/203), thanks [Daniel Grießhaber](https://github.com/dangrie158).
* Performance improvements when adding documents to the index [#208](https://github.com/olivernn/lunr.js/pull/208), thanks [Dougal Matthews](https://github.com/d0ugal).
## 0.6.0
* Ensure document ref property type is preserved when returning results [#117](https://github.com/olivernn/lunr.js/issues/117), thanks [Kyle Kirby](https://github.com/kkirby).
* Introduce `lunr.generateStopWordFilter` for generating a stop word filter from a provided list of stop words.
* Replace array-like string access with ES3 compatible `String.prototype.charAt` [#186](https://github.com/olivernn/lunr.js/pull/186), thanks [jkellerer](https://github.com/jkellerer).
* Move empty string filtering from `lunr.trimmer` to `lunr.Pipeline.prototype.run` so that empty tokens do not enter the index, regardless of the trimmer being used [#178](https://github.com/olivernn/lunr.js/issues/178), [#177](https://github.com/olivernn/lunr.js/issues/177) and [#174](https://github.com/olivernn/lunr.js/issues/174)
* Allow tokenization of arrays with null and non string elements [#172](https://github.com/olivernn/lunr.js/issues/172).
* Parameterize the separator used by `lunr.tokenizer`, fixes [#102](https://github.com/olivernn/lunr.js/issues/102).
## 0.5.12
* Implement `lunr.stopWordFilter` with an object instead of using `lunr.SortedSet` [#170](https://github.com/olivernn/lunr.js/pull/170), resulting in a performance boost for the text processing pipeline, thanks to [Brian Vaughn](https://github.com/bvaughn).
* Ensure that `lunr.trimmer` does not introduce empty tokens into the index, [#166](https://github.com/olivernn/lunr.js/pull/166), thanks to [janeisklar](https://github.com/janeisklar)
## 0.5.11
* Fix [bug](https://github.com/olivernn/lunr.js/issues/162) when using the unminified build of lunr in some project builds, thanks [Alessio Michelini](https://github.com/darkmavis1980)
## 0.5.10
* Fix bug in IDF calculation, thanks to [weixsong](https://github.com/weixsong) for discovering the issue.
* Documentation fixes [#111](https://github.com/olivernn/lunr.js/pull/111) thanks [Chris Van](https://github.com/cvan).
* Remove version from bower.json as it is not needed [#160](https://github.com/olivernn/lunr.js/pull/160), thanks [Kevin Kirsche](https://github.com/kkirsche)
* Fix link to augment.js on the home page [#159](https://github.com/olivernn/lunr.js/issues/159), thanks [Gábor Nádai](https://github.com/mefiblogger)
## 0.5.9
* Remove recursion from SortedSet#indexOf and SortedSet#locationFor to gain small performance gains in Index#search and Index#add
* Fix incorrect handling of non-existent functions when adding/removing from a Pipeline [#146](https://github.com/olivernn/lunr.js/issues/146) thanks to [weixsong](https://github.com/weixsong)
## 0.5.8
* Fix typo when referencing Martin Porter's home page http://tartarus.org/~martin/ [#132](https://github.com/olivernn/lunr.js/pull/132) thanks [James Aylett](https://github.com/jaylett)
* Performance improvement for tokenizer [#139](https://github.com/olivernn/lunr.js/pull/139) thanks [Arun Srinivasan](https://github.com/satchmorun)
* Fix vector magnitude caching bug :flushed: [#142](https://github.com/olivernn/lunr.js/pull/142) thanks [Richard Poole](https://github.com/richardpoole)
* Fix vector insertion bug that prevented lesser ordered nodes to be inserted into a vector [#143](https://github.com/olivernn/lunr.js/pull/143) thanks [Richard Poole](https://github.com/richardpoole)
* Fix inefficient use of arguments in SortedSet add method, thanks to [Max Nordlund](https://github.com/maxnordlund).
* Fix deprecated use of path.exists in test server [#141](https://github.com/olivernn/lunr.js/pull/141) thanks [wei song](https://github.com/weixsong)
## 0.5.7
* Performance improvement for stemmer [#124](https://github.com/olivernn/lunr.js/pull/124) thanks [Tony Jacobs](https://github.com/tony-jacobs)
## 0.5.6
* Performance improvement when adding documents to the index [#114](https://github.com/olivernn/lunr.js/pull/114) thanks [Alex Holmes](https://github.com/alex2)
## 0.5.5
* Fix bug in tokenizer introduced in 0.5.4 [#101](https://github.com/olivernn/lunr.js/pull/101) thanks [Nolan Lawson](https://github.com/nolanlawson)
## 0.5.4
* Tokenizer also splits on hyphens [#98](https://github.com/olivernn/lunr.js/pull/98/files) thanks [Nolan Lawson](https://github.com/nolanlawson)
## 0.5.3
* Correctly stem words ending with the letter 'y' [#84](https://github.com/olivernn/lunr.js/pull/84) thanks [Mihai Valentin](https://github.com/MihaiValentin)
* Improve build tools and dev dependency installation [#78](https://github.com/olivernn/lunr.js/pull/78) thanks [Ben Pickles](https://github.com/benpickles)
## 0.5.2
* Use npm they said, it'll be easy they said.
## 0.5.1
* Because [npm issues](https://github.com/olivernn/lunr.js/issues/77) :(
## 0.5.0
* Add plugin support to enable i18n and other extensions to lunr.
* Add AMD support [#72](https://github.com/olivernn/lunr.js/issues/72) thanks [lnwdr](https://github.com/lnwdr).
* lunr.Vector now implemented using linked lists for better performance especially in indexes with large numbers of unique tokens.
* Build system clean up.
## 0.4.5
* Fix performance regression introduced in 0.4.4 by fixing #64.
## 0.4.4
* Fix bug [#64](https://github.com/olivernn/lunr.js/issues/64) idf cache should handle tokens with the same name as object properties, thanks [gitgrimbo](https://github.com/gitgrimbo).
* Intersperse source files with a semicolon as part of the build process, fixes [#61](https://github.com/olivernn/lunr.js/issues/61), thanks [shyndman](https://github.com/shyndman).
## 0.4.3
* Fix bug [#49](https://github.com/olivernn/lunr.js/issues/49) tokenizer should handle null and undefined as arguments, thanks [jona](https://github.com/jona).
## 0.4.2
* Fix bug [#47](https://github.com/olivernn/lunr.js/issues/47) tokenizer converts its input to a string before trying to split it into tokens, thanks [mikhailkozlov](https://github.com/mikhailkozlov).
## 0.4.1
* Fix bug [#41](https://github.com/olivernn/lunr.js/issues/41) that caused issues when indexing mixed case tags, thanks [Aptary](https://github.com/Aptary)
## 0.4.0
* Add index mutation events ('add', 'update' and 'remove').
* Performance improvements to searching.
* Penalise non-exact matches so exact matches are better ranked than expanded matches.
## 0.3.3
* Fix bug [#32](https://github.com/olivernn/lunr.js/pull/32) which prevented lunr being used where a `console` object is not present, thanks [Tony Marklove](https://github.com/jjbananas) and [wyuenho](https://github.com/wyuenho)
## 0.3.2
* Fix bug [#27](https://github.com/olivernn/lunr.js/pull/27) when trying to calculate tf with empty fields, thanks [Gambhiro](https://github.com/gambhiro)
## 0.3.1
* Fix bug [#24](https://github.com/olivernn/lunr.js/pull/24) that caused an error when trying to remove a non-existent document from the index, thanks [Jesús Leganés Combarro](https://github.com/piranna)
## 0.3.0
* Implement [JSON serialisation](https://github.com/olivernn/lunr.js/pull/14), allows indexes to be loaded and dumped, thanks [ssured](https://github.com/ssured).
* Performance improvements to searching and indexing.
* Fix bug [#15](https://github.com/olivernn/lunr.js/pull/15) with tokeniser that added stray empty white space to the index, thanks [ssured](https://github.com/ssured).
## 0.2.3
* Fix issue with searching for a term not in the index [#12](https://github.com/olivernn/lunr.js/issues/12), thanks [mcnerthney](https://github.com/mcnerthney) and [makoto](https://github.com/makoto)
## 0.2.2
* Boost exact term matches so they are better ranked than expanded term matches, fixes [#10](https://github.com/olivernn/lunr.js/issues/10), thanks [ssured](https://github.com/ssured)
## 0.2.1
* Changes to the build process.
* Add component.json and package.json
* Add phantomjs test runner
* Remove redundant attributes
* Many [spelling corrections](https://github.com/olivernn/lunr.js/pull/8), thanks [Pascal Borreli](https://github.com/pborreli)
================================================
FILE: CNAME
================================================
lunrjs.com
================================================
FILE: CONTRIBUTING.md
================================================
Contributions are very welcome. To make the process as easy as possible please follow these steps:
* Open an issue detailing the bug you've found, or the feature you wish to add. Simplified working examples using something like [jsFiddle](http://jsfiddle.net) make it easier to diagnose your problem.
* Add tests for your code (so I don't accidentally break it in the future).
* Don't change version numbers or make new builds as part of your changes.
* Don't change the built versions of the library; only make changes to code in the `lib` directory.
# Developer Dependencies
A JavaScript runtime is required for building the library.
Run the tests (using PhantomJS):
make test
The tests can also be run in the browser by starting the test server:
make server
This will start a server on port 3000, the tests are then available at `/test`.
================================================
FILE: LICENSE
================================================
Copyright (C) 2013 by Oliver Nightingale
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
================================================
FILE: Makefile
================================================
# Library sources. They are concatenated in this order to build lunr.js and
# are also the inputs to the lint and docs targets.
SRC = lib/lunr.js \
lib/utils.js \
lib/field_ref.js \
lib/set.js \
lib/idf.js \
lib/token.js \
lib/tokenizer.js \
lib/pipeline.js \
lib/vector.js \
lib/stemmer.js \
lib/stop_word_filter.js \
lib/trimmer.js \
lib/token_set.js \
lib/token_set_builder.js \
lib/index.js \
lib/builder.js \
lib/match_data.js \
lib/query.js \
lib/query_parse_error.js \
lib/query_lexer.js \
lib/query_parser.js \
YEAR = $(shell date +%Y)
VERSION = $(shell cat VERSION)
# Tool locations. Each uses ?= so it can be overridden from the environment
# or the make command line.
NODE ?= $(shell which node)
NPM ?= $(shell which npm)
UGLIFYJS ?= ./node_modules/.bin/uglifyjs
MOCHA ?= ./node_modules/.bin/mocha
MUSTACHE ?= ./node_modules/.bin/mustache
ESLINT ?= ./node_modules/.bin/eslint
JSDOC ?= ./node_modules/.bin/jsdoc
NODE_STATIC ?= ./node_modules/.bin/static
# Default target: run the tests, lint the sources and generate the docs.
all: test lint docs
# Everything that has to be regenerated for a release.
release: lunr.js lunr.min.js bower.json package.json component.json docs
# Wrap the concatenated sources between build/wrapper_start and
# build/wrapper_end, then substitute the @YEAR and @VERSION placeholders.
lunr.js: $(SRC)
cat build/wrapper_start $^ build/wrapper_end | \
sed "s/@YEAR/${YEAR}/" | \
sed "s/@VERSION/${VERSION}/" > $@
# Minified build produced from lunr.js with uglifyjs.
lunr.min.js: lunr.js
${UGLIFYJS} --compress --mangle --comments < $< > $@
# Generate <name>.json from build/<name>.json.template by filling in @VERSION.
%.json: build/%.json.template
cat $< | sed "s/@VERSION/${VERSION}/" > $@
# Print the gzipped size, in bytes, of the minified build.
size: lunr.min.js
@gzip -c lunr.min.js | wc -c
# Start the static file server used for running the tests in a browser
# (listening on 0.0.0.0, sending a no-cache Cache-Control header).
server: test/index.html
${NODE_STATIC} -a 0.0.0.0 -H '{"Cache-Control": "no-cache, must-revalidate"}'
# Lint the library sources with eslint.
lint: $(SRC)
${ESLINT} $^
# Run a perf script with node, preloading perf/perf_helper.js.
perf/*_perf.js:
${NODE} -r ./perf/perf_helper.js $@
# Run every perf script.
benchmark: perf/*_perf.js
# Run the mocha test suite (tdd interface, dot reporter) against the built lunr.js.
test: node_modules lunr.js
${MOCHA} test/*.js -u tdd -r test/test_helper.js -R dot -C
# Same as test, but passes --inspect-brk=0.0.0.0:9292 to mocha for debugging.
test/inspect: node_modules lunr.js
${MOCHA} test/*.js -u tdd -r test/test_helper.js -R dot -C --inspect-brk=0.0.0.0:9292
# Write the list of test files as JSON, consumed by the mustache template below.
test/env/file_list.json: $(wildcard test/*test.js)
${NODE} -p 'JSON.stringify({test_files: process.argv.slice(1)})' $^ > $@
# Render the browser test page from the file list and test/env/index.mustache.
test/index.html: test/env/file_list.json test/env/index.mustache
${MUSTACHE} $^ > $@
# Generate the API documentation into docs/ with jsdoc.
docs: $(SRC)
${JSDOC} -R README.md -d docs -c build/jsdoc.conf.json $^
# Remove generated artefacts: the built library, the docs and every top-level
# *.json file. Note that *.json also matches the checked-in package.json;
# `make reset` restores tracked files with git checkout.
#
# The built files are listed explicitly rather than as lunr{.min,}.js because
# recipes run under /bin/sh by default, which need not support brace expansion.
# -f keeps the target from failing when there is nothing to delete.
clean:
	rm -f lunr.min.js lunr.js
	rm -rf docs
	rm -f *.json
# Restore the built lunr.* files and the top-level *.json files from git.
reset:
git checkout lunr.* *.json
# Install npm dependencies (silently); re-run whenever package.json changes.
node_modules: package.json
${NPM} -s install
# Targets that do not correspond to files produced by their recipes.
.PHONY: test clean docs reset perf/*_perf.js test/inspect
================================================
FILE: README.md
================================================
# Lunr.js
[Join the chat on Gitter](https://gitter.im/olivernn/lunr.js?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[Build status on Travis CI](https://travis-ci.org/olivernn/lunr.js)
A bit like Solr, but much smaller and not as bright.
## Example
A very simple search index can be created using the following:
```javascript
var idx = lunr(function () {
this.field('title')
this.field('body')
this.add({
"title": "Twelfth-Night",
"body": "If music be the food of love, play on: Give me excess of it…",
"author": "William Shakespeare",
"id": "1"
})
})
```
Then searching is as simple as:
```javascript
idx.search("love")
```
This returns a list of matching documents with a score of how closely they match the search query as well as any associated metadata about the match:
```javascript
[
{
"ref": "1",
"score": 0.3535533905932737,
"matchData": {
"metadata": {
"love": {
"body": {}
}
}
}
}
]
```
[API documentation](https://lunrjs.com/docs/index.html) is available, as well as a [full working example](https://olivernn.github.io/moonwalkers/).
## Description
Lunr.js is a small, full-text search library for use in the browser. It indexes JSON documents and provides a simple search interface for retrieving documents that best match text queries.
## Why
For web applications with all their data already sitting in the client, it makes sense to be able to search that data on the client too. It saves adding extra, compacted services on the server. A local search index will be quicker, there is no network overhead, and will remain available and usable even without a network connection.
## Installation
Simply include the lunr.js source file in the page that you want to use it. Lunr.js is supported in all modern browsers.
Alternatively an npm package is also available `npm install lunr`.
Browsers that do not support ES5 will require a JavaScript shim for Lunr to work. You can either use [Augment.js](https://github.com/olivernn/augment.js), [ES5-Shim](https://github.com/kriskowal/es5-shim) or any library that patches old browsers to provide an ES5 compatible JavaScript environment.
## Features
* Full text search support for 14 languages
* Boost terms at query time or boost entire documents at index time
* Scope searches to specific fields
* Fuzzy term matching with wildcards or edit distance
## Contributing
See the [`CONTRIBUTING.md` file](CONTRIBUTING.md).
================================================
FILE: VERSION
================================================
2.3.9
================================================
FILE: build/bower.json.template
================================================
{
"name": "lunr.js",
"version": "@VERSION",
"main": "lunr.js",
"ignore": [
"tests/",
"perf/",
"build/",
"docs/"
]
}
================================================
FILE: build/component.json.template
================================================
{
"name": "lunr",
"repo": "olivernn/lunr.js",
"version": "@VERSION",
"description": "Simple full-text search in your browser.",
"license": "MIT",
"main": "lunr.js",
"scripts": ["lunr.js"]
}
================================================
FILE: build/jsdoc.conf.json
================================================
{
"plugins": ["plugins/markdown"],
"destination": "docs",
"readme": "README.md",
"templates": {
"default": {
"useLongnameInNav": true
}
}
}
================================================
FILE: build/package.json.template
================================================
{
"name": "lunr",
"description": "Simple full-text search in your browser.",
"version": "@VERSION",
"author": "Oliver Nightingale",
"keywords": ["search"],
"homepage": "https://lunrjs.com",
"bugs": "https://github.com/olivernn/lunr.js/issues",
"main": "lunr.js",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/olivernn/lunr.js.git"
},
"devDependencies": {
"benchmark": "2.1.x",
"chai": "3.5.x",
"eslint-plugin-spellcheck": "0.0.8",
"eslint": "3.4.x",
"jsdoc": "3.5.x",
"mocha": "3.3.x",
"mustache": "2.2.x",
"node-static": "0.7.x",
"uglify-js": "2.6.x",
"word-list": "1.0.x"
},
"scripts": {
"test": "make test"
}
}
================================================
FILE: build/release.sh
================================================
#!/usr/bin/env bash
# file_has_changed PATH
#
# Succeeds (returns 0) when PATH is an existing regular file that git reports
# as modified in the working tree; returns 1 otherwise.
#
# PATH is quoted and the output of `git ls-files --modified` is read line by
# line, so paths containing whitespace are compared as a whole instead of
# being split into separate words.
file_has_changed () {
  local target="$1"
  local changed

  if [ ! -f "$target" ]; then
    return 1
  fi

  while IFS= read -r changed; do
    if [[ "$changed" == "$target" ]]; then
      return 0
    fi
  done < <(git ls-files --modified)

  return 1
}
# version_is_unique VERSION
#
# Returns 0 when no existing git tag is named "v<VERSION>", and 1 as soon as
# a tag with that exact name is found.
version_is_unique () {
  local wanted="v$1"
  local tag

  for tag in $(git tag -l); do
    if [[ "$tag" == "$wanted" ]]; then
      return 1
    fi
  done

  return 0
}
# on_master_branch
#
# Returns 0 when HEAD is a symbolic ref whose short name is "master", and 1
# otherwise (including when git prints nothing).
on_master_branch () {
  local current
  current=$(git symbolic-ref --short -q HEAD)

  if [[ "$current" == "master" ]]; then
    return 0
  fi

  return 1
}
# The version being released, and the most recent tag reachable from HEAD
# (used as the start of the change log range shown below).
version=$(cat VERSION)
previous_version=$(git describe --abbrev=0)

# Pre-flight checks. Each one prints a red message and aborts the release.
if ! on_master_branch; then
  echo -e "\033[0;31mRefusing to release from non master branch.\033[0m"
  exit 1
fi

if ! file_has_changed "VERSION"; then
  echo -e "\033[0;31mRefusing to release because VERSION has not changed.\033[0m"
  exit 1
fi

if ! file_has_changed "CHANGELOG.md"; then
  echo -e "\033[0;31mRefusing to release because CHANGELOG.md has not been updated.\033[0m"
  exit 1
fi

if ! file_has_changed "package.json"; then
  echo -e "\033[0;31mRefusing to release because package.json has not been updated.\033[0m"
  exit 1
fi

if ! version_is_unique "$version"; then
  echo -e "\033[0;31mRefusing to release because VERSION is not unique.\033[0m"
  exit 1
fi

# Show what is about to be released and ask for confirmation.
echo -e "\033[1mAbout to release v$version with the following changes:\033[0m"
git log --date=short --pretty=format:"%ad %h%x09%an%x09%s" "$previous_version..HEAD"

echo

echo -e "\033[1mThe following files will be part of the release commit:\033[0m"
git ls-files --modified

echo

# Read a single character without backslash processing; the answer ends up in $REPLY.
read -e -p "Are you sure you want to release? " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
  echo -e "\033[0;32mReleasing...\033[0m"
  echo
  # Each step only runs if the previous one succeeded, so a failed commit,
  # tag or push can never be followed by an npm publish.
  git commit -a -m "Build version $version" &&
    git tag -a "v$version" -m "Version $version" &&
    git push origin master &&
    git push --tags &&
    npm publish || {
      echo -e "\033[0;31mRelease failed, aborting.\033[0m"
      exit 1
    }
else
  echo -e "\033[0;31mCancelling...\033[0m"
fi
================================================
FILE: build/wrapper_end
================================================
/**
* export the module via AMD, CommonJS or as a browser global
* Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
*/
;(function (root, factory) {
if (typeof define === 'function' && define.amd) {
// AMD. Register as an anonymous module.
define(factory)
} else if (typeof exports === 'object') {
/**
* Node. Does not work with strict CommonJS, but
* only CommonJS-like enviroments that support module.exports,
* like Node.
*/
module.exports = factory()
} else {
// Browser globals (root is window)
root.lunr = factory()
}
}(this, function () {
/**
* Just return a value to define the module export.
* This example returns an object, but the module
* can return a function as the exported value.
*/
return lunr
}))
})();
================================================
FILE: build/wrapper_start
================================================
/**
* lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - @VERSION
* Copyright (C) @YEAR Oliver Nightingale
* @license MIT
*/
;(function(){
================================================
FILE: index.html
================================================
<!DOCTYPE html>
<html>
<head>
<title>lunr.js - A bit like Solr, but much smaller and not as bright</title>
<link rel="stylesheet" href="/styles.css" type="text/css">
<script type="text/javascript" src="/lunr.min.js">
</script>
<script type="text/javascript">
// Google Analytics (ga.js) snippet: reuse an existing _gaq command array if
// one is already defined, then queue the account id and a page view.
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-25695442-4']);
_gaq.push(['_trackPageview']);
// Inject the ga.js script asynchronously, choosing the host from the page's
// protocol, and insert it before the first <script> element in the document.
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
</head>
<body>
<div class='wrap'>
<header>
<h1>lunr<span>.js</span></h1>
<h2>Simple full-text search in your browser</h2>
</header>
<nav>
<ul>
<li><a href="/docs">Docs</a></li>
<li><a href="/example">Examples</a></li>
<li><a href="http://github.com/olivernn/lunr.js">Code</a></li>
</ul>
</nav>
<section class="columns">
<article>
<header>
<h3>Get Started</h3>
</header>
<p>Open your browser's <a id="developer-tools" target="_blank">developer tools</a> on this page to follow along.</p>
<p>Set up an index for your notes:</p>
<pre>
var index = lunr(function () {
this.field('title', {boost: 10})
this.field('body')
this.ref('id')
})</pre>
<p>Add documents to your index</p>
<pre>
index.add({
id: 1,
title: 'Foo',
body: 'Foo foo foo!'
})
index.add({
id: 2,
title: 'Bar',
body: 'Bar bar bar!'
})</pre>
<p>Search your documents</p>
<pre>
index.search('foo')</pre>
</article>
<article>
<header>
<h3>About</h3>
</header>
<p>lunr.js is a simple full text search engine for your client side applications. It is designed to be small, yet full featured, enabling you to provide a great search experience without the need for external, server side, search services.</p>
<p>lunr.js has no external dependencies, although it does require a modern browser with ES5 support. In older browsers you can use an ES5 shim, such as <a href="https://olivernn.github.io/augment.js/">augment.js</a>, to provide any missing JavaScript functionality.</p>
</article>
<article class="download">
<header>
<h3>Download</h3>
</header>
<ul>
<li><a href="https://raw.github.com/olivernn/lunr.js/master/lunr.js">lunr.js</a> - uncompressed</li>
<li><a href="https://raw.github.com/olivernn/lunr.js/master/lunr.min.js">lunr.min.js</a> - minified</li>
</ul>
</article>
</section>
<section>
<article>
<header>
<h3>Pipeline</h3>
</header>
<p>Every document and search query that enters lunr is passed through a text <a href="/docs#Pipeline">processing pipeline</a>. The pipeline is simply a stack of functions that perform some processing on the text. Pipeline functions act on the text one token at a time, and what they return is passed to the next function in the pipeline.</p>
<p>By default lunr adds a <a href="/docs#stopWordFilter">stop word filter</a> and <a href="/docs#stemmer">stemmer</a> to the pipeline. You can also add your own processors or remove the default ones depending on your requirements. The stemmer currently used is an English language stemmer, which could be replaced with a non-English language stemmer if required, or a <a href="http://en.wikipedia.org/wiki/Metaphone">Metaphoning</a> processor could be added.</p>
<pre>
var index = lunr(function () {
this.pipeline.add(function (token, tokenIndex, tokens) {
// text processing in here
})
this.pipeline.after(lunr.stopWordFilter, function (token, tokenIndex, tokens) {
// text processing in here
})
})
</pre>
<p>Functions in the pipeline are called with three arguments: the current token being processed, the index of that token in the array of tokens, and the whole list of tokens that make up the part of the document being processed. This enables simple unigram processing of tokens as well as more sophisticated n-gram processing.</p>
<p>The function should return the processed version of the text, which will in turn be passed to the next function in the pipeline. Returning <code>undefined</code> will prevent any further processing of the token, and that token will not make it to the index.</p>
</article>
</section>
<section class="columns">
<article>
<header>
<h3>Tokenization</h3>
</header>
<p>Tokenization is how lunr converts documents and searches into individual tokens, ready to be run through the text processing pipeline and entered or looked up in the index.</p>
<p>The default tokenizer included with lunr is designed to handle general English text well, although application- or language-specific tokenizers can be used instead.</p>
</article>
<article>
<header>
<h3>Stemming</h3>
</header>
<p>Stemming increases the recall of the search index by reducing related words down to their stem, so that non-exact search terms still match relevant documents. For example 'search', 'searching' and 'searched' all get reduced to the stem 'search'.</p>
<p>lunr automatically includes a stemmer based on <a href="http://tartarus.org/martin/PorterStemmer/">Martin Porter's</a> algorithms.</p>
</article>
<article>
<header>
<h3>Stop words</h3>
</header>
<p>Stop words are words that are very common and are not useful in differentiating between documents. These are automatically removed by lunr. This helps to reduce the size of the index and improve search speed and accuracy.</p>
<p>The default stop word filter contains a large list of very common words in English. For best results a corpus specific stop word filter can also be added to the pipeline. The search algorithm already penalises more common words, but preventing them from entering the index at all can be very beneficial for both space and speed performance.</p>
</article>
</section>
<footer>
<ul>
<li>Code by <a href="http://twitter.com/olivernn">Oliver Nightingale</a></li>
<li><a href="http://github.com/olivernn/lunr.js">Code</a></li>
<li><a href="/docs">Documentation</a></li>
<li><a href="http://github.com/olivernn/lunr.js/issues">Issues</a></li>
</ul>
</footer>
</div>
<script>
(function (ua) {
// some _really_ basic browser sniffing to choose the right dev tools link.
var consoleSrc,
CHROME = /Chrome/,
CHROMIUM = /Chromium/,
FIREFOX = /Firefox/,
MSIE = /MSIE/
OPERA = /(OPR|Opera)/,
SAFARI = /Safari/,
SEAMONKEY = /Seamonkey/
switch (true) {
case FIREFOX.test(ua) && !SEAMONKEY.test(ua):
// firefox
consoleSrc = "https://developer.mozilla.org/en-US/docs/Tools/Web_Console"
break
case CHROME.test(ua):
// chrome
consoleSrc = "https://developer.chrome.com/devtools"
break
case SAFARI.test(ua) && !CHROME.test(ua) && !CHROMIUM.test(ua):
// safari
consoleSrc = "https://developer.apple.com/library/safari/documentation/AppleApplications/Conceptual/Safari_Developer_Guide/GettingStarted/GettingStarted.html#//apple_ref/doc/uid/TP40007874-CH2-SW1"
break
case OPERA.test(ua):
// opera
consoleSrc = "http://www.opera.com/dragonfly/"
break
case MSIE.test(ua):
// IE
consoleSrc = "http://msdn.microsoft.com/en-us/library/ie/hh673541(v=vs.85).aspx"
break
}
if (consoleSrc) document.getElementById('developer-tools').href = consoleSrc
})(navigator.userAgent)
</script>
<script>
(function (hijs) {
  //
  // hijs - JavaScript Syntax Highlighter
  //
  // Copyright (c) 2010 Alexis Sellier
  //
  // Highlighting happens in two passes: first every match in a text node is
  // wrapped in «name·text·name» markers, then the markers found in each
  // element's innerHTML are turned into <span class="name"> elements.
  //

  // All elements which match this will be syntax highlighted.
  var selector = hijs || 'pre';

  var keywords = ('var function if else for while break switch case do new null in with void '
                 +'continue delete return this true false throw catch typeof with instanceof').split(' '),
      special  = ('eval window document undefined NaN Infinity parseInt parseFloat '
                 +'encodeURI decodeURI encodeURIComponent decodeURIComponent').split(' ');

  // Syntax definition
  // The key becomes the class name of the <span>
  // around the matched block of code.
  var syntax = [
    ['comment', /(\/\*(?:[^*\n]|\*+[^\/*])*\*+\/)/g],
    ['comment', /(\/\/[^\n]*)/g],
    ['string' , /("(?:(?!")[^\\\n]|\\.)*"|'(?:(?!')[^\\\n]|\\.)*')/g],
    ['regexp' , /(\/.+\/[mgi]*)(?!\s*\w)/g],
    ['class'  , /\b([A-Z][a-zA-Z]+)\b/g],
    ['number' , /\b([0-9]+(?:\.[0-9]+)?)\b/g],
    ['keyword', new(RegExp)('\\b(' + keywords.join('|') + ')\\b', 'g')],
    ['special', new(RegExp)('\\b(' + special.join('|') + ')\\b', 'g')]
  ];

  var nodes, table = {};

  // Use the cheapest DOM lookup that can handle the selector.
  if (/^[a-z]+$/.test(selector)) {
    nodes = document.getElementsByTagName(selector);
  } else if (/^\.[\w-]+$/.test(selector)) {
    nodes = document.getElementsByClassName(selector.slice(1));
  } else if (document.querySelectorAll) {
    nodes = document.querySelectorAll(selector);
  } else {
    nodes = [];
  }

  // Pass 1: mark up matches inside text nodes. Both the rule name and the
  // matched text are stored encoded (see encode below), presumably so that
  // later rules do not match inside text that is already marked.
  for (var i = 0, children; i < nodes.length; i++) {
    children = nodes[i].childNodes;

    // `code` is declared here; it used to be assigned without a declaration
    // and therefore leaked as an implicit global.
    for (var j = 0, code; j < children.length; j++) {
      code = children[j];

      if (code.length >= 0) { // It's a text node
        // Don't highlight command-line snippets
        if (! /^\$/.test(code.nodeValue.trim())) {
          syntax.forEach(function (s) {
            var k = s[0], v = s[1];
            code.nodeValue = code.nodeValue.replace(v, function (_, m) {
              return '\u00ab' + encode(k) + '\u00b7'
                              + encode(m) +
                     '\u00b7' + encode(k) + '\u00bb';
            });
          });
        }
      }
    }
  }

  // Pass 2: replace the markers with <span> elements. Markers nested inside
  // a match are stripped so that only the outermost rule applies.
  for (var i = 0; i < nodes.length; i++) {
    nodes[i].innerHTML =
      nodes[i].innerHTML.replace(/\u00ab(.+?)\u00b7(.+?)\u00b7\1\u00bb/g, function (_, name, value) {
        value = value.replace(/\u00ab[^\u00b7]+\u00b7/g, '').replace(/\u00b7[^\u00bb]+\u00bb/g, '');
        return '<span class="' + decode(name) + '">' + escape(decode(value)) + '</span>';
      });
  }

  // Escape decoded text before it is written back through innerHTML. The
  // ampersand has to be replaced first so that the entities produced for
  // '<' and '>' are not escaped a second time.
  function escape(str) {
    return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
  }

  // Encode ASCII characters to, and from Braille
  //
  // Every character with a code of 127 or below is shifted up by 0x2800;
  // the mapping from the encoded string back to the original is remembered
  // in `table`.
  function encode (str, encoded) {
    table[encoded = str.split('').map(function (s) {
      if (s.charCodeAt(0) > 127) { return s }
      return String.fromCharCode(s.charCodeAt(0) + 0x2800);
    }).join('')] = str;
    return encoded;
  }

  // Prefer the remembered original; otherwise shift the characters back.
  function decode (str) {
    if (str in table) {
      return table[str];
    } else {
      return str.trim().split('').map(function (s) {
        if (s.charCodeAt(0) - 0x2800 > 127) { return s }
        return String.fromCharCode(s.charCodeAt(0) - 0x2800);
      }).join('');
    }
  }
})(window.hijs);
</script>
</body>
</html>
================================================
FILE: lib/builder.js
================================================
/*!
* lunr.Builder
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
* lunr.Builder performs indexing on a set of documents and
* returns instances of lunr.Index ready for querying.
*
* All configuration of the index is done via the builder, the
* fields to index, the document reference, the text processing
* pipeline and document scoring parameters are all set on the
* builder before indexing.
*
* @constructor
* @property {string} _ref - Internal reference to the document reference field, defaults to "id".
* @property {object} _fields - Internal map from the name of each field to index to the attributes given for that field.
* @property {object} _documents - Internal map from document reference to the attributes given when the document was added.
* @property {object} invertedIndex - The inverted index maps terms to document fields.
* @property {object} fieldTermFrequencies - Keeps track of term frequencies, keyed by field reference.
* @property {object} fieldLengths - Keeps track of the length of every document field added to the index, keyed by field reference.
* @property {lunr.tokenizer} tokenizer - Function for splitting strings into tokens for indexing.
* @property {lunr.Pipeline} pipeline - The pipeline performs text processing on tokens before indexing.
* @property {lunr.Pipeline} searchPipeline - A pipeline for processing search terms before querying the index.
* @property {number} documentCount - Keeps track of the total number of documents indexed.
* @property {number} _b - A parameter to control field length normalization, setting this to 0 disables normalization, 1 fully normalizes field lengths, the default value is 0.75.
* @property {number} _k1 - A parameter to control how quickly an increase in term frequency results in term frequency saturation, the default value is 1.2.
* @property {number} termIndex - A counter incremented for each unique term, used to identify a terms position in the vector space.
* @property {array} metadataWhitelist - A list of metadata keys that have been whitelisted for entry in the index.
*/
lunr.Builder = function () {
this._ref = "id"
// dictionaries keyed by field name, document ref or term are created
// without a prototype so that no key can collide with an inherited property
this._fields = Object.create(null)
this._documents = Object.create(null)
this.invertedIndex = Object.create(null)
this.fieldTermFrequencies = {}
this.fieldLengths = {}
this.tokenizer = lunr.tokenizer
this.pipeline = new lunr.Pipeline
this.searchPipeline = new lunr.Pipeline
this.documentCount = 0
this._b = 0.75
this._k1 = 1.2
this.termIndex = 0
this.metadataWhitelist = []
}
/**
* Sets the document field used as the document reference. Every document must have this field.
* The type of this field in the document should be a string, if it is not a string it will be
* coerced into a string by calling toString.
*
* The default ref is 'id'.
*
* The ref should _not_ be changed during indexing, it should be set before any documents are
* added to the index. Changing it during indexing can lead to inconsistent results.
*
* The name is stored as given; no check is made that documents actually have this field.
*
* @param {string} ref - The name of the reference field in the document.
*/
lunr.Builder.prototype.ref = function (ref) {
this._ref = ref
}
/**
* A function that is used to extract a field from a document.
*
* Lunr expects a field to be at the top level of a document, if however the field
* is deeply nested within a document an extractor function can be used to extract
* the right field for indexing.
*
* @callback fieldExtractor
* @param {object} doc - The document being added to the index.
* @returns {?(string|object|object[])} obj - The object that will be indexed for this field.
* @example <caption>Extracting a nested field</caption>
* function (doc) { return doc.nested.field }
*/
/**
 * Registers a document field for indexing. Every document being indexed
 * should have this field. Null values for this field in indexed documents
 * will not cause errors but will limit the chance of that document being
 * retrieved by searches.
 *
 * Register all fields before adding documents: a field added afterwards has
 * no effect on documents that have already been indexed. Registering the
 * same field name again replaces the attributes stored for it.
 *
 * Fields can be boosted at build time. This allows terms within that field
 * to have more importance when ranking search results. Use a field boost to
 * specify that matches within one field are more important than other fields.
 *
 * @param {string} fieldName - The name of a field to index in all documents.
 * @param {object} attributes - Optional attributes associated with this field.
 * @param {number} [attributes.boost=1] - Boost applied to all terms within this field.
 * @param {fieldExtractor} [attributes.extractor] - Function to extract a field from a document.
 * @throws {RangeError} fieldName cannot contain unsupported characters '/'
 */
lunr.Builder.prototype.field = function (fieldName, attributes) {
  // '/' separates the field name from the document ref in field references,
  // so it cannot be part of the field name itself
  var containsSlash = /\//.test(fieldName)

  if (containsSlash) {
    throw new RangeError ("Field '" + fieldName + "' contains illegal character '/'")
  }

  this._fields[fieldName] = attributes ? attributes : {}
}
/**
 * Tunes the amount of field length normalisation that is applied when
 * calculating relevance scores. A value of 0 will completely disable any
 * normalisation and a value of 1 will fully normalise field lengths. The
 * default is 0.75.
 *
 * Values below 0 are stored as 0 and values above 1 are stored as 1; any
 * other value is stored unchanged.
 *
 * @param {number} number - The value to set for this tuning parameter.
 */
lunr.Builder.prototype.b = function (number) {
  var clamped = number

  if (number < 0) {
    clamped = 0
  } else if (number > 1) {
    clamped = 1
  }

  this._b = clamped
}
/**
* A parameter that controls the speed at which a rise in term frequency results in term
* frequency saturation. The default value is 1.2. Setting this to a higher value will give
* slower saturation levels, a lower value will result in quicker saturation.
*
* The value is stored as given, no clamping or validation is applied.
*
* @param {number} number - The value to set for this tuning parameter.
*/
lunr.Builder.prototype.k1 = function (number) {
this._k1 = number
}
/**
 * Adds a document to the index.
 *
 * The builder should be fully set up before documents are added, with the
 * document ref and all fields to index already having been specified.
 *
 * The document must have a field name as specified by the ref (by default
 * this is 'id') and it should have all fields defined for indexing, though
 * null or undefined values will not cause errors.
 *
 * For every configured field the value (taken from the document, or from
 * the field's extractor when one is configured) is tokenized and run
 * through the pipeline. The resulting terms update the per-field term
 * frequencies, the field length and the inverted index, including any
 * whitelisted token metadata.
 *
 * Entire documents can be boosted at build time. Applying a boost to a
 * document indicates that this document should rank higher in search
 * results than other documents.
 *
 * @param {object} doc - The document to add to the index.
 * @param {object} attributes - Optional attributes associated with this document.
 * @param {number} [attributes.boost=1] - Boost applied to all terms within this document.
 */
lunr.Builder.prototype.add = function (doc, attributes) {
  var docRef = doc[this._ref],
      fieldNames = Object.keys(this._fields)

  this._documents[docRef] = attributes || {}
  this.documentCount += 1

  for (var f = 0; f < fieldNames.length; f++) {
    var fieldName = fieldNames[f],
        extractor = this._fields[fieldName].extractor,
        fieldValue = extractor ? extractor(doc) : doc[fieldName],
        tokens = this.tokenizer(fieldValue, {
          fields: [fieldName]
        }),
        terms = this.pipeline.run(tokens),
        fieldRef = new lunr.FieldRef (docRef, fieldName),
        termCounts = Object.create(null)

    this.fieldTermFrequencies[fieldRef] = termCounts

    // the length of a field is the number of terms the pipeline produced
    this.fieldLengths[fieldRef] = 0
    this.fieldLengths[fieldRef] += terms.length

    for (var t = 0; t < terms.length; t++) {
      var term = terms[t]

      // count the occurrences of this term within the field
      if (termCounts[term] == undefined) {
        termCounts[term] = 0
      }

      termCounts[term] += 1

      // a term seen for the first time gets a posting that holds its
      // position in the vector space and an empty entry for every field
      var posting = this.invertedIndex[term]

      if (posting == undefined) {
        posting = Object.create(null)
        posting["_index"] = this.termIndex
        this.termIndex += 1

        for (var p = 0; p < fieldNames.length; p++) {
          posting[fieldNames[p]] = Object.create(null)
        }

        this.invertedIndex[term] = posting
      }

      // make sure there is an entry for this term/fieldName/docRef
      var fieldPosting = posting[fieldName],
          docEntry = fieldPosting[docRef]

      if (docEntry == undefined) {
        docEntry = fieldPosting[docRef] = Object.create(null)
      }

      // collect every whitelisted piece of token metadata, one value
      // per occurrence of the term
      for (var m = 0; m < this.metadataWhitelist.length; m++) {
        var metadataKey = this.metadataWhitelist[m],
            metadata = term.metadata[metadataKey]

        if (docEntry[metadataKey] == undefined) {
          docEntry[metadataKey] = []
        }

        docEntry[metadataKey].push(metadata)
      }
    }
  }
}
/**
 * Calculates the average length of every field across the documents that
 * have been added, storing the result in `averageFieldLength`, keyed by
 * field name.
 *
 * A configured field for which no lengths have been recorded ends up with
 * an average of NaN.
 *
 * @private
 */
lunr.Builder.prototype.calculateAverageFieldLengths = function () {
  // Both dictionaries are keyed by field name and are created without a
  // prototype. With plain objects a field called e.g. "constructor" or
  // "toString" would find the inherited function, skip the initialisation
  // to 0 and corrupt the totals.
  var fieldRefs = Object.keys(this.fieldLengths),
      numberOfFields = fieldRefs.length,
      accumulator = Object.create(null),
      documentsWithField = Object.create(null)

  // sum the lengths and count the entries per field
  for (var i = 0; i < numberOfFields; i++) {
    var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]),
        field = fieldRef.fieldName

    documentsWithField[field] || (documentsWithField[field] = 0)
    documentsWithField[field] += 1

    accumulator[field] || (accumulator[field] = 0)
    accumulator[field] += this.fieldLengths[fieldRef]
  }

  // turn the totals into averages
  var fields = Object.keys(this._fields)

  for (var i = 0; i < fields.length; i++) {
    var fieldName = fields[i]
    accumulator[fieldName] = accumulator[fieldName] / documentsWithField[fieldName]
  }

  this.averageFieldLength = accumulator
}
/**
 * Builds a vector space model of every document field using lunr.Vector.
 *
 * Each term of a field is scored from its inverse document frequency, its
 * frequency within the field and the field's length relative to the
 * average length of that field, then multiplied by the field and document
 * boosts. The vectors are stored in `fieldVectors`, keyed by field
 * reference.
 *
 * Relies on `averageFieldLength` having been calculated beforehand.
 *
 * @private
 */
lunr.Builder.prototype.createFieldVectors = function () {
  var vectors = {},
      refStrings = Object.keys(this.fieldTermFrequencies),
      refCount = refStrings.length,
      idfByTerm = Object.create(null)

  for (var r = 0; r < refCount; r++) {
    var fieldRef = lunr.FieldRef.fromString(refStrings[r]),
        fieldName = fieldRef.fieldName,
        fieldLength = this.fieldLengths[fieldRef],
        vector = new lunr.Vector,
        frequencies = this.fieldTermFrequencies[fieldRef],
        terms = Object.keys(frequencies),
        termCount = terms.length,
        fieldBoost = this._fields[fieldName].boost || 1,
        docBoost = this._documents[fieldRef.docRef].boost || 1

    for (var t = 0; t < termCount; t++) {
      var term = terms[t],
          tf = frequencies[term],
          posting = this.invertedIndex[term],
          idf = idfByTerm[term]

      // the idf of a term is the same for every field, calculate it once
      if (idf === undefined) {
        idf = lunr.idf(posting, this.documentCount)
        idfByTerm[term] = idf
      }

      var lengthRatio = fieldLength / this.averageFieldLength[fieldName],
          saturation = this._k1 * (1 - this._b + this._b * lengthRatio) + tf,
          score = idf * ((this._k1 + 1) * tf) / saturation

      score *= fieldBoost
      score *= docBoost

      // Rounds the score to three decimal places, e.g. 1.23456789
      // becomes 1.235. Reducing the precision makes the vectors take up
      // less space when serialised; doing it now means they behave the
      // same before and after serialisation.
      var roundedScore = Math.round(score * 1000) / 1000

      vector.insert(posting._index, roundedScore)
    }

    vectors[fieldRef] = vector
  }

  this.fieldVectors = vectors
}
/**
 * Creates a token set from all terms in the inverted index using
 * lunr.TokenSet, storing it in `tokenSet`. The terms are handed over in
 * sorted order.
 *
 * @private
 */
lunr.Builder.prototype.createTokenSet = function () {
  var sortedTerms = Object.keys(this.invertedIndex)
  sortedTerms.sort()

  this.tokenSet = lunr.TokenSet.fromArray(sortedTerms)
}
/**
 * Builds the index, creating an instance of lunr.Index.
 *
 * This completes the indexing process and should only be called
 * once all documents have been added to the index.
 *
 * The average field lengths, the field vectors and the token set are
 * derived first, in that order, and are then handed to the new index
 * together with the inverted index, the field names and the search
 * pipeline.
 *
 * @returns {lunr.Index}
 */
lunr.Builder.prototype.build = function () {
  this.calculateAverageFieldLengths()
  this.createFieldVectors()
  this.createTokenSet()

  var indexAttributes = {
    invertedIndex: this.invertedIndex,
    fieldVectors: this.fieldVectors,
    tokenSet: this.tokenSet,
    fields: Object.keys(this._fields),
    pipeline: this.searchPipeline
  }

  return new lunr.Index(indexAttributes)
}
/**
 * Applies a plugin to the index builder.
 *
 * A plugin is just a function that encapsulates custom behaviour which
 * should be applied when building the index, it can be used to customise
 * or extend the behaviour of the index in some way.
 *
 * The plugin function is called with the index builder both as its context
 * and as its first argument. Any additional arguments passed to use are
 * forwarded to the plugin after the builder.
 *
 * @param {Function} fn The plugin to apply.
 */
lunr.Builder.prototype.use = function (fn) {
  var extraArgs = Array.prototype.slice.call(arguments, 1),
      pluginArgs = [this].concat(extraArgs)

  fn.apply(this, pluginArgs)
}
================================================
FILE: lib/field_ref.js
================================================
/**
 * A reference to a single field of a single document.
 *
 * @constructor
 * @param {string} docRef - The reference of the document.
 * @param {string} fieldName - The name of the field within the document.
 * @param {string} [stringValue] - The string form of this reference, when
 * already known. It is built on the first call to toString otherwise.
 */
lunr.FieldRef = function (docRef, fieldName, stringValue) {
  this.docRef = docRef
  this.fieldName = fieldName
  this._stringValue = stringValue
}

/**
 * The string that separates the field name from the document reference in
 * the string form of a field reference.
 */
lunr.FieldRef.joiner = "/"

/**
 * Parses the string form of a field reference.
 *
 * The string is split at the first joiner: what precedes it is the field
 * name, everything after it is the document reference, which may therefore
 * contain the joiner itself.
 *
 * @param {string} s - A string in the form produced by toString.
 * @returns {lunr.FieldRef}
 * @throws {string} When the string does not contain the joiner. Note that
 * a plain string is thrown, not an Error.
 */
lunr.FieldRef.fromString = function (s) {
  var joinerPosition = s.indexOf(lunr.FieldRef.joiner)

  if (joinerPosition === -1) {
    throw "malformed field ref string"
  }

  var fieldName = s.slice(0, joinerPosition),
      docRef = s.slice(joinerPosition + 1)

  return new lunr.FieldRef (docRef, fieldName, s)
}

/**
 * Returns the string form of this reference, the field name and the
 * document reference separated by the joiner. The string is built once and
 * then re-used.
 *
 * @returns {string}
 */
lunr.FieldRef.prototype.toString = function () {
  if (this._stringValue == undefined) {
    this._stringValue = [this.fieldName, this.docRef].join(lunr.FieldRef.joiner)
  }

  return this._stringValue
}
================================================
FILE: lib/idf.js
================================================
/**
 * A function to calculate the inverse document frequency for
 * a posting. This is shared between the builder and the index.
 *
 * The number of documents with the term is the total number of document
 * entries across all fields of the posting.
 *
 * @private
 * @param {object} posting - The posting for a given term
 * @param {number} documentCount - The total number of documents.
 * @returns {number} The inverse document frequency of the term.
 */
lunr.idf = function (posting, documentCount) {
  var documentsWithTerm = 0,
      key

  for (key in posting) {
    // the term index is not a field, every other key is
    if (key != '_index') {
      documentsWithTerm += Object.keys(posting[key]).length
    }
  }

  var numerator = documentCount - documentsWithTerm + 0.5,
      denominator = documentsWithTerm + 0.5

  return Math.log(1 + Math.abs(numerator / denominator))
}
================================================
FILE: lib/index.js
================================================
/*!
* lunr.Index
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
* An index contains the built index of all documents and provides a query interface
* to the index.
*
* Usually instances of lunr.Index will not be created using this constructor, instead
* lunr.Builder should be used to construct new indexes, or lunr.Index.load should be
* used to load previously built and serialized indexes.
*
* The attributes are stored on the index as given, they are not copied.
*
* @constructor
* @param {Object} attrs - The attributes of the built search index.
* @param {Object} attrs.invertedIndex - An index of term/field to document reference.
* @param {Object<string, lunr.Vector>} attrs.fieldVectors - Field vectors
* @param {lunr.TokenSet} attrs.tokenSet - A set of all corpus tokens.
* @param {string[]} attrs.fields - The names of indexed document fields.
* @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms.
*/
lunr.Index = function (attrs) {
this.invertedIndex = attrs.invertedIndex
this.fieldVectors = attrs.fieldVectors
this.tokenSet = attrs.tokenSet
this.fields = attrs.fields
this.pipeline = attrs.pipeline
}
/**
* A result contains details of a document matching a search query.
* @typedef {Object} lunr.Index~Result
* @property {string} ref - The reference of the document this result represents.
* @property {number} score - A number between 0 and 1 representing how similar this document is to the query.
* @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match.
*/
/**
* Although lunr provides the ability to create queries using lunr.Query, it also provides a simple
* query language which itself is parsed into an instance of lunr.Query.
*
* For programmatically building queries it is advised to directly use lunr.Query, the query language
* is best used for human entered text rather than program generated text.
*
* At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported
* and will be combined with OR, e.g `hello world` will match documents that contain either 'hello'
* or 'world', though those that contain both will rank higher in the results.
*
* Wildcards can be included in terms to match one or more unspecified characters, these wildcards can
* be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding
* wildcards will increase the number of documents that will be found but can also have a negative
* impact on query performance, especially with wildcards at the beginning of a term.
*
* Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term
* hello in the title field will match this query. Using a field not present in the index will lead
* to an error being thrown.
*
* Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term
* boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported
* to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2.
* Avoid large values for edit distance to improve query performance.
*
* Each term also supports a presence modifier. By default a term's presence in document is optional, however
* this can be changed to either required or prohibited. For a term's presence to be required in a document the
* term should be prefixed with a '+', e.g. `+foo bar` is a search for documents that must contain 'foo' and
* optionally contain 'bar'. Conversely a leading '-' sets the terms presence to prohibited, i.e. it must not
* appear in a document, e.g. `-foo bar` is a search for documents that do not contain 'foo' but may contain 'bar'.
*
* To escape special characters the backslash character '\' can be used, this allows searches to include
* characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead
* of attempting to apply an edit distance of 2 to the search term "foo".
*
* @typedef {string} lunr.Index~QueryString
* @example <caption>Simple single term query</caption>
* hello
* @example <caption>Multiple term query</caption>
* hello world
* @example <caption>term scoped to a field</caption>
* title:hello
* @example <caption>term with a boost of 10</caption>
* hello^10
* @example <caption>term with an edit distance of 2</caption>
* hello~2
* @example <caption>terms with presence modifiers</caption>
* -foo +bar baz
*/
/**
 * Performs a search against the index using lunr query syntax.
 *
 * The query string is parsed into the query object yielded by
 * lunr.Index#query, which then performs the actual search.
 *
 * Results will be returned sorted by their score, the most relevant results
 * will be returned first. For details on how the score is calculated, please see
 * the {@link https://lunrjs.com/guides/searching.html#scoring|guide}.
 *
 * For more programmatic querying use lunr.Index#query.
 *
 * @param {lunr.Index~QueryString} queryString - A string containing a lunr query.
 * @throws {lunr.QueryParseError} If the passed query string cannot be parsed.
 * @returns {lunr.Index~Result[]}
 */
lunr.Index.prototype.search = function (queryString) {
  var parseInto = function (query) {
    new lunr.QueryParser(queryString, query).parse()
  }

  return this.query(parseInto)
}
/**
* A query builder callback provides a query object to be used to express
* the query to perform on the index.
*
* @callback lunr.Index~queryBuilder
* @param {lunr.Query} query - The query object to build up.
* @this lunr.Query
*/
/**
* Performs a query against the index using the yielded lunr.Query object.
*
* If performing programmatic queries against the index, this method is preferred
* over lunr.Index#search so as to avoid the additional query parsing overhead.
*
* A query object is yielded to the supplied function which should be used to
* express the query to be run against the index.
*
* Note that although this function takes a callback parameter it is _not_ an
* asynchronous operation, the callback is just yielded a query object to be
* customized.
*
* @param {lunr.Index~queryBuilder} fn - A function that is used to build the query.
* @returns {lunr.Index~Result[]}
*/
lunr.Index.prototype.query = function (fn) {
// for each query clause
// * process terms
// * expand terms from token set
// * find matching documents and metadata
// * get document vectors
// * score documents
var query = new lunr.Query(this.fields),
matchingFields = Object.create(null),
queryVectors = Object.create(null),
termFieldCache = Object.create(null),
requiredMatches = Object.create(null),
prohibitedMatches = Object.create(null)
/*
* To support field level boosts a query vector is created per
* field. An empty vector is eagerly created to support negated
* queries.
*/
for (var i = 0; i < this.fields.length; i++) {
queryVectors[this.fields[i]] = new lunr.Vector
}
fn.call(query, query)
for (var i = 0; i < query.clauses.length; i++) {
/*
* Unless the pipeline has been disabled for this term, which is
* the case for terms with wildcards, we need to pass the clause
* term through the search pipeline. A pipeline returns an array
* of processed terms. Pipeline functions may expand the passed
* term, which means we may end up performing multiple index lookups
* for a single query term.
*/
var clause = query.clauses[i],
terms = null,
clauseMatches = lunr.Set.empty
if (clause.usePipeline) {
terms = this.pipeline.runString(clause.term, {
fields: clause.fields
})
} else {
terms = [clause.term]
}
for (var m = 0; m < terms.length; m++) {
var term = terms[m]
/*
* Each term returned from the pipeline needs to use the same query
* clause object, e.g. the same boost and or edit distance. The
* simplest way to do this is to re-use the clause object but mutate
* its term property.
*/
clause.term = term
/*
* From the term in the clause we create a token set which will then
* be used to intersect the indexes token set to get a list of terms
* to lookup in the inverted index
*/
var termTokenSet = lunr.TokenSet.fromClause(clause),
expandedTerms = this.tokenSet.intersect(termTokenSet).toArray()
/*
* If a term marked as required does not exist in the tokenSet it is
* impossible for the search to return any matches. We set all the field
* scoped required matches set to empty and stop examining any further
* clauses.
*/
if (expandedTerms.length === 0 && clause.presence === lunr.Query.presence.REQUIRED) {
for (var k = 0; k < clause.fields.length; k++) {
var field = clause.fields[k]
requiredMatches[field] = lunr.Set.empty
}
break
}
for (var j = 0; j < expandedTerms.length; j++) {
/*
* For each term get the posting and termIndex, this is required for
* building the query vector.
*/
var expandedTerm = expandedTerms[j],
posting = this.invertedIndex[expandedTerm],
termIndex = posting._index
for (var k = 0; k < clause.fields.length; k++) {
/*
* For each field that this query term is scoped by (by default
* all fields are in scope) we need to get all the document refs
* that have this term in that field.
*
* The posting is the entry in the invertedIndex for the matching
* term from above.
*/
var field = clause.fields[k],
fieldPosting = posting[field],
matchingDocumentRefs = Object.keys(fieldPosting),
termField = expandedTerm + "/" + field,
matchingDocumentsSet = new lunr.Set(matchingDocumentRefs)
/*
* if the presence of this term is required ensure that the matching
* documents are added to the set of required matches for this clause.
*
*/
if (clause.presence == lunr.Query.presence.REQUIRED) {
clauseMatches = clauseMatches.union(matchingDocumentsSet)
if (requiredMatches[field] === undefined) {
requiredMatches[field] = lunr.Set.complete
}
}
/*
* if the presence of this term is prohibited ensure that the matching
* documents are added to the set of prohibited matches for this field,
* creating that set if it does not yet exist.
*/
if (clause.presence == lunr.Query.presence.PROHIBITED) {
if (prohibitedMatches[field] === undefined) {
prohibitedMatches[field] = lunr.Set.empty
}
prohibitedMatches[field] = prohibitedMatches[field].union(matchingDocumentsSet)
/*
* Prohibited matches should not be part of the query vector used for
* similarity scoring and no metadata should be extracted so we continue
* to the next field
*/
continue
}
/*
* The query field vector is populated using the termIndex found for
* the term and a unit value with the appropriate boost applied.
* Using upsert because there could already be an entry in the vector
* for the term we are working with. In that case we just add the scores
* together.
*/
queryVectors[field].upsert(termIndex, clause.boost, function (a, b) { return a + b })
/**
* If we've already seen this term, field combo then we've already collected
* the matching documents and metadata, no need to go through all that again
*/
if (termFieldCache[termField]) {
continue
}
for (var l = 0; l < matchingDocumentRefs.length; l++) {
/*
* All metadata for this term/field/document triple
* are then extracted and collected into an instance
* of lunr.MatchData ready to be returned in the query
* results
*/
var matchingDocumentRef = matchingDocumentRefs[l],
matchingFieldRef = new lunr.FieldRef (matchingDocumentRef, field),
metadata = fieldPosting[matchingDocumentRef],
fieldMatch
if ((fieldMatch = matchingFields[matchingFieldRef]) === undefined) {
matchingFields[matchingFieldRef] = new lunr.MatchData (expandedTerm, field, metadata)
} else {
fieldMatch.add(expandedTerm, field, metadata)
}
}
termFieldCache[termField] = true
}
}
}
/**
* If the presence was required we need to update the requiredMatches field sets.
* We do this after all fields for the term have collected their matches because
* the clause terms presence is required in _any_ of the fields not _all_ of the
* fields.
*/
if (clause.presence === lunr.Query.presence.REQUIRED) {
for (var k = 0; k < clause.fields.length; k++) {
var field = clause.fields[k]
requiredMatches[field] = requiredMatches[field].intersect(clauseMatches)
}
}
}
/**
* Need to combine the field scoped required and prohibited
* matching documents into a global set of required and prohibited
* matches
*/
var allRequiredMatches = lunr.Set.complete,
allProhibitedMatches = lunr.Set.empty
for (var i = 0; i < this.fields.length; i++) {
var field = this.fields[i]
if (requiredMatches[field]) {
allRequiredMatches = allRequiredMatches.intersect(requiredMatches[field])
}
if (prohibitedMatches[field]) {
allProhibitedMatches = allProhibitedMatches.union(prohibitedMatches[field])
}
}
var matchingFieldRefs = Object.keys(matchingFields),
results = [],
matches = Object.create(null)
/*
* If the query is negated (contains only prohibited terms)
* we need to get _all_ fieldRefs currently existing in the
* index. This is only done when we know that the query is
* entirely prohibited terms to avoid any cost of getting all
* fieldRefs unnecessarily.
*
* Additionally, blank MatchData must be created to correctly
* populate the results.
*/
if (query.isNegated()) {
matchingFieldRefs = Object.keys(this.fieldVectors)
for (var i = 0; i < matchingFieldRefs.length; i++) {
var matchingFieldRef = matchingFieldRefs[i]
var fieldRef = lunr.FieldRef.fromString(matchingFieldRef)
matchingFields[matchingFieldRef] = new lunr.MatchData
}
}
for (var i = 0; i < matchingFieldRefs.length; i++) {
/*
* Currently we have document fields that match the query, but we
* need to return documents. The matchData and scores are combined
* from multiple fields belonging to the same document.
*
* Scores are calculated by field, using the query vectors created
* above, and combined into a final document score using addition.
*/
var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]),
docRef = fieldRef.docRef
if (!allRequiredMatches.contains(docRef)) {
continue
}
if (allProhibitedMatches.contains(docRef)) {
continue
}
var fieldVector = this.fieldVectors[fieldRef],
score = queryVectors[fieldRef.fieldName].similarity(fieldVector),
docMatch
if ((docMatch = matches[docRef]) !== undefined) {
docMatch.score += score
docMatch.matchData.combine(matchingFields[fieldRef])
} else {
var match = {
ref: docRef,
score: score,
matchData: matchingFields[fieldRef]
}
matches[docRef] = match
results.push(match)
}
}
/*
* Sort the results objects by score, highest first.
*/
return results.sort(function (a, b) {
return b.score - a.score
})
}
/**
* Prepares the index for JSON serialization.
*
* The schema for this JSON blob will be described in a
* separate JSON schema file.
*
* @returns {Object}
*/
lunr.Index.prototype.toJSON = function () {
  var index = this,
      sortedTerms = Object.keys(index.invertedIndex).sort(),
      serializedInvertedIndex = [],
      serializedFieldVectors = [],
      fieldRefs

  // The inverted index is flattened into [term, posting] tuples, emitted
  // in sorted term order.
  for (var t = 0; t < sortedTerms.length; t++) {
    var term = sortedTerms[t]
    serializedInvertedIndex.push([term, index.invertedIndex[term]])
  }

  // Field vectors become [fieldRef, serializedVector] tuples; each vector
  // is responsible for its own serialized form.
  fieldRefs = Object.keys(index.fieldVectors)
  for (var f = 0; f < fieldRefs.length; f++) {
    var ref = fieldRefs[f]
    serializedFieldVectors.push([ref, index.fieldVectors[ref].toJSON()])
  }

  return {
    version: lunr.version,
    fields: index.fields,
    fieldVectors: serializedFieldVectors,
    invertedIndex: serializedInvertedIndex,
    pipeline: index.pipeline.toJSON()
  }
}
/**
* Loads a previously serialized lunr.Index
*
* @param {Object} serializedIndex - A previously serialized lunr.Index
* @returns {lunr.Index}
*/
lunr.Index.load = function (serializedIndex) {
  var vectorTuples = serializedIndex.fieldVectors,
      postingTuples = serializedIndex.invertedIndex,
      vectorsByRef = {},
      postingsByTerm = Object.create(null),
      builder = new lunr.TokenSet.Builder,
      loadedPipeline = lunr.Pipeline.load(serializedIndex.pipeline)

  // A version mismatch is only reported, loading still continues.
  if (serializedIndex.version != lunr.version) {
    lunr.utils.warn("Version mismatch when loading serialised index. Current version of lunr '" + lunr.version + "' does not match serialized index '" + serializedIndex.version + "'")
  }

  // Each serialized vector is a [fieldRef, elements] tuple.
  for (var v = 0; v < vectorTuples.length; v++) {
    var vectorTuple = vectorTuples[v]
    vectorsByRef[vectorTuple[0]] = new lunr.Vector(vectorTuple[1])
  }

  // Each serialized posting is a [term, posting] tuple. The term is fed to
  // the token set builder in the order it appears in the serialized data.
  for (var p = 0; p < postingTuples.length; p++) {
    var postingTuple = postingTuples[p],
        term = postingTuple[0]

    builder.insert(term)
    postingsByTerm[term] = postingTuple[1]
  }

  builder.finish()

  return new lunr.Index({
    fields: serializedIndex.fields,
    fieldVectors: vectorsByRef,
    invertedIndex: postingsByTerm,
    tokenSet: builder.root,
    pipeline: loadedPipeline
  })
}
================================================
FILE: lib/lunr.js
================================================
/**
* A convenience function for configuring and constructing
* a new lunr Index.
*
* A lunr.Builder instance is created and the pipeline setup
* with a trimmer, stop word filter and stemmer.
*
* This builder object is yielded to the configuration function
* that is passed as a parameter, allowing the list of fields
* and other builder parameters to be customised.
*
* All documents _must_ be added within the passed config function.
*
* @example
* var idx = lunr(function () {
* this.field('title')
* this.field('body')
* this.ref('id')
*
* documents.forEach(function (doc) {
* this.add(doc)
* }, this)
* })
*
* @see {@link lunr.Builder}
* @see {@link lunr.Pipeline}
* @see {@link lunr.trimmer}
* @see {@link lunr.stopWordFilter}
* @see {@link lunr.stemmer}
* @namespace {function} lunr
*/
var lunr = function (config) {
  var indexBuilder = new lunr.Builder

  // Document tokens are trimmed, stop word filtered and then stemmed.
  indexBuilder.pipeline.add(lunr.trimmer, lunr.stopWordFilter, lunr.stemmer)

  // Search terms only go through the stemmer.
  indexBuilder.searchPipeline.add(lunr.stemmer)

  // The builder is both the `this` value and the first argument of the
  // configuration function, so either calling style works.
  config.call(indexBuilder, indexBuilder)

  return indexBuilder.build()
}

lunr.version = "@VERSION"
================================================
FILE: lib/match_data.js
================================================
/**
* Contains and collects metadata about a matching document.
* A single instance of lunr.MatchData is returned as part of every
* lunr.Index~Result.
*
* @constructor
* @param {string} term - The term this match data is associated with
* @param {string} field - The field in which the term was found
* @param {object} metadata - The metadata recorded about this term in this field
* @property {object} metadata - A cloned collection of metadata associated with this document.
* @see {@link lunr.Index~Result}
*/
lunr.MatchData = function (term, field, metadata) {
  var clonedMetadata = Object.create(null),
      metadataKeys = Object.keys(metadata || {})

  // Cloning the metadata to prevent the original
  // being mutated during match data combination.
  // Metadata is kept in an array within the inverted
  // index so cloning the data can be done with
  // Array#slice
  for (var i = 0; i < metadataKeys.length; i++) {
    var key = metadataKeys[i]
    clonedMetadata[key] = metadata[key].slice()
  }

  this.metadata = Object.create(null)

  // Called without a term this produces blank match data.
  if (term !== undefined) {
    this.metadata[term] = Object.create(null)
    this.metadata[term][field] = clonedMetadata
  }
}

/**
 * An instance of lunr.MatchData will be created for every term that matches a
 * document. However only one instance is required in a lunr.Index~Result. This
 * method combines metadata from another instance of lunr.MatchData with this
 * object's metadata.
 *
 * The other instance is only read; where both instances hold values for the
 * same term/field/key the values are concatenated into a new array.
 *
 * @param {lunr.MatchData} otherMatchData - Another instance of match data to merge with this one.
 * @see {@link lunr.Index~Result}
 */
lunr.MatchData.prototype.combine = function (otherMatchData) {
  var terms = Object.keys(otherMatchData.metadata)

  for (var i = 0; i < terms.length; i++) {
    var term = terms[i],
        fields = Object.keys(otherMatchData.metadata[term])

    if (this.metadata[term] == undefined) {
      this.metadata[term] = Object.create(null)
    }

    for (var j = 0; j < fields.length; j++) {
      var field = fields[j],
          keys = Object.keys(otherMatchData.metadata[term][field])

      if (this.metadata[term][field] == undefined) {
        this.metadata[term][field] = Object.create(null)
      }

      for (var k = 0; k < keys.length; k++) {
        var key = keys[k]

        if (this.metadata[term][field][key] == undefined) {
          this.metadata[term][field][key] = otherMatchData.metadata[term][field][key]
        } else {
          // Array#concat returns a new array so neither source is mutated.
          this.metadata[term][field][key] = this.metadata[term][field][key].concat(otherMatchData.metadata[term][field][key])
        }
      }
    }
  }
}

/**
 * Add metadata for a term/field pair to this instance of match data.
 *
 * As in the constructor, the passed metadata is cloned rather than stored
 * by reference. Without the clone, a later call for the same term/field
 * pair would write merged values back into the object the caller passed
 * in earlier.
 *
 * @param {string} term - The term this match data is associated with
 * @param {string} field - The field in which the term was found
 * @param {object} metadata - The metadata recorded about this term in this field
 */
lunr.MatchData.prototype.add = function (term, field, metadata) {
  var metadataKeys = Object.keys(metadata || {})

  if (!(term in this.metadata)) {
    this.metadata[term] = Object.create(null)
  }

  if (!(field in this.metadata[term])) {
    this.metadata[term][field] = Object.create(null)
  }

  var fieldMetadata = this.metadata[term][field]

  for (var i = 0; i < metadataKeys.length; i++) {
    var key = metadataKeys[i]

    if (key in fieldMetadata) {
      fieldMetadata[key] = fieldMetadata[key].concat(metadata[key])
    } else {
      // Same assumption as the constructor: metadata values are arrays.
      fieldMetadata[key] = metadata[key].slice()
    }
  }
}
================================================
FILE: lib/pipeline.js
================================================
/*!
* lunr.Pipeline
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
* lunr.Pipelines maintain an ordered list of functions to be applied to all
* tokens in documents entering the search index and queries being run against
* the index.
*
* An instance of lunr.Index created with the lunr shortcut will contain a
* pipeline with a stop word filter and an English language stemmer. Extra
* functions can be added before or after either of these functions or these
* default functions can be removed.
*
* When run the pipeline will call each function in turn, passing a token, the
* index of that token in the original list of all tokens and finally a list of
* all the original tokens.
*
* The output of functions in the pipeline will be passed to the next function
* in the pipeline. To exclude a token from entering the index the function
* should return undefined, the rest of the pipeline will not be called with
* this token.
*
* For serialisation of pipelines to work, all functions used in an instance of
* a pipeline should be registered with lunr.Pipeline. Registered functions can
* then be loaded. If trying to load a serialised pipeline that uses functions
* that are not registered an error will be thrown.
*
* If not planning on serialising the pipeline then registering pipeline functions
* is not necessary.
*
* @constructor
*/
lunr.Pipeline = function () {
  // Ordered list of the functions that make up this pipeline.
  this._stack = []
}

// Registry of label -> function, shared by all pipelines. Created without a
// prototype so labels can never collide with inherited Object properties.
lunr.Pipeline.registeredFunctions = Object.create(null)

/**
 * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
 * string as well as all known metadata. A pipeline function can mutate the token string
 * or mutate (or add) metadata for a given token.
 *
 * A pipeline function can indicate that the passed token should be discarded by returning
 * null, undefined or an empty string. This token will not be passed to any downstream pipeline
 * functions and will not be added to the index.
 *
 * Multiple tokens can be returned by returning an array of tokens. Each token will be passed
 * to any downstream pipeline functions and all returned tokens will be added to the index.
 *
 * Any number of pipeline functions may be chained together using a lunr.Pipeline.
 *
 * @interface lunr.PipelineFunction
 * @param {lunr.Token} token - A token from the document being processed.
 * @param {number} i - The index of this token in the complete list of tokens for this document/field.
 * @param {lunr.Token[]} tokens - All tokens for this document/field.
 * @returns {(?lunr.Token|lunr.Token[])}
 */

/**
 * Register a function with the pipeline.
 *
 * Functions that are used in the pipeline should be registered if the pipeline
 * needs to be serialised, or a serialised pipeline needs to be loaded.
 *
 * Registering a function does not add it to a pipeline, functions must still be
 * added to instances of the pipeline for them to be used when running a pipeline.
 *
 * Registering sets a `label` property on the function. A warning is logged
 * when the label is already in use, and the previous registration is replaced.
 *
 * @param {lunr.PipelineFunction} fn - The function to register.
 * @param {String} label - The label to register this function with
 */
lunr.Pipeline.registerFunction = function (fn, label) {
  // The registry is always reached through lunr.Pipeline, never `this`, so
  // this function also works when detached, e.g. passed as a callback.
  if (label in lunr.Pipeline.registeredFunctions) {
    lunr.utils.warn('Overwriting existing registered function: ' + label)
  }

  fn.label = label
  lunr.Pipeline.registeredFunctions[fn.label] = fn
}

/**
 * Warns if the function is not registered as a Pipeline function.
 *
 * A function counts as registered when it has a label and that label is
 * present in the registry.
 *
 * @param {lunr.PipelineFunction} fn - The function to check for.
 * @private
 */
lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
  // Reached through lunr.Pipeline rather than `this` for the same reason as
  // in registerFunction.
  var isRegistered = fn.label && (fn.label in lunr.Pipeline.registeredFunctions)

  if (!isRegistered) {
    lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn)
  }
}

/**
 * Loads a previously serialised pipeline.
 *
 * All functions to be loaded must already be registered with lunr.Pipeline.
 * If any function from the serialised data has not been registered then an
 * error will be thrown.
 *
 * @param {Object} serialised - The serialised pipeline to load.
 * @returns {lunr.Pipeline}
 * @throws {Error} If a label in the serialised data has no registered function.
 */
lunr.Pipeline.load = function (serialised) {
  var pipeline = new lunr.Pipeline

  serialised.forEach(function (fnName) {
    var fn = lunr.Pipeline.registeredFunctions[fnName]

    if (fn) {
      pipeline.add(fn)
    } else {
      throw new Error('Cannot load unregistered function: ' + fnName)
    }
  })

  return pipeline
}

/**
 * Adds new functions to the end of the pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.
 */
lunr.Pipeline.prototype.add = function () {
  var fns = Array.prototype.slice.call(arguments)

  fns.forEach(function (fn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(fn)
    this._stack.push(fn)
  }, this)
}

/**
 * Adds a single function after a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 * @throws {Error} If existingFn is not part of this pipeline.
 */
lunr.Pipeline.prototype.after = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var pos = this._stack.indexOf(existingFn)
  if (pos == -1) {
    throw new Error('Cannot find existingFn')
  }

  pos = pos + 1
  this._stack.splice(pos, 0, newFn)
}

/**
 * Adds a single function before a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 * @throws {Error} If existingFn is not part of this pipeline.
 */
lunr.Pipeline.prototype.before = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var pos = this._stack.indexOf(existingFn)
  if (pos == -1) {
    throw new Error('Cannot find existingFn')
  }

  this._stack.splice(pos, 0, newFn)
}

/**
 * Removes a function from the pipeline.
 *
 * Does nothing if the function is not part of the pipeline.
 *
 * @param {lunr.PipelineFunction} fn The function to remove from the pipeline.
 */
lunr.Pipeline.prototype.remove = function (fn) {
  var pos = this._stack.indexOf(fn)
  if (pos == -1) {
    return
  }

  this._stack.splice(pos, 1)
}

/**
 * Runs the current list of functions that make up the pipeline against the
 * passed tokens.
 *
 * Each function is applied to every token produced by the previous function
 * before the next function runs.
 *
 * @param {Array} tokens The tokens to run through the pipeline.
 * @returns {Array}
 */
lunr.Pipeline.prototype.run = function (tokens) {
  var stackLength = this._stack.length

  for (var i = 0; i < stackLength; i++) {
    var fn = this._stack[i]
    var memo = []

    for (var j = 0; j < tokens.length; j++) {
      var result = fn(tokens[j], j, tokens)

      // null, undefined and the empty string all mean "discard this token"
      if (result === null || result === void 0 || result === '') continue

      if (Array.isArray(result)) {
        // a function may expand one token into several
        for (var k = 0; k < result.length; k++) {
          memo.push(result[k])
        }
      } else {
        memo.push(result)
      }
    }

    tokens = memo
  }

  return tokens
}

/**
 * Convenience method for passing a string through a pipeline and getting
 * strings out. This method takes care of wrapping the passed string in a
 * token and mapping the resulting tokens back to strings.
 *
 * @param {string} str - The string to pass through the pipeline.
 * @param {?object} metadata - Optional metadata to associate with the token
 * passed to the pipeline.
 * @returns {string[]}
 */
lunr.Pipeline.prototype.runString = function (str, metadata) {
  var token = new lunr.Token (str, metadata)

  return this.run([token]).map(function (t) {
    return t.toString()
  })
}

/**
 * Resets the pipeline by removing any existing processors.
 *
 */
lunr.Pipeline.prototype.reset = function () {
  this._stack = []
}

/**
 * Returns a representation of the pipeline ready for serialisation.
 *
 * The representation is the list of labels of the pipeline's functions, in
 * pipeline order.
 *
 * Logs a warning if the function has not been registered.
 *
 * @returns {Array}
 */
lunr.Pipeline.prototype.toJSON = function () {
  return this._stack.map(function (fn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(fn)

    return fn.label
  })
}
================================================
FILE: lib/query.js
================================================
/**
* A lunr.Query provides a programmatic way of defining queries to be performed
* against a {@link lunr.Index}.
*
* Prefer constructing a lunr.Query using the {@link lunr.Index#query} method
* so the query object is pre-initialized with the right index fields.
*
* @constructor
* @property {lunr.Query~Clause[]} clauses - An array of query clauses.
* @property {string[]} allFields - An array of all available fields in a lunr.Index.
*/
lunr.Query = function (allFields) {
  this.clauses = []
  this.allFields = allFields
}

/**
 * Constants for indicating what kind of automatic wildcard insertion will be used when constructing a query clause.
 *
 * This allows wildcards to be added to the beginning and end of a term without having to manually do any string
 * concatenation.
 *
 * The wildcard constants can be bitwise combined to select both leading and trailing wildcards.
 *
 * @constant
 * @default
 * @property {number} wildcard.NONE - The term will have no wildcards inserted, this is the default behaviour
 * @property {number} wildcard.LEADING - Prepend the term with a wildcard, unless a leading wildcard already exists
 * @property {number} wildcard.TRAILING - Append a wildcard to the term, unless a trailing wildcard already exists
 * @see lunr.Query~Clause
 * @see lunr.Query#clause
 * @see lunr.Query#term
 * @example <caption>query term with trailing wildcard</caption>
 * query.term('foo', { wildcard: lunr.Query.wildcard.TRAILING })
 * @example <caption>query term with leading and trailing wildcard</caption>
 * query.term('foo', {
 *   wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING
 * })
 */

// A String object (not a primitive) so that it can both stand in for the
// wildcard character itself and carry the NONE/LEADING/TRAILING constants.
lunr.Query.wildcard = new String ("*")
lunr.Query.wildcard.NONE = 0
lunr.Query.wildcard.LEADING = 1
lunr.Query.wildcard.TRAILING = 2

/**
 * Constants for indicating what kind of presence a term must have in matching documents.
 *
 * @constant
 * @enum {number}
 * @see lunr.Query~Clause
 * @see lunr.Query#clause
 * @see lunr.Query#term
 * @example <caption>query term with required presence</caption>
 * query.term('foo', { presence: lunr.Query.presence.REQUIRED })
 */
lunr.Query.presence = {
  /**
   * Term's presence in a document is optional, this is the default value.
   */
  OPTIONAL: 1,

  /**
   * Term's presence in a document is required, documents that do not contain
   * this term will not be returned.
   */
  REQUIRED: 2,

  /**
   * Term's presence in a document is prohibited, documents that do contain
   * this term will not be returned.
   */
  PROHIBITED: 3
}

/**
 * A single clause in a {@link lunr.Query} contains a term and details on how to
 * match that term against a {@link lunr.Index}.
 *
 * @typedef {Object} lunr.Query~Clause
 * @property {string[]} fields - The fields in an index this clause should be matched against.
 * @property {number} [boost=1] - Any boost that should be applied when matching this clause.
 * @property {number} [editDistance] - Whether the term should have fuzzy matching applied, and how fuzzy the match should be.
 * @property {boolean} [usePipeline] - Whether the term should be passed through the search pipeline.
 * @property {number} [wildcard=lunr.Query.wildcard.NONE] - Whether the term should have wildcards appended or prepended.
 * @property {number} [presence=lunr.Query.presence.OPTIONAL] - The term's presence in any matching documents.
 */

/**
 * Adds a {@link lunr.Query~Clause} to this query.
 *
 * Unless the clause contains the fields to be matched all fields will be matched. In addition
 * a default boost of 1 is applied to the clause.
 *
 * The passed clause object itself is completed with defaults and stored in the query.
 *
 * @param {lunr.Query~Clause} clause - The clause to add to this query.
 * @see lunr.Query~Clause
 * @returns {lunr.Query}
 */
lunr.Query.prototype.clause = function (clause) {
  if (!('fields' in clause)) {
    clause.fields = this.allFields
  }

  if (!('boost' in clause)) {
    clause.boost = 1
  }

  if (!('usePipeline' in clause)) {
    clause.usePipeline = true
  }

  if (!('wildcard' in clause)) {
    clause.wildcard = lunr.Query.wildcard.NONE
  }

  // lunr.Query.wildcard is a String object, so loose inequality is needed
  // here for it to be compared by value against the term's characters.
  if ((clause.wildcard & lunr.Query.wildcard.LEADING) && (clause.term.charAt(0) != lunr.Query.wildcard)) {
    clause.term = "*" + clause.term
  }

  if ((clause.wildcard & lunr.Query.wildcard.TRAILING) && (clause.term.slice(-1) != lunr.Query.wildcard)) {
    clause.term = "" + clause.term + "*"
  }

  if (!('presence' in clause)) {
    clause.presence = lunr.Query.presence.OPTIONAL
  }

  this.clauses.push(clause)

  return this
}

/**
 * A negated query is one in which every clause has a presence of
 * prohibited. These queries require some special processing to return
 * the expected results.
 *
 * A query without any clauses is also reported as negated.
 *
 * @returns {boolean}
 */
lunr.Query.prototype.isNegated = function () {
  for (var i = 0; i < this.clauses.length; i++) {
    if (this.clauses[i].presence != lunr.Query.presence.PROHIBITED) {
      return false
    }
  }

  return true
}

/**
 * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause}
 * and add it to the list of clauses that make up this query.
 *
 * The term is used as is, i.e. no tokenization will be performed by this method. Instead conversion
 * to a token or token-like string should be done before calling this method.
 *
 * The term will be converted to a string by calling `toString`. Multiple terms can be passed as an
 * array, each term in the array will share the same options.
 *
 * The options object is copied, never stored or modified, so the same object can safely be
 * passed to several calls.
 *
 * @param {object|object[]} term - The term(s) to add to the query.
 * @param {object} [options] - Any additional properties to add to the query clause.
 * @returns {lunr.Query}
 * @see lunr.Query#clause
 * @see lunr.Query~Clause
 * @example <caption>adding a single term to a query</caption>
 * query.term("foo")
 * @example <caption>adding a single term to a query and specifying search fields, term boost and automatic trailing wildcard</caption>
 * query.term("foo", {
 *   fields: ["title"],
 *   boost: 10,
 *   wildcard: lunr.Query.wildcard.TRAILING
 * })
 * @example <caption>using lunr.tokenizer to convert a string to tokens before using them as terms</caption>
 * query.term(lunr.tokenizer("foo bar"))
 */
lunr.Query.prototype.term = function (term, options) {
  if (Array.isArray(term)) {
    term.forEach(function (t) { this.term(t, lunr.utils.clone(options)) }, this)
    return this
  }

  // Build the clause from a copy of the options. Adopting the caller's
  // object would write `term` and the clause defaults into it, and reusing
  // that object for another call would make both clauses the same object.
  var clause = {},
      optionKeys = Object.keys(options || {})

  for (var i = 0; i < optionKeys.length; i++) {
    clause[optionKeys[i]] = options[optionKeys[i]]
  }

  clause.term = term.toString()

  this.clause(clause)

  return this
}
================================================
FILE: lib/query_lexer.js
================================================
lunr.QueryLexer = function (str) {
  // lexemes emitted so far, in input order
  this.lexemes = []

  // the query string being lexed and its cached length
  this.str = str
  this.length = str.length

  // `pos` is the read cursor, `start` marks where the lexeme currently
  // being read begins
  this.pos = 0
  this.start = 0

  // positions of escape characters seen inside the current lexeme
  this.escapeCharPositions = []
}
lunr.QueryLexer.prototype.run = function () {
  // The lexer is a state machine: every state function receives the lexer
  // and returns the next state function, or nothing once lexing is done.
  for (var nextState = lunr.QueryLexer.lexText; nextState; nextState = nextState(this)) {
    // all of the work happens inside the state functions
  }
}
lunr.QueryLexer.prototype.sliceString = function () {
  var source = this.str,
      escapePositions = this.escapeCharPositions,
      cursor = this.start,
      text = ''

  // Copy the text between `start` and `pos`, leaving out the character at
  // every recorded escape position.
  for (var i = 0; i < escapePositions.length; i++) {
    var escapeIdx = escapePositions[i]
    text += source.slice(cursor, escapeIdx)
    cursor = escapeIdx + 1
  }

  text += source.slice(cursor, this.pos)

  // The recorded positions only apply to the lexeme just sliced.
  escapePositions.length = 0

  return text
}
lunr.QueryLexer.prototype.emit = function (type) {
  // `start` and `end` are offsets into the original query string; `str` is
  // the lexeme text as returned by sliceString.
  var text = this.sliceString(),
      lexeme = {
        type: type,
        str: text,
        start: this.start,
        end: this.pos
      }

  this.lexemes.push(lexeme)

  // the next lexeme begins where this one ended
  this.start = this.pos
}
lunr.QueryLexer.prototype.escapeCharacter = function () {
  // Record the position just behind the cursor, then move the cursor one
  // further character forward.
  var escapeIdx = this.pos - 1

  this.escapeCharPositions.push(escapeIdx)
  this.pos = this.pos + 1
}
lunr.QueryLexer.prototype.next = function () {
  // Past the end of the input the EOS marker is returned and the cursor is
  // left where it is.
  if (this.pos >= this.length) {
    return lunr.QueryLexer.EOS
  }

  // read the character under the cursor, then advance past it
  return this.str.charAt(this.pos++)
}
lunr.QueryLexer.prototype.width = function () {
  // number of characters read since the current lexeme started
  var consumed = this.pos - this.start

  return consumed
}
lunr.QueryLexer.prototype.ignore = function () {
  var nothingRead = this.start == this.pos

  // When nothing has been read yet, skip one character.
  if (nothingRead) {
    this.pos = this.pos + 1
  }

  // Drop everything read so far from the current lexeme.
  this.start = this.pos
}
lunr.QueryLexer.prototype.backup = function () {
  // step the cursor back by one character
  this.pos = this.pos - 1
}
lunr.QueryLexer.prototype.acceptDigitRun = function () {
  var current = this.next(),
      code = current.charCodeAt(0)

  // Char codes 48 to 57 are the digits '0' to '9'; keep reading while the
  // character just read is one of them.
  while (code >= 48 && code <= 57) {
    current = this.next()
    code = current.charCodeAt(0)
  }

  // The character that ended the run is not part of it, so un-read it,
  // unless the run was ended by the end of the input.
  if (current != lunr.QueryLexer.EOS) {
    this.backup()
  }
}
lunr.QueryLexer.prototype.more = function () {
  // true while the cursor has not reached the end of the input
  return this.length > this.pos
}
// Marker returned by QueryLexer#next once the input is exhausted.
lunr.QueryLexer.EOS = 'EOS'

// Types of the lexemes emitted by the lexer.
lunr.QueryLexer.FIELD = 'FIELD'
lunr.QueryLexer.TERM = 'TERM'
lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE'
lunr.QueryLexer.BOOST = 'BOOST'
lunr.QueryLexer.PRESENCE = 'PRESENCE'
lunr.QueryLexer.lexField = function (lexer) {
  // Un-read the ":" that led here so it is not part of the field name.
  lexer.backup()
  lexer.emit(lunr.QueryLexer.FIELD)

  // Step over the ":" itself before lexing continues.
  lexer.ignore()

  return lunr.QueryLexer.lexText
}
lunr.QueryLexer.lexTerm = function (lexer) {
  // A width of one means only the separator itself was read, so there is
  // no term text to emit.
  var hasTermText = lexer.width() > 1

  if (hasTermText) {
    lexer.backup()
    lexer.emit(lunr.QueryLexer.TERM)
  }

  lexer.ignore()

  // Continue with the remaining input, or stop when there is none left.
  return lexer.more() ? lunr.QueryLexer.lexText : undefined
}
lunr.QueryLexer.lexEditDistance = function (lexer) {
  // Skip the "~" marker, then read the run of digits that follows it.
  lexer.ignore()
  lexer.acceptDigitRun()

  lexer.emit(lunr.QueryLexer.EDIT_DISTANCE)

  return lunr.QueryLexer.lexText
}
lunr.QueryLexer.lexBoost = function (lexer) {
  // Skip the "^" marker, then read the run of digits that follows it.
  lexer.ignore()
  lexer.acceptDigitRun()

  lexer.emit(lunr.QueryLexer.BOOST)

  return lunr.QueryLexer.lexText
}
lunr.QueryLexer.lexEOS = function (lexer) {
  // Anything still unread at the end of the input is emitted as a final
  // term. Nothing is returned, which stops the lexer.
  var hasPendingText = lexer.width() > 0

  if (hasPendingText) {
    lexer.emit(lunr.QueryLexer.TERM)
  }
}
// This matches the separator used when tokenising fields
// within a document. These should match otherwise it is
// not possible to search for some tokens within a document.
//
// It is possible for the user to change the separator on the
// tokenizer so it _might_ clash with any other of the special
// characters already used within the search string, e.g. :.
//
// This means that it is possible to change the separator in
// such a way that makes some words unsearchable using a search
// string.
lunr.QueryLexer.termSeparator = lunr.tokenizer.separator

lunr.QueryLexer.lexText = function (lexer) {
  for (;;) {
    var current = lexer.next()

    if (current == lunr.QueryLexer.EOS) {
      return lunr.QueryLexer.lexEOS
    }

    // Char code 92 is the escape character '\'
    if (current.charCodeAt(0) == 92) {
      lexer.escapeCharacter()
      continue
    }

    if (current == ":") {
      return lunr.QueryLexer.lexField
    }

    // "~" introduces an edit distance and "^" a boost. Either one ends the
    // term read so far, which is emitted without the marker character.
    if (current == "~" || current == "^") {
      lexer.backup()

      if (lexer.width() > 0) {
        lexer.emit(lunr.QueryLexer.TERM)
      }

      return current == "~" ? lunr.QueryLexer.lexEditDistance : lunr.QueryLexer.lexBoost
    }

    // "+" indicates term presence is required and "-" that it is
    // prohibited. The width check ensures that only a leading "+" or "-"
    // is treated as a presence operator.
    if ((current == "+" || current == "-") && lexer.width() === 1) {
      lexer.emit(lunr.QueryLexer.PRESENCE)
      return lunr.QueryLexer.lexText
    }

    if (current.match(lunr.QueryLexer.termSeparator)) {
      return lunr.QueryLexer.lexTerm
    }
  }
}
================================================
FILE: lib/query_parse_error.js
================================================
/**
 * Error describing a problem found in a query string.
 *
 * @constructor
 * @param {string} message - Description of the problem.
 * @param {number} start - Offset in the query string where the offending lexeme starts.
 * @param {number} end - Offset in the query string where the offending lexeme ends.
 */
lunr.QueryParseError = function (message, start, end) {
  this.name = "QueryParseError"
  this.message = message
  this.start = start
  this.end = end

  // The prototype below is a single Error created when this file loads, so
  // without this every instance would inherit that one load-time stack.
  // Error.captureStackTrace is not available in every engine, hence the
  // guard. Passing the constructor keeps it out of the captured trace.
  if (typeof Error.captureStackTrace === "function") {
    Error.captureStackTrace(this, lunr.QueryParseError)
  }
}

lunr.QueryParseError.prototype = new Error
================================================
FILE: lib/query_parser.js
================================================
lunr.QueryParser = function (str, query) {
  var lexer = new lunr.QueryLexer (str)

  // the lexer that will turn the query string into lexemes
  this.lexer = lexer

  // the query object that completed clauses are added to
  this.query = query

  // the clause currently being assembled
  this.currentClause = {}

  // index of the next lexeme to be read
  this.lexemeIdx = 0
}
lunr.QueryParser.prototype.parse = function () {
  // Lex the whole query string first, then parse the resulting lexemes.
  this.lexer.run()
  this.lexemes = this.lexer.lexemes

  // Like the lexer, the parser is a state machine: each state function
  // returns the next one, or nothing when parsing is finished.
  for (var nextState = lunr.QueryParser.parseClause; nextState; nextState = nextState(this)) {
    // all of the work happens inside the state functions
  }

  return this.query
}
lunr.QueryParser.prototype.peekLexeme = function () {
  // Look at the next lexeme without consuming it; undefined once all
  // lexemes have been consumed.
  var upcoming = this.lexemes[this.lexemeIdx]

  return upcoming
}
lunr.QueryParser.prototype.consumeLexeme = function () {
  // Return the next lexeme and move past it. The index is advanced even
  // when there was no lexeme left to return.
  var consumed = this.peekLexeme()

  this.lexemeIdx = this.lexemeIdx + 1

  return consumed
}
lunr.QueryParser.prototype.nextClause = function () {
  // Hand the finished clause to the query, then start a fresh one.
  this.query.clause(this.currentClause)
  this.currentClause = {}
}
lunr.QueryParser.parseClause = function (parser) {
  var lexeme = parser.peekLexeme()

  // no lexemes left, parsing is complete
  if (lexeme == undefined) {
    return
  }

  var lexemeType = lexeme.type

  // A clause may start with a presence operator, a field or a term. The
  // lexeme is only peeked at here, the chosen state consumes it.
  if (lexemeType === lunr.QueryLexer.PRESENCE) {
    return lunr.QueryParser.parsePresence
  }

  if (lexemeType === lunr.QueryLexer.FIELD) {
    return lunr.QueryParser.parseField
  }

  if (lexemeType === lunr.QueryLexer.TERM) {
    return lunr.QueryParser.parseTerm
  }

  var errorMessage = "expected either a field or a term, found " + lexemeType

  if (lexeme.str.length >= 1) {
    errorMessage += " with value '" + lexeme.str + "'"
  }

  throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
}
lunr.QueryParser.parsePresence = function (parser) {
var lexeme = parser.consumeLexeme()
if (lexeme == undefined) {
return
}
switch (lexeme.str) {
case "-":
parser.currentClause.presence = lunr.Query.presence.PROHIBITED
break
case "+":
parser.currentClause.presence = lunr.Query.presence.REQUIRED
break
default:
var errorMessage = "unrecognised presence operator'" + lexeme.str + "'"
throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
}
var nextLexeme = parser.peekLexeme()
if (nextLexeme == undefined) {
var errorMessage = "expecting term or field, found nothing"
throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
}
switch (nextLexeme.type) {
case lunr.QueryLexer.FIELD:
return lunr.QueryParser.parseField
case lunr.QueryLexer.TERM:
return lunr.QueryParser.parseTerm
default:
var errorMessage = "expecting term or field, found '" + nextLexeme.type + "'"
throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
}
}
lunr.QueryParser.parseField = function (parser) {
var lexeme = parser.consumeLexeme()
if (lexeme == undefined) {
return
}
if (parser.query.allFields.indexOf(lexeme.str) == -1) {
var possibleFields = parser.query.allFields.map(function (f) { return "'" + f + "'" }).join(', '),
errorMessage = "unrecognised field '" + lexeme.str + "', possible fields: " + possibleFields
throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
}
parser.currentClause.fields = [lexeme.str]
var nextLexeme = parser.peekLexeme()
if (nextLexeme == undefined) {
var errorMessage = "expecting term, found nothing"
throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
}
switch (nextLexeme.type) {
case lunr.QueryLexer.TERM:
return lunr.QueryParser.parseTerm
default:
var errorMessage = "expecting term, found '" + nextLexeme.type + "'"
throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
}
}
lunr.QueryParser.parseTerm = function (parser) {
var lexeme = parser.consumeLexeme()
if (lexeme == undefined) {
return
}
parser.currentClause.term = lexeme.str.toLowerCase()
if (lexeme.str.indexOf("*") != -1) {
parser.currentClause.usePipeline = false
}
var nextLexeme = parser.peekLexeme()
if (nextLexeme == undefined) {
parser.nextClause()
return
}
switch (nextLexeme.type) {
case lunr.QueryLexer.TERM:
parser.nextClause()
return lunr.QueryParser.parseTerm
case lunr.QueryLexer.FIELD:
parser.nextClause()
return lunr.QueryParser.parseField
case lunr.QueryLexer.EDIT_DISTANCE:
return lunr.QueryParser.parseEditDistance
case lunr.QueryLexer.BOOST:
return lunr.QueryParser.parseBoost
case lunr.QueryLexer.PRESENCE:
parser.nextClause()
return lunr.QueryParser.parsePresence
default:
var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'"
throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
}
}
lunr.QueryParser.parseEditDistance = function (parser) {
var lexeme = parser.consumeLexeme()
if (lexeme == undefined) {
return
}
var editDistance = parseInt(lexeme.str, 10)
if (isNaN(editDistance)) {
var errorMessage = "edit distance must be numeric"
throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
}
parser.currentClause.editDistance = editDistance
var nextLexeme = parser.peekLexeme()
if (nextLexeme == undefined) {
parser.nextClause()
return
}
switch (nextLexeme.type) {
case lunr.QueryLexer.TERM:
parser.nextClause()
return lunr.QueryParser.parseTerm
case lunr.QueryLexer.FIELD:
parser.nextClause()
return lunr.QueryParser.parseField
case lunr.QueryLexer.EDIT_DISTANCE:
return lunr.QueryParser.parseEditDistance
case lunr.QueryLexer.BOOST:
return lunr.QueryParser.parseBoost
case lunr.QueryLexer.PRESENCE:
parser.nextClause()
return lunr.QueryParser.parsePresence
default:
var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'"
throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
}
}
lunr.QueryParser.parseBoost = function (parser) {
var lexeme = parser.consumeLexeme()
if (lexeme == undefined) {
return
}
var boost = parseInt(lexeme.str, 10)
if (isNaN(boost)) {
var errorMessage = "boost must be numeric"
throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
}
parser.currentClause.boost = boost
var nextLexeme = parser.peekLexeme()
if (nextLexeme == undefined) {
parser.nextClause()
return
}
switch (nextLexeme.type) {
case lunr.QueryLexer.TERM:
parser.nextClause()
return lunr.QueryParser.parseTerm
case lunr.QueryLexer.FIELD:
parser.nextClause()
return lunr.QueryParser.parseField
case lunr.QueryLexer.EDIT_DISTANCE:
return lunr.QueryParser.parseEditDistance
case lunr.QueryLexer.BOOST:
return lunr.QueryParser.parseBoost
case lunr.QueryLexer.PRESENCE:
parser.nextClause()
return lunr.QueryParser.parsePresence
default:
var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'"
throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
}
}
================================================
FILE: lib/set.js
================================================
/*!
* lunr.Set
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
 * A lunr set.
 *
 * Elements are stored as the keys of a prototype-less object, so every
 * element is compared by its string form.
 *
 * @constructor
 * @param {Array} [elements] - The initial elements of the set. Duplicates are
 * stored, and counted in `length`, only once.
 */
lunr.Set = function (elements) {
  this.elements = Object.create(null)
  this.length = 0

  if (elements) {
    for (var i = 0; i < elements.length; i++) {
      var element = elements[i]

      // only count an element the first time it is seen, otherwise the
      // length of e.g. a union of overlapping sets is over-reported
      if (!(element in this.elements)) {
        this.elements[element] = true
        this.length += 1
      }
    }
  }
}

/**
 * A complete set that contains all elements.
 *
 * @static
 * @readonly
 * @type {lunr.Set}
 */
lunr.Set.complete = {
  // intersecting with everything leaves the other set unchanged
  intersect: function (other) {
    return other
  },

  // nothing can be added to a set that already contains everything
  union: function () {
    return this
  },

  contains: function () {
    return true
  }
}

/**
 * An empty set that contains no elements.
 *
 * @static
 * @readonly
 * @type {lunr.Set}
 */
lunr.Set.empty = {
  // intersecting with nothing always yields nothing
  intersect: function () {
    return this
  },

  // the union with nothing is the other set itself
  union: function (other) {
    return other
  },

  contains: function () {
    return false
  }
}

/**
 * Returns true if this set contains the specified object.
 *
 * @param {object} object - Object whose presence in this set is to be tested.
 * @returns {boolean} - True if this set contains the specified object.
 */
lunr.Set.prototype.contains = function (object) {
  return !!this.elements[object]
}

/**
 * Returns a new set containing only the elements that are present in both
 * this set and the specified set.
 *
 * When the specified set is lunr.Set.complete this set itself is returned,
 * and when it is lunr.Set.empty the empty set is returned; no new set is
 * created in either case.
 *
 * @param {lunr.Set} other - set to intersect with this set.
 * @returns {lunr.Set} a new set that is the intersection of this and the specified set.
 */
lunr.Set.prototype.intersect = function (other) {
  var a, b, elements, intersection = []

  if (other === lunr.Set.complete) {
    return this
  }

  if (other === lunr.Set.empty) {
    return other
  }

  // iterate over the smaller of the two sets and probe the larger one
  if (this.length < other.length) {
    a = this
    b = other
  } else {
    a = other
    b = this
  }

  elements = Object.keys(a.elements)

  for (var i = 0; i < elements.length; i++) {
    var element = elements[i]
    if (element in b.elements) {
      intersection.push(element)
    }
  }

  return new lunr.Set (intersection)
}

/**
 * Returns a new set combining the elements of this and the specified set.
 *
 * When the specified set is lunr.Set.complete the complete set is returned,
 * and when it is lunr.Set.empty this set itself is returned; no new set is
 * created in either case.
 *
 * @param {lunr.Set} other - set to union with this set.
 * @return {lunr.Set} a new set that is the union of this and the specified set.
 */
lunr.Set.prototype.union = function (other) {
  if (other === lunr.Set.complete) {
    return lunr.Set.complete
  }

  if (other === lunr.Set.empty) {
    return this
  }

  return new lunr.Set(Object.keys(this.elements).concat(Object.keys(other.elements)))
}
================================================
FILE: lib/stemmer.js
================================================
/* eslint-disable */
/*!
* lunr.stemmer
* Copyright (C) @YEAR Oliver Nightingale
* Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
*/
/**
 * lunr.stemmer is an english language stemmer, this is a JavaScript
 * implementation of the PorterStemmer taken from http://tartarus.org/~martin
 *
 * The exported function is a pipeline function: it takes a token and asks it
 * to update its string with the stemmed form, returning whatever
 * `token.update` returns.
 *
 * @static
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token - The token whose string is to be stemmed
 * @returns {lunr.Token}
 * @see {@link lunr.Pipeline}
 * @function
 */
lunr.stemmer = (function(){
// Step 2 suffix -> replacement table; applied to a matching suffix when the
// remaining stem has measure m > 0 (see re_mgr0).
var step2list = {
"ational" : "ate",
"tional" : "tion",
"enci" : "ence",
"anci" : "ance",
"izer" : "ize",
"bli" : "ble",
"alli" : "al",
"entli" : "ent",
"eli" : "e",
"ousli" : "ous",
"ization" : "ize",
"ation" : "ate",
"ator" : "ate",
"alism" : "al",
"iveness" : "ive",
"fulness" : "ful",
"ousness" : "ous",
"aliti" : "al",
"iviti" : "ive",
"biliti" : "ble",
"logi" : "log"
},
// Step 3 suffix -> replacement table; also applied only when the stem has m > 0.
step3list = {
"icate" : "ic",
"ative" : "",
"alize" : "al",
"iciti" : "ic",
"ical" : "ic",
"ful" : "",
"ness" : ""
},
// Building blocks for the regular expressions that test the "measure" (m)
// of a stem, i.e. how many vowel-consonant sequences it contains.
c = "[^aeiou]", // consonant
v = "[aeiouy]", // vowel
C = c + "[^aeiouy]*", // consonant sequence
V = v + "[aeiou]*", // vowel sequence
mgr0 = "^(" + C + ")?" + V + C, // [C]VC... is m>0
meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1
mgr1 = "^(" + C + ")?" + V + C + V + C, // [C]VCVC... is m>1
s_v = "^(" + C + ")?" + v; // vowel in stem
// All regular expressions are created once, when this closure runs, and are
// then shared by every call to porterStemmer.
var re_mgr0 = new RegExp(mgr0);
var re_mgr1 = new RegExp(mgr1);
var re_meq1 = new RegExp(meq1);
var re_s_v = new RegExp(s_v);
var re_1a = /^(.+?)(ss|i)es$/;
var re2_1a = /^(.+?)([^s])s$/;
var re_1b = /^(.+?)eed$/;
var re2_1b = /^(.+?)(ed|ing)$/;
// matches the last character of a word; used to drop that character
var re_1b_2 = /.$/;
var re2_1b_2 = /(at|bl|iz)$/;
// a doubled letter at the end of the word, other than a vowel, y, l, s or z
var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
// a whole word of the form consonant sequence, vowel, then a single
// character that is not a vowel, w, x or y
var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");
var re_1c = /^(.+?[^aeiou])y$/;
var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
var re2_4 = /^(.+?)(s|t)(ion)$/;
var re_5 = /^(.+?)e$/;
var re_5_1 = /ll$/;
// same pattern as re4_1b_2, used by step 5
var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");
// Stems a single word. `w` is the string to stem and the stemmed string is
// returned; words shorter than three characters are returned unchanged.
var porterStemmer = function porterStemmer(w) {
var stem,
suffix,
firstch,
re,
re2,
re3,
re4;
if (w.length < 3) { return w; }
// An initial "y" is upper-cased for the duration of the algorithm so that
// the lower-case vowel class `v` does not match it; it is restored at the end.
firstch = w.substr(0,1);
if (firstch == "y") {
w = firstch.toUpperCase() + w.substr(1);
}
// Step 1a
// "...sses" -> "...ss" and "...ies" -> "...i"; otherwise a trailing "s" that
// is not preceded by another "s" is removed.
re = re_1a
re2 = re2_1a;
if (re.test(w)) { w = w.replace(re,"$1$2"); }
else if (re2.test(w)) { w = w.replace(re2,"$1$2"); }
// Step 1b
// "...eed": drop the last character when the part before "eed" has m > 0.
// "...ed" / "...ing": drop the suffix when the remaining stem contains a
// vowel, then tidy the result up (see below).
re = re_1b;
re2 = re2_1b;
if (re.test(w)) {
var fp = re.exec(w);
re = re_mgr0;
if (re.test(fp[1])) {
re = re_1b_2;
w = w.replace(re,"");
}
} else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1];
re2 = re_s_v;
if (re2.test(stem)) {
w = stem;
re2 = re2_1b_2;
re3 = re3_1b_2;
re4 = re4_1b_2;
// after removing the suffix: a stem ending in at/bl/iz gets an "e" back,
// a stem ending in a doubled letter (see re3_1b_2) loses its last
// character, and a stem matching re4_1b_2 gets an "e" appended
if (re2.test(w)) { w = w + "e"; }
else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); }
else if (re4.test(w)) { w = w + "e"; }
}
}
// Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
re = re_1c;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
w = stem + "i";
}
// Step 2
// replace a suffix listed in step2list when the stem before it has m > 0
re = re_2;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = re_mgr0;
if (re.test(stem)) {
w = stem + step2list[suffix];
}
}
// Step 3
// replace a suffix listed in step3list when the stem before it has m > 0
re = re_3;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = re_mgr0;
if (re.test(stem)) {
w = stem + step3list[suffix];
}
}
// Step 4
// remove one of the suffixes in re_4 when the stem before it has m > 1;
// failing that, remove "ion" when it follows "s" or "t" and the stem
// (including that "s" or "t") has m > 1
re = re_4;
re2 = re2_4;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = re_mgr1;
if (re.test(stem)) {
w = stem;
}
} else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1] + fp[2];
re2 = re_mgr1;
if (re2.test(stem)) {
w = stem;
}
}
// Step 5
// remove a trailing "e" when the stem has m > 1, or when it has m = 1 and
// does not match re3_5
re = re_5;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = re_mgr1;
re2 = re_meq1;
re3 = re3_5;
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
w = stem;
}
}
// a word ending in "ll" with m > 1 loses its last character
re = re_5_1;
re2 = re_mgr1;
if (re.test(w) && re2.test(w)) {
re = re_1b_2;
w = w.replace(re,"");
}
// and turn initial Y back to y
if (firstch == "y") {
w = firstch.toLowerCase() + w.substr(1);
}
return w;
};
// The pipeline function itself: the token's string is replaced with the
// result of porterStemmer by way of token.update.
return function (token) {
return token.update(porterStemmer);
}
})();
// Register the stemmer with lunr.Pipeline under the label 'stemmer'.
lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')
================================================
FILE: lib/stop_word_filter.js
================================================
/*!
* lunr.stopWordFilter
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
 * lunr.generateStopWordFilter builds a stopWordFilter function from the
 * provided list of stop words.
 *
 * The built in lunr.stopWordFilter is built using this generator and can be
 * used to generate custom stopWordFilters for applications or non English
 * languages.
 *
 * The returned filter passes a token through unchanged unless its string
 * form is one of the stop words, in which case (as for a falsy token)
 * undefined is returned.
 *
 * @function
 * @param {Array} stopWords - The list of stop words to filter out.
 * @returns {lunr.PipelineFunction}
 * @see lunr.Pipeline
 * @see lunr.stopWordFilter
 */
lunr.generateStopWordFilter = function (stopWords) {
  // Each stop word is stored under its own name. Comparing the stored value
  // with the looked up word means properties inherited by the lookup object
  // are never mistaken for stop words.
  var lookup = {}

  stopWords.forEach(function (stopWord) {
    lookup[stopWord] = stopWord
  })

  return function (token) {
    if (!token) {
      return
    }

    var word = token.toString()

    if (lookup[word] !== word) {
      return token
    }
  }
}
/**
 * lunr.stopWordFilter is the English language stop word filter. It is built
 * by passing the word list below to lunr.generateStopWordFilter; any token
 * whose string is in the list does not pass through the filter.
 *
 * This is intended to be used in the Pipeline. If the token does not pass the
 * filter then undefined will be returned.
 *
 * @function
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token - A token to check for being a stop word.
 * @returns {lunr.Token}
 * @see {@link lunr.Pipeline}
 */
lunr.stopWordFilter = lunr.generateStopWordFilter([
  // the list is kept in alphabetical order, one line per initial letter
  'a', 'able', 'about', 'across', 'after', 'all', 'almost', 'also', 'am', 'among', 'an', 'and', 'any', 'are', 'as', 'at',
  'be', 'because', 'been', 'but', 'by',
  'can', 'cannot', 'could',
  'dear', 'did', 'do', 'does',
  'either', 'else', 'ever', 'every',
  'for', 'from',
  'get', 'got',
  'had', 'has', 'have', 'he', 'her', 'hers', 'him', 'his', 'how', 'however',
  'i', 'if', 'in', 'into', 'is', 'it', 'its',
  'just',
  'least', 'let', 'like', 'likely',
  'may', 'me', 'might', 'most', 'must', 'my',
  'neither', 'no', 'nor', 'not',
  'of', 'off', 'often', 'on', 'only', 'or', 'other', 'our', 'own',
  'rather',
  'said', 'say', 'says', 'she', 'should', 'since', 'so', 'some',
  'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these', 'they', 'this', 'tis', 'to', 'too', 'twas',
  'us',
  'wants', 'was', 'we', 'were', 'what', 'when', 'where', 'which', 'while', 'who', 'whom', 'why', 'will', 'with', 'would',
  'yet', 'you', 'your'
])

// Register the filter with lunr.Pipeline under the label 'stopWordFilter'.
lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter')
================================================
FILE: lib/token.js
================================================
/**
 * Wraps the string form of a token, together with any metadata about it,
 * while the token travels through the text processing pipeline.
 *
 * @constructor
 * @param {string} [str=''] - The string token being wrapped.
 * @param {object} [metadata={}] - Metadata associated with this token.
 */
lunr.Token = function (str, metadata) {
  this.str = str ? str : ""
  this.metadata = metadata ? metadata : {}
}

/**
 * Gives back the string wrapped by this token.
 *
 * @returns {string}
 */
lunr.Token.prototype.toString = function () {
  return this.str
}

/**
 * The signature of the functions accepted by `update` and `clone`. The value
 * returned by the function becomes the new token string.
 *
 * @callback lunr.Token~updateFunction
 * @param {string} str - The string representation of the token.
 * @param {Object} metadata - All metadata associated with this token.
 */

/**
 * Replaces the wrapped string with the result of calling the given function
 * with the current string and the token metadata.
 *
 * @example
 * token.update(function (str, metadata) {
 *   return str.toUpperCase()
 * })
 *
 * @param {lunr.Token~updateFunction} fn - A function to apply to the token string.
 * @returns {lunr.Token} This token, so that calls can be chained.
 */
lunr.Token.prototype.update = function (fn) {
  var updated = fn(this.str, this.metadata)
  this.str = updated
  return this
}

/**
 * Builds a new token from this one. When a function is given the new token
 * wraps the result of applying it to this token's string and metadata,
 * otherwise it wraps the same string. The metadata object is handed to the
 * new token as is, it is not copied.
 *
 * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token.
 * @returns {lunr.Token}
 */
lunr.Token.prototype.clone = function (fn) {
  var str = this.str

  if (fn) {
    str = fn(str, this.metadata)
  }

  return new lunr.Token (str, this.metadata)
}
================================================
FILE: lib/token_set.js
================================================
/*!
* lunr.TokenSet
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
 * A token set is used to store the unique list of all tokens
 * within an index. Token sets are also used to represent an
 * incoming query to the index, this query token set and index
 * token set are then intersected to find which tokens to look
 * up in the inverted index.
 *
 * A token set can hold multiple tokens, as in the case of the
 * index token set, or it can hold a single token as in the
 * case of a simple query token set.
 *
 * Additionally token sets are used to perform wildcard matching.
 * Leading, contained and trailing wildcards are supported, and
 * from this edit distance matching can also be provided.
 *
 * Token sets are implemented as a minimal finite state automata,
 * where both common prefixes and suffixes are shared between tokens.
 * This helps to reduce the space used for storing the token set.
 *
 * Each instance is a single node of that automaton: `final` marks the end
 * of a token, `edges` maps a character to the next node and `id` is a
 * number unique to the node.
 *
 * @constructor
 */
lunr.TokenSet = function () {
  this.final = false
  this.edges = {}
  this.id = lunr.TokenSet._nextId
  lunr.TokenSet._nextId += 1
}

/**
 * Keeps track of the next, auto increment, identifier to assign
 * to a new tokenSet.
 *
 * TokenSets require a unique identifier to be correctly minimised.
 *
 * @private
 */
lunr.TokenSet._nextId = 1

/**
 * Creates a TokenSet instance from the given sorted array of words.
 *
 * The words are inserted, in order, into a lunr.TokenSet.Builder and the
 * root of the finished builder is returned.
 *
 * @param {String[]} arr - A sorted array of strings to create the set from.
 * @returns {lunr.TokenSet}
 * @throws Will throw an error if the input array is not sorted.
 */
lunr.TokenSet.fromArray = function (arr) {
  var builder = new lunr.TokenSet.Builder

  for (var i = 0, len = arr.length; i < len; i++) {
    builder.insert(arr[i])
  }

  builder.finish()
  return builder.root
}

/**
 * Creates a token set from a query clause.
 *
 * A clause that has an `editDistance` property, whatever its value, is
 * turned into a fuzzy token set; any other clause into a plain one.
 *
 * @private
 * @param {Object} clause - A single clause from lunr.Query.
 * @param {string} clause.term - The query clause term.
 * @param {number} [clause.editDistance] - The optional edit distance for the term.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromClause = function (clause) {
  if ('editDistance' in clause) {
    return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance)
  } else {
    return lunr.TokenSet.fromString(clause.term)
  }
}

/**
 * Creates a token set representing a single string with a specified
 * edit distance.
 *
 * Insertions, deletions, substitutions and transpositions are each
 * treated as an edit distance of 1.
 *
 * Increasing the allowed edit distance will have a dramatic impact
 * on the performance of both creating and intersecting these TokenSets.
 * It is advised to keep the edit distance less than 3.
 *
 * An edit distance that is not greater than zero allows no edits at all.
 *
 * @param {string} str - The string to create the token set from.
 * @param {number} editDistance - The allowed edit distance to match.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromFuzzyString = function (str, editDistance) {
  var root = new lunr.TokenSet

  // Each frame pairs a node of the set being built with the part of the
  // string that still has to be matched from that node and the number of
  // edits that may still be spent on it.
  var stack = [{
    node: root,
    editsRemaining: editDistance,
    str: str
  }]

  while (stack.length) {
    var frame = stack.pop()

    // no edit
    if (frame.str.length > 0) {
      var char = frame.str.charAt(0),
          noEditNode

      if (char in frame.node.edges) {
        noEditNode = frame.node.edges[char]
      } else {
        noEditNode = new lunr.TokenSet
        frame.node.edges[char] = noEditNode
      }

      if (frame.str.length == 1) {
        noEditNode.final = true
      }

      stack.push({
        node: noEditNode,
        editsRemaining: frame.editsRemaining,
        str: frame.str.slice(1)
      })
    }

    // No edits left to spend. The comparison is written this way so that a
    // negative, fractional or NaN budget also stops here rather than being
    // decremented forever.
    if (!(frame.editsRemaining > 0)) {
      continue
    }

    // insertion
    if ("*" in frame.node.edges) {
      var insertionNode = frame.node.edges["*"]
    } else {
      var insertionNode = new lunr.TokenSet
      frame.node.edges["*"] = insertionNode
    }

    if (frame.str.length == 0) {
      insertionNode.final = true
    }

    stack.push({
      node: insertionNode,
      editsRemaining: frame.editsRemaining - 1,
      str: frame.str
    })

    // deletion
    // can only do a deletion if we have enough edits remaining
    // and if there are characters left to delete in the string
    if (frame.str.length > 1) {
      stack.push({
        node: frame.node,
        editsRemaining: frame.editsRemaining - 1,
        str: frame.str.slice(1)
      })
    }

    // deletion
    // just removing the last character from the str
    if (frame.str.length == 1) {
      frame.node.final = true
    }

    // substitution
    // can only do a substitution if we have enough edits remaining
    // and if there are characters left to substitute
    if (frame.str.length >= 1) {
      if ("*" in frame.node.edges) {
        var substitutionNode = frame.node.edges["*"]
      } else {
        var substitutionNode = new lunr.TokenSet
        frame.node.edges["*"] = substitutionNode
      }

      if (frame.str.length == 1) {
        substitutionNode.final = true
      }

      stack.push({
        node: substitutionNode,
        editsRemaining: frame.editsRemaining - 1,
        str: frame.str.slice(1)
      })
    }

    // transposition
    // can only do a transposition if there are edits remaining
    // and there are enough characters to transpose
    if (frame.str.length > 1) {
      var charA = frame.str.charAt(0),
          charB = frame.str.charAt(1),
          transposeNode

      if (charB in frame.node.edges) {
        transposeNode = frame.node.edges[charB]
      } else {
        transposeNode = new lunr.TokenSet
        frame.node.edges[charB] = transposeNode
      }

      // the node reached via charB can never end the token here: at least
      // charA is still to be matched, which the pushed frame takes care of
      stack.push({
        node: transposeNode,
        editsRemaining: frame.editsRemaining - 1,
        str: charA + frame.str.slice(2)
      })
    }
  }

  return root
}

/**
 * Creates a TokenSet from a string.
 *
 * The string may contain one or more wildcard characters (*)
 * that will allow wildcard matching when intersecting with
 * another TokenSet.
 *
 * @param {string} str - The string to create a TokenSet from.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromString = function (str) {
  var node = new lunr.TokenSet,
      root = node

  /*
   * Iterates through all characters within the passed string
   * appending a node for each character.
   *
   * When a wildcard character is found then a self
   * referencing edge is introduced to continually match
   * any number of any characters.
   */
  for (var i = 0, len = str.length; i < len; i++) {
    var char = str[i],
        final = (i == len - 1)

    if (char == "*") {
      node.edges[char] = node
      node.final = final

    } else {
      var next = new lunr.TokenSet
      next.final = final

      node.edges[char] = next
      node = next
    }
  }

  return root
}

/**
 * Converts this TokenSet into an array of strings
 * contained within the TokenSet.
 *
 * This is not intended to be used on a TokenSet that
 * contains wildcards, in these cases the results are
 * undefined and are likely to cause an infinite loop.
 *
 * @returns {string[]}
 */
lunr.TokenSet.prototype.toArray = function () {
  var words = []

  var stack = [{
    prefix: "",
    node: this
  }]

  while (stack.length) {
    var frame = stack.pop(),
        edges = Object.keys(frame.node.edges),
        len = edges.length

    if (frame.node.final) {
      /* In Safari, at this point the prefix is sometimes corrupted, see:
       * https://github.com/olivernn/lunr.js/issues/279 Calling any
       * String.prototype method forces Safari to "cast" this string to what
       * it's supposed to be, fixing the bug. */
      frame.prefix.charAt(0)
      words.push(frame.prefix)
    }

    for (var i = 0; i < len; i++) {
      var edge = edges[i]

      stack.push({
        prefix: frame.prefix.concat(edge),
        node: frame.node.edges[edge]
      })
    }
  }

  return words
}

/**
 * Generates a string representation of a TokenSet.
 *
 * This is intended to allow TokenSets to be used as keys
 * in objects, largely to aid the construction and minimisation
 * of a TokenSet. As such it is not designed to be a human
 * friendly representation of the TokenSet.
 *
 * The string is "1" or "0" for a final or non final node, followed by the
 * label and the target node id of every edge, in sorted label order. When a
 * `_str` value has been cached on the node that value is returned instead.
 *
 * @returns {string}
 */
lunr.TokenSet.prototype.toString = function () {
  // NOTE: Using Object.keys here as this.edges is very likely
  // to enter 'hash-mode' with many keys being added
  //
  // avoiding a for-in loop here as it leads to the function
  // being de-optimised (at least in V8). From some simple
  // benchmarks the performance is comparable, but allowing
  // V8 to optimize may mean easy performance wins in the future.

  if (this._str) {
    return this._str
  }

  var str = this.final ? '1' : '0',
      labels = Object.keys(this.edges).sort(),
      len = labels.length

  for (var i = 0; i < len; i++) {
    var label = labels[i],
        node = this.edges[label]

    str = str + label + node.id
  }

  return str
}

/**
 * Returns a new TokenSet that is the intersection of
 * this TokenSet and the passed TokenSet.
 *
 * This intersection will take into account any wildcards
 * contained within the TokenSet.
 *
 * @param {lunr.TokenSet} b - An other TokenSet to intersect with.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.prototype.intersect = function (b) {
  var output = new lunr.TokenSet,
      frame = undefined

  // Both sets are walked in step: `node` belongs to this set, `qNode` to the
  // passed set, and `output` is the node of the result they correspond to.
  var stack = [{
    qNode: b,
    output: output,
    node: this
  }]

  while (stack.length) {
    frame = stack.pop()

    // NOTE: As with the #toString method, we are using
    // Object.keys and a for loop instead of a for-in loop
    // as both of these objects enter 'hash' mode, causing
    // the function to be de-optimised in V8
    var qEdges = Object.keys(frame.qNode.edges),
        qLen = qEdges.length,
        nEdges = Object.keys(frame.node.edges),
        nLen = nEdges.length

    for (var q = 0; q < qLen; q++) {
      var qEdge = qEdges[q]

      for (var n = 0; n < nLen; n++) {
        var nEdge = nEdges[n]

        // a wildcard edge in the passed set matches every edge of this set
        if (nEdge == qEdge || qEdge == '*') {
          var node = frame.node.edges[nEdge],
              qNode = frame.qNode.edges[qEdge],
              final = node.final && qNode.final,
              next = undefined

          if (nEdge in frame.output.edges) {
            // an edge already exists for this character
            // no need to create a new node, just set the finality
            // bit unless this node is already final
            next = frame.output.edges[nEdge]
            next.final = next.final || final

          } else {
            // no edge exists yet, must create one
            // set the finality bit and insert it
            // into the output
            next = new lunr.TokenSet
            next.final = final
            frame.output.edges[nEdge] = next
          }

          stack.push({
            qNode: qNode,
            output: next,
            node: node
          })
        }
      }
    }
  }

  return output
}
================================================
FILE: lib/token_set_builder.js
================================================
/**
 * Incrementally builds a lunr.TokenSet from words that are inserted in
 * sorted order, sharing equivalent nodes between words as it goes.
 *
 * `uncheckedNodes` holds the path (parent, character, child) of the most
 * recently inserted word that has not been minimised yet, and
 * `minimizedNodes` maps the string key of a node to the node kept for it.
 *
 * @constructor
 */
lunr.TokenSet.Builder = function () {
  this.previousWord = ""
  this.root = new lunr.TokenSet
  this.uncheckedNodes = []
  this.minimizedNodes = {}
}

/**
 * Adds a word to the set being built.
 *
 * @param {string} word - The word to add; must not sort before the previously inserted word.
 * @throws {Error} When the word sorts before the previously inserted word.
 */
lunr.TokenSet.Builder.prototype.insert = function (word) {
  if (word < this.previousWord) {
    throw new Error ("Out of order word insertion")
  }

  // length of the prefix this word shares with the previous one
  var previous = this.previousWord,
      limit = Math.min(word.length, previous.length),
      commonPrefix = 0

  while (commonPrefix < limit && word[commonPrefix] == previous[commonPrefix]) {
    commonPrefix++
  }

  // everything below the shared prefix belongs to the previous word only
  // and can be minimised now
  this.minimize(commonPrefix)

  var pending = this.uncheckedNodes,
      node = pending.length == 0 ? this.root : pending[pending.length - 1].child

  // append a fresh node for every character after the shared prefix
  for (var i = commonPrefix; i < word.length; i++) {
    var nextNode = new lunr.TokenSet,
        char = word[i]

    node.edges[char] = nextNode

    pending.push({
      parent: node,
      char: char,
      child: nextNode
    })

    node = nextNode
  }

  node.final = true
  this.previousWord = word
}

/**
 * Minimises whatever is still unchecked. To be called once all words have
 * been inserted.
 */
lunr.TokenSet.Builder.prototype.finish = function () {
  this.minimize(0)
}

/**
 * Works through the unchecked nodes from the end of the list until only
 * `downTo` of them remain. A child whose string key has been seen before is
 * replaced, on its parent, by the node already kept for that key; otherwise
 * the child becomes the node kept for its key.
 *
 * @param {number} downTo - The number of unchecked nodes to leave in place.
 */
lunr.TokenSet.Builder.prototype.minimize = function (downTo) {
  while (this.uncheckedNodes.length > downTo) {
    var last = this.uncheckedNodes.length - 1,
        entry = this.uncheckedNodes[last],
        childKey = entry.child.toString()

    if (childKey in this.minimizedNodes) {
      entry.parent.edges[entry.char] = this.minimizedNodes[childKey]
    } else {
      // Cache the key for this node since
      // we know it can't change anymore
      entry.child._str = childKey

      this.minimizedNodes[childKey] = entry.child
    }

    this.uncheckedNodes.pop()
  }
}
================================================
FILE: lib/tokenizer.js
================================================
/*!
* lunr.tokenizer
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
 * Splits a value into the tokens that are to be inserted into the search
 * index.
 *
 * `null` and `undefined` produce no tokens. An array produces one token per
 * element, each element being converted to a lower-cased string. Anything
 * else is converted with `toString`, lower-cased and split wherever a
 * character matches `lunr.tokenizer.separator`; every token produced this
 * way records its `position` ([start, length] within the string) and its
 * `index` (its position in the token list) in its metadata.
 *
 * Optional metadata can be passed to the tokenizer, this metadata will be
 * cloned and added as metadata to every token that is created from the
 * object to be tokenized.
 *
 * @static
 * @param {?(string|object|object[])} obj - The object to convert into tokens
 * @param {?object} metadata - Optional metadata to associate with every token
 * @returns {lunr.Token[]}
 * @see {@link lunr.Pipeline}
 */
lunr.tokenizer = function (obj, metadata) {
  // loose equality with null is true for undefined as well
  if (obj == null) {
    return []
  }

  if (Array.isArray(obj)) {
    var wrapElement = function (t) {
      var elementStr = lunr.utils.asString(t).toLowerCase()
      return new lunr.Token(elementStr, lunr.utils.clone(metadata))
    }

    return obj.map(wrapElement)
  }

  var text = obj.toString().toLowerCase(),
      end = text.length,
      tokens = [],
      start = 0

  // `cursor` runs one past the last character so that the final token is
  // flushed as well
  for (var cursor = 0; cursor <= end; cursor++) {
    var current = text.charAt(cursor),
        atBoundary = current.match(lunr.tokenizer.separator) || cursor == end

    if (!atBoundary) {
      continue
    }

    var width = cursor - start

    // consecutive separators yield empty slices, which are not tokens
    if (width > 0) {
      var tokenMetadata = lunr.utils.clone(metadata) || {}
      tokenMetadata["position"] = [start, width]
      tokenMetadata["index"] = tokens.length

      tokens.push(new lunr.Token (text.slice(start, cursor), tokenMetadata))
    }

    start = cursor + 1
  }

  return tokens
}

/**
 * The separator used to split a string into tokens. Override this property
 * to change the behaviour of `lunr.tokenizer` when tokenizing strings. By
 * default this splits on whitespace and hyphens.
 *
 * @static
 * @see lunr.tokenizer
 */
lunr.tokenizer.separator = /[\s\-]+/
================================================
FILE: lib/trimmer.js
================================================
/*!
* lunr.trimmer
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
 * lunr.trimmer is a pipeline function that strips any run of non word
 * characters from the start and from the end of a token, leaving the
 * characters in between untouched.
 *
 * This implementation may not work correctly for non latin
 * characters and should either be removed or adapted for use
 * with languages with non-latin characters.
 *
 * @static
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token The token to pass through the filter
 * @returns {lunr.Token}
 * @see lunr.Pipeline
 */
lunr.trimmer = function (token) {
  var stripEdges = function (s) {
    var withoutLeading = s.replace(/^\W+/, '')
    return withoutLeading.replace(/\W+$/, '')
  }

  return token.update(stripEdges)
}
// Register the trimmer with lunr.Pipeline under the label 'trimmer'.
lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer')
================================================
FILE: lib/utils.js
================================================
/*!
* lunr.utils
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
 * A namespace containing utils for the rest of the lunr library
 * @namespace lunr.utils
 */
lunr.utils = {}

/**
 * Print a warning message to the console.
 *
 * Nothing happens when there is no `console`, or when it has no `warn`
 * method.
 *
 * @param {String} message The message to be printed.
 * @memberOf lunr.utils
 * @function
 */
lunr.utils.warn = function (message) {
  /* eslint-disable no-console */
  // `console` is looked up with typeof rather than through the enclosing
  // `this`: in strict mode code and ES modules there is no global `this` to
  // look it up on, and warning must never throw.
  if (typeof console !== "undefined" && console.warn) {
    console.warn(message)
  }
  /* eslint-enable no-console */
}

/**
 * Convert an object to a string.
 *
 * In the case of `null` and `undefined` the function returns
 * the empty string, in all other cases the result of calling
 * `toString` on the passed object is returned.
 *
 * @param {Any} obj The object to convert to a string.
 * @return {String} string representation of the passed object.
 * @memberOf lunr.utils
 */
lunr.utils.asString = function (obj) {
  if (obj === void 0 || obj === null) {
    return ""
  } else {
    return obj.toString()
  }
}

/**
 * Clones an object.
 *
 * Will create a copy of an existing object such that any mutations
 * on the copy cannot affect the original.
 *
 * Only shallow objects are supported, passing a nested object to this
 * function will cause a TypeError.
 *
 * Objects with primitives, and arrays of primitives are supported. More
 * precisely: string, number and boolean values are copied, arrays are
 * copied with `slice`, and any other kind of value causes a TypeError.
 * `null` and `undefined` are returned as they are. The clone has no
 * prototype.
 *
 * @param {Object} obj The object to clone.
 * @return {Object} a clone of the passed object.
 * @throws {TypeError} when a nested object is passed.
 * @memberOf lunr.utils
 */
lunr.utils.clone = function (obj) {
  if (obj === null || obj === undefined) {
    return obj
  }

  var clone = Object.create(null),
      keys = Object.keys(obj)

  for (var i = 0; i < keys.length; i++) {
    var key = keys[i],
        val = obj[key]

    if (Array.isArray(val)) {
      clone[key] = val.slice()
      continue
    }

    if (typeof val === 'string' ||
        typeof val === 'number' ||
        typeof val === 'boolean') {
      clone[key] = val
      continue
    }

    throw new TypeError("clone is not deep and does not support nested objects")
  }

  return clone
}
================================================
FILE: lib/vector.js
================================================
/*!
* lunr.Vector
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
* A vector is used to construct the vector space of documents and queries. These
* vectors support operations to determine the similarity between two documents or
* a document and a query.
*
* Normally no parameters are required for initializing a vector, but in the case of
* loading a previously dumped vector the raw elements can be provided to the constructor.
*
* For performance reasons vectors are implemented with a flat array, where an elements
* index is immediately followed by its value. E.g. [index, value, index, value]. This
* allows the underlying array to be as sparse as possible and still offer decent
* performance when being used for vector calculations.
*
* @constructor
* @param {Number[]} [elements] - The flat list of element index and element value pairs.
*/
lunr.Vector = function (elements) {
  // A magnitude of 0 doubles as "not cached yet"
  this._magnitude = 0
  this.elements = elements || []
}

/**
 * Finds the position in the flat elements array at which a given index
 * lives, or at which it would have to be inserted to keep the indexes sorted.
 *
 * Used internally by insert and upsert. When the index is already present
 * the position of the existing pair is returned; callers must check for
 * themselves whether the slot at that position holds a duplicate.
 *
 * @param {Number} index - The index to locate within the vector.
 * @returns {Number}
 */
lunr.Vector.prototype.positionForIndex = function (index) {
  var elements = this.elements

  // Nothing stored yet, so the pair goes at the very beginning
  if (elements.length == 0) {
    return 0
  }

  var lo = 0,
      hi = elements.length / 2,
      span = hi - lo,
      mid = Math.floor(span / 2),
      midIndex = elements[mid * 2]

  // Binary search over the [index, value] pairs: keep halving the range
  // until one pair is left or the pivot holds the requested index
  while (span > 1 && midIndex != index) {
    if (midIndex < index) {
      lo = mid
    } else if (midIndex > index) {
      hi = mid
    }

    span = hi - lo
    mid = lo + Math.floor(span / 2)
    midIndex = elements[mid * 2]
  }

  // An exact match, or a pivot holding a larger index, means the pair
  // belongs at the pivot itself
  if (midIndex == index || midIndex > index) {
    return mid * 2
  }

  // A pivot holding a smaller index means the pair belongs right after it
  if (midIndex < index) {
    return (mid + 1) * 2
  }
}

/**
 * Adds a new element to the vector at the given index.
 *
 * Duplicates are not allowed: if the index is already present the string
 * "duplicate index" is thrown.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 */
lunr.Vector.prototype.insert = function (insertIdx, val) {
  var rejectDuplicate = function () {
    throw "duplicate index"
  }

  this.upsert(insertIdx, val, rejectDuplicate)
}

/**
 * Adds a new element to the vector, or updates the element already stored
 * at the given index.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 * @param {function} fn - Called only for updates, with the existing value and
 * the requested value; its return value replaces the existing value.
 */
lunr.Vector.prototype.upsert = function (insertIdx, val, fn) {
  // Any change invalidates the cached magnitude
  this._magnitude = 0

  var position = this.positionForIndex(insertIdx),
      alreadyPresent = this.elements[position] == insertIdx

  if (!alreadyPresent) {
    this.elements.splice(position, 0, insertIdx, val)
    return
  }

  this.elements[position + 1] = fn(this.elements[position + 1], val)
}

/**
 * Computes the magnitude (Euclidean length) of this vector, caching any
 * non-zero result until the vector is next modified through upsert.
 *
 * @returns {Number}
 */
lunr.Vector.prototype.magnitude = function () {
  if (this._magnitude) {
    return this._magnitude
  }

  var total = 0

  // Values sit at the odd positions of the flat array
  for (var i = 1, len = this.elements.length; i < len; i += 2) {
    var value = this.elements[i]
    total += value * value
  }

  this._magnitude = Math.sqrt(total)
  return this._magnitude
}

/**
 * Computes the dot product of this vector and another vector.
 *
 * @param {lunr.Vector} otherVector - The vector to compute the dot product with.
 * @returns {Number}
 */
lunr.Vector.prototype.dot = function (otherVector) {
  var left = this.elements,
      right = otherVector.elements,
      leftLength = left.length,
      rightLength = right.length,
      i = 0,
      j = 0,
      sum = 0

  // Walk both sorted index lists together; only indexes present in both
  // vectors contribute to the product
  while (i < leftLength && j < rightLength) {
    var leftIndex = left[i],
        rightIndex = right[j]

    if (leftIndex < rightIndex) {
      i += 2
      continue
    }

    if (leftIndex > rightIndex) {
      j += 2
      continue
    }

    if (leftIndex == rightIndex) {
      sum += left[i + 1] * right[j + 1]
      i += 2
      j += 2
    }
  }

  return sum
}

/**
 * Computes the similarity between this vector and another vector: their dot
 * product divided by the magnitude of this vector, or 0 when that quotient
 * is falsy (for example NaN for an empty vector).
 *
 * @param {lunr.Vector} otherVector - The other vector to calculate the
 * similarity with.
 * @returns {Number}
 */
lunr.Vector.prototype.similarity = function (otherVector) {
  var product = this.dot(otherVector),
      length = this.magnitude(),
      ratio = product / length

  return ratio || 0
}

/**
 * Returns the values stored in the vector, in index order, without their
 * indexes.
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toArray = function () {
  var pairCount = this.elements.length / 2,
      values = new Array (pairCount)

  for (var k = 0; k < pairCount; k++) {
    values[k] = this.elements[2 * k + 1]
  }

  return values
}

/**
 * A JSON serializable representation of the vector: the flat list of index
 * and value pairs.
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toJSON = function () {
  return this.elements
}
================================================
FILE: lunr.js
================================================
/**
* lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.3.9
* Copyright (C) 2020 Oliver Nightingale
* @license MIT
*/
;(function(){
/**
* A convenience function for configuring and constructing
* a new lunr Index.
*
* A lunr.Builder instance is created and the pipeline setup
* with a trimmer, stop word filter and stemmer.
*
* This builder object is yielded to the configuration function
* that is passed as a parameter, allowing the list of fields
* and other builder parameters to be customised.
*
* All documents _must_ be added within the passed config function.
*
* @example
* var idx = lunr(function () {
* this.field('title')
* this.field('body')
* this.ref('id')
*
* documents.forEach(function (doc) {
* this.add(doc)
* }, this)
* })
*
* @see {@link lunr.Builder}
* @see {@link lunr.Pipeline}
* @see {@link lunr.trimmer}
* @see {@link lunr.stopWordFilter}
* @see {@link lunr.stemmer}
* @namespace {function} lunr
*/
var lunr = function (config) {
  var builder = new lunr.Builder(),
      indexing = builder.pipeline,
      searching = builder.searchPipeline

  // Documents are trimmed, filtered for stop words and stemmed when indexed
  indexing.add(lunr.trimmer, lunr.stopWordFilter, lunr.stemmer)

  // The search pipeline only stems
  searching.add(lunr.stemmer)

  // The builder is exposed to the config function both as `this` and as
  // its first argument
  config.call(builder, builder)

  var index = builder.build()
  return index
}

lunr.version = "2.3.9"
/*!
* lunr.utils
* Copyright (C) 2020 Oliver Nightingale
*/
/**
* A namespace containing utils for the rest of the lunr library
* @namespace lunr.utils
*/
lunr.utils = {}

/**
 * Print a warning message to the console.
 *
 * The presence of `console` is checked with `typeof` each time the function
 * is called, rather than through a captured top-level `this`. This keeps the
 * function safe to call in strict mode and ES module contexts, where the
 * top-level `this` is undefined, as well as in environments with no console.
 *
 * @param {String} message The message to be printed.
 * @memberOf lunr.utils
 * @function
 */
lunr.utils.warn = (function () {
  /* eslint-disable no-console */
  return function (message) {
    if (typeof console !== 'undefined' && console.warn) {
      console.warn(message)
    }
  }
  /* eslint-enable no-console */
})()

/**
 * Convert an object to a string.
 *
 * In the case of `null` and `undefined` the function returns
 * the empty string, in all other cases the result of calling
 * `toString` on the passed object is returned.
 *
 * @param {Any} obj The object to convert to a string.
 * @return {String} string representation of the passed object.
 * @memberOf lunr.utils
 */
lunr.utils.asString = function (obj) {
  if (obj === void 0 || obj === null) {
    return ""
  }

  return obj.toString()
}

/**
 * Clones an object.
 *
 * Will create a copy of an existing object such that any mutations
 * on the copy cannot affect the original. The copy is created without
 * a prototype. `null` and `undefined` are returned unchanged.
 *
 * Only shallow objects are supported, passing a nested object to this
 * function will cause a TypeError.
 *
 * Objects with primitives (strings, numbers and booleans), and arrays
 * of primitives are supported.
 *
 * @param {Object} obj The object to clone.
 * @return {Object} a clone of the passed object.
 * @throws {TypeError} when a nested object is passed.
 * @memberOf lunr.utils
 */
lunr.utils.clone = function (obj) {
  if (obj === null || obj === undefined) {
    return obj
  }

  var clone = Object.create(null),
      keys = Object.keys(obj)

  for (var i = 0; i < keys.length; i++) {
    var key = keys[i],
        val = obj[key]

    // Arrays are copied one level deep so the clone owns its own array
    if (Array.isArray(val)) {
      clone[key] = val.slice()
      continue
    }

    if (typeof val === 'string' ||
        typeof val === 'number' ||
        typeof val === 'boolean') {
      clone[key] = val
      continue
    }

    // Anything else (objects, null, undefined, functions) is rejected
    throw new TypeError("clone is not deep and does not support nested objects")
  }

  return clone
}
/**
 * A reference to one field of one document, made up of a document
 * reference and a field name.
 *
 * @constructor
 * @param {*} docRef - The reference of the document.
 * @param {*} fieldName - The name of the field within that document.
 * @param {string} [stringValue] - An already computed string form of this
 * field ref; when omitted it is built on the first call to toString.
 */
lunr.FieldRef = function (docRef, fieldName, stringValue) {
  this.docRef = docRef
  this.fieldName = fieldName
  this._stringValue = stringValue
}

/**
 * The string placed between the field name and the document reference in
 * the string form of a field ref.
 *
 * @static
 */
lunr.FieldRef.joiner = "/"

/**
 * Parses the string form of a field ref, `fieldName + joiner + docRef`.
 *
 * The string is split at the first occurrence of the joiner, so the
 * document reference may itself contain the joiner. The full width of the
 * joiner is skipped, so joiners longer than one character are handled.
 *
 * @static
 * @param {string} s - The string form of a field ref.
 * @returns {lunr.FieldRef}
 * @throws {string} "malformed field ref string" when `s` does not contain
 * the joiner.
 */
lunr.FieldRef.fromString = function (s) {
  var joiner = lunr.FieldRef.joiner,
      n = s.indexOf(joiner)

  if (n === -1) {
    throw "malformed field ref string"
  }

  var fieldName = s.slice(0, n),
      docRef = s.slice(n + joiner.length)

  return new lunr.FieldRef (docRef, fieldName, s)
}

/**
 * Returns the string form of this field ref, building and caching it on
 * first use.
 *
 * @returns {string}
 */
lunr.FieldRef.prototype.toString = function () {
  if (this._stringValue == undefined) {
    this._stringValue = this.fieldName + lunr.FieldRef.joiner + this.docRef
  }

  return this._stringValue
}
/*!
* lunr.Set
* Copyright (C) 2020 Oliver Nightingale
*/
/**
* A lunr set.
*
* @constructor
*/
lunr.Set = function (elements) {
  // Members are stored as keys of a prototype-less object
  this.elements = Object.create(null)

  if (!elements) {
    this.length = 0
    return
  }

  // The length mirrors the size of the list that was passed in
  this.length = elements.length

  for (var i = 0; i < this.length; i++) {
    this.elements[elements[i]] = true
  }
}

/**
 * A stand-in for the set that contains every element.
 *
 * Intersecting it with another set yields that other set, its union with
 * anything is itself, and it reports every object as contained.
 *
 * @static
 * @readonly
 * @type {lunr.Set}
 */
lunr.Set.complete = {
  intersect: function (other) {
    return other
  },

  union: function () {
    return this
  },

  contains: function () {
    return true
  }
}

/**
 * A stand-in for the set that contains no elements.
 *
 * Intersecting it with anything yields itself, its union with another set
 * is that other set, and it reports no object as contained.
 *
 * @static
 * @readonly
 * @type {lunr.Set}
 */
lunr.Set.empty = {
  intersect: function () {
    return this
  },

  union: function (other) {
    return other
  },

  contains: function () {
    return false
  }
}

/**
 * Tests whether the specified object is a member of this set.
 *
 * @param {object} object - Object whose presence in this set is to be tested.
 * @returns {boolean} - True if this set contains the specified object.
 */
lunr.Set.prototype.contains = function (object) {
  return Boolean(this.elements[object])
}

/**
 * Builds the set of elements that are members of both this set and the
 * specified set.
 *
 * @param {lunr.Set} other - set to intersect with this set.
 * @returns {lunr.Set} the intersection of this and the specified set; when
 * `other` is the complete set this set itself is returned, and when it is
 * the empty set the empty set is returned.
 */
lunr.Set.prototype.intersect = function (other) {
  if (other === lunr.Set.complete) {
    return this
  }

  if (other === lunr.Set.empty) {
    return other
  }

  // Enumerate the keys of the set with the smaller length and probe the other
  var smaller = other,
      larger = this

  if (this.length < other.length) {
    smaller = this
    larger = other
  }

  var shared = Object.keys(smaller.elements).filter(function (element) {
    return element in larger.elements
  })

  return new lunr.Set (shared)
}

/**
 * Builds the set that combines the elements of this set and the specified
 * set.
 *
 * @param {lunr.Set} other - set to union with this set.
 * @return {lunr.Set} the union of this and the specified set; when `other`
 * is the complete set the complete set is returned, and when it is the
 * empty set this set itself is returned.
 */
lunr.Set.prototype.union = function (other) {
  if (other === lunr.Set.complete) {
    return lunr.Set.complete
  }

  if (other === lunr.Set.empty) {
    return this
  }

  var mine = Object.keys(this.elements),
      theirs = Object.keys(other.elements)

  return new lunr.Set(mine.concat(theirs))
}
/**
* A function to calculate the inverse document frequency for
* a posting. This is shared between the builder and the index
*
* @private
* @param {object} posting - The posting for a given term
* @param {number} documentCount - The total number of documents.
*/
lunr.idf = function (posting, documentCount) {
  var documentsWithTerm = 0

  for (var fieldName in posting) {
    // `_index` holds the term index rather than a field, so it is not counted
    if (fieldName != '_index') {
      documentsWithTerm += Object.keys(posting[fieldName]).length
    }
  }

  var numerator = documentCount - documentsWithTerm + 0.5,
      denominator = documentsWithTerm + 0.5,
      ratio = numerator / denominator

  return Math.log(1 + Math.abs(ratio))
}
/**
* A token wraps a string representation of a token
* as it is passed through the text processing pipeline.
*
* @constructor
* @param {string} [str=''] - The string token being wrapped.
* @param {object} [metadata={}] - Metadata associated with this token.
*/
lunr.Token = function (str, metadata) {
  // Falsy arguments fall back to an empty string and a fresh metadata object
  this.str = str || ""
  this.metadata = metadata || {}
}

/**
 * Gives back the string wrapped by this token.
 *
 * @returns {string}
 */
lunr.Token.prototype.toString = function () {
  return this.str
}

/**
 * A token update function is used when updating or optionally
 * when cloning a token. Its return value becomes the token string.
 *
 * @callback lunr.Token~updateFunction
 * @param {string} str - The string representation of the token.
 * @param {Object} metadata - All metadata associated with this token.
 */

/**
 * Replaces the wrapped string with the result of calling the given
 * function on it. The token is modified in place.
 *
 * @example
 * token.update(function (str, metadata) {
 *   return str.toUpperCase()
 * })
 *
 * @param {lunr.Token~updateFunction} fn - A function to apply to the token string.
 * @returns {lunr.Token} this token.
 */
lunr.Token.prototype.update = function (fn) {
  var updated = fn(this.str, this.metadata)
  this.str = updated
  return this
}

/**
 * Creates a new token from this one, optionally passing the string through
 * a function first. The new token is given the same metadata object as this
 * token, not a copy of it.
 *
 * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token.
 * @returns {lunr.Token}
 */
lunr.Token.prototype.clone = function (fn) {
  var transform = fn || function (s) { return s }
  var str = transform(this.str, this.metadata)

  return new lunr.Token (str, this.metadata)
}
/*!
* lunr.tokenizer
* Copyright (C) 2020 Oliver Nightingale
*/
/**
* A function for splitting a string into tokens ready to be inserted into
* the search index. Uses `lunr.tokenizer.separator` to split strings, change
* the value of this property to change how strings are split into tokens.
*
* This tokenizer will convert its parameter to a string by calling `toString` and
* then will split this string on the character in `lunr.tokenizer.separator`.
* Arrays will have their elements converted to strings and wrapped in a lunr.Token.
*
* Optional metadata can be passed to the tokenizer, this metadata will be cloned and
* added as metadata to every token that is created from the object to be tokenized.
*
* @static
* @param {?(string|object|object[])} obj - The object to convert into tokens
* @param {?object} metadata - Optional metadata to associate with every token
* @returns {lunr.Token[]}
* @see {@link lunr.Pipeline}
*/
lunr.tokenizer = function (obj, metadata) {
  // null and undefined produce no tokens at all
  if (obj == null) {
    return []
  }

  // Arrays are not split: every element becomes exactly one token
  if (Array.isArray(obj)) {
    return obj.map(function (item) {
      var lowered = lunr.utils.asString(item).toLowerCase()
      return new lunr.Token(lowered, lunr.utils.clone(metadata))
    })
  }

  var text = obj.toString().toLowerCase(),
      textLength = text.length,
      tokens = [],
      tokenStart = 0

  // The cursor runs one past the final character so that the last token
  // is flushed when the end of the string is reached
  for (var cursor = 0; cursor <= textLength; cursor++) {
    var ch = text.charAt(cursor),
        atBoundary = ch.match(lunr.tokenizer.separator) || cursor == textLength

    if (!atBoundary) {
      continue
    }

    var tokenLength = cursor - tokenStart

    // Consecutive separators produce zero-length slices, which are skipped
    if (tokenLength > 0) {
      var tokenMetadata = lunr.utils.clone(metadata) || {}
      tokenMetadata["position"] = [tokenStart, tokenLength]
      tokenMetadata["index"] = tokens.length

      tokens.push(new lunr.Token (text.slice(tokenStart, cursor), tokenMetadata))
    }

    tokenStart = cursor + 1
  }

  return tokens
}

/**
 * The separator used to split a string into tokens. Override this property to
 * change the behaviour of `lunr.tokenizer` when tokenizing strings. By default
 * this splits on whitespace and hyphens.
 *
 * @static
 * @see lunr.tokenizer
 */
lunr.tokenizer.separator = /[\s\-]+/
/*!
* lunr.Pipeline
* Copyright (C) 2020 Oliver Nightingale
*/
/**
* lunr.Pipelines maintain an ordered list of functions to be applied to all
* tokens in documents entering the search index and queries being ran against
* the index.
*
* An instance of lunr.Index created with the lunr shortcut will contain a
* pipeline with a stop word filter and an English language stemmer. Extra
* functions can be added before or after either of these functions or these
* default functions can be removed.
*
* When run the pipeline will call each function in turn, passing a token, the
* index of that token in the original list of all tokens and finally a list of
* all the original tokens.
*
* The output of functions in the pipeline will be passed to the next function
* in the pipeline. To exclude a token from entering the index the function
* should return undefined, the rest of the pipeline will not be called with
* this token.
*
* For serialisation of pipelines to work, all functions used in an instance of
* a pipeline should be registered with lunr.Pipeline. Registered functions can
* then be loaded. If trying to load a serialised pipeline that uses functions
* that are not registered an error will be thrown.
*
* If not planning on serialising the pipeline then registering pipeline functions
* is not necessary.
*
* @constructor
*/
lunr.Pipeline = function () {
  this._stack = []
}

// Registry of labelled pipeline functions, keyed by label. It has no
// prototype so that labels can never collide with inherited properties.
lunr.Pipeline.registeredFunctions = Object.create(null)

/**
 * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
 * string as well as all known metadata. A pipeline function can mutate the token string
 * or mutate (or add) metadata for a given token.
 *
 * A pipeline function can indicate that the passed token should be discarded by returning
 * null, undefined or an empty string. This token will not be passed to any downstream pipeline
 * functions and will not be added to the index.
 *
 * Multiple tokens can be returned by returning an array of tokens. Each token will be passed
 * to any downstream pipeline functions and all returned tokens will be added to the index.
 *
 * Any number of pipeline functions may be chained together using a lunr.Pipeline.
 *
 * @interface lunr.PipelineFunction
 * @param {lunr.Token} token - A token from the document being processed.
 * @param {number} i - The index of this token in the complete list of tokens for this document/field.
 * @param {lunr.Token[]} tokens - All tokens for this document/field.
 * @returns {(?lunr.Token|lunr.Token[])}
 */

/**
 * Register a function with the pipeline.
 *
 * Functions that are used in the pipeline should be registered if the pipeline
 * needs to be serialised, or a serialised pipeline needs to be loaded.
 *
 * Registering a function does not add it to a pipeline, functions must still be
 * added to instances of the pipeline for them to be used when running a pipeline.
 *
 * A warning is logged when the label is already in use; the new function then
 * replaces the previous one. The label is also stored on the function itself
 * as `fn.label`.
 *
 * The registry is always reached through `lunr.Pipeline` rather than `this`,
 * so the function also works when called detached from `lunr.Pipeline`.
 *
 * @param {lunr.PipelineFunction} fn - The function to register.
 * @param {String} label - The label to register this function with
 */
lunr.Pipeline.registerFunction = function (fn, label) {
  var registry = lunr.Pipeline.registeredFunctions

  if (label in registry) {
    lunr.utils.warn('Overwriting existing registered function: ' + label)
  }

  fn.label = label
  registry[fn.label] = fn
}

/**
 * Warns if the function is not registered as a Pipeline function.
 *
 * A function counts as registered when it carries a `label` that is present
 * in the registry. The registry is reached through `lunr.Pipeline` rather
 * than `this`, so the function also works when called detached.
 *
 * @param {lunr.PipelineFunction} fn - The function to check for.
 * @private
 */
lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
  var isRegistered = fn.label && (fn.label in lunr.Pipeline.registeredFunctions)

  if (!isRegistered) {
    lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn)
  }
}

/**
 * Loads a previously serialised pipeline.
 *
 * All functions to be loaded must already be registered with lunr.Pipeline.
 * If any function from the serialised data has not been registered then an
 * error will be thrown.
 *
 * @param {Object} serialised - The serialised pipeline to load, a list of
 * registered function labels.
 * @returns {lunr.Pipeline}
 * @throws {Error} when a label has no registered function.
 */
lunr.Pipeline.load = function (serialised) {
  var pipeline = new lunr.Pipeline

  serialised.forEach(function (fnName) {
    var fn = lunr.Pipeline.registeredFunctions[fnName]

    if (!fn) {
      throw new Error('Cannot load unregistered function: ' + fnName)
    }

    pipeline.add(fn)
  })

  return pipeline
}

/**
 * Adds new functions to the end of the pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.
 */
lunr.Pipeline.prototype.add = function () {
  var fns = Array.prototype.slice.call(arguments)

  fns.forEach(function (fn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(fn)
    this._stack.push(fn)
  }, this)
}

/**
 * Adds a single function after a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 * @throws {Error} when existingFn is not part of the pipeline.
 */
lunr.Pipeline.prototype.after = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var pos = this._stack.indexOf(existingFn)
  if (pos == -1) {
    throw new Error('Cannot find existingFn')
  }

  // Insert into the slot immediately following the existing function
  this._stack.splice(pos + 1, 0, newFn)
}

/**
 * Adds a single function before a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 * @throws {Error} when existingFn is not part of the pipeline.
 */
lunr.Pipeline.prototype.before = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var pos = this._stack.indexOf(existingFn)
  if (pos == -1) {
    throw new Error('Cannot find existingFn')
  }

  this._stack.splice(pos, 0, newFn)
}

/**
 * Removes a function from the pipeline. Does nothing when the function is
 * not part of the pipeline.
 *
 * @param {lunr.PipelineFunction} fn The function to remove from the pipeline.
 */
lunr.Pipeline.prototype.remove = function (fn) {
  var pos = this._stack.indexOf(fn)
  if (pos == -1) {
    return
  }

  this._stack.splice(pos, 1)
}

/**
 * Runs the current list of functions that make up the pipeline against the
 * passed tokens.
 *
 * Each function is applied to every token produced by the previous function
 * before the next function runs.
 *
 * @param {Array} tokens The tokens to run through the pipeline.
 * @returns {Array}
 */
lunr.Pipeline.prototype.run = function (tokens) {
  var stackLength = this._stack.length

  for (var i = 0; i < stackLength; i++) {
    var fn = this._stack[i]
    var memo = []

    for (var j = 0; j < tokens.length; j++) {
      var result = fn(tokens[j], j, tokens)

      // null, undefined and the empty string all mean "drop this token"
      if (result === null || result === void 0 || result === '') continue

      if (Array.isArray(result)) {
        // An array result contributes each of its entries as its own token
        for (var k = 0; k < result.length; k++) {
          memo.push(result[k])
        }
      } else {
        memo.push(result)
      }
    }

    tokens = memo
  }

  return tokens
}

/**
 * Convenience method for passing a string through a pipeline and getting
 * strings out. This method takes care of wrapping the passed string in a
 * token and mapping the resulting tokens back to strings.
 *
 * @param {string} str - The string to pass through the pipeline.
 * @param {?object} metadata - Optional metadata to associate with the token
 * passed to the pipeline.
 * @returns {string[]}
 */
lunr.Pipeline.prototype.runString = function (str, metadata) {
  var token = new lunr.Token (str, metadata)

  return this.run([token]).map(function (t) {
    return t.toString()
  })
}

/**
 * Resets the pipeline by removing any existing processors.
 *
 */
lunr.Pipeline.prototype.reset = function () {
  this._stack = []
}

/**
 * Returns a representation of the pipeline ready for serialisation: the
 * list of labels of the functions in the pipeline, in order.
 *
 * Logs a warning if the function has not been registered.
 *
 * @returns {Array}
 */
lunr.Pipeline.prototype.toJSON = function () {
  return this._stack.map(function (fn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(fn)
    return fn.label
  })
}
/*!
* lunr.Vector
* Copyright (C) 2020 Oliver Nightingale
*/
/**
* A vector is used to construct the vector space of documents and queries. These
* vectors support operations to determine the similarity between two documents or
* a document and a query.
*
* Normally no parameters are required for initializing a vector, but in the case of
* loading a previously dumped vector the raw elements can be provided to the constructor.
*
* For performance reasons vectors are implemented with a flat array, where an elements
* index is immediately followed by its value. E.g. [index, value, index, value]. This
* allows the underlying array to be as sparse as possible and still offer decent
* performance when being used for vector calculations.
*
* @constructor
* @param {Number[]} [elements] - The flat list of element index and element value pairs.
*/
lunr.Vector = function (elements) {
  // A magnitude of 0 doubles as "not cached yet"
  this._magnitude = 0
  this.elements = elements || []
}

/**
 * Finds the position in the flat elements array at which a given index
 * lives, or at which it would have to be inserted to keep the indexes sorted.
 *
 * Used internally by insert and upsert. When the index is already present
 * the position of the existing pair is returned; callers must check for
 * themselves whether the slot at that position holds a duplicate.
 *
 * @param {Number} index - The index to locate within the vector.
 * @returns {Number}
 */
lunr.Vector.prototype.positionForIndex = function (index) {
  var elements = this.elements

  // Nothing stored yet, so the pair goes at the very beginning
  if (elements.length == 0) {
    return 0
  }

  var lo = 0,
      hi = elements.length / 2,
      span = hi - lo,
      mid = Math.floor(span / 2),
      midIndex = elements[mid * 2]

  // Binary search over the [index, value] pairs: keep halving the range
  // until one pair is left or the pivot holds the requested index
  while (span > 1 && midIndex != index) {
    if (midIndex < index) {
      lo = mid
    } else if (midIndex > index) {
      hi = mid
    }

    span = hi - lo
    mid = lo + Math.floor(span / 2)
    midIndex = elements[mid * 2]
  }

  // An exact match, or a pivot holding a larger index, means the pair
  // belongs at the pivot itself
  if (midIndex == index || midIndex > index) {
    return mid * 2
  }

  // A pivot holding a smaller index means the pair belongs right after it
  if (midIndex < index) {
    return (mid + 1) * 2
  }
}

/**
 * Adds a new element to the vector at the given index.
 *
 * Duplicates are not allowed: if the index is already present the string
 * "duplicate index" is thrown.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 */
lunr.Vector.prototype.insert = function (insertIdx, val) {
  var rejectDuplicate = function () {
    throw "duplicate index"
  }

  this.upsert(insertIdx, val, rejectDuplicate)
}

/**
 * Adds a new element to the vector, or updates the element already stored
 * at the given index.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 * @param {function} fn - Called only for updates, with the existing value and
 * the requested value; its return value replaces the existing value.
 */
lunr.Vector.prototype.upsert = function (insertIdx, val, fn) {
  // Any change invalidates the cached magnitude
  this._magnitude = 0

  var position = this.positionForIndex(insertIdx),
      alreadyPresent = this.elements[position] == insertIdx

  if (!alreadyPresent) {
    this.elements.splice(position, 0, insertIdx, val)
    return
  }

  this.elements[position + 1] = fn(this.elements[position + 1], val)
}

/**
 * Computes the magnitude (Euclidean length) of this vector, caching any
 * non-zero result until the vector is next modified through upsert.
 *
 * @returns {Number}
 */
lunr.Vector.prototype.magnitude = function () {
  if (this._magnitude) {
    return this._magnitude
  }

  var total = 0

  // Values sit at the odd positions of the flat array
  for (var i = 1, len = this.elements.length; i < len; i += 2) {
    var value = this.elements[i]
    total += value * value
  }

  this._magnitude = Math.sqrt(total)
  return this._magnitude
}

/**
 * Computes the dot product of this vector and another vector.
 *
 * @param {lunr.Vector} otherVector - The vector to compute the dot product with.
 * @returns {Number}
 */
lunr.Vector.prototype.dot = function (otherVector) {
  var left = this.elements,
      right = otherVector.elements,
      leftLength = left.length,
      rightLength = right.length,
      i = 0,
      j = 0,
      sum = 0

  // Walk both sorted index lists together; only indexes present in both
  // vectors contribute to the product
  while (i < leftLength && j < rightLength) {
    var leftIndex = left[i],
        rightIndex = right[j]

    if (leftIndex < rightIndex) {
      i += 2
      continue
    }

    if (leftIndex > rightIndex) {
      j += 2
      continue
    }

    if (leftIndex == rightIndex) {
      sum += left[i + 1] * right[j + 1]
      i += 2
      j += 2
    }
  }

  return sum
}

/**
 * Computes the similarity between this vector and another vector: their dot
 * product divided by the magnitude of this vector, or 0 when that quotient
 * is falsy (for example NaN for an empty vector).
 *
 * @param {lunr.Vector} otherVector - The other vector to calculate the
 * similarity with.
 * @returns {Number}
 */
lunr.Vector.prototype.similarity = function (otherVector) {
  var product = this.dot(otherVector),
      length = this.magnitude(),
      ratio = product / length

  return ratio || 0
}

/**
 * Returns the values stored in the vector, in index order, without their
 * indexes.
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toArray = function () {
  var pairCount = this.elements.length / 2,
      values = new Array (pairCount)

  for (var k = 0; k < pairCount; k++) {
    values[k] = this.elements[2 * k + 1]
  }

  return values
}

/**
 * A JSON serializable representation of the vector: the flat list of index
 * and value pairs.
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toJSON = function () {
  return this.elements
}
/* eslint-disable */
/*!
* lunr.stemmer
* Copyright (C) 2020 Oliver Nightingale
* Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
*/
/**
* lunr.stemmer is an english language stemmer, this is a JavaScript
* implementation of the PorterStemmer taken from http://tartarus.org/~martin
*
* @static
* @implements {lunr.PipelineFunction}
* @param {lunr.Token} token - The string to stem
* @returns {lunr.Token}
* @see {@link lunr.Pipeline}
* @function
*/
lunr.stemmer = (function(){
var step2list = {
"ational" : "ate",
"tional" : "tion",
"enci" : "ence",
"anci" : "ance",
"izer" : "ize",
"bli" : "ble",
"alli" : "al",
"entli" : "ent",
"eli" : "e",
"ousli" : "ous",
"ization" : "ize",
"ation" : "ate",
"ator" : "ate",
"alism" : "al",
"iveness" : "ive",
"fulness" : "ful",
"ousness" : "ous",
"aliti" : "al",
"iviti" : "ive",
"biliti" : "ble",
"logi" : "log"
},
step3list = {
"icate" : "ic",
"ative" : "",
"alize" : "al",
"iciti" : "ic",
"ical" : "ic",
"ful" : "",
"ness" : ""
},
c = "[^aeiou]", // consonant
v = "[aeiouy]", // vowel
C = c + "[^aeiouy]*", // consonant sequence
V = v + "[aeiou]*", // vowel sequence
mgr0 = "^(" + C + ")?" + V + C, // [C]VC... is m>0
meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1
mgr1 = "^(" + C + ")?" + V + C + V + C, // [C]VCVC... is m>1
s_v = "^(" + C + ")?" + v; // vowel in stem
var re_mgr0 = new RegExp(mgr0);
var re_mgr1 = new RegExp(mgr1);
var re_meq1 = new RegExp(meq1);
var re_s_v = new RegExp(s_v);
var re_1a = /^(.+?)(ss|i)es$/;
var re2_1a = /^(.+?)([^s])s$/;
var re_1b = /^(.+?)eed$/;
var re2_1b = /^(.+?)(ed|ing)$/;
var re_1b_2 = /.$/;
var re2_1b_2 = /(at|bl|iz)$/;
var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");
var re_1c = /^(.+?[^aeiou])y$/;
var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
var re2_4 = /^(.+?)(s|t)(ion)$/;
var re_5 = /^(.+?)e$/;
var re_5_1 = /ll$/;
var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");
/**
 * Stems a single word by running it through a fixed sequence of
 * regular-expression driven suffix rewrites (steps 1a to 5 below).
 *
 * The locals `re`, `re2`, `re3` and `re4` are scratch slots that are re-pointed
 * at a different precompiled pattern in every step. Several of those patterns
 * (`re_mgr0`, `re_mgr1`, `re_meq1`, `re_s_v`) and the `step2list`/`step3list`
 * lookup tables are defined earlier in the enclosing closure.
 * NOTE(review): by their names the `mgr0`/`mgr1`/`meq1` patterns presumably
 * test the "measure" of a stem (> 0, > 1, == 1) and `re_s_v` that a stem
 * contains a vowel - confirm against their definitions.
 *
 * @param {string} w - The word to stem.
 * @returns {string} The stemmed word. Words shorter than three characters are
 * returned unchanged.
 */
var porterStemmer = function porterStemmer(w) {
var stem,
suffix,
firstch,
re,
re2,
re3,
re4;
// Words of one or two characters are returned untouched.
if (w.length < 3) { return w; }
// A leading "y" is upper-cased for the duration of the steps and lower-cased
// again just before returning.
// NOTE(review): presumably so the initial y is not matched by the lowercase
// vowel/consonant classes the patterns are built from - confirm.
firstch = w.substr(0,1);
if (firstch == "y") {
w = firstch.toUpperCase() + w.substr(1);
}
// Step 1a: "...sses" -> "...ss" and "...ies" -> "...i"; otherwise drop a final
// "s" that is not preceded by another "s".
re = re_1a
re2 = re2_1a;
if (re.test(w)) { w = w.replace(re,"$1$2"); }
else if (re2.test(w)) { w = w.replace(re2,"$1$2"); }
// Step 1b: handle the "eed", "ed" and "ing" suffixes.
re = re_1b;
re2 = re2_1b;
if (re.test(w)) {
// "...eed": when the part before "eed" passes re_mgr0, drop the last
// character (re_1b_2 matches exactly one trailing character).
var fp = re.exec(w);
re = re_mgr0;
if (re.test(fp[1])) {
re = re_1b_2;
w = w.replace(re,"");
}
} else if (re2.test(w)) {
// "...ed" / "...ing": only stripped when the remaining stem passes re_s_v.
var fp = re2.exec(w);
stem = fp[1];
re2 = re_s_v;
if (re2.test(stem)) {
w = stem;
re2 = re2_1b_2;
re3 = re3_1b_2;
re4 = re4_1b_2;
// Tidy up the stripped stem: restore an "e" after "at"/"bl"/"iz", collapse a
// doubled letter (other than a vowel, y, l, s or z) to a single one, or
// append "e" when the stem matches re4_1b_2.
if (re2.test(w)) { w = w + "e"; }
else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); }
else if (re4.test(w)) { w = w + "e"; }
}
}
// Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
re = re_1c;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
w = stem + "i";
}
// Step 2: when the stem before one of the re_2 suffixes passes re_mgr0, swap
// the suffix for its replacement looked up in step2list.
re = re_2;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = re_mgr0;
if (re.test(stem)) {
w = stem + step2list[suffix];
}
}
// Step 3: same shape as step 2, using the re_3 suffixes and step3list.
re = re_3;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = re_mgr0;
if (re.test(stem)) {
w = stem + step3list[suffix];
}
}
// Step 4: remove one of the re_4 suffixes outright when the stem passes
// re_mgr1. For "...sion"/"...tion" only the "ion" is removed; the "s"/"t" is
// kept as part of the stem.
re = re_4;
re2 = re2_4;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = re_mgr1;
if (re.test(stem)) {
w = stem;
}
} else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1] + fp[2];
re2 = re_mgr1;
if (re2.test(stem)) {
w = stem;
}
}
// Step 5: drop a final "e" when the stem passes re_mgr1, or when it passes
// re_meq1 and does not match re3_5.
re = re_5;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = re_mgr1;
re2 = re_meq1;
re3 = re3_5;
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
w = stem;
}
}
// Then reduce a final "ll" to "l" when the whole word passes re_mgr1.
re = re_5_1;
re2 = re_mgr1;
if (re.test(w) && re2.test(w)) {
re = re_1b_2;
w = w.replace(re,"");
}
// and turn initial Y back to y
if (firstch == "y") {
w = firstch.toLowerCase() + w.substr(1);
}
return w;
};
return function (token) {
return token.update(porterStemmer);
}
})();
// Register lunr.stemmer with lunr.Pipeline under the label 'stemmer'.
lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')
/*!
* lunr.stopWordFilter
* Copyright (C) 2020 Oliver Nightingale
*/
/**
 * lunr.generateStopWordFilter builds a stopWordFilter function from the provided
 * list of stop words.
 *
 * The built in lunr.stopWordFilter is built using this generator and can be used
 * to generate custom stopWordFilters for applications or non English languages.
 *
 * The returned filter hands back the token it was given unless the token's
 * string form is one of the stop words, in which case it returns undefined.
 * A falsy token also yields undefined.
 *
 * @function
 * @param {string[]} stopWords - The list of stop words to filter out.
 * @returns {lunr.PipelineFunction}
 * @see lunr.Pipeline
 * @see lunr.stopWordFilter
 */
lunr.generateStopWordFilter = function (stopWords) {
  // The lookup table has no prototype. On a plain object literal the
  // assignment `memo["__proto__"] = "__proto__"` is swallowed by the inherited
  // __proto__ setter, so that stop word would never be filtered.
  var words = stopWords.reduce(function (memo, stopWord) {
    memo[stopWord] = stopWord
    return memo
  }, Object.create(null))

  return function (token) {
    if (!token) return

    // Compare against the stored word itself rather than testing for key
    // presence, so only exact string matches count as stop words.
    var str = token.toString()
    if (words[str] !== str) return token
  }
}
/**
 * lunr.stopWordFilter is the English language stop word filter. A token whose
 * string value appears in the list below does not pass the filter; every other
 * token is handed back unchanged.
 *
 * It is intended to be used in the Pipeline. For a token that does not pass
 * the filter, undefined is returned.
 *
 * @function
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token - A token to check for being a stop word.
 * @returns {lunr.Token}
 * @see {@link lunr.Pipeline}
 */
lunr.stopWordFilter = lunr.generateStopWordFilter([
  // one row per initial letter, in alphabetical order
  'a', 'able', 'about', 'across', 'after', 'all', 'almost', 'also', 'am', 'among', 'an', 'and', 'any', 'are', 'as', 'at',
  'be', 'because', 'been', 'but', 'by',
  'can', 'cannot', 'could',
  'dear', 'did', 'do', 'does',
  'either', 'else', 'ever', 'every',
  'for', 'from',
  'get', 'got',
  'had', 'has', 'have', 'he', 'her', 'hers', 'him', 'his', 'how', 'however',
  'i', 'if', 'in', 'into', 'is', 'it', 'its',
  'just',
  'least', 'let', 'like', 'likely',
  'may', 'me', 'might', 'most', 'must', 'my',
  'neither', 'no', 'nor', 'not',
  'of', 'off', 'often', 'on', 'only', 'or', 'other', 'our', 'own',
  'rather',
  'said', 'say', 'says', 'she', 'should', 'since', 'so', 'some',
  'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these', 'they', 'this', 'tis', 'to', 'too', 'twas',
  'us',
  'wants', 'was', 'we', 'were', 'what', 'when', 'where', 'which', 'while', 'who', 'whom', 'why', 'will', 'with', 'would',
  'yet', 'you', 'your'
])

// Register the filter with lunr.Pipeline under the label 'stopWordFilter'.
lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter')
/*!
* lunr.trimmer
* Copyright (C) 2020 Oliver Nightingale
*/
/**
 * lunr.trimmer is a pipeline function that strips runs of non word
 * characters from the start and from the end of a token before the
 * token enters the index. Characters inside the token are left alone.
 *
 * "Non word" is decided by the regular expression class \W, so this
 * implementation may not work correctly for non latin characters and
 * should either be removed or adapted for use with languages with
 * non-latin characters.
 *
 * @static
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token The token to pass through the filter
 * @returns {lunr.Token}
 * @see lunr.Pipeline
 */
lunr.trimmer = function (token) {
  var stripEdges = function (text) {
    var withoutLeading = text.replace(/^\W+/, '')

    return withoutLeading.replace(/\W+$/, '')
  }

  return token.update(stripEdges)
}
// Register lunr.trimmer with lunr.Pipeline under the label 'trimmer'.
lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer')
/*!
* lunr.TokenSet
* Copyright (C) 2020 Oliver Nightingale
*/
/**
 * A token set stores the unique list of all tokens within an index.
 * Token sets are also used to represent an incoming query to the
 * index; the query token set and the index token set are intersected
 * to find which tokens to look up in the inverted index.
 *
 * A token set can hold many tokens, as the index token set does, or
 * a single token, as in the case of a simple query token set.
 *
 * Token sets are additionally used to perform wildcard matching.
 * Leading, contained and trailing wildcards are supported, and edit
 * distance matching is provided on top of this.
 *
 * Token sets are implemented as a minimal finite state automata in
 * which both common prefixes and common suffixes are shared between
 * tokens, reducing the space needed to store the set.
 *
 * Each instance is a single node: `final` marks the end of a token,
 * `edges` maps a character to the next node and `id` is a unique,
 * auto incremented identifier.
 *
 * @constructor
 */
lunr.TokenSet = function () {
  var assignedId = lunr.TokenSet._nextId

  this.final = false
  this.edges = {}
  this.id = assignedId

  lunr.TokenSet._nextId = assignedId + 1
}

/**
 * The identifier that will be handed to the next tokenSet created;
 * it is incremented by the constructor on every instantiation.
 *
 * TokenSets require a unique identifier to be correctly minimised.
 *
 * @private
 */
lunr.TokenSet._nextId = 1
/**
 * Builds a TokenSet containing every word of the given sorted array.
 *
 * The words are fed, in order, to a lunr.TokenSet.Builder and the
 * builder's root node is returned once it has been finished.
 *
 * @param {String[]} arr - A sorted array of strings to create the set from.
 * @returns {lunr.TokenSet}
 * @throws Will throw an error if the input array is not sorted.
 */
lunr.TokenSet.fromArray = function (arr) {
  var builder = new lunr.TokenSet.Builder,
      total = arr.length,
      position = 0

  while (position < total) {
    builder.insert(arr[position])
    position += 1
  }

  builder.finish()

  return builder.root
}
/**
 * Creates a token set from a query clause.
 *
 * A clause that carries an `editDistance` property produces a fuzzy
 * token set, any other clause produces a plain (possibly wildcarded)
 * token set for its term.
 *
 * @private
 * @param {Object} clause - A single clause from lunr.Query.
 * @param {string} clause.term - The query clause term.
 * @param {number} [clause.editDistance] - The optional edit distance for the term.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromClause = function (clause) {
  var isFuzzy = 'editDistance' in clause

  if (!isFuzzy) {
    return lunr.TokenSet.fromString(clause.term)
  }

  return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance)
}
/**
 * Creates a token set representing a single string with a specified
 * edit distance.
 *
 * Insertions, deletions, substitutions and transpositions are each
 * treated as an edit distance of 1. An inserted or substituted character
 * is represented by a "*" edge.
 *
 * Increasing the allowed edit distance will have a dramatic impact
 * on the performance of both creating and intersecting these TokenSets.
 * It is advised to keep the edit distance less than 3.
 *
 * @param {string} str - The string to create the token set from.
 * @param {number} editDistance - The allowed edit distance to match. A value
 * that is not greater than zero allows no edits at all.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromFuzzyString = function (str, editDistance) {
  var root = new lunr.TokenSet

  // Follows the edge `label` out of `node`, creating the target node first
  // if that edge does not exist yet.
  var childFor = function (node, label) {
    if (!(label in node.edges)) {
      node.edges[label] = new lunr.TokenSet
    }

    return node.edges[label]
  }

  // Each frame is a position in the automaton (node), the part of the
  // string that still has to be consumed (str) and the edit budget left.
  var stack = [{
    node: root,
    editsRemaining: editDistance,
    str: str
  }]

  while (stack.length) {
    var frame = stack.pop(),
        remaining = frame.str.length

    // no edit
    if (remaining > 0) {
      var noEditNode = childFor(frame.node, frame.str.charAt(0))

      if (remaining == 1) {
        noEditNode.final = true
      }

      stack.push({
        node: noEditNode,
        editsRemaining: frame.editsRemaining,
        str: frame.str.slice(1)
      })
    }

    // Everything below spends one edit. Testing for "not greater than zero"
    // rather than "equal to zero" also stops the expansion for negative,
    // fractional or NaN budgets, which would otherwise never reach zero.
    if (!(frame.editsRemaining > 0)) {
      continue
    }

    var editsLeft = frame.editsRemaining - 1

    // insertion
    var insertionNode = childFor(frame.node, "*")

    if (remaining == 0) {
      insertionNode.final = true
    }

    stack.push({
      node: insertionNode,
      editsRemaining: editsLeft,
      str: frame.str
    })

    // deletion
    // can only skip a character and carry on if there are
    // characters left after the one being deleted
    if (remaining > 1) {
      stack.push({
        node: frame.node,
        editsRemaining: editsLeft,
        str: frame.str.slice(1)
      })
    }

    // deletion
    // just removing the last character from the str
    if (remaining == 1) {
      frame.node.final = true
    }

    // substitution
    // can only do a substitution if there are characters left to
    // substitute; the "*" edge is shared with the insertion above
    if (remaining >= 1) {
      var substitutionNode = childFor(frame.node, "*")

      if (remaining == 1) {
        substitutionNode.final = true
      }

      stack.push({
        node: substitutionNode,
        editsRemaining: editsLeft,
        str: frame.str.slice(1)
      })
    }

    // transposition
    // can only do a transposition if there are at least two characters
    // to swap. At least one character is always left over afterwards, so
    // the node reached here is never final by itself; finality is set
    // when the pushed frame is processed.
    if (remaining > 1) {
      var charA = frame.str.charAt(0),
          charB = frame.str.charAt(1),
          transposeNode = childFor(frame.node, charB)

      stack.push({
        node: transposeNode,
        editsRemaining: editsLeft,
        str: charA + frame.str.slice(2)
      })
    }
  }

  return root
}
/**
 * Creates a TokenSet from a string.
 *
 * The string may contain one or more wildcard characters (*)
 * that will allow wildcard matching when intersecting with
 * another TokenSet.
 *
 * @param {string} str - The string to create a TokenSet from.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromString = function (str) {
  var root = new lunr.TokenSet,
      current = root,
      lastIndex = str.length - 1

  /*
   * Walk the string one character at a time. An ordinary character
   * appends a fresh node and moves on to it. A wildcard instead adds
   * an edge from the current node back to itself, so that it keeps
   * matching any number of any characters. Whichever node the last
   * character ends on is marked as final.
   */
  for (var position = 0; position <= lastIndex; position++) {
    var symbol = str[position],
        isLast = position == lastIndex

    if (symbol != "*") {
      var child = new lunr.TokenSet
      child.final = isLast

      current.edges[symbol] = child
      current = child
      continue
    }

    current.edges[symbol] = current
    current.final = isLast
  }

  return root
}
/**
 * Lists every string contained within this TokenSet.
 *
 * The set is walked with an explicit stack; each final node reached
 * contributes the string spelled out by the edges leading to it.
 *
 * This is not intended to be used on a TokenSet that
 * contains wildcards, in these cases the results are
 * undefined and are likely to cause an infinite loop.
 *
 * @returns {string[]}
 */
lunr.TokenSet.prototype.toArray = function () {
  var words = [],
      pending = [{ prefix: "", node: this }]

  while (pending.length) {
    var current = pending.pop(),
        labels = Object.keys(current.node.edges)

    if (current.node.final) {
      /* In Safari, at this point the prefix is sometimes corrupted, see:
       * https://github.com/olivernn/lunr.js/issues/279 Calling any
       * String.prototype method forces Safari to "cast" this string to what
       * it's supposed to be, fixing the bug. */
      current.prefix.charAt(0)
      words.push(current.prefix)
    }

    labels.forEach(function (label) {
      pending.push({
        prefix: current.prefix.concat(label),
        node: current.node.edges[label]
      })
    })
  }

  return words
}
/**
 * Generates a string representation of a TokenSet.
 *
 * The string is '1' or '0' for the finality of this node followed,
 * for every outgoing edge in sorted label order, by the label and
 * the id of the node the edge points at. A key cached in `_str` is
 * returned as is.
 *
 * This is intended to allow TokenSets to be used as keys
 * in objects, largely to aid the construction and minimisation
 * of a TokenSet. As such it is not designed to be a human
 * friendly representation of the TokenSet.
 *
 * @returns {string}
 */
lunr.TokenSet.prototype.toString = function () {
  if (this._str) {
    return this._str
  }

  // NOTE: Object.keys with an indexed loop is used on purpose,
  // this.edges is very likely to enter 'hash-mode' with many keys
  // being added and a for-in loop leads to the function being
  // de-optimised (at least in V8). From some simple benchmarks the
  // performance is comparable, but allowing V8 to optimize may mean
  // easy performance wins in the future.
  var labels = Object.keys(this.edges).sort(),
      key = this.final ? '1' : '0'

  for (var idx = 0; idx < labels.length; idx += 1) {
    var label = labels[idx]

    key += label + this.edges[label].id
  }

  return key
}
/**
 * Returns a new TokenSet that is the intersection of
 * this TokenSet and the passed TokenSet.
 *
 * A "*" edge in the passed TokenSet matches every edge of this
 * TokenSet, which is how wildcards are taken into account.
 *
 * @param {lunr.TokenSet} b - An other TokenSet to intersect with.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.prototype.intersect = function (b) {
  var result = new lunr.TokenSet,
      pending = [{
        qNode: b,
        output: result,
        node: this
      }]

  while (pending.length) {
    var current = pending.pop()

    // NOTE: As with the #toString method, Object.keys and indexed
    // loops are used instead of for-in loops, as both of these
    // objects enter 'hash' mode, causing the function to be
    // de-optimised in V8
    var queryLabels = Object.keys(current.qNode.edges),
        ownLabels = Object.keys(current.node.edges)

    for (var qi = 0; qi < queryLabels.length; qi++) {
      var queryLabel = queryLabels[qi]

      for (var ni = 0; ni < ownLabels.length; ni++) {
        var ownLabel = ownLabels[ni]

        // only identical labels, or a wildcard on the query side, pair up
        if (queryLabel != '*' && ownLabel != queryLabel) {
          continue
        }

        var ownChild = current.node.edges[ownLabel],
            queryChild = current.qNode.edges[queryLabel],
            bothFinal = ownChild.final && queryChild.final,
            outChild

        if (ownLabel in current.output.edges) {
          // the output already has an edge for this character,
          // re-use its node and only ever switch finality on
          outChild = current.output.edges[ownLabel]
          outChild.final = outChild.final || bothFinal
        } else {
          // first time this character is seen from this output node,
          // create the node, set its finality and attach it
          outChild = new lunr.TokenSet
          outChild.final = bothFinal
          current.output.edges[ownLabel] = outChild
        }

        pending.push({
          qNode: queryChild,
          output: outChild,
          node: ownChild
        })
      }
    }
  }

  return result
}
/**
 * Incrementally builds a minimised lunr.TokenSet from words that are
 * inserted in sorted order. The finished set is available as `root`.
 *
 * `uncheckedNodes` holds one {parent, char, child} entry per node that
 * has been created but not yet minimised; `minimizedNodes` maps the
 * string key of a node to the node kept for that key.
 *
 * @constructor
 */
lunr.TokenSet.Builder = function () {
  this.previousWord = ""
  this.root = new lunr.TokenSet
  this.uncheckedNodes = []
  this.minimizedNodes = {}
}

/**
 * Adds a word to the set being built.
 *
 * The nodes shared with the previously inserted word are kept, the
 * rest of the previous word is minimised and new nodes are appended
 * for the remaining characters of this word.
 *
 * @param {string} word - The word to insert, must not sort before the previously inserted word.
 * @throws {Error} If the word sorts before the previously inserted word.
 */
lunr.TokenSet.Builder.prototype.insert = function (word) {
  if (word < this.previousWord) {
    throw new Error ("Out of order word insertion")
  }

  // length of the prefix shared with the previously inserted word
  var previous = this.previousWord,
      limit = Math.min(word.length, previous.length),
      commonPrefix = 0

  while (commonPrefix < limit && word[commonPrefix] == previous[commonPrefix]) {
    commonPrefix++
  }

  this.minimize(commonPrefix)

  // continue from the deepest node that survived minimisation
  var uncheckedCount = this.uncheckedNodes.length,
      node = uncheckedCount == 0
        ? this.root
        : this.uncheckedNodes[uncheckedCount - 1].child

  for (var position = commonPrefix; position < word.length; position++) {
    var child = new lunr.TokenSet,
        label = word[position]

    node.edges[label] = child

    this.uncheckedNodes.push({
      parent: node,
      char: label,
      child: child
    })

    node = child
  }

  node.final = true
  this.previousWord = word
}

/**
 * Minimises every node that is still unchecked. To be called once
 * after the last word has been inserted.
 */
lunr.TokenSet.Builder.prototype.finish = function () {
  this.minimize(0)
}

/**
 * Minimises unchecked nodes, most recently created first, until only
 * `downTo` of them are left.
 *
 * A node whose string key has been seen before is replaced, in its
 * parent, by the node already stored for that key; otherwise the node
 * itself is stored under its key.
 *
 * @param {number} downTo - The number of unchecked nodes to leave in place.
 */
lunr.TokenSet.Builder.prototype.minimize = function (downTo) {
  while (this.uncheckedNodes.length > downTo) {
    var entry = this.uncheckedNodes.pop(),
        childKey = entry.child.toString()

    if (childKey in this.minimizedNodes) {
      entry.parent.edges[entry.char] = this.minimizedNodes[childKey]
      continue
    }

    // Cache the key for this node since
    // we know it can't change anymore
    entry.child._str = childKey
    this.minimizedNodes[childKey] = entry.child
  }
}
/*!
* lunr.Index
* Copyright (C) 2020 Oliver Nightingale
*/
/**
 * An index holds the built index of all documents and provides the query
 * interface to it.
 *
 * Instances of lunr.Index are usually not created through this constructor.
 * Use lunr.Builder to construct a new index, or lunr.Index.load to load an
 * index that was built and serialized earlier.
 *
 * @constructor
 * @param {Object} attrs - The attributes of the built search index.
 * @param {Object} attrs.invertedIndex - An index of term/field to document reference.
 * @param {Object<string, lunr.Vector>} attrs.fieldVectors - Field vectors
 * @param {lunr.TokenSet} attrs.tokenSet - A set of all corpus tokens.
 * @param {string[]} attrs.fields - The names of indexed document fields.
 * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms.
 */
lunr.Index = function (attrs) {
  var index = this,
      attributeNames = ['invertedIndex', 'fieldVectors', 'tokenSet', 'fields', 'pipeline']

  // copy each built attribute across, by reference
  attributeNames.forEach(function (name) {
    index[name] = attrs[name]
  })
}
/**
* A result contains details of a document matching a search query.
* @typedef {Object} lunr.Index~Result
* @property {string} ref - The reference of the document this result represents.
* @property {number} score - A number between 0 and 1 representing how similar this document is to the query.
* @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match.
*/
/**
* Although lunr provides the ability to create queries using lunr.Query, it also provides a simple
* query language which itself is parsed into an instance of lunr.Query.
*
* For programmatically building queries it is advised to directly use lunr.Query, the query language
* is best used for human entered text rather than program generated text.
*
* At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported
* and will be combined with OR, e.g. `hello world` will match documents that contain either 'hello'
* or 'world', though those that contain both will rank higher in the results.
*
* Wildcards can be included in terms to match one or more unspecified characters, these wildcards can
* be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding
* wildcards will increase the number of documents that will be found but can also have a negative
* impact on query performance, especially with wildcards at the beginning of a term.
*
* Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term
* hello in the title field will match this query. Using a field not present in the index will lead
* to an error being thrown.
*
* Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term
* boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported
* to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2.
* Avoid large values for edit distance to improve query performance.
*
* Each term also supports a presence modifier. By default a term's presence in document is optional, however
* this can be changed to either required or prohibited. For a term's presence to be required in a document the
* term should be prefixed with a '+', e.g. `+foo bar` is a search for documents that must contain 'foo' and
* optionally contain 'bar'. Conversely a leading '-' sets the terms presence to prohibited, i.e. it must not
* appear in a document, e.g. `-foo bar` is a search for documents that do not contain 'foo' but may contain 'bar'.
*
* To escape special characters the backslash character '\' can be used, this allows searches to include
* characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead
* of attempting to apply an edit distance of 2 to the search term "foo".
*
* @typedef {string} lunr.Index~QueryString
* @example <caption>Simple single term query</caption>
* hello
* @example <caption>Multiple term query</caption>
* hello world
* @example <caption>term scoped to a field</caption>
* title:hello
* @example <caption>term with a boost of 10</caption>
* hello^10
* @example <caption>term with an edit distance of 2</caption>
* hello~2
* @example <caption>terms with presence modifiers</caption>
* -foo +bar baz
*/
/**
 * Performs a search against the index using lunr query syntax.
 *
 * The query string is parsed into the query object handed out by
 * lunr.Index#query, and the results of that query are returned.
 *
 * Results will be returned sorted by their score, the most relevant results
 * will be returned first. For details on how the score is calculated, please see
 * the {@link https://lunrjs.com/guides/searching.html#scoring|guide}.
 *
 * For more programmatic querying use lunr.Index#query.
 *
 * @param {lunr.Index~QueryString} queryString - A string containing a lunr query.
 * @throws {lunr.QueryParseError} If the passed query string cannot be parsed.
 * @returns {lunr.Index~Result[]}
 */
lunr.Index.prototype.search = function (queryString) {
  var populateFromString = function (query) {
    new lunr.QueryParser(queryString, query).parse()
  }

  return this.query(populateFromString)
}
/**
* A query builder callback provides a query object to be used to express
* the query to perform on the index.
*
* @callback lunr.Index~queryBuilder
* @param {lunr.Query} query - The query object to build up.
* @this lunr.Query
*/
/**
* Performs a query against the index using the yielded lunr.Query object.
*
* If performing programmatic queries against the index, this method is preferred
* over lunr.Index#search so as to avoid the additional query parsing overhead.
*
* A query object is yielded to the supplied function which should be used to
* express the query to be run against the index.
*
* Note that although this function takes a callback parameter it is _not_ an
* asynchronous operation, the callback is just yielded a query object to be
* customized.
*
* @param {lunr.Index~queryBuilder} fn - A function that is used to build the query.
* @returns {lunr.Index~Result[]}
*/
lunr.Index.prototype.query = function (fn) {
// for each query clause
// * process terms
// * expand terms from token set
// * find matching documents and metadata
// * get document vectors
// * score documents
var query = new lunr.Query(this.fields),
matchingFields = Object.create(null),
queryVectors = Object.create(null),
termFieldCache = Object.create(null),
requiredMatches = Object.create(null),
prohibitedMatches = Object.create(null)
/*
* To support field level boosts a query vector is created per
* field. An empty vector is eagerly created to support negated
* queries.
*/
for (var i = 0; i < this.fields.length; i++) {
queryVectors[this.fields[i]] = new lunr.Vector
}
fn.call(query, query)
for (var i = 0; i < query.clauses.length; i++) {
/*
* Unless the pipeline has been disabled for this term, which is
* the case for terms with wildcards, we need to pass the clause
* term through the search pipeline. A pipeline returns an array
* of processed terms. Pipeline functions may expand the passed
* term, which means we may end up performing multiple index lookups
* for a single query term.
*/
var clause = query.clauses[i],
terms = null,
clauseMatches = lunr.Set.empty
if (clause.usePipeline) {
terms = this.pipeline.runString(clause.term, {
fields: clause.fields
})
} else {
terms = [clause.term]
}
for (var m = 0; m < terms.length; m++) {
var term = terms[m]
/*
* Each term returned from the pipeline needs to use the same query
* clause object, e.g. the same boost and or edit distance. The
* simplest way to do this is to re-use the clause object but mutate
* its term property.
*/
clause.term = term
/*
* From the term in the clause we create a token set which will then
* be used to intersect the indexes token set to get a list of terms
* to lookup in the inverted index
*/
var termTokenSet = lunr.TokenSet.fromClause(clause),
expandedTerms = this.tokenSet.intersect(termTokenSet).toArray()
/*
* If a term marked as required does not exist in the tokenSet it is
* impossible for the search to return any matches. We set all the field
* scoped required matches set to empty and stop examining any further
* clauses.
*/
if (expandedTerms.length === 0 && clause.presence === lunr.Query.presence.REQUIRED) {
for (var k = 0; k < clause.fields.length; k++) {
var field = clause.fields[k]
requiredMatches[field] = lunr.Set.empty
}
break
}
for (var j = 0; j < expandedTerms.length; j++) {
/*
* For each term get the posting and termIndex, this is required for
* building the query vector.
*/
var expandedTerm = expandedTerms[j],
posting = this.invertedIndex[expandedTerm],
termIndex = posting._index
for (var k = 0; k < clause.fields.length; k++) {
/*
* For each field that this query term is scoped by (by default
* all fields are in scope) we need to get all the document refs
* that have this term in that field.
*
* The posting is the entry in the invertedIndex for the matching
* term from above.
*/
var field = clause.fields[k],
fieldPosting = posting[field],
matchingDocumentRefs = Object.keys(fieldPosting),
termField = expandedTerm + "/" + field,
matchingDocumentsSet = new lunr.Set(matchingDocumentRefs)
/*
* if the presen
gitextract_xvibk5q7/
├── .eslintrc.json
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── CNAME
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── VERSION
├── build/
│ ├── bower.json.template
│ ├── component.json.template
│ ├── jsdoc.conf.json
│ ├── package.json.template
│ ├── release.sh
│ ├── wrapper_end
│ └── wrapper_start
├── index.html
├── lib/
│ ├── builder.js
│ ├── field_ref.js
│ ├── idf.js
│ ├── index.js
│ ├── lunr.js
│ ├── match_data.js
│ ├── pipeline.js
│ ├── query.js
│ ├── query_lexer.js
│ ├── query_parse_error.js
│ ├── query_parser.js
│ ├── set.js
│ ├── stemmer.js
│ ├── stop_word_filter.js
│ ├── token.js
│ ├── token_set.js
│ ├── token_set_builder.js
│ ├── tokenizer.js
│ ├── trimmer.js
│ ├── utils.js
│ └── vector.js
├── lunr.js
├── notes
├── package.json
├── perf/
│ ├── builder_perf.js
│ ├── perf_helper.js
│ ├── pipeline_perf.js
│ ├── query_parser_perf.js
│ ├── search_perf.js
│ ├── stemmer_perf.js
│ ├── token_set_perf.js
│ ├── tokenizer_perf.js
│ └── vector_perf.js
├── styles.css
└── test/
├── builder_test.js
├── env/
│ ├── chai.js
│ ├── index.mustache
│ ├── mocha.css
│ └── mocha.js
├── field_ref_test.js
├── fixtures/
│ └── stemming_vocab.json
├── index.html
├── match_data_test.js
├── pipeline_test.js
├── query_lexer_test.js
├── query_parser_test.js
├── query_test.js
├── search_test.js
├── serialization_test.js
├── set_test.js
├── stemmer_test.js
├── stop_word_filter_test.js
├── test_helper.js
├── token_set_test.js
├── token_test.js
├── tokenizer_test.js
├── trimmer_test.js
├── utils_test.js
└── vector_test.js
SYMBOL INDEX (377 symbols across 2 files)
FILE: test/env/chai.js
function s (line 1) | function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&re...
function Assertion (line 131) | function Assertion (obj, msg, stack) {
function an (line 452) | function an (type, msg) {
function includeChainingBehavior (line 490) | function includeChainingBehavior () {
function include (line 494) | function include (val, msg) {
function checkArguments (line 734) | function checkArguments () {
function assertEqual (line 770) | function assertEqual (val, msg) {
function assertEql (line 807) | function assertEql(obj, msg) {
function assertAbove (line 846) | function assertAbove (n, msg) {
function assertLeast (line 895) | function assertLeast (n, msg) {
function assertBelow (line 944) | function assertBelow (n, msg) {
function assertMost (line 993) | function assertMost (n, msg) {
function assertInstanceOf (line 1081) | function assertInstanceOf (constructor, msg) {
function assertOwnProperty (line 1222) | function assertOwnProperty (name, msg) {
function assertOwnPropertyDescriptor (line 1255) | function assertOwnPropertyDescriptor (name, descriptor, msg) {
function assertLengthChain (line 1324) | function assertLengthChain () {
function assertLength (line 1328) | function assertLength (n, msg) {
function assertMatch (line 1360) | function assertMatch(re, msg) {
function assertKeys (line 1439) | function assertKeys (keys) {
function assertThrows (line 1559) | function assertThrows (constructor, errMsg, msg) {
function respondTo (line 1703) | function respondTo (method, msg) {
function satisfy (line 1757) | function satisfy (matcher, msg) {
function closeTo (line 1789) | function closeTo(expected, delta, msg) {
function isSubsetOf (line 1808) | function isSubsetOf(subset, superset, cmp) {
function oneOf (line 1891) | function oneOf (list, msg) {
function assertChanges (line 1929) | function assertChanges (object, prop, msg) {
function assertIncreases (line 1967) | function assertIncreases (object, prop, msg) {
function assertDecreases (line 2005) | function assertDecreases (object, prop, msg) {
function loadShould (line 3844) | function loadShould () {
function parsePath (line 4518) | function parsePath (path) {
function _getPathValue (line 4545) | function _getPathValue (parsed, obj, index) {
function addProperty (line 4634) | function addProperty(property) {
function inspect (line 4870) | function inspect(obj, showHidden, depth, colors) {
function formatValue (line 4891) | function formatValue(ctx, value, recurseTimes) {
function formatPrimitive (line 5031) | function formatPrimitive(ctx, value) {
function formatError (line 5058) | function formatError(value) {
function formatArray (line 5063) | function formatArray(ctx, value, recurseTimes, visibleKeys, keys) {
function formatProperty (line 5083) | function formatProperty(ctx, value, recurseTimes, visibleKeys, key, arra...
function reduceToSingleString (line 5143) | function reduceToSingleString(output, base, braces) {
function isArray (line 5163) | function isArray(ar) {
function isRegExp (line 5168) | function isRegExp(re) {
function isDate (line 5172) | function isDate(d) {
function isError (line 5176) | function isError(e) {
function objectToString (line 5180) | function objectToString(o) {
function exclude (line 5496) | function exclude () {
function AssertionError (line 5535) | function AssertionError (message, _props, ssf) {
function deepEqual (line 5637) | function deepEqual(a, b, m) {
function sameValue (line 5667) | function sameValue(a, b) {
function typeEqual (line 5683) | function typeEqual(a, b) {
function dateEqual (line 5696) | function dateEqual(a, b) {
function regexpEqual (line 5710) | function regexpEqual(a, b) {
function argumentsEqual (line 5726) | function argumentsEqual(a, b, m) {
function enumerable (line 5740) | function enumerable(a) {
function iterableEqual (line 5755) | function iterableEqual(a, b) {
function bufferEqual (line 5780) | function bufferEqual(a, b) {
function isValue (line 5793) | function isValue(a) {
function objectEqual (line 5808) | function objectEqual(a, b, m) {
function getType (line 5896) | function getType (obj) {
function Library (line 5918) | function Library () {
function getType (line 6031) | function getType(obj) {
function Library (line 6055) | function Library() {
FILE: test/env/mocha.js
function s (line 1) | function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&re...
function timeslice (line 72) | function timeslice() {
function isArray (line 196) | function isArray(val) {
function EventEmitter (line 205) | function EventEmitter() {}
function on (line 244) | function on() {
function Progress (line 382) | function Progress() {
function Context (line 524) | function Context() {}
function Hook (line 639) | function Hook(title, fn) {
function visit (line 969) | function visit(obj, file) {
function image (line 1264) | function image(name) {
function Mocha (line 1287) | function Mocha(options) {
function done (line 1723) | function done(failures) {
function parse (line 1774) | function parse(str) {
function shortFormat (line 1816) | function shortFormat(ms) {
function longFormat (line 1839) | function longFormat(ms) {
function plural (line 1855) | function plural(ms, n, name) {
function Pending (line 1878) | function Pending(message) {
function Base (line 2122) | function Base(runner) {
function pad (line 2228) | function pad(str, len) {
function inlineDiff (line 2241) | function inlineDiff(err, escape) {
function unifiedDiff (line 2275) | function unifiedDiff(err, escape) {
function errorDiff (line 2316) | function errorDiff(err, type, escape) {
function escapeInvisibles (line 2337) | function escapeInvisibles(line) {
function colorLines (line 2351) | function colorLines(name, str) {
function sameType (line 2370) | function sameType(a, b) {
function Doc (line 2395) | function Doc(runner) {
function Dot (line 2461) | function Dot(runner) {
function HTML (line 2558) | function HTML(runner) {
function makeUrl (line 2728) | function makeUrl(s) {
function error (line 2780) | function error(msg) {
function fragment (line 2789) | function fragment(html) {
function hideSuitesWithout (line 2811) | function hideSuitesWithout(classname) {
function unhide (line 2824) | function unhide() {
function text (line 2837) | function text(el, contents) {
function on (line 2848) | function on(el, event, fn) {
function List (line 2897) | function List(runner) {
function clean (line 2931) | function clean(test) {
function JSONReporter (line 2961) | function JSONReporter(runner) {
function clean (line 3009) | function clean(test) {
function errorJSON (line 3026) | function errorJSON(err) {
function Landing (line 3076) | function Landing(runner) {
function List (line 3154) | function List(runner) {
function Markdown (line 3223) | function Markdown(runner) {
function Min (line 3318) | function Min(runner) {
function NyanCat (line 3359) | function NyanCat(runner) {
function draw (line 3429) | function draw(type, n) {
function write (line 3597) | function write(string) {
function Progress (line 3632) | function Progress(runner, options) {
function Spec (line 3716) | function Spec(runner) {
function TAP (line 3795) | function TAP(runner) {
function title (line 3842) | function title(test) {
function XUnit (line 3884) | function XUnit(runner, options) {
function tag (line 3997) | function tag(name, attrs, close, content) {
function Runnable (line 4063) | function Runnable(title, fn) {
function multiple (line 4284) | function multiple(err) {
function done (line 4293) | function done(err) {
function callFn (line 4350) | function callFn(fn) {
function callFnAsync (line 4373) | function callFnAsync(fn) {
function Runner (line 4460) | function Runner(suite, delay) {
function next (line 4678) | function next(i) {
function next (line 4740) | function next(suite) {
function hookErr (line 4843) | function hookErr(_, errSuite, after) {
function next (line 4869) | function next(err, errSuite) {
function next (line 4992) | function next(errSuite) {
function done (line 5026) | function done(errSuite) {
function cleanSuiteReferences (line 5136) | function cleanSuiteReferences(suite) {
function uncaught (line 5185) | function uncaught(err) {
function start (line 5189) | function start() {
function filterOnly (line 5245) | function filterOnly(suite) {
function hasOnly (line 5276) | function hasOnly(suite) {
function filterLeaks (line 5288) | function filterLeaks(ok, globals) {
function extraGlobals (line 5329) | function extraGlobals() {
function Suite (line 5390) | function Suite(title, parentContext) {
function Test (line 5771) | function Test(title, fn) {
function pad (line 5810) | function pad(number) {
function toISOString (line 5823) | function toISOString(date) {
function ignored (line 6070) | function ignored(path) {
function highlight (line 6174) | function highlight(js) {
function emptyRepresentation (line 6213) | function emptyRepresentation(value, type) {
function jsonStringify (line 6307) | function jsonStringify(object, spaces, depth) {
function withStack (line 6415) | function withStack(value, fn) {
function isMochaInternal (line 6570) | function isMochaInternal(line) {
function isNodeInternal (line 6577) | function isNodeInternal(line) {
function init (line 6632) | function init () {
function toByteArray (line 6645) | function toByteArray (b64) {
function tripletToBase64 (line 6687) | function tripletToBase64 (num) {
function encodeChunk (line 6691) | function encodeChunk (uint8, start, end) {
function fromByteArray (line 6701) | function fromByteArray (uint8) {
function BrowserStdout (line 6745) | function BrowserStdout(opts) {
function typedArraySupport (line 6931) | function typedArraySupport () {
function kMaxLength (line 6943) | function kMaxLength () {
function createBuffer (line 6949) | function createBuffer (that, length) {
function Buffer (line 6978) | function Buffer (arg, encodingOrOffset, length) {
function from (line 7003) | function from (that, value, encodingOrOffset, length) {
function assertSize (line 7044) | function assertSize (size) {
function alloc (line 7050) | function alloc (that, size, fill, encoding) {
function allocUnsafe (line 7074) | function allocUnsafe (that, size) {
function fromString (line 7098) | function fromString (that, string, encoding) {
function fromArrayLike (line 7114) | function fromArrayLike (that, array) {
function fromArrayBuffer (line 7123) | function fromArrayBuffer (that, array, byteOffset, length) {
function fromObject (line 7151) | function fromObject (that, obj) {
function checked (line 7181) | function checked (length) {
function SlowBuffer (line 7191) | function SlowBuffer (length) {
function byteLength (line 7274) | function byteLength (string, encoding) {
function slowToString (line 7320) | function slowToString (encoding, start, end) {
function swap (line 7393) | function swap (b, n, m) {
function arrayIndexOf (line 7504) | function arrayIndexOf (arr, val, byteOffset, encoding) {
function hexWrite (line 7587) | function hexWrite (buf, string, offset, length) {
function utf8Write (line 7614) | function utf8Write (buf, string, offset, length) {
function asciiWrite (line 7618) | function asciiWrite (buf, string, offset, length) {
function binaryWrite (line 7622) | function binaryWrite (buf, string, offset, length) {
function base64Write (line 7626) | function base64Write (buf, string, offset, length) {
function ucs2Write (line 7630) | function ucs2Write (buf, string, offset, length) {
function base64Slice (line 7712) | function base64Slice (buf, start, end) {
function utf8Slice (line 7720) | function utf8Slice (buf, start, end) {
function decodeCodePointsArray (line 7798) | function decodeCodePointsArray (codePoints) {
function asciiSlice (line 7816) | function asciiSlice (buf, start, end) {
function binarySlice (line 7826) | function binarySlice (buf, start, end) {
function hexSlice (line 7836) | function hexSlice (buf, start, end) {
function utf16leSlice (line 7849) | function utf16leSlice (buf, start, end) {
function checkOffset (line 7897) | function checkOffset (offset, ext, length) {
function checkInt (line 8058) | function checkInt (buf, value, offset, ext, max, min) {
function objectWriteUInt16 (line 8111) | function objectWriteUInt16 (buf, value, offset, littleEndian) {
function objectWriteUInt32 (line 8145) | function objectWriteUInt32 (buf, value, offset, littleEndian) {
function checkIEEE754 (line 8295) | function checkIEEE754 (buf, value, offset, ext, max, min) {
function writeFloat (line 8300) | function writeFloat (buf, value, offset, littleEndian, noAssert) {
function writeDouble (line 8316) | function writeDouble (buf, value, offset, littleEndian, noAssert) {
function base64clean (line 8449) | function base64clean (str) {
function stringtrim (line 8461) | function stringtrim (str) {
function toHex (line 8466) | function toHex (n) {
function utf8ToBytes (line 8471) | function utf8ToBytes (string, units) {
function asciiToBytes (line 8551) | function asciiToBytes (str) {
function utf16leToBytes (line 8560) | function utf16leToBytes (str, units) {
function base64ToBytes (line 8576) | function base64ToBytes (str) {
function blitBuffer (line 8580) | function blitBuffer (src, dst, offset, length) {
function isnan (line 8588) | function isnan (val) {
function isArray (line 8619) | function isArray(arg) {
function isBoolean (line 8627) | function isBoolean(arg) {
function isNull (line 8632) | function isNull(arg) {
function isNullOrUndefined (line 8637) | function isNullOrUndefined(arg) {
function isNumber (line 8642) | function isNumber(arg) {
function isString (line 8647) | function isString(arg) {
function isSymbol (line 8652) | function isSymbol(arg) {
function isUndefined (line 8657) | function isUndefined(arg) {
function isRegExp (line 8662) | function isRegExp(re) {
function isObject (line 8667) | function isObject(arg) {
function isDate (line 8672) | function isDate(d) {
function isError (line 8677) | function isError(e) {
function isFunction (line 8682) | function isFunction(arg) {
function isPrimitive (line 8687) | function isPrimitive(arg) {
function objectToString (line 8699) | function objectToString(o) {
function map (line 8725) | function map(arr, mapper, that) {
function clonePath (line 8737) | function clonePath(path) {
function removeEmpty (line 8740) | function removeEmpty(array) {
function escapeHTML (line 8749) | function escapeHTML(s) {
function canonicalize (line 8761) | function canonicalize(obj, stack, replacementStack) {
function buildValues (line 8806) | function buildValues(components, newString, oldString, useLongestToken) {
function Diff (line 8850) | function Diff(ignoreWhitespace) {
function done (line 8857) | function done(value) {
function execEditLength (line 8893) | function execEditLength() {
function contextLines (line 9120) | function contextLines(lines) {
function eofNL (line 9125) | function eofNL(curRange, i, current) {
function EventEmitter (line 9360) | function EventEmitter() {
function g (line 9498) | function g() {
function isFunction (line 9626) | function isFunction(arg) {
function isNumber (line 9630) | function isNumber(arg) {
function isObject (line 9634) | function isObject(arg) {
function isUndefined (line 9638) | function isUndefined(arg) {
function which (line 9658) | function which(name) {
function growl (line 9807) | function growl(msg, options, fn) {
function runInContext (line 10103) | function runInContext(context, exports) {
function baseAssign (line 11000) | function baseAssign(object, source) {
function baseCopy (line 11027) | function baseCopy(source, props, object) {
function object (line 11061) | function object() {}
function isObject (line 11092) | function isObject(value) {
function isObjectLike (line 11124) | function isObjectLike(value) {
function getNative (line 11157) | function getNative(object, key) {
function isFunction (line 11178) | function isFunction(value) {
function isObject (line 11205) | function isObject(value) {
function isNative (line 11228) | function isNative(value) {
function baseProperty (line 11266) | function baseProperty(key) {
function isArrayLike (line 11291) | function isArrayLike(value) {
function isIndex (line 11303) | function isIndex(value, length) {
function isIterateeCall (line 11318) | function isIterateeCall(value, index, object) {
function isLength (line 11341) | function isLength(value) {
function isObject (line 11365) | function isObject(value) {
function create (line 11421) | function create(prototype, properties, guard) {
function baseProperty (line 11471) | function baseProperty(key) {
function isArguments (line 11505) | function isArguments(value) {
function isArrayLike (line 11535) | function isArrayLike(value) {
function isArrayLikeObject (line 11562) | function isArrayLikeObject(value) {
function isFunction (line 11582) | function isFunction(value) {
function isLength (line 11614) | function isLength(value) {
function isObject (line 11642) | function isObject(value) {
function isObjectLike (line 11670) | function isObjectLike(value) {
function isObjectLike (line 11700) | function isObjectLike(value) {
function getNative (line 11742) | function getNative(object, key) {
function isLength (line 11756) | function isLength(value) {
function isFunction (line 11796) | function isFunction(value) {
function isObject (line 11823) | function isObject(value) {
function isNative (line 11846) | function isNative(value) {
function baseProperty (line 11896) | function baseProperty(key) {
function isArrayLike (line 11921) | function isArrayLike(value) {
function isIndex (line 11933) | function isIndex(value, length) {
function isLength (line 11948) | function isLength(value) {
function shimKeys (line 11960) | function shimKeys(object) {
function isObject (line 12000) | function isObject(value) {
function keysIn (line 12065) | function keysIn(object) {
function mkdirP (line 12104) | function mkdirP (p, opts, f, made) {
function nextTick (line 12257) | function nextTick(fn, arg1, arg2, arg3) {
function cleanUpNextTick (line 12326) | function cleanUpNextTick() {
function drainQueue (line 12341) | function drainQueue() {
function Item (line 12379) | function Item(fun, array) {
function noop (line 12393) | function noop() {}
function Duplex (line 12456) | function Duplex(options) {
function onend (line 12473) | function onend() {
function onEndNT (line 12483) | function onEndNT(self) {
function forEach (line 12487) | function forEach(xs, f) {
function PassThrough (line 12510) | function PassThrough(options) {
function prependListener (line 12580) | function prependListener(emitter, event, fn) {
function ReadableState (line 12592) | function ReadableState(options, stream) {
function Readable (line 12659) | function Readable(options) {
function readableAddChunk (line 12702) | function readableAddChunk(stream, state, chunk, encoding, addToFront) {
function needMoreData (line 12757) | function needMoreData(state) {
function computeNewHighWaterMark (line 12771) | function computeNewHighWaterMark(n) {
function howMuchToRead (line 12787) | function howMuchToRead(n, state) {
function chunkInvalid (line 12919) | function chunkInvalid(state, chunk) {
function onEofChunk (line 12927) | function onEofChunk(stream, state) {
function emitReadable (line 12945) | function emitReadable(stream) {
function emitReadable_ (line 12955) | function emitReadable_(stream) {
function maybeReadMore (line 12967) | function maybeReadMore(stream, state) {
function maybeReadMore_ (line 12974) | function maybeReadMore_(stream, state) {
function onunpipe (line 13018) | function onunpipe(readable) {
function onend (line 13025) | function onend() {
function cleanup (line 13038) | function cleanup() {
function ondata (line 13061) | function ondata(chunk) {
function onerror (line 13079) | function onerror(er) {
function onclose (line 13090) | function onclose() {
function onfinish (line 13095) | function onfinish() {
function unpipe (line 13102) | function unpipe() {
function pipeOnDrain (line 13119) | function pipeOnDrain(src) {
function nReadingNextTick (line 13209) | function nReadingNextTick(self) {
function resume (line 13226) | function resume(stream, state) {
function resume_ (line 13233) | function resume_(stream, state) {
function flow (line 13255) | function flow(stream) {
function fromList (line 13333) | function fromList(n, state) {
function endReadable (line 13380) | function endReadable(stream) {
function endReadableNT (line 13393) | function endReadableNT(state, stream) {
function forEach (line 13402) | function forEach(xs, f) {
function indexOf (line 13408) | function indexOf(xs, x) {
function TransformState (line 13471) | function TransformState(stream) {
function afterTransform (line 13483) | function afterTransform(stream, er, data) {
function Transform (line 13505) | function Transform(options) {
function done (line 13582) | function done(stream, er) {
function nop (line 13645) | function nop() {}
function WriteReq (line 13647) | function WriteReq(chunk, encoding, cb) {
function WritableState (line 13655) | function WritableState(options, stream) {
function Writable (line 13771) | function Writable(options) {
function writeAfterEnd (line 13797) | function writeAfterEnd(stream, cb) {
function validChunk (line 13809) | function validChunk(stream, state, chunk, cb) {
function decodeChunk (line 13873) | function decodeChunk(state, chunk, encoding) {
function writeOrBuffer (line 13883) | function writeOrBuffer(stream, state, chunk, encoding, cb) {
function doWrite (line 13911) | function doWrite(stream, state, writev, len, chunk, encoding, cb) {
function onwriteError (line 13920) | function onwriteError(stream, state, sync, er, cb) {
function onwriteStateUpdate (line 13928) | function onwriteStateUpdate(state) {
function onwrite (line 13935) | function onwrite(stream, er) {
function afterWrite (line 13960) | function afterWrite(stream, state, finished, cb) {
function onwriteDrain (line 13970) | function onwriteDrain(stream, state) {
function clearBuffer (line 13978) | function clearBuffer(stream, state) {
function needFinish (line 14065) | function needFinish(state) {
function prefinish (line 14069) | function prefinish(stream, state) {
function finishMaybe (line 14076) | function finishMaybe(stream, state) {
function endWritable (line 14090) | function endWritable(stream, state, cb) {
function CorkedRequest (line 14102) | function CorkedRequest(state) {
function Stream (line 14196) | function Stream() {
function ondata (line 14203) | function ondata(chunk) {
function ondrain (line 14213) | function ondrain() {
function onend (line 14229) | function onend() {
function onclose (line 14237) | function onclose() {
function onerror (line 14245) | function onerror(er) {
function cleanup (line 14256) | function cleanup() {
function assertEncoding (line 14316) | function assertEncoding(encoding) {
function passThroughWrite (line 14492) | function passThroughWrite(buffer) {
function utf16DetectIncompleteChar (line 14496) | function utf16DetectIncompleteChar(buffer) {
function base64DetectIncompleteChar (line 14501) | function base64DetectIncompleteChar(buffer) {
function deprecate (line 14533) | function deprecate (fn, msg) {
function config (line 14564) | function config (name) {
function deprecated (line 14663) | function deprecated() {
function inspect (line 14710) | function inspect(obj, opts) {
function stylizeWithColor (line 14768) | function stylizeWithColor(str, styleType) {
function stylizeNoColor (line 14780) | function stylizeNoColor(str, styleType) {
function arrayToHash (line 14785) | function arrayToHash(array) {
function formatValue (line 14796) | function formatValue(ctx, value, recurseTimes) {
function formatPrimitive (line 14909) | function formatPrimitive(ctx, value) {
function formatError (line 14928) | function formatError(value) {
function formatArray (line 14933) | function formatArray(ctx, value, recurseTimes, visibleKeys, keys) {
function formatProperty (line 14953) | function formatProperty(ctx, value, recurseTimes, visibleKeys, key, arra...
function reduceToSingleString (line 15012) | function reduceToSingleString(output, base, braces) {
function isArray (line 15035) | function isArray(ar) {
function isBoolean (line 15040) | function isBoolean(arg) {
function isNull (line 15045) | function isNull(arg) {
function isNullOrUndefined (line 15050) | function isNullOrUndefined(arg) {
function isNumber (line 15055) | function isNumber(arg) {
function isString (line 15060) | function isString(arg) {
function isSymbol (line 15065) | function isSymbol(arg) {
function isUndefined (line 15070) | function isUndefined(arg) {
function isRegExp (line 15075) | function isRegExp(re) {
function isObject (line 15080) | function isObject(arg) {
function isDate (line 15085) | function isDate(d) {
function isError (line 15090) | function isError(e) {
function isFunction (line 15096) | function isFunction(arg) {
function isPrimitive (line 15101) | function isPrimitive(arg) {
function objectToString (line 15113) | function objectToString(o) {
function pad (line 15118) | function pad(n) {
function timestamp (line 15127) | function timestamp() {
function hasOwnProperty (line 15169) | function hasOwnProperty(obj, prop) {
Condensed preview — 77 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (988K chars).
[
{
"path": ".eslintrc.json",
"chars": 2097,
"preview": "{\n \"env\": {\n \"browser\": true,\n \"node\": true\n },\n\n \"globals\": {\n \"lunr\": true\n },\n\n \"extends\": \"eslint:reco"
},
{
"path": ".gitignore",
"chars": 44,
"preview": "/node_modules\ndocs/\ntest/env/file_list.json\n"
},
{
"path": ".travis.yml",
"chars": 168,
"preview": "language: node_js\nnode_js:\n - \"node\"\n - \"6\"\n - \"5\"\n - \"4\"\nscript: \"make\"\naddons:\n artifacts:\n branch: master\n "
},
{
"path": "CHANGELOG.md",
"chars": 15893,
"preview": "# Changelog\n\n## 2.3.9\n\n* Fix bug [#469](https://github.com/olivernn/lunr.js/issues/469) where a union with a complete se"
},
{
"path": "CNAME",
"chars": 11,
"preview": "lunrjs.com\n"
},
{
"path": "CONTRIBUTING.md",
"chars": 858,
"preview": "Contributions are very welcome. To make the process as easy as possible please follow these steps:\n\n* Open an issue deta"
},
{
"path": "LICENSE",
"chars": 1065,
"preview": "Copyright (C) 2013 by Oliver Nightingale\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\no"
},
{
"path": "Makefile",
"chars": 2085,
"preview": "\nSRC = lib/lunr.js \\\n\tlib/utils.js \\\n\tlib/field_ref.js \\\n\tlib/set.js \\\n\tlib/idf.js \\\n\tlib/token.js \\\n\tlib/tokenizer.js \\"
},
{
"path": "README.md",
"chars": 2635,
"preview": "# Lunr.js\n\n[](https://gi"
},
{
"path": "VERSION",
"chars": 5,
"preview": "2.3.9"
},
{
"path": "build/bower.json.template",
"chars": 142,
"preview": "{\n \"name\": \"lunr.js\",\n \"version\": \"@VERSION\",\n \"main\": \"lunr.js\",\n \"ignore\": [\n \"tests/\",\n \"perf/\",\n \"build"
},
{
"path": "build/component.json.template",
"chars": 204,
"preview": "{\n \"name\": \"lunr\",\n \"repo\": \"olivernn/lunr.js\",\n \"version\": \"@VERSION\",\n \"description\": \"Simple full-text search in "
},
{
"path": "build/jsdoc.conf.json",
"chars": 165,
"preview": "{\n \"plugins\": [\"plugins/markdown\"],\n \"destination\": \"docs\",\n \"readme\": \"README.md\",\n \"templates\": {\n \"default\": {"
},
{
"path": "build/package.json.template",
"chars": 726,
"preview": "{\n \"name\": \"lunr\",\n \"description\": \"Simple full-text search in your browser.\",\n \"version\": \"@VERSION\",\n \"author\": \"O"
},
{
"path": "build/release.sh",
"chars": 1758,
"preview": "#!/usr/bin/env bash\n\nfile_has_changed () {\n if [ ! -f $1 ]; then\n return 1\n fi\n\n for f in `git ls-files --modified"
},
{
"path": "build/wrapper_end",
"chars": 873,
"preview": "\n /**\n * export the module via AMD, CommonJS or as a browser global\n * Export code from https://github.com/umdjs/um"
},
{
"path": "build/wrapper_start",
"chars": 175,
"preview": "/**\n * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - @VERSION\n * Copyright (C) @YEAR "
},
{
"path": "index.html",
"chars": 11825,
"preview": "<!DOCTYPE html>\n<html>\n<head>\n <title>lunr.js - A bit like Solr, but much smaller and not as bright</title>\n <link rel"
},
{
"path": "lib/builder.js",
"chars": 13213,
"preview": "/*!\n * lunr.Builder\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * lunr.Builder performs indexing on a set of doc"
},
{
"path": "lib/field_ref.js",
"chars": 638,
"preview": "lunr.FieldRef = function (docRef, fieldName, stringValue) {\n this.docRef = docRef\n this.fieldName = fieldName\n this._"
},
{
"path": "lib/idf.js",
"chars": 653,
"preview": "/**\n * A function to calculate the inverse document frequency for\n * a posting. This is shared between the builder and t"
},
{
"path": "lib/index.js",
"chars": 18172,
"preview": "/*!\n * lunr.Index\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * An index contains the built index of all documen"
},
{
"path": "lib/lunr.js",
"chars": 1129,
"preview": "/**\n * A convenience function for configuring and constructing\n * a new lunr Index.\n *\n * A lunr.Builder instance is cre"
},
{
"path": "lib/match_data.js",
"chars": 3536,
"preview": "/**\n * Contains and collects metadata about a matching document.\n * A single instance of lunr.MatchData is returned as p"
},
{
"path": "lib/pipeline.js",
"chars": 8185,
"preview": "/*!\n * lunr.Pipeline\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * lunr.Pipelines maintain an ordered list of fu"
},
{
"path": "lib/query.js",
"chars": 6388,
"preview": "/**\n * A lunr.Query provides a programmatic way of defining queries to be performed\n * against a {@link lunr.Index}.\n *\n"
},
{
"path": "lib/query_lexer.js",
"chars": 4791,
"preview": "lunr.QueryLexer = function (str) {\n this.lexemes = []\n this.str = str\n this.length = str.length\n this.pos = 0\n this"
},
{
"path": "lib/query_parse_error.js",
"chars": 197,
"preview": "lunr.QueryParseError = function (message, start, end) {\n this.name = \"QueryParseError\"\n this.message = message\n this."
},
{
"path": "lib/query_parser.js",
"chars": 6949,
"preview": "lunr.QueryParser = function (str, query) {\n this.lexer = new lunr.QueryLexer (str)\n this.query = query\n this.currentC"
},
{
"path": "lib/set.js",
"chars": 2503,
"preview": "/*!\n * lunr.Set\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * A lunr set.\n *\n * @constructor\n */\nlunr.Set = func"
},
{
"path": "lib/stemmer.js",
"chars": 5330,
"preview": "/* eslint-disable */\n/*!\n * lunr.stemmer\n * Copyright (C) @YEAR Oliver Nightingale\n * Includes code from - http://tartar"
},
{
"path": "lib/stop_word_filter.js",
"chars": 2525,
"preview": "/*!\n * lunr.stopWordFilter\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * lunr.generateStopWordFilter builds a st"
},
{
"path": "lib/token.js",
"chars": 1568,
"preview": "/**\n * A token wraps a string representation of a token\n * as it is passed through the text processing pipeline.\n *\n * @"
},
{
"path": "lib/token_set.js",
"chars": 10955,
"preview": "/*!\n * lunr.TokenSet\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * A token set is used to store the unique list "
},
{
"path": "lib/token_set_builder.js",
"chars": 1604,
"preview": "lunr.TokenSet.Builder = function () {\n this.previousWord = \"\"\n this.root = new lunr.TokenSet\n this.uncheckedNodes = ["
},
{
"path": "lib/tokenizer.js",
"chars": 2265,
"preview": "/*!\n * lunr.tokenizer\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * A function for splitting a string into token"
},
{
"path": "lib/trimmer.js",
"chars": 743,
"preview": "/*!\n * lunr.trimmer\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * lunr.trimmer is a pipeline function for trimmi"
},
{
"path": "lib/utils.js",
"chars": 2137,
"preview": "/*!\n * lunr.utils\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * A namespace containing utils for the rest of the"
},
{
"path": "lib/vector.js",
"chars": 5293,
"preview": "/*!\n * lunr.Vector\n * Copyright (C) @YEAR Oliver Nightingale\n */\n\n/**\n * A vector is used to construct the vector space "
},
{
"path": "lunr.js",
"chars": 99804,
"preview": "/**\n * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.3.9\n * Copyright (C) 2020 Oliv"
},
{
"path": "notes",
"chars": 1053,
"preview": "1 - \"Mr. Green killed Colonel Mustard in the study with the candlestick. Mr. Green is not a very nice fellow.\"\n2 - \"Prof"
},
{
"path": "package.json",
"chars": 723,
"preview": "{\n \"name\": \"lunr\",\n \"description\": \"Simple full-text search in your browser.\",\n \"version\": \"2.3.9\",\n \"author\": \"Oliv"
},
{
"path": "perf/builder_perf.js",
"chars": 795,
"preview": "suite('lunr.Builder', function () {\n var documents = [{\n id: 'a',\n title: 'Mr. Green kills Colonel Mustard',\n "
},
{
"path": "perf/perf_helper.js",
"chars": 615,
"preview": "var lunr = require('../lunr.js'),\n Benchmark = require('benchmark'),\n wordList = require('word-list'),\n fs = re"
},
{
"path": "perf/pipeline_perf.js",
"chars": 1152,
"preview": "suite('lunr.Pipeline', function () { \n var tokenToToken = function(token) {\n return token\n }\n\n var tokenToTokenAr"
},
{
"path": "perf/query_parser_perf.js",
"chars": 464,
"preview": "suite('lunr.QueryParser', function () {\n var parse = function (q) {\n var query = new lunr.Query (['title', 'body']),"
},
{
"path": "perf/search_perf.js",
"chars": 1691,
"preview": "suite('search', function () {\n var documents = [{\n id: 'a',\n title: 'Mr. Green kills Colonel Mustard',\n body: "
},
{
"path": "perf/stemmer_perf.js",
"chars": 175,
"preview": "suite('lunr.stemmer', function () {\n this.add('#call', function () {\n for (var i = 0; i < words.length; i++) {\n "
},
{
"path": "perf/token_set_perf.js",
"chars": 1056,
"preview": "suite('lunr.TokenSet', function () {\n var tokenSet = lunr.TokenSet.fromArray([\n 'january', 'february', 'march', 'apr"
},
{
"path": "perf/tokenizer_perf.js",
"chars": 568,
"preview": "suite('lunr.tokenizer', function () {\n var lorem = \"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiu"
},
{
"path": "perf/vector_perf.js",
"chars": 575,
"preview": "suite('lunr.Vector', function () {\n var index, val\n\n var v1 = new lunr.Vector,\n v2 = new lunr.Vector\n\n for (var "
},
{
"path": "styles.css",
"chars": 1576,
"preview": "body {\n background-color: #081f28;\n color: #708284;\n font-family: 'Helvetica Neue', Helvetica, sans-serif;\n font-siz"
},
{
"path": "test/builder_test.js",
"chars": 6495,
"preview": "suite('lunr.Builder', function () {\n suite('#add', function () {\n setup(function () {\n this.builder = new lunr."
},
{
"path": "test/env/chai.js",
"chars": 158570,
"preview": "(function(f){if(typeof exports===\"object\"&&typeof module!==\"undefined\"){module.exports=f()}else if(typeof define===\"func"
},
{
"path": "test/env/index.mustache",
"chars": 930,
"preview": "<html>\n<head>\n <meta charset=\"utf-8\">\n <title>Mocha Tests</title>\n <link href=\"/test/env/mocha.css\" rel=\"stylesheet\" "
},
{
"path": "test/env/mocha.css",
"chars": 5610,
"preview": "@charset \"utf-8\";\n\nbody {\n margin:0;\n}\n\n#mocha {\n font: 20px/1.5 \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n mar"
},
{
"path": "test/env/mocha.js",
"chars": 411343,
"preview": "(function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require==\"function\"&&require;if(!u&&a)return a(o,!0)"
},
{
"path": "test/field_ref_test.js",
"chars": 1016,
"preview": "suite('lunr.FieldRef', function () {\n suite('#toString', function () {\n test('combines document ref and field name',"
},
{
"path": "test/fixtures/stemming_vocab.json",
"chars": 1574,
"preview": "{\"consign\":\"consign\",\"consigned\":\"consign\",\"consigning\":\"consign\",\"consignment\":\"consign\",\"consist\":\"consist\",\"consisted"
},
{
"path": "test/index.html",
"chars": 1745,
"preview": "<html>\n<head>\n <meta charset=\"utf-8\">\n <title>Mocha Tests</title>\n <link href=\"/test/env/mocha.css\" rel=\"stylesheet\" "
},
{
"path": "test/match_data_test.js",
"chars": 1169,
"preview": "suite('lunr.MatchData', function () {\n suite('#combine', function () {\n setup(function () {\n this.match = new l"
},
{
"path": "test/pipeline_test.js",
"chars": 7166,
"preview": "suite('lunr.Pipeline', function () {\n var noop = function () {}\n\n setup(function () {\n this.existingRegisteredFunct"
},
{
"path": "test/query_lexer_test.js",
"chars": 16625,
"preview": "suite('lunr.QueryLexer', function () {\n suite('#run', function () {\n\n var lex = function (str) {\n var lexer = n"
},
{
"path": "test/query_parser_test.js",
"chars": 14602,
"preview": "suite('lunr.QueryParser', function () {\n var parse = function (q) {\n var query = new lunr.Query (['title', 'body']),"
},
{
"path": "test/query_test.js",
"chars": 6291,
"preview": "suite('lunr.Query', function () {\n var allFields = ['title', 'body']\n\n suite('#term', function () {\n setup(function"
},
{
"path": "test/search_test.js",
"chars": 32793,
"preview": "suite('search', function () {\n setup(function () {\n this.documents = [{\n id: 'a',\n title: 'Mr. Green kills"
},
{
"path": "test/serialization_test.js",
"chars": 1628,
"preview": "suite('serialization', function () {\n setup(function () {\n var documents = [{\n id: 'a',\n title: 'Mr. Green"
},
{
"path": "test/set_test.js",
"chars": 4267,
"preview": "suite('lunr.Set', function () {\n suite('#contains', function () {\n suite('complete set', function () {\n test('r"
},
{
"path": "test/stemmer_test.js",
"chars": 705,
"preview": "suite('lunr.stemmer', function () {\n test('reduces words to their stem', function (done) {\n withFixture('stemming_vo"
},
{
"path": "test/stop_word_filter_test.js",
"chars": 944,
"preview": "suite('lunr.stopWordFilter', function () {\n test('filters stop words', function () {\n var stopWords = ['the', 'and',"
},
{
"path": "test/test_helper.js",
"chars": 328,
"preview": "var lunr = require('../lunr.js'),\n assert = require('chai').assert,\n fs = require('fs'),\n path = require('path'"
},
{
"path": "test/token_set_test.js",
"chars": 10789,
"preview": "suite('lunr.TokenSet', function () {\n suite('#toString', function () {\n test('includes node finality', function () {"
},
{
"path": "test/token_test.js",
"chars": 1535,
"preview": "suite('lunr.Token', function () {\n suite('#toString', function () {\n test('converts the token to a string', function"
},
{
"path": "test/tokenizer_test.js",
"chars": 3654,
"preview": "suite('lunr.tokenizer', function () {\n var toString = function (o) { return o.toString() }\n\n test('splitting into toke"
},
{
"path": "test/trimmer_test.js",
"chars": 975,
"preview": "suite('lunr.trimmer', function () {\n test('latin characters', function () {\n var token = new lunr.Token ('hello')\n "
},
{
"path": "test/utils_test.js",
"chars": 1893,
"preview": "suite('lunr.utils', function () {\n suite('#clone', function () {\n var subject = function (obj) {\n setup(functio"
},
{
"path": "test/vector_test.js",
"chars": 3928,
"preview": "suite('lunr.Vector', function () {\n var vectorFromArgs = function () {\n var vector = new lunr.Vector\n\n Array.prot"
}
]
About this extraction
This page contains the full source code of the olivernn/lunr.js GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 77 files (924.1 KB), approximately 248.7k tokens, and a symbol index with 377 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.